Mirror of https://github.com/ClusterCockpit/cc-metric-collector.git (synced 2025-07-20 11:51:40 +02:00)

Compare commits: v0.6.5...lustre_job (15 commits)
Commits:
1fcb302620, 00d8041254, 6b0ac45d07, 9e746006e8, fbcacf1a4f,
4bd71224df, 6bf3bfd10a, 0fbff00996, 8849824ba9, 0b343171a7,
b973e8ac9c, ed511b7c09, a0acf01dc3, 58461f1f72, c09d8fb118
.github/workflows/Release.yml (vendored), 61 lines changed
@@ -133,13 +133,63 @@ jobs:
           name: cc-metric-collector SRPM for UBI 8
           path: ${{ steps.rpmbuild.outputs.SRPM }}

+  #
+  # Build on Ubuntu 20.04 using official go package
+  #
+  Ubuntu-focal-build:
+    runs-on: ubuntu-latest
+    container: ubuntu:20.04
+    # The job outputs link to the outputs of the 'debrename' step
+    # Only job outputs can be used in child jobs
+    outputs:
+      deb : ${{steps.debrename.outputs.DEB}}
+    steps:
+      # Use apt to install development packages
+      - name: Install development packages
+        run: |
+          apt update && apt --assume-yes upgrade
+          apt --assume-yes install build-essential sed git wget bash
+      # Checkout git repository and submodules
+      # fetch-depth must be 0 to use git describe
+      # See: https://github.com/marketplace/actions/checkout
+      - name: Checkout
+        uses: actions/checkout@v2
+        with:
+          submodules: recursive
+          fetch-depth: 0
+      # Use official golang package
+      - name: Install Golang
+        run: |
+          wget -q https://go.dev/dl/go1.19.1.linux-amd64.tar.gz
+          tar -C /usr/local -xzf go1.19.1.linux-amd64.tar.gz
+          export PATH=/usr/local/go/bin:/usr/local/go/pkg/tool/linux_amd64:$PATH
+          go version
+      - name: DEB build MetricCollector
+        id: dpkg-build
+        run: |
+          export PATH=/usr/local/go/bin:/usr/local/go/pkg/tool/linux_amd64:$PATH
+          make DEB
+      - name: Rename DEB (add '_ubuntu20.04')
+        id: debrename
+        run: |
+          OLD_DEB_NAME=$(echo "${{steps.dpkg-build.outputs.DEB}}" | rev | cut -d '.' -f 2- | rev)
+          NEW_DEB_FILE="${OLD_DEB_NAME}_ubuntu20.04.deb"
+          mv "${{steps.dpkg-build.outputs.DEB}}" "${NEW_DEB_FILE}"
+          echo "::set-output name=DEB::${NEW_DEB_FILE}"
+      # See: https://github.com/actions/upload-artifact
+      - name: Save DEB as artifact
+        uses: actions/upload-artifact@v2
+        with:
+          name: cc-metric-collector DEB for Ubuntu 20.04
+          path: ${{ steps.debrename.outputs.DEB }}
+
   #
   # Create release with fresh RPMs
   #
   Release:
     runs-on: ubuntu-latest
     # We need the RPMs, so add dependency
-    needs: [AlmaLinux-RPM-build, UBI-8-RPM-build]
+    needs: [AlmaLinux-RPM-build, UBI-8-RPM-build, Ubuntu-focal-build]

     steps:
       # See: https://github.com/actions/download-artifact
@@ -161,6 +211,11 @@ jobs:
         with:
           name: cc-metric-collector SRPM for UBI 8

+      - name: Download Ubuntu 20.04 DEB
+        uses: actions/download-artifact@v2
+        with:
+          name: cc-metric-collector DEB for Ubuntu 20.04
+
       # The download actions do not publish the name of the downloaded file,
       # so we re-use the job outputs of the parent jobs. The files are all
       # downloaded to the current folder.
@@ -174,14 +229,17 @@ jobs:
           ALMA_85_SRPM=$(basename "${{ needs.AlmaLinux-RPM-build.outputs.srpm}}")
           UBI_8_RPM=$(basename "${{ needs.UBI-8-RPM-build.outputs.rpm}}")
           UBI_8_SRPM=$(basename "${{ needs.UBI-8-RPM-build.outputs.srpm}}")
+          U_2004_DEB=$(basename "${{ needs.Ubuntu-focal-build.outputs.deb}}")
           echo "ALMA_85_RPM::${ALMA_85_RPM}"
           echo "ALMA_85_SRPM::${ALMA_85_SRPM}"
           echo "UBI_8_RPM::${UBI_8_RPM}"
           echo "UBI_8_SRPM::${UBI_8_SRPM}"
+          echo "U_2004_DEB::${U_2004_DEB}"
           echo "::set-output name=ALMA_85_RPM::${ALMA_85_RPM}"
           echo "::set-output name=ALMA_85_SRPM::${ALMA_85_SRPM}"
           echo "::set-output name=UBI_8_RPM::${UBI_8_RPM}"
           echo "::set-output name=UBI_8_SRPM::${UBI_8_SRPM}"
+          echo "::set-output name=U_2004_DEB::${U_2004_DEB}"

       # See: https://github.com/softprops/action-gh-release
       - name: Release
@@ -194,3 +252,4 @@ jobs:
             ${{ steps.files.outputs.ALMA_85_SRPM }}
             ${{ steps.files.outputs.UBI_8_RPM }}
             ${{ steps.files.outputs.UBI_8_SRPM }}
+            ${{ steps.files.outputs.U_2004_DEB }}
@@ -15,10 +15,10 @@ import (
     "sync"
     "time"

-    cclog "github.com/ClusterCockpit/cc-metric-collector/internal/ccLogger"
-    lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric"
     mr "github.com/ClusterCockpit/cc-metric-collector/internal/metricRouter"
-    mct "github.com/ClusterCockpit/cc-metric-collector/internal/multiChanTicker"
+    cclog "github.com/ClusterCockpit/cc-metric-collector/pkg/ccLogger"
+    lp "github.com/ClusterCockpit/cc-metric-collector/pkg/ccMetric"
+    mct "github.com/ClusterCockpit/cc-metric-collector/pkg/multiChanTicker"
 )

 type CentralConfigFile struct {
@@ -5,7 +5,7 @@ import (
     "bytes"
     "encoding/json"
     "fmt"
-    "io/ioutil"
+    "io"
     "os"
     "os/exec"
     "os/user"
@@ -14,8 +14,8 @@ import (
     "strings"
     "time"

-    cclog "github.com/ClusterCockpit/cc-metric-collector/internal/ccLogger"
-    lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric"
+    cclog "github.com/ClusterCockpit/cc-metric-collector/pkg/ccLogger"
+    lp "github.com/ClusterCockpit/cc-metric-collector/pkg/ccMetric"
 )

 const DEFAULT_BEEGFS_CMD = "beegfs-ctl"
@@ -115,7 +115,7 @@ func (m *BeegfsMetaCollector) Read(interval time.Duration, output chan lp.CCMetr
         return
     }
     //get mounpoint
-    buffer, _ := ioutil.ReadFile(string("/proc/mounts"))
+    buffer, _ := os.ReadFile(string("/proc/mounts"))
     mounts := strings.Split(string(buffer), "\n")
     var mountpoints []string
     for _, line := range mounts {
@@ -157,9 +157,9 @@ func (m *BeegfsMetaCollector) Read(interval time.Duration, output chan lp.CCMetr
     if err != nil {
         fmt.Fprintf(os.Stderr, "BeegfsMetaCollector.Read(): Failed to execute command \"%s\": %s\n", cmd.String(), err.Error())
         fmt.Fprintf(os.Stderr, "BeegfsMetaCollector.Read(): command exit code: \"%d\"\n", cmd.ProcessState.ExitCode())
-        data, _ := ioutil.ReadAll(cmdStderr)
+        data, _ := io.ReadAll(cmdStderr)
         fmt.Fprintf(os.Stderr, "BeegfsMetaCollector.Read(): command stderr: \"%s\"\n", string(data))
-        data, _ = ioutil.ReadAll(cmdStdout)
+        data, _ = io.ReadAll(cmdStdout)
         fmt.Fprintf(os.Stderr, "BeegfsMetaCollector.Read(): command stdout: \"%s\"\n", string(data))
         return
     }
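The same `io/ioutil` replacement recurs in most of the files below. As a side note (not part of the diff), a minimal, self-contained sketch of the standard-library migration these commits apply: since Go 1.16, `os.ReadFile` and `io.ReadAll` supersede the deprecated `ioutil.ReadFile` and `ioutil.ReadAll` with the same signatures.

```go
package main

import (
	"bytes"
	"fmt"
	"io"
	"os"
)

func main() {
	// os.ReadFile replaces ioutil.ReadFile (deprecated since Go 1.16).
	buffer, err := os.ReadFile("/proc/mounts")
	if err != nil {
		fmt.Fprintln(os.Stderr, err)
		return
	}
	// io.ReadAll replaces ioutil.ReadAll for any io.Reader.
	data, _ := io.ReadAll(bytes.NewReader(buffer))
	fmt.Printf("read %d bytes from /proc/mounts\n", len(data))
}
```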
@@ -5,7 +5,7 @@ import (
     "bytes"
     "encoding/json"
     "fmt"
-    "io/ioutil"
+    "io"
     "os"
     "os/exec"
     "os/user"
@@ -14,8 +14,8 @@ import (
     "strings"
     "time"

-    cclog "github.com/ClusterCockpit/cc-metric-collector/internal/ccLogger"
-    lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric"
+    cclog "github.com/ClusterCockpit/cc-metric-collector/pkg/ccLogger"
+    lp "github.com/ClusterCockpit/cc-metric-collector/pkg/ccMetric"
 )

 // Struct for the collector-specific JSON config
@@ -108,7 +108,7 @@ func (m *BeegfsStorageCollector) Read(interval time.Duration, output chan lp.CCM
         return
     }
     //get mounpoint
-    buffer, _ := ioutil.ReadFile(string("/proc/mounts"))
+    buffer, _ := os.ReadFile(string("/proc/mounts"))
     mounts := strings.Split(string(buffer), "\n")
     var mountpoints []string
     for _, line := range mounts {
@@ -149,9 +149,9 @@ func (m *BeegfsStorageCollector) Read(interval time.Duration, output chan lp.CCM
     if err != nil {
         fmt.Fprintf(os.Stderr, "BeegfsStorageCollector.Read(): Failed to execute command \"%s\": %s\n", cmd.String(), err.Error())
         fmt.Fprintf(os.Stderr, "BeegfsStorageCollector.Read(): command exit code: \"%d\"\n", cmd.ProcessState.ExitCode())
-        data, _ := ioutil.ReadAll(cmdStderr)
+        data, _ := io.ReadAll(cmdStderr)
         fmt.Fprintf(os.Stderr, "BeegfsStorageCollector.Read(): command stderr: \"%s\"\n", string(data))
-        data, _ = ioutil.ReadAll(cmdStdout)
+        data, _ = io.ReadAll(cmdStdout)
         fmt.Fprintf(os.Stderr, "BeegfsStorageCollector.Read(): command stdout: \"%s\"\n", string(data))
         return
     }
@@ -6,9 +6,9 @@ import (
     "sync"
     "time"

-    cclog "github.com/ClusterCockpit/cc-metric-collector/internal/ccLogger"
-    lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric"
-    mct "github.com/ClusterCockpit/cc-metric-collector/internal/multiChanTicker"
+    cclog "github.com/ClusterCockpit/cc-metric-collector/pkg/ccLogger"
+    lp "github.com/ClusterCockpit/cc-metric-collector/pkg/ccMetric"
+    mct "github.com/ClusterCockpit/cc-metric-collector/pkg/multiChanTicker"
 )

 // Map of all available metric collectors
@@ -20,6 +20,7 @@ var AvailableCollectors = map[string]MetricCollector{
     "netstat": new(NetstatCollector),
     "ibstat": new(InfinibandCollector),
     "lustrestat": new(LustreCollector),
+    "lustre_jobstat": new(LustreJobstatCollector),
     "cpustat": new(CpustatCollector),
     "topprocs": new(TopProcsCollector),
     "nvidia": new(NvidiaCollector),
@@ -37,7 +38,7 @@ var AvailableCollectors = map[string]MetricCollector{
     "beegfs_meta": new(BeegfsMetaCollector),
     "beegfs_storage": new(BeegfsStorageCollector),
     "rocm_smi": new(RocmSmiCollector),
     "schedstat": new(SchedstatCollector),
 }

 // Metric collector manager data structure
@@ -10,8 +10,8 @@ import (
     "strings"
     "time"

-    cclog "github.com/ClusterCockpit/cc-metric-collector/internal/ccLogger"
-    lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric"
+    cclog "github.com/ClusterCockpit/cc-metric-collector/pkg/ccLogger"
+    lp "github.com/ClusterCockpit/cc-metric-collector/pkg/ccMetric"
 )

 //
@@ -3,14 +3,14 @@ package collectors
 import (
     "encoding/json"
     "fmt"
-    "io/ioutil"
+    "os"
     "path/filepath"
     "strconv"
     "strings"
     "time"

-    cclog "github.com/ClusterCockpit/cc-metric-collector/internal/ccLogger"
-    lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric"
+    cclog "github.com/ClusterCockpit/cc-metric-collector/pkg/ccLogger"
+    lp "github.com/ClusterCockpit/cc-metric-collector/pkg/ccMetric"
     "golang.org/x/sys/unix"
 )

@@ -88,7 +88,7 @@ func (m *CPUFreqCollector) Init(config json.RawMessage) error {

     // Read package ID
     physicalPackageIDFile := filepath.Join(cpuDir, "topology", "physical_package_id")
-    line, err := ioutil.ReadFile(physicalPackageIDFile)
+    line, err := os.ReadFile(physicalPackageIDFile)
     if err != nil {
         return fmt.Errorf("unable to read physical package ID from file '%s': %v", physicalPackageIDFile, err)
     }
@@ -100,7 +100,7 @@ func (m *CPUFreqCollector) Init(config json.RawMessage) error {

     // Read core ID
     coreIDFile := filepath.Join(cpuDir, "topology", "core_id")
-    line, err = ioutil.ReadFile(coreIDFile)
+    line, err = os.ReadFile(coreIDFile)
     if err != nil {
         return fmt.Errorf("unable to read core ID from file '%s': %v", coreIDFile, err)
     }
@@ -188,7 +188,7 @@ func (m *CPUFreqCollector) Read(interval time.Duration, output chan lp.CCMetric)
     }

     // Read current frequency
-    line, err := ioutil.ReadFile(t.scalingCurFreqFile)
+    line, err := os.ReadFile(t.scalingCurFreqFile)
     if err != nil {
         cclog.ComponentError(
             m.name,
@@ -9,8 +9,8 @@ import (
     "strings"
     "time"

-    cclog "github.com/ClusterCockpit/cc-metric-collector/internal/ccLogger"
-    lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric"
+    cclog "github.com/ClusterCockpit/cc-metric-collector/pkg/ccLogger"
+    lp "github.com/ClusterCockpit/cc-metric-collector/pkg/ccMetric"
     sysconf "github.com/tklauser/go-sysconf"
 )

@@ -22,12 +22,12 @@ type CpustatCollectorConfig struct {

 type CpustatCollector struct {
     metricCollector
     config CpustatCollectorConfig
     lastTimestamp time.Time // Store time stamp of last tick to derive values
     matches map[string]int
     cputags map[string]map[string]string
     nodetags map[string]string
     olddata map[string]map[string]int64
 }

 func (m *CpustatCollector) Init(config json.RawMessage) error {
@@ -118,7 +118,7 @@ func (m *CpustatCollector) parseStatLine(linefields []string, tags map[string]st
         }
     }
 }

 for name, value := range values {
     y, err := lp.New(name, tags, m.meta, map[string]interface{}{"value": value * 100}, now)
     if err == nil {
|
|||||||
import (
|
import (
|
||||||
"encoding/json"
|
"encoding/json"
|
||||||
"errors"
|
"errors"
|
||||||
"io/ioutil"
|
|
||||||
"log"
|
"log"
|
||||||
|
"os"
|
||||||
"os/exec"
|
"os/exec"
|
||||||
"strings"
|
"strings"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric"
|
lp "github.com/ClusterCockpit/cc-metric-collector/pkg/ccMetric"
|
||||||
influx "github.com/influxdata/line-protocol"
|
influx "github.com/influxdata/line-protocol"
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -53,7 +53,7 @@ func (m *CustomCmdCollector) Init(config json.RawMessage) error {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
for _, f := range m.config.Files {
|
for _, f := range m.config.Files {
|
||||||
_, err = ioutil.ReadFile(f)
|
_, err = os.ReadFile(f)
|
||||||
if err == nil {
|
if err == nil {
|
||||||
m.files = append(m.files, f)
|
m.files = append(m.files, f)
|
||||||
} else {
|
} else {
|
||||||
@@ -106,7 +106,7 @@ func (m *CustomCmdCollector) Read(interval time.Duration, output chan lp.CCMetri
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
for _, file := range m.files {
|
for _, file := range m.files {
|
||||||
buffer, err := ioutil.ReadFile(file)
|
buffer, err := os.ReadFile(file)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Print(err)
|
log.Print(err)
|
||||||
return
|
return
|
||||||
|
@@ -8,8 +8,8 @@ import (
     "syscall"
     "time"

-    cclog "github.com/ClusterCockpit/cc-metric-collector/internal/ccLogger"
-    lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric"
+    cclog "github.com/ClusterCockpit/cc-metric-collector/pkg/ccLogger"
+    lp "github.com/ClusterCockpit/cc-metric-collector/pkg/ccMetric"
 )

 // "log"
|
|||||||
"bytes"
|
"bytes"
|
||||||
"encoding/json"
|
"encoding/json"
|
||||||
"fmt"
|
"fmt"
|
||||||
"io/ioutil"
|
"io"
|
||||||
"log"
|
"log"
|
||||||
"os/exec"
|
"os/exec"
|
||||||
"os/user"
|
"os/user"
|
||||||
@@ -13,8 +13,8 @@ import (
|
|||||||
"strings"
|
"strings"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
cclog "github.com/ClusterCockpit/cc-metric-collector/internal/ccLogger"
|
cclog "github.com/ClusterCockpit/cc-metric-collector/pkg/ccLogger"
|
||||||
lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric"
|
lp "github.com/ClusterCockpit/cc-metric-collector/pkg/ccMetric"
|
||||||
)
|
)
|
||||||
|
|
||||||
const DEFAULT_GPFS_CMD = "mmpmon"
|
const DEFAULT_GPFS_CMD = "mmpmon"
|
||||||
@@ -118,8 +118,8 @@ func (m *GpfsCollector) Read(interval time.Duration, output chan lp.CCMetric) {
|
|||||||
cmd.Stderr = cmdStderr
|
cmd.Stderr = cmdStderr
|
||||||
err := cmd.Run()
|
err := cmd.Run()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
dataStdErr, _ := ioutil.ReadAll(cmdStderr)
|
dataStdErr, _ := io.ReadAll(cmdStderr)
|
||||||
dataStdOut, _ := ioutil.ReadAll(cmdStdout)
|
dataStdOut, _ := io.ReadAll(cmdStdout)
|
||||||
cclog.ComponentError(
|
cclog.ComponentError(
|
||||||
m.name,
|
m.name,
|
||||||
fmt.Sprintf("Read(): Failed to execute command \"%s\": %v\n", cmd.String(), err),
|
fmt.Sprintf("Read(): Failed to execute command \"%s\": %v\n", cmd.String(), err),
|
||||||
|
@@ -2,11 +2,10 @@ package collectors

 import (
     "fmt"
-    "io/ioutil"
     "os"

-    cclog "github.com/ClusterCockpit/cc-metric-collector/internal/ccLogger"
-    lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric"
+    cclog "github.com/ClusterCockpit/cc-metric-collector/pkg/ccLogger"
+    lp "github.com/ClusterCockpit/cc-metric-collector/pkg/ccMetric"
     "golang.org/x/sys/unix"

     "encoding/json"
@@ -19,8 +18,9 @@ import (
 const IB_BASEPATH = "/sys/class/infiniband/"

 type InfinibandCollectorMetric struct {
     path string
     unit string
+    scale int64
 }

 type InfinibandCollectorInfo struct {
@@ -84,7 +84,7 @@ func (m *InfinibandCollector) Init(config json.RawMessage) error {
     for _, path := range ibDirs {

         // Skip, when no LID is assigned
-        line, err := ioutil.ReadFile(filepath.Join(path, "lid"))
+        line, err := os.ReadFile(filepath.Join(path, "lid"))
         if err != nil {
             continue
         }
@@ -113,10 +113,10 @@ func (m *InfinibandCollector) Init(config json.RawMessage) error {
         // Check access to counter files
         countersDir := filepath.Join(path, "counters")
         portCounterFiles := map[string]InfinibandCollectorMetric{
-            "ib_recv": {path: filepath.Join(countersDir, "port_rcv_data"), unit: "bytes"},
-            "ib_xmit": {path: filepath.Join(countersDir, "port_xmit_data"), unit: "bytes"},
-            "ib_recv_pkts": {path: filepath.Join(countersDir, "port_rcv_packets"), unit: "packets"},
-            "ib_xmit_pkts": {path: filepath.Join(countersDir, "port_xmit_packets"), unit: "packets"},
+            "ib_recv": {path: filepath.Join(countersDir, "port_rcv_data"), unit: "bytes", scale: 4},
+            "ib_xmit": {path: filepath.Join(countersDir, "port_xmit_data"), unit: "bytes", scale: 4},
+            "ib_recv_pkts": {path: filepath.Join(countersDir, "port_rcv_packets"), unit: "packets", scale: 1},
+            "ib_xmit_pkts": {path: filepath.Join(countersDir, "port_xmit_packets"), unit: "packets", scale: 1},
         }
         for _, counter := range portCounterFiles {
             err := unix.Access(counter.path, unix.R_OK)
@@ -174,7 +174,7 @@ func (m *InfinibandCollector) Read(interval time.Duration, output chan lp.CCMetr
         for counterName, counterDef := range info.portCounterFiles {

             // Read counter file
-            line, err := ioutil.ReadFile(counterDef.path)
+            line, err := os.ReadFile(counterDef.path)
             if err != nil {
                 cclog.ComponentError(
                     m.name,
@@ -191,6 +191,8 @@ func (m *InfinibandCollector) Read(interval time.Duration, output chan lp.CCMetr
                     fmt.Sprintf("Read(): Failed to convert Infininiband metrice %s='%s' to int64: %v", counterName, data, err))
                 continue
             }
+            // Scale raw value
+            v *= counterDef.scale

             // Send absolut values
             if m.config.SendAbsoluteValues {
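For context on the `scale` field added above (an explanatory aside, not part of the diff): the InfiniBand sysfs counters `port_rcv_data` and `port_xmit_data` count 4-byte words rather than bytes, so the collector multiplies the raw reading by 4 to report bytes, while the packet counters keep a scale of 1. A minimal sketch with a made-up counter value:

```go
package main

import "fmt"

func main() {
	// Hypothetical raw value as read from counters/port_xmit_data,
	// in units of 4-byte words.
	var raw int64 = 2500
	var scale int64 = 4 // same role as InfinibandCollectorMetric.scale above
	fmt.Println("bytes transferred:", raw*scale) // 10000
}
```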
@@ -4,8 +4,8 @@ import (
     "bufio"
     "os"

-    cclog "github.com/ClusterCockpit/cc-metric-collector/internal/ccLogger"
-    lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric"
+    cclog "github.com/ClusterCockpit/cc-metric-collector/pkg/ccLogger"
+    lp "github.com/ClusterCockpit/cc-metric-collector/pkg/ccMetric"

     // "log"
     "encoding/json"
@@ -10,7 +10,7 @@ import (
     "strings"
     "time"

-    lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric"
+    lp "github.com/ClusterCockpit/cc-metric-collector/pkg/ccMetric"
 )

 const IPMITOOL_PATH = `ipmitool`
@@ -12,7 +12,6 @@ import (
     "encoding/json"
     "errors"
     "fmt"
-    "io/ioutil"
     "math"
     "os"
     "os/signal"
@@ -24,10 +23,10 @@ import (
     "time"
     "unsafe"

-    cclog "github.com/ClusterCockpit/cc-metric-collector/internal/ccLogger"
-    lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric"
-    topo "github.com/ClusterCockpit/cc-metric-collector/internal/ccTopology"
     agg "github.com/ClusterCockpit/cc-metric-collector/internal/metricAggregator"
+    cclog "github.com/ClusterCockpit/cc-metric-collector/pkg/ccLogger"
+    lp "github.com/ClusterCockpit/cc-metric-collector/pkg/ccMetric"
+    topo "github.com/ClusterCockpit/cc-metric-collector/pkg/ccTopology"
     "github.com/NVIDIA/go-nvml/pkg/dl"
 )

@@ -154,12 +153,13 @@ func getBaseFreq() float64 {
     }
     var freq float64 = math.NaN()
     for _, f := range files {
-        buffer, err := ioutil.ReadFile(f)
+        buffer, err := os.ReadFile(f)
         if err == nil {
             data := strings.Replace(string(buffer), "\n", "", -1)
             x, err := strconv.ParseInt(data, 0, 64)
             if err == nil {
-                freq = float64(x) * 1e6
+                freq = float64(x)
+                break
             }
         }
     }
@@ -168,11 +168,11 @@ func getBaseFreq() float64 {
         C.power_init(0)
         info := C.get_powerInfo()
         if float64(info.baseFrequency) != 0 {
-            freq = float64(info.baseFrequency) * 1e6
+            freq = float64(info.baseFrequency)
         }
         C.power_finalize()
     }
-    return freq
+    return freq * 1e3
 }

 func (m *LikwidCollector) Init(config json.RawMessage) error {
@@ -7,6 +7,9 @@ The `likwid` collector is probably the most complicated collector. The LIKWID li
 "likwid": {
     "force_overwrite" : false,
     "invalid_to_zero" : false,
+    "liblikwid_path" : "/path/to/liblikwid.so",
+    "accessdaemon_path" : "/folder/that/contains/likwid-accessD",
+    "access_mode" : "direct or accessdaemon or perf_event",
     "eventsets": [
     {
         "events" : {
@@ -3,13 +3,13 @@ package collectors
 import (
     "encoding/json"
     "fmt"
-    "io/ioutil"
+    "os"
     "strconv"
     "strings"
     "time"

-    cclog "github.com/ClusterCockpit/cc-metric-collector/internal/ccLogger"
-    lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric"
+    cclog "github.com/ClusterCockpit/cc-metric-collector/pkg/ccLogger"
+    lp "github.com/ClusterCockpit/cc-metric-collector/pkg/ccMetric"
 )

 //
@@ -72,7 +72,7 @@ func (m *LoadavgCollector) Read(interval time.Duration, output chan lp.CCMetric)
     if !m.init {
         return
     }
-    buffer, err := ioutil.ReadFile(LOADAVGFILE)
+    buffer, err := os.ReadFile(LOADAVGFILE)
     if err != nil {
         if err != nil {
             cclog.ComponentError(
collectors/lustreJobstatMetric.go (new file, 270 lines)
@@ -0,0 +1,270 @@
package collectors

import (
	"encoding/json"
	"errors"
	"os/exec"
	"regexp"
	"strings"
	"time"

	cclog "github.com/ClusterCockpit/cc-metric-collector/pkg/ccLogger"
	lp "github.com/ClusterCockpit/cc-metric-collector/pkg/ccMetric"
)

type LustreJobstatCollectorConfig struct {
	LCtlCommand        string   `json:"lctl_command,omitempty"`
	ExcludeMetrics     []string `json:"exclude_metrics,omitempty"`
	Sudo               bool     `json:"use_sudo,omitempty"`
	SendAbsoluteValues bool     `json:"send_abs_values,omitempty"`
	SendDerivedValues  bool     `json:"send_derived_values,omitempty"`
	SendDiffValues     bool     `json:"send_diff_values,omitempty"`
	JobRegex           string   `json:"jobid_regex,omitempty"`
}

type LustreJobstatCollector struct {
	metricCollector
	tags              map[string]string
	config            LustreJobstatCollectorConfig
	lctl              string
	sudoCmd           string
	lastTimestamp     time.Time                // Store time stamp of last tick to derive bandwidths
	definitions       []LustreMetricDefinition // Combined list without excluded metrics
	//stats map[string]map[string]int64 // Data for last value per device and metric
	lastMdtData       *map[string]map[string]LustreMetricData
	lastObdfilterData *map[string]map[string]LustreMetricData
	jobidRegex        *regexp.Regexp
}

var defaultJobidRegex = `^(?P<jobid>[\d\w\.]+)$`

var LustreMetricJobstatsDefinition = []LustreMetricDefinition{
	{name: "lustre_job_read_samples", lineprefix: "read", offsetname: "samples", unit: "requests", calc: "none"},
	{name: "lustre_job_read_min_bytes", lineprefix: "read_bytes", offsetname: "min", unit: "bytes", calc: "none"},
	{name: "lustre_job_read_max_bytes", lineprefix: "read_bytes", offsetname: "max", unit: "bytes", calc: "none"},
}

func (m *LustreJobstatCollector) executeLustreCommand(option string) []string {
	return executeLustreCommand(m.sudoCmd, m.lctl, LCTL_OPTION, option, m.config.Sudo)
}

func (m *LustreJobstatCollector) Init(config json.RawMessage) error {
	var err error
	m.name = "LustreJobstatCollector"
	m.parallel = true
	m.config.JobRegex = defaultJobidRegex
	m.config.SendAbsoluteValues = true
	if len(config) > 0 {
		err = json.Unmarshal(config, &m.config)
		if err != nil {
			return err
		}
	}
	m.setup()
	m.tags = map[string]string{"type": "jobid"}
	m.meta = map[string]string{"source": m.name, "group": "Lustre", "scope": "job"}

	// Lustre file system statistics can only be queried by user root
	// or with password-less sudo
	// if !m.config.Sudo {
	// 	user, err := user.Current()
	// 	if err != nil {
	// 		cclog.ComponentError(m.name, "Failed to get current user:", err.Error())
	// 		return err
	// 	}
	// 	if user.Uid != "0" {
	// 		cclog.ComponentError(m.name, "Lustre file system statistics can only be queried by user root")
	// 		return err
	// 	}
	// } else {
	// 	p, err := exec.LookPath("sudo")
	// 	if err != nil {
	// 		cclog.ComponentError(m.name, "Cannot find 'sudo'")
	// 		return err
	// 	}
	// 	m.sudoCmd = p
	// }

	p, err := exec.LookPath(m.config.LCtlCommand)
	if err != nil {
		p, err = exec.LookPath(LCTL_CMD)
		if err != nil {
			return err
		}
	}
	m.lctl = p

	m.definitions = make([]LustreMetricDefinition, 0)
	if m.config.SendAbsoluteValues {
		for _, def := range LustreMetricJobstatsDefinition {
			if _, skip := stringArrayContains(m.config.ExcludeMetrics, def.name); !skip {
				m.definitions = append(m.definitions, def)
			}
		}
	}

	if len(m.definitions) == 0 {
		return errors.New("no metrics to collect")
	}

	x := make(map[string]map[string]LustreMetricData)
	m.lastMdtData = &x
	x = make(map[string]map[string]LustreMetricData)
	m.lastObdfilterData = &x

	if len(m.config.JobRegex) > 0 {
		jregex := strings.ReplaceAll(m.config.JobRegex, "%", "\\")
		r, err := regexp.Compile(jregex)
		if err == nil {
			m.jobidRegex = r
		} else {
			cclog.ComponentError(m.name, "Cannot compile jobid regex")
			return err
		}
	}

	m.lastTimestamp = time.Now()
	m.init = true
	return nil
}

func (m *LustreJobstatCollector) Read(interval time.Duration, output chan lp.CCMetric) {
	if !m.init {
		return
	}

	getValue := func(data map[string]map[string]LustreMetricData, device string, jobid string, operation string, field string) int64 {
		var value int64 = -1
		if ddata, ok := data[device]; ok {
			if jdata, ok := ddata[jobid]; ok {
				if opdata, ok := jdata.op_data[operation]; ok {
					if v, ok := opdata[field]; ok {
						value = v
					}
				}
			}
		}
		return value
	}

	jobIdToTags := func(jobregex *regexp.Regexp, job string) map[string]string {
		tags := make(map[string]string)
		groups := jobregex.SubexpNames()
		for _, match := range jobregex.FindAllStringSubmatch(job, -1) {
			for groupIdx, group := range match {
				if len(groups[groupIdx]) > 0 {
					tags[groups[groupIdx]] = group
				}
			}
		}
		return tags
	}

	generateMetric := func(definition LustreMetricDefinition, data map[string]map[string]LustreMetricData, last map[string]map[string]LustreMetricData, now time.Time) {
		tdiff := now.Sub(m.lastTimestamp)
		for dev, ddata := range data {
			for jobid, jdata := range ddata {
				jobtags := jobIdToTags(m.jobidRegex, jobid)
				if _, ok := jobtags["jobid"]; !ok {
					continue
				}
				cur := getValue(data, dev, jobid, definition.lineprefix, definition.offsetname)
				old := getValue(last, dev, jobid, definition.lineprefix, definition.offsetname)
				var x interface{} = -1
				var valid = false
				switch definition.calc {
				case "none":
					x = cur
					valid = true
				case "difference":
					if len(last) > 0 {
						if old >= 0 {
							x = cur - old
							valid = true
						}
					}
				case "derivative":
					if len(last) > 0 {
						if old >= 0 {
							x = float64(cur-old) / tdiff.Seconds()
							valid = true
						}
					}
				}
				if valid {
					y, err := lp.New(definition.name, m.tags, m.meta, map[string]interface{}{"value": x}, now)
					if err == nil {
						y.AddTag("stype", "device")
						y.AddTag("stype-id", dev)
						if j, ok := jobtags["jobid"]; ok {
							y.AddTag("type-id", j)
						} else {
							y.AddTag("type-id", jobid)
						}
						for k, v := range jobtags {
							switch k {
							case "jobid":
							case "hostname":
								y.AddTag("hostname", v)
							default:
								y.AddMeta(k, v)
							}
						}
						if len(definition.unit) > 0 {
							y.AddMeta("unit", definition.unit)
						} else {
							if unit, ok := jdata.op_units[definition.lineprefix]; ok {
								y.AddMeta("unit", unit)
							}
						}
						output <- y
					}
				}
			}
		}
	}

	now := time.Now()

	mdt_lines := m.executeLustreCommand("mdt.*.job_stats")
	if len(mdt_lines) > 0 {
		mdt_data := readCommandOutput(mdt_lines)
		for _, def := range m.definitions {
			generateMetric(def, mdt_data, *m.lastMdtData, now)
		}
		m.lastMdtData = &mdt_data
	}

	obdfilter_lines := m.executeLustreCommand("obdfilter.*.job_stats")
	if len(obdfilter_lines) > 0 {
		obdfilter_data := readCommandOutput(obdfilter_lines)
		for _, def := range m.definitions {
			generateMetric(def, obdfilter_data, *m.lastObdfilterData, now)
		}
		m.lastObdfilterData = &obdfilter_data
	}

	m.lastTimestamp = now
}

func (m *LustreJobstatCollector) Close() {
	m.init = false
}
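To illustrate the `jobIdToTags` helper above, here is a standalone sketch (not part of the repository): the configured `jobid_regex` is compiled after replacing `%` with `\`, and every named capture group becomes a tag. The job identifier string below is hypothetical.

```go
package main

import (
	"fmt"
	"regexp"
	"strings"
)

func main() {
	// As in the collector's config handling: "%" in JSON stands in for "\".
	configRegex := `^(?P<jobid>[%d%w%.]+)$`
	r := regexp.MustCompile(strings.ReplaceAll(configRegex, "%", "\\"))

	// Hypothetical job_stats identifier reported by a Lustre server.
	job := "123456.0"

	tags := make(map[string]string)
	groups := r.SubexpNames()
	for _, match := range r.FindAllStringSubmatch(job, -1) {
		for i, g := range match {
			if len(groups[i]) > 0 {
				tags[groups[i]] = g
			}
		}
	}
	fmt.Println(tags) // map[jobid:123456.0]
}
```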
collectors/lustreJobstatMetric.md (new file, 35 lines)
@@ -0,0 +1,35 @@

## `lustre_jobstat` collector

**Note**: This collector is meant to run on the Lustre servers, **not** the clients

The Lustre filesystem provides a feature (`job_stats`) to group processes on the client side with an identifier string (like a compute job with its jobid) and to retrieve the file system operation counts on the server side. Check the section [How to configure `job_stats`]() for more information.

### Configuration

```json
"lustre_jobstat_": {
  "lctl_command": "/path/to/lctl",
  "use_sudo": false,
  "exclude_metrics": [
    "setattr",
    "getattr"
  ],
  "send_abs_values" : true,
  "jobid_regex" : "^(?P<jobid>[%d%w%.]+)$"
}
```

The `lustre_jobstat` collector uses the `lctl` application with the `get_param` option to get all `mdt.*.job_stats` and `obdfilter.*.job_stats` metrics. These metrics are only available to the root user. If password-less sudo is configured, you can enable `sudo` in the configuration. The `exclude_metrics` list can be used to drop some metrics and reduce network traffic and storage. With the `send_abs_values` flag, the collector sends absolute values for the configured metrics. The `jobid_regex` can be used to split the Lustre `job_stats` identifier into multiple parts. Since JSON cannot handle strings like `\d`, use `%` instead of `\`.

Metrics:
- `lustre_job_read_samples` (unit: `requests`)
- `lustre_job_read_min_bytes` (unit: `bytes`)
- `lustre_job_read_max_bytes` (unit: `bytes`)

The collector adds the tags: `type=jobid,type-id=<jobid_from_regex>,stype=device,stype-id=<device_name_from_output>`.

The collector adds the meta information: `unit=<unit>,scope=job`

### How to configure `job_stats`
@@ -10,14 +10,10 @@ import (
     "strings"
     "time"

-    cclog "github.com/ClusterCockpit/cc-metric-collector/internal/ccLogger"
-    lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric"
+    cclog "github.com/ClusterCockpit/cc-metric-collector/pkg/ccLogger"
+    lp "github.com/ClusterCockpit/cc-metric-collector/pkg/ccMetric"
 )

-const LUSTRE_SYSFS = `/sys/fs/lustre`
-const LCTL_CMD = `lctl`
-const LCTL_OPTION = `get_param`
-
 type LustreCollectorConfig struct {
     LCtlCommand string `json:"lctl_command,omitempty"`
     ExcludeMetrics []string `json:"exclude_metrics,omitempty"`
@@ -27,14 +23,6 @@ type LustreCollectorConfig struct {
     SendDiffValues bool `json:"send_diff_values,omitempty"`
 }

-type LustreMetricDefinition struct {
-    name string
-    lineprefix string
-    lineoffset int
-    unit string
-    calc string
-}
-
 type LustreCollector struct {
     metricCollector
     tags map[string]string
@@ -46,17 +34,209 @@ type LustreCollector struct {
     stats map[string]map[string]int64 // Data for last value per device and metric
 }

+var LustreAbsMetrics = []LustreMetricDefinition{
+    {name: "lustre_read_requests", lineprefix: "read_bytes", lineoffset: 1, offsetname: "samples", unit: "requests", calc: "none"},
+    {name: "lustre_write_requests", lineprefix: "write_bytes", lineoffset: 1, offsetname: "samples", unit: "requests", calc: "none"},
+    {name: "lustre_read_bytes", lineprefix: "read_bytes", lineoffset: 6, offsetname: "sum", unit: "bytes", calc: "none"},
+    {name: "lustre_write_bytes", lineprefix: "write_bytes", lineoffset: 6, offsetname: "sum", unit: "bytes", calc: "none"},
+    {name: "lustre_open", lineprefix: "open", lineoffset: 1, offsetname: "samples", unit: "", calc: "none"},
+    {name: "lustre_close", lineprefix: "close", lineoffset: 1, offsetname: "samples", unit: "", calc: "none"},
+    {name: "lustre_setattr", lineprefix: "setattr", lineoffset: 1, offsetname: "samples", unit: "", calc: "none"},
+    {name: "lustre_getattr", lineprefix: "getattr", lineoffset: 1, offsetname: "samples", unit: "", calc: "none"},
+    {name: "lustre_statfs", lineprefix: "statfs", lineoffset: 1, offsetname: "samples", unit: "", calc: "none"},
+    {name: "lustre_inode_permission", lineprefix: "inode_permission", lineoffset: 1, offsetname: "samples", unit: "", calc: "none"},
+}
+
+var LustreDiffMetrics = []LustreMetricDefinition{
+    {name: "lustre_read_requests_diff", lineprefix: "read_bytes", lineoffset: 1, offsetname: "samples", unit: "requests", calc: "difference"},
+    {name: "lustre_write_requests_diff", lineprefix: "write_bytes", lineoffset: 1, offsetname: "samples", unit: "requests", calc: "difference"},
+    {name: "lustre_read_bytes_diff", lineprefix: "read_bytes", lineoffset: 6, offsetname: "sum", unit: "bytes", calc: "difference"},
+    {name: "lustre_write_bytes_diff", lineprefix: "write_bytes", lineoffset: 6, offsetname: "sum", unit: "bytes", calc: "difference"},
+    {name: "lustre_open_diff", lineprefix: "open", lineoffset: 1, offsetname: "samples", unit: "", calc: "difference"},
+    {name: "lustre_close_diff", lineprefix: "close", lineoffset: 1, offsetname: "samples", unit: "", calc: "difference"},
+    {name: "lustre_setattr_diff", lineprefix: "setattr", lineoffset: 1, offsetname: "samples", unit: "", calc: "difference"},
+    {name: "lustre_getattr_diff", lineprefix: "getattr", lineoffset: 1, offsetname: "samples", unit: "", calc: "difference"},
+    {name: "lustre_statfs_diff", lineprefix: "statfs", lineoffset: 1, offsetname: "samples", unit: "", calc: "difference"},
+    {name: "lustre_inode_permission_diff", lineprefix: "inode_permission", lineoffset: 1, offsetname: "samples", unit: "", calc: "difference"},
+}
+
+var LustreDeriveMetrics = []LustreMetricDefinition{
+    {name: "lustre_read_requests_rate", lineprefix: "read_bytes", lineoffset: 1, offsetname: "samples", unit: "requests/sec", calc: "derivative"},
+    {name: "lustre_write_requests_rate", lineprefix: "write_bytes", lineoffset: 1, offsetname: "samples", unit: "requests/sec", calc: "derivative"},
+    {name: "lustre_read_bw", lineprefix: "read_bytes", lineoffset: 6, offsetname: "sum", unit: "bytes/sec", calc: "derivative"},
+    {name: "lustre_write_bw", lineprefix: "write_bytes", lineoffset: 6, offsetname: "sum", unit: "bytes/sec", calc: "derivative"},
+}
+
 func (m *LustreCollector) getDeviceDataCommand(device string) []string {
-    var command *exec.Cmd
-    statsfile := fmt.Sprintf("llite.%s.stats", device)
-    if m.config.Sudo {
-        command = exec.Command(m.sudoCmd, m.lctl, LCTL_OPTION, statsfile)
-    } else {
-        command = exec.Command(m.lctl, LCTL_OPTION, statsfile)
-    }
-    command.Wait()
-    stdout, _ := command.Output()
-    return strings.Split(string(stdout), "\n")
+    return executeLustreCommand(m.sudoCmd, m.lctl, LCTL_OPTION, fmt.Sprintf("llite.%s.stats", device), m.config.Sudo)
 }

 func (m *LustreCollector) getDevices() []string {
@@ -108,183 +288,6 @@ func getMetricData(lines []string, prefix string, offset int) (int64, error) {
 // 	return strings.Split(string(buffer), "\n")
 // }

-var LustreAbsMetrics = []LustreMetricDefinition{
-    {name: "lustre_read_requests", lineprefix: "read_bytes", lineoffset: 1, unit: "requests", calc: "none"},
-    {name: "lustre_write_requests", lineprefix: "write_bytes", lineoffset: 1, unit: "requests", calc: "none"},
-    {name: "lustre_read_bytes", lineprefix: "read_bytes", lineoffset: 6, unit: "bytes", calc: "none"},
-    {name: "lustre_write_bytes", lineprefix: "write_bytes", lineoffset: 6, unit: "bytes", calc: "none"},
-    {name: "lustre_open", lineprefix: "open", lineoffset: 1, unit: "", calc: "none"},
-    {name: "lustre_close", lineprefix: "close", lineoffset: 1, unit: "", calc: "none"},
-    {name: "lustre_setattr", lineprefix: "setattr", lineoffset: 1, unit: "", calc: "none"},
-    {name: "lustre_getattr", lineprefix: "getattr", lineoffset: 1, unit: "", calc: "none"},
-    {name: "lustre_statfs", lineprefix: "statfs", lineoffset: 1, unit: "", calc: "none"},
-    {name: "lustre_inode_permission", lineprefix: "inode_permission", lineoffset: 1, unit: "", calc: "none"},
-}
-
-var LustreDiffMetrics = []LustreMetricDefinition{
-    {name: "lustre_read_requests_diff", lineprefix: "read_bytes", lineoffset: 1, unit: "requests", calc: "difference"},
-    {name: "lustre_write_requests_diff", lineprefix: "write_bytes", lineoffset: 1, unit: "requests", calc: "difference"},
-    {name: "lustre_read_bytes_diff", lineprefix: "read_bytes", lineoffset: 6, unit: "bytes", calc: "difference"},
-    {name: "lustre_write_bytes_diff", lineprefix: "write_bytes", lineoffset: 6, unit: "bytes", calc: "difference"},
-    {name: "lustre_open_diff", lineprefix: "open", lineoffset: 1, unit: "", calc: "difference"},
-    {name: "lustre_close_diff", lineprefix: "close", lineoffset: 1, unit: "", calc: "difference"},
-    {name: "lustre_setattr_diff", lineprefix: "setattr", lineoffset: 1, unit: "", calc: "difference"},
-    {name: "lustre_getattr_diff", lineprefix: "getattr", lineoffset: 1, unit: "", calc: "difference"},
-    {name: "lustre_statfs_diff", lineprefix: "statfs", lineoffset: 1, unit: "", calc: "difference"},
-    {name: "lustre_inode_permission_diff", lineprefix: "inode_permission", lineoffset: 1, unit: "", calc: "difference"},
-}
-
-var LustreDeriveMetrics = []LustreMetricDefinition{
-    {name: "lustre_read_requests_rate", lineprefix: "read_bytes", lineoffset: 1, unit: "requests/sec", calc: "derivative"},
-    {name: "lustre_write_requests_rate", lineprefix: "write_bytes", lineoffset: 1, unit: "requests/sec", calc: "derivative"},
-    {name: "lustre_read_bw", lineprefix: "read_bytes", lineoffset: 6, unit: "bytes/sec", calc: "derivative"},
-    {name: "lustre_write_bw", lineprefix: "write_bytes", lineoffset: 6, unit: "bytes/sec", calc: "derivative"},
-}
-
 func (m *LustreCollector) Init(config json.RawMessage) error {
     var err error
     m.name = "LustreCollector"
@@ -297,7 +300,7 @@ func (m *LustreCollector) Init(config json.RawMessage) error {
     }
     m.setup()
     m.tags = map[string]string{"type": "node"}
-    m.meta = map[string]string{"source": m.name, "group": "Lustre"}
+    m.meta = map[string]string{"source": m.name, "group": "Lustre", "scope": "node"}

     // Lustre file system statistics can only be queried by user root
     // or with password-less sudo
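The `calc` field in the definitions above selects how a raw counter is turned into a metric. A small self-contained sketch of the three modes ("none", "difference", "derivative"), using made-up counter readings taken one interval apart:

```go
package main

import (
	"fmt"
	"time"
)

func main() {
	// Hypothetical readings of one Lustre counter, one interval apart.
	var old, cur int64 = 1000, 1600
	interval := 30 * time.Second

	abs := cur                                    // calc: "none"
	diff := cur - old                             // calc: "difference"
	rate := float64(cur-old) / interval.Seconds() // calc: "derivative"
	fmt.Println(abs, diff, rate)                  // 1600 600 20
}
```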
collectors/lustreMetricCommon.go (new file, 190 lines)
@@ -0,0 +1,190 @@
package collectors

import (
	"fmt"
	"os/exec"
	"regexp"
	"strconv"
	"strings"
)

const LUSTRE_SYSFS = `/sys/fs/lustre`
const LCTL_CMD = `lctl`
const LCTL_OPTION = `get_param`

type LustreMetricDefinition struct {
	name       string
	lineprefix string
	lineoffset int
	offsetname string
	unit       string
	calc       string
}

type LustreMetricData struct {
	sample_time       int64
	start_time        int64
	elapsed_time      int64
	op_data           map[string]map[string]int64
	op_units          map[string]string
	sample_time_unit  string
	start_time_unit   string
	elapsed_time_unit string
}

var devicePattern = regexp.MustCompile(`^[\w\d\-_]+\.([\w\d\-_]+)\.[\w\d\-_]+=$`)
var jobPattern = regexp.MustCompile(`^-\s*job_id:\s*([\w\d\-_\.:]+)$`)
var snapshotPattern = regexp.MustCompile(`^\s*snapshot_time\s*:\s*([\d\.]+)\s*([\w\d\-_\.]*)$`)
var startPattern = regexp.MustCompile(`^\s*start_time\s*:\s*([\d\.]+)\s*([\w\d\-_\.]*)$`)
var elapsedPattern = regexp.MustCompile(`^\s*elapsed_time\s*:\s*([\d\.]+)\s*([\w\d\-_\.]*)$`)
var linePattern = regexp.MustCompile(`^\s*([\w\d\-_\.]+):\s*\{\s*samples:\s*([\d\.]+),\s*unit:\s*([\w\d\-_\.]+),\s*min:\s*([\d\.]+),\s*max:\s*([\d\.]+),\s*sum:\s*([\d\.]+),\s*sumsq:\s*([\d\.]+)\s*\}`)
func executeLustreCommand(sudo, lctl, option, search string, use_sudo bool) []string {
	var command *exec.Cmd
	if use_sudo {
		command = exec.Command(sudo, lctl, option, search)
	} else {
		command = exec.Command(lctl, option, search)
	}
	command.Wait()
	stdout, _ := command.Output()
	return strings.Split(string(stdout), "\n")
}
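For orientation, a call to this helper would look roughly like the snippet below; the concrete lctl parameter string is an assumption for illustration only and does not appear in this hunk.

// Hypothetical call site: query jobstats via "sudo lctl get_param <pattern>".
lines := executeLustreCommand("/usr/bin/sudo", LCTL_CMD, LCTL_OPTION, "*.*.job_stats", true)
for _, line := range lines {
	fmt.Println(line) // raw lctl output, one line per slice element
}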
func splitTree(lines []string, splitRegex *regexp.Regexp) map[string][]string {
	entries := make(map[string][]string)
	ent_lines := make([]int, 0)
	for i, l := range lines {
		m := splitRegex.FindStringSubmatch(l)
		if len(m) == 2 {
			ent_lines = append(ent_lines, i)
		}
	}
	if len(ent_lines) > 0 {
		for i, idx := range ent_lines[:len(ent_lines)-1] {
			m := splitRegex.FindStringSubmatch(lines[idx])
			entries[m[1]] = lines[idx+1 : ent_lines[i+1]]
		}
		last := ent_lines[len(ent_lines)-1]
		m := splitRegex.FindStringSubmatch(lines[last])
		entries[m[1]] = lines[last:]
	}
	return entries
}
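A small, made-up example of what splitTree produces when driven with devicePattern: every line matching the regex starts a new entry keyed by the first capture group, and the following lines are attached to that key.

// Input lines are invented for illustration.
input := []string{
	"obdfilter.lustre-OST0000.job_stats=",
	"- job_id: 4711",
	"  read_bytes: { samples: 3, unit: bytes, min: 4096, max: 8192, sum: 20480, sumsq: 0 }",
	"obdfilter.lustre-OST0001.job_stats=",
	"- job_id: 4712",
}
byDevice := splitTree(input, devicePattern)
// byDevice["lustre-OST0000"] -> lines describing jobs on OST0000
// byDevice["lustre-OST0001"] -> lines describing jobs on OST0001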
func readDevices(lines []string) map[string][]string {
	return splitTree(lines, devicePattern)
}

func readJobs(lines []string) map[string][]string {
	return splitTree(lines, jobPattern)
}
func readJobdata(lines []string) LustreMetricData {

	jobdata := LustreMetricData{
		op_data:           make(map[string]map[string]int64),
		op_units:          make(map[string]string),
		sample_time:       0,
		sample_time_unit:  "nsec",
		start_time:        0,
		start_time_unit:   "nsec",
		elapsed_time:      0,
		elapsed_time_unit: "nsec",
	}
	parseTime := func(value, unit string) int64 {
		var t int64 = 0
		if len(unit) == 0 {
			unit = "secs"
		}
		values := strings.Split(value, ".")
		units := strings.Split(unit, ".")
		if len(values) != len(units) {
			fmt.Printf("Invalid time specification '%s' and '%s'\n", value, unit)
		}
		for i, v := range values {
			if len(units) > i {
				s, err := strconv.ParseInt(v, 10, 64)
				if err == nil {
					switch units[i] {
					case "secs":
						t += s * 1e9
					case "msecs":
						t += s * 1e6
					case "usecs":
						t += s * 1e3
					case "nsecs":
						t += s
					}
				}
			}
		}
		return t
	}
	parseNumber := func(value string) int64 {
		s, err := strconv.ParseInt(value, 10, 64)
		if err == nil {
			return s
		}
		return 0
	}
	for _, l := range lines {
		if jobdata.sample_time == 0 {
			m := snapshotPattern.FindStringSubmatch(l)
			if len(m) == 3 {
				if len(m[2]) > 0 {
					jobdata.sample_time = parseTime(m[1], m[2])
				} else {
					jobdata.sample_time = parseTime(m[1], "secs")
				}
			}
		}

		if jobdata.start_time == 0 {
			m := startPattern.FindStringSubmatch(l)
			if len(m) == 3 {
				if len(m[2]) > 0 {
					jobdata.start_time = parseTime(m[1], m[2])
				} else {
					jobdata.start_time = parseTime(m[1], "secs")
				}
			}
		}
		if jobdata.elapsed_time == 0 {
			m := elapsedPattern.FindStringSubmatch(l)
			if len(m) == 3 {
				if len(m[2]) > 0 {
					jobdata.elapsed_time = parseTime(m[1], m[2])
				} else {
					jobdata.elapsed_time = parseTime(m[1], "secs")
				}
			}
		}
		m := linePattern.FindStringSubmatch(l)
		if len(m) == 8 {
			jobdata.op_units[m[1]] = m[3]
			jobdata.op_data[m[1]] = map[string]int64{
				"samples": parseNumber(m[2]),
				"min":     parseNumber(m[4]),
				"max":     parseNumber(m[5]),
				"sum":     parseNumber(m[6]),
				"sumsq":   parseNumber(m[7]),
			}
		}
	}
	return jobdata
}
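To make the time handling concrete: parseTime splits value and unit at the dots and scales each component to nanoseconds, so a jobstats timestamp like the made-up ones below ends up as a single int64.

// Worked examples (values invented):
// parseTime("12.500", "secs.msecs") = 12*1e9 + 500*1e6 = 12500000000 ns
// parseTime("1666777888", "")       falls back to "secs" and yields 1666777888 * 1e9 ns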
func readCommandOutput(lines []string) map[string]map[string]LustreMetricData {
	var data map[string]map[string]LustreMetricData = make(map[string]map[string]LustreMetricData)
	devs := readDevices(lines)
	for d, ddata := range devs {
		data[d] = make(map[string]LustreMetricData)
		jobs := readJobs(ddata)
		for j, jdata := range jobs {
			x := readJobdata(jdata)
			data[d][j] = x
		}
	}
	return data
}
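Putting the helpers together, readCommandOutput returns a device -> job -> parsed-stats map. A sketch of how a collector loop might walk it (the metric naming and printing here are illustrative only, not taken from the patch):

stats := readCommandOutput(lines)
for device, jobs := range stats {
	for jobid, jdata := range jobs {
		if rb, ok := jdata.op_data["read_bytes"]; ok {
			// e.g. feed rb["sum"] into a per-job read-bytes metric tagged with device and jobid
			fmt.Printf("device=%s job=%s read_bytes_sum=%d %s\n",
				device, jobid, rb["sum"], jdata.op_units["read_bytes"])
		}
	}
}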
@@ -12,8 +12,8 @@ import (
	"strings"
	"time"

-	cclog "github.com/ClusterCockpit/cc-metric-collector/internal/ccLogger"
-	lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric"
+	cclog "github.com/ClusterCockpit/cc-metric-collector/pkg/ccLogger"
+	lp "github.com/ClusterCockpit/cc-metric-collector/pkg/ccMetric"
)

const MEMSTATFILE = "/proc/meminfo"

@@ -68,7 +68,8 @@ func getStats(filename string) map[string]MemstatStats {
		} else if len(linefields) == 5 {
			v, err := strconv.ParseFloat(linefields[3], 64)
			if err == nil {
-				stats[strings.Trim(linefields[0], ":")] = MemstatStats{
+				cclog.ComponentDebug("getStats", strings.Trim(linefields[2], ":"), v, linefields[4])
+				stats[strings.Trim(linefields[2], ":")] = MemstatStats{
					value: v,
					unit:  linefields[4],
				}
@@ -160,7 +161,6 @@ func (m *MemstatCollector) Init(config json.RawMessage) error {

func (m *MemstatCollector) Read(interval time.Duration, output chan lp.CCMetric) {
	if !m.init {
-		cclog.ComponentPrint(m.name, "Here")
		return
	}

@@ -188,16 +188,20 @@ func (m *MemstatCollector) Read(interval time.Duration, output chan lp.CCMetric)
	unit := ""
	if totalVal, total := stats["MemTotal"]; total {
		if freeVal, free := stats["MemFree"]; free {
+			memUsed = totalVal.value - freeVal.value
+			if len(totalVal.unit) > 0 {
+				unit = totalVal.unit
+			} else if len(freeVal.unit) > 0 {
+				unit = freeVal.unit
+			}
			if bufVal, buffers := stats["Buffers"]; buffers {
+				memUsed -= bufVal.value
+				if len(bufVal.unit) > 0 && len(unit) == 0 {
+					unit = bufVal.unit
+				}
				if cacheVal, cached := stats["Cached"]; cached {
-					memUsed = totalVal.value - (freeVal.value + bufVal.value + cacheVal.value)
-					if len(totalVal.unit) > 0 {
-						unit = totalVal.unit
-					} else if len(freeVal.unit) > 0 {
-						unit = freeVal.unit
-					} else if len(bufVal.unit) > 0 {
-						unit = bufVal.unit
-					} else if len(cacheVal.unit) > 0 {
+					memUsed -= cacheVal.value
+					if len(cacheVal.unit) > 0 && len(unit) == 0 {
						unit = cacheVal.unit
					}
				}
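The rewritten block computes the same value as before when all fields are present, but no longer requires Buffers and Cached to exist in /proc/meminfo. With made-up values (in kByte):

// MemTotal=64000, MemFree=12000, Buffers=2000, Cached=10000 (invented numbers)
memUsed := 64000.0 - 12000.0 // 52000: MemTotal - MemFree
memUsed -= 2000.0            // 50000: minus Buffers, if reported
memUsed -= 10000.0           // 40000: minus Cached, if reported
// identical to MemTotal - (MemFree + Buffers + Cached) when every field exists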
@@ -5,7 +5,7 @@ import (
	"fmt"
	"time"

-	lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric"
+	lp "github.com/ClusterCockpit/cc-metric-collector/pkg/ccMetric"
)

type MetricCollector interface {

@@ -9,8 +9,8 @@ import (
	"strings"
	"time"

-	cclog "github.com/ClusterCockpit/cc-metric-collector/internal/ccLogger"
-	lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric"
+	cclog "github.com/ClusterCockpit/cc-metric-collector/pkg/ccLogger"
+	lp "github.com/ClusterCockpit/cc-metric-collector/pkg/ccMetric"
)

const NETSTATFILE = "/proc/net/dev"

@@ -11,7 +11,7 @@ import (
	"strings"
	"time"

-	lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric"
+	lp "github.com/ClusterCockpit/cc-metric-collector/pkg/ccMetric"
)

// First part contains the code for the general NfsCollector.

@@ -10,8 +10,8 @@ import (
	"strings"
	"time"

-	cclog "github.com/ClusterCockpit/cc-metric-collector/internal/ccLogger"
-	lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric"
+	cclog "github.com/ClusterCockpit/cc-metric-collector/pkg/ccLogger"
+	lp "github.com/ClusterCockpit/cc-metric-collector/pkg/ccMetric"
)

//

@@ -8,8 +8,8 @@ import (
	"strings"
	"time"

-	cclog "github.com/ClusterCockpit/cc-metric-collector/internal/ccLogger"
-	lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric"
+	cclog "github.com/ClusterCockpit/cc-metric-collector/pkg/ccLogger"
+	lp "github.com/ClusterCockpit/cc-metric-collector/pkg/ccMetric"
	"github.com/NVIDIA/go-nvml/pkg/nvml"
)
@@ -6,8 +6,8 @@ import (
	"fmt"
	"time"

-	cclog "github.com/ClusterCockpit/cc-metric-collector/internal/ccLogger"
-	lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric"
+	cclog "github.com/ClusterCockpit/cc-metric-collector/pkg/ccLogger"
+	lp "github.com/ClusterCockpit/cc-metric-collector/pkg/ccMetric"
	"github.com/ClusterCockpit/go-rocm-smi/pkg/rocm_smi"
)

@@ -66,14 +66,14 @@ func (m *RocmSmiCollector) Init(config json.RawMessage) error {

	ret := rocm_smi.Init()
	if ret != rocm_smi.STATUS_SUCCESS {
-		err = errors.New("Failed to initialize ROCm SMI library")
+		err = errors.New("failed to initialize ROCm SMI library")
		cclog.ComponentError(m.name, err.Error())
		return err
	}

	numDevs, ret := rocm_smi.NumMonitorDevices()
	if ret != rocm_smi.STATUS_SUCCESS {
-		err = errors.New("Failed to get number of GPUs from ROCm SMI library")
+		err = errors.New("failed to get number of GPUs from ROCm SMI library")
		cclog.ComponentError(m.name, err.Error())
		return err
	}

@@ -98,14 +98,14 @@ func (m *RocmSmiCollector) Init(config json.RawMessage) error {
	}
	device, ret := rocm_smi.DeviceGetHandleByIndex(i)
	if ret != rocm_smi.STATUS_SUCCESS {
-		err = fmt.Errorf("Failed to get handle for GPU %d", i)
+		err = fmt.Errorf("failed to get handle for GPU %d", i)
		cclog.ComponentError(m.name, err.Error())
		return err
	}

	pciInfo, ret := rocm_smi.DeviceGetPciInfo(device)
	if ret != rocm_smi.STATUS_SUCCESS {
-		err = fmt.Errorf("Failed to get PCI information for GPU %d", i)
+		err = fmt.Errorf("failed to get PCI information for GPU %d", i)
		cclog.ComponentError(m.name, err.Error())
		return err
	}
@@ -4,8 +4,8 @@ import (
	"encoding/json"
	"time"

-	cclog "github.com/ClusterCockpit/cc-metric-collector/internal/ccLogger"
-	lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric"
+	cclog "github.com/ClusterCockpit/cc-metric-collector/pkg/ccLogger"
+	lp "github.com/ClusterCockpit/cc-metric-collector/pkg/ccMetric"
)

// These are the fields we read from the JSON configuration

@@ -5,8 +5,8 @@ import (
	"sync"
	"time"

-	cclog "github.com/ClusterCockpit/cc-metric-collector/internal/ccLogger"
-	lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric"
+	cclog "github.com/ClusterCockpit/cc-metric-collector/pkg/ccLogger"
+	lp "github.com/ClusterCockpit/cc-metric-collector/pkg/ccMetric"
)

// These are the fields we read from the JSON configuration
@@ -1,17 +1,17 @@
package collectors

import (
+	"bufio"
	"encoding/json"
	"fmt"
-	"bufio"
-	"time"
-	"os"
-	"strings"
-	"strconv"
	"math"
+	"os"
+	"strconv"
+	"strings"
+	"time"

-	cclog "github.com/ClusterCockpit/cc-metric-collector/internal/ccLogger"
-	lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric"
+	cclog "github.com/ClusterCockpit/cc-metric-collector/pkg/ccLogger"
+	lp "github.com/ClusterCockpit/cc-metric-collector/pkg/ccMetric"
)

const SCHEDSTATFILE = `/proc/schedstat`

@@ -25,11 +25,11 @@ type SchedstatCollectorConfig struct {
// defined by metricCollector (name, init, ...)
type SchedstatCollector struct {
	metricCollector
	config SchedstatCollectorConfig // the configuration structure
	lastTimestamp time.Time // Store time stamp of last tick to derive values
	meta map[string]string // default meta information
	cputags map[string]map[string]string // default tags
	olddata map[string]map[string]int64 // default tags
}

// Functions to implement MetricCollector interface

@@ -52,7 +52,7 @@ func (m *SchedstatCollector) Init(config json.RawMessage) error {
	// Define meta information sent with each metric
	// (Can also be dynamic or this is the basic set with extension through AddMeta())
	m.meta = map[string]string{"source": m.name, "group": "SCHEDSTAT"}

	// Read in the JSON configuration
	if len(config) > 0 {
		err = json.Unmarshal(config, &m.config)

@@ -83,12 +83,11 @@ func (m *SchedstatCollector) Init(config json.RawMessage) error {
			running, _ := strconv.ParseInt(linefields[7], 10, 64)
			waiting, _ := strconv.ParseInt(linefields[8], 10, 64)
			m.cputags[linefields[0]] = map[string]string{"type": "hwthread", "type-id": fmt.Sprintf("%d", cpu)}
-			m.olddata[linefields[0]] = map[string]int64{"running" : running, "waiting" : waiting}
+			m.olddata[linefields[0]] = map[string]int64{"running": running, "waiting": waiting}
			num_cpus++
		}
	}

-
	// Save current timestamp
	m.lastTimestamp = time.Now()

@@ -102,7 +101,7 @@ func (m *SchedstatCollector) ParseProcLine(linefields []string, tags map[string]
	waiting, _ := strconv.ParseInt(linefields[8], 10, 64)
	diff_running := running - m.olddata[linefields[0]]["running"]
	diff_waiting := waiting - m.olddata[linefields[0]]["waiting"]

	var l_running float64 = float64(diff_running) / tsdelta.Seconds() / (math.Pow(1000, 3))
	var l_waiting float64 = float64(diff_waiting) / tsdelta.Seconds() / (math.Pow(1000, 3))

@@ -110,11 +109,11 @@
	m.olddata[linefields[0]]["waiting"] = waiting
	value := l_running + l_waiting

	y, err := lp.New("cpu_load_core", tags, m.meta, map[string]interface{}{"value": value}, now)
	if err == nil {
		// Send it to output channel
		output <- y
	}
}

// Read collects all metrics belonging to the sample collector
@@ -3,14 +3,14 @@ package collectors
import (
	"encoding/json"
	"fmt"
-	"io/ioutil"
+	"os"
	"path/filepath"
	"strconv"
	"strings"
	"time"

-	cclog "github.com/ClusterCockpit/cc-metric-collector/internal/ccLogger"
-	lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric"
+	cclog "github.com/ClusterCockpit/cc-metric-collector/pkg/ccLogger"
+	lp "github.com/ClusterCockpit/cc-metric-collector/pkg/ccMetric"
)

// See: https://www.kernel.org/doc/html/latest/hwmon/sysfs-interface.html

@@ -83,14 +83,14 @@ func (m *TempCollector) Init(config json.RawMessage) error {

		// sensor name
		nameFile := filepath.Join(filepath.Dir(file), "name")
-		name, err := ioutil.ReadFile(nameFile)
+		name, err := os.ReadFile(nameFile)
		if err == nil {
			sensor.name = strings.TrimSpace(string(name))
		}

		// sensor label
		labelFile := strings.TrimSuffix(file, "_input") + "_label"
-		label, err := ioutil.ReadFile(labelFile)
+		label, err := os.ReadFile(labelFile)
		if err == nil {
			sensor.label = strings.TrimSpace(string(label))
		}

@@ -117,7 +117,7 @@ func (m *TempCollector) Init(config json.RawMessage) error {
		}

		// Sensor file
-		_, err = ioutil.ReadFile(file)
+		_, err = os.ReadFile(file)
		if err != nil {
			continue
		}

@@ -139,7 +139,7 @@ func (m *TempCollector) Init(config json.RawMessage) error {
		// max temperature
		if m.config.ReportMaxTemp {
			maxTempFile := strings.TrimSuffix(file, "_input") + "_max"
-			if buffer, err := ioutil.ReadFile(maxTempFile); err == nil {
+			if buffer, err := os.ReadFile(maxTempFile); err == nil {
				if x, err := strconv.ParseInt(strings.TrimSpace(string(buffer)), 10, 64); err == nil {
					sensor.maxTempName = strings.Replace(sensor.metricName, "temp", "max_temp", 1)
					sensor.maxTemp = x / 1000

@@ -150,7 +150,7 @@ func (m *TempCollector) Init(config json.RawMessage) error {
		// critical temperature
		if m.config.ReportCriticalTemp {
			criticalTempFile := strings.TrimSuffix(file, "_input") + "_crit"
-			if buffer, err := ioutil.ReadFile(criticalTempFile); err == nil {
+			if buffer, err := os.ReadFile(criticalTempFile); err == nil {
				if x, err := strconv.ParseInt(strings.TrimSpace(string(buffer)), 10, 64); err == nil {
					sensor.critTempName = strings.Replace(sensor.metricName, "temp", "crit_temp", 1)
					sensor.critTemp = x / 1000

@@ -175,7 +175,7 @@ func (m *TempCollector) Read(interval time.Duration, output chan lp.CCMetric) {

	for _, sensor := range m.sensors {
		// Read sensor file
-		buffer, err := ioutil.ReadFile(sensor.file)
+		buffer, err := os.ReadFile(sensor.file)
		if err != nil {
			cclog.ComponentError(
				m.name,
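Since Go 1.16, os.ReadFile is the direct replacement for the deprecated ioutil.ReadFile, so these hunks only swap the package without changing behaviour:

// before (io/ioutil, deprecated since Go 1.16)
buffer, err := ioutil.ReadFile(sensor.file)
// after (os, same signature and semantics)
buffer, err := os.ReadFile(sensor.file)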
@@ -9,7 +9,7 @@ import (
	"strings"
	"time"

-	lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric"
+	lp "github.com/ClusterCockpit/cc-metric-collector/pkg/ccMetric"
)

const MAX_NUM_PROCS = 10
go.mod (2 changes)
@@ -13,6 +13,7 @@ require (
	github.com/nats-io/nats.go v1.16.0
	github.com/prometheus/client_golang v1.12.2
	github.com/stmcginnis/gofish v0.13.0
+	github.com/tklauser/go-sysconf v0.3.10
	golang.org/x/sys v0.0.0-20220712014510-0a85c31ab51e
)

@@ -31,6 +32,7 @@ require (
	github.com/prometheus/common v0.37.0 // indirect
	github.com/prometheus/procfs v0.7.3 // indirect
	github.com/shopspring/decimal v1.3.1 // indirect
+	github.com/tklauser/numcpus v0.4.0 // indirect
	golang.org/x/crypto v0.0.0-20220622213112-05595931fe9d // indirect
	golang.org/x/net v0.0.0-20220708220712-1185a9018129 // indirect
	google.golang.org/protobuf v1.28.0 // indirect
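The new github.com/tklauser/go-sysconf dependency (with github.com/tklauser/numcpus as its indirect dependency) exposes POSIX sysconf values to Go programs. This hunk does not show the call site, but a typical use is reading the clock-tick rate needed to convert jiffy-based /proc counters into seconds; a minimal sketch:

package main

import (
	"fmt"

	"github.com/tklauser/go-sysconf"
)

func main() {
	// SC_CLK_TCK: kernel ticks per second (USER_HZ), usually 100 on Linux.
	clktck, err := sysconf.Sysconf(sysconf.SC_CLK_TCK)
	if err == nil {
		fmt.Println("clock ticks per second:", clktck)
	}
}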
go.sum (5 changes)
@@ -287,6 +287,10 @@ github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/
github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
github.com/stretchr/testify v1.7.1 h1:5TQK59W5E3v0r2duFAb7P95B6hEeOyEnHRa8MjYSMTY=
github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
+github.com/tklauser/go-sysconf v0.3.10 h1:IJ1AZGZRWbY8T5Vfk04D9WOA5WSejdflXxP03OUqALw=
+github.com/tklauser/go-sysconf v0.3.10/go.mod h1:C8XykCvCb+Gn0oNCWPIlcb0RuglQTYaQ2hGm7jmxEFk=
+github.com/tklauser/numcpus v0.4.0 h1:E53Dm1HjH1/R2/aoCtXtPgzmElmn51aOkhCFSuZq//o=
+github.com/tklauser/numcpus v0.4.0/go.mod h1:1+UI3pD8NW14VMwdgJNJ1ESk2UnwhAnz5hMwiKKqXCQ=
github.com/ugorji/go v1.1.7/go.mod h1:kZn38zHttfInRq0xu/PH0az30d+z6vm202qpg1oXVMw=
github.com/ugorji/go v1.2.7/go.mod h1:nF9osbDWLy6bDVv/Rtoh6QgnvNDpmCalQV5urGCCS6M=
github.com/ugorji/go/codec v1.1.7/go.mod h1:Ax+UKWsSmolVDwsd+7N3ZtXu+yMGCf907BLYF3GoBXY=

@@ -445,6 +449,7 @@ golang.org/x/sys v0.0.0-20211019181941-9d821ace8654/go.mod h1:oPkhp1MJrh7nUepCBc
golang.org/x/sys v0.0.0-20211103235746-7861aae1554b/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20211216021012-1d35b9e2eb4e/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220114195835-da31bd327af9/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/sys v0.0.0-20220128215802-99c3d69c2c27/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220513210249-45d2b4557a2a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220712014510-0a85c31ab51e h1:NHvCuwuS43lGnYhten69ZWqi2QOj/CiDNcKbVqwVoew=
golang.org/x/sys v0.0.0-20220712014510-0a85c31ab51e/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
@@ -9,10 +9,10 @@ import (
	"sync"
	"time"

-	cclog "github.com/ClusterCockpit/cc-metric-collector/internal/ccLogger"
+	cclog "github.com/ClusterCockpit/cc-metric-collector/pkg/ccLogger"

-	lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric"
-	topo "github.com/ClusterCockpit/cc-metric-collector/internal/ccTopology"
+	lp "github.com/ClusterCockpit/cc-metric-collector/pkg/ccMetric"
+	topo "github.com/ClusterCockpit/cc-metric-collector/pkg/ccTopology"

	"github.com/PaesslerAG/gval"
)

@@ -8,8 +8,8 @@ import (
	"sort"
	"strings"

-	cclog "github.com/ClusterCockpit/cc-metric-collector/internal/ccLogger"
-	topo "github.com/ClusterCockpit/cc-metric-collector/internal/ccTopology"
+	cclog "github.com/ClusterCockpit/cc-metric-collector/pkg/ccLogger"
+	topo "github.com/ClusterCockpit/cc-metric-collector/pkg/ccTopology"
)

/*

@@ -4,11 +4,11 @@ import (
	"sync"
	"time"

-	cclog "github.com/ClusterCockpit/cc-metric-collector/internal/ccLogger"
+	cclog "github.com/ClusterCockpit/cc-metric-collector/pkg/ccLogger"

-	lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric"
	agg "github.com/ClusterCockpit/cc-metric-collector/internal/metricAggregator"
-	mct "github.com/ClusterCockpit/cc-metric-collector/internal/multiChanTicker"
+	lp "github.com/ClusterCockpit/cc-metric-collector/pkg/ccMetric"
+	mct "github.com/ClusterCockpit/cc-metric-collector/pkg/multiChanTicker"
)

type metricCachePeriod struct {
@@ -7,11 +7,11 @@ import (
	"sync"
	"time"

-	cclog "github.com/ClusterCockpit/cc-metric-collector/internal/ccLogger"
+	cclog "github.com/ClusterCockpit/cc-metric-collector/pkg/ccLogger"

-	lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric"
+	lp "github.com/ClusterCockpit/cc-metric-collector/pkg/ccMetric"
	agg "github.com/ClusterCockpit/cc-metric-collector/internal/metricAggregator"
-	mct "github.com/ClusterCockpit/cc-metric-collector/internal/multiChanTicker"
+	mct "github.com/ClusterCockpit/cc-metric-collector/pkg/multiChanTicker"
	units "github.com/ClusterCockpit/cc-units"
)

@@ -281,7 +281,9 @@ func (r *metricRouter) Start() {
	// Forward message received from collector channel
	coll_forward := func(p lp.CCMetric) {
		// receive from metric collector
-		p.AddTag(r.config.HostnameTagName, r.hostname)
+		if !p.HasTag(r.config.HostnameTagName) {
+			p.AddTag(r.config.HostnameTagName, r.hostname)
+		}
		if r.config.IntervalStamp {
			p.SetTime(r.timestamp)
		}

@@ -310,7 +312,9 @@ func (r *metricRouter) Start() {
	cache_forward := func(p lp.CCMetric) {
		// receive from metric collector
		if !r.dropMetric(p) {
-			p.AddTag(r.config.HostnameTagName, r.hostname)
+			if !p.HasTag(r.config.HostnameTagName) {
+				p.AddTag(r.config.HostnameTagName, r.hostname)
+			}
			forward(p)
		}
	}
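The new HasTag guard means an already present hostname tag (for example one attached by a receiver for metrics originating on another node) is no longer overwritten with the collector's own hostname; only untagged metrics get the local value. A condensed sketch of the behaviour, not taken verbatim from the router code:

// Sketch only: keep an existing hostname tag, add the local one otherwise.
func ensureHostnameTag(p lp.CCMetric, localHostname string) {
	if !p.HasTag("hostname") {
		p.AddTag("hostname", localHostname)
	}
}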
@@ -10,7 +10,7 @@ import (
	"strconv"
	"strings"

-	cclogger "github.com/ClusterCockpit/cc-metric-collector/internal/ccLogger"
+	cclogger "github.com/ClusterCockpit/cc-metric-collector/pkg/ccLogger"
)

const SYSFS_NUMABASE = `/sys/devices/system/node`

@@ -3,7 +3,7 @@ package multiChanTicker
import (
	"time"

-	cclog "github.com/ClusterCockpit/cc-metric-collector/internal/ccLogger"
+	cclog "github.com/ClusterCockpit/cc-metric-collector/pkg/ccLogger"
)

type multiChanTicker struct {

@@ -10,8 +10,8 @@ import (
	"strings"
	"sync"

-	cclog "github.com/ClusterCockpit/cc-metric-collector/internal/ccLogger"
-	lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric"
+	cclog "github.com/ClusterCockpit/cc-metric-collector/pkg/ccLogger"
+	lp "github.com/ClusterCockpit/cc-metric-collector/pkg/ccMetric"
	"github.com/gorilla/mux"
	influx "github.com/influxdata/line-protocol"
)

@@ -1,7 +1,7 @@
package receivers

import (
-	lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric"
+	lp "github.com/ClusterCockpit/cc-metric-collector/pkg/ccMetric"
)

type defaultReceiverConfig struct {

@@ -6,8 +6,8 @@ import (
	"fmt"
	"time"

-	cclog "github.com/ClusterCockpit/cc-metric-collector/internal/ccLogger"
-	lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric"
+	cclog "github.com/ClusterCockpit/cc-metric-collector/pkg/ccLogger"
+	lp "github.com/ClusterCockpit/cc-metric-collector/pkg/ccMetric"
	influx "github.com/influxdata/line-protocol"
	nats "github.com/nats-io/nats.go"
)

@@ -12,8 +12,8 @@ import (
	"sync"
	"time"

-	cclog "github.com/ClusterCockpit/cc-metric-collector/internal/ccLogger"
-	lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric"
+	cclog "github.com/ClusterCockpit/cc-metric-collector/pkg/ccLogger"
+	lp "github.com/ClusterCockpit/cc-metric-collector/pkg/ccMetric"
)

type PrometheusReceiverConfig struct {

@@ -5,8 +5,8 @@ import (
	"os"
	"sync"

-	cclog "github.com/ClusterCockpit/cc-metric-collector/internal/ccLogger"
-	lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric"
+	cclog "github.com/ClusterCockpit/cc-metric-collector/pkg/ccLogger"
+	lp "github.com/ClusterCockpit/cc-metric-collector/pkg/ccMetric"
)

var AvailableReceivers = map[string]func(name string, config json.RawMessage) (Receiver, error){

@@ -10,8 +10,8 @@ import (
	"sync"
	"time"

-	cclog "github.com/ClusterCockpit/cc-metric-collector/internal/ccLogger"
-	lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric"
+	cclog "github.com/ClusterCockpit/cc-metric-collector/pkg/ccLogger"
+	lp "github.com/ClusterCockpit/cc-metric-collector/pkg/ccMetric"

	// See: https://pkg.go.dev/github.com/stmcginnis/gofish
	"github.com/stmcginnis/gofish"

@@ -4,7 +4,7 @@ import (
	"encoding/json"
	"fmt"

-	cclog "github.com/ClusterCockpit/cc-metric-collector/internal/ccLogger"
+	cclog "github.com/ClusterCockpit/cc-metric-collector/pkg/ccLogger"
)

// SampleReceiver configuration: receiver type, listen address, port
@@ -4,7 +4,7 @@ import (
	"fmt"
	"strings"

-	lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric"
+	lp "github.com/ClusterCockpit/cc-metric-collector/pkg/ccMetric"
)

func GangliaMetricName(point lp.CCMetric) string {

@@ -9,8 +9,8 @@ import (
	// "time"
	"os/exec"

-	cclog "github.com/ClusterCockpit/cc-metric-collector/internal/ccLogger"
-	lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric"
+	cclog "github.com/ClusterCockpit/cc-metric-collector/pkg/ccLogger"
+	lp "github.com/ClusterCockpit/cc-metric-collector/pkg/ccMetric"
)

const GMETRIC_EXEC = `gmetric`

@@ -9,8 +9,8 @@ import (
	"sync"
	"time"

-	cclog "github.com/ClusterCockpit/cc-metric-collector/internal/ccLogger"
-	lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric"
+	cclog "github.com/ClusterCockpit/cc-metric-collector/pkg/ccLogger"
+	lp "github.com/ClusterCockpit/cc-metric-collector/pkg/ccMetric"
	influx "github.com/influxdata/line-protocol"
)

@@ -9,8 +9,8 @@ import (
	"strings"
	"time"

-	cclog "github.com/ClusterCockpit/cc-metric-collector/internal/ccLogger"
-	lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric"
+	cclog "github.com/ClusterCockpit/cc-metric-collector/pkg/ccLogger"
+	lp "github.com/ClusterCockpit/cc-metric-collector/pkg/ccMetric"
	influxdb2 "github.com/influxdata/influxdb-client-go/v2"
	influxdb2Api "github.com/influxdata/influxdb-client-go/v2/api"
	influxdb2ApiHttp "github.com/influxdata/influxdb-client-go/v2/api/http"

@@ -9,8 +9,8 @@ import (
	"sync"
	"time"

-	cclog "github.com/ClusterCockpit/cc-metric-collector/internal/ccLogger"
-	lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric"
+	cclog "github.com/ClusterCockpit/cc-metric-collector/pkg/ccLogger"
+	lp "github.com/ClusterCockpit/cc-metric-collector/pkg/ccMetric"
	influxdb2 "github.com/influxdata/influxdb-client-go/v2"
	influxdb2Api "github.com/influxdata/influxdb-client-go/v2/api"
	"github.com/influxdata/influxdb-client-go/v2/api/write"

@@ -71,8 +71,8 @@ import (
	"fmt"
	"unsafe"

-	cclog "github.com/ClusterCockpit/cc-metric-collector/internal/ccLogger"
-	lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric"
+	cclog "github.com/ClusterCockpit/cc-metric-collector/pkg/ccLogger"
+	lp "github.com/ClusterCockpit/cc-metric-collector/pkg/ccMetric"
	"github.com/NVIDIA/go-nvml/pkg/dl"
)

@@ -1,7 +1,7 @@
package sinks

import (
-	lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric"
+	lp "github.com/ClusterCockpit/cc-metric-collector/pkg/ccMetric"
)

type defaultSinkConfig struct {

@@ -8,8 +8,8 @@ import (
	"sync"
	"time"

-	cclog "github.com/ClusterCockpit/cc-metric-collector/internal/ccLogger"
-	lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric"
+	cclog "github.com/ClusterCockpit/cc-metric-collector/pkg/ccLogger"
+	lp "github.com/ClusterCockpit/cc-metric-collector/pkg/ccMetric"
	influx "github.com/influxdata/line-protocol"
	nats "github.com/nats-io/nats.go"
)

@@ -9,8 +9,8 @@ import (
	"strings"
	"sync"

-	cclog "github.com/ClusterCockpit/cc-metric-collector/internal/ccLogger"
-	lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric"
+	cclog "github.com/ClusterCockpit/cc-metric-collector/pkg/ccLogger"
+	lp "github.com/ClusterCockpit/cc-metric-collector/pkg/ccMetric"
	"github.com/gorilla/mux"
	"github.com/prometheus/client_golang/prometheus"
	"github.com/prometheus/client_golang/prometheus/promhttp"

@@ -5,8 +5,8 @@ import (
	"fmt"
	"log"

-	cclog "github.com/ClusterCockpit/cc-metric-collector/internal/ccLogger"
-	lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric"
+	cclog "github.com/ClusterCockpit/cc-metric-collector/pkg/ccLogger"
+	lp "github.com/ClusterCockpit/cc-metric-collector/pkg/ccMetric"
)

type SampleSinkConfig struct {

@@ -6,8 +6,8 @@ import (
	"os"
	"sync"

-	cclog "github.com/ClusterCockpit/cc-metric-collector/internal/ccLogger"
-	lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric"
+	cclog "github.com/ClusterCockpit/cc-metric-collector/pkg/ccLogger"
+	lp "github.com/ClusterCockpit/cc-metric-collector/pkg/ccMetric"
)

const SINK_MAX_FORWARD = 50

@@ -7,7 +7,7 @@ import (
	"strings"

	// "time"
-	lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric"
+	lp "github.com/ClusterCockpit/cc-metric-collector/pkg/ccMetric"
)

type StdoutSink struct {
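All of the collector, receiver, and sink hunks above share one mechanical change: ccLogger, ccMetric, ccTopology, and multiChanTicker moved from internal/ to pkg/. Go refuses to import internal/ packages from outside the defining module, so the move presumably makes these helper packages usable by other projects; inside this repository only the import paths change, for example:

import (
	cclog "github.com/ClusterCockpit/cc-metric-collector/pkg/ccLogger"
	lp "github.com/ClusterCockpit/cc-metric-collector/pkg/ccMetric"
)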