mirror of
https://github.com/ClusterCockpit/cc-metric-collector.git
synced 2025-07-21 12:21:41 +02:00
Compare commits
31 Commits
v0.6.2
...
app_metric
Author | SHA1 | Date | |
---|---|---|---|
|
1ce40aea16 | ||
|
5918f96fd8 | ||
|
8cb87a2165 | ||
|
3e91a37dee | ||
|
ed68baeada | ||
|
888db31dbf | ||
|
c938d32629 | ||
|
d5daf54d4f | ||
|
18bffd7c14 | ||
|
bd0105b370 | ||
|
b1a8674c4c | ||
|
234ad3c54e | ||
|
7bb80780e0 | ||
|
e66d52bb32 | ||
|
9840d0193d | ||
|
ce7eef8d30 | ||
|
92e45ca62c | ||
|
fd10a279fc | ||
|
9e63d0ea59 | ||
|
76bb033a88 | ||
|
deb1bcfa2f | ||
|
7a67d5e25f | ||
|
9ae0806aa9 | ||
|
4bd71224df | ||
|
6bf3bfd10a | ||
|
0fbff00996 | ||
|
8849824ba9 | ||
|
ed511b7c09 | ||
|
a0acf01dc3 | ||
|
58461f1f72 | ||
|
c09d8fb118 |
61
.github/workflows/Release.yml
vendored
61
.github/workflows/Release.yml
vendored
@@ -133,13 +133,63 @@ jobs:
|
|||||||
name: cc-metric-collector SRPM for UBI 8
|
name: cc-metric-collector SRPM for UBI 8
|
||||||
path: ${{ steps.rpmbuild.outputs.SRPM }}
|
path: ${{ steps.rpmbuild.outputs.SRPM }}
|
||||||
|
|
||||||
|
#
|
||||||
|
# Build on Ubuntu 20.04 using official go package
|
||||||
|
#
|
||||||
|
Ubuntu-focal-build:
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
container: ubuntu:20.04
|
||||||
|
# The job outputs link to the outputs of the 'debrename' step
|
||||||
|
# Only job outputs can be used in child jobs
|
||||||
|
outputs:
|
||||||
|
deb : ${{steps.debrename.outputs.DEB}}
|
||||||
|
steps:
|
||||||
|
# Use apt to install development packages
|
||||||
|
- name: Install development packages
|
||||||
|
run: |
|
||||||
|
apt update && apt --assume-yes upgrade
|
||||||
|
apt --assume-yes install build-essential sed git wget bash
|
||||||
|
# Checkout git repository and submodules
|
||||||
|
# fetch-depth must be 0 to use git describe
|
||||||
|
# See: https://github.com/marketplace/actions/checkout
|
||||||
|
- name: Checkout
|
||||||
|
uses: actions/checkout@v2
|
||||||
|
with:
|
||||||
|
submodules: recursive
|
||||||
|
fetch-depth: 0
|
||||||
|
# Use official golang package
|
||||||
|
- name: Install Golang
|
||||||
|
run: |
|
||||||
|
wget -q https://go.dev/dl/go1.19.1.linux-amd64.tar.gz
|
||||||
|
tar -C /usr/local -xzf go1.19.1.linux-amd64.tar.gz
|
||||||
|
export PATH=/usr/local/go/bin:/usr/local/go/pkg/tool/linux_amd64:$PATH
|
||||||
|
go version
|
||||||
|
- name: DEB build MetricCollector
|
||||||
|
id: dpkg-build
|
||||||
|
run: |
|
||||||
|
export PATH=/usr/local/go/bin:/usr/local/go/pkg/tool/linux_amd64:$PATH
|
||||||
|
make DEB
|
||||||
|
- name: Rename DEB (add '_ubuntu20.04')
|
||||||
|
id: debrename
|
||||||
|
run: |
|
||||||
|
OLD_DEB_NAME=$(echo "${{steps.dpkg-build.outputs.DEB}}" | rev | cut -d '.' -f 2- | rev)
|
||||||
|
NEW_DEB_FILE="${OLD_DEB_NAME}_ubuntu20.04.deb"
|
||||||
|
mv "${{steps.dpkg-build.outputs.DEB}}" "${NEW_DEB_FILE}"
|
||||||
|
echo "::set-output name=DEB::${NEW_DEB_FILE}"
|
||||||
|
# See: https://github.com/actions/upload-artifact
|
||||||
|
- name: Save DEB as artifact
|
||||||
|
uses: actions/upload-artifact@v2
|
||||||
|
with:
|
||||||
|
name: cc-metric-collector DEB for Ubuntu 20.04
|
||||||
|
path: ${{ steps.debrename.outputs.DEB }}
|
||||||
|
|
||||||
#
|
#
|
||||||
# Create release with fresh RPMs
|
# Create release with fresh RPMs
|
||||||
#
|
#
|
||||||
Release:
|
Release:
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
# We need the RPMs, so add dependency
|
# We need the RPMs, so add dependency
|
||||||
needs: [AlmaLinux-RPM-build, UBI-8-RPM-build]
|
needs: [AlmaLinux-RPM-build, UBI-8-RPM-build, Ubuntu-focal-build]
|
||||||
|
|
||||||
steps:
|
steps:
|
||||||
# See: https://github.com/actions/download-artifact
|
# See: https://github.com/actions/download-artifact
|
||||||
@@ -161,6 +211,11 @@ jobs:
|
|||||||
with:
|
with:
|
||||||
name: cc-metric-collector SRPM for UBI 8
|
name: cc-metric-collector SRPM for UBI 8
|
||||||
|
|
||||||
|
- name: Download Ubuntu 20.04 DEB
|
||||||
|
uses: actions/download-artifact@v2
|
||||||
|
with:
|
||||||
|
name: cc-metric-collector DEB for Ubuntu 20.04
|
||||||
|
|
||||||
# The download actions do not publish the name of the downloaded file,
|
# The download actions do not publish the name of the downloaded file,
|
||||||
# so we re-use the job outputs of the parent jobs. The files are all
|
# so we re-use the job outputs of the parent jobs. The files are all
|
||||||
# downloaded to the current folder.
|
# downloaded to the current folder.
|
||||||
@@ -174,14 +229,17 @@ jobs:
|
|||||||
ALMA_85_SRPM=$(basename "${{ needs.AlmaLinux-RPM-build.outputs.srpm}}")
|
ALMA_85_SRPM=$(basename "${{ needs.AlmaLinux-RPM-build.outputs.srpm}}")
|
||||||
UBI_8_RPM=$(basename "${{ needs.UBI-8-RPM-build.outputs.rpm}}")
|
UBI_8_RPM=$(basename "${{ needs.UBI-8-RPM-build.outputs.rpm}}")
|
||||||
UBI_8_SRPM=$(basename "${{ needs.UBI-8-RPM-build.outputs.srpm}}")
|
UBI_8_SRPM=$(basename "${{ needs.UBI-8-RPM-build.outputs.srpm}}")
|
||||||
|
U_2004_DEB=$(basename "${{ needs.Ubuntu-focal-build.outputs.deb}}")
|
||||||
echo "ALMA_85_RPM::${ALMA_85_RPM}"
|
echo "ALMA_85_RPM::${ALMA_85_RPM}"
|
||||||
echo "ALMA_85_SRPM::${ALMA_85_SRPM}"
|
echo "ALMA_85_SRPM::${ALMA_85_SRPM}"
|
||||||
echo "UBI_8_RPM::${UBI_8_RPM}"
|
echo "UBI_8_RPM::${UBI_8_RPM}"
|
||||||
echo "UBI_8_SRPM::${UBI_8_SRPM}"
|
echo "UBI_8_SRPM::${UBI_8_SRPM}"
|
||||||
|
echo "U_2004_DEB::${U_2004_DEB}"
|
||||||
echo "::set-output name=ALMA_85_RPM::${ALMA_85_RPM}"
|
echo "::set-output name=ALMA_85_RPM::${ALMA_85_RPM}"
|
||||||
echo "::set-output name=ALMA_85_SRPM::${ALMA_85_SRPM}"
|
echo "::set-output name=ALMA_85_SRPM::${ALMA_85_SRPM}"
|
||||||
echo "::set-output name=UBI_8_RPM::${UBI_8_RPM}"
|
echo "::set-output name=UBI_8_RPM::${UBI_8_RPM}"
|
||||||
echo "::set-output name=UBI_8_SRPM::${UBI_8_SRPM}"
|
echo "::set-output name=UBI_8_SRPM::${UBI_8_SRPM}"
|
||||||
|
echo "::set-output name=U_2004_DEB::${U_2004_DEB}"
|
||||||
|
|
||||||
# See: https://github.com/softprops/action-gh-release
|
# See: https://github.com/softprops/action-gh-release
|
||||||
- name: Release
|
- name: Release
|
||||||
@@ -194,3 +252,4 @@ jobs:
|
|||||||
${{ steps.files.outputs.ALMA_85_SRPM }}
|
${{ steps.files.outputs.ALMA_85_SRPM }}
|
||||||
${{ steps.files.outputs.UBI_8_RPM }}
|
${{ steps.files.outputs.UBI_8_RPM }}
|
||||||
${{ steps.files.outputs.UBI_8_SRPM }}
|
${{ steps.files.outputs.UBI_8_SRPM }}
|
||||||
|
${{ steps.files.outputs.U_2004_DEB }}
|
||||||
|
2
Makefile
2
Makefile
@@ -22,7 +22,7 @@ GOBIN = $(shell which go)
|
|||||||
.PHONY: all
|
.PHONY: all
|
||||||
all: $(APP)
|
all: $(APP)
|
||||||
|
|
||||||
$(APP): $(GOSRC)
|
$(APP): $(GOSRC) go.mod
|
||||||
make -C collectors
|
make -C collectors
|
||||||
$(GOBIN) get
|
$(GOBIN) get
|
||||||
$(GOBIN) build -o $(APP) $(GOSRC_APP)
|
$(GOBIN) build -o $(APP) $(GOSRC_APP)
|
||||||
|
@@ -15,10 +15,10 @@ import (
|
|||||||
"sync"
|
"sync"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
cclog "github.com/ClusterCockpit/cc-metric-collector/internal/ccLogger"
|
|
||||||
lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric"
|
|
||||||
mr "github.com/ClusterCockpit/cc-metric-collector/internal/metricRouter"
|
mr "github.com/ClusterCockpit/cc-metric-collector/internal/metricRouter"
|
||||||
mct "github.com/ClusterCockpit/cc-metric-collector/internal/multiChanTicker"
|
cclog "github.com/ClusterCockpit/cc-metric-collector/pkg/ccLogger"
|
||||||
|
lp "github.com/ClusterCockpit/cc-metric-collector/pkg/ccMetric"
|
||||||
|
mct "github.com/ClusterCockpit/cc-metric-collector/pkg/multiChanTicker"
|
||||||
)
|
)
|
||||||
|
|
||||||
type CentralConfigFile struct {
|
type CentralConfigFile struct {
|
||||||
|
@@ -5,7 +5,7 @@ import (
|
|||||||
"bytes"
|
"bytes"
|
||||||
"encoding/json"
|
"encoding/json"
|
||||||
"fmt"
|
"fmt"
|
||||||
"io/ioutil"
|
"io"
|
||||||
"os"
|
"os"
|
||||||
"os/exec"
|
"os/exec"
|
||||||
"os/user"
|
"os/user"
|
||||||
@@ -14,8 +14,8 @@ import (
|
|||||||
"strings"
|
"strings"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
cclog "github.com/ClusterCockpit/cc-metric-collector/internal/ccLogger"
|
cclog "github.com/ClusterCockpit/cc-metric-collector/pkg/ccLogger"
|
||||||
lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric"
|
lp "github.com/ClusterCockpit/cc-metric-collector/pkg/ccMetric"
|
||||||
)
|
)
|
||||||
|
|
||||||
const DEFAULT_BEEGFS_CMD = "beegfs-ctl"
|
const DEFAULT_BEEGFS_CMD = "beegfs-ctl"
|
||||||
@@ -115,7 +115,7 @@ func (m *BeegfsMetaCollector) Read(interval time.Duration, output chan lp.CCMetr
|
|||||||
return
|
return
|
||||||
}
|
}
|
||||||
//get mounpoint
|
//get mounpoint
|
||||||
buffer, _ := ioutil.ReadFile(string("/proc/mounts"))
|
buffer, _ := os.ReadFile(string("/proc/mounts"))
|
||||||
mounts := strings.Split(string(buffer), "\n")
|
mounts := strings.Split(string(buffer), "\n")
|
||||||
var mountpoints []string
|
var mountpoints []string
|
||||||
for _, line := range mounts {
|
for _, line := range mounts {
|
||||||
@@ -157,9 +157,9 @@ func (m *BeegfsMetaCollector) Read(interval time.Duration, output chan lp.CCMetr
|
|||||||
if err != nil {
|
if err != nil {
|
||||||
fmt.Fprintf(os.Stderr, "BeegfsMetaCollector.Read(): Failed to execute command \"%s\": %s\n", cmd.String(), err.Error())
|
fmt.Fprintf(os.Stderr, "BeegfsMetaCollector.Read(): Failed to execute command \"%s\": %s\n", cmd.String(), err.Error())
|
||||||
fmt.Fprintf(os.Stderr, "BeegfsMetaCollector.Read(): command exit code: \"%d\"\n", cmd.ProcessState.ExitCode())
|
fmt.Fprintf(os.Stderr, "BeegfsMetaCollector.Read(): command exit code: \"%d\"\n", cmd.ProcessState.ExitCode())
|
||||||
data, _ := ioutil.ReadAll(cmdStderr)
|
data, _ := io.ReadAll(cmdStderr)
|
||||||
fmt.Fprintf(os.Stderr, "BeegfsMetaCollector.Read(): command stderr: \"%s\"\n", string(data))
|
fmt.Fprintf(os.Stderr, "BeegfsMetaCollector.Read(): command stderr: \"%s\"\n", string(data))
|
||||||
data, _ = ioutil.ReadAll(cmdStdout)
|
data, _ = io.ReadAll(cmdStdout)
|
||||||
fmt.Fprintf(os.Stderr, "BeegfsMetaCollector.Read(): command stdout: \"%s\"\n", string(data))
|
fmt.Fprintf(os.Stderr, "BeegfsMetaCollector.Read(): command stdout: \"%s\"\n", string(data))
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
@@ -5,7 +5,7 @@ import (
|
|||||||
"bytes"
|
"bytes"
|
||||||
"encoding/json"
|
"encoding/json"
|
||||||
"fmt"
|
"fmt"
|
||||||
"io/ioutil"
|
"io"
|
||||||
"os"
|
"os"
|
||||||
"os/exec"
|
"os/exec"
|
||||||
"os/user"
|
"os/user"
|
||||||
@@ -14,8 +14,8 @@ import (
|
|||||||
"strings"
|
"strings"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
cclog "github.com/ClusterCockpit/cc-metric-collector/internal/ccLogger"
|
cclog "github.com/ClusterCockpit/cc-metric-collector/pkg/ccLogger"
|
||||||
lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric"
|
lp "github.com/ClusterCockpit/cc-metric-collector/pkg/ccMetric"
|
||||||
)
|
)
|
||||||
|
|
||||||
// Struct for the collector-specific JSON config
|
// Struct for the collector-specific JSON config
|
||||||
@@ -108,7 +108,7 @@ func (m *BeegfsStorageCollector) Read(interval time.Duration, output chan lp.CCM
|
|||||||
return
|
return
|
||||||
}
|
}
|
||||||
//get mounpoint
|
//get mounpoint
|
||||||
buffer, _ := ioutil.ReadFile(string("/proc/mounts"))
|
buffer, _ := os.ReadFile(string("/proc/mounts"))
|
||||||
mounts := strings.Split(string(buffer), "\n")
|
mounts := strings.Split(string(buffer), "\n")
|
||||||
var mountpoints []string
|
var mountpoints []string
|
||||||
for _, line := range mounts {
|
for _, line := range mounts {
|
||||||
@@ -149,9 +149,9 @@ func (m *BeegfsStorageCollector) Read(interval time.Duration, output chan lp.CCM
|
|||||||
if err != nil {
|
if err != nil {
|
||||||
fmt.Fprintf(os.Stderr, "BeegfsStorageCollector.Read(): Failed to execute command \"%s\": %s\n", cmd.String(), err.Error())
|
fmt.Fprintf(os.Stderr, "BeegfsStorageCollector.Read(): Failed to execute command \"%s\": %s\n", cmd.String(), err.Error())
|
||||||
fmt.Fprintf(os.Stderr, "BeegfsStorageCollector.Read(): command exit code: \"%d\"\n", cmd.ProcessState.ExitCode())
|
fmt.Fprintf(os.Stderr, "BeegfsStorageCollector.Read(): command exit code: \"%d\"\n", cmd.ProcessState.ExitCode())
|
||||||
data, _ := ioutil.ReadAll(cmdStderr)
|
data, _ := io.ReadAll(cmdStderr)
|
||||||
fmt.Fprintf(os.Stderr, "BeegfsStorageCollector.Read(): command stderr: \"%s\"\n", string(data))
|
fmt.Fprintf(os.Stderr, "BeegfsStorageCollector.Read(): command stderr: \"%s\"\n", string(data))
|
||||||
data, _ = ioutil.ReadAll(cmdStdout)
|
data, _ = io.ReadAll(cmdStdout)
|
||||||
fmt.Fprintf(os.Stderr, "BeegfsStorageCollector.Read(): command stdout: \"%s\"\n", string(data))
|
fmt.Fprintf(os.Stderr, "BeegfsStorageCollector.Read(): command stdout: \"%s\"\n", string(data))
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
@@ -6,9 +6,9 @@ import (
|
|||||||
"sync"
|
"sync"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
cclog "github.com/ClusterCockpit/cc-metric-collector/internal/ccLogger"
|
cclog "github.com/ClusterCockpit/cc-metric-collector/pkg/ccLogger"
|
||||||
lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric"
|
lp "github.com/ClusterCockpit/cc-metric-collector/pkg/ccMetric"
|
||||||
mct "github.com/ClusterCockpit/cc-metric-collector/internal/multiChanTicker"
|
mct "github.com/ClusterCockpit/cc-metric-collector/pkg/multiChanTicker"
|
||||||
)
|
)
|
||||||
|
|
||||||
// Map of all available metric collectors
|
// Map of all available metric collectors
|
||||||
@@ -36,7 +36,9 @@ var AvailableCollectors = map[string]MetricCollector{
|
|||||||
"numastats": new(NUMAStatsCollector),
|
"numastats": new(NUMAStatsCollector),
|
||||||
"beegfs_meta": new(BeegfsMetaCollector),
|
"beegfs_meta": new(BeegfsMetaCollector),
|
||||||
"beegfs_storage": new(BeegfsStorageCollector),
|
"beegfs_storage": new(BeegfsStorageCollector),
|
||||||
|
"rapl": new(RAPLCollector),
|
||||||
"rocm_smi": new(RocmSmiCollector),
|
"rocm_smi": new(RocmSmiCollector),
|
||||||
|
"self": new(SelfCollector),
|
||||||
"schedstat": new(SchedstatCollector),
|
"schedstat": new(SchedstatCollector),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@@ -10,8 +10,8 @@ import (
|
|||||||
"strings"
|
"strings"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
cclog "github.com/ClusterCockpit/cc-metric-collector/internal/ccLogger"
|
cclog "github.com/ClusterCockpit/cc-metric-collector/pkg/ccLogger"
|
||||||
lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric"
|
lp "github.com/ClusterCockpit/cc-metric-collector/pkg/ccMetric"
|
||||||
)
|
)
|
||||||
|
|
||||||
//
|
//
|
||||||
@@ -142,6 +142,11 @@ func (m *CPUFreqCpuInfoCollector) Init(config json.RawMessage) error {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Check if at least one CPU with frequency information was detected
|
||||||
|
if len(m.topology) == 0 {
|
||||||
|
return fmt.Errorf("No CPU frequency info found in %s", cpuInfoFile)
|
||||||
|
}
|
||||||
|
|
||||||
numPhysicalPackageID_int := maxPhysicalPackageID + 1
|
numPhysicalPackageID_int := maxPhysicalPackageID + 1
|
||||||
numPhysicalPackageID := fmt.Sprint(numPhysicalPackageID_int)
|
numPhysicalPackageID := fmt.Sprint(numPhysicalPackageID_int)
|
||||||
numNonHT := fmt.Sprint(numNonHT_int)
|
numNonHT := fmt.Sprint(numNonHT_int)
|
||||||
|
@@ -3,14 +3,14 @@ package collectors
|
|||||||
import (
|
import (
|
||||||
"encoding/json"
|
"encoding/json"
|
||||||
"fmt"
|
"fmt"
|
||||||
"io/ioutil"
|
"os"
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
"strconv"
|
"strconv"
|
||||||
"strings"
|
"strings"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
cclog "github.com/ClusterCockpit/cc-metric-collector/internal/ccLogger"
|
cclog "github.com/ClusterCockpit/cc-metric-collector/pkg/ccLogger"
|
||||||
lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric"
|
lp "github.com/ClusterCockpit/cc-metric-collector/pkg/ccMetric"
|
||||||
"golang.org/x/sys/unix"
|
"golang.org/x/sys/unix"
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -23,20 +23,18 @@ type CPUFreqCollectorTopology struct {
|
|||||||
numPhysicalPackages string // number of sockets / packages
|
numPhysicalPackages string // number of sockets / packages
|
||||||
numPhysicalPackages_int int64
|
numPhysicalPackages_int int64
|
||||||
isHT bool
|
isHT bool
|
||||||
numNonHT string // number of non hyperthreading processors
|
numNonHT string // number of non hyper-threading processors
|
||||||
numNonHT_int int64
|
numNonHT_int int64
|
||||||
scalingCurFreqFile string
|
scalingCurFreqFile string
|
||||||
tagSet map[string]string
|
tagSet map[string]string
|
||||||
}
|
}
|
||||||
|
|
||||||
//
|
|
||||||
// CPUFreqCollector
|
// CPUFreqCollector
|
||||||
// a metric collector to measure the current frequency of the CPUs
|
// a metric collector to measure the current frequency of the CPUs
|
||||||
// as obtained from the hardware (in KHz)
|
// as obtained from the hardware (in KHz)
|
||||||
// Only measure on the first hyper thread
|
// Only measure on the first hyper-thread
|
||||||
//
|
//
|
||||||
// See: https://www.kernel.org/doc/html/latest/admin-guide/pm/cpufreq.html
|
// See: https://www.kernel.org/doc/html/latest/admin-guide/pm/cpufreq.html
|
||||||
//
|
|
||||||
type CPUFreqCollector struct {
|
type CPUFreqCollector struct {
|
||||||
metricCollector
|
metricCollector
|
||||||
topology []CPUFreqCollectorTopology
|
topology []CPUFreqCollectorTopology
|
||||||
@@ -88,7 +86,7 @@ func (m *CPUFreqCollector) Init(config json.RawMessage) error {
|
|||||||
|
|
||||||
// Read package ID
|
// Read package ID
|
||||||
physicalPackageIDFile := filepath.Join(cpuDir, "topology", "physical_package_id")
|
physicalPackageIDFile := filepath.Join(cpuDir, "topology", "physical_package_id")
|
||||||
line, err := ioutil.ReadFile(physicalPackageIDFile)
|
line, err := os.ReadFile(physicalPackageIDFile)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return fmt.Errorf("unable to read physical package ID from file '%s': %v", physicalPackageIDFile, err)
|
return fmt.Errorf("unable to read physical package ID from file '%s': %v", physicalPackageIDFile, err)
|
||||||
}
|
}
|
||||||
@@ -100,7 +98,7 @@ func (m *CPUFreqCollector) Init(config json.RawMessage) error {
|
|||||||
|
|
||||||
// Read core ID
|
// Read core ID
|
||||||
coreIDFile := filepath.Join(cpuDir, "topology", "core_id")
|
coreIDFile := filepath.Join(cpuDir, "topology", "core_id")
|
||||||
line, err = ioutil.ReadFile(coreIDFile)
|
line, err = os.ReadFile(coreIDFile)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return fmt.Errorf("unable to read core ID from file '%s': %v", coreIDFile, err)
|
return fmt.Errorf("unable to read core ID from file '%s': %v", coreIDFile, err)
|
||||||
}
|
}
|
||||||
@@ -126,7 +124,7 @@ func (m *CPUFreqCollector) Init(config json.RawMessage) error {
|
|||||||
t.scalingCurFreqFile = scalingCurFreqFile
|
t.scalingCurFreqFile = scalingCurFreqFile
|
||||||
}
|
}
|
||||||
|
|
||||||
// is processor a hyperthread?
|
// is processor a hyper-thread?
|
||||||
coreSeenBefore := make(map[string]bool)
|
coreSeenBefore := make(map[string]bool)
|
||||||
for i := range m.topology {
|
for i := range m.topology {
|
||||||
t := &m.topology[i]
|
t := &m.topology[i]
|
||||||
@@ -136,23 +134,20 @@ func (m *CPUFreqCollector) Init(config json.RawMessage) error {
|
|||||||
coreSeenBefore[globalID] = true
|
coreSeenBefore[globalID] = true
|
||||||
}
|
}
|
||||||
|
|
||||||
// number of non hyper thread cores and packages / sockets
|
// number of non hyper-thread cores and packages / sockets
|
||||||
var numNonHT_int int64 = 0
|
var numNonHT_int int64 = 0
|
||||||
var maxPhysicalPackageID int64 = 0
|
PhysicalPackageIDs := make(map[int64]struct{})
|
||||||
for i := range m.topology {
|
for i := range m.topology {
|
||||||
t := &m.topology[i]
|
t := &m.topology[i]
|
||||||
|
|
||||||
// Update maxPackageID
|
|
||||||
if t.physicalPackageID_int > maxPhysicalPackageID {
|
|
||||||
maxPhysicalPackageID = t.physicalPackageID_int
|
|
||||||
}
|
|
||||||
|
|
||||||
if !t.isHT {
|
if !t.isHT {
|
||||||
numNonHT_int++
|
numNonHT_int++
|
||||||
}
|
}
|
||||||
|
|
||||||
|
PhysicalPackageIDs[t.physicalPackageID_int] = struct{}{}
|
||||||
}
|
}
|
||||||
|
|
||||||
numPhysicalPackageID_int := maxPhysicalPackageID + 1
|
numPhysicalPackageID_int := int64(len(PhysicalPackageIDs))
|
||||||
numPhysicalPackageID := fmt.Sprint(numPhysicalPackageID_int)
|
numPhysicalPackageID := fmt.Sprint(numPhysicalPackageID_int)
|
||||||
numNonHT := fmt.Sprint(numNonHT_int)
|
numNonHT := fmt.Sprint(numNonHT_int)
|
||||||
for i := range m.topology {
|
for i := range m.topology {
|
||||||
@@ -168,6 +163,13 @@ func (m *CPUFreqCollector) Init(config json.RawMessage) error {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Initialized
|
||||||
|
cclog.ComponentDebug(
|
||||||
|
m.name,
|
||||||
|
"initialized",
|
||||||
|
numPhysicalPackageID_int, "physical packages,",
|
||||||
|
len(cpuDirs), "CPUs,",
|
||||||
|
numNonHT, "non-hyper-threading CPUs")
|
||||||
m.init = true
|
m.init = true
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
@@ -182,13 +184,13 @@ func (m *CPUFreqCollector) Read(interval time.Duration, output chan lp.CCMetric)
|
|||||||
for i := range m.topology {
|
for i := range m.topology {
|
||||||
t := &m.topology[i]
|
t := &m.topology[i]
|
||||||
|
|
||||||
// skip hyperthreads
|
// skip hyper-threads
|
||||||
if t.isHT {
|
if t.isHT {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
// Read current frequency
|
// Read current frequency
|
||||||
line, err := ioutil.ReadFile(t.scalingCurFreqFile)
|
line, err := os.ReadFile(t.scalingCurFreqFile)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
cclog.ComponentError(
|
cclog.ComponentError(
|
||||||
m.name,
|
m.name,
|
||||||
|
@@ -1,4 +1,5 @@
|
|||||||
## `cpufreq_cpuinfo` collector
|
## `cpufreq_cpuinfo` collector
|
||||||
|
|
||||||
```json
|
```json
|
||||||
"cpufreq": {
|
"cpufreq": {
|
||||||
"exclude_metrics": []
|
"exclude_metrics": []
|
||||||
@@ -8,4 +9,5 @@
|
|||||||
The `cpufreq` collector reads the clock frequency from `/sys/devices/system/cpu/cpu*/cpufreq` and outputs a handful **hwthread** metrics.
|
The `cpufreq` collector reads the clock frequency from `/sys/devices/system/cpu/cpu*/cpufreq` and outputs a handful **hwthread** metrics.
|
||||||
|
|
||||||
Metrics:
|
Metrics:
|
||||||
|
|
||||||
* `cpufreq`
|
* `cpufreq`
|
@@ -9,8 +9,8 @@ import (
|
|||||||
"strings"
|
"strings"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
cclog "github.com/ClusterCockpit/cc-metric-collector/internal/ccLogger"
|
cclog "github.com/ClusterCockpit/cc-metric-collector/pkg/ccLogger"
|
||||||
lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric"
|
lp "github.com/ClusterCockpit/cc-metric-collector/pkg/ccMetric"
|
||||||
sysconf "github.com/tklauser/go-sysconf"
|
sysconf "github.com/tklauser/go-sysconf"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@@ -1,5 +1,6 @@
|
|||||||
|
|
||||||
## `cpustat` collector
|
## `cpustat` collector
|
||||||
|
|
||||||
```json
|
```json
|
||||||
"cpustat": {
|
"cpustat": {
|
||||||
"exclude_metrics": [
|
"exclude_metrics": [
|
||||||
@@ -8,9 +9,10 @@
|
|||||||
}
|
}
|
||||||
```
|
```
|
||||||
|
|
||||||
The `cpustat` collector reads data from `/proc/stats` and outputs a handful **node** and **hwthread** metrics. If a metric is not required, it can be excluded from forwarding it to the sink.
|
The `cpustat` collector reads data from `/proc/stat` and outputs a handful **node** and **hwthread** metrics. If a metric is not required, it can be excluded from forwarding it to the sink.
|
||||||
|
|
||||||
Metrics:
|
Metrics:
|
||||||
|
|
||||||
* `cpu_user`
|
* `cpu_user`
|
||||||
* `cpu_nice`
|
* `cpu_nice`
|
||||||
* `cpu_system`
|
* `cpu_system`
|
||||||
|
@@ -3,13 +3,13 @@ package collectors
|
|||||||
import (
|
import (
|
||||||
"encoding/json"
|
"encoding/json"
|
||||||
"errors"
|
"errors"
|
||||||
"io/ioutil"
|
|
||||||
"log"
|
"log"
|
||||||
|
"os"
|
||||||
"os/exec"
|
"os/exec"
|
||||||
"strings"
|
"strings"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric"
|
lp "github.com/ClusterCockpit/cc-metric-collector/pkg/ccMetric"
|
||||||
influx "github.com/influxdata/line-protocol"
|
influx "github.com/influxdata/line-protocol"
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -53,7 +53,7 @@ func (m *CustomCmdCollector) Init(config json.RawMessage) error {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
for _, f := range m.config.Files {
|
for _, f := range m.config.Files {
|
||||||
_, err = ioutil.ReadFile(f)
|
_, err = os.ReadFile(f)
|
||||||
if err == nil {
|
if err == nil {
|
||||||
m.files = append(m.files, f)
|
m.files = append(m.files, f)
|
||||||
} else {
|
} else {
|
||||||
@@ -106,7 +106,7 @@ func (m *CustomCmdCollector) Read(interval time.Duration, output chan lp.CCMetri
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
for _, file := range m.files {
|
for _, file := range m.files {
|
||||||
buffer, err := ioutil.ReadFile(file)
|
buffer, err := os.ReadFile(file)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Print(err)
|
log.Print(err)
|
||||||
return
|
return
|
||||||
|
@@ -8,8 +8,8 @@ import (
|
|||||||
"syscall"
|
"syscall"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
cclog "github.com/ClusterCockpit/cc-metric-collector/internal/ccLogger"
|
cclog "github.com/ClusterCockpit/cc-metric-collector/pkg/ccLogger"
|
||||||
lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric"
|
lp "github.com/ClusterCockpit/cc-metric-collector/pkg/ccMetric"
|
||||||
)
|
)
|
||||||
|
|
||||||
// "log"
|
// "log"
|
||||||
|
@@ -5,7 +5,7 @@ import (
|
|||||||
"bytes"
|
"bytes"
|
||||||
"encoding/json"
|
"encoding/json"
|
||||||
"fmt"
|
"fmt"
|
||||||
"io/ioutil"
|
"io"
|
||||||
"log"
|
"log"
|
||||||
"os/exec"
|
"os/exec"
|
||||||
"os/user"
|
"os/user"
|
||||||
@@ -13,8 +13,8 @@ import (
|
|||||||
"strings"
|
"strings"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
cclog "github.com/ClusterCockpit/cc-metric-collector/internal/ccLogger"
|
cclog "github.com/ClusterCockpit/cc-metric-collector/pkg/ccLogger"
|
||||||
lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric"
|
lp "github.com/ClusterCockpit/cc-metric-collector/pkg/ccMetric"
|
||||||
)
|
)
|
||||||
|
|
||||||
const DEFAULT_GPFS_CMD = "mmpmon"
|
const DEFAULT_GPFS_CMD = "mmpmon"
|
||||||
@@ -118,8 +118,8 @@ func (m *GpfsCollector) Read(interval time.Duration, output chan lp.CCMetric) {
|
|||||||
cmd.Stderr = cmdStderr
|
cmd.Stderr = cmdStderr
|
||||||
err := cmd.Run()
|
err := cmd.Run()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
dataStdErr, _ := ioutil.ReadAll(cmdStderr)
|
dataStdErr, _ := io.ReadAll(cmdStderr)
|
||||||
dataStdOut, _ := ioutil.ReadAll(cmdStdout)
|
dataStdOut, _ := io.ReadAll(cmdStdout)
|
||||||
cclog.ComponentError(
|
cclog.ComponentError(
|
||||||
m.name,
|
m.name,
|
||||||
fmt.Sprintf("Read(): Failed to execute command \"%s\": %v\n", cmd.String(), err),
|
fmt.Sprintf("Read(): Failed to execute command \"%s\": %v\n", cmd.String(), err),
|
||||||
|
@@ -2,11 +2,10 @@ package collectors
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"fmt"
|
"fmt"
|
||||||
"io/ioutil"
|
|
||||||
"os"
|
"os"
|
||||||
|
|
||||||
cclog "github.com/ClusterCockpit/cc-metric-collector/internal/ccLogger"
|
cclog "github.com/ClusterCockpit/cc-metric-collector/pkg/ccLogger"
|
||||||
lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric"
|
lp "github.com/ClusterCockpit/cc-metric-collector/pkg/ccMetric"
|
||||||
"golang.org/x/sys/unix"
|
"golang.org/x/sys/unix"
|
||||||
|
|
||||||
"encoding/json"
|
"encoding/json"
|
||||||
@@ -21,6 +20,7 @@ const IB_BASEPATH = "/sys/class/infiniband/"
|
|||||||
type InfinibandCollectorMetric struct {
|
type InfinibandCollectorMetric struct {
|
||||||
path string
|
path string
|
||||||
unit string
|
unit string
|
||||||
|
scale int64
|
||||||
}
|
}
|
||||||
|
|
||||||
type InfinibandCollectorInfo struct {
|
type InfinibandCollectorInfo struct {
|
||||||
@@ -84,7 +84,7 @@ func (m *InfinibandCollector) Init(config json.RawMessage) error {
|
|||||||
for _, path := range ibDirs {
|
for _, path := range ibDirs {
|
||||||
|
|
||||||
// Skip, when no LID is assigned
|
// Skip, when no LID is assigned
|
||||||
line, err := ioutil.ReadFile(filepath.Join(path, "lid"))
|
line, err := os.ReadFile(filepath.Join(path, "lid"))
|
||||||
if err != nil {
|
if err != nil {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
@@ -113,10 +113,10 @@ func (m *InfinibandCollector) Init(config json.RawMessage) error {
|
|||||||
// Check access to counter files
|
// Check access to counter files
|
||||||
countersDir := filepath.Join(path, "counters")
|
countersDir := filepath.Join(path, "counters")
|
||||||
portCounterFiles := map[string]InfinibandCollectorMetric{
|
portCounterFiles := map[string]InfinibandCollectorMetric{
|
||||||
"ib_recv": {path: filepath.Join(countersDir, "port_rcv_data"), unit: "bytes"},
|
"ib_recv": {path: filepath.Join(countersDir, "port_rcv_data"), unit: "bytes", scale: 4},
|
||||||
"ib_xmit": {path: filepath.Join(countersDir, "port_xmit_data"), unit: "bytes"},
|
"ib_xmit": {path: filepath.Join(countersDir, "port_xmit_data"), unit: "bytes", scale: 4},
|
||||||
"ib_recv_pkts": {path: filepath.Join(countersDir, "port_rcv_packets"), unit: "packets"},
|
"ib_recv_pkts": {path: filepath.Join(countersDir, "port_rcv_packets"), unit: "packets", scale: 1},
|
||||||
"ib_xmit_pkts": {path: filepath.Join(countersDir, "port_xmit_packets"), unit: "packets"},
|
"ib_xmit_pkts": {path: filepath.Join(countersDir, "port_xmit_packets"), unit: "packets", scale: 1},
|
||||||
}
|
}
|
||||||
for _, counter := range portCounterFiles {
|
for _, counter := range portCounterFiles {
|
||||||
err := unix.Access(counter.path, unix.R_OK)
|
err := unix.Access(counter.path, unix.R_OK)
|
||||||
@@ -174,7 +174,7 @@ func (m *InfinibandCollector) Read(interval time.Duration, output chan lp.CCMetr
|
|||||||
for counterName, counterDef := range info.portCounterFiles {
|
for counterName, counterDef := range info.portCounterFiles {
|
||||||
|
|
||||||
// Read counter file
|
// Read counter file
|
||||||
line, err := ioutil.ReadFile(counterDef.path)
|
line, err := os.ReadFile(counterDef.path)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
cclog.ComponentError(
|
cclog.ComponentError(
|
||||||
m.name,
|
m.name,
|
||||||
@@ -191,6 +191,8 @@ func (m *InfinibandCollector) Read(interval time.Duration, output chan lp.CCMetr
|
|||||||
fmt.Sprintf("Read(): Failed to convert Infininiband metrice %s='%s' to int64: %v", counterName, data, err))
|
fmt.Sprintf("Read(): Failed to convert Infininiband metrice %s='%s' to int64: %v", counterName, data, err))
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
// Scale raw value
|
||||||
|
v *= counterDef.scale
|
||||||
|
|
||||||
// Send absolut values
|
// Send absolut values
|
||||||
if m.config.SendAbsoluteValues {
|
if m.config.SendAbsoluteValues {
|
||||||
|
@@ -4,8 +4,8 @@ import (
|
|||||||
"bufio"
|
"bufio"
|
||||||
"os"
|
"os"
|
||||||
|
|
||||||
cclog "github.com/ClusterCockpit/cc-metric-collector/internal/ccLogger"
|
cclog "github.com/ClusterCockpit/cc-metric-collector/pkg/ccLogger"
|
||||||
lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric"
|
lp "github.com/ClusterCockpit/cc-metric-collector/pkg/ccMetric"
|
||||||
|
|
||||||
// "log"
|
// "log"
|
||||||
"encoding/json"
|
"encoding/json"
|
||||||
|
@@ -1,51 +1,58 @@
|
|||||||
package collectors
|
package collectors
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"bufio"
|
||||||
|
"bytes"
|
||||||
"encoding/json"
|
"encoding/json"
|
||||||
"errors"
|
"errors"
|
||||||
|
"fmt"
|
||||||
|
"io"
|
||||||
"log"
|
"log"
|
||||||
"os"
|
|
||||||
"os/exec"
|
"os/exec"
|
||||||
"strconv"
|
"strconv"
|
||||||
"strings"
|
"strings"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric"
|
cclog "github.com/ClusterCockpit/cc-metric-collector/pkg/ccLogger"
|
||||||
|
lp "github.com/ClusterCockpit/cc-metric-collector/pkg/ccMetric"
|
||||||
)
|
)
|
||||||
|
|
||||||
const IPMITOOL_PATH = `ipmitool`
|
|
||||||
const IPMISENSORS_PATH = `ipmi-sensors`
|
const IPMISENSORS_PATH = `ipmi-sensors`
|
||||||
|
|
||||||
type IpmiCollectorConfig struct {
|
type IpmiCollector struct {
|
||||||
|
metricCollector
|
||||||
|
config struct {
|
||||||
ExcludeDevices []string `json:"exclude_devices"`
|
ExcludeDevices []string `json:"exclude_devices"`
|
||||||
IpmitoolPath string `json:"ipmitool_path"`
|
IpmitoolPath string `json:"ipmitool_path"`
|
||||||
IpmisensorsPath string `json:"ipmisensors_path"`
|
IpmisensorsPath string `json:"ipmisensors_path"`
|
||||||
}
|
}
|
||||||
|
|
||||||
type IpmiCollector struct {
|
|
||||||
metricCollector
|
|
||||||
//tags map[string]string
|
|
||||||
//matches map[string]string
|
|
||||||
config IpmiCollectorConfig
|
|
||||||
ipmitool string
|
ipmitool string
|
||||||
ipmisensors string
|
ipmisensors string
|
||||||
}
|
}
|
||||||
|
|
||||||
func (m *IpmiCollector) Init(config json.RawMessage) error {
|
func (m *IpmiCollector) Init(config json.RawMessage) error {
|
||||||
|
// Check if already initialized
|
||||||
|
if m.init {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
m.name = "IpmiCollector"
|
m.name = "IpmiCollector"
|
||||||
m.setup()
|
m.setup()
|
||||||
m.parallel = true
|
m.parallel = true
|
||||||
m.meta = map[string]string{"source": m.name, "group": "IPMI"}
|
m.meta = map[string]string{
|
||||||
m.config.IpmitoolPath = string(IPMITOOL_PATH)
|
"source": m.name,
|
||||||
m.config.IpmisensorsPath = string(IPMISENSORS_PATH)
|
"group": "IPMI",
|
||||||
m.ipmitool = ""
|
}
|
||||||
m.ipmisensors = ""
|
// default path to IPMI tools
|
||||||
|
m.config.IpmitoolPath = "ipmitool"
|
||||||
|
m.config.IpmisensorsPath = "ipmi-sensors"
|
||||||
if len(config) > 0 {
|
if len(config) > 0 {
|
||||||
err := json.Unmarshal(config, &m.config)
|
err := json.Unmarshal(config, &m.config)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
// Check if executables ipmitool or ipmisensors are found
|
||||||
p, err := exec.LookPath(m.config.IpmitoolPath)
|
p, err := exec.LookPath(m.config.IpmitoolPath)
|
||||||
if err == nil {
|
if err == nil {
|
||||||
m.ipmitool = p
|
m.ipmitool = p
|
||||||
@@ -62,25 +69,33 @@ func (m *IpmiCollector) Init(config json.RawMessage) error {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func (m *IpmiCollector) readIpmiTool(cmd string, output chan lp.CCMetric) {
|
func (m *IpmiCollector) readIpmiTool(cmd string, output chan lp.CCMetric) {
|
||||||
|
|
||||||
|
// Setup ipmitool command
|
||||||
command := exec.Command(cmd, "sensor")
|
command := exec.Command(cmd, "sensor")
|
||||||
command.Wait()
|
stdout, _ := command.StdoutPipe()
|
||||||
stdout, err := command.Output()
|
errBuf := new(bytes.Buffer)
|
||||||
if err != nil {
|
command.Stderr = errBuf
|
||||||
log.Print(err)
|
|
||||||
|
// start command
|
||||||
|
if err := command.Start(); err != nil {
|
||||||
|
cclog.ComponentError(
|
||||||
|
m.name,
|
||||||
|
fmt.Sprintf("readIpmiTool(): Failed to start command \"%s\": %v", command.String(), err),
|
||||||
|
)
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
ll := strings.Split(string(stdout), "\n")
|
// Read command output
|
||||||
|
scanner := bufio.NewScanner(stdout)
|
||||||
for _, line := range ll {
|
for scanner.Scan() {
|
||||||
lv := strings.Split(line, "|")
|
lv := strings.Split(scanner.Text(), "|")
|
||||||
if len(lv) < 3 {
|
if len(lv) < 3 {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
v, err := strconv.ParseFloat(strings.Trim(lv[1], " "), 64)
|
v, err := strconv.ParseFloat(strings.TrimSpace(lv[1]), 64)
|
||||||
if err == nil {
|
if err == nil {
|
||||||
name := strings.ToLower(strings.Replace(strings.Trim(lv[0], " "), " ", "_", -1))
|
name := strings.ToLower(strings.Replace(strings.TrimSpace(lv[0]), " ", "_", -1))
|
||||||
unit := strings.Trim(lv[2], " ")
|
unit := strings.TrimSpace(lv[2])
|
||||||
if unit == "Volts" {
|
if unit == "Volts" {
|
||||||
unit = "Volts"
|
unit = "Volts"
|
||||||
} else if unit == "degrees C" {
|
} else if unit == "degrees C" {
|
||||||
@@ -98,6 +113,17 @@ func (m *IpmiCollector) readIpmiTool(cmd string, output chan lp.CCMetric) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Wait for command end
|
||||||
|
if err := command.Wait(); err != nil {
|
||||||
|
errMsg, _ := io.ReadAll(errBuf)
|
||||||
|
cclog.ComponentError(
|
||||||
|
m.name,
|
||||||
|
fmt.Sprintf("readIpmiTool(): Failed to wait for the end of command \"%s\": %v\n", command.String(), err),
|
||||||
|
fmt.Sprintf("readIpmiTool(): command stderr: \"%s\"\n", string(errMsg)),
|
||||||
|
)
|
||||||
|
return
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func (m *IpmiCollector) readIpmiSensors(cmd string, output chan lp.CCMetric) {
|
func (m *IpmiCollector) readIpmiSensors(cmd string, output chan lp.CCMetric) {
|
||||||
@@ -131,18 +157,18 @@ func (m *IpmiCollector) readIpmiSensors(cmd string, output chan lp.CCMetric) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func (m *IpmiCollector) Read(interval time.Duration, output chan lp.CCMetric) {
|
func (m *IpmiCollector) Read(interval time.Duration, output chan lp.CCMetric) {
|
||||||
|
|
||||||
|
// Check if already initialized
|
||||||
|
if !m.init {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
if len(m.config.IpmitoolPath) > 0 {
|
if len(m.config.IpmitoolPath) > 0 {
|
||||||
_, err := os.Stat(m.config.IpmitoolPath)
|
|
||||||
if err == nil {
|
|
||||||
m.readIpmiTool(m.config.IpmitoolPath, output)
|
m.readIpmiTool(m.config.IpmitoolPath, output)
|
||||||
}
|
|
||||||
} else if len(m.config.IpmisensorsPath) > 0 {
|
} else if len(m.config.IpmisensorsPath) > 0 {
|
||||||
_, err := os.Stat(m.config.IpmisensorsPath)
|
|
||||||
if err == nil {
|
|
||||||
m.readIpmiSensors(m.config.IpmisensorsPath, output)
|
m.readIpmiSensors(m.config.IpmisensorsPath, output)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
func (m *IpmiCollector) Close() {
|
func (m *IpmiCollector) Close() {
|
||||||
m.init = false
|
m.init = false
|
||||||
|
@@ -11,6 +11,3 @@
|
|||||||
The `ipmistat` collector reads data from `ipmitool` (`ipmitool sensor`) or `ipmi-sensors` (`ipmi-sensors --sdr-cache-recreate --comma-separated-output`).
|
The `ipmistat` collector reads data from `ipmitool` (`ipmitool sensor`) or `ipmi-sensors` (`ipmi-sensors --sdr-cache-recreate --comma-separated-output`).
|
||||||
|
|
||||||
The available metrics depend on the output of the underlying tools but include temperature, power and energy readings.
|
The available metrics depend on the output of the underlying tools but include temperature, power and energy readings.
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
@@ -12,7 +12,6 @@ import (
|
|||||||
"encoding/json"
|
"encoding/json"
|
||||||
"errors"
|
"errors"
|
||||||
"fmt"
|
"fmt"
|
||||||
"io/ioutil"
|
|
||||||
"math"
|
"math"
|
||||||
"os"
|
"os"
|
||||||
"os/signal"
|
"os/signal"
|
||||||
@@ -24,10 +23,10 @@ import (
|
|||||||
"time"
|
"time"
|
||||||
"unsafe"
|
"unsafe"
|
||||||
|
|
||||||
cclog "github.com/ClusterCockpit/cc-metric-collector/internal/ccLogger"
|
|
||||||
lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric"
|
|
||||||
topo "github.com/ClusterCockpit/cc-metric-collector/internal/ccTopology"
|
|
||||||
agg "github.com/ClusterCockpit/cc-metric-collector/internal/metricAggregator"
|
agg "github.com/ClusterCockpit/cc-metric-collector/internal/metricAggregator"
|
||||||
|
cclog "github.com/ClusterCockpit/cc-metric-collector/pkg/ccLogger"
|
||||||
|
lp "github.com/ClusterCockpit/cc-metric-collector/pkg/ccMetric"
|
||||||
|
topo "github.com/ClusterCockpit/cc-metric-collector/pkg/ccTopology"
|
||||||
"github.com/NVIDIA/go-nvml/pkg/dl"
|
"github.com/NVIDIA/go-nvml/pkg/dl"
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -154,12 +153,13 @@ func getBaseFreq() float64 {
|
|||||||
}
|
}
|
||||||
var freq float64 = math.NaN()
|
var freq float64 = math.NaN()
|
||||||
for _, f := range files {
|
for _, f := range files {
|
||||||
buffer, err := ioutil.ReadFile(f)
|
buffer, err := os.ReadFile(f)
|
||||||
if err == nil {
|
if err == nil {
|
||||||
data := strings.Replace(string(buffer), "\n", "", -1)
|
data := strings.Replace(string(buffer), "\n", "", -1)
|
||||||
x, err := strconv.ParseInt(data, 0, 64)
|
x, err := strconv.ParseInt(data, 0, 64)
|
||||||
if err == nil {
|
if err == nil {
|
||||||
freq = float64(x) * 1e6
|
freq = float64(x)
|
||||||
|
break
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -168,11 +168,11 @@ func getBaseFreq() float64 {
|
|||||||
C.power_init(0)
|
C.power_init(0)
|
||||||
info := C.get_powerInfo()
|
info := C.get_powerInfo()
|
||||||
if float64(info.baseFrequency) != 0 {
|
if float64(info.baseFrequency) != 0 {
|
||||||
freq = float64(info.baseFrequency) * 1e6
|
freq = float64(info.baseFrequency)
|
||||||
}
|
}
|
||||||
C.power_finalize()
|
C.power_finalize()
|
||||||
}
|
}
|
||||||
return freq
|
return freq * 1e3
|
||||||
}
|
}
|
||||||
|
|
||||||
func (m *LikwidCollector) Init(config json.RawMessage) error {
|
func (m *LikwidCollector) Init(config json.RawMessage) error {
|
||||||
|
@@ -7,6 +7,9 @@ The `likwid` collector is probably the most complicated collector. The LIKWID li
|
|||||||
"likwid": {
|
"likwid": {
|
||||||
"force_overwrite" : false,
|
"force_overwrite" : false,
|
||||||
"invalid_to_zero" : false,
|
"invalid_to_zero" : false,
|
||||||
|
"liblikwid_path" : "/path/to/liblikwid.so",
|
||||||
|
"accessdaemon_path" : "/folder/that/contains/likwid-accessD",
|
||||||
|
"access_mode" : "direct or accessdaemon or perf_event",
|
||||||
"eventsets": [
|
"eventsets": [
|
||||||
{
|
{
|
||||||
"events" : {
|
"events" : {
|
||||||
|
@@ -3,13 +3,13 @@ package collectors
|
|||||||
import (
|
import (
|
||||||
"encoding/json"
|
"encoding/json"
|
||||||
"fmt"
|
"fmt"
|
||||||
"io/ioutil"
|
"os"
|
||||||
"strconv"
|
"strconv"
|
||||||
"strings"
|
"strings"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
cclog "github.com/ClusterCockpit/cc-metric-collector/internal/ccLogger"
|
cclog "github.com/ClusterCockpit/cc-metric-collector/pkg/ccLogger"
|
||||||
lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric"
|
lp "github.com/ClusterCockpit/cc-metric-collector/pkg/ccMetric"
|
||||||
)
|
)
|
||||||
|
|
||||||
//
|
//
|
||||||
@@ -72,7 +72,7 @@ func (m *LoadavgCollector) Read(interval time.Duration, output chan lp.CCMetric)
|
|||||||
if !m.init {
|
if !m.init {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
buffer, err := ioutil.ReadFile(LOADAVGFILE)
|
buffer, err := os.ReadFile(LOADAVGFILE)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
if err != nil {
|
if err != nil {
|
||||||
cclog.ComponentError(
|
cclog.ComponentError(
|
||||||
|
@@ -10,8 +10,8 @@ import (
|
|||||||
"strings"
|
"strings"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
cclog "github.com/ClusterCockpit/cc-metric-collector/internal/ccLogger"
|
cclog "github.com/ClusterCockpit/cc-metric-collector/pkg/ccLogger"
|
||||||
lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric"
|
lp "github.com/ClusterCockpit/cc-metric-collector/pkg/ccMetric"
|
||||||
)
|
)
|
||||||
|
|
||||||
const LUSTRE_SYSFS = `/sys/fs/lustre`
|
const LUSTRE_SYSFS = `/sys/fs/lustre`
|
||||||
|
@@ -12,8 +12,8 @@ import (
|
|||||||
"strings"
|
"strings"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
cclog "github.com/ClusterCockpit/cc-metric-collector/internal/ccLogger"
|
cclog "github.com/ClusterCockpit/cc-metric-collector/pkg/ccLogger"
|
||||||
lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric"
|
lp "github.com/ClusterCockpit/cc-metric-collector/pkg/ccMetric"
|
||||||
)
|
)
|
||||||
|
|
||||||
const MEMSTATFILE = "/proc/meminfo"
|
const MEMSTATFILE = "/proc/meminfo"
|
||||||
@@ -68,7 +68,8 @@ func getStats(filename string) map[string]MemstatStats {
|
|||||||
} else if len(linefields) == 5 {
|
} else if len(linefields) == 5 {
|
||||||
v, err := strconv.ParseFloat(linefields[3], 64)
|
v, err := strconv.ParseFloat(linefields[3], 64)
|
||||||
if err == nil {
|
if err == nil {
|
||||||
stats[strings.Trim(linefields[0], ":")] = MemstatStats{
|
cclog.ComponentDebug("getStats", strings.Trim(linefields[2], ":"), v, linefields[4])
|
||||||
|
stats[strings.Trim(linefields[2], ":")] = MemstatStats{
|
||||||
value: v,
|
value: v,
|
||||||
unit: linefields[4],
|
unit: linefields[4],
|
||||||
}
|
}
|
||||||
@@ -160,7 +161,6 @@ func (m *MemstatCollector) Init(config json.RawMessage) error {
|
|||||||
|
|
||||||
func (m *MemstatCollector) Read(interval time.Duration, output chan lp.CCMetric) {
|
func (m *MemstatCollector) Read(interval time.Duration, output chan lp.CCMetric) {
|
||||||
if !m.init {
|
if !m.init {
|
||||||
cclog.ComponentPrint(m.name, "Here")
|
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -188,16 +188,20 @@ func (m *MemstatCollector) Read(interval time.Duration, output chan lp.CCMetric)
|
|||||||
unit := ""
|
unit := ""
|
||||||
if totalVal, total := stats["MemTotal"]; total {
|
if totalVal, total := stats["MemTotal"]; total {
|
||||||
if freeVal, free := stats["MemFree"]; free {
|
if freeVal, free := stats["MemFree"]; free {
|
||||||
if bufVal, buffers := stats["Buffers"]; buffers {
|
memUsed = totalVal.value - freeVal.value
|
||||||
if cacheVal, cached := stats["Cached"]; cached {
|
|
||||||
memUsed = totalVal.value - (freeVal.value + bufVal.value + cacheVal.value)
|
|
||||||
if len(totalVal.unit) > 0 {
|
if len(totalVal.unit) > 0 {
|
||||||
unit = totalVal.unit
|
unit = totalVal.unit
|
||||||
} else if len(freeVal.unit) > 0 {
|
} else if len(freeVal.unit) > 0 {
|
||||||
unit = freeVal.unit
|
unit = freeVal.unit
|
||||||
} else if len(bufVal.unit) > 0 {
|
}
|
||||||
|
if bufVal, buffers := stats["Buffers"]; buffers {
|
||||||
|
memUsed -= bufVal.value
|
||||||
|
if len(bufVal.unit) > 0 && len(unit) == 0 {
|
||||||
unit = bufVal.unit
|
unit = bufVal.unit
|
||||||
} else if len(cacheVal.unit) > 0 {
|
}
|
||||||
|
if cacheVal, cached := stats["Cached"]; cached {
|
||||||
|
memUsed -= cacheVal.value
|
||||||
|
if len(cacheVal.unit) > 0 && len(unit) == 0 {
|
||||||
unit = cacheVal.unit
|
unit = cacheVal.unit
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@@ -5,7 +5,7 @@ import (
|
|||||||
"fmt"
|
"fmt"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric"
|
lp "github.com/ClusterCockpit/cc-metric-collector/pkg/ccMetric"
|
||||||
)
|
)
|
||||||
|
|
||||||
type MetricCollector interface {
|
type MetricCollector interface {
|
||||||
|
@@ -9,8 +9,8 @@ import (
|
|||||||
"strings"
|
"strings"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
cclog "github.com/ClusterCockpit/cc-metric-collector/internal/ccLogger"
|
cclog "github.com/ClusterCockpit/cc-metric-collector/pkg/ccLogger"
|
||||||
lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric"
|
lp "github.com/ClusterCockpit/cc-metric-collector/pkg/ccMetric"
|
||||||
)
|
)
|
||||||
|
|
||||||
const NETSTATFILE = "/proc/net/dev"
|
const NETSTATFILE = "/proc/net/dev"
|
||||||
|
@@ -11,7 +11,7 @@ import (
|
|||||||
"strings"
|
"strings"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric"
|
lp "github.com/ClusterCockpit/cc-metric-collector/pkg/ccMetric"
|
||||||
)
|
)
|
||||||
|
|
||||||
// First part contains the code for the general NfsCollector.
|
// First part contains the code for the general NfsCollector.
|
||||||
|
@@ -10,33 +10,42 @@ import (
|
|||||||
"strings"
|
"strings"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
cclog "github.com/ClusterCockpit/cc-metric-collector/internal/ccLogger"
|
cclog "github.com/ClusterCockpit/cc-metric-collector/pkg/ccLogger"
|
||||||
lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric"
|
lp "github.com/ClusterCockpit/cc-metric-collector/pkg/ccMetric"
|
||||||
)
|
)
|
||||||
|
|
||||||
//
|
// Non-Uniform Memory Access (NUMA) policy hit/miss statistics
|
||||||
// Numa policy hit/miss statistics
|
|
||||||
//
|
//
|
||||||
// numa_hit:
|
// numa_hit:
|
||||||
|
//
|
||||||
// A process wanted to allocate memory from this node, and succeeded.
|
// A process wanted to allocate memory from this node, and succeeded.
|
||||||
|
//
|
||||||
// numa_miss:
|
// numa_miss:
|
||||||
|
//
|
||||||
// A process wanted to allocate memory from another node,
|
// A process wanted to allocate memory from another node,
|
||||||
// but ended up with memory from this node.
|
// but ended up with memory from this node.
|
||||||
|
//
|
||||||
// numa_foreign:
|
// numa_foreign:
|
||||||
|
//
|
||||||
// A process wanted to allocate on this node,
|
// A process wanted to allocate on this node,
|
||||||
// but ended up with memory from another node.
|
// but ended up with memory from another node.
|
||||||
|
//
|
||||||
// local_node:
|
// local_node:
|
||||||
|
//
|
||||||
// A process ran on this node's CPU,
|
// A process ran on this node's CPU,
|
||||||
// and got memory from this node.
|
// and got memory from this node.
|
||||||
|
//
|
||||||
// other_node:
|
// other_node:
|
||||||
|
//
|
||||||
// A process ran on a different node's CPU
|
// A process ran on a different node's CPU
|
||||||
// and got memory from this node.
|
// and got memory from this node.
|
||||||
|
//
|
||||||
// interleave_hit:
|
// interleave_hit:
|
||||||
|
//
|
||||||
// Interleaving wanted to allocate from this node
|
// Interleaving wanted to allocate from this node
|
||||||
// and succeeded.
|
// and succeeded.
|
||||||
//
|
//
|
||||||
// See: https://www.kernel.org/doc/html/latest/admin-guide/numastat.html
|
// See: https://www.kernel.org/doc/html/latest/admin-guide/numastat.html
|
||||||
//
|
|
||||||
type NUMAStatsCollectorTopolgy struct {
|
type NUMAStatsCollectorTopolgy struct {
|
||||||
file string
|
file string
|
||||||
tagSet map[string]string
|
tagSet map[string]string
|
||||||
@@ -82,6 +91,8 @@ func (m *NUMAStatsCollector) Init(config json.RawMessage) error {
|
|||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Initialized
|
||||||
|
cclog.ComponentDebug(m.name, "initialized", len(m.topology), "NUMA domains")
|
||||||
m.init = true
|
m.init = true
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
@@ -1,12 +1,14 @@
|
|||||||
|
|
||||||
## `numastat` collector
|
## `numastat` collector
|
||||||
|
|
||||||
```json
|
```json
|
||||||
"numastat": {}
|
"numastats": {}
|
||||||
```
|
```
|
||||||
|
|
||||||
The `numastat` collector reads data from `/sys/devices/system/node/node*/numastat` and outputs a handful of **memoryDomain** metrics. See: https://www.kernel.org/doc/html/latest/admin-guide/numastat.html
|
The `numastat` collector reads data from `/sys/devices/system/node/node*/numastat` and outputs a handful of **memoryDomain** metrics. See: <https://www.kernel.org/doc/html/latest/admin-guide/numastat.html>
|
||||||
|
|
||||||
Metrics:
|
Metrics:
|
||||||
|
|
||||||
* `numastats_numa_hit`: A process wanted to allocate memory from this node, and succeeded.
|
* `numastats_numa_hit`: A process wanted to allocate memory from this node, and succeeded.
|
||||||
* `numastats_numa_miss`: A process wanted to allocate memory from another node, but ended up with memory from this node.
|
* `numastats_numa_miss`: A process wanted to allocate memory from another node, but ended up with memory from this node.
|
||||||
* `numastats_numa_foreign`: A process wanted to allocate on this node, but ended up with memory from another node.
|
* `numastats_numa_foreign`: A process wanted to allocate on this node, but ended up with memory from another node.
|
||||||
|
@@ -8,8 +8,8 @@ import (
|
|||||||
"strings"
|
"strings"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
cclog "github.com/ClusterCockpit/cc-metric-collector/internal/ccLogger"
|
cclog "github.com/ClusterCockpit/cc-metric-collector/pkg/ccLogger"
|
||||||
lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric"
|
lp "github.com/ClusterCockpit/cc-metric-collector/pkg/ccMetric"
|
||||||
"github.com/NVIDIA/go-nvml/pkg/nvml"
|
"github.com/NVIDIA/go-nvml/pkg/nvml"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
262
collectors/raplMetric.go
Normal file
262
collectors/raplMetric.go
Normal file
@@ -0,0 +1,262 @@
|
|||||||
|
package collectors
|
||||||
|
|
||||||
|
import (
|
||||||
|
"encoding/json"
|
||||||
|
"fmt"
|
||||||
|
"os"
|
||||||
|
"path/filepath"
|
||||||
|
"strconv"
|
||||||
|
"strings"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
cclog "github.com/ClusterCockpit/cc-metric-collector/pkg/ccLogger"
|
||||||
|
lp "github.com/ClusterCockpit/cc-metric-collector/pkg/ccMetric"
|
||||||
|
)
|
||||||
|
|
||||||
|
// running average power limit (RAPL) monitoring attributes for a zone
|
||||||
|
type RAPLZoneInfo struct {
|
||||||
|
// tags describing the RAPL zone:
|
||||||
|
// * zone_name, subzone_name: e.g. psys, dram, core, uncore, package-0
|
||||||
|
// * zone_id: e.g. 0:1 (zone 0 sub zone 1)
|
||||||
|
tags map[string]string
|
||||||
|
energyFilepath string // path to a file containing the zones current energy counter in micro joules
|
||||||
|
energy int64 // current reading of the energy counter in micro joules
|
||||||
|
energyTimestamp time.Time // timestamp when energy counter was read
|
||||||
|
maxEnergyRange int64 // Range of the above energy counter in micro-joules
|
||||||
|
}
|
||||||
|
|
||||||
|
type RAPLCollector struct {
|
||||||
|
metricCollector
|
||||||
|
config struct {
|
||||||
|
// Exclude IDs for RAPL zones, e.g.
|
||||||
|
// * 0 for zone 0
|
||||||
|
// * 0:1 for zone 0 subzone 1
|
||||||
|
ExcludeByID []string `json:"exclude_device_by_id,omitempty"`
|
||||||
|
// Exclude names for RAPL zones, e.g. psys, dram, core, uncore, package-0
|
||||||
|
ExcludeByName []string `json:"exclude_device_by_name,omitempty"`
|
||||||
|
}
|
||||||
|
RAPLZoneInfo []RAPLZoneInfo
|
||||||
|
meta map[string]string // default meta information
|
||||||
|
}
|
||||||
|
|
||||||
|
// Init initializes the running average power limit (RAPL) collector
|
||||||
|
func (m *RAPLCollector) Init(config json.RawMessage) error {
|
||||||
|
|
||||||
|
// Check if already initialized
|
||||||
|
if m.init {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
var err error = nil
|
||||||
|
m.name = "RAPLCollector"
|
||||||
|
m.setup()
|
||||||
|
m.parallel = true
|
||||||
|
m.meta = map[string]string{
|
||||||
|
"source": m.name,
|
||||||
|
"group": "energy",
|
||||||
|
"unit": "Watt",
|
||||||
|
}
|
||||||
|
|
||||||
|
// Read in the JSON configuration
|
||||||
|
if len(config) > 0 {
|
||||||
|
err = json.Unmarshal(config, &m.config)
|
||||||
|
if err != nil {
|
||||||
|
cclog.ComponentError(m.name, "Error reading config:", err.Error())
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Configure excluded RAPL zones
|
||||||
|
isIDExcluded := make(map[string]bool)
|
||||||
|
if m.config.ExcludeByID != nil {
|
||||||
|
for _, ID := range m.config.ExcludeByID {
|
||||||
|
isIDExcluded[ID] = true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
isNameExcluded := make(map[string]bool)
|
||||||
|
if m.config.ExcludeByName != nil {
|
||||||
|
for _, name := range m.config.ExcludeByName {
|
||||||
|
isNameExcluded[name] = true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// readZoneInfo reads RAPL monitoring attributes for a zone given by zonePath
|
||||||
|
// See: https://www.kernel.org/doc/html/latest/power/powercap/powercap.html#monitoring-attributes
|
||||||
|
readZoneInfo := func(zonePath string) (z struct {
|
||||||
|
name string // zones name e.g. psys, dram, core, uncore, package-0
|
||||||
|
energyFilepath string // path to a file containing the zones current energy counter in micro joules
|
||||||
|
energy int64 // current reading of the energy counter in micro joules
|
||||||
|
energyTimestamp time.Time // timestamp when energy counter was read
|
||||||
|
maxEnergyRange int64 // Range of the above energy counter in micro-joules
|
||||||
|
ok bool // Is all information available?
|
||||||
|
}) {
|
||||||
|
// zones name e.g. psys, dram, core, uncore, package-0
|
||||||
|
foundName := false
|
||||||
|
if v, err :=
|
||||||
|
os.ReadFile(
|
||||||
|
filepath.Join(zonePath, "name")); err == nil {
|
||||||
|
foundName = true
|
||||||
|
z.name = strings.TrimSpace(string(v))
|
||||||
|
}
|
||||||
|
|
||||||
|
// path to a file containing the zones current energy counter in micro joules
|
||||||
|
z.energyFilepath = filepath.Join(zonePath, "energy_uj")
|
||||||
|
|
||||||
|
// current reading of the energy counter in micro joules
|
||||||
|
foundEnergy := false
|
||||||
|
if v, err := os.ReadFile(z.energyFilepath); err == nil {
|
||||||
|
// timestamp when energy counter was read
|
||||||
|
z.energyTimestamp = time.Now()
|
||||||
|
if i, err := strconv.ParseInt(strings.TrimSpace(string(v)), 10, 64); err == nil {
|
||||||
|
foundEnergy = true
|
||||||
|
z.energy = i
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Range of the above energy counter in micro-joules
|
||||||
|
foundMaxEnergyRange := false
|
||||||
|
if v, err :=
|
||||||
|
os.ReadFile(
|
||||||
|
filepath.Join(zonePath, "max_energy_range_uj")); err == nil {
|
||||||
|
if i, err := strconv.ParseInt(strings.TrimSpace(string(v)), 10, 64); err == nil {
|
||||||
|
foundMaxEnergyRange = true
|
||||||
|
z.maxEnergyRange = i
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Is all information available?
|
||||||
|
z.ok = foundName && foundEnergy && foundMaxEnergyRange
|
||||||
|
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
powerCapPrefix := "/sys/devices/virtual/powercap"
|
||||||
|
controlType := "intel-rapl"
|
||||||
|
controlTypePath := filepath.Join(powerCapPrefix, controlType)
|
||||||
|
|
||||||
|
// Find all RAPL zones
|
||||||
|
zonePrefix := filepath.Join(controlTypePath, controlType+":")
|
||||||
|
zonesPath, err := filepath.Glob(zonePrefix + "*")
|
||||||
|
if err != nil || zonesPath == nil {
|
||||||
|
return fmt.Errorf("unable to find any zones under %s", controlTypePath)
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, zonePath := range zonesPath {
|
||||||
|
zoneID := strings.TrimPrefix(zonePath, zonePrefix)
|
||||||
|
z := readZoneInfo(zonePath)
|
||||||
|
if z.ok &&
|
||||||
|
!isIDExcluded[zoneID] &&
|
||||||
|
!isNameExcluded[z.name] {
|
||||||
|
|
||||||
|
// Add RAPL monitoring attributes for a zone
|
||||||
|
m.RAPLZoneInfo =
|
||||||
|
append(
|
||||||
|
m.RAPLZoneInfo,
|
||||||
|
RAPLZoneInfo{
|
||||||
|
tags: map[string]string{
|
||||||
|
"id": zoneID,
|
||||||
|
"zone_name": z.name,
|
||||||
|
},
|
||||||
|
energyFilepath: z.energyFilepath,
|
||||||
|
energy: z.energy,
|
||||||
|
energyTimestamp: z.energyTimestamp,
|
||||||
|
maxEnergyRange: z.maxEnergyRange,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
// find all sub zones for the given zone
|
||||||
|
subZonePrefix := filepath.Join(zonePath, controlType+":"+zoneID+":")
|
||||||
|
subZonesPath, err := filepath.Glob(subZonePrefix + "*")
|
||||||
|
if err != nil || subZonesPath == nil {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, subZonePath := range subZonesPath {
|
||||||
|
subZoneID := strings.TrimPrefix(subZonePath, subZonePrefix)
|
||||||
|
sz := readZoneInfo(subZonePath)
|
||||||
|
if len(zoneID) > 0 && len(z.name) > 0 &&
|
||||||
|
sz.ok &&
|
||||||
|
!isIDExcluded[zoneID+":"+subZoneID] &&
|
||||||
|
!isNameExcluded[sz.name] {
|
||||||
|
m.RAPLZoneInfo =
|
||||||
|
append(
|
||||||
|
m.RAPLZoneInfo,
|
||||||
|
RAPLZoneInfo{
|
||||||
|
tags: map[string]string{
|
||||||
|
"id": zoneID + ":" + subZoneID,
|
||||||
|
"zone_name": z.name,
|
||||||
|
"sub_zone_name": sz.name,
|
||||||
|
},
|
||||||
|
energyFilepath: sz.energyFilepath,
|
||||||
|
energy: sz.energy,
|
||||||
|
energyTimestamp: sz.energyTimestamp,
|
||||||
|
maxEnergyRange: sz.maxEnergyRange,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if m.RAPLZoneInfo == nil {
|
||||||
|
return fmt.Errorf("no running average power limit (RAPL) device found in %s", controlTypePath)
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
// Initialized
|
||||||
|
cclog.ComponentDebug(
|
||||||
|
m.name,
|
||||||
|
"initialized",
|
||||||
|
len(m.RAPLZoneInfo),
|
||||||
|
"zones with running average power limit (RAPL) monitoring attributes")
|
||||||
|
m.init = true
|
||||||
|
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
// Read reads running average power limit (RAPL) monitoring attributes for all initialized zones
|
||||||
|
// See: https://www.kernel.org/doc/html/latest/power/powercap/powercap.html#monitoring-attributes
|
||||||
|
func (m *RAPLCollector) Read(interval time.Duration, output chan lp.CCMetric) {
|
||||||
|
|
||||||
|
for i := range m.RAPLZoneInfo {
|
||||||
|
p := &m.RAPLZoneInfo[i]
|
||||||
|
|
||||||
|
// Read current value of the energy counter in micro joules
|
||||||
|
if v, err := os.ReadFile(p.energyFilepath); err == nil {
|
||||||
|
energyTimestamp := time.Now()
|
||||||
|
if i, err := strconv.ParseInt(strings.TrimSpace(string(v)), 10, 64); err == nil {
|
||||||
|
energy := i
|
||||||
|
|
||||||
|
// Compute average power (Δ energy / Δ time)
|
||||||
|
energyDiff := energy - p.energy
|
||||||
|
if energyDiff < 0 {
|
||||||
|
// Handle overflow:
|
||||||
|
// ( p.maxEnergyRange - p.energy ) + energy
|
||||||
|
// = p.maxEnergyRange + ( energy - p.energy )
|
||||||
|
// = p.maxEnergyRange + diffEnergy
|
||||||
|
energyDiff += p.maxEnergyRange
|
||||||
|
}
|
||||||
|
timeDiff := energyTimestamp.Sub(p.energyTimestamp)
|
||||||
|
averagePower := float64(energyDiff) / float64(timeDiff.Microseconds())
|
||||||
|
|
||||||
|
y, err := lp.New(
|
||||||
|
"rapl_average_power",
|
||||||
|
p.tags,
|
||||||
|
m.meta,
|
||||||
|
map[string]interface{}{"value": averagePower},
|
||||||
|
energyTimestamp)
|
||||||
|
if err == nil {
|
||||||
|
output <- y
|
||||||
|
}
|
||||||
|
|
||||||
|
// Save current energy counter state
|
||||||
|
p.energy = energy
|
||||||
|
p.energyTimestamp = energyTimestamp
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Close closes running average power limit (RAPL) metric collector
|
||||||
|
func (m *RAPLCollector) Close() {
|
||||||
|
// Unset flag
|
||||||
|
m.init = false
|
||||||
|
}
|
18
collectors/raplMetric.md
Normal file
18
collectors/raplMetric.md
Normal file
@@ -0,0 +1,18 @@
|
|||||||
|
# Running average power limit (RAPL) metric collector
|
||||||
|
|
||||||
|
This collector reads running average power limit (RAPL) monitoring attributes to compute average power consumption metrics. See <https://www.kernel.org/doc/html/latest/power/powercap/powercap.html#monitoring-attributes>.
|
||||||
|
|
||||||
|
The Likwid metric collector provides similar functionality.
|
||||||
|
|
||||||
|
## Configuration
|
||||||
|
|
||||||
|
```json
|
||||||
|
"rapl": {
|
||||||
|
"exclude_device_by_id": ["0:1", "0:2"],
|
||||||
|
"exclude_device_by_name": ["psys"]
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
## Metrics
|
||||||
|
|
||||||
|
* `rapl_average_power`: average power consumption in Watt. The average is computed over the time interval between the previous measurement and the current measurement.
|
@@ -6,8 +6,8 @@ import (
|
|||||||
"fmt"
|
"fmt"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
cclog "github.com/ClusterCockpit/cc-metric-collector/internal/ccLogger"
|
cclog "github.com/ClusterCockpit/cc-metric-collector/pkg/ccLogger"
|
||||||
lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric"
|
lp "github.com/ClusterCockpit/cc-metric-collector/pkg/ccMetric"
|
||||||
"github.com/ClusterCockpit/go-rocm-smi/pkg/rocm_smi"
|
"github.com/ClusterCockpit/go-rocm-smi/pkg/rocm_smi"
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -66,14 +66,14 @@ func (m *RocmSmiCollector) Init(config json.RawMessage) error {
|
|||||||
|
|
||||||
ret := rocm_smi.Init()
|
ret := rocm_smi.Init()
|
||||||
if ret != rocm_smi.STATUS_SUCCESS {
|
if ret != rocm_smi.STATUS_SUCCESS {
|
||||||
err = errors.New("Failed to initialize ROCm SMI library")
|
err = errors.New("failed to initialize ROCm SMI library")
|
||||||
cclog.ComponentError(m.name, err.Error())
|
cclog.ComponentError(m.name, err.Error())
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
numDevs, ret := rocm_smi.NumMonitorDevices()
|
numDevs, ret := rocm_smi.NumMonitorDevices()
|
||||||
if ret != rocm_smi.STATUS_SUCCESS {
|
if ret != rocm_smi.STATUS_SUCCESS {
|
||||||
err = errors.New("Failed to get number of GPUs from ROCm SMI library")
|
err = errors.New("failed to get number of GPUs from ROCm SMI library")
|
||||||
cclog.ComponentError(m.name, err.Error())
|
cclog.ComponentError(m.name, err.Error())
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
@@ -98,14 +98,14 @@ func (m *RocmSmiCollector) Init(config json.RawMessage) error {
|
|||||||
}
|
}
|
||||||
device, ret := rocm_smi.DeviceGetHandleByIndex(i)
|
device, ret := rocm_smi.DeviceGetHandleByIndex(i)
|
||||||
if ret != rocm_smi.STATUS_SUCCESS {
|
if ret != rocm_smi.STATUS_SUCCESS {
|
||||||
err = fmt.Errorf("Failed to get handle for GPU %d", i)
|
err = fmt.Errorf("failed to get handle for GPU %d", i)
|
||||||
cclog.ComponentError(m.name, err.Error())
|
cclog.ComponentError(m.name, err.Error())
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
pciInfo, ret := rocm_smi.DeviceGetPciInfo(device)
|
pciInfo, ret := rocm_smi.DeviceGetPciInfo(device)
|
||||||
if ret != rocm_smi.STATUS_SUCCESS {
|
if ret != rocm_smi.STATUS_SUCCESS {
|
||||||
err = fmt.Errorf("Failed to get PCI information for GPU %d", i)
|
err = fmt.Errorf("failed to get PCI information for GPU %d", i)
|
||||||
cclog.ComponentError(m.name, err.Error())
|
cclog.ComponentError(m.name, err.Error())
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
@@ -4,8 +4,8 @@ import (
|
|||||||
"encoding/json"
|
"encoding/json"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
cclog "github.com/ClusterCockpit/cc-metric-collector/internal/ccLogger"
|
cclog "github.com/ClusterCockpit/cc-metric-collector/pkg/ccLogger"
|
||||||
lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric"
|
lp "github.com/ClusterCockpit/cc-metric-collector/pkg/ccMetric"
|
||||||
)
|
)
|
||||||
|
|
||||||
// These are the fields we read from the JSON configuration
|
// These are the fields we read from the JSON configuration
|
||||||
@@ -17,7 +17,7 @@ type SampleCollectorConfig struct {
|
|||||||
// defined by metricCollector (name, init, ...)
|
// defined by metricCollector (name, init, ...)
|
||||||
type SampleCollector struct {
|
type SampleCollector struct {
|
||||||
metricCollector
|
metricCollector
|
||||||
config SampleTimerCollectorConfig // the configuration structure
|
config SampleCollectorConfig // the configuration structure
|
||||||
meta map[string]string // default meta information
|
meta map[string]string // default meta information
|
||||||
tags map[string]string // default tags
|
tags map[string]string // default tags
|
||||||
}
|
}
|
||||||
@@ -36,14 +36,14 @@ func (m *SampleCollector) Init(config json.RawMessage) error {
|
|||||||
// This is for later use, also call it early
|
// This is for later use, also call it early
|
||||||
m.setup()
|
m.setup()
|
||||||
// Tell whether the collector should be run in parallel with others (reading files, ...)
|
// Tell whether the collector should be run in parallel with others (reading files, ...)
|
||||||
// or it should be run serially, mostly for collectors acutally doing measurements
|
// or it should be run serially, mostly for collectors actually doing measurements
|
||||||
// because they should not measure the execution of the other collectors
|
// because they should not measure the execution of the other collectors
|
||||||
m.parallel = true
|
m.parallel = true
|
||||||
// Define meta information sent with each metric
|
// Define meta information sent with each metric
|
||||||
// (Can also be dynamic or this is the basic set with extension through AddMeta())
|
// (Can also be dynamic or this is the basic set with extension through AddMeta())
|
||||||
m.meta = map[string]string{"source": m.name, "group": "SAMPLE"}
|
m.meta = map[string]string{"source": m.name, "group": "SAMPLE"}
|
||||||
// Define tags sent with each metric
|
// Define tags sent with each metric
|
||||||
// The 'type' tag is always needed, it defines the granulatity of the metric
|
// The 'type' tag is always needed, it defines the granularity of the metric
|
||||||
// node -> whole system
|
// node -> whole system
|
||||||
// socket -> CPU socket (requires socket ID as 'type-id' tag)
|
// socket -> CPU socket (requires socket ID as 'type-id' tag)
|
||||||
// die -> CPU die (requires CPU die ID as 'type-id' tag)
|
// die -> CPU die (requires CPU die ID as 'type-id' tag)
|
||||||
|
@@ -5,8 +5,8 @@ import (
|
|||||||
"sync"
|
"sync"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
cclog "github.com/ClusterCockpit/cc-metric-collector/internal/ccLogger"
|
cclog "github.com/ClusterCockpit/cc-metric-collector/pkg/ccLogger"
|
||||||
lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric"
|
lp "github.com/ClusterCockpit/cc-metric-collector/pkg/ccMetric"
|
||||||
)
|
)
|
||||||
|
|
||||||
// These are the fields we read from the JSON configuration
|
// These are the fields we read from the JSON configuration
|
||||||
@@ -38,7 +38,7 @@ func (m *SampleTimerCollector) Init(name string, config json.RawMessage) error {
|
|||||||
// (Can also be dynamic or this is the basic set with extension through AddMeta())
|
// (Can also be dynamic or this is the basic set with extension through AddMeta())
|
||||||
m.meta = map[string]string{"source": m.name, "group": "SAMPLE"}
|
m.meta = map[string]string{"source": m.name, "group": "SAMPLE"}
|
||||||
// Define tags sent with each metric
|
// Define tags sent with each metric
|
||||||
// The 'type' tag is always needed, it defines the granulatity of the metric
|
// The 'type' tag is always needed, it defines the granularity of the metric
|
||||||
// node -> whole system
|
// node -> whole system
|
||||||
// socket -> CPU socket (requires socket ID as 'type-id' tag)
|
// socket -> CPU socket (requires socket ID as 'type-id' tag)
|
||||||
// cpu -> single CPU hardware thread (requires cpu ID as 'type-id' tag)
|
// cpu -> single CPU hardware thread (requires cpu ID as 'type-id' tag)
|
||||||
@@ -60,7 +60,7 @@ func (m *SampleTimerCollector) Init(name string, config json.RawMessage) error {
|
|||||||
|
|
||||||
// Storage for output channel
|
// Storage for output channel
|
||||||
m.output = nil
|
m.output = nil
|
||||||
// Mangement channel for the timer function.
|
// Management channel for the timer function.
|
||||||
m.done = make(chan bool)
|
m.done = make(chan bool)
|
||||||
// Create the own ticker
|
// Create the own ticker
|
||||||
m.ticker = time.NewTicker(m.interval)
|
m.ticker = time.NewTicker(m.interval)
|
||||||
@@ -94,7 +94,7 @@ func (m *SampleTimerCollector) ReadMetrics(timestamp time.Time) {
|
|||||||
|
|
||||||
value := 1.0
|
value := 1.0
|
||||||
|
|
||||||
// If you want to measure something for a specific amout of time, use interval
|
// If you want to measure something for a specific amount of time, use interval
|
||||||
// start := readState()
|
// start := readState()
|
||||||
// time.Sleep(interval)
|
// time.Sleep(interval)
|
||||||
// stop := readState()
|
// stop := readState()
|
||||||
|
@@ -1,17 +1,17 @@
|
|||||||
package collectors
|
package collectors
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"bufio"
|
||||||
"encoding/json"
|
"encoding/json"
|
||||||
"fmt"
|
"fmt"
|
||||||
"bufio"
|
|
||||||
"time"
|
|
||||||
"os"
|
|
||||||
"strings"
|
|
||||||
"strconv"
|
|
||||||
"math"
|
"math"
|
||||||
|
"os"
|
||||||
|
"strconv"
|
||||||
|
"strings"
|
||||||
|
"time"
|
||||||
|
|
||||||
cclog "github.com/ClusterCockpit/cc-metric-collector/internal/ccLogger"
|
cclog "github.com/ClusterCockpit/cc-metric-collector/pkg/ccLogger"
|
||||||
lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric"
|
lp "github.com/ClusterCockpit/cc-metric-collector/pkg/ccMetric"
|
||||||
)
|
)
|
||||||
|
|
||||||
const SCHEDSTATFILE = `/proc/schedstat`
|
const SCHEDSTATFILE = `/proc/schedstat`
|
||||||
@@ -88,7 +88,6 @@ func (m *SchedstatCollector) Init(config json.RawMessage) error {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
// Save current timestamp
|
// Save current timestamp
|
||||||
m.lastTimestamp = time.Now()
|
m.lastTimestamp = time.Now()
|
||||||
|
|
||||||
|
144
collectors/selfMetric.go
Normal file
144
collectors/selfMetric.go
Normal file
@@ -0,0 +1,144 @@
|
|||||||
|
package collectors
|
||||||
|
|
||||||
|
import (
|
||||||
|
"encoding/json"
|
||||||
|
"runtime"
|
||||||
|
"syscall"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
cclog "github.com/ClusterCockpit/cc-metric-collector/pkg/ccLogger"
|
||||||
|
lp "github.com/ClusterCockpit/cc-metric-collector/pkg/ccMetric"
|
||||||
|
)
|
||||||
|
|
||||||
|
// SelfCollectorConfig holds the fields read from the JSON configuration of
// the self collector. Each flag switches one group of metrics on; a flag that
// is absent from the configuration keeps its zero value (false).
type SelfCollectorConfig struct {
	// MemStats enables the metrics taken from runtime.MemStats
	MemStats bool `json:"read_mem_stats"`
	// GoRoutines enables the metric taken from runtime.NumGoroutine
	GoRoutines bool `json:"read_goroutines"`
	// CgoCalls enables the metric taken from runtime.NumCgoCall
	CgoCalls bool `json:"read_cgo_calls"`
	// Rusage enables the metrics taken from syscall.Getrusage
	Rusage bool `json:"read_rusage"`
}
|
||||||
|
|
||||||
|
type SelfCollector struct {
|
||||||
|
metricCollector
|
||||||
|
config SelfCollectorConfig // the configuration structure
|
||||||
|
meta map[string]string // default meta information
|
||||||
|
tags map[string]string // default tags
|
||||||
|
}
|
||||||
|
|
||||||
|
func (m *SelfCollector) Init(config json.RawMessage) error {
|
||||||
|
var err error = nil
|
||||||
|
m.name = "SelfCollector"
|
||||||
|
m.setup()
|
||||||
|
m.parallel = true
|
||||||
|
m.meta = map[string]string{"source": m.name, "group": "Self"}
|
||||||
|
m.tags = map[string]string{"type": "node"}
|
||||||
|
if len(config) > 0 {
|
||||||
|
err = json.Unmarshal(config, &m.config)
|
||||||
|
if err != nil {
|
||||||
|
cclog.ComponentError(m.name, "Error reading config:", err.Error())
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
m.init = true
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
func (m *SelfCollector) Read(interval time.Duration, output chan lp.CCMetric) {
|
||||||
|
timestamp := time.Now()
|
||||||
|
|
||||||
|
if m.config.MemStats {
|
||||||
|
var memstats runtime.MemStats
|
||||||
|
runtime.ReadMemStats(&memstats)
|
||||||
|
|
||||||
|
y, err := lp.New("total_alloc", m.tags, m.meta, map[string]interface{}{"value": memstats.TotalAlloc}, timestamp)
|
||||||
|
if err == nil {
|
||||||
|
y.AddMeta("unit", "Bytes")
|
||||||
|
output <- y
|
||||||
|
}
|
||||||
|
y, err = lp.New("heap_alloc", m.tags, m.meta, map[string]interface{}{"value": memstats.HeapAlloc}, timestamp)
|
||||||
|
if err == nil {
|
||||||
|
y.AddMeta("unit", "Bytes")
|
||||||
|
output <- y
|
||||||
|
}
|
||||||
|
y, err = lp.New("heap_sys", m.tags, m.meta, map[string]interface{}{"value": memstats.HeapSys}, timestamp)
|
||||||
|
if err == nil {
|
||||||
|
y.AddMeta("unit", "Bytes")
|
||||||
|
output <- y
|
||||||
|
}
|
||||||
|
y, err = lp.New("heap_idle", m.tags, m.meta, map[string]interface{}{"value": memstats.HeapIdle}, timestamp)
|
||||||
|
if err == nil {
|
||||||
|
y.AddMeta("unit", "Bytes")
|
||||||
|
output <- y
|
||||||
|
}
|
||||||
|
y, err = lp.New("heap_inuse", m.tags, m.meta, map[string]interface{}{"value": memstats.HeapInuse}, timestamp)
|
||||||
|
if err == nil {
|
||||||
|
y.AddMeta("unit", "Bytes")
|
||||||
|
output <- y
|
||||||
|
}
|
||||||
|
y, err = lp.New("heap_released", m.tags, m.meta, map[string]interface{}{"value": memstats.HeapReleased}, timestamp)
|
||||||
|
if err == nil {
|
||||||
|
y.AddMeta("unit", "Bytes")
|
||||||
|
output <- y
|
||||||
|
}
|
||||||
|
y, err = lp.New("heap_objects", m.tags, m.meta, map[string]interface{}{"value": memstats.HeapObjects}, timestamp)
|
||||||
|
if err == nil {
|
||||||
|
output <- y
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if m.config.GoRoutines {
|
||||||
|
y, err := lp.New("num_goroutines", m.tags, m.meta, map[string]interface{}{"value": runtime.NumGoroutine()}, timestamp)
|
||||||
|
if err == nil {
|
||||||
|
output <- y
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if m.config.CgoCalls {
|
||||||
|
y, err := lp.New("num_cgo_calls", m.tags, m.meta, map[string]interface{}{"value": runtime.NumCgoCall()}, timestamp)
|
||||||
|
if err == nil {
|
||||||
|
output <- y
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if m.config.Rusage {
|
||||||
|
var rusage syscall.Rusage
|
||||||
|
err := syscall.Getrusage(syscall.RUSAGE_SELF, &rusage)
|
||||||
|
if err == nil {
|
||||||
|
sec, nsec := rusage.Utime.Unix()
|
||||||
|
t := float64(sec) + (float64(nsec) * 1e-9)
|
||||||
|
y, err := lp.New("rusage_user_time", m.tags, m.meta, map[string]interface{}{"value": t}, timestamp)
|
||||||
|
if err == nil {
|
||||||
|
y.AddMeta("unit", "seconds")
|
||||||
|
output <- y
|
||||||
|
}
|
||||||
|
sec, nsec = rusage.Stime.Unix()
|
||||||
|
t = float64(sec) + (float64(nsec) * 1e-9)
|
||||||
|
y, err = lp.New("rusage_system_time", m.tags, m.meta, map[string]interface{}{"value": t}, timestamp)
|
||||||
|
if err == nil {
|
||||||
|
y.AddMeta("unit", "seconds")
|
||||||
|
output <- y
|
||||||
|
}
|
||||||
|
y, err = lp.New("rusage_vol_ctx_switch", m.tags, m.meta, map[string]interface{}{"value": rusage.Nvcsw}, timestamp)
|
||||||
|
if err == nil {
|
||||||
|
output <- y
|
||||||
|
}
|
||||||
|
y, err = lp.New("rusage_invol_ctx_switch", m.tags, m.meta, map[string]interface{}{"value": rusage.Nivcsw}, timestamp)
|
||||||
|
if err == nil {
|
||||||
|
output <- y
|
||||||
|
}
|
||||||
|
y, err = lp.New("rusage_signals", m.tags, m.meta, map[string]interface{}{"value": rusage.Nsignals}, timestamp)
|
||||||
|
if err == nil {
|
||||||
|
output <- y
|
||||||
|
}
|
||||||
|
y, err = lp.New("rusage_major_pgfaults", m.tags, m.meta, map[string]interface{}{"value": rusage.Majflt}, timestamp)
|
||||||
|
if err == nil {
|
||||||
|
output <- y
|
||||||
|
}
|
||||||
|
y, err = lp.New("rusage_minor_pgfaults", m.tags, m.meta, map[string]interface{}{"value": rusage.Minflt}, timestamp)
|
||||||
|
if err == nil {
|
||||||
|
output <- y
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (m *SelfCollector) Close() {
|
||||||
|
m.init = false
|
||||||
|
}
|
34
collectors/selfMetric.md
Normal file
34
collectors/selfMetric.md
Normal file
@@ -0,0 +1,34 @@
|
|||||||
|
## `self` collector
|
||||||
|
|
||||||
|
```json
|
||||||
|
"self": {
|
||||||
|
"read_mem_stats" : true,
|
||||||
|
"read_goroutines" : true,
|
||||||
|
"read_cgo_calls" : true,
|
||||||
|
"read_rusage" : true
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
The `self` collector reads data from Go's `runtime` and `syscall` packages and thereby monitors the execution of cc-metric-collector itself.
|
||||||
|
|
||||||
|
Metrics:
|
||||||
|
* If `read_mem_stats == true`:
|
||||||
|
* `total_alloc`: The metric reports cumulative bytes allocated for heap objects.
|
||||||
|
* `heap_alloc`: The metric reports bytes of allocated heap objects.
|
||||||
|
* `heap_sys`: The metric reports bytes of heap memory obtained from the OS.
|
||||||
|
* `heap_idle`: The metric reports bytes in idle (unused) spans.
|
||||||
|
* `heap_inuse`: The metric reports bytes in in-use spans.
|
||||||
|
* `heap_released`: The metric reports bytes of physical memory returned to the OS.
|
||||||
|
* `heap_objects`: The metric reports the number of allocated heap objects.
|
||||||
|
* If `read_goroutines == true`:
|
||||||
|
* `num_goroutines`: The metric reports the number of goroutines that currently exist.
|
||||||
|
* If `read_cgo_calls == true`:
|
||||||
|
* `num_cgo_calls`: The metric reports the number of cgo calls made by the current process.
|
||||||
|
* If `read_rusage == true`:
|
||||||
|
* `rusage_user_time`: The metric reports the amount of time that this process has been scheduled in user mode.
|
||||||
|
* `rusage_system_time`: The metric reports the amount of time that this process has been scheduled in kernel mode.
|
||||||
|
* `rusage_vol_ctx_switch`: The metric reports the number of voluntary context switches.
|
||||||
|
* `rusage_invol_ctx_switch`: The metric reports the number of involuntary context switches.
|
||||||
|
* `rusage_signals`: The metric reports the number of signals received.
|
||||||
|
* `rusage_major_pgfaults`: The metric reports the number of major faults the process has made which have required loading a memory page from disk.
|
||||||
|
* `rusage_minor_pgfaults`: The metric reports the number of minor faults the process has made which have not required loading a memory page from disk.
|
@@ -3,14 +3,14 @@ package collectors
|
|||||||
import (
|
import (
|
||||||
"encoding/json"
|
"encoding/json"
|
||||||
"fmt"
|
"fmt"
|
||||||
"io/ioutil"
|
"os"
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
"strconv"
|
"strconv"
|
||||||
"strings"
|
"strings"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
cclog "github.com/ClusterCockpit/cc-metric-collector/internal/ccLogger"
|
cclog "github.com/ClusterCockpit/cc-metric-collector/pkg/ccLogger"
|
||||||
lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric"
|
lp "github.com/ClusterCockpit/cc-metric-collector/pkg/ccMetric"
|
||||||
)
|
)
|
||||||
|
|
||||||
// See: https://www.kernel.org/doc/html/latest/hwmon/sysfs-interface.html
|
// See: https://www.kernel.org/doc/html/latest/hwmon/sysfs-interface.html
|
||||||
@@ -83,14 +83,14 @@ func (m *TempCollector) Init(config json.RawMessage) error {
|
|||||||
|
|
||||||
// sensor name
|
// sensor name
|
||||||
nameFile := filepath.Join(filepath.Dir(file), "name")
|
nameFile := filepath.Join(filepath.Dir(file), "name")
|
||||||
name, err := ioutil.ReadFile(nameFile)
|
name, err := os.ReadFile(nameFile)
|
||||||
if err == nil {
|
if err == nil {
|
||||||
sensor.name = strings.TrimSpace(string(name))
|
sensor.name = strings.TrimSpace(string(name))
|
||||||
}
|
}
|
||||||
|
|
||||||
// sensor label
|
// sensor label
|
||||||
labelFile := strings.TrimSuffix(file, "_input") + "_label"
|
labelFile := strings.TrimSuffix(file, "_input") + "_label"
|
||||||
label, err := ioutil.ReadFile(labelFile)
|
label, err := os.ReadFile(labelFile)
|
||||||
if err == nil {
|
if err == nil {
|
||||||
sensor.label = strings.TrimSpace(string(label))
|
sensor.label = strings.TrimSpace(string(label))
|
||||||
}
|
}
|
||||||
@@ -117,7 +117,7 @@ func (m *TempCollector) Init(config json.RawMessage) error {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Sensor file
|
// Sensor file
|
||||||
_, err = ioutil.ReadFile(file)
|
_, err = os.ReadFile(file)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
@@ -139,7 +139,7 @@ func (m *TempCollector) Init(config json.RawMessage) error {
|
|||||||
// max temperature
|
// max temperature
|
||||||
if m.config.ReportMaxTemp {
|
if m.config.ReportMaxTemp {
|
||||||
maxTempFile := strings.TrimSuffix(file, "_input") + "_max"
|
maxTempFile := strings.TrimSuffix(file, "_input") + "_max"
|
||||||
if buffer, err := ioutil.ReadFile(maxTempFile); err == nil {
|
if buffer, err := os.ReadFile(maxTempFile); err == nil {
|
||||||
if x, err := strconv.ParseInt(strings.TrimSpace(string(buffer)), 10, 64); err == nil {
|
if x, err := strconv.ParseInt(strings.TrimSpace(string(buffer)), 10, 64); err == nil {
|
||||||
sensor.maxTempName = strings.Replace(sensor.metricName, "temp", "max_temp", 1)
|
sensor.maxTempName = strings.Replace(sensor.metricName, "temp", "max_temp", 1)
|
||||||
sensor.maxTemp = x / 1000
|
sensor.maxTemp = x / 1000
|
||||||
@@ -150,7 +150,7 @@ func (m *TempCollector) Init(config json.RawMessage) error {
|
|||||||
// critical temperature
|
// critical temperature
|
||||||
if m.config.ReportCriticalTemp {
|
if m.config.ReportCriticalTemp {
|
||||||
criticalTempFile := strings.TrimSuffix(file, "_input") + "_crit"
|
criticalTempFile := strings.TrimSuffix(file, "_input") + "_crit"
|
||||||
if buffer, err := ioutil.ReadFile(criticalTempFile); err == nil {
|
if buffer, err := os.ReadFile(criticalTempFile); err == nil {
|
||||||
if x, err := strconv.ParseInt(strings.TrimSpace(string(buffer)), 10, 64); err == nil {
|
if x, err := strconv.ParseInt(strings.TrimSpace(string(buffer)), 10, 64); err == nil {
|
||||||
sensor.critTempName = strings.Replace(sensor.metricName, "temp", "crit_temp", 1)
|
sensor.critTempName = strings.Replace(sensor.metricName, "temp", "crit_temp", 1)
|
||||||
sensor.critTemp = x / 1000
|
sensor.critTemp = x / 1000
|
||||||
@@ -175,7 +175,7 @@ func (m *TempCollector) Read(interval time.Duration, output chan lp.CCMetric) {
|
|||||||
|
|
||||||
for _, sensor := range m.sensors {
|
for _, sensor := range m.sensors {
|
||||||
// Read sensor file
|
// Read sensor file
|
||||||
buffer, err := ioutil.ReadFile(sensor.file)
|
buffer, err := os.ReadFile(sensor.file)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
cclog.ComponentError(
|
cclog.ComponentError(
|
||||||
m.name,
|
m.name,
|
||||||
|
@@ -9,7 +9,7 @@ import (
|
|||||||
"strings"
|
"strings"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric"
|
lp "github.com/ClusterCockpit/cc-metric-collector/pkg/ccMetric"
|
||||||
)
|
)
|
||||||
|
|
||||||
const MAX_NUM_PROCS = 10
|
const MAX_NUM_PROCS = 10
|
||||||
|
28
go.mod
28
go.mod
@@ -6,33 +6,35 @@ require (
|
|||||||
github.com/ClusterCockpit/cc-units v0.3.0
|
github.com/ClusterCockpit/cc-units v0.3.0
|
||||||
github.com/ClusterCockpit/go-rocm-smi v0.3.0
|
github.com/ClusterCockpit/go-rocm-smi v0.3.0
|
||||||
github.com/NVIDIA/go-nvml v0.11.6-0
|
github.com/NVIDIA/go-nvml v0.11.6-0
|
||||||
github.com/PaesslerAG/gval v1.2.0
|
github.com/PaesslerAG/gval v1.2.1
|
||||||
github.com/gorilla/mux v1.8.0
|
github.com/gorilla/mux v1.8.0
|
||||||
github.com/influxdata/influxdb-client-go/v2 v2.9.1
|
github.com/influxdata/influxdb-client-go/v2 v2.12.0
|
||||||
github.com/influxdata/line-protocol v0.0.0-20210922203350-b1ad95c89adf
|
github.com/influxdata/line-protocol v0.0.0-20210922203350-b1ad95c89adf
|
||||||
github.com/nats-io/nats.go v1.16.0
|
github.com/nats-io/nats.go v1.20.0
|
||||||
github.com/prometheus/client_golang v1.12.2
|
github.com/prometheus/client_golang v1.14.0
|
||||||
github.com/stmcginnis/gofish v0.13.0
|
github.com/stmcginnis/gofish v0.13.0
|
||||||
golang.org/x/sys v0.0.0-20220712014510-0a85c31ab51e
|
github.com/tklauser/go-sysconf v0.3.11
|
||||||
|
golang.org/x/sys v0.2.0
|
||||||
)
|
)
|
||||||
|
|
||||||
require (
|
require (
|
||||||
|
github.com/apapsch/go-jsonmerge/v2 v2.0.0 // indirect
|
||||||
github.com/beorn7/perks v1.0.1 // indirect
|
github.com/beorn7/perks v1.0.1 // indirect
|
||||||
github.com/cespare/xxhash/v2 v2.1.2 // indirect
|
github.com/cespare/xxhash/v2 v2.1.2 // indirect
|
||||||
github.com/deepmap/oapi-codegen v1.11.0 // indirect
|
github.com/deepmap/oapi-codegen v1.12.3 // indirect
|
||||||
github.com/golang/protobuf v1.5.2 // indirect
|
github.com/golang/protobuf v1.5.2 // indirect
|
||||||
github.com/google/uuid v1.3.0 // indirect
|
github.com/google/uuid v1.3.0 // indirect
|
||||||
github.com/matttproud/golang_protobuf_extensions v1.0.1 // indirect
|
github.com/matttproud/golang_protobuf_extensions v1.0.4 // indirect
|
||||||
github.com/nats-io/nats-server/v2 v2.8.4 // indirect
|
github.com/nats-io/nats-server/v2 v2.8.4 // indirect
|
||||||
github.com/nats-io/nkeys v0.3.0 // indirect
|
github.com/nats-io/nkeys v0.3.0 // indirect
|
||||||
github.com/nats-io/nuid v1.0.1 // indirect
|
github.com/nats-io/nuid v1.0.1 // indirect
|
||||||
github.com/pkg/errors v0.9.1 // indirect
|
github.com/pkg/errors v0.9.1 // indirect
|
||||||
github.com/prometheus/client_model v0.2.0 // indirect
|
github.com/prometheus/client_model v0.3.0 // indirect
|
||||||
github.com/prometheus/common v0.37.0 // indirect
|
github.com/prometheus/common v0.37.0 // indirect
|
||||||
github.com/prometheus/procfs v0.7.3 // indirect
|
github.com/prometheus/procfs v0.8.0 // indirect
|
||||||
github.com/shopspring/decimal v1.3.1 // indirect
|
github.com/shopspring/decimal v1.3.1 // indirect
|
||||||
golang.org/x/crypto v0.0.0-20220622213112-05595931fe9d // indirect
|
github.com/tklauser/numcpus v0.6.0 // indirect
|
||||||
golang.org/x/net v0.0.0-20220708220712-1185a9018129 // indirect
|
golang.org/x/crypto v0.3.0 // indirect
|
||||||
google.golang.org/protobuf v1.28.0 // indirect
|
golang.org/x/net v0.2.0 // indirect
|
||||||
gopkg.in/yaml.v2 v2.4.0 // indirect
|
google.golang.org/protobuf v1.28.1 // indirect
|
||||||
)
|
)
|
||||||
|
5
go.sum
5
go.sum
@@ -287,6 +287,10 @@ github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/
|
|||||||
github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
|
github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
|
||||||
github.com/stretchr/testify v1.7.1 h1:5TQK59W5E3v0r2duFAb7P95B6hEeOyEnHRa8MjYSMTY=
|
github.com/stretchr/testify v1.7.1 h1:5TQK59W5E3v0r2duFAb7P95B6hEeOyEnHRa8MjYSMTY=
|
||||||
github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
|
github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
|
||||||
|
github.com/tklauser/go-sysconf v0.3.10 h1:IJ1AZGZRWbY8T5Vfk04D9WOA5WSejdflXxP03OUqALw=
|
||||||
|
github.com/tklauser/go-sysconf v0.3.10/go.mod h1:C8XykCvCb+Gn0oNCWPIlcb0RuglQTYaQ2hGm7jmxEFk=
|
||||||
|
github.com/tklauser/numcpus v0.4.0 h1:E53Dm1HjH1/R2/aoCtXtPgzmElmn51aOkhCFSuZq//o=
|
||||||
|
github.com/tklauser/numcpus v0.4.0/go.mod h1:1+UI3pD8NW14VMwdgJNJ1ESk2UnwhAnz5hMwiKKqXCQ=
|
||||||
github.com/ugorji/go v1.1.7/go.mod h1:kZn38zHttfInRq0xu/PH0az30d+z6vm202qpg1oXVMw=
|
github.com/ugorji/go v1.1.7/go.mod h1:kZn38zHttfInRq0xu/PH0az30d+z6vm202qpg1oXVMw=
|
||||||
github.com/ugorji/go v1.2.7/go.mod h1:nF9osbDWLy6bDVv/Rtoh6QgnvNDpmCalQV5urGCCS6M=
|
github.com/ugorji/go v1.2.7/go.mod h1:nF9osbDWLy6bDVv/Rtoh6QgnvNDpmCalQV5urGCCS6M=
|
||||||
github.com/ugorji/go/codec v1.1.7/go.mod h1:Ax+UKWsSmolVDwsd+7N3ZtXu+yMGCf907BLYF3GoBXY=
|
github.com/ugorji/go/codec v1.1.7/go.mod h1:Ax+UKWsSmolVDwsd+7N3ZtXu+yMGCf907BLYF3GoBXY=
|
||||||
@@ -445,6 +449,7 @@ golang.org/x/sys v0.0.0-20211019181941-9d821ace8654/go.mod h1:oPkhp1MJrh7nUepCBc
|
|||||||
golang.org/x/sys v0.0.0-20211103235746-7861aae1554b/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
golang.org/x/sys v0.0.0-20211103235746-7861aae1554b/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||||
golang.org/x/sys v0.0.0-20211216021012-1d35b9e2eb4e/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
golang.org/x/sys v0.0.0-20211216021012-1d35b9e2eb4e/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||||
golang.org/x/sys v0.0.0-20220114195835-da31bd327af9/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
golang.org/x/sys v0.0.0-20220114195835-da31bd327af9/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||||
|
golang.org/x/sys v0.0.0-20220128215802-99c3d69c2c27/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||||
golang.org/x/sys v0.0.0-20220513210249-45d2b4557a2a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
golang.org/x/sys v0.0.0-20220513210249-45d2b4557a2a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||||
golang.org/x/sys v0.0.0-20220712014510-0a85c31ab51e h1:NHvCuwuS43lGnYhten69ZWqi2QOj/CiDNcKbVqwVoew=
|
golang.org/x/sys v0.0.0-20220712014510-0a85c31ab51e h1:NHvCuwuS43lGnYhten69ZWqi2QOj/CiDNcKbVqwVoew=
|
||||||
golang.org/x/sys v0.0.0-20220712014510-0a85c31ab51e/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
golang.org/x/sys v0.0.0-20220712014510-0a85c31ab51e/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||||
|
@@ -9,10 +9,10 @@ import (
|
|||||||
"sync"
|
"sync"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
cclog "github.com/ClusterCockpit/cc-metric-collector/internal/ccLogger"
|
cclog "github.com/ClusterCockpit/cc-metric-collector/pkg/ccLogger"
|
||||||
|
|
||||||
lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric"
|
lp "github.com/ClusterCockpit/cc-metric-collector/pkg/ccMetric"
|
||||||
topo "github.com/ClusterCockpit/cc-metric-collector/internal/ccTopology"
|
topo "github.com/ClusterCockpit/cc-metric-collector/pkg/ccTopology"
|
||||||
|
|
||||||
"github.com/PaesslerAG/gval"
|
"github.com/PaesslerAG/gval"
|
||||||
)
|
)
|
||||||
|
@@ -8,8 +8,8 @@ import (
|
|||||||
"sort"
|
"sort"
|
||||||
"strings"
|
"strings"
|
||||||
|
|
||||||
cclog "github.com/ClusterCockpit/cc-metric-collector/internal/ccLogger"
|
cclog "github.com/ClusterCockpit/cc-metric-collector/pkg/ccLogger"
|
||||||
topo "github.com/ClusterCockpit/cc-metric-collector/internal/ccTopology"
|
topo "github.com/ClusterCockpit/cc-metric-collector/pkg/ccTopology"
|
||||||
)
|
)
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@@ -4,11 +4,11 @@ import (
|
|||||||
"sync"
|
"sync"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
cclog "github.com/ClusterCockpit/cc-metric-collector/internal/ccLogger"
|
cclog "github.com/ClusterCockpit/cc-metric-collector/pkg/ccLogger"
|
||||||
|
|
||||||
lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric"
|
|
||||||
agg "github.com/ClusterCockpit/cc-metric-collector/internal/metricAggregator"
|
agg "github.com/ClusterCockpit/cc-metric-collector/internal/metricAggregator"
|
||||||
mct "github.com/ClusterCockpit/cc-metric-collector/internal/multiChanTicker"
|
lp "github.com/ClusterCockpit/cc-metric-collector/pkg/ccMetric"
|
||||||
|
mct "github.com/ClusterCockpit/cc-metric-collector/pkg/multiChanTicker"
|
||||||
)
|
)
|
||||||
|
|
||||||
type metricCachePeriod struct {
|
type metricCachePeriod struct {
|
||||||
|
@@ -7,11 +7,11 @@ import (
|
|||||||
"sync"
|
"sync"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
cclog "github.com/ClusterCockpit/cc-metric-collector/internal/ccLogger"
|
cclog "github.com/ClusterCockpit/cc-metric-collector/pkg/ccLogger"
|
||||||
|
|
||||||
lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric"
|
|
||||||
agg "github.com/ClusterCockpit/cc-metric-collector/internal/metricAggregator"
|
agg "github.com/ClusterCockpit/cc-metric-collector/internal/metricAggregator"
|
||||||
mct "github.com/ClusterCockpit/cc-metric-collector/internal/multiChanTicker"
|
lp "github.com/ClusterCockpit/cc-metric-collector/pkg/ccMetric"
|
||||||
|
mct "github.com/ClusterCockpit/cc-metric-collector/pkg/multiChanTicker"
|
||||||
units "github.com/ClusterCockpit/cc-units"
|
units "github.com/ClusterCockpit/cc-units"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@@ -10,7 +10,7 @@ import (
|
|||||||
"strconv"
|
"strconv"
|
||||||
"strings"
|
"strings"
|
||||||
|
|
||||||
cclogger "github.com/ClusterCockpit/cc-metric-collector/internal/ccLogger"
|
cclogger "github.com/ClusterCockpit/cc-metric-collector/pkg/ccLogger"
|
||||||
)
|
)
|
||||||
|
|
||||||
const SYSFS_NUMABASE = `/sys/devices/system/node`
|
const SYSFS_NUMABASE = `/sys/devices/system/node`
|
@@ -3,7 +3,7 @@ package multiChanTicker
|
|||||||
import (
|
import (
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
cclog "github.com/ClusterCockpit/cc-metric-collector/internal/ccLogger"
|
cclog "github.com/ClusterCockpit/cc-metric-collector/pkg/ccLogger"
|
||||||
)
|
)
|
||||||
|
|
||||||
type multiChanTicker struct {
|
type multiChanTicker struct {
|
@@ -21,5 +21,30 @@
|
|||||||
"endpoint": "https://my-endpoint-2"
|
"endpoint": "https://my-endpoint-2"
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
|
},
|
||||||
|
"ipmi_recv": {
|
||||||
|
"type": "ipmi",
|
||||||
|
"exclude_metrics": [
|
||||||
|
"fan_speed",
|
||||||
|
"voltage"
|
||||||
|
],
|
||||||
|
"client_config": [
|
||||||
|
{
|
||||||
|
"username": "username-1",
|
||||||
|
"password": "password-1",
|
||||||
|
"endpoint": "ipmi-sensors://my-endpoint-1",
|
||||||
|
"host_list": [
|
||||||
|
"my-host-1"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"username": "username-2",
|
||||||
|
"password": "password-2",
|
||||||
|
"endpoint": "ipmi-sensors://my-endpoint-2",
|
||||||
|
"host_list": [
|
||||||
|
"my-host-2"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
]
|
||||||
}
|
}
|
||||||
}
|
}
|
164
receivers/appMetricReceiver.go
Normal file
164
receivers/appMetricReceiver.go
Normal file
@@ -0,0 +1,164 @@
|
|||||||
|
package receivers
|
||||||
|
|
||||||
|
import (
|
||||||
|
"bufio"
|
||||||
|
"encoding/json"
|
||||||
|
"fmt"
|
||||||
|
"net"
|
||||||
|
"os"
|
||||||
|
"sync"
|
||||||
|
|
||||||
|
cclog "github.com/ClusterCockpit/cc-metric-collector/pkg/ccLogger"
|
||||||
|
lp "github.com/ClusterCockpit/cc-metric-collector/pkg/ccMetric"
|
||||||
|
influx "github.com/influxdata/line-protocol"
|
||||||
|
)
|
||||||
|
|
||||||
|
// AppMetricReceiverConfig is the JSON configuration of the appmetrics
// receiver.
type AppMetricReceiverConfig struct {
	// Receiver type as given in the configuration file
	Type string `json:"type"`
	// Path of the UNIX socket file the receiver listens on
	SocketFile string `json:"socket_file"`
}
|
||||||
|
|
||||||
|
type AppMetricReceiver struct {
|
||||||
|
receiver
|
||||||
|
config AppMetricReceiverConfig
|
||||||
|
|
||||||
|
// Storage for static information
|
||||||
|
meta map[string]string
|
||||||
|
// Use in case of own go routine
|
||||||
|
done chan bool
|
||||||
|
wg sync.WaitGroup
|
||||||
|
// Influx stuff
|
||||||
|
handler *influx.MetricHandler
|
||||||
|
parser *influx.Parser
|
||||||
|
// WaitGroup for individual connections
|
||||||
|
connWg sync.WaitGroup
|
||||||
|
listener net.Listener
|
||||||
|
}
|
||||||
|
|
||||||
|
func (r *AppMetricReceiver) newConnection(conn net.Conn) {
|
||||||
|
//defer conn.Close()
|
||||||
|
//defer wg.Done()
|
||||||
|
|
||||||
|
buffer, err := bufio.NewReader(conn).ReadBytes('\n')
|
||||||
|
if err != nil {
|
||||||
|
conn.Close()
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
metrics, err := r.parser.Parse(buffer)
|
||||||
|
if err != nil {
|
||||||
|
cclog.ComponentError(r.name, "failed to parse received metrics")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
for _, m := range metrics {
|
||||||
|
y := lp.FromInfluxMetric(m)
|
||||||
|
for k, v := range r.meta {
|
||||||
|
y.AddMeta(k, v)
|
||||||
|
}
|
||||||
|
if r.sink != nil {
|
||||||
|
r.sink <- y
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
r.newConnection(conn)
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
func (r *AppMetricReceiver) newAccepter(listenSocket net.Listener) {
|
||||||
|
accept_loop:
|
||||||
|
for {
|
||||||
|
select {
|
||||||
|
case <-r.done:
|
||||||
|
break accept_loop
|
||||||
|
default:
|
||||||
|
conn, err := listenSocket.Accept()
|
||||||
|
if err == nil {
|
||||||
|
r.connWg.Add(1)
|
||||||
|
go func() {
|
||||||
|
r.newConnection(conn)
|
||||||
|
r.connWg.Done()
|
||||||
|
}()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
r.wg.Done()
|
||||||
|
}
|
||||||
|
|
||||||
|
// Implement functions required for Receiver interface
|
||||||
|
// Start(), Close()
|
||||||
|
// See: metricReceiver.go
|
||||||
|
|
||||||
|
func (r *AppMetricReceiver) Start() {
|
||||||
|
var err error = nil
|
||||||
|
cclog.ComponentDebug(r.name, "START")
|
||||||
|
|
||||||
|
r.listener, err = net.Listen("unix", r.config.SocketFile)
|
||||||
|
if err != nil {
|
||||||
|
cclog.ComponentError(r.name, "failed to listen at socket", r.config.SocketFile)
|
||||||
|
}
|
||||||
|
if _, err := os.Stat(r.config.SocketFile); err != nil {
|
||||||
|
cclog.ComponentError(r.name, "failed to create socket", r.config.SocketFile)
|
||||||
|
}
|
||||||
|
|
||||||
|
r.done = make(chan bool)
|
||||||
|
r.wg.Add(1)
|
||||||
|
go r.newAccepter(r.listener)
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
// Close receiver: close network connection, close files, close libraries, ...
|
||||||
|
func (r *AppMetricReceiver) Close() {
|
||||||
|
cclog.ComponentDebug(r.name, "CLOSE")
|
||||||
|
|
||||||
|
if _, err := os.Stat(r.config.SocketFile); err == nil {
|
||||||
|
if err := os.RemoveAll(r.config.SocketFile); err != nil {
|
||||||
|
cclog.ComponentError(r.name, "Failed to remove UNIX socket", r.config.SocketFile)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// in case of own go routine, send the signal and wait
|
||||||
|
r.listener.Close()
|
||||||
|
r.done <- true
|
||||||
|
close(r.done)
|
||||||
|
r.connWg.Wait()
|
||||||
|
r.wg.Wait()
|
||||||
|
}
|
||||||
|
|
||||||
|
// New function to create a new instance of the receiver
|
||||||
|
// Initialize the receiver by giving it a name and reading in the config JSON
|
||||||
|
func NewAppMetricReceiver(name string, config json.RawMessage) (Receiver, error) {
|
||||||
|
r := new(AppMetricReceiver)
|
||||||
|
|
||||||
|
// Set name of SampleReceiver
|
||||||
|
// The name should be chosen in such a way that different instances of SampleReceiver can be distinguished
|
||||||
|
r.name = fmt.Sprintf("AppMetricReceiver(%s)", name)
|
||||||
|
|
||||||
|
// Set static information
|
||||||
|
r.meta = map[string]string{"source": r.name}
|
||||||
|
|
||||||
|
// Set defaults in r.config
|
||||||
|
// Allow overwriting these defaults by reading config JSON
|
||||||
|
r.config.SocketFile = "/tmp/cc.sock"
|
||||||
|
|
||||||
|
// Read the sample receiver specific JSON config
|
||||||
|
if len(config) > 0 {
|
||||||
|
err := json.Unmarshal(config, &r.config)
|
||||||
|
if err != nil {
|
||||||
|
cclog.ComponentError(r.name, "Error reading config:", err.Error())
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if len(r.config.SocketFile) == 0 {
|
||||||
|
cclog.ComponentError(r.name, "Invalid socket_file setting:", r.config.SocketFile)
|
||||||
|
return nil, fmt.Errorf("invalid socket_file setting: %s", r.config.SocketFile)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check that all required fields in the configuration are set
|
||||||
|
// Use 'if len(r.config.Option) > 0' for strings
|
||||||
|
r.handler = influx.NewMetricHandler()
|
||||||
|
r.parser = influx.NewParser(r.handler)
|
||||||
|
r.parser.SetTimeFunc(DefaultTime)
|
||||||
|
|
||||||
|
return r, nil
|
||||||
|
}
|
23
receivers/appMetricReceiver.md
Normal file
23
receivers/appMetricReceiver.md
Normal file
@@ -0,0 +1,23 @@
|
|||||||
|
## `appmetrics` receiver
|
||||||
|
|
||||||
|
The `appmetrics` receiver can be used to submit metrics from an application into the monitoring system. It listens for incoming connections on a UNIX socket.
|
||||||
|
|
||||||
|
### Configuration structure
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
  "<name>": {
    "type": "appmetrics",
    "socket_file": "/tmp/cc.sock"
  }
}
|
||||||
|
```
|
||||||
|
|
||||||
|
- `type`: makes the receiver an `appmetrics` receiver
|
||||||
|
- `socket_file`: Path of the UNIX socket file to listen on (default: `/tmp/cc.sock`)
|
||||||
|
|
||||||
|
### Inputs from applications
|
||||||
|
|
||||||
|
Applications can connect to the `appmetrics` socket and provide metrics in the [InfluxDB line protocol](https://github.com/influxdata/line-protocol), one metric per line. It is currently not possible to submit meta information, as the Influx line protocol has no notion of it.
|
||||||
|
|
||||||
|
|
@@ -10,8 +10,8 @@ import (
|
|||||||
"strings"
|
"strings"
|
||||||
"sync"
|
"sync"
|
||||||
|
|
||||||
cclog "github.com/ClusterCockpit/cc-metric-collector/internal/ccLogger"
|
cclog "github.com/ClusterCockpit/cc-metric-collector/pkg/ccLogger"
|
||||||
lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric"
|
lp "github.com/ClusterCockpit/cc-metric-collector/pkg/ccMetric"
|
||||||
"github.com/gorilla/mux"
|
"github.com/gorilla/mux"
|
||||||
influx "github.com/influxdata/line-protocol"
|
influx "github.com/influxdata/line-protocol"
|
||||||
)
|
)
|
||||||
|
525
receivers/ipmiReceiver.go
Normal file
525
receivers/ipmiReceiver.go
Normal file
@@ -0,0 +1,525 @@
|
|||||||
|
package receivers
|
||||||
|
|
||||||
|
import (
|
||||||
|
"bufio"
|
||||||
|
"bytes"
|
||||||
|
"encoding/json"
|
||||||
|
"fmt"
|
||||||
|
"io"
|
||||||
|
"os/exec"
|
||||||
|
"regexp"
|
||||||
|
"strconv"
|
||||||
|
"strings"
|
||||||
|
"sync"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
cclog "github.com/ClusterCockpit/cc-metric-collector/pkg/ccLogger"
|
||||||
|
lp "github.com/ClusterCockpit/cc-metric-collector/pkg/ccMetric"
|
||||||
|
)
|
||||||
|
|
||||||
|
// IPMIReceiverClientConfig is the processed configuration for one group of
// IPMI devices sharing the same settings.
type IPMIReceiverClientConfig struct {
	// Protocol / tool to use for IPMI sensor reading
	Protocol string
	// Out of band IPMI driver
	DriverType string
	// Maximum number of simultaneous IPMI connections
	Fanout int
	// Number of remote IPMI devices with the same configuration
	NumHosts int
	// Comma separated list of remote IPMI devices to communicate with
	IPMIHosts string
	// Mapping between IPMI device name and host name
	IPMI2HostMapping map[string]string
	// User name to authenticate with
	Username string
	// Password to use for authentication
	Password string
	// Additional command line options for ipmi-sensors
	CLIOptions []string
	// Set of excluded metric names
	isExcluded map[string]bool
}
|
||||||
|
|
||||||
|
type IPMIReceiver struct {
|
||||||
|
receiver
|
||||||
|
config struct {
|
||||||
|
Interval time.Duration
|
||||||
|
|
||||||
|
// Client config for each IPMI hosts
|
||||||
|
ClientConfigs []IPMIReceiverClientConfig
|
||||||
|
}
|
||||||
|
|
||||||
|
// Storage for static information
|
||||||
|
meta map[string]string
|
||||||
|
|
||||||
|
done chan bool // channel to finish / stop IPMI receiver
|
||||||
|
wg sync.WaitGroup // wait group for IPMI receiver
|
||||||
|
}
|
||||||
|
|
||||||
|
// doReadMetrics reads metrics from all configure IPMI hosts.
|
||||||
|
func (r *IPMIReceiver) doReadMetric() {
|
||||||
|
for i := range r.config.ClientConfigs {
|
||||||
|
clientConfig := &r.config.ClientConfigs[i]
|
||||||
|
var cmd_options []string
|
||||||
|
if clientConfig.Protocol == "ipmi-sensors" {
|
||||||
|
cmd_options = append(cmd_options,
|
||||||
|
"--always-prefix",
|
||||||
|
"--sdr-cache-recreate",
|
||||||
|
// Attempt to interpret OEM data, such as event data, sensor readings, or general extra info
|
||||||
|
"--interpret-oem-data",
|
||||||
|
// Ignore not-available (i.e. N/A) sensors in output
|
||||||
|
"--ignore-not-available-sensors",
|
||||||
|
// Ignore unrecognized sensor events
|
||||||
|
"--ignore-unrecognized-events",
|
||||||
|
// Output fields in comma separated format
|
||||||
|
"--comma-separated-output",
|
||||||
|
// Do not output column headers
|
||||||
|
"--no-header-output",
|
||||||
|
// Output non-abbreviated units (e.g. 'Amps' instead of 'A').
|
||||||
|
// May aid in disambiguation of units (e.g. 'C' for Celsius or Coulombs).
|
||||||
|
"--non-abbreviated-units",
|
||||||
|
"--fanout", fmt.Sprint(clientConfig.Fanout),
|
||||||
|
"--driver-type", clientConfig.DriverType,
|
||||||
|
"--hostname", clientConfig.IPMIHosts,
|
||||||
|
"--username", clientConfig.Username,
|
||||||
|
"--password", clientConfig.Password,
|
||||||
|
)
|
||||||
|
cmd_options := append(cmd_options, clientConfig.CLIOptions...)
|
||||||
|
|
||||||
|
command := exec.Command("ipmi-sensors", cmd_options...)
|
||||||
|
stdout, _ := command.StdoutPipe()
|
||||||
|
errBuf := new(bytes.Buffer)
|
||||||
|
command.Stderr = errBuf
|
||||||
|
|
||||||
|
// start command
|
||||||
|
if err := command.Start(); err != nil {
|
||||||
|
cclog.ComponentError(
|
||||||
|
r.name,
|
||||||
|
fmt.Sprintf("doReadMetric(): Failed to start command \"%s\": %v", command.String(), err),
|
||||||
|
)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
// Read command output
|
||||||
|
const (
|
||||||
|
idxID = iota
|
||||||
|
idxName
|
||||||
|
idxType
|
||||||
|
idxReading
|
||||||
|
idxUnits
|
||||||
|
idxEvent
|
||||||
|
)
|
||||||
|
numPrefixRegex := regexp.MustCompile("^[[:digit:]][[:digit:]]-(.*)$")
|
||||||
|
scanner := bufio.NewScanner(stdout)
|
||||||
|
for scanner.Scan() {
|
||||||
|
// Read host
|
||||||
|
v1 := strings.Split(scanner.Text(), ": ")
|
||||||
|
if len(v1) != 2 {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
host, ok := clientConfig.IPMI2HostMapping[v1[0]]
|
||||||
|
if !ok {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
// Read sensors
|
||||||
|
v2 := strings.Split(v1[1], ",")
|
||||||
|
if len(v2) != 6 {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
// Skip sensors with non available sensor readings
|
||||||
|
if v2[idxReading] == "N/A" {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
metric := strings.ToLower(v2[idxType])
|
||||||
|
name := strings.ToLower(
|
||||||
|
strings.Replace(
|
||||||
|
strings.TrimSpace(
|
||||||
|
v2[idxName]), " ", "_", -1))
|
||||||
|
// remove prefix enumeration like 01-...
|
||||||
|
if v := numPrefixRegex.FindStringSubmatch(name); v != nil {
|
||||||
|
name = v[1]
|
||||||
|
}
|
||||||
|
unit := v2[idxUnits]
|
||||||
|
if unit == "Watts" {
|
||||||
|
|
||||||
|
// Power
|
||||||
|
metric = "power"
|
||||||
|
name = strings.TrimSuffix(name, "_power")
|
||||||
|
name = strings.TrimSuffix(name, "_pwr")
|
||||||
|
name = strings.TrimPrefix(name, "pwr_")
|
||||||
|
} else if metric == "voltage" &&
|
||||||
|
unit == "Volts" {
|
||||||
|
|
||||||
|
// Voltage
|
||||||
|
name = strings.TrimPrefix(name, "volt_")
|
||||||
|
} else if metric == "current" &&
|
||||||
|
unit == "Amps" {
|
||||||
|
|
||||||
|
// Current
|
||||||
|
unit = "Ampere"
|
||||||
|
} else if metric == "temperature" &&
|
||||||
|
unit == "degrees C" {
|
||||||
|
|
||||||
|
// Temperature
|
||||||
|
name = strings.TrimSuffix(name, "_temp")
|
||||||
|
unit = "degC"
|
||||||
|
} else if metric == "temperature" &&
|
||||||
|
unit == "degrees F" {
|
||||||
|
|
||||||
|
// Temperature
|
||||||
|
name = strings.TrimSuffix(name, "_temp")
|
||||||
|
unit = "degF"
|
||||||
|
} else if metric == "fan" && unit == "RPM" {
|
||||||
|
|
||||||
|
// Fan speed
|
||||||
|
metric = "fan_speed"
|
||||||
|
name = strings.TrimSuffix(name, "_tach")
|
||||||
|
name = strings.TrimPrefix(name, "spd_")
|
||||||
|
} else if (metric == "cooling device" ||
|
||||||
|
metric == "other units based sensor") &&
|
||||||
|
name == "system_air_flow" &&
|
||||||
|
unit == "CFM" {
|
||||||
|
|
||||||
|
// Air flow
|
||||||
|
metric = "air_flow"
|
||||||
|
name = strings.TrimSuffix(name, "_air_flow")
|
||||||
|
unit = "CubicFeetPerMinute"
|
||||||
|
} else if (metric == "processor" ||
|
||||||
|
metric == "other units based sensor") &&
|
||||||
|
(name == "cpu_utilization" ||
|
||||||
|
name == "io_utilization" ||
|
||||||
|
name == "mem_utilization" ||
|
||||||
|
name == "sys_utilization") &&
|
||||||
|
(unit == "unspecified" ||
|
||||||
|
unit == "%") {
|
||||||
|
|
||||||
|
// Utilization
|
||||||
|
metric = "utilization"
|
||||||
|
name = strings.TrimSuffix(name, "_utilization")
|
||||||
|
unit = "percent"
|
||||||
|
} else {
|
||||||
|
if false {
|
||||||
|
// Debug output for unprocessed metrics
|
||||||
|
fmt.Printf(
|
||||||
|
"host: '%s', metric: '%s', name: '%s', unit: '%s'\n",
|
||||||
|
host, metric, name, unit)
|
||||||
|
}
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
// Skip excluded metrics
|
||||||
|
if clientConfig.isExcluded[metric] {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
// Parse sensor value
|
||||||
|
value, err := strconv.ParseFloat(v2[idxReading], 64)
|
||||||
|
if err != nil {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
y, err := lp.New(
|
||||||
|
metric,
|
||||||
|
map[string]string{
|
||||||
|
"hostname": host,
|
||||||
|
"type": "node",
|
||||||
|
"name": name,
|
||||||
|
},
|
||||||
|
map[string]string{
|
||||||
|
"source": r.name,
|
||||||
|
"group": "IPMI",
|
||||||
|
"unit": unit,
|
||||||
|
},
|
||||||
|
map[string]interface{}{
|
||||||
|
"value": value,
|
||||||
|
},
|
||||||
|
time.Now())
|
||||||
|
if err == nil {
|
||||||
|
r.sink <- y
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Wait for command end
|
||||||
|
if err := command.Wait(); err != nil {
|
||||||
|
errMsg, _ := io.ReadAll(errBuf)
|
||||||
|
cclog.ComponentError(
|
||||||
|
r.name,
|
||||||
|
fmt.Sprintf("doReadMetric(): Failed to wait for the end of command \"%s\": %v\n",
|
||||||
|
strings.Replace(command.String(), clientConfig.Password, "<PW>", -1), err),
|
||||||
|
fmt.Sprintf("doReadMetric(): command stderr: \"%s\"\n", string(errMsg)),
|
||||||
|
)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (r *IPMIReceiver) Start() {
|
||||||
|
cclog.ComponentDebug(r.name, "START")
|
||||||
|
|
||||||
|
// Start IPMI receiver
|
||||||
|
r.wg.Add(1)
|
||||||
|
go func() {
|
||||||
|
defer r.wg.Done()
|
||||||
|
|
||||||
|
// Create ticker
|
||||||
|
ticker := time.NewTicker(r.config.Interval)
|
||||||
|
defer ticker.Stop()
|
||||||
|
|
||||||
|
for {
|
||||||
|
r.doReadMetric()
|
||||||
|
|
||||||
|
select {
|
||||||
|
case tickerTime := <-ticker.C:
|
||||||
|
// Check if we missed the ticker event
|
||||||
|
if since := time.Since(tickerTime); since > 5*time.Second {
|
||||||
|
cclog.ComponentInfo(r.name, "Missed ticker event for more then", since)
|
||||||
|
}
|
||||||
|
|
||||||
|
// process ticker event -> continue
|
||||||
|
continue
|
||||||
|
case <-r.done:
|
||||||
|
// process done event
|
||||||
|
return
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}()
|
||||||
|
|
||||||
|
cclog.ComponentDebug(r.name, "STARTED")
|
||||||
|
}
|
||||||
|
|
||||||
|
// Close receiver: close network connection, close files, close libraries, ...
|
||||||
|
func (r *IPMIReceiver) Close() {
|
||||||
|
cclog.ComponentDebug(r.name, "CLOSE")
|
||||||
|
|
||||||
|
// Send the signal and wait
|
||||||
|
close(r.done)
|
||||||
|
r.wg.Wait()
|
||||||
|
|
||||||
|
cclog.ComponentDebug(r.name, "DONE")
|
||||||
|
}
|
||||||
|
|
||||||
|
// NewIPMIReceiver creates a new instance of the redfish receiver
|
||||||
|
// Initialize the receiver by giving it a name and reading in the config JSON
|
||||||
|
func NewIPMIReceiver(name string, config json.RawMessage) (Receiver, error) {
|
||||||
|
r := new(IPMIReceiver)
|
||||||
|
|
||||||
|
// Config options from config file
|
||||||
|
configJSON := struct {
|
||||||
|
Type string `json:"type"`
|
||||||
|
|
||||||
|
// How often the IPMI sensor metrics should be read and send to the sink (default: 30 s)
|
||||||
|
IntervalString string `json:"interval,omitempty"`
|
||||||
|
|
||||||
|
// Maximum number of simultaneous IPMI connections (default: 64)
|
||||||
|
Fanout int `json:"fanout,omitempty"`
|
||||||
|
|
||||||
|
// Out of band IPMI driver (default: LAN_2_0)
|
||||||
|
DriverType string `json:"driver_type,omitempty"`
|
||||||
|
|
||||||
|
// Default client username, password and endpoint
|
||||||
|
Username *string `json:"username"` // User name to authenticate with
|
||||||
|
Password *string `json:"password"` // Password to use for authentication
|
||||||
|
Endpoint *string `json:"endpoint"` // URL of the IPMI device
|
||||||
|
|
||||||
|
// Globally excluded metrics
|
||||||
|
ExcludeMetrics []string `json:"exclude_metrics,omitempty"`
|
||||||
|
|
||||||
|
ClientConfigs []struct {
|
||||||
|
Fanout int `json:"fanout,omitempty"` // Maximum number of simultaneous IPMI connections (default: 64)
|
||||||
|
DriverType string `json:"driver_type,omitempty"` // Out of band IPMI driver (default: LAN_2_0)
|
||||||
|
HostList []string `json:"host_list"` // List of hosts with the same client configuration
|
||||||
|
Username *string `json:"username"` // User name to authenticate with
|
||||||
|
Password *string `json:"password"` // Password to use for authentication
|
||||||
|
Endpoint *string `json:"endpoint"` // URL of the IPMI service
|
||||||
|
|
||||||
|
// Per client excluded metrics
|
||||||
|
ExcludeMetrics []string `json:"exclude_metrics,omitempty"`
|
||||||
|
|
||||||
|
// Additional command line options for ipmi-sensors
|
||||||
|
CLIOptions []string `json:"cli_options,omitempty"`
|
||||||
|
} `json:"client_config"`
|
||||||
|
}{
|
||||||
|
// Set defaults values
|
||||||
|
// Allow overwriting these defaults by reading config JSON
|
||||||
|
Fanout: 64,
|
||||||
|
DriverType: "LAN_2_0",
|
||||||
|
IntervalString: "30s",
|
||||||
|
}
|
||||||
|
|
||||||
|
// Set name of IPMIReceiver
|
||||||
|
r.name = fmt.Sprintf("IPMIReceiver(%s)", name)
|
||||||
|
|
||||||
|
// Create done channel
|
||||||
|
r.done = make(chan bool)
|
||||||
|
|
||||||
|
// Set static information
|
||||||
|
r.meta = map[string]string{"source": r.name}
|
||||||
|
|
||||||
|
// Read the IPMI receiver specific JSON config
|
||||||
|
if len(config) > 0 {
|
||||||
|
err := json.Unmarshal(config, &configJSON)
|
||||||
|
if err != nil {
|
||||||
|
cclog.ComponentError(r.name, "Error reading config:", err.Error())
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Convert interval string representation to duration
|
||||||
|
var err error
|
||||||
|
r.config.Interval, err = time.ParseDuration(configJSON.IntervalString)
|
||||||
|
if err != nil {
|
||||||
|
err := fmt.Errorf(
|
||||||
|
"Failed to parse duration string interval='%s': %w",
|
||||||
|
configJSON.IntervalString,
|
||||||
|
err,
|
||||||
|
)
|
||||||
|
cclog.Error(r.name, err)
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
// Create client config from JSON config
|
||||||
|
totalNumHosts := 0
|
||||||
|
for i := range configJSON.ClientConfigs {
|
||||||
|
clientConfigJSON := &configJSON.ClientConfigs[i]
|
||||||
|
|
||||||
|
var endpoint string
|
||||||
|
if clientConfigJSON.Endpoint != nil {
|
||||||
|
endpoint = *clientConfigJSON.Endpoint
|
||||||
|
} else if configJSON.Endpoint != nil {
|
||||||
|
endpoint = *configJSON.Endpoint
|
||||||
|
} else {
|
||||||
|
err := fmt.Errorf("client config number %v requires endpoint", i)
|
||||||
|
cclog.ComponentError(r.name, err)
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
fanout := configJSON.Fanout
|
||||||
|
if clientConfigJSON.Fanout != 0 {
|
||||||
|
fanout = clientConfigJSON.Fanout
|
||||||
|
}
|
||||||
|
|
||||||
|
driverType := configJSON.DriverType
|
||||||
|
if clientConfigJSON.DriverType != "" {
|
||||||
|
driverType = clientConfigJSON.DriverType
|
||||||
|
}
|
||||||
|
if driverType != "LAN" && driverType != "LAN_2_0" {
|
||||||
|
err := fmt.Errorf("client config number %v has invalid driver type %s", i, driverType)
|
||||||
|
cclog.ComponentError(r.name, err)
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
var protocol string
|
||||||
|
var host_pattern string
|
||||||
|
if e := strings.Split(endpoint, "://"); len(e) == 2 {
|
||||||
|
protocol = e[0]
|
||||||
|
host_pattern = e[1]
|
||||||
|
} else {
|
||||||
|
err := fmt.Errorf("client config number %v has invalid endpoint %s", i, endpoint)
|
||||||
|
cclog.ComponentError(r.name, err)
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
var username string
|
||||||
|
if clientConfigJSON.Username != nil {
|
||||||
|
username = *clientConfigJSON.Username
|
||||||
|
} else if configJSON.Username != nil {
|
||||||
|
username = *configJSON.Username
|
||||||
|
} else {
|
||||||
|
err := fmt.Errorf("client config number %v requires username", i)
|
||||||
|
cclog.ComponentError(r.name, err)
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
var password string
|
||||||
|
if clientConfigJSON.Password != nil {
|
||||||
|
password = *clientConfigJSON.Password
|
||||||
|
} else if configJSON.Password != nil {
|
||||||
|
password = *configJSON.Password
|
||||||
|
} else {
|
||||||
|
err := fmt.Errorf("client config number %v requires password", i)
|
||||||
|
cclog.ComponentError(r.name, err)
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
// Create mapping between ipmi hostname and node hostname
|
||||||
|
// This also guaranties that all ipmi hostnames are uniqu
|
||||||
|
ipmi2HostMapping := make(map[string]string)
|
||||||
|
for _, host := range clientConfigJSON.HostList {
|
||||||
|
ipmiHost := strings.Replace(host_pattern, "%h", host, -1)
|
||||||
|
ipmi2HostMapping[ipmiHost] = host
|
||||||
|
}
|
||||||
|
|
||||||
|
numHosts := len(ipmi2HostMapping)
|
||||||
|
totalNumHosts += numHosts
|
||||||
|
ipmiHostList := make([]string, 0, numHosts)
|
||||||
|
for ipmiHost := range ipmi2HostMapping {
|
||||||
|
ipmiHostList = append(ipmiHostList, ipmiHost)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Additional command line options
|
||||||
|
for _, v := range clientConfigJSON.CLIOptions {
|
||||||
|
switch {
|
||||||
|
case v == "-u" || strings.HasPrefix(v, "--username"):
|
||||||
|
err := fmt.Errorf("client config number %v: do not set username in cli_options. Use json config username instead", i)
|
||||||
|
cclog.ComponentError(r.name, err)
|
||||||
|
return nil, err
|
||||||
|
case v == "-p" || strings.HasPrefix(v, "--password"):
|
||||||
|
err := fmt.Errorf("client config number %v: do not set password in cli_options. Use json config password instead", i)
|
||||||
|
cclog.ComponentError(r.name, err)
|
||||||
|
return nil, err
|
||||||
|
case v == "-h" || strings.HasPrefix(v, "--hostname"):
|
||||||
|
err := fmt.Errorf("client config number %v: do not set hostname in cli_options. Use json config host_list instead", i)
|
||||||
|
cclog.ComponentError(r.name, err)
|
||||||
|
return nil, err
|
||||||
|
case v == "-D" || strings.HasPrefix(v, "--driver-type"):
|
||||||
|
err := fmt.Errorf("client config number %v: do not set driver type in cli_options. Use json config driver_type instead", i)
|
||||||
|
cclog.ComponentError(r.name, err)
|
||||||
|
return nil, err
|
||||||
|
case v == "-F" || strings.HasPrefix(v, " --fanout"):
|
||||||
|
err := fmt.Errorf("client config number %v: do not set fanout in cli_options. Use json config fanout instead", i)
|
||||||
|
cclog.ComponentError(r.name, err)
|
||||||
|
return nil, err
|
||||||
|
case v == "--always-prefix" ||
|
||||||
|
v == "--sdr-cache-recreate" ||
|
||||||
|
v == "--interpret-oem-data" ||
|
||||||
|
v == "--ignore-not-available-sensors" ||
|
||||||
|
v == "--ignore-unrecognized-events" ||
|
||||||
|
v == "--comma-separated-output" ||
|
||||||
|
v == "--no-header-output" ||
|
||||||
|
v == "--non-abbreviated-units":
|
||||||
|
err := fmt.Errorf("client config number %v: Do not use option %s in cli_options, it is used internally", i, v)
|
||||||
|
cclog.ComponentError(r.name, err)
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
cliOptions := make([]string, 0)
|
||||||
|
cliOptions = append(cliOptions, clientConfigJSON.CLIOptions...)
|
||||||
|
|
||||||
|
// Is metrics excluded globally or per client
|
||||||
|
isExcluded := make(map[string]bool)
|
||||||
|
for _, key := range clientConfigJSON.ExcludeMetrics {
|
||||||
|
isExcluded[key] = true
|
||||||
|
}
|
||||||
|
for _, key := range configJSON.ExcludeMetrics {
|
||||||
|
isExcluded[key] = true
|
||||||
|
}
|
||||||
|
|
||||||
|
r.config.ClientConfigs = append(
|
||||||
|
r.config.ClientConfigs,
|
||||||
|
IPMIReceiverClientConfig{
|
||||||
|
Protocol: protocol,
|
||||||
|
Fanout: fanout,
|
||||||
|
DriverType: driverType,
|
||||||
|
NumHosts: numHosts,
|
||||||
|
IPMIHosts: strings.Join(ipmiHostList, ","),
|
||||||
|
IPMI2HostMapping: ipmi2HostMapping,
|
||||||
|
Username: username,
|
||||||
|
Password: password,
|
||||||
|
CLIOptions: cliOptions,
|
||||||
|
isExcluded: isExcluded,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
if totalNumHosts == 0 {
|
||||||
|
err := fmt.Errorf("at least one IPMI host config is required")
|
||||||
|
cclog.ComponentError(r.name, err)
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
cclog.ComponentInfo(r.name, "monitoring", totalNumHosts, "IPMI hosts")
|
||||||
|
return r, nil
|
||||||
|
}
|
48
receivers/ipmiReceiver.md
Normal file
48
receivers/ipmiReceiver.md
Normal file
@@ -0,0 +1,48 @@
|
|||||||
|
## IPMI Receiver
|
||||||
|
|
||||||
|
The IPMI Receiver uses `ipmi-sensors` from the [FreeIPMI](https://www.gnu.org/software/freeipmi/) project to read IPMI sensor readings and sensor data repository (SDR) information. The available metrics depend on the sensors provided by the hardware vendor but typically contain temperature, fan speed, voltage and power metrics.
|
||||||
|
|
||||||
|
### Configuration structure
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"<IPMI receiver name>": {
|
||||||
|
"type": "ipmi",
|
||||||
|
"interval": "30s",
|
||||||
|
"fanout": 256,
|
||||||
|
"username": "<Username>",
|
||||||
|
"password": "<Password>",
|
||||||
|
"endpoint": "ipmi-sensors://%h-p",
|
||||||
|
"exclude_metrics": [ "fan_speed", "voltage" ],
|
||||||
|
"client_config": [
|
||||||
|
{
|
||||||
|
"host_list": ["n1", "n2", "n3", "n4" ]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"host_list": [ "n5", "n6" ],
|
||||||
|
"driver_type": "LAN",
|
||||||
|
"cli_options": [ "--workaround-flags=..." ],
|
||||||
|
"password": "<Password 2>"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
Global settings:
|
||||||
|
|
||||||
|
- `interval`: How often the IPMI sensor metrics should be read and sent to the sink (default: 30 s)
|
||||||
|
|
||||||
|
Global and per IPMI device settings (per IPMI device settings override the global settings):
|
||||||
|
|
||||||
|
- `exclude_metrics`: list of excluded metrics e.g. fan_speed, power, temperature, utilization, voltage
|
||||||
|
- `fanout`: Maximum number of simultaneous IPMI connections (default: 64)
|
||||||
|
- `driver_type`: Out of band IPMI driver (default: LAN_2_0)
|
||||||
|
- `username`: User name to authenticate with
|
||||||
|
- `password`: Password to use for authentication
|
||||||
|
- `endpoint`: URL of the IPMI device (placeholder `%h` gets replaced by the hostname)
|
||||||
|
|
||||||
|
Per IPMI device settings:
|
||||||
|
|
||||||
|
- `host_list`: List of hosts with the same client configuration
|
||||||
|
- `cli_options`: Additional command line options for ipmi-sensors
|
@@ -1,7 +1,7 @@
|
|||||||
package receivers
|
package receivers
|
||||||
|
|
||||||
import (
|
import (
|
||||||
lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric"
|
lp "github.com/ClusterCockpit/cc-metric-collector/pkg/ccMetric"
|
||||||
)
|
)
|
||||||
|
|
||||||
type defaultReceiverConfig struct {
|
type defaultReceiverConfig struct {
|
||||||
|
@@ -6,8 +6,8 @@ import (
|
|||||||
"fmt"
|
"fmt"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
cclog "github.com/ClusterCockpit/cc-metric-collector/internal/ccLogger"
|
cclog "github.com/ClusterCockpit/cc-metric-collector/pkg/ccLogger"
|
||||||
lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric"
|
lp "github.com/ClusterCockpit/cc-metric-collector/pkg/ccMetric"
|
||||||
influx "github.com/influxdata/line-protocol"
|
influx "github.com/influxdata/line-protocol"
|
||||||
nats "github.com/nats-io/nats.go"
|
nats "github.com/nats-io/nats.go"
|
||||||
)
|
)
|
||||||
|
@@ -12,8 +12,8 @@ import (
|
|||||||
"sync"
|
"sync"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
cclog "github.com/ClusterCockpit/cc-metric-collector/internal/ccLogger"
|
cclog "github.com/ClusterCockpit/cc-metric-collector/pkg/ccLogger"
|
||||||
lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric"
|
lp "github.com/ClusterCockpit/cc-metric-collector/pkg/ccMetric"
|
||||||
)
|
)
|
||||||
|
|
||||||
type PrometheusReceiverConfig struct {
|
type PrometheusReceiverConfig struct {
|
||||||
|
@@ -2,16 +2,19 @@ package receivers
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"encoding/json"
|
"encoding/json"
|
||||||
|
"fmt"
|
||||||
"os"
|
"os"
|
||||||
"sync"
|
"sync"
|
||||||
|
|
||||||
cclog "github.com/ClusterCockpit/cc-metric-collector/internal/ccLogger"
|
cclog "github.com/ClusterCockpit/cc-metric-collector/pkg/ccLogger"
|
||||||
lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric"
|
lp "github.com/ClusterCockpit/cc-metric-collector/pkg/ccMetric"
|
||||||
)
|
)
|
||||||
|
|
||||||
var AvailableReceivers = map[string]func(name string, config json.RawMessage) (Receiver, error){
|
var AvailableReceivers = map[string]func(name string, config json.RawMessage) (Receiver, error){
|
||||||
|
"ipmi": NewIPMIReceiver,
|
||||||
"nats": NewNatsReceiver,
|
"nats": NewNatsReceiver,
|
||||||
"redfish": NewRedfishReceiver,
|
"redfish": NewRedfishReceiver,
|
||||||
|
"appmetrics": NewAppMetricReceiver,
|
||||||
}
|
}
|
||||||
|
|
||||||
type receiveManager struct {
|
type receiveManager struct {
|
||||||
@@ -71,9 +74,13 @@ func (rm *receiveManager) AddInput(name string, rawConfig json.RawMessage) error
|
|||||||
cclog.ComponentError("ReceiveManager", "SKIP", config.Type, "JSON config error:", err.Error())
|
cclog.ComponentError("ReceiveManager", "SKIP", config.Type, "JSON config error:", err.Error())
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
if config.Type == "" {
|
||||||
|
cclog.ComponentError("ReceiveManager", "SKIP", "JSON config for receiver", name, "does not contain a receiver type")
|
||||||
|
return fmt.Errorf("JSON config for receiver %s does not contain a receiver type", name)
|
||||||
|
}
|
||||||
if _, found := AvailableReceivers[config.Type]; !found {
|
if _, found := AvailableReceivers[config.Type]; !found {
|
||||||
cclog.ComponentError("ReceiveManager", "SKIP", config.Type, "unknown receiver:", err.Error())
|
cclog.ComponentError("ReceiveManager", "SKIP", "unknown receiver type:", config.Type)
|
||||||
return err
|
return fmt.Errorf("unknown receiver type: %s", config.Type)
|
||||||
}
|
}
|
||||||
r, err := AvailableReceivers[config.Type](name, rawConfig)
|
r, err := AvailableReceivers[config.Type](name, rawConfig)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
@@ -10,8 +10,8 @@ import (
|
|||||||
"sync"
|
"sync"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
cclog "github.com/ClusterCockpit/cc-metric-collector/internal/ccLogger"
|
cclog "github.com/ClusterCockpit/cc-metric-collector/pkg/ccLogger"
|
||||||
lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric"
|
lp "github.com/ClusterCockpit/cc-metric-collector/pkg/ccMetric"
|
||||||
|
|
||||||
// See: https://pkg.go.dev/github.com/stmcginnis/gofish
|
// See: https://pkg.go.dev/github.com/stmcginnis/gofish
|
||||||
"github.com/stmcginnis/gofish"
|
"github.com/stmcginnis/gofish"
|
||||||
|
@@ -4,7 +4,7 @@ import (
|
|||||||
"encoding/json"
|
"encoding/json"
|
||||||
"fmt"
|
"fmt"
|
||||||
|
|
||||||
cclog "github.com/ClusterCockpit/cc-metric-collector/internal/ccLogger"
|
cclog "github.com/ClusterCockpit/cc-metric-collector/pkg/ccLogger"
|
||||||
)
|
)
|
||||||
|
|
||||||
// SampleReceiver configuration: receiver type, listen address, port
|
// SampleReceiver configuration: receiver type, listen address, port
|
||||||
|
@@ -20,7 +20,9 @@ The configuration file for the sinks is a list of configurations. The `type` fie
|
|||||||
[
|
[
|
||||||
"mystdout" : {
|
"mystdout" : {
|
||||||
"type" : "stdout",
|
"type" : "stdout",
|
||||||
"meta_as_tags" : false
|
"meta_as_tags" : [
|
||||||
|
"unit"
|
||||||
|
]
|
||||||
},
|
},
|
||||||
"metricstore" : {
|
"metricstore" : {
|
||||||
"type" : "http",
|
"type" : "http",
|
||||||
|
@@ -4,7 +4,7 @@ import (
|
|||||||
"fmt"
|
"fmt"
|
||||||
"strings"
|
"strings"
|
||||||
|
|
||||||
lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric"
|
lp "github.com/ClusterCockpit/cc-metric-collector/pkg/ccMetric"
|
||||||
)
|
)
|
||||||
|
|
||||||
func GangliaMetricName(point lp.CCMetric) string {
|
func GangliaMetricName(point lp.CCMetric) string {
|
||||||
|
@@ -9,8 +9,8 @@ import (
|
|||||||
// "time"
|
// "time"
|
||||||
"os/exec"
|
"os/exec"
|
||||||
|
|
||||||
cclog "github.com/ClusterCockpit/cc-metric-collector/internal/ccLogger"
|
cclog "github.com/ClusterCockpit/cc-metric-collector/pkg/ccLogger"
|
||||||
lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric"
|
lp "github.com/ClusterCockpit/cc-metric-collector/pkg/ccMetric"
|
||||||
)
|
)
|
||||||
|
|
||||||
const GMETRIC_EXEC = `gmetric`
|
const GMETRIC_EXEC = `gmetric`
|
||||||
|
@@ -9,8 +9,8 @@ import (
|
|||||||
"sync"
|
"sync"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
cclog "github.com/ClusterCockpit/cc-metric-collector/internal/ccLogger"
|
cclog "github.com/ClusterCockpit/cc-metric-collector/pkg/ccLogger"
|
||||||
lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric"
|
lp "github.com/ClusterCockpit/cc-metric-collector/pkg/ccMetric"
|
||||||
influx "github.com/influxdata/line-protocol"
|
influx "github.com/influxdata/line-protocol"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@@ -9,8 +9,8 @@ import (
|
|||||||
"strings"
|
"strings"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
cclog "github.com/ClusterCockpit/cc-metric-collector/internal/ccLogger"
|
cclog "github.com/ClusterCockpit/cc-metric-collector/pkg/ccLogger"
|
||||||
lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric"
|
lp "github.com/ClusterCockpit/cc-metric-collector/pkg/ccMetric"
|
||||||
influxdb2 "github.com/influxdata/influxdb-client-go/v2"
|
influxdb2 "github.com/influxdata/influxdb-client-go/v2"
|
||||||
influxdb2Api "github.com/influxdata/influxdb-client-go/v2/api"
|
influxdb2Api "github.com/influxdata/influxdb-client-go/v2/api"
|
||||||
influxdb2ApiHttp "github.com/influxdata/influxdb-client-go/v2/api/http"
|
influxdb2ApiHttp "github.com/influxdata/influxdb-client-go/v2/api/http"
|
||||||
|
@@ -9,8 +9,8 @@ import (
|
|||||||
"sync"
|
"sync"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
cclog "github.com/ClusterCockpit/cc-metric-collector/internal/ccLogger"
|
cclog "github.com/ClusterCockpit/cc-metric-collector/pkg/ccLogger"
|
||||||
lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric"
|
lp "github.com/ClusterCockpit/cc-metric-collector/pkg/ccMetric"
|
||||||
influxdb2 "github.com/influxdata/influxdb-client-go/v2"
|
influxdb2 "github.com/influxdata/influxdb-client-go/v2"
|
||||||
influxdb2Api "github.com/influxdata/influxdb-client-go/v2/api"
|
influxdb2Api "github.com/influxdata/influxdb-client-go/v2/api"
|
||||||
"github.com/influxdata/influxdb-client-go/v2/api/write"
|
"github.com/influxdata/influxdb-client-go/v2/api/write"
|
||||||
|
@@ -71,8 +71,8 @@ import (
|
|||||||
"fmt"
|
"fmt"
|
||||||
"unsafe"
|
"unsafe"
|
||||||
|
|
||||||
cclog "github.com/ClusterCockpit/cc-metric-collector/internal/ccLogger"
|
cclog "github.com/ClusterCockpit/cc-metric-collector/pkg/ccLogger"
|
||||||
lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric"
|
lp "github.com/ClusterCockpit/cc-metric-collector/pkg/ccMetric"
|
||||||
"github.com/NVIDIA/go-nvml/pkg/dl"
|
"github.com/NVIDIA/go-nvml/pkg/dl"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@@ -1,7 +1,7 @@
|
|||||||
package sinks
|
package sinks
|
||||||
|
|
||||||
import (
|
import (
|
||||||
lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric"
|
lp "github.com/ClusterCockpit/cc-metric-collector/pkg/ccMetric"
|
||||||
)
|
)
|
||||||
|
|
||||||
type defaultSinkConfig struct {
|
type defaultSinkConfig struct {
|
||||||
|
@@ -8,8 +8,8 @@ import (
|
|||||||
"sync"
|
"sync"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
cclog "github.com/ClusterCockpit/cc-metric-collector/internal/ccLogger"
|
cclog "github.com/ClusterCockpit/cc-metric-collector/pkg/ccLogger"
|
||||||
lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric"
|
lp "github.com/ClusterCockpit/cc-metric-collector/pkg/ccMetric"
|
||||||
influx "github.com/influxdata/line-protocol"
|
influx "github.com/influxdata/line-protocol"
|
||||||
nats "github.com/nats-io/nats.go"
|
nats "github.com/nats-io/nats.go"
|
||||||
)
|
)
|
||||||
|
@@ -9,8 +9,8 @@ import (
|
|||||||
"strings"
|
"strings"
|
||||||
"sync"
|
"sync"
|
||||||
|
|
||||||
cclog "github.com/ClusterCockpit/cc-metric-collector/internal/ccLogger"
|
cclog "github.com/ClusterCockpit/cc-metric-collector/pkg/ccLogger"
|
||||||
lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric"
|
lp "github.com/ClusterCockpit/cc-metric-collector/pkg/ccMetric"
|
||||||
"github.com/gorilla/mux"
|
"github.com/gorilla/mux"
|
||||||
"github.com/prometheus/client_golang/prometheus"
|
"github.com/prometheus/client_golang/prometheus"
|
||||||
"github.com/prometheus/client_golang/prometheus/promhttp"
|
"github.com/prometheus/client_golang/prometheus/promhttp"
|
||||||
|
@@ -5,8 +5,8 @@ import (
|
|||||||
"fmt"
|
"fmt"
|
||||||
"log"
|
"log"
|
||||||
|
|
||||||
cclog "github.com/ClusterCockpit/cc-metric-collector/internal/ccLogger"
|
cclog "github.com/ClusterCockpit/cc-metric-collector/pkg/ccLogger"
|
||||||
lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric"
|
lp "github.com/ClusterCockpit/cc-metric-collector/pkg/ccMetric"
|
||||||
)
|
)
|
||||||
|
|
||||||
type SampleSinkConfig struct {
|
type SampleSinkConfig struct {
|
||||||
|
@@ -6,8 +6,8 @@ import (
|
|||||||
"os"
|
"os"
|
||||||
"sync"
|
"sync"
|
||||||
|
|
||||||
cclog "github.com/ClusterCockpit/cc-metric-collector/internal/ccLogger"
|
cclog "github.com/ClusterCockpit/cc-metric-collector/pkg/ccLogger"
|
||||||
lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric"
|
lp "github.com/ClusterCockpit/cc-metric-collector/pkg/ccMetric"
|
||||||
)
|
)
|
||||||
|
|
||||||
const SINK_MAX_FORWARD = 50
|
const SINK_MAX_FORWARD = 50
|
||||||
|
@@ -7,7 +7,7 @@ import (
|
|||||||
"strings"
|
"strings"
|
||||||
|
|
||||||
// "time"
|
// "time"
|
||||||
lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric"
|
lp "github.com/ClusterCockpit/cc-metric-collector/pkg/ccMetric"
|
||||||
)
|
)
|
||||||
|
|
||||||
type StdoutSink struct {
|
type StdoutSink struct {
|
||||||
|
Reference in New Issue
Block a user