mirror of
https://github.com/ClusterCockpit/cc-metric-collector.git
synced 2024-12-26 15:29:04 +01:00
Merge branch 'develop' into smartmon_collector
This commit is contained in:
commit
e2438f8cec
61
.github/workflows/Release.yml
vendored
61
.github/workflows/Release.yml
vendored
@ -133,13 +133,63 @@ jobs:
|
||||
name: cc-metric-collector SRPM for UBI 8
|
||||
path: ${{ steps.rpmbuild.outputs.SRPM }}
|
||||
|
||||
#
|
||||
# Build on Ubuntu 20.04 using official go package
|
||||
#
|
||||
Ubuntu-focal-build:
|
||||
runs-on: ubuntu-latest
|
||||
container: ubuntu:20.04
|
||||
# The job outputs link to the outputs of the 'debrename' step
|
||||
# Only job outputs can be used in child jobs
|
||||
outputs:
|
||||
deb : ${{steps.debrename.outputs.DEB}}
|
||||
steps:
|
||||
# Use apt to install development packages
|
||||
- name: Install development packages
|
||||
run: |
|
||||
apt update && apt --assume-yes upgrade
|
||||
apt --assume-yes install build-essential sed git wget bash
|
||||
# Checkout git repository and submodules
|
||||
# fetch-depth must be 0 to use git describe
|
||||
# See: https://github.com/marketplace/actions/checkout
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v2
|
||||
with:
|
||||
submodules: recursive
|
||||
fetch-depth: 0
|
||||
# Use official golang package
|
||||
- name: Install Golang
|
||||
run: |
|
||||
wget -q https://go.dev/dl/go1.19.1.linux-amd64.tar.gz
|
||||
tar -C /usr/local -xzf go1.19.1.linux-amd64.tar.gz
|
||||
export PATH=/usr/local/go/bin:/usr/local/go/pkg/tool/linux_amd64:$PATH
|
||||
go version
|
||||
- name: DEB build MetricCollector
|
||||
id: dpkg-build
|
||||
run: |
|
||||
export PATH=/usr/local/go/bin:/usr/local/go/pkg/tool/linux_amd64:$PATH
|
||||
make DEB
|
||||
- name: Rename DEB (add '_ubuntu20.04')
|
||||
id: debrename
|
||||
run: |
|
||||
OLD_DEB_NAME=$(echo "${{steps.dpkg-build.outputs.DEB}}" | rev | cut -d '.' -f 2- | rev)
|
||||
NEW_DEB_FILE="${OLD_DEB_NAME}_ubuntu20.04.deb"
|
||||
mv "${{steps.dpkg-build.outputs.DEB}}" "${NEW_DEB_FILE}"
|
||||
echo "::set-output name=DEB::${NEW_DEB_FILE}"
|
||||
# See: https://github.com/actions/upload-artifact
|
||||
- name: Save DEB as artifact
|
||||
uses: actions/upload-artifact@v2
|
||||
with:
|
||||
name: cc-metric-collector DEB for Ubuntu 20.04
|
||||
path: ${{ steps.debrename.outputs.DEB }}
|
||||
|
||||
#
|
||||
# Create release with fresh RPMs
|
||||
#
|
||||
Release:
|
||||
runs-on: ubuntu-latest
|
||||
# We need the RPMs, so add dependency
|
||||
needs: [AlmaLinux-RPM-build, UBI-8-RPM-build]
|
||||
needs: [AlmaLinux-RPM-build, UBI-8-RPM-build, Ubuntu-focal-build]
|
||||
|
||||
steps:
|
||||
# See: https://github.com/actions/download-artifact
|
||||
@ -161,6 +211,11 @@ jobs:
|
||||
with:
|
||||
name: cc-metric-collector SRPM for UBI 8
|
||||
|
||||
- name: Download Ubuntu 20.04 DEB
|
||||
uses: actions/download-artifact@v2
|
||||
with:
|
||||
name: cc-metric-collector DEB for Ubuntu 20.04
|
||||
|
||||
# The download actions do not publish the name of the downloaded file,
|
||||
# so we re-use the job outputs of the parent jobs. The files are all
|
||||
# downloaded to the current folder.
|
||||
@ -174,14 +229,17 @@ jobs:
|
||||
ALMA_85_SRPM=$(basename "${{ needs.AlmaLinux-RPM-build.outputs.srpm}}")
|
||||
UBI_8_RPM=$(basename "${{ needs.UBI-8-RPM-build.outputs.rpm}}")
|
||||
UBI_8_SRPM=$(basename "${{ needs.UBI-8-RPM-build.outputs.srpm}}")
|
||||
U_2004_DEB=$(basename "${{ needs.Ubuntu-focal-build.outputs.deb}}")
|
||||
echo "ALMA_85_RPM::${ALMA_85_RPM}"
|
||||
echo "ALMA_85_SRPM::${ALMA_85_SRPM}"
|
||||
echo "UBI_8_RPM::${UBI_8_RPM}"
|
||||
echo "UBI_8_SRPM::${UBI_8_SRPM}"
|
||||
echo "U_2004_DEB::${U_2004_DEB}"
|
||||
echo "::set-output name=ALMA_85_RPM::${ALMA_85_RPM}"
|
||||
echo "::set-output name=ALMA_85_SRPM::${ALMA_85_SRPM}"
|
||||
echo "::set-output name=UBI_8_RPM::${UBI_8_RPM}"
|
||||
echo "::set-output name=UBI_8_SRPM::${UBI_8_SRPM}"
|
||||
echo "::set-output name=U_2004_DEB::${U_2004_DEB}"
|
||||
|
||||
# See: https://github.com/softprops/action-gh-release
|
||||
- name: Release
|
||||
@ -194,3 +252,4 @@ jobs:
|
||||
${{ steps.files.outputs.ALMA_85_SRPM }}
|
||||
${{ steps.files.outputs.UBI_8_RPM }}
|
||||
${{ steps.files.outputs.UBI_8_SRPM }}
|
||||
${{ steps.files.outputs.U_2004_DEB }}
|
||||
|
28
.github/workflows/runonce.yml
vendored
28
.github/workflows/runonce.yml
vendored
@ -31,4 +31,30 @@ jobs:
|
||||
run: make
|
||||
|
||||
- name: Run MetricCollector once
|
||||
run: ./cc-metric-collector --once --config .github/ci-config.json
|
||||
run: ./cc-metric-collector --once --config .github/ci-config.json
|
||||
|
||||
#
|
||||
# Job build-1-19
|
||||
# Build on latest Ubuntu using golang version 1.19
|
||||
#
|
||||
build-1-19:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
# See: https://github.com/marketplace/actions/checkout
|
||||
# Checkout git repository and submodules
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v2
|
||||
with:
|
||||
submodules: recursive
|
||||
|
||||
# See: https://github.com/marketplace/actions/setup-go-environment
|
||||
- name: Setup Golang
|
||||
uses: actions/setup-go@v3
|
||||
with:
|
||||
go-version: '1.19'
|
||||
|
||||
- name: Build MetricCollector
|
||||
run: make
|
||||
|
||||
- name: Run MetricCollector once
|
||||
run: ./cc-metric-collector --once --config .github/ci-config.json
|
||||
|
@ -1,6 +1,6 @@
|
||||
# cc-metric-collector
|
||||
|
||||
A node agent for measuring, processing and forwarding node level metrics. It is part of the ClusterCockpit ecosystem.
|
||||
A node agent for measuring, processing and forwarding node level metrics. It is part of the [ClusterCockpit ecosystem](./docs/introduction.md).
|
||||
|
||||
The metric collector sends (and receives) metric in the [InfluxDB line protocol](https://docs.influxdata.com/influxdb/cloud/reference/syntax/line-protocol/) as it provides flexibility while providing a separation between tags (like index columns in relational databases) and fields (like data columns).
|
||||
|
||||
@ -11,7 +11,7 @@ The receiver runs as a go routine side-by-side with the timer loop and asynchron
|
||||
# Configuration
|
||||
|
||||
Configuration is implemented using a single json document that is distributed over network and may be persisted as file.
|
||||
Supported metrics are documented [here](https://github.com/ClusterCockpit/cc-specifications/blob/master/metrics/lineprotocol_alternative.md).
|
||||
Supported metrics are documented [here](https://github.com/ClusterCockpit/cc-specifications/blob/master/interfaces/lineprotocol/README.md).
|
||||
|
||||
There is a main configuration file with basic settings that point to the other configuration files for the different components.
|
||||
|
||||
@ -26,7 +26,7 @@ There is a main configuration file with basic settings that point to the other c
|
||||
}
|
||||
```
|
||||
|
||||
The `interval` defines how often the metrics should be read and send to the sink. The `duration` tells collectors how long one measurement has to take. This is important for some collectors, like the `likwid` collector.
|
||||
The `interval` defines how often the metrics should be read and send to the sink. The `duration` tells collectors how long one measurement has to take. This is important for some collectors, like the `likwid` collector. For more information, see [here](./docs/configuration.md).
|
||||
|
||||
See the component READMEs for their configuration:
|
||||
|
||||
@ -44,6 +44,8 @@ $ go get (requires at least golang 1.16)
|
||||
$ make
|
||||
```
|
||||
|
||||
For more information, see [here](./docs/building.md).
|
||||
|
||||
# Running
|
||||
|
||||
```
|
||||
|
@ -35,7 +35,7 @@ In contrast to the configuration files for sinks and receivers, the collectors c
|
||||
* [`nfs4stat`](./nfs4Metric.md)
|
||||
* [`cpufreq`](./cpufreqMetric.md)
|
||||
* [`cpufreq_cpuinfo`](./cpufreqCpuinfoMetric.md)
|
||||
* [`numastat`](./numastatMetric.md)
|
||||
* [`numastats`](./numastatsMetric.md)
|
||||
* [`gpfs`](./gpfsMetric.md)
|
||||
* [`beegfs_meta`](./beegfsmetaMetric.md)
|
||||
* [`beegfs_storage`](./beegfsstorageMetric.md)
|
||||
|
@ -5,7 +5,7 @@ import (
|
||||
"bytes"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io/ioutil"
|
||||
"io"
|
||||
"os"
|
||||
"os/exec"
|
||||
"os/user"
|
||||
@ -115,7 +115,7 @@ func (m *BeegfsMetaCollector) Read(interval time.Duration, output chan lp.CCMetr
|
||||
return
|
||||
}
|
||||
//get mounpoint
|
||||
buffer, _ := ioutil.ReadFile(string("/proc/mounts"))
|
||||
buffer, _ := os.ReadFile(string("/proc/mounts"))
|
||||
mounts := strings.Split(string(buffer), "\n")
|
||||
var mountpoints []string
|
||||
for _, line := range mounts {
|
||||
@ -157,9 +157,9 @@ func (m *BeegfsMetaCollector) Read(interval time.Duration, output chan lp.CCMetr
|
||||
if err != nil {
|
||||
fmt.Fprintf(os.Stderr, "BeegfsMetaCollector.Read(): Failed to execute command \"%s\": %s\n", cmd.String(), err.Error())
|
||||
fmt.Fprintf(os.Stderr, "BeegfsMetaCollector.Read(): command exit code: \"%d\"\n", cmd.ProcessState.ExitCode())
|
||||
data, _ := ioutil.ReadAll(cmdStderr)
|
||||
data, _ := io.ReadAll(cmdStderr)
|
||||
fmt.Fprintf(os.Stderr, "BeegfsMetaCollector.Read(): command stderr: \"%s\"\n", string(data))
|
||||
data, _ = ioutil.ReadAll(cmdStdout)
|
||||
data, _ = io.ReadAll(cmdStdout)
|
||||
fmt.Fprintf(os.Stderr, "BeegfsMetaCollector.Read(): command stdout: \"%s\"\n", string(data))
|
||||
return
|
||||
}
|
||||
|
@ -5,7 +5,7 @@ import (
|
||||
"bytes"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io/ioutil"
|
||||
"io"
|
||||
"os"
|
||||
"os/exec"
|
||||
"os/user"
|
||||
@ -108,7 +108,7 @@ func (m *BeegfsStorageCollector) Read(interval time.Duration, output chan lp.CCM
|
||||
return
|
||||
}
|
||||
//get mounpoint
|
||||
buffer, _ := ioutil.ReadFile(string("/proc/mounts"))
|
||||
buffer, _ := os.ReadFile(string("/proc/mounts"))
|
||||
mounts := strings.Split(string(buffer), "\n")
|
||||
var mountpoints []string
|
||||
for _, line := range mounts {
|
||||
@ -149,9 +149,9 @@ func (m *BeegfsStorageCollector) Read(interval time.Duration, output chan lp.CCM
|
||||
if err != nil {
|
||||
fmt.Fprintf(os.Stderr, "BeegfsStorageCollector.Read(): Failed to execute command \"%s\": %s\n", cmd.String(), err.Error())
|
||||
fmt.Fprintf(os.Stderr, "BeegfsStorageCollector.Read(): command exit code: \"%d\"\n", cmd.ProcessState.ExitCode())
|
||||
data, _ := ioutil.ReadAll(cmdStderr)
|
||||
data, _ := io.ReadAll(cmdStderr)
|
||||
fmt.Fprintf(os.Stderr, "BeegfsStorageCollector.Read(): command stderr: \"%s\"\n", string(data))
|
||||
data, _ = ioutil.ReadAll(cmdStdout)
|
||||
data, _ = io.ReadAll(cmdStdout)
|
||||
fmt.Fprintf(os.Stderr, "BeegfsStorageCollector.Read(): command stdout: \"%s\"\n", string(data))
|
||||
return
|
||||
}
|
||||
|
@ -38,6 +38,7 @@ var AvailableCollectors = map[string]MetricCollector{
|
||||
"beegfs_storage": new(BeegfsStorageCollector),
|
||||
"rocm_smi": new(RocmSmiCollector),
|
||||
"smartmon": new(SmartMonCollector),
|
||||
"schedstat": new(SchedstatCollector),
|
||||
}
|
||||
|
||||
// Metric collector manager data structure
|
||||
|
@ -3,7 +3,7 @@ package collectors
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io/ioutil"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
"strings"
|
||||
@ -88,7 +88,7 @@ func (m *CPUFreqCollector) Init(config json.RawMessage) error {
|
||||
|
||||
// Read package ID
|
||||
physicalPackageIDFile := filepath.Join(cpuDir, "topology", "physical_package_id")
|
||||
line, err := ioutil.ReadFile(physicalPackageIDFile)
|
||||
line, err := os.ReadFile(physicalPackageIDFile)
|
||||
if err != nil {
|
||||
return fmt.Errorf("unable to read physical package ID from file '%s': %v", physicalPackageIDFile, err)
|
||||
}
|
||||
@ -100,7 +100,7 @@ func (m *CPUFreqCollector) Init(config json.RawMessage) error {
|
||||
|
||||
// Read core ID
|
||||
coreIDFile := filepath.Join(cpuDir, "topology", "core_id")
|
||||
line, err = ioutil.ReadFile(coreIDFile)
|
||||
line, err = os.ReadFile(coreIDFile)
|
||||
if err != nil {
|
||||
return fmt.Errorf("unable to read core ID from file '%s': %v", coreIDFile, err)
|
||||
}
|
||||
@ -188,7 +188,7 @@ func (m *CPUFreqCollector) Read(interval time.Duration, output chan lp.CCMetric)
|
||||
}
|
||||
|
||||
// Read current frequency
|
||||
line, err := ioutil.ReadFile(t.scalingCurFreqFile)
|
||||
line, err := os.ReadFile(t.scalingCurFreqFile)
|
||||
if err != nil {
|
||||
cclog.ComponentError(
|
||||
m.name,
|
||||
|
@ -11,6 +11,7 @@ import (
|
||||
|
||||
cclog "github.com/ClusterCockpit/cc-metric-collector/internal/ccLogger"
|
||||
lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric"
|
||||
sysconf "github.com/tklauser/go-sysconf"
|
||||
)
|
||||
|
||||
const CPUSTATFILE = `/proc/stat`
|
||||
@ -22,9 +23,11 @@ type CpustatCollectorConfig struct {
|
||||
type CpustatCollector struct {
|
||||
metricCollector
|
||||
config CpustatCollectorConfig
|
||||
lastTimestamp time.Time // Store time stamp of last tick to derive values
|
||||
matches map[string]int
|
||||
cputags map[string]map[string]string
|
||||
nodetags map[string]string
|
||||
olddata map[string]map[string]int64
|
||||
}
|
||||
|
||||
func (m *CpustatCollector) Init(config json.RawMessage) error {
|
||||
@ -76,36 +79,48 @@ func (m *CpustatCollector) Init(config json.RawMessage) error {
|
||||
// Pre-generate tags for all CPUs
|
||||
num_cpus := 0
|
||||
m.cputags = make(map[string]map[string]string)
|
||||
m.olddata = make(map[string]map[string]int64)
|
||||
scanner := bufio.NewScanner(file)
|
||||
for scanner.Scan() {
|
||||
line := scanner.Text()
|
||||
linefields := strings.Fields(line)
|
||||
if strings.HasPrefix(linefields[0], "cpu") && strings.Compare(linefields[0], "cpu") != 0 {
|
||||
if strings.Compare(linefields[0], "cpu") == 0 {
|
||||
m.olddata["cpu"] = make(map[string]int64)
|
||||
for k, v := range m.matches {
|
||||
m.olddata["cpu"][k], _ = strconv.ParseInt(linefields[v], 0, 64)
|
||||
}
|
||||
} else if strings.HasPrefix(linefields[0], "cpu") && strings.Compare(linefields[0], "cpu") != 0 {
|
||||
cpustr := strings.TrimLeft(linefields[0], "cpu")
|
||||
cpu, _ := strconv.Atoi(cpustr)
|
||||
m.cputags[linefields[0]] = map[string]string{"type": "hwthread", "type-id": fmt.Sprintf("%d", cpu)}
|
||||
m.olddata[linefields[0]] = make(map[string]int64)
|
||||
for k, v := range m.matches {
|
||||
m.olddata[linefields[0]][k], _ = strconv.ParseInt(linefields[v], 0, 64)
|
||||
}
|
||||
num_cpus++
|
||||
}
|
||||
}
|
||||
m.lastTimestamp = time.Now()
|
||||
m.init = true
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *CpustatCollector) parseStatLine(linefields []string, tags map[string]string, output chan lp.CCMetric) {
|
||||
func (m *CpustatCollector) parseStatLine(linefields []string, tags map[string]string, output chan lp.CCMetric, now time.Time, tsdelta time.Duration) {
|
||||
values := make(map[string]float64)
|
||||
total := 0.0
|
||||
clktck, _ := sysconf.Sysconf(sysconf.SC_CLK_TCK)
|
||||
for match, index := range m.matches {
|
||||
if len(match) > 0 {
|
||||
x, err := strconv.ParseInt(linefields[index], 0, 64)
|
||||
if err == nil {
|
||||
values[match] = float64(x)
|
||||
total += values[match]
|
||||
vdiff := x - m.olddata[linefields[0]][match]
|
||||
m.olddata[linefields[0]][match] = x // Store new value for next run
|
||||
values[match] = float64(vdiff) / float64(tsdelta.Seconds()) / float64(clktck)
|
||||
}
|
||||
}
|
||||
}
|
||||
t := time.Now()
|
||||
|
||||
for name, value := range values {
|
||||
y, err := lp.New(name, tags, m.meta, map[string]interface{}{"value": (value * 100.0) / total}, t)
|
||||
y, err := lp.New(name, tags, m.meta, map[string]interface{}{"value": value * 100}, now)
|
||||
if err == nil {
|
||||
output <- y
|
||||
}
|
||||
@ -117,6 +132,9 @@ func (m *CpustatCollector) Read(interval time.Duration, output chan lp.CCMetric)
|
||||
return
|
||||
}
|
||||
num_cpus := 0
|
||||
now := time.Now()
|
||||
tsdelta := now.Sub(m.lastTimestamp)
|
||||
|
||||
file, err := os.Open(string(CPUSTATFILE))
|
||||
if err != nil {
|
||||
cclog.ComponentError(m.name, err.Error())
|
||||
@ -128,9 +146,9 @@ func (m *CpustatCollector) Read(interval time.Duration, output chan lp.CCMetric)
|
||||
line := scanner.Text()
|
||||
linefields := strings.Fields(line)
|
||||
if strings.Compare(linefields[0], "cpu") == 0 {
|
||||
m.parseStatLine(linefields, m.nodetags, output)
|
||||
m.parseStatLine(linefields, m.nodetags, output, now, tsdelta)
|
||||
} else if strings.HasPrefix(linefields[0], "cpu") {
|
||||
m.parseStatLine(linefields, m.cputags[linefields[0]], output)
|
||||
m.parseStatLine(linefields, m.cputags[linefields[0]], output, now, tsdelta)
|
||||
num_cpus++
|
||||
}
|
||||
}
|
||||
@ -139,11 +157,13 @@ func (m *CpustatCollector) Read(interval time.Duration, output chan lp.CCMetric)
|
||||
m.nodetags,
|
||||
m.meta,
|
||||
map[string]interface{}{"value": int(num_cpus)},
|
||||
time.Now(),
|
||||
now,
|
||||
)
|
||||
if err == nil {
|
||||
output <- num_cpus_metric
|
||||
}
|
||||
|
||||
m.lastTimestamp = now
|
||||
}
|
||||
|
||||
func (m *CpustatCollector) Close() {
|
||||
|
@ -3,8 +3,8 @@ package collectors
|
||||
import (
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"io/ioutil"
|
||||
"log"
|
||||
"os"
|
||||
"os/exec"
|
||||
"strings"
|
||||
"time"
|
||||
@ -53,7 +53,7 @@ func (m *CustomCmdCollector) Init(config json.RawMessage) error {
|
||||
}
|
||||
}
|
||||
for _, f := range m.config.Files {
|
||||
_, err = ioutil.ReadFile(f)
|
||||
_, err = os.ReadFile(f)
|
||||
if err == nil {
|
||||
m.files = append(m.files, f)
|
||||
} else {
|
||||
@ -106,7 +106,7 @@ func (m *CustomCmdCollector) Read(interval time.Duration, output chan lp.CCMetri
|
||||
}
|
||||
}
|
||||
for _, file := range m.files {
|
||||
buffer, err := ioutil.ReadFile(file)
|
||||
buffer, err := os.ReadFile(file)
|
||||
if err != nil {
|
||||
log.Print(err)
|
||||
return
|
||||
|
@ -5,7 +5,7 @@ import (
|
||||
"bytes"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io/ioutil"
|
||||
"io"
|
||||
"log"
|
||||
"os/exec"
|
||||
"os/user"
|
||||
@ -118,8 +118,8 @@ func (m *GpfsCollector) Read(interval time.Duration, output chan lp.CCMetric) {
|
||||
cmd.Stderr = cmdStderr
|
||||
err := cmd.Run()
|
||||
if err != nil {
|
||||
dataStdErr, _ := ioutil.ReadAll(cmdStderr)
|
||||
dataStdOut, _ := ioutil.ReadAll(cmdStdout)
|
||||
dataStdErr, _ := io.ReadAll(cmdStderr)
|
||||
dataStdOut, _ := io.ReadAll(cmdStdout)
|
||||
cclog.ComponentError(
|
||||
m.name,
|
||||
fmt.Sprintf("Read(): Failed to execute command \"%s\": %v\n", cmd.String(), err),
|
||||
|
@ -2,7 +2,6 @@ package collectors
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"io/ioutil"
|
||||
"os"
|
||||
|
||||
cclog "github.com/ClusterCockpit/cc-metric-collector/internal/ccLogger"
|
||||
@ -19,8 +18,9 @@ import (
|
||||
const IB_BASEPATH = "/sys/class/infiniband/"
|
||||
|
||||
type InfinibandCollectorMetric struct {
|
||||
path string
|
||||
unit string
|
||||
path string
|
||||
unit string
|
||||
scale int64
|
||||
}
|
||||
|
||||
type InfinibandCollectorInfo struct {
|
||||
@ -84,7 +84,7 @@ func (m *InfinibandCollector) Init(config json.RawMessage) error {
|
||||
for _, path := range ibDirs {
|
||||
|
||||
// Skip, when no LID is assigned
|
||||
line, err := ioutil.ReadFile(filepath.Join(path, "lid"))
|
||||
line, err := os.ReadFile(filepath.Join(path, "lid"))
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
@ -113,10 +113,10 @@ func (m *InfinibandCollector) Init(config json.RawMessage) error {
|
||||
// Check access to counter files
|
||||
countersDir := filepath.Join(path, "counters")
|
||||
portCounterFiles := map[string]InfinibandCollectorMetric{
|
||||
"ib_recv": {path: filepath.Join(countersDir, "port_rcv_data"), unit: "bytes"},
|
||||
"ib_xmit": {path: filepath.Join(countersDir, "port_xmit_data"), unit: "bytes"},
|
||||
"ib_recv_pkts": {path: filepath.Join(countersDir, "port_rcv_packets"), unit: "packets"},
|
||||
"ib_xmit_pkts": {path: filepath.Join(countersDir, "port_xmit_packets"), unit: "packets"},
|
||||
"ib_recv": {path: filepath.Join(countersDir, "port_rcv_data"), unit: "bytes", scale: 4},
|
||||
"ib_xmit": {path: filepath.Join(countersDir, "port_xmit_data"), unit: "bytes", scale: 4},
|
||||
"ib_recv_pkts": {path: filepath.Join(countersDir, "port_rcv_packets"), unit: "packets", scale: 1},
|
||||
"ib_xmit_pkts": {path: filepath.Join(countersDir, "port_xmit_packets"), unit: "packets", scale: 1},
|
||||
}
|
||||
for _, counter := range portCounterFiles {
|
||||
err := unix.Access(counter.path, unix.R_OK)
|
||||
@ -174,7 +174,7 @@ func (m *InfinibandCollector) Read(interval time.Duration, output chan lp.CCMetr
|
||||
for counterName, counterDef := range info.portCounterFiles {
|
||||
|
||||
// Read counter file
|
||||
line, err := ioutil.ReadFile(counterDef.path)
|
||||
line, err := os.ReadFile(counterDef.path)
|
||||
if err != nil {
|
||||
cclog.ComponentError(
|
||||
m.name,
|
||||
@ -191,6 +191,8 @@ func (m *InfinibandCollector) Read(interval time.Duration, output chan lp.CCMetr
|
||||
fmt.Sprintf("Read(): Failed to convert Infininiband metrice %s='%s' to int64: %v", counterName, data, err))
|
||||
continue
|
||||
}
|
||||
// Scale raw value
|
||||
v *= counterDef.scale
|
||||
|
||||
// Send absolut values
|
||||
if m.config.SendAbsoluteValues {
|
||||
|
@ -12,7 +12,6 @@ import (
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io/ioutil"
|
||||
"math"
|
||||
"os"
|
||||
"os/signal"
|
||||
@ -154,12 +153,13 @@ func getBaseFreq() float64 {
|
||||
}
|
||||
var freq float64 = math.NaN()
|
||||
for _, f := range files {
|
||||
buffer, err := ioutil.ReadFile(f)
|
||||
buffer, err := os.ReadFile(f)
|
||||
if err == nil {
|
||||
data := strings.Replace(string(buffer), "\n", "", -1)
|
||||
x, err := strconv.ParseInt(data, 0, 64)
|
||||
if err == nil {
|
||||
freq = float64(x) * 1e6
|
||||
freq = float64(x)
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -168,11 +168,11 @@ func getBaseFreq() float64 {
|
||||
C.power_init(0)
|
||||
info := C.get_powerInfo()
|
||||
if float64(info.baseFrequency) != 0 {
|
||||
freq = float64(info.baseFrequency) * 1e6
|
||||
freq = float64(info.baseFrequency)
|
||||
}
|
||||
C.power_finalize()
|
||||
}
|
||||
return freq
|
||||
return freq * 1e3
|
||||
}
|
||||
|
||||
func (m *LikwidCollector) Init(config json.RawMessage) error {
|
||||
|
@ -7,6 +7,9 @@ The `likwid` collector is probably the most complicated collector. The LIKWID li
|
||||
"likwid": {
|
||||
"force_overwrite" : false,
|
||||
"invalid_to_zero" : false,
|
||||
"liblikwid_path" : "/path/to/liblikwid.so",
|
||||
"accessdaemon_path" : "/folder/that/contains/likwid-accessD",
|
||||
"access_mode" : "direct or accessdaemon or perf_event",
|
||||
"eventsets": [
|
||||
{
|
||||
"events" : {
|
||||
|
@ -3,7 +3,7 @@ package collectors
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io/ioutil"
|
||||
"os"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
@ -72,7 +72,7 @@ func (m *LoadavgCollector) Read(interval time.Duration, output chan lp.CCMetric)
|
||||
if !m.init {
|
||||
return
|
||||
}
|
||||
buffer, err := ioutil.ReadFile(LOADAVGFILE)
|
||||
buffer, err := os.ReadFile(LOADAVGFILE)
|
||||
if err != nil {
|
||||
if err != nil {
|
||||
cclog.ComponentError(
|
||||
|
@ -68,7 +68,8 @@ func getStats(filename string) map[string]MemstatStats {
|
||||
} else if len(linefields) == 5 {
|
||||
v, err := strconv.ParseFloat(linefields[3], 64)
|
||||
if err == nil {
|
||||
stats[strings.Trim(linefields[0], ":")] = MemstatStats{
|
||||
cclog.ComponentDebug("getStats", strings.Trim(linefields[2], ":"), v, linefields[4])
|
||||
stats[strings.Trim(linefields[2], ":")] = MemstatStats{
|
||||
value: v,
|
||||
unit: linefields[4],
|
||||
}
|
||||
@ -160,7 +161,6 @@ func (m *MemstatCollector) Init(config json.RawMessage) error {
|
||||
|
||||
func (m *MemstatCollector) Read(interval time.Duration, output chan lp.CCMetric) {
|
||||
if !m.init {
|
||||
cclog.ComponentPrint(m.name, "Here")
|
||||
return
|
||||
}
|
||||
|
||||
@ -188,16 +188,20 @@ func (m *MemstatCollector) Read(interval time.Duration, output chan lp.CCMetric)
|
||||
unit := ""
|
||||
if totalVal, total := stats["MemTotal"]; total {
|
||||
if freeVal, free := stats["MemFree"]; free {
|
||||
memUsed = totalVal.value - freeVal.value
|
||||
if len(totalVal.unit) > 0 {
|
||||
unit = totalVal.unit
|
||||
} else if len(freeVal.unit) > 0 {
|
||||
unit = freeVal.unit
|
||||
}
|
||||
if bufVal, buffers := stats["Buffers"]; buffers {
|
||||
memUsed -= bufVal.value
|
||||
if len(bufVal.unit) > 0 && len(unit) == 0 {
|
||||
unit = bufVal.unit
|
||||
}
|
||||
if cacheVal, cached := stats["Cached"]; cached {
|
||||
memUsed = totalVal.value - (freeVal.value + bufVal.value + cacheVal.value)
|
||||
if len(totalVal.unit) > 0 {
|
||||
unit = totalVal.unit
|
||||
} else if len(freeVal.unit) > 0 {
|
||||
unit = freeVal.unit
|
||||
} else if len(bufVal.unit) > 0 {
|
||||
unit = bufVal.unit
|
||||
} else if len(cacheVal.unit) > 0 {
|
||||
memUsed -= cacheVal.value
|
||||
if len(cacheVal.unit) > 0 && len(unit) == 0 {
|
||||
unit = cacheVal.unit
|
||||
}
|
||||
}
|
||||
|
@ -66,14 +66,14 @@ func (m *RocmSmiCollector) Init(config json.RawMessage) error {
|
||||
|
||||
ret := rocm_smi.Init()
|
||||
if ret != rocm_smi.STATUS_SUCCESS {
|
||||
err = errors.New("Failed to initialize ROCm SMI library")
|
||||
err = errors.New("failed to initialize ROCm SMI library")
|
||||
cclog.ComponentError(m.name, err.Error())
|
||||
return err
|
||||
}
|
||||
|
||||
numDevs, ret := rocm_smi.NumMonitorDevices()
|
||||
if ret != rocm_smi.STATUS_SUCCESS {
|
||||
err = errors.New("Failed to get number of GPUs from ROCm SMI library")
|
||||
err = errors.New("failed to get number of GPUs from ROCm SMI library")
|
||||
cclog.ComponentError(m.name, err.Error())
|
||||
return err
|
||||
}
|
||||
@ -98,14 +98,14 @@ func (m *RocmSmiCollector) Init(config json.RawMessage) error {
|
||||
}
|
||||
device, ret := rocm_smi.DeviceGetHandleByIndex(i)
|
||||
if ret != rocm_smi.STATUS_SUCCESS {
|
||||
err = fmt.Errorf("Failed to get handle for GPU %d", i)
|
||||
err = fmt.Errorf("failed to get handle for GPU %d", i)
|
||||
cclog.ComponentError(m.name, err.Error())
|
||||
return err
|
||||
}
|
||||
|
||||
pciInfo, ret := rocm_smi.DeviceGetPciInfo(device)
|
||||
if ret != rocm_smi.STATUS_SUCCESS {
|
||||
err = fmt.Errorf("Failed to get PCI information for GPU %d", i)
|
||||
err = fmt.Errorf("failed to get PCI information for GPU %d", i)
|
||||
cclog.ComponentError(m.name, err.Error())
|
||||
return err
|
||||
}
|
||||
|
155
collectors/schedstatMetric.go
Normal file
155
collectors/schedstatMetric.go
Normal file
@ -0,0 +1,155 @@
|
||||
package collectors
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"bufio"
|
||||
"time"
|
||||
"os"
|
||||
"strings"
|
||||
"strconv"
|
||||
"math"
|
||||
|
||||
cclog "github.com/ClusterCockpit/cc-metric-collector/internal/ccLogger"
|
||||
lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric"
|
||||
)
|
||||
|
||||
const SCHEDSTATFILE = `/proc/schedstat`
|
||||
|
||||
// These are the fields we read from the JSON configuration
|
||||
type SchedstatCollectorConfig struct {
|
||||
ExcludeMetrics []string `json:"exclude_metrics,omitempty"`
|
||||
}
|
||||
|
||||
// This contains all variables we need during execution and the variables
|
||||
// defined by metricCollector (name, init, ...)
|
||||
type SchedstatCollector struct {
|
||||
metricCollector
|
||||
config SchedstatCollectorConfig // the configuration structure
|
||||
lastTimestamp time.Time // Store time stamp of last tick to derive values
|
||||
meta map[string]string // default meta information
|
||||
cputags map[string]map[string]string // default tags
|
||||
olddata map[string]map[string]int64 // default tags
|
||||
}
|
||||
|
||||
// Functions to implement MetricCollector interface
|
||||
// Init(...), Read(...), Close()
|
||||
// See: metricCollector.go
|
||||
|
||||
// Init initializes the sample collector
|
||||
// Called once by the collector manager
|
||||
// All tags, meta data tags and metrics that do not change over the runtime should be set here
|
||||
func (m *SchedstatCollector) Init(config json.RawMessage) error {
|
||||
var err error = nil
|
||||
// Always set the name early in Init() to use it in cclog.Component* functions
|
||||
m.name = "SchedstatCollector"
|
||||
// This is for later use, also call it early
|
||||
m.setup()
|
||||
// Tell whether the collector should be run in parallel with others (reading files, ...)
|
||||
// or it should be run serially, mostly for collectors acutally doing measurements
|
||||
// because they should not measure the execution of the other collectors
|
||||
m.parallel = true
|
||||
// Define meta information sent with each metric
|
||||
// (Can also be dynamic or this is the basic set with extension through AddMeta())
|
||||
m.meta = map[string]string{"source": m.name, "group": "SCHEDSTAT"}
|
||||
|
||||
// Read in the JSON configuration
|
||||
if len(config) > 0 {
|
||||
err = json.Unmarshal(config, &m.config)
|
||||
if err != nil {
|
||||
cclog.ComponentError(m.name, "Error reading config:", err.Error())
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
// Check input file
|
||||
file, err := os.Open(string(SCHEDSTATFILE))
|
||||
if err != nil {
|
||||
cclog.ComponentError(m.name, err.Error())
|
||||
}
|
||||
defer file.Close()
|
||||
|
||||
// Pre-generate tags for all CPUs
|
||||
num_cpus := 0
|
||||
m.cputags = make(map[string]map[string]string)
|
||||
m.olddata = make(map[string]map[string]int64)
|
||||
scanner := bufio.NewScanner(file)
|
||||
for scanner.Scan() {
|
||||
line := scanner.Text()
|
||||
linefields := strings.Fields(line)
|
||||
if strings.HasPrefix(linefields[0], "cpu") && strings.Compare(linefields[0], "cpu") != 0 {
|
||||
cpustr := strings.TrimLeft(linefields[0], "cpu")
|
||||
cpu, _ := strconv.Atoi(cpustr)
|
||||
running, _ := strconv.ParseInt(linefields[7], 10, 64)
|
||||
waiting, _ := strconv.ParseInt(linefields[8], 10, 64)
|
||||
m.cputags[linefields[0]] = map[string]string{"type": "hwthread", "type-id": fmt.Sprintf("%d", cpu)}
|
||||
m.olddata[linefields[0]] = map[string]int64{"running" : running, "waiting" : waiting}
|
||||
num_cpus++
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// Save current timestamp
|
||||
m.lastTimestamp = time.Now()
|
||||
|
||||
// Set this flag only if everything is initialized properly, all required files exist, ...
|
||||
m.init = true
|
||||
return err
|
||||
}
|
||||
|
||||
func (m *SchedstatCollector) ParseProcLine(linefields []string, tags map[string]string, output chan lp.CCMetric, now time.Time, tsdelta time.Duration) {
|
||||
running, _ := strconv.ParseInt(linefields[7], 10, 64)
|
||||
waiting, _ := strconv.ParseInt(linefields[8], 10, 64)
|
||||
diff_running := running - m.olddata[linefields[0]]["running"]
|
||||
diff_waiting := waiting - m.olddata[linefields[0]]["waiting"]
|
||||
|
||||
var l_running float64 = float64(diff_running) / tsdelta.Seconds() / (math.Pow(1000, 3))
|
||||
var l_waiting float64 = float64(diff_waiting) / tsdelta.Seconds() / (math.Pow(1000, 3))
|
||||
|
||||
m.olddata[linefields[0]]["running"] = running
|
||||
m.olddata[linefields[0]]["waiting"] = waiting
|
||||
value := l_running + l_waiting
|
||||
|
||||
y, err := lp.New("cpu_load_core", tags, m.meta, map[string]interface{}{"value": value}, now)
|
||||
if err == nil {
|
||||
// Send it to output channel
|
||||
output <- y
|
||||
}
|
||||
}
|
||||
|
||||
// Read collects all metrics belonging to the sample collector
|
||||
// and sends them through the output channel to the collector manager
|
||||
func (m *SchedstatCollector) Read(interval time.Duration, output chan lp.CCMetric) {
|
||||
if !m.init {
|
||||
return
|
||||
}
|
||||
|
||||
//timestamps
|
||||
now := time.Now()
|
||||
tsdelta := now.Sub(m.lastTimestamp)
|
||||
|
||||
file, err := os.Open(string(SCHEDSTATFILE))
|
||||
if err != nil {
|
||||
cclog.ComponentError(m.name, err.Error())
|
||||
}
|
||||
defer file.Close()
|
||||
|
||||
scanner := bufio.NewScanner(file)
|
||||
for scanner.Scan() {
|
||||
line := scanner.Text()
|
||||
linefields := strings.Fields(line)
|
||||
if strings.HasPrefix(linefields[0], "cpu") {
|
||||
m.ParseProcLine(linefields, m.cputags[linefields[0]], output, now, tsdelta)
|
||||
}
|
||||
}
|
||||
|
||||
m.lastTimestamp = now
|
||||
|
||||
}
|
||||
|
||||
// Close metric collector: close network connection, close files, close libraries, ...
|
||||
// Called once by the collector manager
|
||||
func (m *SchedstatCollector) Close() {
|
||||
// Unset flag
|
||||
m.init = false
|
||||
}
|
11
collectors/schedstatMetric.md
Normal file
11
collectors/schedstatMetric.md
Normal file
@ -0,0 +1,11 @@
|
||||
|
||||
## `schedstat` collector
|
||||
```json
|
||||
"schedstat": {
|
||||
}
|
||||
```
|
||||
|
||||
The `schedstat` collector reads data from /proc/schedstat and calculates a load value, separated by hwthread. This might be useful to detect bad cpu pinning on shared nodes etc.
|
||||
|
||||
Metric:
|
||||
* `cpu_load_core`
|
@ -3,7 +3,7 @@ package collectors
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io/ioutil"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
"strings"
|
||||
@ -83,14 +83,14 @@ func (m *TempCollector) Init(config json.RawMessage) error {
|
||||
|
||||
// sensor name
|
||||
nameFile := filepath.Join(filepath.Dir(file), "name")
|
||||
name, err := ioutil.ReadFile(nameFile)
|
||||
name, err := os.ReadFile(nameFile)
|
||||
if err == nil {
|
||||
sensor.name = strings.TrimSpace(string(name))
|
||||
}
|
||||
|
||||
// sensor label
|
||||
labelFile := strings.TrimSuffix(file, "_input") + "_label"
|
||||
label, err := ioutil.ReadFile(labelFile)
|
||||
label, err := os.ReadFile(labelFile)
|
||||
if err == nil {
|
||||
sensor.label = strings.TrimSpace(string(label))
|
||||
}
|
||||
@ -117,7 +117,7 @@ func (m *TempCollector) Init(config json.RawMessage) error {
|
||||
}
|
||||
|
||||
// Sensor file
|
||||
_, err = ioutil.ReadFile(file)
|
||||
_, err = os.ReadFile(file)
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
@ -139,7 +139,7 @@ func (m *TempCollector) Init(config json.RawMessage) error {
|
||||
// max temperature
|
||||
if m.config.ReportMaxTemp {
|
||||
maxTempFile := strings.TrimSuffix(file, "_input") + "_max"
|
||||
if buffer, err := ioutil.ReadFile(maxTempFile); err == nil {
|
||||
if buffer, err := os.ReadFile(maxTempFile); err == nil {
|
||||
if x, err := strconv.ParseInt(strings.TrimSpace(string(buffer)), 10, 64); err == nil {
|
||||
sensor.maxTempName = strings.Replace(sensor.metricName, "temp", "max_temp", 1)
|
||||
sensor.maxTemp = x / 1000
|
||||
@ -150,7 +150,7 @@ func (m *TempCollector) Init(config json.RawMessage) error {
|
||||
// critical temperature
|
||||
if m.config.ReportCriticalTemp {
|
||||
criticalTempFile := strings.TrimSuffix(file, "_input") + "_crit"
|
||||
if buffer, err := ioutil.ReadFile(criticalTempFile); err == nil {
|
||||
if buffer, err := os.ReadFile(criticalTempFile); err == nil {
|
||||
if x, err := strconv.ParseInt(strings.TrimSpace(string(buffer)), 10, 64); err == nil {
|
||||
sensor.critTempName = strings.Replace(sensor.metricName, "temp", "crit_temp", 1)
|
||||
sensor.critTemp = x / 1000
|
||||
@ -175,7 +175,7 @@ func (m *TempCollector) Read(interval time.Duration, output chan lp.CCMetric) {
|
||||
|
||||
for _, sensor := range m.sensors {
|
||||
// Read sensor file
|
||||
buffer, err := ioutil.ReadFile(sensor.file)
|
||||
buffer, err := os.ReadFile(sensor.file)
|
||||
if err != nil {
|
||||
cclog.ComponentError(
|
||||
m.name,
|
||||
|
60
docs/building.md
Normal file
60
docs/building.md
Normal file
@ -0,0 +1,60 @@
|
||||
# Building the cc-metric-collector
|
||||
|
||||
In most cases, a simple `make` in the main folder is enough to get a `cc-metric-collector` binary. It is basically a `go build` but some collectors require additional tasks. There is currently no Golang interface to LIKWID, so it uses `cgo` to create bindings but `cgo` requires the LIKWID header files. Therefore, it checks whether LIKWID is installed and if not it downloads LIKWID and copies the headers.
|
||||
|
||||
## System integration
|
||||
|
||||
The main configuration settings for system integration are pre-defined in `scripts/cc-metric-collector.config`. The file contains the UNIX user and group used for execution, the PID file location and other settings. Adjust it accordingly and copy it to `/etc/default/cc-metric-collector`
|
||||
|
||||
```bash
|
||||
$ install --mode 644 \
|
||||
--owner $CC_USER \
|
||||
--group $CC_GROUP \
|
||||
scripts/cc-metric-collector.config /etc/default/cc-metric-collector
|
||||
$ edit /etc/default/cc-metric-collector
|
||||
```
|
||||
|
||||
### SysVinit and similar
|
||||
|
||||
If you are using a init system based in `/etc/init.d` daemons, you can use the sample `scripts/cc-metric-collector.init`. It reads the basic configuration from `/etc/default/cc-metric-collector`
|
||||
|
||||
```bash
|
||||
$ install --mode 755 \
|
||||
--owner $CC_USER \
|
||||
--group $CC_GROUP \
|
||||
scripts/cc-metric-collector.init /etc/init.d/cc-metric-collector
|
||||
```
|
||||
|
||||
### Systemd
|
||||
|
||||
If you are using `systemd` as init system, you can use the sample systemd service file `scripts/cc-metric-collector.service`, the configuration file `scripts/cc-metric-collector.config`.
|
||||
|
||||
```bash
|
||||
$ install --mode 644 \
|
||||
--owner $CC_USER \
|
||||
--group $CC_GROUP \
|
||||
scripts/cc-metric-collector.service /etc/systemd/system/cc-metric-collector.service
|
||||
$ systemctl enable cc-metric-collector
|
||||
```
|
||||
|
||||
## RPM
|
||||
|
||||
In order to get a RPM packages for cc-metric-collector, just use:
|
||||
|
||||
```bash
|
||||
$ make RPM
|
||||
```
|
||||
|
||||
It uses the RPM SPEC file `scripts/cc-metric-collector.spec` and requires the RPM tools (`rpm` and `rpmspec`) and `git`.
|
||||
|
||||
## DEB
|
||||
|
||||
In order to get very simple Debian packages for cc-metric-collector, just use:
|
||||
|
||||
```bash
|
||||
$ make DEB
|
||||
```
|
||||
|
||||
It uses the DEB control file `scripts/cc-metric-collector.control` and requires `dpkg-deb`, `awk`, `sed` and `git`. It creates only a binary deb package.
|
||||
|
||||
_This option is not well tested and therefore experimental_
|
@ -6,27 +6,52 @@ It is basically a copy of the [InfluxDB line protocol](https://github.com/influx
|
||||
|
||||
```golang
|
||||
type ccMetric struct {
|
||||
name string // same as
|
||||
tags []*influx.Tag // original
|
||||
fields []*influx.Field // Influx
|
||||
tm time.Time // line-protocol
|
||||
meta []*influx.Tag
|
||||
name string // Measurement name
|
||||
meta map[string]string // map of meta data tags
|
||||
tags map[string]string // map of of tags
|
||||
fields map[string]interface{} // map of of fields
|
||||
tm time.Time // timestamp
|
||||
}
|
||||
|
||||
type CCMetric interface {
|
||||
influx.MutableMetric // the same functions as defined by influx.MutableMetric
|
||||
RemoveTag(key string) // this is not published by the original influx.MutableMetric
|
||||
Meta() map[string]string
|
||||
MetaList() []*inlux.Tag
|
||||
AddMeta(key, value string)
|
||||
HasMeta(key string) bool
|
||||
GetMeta(key string) (string, bool)
|
||||
RemoveMeta(key string)
|
||||
ToPoint(metaAsTags map[string]bool) *write.Point // Generate influxDB point for data type ccMetric
|
||||
ToLineProtocol(metaAsTags map[string]bool) string // Generate influxDB line protocol for data type ccMetric
|
||||
String() string // Return line-protocol like string
|
||||
|
||||
Name() string // Get metric name
|
||||
SetName(name string) // Set metric name
|
||||
|
||||
Time() time.Time // Get timestamp
|
||||
SetTime(t time.Time) // Set timestamp
|
||||
|
||||
Tags() map[string]string // Map of tags
|
||||
AddTag(key, value string) // Add a tag
|
||||
GetTag(key string) (value string, ok bool) // Get a tag by its key
|
||||
HasTag(key string) (ok bool) // Check if a tag key is present
|
||||
RemoveTag(key string) // Remove a tag by its key
|
||||
|
||||
Meta() map[string]string // Map of meta data tags
|
||||
AddMeta(key, value string) // Add a meta data tag
|
||||
GetMeta(key string) (value string, ok bool) // Get a meta data tab addressed by its key
|
||||
HasMeta(key string) (ok bool) // Check if a meta data key is present
|
||||
RemoveMeta(key string) // Remove a meta data tag by its key
|
||||
|
||||
Fields() map[string]interface{} // Map of fields
|
||||
AddField(key string, value interface{}) // Add a field
|
||||
GetField(key string) (value interface{}, ok bool) // Get a field addressed by its key
|
||||
HasField(key string) (ok bool) // Check if a field key is present
|
||||
RemoveField(key string) // Remove a field addressed by its key
|
||||
}
|
||||
|
||||
func New(name string, tags map[string]string, meta map[string]string, fields map[string]interface{}, tm time.Time) (CCMetric, error)
|
||||
func FromMetric(other CCMetric) CCMetric
|
||||
func FromInfluxMetric(other lp.Metric) CCMetric
|
||||
```
|
||||
|
||||
The `CCMetric` interface provides the same functions as the `MutableMetric` like `{Add, Remove, Has}{Tag, Field}` and additionally provides `{Add, Remove, Has}Meta`.
|
||||
The `CCMetric` interface provides the same functions as the `MutableMetric` like `{Add, Get, Remove, Has}{Tag, Field}` and additionally provides `{Add, Get, Remove, Has}Meta`.
|
||||
|
||||
The InfluxDB protocol creates a new metric with `influx.New(name, tags, fields, time)` while CCMetric uses `ccMetric.New(name, tags, meta, fields, time)` where `tags` and `meta` are both of type `map[string]string`.
|
||||
|
||||
You can copy a CCMetric with `FromMetric(other CCMetric) CCMetric`. If you get an `influx.Metric` from a function, like the line protocol parser, you can use `FromInfluxMetric(other influx.Metric) CCMetric` to get a CCMetric out of it (see `NatsReceiver` for an example).
|
||||
|
||||
Although the [cc-specifications](https://github.com/ClusterCockpit/cc-specifications/blob/master/interfaces/lineprotocol/README.md) defines that there is only a `value` field for the metric value, the CCMetric still can have multiple values similar to the InfluxDB line protocol.
|
||||
|
@ -50,6 +50,7 @@ type CCMetric interface {
|
||||
GetField(key string) (value interface{}, ok bool) // Get a field addressed by its key
|
||||
HasField(key string) (ok bool) // Check if a field key is present
|
||||
RemoveField(key string) // Remove a field addressed by its key
|
||||
String() string // Return line-protocol like string
|
||||
}
|
||||
|
||||
// String implements the stringer interface for data type ccMetric
|
||||
@ -217,23 +218,26 @@ func New(
|
||||
}
|
||||
|
||||
// FromMetric copies the metric <other>
|
||||
func FromMetric(other ccMetric) CCMetric {
|
||||
func FromMetric(other CCMetric) CCMetric {
|
||||
otags := other.Tags()
|
||||
ometa := other.Meta()
|
||||
ofields := other.Fields()
|
||||
m := &ccMetric{
|
||||
name: other.Name(),
|
||||
tags: make(map[string]string, len(other.tags)),
|
||||
meta: make(map[string]string, len(other.meta)),
|
||||
fields: make(map[string]interface{}, len(other.fields)),
|
||||
tags: make(map[string]string, len(otags)),
|
||||
meta: make(map[string]string, len(ometa)),
|
||||
fields: make(map[string]interface{}, len(ofields)),
|
||||
tm: other.Time(),
|
||||
}
|
||||
|
||||
// deep copy tags, meta data tags and fields
|
||||
for key, value := range other.tags {
|
||||
for key, value := range otags {
|
||||
m.tags[key] = value
|
||||
}
|
||||
for key, value := range other.meta {
|
||||
for key, value := range ometa {
|
||||
m.meta[key] = value
|
||||
}
|
||||
for key, value := range other.fields {
|
||||
for key, value := range ofields {
|
||||
m.fields[key] = value
|
||||
}
|
||||
return m
|
||||
|
@ -5,7 +5,7 @@ import (
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io/ioutil"
|
||||
"io"
|
||||
"net/http"
|
||||
"strings"
|
||||
"sync"
|
||||
@ -84,7 +84,7 @@ func (r *HttpReceiver) ServerHttp(w http.ResponseWriter, req *http.Request) {
|
||||
return
|
||||
}
|
||||
|
||||
body, err := ioutil.ReadAll(req.Body)
|
||||
body, err := io.ReadAll(req.Body)
|
||||
if err != nil {
|
||||
http.Error(w, err.Error(), http.StatusInternalServerError)
|
||||
return
|
||||
|
File diff suppressed because it is too large
Load Diff
54
receivers/redfishReceiver.md
Normal file
54
receivers/redfishReceiver.md
Normal file
@ -0,0 +1,54 @@
|
||||
## Redfish receiver
|
||||
|
||||
The Redfish receiver uses the [Redfish (specification)](https://www.dmtf.org/standards/redfish) to query thermal and power metrics. Thermal metrics may include various fan speeds and temperatures. Power metrics may include the current power consumption of various hardware components. It may also include the minimum, maximum and average power consumption of these components in a given time interval. The receiver will poll each configured redfish device once in a given interval. Multiple devices can be accessed in parallel to increase throughput.
|
||||
|
||||
### Configuration structure
|
||||
|
||||
```json
|
||||
{
|
||||
"<redfish receiver name>": {
|
||||
"type": "redfish",
|
||||
"username": "<user A>",
|
||||
"password": "<password A>",
|
||||
"endpoint": "https://%h-bmc",
|
||||
"exclude_metrics": [ "min_consumed_watts" ],
|
||||
"client_config": [
|
||||
{
|
||||
"host_list": [ "<host 1>", "<host 2>" ]
|
||||
},
|
||||
{
|
||||
"host_list": [ "<host 3>", "<host 4>" ]
|
||||
"disable_power_metrics": true
|
||||
},
|
||||
{
|
||||
"host_list": [ "<host 5>" ],
|
||||
"username": "<user B>",
|
||||
"password": "<password B>",
|
||||
"endpoint": "https://%h-BMC",
|
||||
"disable_thermal_metrics": true
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
Global settings:
|
||||
|
||||
- `fanout`: Maximum number of simultaneous redfish connections (default: 64)
|
||||
- `interval`: How often the redfish power metrics should be read and send to the sink (default: 30 s)
|
||||
- `http_insecure`: Control whether a client verifies the server's certificate (default: true == do not verify server's certificate)
|
||||
- `http_timeout`: Time limit for requests made by this HTTP client (default: 10 s)
|
||||
|
||||
Global and per redfish device settings (per redfish device settings overwrite the global settings):
|
||||
|
||||
- `disable_power_metrics`: disable collection of power metrics
|
||||
- `disable_processor_metrics`: disable collection of processor metrics
|
||||
- `disable_thermal_metrics`: disable collection of thermal metrics
|
||||
- `exclude_metrics`: list of excluded metrics
|
||||
- `username`: User name to authenticate with
|
||||
- `password`: Password to use for authentication
|
||||
- `endpoint`: URL of the redfish service (placeholder `%h` gets replaced by the hostname)
|
||||
|
||||
Per redfish device settings:
|
||||
|
||||
- `host_list`: List of hosts with the same client configuration
|
@ -6,7 +6,7 @@ CC_HOME=/tmp
|
||||
|
||||
LOG_DIR=/var/log
|
||||
|
||||
DATA_DIR=/var/lib/grafana
|
||||
DATA_DIR=/var/lib/cc-metric-collector
|
||||
|
||||
MAX_OPEN_FILES=10000
|
||||
|
||||
|
16
scripts/cc-metric-collector.deb.rules
Normal file
16
scripts/cc-metric-collector.deb.rules
Normal file
@ -0,0 +1,16 @@
|
||||
|
||||
#!/usr/bin/make -f
|
||||
# You must remove unused comment lines for the released package.
|
||||
#export DH_VERBOSE = 1
|
||||
#export DEB_BUILD_MAINT_OPTIONS = hardening=+all
|
||||
#export DEB_CFLAGS_MAINT_APPEND = -Wall -pedantic
|
||||
#export DEB_LDFLAGS_MAINT_APPEND = -Wl,--as-needed
|
||||
|
||||
%:
|
||||
dh $@
|
||||
|
||||
override_dh_auto_build:
|
||||
make
|
||||
|
||||
override_dh_auto_install:
|
||||
make PREFIX=/usr install
|
@ -19,7 +19,7 @@
|
||||
PATH=/bin:/usr/bin:/sbin:/usr/sbin
|
||||
NAME=cc-metric-collector
|
||||
DESC="ClusterCockpit metric collector"
|
||||
DEFAULT=/etc/default/${NAME}.json
|
||||
DEFAULT=/etc/default/${NAME}
|
||||
|
||||
CC_USER=clustercockpit
|
||||
CC_GROUP=clustercockpit
|
||||
|
Loading…
Reference in New Issue
Block a user