mirror of
https://github.com/ClusterCockpit/cc-metric-collector.git
synced 2025-07-20 03:41:42 +02:00
Compare commits
2 Commits
likwid_col
...
likwidener
Author | SHA1 | Date | |
---|---|---|---|
|
813b59b16e | ||
|
303fe1d80f |
16
.github/workflows/Release.yml
vendored
16
.github/workflows/Release.yml
vendored
@@ -45,10 +45,10 @@ jobs:
|
||||
- name: Install build dependencies
|
||||
run: |
|
||||
dnf --assumeyes install \
|
||||
http://mirror.centos.org/centos/8-stream/AppStream/x86_64/os/Packages/golang-1.21.7-1.module_el8+960+4060efbe.x86_64.rpm \
|
||||
http://mirror.centos.org/centos/8-stream/AppStream/x86_64/os/Packages/golang-bin-1.21.7-1.module_el8+960+4060efbe.x86_64.rpm \
|
||||
http://mirror.centos.org/centos/8-stream/AppStream/x86_64/os/Packages/golang-src-1.21.7-1.module_el8+960+4060efbe.noarch.rpm \
|
||||
http://mirror.centos.org/centos/8-stream/AppStream/x86_64/os/Packages/go-toolset-1.21.7-1.module_el8+960+4060efbe.x86_64.rpm
|
||||
http://mirror.centos.org/centos/8-stream/AppStream/x86_64/os/Packages/golang-1.20.6-2.module_el8+658+f14b2092.x86_64.rpm \
|
||||
http://mirror.centos.org/centos/8-stream/AppStream/x86_64/os/Packages/golang-bin-1.20.6-2.module_el8+658+f14b2092.x86_64.rpm \
|
||||
http://mirror.centos.org/centos/8-stream/AppStream/x86_64/os/Packages/golang-src-1.20.6-2.module_el8+658+f14b2092.noarch.rpm \
|
||||
http://mirror.centos.org/centos/8-stream/AppStream/x86_64/os/Packages/go-toolset-1.20.6-1.module_el8+602+8bb8a8d6.x86_64.rpm
|
||||
|
||||
- name: RPM build MetricCollector
|
||||
id: rpmbuild
|
||||
@@ -115,10 +115,10 @@ jobs:
|
||||
- name: Install build dependencies
|
||||
run: |
|
||||
dnf --assumeyes --disableplugin=subscription-manager install \
|
||||
http://mirror.centos.org/centos/8-stream/AppStream/x86_64/os/Packages/golang-1.21.7-1.module_el8+960+4060efbe.x86_64.rpm \
|
||||
http://mirror.centos.org/centos/8-stream/AppStream/x86_64/os/Packages/golang-bin-1.21.7-1.module_el8+960+4060efbe.x86_64.rpm \
|
||||
http://mirror.centos.org/centos/8-stream/AppStream/x86_64/os/Packages/golang-src-1.21.7-1.module_el8+960+4060efbe.noarch.rpm \
|
||||
http://mirror.centos.org/centos/8-stream/AppStream/x86_64/os/Packages/go-toolset-1.21.7-1.module_el8+960+4060efbe.x86_64.rpm
|
||||
http://mirror.centos.org/centos/8-stream/AppStream/x86_64/os/Packages/golang-1.20.6-2.module_el8+658+f14b2092.x86_64.rpm \
|
||||
http://mirror.centos.org/centos/8-stream/AppStream/x86_64/os/Packages/golang-bin-1.20.6-2.module_el8+658+f14b2092.x86_64.rpm \
|
||||
http://mirror.centos.org/centos/8-stream/AppStream/x86_64/os/Packages/golang-src-1.20.6-2.module_el8+658+f14b2092.noarch.rpm \
|
||||
http://mirror.centos.org/centos/8-stream/AppStream/x86_64/os/Packages/go-toolset-1.20.6-1.module_el8+602+8bb8a8d6.x86_64.rpm
|
||||
|
||||
- name: RPM build MetricCollector
|
||||
id: rpmbuild
|
||||
|
68
.github/workflows/runonce.yml
vendored
68
.github/workflows/runonce.yml
vendored
@@ -10,6 +10,32 @@ on:
|
||||
|
||||
jobs:
|
||||
|
||||
#
|
||||
# Job build-1-20
|
||||
# Build on latest Ubuntu using golang version 1.20
|
||||
#
|
||||
build-1-20:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
# See: https://github.com/marketplace/actions/checkout
|
||||
# Checkout git repository and submodules
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
submodules: recursive
|
||||
|
||||
# See: https://github.com/marketplace/actions/setup-go-environment
|
||||
- name: Setup Golang
|
||||
uses: actions/setup-go@v4
|
||||
with:
|
||||
go-version: '1.20'
|
||||
|
||||
- name: Build MetricCollector
|
||||
run: make
|
||||
|
||||
- name: Run MetricCollector once
|
||||
run: ./cc-metric-collector --once --config .github/ci-config.json
|
||||
|
||||
#
|
||||
# Job build-1-21
|
||||
# Build on latest Ubuntu using golang version 1.21
|
||||
@@ -36,32 +62,6 @@ jobs:
|
||||
- name: Run MetricCollector once
|
||||
run: ./cc-metric-collector --once --config .github/ci-config.json
|
||||
|
||||
#
|
||||
# Job build-1-22
|
||||
# Build on latest Ubuntu using golang version 1.22
|
||||
#
|
||||
build-1-22:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
# See: https://github.com/marketplace/actions/checkout
|
||||
# Checkout git repository and submodules
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
submodules: recursive
|
||||
|
||||
# See: https://github.com/marketplace/actions/setup-go-environment
|
||||
- name: Setup Golang
|
||||
uses: actions/setup-go@v4
|
||||
with:
|
||||
go-version: '1.22'
|
||||
|
||||
- name: Build MetricCollector
|
||||
run: make
|
||||
|
||||
- name: Run MetricCollector once
|
||||
run: ./cc-metric-collector --once --config .github/ci-config.json
|
||||
|
||||
#
|
||||
# Build on AlmaLinux 8 using go-toolset
|
||||
#
|
||||
@@ -92,10 +92,10 @@ jobs:
|
||||
- name: Install build dependencies
|
||||
run: |
|
||||
dnf --assumeyes install \
|
||||
http://mirror.centos.org/centos/8-stream/AppStream/x86_64/os/Packages/golang-1.21.7-1.module_el8+960+4060efbe.x86_64.rpm \
|
||||
http://mirror.centos.org/centos/8-stream/AppStream/x86_64/os/Packages/golang-bin-1.21.7-1.module_el8+960+4060efbe.x86_64.rpm \
|
||||
http://mirror.centos.org/centos/8-stream/AppStream/x86_64/os/Packages/golang-src-1.21.7-1.module_el8+960+4060efbe.noarch.rpm \
|
||||
http://mirror.centos.org/centos/8-stream/AppStream/x86_64/os/Packages/go-toolset-1.21.7-1.module_el8+960+4060efbe.x86_64.rpm
|
||||
http://mirror.centos.org/centos/8-stream/AppStream/x86_64/os/Packages/golang-1.20.6-2.module_el8+658+f14b2092.x86_64.rpm \
|
||||
http://mirror.centos.org/centos/8-stream/AppStream/x86_64/os/Packages/golang-bin-1.20.6-2.module_el8+658+f14b2092.x86_64.rpm \
|
||||
http://mirror.centos.org/centos/8-stream/AppStream/x86_64/os/Packages/golang-src-1.20.6-2.module_el8+658+f14b2092.noarch.rpm \
|
||||
http://mirror.centos.org/centos/8-stream/AppStream/x86_64/os/Packages/go-toolset-1.20.6-1.module_el8+602+8bb8a8d6.x86_64.rpm
|
||||
|
||||
- name: RPM build MetricCollector
|
||||
id: rpmbuild
|
||||
@@ -130,10 +130,10 @@ jobs:
|
||||
- name: Install build dependencies
|
||||
run: |
|
||||
dnf --assumeyes --disableplugin=subscription-manager install \
|
||||
http://mirror.centos.org/centos/8-stream/AppStream/x86_64/os/Packages/golang-1.21.7-1.module_el8+960+4060efbe.x86_64.rpm \
|
||||
http://mirror.centos.org/centos/8-stream/AppStream/x86_64/os/Packages/golang-bin-1.21.7-1.module_el8+960+4060efbe.x86_64.rpm \
|
||||
http://mirror.centos.org/centos/8-stream/AppStream/x86_64/os/Packages/golang-src-1.21.7-1.module_el8+960+4060efbe.noarch.rpm \
|
||||
http://mirror.centos.org/centos/8-stream/AppStream/x86_64/os/Packages/go-toolset-1.21.7-1.module_el8+960+4060efbe.x86_64.rpm
|
||||
http://mirror.centos.org/centos/8-stream/AppStream/x86_64/os/Packages/golang-1.20.6-2.module_el8+658+f14b2092.x86_64.rpm \
|
||||
http://mirror.centos.org/centos/8-stream/AppStream/x86_64/os/Packages/golang-bin-1.20.6-2.module_el8+658+f14b2092.x86_64.rpm \
|
||||
http://mirror.centos.org/centos/8-stream/AppStream/x86_64/os/Packages/golang-src-1.20.6-2.module_el8+658+f14b2092.noarch.rpm \
|
||||
http://mirror.centos.org/centos/8-stream/AppStream/x86_64/os/Packages/go-toolset-1.20.6-1.module_el8+602+8bb8a8d6.x86_64.rpm
|
||||
|
||||
- name: RPM build MetricCollector
|
||||
id: rpmbuild
|
||||
|
@@ -1,5 +1,5 @@
|
||||
# LIKWID version
|
||||
LIKWID_VERSION := 5.2.2
|
||||
LIKWID_VERSION := 5.3.0
|
||||
LIKWID_INSTALLED_FOLDER := $(shell dirname $$(which likwid-topology 2>/dev/null) 2>/dev/null)
|
||||
|
||||
LIKWID_FOLDER := $(CURDIR)/likwid
|
||||
|
@@ -41,6 +41,7 @@ var AvailableCollectors = map[string]MetricCollector{
|
||||
"self": new(SelfCollector),
|
||||
"schedstat": new(SchedstatCollector),
|
||||
"nfsiostat": new(NfsIOStatCollector),
|
||||
"likwidenergy": new(LikwidEnergyCollector),
|
||||
}
|
||||
|
||||
// Metric collector manager data structure
|
||||
|
@@ -99,7 +99,10 @@ func (m *CustomCmdCollector) Read(interval time.Duration, output chan lp.CCMetri
|
||||
continue
|
||||
}
|
||||
|
||||
output <- lp.FromInfluxMetric(c)
|
||||
y := lp.FromInfluxMetric(c)
|
||||
if err == nil {
|
||||
output <- y
|
||||
}
|
||||
}
|
||||
}
|
||||
for _, file := range m.files {
|
||||
@@ -118,7 +121,10 @@ func (m *CustomCmdCollector) Read(interval time.Duration, output chan lp.CCMetri
|
||||
if skip {
|
||||
continue
|
||||
}
|
||||
output <- lp.FromInfluxMetric(f)
|
||||
y := lp.FromInfluxMetric(f)
|
||||
if err == nil {
|
||||
output <- y
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@@ -374,14 +374,6 @@ func (m *LikwidCollector) takeMeasurement(evidx int, evset LikwidEventsetConfig,
|
||||
}
|
||||
defer watcher.Close()
|
||||
if len(m.config.LockfilePath) > 0 {
|
||||
if _, err := os.Stat(m.config.LockfilePath); os.IsNotExist(err) {
|
||||
file, err := os.Create(m.config.LockfilePath)
|
||||
if err != nil {
|
||||
cclog.ComponentError(m.name, "Cannot create lockfile", m.config.LockfilePath, ":", err.Error())
|
||||
return true, err
|
||||
}
|
||||
file.Close()
|
||||
}
|
||||
info, err := os.Stat(m.config.LockfilePath)
|
||||
if err != nil {
|
||||
return true, err
|
||||
@@ -390,9 +382,9 @@ func (m *LikwidCollector) takeMeasurement(evidx int, evset LikwidEventsetConfig,
|
||||
if uid != uint32(os.Getuid()) {
|
||||
usr, err := user.LookupId(fmt.Sprint(uid))
|
||||
if err == nil {
|
||||
return true, fmt.Errorf("access to performance counters locked by %s", usr.Username)
|
||||
return true, fmt.Errorf("Access to performance counters locked by %s", usr.Username)
|
||||
} else {
|
||||
return true, fmt.Errorf("access to performance counters locked by %d", uid)
|
||||
return true, fmt.Errorf("Access to performance counters locked by %d", uid)
|
||||
}
|
||||
}
|
||||
err = watcher.Add(m.config.LockfilePath)
|
||||
|
@@ -267,45 +267,3 @@ IPC PMC0/PMC1 -> {
|
||||
```
|
||||
|
||||
The script `scripts/likwid_perfgroup_to_cc_config.py` might help you.
|
||||
|
||||
### Internal structure
|
||||
|
||||
This section describes the internal structure of the `likwid` collector.
|
||||
|
||||
#### At initialization
|
||||
|
||||
After setting the defaults, the configuration is read.
|
||||
|
||||
Based on the configuration, the library is searched using `dlopen` to see whether it makes sense to proceed.
|
||||
|
||||
Next, the user-given metrics are tested to ensure they can be evaluated. For this, it creates a list of all user-given events/counters with the value `1.0` which is provided to the metric evaluator. The same is done for the global metrics by using the metric names with value `1.0`. If the evaluator does not fail, the metric can be evaluated and the collector initialization can proceed.
|
||||
|
||||
A separate thread is started to do the measurement. This is not done using a common goroutine but a real application thread with full control. This is required because LIKWID's access system tracks the processes of the using application and the PID should not change between measurements because that would require teardown and reopening of the access system.
|
||||
|
||||
With the separate thread, the access system is initialized by setting the user-given access mode and adding all hardware threads.
|
||||
|
||||
LIKWID measures per hardware thread in general but only some HW threads read the counters available only e.g. per CPU socket (often memory traffic). For this, the collector gets the system topology through LIKWID and creates different mappings like 'hwthread to list offset' and others. With this, the hardware threads responsible for a topological entity can be determined because those read the counters of the per CPU socket units. These mappings are later used in the measurement phase.
|
||||
|
||||
In the end, we read the base CPU frequency of the system. It may be used in the metric evaluation.
|
||||
|
||||
#### Measurements
|
||||
|
||||
The reading of events is done by the separate application thread.
|
||||
|
||||
It traverses over all configured event sets, creates valid LIKWID eventstrings out of them and pass them to take a measurement. This could be done only once but when the LIKWID lock changes, LIKWID has to be completely reopened to provide access again. With this reopening, the already added event sets are gone.
|
||||
|
||||
LIKWID has it's own locking mechanism using a lock file. But not the content of the file is of interest but the owner. In order to track changes of the file, a `fsnotify` watcher is installed on the file. If the file does not exist, it is created and consequently is owned by the same user as `cc-metric-collector`. The LikwidCollector has to watch the file on it's own because LIKWID does not provide proper error handling for this.
|
||||
|
||||
Each call to the LIKWID library for loading the event set, setting up the counting facilities as well as starting and stopping of the counters is wrapped into lockfile checks to ensure no state change happens. If the file owner changed, the LikwidCollector cannot access the counters anymore, so no further operation can be done and measurment stops.
|
||||
|
||||
Although start/stop would be sufficient, the LikwidCollector performs start, read, wait, read, `getLastResult`, stop. Reason might be "historic" but is not 100% clear anymore. The author failed to document ;)
|
||||
|
||||
#### Metric evaluation
|
||||
|
||||
After each meaurement, the metrics of the event set are directly evaluated. It updates the counter->result mapping with the new measurements, calls the evaluator and generates the `CCMetric` with the user-given settings if it should be published. Each metric name to result calculation is stored for the global metric evaluation, which is done as a final step.
|
||||
|
||||
#### Shutdown
|
||||
|
||||
Since each measurment involves a complete initialize to finalize cycle of the LIKWID library, only the topology module needs to be closed.
|
||||
|
||||
Moreover, the separate application thread is stopped.
|
281
collectors/likwidenergyMetric.go
Normal file
281
collectors/likwidenergyMetric.go
Normal file
@@ -0,0 +1,281 @@
|
||||
package collectors
|
||||
|
||||
/*
|
||||
#cgo CFLAGS: -I./likwid
|
||||
#cgo LDFLAGS: -Wl,--unresolved-symbols=ignore-in-object-files
|
||||
#include <stdlib.h>
|
||||
#include <likwid.h>
|
||||
*/
|
||||
import "C"
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"os"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
cclog "github.com/ClusterCockpit/cc-metric-collector/pkg/ccLogger"
|
||||
lp "github.com/ClusterCockpit/cc-metric-collector/pkg/ccMetric"
|
||||
topo "github.com/ClusterCockpit/cc-metric-collector/pkg/ccTopology"
|
||||
"github.com/NVIDIA/go-nvml/pkg/dl"
|
||||
)
|
||||
|
||||
const (
|
||||
LIKWIDENERGY_LIB_NAME = "liblikwid.so"
|
||||
LIKWIDENERGY_LIB_DL_FLAGS = dl.RTLD_LAZY | dl.RTLD_GLOBAL
|
||||
LIKWIDENERGY_DEF_ACCESSMODE = "direct"
|
||||
LIKWIDENERGY_DEF_LOCKFILE = "/var/run/likwid.lock"
|
||||
)
|
||||
|
||||
// These are the fields we read from the JSON configuration
|
||||
type LikwidEnergyCollectorConfig struct {
|
||||
AccessMode string `json:"access_mode,omitempty"`
|
||||
DaemonPath string `json:"accessdaemon_path,omitempty"`
|
||||
LibraryPath string `json:"liblikwid_path,omitempty"`
|
||||
LockfilePath string `json:"lockfile_path,omitempty"`
|
||||
SendDiff bool `json:"send_difference,omitempty"`
|
||||
SendAbs bool `json:"send_absolute,omitempty"`
|
||||
}
|
||||
|
||||
type LikwidEnergyDomainEntry struct {
|
||||
readcpu int
|
||||
value uint32
|
||||
total uint64
|
||||
tags map[string]string
|
||||
}
|
||||
|
||||
type LikwidEnergyDomain struct {
|
||||
values map[int]LikwidEnergyDomainEntry
|
||||
granularity string
|
||||
metricname string
|
||||
domaintype int
|
||||
energyUnit float64
|
||||
}
|
||||
|
||||
// This contains all variables we need during execution and the variables
|
||||
// defined by metricCollector (name, init, ...)
|
||||
type LikwidEnergyCollector struct {
|
||||
metricCollector
|
||||
config LikwidEnergyCollectorConfig // the configuration structure
|
||||
meta map[string]string // default meta information
|
||||
tags map[string]string // default tags
|
||||
domains map[int]LikwidEnergyDomain
|
||||
}
|
||||
|
||||
// Init initializes the sample collector
|
||||
// Called once by the collector manager
|
||||
// All tags, meta data tags and metrics that do not change over the runtime should be set here
|
||||
func (m *LikwidEnergyCollector) Init(config json.RawMessage) error {
|
||||
var err error = nil
|
||||
// Always set the name early in Init() to use it in cclog.Component* functions
|
||||
m.name = "LikwidEnergyCollector"
|
||||
// This is for later use, also call it early
|
||||
m.setup()
|
||||
// Tell whether the collector should be run in parallel with others (reading files, ...)
|
||||
// or it should be run serially, mostly for collectors actually doing measurements
|
||||
// because they should not measure the execution of the other collectors
|
||||
m.parallel = true
|
||||
// Define meta information sent with each metric
|
||||
// (Can also be dynamic or this is the basic set with extension through AddMeta())
|
||||
m.meta = map[string]string{"source": m.name, "group": "LIKWID", "unit": "Joules"}
|
||||
// Define tags sent with each metric
|
||||
// The 'type' tag is always needed, it defines the granularity of the metric
|
||||
// node -> whole system
|
||||
// socket -> CPU socket (requires socket ID as 'type-id' tag)
|
||||
// die -> CPU die (requires CPU die ID as 'type-id' tag)
|
||||
// memoryDomain -> NUMA domain (requires NUMA domain ID as 'type-id' tag)
|
||||
// llc -> Last level cache (requires last level cache ID as 'type-id' tag)
|
||||
// core -> single CPU core that may consist of multiple hardware threads (SMT) (requires core ID as 'type-id' tag)
|
||||
// hwthtread -> single CPU hardware thread (requires hardware thread ID as 'type-id' tag)
|
||||
// accelerator -> A accelerator device like GPU or FPGA (requires an accelerator ID as 'type-id' tag)
|
||||
m.tags = map[string]string{}
|
||||
// Read in the JSON configuration
|
||||
m.config.AccessMode = LIKWID_DEF_ACCESSMODE
|
||||
m.config.LibraryPath = LIKWID_LIB_NAME
|
||||
m.config.LockfilePath = LIKWID_DEF_LOCKFILE
|
||||
m.config.SendAbs = true
|
||||
m.config.SendDiff = true
|
||||
if len(config) > 0 {
|
||||
err = json.Unmarshal(config, &m.config)
|
||||
if err != nil {
|
||||
cclog.ComponentError(m.name, "Error reading config:", err.Error())
|
||||
return err
|
||||
}
|
||||
}
|
||||
cclog.ComponentDebug(m.name, "Opening ", m.config.LibraryPath)
|
||||
lib := dl.New(m.config.LibraryPath, LIKWID_LIB_DL_FLAGS)
|
||||
if lib == nil {
|
||||
return fmt.Errorf("error instantiating DynamicLibrary for %s", m.config.LibraryPath)
|
||||
}
|
||||
err = lib.Open()
|
||||
if err != nil {
|
||||
return fmt.Errorf("error opening %s: %v", m.config.LibraryPath, err)
|
||||
}
|
||||
cclog.ComponentDebug(m.name, "Init topology ", m.config.AccessMode)
|
||||
ret := C.topology_init()
|
||||
if ret != 0 {
|
||||
return fmt.Errorf("error initializing topology: %d", ret)
|
||||
}
|
||||
cclog.ComponentDebug(m.name, "Setting accessmode ", m.config.AccessMode)
|
||||
switch m.config.AccessMode {
|
||||
case "direct":
|
||||
C.HPMmode(0)
|
||||
case "accessdaemon":
|
||||
if len(m.config.DaemonPath) > 0 {
|
||||
p := os.Getenv("PATH")
|
||||
os.Setenv("PATH", m.config.DaemonPath+":"+p)
|
||||
}
|
||||
C.HPMmode(1)
|
||||
retCode := C.HPMinit()
|
||||
if retCode != 0 {
|
||||
err := fmt.Errorf("C.HPMinit() failed with return code %v", retCode)
|
||||
cclog.ComponentError(m.name, err.Error())
|
||||
}
|
||||
}
|
||||
initCpus := make([]int, 0)
|
||||
ret = C.HPMaddThread(0)
|
||||
if ret != 0 {
|
||||
return fmt.Errorf("error initializing access: %d", ret)
|
||||
}
|
||||
initCpus = append(initCpus, 0)
|
||||
|
||||
cinfo := C.get_cpuInfo()
|
||||
domainnames := make(map[int]string)
|
||||
if cinfo.isIntel == C.int(1) {
|
||||
domainnames[0] = "pkg"
|
||||
domainnames[1] = "pp0"
|
||||
domainnames[2] = "pp1"
|
||||
domainnames[3] = "dram"
|
||||
domainnames[4] = "platform"
|
||||
} else {
|
||||
switch cinfo.family {
|
||||
case 0x17:
|
||||
domainnames[0] = "core"
|
||||
domainnames[1] = "pkg"
|
||||
case 0x19:
|
||||
switch cinfo.model {
|
||||
case 0x01, 0x21, 0x50:
|
||||
domainnames[0] = "core"
|
||||
domainnames[1] = "pkg"
|
||||
case 0x61, 0x11:
|
||||
domainnames[0] = "core"
|
||||
domainnames[1] = "l3"
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Set up everything that the collector requires during the Read() execution
|
||||
// Check files required, test execution of some commands, create data structure
|
||||
// for all topological entities (sockets, NUMA domains, ...)
|
||||
// Return some useful error message in case of any failures
|
||||
cclog.ComponentDebug(m.name, "Initializing Power module")
|
||||
ret = C.power_init(0)
|
||||
if ret == C.int(0) {
|
||||
cclog.ComponentPrint(m.name, "No RAPL support")
|
||||
}
|
||||
m.domains = make(map[int]LikwidEnergyDomain)
|
||||
Pinfo := C.get_powerInfo()
|
||||
for i := 0; i < int(Pinfo.numDomains); i++ {
|
||||
d := Pinfo.domains[C.int(i)]
|
||||
name := domainnames[int(d._type)]
|
||||
domain := LikwidEnergyDomain{
|
||||
values: make(map[int]LikwidEnergyDomainEntry),
|
||||
metricname: fmt.Sprintf("likwidenergy_%s", strings.ToLower(name)),
|
||||
granularity: "socket",
|
||||
domaintype: int(d._type),
|
||||
energyUnit: float64(C.power_getEnergyUnit(C.int(d._type))),
|
||||
}
|
||||
if name == "core" {
|
||||
domain.granularity = "core"
|
||||
}
|
||||
|
||||
for _, c := range topo.GetTypeList(domain.granularity) {
|
||||
clist := topo.GetSocketHwthreads(c)
|
||||
if len(clist) > 0 {
|
||||
var cur C.PowerData
|
||||
if _, ok := intArrayContains(initCpus, clist[0]); !ok {
|
||||
initCpus = append(initCpus, clist[0])
|
||||
C.HPMaddThread(C.int(clist[0]))
|
||||
}
|
||||
cclog.ComponentDebug(m.name, "Reading current value on CPU ", clist[0], " for ", domain.metricname, "on", domain.granularity, c)
|
||||
ret = C.power_start(&cur, C.int(clist[0]), C.PowerType(domain.domaintype))
|
||||
cclog.ComponentDebug(m.name, "Reading ", uint64(cur.before))
|
||||
if ret == 0 {
|
||||
domain.values[c] = LikwidEnergyDomainEntry{
|
||||
readcpu: clist[0],
|
||||
value: uint32(cur.before),
|
||||
total: uint64(cur.before),
|
||||
tags: map[string]string{
|
||||
"type": domain.granularity,
|
||||
"type-id": fmt.Sprintf("%d", c),
|
||||
},
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
cclog.ComponentDebug(m.name, "Adding domain ", domain.metricname, " with granularity ", domain.granularity)
|
||||
m.domains[domain.domaintype] = domain
|
||||
}
|
||||
|
||||
// Set this flag only if everything is initialized properly, all required files exist, ...
|
||||
m.init = true
|
||||
return err
|
||||
}
|
||||
|
||||
// Read collects all metrics belonging to the sample collector
|
||||
// and sends them through the output channel to the collector manager
|
||||
func (m *LikwidEnergyCollector) Read(interval time.Duration, output chan lp.CCMetric) {
|
||||
// Create a sample metric
|
||||
timestamp := time.Now()
|
||||
|
||||
for dt, domain := range m.domains {
|
||||
for i, entry := range domain.values {
|
||||
var cur C.PowerData
|
||||
ret := C.power_start(&cur, C.int(entry.readcpu), C.PowerType(dt))
|
||||
if ret == 0 {
|
||||
now := uint32(cur.before)
|
||||
diff := now - entry.value
|
||||
if now < entry.value {
|
||||
diff = (^uint32(0)) - entry.value
|
||||
diff += now
|
||||
}
|
||||
if m.config.SendDiff {
|
||||
y, err := lp.New(domain.metricname, entry.tags, m.meta, map[string]interface{}{"value": float64(diff) * domain.energyUnit}, timestamp)
|
||||
if err == nil {
|
||||
for k, v := range m.tags {
|
||||
y.AddTag(k, v)
|
||||
}
|
||||
// Send it to output channel
|
||||
output <- y
|
||||
}
|
||||
}
|
||||
if m.config.SendAbs {
|
||||
total := float64(entry.total + uint64(diff))
|
||||
|
||||
y, err := lp.New(fmt.Sprintf("%s_abs", domain.metricname), entry.tags, m.meta, map[string]interface{}{"value": total * domain.energyUnit}, timestamp)
|
||||
if err == nil {
|
||||
for k, v := range m.tags {
|
||||
y.AddTag(k, v)
|
||||
}
|
||||
// Send it to output channel
|
||||
output <- y
|
||||
}
|
||||
}
|
||||
entry.value = uint32(cur.before)
|
||||
entry.total += uint64(diff)
|
||||
domain.values[i] = entry
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
// Close metric collector: close network connection, close files, close libraries, ...
|
||||
// Called once by the collector manager
|
||||
func (m *LikwidEnergyCollector) Close() {
|
||||
C.power_finalize()
|
||||
C.topology_finalize()
|
||||
// Unset flag
|
||||
m.init = false
|
||||
}
|
20
collectors/likwidenergyMetric.md
Normal file
20
collectors/likwidenergyMetric.md
Normal file
@@ -0,0 +1,20 @@
|
||||
## `likwidenergy` collector
|
||||
|
||||
In contrast to the more general [`likwid` collector](./likwidMetric.md), this collector just reads the RAPL counters to provide energy metrics. In contrast to the `likwid` collector, this collector keeps the energy counters running the whole time and not just of a measurement interval. It covers all RAPL domains (`PKG`, `DRAM`, `PP0`, `PP1`, ...). Depending whether the domain is per socket, per L3 segment or per core, metrics are read and send.
|
||||
|
||||
```json
|
||||
{
|
||||
"likwidenergy" : {
|
||||
"liblikwid_path" : "/path/to/liblikwid.so",
|
||||
"accessdaemon_path" : "/folder/that/contains/likwid-accessD",
|
||||
"access_mode" : "direct or accessdaemon",
|
||||
"send_difference": true,
|
||||
"send_absolute": true
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
The first three entries (`liblikwid_path`, `accessdaemon_path` and `access_mode`) are required to set up the access to the RAPL counters. The `access_mode` = `perf_event` is not supported at the moment.
|
||||
|
||||
With `send_differences` the difference to the last measurement is provided to the system. With `send_absolute`, the absolute value since start of the system is submitted as metric. It reads the counter at initialization and then updates the value after each measurement.
|
||||
|
24
go.mod
24
go.mod
@@ -1,6 +1,6 @@
|
||||
module github.com/ClusterCockpit/cc-metric-collector
|
||||
|
||||
go 1.21
|
||||
go 1.20
|
||||
|
||||
require (
|
||||
github.com/ClusterCockpit/cc-units v0.4.0
|
||||
@@ -12,30 +12,30 @@ require (
|
||||
github.com/influxdata/influxdb-client-go/v2 v2.13.0
|
||||
github.com/influxdata/line-protocol v0.0.0-20210922203350-b1ad95c89adf
|
||||
github.com/influxdata/line-protocol/v2 v2.2.1
|
||||
github.com/nats-io/nats.go v1.33.1
|
||||
github.com/prometheus/client_golang v1.19.0
|
||||
github.com/nats-io/nats.go v1.32.0
|
||||
github.com/prometheus/client_golang v1.18.0
|
||||
github.com/stmcginnis/gofish v0.15.0
|
||||
github.com/tklauser/go-sysconf v0.3.13
|
||||
golang.design/x/thread v0.0.0-20210122121316-335e9adffdf1
|
||||
golang.org/x/exp v0.0.0-20240222234643-814bf88cf225
|
||||
golang.org/x/sys v0.18.0
|
||||
golang.org/x/exp v0.0.0-20240119083558-1b970713d09a
|
||||
golang.org/x/sys v0.16.0
|
||||
)
|
||||
|
||||
require (
|
||||
github.com/apapsch/go-jsonmerge/v2 v2.0.0 // indirect
|
||||
github.com/beorn7/perks v1.0.1 // indirect
|
||||
github.com/cespare/xxhash/v2 v2.2.0 // indirect
|
||||
github.com/google/uuid v1.6.0 // indirect
|
||||
github.com/klauspost/compress v1.17.7 // indirect
|
||||
github.com/google/uuid v1.5.0 // indirect
|
||||
github.com/klauspost/compress v1.17.4 // indirect
|
||||
github.com/nats-io/nkeys v0.4.7 // indirect
|
||||
github.com/nats-io/nuid v1.0.1 // indirect
|
||||
github.com/oapi-codegen/runtime v1.1.1 // indirect
|
||||
github.com/prometheus/client_model v0.6.0 // indirect
|
||||
github.com/prometheus/common v0.49.0 // indirect
|
||||
github.com/prometheus/client_model v0.5.0 // indirect
|
||||
github.com/prometheus/common v0.46.0 // indirect
|
||||
github.com/prometheus/procfs v0.12.0 // indirect
|
||||
github.com/shopspring/decimal v1.3.1 // indirect
|
||||
github.com/tklauser/numcpus v0.7.0 // indirect
|
||||
golang.org/x/crypto v0.21.0 // indirect
|
||||
golang.org/x/net v0.22.0 // indirect
|
||||
google.golang.org/protobuf v1.33.0 // indirect
|
||||
golang.org/x/crypto v0.18.0 // indirect
|
||||
golang.org/x/net v0.20.0 // indirect
|
||||
google.golang.org/protobuf v1.32.0 // indirect
|
||||
)
|
||||
|
58
go.sum
58
go.sum
@@ -7,6 +7,7 @@ github.com/NVIDIA/go-nvml v0.12.0-2 h1:Sg239yy7jmopu/cuvYauoMj9fOpcGMngxVxxS1EBX
|
||||
github.com/NVIDIA/go-nvml v0.12.0-2/go.mod h1:7ruy85eOM73muOc/I37euONSwEyFqZsv5ED9AogD4G0=
|
||||
github.com/PaesslerAG/gval v1.2.2 h1:Y7iBzhgE09IGTt5QgGQ2IdaYYYOU134YGHBThD+wm9E=
|
||||
github.com/PaesslerAG/gval v1.2.2/go.mod h1:XRFLwvmkTEdYziLdaCeCa5ImcGVrfQbeNUbVR+C6xac=
|
||||
github.com/PaesslerAG/jsonpath v0.1.0 h1:gADYeifvlqK3R3i2cR5B4DGgxLXIPb3TRTH1mGi0jPI=
|
||||
github.com/PaesslerAG/jsonpath v0.1.0/go.mod h1:4BzmtoM/PI8fPO4aQGIusjGxGir2BzcV0grWtFzq1Y8=
|
||||
github.com/RaveNoX/go-jsoncommentstrip v1.0.0/go.mod h1:78ihd09MekBnJnxpICcwzCMzGrKSKYe4AqU6PDYYpjk=
|
||||
github.com/apapsch/go-jsonmerge/v2 v2.0.0 h1:axGnT1gRIfimI7gJifB699GoE/oq+F2MU7Dml6nw9rQ=
|
||||
@@ -18,16 +19,20 @@ github.com/cespare/xxhash/v2 v2.2.0 h1:DC2CZ1Ep5Y4k3ZQ899DldepgrayRUGE6BBZ/cd9Cj
|
||||
github.com/cespare/xxhash/v2 v2.2.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=
|
||||
github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E=
|
||||
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
||||
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
|
||||
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
||||
github.com/frankban/quicktest v1.11.0/go.mod h1:K+q6oSqb0W0Ininfk863uOk1lMy69l/P6txr3mVT54s=
|
||||
github.com/frankban/quicktest v1.11.2/go.mod h1:K+q6oSqb0W0Ininfk863uOk1lMy69l/P6txr3mVT54s=
|
||||
github.com/frankban/quicktest v1.13.0 h1:yNZif1OkDfNoDfb9zZa9aXIpejNR4F23Wely0c+Qdqk=
|
||||
github.com/frankban/quicktest v1.13.0/go.mod h1:qLE0fzW0VuyUAJgPU19zByoIr0HtCHN/r/VLSOOIySU=
|
||||
github.com/fsnotify/fsnotify v1.7.0 h1:8JEhPFa5W2WU7YfeZzPNqzMP6Lwt7L2715Ggo0nosvA=
|
||||
github.com/fsnotify/fsnotify v1.7.0/go.mod h1:40Bi/Hjc2AVfZrqy+aj+yEI+/bRxZnMJyTJwOpGvigM=
|
||||
github.com/google/go-cmp v0.5.2/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
|
||||
github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
|
||||
github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
|
||||
github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
|
||||
github.com/google/go-cmp v0.5.9 h1:O2Tfq5qg4qc4AmwVlvv0oLiVAGB7enBSJ2x2DqQFi38=
|
||||
github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
|
||||
github.com/google/uuid v1.5.0 h1:1p67kYwdtXjb0gL0BPiP1Av9wiZPo5A8z2cWkTZ+eyU=
|
||||
github.com/google/uuid v1.5.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
|
||||
github.com/gorilla/mux v1.8.1 h1:TuBL49tXwgrFYWhqrNgrUNEY92u81SPhu7sTdzQEiWY=
|
||||
github.com/gorilla/mux v1.8.1/go.mod h1:AKf9I4AEqPTmMytcMc0KkNouC66V3BtZ4qD5fmWSiMQ=
|
||||
github.com/influxdata/influxdb-client-go/v2 v2.13.0 h1:ioBbLmR5NMbAjP4UVA5r9b5xGjpABD7j65pI8kFphDM=
|
||||
@@ -35,20 +40,24 @@ github.com/influxdata/influxdb-client-go/v2 v2.13.0/go.mod h1:k+spCbt9hcvqvUiz0s
|
||||
github.com/influxdata/line-protocol v0.0.0-20210922203350-b1ad95c89adf h1:7JTmneyiNEwVBOHSjoMxiWAqB992atOeepeFYegn5RU=
|
||||
github.com/influxdata/line-protocol v0.0.0-20210922203350-b1ad95c89adf/go.mod h1:xaLFMmpvUxqXtVkUJfg9QmT88cDaCJ3ZKgdZ78oO8Qo=
|
||||
github.com/influxdata/line-protocol-corpus v0.0.0-20210519164801-ca6fa5da0184/go.mod h1:03nmhxzZ7Xk2pdG+lmMd7mHDfeVOYFyhOgwO61qWU98=
|
||||
github.com/influxdata/line-protocol-corpus v0.0.0-20210922080147-aa28ccfb8937 h1:MHJNQ+p99hFATQm6ORoLmpUCF7ovjwEFshs/NHzAbig=
|
||||
github.com/influxdata/line-protocol-corpus v0.0.0-20210922080147-aa28ccfb8937/go.mod h1:BKR9c0uHSmRgM/se9JhFHtTT7JTO67X23MtKMHtZcpo=
|
||||
github.com/influxdata/line-protocol/v2 v2.0.0-20210312151457-c52fdecb625a/go.mod h1:6+9Xt5Sq1rWx+glMgxhcg2c0DUaehK+5TDcPZ76GypY=
|
||||
github.com/influxdata/line-protocol/v2 v2.1.0/go.mod h1:QKw43hdUBg3GTk2iC3iyCxksNj7PX9aUSeYOYE/ceHY=
|
||||
github.com/influxdata/line-protocol/v2 v2.2.1 h1:EAPkqJ9Km4uAxtMRgUubJyqAr6zgWM0dznKMLRauQRE=
|
||||
github.com/influxdata/line-protocol/v2 v2.2.1/go.mod h1:DmB3Cnh+3oxmG6LOBIxce4oaL4CPj3OmMPgvauXh+tM=
|
||||
github.com/juju/gnuflag v0.0.0-20171113085948-2ce1bb71843d/go.mod h1:2PavIy+JPciBPrBUjwbNvtwB6RQlve+hkpll6QSNmOE=
|
||||
github.com/klauspost/compress v1.17.7 h1:ehO88t2UGzQK66LMdE8tibEd1ErmzZjNEqWkjLAKQQg=
|
||||
github.com/klauspost/compress v1.17.7/go.mod h1:Di0epgTjJY877eYKx5yC51cX2A2Vl2ibi7bDH9ttBbw=
|
||||
github.com/klauspost/compress v1.17.4 h1:Ej5ixsIri7BrIjBkRZLTo6ghwrEtHFk7ijlczPW4fZ4=
|
||||
github.com/klauspost/compress v1.17.4/go.mod h1:/dCuZOvVtNoHsyb+cuJD3itjs3NbnF6KH9zAO4BDxPM=
|
||||
github.com/kr/pretty v0.2.1/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI=
|
||||
github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE=
|
||||
github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk=
|
||||
github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=
|
||||
github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI=
|
||||
github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
|
||||
github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
|
||||
github.com/nats-io/nats.go v1.33.1 h1:8TxLZZ/seeEfR97qV0/Bl939tpDnt2Z2fK3HkPypj70=
|
||||
github.com/nats-io/nats.go v1.33.1/go.mod h1:Ubdu4Nh9exXdSz0RVWRFBbRfrbSxOYd26oF0wkWclB8=
|
||||
github.com/nats-io/nats.go v1.32.0 h1:Bx9BZS+aXYlxW08k8Gd3yR2s73pV5XSoAQUyp1Kwvp0=
|
||||
github.com/nats-io/nats.go v1.32.0/go.mod h1:Ubdu4Nh9exXdSz0RVWRFBbRfrbSxOYd26oF0wkWclB8=
|
||||
github.com/nats-io/nkeys v0.4.7 h1:RwNJbbIdYCoClSDNY7QVKZlyb/wfT6ugvFCiKy6vDvI=
|
||||
github.com/nats-io/nkeys v0.4.7/go.mod h1:kqXRgRDPlGy7nGaEDMuYzmiJCIAAWDK0IMBtDmGD0nc=
|
||||
github.com/nats-io/nuid v1.0.1 h1:5iA8DT8V7q8WK2EScv2padNa/rTESc1KdnPw4TC2paw=
|
||||
@@ -56,15 +65,18 @@ github.com/nats-io/nuid v1.0.1/go.mod h1:19wcPz3Ph3q0Jbyiqsd0kePYG7A95tJPxeL+1OS
|
||||
github.com/niemeyer/pretty v0.0.0-20200227124842-a10e7caefd8e/go.mod h1:zD1mROLANZcx1PVRCS0qkT7pwLkGfwJo4zjcN/Tysno=
|
||||
github.com/oapi-codegen/runtime v1.1.1 h1:EXLHh0DXIJnWhdRPN2w4MXAzFyE4CskzhNLUmtpMYro=
|
||||
github.com/oapi-codegen/runtime v1.1.1/go.mod h1:SK9X900oXmPWilYR5/WKPzt3Kqxn/uS/+lbpREv+eCg=
|
||||
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
|
||||
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
|
||||
github.com/prometheus/client_golang v1.19.0 h1:ygXvpU1AoN1MhdzckN+PyD9QJOSD4x7kmXYlnfbA6JU=
|
||||
github.com/prometheus/client_golang v1.19.0/go.mod h1:ZRM9uEAypZakd+q/x7+gmsvXdURP+DABIEIjnmDdp+k=
|
||||
github.com/prometheus/client_model v0.6.0 h1:k1v3CzpSRUTrKMppY35TLwPvxHqBu0bYgxZzqGIgaos=
|
||||
github.com/prometheus/client_model v0.6.0/go.mod h1:NTQHnmxFpouOD0DpvP4XujX3CdOAGQPoaGhyTchlyt8=
|
||||
github.com/prometheus/common v0.49.0 h1:ToNTdK4zSnPVJmh698mGFkDor9wBI/iGaJy5dbH1EgI=
|
||||
github.com/prometheus/common v0.49.0/go.mod h1:Kxm+EULxRbUkjGU6WFsQqo3ORzB4tyKvlWFOE9mB2sE=
|
||||
github.com/prometheus/client_golang v1.18.0 h1:HzFfmkOzH5Q8L8G+kSJKUx5dtG87sewO+FoDDqP5Tbk=
|
||||
github.com/prometheus/client_golang v1.18.0/go.mod h1:T+GXkCk5wSJyOqMIzVgvvjFDlkOQntgjkJWKrN5txjA=
|
||||
github.com/prometheus/client_model v0.5.0 h1:VQw1hfvPvk3Uv6Qf29VrPF32JB6rtbgI6cYPYQjL0Qw=
|
||||
github.com/prometheus/client_model v0.5.0/go.mod h1:dTiFglRmd66nLR9Pv9f0mZi7B7fk5Pm3gvsjB5tr+kI=
|
||||
github.com/prometheus/common v0.46.0 h1:doXzt5ybi1HBKpsZOL0sSkaNHJJqkyfEWZGGqqScV0Y=
|
||||
github.com/prometheus/common v0.46.0/go.mod h1:Tp0qkxpb9Jsg54QMe+EAmqXkSV7Evdy1BTn+g2pa/hQ=
|
||||
github.com/prometheus/procfs v0.12.0 h1:jluTpSng7V9hY0O2R9DzzJHYb2xULk9VTR1V1R/k6Bo=
|
||||
github.com/prometheus/procfs v0.12.0/go.mod h1:pcuDEFsWDnvcgNzo4EEweacyhjeA9Zk3cnaOZAZEfOo=
|
||||
github.com/rogpeppe/go-internal v1.10.0 h1:TMyTOH3F/DB16zRVcYyreMH6GnZZrwQVAoYjRBZyWFQ=
|
||||
github.com/rogpeppe/go-internal v1.10.0/go.mod h1:UQnix2H7Ngw/k4C5ijL5+65zddjncjaFoBhdsK/akog=
|
||||
github.com/shopspring/decimal v1.3.1 h1:2Usl1nmF/WZucqkFZhnfFYxxxu8LG21F6nPQBE5gKV8=
|
||||
github.com/shopspring/decimal v1.3.1/go.mod h1:DKyhrW/HYNuLGql+MJL6WCR6knT2jwCFRcu2hWCYk4o=
|
||||
github.com/spkg/bom v0.0.0-20160624110644-59b7046e48ad/go.mod h1:qLr4V1qq6nMqFKkMo8ZTx3f+BZEkzsRUY10Xsm2mwU0=
|
||||
@@ -76,6 +88,7 @@ github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpE
|
||||
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
|
||||
github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
|
||||
github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU=
|
||||
github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk=
|
||||
github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo=
|
||||
github.com/tklauser/go-sysconf v0.3.13 h1:GBUpcahXSpR2xN01jhkNAbTLRk2Yzgggk8IM08lq3r4=
|
||||
github.com/tklauser/go-sysconf v0.3.13/go.mod h1:zwleP4Q4OehZHGn4CYZDipCgg9usW5IJePewFCGVEa0=
|
||||
@@ -83,20 +96,21 @@ github.com/tklauser/numcpus v0.7.0 h1:yjuerZP127QG9m5Zh/mSO4wqurYil27tHrqwRoRjpr
|
||||
github.com/tklauser/numcpus v0.7.0/go.mod h1:bb6dMVcj8A42tSE7i32fsIUCbQNllK5iDguyOZRUzAY=
|
||||
golang.design/x/thread v0.0.0-20210122121316-335e9adffdf1 h1:P7S/GeHBAFEZIYp0ePPs2kHXoazz8q2KsyxHyQVGCJg=
|
||||
golang.design/x/thread v0.0.0-20210122121316-335e9adffdf1/go.mod h1:9CWpnTUmlQkfdpdutA1nNf4iE5lAVt3QZOu0Z6hahBE=
|
||||
golang.org/x/crypto v0.21.0 h1:X31++rzVUdKhX5sWmSOFZxx8UW/ldWx55cbf08iNAMA=
|
||||
golang.org/x/crypto v0.21.0/go.mod h1:0BP7YvVV9gBbVKyeTG0Gyn+gZm94bibOW5BjDEYAOMs=
|
||||
golang.org/x/exp v0.0.0-20240222234643-814bf88cf225 h1:LfspQV/FYTatPTr/3HzIcmiUFH7PGP+OQ6mgDYo3yuQ=
|
||||
golang.org/x/exp v0.0.0-20240222234643-814bf88cf225/go.mod h1:CxmFvTBINI24O/j8iY7H1xHzx2i4OsyguNBmN/uPtqc=
|
||||
golang.org/x/net v0.22.0 h1:9sGLhx7iRIHEiX0oAJ3MRZMUCElJgy7Br1nO+AMN3Tc=
|
||||
golang.org/x/net v0.22.0/go.mod h1:JKghWKKOSdJwpW2GEx0Ja7fmaKnMsbu+MWVZTokSYmg=
|
||||
golang.org/x/crypto v0.18.0 h1:PGVlW0xEltQnzFZ55hkuX5+KLyrMYhHld1YHO4AKcdc=
|
||||
golang.org/x/crypto v0.18.0/go.mod h1:R0j02AL6hcrfOiy9T4ZYp/rcWeMxM3L6QYxlOuEG1mg=
|
||||
golang.org/x/exp v0.0.0-20240119083558-1b970713d09a h1:Q8/wZp0KX97QFTc2ywcOE0YRjZPVIx+MXInMzdvQqcA=
|
||||
golang.org/x/exp v0.0.0-20240119083558-1b970713d09a/go.mod h1:idGWGoKP1toJGkd5/ig9ZLuPcZBC3ewk7SzmH0uou08=
|
||||
golang.org/x/net v0.20.0 h1:aCL9BSgETF1k+blQaYUBx9hJ9LOGP3gAVemcZlf1Kpo=
|
||||
golang.org/x/net v0.20.0/go.mod h1:z8BVo6PvndSri0LbOE3hAn0apkU+1YvI6E70E9jsnvY=
|
||||
golang.org/x/sys v0.0.0-20210122093101-04d7465088b8/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
golang.org/x/sys v0.18.0 h1:DBdB3niSjOA/O0blCZBqDefyWNYveAYMNF1Wum0DYQ4=
|
||||
golang.org/x/sys v0.18.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
|
||||
golang.org/x/sys v0.16.0 h1:xWw16ngr6ZMtmxDyKyIgsE93KNKz5HKmMa3b8ALHidU=
|
||||
golang.org/x/sys v0.16.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
|
||||
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
|
||||
google.golang.org/protobuf v1.33.0 h1:uNO2rsAINq/JlFpSdYEKIZ0uKD/R9cpdv0T+yoGwGmI=
|
||||
google.golang.org/protobuf v1.33.0/go.mod h1:c6P6GXX6sHbq/GpV6MGZEdwhWPcYBgnhAHhKbcUYpos=
|
||||
google.golang.org/protobuf v1.32.0 h1:pPC6BG5ex8PDFnkbrGU3EixyhKcQ2aDuBS36lqK/C7I=
|
||||
google.golang.org/protobuf v1.32.0/go.mod h1:c6P6GXX6sHbq/GpV6MGZEdwhWPcYBgnhAHhKbcUYpos=
|
||||
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
|
||||
gopkg.in/check.v1 v1.0.0-20200227125254-8fa46927fb4f/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
|
||||
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
|
||||
gopkg.in/yaml.v3 v3.0.0-20200615113413-eeeca48fe776/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
|
||||
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
|
||||
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
|
||||
|
@@ -6,7 +6,6 @@ import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io"
|
||||
"maps"
|
||||
"net/http"
|
||||
"strconv"
|
||||
"strings"
|
||||
@@ -33,14 +32,10 @@ type RedfishReceiverClientConfig struct {
|
||||
|
||||
doPowerMetric bool
|
||||
doProcessorMetrics bool
|
||||
doSensors bool
|
||||
doThermalMetrics bool
|
||||
|
||||
skipProcessorMetricsURL map[string]bool
|
||||
|
||||
// readSensorURLs stores for each chassis ID a list of sensor URLs to read
|
||||
readSensorURLs map[string][]string
|
||||
|
||||
gofish gofish.ClientConfig
|
||||
}
|
||||
|
||||
@@ -61,226 +56,7 @@ type RedfishReceiver struct {
|
||||
wg sync.WaitGroup // wait group for redfish receiver
|
||||
}
|
||||
|
||||
// deleteEmptyTags removes tags or meta data tags with empty value
|
||||
func deleteEmptyTags(tags map[string]string) {
|
||||
maps.DeleteFunc(
|
||||
tags,
|
||||
func(key string, value string) bool {
|
||||
return value == ""
|
||||
},
|
||||
)
|
||||
}
|
||||
|
||||
// setMetricValue sets the value entry in the fields map
|
||||
func setMetricValue(value any) map[string]interface{} {
|
||||
return map[string]interface{}{
|
||||
"value": value,
|
||||
}
|
||||
}
|
||||
|
||||
// sendMetric sends the metric through the sink channel
|
||||
func (r *RedfishReceiver) sendMetric(name string, tags map[string]string, meta map[string]string, value any, timestamp time.Time) {
|
||||
|
||||
deleteEmptyTags(tags)
|
||||
deleteEmptyTags(meta)
|
||||
y, err := lp.New(name, tags, meta, setMetricValue(value), timestamp)
|
||||
if err == nil {
|
||||
r.sink <- y
|
||||
}
|
||||
}
|
||||
|
||||
// readSensors reads sensors from a redfish device
|
||||
// See: https://redfish.dmtf.org/schemas/v1/Sensor.json
|
||||
// Redfish URI: /redfish/v1/Chassis/{ChassisId}/Sensors/{SensorId}
|
||||
func (r *RedfishReceiver) readSensors(
|
||||
clientConfig *RedfishReceiverClientConfig,
|
||||
chassis *redfish.Chassis) error {
|
||||
|
||||
writeTemperatureSensor := func(sensor *redfish.Sensor) {
|
||||
tags := map[string]string{
|
||||
"hostname": clientConfig.Hostname,
|
||||
"type": "node",
|
||||
// ChassisType shall indicate the physical form factor for the type of chassis
|
||||
"chassis_typ": string(chassis.ChassisType),
|
||||
// Chassis name
|
||||
"chassis_name": chassis.Name,
|
||||
// ID uniquely identifies the resource
|
||||
"sensor_id": sensor.ID,
|
||||
// The area or device to which this sensor measurement applies
|
||||
"temperature_physical_context": string(sensor.PhysicalContext),
|
||||
// Name
|
||||
"temperature_name": sensor.Name,
|
||||
}
|
||||
|
||||
// Set meta data tags
|
||||
meta := map[string]string{
|
||||
"source": r.name,
|
||||
"group": "Temperature",
|
||||
"unit": "degC",
|
||||
}
|
||||
|
||||
r.sendMetric("temperature", tags, meta, sensor.Reading, time.Now())
|
||||
}
|
||||
|
||||
writeFanSpeedSensor := func(sensor *redfish.Sensor) {
|
||||
tags := map[string]string{
|
||||
"hostname": clientConfig.Hostname,
|
||||
"type": "node",
|
||||
// ChassisType shall indicate the physical form factor for the type of chassis
|
||||
"chassis_typ": string(chassis.ChassisType),
|
||||
// Chassis name
|
||||
"chassis_name": chassis.Name,
|
||||
// ID uniquely identifies the resource
|
||||
"sensor_id": sensor.ID,
|
||||
// The area or device to which this sensor measurement applies
|
||||
"fan_physical_context": string(sensor.PhysicalContext),
|
||||
// Name
|
||||
"fan_name": sensor.Name,
|
||||
}
|
||||
|
||||
// Set meta data tags
|
||||
meta := map[string]string{
|
||||
"source": r.name,
|
||||
"group": "FanSpeed",
|
||||
"unit": string(sensor.ReadingUnits),
|
||||
}
|
||||
|
||||
r.sendMetric("fan_speed", tags, meta, sensor.Reading, time.Now())
|
||||
}
|
||||
|
||||
writePowerSensor := func(sensor *redfish.Sensor) {
|
||||
// Set tags
|
||||
tags := map[string]string{
|
||||
"hostname": clientConfig.Hostname,
|
||||
"type": "node",
|
||||
// ChassisType shall indicate the physical form factor for the type of chassis
|
||||
"chassis_typ": string(chassis.ChassisType),
|
||||
// Chassis name
|
||||
"chassis_name": chassis.Name,
|
||||
// ID uniquely identifies the resource
|
||||
"sensor_id": sensor.ID,
|
||||
// The area or device to which this sensor measurement applies
|
||||
"power_physical_context": string(sensor.PhysicalContext),
|
||||
// Name
|
||||
"power_name": sensor.Name,
|
||||
}
|
||||
|
||||
// Set meta data tags
|
||||
meta := map[string]string{
|
||||
"source": r.name,
|
||||
"group": "Energy",
|
||||
"unit": "watts",
|
||||
}
|
||||
|
||||
r.sendMetric("power", tags, meta, sensor.Reading, time.Now())
|
||||
}
|
||||
|
||||
if _, ok := clientConfig.readSensorURLs[chassis.ID]; !ok {
|
||||
// First time run of read sensors for this chassis
|
||||
|
||||
clientConfig.readSensorURLs[chassis.ID] = make([]string, 0)
|
||||
|
||||
// Get sensor information for this chassis
|
||||
sensors, err := chassis.Sensors()
|
||||
if err != nil {
|
||||
return fmt.Errorf("readSensors: chassis.Sensors() failed: %v", err)
|
||||
}
|
||||
|
||||
// Skip empty sensors information
|
||||
if sensors == nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
for _, sensor := range sensors {
|
||||
|
||||
// Skip all sensors which are not in enabled state or which are unhealthy
|
||||
if sensor.Status.State != common.EnabledState || sensor.Status.Health != common.OKHealth {
|
||||
continue
|
||||
}
|
||||
|
||||
// Skip sensors with missing readings units or type
|
||||
if sensor.ReadingUnits == "" || sensor.ReadingType == "" {
|
||||
continue
|
||||
}
|
||||
|
||||
// Power readings
|
||||
if (sensor.ReadingType == redfish.PowerReadingType && sensor.ReadingUnits == "Watts") ||
|
||||
(sensor.ReadingType == redfish.CurrentReadingType && sensor.ReadingUnits == "Watts") {
|
||||
if clientConfig.isExcluded["power"] {
|
||||
continue
|
||||
}
|
||||
|
||||
clientConfig.readSensorURLs[chassis.ID] = append(clientConfig.readSensorURLs[chassis.ID], sensor.ODataID)
|
||||
writePowerSensor(sensor)
|
||||
continue
|
||||
}
|
||||
|
||||
// Fan speed readings
|
||||
if (sensor.ReadingType == redfish.AirFlowReadingType && sensor.ReadingUnits == "RPM") ||
|
||||
(sensor.ReadingType == redfish.AirFlowReadingType && sensor.ReadingUnits == "Percent") {
|
||||
// Skip, when fan_speed metric is excluded
|
||||
if clientConfig.isExcluded["fan_speed"] {
|
||||
continue
|
||||
}
|
||||
|
||||
clientConfig.readSensorURLs[chassis.ID] = append(clientConfig.readSensorURLs[chassis.ID], sensor.ODataID)
|
||||
writeFanSpeedSensor(sensor)
|
||||
}
|
||||
|
||||
// Temperature readings
|
||||
if sensor.ReadingType == redfish.TemperatureReadingType && sensor.ReadingUnits == "C" {
|
||||
if clientConfig.isExcluded["temperature"] {
|
||||
continue
|
||||
}
|
||||
|
||||
clientConfig.readSensorURLs[chassis.ID] = append(clientConfig.readSensorURLs[chassis.ID], sensor.ODataID)
|
||||
writeTemperatureSensor(sensor)
|
||||
continue
|
||||
}
|
||||
}
|
||||
} else {
|
||||
|
||||
common.CollectCollection(
|
||||
func(uri string) {
|
||||
sensor, err := redfish.GetSensor(chassis.GetClient(), uri)
|
||||
if err != nil {
|
||||
cclog.ComponentError(r.name, "redfish.GetSensor() for uri '", uri, "' failed")
|
||||
}
|
||||
|
||||
// Power readings
|
||||
if (sensor.ReadingType == redfish.PowerReadingType && sensor.ReadingUnits == "Watts") ||
|
||||
(sensor.ReadingType == redfish.CurrentReadingType && sensor.ReadingUnits == "Watts") {
|
||||
|
||||
writePowerSensor(sensor)
|
||||
return
|
||||
}
|
||||
|
||||
// Fan speed readings
|
||||
if (sensor.ReadingType == redfish.AirFlowReadingType && sensor.ReadingUnits == "RPM") ||
|
||||
(sensor.ReadingType == redfish.AirFlowReadingType && sensor.ReadingUnits == "Percent") {
|
||||
|
||||
writeFanSpeedSensor(sensor)
|
||||
return
|
||||
}
|
||||
|
||||
// Temperature readings
|
||||
if sensor.ReadingType == redfish.TemperatureReadingType && sensor.ReadingUnits == "C" {
|
||||
|
||||
writeTemperatureSensor(sensor)
|
||||
return
|
||||
}
|
||||
},
|
||||
clientConfig.readSensorURLs[chassis.ID])
|
||||
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// readThermalMetrics reads thermal metrics from a redfish device
|
||||
// See: https://redfish.dmtf.org/schemas/v1/Thermal.json
|
||||
// Redfish URI: /redfish/v1/Chassis/{ChassisId}/Thermal
|
||||
// -> deprecated in favor of the ThermalSubsystem schema
|
||||
// -> on Lenovo servers /redfish/v1/Chassis/{ChassisId}/ThermalSubsystem/ThermalMetrics links to /redfish/v1/Chassis/{ChassisId}/Sensors/{SensorId}
|
||||
func (r *RedfishReceiver) readThermalMetrics(
|
||||
clientConfig *RedfishReceiverClientConfig,
|
||||
chassis *redfish.Chassis) error {
|
||||
@@ -330,6 +106,13 @@ func (r *RedfishReceiver) readThermalMetrics(
|
||||
"temperature_name": temperature.Name,
|
||||
}
|
||||
|
||||
// Delete empty tags
|
||||
for key, value := range tags {
|
||||
if value == "" {
|
||||
delete(tags, key)
|
||||
}
|
||||
}
|
||||
|
||||
// Set meta data tags
|
||||
meta := map[string]string{
|
||||
"source": r.name,
|
||||
@@ -340,7 +123,14 @@ func (r *RedfishReceiver) readThermalMetrics(
|
||||
// ReadingCelsius shall be the current value of the temperature sensor's reading.
|
||||
value := temperature.ReadingCelsius
|
||||
|
||||
r.sendMetric("temperature", tags, meta, value, timestamp)
|
||||
y, err := lp.New("temperature", tags, meta,
|
||||
map[string]interface{}{
|
||||
"value": value,
|
||||
},
|
||||
timestamp)
|
||||
if err == nil {
|
||||
r.sink <- y
|
||||
}
|
||||
}
|
||||
|
||||
for _, fan := range thermal.Fans {
|
||||
@@ -374,6 +164,13 @@ func (r *RedfishReceiver) readThermalMetrics(
|
||||
"fan_name": fan.Name,
|
||||
}
|
||||
|
||||
// Delete empty tags
|
||||
for key, value := range tags {
|
||||
if value == "" {
|
||||
delete(tags, key)
|
||||
}
|
||||
}
|
||||
|
||||
// Set meta data tags
|
||||
meta := map[string]string{
|
||||
"source": r.name,
|
||||
@@ -381,16 +178,23 @@ func (r *RedfishReceiver) readThermalMetrics(
|
||||
"unit": string(fan.ReadingUnits),
|
||||
}
|
||||
|
||||
r.sendMetric("fan_speed", tags, meta, fan.Reading, timestamp)
|
||||
// Reading shall be the current value of the fan sensor's reading
|
||||
value := fan.Reading
|
||||
|
||||
y, err := lp.New("fan_speed", tags, meta,
|
||||
map[string]interface{}{
|
||||
"value": value,
|
||||
},
|
||||
timestamp)
|
||||
if err == nil {
|
||||
r.sink <- y
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// readPowerMetrics reads power metrics from a redfish device
|
||||
// See: https://redfish.dmtf.org/schemas/v1/Power.json
|
||||
// Redfish URI: /redfish/v1/Chassis/{ChassisId}/Power
|
||||
// -> deprecated in favor of the PowerSubsystem schema
|
||||
func (r *RedfishReceiver) readPowerMetrics(
|
||||
clientConfig *RedfishReceiverClientConfig,
|
||||
chassis *redfish.Chassis) error {
|
||||
@@ -470,6 +274,13 @@ func (r *RedfishReceiver) readPowerMetrics(
|
||||
"power_control_name": pc.Name,
|
||||
}
|
||||
|
||||
// Delete empty tags
|
||||
for key, value := range tags {
|
||||
if value == "" {
|
||||
delete(tags, key)
|
||||
}
|
||||
}
|
||||
|
||||
// Set meta data tags
|
||||
meta := map[string]string{
|
||||
"source": r.name,
|
||||
@@ -478,8 +289,23 @@ func (r *RedfishReceiver) readPowerMetrics(
|
||||
"unit": "watts",
|
||||
}
|
||||
|
||||
// Delete empty meta data tags
|
||||
for key, value := range meta {
|
||||
if value == "" {
|
||||
delete(meta, key)
|
||||
}
|
||||
}
|
||||
|
||||
for name, value := range metrics {
|
||||
r.sendMetric(name, tags, meta, value, timestamp)
|
||||
|
||||
y, err := lp.New(name, tags, meta,
|
||||
map[string]interface{}{
|
||||
"value": value,
|
||||
},
|
||||
timestamp)
|
||||
if err == nil {
|
||||
r.sink <- y
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -488,7 +314,6 @@ func (r *RedfishReceiver) readPowerMetrics(
|
||||
|
||||
// readProcessorMetrics reads processor metrics from a redfish device
|
||||
// See: https://redfish.dmtf.org/schemas/v1/ProcessorMetrics.json
|
||||
// Redfish URI: /redfish/v1/Systems/{ComputerSystemId}/Processors/{ProcessorId}/ProcessorMetrics
|
||||
func (r *RedfishReceiver) readProcessorMetrics(
|
||||
clientConfig *RedfishReceiverClientConfig,
|
||||
processor *redfish.Processor) error {
|
||||
@@ -549,6 +374,13 @@ func (r *RedfishReceiver) readProcessorMetrics(
|
||||
"processor_id": processor.ID,
|
||||
}
|
||||
|
||||
// Delete empty tags
|
||||
for key, value := range tags {
|
||||
if value == "" {
|
||||
delete(tags, key)
|
||||
}
|
||||
}
|
||||
|
||||
// Set meta data tags
|
||||
metaPower := map[string]string{
|
||||
"source": r.name,
|
||||
@@ -561,7 +393,14 @@ func (r *RedfishReceiver) readProcessorMetrics(
|
||||
if !clientConfig.isExcluded[namePower] &&
|
||||
// Some servers return "ConsumedPowerWatt":65535 instead of "ConsumedPowerWatt":null
|
||||
processorMetrics.ConsumedPowerWatt != 65535 {
|
||||
r.sendMetric(namePower, tags, metaPower, processorMetrics.ConsumedPowerWatt, timestamp)
|
||||
y, err := lp.New(namePower, tags, metaPower,
|
||||
map[string]interface{}{
|
||||
"value": processorMetrics.ConsumedPowerWatt,
|
||||
},
|
||||
timestamp)
|
||||
if err == nil {
|
||||
r.sink <- y
|
||||
}
|
||||
}
|
||||
// Set meta data tags
|
||||
metaThermal := map[string]string{
|
||||
@@ -573,7 +412,14 @@ func (r *RedfishReceiver) readProcessorMetrics(
|
||||
nameThermal := "temperature"
|
||||
|
||||
if !clientConfig.isExcluded[nameThermal] {
|
||||
r.sendMetric(nameThermal, tags, metaThermal, processorMetrics.TemperatureCelsius, timestamp)
|
||||
y, err := lp.New(nameThermal, tags, metaThermal,
|
||||
map[string]interface{}{
|
||||
"value": processorMetrics.TemperatureCelsius,
|
||||
},
|
||||
timestamp)
|
||||
if err == nil {
|
||||
r.sink <- y
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
@@ -606,7 +452,6 @@ func (r *RedfishReceiver) readMetrics(clientConfig *RedfishReceiverClientConfig)
|
||||
|
||||
// Get all chassis managed by this service
|
||||
isChassisListRequired :=
|
||||
clientConfig.doSensors ||
|
||||
clientConfig.doThermalMetrics ||
|
||||
clientConfig.doPowerMetric
|
||||
var chassisList []*redfish.Chassis
|
||||
@@ -627,16 +472,6 @@ func (r *RedfishReceiver) readMetrics(clientConfig *RedfishReceiverClientConfig)
|
||||
}
|
||||
}
|
||||
|
||||
// Read sensors
|
||||
if clientConfig.doSensors {
|
||||
for _, chassis := range chassisList {
|
||||
err := r.readSensors(clientConfig, chassis)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// read thermal metrics
|
||||
if clientConfig.doThermalMetrics {
|
||||
for _, chassis := range chassisList {
|
||||
@@ -801,7 +636,6 @@ func NewRedfishReceiver(name string, config json.RawMessage) (Receiver, error) {
|
||||
// Globally disable collection of power, processor or thermal metrics
|
||||
DisablePowerMetrics bool `json:"disable_power_metrics"`
|
||||
DisableProcessorMetrics bool `json:"disable_processor_metrics"`
|
||||
DisableSensors bool `json:"disable_sensors"`
|
||||
DisableThermalMetrics bool `json:"disable_thermal_metrics"`
|
||||
|
||||
// Globally excluded metrics
|
||||
@@ -816,7 +650,6 @@ func NewRedfishReceiver(name string, config json.RawMessage) (Receiver, error) {
|
||||
// Per client disable collection of power,processor or thermal metrics
|
||||
DisablePowerMetrics bool `json:"disable_power_metrics"`
|
||||
DisableProcessorMetrics bool `json:"disable_processor_metrics"`
|
||||
DisableSensors bool `json:"disable_sensors"`
|
||||
DisableThermalMetrics bool `json:"disable_thermal_metrics"`
|
||||
|
||||
// Per client excluded metrics
|
||||
@@ -933,9 +766,6 @@ func NewRedfishReceiver(name string, config json.RawMessage) (Receiver, error) {
|
||||
doProcessorMetrics :=
|
||||
!(configJSON.DisableProcessorMetrics ||
|
||||
clientConfigJSON.DisableProcessorMetrics)
|
||||
doSensors :=
|
||||
!(configJSON.DisableSensors ||
|
||||
clientConfigJSON.DisableSensors)
|
||||
doThermalMetrics :=
|
||||
!(configJSON.DisableThermalMetrics ||
|
||||
clientConfigJSON.DisableThermalMetrics)
|
||||
@@ -968,10 +798,8 @@ func NewRedfishReceiver(name string, config json.RawMessage) (Receiver, error) {
|
||||
isExcluded: isExcluded,
|
||||
doPowerMetric: doPowerMetric,
|
||||
doProcessorMetrics: doProcessorMetrics,
|
||||
doSensors: doSensors,
|
||||
doThermalMetrics: doThermalMetrics,
|
||||
skipProcessorMetricsURL: make(map[string]bool),
|
||||
readSensorURLs: map[string][]string{},
|
||||
gofish: gofish.ClientConfig{
|
||||
Username: username,
|
||||
Password: password,
|
||||
|
@@ -17,17 +17,15 @@ The Redfish receiver uses the [Redfish (specification)](https://www.dmtf.org/sta
|
||||
"host_list": "n[1,2-4]"
|
||||
},
|
||||
{
|
||||
"host_list": "n5",
|
||||
"disable_power_metrics": true,
|
||||
"disable_processor_metrics": true,
|
||||
"disable_thermal_metrics": true
|
||||
"host_list": "n5"
|
||||
"disable_power_metrics": true
|
||||
},
|
||||
{
|
||||
"host_list": "n6" ],
|
||||
"username": "<Username 2>",
|
||||
"password": "<Password 2>",
|
||||
"endpoint": "https://%h-BMC",
|
||||
"disable_sensor_metrics": true
|
||||
"disable_thermal_metrics": true
|
||||
}
|
||||
]
|
||||
}
|
||||
@@ -43,18 +41,9 @@ Global settings:
|
||||
|
||||
Global and per redfish device settings (per redfish device settings overwrite the global settings):
|
||||
|
||||
- `disable_power_metrics`:
|
||||
disable collection of power metrics
|
||||
(`/redfish/v1/Chassis/{ChassisId}/Power`)
|
||||
- `disable_processor_metrics`:
|
||||
disable collection of processor metrics
|
||||
(`/redfish/v1/Systems/{ComputerSystemId}/Processors/{ProcessorId}/ProcessorMetrics`)
|
||||
- `disable_sensors`:
|
||||
disable collection of fan, power and thermal sensor metrics
|
||||
(`/redfish/v1/Chassis/{ChassisId}/Sensors/{SensorId}`)
|
||||
- `disable_thermal_metrics`:
|
||||
disable collection of thermal metrics
|
||||
(`/redfish/v1/Chassis/{ChassisId}/Thermal`)
|
||||
- `disable_power_metrics`: disable collection of power metrics
|
||||
- `disable_processor_metrics`: disable collection of processor metrics
|
||||
- `disable_thermal_metrics`: disable collection of thermal metrics
|
||||
- `exclude_metrics`: list of excluded metrics
|
||||
- `username`: User name to authenticate with
|
||||
- `password`: Password to use for authentication
|
||||
|
Reference in New Issue
Block a user