Compare commits

..

2 Commits

Author SHA1 Message Date
Thomas Roehl
813b59b16e Required LIKWID 5.3.0 2024-04-10 20:00:58 +02:00
Thomas Roehl
303fe1d80f Add collector for always running energy measurements with LIKWID 2024-04-10 19:57:08 +02:00
13 changed files with 490 additions and 401 deletions

View File

@@ -45,10 +45,10 @@ jobs:
- name: Install build dependencies
run: |
dnf --assumeyes install \
http://mirror.centos.org/centos/8-stream/AppStream/x86_64/os/Packages/golang-1.21.7-1.module_el8+960+4060efbe.x86_64.rpm \
http://mirror.centos.org/centos/8-stream/AppStream/x86_64/os/Packages/golang-bin-1.21.7-1.module_el8+960+4060efbe.x86_64.rpm \
http://mirror.centos.org/centos/8-stream/AppStream/x86_64/os/Packages/golang-src-1.21.7-1.module_el8+960+4060efbe.noarch.rpm \
http://mirror.centos.org/centos/8-stream/AppStream/x86_64/os/Packages/go-toolset-1.21.7-1.module_el8+960+4060efbe.x86_64.rpm
http://mirror.centos.org/centos/8-stream/AppStream/x86_64/os/Packages/golang-1.20.6-2.module_el8+658+f14b2092.x86_64.rpm \
http://mirror.centos.org/centos/8-stream/AppStream/x86_64/os/Packages/golang-bin-1.20.6-2.module_el8+658+f14b2092.x86_64.rpm \
http://mirror.centos.org/centos/8-stream/AppStream/x86_64/os/Packages/golang-src-1.20.6-2.module_el8+658+f14b2092.noarch.rpm \
http://mirror.centos.org/centos/8-stream/AppStream/x86_64/os/Packages/go-toolset-1.20.6-1.module_el8+602+8bb8a8d6.x86_64.rpm
- name: RPM build MetricCollector
id: rpmbuild
@@ -115,10 +115,10 @@ jobs:
- name: Install build dependencies
run: |
dnf --assumeyes --disableplugin=subscription-manager install \
http://mirror.centos.org/centos/8-stream/AppStream/x86_64/os/Packages/golang-1.21.7-1.module_el8+960+4060efbe.x86_64.rpm \
http://mirror.centos.org/centos/8-stream/AppStream/x86_64/os/Packages/golang-bin-1.21.7-1.module_el8+960+4060efbe.x86_64.rpm \
http://mirror.centos.org/centos/8-stream/AppStream/x86_64/os/Packages/golang-src-1.21.7-1.module_el8+960+4060efbe.noarch.rpm \
http://mirror.centos.org/centos/8-stream/AppStream/x86_64/os/Packages/go-toolset-1.21.7-1.module_el8+960+4060efbe.x86_64.rpm
http://mirror.centos.org/centos/8-stream/AppStream/x86_64/os/Packages/golang-1.20.6-2.module_el8+658+f14b2092.x86_64.rpm \
http://mirror.centos.org/centos/8-stream/AppStream/x86_64/os/Packages/golang-bin-1.20.6-2.module_el8+658+f14b2092.x86_64.rpm \
http://mirror.centos.org/centos/8-stream/AppStream/x86_64/os/Packages/golang-src-1.20.6-2.module_el8+658+f14b2092.noarch.rpm \
http://mirror.centos.org/centos/8-stream/AppStream/x86_64/os/Packages/go-toolset-1.20.6-1.module_el8+602+8bb8a8d6.x86_64.rpm
- name: RPM build MetricCollector
id: rpmbuild

View File

@@ -10,6 +10,32 @@ on:
jobs:
#
# Job build-1-20
# Build on latest Ubuntu using golang version 1.20
#
build-1-20:
runs-on: ubuntu-latest
steps:
# See: https://github.com/marketplace/actions/checkout
# Checkout git repository and submodules
- name: Checkout
uses: actions/checkout@v4
with:
submodules: recursive
# See: https://github.com/marketplace/actions/setup-go-environment
- name: Setup Golang
uses: actions/setup-go@v4
with:
go-version: '1.20'
- name: Build MetricCollector
run: make
- name: Run MetricCollector once
run: ./cc-metric-collector --once --config .github/ci-config.json
#
# Job build-1-21
# Build on latest Ubuntu using golang version 1.21
@@ -36,32 +62,6 @@ jobs:
- name: Run MetricCollector once
run: ./cc-metric-collector --once --config .github/ci-config.json
#
# Job build-1-22
# Build on latest Ubuntu using golang version 1.22
#
build-1-22:
runs-on: ubuntu-latest
steps:
# See: https://github.com/marketplace/actions/checkout
# Checkout git repository and submodules
- name: Checkout
uses: actions/checkout@v4
with:
submodules: recursive
# See: https://github.com/marketplace/actions/setup-go-environment
- name: Setup Golang
uses: actions/setup-go@v4
with:
go-version: '1.22'
- name: Build MetricCollector
run: make
- name: Run MetricCollector once
run: ./cc-metric-collector --once --config .github/ci-config.json
#
# Build on AlmaLinux 8 using go-toolset
#
@@ -92,10 +92,10 @@ jobs:
- name: Install build dependencies
run: |
dnf --assumeyes install \
http://mirror.centos.org/centos/8-stream/AppStream/x86_64/os/Packages/golang-1.21.7-1.module_el8+960+4060efbe.x86_64.rpm \
http://mirror.centos.org/centos/8-stream/AppStream/x86_64/os/Packages/golang-bin-1.21.7-1.module_el8+960+4060efbe.x86_64.rpm \
http://mirror.centos.org/centos/8-stream/AppStream/x86_64/os/Packages/golang-src-1.21.7-1.module_el8+960+4060efbe.noarch.rpm \
http://mirror.centos.org/centos/8-stream/AppStream/x86_64/os/Packages/go-toolset-1.21.7-1.module_el8+960+4060efbe.x86_64.rpm
http://mirror.centos.org/centos/8-stream/AppStream/x86_64/os/Packages/golang-1.20.6-2.module_el8+658+f14b2092.x86_64.rpm \
http://mirror.centos.org/centos/8-stream/AppStream/x86_64/os/Packages/golang-bin-1.20.6-2.module_el8+658+f14b2092.x86_64.rpm \
http://mirror.centos.org/centos/8-stream/AppStream/x86_64/os/Packages/golang-src-1.20.6-2.module_el8+658+f14b2092.noarch.rpm \
http://mirror.centos.org/centos/8-stream/AppStream/x86_64/os/Packages/go-toolset-1.20.6-1.module_el8+602+8bb8a8d6.x86_64.rpm
- name: RPM build MetricCollector
id: rpmbuild
@@ -130,10 +130,10 @@ jobs:
- name: Install build dependencies
run: |
dnf --assumeyes --disableplugin=subscription-manager install \
http://mirror.centos.org/centos/8-stream/AppStream/x86_64/os/Packages/golang-1.21.7-1.module_el8+960+4060efbe.x86_64.rpm \
http://mirror.centos.org/centos/8-stream/AppStream/x86_64/os/Packages/golang-bin-1.21.7-1.module_el8+960+4060efbe.x86_64.rpm \
http://mirror.centos.org/centos/8-stream/AppStream/x86_64/os/Packages/golang-src-1.21.7-1.module_el8+960+4060efbe.noarch.rpm \
http://mirror.centos.org/centos/8-stream/AppStream/x86_64/os/Packages/go-toolset-1.21.7-1.module_el8+960+4060efbe.x86_64.rpm
http://mirror.centos.org/centos/8-stream/AppStream/x86_64/os/Packages/golang-1.20.6-2.module_el8+658+f14b2092.x86_64.rpm \
http://mirror.centos.org/centos/8-stream/AppStream/x86_64/os/Packages/golang-bin-1.20.6-2.module_el8+658+f14b2092.x86_64.rpm \
http://mirror.centos.org/centos/8-stream/AppStream/x86_64/os/Packages/golang-src-1.20.6-2.module_el8+658+f14b2092.noarch.rpm \
http://mirror.centos.org/centos/8-stream/AppStream/x86_64/os/Packages/go-toolset-1.20.6-1.module_el8+602+8bb8a8d6.x86_64.rpm
- name: RPM build MetricCollector
id: rpmbuild

View File

@@ -1,5 +1,5 @@
# LIKWID version
LIKWID_VERSION := 5.2.2
LIKWID_VERSION := 5.3.0
LIKWID_INSTALLED_FOLDER := $(shell dirname $$(which likwid-topology 2>/dev/null) 2>/dev/null)
LIKWID_FOLDER := $(CURDIR)/likwid

View File

@@ -41,6 +41,7 @@ var AvailableCollectors = map[string]MetricCollector{
"self": new(SelfCollector),
"schedstat": new(SchedstatCollector),
"nfsiostat": new(NfsIOStatCollector),
"likwidenergy": new(LikwidEnergyCollector),
}
// Metric collector manager data structure

View File

@@ -99,7 +99,10 @@ func (m *CustomCmdCollector) Read(interval time.Duration, output chan lp.CCMetri
continue
}
output <- lp.FromInfluxMetric(c)
y := lp.FromInfluxMetric(c)
if err == nil {
output <- y
}
}
}
for _, file := range m.files {
@@ -118,7 +121,10 @@ func (m *CustomCmdCollector) Read(interval time.Duration, output chan lp.CCMetri
if skip {
continue
}
output <- lp.FromInfluxMetric(f)
y := lp.FromInfluxMetric(f)
if err == nil {
output <- y
}
}
}
}

View File

@@ -374,14 +374,6 @@ func (m *LikwidCollector) takeMeasurement(evidx int, evset LikwidEventsetConfig,
}
defer watcher.Close()
if len(m.config.LockfilePath) > 0 {
if _, err := os.Stat(m.config.LockfilePath); os.IsNotExist(err) {
file, err := os.Create(m.config.LockfilePath)
if err != nil {
cclog.ComponentError(m.name, "Cannot create lockfile", m.config.LockfilePath, ":", err.Error())
return true, err
}
file.Close()
}
info, err := os.Stat(m.config.LockfilePath)
if err != nil {
return true, err
@@ -390,9 +382,9 @@ func (m *LikwidCollector) takeMeasurement(evidx int, evset LikwidEventsetConfig,
if uid != uint32(os.Getuid()) {
usr, err := user.LookupId(fmt.Sprint(uid))
if err == nil {
return true, fmt.Errorf("access to performance counters locked by %s", usr.Username)
return true, fmt.Errorf("Access to performance counters locked by %s", usr.Username)
} else {
return true, fmt.Errorf("access to performance counters locked by %d", uid)
return true, fmt.Errorf("Access to performance counters locked by %d", uid)
}
}
err = watcher.Add(m.config.LockfilePath)

View File

@@ -267,45 +267,3 @@ IPC PMC0/PMC1 -> {
```
The script `scripts/likwid_perfgroup_to_cc_config.py` might help you.
### Internal structure
This section describes the internal structure of the `likwid` collector.
#### At initialization
After setting the defaults, the configuration is read.
Based on the configuration, the library is searched using `dlopen` to see whether it makes sense to proceed.
Next, the user-given metrics are tested to ensure they can be evaluated. For this, it creates a list of all user-given events/counters with the value `1.0` which is provided to the metric evaluator. The same is done for the global metrics by using the metric names with value `1.0`. If the evaluator does not fail, the metric can be evaluated and the collector initialization can proceed.
A separate thread is started to do the measurement. This is not done using a common goroutine but a real application thread with full control. This is required because LIKWID's access system tracks the processes of the using application and the PID should not change between measurements because that would require teardown and reopening of the access system.
With the separate thread, the access system is initialized by setting the user-given access mode and adding all hardware threads.
LIKWID measures per hardware thread in general but only some HW threads read the counters available only e.g. per CPU socket (often memory traffic). For this, the collector gets the system topology through LIKWID and creates different mappings like 'hwthread to list offset' and others. With this, the hardware threads responsible for a topological entity can be determined because those read the counters of the per CPU socket units. These mappings are later used in the measurement phase.
In the end, we read the base CPU frequency of the system. It may be used in the metric evaluation.
#### Measurements
The reading of events is done by the separate application thread.
It traverses over all configured event sets, creates valid LIKWID eventstrings out of them and pass them to take a measurement. This could be done only once but when the LIKWID lock changes, LIKWID has to be completely reopened to provide access again. With this reopening, the already added event sets are gone.
LIKWID has it's own locking mechanism using a lock file. But not the content of the file is of interest but the owner. In order to track changes of the file, a `fsnotify` watcher is installed on the file. If the file does not exist, it is created and consequently is owned by the same user as `cc-metric-collector`. The LikwidCollector has to watch the file on it's own because LIKWID does not provide proper error handling for this.
Each call to the LIKWID library for loading the event set, setting up the counting facilities as well as starting and stopping of the counters is wrapped into lockfile checks to ensure no state change happens. If the file owner changed, the LikwidCollector cannot access the counters anymore, so no further operation can be done and measurment stops.
Although start/stop would be sufficient, the LikwidCollector performs start, read, wait, read, `getLastResult`, stop. Reason might be "historic" but is not 100% clear anymore. The author failed to document ;)
#### Metric evaluation
After each meaurement, the metrics of the event set are directly evaluated. It updates the counter->result mapping with the new measurements, calls the evaluator and generates the `CCMetric` with the user-given settings if it should be published. Each metric name to result calculation is stored for the global metric evaluation, which is done as a final step.
#### Shutdown
Since each measurment involves a complete initialize to finalize cycle of the LIKWID library, only the topology module needs to be closed.
Moreover, the separate application thread is stopped.

View File

@@ -0,0 +1,281 @@
package collectors
/*
#cgo CFLAGS: -I./likwid
#cgo LDFLAGS: -Wl,--unresolved-symbols=ignore-in-object-files
#include <stdlib.h>
#include <likwid.h>
*/
import "C"
import (
"encoding/json"
"fmt"
"os"
"strings"
"time"
cclog "github.com/ClusterCockpit/cc-metric-collector/pkg/ccLogger"
lp "github.com/ClusterCockpit/cc-metric-collector/pkg/ccMetric"
topo "github.com/ClusterCockpit/cc-metric-collector/pkg/ccTopology"
"github.com/NVIDIA/go-nvml/pkg/dl"
)
const (
LIKWIDENERGY_LIB_NAME = "liblikwid.so"
LIKWIDENERGY_LIB_DL_FLAGS = dl.RTLD_LAZY | dl.RTLD_GLOBAL
LIKWIDENERGY_DEF_ACCESSMODE = "direct"
LIKWIDENERGY_DEF_LOCKFILE = "/var/run/likwid.lock"
)
// These are the fields we read from the JSON configuration
type LikwidEnergyCollectorConfig struct {
AccessMode string `json:"access_mode,omitempty"`
DaemonPath string `json:"accessdaemon_path,omitempty"`
LibraryPath string `json:"liblikwid_path,omitempty"`
LockfilePath string `json:"lockfile_path,omitempty"`
SendDiff bool `json:"send_difference,omitempty"`
SendAbs bool `json:"send_absolute,omitempty"`
}
type LikwidEnergyDomainEntry struct {
readcpu int
value uint32
total uint64
tags map[string]string
}
type LikwidEnergyDomain struct {
values map[int]LikwidEnergyDomainEntry
granularity string
metricname string
domaintype int
energyUnit float64
}
// This contains all variables we need during execution and the variables
// defined by metricCollector (name, init, ...)
type LikwidEnergyCollector struct {
metricCollector
config LikwidEnergyCollectorConfig // the configuration structure
meta map[string]string // default meta information
tags map[string]string // default tags
domains map[int]LikwidEnergyDomain
}
// Init initializes the sample collector
// Called once by the collector manager
// All tags, meta data tags and metrics that do not change over the runtime should be set here
func (m *LikwidEnergyCollector) Init(config json.RawMessage) error {
var err error = nil
// Always set the name early in Init() to use it in cclog.Component* functions
m.name = "LikwidEnergyCollector"
// This is for later use, also call it early
m.setup()
// Tell whether the collector should be run in parallel with others (reading files, ...)
// or it should be run serially, mostly for collectors actually doing measurements
// because they should not measure the execution of the other collectors
m.parallel = true
// Define meta information sent with each metric
// (Can also be dynamic or this is the basic set with extension through AddMeta())
m.meta = map[string]string{"source": m.name, "group": "LIKWID", "unit": "Joules"}
// Define tags sent with each metric
// The 'type' tag is always needed, it defines the granularity of the metric
// node -> whole system
// socket -> CPU socket (requires socket ID as 'type-id' tag)
// die -> CPU die (requires CPU die ID as 'type-id' tag)
// memoryDomain -> NUMA domain (requires NUMA domain ID as 'type-id' tag)
// llc -> Last level cache (requires last level cache ID as 'type-id' tag)
// core -> single CPU core that may consist of multiple hardware threads (SMT) (requires core ID as 'type-id' tag)
// hwthtread -> single CPU hardware thread (requires hardware thread ID as 'type-id' tag)
// accelerator -> A accelerator device like GPU or FPGA (requires an accelerator ID as 'type-id' tag)
m.tags = map[string]string{}
// Read in the JSON configuration
m.config.AccessMode = LIKWID_DEF_ACCESSMODE
m.config.LibraryPath = LIKWID_LIB_NAME
m.config.LockfilePath = LIKWID_DEF_LOCKFILE
m.config.SendAbs = true
m.config.SendDiff = true
if len(config) > 0 {
err = json.Unmarshal(config, &m.config)
if err != nil {
cclog.ComponentError(m.name, "Error reading config:", err.Error())
return err
}
}
cclog.ComponentDebug(m.name, "Opening ", m.config.LibraryPath)
lib := dl.New(m.config.LibraryPath, LIKWID_LIB_DL_FLAGS)
if lib == nil {
return fmt.Errorf("error instantiating DynamicLibrary for %s", m.config.LibraryPath)
}
err = lib.Open()
if err != nil {
return fmt.Errorf("error opening %s: %v", m.config.LibraryPath, err)
}
cclog.ComponentDebug(m.name, "Init topology ", m.config.AccessMode)
ret := C.topology_init()
if ret != 0 {
return fmt.Errorf("error initializing topology: %d", ret)
}
cclog.ComponentDebug(m.name, "Setting accessmode ", m.config.AccessMode)
switch m.config.AccessMode {
case "direct":
C.HPMmode(0)
case "accessdaemon":
if len(m.config.DaemonPath) > 0 {
p := os.Getenv("PATH")
os.Setenv("PATH", m.config.DaemonPath+":"+p)
}
C.HPMmode(1)
retCode := C.HPMinit()
if retCode != 0 {
err := fmt.Errorf("C.HPMinit() failed with return code %v", retCode)
cclog.ComponentError(m.name, err.Error())
}
}
initCpus := make([]int, 0)
ret = C.HPMaddThread(0)
if ret != 0 {
return fmt.Errorf("error initializing access: %d", ret)
}
initCpus = append(initCpus, 0)
cinfo := C.get_cpuInfo()
domainnames := make(map[int]string)
if cinfo.isIntel == C.int(1) {
domainnames[0] = "pkg"
domainnames[1] = "pp0"
domainnames[2] = "pp1"
domainnames[3] = "dram"
domainnames[4] = "platform"
} else {
switch cinfo.family {
case 0x17:
domainnames[0] = "core"
domainnames[1] = "pkg"
case 0x19:
switch cinfo.model {
case 0x01, 0x21, 0x50:
domainnames[0] = "core"
domainnames[1] = "pkg"
case 0x61, 0x11:
domainnames[0] = "core"
domainnames[1] = "l3"
}
}
}
// Set up everything that the collector requires during the Read() execution
// Check files required, test execution of some commands, create data structure
// for all topological entities (sockets, NUMA domains, ...)
// Return some useful error message in case of any failures
cclog.ComponentDebug(m.name, "Initializing Power module")
ret = C.power_init(0)
if ret == C.int(0) {
cclog.ComponentPrint(m.name, "No RAPL support")
}
m.domains = make(map[int]LikwidEnergyDomain)
Pinfo := C.get_powerInfo()
for i := 0; i < int(Pinfo.numDomains); i++ {
d := Pinfo.domains[C.int(i)]
name := domainnames[int(d._type)]
domain := LikwidEnergyDomain{
values: make(map[int]LikwidEnergyDomainEntry),
metricname: fmt.Sprintf("likwidenergy_%s", strings.ToLower(name)),
granularity: "socket",
domaintype: int(d._type),
energyUnit: float64(C.power_getEnergyUnit(C.int(d._type))),
}
if name == "core" {
domain.granularity = "core"
}
for _, c := range topo.GetTypeList(domain.granularity) {
clist := topo.GetSocketHwthreads(c)
if len(clist) > 0 {
var cur C.PowerData
if _, ok := intArrayContains(initCpus, clist[0]); !ok {
initCpus = append(initCpus, clist[0])
C.HPMaddThread(C.int(clist[0]))
}
cclog.ComponentDebug(m.name, "Reading current value on CPU ", clist[0], " for ", domain.metricname, "on", domain.granularity, c)
ret = C.power_start(&cur, C.int(clist[0]), C.PowerType(domain.domaintype))
cclog.ComponentDebug(m.name, "Reading ", uint64(cur.before))
if ret == 0 {
domain.values[c] = LikwidEnergyDomainEntry{
readcpu: clist[0],
value: uint32(cur.before),
total: uint64(cur.before),
tags: map[string]string{
"type": domain.granularity,
"type-id": fmt.Sprintf("%d", c),
},
}
}
}
}
cclog.ComponentDebug(m.name, "Adding domain ", domain.metricname, " with granularity ", domain.granularity)
m.domains[domain.domaintype] = domain
}
// Set this flag only if everything is initialized properly, all required files exist, ...
m.init = true
return err
}
// Read collects all metrics belonging to the sample collector
// and sends them through the output channel to the collector manager
func (m *LikwidEnergyCollector) Read(interval time.Duration, output chan lp.CCMetric) {
// Create a sample metric
timestamp := time.Now()
for dt, domain := range m.domains {
for i, entry := range domain.values {
var cur C.PowerData
ret := C.power_start(&cur, C.int(entry.readcpu), C.PowerType(dt))
if ret == 0 {
now := uint32(cur.before)
diff := now - entry.value
if now < entry.value {
diff = (^uint32(0)) - entry.value
diff += now
}
if m.config.SendDiff {
y, err := lp.New(domain.metricname, entry.tags, m.meta, map[string]interface{}{"value": float64(diff) * domain.energyUnit}, timestamp)
if err == nil {
for k, v := range m.tags {
y.AddTag(k, v)
}
// Send it to output channel
output <- y
}
}
if m.config.SendAbs {
total := float64(entry.total + uint64(diff))
y, err := lp.New(fmt.Sprintf("%s_abs", domain.metricname), entry.tags, m.meta, map[string]interface{}{"value": total * domain.energyUnit}, timestamp)
if err == nil {
for k, v := range m.tags {
y.AddTag(k, v)
}
// Send it to output channel
output <- y
}
}
entry.value = uint32(cur.before)
entry.total += uint64(diff)
domain.values[i] = entry
}
}
}
}
// Close metric collector: close network connection, close files, close libraries, ...
// Called once by the collector manager
func (m *LikwidEnergyCollector) Close() {
C.power_finalize()
C.topology_finalize()
// Unset flag
m.init = false
}

View File

@@ -0,0 +1,20 @@
## `likwidenergy` collector
In contrast to the more general [`likwid` collector](./likwidMetric.md), this collector just reads the RAPL counters to provide energy metrics. In contrast to the `likwid` collector, this collector keeps the energy counters running the whole time and not just of a measurement interval. It covers all RAPL domains (`PKG`, `DRAM`, `PP0`, `PP1`, ...). Depending whether the domain is per socket, per L3 segment or per core, metrics are read and send.
```json
{
"likwidenergy" : {
"liblikwid_path" : "/path/to/liblikwid.so",
"accessdaemon_path" : "/folder/that/contains/likwid-accessD",
"access_mode" : "direct or accessdaemon",
"send_difference": true,
"send_absolute": true
}
}
```
The first three entries (`liblikwid_path`, `accessdaemon_path` and `access_mode`) are required to set up the access to the RAPL counters. The `access_mode` = `perf_event` is not supported at the moment.
With `send_differences` the difference to the last measurement is provided to the system. With `send_absolute`, the absolute value since start of the system is submitted as metric. It reads the counter at initialization and then updates the value after each measurement.

24
go.mod
View File

@@ -1,6 +1,6 @@
module github.com/ClusterCockpit/cc-metric-collector
go 1.21
go 1.20
require (
github.com/ClusterCockpit/cc-units v0.4.0
@@ -12,30 +12,30 @@ require (
github.com/influxdata/influxdb-client-go/v2 v2.13.0
github.com/influxdata/line-protocol v0.0.0-20210922203350-b1ad95c89adf
github.com/influxdata/line-protocol/v2 v2.2.1
github.com/nats-io/nats.go v1.33.1
github.com/prometheus/client_golang v1.19.0
github.com/nats-io/nats.go v1.32.0
github.com/prometheus/client_golang v1.18.0
github.com/stmcginnis/gofish v0.15.0
github.com/tklauser/go-sysconf v0.3.13
golang.design/x/thread v0.0.0-20210122121316-335e9adffdf1
golang.org/x/exp v0.0.0-20240222234643-814bf88cf225
golang.org/x/sys v0.18.0
golang.org/x/exp v0.0.0-20240119083558-1b970713d09a
golang.org/x/sys v0.16.0
)
require (
github.com/apapsch/go-jsonmerge/v2 v2.0.0 // indirect
github.com/beorn7/perks v1.0.1 // indirect
github.com/cespare/xxhash/v2 v2.2.0 // indirect
github.com/google/uuid v1.6.0 // indirect
github.com/klauspost/compress v1.17.7 // indirect
github.com/google/uuid v1.5.0 // indirect
github.com/klauspost/compress v1.17.4 // indirect
github.com/nats-io/nkeys v0.4.7 // indirect
github.com/nats-io/nuid v1.0.1 // indirect
github.com/oapi-codegen/runtime v1.1.1 // indirect
github.com/prometheus/client_model v0.6.0 // indirect
github.com/prometheus/common v0.49.0 // indirect
github.com/prometheus/client_model v0.5.0 // indirect
github.com/prometheus/common v0.46.0 // indirect
github.com/prometheus/procfs v0.12.0 // indirect
github.com/shopspring/decimal v1.3.1 // indirect
github.com/tklauser/numcpus v0.7.0 // indirect
golang.org/x/crypto v0.21.0 // indirect
golang.org/x/net v0.22.0 // indirect
google.golang.org/protobuf v1.33.0 // indirect
golang.org/x/crypto v0.18.0 // indirect
golang.org/x/net v0.20.0 // indirect
google.golang.org/protobuf v1.32.0 // indirect
)

58
go.sum
View File

@@ -7,6 +7,7 @@ github.com/NVIDIA/go-nvml v0.12.0-2 h1:Sg239yy7jmopu/cuvYauoMj9fOpcGMngxVxxS1EBX
github.com/NVIDIA/go-nvml v0.12.0-2/go.mod h1:7ruy85eOM73muOc/I37euONSwEyFqZsv5ED9AogD4G0=
github.com/PaesslerAG/gval v1.2.2 h1:Y7iBzhgE09IGTt5QgGQ2IdaYYYOU134YGHBThD+wm9E=
github.com/PaesslerAG/gval v1.2.2/go.mod h1:XRFLwvmkTEdYziLdaCeCa5ImcGVrfQbeNUbVR+C6xac=
github.com/PaesslerAG/jsonpath v0.1.0 h1:gADYeifvlqK3R3i2cR5B4DGgxLXIPb3TRTH1mGi0jPI=
github.com/PaesslerAG/jsonpath v0.1.0/go.mod h1:4BzmtoM/PI8fPO4aQGIusjGxGir2BzcV0grWtFzq1Y8=
github.com/RaveNoX/go-jsoncommentstrip v1.0.0/go.mod h1:78ihd09MekBnJnxpICcwzCMzGrKSKYe4AqU6PDYYpjk=
github.com/apapsch/go-jsonmerge/v2 v2.0.0 h1:axGnT1gRIfimI7gJifB699GoE/oq+F2MU7Dml6nw9rQ=
@@ -18,16 +19,20 @@ github.com/cespare/xxhash/v2 v2.2.0 h1:DC2CZ1Ep5Y4k3ZQ899DldepgrayRUGE6BBZ/cd9Cj
github.com/cespare/xxhash/v2 v2.2.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=
github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/frankban/quicktest v1.11.0/go.mod h1:K+q6oSqb0W0Ininfk863uOk1lMy69l/P6txr3mVT54s=
github.com/frankban/quicktest v1.11.2/go.mod h1:K+q6oSqb0W0Ininfk863uOk1lMy69l/P6txr3mVT54s=
github.com/frankban/quicktest v1.13.0 h1:yNZif1OkDfNoDfb9zZa9aXIpejNR4F23Wely0c+Qdqk=
github.com/frankban/quicktest v1.13.0/go.mod h1:qLE0fzW0VuyUAJgPU19zByoIr0HtCHN/r/VLSOOIySU=
github.com/fsnotify/fsnotify v1.7.0 h1:8JEhPFa5W2WU7YfeZzPNqzMP6Lwt7L2715Ggo0nosvA=
github.com/fsnotify/fsnotify v1.7.0/go.mod h1:40Bi/Hjc2AVfZrqy+aj+yEI+/bRxZnMJyTJwOpGvigM=
github.com/google/go-cmp v0.5.2/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
github.com/google/go-cmp v0.5.9 h1:O2Tfq5qg4qc4AmwVlvv0oLiVAGB7enBSJ2x2DqQFi38=
github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
github.com/google/uuid v1.5.0 h1:1p67kYwdtXjb0gL0BPiP1Av9wiZPo5A8z2cWkTZ+eyU=
github.com/google/uuid v1.5.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
github.com/gorilla/mux v1.8.1 h1:TuBL49tXwgrFYWhqrNgrUNEY92u81SPhu7sTdzQEiWY=
github.com/gorilla/mux v1.8.1/go.mod h1:AKf9I4AEqPTmMytcMc0KkNouC66V3BtZ4qD5fmWSiMQ=
github.com/influxdata/influxdb-client-go/v2 v2.13.0 h1:ioBbLmR5NMbAjP4UVA5r9b5xGjpABD7j65pI8kFphDM=
@@ -35,20 +40,24 @@ github.com/influxdata/influxdb-client-go/v2 v2.13.0/go.mod h1:k+spCbt9hcvqvUiz0s
github.com/influxdata/line-protocol v0.0.0-20210922203350-b1ad95c89adf h1:7JTmneyiNEwVBOHSjoMxiWAqB992atOeepeFYegn5RU=
github.com/influxdata/line-protocol v0.0.0-20210922203350-b1ad95c89adf/go.mod h1:xaLFMmpvUxqXtVkUJfg9QmT88cDaCJ3ZKgdZ78oO8Qo=
github.com/influxdata/line-protocol-corpus v0.0.0-20210519164801-ca6fa5da0184/go.mod h1:03nmhxzZ7Xk2pdG+lmMd7mHDfeVOYFyhOgwO61qWU98=
github.com/influxdata/line-protocol-corpus v0.0.0-20210922080147-aa28ccfb8937 h1:MHJNQ+p99hFATQm6ORoLmpUCF7ovjwEFshs/NHzAbig=
github.com/influxdata/line-protocol-corpus v0.0.0-20210922080147-aa28ccfb8937/go.mod h1:BKR9c0uHSmRgM/se9JhFHtTT7JTO67X23MtKMHtZcpo=
github.com/influxdata/line-protocol/v2 v2.0.0-20210312151457-c52fdecb625a/go.mod h1:6+9Xt5Sq1rWx+glMgxhcg2c0DUaehK+5TDcPZ76GypY=
github.com/influxdata/line-protocol/v2 v2.1.0/go.mod h1:QKw43hdUBg3GTk2iC3iyCxksNj7PX9aUSeYOYE/ceHY=
github.com/influxdata/line-protocol/v2 v2.2.1 h1:EAPkqJ9Km4uAxtMRgUubJyqAr6zgWM0dznKMLRauQRE=
github.com/influxdata/line-protocol/v2 v2.2.1/go.mod h1:DmB3Cnh+3oxmG6LOBIxce4oaL4CPj3OmMPgvauXh+tM=
github.com/juju/gnuflag v0.0.0-20171113085948-2ce1bb71843d/go.mod h1:2PavIy+JPciBPrBUjwbNvtwB6RQlve+hkpll6QSNmOE=
github.com/klauspost/compress v1.17.7 h1:ehO88t2UGzQK66LMdE8tibEd1ErmzZjNEqWkjLAKQQg=
github.com/klauspost/compress v1.17.7/go.mod h1:Di0epgTjJY877eYKx5yC51cX2A2Vl2ibi7bDH9ttBbw=
github.com/klauspost/compress v1.17.4 h1:Ej5ixsIri7BrIjBkRZLTo6ghwrEtHFk7ijlczPW4fZ4=
github.com/klauspost/compress v1.17.4/go.mod h1:/dCuZOvVtNoHsyb+cuJD3itjs3NbnF6KH9zAO4BDxPM=
github.com/kr/pretty v0.2.1/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI=
github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE=
github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk=
github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=
github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI=
github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
github.com/nats-io/nats.go v1.33.1 h1:8TxLZZ/seeEfR97qV0/Bl939tpDnt2Z2fK3HkPypj70=
github.com/nats-io/nats.go v1.33.1/go.mod h1:Ubdu4Nh9exXdSz0RVWRFBbRfrbSxOYd26oF0wkWclB8=
github.com/nats-io/nats.go v1.32.0 h1:Bx9BZS+aXYlxW08k8Gd3yR2s73pV5XSoAQUyp1Kwvp0=
github.com/nats-io/nats.go v1.32.0/go.mod h1:Ubdu4Nh9exXdSz0RVWRFBbRfrbSxOYd26oF0wkWclB8=
github.com/nats-io/nkeys v0.4.7 h1:RwNJbbIdYCoClSDNY7QVKZlyb/wfT6ugvFCiKy6vDvI=
github.com/nats-io/nkeys v0.4.7/go.mod h1:kqXRgRDPlGy7nGaEDMuYzmiJCIAAWDK0IMBtDmGD0nc=
github.com/nats-io/nuid v1.0.1 h1:5iA8DT8V7q8WK2EScv2padNa/rTESc1KdnPw4TC2paw=
@@ -56,15 +65,18 @@ github.com/nats-io/nuid v1.0.1/go.mod h1:19wcPz3Ph3q0Jbyiqsd0kePYG7A95tJPxeL+1OS
github.com/niemeyer/pretty v0.0.0-20200227124842-a10e7caefd8e/go.mod h1:zD1mROLANZcx1PVRCS0qkT7pwLkGfwJo4zjcN/Tysno=
github.com/oapi-codegen/runtime v1.1.1 h1:EXLHh0DXIJnWhdRPN2w4MXAzFyE4CskzhNLUmtpMYro=
github.com/oapi-codegen/runtime v1.1.1/go.mod h1:SK9X900oXmPWilYR5/WKPzt3Kqxn/uS/+lbpREv+eCg=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/prometheus/client_golang v1.19.0 h1:ygXvpU1AoN1MhdzckN+PyD9QJOSD4x7kmXYlnfbA6JU=
github.com/prometheus/client_golang v1.19.0/go.mod h1:ZRM9uEAypZakd+q/x7+gmsvXdURP+DABIEIjnmDdp+k=
github.com/prometheus/client_model v0.6.0 h1:k1v3CzpSRUTrKMppY35TLwPvxHqBu0bYgxZzqGIgaos=
github.com/prometheus/client_model v0.6.0/go.mod h1:NTQHnmxFpouOD0DpvP4XujX3CdOAGQPoaGhyTchlyt8=
github.com/prometheus/common v0.49.0 h1:ToNTdK4zSnPVJmh698mGFkDor9wBI/iGaJy5dbH1EgI=
github.com/prometheus/common v0.49.0/go.mod h1:Kxm+EULxRbUkjGU6WFsQqo3ORzB4tyKvlWFOE9mB2sE=
github.com/prometheus/client_golang v1.18.0 h1:HzFfmkOzH5Q8L8G+kSJKUx5dtG87sewO+FoDDqP5Tbk=
github.com/prometheus/client_golang v1.18.0/go.mod h1:T+GXkCk5wSJyOqMIzVgvvjFDlkOQntgjkJWKrN5txjA=
github.com/prometheus/client_model v0.5.0 h1:VQw1hfvPvk3Uv6Qf29VrPF32JB6rtbgI6cYPYQjL0Qw=
github.com/prometheus/client_model v0.5.0/go.mod h1:dTiFglRmd66nLR9Pv9f0mZi7B7fk5Pm3gvsjB5tr+kI=
github.com/prometheus/common v0.46.0 h1:doXzt5ybi1HBKpsZOL0sSkaNHJJqkyfEWZGGqqScV0Y=
github.com/prometheus/common v0.46.0/go.mod h1:Tp0qkxpb9Jsg54QMe+EAmqXkSV7Evdy1BTn+g2pa/hQ=
github.com/prometheus/procfs v0.12.0 h1:jluTpSng7V9hY0O2R9DzzJHYb2xULk9VTR1V1R/k6Bo=
github.com/prometheus/procfs v0.12.0/go.mod h1:pcuDEFsWDnvcgNzo4EEweacyhjeA9Zk3cnaOZAZEfOo=
github.com/rogpeppe/go-internal v1.10.0 h1:TMyTOH3F/DB16zRVcYyreMH6GnZZrwQVAoYjRBZyWFQ=
github.com/rogpeppe/go-internal v1.10.0/go.mod h1:UQnix2H7Ngw/k4C5ijL5+65zddjncjaFoBhdsK/akog=
github.com/shopspring/decimal v1.3.1 h1:2Usl1nmF/WZucqkFZhnfFYxxxu8LG21F6nPQBE5gKV8=
github.com/shopspring/decimal v1.3.1/go.mod h1:DKyhrW/HYNuLGql+MJL6WCR6knT2jwCFRcu2hWCYk4o=
github.com/spkg/bom v0.0.0-20160624110644-59b7046e48ad/go.mod h1:qLr4V1qq6nMqFKkMo8ZTx3f+BZEkzsRUY10Xsm2mwU0=
@@ -76,6 +88,7 @@ github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpE
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU=
github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk=
github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo=
github.com/tklauser/go-sysconf v0.3.13 h1:GBUpcahXSpR2xN01jhkNAbTLRk2Yzgggk8IM08lq3r4=
github.com/tklauser/go-sysconf v0.3.13/go.mod h1:zwleP4Q4OehZHGn4CYZDipCgg9usW5IJePewFCGVEa0=
@@ -83,20 +96,21 @@ github.com/tklauser/numcpus v0.7.0 h1:yjuerZP127QG9m5Zh/mSO4wqurYil27tHrqwRoRjpr
github.com/tklauser/numcpus v0.7.0/go.mod h1:bb6dMVcj8A42tSE7i32fsIUCbQNllK5iDguyOZRUzAY=
golang.design/x/thread v0.0.0-20210122121316-335e9adffdf1 h1:P7S/GeHBAFEZIYp0ePPs2kHXoazz8q2KsyxHyQVGCJg=
golang.design/x/thread v0.0.0-20210122121316-335e9adffdf1/go.mod h1:9CWpnTUmlQkfdpdutA1nNf4iE5lAVt3QZOu0Z6hahBE=
golang.org/x/crypto v0.21.0 h1:X31++rzVUdKhX5sWmSOFZxx8UW/ldWx55cbf08iNAMA=
golang.org/x/crypto v0.21.0/go.mod h1:0BP7YvVV9gBbVKyeTG0Gyn+gZm94bibOW5BjDEYAOMs=
golang.org/x/exp v0.0.0-20240222234643-814bf88cf225 h1:LfspQV/FYTatPTr/3HzIcmiUFH7PGP+OQ6mgDYo3yuQ=
golang.org/x/exp v0.0.0-20240222234643-814bf88cf225/go.mod h1:CxmFvTBINI24O/j8iY7H1xHzx2i4OsyguNBmN/uPtqc=
golang.org/x/net v0.22.0 h1:9sGLhx7iRIHEiX0oAJ3MRZMUCElJgy7Br1nO+AMN3Tc=
golang.org/x/net v0.22.0/go.mod h1:JKghWKKOSdJwpW2GEx0Ja7fmaKnMsbu+MWVZTokSYmg=
golang.org/x/crypto v0.18.0 h1:PGVlW0xEltQnzFZ55hkuX5+KLyrMYhHld1YHO4AKcdc=
golang.org/x/crypto v0.18.0/go.mod h1:R0j02AL6hcrfOiy9T4ZYp/rcWeMxM3L6QYxlOuEG1mg=
golang.org/x/exp v0.0.0-20240119083558-1b970713d09a h1:Q8/wZp0KX97QFTc2ywcOE0YRjZPVIx+MXInMzdvQqcA=
golang.org/x/exp v0.0.0-20240119083558-1b970713d09a/go.mod h1:idGWGoKP1toJGkd5/ig9ZLuPcZBC3ewk7SzmH0uou08=
golang.org/x/net v0.20.0 h1:aCL9BSgETF1k+blQaYUBx9hJ9LOGP3gAVemcZlf1Kpo=
golang.org/x/net v0.20.0/go.mod h1:z8BVo6PvndSri0LbOE3hAn0apkU+1YvI6E70E9jsnvY=
golang.org/x/sys v0.0.0-20210122093101-04d7465088b8/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.18.0 h1:DBdB3niSjOA/O0blCZBqDefyWNYveAYMNF1Wum0DYQ4=
golang.org/x/sys v0.18.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
golang.org/x/sys v0.16.0 h1:xWw16ngr6ZMtmxDyKyIgsE93KNKz5HKmMa3b8ALHidU=
golang.org/x/sys v0.16.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
google.golang.org/protobuf v1.33.0 h1:uNO2rsAINq/JlFpSdYEKIZ0uKD/R9cpdv0T+yoGwGmI=
google.golang.org/protobuf v1.33.0/go.mod h1:c6P6GXX6sHbq/GpV6MGZEdwhWPcYBgnhAHhKbcUYpos=
google.golang.org/protobuf v1.32.0 h1:pPC6BG5ex8PDFnkbrGU3EixyhKcQ2aDuBS36lqK/C7I=
google.golang.org/protobuf v1.32.0/go.mod h1:c6P6GXX6sHbq/GpV6MGZEdwhWPcYBgnhAHhKbcUYpos=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/check.v1 v1.0.0-20200227125254-8fa46927fb4f/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
gopkg.in/yaml.v3 v3.0.0-20200615113413-eeeca48fe776/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=

View File

@@ -6,7 +6,6 @@ import (
"encoding/json"
"fmt"
"io"
"maps"
"net/http"
"strconv"
"strings"
@@ -33,14 +32,10 @@ type RedfishReceiverClientConfig struct {
doPowerMetric bool
doProcessorMetrics bool
doSensors bool
doThermalMetrics bool
skipProcessorMetricsURL map[string]bool
// readSensorURLs stores for each chassis ID a list of sensor URLs to read
readSensorURLs map[string][]string
gofish gofish.ClientConfig
}
@@ -61,226 +56,7 @@ type RedfishReceiver struct {
wg sync.WaitGroup // wait group for redfish receiver
}
// deleteEmptyTags removes tags or meta data tags with empty value
func deleteEmptyTags(tags map[string]string) {
maps.DeleteFunc(
tags,
func(key string, value string) bool {
return value == ""
},
)
}
// setMetricValue sets the value entry in the fields map
func setMetricValue(value any) map[string]interface{} {
return map[string]interface{}{
"value": value,
}
}
// sendMetric sends the metric through the sink channel
func (r *RedfishReceiver) sendMetric(name string, tags map[string]string, meta map[string]string, value any, timestamp time.Time) {
deleteEmptyTags(tags)
deleteEmptyTags(meta)
y, err := lp.New(name, tags, meta, setMetricValue(value), timestamp)
if err == nil {
r.sink <- y
}
}
// readSensors reads sensors from a redfish device
// See: https://redfish.dmtf.org/schemas/v1/Sensor.json
// Redfish URI: /redfish/v1/Chassis/{ChassisId}/Sensors/{SensorId}
func (r *RedfishReceiver) readSensors(
clientConfig *RedfishReceiverClientConfig,
chassis *redfish.Chassis) error {
writeTemperatureSensor := func(sensor *redfish.Sensor) {
tags := map[string]string{
"hostname": clientConfig.Hostname,
"type": "node",
// ChassisType shall indicate the physical form factor for the type of chassis
"chassis_typ": string(chassis.ChassisType),
// Chassis name
"chassis_name": chassis.Name,
// ID uniquely identifies the resource
"sensor_id": sensor.ID,
// The area or device to which this sensor measurement applies
"temperature_physical_context": string(sensor.PhysicalContext),
// Name
"temperature_name": sensor.Name,
}
// Set meta data tags
meta := map[string]string{
"source": r.name,
"group": "Temperature",
"unit": "degC",
}
r.sendMetric("temperature", tags, meta, sensor.Reading, time.Now())
}
writeFanSpeedSensor := func(sensor *redfish.Sensor) {
tags := map[string]string{
"hostname": clientConfig.Hostname,
"type": "node",
// ChassisType shall indicate the physical form factor for the type of chassis
"chassis_typ": string(chassis.ChassisType),
// Chassis name
"chassis_name": chassis.Name,
// ID uniquely identifies the resource
"sensor_id": sensor.ID,
// The area or device to which this sensor measurement applies
"fan_physical_context": string(sensor.PhysicalContext),
// Name
"fan_name": sensor.Name,
}
// Set meta data tags
meta := map[string]string{
"source": r.name,
"group": "FanSpeed",
"unit": string(sensor.ReadingUnits),
}
r.sendMetric("fan_speed", tags, meta, sensor.Reading, time.Now())
}
writePowerSensor := func(sensor *redfish.Sensor) {
// Set tags
tags := map[string]string{
"hostname": clientConfig.Hostname,
"type": "node",
// ChassisType shall indicate the physical form factor for the type of chassis
"chassis_typ": string(chassis.ChassisType),
// Chassis name
"chassis_name": chassis.Name,
// ID uniquely identifies the resource
"sensor_id": sensor.ID,
// The area or device to which this sensor measurement applies
"power_physical_context": string(sensor.PhysicalContext),
// Name
"power_name": sensor.Name,
}
// Set meta data tags
meta := map[string]string{
"source": r.name,
"group": "Energy",
"unit": "watts",
}
r.sendMetric("power", tags, meta, sensor.Reading, time.Now())
}
if _, ok := clientConfig.readSensorURLs[chassis.ID]; !ok {
// First time run of read sensors for this chassis
clientConfig.readSensorURLs[chassis.ID] = make([]string, 0)
// Get sensor information for this chassis
sensors, err := chassis.Sensors()
if err != nil {
return fmt.Errorf("readSensors: chassis.Sensors() failed: %v", err)
}
// Skip empty sensors information
if sensors == nil {
return nil
}
for _, sensor := range sensors {
// Skip all sensors which are not in enabled state or which are unhealthy
if sensor.Status.State != common.EnabledState || sensor.Status.Health != common.OKHealth {
continue
}
// Skip sensors with missing readings units or type
if sensor.ReadingUnits == "" || sensor.ReadingType == "" {
continue
}
// Power readings
if (sensor.ReadingType == redfish.PowerReadingType && sensor.ReadingUnits == "Watts") ||
(sensor.ReadingType == redfish.CurrentReadingType && sensor.ReadingUnits == "Watts") {
if clientConfig.isExcluded["power"] {
continue
}
clientConfig.readSensorURLs[chassis.ID] = append(clientConfig.readSensorURLs[chassis.ID], sensor.ODataID)
writePowerSensor(sensor)
continue
}
// Fan speed readings
if (sensor.ReadingType == redfish.AirFlowReadingType && sensor.ReadingUnits == "RPM") ||
(sensor.ReadingType == redfish.AirFlowReadingType && sensor.ReadingUnits == "Percent") {
// Skip, when fan_speed metric is excluded
if clientConfig.isExcluded["fan_speed"] {
continue
}
clientConfig.readSensorURLs[chassis.ID] = append(clientConfig.readSensorURLs[chassis.ID], sensor.ODataID)
writeFanSpeedSensor(sensor)
}
// Temperature readings
if sensor.ReadingType == redfish.TemperatureReadingType && sensor.ReadingUnits == "C" {
if clientConfig.isExcluded["temperature"] {
continue
}
clientConfig.readSensorURLs[chassis.ID] = append(clientConfig.readSensorURLs[chassis.ID], sensor.ODataID)
writeTemperatureSensor(sensor)
continue
}
}
} else {
common.CollectCollection(
func(uri string) {
sensor, err := redfish.GetSensor(chassis.GetClient(), uri)
if err != nil {
cclog.ComponentError(r.name, "redfish.GetSensor() for uri '", uri, "' failed")
}
// Power readings
if (sensor.ReadingType == redfish.PowerReadingType && sensor.ReadingUnits == "Watts") ||
(sensor.ReadingType == redfish.CurrentReadingType && sensor.ReadingUnits == "Watts") {
writePowerSensor(sensor)
return
}
// Fan speed readings
if (sensor.ReadingType == redfish.AirFlowReadingType && sensor.ReadingUnits == "RPM") ||
(sensor.ReadingType == redfish.AirFlowReadingType && sensor.ReadingUnits == "Percent") {
writeFanSpeedSensor(sensor)
return
}
// Temperature readings
if sensor.ReadingType == redfish.TemperatureReadingType && sensor.ReadingUnits == "C" {
writeTemperatureSensor(sensor)
return
}
},
clientConfig.readSensorURLs[chassis.ID])
}
return nil
}
// readThermalMetrics reads thermal metrics from a redfish device
// See: https://redfish.dmtf.org/schemas/v1/Thermal.json
// Redfish URI: /redfish/v1/Chassis/{ChassisId}/Thermal
// -> deprecated in favor of the ThermalSubsystem schema
// -> on Lenovo servers /redfish/v1/Chassis/{ChassisId}/ThermalSubsystem/ThermalMetrics links to /redfish/v1/Chassis/{ChassisId}/Sensors/{SensorId}
func (r *RedfishReceiver) readThermalMetrics(
clientConfig *RedfishReceiverClientConfig,
chassis *redfish.Chassis) error {
@@ -330,6 +106,13 @@ func (r *RedfishReceiver) readThermalMetrics(
"temperature_name": temperature.Name,
}
// Delete empty tags
for key, value := range tags {
if value == "" {
delete(tags, key)
}
}
// Set meta data tags
meta := map[string]string{
"source": r.name,
@@ -340,7 +123,14 @@ func (r *RedfishReceiver) readThermalMetrics(
// ReadingCelsius shall be the current value of the temperature sensor's reading.
value := temperature.ReadingCelsius
r.sendMetric("temperature", tags, meta, value, timestamp)
y, err := lp.New("temperature", tags, meta,
map[string]interface{}{
"value": value,
},
timestamp)
if err == nil {
r.sink <- y
}
}
for _, fan := range thermal.Fans {
@@ -374,6 +164,13 @@ func (r *RedfishReceiver) readThermalMetrics(
"fan_name": fan.Name,
}
// Delete empty tags
for key, value := range tags {
if value == "" {
delete(tags, key)
}
}
// Set meta data tags
meta := map[string]string{
"source": r.name,
@@ -381,16 +178,23 @@ func (r *RedfishReceiver) readThermalMetrics(
"unit": string(fan.ReadingUnits),
}
r.sendMetric("fan_speed", tags, meta, fan.Reading, timestamp)
// Reading shall be the current value of the fan sensor's reading
value := fan.Reading
y, err := lp.New("fan_speed", tags, meta,
map[string]interface{}{
"value": value,
},
timestamp)
if err == nil {
r.sink <- y
}
}
return nil
}
// readPowerMetrics reads power metrics from a redfish device
// See: https://redfish.dmtf.org/schemas/v1/Power.json
// Redfish URI: /redfish/v1/Chassis/{ChassisId}/Power
// -> deprecated in favor of the PowerSubsystem schema
func (r *RedfishReceiver) readPowerMetrics(
clientConfig *RedfishReceiverClientConfig,
chassis *redfish.Chassis) error {
@@ -470,6 +274,13 @@ func (r *RedfishReceiver) readPowerMetrics(
"power_control_name": pc.Name,
}
// Delete empty tags
for key, value := range tags {
if value == "" {
delete(tags, key)
}
}
// Set meta data tags
meta := map[string]string{
"source": r.name,
@@ -478,8 +289,23 @@ func (r *RedfishReceiver) readPowerMetrics(
"unit": "watts",
}
// Delete empty meta data tags
for key, value := range meta {
if value == "" {
delete(meta, key)
}
}
for name, value := range metrics {
r.sendMetric(name, tags, meta, value, timestamp)
y, err := lp.New(name, tags, meta,
map[string]interface{}{
"value": value,
},
timestamp)
if err == nil {
r.sink <- y
}
}
}
@@ -488,7 +314,6 @@ func (r *RedfishReceiver) readPowerMetrics(
// readProcessorMetrics reads processor metrics from a redfish device
// See: https://redfish.dmtf.org/schemas/v1/ProcessorMetrics.json
// Redfish URI: /redfish/v1/Systems/{ComputerSystemId}/Processors/{ProcessorId}/ProcessorMetrics
func (r *RedfishReceiver) readProcessorMetrics(
clientConfig *RedfishReceiverClientConfig,
processor *redfish.Processor) error {
@@ -549,6 +374,13 @@ func (r *RedfishReceiver) readProcessorMetrics(
"processor_id": processor.ID,
}
// Delete empty tags
for key, value := range tags {
if value == "" {
delete(tags, key)
}
}
// Set meta data tags
metaPower := map[string]string{
"source": r.name,
@@ -561,7 +393,14 @@ func (r *RedfishReceiver) readProcessorMetrics(
if !clientConfig.isExcluded[namePower] &&
// Some servers return "ConsumedPowerWatt":65535 instead of "ConsumedPowerWatt":null
processorMetrics.ConsumedPowerWatt != 65535 {
r.sendMetric(namePower, tags, metaPower, processorMetrics.ConsumedPowerWatt, timestamp)
y, err := lp.New(namePower, tags, metaPower,
map[string]interface{}{
"value": processorMetrics.ConsumedPowerWatt,
},
timestamp)
if err == nil {
r.sink <- y
}
}
// Set meta data tags
metaThermal := map[string]string{
@@ -573,7 +412,14 @@ func (r *RedfishReceiver) readProcessorMetrics(
nameThermal := "temperature"
if !clientConfig.isExcluded[nameThermal] {
r.sendMetric(nameThermal, tags, metaThermal, processorMetrics.TemperatureCelsius, timestamp)
y, err := lp.New(nameThermal, tags, metaThermal,
map[string]interface{}{
"value": processorMetrics.TemperatureCelsius,
},
timestamp)
if err == nil {
r.sink <- y
}
}
return nil
}
@@ -606,7 +452,6 @@ func (r *RedfishReceiver) readMetrics(clientConfig *RedfishReceiverClientConfig)
// Get all chassis managed by this service
isChassisListRequired :=
clientConfig.doSensors ||
clientConfig.doThermalMetrics ||
clientConfig.doPowerMetric
var chassisList []*redfish.Chassis
@@ -627,16 +472,6 @@ func (r *RedfishReceiver) readMetrics(clientConfig *RedfishReceiverClientConfig)
}
}
// Read sensors
if clientConfig.doSensors {
for _, chassis := range chassisList {
err := r.readSensors(clientConfig, chassis)
if err != nil {
return err
}
}
}
// read thermal metrics
if clientConfig.doThermalMetrics {
for _, chassis := range chassisList {
@@ -801,7 +636,6 @@ func NewRedfishReceiver(name string, config json.RawMessage) (Receiver, error) {
// Globally disable collection of power, processor or thermal metrics
DisablePowerMetrics bool `json:"disable_power_metrics"`
DisableProcessorMetrics bool `json:"disable_processor_metrics"`
DisableSensors bool `json:"disable_sensors"`
DisableThermalMetrics bool `json:"disable_thermal_metrics"`
// Globally excluded metrics
@@ -816,7 +650,6 @@ func NewRedfishReceiver(name string, config json.RawMessage) (Receiver, error) {
// Per client disable collection of power,processor or thermal metrics
DisablePowerMetrics bool `json:"disable_power_metrics"`
DisableProcessorMetrics bool `json:"disable_processor_metrics"`
DisableSensors bool `json:"disable_sensors"`
DisableThermalMetrics bool `json:"disable_thermal_metrics"`
// Per client excluded metrics
@@ -933,9 +766,6 @@ func NewRedfishReceiver(name string, config json.RawMessage) (Receiver, error) {
doProcessorMetrics :=
!(configJSON.DisableProcessorMetrics ||
clientConfigJSON.DisableProcessorMetrics)
doSensors :=
!(configJSON.DisableSensors ||
clientConfigJSON.DisableSensors)
doThermalMetrics :=
!(configJSON.DisableThermalMetrics ||
clientConfigJSON.DisableThermalMetrics)
@@ -968,10 +798,8 @@ func NewRedfishReceiver(name string, config json.RawMessage) (Receiver, error) {
isExcluded: isExcluded,
doPowerMetric: doPowerMetric,
doProcessorMetrics: doProcessorMetrics,
doSensors: doSensors,
doThermalMetrics: doThermalMetrics,
skipProcessorMetricsURL: make(map[string]bool),
readSensorURLs: map[string][]string{},
gofish: gofish.ClientConfig{
Username: username,
Password: password,

View File

@@ -17,17 +17,15 @@ The Redfish receiver uses the [Redfish (specification)](https://www.dmtf.org/sta
"host_list": "n[1,2-4]"
},
{
"host_list": "n5",
"disable_power_metrics": true,
"disable_processor_metrics": true,
"disable_thermal_metrics": true
"host_list": "n5"
"disable_power_metrics": true
},
{
"host_list": "n6" ],
"username": "<Username 2>",
"password": "<Password 2>",
"endpoint": "https://%h-BMC",
"disable_sensor_metrics": true
"disable_thermal_metrics": true
}
]
}
@@ -43,18 +41,9 @@ Global settings:
Global and per redfish device settings (per redfish device settings overwrite the global settings):
- `disable_power_metrics`:
disable collection of power metrics
(`/redfish/v1/Chassis/{ChassisId}/Power`)
- `disable_processor_metrics`:
disable collection of processor metrics
(`/redfish/v1/Systems/{ComputerSystemId}/Processors/{ProcessorId}/ProcessorMetrics`)
- `disable_sensors`:
disable collection of fan, power and thermal sensor metrics
(`/redfish/v1/Chassis/{ChassisId}/Sensors/{SensorId}`)
- `disable_thermal_metrics`:
disable collection of thermal metrics
(`/redfish/v1/Chassis/{ChassisId}/Thermal`)
- `disable_power_metrics`: disable collection of power metrics
- `disable_processor_metrics`: disable collection of processor metrics
- `disable_thermal_metrics`: disable collection of thermal metrics
- `exclude_metrics`: list of excluded metrics
- `username`: User name to authenticate with
- `password`: Password to use for authentication