Compare commits

..

3 Commits

Author SHA1 Message Date
dependabot[bot]
2787baa3e5 Bump golang.org/x/sys from 0.43.0 to 0.44.0
Bumps [golang.org/x/sys](https://github.com/golang/sys) from 0.43.0 to 0.44.0.
- [Commits](https://github.com/golang/sys/compare/v0.43.0...v0.44.0)

---
updated-dependencies:
- dependency-name: golang.org/x/sys
  dependency-version: 0.44.0
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
2026-05-11 03:27:47 +00:00
Holger Obermaier
656ea73d12 Fix: num_cpus could not be excluded 2026-05-07 14:47:23 +02:00
Holger Obermaier
330f923596 Fixed exclude_metrics and check for used metrics 2026-05-07 12:25:07 +02:00
6 changed files with 20 additions and 42 deletions

View File

@@ -27,6 +27,7 @@ const CPUSTATFILE = `/proc/stat`
type CpustatCollectorConfig struct {
ExcludeMetrics []string `json:"exclude_metrics,omitempty"`
excludeNumCPUs bool
}
type CpustatCollector struct {
@@ -79,6 +80,7 @@ func (m *CpustatCollector) Init(config json.RawMessage) error {
m.matches[match] = index
}
}
m.config.excludeNumCPUs = slices.Contains(m.config.ExcludeMetrics, "num_cpus")
// Check input file
file, err := os.Open(CPUSTATFILE)
@@ -95,11 +97,13 @@ func (m *CpustatCollector) Init(config json.RawMessage) error {
line := scanner.Text()
linefields := strings.Fields(line)
if strings.Compare(linefields[0], "cpu") == 0 {
// Kernel system statistics for all CPUs
m.olddata["cpu"] = make(map[string]int64)
for k, v := range m.matches {
m.olddata["cpu"][k], _ = strconv.ParseInt(linefields[v], 0, 64)
}
} else if strings.HasPrefix(linefields[0], "cpu") && strings.Compare(linefields[0], "cpu") != 0 {
// Kernel system statistics per CPU
cpustr := strings.TrimLeft(linefields[0], "cpu")
cpu, _ := strconv.Atoi(cpustr)
m.cputags[linefields[0]] = map[string]string{
@@ -191,9 +195,10 @@ func (m *CpustatCollector) Read(interval time.Duration, output chan lp.CCMessage
}
}
num_cpus_metric, err := lp.NewMetric("num_cpus", m.nodetags, m.meta, num_cpus, now)
if err == nil {
output <- num_cpus_metric
if !m.config.excludeNumCPUs {
if num_cpus_metric, err := lp.NewMetric("num_cpus", m.nodetags, m.meta, num_cpus, now); err == nil {
output <- num_cpus_metric
}
}
m.lastTimestamp = now

View File

@@ -72,7 +72,8 @@ func getStats(filename string) map[string]MemstatStats {
for scanner.Scan() {
line := scanner.Text()
linefields := strings.Fields(line)
if len(linefields) == 3 {
switch len(linefields) {
case 3:
v, err := strconv.ParseFloat(linefields[1], 64)
if err == nil {
stats[strings.Trim(linefields[0], ":")] = MemstatStats{
@@ -80,7 +81,7 @@ func getStats(filename string) map[string]MemstatStats {
unit: linefields[2],
}
}
} else if len(linefields) == 5 {
case 5:
v, err := strconv.ParseFloat(linefields[3], 64)
if err == nil {
cclog.ComponentDebug("getStats", strings.Trim(linefields[2], ":"), v, linefields[4])
@@ -106,7 +107,10 @@ func (m *MemstatCollector) Init(config json.RawMessage) error {
return fmt.Errorf("%s Init(): Error decoding JSON config: %w", m.name, err)
}
}
m.meta = map[string]string{"source": m.name, "group": "Memory"}
m.meta = map[string]string{
"source": m.name,
"group": "Memory",
}
m.stats = make(map[string]int64)
m.matches = make(map[string]string)
m.tags = map[string]string{"type": "node"}
@@ -145,7 +149,7 @@ func (m *MemstatCollector) Init(config json.RawMessage) error {
"KernelStack": "mem_kernelstack",
}
for k, v := range matches {
if !slices.Contains(m.config.ExcludeMetrics, k) {
if !slices.Contains(m.config.ExcludeMetrics, v) {
m.matches[k] = v
}
}
@@ -153,7 +157,7 @@ func (m *MemstatCollector) Init(config json.RawMessage) error {
if !slices.Contains(m.config.ExcludeMetrics, "mem_used") {
m.sendMemUsed = true
}
if len(m.matches) == 0 {
if len(m.matches) == 0 && !m.sendMemUsed {
return fmt.Errorf("%s Init(): no metrics to collect", m.name)
}
if err := m.setup(); err != nil {

View File

@@ -1115,31 +1115,6 @@ func readNVLinkStats(device *NvidiaCollectorDevice, output chan lp.CCMessage) er
return nil
}
func readEfficiency(device *NvidiaCollectorDevice, output chan lp.CCMessage) error {
if !device.excludeMetrics["nv_util_eff"] {
maxPower, ret := nvml.DeviceGetEnforcedPowerLimit(device.device)
if ret == nvml.SUCCESS {
curPower, ret := nvml.DeviceGetPowerUsage(device.device)
if ret == nvml.SUCCESS {
util, ret := nvml.DeviceGetUtilizationRates(device.device)
if ret == nvml.SUCCESS {
factor := float64(curPower) / float64(maxPower)
eff := uint32(float64(util.Gpu) * factor)
if eff > 100 {
eff = 100
}
y, err := lp.NewMetric("nv_util_eff", device.tags, device.meta, eff, time.Now())
if err == nil {
y.AddTag("unit", "percent")
output <- y
}
}
}
}
}
return nil
}
func (m *NvidiaCollector) Read(interval time.Duration, output chan lp.CCMessage) {
var err error
if !m.init {
@@ -1245,11 +1220,6 @@ func (m *NvidiaCollector) Read(interval time.Duration, output chan lp.CCMessage)
if err != nil {
cclog.ComponentDebug(m.name, "readNVLinkStats for device", name, "failed")
}
err = readEfficiency(device, output)
if err != nil {
cclog.ComponentDebug(m.name, "readEfficiency for device", name, "failed")
}
}
// Actual read loop over all attached Nvidia GPUs

View File

@@ -85,6 +85,5 @@ Metrics:
* `nv_energy`
* `nv_energy_abs`
* `nv_average_power`
* `nv_util_eff` (`nv_util` * (`nv_power_usage` / `nv_power_max_limit`))
Some metrics add the additional sub type tag (`stype`) like the `nv_nvlink_*` metrics set `stype=nvlink,stype-id=<link_number>`.

2
go.mod
View File

@@ -10,7 +10,7 @@ require (
github.com/fsnotify/fsnotify v1.10.0
github.com/tklauser/go-sysconf v0.3.16
golang.design/x/thread v0.0.0-20210122121316-335e9adffdf1
golang.org/x/sys v0.43.0
golang.org/x/sys v0.44.0
)
require (

4
go.sum
View File

@@ -184,8 +184,8 @@ golang.org/x/mod v0.13.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c=
golang.org/x/net v0.53.0 h1:d+qAbo5L0orcWAr0a9JweQpjXF19LMXJE8Ey7hwOdUA=
golang.org/x/net v0.53.0/go.mod h1:JvMuJH7rrdiCfbeHoo3fCQU24Lf5JJwT9W3sJFulfgs=
golang.org/x/sys v0.0.0-20210122093101-04d7465088b8/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.43.0 h1:Rlag2XtaFTxp19wS8MXlJwTvoh8ArU6ezoyFsMyCTNI=
golang.org/x/sys v0.43.0/go.mod h1:4GL1E5IUh+htKOUEOaiffhrAeqysfVGipDYzABqnCmw=
golang.org/x/sys v0.44.0 h1:ildZl3J4uzeKP07r2F++Op7E9B29JRUy+a27EibtBTQ=
golang.org/x/sys v0.44.0/go.mod h1:4GL1E5IUh+htKOUEOaiffhrAeqysfVGipDYzABqnCmw=
golang.org/x/time v0.15.0 h1:bbrp8t3bGUeFOx08pvsMYRTCVSMk89u4tKbNOZbp88U=
golang.org/x/time v0.15.0/go.mod h1:Y4YMaQmXwGQZoFaVFk4YpCt4FLQMYKZe9oeV/f4MSno=
golang.org/x/tools v0.14.0 h1:jvNa2pY0M4r62jkRQ6RwEZZyPcymeL9XZMLBbV7U2nc=