Compare commits

...

6 Commits

Author SHA1 Message Date
Holger Obermaier
656ea73d12 Fix: num_cpus could not be excluded 2026-05-07 14:47:23 +02:00
Holger Obermaier
330f923596 Fixed exclude_metrics and check for used metrics 2026-05-07 12:25:07 +02:00
Holger Obermaier
8e58072ff6 Use NewMetric to create a new metric 2026-05-06 13:22:02 +02:00
Holger Obermaier
0f6fee9db4 Do not save current state of infiniband counters, only last state is required 2026-05-06 10:42:06 +02:00
Holger Obermaier
7585ee7289 Add bandwidth metrics for ib_total and ib_total_pkts 2026-05-05 14:13:38 +02:00
Michael Panzlaff
30b2eb69dd Merge pull request #213 from ClusterCockpit/fix/libdrm-ubuntu-deb
CI: Install libdrm-dev for building (required on Ubuntu)
2026-05-04 14:30:44 +02:00
18 changed files with 108 additions and 189 deletions

View File

@@ -227,8 +227,7 @@ func (m *BeegfsMetaCollector) Read(interval time.Duration, output chan lp.CCMess
for key, data := range m.matches { for key, data := range m.matches {
value, _ := strconv.ParseFloat(data, 32) value, _ := strconv.ParseFloat(data, 32)
y, err := lp.NewMessage(key, m.tags, m.meta, map[string]any{"value": value}, time.Now()) if y, err := lp.NewMetric(key, m.tags, m.meta, value, time.Now()); err == nil {
if err == nil {
output <- y output <- y
} }
} }

View File

@@ -218,8 +218,7 @@ func (m *BeegfsStorageCollector) Read(interval time.Duration, output chan lp.CCM
for key, data := range m.matches { for key, data := range m.matches {
value, _ := strconv.ParseFloat(data, 32) value, _ := strconv.ParseFloat(data, 32)
y, err := lp.NewMessage(key, m.tags, m.meta, map[string]any{"value": value}, time.Now()) if y, err := lp.NewMetric(key, m.tags, m.meta, value, time.Now()); err == nil {
if err == nil {
output <- y output <- y
} }
} }

View File

@@ -171,7 +171,7 @@ func (m *CPUFreqCpuInfoCollector) Read(interval time.Duration, output chan lp.CC
fmt.Sprintf("Read(): Failed to convert cpu MHz '%s' to float64: %v", lineSplit[1], err)) fmt.Sprintf("Read(): Failed to convert cpu MHz '%s' to float64: %v", lineSplit[1], err))
return return
} }
if y, err := lp.NewMessage("cpufreq", t.tagSet, m.meta, map[string]any{"value": value}, now); err == nil { if y, err := lp.NewMetric("cpufreq", t.tagSet, m.meta, value, now); err == nil {
output <- y output <- y
} }
} }

View File

@@ -129,7 +129,7 @@ func (m *CPUFreqCollector) Read(interval time.Duration, output chan lp.CCMessage
continue continue
} }
if y, err := lp.NewMessage("cpufreq", t.tagSet, m.meta, map[string]any{"value": cpuFreq}, now); err == nil { if y, err := lp.NewMetric("cpufreq", t.tagSet, m.meta, cpuFreq, now); err == nil {
output <- y output <- y
} }
} }

View File

@@ -27,6 +27,7 @@ const CPUSTATFILE = `/proc/stat`
type CpustatCollectorConfig struct { type CpustatCollectorConfig struct {
ExcludeMetrics []string `json:"exclude_metrics,omitempty"` ExcludeMetrics []string `json:"exclude_metrics,omitempty"`
excludeNumCPUs bool
} }
type CpustatCollector struct { type CpustatCollector struct {
@@ -79,6 +80,7 @@ func (m *CpustatCollector) Init(config json.RawMessage) error {
m.matches[match] = index m.matches[match] = index
} }
} }
m.config.excludeNumCPUs = slices.Contains(m.config.ExcludeMetrics, "num_cpus")
// Check input file // Check input file
file, err := os.Open(CPUSTATFILE) file, err := os.Open(CPUSTATFILE)
@@ -95,11 +97,13 @@ func (m *CpustatCollector) Init(config json.RawMessage) error {
line := scanner.Text() line := scanner.Text()
linefields := strings.Fields(line) linefields := strings.Fields(line)
if strings.Compare(linefields[0], "cpu") == 0 { if strings.Compare(linefields[0], "cpu") == 0 {
// Kernel system statistics for all CPUs
m.olddata["cpu"] = make(map[string]int64) m.olddata["cpu"] = make(map[string]int64)
for k, v := range m.matches { for k, v := range m.matches {
m.olddata["cpu"][k], _ = strconv.ParseInt(linefields[v], 0, 64) m.olddata["cpu"][k], _ = strconv.ParseInt(linefields[v], 0, 64)
} }
} else if strings.HasPrefix(linefields[0], "cpu") && strings.Compare(linefields[0], "cpu") != 0 { } else if strings.HasPrefix(linefields[0], "cpu") && strings.Compare(linefields[0], "cpu") != 0 {
// Kernel system statistics per CPU
cpustr := strings.TrimLeft(linefields[0], "cpu") cpustr := strings.TrimLeft(linefields[0], "cpu")
cpu, _ := strconv.Atoi(cpustr) cpu, _ := strconv.Atoi(cpustr)
m.cputags[linefields[0]] = map[string]string{ m.cputags[linefields[0]] = map[string]string{
@@ -141,7 +145,7 @@ func (m *CpustatCollector) parseStatLine(linefields []string, tags map[string]st
sum := float64(0) sum := float64(0)
for name, value := range values { for name, value := range values {
sum += value sum += value
y, err := lp.NewMessage(name, tags, m.meta, map[string]any{"value": value * 100}, now) y, err := lp.NewMetric(name, tags, m.meta, value*100, now)
if err == nil { if err == nil {
y.AddTag("unit", "Percent") y.AddTag("unit", "Percent")
output <- y output <- y
@@ -149,7 +153,7 @@ func (m *CpustatCollector) parseStatLine(linefields []string, tags map[string]st
} }
if v, ok := values["cpu_idle"]; ok { if v, ok := values["cpu_idle"]; ok {
sum -= v sum -= v
y, err := lp.NewMessage("cpu_used", tags, m.meta, map[string]any{"value": sum * 100}, now) y, err := lp.NewMetric("cpu_used", tags, m.meta, sum*100, now)
if err == nil { if err == nil {
y.AddTag("unit", "Percent") y.AddTag("unit", "Percent")
output <- y output <- y
@@ -191,14 +195,10 @@ func (m *CpustatCollector) Read(interval time.Duration, output chan lp.CCMessage
} }
} }
num_cpus_metric, err := lp.NewMessage("num_cpus", if !m.config.excludeNumCPUs {
m.nodetags, if num_cpus_metric, err := lp.NewMetric("num_cpus", m.nodetags, m.meta, num_cpus, now); err == nil {
m.meta, output <- num_cpus_metric
map[string]any{"value": num_cpus}, }
now,
)
if err == nil {
output <- num_cpus_metric
} }
m.lastTimestamp = now m.lastTimestamp = now

View File

@@ -128,30 +128,14 @@ mountLoop:
tags := map[string]string{"type": "node", "device": linefields[0]} tags := map[string]string{"type": "node", "device": linefields[0]}
total := (stat.Blocks * uint64(stat.Bsize)) / uint64(1000_000_000) total := (stat.Blocks * uint64(stat.Bsize)) / uint64(1000_000_000)
if m.allowedMetrics["disk_total"] { if m.allowedMetrics["disk_total"] {
y, err := lp.NewMessage( if y, err := lp.NewMetric("disk_total", tags, m.meta, total, time.Now()); err == nil {
"disk_total",
tags,
m.meta,
map[string]any{
"value": total,
},
time.Now())
if err == nil {
y.AddMeta("unit", "GBytes") y.AddMeta("unit", "GBytes")
output <- y output <- y
} }
} }
free := (stat.Bfree * uint64(stat.Bsize)) / uint64(1000_000_000) free := (stat.Bfree * uint64(stat.Bsize)) / uint64(1000_000_000)
if m.allowedMetrics["disk_free"] { if m.allowedMetrics["disk_free"] {
y, err := lp.NewMessage( if y, err := lp.NewMetric("disk_free", tags, m.meta, free, time.Now()); err == nil {
"disk_free",
tags,
m.meta,
map[string]any{
"value": free,
},
time.Now())
if err == nil {
y.AddMeta("unit", "GBytes") y.AddMeta("unit", "GBytes")
output <- y output <- y
} }
@@ -164,16 +148,7 @@ mountLoop:
} }
} }
if m.allowedMetrics["part_max_used"] { if m.allowedMetrics["part_max_used"] {
y, err := lp.NewMessage( y, err := lp.NewMetric("part_max_used", map[string]string{"type": "node"}, m.meta, int(part_max_used), time.Now())
"part_max_used",
map[string]string{
"type": "node",
},
m.meta,
map[string]any{
"value": int(part_max_used),
},
time.Now())
if err == nil { if err == nil {
y.AddMeta("unit", "percent") y.AddMeta("unit", "percent")
output <- y output <- y

View File

@@ -32,7 +32,6 @@ type InfinibandCollectorMetric struct {
scale int64 scale int64
addToIBTotal bool addToIBTotal bool
addToIBTotalPkgs bool addToIBTotalPkgs bool
currentState int64
lastState int64 lastState int64
} }
@@ -202,7 +201,9 @@ func (m *InfinibandCollector) Read(interval time.Duration, output chan lp.CCMess
for i := range m.info { for i := range m.info {
info := &m.info[i] info := &m.info[i]
var ib_total, ib_total_pkts int64 var ib_total, ib_total_last_state,
ib_total_pkts, ib_total_pkts_last_state int64
var ib_total_last_state_available, ib_total_pkts_last_state_available bool
for i := range info.portCounterFiles { for i := range info.portCounterFiles {
counterDef := &info.portCounterFiles[i] counterDef := &info.portCounterFiles[i]
@@ -227,19 +228,9 @@ func (m *InfinibandCollector) Read(interval time.Duration, output chan lp.CCMess
// Scale raw value // Scale raw value
v *= counterDef.scale v *= counterDef.scale
// Save current state
counterDef.currentState = v
// Send absolute values // Send absolute values
if m.config.SendAbsoluteValues { if m.config.SendAbsoluteValues {
if y, err := lp.NewMessage( if y, err := lp.NewMetric(counterDef.name, info.tagSet, m.meta, v, now); err == nil {
counterDef.name,
info.tagSet,
m.meta,
map[string]any{
"value": counterDef.currentState,
},
now); err == nil {
y.AddMeta("unit", counterDef.unit) y.AddMeta("unit", counterDef.unit)
output <- y output <- y
} }
@@ -248,59 +239,65 @@ func (m *InfinibandCollector) Read(interval time.Duration, output chan lp.CCMess
// Send derived values // Send derived values
if m.config.SendDerivedValues { if m.config.SendDerivedValues {
if counterDef.lastState >= 0 { if counterDef.lastState >= 0 {
rate := float64((counterDef.currentState - counterDef.lastState)) / timeDiff rate := float64((v - counterDef.lastState)) / timeDiff
if y, err := lp.NewMessage( if y, err := lp.NewMetric(counterDef.name+"_bw", info.tagSet, m.meta, rate, now); err == nil {
counterDef.name+"_bw",
info.tagSet,
m.meta,
map[string]any{
"value": rate,
},
now); err == nil {
y.AddMeta("unit", counterDef.unit+"/sec") y.AddMeta("unit", counterDef.unit+"/sec")
output <- y output <- y
}
// Sum up total values of last state
if m.config.SendTotalValues {
switch {
case counterDef.addToIBTotal:
ib_total_last_state += counterDef.lastState
ib_total_last_state_available = true
case counterDef.addToIBTotalPkgs:
ib_total_pkts_last_state += counterDef.lastState
ib_total_pkts_last_state_available = true
}
} }
} }
counterDef.lastState = counterDef.currentState counterDef.lastState = v
} }
// Sum up total values // Sum up total values
if m.config.SendTotalValues { if m.config.SendTotalValues {
switch { switch {
case counterDef.addToIBTotal: case counterDef.addToIBTotal:
ib_total += counterDef.currentState ib_total += v
case counterDef.addToIBTotalPkgs: case counterDef.addToIBTotalPkgs:
ib_total_pkts += counterDef.currentState ib_total_pkts += v
} }
} }
} }
// Send total values // Send total values
if m.config.SendTotalValues { if m.config.SendTotalValues {
if y, err := lp.NewMessage( if y, err := lp.NewMetric("ib_total", info.tagSet, m.meta, ib_total, now); err == nil {
"ib_total",
info.tagSet,
m.meta,
map[string]any{
"value": ib_total,
},
now); err == nil {
y.AddMeta("unit", "bytes") y.AddMeta("unit", "bytes")
output <- y output <- y
} }
if y, err := lp.NewMessage( if y, err := lp.NewMetric("ib_total_pkts", info.tagSet, m.meta, ib_total_pkts, now); err == nil {
"ib_total_pkts",
info.tagSet,
m.meta,
map[string]any{
"value": ib_total_pkts,
},
now); err == nil {
y.AddMeta("unit", "packets") y.AddMeta("unit", "packets")
output <- y output <- y
} }
if m.config.SendDerivedValues && ib_total_last_state_available {
rate := float64((ib_total - ib_total_last_state)) / timeDiff
if y, err := lp.NewMetric("ib_total_bw", info.tagSet, m.meta, rate, now); err == nil {
y.AddMeta("unit", "bytes/sec")
output <- y
}
}
if m.config.SendDerivedValues && ib_total_pkts_last_state_available {
rate := float64((ib_total_pkts - ib_total_pkts_last_state)) / timeDiff
if y, err := lp.NewMetric("ib_total_pkts_bw", info.tagSet, m.meta, rate, now); err == nil {
y.AddMeta("unit", "packets/sec")
output <- y
}
}
} }
} }
} }

View File

@@ -41,5 +41,7 @@ Metrics:
* `ib_xmit_bw` (if `send_derived_values == true`) * `ib_xmit_bw` (if `send_derived_values == true`)
* `ib_recv_pkts_bw` (if `send_derived_values == true`) * `ib_recv_pkts_bw` (if `send_derived_values == true`)
* `ib_xmit_pkts_bw` (if `send_derived_values == true`) * `ib_xmit_pkts_bw` (if `send_derived_values == true`)
* `ib_total_bw` (if `send_total_values == true` and `send_derived_values == true`)
* `ib_total_pkts_bw` (if `send_total_values == true` and `send_derived_values == true`)
The collector adds a `device` tag to all metrics. The collector adds a `device` tag to all metrics.

View File

@@ -28,9 +28,9 @@ type IpmiCollector struct {
metricCollector metricCollector
config struct { config struct {
IpmitoolPath string `json:"ipmitool_path"` IpmitoolPath string `json:"ipmitool_path"`
IpmisensorsPath string `json:"ipmisensors_path"` IpmisensorsPath string `json:"ipmisensors_path"`
Sudo bool `json:"use_sudo"` Sudo bool `json:"use_sudo"`
} }
ipmitool string ipmitool string
@@ -157,7 +157,7 @@ func (m *IpmiCollector) readIpmiTool(output chan lp.CCMessage) error {
unit = "Watts" unit = "Watts"
} }
y, err := lp.NewMessage(name, map[string]string{"type": "node"}, m.meta, map[string]any{"value": v}, time.Now()) y, err := lp.NewMetric(name, map[string]string{"type": "node"}, m.meta, v, time.Now())
if err != nil { if err != nil {
cclog.ComponentErrorf(m.name, "Failed to create message: %v", err) cclog.ComponentErrorf(m.name, "Failed to create message: %v", err)
continue continue
@@ -209,7 +209,7 @@ func (m *IpmiCollector) readIpmiSensors(output chan lp.CCMessage) error {
continue continue
} }
name := strings.ToLower(strings.ReplaceAll(lv[1], " ", "_")) name := strings.ToLower(strings.ReplaceAll(lv[1], " ", "_"))
y, err := lp.NewMessage(name, map[string]string{"type": "node"}, m.meta, map[string]any{"value": v}, time.Now()) y, err := lp.NewMetric(name, map[string]string{"type": "node"}, m.meta, v, time.Now())
if err != nil { if err != nil {
cclog.ComponentErrorf(m.name, "Failed to create message: %v", err) cclog.ComponentErrorf(m.name, "Failed to create message: %v", err)
continue continue

View File

@@ -109,7 +109,7 @@ func (m *LoadavgCollector) Read(interval time.Duration, output chan lp.CCMessage
if m.load_skips[i] { if m.load_skips[i] {
continue continue
} }
y, err := lp.NewMessage(name, m.tags, m.meta, map[string]any{"value": x}, now) y, err := lp.NewMetric(name, m.tags, m.meta, x, now)
if err == nil { if err == nil {
output <- y output <- y
} }
@@ -128,7 +128,7 @@ func (m *LoadavgCollector) Read(interval time.Duration, output chan lp.CCMessage
if m.proc_skips[i] { if m.proc_skips[i] {
continue continue
} }
y, err := lp.NewMessage(name, m.tags, m.meta, map[string]any{"value": x}, now) y, err := lp.NewMetric(name, m.tags, m.meta, x, now)
if err == nil { if err == nil {
output <- y output <- y
} }

View File

@@ -72,7 +72,8 @@ func getStats(filename string) map[string]MemstatStats {
for scanner.Scan() { for scanner.Scan() {
line := scanner.Text() line := scanner.Text()
linefields := strings.Fields(line) linefields := strings.Fields(line)
if len(linefields) == 3 { switch len(linefields) {
case 3:
v, err := strconv.ParseFloat(linefields[1], 64) v, err := strconv.ParseFloat(linefields[1], 64)
if err == nil { if err == nil {
stats[strings.Trim(linefields[0], ":")] = MemstatStats{ stats[strings.Trim(linefields[0], ":")] = MemstatStats{
@@ -80,7 +81,7 @@ func getStats(filename string) map[string]MemstatStats {
unit: linefields[2], unit: linefields[2],
} }
} }
} else if len(linefields) == 5 { case 5:
v, err := strconv.ParseFloat(linefields[3], 64) v, err := strconv.ParseFloat(linefields[3], 64)
if err == nil { if err == nil {
cclog.ComponentDebug("getStats", strings.Trim(linefields[2], ":"), v, linefields[4]) cclog.ComponentDebug("getStats", strings.Trim(linefields[2], ":"), v, linefields[4])
@@ -106,7 +107,10 @@ func (m *MemstatCollector) Init(config json.RawMessage) error {
return fmt.Errorf("%s Init(): Error decoding JSON config: %w", m.name, err) return fmt.Errorf("%s Init(): Error decoding JSON config: %w", m.name, err)
} }
} }
m.meta = map[string]string{"source": m.name, "group": "Memory"} m.meta = map[string]string{
"source": m.name,
"group": "Memory",
}
m.stats = make(map[string]int64) m.stats = make(map[string]int64)
m.matches = make(map[string]string) m.matches = make(map[string]string)
m.tags = map[string]string{"type": "node"} m.tags = map[string]string{"type": "node"}
@@ -145,7 +149,7 @@ func (m *MemstatCollector) Init(config json.RawMessage) error {
"KernelStack": "mem_kernelstack", "KernelStack": "mem_kernelstack",
} }
for k, v := range matches { for k, v := range matches {
if !slices.Contains(m.config.ExcludeMetrics, k) { if !slices.Contains(m.config.ExcludeMetrics, v) {
m.matches[k] = v m.matches[k] = v
} }
} }
@@ -153,7 +157,7 @@ func (m *MemstatCollector) Init(config json.RawMessage) error {
if !slices.Contains(m.config.ExcludeMetrics, "mem_used") { if !slices.Contains(m.config.ExcludeMetrics, "mem_used") {
m.sendMemUsed = true m.sendMemUsed = true
} }
if len(m.matches) == 0 { if len(m.matches) == 0 && !m.sendMemUsed {
return fmt.Errorf("%s Init(): no metrics to collect", m.name) return fmt.Errorf("%s Init(): no metrics to collect", m.name)
} }
if err := m.setup(); err != nil { if err := m.setup(); err != nil {
@@ -213,7 +217,7 @@ func (m *MemstatCollector) Read(interval time.Duration, output chan lp.CCMessage
} }
} }
y, err := lp.NewMessage(name, tags, m.meta, map[string]any{"value": value}, time.Now()) y, err := lp.NewMetric(name, tags, m.meta, value, time.Now())
if err == nil { if err == nil {
if len(unit) > 0 { if len(unit) > 0 {
y.AddMeta("unit", unit) y.AddMeta("unit", unit)
@@ -252,7 +256,7 @@ func (m *MemstatCollector) Read(interval time.Duration, output chan lp.CCMessage
} }
} }
} }
y, err := lp.NewMessage("mem_used", tags, m.meta, map[string]any{"value": memUsed}, time.Now()) y, err := lp.NewMetric("mem_used", tags, m.meta, memUsed, time.Now())
if err == nil { if err == nil {
if len(unit) > 0 { if len(unit) > 0 {
y.AddMeta("unit", unit) y.AddMeta("unit", unit)

View File

@@ -262,14 +262,14 @@ func (m *NetstatCollector) Read(interval time.Duration, output chan lp.CCMessage
continue continue
} }
if m.config.SendAbsoluteValues { if m.config.SendAbsoluteValues {
if y, err := lp.NewMessage(metric.name, metric.tags, metric.meta, map[string]any{"value": v}, now); err == nil { if y, err := lp.NewMetric(metric.name, metric.tags, metric.meta, v, now); err == nil {
output <- y output <- y
} }
} }
if m.config.SendDerivedValues { if m.config.SendDerivedValues {
if metric.lastValue >= 0 { if metric.lastValue >= 0 {
rate := float64(v-metric.lastValue) / timeDiff rate := float64(v-metric.lastValue) / timeDiff
if y, err := lp.NewMessage(metric.name+"_bw", metric.tags, metric.meta_rates, map[string]any{"value": rate}, now); err == nil { if y, err := lp.NewMetric(metric.name+"_bw", metric.tags, metric.meta_rates, rate, now); err == nil {
output <- y output <- y
} }
} }

View File

@@ -146,14 +146,13 @@ func (m *nfsCollector) Read(interval time.Duration, output chan lp.CCMessage) {
continue continue
} }
valueMap := make(map[string]any)
if data.current >= 0 && data.last >= 0 { if data.current >= 0 && data.last >= 0 {
valueMap["value"] = data.current - data.last value := data.current - data.last
} y, err := lp.NewMetric(fmt.Sprintf("%s_%s", prefix, name), m.tags, m.meta, value, timestamp)
y, err := lp.NewMessage(fmt.Sprintf("%s_%s", prefix, name), m.tags, m.meta, valueMap, timestamp) if err == nil {
if err == nil { y.AddMeta("version", m.version)
y.AddMeta("version", m.version) output <- y
output <- y }
} }
} }
} }

View File

@@ -145,14 +145,7 @@ func (m *NfsIOStatCollector) Read(interval time.Duration, output chan lp.CCMessa
if old, ok := m.data[mntpoint]; ok { if old, ok := m.data[mntpoint]; ok {
for name, newVal := range values { for name, newVal := range values {
if m.config.SendAbsoluteValues { if m.config.SendAbsoluteValues {
msg, err := lp.NewMessage( msg, err := lp.NewMetric("nfsio_"+name, m.tags, m.meta, newVal, now)
"nfsio_"+name,
m.tags,
m.meta,
map[string]any{
"value": newVal,
},
now)
if err == nil { if err == nil {
msg.AddTag("stype", "filesystem") msg.AddTag("stype", "filesystem")
msg.AddTag("stype-id", mntpoint) msg.AddTag("stype-id", mntpoint)
@@ -161,7 +154,7 @@ func (m *NfsIOStatCollector) Read(interval time.Duration, output chan lp.CCMessa
} }
if m.config.SendDerivedValues { if m.config.SendDerivedValues {
rate := float64(newVal-old[name]) / timeDiff rate := float64(newVal-old[name]) / timeDiff
msg, err := lp.NewMessage(fmt.Sprintf("nfsio_%s_bw", name), m.tags, m.meta, map[string]any{"value": rate}, now) msg, err := lp.NewMetric(fmt.Sprintf("nfsio_%s_bw", name), m.tags, m.meta, rate, now)
if err == nil { if err == nil {
if strings.HasPrefix(name, "page") { if strings.HasPrefix(name, "page") {
msg.AddMeta("unit", "4K_pages/s") msg.AddMeta("unit", "4K_pages/s")

View File

@@ -242,12 +242,7 @@ func (m *RAPLCollector) Read(interval time.Duration, output chan lp.CCMessage) {
timeDiff := energyTimestamp.Sub(p.energyTimestamp) timeDiff := energyTimestamp.Sub(p.energyTimestamp)
averagePower := float64(energyDiff) / float64(timeDiff.Microseconds()) averagePower := float64(energyDiff) / float64(timeDiff.Microseconds())
y, err := lp.NewMessage( y, err := lp.NewMetric("rapl_average_power", p.tags, m.meta, averagePower, energyTimestamp)
"rapl_average_power",
p.tags,
m.meta,
map[string]any{"value": averagePower},
energyTimestamp)
if err == nil { if err == nil {
output <- y output <- y
} }

View File

@@ -158,128 +158,110 @@ func (m *RocmSmiCollector) Read(interval time.Duration, output chan lp.CCMessage
if !dev.excludeMetrics["rocm_gfx_util"] { if !dev.excludeMetrics["rocm_gfx_util"] {
value := metrics.Average_gfx_activity value := metrics.Average_gfx_activity
y, err := lp.NewMessage("rocm_gfx_util", dev.tags, dev.meta, map[string]any{"value": value}, timestamp) if y, err := lp.NewMetric("rocm_gfx_util", dev.tags, dev.meta, value, timestamp); err == nil {
if err == nil {
output <- y output <- y
} }
} }
if !dev.excludeMetrics["rocm_umc_util"] { if !dev.excludeMetrics["rocm_umc_util"] {
value := metrics.Average_umc_activity value := metrics.Average_umc_activity
y, err := lp.NewMessage("rocm_umc_util", dev.tags, dev.meta, map[string]any{"value": value}, timestamp) if y, err := lp.NewMetric("rocm_umc_util", dev.tags, dev.meta, value, timestamp); err == nil {
if err == nil {
output <- y output <- y
} }
} }
if !dev.excludeMetrics["rocm_mm_util"] { if !dev.excludeMetrics["rocm_mm_util"] {
value := metrics.Average_mm_activity value := metrics.Average_mm_activity
y, err := lp.NewMessage("rocm_mm_util", dev.tags, dev.meta, map[string]any{"value": value}, timestamp) if y, err := lp.NewMetric("rocm_mm_util", dev.tags, dev.meta, value, timestamp); err == nil {
if err == nil {
output <- y output <- y
} }
} }
if !dev.excludeMetrics["rocm_avg_power"] { if !dev.excludeMetrics["rocm_avg_power"] {
value := metrics.Average_socket_power value := metrics.Average_socket_power
y, err := lp.NewMessage("rocm_avg_power", dev.tags, dev.meta, map[string]any{"value": value}, timestamp) if y, err := lp.NewMetric("rocm_avg_power", dev.tags, dev.meta, value, timestamp); err == nil {
if err == nil {
output <- y output <- y
} }
} }
if !dev.excludeMetrics["rocm_temp_mem"] { if !dev.excludeMetrics["rocm_temp_mem"] {
value := metrics.Temperature_mem value := metrics.Temperature_mem
y, err := lp.NewMessage("rocm_temp_mem", dev.tags, dev.meta, map[string]any{"value": value}, timestamp) if y, err := lp.NewMetric("rocm_temp_mem", dev.tags, dev.meta, value, timestamp); err == nil {
if err == nil {
output <- y output <- y
} }
} }
if !dev.excludeMetrics["rocm_temp_hotspot"] { if !dev.excludeMetrics["rocm_temp_hotspot"] {
value := metrics.Temperature_hotspot value := metrics.Temperature_hotspot
y, err := lp.NewMessage("rocm_temp_hotspot", dev.tags, dev.meta, map[string]any{"value": value}, timestamp) if y, err := lp.NewMetric("rocm_temp_hotspot", dev.tags, dev.meta, value, timestamp); err == nil {
if err == nil {
output <- y output <- y
} }
} }
if !dev.excludeMetrics["rocm_temp_edge"] { if !dev.excludeMetrics["rocm_temp_edge"] {
value := metrics.Temperature_edge value := metrics.Temperature_edge
y, err := lp.NewMessage("rocm_temp_edge", dev.tags, dev.meta, map[string]any{"value": value}, timestamp) if y, err := lp.NewMetric("rocm_temp_edge", dev.tags, dev.meta, value, timestamp); err == nil {
if err == nil {
output <- y output <- y
} }
} }
if !dev.excludeMetrics["rocm_temp_vrgfx"] { if !dev.excludeMetrics["rocm_temp_vrgfx"] {
value := metrics.Temperature_vrgfx value := metrics.Temperature_vrgfx
y, err := lp.NewMessage("rocm_temp_vrgfx", dev.tags, dev.meta, map[string]any{"value": value}, timestamp) if y, err := lp.NewMetric("rocm_temp_vrgfx", dev.tags, dev.meta, value, timestamp); err == nil {
if err == nil {
output <- y output <- y
} }
} }
if !dev.excludeMetrics["rocm_temp_vrsoc"] { if !dev.excludeMetrics["rocm_temp_vrsoc"] {
value := metrics.Temperature_vrsoc value := metrics.Temperature_vrsoc
y, err := lp.NewMessage("rocm_temp_vrsoc", dev.tags, dev.meta, map[string]any{"value": value}, timestamp) if y, err := lp.NewMetric("rocm_temp_vrsoc", dev.tags, dev.meta, value, timestamp); err == nil {
if err == nil {
output <- y output <- y
} }
} }
if !dev.excludeMetrics["rocm_temp_vrmem"] { if !dev.excludeMetrics["rocm_temp_vrmem"] {
value := metrics.Temperature_vrmem value := metrics.Temperature_vrmem
y, err := lp.NewMessage("rocm_temp_vrmem", dev.tags, dev.meta, map[string]any{"value": value}, timestamp) if y, err := lp.NewMetric("rocm_temp_vrmem", dev.tags, dev.meta, value, timestamp); err == nil {
if err == nil {
output <- y output <- y
} }
} }
if !dev.excludeMetrics["rocm_gfx_clock"] { if !dev.excludeMetrics["rocm_gfx_clock"] {
value := metrics.Average_gfxclk_frequency value := metrics.Average_gfxclk_frequency
y, err := lp.NewMessage("rocm_gfx_clock", dev.tags, dev.meta, map[string]any{"value": value}, timestamp) if y, err := lp.NewMetric("rocm_gfx_clock", dev.tags, dev.meta, value, timestamp); err == nil {
if err == nil {
output <- y output <- y
} }
} }
if !dev.excludeMetrics["rocm_soc_clock"] { if !dev.excludeMetrics["rocm_soc_clock"] {
value := metrics.Average_socclk_frequency value := metrics.Average_socclk_frequency
y, err := lp.NewMessage("rocm_soc_clock", dev.tags, dev.meta, map[string]any{"value": value}, timestamp) if y, err := lp.NewMetric("rocm_soc_clock", dev.tags, dev.meta, value, timestamp); err == nil {
if err == nil {
output <- y output <- y
} }
} }
if !dev.excludeMetrics["rocm_u_clock"] { if !dev.excludeMetrics["rocm_u_clock"] {
value := metrics.Average_uclk_frequency value := metrics.Average_uclk_frequency
y, err := lp.NewMessage("rocm_u_clock", dev.tags, dev.meta, map[string]any{"value": value}, timestamp) if y, err := lp.NewMetric("rocm_u_clock", dev.tags, dev.meta, value, timestamp); err == nil {
if err == nil {
output <- y output <- y
} }
} }
if !dev.excludeMetrics["rocm_v0_clock"] { if !dev.excludeMetrics["rocm_v0_clock"] {
value := metrics.Average_vclk0_frequency value := metrics.Average_vclk0_frequency
y, err := lp.NewMessage("rocm_v0_clock", dev.tags, dev.meta, map[string]any{"value": value}, timestamp) if y, err := lp.NewMetric("rocm_v0_clock", dev.tags, dev.meta, value, timestamp); err == nil {
if err == nil {
output <- y output <- y
} }
} }
if !dev.excludeMetrics["rocm_v1_clock"] { if !dev.excludeMetrics["rocm_v1_clock"] {
value := metrics.Average_vclk1_frequency value := metrics.Average_vclk1_frequency
y, err := lp.NewMessage("rocm_v1_clock", dev.tags, dev.meta, map[string]any{"value": value}, timestamp) if y, err := lp.NewMetric("rocm_v1_clock", dev.tags, dev.meta, value, timestamp); err == nil {
if err == nil {
output <- y output <- y
} }
} }
if !dev.excludeMetrics["rocm_d0_clock"] { if !dev.excludeMetrics["rocm_d0_clock"] {
value := metrics.Average_dclk0_frequency value := metrics.Average_dclk0_frequency
y, err := lp.NewMessage("rocm_d0_clock", dev.tags, dev.meta, map[string]any{"value": value}, timestamp) if y, err := lp.NewMetric("rocm_d0_clock", dev.tags, dev.meta, value, timestamp); err == nil {
if err == nil {
output <- y output <- y
} }
} }
if !dev.excludeMetrics["rocm_d1_clock"] { if !dev.excludeMetrics["rocm_d1_clock"] {
value := metrics.Average_dclk1_frequency value := metrics.Average_dclk1_frequency
y, err := lp.NewMessage("rocm_d1_clock", dev.tags, dev.meta, map[string]any{"value": value}, timestamp) if y, err := lp.NewMetric("rocm_d1_clock", dev.tags, dev.meta, value, timestamp); err == nil {
if err == nil {
output <- y output <- y
} }
} }
if !dev.excludeMetrics["rocm_temp_hbm"] { if !dev.excludeMetrics["rocm_temp_hbm"] {
for i := range rocm_smi.NUM_HBM_INSTANCES { for i := range rocm_smi.NUM_HBM_INSTANCES {
value := metrics.Temperature_hbm[i] value := metrics.Temperature_hbm[i]
y, err := lp.NewMessage("rocm_temp_hbm", dev.tags, dev.meta, map[string]any{"value": value}, timestamp) if y, err := lp.NewMetric("rocm_temp_hbm", dev.tags, dev.meta, value, timestamp); err == nil {
if err == nil {
y.AddTag("stype", "device") y.AddTag("stype", "device")
y.AddTag("stype-id", strconv.Itoa(i)) y.AddTag("stype-id", strconv.Itoa(i))
output <- y output <- y

View File

@@ -201,26 +201,14 @@ func (m *TempCollector) Read(interval time.Duration, output chan lp.CCMessage) {
continue continue
} }
x /= 1000 x /= 1000
y, err := lp.NewMessage( y, err := lp.NewMetric(sensor.metricName, sensor.tags, m.meta, x, time.Now())
sensor.metricName,
sensor.tags,
m.meta,
map[string]any{"value": x},
time.Now(),
)
if err == nil { if err == nil {
output <- y output <- y
} }
// max temperature // max temperature
if m.config.ReportMaxTemp && sensor.maxTemp != 0 { if m.config.ReportMaxTemp && sensor.maxTemp != 0 {
y, err := lp.NewMessage( y, err := lp.NewMetric(sensor.maxTempName, sensor.tags, m.meta, sensor.maxTemp, time.Now())
sensor.maxTempName,
sensor.tags,
m.meta,
map[string]any{"value": sensor.maxTemp},
time.Now(),
)
if err == nil { if err == nil {
output <- y output <- y
} }
@@ -228,13 +216,7 @@ func (m *TempCollector) Read(interval time.Duration, output chan lp.CCMessage) {
// critical temperature // critical temperature
if m.config.ReportCriticalTemp && sensor.critTemp != 0 { if m.config.ReportCriticalTemp && sensor.critTemp != 0 {
y, err := lp.NewMessage( y, err := lp.NewMetric(sensor.critTempName, sensor.tags, m.meta, sensor.critTemp, time.Now())
sensor.critTempName,
sensor.tags,
m.meta,
map[string]any{"value": sensor.critTemp},
time.Now(),
)
if err == nil { if err == nil {
output <- y output <- y
} }

View File

@@ -86,15 +86,7 @@ func (m *TopProcsCollector) Read(interval time.Duration, output chan lp.CCMessag
lines := strings.Split(string(stdout), "\n") lines := strings.Split(string(stdout), "\n")
for i := 1; i < m.config.Num_procs+1; i++ { for i := 1; i < m.config.Num_procs+1; i++ {
name := fmt.Sprintf("topproc%d", i) name := fmt.Sprintf("topproc%d", i)
y, err := lp.NewMessage( if y, err := lp.NewMetric(name, m.tags, m.meta, lines[i], time.Now()); err == nil {
name,
m.tags,
m.meta,
map[string]any{
"value": lines[i],
},
time.Now())
if err == nil {
output <- y output <- y
} }
} }