Compare commits

..

1 Commits

Author SHA1 Message Date
Thomas Roehl
b3eb0679b9 Fix max clock metrics 2025-10-20 16:58:39 +02:00
2 changed files with 17 additions and 31 deletions

View File

@@ -20,17 +20,18 @@ import (
lp "github.com/ClusterCockpit/cc-lib/ccMessage" lp "github.com/ClusterCockpit/cc-lib/ccMessage"
) )
// Konstante für den Pfad zu /proc/diskstats
const IOSTATFILE = `/proc/diskstats` const IOSTATFILE = `/proc/diskstats`
type IOstatCollectorConfig struct { type IOstatCollectorConfig struct {
ExcludeMetrics []string `json:"exclude_metrics,omitempty"` ExcludeMetrics []string `json:"exclude_metrics,omitempty"`
// Neues Feld zum Ausschließen von Devices per JSON-Konfiguration
ExcludeDevices []string `json:"exclude_devices,omitempty"` ExcludeDevices []string `json:"exclude_devices,omitempty"`
} }
type IOstatCollectorEntry struct { type IOstatCollectorEntry struct {
currentValues map[string]int64 lastValues map[string]int64
lastValues map[string]int64 tags map[string]string
tags map[string]string
} }
type IOstatCollector struct { type IOstatCollector struct {
@@ -104,27 +105,16 @@ func (m *IOstatCollector) Init(config json.RawMessage) error {
if _, skip := stringArrayContains(m.config.ExcludeDevices, device); skip { if _, skip := stringArrayContains(m.config.ExcludeDevices, device); skip {
continue continue
} }
currentValues := make(map[string]int64) values := make(map[string]int64)
lastValues := make(map[string]int64)
for m := range m.matches { for m := range m.matches {
currentValues[m] = 0 values[m] = 0
lastValues[m] = 0
}
for name, idx := range m.matches {
if idx < len(linefields) {
if value, err := strconv.ParseInt(linefields[idx], 0, 64); err == nil {
currentValues[name] = value
lastValues[name] = value // Set last to current for first read
}
}
} }
m.devices[device] = IOstatCollectorEntry{ m.devices[device] = IOstatCollectorEntry{
tags: map[string]string{ tags: map[string]string{
"device": device, "device": device,
"type": "node", "type": "node",
}, },
currentValues: currentValues, lastValues: values,
lastValues: lastValues,
} }
} }
m.init = true m.init = true
@@ -163,22 +153,18 @@ func (m *IOstatCollector) Read(interval time.Duration, output chan lp.CCMessage)
if _, ok := m.devices[device]; !ok { if _, ok := m.devices[device]; !ok {
continue continue
} }
// Update current and last values
entry := m.devices[device] entry := m.devices[device]
for name, idx := range m.matches { for name, idx := range m.matches {
if idx < len(linefields) { if idx < len(linefields) {
x, err := strconv.ParseInt(linefields[idx], 0, 64) x, err := strconv.ParseInt(linefields[idx], 0, 64)
if err == nil { if err == nil {
// Calculate difference using previous current and new value diff := x - entry.lastValues[name]
diff := x - entry.currentValues[name] y, err := lp.NewMessage(name, entry.tags, m.meta, map[string]interface{}{"value": int(diff)}, time.Now())
y, err := lp.NewMetric(name, entry.tags, m.meta, int(diff), time.Now())
if err == nil { if err == nil {
output <- y output <- y
} }
// Update last to previous current, and current to new value
entry.lastValues[name] = entry.currentValues[name]
entry.currentValues[name] = x
} }
entry.lastValues[name] = x
} }
} }
m.devices[device] = entry m.devices[device] = entry

View File

@@ -589,7 +589,7 @@ func readMaxClocks(device *NvidiaCollectorDevice, output chan lp.CCMessage) erro
if !device.excludeMetrics["nv_max_graphics_clock"] { if !device.excludeMetrics["nv_max_graphics_clock"] {
max_gclk, ret := nvml.DeviceGetMaxClockInfo(device.device, nvml.CLOCK_GRAPHICS) max_gclk, ret := nvml.DeviceGetMaxClockInfo(device.device, nvml.CLOCK_GRAPHICS)
if ret == nvml.SUCCESS { if ret == nvml.SUCCESS {
y, err := lp.NewMessage("nv_max_graphics_clock", device.tags, device.meta, map[string]interface{}{"value": float64(max_gclk)}, time.Now()) y, err := lp.NewMetric("nv_max_graphics_clock", device.tags, device.meta, float64(max_gclk), time.Now())
if err == nil { if err == nil {
y.AddMeta("unit", "MHz") y.AddMeta("unit", "MHz")
output <- y output <- y
@@ -598,9 +598,9 @@ func readMaxClocks(device *NvidiaCollectorDevice, output chan lp.CCMessage) erro
} }
if !device.excludeMetrics["nv_max_sm_clock"] { if !device.excludeMetrics["nv_max_sm_clock"] {
maxSmClock, ret := nvml.DeviceGetClockInfo(device.device, nvml.CLOCK_SM) maxSmClock, ret := nvml.DeviceGetMaxClockInfo(device.device, nvml.CLOCK_SM)
if ret == nvml.SUCCESS { if ret == nvml.SUCCESS {
y, err := lp.NewMessage("nv_max_sm_clock", device.tags, device.meta, map[string]interface{}{"value": float64(maxSmClock)}, time.Now()) y, err := lp.NewMetric("nv_max_sm_clock", device.tags, device.meta, float64(maxSmClock), time.Now())
if err == nil { if err == nil {
y.AddMeta("unit", "MHz") y.AddMeta("unit", "MHz")
output <- y output <- y
@@ -609,9 +609,9 @@ func readMaxClocks(device *NvidiaCollectorDevice, output chan lp.CCMessage) erro
} }
if !device.excludeMetrics["nv_max_mem_clock"] { if !device.excludeMetrics["nv_max_mem_clock"] {
maxMemClock, ret := nvml.DeviceGetClockInfo(device.device, nvml.CLOCK_MEM) maxMemClock, ret := nvml.DeviceGetMaxClockInfo(device.device, nvml.CLOCK_MEM)
if ret == nvml.SUCCESS { if ret == nvml.SUCCESS {
y, err := lp.NewMessage("nv_max_mem_clock", device.tags, device.meta, map[string]interface{}{"value": float64(maxMemClock)}, time.Now()) y, err := lp.NewMetric("nv_max_mem_clock", device.tags, device.meta, float64(maxMemClock), time.Now())
if err == nil { if err == nil {
y.AddMeta("unit", "MHz") y.AddMeta("unit", "MHz")
output <- y output <- y
@@ -620,9 +620,9 @@ func readMaxClocks(device *NvidiaCollectorDevice, output chan lp.CCMessage) erro
} }
if !device.excludeMetrics["nv_max_video_clock"] { if !device.excludeMetrics["nv_max_video_clock"] {
maxMemClock, ret := nvml.DeviceGetClockInfo(device.device, nvml.CLOCK_VIDEO) maxVideoClock, ret := nvml.DeviceGetMaxClockInfo(device.device, nvml.CLOCK_VIDEO)
if ret == nvml.SUCCESS { if ret == nvml.SUCCESS {
y, err := lp.NewMessage("nv_max_video_clock", device.tags, device.meta, map[string]interface{}{"value": float64(maxMemClock)}, time.Now()) y, err := lp.NewMetric("nv_max_video_clock", device.tags, device.meta, float64(maxVideoClock), time.Now())
if err == nil { if err == nil {
y.AddMeta("unit", "MHz") y.AddMeta("unit", "MHz")
output <- y output <- y