mirror of
https://github.com/ClusterCockpit/cc-metric-collector.git
synced 2025-10-21 13:25:07 +02:00
Compare commits
3 Commits
fix-iostat
...
move_examp
Author | SHA1 | Date | |
---|---|---|---|
|
ce9e21c48e | ||
|
6243203880 | ||
|
c7c9f8c273 |
@@ -20,18 +20,17 @@ import (
|
|||||||
lp "github.com/ClusterCockpit/cc-lib/ccMessage"
|
lp "github.com/ClusterCockpit/cc-lib/ccMessage"
|
||||||
)
|
)
|
||||||
|
|
||||||
// Konstante für den Pfad zu /proc/diskstats
|
|
||||||
const IOSTATFILE = `/proc/diskstats`
|
const IOSTATFILE = `/proc/diskstats`
|
||||||
|
|
||||||
type IOstatCollectorConfig struct {
|
type IOstatCollectorConfig struct {
|
||||||
ExcludeMetrics []string `json:"exclude_metrics,omitempty"`
|
ExcludeMetrics []string `json:"exclude_metrics,omitempty"`
|
||||||
// Neues Feld zum Ausschließen von Devices per JSON-Konfiguration
|
|
||||||
ExcludeDevices []string `json:"exclude_devices,omitempty"`
|
ExcludeDevices []string `json:"exclude_devices,omitempty"`
|
||||||
}
|
}
|
||||||
|
|
||||||
type IOstatCollectorEntry struct {
|
type IOstatCollectorEntry struct {
|
||||||
lastValues map[string]int64
|
currentValues map[string]int64
|
||||||
tags map[string]string
|
lastValues map[string]int64
|
||||||
|
tags map[string]string
|
||||||
}
|
}
|
||||||
|
|
||||||
type IOstatCollector struct {
|
type IOstatCollector struct {
|
||||||
@@ -105,16 +104,27 @@ func (m *IOstatCollector) Init(config json.RawMessage) error {
|
|||||||
if _, skip := stringArrayContains(m.config.ExcludeDevices, device); skip {
|
if _, skip := stringArrayContains(m.config.ExcludeDevices, device); skip {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
values := make(map[string]int64)
|
currentValues := make(map[string]int64)
|
||||||
|
lastValues := make(map[string]int64)
|
||||||
for m := range m.matches {
|
for m := range m.matches {
|
||||||
values[m] = 0
|
currentValues[m] = 0
|
||||||
|
lastValues[m] = 0
|
||||||
|
}
|
||||||
|
for name, idx := range m.matches {
|
||||||
|
if idx < len(linefields) {
|
||||||
|
if value, err := strconv.ParseInt(linefields[idx], 0, 64); err == nil {
|
||||||
|
currentValues[name] = value
|
||||||
|
lastValues[name] = value // Set last to current for first read
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
m.devices[device] = IOstatCollectorEntry{
|
m.devices[device] = IOstatCollectorEntry{
|
||||||
tags: map[string]string{
|
tags: map[string]string{
|
||||||
"device": device,
|
"device": device,
|
||||||
"type": "node",
|
"type": "node",
|
||||||
},
|
},
|
||||||
lastValues: values,
|
currentValues: currentValues,
|
||||||
|
lastValues: lastValues,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
m.init = true
|
m.init = true
|
||||||
@@ -153,18 +163,22 @@ func (m *IOstatCollector) Read(interval time.Duration, output chan lp.CCMessage)
|
|||||||
if _, ok := m.devices[device]; !ok {
|
if _, ok := m.devices[device]; !ok {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
// Update current and last values
|
||||||
entry := m.devices[device]
|
entry := m.devices[device]
|
||||||
for name, idx := range m.matches {
|
for name, idx := range m.matches {
|
||||||
if idx < len(linefields) {
|
if idx < len(linefields) {
|
||||||
x, err := strconv.ParseInt(linefields[idx], 0, 64)
|
x, err := strconv.ParseInt(linefields[idx], 0, 64)
|
||||||
if err == nil {
|
if err == nil {
|
||||||
diff := x - entry.lastValues[name]
|
// Calculate difference using previous current and new value
|
||||||
y, err := lp.NewMessage(name, entry.tags, m.meta, map[string]interface{}{"value": int(diff)}, time.Now())
|
diff := x - entry.currentValues[name]
|
||||||
|
y, err := lp.NewMetric(name, entry.tags, m.meta, int(diff), time.Now())
|
||||||
if err == nil {
|
if err == nil {
|
||||||
output <- y
|
output <- y
|
||||||
}
|
}
|
||||||
|
// Update last to previous current, and current to new value
|
||||||
|
entry.lastValues[name] = entry.currentValues[name]
|
||||||
|
entry.currentValues[name] = x
|
||||||
}
|
}
|
||||||
entry.lastValues[name] = x
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
m.devices[device] = entry
|
m.devices[device] = entry
|
||||||
|
@@ -589,7 +589,7 @@ func readMaxClocks(device *NvidiaCollectorDevice, output chan lp.CCMessage) erro
|
|||||||
if !device.excludeMetrics["nv_max_graphics_clock"] {
|
if !device.excludeMetrics["nv_max_graphics_clock"] {
|
||||||
max_gclk, ret := nvml.DeviceGetMaxClockInfo(device.device, nvml.CLOCK_GRAPHICS)
|
max_gclk, ret := nvml.DeviceGetMaxClockInfo(device.device, nvml.CLOCK_GRAPHICS)
|
||||||
if ret == nvml.SUCCESS {
|
if ret == nvml.SUCCESS {
|
||||||
y, err := lp.NewMessage("nv_max_graphics_clock", device.tags, device.meta, map[string]interface{}{"value": float64(max_gclk)}, time.Now())
|
y, err := lp.NewMetric("nv_max_graphics_clock", device.tags, device.meta, float64(max_gclk), time.Now())
|
||||||
if err == nil {
|
if err == nil {
|
||||||
y.AddMeta("unit", "MHz")
|
y.AddMeta("unit", "MHz")
|
||||||
output <- y
|
output <- y
|
||||||
@@ -598,9 +598,9 @@ func readMaxClocks(device *NvidiaCollectorDevice, output chan lp.CCMessage) erro
|
|||||||
}
|
}
|
||||||
|
|
||||||
if !device.excludeMetrics["nv_max_sm_clock"] {
|
if !device.excludeMetrics["nv_max_sm_clock"] {
|
||||||
maxSmClock, ret := nvml.DeviceGetClockInfo(device.device, nvml.CLOCK_SM)
|
maxSmClock, ret := nvml.DeviceGetMaxClockInfo(device.device, nvml.CLOCK_SM)
|
||||||
if ret == nvml.SUCCESS {
|
if ret == nvml.SUCCESS {
|
||||||
y, err := lp.NewMessage("nv_max_sm_clock", device.tags, device.meta, map[string]interface{}{"value": float64(maxSmClock)}, time.Now())
|
y, err := lp.NewMetric("nv_max_sm_clock", device.tags, device.meta, float64(maxSmClock), time.Now())
|
||||||
if err == nil {
|
if err == nil {
|
||||||
y.AddMeta("unit", "MHz")
|
y.AddMeta("unit", "MHz")
|
||||||
output <- y
|
output <- y
|
||||||
@@ -609,9 +609,9 @@ func readMaxClocks(device *NvidiaCollectorDevice, output chan lp.CCMessage) erro
|
|||||||
}
|
}
|
||||||
|
|
||||||
if !device.excludeMetrics["nv_max_mem_clock"] {
|
if !device.excludeMetrics["nv_max_mem_clock"] {
|
||||||
maxMemClock, ret := nvml.DeviceGetClockInfo(device.device, nvml.CLOCK_MEM)
|
maxMemClock, ret := nvml.DeviceGetMaxClockInfo(device.device, nvml.CLOCK_MEM)
|
||||||
if ret == nvml.SUCCESS {
|
if ret == nvml.SUCCESS {
|
||||||
y, err := lp.NewMessage("nv_max_mem_clock", device.tags, device.meta, map[string]interface{}{"value": float64(maxMemClock)}, time.Now())
|
y, err := lp.NewMetric("nv_max_mem_clock", device.tags, device.meta, float64(maxMemClock), time.Now())
|
||||||
if err == nil {
|
if err == nil {
|
||||||
y.AddMeta("unit", "MHz")
|
y.AddMeta("unit", "MHz")
|
||||||
output <- y
|
output <- y
|
||||||
@@ -620,9 +620,9 @@ func readMaxClocks(device *NvidiaCollectorDevice, output chan lp.CCMessage) erro
|
|||||||
}
|
}
|
||||||
|
|
||||||
if !device.excludeMetrics["nv_max_video_clock"] {
|
if !device.excludeMetrics["nv_max_video_clock"] {
|
||||||
maxMemClock, ret := nvml.DeviceGetClockInfo(device.device, nvml.CLOCK_VIDEO)
|
maxVideoClock, ret := nvml.DeviceGetMaxClockInfo(device.device, nvml.CLOCK_VIDEO)
|
||||||
if ret == nvml.SUCCESS {
|
if ret == nvml.SUCCESS {
|
||||||
y, err := lp.NewMessage("nv_max_video_clock", device.tags, device.meta, map[string]interface{}{"value": float64(maxMemClock)}, time.Now())
|
y, err := lp.NewMetric("nv_max_video_clock", device.tags, device.meta, float64(maxVideoClock), time.Now())
|
||||||
if err == nil {
|
if err == nil {
|
||||||
y.AddMeta("unit", "MHz")
|
y.AddMeta("unit", "MHz")
|
||||||
output <- y
|
output <- y
|
||||||
|
@@ -4,7 +4,7 @@ The configuration of the CC metric collector consists of five configuration file
|
|||||||
|
|
||||||
## Global configuration
|
## Global configuration
|
||||||
|
|
||||||
The global file contains the paths to the other four files and some global options.
|
The global file contains the paths to the other four files and some global options. You can find examples in `example_configs`.
|
||||||
|
|
||||||
```json
|
```json
|
||||||
{
|
{
|
||||||
|
Reference in New Issue
Block a user