add only_metrics, diff_values and derived_values

This commit is contained in:
brinkcoder
2025-03-05 01:04:06 +01:00
parent 4702ab1570
commit 636c3f312d
2 changed files with 156 additions and 53 deletions

View File

@@ -2,17 +2,15 @@ package collectors
import ( import (
"bufio" "bufio"
"os"
cclog "github.com/ClusterCockpit/cc-metric-collector/pkg/ccLogger"
lp "github.com/ClusterCockpit/cc-energy-manager/pkg/cc-message"
// "log"
"encoding/json" "encoding/json"
"errors" "errors"
"os"
"strconv" "strconv"
"strings" "strings"
"time" "time"
lp "github.com/ClusterCockpit/cc-lib/ccMessage"
cclog "github.com/ClusterCockpit/cc-metric-collector/pkg/ccLogger"
) )
const IOSTATFILE = `/proc/diskstats` const IOSTATFILE = `/proc/diskstats`
@@ -20,6 +18,33 @@ const IOSTAT_SYSFSPATH = `/sys/block`
type IOstatCollectorConfig struct { type IOstatCollectorConfig struct {
ExcludeMetrics []string `json:"exclude_metrics,omitempty"` ExcludeMetrics []string `json:"exclude_metrics,omitempty"`
OnlyMetrics []string `json:"only_metrics,omitempty"`
ExcludeDevices []string `json:"exclude_devices,omitempty"`
SendAbsoluteValues *bool `json:"send_abs_values,omitempty"`
SendDiffValues *bool `json:"send_diff_values,omitempty"`
SendDerivedValues *bool `json:"send_derived_values,omitempty"`
}
// Accessors for the optional boolean settings. Each accessor supplies the
// default that applies when the corresponding JSON key is absent:
//   - send_abs_values defaults to true,
//   - send_diff_values and send_derived_values default to false.

// AbsValues reports whether absolute values should be sent. It returns the
// configured send_abs_values setting, or true when that setting is unset.
func (cfg *IOstatCollectorConfig) AbsValues() bool {
	if flag := cfg.SendAbsoluteValues; flag != nil {
		return *flag
	}
	return true
}
// DiffValues reports whether diff values should be sent. It returns the
// configured send_diff_values setting, or false when that setting is unset.
func (cfg *IOstatCollectorConfig) DiffValues() bool {
	// A nil pointer means the key was not configured, which counts as disabled.
	return cfg.SendDiffValues != nil && *cfg.SendDiffValues
}
// DerivedValues reports whether derived values should be sent. It returns the
// configured send_derived_values setting, or false when that setting is unset.
func (cfg *IOstatCollectorConfig) DerivedValues() bool {
// A nil pointer means the option was not configured; fall back to the default.
if cfg.SendDerivedValues == nil {
return false
}
return *cfg.SendDerivedValues
} }
type IOstatCollectorEntry struct { type IOstatCollectorEntry struct {
@@ -34,6 +59,24 @@ type IOstatCollector struct {
devices map[string]IOstatCollectorEntry devices map[string]IOstatCollectorEntry
} }
// shouldOutput returns true if a metric should be forwarded based on only_metrics and exclude_metrics.
// shouldOutput decides whether the metric called metricName is forwarded.
//
// When only_metrics is non-empty it acts as an allow list and exclude_metrics
// is not consulted: the result is true exactly when metricName is listed.
// Otherwise the result is true unless metricName appears in exclude_metrics.
// Names are compared for exact equality.
func (m *IOstatCollector) shouldOutput(metricName string) bool {
	// listed reports whether metricName occurs in the given list.
	listed := func(list []string) bool {
		for i := range list {
			if list[i] == metricName {
				return true
			}
		}
		return false
	}

	if only := m.config.OnlyMetrics; len(only) > 0 {
		return listed(only)
	}
	return !listed(m.config.ExcludeMetrics)
}
func (m *IOstatCollector) Init(config json.RawMessage) error { func (m *IOstatCollector) Init(config json.RawMessage) error {
var err error var err error
m.name = "IOstatCollector" m.name = "IOstatCollector"
@@ -46,8 +89,8 @@ func (m *IOstatCollector) Init(config json.RawMessage) error {
return err return err
} }
} }
// https://www.kernel.org/doc/html/latest/admin-guide/iostats.html // Define mapping from metric names to field indices in /proc/diskstats.
matches := map[string]int{ allMatches := map[string]int{
"io_reads": 3, "io_reads": 3,
"io_reads_merged": 4, "io_reads_merged": 4,
"io_read_sectors": 5, "io_read_sectors": 5,
@@ -66,38 +109,57 @@ func (m *IOstatCollector) Init(config json.RawMessage) error {
"io_flushes": 18, "io_flushes": 18,
"io_flushes_ms": 19, "io_flushes_ms": 19,
} }
m.devices = make(map[string]IOstatCollectorEntry)
m.matches = make(map[string]int) m.matches = make(map[string]int)
for k, v := range matches { // Allow a metric if either its base name, or base name+"_diff" or base name+"_rate" is present in only_metrics.
for k, v := range allMatches {
allowed := false
if len(m.config.OnlyMetrics) > 0 {
for _, metric := range m.config.OnlyMetrics {
if metric == k || metric == k+"_diff" || metric == k+"_rate" {
allowed = true
break
}
}
} else {
if _, skip := stringArrayContains(m.config.ExcludeMetrics, k); !skip { if _, skip := stringArrayContains(m.config.ExcludeMetrics, k); !skip {
allowed = true
}
}
if allowed {
m.matches[k] = v m.matches[k] = v
} }
} }
if len(m.matches) == 0 { if len(m.matches) == 0 {
return errors.New("no metrics to collect") return errors.New("no metrics to collect")
} }
file, err := os.Open(string(IOSTATFILE)) m.devices = make(map[string]IOstatCollectorEntry)
file, err := os.Open(IOSTATFILE)
if err != nil { if err != nil {
cclog.ComponentError(m.name, err.Error()) cclog.ComponentError(m.name, err.Error())
return err return err
} }
defer file.Close() defer file.Close()
scanner := bufio.NewScanner(file) scanner := bufio.NewScanner(file)
for scanner.Scan() { for scanner.Scan() {
line := scanner.Text() line := scanner.Text()
linefields := strings.Fields(line) linefields := strings.Fields(line)
if len(linefields) < 3 {
continue
}
device := linefields[2] device := linefields[2]
if strings.Contains(device, "loop") { if strings.Contains(device, "loop") {
continue continue
} }
if _, skip := stringArrayContains(m.config.ExcludeDevices, device); skip {
continue
}
values := make(map[string]int64) values := make(map[string]int64)
for m := range m.matches { for mname := range m.matches {
values[m] = 0 values[mname] = 0
} }
m.devices[device] = IOstatCollectorEntry{ m.devices[device] = IOstatCollectorEntry{
tags: map[string]string{ tags: map[string]string{
"device": linefields[2], "device": device,
"type": "node", "type": "node",
}, },
lastValues: values, lastValues: values,
@@ -111,14 +173,12 @@ func (m *IOstatCollector) Read(interval time.Duration, output chan lp.CCMessage)
if !m.init { if !m.init {
return return
} }
file, err := os.Open(IOSTATFILE)
file, err := os.Open(string(IOSTATFILE))
if err != nil { if err != nil {
cclog.ComponentError(m.name, err.Error()) cclog.ComponentError(m.name, err.Error())
return return
} }
defer file.Close() defer file.Close()
scanner := bufio.NewScanner(file) scanner := bufio.NewScanner(file)
for scanner.Scan() { for scanner.Scan() {
line := scanner.Text() line := scanner.Text()
@@ -126,27 +186,53 @@ func (m *IOstatCollector) Read(interval time.Duration, output chan lp.CCMessage)
continue continue
} }
linefields := strings.Fields(line) linefields := strings.Fields(line)
if len(linefields) < 3 {
continue
}
device := linefields[2] device := linefields[2]
if strings.Contains(device, "loop") { if strings.Contains(device, "loop") {
continue continue
} }
if _, ok := m.devices[device]; !ok { if _, skip := stringArrayContains(m.config.ExcludeDevices, device); skip {
continue
}
entry, ok := m.devices[device]
if !ok {
continue continue
} }
entry := m.devices[device]
for name, idx := range m.matches { for name, idx := range m.matches {
if idx < len(linefields) { if idx >= len(linefields) {
continue
}
x, err := strconv.ParseInt(linefields[idx], 0, 64) x, err := strconv.ParseInt(linefields[idx], 0, 64)
if err != nil {
continue
}
// Send absolute metric if enabled.
if m.config.AbsValues() && m.shouldOutput(name) {
msg, err := lp.NewMessage(name, entry.tags, m.meta, map[string]interface{}{"value": int(x)}, time.Now())
if err == nil { if err == nil {
output <- msg
}
}
diff := x - entry.lastValues[name] diff := x - entry.lastValues[name]
y, err := lp.NewMessage(name, entry.tags, m.meta, map[string]interface{}{"value": int(diff)}, time.Now()) // Send diff metric if enabled.
if m.config.DiffValues() && m.shouldOutput(name+"_diff") {
msg, err := lp.NewMessage(name+"_diff", entry.tags, m.meta, map[string]interface{}{"value": int(diff)}, time.Now())
if err == nil { if err == nil {
output <- y output <- msg
}
}
// Send derived metric if enabled.
if m.config.DerivedValues() && m.shouldOutput(name+"_rate") {
rate := float64(diff) / interval.Seconds()
msg, err := lp.NewMessage(name+"_rate", entry.tags, m.meta, map[string]interface{}{"value": rate}, time.Now())
if err == nil {
output <- msg
} }
} }
entry.lastValues[name] = x entry.lastValues[name] = x
} }
}
m.devices[device] = entry m.devices[device] = entry
} }
} }

View File

@@ -1,34 +1,51 @@
## `iostat` collector ## `iostat` collector
```json ```json
"iostat": { "iostat": {
"exclude_metrics": [ "exclude_metrics": [
"read_ms" "io_read_ms"
], ],
"exclude_devices": [
"nvme0n1p1",
"nvme0n1p2",
"md127"
],
"only_metrics": [],
"send_abs_values": true,
"send_diff_values": true,
"send_derived_values": true
} }
``` ```
The `iostat` collector reads data from `/proc/diskstats` and outputs a handful of **node** metrics. If a metric is not required, it can be excluded from being forwarded to the sink. The `iostat` collector reads data from `/proc/diskstats` and outputs a handful of **node** metrics. If a metric is not required, it can be excluded from being forwarded to the sink.
Metrics: Both filtering mechanisms are supported:
* `io_reads` - `exclude_metrics`: Excludes the specified metrics.
* `io_reads_merged` - `only_metrics`: If provided, only the listed metrics are collected. This takes precedence over `exclude_metrics`.
* `io_read_sectors`
* `io_read_ms` **Absolute Metrics:**
* `io_writes` - `io_reads`
* `io_writes_merged` - `io_reads_merged`
* `io_writes_sectors` - `io_read_sectors`
* `io_writes_ms` - `io_read_ms`
* `io_ioops` - `io_writes`
* `io_ioops_ms` - `io_writes_merged`
* `io_ioops_weighted_ms` - `io_writes_sectors`
* `io_discards` - `io_writes_ms`
* `io_discards_merged` - `io_ioops`
* `io_discards_sectors` - `io_ioops_ms`
* `io_discards_ms` - `io_ioops_weighted_ms`
* `io_flushes` - `io_discards`
* `io_flushes_ms` - `io_discards_merged`
- `io_discards_sectors`
- `io_discards_ms`
- `io_flushes`
- `io_flushes_ms`
**Diff Metrics:**
For each metric, if `send_diff_values` is enabled, the collector computes the difference (current value minus previous value) and sends it with the suffix `_diff`.
**Derived Metrics:**
For each metric, if `send_derived_values` is enabled, the collector computes the derived rate (the difference divided by the length of the collection interval in seconds) and sends it with the suffix `_rate`.
The device name is added as tag `device`. For more details, see https://www.kernel.org/doc/html/latest/admin-guide/iostats.html The device name is added as tag `device`. For more details, see https://www.kernel.org/doc/html/latest/admin-guide/iostats.html