From 636c3f312dbe7c52e2865deace0f1287262806b6 Mon Sep 17 00:00:00 2001 From: brinkcoder Date: Wed, 5 Mar 2025 01:04:06 +0100 Subject: [PATCH] add only_metrics, diff_values and derived_values --- collectors/iostatMetric.go | 150 +++++++++++++++++++++++++++++-------- collectors/iostatMetric.md | 59 +++++++++------ 2 files changed, 156 insertions(+), 53 deletions(-) diff --git a/collectors/iostatMetric.go b/collectors/iostatMetric.go index 8715d7e..e78b252 100644 --- a/collectors/iostatMetric.go +++ b/collectors/iostatMetric.go @@ -2,24 +2,49 @@ package collectors import ( "bufio" - "os" - - cclog "github.com/ClusterCockpit/cc-metric-collector/pkg/ccLogger" - lp "github.com/ClusterCockpit/cc-energy-manager/pkg/cc-message" - - // "log" "encoding/json" "errors" + "os" "strconv" "strings" "time" + + lp "github.com/ClusterCockpit/cc-lib/ccMessage" + cclog "github.com/ClusterCockpit/cc-metric-collector/pkg/ccLogger" ) const IOSTATFILE = `/proc/diskstats` const IOSTAT_SYSFSPATH = `/sys/block` type IOstatCollectorConfig struct { - ExcludeMetrics []string `json:"exclude_metrics,omitempty"` + ExcludeMetrics []string `json:"exclude_metrics,omitempty"` + OnlyMetrics []string `json:"only_metrics,omitempty"` + ExcludeDevices []string `json:"exclude_devices,omitempty"` + SendAbsoluteValues *bool `json:"send_abs_values,omitempty"` + SendDiffValues *bool `json:"send_diff_values,omitempty"` + SendDerivedValues *bool `json:"send_derived_values,omitempty"` +} + +// Helper methods for default values. +// - send_abs_values defaults to true, +// - send_diff_values and send_derived_values default to false. +func (cfg *IOstatCollectorConfig) AbsValues() bool { + if cfg.SendAbsoluteValues == nil { + return true + } + return *cfg.SendAbsoluteValues +} +func (cfg *IOstatCollectorConfig) DiffValues() bool { + if cfg.SendDiffValues == nil { + return false + } + return *cfg.SendDiffValues +} +func (cfg *IOstatCollectorConfig) DerivedValues() bool { + if cfg.SendDerivedValues == nil { + return false + } + return *cfg.SendDerivedValues } type IOstatCollectorEntry struct { @@ -34,6 +59,24 @@ type IOstatCollector struct { devices map[string]IOstatCollectorEntry } +// shouldOutput returns true if a metric should be forwarded based on only_metrics and exclude_metrics. +func (m *IOstatCollector) shouldOutput(metricName string) bool { + if len(m.config.OnlyMetrics) > 0 { + for _, name := range m.config.OnlyMetrics { + if name == metricName { + return true + } + } + return false + } + for _, name := range m.config.ExcludeMetrics { + if name == metricName { + return false + } + } + return true +} + func (m *IOstatCollector) Init(config json.RawMessage) error { var err error m.name = "IOstatCollector" @@ -46,8 +89,8 @@ func (m *IOstatCollector) Init(config json.RawMessage) error { return err } } - // https://www.kernel.org/doc/html/latest/admin-guide/iostats.html - matches := map[string]int{ + // Define mapping from metric names to field indices in /proc/diskstats. + allMatches := map[string]int{ "io_reads": 3, "io_reads_merged": 4, "io_read_sectors": 5, @@ -66,38 +109,57 @@ func (m *IOstatCollector) Init(config json.RawMessage) error { "io_flushes": 18, "io_flushes_ms": 19, } - m.devices = make(map[string]IOstatCollectorEntry) m.matches = make(map[string]int) - for k, v := range matches { - if _, skip := stringArrayContains(m.config.ExcludeMetrics, k); !skip { + // Allow a metric if either its base name, or base name+"_diff" or base name+"_rate" is present in only_metrics. + for k, v := range allMatches { + allowed := false + if len(m.config.OnlyMetrics) > 0 { + for _, metric := range m.config.OnlyMetrics { + if metric == k || metric == k+"_diff" || metric == k+"_rate" { + allowed = true + break + } + } + } else { + if _, skip := stringArrayContains(m.config.ExcludeMetrics, k); !skip { + allowed = true + } + } + if allowed { m.matches[k] = v } } if len(m.matches) == 0 { return errors.New("no metrics to collect") } - file, err := os.Open(string(IOSTATFILE)) + m.devices = make(map[string]IOstatCollectorEntry) + file, err := os.Open(IOSTATFILE) if err != nil { cclog.ComponentError(m.name, err.Error()) return err } defer file.Close() - scanner := bufio.NewScanner(file) for scanner.Scan() { line := scanner.Text() linefields := strings.Fields(line) + if len(linefields) < 3 { + continue + } device := linefields[2] if strings.Contains(device, "loop") { continue } + if _, skip := stringArrayContains(m.config.ExcludeDevices, device); skip { + continue + } values := make(map[string]int64) - for m := range m.matches { - values[m] = 0 + for mname := range m.matches { + values[mname] = 0 } m.devices[device] = IOstatCollectorEntry{ tags: map[string]string{ - "device": linefields[2], + "device": device, "type": "node", }, lastValues: values, @@ -111,14 +173,12 @@ func (m *IOstatCollector) Read(interval time.Duration, output chan lp.CCMessage) if !m.init { return } - - file, err := os.Open(string(IOSTATFILE)) + file, err := os.Open(IOSTATFILE) if err != nil { cclog.ComponentError(m.name, err.Error()) return } defer file.Close() - scanner := bufio.NewScanner(file) for scanner.Scan() { line := scanner.Text() @@ -126,26 +186,52 @@ func (m *IOstatCollector) Read(interval time.Duration, output chan lp.CCMessage) continue } linefields := strings.Fields(line) + if len(linefields) < 3 { + continue + } device := linefields[2] if strings.Contains(device, "loop") { continue } - if _, ok := m.devices[device]; !ok { + if _, skip := stringArrayContains(m.config.ExcludeDevices, device); skip { + continue + } + entry, ok := m.devices[device] + if !ok { continue } - entry := m.devices[device] for name, idx := range m.matches { - if idx < len(linefields) { - x, err := strconv.ParseInt(linefields[idx], 0, 64) - if err == nil { - diff := x - entry.lastValues[name] - y, err := lp.NewMessage(name, entry.tags, m.meta, map[string]interface{}{"value": int(diff)}, time.Now()) - if err == nil { - output <- y - } - } - entry.lastValues[name] = x + if idx >= len(linefields) { + continue } + x, err := strconv.ParseInt(linefields[idx], 0, 64) + if err != nil { + continue + } + // Send absolute metric if enabled. + if m.config.AbsValues() && m.shouldOutput(name) { + msg, err := lp.NewMessage(name, entry.tags, m.meta, map[string]interface{}{"value": int(x)}, time.Now()) + if err == nil { + output <- msg + } + } + diff := x - entry.lastValues[name] + // Send diff metric if enabled. + if m.config.DiffValues() && m.shouldOutput(name+"_diff") { + msg, err := lp.NewMessage(name+"_diff", entry.tags, m.meta, map[string]interface{}{"value": int(diff)}, time.Now()) + if err == nil { + output <- msg + } + } + // Send derived metric if enabled. + if m.config.DerivedValues() && m.shouldOutput(name+"_rate") { + rate := float64(diff) / interval.Seconds() + msg, err := lp.NewMessage(name+"_rate", entry.tags, m.meta, map[string]interface{}{"value": rate}, time.Now()) + if err == nil { + output <- msg + } + } + entry.lastValues[name] = x } m.devices[device] = entry } diff --git a/collectors/iostatMetric.md b/collectors/iostatMetric.md index e3e8604..b7cf4a3 100644 --- a/collectors/iostatMetric.md +++ b/collectors/iostatMetric.md @@ -1,34 +1,51 @@ - ## `iostat` collector ```json "iostat": { "exclude_metrics": [ - "read_ms" + "io_read_ms" ], + "exclude_devices": [ + "nvme0n1p1", + "nvme0n1p2", + "md127" + ], + "only_metrics": [], + "send_abs_values": true, + "send_diff_values": true, + "send_derived_values": true } ``` The `iostat` collector reads data from `/proc/diskstats` and outputs a handful **node** metrics. If a metric is not required, it can be excluded from forwarding it to the sink. -Metrics: -* `io_reads` -* `io_reads_merged` -* `io_read_sectors` -* `io_read_ms` -* `io_writes` -* `io_writes_merged` -* `io_writes_sectors` -* `io_writes_ms` -* `io_ioops` -* `io_ioops_ms` -* `io_ioops_weighted_ms` -* `io_discards` -* `io_discards_merged` -* `io_discards_sectors` -* `io_discards_ms` -* `io_flushes` -* `io_flushes_ms` +Both filtering mechanisms are supported: +- `exclude_metrics`: Excludes the specified metrics. +- `only_metrics`: If provided, only the listed metrics are collected. This takes precedence over `exclude_metrics`. + +**Absolute Metrics:** +- `io_reads` +- `io_reads_merged` +- `io_read_sectors` +- `io_read_ms` +- `io_writes` +- `io_writes_merged` +- `io_writes_sectors` +- `io_writes_ms` +- `io_ioops` +- `io_ioops_ms` +- `io_ioops_weighted_ms` +- `io_discards` +- `io_discards_merged` +- `io_discards_sectors` +- `io_discards_ms` +- `io_flushes` +- `io_flushes_ms` + +**Diff Metrics:** +For each metric, if `send_diff_values` is enabled, the collector computes the difference (current value minus previous value) and sends it with the suffix `_diff`. + +**Derived Metrics:** +For each metric, if `send_derived_values` is enabled, the collector computes the derived rate (difference divided by the time interval) and sends it with the suffix `_rate`. The device name is added as tag `device`. For more details, see https://www.kernel.org/doc/html/latest/admin-guide/iostats.html -