add only_metrics, diff_values and derived_values

This commit is contained in:
brinkcoder 2025-03-05 01:04:06 +01:00
parent 4702ab1570
commit 636c3f312d
2 changed files with 156 additions and 53 deletions

View File

@ -2,24 +2,49 @@ package collectors
import (
"bufio"
"os"
cclog "github.com/ClusterCockpit/cc-metric-collector/pkg/ccLogger"
lp "github.com/ClusterCockpit/cc-energy-manager/pkg/cc-message"
// "log"
"encoding/json"
"errors"
"os"
"strconv"
"strings"
"time"
lp "github.com/ClusterCockpit/cc-lib/ccMessage"
cclog "github.com/ClusterCockpit/cc-metric-collector/pkg/ccLogger"
)
const IOSTATFILE = `/proc/diskstats`
const IOSTAT_SYSFSPATH = `/sys/block`
type IOstatCollectorConfig struct {
ExcludeMetrics []string `json:"exclude_metrics,omitempty"`
ExcludeMetrics []string `json:"exclude_metrics,omitempty"`
OnlyMetrics []string `json:"only_metrics,omitempty"`
ExcludeDevices []string `json:"exclude_devices,omitempty"`
SendAbsoluteValues *bool `json:"send_abs_values,omitempty"`
SendDiffValues *bool `json:"send_diff_values,omitempty"`
SendDerivedValues *bool `json:"send_derived_values,omitempty"`
}
// Helper methods for default values.
// - send_abs_values defaults to true,
// - send_diff_values and send_derived_values default to false.
func (cfg *IOstatCollectorConfig) AbsValues() bool {
if cfg.SendAbsoluteValues == nil {
return true
}
return *cfg.SendAbsoluteValues
}
func (cfg *IOstatCollectorConfig) DiffValues() bool {
if cfg.SendDiffValues == nil {
return false
}
return *cfg.SendDiffValues
}
func (cfg *IOstatCollectorConfig) DerivedValues() bool {
if cfg.SendDerivedValues == nil {
return false
}
return *cfg.SendDerivedValues
}
type IOstatCollectorEntry struct {
@ -34,6 +59,24 @@ type IOstatCollector struct {
devices map[string]IOstatCollectorEntry
}
// shouldOutput returns true if a metric should be forwarded based on only_metrics and exclude_metrics.
func (m *IOstatCollector) shouldOutput(metricName string) bool {
if len(m.config.OnlyMetrics) > 0 {
for _, name := range m.config.OnlyMetrics {
if name == metricName {
return true
}
}
return false
}
for _, name := range m.config.ExcludeMetrics {
if name == metricName {
return false
}
}
return true
}
func (m *IOstatCollector) Init(config json.RawMessage) error {
var err error
m.name = "IOstatCollector"
@ -46,8 +89,8 @@ func (m *IOstatCollector) Init(config json.RawMessage) error {
return err
}
}
// https://www.kernel.org/doc/html/latest/admin-guide/iostats.html
matches := map[string]int{
// Define mapping from metric names to field indices in /proc/diskstats.
allMatches := map[string]int{
"io_reads": 3,
"io_reads_merged": 4,
"io_read_sectors": 5,
@ -66,38 +109,57 @@ func (m *IOstatCollector) Init(config json.RawMessage) error {
"io_flushes": 18,
"io_flushes_ms": 19,
}
m.devices = make(map[string]IOstatCollectorEntry)
m.matches = make(map[string]int)
for k, v := range matches {
if _, skip := stringArrayContains(m.config.ExcludeMetrics, k); !skip {
// Allow a metric if either its base name, or base name+"_diff" or base name+"_rate" is present in only_metrics.
for k, v := range allMatches {
allowed := false
if len(m.config.OnlyMetrics) > 0 {
for _, metric := range m.config.OnlyMetrics {
if metric == k || metric == k+"_diff" || metric == k+"_rate" {
allowed = true
break
}
}
} else {
if _, skip := stringArrayContains(m.config.ExcludeMetrics, k); !skip {
allowed = true
}
}
if allowed {
m.matches[k] = v
}
}
if len(m.matches) == 0 {
return errors.New("no metrics to collect")
}
file, err := os.Open(string(IOSTATFILE))
m.devices = make(map[string]IOstatCollectorEntry)
file, err := os.Open(IOSTATFILE)
if err != nil {
cclog.ComponentError(m.name, err.Error())
return err
}
defer file.Close()
scanner := bufio.NewScanner(file)
for scanner.Scan() {
line := scanner.Text()
linefields := strings.Fields(line)
if len(linefields) < 3 {
continue
}
device := linefields[2]
if strings.Contains(device, "loop") {
continue
}
if _, skip := stringArrayContains(m.config.ExcludeDevices, device); skip {
continue
}
values := make(map[string]int64)
for m := range m.matches {
values[m] = 0
for mname := range m.matches {
values[mname] = 0
}
m.devices[device] = IOstatCollectorEntry{
tags: map[string]string{
"device": linefields[2],
"device": device,
"type": "node",
},
lastValues: values,
@ -111,14 +173,12 @@ func (m *IOstatCollector) Read(interval time.Duration, output chan lp.CCMessage)
if !m.init {
return
}
file, err := os.Open(string(IOSTATFILE))
file, err := os.Open(IOSTATFILE)
if err != nil {
cclog.ComponentError(m.name, err.Error())
return
}
defer file.Close()
scanner := bufio.NewScanner(file)
for scanner.Scan() {
line := scanner.Text()
@ -126,26 +186,52 @@ func (m *IOstatCollector) Read(interval time.Duration, output chan lp.CCMessage)
continue
}
linefields := strings.Fields(line)
if len(linefields) < 3 {
continue
}
device := linefields[2]
if strings.Contains(device, "loop") {
continue
}
if _, ok := m.devices[device]; !ok {
if _, skip := stringArrayContains(m.config.ExcludeDevices, device); skip {
continue
}
entry, ok := m.devices[device]
if !ok {
continue
}
entry := m.devices[device]
for name, idx := range m.matches {
if idx < len(linefields) {
x, err := strconv.ParseInt(linefields[idx], 0, 64)
if err == nil {
diff := x - entry.lastValues[name]
y, err := lp.NewMessage(name, entry.tags, m.meta, map[string]interface{}{"value": int(diff)}, time.Now())
if err == nil {
output <- y
}
}
entry.lastValues[name] = x
if idx >= len(linefields) {
continue
}
x, err := strconv.ParseInt(linefields[idx], 0, 64)
if err != nil {
continue
}
// Send absolute metric if enabled.
if m.config.AbsValues() && m.shouldOutput(name) {
msg, err := lp.NewMessage(name, entry.tags, m.meta, map[string]interface{}{"value": int(x)}, time.Now())
if err == nil {
output <- msg
}
}
diff := x - entry.lastValues[name]
// Send diff metric if enabled.
if m.config.DiffValues() && m.shouldOutput(name+"_diff") {
msg, err := lp.NewMessage(name+"_diff", entry.tags, m.meta, map[string]interface{}{"value": int(diff)}, time.Now())
if err == nil {
output <- msg
}
}
// Send derived metric if enabled.
if m.config.DerivedValues() && m.shouldOutput(name+"_rate") {
rate := float64(diff) / interval.Seconds()
msg, err := lp.NewMessage(name+"_rate", entry.tags, m.meta, map[string]interface{}{"value": rate}, time.Now())
if err == nil {
output <- msg
}
}
entry.lastValues[name] = x
}
m.devices[device] = entry
}

View File

@ -1,34 +1,51 @@
## `iostat` collector
```json
"iostat": {
"exclude_metrics": [
"read_ms"
"io_read_ms"
],
"exclude_devices": [
"nvme0n1p1",
"nvme0n1p2",
"md127"
],
"only_metrics": [],
"send_abs_values": true,
"send_diff_values": true,
"send_derived_values": true
}
```
The `iostat` collector reads data from `/proc/diskstats` and outputs a handful **node** metrics. If a metric is not required, it can be excluded from forwarding it to the sink.
Metrics:
* `io_reads`
* `io_reads_merged`
* `io_read_sectors`
* `io_read_ms`
* `io_writes`
* `io_writes_merged`
* `io_writes_sectors`
* `io_writes_ms`
* `io_ioops`
* `io_ioops_ms`
* `io_ioops_weighted_ms`
* `io_discards`
* `io_discards_merged`
* `io_discards_sectors`
* `io_discards_ms`
* `io_flushes`
* `io_flushes_ms`
Both filtering mechanisms are supported:
- `exclude_metrics`: Excludes the specified metrics.
- `only_metrics`: If provided, only the listed metrics are collected. This takes precedence over `exclude_metrics`.
**Absolute Metrics:**
- `io_reads`
- `io_reads_merged`
- `io_read_sectors`
- `io_read_ms`
- `io_writes`
- `io_writes_merged`
- `io_writes_sectors`
- `io_writes_ms`
- `io_ioops`
- `io_ioops_ms`
- `io_ioops_weighted_ms`
- `io_discards`
- `io_discards_merged`
- `io_discards_sectors`
- `io_discards_ms`
- `io_flushes`
- `io_flushes_ms`
**Diff Metrics:**
For each metric, if `send_diff_values` is enabled, the collector computes the difference (current value minus previous value) and sends it with the suffix `_diff`.
**Derived Metrics:**
For each metric, if `send_derived_values` is enabled, the collector computes the derived rate (difference divided by the time interval) and sends it with the suffix `_rate`.
The device name is added as tag `device`. For more details, see https://www.kernel.org/doc/html/latest/admin-guide/iostats.html