mirror of
https://github.com/ClusterCockpit/cc-metric-collector.git
synced 2025-04-05 13:15:55 +02:00
add only_metrics, diff_values and derived_values
This commit is contained in:
parent
4702ab1570
commit
636c3f312d
@ -2,24 +2,49 @@ package collectors
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"os"
|
||||
|
||||
cclog "github.com/ClusterCockpit/cc-metric-collector/pkg/ccLogger"
|
||||
lp "github.com/ClusterCockpit/cc-energy-manager/pkg/cc-message"
|
||||
|
||||
// "log"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"os"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
lp "github.com/ClusterCockpit/cc-lib/ccMessage"
|
||||
cclog "github.com/ClusterCockpit/cc-metric-collector/pkg/ccLogger"
|
||||
)
|
||||
|
||||
const IOSTATFILE = `/proc/diskstats`
|
||||
const IOSTAT_SYSFSPATH = `/sys/block`
|
||||
|
||||
type IOstatCollectorConfig struct {
|
||||
ExcludeMetrics []string `json:"exclude_metrics,omitempty"`
|
||||
ExcludeMetrics []string `json:"exclude_metrics,omitempty"`
|
||||
OnlyMetrics []string `json:"only_metrics,omitempty"`
|
||||
ExcludeDevices []string `json:"exclude_devices,omitempty"`
|
||||
SendAbsoluteValues *bool `json:"send_abs_values,omitempty"`
|
||||
SendDiffValues *bool `json:"send_diff_values,omitempty"`
|
||||
SendDerivedValues *bool `json:"send_derived_values,omitempty"`
|
||||
}
|
||||
|
||||
// Helper methods for default values.
|
||||
// - send_abs_values defaults to true,
|
||||
// - send_diff_values and send_derived_values default to false.
|
||||
func (cfg *IOstatCollectorConfig) AbsValues() bool {
|
||||
if cfg.SendAbsoluteValues == nil {
|
||||
return true
|
||||
}
|
||||
return *cfg.SendAbsoluteValues
|
||||
}
|
||||
func (cfg *IOstatCollectorConfig) DiffValues() bool {
|
||||
if cfg.SendDiffValues == nil {
|
||||
return false
|
||||
}
|
||||
return *cfg.SendDiffValues
|
||||
}
|
||||
func (cfg *IOstatCollectorConfig) DerivedValues() bool {
|
||||
if cfg.SendDerivedValues == nil {
|
||||
return false
|
||||
}
|
||||
return *cfg.SendDerivedValues
|
||||
}
|
||||
|
||||
type IOstatCollectorEntry struct {
|
||||
@ -34,6 +59,24 @@ type IOstatCollector struct {
|
||||
devices map[string]IOstatCollectorEntry
|
||||
}
|
||||
|
||||
// shouldOutput returns true if a metric should be forwarded based on only_metrics and exclude_metrics.
|
||||
func (m *IOstatCollector) shouldOutput(metricName string) bool {
|
||||
if len(m.config.OnlyMetrics) > 0 {
|
||||
for _, name := range m.config.OnlyMetrics {
|
||||
if name == metricName {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
for _, name := range m.config.ExcludeMetrics {
|
||||
if name == metricName {
|
||||
return false
|
||||
}
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
func (m *IOstatCollector) Init(config json.RawMessage) error {
|
||||
var err error
|
||||
m.name = "IOstatCollector"
|
||||
@ -46,8 +89,8 @@ func (m *IOstatCollector) Init(config json.RawMessage) error {
|
||||
return err
|
||||
}
|
||||
}
|
||||
// https://www.kernel.org/doc/html/latest/admin-guide/iostats.html
|
||||
matches := map[string]int{
|
||||
// Define mapping from metric names to field indices in /proc/diskstats.
|
||||
allMatches := map[string]int{
|
||||
"io_reads": 3,
|
||||
"io_reads_merged": 4,
|
||||
"io_read_sectors": 5,
|
||||
@ -66,38 +109,57 @@ func (m *IOstatCollector) Init(config json.RawMessage) error {
|
||||
"io_flushes": 18,
|
||||
"io_flushes_ms": 19,
|
||||
}
|
||||
m.devices = make(map[string]IOstatCollectorEntry)
|
||||
m.matches = make(map[string]int)
|
||||
for k, v := range matches {
|
||||
if _, skip := stringArrayContains(m.config.ExcludeMetrics, k); !skip {
|
||||
// Allow a metric if either its base name, or base name+"_diff" or base name+"_rate" is present in only_metrics.
|
||||
for k, v := range allMatches {
|
||||
allowed := false
|
||||
if len(m.config.OnlyMetrics) > 0 {
|
||||
for _, metric := range m.config.OnlyMetrics {
|
||||
if metric == k || metric == k+"_diff" || metric == k+"_rate" {
|
||||
allowed = true
|
||||
break
|
||||
}
|
||||
}
|
||||
} else {
|
||||
if _, skip := stringArrayContains(m.config.ExcludeMetrics, k); !skip {
|
||||
allowed = true
|
||||
}
|
||||
}
|
||||
if allowed {
|
||||
m.matches[k] = v
|
||||
}
|
||||
}
|
||||
if len(m.matches) == 0 {
|
||||
return errors.New("no metrics to collect")
|
||||
}
|
||||
file, err := os.Open(string(IOSTATFILE))
|
||||
m.devices = make(map[string]IOstatCollectorEntry)
|
||||
file, err := os.Open(IOSTATFILE)
|
||||
if err != nil {
|
||||
cclog.ComponentError(m.name, err.Error())
|
||||
return err
|
||||
}
|
||||
defer file.Close()
|
||||
|
||||
scanner := bufio.NewScanner(file)
|
||||
for scanner.Scan() {
|
||||
line := scanner.Text()
|
||||
linefields := strings.Fields(line)
|
||||
if len(linefields) < 3 {
|
||||
continue
|
||||
}
|
||||
device := linefields[2]
|
||||
if strings.Contains(device, "loop") {
|
||||
continue
|
||||
}
|
||||
if _, skip := stringArrayContains(m.config.ExcludeDevices, device); skip {
|
||||
continue
|
||||
}
|
||||
values := make(map[string]int64)
|
||||
for m := range m.matches {
|
||||
values[m] = 0
|
||||
for mname := range m.matches {
|
||||
values[mname] = 0
|
||||
}
|
||||
m.devices[device] = IOstatCollectorEntry{
|
||||
tags: map[string]string{
|
||||
"device": linefields[2],
|
||||
"device": device,
|
||||
"type": "node",
|
||||
},
|
||||
lastValues: values,
|
||||
@ -111,14 +173,12 @@ func (m *IOstatCollector) Read(interval time.Duration, output chan lp.CCMessage)
|
||||
if !m.init {
|
||||
return
|
||||
}
|
||||
|
||||
file, err := os.Open(string(IOSTATFILE))
|
||||
file, err := os.Open(IOSTATFILE)
|
||||
if err != nil {
|
||||
cclog.ComponentError(m.name, err.Error())
|
||||
return
|
||||
}
|
||||
defer file.Close()
|
||||
|
||||
scanner := bufio.NewScanner(file)
|
||||
for scanner.Scan() {
|
||||
line := scanner.Text()
|
||||
@ -126,26 +186,52 @@ func (m *IOstatCollector) Read(interval time.Duration, output chan lp.CCMessage)
|
||||
continue
|
||||
}
|
||||
linefields := strings.Fields(line)
|
||||
if len(linefields) < 3 {
|
||||
continue
|
||||
}
|
||||
device := linefields[2]
|
||||
if strings.Contains(device, "loop") {
|
||||
continue
|
||||
}
|
||||
if _, ok := m.devices[device]; !ok {
|
||||
if _, skip := stringArrayContains(m.config.ExcludeDevices, device); skip {
|
||||
continue
|
||||
}
|
||||
entry, ok := m.devices[device]
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
entry := m.devices[device]
|
||||
for name, idx := range m.matches {
|
||||
if idx < len(linefields) {
|
||||
x, err := strconv.ParseInt(linefields[idx], 0, 64)
|
||||
if err == nil {
|
||||
diff := x - entry.lastValues[name]
|
||||
y, err := lp.NewMessage(name, entry.tags, m.meta, map[string]interface{}{"value": int(diff)}, time.Now())
|
||||
if err == nil {
|
||||
output <- y
|
||||
}
|
||||
}
|
||||
entry.lastValues[name] = x
|
||||
if idx >= len(linefields) {
|
||||
continue
|
||||
}
|
||||
x, err := strconv.ParseInt(linefields[idx], 0, 64)
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
// Send absolute metric if enabled.
|
||||
if m.config.AbsValues() && m.shouldOutput(name) {
|
||||
msg, err := lp.NewMessage(name, entry.tags, m.meta, map[string]interface{}{"value": int(x)}, time.Now())
|
||||
if err == nil {
|
||||
output <- msg
|
||||
}
|
||||
}
|
||||
diff := x - entry.lastValues[name]
|
||||
// Send diff metric if enabled.
|
||||
if m.config.DiffValues() && m.shouldOutput(name+"_diff") {
|
||||
msg, err := lp.NewMessage(name+"_diff", entry.tags, m.meta, map[string]interface{}{"value": int(diff)}, time.Now())
|
||||
if err == nil {
|
||||
output <- msg
|
||||
}
|
||||
}
|
||||
// Send derived metric if enabled.
|
||||
if m.config.DerivedValues() && m.shouldOutput(name+"_rate") {
|
||||
rate := float64(diff) / interval.Seconds()
|
||||
msg, err := lp.NewMessage(name+"_rate", entry.tags, m.meta, map[string]interface{}{"value": rate}, time.Now())
|
||||
if err == nil {
|
||||
output <- msg
|
||||
}
|
||||
}
|
||||
entry.lastValues[name] = x
|
||||
}
|
||||
m.devices[device] = entry
|
||||
}
|
||||
|
@ -1,34 +1,51 @@
|
||||
|
||||
## `iostat` collector
|
||||
|
||||
```json
|
||||
"iostat": {
|
||||
"exclude_metrics": [
|
||||
"read_ms"
|
||||
"io_read_ms"
|
||||
],
|
||||
"exclude_devices": [
|
||||
"nvme0n1p1",
|
||||
"nvme0n1p2",
|
||||
"md127"
|
||||
],
|
||||
"only_metrics": [],
|
||||
"send_abs_values": true,
|
||||
"send_diff_values": true,
|
||||
"send_derived_values": true
|
||||
}
|
||||
```
|
||||
|
||||
The `iostat` collector reads data from `/proc/diskstats` and outputs a handful **node** metrics. If a metric is not required, it can be excluded from forwarding it to the sink.
|
||||
|
||||
Metrics:
|
||||
* `io_reads`
|
||||
* `io_reads_merged`
|
||||
* `io_read_sectors`
|
||||
* `io_read_ms`
|
||||
* `io_writes`
|
||||
* `io_writes_merged`
|
||||
* `io_writes_sectors`
|
||||
* `io_writes_ms`
|
||||
* `io_ioops`
|
||||
* `io_ioops_ms`
|
||||
* `io_ioops_weighted_ms`
|
||||
* `io_discards`
|
||||
* `io_discards_merged`
|
||||
* `io_discards_sectors`
|
||||
* `io_discards_ms`
|
||||
* `io_flushes`
|
||||
* `io_flushes_ms`
|
||||
Both filtering mechanisms are supported:
|
||||
- `exclude_metrics`: Excludes the specified metrics.
|
||||
- `only_metrics`: If provided, only the listed metrics are collected. This takes precedence over `exclude_metrics`.
|
||||
|
||||
**Absolute Metrics:**
|
||||
- `io_reads`
|
||||
- `io_reads_merged`
|
||||
- `io_read_sectors`
|
||||
- `io_read_ms`
|
||||
- `io_writes`
|
||||
- `io_writes_merged`
|
||||
- `io_writes_sectors`
|
||||
- `io_writes_ms`
|
||||
- `io_ioops`
|
||||
- `io_ioops_ms`
|
||||
- `io_ioops_weighted_ms`
|
||||
- `io_discards`
|
||||
- `io_discards_merged`
|
||||
- `io_discards_sectors`
|
||||
- `io_discards_ms`
|
||||
- `io_flushes`
|
||||
- `io_flushes_ms`
|
||||
|
||||
**Diff Metrics:**
|
||||
For each metric, if `send_diff_values` is enabled, the collector computes the difference (current value minus previous value) and sends it with the suffix `_diff`.
|
||||
|
||||
**Derived Metrics:**
|
||||
For each metric, if `send_derived_values` is enabled, the collector computes the derived rate (difference divided by the time interval) and sends it with the suffix `_rate`.
|
||||
|
||||
The device name is added as tag `device`. For more details, see https://www.kernel.org/doc/html/latest/admin-guide/iostats.html
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user