mirror of
https://github.com/ClusterCockpit/cc-metric-collector.git
synced 2025-04-05 21:25:55 +02:00
add only_metrics, diff_values and derived_values
This commit is contained in:
parent
4702ab1570
commit
636c3f312d
@ -2,24 +2,49 @@ package collectors
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"bufio"
|
"bufio"
|
||||||
"os"
|
|
||||||
|
|
||||||
cclog "github.com/ClusterCockpit/cc-metric-collector/pkg/ccLogger"
|
|
||||||
lp "github.com/ClusterCockpit/cc-energy-manager/pkg/cc-message"
|
|
||||||
|
|
||||||
// "log"
|
|
||||||
"encoding/json"
|
"encoding/json"
|
||||||
"errors"
|
"errors"
|
||||||
|
"os"
|
||||||
"strconv"
|
"strconv"
|
||||||
"strings"
|
"strings"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
|
lp "github.com/ClusterCockpit/cc-lib/ccMessage"
|
||||||
|
cclog "github.com/ClusterCockpit/cc-metric-collector/pkg/ccLogger"
|
||||||
)
|
)
|
||||||
|
|
||||||
const IOSTATFILE = `/proc/diskstats`
|
const IOSTATFILE = `/proc/diskstats`
|
||||||
const IOSTAT_SYSFSPATH = `/sys/block`
|
const IOSTAT_SYSFSPATH = `/sys/block`
|
||||||
|
|
||||||
type IOstatCollectorConfig struct {
|
type IOstatCollectorConfig struct {
|
||||||
ExcludeMetrics []string `json:"exclude_metrics,omitempty"`
|
ExcludeMetrics []string `json:"exclude_metrics,omitempty"`
|
||||||
|
OnlyMetrics []string `json:"only_metrics,omitempty"`
|
||||||
|
ExcludeDevices []string `json:"exclude_devices,omitempty"`
|
||||||
|
SendAbsoluteValues *bool `json:"send_abs_values,omitempty"`
|
||||||
|
SendDiffValues *bool `json:"send_diff_values,omitempty"`
|
||||||
|
SendDerivedValues *bool `json:"send_derived_values,omitempty"`
|
||||||
|
}
|
||||||
|
|
||||||
|
// Helper methods for default values.
|
||||||
|
// - send_abs_values defaults to true,
|
||||||
|
// - send_diff_values and send_derived_values default to false.
|
||||||
|
func (cfg *IOstatCollectorConfig) AbsValues() bool {
|
||||||
|
if cfg.SendAbsoluteValues == nil {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
return *cfg.SendAbsoluteValues
|
||||||
|
}
|
||||||
|
func (cfg *IOstatCollectorConfig) DiffValues() bool {
|
||||||
|
if cfg.SendDiffValues == nil {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
return *cfg.SendDiffValues
|
||||||
|
}
|
||||||
|
func (cfg *IOstatCollectorConfig) DerivedValues() bool {
|
||||||
|
if cfg.SendDerivedValues == nil {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
return *cfg.SendDerivedValues
|
||||||
}
|
}
|
||||||
|
|
||||||
type IOstatCollectorEntry struct {
|
type IOstatCollectorEntry struct {
|
||||||
@ -34,6 +59,24 @@ type IOstatCollector struct {
|
|||||||
devices map[string]IOstatCollectorEntry
|
devices map[string]IOstatCollectorEntry
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// shouldOutput returns true if a metric should be forwarded based on only_metrics and exclude_metrics.
|
||||||
|
func (m *IOstatCollector) shouldOutput(metricName string) bool {
|
||||||
|
if len(m.config.OnlyMetrics) > 0 {
|
||||||
|
for _, name := range m.config.OnlyMetrics {
|
||||||
|
if name == metricName {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
for _, name := range m.config.ExcludeMetrics {
|
||||||
|
if name == metricName {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
|
||||||
func (m *IOstatCollector) Init(config json.RawMessage) error {
|
func (m *IOstatCollector) Init(config json.RawMessage) error {
|
||||||
var err error
|
var err error
|
||||||
m.name = "IOstatCollector"
|
m.name = "IOstatCollector"
|
||||||
@ -46,8 +89,8 @@ func (m *IOstatCollector) Init(config json.RawMessage) error {
|
|||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// https://www.kernel.org/doc/html/latest/admin-guide/iostats.html
|
// Define mapping from metric names to field indices in /proc/diskstats.
|
||||||
matches := map[string]int{
|
allMatches := map[string]int{
|
||||||
"io_reads": 3,
|
"io_reads": 3,
|
||||||
"io_reads_merged": 4,
|
"io_reads_merged": 4,
|
||||||
"io_read_sectors": 5,
|
"io_read_sectors": 5,
|
||||||
@ -66,38 +109,57 @@ func (m *IOstatCollector) Init(config json.RawMessage) error {
|
|||||||
"io_flushes": 18,
|
"io_flushes": 18,
|
||||||
"io_flushes_ms": 19,
|
"io_flushes_ms": 19,
|
||||||
}
|
}
|
||||||
m.devices = make(map[string]IOstatCollectorEntry)
|
|
||||||
m.matches = make(map[string]int)
|
m.matches = make(map[string]int)
|
||||||
for k, v := range matches {
|
// Allow a metric if either its base name, or base name+"_diff" or base name+"_rate" is present in only_metrics.
|
||||||
if _, skip := stringArrayContains(m.config.ExcludeMetrics, k); !skip {
|
for k, v := range allMatches {
|
||||||
|
allowed := false
|
||||||
|
if len(m.config.OnlyMetrics) > 0 {
|
||||||
|
for _, metric := range m.config.OnlyMetrics {
|
||||||
|
if metric == k || metric == k+"_diff" || metric == k+"_rate" {
|
||||||
|
allowed = true
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
if _, skip := stringArrayContains(m.config.ExcludeMetrics, k); !skip {
|
||||||
|
allowed = true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if allowed {
|
||||||
m.matches[k] = v
|
m.matches[k] = v
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if len(m.matches) == 0 {
|
if len(m.matches) == 0 {
|
||||||
return errors.New("no metrics to collect")
|
return errors.New("no metrics to collect")
|
||||||
}
|
}
|
||||||
file, err := os.Open(string(IOSTATFILE))
|
m.devices = make(map[string]IOstatCollectorEntry)
|
||||||
|
file, err := os.Open(IOSTATFILE)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
cclog.ComponentError(m.name, err.Error())
|
cclog.ComponentError(m.name, err.Error())
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
defer file.Close()
|
defer file.Close()
|
||||||
|
|
||||||
scanner := bufio.NewScanner(file)
|
scanner := bufio.NewScanner(file)
|
||||||
for scanner.Scan() {
|
for scanner.Scan() {
|
||||||
line := scanner.Text()
|
line := scanner.Text()
|
||||||
linefields := strings.Fields(line)
|
linefields := strings.Fields(line)
|
||||||
|
if len(linefields) < 3 {
|
||||||
|
continue
|
||||||
|
}
|
||||||
device := linefields[2]
|
device := linefields[2]
|
||||||
if strings.Contains(device, "loop") {
|
if strings.Contains(device, "loop") {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
if _, skip := stringArrayContains(m.config.ExcludeDevices, device); skip {
|
||||||
|
continue
|
||||||
|
}
|
||||||
values := make(map[string]int64)
|
values := make(map[string]int64)
|
||||||
for m := range m.matches {
|
for mname := range m.matches {
|
||||||
values[m] = 0
|
values[mname] = 0
|
||||||
}
|
}
|
||||||
m.devices[device] = IOstatCollectorEntry{
|
m.devices[device] = IOstatCollectorEntry{
|
||||||
tags: map[string]string{
|
tags: map[string]string{
|
||||||
"device": linefields[2],
|
"device": device,
|
||||||
"type": "node",
|
"type": "node",
|
||||||
},
|
},
|
||||||
lastValues: values,
|
lastValues: values,
|
||||||
@ -111,14 +173,12 @@ func (m *IOstatCollector) Read(interval time.Duration, output chan lp.CCMessage)
|
|||||||
if !m.init {
|
if !m.init {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
file, err := os.Open(IOSTATFILE)
|
||||||
file, err := os.Open(string(IOSTATFILE))
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
cclog.ComponentError(m.name, err.Error())
|
cclog.ComponentError(m.name, err.Error())
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
defer file.Close()
|
defer file.Close()
|
||||||
|
|
||||||
scanner := bufio.NewScanner(file)
|
scanner := bufio.NewScanner(file)
|
||||||
for scanner.Scan() {
|
for scanner.Scan() {
|
||||||
line := scanner.Text()
|
line := scanner.Text()
|
||||||
@ -126,26 +186,52 @@ func (m *IOstatCollector) Read(interval time.Duration, output chan lp.CCMessage)
|
|||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
linefields := strings.Fields(line)
|
linefields := strings.Fields(line)
|
||||||
|
if len(linefields) < 3 {
|
||||||
|
continue
|
||||||
|
}
|
||||||
device := linefields[2]
|
device := linefields[2]
|
||||||
if strings.Contains(device, "loop") {
|
if strings.Contains(device, "loop") {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
if _, ok := m.devices[device]; !ok {
|
if _, skip := stringArrayContains(m.config.ExcludeDevices, device); skip {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
entry, ok := m.devices[device]
|
||||||
|
if !ok {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
entry := m.devices[device]
|
|
||||||
for name, idx := range m.matches {
|
for name, idx := range m.matches {
|
||||||
if idx < len(linefields) {
|
if idx >= len(linefields) {
|
||||||
x, err := strconv.ParseInt(linefields[idx], 0, 64)
|
continue
|
||||||
if err == nil {
|
|
||||||
diff := x - entry.lastValues[name]
|
|
||||||
y, err := lp.NewMessage(name, entry.tags, m.meta, map[string]interface{}{"value": int(diff)}, time.Now())
|
|
||||||
if err == nil {
|
|
||||||
output <- y
|
|
||||||
}
|
|
||||||
}
|
|
||||||
entry.lastValues[name] = x
|
|
||||||
}
|
}
|
||||||
|
x, err := strconv.ParseInt(linefields[idx], 0, 64)
|
||||||
|
if err != nil {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
// Send absolute metric if enabled.
|
||||||
|
if m.config.AbsValues() && m.shouldOutput(name) {
|
||||||
|
msg, err := lp.NewMessage(name, entry.tags, m.meta, map[string]interface{}{"value": int(x)}, time.Now())
|
||||||
|
if err == nil {
|
||||||
|
output <- msg
|
||||||
|
}
|
||||||
|
}
|
||||||
|
diff := x - entry.lastValues[name]
|
||||||
|
// Send diff metric if enabled.
|
||||||
|
if m.config.DiffValues() && m.shouldOutput(name+"_diff") {
|
||||||
|
msg, err := lp.NewMessage(name+"_diff", entry.tags, m.meta, map[string]interface{}{"value": int(diff)}, time.Now())
|
||||||
|
if err == nil {
|
||||||
|
output <- msg
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Send derived metric if enabled.
|
||||||
|
if m.config.DerivedValues() && m.shouldOutput(name+"_rate") {
|
||||||
|
rate := float64(diff) / interval.Seconds()
|
||||||
|
msg, err := lp.NewMessage(name+"_rate", entry.tags, m.meta, map[string]interface{}{"value": rate}, time.Now())
|
||||||
|
if err == nil {
|
||||||
|
output <- msg
|
||||||
|
}
|
||||||
|
}
|
||||||
|
entry.lastValues[name] = x
|
||||||
}
|
}
|
||||||
m.devices[device] = entry
|
m.devices[device] = entry
|
||||||
}
|
}
|
||||||
|
@ -1,34 +1,51 @@
|
|||||||
|
|
||||||
## `iostat` collector
|
## `iostat` collector
|
||||||
|
|
||||||
```json
|
```json
|
||||||
"iostat": {
|
"iostat": {
|
||||||
"exclude_metrics": [
|
"exclude_metrics": [
|
||||||
"read_ms"
|
"io_read_ms"
|
||||||
],
|
],
|
||||||
|
"exclude_devices": [
|
||||||
|
"nvme0n1p1",
|
||||||
|
"nvme0n1p2",
|
||||||
|
"md127"
|
||||||
|
],
|
||||||
|
"only_metrics": [],
|
||||||
|
"send_abs_values": true,
|
||||||
|
"send_diff_values": true,
|
||||||
|
"send_derived_values": true
|
||||||
}
|
}
|
||||||
```
|
```
|
||||||
|
|
||||||
The `iostat` collector reads data from `/proc/diskstats` and outputs a handful of **node** metrics. If a metric is not required, it can be excluded from being forwarded to the sink.
|
The `iostat` collector reads data from `/proc/diskstats` and outputs a handful of **node** metrics. If a metric is not required, it can be excluded from being forwarded to the sink.
|
||||||
|
|
||||||
Metrics:
|
Two filtering mechanisms are supported:
|
||||||
* `io_reads`
|
- `exclude_metrics`: Excludes the specified metrics.
|
||||||
* `io_reads_merged`
|
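- `only_metrics`: If provided and non-empty, only the listed metrics are sent. Names are matched exactly, so diff and derived metrics must be listed with their suffix (e.g. `io_reads_diff`, `io_reads_rate`). This takes precedence over `exclude_metrics`.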
|
||||||
* `io_read_sectors`
|
|
||||||
* `io_read_ms`
|
**Absolute Metrics:**
|
||||||
* `io_writes`
|
- `io_reads`
|
||||||
* `io_writes_merged`
|
- `io_reads_merged`
|
||||||
* `io_writes_sectors`
|
- `io_read_sectors`
|
||||||
* `io_writes_ms`
|
- `io_read_ms`
|
||||||
* `io_ioops`
|
- `io_writes`
|
||||||
* `io_ioops_ms`
|
- `io_writes_merged`
|
||||||
* `io_ioops_weighted_ms`
|
- `io_writes_sectors`
|
||||||
* `io_discards`
|
- `io_writes_ms`
|
||||||
* `io_discards_merged`
|
- `io_ioops`
|
||||||
* `io_discards_sectors`
|
- `io_ioops_ms`
|
||||||
* `io_discards_ms`
|
- `io_ioops_weighted_ms`
|
||||||
* `io_flushes`
|
- `io_discards`
|
||||||
* `io_flushes_ms`
|
- `io_discards_merged`
|
||||||
|
- `io_discards_sectors`
|
||||||
|
- `io_discards_ms`
|
||||||
|
- `io_flushes`
|
||||||
|
- `io_flushes_ms`
|
||||||
|
|
||||||
|
**Diff Metrics:**
|
||||||
|
For each metric, if `send_diff_values` is enabled, the collector computes the difference (current value minus previous value) and sends it with the suffix `_diff`.
|
||||||
|
|
||||||
|
**Derived Metrics:**
|
||||||
|
For each metric, if `send_derived_values` is enabled, the collector computes the derived rate (difference divided by the time interval in seconds) and sends it with the suffix `_rate`.
|
||||||
|
|
||||||
The device name is added as tag `device`. For more details, see https://www.kernel.org/doc/html/latest/admin-guide/iostats.html
|
The device name is added as tag `device`. For more details, see https://www.kernel.org/doc/html/latest/admin-guide/iostats.html
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user