From 0e7b4fc936018d910ea1549ecfb65c7c6f6294e8 Mon Sep 17 00:00:00 2001 From: brinkcoder Date: Wed, 5 Mar 2025 01:15:18 +0100 Subject: [PATCH] add only_metrics, exclude_metrics, interface_alias. docs: consistency --- collectors/netstatMetric.go | 175 ++++++++++++++++++++---------------- collectors/netstatMetric.md | 45 ++++++---- 2 files changed, 126 insertions(+), 94 deletions(-) diff --git a/collectors/netstatMetric.go b/collectors/netstatMetric.go index 7933e53..12bfc69 100644 --- a/collectors/netstatMetric.go +++ b/collectors/netstatMetric.go @@ -5,20 +5,24 @@ import ( "encoding/json" "errors" "os" + "regexp" "strconv" "strings" "time" + lp "github.com/ClusterCockpit/cc-lib/ccMessage" cclog "github.com/ClusterCockpit/cc-metric-collector/pkg/ccLogger" - lp "github.com/ClusterCockpit/cc-energy-manager/pkg/cc-message" ) const NETSTATFILE = "/proc/net/dev" type NetstatCollectorConfig struct { - IncludeDevices []string `json:"include_devices"` - SendAbsoluteValues bool `json:"send_abs_values"` - SendDerivedValues bool `json:"send_derived_values"` + IncludeDevices []string `json:"include_devices"` + SendAbsoluteValues bool `json:"send_abs_values"` + SendDerivedValues bool `json:"send_derived_values"` + InterfaceAliases map[string][]string `json:"interface_aliases,omitempty"` + ExcludeMetrics []string `json:"exclude_metrics,omitempty"` + OnlyMetrics []string `json:"only_metrics,omitempty"` } type NetstatCollectorMetric struct { @@ -32,9 +36,43 @@ type NetstatCollectorMetric struct { type NetstatCollector struct { metricCollector - config NetstatCollectorConfig - matches map[string][]NetstatCollectorMetric - lastTimestamp time.Time + config NetstatCollectorConfig + aliasToCanonical map[string]string + matches map[string][]NetstatCollectorMetric + lastTimestamp time.Time +} + +func (m *NetstatCollector) buildAliasMapping() { + m.aliasToCanonical = make(map[string]string) + for canon, aliases := range m.config.InterfaceAliases { + for _, alias := range aliases { + m.aliasToCanonical[alias] = canon + } + } +} + +func getCanonicalName(raw string, aliasToCanonical map[string]string) string { + if canon, ok := aliasToCanonical[raw]; ok { + return canon + } + return raw +} + +func (m *NetstatCollector) shouldOutput(metricName string) bool { + if len(m.config.OnlyMetrics) > 0 { + for _, n := range m.config.OnlyMetrics { + if n == metricName { + return true + } + } + return false + } + for _, n := range m.config.ExcludeMetrics { + if n == metricName { + return false + } + } + return true } func (m *NetstatCollector) Init(config json.RawMessage) error { @@ -43,41 +81,20 @@ func (m *NetstatCollector) Init(config json.RawMessage) error { m.setup() m.lastTimestamp = time.Now() - const ( - fieldInterface = iota - fieldReceiveBytes - fieldReceivePackets - fieldReceiveErrs - fieldReceiveDrop - fieldReceiveFifo - fieldReceiveFrame - fieldReceiveCompressed - fieldReceiveMulticast - fieldTransmitBytes - fieldTransmitPackets - fieldTransmitErrs - fieldTransmitDrop - fieldTransmitFifo - fieldTransmitColls - fieldTransmitCarrier - fieldTransmitCompressed - ) - - m.matches = make(map[string][]NetstatCollectorMetric) - - // Set default configuration, + // Set default configuration m.config.SendAbsoluteValues = true m.config.SendDerivedValues = false - // Read configuration file, allow overwriting default config + if len(config) > 0 { - err := json.Unmarshal(config, &m.config) - if err != nil { + if err := json.Unmarshal(config, &m.config); err != nil { cclog.ComponentError(m.name, "Error reading config:", err.Error()) return err } } - // Check access to net statistic file + m.buildAliasMapping() + + // Open /proc/net/dev and read interfaces. file, err := os.Open(NETSTATFILE) if err != nil { cclog.ComponentError(m.name, err.Error()) @@ -85,30 +102,38 @@ func (m *NetstatCollector) Init(config json.RawMessage) error { } defer file.Close() + m.matches = make(map[string][]NetstatCollectorMetric) scanner := bufio.NewScanner(file) + // Regex to split interface name and rest of line. + reInterface := regexp.MustCompile(`^([^:]+):\s*(.*)$`) + // Field positions based on /proc/net/dev format. + const ( + fieldReceiveBytes = iota + 1 + fieldReceivePackets + fieldTransmitBytes = 9 + fieldTransmitPackets + ) for scanner.Scan() { - l := scanner.Text() - - // Skip lines with no net device entry - if !strings.Contains(l, ":") { + line := scanner.Text() + // Skip header lines. + if !strings.Contains(line, ":") { continue } - - // Split line into fields - f := strings.Fields(l) - - // Get net device entry - dev := strings.Trim(f[0], ": ") - - // Check if device is a included device - if _, ok := stringArrayContains(m.config.IncludeDevices, dev); ok { - tags := map[string]string{"stype": "network", "stype-id": dev, "type": "node"} + matches := reInterface.FindStringSubmatch(line) + if len(matches) != 3 { + continue + } + raw := strings.TrimSpace(matches[1]) + canonical := getCanonicalName(raw, m.aliasToCanonical) + // Check if device is included. + if _, ok := stringArrayContains(m.config.IncludeDevices, canonical); ok { + tags := map[string]string{"stype": "network", "stype-id": raw, "type": "node"} meta_unit_byte := map[string]string{"source": m.name, "group": "Network", "unit": "bytes"} meta_unit_byte_per_sec := map[string]string{"source": m.name, "group": "Network", "unit": "bytes/sec"} meta_unit_pkts := map[string]string{"source": m.name, "group": "Network", "unit": "packets"} meta_unit_pkts_per_sec := map[string]string{"source": m.name, "group": "Network", "unit": "packets/sec"} - m.matches[dev] = []NetstatCollectorMetric{ + m.matches[canonical] = []NetstatCollectorMetric{ { name: "net_bytes_in", index: fieldReceiveBytes, @@ -143,11 +168,10 @@ func (m *NetstatCollector) Init(config json.RawMessage) error { }, } } - } if len(m.matches) == 0 { - return errors.New("no devices to collector metrics found") + return errors.New("no devices to collect metrics found") } m.init = true return nil @@ -157,14 +181,11 @@ func (m *NetstatCollector) Read(interval time.Duration, output chan lp.CCMessage if !m.init { return } - // Current time stamp now := time.Now() - // time difference to last time stamp timeDiff := now.Sub(m.lastTimestamp).Seconds() - // Save current timestamp m.lastTimestamp = now - file, err := os.Open(string(NETSTATFILE)) + file, err := os.Open(NETSTATFILE) if err != nil { cclog.ComponentError(m.name, err.Error()) return @@ -173,43 +194,39 @@ func (m *NetstatCollector) Read(interval time.Duration, output chan lp.CCMessage scanner := bufio.NewScanner(file) for scanner.Scan() { - l := scanner.Text() - - // Skip lines with no net device entry - if !strings.Contains(l, ":") { + line := scanner.Text() + if !strings.Contains(line, ":") { continue } - - // Split line into fields - f := strings.Fields(l) - - // Get net device entry - dev := strings.Trim(f[0], ":") - - // Check if device is a included device - if devmetrics, ok := m.matches[dev]; ok { + reInterface := regexp.MustCompile(`^([^:]+):\s*(.*)$`) + matches := reInterface.FindStringSubmatch(line) + if len(matches) != 3 { + continue + } + raw := strings.TrimSpace(matches[1]) + canonical := getCanonicalName(raw, m.aliasToCanonical) + if devmetrics, ok := m.matches[canonical]; ok { + fields := strings.Fields(matches[2]) for i := range devmetrics { metric := &devmetrics[i] - - // Read value - v, err := strconv.ParseInt(f[metric.index], 10, 64) + v, err := strconv.ParseInt(fields[metric.index-1], 10, 64) if err != nil { continue } - if m.config.SendAbsoluteValues { + // Send absolute metric if enabled. + if m.config.SendAbsoluteValues && m.shouldOutput(metric.name) { if y, err := lp.NewMessage(metric.name, metric.tags, metric.meta, map[string]interface{}{"value": v}, now); err == nil { output <- y } } - if m.config.SendDerivedValues { - if metric.lastValue >= 0 { - rate := float64(v-metric.lastValue) / timeDiff - if y, err := lp.NewMessage(metric.name+"_bw", metric.tags, metric.meta_rates, map[string]interface{}{"value": rate}, now); err == nil { - output <- y - } + // Send derived metric if enabled. + if m.config.SendDerivedValues && metric.lastValue >= 0 && m.shouldOutput(metric.name+"_bw") { + rate := float64(v-metric.lastValue) / timeDiff + if y, err := lp.NewMessage(metric.name+"_bw", metric.tags, metric.meta_rates, map[string]interface{}{"value": rate}, now); err == nil { + output <- y } - metric.lastValue = v } + metric.lastValue = v } } } diff --git a/collectors/netstatMetric.md b/collectors/netstatMetric.md index 7ce0af9..520d865 100644 --- a/collectors/netstatMetric.md +++ b/collectors/netstatMetric.md @@ -1,27 +1,42 @@ - ## `netstat` collector ```json "netstat": { "include_devices": [ - "eth0" + "eth0", + "eno1" ], - "send_abs_values" : true, - "send_derived_values" : true + "send_abs_values": true, + "send_derived_values": true, + "interface_aliases": { + "eno1": ["eno1np0", "eno1_alt"], + "eth0": ["eth0_alias"] + }, + "exclude_metrics": [ + "net_pkts_in" + ], + "only_metrics": [ + "net_bytes_in_bw" + ] } ``` -The `netstat` collector reads data from `/proc/net/dev` and outputs a handful **node** metrics. With the `include_devices` list you can specify which network devices should be measured. **Note**: Most other collectors use an _exclude_ list instead of an include list. +The `netstat` collector reads data from `/proc/net/dev` and outputs a handful **node** metrics. With the `include_devices` list you can specify which network interfaces should be measured. Optionally, you can define an `interface_aliases` mapping. For each canonical device (as listed in include_devices), you may provide an array of aliases that may be reported by the system. When an alias is detected, it is mapped to the canonical name, while the output tag `stype-id` always shows the actual system-reported name. -Metrics: -* `net_bytes_in` (`unit=bytes`) -* `net_bytes_out` (`unit=bytes`) -* `net_pkts_in` (`unit=packets`) -* `net_pkts_out` (`unit=packets`) -* `net_bytes_in_bw` (`unit=bytes/sec` if `send_derived_values == true`) -* `net_bytes_out_bw` (`unit=bytes/sec` if `send_derived_values == true`) -* `net_pkts_in_bw` (`unit=packets/sec` if `send_derived_values == true`) -* `net_pkts_out_bw` (`unit=packets/sec` if `send_derived_values == true`) +Both filtering mechanisms are supported: +- `exclude_metrics`: Excludes the specified metrics. +- `only_metrics`: If provided, only the listed metrics are collected. This takes precedence over `exclude_metrics`. + +**Absolute Metrics:** +- `net_bytes_in` (unit: `bytes`) +- `net_bytes_out` (unit: `bytes`) +- `net_pkts_in` (unit: `packets`) +- `net_pkts_out` (unit: `packets`) + +**Derived Metrics:** +- `net_bytes_in_bw` (unit: `bytes/sec`) +- `net_bytes_out_bw` (unit: `bytes/sec`) +- `net_pkts_in_bw` (unit: `packets/sec`) +- `net_pkts_out_bw` (unit: `packets/sec`) The device name is added as tag `stype=network,stype-id=`. -