From 5a26c37e0db912e8f43cbaae746c9fb368752437 Mon Sep 17 00:00:00 2001 From: brinkcoder Date: Wed, 5 Mar 2025 00:59:44 +0100 Subject: [PATCH] add only_metrics and exclude_mounts --- collectors/diskstatMetric.go | 87 ++++++++++++++++++++++-------------- collectors/diskstatMetric.md | 24 ++++++---- 2 files changed, 70 insertions(+), 41 deletions(-) diff --git a/collectors/diskstatMetric.go b/collectors/diskstatMetric.go index 0298362..6466797 100644 --- a/collectors/diskstatMetric.go +++ b/collectors/diskstatMetric.go @@ -8,23 +8,38 @@ import ( "syscall" "time" + lp "github.com/ClusterCockpit/cc-lib/ccMessage" cclog "github.com/ClusterCockpit/cc-metric-collector/pkg/ccLogger" - lp "github.com/ClusterCockpit/cc-energy-manager/pkg/cc-message" ) -// "log" - const MOUNTFILE = `/proc/self/mounts` type DiskstatCollectorConfig struct { ExcludeMetrics []string `json:"exclude_metrics,omitempty"` + OnlyMetrics []string `json:"only_metrics,omitempty"` + ExcludeMounts []string `json:"exclude_mounts,omitempty"` } type DiskstatCollector struct { metricCollector - //matches map[string]int - config IOstatCollectorConfig - //devices map[string]IOstatCollectorEntry + config DiskstatCollectorConfig +} + +func (m *DiskstatCollector) shouldOutput(metricName string) bool { + if len(m.config.OnlyMetrics) > 0 { + for _, n := range m.config.OnlyMetrics { + if n == metricName { + return true + } + } + return false + } + for _, n := range m.config.ExcludeMetrics { + if n == metricName { + return false + } + } + return true } func (m *DiskstatCollector) Init(config json.RawMessage) error { @@ -33,12 +48,11 @@ func (m *DiskstatCollector) Init(config json.RawMessage) error { m.meta = map[string]string{"source": m.name, "group": "Disk"} m.setup() if len(config) > 0 { - err := json.Unmarshal(config, &m.config) - if err != nil { + if err := json.Unmarshal(config, &m.config); err != nil { return err } } - file, err := os.Open(string(MOUNTFILE)) + file, err := os.Open(MOUNTFILE) if err != nil { 
cclog.ComponentError(m.name, err.Error()) return err @@ -53,7 +67,7 @@ func (m *DiskstatCollector) Read(interval time.Duration, output chan lp.CCMessag return } - file, err := os.Open(string(MOUNTFILE)) + file, err := os.Open(MOUNTFILE) if err != nil { cclog.ComponentError(m.name, err.Error()) return @@ -62,6 +76,7 @@ func (m *DiskstatCollector) Read(interval time.Duration, output chan lp.CCMessag part_max_used := uint64(0) scanner := bufio.NewScanner(file) +mountLoop: for scanner.Scan() { line := scanner.Text() if len(line) == 0 { @@ -71,37 +86,41 @@ func (m *DiskstatCollector) Read(interval time.Duration, output chan lp.CCMessag continue } linefields := strings.Fields(line) - if strings.Contains(linefields[0], "loop") { + if strings.Contains(linefields[0], "loop") || strings.Contains(linefields[1], "boot") { continue } - if strings.Contains(linefields[1], "boot") { - continue + + mountPath := strings.Replace(linefields[1], `\040`, " ", -1) + for _, excl := range m.config.ExcludeMounts { + if strings.Contains(mountPath, excl) { + continue mountLoop + } } - path := strings.Replace(linefields[1], `\040`, " ", -1) - stat := syscall.Statfs_t{ - Blocks: 0, - Bsize: 0, - Bfree: 0, - } - err := syscall.Statfs(path, &stat) - if err != nil { + + stat := syscall.Statfs_t{} + if err := syscall.Statfs(mountPath, &stat); err != nil { continue } if stat.Blocks == 0 || stat.Bsize == 0 { continue } + tags := map[string]string{"type": "node", "device": linefields[0]} total := (stat.Blocks * uint64(stat.Bsize)) / uint64(1000000000) - y, err := lp.NewMessage("disk_total", tags, m.meta, map[string]interface{}{"value": total}, time.Now()) - if err == nil { - y.AddMeta("unit", "GBytes") - output <- y + if m.shouldOutput("disk_total") { + y, err := lp.NewMessage("disk_total", tags, m.meta, map[string]interface{}{"value": total}, time.Now()) + if err == nil { + y.AddMeta("unit", "GBytes") + output <- y + } } free := (stat.Bfree * uint64(stat.Bsize)) / uint64(1000000000) - y, err = 
lp.NewMessage("disk_free", tags, m.meta, map[string]interface{}{"value": free}, time.Now()) - if err == nil { - y.AddMeta("unit", "GBytes") - output <- y + if m.shouldOutput("disk_free") { + y, err := lp.NewMessage("disk_free", tags, m.meta, map[string]interface{}{"value": free}, time.Now()) + if err == nil { + y.AddMeta("unit", "GBytes") + output <- y + } } if total > 0 { perc := (100 * (total - free)) / total @@ -110,10 +129,12 @@ func (m *DiskstatCollector) Read(interval time.Duration, output chan lp.CCMessag } } } - y, err := lp.NewMessage("part_max_used", map[string]string{"type": "node"}, m.meta, map[string]interface{}{"value": int(part_max_used)}, time.Now()) - if err == nil { - y.AddMeta("unit", "percent") - output <- y + if m.shouldOutput("part_max_used") { + y, err := lp.NewMessage("part_max_used", map[string]string{"type": "node"}, m.meta, map[string]interface{}{"value": int(part_max_used)}, time.Now()) + if err == nil { + y.AddMeta("unit", "percent") + output <- y + } } } diff --git a/collectors/diskstatMetric.md b/collectors/diskstatMetric.md index a38f154..83228d2 100644 --- a/collectors/diskstatMetric.md +++ b/collectors/diskstatMetric.md @@ -1,21 +1,29 @@ - ## `diskstat` collector ```json "diskstat": { "exclude_metrics": [ - "disk_total" + "part_max_used" ], + "only_metrics": [ + "disk_free" + ], + "exclude_mounts": [ + "slurm-tmpfs" + ] } ``` -The `diskstat` collector reads data from `/proc/self/mounts` and outputs a handful **node** metrics. If a metric is not required, it can be excluded from forwarding it to the sink. +The `diskstat` collector reads data from `/proc/self/mounts` and outputs a handful of **node** metrics. +Any mount point containing one of the strings specified in `exclude_mounts` will be skipped during metric collection. + +Both filtering mechanisms are supported: +- `exclude_metrics`: Excludes the specified metrics. +- `only_metrics`: If provided, only the listed metrics are collected.
This takes precedence over `exclude_metrics`. Metrics per device (with `device` tag): -* `disk_total` (unit `GBytes`) -* `disk_free` (unit `GBytes`) +- `disk_total` (unit `GBytes`) +- `disk_free` (unit `GBytes`) Global metrics: -* `part_max_used` (unit `percent`) - - +- `part_max_used` (unit `percent`)