From 435528fa97ae5d2c15607a3c3efd38ad48ca0ba1 Mon Sep 17 00:00:00 2001 From: Thomas Gruber Date: Mon, 21 Feb 2022 12:44:26 +0100 Subject: [PATCH] Split diskstat Collector (#38) * Split diskstats (free, total space) and iostats (reads, writes, ... * Add iostat Collector to CollectorManager --- collectors/README.md | 1 + collectors/collectorManager.go | 1 + collectors/diskstatMetric.go | 127 ++++++++++++++------------- collectors/diskstatMetric.md | 29 ++---- collectors/iostatMetric.go | 155 +++++++++++++++++++++++++++++++++ collectors/iostatMetric.md | 34 ++++++++ 6 files changed, 262 insertions(+), 85 deletions(-) create mode 100644 collectors/iostatMetric.go create mode 100644 collectors/iostatMetric.md diff --git a/collectors/README.md b/collectors/README.md index 393b200..00e0da7 100644 --- a/collectors/README.md +++ b/collectors/README.md @@ -18,6 +18,7 @@ In contrast to the configuration files for sinks and receivers, the collectors c * [`cpustat`](./cpustatMetric.md) * [`memstat`](./memstatMetric.md) +* [`iostat`](./iostatMetric.md) * [`diskstat`](./diskstatMetric.md) * [`loadavg`](./loadavgMetric.md) * [`netstat`](./netstatMetric.md) diff --git a/collectors/collectorManager.go b/collectors/collectorManager.go index 7918793..86b423e 100644 --- a/collectors/collectorManager.go +++ b/collectors/collectorManager.go @@ -25,6 +25,7 @@ var AvailableCollectors = map[string]MetricCollector{ "topprocs": new(TopProcsCollector), "nvidia": new(NvidiaCollector), "customcmd": new(CustomCmdCollector), + "iostat": new(IOstatCollector), "diskstat": new(DiskstatCollector), "tempstat": new(TempCollector), "ipmistat": new(IpmiCollector), diff --git a/collectors/diskstatMetric.go b/collectors/diskstatMetric.go index 50c41cd..819a1ab 100644 --- a/collectors/diskstatMetric.go +++ b/collectors/diskstatMetric.go @@ -1,18 +1,21 @@ package collectors import ( - "io/ioutil" - lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric" - // "log" + "bufio" "encoding/json" - 
"errors" - "strconv" + "fmt" + "os" "strings" + "syscall" "time" + + cclog "github.com/ClusterCockpit/cc-metric-collector/internal/ccLogger" + lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric" ) -const DISKSTATFILE = `/proc/diskstats` -const DISKSTAT_SYSFSPATH = `/sys/block` +// "log" + +const MOUNTFILE = `/proc/self/mounts` type DiskstatCollectorConfig struct { ExcludeMetrics []string `json:"exclude_metrics,omitempty"` @@ -20,93 +23,89 @@ type DiskstatCollectorConfig struct { type DiskstatCollector struct { metricCollector - matches map[int]string - config DiskstatCollectorConfig + //matches map[string]int + config IOstatCollectorConfig + //devices map[string]IOstatCollectorEntry } func (m *DiskstatCollector) Init(config json.RawMessage) error { - var err error m.name = "DiskstatCollector" m.meta = map[string]string{"source": m.name, "group": "Disk"} m.setup() if len(config) > 0 { - err = json.Unmarshal(config, &m.config) + err := json.Unmarshal(config, &m.config) if err != nil { return err } } - // https://www.kernel.org/doc/html/latest/admin-guide/iostats.html - matches := map[int]string{ - 3: "reads", - 4: "reads_merged", - 5: "read_sectors", - 6: "read_ms", - 7: "writes", - 8: "writes_merged", - 9: "writes_sectors", - 10: "writes_ms", - 11: "ioops", - 12: "ioops_ms", - 13: "ioops_weighted_ms", - 14: "discards", - 15: "discards_merged", - 16: "discards_sectors", - 17: "discards_ms", - 18: "flushes", - 19: "flushes_ms", + file, err := os.Open(string(MOUNTFILE)) + if err != nil { + cclog.ComponentError(m.name, err.Error()) + return err } - m.matches = make(map[int]string) - for k, v := range matches { - _, skip := stringArrayContains(m.config.ExcludeMetrics, v) - if !skip { - m.matches[k] = v - } - } - if len(m.matches) == 0 { - return errors.New("No metrics to collect") - } - _, err = ioutil.ReadFile(string(DISKSTATFILE)) - if err == nil { - m.init = true - } - return err + defer file.Close() + m.init = true + return nil } func (m 
*DiskstatCollector) Read(interval time.Duration, output chan lp.CCMetric) { - var lines []string if !m.init { return } - buffer, err := ioutil.ReadFile(string(DISKSTATFILE)) + file, err := os.Open(string(MOUNTFILE)) if err != nil { + cclog.ComponentError(m.name, err.Error()) return } - lines = strings.Split(string(buffer), "\n") + defer file.Close() - for _, line := range lines { + part_max_used := uint64(0) + scanner := bufio.NewScanner(file) + for scanner.Scan() { + line := scanner.Text() if len(line) == 0 { continue } - f := strings.Fields(line) - if strings.Contains(f[2], "loop") { + if !strings.HasPrefix(line, "/dev") { continue } - tags := map[string]string{ - "device": f[2], - "type": "node", + linefields := strings.Fields(line) + if strings.Contains(linefields[0], "loop") { + continue } - for idx, name := range m.matches { - if idx < len(f) { - x, err := strconv.ParseInt(f[idx], 0, 64) - if err == nil { - y, err := lp.New(name, tags, m.meta, map[string]interface{}{"value": int(x)}, time.Now()) - if err == nil { - output <- y - } - } - } + if strings.Contains(linefields[1], "boot") { + continue } + path := strings.Replace(linefields[1], `\040`, " ", -1) + stat := syscall.Statfs_t{} + err := syscall.Statfs(path, &stat) + if err != nil { + fmt.Println(err.Error()) + return + } + tags := map[string]string{"type": "node", "device": linefields[0]} + total := (stat.Blocks * uint64(stat.Bsize)) / uint64(1000000000) + y, err := lp.New("disk_total", tags, m.meta, map[string]interface{}{"value": total}, time.Now()) + if err == nil { + y.AddMeta("unit", "GBytes") + output <- y + } + free := (stat.Bfree * uint64(stat.Bsize)) / uint64(1000000000) + y, err = lp.New("disk_free", tags, m.meta, map[string]interface{}{"value": free}, time.Now()) + if err == nil { + y.AddMeta("unit", "GBytes") + output <- y + } + perc := (100 * (total - free)) / total + if perc > part_max_used { + part_max_used = perc + } + } + y, err := lp.New("part_max_used", map[string]string{"type": 
"node"}, m.meta, map[string]interface{}{"value": part_max_used}, time.Now()) + if err == nil { + y.AddMeta("unit", "percent") + output <- y } } diff --git a/collectors/diskstatMetric.md b/collectors/diskstatMetric.md index 1ac341d..a38f154 100644 --- a/collectors/diskstatMetric.md +++ b/collectors/diskstatMetric.md @@ -4,31 +4,18 @@ ```json "diskstat": { "exclude_metrics": [ - "read_ms" + "disk_total" ], } ``` -The `netstat` collector reads data from `/proc/net/dev` and outputs a handful **node** metrics. If a metric is not required, it can be excluded from forwarding it to the sink. +The `diskstat` collector reads data from `/proc/self/mounts` and outputs a handful **node** metrics. If a metric is not required, it can be excluded from forwarding it to the sink. -Metrics: -* `reads` -* `reads_merged` -* `read_sectors` -* `read_ms` -* `writes` -* `writes_merged` -* `writes_sectors` -* `writes_ms` -* `ioops` -* `ioops_ms` -* `ioops_weighted_ms` -* `discards` -* `discards_merged` -* `discards_sectors` -* `discards_ms` -* `flushes` -* `flushes_ms` +Metrics per device (with `device` tag): +* `disk_total` (unit `GBytes`) +* `disk_free` (unit `GBytes`) + +Global metrics: +* `part_max_used` (unit `percent`) -The device name is added as tag `device`. 
diff --git a/collectors/iostatMetric.go b/collectors/iostatMetric.go new file mode 100644 index 0000000..ca7f33c --- /dev/null +++ b/collectors/iostatMetric.go @@ -0,0 +1,155 @@ +package collectors + +import ( + "bufio" + "os" + + cclog "github.com/ClusterCockpit/cc-metric-collector/internal/ccLogger" + lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric" + + // "log" + "encoding/json" + "errors" + "strconv" + "strings" + "time" +) + +const IOSTATFILE = `/proc/diskstats` +const IOSTAT_SYSFSPATH = `/sys/block` + +type IOstatCollectorConfig struct { + ExcludeMetrics []string `json:"exclude_metrics,omitempty"` +} + +type IOstatCollectorEntry struct { + lastValues map[string]int64 + tags map[string]string +} + +type IOstatCollector struct { + metricCollector + matches map[string]int + config IOstatCollectorConfig + devices map[string]IOstatCollectorEntry +} + +func (m *IOstatCollector) Init(config json.RawMessage) error { + var err error + m.name = "IOstatCollector" + m.meta = map[string]string{"source": m.name, "group": "Disk"} + m.setup() + if len(config) > 0 { + err = json.Unmarshal(config, &m.config) + if err != nil { + return err + } + } + // https://www.kernel.org/doc/html/latest/admin-guide/iostats.html + matches := map[string]int{ + "io_reads": 3, + "io_reads_merged": 4, + "io_read_sectors": 5, + "io_read_ms": 6, + "io_writes": 7, + "io_writes_merged": 8, + "io_writes_sectors": 9, + "io_writes_ms": 10, + "io_ioops": 11, + "io_ioops_ms": 12, + "io_ioops_weighted_ms": 13, + "io_discards": 14, + "io_discards_merged": 15, + "io_discards_sectors": 16, + "io_discards_ms": 17, + "io_flushes": 18, + "io_flushes_ms": 19, + } + m.devices = make(map[string]IOstatCollectorEntry) + m.matches = make(map[string]int) + for k, v := range matches { + if _, skip := stringArrayContains(m.config.ExcludeMetrics, k); !skip { + m.matches[k] = v + } + } + if len(m.matches) == 0 { + return errors.New("no metrics to collect") + } + file, err := os.Open(string(IOSTATFILE)) 
+ if err != nil { + cclog.ComponentError(m.name, err.Error()) + return err + } + defer file.Close() + + scanner := bufio.NewScanner(file) + for scanner.Scan() { + line := scanner.Text() + linefields := strings.Fields(line) + device := linefields[2] + if strings.Contains(device, "loop") { + continue + } + values := make(map[string]int64) + for m := range m.matches { + values[m] = 0 + } + m.devices[device] = IOstatCollectorEntry{ + tags: map[string]string{ + "device": linefields[2], + "type": "node", + }, + lastValues: values, + } + } + m.init = true + return err +} + +func (m *IOstatCollector) Read(interval time.Duration, output chan lp.CCMetric) { + if !m.init { + return + } + + file, err := os.Open(string(IOSTATFILE)) + if err != nil { + cclog.ComponentError(m.name, err.Error()) + return + } + defer file.Close() + + scanner := bufio.NewScanner(file) + for scanner.Scan() { + line := scanner.Text() + if len(line) == 0 { + continue + } + linefields := strings.Fields(line) + device := linefields[2] + if strings.Contains(device, "loop") { + continue + } + if _, ok := m.devices[device]; !ok { + continue + } + entry := m.devices[device] + for name, idx := range m.matches { + if idx < len(linefields) { + x, err := strconv.ParseInt(linefields[idx], 0, 64) + if err == nil { + diff := x - entry.lastValues[name] + y, err := lp.New(name, entry.tags, m.meta, map[string]interface{}{"value": int(diff)}, time.Now()) + if err == nil { + output <- y + } + } + entry.lastValues[name] = x + } + } + m.devices[device] = entry + } +} + +func (m *IOstatCollector) Close() { + m.init = false +} diff --git a/collectors/iostatMetric.md b/collectors/iostatMetric.md new file mode 100644 index 0000000..e3e8604 --- /dev/null +++ b/collectors/iostatMetric.md @@ -0,0 +1,34 @@ + +## `iostat` collector + +```json + "iostat": { + "exclude_metrics": [ + "read_ms" + ], + } +``` + +The `iostat` collector reads data from `/proc/diskstats` and outputs a handful **node** metrics. 
If a metric is not required, it can be excluded from being forwarded to the sink. + +Metrics: +* `io_reads` +* `io_reads_merged` +* `io_read_sectors` +* `io_read_ms` +* `io_writes` +* `io_writes_merged` +* `io_writes_sectors` +* `io_writes_ms` +* `io_ioops` +* `io_ioops_ms` +* `io_ioops_weighted_ms` +* `io_discards` +* `io_discards_merged` +* `io_discards_sectors` +* `io_discards_ms` +* `io_flushes` +* `io_flushes_ms` + +The device name is added as tag `device`. For more details, see https://www.kernel.org/doc/html/latest/admin-guide/iostats.html +