From cb1d6b91e528d7f4ddc45da74bb273d97c0ca0d0 Mon Sep 17 00:00:00 2001
From: Thomas Roehl <Thomas.Roehl@googlemail.com>
Date: Fri, 18 Feb 2022 14:18:18 +0100
Subject: [PATCH] Split diskstats (free, total space) and iostats (reads,
 writes, ...

---
 collectors/README.md         |   1 +
 collectors/diskstatMetric.go | 127 ++++++++++++++--------------
 collectors/diskstatMetric.md |  29 ++-----
 collectors/iostatMetric.go   | 155 +++++++++++++++++++++++++++++++++++
 collectors/iostatMetric.md   |  34 ++++++++
 5 files changed, 261 insertions(+), 85 deletions(-)
 create mode 100644 collectors/iostatMetric.go
 create mode 100644 collectors/iostatMetric.md

diff --git a/collectors/README.md b/collectors/README.md
index 393b200..00e0da7 100644
--- a/collectors/README.md
+++ b/collectors/README.md
@@ -18,6 +18,7 @@ In contrast to the configuration files for sinks and receivers, the collectors c
 
 * [`cpustat`](./cpustatMetric.md)
 * [`memstat`](./memstatMetric.md)
+* [`iostat`](./iostatMetric.md)
 * [`diskstat`](./diskstatMetric.md)
 * [`loadavg`](./loadavgMetric.md)
 * [`netstat`](./netstatMetric.md)
diff --git a/collectors/diskstatMetric.go b/collectors/diskstatMetric.go
index 50c41cd..819a1ab 100644
--- a/collectors/diskstatMetric.go
+++ b/collectors/diskstatMetric.go
@@ -1,18 +1,21 @@
 package collectors
 
 import (
-	"io/ioutil"
-	lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric"
-	//	"log"
+	"bufio"
 	"encoding/json"
-	"errors"
-	"strconv"
+	"fmt"
+	"os"
 	"strings"
+	"syscall"
 	"time"
+
+	cclog "github.com/ClusterCockpit/cc-metric-collector/internal/ccLogger"
+	lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric"
 )
 
-const DISKSTATFILE = `/proc/diskstats`
-const DISKSTAT_SYSFSPATH = `/sys/block`
+//	"log"
+
+const MOUNTFILE = `/proc/self/mounts`
 
 type DiskstatCollectorConfig struct {
 	ExcludeMetrics []string `json:"exclude_metrics,omitempty"`
@@ -20,93 +23,89 @@ type DiskstatCollectorConfig struct {
 
 type DiskstatCollector struct {
 	metricCollector
-	matches map[int]string
-	config  DiskstatCollectorConfig
+	// config holds this collector's own JSON options; it uses the
+	// diskstat config type rather than borrowing the iostat one.
+	config DiskstatCollectorConfig
 }
 
 func (m *DiskstatCollector) Init(config json.RawMessage) error {
-	var err error
 	m.name = "DiskstatCollector"
 	m.meta = map[string]string{"source": m.name, "group": "Disk"}
 	m.setup()
 	if len(config) > 0 {
-		err = json.Unmarshal(config, &m.config)
+		err := json.Unmarshal(config, &m.config)
 		if err != nil {
 			return err
 		}
 	}
-	// https://www.kernel.org/doc/html/latest/admin-guide/iostats.html
-	matches := map[int]string{
-		3:  "reads",
-		4:  "reads_merged",
-		5:  "read_sectors",
-		6:  "read_ms",
-		7:  "writes",
-		8:  "writes_merged",
-		9:  "writes_sectors",
-		10: "writes_ms",
-		11: "ioops",
-		12: "ioops_ms",
-		13: "ioops_weighted_ms",
-		14: "discards",
-		15: "discards_merged",
-		16: "discards_sectors",
-		17: "discards_ms",
-		18: "flushes",
-		19: "flushes_ms",
+	file, err := os.Open(string(MOUNTFILE))
+	if err != nil {
+		cclog.ComponentError(m.name, err.Error())
+		return err
 	}
-	m.matches = make(map[int]string)
-	for k, v := range matches {
-		_, skip := stringArrayContains(m.config.ExcludeMetrics, v)
-		if !skip {
-			m.matches[k] = v
-		}
-	}
-	if len(m.matches) == 0 {
-		return errors.New("No metrics to collect")
-	}
-	_, err = ioutil.ReadFile(string(DISKSTATFILE))
-	if err == nil {
-		m.init = true
-	}
-	return err
+	defer file.Close()
+	m.init = true
+	return nil
 }
 
 func (m *DiskstatCollector) Read(interval time.Duration, output chan lp.CCMetric) {
-	var lines []string
 	if !m.init {
 		return
 	}
 
-	buffer, err := ioutil.ReadFile(string(DISKSTATFILE))
+	file, err := os.Open(string(MOUNTFILE))
 	if err != nil {
+		cclog.ComponentError(m.name, err.Error())
 		return
 	}
-	lines = strings.Split(string(buffer), "\n")
+	defer file.Close()
 
-	for _, line := range lines {
+	part_max_used := uint64(0)
+	scanner := bufio.NewScanner(file)
+	for scanner.Scan() {
+		line := scanner.Text()
 		if len(line) == 0 {
 			continue
 		}
-		f := strings.Fields(line)
-		if strings.Contains(f[2], "loop") {
+		if !strings.HasPrefix(line, "/dev") {
 			continue
 		}
-		tags := map[string]string{
-			"device": f[2],
-			"type":   "node",
+		linefields := strings.Fields(line)
+		if strings.Contains(linefields[0], "loop") {
+			continue
 		}
-		for idx, name := range m.matches {
-			if idx < len(f) {
-				x, err := strconv.ParseInt(f[idx], 0, 64)
-				if err == nil {
-					y, err := lp.New(name, tags, m.meta, map[string]interface{}{"value": int(x)}, time.Now())
-					if err == nil {
-						output <- y
-					}
-				}
-			}
+		if strings.Contains(linefields[1], "boot") {
+			continue
 		}
+		path := strings.Replace(linefields[1], `\040`, " ", -1)
+		stat := syscall.Statfs_t{}
+		err := syscall.Statfs(path, &stat)
+		if err != nil {
+			fmt.Println(err.Error())
+			return
+		}
+		tags := map[string]string{"type": "node", "device": linefields[0]}
+		total := (stat.Blocks * uint64(stat.Bsize)) / uint64(1000000000)
+		y, err := lp.New("disk_total", tags, m.meta, map[string]interface{}{"value": total}, time.Now())
+		if err == nil {
+			y.AddMeta("unit", "GBytes")
+			output <- y
+		}
+		free := (stat.Bfree * uint64(stat.Bsize)) / uint64(1000000000)
+		y, err = lp.New("disk_free", tags, m.meta, map[string]interface{}{"value": free}, time.Now())
+		if err == nil {
+			y.AddMeta("unit", "GBytes")
+			output <- y
+		}
+		perc := (100 * (total - free)) / total
+		if perc > part_max_used {
+			part_max_used = perc
+		}
+	}
+	y, err := lp.New("part_max_used", map[string]string{"type": "node"}, m.meta, map[string]interface{}{"value": part_max_used}, time.Now())
+	if err == nil {
+		y.AddMeta("unit", "percent")
+		output <- y
 	}
 }
 
diff --git a/collectors/diskstatMetric.md b/collectors/diskstatMetric.md
index 1ac341d..a38f154 100644
--- a/collectors/diskstatMetric.md
+++ b/collectors/diskstatMetric.md
@@ -4,31 +4,18 @@
 ```json
   "diskstat": {
     "exclude_metrics": [
-      "read_ms"
+      "disk_total"
     ],
   }
 ```
 
-The `netstat` collector reads data from `/proc/net/dev` and outputs a handful **node** metrics. If a metric is not required, it can be excluded from forwarding it to the sink.
+The `diskstat` collector reads data from `/proc/self/mounts` and outputs a handful of **node** metrics. If a metric is not required, it can be excluded from forwarding it to the sink.
 
-Metrics:
-* `reads`
-* `reads_merged`
-* `read_sectors`
-* `read_ms`
-* `writes`
-* `writes_merged`
-* `writes_sectors`
-* `writes_ms`
-* `ioops`
-* `ioops_ms`
-* `ioops_weighted_ms`
-* `discards`
-* `discards_merged`
-* `discards_sectors`
-* `discards_ms`
-* `flushes`
-* `flushes_ms`
+Metrics per device (with `device` tag):
+* `disk_total` (unit `GBytes`)
+* `disk_free` (unit `GBytes`)
+
+Global metrics:
+* `part_max_used` (unit `percent`)
 
-The device name is added as tag `device`.
 
diff --git a/collectors/iostatMetric.go b/collectors/iostatMetric.go
new file mode 100644
index 0000000..ca7f33c
--- /dev/null
+++ b/collectors/iostatMetric.go
@@ -0,0 +1,155 @@
+package collectors
+
+import (
+	"bufio"
+	"os"
+
+	cclog "github.com/ClusterCockpit/cc-metric-collector/internal/ccLogger"
+	lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric"
+
+	//	"log"
+	"encoding/json"
+	"errors"
+	"strconv"
+	"strings"
+	"time"
+)
+
+const IOSTATFILE = `/proc/diskstats`
+const IOSTAT_SYSFSPATH = `/sys/block`
+
+type IOstatCollectorConfig struct {
+	ExcludeMetrics []string `json:"exclude_metrics,omitempty"`
+}
+
+type IOstatCollectorEntry struct {
+	lastValues map[string]int64
+	tags       map[string]string
+}
+
+type IOstatCollector struct {
+	metricCollector
+	matches map[string]int
+	config  IOstatCollectorConfig
+	devices map[string]IOstatCollectorEntry
+}
+
+// Init parses the JSON config, selects the /proc/diskstats columns that are not
+// excluded, and registers every non-loop device listed there with counters at 0.
+func (m *IOstatCollector) Init(config json.RawMessage) error {
+	m.name = "IOstatCollector"
+	m.meta = map[string]string{"source": m.name, "group": "Disk"}
+	m.setup()
+	if len(config) > 0 {
+		if err := json.Unmarshal(config, &m.config); err != nil {
+			return err
+		}
+	}
+	// https://www.kernel.org/doc/html/latest/admin-guide/iostats.html
+	matches := map[string]int{
+		"io_reads":             3,
+		"io_reads_merged":      4,
+		"io_read_sectors":      5,
+		"io_read_ms":           6,
+		"io_writes":            7,
+		"io_writes_merged":     8,
+		"io_writes_sectors":    9,
+		"io_writes_ms":         10,
+		"io_ioops":             11,
+		"io_ioops_ms":          12,
+		"io_ioops_weighted_ms": 13,
+		"io_discards":          14,
+		"io_discards_merged":   15,
+		"io_discards_sectors":  16,
+		"io_discards_ms":       17,
+		"io_flushes":           18,
+		"io_flushes_ms":        19,
+	}
+	m.devices = make(map[string]IOstatCollectorEntry)
+	m.matches = make(map[string]int)
+	for k, v := range matches {
+		if _, skip := stringArrayContains(m.config.ExcludeMetrics, k); !skip {
+			m.matches[k] = v
+		}
+	}
+	if len(m.matches) == 0 {
+		return errors.New("no metrics to collect")
+	}
+	file, err := os.Open(string(IOSTATFILE))
+	if err != nil {
+		cclog.ComponentError(m.name, err.Error())
+		return err
+	}
+	defer file.Close()
+
+	scanner := bufio.NewScanner(file)
+	for scanner.Scan() {
+		// Skip blank/short rows so indexing the device column cannot panic.
+		linefields := strings.Fields(scanner.Text())
+		if len(linefields) < 3 || strings.Contains(linefields[2], "loop") {
+			continue
+		}
+		device := linefields[2]
+		values := make(map[string]int64)
+		for name := range m.matches {
+			values[name] = 0
+		}
+		m.devices[device] = IOstatCollectorEntry{
+			tags: map[string]string{
+				"device": device,
+				"type":   "node",
+			},
+			lastValues: values,
+		}
+	}
+	m.init = true
+	return nil
+}
+
+// Read emits, for each device registered by Init, the change of every selected
+// /proc/diskstats counter since the previous call.
+func (m *IOstatCollector) Read(interval time.Duration, output chan lp.CCMetric) {
+	if !m.init {
+		return
+	}
+	file, err := os.Open(string(IOSTATFILE))
+	if err != nil {
+		cclog.ComponentError(m.name, err.Error())
+		return
+	}
+	defer file.Close()
+
+	scanner := bufio.NewScanner(file)
+	for scanner.Scan() {
+		// Skip rows too short to carry a device name (e.g. blank lines) rather than panic.
+		linefields := strings.Fields(scanner.Text())
+		if len(linefields) < 3 || strings.Contains(linefields[2], "loop") {
+			continue
+		}
+		device := linefields[2]
+		entry, ok := m.devices[device]
+		if !ok {
+			continue
+		}
+		for name, idx := range m.matches {
+			if idx >= len(linefields) {
+				continue
+			}
+			x, err := strconv.ParseInt(linefields[idx], 0, 64)
+			if err != nil {
+				// Keep the previous value so one bad field does not skew the next delta.
+				continue
+			}
+			diff := x - entry.lastValues[name]
+			if y, err := lp.New(name, entry.tags, m.meta, map[string]interface{}{"value": int(diff)}, time.Now()); err == nil {
+				output <- y
+			}
+			entry.lastValues[name] = x
+		}
+		m.devices[device] = entry
+	}
+}
+
+func (m *IOstatCollector) Close() {
+	m.init = false
+}
diff --git a/collectors/iostatMetric.md b/collectors/iostatMetric.md
new file mode 100644
index 0000000..e3e8604
--- /dev/null
+++ b/collectors/iostatMetric.md
@@ -0,0 +1,34 @@
+
+## `iostat` collector
+
+```json
+  "iostat": {
+    "exclude_metrics": [
+      "io_read_ms"
+    ],
+  }
+```
+
+The `iostat` collector reads data from `/proc/diskstats` and outputs a handful of **node** metrics. If a metric is not required, it can be excluded from forwarding it to the sink.
+
+Metrics:
+* `io_reads`
+* `io_reads_merged`
+* `io_read_sectors`
+* `io_read_ms`
+* `io_writes`
+* `io_writes_merged`
+* `io_writes_sectors`
+* `io_writes_ms`
+* `io_ioops`
+* `io_ioops_ms`
+* `io_ioops_weighted_ms`
+* `io_discards`
+* `io_discards_merged`
+* `io_discards_sectors`
+* `io_discards_ms`
+* `io_flushes`
+* `io_flushes_ms`
+
+The device name is added as tag `device`. For more details, see https://www.kernel.org/doc/html/latest/admin-guide/iostats.html
+