From e00b7af5ee2e2d69d8bbb2a1fbfabfc03a2cd042 Mon Sep 17 00:00:00 2001
From: Thomas Roehl <Thomas.Roehl@googlemail.com>
Date: Tue, 15 Mar 2022 14:44:21 +0100
Subject: [PATCH] Update LustreCollector

---
 collectors/lustreMetric.go | 373 +++++++++++++++++++++++++++----------
 collectors/lustreMetric.md |  34 ++--
 2 files changed, 301 insertions(+), 106 deletions(-)

diff --git a/collectors/lustreMetric.go b/collectors/lustreMetric.go
index 67efd7a..d5a96e4 100644
--- a/collectors/lustreMetric.go
+++ b/collectors/lustreMetric.go
@@ -19,23 +19,31 @@ const LCTL_CMD = `lctl`
 const LCTL_OPTION = `get_param`
 
 type LustreCollectorConfig struct {
-	LCtlCommand        string   `json:"lctl_command"`
-	ExcludeMetrics     []string `json:"exclude_metrics"`
-	SendAllMetrics     bool     `json:"send_all_metrics"`
-	Sudo               bool     `json:"use_sudo"`
-	SendAbsoluteValues bool     `json:"send_abs_values"`
-	SendDerivedValues  bool     `json:"send_derived_values"`
+	LCtlCommand        string   `json:"lctl_command,omitempty"`
+	ExcludeMetrics     []string `json:"exclude_metrics,omitempty"`
+	Sudo               bool     `json:"use_sudo,omitempty"`
+	SendAbsoluteValues bool     `json:"send_abs_values,omitempty"`
+	SendDerivedValues  bool     `json:"send_derived_values,omitempty"`
+	SendDiffValues     bool     `json:"send_diff_values,omitempty"`
+}
+
+type LustreMetricDefinition struct {
+	name       string
+	lineprefix string
+	lineoffset int
+	unit       string
+	calc       string
 }
 
 type LustreCollector struct {
 	metricCollector
 	tags          map[string]string
-	matches       map[string]map[string]int
-	stats         map[string]map[string]int64
 	config        LustreCollectorConfig
 	lctl          string
 	sudoCmd       string
-	lastTimestamp time.Time // Store time stamp of last tick to derive bandwidths
+	lastTimestamp time.Time                   // Store time stamp of last tick to derive bandwidths
+	definitions   []LustreMetricDefinition    // Combined list without excluded metrics
+	stats         map[string]map[string]int64 // Data for last value per device and metric
 }
 
 func (m *LustreCollector) getDeviceDataCommand(device string) []string {
@@ -78,6 +86,16 @@ func (m *LustreCollector) getDevices() []string {
 	return devices
 }
 
+func getMetricData(lines []string, prefix string, offset int) (int64, error) {
+	for _, line := range lines {
+		if strings.HasPrefix(line, prefix) {
+			lf := strings.Fields(line)
+			return strconv.ParseInt(lf[offset], 0, 64)
+		}
+	}
+	return 0, errors.New("no such line in data")
+}
+
 // //Version reading the stats data of a device from sysfs
 // func (m *LustreCollector) getDeviceDataSysfs(device string) []string {
 // 	llitedir := filepath.Join(LUSTRE_SYSFS, "llite")
@@ -90,6 +108,183 @@ func (m *LustreCollector) getDevices() []string {
 // 	return strings.Split(string(buffer), "\n")
 // }
 
+var LustreAbsMetrics = []LustreMetricDefinition{
+	{
+		name:       "lustre_read_requests",
+		lineprefix: "read_bytes",
+		lineoffset: 1,
+		unit:       "requests",
+		calc:       "none",
+	},
+	{
+		name:       "lustre_write_requests",
+		lineprefix: "write_bytes",
+		lineoffset: 1,
+		unit:       "requests",
+		calc:       "none",
+	},
+	{
+		name:       "lustre_read_bytes",
+		lineprefix: "read_bytes",
+		lineoffset: 6,
+		unit:       "bytes",
+		calc:       "none",
+	},
+	{
+		name:       "lustre_write_bytes",
+		lineprefix: "write_bytes",
+		lineoffset: 6,
+		unit:       "bytes",
+		calc:       "none",
+	},
+	{
+		name:       "lustre_open",
+		lineprefix: "open",
+		lineoffset: 1,
+		unit:       "",
+		calc:       "none",
+	},
+	{
+		name:       "lustre_close",
+		lineprefix: "close",
+		lineoffset: 1,
+		unit:       "",
+		calc:       "none",
+	},
+	{
+		name:       "lustre_setattr",
+		lineprefix: "setattr",
+		lineoffset: 1,
+		unit:       "",
+		calc:       "none",
+	},
+	{
+		name:       "lustre_getattr",
+		lineprefix: "getattr",
+		lineoffset: 1,
+		unit:       "",
+		calc:       "none",
+	},
+	{
+		name:       "lustre_statfs",
+		lineprefix: "statfs",
+		lineoffset: 1,
+		unit:       "",
+		calc:       "none",
+	},
+	{
+		name:       "lustre_inode_permission",
+		lineprefix: "inode_permission",
+		lineoffset: 1,
+		unit:       "",
+		calc:       "none",
+	},
+}
+
+var LustreDiffMetrics = []LustreMetricDefinition{
+	{
+		name:       "lustre_read_requests_diff",
+		lineprefix: "read_bytes",
+		lineoffset: 1,
+		unit:       "requests",
+		calc:       "difference",
+	},
+	{
+		name:       "lustre_write_requests_diff",
+		lineprefix: "write_bytes",
+		lineoffset: 1,
+		unit:       "requests",
+		calc:       "difference",
+	},
+	{
+		name:       "lustre_read_bytes_diff",
+		lineprefix: "read_bytes",
+		lineoffset: 6,
+		unit:       "bytes",
+		calc:       "difference",
+	},
+	{
+		name:       "lustre_write_bytes_diff",
+		lineprefix: "write_bytes",
+		lineoffset: 6,
+		unit:       "bytes",
+		calc:       "difference",
+	},
+	{
+		name:       "lustre_open_diff",
+		lineprefix: "open",
+		lineoffset: 1,
+		unit:       "",
+		calc:       "difference",
+	},
+	{
+		name:       "lustre_close_diff",
+		lineprefix: "close",
+		lineoffset: 1,
+		unit:       "",
+		calc:       "difference",
+	},
+	{
+		name:       "lustre_setattr_diff",
+		lineprefix: "setattr",
+		lineoffset: 1,
+		unit:       "",
+		calc:       "difference",
+	},
+	{
+		name:       "lustre_getattr_diff",
+		lineprefix: "getattr",
+		lineoffset: 1,
+		unit:       "",
+		calc:       "difference",
+	},
+	{
+		name:       "lustre_statfs_diff",
+		lineprefix: "statfs",
+		lineoffset: 1,
+		unit:       "",
+		calc:       "difference",
+	},
+	{
+		name:       "lustre_inode_permission_diff",
+		lineprefix: "inode_permission",
+		lineoffset: 1,
+		unit:       "",
+		calc:       "difference",
+	},
+}
+
+var LustreDeriveMetrics = []LustreMetricDefinition{
+	{
+		name:       "lustre_read_requests_rate",
+		lineprefix: "read_bytes",
+		lineoffset: 1,
+		unit:       "requests/sec",
+		calc:       "derivative",
+	},
+	{
+		name:       "lustre_write_requests_rate",
+		lineprefix: "write_bytes",
+		lineoffset: 1,
+		unit:       "requests/sec",
+		calc:       "derivative",
+	},
+	{
+		name:       "lustre_read_bw",
+		lineprefix: "read_bytes",
+		lineoffset: 6,
+		unit:       "bytes/sec",
+		calc:       "derivative",
+	},
+	{
+		name:       "lustre_write_bw",
+		lineprefix: "write_bytes",
+		lineoffset: 6,
+		unit:       "bytes/sec",
+		calc:       "derivative",
+	},
+}
+
 func (m *LustreCollector) Init(config json.RawMessage) error {
 	var err error
 	m.name = "LustreCollector"
@@ -102,17 +297,9 @@ func (m *LustreCollector) Init(config json.RawMessage) error {
 	m.setup()
 	m.tags = map[string]string{"type": "node"}
 	m.meta = map[string]string{"source": m.name, "group": "Lustre"}
-	defmatches := map[string]map[string]int{
-		"read_bytes":       {"lustre_read_bytes": 6, "lustre_read_requests": 1},
-		"write_bytes":      {"lustre_write_bytes": 6, "lustre_write_requests": 1},
-		"open":             {"lustre_open": 1},
-		"close":            {"lustre_close": 1},
-		"setattr":          {"lustre_setattr": 1},
-		"getattr":          {"lustre_getattr": 1},
-		"statfs":           {"lustre_statfs": 1},
-		"inode_permission": {"lustre_inode_permission": 1}}
 
 	// Lustre file system statistics can only be queried by user root
+	// or with password-less sudo
 	if !m.config.Sudo {
 		user, err := user.Current()
 		if err != nil {
@@ -123,23 +310,15 @@ func (m *LustreCollector) Init(config json.RawMessage) error {
 			cclog.ComponentError(m.name, "Lustre file system statistics can only be queried by user root")
 			return err
 		}
+	} else {
+		p, err := exec.LookPath("sudo")
+		if err != nil {
+			cclog.ComponentError(m.name, "Cannot find 'sudo'")
+			return err
+		}
+		m.sudoCmd = p
 	}
 
-	m.matches = make(map[string]map[string]int)
-	for lineprefix, names := range defmatches {
-		for metricname, offset := range names {
-			_, skip := stringArrayContains(m.config.ExcludeMetrics, metricname)
-			if skip {
-				continue
-			}
-			if _, prefixExist := m.matches[lineprefix]; !prefixExist {
-				m.matches[lineprefix] = make(map[string]int)
-			}
-			if _, metricExist := m.matches[lineprefix][metricname]; !metricExist {
-				m.matches[lineprefix][metricname] = offset
-			}
-		}
-	}
 	p, err := exec.LookPath(m.config.LCtlCommand)
 	if err != nil {
 		p, err = exec.LookPath(LCTL_CMD)
@@ -148,23 +327,47 @@ func (m *LustreCollector) Init(config json.RawMessage) error {
 		}
 	}
 	m.lctl = p
-	if m.config.Sudo {
-		p, err := exec.LookPath("sudo")
-		if err != nil {
-			m.sudoCmd = p
+
+	m.definitions = []LustreMetricDefinition{}
+	if m.config.SendAbsoluteValues {
+		for _, def := range LustreAbsMetrics {
+			if _, skip := stringArrayContains(m.config.ExcludeMetrics, def.name); !skip {
+				m.definitions = append(m.definitions, def)
+			}
 		}
 	}
+	if m.config.SendDiffValues {
+		for _, def := range LustreDiffMetrics {
+			if _, skip := stringArrayContains(m.config.ExcludeMetrics, def.name); !skip {
+				m.definitions = append(m.definitions, def)
+			}
+		}
+	}
+	if m.config.SendDerivedValues {
+		for _, def := range LustreDeriveMetrics {
+			if _, skip := stringArrayContains(m.config.ExcludeMetrics, def.name); !skip {
+				m.definitions = append(m.definitions, def)
+			}
+		}
+	}
+	if len(m.definitions) == 0 {
+		return errors.New("no metrics to collect")
+	}
 
 	devices := m.getDevices()
 	if len(devices) == 0 {
-		return errors.New("no metrics to collect")
+		return errors.New("no Lustre devices found")
 	}
 	m.stats = make(map[string]map[string]int64)
 	for _, d := range devices {
 		m.stats[d] = make(map[string]int64)
-		for _, names := range m.matches {
-			for metricname := range names {
-				m.stats[d][metricname] = 0
+		data := m.getDeviceDataCommand(d)
+		for _, def := range m.definitions {
+			x, err := getMetricData(data, def.lineprefix, def.lineoffset)
+			if err == nil {
+				m.stats[d][def.name] = x
+			} else {
+				m.stats[d][def.name] = 0
 			}
 		}
 	}
@@ -180,63 +383,43 @@ func (m *LustreCollector) Read(interval time.Duration, output chan lp.CCMetric)
 	now := time.Now()
 	tdiff := now.Sub(m.lastTimestamp)
 	for device, devData := range m.stats {
-		stats := m.getDeviceDataCommand(device)
-		processed := []string{}
-
-		for _, line := range stats {
-			lf := strings.Fields(line)
-			if len(lf) > 1 {
-				if fields, ok := m.matches[lf[0]]; ok {
-					for name, idx := range fields {
-						x, err := strconv.ParseInt(lf[idx], 0, 64)
-						if err == nil {
-							value := x - devData[name]
-							devData[name] = x
-							if value < 0 {
-								value = 0
-							}
-							if m.config.SendAbsoluteValues {
-								y, err := lp.New(name, m.tags, m.meta, map[string]interface{}{"value": value}, time.Now())
-								if err == nil {
-									y.AddTag("device", device)
-									if strings.Contains(name, "byte") {
-										y.AddMeta("unit", "Byte")
-									}
-									output <- y
-									if m.config.SendAllMetrics {
-										processed = append(processed, name)
-									}
-								}
-							}
-							if m.config.SendDerivedValues && strings.Contains(name, "bytes") {
-								y, err := lp.New(name+"_bw", m.tags, m.meta, map[string]interface{}{"value": float64(value) / tdiff.Seconds()}, time.Now())
-								if err == nil {
-									y.AddTag("device", device)
-									y.AddMeta("unit", "Bytes/sec")
-									output <- y
-									if m.config.SendAllMetrics {
-										processed = append(processed, name)
-									}
-								}
-							}
-						}
-					}
-				}
+		data := m.getDeviceDataCommand(device)
+		for _, def := range m.definitions {
+			var use_x int64
+			var err error
+			var y lp.CCMetric
+			x, err := getMetricData(data, def.lineprefix, def.lineoffset)
+			if err == nil {
+				use_x = x
+			} else {
+				use_x = devData[def.name]
 			}
-		}
-		if m.config.SendAllMetrics {
-			for name := range devData {
-				if _, done := stringArrayContains(processed, name); !done {
-					y, err := lp.New(name, m.tags, m.meta, map[string]interface{}{"value": 0}, time.Now())
-					if err == nil {
-						y.AddTag("device", device)
-						if strings.Contains(name, "byte") {
-							y.AddMeta("unit", "Byte")
-						}
-						output <- y
-					}
+			var value interface{}
+			switch def.calc {
+			case "none":
+				value = use_x
+				y, err = lp.New(def.name, m.tags, m.meta, map[string]interface{}{"value": value}, time.Now())
+			case "difference":
+				value = use_x - devData[def.name]
+				if value.(int64) < 0 {
+					value = 0
 				}
+				y, err = lp.New(def.name, m.tags, m.meta, map[string]interface{}{"value": value}, time.Now())
+			case "derivative":
+				value = float64(use_x-devData[def.name]) / tdiff.Seconds()
+				if value.(float64) < 0 {
+					value = 0
+				}
+				y, err = lp.New(def.name, m.tags, m.meta, map[string]interface{}{"value": value}, time.Now())
 			}
+			if err == nil {
+				y.AddTag("device", device)
+				if len(def.unit) > 0 {
+					y.AddMeta("unit", def.unit)
+				}
+				output <- y
+			}
+			devData[def.name] = use_x
 		}
 	}
 	m.lastTimestamp = now
diff --git a/collectors/lustreMetric.md b/collectors/lustreMetric.md
index de4ed60..f11b85f 100644
--- a/collectors/lustreMetric.md
+++ b/collectors/lustreMetric.md
@@ -3,32 +3,44 @@
 
 ```json
   "lustrestat": {
-    "procfiles" : [
-      "/proc/fs/lustre/llite/lnec-XXXXXX/stats"
-    ],
+    "lctl_command": "/path/to/lctl",
     "exclude_metrics": [
       "setattr",
       "getattr"
     ],
     "send_abs_values" : true,
-    "send_derived_values" : true
+    "send_derived_values" : true,
+    "send_diff_values": true,
+    "use_sudo": false
   }
 ```
 
-The `lustrestat` collector reads from the procfs stat files for Lustre like `/proc/fs/lustre/llite/lnec-XXXXXX/stats`.
+The `lustrestat` collector uses the `lctl` application with the `get_param` option to get all `llite` metrics (Lustre client). The `llite` metrics are only available for root users. If password-less sudo is configured, you can enable `sudo` in the configuration.
 
 Metrics:
-* `lustre_read_bytes`
-* `lustre_read_requests`
-* `lustre_write_bytes`
-* `lustre_write_requests`
+* `lustre_read_bytes` (unit `bytes`)
+* `lustre_read_requests` (unit `requests`)
+* `lustre_write_bytes` (unit `bytes`)
+* `lustre_write_requests` (unit `requests`)
 * `lustre_open`
 * `lustre_close`
 * `lustre_getattr`
 * `lustre_setattr`
 * `lustre_statfs`
 * `lustre_inode_permission`
-* `lustre_read_bytes_bw` (if `send_derived_values == true`)
-* `lustre_write_bytes_bw` (if `send_derived_values == true`)
+* `lustre_read_bw` (if `send_derived_values == true`, unit `bytes/sec`)
+* `lustre_write_bw` (if `send_derived_values == true`, unit `bytes/sec`)
+* `lustre_read_requests_rate` (if `send_derived_values == true`, unit `requests/sec`)
+* `lustre_write_requests_rate` (if `send_derived_values == true`, unit `requests/sec`)
+* `lustre_read_bytes_diff` (if `send_diff_values == true`, unit `bytes`)
+* `lustre_read_requests_diff` (if `send_diff_values == true`, unit `requests`)
+* `lustre_write_bytes_diff` (if `send_diff_values == true`, unit `bytes`)
+* `lustre_write_requests_diff` (if `send_diff_values == true`, unit `requests`)
+* `lustre_open_diff` (if `send_diff_values == true`)
+* `lustre_close_diff` (if `send_diff_values == true`)
+* `lustre_getattr_diff` (if `send_diff_values == true`)
+* `lustre_setattr_diff` (if `send_diff_values == true`)
+* `lustre_statfs_diff` (if `send_diff_values == true`)
+* `lustre_inode_permission_diff` (if `send_diff_values == true`)
 
 This collector adds an `device` tag.
\ No newline at end of file