From 04fa267d9d52bae296a9162c8af84186e5fe1121 Mon Sep 17 00:00:00 2001 From: brinkcoder Date: Wed, 5 Mar 2025 01:11:59 +0100 Subject: [PATCH] add only_metrics. docs: clarify usage of filtering, consistency for metric list and units --- collectors/lustreMetric.go | 70 ++++++++++++++++---------------- collectors/lustreMetric.md | 81 +++++++++++++++++++++++--------------- 2 files changed, 84 insertions(+), 67 deletions(-) diff --git a/collectors/lustreMetric.go b/collectors/lustreMetric.go index f6d6ef6..05de17e 100644 --- a/collectors/lustreMetric.go +++ b/collectors/lustreMetric.go @@ -10,8 +10,8 @@ import ( "strings" "time" + lp "github.com/ClusterCockpit/cc-lib/ccMessage" cclog "github.com/ClusterCockpit/cc-metric-collector/pkg/ccLogger" - lp "github.com/ClusterCockpit/cc-energy-manager/pkg/cc-message" ) const LUSTRE_SYSFS = `/sys/fs/lustre` @@ -21,6 +21,7 @@ const LCTL_OPTION = `get_param` type LustreCollectorConfig struct { LCtlCommand string `json:"lctl_command,omitempty"` ExcludeMetrics []string `json:"exclude_metrics,omitempty"` + OnlyMetrics []string `json:"only_metrics,omitempty"` Sudo bool `json:"use_sudo,omitempty"` SendAbsoluteValues bool `json:"send_abs_values,omitempty"` SendDerivedValues bool `json:"send_derived_values,omitempty"` @@ -41,9 +42,26 @@ type LustreCollector struct { config LustreCollectorConfig lctl string sudoCmd string - lastTimestamp time.Time // Store time stamp of last tick to derive bandwidths + lastTimestamp time.Time // Timestamp of last tick for diff/derivative calculations definitions []LustreMetricDefinition // Combined list without excluded metrics - stats map[string]map[string]int64 // Data for last value per device and metric + stats map[string]map[string]int64 // Last measurement per device and metric +} + +func (m *LustreCollector) shouldOutput(metricName string) bool { + if len(m.config.OnlyMetrics) > 0 { + for _, n := range m.config.OnlyMetrics { + if n == metricName { + return true + } + } + return false + } + for _, n 
:= range m.config.ExcludeMetrics { + if n == metricName { + return false + } + } + return true } func (m *LustreCollector) getDeviceDataCommand(device string) []string { @@ -61,20 +79,7 @@ func (m *LustreCollector) getDeviceDataCommand(device string) []string { func (m *LustreCollector) getDevices() []string { devices := make([]string, 0) - - // //Version reading devices from sysfs - // globPattern := filepath.Join(LUSTRE_SYSFS, "llite/*/stats") - // files, err := filepath.Glob(globPattern) - // if err != nil { - // return devices - // } - // for _, f := range files { - // pathlist := strings.Split(f, "/") - // devices = append(devices, pathlist[4]) - // } - data := m.getDeviceDataCommand("*") - for _, line := range data { if strings.HasPrefix(line, "llite") { linefields := strings.Split(line, ".") @@ -96,18 +101,6 @@ func getMetricData(lines []string, prefix string, offset int) (int64, error) { return 0, errors.New("no such line in data") } -// //Version reading the stats data of a device from sysfs -// func (m *LustreCollector) getDeviceDataSysfs(device string) []string { -// llitedir := filepath.Join(LUSTRE_SYSFS, "llite") -// devdir := filepath.Join(llitedir, device) -// statsfile := filepath.Join(devdir, "stats") -// buffer, err := os.ReadFile(statsfile) -// if err != nil { -// return make([]string, 0) -// } -// return strings.Split(string(buffer), "\n") -// } - var LustreAbsMetrics = []LustreMetricDefinition{ { name: "lustre_read_requests", @@ -308,7 +301,7 @@ func (m *LustreCollector) Init(config json.RawMessage) error { return err } if user.Uid != "0" { - cclog.ComponentError(m.name, "Lustre file system statistics can only be queried by user root") + cclog.ComponentError(m.name, "Lustre statistics can only be queried by root") return err } } else { @@ -332,23 +325,26 @@ func (m *LustreCollector) Init(config json.RawMessage) error { m.definitions = []LustreMetricDefinition{} if m.config.SendAbsoluteValues { for _, def := range LustreAbsMetrics { - if _, skip 
:= stringArrayContains(m.config.ExcludeMetrics, def.name); !skip { - m.definitions = append(m.definitions, def) + if !m.shouldOutput(def.name) { + continue } + m.definitions = append(m.definitions, def) } } if m.config.SendDiffValues { for _, def := range LustreDiffMetrics { - if _, skip := stringArrayContains(m.config.ExcludeMetrics, def.name); !skip { - m.definitions = append(m.definitions, def) + if !m.shouldOutput(def.name) { + continue } + m.definitions = append(m.definitions, def) } } if m.config.SendDerivedValues { for _, def := range LustreDeriveMetrics { - if _, skip := stringArrayContains(m.config.ExcludeMetrics, def.name); !skip { - m.definitions = append(m.definitions, def) + if !m.shouldOutput(def.name) { + continue } + m.definitions = append(m.definitions, def) } } if len(m.definitions) == 0 { @@ -418,7 +414,9 @@ func (m *LustreCollector) Read(interval time.Duration, output chan lp.CCMessage) if len(def.unit) > 0 { y.AddMeta("unit", def.unit) } - output <- y + if m.shouldOutput(y.Name()) { + output <- y + } } devData[def.name] = use_x } diff --git a/collectors/lustreMetric.md b/collectors/lustreMetric.md index f11b85f..412ef71 100644 --- a/collectors/lustreMetric.md +++ b/collectors/lustreMetric.md @@ -1,46 +1,65 @@ - ## `lustrestat` collector ```json "lustrestat": { "lctl_command": "/path/to/lctl", "exclude_metrics": [ - "setattr", - "getattr" + "lustre_setattr", + "lustre_getattr" ], - "send_abs_values" : true, - "send_derived_values" : true, + "only_metrics": [ + "lustre_read_bytes", + "lustre_read_bytes_diff", + "lustre_read_bw", + "lustre_open", + "lustre_open_diff" + ], + "send_abs_values": true, "send_diff_values": true, + "send_derived_values": true, "use_sudo": false } ``` The `lustrestat` collector uses the `lctl` application with the `get_param` option to get all `llite` metrics (Lustre client). The `llite` metrics are only available for root users. If password-less sudo is configured, you can enable `sudo` in the configuration. 
-Metrics: -* `lustre_read_bytes` (unit `bytes`) -* `lustre_read_requests` (unit `requests`) -* `lustre_write_bytes` (unit `bytes`) -* `lustre_write_requests` (unit `requests`) -* `lustre_open` -* `lustre_close` -* `lustre_getattr` -* `lustre_setattr` -* `lustre_statfs` -* `lustre_inode_permission` -* `lustre_read_bw` (if `send_derived_values == true`, unit `bytes/sec`) -* `lustre_write_bw` (if `send_derived_values == true`, unit `bytes/sec`) -* `lustre_read_requests_rate` (if `send_derived_values == true`, unit `requests/sec`) -* `lustre_write_requests_rate` (if `send_derived_values == true`, unit `requests/sec`) -* `lustre_read_bytes_diff` (if `send_diff_values == true`, unit `bytes`) -* `lustre_read_requests_diff` (if `send_diff_values == true`, unit `requests`) -* `lustre_write_bytes_diff` (if `send_diff_values == true`, unit `bytes`) -* `lustre_write_requests_diff` (if `send_diff_values == true`, unit `requests`) -* `lustre_open_diff` (if `send_diff_values == true`) -* `lustre_close_diff` (if `send_diff_values == true`) -* `lustre_getattr_diff` (if `send_diff_values == true`) -* `lustre_setattr_diff` (if `send_diff_values == true`) -* `lustre_statfs_diff` (if `send_diff_values == true`) -* `lustre_inode_permission_diff` (if `send_diff_values == true`) +At least one of `send_abs_values`, `send_diff_values`, and `send_derived_values` must be set to true. -This collector adds an `device` tag. \ No newline at end of file +Two filtering mechanisms are supported: +- `exclude_metrics`: Excludes the specified metrics. +- `only_metrics`: If non-empty, only the listed metrics are collected and `exclude_metrics` is ignored. 
+ + +Metrics are categorized as follows: + +**Absolute Metrics:** +- `lustre_read_bytes` (unit: `bytes`) +- `lustre_read_requests` (unit: `requests`) +- `lustre_write_bytes` (unit: `bytes`) +- `lustre_write_requests` (unit: `requests`) +- `lustre_open` +- `lustre_close` +- `lustre_getattr` +- `lustre_setattr` +- `lustre_statfs` +- `lustre_inode_permission` + +**Diff Metrics:** +- `lustre_read_bytes_diff` (unit: `bytes`) +- `lustre_read_requests_diff` (unit: `requests`) +- `lustre_write_bytes_diff` (unit: `bytes`) +- `lustre_write_requests_diff` (unit: `requests`) +- `lustre_open_diff` +- `lustre_close_diff` +- `lustre_getattr_diff` +- `lustre_setattr_diff` +- `lustre_statfs_diff` +- `lustre_inode_permission_diff` + +**Derived Metrics:** +- `lustre_read_bw` (unit: `bytes/sec`) +- `lustre_write_bw` (unit: `bytes/sec`) +- `lustre_read_requests_rate` (unit: `requests/sec`) +- `lustre_write_requests_rate` (unit: `requests/sec`) + +This collector adds a `device` tag.