From 547bc0461f77d02002b1b8d46828b51536329dad Mon Sep 17 00:00:00 2001 From: Mehmet Soysal Date: Fri, 4 Mar 2022 14:35:47 +0100 Subject: [PATCH] Beegfs collector (#50) * added beegfs collectors to collectors/README.md * added beegfs collectors and docs * added new beegfs collectors to AvailableCollectors list * Feedback implemented * changed error type * changed error to only return * changed beegfs lookup path * fixed typo in md files Co-authored-by: Mehmet Soysal --- collectors/README.md | 2 + collectors/beegfsmetaMetric.go | 229 ++++++++++++++++++++++++++++++ collectors/beegfsmetaMetric.md | 75 ++++++++++ collectors/beegfsstorageMetric.go | 221 ++++++++++++++++++++++++++++ collectors/beegfsstorageMetric.md | 55 +++++++ collectors/collectorManager.go | 2 + 6 files changed, 584 insertions(+) create mode 100644 collectors/beegfsmetaMetric.go create mode 100644 collectors/beegfsmetaMetric.md create mode 100644 collectors/beegfsstorageMetric.go create mode 100644 collectors/beegfsstorageMetric.md diff --git a/collectors/README.md b/collectors/README.md index 00e0da7..3fcdd49 100644 --- a/collectors/README.md +++ b/collectors/README.md @@ -37,6 +37,8 @@ In contrast to the configuration files for sinks and receivers, the collectors c * [`cpufreq_cpuinfo`](./cpufreqCpuinfoMetric.md) * [`numastat`](./numastatMetric.md) * [`gpfs`](./gpfsMetric.md) +* [`beegfs_meta`](./beegfsmetaMetric.md) +* [`beegfs_storage`](./beegfsstorageMetric.md) ## Todos diff --git a/collectors/beegfsmetaMetric.go b/collectors/beegfsmetaMetric.go new file mode 100644 index 0000000..57b1e39 --- /dev/null +++ b/collectors/beegfsmetaMetric.go @@ -0,0 +1,229 @@ +package collectors + +import ( + "bufio" + "bytes" + "encoding/json" + "fmt" + "io/ioutil" + "os" + "os/exec" + "os/user" + "regexp" + "strconv" + "strings" + "time" + + cclog "github.com/ClusterCockpit/cc-metric-collector/internal/ccLogger" + lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric" +) + +const DEFAULT_BEEGFS_CMD = 
"beegfs-ctl" + +// Struct for the collector-specific JSON config +type BeegfsMetaCollectorConfig struct { + Beegfs string `json:"beegfs_path"` + ExcludeMetrics []string `json:"exclude_metrics,omitempty"` + ExcludeFilesystem []string `json:"exclude_filesystem"` +} + +type BeegfsMetaCollector struct { + metricCollector + tags map[string]string + matches map[string]string + config BeegfsMetaCollectorConfig + skipFS map[string]struct{} +} + +func (m *BeegfsMetaCollector) Init(config json.RawMessage) error { + // Check if already initialized + if m.init { + return nil + } + // Metrics + var nodeMdstat_array = [39]string{ + "sum", "ack", "close", "entInf", + "fndOwn", "mkdir", "create", "rddir", + "refrEn", "mdsInf", "rmdir", "rmLnk", + "mvDirIns", "mvFiIns", "open", "ren", + "sChDrct", "sAttr", "sDirPat", "stat", + "statfs", "trunc", "symlnk", "unlnk", + "lookLI", "statLI", "revalLI", "openLI", + "createLI", "hardlnk", "flckAp", "flckEn", + "flckRg", "dirparent", "listXA", "getXA", + "rmXA", "setXA", "mirror"} + + m.name = "BeegfsMetaCollector" + m.setup() + // Set default beegfs-ctl binary + + m.config.Beegfs = DEFAULT_BEEGFS_CMD + + // Read JSON configuration + if len(config) > 0 { + err := json.Unmarshal(config, &m.config) + if err != nil { + return err + } + } + + //create map with possible variables + m.matches = make(map[string]string) + for _, value := range nodeMdstat_array { + _, skip := stringArrayContains(m.config.ExcludeMetrics, value) + if skip { + m.matches["other"] = "0" + } else { + m.matches["beegfs_cmeta_"+value] = "0" + } + } + + m.meta = map[string]string{ + "source": m.name, + "group": "BeegfsMeta", + } + m.tags = map[string]string{ + "type": "node", + "filesystem": "", + } + m.skipFS = make(map[string]struct{}) + for _, fs := range m.config.ExcludeFilesystem { + m.skipFS[fs] = struct{}{} + } + + // Beegfs file system statistics can only be queried by user root + user, err := user.Current() + if err != nil { + return 
fmt.Errorf("BeegfsMetaCollector.Init(): Failed to get current user: %v", err) + } + if user.Uid != "0" { + return fmt.Errorf("BeegfsMetaCollector.Init(): BeeGFS file system statistics can only be queried by user root") + } + + // Check if beegfs-ctl is in executable search path + _, err = exec.LookPath(m.config.Beegfs) + if err != nil { + return fmt.Errorf("BeegfsMetaCollector.Init(): Failed to find beegfs-ctl binary '%s': %v", m.config.Beegfs, err) + } + m.init = true + return nil +} + +func (m *BeegfsMetaCollector) Read(interval time.Duration, output chan lp.CCMetric) { + if !m.init { + return + } + //get mounpoint + buffer, _ := ioutil.ReadFile(string("/proc/mounts")) + mounts := strings.Split(string(buffer), "\n") + var mountpoints []string + for _, line := range mounts { + if len(line) == 0 { + continue + } + f := strings.Fields(line) + if strings.Contains(f[0], "beegfs_ondemand") { + // Skip excluded filesystems + if _, skip := m.skipFS[f[1]]; skip { + continue + } + mountpoints = append(mountpoints, f[1]) + } + } + + if len(mountpoints) == 0 { + return + } + + for _, mountpoint := range mountpoints { + m.tags["filesystem"] = mountpoint + + // bwwgfs-ctl: + // --clientstats: Show client IO statistics. + // --nodetype=meta: The node type to query (meta, storage). 
+ // --interval: + // --mount=/mnt/beeond/: Which mount point + //cmd := exec.Command(m.config.Beegfs, "/root/mc/test.txt") + mountoption := "--mount=" + mountpoint + cmd := exec.Command(m.config.Beegfs, "--clientstats", + "--nodetype=meta", mountoption, "--allstats") + cmd.Stdin = strings.NewReader("\n") + cmdStdout := new(bytes.Buffer) + cmdStderr := new(bytes.Buffer) + cmd.Stdout = cmdStdout + cmd.Stderr = cmdStderr + err := cmd.Run() + if err != nil { + fmt.Fprintf(os.Stderr, "BeegfsMetaCollector.Read(): Failed to execute command \"%s\": %s\n", cmd.String(), err.Error()) + fmt.Fprintf(os.Stderr, "BeegfsMetaCollector.Read(): command exit code: \"%d\"\n", cmd.ProcessState.ExitCode()) + data, _ := ioutil.ReadAll(cmdStderr) + fmt.Fprintf(os.Stderr, "BeegfsMetaCollector.Read(): command stderr: \"%s\"\n", string(data)) + data, _ = ioutil.ReadAll(cmdStdout) + fmt.Fprintf(os.Stderr, "BeegfsMetaCollector.Read(): command stdout: \"%s\"\n", string(data)) + return + } + // Read I/O statistics + scanner := bufio.NewScanner(cmdStdout) + + sumLine := regexp.MustCompile(`^Sum:\s+\d+\s+\[[a-zA-Z]+\]+`) + //Line := regexp.MustCompile(`^(.*)\s+(\d)+\s+\[([a-zA-Z]+)\]+`) + statsLine := regexp.MustCompile(`^(.*?)\s+?(\d.*?)$`) + singleSpacePattern := regexp.MustCompile(`\s+`) + removePattern := regexp.MustCompile(`[\[|\]]`) + + for scanner.Scan() { + readLine := scanner.Text() + //fmt.Println(readLine) + // Jump few lines, we only want the I/O stats from nodes + if !sumLine.MatchString(readLine) { + continue + } + + match := statsLine.FindStringSubmatch(readLine) + // nodeName = "Sum:" or would be nodes + // nodeName := match[1] + //Remove multiple whitespaces + dummy := removePattern.ReplaceAllString(match[2], " ") + metaStats := strings.TrimSpace(singleSpacePattern.ReplaceAllString(dummy, " ")) + split := strings.Split(metaStats, " ") + + // fill map with values + // split[i+1] = mdname + // split[i] = amount of md operations + for i := 0; i <= len(split)-1; i += 2 { + if _, ok 
:= m.matches[split[i+1]]; ok { + m.matches["beegfs_cmeta_"+split[i+1]] = split[i] + } else { + f1, err := strconv.ParseFloat(m.matches["other"], 32) + if err != nil { + cclog.ComponentError( + m.name, + fmt.Sprintf("Metric (other): Failed to convert str written '%s' to float: %v", m.matches["other"], err)) + continue + } + f2, err := strconv.ParseFloat(split[i], 32) + if err != nil { + cclog.ComponentError( + m.name, + fmt.Sprintf("Metric (other): Failed to convert str written '%s' to float: %v", m.matches["other"], err)) + continue + } + //mdStat["other"] = fmt.Sprintf("%f", f1+f2) + m.matches["beegfs_cstorage_other"] = fmt.Sprintf("%f", f1+f2) + } + } + + for key, data := range m.matches { + value, _ := strconv.ParseFloat(data, 32) + y, err := lp.New(key, m.tags, m.meta, map[string]interface{}{"value": value}, time.Now()) + if err == nil { + output <- y + } + } + } + } +} + +func (m *BeegfsMetaCollector) Close() { + m.init = false +} diff --git a/collectors/beegfsmetaMetric.md b/collectors/beegfsmetaMetric.md new file mode 100644 index 0000000..932e72f --- /dev/null +++ b/collectors/beegfsmetaMetric.md @@ -0,0 +1,75 @@ +## `BeeGFS on Demand` collector +This Collector is to collect BeeGFS on Demand (BeeOND) metadata clientstats. + +```json + "beegfs_meta": { + "beegfs_path": "/usr/bin/beegfs-ctl", + "exclude_filesystem": [ + "/mnt/ignore_me" + ], + "exclude_metrics": [ + "ack", + "entInf", + "fndOwn" + ] + } +``` + +The `BeeGFS On Demand (BeeOND)` collector uses the `beegfs-ctl` command to read performance metrics for +BeeGFS filesystems. + +The reported filesystems can be filtered with the `exclude_filesystem` option +in the configuration. + +The path to the `beegfs-ctl` command can be configured with the `beegfs_path` option +in the configuration. + +When using the `exclude_metrics` option, the excluded metrics are summed as `other`. + +Important: The metrics listed below, are similar to the naming of BeeGFS. 
The Collector prefixes these with `beegfs_cmeta_` (beegfs client meta). + +For example beegfs metric `open` -> `beegfs_cmeta_open` + +Available Metrics: + +* sum +* ack +* close +* entInf +* fndOwn +* mkdir +* create +* rddir +* refrEnt +* mdsInf +* rmdir +* rmLnk +* mvDirIns +* mvFiIns +* open +* ren +* sChDrct +* sAttr +* sDirPat +* stat +* statfs +* trunc +* symlnk +* unlnk +* lookLI +* statLI +* revalLI +* openLI +* createLI +* hardlnk +* flckAp +* flckEn +* flckRg +* dirparent +* listXA +* getXA +* rmXA +* setXA +* mirror + +The collector adds a `filesystem` tag to all metrics \ No newline at end of file diff --git a/collectors/beegfsstorageMetric.go b/collectors/beegfsstorageMetric.go new file mode 100644 index 0000000..cbc8314 --- /dev/null +++ b/collectors/beegfsstorageMetric.go @@ -0,0 +1,221 @@ +package collectors + +import ( + "bufio" + "bytes" + "encoding/json" + "fmt" + "io/ioutil" + "os" + "os/exec" + "os/user" + "regexp" + "strconv" + "strings" + "time" + + cclog "github.com/ClusterCockpit/cc-metric-collector/internal/ccLogger" + lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric" +) + +// Struct for the collector-specific JSON config +type BeegfsStorageCollectorConfig struct { + Beegfs string `json:"beegfs_path"` + ExcludeMetrics []string `json:"exclude_metrics,omitempty"` + ExcludeFilesystem []string `json:"exclude_filesystem"` +} + +type BeegfsStorageCollector struct { + metricCollector + tags map[string]string + matches map[string]string + config BeegfsStorageCollectorConfig + skipFS map[string]struct{} +} + +func (m *BeegfsStorageCollector) Init(config json.RawMessage) error { + // Check if already initialized + if m.init { + return nil + } + // Metrics + var storageStat_array = [18]string{ + "sum", "ack", "sChDrct", "getFSize", + "sAttr", "statfs", "trunc", "close", + "fsync", "ops-rd", "MiB-rd/s", "ops-wr", + "MiB-wr/s", "gendbg", "hrtbeat", "remNode", + "storInf", "unlnk"} + + m.name = "BeegfsStorageCollector" + 
m.setup() + // Set default beegfs-ctl binary + + m.config.Beegfs = DEFAULT_BEEGFS_CMD + + // Read JSON configuration + if len(config) > 0 { + err := json.Unmarshal(config, &m.config) + if err != nil { + return err + } + } + println(m.config.Beegfs) + //create map with possible variables + m.matches = make(map[string]string) + for _, value := range storageStat_array { + _, skip := stringArrayContains(m.config.ExcludeMetrics, value) + if skip { + m.matches["other"] = "0" + } else { + m.matches["beegfs_cstorage_"+value] = "0" + } + } + + m.meta = map[string]string{ + "source": m.name, + "group": "BeegfsStorage", + } + m.tags = map[string]string{ + "type": "node", + "filesystem": "", + } + m.skipFS = make(map[string]struct{}) + for _, fs := range m.config.ExcludeFilesystem { + m.skipFS[fs] = struct{}{} + } + + // Beegfs file system statistics can only be queried by user root + user, err := user.Current() + if err != nil { + return fmt.Errorf("BeegfsStorageCollector.Init(): Failed to get current user: %v", err) + } + if user.Uid != "0" { + return fmt.Errorf("BeegfsStorageCollector.Init(): BeeGFS file system statistics can only be queried by user root") + } + + // Check if beegfs-ctl is in executable search path + _, err = exec.LookPath(m.config.Beegfs) + if err != nil { + return fmt.Errorf("BeegfsStorageCollector.Init(): Failed to find beegfs-ctl binary '%s': %v", m.config.Beegfs, err) + } + m.init = true + return nil +} + +func (m *BeegfsStorageCollector) Read(interval time.Duration, output chan lp.CCMetric) { + if !m.init { + return + } + //get mounpoint + buffer, _ := ioutil.ReadFile(string("/proc/mounts")) + mounts := strings.Split(string(buffer), "\n") + var mountpoints []string + for _, line := range mounts { + if len(line) == 0 { + continue + } + f := strings.Fields(line) + if strings.Contains(f[0], "beegfs_ondemand") { + // Skip excluded filesystems + if _, skip := m.skipFS[f[1]]; skip { + continue + } + mountpoints = append(mountpoints, f[1]) + } + } + if 
len(mountpoints) == 0 { + return + } + // collects stats for each BeeGFS on Demand FS + for _, mountpoint := range mountpoints { + m.tags["filesystem"] = mountpoint + + // bwwgfs-ctl: + // --clientstats: Show client IO statistics. + // --nodetype=meta: The node type to query (meta, storage). + // --interval: + // --mount=/mnt/beeond/: Which mount point + //cmd := exec.Command(m.config.Beegfs, "/root/mc/test.txt") + mountoption := "--mount=" + mountpoint + cmd := exec.Command(m.config.Beegfs, "--clientstats", + "--nodetype=storage", mountoption, "--allstats") + cmd.Stdin = strings.NewReader("\n") + cmdStdout := new(bytes.Buffer) + cmdStderr := new(bytes.Buffer) + cmd.Stdout = cmdStdout + cmd.Stderr = cmdStderr + err := cmd.Run() + if err != nil { + fmt.Fprintf(os.Stderr, "BeegfsStorageCollector.Read(): Failed to execute command \"%s\": %s\n", cmd.String(), err.Error()) + fmt.Fprintf(os.Stderr, "BeegfsStorageCollector.Read(): command exit code: \"%d\"\n", cmd.ProcessState.ExitCode()) + data, _ := ioutil.ReadAll(cmdStderr) + fmt.Fprintf(os.Stderr, "BeegfsStorageCollector.Read(): command stderr: \"%s\"\n", string(data)) + data, _ = ioutil.ReadAll(cmdStdout) + fmt.Fprintf(os.Stderr, "BeegfsStorageCollector.Read(): command stdout: \"%s\"\n", string(data)) + return + } + // Read I/O statistics + scanner := bufio.NewScanner(cmdStdout) + + sumLine := regexp.MustCompile(`^Sum:\s+\d+\s+\[[a-zA-Z]+\]+`) + //Line := regexp.MustCompile(`^(.*)\s+(\d)+\s+\[([a-zA-Z]+)\]+`) + statsLine := regexp.MustCompile(`^(.*?)\s+?(\d.*?)$`) + singleSpacePattern := regexp.MustCompile(`\s+`) + removePattern := regexp.MustCompile(`[\[|\]]`) + + for scanner.Scan() { + readLine := scanner.Text() + //fmt.Println(readLine) + // Jump few lines, we only want the I/O stats from nodes + if !sumLine.MatchString(readLine) { + continue + } + + match := statsLine.FindStringSubmatch(readLine) + // nodeName = "Sum:" or would be nodes + // nodeName := match[1] + //Remove multiple whitespaces + dummy := 
removePattern.ReplaceAllString(match[2], " ") + metaStats := strings.TrimSpace(singleSpacePattern.ReplaceAllString(dummy, " ")) + split := strings.Split(metaStats, " ") + + // fill map with values + // split[i+1] = mdname + // split[i] = amount of operations + for i := 0; i <= len(split)-1; i += 2 { + if _, ok := m.matches[split[i+1]]; ok { + m.matches["beegfs_cstorage_"+split[i+1]] = split[i] + //m.matches[split[i+1]] = split[i] + } else { + f1, err := strconv.ParseFloat(m.matches["other"], 32) + if err != nil { + cclog.ComponentError( + m.name, + fmt.Sprintf("Metric (other): Failed to convert str written '%s' to float: %v", m.matches["other"], err)) + continue + } + f2, err := strconv.ParseFloat(split[i], 32) + if err != nil { + cclog.ComponentError( + m.name, + fmt.Sprintf("Metric (other): Failed to convert str written '%s' to float: %v", m.matches["other"], err)) + continue + } + m.matches["beegfs_cstorage_other"] = fmt.Sprintf("%f", f1+f2) + } + } + + for key, data := range m.matches { + value, _ := strconv.ParseFloat(data, 32) + y, err := lp.New(key, m.tags, m.meta, map[string]interface{}{"value": value}, time.Now()) + if err == nil { + output <- y + } + } + } + } +} + +func (m *BeegfsStorageCollector) Close() { + m.init = false +} diff --git a/collectors/beegfsstorageMetric.md b/collectors/beegfsstorageMetric.md new file mode 100644 index 0000000..519b5bf --- /dev/null +++ b/collectors/beegfsstorageMetric.md @@ -0,0 +1,55 @@ +## `BeeGFS on Demand` collector +This Collector is to collect BeeGFS on Demand (BeeOND) storage stats. + +```json + "beegfs_storage": { + "beegfs_path": "/usr/bin/beegfs-ctl", + "exclude_filesystem": [ + "/mnt/ignore_me" + ], + "exclude_metrics": [ + "ack", + "storInf", + "unlnk" + ] + } +``` + +The `BeeGFS On Demand (BeeOND)` collector uses the `beegfs-ctl` command to read performance metrics for BeeGFS filesystems. + +The reported filesystems can be filtered with the `exclude_filesystem` option +in the configuration. 
+ +The path to the `beegfs-ctl` command can be configured with the `beegfs_path` option +in the configuration. + +When using the `exclude_metrics` option, the excluded metrics are summed as `other`. + +Important: The metrics listed below follow the naming used by BeeGFS. The Collector prefixes these with `beegfs_cstorage_` (beegfs client storage). +For example beegfs metric `close` -> `beegfs_cstorage_close` + +Note: BeeGFS reports many storage statistics. It probably makes sense to exclude most of them. Nevertheless, these excluded metrics will be summed as `beegfs_cstorage_other`. + +Available Metrics: + +* "sum" +* "ack" +* "sChDrct" +* "getFSize" +* "sAttr" +* "statfs" +* "trunc" +* "close" +* "fsync" +* "ops-rd" +* "MiB-rd/s" +* "ops-wr" +* "MiB-wr/s" +* "gendbg" +* "hrtbeat" +* "remNode" +* "storInf" +* "unlnk" + + +The collector adds a `filesystem` tag to all metrics \ No newline at end of file diff --git a/collectors/collectorManager.go b/collectors/collectorManager.go index 46d791a..e9ccfe7 100644 --- a/collectors/collectorManager.go +++ b/collectors/collectorManager.go @@ -34,6 +34,8 @@ var AvailableCollectors = map[string]MetricCollector{ "nfs3stat": new(Nfs3Collector), "nfs4stat": new(Nfs4Collector), "numastats": new(NUMAStatsCollector), + "beegfs_meta": new(BeegfsMetaCollector), + "beegfs_storage": new(BeegfsStorageCollector), } // Metric collector manager data structure