From 43a8ea683dd398f6d3df9508c93a48c8f08debba Mon Sep 17 00:00:00 2001 From: Thomas Roehl Date: Thu, 6 Jan 2022 15:25:51 +0100 Subject: [PATCH 01/22] Cast collector measurement duration to seconds. Thanks to KIT --- collectors/likwidMetric.go | 2 +- metric-collector.go | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/collectors/likwidMetric.go b/collectors/likwidMetric.go index 2fd1129..34e2364 100644 --- a/collectors/likwidMetric.go +++ b/collectors/likwidMetric.go @@ -200,7 +200,7 @@ func (m *LikwidCollector) Read(interval time.Duration, out *[]lp.MutableMetric) res := C.perfmon_getLastResult(gid, eidx, C.int(tid)) m.results[i][tid][gctr] = float64(res) } - m.results[i][tid]["time"] = float64(interval) + m.results[i][tid]["time"] = interval.Seconds() m.results[i][tid]["inverseClock"] = float64(1.0 / m.basefreq) for _, metric := range evset.Metrics { expression, err := govaluate.NewEvaluableExpression(metric.Calc) diff --git a/metric-collector.go b/metric-collector.go index f6c8f5c..fd3b556 100644 --- a/metric-collector.go +++ b/metric-collector.go @@ -281,7 +281,7 @@ func main() { // storage locations for _, c := range config.Collectors { col := Collectors[c] - col.Read(time.Duration(config.Duration), &tmpPoints) + col.Read(time.Duration(config.Duration)*time.Second, &tmpPoints) for { if len(tmpPoints) == 0 { From 11e40c6ee3a11a5d1add7964e37813ec23c512c1 Mon Sep 17 00:00:00 2001 From: Holger Obermaier <40787752+ho-ob@users.noreply.github.com> Date: Wed, 19 Jan 2022 10:15:41 +0100 Subject: [PATCH 02/22] Add IB metrics ib_recv_pkts and ib_xmit_pkts --- collectors/infinibandMetric.go | 44 ++++++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) diff --git a/collectors/infinibandMetric.go b/collectors/infinibandMetric.go index 93725d1..54c974e 100644 --- a/collectors/infinibandMetric.go +++ b/collectors/infinibandMetric.go @@ -47,6 +47,8 @@ func (m *InfinibandCollector) Help() { fmt.Println("Metrics:") fmt.Println("- ib_recv") fmt.Println("- ib_xmit") + fmt.Println("- ib_recv_pkts") + fmt.Println("- ib_xmit_pkts") } func (m *InfinibandCollector) Init(config []byte) error { @@ -143,6 +145,26 @@ func DoPerfQuery(cmd string, dev string, lid string, port string, tags map[strin } } } + if strings.HasPrefix(line, "PortRcvPkts") || strings.HasPrefix(line, "RcvPkts") { + lv := strings.Fields(line) + v, err := strconv.ParseFloat(lv[1], 64) + if err == nil { + y, err := lp.New("ib_recv_pkts", tags, map[string]interface{}{"value": float64(v)}, time.Now()) + if err == nil { + *out = append(*out, y) + } + } + } + if strings.HasPrefix(line, "PortXmitPkts") || strings.HasPrefix(line, "XmtPkts") { + lv := strings.Fields(line) + v, err := strconv.ParseFloat(lv[1], 64) + if err == nil { + y, err := lp.New("ib_xmit_pkts", tags, map[string]interface{}{"value": float64(v)}, time.Now()) + if err == nil { + *out = append(*out, y) + } + } + } } return nil } @@ -171,6 +193,28 @@ func DoSysfsRead(dev string, lid string, port string, tags map[string]string, ou } } } + buffer, err = ioutil.ReadFile(fmt.Sprintf("%s/port_rcv_packets", path)) + if err == nil { + data := strings.Replace(string(buffer), "\n", "", -1) + v, err := strconv.ParseFloat(data, 64) + if err == nil { + y, err := lp.New("ib_recv_pkts", tags, map[string]interface{}{"value": float64(v)}, time.Now()) + if err == nil { + *out = append(*out, y) + } + } + } + buffer, err = ioutil.ReadFile(fmt.Sprintf("%s/port_xmit_packets", path)) + if err == nil { + data := strings.Replace(string(buffer), "\n", "", -1) + v, err := strconv.ParseFloat(data, 64) + if err == nil { + y, err := lp.New("ib_xmit_pkts", tags, map[string]interface{}{"value": float64(v)}, time.Now()) + if err == nil { + *out = append(*out, y) + } + } + } return nil } From b97c5886600c1be0e130481ae9da65f56c268c59 Mon Sep 17 00:00:00 2001 From: Holger Obermaier <40787752+ho-ob@users.noreply.github.com> Date: Wed, 19 Jan 2022 14:25:24 +0100 Subject: [PATCH 03/22] Add GPFS / IBM Spectrum Scale collector --- collectors/gpfs.go | 298 ++++++++++++++++++++++++++++++++++++++++++++ metric-collector.go | 1 + 2 files changed, 299 insertions(+) create mode 100644 collectors/gpfs.go diff --git a/collectors/gpfs.go b/collectors/gpfs.go new file mode 100644 index 0000000..14398b4 --- /dev/null +++ b/collectors/gpfs.go @@ -0,0 +1,298 @@ +package collectors + +import ( + "bufio" + "bytes" + "encoding/json" + "fmt" + "io/ioutil" + "log" + "os" + "os/exec" + "os/user" + "strconv" + "strings" + "time" + + lp "github.com/influxdata/line-protocol" +) + +type GpfsCollectorConfig struct { + Mmpmon string `json:"mmpmon"` +} + +type GpfsCollector struct { + MetricCollector + config GpfsCollectorConfig +} + +func (m *GpfsCollector) Init(config []byte) error { + var err error + m.name = "GpfsCollector" + m.setup() + + // Set default mmpmon binary + m.config.Mmpmon = "/usr/lpp/mmfs/bin/mmpmon" + + // Read JSON configuration + if len(config) > 0 { + err = json.Unmarshal(config, &m.config) + if err != nil { + log.Print(err.Error()) + return err + } + } + + // GPFS / IBM Spectrum Scale file system statistics can only be queried by user root + user, err := user.Current() + if err != nil { + return fmt.Errorf("GpfsCollector.Init(): Failed to get current user: %v", err) + } + if user.Uid != "0" { + return fmt.Errorf("GpfsCollector.Init(): GPFS file system statistics can only be queried by user root") + } + + // Check if mmpmon is in executable search path + _, err = exec.LookPath(m.config.Mmpmon) + if err != nil { + return fmt.Errorf("GpfsCollector.Init(): Failed to find mmpmon binary '%s': %v", m.config.Mmpmon, err) + } + + m.init = true + return nil +} + +func (m *GpfsCollector) Read(interval time.Duration, out *[]lp.MutableMetric) { + if !m.init { + return + } + + // mmpmon: + // -p: generate output that can be parsed + // -s: suppress the prompt on input + // fs_io_s: Displays I/O statistics per mounted file system + cmd := exec.Command(m.config.Mmpmon, "-p", "-s") + cmd.Stdin = strings.NewReader("once fs_io_s\n") + cmdStdout := new(bytes.Buffer) + cmdStderr := new(bytes.Buffer) + cmd.Stdout = cmdStdout + cmd.Stderr = cmdStderr + err := cmd.Run() + if err != nil { + fmt.Fprintf(os.Stderr, "GpfsCollector.Read(): Failed to execute command \"%s\": %s\n", cmd.String(), err.Error()) + fmt.Fprintf(os.Stderr, "GpfsCollector.Read(): command exit code: \"%d\"\n", cmd.ProcessState.ExitCode()) + data, _ := ioutil.ReadAll(cmdStderr) + fmt.Fprintf(os.Stderr, "GpfsCollector.Read(): command stderr: \"%s\"\n", string(data)) + data, _ = ioutil.ReadAll(cmdStdout) + fmt.Fprintf(os.Stderr, "GpfsCollector.Read(): command stdout: \"%s\"\n", string(data)) + return + } + + // Read I/O statistics + scanner := bufio.NewScanner(cmdStdout) + for scanner.Scan() { + lineSplit := strings.Fields(scanner.Text()) + if lineSplit[0] == "_fs_io_s_" { + key_value := make(map[string]string) + for i := 1; i < len(lineSplit); i += 2 { + key_value[lineSplit[i]] = lineSplit[i+1] + } + + // Ignore keys: + // _n_: node IP address, + // _nn_: node name, + // _cl_: cluster name, + // _d_: number of disks + + filesystem, ok := key_value["_fs_"] + if !ok { + fmt.Fprintf(os.Stderr, "GpfsCollector.Read(): Failed to get filesystem name.\n") + continue + } + + // return code + rc, err := strconv.Atoi(key_value["_rc_"]) + if err != nil { + fmt.Fprintf(os.Stderr, "GpfsCollector.Read(): Failed to convert return code: %s\n", err.Error()) + continue + } + if rc != 0 { + fmt.Fprintf(os.Stderr, "GpfsCollector.Read(): Filesystem %s not ok.", filesystem) + continue + } + + // unix epoch in microseconds + timestampInt, err := strconv.ParseInt(key_value["_t_"]+key_value["_tu_"], 10, 64) + timestamp := time.UnixMicro(timestampInt) + if err != nil { + fmt.Fprintf(os.Stderr, + "GpfsCollector.Read(): Failed to convert time stamp '%s': %s\n", + key_value["_t_"]+key_value["_tu_"], err.Error()) + continue + } + + // bytes read + bytesRead, err := strconv.ParseInt(key_value["_br_"], 10, 64) + if err != nil { + fmt.Fprintf(os.Stderr, + "GpfsCollector.Read(): Failed to convert bytes read '%s': %s\n", + key_value["_br_"], err.Error()) + continue + } + y, err := lp.New( + "gpfs_bytes_read", + map[string]string{ + "filesystem": filesystem, + }, + map[string]interface{}{ + "value": bytesRead, + }, + timestamp) + if err == nil { + *out = append(*out, y) + } + + // bytes written + bytesWritten, err := strconv.ParseInt(key_value["_bw_"], 10, 64) + if err != nil { + fmt.Fprintf(os.Stderr, + "GpfsCollector.Read(): Failed to convert bytes written '%s': %s\n", + key_value["_bw_"], err.Error()) + continue + } + y, err = lp.New( + "gpfs_bytes_written", + map[string]string{ + "filesystem": filesystem, + }, + map[string]interface{}{ + "value": bytesWritten, + }, + timestamp) + if err == nil { + *out = append(*out, y) + } + + // number of opens + numOpens, err := strconv.ParseInt(key_value["_oc_"], 10, 64) + if err != nil { + fmt.Fprintf(os.Stderr, + "GpfsCollector.Read(): Failed to convert number of opens '%s': %s\n", + key_value["_oc_"], err.Error()) + continue + } + y, err = lp.New( + "gpfs_num_opens", + map[string]string{ + "filesystem": filesystem, + }, + map[string]interface{}{ + "value": numOpens, + }, + timestamp) + if err == nil { + *out = append(*out, y) + } + + // number of closes + numCloses, err := strconv.ParseInt(key_value["_cc_"], 10, 64) + if err != nil { + fmt.Fprintf(os.Stderr, "GpfsCollector.Read(): Failed to convert number of closes: %s\n", err.Error()) + continue + } + y, err = lp.New( + "gpfs_num_closes", + map[string]string{ + "filesystem": filesystem, + }, + map[string]interface{}{ + "value": numCloses, + }, + timestamp) + if err == nil { + *out = append(*out, y) + } + + // number of reads + numReads, err := strconv.ParseInt(key_value["_rdc_"], 10, 64) + if err != nil { + fmt.Fprintf(os.Stderr, "GpfsCollector.Read(): Failed to convert number of reads: %s\n", err.Error()) + continue + } + y, err = lp.New( + "gpfs_num_reads", + map[string]string{ + "filesystem": filesystem, + }, + map[string]interface{}{ + "value": numReads, + }, + timestamp) + if err == nil { + *out = append(*out, y) + } + + // number of writes + numWrites, err := strconv.ParseInt(key_value["_wc_"], 10, 64) + if err != nil { + fmt.Fprintf(os.Stderr, "GpfsCollector.Read(): Failed to convert number of writes: %s\n", err.Error()) + continue + } + y, err = lp.New( + "gpfs_num_writes", + map[string]string{ + "filesystem": filesystem, + }, + map[string]interface{}{ + "value": numWrites, + }, + timestamp) + if err == nil { + *out = append(*out, y) + } + + // number of read directories + numReaddirs, err := strconv.ParseInt(key_value["_dir_"], 10, 64) + if err != nil { + fmt.Fprintf(os.Stderr, "GpfsCollector.Read(): Failed to convert number of read directories: %s\n", err.Error()) + continue + } + y, err = lp.New( + "gpfs_num_readdirs", + map[string]string{ + "filesystem": filesystem, + }, + map[string]interface{}{ + "value": numReaddirs, + }, + timestamp) + if err == nil { + *out = append(*out, y) + } + + // Number of inode updates + numInodeUpdates, err := strconv.ParseInt(key_value["_iu_"], 10, 64) + if err != nil { + fmt.Fprintf(os.Stderr, "GpfsCollector.Read(): Failed to convert Number of inode updates: %s\n", err.Error()) + continue + } + y, err = lp.New( + "gpfs_num_inode_updates", + map[string]string{ + "filesystem": filesystem, + }, + map[string]interface{}{ + "value": numInodeUpdates, + }, + timestamp) + if err == nil { + *out = append(*out, y) + } + } + } +} + +func (m *GpfsCollector) Close() { + m.init = false + return +} diff --git a/metric-collector.go b/metric-collector.go index fd3b556..0b75675 100644 --- a/metric-collector.go +++ b/metric-collector.go @@ -32,6 +32,7 @@ var Collectors = map[string]collectors.MetricGetter{ "diskstat": &collectors.DiskstatCollector{}, "tempstat": &collectors.TempCollector{}, "ipmistat": &collectors.IpmiCollector{}, + "gpfs": &collectors.GpfsCollector{}, } var Sinks = map[string]sinks.SinkFuncs{ From 82b10b365e5459f07889a5a2de6a2c2b68e7458c Mon Sep 17 00:00:00 2001 From: Holger Obermaier <40787752+ho-ob@users.noreply.github.com> Date: Wed, 19 Jan 2022 14:47:59 +0100 Subject: [PATCH 04/22] Fix to work with golang 1.16 --- collectors/gpfs.go | 3 +++ 1 file changed, 3 insertions(+) diff --git a/collectors/gpfs.go b/collectors/gpfs.go index 14398b4..db8a0d0 100644 --- a/collectors/gpfs.go +++ b/collectors/gpfs.go @@ -121,6 +121,7 @@ func (m *GpfsCollector) Read(interval time.Duration, out *[]lp.MutableMetric) { continue } + /* requires go 1.17 // unix epoch in microseconds timestampInt, err := strconv.ParseInt(key_value["_t_"]+key_value["_tu_"], 10, 64) timestamp := time.UnixMicro(timestampInt) @@ -130,6 +131,8 @@ func (m *GpfsCollector) Read(interval time.Duration, out *[]lp.MutableMetric) { key_value["_t_"]+key_value["_tu_"], err.Error()) continue } + */ + timestamp := time.Now() // bytes read bytesRead, err := strconv.ParseInt(key_value["_br_"], 10, 64) From 7b29a14e1a89ba515935ede4ba308681007a9321 Mon Sep 17 00:00:00 2001 From: Holger Obermaier <40787752+ho-ob@users.noreply.github.com> Date: Wed, 19 Jan 2022 15:35:12 +0100 Subject: [PATCH 05/22] Drop domain part of host name --- metric-collector.go | 2 ++ 1 file changed, 2 insertions(+) diff --git a/metric-collector.go b/metric-collector.go index 0b75675..04c221f 100644 --- a/metric-collector.go +++ b/metric-collector.go @@ -166,6 +166,8 @@ func main() { log.Print(err) return } + // Drop domain part of host name + host = strings.SplitN(host, `.`, 2)[0] clicfg := ReadCli() err = CreatePidfile(clicfg["pidfile"]) err = SetLogging(clicfg["logfile"]) From f17719113d3fe84d178b0a13b01e8066308b0df8 Mon Sep 17 00:00:00 2001 From: Holger Obermaier <40787752+ho-ob@users.noreply.github.com> Date: Wed, 19 Jan 2022 15:55:15 +0100 Subject: [PATCH 06/22] Updated to latest stable version of likwid --- collectors/Makefile | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/collectors/Makefile b/collectors/Makefile index ab47caa..0c637b5 100644 --- a/collectors/Makefile +++ b/collectors/Makefile @@ -8,9 +8,9 @@ ACCESSMODE = direct # if CENTRAL_INSTALL == true ####################################################################### # Path to central installation (if CENTRAL_INSTALL=true) -LIKWID_BASE=/apps/likwid/5.2.0 -# LIKWID version (should be same major version as central installation, 5.1.x) -LIKWID_VERSION = 5.2.0 +LIKWID_BASE=/apps/likwid/5.2.1 +# LIKWID version (should be same major version as central installation, 5.2.x) +LIKWID_VERSION = 5.2.1 ####################################################################### # if CENTRAL_INSTALL == false and ACCESSMODE == accessdaemon From dcb5b4add5a003b9bc4e70696995f46ad373a3d6 Mon Sep 17 00:00:00 2001 From: Holger Obermaier <40787752+ho-ob@users.noreply.github.com> Date: Wed, 19 Jan 2022 16:41:32 +0100 Subject: [PATCH 07/22] Define source code dependencies in Makefile --- Makefile | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/Makefile b/Makefile index e82685e..f49162e 100644 --- a/Makefile +++ b/Makefile @@ -1,20 +1,27 @@ APP = cc-metric-collector +GOSRC_APP := metric-collector.go +GOSRC_COLLECTORS := $(wildcard collectors/*.go) +GOSRC_SINKS := $(wildcard sinks/*.go) +GOSRC_RECEIVERS := $(wildcard receivers/*.go) +GOSRC := $(GOSRC_APP) $(GOSRC_COLLECTORS) $(GOSRC_SINKS) $(GOSRC_RECEIVERS) +.PHONY: all all: $(APP) -$(APP): metric-collector.go +$(APP): $(GOSRC) make -C collectors go get - go build -o $(APP) metric-collector.go + go build -o $(APP) $(GOSRC_APP) +.PHONY: clean clean: make -C collectors clean rm -f $(APP) +.PHONY: fmt fmt: - go fmt collectors/*.go - go fmt sinks/*.go - go fmt receivers/*.go - go fmt metric-collector.go + go fmt $(GOSRC_COLLECTORS) + go fmt $(GOSRC_SINKS) + go fmt $(GOSRC_RECEIVERS) + go fmt $(GOSRC_APP) -.PHONY: clean From f91150f4ba9db90b038d3414df37cff7d0e488db Mon Sep 17 00:00:00 2001 From: Holger Obermaier <40787752+ho-ob@users.noreply.github.com> Date: Thu, 20 Jan 2022 12:09:22 +0100 Subject: [PATCH 08/22] Add vet and staticcheck make targets --- Makefile | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/Makefile b/Makefile index f49162e..91a1200 100644 --- a/Makefile +++ b/Makefile @@ -25,3 +25,14 @@ fmt: go fmt $(GOSRC_RECEIVERS) go fmt $(GOSRC_APP) +# Examine Go source code and reports suspicious constructs +.PHONY: vet + go vet ./... + + +# Run linter for the Go programming language. +# Using static analysis, it finds bugs and performance issues, offers simplifications, and enforces style rules +.PHONY: staticcheck +staticcheck: + go install honnef.co/go/tools/cmd/staticcheck@latest + $$(go env GOPATH)/bin/staticcheck ./... From 8860b8d0f76408fc078414f38be0d4311d726d78 Mon Sep 17 00:00:00 2001 From: Holger Obermaier <40787752+ho-ob@users.noreply.github.com> Date: Thu, 20 Jan 2022 12:13:50 +0100 Subject: [PATCH 09/22] Add vet and staticcheck make targets --- Makefile | 1 + 1 file changed, 1 insertion(+) diff --git a/Makefile b/Makefile index 91a1200..892bbcc 100644 --- a/Makefile +++ b/Makefile @@ -27,6 +27,7 @@ fmt: # Examine Go source code and reports suspicious constructs .PHONY: vet +vet: go vet ./... From 5d263adddec4b23fc6299fa99fc017615ba3b163 Mon Sep 17 00:00:00 2001 From: Holger Obermaier <40787752+ho-ob@users.noreply.github.com> Date: Thu, 20 Jan 2022 12:38:52 +0100 Subject: [PATCH 10/22] Avoid go vet warning: struct field tag `json:"..., omitempty"` not compatible with reflect.StructTag.Get: suspicious space in struct tag value --- collectors/cpustatMetric.go | 2 +- collectors/diskstatMetric.go | 2 +- collectors/infinibandMetric.go | 2 +- collectors/loadavgMetric.go | 2 +- collectors/nvidiaMetric.go | 4 ++-- 5 files changed, 6 insertions(+), 6 deletions(-) diff --git a/collectors/cpustatMetric.go b/collectors/cpustatMetric.go index fe31c3c..9e44fa7 100644 --- a/collectors/cpustatMetric.go +++ b/collectors/cpustatMetric.go @@ -13,7 +13,7 @@ import ( const CPUSTATFILE = `/proc/stat` type CpustatCollectorConfig struct { - ExcludeMetrics []string `json:"exclude_metrics, omitempty"` + ExcludeMetrics []string `json:"exclude_metrics,omitempty"` } type CpustatCollector struct { diff --git a/collectors/diskstatMetric.go b/collectors/diskstatMetric.go index e2d2f25..5080ca2 100644 --- a/collectors/diskstatMetric.go +++ b/collectors/diskstatMetric.go @@ -15,7 +15,7 @@ const DISKSTATFILE = `/proc/diskstats` const DISKSTAT_SYSFSPATH = `/sys/block` type DiskstatCollectorConfig struct { - ExcludeMetrics []string `json:"exclude_metrics, omitempty"` + ExcludeMetrics []string `json:"exclude_metrics,omitempty"` } type DiskstatCollector struct { diff --git a/collectors/infinibandMetric.go b/collectors/infinibandMetric.go index 54c974e..a9552f7 100644 --- a/collectors/infinibandMetric.go +++ b/collectors/infinibandMetric.go @@ -20,7 +20,7 @@ const LIDFILE = `/sys/class/infiniband/mlx4_0/ports/1/lid` const PERFQUERY = `/usr/sbin/perfquery` type InfinibandCollectorConfig struct { - ExcludeDevices []string `json:"exclude_devices, omitempty"` + ExcludeDevices []string `json:"exclude_devices,omitempty"` PerfQueryPath string `json:"perfquery_path"` } diff --git a/collectors/loadavgMetric.go b/collectors/loadavgMetric.go index dbccf22..21cf350 100644 --- a/collectors/loadavgMetric.go +++ b/collectors/loadavgMetric.go @@ -12,7 +12,7 @@ import ( const LOADAVGFILE = `/proc/loadavg` type LoadavgCollectorConfig struct { - ExcludeMetrics []string `json:"exclude_metrics, omitempty"` + ExcludeMetrics []string `json:"exclude_metrics,omitempty"` } type LoadavgCollector struct { diff --git a/collectors/nvidiaMetric.go b/collectors/nvidiaMetric.go index 4597610..bd63e2c 100644 --- a/collectors/nvidiaMetric.go +++ b/collectors/nvidiaMetric.go @@ -11,8 +11,8 @@ import ( ) type NvidiaCollectorConfig struct { - ExcludeMetrics []string `json:"exclude_metrics, omitempty"` - ExcludeDevices []string `json:"exclude_devices, omitempty"` + ExcludeMetrics []string `json:"exclude_metrics,omitempty"` + ExcludeDevices []string `json:"exclude_devices,omitempty"` } type NvidiaCollector struct { From 0feb880c3b65cd01547de519109d535755f65a47 Mon Sep 17 00:00:00 2001 From: Holger Obermaier <40787752+ho-ob@users.noreply.github.com> Date: Thu, 20 Jan 2022 16:32:10 +0100 Subject: [PATCH 11/22] Correct go syntax in README.md --- collectors/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/collectors/README.md b/collectors/README.md index b5ae4e1..df02dd6 100644 --- a/collectors/README.md +++ b/collectors/README.md @@ -339,7 +339,7 @@ func (m *SampleCollector) Read(interval time.Duration, out *[]lp.MutableMetric) return } // tags for the metric, if type != node use proper type and type-id - tags := map[string][string]{"type" : "node"} + tags := map[string]string{"type" : "node"} // Each metric has exactly one field: value ! value := map[string]interface{}{"value": int(x)} y, err := lp.New("sample_metric", tags, value, time.Now()) From 83b784e6f0315f2835c18c1fb494c150e28418c9 Mon Sep 17 00:00:00 2001 From: Holger Obermaier <40787752+ho-ob@users.noreply.github.com> Date: Fri, 21 Jan 2022 09:59:57 +0100 Subject: [PATCH 12/22] Add CPU frequency collector --- collectors/cpufreqMetric.go | 189 ++++++++++++++++++++++++++++++++++++ metric-collector.go | 12 ++- 2 files changed, 196 insertions(+), 5 deletions(-) create mode 100644 collectors/cpufreqMetric.go diff --git a/collectors/cpufreqMetric.go b/collectors/cpufreqMetric.go new file mode 100644 index 0000000..94f8f4a --- /dev/null +++ b/collectors/cpufreqMetric.go @@ -0,0 +1,189 @@ +package collectors + +import ( + "bufio" + "encoding/json" + "fmt" + "log" + "os" + "path/filepath" + "strconv" + "strings" + "time" + + lp "github.com/influxdata/line-protocol" + "golang.org/x/sys/unix" +) + +var warnLog *log.Logger = log.New(os.Stderr, "Warning: ", log.LstdFlags) + +// +// readOneLine reads one line from a file. +// It returns ok when file was successfully read. +// In this case text contains the first line of the files contents. +// +func readOneLine(filename string) (text string, ok bool) { + file, err := os.Open(filename) + if err != nil { + return + } + defer file.Close() + scanner := bufio.NewScanner(file) + ok = scanner.Scan() + text = scanner.Text() + return +} + +type CPUFreqCollectorCPU struct { + // coreID, packageID, num_cores, num_package + tagSet map[string]string + scalingCurFreqFile string +} + +// +// CPUFreqCollector +// a metric collector to measure the current frequency of the CPUs +// as obtained from the hardware (in KHz) +// Only measure on the first hyper thread +// +// See: https://www.kernel.org/doc/html/latest/admin-guide/pm/cpufreq.html +// +type CPUFreqCollector struct { + MetricCollector + config struct { + ExcludeMetrics []string `json:"exclude_metrics,omitempty"` + } + cpus []CPUFreqCollectorCPU +} + +func (m *CPUFreqCollector) Init(config []byte) error { + m.name = "CPUFreqCollector" + m.setup() + if len(config) > 0 { + err := json.Unmarshal(config, &m.config) + if err != nil { + return err + } + } + + // Initialize CPU list + m.cpus = make([]CPUFreqCollectorCPU, 0) + + // Loop for all CPU directories + baseDir := "/sys/devices/system/cpu" + globPattern := filepath.Join(baseDir, "cpu[0-9]*") + cpuDirs, err := filepath.Glob(globPattern) + if err != nil { + return fmt.Errorf("CPUFreqCollector.Init() unable to glob files with pattern %s: %v", globPattern, err) + } + if cpuDirs == nil { + return fmt.Errorf("CPUFreqCollector.Init() unable to find any files with pattern %s", globPattern) + } + + maxPackageID := 0 + maxCoreID := 0 + for _, cpuDir := range cpuDirs { + cpuID := strings.TrimPrefix(cpuDir, "/sys/devices/system/cpu/cpu") + + // Read thread sibling list + threadSiblingListFile := filepath.Join(cpuDir, "topology", "thread_siblings_list") + threadSiblingList, ok := readOneLine(threadSiblingListFile) + if !ok { + return fmt.Errorf("CPUFreqCollector.Init() unable to read thread siblings list from %s", threadSiblingListFile) + } + + // Read frequency only from first hardware thread + // Ignore Simultaneous Multithreading (SMT) / Hyper-Threading + if strings.Split(threadSiblingList, ",")[0] == cpuID { + // Read package ID + packageIDFile := filepath.Join(cpuDir, "topology", "physical_package_id") + packageID, ok := readOneLine(packageIDFile) + if !ok { + return fmt.Errorf("CPUFreqCollector.Init() unable to read physical package ID from %s", packageIDFile) + } + packageID_int, err := strconv.Atoi(packageID) + if err != nil { + return fmt.Errorf("CPUFreqCollector.Init() unable to convert packageID to int: %v", err) + } + + // Update maxPackageID + if packageID_int > maxPackageID { + maxPackageID = packageID_int + } + + // Read core ID + coreIDFile := filepath.Join(cpuDir, "topology", "core_id") + coreID, ok := readOneLine(coreIDFile) + if !ok { + return fmt.Errorf("CPUFreqCollector.Init() unable to read core ID from %s", coreIDFile) + } + coreID_int, err := strconv.Atoi(coreID) + if err != nil { + return fmt.Errorf("CPUFreqCollector.Init() unable to convert coreID to int: %v", err) + } + + // Update maxCoreID + if coreID_int > maxCoreID { + maxCoreID = coreID_int + } + + // Check access to current frequency file + scalingCurFreqFile := filepath.Join(cpuDir, "cpufreq", "scaling_cur_freq") + err = unix.Access(scalingCurFreqFile, unix.R_OK) + if err != nil { + return fmt.Errorf("CPUFreqCollector.Init() unable to access %s: %v", scalingCurFreqFile, err) + } + + m.cpus = append( + m.cpus, + CPUFreqCollectorCPU{ + tagSet: map[string]string{ + "coreID": strings.TrimSpace(coreID), + "packageID": strings.TrimSpace(packageID), + }, + scalingCurFreqFile: scalingCurFreqFile, + }) + } + } + + // Add num packages and num cores as tags + numPackages := strconv.Itoa(maxPackageID + 1) + numCores := strconv.Itoa(maxCoreID + 1) + for i := range m.cpus { + m.cpus[i].tagSet["num_core"] = numCores + m.cpus[i].tagSet["num_package"] = numPackages + } + + m.init = true + return nil +} + +func (m *CPUFreqCollector) Read(interval time.Duration, out *[]lp.MutableMetric) { + if !m.init { + return + } + + for _, cpu := range m.cpus { + // Read current frequency + line, ok := readOneLine(cpu.scalingCurFreqFile) + if !ok { + warnLog.Printf("CPUFreqCollector.Read(): Failed to read one line from file '%s'", cpu.scalingCurFreqFile) + continue + } + cpuFreq, err := strconv.Atoi(line) + if err != nil { + warnLog.Printf("CPUFreqCollector.Read(): Failed to convert CPU frequency '%s': %v", line, err) + continue + } + + value := map[string]interface{}{"value": cpuFreq} + y, err := lp.New("cpufreq", cpu.tagSet, value, time.Now()) + if err == nil { + *out = append(*out, y) + } + } +} + +func (m *CPUFreqCollector) Close() { + m.init = false +} diff --git a/metric-collector.go b/metric-collector.go index 04c221f..90f50c4 100644 --- a/metric-collector.go +++ b/metric-collector.go @@ -4,16 +4,17 @@ import ( "encoding/json" "flag" "fmt" - "github.com/ClusterCockpit/cc-metric-collector/collectors" - "github.com/ClusterCockpit/cc-metric-collector/receivers" - "github.com/ClusterCockpit/cc-metric-collector/sinks" - lp "github.com/influxdata/line-protocol" "log" "os" "os/signal" "strings" "sync" "time" + + "github.com/ClusterCockpit/cc-metric-collector/collectors" + "github.com/ClusterCockpit/cc-metric-collector/receivers" + "github.com/ClusterCockpit/cc-metric-collector/sinks" + lp "github.com/influxdata/line-protocol" ) // List of provided collectors. Which collector should be run can be @@ -32,7 +33,8 @@ var Collectors = map[string]collectors.MetricGetter{ "diskstat": &collectors.DiskstatCollector{}, "tempstat": &collectors.TempCollector{}, "ipmistat": &collectors.IpmiCollector{}, - "gpfs": &collectors.GpfsCollector{}, + "gpfs": new(collectors.GpfsCollector), + "cpufreq": new(collectors.CPUFreqCollector), } var Sinks = map[string]sinks.SinkFuncs{ From 5dd2af4e8fcce87f80a2fde5751de7919e2689c0 Mon Sep 17 00:00:00 2001 From: Holger Obermaier <40787752+ho-ob@users.noreply.github.com> Date: Fri, 21 Jan 2022 14:35:52 +0100 Subject: [PATCH 13/22] Avoid staticcheck warning: redundant return statement --- collectors/cpustatMetric.go | 4 ++-- collectors/customCmdMetric.go | 4 ++-- collectors/diskstatMetric.go | 6 +++--- collectors/{gpfs.go => gpfsMetric.go} | 9 +++------ collectors/infinibandMetric.go | 5 +++-- collectors/ipmiMetric.go | 4 ++-- collectors/likwidMetric.go | 6 +++--- collectors/loadavgMetric.go | 4 ++-- collectors/lustreMetric.go | 4 ++-- collectors/memstatMetric.go | 4 ++-- collectors/netstatMetric.go | 4 ++-- collectors/nvidiaMetric.go | 6 +++--- collectors/tempMetric.go | 4 ++-- collectors/topprocsMetric.go | 4 ++-- sinks/stdoutSink.go | 4 +--- 15 files changed, 34 insertions(+), 38 deletions(-) rename collectors/{gpfs.go => gpfsMetric.go} (98%) diff --git a/collectors/cpustatMetric.go b/collectors/cpustatMetric.go index 9e44fa7..64b5842 100644 --- a/collectors/cpustatMetric.go +++ b/collectors/cpustatMetric.go @@ -3,11 +3,12 @@ package collectors import ( "encoding/json" "fmt" - lp "github.com/influxdata/line-protocol" "io/ioutil" "strconv" "strings" "time" + + lp "github.com/influxdata/line-protocol" ) const CPUSTATFILE = `/proc/stat` @@ -88,5 +89,4 @@ func (m *CpustatCollector) Read(interval time.Duration, out *[]lp.MutableMetric) func (m *CpustatCollector) Close() { m.init = false - return } diff --git a/collectors/customCmdMetric.go b/collectors/customCmdMetric.go index 547bb87..bbafc2d 100644 --- a/collectors/customCmdMetric.go +++ b/collectors/customCmdMetric.go @@ -3,12 +3,13 @@ package collectors import ( "encoding/json" "errors" - lp "github.com/influxdata/line-protocol" "io/ioutil" "log" "os/exec" "strings" "time" + + lp "github.com/influxdata/line-protocol" ) const CUSTOMCMDPATH = `/home/unrz139/Work/cc-metric-collector/collectors/custom` @@ -126,5 +127,4 @@ func (m *CustomCmdCollector) Read(interval time.Duration, out *[]lp.MutableMetri func (m *CustomCmdCollector) Close() { m.init = false - return } diff --git a/collectors/diskstatMetric.go b/collectors/diskstatMetric.go index 5080ca2..4cbd3c6 100644 --- a/collectors/diskstatMetric.go +++ b/collectors/diskstatMetric.go @@ -1,8 +1,10 @@ package collectors import ( - lp "github.com/influxdata/line-protocol" "io/ioutil" + + lp "github.com/influxdata/line-protocol" + // "log" "encoding/json" "errors" @@ -107,10 +109,8 @@ func (m *DiskstatCollector) Read(interval time.Duration, out *[]lp.MutableMetric } } } - return } func (m *DiskstatCollector) Close() { m.init = false - return } diff --git a/collectors/gpfs.go b/collectors/gpfsMetric.go similarity index 98% rename from collectors/gpfs.go rename to collectors/gpfsMetric.go index db8a0d0..fbf3a63 100644 --- a/collectors/gpfs.go +++ b/collectors/gpfsMetric.go @@ -17,13 +17,11 @@ import ( lp "github.com/influxdata/line-protocol" ) -type GpfsCollectorConfig struct { - Mmpmon string `json:"mmpmon"` -} - type GpfsCollector struct { MetricCollector - config GpfsCollectorConfig + config struct { + Mmpmon string `json:"mmpmon"` + } } func (m *GpfsCollector) Init(config []byte) error { @@ -297,5 +295,4 @@ func (m *GpfsCollector) Read(interval time.Duration, out *[]lp.MutableMetric) { func (m *GpfsCollector) Close() { m.init = false - return } diff --git a/collectors/infinibandMetric.go b/collectors/infinibandMetric.go index a9552f7..6e14251 100644 --- a/collectors/infinibandMetric.go +++ b/collectors/infinibandMetric.go @@ -2,10 +2,12 @@ package collectors import ( "fmt" - lp "github.com/influxdata/line-protocol" "io/ioutil" "log" "os/exec" + + lp "github.com/influxdata/line-protocol" + // "os" "encoding/json" "errors" @@ -278,5 +280,4 @@ func (m *InfinibandCollector) Read(interval time.Duration, out *[]lp.MutableMetr func (m *InfinibandCollector) Close() { m.init = false - return } diff --git a/collectors/ipmiMetric.go b/collectors/ipmiMetric.go index d28a134..3179148 100644 --- a/collectors/ipmiMetric.go +++ b/collectors/ipmiMetric.go @@ -3,13 +3,14 @@ package collectors import ( "encoding/json" "errors" - lp "github.com/influxdata/line-protocol" "log" "os" "os/exec" "strconv" "strings" "time" + + lp "github.com/influxdata/line-protocol" ) const IPMITOOL_PATH = `/usr/bin/ipmitool` @@ -133,5 +134,4 @@ func (m *IpmiCollector) Read(interval time.Duration, out *[]lp.MutableMetric) { func (m *IpmiCollector) Close() { m.init = false - return } diff --git a/collectors/likwidMetric.go b/collectors/likwidMetric.go index 34e2364..454a593 100644 --- a/collectors/likwidMetric.go +++ b/collectors/likwidMetric.go @@ -12,8 +12,6 @@ import ( "encoding/json" "errors" "fmt" - lp "github.com/influxdata/line-protocol" - "gopkg.in/Knetic/govaluate.v2" "io/ioutil" "log" "math" @@ -22,6 +20,9 @@ import ( "strings" "time" "unsafe" + + lp "github.com/influxdata/line-protocol" + "gopkg.in/Knetic/govaluate.v2" ) type LikwidCollectorMetricConfig struct { @@ -303,5 +304,4 @@ func (m *LikwidCollector) Close() { C.perfmon_finalize() C.topology_finalize() } - return } diff --git a/collectors/loadavgMetric.go b/collectors/loadavgMetric.go index 21cf350..1ecaea5 100644 --- a/collectors/loadavgMetric.go +++ b/collectors/loadavgMetric.go @@ -2,11 +2,12 @@ package collectors import ( "encoding/json" - lp "github.com/influxdata/line-protocol" "io/ioutil" "strconv" "strings" "time" + + lp "github.com/influxdata/line-protocol" ) const LOADAVGFILE = `/proc/loadavg` @@ -76,5 +77,4 @@ func (m *LoadavgCollector) Read(interval time.Duration, out *[]lp.MutableMetric) func (m *LoadavgCollector) Close() { m.init = false - return } diff --git a/collectors/lustreMetric.go b/collectors/lustreMetric.go index e7bb7a6..d77ac09 100644 --- a/collectors/lustreMetric.go +++ b/collectors/lustreMetric.go @@ -3,12 +3,13 @@ package collectors import ( "encoding/json" "errors" - lp "github.com/influxdata/line-protocol" "io/ioutil" "log" "strconv" "strings" "time" + + lp "github.com/influxdata/line-protocol" ) const LUSTREFILE = `/proc/fs/lustre/llite/lnec-XXXXXX/stats` @@ -102,5 +103,4 @@ func (m *LustreCollector) Read(interval time.Duration, out *[]lp.MutableMetric) func (m *LustreCollector) Close() { m.init = false - return } diff --git a/collectors/memstatMetric.go b/collectors/memstatMetric.go index 91987bb..17db13e 100644 --- a/collectors/memstatMetric.go +++ b/collectors/memstatMetric.go @@ -4,12 +4,13 @@ import ( "encoding/json" "errors" "fmt" - lp "github.com/influxdata/line-protocol" "io/ioutil" "log" "strconv" "strings" "time" + + lp "github.com/influxdata/line-protocol" ) const MEMSTATFILE = `/proc/meminfo` @@ -125,5 +126,4 @@ func (m *MemstatCollector) Read(interval time.Duration, out *[]lp.MutableMetric) func (m *MemstatCollector) Close() { m.init = false - return } diff --git a/collectors/netstatMetric.go b/collectors/netstatMetric.go index 659b89f..a273de1 100644 --- a/collectors/netstatMetric.go +++ b/collectors/netstatMetric.go @@ -2,12 +2,13 @@ package collectors import ( "encoding/json" - lp "github.com/influxdata/line-protocol" "io/ioutil" "log" "strconv" "strings" "time" + + lp "github.com/influxdata/line-protocol" ) const NETSTATFILE = `/proc/net/dev` @@ -84,5 +85,4 @@ func (m *NetstatCollector) Read(interval time.Duration, out *[]lp.MutableMetric) func (m *NetstatCollector) Close() { m.init = false - return } diff --git a/collectors/nvidiaMetric.go b/collectors/nvidiaMetric.go index bd63e2c..31118c2 100644 --- a/collectors/nvidiaMetric.go +++ b/collectors/nvidiaMetric.go @@ -4,10 +4,11 @@ import ( "encoding/json" "errors" "fmt" - "github.com/NVIDIA/go-nvml/pkg/nvml" - lp "github.com/influxdata/line-protocol" "log" "time" + + "github.com/NVIDIA/go-nvml/pkg/nvml" + lp "github.com/influxdata/line-protocol" ) type NvidiaCollectorConfig struct { @@ -267,5 +268,4 @@ func (m *NvidiaCollector) Close() { nvml.Shutdown() m.init = false } - return } diff --git a/collectors/tempMetric.go b/collectors/tempMetric.go index 3665025..b074d78 100644 --- a/collectors/tempMetric.go +++ b/collectors/tempMetric.go @@ -3,13 +3,14 @@ package collectors import ( "encoding/json" "fmt" - lp "github.com/influxdata/line-protocol" "io/ioutil" "os" "path/filepath" "strconv" "strings" "time" + + lp "github.com/influxdata/line-protocol" ) const HWMON_PATH = `/sys/class/hwmon` @@ -105,5 +106,4 @@ func (m *TempCollector) Read(interval time.Duration, out *[]lp.MutableMetric) { func (m *TempCollector) Close() { m.init = false - return } diff --git a/collectors/topprocsMetric.go b/collectors/topprocsMetric.go index a1bf989..e1b31ee 100644 --- a/collectors/topprocsMetric.go +++ b/collectors/topprocsMetric.go @@ -4,11 +4,12 @@ import ( "encoding/json" "errors" "fmt" - lp "github.com/influxdata/line-protocol" "log" "os/exec" "strings" "time" + + lp "github.com/influxdata/line-protocol" ) const MAX_NUM_PROCS = 10 @@ -74,5 +75,4 @@ func (m *TopProcsCollector) Read(interval time.Duration, out *[]lp.MutableMetric func (m *TopProcsCollector) Close() { m.init = false - return } diff --git a/sinks/stdoutSink.go b/sinks/stdoutSink.go index 34561e0..8016fcb 100644 --- a/sinks/stdoutSink.go +++ b/sinks/stdoutSink.go @@ -59,6 +59,4 @@ func (s *StdoutSink) Flush() error { return nil } -func (s *StdoutSink) Close() { - return -} +func (s *StdoutSink) Close() {} From 25b9268b24f3458dab0128a8af36df217039528e Mon Sep 17 00:00:00 2001 From: Holger Obermaier <40787752+ho-ob@users.noreply.github.com> Date: Fri, 21 Jan 2022 15:20:53 +0100 Subject: [PATCH 14/22] Avoid staticcheck warning: unnecessary assignment to the blank identifier --- collectors/likwidMetric.go | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/collectors/likwidMetric.go b/collectors/likwidMetric.go index 454a593..45fe68c 100644 --- a/collectors/likwidMetric.go +++ b/collectors/likwidMetric.go @@ -152,7 +152,7 @@ func (m *LikwidCollector) Init(config []byte) error { C.free(unsafe.Pointer(cstr)) m.results[i] = make(map[int]map[string]interface{}) m.mresults[i] = make(map[int]map[string]float64) - for tid, _ := range m.cpulist { + for tid := range m.cpulist { m.results[i][tid] = make(map[string]interface{}) m.mresults[i][tid] = make(map[string]float64) m.gmresults[tid] = make(map[string]float64) @@ -194,7 +194,7 @@ func (m *LikwidCollector) Read(interval time.Duration, out *[]lp.MutableMetric) continue } var eidx C.int - for tid, _ := range m.cpulist { + for tid := range m.cpulist { for eidx = 0; int(eidx) < len(evset.Events); eidx++ { ctr := C.perfmon_getCounterName(gid, eidx) gctr := C.GoString(ctr) @@ -220,7 +220,7 @@ func (m *LikwidCollector) Read(interval time.Duration, out *[]lp.MutableMetric) } for _, metric := range m.config.Metrics { - for tid, _ := range m.cpulist { + for tid := range m.cpulist { var params map[string]interface{} expression, err := govaluate.NewEvaluableExpression(metric.Calc) if err != nil { @@ -228,7 +228,7 @@ func (m *LikwidCollector) Read(interval time.Duration, out *[]lp.MutableMetric) continue } params = make(map[string]interface{}) - for j, _ := range m.groups { + for j := range m.groups { for mname, mres := range m.mresults[j][tid] { params[mname] = mres } @@ -241,7 +241,7 @@ func (m *LikwidCollector) Read(interval time.Duration, out *[]lp.MutableMetric) m.gmresults[tid][metric.Name] = float64(result.(float64)) } } - for i, _ := range m.groups { + for i := range m.groups { evset := m.config.Eventsets[i] for _, metric := range evset.Metrics { _, skip := stringArrayContains(m.config.ExcludeMetrics, metric.Name) From daa7c6bf99135c66cbe86df46595d2b98d9e1c20 Mon Sep 17 00:00:00 2001 From: Holger Obermaier <40787752+ho-ob@users.noreply.github.com> Date: Mon, 24 Jan 2022 11:31:45 +0100 Subject: [PATCH 15/22] Simplified code --- collectors/cpufreqMetric.go | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/collectors/cpufreqMetric.go b/collectors/cpufreqMetric.go index 94f8f4a..f5a10bc 100644 --- a/collectors/cpufreqMetric.go +++ b/collectors/cpufreqMetric.go @@ -15,8 +15,6 @@ import ( "golang.org/x/sys/unix" ) -var warnLog *log.Logger = log.New(os.Stderr, "Warning: ", log.LstdFlags) - // // readOneLine reads one line from a file. // It returns ok when file was successfully read. @@ -138,7 +136,8 @@ func (m *CPUFreqCollector) Init(config []byte) error { m.cpus, CPUFreqCollectorCPU{ tagSet: map[string]string{ - "coreID": strings.TrimSpace(coreID), + "type": "cpu", + "type-id": strings.TrimSpace(coreID), "packageID": strings.TrimSpace(packageID), }, scalingCurFreqFile: scalingCurFreqFile, @@ -150,8 +149,9 @@ func (m *CPUFreqCollector) Init(config []byte) error { numPackages := strconv.Itoa(maxPackageID + 1) numCores := strconv.Itoa(maxCoreID + 1) for i := range m.cpus { - m.cpus[i].tagSet["num_core"] = numCores - m.cpus[i].tagSet["num_package"] = numPackages + c := &m.cpus[i] + c.tagSet["num_core"] = numCores + c.tagSet["num_package"] = numPackages } m.init = true @@ -163,21 +163,23 @@ func (m *CPUFreqCollector) Read(interval time.Duration, out *[]lp.MutableMetric) return } - for _, cpu := range m.cpus { + now := time.Now() + for i := range m.cpus { + cpu := &m.cpus[i] + // Read current frequency line, ok := readOneLine(cpu.scalingCurFreqFile) if !ok { - warnLog.Printf("CPUFreqCollector.Read(): Failed to read one line from file '%s'", cpu.scalingCurFreqFile) + log.Printf("CPUFreqCollector.Read(): Failed to read one line from file '%s'", cpu.scalingCurFreqFile) continue } cpuFreq, err := strconv.Atoi(line) if err != nil { - warnLog.Printf("CPUFreqCollector.Read(): Failed to convert CPU frequency '%s': %v", line, err) + log.Printf("CPUFreqCollector.Read(): Failed to convert CPU frequency '%s': %v", line, err) continue } - value := map[string]interface{}{"value": cpuFreq} - y, err := lp.New("cpufreq", cpu.tagSet, value, time.Now()) + y, err := lp.New("cpufreq", cpu.tagSet, map[string]interface{}{"value": cpuFreq}, now) if err == nil { *out = append(*out, y) } From 8d314ecb19c18353f417e8da38de95a7a6ad9c86 Mon Sep 17 00:00:00 2001 From: Holger Obermaier <40787752+ho-ob@users.noreply.github.com> Date: Mon, 24 Jan 2022 13:10:33 +0100 Subject: [PATCH 16/22] Add CPUFreqCollectorCpuinfo a metric collector to measure the current frequency of the CPUs as obtained from /proc/cpuinfo Only measure on the first hyperthread --- collectors/cpufreqCpuinfoMetric.go | 176 +++++++++++++++++++++++++++++ metric-collector.go | 31 ++--- 2 files changed, 192 insertions(+), 15 deletions(-) create mode 100644 collectors/cpufreqCpuinfoMetric.go diff --git a/collectors/cpufreqCpuinfoMetric.go b/collectors/cpufreqCpuinfoMetric.go new file mode 100644 index 0000000..1658878 --- /dev/null +++ b/collectors/cpufreqCpuinfoMetric.go @@ -0,0 +1,176 @@ +package collectors + +import ( + "bufio" + "fmt" + "log" + "os" + "strconv" + "strings" + "time" + + lp "github.com/influxdata/line-protocol" +) + +// +// CPUFreqCollector +// a metric collector to measure the current frequency of the CPUs +// as obtained from /proc/cpuinfo +// Only measure on the first hyperthread +// +type CPUFreqCpuInfoCollectorTopology struct { + processor string // logical processor number (continuous, starting at 0) + coreID string // socket local core ID + physicalID string // socket / package ID + numPhysicalID string // number of sockets / packages + isHT bool + numNonHT string // number of non hyperthreading processors + tagSet map[string]string +} + +type CPUFreqCpuInfoCollector struct { + MetricCollector + topology []CPUFreqCpuInfoCollectorTopology +} + +func (m *CPUFreqCpuInfoCollector) Init(config []byte) error { + m.name = "CPUFreqCpuInfoCollector" + + const cpuInfoFile = "/proc/cpuinfo" + file, err := os.Open(cpuInfoFile) + if err != nil { + return fmt.Errorf("Failed to open '%s': %v", cpuInfoFile, err) + } + defer file.Close() + + // Collect topology information from file cpuinfo + foundFreq := false + processor := "" + numNonHT := 0 + coreID := "" + physicalID := "" + maxPhysicalID := 0 + m.topology = make([]CPUFreqCpuInfoCollectorTopology, 0) + coreSeenBefore := make(map[string]bool) + scanner := bufio.NewScanner(file) + for scanner.Scan() { + lineSplit := strings.Split(scanner.Text(), ":") + if len(lineSplit) == 2 { + key := strings.TrimSpace(lineSplit[0]) + value := strings.TrimSpace(lineSplit[1]) + switch key { + case "cpu MHz": + // frequency + foundFreq = true + case "processor": + processor = value + case "core id": + coreID = value + case "physical id": + physicalID = value + } + } + + // were all topology information collected? + if foundFreq && + len(processor) > 0 && + len(coreID) > 0 && + len(physicalID) > 0 { + + globalID := physicalID + ":" + coreID + isHT := coreSeenBefore[globalID] + coreSeenBefore[globalID] = true + if !isHT { + // increase number on non hyper thread cores + numNonHT++ + + // increase maximun socket / package ID, when required + physicalIDInt, err := strconv.Atoi(physicalID) + if err != nil { + return fmt.Errorf("Failed to convert physical id to int: %v", err) + } + if physicalIDInt > maxPhysicalID { + maxPhysicalID = physicalIDInt + } + } + + // store collected topology information + m.topology = append( + m.topology, + CPUFreqCpuInfoCollectorTopology{ + processor: processor, + coreID: coreID, + physicalID: physicalID, + isHT: isHT, + }) + + // reset topology information + foundFreq = false + processor = "" + coreID = "" + physicalID = "" + } + } + + numPhysicalID := fmt.Sprint(maxPhysicalID + 1) + numNonHTString := fmt.Sprint(numNonHT) + for i := range m.topology { + t := &m.topology[i] + t.numPhysicalID = numPhysicalID + t.numNonHT = numNonHTString + t.tagSet = map[string]string{ + "type": "cpu", + "type-id": t.processor, + "num_core": t.numNonHT, + "package_id": t.physicalID, + "num_package": t.numPhysicalID, + } + } + + m.init = true + return nil +} + +func (m *CPUFreqCpuInfoCollector) Read(interval time.Duration, out *[]lp.MutableMetric) { + if !m.init { + return + } + const cpuInfoFile = "/proc/cpuinfo" + file, err := os.Open(cpuInfoFile) + if err != nil { + log.Printf("Failed to open '%s': %v", cpuInfoFile, err) + return + } + defer file.Close() + + processorCounter := 0 + now := time.Now() + scanner := bufio.NewScanner(file) + for scanner.Scan() { + lineSplit := strings.Split(scanner.Text(), ":") + if len(lineSplit) == 2 { + key := strings.TrimSpace(lineSplit[0]) + + // frequency + if key == "cpu MHz" { + t := &m.topology[processorCounter] + if !t.isHT { + value, err := strconv.ParseFloat(strings.TrimSpace(lineSplit[1]), 64) + if err != nil { + log.Printf("Failed to convert cpu MHz to float: %v", err) + return + } + y, err := lp.New("cpufreq", t.tagSet, map[string]interface{}{"value": value}, now) + if err == nil { + *out = append(*out, y) + } + } + processorCounter++ + } + } + } +} + +func (m *CPUFreqCpuInfoCollector) Close() { + m.init = false +} diff --git a/metric-collector.go b/metric-collector.go index 90f50c4..02a2b21 100644 --- a/metric-collector.go +++ b/metric-collector.go @@ -20,21 +20,22 @@ import ( // List of provided collectors. Which collector should be run can be // configured at 'collectors' list in 'config.json'. var Collectors = map[string]collectors.MetricGetter{ - "likwid": &collectors.LikwidCollector{}, - "loadavg": &collectors.LoadavgCollector{}, - "memstat": &collectors.MemstatCollector{}, - "netstat": &collectors.NetstatCollector{}, - "ibstat": &collectors.InfinibandCollector{}, - "lustrestat": &collectors.LustreCollector{}, - "cpustat": &collectors.CpustatCollector{}, - "topprocs": &collectors.TopProcsCollector{}, - "nvidia": &collectors.NvidiaCollector{}, - "customcmd": &collectors.CustomCmdCollector{}, - "diskstat": &collectors.DiskstatCollector{}, - "tempstat": &collectors.TempCollector{}, - "ipmistat": &collectors.IpmiCollector{}, - "gpfs": new(collectors.GpfsCollector), - "cpufreq": new(collectors.CPUFreqCollector), + "likwid": &collectors.LikwidCollector{}, + "loadavg": &collectors.LoadavgCollector{}, + "memstat": &collectors.MemstatCollector{}, + "netstat": &collectors.NetstatCollector{}, + "ibstat": &collectors.InfinibandCollector{}, + "lustrestat": &collectors.LustreCollector{}, + "cpustat": &collectors.CpustatCollector{}, + "topprocs": &collectors.TopProcsCollector{}, + "nvidia": &collectors.NvidiaCollector{}, + "customcmd": &collectors.CustomCmdCollector{}, + "diskstat": &collectors.DiskstatCollector{}, + "tempstat": &collectors.TempCollector{}, + "ipmistat": &collectors.IpmiCollector{}, + "gpfs": new(collectors.GpfsCollector), + "cpufreq": new(collectors.CPUFreqCollector), + "cpufreq_cpuinfo": new(collectors.CPUFreqCpuInfoCollector), } var Sinks = map[string]sinks.SinkFuncs{ From 7953629940413ade2d9d708f9afb1d7e1f910720 Mon Sep 17 00:00:00 2001 From: Holger Obermaier <40787752+ho-ob@users.noreply.github.com> Date: Mon, 24 Jan 2022 15:55:15 +0100 Subject: [PATCH 17/22] Update GitHub actions --- .github/ci-config.json | 23 +++++++++++++---------- .github/workflows/runonce.yml | 5 ++++- 2 files changed, 17 insertions(+), 11 deletions(-) diff --git a/.github/ci-config.json b/.github/ci-config.json index b3fbff1..402388d 100644 --- a/.github/ci-config.json +++ b/.github/ci-config.json @@ -21,7 +21,10 @@ "topprocs", "nvidia", "diskstat", - "ipmistat" + "ipmistat", + "gpfs", + "cpufreq", + "cpufreq_cpuinfo" ], "default_tags": { "cluster": "testcluster" @@ -30,20 +33,20 @@ "type": "none" }, "collect_config": { - "topprocs" : { + "topprocs": { "num_procs": 2 - }, + }, "tempstat": { "tag_override": { - "hwmon0" : { - "type" : "socket", - "type-id" : "0" + "hwmon0": { + "type": "socket", + "type-id": "0" }, - "hwmon1" : { - "type" : "socket", - "type-id" : "1" + "hwmon1": { + "type": "socket", + "type-id": "1" } } } } -} +} \ No newline at end of file diff --git a/.github/workflows/runonce.yml b/.github/workflows/runonce.yml index 8efc70a..194710f 100644 --- a/.github/workflows/runonce.yml +++ b/.github/workflows/runonce.yml @@ -7,8 +7,11 @@ jobs: steps: - uses: actions/checkout@v2 + # See: https://github.com/marketplace/actions/setup-go-environment - name: Setup Golang - uses: actions/setup-go@v2.1.4 + uses: actions/setup-go@v2.1.5 + with: + go-version: '^1.17.6' - name: Build MetricCollector run: make From 2026c3acd9f050e2958ca719aa9127490b7228a7 Mon Sep 17 00:00:00 2001 From: Holger Obermaier <40787752+ho-ob@users.noreply.github.com> Date: Mon, 24 Jan 2022 20:22:08 +0100 Subject: [PATCH 18/22] Fixed topology detection --- collectors/cpufreqMetric.go | 175 +++++++++++++++++++++--------------- 1 file changed, 101 insertions(+), 74 deletions(-) diff --git a/collectors/cpufreqMetric.go b/collectors/cpufreqMetric.go index f5a10bc..ec42445 100644 --- a/collectors/cpufreqMetric.go +++ b/collectors/cpufreqMetric.go @@ -32,10 +32,19 @@ func readOneLine(filename string) (text string, ok bool) { return } -type CPUFreqCollectorCPU struct { - // coreID, packageID, num_cores, num_package - tagSet map[string]string +type CPUFreqCollectorTopology struct { + processor string // logical processor number (continuous, starting at 0) + coreID string // socket local core ID + coreID_int int + physicalID string // socket / package ID + physicalID_int int + numPhysicalID string // number of sockets / packages + numPhysicalID_int int + isHT bool + numNonHT string // number of non hyperthreading processors + numNonHT_int int scalingCurFreqFile string + tagSet map[string]string } // @@ -48,10 +57,10 @@ type CPUFreqCollectorCPU struct { // type CPUFreqCollector struct { MetricCollector - config struct { + topology []CPUFreqCollectorTopology + config struct { ExcludeMetrics []string `json:"exclude_metrics,omitempty"` } - cpus []CPUFreqCollectorCPU } func (m *CPUFreqCollector) Init(config []byte) error { @@ -64,9 +73,6 @@ func (m *CPUFreqCollector) Init(config []byte) error { } } - // Initialize CPU list - m.cpus = make([]CPUFreqCollectorCPU, 0) - // Loop for all CPU directories baseDir := "/sys/devices/system/cpu" globPattern := filepath.Join(baseDir, "cpu[0-9]*") @@ -78,82 +84,98 @@ func (m *CPUFreqCollector) Init(config []byte) error { return fmt.Errorf("CPUFreqCollector.Init() unable to find any files with pattern %s", globPattern) } - maxPackageID := 0 - maxCoreID := 0 + // Initialize CPU topology + m.topology = make([]CPUFreqCollectorTopology, len(cpuDirs)) for _, cpuDir := range cpuDirs { - cpuID := strings.TrimPrefix(cpuDir, "/sys/devices/system/cpu/cpu") + processor := strings.TrimPrefix(cpuDir, "/sys/devices/system/cpu/cpu") + processor_int, err := strconv.Atoi(processor) + if err != nil { + return fmt.Errorf("CPUFreqCollector.Init() unable to convert cpuID to int: %v", err) + } - // Read thread sibling list - threadSiblingListFile := filepath.Join(cpuDir, "topology", "thread_siblings_list") - threadSiblingList, ok := readOneLine(threadSiblingListFile) + // Read package ID + packageIDFile := filepath.Join(cpuDir, "topology", "physical_package_id") + packageID, ok := readOneLine(packageIDFile) if !ok { - return fmt.Errorf("CPUFreqCollector.Init() unable to read thread siblings list from %s", threadSiblingListFile) + return fmt.Errorf("CPUFreqCollector.Init() unable to read physical package ID from %s", packageIDFile) + } + packageID_int, err := strconv.Atoi(packageID) + if err != nil { + return fmt.Errorf("CPUFreqCollector.Init() unable to convert packageID to int: %v", err) } - // Read frequency only from first hardware thread - // Ignore Simultaneous Multithreading (SMT) / Hyper-Threading - if strings.Split(threadSiblingList, ",")[0] == cpuID { - // Read package ID - packageIDFile := filepath.Join(cpuDir, "topology", "physical_package_id") - packageID, ok := readOneLine(packageIDFile) - if !ok { - return fmt.Errorf("CPUFreqCollector.Init() unable to read physical package ID from %s", packageIDFile) - } - packageID_int, err := strconv.Atoi(packageID) - if err != nil { - return fmt.Errorf("CPUFreqCollector.Init() unable to convert packageID to int: %v", err) - } + // Read core ID + coreIDFile := filepath.Join(cpuDir, "topology", "core_id") + coreID, ok := readOneLine(coreIDFile) + if !ok { + return fmt.Errorf("CPUFreqCollector.Init() unable to read core ID from %s", coreIDFile) + } + coreID_int, err := strconv.Atoi(coreID) + if err != nil { + return fmt.Errorf("CPUFreqCollector.Init() unable to convert coreID to int: %v", err) + } - // Update maxPackageID - if packageID_int > maxPackageID { - maxPackageID = packageID_int - } + // Check access to current frequency file + scalingCurFreqFile := filepath.Join(cpuDir, "cpufreq", "scaling_cur_freq") + err = unix.Access(scalingCurFreqFile, unix.R_OK) + if err != nil { + return fmt.Errorf("CPUFreqCollector.Init() unable to access %s: %v", scalingCurFreqFile, err) + } - // Read core ID - coreIDFile := filepath.Join(cpuDir, "topology", "core_id") - coreID, ok := readOneLine(coreIDFile) - if !ok { - return fmt.Errorf("CPUFreqCollector.Init() unable to read core ID from %s", coreIDFile) - } - coreID_int, err := strconv.Atoi(coreID) - if err != nil { - return fmt.Errorf("CPUFreqCollector.Init() unable to convert coreID to int: %v", err) - } + t := &m.topology[processor_int] + t.processor = processor + t.physicalID = packageID + t.physicalID_int = packageID_int + t.coreID = coreID + t.coreID_int = coreID_int + t.scalingCurFreqFile = scalingCurFreqFile + } - // Update maxCoreID - if coreID_int > maxCoreID { - maxCoreID = coreID_int - } + // is processor a hyperthread? + coreSeenBefore := make(map[string]bool) + for i := range m.topology { + t := &m.topology[i] - // Check access to current frequency file - scalingCurFreqFile := filepath.Join(cpuDir, "cpufreq", "scaling_cur_freq") - err = unix.Access(scalingCurFreqFile, unix.R_OK) - if err != nil { - return fmt.Errorf("CPUFreqCollector.Init() unable to access %s: %v", scalingCurFreqFile, err) - } + globalID := t.physicalID + ":" + t.coreID + t.isHT = coreSeenBefore[globalID] + coreSeenBefore[globalID] = true + } - m.cpus = append( - m.cpus, - CPUFreqCollectorCPU{ - tagSet: map[string]string{ - "type": "cpu", - "type-id": strings.TrimSpace(coreID), - "packageID": strings.TrimSpace(packageID), - }, - scalingCurFreqFile: scalingCurFreqFile, - }) + // number of non hyper thread cores and packages / sockets + numNonHT_int := 0 + maxPhysicalID := 0 + for i := range m.topology { + t := &m.topology[i] + + // Update maxPackageID + if t.physicalID_int > maxPhysicalID { + maxPhysicalID = t.physicalID_int + } + + if !t.isHT { + numNonHT_int++ } } - // Add num packages and num cores as tags - numPackages := strconv.Itoa(maxPackageID + 1) - numCores := strconv.Itoa(maxCoreID + 1) - for i := range m.cpus { - c := &m.cpus[i] - c.tagSet["num_core"] = numCores - c.tagSet["num_package"] = numPackages + numPhysicalID_int := maxPhysicalID + 1 + numPhysicalID := fmt.Sprint(numPhysicalID_int) + numNonHT := fmt.Sprint(numNonHT_int) + for i := range m.topology { + t := &m.topology[i] + t.numPhysicalID = numPhysicalID + t.numPhysicalID_int = numPhysicalID_int + t.numNonHT = numNonHT + t.numNonHT_int = numNonHT_int + t.tagSet = map[string]string{ + "type": "cpu", + "type-id": t.processor, + "num_core": t.numNonHT, + "package_id": t.physicalID, + "num_package": t.numPhysicalID, + } } + fmt.Printf("%+v\n", m.topology) m.init = true return nil } @@ -164,13 +186,18 @@ func (m *CPUFreqCollector) Read(interval time.Duration, out *[]lp.MutableMetric) } now := time.Now() - for i := range m.cpus { - cpu := &m.cpus[i] + for i := range m.topology { + t := &m.topology[i] + + // skip hyperthreads + if t.isHT { + continue + } // Read current frequency - line, ok := readOneLine(cpu.scalingCurFreqFile) + line, ok := readOneLine(t.scalingCurFreqFile) if !ok { - log.Printf("CPUFreqCollector.Read(): Failed to read one line from file '%s'", cpu.scalingCurFreqFile) + log.Printf("CPUFreqCollector.Read(): Failed to read one line from file '%s'", t.scalingCurFreqFile) continue } cpuFreq, err := strconv.Atoi(line) @@ -179,7 +206,7 @@ func (m *CPUFreqCollector) Read(interval time.Duration, out *[]lp.MutableMetric) continue } - y, err := lp.New("cpufreq", cpu.tagSet, map[string]interface{}{"value": cpuFreq}, now) + y, err := lp.New("cpufreq", t.tagSet, map[string]interface{}{"value": cpuFreq}, now) if err == nil { *out = append(*out, y) } From be8c92676a4d2532eb848a019a821b106e6e4951 Mon Sep 17 00:00:00 2001 From: Holger Obermaier <40787752+ho-ob@users.noreply.github.com> Date: Mon, 24 Jan 2022 22:03:13 +0100 Subject: [PATCH 19/22] Refactoring --- collectors/cpufreqMetric.go | 51 ++++++++++++++++++------------------- 1 file changed, 25 insertions(+), 26 deletions(-) diff --git a/collectors/cpufreqMetric.go b/collectors/cpufreqMetric.go index ec42445..35e64ac 100644 --- a/collectors/cpufreqMetric.go +++ b/collectors/cpufreqMetric.go @@ -33,18 +33,18 @@ func readOneLine(filename string) (text string, ok bool) { } type CPUFreqCollectorTopology struct { - processor string // logical processor number (continuous, starting at 0) - coreID string // socket local core ID - coreID_int int - physicalID string // socket / package ID - physicalID_int int - numPhysicalID string // number of sockets / packages - numPhysicalID_int int - isHT bool - numNonHT string // number of non hyperthreading processors - numNonHT_int int - scalingCurFreqFile string - tagSet map[string]string + processor string // logical processor number (continuous, starting at 0) + coreID string // socket local core ID + coreID_int int + physicalPackageID string // socket / package ID + physicalPackageID_int int + numPhysicalPackages string // number of sockets / packages + numPhysicalPackages_int int + isHT bool + numNonHT string // number of non hyperthreading processors + numNonHT_int int + scalingCurFreqFile string + tagSet map[string]string } // @@ -94,12 +94,12 @@ func (m *CPUFreqCollector) Init(config []byte) error { } // Read package ID - packageIDFile := filepath.Join(cpuDir, "topology", "physical_package_id") - packageID, ok := readOneLine(packageIDFile) + physicalPackageIDFile := filepath.Join(cpuDir, "topology", "physical_package_id") + physicalPackageID, ok := readOneLine(physicalPackageIDFile) if !ok { - return fmt.Errorf("CPUFreqCollector.Init() unable to read physical package ID from %s", packageIDFile) + return fmt.Errorf("CPUFreqCollector.Init() unable to read physical package ID from %s", physicalPackageIDFile) } - packageID_int, err := strconv.Atoi(packageID) + physicalPackageID_int, err := strconv.Atoi(physicalPackageID) if err != nil { return fmt.Errorf("CPUFreqCollector.Init() unable to convert packageID to int: %v", err) } @@ -124,8 +124,8 @@ func (m *CPUFreqCollector) Init(config []byte) error { t := &m.topology[processor_int] t.processor = processor - t.physicalID = packageID - t.physicalID_int = packageID_int + t.physicalPackageID = physicalPackageID + t.physicalPackageID_int = physicalPackageID_int t.coreID = coreID t.coreID_int = coreID_int t.scalingCurFreqFile = scalingCurFreqFile @@ -136,7 +136,7 @@ func (m *CPUFreqCollector) Init(config []byte) error { for i := range m.topology { t := &m.topology[i] - globalID := t.physicalID + ":" + t.coreID + globalID := t.physicalPackageID + ":" + t.coreID t.isHT = coreSeenBefore[globalID] coreSeenBefore[globalID] = true } @@ -148,8 +148,8 @@ func (m *CPUFreqCollector) Init(config []byte) error { t := &m.topology[i] // Update maxPackageID - if t.physicalID_int > maxPhysicalID { - maxPhysicalID = t.physicalID_int + if t.physicalPackageID_int > maxPhysicalID { + maxPhysicalID = t.physicalPackageID_int } if !t.isHT { @@ -162,20 +162,19 @@ func (m *CPUFreqCollector) Init(config []byte) error { numNonHT := fmt.Sprint(numNonHT_int) for i := range m.topology { t := &m.topology[i] - t.numPhysicalID = numPhysicalID - t.numPhysicalID_int = numPhysicalID_int + t.numPhysicalPackages = numPhysicalID + t.numPhysicalPackages_int = numPhysicalID_int t.numNonHT = numNonHT t.numNonHT_int = numNonHT_int t.tagSet = map[string]string{ "type": "cpu", "type-id": t.processor, "num_core": t.numNonHT, - "package_id": t.physicalID, - "num_package": t.numPhysicalID, + "package_id": t.physicalPackageID, + "num_package": t.numPhysicalPackages, } } - fmt.Printf("%+v\n", m.topology) m.init = true return nil } From e095e4f202e2335aba925de1d45dc69a7e2a017e Mon Sep 17 00:00:00 2001 From: Holger Obermaier <40787752+ho-ob@users.noreply.github.com> Date: Tue, 25 Jan 2022 09:47:24 +0100 Subject: [PATCH 20/22] Refactoring --- collectors/cpufreqCpuinfoMetric.go | 82 +++++++++++++++++------------- collectors/cpufreqMetric.go | 14 ++--- 2 files changed, 55 insertions(+), 41 deletions(-) diff --git a/collectors/cpufreqCpuinfoMetric.go b/collectors/cpufreqCpuinfoMetric.go index 1658878..e8cd0fc 100644 --- a/collectors/cpufreqCpuinfoMetric.go +++ b/collectors/cpufreqCpuinfoMetric.go @@ -19,13 +19,17 @@ import ( // Only measure on the first hyperthread // type CPUFreqCpuInfoCollectorTopology struct { - processor string // logical processor number (continuous, starting at 0) - coreID string // socket local core ID - physicalID string // socket / package ID - numPhysicalID string // number of sockets / packages - isHT bool - numNonHT string // number of non hyperthreading processors - tagSet map[string]string + processor string // logical processor number (continuous, starting at 0) + coreID string // socket local core ID + coreID_int int + physicalPackageID string // socket / package ID + physicalPackageID_int int + numPhysicalPackages string // number of sockets / packages + numPhysicalPackages_int int + isHT bool + numNonHT string // number of non hyperthreading processors + numNonHT_int int + tagSet map[string]string } type CPUFreqCpuInfoCollector struct { @@ -46,10 +50,10 @@ func (m *CPUFreqCpuInfoCollector) Init(config []byte) error { // Collect topology information from file cpuinfo foundFreq := false processor := "" - numNonHT := 0 + numNonHT_int := 0 coreID := "" - physicalID := "" - maxPhysicalID := 0 + physicalPackageID := "" + maxPhysicalPackageID := 0 m.topology = make([]CPUFreqCpuInfoCollectorTopology, 0) coreSeenBefore := make(map[string]bool) scanner := bufio.NewScanner(file) @@ -67,7 +71,7 @@ func (m *CPUFreqCpuInfoCollector) Init(config []byte) error { case "core id": coreID = value case "physical id": - physicalID = value + physicalPackageID = value } } @@ -75,55 +79,65 @@ func (m *CPUFreqCpuInfoCollector) Init(config []byte) error { if foundFreq && len(processor) > 0 && len(coreID) > 0 && - len(physicalID) > 0 { + len(physicalPackageID) > 0 { - globalID := physicalID + ":" + coreID + coreID_int, err := strconv.Atoi(coreID) + if err != nil { + return fmt.Errorf("Unable to convert coreID to int: %v", err) + } + physicalPackageID_int, err := strconv.Atoi(physicalPackageID) + if err != nil { + return fmt.Errorf("Unable to convert physicalPackageID to int: %v", err) + } + + // increase maximun socket / package ID, when required + if physicalPackageID_int > maxPhysicalPackageID { + maxPhysicalPackageID = physicalPackageID_int + } + + globalID := physicalPackageID + ":" + coreID isHT := coreSeenBefore[globalID] coreSeenBefore[globalID] = true if !isHT { // increase number on non hyper thread cores - numNonHT++ - - // increase maximun socket / package ID, when required - physicalIDInt, err := strconv.Atoi(physicalID) - if err != nil { - return fmt.Errorf("Failed to convert physical id to int: %v", err) - } - if physicalIDInt > maxPhysicalID { - maxPhysicalID = physicalIDInt - } + numNonHT_int++ } // store collected topology information m.topology = append( m.topology, CPUFreqCpuInfoCollectorTopology{ - processor: processor, - coreID: coreID, - physicalID: physicalID, - isHT: isHT, + processor: processor, + coreID: coreID, + coreID_int: coreID_int, + physicalPackageID: physicalPackageID, + physicalPackageID_int: physicalPackageID_int, + isHT: isHT, }) // reset topology information foundFreq = false processor = "" coreID = "" - physicalID = "" + physicalPackageID = "" } } - numPhysicalID := fmt.Sprint(maxPhysicalID + 1) - numNonHTString := fmt.Sprint(numNonHT) + numPhysicalPackageID_int := maxPhysicalPackageID + 1 + numPhysicalPackageID := fmt.Sprint(numPhysicalPackageID_int) + numNonHT := fmt.Sprint(numNonHT_int) for i := range m.topology { t := &m.topology[i] - t.numPhysicalID = numPhysicalID - t.numNonHT = numNonHTString + t.numPhysicalPackages = numPhysicalPackageID + t.numPhysicalPackages_int = numPhysicalPackageID_int + t.numNonHT = numNonHT + t.numNonHT_int = numNonHT_int t.tagSet = map[string]string{ "type": "cpu", "type-id": t.processor, "num_core": t.numNonHT, - "package_id": t.physicalID, - "num_package": t.numPhysicalID, + "package_id": t.physicalPackageID, + "num_package": t.numPhysicalPackages, } } diff --git a/collectors/cpufreqMetric.go b/collectors/cpufreqMetric.go index 35e64ac..fcab782 100644 --- a/collectors/cpufreqMetric.go +++ b/collectors/cpufreqMetric.go @@ -143,13 +143,13 @@ func (m *CPUFreqCollector) Init(config []byte) error { // number of non hyper thread cores and packages / sockets numNonHT_int := 0 - maxPhysicalID := 0 + maxPhysicalPackageID := 0 for i := range m.topology { t := &m.topology[i] // Update maxPackageID - if t.physicalPackageID_int > maxPhysicalID { - maxPhysicalID = t.physicalPackageID_int + if t.physicalPackageID_int > maxPhysicalPackageID { + maxPhysicalPackageID = t.physicalPackageID_int } if !t.isHT { @@ -157,13 +157,13 @@ func (m *CPUFreqCollector) Init(config []byte) error { } } - numPhysicalID_int := maxPhysicalID + 1 - numPhysicalID := fmt.Sprint(numPhysicalID_int) + numPhysicalPackageID_int := maxPhysicalPackageID + 1 + numPhysicalPackageID := fmt.Sprint(numPhysicalPackageID_int) numNonHT := fmt.Sprint(numNonHT_int) for i := range m.topology { t := &m.topology[i] - t.numPhysicalPackages = numPhysicalID - t.numPhysicalPackages_int = numPhysicalID_int + t.numPhysicalPackages = numPhysicalPackageID + t.numPhysicalPackages_int = numPhysicalPackageID_int t.numNonHT = numNonHT t.numNonHT_int = numNonHT_int t.tagSet = map[string]string{ From df77c3fd60688266466f7bb07aae84e2373ebb86 Mon Sep 17 00:00:00 2001 From: Holger Obermaier <40787752+ho-ob@users.noreply.github.com> Date: Tue, 25 Jan 2022 10:32:08 +0100 Subject: [PATCH 21/22] Avoid vet warning: Println arg list ends with redundant newline --- collectors/infinibandMetric.go | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/collectors/infinibandMetric.go b/collectors/infinibandMetric.go index 6e14251..db7c129 100644 --- a/collectors/infinibandMetric.go +++ b/collectors/infinibandMetric.go @@ -17,9 +17,10 @@ import ( "time" ) -const IBBASEPATH = `/sys/class/infiniband/` -const LIDFILE = `/sys/class/infiniband/mlx4_0/ports/1/lid` -const PERFQUERY = `/usr/sbin/perfquery` +const ( + IBBASEPATH = `/sys/class/infiniband/` + PERFQUERY = `/usr/sbin/perfquery` +) type InfinibandCollectorConfig struct { ExcludeDevices []string `json:"exclude_devices,omitempty"` @@ -40,12 +41,14 @@ func (m *InfinibandCollector) Help() { fmt.Println("The devices can be filtered with the 'exclude_devices' option in the configuration.") fmt.Println("For each found LIDs the collector calls the 'perfquery' command") fmt.Println("The path to the 'perfquery' command can be configured with the 'perfquery_path' option") - fmt.Println("in the configuration\n") + fmt.Println("in the configuration") + fmt.Println("") fmt.Println("Full configuration object:") fmt.Println("\"ibstat\" : {") fmt.Println(" \"perfquery_path\" : \"path/to/perfquery\" # if omitted, it searches in $PATH") fmt.Println(" \"exclude_devices\" : [\"dev1\"]") - fmt.Println("}\n") + fmt.Println("}") + fmt.Println("") fmt.Println("Metrics:") fmt.Println("- ib_recv") fmt.Println("- ib_xmit") From 222862af322710872a5b88d21d5723a84cce79d8 Mon Sep 17 00:00:00 2001 From: Holger Obermaier <40787752+ho-ob@users.noreply.github.com> Date: Tue, 25 Jan 2022 11:12:06 +0100 Subject: [PATCH 22/22] Avoid vet warning struct field commands has json tag but is not exported --- collectors/customCmdMetric.go | 8 ++++---- collectors/lustreMetric.go | 4 ++-- collectors/topprocsMetric.go | 8 ++++---- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/collectors/customCmdMetric.go b/collectors/customCmdMetric.go index bbafc2d..e11f4c7 100644 --- a/collectors/customCmdMetric.go +++ b/collectors/customCmdMetric.go @@ -15,8 +15,8 @@ import ( const CUSTOMCMDPATH = `/home/unrz139/Work/cc-metric-collector/collectors/custom` type CustomCmdCollectorConfig struct { - commands []string `json:"commands"` - files []string `json:"files"` + Commands []string `json:"commands"` + Files []string `json:"files"` ExcludeMetrics []string `json:"exclude_metrics"` } @@ -40,7 +40,7 @@ func (m *CustomCmdCollector) Init(config []byte) error { } } m.setup() - for _, c := range m.config.commands { + for _, c := range m.config.Commands { cmdfields := strings.Fields(c) command := exec.Command(cmdfields[0], strings.Join(cmdfields[1:], " ")) command.Wait() @@ -49,7 +49,7 @@ func (m *CustomCmdCollector) Init(config []byte) error { m.commands = append(m.commands, c) } } - for _, f := range m.config.files { + for _, f := range m.config.Files { _, err = ioutil.ReadFile(f) if err == nil { m.files = append(m.files, f) diff --git a/collectors/lustreMetric.go b/collectors/lustreMetric.go index d77ac09..8931f84 100644 --- a/collectors/lustreMetric.go +++ b/collectors/lustreMetric.go @@ -15,7 +15,7 @@ import ( const LUSTREFILE = `/proc/fs/lustre/llite/lnec-XXXXXX/stats` type LustreCollectorConfig struct { - procfiles []string `json:"procfiles"` + Procfiles []string `json:"procfiles"` ExcludeMetrics []string `json:"exclude_metrics"` } @@ -47,7 +47,7 @@ func (m *LustreCollector) Init(config []byte) error { "statfs": {"statfs": 1}, "inode_permission": {"inode_permission": 1}} m.devices = make([]string, 0) - for _, p := range m.config.procfiles { + for _, p := range m.config.Procfiles { _, err := ioutil.ReadFile(p) if err == nil { m.devices = append(m.devices, p) diff --git a/collectors/topprocsMetric.go b/collectors/topprocsMetric.go index e1b31ee..715b8c3 100644 --- a/collectors/topprocsMetric.go +++ b/collectors/topprocsMetric.go @@ -16,7 +16,7 @@ const MAX_NUM_PROCS = 10 const DEFAULT_NUM_PROCS = 2 type TopProcsCollectorConfig struct { - num_procs int `json:"num_procs"` + Num_procs int `json:"num_procs"` } type TopProcsCollector struct { @@ -35,9 +35,9 @@ func (m *TopProcsCollector) Init(config []byte) error { return err } } else { - m.config.num_procs = int(DEFAULT_NUM_PROCS) + m.config.Num_procs = int(DEFAULT_NUM_PROCS) } - if m.config.num_procs <= 0 || m.config.num_procs > MAX_NUM_PROCS { + if m.config.Num_procs <= 0 || m.config.Num_procs > MAX_NUM_PROCS { return errors.New(fmt.Sprintf("num_procs option must be set in 'topprocs' config (range: 1-%d)", MAX_NUM_PROCS)) } m.setup() @@ -64,7 +64,7 @@ func (m *TopProcsCollector) Read(interval time.Duration, out *[]lp.MutableMetric } lines := strings.Split(string(stdout), "\n") - for i := 1; i < m.config.num_procs+1; i++ { + for i := 1; i < m.config.Num_procs+1; i++ { name := fmt.Sprintf("topproc%d", i) y, err := lp.New(name, m.tags, map[string]interface{}{"value": string(lines[i])}, time.Now()) if err == nil {