From 43a8ea683dd398f6d3df9508c93a48c8f08debba Mon Sep 17 00:00:00 2001 From: Thomas Roehl Date: Thu, 6 Jan 2022 15:25:51 +0100 Subject: [PATCH 01/45] Cast collector measurement duration to seconds. Thanks to KIT --- collectors/likwidMetric.go | 2 +- metric-collector.go | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/collectors/likwidMetric.go b/collectors/likwidMetric.go index 2fd1129..34e2364 100644 --- a/collectors/likwidMetric.go +++ b/collectors/likwidMetric.go @@ -200,7 +200,7 @@ func (m *LikwidCollector) Read(interval time.Duration, out *[]lp.MutableMetric) res := C.perfmon_getLastResult(gid, eidx, C.int(tid)) m.results[i][tid][gctr] = float64(res) } - m.results[i][tid]["time"] = float64(interval) + m.results[i][tid]["time"] = interval.Seconds() m.results[i][tid]["inverseClock"] = float64(1.0 / m.basefreq) for _, metric := range evset.Metrics { expression, err := govaluate.NewEvaluableExpression(metric.Calc) diff --git a/metric-collector.go b/metric-collector.go index f6c8f5c..fd3b556 100644 --- a/metric-collector.go +++ b/metric-collector.go @@ -281,7 +281,7 @@ func main() { // storage locations for _, c := range config.Collectors { col := Collectors[c] - col.Read(time.Duration(config.Duration), &tmpPoints) + col.Read(time.Duration(config.Duration)*time.Second, &tmpPoints) for { if len(tmpPoints) == 0 { From 11e40c6ee3a11a5d1add7964e37813ec23c512c1 Mon Sep 17 00:00:00 2001 From: Holger Obermaier <40787752+ho-ob@users.noreply.github.com> Date: Wed, 19 Jan 2022 10:15:41 +0100 Subject: [PATCH 02/45] Add IB metrics ib_recv_pkts and ib_xmit_pkts --- collectors/infinibandMetric.go | 44 ++++++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) diff --git a/collectors/infinibandMetric.go b/collectors/infinibandMetric.go index 93725d1..54c974e 100644 --- a/collectors/infinibandMetric.go +++ b/collectors/infinibandMetric.go @@ -47,6 +47,8 @@ func (m *InfinibandCollector) Help() { fmt.Println("Metrics:") fmt.Println("- ib_recv") 
fmt.Println("- ib_xmit") + fmt.Println("- ib_recv_pkts") + fmt.Println("- ib_xmit_pkts") } func (m *InfinibandCollector) Init(config []byte) error { @@ -143,6 +145,26 @@ func DoPerfQuery(cmd string, dev string, lid string, port string, tags map[strin } } } + if strings.HasPrefix(line, "PortRcvPkts") || strings.HasPrefix(line, "RcvPkts") { + lv := strings.Fields(line) + v, err := strconv.ParseFloat(lv[1], 64) + if err == nil { + y, err := lp.New("ib_recv_pkts", tags, map[string]interface{}{"value": float64(v)}, time.Now()) + if err == nil { + *out = append(*out, y) + } + } + } + if strings.HasPrefix(line, "PortXmitPkts") || strings.HasPrefix(line, "XmtPkts") { + lv := strings.Fields(line) + v, err := strconv.ParseFloat(lv[1], 64) + if err == nil { + y, err := lp.New("ib_xmit_pkts", tags, map[string]interface{}{"value": float64(v)}, time.Now()) + if err == nil { + *out = append(*out, y) + } + } + } } return nil } @@ -171,6 +193,28 @@ func DoSysfsRead(dev string, lid string, port string, tags map[string]string, ou } } } + buffer, err = ioutil.ReadFile(fmt.Sprintf("%s/port_rcv_packets", path)) + if err == nil { + data := strings.Replace(string(buffer), "\n", "", -1) + v, err := strconv.ParseFloat(data, 64) + if err == nil { + y, err := lp.New("ib_recv_pkts", tags, map[string]interface{}{"value": float64(v)}, time.Now()) + if err == nil { + *out = append(*out, y) + } + } + } + buffer, err = ioutil.ReadFile(fmt.Sprintf("%s/port_xmit_packets", path)) + if err == nil { + data := strings.Replace(string(buffer), "\n", "", -1) + v, err := strconv.ParseFloat(data, 64) + if err == nil { + y, err := lp.New("ib_xmit_pkts", tags, map[string]interface{}{"value": float64(v)}, time.Now()) + if err == nil { + *out = append(*out, y) + } + } + } return nil } From b97c5886600c1be0e130481ae9da65f56c268c59 Mon Sep 17 00:00:00 2001 From: Holger Obermaier <40787752+ho-ob@users.noreply.github.com> Date: Wed, 19 Jan 2022 14:25:24 +0100 Subject: [PATCH 03/45] Add GPFS / IBM Spectrum Scale 
collector --- collectors/gpfs.go | 298 ++++++++++++++++++++++++++++++++++++++++++++ metric-collector.go | 1 + 2 files changed, 299 insertions(+) create mode 100644 collectors/gpfs.go diff --git a/collectors/gpfs.go b/collectors/gpfs.go new file mode 100644 index 0000000..14398b4 --- /dev/null +++ b/collectors/gpfs.go @@ -0,0 +1,298 @@ +package collectors + +import ( + "bufio" + "bytes" + "encoding/json" + "fmt" + "io/ioutil" + "log" + "os" + "os/exec" + "os/user" + "strconv" + "strings" + "time" + + lp "github.com/influxdata/line-protocol" +) + +type GpfsCollectorConfig struct { + Mmpmon string `json:"mmpmon"` +} + +type GpfsCollector struct { + MetricCollector + config GpfsCollectorConfig +} + +func (m *GpfsCollector) Init(config []byte) error { + var err error + m.name = "GpfsCollector" + m.setup() + + // Set default mmpmon binary + m.config.Mmpmon = "/usr/lpp/mmfs/bin/mmpmon" + + // Read JSON configuration + if len(config) > 0 { + err = json.Unmarshal(config, &m.config) + if err != nil { + log.Print(err.Error()) + return err + } + } + + // GPFS / IBM Spectrum Scale file system statistics can only be queried by user root + user, err := user.Current() + if err != nil { + return fmt.Errorf("GpfsCollector.Init(): Failed to get current user: %v", err) + } + if user.Uid != "0" { + return fmt.Errorf("GpfsCollector.Init(): GPFS file system statistics can only be queried by user root") + } + + // Check if mmpmon is in executable search path + _, err = exec.LookPath(m.config.Mmpmon) + if err != nil { + return fmt.Errorf("GpfsCollector.Init(): Failed to find mmpmon binary '%s': %v", m.config.Mmpmon, err) + } + + m.init = true + return nil +} + +func (m *GpfsCollector) Read(interval time.Duration, out *[]lp.MutableMetric) { + if !m.init { + return + } + + // mmpmon: + // -p: generate output that can be parsed + // -s: suppress the prompt on input + // fs_io_s: Displays I/O statistics per mounted file system + cmd := exec.Command(m.config.Mmpmon, "-p", "-s") + cmd.Stdin = 
strings.NewReader("once fs_io_s\n") + cmdStdout := new(bytes.Buffer) + cmdStderr := new(bytes.Buffer) + cmd.Stdout = cmdStdout + cmd.Stderr = cmdStderr + err := cmd.Run() + if err != nil { + fmt.Fprintf(os.Stderr, "GpfsCollector.Read(): Failed to execute command \"%s\": %s\n", cmd.String(), err.Error()) + fmt.Fprintf(os.Stderr, "GpfsCollector.Read(): command exit code: \"%d\"\n", cmd.ProcessState.ExitCode()) + data, _ := ioutil.ReadAll(cmdStderr) + fmt.Fprintf(os.Stderr, "GpfsCollector.Read(): command stderr: \"%s\"\n", string(data)) + data, _ = ioutil.ReadAll(cmdStdout) + fmt.Fprintf(os.Stderr, "GpfsCollector.Read(): command stdout: \"%s\"\n", string(data)) + return + } + + // Read I/O statistics + scanner := bufio.NewScanner(cmdStdout) + for scanner.Scan() { + lineSplit := strings.Fields(scanner.Text()) + if lineSplit[0] == "_fs_io_s_" { + key_value := make(map[string]string) + for i := 1; i < len(lineSplit); i += 2 { + key_value[lineSplit[i]] = lineSplit[i+1] + } + + // Ignore keys: + // _n_: node IP address, + // _nn_: node name, + // _cl_: cluster name, + // _d_: number of disks + + filesystem, ok := key_value["_fs_"] + if !ok { + fmt.Fprintf(os.Stderr, "GpfsCollector.Read(): Failed to get filesystem name.\n") + continue + } + + // return code + rc, err := strconv.Atoi(key_value["_rc_"]) + if err != nil { + fmt.Fprintf(os.Stderr, "GpfsCollector.Read(): Failed to convert return code: %s\n", err.Error()) + continue + } + if rc != 0 { + fmt.Fprintf(os.Stderr, "GpfsCollector.Read(): Filesystem %s not ok.", filesystem) + continue + } + + // unix epoch in microseconds + timestampInt, err := strconv.ParseInt(key_value["_t_"]+key_value["_tu_"], 10, 64) + timestamp := time.UnixMicro(timestampInt) + if err != nil { + fmt.Fprintf(os.Stderr, + "GpfsCollector.Read(): Failed to convert time stamp '%s': %s\n", + key_value["_t_"]+key_value["_tu_"], err.Error()) + continue + } + + // bytes read + bytesRead, err := strconv.ParseInt(key_value["_br_"], 10, 64) + if err != nil { + 
fmt.Fprintf(os.Stderr, + "GpfsCollector.Read(): Failed to convert bytes read '%s': %s\n", + key_value["_br_"], err.Error()) + continue + } + y, err := lp.New( + "gpfs_bytes_read", + map[string]string{ + "filesystem": filesystem, + }, + map[string]interface{}{ + "value": bytesRead, + }, + timestamp) + if err == nil { + *out = append(*out, y) + } + + // bytes written + bytesWritten, err := strconv.ParseInt(key_value["_bw_"], 10, 64) + if err != nil { + fmt.Fprintf(os.Stderr, + "GpfsCollector.Read(): Failed to convert bytes written '%s': %s\n", + key_value["_bw_"], err.Error()) + continue + } + y, err = lp.New( + "gpfs_bytes_written", + map[string]string{ + "filesystem": filesystem, + }, + map[string]interface{}{ + "value": bytesWritten, + }, + timestamp) + if err == nil { + *out = append(*out, y) + } + + // number of opens + numOpens, err := strconv.ParseInt(key_value["_oc_"], 10, 64) + if err != nil { + fmt.Fprintf(os.Stderr, + "GpfsCollector.Read(): Failed to convert number of opens '%s': %s\n", + key_value["_oc_"], err.Error()) + continue + } + y, err = lp.New( + "gpfs_num_opens", + map[string]string{ + "filesystem": filesystem, + }, + map[string]interface{}{ + "value": numOpens, + }, + timestamp) + if err == nil { + *out = append(*out, y) + } + + // number of closes + numCloses, err := strconv.ParseInt(key_value["_cc_"], 10, 64) + if err != nil { + fmt.Fprintf(os.Stderr, "GpfsCollector.Read(): Failed to convert number of closes: %s\n", err.Error()) + continue + } + y, err = lp.New( + "gpfs_num_closes", + map[string]string{ + "filesystem": filesystem, + }, + map[string]interface{}{ + "value": numCloses, + }, + timestamp) + if err == nil { + *out = append(*out, y) + } + + // number of reads + numReads, err := strconv.ParseInt(key_value["_rdc_"], 10, 64) + if err != nil { + fmt.Fprintf(os.Stderr, "GpfsCollector.Read(): Failed to convert number of reads: %s\n", err.Error()) + continue + } + y, err = lp.New( + "gpfs_num_reads", + map[string]string{ + "filesystem": 
filesystem, + }, + map[string]interface{}{ + "value": numReads, + }, + timestamp) + if err == nil { + *out = append(*out, y) + } + + // number of writes + numWrites, err := strconv.ParseInt(key_value["_wc_"], 10, 64) + if err != nil { + fmt.Fprintf(os.Stderr, "GpfsCollector.Read(): Failed to convert number of writes: %s\n", err.Error()) + continue + } + y, err = lp.New( + "gpfs_num_writes", + map[string]string{ + "filesystem": filesystem, + }, + map[string]interface{}{ + "value": numWrites, + }, + timestamp) + if err == nil { + *out = append(*out, y) + } + + // number of read directories + numReaddirs, err := strconv.ParseInt(key_value["_dir_"], 10, 64) + if err != nil { + fmt.Fprintf(os.Stderr, "GpfsCollector.Read(): Failed to convert number of read directories: %s\n", err.Error()) + continue + } + y, err = lp.New( + "gpfs_num_readdirs", + map[string]string{ + "filesystem": filesystem, + }, + map[string]interface{}{ + "value": numReaddirs, + }, + timestamp) + if err == nil { + *out = append(*out, y) + } + + // Number of inode updates + numInodeUpdates, err := strconv.ParseInt(key_value["_iu_"], 10, 64) + if err != nil { + fmt.Fprintf(os.Stderr, "GpfsCollector.Read(): Failed to convert Number of inode updates: %s\n", err.Error()) + continue + } + y, err = lp.New( + "gpfs_num_inode_updates", + map[string]string{ + "filesystem": filesystem, + }, + map[string]interface{}{ + "value": numInodeUpdates, + }, + timestamp) + if err == nil { + *out = append(*out, y) + } + } + } +} + +func (m *GpfsCollector) Close() { + m.init = false + return +} diff --git a/metric-collector.go b/metric-collector.go index fd3b556..0b75675 100644 --- a/metric-collector.go +++ b/metric-collector.go @@ -32,6 +32,7 @@ var Collectors = map[string]collectors.MetricGetter{ "diskstat": &collectors.DiskstatCollector{}, "tempstat": &collectors.TempCollector{}, "ipmistat": &collectors.IpmiCollector{}, + "gpfs": &collectors.GpfsCollector{}, } var Sinks = map[string]sinks.SinkFuncs{ From 
82b10b365e5459f07889a5a2de6a2c2b68e7458c Mon Sep 17 00:00:00 2001 From: Holger Obermaier <40787752+ho-ob@users.noreply.github.com> Date: Wed, 19 Jan 2022 14:47:59 +0100 Subject: [PATCH 04/45] Fix to work with golang 1.16 --- collectors/gpfs.go | 3 +++ 1 file changed, 3 insertions(+) diff --git a/collectors/gpfs.go b/collectors/gpfs.go index 14398b4..db8a0d0 100644 --- a/collectors/gpfs.go +++ b/collectors/gpfs.go @@ -121,6 +121,7 @@ func (m *GpfsCollector) Read(interval time.Duration, out *[]lp.MutableMetric) { continue } + /* requires go 1.17 // unix epoch in microseconds timestampInt, err := strconv.ParseInt(key_value["_t_"]+key_value["_tu_"], 10, 64) timestamp := time.UnixMicro(timestampInt) @@ -130,6 +131,8 @@ func (m *GpfsCollector) Read(interval time.Duration, out *[]lp.MutableMetric) { key_value["_t_"]+key_value["_tu_"], err.Error()) continue } + */ + timestamp := time.Now() // bytes read bytesRead, err := strconv.ParseInt(key_value["_br_"], 10, 64) From 7b29a14e1a89ba515935ede4ba308681007a9321 Mon Sep 17 00:00:00 2001 From: Holger Obermaier <40787752+ho-ob@users.noreply.github.com> Date: Wed, 19 Jan 2022 15:35:12 +0100 Subject: [PATCH 05/45] Drop domain part of host name --- metric-collector.go | 2 ++ 1 file changed, 2 insertions(+) diff --git a/metric-collector.go b/metric-collector.go index 0b75675..04c221f 100644 --- a/metric-collector.go +++ b/metric-collector.go @@ -166,6 +166,8 @@ func main() { log.Print(err) return } + // Drop domain part of host name + host = strings.SplitN(host, `.`, 2)[0] clicfg := ReadCli() err = CreatePidfile(clicfg["pidfile"]) err = SetLogging(clicfg["logfile"]) From f17719113d3fe84d178b0a13b01e8066308b0df8 Mon Sep 17 00:00:00 2001 From: Holger Obermaier <40787752+ho-ob@users.noreply.github.com> Date: Wed, 19 Jan 2022 15:55:15 +0100 Subject: [PATCH 06/45] Updated to latest stable version of likwid --- collectors/Makefile | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/collectors/Makefile 
b/collectors/Makefile index ab47caa..0c637b5 100644 --- a/collectors/Makefile +++ b/collectors/Makefile @@ -8,9 +8,9 @@ ACCESSMODE = direct # if CENTRAL_INSTALL == true ####################################################################### # Path to central installation (if CENTRAL_INSTALL=true) -LIKWID_BASE=/apps/likwid/5.2.0 -# LIKWID version (should be same major version as central installation, 5.1.x) -LIKWID_VERSION = 5.2.0 +LIKWID_BASE=/apps/likwid/5.2.1 +# LIKWID version (should be same major version as central installation, 5.2.x) +LIKWID_VERSION = 5.2.1 ####################################################################### # if CENTRAL_INSTALL == false and ACCESSMODE == accessdaemon From dcb5b4add5a003b9bc4e70696995f46ad373a3d6 Mon Sep 17 00:00:00 2001 From: Holger Obermaier <40787752+ho-ob@users.noreply.github.com> Date: Wed, 19 Jan 2022 16:41:32 +0100 Subject: [PATCH 07/45] Define source code dependencies in Makefile --- Makefile | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/Makefile b/Makefile index e82685e..f49162e 100644 --- a/Makefile +++ b/Makefile @@ -1,20 +1,27 @@ APP = cc-metric-collector +GOSRC_APP := metric-collector.go +GOSRC_COLLECTORS := $(wildcard collectors/*.go) +GOSRC_SINKS := $(wildcard sinks/*.go) +GOSRC_RECEIVERS := $(wildcard receivers/*.go) +GOSRC := $(GOSRC_APP) $(GOSRC_COLLECTORS) $(GOSRC_SINKS) $(GOSRC_RECEIVERS) +.PHONY: all all: $(APP) -$(APP): metric-collector.go +$(APP): $(GOSRC) make -C collectors go get - go build -o $(APP) metric-collector.go + go build -o $(APP) $(GOSRC_APP) +.PHONY: clean clean: make -C collectors clean rm -f $(APP) +.PHONY: fmt fmt: - go fmt collectors/*.go - go fmt sinks/*.go - go fmt receivers/*.go - go fmt metric-collector.go + go fmt $(GOSRC_COLLECTORS) + go fmt $(GOSRC_SINKS) + go fmt $(GOSRC_RECEIVERS) + go fmt $(GOSRC_APP) -.PHONY: clean From f91150f4ba9db90b038d3414df37cff7d0e488db Mon Sep 17 00:00:00 2001 From: Holger Obermaier 
<40787752+ho-ob@users.noreply.github.com> Date: Thu, 20 Jan 2022 12:09:22 +0100 Subject: [PATCH 08/45] Add vet and staticcheck make targets --- Makefile | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/Makefile b/Makefile index f49162e..91a1200 100644 --- a/Makefile +++ b/Makefile @@ -25,3 +25,14 @@ fmt: go fmt $(GOSRC_RECEIVERS) go fmt $(GOSRC_APP) +# Examine Go source code and reports suspicious constructs +.PHONY: vet + go vet ./... + + +# Run linter for the Go programming language. +# Using static analysis, it finds bugs and performance issues, offers simplifications, and enforces style rules +.PHONY: staticcheck +staticcheck: + go install honnef.co/go/tools/cmd/staticcheck@latest + $$(go env GOPATH)/bin/staticcheck ./... From 8860b8d0f76408fc078414f38be0d4311d726d78 Mon Sep 17 00:00:00 2001 From: Holger Obermaier <40787752+ho-ob@users.noreply.github.com> Date: Thu, 20 Jan 2022 12:13:50 +0100 Subject: [PATCH 09/45] Add vet and staticcheck make targets --- Makefile | 1 + 1 file changed, 1 insertion(+) diff --git a/Makefile b/Makefile index 91a1200..892bbcc 100644 --- a/Makefile +++ b/Makefile @@ -27,6 +27,7 @@ fmt: # Examine Go source code and reports suspicious constructs .PHONY: vet +vet: go vet ./... 
From 5d263adddec4b23fc6299fa99fc017615ba3b163 Mon Sep 17 00:00:00 2001 From: Holger Obermaier <40787752+ho-ob@users.noreply.github.com> Date: Thu, 20 Jan 2022 12:38:52 +0100 Subject: [PATCH 10/45] Avoid go vet warning: struct field tag `json:"..., omitempty"` not compatible with reflect.StructTag.Get: suspicious space in struct tag value --- collectors/cpustatMetric.go | 2 +- collectors/diskstatMetric.go | 2 +- collectors/infinibandMetric.go | 2 +- collectors/loadavgMetric.go | 2 +- collectors/nvidiaMetric.go | 4 ++-- 5 files changed, 6 insertions(+), 6 deletions(-) diff --git a/collectors/cpustatMetric.go b/collectors/cpustatMetric.go index fe31c3c..9e44fa7 100644 --- a/collectors/cpustatMetric.go +++ b/collectors/cpustatMetric.go @@ -13,7 +13,7 @@ import ( const CPUSTATFILE = `/proc/stat` type CpustatCollectorConfig struct { - ExcludeMetrics []string `json:"exclude_metrics, omitempty"` + ExcludeMetrics []string `json:"exclude_metrics,omitempty"` } type CpustatCollector struct { diff --git a/collectors/diskstatMetric.go b/collectors/diskstatMetric.go index e2d2f25..5080ca2 100644 --- a/collectors/diskstatMetric.go +++ b/collectors/diskstatMetric.go @@ -15,7 +15,7 @@ const DISKSTATFILE = `/proc/diskstats` const DISKSTAT_SYSFSPATH = `/sys/block` type DiskstatCollectorConfig struct { - ExcludeMetrics []string `json:"exclude_metrics, omitempty"` + ExcludeMetrics []string `json:"exclude_metrics,omitempty"` } type DiskstatCollector struct { diff --git a/collectors/infinibandMetric.go b/collectors/infinibandMetric.go index 54c974e..a9552f7 100644 --- a/collectors/infinibandMetric.go +++ b/collectors/infinibandMetric.go @@ -20,7 +20,7 @@ const LIDFILE = `/sys/class/infiniband/mlx4_0/ports/1/lid` const PERFQUERY = `/usr/sbin/perfquery` type InfinibandCollectorConfig struct { - ExcludeDevices []string `json:"exclude_devices, omitempty"` + ExcludeDevices []string `json:"exclude_devices,omitempty"` PerfQueryPath string `json:"perfquery_path"` } diff --git 
a/collectors/loadavgMetric.go b/collectors/loadavgMetric.go index dbccf22..21cf350 100644 --- a/collectors/loadavgMetric.go +++ b/collectors/loadavgMetric.go @@ -12,7 +12,7 @@ import ( const LOADAVGFILE = `/proc/loadavg` type LoadavgCollectorConfig struct { - ExcludeMetrics []string `json:"exclude_metrics, omitempty"` + ExcludeMetrics []string `json:"exclude_metrics,omitempty"` } type LoadavgCollector struct { diff --git a/collectors/nvidiaMetric.go b/collectors/nvidiaMetric.go index 4597610..bd63e2c 100644 --- a/collectors/nvidiaMetric.go +++ b/collectors/nvidiaMetric.go @@ -11,8 +11,8 @@ import ( ) type NvidiaCollectorConfig struct { - ExcludeMetrics []string `json:"exclude_metrics, omitempty"` - ExcludeDevices []string `json:"exclude_devices, omitempty"` + ExcludeMetrics []string `json:"exclude_metrics,omitempty"` + ExcludeDevices []string `json:"exclude_devices,omitempty"` } type NvidiaCollector struct { From 0feb880c3b65cd01547de519109d535755f65a47 Mon Sep 17 00:00:00 2001 From: Holger Obermaier <40787752+ho-ob@users.noreply.github.com> Date: Thu, 20 Jan 2022 16:32:10 +0100 Subject: [PATCH 11/45] Correct go syntax in README.md --- collectors/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/collectors/README.md b/collectors/README.md index b5ae4e1..df02dd6 100644 --- a/collectors/README.md +++ b/collectors/README.md @@ -339,7 +339,7 @@ func (m *SampleCollector) Read(interval time.Duration, out *[]lp.MutableMetric) return } // tags for the metric, if type != node use proper type and type-id - tags := map[string][string]{"type" : "node"} + tags := map[string]string{"type" : "node"} // Each metric has exactly one field: value ! 
value := map[string]interface{}{"value": int(x)} y, err := lp.New("sample_metric", tags, value, time.Now()) From 83b784e6f0315f2835c18c1fb494c150e28418c9 Mon Sep 17 00:00:00 2001 From: Holger Obermaier <40787752+ho-ob@users.noreply.github.com> Date: Fri, 21 Jan 2022 09:59:57 +0100 Subject: [PATCH 12/45] Add CPU frequency collector --- collectors/cpufreqMetric.go | 189 ++++++++++++++++++++++++++++++++++++ metric-collector.go | 12 ++- 2 files changed, 196 insertions(+), 5 deletions(-) create mode 100644 collectors/cpufreqMetric.go diff --git a/collectors/cpufreqMetric.go b/collectors/cpufreqMetric.go new file mode 100644 index 0000000..94f8f4a --- /dev/null +++ b/collectors/cpufreqMetric.go @@ -0,0 +1,189 @@ +package collectors + +import ( + "bufio" + "encoding/json" + "fmt" + "log" + "os" + "path/filepath" + "strconv" + "strings" + "time" + + lp "github.com/influxdata/line-protocol" + "golang.org/x/sys/unix" +) + +var warnLog *log.Logger = log.New(os.Stderr, "Warning: ", log.LstdFlags) + +// +// readOneLine reads one line from a file. +// It returns ok when file was successfully read. +// In this case text contains the first line of the files contents. 
+// +func readOneLine(filename string) (text string, ok bool) { + file, err := os.Open(filename) + if err != nil { + return + } + defer file.Close() + scanner := bufio.NewScanner(file) + ok = scanner.Scan() + text = scanner.Text() + return +} + +type CPUFreqCollectorCPU struct { + // coreID, packageID, num_cores, num_package + tagSet map[string]string + scalingCurFreqFile string +} + +// +// CPUFreqCollector +// a metric collector to measure the current frequency of the CPUs +// as obtained from the hardware (in KHz) +// Only measure on the first hyper thread +// +// See: https://www.kernel.org/doc/html/latest/admin-guide/pm/cpufreq.html +// +type CPUFreqCollector struct { + MetricCollector + config struct { + ExcludeMetrics []string `json:"exclude_metrics,omitempty"` + } + cpus []CPUFreqCollectorCPU +} + +func (m *CPUFreqCollector) Init(config []byte) error { + m.name = "CPUFreqCollector" + m.setup() + if len(config) > 0 { + err := json.Unmarshal(config, &m.config) + if err != nil { + return err + } + } + + // Initialize CPU list + m.cpus = make([]CPUFreqCollectorCPU, 0) + + // Loop for all CPU directories + baseDir := "/sys/devices/system/cpu" + globPattern := filepath.Join(baseDir, "cpu[0-9]*") + cpuDirs, err := filepath.Glob(globPattern) + if err != nil { + return fmt.Errorf("CPUFreqCollector.Init() unable to glob files with pattern %s: %v", globPattern, err) + } + if cpuDirs == nil { + return fmt.Errorf("CPUFreqCollector.Init() unable to find any files with pattern %s", globPattern) + } + + maxPackageID := 0 + maxCoreID := 0 + for _, cpuDir := range cpuDirs { + cpuID := strings.TrimPrefix(cpuDir, "/sys/devices/system/cpu/cpu") + + // Read thread sibling list + threadSiblingListFile := filepath.Join(cpuDir, "topology", "thread_siblings_list") + threadSiblingList, ok := readOneLine(threadSiblingListFile) + if !ok { + return fmt.Errorf("CPUFreqCollector.Init() unable to read thread siblings list from %s", threadSiblingListFile) + } + + // Read frequency only from 
first hardware thread + // Ignore Simultaneous Multithreading (SMT) / Hyper-Threading + if strings.Split(threadSiblingList, ",")[0] == cpuID { + // Read package ID + packageIDFile := filepath.Join(cpuDir, "topology", "physical_package_id") + packageID, ok := readOneLine(packageIDFile) + if !ok { + return fmt.Errorf("CPUFreqCollector.Init() unable to read physical package ID from %s", packageIDFile) + } + packageID_int, err := strconv.Atoi(packageID) + if err != nil { + return fmt.Errorf("CPUFreqCollector.Init() unable to convert packageID to int: %v", err) + } + + // Update maxPackageID + if packageID_int > maxPackageID { + maxPackageID = packageID_int + } + + // Read core ID + coreIDFile := filepath.Join(cpuDir, "topology", "core_id") + coreID, ok := readOneLine(coreIDFile) + if !ok { + return fmt.Errorf("CPUFreqCollector.Init() unable to read core ID from %s", coreIDFile) + } + coreID_int, err := strconv.Atoi(coreID) + if err != nil { + return fmt.Errorf("CPUFreqCollector.Init() unable to convert coreID to int: %v", err) + } + + // Update maxCoreID + if coreID_int > maxCoreID { + maxCoreID = coreID_int + } + + // Check access to current frequency file + scalingCurFreqFile := filepath.Join(cpuDir, "cpufreq", "scaling_cur_freq") + err = unix.Access(scalingCurFreqFile, unix.R_OK) + if err != nil { + return fmt.Errorf("CPUFreqCollector.Init() unable to access %s: %v", scalingCurFreqFile, err) + } + + m.cpus = append( + m.cpus, + CPUFreqCollectorCPU{ + tagSet: map[string]string{ + "coreID": strings.TrimSpace(coreID), + "packageID": strings.TrimSpace(packageID), + }, + scalingCurFreqFile: scalingCurFreqFile, + }) + } + } + + // Add num packages and num cores as tags + numPackages := strconv.Itoa(maxPackageID + 1) + numCores := strconv.Itoa(maxCoreID + 1) + for i := range m.cpus { + m.cpus[i].tagSet["num_core"] = numCores + m.cpus[i].tagSet["num_package"] = numPackages + } + + m.init = true + return nil +} + +func (m *CPUFreqCollector) Read(interval time.Duration, out 
*[]lp.MutableMetric) { + if !m.init { + return + } + + for _, cpu := range m.cpus { + // Read current frequency + line, ok := readOneLine(cpu.scalingCurFreqFile) + if !ok { + warnLog.Printf("CPUFreqCollector.Read(): Failed to read one line from file '%s'", cpu.scalingCurFreqFile) + continue + } + cpuFreq, err := strconv.Atoi(line) + if err != nil { + warnLog.Printf("CPUFreqCollector.Read(): Failed to convert CPU frequency '%s': %v", line, err) + continue + } + + value := map[string]interface{}{"value": cpuFreq} + y, err := lp.New("cpufreq", cpu.tagSet, value, time.Now()) + if err == nil { + *out = append(*out, y) + } + } +} + +func (m *CPUFreqCollector) Close() { + m.init = false +} diff --git a/metric-collector.go b/metric-collector.go index 04c221f..90f50c4 100644 --- a/metric-collector.go +++ b/metric-collector.go @@ -4,16 +4,17 @@ import ( "encoding/json" "flag" "fmt" - "github.com/ClusterCockpit/cc-metric-collector/collectors" - "github.com/ClusterCockpit/cc-metric-collector/receivers" - "github.com/ClusterCockpit/cc-metric-collector/sinks" - lp "github.com/influxdata/line-protocol" "log" "os" "os/signal" "strings" "sync" "time" + + "github.com/ClusterCockpit/cc-metric-collector/collectors" + "github.com/ClusterCockpit/cc-metric-collector/receivers" + "github.com/ClusterCockpit/cc-metric-collector/sinks" + lp "github.com/influxdata/line-protocol" ) // List of provided collectors. 
Which collector should be run can be @@ -32,7 +33,8 @@ var Collectors = map[string]collectors.MetricGetter{ "diskstat": &collectors.DiskstatCollector{}, "tempstat": &collectors.TempCollector{}, "ipmistat": &collectors.IpmiCollector{}, - "gpfs": &collectors.GpfsCollector{}, + "gpfs": new(collectors.GpfsCollector), + "cpufreq": new(collectors.CPUFreqCollector), } var Sinks = map[string]sinks.SinkFuncs{ From 5dd2af4e8fcce87f80a2fde5751de7919e2689c0 Mon Sep 17 00:00:00 2001 From: Holger Obermaier <40787752+ho-ob@users.noreply.github.com> Date: Fri, 21 Jan 2022 14:35:52 +0100 Subject: [PATCH 13/45] Avoid staticcheck warning: redundant return statement --- collectors/cpustatMetric.go | 4 ++-- collectors/customCmdMetric.go | 4 ++-- collectors/diskstatMetric.go | 6 +++--- collectors/{gpfs.go => gpfsMetric.go} | 9 +++------ collectors/infinibandMetric.go | 5 +++-- collectors/ipmiMetric.go | 4 ++-- collectors/likwidMetric.go | 6 +++--- collectors/loadavgMetric.go | 4 ++-- collectors/lustreMetric.go | 4 ++-- collectors/memstatMetric.go | 4 ++-- collectors/netstatMetric.go | 4 ++-- collectors/nvidiaMetric.go | 6 +++--- collectors/tempMetric.go | 4 ++-- collectors/topprocsMetric.go | 4 ++-- sinks/stdoutSink.go | 4 +--- 15 files changed, 34 insertions(+), 38 deletions(-) rename collectors/{gpfs.go => gpfsMetric.go} (98%) diff --git a/collectors/cpustatMetric.go b/collectors/cpustatMetric.go index 9e44fa7..64b5842 100644 --- a/collectors/cpustatMetric.go +++ b/collectors/cpustatMetric.go @@ -3,11 +3,12 @@ package collectors import ( "encoding/json" "fmt" - lp "github.com/influxdata/line-protocol" "io/ioutil" "strconv" "strings" "time" + + lp "github.com/influxdata/line-protocol" ) const CPUSTATFILE = `/proc/stat` @@ -88,5 +89,4 @@ func (m *CpustatCollector) Read(interval time.Duration, out *[]lp.MutableMetric) func (m *CpustatCollector) Close() { m.init = false - return } diff --git a/collectors/customCmdMetric.go b/collectors/customCmdMetric.go index 547bb87..bbafc2d 100644 --- 
a/collectors/customCmdMetric.go +++ b/collectors/customCmdMetric.go @@ -3,12 +3,13 @@ package collectors import ( "encoding/json" "errors" - lp "github.com/influxdata/line-protocol" "io/ioutil" "log" "os/exec" "strings" "time" + + lp "github.com/influxdata/line-protocol" ) const CUSTOMCMDPATH = `/home/unrz139/Work/cc-metric-collector/collectors/custom` @@ -126,5 +127,4 @@ func (m *CustomCmdCollector) Read(interval time.Duration, out *[]lp.MutableMetri func (m *CustomCmdCollector) Close() { m.init = false - return } diff --git a/collectors/diskstatMetric.go b/collectors/diskstatMetric.go index 5080ca2..4cbd3c6 100644 --- a/collectors/diskstatMetric.go +++ b/collectors/diskstatMetric.go @@ -1,8 +1,10 @@ package collectors import ( - lp "github.com/influxdata/line-protocol" "io/ioutil" + + lp "github.com/influxdata/line-protocol" + // "log" "encoding/json" "errors" @@ -107,10 +109,8 @@ func (m *DiskstatCollector) Read(interval time.Duration, out *[]lp.MutableMetric } } } - return } func (m *DiskstatCollector) Close() { m.init = false - return } diff --git a/collectors/gpfs.go b/collectors/gpfsMetric.go similarity index 98% rename from collectors/gpfs.go rename to collectors/gpfsMetric.go index db8a0d0..fbf3a63 100644 --- a/collectors/gpfs.go +++ b/collectors/gpfsMetric.go @@ -17,13 +17,11 @@ import ( lp "github.com/influxdata/line-protocol" ) -type GpfsCollectorConfig struct { - Mmpmon string `json:"mmpmon"` -} - type GpfsCollector struct { MetricCollector - config GpfsCollectorConfig + config struct { + Mmpmon string `json:"mmpmon"` + } } func (m *GpfsCollector) Init(config []byte) error { @@ -297,5 +295,4 @@ func (m *GpfsCollector) Read(interval time.Duration, out *[]lp.MutableMetric) { func (m *GpfsCollector) Close() { m.init = false - return } diff --git a/collectors/infinibandMetric.go b/collectors/infinibandMetric.go index a9552f7..6e14251 100644 --- a/collectors/infinibandMetric.go +++ b/collectors/infinibandMetric.go @@ -2,10 +2,12 @@ package collectors import 
( "fmt" - lp "github.com/influxdata/line-protocol" "io/ioutil" "log" "os/exec" + + lp "github.com/influxdata/line-protocol" + // "os" "encoding/json" "errors" @@ -278,5 +280,4 @@ func (m *InfinibandCollector) Read(interval time.Duration, out *[]lp.MutableMetr func (m *InfinibandCollector) Close() { m.init = false - return } diff --git a/collectors/ipmiMetric.go b/collectors/ipmiMetric.go index d28a134..3179148 100644 --- a/collectors/ipmiMetric.go +++ b/collectors/ipmiMetric.go @@ -3,13 +3,14 @@ package collectors import ( "encoding/json" "errors" - lp "github.com/influxdata/line-protocol" "log" "os" "os/exec" "strconv" "strings" "time" + + lp "github.com/influxdata/line-protocol" ) const IPMITOOL_PATH = `/usr/bin/ipmitool` @@ -133,5 +134,4 @@ func (m *IpmiCollector) Read(interval time.Duration, out *[]lp.MutableMetric) { func (m *IpmiCollector) Close() { m.init = false - return } diff --git a/collectors/likwidMetric.go b/collectors/likwidMetric.go index 34e2364..454a593 100644 --- a/collectors/likwidMetric.go +++ b/collectors/likwidMetric.go @@ -12,8 +12,6 @@ import ( "encoding/json" "errors" "fmt" - lp "github.com/influxdata/line-protocol" - "gopkg.in/Knetic/govaluate.v2" "io/ioutil" "log" "math" @@ -22,6 +20,9 @@ import ( "strings" "time" "unsafe" + + lp "github.com/influxdata/line-protocol" + "gopkg.in/Knetic/govaluate.v2" ) type LikwidCollectorMetricConfig struct { @@ -303,5 +304,4 @@ func (m *LikwidCollector) Close() { C.perfmon_finalize() C.topology_finalize() } - return } diff --git a/collectors/loadavgMetric.go b/collectors/loadavgMetric.go index 21cf350..1ecaea5 100644 --- a/collectors/loadavgMetric.go +++ b/collectors/loadavgMetric.go @@ -2,11 +2,12 @@ package collectors import ( "encoding/json" - lp "github.com/influxdata/line-protocol" "io/ioutil" "strconv" "strings" "time" + + lp "github.com/influxdata/line-protocol" ) const LOADAVGFILE = `/proc/loadavg` @@ -76,5 +77,4 @@ func (m *LoadavgCollector) Read(interval time.Duration, out *[]lp.MutableMetric) 
func (m *LoadavgCollector) Close() { m.init = false - return } diff --git a/collectors/lustreMetric.go b/collectors/lustreMetric.go index e7bb7a6..d77ac09 100644 --- a/collectors/lustreMetric.go +++ b/collectors/lustreMetric.go @@ -3,12 +3,13 @@ package collectors import ( "encoding/json" "errors" - lp "github.com/influxdata/line-protocol" "io/ioutil" "log" "strconv" "strings" "time" + + lp "github.com/influxdata/line-protocol" ) const LUSTREFILE = `/proc/fs/lustre/llite/lnec-XXXXXX/stats` @@ -102,5 +103,4 @@ func (m *LustreCollector) Read(interval time.Duration, out *[]lp.MutableMetric) func (m *LustreCollector) Close() { m.init = false - return } diff --git a/collectors/memstatMetric.go b/collectors/memstatMetric.go index 91987bb..17db13e 100644 --- a/collectors/memstatMetric.go +++ b/collectors/memstatMetric.go @@ -4,12 +4,13 @@ import ( "encoding/json" "errors" "fmt" - lp "github.com/influxdata/line-protocol" "io/ioutil" "log" "strconv" "strings" "time" + + lp "github.com/influxdata/line-protocol" ) const MEMSTATFILE = `/proc/meminfo` @@ -125,5 +126,4 @@ func (m *MemstatCollector) Read(interval time.Duration, out *[]lp.MutableMetric) func (m *MemstatCollector) Close() { m.init = false - return } diff --git a/collectors/netstatMetric.go b/collectors/netstatMetric.go index 659b89f..a273de1 100644 --- a/collectors/netstatMetric.go +++ b/collectors/netstatMetric.go @@ -2,12 +2,13 @@ package collectors import ( "encoding/json" - lp "github.com/influxdata/line-protocol" "io/ioutil" "log" "strconv" "strings" "time" + + lp "github.com/influxdata/line-protocol" ) const NETSTATFILE = `/proc/net/dev` @@ -84,5 +85,4 @@ func (m *NetstatCollector) Read(interval time.Duration, out *[]lp.MutableMetric) func (m *NetstatCollector) Close() { m.init = false - return } diff --git a/collectors/nvidiaMetric.go b/collectors/nvidiaMetric.go index bd63e2c..31118c2 100644 --- a/collectors/nvidiaMetric.go +++ b/collectors/nvidiaMetric.go @@ -4,10 +4,11 @@ import ( "encoding/json" "errors" 
"fmt" - "github.com/NVIDIA/go-nvml/pkg/nvml" - lp "github.com/influxdata/line-protocol" "log" "time" + + "github.com/NVIDIA/go-nvml/pkg/nvml" + lp "github.com/influxdata/line-protocol" ) type NvidiaCollectorConfig struct { @@ -267,5 +268,4 @@ func (m *NvidiaCollector) Close() { nvml.Shutdown() m.init = false } - return } diff --git a/collectors/tempMetric.go b/collectors/tempMetric.go index 3665025..b074d78 100644 --- a/collectors/tempMetric.go +++ b/collectors/tempMetric.go @@ -3,13 +3,14 @@ package collectors import ( "encoding/json" "fmt" - lp "github.com/influxdata/line-protocol" "io/ioutil" "os" "path/filepath" "strconv" "strings" "time" + + lp "github.com/influxdata/line-protocol" ) const HWMON_PATH = `/sys/class/hwmon` @@ -105,5 +106,4 @@ func (m *TempCollector) Read(interval time.Duration, out *[]lp.MutableMetric) { func (m *TempCollector) Close() { m.init = false - return } diff --git a/collectors/topprocsMetric.go b/collectors/topprocsMetric.go index a1bf989..e1b31ee 100644 --- a/collectors/topprocsMetric.go +++ b/collectors/topprocsMetric.go @@ -4,11 +4,12 @@ import ( "encoding/json" "errors" "fmt" - lp "github.com/influxdata/line-protocol" "log" "os/exec" "strings" "time" + + lp "github.com/influxdata/line-protocol" ) const MAX_NUM_PROCS = 10 @@ -74,5 +75,4 @@ func (m *TopProcsCollector) Read(interval time.Duration, out *[]lp.MutableMetric func (m *TopProcsCollector) Close() { m.init = false - return } diff --git a/sinks/stdoutSink.go b/sinks/stdoutSink.go index 34561e0..8016fcb 100644 --- a/sinks/stdoutSink.go +++ b/sinks/stdoutSink.go @@ -59,6 +59,4 @@ func (s *StdoutSink) Flush() error { return nil } -func (s *StdoutSink) Close() { - return -} +func (s *StdoutSink) Close() {} From 25b9268b24f3458dab0128a8af36df217039528e Mon Sep 17 00:00:00 2001 From: Holger Obermaier <40787752+ho-ob@users.noreply.github.com> Date: Fri, 21 Jan 2022 15:20:53 +0100 Subject: [PATCH 14/45] Avoid staticcheck warning: unnecessary assignment to the blank identifier --- 
collectors/likwidMetric.go | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/collectors/likwidMetric.go b/collectors/likwidMetric.go index 454a593..45fe68c 100644 --- a/collectors/likwidMetric.go +++ b/collectors/likwidMetric.go @@ -152,7 +152,7 @@ func (m *LikwidCollector) Init(config []byte) error { C.free(unsafe.Pointer(cstr)) m.results[i] = make(map[int]map[string]interface{}) m.mresults[i] = make(map[int]map[string]float64) - for tid, _ := range m.cpulist { + for tid := range m.cpulist { m.results[i][tid] = make(map[string]interface{}) m.mresults[i][tid] = make(map[string]float64) m.gmresults[tid] = make(map[string]float64) @@ -194,7 +194,7 @@ func (m *LikwidCollector) Read(interval time.Duration, out *[]lp.MutableMetric) continue } var eidx C.int - for tid, _ := range m.cpulist { + for tid := range m.cpulist { for eidx = 0; int(eidx) < len(evset.Events); eidx++ { ctr := C.perfmon_getCounterName(gid, eidx) gctr := C.GoString(ctr) @@ -220,7 +220,7 @@ func (m *LikwidCollector) Read(interval time.Duration, out *[]lp.MutableMetric) } for _, metric := range m.config.Metrics { - for tid, _ := range m.cpulist { + for tid := range m.cpulist { var params map[string]interface{} expression, err := govaluate.NewEvaluableExpression(metric.Calc) if err != nil { @@ -228,7 +228,7 @@ func (m *LikwidCollector) Read(interval time.Duration, out *[]lp.MutableMetric) continue } params = make(map[string]interface{}) - for j, _ := range m.groups { + for j := range m.groups { for mname, mres := range m.mresults[j][tid] { params[mname] = mres } @@ -241,7 +241,7 @@ func (m *LikwidCollector) Read(interval time.Duration, out *[]lp.MutableMetric) m.gmresults[tid][metric.Name] = float64(result.(float64)) } } - for i, _ := range m.groups { + for i := range m.groups { evset := m.config.Eventsets[i] for _, metric := range evset.Metrics { _, skip := stringArrayContains(m.config.ExcludeMetrics, metric.Name) From daa7c6bf99135c66cbe86df46595d2b98d9e1c20 Mon Sep 17 
00:00:00 2001 From: Holger Obermaier <40787752+ho-ob@users.noreply.github.com> Date: Mon, 24 Jan 2022 11:31:45 +0100 Subject: [PATCH 15/45] Simplified code --- collectors/cpufreqMetric.go | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/collectors/cpufreqMetric.go b/collectors/cpufreqMetric.go index 94f8f4a..f5a10bc 100644 --- a/collectors/cpufreqMetric.go +++ b/collectors/cpufreqMetric.go @@ -15,8 +15,6 @@ import ( "golang.org/x/sys/unix" ) -var warnLog *log.Logger = log.New(os.Stderr, "Warning: ", log.LstdFlags) - // // readOneLine reads one line from a file. // It returns ok when file was successfully read. @@ -138,7 +136,8 @@ func (m *CPUFreqCollector) Init(config []byte) error { m.cpus, CPUFreqCollectorCPU{ tagSet: map[string]string{ - "coreID": strings.TrimSpace(coreID), + "type": "cpu", + "type-id": strings.TrimSpace(coreID), "packageID": strings.TrimSpace(packageID), }, scalingCurFreqFile: scalingCurFreqFile, @@ -150,8 +149,9 @@ func (m *CPUFreqCollector) Init(config []byte) error { numPackages := strconv.Itoa(maxPackageID + 1) numCores := strconv.Itoa(maxCoreID + 1) for i := range m.cpus { - m.cpus[i].tagSet["num_core"] = numCores - m.cpus[i].tagSet["num_package"] = numPackages + c := &m.cpus[i] + c.tagSet["num_core"] = numCores + c.tagSet["num_package"] = numPackages } m.init = true @@ -163,21 +163,23 @@ func (m *CPUFreqCollector) Read(interval time.Duration, out *[]lp.MutableMetric) return } - for _, cpu := range m.cpus { + now := time.Now() + for i := range m.cpus { + cpu := &m.cpus[i] + // Read current frequency line, ok := readOneLine(cpu.scalingCurFreqFile) if !ok { - warnLog.Printf("CPUFreqCollector.Read(): Failed to read one line from file '%s'", cpu.scalingCurFreqFile) + log.Printf("CPUFreqCollector.Read(): Failed to read one line from file '%s'", cpu.scalingCurFreqFile) continue } cpuFreq, err := strconv.Atoi(line) if err != nil { - warnLog.Printf("CPUFreqCollector.Read(): Failed to convert CPU frequency 
'%s': %v", line, err) + log.Printf("CPUFreqCollector.Read(): Failed to convert CPU frequency '%s': %v", line, err) continue } - value := map[string]interface{}{"value": cpuFreq} - y, err := lp.New("cpufreq", cpu.tagSet, value, time.Now()) + y, err := lp.New("cpufreq", cpu.tagSet, map[string]interface{}{"value": cpuFreq}, now) if err == nil { *out = append(*out, y) } From 8d314ecb19c18353f417e8da38de95a7a6ad9c86 Mon Sep 17 00:00:00 2001 From: Holger Obermaier <40787752+ho-ob@users.noreply.github.com> Date: Mon, 24 Jan 2022 13:10:33 +0100 Subject: [PATCH 16/45] Add CPUFreqCollectorCpuinfo a metric collector to measure the current frequency of the CPUs as obtained from /proc/cpuinfo Only measure on the first hyperthread --- collectors/cpufreqCpuinfoMetric.go | 176 +++++++++++++++++++++++++++++ metric-collector.go | 31 ++--- 2 files changed, 192 insertions(+), 15 deletions(-) create mode 100644 collectors/cpufreqCpuinfoMetric.go diff --git a/collectors/cpufreqCpuinfoMetric.go b/collectors/cpufreqCpuinfoMetric.go new file mode 100644 index 0000000..1658878 --- /dev/null +++ b/collectors/cpufreqCpuinfoMetric.go @@ -0,0 +1,176 @@ +package collectors + +import ( + "bufio" + "fmt" + "log" + "os" + "strconv" + "strings" + "time" + + lp "github.com/influxdata/line-protocol" +) + +// +// CPUFreqCollector +// a metric collector to measure the current frequency of the CPUs +// as obtained from /proc/cpuinfo +// Only measure on the first hyperthread +// +type CPUFreqCpuInfoCollectorTopology struct { + processor string // logical processor number (continuous, starting at 0) + coreID string // socket local core ID + physicalID string // socket / package ID + numPhysicalID string // number of sockets / packages + isHT bool + numNonHT string // number of non hyperthreading processors + tagSet map[string]string +} + +type CPUFreqCpuInfoCollector struct { + MetricCollector + topology []CPUFreqCpuInfoCollectorTopology +} + +func (m *CPUFreqCpuInfoCollector) Init(config []byte) error { + 
m.name = "CPUFreqCpuInfoCollector" + + const cpuInfoFile = "/proc/cpuinfo" + file, err := os.Open(cpuInfoFile) + if err != nil { + return fmt.Errorf("Failed to open '%s': %v", cpuInfoFile, err) + } + defer file.Close() + + // Collect topology information from file cpuinfo + foundFreq := false + processor := "" + numNonHT := 0 + coreID := "" + physicalID := "" + maxPhysicalID := 0 + m.topology = make([]CPUFreqCpuInfoCollectorTopology, 0) + coreSeenBefore := make(map[string]bool) + scanner := bufio.NewScanner(file) + for scanner.Scan() { + lineSplit := strings.Split(scanner.Text(), ":") + if len(lineSplit) == 2 { + key := strings.TrimSpace(lineSplit[0]) + value := strings.TrimSpace(lineSplit[1]) + switch key { + case "cpu MHz": + // frequency + foundFreq = true + case "processor": + processor = value + case "core id": + coreID = value + case "physical id": + physicalID = value + } + } + + // were all topology information collected? + if foundFreq && + len(processor) > 0 && + len(coreID) > 0 && + len(physicalID) > 0 { + + globalID := physicalID + ":" + coreID + isHT := coreSeenBefore[globalID] + coreSeenBefore[globalID] = true + if !isHT { + // increase number on non hyper thread cores + numNonHT++ + + // increase maximun socket / package ID, when required + physicalIDInt, err := strconv.Atoi(physicalID) + if err != nil { + return fmt.Errorf("Failed to convert physical id to int: %v", err) + } + if physicalIDInt > maxPhysicalID { + maxPhysicalID = physicalIDInt + } + } + + // store collected topology information + m.topology = append( + m.topology, + CPUFreqCpuInfoCollectorTopology{ + processor: processor, + coreID: coreID, + physicalID: physicalID, + isHT: isHT, + }) + + // reset topology information + foundFreq = false + processor = "" + coreID = "" + physicalID = "" + } + } + + numPhysicalID := fmt.Sprint(maxPhysicalID + 1) + numNonHTString := fmt.Sprint(numNonHT) + for i := range m.topology { + t := &m.topology[i] + t.numPhysicalID = numPhysicalID + t.numNonHT = 
numNonHTString + t.tagSet = map[string]string{ + "type": "cpu", + "type-id": t.processor, + "num_core": t.numNonHT, + "package_id": t.physicalID, + "num_package": t.numPhysicalID, + } + } + + m.init = true + return nil +} + +func (m *CPUFreqCpuInfoCollector) Read(interval time.Duration, out *[]lp.MutableMetric) { + if !m.init { + return + } + const cpuInfoFile = "/proc/cpuinfo" + file, err := os.Open(cpuInfoFile) + if err != nil { + log.Printf("Failed to open '%s': %v", cpuInfoFile, err) + return + } + defer file.Close() + + processorCounter := 0 + now := time.Now() + scanner := bufio.NewScanner(file) + for scanner.Scan() { + lineSplit := strings.Split(scanner.Text(), ":") + if len(lineSplit) == 2 { + key := strings.TrimSpace(lineSplit[0]) + + // frequency + if key == "cpu MHz" { + t := &m.topology[processorCounter] + if !t.isHT { + value, err := strconv.ParseFloat(strings.TrimSpace(lineSplit[1]), 64) + if err != nil { + log.Printf("Failed to convert cpu MHz to float: %v", err) + return + } + y, err := lp.New("cpufreq", t.tagSet, map[string]interface{}{"value": value}, now) + if err == nil { + *out = append(*out, y) + } + } + processorCounter++ + } + } + } +} + +func (m *CPUFreqCpuInfoCollector) Close() { + m.init = false +} diff --git a/metric-collector.go b/metric-collector.go index 90f50c4..02a2b21 100644 --- a/metric-collector.go +++ b/metric-collector.go @@ -20,21 +20,22 @@ import ( // List of provided collectors. Which collector should be run can be // configured at 'collectors' list in 'config.json'. 
var Collectors = map[string]collectors.MetricGetter{ - "likwid": &collectors.LikwidCollector{}, - "loadavg": &collectors.LoadavgCollector{}, - "memstat": &collectors.MemstatCollector{}, - "netstat": &collectors.NetstatCollector{}, - "ibstat": &collectors.InfinibandCollector{}, - "lustrestat": &collectors.LustreCollector{}, - "cpustat": &collectors.CpustatCollector{}, - "topprocs": &collectors.TopProcsCollector{}, - "nvidia": &collectors.NvidiaCollector{}, - "customcmd": &collectors.CustomCmdCollector{}, - "diskstat": &collectors.DiskstatCollector{}, - "tempstat": &collectors.TempCollector{}, - "ipmistat": &collectors.IpmiCollector{}, - "gpfs": new(collectors.GpfsCollector), - "cpufreq": new(collectors.CPUFreqCollector), + "likwid": &collectors.LikwidCollector{}, + "loadavg": &collectors.LoadavgCollector{}, + "memstat": &collectors.MemstatCollector{}, + "netstat": &collectors.NetstatCollector{}, + "ibstat": &collectors.InfinibandCollector{}, + "lustrestat": &collectors.LustreCollector{}, + "cpustat": &collectors.CpustatCollector{}, + "topprocs": &collectors.TopProcsCollector{}, + "nvidia": &collectors.NvidiaCollector{}, + "customcmd": &collectors.CustomCmdCollector{}, + "diskstat": &collectors.DiskstatCollector{}, + "tempstat": &collectors.TempCollector{}, + "ipmistat": &collectors.IpmiCollector{}, + "gpfs": new(collectors.GpfsCollector), + "cpufreq": new(collectors.CPUFreqCollector), + "cpufreq_cpuinfo": new(collectors.CPUFreqCpuInfoCollector), } var Sinks = map[string]sinks.SinkFuncs{ From 7953629940413ade2d9d708f9afb1d7e1f910720 Mon Sep 17 00:00:00 2001 From: Holger Obermaier <40787752+ho-ob@users.noreply.github.com> Date: Mon, 24 Jan 2022 15:55:15 +0100 Subject: [PATCH 17/45] Update GitHub actions --- .github/ci-config.json | 23 +++++++++++++---------- .github/workflows/runonce.yml | 5 ++++- 2 files changed, 17 insertions(+), 11 deletions(-) diff --git a/.github/ci-config.json b/.github/ci-config.json index b3fbff1..402388d 100644 --- a/.github/ci-config.json 
+++ b/.github/ci-config.json @@ -21,7 +21,10 @@ "topprocs", "nvidia", "diskstat", - "ipmistat" + "ipmistat", + "gpfs", + "cpufreq", + "cpufreq_cpuinfo" ], "default_tags": { "cluster": "testcluster" @@ -30,20 +33,20 @@ "type": "none" }, "collect_config": { - "topprocs" : { + "topprocs": { "num_procs": 2 - }, + }, "tempstat": { "tag_override": { - "hwmon0" : { - "type" : "socket", - "type-id" : "0" + "hwmon0": { + "type": "socket", + "type-id": "0" }, - "hwmon1" : { - "type" : "socket", - "type-id" : "1" + "hwmon1": { + "type": "socket", + "type-id": "1" } } } } -} +} \ No newline at end of file diff --git a/.github/workflows/runonce.yml b/.github/workflows/runonce.yml index 8efc70a..194710f 100644 --- a/.github/workflows/runonce.yml +++ b/.github/workflows/runonce.yml @@ -7,8 +7,11 @@ jobs: steps: - uses: actions/checkout@v2 + # See: https://github.com/marketplace/actions/setup-go-environment - name: Setup Golang - uses: actions/setup-go@v2.1.4 + uses: actions/setup-go@v2.1.5 + with: + go-version: '^1.17.6' - name: Build MetricCollector run: make From 2026c3acd9f050e2958ca719aa9127490b7228a7 Mon Sep 17 00:00:00 2001 From: Holger Obermaier <40787752+ho-ob@users.noreply.github.com> Date: Mon, 24 Jan 2022 20:22:08 +0100 Subject: [PATCH 18/45] Fixed topology detection --- collectors/cpufreqMetric.go | 175 +++++++++++++++++++++--------------- 1 file changed, 101 insertions(+), 74 deletions(-) diff --git a/collectors/cpufreqMetric.go b/collectors/cpufreqMetric.go index f5a10bc..ec42445 100644 --- a/collectors/cpufreqMetric.go +++ b/collectors/cpufreqMetric.go @@ -32,10 +32,19 @@ func readOneLine(filename string) (text string, ok bool) { return } -type CPUFreqCollectorCPU struct { - // coreID, packageID, num_cores, num_package - tagSet map[string]string +type CPUFreqCollectorTopology struct { + processor string // logical processor number (continuous, starting at 0) + coreID string // socket local core ID + coreID_int int + physicalID string // socket / package ID + 
physicalID_int int + numPhysicalID string // number of sockets / packages + numPhysicalID_int int + isHT bool + numNonHT string // number of non hyperthreading processors + numNonHT_int int scalingCurFreqFile string + tagSet map[string]string } // @@ -48,10 +57,10 @@ type CPUFreqCollectorCPU struct { // type CPUFreqCollector struct { MetricCollector - config struct { + topology []CPUFreqCollectorTopology + config struct { ExcludeMetrics []string `json:"exclude_metrics,omitempty"` } - cpus []CPUFreqCollectorCPU } func (m *CPUFreqCollector) Init(config []byte) error { @@ -64,9 +73,6 @@ func (m *CPUFreqCollector) Init(config []byte) error { } } - // Initialize CPU list - m.cpus = make([]CPUFreqCollectorCPU, 0) - // Loop for all CPU directories baseDir := "/sys/devices/system/cpu" globPattern := filepath.Join(baseDir, "cpu[0-9]*") @@ -78,82 +84,98 @@ func (m *CPUFreqCollector) Init(config []byte) error { return fmt.Errorf("CPUFreqCollector.Init() unable to find any files with pattern %s", globPattern) } - maxPackageID := 0 - maxCoreID := 0 + // Initialize CPU topology + m.topology = make([]CPUFreqCollectorTopology, len(cpuDirs)) for _, cpuDir := range cpuDirs { - cpuID := strings.TrimPrefix(cpuDir, "/sys/devices/system/cpu/cpu") + processor := strings.TrimPrefix(cpuDir, "/sys/devices/system/cpu/cpu") + processor_int, err := strconv.Atoi(processor) + if err != nil { + return fmt.Errorf("CPUFreqCollector.Init() unable to convert cpuID to int: %v", err) + } - // Read thread sibling list - threadSiblingListFile := filepath.Join(cpuDir, "topology", "thread_siblings_list") - threadSiblingList, ok := readOneLine(threadSiblingListFile) + // Read package ID + packageIDFile := filepath.Join(cpuDir, "topology", "physical_package_id") + packageID, ok := readOneLine(packageIDFile) if !ok { - return fmt.Errorf("CPUFreqCollector.Init() unable to read thread siblings list from %s", threadSiblingListFile) + return fmt.Errorf("CPUFreqCollector.Init() unable to read physical package ID 
from %s", packageIDFile) + } + packageID_int, err := strconv.Atoi(packageID) + if err != nil { + return fmt.Errorf("CPUFreqCollector.Init() unable to convert packageID to int: %v", err) } - // Read frequency only from first hardware thread - // Ignore Simultaneous Multithreading (SMT) / Hyper-Threading - if strings.Split(threadSiblingList, ",")[0] == cpuID { - // Read package ID - packageIDFile := filepath.Join(cpuDir, "topology", "physical_package_id") - packageID, ok := readOneLine(packageIDFile) - if !ok { - return fmt.Errorf("CPUFreqCollector.Init() unable to read physical package ID from %s", packageIDFile) - } - packageID_int, err := strconv.Atoi(packageID) - if err != nil { - return fmt.Errorf("CPUFreqCollector.Init() unable to convert packageID to int: %v", err) - } + // Read core ID + coreIDFile := filepath.Join(cpuDir, "topology", "core_id") + coreID, ok := readOneLine(coreIDFile) + if !ok { + return fmt.Errorf("CPUFreqCollector.Init() unable to read core ID from %s", coreIDFile) + } + coreID_int, err := strconv.Atoi(coreID) + if err != nil { + return fmt.Errorf("CPUFreqCollector.Init() unable to convert coreID to int: %v", err) + } - // Update maxPackageID - if packageID_int > maxPackageID { - maxPackageID = packageID_int - } + // Check access to current frequency file + scalingCurFreqFile := filepath.Join(cpuDir, "cpufreq", "scaling_cur_freq") + err = unix.Access(scalingCurFreqFile, unix.R_OK) + if err != nil { + return fmt.Errorf("CPUFreqCollector.Init() unable to access %s: %v", scalingCurFreqFile, err) + } - // Read core ID - coreIDFile := filepath.Join(cpuDir, "topology", "core_id") - coreID, ok := readOneLine(coreIDFile) - if !ok { - return fmt.Errorf("CPUFreqCollector.Init() unable to read core ID from %s", coreIDFile) - } - coreID_int, err := strconv.Atoi(coreID) - if err != nil { - return fmt.Errorf("CPUFreqCollector.Init() unable to convert coreID to int: %v", err) - } + t := &m.topology[processor_int] + t.processor = processor + t.physicalID = 
packageID + t.physicalID_int = packageID_int + t.coreID = coreID + t.coreID_int = coreID_int + t.scalingCurFreqFile = scalingCurFreqFile + } - // Update maxCoreID - if coreID_int > maxCoreID { - maxCoreID = coreID_int - } + // is processor a hyperthread? + coreSeenBefore := make(map[string]bool) + for i := range m.topology { + t := &m.topology[i] - // Check access to current frequency file - scalingCurFreqFile := filepath.Join(cpuDir, "cpufreq", "scaling_cur_freq") - err = unix.Access(scalingCurFreqFile, unix.R_OK) - if err != nil { - return fmt.Errorf("CPUFreqCollector.Init() unable to access %s: %v", scalingCurFreqFile, err) - } + globalID := t.physicalID + ":" + t.coreID + t.isHT = coreSeenBefore[globalID] + coreSeenBefore[globalID] = true + } - m.cpus = append( - m.cpus, - CPUFreqCollectorCPU{ - tagSet: map[string]string{ - "type": "cpu", - "type-id": strings.TrimSpace(coreID), - "packageID": strings.TrimSpace(packageID), - }, - scalingCurFreqFile: scalingCurFreqFile, - }) + // number of non hyper thread cores and packages / sockets + numNonHT_int := 0 + maxPhysicalID := 0 + for i := range m.topology { + t := &m.topology[i] + + // Update maxPackageID + if t.physicalID_int > maxPhysicalID { + maxPhysicalID = t.physicalID_int + } + + if !t.isHT { + numNonHT_int++ } } - // Add num packages and num cores as tags - numPackages := strconv.Itoa(maxPackageID + 1) - numCores := strconv.Itoa(maxCoreID + 1) - for i := range m.cpus { - c := &m.cpus[i] - c.tagSet["num_core"] = numCores - c.tagSet["num_package"] = numPackages + numPhysicalID_int := maxPhysicalID + 1 + numPhysicalID := fmt.Sprint(numPhysicalID_int) + numNonHT := fmt.Sprint(numNonHT_int) + for i := range m.topology { + t := &m.topology[i] + t.numPhysicalID = numPhysicalID + t.numPhysicalID_int = numPhysicalID_int + t.numNonHT = numNonHT + t.numNonHT_int = numNonHT_int + t.tagSet = map[string]string{ + "type": "cpu", + "type-id": t.processor, + "num_core": t.numNonHT, + "package_id": t.physicalID, + 
"num_package": t.numPhysicalID, + } } + fmt.Printf("%+v\n", m.topology) m.init = true return nil } @@ -164,13 +186,18 @@ func (m *CPUFreqCollector) Read(interval time.Duration, out *[]lp.MutableMetric) } now := time.Now() - for i := range m.cpus { - cpu := &m.cpus[i] + for i := range m.topology { + t := &m.topology[i] + + // skip hyperthreads + if t.isHT { + continue + } // Read current frequency - line, ok := readOneLine(cpu.scalingCurFreqFile) + line, ok := readOneLine(t.scalingCurFreqFile) if !ok { - log.Printf("CPUFreqCollector.Read(): Failed to read one line from file '%s'", cpu.scalingCurFreqFile) + log.Printf("CPUFreqCollector.Read(): Failed to read one line from file '%s'", t.scalingCurFreqFile) continue } cpuFreq, err := strconv.Atoi(line) @@ -179,7 +206,7 @@ func (m *CPUFreqCollector) Read(interval time.Duration, out *[]lp.MutableMetric) continue } - y, err := lp.New("cpufreq", cpu.tagSet, map[string]interface{}{"value": cpuFreq}, now) + y, err := lp.New("cpufreq", t.tagSet, map[string]interface{}{"value": cpuFreq}, now) if err == nil { *out = append(*out, y) } From be8c92676a4d2532eb848a019a821b106e6e4951 Mon Sep 17 00:00:00 2001 From: Holger Obermaier <40787752+ho-ob@users.noreply.github.com> Date: Mon, 24 Jan 2022 22:03:13 +0100 Subject: [PATCH 19/45] Refactoring --- collectors/cpufreqMetric.go | 51 ++++++++++++++++++------------------- 1 file changed, 25 insertions(+), 26 deletions(-) diff --git a/collectors/cpufreqMetric.go b/collectors/cpufreqMetric.go index ec42445..35e64ac 100644 --- a/collectors/cpufreqMetric.go +++ b/collectors/cpufreqMetric.go @@ -33,18 +33,18 @@ func readOneLine(filename string) (text string, ok bool) { } type CPUFreqCollectorTopology struct { - processor string // logical processor number (continuous, starting at 0) - coreID string // socket local core ID - coreID_int int - physicalID string // socket / package ID - physicalID_int int - numPhysicalID string // number of sockets / packages - numPhysicalID_int int - isHT bool - 
numNonHT string // number of non hyperthreading processors - numNonHT_int int - scalingCurFreqFile string - tagSet map[string]string + processor string // logical processor number (continuous, starting at 0) + coreID string // socket local core ID + coreID_int int + physicalPackageID string // socket / package ID + physicalPackageID_int int + numPhysicalPackages string // number of sockets / packages + numPhysicalPackages_int int + isHT bool + numNonHT string // number of non hyperthreading processors + numNonHT_int int + scalingCurFreqFile string + tagSet map[string]string } // @@ -94,12 +94,12 @@ func (m *CPUFreqCollector) Init(config []byte) error { } // Read package ID - packageIDFile := filepath.Join(cpuDir, "topology", "physical_package_id") - packageID, ok := readOneLine(packageIDFile) + physicalPackageIDFile := filepath.Join(cpuDir, "topology", "physical_package_id") + physicalPackageID, ok := readOneLine(physicalPackageIDFile) if !ok { - return fmt.Errorf("CPUFreqCollector.Init() unable to read physical package ID from %s", packageIDFile) + return fmt.Errorf("CPUFreqCollector.Init() unable to read physical package ID from %s", physicalPackageIDFile) } - packageID_int, err := strconv.Atoi(packageID) + physicalPackageID_int, err := strconv.Atoi(physicalPackageID) if err != nil { return fmt.Errorf("CPUFreqCollector.Init() unable to convert packageID to int: %v", err) } @@ -124,8 +124,8 @@ func (m *CPUFreqCollector) Init(config []byte) error { t := &m.topology[processor_int] t.processor = processor - t.physicalID = packageID - t.physicalID_int = packageID_int + t.physicalPackageID = physicalPackageID + t.physicalPackageID_int = physicalPackageID_int t.coreID = coreID t.coreID_int = coreID_int t.scalingCurFreqFile = scalingCurFreqFile @@ -136,7 +136,7 @@ func (m *CPUFreqCollector) Init(config []byte) error { for i := range m.topology { t := &m.topology[i] - globalID := t.physicalID + ":" + t.coreID + globalID := t.physicalPackageID + ":" + t.coreID t.isHT = 
coreSeenBefore[globalID] coreSeenBefore[globalID] = true } @@ -148,8 +148,8 @@ func (m *CPUFreqCollector) Init(config []byte) error { t := &m.topology[i] // Update maxPackageID - if t.physicalID_int > maxPhysicalID { - maxPhysicalID = t.physicalID_int + if t.physicalPackageID_int > maxPhysicalID { + maxPhysicalID = t.physicalPackageID_int } if !t.isHT { @@ -162,20 +162,19 @@ func (m *CPUFreqCollector) Init(config []byte) error { numNonHT := fmt.Sprint(numNonHT_int) for i := range m.topology { t := &m.topology[i] - t.numPhysicalID = numPhysicalID - t.numPhysicalID_int = numPhysicalID_int + t.numPhysicalPackages = numPhysicalID + t.numPhysicalPackages_int = numPhysicalID_int t.numNonHT = numNonHT t.numNonHT_int = numNonHT_int t.tagSet = map[string]string{ "type": "cpu", "type-id": t.processor, "num_core": t.numNonHT, - "package_id": t.physicalID, - "num_package": t.numPhysicalID, + "package_id": t.physicalPackageID, + "num_package": t.numPhysicalPackages, } } - fmt.Printf("%+v\n", m.topology) m.init = true return nil } From e095e4f202e2335aba925de1d45dc69a7e2a017e Mon Sep 17 00:00:00 2001 From: Holger Obermaier <40787752+ho-ob@users.noreply.github.com> Date: Tue, 25 Jan 2022 09:47:24 +0100 Subject: [PATCH 20/45] Refactoring --- collectors/cpufreqCpuinfoMetric.go | 82 +++++++++++++++++------------- collectors/cpufreqMetric.go | 14 ++--- 2 files changed, 55 insertions(+), 41 deletions(-) diff --git a/collectors/cpufreqCpuinfoMetric.go b/collectors/cpufreqCpuinfoMetric.go index 1658878..e8cd0fc 100644 --- a/collectors/cpufreqCpuinfoMetric.go +++ b/collectors/cpufreqCpuinfoMetric.go @@ -19,13 +19,17 @@ import ( // Only measure on the first hyperthread // type CPUFreqCpuInfoCollectorTopology struct { - processor string // logical processor number (continuous, starting at 0) - coreID string // socket local core ID - physicalID string // socket / package ID - numPhysicalID string // number of sockets / packages - isHT bool - numNonHT string // number of non hyperthreading 
processors - tagSet map[string]string + processor string // logical processor number (continuous, starting at 0) + coreID string // socket local core ID + coreID_int int + physicalPackageID string // socket / package ID + physicalPackageID_int int + numPhysicalPackages string // number of sockets / packages + numPhysicalPackages_int int + isHT bool + numNonHT string // number of non hyperthreading processors + numNonHT_int int + tagSet map[string]string } type CPUFreqCpuInfoCollector struct { @@ -46,10 +50,10 @@ func (m *CPUFreqCpuInfoCollector) Init(config []byte) error { // Collect topology information from file cpuinfo foundFreq := false processor := "" - numNonHT := 0 + numNonHT_int := 0 coreID := "" - physicalID := "" - maxPhysicalID := 0 + physicalPackageID := "" + maxPhysicalPackageID := 0 m.topology = make([]CPUFreqCpuInfoCollectorTopology, 0) coreSeenBefore := make(map[string]bool) scanner := bufio.NewScanner(file) @@ -67,7 +71,7 @@ func (m *CPUFreqCpuInfoCollector) Init(config []byte) error { case "core id": coreID = value case "physical id": - physicalID = value + physicalPackageID = value } } @@ -75,55 +79,65 @@ func (m *CPUFreqCpuInfoCollector) Init(config []byte) error { if foundFreq && len(processor) > 0 && len(coreID) > 0 && - len(physicalID) > 0 { + len(physicalPackageID) > 0 { - globalID := physicalID + ":" + coreID + coreID_int, err := strconv.Atoi(coreID) + if err != nil { + return fmt.Errorf("Unable to convert coreID to int: %v", err) + } + physicalPackageID_int, err := strconv.Atoi(physicalPackageID) + if err != nil { + return fmt.Errorf("Unable to convert physicalPackageID to int: %v", err) + } + + // increase maximun socket / package ID, when required + if physicalPackageID_int > maxPhysicalPackageID { + maxPhysicalPackageID = physicalPackageID_int + } + + globalID := physicalPackageID + ":" + coreID isHT := coreSeenBefore[globalID] coreSeenBefore[globalID] = true if !isHT { // increase number on non hyper thread cores - numNonHT++ - - // 
increase maximun socket / package ID, when required - physicalIDInt, err := strconv.Atoi(physicalID) - if err != nil { - return fmt.Errorf("Failed to convert physical id to int: %v", err) - } - if physicalIDInt > maxPhysicalID { - maxPhysicalID = physicalIDInt - } + numNonHT_int++ } // store collected topology information m.topology = append( m.topology, CPUFreqCpuInfoCollectorTopology{ - processor: processor, - coreID: coreID, - physicalID: physicalID, - isHT: isHT, + processor: processor, + coreID: coreID, + coreID_int: coreID_int, + physicalPackageID: physicalPackageID, + physicalPackageID_int: physicalPackageID_int, + isHT: isHT, }) // reset topology information foundFreq = false processor = "" coreID = "" - physicalID = "" + physicalPackageID = "" } } - numPhysicalID := fmt.Sprint(maxPhysicalID + 1) - numNonHTString := fmt.Sprint(numNonHT) + numPhysicalPackageID_int := maxPhysicalPackageID + 1 + numPhysicalPackageID := fmt.Sprint(numPhysicalPackageID_int) + numNonHT := fmt.Sprint(numNonHT_int) for i := range m.topology { t := &m.topology[i] - t.numPhysicalID = numPhysicalID - t.numNonHT = numNonHTString + t.numPhysicalPackages = numPhysicalPackageID + t.numPhysicalPackages_int = numPhysicalPackageID_int + t.numNonHT = numNonHT + t.numNonHT_int = numNonHT_int t.tagSet = map[string]string{ "type": "cpu", "type-id": t.processor, "num_core": t.numNonHT, - "package_id": t.physicalID, - "num_package": t.numPhysicalID, + "package_id": t.physicalPackageID, + "num_package": t.numPhysicalPackages, } } diff --git a/collectors/cpufreqMetric.go b/collectors/cpufreqMetric.go index 35e64ac..fcab782 100644 --- a/collectors/cpufreqMetric.go +++ b/collectors/cpufreqMetric.go @@ -143,13 +143,13 @@ func (m *CPUFreqCollector) Init(config []byte) error { // number of non hyper thread cores and packages / sockets numNonHT_int := 0 - maxPhysicalID := 0 + maxPhysicalPackageID := 0 for i := range m.topology { t := &m.topology[i] // Update maxPackageID - if t.physicalPackageID_int > 
maxPhysicalID { - maxPhysicalID = t.physicalPackageID_int + if t.physicalPackageID_int > maxPhysicalPackageID { + maxPhysicalPackageID = t.physicalPackageID_int } if !t.isHT { @@ -157,13 +157,13 @@ func (m *CPUFreqCollector) Init(config []byte) error { } } - numPhysicalID_int := maxPhysicalID + 1 - numPhysicalID := fmt.Sprint(numPhysicalID_int) + numPhysicalPackageID_int := maxPhysicalPackageID + 1 + numPhysicalPackageID := fmt.Sprint(numPhysicalPackageID_int) numNonHT := fmt.Sprint(numNonHT_int) for i := range m.topology { t := &m.topology[i] - t.numPhysicalPackages = numPhysicalID - t.numPhysicalPackages_int = numPhysicalID_int + t.numPhysicalPackages = numPhysicalPackageID + t.numPhysicalPackages_int = numPhysicalPackageID_int t.numNonHT = numNonHT t.numNonHT_int = numNonHT_int t.tagSet = map[string]string{ From df77c3fd60688266466f7bb07aae84e2373ebb86 Mon Sep 17 00:00:00 2001 From: Holger Obermaier <40787752+ho-ob@users.noreply.github.com> Date: Tue, 25 Jan 2022 10:32:08 +0100 Subject: [PATCH 21/45] Avoid vet warning: Println arg list ends with redundant newline --- collectors/infinibandMetric.go | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/collectors/infinibandMetric.go b/collectors/infinibandMetric.go index 6e14251..db7c129 100644 --- a/collectors/infinibandMetric.go +++ b/collectors/infinibandMetric.go @@ -17,9 +17,10 @@ import ( "time" ) -const IBBASEPATH = `/sys/class/infiniband/` -const LIDFILE = `/sys/class/infiniband/mlx4_0/ports/1/lid` -const PERFQUERY = `/usr/sbin/perfquery` +const ( + IBBASEPATH = `/sys/class/infiniband/` + PERFQUERY = `/usr/sbin/perfquery` +) type InfinibandCollectorConfig struct { ExcludeDevices []string `json:"exclude_devices,omitempty"` @@ -40,12 +41,14 @@ func (m *InfinibandCollector) Help() { fmt.Println("The devices can be filtered with the 'exclude_devices' option in the configuration.") fmt.Println("For each found LIDs the collector calls the 'perfquery' command") fmt.Println("The path to 
the 'perfquery' command can be configured with the 'perfquery_path' option") - fmt.Println("in the configuration\n") + fmt.Println("in the configuration") + fmt.Println("") fmt.Println("Full configuration object:") fmt.Println("\"ibstat\" : {") fmt.Println(" \"perfquery_path\" : \"path/to/perfquery\" # if omitted, it searches in $PATH") fmt.Println(" \"exclude_devices\" : [\"dev1\"]") - fmt.Println("}\n") + fmt.Println("}") + fmt.Println("") fmt.Println("Metrics:") fmt.Println("- ib_recv") fmt.Println("- ib_xmit") From 222862af322710872a5b88d21d5723a84cce79d8 Mon Sep 17 00:00:00 2001 From: Holger Obermaier <40787752+ho-ob@users.noreply.github.com> Date: Tue, 25 Jan 2022 11:12:06 +0100 Subject: [PATCH 22/45] Avoid vet warning struct field commands has json tag but is not exported --- collectors/customCmdMetric.go | 8 ++++---- collectors/lustreMetric.go | 4 ++-- collectors/topprocsMetric.go | 8 ++++---- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/collectors/customCmdMetric.go b/collectors/customCmdMetric.go index bbafc2d..e11f4c7 100644 --- a/collectors/customCmdMetric.go +++ b/collectors/customCmdMetric.go @@ -15,8 +15,8 @@ import ( const CUSTOMCMDPATH = `/home/unrz139/Work/cc-metric-collector/collectors/custom` type CustomCmdCollectorConfig struct { - commands []string `json:"commands"` - files []string `json:"files"` + Commands []string `json:"commands"` + Files []string `json:"files"` ExcludeMetrics []string `json:"exclude_metrics"` } @@ -40,7 +40,7 @@ func (m *CustomCmdCollector) Init(config []byte) error { } } m.setup() - for _, c := range m.config.commands { + for _, c := range m.config.Commands { cmdfields := strings.Fields(c) command := exec.Command(cmdfields[0], strings.Join(cmdfields[1:], " ")) command.Wait() @@ -49,7 +49,7 @@ func (m *CustomCmdCollector) Init(config []byte) error { m.commands = append(m.commands, c) } } - for _, f := range m.config.files { + for _, f := range m.config.Files { _, err = ioutil.ReadFile(f) if err == nil { 
m.files = append(m.files, f) diff --git a/collectors/lustreMetric.go b/collectors/lustreMetric.go index d77ac09..8931f84 100644 --- a/collectors/lustreMetric.go +++ b/collectors/lustreMetric.go @@ -15,7 +15,7 @@ import ( const LUSTREFILE = `/proc/fs/lustre/llite/lnec-XXXXXX/stats` type LustreCollectorConfig struct { - procfiles []string `json:"procfiles"` + Procfiles []string `json:"procfiles"` ExcludeMetrics []string `json:"exclude_metrics"` } @@ -47,7 +47,7 @@ func (m *LustreCollector) Init(config []byte) error { "statfs": {"statfs": 1}, "inode_permission": {"inode_permission": 1}} m.devices = make([]string, 0) - for _, p := range m.config.procfiles { + for _, p := range m.config.Procfiles { _, err := ioutil.ReadFile(p) if err == nil { m.devices = append(m.devices, p) diff --git a/collectors/topprocsMetric.go b/collectors/topprocsMetric.go index e1b31ee..715b8c3 100644 --- a/collectors/topprocsMetric.go +++ b/collectors/topprocsMetric.go @@ -16,7 +16,7 @@ const MAX_NUM_PROCS = 10 const DEFAULT_NUM_PROCS = 2 type TopProcsCollectorConfig struct { - num_procs int `json:"num_procs"` + Num_procs int `json:"num_procs"` } type TopProcsCollector struct { @@ -35,9 +35,9 @@ func (m *TopProcsCollector) Init(config []byte) error { return err } } else { - m.config.num_procs = int(DEFAULT_NUM_PROCS) + m.config.Num_procs = int(DEFAULT_NUM_PROCS) } - if m.config.num_procs <= 0 || m.config.num_procs > MAX_NUM_PROCS { + if m.config.Num_procs <= 0 || m.config.Num_procs > MAX_NUM_PROCS { return errors.New(fmt.Sprintf("num_procs option must be set in 'topprocs' config (range: 1-%d)", MAX_NUM_PROCS)) } m.setup() @@ -64,7 +64,7 @@ func (m *TopProcsCollector) Read(interval time.Duration, out *[]lp.MutableMetric } lines := strings.Split(string(stdout), "\n") - for i := 1; i < m.config.num_procs+1; i++ { + for i := 1; i < m.config.Num_procs+1; i++ { name := fmt.Sprintf("topproc%d", i) y, err := lp.New(name, m.tags, map[string]interface{}{"value": string(lines[i])}, time.Now()) if err == 
nil { From 200af84c546ae71358ef55b5dd8dfc1a13b006bc Mon Sep 17 00:00:00 2001 From: Thomas Gruber Date: Tue, 25 Jan 2022 15:37:43 +0100 Subject: [PATCH 23/45] Modularize the whole thing (#16) * Use channels, add a metric router, split up configuration and use extended version of Influx line protocol internally * Use central timer for collectors and router. Add expressions to router * Add expression to router config * Update entry points * Start with README * Update README for CCMetric * Formatting * Update README.md * Add README for MultiChanTicker * Add README for MultiChanTicker * Update README.md * Add README to metric router * Update main README * Remove SinkEntity type * Update README for sinks * Update go files * Update README for receivers * Update collectors README * Update collectors README * Use seperate page per collector * Fix for tempstat page * Add docs for customcmd collector * Add docs for ipmistat collector * Add docs for topprocs collector * Update customCmdMetric.md * Use seconds when calculating LIKWID metrics * Add IB metrics ib_recv_pkts and ib_xmit_pkts * Drop domain part of host name * Updated to latest stable version of likwid * Define source code dependencies in Makefile * Add GPFS / IBM Spectrum Scale collector * Add vet and staticcheck make targets * Add vet and staticcheck make targets * Avoid go vet warning: struct field tag `json:"..., omitempty"` not compatible with reflect.StructTag.Get: suspicious space in struct tag value struct field tag `json:"...", omitempty` not compatible with reflect.StructTag.Get: key:"value" pairs not separated by spaces * Add sample collector to README.md * Add CPU frequency collector * Avoid staticcheck warning: redundant return statement * Avoid staticcheck warning: unnecessary assignment to the blank identifier * Simplified code * Add CPUFreqCollectorCpuinfo a metric collector to measure the current frequency of the CPUs as obtained from /proc/cpuinfo Only measure on the first hyperthread * Add 
collector for NFS clients * Move publication of metrics into Flush() for NatsSink * Update GitHub actions * Refactoring * Avoid vet warning: Println arg list ends with redundant newline * Avoid vet warning struct field commands has json tag but is not exported * Avoid vet warning: return copies lock value. * Corrected typo * Refactoring * Add go sources in internal/... * Bad separator in Makefile * Fix Infiniband collector Co-authored-by: Holger Obermaier <40787752+ho-ob@users.noreply.github.com> --- Makefile | 6 +- README.md | 87 +--- collectors.json | 15 + collectors/README.md | 321 ++------------- collectors/collectorManager.go | 143 +++++++ collectors/cpufreqCpuinfoMetric.go | 19 +- collectors/cpufreqMetric.go | 17 +- collectors/cpustatMetric.go | 20 +- collectors/cpustatMetric.md | 23 ++ collectors/customCmdMetric.go | 26 +- collectors/customCmdMetric.md | 20 + collectors/diskstatMetric.go | 15 +- collectors/diskstatMetric.md | 34 ++ collectors/gpfsMetric.go | 121 ++---- collectors/infinibandMetric.go | 113 +++--- collectors/infinibandMetric.md | 19 + collectors/ipmiMetric.go | 38 +- collectors/ipmiMetric.md | 16 + collectors/likwidMetric.go | 62 ++- collectors/likwidMetric.md | 119 ++++++ collectors/loadavgMetric.go | 18 +- collectors/loadavgMetric.md | 19 + collectors/lustreMetric.go | 17 +- collectors/lustreMetric.md | 29 ++ collectors/memstatMetric.go | 22 +- collectors/memstatMetric.md | 27 ++ collectors/metricCollector.go | 28 +- collectors/netstatMetric.go | 20 +- collectors/netstatMetric.md | 21 + collectors/nfsMetric.go | 147 +++++++ collectors/nvidiaMetric.go | 103 ++--- collectors/nvidiaMetric.md | 40 ++ collectors/tempMetric.go | 22 +- collectors/tempMetric.md | 22 ++ collectors/topprocsMetric.go | 14 +- collectors/topprocsMetric.md | 15 + config.json | 40 +- go.mod | 3 +- go.sum | 2 + internal/ccMetric/README.md | 32 ++ internal/ccMetric/ccMetric.go | 374 ++++++++++++++++++ internal/metricRouter/README.md | 50 +++ 
internal/metricRouter/metricRouter.go | 208 ++++++++++ internal/multiChanTicker/README.md | 37 ++ internal/multiChanTicker/multiChanTicker.go | 39 ++ metric-collector.go | 416 +++++++++----------- receivers.json | 8 + receivers/README.md | 39 +- receivers/metricReceiver.go | 29 +- receivers/natsReceiver.go | 62 +-- receivers/receiveManager.go | 153 +++++++ router.json | 22 ++ sinks.json | 6 + sinks/README.md | 126 +++--- sinks/httpSink.go | 16 +- sinks/influxSink.go | 16 +- sinks/metricSink.go | 34 +- sinks/natsSink.go | 35 +- sinks/sinkManager.go | 141 +++++++ sinks/stdoutSink.go | 15 +- 60 files changed, 2596 insertions(+), 1105 deletions(-) create mode 100644 collectors.json create mode 100644 collectors/collectorManager.go create mode 100644 collectors/cpustatMetric.md create mode 100644 collectors/customCmdMetric.md create mode 100644 collectors/diskstatMetric.md create mode 100644 collectors/infinibandMetric.md create mode 100644 collectors/ipmiMetric.md create mode 100644 collectors/likwidMetric.md create mode 100644 collectors/loadavgMetric.md create mode 100644 collectors/lustreMetric.md create mode 100644 collectors/memstatMetric.md create mode 100644 collectors/netstatMetric.md create mode 100644 collectors/nfsMetric.go create mode 100644 collectors/nvidiaMetric.md create mode 100644 collectors/tempMetric.md create mode 100644 collectors/topprocsMetric.md create mode 100644 internal/ccMetric/README.md create mode 100644 internal/ccMetric/ccMetric.go create mode 100644 internal/metricRouter/README.md create mode 100644 internal/metricRouter/metricRouter.go create mode 100644 internal/multiChanTicker/README.md create mode 100644 internal/multiChanTicker/multiChanTicker.go create mode 100644 receivers.json create mode 100644 receivers/receiveManager.go create mode 100644 router.json create mode 100644 sinks.json create mode 100644 sinks/sinkManager.go diff --git a/Makefile b/Makefile index 892bbcc..c9805eb 100644 --- a/Makefile +++ b/Makefile @@ -3,7 +3,9 
@@ GOSRC_APP := metric-collector.go GOSRC_COLLECTORS := $(wildcard collectors/*.go) GOSRC_SINKS := $(wildcard sinks/*.go) GOSRC_RECEIVERS := $(wildcard receivers/*.go) -GOSRC := $(GOSRC_APP) $(GOSRC_COLLECTORS) $(GOSRC_SINKS) $(GOSRC_RECEIVERS) +GOSRC_INTERNAL := $(wildcard internal/*/*.go) +GOSRC := $(GOSRC_APP) $(GOSRC_COLLECTORS) $(GOSRC_SINKS) $(GOSRC_RECEIVERS) $(GOSRC_INTERNAL) + .PHONY: all all: $(APP) @@ -24,6 +26,8 @@ fmt: go fmt $(GOSRC_SINKS) go fmt $(GOSRC_RECEIVERS) go fmt $(GOSRC_APP) + @for F in $(GOSRC_INTERNAL); do go fmt $$F; done + # Examine Go source code and reports suspicious constructs .PHONY: vet diff --git a/README.md b/README.md index fcabb82..158cc0c 100644 --- a/README.md +++ b/README.md @@ -12,79 +12,33 @@ The receiver runs as a go routine side-by-side with the timer loop and asynchron Configuration is implemented using a single json document that is distributed over network and may be persisted as file. Supported metrics are documented [here](https://github.com/ClusterCockpit/cc-specifications/blob/master/metrics/lineprotocol_alternative.md). +There is a main configuration file with basic settings that point to the other configuration files for the different components. 
+ ``` json { - "interval": 3, - "duration": 1, - "collectors": [ - "memstat", - "likwid", - "loadavg", - "netstat", - "ibstat", - "lustrestat", - "topprocs", - "cpustat", - "nvidia" - ], - "sink": { - "user": "admin", - "password": "12345", - "host": "localhost", - "port": "8080", - "database": "testdb", - "organisation": "testorg", - "type": "stdout" - }, - "default_tags": { - "cluster": "testcluster" - }, - "receiver": { - "type": "none", - "address": "127.0.0.1", - "port": "4222", - "database": "testdb" - }, - "collect_config": { - "tempstat": { - "tag_override": { - "hwmon0": { - "type": "socket", - "type-id": "0" - }, - "hwmon1": { - "type": "socket", - "type-id": "1" - } - } - }, - "diskstat": { - "exclude_metrics": [ - "read_ms" - ] - } - } + "sinks": "sinks.json", + "collectors" : "collectors.json", + "receivers" : "receivers.json", + "router" : "router.json", + "interval": 10, + "duration": 1 } ``` -The `interval` defines how often the metrics should be read and send to the sink. The `duration` tells collectors how long one measurement has to take. An example for this is the `likwid` collector which starts the hardware performance counter, waits for `duration` seconds and stops the counters again. If you configure a collector to do two measurments, the `duration` must be at least half the `interval`. +The `interval` defines how often the metrics should be read and send to the sink. The `duration` tells collectors how long one measurement has to take. This is important for some collectors, like the `likwid` collector. -The `collectors` contains all collectors executed collectors. Each collector can be configured in the `collect_config` section. A more detailed list of all collectors and their configuration options can be found in the [README for collectors](./collectors/README.md). 
+See the component READMEs for their configuration: +* [`collectors`](./collectors/README.md) +* [`sinks`](./sinks/README.md) +* [`receivers`](./receivers/README.md) +* [`router`](./internal/metricRouter/README.md) -The `sink` section contains the configuration where the data should be transmitted to. There are currently four sinks supported `influxdb`, `nats`, `http` and `stdout`. See [README for sinks](./sinks/README.md) for more information about the individual sinks and which configuration field they are using. - -In the `default_tags` section, one can define key-value-pairs (only strings) that are added to each sent out metric. This can be useful for cluster names like in the example JSON or information like rank or island for orientation. - -With `receiver`, the collector can be used as a router by receiving metrics and forwarding them to the configured sink. There are currently only types `none` (for no receiver) and `nats`. For more information see the [README in receivers](./receivers/README.md). # Installation ``` $ git clone git@github.com:ClusterCockpit/cc-metric-collector.git -$ cd cc-metric-collector/collectors -$ edit Makefile (for LIKWID collector) -$ make (downloads LIKWID, builds it as static library and copies all required files for the collector. Uses sudo in case of own accessdaemon) -$ cd .. +$ make (downloads LIKWID, builds it as static library with 'direct' accessmode and copies all required files for the collector) $ go get (requires at least golang 1.13) $ go build metric-collector ``` @@ -104,13 +58,6 @@ Usage of metric-collector: Path for PID file (default "/var/run/cc-metric-collector.pid") ``` -# Todos - -- [ ] Use only non-blocking APIs for the sinks -- [x] Collector specific configuration in global JSON file? Changing the configuration inside the Go code is not user-friendly. -- [ ] Mark collectors as 'can-run-in-parallel' and use goroutines for them. There are only a few collectors that should run serially (e.g. 
LIKWID) -- [ ] Configuration option for receivers to add other tags. Additonal flag to tell whether default tags should be added as well. -- [ ] CLI option to get help output for collectors, sinks and receivers about their configuration options and metrics # Contributing The ClusterCockpit ecosystem is designed to be used by different HPC computing centers. Since configurations and setups differ between the centers, the centers likely have to put some work into the cc-metric-collector to gather all desired metrics. @@ -119,5 +66,5 @@ You are free to open an issue to request a collector but we would also be happy # Contact -[Matrix.org ClusterCockpit General chat](https://matrix.to/#/#clustercockpit-dev:matrix.org) -[Matrix.org ClusterCockpit Development chat](https://matrix.to/#/#clustercockpit:matrix.org) +* [Matrix.org ClusterCockpit General chat](https://matrix.to/#/#clustercockpit-dev:matrix.org) +* [Matrix.org ClusterCockpit Development chat](https://matrix.to/#/#clustercockpit:matrix.org) diff --git a/collectors.json b/collectors.json new file mode 100644 index 0000000..df2fce3 --- /dev/null +++ b/collectors.json @@ -0,0 +1,15 @@ +{ + "tempstat": { + "tag_override": { + "hwmon0" : { + "type" : "socket", + "type-id" : "0" + }, + "hwmon1" : { + "type" : "socket", + "type-id" : "1" + } + } + } + +} diff --git a/collectors/README.md b/collectors/README.md index df02dd6..1c3784e 100644 --- a/collectors/README.md +++ b/collectors/README.md @@ -1,288 +1,34 @@ +# CCMetric collectors + This folder contains the collectors for the cc-metric-collector. -# `metricCollector.go` -The base class/configuration is located in `metricCollector.go`. - -# Collectors - -* `memstatMetric.go`: Reads `/proc/meminfo` to calculate **node** metrics. It also combines values to the metric `mem_used` -* `loadavgMetric.go`: Reads `/proc/loadavg` and submits **node** metrics: -* `netstatMetric.go`: Reads `/proc/net/dev` and submits for all network devices as the **node** metrics. 
-* `lustreMetric.go`: Reads Lustre's stats files and submits **node** metrics: -* `infinibandMetric.go`: Reads InfiniBand metrics. It uses the `perfquery` command to read the **node** metrics but can fallback to sysfs counters in case `perfquery` does not work. -* `likwidMetric.go`: Reads hardware performance events using LIKWID. It submits **socket** and **cpu** metrics -* `cpustatMetric.go`: Read CPU specific values from `/proc/stat` -* `topprocsMetric.go`: Reads the TopX processes by their CPU usage. X is configurable -* `nvidiaMetric.go`: Read data about Nvidia GPUs using the NVML library -* `tempMetric.go`: Read temperature data from `/sys/class/hwmon/hwmon*` -* `ipmiMetric.go`: Collect data from `ipmitool` or as fallback `ipmi-sensors` -* `customCmdMetric.go`: Run commands or read files and submit the output (output has to be in InfluxDB line protocol!) - -If any of the collectors cannot be initialized, it is excluded from all further reads. Like if the Lustre stat file is not a valid path, no Lustre specific metrics will be recorded. - -# Collector configuration +# Configuration ```json - "collectors": [ - "tempstat" - ], - "collect_config": { - "tempstat": { - "tag_override": { - "hwmon0" : { - "type" : "socket", - "type-id" : "0" - }, - "hwmon1" : { - "type" : "socket", - "type-id" : "1" - } - } +{ + "collector_type" : { + } - } +} ``` -The configuration of the collectors in the main config files consists of two parts: active collectors (`collectors`) and collector configuration (`collect_config`). At startup, all collectors in the `collectors` list is initialized and, if successfully initialized, added to the active collectors for metric retrieval. At initialization the collector-specific configuration from the `collect_config` section is handed over. Each collector has own configuration options, check at the collector-specific section. 
+In contrast to the configuration files for sinks and receivers, the collectors configuration is not a list but a set of dicts. This is required because we didn't manage to partially read the type before loading the remaining configuration. We are eager to change this to the same format. -## `memstat` +# Available collectors -```json - "memstat": { - "exclude_metrics": [ - "mem_used" - ] - } -``` - -The `memstat` collector reads data from `/proc/meminfo` and outputs a handful **node** metrics. If a metric is not required, it can be excluded from forwarding it to the sink. - - -Metrics: -* `mem_total` -* `mem_sreclaimable` -* `mem_slab` -* `mem_free` -* `mem_buffers` -* `mem_cached` -* `mem_available` -* `mem_shared` -* `swap_total` -* `swap_free` -* `mem_used` = `mem_total` - (`mem_free` + `mem_buffers` + `mem_cached`) - -## `loadavg` -```json - "loadavg": { - "exclude_metrics": [ - "proc_run" - ] - } -``` - -The `loadavg` collector reads data from `/proc/loadavg` and outputs a handful **node** metrics. If a metric is not required, it can be excluded from forwarding it to the sink. - -Metrics: -* `load_one` -* `load_five` -* `load_fifteen` -* `proc_run` -* `proc_total` - -## `netstat` -```json - "netstat": { - "exclude_devices": [ - "lo" - ] - } -``` - -The `netstat` collector reads data from `/proc/net/dev` and outputs a handful **node** metrics. If a device is not required, it can be excluded from forwarding it to the sink. Commonly the `lo` device should be excluded. - -Metrics: -* `bytes_in` -* `bytes_out` -* `pkts_in` -* `pkts_out` - -The device name is added as tag `device`. - - -## `diskstat` - -```json - "diskstat": { - "exclude_metrics": [ - "read_ms" - ], - } -``` - -The `netstat` collector reads data from `/proc/net/dev` and outputs a handful **node** metrics. If a metric is not required, it can be excluded from forwarding it to the sink. 
- -Metrics: -* `reads` -* `reads_merged` -* `read_sectors` -* `read_ms` -* `writes` -* `writes_merged` -* `writes_sectors` -* `writes_ms` -* `ioops` -* `ioops_ms` -* `ioops_weighted_ms` -* `discards` -* `discards_merged` -* `discards_sectors` -* `discards_ms` -* `flushes` -* `flushes_ms` - - -The device name is added as tag `device`. - -## `cpustat` -```json - "netstat": { - "exclude_metrics": [ - "cpu_idle" - ] - } -``` - -The `cpustat` collector reads data from `/proc/stats` and outputs a handful **node** and **hwthread** metrics. If a metric is not required, it can be excluded from forwarding it to the sink. - -Metrics: -* `cpu_user` -* `cpu_nice` -* `cpu_system` -* `cpu_idle` -* `cpu_iowait` -* `cpu_irq` -* `cpu_softirq` -* `cpu_steal` -* `cpu_guest` -* `cpu_guest_nice` - -## `likwid` -```json - "likwid": { - "eventsets": [ - { - "events": { - "FIXC1": "ACTUAL_CPU_CLOCK", - "FIXC2": "MAX_CPU_CLOCK", - "PMC0": "RETIRED_INSTRUCTIONS", - "PMC1": "CPU_CLOCKS_UNHALTED", - "PMC2": "RETIRED_SSE_AVX_FLOPS_ALL", - "PMC3": "MERGE", - "DFC0": "DRAM_CHANNEL_0", - "DFC1": "DRAM_CHANNEL_1", - "DFC2": "DRAM_CHANNEL_2", - "DFC3": "DRAM_CHANNEL_3" - }, - "metrics": [ - { - "name": "ipc", - "calc": "PMC0/PMC1", - "socket_scope": false, - "publish": true - }, - { - "name": "flops_any", - "calc": "0.000001*PMC2/time", - "socket_scope": false, - "publish": true - }, - { - "name": "clock_mhz", - "calc": "0.000001*(FIXC1/FIXC2)/inverseClock", - "socket_scope": false, - "publish": true - }, - { - "name": "mem1", - "calc": "0.000001*(DFC0+DFC1+DFC2+DFC3)*64.0/time", - "socket_scope": true, - "publish": false - } - ] - }, - { - "events": { - "DFC0": "DRAM_CHANNEL_4", - "DFC1": "DRAM_CHANNEL_5", - "DFC2": "DRAM_CHANNEL_6", - "DFC3": "DRAM_CHANNEL_7", - "PWR0": "RAPL_CORE_ENERGY", - "PWR1": "RAPL_PKG_ENERGY" - }, - "metrics": [ - { - "name": "pwr_core", - "calc": "PWR0/time", - "socket_scope": false, - "publish": true - }, - { - "name": "pwr_pkg", - "calc": "PWR1/time", - "socket_scope": 
true, - "publish": true - }, - { - "name": "mem2", - "calc": "0.000001*(DFC0+DFC1+DFC2+DFC3)*64.0/time", - "socket_scope": true, - "publish": false - } - ] - } - ], - "globalmetrics": [ - { - "name": "mem_bw", - "calc": "mem1+mem2", - "socket_scope": true, - "publish": true - } - ] - } -``` - -_Example config suitable for AMD Zen3_ - -The `likwid` collector reads hardware performance counters at a **hwthread** and **socket** level. The configuration looks quite complicated but it is basically copy&paste from [LIKWID's performance groups](https://github.com/RRZE-HPC/likwid/tree/master/groups). The collector made multiple iterations and tried to use the performance groups but it lacked flexibility. The current way of configuration provides most flexibility. - -The logic is as following: There are multiple eventsets, each consisting of a list of counters+events and a list of metrics. If you compare a common performance group with the example setting above, there is not much difference: -``` -EVENTSET -> "events": { -FIXC1 ACTUAL_CPU_CLOCK -> "FIXC1": "ACTUAL_CPU_CLOCK", -FIXC2 MAX_CPU_CLOCK -> "FIXC2": "MAX_CPU_CLOCK", -PMC0 RETIRED_INSTRUCTIONS -> "PMC0" : "RETIRED_INSTRUCTIONS", -PMC1 CPU_CLOCKS_UNHALTED -> "PMC1" : "CPU_CLOCKS_UNHALTED", -PMC2 RETIRED_SSE_AVX_FLOPS_ALL -> "PMC2": "RETIRED_SSE_AVX_FLOPS_ALL", -PMC3 MERGE -> "PMC3": "MERGE", - -> } -``` - -The metrics are following the same procedure: - -``` -METRICS -> "metrics": [ -IPC PMC0/PMC1 -> { - -> "name" : "IPC", - -> "calc" : "PMC0/PMC1", - -> "socket_scope": false, - -> "publish": true - -> } - -> ] -``` - -The `socket_scope` option tells whether it is submitted per socket or per hwthread. If a metric is only used for internal calculations, you can set `publish = false`. - -Since some metrics can only be gathered in multiple measurements (like the memory bandwidth on AMD Zen3 chips), configure multiple eventsets like in the example config and use the `globalmetrics` section to combine them. 
**Be aware** that the combination might be misleading because the "behavior" of a metric changes over time and the multiple measurements might count different computing phases. +* [`cpustat`](./cpustatMetric.md) +* [`memstat`](./memstatMetric.md) +* [`diskstat`](./diskstatMetric.md) +* [`loadavg`](./loadavgMetric.md) +* [`netstat`](./netstatMetric.md) +* [`ibstat`](./infinibandMetric.md) +* [`tempstat`](./tempMetric.md) +* [`lustre`](./lustreMetric.md) +* [`likwid`](./likwidMetric.md) +* [`nvidia`](./nvidiaMetric.md) +* [`customcmd`](./customCmdMetric.md) +* [`ipmistat`](./ipmiMetric.md) +* [`topprocs`](./topprocsMetric.md) ## Todos @@ -292,13 +38,15 @@ Since some metrics can only be gathered in multiple measurements (like the memor # Contributing own collectors A collector reads data from any source, parses it to metrics and submits these metrics to the `metric-collector`. A collector provides three function: -* `Init(config []byte) error`: Initializes the collector using the given collector-specific config in JSON. -* `Read(duration time.Duration, out *[]lp.MutableMetric) error`: Read, parse and submit data to the `out` list. If the collector has to measure anything for some duration, use the provided function argument `duration`. +* `Name() string`: Return the name of the collector +* `Init(config json.RawMessage) error`: Initializes the collector using the given collector-specific config in JSON. Check if needed files/commands exists, ... +* `Initialized() bool`: Check if a collector is successfully initialized +* `Read(duration time.Duration, output chan ccMetric.CCMetric)`: Read, parse and submit data to the `output` channel as [`CCMetric`](../internal/ccMetric/README.md). If the collector has to measure anything for some duration, use the provided function argument `duration`. * `Close()`: Closes down the collector. It is recommanded to call `setup()` in the `Init()` function. -Finally, the collector needs to be registered in the `metric-collector.go`. 
There is a list of collectors called `Collectors` which is a map (string -> pointer to collector). Add a new entry with a descriptive name and the new collector. +Finally, the collector needs to be registered in the `collectorManager.go`. There is a list of collectors called `AvailableCollectors` which is a map (`collector_type_string` -> `pointer to MetricCollector interface`). Add a new entry with a descriptive name and the new collector. ## Sample collector @@ -307,8 +55,9 @@ package collectors import ( "encoding/json" - lp "github.com/influxdata/line-protocol" "time" + + lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric" ) // Struct for the collector-specific JSON config @@ -317,11 +66,11 @@ type SampleCollectorConfig struct { } type SampleCollector struct { - MetricCollector + metricCollector config SampleCollectorConfig } -func (m *SampleCollector) Init(config []byte) error { +func (m *SampleCollector) Init(config json.RawMessage) error { m.name = "SampleCollector" m.setup() if len(config) > 0 { @@ -330,11 +79,13 @@ func (m *SampleCollector) Init(config []byte) error { return err } } + m.meta = map[string]string{"source": m.name, "group": "Sample"} + m.init = true return nil } -func (m *SampleCollector) Read(interval time.Duration, out *[]lp.MutableMetric) { +func (m *SampleCollector) Read(interval time.Duration, output chan lp.CCMetric) { if !m.init { return } @@ -342,9 +93,9 @@ func (m *SampleCollector) Read(interval time.Duration, out *[]lp.MutableMetric) tags := map[string]string{"type" : "node"} // Each metric has exactly one field: value ! 
value := map[string]interface{}{"value": int(x)} - y, err := lp.New("sample_metric", tags, value, time.Now()) + y, err := lp.New("sample_metric", tags, m.meta, value, time.Now()) if err == nil { - *out = append(*out, y) + output <- y } } diff --git a/collectors/collectorManager.go b/collectors/collectorManager.go new file mode 100644 index 0000000..9543431 --- /dev/null +++ b/collectors/collectorManager.go @@ -0,0 +1,143 @@ +package collectors + +import ( + "encoding/json" + "log" + "os" + "sync" + "time" + + lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric" + mct "github.com/ClusterCockpit/cc-metric-collector/internal/multiChanTicker" +) + +var AvailableCollectors = map[string]MetricCollector{ + + "likwid": &LikwidCollector{}, + "loadavg": &LoadavgCollector{}, + "memstat": &MemstatCollector{}, + "netstat": &NetstatCollector{}, + "ibstat": &InfinibandCollector{}, + "lustrestat": &LustreCollector{}, + "cpustat": &CpustatCollector{}, + "topprocs": &TopProcsCollector{}, + "nvidia": &NvidiaCollector{}, + "customcmd": &CustomCmdCollector{}, + "diskstat": &DiskstatCollector{}, + "tempstat": &TempCollector{}, + "ipmistat": &IpmiCollector{}, + "gpfs": new(GpfsCollector), + "cpufreq": new(CPUFreqCollector), + "cpufreq_cpuinfo": new(CPUFreqCpuInfoCollector), + "nfsstat": new(NfsCollector), +} + +type collectorManager struct { + collectors []MetricCollector + output chan lp.CCMetric + done chan bool + ticker mct.MultiChanTicker + duration time.Duration + wg *sync.WaitGroup + config map[string]json.RawMessage +} + +type CollectorManager interface { + Init(ticker mct.MultiChanTicker, duration time.Duration, wg *sync.WaitGroup, collectConfigFile string) error + AddOutput(output chan lp.CCMetric) + Start() + Close() +} + +func (cm *collectorManager) Init(ticker mct.MultiChanTicker, duration time.Duration, wg *sync.WaitGroup, collectConfigFile string) error { + cm.collectors = make([]MetricCollector, 0) + cm.output = nil + cm.done = make(chan bool) + cm.wg = wg 
+ cm.ticker = ticker + cm.duration = duration + configFile, err := os.Open(collectConfigFile) + if err != nil { + log.Print(err.Error()) + return err + } + defer configFile.Close() + jsonParser := json.NewDecoder(configFile) + err = jsonParser.Decode(&cm.config) + if err != nil { + log.Print(err.Error()) + return err + } + for k, cfg := range cm.config { + log.Print(k, " ", cfg) + if _, found := AvailableCollectors[k]; !found { + log.Print("[CollectorManager] SKIP unknown collector ", k) + continue + } + c := AvailableCollectors[k] + + err = c.Init(cfg) + if err != nil { + log.Print("[CollectorManager] Collector ", k, "initialization failed: ", err.Error()) + continue + } + cm.collectors = append(cm.collectors, c) + } + return nil +} + +func (cm *collectorManager) Start() { + cm.wg.Add(1) + tick := make(chan time.Time) + cm.ticker.AddChannel(tick) + go func() { + for { + CollectorManagerLoop: + select { + case <-cm.done: + for _, c := range cm.collectors { + c.Close() + } + cm.wg.Done() + log.Print("[CollectorManager] DONE\n") + break CollectorManagerLoop + case t := <-tick: + for _, c := range cm.collectors { + CollectorManagerInputLoop: + select { + case <-cm.done: + for _, c := range cm.collectors { + c.Close() + } + cm.wg.Done() + log.Print("[CollectorManager] DONE\n") + break CollectorManagerInputLoop + default: + log.Print("[CollectorManager] ", c.Name(), " ", t) + c.Read(cm.duration, cm.output) + } + } + } + } + log.Print("[CollectorManager] EXIT\n") + }() + log.Print("[CollectorManager] STARTED\n") +} + +func (cm *collectorManager) AddOutput(output chan lp.CCMetric) { + cm.output = output +} + +func (cm *collectorManager) Close() { + cm.done <- true + log.Print("[CollectorManager] CLOSE") +} + +func New(ticker mct.MultiChanTicker, duration time.Duration, wg *sync.WaitGroup, collectConfigFile string) (CollectorManager, error) { + cm := &collectorManager{} + err := cm.Init(ticker, duration, wg, collectConfigFile) + if err != nil { + return nil, err + } + 
return cm, err +} diff --git a/collectors/cpufreqCpuinfoMetric.go b/collectors/cpufreqCpuinfoMetric.go index e8cd0fc..9c91a50 100644 --- a/collectors/cpufreqCpuinfoMetric.go +++ b/collectors/cpufreqCpuinfoMetric.go @@ -2,14 +2,16 @@ package collectors import ( "bufio" + "encoding/json" + "fmt" "log" "os" "strconv" "strings" "time" + lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric" - lp "github.com/influxdata/line-protocol" ) // @@ -33,12 +35,16 @@ type CPUFreqCpuInfoCollectorTopology struct { } type CPUFreqCpuInfoCollector struct { - MetricCollector + metricCollector topology []CPUFreqCpuInfoCollectorTopology } -func (m *CPUFreqCpuInfoCollector) Init(config []byte) error { +func (m *CPUFreqCpuInfoCollector) Init(config json.RawMessage) error { m.name = "CPUFreqCpuInfoCollector" + m.meta = map[string]string{ + "source": m.name, + "group": "cpufreq", + } const cpuInfoFile = "/proc/cpuinfo" file, err := os.Open(cpuInfoFile) @@ -145,7 +151,8 @@ func (m *CPUFreqCpuInfoCollector) Init(config []byte) error { return nil } -func (m *CPUFreqCpuInfoCollector) Read(interval time.Duration, out *[]lp.MutableMetric) { + +func (m *CPUFreqCpuInfoCollector) Read(interval time.Duration, output chan lp.CCMetric) { if !m.init { return } @@ -174,9 +181,9 @@ func (m *CPUFreqCpuInfoCollector) Read(interval time.Duration, out *[]lp.Mutable log.Printf("Failed to convert cpu MHz to float: %v", err) return } - y, err := lp.New("cpufreq", t.tagSet, map[string]interface{}{"value": value}, now) + y, err := lp.New("cpufreq", t.tagSet, m.meta, map[string]interface{}{"value": value}, now) if err == nil { - *out = append(*out, y) + output <- y } } processorCounter++ diff --git a/collectors/cpufreqMetric.go b/collectors/cpufreqMetric.go index fcab782..5febed9 100644 --- a/collectors/cpufreqMetric.go +++ b/collectors/cpufreqMetric.go @@ -10,8 +10,7 @@ import ( "strconv" "strings" "time" - - lp "github.com/influxdata/line-protocol" + lp 
"github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric" "golang.org/x/sys/unix" ) @@ -56,14 +55,14 @@ type CPUFreqCollectorTopology struct { // See: https://www.kernel.org/doc/html/latest/admin-guide/pm/cpufreq.html // type CPUFreqCollector struct { - MetricCollector + metricCollector topology []CPUFreqCollectorTopology config struct { ExcludeMetrics []string `json:"exclude_metrics,omitempty"` } } -func (m *CPUFreqCollector) Init(config []byte) error { +func (m *CPUFreqCollector) Init(config json.RawMessage) error { m.name = "CPUFreqCollector" m.setup() if len(config) > 0 { @@ -72,6 +71,10 @@ func (m *CPUFreqCollector) Init(config []byte) error { return err } } + m.meta = map[string]string{ + "source": m.name, + "group": "CPU Frequency", + } // Loop for all CPU directories baseDir := "/sys/devices/system/cpu" @@ -179,7 +182,7 @@ func (m *CPUFreqCollector) Init(config []byte) error { return nil } -func (m *CPUFreqCollector) Read(interval time.Duration, out *[]lp.MutableMetric) { +func (m *CPUFreqCollector) Read(interval time.Duration, output chan lp.CCMetric) { if !m.init { return } @@ -205,9 +208,9 @@ func (m *CPUFreqCollector) Read(interval time.Duration, out *[]lp.MutableMetric) continue } - y, err := lp.New("cpufreq", t.tagSet, map[string]interface{}{"value": cpuFreq}, now) + y, err := lp.New("cpufreq", t.tagSet, m.meta, map[string]interface{}{"value": cpuFreq}, now) if err == nil { - *out = append(*out, y) + output <- y } } } diff --git a/collectors/cpustatMetric.go b/collectors/cpustatMetric.go index 64b5842..f517300 100644 --- a/collectors/cpustatMetric.go +++ b/collectors/cpustatMetric.go @@ -7,8 +7,7 @@ import ( "strconv" "strings" "time" - - lp "github.com/influxdata/line-protocol" + lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric" ) const CPUSTATFILE = `/proc/stat` @@ -18,13 +17,14 @@ type CpustatCollectorConfig struct { } type CpustatCollector struct { - MetricCollector + metricCollector config CpustatCollectorConfig } -func 
(m *CpustatCollector) Init(config []byte) error { +func (m *CpustatCollector) Init(config json.RawMessage) error { m.name = "CpustatCollector" m.setup() + m.meta = map[string]string{"source": m.name, "group": "CPU"} if len(config) > 0 { err := json.Unmarshal(config, &m.config) if err != nil { @@ -35,7 +35,7 @@ func (m *CpustatCollector) Init(config []byte) error { return nil } -func ParseStatLine(line string, cpu int, exclude []string, out *[]lp.MutableMetric) { +func (c *CpustatCollector) parseStatLine(line string, cpu int, exclude []string, output chan lp.CCMetric) { ls := strings.Fields(line) matches := []string{"", "cpu_user", "cpu_nice", "cpu_system", "cpu_idle", "cpu_iowait", "cpu_irq", "cpu_softirq", "cpu_steal", "cpu_guest", "cpu_guest_nice"} for _, ex := range exclude { @@ -52,16 +52,16 @@ func ParseStatLine(line string, cpu int, exclude []string, out *[]lp.MutableMetr if len(m) > 0 { x, err := strconv.ParseInt(ls[i], 0, 64) if err == nil { - y, err := lp.New(m, tags, map[string]interface{}{"value": int(x)}, time.Now()) + y, err := lp.New(m, tags, c.meta, map[string]interface{}{"value": int(x)}, time.Now()) if err == nil { - *out = append(*out, y) + output <- y } } } } } -func (m *CpustatCollector) Read(interval time.Duration, out *[]lp.MutableMetric) { +func (m *CpustatCollector) Read(interval time.Duration, output chan lp.CCMetric) { if !m.init { return } @@ -78,11 +78,11 @@ func (m *CpustatCollector) Read(interval time.Duration, out *[]lp.MutableMetric) } ls := strings.Fields(line) if strings.Compare(ls[0], "cpu") == 0 { - ParseStatLine(line, -1, m.config.ExcludeMetrics, out) + m.parseStatLine(line, -1, m.config.ExcludeMetrics, output) } else if strings.HasPrefix(ls[0], "cpu") { cpustr := strings.TrimLeft(ls[0], "cpu") cpu, _ := strconv.Atoi(cpustr) - ParseStatLine(line, cpu, m.config.ExcludeMetrics, out) + m.parseStatLine(line, cpu, m.config.ExcludeMetrics, output) } } } diff --git a/collectors/cpustatMetric.md b/collectors/cpustatMetric.md new file 
mode 100644 index 0000000..604445a --- /dev/null +++ b/collectors/cpustatMetric.md @@ -0,0 +1,23 @@ + +## `cpustat` collector +```json + "cpustat": { + "exclude_metrics": [ + "cpu_idle" + ] + } +``` + +The `cpustat` collector reads data from `/proc/stat` and outputs a handful of **node** and **hwthread** metrics. If a metric is not required, it can be excluded from being forwarded to the sink. + +Metrics: +* `cpu_user` +* `cpu_nice` +* `cpu_system` +* `cpu_idle` +* `cpu_iowait` +* `cpu_irq` +* `cpu_softirq` +* `cpu_steal` +* `cpu_guest` +* `cpu_guest_nice` diff --git a/collectors/customCmdMetric.go b/collectors/customCmdMetric.go index e11f4c7..ffe8b73 100644 --- a/collectors/customCmdMetric.go +++ b/collectors/customCmdMetric.go @@ -9,7 +9,8 @@ import ( "strings" "time" - lp "github.com/influxdata/line-protocol" + lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric" + influx "github.com/influxdata/line-protocol" ) const CUSTOMCMDPATH = `/home/unrz139/Work/cc-metric-collector/collectors/custom` @@ -21,17 +22,18 @@ type CustomCmdCollectorConfig struct { } type CustomCmdCollector struct { - MetricCollector - handler *lp.MetricHandler - parser *lp.Parser + metricCollector + handler *influx.MetricHandler + parser *influx.Parser config CustomCmdCollectorConfig commands []string files []string } -func (m *CustomCmdCollector) Init(config []byte) error { +func (m *CustomCmdCollector) Init(config json.RawMessage) error { var err error m.name = "CustomCmdCollector" + m.meta = map[string]string{"source": m.name, "group": "Custom"} if len(config) > 0 { err = json.Unmarshal(config, &m.config) if err != nil { @@ -61,8 +63,8 @@ func (m *CustomCmdCollector) Init(config []byte) error { if len(m.files) == 0 && len(m.commands) == 0 { return errors.New("No metrics to collect") } - m.handler = lp.NewMetricHandler() - m.parser = lp.NewParser(m.handler) + m.handler = influx.NewMetricHandler() + m.parser = influx.NewParser(m.handler) m.parser.SetTimeFunc(DefaultTime) m.init =
true return nil @@ -72,7 +74,7 @@ var DefaultTime = func() time.Time { return time.Unix(42, 0) } -func (m *CustomCmdCollector) Read(interval time.Duration, out *[]lp.MutableMetric) { +func (m *CustomCmdCollector) Read(interval time.Duration, output chan lp.CCMetric) { if !m.init { return } @@ -95,9 +97,9 @@ func (m *CustomCmdCollector) Read(interval time.Duration, out *[]lp.MutableMetri if skip { continue } - y, err := lp.New(c.Name(), Tags2Map(c), Fields2Map(c), c.Time()) + y, err := lp.New(c.Name(), Tags2Map(c), m.meta, Fields2Map(c), c.Time()) if err == nil { - *out = append(*out, y) + output <- y } } } @@ -117,9 +119,9 @@ func (m *CustomCmdCollector) Read(interval time.Duration, out *[]lp.MutableMetri if skip { continue } - y, err := lp.New(f.Name(), Tags2Map(f), Fields2Map(f), f.Time()) + y, err := lp.New(f.Name(), Tags2Map(f), m.meta, Fields2Map(f), f.Time()) if err == nil { - *out = append(*out, y) + output <- y } } } diff --git a/collectors/customCmdMetric.md b/collectors/customCmdMetric.md new file mode 100644 index 0000000..011135d --- /dev/null +++ b/collectors/customCmdMetric.md @@ -0,0 +1,20 @@ + +## `customcmd` collector + +```json + "customcmd": { + "exclude_metrics": [ + "mymetric" + ], + "files" : [ + "/var/run/myapp.metrics" + ], + "commands" : [ + "/usr/local/bin/getmetrics.pl" + ] + } +``` + +The `customcmd` collector reads data from files and the output of executed commands. The files and commands can output multiple metrics (separated by newline) but they have to be in the [InfluxDB line protocol](https://docs.influxdata.com/influxdb/cloud/reference/syntax/line-protocol/). If a metric is not parsable, it is skipped. If a metric is not required, it can be excluded from being forwarded to the sink.
+ + diff --git a/collectors/diskstatMetric.go b/collectors/diskstatMetric.go index 4cbd3c6..50c41cd 100644 --- a/collectors/diskstatMetric.go +++ b/collectors/diskstatMetric.go @@ -2,9 +2,7 @@ package collectors import ( "io/ioutil" - - lp "github.com/influxdata/line-protocol" - + lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric" // "log" "encoding/json" "errors" @@ -21,14 +19,15 @@ type DiskstatCollectorConfig struct { } type DiskstatCollector struct { - MetricCollector + metricCollector matches map[int]string config DiskstatCollectorConfig } -func (m *DiskstatCollector) Init(config []byte) error { +func (m *DiskstatCollector) Init(config json.RawMessage) error { var err error m.name = "DiskstatCollector" + m.meta = map[string]string{"source": m.name, "group": "Disk"} m.setup() if len(config) > 0 { err = json.Unmarshal(config, &m.config) @@ -73,7 +72,7 @@ func (m *DiskstatCollector) Init(config []byte) error { return err } -func (m *DiskstatCollector) Read(interval time.Duration, out *[]lp.MutableMetric) { +func (m *DiskstatCollector) Read(interval time.Duration, output chan lp.CCMetric) { var lines []string if !m.init { return @@ -101,9 +100,9 @@ func (m *DiskstatCollector) Read(interval time.Duration, out *[]lp.MutableMetric if idx < len(f) { x, err := strconv.ParseInt(f[idx], 0, 64) if err == nil { - y, err := lp.New(name, tags, map[string]interface{}{"value": int(x)}, time.Now()) + y, err := lp.New(name, tags, m.meta, map[string]interface{}{"value": int(x)}, time.Now()) if err == nil { - *out = append(*out, y) + output <- y } } } diff --git a/collectors/diskstatMetric.md b/collectors/diskstatMetric.md new file mode 100644 index 0000000..1ac341d --- /dev/null +++ b/collectors/diskstatMetric.md @@ -0,0 +1,34 @@ + +## `diskstat` collector + +```json + "diskstat": { + "exclude_metrics": [ + "read_ms" + ] + } +``` + +The `diskstat` collector reads data from `/proc/diskstats` and outputs a handful of **node** metrics.
If a metric is not required, it can be excluded from forwarding it to the sink. + +Metrics: +* `reads` +* `reads_merged` +* `read_sectors` +* `read_ms` +* `writes` +* `writes_merged` +* `writes_sectors` +* `writes_ms` +* `ioops` +* `ioops_ms` +* `ioops_weighted_ms` +* `discards` +* `discards_merged` +* `discards_sectors` +* `discards_ms` +* `flushes` +* `flushes_ms` + +The device name is added as tag `device`. + diff --git a/collectors/gpfsMetric.go b/collectors/gpfsMetric.go index fbf3a63..f1d3d75 100644 --- a/collectors/gpfsMetric.go +++ b/collectors/gpfsMetric.go @@ -13,18 +13,20 @@ import ( "strconv" "strings" "time" - - lp "github.com/influxdata/line-protocol" + lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric" ) type GpfsCollector struct { - MetricCollector + metricCollector + tags map[string]string + config struct { Mmpmon string `json:"mmpmon"` } } -func (m *GpfsCollector) Init(config []byte) error { + +func (m *GpfsCollector) Init(config json.RawMessage) error { var err error m.name = "GpfsCollector" m.setup() @@ -40,6 +42,14 @@ func (m *GpfsCollector) Init(config []byte) error { return err } } + m.meta = map[string]string{ + "source": m.name, + "group": "GPFS", + } + m.tags = map[string]string{ + "type": "node", + "filesystem": "", + } // GPFS / IBM Spectrum Scale file system statistics can only be queried by user root user, err := user.Current() @@ -60,7 +70,7 @@ func (m *GpfsCollector) Init(config []byte) error { return nil } -func (m *GpfsCollector) Read(interval time.Duration, out *[]lp.MutableMetric) { +func (m *GpfsCollector) Read(interval time.Duration, output chan lp.CCMetric) { if !m.init { return } @@ -108,6 +118,9 @@ func (m *GpfsCollector) Read(interval time.Duration, out *[]lp.MutableMetric) { continue } + m.tags["filesystem"] = filesystem + + // return code rc, err := strconv.Atoi(key_value["_rc_"]) if err != nil { @@ -140,17 +153,10 @@ func (m *GpfsCollector) Read(interval time.Duration, out *[]lp.MutableMetric) { 
key_value["_br_"], err.Error()) continue } - y, err := lp.New( - "gpfs_bytes_read", - map[string]string{ - "filesystem": filesystem, - }, - map[string]interface{}{ - "value": bytesRead, - }, - timestamp) + + y, err := lp.New("gpfs_bytes_read", m.tags, m.meta, map[string]interface{}{"value": bytesRead}, timestamp) if err == nil { - *out = append(*out, y) + output <- y } // bytes written @@ -161,17 +167,10 @@ func (m *GpfsCollector) Read(interval time.Duration, out *[]lp.MutableMetric) { key_value["_bw_"], err.Error()) continue } - y, err = lp.New( - "gpfs_bytes_written", - map[string]string{ - "filesystem": filesystem, - }, - map[string]interface{}{ - "value": bytesWritten, - }, - timestamp) + + y, err = lp.New("gpfs_bytes_written", m.tags, m.meta, map[string]interface{}{"value": bytesWritten}, timestamp) if err == nil { - *out = append(*out, y) + output <- y } // number of opens @@ -182,17 +181,9 @@ func (m *GpfsCollector) Read(interval time.Duration, out *[]lp.MutableMetric) { key_value["_oc_"], err.Error()) continue } - y, err = lp.New( - "gpfs_num_opens", - map[string]string{ - "filesystem": filesystem, - }, - map[string]interface{}{ - "value": numOpens, - }, - timestamp) + y, err = lp.New("gpfs_num_opens", m.tags, m.meta, map[string]interface{}{"value": numOpens}, timestamp) if err == nil { - *out = append(*out, y) + output <- y } // number of closes @@ -201,17 +192,9 @@ func (m *GpfsCollector) Read(interval time.Duration, out *[]lp.MutableMetric) { fmt.Fprintf(os.Stderr, "GpfsCollector.Read(): Failed to convert number of closes: %s\n", err.Error()) continue } - y, err = lp.New( - "gpfs_num_closes", - map[string]string{ - "filesystem": filesystem, - }, - map[string]interface{}{ - "value": numCloses, - }, - timestamp) + y, err = lp.New("gpfs_num_closes", m.tags, m.meta, map[string]interface{}{"value": numCloses}, timestamp) if err == nil { - *out = append(*out, y) + output <- y } // number of reads @@ -220,17 +203,9 @@ func (m *GpfsCollector) Read(interval 
time.Duration, out *[]lp.MutableMetric) { fmt.Fprintf(os.Stderr, "GpfsCollector.Read(): Failed to convert number of reads: %s\n", err.Error()) continue } - y, err = lp.New( - "gpfs_num_reads", - map[string]string{ - "filesystem": filesystem, - }, - map[string]interface{}{ - "value": numReads, - }, - timestamp) + y, err = lp.New("gpfs_num_reads", m.tags, m.meta, map[string]interface{}{"value": numReads}, timestamp) if err == nil { - *out = append(*out, y) + output <- y } // number of writes @@ -239,17 +214,9 @@ func (m *GpfsCollector) Read(interval time.Duration, out *[]lp.MutableMetric) { fmt.Fprintf(os.Stderr, "GpfsCollector.Read(): Failed to convert number of writes: %s\n", err.Error()) continue } - y, err = lp.New( - "gpfs_num_writes", - map[string]string{ - "filesystem": filesystem, - }, - map[string]interface{}{ - "value": numWrites, - }, - timestamp) + y, err = lp.New("gpfs_num_writes", m.tags, m.meta, map[string]interface{}{"value": numWrites}, timestamp) if err == nil { - *out = append(*out, y) + output <- y } // number of read directories @@ -258,17 +225,9 @@ func (m *GpfsCollector) Read(interval time.Duration, out *[]lp.MutableMetric) { fmt.Fprintf(os.Stderr, "GpfsCollector.Read(): Failed to convert number of read directories: %s\n", err.Error()) continue } - y, err = lp.New( - "gpfs_num_readdirs", - map[string]string{ - "filesystem": filesystem, - }, - map[string]interface{}{ - "value": numReaddirs, - }, - timestamp) + y, err = lp.New("gpfs_num_readdirs", m.tags, m.meta, map[string]interface{}{"value": numReaddirs}, timestamp) if err == nil { - *out = append(*out, y) + output <- y } // Number of inode updates @@ -277,17 +236,9 @@ func (m *GpfsCollector) Read(interval time.Duration, out *[]lp.MutableMetric) { fmt.Fprintf(os.Stderr, "GpfsCollector.Read(): Failed to convert Number of inode updates: %s\n", err.Error()) continue } - y, err = lp.New( - "gpfs_num_inode_updates", - map[string]string{ - "filesystem": filesystem, - }, - map[string]interface{}{ - 
"value": numInodeUpdates, - }, - timestamp) + y, err = lp.New("gpfs_num_inode_updates", m.tags, m.meta, map[string]interface{}{"value": numInodeUpdates}, timestamp) if err == nil { - *out = append(*out, y) + output <- y } } } diff --git a/collectors/infinibandMetric.go b/collectors/infinibandMetric.go index db7c129..af4e579 100644 --- a/collectors/infinibandMetric.go +++ b/collectors/infinibandMetric.go @@ -5,9 +5,7 @@ import ( "io/ioutil" "log" "os/exec" - - lp "github.com/influxdata/line-protocol" - + lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric" // "os" "encoding/json" "errors" @@ -28,7 +26,7 @@ type InfinibandCollectorConfig struct { } type InfinibandCollector struct { - MetricCollector + metricCollector tags map[string]string lids map[string]map[string]string config InfinibandCollectorConfig @@ -56,11 +54,12 @@ func (m *InfinibandCollector) Help() { fmt.Println("- ib_xmit_pkts") } -func (m *InfinibandCollector) Init(config []byte) error { +func (m *InfinibandCollector) Init(config json.RawMessage) error { var err error m.name = "InfinibandCollector" m.use_perfquery = false m.setup() + m.meta = map[string]string{"source": m.name, "group": "Network"} m.tags = map[string]string{"type": "node"} if len(config) > 0 { err = json.Unmarshal(config, &m.config) @@ -117,7 +116,7 @@ func (m *InfinibandCollector) Init(config []byte) error { return err } -func DoPerfQuery(cmd string, dev string, lid string, port string, tags map[string]string, out *[]lp.MutableMetric) error { +func (m *InfinibandCollector) doPerfQuery(cmd string, dev string, lid string, port string, tags map[string]string, output chan lp.CCMetric) error { args := fmt.Sprintf("-r %s %s 0xf000", lid, port) command := exec.Command(cmd, args) @@ -134,9 +133,9 @@ func DoPerfQuery(cmd string, dev string, lid string, port string, tags map[strin lv := strings.Fields(line) v, err := strconv.ParseFloat(lv[1], 64) if err == nil { - y, err := lp.New("ib_recv", tags, map[string]interface{}{"value": 
float64(v)}, time.Now()) + y, err := lp.New("ib_recv", tags, m.meta, map[string]interface{}{"value": float64(v)}, time.Now()) if err == nil { - *out = append(*out, y) + output <- y } } } @@ -144,9 +143,9 @@ func DoPerfQuery(cmd string, dev string, lid string, port string, tags map[strin lv := strings.Fields(line) v, err := strconv.ParseFloat(lv[1], 64) if err == nil { - y, err := lp.New("ib_xmit", tags, map[string]interface{}{"value": float64(v)}, time.Now()) + y, err := lp.New("ib_xmit", tags, m.meta, map[string]interface{}{"value": float64(v)}, time.Now()) if err == nil { - *out = append(*out, y) + output <- y } } } @@ -154,9 +153,9 @@ func DoPerfQuery(cmd string, dev string, lid string, port string, tags map[strin lv := strings.Fields(line) v, err := strconv.ParseFloat(lv[1], 64) if err == nil { - y, err := lp.New("ib_recv_pkts", tags, map[string]interface{}{"value": float64(v)}, time.Now()) + y, err := lp.New("ib_recv_pkts", tags, m.meta, map[string]interface{}{"value": float64(v)}, time.Now()) if err == nil { - *out = append(*out, y) + output <- y } } } @@ -164,9 +163,29 @@ func DoPerfQuery(cmd string, dev string, lid string, port string, tags map[strin lv := strings.Fields(line) v, err := strconv.ParseFloat(lv[1], 64) if err == nil { - y, err := lp.New("ib_xmit_pkts", tags, map[string]interface{}{"value": float64(v)}, time.Now()) + y, err := lp.New("ib_xmit_pkts", tags, m.meta, map[string]interface{}{"value": float64(v)}, time.Now()) if err == nil { - *out = append(*out, y) + output <- y + } + } + } + if strings.HasPrefix(line, "PortRcvPkts") || strings.HasPrefix(line, "RcvPkts") { + lv := strings.Fields(line) + v, err := strconv.ParseFloat(lv[1], 64) + if err == nil { + y, err := lp.New("ib_recv_pkts", tags, m.meta, map[string]interface{}{"value": float64(v)}, time.Now()) + if err == nil { + output <- y + } + } + } + if strings.HasPrefix(line, "PortXmitPkts") || strings.HasPrefix(line, "XmtPkts") { + lv := strings.Fields(line) + v, err := 
strconv.ParseFloat(lv[1], 64) + if err == nil { + y, err := lp.New("ib_xmit_pkts", tags, m.meta, map[string]interface{}{"value": float64(v)}, time.Now()) + if err == nil { + output <- y } } } @@ -174,16 +193,16 @@ func DoPerfQuery(cmd string, dev string, lid string, port string, tags map[strin return nil } -func DoSysfsRead(dev string, lid string, port string, tags map[string]string, out *[]lp.MutableMetric) error { +func (m *InfinibandCollector) doSysfsRead(dev string, lid string, port string, tags map[string]string, output chan lp.CCMetric) error { path := fmt.Sprintf("%s/%s/ports/%s/counters/", string(IBBASEPATH), dev, port) buffer, err := ioutil.ReadFile(fmt.Sprintf("%s/port_rcv_data", path)) if err == nil { data := strings.Replace(string(buffer), "\n", "", -1) v, err := strconv.ParseFloat(data, 64) if err == nil { - y, err := lp.New("ib_recv", tags, map[string]interface{}{"value": float64(v)}, time.Now()) + y, err := lp.New("ib_recv", tags, m.meta, map[string]interface{}{"value": float64(v)}, time.Now()) if err == nil { - *out = append(*out, y) + output <- y } } } @@ -192,9 +211,9 @@ func DoSysfsRead(dev string, lid string, port string, tags map[string]string, ou data := strings.Replace(string(buffer), "\n", "", -1) v, err := strconv.ParseFloat(data, 64) if err == nil { - y, err := lp.New("ib_xmit", tags, map[string]interface{}{"value": float64(v)}, time.Now()) + y, err := lp.New("ib_xmit", tags, m.meta, map[string]interface{}{"value": float64(v)}, time.Now()) if err == nil { - *out = append(*out, y) + output <- y } } } @@ -203,9 +222,9 @@ func DoSysfsRead(dev string, lid string, port string, tags map[string]string, ou data := strings.Replace(string(buffer), "\n", "", -1) v, err := strconv.ParseFloat(data, 64) if err == nil { - y, err := lp.New("ib_recv_pkts", tags, map[string]interface{}{"value": float64(v)}, time.Now()) + y, err := lp.New("ib_recv_pkts", tags, m.meta, map[string]interface{}{"value": float64(v)}, time.Now()) if err == nil { - *out = 
append(*out, y) + output <- y } } } @@ -214,71 +233,29 @@ func DoSysfsRead(dev string, lid string, port string, tags map[string]string, ou data := strings.Replace(string(buffer), "\n", "", -1) v, err := strconv.ParseFloat(data, 64) if err == nil { - y, err := lp.New("ib_xmit_pkts", tags, map[string]interface{}{"value": float64(v)}, time.Now()) + y, err := lp.New("ib_xmit_pkts", tags, m.meta, map[string]interface{}{"value": float64(v)}, time.Now()) if err == nil { - *out = append(*out, y) + output <- y } } } return nil } -func (m *InfinibandCollector) Read(interval time.Duration, out *[]lp.MutableMetric) { +func (m *InfinibandCollector) Read(interval time.Duration, output chan lp.CCMetric) { if m.init { for dev, ports := range m.lids { for port, lid := range ports { tags := map[string]string{"type": "node", "device": dev, "port": port} if m.use_perfquery { - DoPerfQuery(m.config.PerfQueryPath, dev, lid, port, tags, out) + m.doPerfQuery(m.config.PerfQueryPath, dev, lid, port, tags, output) } else { - DoSysfsRead(dev, lid, port, tags, out) + m.doSysfsRead(dev, lid, port, tags, output) } } } } - - // buffer, err := ioutil.ReadFile(string(LIDFILE)) - - // if err != nil { - // log.Print(err) - // return - // } - - // args := fmt.Sprintf("-r %s 1 0xf000", string(buffer)) - - // command := exec.Command(PERFQUERY, args) - // command.Wait() - // stdout, err := command.Output() - // if err != nil { - // log.Print(err) - // return - // } - - // ll := strings.Split(string(stdout), "\n") - - // for _, line := range ll { - // if strings.HasPrefix(line, "PortRcvData") || strings.HasPrefix(line, "RcvData") { - // lv := strings.Fields(line) - // v, err := strconv.ParseFloat(lv[1], 64) - // if err == nil { - // y, err := lp.New("ib_recv", m.tags, map[string]interface{}{"value": float64(v)}, time.Now()) - // if err == nil { - // *out = append(*out, y) - // } - // } - // } - // if strings.HasPrefix(line, "PortXmitData") || strings.HasPrefix(line, "XmtData") { - // lv := 
strings.Fields(line) - // v, err := strconv.ParseFloat(lv[1], 64) - // if err == nil { - // y, err := lp.New("ib_xmit", m.tags, map[string]interface{}{"value": float64(v)}, time.Now()) - // if err == nil { - // *out = append(*out, y) - // } - // } - // } - // } } func (m *InfinibandCollector) Close() { diff --git a/collectors/infinibandMetric.md b/collectors/infinibandMetric.md new file mode 100644 index 0000000..e9ba043 --- /dev/null +++ b/collectors/infinibandMetric.md @@ -0,0 +1,19 @@ + +## `ibstat` collector + +```json + "ibstat": { + "perfquery_path" : "", + "exclude_devices": [ + "mlx4" + ] + } +``` + +The `ibstat` collector reads either data through the `perfquery` command or the sysfs files below `/sys/class/infiniband/`. + +Metrics: +* `ib_recv` +* `ib_xmit` + +The collector adds a `device` tag to all metrics diff --git a/collectors/ipmiMetric.go b/collectors/ipmiMetric.go index 3179148..f4c5167 100644 --- a/collectors/ipmiMetric.go +++ b/collectors/ipmiMetric.go @@ -9,8 +9,7 @@ import ( "strconv" "strings" "time" - - lp "github.com/influxdata/line-protocol" + lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric" ) const IPMITOOL_PATH = `/usr/bin/ipmitool` @@ -23,15 +22,16 @@ type IpmiCollectorConfig struct { } type IpmiCollector struct { - MetricCollector + metricCollector tags map[string]string matches map[string]string config IpmiCollectorConfig } -func (m *IpmiCollector) Init(config []byte) error { +func (m *IpmiCollector) Init(config json.RawMessage) error { m.name = "IpmiCollector" m.setup() + m.meta = map[string]string{"source": m.name, "group": "IPMI"} if len(config) > 0 { err := json.Unmarshal(config, &m.config) if err != nil { @@ -53,7 +53,7 @@ func (m *IpmiCollector) Init(config []byte) error { return nil } -func ReadIpmiTool(cmd string, out *[]lp.MutableMetric) { +func (m *IpmiCollector) readIpmiTool(cmd string, output chan lp.CCMetric) { command := exec.Command(cmd, "sensor") command.Wait() stdout, err := command.Output() @@ 
-74,24 +74,25 @@ func ReadIpmiTool(cmd string, out *[]lp.MutableMetric) { name := strings.ToLower(strings.Replace(strings.Trim(lv[0], " "), " ", "_", -1)) unit := strings.Trim(lv[2], " ") if unit == "Volts" { - unit = "V" + unit = "Volts" } else if unit == "degrees C" { - unit = "C" + unit = "degC" } else if unit == "degrees F" { - unit = "F" + unit = "degF" } else if unit == "Watts" { - unit = "W" + unit = "Watts" } - y, err := lp.New(name, map[string]string{"unit": unit, "type": "node"}, map[string]interface{}{"value": v}, time.Now()) + y, err := lp.New(name, map[string]string{"type": "node"}, m.meta, map[string]interface{}{"value": v}, time.Now()) if err == nil { - *out = append(*out, y) + y.AddMeta("unit", unit) + output <- y } } } } -func ReadIpmiSensors(cmd string, out *[]lp.MutableMetric) { +func (m *IpmiCollector) readIpmiSensors(cmd string, output chan lp.CCMetric) { command := exec.Command(cmd, "--comma-separated-output", "--sdr-cache-recreate") command.Wait() @@ -109,25 +110,28 @@ func ReadIpmiSensors(cmd string, out *[]lp.MutableMetric) { v, err := strconv.ParseFloat(lv[3], 64) if err == nil { name := strings.ToLower(strings.Replace(lv[1], " ", "_", -1)) - y, err := lp.New(name, map[string]string{"unit": lv[4], "type": "node"}, map[string]interface{}{"value": v}, time.Now()) + y, err := lp.New(name, map[string]string{"type": "node"}, m.meta, map[string]interface{}{"value": v}, time.Now()) if err == nil { - *out = append(*out, y) + if len(lv) > 4 { + y.AddMeta("unit", lv[4]) + } + output <- y } } } } } -func (m *IpmiCollector) Read(interval time.Duration, out *[]lp.MutableMetric) { +func (m *IpmiCollector) Read(interval time.Duration, output chan lp.CCMetric) { if len(m.config.IpmitoolPath) > 0 { _, err := os.Stat(m.config.IpmitoolPath) if err == nil { - ReadIpmiTool(m.config.IpmitoolPath, out) + m.readIpmiTool(m.config.IpmitoolPath, output) } } else if len(m.config.IpmisensorsPath) > 0 { _, err := os.Stat(m.config.IpmisensorsPath) if err == nil { - 
ReadIpmiSensors(m.config.IpmisensorsPath, out) + m.readIpmiSensors(m.config.IpmisensorsPath, output) } } } diff --git a/collectors/ipmiMetric.md b/collectors/ipmiMetric.md new file mode 100644 index 0000000..fe83759 --- /dev/null +++ b/collectors/ipmiMetric.md @@ -0,0 +1,16 @@ + +## `ipmistat` collector + +```json + "ipmistat": { + "ipmitool_path": "/path/to/ipmitool", + "ipmisensors_path": "/path/to/ipmi-sensors", + } +``` + +The `ipmistat` collector reads data from `ipmitool` (`ipmitool sensor`) or `ipmi-sensors` (`ipmi-sensors --sdr-cache-recreate --comma-separated-output`). + +The metrics depend on the output of the underlying tools but contain temperature, power and energy metrics. + + + diff --git a/collectors/likwidMetric.go b/collectors/likwidMetric.go index 45fe68c..430a09b 100644 --- a/collectors/likwidMetric.go +++ b/collectors/likwidMetric.go @@ -20,16 +20,28 @@ import ( "strings" "time" "unsafe" - - lp "github.com/influxdata/line-protocol" + lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric" "gopkg.in/Knetic/govaluate.v2" ) +type MetricScope int + +const ( + METRIC_SCOPE_HWTHREAD = iota + METRIC_SCOPE_SOCKET + METRIC_SCOPE_NUMA + METRIC_SCOPE_NODE +) + +func (ms MetricScope) String() string { + return []string{"Head", "Shoulder", "Knee", "Toe"}[ms] +} + type LikwidCollectorMetricConfig struct { - Name string `json:"name"` - Calc string `json:"calc"` - Socket_scope bool `json:"socket_scope"` - Publish bool `json:"publish"` + Name string `json:"name"` + Calc string `json:"calc"` + Scope MetricScope `json:"socket_scope"` + Publish bool `json:"publish"` } type LikwidCollectorEventsetConfig struct { @@ -45,7 +57,7 @@ type LikwidCollectorConfig struct { } type LikwidCollector struct { - MetricCollector + metricCollector cpulist []C.int sock2tid map[int]int metrics map[C.int]map[string]int @@ -105,7 +117,7 @@ func getSocketCpus() map[C.int]int { return outmap } -func (m *LikwidCollector) Init(config []byte) error { +func (m *LikwidCollector) 
Init(config json.RawMessage) error { var ret C.int m.name = "LikwidCollector" if len(config) > 0 { @@ -115,11 +127,13 @@ func (m *LikwidCollector) Init(config []byte) error { } } m.setup() + m.meta = map[string]string{"source": m.name, "group": "PerfCounter"} cpulist := CpuList() m.cpulist = make([]C.int, len(cpulist)) slist := getSocketCpus() m.sock2tid = make(map[int]int) + // m.numa2tid = make(map[int]int) for i, c := range cpulist { m.cpulist[i] = C.int(c) if sid, found := slist[m.cpulist[i]]; found { @@ -169,7 +183,7 @@ func (m *LikwidCollector) Init(config []byte) error { return nil } -func (m *LikwidCollector) Read(interval time.Duration, out *[]lp.MutableMetric) { +func (m *LikwidCollector) Read(interval time.Duration, output chan lp.CCMetric) { if !m.init { return } @@ -246,24 +260,28 @@ func (m *LikwidCollector) Read(interval time.Duration, out *[]lp.MutableMetric) for _, metric := range evset.Metrics { _, skip := stringArrayContains(m.config.ExcludeMetrics, metric.Name) if metric.Publish && !skip { - if metric.Socket_scope { + if metric.Scope.String() == "socket" { for sid, tid := range m.sock2tid { y, err := lp.New(metric.Name, - map[string]string{"type": "socket", "type-id": fmt.Sprintf("%d", int(sid))}, + map[string]string{"type": "socket", + "type-id": fmt.Sprintf("%d", int(sid))}, + m.meta, map[string]interface{}{"value": m.mresults[i][tid][metric.Name]}, time.Now()) if err == nil { - *out = append(*out, y) + output <- y } } - } else { + } else if metric.Scope.String() == "hwthread" { for tid, cpu := range m.cpulist { y, err := lp.New(metric.Name, - map[string]string{"type": "cpu", "type-id": fmt.Sprintf("%d", int(cpu))}, + map[string]string{"type": "cpu", + "type-id": fmt.Sprintf("%d", int(cpu))}, + m.meta, map[string]interface{}{"value": m.mresults[i][tid][metric.Name]}, time.Now()) if err == nil { - *out = append(*out, y) + output <- y } } } @@ -273,24 +291,28 @@ func (m *LikwidCollector) Read(interval time.Duration, out *[]lp.MutableMetric) for 
_, metric := range m.config.Metrics { _, skip := stringArrayContains(m.config.ExcludeMetrics, metric.Name) if metric.Publish && !skip { - if metric.Socket_scope { + if metric.Scope.String() == "socket" { for sid, tid := range m.sock2tid { y, err := lp.New(metric.Name, - map[string]string{"type": "socket", "type-id": fmt.Sprintf("%d", int(sid))}, + map[string]string{"type": "socket", + "type-id": fmt.Sprintf("%d", int(sid))}, + m.meta, map[string]interface{}{"value": m.gmresults[tid][metric.Name]}, time.Now()) if err == nil { - *out = append(*out, y) + output <- y } } } else { for tid, cpu := range m.cpulist { y, err := lp.New(metric.Name, - map[string]string{"type": "cpu", "type-id": fmt.Sprintf("%d", int(cpu))}, + map[string]string{"type": "cpu", + "type-id": fmt.Sprintf("%d", int(cpu))}, + m.meta, map[string]interface{}{"value": m.gmresults[tid][metric.Name]}, time.Now()) if err == nil { - *out = append(*out, y) + output <- y } } } diff --git a/collectors/likwidMetric.md b/collectors/likwidMetric.md new file mode 100644 index 0000000..08b917f --- /dev/null +++ b/collectors/likwidMetric.md @@ -0,0 +1,119 @@ + +## `likwid` collector +```json + "likwid": { + "eventsets": [ + { + "events": { + "FIXC1": "ACTUAL_CPU_CLOCK", + "FIXC2": "MAX_CPU_CLOCK", + "PMC0": "RETIRED_INSTRUCTIONS", + "PMC1": "CPU_CLOCKS_UNHALTED", + "PMC2": "RETIRED_SSE_AVX_FLOPS_ALL", + "PMC3": "MERGE", + "DFC0": "DRAM_CHANNEL_0", + "DFC1": "DRAM_CHANNEL_1", + "DFC2": "DRAM_CHANNEL_2", + "DFC3": "DRAM_CHANNEL_3" + }, + "metrics": [ + { + "name": "ipc", + "calc": "PMC0/PMC1", + "socket_scope": false, + "publish": true + }, + { + "name": "flops_any", + "calc": "0.000001*PMC2/time", + "socket_scope": false, + "publish": true + }, + { + "name": "clock_mhz", + "calc": "0.000001*(FIXC1/FIXC2)/inverseClock", + "socket_scope": false, + "publish": true + }, + { + "name": "mem1", + "calc": "0.000001*(DFC0+DFC1+DFC2+DFC3)*64.0/time", + "socket_scope": true, + "publish": false + } + ] + }, + { + "events": { + 
"DFC0": "DRAM_CHANNEL_4", + "DFC1": "DRAM_CHANNEL_5", + "DFC2": "DRAM_CHANNEL_6", + "DFC3": "DRAM_CHANNEL_7", + "PWR0": "RAPL_CORE_ENERGY", + "PWR1": "RAPL_PKG_ENERGY" + }, + "metrics": [ + { + "name": "pwr_core", + "calc": "PWR0/time", + "socket_scope": false, + "publish": true + }, + { + "name": "pwr_pkg", + "calc": "PWR1/time", + "socket_scope": true, + "publish": true + }, + { + "name": "mem2", + "calc": "0.000001*(DFC0+DFC1+DFC2+DFC3)*64.0/time", + "socket_scope": true, + "publish": false + } + ] + } + ], + "globalmetrics": [ + { + "name": "mem_bw", + "calc": "mem1+mem2", + "socket_scope": true, + "publish": true + } + ] + } +``` + +_Example config suitable for AMD Zen3_ + +The `likwid` collector reads hardware performance counters at a **hwthread** and **socket** level. The configuration looks quite complicated but it is basically copy&paste from [LIKWID's performance groups](https://github.com/RRZE-HPC/likwid/tree/master/groups). The collector made multiple iterations and tried to use the performance groups but it lacked flexibility. The current way of configuration provides most flexibility. + +The logic is as following: There are multiple eventsets, each consisting of a list of counters+events and a list of metrics. 
If you compare a common performance group with the example setting above, there is not much difference: +``` +EVENTSET -> "events": { +FIXC1 ACTUAL_CPU_CLOCK -> "FIXC1": "ACTUAL_CPU_CLOCK", +FIXC2 MAX_CPU_CLOCK -> "FIXC2": "MAX_CPU_CLOCK", +PMC0 RETIRED_INSTRUCTIONS -> "PMC0" : "RETIRED_INSTRUCTIONS", +PMC1 CPU_CLOCKS_UNHALTED -> "PMC1" : "CPU_CLOCKS_UNHALTED", +PMC2 RETIRED_SSE_AVX_FLOPS_ALL -> "PMC2": "RETIRED_SSE_AVX_FLOPS_ALL", +PMC3 MERGE -> "PMC3": "MERGE", + -> } +``` + +The metrics follow the same procedure: + +``` +METRICS -> "metrics": [ +IPC PMC0/PMC1 -> { + -> "name" : "IPC", + -> "calc" : "PMC0/PMC1", + -> "socket_scope": false, + -> "publish": true + -> } + -> ] +``` + +The `socket_scope` option tells whether the metric is submitted per socket or per hwthread. If a metric is only used for internal calculations, you can set `publish = false`. + +Since some metrics can only be gathered in multiple measurements (like the memory bandwidth on AMD Zen3 chips), configure multiple eventsets as in the example config and use the `globalmetrics` section to combine them. **Be aware** that the combination might be misleading because the "behavior" of a metric changes over time and the multiple measurements might count different computing phases. 
diff --git a/collectors/loadavgMetric.go b/collectors/loadavgMetric.go index 1ecaea5..11c0e5e 100644 --- a/collectors/loadavgMetric.go +++ b/collectors/loadavgMetric.go @@ -6,8 +6,7 @@ import ( "strconv" "strings" "time" - - lp "github.com/influxdata/line-protocol" + lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric" ) const LOADAVGFILE = `/proc/loadavg` @@ -17,14 +16,14 @@ type LoadavgCollectorConfig struct { } type LoadavgCollector struct { - MetricCollector + metricCollector tags map[string]string load_matches []string proc_matches []string config LoadavgCollectorConfig } -func (m *LoadavgCollector) Init(config []byte) error { +func (m *LoadavgCollector) Init(config json.RawMessage) error { m.name = "LoadavgCollector" m.setup() if len(config) > 0 { @@ -33,6 +32,7 @@ func (m *LoadavgCollector) Init(config []byte) error { return err } } + m.meta = map[string]string{"source": m.name, "group": "LOAD"} m.tags = map[string]string{"type": "node"} m.load_matches = []string{"load_one", "load_five", "load_fifteen"} m.proc_matches = []string{"proc_run", "proc_total"} @@ -40,7 +40,7 @@ func (m *LoadavgCollector) Init(config []byte) error { return nil } -func (m *LoadavgCollector) Read(interval time.Duration, out *[]lp.MutableMetric) { +func (m *LoadavgCollector) Read(interval time.Duration, output chan lp.CCMetric) { var skip bool if !m.init { return @@ -56,9 +56,9 @@ func (m *LoadavgCollector) Read(interval time.Duration, out *[]lp.MutableMetric) x, err := strconv.ParseFloat(ls[i], 64) if err == nil { _, skip = stringArrayContains(m.config.ExcludeMetrics, name) - y, err := lp.New(name, m.tags, map[string]interface{}{"value": float64(x)}, time.Now()) + y, err := lp.New(name, m.tags, m.meta, map[string]interface{}{"value": float64(x)}, time.Now()) if err == nil && !skip { - *out = append(*out, y) + output <- y } } } @@ -67,9 +67,9 @@ func (m *LoadavgCollector) Read(interval time.Duration, out *[]lp.MutableMetric) x, err := strconv.ParseFloat(lv[i], 64) if 
err == nil { _, skip = stringArrayContains(m.config.ExcludeMetrics, name) - y, err := lp.New(name, m.tags, map[string]interface{}{"value": float64(x)}, time.Now()) + y, err := lp.New(name, m.tags, m.meta, map[string]interface{}{"value": float64(x)}, time.Now()) if err == nil && !skip { - *out = append(*out, y) + output <- y } } } diff --git a/collectors/loadavgMetric.md b/collectors/loadavgMetric.md new file mode 100644 index 0000000..d2b3f50 --- /dev/null +++ b/collectors/loadavgMetric.md @@ -0,0 +1,19 @@ + +## `loadavg` collector + +```json + "loadavg": { + "exclude_metrics": [ + "proc_run" + ] + } +``` + +The `loadavg` collector reads data from `/proc/loadavg` and outputs a handful **node** metrics. If a metric is not required, it can be excluded from forwarding it to the sink. + +Metrics: +* `load_one` +* `load_five` +* `load_fifteen` +* `proc_run` +* `proc_total` diff --git a/collectors/lustreMetric.go b/collectors/lustreMetric.go index 8931f84..3e248fa 100644 --- a/collectors/lustreMetric.go +++ b/collectors/lustreMetric.go @@ -8,8 +8,7 @@ import ( "strconv" "strings" "time" - - lp "github.com/influxdata/line-protocol" + lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric" ) const LUSTREFILE = `/proc/fs/lustre/llite/lnec-XXXXXX/stats` @@ -20,14 +19,14 @@ type LustreCollectorConfig struct { } type LustreCollector struct { - MetricCollector + metricCollector tags map[string]string matches map[string]map[string]int devices []string config LustreCollectorConfig } -func (m *LustreCollector) Init(config []byte) error { +func (m *LustreCollector) Init(config json.RawMessage) error { var err error m.name = "LustreCollector" if len(config) > 0 { @@ -38,6 +37,7 @@ func (m *LustreCollector) Init(config []byte) error { } m.setup() m.tags = map[string]string{"type": "node"} + m.meta = map[string]string{"source": m.name, "group": "Lustre"} m.matches = map[string]map[string]int{"read_bytes": {"read_bytes": 6, "read_requests": 1}, "write_bytes": 
{"write_bytes": 6, "write_requests": 1}, "open": {"open": 1}, @@ -64,7 +64,7 @@ func (m *LustreCollector) Init(config []byte) error { return nil } -func (m *LustreCollector) Read(interval time.Duration, out *[]lp.MutableMetric) { +func (m *LustreCollector) Read(interval time.Duration, output chan lp.CCMetric) { if !m.init { return } @@ -88,9 +88,12 @@ func (m *LustreCollector) Read(interval time.Duration, out *[]lp.MutableMetric) } x, err := strconv.ParseInt(lf[idx], 0, 64) if err == nil { - y, err := lp.New(name, m.tags, map[string]interface{}{"value": x}, time.Now()) + y, err := lp.New(name, m.tags, m.meta, map[string]interface{}{"value": x}, time.Now()) if err == nil { - *out = append(*out, y) + if strings.Contains(name, "byte") { + y.AddMeta("unit", "Byte") + } + output <- y } } } diff --git a/collectors/lustreMetric.md b/collectors/lustreMetric.md new file mode 100644 index 0000000..0cb9fc8 --- /dev/null +++ b/collectors/lustreMetric.md @@ -0,0 +1,29 @@ + +## `lustrestat` collector + +```json + "lustrestat": { + "procfiles" : [ + "/proc/fs/lustre/llite/lnec-XXXXXX/stats" + ], + "exclude_metrics": [ + "setattr", + "getattr" + ] + } +``` + +The `lustrestat` collector reads from the procfs stat files for Lustre like `/proc/fs/lustre/llite/lnec-XXXXXX/stats`. 
+ +Metrics: +* `read_bytes` +* `read_requests` +* `write_bytes` +* `write_requests` +* `open` +* `close` +* `getattr` +* `setattr` +* `statfs` +* `inode_permission` + diff --git a/collectors/memstatMetric.go b/collectors/memstatMetric.go index 17db13e..c83402c 100644 --- a/collectors/memstatMetric.go +++ b/collectors/memstatMetric.go @@ -9,8 +9,7 @@ import ( "strconv" "strings" "time" - - lp "github.com/influxdata/line-protocol" + lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric" ) const MEMSTATFILE = `/proc/meminfo` @@ -20,14 +19,14 @@ type MemstatCollectorConfig struct { } type MemstatCollector struct { - MetricCollector + metricCollector stats map[string]int64 tags map[string]string matches map[string]string config MemstatCollectorConfig } -func (m *MemstatCollector) Init(config []byte) error { +func (m *MemstatCollector) Init(config json.RawMessage) error { var err error m.name = "MemstatCollector" if len(config) > 0 { @@ -36,6 +35,7 @@ func (m *MemstatCollector) Init(config []byte) error { return err } } + m.meta = map[string]string{"source": m.name, "group": "Memory", "unit": "kByte"} m.stats = make(map[string]int64) m.matches = make(map[string]string) m.tags = map[string]string{"type": "node"} @@ -65,7 +65,7 @@ func (m *MemstatCollector) Init(config []byte) error { return err } -func (m *MemstatCollector) Read(interval time.Duration, out *[]lp.MutableMetric) { +func (m *MemstatCollector) Read(interval time.Duration, output chan lp.CCMetric) { if !m.init { return } @@ -97,9 +97,9 @@ func (m *MemstatCollector) Read(interval time.Duration, out *[]lp.MutableMetric) log.Print(err) continue } - y, err := lp.New(name, m.tags, map[string]interface{}{"value": int(float64(m.stats[match]) * 1.0e-3)}, time.Now()) + y, err := lp.New(name, m.tags, m.meta, map[string]interface{}{"value": int(float64(m.stats[match]) * 1.0e-3)}, time.Now()) if err == nil { - *out = append(*out, y) + output <- y } } @@ -108,18 +108,18 @@ func (m *MemstatCollector) 
Read(interval time.Duration, out *[]lp.MutableMetric) if _, cached := m.stats[`Cached`]; cached { memUsed := m.stats[`MemTotal`] - (m.stats[`MemFree`] + m.stats[`Buffers`] + m.stats[`Cached`]) _, skip := stringArrayContains(m.config.ExcludeMetrics, "mem_used") - y, err := lp.New("mem_used", m.tags, map[string]interface{}{"value": int(float64(memUsed) * 1.0e-3)}, time.Now()) + y, err := lp.New("mem_used", m.tags, m.meta, map[string]interface{}{"value": int(float64(memUsed) * 1.0e-3)}, time.Now()) if err == nil && !skip { - *out = append(*out, y) + output <- y } } } } if _, found := m.stats[`MemShared`]; found { _, skip := stringArrayContains(m.config.ExcludeMetrics, "mem_shared") - y, err := lp.New("mem_shared", m.tags, map[string]interface{}{"value": int(float64(m.stats[`MemShared`]) * 1.0e-3)}, time.Now()) + y, err := lp.New("mem_shared", m.tags, m.meta, map[string]interface{}{"value": int(float64(m.stats[`MemShared`]) * 1.0e-3)}, time.Now()) if err == nil && !skip { - *out = append(*out, y) + output <- y } } } diff --git a/collectors/memstatMetric.md b/collectors/memstatMetric.md new file mode 100644 index 0000000..4b7b8c7 --- /dev/null +++ b/collectors/memstatMetric.md @@ -0,0 +1,27 @@ + +## `memstat` collector + +```json + "memstat": { + "exclude_metrics": [ + "mem_used" + ] + } +``` + +The `memstat` collector reads data from `/proc/meminfo` and outputs a handful **node** metrics. If a metric is not required, it can be excluded from forwarding it to the sink. 
+ + +Metrics: +* `mem_total` +* `mem_sreclaimable` +* `mem_slab` +* `mem_free` +* `mem_buffers` +* `mem_cached` +* `mem_available` +* `mem_shared` +* `swap_total` +* `swap_free` +* `mem_used` = `mem_total` - (`mem_free` + `mem_buffers` + `mem_cached`) + diff --git a/collectors/metricCollector.go b/collectors/metricCollector.go index 0228530..6bc9047 100644 --- a/collectors/metricCollector.go +++ b/collectors/metricCollector.go @@ -1,8 +1,10 @@ package collectors import ( + "encoding/json" "errors" - lp "github.com/influxdata/line-protocol" + lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric" + influx "github.com/influxdata/line-protocol" "io/ioutil" "log" "strconv" @@ -10,28 +12,30 @@ import ( "time" ) -type MetricGetter interface { +type MetricCollector interface { Name() string - Init(config []byte) error + Init(config json.RawMessage) error Initialized() bool - Read(time.Duration, *[]lp.MutableMetric) + Read(duration time.Duration, output chan lp.CCMetric) Close() } -type MetricCollector struct { - name string - init bool +type metricCollector struct { + output chan lp.CCMetric + name string + init bool + meta map[string]string } -func (c *MetricCollector) Name() string { +func (c *metricCollector) Name() string { return c.name } -func (c *MetricCollector) setup() error { +func (c *metricCollector) setup() error { return nil } -func (c *MetricCollector) Initialized() bool { +func (c *metricCollector) Initialized() bool { return c.init == true } @@ -103,7 +107,7 @@ func CpuList() []int { return cpulist } -func Tags2Map(metric lp.Metric) map[string]string { +func Tags2Map(metric influx.Metric) map[string]string { tags := make(map[string]string) for _, t := range metric.TagList() { tags[t.Key] = t.Value @@ -111,7 +115,7 @@ func Tags2Map(metric lp.Metric) map[string]string { return tags } -func Fields2Map(metric lp.Metric) map[string]interface{} { +func Fields2Map(metric influx.Metric) map[string]interface{} { fields := make(map[string]interface{}) 
for _, f := range metric.FieldList() { fields[f.Key] = f.Value diff --git a/collectors/netstatMetric.go b/collectors/netstatMetric.go index a273de1..86437ea 100644 --- a/collectors/netstatMetric.go +++ b/collectors/netstatMetric.go @@ -7,8 +7,7 @@ import ( "strconv" "strings" "time" - - lp "github.com/influxdata/line-protocol" + lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric" ) const NETSTATFILE = `/proc/net/dev` @@ -18,14 +17,15 @@ type NetstatCollectorConfig struct { } type NetstatCollector struct { - MetricCollector + metricCollector config NetstatCollectorConfig matches map[int]string } -func (m *NetstatCollector) Init(config []byte) error { +func (m *NetstatCollector) Init(config json.RawMessage) error { m.name = "NetstatCollector" m.setup() + m.meta = map[string]string{"source": m.name, "group": "Memory"} m.matches = map[int]string{ 1: "bytes_in", 9: "bytes_out", @@ -46,7 +46,7 @@ func (m *NetstatCollector) Init(config []byte) error { return nil } -func (m *NetstatCollector) Read(interval time.Duration, out *[]lp.MutableMetric) { +func (m *NetstatCollector) Read(interval time.Duration, output chan lp.CCMetric) { data, err := ioutil.ReadFile(string(NETSTATFILE)) if err != nil { log.Print(err.Error()) @@ -73,9 +73,15 @@ func (m *NetstatCollector) Read(interval time.Duration, out *[]lp.MutableMetric) for i, name := range m.matches { v, err := strconv.ParseInt(f[i], 10, 0) if err == nil { - y, err := lp.New(name, tags, map[string]interface{}{"value": int(float64(v) * 1.0e-3)}, time.Now()) + y, err := lp.New(name, tags, m.meta, map[string]interface{}{"value": int(float64(v) * 1.0e-3)}, time.Now()) if err == nil { - *out = append(*out, y) + switch { + case strings.Contains(name, "byte"): + y.AddMeta("unit", "Byte") + case strings.Contains(name, "pkt"): + y.AddMeta("unit", "Packets") + } + output <- y } } } diff --git a/collectors/netstatMetric.md b/collectors/netstatMetric.md new file mode 100644 index 0000000..34a48fd --- /dev/null +++ 
b/collectors/netstatMetric.md @@ -0,0 +1,21 @@ + +## `netstat` collector + +```json + "netstat": { + "exclude_devices": [ + "lo" + ] + } +``` + +The `netstat` collector reads data from `/proc/net/dev` and outputs a handful **node** metrics. If a device is not required, it can be excluded from forwarding it to the sink. Commonly the `lo` device should be excluded. + +Metrics: +* `bytes_in` +* `bytes_out` +* `pkts_in` +* `pkts_out` + +The device name is added as tag `device`. + diff --git a/collectors/nfsMetric.go b/collectors/nfsMetric.go new file mode 100644 index 0000000..16a6d23 --- /dev/null +++ b/collectors/nfsMetric.go @@ -0,0 +1,147 @@ +package collectors + +import ( + "encoding/json" + "fmt" + "log" + + // "os" + "os/exec" + "strconv" + "strings" + "time" + + lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric" +) + +type NfsCollectorData struct { + current int64 + last int64 +} + +type NfsCollector struct { + metricCollector + tags map[string]string + config struct { + Nfsutils string `json:"nfsutils"` + ExcludeMetrics []string `json:"exclude_metrics,omitempty"` + } + data map[string]map[string]NfsCollectorData +} + +func (m *NfsCollector) initStats() error { + cmd := exec.Command(m.config.Nfsutils, "-l") + cmd.Wait() + buffer, err := cmd.Output() + if err == nil { + for _, line := range strings.Split(string(buffer), "\n") { + lf := strings.Fields(line) + if len(lf) != 5 { + continue + } + if _, exist := m.data[lf[1]]; !exist { + m.data[lf[1]] = make(map[string]NfsCollectorData) + } + name := strings.Trim(lf[3], ":") + if _, exist := m.data[lf[1]][name]; !exist { + value, err := strconv.ParseInt(lf[4], 0, 64) + if err == nil { + x := m.data[lf[1]][name] + x.current = value + x.last = 0 + m.data[lf[1]][name] = x + } + } + } + } + return err +} + +func (m *NfsCollector) updateStats() error { + cmd := exec.Command(m.config.Nfsutils, "-l") + cmd.Wait() + buffer, err := cmd.Output() + if err == nil { + for _, line := range 
strings.Split(string(buffer), "\n") { + lf := strings.Fields(line) + if len(lf) != 5 { + continue + } + if _, exist := m.data[lf[1]]; !exist { + m.data[lf[1]] = make(map[string]NfsCollectorData) + } + name := strings.Trim(lf[3], ":") + if _, exist := m.data[lf[1]][name]; exist { + value, err := strconv.ParseInt(lf[4], 0, 64) + if err == nil { + x := m.data[lf[1]][name] + x.last = x.current + x.current = value + m.data[lf[1]][name] = x + } + } + } + } + return err +} + +func (m *NfsCollector) Init(config json.RawMessage) error { + var err error + m.name = "NfsCollector" + m.setup() + + // Set default mmpmon binary + m.config.Nfsutils = "/usr/sbin/nfsstat" + + // Read JSON configuration + if len(config) > 0 { + err = json.Unmarshal(config, &m.config) + if err != nil { + log.Print(err.Error()) + return err + } + } + m.meta = map[string]string{ + "source": m.name, + "group": "NFS", + } + m.tags = map[string]string{ + "type": "node", + } + // Check if mmpmon is in executable search path + _, err = exec.LookPath(m.config.Nfsutils) + if err != nil { + return fmt.Errorf("NfsCollector.Init(): Failed to find nfsstat binary '%s': %v", m.config.Nfsutils, err) + } + m.data = make(map[string]map[string]NfsCollectorData) + m.initStats() + m.init = true + return nil +} + +func (m *NfsCollector) Read(interval time.Duration, output chan lp.CCMetric) { + if !m.init { + return + } + timestamp := time.Now() + + m.updateStats() + + for version, metrics := range m.data { + for name, data := range metrics { + if _, skip := stringArrayContains(m.config.ExcludeMetrics, name); skip { + continue + } + value := data.current - data.last + y, err := lp.New(fmt.Sprintf("nfs_%s", name), m.tags, m.meta, map[string]interface{}{"value": value}, timestamp) + if err == nil { + y.AddMeta("version", version) + output <- y + } + } + } +} + +func (m *NfsCollector) Close() { + m.init = false +} diff --git a/collectors/nvidiaMetric.go b/collectors/nvidiaMetric.go index 31118c2..6f5141a 100644 --- 
a/collectors/nvidiaMetric.go +++ b/collectors/nvidiaMetric.go @@ -6,9 +6,8 @@ import ( "fmt" "log" "time" - + lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric" "github.com/NVIDIA/go-nvml/pkg/nvml" - lp "github.com/influxdata/line-protocol" ) type NvidiaCollectorConfig struct { @@ -17,7 +16,7 @@ type NvidiaCollectorConfig struct { } type NvidiaCollector struct { - MetricCollector + metricCollector num_gpus int config NvidiaCollectorConfig } @@ -29,10 +28,11 @@ func (m *NvidiaCollector) CatchPanic() { } } -func (m *NvidiaCollector) Init(config []byte) error { +func (m *NvidiaCollector) Init(config json.RawMessage) error { var err error m.name = "NvidiaCollector" m.setup() + m.meta = map[string]string{"source": m.name, "group": "Nvidia"} if len(config) > 0 { err = json.Unmarshal(config, &m.config) if err != nil { @@ -55,7 +55,7 @@ func (m *NvidiaCollector) Init(config []byte) error { return nil } -func (m *NvidiaCollector) Read(interval time.Duration, out *[]lp.MutableMetric) { +func (m *NvidiaCollector) Read(interval time.Duration, output chan lp.CCMetric) { if !m.init { return } @@ -74,14 +74,14 @@ func (m *NvidiaCollector) Read(interval time.Duration, out *[]lp.MutableMetric) util, ret := nvml.DeviceGetUtilizationRates(device) if ret == nvml.SUCCESS { _, skip = stringArrayContains(m.config.ExcludeMetrics, "util") - y, err := lp.New("util", tags, map[string]interface{}{"value": float64(util.Gpu)}, time.Now()) + y, err := lp.New("util", tags, m.meta, map[string]interface{}{"value": float64(util.Gpu)}, time.Now()) if err == nil && !skip { - *out = append(*out, y) + output <- y } _, skip = stringArrayContains(m.config.ExcludeMetrics, "mem_util") - y, err = lp.New("mem_util", tags, map[string]interface{}{"value": float64(util.Memory)}, time.Now()) + y, err = lp.New("mem_util", tags, m.meta, map[string]interface{}{"value": float64(util.Memory)}, time.Now()) if err == nil && !skip { - *out = append(*out, y) + output <- y } } @@ -89,174 +89,177 @@ func 
(m *NvidiaCollector) Read(interval time.Duration, out *[]lp.MutableMetric) if ret == nvml.SUCCESS { t := float64(meminfo.Total) / (1024 * 1024) _, skip = stringArrayContains(m.config.ExcludeMetrics, "mem_total") - y, err := lp.New("mem_total", tags, map[string]interface{}{"value": t}, time.Now()) + y, err := lp.New("mem_total", tags, m.meta, map[string]interface{}{"value": t}, time.Now()) if err == nil && !skip { - *out = append(*out, y) + y.AddMeta("unit", "MByte") + output <- y } f := float64(meminfo.Used) / (1024 * 1024) _, skip = stringArrayContains(m.config.ExcludeMetrics, "fb_memory") - y, err = lp.New("fb_memory", tags, map[string]interface{}{"value": f}, time.Now()) + y, err = lp.New("fb_memory", tags, m.meta, map[string]interface{}{"value": f}, time.Now()) if err == nil && !skip { - *out = append(*out, y) + y.AddMeta("unit", "MByte") + output <- y } } temp, ret := nvml.DeviceGetTemperature(device, nvml.TEMPERATURE_GPU) if ret == nvml.SUCCESS { _, skip = stringArrayContains(m.config.ExcludeMetrics, "temp") - y, err := lp.New("temp", tags, map[string]interface{}{"value": float64(temp)}, time.Now()) + y, err := lp.New("temp", tags, m.meta, map[string]interface{}{"value": float64(temp)}, time.Now()) if err == nil && !skip { - *out = append(*out, y) + y.AddMeta("unit", "degC") + output <- y } } fan, ret := nvml.DeviceGetFanSpeed(device) if ret == nvml.SUCCESS { _, skip = stringArrayContains(m.config.ExcludeMetrics, "fan") - y, err := lp.New("fan", tags, map[string]interface{}{"value": float64(fan)}, time.Now()) + y, err := lp.New("fan", tags, m.meta, map[string]interface{}{"value": float64(fan)}, time.Now()) if err == nil && !skip { - *out = append(*out, y) + output <- y } } _, ecc_pend, ret := nvml.DeviceGetEccMode(device) if ret == nvml.SUCCESS { - var y lp.MutableMetric + var y lp.CCMetric var err error switch ecc_pend { case nvml.FEATURE_DISABLED: - y, err = lp.New("ecc_mode", tags, map[string]interface{}{"value": string("OFF")}, time.Now()) + y, err = 
lp.New("ecc_mode", tags, m.meta, map[string]interface{}{"value": string("OFF")}, time.Now()) case nvml.FEATURE_ENABLED: - y, err = lp.New("ecc_mode", tags, map[string]interface{}{"value": string("ON")}, time.Now()) + y, err = lp.New("ecc_mode", tags, m.meta, map[string]interface{}{"value": string("ON")}, time.Now()) default: - y, err = lp.New("ecc_mode", tags, map[string]interface{}{"value": string("UNKNOWN")}, time.Now()) + y, err = lp.New("ecc_mode", tags, m.meta, map[string]interface{}{"value": string("UNKNOWN")}, time.Now()) } _, skip = stringArrayContains(m.config.ExcludeMetrics, "ecc_mode") if err == nil && !skip { - *out = append(*out, y) + output <- y } } else if ret == nvml.ERROR_NOT_SUPPORTED { _, skip = stringArrayContains(m.config.ExcludeMetrics, "ecc_mode") - y, err := lp.New("ecc_mode", tags, map[string]interface{}{"value": string("N/A")}, time.Now()) + y, err := lp.New("ecc_mode", tags, m.meta, map[string]interface{}{"value": string("N/A")}, time.Now()) if err == nil && !skip { - *out = append(*out, y) + output <- y } } pstate, ret := nvml.DeviceGetPerformanceState(device) if ret == nvml.SUCCESS { _, skip = stringArrayContains(m.config.ExcludeMetrics, "perf_state") - y, err := lp.New("perf_state", tags, map[string]interface{}{"value": fmt.Sprintf("P%d", int(pstate))}, time.Now()) + y, err := lp.New("perf_state", tags, m.meta, map[string]interface{}{"value": fmt.Sprintf("P%d", int(pstate))}, time.Now()) if err == nil && !skip { - *out = append(*out, y) + output <- y } } power, ret := nvml.DeviceGetPowerUsage(device) if ret == nvml.SUCCESS { _, skip = stringArrayContains(m.config.ExcludeMetrics, "power_usage_report") - y, err := lp.New("power_usage_report", tags, map[string]interface{}{"value": float64(power) / 1000}, time.Now()) + y, err := lp.New("power_usage_report", tags, m.meta, map[string]interface{}{"value": float64(power) / 1000}, time.Now()) if err == nil && !skip { - *out = append(*out, y) + output <- y } } gclk, ret := 
nvml.DeviceGetClockInfo(device, nvml.CLOCK_GRAPHICS) if ret == nvml.SUCCESS { _, skip = stringArrayContains(m.config.ExcludeMetrics, "graphics_clock_report") - y, err := lp.New("graphics_clock_report", tags, map[string]interface{}{"value": float64(gclk)}, time.Now()) + y, err := lp.New("graphics_clock_report", tags, m.meta, map[string]interface{}{"value": float64(gclk)}, time.Now()) if err == nil && !skip { - *out = append(*out, y) + output <- y } } smclk, ret := nvml.DeviceGetClockInfo(device, nvml.CLOCK_SM) if ret == nvml.SUCCESS { _, skip = stringArrayContains(m.config.ExcludeMetrics, "sm_clock_report") - y, err := lp.New("sm_clock_report", tags, map[string]interface{}{"value": float64(smclk)}, time.Now()) + y, err := lp.New("sm_clock_report", tags, m.meta, map[string]interface{}{"value": float64(smclk)}, time.Now()) if err == nil && !skip { - *out = append(*out, y) + output <- y } } memclk, ret := nvml.DeviceGetClockInfo(device, nvml.CLOCK_MEM) if ret == nvml.SUCCESS { _, skip = stringArrayContains(m.config.ExcludeMetrics, "mem_clock_report") - y, err := lp.New("mem_clock_report", tags, map[string]interface{}{"value": float64(memclk)}, time.Now()) + y, err := lp.New("mem_clock_report", tags, m.meta, map[string]interface{}{"value": float64(memclk)}, time.Now()) if err == nil && !skip { - *out = append(*out, y) + output <- y } } max_gclk, ret := nvml.DeviceGetMaxClockInfo(device, nvml.CLOCK_GRAPHICS) if ret == nvml.SUCCESS { _, skip = stringArrayContains(m.config.ExcludeMetrics, "max_graphics_clock") - y, err := lp.New("max_graphics_clock", tags, map[string]interface{}{"value": float64(max_gclk)}, time.Now()) + y, err := lp.New("max_graphics_clock", tags, m.meta, map[string]interface{}{"value": float64(max_gclk)}, time.Now()) if err == nil && !skip { - *out = append(*out, y) + output <- y } } max_smclk, ret := nvml.DeviceGetClockInfo(device, nvml.CLOCK_SM) if ret == nvml.SUCCESS { _, skip = stringArrayContains(m.config.ExcludeMetrics, "max_sm_clock") - y, err := 
lp.New("max_sm_clock", tags, map[string]interface{}{"value": float64(max_smclk)}, time.Now()) + y, err := lp.New("max_sm_clock", tags, m.meta, map[string]interface{}{"value": float64(max_smclk)}, time.Now()) if err == nil && !skip { - *out = append(*out, y) + output <- y } } max_memclk, ret := nvml.DeviceGetClockInfo(device, nvml.CLOCK_MEM) if ret == nvml.SUCCESS { _, skip = stringArrayContains(m.config.ExcludeMetrics, "max_mem_clock") - y, err := lp.New("max_mem_clock", tags, map[string]interface{}{"value": float64(max_memclk)}, time.Now()) + y, err := lp.New("max_mem_clock", tags, m.meta, map[string]interface{}{"value": float64(max_memclk)}, time.Now()) if err == nil && !skip { - *out = append(*out, y) + output <- y } } ecc_db, ret := nvml.DeviceGetTotalEccErrors(device, 1, 1) if ret == nvml.SUCCESS { _, skip = stringArrayContains(m.config.ExcludeMetrics, "ecc_db_error") - y, err := lp.New("ecc_db_error", tags, map[string]interface{}{"value": float64(ecc_db)}, time.Now()) + y, err := lp.New("ecc_db_error", tags, m.meta, map[string]interface{}{"value": float64(ecc_db)}, time.Now()) if err == nil && !skip { - *out = append(*out, y) + output <- y } } ecc_sb, ret := nvml.DeviceGetTotalEccErrors(device, 0, 1) if ret == nvml.SUCCESS { _, skip = stringArrayContains(m.config.ExcludeMetrics, "ecc_sb_error") - y, err := lp.New("ecc_sb_error", tags, map[string]interface{}{"value": float64(ecc_sb)}, time.Now()) + y, err := lp.New("ecc_sb_error", tags, m.meta, map[string]interface{}{"value": float64(ecc_sb)}, time.Now()) if err == nil && !skip { - *out = append(*out, y) + output <- y } } pwr_limit, ret := nvml.DeviceGetPowerManagementLimit(device) if ret == nvml.SUCCESS { _, skip = stringArrayContains(m.config.ExcludeMetrics, "power_man_limit") - y, err := lp.New("power_man_limit", tags, map[string]interface{}{"value": float64(pwr_limit)}, time.Now()) + y, err := lp.New("power_man_limit", tags, m.meta, map[string]interface{}{"value": float64(pwr_limit)}, time.Now()) if err == 
nil && !skip { - *out = append(*out, y) + output <- y } } enc_util, _, ret := nvml.DeviceGetEncoderUtilization(device) if ret == nvml.SUCCESS { _, skip = stringArrayContains(m.config.ExcludeMetrics, "encoder_util") - y, err := lp.New("encoder_util", tags, map[string]interface{}{"value": float64(enc_util)}, time.Now()) + y, err := lp.New("encoder_util", tags, m.meta, map[string]interface{}{"value": float64(enc_util)}, time.Now()) if err == nil && !skip { - *out = append(*out, y) + output <- y } } dec_util, _, ret := nvml.DeviceGetDecoderUtilization(device) if ret == nvml.SUCCESS { _, skip = stringArrayContains(m.config.ExcludeMetrics, "decoder_util") - y, err := lp.New("decoder_util", tags, map[string]interface{}{"value": float64(dec_util)}, time.Now()) + y, err := lp.New("decoder_util", tags, m.meta, map[string]interface{}{"value": float64(dec_util)}, time.Now()) if err == nil && !skip { - *out = append(*out, y) + output <- y } } } diff --git a/collectors/nvidiaMetric.md b/collectors/nvidiaMetric.md new file mode 100644 index 0000000..c774139 --- /dev/null +++ b/collectors/nvidiaMetric.md @@ -0,0 +1,40 @@ + +## `nvidia` collector + +```json + "nvidia": { + "exclude_devices" : [ + "0","1" + ], + "exclude_metrics": [ + "fb_memory", + "fan" + ] + } +``` + +Metrics: +* `util` +* `mem_util` +* `mem_total` +* `fb_memory` +* `temp` +* `fan` +* `ecc_mode` +* `perf_state` +* `power_usage_report` +* `graphics_clock_report` +* `sm_clock_report` +* `mem_clock_report` +* `max_graphics_clock` +* `max_sm_clock` +* `max_mem_clock` +* `ecc_db_error` +* `ecc_sb_error` +* `power_man_limit` +* `encoder_util` +* `decoder_util` + +It uses a separate `type` in the metrics. 
The output metric looks like this: +`<name>,type=accelerator,type-id=<gpu-id> value=<value> <timestamp>` + diff --git a/collectors/tempMetric.go b/collectors/tempMetric.go index b074d78..b73d582 100644 --- a/collectors/tempMetric.go +++ b/collectors/tempMetric.go @@ -4,13 +4,13 @@ import ( "encoding/json" "fmt" "io/ioutil" + "log" "os" "path/filepath" "strconv" "strings" "time" - - lp "github.com/influxdata/line-protocol" + lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric" ) const HWMON_PATH = `/sys/class/hwmon` @@ -21,20 +21,21 @@ type TempCollectorConfig struct { } type TempCollector struct { - MetricCollector + metricCollector config TempCollectorConfig } -func (m *TempCollector) Init(config []byte) error { +func (m *TempCollector) Init(config json.RawMessage) error { m.name = "TempCollector" m.setup() - m.init = true + m.meta = map[string]string{"source": m.name, "group": "IPMI", "unit": "degC"} if len(config) > 0 { err := json.Unmarshal(config, &m.config) if err != nil { return err } } + m.init = true return nil } @@ -74,7 +75,7 @@ func get_hwmon_sensors() (map[string]map[string]string, error) { return sensors, nil } -func (m *TempCollector) Read(interval time.Duration, out *[]lp.MutableMetric) { +func (m *TempCollector) Read(interval time.Duration, output chan lp.CCMetric) { sensors, err := get_hwmon_sensors() if err != nil { @@ -89,15 +90,20 @@ func (m *TempCollector) Read(interval time.Duration, out *[]lp.MutableMetric) { break } } + mname := strings.Replace(name, " ", "_", -1) + if !strings.Contains(mname, "temp") { + mname = fmt.Sprintf("temp_%s", mname) + } buffer, err := ioutil.ReadFile(string(file)) if err != nil { continue } x, err := strconv.ParseInt(strings.Replace(string(buffer), "\n", "", -1), 0, 64) if err == nil { - y, err := lp.New(strings.ToLower(name), tags, map[string]interface{}{"value": float64(x) / 1000}, time.Now()) + y, err := lp.New(strings.ToLower(mname), tags, m.meta, map[string]interface{}{"value": int(float64(x) / 1000)}, time.Now()) if err == 
nil { - *out = append(*out, y) + log.Print("[", m.name, "] ", y) + output <- y } } } diff --git a/collectors/tempMetric.md b/collectors/tempMetric.md new file mode 100644 index 0000000..1e3d979 --- /dev/null +++ b/collectors/tempMetric.md @@ -0,0 +1,22 @@ + +## `tempstat` collector + +```json + "tempstat": { + "tag_override" : { + "<device>" : { + "type" : "socket", + "type-id" : "0" + } + }, + "exclude_metrics": [ + "metric1", + "metric2" + ] + } +``` + +The `tempstat` collector reads the data from `/sys/class/hwmon/<device>/tempX_{input,label}` + +Metrics: +* `temp_*`: The metric name is taken from the `label` files. diff --git a/collectors/topprocsMetric.go b/collectors/topprocsMetric.go index 715b8c3..d2691dc 100644 --- a/collectors/topprocsMetric.go +++ b/collectors/topprocsMetric.go @@ -8,8 +8,7 @@ import ( "os/exec" "strings" "time" - - lp "github.com/influxdata/line-protocol" + lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric" ) const MAX_NUM_PROCS = 10 @@ -20,15 +19,16 @@ type TopProcsCollectorConfig struct { } type TopProcsCollector struct { - MetricCollector + metricCollector tags map[string]string config TopProcsCollectorConfig } -func (m *TopProcsCollector) Init(config []byte) error { +func (m *TopProcsCollector) Init(config json.RawMessage) error { var err error m.name = "TopProcsCollector" m.tags = map[string]string{"type": "node"} + m.meta = map[string]string{"source": m.name, "group": "TopProcs"} if len(config) > 0 { err = json.Unmarshal(config, &m.config) if err != nil { @@ -51,7 +51,7 @@ func (m *TopProcsCollector) Init(config []byte) error { return nil } -func (m *TopProcsCollector) Read(interval time.Duration, out *[]lp.MutableMetric) { +func (m *TopProcsCollector) Read(interval time.Duration, output chan lp.CCMetric) { if !m.init { return } @@ -66,9 +66,9 @@ func (m *TopProcsCollector) Read(interval time.Duration, out *[]lp.MutableMetric lines := strings.Split(string(stdout), "\n") for i := 1; i < m.config.Num_procs+1; i++ { name := 
fmt.Sprintf("topproc%d", i) - y, err := lp.New(name, m.tags, map[string]interface{}{"value": string(lines[i])}, time.Now()) + y, err := lp.New(name, m.tags, m.meta, map[string]interface{}{"value": string(lines[i])}, time.Now()) if err == nil { - *out = append(*out, y) + output <- y } } } diff --git a/collectors/topprocsMetric.md b/collectors/topprocsMetric.md new file mode 100644 index 0000000..ca47582 --- /dev/null +++ b/collectors/topprocsMetric.md @@ -0,0 +1,15 @@ + +## `topprocs` collector + +```json + "topprocs": { + "num_procs": 5 + } +``` + +The `topprocs` collector reads the TopX processes (sorted by CPU utilization, `ps -Ao comm --sort=-pcpu`). + +In contrast to most other collectors, the metric value is a `string`. + + + diff --git a/config.json b/config.json index 4a7fd89..52f9df1 100644 --- a/config.json +++ b/config.json @@ -1,36 +1,8 @@ { - "sink": { - "user": "testuser", - "password": "testpass", - "host": "127.0.0.1", - "port": "9090", - "database": "testdb", - "organization": "testorg", - "type": "stdout" - }, - "interval": 3, - "duration": 1, - "collectors": [ - "tempstat" - ], - "default_tags": { - "cluster": "testcluster" - }, - "receiver": { - "type": "none" - }, - "collect_config": { - "tempstat": { - "tag_override": { - "hwmon0" : { - "type" : "socket", - "type-id" : "0" - }, - "hwmon1" : { - "type" : "socket", - "type-id" : "1" - } - } - } - } + "sinks": "sinks.json", + "collectors" : "collectors.json", + "receivers" : "receivers.json", + "router" : "router.json", + "interval": 10, + "duration": 1 } diff --git a/go.mod b/go.mod index 903ea80..be384b6 100644 --- a/go.mod +++ b/go.mod @@ -3,10 +3,11 @@ module github.com/ClusterCockpit/cc-metric-collector go 1.16 require ( - github.com/NVIDIA/go-nvml v0.11.1-0 // indirect + github.com/NVIDIA/go-nvml v0.11.1-0 github.com/influxdata/influxdb-client-go/v2 v2.2.2 github.com/influxdata/line-protocol v0.0.0-20210311194329-9aa0e372d097 github.com/nats-io/nats.go v1.10.0 github.com/nats-io/nkeys v0.1.4 
// indirect github.com/prometheus/client_golang v1.10.0 // indirect + gopkg.in/Knetic/govaluate.v2 v2.3.0 ) diff --git a/go.sum b/go.sum index 4bd7c8a..a6f98d7 100644 --- a/go.sum +++ b/go.sum @@ -421,6 +421,8 @@ google.golang.org/protobuf v1.20.1-0.20200309200217-e05f789c0967/go.mod h1:A+miE google.golang.org/protobuf v1.21.0/go.mod h1:47Nbq4nVaFHyn7ilMalzfO3qCViNmqZ2kzikPIcrTAo= google.golang.org/protobuf v1.23.0 h1:4MY060fB1DLGMB/7MBTLnwQUY6+F09GEiz6SsrNqyzM= google.golang.org/protobuf v1.23.0/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU= +gopkg.in/Knetic/govaluate.v2 v2.3.0 h1:naJVc9CZlWA8rC8f5mvECJD7jreTrn7FvGXjBthkHJQ= +gopkg.in/Knetic/govaluate.v2 v2.3.0/go.mod h1:NW0gr10J8s7aNghEg6uhdxiEaBvc0+8VgJjVViHUKp4= gopkg.in/alecthomas/kingpin.v2 v2.2.6/go.mod h1:FMv+mEhP44yOT+4EoQTLFTRgOQ1FBLkstjWtayDeSgw= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= diff --git a/internal/ccMetric/README.md b/internal/ccMetric/README.md new file mode 100644 index 0000000..1787ff0 --- /dev/null +++ b/internal/ccMetric/README.md @@ -0,0 +1,32 @@ +# ClusterCockpit metrics + +As described in the [ClusterCockpit specifications](https://github.com/ClusterCockpit/cc-specifications), the whole ClusterCockpit stack uses metrics in the InfluxDB line protocol format. This is also the input and output format for the ClusterCockpit Metric Collector but internally it uses an extended format while processing, named CCMetric. + +It is basically a copy of the [InfluxDB line protocol](https://github.com/influxdata/line-protocol) `MutableMetric` interface with one extension. 
Besides the tags and fields, it contains a list of meta information (re-using the `Tag` structure of the original protocol): + +```golang +type ccMetric struct { + name string // same as + tags []*influx.Tag // original + fields []*influx.Field // Influx + tm time.Time // line-protocol + meta []*influx.Tag +} + +type CCMetric interface { + influx.MutableMetric // the same functions as defined by influx.MutableMetric + RemoveTag(key string) // this is not published by the original influx.MutableMetric + Meta() map[string]string + MetaList() []*influx.Tag + AddMeta(key, value string) + HasMeta(key string) bool + GetMeta(key string) (string, bool) + RemoveMeta(key string) +} +``` + +The `CCMetric` interface provides the same functions as the `MutableMetric` like `{Add, Remove, Has}{Tag, Field}` and additionally provides `{Add, Remove, Has}Meta`. + +The InfluxDB protocol creates a new metric with `influx.New(name, tags, fields, time)` while CCMetric uses `ccMetric.New(name, tags, meta, fields, time)` where `tags` and `meta` are both of type `map[string]string`. + +You can copy a CCMetric with `FromMetric(other CCMetric) CCMetric`. If you get an `influx.Metric` from a function, like the line protocol parser, you can use `FromInfluxMetric(other influx.Metric) CCMetric` to get a CCMetric out of it (see `NatsReceiver` for an example). diff --git a/internal/ccMetric/ccMetric.go b/internal/ccMetric/ccMetric.go new file mode 100644 index 0000000..6b6bda9 --- /dev/null +++ b/internal/ccMetric/ccMetric.go @@ -0,0 +1,374 @@ +package ccmetric + +import ( + "fmt" + lp "github.com/influxdata/line-protocol" // MIT license + "sort" + "time" +) + +// Most functions are derived from github.com/influxdata/line-protocol/metric.go +// The metric type is extended with an extra meta information list re-using the Tag +// type. 
+ +type ccMetric struct { + name string + tags []*lp.Tag + fields []*lp.Field + tm time.Time + meta []*lp.Tag +} + +type CCMetric interface { + lp.MutableMetric + AddMeta(key, value string) + MetaList() []*lp.Tag + RemoveTag(key string) +} + +func (m *ccMetric) Meta() map[string]string { + meta := make(map[string]string, len(m.meta)) + for _, m := range m.meta { + meta[m.Key] = m.Value + } + return meta +} + +func (m *ccMetric) MetaList() []*lp.Tag { + return m.meta +} + +func (m *ccMetric) String() string { + return fmt.Sprintf("%s %v %v %v %d", m.name, m.Tags(), m.Meta(), m.Fields(), m.tm.UnixNano()) +} + +func (m *ccMetric) Name() string { + return m.name +} + +func (m *ccMetric) Tags() map[string]string { + tags := make(map[string]string, len(m.tags)) + for _, tag := range m.tags { + tags[tag.Key] = tag.Value + } + return tags +} + +func (m *ccMetric) TagList() []*lp.Tag { + return m.tags +} + +func (m *ccMetric) Fields() map[string]interface{} { + fields := make(map[string]interface{}, len(m.fields)) + for _, field := range m.fields { + fields[field.Key] = field.Value + } + + return fields +} + +func (m *ccMetric) FieldList() []*lp.Field { + return m.fields +} + +func (m *ccMetric) Time() time.Time { + return m.tm +} + +func (m *ccMetric) SetTime(t time.Time) { + m.tm = t +} + +func (m *ccMetric) HasTag(key string) bool { + for _, tag := range m.tags { + if tag.Key == key { + return true + } + } + return false +} + +func (m *ccMetric) GetTag(key string) (string, bool) { + for _, tag := range m.tags { + if tag.Key == key { + return tag.Value, true + } + } + return "", false +} + +func (m *ccMetric) RemoveTag(key string) { + for i, tag := range m.tags { + if tag.Key == key { + copy(m.tags[i:], m.tags[i+1:]) + m.tags[len(m.tags)-1] = nil + m.tags = m.tags[:len(m.tags)-1] + return + } + } +} + +func (m *ccMetric) AddTag(key, value string) { + for i, tag := range m.tags { + if key > tag.Key { + continue + } + + if key == tag.Key { + tag.Value = value + return + } + 
+ m.tags = append(m.tags, nil) + copy(m.tags[i+1:], m.tags[i:]) + m.tags[i] = &lp.Tag{Key: key, Value: value} + return + } + + m.tags = append(m.tags, &lp.Tag{Key: key, Value: value}) +} + +func (m *ccMetric) HasMeta(key string) bool { + for _, tag := range m.meta { + if tag.Key == key { + return true + } + } + return false +} + +func (m *ccMetric) GetMeta(key string) (string, bool) { + for _, tag := range m.meta { + if tag.Key == key { + return tag.Value, true + } + } + return "", false +} + +func (m *ccMetric) RemoveMeta(key string) { + for i, tag := range m.meta { + if tag.Key == key { + copy(m.meta[i:], m.meta[i+1:]) + m.meta[len(m.meta)-1] = nil + m.meta = m.meta[:len(m.meta)-1] + return + } + } +} + +func (m *ccMetric) AddMeta(key, value string) { + for i, tag := range m.meta { + if key > tag.Key { + continue + } + + if key == tag.Key { + tag.Value = value + return + } + + m.meta = append(m.meta, nil) + copy(m.meta[i+1:], m.meta[i:]) + m.meta[i] = &lp.Tag{Key: key, Value: value} + return + } + + m.meta = append(m.meta, &lp.Tag{Key: key, Value: value}) +} + +func (m *ccMetric) AddField(key string, value interface{}) { + for i, field := range m.fields { + if key == field.Key { + m.fields[i] = &lp.Field{Key: key, Value: convertField(value)} + return + } + } + m.fields = append(m.fields, &lp.Field{Key: key, Value: convertField(value)}) +} + +func New( + name string, + tags map[string]string, + meta map[string]string, + fields map[string]interface{}, + tm time.Time, +) (CCMetric, error) { + m := &ccMetric{ + name: name, + tags: nil, + fields: nil, + tm: tm, + meta: nil, + } + + if len(tags) > 0 { + m.tags = make([]*lp.Tag, 0, len(tags)) + for k, v := range tags { + m.tags = append(m.tags, + &lp.Tag{Key: k, Value: v}) + } + sort.Slice(m.tags, func(i, j int) bool { return m.tags[i].Key < m.tags[j].Key }) + } + + if len(meta) > 0 { + m.meta = make([]*lp.Tag, 0, len(meta)) + for k, v := range meta { + m.meta = append(m.meta, + &lp.Tag{Key: k, Value: v}) + } + 
sort.Slice(m.meta, func(i, j int) bool { return m.meta[i].Key < m.meta[j].Key }) + } + + if len(fields) > 0 { + m.fields = make([]*lp.Field, 0, len(fields)) + for k, v := range fields { + v := convertField(v) + if v == nil { + continue + } + m.AddField(k, v) + } + } + + return m, nil +} + +func FromMetric(other CCMetric) CCMetric { + m := &ccMetric{ + name: other.Name(), + tags: make([]*lp.Tag, len(other.TagList())), + fields: make([]*lp.Field, len(other.FieldList())), + meta: make([]*lp.Tag, len(other.MetaList())), + tm: other.Time(), + } + + for i, tag := range other.TagList() { + m.tags[i] = &lp.Tag{Key: tag.Key, Value: tag.Value} + } + for i, s := range other.MetaList() { + m.meta[i] = &lp.Tag{Key: s.Key, Value: s.Value} + } + + for i, field := range other.FieldList() { + m.fields[i] = &lp.Field{Key: field.Key, Value: field.Value} + } + return m +} + +func FromInfluxMetric(other lp.Metric) CCMetric { + m := &ccMetric{ + name: other.Name(), + tags: make([]*lp.Tag, len(other.TagList())), + fields: make([]*lp.Field, len(other.FieldList())), + meta: make([]*lp.Tag, 0), + tm: other.Time(), + } + + for i, tag := range other.TagList() { + m.tags[i] = &lp.Tag{Key: tag.Key, Value: tag.Value} + } + + for i, field := range other.FieldList() { + m.fields[i] = &lp.Field{Key: field.Key, Value: field.Value} + } + return m +} + +func convertField(v interface{}) interface{} { + switch v := v.(type) { + case float64: + return v + case int64: + return v + case string: + return v + case bool: + return v + case int: + return int64(v) + case uint: + return uint64(v) + case uint64: + return uint64(v) + case []byte: + return string(v) + case int32: + return int64(v) + case int16: + return int64(v) + case int8: + return int64(v) + case uint32: + return uint64(v) + case uint16: + return uint64(v) + case uint8: + return uint64(v) + case float32: + return float64(v) + case *float64: + if v != nil { + return *v + } + case *int64: + if v != nil { + return *v + } + case *string: + if v != 
nil { + return *v + } + case *bool: + if v != nil { + return *v + } + case *int: + if v != nil { + return int64(*v) + } + case *uint: + if v != nil { + return uint64(*v) + } + case *uint64: + if v != nil { + return uint64(*v) + } + case *[]byte: + if v != nil { + return string(*v) + } + case *int32: + if v != nil { + return int64(*v) + } + case *int16: + if v != nil { + return int64(*v) + } + case *int8: + if v != nil { + return int64(*v) + } + case *uint32: + if v != nil { + return uint64(*v) + } + case *uint16: + if v != nil { + return uint64(*v) + } + case *uint8: + if v != nil { + return uint64(*v) + } + case *float32: + if v != nil { + return float64(*v) + } + default: + return nil + } + return nil +} diff --git a/internal/metricRouter/README.md b/internal/metricRouter/README.md new file mode 100644 index 0000000..a3aef16 --- /dev/null +++ b/internal/metricRouter/README.md @@ -0,0 +1,50 @@ +# CC Metric Router + +The CCMetric router sits in between the collectors and the sinks and can be used to add and remove tags to/from traversing [CCMetrics](../ccMetric/README.md). + +# Configuration + +```json +{ + "add_tags" : [ + { + "key" : "cluster", + "value" : "testcluster", + "if" : "*" + }, + { + "key" : "test", + "value" : "testing", + "if" : "name == 'temp_package_id_0'" + } + ], + "delete_tags" : [ + { + "key" : "unit", + "value" : "*", + "if" : "*" + } + ], + "interval_timestamp" : true +} +``` + +There are three main options `add_tags`, `delete_tags` and `interval_timestamp`. `add_tags` and `delete_tags` are lists consisting of dicts with `key`, `value` and `if`. The `value` can be omitted in the `delete_tags` part as it only uses the `key` for removal. The `interval_timestamp` setting means that a unique timestamp is applied to all metrics traversing the router during an interval. 
+ +# Conditional manipulation of tags + +The `if` setting allows conditional testing of a single metric like in the example: + +```json +{ + "key" : "test", + "value" : "testing", + "if" : "name == 'temp_package_id_0'" +} +``` + +If the CCMetric name is equal to 'temp_package_id_0', it adds an additional tag `test=testing` to the metric. + +To match all metrics, use `*` as the condition. This is how a tag is added to every metric by default, like the `cluster=testcluster` tag in the example. + + diff --git a/internal/metricRouter/metricRouter.go b/internal/metricRouter/metricRouter.go new file mode 100644 index 0000000..25b0dc2 --- /dev/null +++ b/internal/metricRouter/metricRouter.go @@ -0,0 +1,208 @@ +package metricRouter + +import ( + "encoding/json" + "log" + "os" + "sync" + "time" + + lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric" + mct "github.com/ClusterCockpit/cc-metric-collector/internal/multiChanTicker" + "gopkg.in/Knetic/govaluate.v2" +) + +type metricRouterTagConfig struct { + Key string `json:"key"` + Value string `json:"value"` + Condition string `json:"if"` +} + +type metricRouterConfig struct { + AddTags []metricRouterTagConfig `json:"add_tags"` + DelTags []metricRouterTagConfig `json:"delete_tags"` + IntervalStamp bool `json:"interval_timestamp"` +} + +type metricRouter struct { + inputs []chan lp.CCMetric + outputs []chan lp.CCMetric + done chan bool + wg *sync.WaitGroup + timestamp time.Time + ticker mct.MultiChanTicker + config metricRouterConfig +} + +type MetricRouter interface { + Init(ticker mct.MultiChanTicker, wg *sync.WaitGroup, routerConfigFile string) error + AddInput(input chan lp.CCMetric) + AddOutput(output chan lp.CCMetric) + Start() + Close() +} + +func (r *metricRouter) Init(ticker mct.MultiChanTicker, wg *sync.WaitGroup, routerConfigFile string) error { + r.inputs = make([]chan lp.CCMetric, 0) + r.outputs = make([]chan lp.CCMetric, 0) + r.done = make(chan bool) + r.wg = wg + r.ticker = ticker + configFile, err := 
os.Open(routerConfigFile) + if err != nil { + log.Print(err.Error()) + return err + } + defer configFile.Close() + jsonParser := json.NewDecoder(configFile) + err = jsonParser.Decode(&r.config) + if err != nil { + log.Print(err.Error()) + return err + } + return nil +} + +func (r *metricRouter) StartTimer() { + m := make(chan time.Time) + r.ticker.AddChannel(m) + go func() { + for { + select { + case t := <-m: + r.timestamp = t + } + } + }() +} + +func (r *metricRouter) EvalCondition(Cond string, point lp.CCMetric) (bool, error) { + expression, err := govaluate.NewEvaluableExpression(Cond) + if err != nil { + log.Print(Cond, " = ", err.Error()) + return false, err + } + params := make(map[string]interface{}) + params["name"] = point.Name() + for _, t := range point.TagList() { + params[t.Key] = t.Value + } + for _, m := range point.MetaList() { + params[m.Key] = m.Value + } + for _, f := range point.FieldList() { + params[f.Key] = f.Value + } + params["timestamp"] = point.Time() + + result, err := expression.Evaluate(params) + if err != nil { + log.Print(Cond, " = ", err.Error()) + return false, err + } + return bool(result.(bool)), err +} + +func (r *metricRouter) DoAddTags(point lp.CCMetric) { + for _, m := range r.config.AddTags { + var conditionMatches bool + + if m.Condition == "*" { + conditionMatches = true + } else { + var err error + conditionMatches, err = r.EvalCondition(m.Condition, point) + if err != nil { + log.Print(err.Error()) + conditionMatches = false + } + } + if conditionMatches { + point.AddTag(m.Key, m.Value) + } + } +} + +func (r *metricRouter) DoDelTags(point lp.CCMetric) { + for _, m := range r.config.DelTags { + var conditionMatches bool + + if m.Condition == "*" { + conditionMatches = true + } else { + var err error + conditionMatches, err = r.EvalCondition(m.Condition, point) + if err != nil { + log.Print(err.Error()) + conditionMatches = false + } + } + if conditionMatches { + point.RemoveTag(m.Key) + } + } +} + +func (r *metricRouter) 
Start() { + r.wg.Add(1) + r.timestamp = time.Now() + if r.config.IntervalStamp { + r.StartTimer() + } + go func() { + for { + RouterLoop: + select { + case <-r.done: + log.Print("[MetricRouter] DONE\n") + r.wg.Done() + break RouterLoop + default: + for _, c := range r.inputs { + RouterInputLoop: + select { + case <-r.done: + log.Print("[MetricRouter] DONE\n") + r.wg.Done() + break RouterInputLoop + case p := <-c: + log.Print("[MetricRouter] FORWARD ", p) + r.DoAddTags(p) + r.DoDelTags(p) + if r.config.IntervalStamp { + p.SetTime(r.timestamp) + } + for _, o := range r.outputs { + o <- p + } + default: + } + } + } + } + log.Print("[MetricRouter] EXIT\n") + }() + log.Print("[MetricRouter] STARTED\n") +} + +func (r *metricRouter) AddInput(input chan lp.CCMetric) { + r.inputs = append(r.inputs, input) +} + +func (r *metricRouter) AddOutput(output chan lp.CCMetric) { + r.outputs = append(r.outputs, output) +} + +func (r *metricRouter) Close() { + r.done <- true + log.Print("[MetricRouter] CLOSE\n") +} + +func New(ticker mct.MultiChanTicker, wg *sync.WaitGroup, routerConfigFile string) (MetricRouter, error) { + r := new(metricRouter) + err := r.Init(ticker, wg, routerConfigFile) + if err != nil { + return nil, err + } + return r, err +} diff --git a/internal/multiChanTicker/README.md b/internal/multiChanTicker/README.md new file mode 100644 index 0000000..30deb4f --- /dev/null +++ b/internal/multiChanTicker/README.md @@ -0,0 +1,37 @@ +# MultiChanTicker + +The idea of this ticker is to multiply the output channels. The original Golang `time.Ticker` provides only a single output channel, so the signal can only be received by a single other class. This ticker allows to add multiple channels which get all notified about the time tick. 
+ +```golang +type MultiChanTicker interface { + Init(duration time.Duration) + AddChannel(chan time.Time) +} +``` + +The MultiChanTicker is created similarly to the common `time.Ticker`: + +```golang +NewTicker(duration time.Duration) MultiChanTicker +``` + +Afterwards, you can add channels: + +```golang +t := NewTicker(duration) +c1 := make(chan time.Time) +c2 := make(chan time.Time) +t.AddChannel(c1) +t.AddChannel(c2) + +for { + select { + case t1 := <- c1: + log.Print(t1) + case t2 := <- c2: + log.Print(t2) + } +} +``` + +The result should be the same `time.Time` output in both channels, notified "simultaneously". diff --git a/internal/multiChanTicker/multiChanTicker.go b/internal/multiChanTicker/multiChanTicker.go new file mode 100644 index 0000000..f8139fa --- /dev/null +++ b/internal/multiChanTicker/multiChanTicker.go @@ -0,0 +1,39 @@ +package multiChanTicker + +import ( + "time" +) + +type multiChanTicker struct { + ticker *time.Ticker + channels []chan time.Time +} + +type MultiChanTicker interface { + Init(duration time.Duration) + AddChannel(chan time.Time) +} + +func (t *multiChanTicker) Init(duration time.Duration) { + t.ticker = time.NewTicker(duration) + go func() { + for { + select { + case ts := <-t.ticker.C: + for _, c := range t.channels { + c <- ts + } + } + } + }() +} + +func (t *multiChanTicker) AddChannel(channel chan time.Time) { + t.channels = append(t.channels, channel) +} + +func NewTicker(duration time.Duration) MultiChanTicker { + t := &multiChanTicker{} + t.Init(duration) + return t +} diff --git a/metric-collector.go b/metric-collector.go index 02a2b21..c071933 100644 --- a/metric-collector.go +++ b/metric-collector.go @@ -8,60 +8,32 @@ import ( "os" "os/signal" "strings" - "sync" - "time" "github.com/ClusterCockpit/cc-metric-collector/collectors" "github.com/ClusterCockpit/cc-metric-collector/receivers" "github.com/ClusterCockpit/cc-metric-collector/sinks" - lp "github.com/influxdata/line-protocol" + + // "strings" + "sync" + 
"time" + + + lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric" + mr "github.com/ClusterCockpit/cc-metric-collector/internal/metricRouter" + mct "github.com/ClusterCockpit/cc-metric-collector/internal/multiChanTicker" ) -// List of provided collectors. Which collector should be run can be -// configured at 'collectors' list in 'config.json'. -var Collectors = map[string]collectors.MetricGetter{ - "likwid": &collectors.LikwidCollector{}, - "loadavg": &collectors.LoadavgCollector{}, - "memstat": &collectors.MemstatCollector{}, - "netstat": &collectors.NetstatCollector{}, - "ibstat": &collectors.InfinibandCollector{}, - "lustrestat": &collectors.LustreCollector{}, - "cpustat": &collectors.CpustatCollector{}, - "topprocs": &collectors.TopProcsCollector{}, - "nvidia": &collectors.NvidiaCollector{}, - "customcmd": &collectors.CustomCmdCollector{}, - "diskstat": &collectors.DiskstatCollector{}, - "tempstat": &collectors.TempCollector{}, - "ipmistat": &collectors.IpmiCollector{}, - "gpfs": new(collectors.GpfsCollector), - "cpufreq": new(collectors.CPUFreqCollector), - "cpufreq_cpuinfo": new(collectors.CPUFreqCpuInfoCollector), +type CentralConfigFile struct { + Interval int `json:"interval"` + Duration int `json:"duration"` + Pidfile string `json:"pidfile,omitempty"` + CollectorConfigFile string `json:"collectors"` + RouterConfigFile string `json:"router"` + SinkConfigFile string `json:"sinks"` + ReceiverConfigFile string `json:"receivers,omitempty"` } -var Sinks = map[string]sinks.SinkFuncs{ - "influxdb": &sinks.InfluxSink{}, - "stdout": &sinks.StdoutSink{}, - "nats": &sinks.NatsSink{}, - "http": &sinks.HttpSink{}, -} - -var Receivers = map[string]receivers.ReceiverFuncs{ - "nats": &receivers.NatsReceiver{}, -} - -// Structure of the configuration file -type GlobalConfig struct { - Sink sinks.SinkConfig `json:"sink"` - Interval int `json:"interval"` - Duration int `json:"duration"` - Collectors []string `json:"collectors"` - Receiver 
receivers.ReceiverConfig `json:"receiver"` - DefTags map[string]string `json:"default_tags"` - CollectConfigs map[string]json.RawMessage `json:"collect_config"` -} - -// Load JSON configuration file -func LoadConfiguration(file string, config *GlobalConfig) error { +func LoadCentralConfiguration(file string, config *CentralConfigFile) error { configFile, err := os.Open(file) defer configFile.Close() if err != nil { @@ -73,6 +45,56 @@ func LoadConfiguration(file string, config *GlobalConfig) error { return err } +type RuntimeConfig struct { + Hostname string + Interval time.Duration + Duration time.Duration + CliArgs map[string]string + ConfigFile CentralConfigFile + + Router mr.MetricRouter + CollectManager collectors.CollectorManager + SinkManager sinks.SinkManager + ReceiveManager receivers.ReceiveManager + Ticker mct.MultiChanTicker + + Channels []chan lp.CCMetric + Sync sync.WaitGroup +} + +func prepare_runcfg() RuntimeConfig { + return RuntimeConfig{ + Router: nil, + CollectManager: nil, + SinkManager: nil, + ReceiveManager: nil, + } +} + +//// Structure of the configuration file +//type GlobalConfig struct { +// Sink sinks.SinkConfig `json:"sink"` +// Interval int `json:"interval"` +// Duration int `json:"duration"` +// Collectors []string `json:"collectors"` +// Receiver receivers.ReceiverConfig `json:"receiver"` +// DefTags map[string]string `json:"default_tags"` +// CollectConfigs map[string]json.RawMessage `json:"collect_config"` +//} + +//// Load JSON configuration file +//func LoadConfiguration(file string, config *GlobalConfig) error { +// configFile, err := os.Open(file) +// defer configFile.Close() +// if err != nil { +// fmt.Println(err.Error()) +// return err +// } +// jsonParser := json.NewDecoder(configFile) +// err = jsonParser.Decode(config) +// return err +//} + func ReadCli() map[string]string { var m map[string]string cfg := flag.String("config", "./config.json", "Path to configuration file") @@ -92,228 +114,168 @@ func ReadCli() 
map[string]string { return m } -func SetLogging(logfile string) error { - var file *os.File - var err error - if logfile != "stderr" { - file, err = os.OpenFile(logfile, os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0600) - if err != nil { - log.Fatal(err) - return err - } - } else { - file = os.Stderr - } - log.SetOutput(file) - return nil -} +//func SetLogging(logfile string) error { +// var file *os.File +// var err error +// if logfile != "stderr" { +// file, err = os.OpenFile(logfile, os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0600) +// if err != nil { +// log.Fatal(err) +// return err +// } +// } else { +// file = os.Stderr +// } +// log.SetOutput(file) +// return nil +//} -func CreatePidfile(pidfile string) error { - file, err := os.OpenFile(pidfile, os.O_CREATE|os.O_RDWR, 0600) - if err != nil { - log.Print(err) - return err - } - file.Write([]byte(fmt.Sprintf("%d", os.Getpid()))) - file.Close() - return nil -} +//func CreatePidfile(pidfile string) error { +// file, err := os.OpenFile(pidfile, os.O_CREATE|os.O_RDWR, 0600) +// if err != nil { +// log.Print(err) +// return err +// } +// file.Write([]byte(fmt.Sprintf("%d", os.Getpid()))) +// file.Close() +// return nil +//} -func RemovePidfile(pidfile string) error { - info, err := os.Stat(pidfile) - if !os.IsNotExist(err) && !info.IsDir() { - os.Remove(pidfile) - } - return nil -} +//func RemovePidfile(pidfile string) error { +// info, err := os.Stat(pidfile) +// if !os.IsNotExist(err) && !info.IsDir() { +// os.Remove(pidfile) +// } +// return nil +//} // General shutdown function that gets executed in case of interrupt or graceful shutdown -func shutdown(wg *sync.WaitGroup, collectors []string, sink sinks.SinkFuncs, recv receivers.ReceiverFuncs, pidfile string) { +func shutdown(config *RuntimeConfig) { log.Print("Shutdown...") - for _, c := range collectors { - col := Collectors[c] - log.Print("Stop ", col.Name()) - col.Close() + if config.CollectManager != nil { + log.Print("Shutdown CollectManager...") + 
config.CollectManager.Close() } - time.Sleep(1 * time.Second) - if recv != nil { - recv.Close() + if config.ReceiveManager != nil { + log.Print("Shutdown ReceiveManager...") + config.ReceiveManager.Close() } - sink.Close() - RemovePidfile(pidfile) - wg.Done() + if config.Router != nil { + log.Print("Shutdown Router...") + config.Router.Close() + } + if config.SinkManager != nil { + log.Print("Shutdown SinkManager...") + config.SinkManager.Close() + } + + // pidfile := config.ConfigFile.Pidfile + // RemovePidfile(pidfile) + // pidfile = config.CliArgs["pidfile"] + // RemovePidfile(pidfile) + config.Sync.Done() } // Register an interrupt handler for Ctrl+C and similar. At signal, // all collectors are closed -func prepare_shutdown(wg *sync.WaitGroup, config *GlobalConfig, sink sinks.SinkFuncs, recv receivers.ReceiverFuncs, pidfile string) { +func prepare_shutdown(config *RuntimeConfig) { sigs := make(chan os.Signal, 1) signal.Notify(sigs, os.Interrupt) - go func(wg *sync.WaitGroup) { + go func(config *RuntimeConfig) { <-sigs log.Print("Shutdown...") - shutdown(wg, config.Collectors, sink, recv, pidfile) - }(wg) + shutdown(config) + }(config) } func main() { - var config GlobalConfig - var wg sync.WaitGroup - var recv receivers.ReceiverFuncs = nil - var use_recv bool - use_recv = false - wg.Add(1) - host, err := os.Hostname() - if err != nil { - log.Print(err) - return - } - // Drop domain part of host name - host = strings.SplitN(host, `.`, 2)[0] - clicfg := ReadCli() - err = CreatePidfile(clicfg["pidfile"]) - err = SetLogging(clicfg["logfile"]) - if err != nil { - log.Print("Error setting up logging system to ", clicfg["logfile"], " on ", host) - return - } + var err error + use_recv := false + + rcfg := prepare_runcfg() + rcfg.CliArgs = ReadCli() // Load and check configuration - err = LoadConfiguration(clicfg["configfile"], &config) + err = LoadCentralConfiguration(rcfg.CliArgs["configfile"], &rcfg.ConfigFile) if err != nil { - log.Print("Error reading 
configuration file ", clicfg["configfile"]) + log.Print("Error reading configuration file ", rcfg.CliArgs["configfile"]) log.Print(err.Error()) return } - if config.Interval <= 0 || time.Duration(config.Interval)*time.Second <= 0 { + if rcfg.ConfigFile.Interval <= 0 || time.Duration(rcfg.ConfigFile.Interval)*time.Second <= 0 { log.Print("Configuration value 'interval' must be greater than zero") return } - if config.Duration <= 0 { + rcfg.Interval = time.Duration(rcfg.ConfigFile.Interval) * time.Second + if rcfg.ConfigFile.Duration <= 0 || time.Duration(rcfg.ConfigFile.Duration)*time.Second <= 0 { log.Print("Configuration value 'duration' must be greater than zero") return } - if len(config.Collectors) == 0 { - var keys []string - for k := range Collectors { - keys = append(keys, k) - } - log.Print("Configuration value 'collectors' does not contain any collector. Available: ", strings.Join(keys, ", ")) - return - } - for _, name := range config.Collectors { - if _, found := Collectors[name]; !found { - log.Print("Invalid collector '", name, "' in configuration") - return - } - } - if _, found := Sinks[config.Sink.Type]; !found { - log.Print("Invalid sink type '", config.Sink.Type, "' in configuration") - return - } - // Setup sink - sink := Sinks[config.Sink.Type] - err = sink.Init(config.Sink) + rcfg.Duration = time.Duration(rcfg.ConfigFile.Duration) * time.Second + + rcfg.Hostname, err = os.Hostname() if err != nil { - log.Print(err) + log.Print(err.Error()) return } - // Setup receiver - if len(config.Receiver.Type) > 0 && config.Receiver.Type != "none" { - if _, found := Receivers[config.Receiver.Type]; !found { - log.Print("Invalid receiver type '", config.Receiver.Type, "' in configuration") - return - } else { - recv = Receivers[config.Receiver.Type] - err = recv.Init(config.Receiver, sink) - if err == nil { - use_recv = true - } else { - log.Print(err) - } - } - } - - // Register interrupt handler - prepare_shutdown(&wg, &config, sink, recv, 
clicfg["pidfile"]) - - // Initialize all collectors - tmp := make([]string, 0) - for _, c := range config.Collectors { - col := Collectors[c] - conf, found := config.CollectConfigs[c] - if !found { - conf = json.RawMessage("") - } - err = col.Init([]byte(conf)) + // Drop domain part of host name + rcfg.Hostname = strings.SplitN(rcfg.Hostname, `.`, 2)[0] + // err = CreatePidfile(rcfg.CliArgs["pidfile"]) + // err = SetLogging(rcfg.CliArgs["logfile"]) + // if err != nil { + // log.Print("Error setting up logging system to ", rcfg.CliArgs["logfile"], " on ", rcfg.Hostname) + // return + // } + rcfg.Ticker = mct.NewTicker(rcfg.Interval) + if len(rcfg.ConfigFile.RouterConfigFile) > 0 { + rcfg.Router, err = mr.New(rcfg.Ticker, &rcfg.Sync, rcfg.ConfigFile.RouterConfigFile) if err != nil { - log.Print("SKIP ", col.Name(), " (", err.Error(), ")") - } else if !col.Initialized() { - log.Print("SKIP ", col.Name(), " (Not initialized)") - } else { - log.Print("Start ", col.Name()) - tmp = append(tmp, c) + log.Print(err.Error()) + return } } - config.Collectors = tmp - config.DefTags["hostname"] = host - - // Setup up ticker loop - if clicfg["once"] != "true" { - log.Print("Running loop every ", time.Duration(config.Interval)*time.Second) - } else { - log.Print("Running loop only once") + if len(rcfg.ConfigFile.SinkConfigFile) > 0 { + rcfg.SinkManager, err = sinks.New(&rcfg.Sync, rcfg.ConfigFile.SinkConfigFile) + if err != nil { + log.Print(err.Error()) + return + } + RouterToSinksChannel := make(chan lp.CCMetric) + rcfg.SinkManager.AddInput(RouterToSinksChannel) + rcfg.Router.AddOutput(RouterToSinksChannel) } - ticker := time.NewTicker(time.Duration(config.Interval) * time.Second) - done := make(chan bool) + if len(rcfg.ConfigFile.CollectorConfigFile) > 0 { + rcfg.CollectManager, err = collectors.New(rcfg.Ticker, rcfg.Duration, &rcfg.Sync, rcfg.ConfigFile.CollectorConfigFile) + if err != nil { + log.Print(err.Error()) + return + } + CollectToRouterChannel := make(chan 
lp.CCMetric) + rcfg.CollectManager.AddOutput(CollectToRouterChannel) + rcfg.Router.AddInput(CollectToRouterChannel) + } + if len(rcfg.ConfigFile.ReceiverConfigFile) > 0 { + rcfg.ReceiveManager, err = receivers.New(&rcfg.Sync, rcfg.ConfigFile.ReceiverConfigFile) + if err != nil { + log.Print(err.Error()) + return + } + ReceiveToRouterChannel := make(chan lp.CCMetric) + rcfg.ReceiveManager.AddOutput(ReceiveToRouterChannel) + rcfg.Router.AddInput(ReceiveToRouterChannel) + use_recv = true + } + prepare_shutdown(&rcfg) + rcfg.Sync.Add(1) + rcfg.Router.Start() + rcfg.SinkManager.Start() + rcfg.CollectManager.Start() - // Storage for all node metrics - tmpPoints := make([]lp.MutableMetric, 0) - - // Start receiver if use_recv { - recv.Start() + rcfg.ReceiveManager.Start() } - go func() { - for { - select { - case <-done: - return - case t := <-ticker.C: - - // Read all collectors are sort the results in the right - // storage locations - for _, c := range config.Collectors { - col := Collectors[c] - col.Read(time.Duration(config.Duration)*time.Second, &tmpPoints) - - for { - if len(tmpPoints) == 0 { - break - } - p := tmpPoints[0] - for k, v := range config.DefTags { - p.AddTag(k, v) - p.SetTime(t) - } - sink.Write(p) - tmpPoints = tmpPoints[1:] - } - } - - if err := sink.Flush(); err != nil { - log.Printf("sink error: %s\n", err) - } - if clicfg["once"] == "true" { - shutdown(&wg, config.Collectors, sink, recv, clicfg["pidfile"]) - return - } - } - } - }() - // Wait until receiving an interrupt - wg.Wait() + rcfg.Sync.Wait() } diff --git a/receivers.json b/receivers.json new file mode 100644 index 0000000..e368fc3 --- /dev/null +++ b/receivers.json @@ -0,0 +1,8 @@ +[ + { + "type": "nats", + "address": "nats://my-url", + "port" : "4222", + "database": "testcluster" + } +] diff --git a/receivers/README.md b/receivers/README.md index 7733a94..24425f2 100644 --- a/receivers/README.md +++ b/receivers/README.md @@ -1,35 +1,44 @@ -This folder contains the receivers for the 
cc-metric-collector. +# CCMetric receivers -# `metricReceiver.go` -The base class/configuration is located in `metricReceiver.go`. +This folder contains the ReceiveManager and receiver implementations for the cc-metric-collector. -# Receivers -* `natsReceiver.go`: Receives metrics from the Nats transport system in Influx line protocol encoding. The database name is used as subscription subject for the NATS messages. It uses https://github.com/nats-io/nats.go +# Configuration -# Installation -Nothing to do, all receivers are pure Go code - -# Receiver configuration +The configuration file for the receivers is a list of configurations. The `type` field in each specifies which receiver to initialize. ```json - "receiver": { +[ + { "type": "nats", - "address": "nats://my-url" + "address": "nats://my-url", "port" : "4222", "database": "testcluster" - }, + } +] ``` -## `nats` -The receiver connects to `address` and `port` and subscribes itself for all messages with topic `database`. The default port is `4222`. +## Type `nats` + +```json +{ + "type": "nats", + "address": "", + "port" : "", + "database": "" +} +``` + +The `nats` receiver subscribes to the topic `database` and listens on `address` and `port` for metrics in the InfluxDB line protocol. # Contributing own receivers A receiver contains three functions and is derived from the type `Receiver` (in `metricReceiver.go`): * `Init(config ReceiverConfig) error` * `Start() error` * `Close()` +* `Name() string` +* `SetSink(sink chan ccMetric.CCMetric)` The data structures should be set up in `Init()` like opening a file or server connection. The `Start()` function should either start a go routine or issue some other asynchronous mechanism for receiving metrics. The `Close()` function should tear down anything created in `Init()`. -Finally, the receiver needs to be registered in the `metric-collector.go`. There is a list of receivers called `Receivers` which is a map (string -> pointer to receiver). 
Add a new entry with a descriptive name and the new receiver. +Finally, the receiver needs to be registered in the `receiveManager.go`. There is a list of receivers called `AvailableReceivers` which is a map (`receiver_type_string` -> `pointer to Receiver interface`). Add a new entry with a descriptive name and the new receiver. diff --git a/receivers/metricReceiver.go b/receivers/metricReceiver.go index acdc455..2c74409 100644 --- a/receivers/metricReceiver.go +++ b/receivers/metricReceiver.go @@ -2,30 +2,41 @@ package receivers import ( // "time" - s "github.com/ClusterCockpit/cc-metric-collector/sinks" + lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric" influx "github.com/influxdata/line-protocol" ) type ReceiverConfig struct { - Addr string `json:"address"` - Port string `json:"port"` - Database string `json:"database"` - Type string `json:"type"` + Addr string `json:"address"` + Port string `json:"port"` + Database string `json:"database"` + Organization string `json:"organization,omitempty"` + Type string `json:"type"` } -type Receiver struct { +type receiver struct { name string addr string port string database string organization string - sink s.SinkFuncs + sink chan lp.CCMetric } -type ReceiverFuncs interface { - Init(config ReceiverConfig, sink s.SinkFuncs) error +type Receiver interface { + Init(config ReceiverConfig) error Start() Close() + Name() string + SetSink(sink chan lp.CCMetric) +} + +func (r *receiver) Name() string { + return r.name +} + +func (r *receiver) SetSink(sink chan lp.CCMetric) { + r.sink = sink } func Tags2Map(metric influx.Metric) map[string]string { diff --git a/receivers/natsReceiver.go b/receivers/natsReceiver.go index 9d98f00..5cbe90d 100644 --- a/receivers/natsReceiver.go +++ b/receivers/natsReceiver.go @@ -2,56 +2,68 @@ package receivers import ( "errors" - s "github.com/ClusterCockpit/cc-metric-collector/sinks" - lp "github.com/influxdata/line-protocol" + "fmt" + lp 
"github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric" + influx "github.com/influxdata/line-protocol" nats "github.com/nats-io/nats.go" "log" "time" ) +type NatsReceiverConfig struct { + Addr string `json:"address"` + Port string `json:"port"` + Database string `json:"database"` +} + type NatsReceiver struct { - Receiver + receiver nc *nats.Conn - handler *lp.MetricHandler - parser *lp.Parser + handler *influx.MetricHandler + parser *influx.Parser + meta map[string]string + config ReceiverConfig } var DefaultTime = func() time.Time { return time.Unix(42, 0) } -func (r *NatsReceiver) Init(config ReceiverConfig, sink s.SinkFuncs) error { - if len(config.Addr) == 0 || - len(config.Port) == 0 || - len(config.Database) == 0 { +func (r *NatsReceiver) Init(config ReceiverConfig) error { + r.name = "NatsReceiver" + r.config = config + if len(r.config.Addr) == 0 || + len(r.config.Port) == 0 || + len(r.config.Database) == 0 { return errors.New("Not all configuration variables set required by NatsReceiver") } - r.addr = config.Addr + r.meta = map[string]string{"source": r.name} + r.addr = r.config.Addr if len(r.addr) == 0 { r.addr = nats.DefaultURL } - r.port = config.Port + r.port = r.config.Port if len(r.port) == 0 { r.port = "4222" } - log.Print("Init NATS Receiver") - nc, err := nats.Connect(r.addr) + log.Print("[NatsReceiver] INIT") + uri := fmt.Sprintf("%s:%s", r.addr, r.port) + nc, err := nats.Connect(uri) if err == nil { - r.database = config.Database - r.sink = sink + r.database = r.config.Database r.nc = nc } else { - log.Print(err) r.nc = nil + return err } - r.handler = lp.NewMetricHandler() - r.parser = lp.NewParser(r.handler) + r.handler = influx.NewMetricHandler() + r.parser = influx.NewParser(r.handler) r.parser.SetTimeFunc(DefaultTime) return err } func (r *NatsReceiver) Start() { - log.Print("Start NATS Receiver") + log.Print("[NatsReceiver] START") r.nc.Subscribe(r.database, r._NatsReceive) } @@ -59,9 +71,13 @@ func (r *NatsReceiver) 
_NatsReceive(m *nats.Msg) { metrics, err := r.parser.Parse(m.Data) if err == nil { for _, m := range metrics { - y, err := lp.New(m.Name(), Tags2Map(m), Fields2Map(m), m.Time()) - if err == nil { - r.sink.Write(y) + y := lp.FromInfluxMetric(m) + for k, v := range r.meta { + y.AddMeta(k, v) + } + //y, err := lp.New(m.Name(), Tags2Map(m), r.meta, Fields2Map(m), m.Time()) + if r.sink != nil { + r.sink <- y } } } @@ -69,7 +85,7 @@ func (r *NatsReceiver) _NatsReceive(m *nats.Msg) { func (r *NatsReceiver) Close() { if r.nc != nil { - log.Print("Close NATS Receiver") + log.Print("[NatsReceiver] CLOSE") r.nc.Close() } } diff --git a/receivers/receiveManager.go b/receivers/receiveManager.go new file mode 100644 index 0000000..62f70b3 --- /dev/null +++ b/receivers/receiveManager.go @@ -0,0 +1,153 @@ +package receivers + +import ( + "encoding/json" + lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric" + "log" + "os" + "sync" +) + +var AvailableReceivers = map[string]Receiver{ + "nats": &NatsReceiver{}, +} + +type receiveManager struct { + inputs []Receiver + output chan lp.CCMetric + done chan bool + wg *sync.WaitGroup + config []ReceiverConfig +} + +type ReceiveManager interface { + Init(wg *sync.WaitGroup, receiverConfigFile string) error + AddInput(rawConfig json.RawMessage) error + AddOutput(output chan lp.CCMetric) + Start() + Close() +} + +func (rm *receiveManager) Init(wg *sync.WaitGroup, receiverConfigFile string) error { + rm.inputs = make([]Receiver, 0) + rm.output = nil + rm.done = make(chan bool) + rm.wg = wg + rm.config = make([]ReceiverConfig, 0) + configFile, err := os.Open(receiverConfigFile) + if err != nil { + log.Print(err.Error()) + return err + } + defer configFile.Close() + jsonParser := json.NewDecoder(configFile) + var rawConfigs []json.RawMessage + err = jsonParser.Decode(&rawConfigs) + if err != nil { + log.Print(err.Error()) + return err + } + for _, raw := range rawConfigs { + log.Print("[ReceiveManager] ", string(raw)) + 
rm.AddInput(raw) + // if _, found := AvailableReceivers[k.Type]; !found { + // log.Print("[ReceiveManager] SKIP Config specifies unknown receiver 'type': ", k.Type) + // continue + // } + // r := AvailableReceivers[k.Type] + // err = r.Init(k) + // if err != nil { + // log.Print("[ReceiveManager] SKIP Receiver ", k.Type, " cannot be initialized: ", err.Error()) + // continue + // } + // rm.inputs = append(rm.inputs, r) + } + return nil +} + +func (rm *receiveManager) Start() { + rm.wg.Add(1) + + for _, r := range rm.inputs { + log.Print("[ReceiveManager] START ", r.Name()) + r.Start() + } + log.Print("[ReceiveManager] STARTED\n") + // go func() { + // for { + //ReceiveManagerLoop: + // select { + // case <- rm.done: + // log.Print("ReceiveManager done\n") + // rm.wg.Done() + // break ReceiveManagerLoop + // default: + // for _, c := range rm.inputs { + //ReceiveManagerInputLoop: + // select { + // case <- rm.done: + // log.Print("ReceiveManager done\n") + // rm.wg.Done() + // break ReceiveManagerInputLoop + // case p := <- c: + // log.Print("ReceiveManager: ", p) + // rm.output <- p + // default: + // } + // } + // } + // } + // }() + // for _, r := range rm.inputs { + // r.Close() + // } +} + +func (rm *receiveManager) AddInput(rawConfig json.RawMessage) error { + var config ReceiverConfig + err := json.Unmarshal(rawConfig, &config) + if err != nil { + log.Print("[ReceiveManager] SKIP ", config.Type, " JSON config error: ", err.Error()) + log.Print(err.Error()) + return err + } + if _, found := AvailableReceivers[config.Type]; !found { + log.Print("[ReceiveManager] SKIP ", config.Type, " unknown receiver: ", err.Error()) + return err + } + r := AvailableReceivers[config.Type] + err = r.Init(config) + if err != nil { + log.Print("[ReceiveManager] SKIP ", r.Name(), " initialization failed: ", err.Error()) + return err + } + rm.inputs = append(rm.inputs, r) + rm.config = append(rm.config, config) + return nil +} + +func (rm *receiveManager) AddOutput(output chan 
lp.CCMetric) { + rm.output = output + for _, r := range rm.inputs { + r.SetSink(rm.output) + } +} + +func (rm *receiveManager) Close() { + for _, r := range rm.inputs { + log.Print("[ReceiveManager] CLOSE ", r.Name()) + r.Close() + } + rm.wg.Done() + log.Print("[ReceiveManager] CLOSE\n") + log.Print("[ReceiveManager] EXIT\n") +} + +func New(wg *sync.WaitGroup, receiverConfigFile string) (ReceiveManager, error) { + r := &receiveManager{} + err := r.Init(wg, receiverConfigFile) + if err != nil { + return nil, err + } + return r, err +} diff --git a/router.json b/router.json new file mode 100644 index 0000000..a9f8714 --- /dev/null +++ b/router.json @@ -0,0 +1,22 @@ +{ + "add_tags" : [ + { + "key" : "cluster", + "value" : "testcluster", + "if" : "*" + }, + { + "key" : "test", + "value" : "testing", + "if" : "name == 'temp_package_id_0'" + } + ], + "delete_tags" : [ + { + "key" : "unit", + "value" : "*", + "if" : "*" + } + ], + "interval_timestamp" : true +} diff --git a/sinks.json b/sinks.json new file mode 100644 index 0000000..d304018 --- /dev/null +++ b/sinks.json @@ -0,0 +1,6 @@ +[ + { + "type" : "stdout", + "meta_as_tags" : true + } +] diff --git a/sinks/README.md b/sinks/README.md index 66783c4..8fac8e5 100644 --- a/sinks/README.md +++ b/sinks/README.md @@ -1,65 +1,99 @@ -This folder contains the sinks for the cc-metric-collector. +# CCMetric sinks -# `metricSink.go` -The base class/configuration is located in `metricSink.go`. +This folder contains the SinkManager and sink implementations for the cc-metric-collector. -# Sinks -* `stdoutSink.go`: Writes all metrics to `stdout` in InfluxDB line protocol. The sink does not use https://github.com/influxdata/line-protocol to reduce the executed code for debugging -* `influxSink.go`: Writes all metrics to an InfluxDB database instance using a blocking writer. It uses https://github.com/influxdata/influxdb-client-go . 
Configuration for the server, port, ssl, password, database name and organisation are in the global configuration file. The 'password' is used for the token and the 'database' for the bucket. It uses the v2 API of Influx. -* `natsSink.go`: Sends all metrics to an NATS server using the InfluxDB line protocol as encoding. It uses https://github.com/nats-io/nats.go . Configuration for the server, port, user, password and database name are in the global configuration file. The database name is used as subject for the NATS messages. -* `httpSink.go`: Sends all metrics to an HTTP endpoint `http://:/` using a POST request. The body of the request will consist of lines in the InfluxDB line protocol. In case password is specified, that password is used as a JWT in the 'Authorization' header. +# Configuration -# Installation -Nothing to do, all sinks are pure Go code - -# Sink configuration +The configuration file for the sinks is a list of configurations. The `type` field in each specifies which sink to initialize. ```json - "sink": { - "user": "testuser", - "password": "testpass", - "host": "127.0.0.1", - "port": "9090", - "database": "testdb", - "organization": "testorg", - "ssl": false - "type": "stdout" +[ + { + "type" : "stdout", + "meta_as_tags" : false + }, + { + "type" : "http", + "host" : "localhost", + "port" : "4123", + "database" : "ccmetric", + "password" : "" } +] ``` -## `stdout` -When configuring `type = stdout`, all metrics are printed to stdout. No further configuration is required or touched, so you can leave your other-sink-config in there and just change the `type` for debugging purposes +This example initializes two sinks, the `stdout` sink printing all metrics to the STDOUT and the `http` sink with the given `host`, `port`, `database` and `password`. -## `influxdb` -The InfluxDB sink uses blocking write operations to write to an InfluxDB database using the v2 API. 
It uses the following configuration options: -* `host`: Hostname of the database instance -* `port`: Portnumber (as string) of the database -* `database`: Name of the database, called 'bucket' in InfluxDB v2 -* `organization`: The InfluxDB v2 API uses organizations to separate database instances running on the same host -* `ssl`: Boolean to activate SSL/TLS -* `user`: Although the v2 API uses API keys instead of username and password, this field can be used if the sink should authentificate with `username:password`. If you want to use an API key, leave this field empty. -* `password`: API key for the InfluxDB v2 API or password if `user` is set +If `meta_as_tags` is set, all meta information attached to CCMetric are printed out as tags. -## `nats` -* `host`: Hostname of the NATS server -* `port`: Portnumber (as string) of the NATS server -* `user`: Username for authentification in the NATS transport system -* `password`: Password for authentification in the NATS transport system +## Type `stdout` + +```json +{ + "type" : "stdout", + "meta_as_tags" : +} +``` + +The `stdout` sink dumps all metrics to the STDOUT. + +## Type `http` + +```json +{ + "type" : "http", + "host" : "", + "port" : "", + "database" : "", + "password" : "", + "meta_as_tags" : +} +``` +The sink uses POST requests to send metrics to `http://:/` using the JWT token as a JWT in the 'Authorization' header. + +## Type `nats` + +```json +{ + "type" : "nats", + "host" : "", + "port" : "", + "user" : "", + "password" : "", + "database" : "" + "meta_as_tags" : +} +``` + +This sink publishes the CCMetric in a NATS environment using `host`, `port`, `user` and `password` for connecting. The metrics are published using the topic `database`. + +## Type `influxdb` + +```json +{ + "type" : "influxdb", + "host" : "", + "port" : "", + "user" : "", + "password" : "", + "database" : "" + "organization": "", + "ssl" : , + "meta_as_tags" : +} +``` + +This sink submits the CCMetrics to an InfluxDB time-series database. 
It uses `host`, `port` and `ssl` for connecting. For authentification, it uses either `user:password` if `user` is set and only `password` as API key. The `organization` and `database` are used for writing to the correct database. -## `http` -* `host`: Hostname of the HTTP server -* `port`: Portnumber (as string) of the HTTP server -* `database`: Endpoint to write to. HTTP POST requests are performed on `http://:/` -* `password`: JSON Web token used for authentification # Contributing own sinks -A sink contains three functions and is derived from the type `Sink` (in `metricSink.go`): +A sink contains three functions and is derived from the type `Sink`: * `Init(config SinkConfig) error` -* `Write(measurement string, tags map[string]string, fields map[string]interface{}, t time.Time) error` +* `Write(point CCMetric) error` * `Flush() error` * `Close()` -The data structures should be set up in `Init()` like opening a file or server connection. The `Write()` function takes a measurement, tags, fields and a timestamp and writes/sends the data. For non-blocking sinks, the `Flush()` method tells the sink to drain its internal buffers. The `Close()` function should tear down anything created in `Init()`. +The data structures should be set up in `Init()` like opening a file or server connection. The `Write()` function writes/sends the data. For non-blocking sinks, the `Flush()` method tells the sink to drain its internal buffers. The `Close()` function should tear down anything created in `Init()`. -Finally, the sink needs to be registered in the `metric-collector.go`. There is a list of sinks called `Sinks` which is a map (sink_type_string -> pointer to sink). Add a new entry with a descriptive name and the new sink. +Finally, the sink needs to be registered in the `sinkManager.go`. There is a list of sinks called `AvailableSinks` which is a map (`sink_type_string` -> `pointer to sink interface`). Add a new entry with a descriptive name and the new sink. 
diff --git a/sinks/httpSink.go b/sinks/httpSink.go index e443ceb..25b0082 100644 --- a/sinks/httpSink.go +++ b/sinks/httpSink.go @@ -7,19 +7,21 @@ import ( "net/http" "time" - lp "github.com/influxdata/line-protocol" + lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric" + influx "github.com/influxdata/line-protocol" ) type HttpSink struct { - Sink + sink client *http.Client url, jwt string - encoder *lp.Encoder + encoder *influx.Encoder buffer *bytes.Buffer } -func (s *HttpSink) Init(config SinkConfig) error { - if len(config.Host) == 0 || len(config.Port) == 0 { +func (s *HttpSink) Init(config sinkConfig) error { + s.name = "HttpSink" + if len(config.Host) == 0 || len(config.Port) == 0 || len(config.Database) == 0 { return errors.New("`host`, `port` and `database` config options required for TCP sink") } @@ -28,13 +30,13 @@ func (s *HttpSink) Init(config SinkConfig) error { s.port = config.Port s.jwt = config.Password s.buffer = &bytes.Buffer{} - s.encoder = lp.NewEncoder(s.buffer) + s.encoder = influx.NewEncoder(s.buffer) s.encoder.SetPrecision(time.Second) return nil } -func (s *HttpSink) Write(point lp.MutableMetric) error { +func (s *HttpSink) Write(point lp.CCMetric) error { _, err := s.encoder.Encode(point) return err } diff --git a/sinks/influxSink.go b/sinks/influxSink.go index 40e681f..dca1572 100644 --- a/sinks/influxSink.go +++ b/sinks/influxSink.go @@ -5,15 +5,14 @@ import ( "crypto/tls" "errors" "fmt" - + lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric" influxdb2 "github.com/influxdata/influxdb-client-go/v2" influxdb2Api "github.com/influxdata/influxdb-client-go/v2/api" - lp "github.com/influxdata/line-protocol" "log" ) type InfluxSink struct { - Sink + sink client influxdb2.Client writeApi influxdb2Api.WriteAPIBlocking retPolicy string @@ -39,7 +38,8 @@ func (s *InfluxSink) connect() error { return nil } -func (s *InfluxSink) Init(config SinkConfig) error { +func (s *InfluxSink) Init(config sinkConfig) error { + 
s.name = "InfluxSink" if len(config.Host) == 0 || len(config.Port) == 0 || len(config.Database) == 0 || @@ -54,15 +54,21 @@ func (s *InfluxSink) Init(config SinkConfig) error { s.user = config.User s.password = config.Password s.ssl = config.SSL + s.meta_as_tags = config.MetaAsTags return s.connect() } -func (s *InfluxSink) Write(point lp.MutableMetric) error { +func (s *InfluxSink) Write(point lp.CCMetric) error { tags := map[string]string{} fields := map[string]interface{}{} for _, t := range point.TagList() { tags[t.Key] = t.Value } + if s.meta_as_tags { + for _, m := range point.MetaList() { + tags[m.Key] = m.Value + } + } for _, f := range point.FieldList() { fields[f.Key] = f.Value } diff --git a/sinks/metricSink.go b/sinks/metricSink.go index 182495a..25f66bb 100644 --- a/sinks/metricSink.go +++ b/sinks/metricSink.go @@ -2,21 +2,22 @@ package sinks import ( // "time" - lp "github.com/influxdata/line-protocol" + lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric" ) -type SinkConfig struct { - Host string `json:"host"` - Port string `json:"port"` - Database string `json:"database"` - User string `json:"user"` - Password string `json:"password"` - Organization string `json:"organization"` +type sinkConfig struct { Type string `json:"type"` - SSL bool `json:"ssl"` + Host string `json:"host,omitempty"` + Port string `json:"port,omitempty"` + Database string `json:"database,omitempty"` + User string `json:"user,omitempty"` + Password string `json:"password,omitempty"` + Organization string `json:"organization,omitempty"` + SSL bool `json:"ssl,omitempty"` + MetaAsTags bool `json:"meta_as_tags,omitempty"` } -type Sink struct { +type sink struct { host string port string user string @@ -24,11 +25,18 @@ type Sink struct { database string organization string ssl bool + meta_as_tags bool + name string } -type SinkFuncs interface { - Init(config SinkConfig) error - Write(point lp.MutableMetric) error +type Sink interface { + Init(config sinkConfig) error 
+ Write(point lp.CCMetric) error Flush() error Close() + Name() string +} + +func (s *sink) Name() string { + return s.name } diff --git a/sinks/natsSink.go b/sinks/natsSink.go index 0df14f4..f9cd7eb 100644 --- a/sinks/natsSink.go +++ b/sinks/natsSink.go @@ -4,16 +4,17 @@ import ( "bytes" "errors" "fmt" - lp "github.com/influxdata/line-protocol" + lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric" + influx "github.com/influxdata/line-protocol" nats "github.com/nats-io/nats.go" "log" "time" ) type NatsSink struct { - Sink + sink client *nats.Conn - encoder *lp.Encoder + encoder *influx.Encoder buffer *bytes.Buffer } @@ -31,7 +32,8 @@ func (s *NatsSink) connect() error { return nil } -func (s *NatsSink) Init(config SinkConfig) error { +func (s *NatsSink) Init(config sinkConfig) error { + s.name = "NatsSink" if len(config.Host) == 0 || len(config.Port) == 0 || len(config.Database) == 0 { @@ -46,40 +48,31 @@ func (s *NatsSink) Init(config SinkConfig) error { // Setup Influx line protocol s.buffer = &bytes.Buffer{} s.buffer.Grow(1025) - s.encoder = lp.NewEncoder(s.buffer) + s.encoder = influx.NewEncoder(s.buffer) s.encoder.SetPrecision(time.Second) s.encoder.SetMaxLineBytes(1024) // Setup infos for connection return s.connect() } -func (s *NatsSink) Write(point lp.MutableMetric) error { +func (s *NatsSink) Write(point lp.CCMetric) error { if s.client != nil { - // var tags map[string]string - // var fields map[string]interface{} - // for _, t := range point.TagList() { - // tags[t.Key] = t.Value - // } - // for _, f := range point.FieldList() { - // fields[f.Key] = f.Value - // } - // m, err := protocol.New(point.Name(), tags, fields, point.Time()) - // if err != nil { - // log.Print(err) - // return err - // } _, err := s.encoder.Encode(point) if err != nil { log.Print(err) return err } - s.client.Publish(s.database, s.buffer.Bytes()) - s.buffer.Reset() } return nil } func (s *NatsSink) Flush() error { + if s.client != nil { + if err := 
s.client.Publish(s.database, s.buffer.Bytes()); err != nil { + return err + } + s.buffer.Reset() + } return nil } diff --git a/sinks/sinkManager.go b/sinks/sinkManager.go new file mode 100644 index 0000000..beb0f32 --- /dev/null +++ b/sinks/sinkManager.go @@ -0,0 +1,141 @@ +package sinks + +import ( + "encoding/json" + "log" + "os" + "sync" + + lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric" +) + +var AvailableSinks = map[string]Sink{ + "influxdb": &InfluxSink{}, + "stdout": &StdoutSink{}, + "nats": &NatsSink{}, + "http": &HttpSink{}, +} + +type sinkManager struct { + input chan lp.CCMetric + outputs []Sink + done chan bool + wg *sync.WaitGroup + config []sinkConfig +} + +type SinkManager interface { + Init(wg *sync.WaitGroup, sinkConfigFile string) error + AddInput(input chan lp.CCMetric) + AddOutput(config json.RawMessage) error + Start() + Close() +} + +func (sm *sinkManager) Init(wg *sync.WaitGroup, sinkConfigFile string) error { + sm.input = nil + sm.outputs = make([]Sink, 0) + sm.done = make(chan bool) + sm.wg = wg + sm.config = make([]sinkConfig, 0) + if len(sinkConfigFile) > 0 { + configFile, err := os.Open(sinkConfigFile) + if err != nil { + log.Print("[SinkManager] ", err.Error()) + return err + } + defer configFile.Close() + jsonParser := json.NewDecoder(configFile) + var rawConfigs []json.RawMessage + err = jsonParser.Decode(&rawConfigs) + if err != nil { + log.Print("[SinkManager] ", err.Error()) + return err + } + for _, raw := range rawConfigs { + err = sm.AddOutput(raw) + if err != nil { + continue + } + } + } + return nil +} + +func (sm *sinkManager) Start() { + sm.wg.Add(1) + batchcount := 20 + go func() { + for { + SinkManagerLoop: + select { + case <-sm.done: + for _, s := range sm.outputs { + s.Close() + } + log.Print("[SinkManager] DONE\n") + sm.wg.Done() + break SinkManagerLoop + case p := <-sm.input: + log.Print("[SinkManager] WRITE ", p) + for _, s := range sm.outputs { + s.Write(p) + } + if batchcount == 0 { + 
log.Print("[SinkManager] FLUSH") + for _, s := range sm.outputs { + s.Flush() + } + batchcount = 20 + } + batchcount-- + default: + } + } + log.Print("[SinkManager] EXIT\n") + }() + log.Print("[SinkManager] STARTED\n") +} + +func (sm *sinkManager) AddInput(input chan lp.CCMetric) { + sm.input = input +} + +func (sm *sinkManager) AddOutput(rawConfig json.RawMessage) error { + var err error + var config sinkConfig + if len(rawConfig) > 3 { + err = json.Unmarshal(rawConfig, &config) + if err != nil { + log.Print("[SinkManager] SKIP ", config.Type, " JSON config error: ", err.Error()) + return err + } + } + if _, found := AvailableSinks[config.Type]; !found { + log.Print("[SinkManager] SKIP ", config.Type, " unknown sink: ", err.Error()) + return err + } + s := AvailableSinks[config.Type] + err = s.Init(config) + if err != nil { + log.Print("[SinkManager] SKIP ", s.Name(), " initialization failed: ", err.Error()) + return err + } + sm.outputs = append(sm.outputs, s) + sm.config = append(sm.config, config) + return nil +} + +func (sm *sinkManager) Close() { + sm.done <- true + log.Print("[SinkManager] CLOSE") +} + +func New(wg *sync.WaitGroup, sinkConfigFile string) (SinkManager, error) { + sm := &sinkManager{} + err := sm.Init(wg, sinkConfigFile) + if err != nil { + return nil, err + } + return sm, err +} diff --git a/sinks/stdoutSink.go b/sinks/stdoutSink.go index 8016fcb..215239f 100644 --- a/sinks/stdoutSink.go +++ b/sinks/stdoutSink.go @@ -6,23 +6,30 @@ import ( "strings" // "time" - lp "github.com/influxdata/line-protocol" + lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric" ) type StdoutSink struct { - Sink + sink } -func (s *StdoutSink) Init(config SinkConfig) error { +func (s *StdoutSink) Init(config sinkConfig) error { + s.name = "StdoutSink" + s.meta_as_tags = config.MetaAsTags return nil } -func (s *StdoutSink) Write(point lp.MutableMetric) error { +func (s *StdoutSink) Write(point lp.CCMetric) error { var tagsstr []string var fieldstr 
[]string for _, t := range point.TagList() { tagsstr = append(tagsstr, fmt.Sprintf("%s=%s", t.Key, t.Value)) } + if s.meta_as_tags { + for _, m := range point.MetaList() { + tagsstr = append(tagsstr, fmt.Sprintf("%s=%s", m.Key, m.Value)) + } + } for _, f := range point.FieldList() { switch f.Value.(type) { case float64: From 99aaece6c280548f6273ca20a992d43b14f50dc0 Mon Sep 17 00:00:00 2001 From: Thomas Roehl Date: Tue, 25 Jan 2022 15:46:41 +0100 Subject: [PATCH 24/45] Activate --once option and return proper exit Code with os.Exit() --- metric-collector.go | 31 ++++++++++++++++++++++--------- 1 file changed, 22 insertions(+), 9 deletions(-) diff --git a/metric-collector.go b/metric-collector.go index c071933..a205a4f 100644 --- a/metric-collector.go +++ b/metric-collector.go @@ -189,7 +189,7 @@ func prepare_shutdown(config *RuntimeConfig) { }(config) } -func main() { +func mainFunc() int { var err error use_recv := false @@ -201,23 +201,23 @@ func main() { if err != nil { log.Print("Error reading configuration file ", rcfg.CliArgs["configfile"]) log.Print(err.Error()) - return + return 1 } if rcfg.ConfigFile.Interval <= 0 || time.Duration(rcfg.ConfigFile.Interval)*time.Second <= 0 { log.Print("Configuration value 'interval' must be greater than zero") - return + return 1 } rcfg.Interval = time.Duration(rcfg.ConfigFile.Interval) * time.Second if rcfg.ConfigFile.Duration <= 0 || time.Duration(rcfg.ConfigFile.Duration)*time.Second <= 0 { log.Print("Configuration value 'duration' must be greater than zero") - return + return 1 } rcfg.Duration = time.Duration(rcfg.ConfigFile.Duration) * time.Second rcfg.Hostname, err = os.Hostname() if err != nil { log.Print(err.Error()) - return + return 1 } // Drop domain part of host name rcfg.Hostname = strings.SplitN(rcfg.Hostname, `.`, 2)[0] @@ -232,14 +232,14 @@ func main() { rcfg.Router, err = mr.New(rcfg.Ticker, &rcfg.Sync, rcfg.ConfigFile.RouterConfigFile) if err != nil { log.Print(err.Error()) - return + return 1 } } if 
len(rcfg.ConfigFile.SinkConfigFile) > 0 { rcfg.SinkManager, err = sinks.New(&rcfg.Sync, rcfg.ConfigFile.SinkConfigFile) if err != nil { log.Print(err.Error()) - return + return 1 } RouterToSinksChannel := make(chan lp.CCMetric) rcfg.SinkManager.AddInput(RouterToSinksChannel) @@ -249,7 +249,7 @@ func main() { rcfg.CollectManager, err = collectors.New(rcfg.Ticker, rcfg.Duration, &rcfg.Sync, rcfg.ConfigFile.CollectorConfigFile) if err != nil { log.Print(err.Error()) - return + return 1 } CollectToRouterChannel := make(chan lp.CCMetric) rcfg.CollectManager.AddOutput(CollectToRouterChannel) @@ -259,7 +259,7 @@ func main() { rcfg.ReceiveManager, err = receivers.New(&rcfg.Sync, rcfg.ConfigFile.ReceiverConfigFile) if err != nil { log.Print(err.Error()) - return + return 1 } ReceiveToRouterChannel := make(chan lp.CCMetric) rcfg.ReceiveManager.AddOutput(ReceiveToRouterChannel) @@ -276,6 +276,19 @@ func main() { rcfg.ReceiveManager.Start() } + // Wait until one tick has passed. This is a workaround + if rcfg.CliArgs["once"] == "true" { + var x int = (1.8 * float64(rcfg.ConfigFile.Interval)) + time.Sleep(time.Duration(int(x)) * time.Second) + shutdown(&rcfg) + } + // Wait until receiving an interrupt rcfg.Sync.Wait() + return 0 +} + +func main() { + exitCode := mainFunc() + os.Exit(exitCode) } From a40d1c954b36941737a35bf4b9dc424bfda04d12 Mon Sep 17 00:00:00 2001 From: Holger Obermaier <40787752+ho-ob@users.noreply.github.com> Date: Tue, 25 Jan 2022 16:33:23 +0100 Subject: [PATCH 25/45] Fix data type mismatch --- metric-collector.go | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/metric-collector.go b/metric-collector.go index a205a4f..b95b38f 100644 --- a/metric-collector.go +++ b/metric-collector.go @@ -17,7 +17,6 @@ import ( "sync" "time" - lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric" mr "github.com/ClusterCockpit/cc-metric-collector/internal/metricRouter" mct 
"github.com/ClusterCockpit/cc-metric-collector/internal/multiChanTicker" @@ -278,7 +277,7 @@ func mainFunc() int { // Wait until one tick has passed. This is a workaround if rcfg.CliArgs["once"] == "true" { - var x int = (1.8 * float64(rcfg.ConfigFile.Interval)) + x := 1.8 * float64(rcfg.ConfigFile.Interval) time.Sleep(time.Duration(int(x)) * time.Second) shutdown(&rcfg) } From bafc6322e674d89e3565d13e03b003d9977c58f9 Mon Sep 17 00:00:00 2001 From: Thomas Roehl Date: Tue, 25 Jan 2022 16:40:02 +0100 Subject: [PATCH 26/45] Change to own Logger --- collectors/collectorManager.go | 23 ++++--- go.mod | 1 + internal/ccLogger/cclogger.go | 111 +++++++++++++++++++++++++++++++++ metric-collector.go | 37 +++++------ 4 files changed, 140 insertions(+), 32 deletions(-) create mode 100644 internal/ccLogger/cclogger.go diff --git a/collectors/collectorManager.go b/collectors/collectorManager.go index 9543431..73b2891 100644 --- a/collectors/collectorManager.go +++ b/collectors/collectorManager.go @@ -2,13 +2,13 @@ package collectors import ( "encoding/json" - "log" "os" "sync" "time" lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric" mct "github.com/ClusterCockpit/cc-metric-collector/internal/multiChanTicker" + cclog "github.com/ClusterCockpit/cc-metric-collector/internal/ccLogger" ) var AvailableCollectors = map[string]MetricCollector{ @@ -58,29 +58,29 @@ func (cm *collectorManager) Init(ticker mct.MultiChanTicker, duration time.Durat cm.duration = duration configFile, err := os.Open(collectConfigFile) if err != nil { - log.Print(err.Error()) + cclog.Error(err.Error()) return err } defer configFile.Close() jsonParser := json.NewDecoder(configFile) err = jsonParser.Decode(&cm.config) if err != nil { - log.Print(err.Error()) + cclog.Error(err.Error()) return err } for k, cfg := range cm.config { - log.Print(k, " ", cfg) if _, found := AvailableCollectors[k]; !found { - log.Print("[CollectorManager] SKIP unknown collector ", k) + 
cclog.ComponentPrint("CollectorManager", "SKIP unknown collector ", k) continue } c := AvailableCollectors[k] err = c.Init(cfg) if err != nil { - log.Print("[CollectorManager] Collector ", k, "initialization failed: ", err.Error()) + cclog.ComponentPrint("CollectorManager", "Collector ", k, "initialization failed: ", err.Error()) continue } + cclog.ComponentDebug("CollectorManager", "Collector ", k, "initialized") cm.collectors = append(cm.collectors, c) } return nil @@ -99,7 +99,7 @@ func (cm *collectorManager) Start() { c.Close() } cm.wg.Done() - log.Print("[CollectorManager] DONE\n") + cclog.ComponentPrint("CollectorManager", "DONE") break CollectorManagerLoop case t := <-tick: for _, c := range cm.collectors { @@ -110,18 +110,17 @@ func (cm *collectorManager) Start() { c.Close() } cm.wg.Done() - log.Print("[CollectorManager] DONE\n") + cclog.ComponentPrint("CollectorManager", "DONE") break CollectorManagerInputLoop default: - log.Print("[CollectorManager] ", c.Name(), " ", t) + cclog.ComponentPrint("CollectorManager", c.Name(), " ", t) c.Read(cm.duration, cm.output) } } } } - log.Print("[CollectorManager] EXIT\n") }() - log.Print("[CollectorManager] STARTED\n") + cclog.ComponentPrint("CollectorManager", "STARTED") } func (cm *collectorManager) AddOutput(output chan lp.CCMetric) { @@ -130,7 +129,7 @@ func (cm *collectorManager) AddOutput(output chan lp.CCMetric) { func (cm *collectorManager) Close() { cm.done <- true - log.Print("[CollectorManager] CLOSE") + cclog.ComponentPrint("CollectorManager", "CLOSE") } func New(ticker mct.MultiChanTicker, duration time.Duration, wg *sync.WaitGroup, collectConfigFile string) (CollectorManager, error) { diff --git a/go.mod b/go.mod index be384b6..d20d431 100644 --- a/go.mod +++ b/go.mod @@ -9,5 +9,6 @@ require ( github.com/nats-io/nats.go v1.10.0 github.com/nats-io/nkeys v0.1.4 // indirect github.com/prometheus/client_golang v1.10.0 // indirect + golang.org/x/sys v0.0.0-20210309074719-68d13333faf2 
gopkg.in/Knetic/govaluate.v2 v2.3.0 ) diff --git a/internal/ccLogger/cclogger.go b/internal/ccLogger/cclogger.go new file mode 100644 index 0000000..ad5b986 --- /dev/null +++ b/internal/ccLogger/cclogger.go @@ -0,0 +1,111 @@ +package cclogger + +import ( + "fmt" + "runtime" + "os" + "log" +) + + +var ( + globalDebug = false + stdout = os.Stdout + stderr = os.Stderr + debugLog *log.Logger = nil + infoLog *log.Logger = nil + errorLog *log.Logger = nil + warnLog *log.Logger = nil + defaultLog *log.Logger = nil +) + +func initLogger() { + if debugLog == nil { + debugLog = log.New(stderr, "DEBUG", log.LstdFlags) + } + if infoLog == nil { + infoLog = log.New(stdout, "INFO", log.LstdFlags) + } + if errorLog == nil { + errorLog = log.New(stderr, "ERROR", log.LstdFlags) + } + if warnLog == nil { + warnLog = log.New(stderr, "WARN", log.LstdFlags) + } + if defaultLog == nil { + defaultLog = log.New(stdout, "", log.LstdFlags) + } +} + +func CCPrint(logger *log.Logger, e ... interface {}) { + if logger != nil { + logger.Print(e) + } +} + +func Print(e ... interface{}) { + CCPrint(defaultLog, e) +} + +func ComponentPrint(component string, e ... interface{}) { + CCPrint(defaultLog, fmt.Sprintf("[%s]", component), e) +} + +func Info(e ... interface{}) { + CCPrint(infoLog, e) +} + +func ComponentInfo(component string, e ... interface{}) { + CCPrint(infoLog, fmt.Sprintf("[%s]", component), e) +} + +func Debug(e ... interface{}) { + if globalDebug { + CCPrint(debugLog, e) + } +} + +func ComponentDebug(component string, e ... interface{}) { + if globalDebug { + CCPrint(debugLog, fmt.Sprintf("[%s]", component), e) + } +} + +func Error(e ... interface{}) { + _, fn, line, _ := runtime.Caller(1) + CCPrint(errorLog, fn, line, e) +} + +func ComponentError(component string, e ... 
interface{}) { + _, fn, line, _ := runtime.Caller(1) + CCPrint(errorLog, fmt.Sprintf("[%s]", component), fn, line, e) +} + +func SetDebug() { + globalDebug = true +} + + +func SetOutput(filename string) { + if filename == "stderr" { + if stderr != os.Stderr && stderr != os.Stdout { + stderr.Close() + } + stderr = os.Stderr + } else if filename == "stdout" { + if stderr != os.Stderr && stderr != os.Stdout { + stderr.Close() + } + stderr = os.Stdout + } else { + file, err := os.OpenFile(filename, os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0600) + if err == nil { + defer file.Close() + stderr = file + } + } + debugLog = nil + errorLog = nil + warnLog = nil + initLogger() +} diff --git a/metric-collector.go b/metric-collector.go index a205a4f..56d9cab 100644 --- a/metric-collector.go +++ b/metric-collector.go @@ -3,8 +3,7 @@ package main import ( "encoding/json" "flag" - "fmt" - "log" +// "log" "os" "os/signal" "strings" @@ -17,7 +16,7 @@ import ( "sync" "time" - + cclog "github.com/ClusterCockpit/cc-metric-collector/internal/ccLogger" lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric" mr "github.com/ClusterCockpit/cc-metric-collector/internal/metricRouter" mct "github.com/ClusterCockpit/cc-metric-collector/internal/multiChanTicker" @@ -37,7 +36,7 @@ func LoadCentralConfiguration(file string, config *CentralConfigFile) error { configFile, err := os.Open(file) defer configFile.Close() if err != nil { - fmt.Println(err.Error()) + cclog.Error(err.Error()) return err } jsonParser := json.NewDecoder(configFile) @@ -151,21 +150,21 @@ func ReadCli() map[string]string { // General shutdown function that gets executed in case of interrupt or graceful shutdown func shutdown(config *RuntimeConfig) { - log.Print("Shutdown...") + cclog.Info("Shutdown...") if config.CollectManager != nil { - log.Print("Shutdown CollectManager...") + cclog.Debug("Shutdown CollectManager...") config.CollectManager.Close() } if config.ReceiveManager != nil { - log.Print("Shutdown 
ReceiveManager...") + cclog.Debug("Shutdown ReceiveManager...") config.ReceiveManager.Close() } if config.Router != nil { - log.Print("Shutdown Router...") + cclog.Debug("Shutdown Router...") config.Router.Close() } if config.SinkManager != nil { - log.Print("Shutdown SinkManager...") + cclog.Debug("Shutdown SinkManager...") config.SinkManager.Close() } @@ -184,7 +183,6 @@ func prepare_shutdown(config *RuntimeConfig) { go func(config *RuntimeConfig) { <-sigs - log.Print("Shutdown...") shutdown(config) }(config) } @@ -199,24 +197,23 @@ func mainFunc() int { // Load and check configuration err = LoadCentralConfiguration(rcfg.CliArgs["configfile"], &rcfg.ConfigFile) if err != nil { - log.Print("Error reading configuration file ", rcfg.CliArgs["configfile"]) - log.Print(err.Error()) + cclog.Error("Error reading configuration file ", rcfg.CliArgs["configfile"], ": ", err.Error()) return 1 } if rcfg.ConfigFile.Interval <= 0 || time.Duration(rcfg.ConfigFile.Interval)*time.Second <= 0 { - log.Print("Configuration value 'interval' must be greater than zero") + cclog.Error("Configuration value 'interval' must be greater than zero") return 1 } rcfg.Interval = time.Duration(rcfg.ConfigFile.Interval) * time.Second if rcfg.ConfigFile.Duration <= 0 || time.Duration(rcfg.ConfigFile.Duration)*time.Second <= 0 { - log.Print("Configuration value 'duration' must be greater than zero") + cclog.Error("Configuration value 'duration' must be greater than zero") return 1 } rcfg.Duration = time.Duration(rcfg.ConfigFile.Duration) * time.Second rcfg.Hostname, err = os.Hostname() if err != nil { - log.Print(err.Error()) + cclog.Error(err.Error()) return 1 } // Drop domain part of host name @@ -231,14 +228,14 @@ func mainFunc() int { if len(rcfg.ConfigFile.RouterConfigFile) > 0 { rcfg.Router, err = mr.New(rcfg.Ticker, &rcfg.Sync, rcfg.ConfigFile.RouterConfigFile) if err != nil { - log.Print(err.Error()) + cclog.Error(err.Error()) return 1 } } if len(rcfg.ConfigFile.SinkConfigFile) > 0 { 
rcfg.SinkManager, err = sinks.New(&rcfg.Sync, rcfg.ConfigFile.SinkConfigFile) if err != nil { - log.Print(err.Error()) + cclog.Error(err.Error()) return 1 } RouterToSinksChannel := make(chan lp.CCMetric) @@ -248,7 +245,7 @@ func mainFunc() int { if len(rcfg.ConfigFile.CollectorConfigFile) > 0 { rcfg.CollectManager, err = collectors.New(rcfg.Ticker, rcfg.Duration, &rcfg.Sync, rcfg.ConfigFile.CollectorConfigFile) if err != nil { - log.Print(err.Error()) + cclog.Error(err.Error()) return 1 } CollectToRouterChannel := make(chan lp.CCMetric) @@ -258,7 +255,7 @@ func mainFunc() int { if len(rcfg.ConfigFile.ReceiverConfigFile) > 0 { rcfg.ReceiveManager, err = receivers.New(&rcfg.Sync, rcfg.ConfigFile.ReceiverConfigFile) if err != nil { - log.Print(err.Error()) + cclog.Error(err.Error()) return 1 } ReceiveToRouterChannel := make(chan lp.CCMetric) @@ -278,7 +275,7 @@ func mainFunc() int { // Wait until one tick has passed. This is a workaround if rcfg.CliArgs["once"] == "true" { - var x int = (1.8 * float64(rcfg.ConfigFile.Interval)) + var x float64 = (1.8 * float64(rcfg.ConfigFile.Interval)) time.Sleep(time.Duration(int(x)) * time.Second) shutdown(&rcfg) } From b4fde31626590589da79fff84a1666f07b6288b6 Mon Sep 17 00:00:00 2001 From: Holger Obermaier <40787752+ho-ob@users.noreply.github.com> Date: Tue, 25 Jan 2022 17:20:20 +0100 Subject: [PATCH 27/45] Add documentation --- collectors/metricCollector.go | 30 +++++++++++++++++++++--------- 1 file changed, 21 insertions(+), 9 deletions(-) diff --git a/collectors/metricCollector.go b/collectors/metricCollector.go index 6bc9047..3484dca 100644 --- a/collectors/metricCollector.go +++ b/collectors/metricCollector.go @@ -2,14 +2,15 @@ package collectors import ( "encoding/json" - "errors" - lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric" - influx "github.com/influxdata/line-protocol" + "fmt" "io/ioutil" "log" "strconv" "strings" "time" + + lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric" + 
influx "github.com/influxdata/line-protocol" ) type MetricCollector interface { @@ -21,12 +22,12 @@ type MetricCollector interface { } type metricCollector struct { - output chan lp.CCMetric - name string - init bool - meta map[string]string + name string + init bool + meta map[string]string } +// Name() returns the name of the metric collector func (c *metricCollector) Name() string { return c.name } @@ -35,10 +36,14 @@ func (c *metricCollector) setup() error { return nil } +// Initialized() indicates whether the metric collector has been initialized. func (c *metricCollector) Initialized() bool { - return c.init == true + return c.init } +// intArrayContains scans an array of ints if the value str is present in the array +// If the specified value is found, the corresponding array index is returned. +// The bool value is used to signal success or failure func intArrayContains(array []int, str int) (int, bool) { for i, a := range array { if a == str { @@ -48,6 +53,9 @@ func intArrayContains(array []int, str int) (int, bool) { return -1, false } +// stringArrayContains scans an array of strings if the value str is present in the array +// If the specified value is found, the corresponding array index is returned. 
+// The bool value is used to signal success or failure func stringArrayContains(array []string, str string) (int, bool) { for i, a := range array { if a == str { @@ -107,6 +115,7 @@ func CpuList() []int { return cpulist } +// Tags2Map stores a InfluxDB list of tags in a map of key value pairs func Tags2Map(metric influx.Metric) map[string]string { tags := make(map[string]string) for _, t := range metric.TagList() { @@ -115,6 +124,7 @@ func Tags2Map(metric influx.Metric) map[string]string { return tags } +// Fields2Map stores a InfluxDB list of fields in a map of key value pairs func Fields2Map(metric influx.Metric) map[string]interface{} { fields := make(map[string]interface{}) for _, f := range metric.FieldList() { @@ -123,11 +133,13 @@ func Fields2Map(metric influx.Metric) map[string]interface{} { return fields } +// RemoveFromStringList removes the string r from the array of strings s +// If r is not contained in the array an error is returned func RemoveFromStringList(s []string, r string) ([]string, error) { for i, item := range s { if r == item { return append(s[:i], s[i+1:]...), nil } } - return s, errors.New("No such string in list") + return s, fmt.Errorf("No such string in list") } From 2925ad9f402a56e1ba9d188131ae8795a03b407f Mon Sep 17 00:00:00 2001 From: Thomas Roehl Date: Tue, 25 Jan 2022 17:43:10 +0100 Subject: [PATCH 28/45] Use ccLogger anywhere --- collectors/collectorManager.go | 16 +++---- collectors/tempMetric.go | 4 +- internal/ccLogger/cclogger.go | 44 ++++++++++--------- internal/metricRouter/metricRouter.go | 25 +++++------ metric-collector.go | 11 +++++ receivers/natsReceiver.go | 9 ++-- receivers/receiveManager.go | 63 +++++---------------------- sinks/sinkManager.go | 25 ++++++----- 8 files changed, 85 insertions(+), 112 deletions(-) diff --git a/collectors/collectorManager.go b/collectors/collectorManager.go index 73b2891..88cfdf8 100644 --- a/collectors/collectorManager.go +++ b/collectors/collectorManager.go @@ -70,17 +70,17 @@ func 
(cm *collectorManager) Init(ticker mct.MultiChanTicker, duration time.Durat } for k, cfg := range cm.config { if _, found := AvailableCollectors[k]; !found { - cclog.ComponentPrint("CollectorManager", "SKIP unknown collector ", k) + cclog.ComponentError("CollectorManager", "SKIP unknown collector", k) continue } c := AvailableCollectors[k] err = c.Init(cfg) if err != nil { - cclog.ComponentPrint("CollectorManager", "Collector ", k, "initialization failed: ", err.Error()) + cclog.ComponentError("CollectorManager", "Collector", k, "initialization failed:", err.Error()) continue } - cclog.ComponentDebug("CollectorManager", "Collector ", k, "initialized") + cclog.ComponentDebug("CollectorManager", "ADD COLLECTOR", c.Name()) cm.collectors = append(cm.collectors, c) } return nil @@ -99,7 +99,7 @@ func (cm *collectorManager) Start() { c.Close() } cm.wg.Done() - cclog.ComponentPrint("CollectorManager", "DONE") + cclog.ComponentDebug("CollectorManager", "DONE") break CollectorManagerLoop case t := <-tick: for _, c := range cm.collectors { @@ -110,17 +110,17 @@ func (cm *collectorManager) Start() { c.Close() } cm.wg.Done() - cclog.ComponentPrint("CollectorManager", "DONE") + cclog.ComponentDebug("CollectorManager", "DONE") break CollectorManagerInputLoop default: - cclog.ComponentPrint("CollectorManager", c.Name(), " ", t) + cclog.ComponentDebug("CollectorManager", c.Name(), t) c.Read(cm.duration, cm.output) } } } } }() - cclog.ComponentPrint("CollectorManager", "STARTED") + cclog.ComponentDebug("CollectorManager", "STARTED") } func (cm *collectorManager) AddOutput(output chan lp.CCMetric) { @@ -129,7 +129,7 @@ func (cm *collectorManager) AddOutput(output chan lp.CCMetric) { func (cm *collectorManager) Close() { cm.done <- true - cclog.ComponentPrint("CollectorManager", "CLOSE") + cclog.ComponentDebug("CollectorManager", "CLOSE") } func New(ticker mct.MultiChanTicker, duration time.Duration, wg *sync.WaitGroup, collectConfigFile string) (CollectorManager, error) { diff --git 
a/collectors/tempMetric.go b/collectors/tempMetric.go index b73d582..caa726e 100644 --- a/collectors/tempMetric.go +++ b/collectors/tempMetric.go @@ -4,7 +4,7 @@ import ( "encoding/json" "fmt" "io/ioutil" - "log" + cclog "github.com/ClusterCockpit/cc-metric-collector/internal/ccLogger" "os" "path/filepath" "strconv" @@ -102,7 +102,7 @@ func (m *TempCollector) Read(interval time.Duration, output chan lp.CCMetric) { if err == nil { y, err := lp.New(strings.ToLower(mname), tags, m.meta, map[string]interface{}{"value": int(float64(x) / 1000)}, time.Now()) if err == nil { - log.Print("[", m.name, "] ", y) + cclog.ComponentDebug(m.name, y) output <- y } } diff --git a/internal/ccLogger/cclogger.go b/internal/ccLogger/cclogger.go index ad5b986..ee92376 100644 --- a/internal/ccLogger/cclogger.go +++ b/internal/ccLogger/cclogger.go @@ -21,68 +21,72 @@ var ( func initLogger() { if debugLog == nil { - debugLog = log.New(stderr, "DEBUG", log.LstdFlags) + debugLog = log.New(stderr, "DEBUG ", log.LstdFlags) } if infoLog == nil { - infoLog = log.New(stdout, "INFO", log.LstdFlags) + infoLog = log.New(stdout, "INFO ", log.LstdFlags) } if errorLog == nil { - errorLog = log.New(stderr, "ERROR", log.LstdFlags) + errorLog = log.New(stderr, "ERROR ", log.LstdFlags) } if warnLog == nil { - warnLog = log.New(stderr, "WARN", log.LstdFlags) + warnLog = log.New(stderr, "WARN ", log.LstdFlags) } if defaultLog == nil { defaultLog = log.New(stdout, "", log.LstdFlags) } } -func CCPrint(logger *log.Logger, e ... interface {}) { - if logger != nil { - logger.Print(e) - } -} - func Print(e ... interface{}) { - CCPrint(defaultLog, e) + initLogger() + defaultLog.Print(e) } func ComponentPrint(component string, e ... interface{}) { - CCPrint(defaultLog, fmt.Sprintf("[%s]", component), e) + initLogger() + defaultLog.Print(fmt.Sprintf("[%s] ", component), e) } func Info(e ... interface{}) { - CCPrint(infoLog, e) + initLogger() + infoLog.Print(e) } func ComponentInfo(component string, e ... 
interface{}) { - CCPrint(infoLog, fmt.Sprintf("[%s]", component), e) + initLogger() + infoLog.Print(fmt.Sprintf("[%s] ", component), e) } func Debug(e ... interface{}) { - if globalDebug { - CCPrint(debugLog, e) + initLogger() + if globalDebug == true { + debugLog.Print(e) } } func ComponentDebug(component string, e ... interface{}) { - if globalDebug { - CCPrint(debugLog, fmt.Sprintf("[%s]", component), e) + initLogger() + if globalDebug == true && debugLog != nil { + //CCComponentPrint(debugLog, component, e) + debugLog.Print(fmt.Sprintf("[%s] ", component), e) } } func Error(e ... interface{}) { + initLogger() _, fn, line, _ := runtime.Caller(1) - CCPrint(errorLog, fn, line, e) + errorLog.Print(fmt.Sprintf("[%s:%d] ", fn, line), e) } func ComponentError(component string, e ... interface{}) { + initLogger() _, fn, line, _ := runtime.Caller(1) - CCPrint(errorLog, fmt.Sprintf("[%s]", component), fn, line, e) + errorLog.Print(fmt.Sprintf("[%s|%s:%d] ", component, fn, line), e) } func SetDebug() { globalDebug = true + initLogger() } diff --git a/internal/metricRouter/metricRouter.go b/internal/metricRouter/metricRouter.go index 25b0dc2..5fd55ba 100644 --- a/internal/metricRouter/metricRouter.go +++ b/internal/metricRouter/metricRouter.go @@ -2,7 +2,7 @@ package metricRouter import ( "encoding/json" - "log" + cclog "github.com/ClusterCockpit/cc-metric-collector/internal/ccLogger" "os" "sync" "time" @@ -50,14 +50,14 @@ func (r *metricRouter) Init(ticker mct.MultiChanTicker, wg *sync.WaitGroup, rout r.ticker = ticker configFile, err := os.Open(routerConfigFile) if err != nil { - log.Print(err.Error()) + cclog.ComponentError("MetricRouter", err.Error()) return err } defer configFile.Close() jsonParser := json.NewDecoder(configFile) err = jsonParser.Decode(&r.config) if err != nil { - log.Print(err.Error()) + cclog.ComponentError("MetricRouter", err.Error()) return err } return nil @@ -79,7 +79,7 @@ func (r *metricRouter) StartTimer() { func (r *metricRouter) 
EvalCondition(Cond string, point lp.CCMetric) (bool, error) { expression, err := govaluate.NewEvaluableExpression(Cond) if err != nil { - log.Print(Cond, " = ", err.Error()) + cclog.ComponentDebug("MetricRouter", Cond, " = ", err.Error()) return false, err } params := make(map[string]interface{}) @@ -97,7 +97,7 @@ func (r *metricRouter) EvalCondition(Cond string, point lp.CCMetric) (bool, erro result, err := expression.Evaluate(params) if err != nil { - log.Print(Cond, " = ", err.Error()) + cclog.ComponentDebug("MetricRouter", Cond, " = ", err.Error()) return false, err } return bool(result.(bool)), err @@ -113,7 +113,7 @@ func (r *metricRouter) DoAddTags(point lp.CCMetric) { var err error conditionMatches, err = r.EvalCondition(m.Condition, point) if err != nil { - log.Print(err.Error()) + cclog.ComponentError("MetricRouter", err.Error()) conditionMatches = false } } @@ -133,7 +133,7 @@ func (r *metricRouter) DoDelTags(point lp.CCMetric) { var err error conditionMatches, err = r.EvalCondition(m.Condition, point) if err != nil { - log.Print(err.Error()) + cclog.ComponentError("MetricRouter", err.Error()) conditionMatches = false } } @@ -154,7 +154,7 @@ func (r *metricRouter) Start() { RouterLoop: select { case <-r.done: - log.Print("[MetricRouter] DONE\n") + cclog.ComponentDebug("MetricRouter", "DONE") r.wg.Done() break RouterLoop default: @@ -162,11 +162,11 @@ func (r *metricRouter) Start() { RouterInputLoop: select { case <-r.done: - log.Print("[MetricRouter] DONE\n") + cclog.ComponentDebug("MetricRouter", "DONE") r.wg.Done() break RouterInputLoop case p := <-c: - log.Print("[MetricRouter] FORWARD ", p) + cclog.ComponentDebug("MetricRouter", "FORWARD", p) r.DoAddTags(p) r.DoDelTags(p) if r.config.IntervalStamp { @@ -180,9 +180,8 @@ func (r *metricRouter) Start() { } } } - log.Print("[MetricRouter] EXIT\n") }() - log.Print("[MetricRouter] STARTED\n") + cclog.ComponentDebug("MetricRouter", "STARTED") } func (r *metricRouter) AddInput(input chan lp.CCMetric) { @@ 
-195,7 +194,7 @@ func (r *metricRouter) AddOutput(output chan lp.CCMetric) { func (r *metricRouter) Close() { r.done <- true - log.Print("[MetricRouter] CLOSE\n") + cclog.ComponentDebug("MetricRouter", "CLOSE") } func New(ticker mct.MultiChanTicker, wg *sync.WaitGroup, routerConfigFile string) (MetricRouter, error) { diff --git a/metric-collector.go b/metric-collector.go index 494bcbf..b3ad9d0 100644 --- a/metric-collector.go +++ b/metric-collector.go @@ -100,6 +100,7 @@ func ReadCli() map[string]string { logfile := flag.String("log", "stderr", "Path for logfile") pidfile := flag.String("pidfile", "/var/run/cc-metric-collector.pid", "Path for PID file") once := flag.Bool("once", false, "Run all collectors only once") + debug := flag.Bool("debug", false, "Activate debug output") flag.Parse() m = make(map[string]string) m["configfile"] = *cfg @@ -110,6 +111,12 @@ func ReadCli() map[string]string { } else { m["once"] = "false" } + if *debug { + m["debug"] = "true" + cclog.SetDebug() + } else { + m["debug"] = "false" + } return m } @@ -219,6 +226,10 @@ func mainFunc() int { // Drop domain part of host name rcfg.Hostname = strings.SplitN(rcfg.Hostname, `.`, 2)[0] // err = CreatePidfile(rcfg.CliArgs["pidfile"]) + + if rcfg.CliArgs["logfile"] != "stderr" { + cclog.SetOutput(rcfg.CliArgs["logfile"]) + } // err = SetLogging(rcfg.CliArgs["logfile"]) // if err != nil { // log.Print("Error setting up logging system to ", rcfg.CliArgs["logfile"], " on ", rcfg.Hostname) diff --git a/receivers/natsReceiver.go b/receivers/natsReceiver.go index 5cbe90d..853edf1 100644 --- a/receivers/natsReceiver.go +++ b/receivers/natsReceiver.go @@ -6,7 +6,7 @@ import ( lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric" influx "github.com/influxdata/line-protocol" nats "github.com/nats-io/nats.go" - "log" + cclog "github.com/ClusterCockpit/cc-metric-collector/internal/ccLogger" "time" ) @@ -46,8 +46,8 @@ func (r *NatsReceiver) Init(config ReceiverConfig) error { if len(r.port) 
== 0 { r.port = "4222" } - log.Print("[NatsReceiver] INIT") uri := fmt.Sprintf("%s:%s", r.addr, r.port) + cclog.ComponentDebug("NatsReceiver", "INIT", uri) nc, err := nats.Connect(uri) if err == nil { r.database = r.config.Database @@ -63,7 +63,7 @@ func (r *NatsReceiver) Init(config ReceiverConfig) error { } func (r *NatsReceiver) Start() { - log.Print("[NatsReceiver] START") + cclog.ComponentDebug("NatsReceiver", "START") r.nc.Subscribe(r.database, r._NatsReceive) } @@ -75,7 +75,6 @@ func (r *NatsReceiver) _NatsReceive(m *nats.Msg) { for k, v := range r.meta { y.AddMeta(k, v) } - //y, err := lp.New(m.Name(), Tags2Map(m), r.meta, Fields2Map(m), m.Time()) if r.sink != nil { r.sink <- y } @@ -85,7 +84,7 @@ func (r *NatsReceiver) _NatsReceive(m *nats.Msg) { func (r *NatsReceiver) Close() { if r.nc != nil { - log.Print("[NatsReceiver] CLOSE") + cclog.ComponentDebug("NatsReceiver", "CLOSE") r.nc.Close() } } diff --git a/receivers/receiveManager.go b/receivers/receiveManager.go index 62f70b3..e6a2eee 100644 --- a/receivers/receiveManager.go +++ b/receivers/receiveManager.go @@ -3,7 +3,7 @@ package receivers import ( "encoding/json" lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric" - "log" + cclog "github.com/ClusterCockpit/cc-metric-collector/internal/ccLogger" "os" "sync" ) @@ -36,7 +36,7 @@ func (rm *receiveManager) Init(wg *sync.WaitGroup, receiverConfigFile string) er rm.config = make([]ReceiverConfig, 0) configFile, err := os.Open(receiverConfigFile) if err != nil { - log.Print(err.Error()) + cclog.ComponentError("ReceiveManager", err.Error()) return err } defer configFile.Close() @@ -44,23 +44,11 @@ func (rm *receiveManager) Init(wg *sync.WaitGroup, receiverConfigFile string) er var rawConfigs []json.RawMessage err = jsonParser.Decode(&rawConfigs) if err != nil { - log.Print(err.Error()) + cclog.ComponentError("ReceiveManager", err.Error()) return err } for _, raw := range rawConfigs { - log.Print("[ReceiveManager] ", string(raw)) 
rm.AddInput(raw) - // if _, found := AvailableReceivers[k.Type]; !found { - // log.Print("[ReceiveManager] SKIP Config specifies unknown receiver 'type': ", k.Type) - // continue - // } - // r := AvailableReceivers[k.Type] - // err = r.Init(k) - // if err != nil { - // log.Print("[ReceiveManager] SKIP Receiver ", k.Type, " cannot be initialized: ", err.Error()) - // continue - // } - // rm.inputs = append(rm.inputs, r) } return nil } @@ -69,60 +57,32 @@ func (rm *receiveManager) Start() { rm.wg.Add(1) for _, r := range rm.inputs { - log.Print("[ReceiveManager] START ", r.Name()) + cclog.ComponentDebug("ReceiveManager", "START", r.Name()) r.Start() } - log.Print("[ReceiveManager] STARTED\n") - // go func() { - // for { - //ReceiveManagerLoop: - // select { - // case <- rm.done: - // log.Print("ReceiveManager done\n") - // rm.wg.Done() - // break ReceiveManagerLoop - // default: - // for _, c := range rm.inputs { - //ReceiveManagerInputLoop: - // select { - // case <- rm.done: - // log.Print("ReceiveManager done\n") - // rm.wg.Done() - // break ReceiveManagerInputLoop - // case p := <- c: - // log.Print("ReceiveManager: ", p) - // rm.output <- p - // default: - // } - // } - // } - // } - // }() - // for _, r := range rm.inputs { - // r.Close() - // } + cclog.ComponentDebug("ReceiveManager", "STARTED") } func (rm *receiveManager) AddInput(rawConfig json.RawMessage) error { var config ReceiverConfig err := json.Unmarshal(rawConfig, &config) if err != nil { - log.Print("[ReceiveManager] SKIP ", config.Type, " JSON config error: ", err.Error()) - log.Print(err.Error()) + cclog.ComponentError("ReceiveManager", "SKIP", config.Type, "JSON config error:", err.Error()) return err } if _, found := AvailableReceivers[config.Type]; !found { - log.Print("[ReceiveManager] SKIP ", config.Type, " unknown receiver: ", err.Error()) + cclog.ComponentError("ReceiveManager", "SKIP", config.Type, "unknown receiver:", err.Error()) return err } r := AvailableReceivers[config.Type] err = 
r.Init(config) if err != nil { - log.Print("[ReceiveManager] SKIP ", r.Name(), " initialization failed: ", err.Error()) + cclog.ComponentError("ReceiveManager", "SKIP", r.Name(), "initialization failed:", err.Error()) return err } rm.inputs = append(rm.inputs, r) rm.config = append(rm.config, config) + cclog.ComponentDebug("ReceiveManager", "ADD RECEIVER", r.Name()) return nil } @@ -135,12 +95,11 @@ func (rm *receiveManager) AddOutput(output chan lp.CCMetric) { func (rm *receiveManager) Close() { for _, r := range rm.inputs { - log.Print("[ReceiveManager] CLOSE ", r.Name()) + cclog.ComponentDebug("ReceiveManager", "CLOSE", r.Name()) r.Close() } rm.wg.Done() - log.Print("[ReceiveManager] CLOSE\n") - log.Print("[ReceiveManager] EXIT\n") + cclog.ComponentDebug("ReceiveManager", "CLOSE") } func New(wg *sync.WaitGroup, receiverConfigFile string) (ReceiveManager, error) { diff --git a/sinks/sinkManager.go b/sinks/sinkManager.go index beb0f32..b2d60dc 100644 --- a/sinks/sinkManager.go +++ b/sinks/sinkManager.go @@ -2,11 +2,12 @@ package sinks import ( "encoding/json" - "log" +// "log" "os" "sync" lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric" + cclog "github.com/ClusterCockpit/cc-metric-collector/internal/ccLogger" ) var AvailableSinks = map[string]Sink{ @@ -41,7 +42,7 @@ func (sm *sinkManager) Init(wg *sync.WaitGroup, sinkConfigFile string) error { if len(sinkConfigFile) > 0 { configFile, err := os.Open(sinkConfigFile) if err != nil { - log.Print("[SinkManager] ", err.Error()) + cclog.ComponentError("SinkManager", err.Error()) return err } defer configFile.Close() @@ -49,7 +50,7 @@ func (sm *sinkManager) Init(wg *sync.WaitGroup, sinkConfigFile string) error { var rawConfigs []json.RawMessage err = jsonParser.Decode(&rawConfigs) if err != nil { - log.Print("[SinkManager] ", err.Error()) + cclog.ComponentError("SinkManager", err.Error()) return err } for _, raw := range rawConfigs { @@ -73,16 +74,16 @@ func (sm *sinkManager) Start() { for _, s := 
range sm.outputs { s.Close() } - log.Print("[SinkManager] DONE\n") + cclog.ComponentDebug("SinkManager", "DONE") sm.wg.Done() break SinkManagerLoop case p := <-sm.input: - log.Print("[SinkManager] WRITE ", p) + cclog.ComponentDebug("SinkManager", "WRITE", p) for _, s := range sm.outputs { s.Write(p) } if batchcount == 0 { - log.Print("[SinkManager] FLUSH") + cclog.ComponentDebug("SinkManager", "FLUSH") for _, s := range sm.outputs { s.Flush() } @@ -92,9 +93,8 @@ func (sm *sinkManager) Start() { default: } } - log.Print("[SinkManager] EXIT\n") }() - log.Print("[SinkManager] STARTED\n") + cclog.ComponentDebug("SinkManager", "STARTED") } func (sm *sinkManager) AddInput(input chan lp.CCMetric) { @@ -107,28 +107,29 @@ func (sm *sinkManager) AddOutput(rawConfig json.RawMessage) error { if len(rawConfig) > 3 { err = json.Unmarshal(rawConfig, &config) if err != nil { - log.Print("[SinkManager] SKIP ", config.Type, " JSON config error: ", err.Error()) + cclog.ComponentError("SinkManager", "SKIP", config.Type, "JSON config error:", err.Error()) return err } } if _, found := AvailableSinks[config.Type]; !found { - log.Print("[SinkManager] SKIP ", config.Type, " unknown sink: ", err.Error()) + cclog.ComponentError("SinkManager", "SKIP", config.Type, "unknown sink:", err.Error()) return err } s := AvailableSinks[config.Type] err = s.Init(config) if err != nil { - log.Print("[SinkManager] SKIP ", s.Name(), " initialization failed: ", err.Error()) + cclog.ComponentError("SinkManager", "SKIP", s.Name(), "initialization failed:", err.Error()) return err } sm.outputs = append(sm.outputs, s) sm.config = append(sm.config, config) + cclog.ComponentDebug("SinkManager", "ADD SINK", s.Name()) return nil } func (sm *sinkManager) Close() { sm.done <- true - log.Print("[SinkManager] CLOSE") + cclog.ComponentDebug("SinkManager", "CLOSE") } func New(wg *sync.WaitGroup, sinkConfigFile string) (SinkManager, error) { From 7f77cad0565e1242a54b923a439588e04fba340f Mon Sep 17 00:00:00 2001 From: 
Thomas Roehl Date: Tue, 25 Jan 2022 17:49:15 +0100 Subject: [PATCH 29/45] Don't wait too long in case of --once --- metric-collector.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/metric-collector.go b/metric-collector.go index b3ad9d0..25989ed 100644 --- a/metric-collector.go +++ b/metric-collector.go @@ -286,7 +286,7 @@ func mainFunc() int { // Wait until one tick has passed. This is a workaround if rcfg.CliArgs["once"] == "true" { - x := 1.8 * float64(rcfg.ConfigFile.Interval) + x := 1.2 * float64(rcfg.ConfigFile.Interval) time.Sleep(time.Duration(int(x)) * time.Second) shutdown(&rcfg) } From 9bd8a3a90b5a0a93a2ba1e542c6a2cfab74659e4 Mon Sep 17 00:00:00 2001 From: Holger Obermaier <40787752+ho-ob@users.noreply.github.com> Date: Wed, 26 Jan 2022 11:38:43 +0100 Subject: [PATCH 30/45] Add documentation --- internal/metricRouter/metricRouter.go | 46 ++++++++++++++++++--------- 1 file changed, 31 insertions(+), 15 deletions(-) diff --git a/internal/metricRouter/metricRouter.go b/internal/metricRouter/metricRouter.go index 5fd55ba..dc7703a 100644 --- a/internal/metricRouter/metricRouter.go +++ b/internal/metricRouter/metricRouter.go @@ -2,38 +2,42 @@ package metricRouter import ( "encoding/json" - cclog "github.com/ClusterCockpit/cc-metric-collector/internal/ccLogger" "os" "sync" "time" + cclog "github.com/ClusterCockpit/cc-metric-collector/internal/ccLogger" + lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric" mct "github.com/ClusterCockpit/cc-metric-collector/internal/multiChanTicker" "gopkg.in/Knetic/govaluate.v2" ) +// Metric router tag configuration type metricRouterTagConfig struct { - Key string `json:"key"` - Value string `json:"value"` - Condition string `json:"if"` + Key string `json:"key"` // Tag name + Value string `json:"value"` // Tag value + Condition string `json:"if"` // Condition for adding or removing corresponding tag } +// Metric router configuration type metricRouterConfig struct { - AddTags 
[]metricRouterTagConfig `json:"add_tags"` - DelTags []metricRouterTagConfig `json:"delete_tags"` - IntervalStamp bool `json:"interval_timestamp"` + AddTags []metricRouterTagConfig `json:"add_tags"` // List of tags that are added when the condition is met + DelTags []metricRouterTagConfig `json:"delete_tags"` // List of tags that are removed when the condition is met + IntervalStamp bool `json:"interval_timestamp"` // Update timestamp periodically? } type metricRouter struct { - inputs []chan lp.CCMetric - outputs []chan lp.CCMetric - done chan bool + inputs []chan lp.CCMetric // List of all input channels + outputs []chan lp.CCMetric // List of all output channels + done chan bool // channel to finish stop metric router wg *sync.WaitGroup - timestamp time.Time + timestamp time.Time // timestamp ticker mct.MultiChanTicker config metricRouterConfig } +// MetricRouter access functions type MetricRouter interface { Init(ticker mct.MultiChanTicker, wg *sync.WaitGroup, routerConfigFile string) error AddInput(input chan lp.CCMetric) @@ -42,6 +46,12 @@ type MetricRouter interface { Close() } +// Init initializes a metric router by setting up: +// * input and output channels +// * done channel +// * wait group synchronization (from variable wg) +// * ticker (from variable ticker) +// * configuration (read from config file in variable routerConfigFile) func (r *metricRouter) Init(ticker mct.MultiChanTicker, wg *sync.WaitGroup, routerConfigFile string) error { r.inputs = make([]chan lp.CCMetric, 0) r.outputs = make([]chan lp.CCMetric, 0) @@ -63,25 +73,27 @@ func (r *metricRouter) Init(ticker mct.MultiChanTicker, wg *sync.WaitGroup, rout return nil } +// StartTimer starts a timer which updates timestamp periodically func (r *metricRouter) StartTimer() { m := make(chan time.Time) r.ticker.AddChannel(m) go func() { for { - select { - case t := <-m: - r.timestamp = t - } + t := <-m + r.timestamp = t } }() } +// EvalCondition evaluates condition Cond for metric data from point 
func (r *metricRouter) EvalCondition(Cond string, point lp.CCMetric) (bool, error) { expression, err := govaluate.NewEvaluableExpression(Cond) if err != nil { cclog.ComponentDebug("MetricRouter", Cond, " = ", err.Error()) return false, err } + + // Add metric name, tags, meta data, fields and timestamp to the parameter list params := make(map[string]interface{}) params["name"] = point.Name() for _, t := range point.TagList() { @@ -95,6 +107,7 @@ func (r *metricRouter) EvalCondition(Cond string, point lp.CCMetric) (bool, erro } params["timestamp"] = point.Time() + // evaluate condition result, err := expression.Evaluate(params) if err != nil { cclog.ComponentDebug("MetricRouter", Cond, " = ", err.Error()) @@ -103,6 +116,7 @@ func (r *metricRouter) EvalCondition(Cond string, point lp.CCMetric) (bool, erro return bool(result.(bool)), err } +// DoAddTags adds a tag when condition is fullfiled func (r *metricRouter) DoAddTags(point lp.CCMetric) { for _, m := range r.config.AddTags { var conditionMatches bool @@ -123,6 +137,7 @@ func (r *metricRouter) DoAddTags(point lp.CCMetric) { } } +// DoDelTags removes a tag when condition is fullfiled func (r *metricRouter) DoDelTags(point lp.CCMetric) { for _, m := range r.config.DelTags { var conditionMatches bool @@ -143,6 +158,7 @@ func (r *metricRouter) DoDelTags(point lp.CCMetric) { } } +// Start starts the metric router func (r *metricRouter) Start() { r.wg.Add(1) r.timestamp = time.Now() From 3d073080f83cd4f7716b1d8787fff24e9ee98562 Mon Sep 17 00:00:00 2001 From: Holger Obermaier <40787752+ho-ob@users.noreply.github.com> Date: Wed, 26 Jan 2022 12:08:40 +0100 Subject: [PATCH 31/45] Add documentation --- internal/metricRouter/metricRouter.go | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/internal/metricRouter/metricRouter.go b/internal/metricRouter/metricRouter.go index dc7703a..f76c31f 100644 --- a/internal/metricRouter/metricRouter.go +++ b/internal/metricRouter/metricRouter.go @@ -30,7 +30,7 @@ type 
metricRouterConfig struct { type metricRouter struct { inputs []chan lp.CCMetric // List of all input channels outputs []chan lp.CCMetric // List of all output channels - done chan bool // channel to finish stop metric router + done chan bool // channel to finish / stop metric router wg *sync.WaitGroup timestamp time.Time // timestamp ticker mct.MultiChanTicker @@ -200,19 +200,23 @@ func (r *metricRouter) Start() { cclog.ComponentDebug("MetricRouter", "STARTED") } +// AddInput adds a input channel to the metric router func (r *metricRouter) AddInput(input chan lp.CCMetric) { r.inputs = append(r.inputs, input) } +// AddOutput adds a output channel to the metric router func (r *metricRouter) AddOutput(output chan lp.CCMetric) { r.outputs = append(r.outputs, output) } +// Close finishes / stops the metric router func (r *metricRouter) Close() { r.done <- true cclog.ComponentDebug("MetricRouter", "CLOSE") } +// New creates a new initialized metric router func New(ticker mct.MultiChanTicker, wg *sync.WaitGroup, routerConfigFile string) (MetricRouter, error) { r := new(metricRouter) err := r.Init(ticker, wg, routerConfigFile) From c193b800830a3d7f1faa375616fcc2b4388d4c15 Mon Sep 17 00:00:00 2001 From: Holger Obermaier <40787752+ho-ob@users.noreply.github.com> Date: Wed, 26 Jan 2022 12:31:04 +0100 Subject: [PATCH 32/45] Add documentation --- collectors/collectorManager.go | 51 ++++++++++++++++++++++------------ 1 file changed, 34 insertions(+), 17 deletions(-) diff --git a/collectors/collectorManager.go b/collectors/collectorManager.go index 88cfdf8..192ef31 100644 --- a/collectors/collectorManager.go +++ b/collectors/collectorManager.go @@ -6,26 +6,27 @@ import ( "sync" "time" + cclog "github.com/ClusterCockpit/cc-metric-collector/internal/ccLogger" lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric" mct "github.com/ClusterCockpit/cc-metric-collector/internal/multiChanTicker" - cclog "github.com/ClusterCockpit/cc-metric-collector/internal/ccLogger" ) +// 
Map of all available metric collectors var AvailableCollectors = map[string]MetricCollector{ - "likwid": &LikwidCollector{}, - "loadavg": &LoadavgCollector{}, - "memstat": &MemstatCollector{}, - "netstat": &NetstatCollector{}, - "ibstat": &InfinibandCollector{}, - "lustrestat": &LustreCollector{}, - "cpustat": &CpustatCollector{}, - "topprocs": &TopProcsCollector{}, - "nvidia": &NvidiaCollector{}, - "customcmd": &CustomCmdCollector{}, - "diskstat": &DiskstatCollector{}, - "tempstat": &TempCollector{}, - "ipmistat": &IpmiCollector{}, + "likwid": new(LikwidCollector), + "loadavg": new(LoadavgCollector), + "memstat": new(MemstatCollector), + "netstat": new(NetstatCollector), + "ibstat": new(InfinibandCollector), + "lustrestat": new(LustreCollector), + "cpustat": new(CpustatCollector), + "topprocs": new(TopProcsCollector), + "nvidia": new(NvidiaCollector), + "customcmd": new(CustomCmdCollector), + "diskstat": new(DiskstatCollector), + "tempstat": new(TempCollector), + "ipmistat": new(IpmiCollector), "gpfs": new(GpfsCollector), "cpufreq": new(CPUFreqCollector), "cpufreq_cpuinfo": new(CPUFreqCpuInfoCollector), @@ -34,14 +35,15 @@ var AvailableCollectors = map[string]MetricCollector{ type collectorManager struct { collectors []MetricCollector - output chan lp.CCMetric - done chan bool + output chan lp.CCMetric // List of all output channels + done chan bool // channel to finish / stop metric collector manager ticker mct.MultiChanTicker duration time.Duration wg *sync.WaitGroup config map[string]json.RawMessage } +// Metric collector access functions type CollectorManager interface { Init(ticker mct.MultiChanTicker, duration time.Duration, wg *sync.WaitGroup, collectConfigFile string) error AddOutput(output chan lp.CCMetric) @@ -49,6 +51,13 @@ type CollectorManager interface { Close() } +// Init initializes a new metric collector manager by setting up: +// * output channels +// * done channel +// * wait group synchronization (from variable wg) +// * ticker (from variable 
ticker) +// * configuration (read from config file in variable collectConfigFile) +// Initialization is done for all configured collectors func (cm *collectorManager) Init(ticker mct.MultiChanTicker, duration time.Duration, wg *sync.WaitGroup, collectConfigFile string) error { cm.collectors = make([]MetricCollector, 0) cm.output = nil @@ -56,6 +65,8 @@ func (cm *collectorManager) Init(ticker mct.MultiChanTicker, duration time.Durat cm.wg = wg cm.ticker = ticker cm.duration = duration + + // Read collector config file configFile, err := os.Open(collectConfigFile) if err != nil { cclog.Error(err.Error()) @@ -68,6 +79,8 @@ func (cm *collectorManager) Init(ticker mct.MultiChanTicker, duration time.Durat cclog.Error(err.Error()) return err } + + // Initialize configured collectors for k, cfg := range cm.config { if _, found := AvailableCollectors[k]; !found { cclog.ComponentError("CollectorManager", "SKIP unknown collector", k) @@ -86,6 +99,7 @@ func (cm *collectorManager) Init(ticker mct.MultiChanTicker, duration time.Durat return nil } +// Start starts the metric collector manager func (cm *collectorManager) Start() { cm.wg.Add(1) tick := make(chan time.Time) @@ -113,7 +127,7 @@ func (cm *collectorManager) Start() { cclog.ComponentDebug("CollectorManager", "DONE") break CollectorManagerInputLoop default: - cclog.ComponentDebug("CollectorManager", c.Name(), t) + cclog.ComponentDebug("CollectorManager", c.Name(), t) c.Read(cm.duration, cm.output) } } @@ -123,15 +137,18 @@ func (cm *collectorManager) Start() { cclog.ComponentDebug("CollectorManager", "STARTED") } +// AddOutput adds the output channel to the metric collector manager func (cm *collectorManager) AddOutput(output chan lp.CCMetric) { cm.output = output } +// Close finishes / stops the metric collector manager func (cm *collectorManager) Close() { cm.done <- true cclog.ComponentDebug("CollectorManager", "CLOSE") } +// New creates a new initialized metric collector manager func New(ticker mct.MultiChanTicker, 
duration time.Duration, wg *sync.WaitGroup, collectConfigFile string) (CollectorManager, error) { cm := &collectorManager{} err := cm.Init(ticker, duration, wg, collectConfigFile) From 09b753847982ceafc02c75cebd78163cb31ceb08 Mon Sep 17 00:00:00 2001 From: Holger Obermaier <40787752+ho-ob@users.noreply.github.com> Date: Wed, 26 Jan 2022 15:54:49 +0100 Subject: [PATCH 33/45] Avoid labels in collector manager loop --- collectors/collectorManager.go | 34 ++++++++++++++++++++-------------- collectors/memstatMetric.go | 3 ++- 2 files changed, 22 insertions(+), 15 deletions(-) diff --git a/collectors/collectorManager.go b/collectors/collectorManager.go index 192ef31..4aae041 100644 --- a/collectors/collectorManager.go +++ b/collectors/collectorManager.go @@ -104,29 +104,33 @@ func (cm *collectorManager) Start() { cm.wg.Add(1) tick := make(chan time.Time) cm.ticker.AddChannel(tick) + go func() { + // Collector manager is done + done := func() { + // close all metric collectors + for _, c := range cm.collectors { + c.Close() + } + cm.wg.Done() + cclog.ComponentDebug("CollectorManager", "DONE") + } + + // Wait for done signal or timer event for { - CollectorManagerLoop: select { case <-cm.done: - for _, c := range cm.collectors { - c.Close() - } - cm.wg.Done() - cclog.ComponentDebug("CollectorManager", "DONE") - break CollectorManagerLoop + done() + return case t := <-tick: for _, c := range cm.collectors { - CollectorManagerInputLoop: + // Wait for done signal or execute the collector select { case <-cm.done: - for _, c := range cm.collectors { - c.Close() - } - cm.wg.Done() - cclog.ComponentDebug("CollectorManager", "DONE") - break CollectorManagerInputLoop + done() + return default: + // Read metrics from collector c cclog.ComponentDebug("CollectorManager", c.Name(), t) c.Read(cm.duration, cm.output) } @@ -134,6 +138,8 @@ func (cm *collectorManager) Start() { } } }() + + // Collector manager is started cclog.ComponentDebug("CollectorManager", "STARTED") } diff --git 
a/collectors/memstatMetric.go b/collectors/memstatMetric.go index c83402c..b6ef855 100644 --- a/collectors/memstatMetric.go +++ b/collectors/memstatMetric.go @@ -9,6 +9,7 @@ import ( "strconv" "strings" "time" + lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric" ) @@ -93,7 +94,7 @@ func (m *MemstatCollector) Read(interval time.Duration, output chan lp.CCMetric) for match, name := range m.matches { if _, exists := m.stats[match]; !exists { - err = errors.New(fmt.Sprintf("Parse error for %s : %s", match, name)) + err = fmt.Errorf("Parse error for %s : %s", match, name) log.Print(err) continue } From babd7a9af82dab8071204e3175188503780eec73 Mon Sep 17 00:00:00 2001 From: Thomas Roehl Date: Wed, 26 Jan 2022 16:52:56 +0100 Subject: [PATCH 34/45] Use non-blocking send at close --- collectors/collectorManager.go | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/collectors/collectorManager.go b/collectors/collectorManager.go index 4aae041..6140dbf 100644 --- a/collectors/collectorManager.go +++ b/collectors/collectorManager.go @@ -150,7 +150,10 @@ func (cm *collectorManager) AddOutput(output chan lp.CCMetric) { // Close finishes / stops the metric collector manager func (cm *collectorManager) Close() { - cm.done <- true + select { + case cm.done <- true: + default: + } cclog.ComponentDebug("CollectorManager", "CLOSE") } From 3fd77e6887cf368799f07a1cd6f35941a6e55d55 Mon Sep 17 00:00:00 2001 From: Thomas Roehl Date: Wed, 26 Jan 2022 16:54:51 +0100 Subject: [PATCH 35/45] Use non-blocking send at close, use common done function and remove default case --- sinks/sinkManager.go | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/sinks/sinkManager.go b/sinks/sinkManager.go index b2d60dc..efcb5a0 100644 --- a/sinks/sinkManager.go +++ b/sinks/sinkManager.go @@ -2,12 +2,11 @@ package sinks import ( "encoding/json" -// "log" "os" "sync" - lp 
"github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric" cclog "github.com/ClusterCockpit/cc-metric-collector/internal/ccLogger" + lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric" ) var AvailableSinks = map[string]Sink{ @@ -67,16 +66,18 @@ func (sm *sinkManager) Start() { sm.wg.Add(1) batchcount := 20 go func() { + done := func() { + for _, s := range sm.outputs { + s.Close() + } + cclog.ComponentDebug("SinkManager", "DONE") + sm.wg.Done() + } for { - SinkManagerLoop: select { case <-sm.done: - for _, s := range sm.outputs { - s.Close() - } - cclog.ComponentDebug("SinkManager", "DONE") - sm.wg.Done() - break SinkManagerLoop + done() + return case p := <-sm.input: cclog.ComponentDebug("SinkManager", "WRITE", p) for _, s := range sm.outputs { @@ -90,7 +91,6 @@ func (sm *sinkManager) Start() { batchcount = 20 } batchcount-- - default: } } }() @@ -128,7 +128,10 @@ func (sm *sinkManager) AddOutput(rawConfig json.RawMessage) error { } func (sm *sinkManager) Close() { - sm.done <- true + select { + case sm.done <- true: + default: + } cclog.ComponentDebug("SinkManager", "CLOSE") } From 5600cf1f5f27f5781c76032fe94db3acd30f7fa2 Mon Sep 17 00:00:00 2001 From: Thomas Roehl Date: Wed, 26 Jan 2022 17:08:53 +0100 Subject: [PATCH 36/45] Use two separate inputs for metric router to simplify management. 
Activate --logfile option and close MultiChanTicker explicitly --- internal/metricRouter/metricRouter.go | 99 +++++++++++++-------- internal/multiChanTicker/multiChanTicker.go | 23 ++++- metric-collector.go | 8 +- 3 files changed, 87 insertions(+), 43 deletions(-) diff --git a/internal/metricRouter/metricRouter.go b/internal/metricRouter/metricRouter.go index f76c31f..57ba708 100644 --- a/internal/metricRouter/metricRouter.go +++ b/internal/metricRouter/metricRouter.go @@ -28,19 +28,22 @@ type metricRouterConfig struct { } type metricRouter struct { - inputs []chan lp.CCMetric // List of all input channels - outputs []chan lp.CCMetric // List of all output channels - done chan bool // channel to finish / stop metric router - wg *sync.WaitGroup - timestamp time.Time // timestamp - ticker mct.MultiChanTicker - config metricRouterConfig + coll_input chan lp.CCMetric // Input channel from CollectorManager + recv_input chan lp.CCMetric // Input channel from ReceiveManager + outputs []chan lp.CCMetric // List of all output channels + done chan bool // channel to finish / stop metric router + wg *sync.WaitGroup + timestamp time.Time // timestamp + timerdone chan bool // channel to finish / stop timestamp updater + ticker mct.MultiChanTicker + config metricRouterConfig } // MetricRouter access functions type MetricRouter interface { Init(ticker mct.MultiChanTicker, wg *sync.WaitGroup, routerConfigFile string) error - AddInput(input chan lp.CCMetric) + AddCollectorInput(input chan lp.CCMetric) + AddReceiverInput(input chan lp.CCMetric) AddOutput(output chan lp.CCMetric) Start() Close() @@ -53,7 +56,6 @@ type MetricRouter interface { // * ticker (from variable ticker) // * configuration (read from config file in variable routerConfigFile) func (r *metricRouter) Init(ticker mct.MultiChanTicker, wg *sync.WaitGroup, routerConfigFile string) error { - r.inputs = make([]chan lp.CCMetric, 0) r.outputs = make([]chan lp.CCMetric, 0) r.done = make(chan bool) r.wg = wg @@ -77,12 
+79,19 @@ func (r *metricRouter) Init(ticker mct.MultiChanTicker, wg *sync.WaitGroup, rout func (r *metricRouter) StartTimer() { m := make(chan time.Time) r.ticker.AddChannel(m) + r.timerdone = make(chan bool) go func() { for { - t := <-m - r.timestamp = t + select { + case <-r.timerdone: + cclog.ComponentDebug("MetricRouter", "TIMER DONE") + return + case t := <-m: + r.timestamp = t + } } }() + cclog.ComponentDebug("MetricRouter", "TIMER START") } // EvalCondition evaluates condition Cond for metric data from point @@ -165,35 +174,35 @@ func (r *metricRouter) Start() { if r.config.IntervalStamp { r.StartTimer() } + done := func() { + r.wg.Done() + cclog.ComponentDebug("MetricRouter", "DONE") + } + forward := func(point lp.CCMetric) { + cclog.ComponentDebug("MetricRouter", "FORWARD", point) + r.DoAddTags(point) + r.DoDelTags(point) + for _, o := range r.outputs { + o <- point + } + } go func() { for { - RouterLoop: + // RouterLoop: select { case <-r.done: - cclog.ComponentDebug("MetricRouter", "DONE") - r.wg.Done() - break RouterLoop - default: - for _, c := range r.inputs { - RouterInputLoop: - select { - case <-r.done: - cclog.ComponentDebug("MetricRouter", "DONE") - r.wg.Done() - break RouterInputLoop - case p := <-c: - cclog.ComponentDebug("MetricRouter", "FORWARD", p) - r.DoAddTags(p) - r.DoDelTags(p) - if r.config.IntervalStamp { - p.SetTime(r.timestamp) - } - for _, o := range r.outputs { - o <- p - } - default: - } + done() + return + case p := <-r.coll_input: + if r.config.IntervalStamp { + p.SetTime(r.timestamp) } + forward(p) + case p := <-r.recv_input: + if r.config.IntervalStamp { + p.SetTime(r.timestamp) + } + forward(p) } } }() @@ -201,8 +210,12 @@ func (r *metricRouter) Start() { } // AddInput adds a input channel to the metric router -func (r *metricRouter) AddInput(input chan lp.CCMetric) { - r.inputs = append(r.inputs, input) +func (r *metricRouter) AddCollectorInput(input chan lp.CCMetric) { + r.coll_input = input +} + +func (r *metricRouter) 
AddReceiverInput(input chan lp.CCMetric) { + r.recv_input = input } // AddOutput adds a output channel to the metric router @@ -212,7 +225,17 @@ func (r *metricRouter) AddOutput(output chan lp.CCMetric) { // Close finishes / stops the metric router func (r *metricRouter) Close() { - r.done <- true + select { + case r.done <- true: + default: + } + if r.config.IntervalStamp { + cclog.ComponentDebug("MetricRouter", "TIMER CLOSE") + select { + case r.timerdone <- true: + default: + } + } cclog.ComponentDebug("MetricRouter", "CLOSE") } diff --git a/internal/multiChanTicker/multiChanTicker.go b/internal/multiChanTicker/multiChanTicker.go index f8139fa..37778ad 100644 --- a/internal/multiChanTicker/multiChanTicker.go +++ b/internal/multiChanTicker/multiChanTicker.go @@ -1,27 +1,43 @@ package multiChanTicker import ( + cclog "github.com/ClusterCockpit/cc-metric-collector/internal/ccLogger" "time" ) type multiChanTicker struct { ticker *time.Ticker channels []chan time.Time + done chan bool } type MultiChanTicker interface { Init(duration time.Duration) AddChannel(chan time.Time) + Close() } func (t *multiChanTicker) Init(duration time.Duration) { t.ticker = time.NewTicker(duration) + t.done = make(chan bool) go func() { + done := func() { + cclog.ComponentDebug("MultiChanTicker", "DONE") + } for { select { + case <-t.done: + done() + return case ts := <-t.ticker.C: + cclog.ComponentDebug("MultiChanTicker", "Tick", ts) for _, c := range t.channels { - c <- ts + select { + case <-t.done: + done() + return + case c <- ts: + } } } } @@ -32,6 +48,11 @@ func (t *multiChanTicker) AddChannel(channel chan time.Time) { t.channels = append(t.channels, channel) } +func (t *multiChanTicker) Close() { + t.done <- true + cclog.ComponentDebug("MultiChanTicker", "CLOSE") +} + func NewTicker(duration time.Duration) MultiChanTicker { t := &multiChanTicker{} t.Init(duration) diff --git a/metric-collector.go b/metric-collector.go index 25989ed..6a6c1b3 100644 --- a/metric-collector.go +++ 
b/metric-collector.go @@ -3,7 +3,6 @@ package main import ( "encoding/json" "flag" -// "log" "os" "os/signal" "strings" @@ -158,6 +157,7 @@ func ReadCli() map[string]string { // General shutdown function that gets executed in case of interrupt or graceful shutdown func shutdown(config *RuntimeConfig) { cclog.Info("Shutdown...") + config.Ticker.Close() if config.CollectManager != nil { cclog.Debug("Shutdown CollectManager...") config.CollectManager.Close() @@ -228,7 +228,7 @@ func mainFunc() int { // err = CreatePidfile(rcfg.CliArgs["pidfile"]) if rcfg.CliArgs["logfile"] != "stderr" { - cclog.SetOutput(rcfg.CliArgs["logfile"]) + cclog.SetOutput(rcfg.CliArgs["logfile"]) } // err = SetLogging(rcfg.CliArgs["logfile"]) // if err != nil { @@ -261,7 +261,7 @@ func mainFunc() int { } CollectToRouterChannel := make(chan lp.CCMetric) rcfg.CollectManager.AddOutput(CollectToRouterChannel) - rcfg.Router.AddInput(CollectToRouterChannel) + rcfg.Router.AddCollectorInput(CollectToRouterChannel) } if len(rcfg.ConfigFile.ReceiverConfigFile) > 0 { rcfg.ReceiveManager, err = receivers.New(&rcfg.Sync, rcfg.ConfigFile.ReceiverConfigFile) @@ -271,7 +271,7 @@ func mainFunc() int { } ReceiveToRouterChannel := make(chan lp.CCMetric) rcfg.ReceiveManager.AddOutput(ReceiveToRouterChannel) - rcfg.Router.AddInput(ReceiveToRouterChannel) + rcfg.Router.AddReceiverInput(ReceiveToRouterChannel) use_recv = true } prepare_shutdown(&rcfg) From 0a383a3789898b36b53d341769e32aa66a4e869b Mon Sep 17 00:00:00 2001 From: Thomas Roehl Date: Wed, 26 Jan 2022 17:09:20 +0100 Subject: [PATCH 37/45] Update CCLogger --- internal/ccLogger/cclogger.go | 160 +++++++++++++++++----------------- 1 file changed, 79 insertions(+), 81 deletions(-) diff --git a/internal/ccLogger/cclogger.go b/internal/ccLogger/cclogger.go index ee92376..38e7e6b 100644 --- a/internal/ccLogger/cclogger.go +++ b/internal/ccLogger/cclogger.go @@ -2,114 +2,112 @@ package cclogger import ( "fmt" - "runtime" - "os" "log" + "os" + "runtime" ) - var ( 
- globalDebug = false - stdout = os.Stdout - stderr = os.Stderr - debugLog *log.Logger = nil - infoLog *log.Logger = nil - errorLog *log.Logger = nil - warnLog *log.Logger = nil - defaultLog *log.Logger = nil + globalDebug = false + stdout = os.Stdout + stderr = os.Stderr + debugLog *log.Logger = nil + infoLog *log.Logger = nil + errorLog *log.Logger = nil + warnLog *log.Logger = nil + defaultLog *log.Logger = nil ) func initLogger() { - if debugLog == nil { - debugLog = log.New(stderr, "DEBUG ", log.LstdFlags) - } - if infoLog == nil { - infoLog = log.New(stdout, "INFO ", log.LstdFlags) - } - if errorLog == nil { - errorLog = log.New(stderr, "ERROR ", log.LstdFlags) - } - if warnLog == nil { - warnLog = log.New(stderr, "WARN ", log.LstdFlags) - } - if defaultLog == nil { - defaultLog = log.New(stdout, "", log.LstdFlags) - } + if debugLog == nil { + debugLog = log.New(stderr, "DEBUG ", log.LstdFlags) + } + if infoLog == nil { + infoLog = log.New(stdout, "INFO ", log.LstdFlags) + } + if errorLog == nil { + errorLog = log.New(stderr, "ERROR ", log.LstdFlags) + } + if warnLog == nil { + warnLog = log.New(stderr, "WARN ", log.LstdFlags) + } + if defaultLog == nil { + defaultLog = log.New(stdout, "", log.LstdFlags) + } } -func Print(e ... interface{}) { - initLogger() - defaultLog.Print(e) +func Print(e ...interface{}) { + initLogger() + defaultLog.Print(e) } -func ComponentPrint(component string, e ... interface{}) { - initLogger() - defaultLog.Print(fmt.Sprintf("[%s] ", component), e) +func ComponentPrint(component string, e ...interface{}) { + initLogger() + defaultLog.Print(fmt.Sprintf("[%s] ", component), e) } -func Info(e ... interface{}) { - initLogger() - infoLog.Print(e) +func Info(e ...interface{}) { + initLogger() + infoLog.Print(e) } -func ComponentInfo(component string, e ... 
interface{}) { - initLogger() - infoLog.Print(fmt.Sprintf("[%s] ", component), e) +func ComponentInfo(component string, e ...interface{}) { + initLogger() + infoLog.Print(fmt.Sprintf("[%s] ", component), e) } -func Debug(e ... interface{}) { - initLogger() - if globalDebug == true { - debugLog.Print(e) - } +func Debug(e ...interface{}) { + initLogger() + if globalDebug == true { + debugLog.Print(e) + } } -func ComponentDebug(component string, e ... interface{}) { - initLogger() - if globalDebug == true && debugLog != nil { - //CCComponentPrint(debugLog, component, e) - debugLog.Print(fmt.Sprintf("[%s] ", component), e) - } +func ComponentDebug(component string, e ...interface{}) { + initLogger() + if globalDebug == true && debugLog != nil { + //CCComponentPrint(debugLog, component, e) + debugLog.Print(fmt.Sprintf("[%s] ", component), e) + } } -func Error(e ... interface{}) { - initLogger() - _, fn, line, _ := runtime.Caller(1) - errorLog.Print(fmt.Sprintf("[%s:%d] ", fn, line), e) +func Error(e ...interface{}) { + initLogger() + _, fn, line, _ := runtime.Caller(1) + errorLog.Print(fmt.Sprintf("[%s:%d] ", fn, line), e) } -func ComponentError(component string, e ... 
interface{}) { - initLogger() - _, fn, line, _ := runtime.Caller(1) - errorLog.Print(fmt.Sprintf("[%s|%s:%d] ", component, fn, line), e) +func ComponentError(component string, e ...interface{}) { + initLogger() + _, fn, line, _ := runtime.Caller(1) + errorLog.Print(fmt.Sprintf("[%s|%s:%d] ", component, fn, line), e) } func SetDebug() { - globalDebug = true - initLogger() + globalDebug = true + initLogger() } - func SetOutput(filename string) { - if filename == "stderr" { - if stderr != os.Stderr && stderr != os.Stdout { - stderr.Close() - } - stderr = os.Stderr - } else if filename == "stdout" { - if stderr != os.Stderr && stderr != os.Stdout { - stderr.Close() - } - stderr = os.Stdout - } else { - file, err := os.OpenFile(filename, os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0600) - if err == nil { - defer file.Close() - stderr = file - } - } - debugLog = nil - errorLog = nil - warnLog = nil - initLogger() + if filename == "stderr" { + if stderr != os.Stderr && stderr != os.Stdout { + stderr.Close() + } + stderr = os.Stderr + } else if filename == "stdout" { + if stderr != os.Stderr && stderr != os.Stdout { + stderr.Close() + } + stderr = os.Stdout + } else { + file, err := os.OpenFile(filename, os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0600) + if err == nil { + defer file.Close() + stderr = file + } + } + debugLog = nil + errorLog = nil + warnLog = nil + initLogger() } From 78834337b0343a0d31c19390be83267b6a516036 Mon Sep 17 00:00:00 2001 From: Thomas Roehl Date: Wed, 26 Jan 2022 18:37:59 +0100 Subject: [PATCH 38/45] Fix for documentation --- collectors/cpustatMetric.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/collectors/cpustatMetric.md b/collectors/cpustatMetric.md index 604445a..8122afe 100644 --- a/collectors/cpustatMetric.md +++ b/collectors/cpustatMetric.md @@ -1,7 +1,7 @@ ## `cpustat` collector ```json - "netstat": { + "cpustat": { "exclude_metrics": [ "cpu_idle" ] From 86e9b55bc979dae760f88e75afd7e9dee76c6a95 Mon Sep 17 00:00:00 2001 From: 
Thomas Roehl Date: Wed, 26 Jan 2022 18:41:25 +0100 Subject: [PATCH 39/45] Fix for documentation --- collectors/nvidiaMetric.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/collectors/nvidiaMetric.md b/collectors/nvidiaMetric.md index c774139..e2e08e5 100644 --- a/collectors/nvidiaMetric.md +++ b/collectors/nvidiaMetric.md @@ -2,7 +2,7 @@ ## `nvidia` collector ```json - "lustrestat": { + "nvidia": { "exclude_devices" : [ "0","1" ], From 76884c3380aaf1b1b5e83964c3d2e9224a0052e8 Mon Sep 17 00:00:00 2001 From: Thomas Roehl Date: Wed, 26 Jan 2022 18:45:23 +0100 Subject: [PATCH 40/45] Prefix Nvidia metrics with 'nv_' --- collectors/nvidiaMetric.go | 88 +++++++++++++++++++------------------- collectors/nvidiaMetric.md | 44 +++++++++---------- 2 files changed, 66 insertions(+), 66 deletions(-) diff --git a/collectors/nvidiaMetric.go b/collectors/nvidiaMetric.go index 6f5141a..1eff3be 100644 --- a/collectors/nvidiaMetric.go +++ b/collectors/nvidiaMetric.go @@ -73,13 +73,13 @@ func (m *NvidiaCollector) Read(interval time.Duration, output chan lp.CCMetric) util, ret := nvml.DeviceGetUtilizationRates(device) if ret == nvml.SUCCESS { - _, skip = stringArrayContains(m.config.ExcludeMetrics, "util") - y, err := lp.New("util", tags, m.meta, map[string]interface{}{"value": float64(util.Gpu)}, time.Now()) + _, skip = stringArrayContains(m.config.ExcludeMetrics, "nv_util") + y, err := lp.New("nv_util", tags, m.meta, map[string]interface{}{"value": float64(util.Gpu)}, time.Now()) if err == nil && !skip { output <- y } - _, skip = stringArrayContains(m.config.ExcludeMetrics, "mem_util") - y, err = lp.New("mem_util", tags, m.meta, map[string]interface{}{"value": float64(util.Memory)}, time.Now()) + _, skip = stringArrayContains(m.config.ExcludeMetrics, "nv_mem_util") + y, err = lp.New("nv_mem_util", tags, m.meta, map[string]interface{}{"value": float64(util.Memory)}, time.Now()) if err == nil && !skip { output <- y } @@ -88,15 +88,15 @@ func (m *NvidiaCollector) 
Read(interval time.Duration, output chan lp.CCMetric) meminfo, ret := nvml.DeviceGetMemoryInfo(device) if ret == nvml.SUCCESS { t := float64(meminfo.Total) / (1024 * 1024) - _, skip = stringArrayContains(m.config.ExcludeMetrics, "mem_total") - y, err := lp.New("mem_total", tags, m.meta, map[string]interface{}{"value": t}, time.Now()) + _, skip = stringArrayContains(m.config.ExcludeMetrics, "nv_mem_total") + y, err := lp.New("nv_mem_total", tags, m.meta, map[string]interface{}{"value": t}, time.Now()) if err == nil && !skip { y.AddMeta("unit", "MByte") output <- y } f := float64(meminfo.Used) / (1024 * 1024) - _, skip = stringArrayContains(m.config.ExcludeMetrics, "fb_memory") - y, err = lp.New("fb_memory", tags, m.meta, map[string]interface{}{"value": f}, time.Now()) + _, skip = stringArrayContains(m.config.ExcludeMetrics, "nv_fb_memory") + y, err = lp.New("nv_fb_memory", tags, m.meta, map[string]interface{}{"value": f}, time.Now()) if err == nil && !skip { y.AddMeta("unit", "MByte") output <- y @@ -105,8 +105,8 @@ func (m *NvidiaCollector) Read(interval time.Duration, output chan lp.CCMetric) temp, ret := nvml.DeviceGetTemperature(device, nvml.TEMPERATURE_GPU) if ret == nvml.SUCCESS { - _, skip = stringArrayContains(m.config.ExcludeMetrics, "temp") - y, err := lp.New("temp", tags, m.meta, map[string]interface{}{"value": float64(temp)}, time.Now()) + _, skip = stringArrayContains(m.config.ExcludeMetrics, "nv_temp") + y, err := lp.New("nv_temp", tags, m.meta, map[string]interface{}{"value": float64(temp)}, time.Now()) if err == nil && !skip { y.AddMeta("unit", "degC") output <- y @@ -115,8 +115,8 @@ func (m *NvidiaCollector) Read(interval time.Duration, output chan lp.CCMetric) fan, ret := nvml.DeviceGetFanSpeed(device) if ret == nvml.SUCCESS { - _, skip = stringArrayContains(m.config.ExcludeMetrics, "fan") - y, err := lp.New("fan", tags, m.meta, map[string]interface{}{"value": float64(fan)}, time.Now()) + _, skip = stringArrayContains(m.config.ExcludeMetrics, 
"nv_fan") + y, err := lp.New("nv_fan", tags, m.meta, map[string]interface{}{"value": float64(fan)}, time.Now()) if err == nil && !skip { output <- y } @@ -128,19 +128,19 @@ func (m *NvidiaCollector) Read(interval time.Duration, output chan lp.CCMetric) var err error switch ecc_pend { case nvml.FEATURE_DISABLED: - y, err = lp.New("ecc_mode", tags, m.meta, map[string]interface{}{"value": string("OFF")}, time.Now()) + y, err = lp.New("nv_ecc_mode", tags, m.meta, map[string]interface{}{"value": string("OFF")}, time.Now()) case nvml.FEATURE_ENABLED: - y, err = lp.New("ecc_mode", tags, m.meta, map[string]interface{}{"value": string("ON")}, time.Now()) + y, err = lp.New("nv_ecc_mode", tags, m.meta, map[string]interface{}{"value": string("ON")}, time.Now()) default: - y, err = lp.New("ecc_mode", tags, m.meta, map[string]interface{}{"value": string("UNKNOWN")}, time.Now()) + y, err = lp.New("nv_ecc_mode", tags, m.meta, map[string]interface{}{"value": string("UNKNOWN")}, time.Now()) } - _, skip = stringArrayContains(m.config.ExcludeMetrics, "ecc_mode") + _, skip = stringArrayContains(m.config.ExcludeMetrics, "nv_ecc_mode") if err == nil && !skip { output <- y } } else if ret == nvml.ERROR_NOT_SUPPORTED { - _, skip = stringArrayContains(m.config.ExcludeMetrics, "ecc_mode") - y, err := lp.New("ecc_mode", tags, m.meta, map[string]interface{}{"value": string("N/A")}, time.Now()) + _, skip = stringArrayContains(m.config.ExcludeMetrics, "nv_ecc_mode") + y, err := lp.New("nv_ecc_mode", tags, m.meta, map[string]interface{}{"value": string("N/A")}, time.Now()) if err == nil && !skip { output <- y } @@ -148,8 +148,8 @@ func (m *NvidiaCollector) Read(interval time.Duration, output chan lp.CCMetric) pstate, ret := nvml.DeviceGetPerformanceState(device) if ret == nvml.SUCCESS { - _, skip = stringArrayContains(m.config.ExcludeMetrics, "perf_state") - y, err := lp.New("perf_state", tags, m.meta, map[string]interface{}{"value": fmt.Sprintf("P%d", int(pstate))}, time.Now()) + _, skip = 
stringArrayContains(m.config.ExcludeMetrics, "nv_perf_state") + y, err := lp.New("nv_perf_state", tags, m.meta, map[string]interface{}{"value": fmt.Sprintf("P%d", int(pstate))}, time.Now()) if err == nil && !skip { output <- y } @@ -157,8 +157,8 @@ func (m *NvidiaCollector) Read(interval time.Duration, output chan lp.CCMetric) power, ret := nvml.DeviceGetPowerUsage(device) if ret == nvml.SUCCESS { - _, skip = stringArrayContains(m.config.ExcludeMetrics, "power_usage_report") - y, err := lp.New("power_usage_report", tags, m.meta, map[string]interface{}{"value": float64(power) / 1000}, time.Now()) + _, skip = stringArrayContains(m.config.ExcludeMetrics, "nv_power_usage_report") + y, err := lp.New("nv_power_usage_report", tags, m.meta, map[string]interface{}{"value": float64(power) / 1000}, time.Now()) if err == nil && !skip { output <- y } @@ -166,8 +166,8 @@ func (m *NvidiaCollector) Read(interval time.Duration, output chan lp.CCMetric) gclk, ret := nvml.DeviceGetClockInfo(device, nvml.CLOCK_GRAPHICS) if ret == nvml.SUCCESS { - _, skip = stringArrayContains(m.config.ExcludeMetrics, "graphics_clock_report") - y, err := lp.New("graphics_clock_report", tags, m.meta, map[string]interface{}{"value": float64(gclk)}, time.Now()) + _, skip = stringArrayContains(m.config.ExcludeMetrics, "nv_graphics_clock_report") + y, err := lp.New("nv_graphics_clock_report", tags, m.meta, map[string]interface{}{"value": float64(gclk)}, time.Now()) if err == nil && !skip { output <- y } @@ -175,8 +175,8 @@ func (m *NvidiaCollector) Read(interval time.Duration, output chan lp.CCMetric) smclk, ret := nvml.DeviceGetClockInfo(device, nvml.CLOCK_SM) if ret == nvml.SUCCESS { - _, skip = stringArrayContains(m.config.ExcludeMetrics, "sm_clock_report") - y, err := lp.New("sm_clock_report", tags, m.meta, map[string]interface{}{"value": float64(smclk)}, time.Now()) + _, skip = stringArrayContains(m.config.ExcludeMetrics, "nv_sm_clock_report") + y, err := lp.New("nv_sm_clock_report", tags, m.meta, 
map[string]interface{}{"value": float64(smclk)}, time.Now()) if err == nil && !skip { output <- y } @@ -184,8 +184,8 @@ func (m *NvidiaCollector) Read(interval time.Duration, output chan lp.CCMetric) memclk, ret := nvml.DeviceGetClockInfo(device, nvml.CLOCK_MEM) if ret == nvml.SUCCESS { - _, skip = stringArrayContains(m.config.ExcludeMetrics, "mem_clock_report") - y, err := lp.New("mem_clock_report", tags, m.meta, map[string]interface{}{"value": float64(memclk)}, time.Now()) + _, skip = stringArrayContains(m.config.ExcludeMetrics, "nv_mem_clock_report") + y, err := lp.New("nv_mem_clock_report", tags, m.meta, map[string]interface{}{"value": float64(memclk)}, time.Now()) if err == nil && !skip { output <- y } @@ -193,8 +193,8 @@ func (m *NvidiaCollector) Read(interval time.Duration, output chan lp.CCMetric) max_gclk, ret := nvml.DeviceGetMaxClockInfo(device, nvml.CLOCK_GRAPHICS) if ret == nvml.SUCCESS { - _, skip = stringArrayContains(m.config.ExcludeMetrics, "max_graphics_clock") - y, err := lp.New("max_graphics_clock", tags, m.meta, map[string]interface{}{"value": float64(max_gclk)}, time.Now()) + _, skip = stringArrayContains(m.config.ExcludeMetrics, "nv_max_graphics_clock") + y, err := lp.New("nv_max_graphics_clock", tags, m.meta, map[string]interface{}{"value": float64(max_gclk)}, time.Now()) if err == nil && !skip { output <- y } @@ -202,8 +202,8 @@ func (m *NvidiaCollector) Read(interval time.Duration, output chan lp.CCMetric) max_smclk, ret := nvml.DeviceGetClockInfo(device, nvml.CLOCK_SM) if ret == nvml.SUCCESS { - _, skip = stringArrayContains(m.config.ExcludeMetrics, "max_sm_clock") - y, err := lp.New("max_sm_clock", tags, m.meta, map[string]interface{}{"value": float64(max_smclk)}, time.Now()) + _, skip = stringArrayContains(m.config.ExcludeMetrics, "nv_max_sm_clock") + y, err := lp.New("nv_max_sm_clock", tags, m.meta, map[string]interface{}{"value": float64(max_smclk)}, time.Now()) if err == nil && !skip { output <- y } @@ -211,8 +211,8 @@ func (m 
*NvidiaCollector) Read(interval time.Duration, output chan lp.CCMetric) max_memclk, ret := nvml.DeviceGetClockInfo(device, nvml.CLOCK_MEM) if ret == nvml.SUCCESS { - _, skip = stringArrayContains(m.config.ExcludeMetrics, "max_mem_clock") - y, err := lp.New("max_mem_clock", tags, m.meta, map[string]interface{}{"value": float64(max_memclk)}, time.Now()) + _, skip = stringArrayContains(m.config.ExcludeMetrics, "nv_max_mem_clock") + y, err := lp.New("nv_max_mem_clock", tags, m.meta, map[string]interface{}{"value": float64(max_memclk)}, time.Now()) if err == nil && !skip { output <- y } @@ -220,8 +220,8 @@ func (m *NvidiaCollector) Read(interval time.Duration, output chan lp.CCMetric) ecc_db, ret := nvml.DeviceGetTotalEccErrors(device, 1, 1) if ret == nvml.SUCCESS { - _, skip = stringArrayContains(m.config.ExcludeMetrics, "ecc_db_error") - y, err := lp.New("ecc_db_error", tags, m.meta, map[string]interface{}{"value": float64(ecc_db)}, time.Now()) + _, skip = stringArrayContains(m.config.ExcludeMetrics, "nv_ecc_db_error") + y, err := lp.New("nv_ecc_db_error", tags, m.meta, map[string]interface{}{"value": float64(ecc_db)}, time.Now()) if err == nil && !skip { output <- y } @@ -229,8 +229,8 @@ func (m *NvidiaCollector) Read(interval time.Duration, output chan lp.CCMetric) ecc_sb, ret := nvml.DeviceGetTotalEccErrors(device, 0, 1) if ret == nvml.SUCCESS { - _, skip = stringArrayContains(m.config.ExcludeMetrics, "ecc_sb_error") - y, err := lp.New("ecc_sb_error", tags, m.meta, map[string]interface{}{"value": float64(ecc_sb)}, time.Now()) + _, skip = stringArrayContains(m.config.ExcludeMetrics, "nv_ecc_sb_error") + y, err := lp.New("nv_ecc_sb_error", tags, m.meta, map[string]interface{}{"value": float64(ecc_sb)}, time.Now()) if err == nil && !skip { output <- y } @@ -238,8 +238,8 @@ func (m *NvidiaCollector) Read(interval time.Duration, output chan lp.CCMetric) pwr_limit, ret := nvml.DeviceGetPowerManagementLimit(device) if ret == nvml.SUCCESS { - _, skip = 
stringArrayContains(m.config.ExcludeMetrics, "power_man_limit") - y, err := lp.New("power_man_limit", tags, m.meta, map[string]interface{}{"value": float64(pwr_limit)}, time.Now()) + _, skip = stringArrayContains(m.config.ExcludeMetrics, "nv_power_man_limit") + y, err := lp.New("nv_power_man_limit", tags, m.meta, map[string]interface{}{"value": float64(pwr_limit)}, time.Now()) if err == nil && !skip { output <- y } @@ -247,8 +247,8 @@ func (m *NvidiaCollector) Read(interval time.Duration, output chan lp.CCMetric) enc_util, _, ret := nvml.DeviceGetEncoderUtilization(device) if ret == nvml.SUCCESS { - _, skip = stringArrayContains(m.config.ExcludeMetrics, "encoder_util") - y, err := lp.New("encoder_util", tags, m.meta, map[string]interface{}{"value": float64(enc_util)}, time.Now()) + _, skip = stringArrayContains(m.config.ExcludeMetrics, "nv_encoder_util") + y, err := lp.New("nv_encoder_util", tags, m.meta, map[string]interface{}{"value": float64(enc_util)}, time.Now()) if err == nil && !skip { output <- y } @@ -256,8 +256,8 @@ func (m *NvidiaCollector) Read(interval time.Duration, output chan lp.CCMetric) dec_util, _, ret := nvml.DeviceGetDecoderUtilization(device) if ret == nvml.SUCCESS { - _, skip = stringArrayContains(m.config.ExcludeMetrics, "decoder_util") - y, err := lp.New("decoder_util", tags, m.meta, map[string]interface{}{"value": float64(dec_util)}, time.Now()) + _, skip = stringArrayContains(m.config.ExcludeMetrics, "nv_decoder_util") + y, err := lp.New("nv_decoder_util", tags, m.meta, map[string]interface{}{"value": float64(dec_util)}, time.Now()) if err == nil && !skip { output <- y } diff --git a/collectors/nvidiaMetric.md b/collectors/nvidiaMetric.md index e2e08e5..afe8b9e 100644 --- a/collectors/nvidiaMetric.md +++ b/collectors/nvidiaMetric.md @@ -7,33 +7,33 @@ "0","1" ], "exclude_metrics": [ - "fb_memory", - "fan" + "nv_fb_memory", + "nv_fan" ] } ``` Metrics: -* `util` -* `mem_util` -* `mem_total` -* `fb_memory` -* `temp` -* `fan` -* `ecc_mode` -* 
`perf_state` -* `power_usage_report` -* `graphics_clock_report` -* `sm_clock_report` -* `mem_clock_report` -* `max_graphics_clock` -* `max_sm_clock` -* `max_mem_clock` -* `ecc_db_error` -* `ecc_sb_error` -* `power_man_limit` -* `encoder_util` -* `decoder_util` +* `nv_util` +* `nv_mem_util` +* `nv_mem_total` +* `nv_fb_memory` +* `nv_temp` +* `nv_fan` +* `nv_ecc_mode` +* `nv_perf_state` +* `nv_power_usage_report` +* `nv_graphics_clock_report` +* `nv_sm_clock_report` +* `nv_mem_clock_report` +* `nv_max_graphics_clock` +* `nv_max_sm_clock` +* `nv_max_mem_clock` +* `nv_ecc_db_error` +* `nv_ecc_sb_error` +* `nv_power_man_limit` +* `nv_encoder_util` +* `nv_decoder_util` It uses a separate `type` in the metrics. The output metric looks like this: `,type=accelerator,type-id= value= ` From 7077452a5df2c3f42c4b714ca01afdcbcbd9b3b2 Mon Sep 17 00:00:00 2001 From: Holger Obermaier <40787752+ho-ob@users.noreply.github.com> Date: Wed, 26 Jan 2022 20:18:47 +0100 Subject: [PATCH 41/45] Split InfiniBand metric collector, one using /sys filesystem reads and one using perfquery. 
--- collectors/collectorManager.go | 35 ++-- collectors/infinibandMetric.go | 243 ++++++----------------- collectors/infinibandPerfQueryMetric.go | 250 ++++++++++++++++++++++++ 3 files changed, 332 insertions(+), 196 deletions(-) create mode 100644 collectors/infinibandPerfQueryMetric.go diff --git a/collectors/collectorManager.go b/collectors/collectorManager.go index 6140dbf..98b6115 100644 --- a/collectors/collectorManager.go +++ b/collectors/collectorManager.go @@ -14,23 +14,24 @@ import ( // Map of all available metric collectors var AvailableCollectors = map[string]MetricCollector{ - "likwid": new(LikwidCollector), - "loadavg": new(LoadavgCollector), - "memstat": new(MemstatCollector), - "netstat": new(NetstatCollector), - "ibstat": new(InfinibandCollector), - "lustrestat": new(LustreCollector), - "cpustat": new(CpustatCollector), - "topprocs": new(TopProcsCollector), - "nvidia": new(NvidiaCollector), - "customcmd": new(CustomCmdCollector), - "diskstat": new(DiskstatCollector), - "tempstat": new(TempCollector), - "ipmistat": new(IpmiCollector), - "gpfs": new(GpfsCollector), - "cpufreq": new(CPUFreqCollector), - "cpufreq_cpuinfo": new(CPUFreqCpuInfoCollector), - "nfsstat": new(NfsCollector), + "likwid": new(LikwidCollector), + "loadavg": new(LoadavgCollector), + "memstat": new(MemstatCollector), + "netstat": new(NetstatCollector), + "ibstat": new(InfinibandCollector), + "ibstat_perfquery": new(InfinibandPerfQueryCollector), + "lustrestat": new(LustreCollector), + "cpustat": new(CpustatCollector), + "topprocs": new(TopProcsCollector), + "nvidia": new(NvidiaCollector), + "customcmd": new(CustomCmdCollector), + "diskstat": new(DiskstatCollector), + "tempstat": new(TempCollector), + "ipmistat": new(IpmiCollector), + "gpfs": new(GpfsCollector), + "cpufreq": new(CPUFreqCollector), + "cpufreq_cpuinfo": new(CPUFreqCpuInfoCollector), + "nfsstat": new(NfsCollector), } type collectorManager struct { diff --git a/collectors/infinibandMetric.go 
b/collectors/infinibandMetric.go index af4e579..f506f37 100644 --- a/collectors/infinibandMetric.go +++ b/collectors/infinibandMetric.go @@ -3,10 +3,9 @@ package collectors import ( "fmt" "io/ioutil" - "log" - "os/exec" + lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric" - // "os" + "encoding/json" "errors" "path/filepath" @@ -15,35 +14,25 @@ import ( "time" ) -const ( - IBBASEPATH = `/sys/class/infiniband/` - PERFQUERY = `/usr/sbin/perfquery` -) - -type InfinibandCollectorConfig struct { - ExcludeDevices []string `json:"exclude_devices,omitempty"` - PerfQueryPath string `json:"perfquery_path"` -} +const IB_BASEPATH = `/sys/class/infiniband/` type InfinibandCollector struct { metricCollector - tags map[string]string - lids map[string]map[string]string - config InfinibandCollectorConfig - use_perfquery bool + tags map[string]string + lids map[string]map[string]string + config struct { + ExcludeDevices []string `json:"exclude_devices,omitempty"` + } } func (m *InfinibandCollector) Help() { - fmt.Println("This collector includes all devices that can be found below ", IBBASEPATH) - fmt.Println("and where any of the ports provides a 'lid' file (glob ", IBBASEPATH, "//ports//lid).") + fmt.Println("This collector includes all devices that can be found below ", IB_BASEPATH) + fmt.Println("and where any of the ports provides a 'lid' file (glob ", IB_BASEPATH, "//ports//lid).") fmt.Println("The devices can be filtered with the 'exclude_devices' option in the configuration.") fmt.Println("For each found LIDs the collector calls the 'perfquery' command") - fmt.Println("The path to the 'perfquery' command can be configured with the 'perfquery_path' option") - fmt.Println("in the configuration") fmt.Println("") fmt.Println("Full configuration object:") fmt.Println("\"ibstat\" : {") - fmt.Println(" \"perfquery_path\" : \"path/to/perfquery\" # if omitted, it searches in $PATH") fmt.Println(" \"exclude_devices\" : [\"dev1\"]") fmt.Println("}") fmt.Println("") @@ 
-57,7 +46,6 @@ func (m *InfinibandCollector) Help() { func (m *InfinibandCollector) Init(config json.RawMessage) error { var err error m.name = "InfinibandCollector" - m.use_perfquery = false m.setup() m.meta = map[string]string{"source": m.name, "group": "Network"} m.tags = map[string]string{"type": "node"} @@ -67,19 +55,13 @@ func (m *InfinibandCollector) Init(config json.RawMessage) error { return err } } - if len(m.config.PerfQueryPath) == 0 { - path, err := exec.LookPath("perfquery") - if err == nil { - m.config.PerfQueryPath = path - } - } m.lids = make(map[string]map[string]string) - p := fmt.Sprintf("%s/*/ports/*/lid", string(IBBASEPATH)) + p := fmt.Sprintf("%s/*/ports/*/lid", string(IB_BASEPATH)) files, err := filepath.Glob(p) for _, f := range files { lid, err := ioutil.ReadFile(f) if err == nil { - plist := strings.Split(strings.Replace(f, string(IBBASEPATH), "", -1), "/") + plist := strings.Split(strings.Replace(f, string(IB_BASEPATH), "", -1), "/") skip := false for _, d := range m.config.ExcludeDevices { if d == plist[0] { @@ -93,152 +75,11 @@ func (m *InfinibandCollector) Init(config json.RawMessage) error { } } - for _, ports := range m.lids { - for port, lid := range ports { - args := fmt.Sprintf("-r %s %s 0xf000", lid, port) - command := exec.Command(m.config.PerfQueryPath, args) - command.Wait() - _, err := command.Output() - if err == nil { - m.use_perfquery = true - } - break - } - break + if len(m.lids) == 0 { + return errors.New("No usable IB devices") } - if len(m.lids) > 0 { - m.init = true - } else { - err = errors.New("No usable devices") - } - - return err -} - -func (m *InfinibandCollector) doPerfQuery(cmd string, dev string, lid string, port string, tags map[string]string, output chan lp.CCMetric) error { - - args := fmt.Sprintf("-r %s %s 0xf000", lid, port) - command := exec.Command(cmd, args) - command.Wait() - stdout, err := command.Output() - if err != nil { - log.Print(err) - return err - } - ll := strings.Split(string(stdout), 
"\n") - - for _, line := range ll { - if strings.HasPrefix(line, "PortRcvData") || strings.HasPrefix(line, "RcvData") { - lv := strings.Fields(line) - v, err := strconv.ParseFloat(lv[1], 64) - if err == nil { - y, err := lp.New("ib_recv", tags, m.meta, map[string]interface{}{"value": float64(v)}, time.Now()) - if err == nil { - output <- y - } - } - } - if strings.HasPrefix(line, "PortXmitData") || strings.HasPrefix(line, "XmtData") { - lv := strings.Fields(line) - v, err := strconv.ParseFloat(lv[1], 64) - if err == nil { - y, err := lp.New("ib_xmit", tags, m.meta, map[string]interface{}{"value": float64(v)}, time.Now()) - if err == nil { - output <- y - } - } - } - if strings.HasPrefix(line, "PortRcvPkts") || strings.HasPrefix(line, "RcvPkts") { - lv := strings.Fields(line) - v, err := strconv.ParseFloat(lv[1], 64) - if err == nil { - y, err := lp.New("ib_recv_pkts", tags, m.meta, map[string]interface{}{"value": float64(v)}, time.Now()) - if err == nil { - output <- y - } - } - } - if strings.HasPrefix(line, "PortXmitPkts") || strings.HasPrefix(line, "XmtPkts") { - lv := strings.Fields(line) - v, err := strconv.ParseFloat(lv[1], 64) - if err == nil { - y, err := lp.New("ib_xmit_pkts", tags, m.meta, map[string]interface{}{"value": float64(v)}, time.Now()) - if err == nil { - output <- y - } - } - } - if strings.HasPrefix(line, "PortRcvPkts") || strings.HasPrefix(line, "RcvPkts") { - lv := strings.Fields(line) - v, err := strconv.ParseFloat(lv[1], 64) - if err == nil { - y, err := lp.New("ib_recv_pkts", tags, m.meta, map[string]interface{}{"value": float64(v)}, time.Now()) - if err == nil { - output <- y - } - } - } - if strings.HasPrefix(line, "PortXmitPkts") || strings.HasPrefix(line, "XmtPkts") { - lv := strings.Fields(line) - v, err := strconv.ParseFloat(lv[1], 64) - if err == nil { - y, err := lp.New("ib_xmit_pkts", tags, m.meta, map[string]interface{}{"value": float64(v)}, time.Now()) - if err == nil { - output <- y - } - } - } - } - return nil -} - -func (m 
*InfinibandCollector) doSysfsRead(dev string, lid string, port string, tags map[string]string, output chan lp.CCMetric) error { - path := fmt.Sprintf("%s/%s/ports/%s/counters/", string(IBBASEPATH), dev, port) - buffer, err := ioutil.ReadFile(fmt.Sprintf("%s/port_rcv_data", path)) - if err == nil { - data := strings.Replace(string(buffer), "\n", "", -1) - v, err := strconv.ParseFloat(data, 64) - if err == nil { - y, err := lp.New("ib_recv", tags, m.meta, map[string]interface{}{"value": float64(v)}, time.Now()) - if err == nil { - output <- y - } - } - } - buffer, err = ioutil.ReadFile(fmt.Sprintf("%s/port_xmit_data", path)) - if err == nil { - data := strings.Replace(string(buffer), "\n", "", -1) - v, err := strconv.ParseFloat(data, 64) - if err == nil { - y, err := lp.New("ib_xmit", tags, m.meta, map[string]interface{}{"value": float64(v)}, time.Now()) - if err == nil { - output <- y - } - } - } - buffer, err = ioutil.ReadFile(fmt.Sprintf("%s/port_rcv_packets", path)) - if err == nil { - data := strings.Replace(string(buffer), "\n", "", -1) - v, err := strconv.ParseFloat(data, 64) - if err == nil { - y, err := lp.New("ib_recv_pkts", tags, m.meta, map[string]interface{}{"value": float64(v)}, time.Now()) - if err == nil { - output <- y - } - } - } - buffer, err = ioutil.ReadFile(fmt.Sprintf("%s/port_xmit_packets", path)) - if err == nil { - data := strings.Replace(string(buffer), "\n", "", -1) - v, err := strconv.ParseFloat(data, 64) - if err == nil { - y, err := lp.New("ib_xmit_pkts", tags, m.meta, map[string]interface{}{"value": float64(v)}, time.Now()) - if err == nil { - output <- y - } - } - } + m.init = true return nil } @@ -247,11 +88,55 @@ func (m *InfinibandCollector) Read(interval time.Duration, output chan lp.CCMetr if m.init { for dev, ports := range m.lids { for port, lid := range ports { - tags := map[string]string{"type": "node", "device": dev, "port": port} - if m.use_perfquery { - m.doPerfQuery(m.config.PerfQueryPath, dev, lid, port, tags, output) - 
} else { - m.doSysfsRead(dev, lid, port, tags, output) + tags := map[string]string{ + "type": "node", + "device": dev, + "port": port, + "lid": lid} + path := fmt.Sprintf("%s/%s/ports/%s/counters/", string(IB_BASEPATH), dev, port) + buffer, err := ioutil.ReadFile(fmt.Sprintf("%s/port_rcv_data", path)) + if err == nil { + data := strings.Replace(string(buffer), "\n", "", -1) + v, err := strconv.ParseFloat(data, 64) + if err == nil { + y, err := lp.New("ib_recv", tags, m.meta, map[string]interface{}{"value": float64(v)}, time.Now()) + if err == nil { + output <- y + } + } + } + buffer, err = ioutil.ReadFile(fmt.Sprintf("%s/port_xmit_data", path)) + if err == nil { + data := strings.Replace(string(buffer), "\n", "", -1) + v, err := strconv.ParseFloat(data, 64) + if err == nil { + y, err := lp.New("ib_xmit", tags, m.meta, map[string]interface{}{"value": float64(v)}, time.Now()) + if err == nil { + output <- y + } + } + } + buffer, err = ioutil.ReadFile(fmt.Sprintf("%s/port_rcv_packets", path)) + if err == nil { + data := strings.Replace(string(buffer), "\n", "", -1) + v, err := strconv.ParseFloat(data, 64) + if err == nil { + y, err := lp.New("ib_recv_pkts", tags, m.meta, map[string]interface{}{"value": float64(v)}, time.Now()) + if err == nil { + output <- y + } + } + } + buffer, err = ioutil.ReadFile(fmt.Sprintf("%s/port_xmit_packets", path)) + if err == nil { + data := strings.Replace(string(buffer), "\n", "", -1) + v, err := strconv.ParseFloat(data, 64) + if err == nil { + y, err := lp.New("ib_xmit_pkts", tags, m.meta, map[string]interface{}{"value": float64(v)}, time.Now()) + if err == nil { + output <- y + } + } } } } diff --git a/collectors/infinibandPerfQueryMetric.go b/collectors/infinibandPerfQueryMetric.go new file mode 100644 index 0000000..d8f7bf4 --- /dev/null +++ b/collectors/infinibandPerfQueryMetric.go @@ -0,0 +1,250 @@ +package collectors + +import ( + "fmt" + "io/ioutil" + "log" + "os/exec" + + lp 
"github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric" + + // "os" + "encoding/json" + "errors" + "path/filepath" + "strconv" + "strings" + "time" +) + +const PERFQUERY = `/usr/sbin/perfquery` + +type InfinibandPerfQueryCollector struct { + metricCollector + tags map[string]string + lids map[string]map[string]string + config struct { + ExcludeDevices []string `json:"exclude_devices,omitempty"` + PerfQueryPath string `json:"perfquery_path"` + } +} + +func (m *InfinibandPerfQueryCollector) Help() { + fmt.Println("This collector includes all devices that can be found below ", IB_BASEPATH) + fmt.Println("and where any of the ports provides a 'lid' file (glob ", IB_BASEPATH, "//ports//lid).") + fmt.Println("The devices can be filtered with the 'exclude_devices' option in the configuration.") + fmt.Println("For each found LIDs the collector calls the 'perfquery' command") + fmt.Println("The path to the 'perfquery' command can be configured with the 'perfquery_path' option") + fmt.Println("in the configuration") + fmt.Println("") + fmt.Println("Full configuration object:") + fmt.Println("\"ibstat\" : {") + fmt.Println(" \"perfquery_path\" : \"path/to/perfquery\" # if omitted, it searches in $PATH") + fmt.Println(" \"exclude_devices\" : [\"dev1\"]") + fmt.Println("}") + fmt.Println("") + fmt.Println("Metrics:") + fmt.Println("- ib_recv") + fmt.Println("- ib_xmit") + fmt.Println("- ib_recv_pkts") + fmt.Println("- ib_xmit_pkts") +} + +func (m *InfinibandPerfQueryCollector) Init(config json.RawMessage) error { + var err error + m.name = "InfinibandCollectorPerfQuery" + m.setup() + m.meta = map[string]string{"source": m.name, "group": "Network"} + m.tags = map[string]string{"type": "node"} + if len(config) > 0 { + err = json.Unmarshal(config, &m.config) + if err != nil { + return err + } + } + if len(m.config.PerfQueryPath) == 0 { + path, err := exec.LookPath("perfquery") + if err == nil { + m.config.PerfQueryPath = path + } + } + m.lids = 
make(map[string]map[string]string) + p := fmt.Sprintf("%s/*/ports/*/lid", string(IB_BASEPATH)) + files, err := filepath.Glob(p) + for _, f := range files { + lid, err := ioutil.ReadFile(f) + if err == nil { + plist := strings.Split(strings.Replace(f, string(IB_BASEPATH), "", -1), "/") + skip := false + for _, d := range m.config.ExcludeDevices { + if d == plist[0] { + skip = true + } + } + if !skip { + m.lids[plist[0]] = make(map[string]string) + m.lids[plist[0]][plist[2]] = string(lid) + } + } + } + + for _, ports := range m.lids { + for port, lid := range ports { + args := fmt.Sprintf("-r %s %s 0xf000", lid, port) + command := exec.Command(m.config.PerfQueryPath, args) + command.Wait() + _, err := command.Output() + if err != nil { + return fmt.Errorf("Failed to execute %s: %v", m.config.PerfQueryPath, err) + } + } + } + + if len(m.lids) == 0 { + return errors.New("No usable IB devices") + } + + m.init = true + return nil +} + +func (m *InfinibandPerfQueryCollector) doPerfQuery(cmd string, dev string, lid string, port string, tags map[string]string, output chan lp.CCMetric) error { + + args := fmt.Sprintf("-r %s %s 0xf000", lid, port) + command := exec.Command(cmd, args) + command.Wait() + stdout, err := command.Output() + if err != nil { + log.Print(err) + return err + } + ll := strings.Split(string(stdout), "\n") + + for _, line := range ll { + if strings.HasPrefix(line, "PortRcvData") || strings.HasPrefix(line, "RcvData") { + lv := strings.Fields(line) + v, err := strconv.ParseFloat(lv[1], 64) + if err == nil { + y, err := lp.New("ib_recv", tags, m.meta, map[string]interface{}{"value": float64(v)}, time.Now()) + if err == nil { + output <- y + } + } + } + if strings.HasPrefix(line, "PortXmitData") || strings.HasPrefix(line, "XmtData") { + lv := strings.Fields(line) + v, err := strconv.ParseFloat(lv[1], 64) + if err == nil { + y, err := lp.New("ib_xmit", tags, m.meta, map[string]interface{}{"value": float64(v)}, time.Now()) + if err == nil { + output <- y + } + 
} + } + if strings.HasPrefix(line, "PortRcvPkts") || strings.HasPrefix(line, "RcvPkts") { + lv := strings.Fields(line) + v, err := strconv.ParseFloat(lv[1], 64) + if err == nil { + y, err := lp.New("ib_recv_pkts", tags, m.meta, map[string]interface{}{"value": float64(v)}, time.Now()) + if err == nil { + output <- y + } + } + } + if strings.HasPrefix(line, "PortXmitPkts") || strings.HasPrefix(line, "XmtPkts") { + lv := strings.Fields(line) + v, err := strconv.ParseFloat(lv[1], 64) + if err == nil { + y, err := lp.New("ib_xmit_pkts", tags, m.meta, map[string]interface{}{"value": float64(v)}, time.Now()) + if err == nil { + output <- y + } + } + } + if strings.HasPrefix(line, "PortRcvPkts") || strings.HasPrefix(line, "RcvPkts") { + lv := strings.Fields(line) + v, err := strconv.ParseFloat(lv[1], 64) + if err == nil { + y, err := lp.New("ib_recv_pkts", tags, m.meta, map[string]interface{}{"value": float64(v)}, time.Now()) + if err == nil { + output <- y + } + } + } + if strings.HasPrefix(line, "PortXmitPkts") || strings.HasPrefix(line, "XmtPkts") { + lv := strings.Fields(line) + v, err := strconv.ParseFloat(lv[1], 64) + if err == nil { + y, err := lp.New("ib_xmit_pkts", tags, m.meta, map[string]interface{}{"value": float64(v)}, time.Now()) + if err == nil { + output <- y + } + } + } + } + return nil +} + +func (m *InfinibandPerfQueryCollector) Read(interval time.Duration, output chan lp.CCMetric) { + + if m.init { + for dev, ports := range m.lids { + for port, lid := range ports { + tags := map[string]string{ + "type": "node", + "device": dev, + "port": port, + "lid": lid} + path := fmt.Sprintf("%s/%s/ports/%s/counters/", string(IB_BASEPATH), dev, port) + buffer, err := ioutil.ReadFile(fmt.Sprintf("%s/port_rcv_data", path)) + if err == nil { + data := strings.Replace(string(buffer), "\n", "", -1) + v, err := strconv.ParseFloat(data, 64) + if err == nil { + y, err := lp.New("ib_recv", tags, m.meta, map[string]interface{}{"value": float64(v)}, time.Now()) + if err == nil 
{ + output <- y + } + } + } + buffer, err = ioutil.ReadFile(fmt.Sprintf("%s/port_xmit_data", path)) + if err == nil { + data := strings.Replace(string(buffer), "\n", "", -1) + v, err := strconv.ParseFloat(data, 64) + if err == nil { + y, err := lp.New("ib_xmit", tags, m.meta, map[string]interface{}{"value": float64(v)}, time.Now()) + if err == nil { + output <- y + } + } + } + buffer, err = ioutil.ReadFile(fmt.Sprintf("%s/port_rcv_packets", path)) + if err == nil { + data := strings.Replace(string(buffer), "\n", "", -1) + v, err := strconv.ParseFloat(data, 64) + if err == nil { + y, err := lp.New("ib_recv_pkts", tags, m.meta, map[string]interface{}{"value": float64(v)}, time.Now()) + if err == nil { + output <- y + } + } + } + buffer, err = ioutil.ReadFile(fmt.Sprintf("%s/port_xmit_packets", path)) + if err == nil { + data := strings.Replace(string(buffer), "\n", "", -1) + v, err := strconv.ParseFloat(data, 64) + if err == nil { + y, err := lp.New("ib_xmit_pkts", tags, m.meta, map[string]interface{}{"value": float64(v)}, time.Now()) + if err == nil { + output <- y + } + } + } + } + } + } +} + +func (m *InfinibandPerfQueryCollector) Close() { + m.init = false +} From e1d0aacd1efad81caf10f8a4727382ab2abab6b5 Mon Sep 17 00:00:00 2001 From: Holger Obermaier <40787752+ho-ob@users.noreply.github.com> Date: Thu, 27 Jan 2022 11:08:27 +0100 Subject: [PATCH 42/45] Moved as much work as possible to Init() --- collectors/infinibandMetric.go | 177 ++++++++++++++++++--------------- 1 file changed, 99 insertions(+), 78 deletions(-) diff --git a/collectors/infinibandMetric.go b/collectors/infinibandMetric.go index f506f37..6b4c882 100644 --- a/collectors/infinibandMetric.go +++ b/collectors/infinibandMetric.go @@ -2,12 +2,12 @@ package collectors import ( "fmt" - "io/ioutil" + "os" lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric" + "golang.org/x/sys/unix" "encoding/json" - "errors" "path/filepath" "strconv" "strings" @@ -16,13 +16,20 @@ import ( const 
IB_BASEPATH = `/sys/class/infiniband/` +type InfinibandCollectorInfo struct { + LID string // IB local Identifier (LID) + device string // IB device + port string // IB device port + portCounterFiles map[string]string // mapping counter name -> file + tagSet map[string]string // corresponding tag list +} + type InfinibandCollector struct { metricCollector - tags map[string]string - lids map[string]map[string]string config struct { - ExcludeDevices []string `json:"exclude_devices,omitempty"` + ExcludeDevices []string `json:"exclude_devices,omitempty"` // IB device to exclude e.g. mlx5_0 } + info []InfinibandCollectorInfo } func (m *InfinibandCollector) Help() { @@ -43,99 +50,113 @@ func (m *InfinibandCollector) Help() { fmt.Println("- ib_xmit_pkts") } +// Init initializes the Infiniband collector by walking through files below IB_BASEPATH func (m *InfinibandCollector) Init(config json.RawMessage) error { var err error m.name = "InfinibandCollector" m.setup() - m.meta = map[string]string{"source": m.name, "group": "Network"} - m.tags = map[string]string{"type": "node"} + m.meta = map[string]string{ + "source": m.name, + "group": "Network", + } if len(config) > 0 { err = json.Unmarshal(config, &m.config) if err != nil { return err } } - m.lids = make(map[string]map[string]string) - p := fmt.Sprintf("%s/*/ports/*/lid", string(IB_BASEPATH)) - files, err := filepath.Glob(p) - for _, f := range files { - lid, err := ioutil.ReadFile(f) - if err == nil { - plist := strings.Split(strings.Replace(f, string(IB_BASEPATH), "", -1), "/") - skip := false - for _, d := range m.config.ExcludeDevices { - if d == plist[0] { - skip = true - } - } - if !skip { - m.lids[plist[0]] = make(map[string]string) - m.lids[plist[0]][plist[2]] = string(lid) - } - } + + // Loop for all InfiniBand directories + globPattern := filepath.Join(IB_BASEPATH, "*", "ports", "*") + ibDirs, err := filepath.Glob(globPattern) + if err != nil { + return fmt.Errorf("Unable to glob files with pattern %s: %v", 
globPattern, err) + } + if ibDirs == nil { + return fmt.Errorf("Unable to find any directories with pattern %s", globPattern) } - if len(m.lids) == 0 { - return errors.New("No usable IB devices") + for _, path := range ibDirs { + + // Skip, when no LID is assigned + LID, ok := readOneLine(path + "/lid") + if !ok || LID == "0x0" { + continue + } + + // Get device and port component + pathSplit := strings.Split(path, string(os.PathSeparator)) + device := pathSplit[4] + port := pathSplit[6] + + // Skip excluded devices + skip := false + for _, excludedDevice := range m.config.ExcludeDevices { + if excludedDevice == device { + skip = true + break + } + } + if skip { + continue + } + + // Check access to counter files + countersDir := filepath.Join(path, "counters") + portCounterFiles := map[string]string{ + "ib_recv": filepath.Join(countersDir, "port_rcv_data"), + "ib_xmit": filepath.Join(countersDir, "port_xmit_data"), + "ib_recv_pkts": filepath.Join(countersDir, "port_rcv_packets"), + "ib_xmit_pkts": filepath.Join(countersDir, "port_xmit_packets"), + } + for _, counterFile := range portCounterFiles { + err := unix.Access(counterFile, unix.R_OK) + if err != nil { + return fmt.Errorf("Unable to access %s: %v", counterFile, err) + } + } + + m.info = append(m.info, + InfinibandCollectorInfo{ + LID: LID, + device: device, + port: port, + portCounterFiles: portCounterFiles, + tagSet: map[string]string{ + "type": "node", + "device": device, + "port": port, + "lid": LID, + }, + }) + } + + if len(m.info) == 0 { + return fmt.Errorf("Found no IB devices") } m.init = true return nil } +// Read reads Infiniband counter files below IB_BASEPATH func (m *InfinibandCollector) Read(interval time.Duration, output chan lp.CCMetric) { - if m.init { - for dev, ports := range m.lids { - for port, lid := range ports { - tags := map[string]string{ - "type": "node", - "device": dev, - "port": port, - "lid": lid} - path := fmt.Sprintf("%s/%s/ports/%s/counters/", string(IB_BASEPATH), dev, port) 
- buffer, err := ioutil.ReadFile(fmt.Sprintf("%s/port_rcv_data", path)) - if err == nil { - data := strings.Replace(string(buffer), "\n", "", -1) - v, err := strconv.ParseFloat(data, 64) - if err == nil { - y, err := lp.New("ib_recv", tags, m.meta, map[string]interface{}{"value": float64(v)}, time.Now()) - if err == nil { - output <- y - } - } - } - buffer, err = ioutil.ReadFile(fmt.Sprintf("%s/port_xmit_data", path)) - if err == nil { - data := strings.Replace(string(buffer), "\n", "", -1) - v, err := strconv.ParseFloat(data, 64) - if err == nil { - y, err := lp.New("ib_xmit", tags, m.meta, map[string]interface{}{"value": float64(v)}, time.Now()) - if err == nil { - output <- y - } - } - } - buffer, err = ioutil.ReadFile(fmt.Sprintf("%s/port_rcv_packets", path)) - if err == nil { - data := strings.Replace(string(buffer), "\n", "", -1) - v, err := strconv.ParseFloat(data, 64) - if err == nil { - y, err := lp.New("ib_recv_pkts", tags, m.meta, map[string]interface{}{"value": float64(v)}, time.Now()) - if err == nil { - output <- y - } - } - } - buffer, err = ioutil.ReadFile(fmt.Sprintf("%s/port_xmit_packets", path)) - if err == nil { - data := strings.Replace(string(buffer), "\n", "", -1) - v, err := strconv.ParseFloat(data, 64) - if err == nil { - y, err := lp.New("ib_xmit_pkts", tags, m.meta, map[string]interface{}{"value": float64(v)}, time.Now()) - if err == nil { - output <- y - } + // Check if already initialized + if !m.init { + return + } + + now := time.Now() + for i := range m.info { + + // device info + info := &m.info[i] + for counterName, counterFile := range info.portCounterFiles { + if data, ok := readOneLine(counterFile); ok { + if v, err := strconv.ParseInt(data, 10, 64); err == nil { + if y, err := lp.New(counterName, info.tagSet, m.meta, map[string]interface{}{"value": v}, now); err == nil { + output <- y } } } From b9236dcc31d56d432038c10480a69171695a4ace Mon Sep 17 00:00:00 2001 From: Holger Obermaier <40787752+ho-ob@users.noreply.github.com> 
Date: Thu, 27 Jan 2022 17:43:00 +0100 Subject: [PATCH 43/45] Handle shutdown sequentially --- collectors/collectorManager.go | 5 +-- collectors/cpufreqMetric.go | 1 + internal/metricRouter/metricRouter.go | 12 ++---- internal/multiChanTicker/multiChanTicker.go | 5 ++- metric-collector.go | 43 +++++++++++---------- receivers/receiveManager.go | 5 ++- sinks/sinkManager.go | 8 ++-- 7 files changed, 37 insertions(+), 42 deletions(-) diff --git a/collectors/collectorManager.go b/collectors/collectorManager.go index 98b6115..0b2dfcc 100644 --- a/collectors/collectorManager.go +++ b/collectors/collectorManager.go @@ -151,11 +151,8 @@ func (cm *collectorManager) AddOutput(output chan lp.CCMetric) { // Close finishes / stops the metric collector manager func (cm *collectorManager) Close() { - select { - case cm.done <- true: - default: - } cclog.ComponentDebug("CollectorManager", "CLOSE") + cm.done <- true } // New creates a new initialized metric collector manager diff --git a/collectors/cpufreqMetric.go b/collectors/cpufreqMetric.go index 5febed9..f3309ff 100644 --- a/collectors/cpufreqMetric.go +++ b/collectors/cpufreqMetric.go @@ -10,6 +10,7 @@ import ( "strconv" "strings" "time" + lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric" "golang.org/x/sys/unix" ) diff --git a/internal/metricRouter/metricRouter.go b/internal/metricRouter/metricRouter.go index 57ba708..6327d95 100644 --- a/internal/metricRouter/metricRouter.go +++ b/internal/metricRouter/metricRouter.go @@ -225,18 +225,12 @@ func (r *metricRouter) AddOutput(output chan lp.CCMetric) { // Close finishes / stops the metric router func (r *metricRouter) Close() { - select { - case r.done <- true: - default: - } + cclog.ComponentDebug("MetricRouter", "CLOSE") + r.done <- true if r.config.IntervalStamp { cclog.ComponentDebug("MetricRouter", "TIMER CLOSE") - select { - case r.timerdone <- true: - default: - } + r.timerdone <- true } - cclog.ComponentDebug("MetricRouter", "CLOSE") } // New creates a 
new initialized metric router diff --git a/internal/multiChanTicker/multiChanTicker.go b/internal/multiChanTicker/multiChanTicker.go index 37778ad..a9394ab 100644 --- a/internal/multiChanTicker/multiChanTicker.go +++ b/internal/multiChanTicker/multiChanTicker.go @@ -1,8 +1,9 @@ package multiChanTicker import ( - cclog "github.com/ClusterCockpit/cc-metric-collector/internal/ccLogger" "time" + + cclog "github.com/ClusterCockpit/cc-metric-collector/internal/ccLogger" ) type multiChanTicker struct { @@ -49,8 +50,8 @@ func (t *multiChanTicker) AddChannel(channel chan time.Time) { } func (t *multiChanTicker) Close() { - t.done <- true cclog.ComponentDebug("MultiChanTicker", "CLOSE") + t.done <- true } func NewTicker(duration time.Duration) MultiChanTicker { diff --git a/metric-collector.go b/metric-collector.go index 6a6c1b3..0cd368a 100644 --- a/metric-collector.go +++ b/metric-collector.go @@ -6,6 +6,7 @@ import ( "os" "os/signal" "strings" + "syscall" "github.com/ClusterCockpit/cc-metric-collector/collectors" "github.com/ClusterCockpit/cc-metric-collector/receivers" @@ -154,10 +155,19 @@ func ReadCli() map[string]string { // return nil //} -// General shutdown function that gets executed in case of interrupt or graceful shutdown -func shutdown(config *RuntimeConfig) { +// General shutdownHandler function that gets executed in case of interrupt or graceful shutdownHandler +func shutdownHandler(config *RuntimeConfig, shutdownSignal chan os.Signal) { + <-shutdownSignal + + // Remove shutdown handler + // every additional interrupt signal will stop without cleaning up + signal.Stop(shutdownSignal) + cclog.Info("Shutdown...") + + cclog.Debug("Shutdown Ticker...") config.Ticker.Close() + if config.CollectManager != nil { cclog.Debug("Shutdown CollectManager...") config.CollectManager.Close() @@ -182,18 +192,6 @@ func shutdown(config *RuntimeConfig) { config.Sync.Done() } -// Register an interrupt handler for Ctrl+C and similar. 
At signal, -// all collectors are closed -func prepare_shutdown(config *RuntimeConfig) { - sigs := make(chan os.Signal, 1) - signal.Notify(sigs, os.Interrupt) - - go func(config *RuntimeConfig) { - <-sigs - shutdown(config) - }(config) -} - func mainFunc() int { var err error use_recv := false @@ -249,7 +247,7 @@ func mainFunc() int { cclog.Error(err.Error()) return 1 } - RouterToSinksChannel := make(chan lp.CCMetric) + RouterToSinksChannel := make(chan lp.CCMetric, 200) rcfg.SinkManager.AddInput(RouterToSinksChannel) rcfg.Router.AddOutput(RouterToSinksChannel) } @@ -259,7 +257,7 @@ func mainFunc() int { cclog.Error(err.Error()) return 1 } - CollectToRouterChannel := make(chan lp.CCMetric) + CollectToRouterChannel := make(chan lp.CCMetric, 200) rcfg.CollectManager.AddOutput(CollectToRouterChannel) rcfg.Router.AddCollectorInput(CollectToRouterChannel) } @@ -269,12 +267,17 @@ func mainFunc() int { cclog.Error(err.Error()) return 1 } - ReceiveToRouterChannel := make(chan lp.CCMetric) + ReceiveToRouterChannel := make(chan lp.CCMetric, 200) rcfg.ReceiveManager.AddOutput(ReceiveToRouterChannel) rcfg.Router.AddReceiverInput(ReceiveToRouterChannel) use_recv = true } - prepare_shutdown(&rcfg) + + shutdownSignal := make(chan os.Signal, 1) + signal.Notify(shutdownSignal, os.Interrupt) + signal.Notify(shutdownSignal, syscall.SIGTERM) + go shutdownHandler(&rcfg, shutdownSignal) + rcfg.Sync.Add(1) rcfg.Router.Start() rcfg.SinkManager.Start() @@ -288,10 +291,10 @@ func mainFunc() int { if rcfg.CliArgs["once"] == "true" { x := 1.2 * float64(rcfg.ConfigFile.Interval) time.Sleep(time.Duration(int(x)) * time.Second) - shutdown(&rcfg) + shutdownSignal <- os.Interrupt } - // Wait until receiving an interrupt + // Wait until shutdownHandler is executed rcfg.Sync.Wait() return 0 } diff --git a/receivers/receiveManager.go b/receivers/receiveManager.go index e6a2eee..c570aa4 100644 --- a/receivers/receiveManager.go +++ b/receivers/receiveManager.go @@ -2,10 +2,11 @@ package receivers 
import ( "encoding/json" - lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric" - cclog "github.com/ClusterCockpit/cc-metric-collector/internal/ccLogger" "os" "sync" + + cclog "github.com/ClusterCockpit/cc-metric-collector/internal/ccLogger" + lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric" ) var AvailableReceivers = map[string]Receiver{ diff --git a/sinks/sinkManager.go b/sinks/sinkManager.go index efcb5a0..4be8313 100644 --- a/sinks/sinkManager.go +++ b/sinks/sinkManager.go @@ -68,10 +68,11 @@ func (sm *sinkManager) Start() { go func() { done := func() { for _, s := range sm.outputs { + s.Flush() s.Close() } - cclog.ComponentDebug("SinkManager", "DONE") sm.wg.Done() + cclog.ComponentDebug("SinkManager", "DONE") } for { select { @@ -128,11 +129,8 @@ func (sm *sinkManager) AddOutput(rawConfig json.RawMessage) error { } func (sm *sinkManager) Close() { - select { - case sm.done <- true: - default: - } cclog.ComponentDebug("SinkManager", "CLOSE") + sm.done <- true } func New(wg *sync.WaitGroup, sinkConfigFile string) (SinkManager, error) { From aea3e2c6b183a9bad5f428c30f178b7fea1fd5bd Mon Sep 17 00:00:00 2001 From: Holger Obermaier <40787752+ho-ob@users.noreply.github.com> Date: Thu, 27 Jan 2022 20:45:22 +0100 Subject: [PATCH 44/45] Place wait group Add() and Done() near to each other --- collectors/collectorManager.go | 4 +- internal/metricRouter/metricRouter.go | 8 ++- metric-collector.go | 86 +++++++++++++++------------ 3 files changed, 57 insertions(+), 41 deletions(-) diff --git a/collectors/collectorManager.go b/collectors/collectorManager.go index 0b2dfcc..7b0a9b7 100644 --- a/collectors/collectorManager.go +++ b/collectors/collectorManager.go @@ -102,18 +102,18 @@ func (cm *collectorManager) Init(ticker mct.MultiChanTicker, duration time.Durat // Start starts the metric collector manager func (cm *collectorManager) Start() { - cm.wg.Add(1) tick := make(chan time.Time) cm.ticker.AddChannel(tick) + cm.wg.Add(1) go func() { + 
defer cm.wg.Done() // Collector manager is done done := func() { // close all metric collectors for _, c := range cm.collectors { c.Close() } - cm.wg.Done() cclog.ComponentDebug("CollectorManager", "DONE") } diff --git a/internal/metricRouter/metricRouter.go b/internal/metricRouter/metricRouter.go index 6327d95..a321aae 100644 --- a/internal/metricRouter/metricRouter.go +++ b/internal/metricRouter/metricRouter.go @@ -80,7 +80,10 @@ func (r *metricRouter) StartTimer() { m := make(chan time.Time) r.ticker.AddChannel(m) r.timerdone = make(chan bool) + + r.wg.Add(1) go func() { + defer r.wg.Done() for { select { case <-r.timerdone: @@ -169,13 +172,11 @@ func (r *metricRouter) DoDelTags(point lp.CCMetric) { // Start starts the metric router func (r *metricRouter) Start() { - r.wg.Add(1) r.timestamp = time.Now() if r.config.IntervalStamp { r.StartTimer() } done := func() { - r.wg.Done() cclog.ComponentDebug("MetricRouter", "DONE") } forward := func(point lp.CCMetric) { @@ -186,7 +187,10 @@ func (r *metricRouter) Start() { o <- point } } + + r.wg.Add(1) go func() { + defer r.wg.Done() for { // RouterLoop: select { diff --git a/metric-collector.go b/metric-collector.go index 0cd368a..3975b62 100644 --- a/metric-collector.go +++ b/metric-collector.go @@ -51,25 +51,16 @@ type RuntimeConfig struct { CliArgs map[string]string ConfigFile CentralConfigFile - Router mr.MetricRouter - CollectManager collectors.CollectorManager - SinkManager sinks.SinkManager - ReceiveManager receivers.ReceiveManager - Ticker mct.MultiChanTicker + MetricRouter mr.MetricRouter + CollectManager collectors.CollectorManager + SinkManager sinks.SinkManager + ReceiveManager receivers.ReceiveManager + MultiChanTicker mct.MultiChanTicker Channels []chan lp.CCMetric Sync sync.WaitGroup } -func prepare_runcfg() RuntimeConfig { - return RuntimeConfig{ - Router: nil, - CollectManager: nil, - SinkManager: nil, - ReceiveManager: nil, - } -} - //// Structure of the configuration file //type GlobalConfig struct { 
// Sink sinks.SinkConfig `json:"sink"` @@ -157,8 +148,9 @@ func ReadCli() map[string]string { // General shutdownHandler function that gets executed in case of interrupt or graceful shutdownHandler func shutdownHandler(config *RuntimeConfig, shutdownSignal chan os.Signal) { - <-shutdownSignal + defer config.Sync.Done() + <-shutdownSignal // Remove shutdown handler // every additional interrupt signal will stop without cleaning up signal.Stop(shutdownSignal) @@ -166,7 +158,7 @@ func shutdownHandler(config *RuntimeConfig, shutdownSignal chan os.Signal) { cclog.Info("Shutdown...") cclog.Debug("Shutdown Ticker...") - config.Ticker.Close() + config.MultiChanTicker.Close() if config.CollectManager != nil { cclog.Debug("Shutdown CollectManager...") @@ -176,9 +168,9 @@ func shutdownHandler(config *RuntimeConfig, shutdownSignal chan os.Signal) { cclog.Debug("Shutdown ReceiveManager...") config.ReceiveManager.Close() } - if config.Router != nil { + if config.MetricRouter != nil { cclog.Debug("Shutdown Router...") - config.Router.Close() + config.MetricRouter.Close() } if config.SinkManager != nil { cclog.Debug("Shutdown SinkManager...") @@ -189,15 +181,20 @@ func shutdownHandler(config *RuntimeConfig, shutdownSignal chan os.Signal) { // RemovePidfile(pidfile) // pidfile = config.CliArgs["pidfile"] // RemovePidfile(pidfile) - config.Sync.Done() } func mainFunc() int { var err error use_recv := false - rcfg := prepare_runcfg() - rcfg.CliArgs = ReadCli() + // Initialize runtime configuration + rcfg := RuntimeConfig{ + MetricRouter: nil, + CollectManager: nil, + SinkManager: nil, + ReceiveManager: nil, + CliArgs: ReadCli(), + } // Load and check configuration err = LoadCentralConfiguration(rcfg.CliArgs["configfile"], &rcfg.ConfigFile) @@ -225,61 +222,75 @@ func mainFunc() int { rcfg.Hostname = strings.SplitN(rcfg.Hostname, `.`, 2)[0] // err = CreatePidfile(rcfg.CliArgs["pidfile"]) - if rcfg.CliArgs["logfile"] != "stderr" { - cclog.SetOutput(rcfg.CliArgs["logfile"]) + // Set log 
file + if logfile := rcfg.CliArgs["logfile"]; logfile != "stderr" { + cclog.SetOutput(logfile) } - // err = SetLogging(rcfg.CliArgs["logfile"]) - // if err != nil { - // log.Print("Error setting up logging system to ", rcfg.CliArgs["logfile"], " on ", rcfg.Hostname) - // return - // } - rcfg.Ticker = mct.NewTicker(rcfg.Interval) + + // Creat new multi channel ticker + rcfg.MultiChanTicker = mct.NewTicker(rcfg.Interval) + + // Create new metric router if len(rcfg.ConfigFile.RouterConfigFile) > 0 { - rcfg.Router, err = mr.New(rcfg.Ticker, &rcfg.Sync, rcfg.ConfigFile.RouterConfigFile) + rcfg.MetricRouter, err = mr.New(rcfg.MultiChanTicker, &rcfg.Sync, rcfg.ConfigFile.RouterConfigFile) if err != nil { cclog.Error(err.Error()) return 1 } } + + // Create new sink if len(rcfg.ConfigFile.SinkConfigFile) > 0 { rcfg.SinkManager, err = sinks.New(&rcfg.Sync, rcfg.ConfigFile.SinkConfigFile) if err != nil { cclog.Error(err.Error()) return 1 } + + // Connect metric router to sink manager RouterToSinksChannel := make(chan lp.CCMetric, 200) rcfg.SinkManager.AddInput(RouterToSinksChannel) - rcfg.Router.AddOutput(RouterToSinksChannel) + rcfg.MetricRouter.AddOutput(RouterToSinksChannel) } + + // Create new collector manager if len(rcfg.ConfigFile.CollectorConfigFile) > 0 { - rcfg.CollectManager, err = collectors.New(rcfg.Ticker, rcfg.Duration, &rcfg.Sync, rcfg.ConfigFile.CollectorConfigFile) + rcfg.CollectManager, err = collectors.New(rcfg.MultiChanTicker, rcfg.Duration, &rcfg.Sync, rcfg.ConfigFile.CollectorConfigFile) if err != nil { cclog.Error(err.Error()) return 1 } + + // Connect collector manager to metric router CollectToRouterChannel := make(chan lp.CCMetric, 200) rcfg.CollectManager.AddOutput(CollectToRouterChannel) - rcfg.Router.AddCollectorInput(CollectToRouterChannel) + rcfg.MetricRouter.AddCollectorInput(CollectToRouterChannel) } + + // Create new receive manager if len(rcfg.ConfigFile.ReceiverConfigFile) > 0 { rcfg.ReceiveManager, err = receivers.New(&rcfg.Sync, 
rcfg.ConfigFile.ReceiverConfigFile) if err != nil { cclog.Error(err.Error()) return 1 } + + // Connect receive manager to metric router ReceiveToRouterChannel := make(chan lp.CCMetric, 200) rcfg.ReceiveManager.AddOutput(ReceiveToRouterChannel) - rcfg.Router.AddReceiverInput(ReceiveToRouterChannel) + rcfg.MetricRouter.AddReceiverInput(ReceiveToRouterChannel) use_recv = true } + // Create shutdown handler shutdownSignal := make(chan os.Signal, 1) signal.Notify(shutdownSignal, os.Interrupt) signal.Notify(shutdownSignal, syscall.SIGTERM) + rcfg.Sync.Add(1) go shutdownHandler(&rcfg, shutdownSignal) - rcfg.Sync.Add(1) - rcfg.Router.Start() + // Start the managers + rcfg.MetricRouter.Start() rcfg.SinkManager.Start() rcfg.CollectManager.Start() @@ -294,8 +305,9 @@ func mainFunc() int { shutdownSignal <- os.Interrupt } - // Wait until shutdownHandler is executed + // Wait that all goroutines finish rcfg.Sync.Wait() + return 0 } From 82f5c1c5d010b1a7669f7489800d39b823d585a2 Mon Sep 17 00:00:00 2001 From: Holger Obermaier <40787752+ho-ob@users.noreply.github.com> Date: Fri, 28 Jan 2022 09:42:19 +0100 Subject: [PATCH 45/45] Minimum requirement go version 1.17 --- collectors/gpfsMetric.go | 8 +- go.mod | 25 ++- go.sum | 457 +++++++-------------------------------- 3 files changed, 94 insertions(+), 396 deletions(-) diff --git a/collectors/gpfsMetric.go b/collectors/gpfsMetric.go index f1d3d75..bc1852b 100644 --- a/collectors/gpfsMetric.go +++ b/collectors/gpfsMetric.go @@ -13,19 +13,18 @@ import ( "strconv" "strings" "time" + lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric" ) type GpfsCollector struct { metricCollector tags map[string]string - config struct { Mmpmon string `json:"mmpmon"` } } - func (m *GpfsCollector) Init(config json.RawMessage) error { var err error m.name = "GpfsCollector" @@ -120,7 +119,6 @@ func (m *GpfsCollector) Read(interval time.Duration, output chan lp.CCMetric) { m.tags["filesystem"] = filesystem - // return code rc, err := 
strconv.Atoi(key_value["_rc_"]) if err != nil { @@ -132,8 +130,6 @@ func (m *GpfsCollector) Read(interval time.Duration, output chan lp.CCMetric) { continue } - /* requires go 1.17 - // unix epoch in microseconds timestampInt, err := strconv.ParseInt(key_value["_t_"]+key_value["_tu_"], 10, 64) timestamp := time.UnixMicro(timestampInt) if err != nil { @@ -142,8 +138,6 @@ func (m *GpfsCollector) Read(interval time.Duration, output chan lp.CCMetric) { key_value["_t_"]+key_value["_tu_"], err.Error()) continue } - */ - timestamp := time.Now() // bytes read bytesRead, err := strconv.ParseInt(key_value["_br_"], 10, 64) diff --git a/go.mod b/go.mod index d20d431..da4f3ea 100644 --- a/go.mod +++ b/go.mod @@ -1,14 +1,25 @@ module github.com/ClusterCockpit/cc-metric-collector -go 1.16 +go 1.17 require ( github.com/NVIDIA/go-nvml v0.11.1-0 - github.com/influxdata/influxdb-client-go/v2 v2.2.2 - github.com/influxdata/line-protocol v0.0.0-20210311194329-9aa0e372d097 - github.com/nats-io/nats.go v1.10.0 - github.com/nats-io/nkeys v0.1.4 // indirect - github.com/prometheus/client_golang v1.10.0 // indirect - golang.org/x/sys v0.0.0-20210309074719-68d13333faf2 + github.com/influxdata/influxdb-client-go/v2 v2.7.0 + github.com/influxdata/line-protocol v0.0.0-20210922203350-b1ad95c89adf + github.com/nats-io/nats.go v1.13.1-0.20211122170419-d7c1d78a50fc + golang.org/x/sys v0.0.0-20220114195835-da31bd327af9 gopkg.in/Knetic/govaluate.v2 v2.3.0 ) + +require ( + github.com/deepmap/oapi-codegen v1.8.2 // indirect + github.com/golang/protobuf v1.5.2 // indirect + github.com/nats-io/nats-server/v2 v2.7.0 // indirect + github.com/nats-io/nkeys v0.3.0 // indirect + github.com/nats-io/nuid v1.0.1 // indirect + github.com/pkg/errors v0.9.1 // indirect + golang.org/x/crypto v0.0.0-20220112180741-5e0467b6c7ce // indirect + golang.org/x/net v0.0.0-20211112202133-69e39bad7dc2 // indirect + google.golang.org/protobuf v1.27.1 // indirect + gopkg.in/yaml.v2 v2.3.0 // indirect +) diff --git a/go.sum 
b/go.sum index a6f98d7..311633a 100644 --- a/go.sum +++ b/go.sum @@ -1,449 +1,142 @@ -cloud.google.com/go v0.26.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw= -cloud.google.com/go v0.34.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw= -github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= -github.com/Knetic/govaluate v3.0.1-0.20171022003610-9aa49832a739+incompatible/go.mod h1:r7JcOSlj0wfOMncg0iLm8Leh48TZaKVeNIfJntJ2wa0= github.com/NVIDIA/go-nvml v0.11.1-0 h1:XHSz3zZKC4NCP2ja1rI7++DXFhA+uDhdYa3MykCTGHY= github.com/NVIDIA/go-nvml v0.11.1-0/go.mod h1:hy7HYeQy335x6nEss0Ne3PYqleRa6Ct+VKD9RQ4nyFs= -github.com/Shopify/sarama v1.19.0/go.mod h1:FVkBWblsNy7DGZRfXLU0O9RCGt5g3g3yEuWXgklEdEo= -github.com/Shopify/toxiproxy v2.1.4+incompatible/go.mod h1:OXgGpZ6Cli1/URJOF1DMxUHB2q5Ap20/P/eIdh4G0pI= -github.com/VividCortex/gohistogram v1.0.0/go.mod h1:Pf5mBqqDxYaXu3hDrrU+w6nw50o/4+TcAqDqk/vUH7g= -github.com/afex/hystrix-go v0.0.0-20180502004556-fa1af6a1f4f5/go.mod h1:SkGFH1ia65gfNATL8TAiHDNxPzPdmEL5uirI2Uyuz6c= -github.com/alecthomas/template v0.0.0-20160405071501-a0175ee3bccc/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc= -github.com/alecthomas/template v0.0.0-20190718012654-fb15b899a751/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc= -github.com/alecthomas/units v0.0.0-20151022065526-2efee857e7cf/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0= -github.com/alecthomas/units v0.0.0-20190717042225-c3de453c63f4/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0= -github.com/alecthomas/units v0.0.0-20190924025748-f65c72e2690d/go.mod h1:rBZYJk541a8SKzHPHnH3zbiI+7dagKZ0cgpgrD7Fyho= -github.com/apache/thrift v0.12.0/go.mod h1:cp2SuWMxlEZw2r+iP2GNCdIi4C1qmUzdZFSVb+bacwQ= -github.com/apache/thrift v0.13.0/go.mod h1:cp2SuWMxlEZw2r+iP2GNCdIi4C1qmUzdZFSVb+bacwQ= -github.com/armon/circbuf v0.0.0-20150827004946-bbbad097214e/go.mod h1:3U/XgcO3hCbHZ8TKRvWD2dDTCfh9M9ya+I9JpbB7O8o= -github.com/armon/go-metrics 
v0.0.0-20180917152333-f0300d1749da/go.mod h1:Q73ZrmVTwzkszR9V5SSuryQ31EELlFMUz1kKyl939pY= -github.com/armon/go-radix v0.0.0-20180808171621-7fddfc383310/go.mod h1:ufUuZ+zHj4x4TnLV4JWEpy2hxWSpsRywHrMgIH9cCH8= -github.com/aryann/difflib v0.0.0-20170710044230-e206f873d14a/go.mod h1:DAHtR1m6lCRdSC2Tm3DSWRPvIPr6xNKyeHdqDQSQT+A= -github.com/aws/aws-lambda-go v1.13.3/go.mod h1:4UKl9IzQMoD+QF79YdCuzCwp8VbmG4VAQwij/eHl5CU= -github.com/aws/aws-sdk-go v1.27.0/go.mod h1:KmX6BPdI08NWTb3/sm4ZGu5ShLoqVDhKgpiN924inxo= -github.com/aws/aws-sdk-go-v2 v0.18.0/go.mod h1:JWVYvqSMppoMJC0x5wdwiImzgXTI9FuZwxzkQq9wy+g= -github.com/beorn7/perks v0.0.0-20180321164747-3a771d992973/go.mod h1:Dwedo/Wpr24TaqPxmxbtue+5NUziq4I4S80YR8gNf3Q= -github.com/beorn7/perks v1.0.0/go.mod h1:KWe93zE9D1o94FZ5RNwFwVgaQK1VOXiVxmqh+CedLV8= -github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM= -github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw= -github.com/bgentry/speakeasy v0.1.0/go.mod h1:+zsyZBPWlz7T6j88CTgSN5bM796AkVf0kBD4zp0CCIs= -github.com/casbin/casbin/v2 v2.1.2/go.mod h1:YcPU1XXisHhLzuxH9coDNf2FbKpjGlbCg3n9yuLkIJQ= -github.com/cenkalti/backoff v2.2.1+incompatible/go.mod h1:90ReRw6GdpyfrHakVjL/QHaoyV4aDUVVkXQJJJ3NXXM= -github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU= -github.com/cespare/xxhash/v2 v2.1.1 h1:6MnRN8NT7+YBpUIWxHtefFZOKTAPgGjpQSxqLNn0+qY= -github.com/cespare/xxhash/v2 v2.1.1/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= -github.com/clbanning/x2j v0.0.0-20191024224557-825249438eec/go.mod h1:jMjuTZXRI4dUb/I5gc9Hdhagfvm9+RyrPryS/auMzxE= -github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw= -github.com/cockroachdb/datadriven v0.0.0-20190809214429-80d97fb3cbaa/go.mod h1:zn76sxSg3SzpJ0PPJaLDCu+Bu0Lg3sKTORVIj19EIF8= -github.com/codahale/hdrhistogram v0.0.0-20161010025455-3a0bb77429bd/go.mod 
h1:sE/e/2PUdi/liOCUjSTXgM1o87ZssimdTWN964YiIeI= -github.com/coreos/go-semver v0.2.0/go.mod h1:nnelYz7RCh+5ahJtPPxZlU+153eP4D4r3EedlOD2RNk= -github.com/coreos/go-systemd v0.0.0-20180511133405-39ca1b05acc7/go.mod h1:F5haX7vjVVG0kc13fIWeqUViNPyEJxv/OmvnBo0Yme4= -github.com/coreos/pkg v0.0.0-20160727233714-3ac0863d7acf/go.mod h1:E3G3o1h8I7cfcXa63jLwjI0eiQQMgzzUDFVpN/nH/eA= -github.com/cpuguy83/go-md2man/v2 v2.0.0-20190314233015-f79a8a8ca69d/go.mod h1:maD7wRr/U5Z6m/iR4s+kqSMx2CaBsrgA7czyZG/E6dU= -github.com/creack/pty v1.1.7/go.mod h1:lj5s0c3V2DBrqTV7llrYr5NG6My20zk30Fl46Y7DoTY= github.com/cyberdelia/templates v0.0.0-20141128023046-ca7fffd4298c/go.mod h1:GyV+0YP4qX0UQ7r2MoYZ+AvYDp12OF5yg4q8rGnyNh4= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= -github.com/deepmap/oapi-codegen v1.3.13 h1:9HKGCsdJqE4dnrQ8VerFS0/1ZOJPmAhN+g8xgp8y3K4= -github.com/deepmap/oapi-codegen v1.3.13/go.mod h1:WAmG5dWY8/PYHt4vKxlt90NsbHMAOCiteYKZMiIRfOo= +github.com/deepmap/oapi-codegen v1.8.2 h1:SegyeYGcdi0jLLrpbCMoJxnUUn8GBXHsvr4rbzjuhfU= +github.com/deepmap/oapi-codegen v1.8.2/go.mod h1:YLgSKSDv/bZQB7N4ws6luhozi3cEdRktEqrX88CvjIw= github.com/dgrijalva/jwt-go v3.2.0+incompatible/go.mod h1:E3ru+11k8xSBh+hMPgOLZmtrrCbhqsmaPHjLKYnJCaQ= -github.com/dustin/go-humanize v0.0.0-20171111073723-bb3d318650d4/go.mod h1:HtrtbFcZ19U5GC7JDqmcUSB87Iq5E25KnS6fMYU6eOk= -github.com/eapache/go-resiliency v1.1.0/go.mod h1:kFI+JgMyC7bLPUVY133qvEBtVayf5mFgVsvEsIPBvNs= -github.com/eapache/go-xerial-snappy v0.0.0-20180814174437-776d5712da21/go.mod h1:+020luEh2TKB4/GOp8oxxtq0Daoen/Cii55CzbTV6DU= -github.com/eapache/queue v1.1.0/go.mod h1:6eCeP0CKFpHLu8blIFXhExK/dRa7WDZfr6jVFPTqq+I= -github.com/edsrzf/mmap-go v1.0.0/go.mod h1:YO35OhQPt3KJa3ryjFM5Bs14WD66h8eGKpfaBNrHW5M= -github.com/envoyproxy/go-control-plane 
v0.6.9/go.mod h1:SBwIajubJHhxtWwsL9s8ss4safvEdbitLhGGK48rN6g= -github.com/envoyproxy/go-control-plane v0.9.1-0.20191026205805-5f8ba28d4473/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4= -github.com/envoyproxy/protoc-gen-validate v0.1.0/go.mod h1:iSmxcyjqTsJpI2R4NaDN7+kN2VEUnK/pcBlmesArF7c= -github.com/fatih/color v1.7.0/go.mod h1:Zm6kSWBoL9eyXnKyktHP6abPY2pDugNf5KwzbycvMj4= -github.com/franela/goblin v0.0.0-20200105215937-c9ffbefa60db/go.mod h1:7dvUGVsVBjqR7JHJk0brhHOZYGmfBYOrK0ZhYMEtBr4= -github.com/franela/goreq v0.0.0-20171204163338-bcd34c9993f8/go.mod h1:ZhphrRTfi2rbfLwlschooIH4+wKKDR4Pdxhh+TRoA20= -github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMoQvtojpjFo= -github.com/getkin/kin-openapi v0.13.0/go.mod h1:WGRs2ZMM1Q8LR1QBEwUxC6RJEfaBcD0s+pcEVXFuAjw= +github.com/getkin/kin-openapi v0.61.0/go.mod h1:7Yn5whZr5kJi6t+kShccXS8ae1APpYTW6yheSwk8Yi4= github.com/ghodss/yaml v1.0.0/go.mod h1:4dBDuWmgqj2HViK6kFavaiC9ZROes6MMH2rRYeMEF04= -github.com/go-chi/chi v4.0.2+incompatible/go.mod h1:eB3wogJHnLi3x/kFX2A+IbTBlXxmMeXJVKy9tTv1XzQ= -github.com/go-kit/kit v0.8.0/go.mod h1:xBxKIO96dXMWWy0MnWVtmwkA9/13aqxPnvrjFYMA2as= -github.com/go-kit/kit v0.9.0/go.mod h1:xBxKIO96dXMWWy0MnWVtmwkA9/13aqxPnvrjFYMA2as= -github.com/go-kit/kit v0.10.0/go.mod h1:xUsJbQ/Fp4kEt7AFgCuvyX4a71u8h9jB8tj/ORgOZ7o= -github.com/go-logfmt/logfmt v0.3.0/go.mod h1:Qt1PoO58o5twSAckw1HlFXLmHsOX5/0LbT9GBnD5lWE= -github.com/go-logfmt/logfmt v0.4.0/go.mod h1:3RMwSq7FuexP4Kalkev3ejPJsZTpXXBr9+V4qmtdjCk= -github.com/go-logfmt/logfmt v0.5.0/go.mod h1:wCYkCAKZfumFQihp8CzCvQ3paCTfi41vtzG1KdI/P7A= -github.com/go-sql-driver/mysql v1.4.0/go.mod h1:zAC/RDZ24gD3HViQzih4MyKcchzm+sOG5ZlKdlhCg5w= -github.com/go-stack/stack v1.8.0/go.mod h1:v0f6uXyyMGvRgIKkXu+yp6POWl0qKG85gN/melR3HDY= -github.com/gogo/googleapis v1.1.0/go.mod h1:gf4bu3Q80BeJ6H1S1vYPm8/ELATdvryBaNFGgqEef3s= -github.com/gogo/protobuf v1.1.1/go.mod h1:r8qH/GZQm5c6nD/R0oafs1akxWv10x8SbQlK7atdtwQ= -github.com/gogo/protobuf 
v1.2.0/go.mod h1:r8qH/GZQm5c6nD/R0oafs1akxWv10x8SbQlK7atdtwQ= -github.com/gogo/protobuf v1.2.1/go.mod h1:hp+jE20tsWTFYpLwKvXlhS1hjn+gTNwPg2I6zVXpSg4= -github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod h1:SBH7ygxi8pfUlaOkMMuAQtPIUF8ecWP5IEl/CR7VP2Q= -github.com/golang/groupcache v0.0.0-20160516000752-02826c3e7903/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= -github.com/golang/groupcache v0.0.0-20190702054246-869f871628b6/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= -github.com/golang/mock v1.1.1/go.mod h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfbm0A= -github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= -github.com/golang/protobuf v1.3.1/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= -github.com/golang/protobuf v1.3.2/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= +github.com/go-chi/chi/v5 v5.0.0/go.mod h1:BBug9lr0cqtdAhsu6R4AAdvufI0/XBzAQSsUqJpoZOs= +github.com/go-openapi/jsonpointer v0.19.5/go.mod h1:Pl9vOtqEWErmShwVjC8pYs9cog34VGT37dQOVbmoatg= +github.com/go-openapi/swag v0.19.5/go.mod h1:POnQmlKehdgb5mhVOsnJFsivZCEZ/vjK9gh66Z9tfKk= github.com/golang/protobuf v1.4.0-rc.1/go.mod h1:ceaxUfeHdC40wWswd/P6IGgMaK3YpKi5j83Wpe3EHw8= github.com/golang/protobuf v1.4.0-rc.1.0.20200221234624-67d41d38c208/go.mod h1:xKAWHe0F5eneWXFV3EuXVDTCmh+JuBKY0li0aMyXATA= github.com/golang/protobuf v1.4.0-rc.2/go.mod h1:LlEzMj4AhA7rCAGe4KMBDvJI+AwstrUpVNzEA03Pprs= github.com/golang/protobuf v1.4.0-rc.4.0.20200313231945-b860323f09d0/go.mod h1:WU3c8KckQ9AFe+yFwt9sWVRKCVIyN9cPHBJSNnbL67w= github.com/golang/protobuf v1.4.0/go.mod h1:jodUvKwWbYaEsadDk5Fwe5c77LiNKVO9IDvqG2KuDX0= github.com/golang/protobuf v1.4.2/go.mod h1:oDoupMAO8OvCJWAcko0GGGIgR6R6ocIYbsSw735rRwI= -github.com/golang/protobuf v1.4.3 h1:JjCZWpVbqXDqFVmTfYWEVTMIYrL/NPdPSCHPJ0T/raM= -github.com/golang/protobuf v1.4.3/go.mod h1:oDoupMAO8OvCJWAcko0GGGIgR6R6ocIYbsSw735rRwI= -github.com/golang/snappy v0.0.0-20180518054509-2e65f85255db/go.mod 
h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= +github.com/golang/protobuf v1.5.0/go.mod h1:FsONVRAS9T7sI+LIUmWTfcYkHO4aIWwzhcaSAoJOfIk= +github.com/golang/protobuf v1.5.2 h1:ROPKBNFfQgOUMifHyP+KYbvpjbdoFNs+aK7DXlji0Tw= +github.com/golang/protobuf v1.5.2/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiuN0vRsmY= +github.com/golang/snappy v0.0.3/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= github.com/golangci/lint-1 v0.0.0-20181222135242-d2cdd8c08219/go.mod h1:/X8TswGSh1pIozq4ZwCfxS0WA5JGXguxk94ar/4c87Y= -github.com/google/btree v0.0.0-20180813153112-4030bb1f1f0c/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ= -github.com/google/btree v1.0.0/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ= -github.com/google/go-cmp v0.2.0/go.mod h1:oXzfMopK8JAjlY9xF4vHSVASa0yLyX7SntLO5aqRK0M= github.com/google/go-cmp v0.3.0/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= github.com/google/go-cmp v0.3.1/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= github.com/google/go-cmp v0.4.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= -github.com/google/go-cmp v0.5.4/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= -github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= -github.com/google/renameio v0.1.0/go.mod h1:KWCgfxg9yswjAJkECMjeO8J8rahYeXnNhOm40UhjYkI= -github.com/google/uuid v1.0.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= -github.com/gopherjs/gopherjs v0.0.0-20181017120253-0766667cb4d1/go.mod h1:wJfORRmW1u3UXTncJ5qlYoELFm8eSnnEO6hX4iZ3EWY= -github.com/gorilla/context v1.1.1/go.mod h1:kBGZzfjB9CEq2AlWe17Uuf7NDRt0dE0s8S51q0aT7Yg= -github.com/gorilla/mux v1.6.2/go.mod h1:1lud6UwP+6orDFRuTfBEV8e9/aOM/c4fVVCaMa2zaAs= -github.com/gorilla/mux v1.7.3/go.mod h1:1lud6UwP+6orDFRuTfBEV8e9/aOM/c4fVVCaMa2zaAs= -github.com/gorilla/websocket v0.0.0-20170926233335-4201258b820c/go.mod h1:E7qHFY5m1UJ88s3WnNqhKjPHQ0heANvMoAMk2YaljkQ= -github.com/grpc-ecosystem/go-grpc-middleware 
v1.0.1-0.20190118093823-f849b5445de4/go.mod h1:FiyG127CGDf3tlThmgyCl78X/SZQqEOJBCDaAfeWzPs= -github.com/grpc-ecosystem/go-grpc-prometheus v1.2.0/go.mod h1:8NvIoxWQoOIhqOTXgfV/d3M/q6VIi02HzZEHgUlZvzk= -github.com/grpc-ecosystem/grpc-gateway v1.9.5/go.mod h1:vNeuVxBJEsws4ogUvrchl83t/GYV9WGTSLVdBhOQFDY= -github.com/hashicorp/consul/api v1.3.0/go.mod h1:MmDNSzIMUjNpY/mQ398R4bk2FnqQLoPndWW5VkKPlCE= -github.com/hashicorp/consul/sdk v0.3.0/go.mod h1:VKf9jXwCTEY1QZP2MOLRhb5i/I/ssyNV1vwHyQBF0x8= -github.com/hashicorp/errwrap v1.0.0/go.mod h1:YH+1FKiLXxHSkmPseP+kNlulaMuP3n2brvKWEqk/Jc4= -github.com/hashicorp/go-cleanhttp v0.5.1/go.mod h1:JpRdi6/HCYpAwUzNwuwqhbovhLtngrth3wmdIIUrZ80= -github.com/hashicorp/go-immutable-radix v1.0.0/go.mod h1:0y9vanUI8NX6FsYoO3zeMjhV/C5i9g4Q3DwcSNZ4P60= -github.com/hashicorp/go-msgpack v0.5.3/go.mod h1:ahLV/dePpqEmjfWmKiqvPkv/twdG7iPBM1vqhUKIvfM= -github.com/hashicorp/go-multierror v1.0.0/go.mod h1:dHtQlpGsu+cZNNAkkCN/P3hoUDHhCYQXV3UM06sGGrk= -github.com/hashicorp/go-rootcerts v1.0.0/go.mod h1:K6zTfqpRlCUIjkwsN4Z+hiSfzSTQa6eBIzfwKfwNnHU= -github.com/hashicorp/go-sockaddr v1.0.0/go.mod h1:7Xibr9yA9JjQq1JpNB2Vw7kxv8xerXegt+ozgdvDeDU= -github.com/hashicorp/go-syslog v1.0.0/go.mod h1:qPfqrKkXGihmCqbJM2mZgkZGvKG1dFdvsLplgctolz4= -github.com/hashicorp/go-uuid v1.0.0/go.mod h1:6SBZvOh/SIDV7/2o3Jml5SYk/TvGqwFJ/bN7x4byOro= -github.com/hashicorp/go-uuid v1.0.1/go.mod h1:6SBZvOh/SIDV7/2o3Jml5SYk/TvGqwFJ/bN7x4byOro= -github.com/hashicorp/go-version v1.2.0/go.mod h1:fltr4n8CU8Ke44wwGCBoEymUuxUHl09ZGVZPK5anwXA= -github.com/hashicorp/go.net v0.0.1/go.mod h1:hjKkEWcCURg++eb33jQU7oqQcI9XDCnUzHA0oac0k90= -github.com/hashicorp/golang-lru v0.5.0/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8= -github.com/hashicorp/golang-lru v0.5.1/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8= -github.com/hashicorp/logutils v1.0.0/go.mod h1:QIAnNjmIWmVIIkWDTG1z5v++HQmx9WQRO+LraFDTW64= -github.com/hashicorp/mdns v1.0.0/go.mod 
h1:tL+uN++7HEJ6SQLQ2/p+z2pH24WQKWjBPkE0mNTz8vQ= -github.com/hashicorp/memberlist v0.1.3/go.mod h1:ajVTdAv/9Im8oMAAj5G31PhhMCZJV2pPBoIllUwCN7I= -github.com/hashicorp/serf v0.8.2/go.mod h1:6hOLApaqBFA1NXqRQAsxw9QxuDEvNxSQRwA/JwenrHc= -github.com/hpcloud/tail v1.0.0/go.mod h1:ab1qPbhIpdTxEkNHXyeSf5vhxWSCs/tWer42PpOxQnU= -github.com/hudl/fargo v1.3.0/go.mod h1:y3CKSmjA+wD2gak7sUSXTAoopbhU08POFhmITJgmKTg= -github.com/inconshreveable/mousetrap v1.0.0/go.mod h1:PxqpIevigyE2G7u3NXJIT2ANytuPF1OarO4DADm73n8= -github.com/influxdata/influxdb-client-go v1.4.0 h1:+KavOkwhLClHFfYcJMHHnTL5CZQhXJzOm5IKHI9BqJk= -github.com/influxdata/influxdb-client-go/v2 v2.2.2 h1:O0CGIuIwQafvAxttAJ/VqMKfbWWn2Mt8rbOmaM2Zj4w= -github.com/influxdata/influxdb-client-go/v2 v2.2.2/go.mod h1:fa/d1lAdUHxuc1jedx30ZfNG573oQTQmUni3N6pcW+0= -github.com/influxdata/influxdb1-client v0.0.0-20191209144304-8bf82d3c094d/go.mod h1:qj24IKcXYK6Iy9ceXlo3Tc+vtHo9lIhSX5JddghvEPo= +github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= +github.com/gorilla/mux v1.8.0/go.mod h1:DVbg23sWSpFRCP0SfiEN6jmj59UnW/n46BH5rLB71So= +github.com/influxdata/influxdb-client-go/v2 v2.7.0 h1:QgP5mlBE9sGnzplpnf96pr+p7uqlIlL4W2GAP3n+XZg= +github.com/influxdata/influxdb-client-go/v2 v2.7.0/go.mod h1:Y/0W1+TZir7ypoQZYd2IrnVOKB3Tq6oegAQeSVN/+EU= github.com/influxdata/line-protocol v0.0.0-20200327222509-2487e7298839/go.mod h1:xaLFMmpvUxqXtVkUJfg9QmT88cDaCJ3ZKgdZ78oO8Qo= -github.com/influxdata/line-protocol v0.0.0-20210311194329-9aa0e372d097 h1:vilfsDSy7TDxedi9gyBkMvAirat/oRcL0lFdJBf6tdM= -github.com/influxdata/line-protocol v0.0.0-20210311194329-9aa0e372d097/go.mod h1:xaLFMmpvUxqXtVkUJfg9QmT88cDaCJ3ZKgdZ78oO8Qo= -github.com/jmespath/go-jmespath v0.0.0-20180206201540-c2b33e8439af/go.mod h1:Nht3zPeWKUH0NzdCt2Blrr5ys8VGpn0CEB0cQHVjt7k= -github.com/jonboulle/clockwork v0.1.0/go.mod h1:Ii8DK3G1RaLaWxj9trq07+26W01tbo22gdxWY5EU2bo= -github.com/jpillora/backoff v1.0.0/go.mod h1:J/6gKK9jxlEcS3zixgDgUAsiuZ7yrSoa/FX5e0EB2j4= 
-github.com/json-iterator/go v1.1.6/go.mod h1:+SdeFBvtyEkXs7REEP0seUULqWtbJapLOCVDaaPEHmU= -github.com/json-iterator/go v1.1.7/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4= -github.com/json-iterator/go v1.1.8/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4= -github.com/json-iterator/go v1.1.10/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4= -github.com/jtolds/gls v4.20.0+incompatible/go.mod h1:QJZ7F/aHp+rZTRtaJ1ow/lLfFfVYBRgL+9YlvaHOwJU= -github.com/julienschmidt/httprouter v1.2.0/go.mod h1:SYymIcj16QtmaHHD7aYtjjsJG7VTCxuUUipMqKk8s4w= -github.com/julienschmidt/httprouter v1.3.0/go.mod h1:JR6WtHb+2LUe8TCKY3cZOxFyyO8IZAc4RVcycCCAKdM= -github.com/kisielk/errcheck v1.1.0/go.mod h1:EZBBE59ingxPouuu3KfxchcWSUPOHkagtvWXihfKN4Q= -github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= -github.com/konsorten/go-windows-terminal-sequences v1.0.1/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ= -github.com/konsorten/go-windows-terminal-sequences v1.0.3/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ= -github.com/kr/logfmt v0.0.0-20140226030751-b84e30acd515/go.mod h1:+0opPa2QZZtGFBFZlji/RkVcI2GknAs/DXo4wKdlNEc= +github.com/influxdata/line-protocol v0.0.0-20210922203350-b1ad95c89adf h1:7JTmneyiNEwVBOHSjoMxiWAqB992atOeepeFYegn5RU= +github.com/influxdata/line-protocol v0.0.0-20210922203350-b1ad95c89adf/go.mod h1:xaLFMmpvUxqXtVkUJfg9QmT88cDaCJ3ZKgdZ78oO8Qo= +github.com/klauspost/compress v1.13.4 h1:0zhec2I8zGnjWcKyLl6i3gPqKANCCn5e9xmviEEeX6s= +github.com/klauspost/compress v1.13.4/go.mod h1:8dP1Hq4DHOhN9w426knH3Rhby4rFm6D8eO+e+Dq5Gzg= +github.com/kr/pretty v0.1.0 h1:L/CwN0zerZDmRFUapSPitk6f+Q3+0za1rQkzVuMiMFI= github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo= github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= +github.com/kr/text v0.1.0 h1:45sCR5RtlFHMR4UwH9sdQ5TC8v0qDQCHnXt+kaKSTVE= github.com/kr/text v0.1.0/go.mod 
h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= -github.com/labstack/echo/v4 v4.1.11/go.mod h1:i541M3Fj6f76NZtHSj7TXnyM8n2gaodfvfxNnFqi74g= +github.com/labstack/echo/v4 v4.2.1/go.mod h1:AA49e0DZ8kk5jTOOCKNuPR6oTnBS0dYiM4FW1e6jwpg= github.com/labstack/gommon v0.3.0/go.mod h1:MULnywXg0yavhxWKc+lOruYdAhDwPK9wf0OL7NoOu+k= -github.com/lightstep/lightstep-tracer-common/golang/gogo v0.0.0-20190605223551-bc2310a04743/go.mod h1:qklhhLq1aX+mtWk9cPHPzaBjWImj5ULL6C7HFJtXQMM= -github.com/lightstep/lightstep-tracer-go v0.18.1/go.mod h1:jlF1pusYV4pidLvZ+XD0UBX0ZE6WURAspgAczcDHrL4= -github.com/lyft/protoc-gen-validate v0.0.13/go.mod h1:XbGvPuh87YZc5TdIa2/I4pLk0QoUACkjt2znoq26NVQ= +github.com/mailru/easyjson v0.0.0-20190614124828-94de47d64c63/go.mod h1:C1wdFJiN94OJF2b5HbByQZoLdCWB1Yqtg26g4irojpc= +github.com/mailru/easyjson v0.0.0-20190626092158-b2ccc519800e/go.mod h1:C1wdFJiN94OJF2b5HbByQZoLdCWB1Yqtg26g4irojpc= github.com/matryer/moq v0.0.0-20190312154309-6cfb0558e1bd/go.mod h1:9ELz6aaclSIGnZBoaSLZ3NAl1VTufbOrXBPvtcy6WiQ= -github.com/mattn/go-colorable v0.0.9/go.mod h1:9vuHe8Xs5qXnSaW/c/ABM9alt+Vo+STaOChaDxuIBZU= github.com/mattn/go-colorable v0.1.2/go.mod h1:U0ppj6V5qS13XJ6of8GYAs25YV2eR4EVcfRqFIhoBtE= -github.com/mattn/go-colorable v0.1.4/go.mod h1:U0ppj6V5qS13XJ6of8GYAs25YV2eR4EVcfRqFIhoBtE= -github.com/mattn/go-isatty v0.0.3/go.mod h1:M+lRXTBqGeGNdLjl/ufCoiOlB5xdOkqRJdNxMWT7Zi4= -github.com/mattn/go-isatty v0.0.4/go.mod h1:M+lRXTBqGeGNdLjl/ufCoiOlB5xdOkqRJdNxMWT7Zi4= +github.com/mattn/go-colorable v0.1.7/go.mod h1:u6P/XSegPjTcexA+o6vUJrdnUu04hMope9wVRipJSqc= +github.com/mattn/go-colorable v0.1.8/go.mod h1:u6P/XSegPjTcexA+o6vUJrdnUu04hMope9wVRipJSqc= github.com/mattn/go-isatty v0.0.8/go.mod h1:Iq45c/XA43vh69/j3iqttzPXn0bhXyGjM0Hdxcsrc5s= github.com/mattn/go-isatty v0.0.9/go.mod h1:YNRxwqDuOph6SZLI9vUUz6OYw3QyUt7WiY2yME+cCiQ= -github.com/mattn/go-isatty v0.0.10/go.mod h1:qgIWMr58cqv1PHHyhnkY9lrL7etaEgOFcMEpPG5Rm84= -github.com/mattn/go-runewidth v0.0.2/go.mod 
h1:LwmH8dsx7+W8Uxz3IHJYH5QSwggIsqBzpuz5H//U1FU= -github.com/matttproud/golang_protobuf_extensions v1.0.1 h1:4hp9jkHxhMHkqkrB3Ix0jegS5sx/RkqARlsWZ6pIwiU= -github.com/matttproud/golang_protobuf_extensions v1.0.1/go.mod h1:D8He9yQNgCq6Z5Ld7szi9bcBfOoFv/3dc6xSMkL2PC0= -github.com/miekg/dns v1.0.14/go.mod h1:W1PPwlIAgtquWBMBEV9nkV9Cazfe8ScdGz/Lj7v3Nrg= -github.com/mitchellh/cli v1.0.0/go.mod h1:hNIlj7HEI86fIcpObd7a0FcrxTWetlwJDGcceTlRvqc= -github.com/mitchellh/go-homedir v1.0.0/go.mod h1:SfyaCUpYCn1Vlf4IUYiD9fPX4A5wJrkLzIz1N1q0pr0= -github.com/mitchellh/go-testing-interface v1.0.0/go.mod h1:kRemZodwjscx+RGhAo8eIhFbs2+BFgRtFPeD/KE+zxI= -github.com/mitchellh/gox v0.4.0/go.mod h1:Sd9lOJ0+aimLBi73mGofS1ycjY8lL3uZM3JPS42BGNg= -github.com/mitchellh/iochan v1.0.0/go.mod h1:JwYml1nuB7xOzsp52dPpHFffvOCDupsG0QubkSMEySY= -github.com/mitchellh/mapstructure v0.0.0-20160808181253-ca63d7c062ee/go.mod h1:FVVH3fgwuzCH5S8UJGiWEs2h04kUh9fWfEaFds41c1Y= -github.com/mitchellh/mapstructure v1.1.2/go.mod h1:FVVH3fgwuzCH5S8UJGiWEs2h04kUh9fWfEaFds41c1Y= -github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= -github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= -github.com/modern-go/reflect2 v0.0.0-20180701023420-4b7aa43c6742/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0= -github.com/modern-go/reflect2 v1.0.1/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0= -github.com/mwitkow/go-conntrack v0.0.0-20161129095857-cc309e4a2223/go.mod h1:qRWi+5nqEBWmkhHvq77mSJWrCKwh8bxhgT7d/eI7P4U= -github.com/mwitkow/go-conntrack v0.0.0-20190716064945-2f068394615f/go.mod h1:qRWi+5nqEBWmkhHvq77mSJWrCKwh8bxhgT7d/eI7P4U= -github.com/nats-io/jwt v0.3.0/go.mod h1:fRYCDE99xlTsqUzISS1Bi75UBJ6ljOJQOAAu5VglpSg= -github.com/nats-io/jwt v0.3.2 h1:+RB5hMpXUUA2dfxuhBTEkMOrYmM+gKIZYS1KjSostMI= -github.com/nats-io/jwt v0.3.2/go.mod h1:/euKqTS1ZD+zzjYrY7pseZrTtWQSjujC7xjPc8wL6eU= 
-github.com/nats-io/nats-server/v2 v2.1.2/go.mod h1:Afk+wRZqkMQs/p45uXdrVLuab3gwv3Z8C4HTBu8GD/k= -github.com/nats-io/nats.go v1.9.1/go.mod h1:ZjDU1L/7fJ09jvUSRVBR2e7+RnLiiIQyqyzEE/Zbp4w= -github.com/nats-io/nats.go v1.10.0 h1:L8qnKaofSfNFbXg0C5F71LdjPRnmQwSsA4ukmkt1TvY= -github.com/nats-io/nats.go v1.10.0/go.mod h1:AjGArbfyR50+afOUotNX2Xs5SYHf+CoOa5HH1eEl2HE= -github.com/nats-io/nkeys v0.1.0/go.mod h1:xpnFELMwJABBLVhffcfd1MZx6VsNRFpEugbxziKVo7w= -github.com/nats-io/nkeys v0.1.3/go.mod h1:xpnFELMwJABBLVhffcfd1MZx6VsNRFpEugbxziKVo7w= -github.com/nats-io/nkeys v0.1.4 h1:aEsHIssIk6ETN5m2/MD8Y4B2X7FfXrBAUdkyRvbVYzA= -github.com/nats-io/nkeys v0.1.4/go.mod h1:XdZpAbhgyyODYqjTawOnIOI7VlbKSarI9Gfy1tqEu/s= +github.com/mattn/go-isatty v0.0.12/go.mod h1:cbi8OIDigv2wuxKPP5vlRcQ1OAZbq2CE4Kysco4FUpU= +github.com/minio/highwayhash v1.0.1 h1:dZ6IIu8Z14VlC0VpfKofAhCy74wu/Qb5gcn52yWoz/0= +github.com/minio/highwayhash v1.0.1/go.mod h1:BQskDq+xkJ12lmlUUi7U0M5Swg3EWR+dLTk+kldvVxY= +github.com/nats-io/jwt/v2 v2.2.1-0.20220113022732-58e87895b296 h1:vU9tpM3apjYlLLeY23zRWJ9Zktr5jp+mloR942LEOpY= +github.com/nats-io/jwt/v2 v2.2.1-0.20220113022732-58e87895b296/go.mod h1:0tqz9Hlu6bCBFLWAASKhE5vUA4c24L9KPUUgvwumE/k= +github.com/nats-io/nats-server/v2 v2.7.0 h1:UpqcAM93FI7AHlCyI2FD5QcV3QuHNCauQF2LBVU0238= +github.com/nats-io/nats-server/v2 v2.7.0/go.mod h1:cjxtMhZsZovK1XS2iiapCduR8HuqB/YpFamL0qntIcw= +github.com/nats-io/nats.go v1.13.1-0.20211122170419-d7c1d78a50fc h1:SHr4MUUZJ/fAC0uSm2OzWOJYsHpapmR86mpw7q1qPXU= +github.com/nats-io/nats.go v1.13.1-0.20211122170419-d7c1d78a50fc/go.mod h1:BPko4oXsySz4aSWeFgOHLZs3G4Jq4ZAyE6/zMCxRT6w= +github.com/nats-io/nkeys v0.3.0 h1:cgM5tL53EvYRU+2YLXIK0G2mJtK12Ft9oeooSZMA2G8= +github.com/nats-io/nkeys v0.3.0/go.mod h1:gvUNGjVcM2IPr5rCsRsC6Wb3Hr2CQAm08dsxtV6A5y4= github.com/nats-io/nuid v1.0.1 h1:5iA8DT8V7q8WK2EScv2padNa/rTESc1KdnPw4TC2paw= github.com/nats-io/nuid v1.0.1/go.mod h1:19wcPz3Ph3q0Jbyiqsd0kePYG7A95tJPxeL+1OSON2c= -github.com/oklog/oklog v0.3.2/go.mod 
h1:FCV+B7mhrz4o+ueLpx+KqkyXRGMWOYEvfiXtdGtbWGs= -github.com/oklog/run v1.0.0/go.mod h1:dlhp/R75TPv97u0XWUtDeV/lRKWPKSdTuV0TZvrmrQA= -github.com/olekukonko/tablewriter v0.0.0-20170122224234-a0225b3f23b5/go.mod h1:vsDQFd/mU46D+Z4whnwzcISnGGzXWMclvtLoiIKAKIo= -github.com/onsi/ginkgo v1.6.0/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE= -github.com/onsi/ginkgo v1.7.0/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE= -github.com/onsi/gomega v1.4.3/go.mod h1:ex+gbHU/CVuBBDIJjb2X0qEXbFg53c61hWP/1CpauHY= -github.com/op/go-logging v0.0.0-20160315200505-970db520ece7/go.mod h1:HzydrMdWErDVzsI23lYNej1Htcns9BCg93Dk0bBINWk= -github.com/opentracing-contrib/go-observer v0.0.0-20170622124052-a52f23424492/go.mod h1:Ngi6UdF0k5OKD5t5wlmGhe/EDKPoUM3BXZSSfIuJbis= -github.com/opentracing/basictracer-go v1.0.0/go.mod h1:QfBfYuafItcjQuMwinw9GhYKwFXS9KnPs5lxoYwgW74= -github.com/opentracing/opentracing-go v1.0.2/go.mod h1:UkNAQd3GIcIGf0SeVgPpRdFStlNbqXla1AfSYxPUl2o= -github.com/opentracing/opentracing-go v1.1.0/go.mod h1:UkNAQd3GIcIGf0SeVgPpRdFStlNbqXla1AfSYxPUl2o= -github.com/openzipkin-contrib/zipkin-go-opentracing v0.4.5/go.mod h1:/wsWhb9smxSfWAKL3wpBW7V8scJMt8N8gnaMCS9E/cA= -github.com/openzipkin/zipkin-go v0.1.6/go.mod h1:QgAqvLzwWbR/WpD4A3cGpPtJrZXNIiJc5AZX7/PBEpw= -github.com/openzipkin/zipkin-go v0.2.1/go.mod h1:NaW6tEwdmWMaCDZzg8sh+IBNOxHMPnhQw8ySjnjRyN4= -github.com/openzipkin/zipkin-go v0.2.2/go.mod h1:NaW6tEwdmWMaCDZzg8sh+IBNOxHMPnhQw8ySjnjRyN4= -github.com/pact-foundation/pact-go v1.0.4/go.mod h1:uExwJY4kCzNPcHRj+hCR/HBbOOIwwtUjcrb0b5/5kLM= -github.com/pascaldekloe/goe v0.0.0-20180627143212-57f6aae5913c/go.mod h1:lzWF7FIEvWOWxwDKqyGYQf6ZUaNfKdP144TG7ZOy1lc= -github.com/pborman/uuid v1.2.0/go.mod h1:X/NO0urCmaxf9VXbdlT7C2Yzkj2IKimNn4k+gtPdI/k= -github.com/performancecopilot/speed v3.0.0+incompatible/go.mod h1:/CLtqpZ5gBg1M9iaPbIdPPGyKcA8hKdoy6hAWba7Yac= -github.com/pierrec/lz4 v1.0.2-0.20190131084431-473cd7ce01a1/go.mod h1:3/3N9NVKO0jef7pBehbT1qWhCMrIgbYNnFAZCqQ5LRc= 
-github.com/pierrec/lz4 v2.0.5+incompatible/go.mod h1:pdkljMzZIN41W+lC3N2tnIh5sFi+IEE17M5jbnwPHcY= -github.com/pkg/errors v0.8.0/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= -github.com/pkg/profile v1.2.1/go.mod h1:hJw3o1OdXxsrSjjVksARp5W95eeEaEfptyVZyv6JUPA= +github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= -github.com/posener/complete v1.1.1/go.mod h1:em0nMJCgc9GFtwrmVmEMR/ZL6WyhyjMBndrE9hABlRI= -github.com/prometheus/client_golang v0.9.1/go.mod h1:7SWBe2y4D6OKWSNQJUaRYU/AaXPKyh/dDVn+NZz0KFw= -github.com/prometheus/client_golang v0.9.3-0.20190127221311-3c4408c8b829/go.mod h1:p2iRAGwDERtqlqzRXnrOVns+ignqQo//hLXqYxZYVNs= -github.com/prometheus/client_golang v1.0.0/go.mod h1:db9x61etRT2tGnBNRi70OPL5FsnadC4Ky3P0J6CfImo= -github.com/prometheus/client_golang v1.3.0/go.mod h1:hJaj2vgQTGQmVCsAACORcieXFeDPbaTKGT+JTgUa3og= -github.com/prometheus/client_golang v1.7.1/go.mod h1:PY5Wy2awLA44sXw4AOSfFBetzPP4j5+D6mVACh+pe2M= -github.com/prometheus/client_golang v1.10.0 h1:/o0BDeWzLWXNZ+4q5gXltUvaMpJqckTa+jTNoB+z4cg= -github.com/prometheus/client_golang v1.10.0/go.mod h1:WJM3cc3yu7XKBKa/I8WeZm+V3eltZnBwfENSU7mdogU= -github.com/prometheus/client_model v0.0.0-20180712105110-5c3871d89910/go.mod h1:MbSGuTsp3dbXC40dX6PRTWyKYBIrTGTE9sqQNg2J8bo= -github.com/prometheus/client_model v0.0.0-20190115171406-56726106282f/go.mod h1:MbSGuTsp3dbXC40dX6PRTWyKYBIrTGTE9sqQNg2J8bo= -github.com/prometheus/client_model v0.0.0-20190129233127-fd36f4220a90/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= -github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod 
h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= -github.com/prometheus/client_model v0.1.0/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= -github.com/prometheus/client_model v0.2.0 h1:uq5h0d+GuxiXLJLNABMgp2qUWDPiLvgCzz2dUR+/W/M= -github.com/prometheus/client_model v0.2.0/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= -github.com/prometheus/common v0.2.0/go.mod h1:TNfzLD0ON7rHzMJeJkieUDPYmFC7Snx/y86RQel1bk4= -github.com/prometheus/common v0.4.1/go.mod h1:TNfzLD0ON7rHzMJeJkieUDPYmFC7Snx/y86RQel1bk4= -github.com/prometheus/common v0.7.0/go.mod h1:DjGbpBbp5NYNiECxcL/VnbXCCaQpKd3tt26CguLLsqA= -github.com/prometheus/common v0.10.0/go.mod h1:Tlit/dnDKsSWFlCLTWaA1cyBgKHSMdTB80sz/V91rCo= -github.com/prometheus/common v0.18.0 h1:WCVKW7aL6LEe1uryfI9dnEc2ZqNB1Fn0ok930v0iL1Y= -github.com/prometheus/common v0.18.0/go.mod h1:U+gB1OBLb1lF3O42bTCL+FK18tX9Oar16Clt/msog/s= -github.com/prometheus/procfs v0.0.0-20181005140218-185b4288413d/go.mod h1:c3At6R/oaqEKCNdg8wHV1ftS6bRYblBhIjjI8uT2IGk= -github.com/prometheus/procfs v0.0.0-20190117184657-bf6a532e95b1/go.mod h1:c3At6R/oaqEKCNdg8wHV1ftS6bRYblBhIjjI8uT2IGk= -github.com/prometheus/procfs v0.0.2/go.mod h1:TjEm7ze935MbeOT/UhFTIMYKhuLP4wbCsTZCD3I8kEA= -github.com/prometheus/procfs v0.0.8/go.mod h1:7Qr8sr6344vo1JqZ6HhLceV9o3AJ1Ff+GxbHq6oeK9A= -github.com/prometheus/procfs v0.1.3/go.mod h1:lV6e/gmhEcM9IjHGsFOCxxuZ+z1YqCvr4OA4YeYWdaU= -github.com/prometheus/procfs v0.6.0 h1:mxy4L2jP6qMonqmq+aTtOx1ifVWUgG/TAmntgbh3xv4= -github.com/prometheus/procfs v0.6.0/go.mod h1:cz+aTbrPOrUb4q7XlbU9ygM+/jj0fzG6c1xBZuNvfVA= -github.com/rcrowley/go-metrics v0.0.0-20181016184325-3113b8401b8a/go.mod h1:bCqnVzQkZxMG4s8nGwiZ5l3QUCyqpo9Y+/ZMZ9VjZe4= -github.com/rogpeppe/fastuuid v0.0.0-20150106093220-6724a57986af/go.mod h1:XWv6SoW27p1b0cqNHllgS5HIMJraePCO15w5zCzIWYg= -github.com/rogpeppe/go-internal v1.3.0/go.mod h1:M8bDsm7K2OlrFYOpmOWEs/qY81heoFRclV5y23lUDJ4= -github.com/russross/blackfriday/v2 v2.0.1/go.mod 
h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= -github.com/ryanuber/columnize v0.0.0-20160712163229-9b3edd62028f/go.mod h1:sm1tb6uqfes/u+d4ooFouqFdy9/2g9QGwK3SQygK0Ts= -github.com/samuel/go-zookeeper v0.0.0-20190923202752-2cc03de413da/go.mod h1:gi+0XIa01GRL2eRQVjQkKGqKF3SF9vZR/HnPullcV2E= -github.com/sean-/seed v0.0.0-20170313163322-e2103e2c3529/go.mod h1:DxrIzT+xaE7yg65j358z/aeFdxmN0P9QXhEzd20vsDc= -github.com/shurcooL/sanitized_anchor_name v1.0.0/go.mod h1:1NzhyTcUVG4SuEtjjoZeVRXNmyL/1OwPU0+IJeTBvfc= -github.com/sirupsen/logrus v1.2.0/go.mod h1:LxeOpSwHxABJmUn/MG1IvRgCAasNZTLOkJPxbbu5VWo= -github.com/sirupsen/logrus v1.4.2/go.mod h1:tLMulIdttU9McNUspp0xgXVQah82FyeX6MwdIuYE2rE= -github.com/sirupsen/logrus v1.6.0/go.mod h1:7uNnSEd1DgxDLC74fIahvMZmmYsHGZGEOFrfsX/uA88= -github.com/smartystreets/assertions v0.0.0-20180927180507-b2de0cb4f26d/go.mod h1:OnSkiWE9lh6wB0YB77sQom3nweQdgAjqCqsofrRNTgc= -github.com/smartystreets/goconvey v1.6.4/go.mod h1:syvi0/a8iFYH4r/RixwvyeAJjdLS9QV7WQ/tjFTllLA= -github.com/soheilhy/cmux v0.1.4/go.mod h1:IM3LyeVVIOuxMH7sFAkER9+bJ4dT7Ms6E4xg4kGIyLM= -github.com/sony/gobreaker v0.4.1/go.mod h1:ZKptC7FHNvhBz7dN2LGjPVBz2sZJmc0/PkyDJOjmxWY= -github.com/spf13/cobra v0.0.3/go.mod h1:1l0Ry5zgKvJasoi3XT1TypsSe7PqH0Sj9dhYf7v3XqQ= -github.com/spf13/pflag v1.0.1/go.mod h1:DYY7MBk1bdzusC3SYhjObp+wFpr4gzcvqqNjLnInEg4= -github.com/streadway/amqp v0.0.0-20190404075320-75d898a42a94/go.mod h1:AZpEONHx3DKn8O/DFsRAY58/XVQiIPMTMB1SddzLXVw= -github.com/streadway/amqp v0.0.0-20190827072141-edfb9018d271/go.mod h1:AZpEONHx3DKn8O/DFsRAY58/XVQiIPMTMB1SddzLXVw= -github.com/streadway/handy v0.0.0-20190108123426-d5acb3125c2a/go.mod h1:qNTQ5P5JnDBl6z3cMAg/SywNDC5ABu5ApDIw6lUbRmI= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= -github.com/stretchr/objx v0.1.1/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= -github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs= github.com/stretchr/testify 
v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4= +github.com/stretchr/testify v1.5.1 h1:nOGnQDM7FYENwehXlg/kFVnos3rEvtKTjRvOWSzb6H4= github.com/stretchr/testify v1.5.1/go.mod h1:5W2xD1RspED5o8YsWQXVCued0rvSQ+mT+I5cxcmMvtA= -github.com/tmc/grpc-websocket-proxy v0.0.0-20170815181823-89b8d40f7ca8/go.mod h1:ncp9v5uamzpCO7NfCPTXjqaC+bZgJeR0sMTm6dMHP7U= -github.com/urfave/cli v1.20.0/go.mod h1:70zkFmudgCuE/ngEzBv17Jvp/497gISqfk5gWijbERA= -github.com/urfave/cli v1.22.1/go.mod h1:Gos4lmkARVdJ6EkW0WaNv/tZAAMe9V7XWyB60NtXRu0= github.com/valyala/bytebufferpool v1.0.0/go.mod h1:6bBcMArwyJ5K/AmCkWv1jt77kVWyCJ6HpOuEn7z0Csc= github.com/valyala/fasttemplate v1.0.1/go.mod h1:UQGH1tvbgY+Nz5t2n7tXsz52dQxojPUpymEIMZ47gx8= -github.com/valyala/fasttemplate v1.1.0/go.mod h1:UQGH1tvbgY+Nz5t2n7tXsz52dQxojPUpymEIMZ47gx8= -github.com/xiang90/probing v0.0.0-20190116061207-43a291ad63a2/go.mod h1:UETIi67q53MR2AWcXfiuqkDkRtnGDLqkBTpCHuJHxtU= -go.etcd.io/bbolt v1.3.3/go.mod h1:IbVyRI1SCnLcuJnV2u8VeU0CEYM7e686BmAb1XKL+uU= -go.etcd.io/etcd v0.0.0-20191023171146-3cf2f69b5738/go.mod h1:dnLIgRNXwCJa5e+c6mIZCrds/GIG4ncV9HhK5PX7jPg= -go.opencensus.io v0.20.1/go.mod h1:6WKK9ahsWS3RSO+PY9ZHZUfv2irvY6gN279GOPZjmmk= -go.opencensus.io v0.20.2/go.mod h1:6WKK9ahsWS3RSO+PY9ZHZUfv2irvY6gN279GOPZjmmk= -go.opencensus.io v0.22.2/go.mod h1:yxeiOL68Rb0Xd1ddK5vPZ/oVn4vY4Ynel7k9FzqtOIw= -go.uber.org/atomic v1.3.2/go.mod h1:gD2HeocX3+yG+ygLZcrzQJaqmWj9AIm7n08wl/qW/PE= -go.uber.org/atomic v1.5.0/go.mod h1:sABNBOSYdrvTF6hTgEIbc7YasKWGhgEQZyfxyTvoXHQ= -go.uber.org/multierr v1.1.0/go.mod h1:wR5kodmAFQ0UK8QlbwjlSNy0Z68gJhDJUG5sjR94q/0= -go.uber.org/multierr v1.3.0/go.mod h1:VgVr7evmIr6uPjLBxg28wmKNXyqE9akIJ5XnfpiKl+4= -go.uber.org/tools v0.0.0-20190618225709-2cfd321de3ee/go.mod h1:vJERXedbb3MVM5f9Ejo0C68/HhF8uaILCdgjnY+goOA= -go.uber.org/zap v1.10.0/go.mod h1:vwi/ZaCAaUcBkycHslxD9B2zi4UTXhF60s6SWpuDF0Q= -go.uber.org/zap 
v1.13.0/go.mod h1:zwrFLgMcdUuIBviXEYEH1YKNaOBnKXsx2IPda5bBwHM= -golang.org/x/crypto v0.0.0-20180904163835-0709b304e793/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4= -golang.org/x/crypto v0.0.0-20181029021203-45a5f77698d3/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4= +github.com/valyala/fasttemplate v1.2.1/go.mod h1:KHLXt3tVN2HBp8eijSv/kGJopbvo7S+qRAEEKiv+SiQ= golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= -golang.org/x/crypto v0.0.0-20190510104115-cbcb75029529/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= -golang.org/x/crypto v0.0.0-20190701094942-4def268fd1a4/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= -golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= -golang.org/x/crypto v0.0.0-20191112222119-e1110fd1c708/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= -golang.org/x/crypto v0.0.0-20200323165209-0ec3e9974c59 h1:3zb4D3T4G8jdExgVU/95+vQXfpEPiMdCaZgmGVxjNHM= -golang.org/x/crypto v0.0.0-20200323165209-0ec3e9974c59/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= -golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9 h1:psW17arqaxU48Z5kZ0CQnkZWQJsqcURM6tKiBApRjXI= golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= -golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= -golang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE= -golang.org/x/lint v0.0.0-20190227174305-5b3e6a55c961/go.mod h1:wehouNa3lNwaWXcvxsM5YxQ5yQlVC4a0KAMCusXpPoU= -golang.org/x/lint v0.0.0-20190301231843-5614ed5bae6f/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE= -golang.org/x/lint v0.0.0-20190313153728-d0100b6bd8b3/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc= -golang.org/x/lint v0.0.0-20190930215403-16217165b5de/go.mod 
h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc= -golang.org/x/mod v0.0.0-20190513183733-4bf6d317e70e/go.mod h1:mXi4GBBbnImb6dmsKGUJ2LatrhH/nqhxcFungHvyanc= -golang.org/x/mod v0.1.1-0.20191105210325-c90efee705ee/go.mod h1:QqPTAvyqsEbceGzBzNggFXnrqF1CaUcvgkdR5Ot7KZg= -golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= -golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= -golang.org/x/net v0.0.0-20180906233101-161cd47e91fd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= -golang.org/x/net v0.0.0-20181023162649-9b4f9f5ad519/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= -golang.org/x/net v0.0.0-20181114220301-adae6a3d119a/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= -golang.org/x/net v0.0.0-20181201002055-351d144fa1fc/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= -golang.org/x/net v0.0.0-20181220203305-927f97764cc3/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= -golang.org/x/net v0.0.0-20190108225652-1e06a53dbb7e/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= -golang.org/x/net v0.0.0-20190125091013-d26f9f9a57f3/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= -golang.org/x/net v0.0.0-20190213061140-3a22650c66bd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= -golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= +golang.org/x/crypto v0.0.0-20200820211705-5c72a883971a/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= +golang.org/x/crypto v0.0.0-20201221181555-eec23a3978ad/go.mod h1:jdWPYTVW3xRLrWPugEBEK3UY2ZEsg3UU495nc5E+M+I= +golang.org/x/crypto v0.0.0-20210314154223-e6e6c4f2bb5b/go.mod h1:T9bdIzuCu7OtxOm1hfPfRQxPLYneinmdGuTeoZ9dtd4= +golang.org/x/crypto v0.0.0-20220112180741-5e0467b6c7ce h1:Roh6XWxHFKrPgC/EQhVubSAGQ6Ozk6IdxHSzt1mR0EI= +golang.org/x/crypto v0.0.0-20220112180741-5e0467b6c7ce/go.mod 
h1:IxCIyHEi3zRg3s0A5j5BB6A9Jmi73HwBIUl50j+osU4= golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= -golang.org/x/net v0.0.0-20190603091049-60506f45cf65/go.mod h1:HSz+uSET+XFnRR8LxR5pz3Of3rY3CfYBVs4xY44aLks= -golang.org/x/net v0.0.0-20190613194153-d28f0bde5980/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= -golang.org/x/net v0.0.0-20190813141303-74dc4d7220e7/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= -golang.org/x/net v0.0.0-20191112182307-2180aed22343 h1:00ohfJ4K98s3m6BGUoBd8nyfp4Yl0GoIKvw5abItTjI= -golang.org/x/net v0.0.0-20191112182307-2180aed22343/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= -golang.org/x/net v0.0.0-20200625001655-4c5254603344 h1:vGXIOMxbNfDTk/aXCmfdLgkrSV+Z2tcbze+pEc3v5W4= -golang.org/x/net v0.0.0-20200625001655-4c5254603344/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA= -golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= -golang.org/x/oauth2 v0.0.0-20190226205417-e64efc72b421/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= -golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.0.0-20181221193216-37e7f081c4d4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.0.0-20190227155943-e225da77a7e6/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/net v0.0.0-20200822124328-c89045814202/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA= +golang.org/x/net v0.0.0-20210119194325-5f4716e94777/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= +golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= 
+golang.org/x/net v0.0.0-20211112202133-69e39bad7dc2 h1:CIJ76btIcR3eFI5EgSo6k1qKw9KJexJuRLI9G7Hp5wE= +golang.org/x/net v0.0.0-20211112202133-69e39bad7dc2/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.0.0-20201207232520-09787c993a3a/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sys v0.0.0-20180823144017-11551d06cbcc/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= -golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= -golang.org/x/sys v0.0.0-20180905080454-ebe1bf3edb33/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= -golang.org/x/sys v0.0.0-20180909124046-d0be0721c37e/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= -golang.org/x/sys v0.0.0-20181026203630-95b1ffbd15a5/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= -golang.org/x/sys v0.0.0-20181107165924-66b7b1311ac8/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= -golang.org/x/sys v0.0.0-20181116152217-5ac8a444bdc5/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= -golang.org/x/sys v0.0.0-20181122145206-62eef0e2fa9b/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20190130150945-aca44879d564/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190222072716-a9d3bda3a223/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20190422165155-953cdadca894/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20190502145724-3ef323f4f1fd/go.mod 
h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20190726091711-fc99dfbffb4e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20190813064441-fde4db37ae7a/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20190826190057-c7b8b68b1456/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20191008105621-543471e840be/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20191115151921-52ab43148777/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20191220142924-d4481acd189f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20200106162015-b016eb3dc98e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20191026070338-33540a1f6037/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20200116001909-b77594299b42/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20200223170610-d5e6a3e2c0ae/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200323222414-85ca7c5b95cd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20200615200032-f1bc736245b1/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20200625212154-ddb9806d33ae/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20200826173525-f9321e4c35a6/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210124154548-22da62e12c0c/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20210309074719-68d13333faf2 h1:46ULzRKLh1CwgRq2dC5SlBzEqqNCi8rreOZnNrbqcIY= -golang.org/x/sys v0.0.0-20210309074719-68d13333faf2/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys 
v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20220111092808-5a964db01320/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20220114195835-da31bd327af9 h1:XfKQ4OlFl8okEOr5UvAqFRVj8pY/4yfcXrddB8qAbU0= +golang.org/x/sys v0.0.0-20220114195835-da31bd327af9/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/term v0.0.0-20201117132131-f5c789dd3221/go.mod h1:Nr5EML6q2oocZ2LXRh80K7BxOlk5/8JxuGnuhpl+muw= +golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= -golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk= -golang.org/x/time v0.0.0-20180412165947-fbb02b2291d2/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= -golang.org/x/time v0.0.0-20191024005414-555d28b269f0/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= -golang.org/x/tools v0.0.0-20180221164845-07fd8470d635/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= -golang.org/x/tools v0.0.0-20180828015842-6cd1fcedba52/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= +golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= +golang.org/x/text v0.3.5/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= +golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= +golang.org/x/time v0.0.0-20201208040808-7e3f01d25324/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= +golang.org/x/time v0.0.0-20210220033141-f8bda1e9f3ba/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= +golang.org/x/time v0.0.0-20211116232009-f0f3c7e86c11 h1:GZokNIeuVkl3aZHJchRrr13WCsols02MLUcz1U9is6M= +golang.org/x/time v0.0.0-20211116232009-f0f3c7e86c11/go.mod 
h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= -golang.org/x/tools v0.0.0-20190114222345-bf090417da8b/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= -golang.org/x/tools v0.0.0-20190226205152-f727befe758c/go.mod h1:9Yl7xja0Znq3iFh3HoIrodX9oNMXvdceNzlUR8zjMvY= -golang.org/x/tools v0.0.0-20190311212946-11955173bddd/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= -golang.org/x/tools v0.0.0-20190312170243-e65039ee4138/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= -golang.org/x/tools v0.0.0-20190328211700-ab21143f2384/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= -golang.org/x/tools v0.0.0-20190524140312-2c0ae7006135/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q= -golang.org/x/tools v0.0.0-20190621195816-6e04913cbbac/go.mod h1:/rFqwRUd4F7ZHNgwSSTFct+R/Kf4OFW1sUzUTQQTgfc= -golang.org/x/tools v0.0.0-20191029041327-9cc4af7d6b2c/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= -golang.org/x/tools v0.0.0-20191029190741-b9c20aec41a5/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools v0.0.0-20191125144606-a911d9008d1f/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= -golang.org/x/tools v0.0.0-20200103221440-774c71fcf114/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28= golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= -golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= -google.golang.org/api v0.3.1/go.mod h1:6wY9I6uQWHQ8EM57III9mq/AjF+i8G65rmVagqKMtkk= -google.golang.org/appengine v1.1.0/go.mod h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9YwlJXL52JkM= -google.golang.org/appengine v1.2.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4= -google.golang.org/appengine 
v1.4.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4= -google.golang.org/genproto v0.0.0-20180817151627-c66870c02cf8/go.mod h1:JiN7NxoALGmiZfu7CAH4rXhgtRTLTxftemlI0sWmxmc= -google.golang.org/genproto v0.0.0-20190307195333-5fe7a883aa19/go.mod h1:VzzqZJRnGkLBvHegQrXjBqPurQTc5/KpmUdxsrq26oE= -google.golang.org/genproto v0.0.0-20190425155659-357c62f0e4bb/go.mod h1:VzzqZJRnGkLBvHegQrXjBqPurQTc5/KpmUdxsrq26oE= -google.golang.org/genproto v0.0.0-20190530194941-fb225487d101/go.mod h1:z3L6/3dTEVtUr6QSP8miRzeRqwQOioJ9I66odjN4I7s= -google.golang.org/genproto v0.0.0-20190819201941-24fa4b261c55/go.mod h1:DMBHOl98Agz4BDEuKkezgsaosCRResVns1a3J2ZsMNc= -google.golang.org/grpc v1.17.0/go.mod h1:6QZJwpn2B+Zp71q/5VxRsJ6NXXVCE5NRUHRo+f3cWCs= -google.golang.org/grpc v1.19.0/go.mod h1:mqu4LbDTu4XGKhr4mRzUsmM4RtVoemTSY81AxZiDr8c= -google.golang.org/grpc v1.20.0/go.mod h1:chYK+tFQF0nDUGJgXMSgLCQk3phJEuONr2DCgLDdAQM= -google.golang.org/grpc v1.20.1/go.mod h1:10oTOabMzJvdu6/UiuZezV6QK5dSlG84ov/aaiqXj38= -google.golang.org/grpc v1.21.0/go.mod h1:oYelfM1adQP15Ek0mdvEgi9Df8B9CZIaU1084ijfRaM= -google.golang.org/grpc v1.22.1/go.mod h1:Y5yQAOtifL1yxbo5wqy6BxZv8vAUGQwXBOALyacEbxg= -google.golang.org/grpc v1.23.0/go.mod h1:Y5yQAOtifL1yxbo5wqy6BxZv8vAUGQwXBOALyacEbxg= -google.golang.org/grpc v1.23.1/go.mod h1:Y5yQAOtifL1yxbo5wqy6BxZv8vAUGQwXBOALyacEbxg= -google.golang.org/grpc v1.26.0/go.mod h1:qbnxyOmOxrQa7FizSgH+ReBfzJrCY1pSN7KXBS8abTk= google.golang.org/protobuf v0.0.0-20200109180630-ec00e32a8dfd/go.mod h1:DFci5gLYBciE7Vtevhsrf46CRTquxDuWsQurQQe4oz8= google.golang.org/protobuf v0.0.0-20200221191635-4d8936d0db64/go.mod h1:kwYJMbMJ01Woi6D6+Kah6886xMZcty6N08ah7+eCXa0= google.golang.org/protobuf v0.0.0-20200228230310-ab0ca4ff8a60/go.mod h1:cfTl7dwQJ+fmap5saPgwCLgHXTUD7jkjRqWcaiX5VyM= google.golang.org/protobuf v1.20.1-0.20200309200217-e05f789c0967/go.mod h1:A+miEFZTKqfCUM6K7xSMQL9OKL/b6hQv+e19PK+JZNE= google.golang.org/protobuf v1.21.0/go.mod h1:47Nbq4nVaFHyn7ilMalzfO3qCViNmqZ2kzikPIcrTAo= 
-google.golang.org/protobuf v1.23.0 h1:4MY060fB1DLGMB/7MBTLnwQUY6+F09GEiz6SsrNqyzM= google.golang.org/protobuf v1.23.0/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU= +google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw= +google.golang.org/protobuf v1.26.0/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc= +google.golang.org/protobuf v1.27.1 h1:SnqbnDw1V7RiZcXPx5MEeqPv2s79L9i7BJUlG/+RurQ= +google.golang.org/protobuf v1.27.1/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc= gopkg.in/Knetic/govaluate.v2 v2.3.0 h1:naJVc9CZlWA8rC8f5mvECJD7jreTrn7FvGXjBthkHJQ= gopkg.in/Knetic/govaluate.v2 v2.3.0/go.mod h1:NW0gr10J8s7aNghEg6uhdxiEaBvc0+8VgJjVViHUKp4= -gopkg.in/alecthomas/kingpin.v2 v2.2.6/go.mod h1:FMv+mEhP44yOT+4EoQTLFTRgOQ1FBLkstjWtayDeSgw= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127 h1:qIbj1fsPNlZgppZ+VLlY7N33q108Sa+fhmuc+sWQYwY= gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= -gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= -gopkg.in/cheggaaa/pb.v1 v1.0.25/go.mod h1:V/YB90LKu/1FcN3WVnfiiE5oMCibMjukxqG/qStrOgw= -gopkg.in/errgo.v2 v2.1.0/go.mod h1:hNsd1EY+bozCKY1Ytp96fpM3vjJbqLJn88ws8XvfDNI= -gopkg.in/fsnotify.v1 v1.4.7/go.mod h1:Tz8NjZHkW78fSQdbUxIjBTcgA1z1m8ZHf0WmKUhAMys= -gopkg.in/gcfg.v1 v1.2.3/go.mod h1:yesOnuUOFQAhST5vPY4nbZsb/huCgGGXlipJsBn0b3o= -gopkg.in/resty.v1 v1.12.0/go.mod h1:mDo4pnntr5jdWRML875a/NmxYqAlA73dVijT2AXvQQo= -gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7/go.mod h1:dt/ZhP58zS4L8KSrWDmTeBkI65Dw0HsyUHuEVlX15mw= -gopkg.in/warnings.v0 v0.1.2/go.mod h1:jksf8JmL6Qr/oQM2OXTHunEvvTAsrWBLb6OOjuVWRNI= -gopkg.in/yaml.v2 v2.0.0-20170812160011-eb3733d160e7/go.mod h1:JAlM8MvJe8wmxCU4Bli9HhUf9+ttbYbLASfIpnQbh74= -gopkg.in/yaml.v2 v2.2.1/go.mod 
h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= -gopkg.in/yaml.v2 v2.2.4/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= -gopkg.in/yaml.v2 v2.2.5/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v2 v2.3.0 h1:clyUAQHOM3G0M3f5vQj7LuJrETvjVot3Z5el9nffUtU= gopkg.in/yaml.v2 v2.3.0/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= -honnef.co/go/tools v0.0.0-20180728063816-88497007e858/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= -honnef.co/go/tools v0.0.0-20190102054323-c2f93a96b099/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= -honnef.co/go/tools v0.0.0-20190523083050-ea95bdfd59fc/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= -honnef.co/go/tools v0.0.1-2019.2.3/go.mod h1:a3bituU0lyd329TUQxRnasdCoJDkEUEAqEt0JzvZhAg= -sigs.k8s.io/yaml v1.1.0/go.mod h1:UJmg0vDUVViEyp3mgSv9WPwZCDxu4rQW1olrI1uml+o= -sourcegraph.com/sourcegraph/appdash v0.0.0-20190731080439-ebfcffb1b5c0/go.mod h1:hI742Nqp5OhwiqlzhgfbWU4mW4yO10fP+LoT9WOswdU=