From 84e019c693655b29ba97cdbf453fac976fbd78e5 Mon Sep 17 00:00:00 2001 From: Thomas Gruber Date: Tue, 20 Dec 2022 13:08:04 +0100 Subject: [PATCH 1/3] Merge develop and main (#99) * InfiniBandCollector: Scale raw readings from octets to bytes * Fix clock frequency coming from LikwidCollector and update docs * Build DEB package for Ubuntu 20.04 for releases * Fix memstat collector with numa_stats option * Remove useless prints from MemstatCollector * Replace ioutils with os and io (#87) * Use lower case for error strings in RocmSmiCollector * move maybe-usable-by-other-cc-components to pkg. Fix all files to use the new paths (#88) * Add collector for monitoring the execution of cc-metric-collector itself (#81) * Add collector to monitor execution of cc-metric-collector itself * Register SelfCollector * Fix import paths for moved packages * Check if at least one CPU with frequency information was detected * Correct type: /proc/stats -> /proc/stat * Update README.md * Run ipmitool asynchronously. Improved error handling. * Corrected some typos * Add running average power limit (RAPL) metric collector * Add running average power limit (RAPL) metric collector * Do not mess up with the original configuration * * Corrected json config in numastatsMetric.md * Added some debug output to numastatsMetric.go * Fixed computing number of physical packages for non-contiguous physical package IDs (e.g. on Ampere Altra Q80-30) * Fix kernel panic for receiver config with missing receiver type * Add receiver to gather remote IPMI sensor metrics * Added config option to add ipmi-sensors command line options * Add documentation for IPMI receiver * Update to latest version of included go modules * Add go.mod to App dependency * Try to use common metric tags across hardware vendors * Add IPMI metric: current * remove prefix enumeration like 01-... 
* Add IPMI receiver example configuration to receivers.json * Minimal formating changes * Add hostlist package * Added tests for hostlist Expand() * Use package hostlist to expand a host list * Use package hostlist to expand a host list * Some servers return "ConsumedPowerWatt":65535 instead of "ConsumedPowerWatt":null * Updated to latest package versions * Do not allow unknown fields in JSON configuration file * Add workflow to customize packages to docs * NFS I/O Stats Collector (#91) * Initial version * Delete values for vanished mount points and comments * Fix for Likwid collector (#95) * Run LIKWID in separate thread and check metric type * Change LIKWID collector documentation to use 'type' instead of 'scope' * Re-initialize LIKWID after one read is missing due to lock toggle * Register cc-metric-collector at Zenodo (#93) * Add initial version of Zenodo project file * Orcid ID added * Update .zenodo.json Co-authored-by: Holger Obermaier * Update ipmiMetric.go * Use latest LIKWID version for builds * Update README.md * Remove development stuff from Makefile * Add Requires(pre) to RPM SPEC file * Use curly brackets in packaging make targets * Fix for LIKWID collector with separate measurement thread and inotify watcher on the LIKWID lock (#97) Co-authored-by: Holger Obermaier <40787752+ho-ob@users.noreply.github.com> Co-authored-by: Holger Obermaier From abd49a377ce57f92b92072d7570506de77751459 Mon Sep 17 00:00:00 2001 From: Thomas Gruber Date: Thu, 26 Jan 2023 10:21:45 +0700 Subject: [PATCH 2/3] Update likwid_perfgroup_to_cc_config.py --- scripts/likwid_perfgroup_to_cc_config.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/likwid_perfgroup_to_cc_config.py b/scripts/likwid_perfgroup_to_cc_config.py index 52959ed..42abae3 100755 --- a/scripts/likwid_perfgroup_to_cc_config.py +++ b/scripts/likwid_perfgroup_to_cc_config.py @@ -45,7 +45,7 @@ def group_to_json(groupfile): if "PWR" in calc: scope = "socket" - m = {"name" : metric, "calc": 
calc, "scope" : scope, "publish" : True} + m = {"name" : metric, "calc": calc, "type" : scope, "publish" : True} metrics.append(m) return {"events" : events, "metrics" : metrics} From 94b086acf09eb65fa1941047ea1bf656abcc4721 Mon Sep 17 00:00:00 2001 From: Thomas Gruber Date: Mon, 20 Mar 2023 15:17:24 +0100 Subject: [PATCH 3/3] Develop (#102) * InfiniBandCollector: Scale raw readings from octets to bytes * Fix clock frequency coming from LikwidCollector and update docs * Build DEB package for Ubuntu 20.04 for releases * Fix memstat collector with numa_stats option * Remove useless prints from MemstatCollector * Replace ioutils with os and io (#87) * Use lower case for error strings in RocmSmiCollector * move maybe-usable-by-other-cc-components to pkg. Fix all files to use the new paths (#88) * Add collector for monitoring the execution of cc-metric-collector itself (#81) * Add collector to monitor execution of cc-metric-collector itself * Register SelfCollector * Fix import paths for moved packages * Check if at least one CPU with frequency information was detected * Correct type: /proc/stats -> /proc/stat * Update README.md * Run ipmitool asynchron. Improved error handling. * Corrected some typos * Add running average power limit (RAPL) metric collector * Add running average power limit (RAPL) metric collector * Do not mess up with the orignal configuration * * Corrected json config in numastatsMetric.md * Added some debug output to numastatsMetric.go * Fixed computing number of physical packages for non continous physical package IDs (e.g. 
on Ampere Altra Q80-30) * Fix kernel panic for receiver config with missing receiver type * Add receiver to gather remote IPMI sensor metrics * Added config option to add ipmi-sensors command line options * Add documentation for IPMI receiver * Update to latest version of included go modules * Add go.mod to App dependency * Try to use common metric tags across hardware vendors * Add IPMI metric: current * remove prefix enumeration like 01-... * Add IPMI receiver example configuration to receivers.json * Minimal formatting changes * Add hostlist package * Added tests for hostlist Expand() * Use package hostlist to expand a host list * Use package hostlist to expand a host list * Some servers return "ConsumedPowerWatt":65535 instead of "ConsumedPowerWatt":null * Updated to latest package versions * Do not allow unknown fields in JSON configuration file * Add workflow to customize packages to docs * NFS I/O Stats Collector (#91) * Initial version * Delete values for vanished mount points and comments * Fix for Likwid collector (#95) * Run LIKWID in separate thread and check metric type * Change LIKWID collector documentation to use 'type' instead of 'scope' * Re-initialize LIKWID after one read is missing due to lock toggle * Register cc-metric-collector at Zenodo (#93) * Add initial version of Zenodo project file * Orcid ID added * Update .zenodo.json Co-authored-by: Holger Obermaier * Update ipmiMetric.go * Use latest LIKWID version for builds * Update README.md * Remove development stuff from Makefile * Add Requires(pre) to RPM SPEC file * Use curly brackets in packaging make targets * Fix for LIKWID collector with separate measurement thread and inotify watcher on the LIKWID lock (#97) * Debian does not like underscores in the version * Update cc-metric-collector.service Remove dependency services not used by cc-metric-collector * Add new requirements to module file * Use customcmd commands if they did not error. 
(#101) * Merge develop and main (#99) * InfiniBandCollector: Scale raw readings from octets to bytes * Fix clock frequency coming from LikwidCollector and update docs * Build DEB package for Ubuntu 20.04 for releases * Fix memstat collector with numa_stats option * Remove useless prints from MemstatCollector * Replace ioutils with os and io (#87) * Use lower case for error strings in RocmSmiCollector * move maybe-usable-by-other-cc-components to pkg. Fix all files to use the new paths (#88) * Add collector for monitoring the execution of cc-metric-collector itself (#81) * Add collector to monitor execution of cc-metric-collector itself * Register SelfCollector * Fix import paths for moved packages * Check if at least one CPU with frequency information was detected * Correct type: /proc/stats -> /proc/stat * Update README.md * Run ipmitool asynchronously. Improved error handling. * Corrected some typos * Add running average power limit (RAPL) metric collector * Add running average power limit (RAPL) metric collector * Do not mess up with the original configuration * * Corrected json config in numastatsMetric.md * Added some debug output to numastatsMetric.go * Fixed computing number of physical packages for non-contiguous physical package IDs (e.g. on Ampere Altra Q80-30) * Fix kernel panic for receiver config with missing receiver type * Add receiver to gather remote IPMI sensor metrics * Added config option to add ipmi-sensors command line options * Add documentation for IPMI receiver * Update to latest version of included go modules * Add go.mod to App dependency * Try to use common metric tags across hardware vendors * Add IPMI metric: current * remove prefix enumeration like 01-... 
* Add IPMI receiver example configuration to receivers.json * Minimal formatting changes * Add hostlist package * Added tests for hostlist Expand() * Use package hostlist to expand a host list * Use package hostlist to expand a host list * Some servers return "ConsumedPowerWatt":65535 instead of "ConsumedPowerWatt":null * Updated to latest package versions * Do not allow unknown fields in JSON configuration file * Add workflow to customize packages to docs * NFS I/O Stats Collector (#91) * Initial version * Delete values for vanished mount points and comments * Fix for Likwid collector (#95) * Run LIKWID in separate thread and check metric type * Change LIKWID collector documentation to use 'type' instead of 'scope' * Re-initialize LIKWID after one read is missing due to lock toggle * Register cc-metric-collector at Zenodo (#93) * Add initial version of Zenodo project file * Orcid ID added * Update .zenodo.json Co-authored-by: Holger Obermaier * Update ipmiMetric.go * Use latest LIKWID version for builds * Update README.md * Remove development stuff from Makefile * Add Requires(pre) to RPM SPEC file * Use curly brackets in packaging make targets * Fix for LIKWID collector with separate measurement thread and inotify watcher on the LIKWID lock (#97) Co-authored-by: Holger Obermaier <40787752+ho-ob@users.noreply.github.com> Co-authored-by: Holger Obermaier * Update likwid_perfgroup_to_cc_config.py * Use customcmd commands if they did not error. 
--------- Co-authored-by: Thomas Gruber Co-authored-by: Holger Obermaier <40787752+ho-ob@users.noreply.github.com> Co-authored-by: Holger Obermaier --------- Co-authored-by: Holger Obermaier <40787752+ho-ob@users.noreply.github.com> Co-authored-by: Holger Obermaier Co-authored-by: fodinabor <5982050+fodinabor@users.noreply.github.com> --- Makefile | 1 - collectors/customCmdMetric.go | 2 +- go.mod | 13 +++++++------ scripts/cc-metric-collector.service | 1 - 4 files changed, 8 insertions(+), 9 deletions(-) diff --git a/Makefile b/Makefile index b010cd8..8b62dab 100644 --- a/Makefile +++ b/Makefile @@ -113,7 +113,6 @@ DEB: scripts/cc-metric-collector.deb.control $(APP) @VERS=$$(git describe --tags --abbrev=0 $${COMMITISH}) @if [ -z "$${VERS}" ]; then VERS=${GITHUB_REF_NAME}; fi @VERS=$${VERS#v} - @VERS=$$(echo $${VERS} | sed -e s+'-'+'_'+g) @ARCH=$$(uname -m) @ARCH=$$(echo $${ARCH} | sed -e s+'_'+'-'+g) @if [ "$${ARCH}" = "x86-64" ]; then ARCH=amd64; fi diff --git a/collectors/customCmdMetric.go b/collectors/customCmdMetric.go index e150014..e89ea54 100644 --- a/collectors/customCmdMetric.go +++ b/collectors/customCmdMetric.go @@ -48,7 +48,7 @@ func (m *CustomCmdCollector) Init(config json.RawMessage) error { command := exec.Command(cmdfields[0], strings.Join(cmdfields[1:], " ")) command.Wait() _, err = command.Output() - if err != nil { + if err == nil { m.commands = append(m.commands, c) } } diff --git a/go.mod b/go.mod index 9a9bf4b..3908213 100644 --- a/go.mod +++ b/go.mod @@ -10,14 +10,15 @@ require ( github.com/gorilla/mux v1.8.0 github.com/influxdata/influxdb-client-go/v2 v2.12.1 github.com/influxdata/line-protocol v0.0.0-20210922203350-b1ad95c89adf - github.com/nats-io/nats.go v1.21.0 + github.com/nats-io/nats.go v1.22.1 github.com/prometheus/client_golang v1.14.0 github.com/stmcginnis/gofish v0.13.0 github.com/tklauser/go-sysconf v0.3.11 + golang.design/x/thread v0.0.0-20210122121316-335e9adffdf1 golang.org/x/sys v0.3.0 + gopkg.in/fsnotify.v0 v0.9.3 ) - 
require ( github.com/apapsch/go-jsonmerge/v2 v2.0.0 // indirect github.com/beorn7/perks v1.0.1 // indirect @@ -31,11 +32,11 @@ require ( github.com/nats-io/nuid v1.0.1 // indirect github.com/pkg/errors v0.9.1 // indirect github.com/prometheus/client_model v0.3.0 // indirect - github.com/prometheus/common v0.37.0 // indirect - github.com/prometheus/procfs v0.8.0 // indirect + github.com/prometheus/common v0.39.0 // indirect + github.com/prometheus/procfs v0.9.0 // indirect github.com/shopspring/decimal v1.3.1 // indirect github.com/tklauser/numcpus v0.6.0 // indirect - golang.org/x/crypto v0.3.0 // indirect - golang.org/x/net v0.3.0 // indirect + golang.org/x/crypto v0.4.0 // indirect + golang.org/x/net v0.4.0 // indirect google.golang.org/protobuf v1.28.1 // indirect ) diff --git a/scripts/cc-metric-collector.service b/scripts/cc-metric-collector.service index d40580c..9c2fd1e 100644 --- a/scripts/cc-metric-collector.service +++ b/scripts/cc-metric-collector.service @@ -3,7 +3,6 @@ Description=ClusterCockpit metric collector Documentation=https://github.com/ClusterCockpit/cc-metric-collector Wants=network-online.target After=network-online.target -After=postgresql.service mariadb.service mysql.service [Service] EnvironmentFile=/etc/default/cc-metric-collector