mirror of
https://github.com/ClusterCockpit/cc-metric-collector.git
synced 2025-01-28 06:45:16 +01:00
ec570f884c
* Merge develop and main (#99) * InfiniBandCollector: Scale raw readings from octets to bytes * Fix clock frequency coming from LikwidCollector and update docs * Build DEB package for Ubuntu 20.04 for releases * Fix memstat collector with numa_stats option * Remove useless prints from MemstatCollector * Replace ioutils with os and io (#87) * Use lower case for error strings in RocmSmiCollector * move maybe-usable-by-other-cc-components to pkg. Fix all files to use the new paths (#88) * Add collector for monitoring the execution of cc-metric-collector itself (#81) * Add collector to monitor execution of cc-metric-collector itself * Register SelfCollector * Fix import paths for moved packages * Check if at least one CPU with frequency information was detected * Correct type: /proc/stats -> /proc/stat * Update README.md * Run ipmitool asynchron. Improved error handling. * Corrected some typos * Add running average power limit (RAPL) metric collector * Add running average power limit (RAPL) metric collector * Do not mess up with the orignal configuration * * Corrected json config in numastatsMetric.md * Added some debug output to numastatsMetric.go * Fixed computing number of physical packages for non continous physical package IDs (e.g. on Ampere Altra Q80-30) * Fix kernel panic for receiver config with missing receiver type * Add receiver to gather remote IPMI sensor metrics * Added config option to add ipmi-sensors command line options * Add documentaion for IPMI receiver * Update to latest version of included go modules * Add go.mod to App dependency * Try to use common metric tags across hardware vendors * Add IPMI metric: current * remove prefix enumeration like 01-... 
* Add IPMI receiver example configuration to receivers.json * Minimal formating changes * Add hostlist package * Added tests for hostlist Expand() * Use package hostlist to expand a host list * Use package hostlist to expand a host list * Some servers return "ConsumedPowerWatt":65535 instead of "ConsumedPowerWatt":null * Updated to latest package versions * Do not allow unknown fields in JSON configuration file * Add workflow to customize packages to docs * NFS I/O Stats Collector (#91) * Initial version * Delete values for vanished mount points and comments * Fix for Likwid collector (#95) * Run LIKWID in separate thread and check metric type * Change LIKWID collector documentation to use 'type' instead of 'scope' * Re-initialize LIKWID after one read is missing due to lock toggle * Register cc-metric-collector at Zenodo (#93) * Add initial version of Zenodo project file * Orcid ID added * Update .zenodo.json Co-authored-by: Holger Obermaier <holger.obermaier@kit.edu> * Update ipmiMetric.go * Use latest LIKWID version for builds * Update README.md * Remove development stuff from Makefile * Add Requires(pre) to RPM SPEC file * Use curly brackets in packaging make targets * Fix for LIKWID collector with separate measurement thread and inotify watcher on the LIKWID lock (#97) Co-authored-by: Holger Obermaier <40787752+ho-ob@users.noreply.github.com> Co-authored-by: Holger Obermaier <Holger.Obermaier@kit.edu> * Update likwid_perfgroup_to_cc_config.py * Use customcmd commands if they did not error. --------- Co-authored-by: Thomas Gruber <Thomas.Roehl@googlemail.com> Co-authored-by: Holger Obermaier <40787752+ho-ob@users.noreply.github.com> Co-authored-by: Holger Obermaier <Holger.Obermaier@kit.edu>
84 lines
2.4 KiB
Python
Executable File
84 lines
2.4 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
|
|
import os, os.path, sys, getopt, re, json
|
|
|
|
def which(cmd):
    """Locate an executable file named *cmd* in the ``PATH`` directories.

    The directories listed in the ``PATH`` environment variable are searched
    in order; a missing ``PATH`` is treated as an empty string.

    Args:
        cmd: File name of the command to look for.

    Returns:
        The path (``PATH`` entry joined with *cmd*) of the first match that is
        an executable regular file, or ``None`` when no entry contains one.
    """
    search_path = os.environ.get("PATH", "")
    for directory in search_path.split(os.pathsep):
        candidate = os.path.join(directory, cmd)
        # os.access(..., os.X_OK) also succeeds for searchable directories,
        # so additionally require a regular file; otherwise a directory that
        # happens to be called like the command would be returned.
        if os.path.isfile(candidate) and os.access(candidate, os.X_OK):
            return candidate
    return None
|
|
|
|
def group_to_json(groupfile):
    """Convert a LIKWID performance group file into a plain dictionary.

    The file is read line by line; surrounding whitespace of every line is
    ignored. A line ``EVENTSET`` starts the event section, a line ``METRICS``
    starts the metric section. An empty line, a line starting with ``SHORT``
    or a line ``LONG`` ends the current section, and lines outside of a
    section are skipped.

    * Event lines have the form ``<counter> <event>``; lines that do not
      match this form are ignored.
    * Metric lines have the form ``<metric name ...> <formula>``: the last
      whitespace-separated token is the formula, everything before it (joined
      with single spaces) is the metric name.

    Args:
        groupfile: Path of the performance group file to read.

    Returns:
        A dict ``{"events": {...}, "metrics": [...]}``. ``events`` maps counter
        names to event names. Each entry of ``metrics`` is a dict with the
        keys ``name``, ``calc`` (the formula), ``type`` (``"socket"`` if the
        formula contains ``BOX`` or ``PWR``, otherwise ``"hwthread"``) and
        ``publish`` (always ``True``).

    Raises:
        OSError: If *groupfile* cannot be opened or read.
    """
    # \w already covers digits and the underscore, so this accepts exactly the
    # same names as the more verbose character classes [\w\d] / [\w\d_].
    event_pattern = re.compile(r"(\w+)\s+(\w+)")

    events = {}
    metrics = []
    section = None  # None, "events" or "metrics"

    with open(groupfile, "r") as fp:
        for raw_line in fp:
            # Strip each line individually so trailing blanks or a stray
            # carriage return neither hide a section header nor end up as an
            # empty formula token.
            line = raw_line.strip()

            if line == "EVENTSET":
                section = "events"
                continue
            if line == "METRICS":
                section = "metrics"
                continue
            if not line or line.startswith("SHORT") or line == "LONG":
                section = None
                continue

            if section == "events":
                match = event_pattern.match(line)
                if match:
                    events[match.group(1)] = match.group(2)
            elif section == "metrics":
                tokens = line.split()
                calc = tokens[-1]
                name = " ".join(tokens[:-1])
                if "BOX" in calc or "PWR" in calc:
                    scope = "socket"
                else:
                    scope = "hwthread"
                metrics.append(
                    {"name": name, "calc": calc, "type": scope, "publish": True}
                )

    return {"events": events, "metrics": metrics}
|
|
|
|
def _fail(message):
    """Print *message* to stderr and terminate with exit status 1."""
    # Diagnostics must not go to stdout: stdout carries the JSON result and is
    # typically redirected into a configuration file.
    print(message, file=sys.stderr)
    sys.exit(1)


def main():
    """Print the JSON form of one LIKWID performance group to stdout.

    Expects exactly two command line arguments, ``<likwid-arch>`` and
    ``<group-name>``. The performance group directory is derived from the
    location of the ``likwid-topology`` executable found in ``PATH``:
    ``<bindir>/../share/likwid/perfgroups/<likwid-arch>/<group-name>.txt``.

    Exits with status 1 and a message on stderr if the arguments are wrong,
    ``likwid-topology`` is not found, or any part of that path is missing.
    """
    if len(sys.argv) != 3:
        # Show the real program name; "$0" is only expanded by a shell.
        _fail("Usage: {} <likwid-arch> <group-name>".format(sys.argv[0]))

    arch = sys.argv[1]
    group = sys.argv[2]

    ltopo = which("likwid-topology")
    if not ltopo:
        _fail("Cannot find LIKWID installation. Please add LIKWID bin folder to your PATH.")

    bindir = os.path.dirname(ltopo)

    groupdir = os.path.normpath(os.path.join(bindir, "../share/likwid/perfgroups"))
    if not os.path.isdir(groupdir):
        _fail("Cannot find LIKWID performance groups in default install location")

    archdir = os.path.join(groupdir, arch)
    if not os.path.isdir(archdir):
        _fail("Cannot find LIKWID performance groups for architecture {}".format(arch))

    groupfile = os.path.join(archdir, "{}.txt".format(group))
    if not os.path.isfile(groupfile):
        _fail("Cannot find LIKWID performance group {} for architecture {}".format(group, arch))

    gdata = group_to_json(groupfile)
    print(json.dumps(gdata, sort_keys=True, indent=2))


if __name__ == "__main__":
    main()
|