Merge latest development changes to main branch (#79)

* Cleanup: Remove unused code

* Use Golang duration parser for 'interval' and 'duration'
 in main config

* Update handling of LIKWID headers. Download only if not already present in the system. Fixes #73

* Units with cc-units (#64)

* Add option to normalize units with cc-unit

* Add unit conversion to router

* Add option to change unit prefix in the router

* Add to MetricRouter README

* Add order of operations in router to README

* Use second add_tags/del_tags only if metric gets renamed

* Skip disks in DiskstatCollector that have size=0

* Check readability of sensor files in TempCollector

* Fix for --once option

* Rename `cpu` type to `hwthread` (#69)

* Rename 'cpu' type to 'hwthread' to avoid naming clashes with MetricStore and CC-Webfrontend

* Collectors in parallel (#74)

* Provide info to CollectorManager whether the collector can be executed in parallel with others

* Split serial and parallel collectors. Read in parallel first

* Update NvidiaCollector with new metrics, MIG and NvLink support (#75)

* CC topology module update (#76)

* Rename CPU to hardware thread, write some comments

* Do renaming in other parts

* Remove CpuList and SocketList function from metricCollector. Available in ccTopology

* Option to use MIG UUID as subtype-id in NvidiaCollector

* Option to use MIG slice name as subtype-id in NvidiaCollector

* MetricRouter: Fix JSON in README

* Fix for GitHub Action to really use the selected version

* Remove Ganglia installation in runonce Action and add Go 1.18

* Fix daemon options in init script

* Add separate go.mod files to use it with deprecated 1.16

* Minor updates for Makefiles

* fix string comparison

* AMD ROCm SMI collector (#77)

* Add collector for AMD ROCm SMI metrics

* Fix import path

* Fix imports

* Remove Board Number

* store GPU index explicitly

* Remove board number from description

* Use http instead of ftp to download likwid

* Fix serial number in rocmCollector

* Improved http sink (#78)

* automatic flush in NatsSink

* tweak default options of HttpSink

* shorter critical section and retries for HttpSink

* fix error handling

* Remove file added by mistake.

* Use http instead of ftp to download likwid

* Fix serial number in rocmCollector

Co-authored-by: Thomas Roehl <thomas.roehl@fau.de>

Co-authored-by: Holger Obermaier <40787752+ho-ob@users.noreply.github.com>
Co-authored-by: Lou <lou.knauer@gmx.de>
This commit is contained in:
Thomas Gruber
2022-06-08 15:25:40 +02:00
committed by GitHub
parent 186a62a86b
commit 8d85bd53f1
51 changed files with 2097 additions and 705 deletions

View File

@@ -246,7 +246,7 @@ func matchfunc(args ...interface{}) (interface{}, error) {
// getCpuCoreFunc is a variadic wrapper that type-switches on args[0]. When the
// first argument is an int, it is handed to the topo package as cpuid and the
// result is returned together with a nil error; for any other type the function
// returns -1 and an error.
// args[0] is indexed without a length check, so a call without arguments panics.
// NOTE(review): the int case contains two consecutive returns, the older
// topo.GetCpuCore call followed by the renamed topo.GetHwthreadCore call. This
// looks like a diff shown without +/- markers; read literally as Go, only the
// first return would ever execute. Confirm which line is the current one.
func getCpuCoreFunc(args ...interface{}) (interface{}, error) {
switch cpuid := args[0].(type) {
case int:
return topo.GetCpuCore(cpuid), nil
return topo.GetHwthreadCore(cpuid), nil
}
// Reached for every non-int first argument.
return -1, errors.New("function 'getCpuCore' accepts only an 'int' cpuid")
}
@@ -255,7 +255,7 @@ func getCpuCoreFunc(args ...interface{}) (interface{}, error) {
// getCpuSocketFunc is a variadic wrapper that type-switches on args[0]. When the
// first argument is an int, it is handed to the topo package as cpuid and the
// result is returned together with a nil error; for any other type the function
// returns -1 and an error.
// args[0] is indexed without a length check, so a call without arguments panics.
// NOTE(review): the int case contains two consecutive returns, the older
// topo.GetCpuSocket call followed by the renamed topo.GetHwthreadSocket call.
// This looks like a diff shown without +/- markers; read literally as Go, only
// the first return would ever execute. Confirm which line is the current one.
func getCpuSocketFunc(args ...interface{}) (interface{}, error) {
switch cpuid := args[0].(type) {
case int:
return topo.GetCpuSocket(cpuid), nil
return topo.GetHwthreadSocket(cpuid), nil
}
// NOTE(review): the message below names 'getCpuCore' although this is the
// socket lookup; it looks like a copy-paste slip and probably should read
// 'getCpuSocket'. Confirm before changing, since callers may match on the text.
return -1, errors.New("function 'getCpuCore' accepts only an 'int' cpuid")
}
@@ -264,7 +264,7 @@ func getCpuSocketFunc(args ...interface{}) (interface{}, error) {
// getCpuNumaDomainFunc is a variadic wrapper that type-switches on args[0]. When
// the first argument is an int, it is handed to the topo package as cpuid and
// the result is returned together with a nil error; for any other type the
// function returns -1 and an error.
// args[0] is indexed without a length check, so a call without arguments panics.
// NOTE(review): the int case contains two consecutive returns, the older
// topo.GetCpuNumaDomain call followed by the renamed topo.GetHwthreadNumaDomain
// call. This looks like a diff shown without +/- markers; read literally as Go,
// only the first return would ever execute. Confirm which line is current.
func getCpuNumaDomainFunc(args ...interface{}) (interface{}, error) {
switch cpuid := args[0].(type) {
case int:
return topo.GetCpuNumaDomain(cpuid), nil
return topo.GetHwthreadNumaDomain(cpuid), nil
}
// NOTE(review): the message says 'getCpuNuma' while the function is named
// getCpuNumaDomainFunc; presumably 'getCpuNumaDomain' was intended. Verify.
return -1, errors.New("function 'getCpuNuma' accepts only an 'int' cpuid")
}
@@ -273,7 +273,7 @@ func getCpuNumaDomainFunc(args ...interface{}) (interface{}, error) {
// getCpuDieFunc is a variadic wrapper that type-switches on args[0]. When the
// first argument is an int, it is handed to the topo package as cpuid and the
// result is returned together with a nil error; for any other type the function
// returns -1 and an error.
// args[0] is indexed without a length check, so a call without arguments panics.
// NOTE(review): the int case contains two consecutive returns, the older
// topo.GetCpuDie call followed by the renamed topo.GetHwthreadDie call. This
// looks like a diff shown without +/- markers; read literally as Go, only the
// first return would ever execute. Confirm which line is the current one.
func getCpuDieFunc(args ...interface{}) (interface{}, error) {
switch cpuid := args[0].(type) {
case int:
return topo.GetCpuDie(cpuid), nil
return topo.GetHwthreadDie(cpuid), nil
}
// Reached for every non-int first argument.
return -1, errors.New("function 'getCpuDie' accepts only an 'int' cpuid")
}
@@ -336,7 +336,7 @@ func getCpuListOfDieFunc(args ...interface{}) (interface{}, error) {
// getCpuListOfNode is a wrapper function to get a list of all cpuids of the
// node. The variadic args are accepted but never read; the list comes straight
// from the topo package and the error result is always nil.
// NOTE(review): two consecutive returns are shown, the older topo.CpuList call
// followed by the renamed topo.HwthreadList call. This looks like a diff shown
// without +/- markers; read literally as Go, only the first return would run.
func getCpuListOfNode(args ...interface{}) (interface{}, error) {
return topo.CpuList(), nil
return topo.HwthreadList(), nil
}
// helper function to get the cpuid list for a CCMetric type tag set (type and type-id)
@@ -348,14 +348,14 @@ func getCpuListOfType(args ...interface{}) (interface{}, error) {
case string:
switch typ {
case "node":
return topo.CpuList(), nil
return topo.HwthreadList(), nil
case "socket":
return getCpuListOfSocketFunc(args[1])
case "numadomain":
return getCpuListOfNumaDomainFunc(args[1])
case "core":
return getCpuListOfCoreFunc(args[1])
case "cpu":
case "hwthread":
var cpu int
switch id := args[1].(type) {