Rename cpu type to hwthread (#69)

* Rename 'cpu' type to 'hwthread' to avoid naming clashes with MetricStore and CC-Webfrontend
This commit is contained in:
Thomas Gruber 2022-05-13 14:09:45 +02:00 committed by GitHub
parent 0623691bab
commit 1db5f3b29a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
10 changed files with 56 additions and 19 deletions

View File

@ -1,6 +1,8 @@
{
"testoutput" : {
"type" : "stdout",
"meta_as_tags" : true
"meta_as_tags" : [
"unit"
]
}
}

View File

@ -150,7 +150,7 @@ func (m *CPUFreqCpuInfoCollector) Init(config json.RawMessage) error {
t.numNonHT = numNonHT
t.numNonHT_int = numNonHT_int
t.tagSet = map[string]string{
"type": "cpu",
"type": "hwthread",
"type-id": t.processor,
"package_id": t.physicalPackageID,
}

View File

@ -4,7 +4,7 @@
"cpufreq_cpuinfo": {}
```
The `cpufreq_cpuinfo` collector reads the clock frequency from `/proc/cpuinfo` and outputs a handful **cpu** metrics.
The `cpufreq_cpuinfo` collector reads the clock frequency from `/proc/cpuinfo` and outputs a handful of **hwthread** metrics.
Metrics:
* `cpufreq`

View File

@ -161,7 +161,7 @@ func (m *CPUFreqCollector) Init(config json.RawMessage) error {
t.numNonHT = numNonHT
t.numNonHT_int = numNonHT_int
t.tagSet = map[string]string{
"type": "cpu",
"type": "hwthread",
"type-id": t.processor,
"package_id": t.physicalPackageID,
}

View File

@ -5,7 +5,7 @@
}
```
The `cpufreq` collector reads the clock frequency from `/sys/devices/system/cpu/cpu*/cpufreq` and outputs a handful **cpu** metrics.
The `cpufreq` collector reads the clock frequency from `/sys/devices/system/cpu/cpu*/cpufreq` and outputs a handful of **hwthread** metrics.
Metrics:
* `cpufreq`

View File

@ -82,7 +82,7 @@ func (m *CpustatCollector) Init(config json.RawMessage) error {
if strings.HasPrefix(linefields[0], "cpu") && strings.Compare(linefields[0], "cpu") != 0 {
cpustr := strings.TrimLeft(linefields[0], "cpu")
cpu, _ := strconv.Atoi(cpustr)
m.cputags[linefields[0]] = map[string]string{"type": "cpu", "type-id": fmt.Sprintf("%d", cpu)}
m.cputags[linefields[0]] = map[string]string{"type": "hwthread", "type-id": fmt.Sprintf("%d", cpu)}
num_cpus++
}
}

View File

@ -19,7 +19,7 @@ The `likwid` collector is probably the most complicated collector. The LIKWID li
"calc": "COUNTER0 + COUNTER1",
"publish": false,
"unit": "myunit",
"type": "cpu"
"type": "hwthread"
}
]
}
@ -30,7 +30,7 @@ The `likwid` collector is probably the most complicated collector. The LIKWID li
"calc": "sum_01",
"publish": true,
"unit": "myunit",
"type": "cpu"
"type": "hwthread"
}
]
}
@ -51,15 +51,15 @@ Additional options:
Hardware performance counters are scattered all over the system nowadays. A counter covers a specific part of the system. While there are hardware thread specific counters for CPU cycles, instructions and so on, some others are specific for a whole CPU socket/package. To address that, the LikwidCollector provides the specification of a `type` for each metric.
- `cpu` : One metric per CPU hardware thread with the tags `"type" : "cpu"` and `"type-id" : "$cpu_id"`
- `hwthread` : One metric per CPU hardware thread with the tags `"type" : "hwthread"` and `"type-id" : "$hwthread_id"`
- `socket` : One metric per CPU socket/package with the tags `"type" : "socket"` and `"type-id" : "$socket_id"`
**Note:** You should not specify the `socket` type for a metric that is measured at `cpu` scope and vice versa, so some kind of expert knowledge or lookup work in the [Likwid Wiki](https://github.com/RRZE-HPC/likwid/wiki) is required. Get the scope of each counter from the *Architecture* pages and as soon as one counter in a metric is socket-specific, the whole metric is socket-specific.
**Note:** You cannot specify `socket` scope for a metric that is measured at `hwthread` scope, so some kind of expert knowledge or lookup work in the [Likwid Wiki](https://github.com/RRZE-HPC/likwid/wiki) is required. Get the scope of each counter from the *Architecture* pages and as soon as one counter in a metric is socket-specific, the whole metric is socket-specific.
As a guideline:
- All counters `FIXCx`, `PMCy` and `TMAz` have the scope `cpu`
- All counters `FIXCx`, `PMCy` and `TMAz` have the scope `hwthread`
- All counters names containing `BOX` have the scope `socket`
- All `PWRx` counters have scope `socket`, except `"PWR1" : "RAPL_CORE_ENERGY"` has `cpu` scope (AMD Zen)
- All `PWRx` counters have scope `socket`, except `"PWR1" : "RAPL_CORE_ENERGY"` has `hwthread` scope
- All `DFCx` counters have scope `socket`
### Help with the configuration
@ -90,7 +90,7 @@ $ scripts/likwid_perfgroup_to_cc_config.py ICX MEM_DP
"name": "Runtime (RDTSC) [s]",
"publish": true,
"unit": "seconds"
"scope": "cpu"
"scope": "hwthread"
},
{
"..." : "..."
@ -147,20 +147,20 @@ One might think this does not happen often but often used metrics in the world o
{
"name": "ipc",
"calc": "PMC0/PMC1",
"type": "cpu",
"type": "hwthread",
"publish": true
},
{
"name": "flops_any",
"calc": "0.000001*PMC2/time",
"unit": "MFlops/s",
"type": "cpu",
"type": "hwthread",
"publish": true
},
{
"name": "clock",
"calc": "0.000001*(FIXC1/FIXC2)/inverseClock",
"type": "cpu",
"type": "hwthread",
"unit": "MHz",
"publish": true
},
@ -219,3 +219,33 @@ One might think this does not happen often but often used metrics in the world o
}
```
### How to get the eventsets and metrics from LIKWID
The `likwid` collector reads hardware performance counters at a **hwthread** and **socket** level. The configuration looks quite complicated but it is basically copy&paste from [LIKWID's performance groups](https://github.com/RRZE-HPC/likwid/tree/master/groups). The collector went through multiple iterations that tried to use the performance groups directly, but that approach lacked flexibility. The current way of configuration provides the most flexibility.
The logic is as follows: There are multiple eventsets, each consisting of a list of counters+events and a list of metrics. If you compare a common performance group with the example setting above, there is not much difference:
```
EVENTSET -> "events": {
FIXC1 ACTUAL_CPU_CLOCK -> "FIXC1": "ACTUAL_CPU_CLOCK",
FIXC2 MAX_CPU_CLOCK -> "FIXC2": "MAX_CPU_CLOCK",
PMC0 RETIRED_INSTRUCTIONS -> "PMC0" : "RETIRED_INSTRUCTIONS",
PMC1 CPU_CLOCKS_UNHALTED -> "PMC1" : "CPU_CLOCKS_UNHALTED",
PMC2 RETIRED_SSE_AVX_FLOPS_ALL -> "PMC2": "RETIRED_SSE_AVX_FLOPS_ALL",
PMC3 MERGE -> "PMC3": "MERGE",
-> }
```
The metrics follow the same procedure:
```
METRICS -> "metrics": [
IPC PMC0/PMC1 -> {
-> "name" : "IPC",
-> "calc" : "PMC0/PMC1",
-> "scope": "hwthread",
-> "publish": true
-> }
-> ]
```
The script `scripts/likwid_perfgroup_to_cc_config.py` might help you.

View File

@ -42,7 +42,12 @@ func (m *SampleCollector) Init(config json.RawMessage) error {
// The 'type' tag is always needed, it defines the granularity of the metric
// node -> whole system
// socket -> CPU socket (requires socket ID as 'type-id' tag)
// cpu -> single CPU hardware thread (requires cpu ID as 'type-id' tag)
// die -> CPU die (requires CPU die ID as 'type-id' tag)
// memoryDomain -> NUMA domain (requires NUMA domain ID as 'type-id' tag)
// llc -> Last level cache (requires last level cache ID as 'type-id' tag)
// core -> single CPU core that may consist of multiple hardware threads (SMT) (requires core ID as 'type-id' tag)
// hwthread -> single CPU hardware thread (requires hardware thread ID as 'type-id' tag)
// accelerator -> An accelerator device like GPU or FPGA (requires an accelerator ID as 'type-id' tag)
m.tags = map[string]string{"type": "node"}
// Read in the JSON configuration
if len(config) > 0 {

View File

@ -355,7 +355,7 @@ func getCpuListOfType(args ...interface{}) (interface{}, error) {
return getCpuListOfNumaDomainFunc(args[1])
case "core":
return getCpuListOfCoreFunc(args[1])
case "cpu":
case "hwthread":
var cpu int
switch id := args[1].(type) {

View File

@ -39,7 +39,7 @@ def group_to_json(groupfile):
llist = re.split("\s+", line)
calc = llist[-1]
metric = " ".join(llist[:-1])
scope = "cpu"
scope = "hwthread"
if "BOX" in calc:
scope = "socket"
if "PWR" in calc: