Switch access mode of LikwidCollector in config file

This commit is contained in:
Thomas Roehl 2022-03-03 13:03:58 +01:00
parent 276c00442a
commit 60de21c41e
2 changed files with 26 additions and 5 deletions

View File

@ -2,7 +2,7 @@ package collectors
/* /*
#cgo CFLAGS: -I./likwid #cgo CFLAGS: -I./likwid
#cgo LDFLAGS: -L./likwid -llikwid -llikwid-hwloc -lm -Wl,--unresolved-symbols=ignore-in-object-files #cgo LDFLAGS: -Wl,--unresolved-symbols=ignore-in-object-files
#include <stdlib.h> #include <stdlib.h>
#include <likwid.h> #include <likwid.h>
*/ */
@ -73,6 +73,7 @@ func GetAllMetricScopes() []MetricScope {
const ( const (
LIKWID_LIB_NAME = "liblikwid.so" LIKWID_LIB_NAME = "liblikwid.so"
LIKWID_LIB_DL_FLAGS = dl.RTLD_LAZY | dl.RTLD_GLOBAL LIKWID_LIB_DL_FLAGS = dl.RTLD_LAZY | dl.RTLD_GLOBAL
LIKWID_DEF_ACCESSMODE = "direct"
) )
type LikwidCollectorMetricConfig struct { type LikwidCollectorMetricConfig struct {
@ -95,6 +96,8 @@ type LikwidCollectorConfig struct {
Metrics []LikwidCollectorMetricConfig `json:"globalmetrics,omitempty"` Metrics []LikwidCollectorMetricConfig `json:"globalmetrics,omitempty"`
ForceOverwrite bool `json:"force_overwrite,omitempty"` ForceOverwrite bool `json:"force_overwrite,omitempty"`
InvalidToZero bool `json:"invalid_to_zero,omitempty"` InvalidToZero bool `json:"invalid_to_zero,omitempty"`
AccessMode string `json:"access_mode,omitempty"`
DaemonPath string `json:"accessdaemon_path,omitempty"`
} }
type LikwidCollector struct { type LikwidCollector struct {
@ -260,6 +263,7 @@ func (m *LikwidCollector) getResponsiblities() map[MetricScope]map[int]int {
func (m *LikwidCollector) Init(config json.RawMessage) error { func (m *LikwidCollector) Init(config json.RawMessage) error {
var ret C.int var ret C.int
m.name = "LikwidCollector" m.name = "LikwidCollector"
m.config.AccessMode = LIKWID_DEF_ACCESSMODE
if len(config) > 0 { if len(config) > 0 {
err := json.Unmarshal(config, &m.config) err := json.Unmarshal(config, &m.config)
if err != nil { if err != nil {
@ -270,6 +274,11 @@ func (m *LikwidCollector) Init(config json.RawMessage) error {
if lib == nil { if lib == nil {
return fmt.Errorf("error instantiating DynamicLibrary for %s", LIKWID_LIB_NAME) return fmt.Errorf("error instantiating DynamicLibrary for %s", LIKWID_LIB_NAME)
} }
err := lib.Open()
if err != nil {
return fmt.Errorf("error opening %s: %v", LIKWID_LIB_NAME, err)
}
if m.config.ForceOverwrite { if m.config.ForceOverwrite {
cclog.ComponentDebug(m.name, "Set LIKWID_FORCE=1") cclog.ComponentDebug(m.name, "Set LIKWID_FORCE=1")
os.Setenv("LIKWID_FORCE", "1") os.Setenv("LIKWID_FORCE", "1")
@ -301,6 +310,16 @@ func (m *LikwidCollector) Init(config json.RawMessage) error {
m.initGranularity() m.initGranularity()
// Generate map for MetricScope -> scope_id (like socket id) -> responsible id (offset in cpulist) // Generate map for MetricScope -> scope_id (like socket id) -> responsible id (offset in cpulist)
m.scopeRespTids = m.getResponsiblities() m.scopeRespTids = m.getResponsiblities()
switch m.config.AccessMode {
case "direct":
C.HPMmode(0)
case "accessdaemon":
if len(m.config.DaemonPath) > 0 {
p := os.Getenv("PATH")
os.Setenv("PATH", m.config.DaemonPath+":"+p)
}
C.HPMmode(1)
}
cclog.ComponentDebug(m.name, "initialize LIKWID perfmon module") cclog.ComponentDebug(m.name, "initialize LIKWID perfmon module")
ret = C.perfmon_init(C.int(len(m.cpulist)), &m.cpulist[0]) ret = C.perfmon_init(C.int(len(m.cpulist)), &m.cpulist[0])

View File

@ -8,6 +8,8 @@ The `likwid` configuration consists of two parts, the "eventsets" and "globalmet
- The global metrics are metrics which require data from all event set measurements to be derived. The inputs are the metrics in the event sets. Similar to the metrics in the event sets, the global metrics are defined by a name, a formula, a scope and a publish flag. See event set metrics for details. The only difference is that there is no access to the raw event measurements anymore but only to the metrics. So, the idea is to derive a metric in the "eventsets" section and reuse it in the "globalmetrics" part. If you need a metric only for deriving the global metrics, disable forwarding of the event set metrics. **Be aware** that the combination might be misleading because the "behavior" of a metric changes over time and the multiple measurements might count different computing phases. - The global metrics are metrics which require data from all event set measurements to be derived. The inputs are the metrics in the event sets. Similar to the metrics in the event sets, the global metrics are defined by a name, a formula, a scope and a publish flag. See event set metrics for details. The only difference is that there is no access to the raw event measurements anymore but only to the metrics. So, the idea is to derive a metric in the "eventsets" section and reuse it in the "globalmetrics" part. If you need a metric only for deriving the global metrics, disable forwarding of the event set metrics. **Be aware** that the combination might be misleading because the "behavior" of a metric changes over time and the multiple measurements might count different computing phases.
Additional options: Additional options:
- `access_mode`: Method used for hardware performance monitoring (`direct` for direct access as root user, `accessdaemon` for the daemon mode). Defaults to `direct`.
- `accessdaemon_path`: Folder with the access daemon `likwid-accessD`, commonly `$LIKWID_INSTALL_LOC/sbin`
- `force_overwrite`: Same as setting `LIKWID_FORCE=1`. In case counters are already in-use, LIKWID overwrites their configuration to do its measurements - `force_overwrite`: Same as setting `LIKWID_FORCE=1`. In case counters are already in-use, LIKWID overwrites their configuration to do its measurements
- `invalid_to_zero`: In some cases, the calculations result in `NaN` or `Inf`. With this option, all `NaN` and `Inf` values are replaced with `0.0`. - `invalid_to_zero`: In some cases, the calculations result in `NaN` or `Inf`. With this option, all `NaN` and `Inf` values are replaced with `0.0`.
@ -69,12 +71,12 @@ LIKWID checks the file `/var/run/likwid.lock` before performing any interfering
Before (SLURM prolog, ...) Before (SLURM prolog, ...)
``` ```
$ chown $USER /var/run/likwid.lock $ chown $JOBUSER /var/run/likwid.lock
``` ```
After (SLURM epilog, ...) After (SLURM epilog, ...)
``` ```
$ chown root /var/run/likwid.lock $ chown $CCUSER /var/run/likwid.lock
``` ```
### Example configuration ### Example configuration