Re-initialize LIKWID after a read is skipped due to a lock toggle

This commit is contained in:
Thomas Roehl 2022-12-14 16:36:52 +01:00
parent 8f372f0274
commit 35b25a6ce8

View File

@ -82,6 +82,7 @@ type LikwidCollector struct {
basefreq float64 basefreq float64
running bool running bool
initialized bool initialized bool
needs_reinit bool
likwidGroups map[C.int]LikwidEventsetConfig likwidGroups map[C.int]LikwidEventsetConfig
lock sync.Mutex lock sync.Mutex
measureThread thread.Thread measureThread thread.Thread
@ -193,6 +194,7 @@ func (m *LikwidCollector) Init(config json.RawMessage) error {
m.name = "LikwidCollector" m.name = "LikwidCollector"
m.parallel = false m.parallel = false
m.initialized = false m.initialized = false
m.needs_reinit = true
m.running = false m.running = false
m.config.AccessMode = LIKWID_DEF_ACCESSMODE m.config.AccessMode = LIKWID_DEF_ACCESSMODE
m.config.LibraryPath = LIKWID_LIB_NAME m.config.LibraryPath = LIKWID_LIB_NAME
@ -299,6 +301,7 @@ func (m *LikwidCollector) takeMeasurement(evset LikwidEventsetConfig, interval t
if ret != 0 { if ret != 0 {
var err error = nil var err error = nil
var skip bool = false var skip bool = false
cclog.ComponentDebug(m.name, "Setup returns", ret)
if ret == -37 { if ret == -37 {
skip = true skip = true
} else { } else {
@ -353,7 +356,6 @@ func (m *LikwidCollector) takeMeasurement(evset LikwidEventsetConfig, interval t
for _, tid := range m.cpu2tid { for _, tid := range m.cpu2tid {
evset.results[tid]["time"] = runtime evset.results[tid]["time"] = runtime
} }
} }
m.lock.Unlock() m.lock.Unlock()
return false, nil return false, nil
@ -458,6 +460,28 @@ func (m *LikwidCollector) calcGlobalMetrics(interval time.Duration, output chan
return nil return nil
} }
// ReInit tears down the LIKWID perfmon module, initializes it again for the
// CPUs in m.cpulist and re-registers every configured event set.
//
// On success m.likwidGroups holds exactly the event sets that LIKWID accepted
// in this round, keyed by the group ID returned by perfmon_addEventSet.
//
// It returns an error when no CPUs are configured or when perfmon_init fails.
// A return code of -22 from perfmon_init is reported as "access to LIKWID
// perfmon locked" without an error log entry; any other non-zero code is
// logged and reported as an initialization failure. In the error case
// m.likwidGroups is left untouched.
func (m *LikwidCollector) ReInit() error {
	// Taking the address of the first element below would panic on an empty list.
	if len(m.cpulist) == 0 {
		err := errors.New("no CPUs configured for LIKWID perfmon")
		cclog.ComponentError(m.name, err.Error())
		return err
	}

	C.perfmon_finalize()
	ret := C.perfmon_init(C.int(len(m.cpulist)), &m.cpulist[0])
	if ret != 0 {
		// Report the failure instead of silently pretending success.
		if ret == -22 {
			return errors.New("access to LIKWID perfmon locked")
		}
		err := errors.New("failed to initialize LIKWID perfmon")
		cclog.ComponentError(m.name, err.Error())
		return err
	}

	// perfmon was finalized above, so previously registered group IDs are gone.
	// Drop the old entries so the map only contains groups added below.
	// NOTE(review): the dropped entries may own C-allocated event strings;
	// confirm whether genLikwidEventSet results need an explicit free.
	if m.likwidGroups == nil {
		m.likwidGroups = make(map[C.int]LikwidEventsetConfig)
	}
	for gid := range m.likwidGroups {
		delete(m.likwidGroups, gid)
	}

	for i, evset := range m.config.Eventsets {
		// Event sets without events cannot be registered with LIKWID.
		if len(evset.Events) == 0 {
			continue
		}
		likwidGroup := genLikwidEventSet(evset)
		gid := C.perfmon_addEventSet(likwidGroup.estr)
		if gid < 0 {
			// LIKWID rejected this event set; skip it and keep the others.
			continue
		}
		likwidGroup.gid = gid
		likwidGroup.internal = i
		m.likwidGroups[gid] = likwidGroup
	}
	return nil
}
func (m *LikwidCollector) LateInit() error { func (m *LikwidCollector) LateInit() error {
var ret C.int var ret C.int
if m.initialized { if m.initialized {
@ -498,48 +522,53 @@ func (m *LikwidCollector) LateInit() error {
m.basefreq = getBaseFreq() m.basefreq = getBaseFreq()
cclog.ComponentDebug(m.name, "BaseFreq", m.basefreq) cclog.ComponentDebug(m.name, "BaseFreq", m.basefreq)
cclog.ComponentDebug(m.name, "initialize LIKWID perfmon module") if m.needs_reinit {
ret = C.perfmon_init(C.int(len(m.cpulist)), &m.cpulist[0]) m.ReInit()
if ret != 0 { m.needs_reinit = false
var err error = nil
C.topology_finalize()
if ret != -22 {
err = errors.New("failed to initialize LIKWID perfmon")
cclog.ComponentError(m.name, err.Error())
} else {
err = errors.New("access to LIKWID perfmon locked")
}
return err
} }
// While adding the events, we test the metrics whether they can be computed at all // cclog.ComponentDebug(m.name, "initialize LIKWID perfmon module")
for i, evset := range m.config.Eventsets { // ret = C.perfmon_init(C.int(len(m.cpulist)), &m.cpulist[0])
var gid C.int // if ret != 0 {
if len(evset.Events) > 0 { // var err error = nil
skip := false // C.topology_finalize()
likwidGroup := genLikwidEventSet(evset) // if ret != -22 {
for _, g := range m.likwidGroups { // err = errors.New("failed to initialize LIKWID perfmon")
if likwidGroup.go_estr == g.go_estr { // cclog.ComponentError(m.name, err.Error())
skip = true // } else {
break // err = errors.New("access to LIKWID perfmon locked")
} // }
} // return err
if skip { // }
continue
}
// Now we add the list of events to likwid
gid = C.perfmon_addEventSet(likwidGroup.estr)
if gid >= 0 {
likwidGroup.gid = gid
likwidGroup.internal = i
m.likwidGroups[gid] = likwidGroup
}
} else {
cclog.ComponentError(m.name, "Invalid Likwid eventset config, no events given")
continue
}
} // // While adding the events, we test the metrics whether they can be computed at all
// for i, evset := range m.config.Eventsets {
// var gid C.int
// if len(evset.Events) > 0 {
// //skip := false
// likwidGroup := genLikwidEventSet(evset)
// // for _, g := range m.likwidGroups {
// // if likwidGroup.go_estr == g.go_estr {
// // skip = true
// // break
// // }
// // }
// // if skip {
// // continue
// // }
// // Now we add the list of events to likwid
// gid = C.perfmon_addEventSet(likwidGroup.estr)
// if gid >= 0 {
// likwidGroup.gid = gid
// likwidGroup.internal = i
// m.likwidGroups[gid] = likwidGroup
// }
// } else {
// cclog.ComponentError(m.name, "Invalid Likwid eventset config, no events given")
// continue
// }
// }
// If no event set could be added, shut down LikwidCollector // If no event set could be added, shut down LikwidCollector
if len(m.likwidGroups) == 0 { if len(m.likwidGroups) == 0 {
@ -606,6 +635,10 @@ func (m *LikwidCollector) Read(interval time.Duration, output chan lp.CCMetric)
// use the event set metrics to derive the global metrics // use the event set metrics to derive the global metrics
m.calcGlobalMetrics(time, output) m.calcGlobalMetrics(time, output)
} }
if skip {
m.needs_reinit = true
m.initialized = false
}
} }
}) })
} }