mirror of
https://github.com/ClusterCockpit/cc-metric-collector.git
synced 2025-07-19 03:11:41 +02:00
Merge develop branch into main (#106)
* Add cpu_used (all-cpu_idle) to CpustatCollector * Update to line-protocol/v2 * Update runonce.yml with Golang 1.20 * Update fsnotify in LIKWID Collector * Use not a pointer to line-protocol.Encoder * Simplify Makefile * Use only as many arguments as required * Allow sum function to handle non float types * Allow values to be a slice of type float64, float32, int, int64, int32, bool * Use generic function to simplify code * Add missing case for type []int32 * Use generic function to compute minimum * Use generic function to compute maximum * Use generic function to compute average * Add error value to sumAnyType * Use generic function to compute median * For older versions of go slices is not part of the installation * Remove old entries from go.sum * Use simpler sort function * Compute metrics ib_total and ib_total_pkts * Add aggregated metrics. Add missing units * Update likwidMetric.go Fixes a potential bug when `fsnotify.NewWatcher()` fails with an error * Completly avoid memory allocations in infinibandMetric read() * Fixed initialization: Initalization and measurements should run in the same thread --------- Co-authored-by: Holger Obermaier <40787752+ho-ob@users.noreply.github.com>
This commit is contained in:
@@ -30,7 +30,7 @@ import (
|
||||
topo "github.com/ClusterCockpit/cc-metric-collector/pkg/ccTopology"
|
||||
"github.com/NVIDIA/go-nvml/pkg/dl"
|
||||
"golang.design/x/thread"
|
||||
fsnotify "gopkg.in/fsnotify.v0"
|
||||
fsnotify "gopkg.in/fsnotify.v1"
|
||||
)
|
||||
|
||||
const (
|
||||
@@ -306,17 +306,36 @@ func (m *LikwidCollector) Init(config json.RawMessage) error {
|
||||
cclog.ComponentError(m.name, err.Error())
|
||||
return err
|
||||
}
|
||||
m.measureThread = thread.New()
|
||||
switch m.config.AccessMode {
|
||||
case "direct":
|
||||
C.HPMmode(0)
|
||||
m.measureThread.Call(
|
||||
func() {
|
||||
C.HPMmode(0)
|
||||
})
|
||||
case "accessdaemon":
|
||||
if len(m.config.DaemonPath) > 0 {
|
||||
p := os.Getenv("PATH")
|
||||
os.Setenv("PATH", m.config.DaemonPath+":"+p)
|
||||
}
|
||||
C.HPMmode(1)
|
||||
m.measureThread.Call(
|
||||
func() {
|
||||
C.HPMmode(1)
|
||||
retCode := C.HPMinit()
|
||||
if retCode != 0 {
|
||||
err := fmt.Errorf("C.HPMinit() failed with return code %v", retCode)
|
||||
cclog.ComponentError(m.name, err.Error())
|
||||
}
|
||||
})
|
||||
for _, c := range m.cpulist {
|
||||
C.HPMaddThread(c)
|
||||
m.measureThread.Call(
|
||||
func() {
|
||||
retCode := C.HPMaddThread(c)
|
||||
if retCode != 0 {
|
||||
err := fmt.Errorf("C.HPMaddThread(%v) failed with return code %v", c, retCode)
|
||||
cclog.ComponentError(m.name, err.Error())
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
m.sock2tid = make(map[int]int)
|
||||
@@ -331,7 +350,6 @@ func (m *LikwidCollector) Init(config json.RawMessage) error {
|
||||
}
|
||||
|
||||
m.basefreq = getBaseFreq()
|
||||
m.measureThread = thread.New()
|
||||
m.init = true
|
||||
return nil
|
||||
}
|
||||
@@ -344,6 +362,7 @@ func (m *LikwidCollector) takeMeasurement(evidx int, evset LikwidEventsetConfig,
|
||||
watcher, err := fsnotify.NewWatcher()
|
||||
if err != nil {
|
||||
cclog.ComponentError(m.name, err.Error())
|
||||
return true, err
|
||||
}
|
||||
defer watcher.Close()
|
||||
if len(m.config.LockfilePath) > 0 {
|
||||
@@ -360,7 +379,7 @@ func (m *LikwidCollector) takeMeasurement(evidx int, evset LikwidEventsetConfig,
|
||||
return true, fmt.Errorf("Access to performance counters locked by %d", stat.Uid)
|
||||
}
|
||||
}
|
||||
err = watcher.Watch(m.config.LockfilePath)
|
||||
err = watcher.Add(m.config.LockfilePath)
|
||||
if err != nil {
|
||||
cclog.ComponentError(m.name, err.Error())
|
||||
}
|
||||
@@ -368,9 +387,9 @@ func (m *LikwidCollector) takeMeasurement(evidx int, evset LikwidEventsetConfig,
|
||||
m.lock.Lock()
|
||||
defer m.lock.Unlock()
|
||||
select {
|
||||
case e := <-watcher.Event:
|
||||
case e := <-watcher.Events:
|
||||
ret = -1
|
||||
if !e.IsAttrib() {
|
||||
if e.Op != fsnotify.Chmod {
|
||||
ret = C.perfmon_init(C.int(len(m.cpulist)), &m.cpulist[0])
|
||||
}
|
||||
default:
|
||||
@@ -384,9 +403,9 @@ func (m *LikwidCollector) takeMeasurement(evidx int, evset LikwidEventsetConfig,
|
||||
select {
|
||||
case <-sigchan:
|
||||
gid = -1
|
||||
case e := <-watcher.Event:
|
||||
case e := <-watcher.Events:
|
||||
gid = -1
|
||||
if !e.IsAttrib() {
|
||||
if e.Op != fsnotify.Chmod {
|
||||
gid = C.perfmon_addEventSet(evset.estr)
|
||||
}
|
||||
default:
|
||||
@@ -401,8 +420,8 @@ func (m *LikwidCollector) takeMeasurement(evidx int, evset LikwidEventsetConfig,
|
||||
select {
|
||||
case <-sigchan:
|
||||
ret = -1
|
||||
case e := <-watcher.Event:
|
||||
if !e.IsAttrib() {
|
||||
case e := <-watcher.Events:
|
||||
if e.Op != fsnotify.Chmod {
|
||||
ret = C.perfmon_setupCounters(gid)
|
||||
}
|
||||
default:
|
||||
@@ -414,8 +433,8 @@ func (m *LikwidCollector) takeMeasurement(evidx int, evset LikwidEventsetConfig,
|
||||
select {
|
||||
case <-sigchan:
|
||||
ret = -1
|
||||
case e := <-watcher.Event:
|
||||
if !e.IsAttrib() {
|
||||
case e := <-watcher.Events:
|
||||
if e.Op != fsnotify.Chmod {
|
||||
ret = C.perfmon_startCounters()
|
||||
}
|
||||
default:
|
||||
@@ -427,8 +446,8 @@ func (m *LikwidCollector) takeMeasurement(evidx int, evset LikwidEventsetConfig,
|
||||
select {
|
||||
case <-sigchan:
|
||||
ret = -1
|
||||
case e := <-watcher.Event:
|
||||
if !e.IsAttrib() {
|
||||
case e := <-watcher.Events:
|
||||
if e.Op != fsnotify.Chmod {
|
||||
ret = C.perfmon_readCounters()
|
||||
}
|
||||
default:
|
||||
@@ -441,8 +460,8 @@ func (m *LikwidCollector) takeMeasurement(evidx int, evset LikwidEventsetConfig,
|
||||
select {
|
||||
case <-sigchan:
|
||||
ret = -1
|
||||
case e := <-watcher.Event:
|
||||
if !e.IsAttrib() {
|
||||
case e := <-watcher.Events:
|
||||
if e.Op != fsnotify.Chmod {
|
||||
ret = C.perfmon_readCounters()
|
||||
}
|
||||
default:
|
||||
@@ -468,8 +487,8 @@ func (m *LikwidCollector) takeMeasurement(evidx int, evset LikwidEventsetConfig,
|
||||
select {
|
||||
case <-sigchan:
|
||||
ret = -1
|
||||
case e := <-watcher.Event:
|
||||
if !e.IsAttrib() {
|
||||
case e := <-watcher.Events:
|
||||
if e.Op != fsnotify.Chmod {
|
||||
ret = C.perfmon_stopCounters()
|
||||
}
|
||||
default:
|
||||
@@ -480,8 +499,8 @@ func (m *LikwidCollector) takeMeasurement(evidx int, evset LikwidEventsetConfig,
|
||||
}
|
||||
signal.Stop(sigchan)
|
||||
select {
|
||||
case e := <-watcher.Event:
|
||||
if !e.IsAttrib() {
|
||||
case e := <-watcher.Events:
|
||||
if e.Op != fsnotify.Chmod {
|
||||
C.perfmon_finalize()
|
||||
}
|
||||
default:
|
||||
@@ -589,7 +608,6 @@ func (m *LikwidCollector) calcGlobalMetrics(groups []LikwidEventsetConfig, inter
|
||||
return nil
|
||||
}
|
||||
|
||||
|
||||
func (m *LikwidCollector) ReadThread(interval time.Duration, output chan lp.CCMetric) {
|
||||
var err error = nil
|
||||
groups := make([]LikwidEventsetConfig, 0)
|
||||
|
Reference in New Issue
Block a user