From 037b4f152671535c9af4872a80461ccb149a2975 Mon Sep 17 00:00:00 2001 From: Thomas Roehl Date: Mon, 8 Jun 2026 14:52:24 +0200 Subject: [PATCH] Update cclog calls --- cc-metric-collector.go | 8 ++-- collectors/beegfsmetaMetric.go | 8 ++-- collectors/beegfsstorageMetric.go | 8 ++-- collectors/cpufreqCpuinfoMetric.go | 12 +++--- collectors/cpufreqMetric.go | 13 ++----- collectors/cpustatMetric.go | 8 ++-- collectors/customCmdMetric.go | 26 ++++++------- collectors/diskstatMetric.go | 6 +-- collectors/gpfsMetric.go | 14 +++---- collectors/iostatMetric.go | 8 ++-- collectors/likwidMetric.go | 32 ++++++++------- collectors/loadavgMetric.go | 12 +++--- collectors/memstatMetric.go | 2 +- collectors/netstatMetric.go | 8 ++-- collectors/nfsMetric.go | 5 +-- collectors/numastatsMetric.go | 2 +- collectors/nvidiaMetric.go | 56 +++++++++++++-------------- collectors/raplMetric.go | 7 ++-- collectors/rocmsmiMetric.go | 4 +- collectors/schedstatMetric.go | 8 ++-- collectors/slurmCgroupMetric.go | 4 +- collectors/smartmonMetric.go | 4 +- collectors/tempMetric.go | 8 ++-- collectors/topprocsMetric.go | 4 +- internal/metricRouter/metricRouter.go | 26 ++++++------- 25 files changed, 144 insertions(+), 149 deletions(-) diff --git a/cc-metric-collector.go b/cc-metric-collector.go index 3188f9c..2a2a26d 100644 --- a/cc-metric-collector.go +++ b/cc-metric-collector.go @@ -132,11 +132,11 @@ func mainFunc() int { if len(rcfg.ConfigFile.Interval) > 0 { t, err := time.ParseDuration(rcfg.ConfigFile.Interval) if err != nil { - cclog.Error("Configuration value 'interval' no valid duration") + cclog.Errorf("Configuration value interval=%s no valid duration", rcfg.ConfigFile.Interval) } rcfg.Interval = t if rcfg.Interval == 0 { - cclog.Error("Configuration value 'interval' must be greater than zero") + cclog.Errorf("Configuration value interval=%s must be greater than zero", rcfg.ConfigFile.Interval) return 1 } } @@ -145,11 +145,11 @@ func mainFunc() int { if len(rcfg.ConfigFile.Duration) > 0 { 
t, err := time.ParseDuration(rcfg.ConfigFile.Duration) if err != nil { - cclog.Error("Configuration value 'duration' no valid duration") + cclog.Errorf("Configuration value duration=%s no valid duration", rcfg.ConfigFile.Duration) } rcfg.Duration = t if rcfg.Duration == 0 { - cclog.Error("Configuration value 'duration' must be greater than zero") + cclog.Errorf("Configuration value duration=%s must be greater than zero", rcfg.ConfigFile.Duration) return 1 } } diff --git a/collectors/beegfsmetaMetric.go b/collectors/beegfsmetaMetric.go index 0a58dbf..4d358fc 100644 --- a/collectors/beegfsmetaMetric.go +++ b/collectors/beegfsmetaMetric.go @@ -209,16 +209,16 @@ func (m *BeegfsMetaCollector) Read(interval time.Duration, output chan lp.CCMess } else { f1, err := strconv.ParseFloat(m.matches["other"], 32) if err != nil { - cclog.ComponentError( + cclog.ComponentErrorf( m.name, - fmt.Sprintf("Metric (other): Failed to convert str written '%s' to float: %v", m.matches["other"], err)) + "Metric (other): Failed to convert str written '%s' to float: %v", m.matches["other"], err) continue } f2, err := strconv.ParseFloat(split[i], 32) if err != nil { - cclog.ComponentError( + cclog.ComponentErrorf( m.name, - fmt.Sprintf("Metric (other): Failed to convert str written '%s' to float: %v", m.matches["other"], err)) + "Metric (other): Failed to convert str written '%s' to float: %v", m.matches["other"], err) continue } m.matches["beegfs_cstorage_other"] = fmt.Sprintf("%f", f1+f2) diff --git a/collectors/beegfsstorageMetric.go b/collectors/beegfsstorageMetric.go index 17abe65..fde5da5 100644 --- a/collectors/beegfsstorageMetric.go +++ b/collectors/beegfsstorageMetric.go @@ -200,16 +200,16 @@ func (m *BeegfsStorageCollector) Read(interval time.Duration, output chan lp.CCM } else { f1, err := strconv.ParseFloat(m.matches["other"], 32) if err != nil { - cclog.ComponentError( + cclog.ComponentErrorf( m.name, - fmt.Sprintf("Metric (other): Failed to convert str written '%s' to float: %v", 
m.matches["other"], err)) + "Metric (other): Failed to convert str written '%s' to float: %v", m.matches["other"], err) continue } f2, err := strconv.ParseFloat(split[i], 32) if err != nil { - cclog.ComponentError( + cclog.ComponentErrorf( m.name, - fmt.Sprintf("Metric (other): Failed to convert str written '%s' to float: %v", m.matches["other"], err)) + "Metric (other): Failed to convert str written '%s' to float: %v", m.matches["other"], err) continue } m.matches["beegfs_cstorage_other"] = fmt.Sprintf("%f", f1+f2) diff --git a/collectors/cpufreqCpuinfoMetric.go b/collectors/cpufreqCpuinfoMetric.go index 4163436..dbfb78f 100644 --- a/collectors/cpufreqCpuinfoMetric.go +++ b/collectors/cpufreqCpuinfoMetric.go @@ -139,16 +139,16 @@ func (m *CPUFreqCpuInfoCollector) Read(interval time.Duration, output chan lp.CC const cpuInfoFile = "/proc/cpuinfo" file, err := os.Open(cpuInfoFile) if err != nil { - cclog.ComponentError( + cclog.ComponentErrorf( m.name, - fmt.Sprintf("Read(): Failed to open file '%s': %v", cpuInfoFile, err)) + "Read(): Failed to open file '%s': %v", cpuInfoFile, err) return } defer func() { if err := file.Close(); err != nil { - cclog.ComponentError( + cclog.ComponentErrorf( m.name, - fmt.Sprintf("Read(): Failed to close file '%s': %v", cpuInfoFile, err)) + "Read(): Failed to close file '%s': %v", cpuInfoFile, err) } }() @@ -166,9 +166,9 @@ func (m *CPUFreqCpuInfoCollector) Read(interval time.Duration, output chan lp.CC if !t.isHT { value, err := strconv.ParseFloat(strings.TrimSpace(lineSplit[1]), 64) if err != nil { - cclog.ComponentError( + cclog.ComponentErrorf( m.name, - fmt.Sprintf("Read(): Failed to convert cpu MHz '%s' to float64: %v", lineSplit[1], err)) + "Read(): Failed to convert cpu MHz '%s' to float64: %v", lineSplit[1], err) return } if y, err := lp.NewMetric("cpufreq", t.tagSet, m.meta, value, now); err == nil { diff --git a/collectors/cpufreqMetric.go b/collectors/cpufreqMetric.go index 225739c..beacbaf 100644 --- 
a/collectors/cpufreqMetric.go +++ b/collectors/cpufreqMetric.go @@ -95,10 +95,7 @@ func (m *CPUFreqCollector) Init(config json.RawMessage) error { } // Initialized - cclog.ComponentDebug( - m.name, - "initialized", - len(m.topology), "non-hyper-threading CPUs") + cclog.ComponentDebugf(m.name, "initialized %d non-hyper-threading CPUs", len(m.topology)) m.init = true return nil } @@ -116,16 +113,14 @@ func (m *CPUFreqCollector) Read(interval time.Duration, output chan lp.CCMessage // Read current frequency line, err := os.ReadFile(t.scalingCurFreqFile) if err != nil { - cclog.ComponentError( - m.name, - fmt.Sprintf("Read(): Failed to read file '%s': %v", t.scalingCurFreqFile, err)) + cclog.ComponentErrorf( + m.name, "Read(): Failed to read file '%s': %v", t.scalingCurFreqFile, err) continue } cpuFreq, err := strconv.ParseInt(strings.TrimSpace(string(line)), 10, 64) if err != nil { - cclog.ComponentError( - m.name, - fmt.Sprintf("Read(): Failed to convert CPU frequency '%s' to int64: %v", line, err)) + cclog.ComponentErrorf( + m.name, "Read(): Failed to convert CPU frequency '%s' to int64: %v", line, err) continue } diff --git a/collectors/cpustatMetric.go b/collectors/cpustatMetric.go index 1045f51..7595688 100644 --- a/collectors/cpustatMetric.go +++ b/collectors/cpustatMetric.go @@ -171,15 +171,15 @@ func (m *CpustatCollector) Read(interval time.Duration, output chan lp.CCMessage file, err := os.Open(CPUSTATFILE) if err != nil { - cclog.ComponentError( + cclog.ComponentErrorf( m.name, - fmt.Sprintf("Read(): Failed to open file '%s': %v", CPUSTATFILE, err)) + "Read(): Failed to open file '%s': %v", CPUSTATFILE, err) } defer func() { if err := file.Close(); err != nil { - cclog.ComponentError( + cclog.ComponentErrorf( m.name, - fmt.Sprintf("Read(): Failed to close file '%s': %v", string(CPUSTATFILE), err)) + "Read(): Failed to close file '%s': %v", string(CPUSTATFILE), err) } }() diff --git a/collectors/customCmdMetric.go b/collectors/customCmdMetric.go index f77d92e..f7deb4a 100644 --- 
a/collectors/customCmdMetric.go +++ b/collectors/customCmdMetric.go @@ -64,9 +64,9 @@ func (m *CustomCmdCollector) Init(config json.RawMessage) error { cmdFields := strings.Fields(c) command := exec.Command(cmdFields[0], cmdFields[1:]...) if _, err := command.Output(); err != nil { - cclog.ComponentWarn( + cclog.ComponentWarnf( m.name, - fmt.Sprintf("%s Init(): Execution of command \"%s\" failed: %v", m.name, command.String(), err)) + "%s Init(): Execution of command \"%s\" failed: %v", m.name, command.String(), err) continue } m.cmdFieldsSlice = append(m.cmdFieldsSlice, cmdFields) @@ -77,7 +77,7 @@ func (m *CustomCmdCollector) Init(config json.RawMessage) error { if _, err := os.ReadFile(fileName); err != nil { - cclog.ComponentWarn( + cclog.ComponentWarnf( m.name, - fmt.Sprintf("%s Init(): Reading of file \"%s\" failed: %v", m.name, fileName, err)) + "%s Init(): Reading of file \"%s\" failed: %v", m.name, fileName, err) continue } m.files = append(m.files, fileName) @@ -100,20 +100,18 @@ func (m *CustomCmdCollector) Read(interval time.Duration, output chan lp.CCMessa command := exec.Command(cmdFields[0], cmdFields[1:]...) 
stdout, err := command.Output() if err != nil { - cclog.ComponentError( + cclog.ComponentErrorf( m.name, - fmt.Sprintf("Read(): Failed to read command output for command \"%s\": %v", command.String(), err), - ) + "Read(): Failed to read command output for command \"%s\": %v", command.String(), err) continue } // Read and decode influxDB line-protocol from command output metrics, err := lp.FromBytes(stdout) if err != nil { - cclog.ComponentError( + cclog.ComponentErrorf( m.name, - fmt.Sprintf("Read(): Failed to decode influx Message: %v", err), - ) + "Read(): Failed to decode influx Message: %v", err) continue } for _, metric := range metrics { @@ -128,20 +126,18 @@ func (m *CustomCmdCollector) Read(interval time.Duration, output chan lp.CCMessa for _, filename := range m.files { input, err := os.ReadFile(filename) if err != nil { - cclog.ComponentError( + cclog.ComponentErrorf( m.name, - fmt.Sprintf("Read(): Failed to read file \"%s\": %v\n", filename, err), - ) + "Read(): Failed to read file \"%s\": %v\n", filename, err) continue } // Read and decode influxDB line-protocol from file metrics, err := lp.FromBytes(input) if err != nil { - cclog.ComponentError( + cclog.ComponentErrorf( m.name, - fmt.Sprintf("Read(): Failed to decode influx Message: %v", err), - ) + "Read(): Failed to decode influx Message: %v", err) continue } for _, metric := range metrics { diff --git a/collectors/diskstatMetric.go b/collectors/diskstatMetric.go index 5f133b7..23cf55b 100644 --- a/collectors/diskstatMetric.go +++ b/collectors/diskstatMetric.go @@ -77,16 +77,16 @@ func (m *DiskstatCollector) Read(interval time.Duration, output chan lp.CCMessag file, err := os.Open(MOUNTFILE) if err != nil { - cclog.ComponentError( + cclog.ComponentErrorf( m.name, - fmt.Sprintf("Read(): Failed to open file '%s': %v", MOUNTFILE, err)) + "Read(): Failed to open file '%s': %v", MOUNTFILE, err) return } defer func() { if err := file.Close(); err != nil { - cclog.ComponentError( + cclog.ComponentErrorf( m.name, - 
fmt.Sprintf("Read(): Failed to close file '%s': %v", MOUNTFILE, err)) + "Read(): Failed to close file '%s': %v", MOUNTFILE, err) } }() diff --git a/collectors/gpfsMetric.go b/collectors/gpfsMetric.go index 1d1a963..d76130f 100644 --- a/collectors/gpfsMetric.go +++ b/collectors/gpfsMetric.go @@ -371,7 +371,7 @@ func (m *GpfsCollector) Init(config json.RawMessage) error { if err != nil { // if using sudo, exec.lookPath will return EACCES (file mode r-x------), this can be ignored if m.config.Sudo && errors.Is(err, syscall.EACCES) { - cclog.ComponentWarn(m.name, fmt.Sprintf("got error looking for mmpmon binary '%s': %v . This is expected when using sudo, continuing.", m.config.Mmpmon, err)) + cclog.ComponentWarnf(m.name, "got error looking for mmpmon binary '%s': %v . This is expected when using sudo, continuing.", m.config.Mmpmon, err) // the file was given in the config, use it p = m.config.Mmpmon } else { @@ -517,23 +517,23 @@ func (m *GpfsCollector) Read(interval time.Duration, output chan lp.CCMessage) { // return code rc, err := strconv.Atoi(key_value["_rc_"]) if err != nil { - cclog.ComponentError(m.name, fmt.Sprintf("Read(): Failed to convert return code '%s' to int: %v", key_value["_rc_"], err)) + cclog.ComponentErrorf(m.name, "Read(): Failed to convert return code '%s' to int: %v", key_value["_rc_"], err) continue } if rc != 0 { - cclog.ComponentError(m.name, fmt.Sprintf("Read(): Filesystem '%s' is not ok.", filesystem)) + cclog.ComponentErrorf(m.name, "Read(): Filesystem '%s' is not ok.", filesystem) continue } // timestamp sec, err := strconv.ParseInt(key_value["_t_"], 10, 64) if err != nil { - cclog.ComponentError(m.name, fmt.Sprintf("Read(): Failed to convert seconds '%s' to int64: %v", key_value["_t_"], err)) + cclog.ComponentErrorf(m.name, "Read(): Failed to convert seconds '%s' to int64: %v", key_value["_t_"], err) continue } msec, err := strconv.ParseInt(key_value["_tu_"], 10, 64) if err != nil { - cclog.ComponentError(m.name, fmt.Sprintf("Read(): 
Failed to convert micro seconds '%s' to int64: %v", key_value["_tu_"], err)) + cclog.ComponentErrorf(m.name, "Read(): Failed to convert micro seconds '%s' to int64: %v", key_value["_tu_"], err) continue } timestamp := time.Unix(sec, msec*1000) @@ -551,7 +551,7 @@ func (m *GpfsCollector) Read(interval time.Duration, output chan lp.CCMessage) { for _, metric := range GpfsAbsMetrics { value, err := strconv.ParseInt(key_value[metric.prefix], 10, 64) if err != nil { - cclog.ComponentError(m.name, fmt.Sprintf("Read(): Failed to convert %s '%s' to int64: %v", metric.desc, key_value[metric.prefix], err)) + cclog.ComponentErrorf(m.name, "Read(): Failed to convert %s '%s' to int64: %v", metric.desc, key_value[metric.prefix], err) continue } newstate[metric.prefix] = value @@ -636,7 +636,7 @@ func (m *GpfsCollector) Read(interval time.Duration, output chan lp.CCMessage) { } } else { // the value could not be computed correctly - cclog.ComponentWarn(m.name, fmt.Sprintf("Read(): Could not compute value for filesystem %s of metric %s: vold_ok = %t, vnew_ok = %t", filesystem, metric.name, vold_ok, vnew_ok)) + cclog.ComponentWarnf(m.name, "Read(): Could not compute value for filesystem %s of metric %s: vold_ok = %t, vnew_ok = %t", filesystem, metric.name, vold_ok, vnew_ok) } } diff --git a/collectors/iostatMetric.go b/collectors/iostatMetric.go index a71f9b3..9db0949 100644 --- a/collectors/iostatMetric.go +++ b/collectors/iostatMetric.go @@ -145,16 +145,16 @@ func (m *IOstatCollector) Read(interval time.Duration, output chan lp.CCMessage) file, err := os.Open(IOSTATFILE) if err != nil { - cclog.ComponentError( + cclog.ComponentErrorf( m.name, - fmt.Sprintf("Read(): Failed to open file '%s': %v", IOSTATFILE, err)) + "Read(): Failed to open file '%s': %v", IOSTATFILE, err) return } defer func() { if err := file.Close(); err != nil { - cclog.ComponentError( + cclog.ComponentErrorf( m.name, - fmt.Sprintf("Read(): Failed to close file '%s': %v", IOSTATFILE, err)) + "Read(): Failed to 
close file '%s': %v", IOSTATFILE, err) } }() diff --git a/collectors/likwidMetric.go b/collectors/likwidMetric.go index d09b2e2..e486ee6 100644 --- a/collectors/likwidMetric.go +++ b/collectors/likwidMetric.go @@ -12,6 +12,12 @@ package collectors #cgo LDFLAGS: -Wl,--unresolved-symbols=ignore-in-object-files #include #include + + +int cc_add_hwthread(int cpu_id) { + return HPMaddThread(cpu_id); +} + */ import "C" @@ -261,12 +267,12 @@ func (m *LikwidCollector) Init(config json.RawMessage) error { } for _, metric := range evset.Metrics { // Try to evaluate the metric - cclog.ComponentDebug(m.name, "Checking", metric.Name) + cclog.ComponentDebugf(m.name, "Checking %s", metric.Name) if !checkMetricType(metric.Type) { - cclog.ComponentError(m.name, "Metric", metric.Name, "uses invalid type", metric.Type) + cclog.ComponentErrorf(m.name, "Metric %s uses invalid type %s", metric.Name, metric.Type) metric.Calc = "" } else if !testLikwidMetricFormula(metric.Calc, params) { - cclog.ComponentError(m.name, "Metric", metric.Name, "cannot be calculated with given counters") + cclog.ComponentErrorf(m.name, "Metric %s cannot be calculated with given counters", metric.Name) metric.Calc = "" } else { globalParams = append(globalParams, metric.Name) @@ -281,13 +287,13 @@ func (m *LikwidCollector) Init(config json.RawMessage) error { for _, metric := range m.config.Metrics { // Try to evaluate the global metric if !checkMetricType(metric.Type) { - cclog.ComponentError(m.name, "Metric", metric.Name, "uses invalid type", metric.Type) + cclog.ComponentErrorf(m.name, "Metric %s uses invalid type %s", metric.Name, metric.Type) metric.Calc = "" } else if !testLikwidMetricFormula(metric.Calc, globalParams) { - cclog.ComponentError(m.name, "Metric", metric.Name, "cannot be calculated with given counters") + cclog.ComponentErrorf(m.name, "Metric %s cannot be calculated with given counters", metric.Name) metric.Calc = "" } else if !checkMetricType(metric.Type) { - cclog.ComponentError(m.name, 
"Metric", metric.Name, "has invalid type") + cclog.ComponentErrorf(m.name, "Metric %s has invalid type", metric.Name) metric.Calc = "" } else { totalMetrics++ @@ -328,7 +334,7 @@ func (m *LikwidCollector) Init(config json.RawMessage) error { for _, c := range m.cpulist { m.measureThread.Call( func() { - retCode := C.HPMaddThread(C.uint32_t(c)) + retCode := C.cc_add_hwthread(C.int(c)) if retCode != 0 { err := fmt.Errorf("C.HPMaddThread(%v) failed with return code %v", c, retCode) cclog.ComponentError(m.name, err.Error()) @@ -375,16 +381,16 @@ func (m *LikwidCollector) takeMeasurement(evidx int, evset LikwidEventsetConfig, // Watch changes for the lock file () watcher, err := fsnotify.NewWatcher() if err != nil { - cclog.ComponentError( + cclog.ComponentErrorf( m.name, - fmt.Sprintf("takeMeasurement(): Failed to create a new fsnotify.Watcher: %v", err)) + "takeMeasurement(): Failed to create a new fsnotify.Watcher: %v", err) return true, err } defer func() { if err := watcher.Close(); err != nil { - cclog.ComponentError( + cclog.ComponentErrorf( m.name, - fmt.Sprintf("takeMeasurement(): Failed to close fsnotify.Watcher: %v", err)) + "takeMeasurement(): Failed to close fsnotify.Watcher: %v", err) } }() if len(m.config.LockfilePath) > 0 { @@ -597,7 +603,7 @@ func (m *LikwidCollector) calcEventsetMetrics(evset LikwidEventsetConfig, interv if tid >= 0 && len(metric.Calc) > 0 { value, err := agg.EvalFloat64Condition(metric.Calc, evset.results[tid]) if err != nil { - cclog.ComponentError(m.name, "Calculation for metric", metric.Name, "failed:", err.Error()) + cclog.ComponentErrorf(m.name, "Calculation for metric %s failed: %s", metric.Name, err.Error()) value = 0.0 } if m.config.InvalidToZero && (math.IsNaN(value) || math.IsInf(value, 0)) { @@ -762,7 +768,7 @@ func (m *LikwidCollector) calcGlobalMetrics(groups []LikwidEventsetConfig, inter // Evaluate the metric value, err := agg.EvalFloat64Condition(metric.Calc, params) if err != nil { - cclog.ComponentError(m.name, 
"Calculation for metric", metric.Name, "failed:", err.Error()) + cclog.ComponentErrorf(m.name, "Calculation for metric %s failed: %s", metric.Name, err.Error()) value = 0.0 } if m.config.InvalidToZero && (math.IsNaN(value) || math.IsInf(value, 0)) { diff --git a/collectors/loadavgMetric.go b/collectors/loadavgMetric.go index 774254d..dceb2e0 100644 --- a/collectors/loadavgMetric.go +++ b/collectors/loadavgMetric.go @@ -89,9 +89,9 @@ func (m *LoadavgCollector) Read(interval time.Duration, output chan lp.CCMessage } buffer, err := os.ReadFile(LOADAVGFILE) if err != nil { - cclog.ComponentError( + cclog.ComponentErrorf( m.name, - fmt.Sprintf("Read(): Failed to read file '%s': %v", LOADAVGFILE, err)) + "Read(): Failed to read file '%s': %v", LOADAVGFILE, err) return } now := time.Now() @@ -101,9 +101,9 @@ func (m *LoadavgCollector) Read(interval time.Duration, output chan lp.CCMessage for i, name := range m.load_matches { x, err := strconv.ParseFloat(ls[i], 64) if err != nil { - cclog.ComponentError( + cclog.ComponentErrorf( m.name, - fmt.Sprintf("Read(): Failed to convert '%s' to float64: %v", ls[i], err)) + "Read(): Failed to convert '%s' to float64: %v", ls[i], err) continue } if m.load_skips[i] { @@ -120,9 +120,9 @@ func (m *LoadavgCollector) Read(interval time.Duration, output chan lp.CCMessage for i, name := range m.proc_matches { x, err := strconv.ParseInt(lv[i], 10, 64) if err != nil { - cclog.ComponentError( + cclog.ComponentErrorf( m.name, - fmt.Sprintf("Read(): Failed to convert '%s' to float64: %v", lv[i], err)) + "Read(): Failed to convert '%s' to float64: %v", lv[i], err) continue } if m.proc_skips[i] { diff --git a/collectors/memstatMetric.go b/collectors/memstatMetric.go index fb21ffb..235de22 100644 --- a/collectors/memstatMetric.go +++ b/collectors/memstatMetric.go @@ -84,7 +84,7 @@ func getStats(filename string) map[string]MemstatStats { case 5: v, err := strconv.ParseFloat(linefields[3], 64) if err == nil { - cclog.ComponentDebug("getStats", 
strings.Trim(linefields[2], ":"), v, linefields[4]) + cclog.ComponentDebugf("MemstatCollector", "getStats %s value %v unit %s", strings.Trim(linefields[2], ":"), v, linefields[4]) stats[strings.Trim(linefields[2], ":")] = MemstatStats{ value: v, unit: linefields[4], diff --git a/collectors/netstatMetric.go b/collectors/netstatMetric.go index 8be9c5b..cc689f1 100644 --- a/collectors/netstatMetric.go +++ b/collectors/netstatMetric.go @@ -222,16 +222,16 @@ func (m *NetstatCollector) Read(interval time.Duration, output chan lp.CCMessage file, err := os.Open(NETSTATFILE) if err != nil { - cclog.ComponentError( + cclog.ComponentErrorf( m.name, - fmt.Sprintf("Read(): Failed to open file '%s': %v", NETSTATFILE, err)) + "Read(): Failed to open file '%s': %v", NETSTATFILE, err) return } defer func() { if err := file.Close(); err != nil { - cclog.ComponentError( + cclog.ComponentErrorf( m.name, - fmt.Sprintf("Read(): Failed to close file '%s': %v", NETSTATFILE, err)) + "Read(): Failed to close file '%s': %v", NETSTATFILE, err) } }() diff --git a/collectors/nfsMetric.go b/collectors/nfsMetric.go index 3bc9f15..c798114 100644 --- a/collectors/nfsMetric.go +++ b/collectors/nfsMetric.go @@ -125,10 +125,9 @@ func (m *nfsCollector) Read(interval time.Duration, output chan lp.CCMessage) { timestamp := time.Now() if err := m.updateStats(); err != nil { - cclog.ComponentError( + cclog.ComponentErrorf( m.name, - fmt.Sprintf("Read(): updateStats() failed: %v", err), - ) + "Read(): updateStats() failed: %v", err) return } var prefix string diff --git a/collectors/numastatsMetric.go b/collectors/numastatsMetric.go index dc9d19e..375af70 100644 --- a/collectors/numastatsMetric.go +++ b/collectors/numastatsMetric.go @@ -117,7 +117,7 @@ func (m *NUMAStatsCollector) Init(config json.RawMessage) error { } // Initialized - cclog.ComponentDebug(m.name, "initialized", len(m.topology), "NUMA domains") + cclog.ComponentDebugf(m.name, "initialized %d NUMA domains", len(m.topology)) m.init = true 
return nil } diff --git a/collectors/nvidiaMetric.go b/collectors/nvidiaMetric.go index f23f2fe..745984f 100644 --- a/collectors/nvidiaMetric.go +++ b/collectors/nvidiaMetric.go @@ -113,7 +113,7 @@ func (m *NvidiaCollector) Init(config json.RawMessage) error { // Skip excluded devices by ID str_i := strconv.Itoa(i) if slices.Contains(m.config.ExcludeDevices, str_i) { - cclog.ComponentDebug(m.name, "Skipping excluded device", str_i) + cclog.ComponentDebugf(m.name, "Skipping excluded device %s", str_i) continue } @@ -121,7 +121,7 @@ func (m *NvidiaCollector) Init(config json.RawMessage) error { device, ret := nvml.DeviceGetHandleByIndex(i) if ret != nvml.SUCCESS { err = errors.New(nvml.ErrorString(ret)) - cclog.ComponentError(m.name, "Unable to get device at index", i, ":", err.Error()) + cclog.ComponentErrorf(m.name, "Unable to get device at index %d: %s", i, err.Error()) continue } @@ -129,7 +129,7 @@ func (m *NvidiaCollector) Init(config json.RawMessage) error { pciInfo, ret := nvml.DeviceGetPciInfo(device) if ret != nvml.SUCCESS { err = errors.New(nvml.ErrorString(ret)) - cclog.ComponentError(m.name, "Unable to get PCI info for device at index", i, ":", err.Error()) + cclog.ComponentErrorf(m.name, "Unable to get PCI info for device at index %d: %s", i, err.Error()) continue } // Create PCI ID in the common format used by the NVML. 
@@ -141,7 +141,7 @@ func (m *NvidiaCollector) Init(config json.RawMessage) error { // Skip excluded devices specified by PCI ID if slices.Contains(m.config.ExcludeDevices, pci_id) { - cclog.ComponentDebug(m.name, "Skipping excluded device", pci_id) + cclog.ComponentDebugf(m.name, "Skipping excluded device %s", pci_id) continue } @@ -183,7 +183,7 @@ func (m *NvidiaCollector) Init(config json.RawMessage) error { if m.config.AddBoardNumberMeta { board, ret := nvml.DeviceGetBoardPartNumber(device) if ret != nvml.SUCCESS { - cclog.ComponentError(m.name, "Unable to get boart part number for device at index", i, ":", err.Error()) + cclog.ComponentErrorf(m.name, "Unable to get boart part number for device at index %d: %s", i, err.Error()) } else { g.meta["board_number"] = board } @@ -191,7 +191,7 @@ func (m *NvidiaCollector) Init(config json.RawMessage) error { if m.config.AddSerialMeta { serial, ret := nvml.DeviceGetSerial(device) if ret != nvml.SUCCESS { - cclog.ComponentError(m.name, "Unable to get serial number for device at index", i, ":", err.Error()) + cclog.ComponentErrorf(m.name, "Unable to get serial number for device at index %d: %s", i, err.Error()) } else { g.meta["serial"] = serial } @@ -199,7 +199,7 @@ func (m *NvidiaCollector) Init(config json.RawMessage) error { if m.config.AddUuidMeta { uuid, ret := nvml.DeviceGetUUID(device) if ret != nvml.SUCCESS { - cclog.ComponentError(m.name, "Unable to get UUID for device at index", i, ":", err.Error()) + cclog.ComponentErrorf(m.name, "Unable to get UUID for device at index %d: %s", i, err.Error()) } else { g.meta["uuid"] = uuid } @@ -1128,97 +1128,97 @@ func (m *NvidiaCollector) Read(interval time.Duration, output chan lp.CCMessage) } err = readMemoryInfo(device, output) if err != nil { - cclog.ComponentDebug(m.name, "readMemoryInfo for device", name, "failed") + cclog.ComponentDebugf(m.name, "readMemoryInfo for device %s failed", name) } err = readUtilization(device, output) if err != nil { - 
cclog.ComponentDebug(m.name, "readUtilization for device", name, "failed") + cclog.ComponentDebugf(m.name, "readUtilization for device %s failed", name) } err = readTemp(device, output) if err != nil { - cclog.ComponentDebug(m.name, "readTemp for device", name, "failed") + cclog.ComponentDebugf(m.name, "readTemp for device %s failed", name) } err = readFan(device, output) if err != nil { - cclog.ComponentDebug(m.name, "readFan for device", name, "failed") + cclog.ComponentDebugf(m.name, "readFan for device %s failed", name) } err = readEccMode(device, output) if err != nil { - cclog.ComponentDebug(m.name, "readEccMode for device", name, "failed") + cclog.ComponentDebugf(m.name, "readEccMode for device %s failed", name) } err = readPerfState(device, output) if err != nil { - cclog.ComponentDebug(m.name, "readPerfState for device", name, "failed") + cclog.ComponentDebugf(m.name, "readPerfState for device %s failed", name) } err = readPowerUsage(device, output) if err != nil { - cclog.ComponentDebug(m.name, "readPowerUsage for device", name, "failed") + cclog.ComponentDebugf(m.name, "readPowerUsage for device %s failed", name) } err = readEnergyConsumption(device, output) if err != nil { - cclog.ComponentDebug(m.name, "readEnergyConsumption for device", name, "failed") + cclog.ComponentDebugf(m.name, "readEnergyConsumption for device %s failed", name) } err = readClocks(device, output) if err != nil { - cclog.ComponentDebug(m.name, "readClocks for device", name, "failed") + cclog.ComponentDebugf(m.name, "readClocks for device %s failed", name) } err = readMaxClocks(device, output) if err != nil { - cclog.ComponentDebug(m.name, "readMaxClocks for device", name, "failed") + cclog.ComponentDebugf(m.name, "readMaxClocks for device %s failed", name) } err = readEccErrors(device, output) if err != nil { - cclog.ComponentDebug(m.name, "readEccErrors for device", name, "failed") + cclog.ComponentDebugf(m.name, "readEccErrors for device %s failed", name) } err = 
readPowerLimit(device, output) if err != nil { - cclog.ComponentDebug(m.name, "readPowerLimit for device", name, "failed") + cclog.ComponentDebugf(m.name, "readPowerLimit for device %s failed", name) } err = readEncUtilization(device, output) if err != nil { - cclog.ComponentDebug(m.name, "readEncUtilization for device", name, "failed") + cclog.ComponentDebugf(m.name, "readEncUtilization for device %s failed", name) } err = readDecUtilization(device, output) if err != nil { - cclog.ComponentDebug(m.name, "readDecUtilization for device", name, "failed") + cclog.ComponentDebugf(m.name, "readDecUtilization for device %s failed", name) } err = readRemappedRows(device, output) if err != nil { - cclog.ComponentDebug(m.name, "readRemappedRows for device", name, "failed") + cclog.ComponentDebugf(m.name, "readRemappedRows for device %s failed", name) } err = readBarMemoryInfo(device, output) if err != nil { - cclog.ComponentDebug(m.name, "readBarMemoryInfo for device", name, "failed") + cclog.ComponentDebugf(m.name, "readBarMemoryInfo for device %s failed", name) } err = readProcessCounts(device, output) if err != nil { - cclog.ComponentDebug(m.name, "readProcessCounts for device", name, "failed") + cclog.ComponentDebugf(m.name, "readProcessCounts for device %s failed", name) } err = readViolationStats(device, output) if err != nil { - cclog.ComponentDebug(m.name, "readViolationStats for device", name, "failed") + cclog.ComponentDebugf(m.name, "readViolationStats for device %s failed", name) } err = readNVLinkStats(device, output) if err != nil { - cclog.ComponentDebug(m.name, "readNVLinkStats for device", name, "failed") + cclog.ComponentDebugf(m.name, "readNVLinkStats for device %s failed", name) } } @@ -1244,7 +1244,7 @@ func (m *NvidiaCollector) Read(interval time.Duration, output chan lp.CCMessage) if maxMig == 0 { continue } - cclog.ComponentDebug(m.name, "Reading MIG devices for GPU", i) + cclog.ComponentDebugf(m.name, "Reading MIG devices for GPU %d", i) for j := 
range maxMig { mdev, ret := nvml.DeviceGetMigDeviceHandleByIndex(m.gpus[i].device, j) @@ -1268,7 +1268,7 @@ func (m *NvidiaCollector) Read(interval time.Duration, output chan lp.CCMessage) if m.config.UseUuidForMigDevices { uuid, ret := nvml.DeviceGetUUID(mdev) if ret != nvml.SUCCESS { - cclog.ComponentError(m.name, "Unable to get UUID for mig device at index", j, ":", err.Error()) + cclog.ComponentErrorf(m.name, "Unable to get UUID for mig device at index %d: %s", j, err.Error()) } else { migDevice.tags["stype-id"] = uuid } diff --git a/collectors/raplMetric.go b/collectors/raplMetric.go index 3d459df..1a1459f 100644 --- a/collectors/raplMetric.go +++ b/collectors/raplMetric.go @@ -208,11 +208,10 @@ func (m *RAPLCollector) Init(config json.RawMessage) error { } // Initialized - cclog.ComponentDebug( + cclog.ComponentDebugf( m.name, - "initialized", - len(m.RAPLZoneInfo), - "zones with running average power limit (RAPL) monitoring attributes") + "initialized %d zones with running average power limit (RAPL) monitoring attributes", + len(m.RAPLZoneInfo)) m.init = true return err diff --git a/collectors/rocmsmiMetric.go b/collectors/rocmsmiMetric.go index ca4ef70..8e2128d 100644 --- a/collectors/rocmsmiMetric.go +++ b/collectors/rocmsmiMetric.go @@ -124,7 +124,7 @@ func (m *RocmSmiCollector) Init(config json.RawMessage) error { if m.config.AddSerialMeta { serial, ret := rocm_smi.DeviceGetSerialNumber(device) if ret != rocm_smi.STATUS_SUCCESS { - cclog.ComponentError(m.name, "Unable to get serial number for device at index", i, ":", rocm_smi.StatusStringNoError(ret)) + cclog.ComponentErrorf(m.name, "Unable to get serial number for device at index %d: %s", i, rocm_smi.StatusStringNoError(ret)) } else { dev.meta["serial"] = serial } @@ -152,7 +152,7 @@ func (m *RocmSmiCollector) Read(interval time.Duration, output chan lp.CCMessage for _, dev := range m.devices { metrics, ret := rocm_smi.DeviceGetMetrics(dev.device) if ret != rocm_smi.STATUS_SUCCESS { - 
cclog.ComponentError(m.name, "Unable to get metrics for device at index", dev.index, ":", rocm_smi.StatusStringNoError(ret)) + cclog.ComponentErrorf(m.name, "Unable to get metrics for device at index %d: %s", dev.index, rocm_smi.StatusStringNoError(ret)) continue } diff --git a/collectors/schedstatMetric.go b/collectors/schedstatMetric.go index c4ed093..5b7afd0 100644 --- a/collectors/schedstatMetric.go +++ b/collectors/schedstatMetric.go @@ -147,15 +147,15 @@ func (m *SchedstatCollector) Read(interval time.Duration, output chan lp.CCMessa file, err := os.Open(SCHEDSTATFILE) if err != nil { - cclog.ComponentError( + cclog.ComponentErrorf( m.name, - fmt.Sprintf("Read(): Failed to open file '%s': %v", SCHEDSTATFILE, err)) + "Read(): Failed to open file '%s': %v", SCHEDSTATFILE, err) } defer func() { if err := file.Close(); err != nil { - cclog.ComponentError( + cclog.ComponentErrorf( m.name, - fmt.Sprintf("Read(): Failed to close file '%s': %v", SCHEDSTATFILE, err)) + "Read(): Failed to close file '%s': %v", SCHEDSTATFILE, err) } }() diff --git a/collectors/slurmCgroupMetric.go b/collectors/slurmCgroupMetric.go index c091dc2..30dbbe0 100644 --- a/collectors/slurmCgroupMetric.go +++ b/collectors/slurmCgroupMetric.go @@ -240,7 +240,7 @@ func (m *SlurmCgroupCollector) Read(interval time.Duration, output chan lp.CCMes globPattern := filepath.Join(m.cgroupBase, "job_*") jobDirs, err := filepath.Glob(globPattern) if err != nil { - cclog.ComponentError(m.name, "Error globbing job directories:", err.Error()) + cclog.ComponentErrorf(m.name, "Error globbing job directories: %s", err.Error()) return } @@ -249,7 +249,7 @@ func (m *SlurmCgroupCollector) Read(interval time.Duration, output chan lp.CCMes jobdata, err := m.ReadJobData(jKey) if err != nil { - cclog.ComponentError(m.name, "Error reading job data for", jKey, ":", err.Error()) + cclog.ComponentErrorf(m.name, "Error reading job data for %s: %s", jKey, err.Error()) continue } diff --git a/collectors/smartmonMetric.go
b/collectors/smartmonMetric.go index 346d157..87902ac 100644 --- a/collectors/smartmonMetric.go +++ b/collectors/smartmonMetric.go @@ -228,12 +228,12 @@ func (m *SmartMonCollector) Read(interval time.Duration, output chan lp.CCMessag stdout, err := command.Output() if err != nil { - cclog.ComponentError(m.name, "cannot read data for device", d.Name) + cclog.ComponentErrorf(m.name, "cannot read data for device %s", d.Name) continue } err = json.Unmarshal(stdout, &data) if err != nil { - cclog.ComponentError(m.name, "cannot unmarshal data for device", d.Name) + cclog.ComponentErrorf(m.name, "cannot unmarshal data for device %s", d.Name) continue } if !m.excludeMetric.temp { diff --git a/collectors/tempMetric.go b/collectors/tempMetric.go index ffae7ff..adbfb5a 100644 --- a/collectors/tempMetric.go +++ b/collectors/tempMetric.go @@ -188,16 +188,16 @@ func (m *TempCollector) Read(interval time.Duration, output chan lp.CCMessage) { // Read sensor file buffer, err := os.ReadFile(sensor.file) if err != nil { - cclog.ComponentError( + cclog.ComponentErrorf( m.name, - fmt.Sprintf("Read(): Failed to read file '%s': %v", sensor.file, err)) + "Read(): Failed to read file '%s': %v", sensor.file, err) continue } x, err := strconv.ParseInt(strings.TrimSpace(string(buffer)), 10, 64) if err != nil { - cclog.ComponentError( + cclog.ComponentErrorf( m.name, - fmt.Sprintf("Read(): Failed to convert temperature '%s' to int64: %v", buffer, err)) + "Read(): Failed to convert temperature '%s' to int64: %v", buffer, err) continue } x /= 1000 diff --git a/collectors/topprocsMetric.go b/collectors/topprocsMetric.go index eba833c..407018c 100644 --- a/collectors/topprocsMetric.go +++ b/collectors/topprocsMetric.go @@ -77,9 +77,9 @@ func (m *TopProcsCollector) Read(interval time.Duration, output chan lp.CCMessag command := exec.Command("ps", "-Ao", "comm", "--sort=-pcpu") stdout, err := command.Output() if err != nil { - cclog.ComponentError( + cclog.ComponentErrorf( m.name, - 
fmt.Sprintf("Read(): Failed to read output from command \"%s\": %v", command.String(), err)) + "Read(): Failed to read output from command \"%s\": %v", command.String(), err) return } diff --git a/internal/metricRouter/metricRouter.go b/internal/metricRouter/metricRouter.go index 718b6aa..a3772ca 100644 --- a/internal/metricRouter/metricRouter.go +++ b/internal/metricRouter/metricRouter.go @@ -35,18 +35,18 @@ type metricRouterTagConfig struct { // Metric router configuration type metricRouterConfig struct { - HostnameTagName string `json:"hostname_tag"` // Key name used when adding the hostname to a metric (default 'hostname') - AddTags []metricRouterTagConfig `json:"add_tags"` // List of tags that are added when the condition is met - DelTags []metricRouterTagConfig `json:"delete_tags"` // List of tags that are removed when the condition is met - IntervalAgg []agg.MetricAggregatorIntervalConfig `json:"interval_aggregates"` // List of aggregation function processed at the end of an interval - DropMetrics []string `json:"drop_metrics"` // List of metric names to drop. For fine-grained dropping use drop_metrics_if - DropMetricsIf []string `json:"drop_metrics_if"` // List of evaluatable terms to drop metrics - RenameMetrics map[string]string `json:"rename_metrics"` // Map to rename metric name from key to value - IntervalStamp bool `json:"interval_timestamp"` // Update timestamp periodically by ticker each interval? 
- NumCacheIntervals int `json:"num_cache_intervals"` // Number of intervals of cached metrics for evaluation - MaxForward int `json:"max_forward"` // Number of maximal forwarded metrics at one select - NormalizeUnits bool `json:"normalize_units"` // Check unit meta flag and normalize it using cc-units - ChangeUnitPrefix map[string]string `json:"change_unit_prefix"` // Add prefix that should be applied to the metrics + HostnameTagName string `json:"hostname_tag,omitempty"` // Key name used when adding the hostname to a metric (default 'hostname') + AddTags []metricRouterTagConfig `json:"add_tags,omitempty"` // List of tags that are added when the condition is met + DelTags []metricRouterTagConfig `json:"delete_tags,omitempty"` // List of tags that are removed when the condition is met + IntervalAgg []agg.MetricAggregatorIntervalConfig `json:"interval_aggregates,omitempty"` // List of aggregation function processed at the end of an interval + DropMetrics []string `json:"drop_metrics,omitempty"` // List of metric names to drop. For fine-grained dropping use drop_metrics_if + DropMetricsIf []string `json:"drop_metrics_if,omitempty"` // List of evaluatable terms to drop metrics + RenameMetrics map[string]string `json:"rename_metrics,omitempty"` // Map to rename metric name from key to value + IntervalStamp bool `json:"interval_timestamp,omitempty"` // Update timestamp periodically by ticker each interval? 
+ NumCacheIntervals int `json:"num_cache_intervals,omitempty"` // Number of intervals of cached metrics for evaluation + MaxForward int `json:"max_forward,omitempty"` // Number of maximal forwarded metrics at one select + NormalizeUnits bool `json:"normalize_units,omitempty"` // Check unit meta flag and normalize it using cc-units + ChangeUnitPrefix map[string]string `json:"change_unit_prefix,omitempty"` // Add prefix that should be applied to the metrics MessageProcessor json.RawMessage `json:"process_messages,omitempty"` } @@ -297,7 +297,7 @@ func (r *metricRouter) Start() { case timestamp := <-timeChan: r.timestamp = timestamp - cclog.ComponentDebug("MetricRouter", "Update timestamp", r.timestamp.UnixNano()) + cclog.ComponentDebugf("MetricRouter", "Update timestamp %d", r.timestamp.UnixNano()) case p := <-r.coll_input: coll_forward(p)