Merge branch 'develop' into smartmon_collector

This commit is contained in:
Thomas Gruber
2022-10-09 17:35:47 +02:00
committed by GitHub
29 changed files with 1213 additions and 318 deletions

View File

@@ -35,7 +35,7 @@ In contrast to the configuration files for sinks and receivers, the collectors c
* [`nfs4stat`](./nfs4Metric.md)
* [`cpufreq`](./cpufreqMetric.md)
* [`cpufreq_cpuinfo`](./cpufreqCpuinfoMetric.md)
* [`numastat`](./numastatMetric.md)
* [`numastats`](./numastatsMetric.md)
* [`gpfs`](./gpfsMetric.md)
* [`beegfs_meta`](./beegfsmetaMetric.md)
* [`beegfs_storage`](./beegfsstorageMetric.md)

View File

@@ -5,7 +5,7 @@ import (
"bytes"
"encoding/json"
"fmt"
"io/ioutil"
"io"
"os"
"os/exec"
"os/user"
@@ -115,7 +115,7 @@ func (m *BeegfsMetaCollector) Read(interval time.Duration, output chan lp.CCMetr
return
}
//get mounpoint
buffer, _ := ioutil.ReadFile(string("/proc/mounts"))
buffer, _ := os.ReadFile(string("/proc/mounts"))
mounts := strings.Split(string(buffer), "\n")
var mountpoints []string
for _, line := range mounts {
@@ -157,9 +157,9 @@ func (m *BeegfsMetaCollector) Read(interval time.Duration, output chan lp.CCMetr
if err != nil {
fmt.Fprintf(os.Stderr, "BeegfsMetaCollector.Read(): Failed to execute command \"%s\": %s\n", cmd.String(), err.Error())
fmt.Fprintf(os.Stderr, "BeegfsMetaCollector.Read(): command exit code: \"%d\"\n", cmd.ProcessState.ExitCode())
data, _ := ioutil.ReadAll(cmdStderr)
data, _ := io.ReadAll(cmdStderr)
fmt.Fprintf(os.Stderr, "BeegfsMetaCollector.Read(): command stderr: \"%s\"\n", string(data))
data, _ = ioutil.ReadAll(cmdStdout)
data, _ = io.ReadAll(cmdStdout)
fmt.Fprintf(os.Stderr, "BeegfsMetaCollector.Read(): command stdout: \"%s\"\n", string(data))
return
}

View File

@@ -5,7 +5,7 @@ import (
"bytes"
"encoding/json"
"fmt"
"io/ioutil"
"io"
"os"
"os/exec"
"os/user"
@@ -108,7 +108,7 @@ func (m *BeegfsStorageCollector) Read(interval time.Duration, output chan lp.CCM
return
}
//get mounpoint
buffer, _ := ioutil.ReadFile(string("/proc/mounts"))
buffer, _ := os.ReadFile(string("/proc/mounts"))
mounts := strings.Split(string(buffer), "\n")
var mountpoints []string
for _, line := range mounts {
@@ -149,9 +149,9 @@ func (m *BeegfsStorageCollector) Read(interval time.Duration, output chan lp.CCM
if err != nil {
fmt.Fprintf(os.Stderr, "BeegfsStorageCollector.Read(): Failed to execute command \"%s\": %s\n", cmd.String(), err.Error())
fmt.Fprintf(os.Stderr, "BeegfsStorageCollector.Read(): command exit code: \"%d\"\n", cmd.ProcessState.ExitCode())
data, _ := ioutil.ReadAll(cmdStderr)
data, _ := io.ReadAll(cmdStderr)
fmt.Fprintf(os.Stderr, "BeegfsStorageCollector.Read(): command stderr: \"%s\"\n", string(data))
data, _ = ioutil.ReadAll(cmdStdout)
data, _ = io.ReadAll(cmdStdout)
fmt.Fprintf(os.Stderr, "BeegfsStorageCollector.Read(): command stdout: \"%s\"\n", string(data))
return
}

View File

@@ -38,6 +38,7 @@ var AvailableCollectors = map[string]MetricCollector{
"beegfs_storage": new(BeegfsStorageCollector),
"rocm_smi": new(RocmSmiCollector),
"smartmon": new(SmartMonCollector),
"schedstat": new(SchedstatCollector),
}
// Metric collector manager data structure

View File

@@ -3,7 +3,7 @@ package collectors
import (
"encoding/json"
"fmt"
"io/ioutil"
"os"
"path/filepath"
"strconv"
"strings"
@@ -88,7 +88,7 @@ func (m *CPUFreqCollector) Init(config json.RawMessage) error {
// Read package ID
physicalPackageIDFile := filepath.Join(cpuDir, "topology", "physical_package_id")
line, err := ioutil.ReadFile(physicalPackageIDFile)
line, err := os.ReadFile(physicalPackageIDFile)
if err != nil {
return fmt.Errorf("unable to read physical package ID from file '%s': %v", physicalPackageIDFile, err)
}
@@ -100,7 +100,7 @@ func (m *CPUFreqCollector) Init(config json.RawMessage) error {
// Read core ID
coreIDFile := filepath.Join(cpuDir, "topology", "core_id")
line, err = ioutil.ReadFile(coreIDFile)
line, err = os.ReadFile(coreIDFile)
if err != nil {
return fmt.Errorf("unable to read core ID from file '%s': %v", coreIDFile, err)
}
@@ -188,7 +188,7 @@ func (m *CPUFreqCollector) Read(interval time.Duration, output chan lp.CCMetric)
}
// Read current frequency
line, err := ioutil.ReadFile(t.scalingCurFreqFile)
line, err := os.ReadFile(t.scalingCurFreqFile)
if err != nil {
cclog.ComponentError(
m.name,

View File

@@ -11,6 +11,7 @@ import (
cclog "github.com/ClusterCockpit/cc-metric-collector/internal/ccLogger"
lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric"
sysconf "github.com/tklauser/go-sysconf"
)
const CPUSTATFILE = `/proc/stat`
@@ -22,9 +23,11 @@ type CpustatCollectorConfig struct {
type CpustatCollector struct {
metricCollector
config CpustatCollectorConfig
lastTimestamp time.Time // Store time stamp of last tick to derive values
matches map[string]int
cputags map[string]map[string]string
nodetags map[string]string
olddata map[string]map[string]int64
}
func (m *CpustatCollector) Init(config json.RawMessage) error {
@@ -76,36 +79,48 @@ func (m *CpustatCollector) Init(config json.RawMessage) error {
// Pre-generate tags for all CPUs
num_cpus := 0
m.cputags = make(map[string]map[string]string)
m.olddata = make(map[string]map[string]int64)
scanner := bufio.NewScanner(file)
for scanner.Scan() {
line := scanner.Text()
linefields := strings.Fields(line)
if strings.HasPrefix(linefields[0], "cpu") && strings.Compare(linefields[0], "cpu") != 0 {
if strings.Compare(linefields[0], "cpu") == 0 {
m.olddata["cpu"] = make(map[string]int64)
for k, v := range m.matches {
m.olddata["cpu"][k], _ = strconv.ParseInt(linefields[v], 0, 64)
}
} else if strings.HasPrefix(linefields[0], "cpu") && strings.Compare(linefields[0], "cpu") != 0 {
cpustr := strings.TrimLeft(linefields[0], "cpu")
cpu, _ := strconv.Atoi(cpustr)
m.cputags[linefields[0]] = map[string]string{"type": "hwthread", "type-id": fmt.Sprintf("%d", cpu)}
m.olddata[linefields[0]] = make(map[string]int64)
for k, v := range m.matches {
m.olddata[linefields[0]][k], _ = strconv.ParseInt(linefields[v], 0, 64)
}
num_cpus++
}
}
m.lastTimestamp = time.Now()
m.init = true
return nil
}
func (m *CpustatCollector) parseStatLine(linefields []string, tags map[string]string, output chan lp.CCMetric) {
func (m *CpustatCollector) parseStatLine(linefields []string, tags map[string]string, output chan lp.CCMetric, now time.Time, tsdelta time.Duration) {
values := make(map[string]float64)
total := 0.0
clktck, _ := sysconf.Sysconf(sysconf.SC_CLK_TCK)
for match, index := range m.matches {
if len(match) > 0 {
x, err := strconv.ParseInt(linefields[index], 0, 64)
if err == nil {
values[match] = float64(x)
total += values[match]
vdiff := x - m.olddata[linefields[0]][match]
m.olddata[linefields[0]][match] = x // Store new value for next run
values[match] = float64(vdiff) / float64(tsdelta.Seconds()) / float64(clktck)
}
}
}
t := time.Now()
for name, value := range values {
y, err := lp.New(name, tags, m.meta, map[string]interface{}{"value": (value * 100.0) / total}, t)
y, err := lp.New(name, tags, m.meta, map[string]interface{}{"value": value * 100}, now)
if err == nil {
output <- y
}
@@ -117,6 +132,9 @@ func (m *CpustatCollector) Read(interval time.Duration, output chan lp.CCMetric)
return
}
num_cpus := 0
now := time.Now()
tsdelta := now.Sub(m.lastTimestamp)
file, err := os.Open(string(CPUSTATFILE))
if err != nil {
cclog.ComponentError(m.name, err.Error())
@@ -128,9 +146,9 @@ func (m *CpustatCollector) Read(interval time.Duration, output chan lp.CCMetric)
line := scanner.Text()
linefields := strings.Fields(line)
if strings.Compare(linefields[0], "cpu") == 0 {
m.parseStatLine(linefields, m.nodetags, output)
m.parseStatLine(linefields, m.nodetags, output, now, tsdelta)
} else if strings.HasPrefix(linefields[0], "cpu") {
m.parseStatLine(linefields, m.cputags[linefields[0]], output)
m.parseStatLine(linefields, m.cputags[linefields[0]], output, now, tsdelta)
num_cpus++
}
}
@@ -139,11 +157,13 @@ func (m *CpustatCollector) Read(interval time.Duration, output chan lp.CCMetric)
m.nodetags,
m.meta,
map[string]interface{}{"value": int(num_cpus)},
time.Now(),
now,
)
if err == nil {
output <- num_cpus_metric
}
m.lastTimestamp = now
}
func (m *CpustatCollector) Close() {

View File

@@ -3,8 +3,8 @@ package collectors
import (
"encoding/json"
"errors"
"io/ioutil"
"log"
"os"
"os/exec"
"strings"
"time"
@@ -53,7 +53,7 @@ func (m *CustomCmdCollector) Init(config json.RawMessage) error {
}
}
for _, f := range m.config.Files {
_, err = ioutil.ReadFile(f)
_, err = os.ReadFile(f)
if err == nil {
m.files = append(m.files, f)
} else {
@@ -106,7 +106,7 @@ func (m *CustomCmdCollector) Read(interval time.Duration, output chan lp.CCMetri
}
}
for _, file := range m.files {
buffer, err := ioutil.ReadFile(file)
buffer, err := os.ReadFile(file)
if err != nil {
log.Print(err)
return

View File

@@ -5,7 +5,7 @@ import (
"bytes"
"encoding/json"
"fmt"
"io/ioutil"
"io"
"log"
"os/exec"
"os/user"
@@ -118,8 +118,8 @@ func (m *GpfsCollector) Read(interval time.Duration, output chan lp.CCMetric) {
cmd.Stderr = cmdStderr
err := cmd.Run()
if err != nil {
dataStdErr, _ := ioutil.ReadAll(cmdStderr)
dataStdOut, _ := ioutil.ReadAll(cmdStdout)
dataStdErr, _ := io.ReadAll(cmdStderr)
dataStdOut, _ := io.ReadAll(cmdStdout)
cclog.ComponentError(
m.name,
fmt.Sprintf("Read(): Failed to execute command \"%s\": %v\n", cmd.String(), err),

View File

@@ -2,7 +2,6 @@ package collectors
import (
"fmt"
"io/ioutil"
"os"
cclog "github.com/ClusterCockpit/cc-metric-collector/internal/ccLogger"
@@ -19,8 +18,9 @@ import (
const IB_BASEPATH = "/sys/class/infiniband/"
type InfinibandCollectorMetric struct {
path string
unit string
path string
unit string
scale int64
}
type InfinibandCollectorInfo struct {
@@ -84,7 +84,7 @@ func (m *InfinibandCollector) Init(config json.RawMessage) error {
for _, path := range ibDirs {
// Skip, when no LID is assigned
line, err := ioutil.ReadFile(filepath.Join(path, "lid"))
line, err := os.ReadFile(filepath.Join(path, "lid"))
if err != nil {
continue
}
@@ -113,10 +113,10 @@ func (m *InfinibandCollector) Init(config json.RawMessage) error {
// Check access to counter files
countersDir := filepath.Join(path, "counters")
portCounterFiles := map[string]InfinibandCollectorMetric{
"ib_recv": {path: filepath.Join(countersDir, "port_rcv_data"), unit: "bytes"},
"ib_xmit": {path: filepath.Join(countersDir, "port_xmit_data"), unit: "bytes"},
"ib_recv_pkts": {path: filepath.Join(countersDir, "port_rcv_packets"), unit: "packets"},
"ib_xmit_pkts": {path: filepath.Join(countersDir, "port_xmit_packets"), unit: "packets"},
"ib_recv": {path: filepath.Join(countersDir, "port_rcv_data"), unit: "bytes", scale: 4},
"ib_xmit": {path: filepath.Join(countersDir, "port_xmit_data"), unit: "bytes", scale: 4},
"ib_recv_pkts": {path: filepath.Join(countersDir, "port_rcv_packets"), unit: "packets", scale: 1},
"ib_xmit_pkts": {path: filepath.Join(countersDir, "port_xmit_packets"), unit: "packets", scale: 1},
}
for _, counter := range portCounterFiles {
err := unix.Access(counter.path, unix.R_OK)
@@ -174,7 +174,7 @@ func (m *InfinibandCollector) Read(interval time.Duration, output chan lp.CCMetr
for counterName, counterDef := range info.portCounterFiles {
// Read counter file
line, err := ioutil.ReadFile(counterDef.path)
line, err := os.ReadFile(counterDef.path)
if err != nil {
cclog.ComponentError(
m.name,
@@ -191,6 +191,8 @@ func (m *InfinibandCollector) Read(interval time.Duration, output chan lp.CCMetr
fmt.Sprintf("Read(): Failed to convert Infininiband metrice %s='%s' to int64: %v", counterName, data, err))
continue
}
// Scale raw value
v *= counterDef.scale
// Send absolut values
if m.config.SendAbsoluteValues {

View File

@@ -12,7 +12,6 @@ import (
"encoding/json"
"errors"
"fmt"
"io/ioutil"
"math"
"os"
"os/signal"
@@ -154,12 +153,13 @@ func getBaseFreq() float64 {
}
var freq float64 = math.NaN()
for _, f := range files {
buffer, err := ioutil.ReadFile(f)
buffer, err := os.ReadFile(f)
if err == nil {
data := strings.Replace(string(buffer), "\n", "", -1)
x, err := strconv.ParseInt(data, 0, 64)
if err == nil {
freq = float64(x) * 1e6
freq = float64(x)
break
}
}
}
@@ -168,11 +168,11 @@ func getBaseFreq() float64 {
C.power_init(0)
info := C.get_powerInfo()
if float64(info.baseFrequency) != 0 {
freq = float64(info.baseFrequency) * 1e6
freq = float64(info.baseFrequency)
}
C.power_finalize()
}
return freq
return freq * 1e3
}
func (m *LikwidCollector) Init(config json.RawMessage) error {

View File

@@ -7,6 +7,9 @@ The `likwid` collector is probably the most complicated collector. The LIKWID li
"likwid": {
"force_overwrite" : false,
"invalid_to_zero" : false,
"liblikwid_path" : "/path/to/liblikwid.so",
"accessdaemon_path" : "/folder/that/contains/likwid-accessD",
"access_mode" : "direct or accessdaemon or perf_event",
"eventsets": [
{
"events" : {

View File

@@ -3,7 +3,7 @@ package collectors
import (
"encoding/json"
"fmt"
"io/ioutil"
"os"
"strconv"
"strings"
"time"
@@ -72,7 +72,7 @@ func (m *LoadavgCollector) Read(interval time.Duration, output chan lp.CCMetric)
if !m.init {
return
}
buffer, err := ioutil.ReadFile(LOADAVGFILE)
buffer, err := os.ReadFile(LOADAVGFILE)
if err != nil {
if err != nil {
cclog.ComponentError(

View File

@@ -68,7 +68,8 @@ func getStats(filename string) map[string]MemstatStats {
} else if len(linefields) == 5 {
v, err := strconv.ParseFloat(linefields[3], 64)
if err == nil {
stats[strings.Trim(linefields[0], ":")] = MemstatStats{
cclog.ComponentDebug("getStats", strings.Trim(linefields[2], ":"), v, linefields[4])
stats[strings.Trim(linefields[2], ":")] = MemstatStats{
value: v,
unit: linefields[4],
}
@@ -160,7 +161,6 @@ func (m *MemstatCollector) Init(config json.RawMessage) error {
func (m *MemstatCollector) Read(interval time.Duration, output chan lp.CCMetric) {
if !m.init {
cclog.ComponentPrint(m.name, "Here")
return
}
@@ -188,16 +188,20 @@ func (m *MemstatCollector) Read(interval time.Duration, output chan lp.CCMetric)
unit := ""
if totalVal, total := stats["MemTotal"]; total {
if freeVal, free := stats["MemFree"]; free {
memUsed = totalVal.value - freeVal.value
if len(totalVal.unit) > 0 {
unit = totalVal.unit
} else if len(freeVal.unit) > 0 {
unit = freeVal.unit
}
if bufVal, buffers := stats["Buffers"]; buffers {
memUsed -= bufVal.value
if len(bufVal.unit) > 0 && len(unit) == 0 {
unit = bufVal.unit
}
if cacheVal, cached := stats["Cached"]; cached {
memUsed = totalVal.value - (freeVal.value + bufVal.value + cacheVal.value)
if len(totalVal.unit) > 0 {
unit = totalVal.unit
} else if len(freeVal.unit) > 0 {
unit = freeVal.unit
} else if len(bufVal.unit) > 0 {
unit = bufVal.unit
} else if len(cacheVal.unit) > 0 {
memUsed -= cacheVal.value
if len(cacheVal.unit) > 0 && len(unit) == 0 {
unit = cacheVal.unit
}
}

View File

@@ -66,14 +66,14 @@ func (m *RocmSmiCollector) Init(config json.RawMessage) error {
ret := rocm_smi.Init()
if ret != rocm_smi.STATUS_SUCCESS {
err = errors.New("Failed to initialize ROCm SMI library")
err = errors.New("failed to initialize ROCm SMI library")
cclog.ComponentError(m.name, err.Error())
return err
}
numDevs, ret := rocm_smi.NumMonitorDevices()
if ret != rocm_smi.STATUS_SUCCESS {
err = errors.New("Failed to get number of GPUs from ROCm SMI library")
err = errors.New("failed to get number of GPUs from ROCm SMI library")
cclog.ComponentError(m.name, err.Error())
return err
}
@@ -98,14 +98,14 @@ func (m *RocmSmiCollector) Init(config json.RawMessage) error {
}
device, ret := rocm_smi.DeviceGetHandleByIndex(i)
if ret != rocm_smi.STATUS_SUCCESS {
err = fmt.Errorf("Failed to get handle for GPU %d", i)
err = fmt.Errorf("failed to get handle for GPU %d", i)
cclog.ComponentError(m.name, err.Error())
return err
}
pciInfo, ret := rocm_smi.DeviceGetPciInfo(device)
if ret != rocm_smi.STATUS_SUCCESS {
err = fmt.Errorf("Failed to get PCI information for GPU %d", i)
err = fmt.Errorf("failed to get PCI information for GPU %d", i)
cclog.ComponentError(m.name, err.Error())
return err
}

View File

@@ -0,0 +1,155 @@
package collectors
import (
"encoding/json"
"fmt"
"bufio"
"time"
"os"
"strings"
"strconv"
"math"
cclog "github.com/ClusterCockpit/cc-metric-collector/internal/ccLogger"
lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric"
)
const SCHEDSTATFILE = `/proc/schedstat`
// These are the fields we read from the JSON configuration
type SchedstatCollectorConfig struct {
ExcludeMetrics []string `json:"exclude_metrics,omitempty"`
}
// This contains all variables we need during execution and the variables
// defined by metricCollector (name, init, ...)
type SchedstatCollector struct {
metricCollector
config SchedstatCollectorConfig // the configuration structure
lastTimestamp time.Time // Store time stamp of last tick to derive values
meta map[string]string // default meta information
cputags map[string]map[string]string // default tags
olddata map[string]map[string]int64 // default tags
}
// Functions to implement MetricCollector interface
// Init(...), Read(...), Close()
// See: metricCollector.go
// Init initializes the sample collector
// Called once by the collector manager
// All tags, meta data tags and metrics that do not change over the runtime should be set here
func (m *SchedstatCollector) Init(config json.RawMessage) error {
var err error = nil
// Always set the name early in Init() to use it in cclog.Component* functions
m.name = "SchedstatCollector"
// This is for later use, also call it early
m.setup()
// Tell whether the collector should be run in parallel with others (reading files, ...)
// or it should be run serially, mostly for collectors acutally doing measurements
// because they should not measure the execution of the other collectors
m.parallel = true
// Define meta information sent with each metric
// (Can also be dynamic or this is the basic set with extension through AddMeta())
m.meta = map[string]string{"source": m.name, "group": "SCHEDSTAT"}
// Read in the JSON configuration
if len(config) > 0 {
err = json.Unmarshal(config, &m.config)
if err != nil {
cclog.ComponentError(m.name, "Error reading config:", err.Error())
return err
}
}
// Check input file
file, err := os.Open(string(SCHEDSTATFILE))
if err != nil {
cclog.ComponentError(m.name, err.Error())
}
defer file.Close()
// Pre-generate tags for all CPUs
num_cpus := 0
m.cputags = make(map[string]map[string]string)
m.olddata = make(map[string]map[string]int64)
scanner := bufio.NewScanner(file)
for scanner.Scan() {
line := scanner.Text()
linefields := strings.Fields(line)
if strings.HasPrefix(linefields[0], "cpu") && strings.Compare(linefields[0], "cpu") != 0 {
cpustr := strings.TrimLeft(linefields[0], "cpu")
cpu, _ := strconv.Atoi(cpustr)
running, _ := strconv.ParseInt(linefields[7], 10, 64)
waiting, _ := strconv.ParseInt(linefields[8], 10, 64)
m.cputags[linefields[0]] = map[string]string{"type": "hwthread", "type-id": fmt.Sprintf("%d", cpu)}
m.olddata[linefields[0]] = map[string]int64{"running" : running, "waiting" : waiting}
num_cpus++
}
}
// Save current timestamp
m.lastTimestamp = time.Now()
// Set this flag only if everything is initialized properly, all required files exist, ...
m.init = true
return err
}
func (m *SchedstatCollector) ParseProcLine(linefields []string, tags map[string]string, output chan lp.CCMetric, now time.Time, tsdelta time.Duration) {
running, _ := strconv.ParseInt(linefields[7], 10, 64)
waiting, _ := strconv.ParseInt(linefields[8], 10, 64)
diff_running := running - m.olddata[linefields[0]]["running"]
diff_waiting := waiting - m.olddata[linefields[0]]["waiting"]
var l_running float64 = float64(diff_running) / tsdelta.Seconds() / (math.Pow(1000, 3))
var l_waiting float64 = float64(diff_waiting) / tsdelta.Seconds() / (math.Pow(1000, 3))
m.olddata[linefields[0]]["running"] = running
m.olddata[linefields[0]]["waiting"] = waiting
value := l_running + l_waiting
y, err := lp.New("cpu_load_core", tags, m.meta, map[string]interface{}{"value": value}, now)
if err == nil {
// Send it to output channel
output <- y
}
}
// Read collects all metrics belonging to the sample collector
// and sends them through the output channel to the collector manager
func (m *SchedstatCollector) Read(interval time.Duration, output chan lp.CCMetric) {
if !m.init {
return
}
//timestamps
now := time.Now()
tsdelta := now.Sub(m.lastTimestamp)
file, err := os.Open(string(SCHEDSTATFILE))
if err != nil {
cclog.ComponentError(m.name, err.Error())
}
defer file.Close()
scanner := bufio.NewScanner(file)
for scanner.Scan() {
line := scanner.Text()
linefields := strings.Fields(line)
if strings.HasPrefix(linefields[0], "cpu") {
m.ParseProcLine(linefields, m.cputags[linefields[0]], output, now, tsdelta)
}
}
m.lastTimestamp = now
}
// Close metric collector: close network connection, close files, close libraries, ...
// Called once by the collector manager
func (m *SchedstatCollector) Close() {
// Unset flag
m.init = false
}

View File

@@ -0,0 +1,11 @@
## `schedstat` collector
```json
"schedstat": {
}
```
The `schedstat` collector reads data from /proc/schedstat and calculates a load value, separated by hwthread. This might be useful to detect bad cpu pinning on shared nodes etc.
Metric:
* `cpu_load_core`

View File

@@ -3,7 +3,7 @@ package collectors
import (
"encoding/json"
"fmt"
"io/ioutil"
"os"
"path/filepath"
"strconv"
"strings"
@@ -83,14 +83,14 @@ func (m *TempCollector) Init(config json.RawMessage) error {
// sensor name
nameFile := filepath.Join(filepath.Dir(file), "name")
name, err := ioutil.ReadFile(nameFile)
name, err := os.ReadFile(nameFile)
if err == nil {
sensor.name = strings.TrimSpace(string(name))
}
// sensor label
labelFile := strings.TrimSuffix(file, "_input") + "_label"
label, err := ioutil.ReadFile(labelFile)
label, err := os.ReadFile(labelFile)
if err == nil {
sensor.label = strings.TrimSpace(string(label))
}
@@ -117,7 +117,7 @@ func (m *TempCollector) Init(config json.RawMessage) error {
}
// Sensor file
_, err = ioutil.ReadFile(file)
_, err = os.ReadFile(file)
if err != nil {
continue
}
@@ -139,7 +139,7 @@ func (m *TempCollector) Init(config json.RawMessage) error {
// max temperature
if m.config.ReportMaxTemp {
maxTempFile := strings.TrimSuffix(file, "_input") + "_max"
if buffer, err := ioutil.ReadFile(maxTempFile); err == nil {
if buffer, err := os.ReadFile(maxTempFile); err == nil {
if x, err := strconv.ParseInt(strings.TrimSpace(string(buffer)), 10, 64); err == nil {
sensor.maxTempName = strings.Replace(sensor.metricName, "temp", "max_temp", 1)
sensor.maxTemp = x / 1000
@@ -150,7 +150,7 @@ func (m *TempCollector) Init(config json.RawMessage) error {
// critical temperature
if m.config.ReportCriticalTemp {
criticalTempFile := strings.TrimSuffix(file, "_input") + "_crit"
if buffer, err := ioutil.ReadFile(criticalTempFile); err == nil {
if buffer, err := os.ReadFile(criticalTempFile); err == nil {
if x, err := strconv.ParseInt(strings.TrimSpace(string(buffer)), 10, 64); err == nil {
sensor.critTempName = strings.Replace(sensor.metricName, "temp", "crit_temp", 1)
sensor.critTemp = x / 1000
@@ -175,7 +175,7 @@ func (m *TempCollector) Read(interval time.Duration, output chan lp.CCMetric) {
for _, sensor := range m.sensors {
// Read sensor file
buffer, err := ioutil.ReadFile(sensor.file)
buffer, err := os.ReadFile(sensor.file)
if err != nil {
cclog.ComponentError(
m.name,