cpustatMetric.go: Use derived values instead of absolute values (#83)

* cpustatMetric.go: Use derived values instead of absolute values

  The values in /proc/stat are absolute counters related to the boot
  time of the system. To obtain a utilization of the CPU, the changes
  in the counters must be derived according to time. To take only the
  absolute values leads to the fact that changes in the utilization,
  straight with larger values, do not become visible.

* Add new collector for /proc/schedstat

  The `schedstat` collector reads data from /proc/schedstat and calculates
  a load value, separated by hwthread. This might be useful to detect bad
  cpu pinning on shared nodes etc.

Co-authored-by: Michael Schwarz <post@michael-schwarz.name>
This commit is contained in:
oscarminus
2022-09-07 14:09:29 +02:00
committed by Thomas Roehl
parent 503705d442
commit 8a3446a596
4 changed files with 197 additions and 10 deletions

View File

@@ -11,6 +11,7 @@ import (
cclog "github.com/ClusterCockpit/cc-metric-collector/internal/ccLogger"
lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric"
sysconf "github.com/tklauser/go-sysconf"
)
const CPUSTATFILE = `/proc/stat`
@@ -22,9 +23,11 @@ type CpustatCollectorConfig struct {
type CpustatCollector struct {
metricCollector
config CpustatCollectorConfig
lastTimestamp time.Time // Store time stamp of last tick to derive values
matches map[string]int
cputags map[string]map[string]string
nodetags map[string]string
olddata map[string]map[string]int64
}
func (m *CpustatCollector) Init(config json.RawMessage) error {
@@ -76,36 +79,48 @@ func (m *CpustatCollector) Init(config json.RawMessage) error {
// Pre-generate tags for all CPUs
num_cpus := 0
m.cputags = make(map[string]map[string]string)
m.olddata = make(map[string]map[string]int64)
scanner := bufio.NewScanner(file)
for scanner.Scan() {
line := scanner.Text()
linefields := strings.Fields(line)
if strings.HasPrefix(linefields[0], "cpu") && strings.Compare(linefields[0], "cpu") != 0 {
if strings.Compare(linefields[0], "cpu") == 0 {
m.olddata["cpu"] = make(map[string]int64)
for k, v := range m.matches {
m.olddata["cpu"][k], _ = strconv.ParseInt(linefields[v], 0, 64)
}
} else if strings.HasPrefix(linefields[0], "cpu") && strings.Compare(linefields[0], "cpu") != 0 {
cpustr := strings.TrimLeft(linefields[0], "cpu")
cpu, _ := strconv.Atoi(cpustr)
m.cputags[linefields[0]] = map[string]string{"type": "hwthread", "type-id": fmt.Sprintf("%d", cpu)}
m.olddata[linefields[0]] = make(map[string]int64)
for k, v := range m.matches {
m.olddata[linefields[0]][k], _ = strconv.ParseInt(linefields[v], 0, 64)
}
num_cpus++
}
}
m.lastTimestamp = time.Now()
m.init = true
return nil
}
func (m *CpustatCollector) parseStatLine(linefields []string, tags map[string]string, output chan lp.CCMetric) {
func (m *CpustatCollector) parseStatLine(linefields []string, tags map[string]string, output chan lp.CCMetric, now time.Time, tsdelta time.Duration) {
values := make(map[string]float64)
total := 0.0
clktck, _ := sysconf.Sysconf(sysconf.SC_CLK_TCK)
for match, index := range m.matches {
if len(match) > 0 {
x, err := strconv.ParseInt(linefields[index], 0, 64)
if err == nil {
values[match] = float64(x)
total += values[match]
vdiff := x - m.olddata[linefields[0]][match]
m.olddata[linefields[0]][match] = x // Store new value for next run
values[match] = float64(vdiff) / float64(tsdelta.Seconds()) / float64(clktck)
}
}
}
t := time.Now()
for name, value := range values {
y, err := lp.New(name, tags, m.meta, map[string]interface{}{"value": (value * 100.0) / total}, t)
y, err := lp.New(name, tags, m.meta, map[string]interface{}{"value": value * 100}, now)
if err == nil {
output <- y
}
@@ -117,6 +132,9 @@ func (m *CpustatCollector) Read(interval time.Duration, output chan lp.CCMetric)
return
}
num_cpus := 0
now := time.Now()
tsdelta := now.Sub(m.lastTimestamp)
file, err := os.Open(string(CPUSTATFILE))
if err != nil {
cclog.ComponentError(m.name, err.Error())
@@ -128,9 +146,9 @@ func (m *CpustatCollector) Read(interval time.Duration, output chan lp.CCMetric)
line := scanner.Text()
linefields := strings.Fields(line)
if strings.Compare(linefields[0], "cpu") == 0 {
m.parseStatLine(linefields, m.nodetags, output)
m.parseStatLine(linefields, m.nodetags, output, now, tsdelta)
} else if strings.HasPrefix(linefields[0], "cpu") {
m.parseStatLine(linefields, m.cputags[linefields[0]], output)
m.parseStatLine(linefields, m.cputags[linefields[0]], output, now, tsdelta)
num_cpus++
}
}
@@ -139,11 +157,13 @@ func (m *CpustatCollector) Read(interval time.Duration, output chan lp.CCMetric)
m.nodetags,
m.meta,
map[string]interface{}{"value": int(num_cpus)},
time.Now(),
now,
)
if err == nil {
output <- num_cpus_metric
}
m.lastTimestamp = now
}
func (m *CpustatCollector) Close() {