cc-metric-collector/collectors/loadavgMetric.go
Thomas Gruber 8d85bd53f1
Merge latest development changes to main branch (#79)
* Cleanup: Remove unused code

* Use Golang duration parser for 'interval' and 'duration'
 in main config

* Update handling of LIKWID headers. Download only if not already present in the system. Fixes #73

* Units with cc-units (#64)

* Add option to normalize units with cc-unit

* Add unit conversion to router

* Add option to change unit prefix in the router

* Add to MetricRouter README

* Add order of operations in router to README

* Use second add_tags/del_tags only if metric gets renamed

* Skip disks in DiskstatCollector that have size=0

* Check readability of sensor files in TempCollector

* Fix for --once option

* Rename `cpu` type to `hwthread` (#69)

* Rename 'cpu' type to 'hwthread' to avoid naming clashes with MetricStore and CC-Webfrontend

* Collectors in parallel (#74)

* Provide info to CollectorManager whether the collector can be executed in parallel with others

* Split serial and parallel collectors. Read in parallel first

* Update NvidiaCollector with new metrics, MIG and NvLink support (#75)

* CC topology module update (#76)

* Rename CPU to hardware thread, write some comments

* Do renaming in other parts

* Remove CpuList and SocketList function from metricCollector. Available in ccTopology

* Option to use MIG UUID as subtype-id in NvidiaCollector

* Option to use MIG slice name as subtype-id in NvidiaCollector

* MetricRouter: Fix JSON in README

* Fix for Github Action to really use the selected version

* Remove Ganglia installation in runonce Action and add Go 1.18

* Fix daemon options in init script

* Add separate go.mod files to use it with deprecated 1.16

* Minor updates for Makefiles

* fix string comparison

* AMD ROCm SMI collector (#77)

* Add collector for AMD ROCm SMI metrics

* Fix import path

* Fix imports

* Remove Board Number

* store GPU index explicitly

* Remove board number from description

* Use http instead of ftp to download likwid

* Fix serial number in rocmCollector

* Improved http sink (#78)

* automatic flush in NatsSink

* tweak default options of HttpSink

* shorter cirt. section and retries for HttpSink

* fix error handling

* Remove file added by mistake.

* Use http instead of ftp to download likwid

* Fix serial number in rocmCollector

Co-authored-by: Thomas Roehl <thomas.roehl@fau.de>

Co-authored-by: Holger Obermaier <40787752+ho-ob@users.noreply.github.com>
Co-authored-by: Lou <lou.knauer@gmx.de>
2022-06-08 15:25:40 +02:00

129 lines
2.8 KiB
Go

package collectors
import (
"encoding/json"
"fmt"
"io/ioutil"
"strconv"
"strings"
"time"
cclog "github.com/ClusterCockpit/cc-metric-collector/internal/ccLogger"
lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric"
)
//
// LoadavgCollector collects:
// * load average of last 1, 5 & 15 minutes
// * number of processes currently runnable
// * total number of processes in system
//
// See: https://www.kernel.org/doc/html/latest/filesystems/proc.html
//
const LOADAVGFILE = "/proc/loadavg"
type LoadavgCollector struct {
metricCollector
tags map[string]string
load_matches []string
load_skips []bool
proc_matches []string
proc_skips []bool
config struct {
ExcludeMetrics []string `json:"exclude_metrics,omitempty"`
}
}
func (m *LoadavgCollector) Init(config json.RawMessage) error {
m.name = "LoadavgCollector"
m.parallel = true
m.setup()
if len(config) > 0 {
err := json.Unmarshal(config, &m.config)
if err != nil {
return err
}
}
m.meta = map[string]string{
"source": m.name,
"group": "LOAD"}
m.tags = map[string]string{"type": "node"}
m.load_matches = []string{
"load_one",
"load_five",
"load_fifteen"}
m.load_skips = make([]bool, len(m.load_matches))
m.proc_matches = []string{
"proc_run",
"proc_total"}
m.proc_skips = make([]bool, len(m.proc_matches))
for i, name := range m.load_matches {
_, m.load_skips[i] = stringArrayContains(m.config.ExcludeMetrics, name)
}
for i, name := range m.proc_matches {
_, m.proc_skips[i] = stringArrayContains(m.config.ExcludeMetrics, name)
}
m.init = true
return nil
}
func (m *LoadavgCollector) Read(interval time.Duration, output chan lp.CCMetric) {
if !m.init {
return
}
buffer, err := ioutil.ReadFile(LOADAVGFILE)
if err != nil {
if err != nil {
cclog.ComponentError(
m.name,
fmt.Sprintf("Read(): Failed to read file '%s': %v", LOADAVGFILE, err))
}
return
}
now := time.Now()
// Load metrics
ls := strings.Split(string(buffer), ` `)
for i, name := range m.load_matches {
x, err := strconv.ParseFloat(ls[i], 64)
if err != nil {
cclog.ComponentError(
m.name,
fmt.Sprintf("Read(): Failed to convert '%s' to float64: %v", ls[i], err))
continue
}
if m.load_skips[i] {
continue
}
y, err := lp.New(name, m.tags, m.meta, map[string]interface{}{"value": x}, now)
if err == nil {
output <- y
}
}
// Process metrics
lv := strings.Split(ls[3], `/`)
for i, name := range m.proc_matches {
x, err := strconv.ParseInt(lv[i], 10, 64)
if err != nil {
cclog.ComponentError(
m.name,
fmt.Sprintf("Read(): Failed to convert '%s' to float64: %v", lv[i], err))
continue
}
if m.proc_skips[i] {
continue
}
y, err := lp.New(name, m.tags, m.meta, map[string]interface{}{"value": x}, now)
if err == nil {
output <- y
}
}
}
func (m *LoadavgCollector) Close() {
m.init = false
}