mirror of
https://github.com/ClusterCockpit/cc-metric-collector.git
synced 2025-01-13 15:49:06 +01:00
Add collector for monitoring the execution of cc-metric-collector itself (#81)
* Add collector to monitor execution of cc-metric-collector itself * Register SelfCollector * Fix import paths for moved packages
This commit is contained in:
parent
4bd71224df
commit
9ae0806aa9
@ -37,6 +37,7 @@ var AvailableCollectors = map[string]MetricCollector{
|
|||||||
"beegfs_meta": new(BeegfsMetaCollector),
|
"beegfs_meta": new(BeegfsMetaCollector),
|
||||||
"beegfs_storage": new(BeegfsStorageCollector),
|
"beegfs_storage": new(BeegfsStorageCollector),
|
||||||
"rocm_smi": new(RocmSmiCollector),
|
"rocm_smi": new(RocmSmiCollector),
|
||||||
|
"self": new(SelfCollector),
|
||||||
"schedstat": new(SchedstatCollector),
|
"schedstat": new(SchedstatCollector),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
144
collectors/selfMetric.go
Normal file
144
collectors/selfMetric.go
Normal file
@ -0,0 +1,144 @@
|
|||||||
|
package collectors
|
||||||
|
|
||||||
|
import (
|
||||||
|
"encoding/json"
|
||||||
|
"runtime"
|
||||||
|
"syscall"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
cclog "github.com/ClusterCockpit/cc-metric-collector/pkg/ccLogger"
|
||||||
|
lp "github.com/ClusterCockpit/cc-metric-collector/pkg/ccMetric"
|
||||||
|
)
|
||||||
|
|
||||||
|
// SelfCollectorConfig selects which groups of self-monitoring metrics the
// SelfCollector emits. Every option is a boolean switch decoded from the
// collector's JSON configuration; an option that is absent from the JSON keeps
// its zero value (false).
type SelfCollectorConfig struct {
	// MemStats enables the metrics taken from runtime.MemStats.
	MemStats bool `json:"read_mem_stats"`
	// GoRoutines enables the goroutine count metric.
	GoRoutines bool `json:"read_goroutines"`
	// CgoCalls enables the cgo call count metric.
	CgoCalls bool `json:"read_cgo_calls"`
	// Rusage enables the metrics taken from getrusage(RUSAGE_SELF).
	Rusage bool `json:"read_rusage"`
}
|
||||||
|
|
||||||
|
type SelfCollector struct {
|
||||||
|
metricCollector
|
||||||
|
config SelfCollectorConfig // the configuration structure
|
||||||
|
meta map[string]string // default meta information
|
||||||
|
tags map[string]string // default tags
|
||||||
|
}
|
||||||
|
|
||||||
|
func (m *SelfCollector) Init(config json.RawMessage) error {
|
||||||
|
var err error = nil
|
||||||
|
m.name = "SelfCollector"
|
||||||
|
m.setup()
|
||||||
|
m.parallel = true
|
||||||
|
m.meta = map[string]string{"source": m.name, "group": "Self"}
|
||||||
|
m.tags = map[string]string{"type": "node"}
|
||||||
|
if len(config) > 0 {
|
||||||
|
err = json.Unmarshal(config, &m.config)
|
||||||
|
if err != nil {
|
||||||
|
cclog.ComponentError(m.name, "Error reading config:", err.Error())
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
m.init = true
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
func (m *SelfCollector) Read(interval time.Duration, output chan lp.CCMetric) {
|
||||||
|
timestamp := time.Now()
|
||||||
|
|
||||||
|
if m.config.MemStats {
|
||||||
|
var memstats runtime.MemStats
|
||||||
|
runtime.ReadMemStats(&memstats)
|
||||||
|
|
||||||
|
y, err := lp.New("total_alloc", m.tags, m.meta, map[string]interface{}{"value": memstats.TotalAlloc}, timestamp)
|
||||||
|
if err == nil {
|
||||||
|
y.AddMeta("unit", "Bytes")
|
||||||
|
output <- y
|
||||||
|
}
|
||||||
|
y, err = lp.New("heap_alloc", m.tags, m.meta, map[string]interface{}{"value": memstats.HeapAlloc}, timestamp)
|
||||||
|
if err == nil {
|
||||||
|
y.AddMeta("unit", "Bytes")
|
||||||
|
output <- y
|
||||||
|
}
|
||||||
|
y, err = lp.New("heap_sys", m.tags, m.meta, map[string]interface{}{"value": memstats.HeapSys}, timestamp)
|
||||||
|
if err == nil {
|
||||||
|
y.AddMeta("unit", "Bytes")
|
||||||
|
output <- y
|
||||||
|
}
|
||||||
|
y, err = lp.New("heap_idle", m.tags, m.meta, map[string]interface{}{"value": memstats.HeapIdle}, timestamp)
|
||||||
|
if err == nil {
|
||||||
|
y.AddMeta("unit", "Bytes")
|
||||||
|
output <- y
|
||||||
|
}
|
||||||
|
y, err = lp.New("heap_inuse", m.tags, m.meta, map[string]interface{}{"value": memstats.HeapInuse}, timestamp)
|
||||||
|
if err == nil {
|
||||||
|
y.AddMeta("unit", "Bytes")
|
||||||
|
output <- y
|
||||||
|
}
|
||||||
|
y, err = lp.New("heap_released", m.tags, m.meta, map[string]interface{}{"value": memstats.HeapReleased}, timestamp)
|
||||||
|
if err == nil {
|
||||||
|
y.AddMeta("unit", "Bytes")
|
||||||
|
output <- y
|
||||||
|
}
|
||||||
|
y, err = lp.New("heap_objects", m.tags, m.meta, map[string]interface{}{"value": memstats.HeapObjects}, timestamp)
|
||||||
|
if err == nil {
|
||||||
|
output <- y
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if m.config.GoRoutines {
|
||||||
|
y, err := lp.New("num_goroutines", m.tags, m.meta, map[string]interface{}{"value": runtime.NumGoroutine()}, timestamp)
|
||||||
|
if err == nil {
|
||||||
|
output <- y
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if m.config.CgoCalls {
|
||||||
|
y, err := lp.New("num_cgo_calls", m.tags, m.meta, map[string]interface{}{"value": runtime.NumCgoCall()}, timestamp)
|
||||||
|
if err == nil {
|
||||||
|
output <- y
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if m.config.Rusage {
|
||||||
|
var rusage syscall.Rusage
|
||||||
|
err := syscall.Getrusage(syscall.RUSAGE_SELF, &rusage)
|
||||||
|
if err == nil {
|
||||||
|
sec, nsec := rusage.Utime.Unix()
|
||||||
|
t := float64(sec) + (float64(nsec) * 1e-9)
|
||||||
|
y, err := lp.New("rusage_user_time", m.tags, m.meta, map[string]interface{}{"value": t}, timestamp)
|
||||||
|
if err == nil {
|
||||||
|
y.AddMeta("unit", "seconds")
|
||||||
|
output <- y
|
||||||
|
}
|
||||||
|
sec, nsec = rusage.Stime.Unix()
|
||||||
|
t = float64(sec) + (float64(nsec) * 1e-9)
|
||||||
|
y, err = lp.New("rusage_system_time", m.tags, m.meta, map[string]interface{}{"value": t}, timestamp)
|
||||||
|
if err == nil {
|
||||||
|
y.AddMeta("unit", "seconds")
|
||||||
|
output <- y
|
||||||
|
}
|
||||||
|
y, err = lp.New("rusage_vol_ctx_switch", m.tags, m.meta, map[string]interface{}{"value": rusage.Nvcsw}, timestamp)
|
||||||
|
if err == nil {
|
||||||
|
output <- y
|
||||||
|
}
|
||||||
|
y, err = lp.New("rusage_invol_ctx_switch", m.tags, m.meta, map[string]interface{}{"value": rusage.Nivcsw}, timestamp)
|
||||||
|
if err == nil {
|
||||||
|
output <- y
|
||||||
|
}
|
||||||
|
y, err = lp.New("rusage_signals", m.tags, m.meta, map[string]interface{}{"value": rusage.Nsignals}, timestamp)
|
||||||
|
if err == nil {
|
||||||
|
output <- y
|
||||||
|
}
|
||||||
|
y, err = lp.New("rusage_major_pgfaults", m.tags, m.meta, map[string]interface{}{"value": rusage.Majflt}, timestamp)
|
||||||
|
if err == nil {
|
||||||
|
output <- y
|
||||||
|
}
|
||||||
|
y, err = lp.New("rusage_minor_pgfaults", m.tags, m.meta, map[string]interface{}{"value": rusage.Minflt}, timestamp)
|
||||||
|
if err == nil {
|
||||||
|
output <- y
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (m *SelfCollector) Close() {
|
||||||
|
m.init = false
|
||||||
|
}
|
34
collectors/selfMetric.md
Normal file
34
collectors/selfMetric.md
Normal file
@ -0,0 +1,34 @@
|
|||||||
|
## `self` collector
|
||||||
|
|
||||||
|
```json
|
||||||
|
"self": {
|
||||||
|
"read_mem_stats" : true,
|
||||||
|
"read_goroutines" : true,
|
||||||
|
"read_cgo_calls" : true,
|
||||||
|
"read_rusage" : true
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
The `self` collector reads data from Go's `runtime` and `syscall` packages and thereby monitors the execution of cc-metric-collector itself.
|
||||||
|
|
||||||
|
Metrics:
|
||||||
|
* If `read_mem_stats == true`:
|
||||||
|
* `total_alloc`: The metric reports cumulative bytes allocated for heap objects.
|
||||||
|
* `heap_alloc`: The metric reports bytes of allocated heap objects.
|
||||||
|
* `heap_sys`: The metric reports bytes of heap memory obtained from the OS.
|
||||||
|
* `heap_idle`: The metric reports bytes in idle (unused) spans.
|
||||||
|
* `heap_inuse`: The metric reports bytes in in-use spans.
|
||||||
|
* `heap_released`: The metric reports bytes of physical memory returned to the OS.
|
||||||
|
* `heap_objects`: The metric reports the number of allocated heap objects.
|
||||||
|
* If `read_goroutines == true`:
|
||||||
|
* `num_goroutines`: The metric reports the number of goroutines that currently exist.
|
||||||
|
* If `read_cgo_calls == true`:
|
||||||
|
* `num_cgo_calls`: The metric reports the number of cgo calls made by the current process.
|
||||||
|
* If `read_rusage == true`:
|
||||||
|
* `rusage_user_time`: The metric reports the amount of time that this process has been scheduled in user mode.
|
||||||
|
* `rusage_system_time`: The metric reports the amount of time that this process has been scheduled in kernel mode.
|
||||||
|
* `rusage_vol_ctx_switch`: The metric reports the number of voluntary context switches.
|
||||||
|
* `rusage_invol_ctx_switch`: The metric reports the number of involuntary context switches.
|
||||||
|
* `rusage_signals`: The metric reports the number of signals received.
|
||||||
|
* `rusage_major_pgfaults`: The metric reports the number of major faults the process has made which have required loading a memory page from disk.
|
||||||
|
* `rusage_minor_pgfaults`: The metric reports the number of minor faults the process has made which have not required loading a memory page from disk.
|
Loading…
Reference in New Issue
Block a user