mirror of
https://github.com/ClusterCockpit/cc-metric-collector.git
synced 2024-11-10 04:27:25 +01:00
be20f956c2
* InfiniBandCollector: Scale raw readings from octets to bytes * Fix clock frequency coming from LikwidCollector and update docs * Build DEB package for Ubuntu 20.04 for releases * Fix memstat collector with numa_stats option * Remove useless prints from MemstatCollector * Replace ioutils with os and io (#87) * Use lower case for error strings in RocmSmiCollector * move maybe-usable-by-other-cc-components to pkg. Fix all files to use the new paths (#88) * Add collector for monitoring the execution of cc-metric-collector itself (#81) * Add collector to monitor execution of cc-metric-collector itself * Register SelfCollector * Fix import paths for moved packages
349 lines
10 KiB
Go
349 lines
10 KiB
Go
package metricAggregator
|
|
|
|
import (
|
|
"context"
|
|
"fmt"
|
|
"math"
|
|
"os"
|
|
"strings"
|
|
"sync"
|
|
"time"
|
|
|
|
cclog "github.com/ClusterCockpit/cc-metric-collector/pkg/ccLogger"
|
|
|
|
lp "github.com/ClusterCockpit/cc-metric-collector/pkg/ccMetric"
|
|
topo "github.com/ClusterCockpit/cc-metric-collector/pkg/ccTopology"
|
|
|
|
"github.com/PaesslerAG/gval"
|
|
)
|
|
|
|
type MetricAggregatorIntervalConfig struct {
|
|
Name string `json:"name"` // Metric name for the new metric
|
|
Function string `json:"function"` // Function to apply on the metric
|
|
Condition string `json:"if"` // Condition for applying function
|
|
Tags map[string]string `json:"tags"` // Tags for the new metric
|
|
Meta map[string]string `json:"meta"` // Meta information for the new metric
|
|
gvalCond gval.Evaluable
|
|
gvalFunc gval.Evaluable
|
|
}
|
|
|
|
type metricAggregator struct {
|
|
functions []*MetricAggregatorIntervalConfig
|
|
constants map[string]interface{}
|
|
language gval.Language
|
|
output chan lp.CCMetric
|
|
}
|
|
|
|
type MetricAggregator interface {
|
|
AddAggregation(name, function, condition string, tags, meta map[string]string) error
|
|
DeleteAggregation(name string) error
|
|
Init(output chan lp.CCMetric) error
|
|
Eval(starttime time.Time, endtime time.Time, metrics []lp.CCMetric)
|
|
}
|
|
|
|
var metricCacheLanguage = gval.NewLanguage(
|
|
gval.Base(),
|
|
gval.Function("sum", sumfunc),
|
|
gval.Function("min", minfunc),
|
|
gval.Function("avg", avgfunc),
|
|
gval.Function("mean", avgfunc),
|
|
gval.Function("max", maxfunc),
|
|
gval.Function("len", lenfunc),
|
|
gval.Function("median", medianfunc),
|
|
gval.InfixOperator("in", infunc),
|
|
gval.Function("match", matchfunc),
|
|
gval.Function("getCpuCore", getCpuCoreFunc),
|
|
gval.Function("getCpuSocket", getCpuSocketFunc),
|
|
gval.Function("getCpuNuma", getCpuNumaDomainFunc),
|
|
gval.Function("getCpuDie", getCpuDieFunc),
|
|
gval.Function("getSockCpuList", getCpuListOfSocketFunc),
|
|
gval.Function("getNumaCpuList", getCpuListOfNumaDomainFunc),
|
|
gval.Function("getDieCpuList", getCpuListOfDieFunc),
|
|
gval.Function("getCoreCpuList", getCpuListOfCoreFunc),
|
|
gval.Function("getCpuList", getCpuListOfNode),
|
|
gval.Function("getCpuListOfType", getCpuListOfType),
|
|
)
|
|
var language gval.Language = gval.NewLanguage(
|
|
gval.Full(),
|
|
metricCacheLanguage,
|
|
)
|
|
var evaluables = struct {
|
|
mapping map[string]gval.Evaluable
|
|
mutex sync.Mutex
|
|
}{
|
|
mapping: make(map[string]gval.Evaluable),
|
|
}
|
|
|
|
func (c *metricAggregator) Init(output chan lp.CCMetric) error {
|
|
c.output = output
|
|
c.functions = make([]*MetricAggregatorIntervalConfig, 0)
|
|
c.constants = make(map[string]interface{})
|
|
|
|
// add constants like hostname, numSockets, ... to constants list
|
|
// Set hostname
|
|
hostname, err := os.Hostname()
|
|
if err != nil {
|
|
cclog.Error(err.Error())
|
|
return err
|
|
}
|
|
// Drop domain part of host name
|
|
c.constants["hostname"] = strings.SplitN(hostname, `.`, 2)[0]
|
|
cinfo := topo.CpuInfo()
|
|
c.constants["numHWThreads"] = cinfo.NumHWthreads
|
|
c.constants["numSockets"] = cinfo.NumSockets
|
|
c.constants["numNumaDomains"] = cinfo.NumNumaDomains
|
|
c.constants["numDies"] = cinfo.NumDies
|
|
c.constants["smtWidth"] = cinfo.SMTWidth
|
|
|
|
c.language = gval.NewLanguage(
|
|
gval.Full(),
|
|
metricCacheLanguage,
|
|
)
|
|
|
|
// Example aggregation function
|
|
// var f metricCacheFunctionConfig
|
|
// f.Name = "temp_cores_avg"
|
|
// //f.Condition = `"temp_core_" in name`
|
|
// f.Condition = `match("temp_core_%d+", metric.Name())`
|
|
// f.Function = `avg(values)`
|
|
// f.Tags = map[string]string{"type": "node"}
|
|
// f.Meta = map[string]string{"group": "IPMI", "unit": "degC", "source": "TempCollector"}
|
|
// c.functions = append(c.functions, &f)
|
|
return nil
|
|
}
|
|
|
|
func (c *metricAggregator) Eval(starttime time.Time, endtime time.Time, metrics []lp.CCMetric) {
|
|
vars := make(map[string]interface{})
|
|
for k, v := range c.constants {
|
|
vars[k] = v
|
|
}
|
|
vars["starttime"] = starttime
|
|
vars["endtime"] = endtime
|
|
for _, f := range c.functions {
|
|
cclog.ComponentDebug("MetricCache", "COLLECT", f.Name, "COND", f.Condition)
|
|
values := make([]float64, 0)
|
|
matches := make([]lp.CCMetric, 0)
|
|
for _, m := range metrics {
|
|
vars["metric"] = m
|
|
//value, err := gval.Evaluate(f.Condition, vars, c.language)
|
|
value, err := f.gvalCond.EvalBool(context.Background(), vars)
|
|
if err != nil {
|
|
cclog.ComponentError("MetricCache", "COLLECT", f.Name, "COND", f.Condition, ":", err.Error())
|
|
continue
|
|
}
|
|
if value {
|
|
v, valid := m.GetField("value")
|
|
if valid {
|
|
switch x := v.(type) {
|
|
case float64:
|
|
values = append(values, x)
|
|
case float32:
|
|
case int:
|
|
case int64:
|
|
values = append(values, float64(x))
|
|
case bool:
|
|
if x {
|
|
values = append(values, float64(1.0))
|
|
} else {
|
|
values = append(values, float64(0.0))
|
|
}
|
|
default:
|
|
cclog.ComponentError("MetricCache", "COLLECT ADD VALUE", v, "FAILED")
|
|
}
|
|
}
|
|
matches = append(matches, m)
|
|
}
|
|
}
|
|
delete(vars, "metric")
|
|
cclog.ComponentDebug("MetricCache", "EVALUATE", f.Name, "METRICS", len(values), "CALC", f.Function)
|
|
vars["values"] = values
|
|
vars["metrics"] = matches
|
|
if len(values) > 0 {
|
|
value, err := gval.Evaluate(f.Function, vars, c.language)
|
|
if err != nil {
|
|
cclog.ComponentError("MetricCache", "EVALUATE", f.Name, "METRICS", len(values), "CALC", f.Function, ":", err.Error())
|
|
break
|
|
}
|
|
|
|
copy_tags := func(tags map[string]string, metrics []lp.CCMetric) map[string]string {
|
|
out := make(map[string]string)
|
|
for key, value := range tags {
|
|
switch value {
|
|
case "<copy>":
|
|
for _, m := range metrics {
|
|
v, err := m.GetTag(key)
|
|
if err {
|
|
out[key] = v
|
|
}
|
|
}
|
|
default:
|
|
out[key] = value
|
|
}
|
|
}
|
|
return out
|
|
}
|
|
copy_meta := func(meta map[string]string, metrics []lp.CCMetric) map[string]string {
|
|
out := make(map[string]string)
|
|
for key, value := range meta {
|
|
switch value {
|
|
case "<copy>":
|
|
for _, m := range metrics {
|
|
v, err := m.GetMeta(key)
|
|
if err {
|
|
out[key] = v
|
|
}
|
|
}
|
|
default:
|
|
out[key] = value
|
|
}
|
|
}
|
|
return out
|
|
}
|
|
tags := copy_tags(f.Tags, matches)
|
|
meta := copy_meta(f.Meta, matches)
|
|
|
|
var m lp.CCMetric
|
|
switch t := value.(type) {
|
|
case float64:
|
|
m, err = lp.New(f.Name, tags, meta, map[string]interface{}{"value": t}, starttime)
|
|
case float32:
|
|
m, err = lp.New(f.Name, tags, meta, map[string]interface{}{"value": t}, starttime)
|
|
case int:
|
|
m, err = lp.New(f.Name, tags, meta, map[string]interface{}{"value": t}, starttime)
|
|
case int64:
|
|
m, err = lp.New(f.Name, tags, meta, map[string]interface{}{"value": t}, starttime)
|
|
case string:
|
|
m, err = lp.New(f.Name, tags, meta, map[string]interface{}{"value": t}, starttime)
|
|
default:
|
|
cclog.ComponentError("MetricCache", "Gval returned invalid type", t, "skipping metric", f.Name)
|
|
}
|
|
if err != nil {
|
|
cclog.ComponentError("MetricCache", "Cannot create metric from Gval result", value, ":", err.Error())
|
|
}
|
|
cclog.ComponentDebug("MetricCache", "SEND", m)
|
|
select {
|
|
case c.output <- m:
|
|
default:
|
|
}
|
|
|
|
}
|
|
}
|
|
}
|
|
|
|
func (c *metricAggregator) AddAggregation(name, function, condition string, tags, meta map[string]string) error {
|
|
// Since "" cannot be used inside of JSON strings, we use '' and replace them here because gval does not like ''
|
|
// but wants ""
|
|
newfunc := strings.ReplaceAll(function, "'", "\"")
|
|
newcond := strings.ReplaceAll(condition, "'", "\"")
|
|
gvalCond, err := gval.Full(metricCacheLanguage).NewEvaluable(newcond)
|
|
if err != nil {
|
|
cclog.ComponentError("MetricAggregator", "Cannot add aggregation, invalid if condition", newcond, ":", err.Error())
|
|
return err
|
|
}
|
|
gvalFunc, err := gval.Full(metricCacheLanguage).NewEvaluable(newfunc)
|
|
if err != nil {
|
|
cclog.ComponentError("MetricAggregator", "Cannot add aggregation, invalid function condition", newfunc, ":", err.Error())
|
|
return err
|
|
}
|
|
for _, agg := range c.functions {
|
|
if agg.Name == name {
|
|
agg.Name = name
|
|
agg.Condition = newcond
|
|
agg.Function = newfunc
|
|
agg.Tags = tags
|
|
agg.Meta = meta
|
|
agg.gvalCond = gvalCond
|
|
agg.gvalFunc = gvalFunc
|
|
return nil
|
|
}
|
|
}
|
|
agg := &MetricAggregatorIntervalConfig{
|
|
Name: name,
|
|
Condition: newcond,
|
|
gvalCond: gvalCond,
|
|
Function: newfunc,
|
|
gvalFunc: gvalFunc,
|
|
Tags: tags,
|
|
Meta: meta,
|
|
}
|
|
c.functions = append(c.functions, agg)
|
|
return nil
|
|
}
|
|
|
|
func (c *metricAggregator) DeleteAggregation(name string) error {
|
|
for i, agg := range c.functions {
|
|
if agg.Name == name {
|
|
copy(c.functions[i:], c.functions[i+1:])
|
|
c.functions[len(c.functions)-1] = nil
|
|
c.functions = c.functions[:len(c.functions)-1]
|
|
return nil
|
|
}
|
|
}
|
|
return fmt.Errorf("no aggregation for metric name %s", name)
|
|
}
|
|
|
|
func (c *metricAggregator) AddConstant(name string, value interface{}) {
|
|
c.constants[name] = value
|
|
}
|
|
|
|
func (c *metricAggregator) DelConstant(name string) {
|
|
delete(c.constants, name)
|
|
}
|
|
|
|
func (c *metricAggregator) AddFunction(name string, function func(args ...interface{}) (interface{}, error)) {
|
|
c.language = gval.NewLanguage(c.language, gval.Function(name, function))
|
|
}
|
|
|
|
func EvalBoolCondition(condition string, params map[string]interface{}) (bool, error) {
|
|
evaluables.mutex.Lock()
|
|
evaluable, ok := evaluables.mapping[condition]
|
|
evaluables.mutex.Unlock()
|
|
if !ok {
|
|
newcond :=
|
|
strings.ReplaceAll(
|
|
strings.ReplaceAll(
|
|
condition, "'", "\""), "%", "\\")
|
|
var err error
|
|
evaluable, err = language.NewEvaluable(newcond)
|
|
if err != nil {
|
|
return false, err
|
|
}
|
|
evaluables.mutex.Lock()
|
|
evaluables.mapping[condition] = evaluable
|
|
evaluables.mutex.Unlock()
|
|
}
|
|
value, err := evaluable.EvalBool(context.Background(), params)
|
|
return value, err
|
|
}
|
|
|
|
func EvalFloat64Condition(condition string, params map[string]interface{}) (float64, error) {
|
|
evaluables.mutex.Lock()
|
|
evaluable, ok := evaluables.mapping[condition]
|
|
evaluables.mutex.Unlock()
|
|
if !ok {
|
|
newcond :=
|
|
strings.ReplaceAll(
|
|
strings.ReplaceAll(
|
|
condition, "'", "\""), "%", "\\")
|
|
var err error
|
|
evaluable, err = language.NewEvaluable(newcond)
|
|
if err != nil {
|
|
return math.NaN(), err
|
|
}
|
|
evaluables.mutex.Lock()
|
|
evaluables.mapping[condition] = evaluable
|
|
evaluables.mutex.Unlock()
|
|
}
|
|
value, err := evaluable.EvalFloat64(context.Background(), params)
|
|
return value, err
|
|
}
|
|
|
|
func NewAggregator(output chan lp.CCMetric) (MetricAggregator, error) {
|
|
a := new(metricAggregator)
|
|
err := a.Init(output)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
return a, err
|
|
}
|