mirror of
https://github.com/ClusterCockpit/cc-metric-collector.git
synced 2024-12-26 15:29:04 +01:00
Split MetricRouter and MetricAggregator (#24)
* Split MetricRouter and MetricAggregator * Missing change in MetricCache * Add README for MetricAggregator
This commit is contained in:
parent
a016483012
commit
92d4a9c2b9
@ -24,7 +24,7 @@ import (
|
|||||||
cclog "github.com/ClusterCockpit/cc-metric-collector/internal/ccLogger"
|
cclog "github.com/ClusterCockpit/cc-metric-collector/internal/ccLogger"
|
||||||
lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric"
|
lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric"
|
||||||
topo "github.com/ClusterCockpit/cc-metric-collector/internal/ccTopology"
|
topo "github.com/ClusterCockpit/cc-metric-collector/internal/ccTopology"
|
||||||
mr "github.com/ClusterCockpit/cc-metric-collector/internal/metricRouter"
|
agg "github.com/ClusterCockpit/cc-metric-collector/internal/metricAggregator"
|
||||||
)
|
)
|
||||||
|
|
||||||
type MetricScope string
|
type MetricScope string
|
||||||
@ -70,10 +70,10 @@ func GetAllMetricScopes() []MetricScope {
|
|||||||
}
|
}
|
||||||
|
|
||||||
type LikwidCollectorMetricConfig struct {
|
type LikwidCollectorMetricConfig struct {
|
||||||
Name string `json:"name"` // Name of the metric
|
Name string `json:"name"` // Name of the metric
|
||||||
Calc string `json:"calc"` // Calculation for the metric using
|
Calc string `json:"calc"` // Calculation for the metric using
|
||||||
Aggr string `json:"aggregation"` // if scope unequal to LIKWID metric scope, the values are combined (sum, min, max, mean or avg, median)
|
//Aggr string `json:"aggregation"` // if scope unequal to LIKWID metric scope, the values are combined (sum, min, max, mean or avg, median)
|
||||||
Scope MetricScope `json:"scope"` // scope for calculation. subscopes are aggregated using the 'aggregation' function
|
Scope MetricScope `json:"scope"` // scope for calculation. subscopes are aggregated using the 'aggregation' function
|
||||||
Publish bool `json:"publish"`
|
Publish bool `json:"publish"`
|
||||||
granulatity MetricScope
|
granulatity MetricScope
|
||||||
}
|
}
|
||||||
@ -314,7 +314,7 @@ func (m *LikwidCollector) Init(config json.RawMessage) error {
|
|||||||
}
|
}
|
||||||
for _, metric := range evset.Metrics {
|
for _, metric := range evset.Metrics {
|
||||||
// Try to evaluate the metric
|
// Try to evaluate the metric
|
||||||
_, err := mr.EvalFloat64Condition(metric.Calc, params)
|
_, err := agg.EvalFloat64Condition(metric.Calc, params)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
cclog.ComponentError(m.name, "Calculation for metric", metric.Name, "failed:", err.Error())
|
cclog.ComponentError(m.name, "Calculation for metric", metric.Name, "failed:", err.Error())
|
||||||
continue
|
continue
|
||||||
@ -343,7 +343,7 @@ func (m *LikwidCollector) Init(config json.RawMessage) error {
|
|||||||
}
|
}
|
||||||
for _, metric := range m.config.Metrics {
|
for _, metric := range m.config.Metrics {
|
||||||
// Try to evaluate the global metric
|
// Try to evaluate the global metric
|
||||||
_, err := mr.EvalFloat64Condition(metric.Calc, globalParams)
|
_, err := agg.EvalFloat64Condition(metric.Calc, globalParams)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
cclog.ComponentError(m.name, "Calculation for metric", metric.Name, "failed:", err.Error())
|
cclog.ComponentError(m.name, "Calculation for metric", metric.Name, "failed:", err.Error())
|
||||||
continue
|
continue
|
||||||
@ -428,7 +428,7 @@ func (m *LikwidCollector) calcEventsetMetrics(group int, interval time.Duration,
|
|||||||
scopemap := m.scopeRespTids[metric.Scope]
|
scopemap := m.scopeRespTids[metric.Scope]
|
||||||
for domain, tid := range scopemap {
|
for domain, tid := range scopemap {
|
||||||
if tid >= 0 {
|
if tid >= 0 {
|
||||||
value, err := mr.EvalFloat64Condition(metric.Calc, m.results[group][tid])
|
value, err := agg.EvalFloat64Condition(metric.Calc, m.results[group][tid])
|
||||||
if err != nil {
|
if err != nil {
|
||||||
cclog.ComponentError(m.name, "Calculation for metric", metric.Name, "failed:", err.Error())
|
cclog.ComponentError(m.name, "Calculation for metric", metric.Name, "failed:", err.Error())
|
||||||
continue
|
continue
|
||||||
@ -465,7 +465,7 @@ func (m *LikwidCollector) calcGlobalMetrics(interval time.Duration, output chan
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
// Evaluate the metric
|
// Evaluate the metric
|
||||||
value, err := mr.EvalFloat64Condition(metric.Calc, params)
|
value, err := agg.EvalFloat64Condition(metric.Calc, params)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
cclog.ComponentError(m.name, "Calculation for metric", metric.Name, "failed:", err.Error())
|
cclog.ComponentError(m.name, "Calculation for metric", metric.Name, "failed:", err.Error())
|
||||||
continue
|
continue
|
||||||
|
38
internal/metricAggregator/README.md
Normal file
38
internal/metricAggregator/README.md
Normal file
@ -0,0 +1,38 @@
|
|||||||
|
# The MetricAggregator
|
||||||
|
|
||||||
|
In some cases, further combination of metrics or raw values is required. For that, strings like `foo + 1` with a runtime-dependent `foo` need to be evaluated. The MetricAggregator relies on the [`gval`](https://github.com/PaesslerAG/gval) Golang package to perform all expression evaluation. The `gval` package provides the basic arithmetic operations but the MetricAggregator defines additional ones.
|
||||||
|
|
||||||
|
**Note**: To get an impression which expressions can be handled by `gval`, see its [README](https://github.com/PaesslerAG/gval/blob/master/README.md)
|
||||||
|
|
||||||
|
## Simple expression evaluation
|
||||||
|
|
||||||
|
For simple expression evaluation, the MetricAggregator provides two functions for different use-cases:
|
||||||
|
- `EvalBoolCondition(expression string, params map[string]interface{})`: Used by the MetricRouter to match metrics like `metric.Name() == 'mymetric'`
|
||||||
|
- `EvalFloat64Condition(expression string, params map[string]interface{})`: Used by the MetricRouter and LikwidCollector to derive new values like `(PMC0+PMC1)/PMC3`
|
||||||
|
|
||||||
|
## MetricAggregator extensions for `gval`
|
||||||
|
|
||||||
|
The MetricAggregator provides these functions in addition to the `Full` language in `gval`:
|
||||||
|
- `sum(array)`: Sum up values in an array like `sum(values)`
|
||||||
|
- `min(array)`: Get the minimum value in an array like `min(values)`
|
||||||
|
- `avg(array)`: Get the mean value in an array like `avg(values)`
|
||||||
|
- `mean(array)`: Get the mean value in an array like `mean(values)`
|
||||||
|
- `max(array)`: Get the maximum value in an array like `max(values)`
|
||||||
|
- `len(array)`: Get the length of an array like `len(values)`
|
||||||
|
- `median(array)`: Get the median value in an array like `median(values)`
|
||||||
|
- `in`: Check existence in an array like `0 in getCpuList()` to check whether there is an entry `0`. Also substring matching works like `temp in metric.Name()`
|
||||||
|
- `match`: Regular-expression matching like `match('temp_cores_%d+', metric.Name())`. **Note**: all `\` in a regex have to be replaced with `%`
|
||||||
|
- `getCpuCore(cpuid)`: For a CPU id, get the corresponding CPU core id like `getCpuCore(0)`
|
||||||
|
- `getCpuSocket(cpuid)`: For a CPU id, get the corresponding CPU socket id
|
||||||
|
- `getCpuNuma(cpuid)`: For a CPU id, get the corresponding NUMA domain id
|
||||||
|
- `getCpuDie(cpuid)`: For a CPU id, get the corresponding CPU die id
|
||||||
|
- `getSockCpuList(sockid)`: For a given CPU socket id, the list of CPU ids is returned like the CPUs on socket 1 `getSockCpuList(1)`
|
||||||
|
- `getNumaCpuList(numaid)`: For a given NUMA node id, the list of CPU ids is returned
|
||||||
|
- `getDieCpuList(dieid)`: For a given CPU die id, the list of CPU ids is returned
|
||||||
|
- `getCoreCpuList(coreid)`: For a given CPU core id, the list of CPU ids is returned
|
||||||
|
- `getCpuList`: Get the list of all CPUs
|
||||||
|
|
||||||
|
## Limitations
|
||||||
|
|
||||||
|
- Since the metrics are written in JSON files which do not allow `""` without proper escaping inside of JSON strings, you have to use `''` for strings.
|
||||||
|
- Since `\` is interpreted by JSON as an escape character, it cannot be used in metrics. But it is required to write regular expressions. So instead of `\`, use `%` and the MetricAggregator replaces them after reading the JSON file.
|
@ -1,4 +1,4 @@
|
|||||||
package metricRouter
|
package metricAggregator
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"context"
|
"context"
|
||||||
@ -16,7 +16,7 @@ import (
|
|||||||
"github.com/PaesslerAG/gval"
|
"github.com/PaesslerAG/gval"
|
||||||
)
|
)
|
||||||
|
|
||||||
type metricAggregatorIntervalConfig struct {
|
type MetricAggregatorIntervalConfig struct {
|
||||||
Name string `json:"name"` // Metric name for the new metric
|
Name string `json:"name"` // Metric name for the new metric
|
||||||
Function string `json:"function"` // Function to apply on the metric
|
Function string `json:"function"` // Function to apply on the metric
|
||||||
Condition string `json:"if"` // Condition for applying function
|
Condition string `json:"if"` // Condition for applying function
|
||||||
@ -27,7 +27,7 @@ type metricAggregatorIntervalConfig struct {
|
|||||||
}
|
}
|
||||||
|
|
||||||
type metricAggregator struct {
|
type metricAggregator struct {
|
||||||
functions []*metricAggregatorIntervalConfig
|
functions []*MetricAggregatorIntervalConfig
|
||||||
constants map[string]interface{}
|
constants map[string]interface{}
|
||||||
language gval.Language
|
language gval.Language
|
||||||
output chan lp.CCMetric
|
output chan lp.CCMetric
|
||||||
@ -65,7 +65,7 @@ var metricCacheLanguage = gval.NewLanguage(
|
|||||||
|
|
||||||
func (c *metricAggregator) Init(output chan lp.CCMetric) error {
|
func (c *metricAggregator) Init(output chan lp.CCMetric) error {
|
||||||
c.output = output
|
c.output = output
|
||||||
c.functions = make([]*metricAggregatorIntervalConfig, 0)
|
c.functions = make([]*MetricAggregatorIntervalConfig, 0)
|
||||||
c.constants = make(map[string]interface{})
|
c.constants = make(map[string]interface{})
|
||||||
|
|
||||||
// add constants like hostname, numSockets, ... to constants list
|
// add constants like hostname, numSockets, ... to constants list
|
||||||
@ -246,7 +246,7 @@ func (c *metricAggregator) AddAggregation(name, function, condition string, tags
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
var agg metricAggregatorIntervalConfig
|
var agg MetricAggregatorIntervalConfig
|
||||||
agg.Name = name
|
agg.Name = name
|
||||||
agg.Condition = newcond
|
agg.Condition = newcond
|
||||||
agg.gvalCond = gvalCond
|
agg.gvalCond = gvalCond
|
@ -1,4 +1,4 @@
|
|||||||
package metricRouter
|
package metricAggregator
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"errors"
|
"errors"
|
@ -7,6 +7,7 @@ import (
|
|||||||
cclog "github.com/ClusterCockpit/cc-metric-collector/internal/ccLogger"
|
cclog "github.com/ClusterCockpit/cc-metric-collector/internal/ccLogger"
|
||||||
|
|
||||||
lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric"
|
lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric"
|
||||||
|
agg "github.com/ClusterCockpit/cc-metric-collector/internal/metricAggregator"
|
||||||
mct "github.com/ClusterCockpit/cc-metric-collector/internal/multiChanTicker"
|
mct "github.com/ClusterCockpit/cc-metric-collector/internal/multiChanTicker"
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -28,7 +29,7 @@ type metricCache struct {
|
|||||||
tickchan chan time.Time
|
tickchan chan time.Time
|
||||||
done chan bool
|
done chan bool
|
||||||
output chan lp.CCMetric
|
output chan lp.CCMetric
|
||||||
aggEngine MetricAggregator
|
aggEngine agg.MetricAggregator
|
||||||
}
|
}
|
||||||
|
|
||||||
type MetricCache interface {
|
type MetricCache interface {
|
||||||
@ -59,7 +60,7 @@ func (c *metricCache) Init(output chan lp.CCMetric, ticker mct.MultiChanTicker,
|
|||||||
|
|
||||||
// Create a new aggregation engine. No separate goroutine at the moment
|
// Create a new aggregation engine. No separate goroutine at the moment
|
||||||
// The code is executed by the MetricCache goroutine
|
// The code is executed by the MetricCache goroutine
|
||||||
c.aggEngine, err = NewAggregator(c.output)
|
c.aggEngine, err = agg.NewAggregator(c.output)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
cclog.ComponentError("MetricCache", "Cannot create aggregator")
|
cclog.ComponentError("MetricCache", "Cannot create aggregator")
|
||||||
return err
|
return err
|
||||||
|
@ -10,6 +10,7 @@ import (
|
|||||||
cclog "github.com/ClusterCockpit/cc-metric-collector/internal/ccLogger"
|
cclog "github.com/ClusterCockpit/cc-metric-collector/internal/ccLogger"
|
||||||
|
|
||||||
lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric"
|
lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric"
|
||||||
|
agg "github.com/ClusterCockpit/cc-metric-collector/internal/metricAggregator"
|
||||||
mct "github.com/ClusterCockpit/cc-metric-collector/internal/multiChanTicker"
|
mct "github.com/ClusterCockpit/cc-metric-collector/internal/multiChanTicker"
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -22,15 +23,15 @@ type metricRouterTagConfig struct {
|
|||||||
|
|
||||||
// Metric router configuration
|
// Metric router configuration
|
||||||
type metricRouterConfig struct {
|
type metricRouterConfig struct {
|
||||||
AddTags []metricRouterTagConfig `json:"add_tags"` // List of tags that are added when the condition is met
|
AddTags []metricRouterTagConfig `json:"add_tags"` // List of tags that are added when the condition is met
|
||||||
DelTags []metricRouterTagConfig `json:"delete_tags"` // List of tags that are removed when the condition is met
|
DelTags []metricRouterTagConfig `json:"delete_tags"` // List of tags that are removed when the condition is met
|
||||||
IntervalAgg []metricAggregatorIntervalConfig `json:"interval_aggregates"` // List of aggregation function processed at the end of an interval
|
IntervalAgg []agg.MetricAggregatorIntervalConfig `json:"interval_aggregates"` // List of aggregation function processed at the end of an interval
|
||||||
DropMetrics []string `json:"drop_metrics"` // List of metric names to drop. For fine-grained dropping use drop_metrics_if
|
DropMetrics []string `json:"drop_metrics"` // List of metric names to drop. For fine-grained dropping use drop_metrics_if
|
||||||
DropMetricsIf []string `json:"drop_metrics_if"` // List of evaluatable terms to drop metrics
|
DropMetricsIf []string `json:"drop_metrics_if"` // List of evaluatable terms to drop metrics
|
||||||
RenameMetrics map[string]string `json:"rename_metrics"` // Map to rename metric name from key to value
|
RenameMetrics map[string]string `json:"rename_metrics"` // Map to rename metric name from key to value
|
||||||
IntervalStamp bool `json:"interval_timestamp"` // Update timestamp periodically by ticker each interval?
|
IntervalStamp bool `json:"interval_timestamp"` // Update timestamp periodically by ticker each interval?
|
||||||
NumCacheIntervals int `json:"num_cache_intervals"` // Number of intervals of cached metrics for evaluation
|
NumCacheIntervals int `json:"num_cache_intervals"` // Number of intervals of cached metrics for evaluation
|
||||||
dropMetrics map[string]bool // Internal map for O(1) lookup
|
dropMetrics map[string]bool // Internal map for O(1) lookup
|
||||||
}
|
}
|
||||||
|
|
||||||
// Metric router data structure
|
// Metric router data structure
|
||||||
@ -161,7 +162,7 @@ func (r *metricRouter) DoAddTags(point lp.CCMetric) {
|
|||||||
conditionMatches = true
|
conditionMatches = true
|
||||||
} else {
|
} else {
|
||||||
var err error
|
var err error
|
||||||
conditionMatches, err = EvalBoolCondition(m.Condition, getParamMap(point))
|
conditionMatches, err = agg.EvalBoolCondition(m.Condition, getParamMap(point))
|
||||||
if err != nil {
|
if err != nil {
|
||||||
cclog.ComponentError("MetricRouter", err.Error())
|
cclog.ComponentError("MetricRouter", err.Error())
|
||||||
conditionMatches = false
|
conditionMatches = false
|
||||||
@ -182,7 +183,7 @@ func (r *metricRouter) DoDelTags(point lp.CCMetric) {
|
|||||||
conditionMatches = true
|
conditionMatches = true
|
||||||
} else {
|
} else {
|
||||||
var err error
|
var err error
|
||||||
conditionMatches, err = EvalBoolCondition(m.Condition, getParamMap(point))
|
conditionMatches, err = agg.EvalBoolCondition(m.Condition, getParamMap(point))
|
||||||
if err != nil {
|
if err != nil {
|
||||||
cclog.ComponentError("MetricRouter", err.Error())
|
cclog.ComponentError("MetricRouter", err.Error())
|
||||||
conditionMatches = false
|
conditionMatches = false
|
||||||
@ -202,7 +203,7 @@ func (r *metricRouter) dropMetric(point lp.CCMetric) bool {
|
|||||||
}
|
}
|
||||||
// Checking the dropping conditions
|
// Checking the dropping conditions
|
||||||
for _, m := range r.config.DropMetricsIf {
|
for _, m := range r.config.DropMetricsIf {
|
||||||
conditionMatches, err := EvalBoolCondition(m, getParamMap(point))
|
conditionMatches, err := agg.EvalBoolCondition(m, getParamMap(point))
|
||||||
if conditionMatches || err != nil {
|
if conditionMatches || err != nil {
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user