mirror of
				https://github.com/ClusterCockpit/cc-metric-collector.git
				synced 2025-11-04 10:45:06 +01:00 
			
		
		
		
	Split MetricRouter and MetricAggregator (#24)
* Split MetricRouter and MetricAggregator * Missing change in MetricCache * Add README for MetricAggregator
This commit is contained in:
		@@ -24,7 +24,7 @@ import (
 | 
				
			|||||||
	cclog "github.com/ClusterCockpit/cc-metric-collector/internal/ccLogger"
 | 
						cclog "github.com/ClusterCockpit/cc-metric-collector/internal/ccLogger"
 | 
				
			||||||
	lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric"
 | 
						lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric"
 | 
				
			||||||
	topo "github.com/ClusterCockpit/cc-metric-collector/internal/ccTopology"
 | 
						topo "github.com/ClusterCockpit/cc-metric-collector/internal/ccTopology"
 | 
				
			||||||
	mr "github.com/ClusterCockpit/cc-metric-collector/internal/metricRouter"
 | 
						agg "github.com/ClusterCockpit/cc-metric-collector/internal/metricAggregator"
 | 
				
			||||||
)
 | 
					)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
type MetricScope string
 | 
					type MetricScope string
 | 
				
			||||||
@@ -70,10 +70,10 @@ func GetAllMetricScopes() []MetricScope {
 | 
				
			|||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
type LikwidCollectorMetricConfig struct {
 | 
					type LikwidCollectorMetricConfig struct {
 | 
				
			||||||
	Name        string      `json:"name"`        // Name of the metric
 | 
						Name string `json:"name"` // Name of the metric
 | 
				
			||||||
	Calc        string      `json:"calc"`        // Calculation for the metric using
 | 
						Calc string `json:"calc"` // Calculation for the metric using
 | 
				
			||||||
	Aggr        string      `json:"aggregation"` // if scope unequal to LIKWID metric scope, the values are combined (sum, min, max, mean or avg, median)
 | 
						//Aggr        string      `json:"aggregation"` // if scope unequal to LIKWID metric scope, the values are combined (sum, min, max, mean or avg, median)
 | 
				
			||||||
	Scope       MetricScope `json:"scope"`       // scope for calculation. subscopes are aggregated using the 'aggregation' function
 | 
						Scope       MetricScope `json:"scope"` // scope for calculation. subscopes are aggregated using the 'aggregation' function
 | 
				
			||||||
	Publish     bool        `json:"publish"`
 | 
						Publish     bool        `json:"publish"`
 | 
				
			||||||
	granulatity MetricScope
 | 
						granulatity MetricScope
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
@@ -314,7 +314,7 @@ func (m *LikwidCollector) Init(config json.RawMessage) error {
 | 
				
			|||||||
		}
 | 
							}
 | 
				
			||||||
		for _, metric := range evset.Metrics {
 | 
							for _, metric := range evset.Metrics {
 | 
				
			||||||
			// Try to evaluate the metric
 | 
								// Try to evaluate the metric
 | 
				
			||||||
			_, err := mr.EvalFloat64Condition(metric.Calc, params)
 | 
								_, err := agg.EvalFloat64Condition(metric.Calc, params)
 | 
				
			||||||
			if err != nil {
 | 
								if err != nil {
 | 
				
			||||||
				cclog.ComponentError(m.name, "Calculation for metric", metric.Name, "failed:", err.Error())
 | 
									cclog.ComponentError(m.name, "Calculation for metric", metric.Name, "failed:", err.Error())
 | 
				
			||||||
				continue
 | 
									continue
 | 
				
			||||||
@@ -343,7 +343,7 @@ func (m *LikwidCollector) Init(config json.RawMessage) error {
 | 
				
			|||||||
	}
 | 
						}
 | 
				
			||||||
	for _, metric := range m.config.Metrics {
 | 
						for _, metric := range m.config.Metrics {
 | 
				
			||||||
		// Try to evaluate the global metric
 | 
							// Try to evaluate the global metric
 | 
				
			||||||
		_, err := mr.EvalFloat64Condition(metric.Calc, globalParams)
 | 
							_, err := agg.EvalFloat64Condition(metric.Calc, globalParams)
 | 
				
			||||||
		if err != nil {
 | 
							if err != nil {
 | 
				
			||||||
			cclog.ComponentError(m.name, "Calculation for metric", metric.Name, "failed:", err.Error())
 | 
								cclog.ComponentError(m.name, "Calculation for metric", metric.Name, "failed:", err.Error())
 | 
				
			||||||
			continue
 | 
								continue
 | 
				
			||||||
@@ -428,7 +428,7 @@ func (m *LikwidCollector) calcEventsetMetrics(group int, interval time.Duration,
 | 
				
			|||||||
		scopemap := m.scopeRespTids[metric.Scope]
 | 
							scopemap := m.scopeRespTids[metric.Scope]
 | 
				
			||||||
		for domain, tid := range scopemap {
 | 
							for domain, tid := range scopemap {
 | 
				
			||||||
			if tid >= 0 {
 | 
								if tid >= 0 {
 | 
				
			||||||
				value, err := mr.EvalFloat64Condition(metric.Calc, m.results[group][tid])
 | 
									value, err := agg.EvalFloat64Condition(metric.Calc, m.results[group][tid])
 | 
				
			||||||
				if err != nil {
 | 
									if err != nil {
 | 
				
			||||||
					cclog.ComponentError(m.name, "Calculation for metric", metric.Name, "failed:", err.Error())
 | 
										cclog.ComponentError(m.name, "Calculation for metric", metric.Name, "failed:", err.Error())
 | 
				
			||||||
					continue
 | 
										continue
 | 
				
			||||||
@@ -465,7 +465,7 @@ func (m *LikwidCollector) calcGlobalMetrics(interval time.Duration, output chan
 | 
				
			|||||||
					}
 | 
										}
 | 
				
			||||||
				}
 | 
									}
 | 
				
			||||||
				// Evaluate the metric
 | 
									// Evaluate the metric
 | 
				
			||||||
				value, err := mr.EvalFloat64Condition(metric.Calc, params)
 | 
									value, err := agg.EvalFloat64Condition(metric.Calc, params)
 | 
				
			||||||
				if err != nil {
 | 
									if err != nil {
 | 
				
			||||||
					cclog.ComponentError(m.name, "Calculation for metric", metric.Name, "failed:", err.Error())
 | 
										cclog.ComponentError(m.name, "Calculation for metric", metric.Name, "failed:", err.Error())
 | 
				
			||||||
					continue
 | 
										continue
 | 
				
			||||||
 
 | 
				
			|||||||
							
								
								
									
										38
									
								
								internal/metricAggregator/README.md
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										38
									
								
								internal/metricAggregator/README.md
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,38 @@
 | 
				
			|||||||
 | 
					# The MetricAggregator
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					In some cases, further combination of metrics or raw values is required. For that purpose, strings like `foo + 1` with a runtime-dependent `foo` need to be evaluated. The MetricAggregator relies on the [`gval`](https://github.com/PaesslerAG/gval) Golang package to perform all expression evaluation. The `gval` package provides the basic arithmetic operations, but the MetricAggregator defines additional ones.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					**Note**: To get an impression which expressions can be handled by `gval`, see its [README](https://github.com/PaesslerAG/gval/blob/master/README.md)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					## Simple expression evaluation
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					For simple expression evaluation, the MetricAggregator provides two functions for different use-cases:
 | 
				
			||||||
 | 
					- `EvalBoolCondition(expression string, params map[string]interface{})`: Used by the MetricRouter to match metrics like `metric.Name() == 'mymetric'`
 | 
				
			||||||
 | 
					- `EvalFloat64Condition(expression string, params map[string]interface{})`: Used by the MetricRouter and LikwidCollector to derive new values like `(PMC0+PMC1)/PMC3`
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					## MetricAggregator extensions for `gval`
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					The MetricAggregator provides these functions in addition to the `Full` language in `gval`:
 | 
				
			||||||
 | 
					- `sum(array)`: Sum up values in an array like `sum(values)`
 | 
				
			||||||
 | 
					- `min(array)`: Get the minimum value in an array like `min(values)`
 | 
				
			||||||
 | 
					- `avg(array)`: Get the mean value in an array like `avg(values)`
 | 
				
			||||||
 | 
					- `mean(array)`: Get the mean value in an array like `mean(values)`
 | 
				
			||||||
 | 
					- `max(array)`: Get the maximum value in an array like `max(values)`
 | 
				
			||||||
 | 
					- `len(array)`: Get the length of an array like `len(values)`
 | 
				
			||||||
 | 
					- `median(array)`: Get the median value in an array like `median(values)`
 | 
				
			||||||
 | 
					- `in`: Check existence in an array like `0 in getCpuList()` to check whether there is an entry `0`. Also substring matching works like `temp in metric.Name()`
 | 
				
			||||||
 | 
					- `match`: Regular-expression matching like `match('temp_cores_%d+', metric.Name())`. **Note**: every `\` in a regex has to be replaced with `%`
 | 
				
			||||||
 | 
					- `getCpuCore(cpuid)`: For a CPU id, get the corresponding CPU core id like `getCpuCore(0)`
 | 
				
			||||||
 | 
					- `getCpuSocket(cpuid)`: For a CPU id, get the corresponding CPU socket id
 | 
				
			||||||
 | 
					- `getCpuNuma(cpuid)`: For a CPU id, get the corresponding NUMA domain id
 | 
				
			||||||
 | 
					- `getCpuDie(cpuid)`: For a CPU id, get the corresponding CPU die id
 | 
				
			||||||
 | 
					- `getSockCpuList(sockid)`: For a given CPU socket id, the list of CPU ids is returned like the CPUs on socket 1 `getSockCpuList(1)`
 | 
				
			||||||
 | 
					- `getNumaCpuList(numaid)`: For a given NUMA node id, the list of CPU ids is returned
 | 
				
			||||||
 | 
					- `getDieCpuList(dieid)`: For a given CPU die id, the list of CPU ids is returned
 | 
				
			||||||
 | 
					- `getCoreCpuList(coreid)`: For a given CPU core id, the list of CPU ids is returned
 | 
				
			||||||
 | 
					- `getCpuList`: Get the list of all CPUs
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					## Limitations
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					- Since the metrics are written in JSON files which do not allow `""` without proper escaping inside of JSON strings, you have to use `''` for strings.
 | 
				
			||||||
 | 
					- Since `\` is interpreted by JSON as an escape character, it cannot be used in metrics. But it is required to write regular expressions. So instead of `\`, use `%`, and the MetricAggregator replaces each `%` with `\` after reading the JSON file.
 | 
				
			||||||
@@ -1,4 +1,4 @@
 | 
				
			|||||||
package metricRouter
 | 
					package metricAggregator
 | 
				
			||||||
 | 
					
 | 
				
			||||||
import (
 | 
					import (
 | 
				
			||||||
	"context"
 | 
						"context"
 | 
				
			||||||
@@ -16,7 +16,7 @@ import (
 | 
				
			|||||||
	"github.com/PaesslerAG/gval"
 | 
						"github.com/PaesslerAG/gval"
 | 
				
			||||||
)
 | 
					)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
type metricAggregatorIntervalConfig struct {
 | 
					type MetricAggregatorIntervalConfig struct {
 | 
				
			||||||
	Name      string            `json:"name"`     // Metric name for the new metric
 | 
						Name      string            `json:"name"`     // Metric name for the new metric
 | 
				
			||||||
	Function  string            `json:"function"` // Function to apply on the metric
 | 
						Function  string            `json:"function"` // Function to apply on the metric
 | 
				
			||||||
	Condition string            `json:"if"`       // Condition for applying function
 | 
						Condition string            `json:"if"`       // Condition for applying function
 | 
				
			||||||
@@ -27,7 +27,7 @@ type metricAggregatorIntervalConfig struct {
 | 
				
			|||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
type metricAggregator struct {
 | 
					type metricAggregator struct {
 | 
				
			||||||
	functions []*metricAggregatorIntervalConfig
 | 
						functions []*MetricAggregatorIntervalConfig
 | 
				
			||||||
	constants map[string]interface{}
 | 
						constants map[string]interface{}
 | 
				
			||||||
	language  gval.Language
 | 
						language  gval.Language
 | 
				
			||||||
	output    chan lp.CCMetric
 | 
						output    chan lp.CCMetric
 | 
				
			||||||
@@ -65,7 +65,7 @@ var metricCacheLanguage = gval.NewLanguage(
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
func (c *metricAggregator) Init(output chan lp.CCMetric) error {
 | 
					func (c *metricAggregator) Init(output chan lp.CCMetric) error {
 | 
				
			||||||
	c.output = output
 | 
						c.output = output
 | 
				
			||||||
	c.functions = make([]*metricAggregatorIntervalConfig, 0)
 | 
						c.functions = make([]*MetricAggregatorIntervalConfig, 0)
 | 
				
			||||||
	c.constants = make(map[string]interface{})
 | 
						c.constants = make(map[string]interface{})
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	// add constants like hostname, numSockets, ... to constants list
 | 
						// add constants like hostname, numSockets, ... to constants list
 | 
				
			||||||
@@ -246,7 +246,7 @@ func (c *metricAggregator) AddAggregation(name, function, condition string, tags
 | 
				
			|||||||
			return nil
 | 
								return nil
 | 
				
			||||||
		}
 | 
							}
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
	var agg metricAggregatorIntervalConfig
 | 
						var agg MetricAggregatorIntervalConfig
 | 
				
			||||||
	agg.Name = name
 | 
						agg.Name = name
 | 
				
			||||||
	agg.Condition = newcond
 | 
						agg.Condition = newcond
 | 
				
			||||||
	agg.gvalCond = gvalCond
 | 
						agg.gvalCond = gvalCond
 | 
				
			||||||
@@ -1,4 +1,4 @@
 | 
				
			|||||||
package metricRouter
 | 
					package metricAggregator
 | 
				
			||||||
 | 
					
 | 
				
			||||||
import (
 | 
					import (
 | 
				
			||||||
	"errors"
 | 
						"errors"
 | 
				
			||||||
@@ -7,6 +7,7 @@ import (
 | 
				
			|||||||
	cclog "github.com/ClusterCockpit/cc-metric-collector/internal/ccLogger"
 | 
						cclog "github.com/ClusterCockpit/cc-metric-collector/internal/ccLogger"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric"
 | 
						lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric"
 | 
				
			||||||
 | 
						agg "github.com/ClusterCockpit/cc-metric-collector/internal/metricAggregator"
 | 
				
			||||||
	mct "github.com/ClusterCockpit/cc-metric-collector/internal/multiChanTicker"
 | 
						mct "github.com/ClusterCockpit/cc-metric-collector/internal/multiChanTicker"
 | 
				
			||||||
)
 | 
					)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@@ -28,7 +29,7 @@ type metricCache struct {
 | 
				
			|||||||
	tickchan   chan time.Time
 | 
						tickchan   chan time.Time
 | 
				
			||||||
	done       chan bool
 | 
						done       chan bool
 | 
				
			||||||
	output     chan lp.CCMetric
 | 
						output     chan lp.CCMetric
 | 
				
			||||||
	aggEngine  MetricAggregator
 | 
						aggEngine  agg.MetricAggregator
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
type MetricCache interface {
 | 
					type MetricCache interface {
 | 
				
			||||||
@@ -59,7 +60,7 @@ func (c *metricCache) Init(output chan lp.CCMetric, ticker mct.MultiChanTicker,
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
	// Create a new aggregation engine. No separate goroutine at the moment
 | 
						// Create a new aggregation engine. No separate goroutine at the moment
 | 
				
			||||||
	// The code is executed by the MetricCache goroutine
 | 
						// The code is executed by the MetricCache goroutine
 | 
				
			||||||
	c.aggEngine, err = NewAggregator(c.output)
 | 
						c.aggEngine, err = agg.NewAggregator(c.output)
 | 
				
			||||||
	if err != nil {
 | 
						if err != nil {
 | 
				
			||||||
		cclog.ComponentError("MetricCache", "Cannot create aggregator")
 | 
							cclog.ComponentError("MetricCache", "Cannot create aggregator")
 | 
				
			||||||
		return err
 | 
							return err
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -10,6 +10,7 @@ import (
 | 
				
			|||||||
	cclog "github.com/ClusterCockpit/cc-metric-collector/internal/ccLogger"
 | 
						cclog "github.com/ClusterCockpit/cc-metric-collector/internal/ccLogger"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric"
 | 
						lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric"
 | 
				
			||||||
 | 
						agg "github.com/ClusterCockpit/cc-metric-collector/internal/metricAggregator"
 | 
				
			||||||
	mct "github.com/ClusterCockpit/cc-metric-collector/internal/multiChanTicker"
 | 
						mct "github.com/ClusterCockpit/cc-metric-collector/internal/multiChanTicker"
 | 
				
			||||||
)
 | 
					)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@@ -22,15 +23,15 @@ type metricRouterTagConfig struct {
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
// Metric router configuration
 | 
					// Metric router configuration
 | 
				
			||||||
type metricRouterConfig struct {
 | 
					type metricRouterConfig struct {
 | 
				
			||||||
	AddTags           []metricRouterTagConfig          `json:"add_tags"`            // List of tags that are added when the condition is met
 | 
						AddTags           []metricRouterTagConfig              `json:"add_tags"`            // List of tags that are added when the condition is met
 | 
				
			||||||
	DelTags           []metricRouterTagConfig          `json:"delete_tags"`         // List of tags that are removed when the condition is met
 | 
						DelTags           []metricRouterTagConfig              `json:"delete_tags"`         // List of tags that are removed when the condition is met
 | 
				
			||||||
	IntervalAgg       []metricAggregatorIntervalConfig `json:"interval_aggregates"` // List of aggregation function processed at the end of an interval
 | 
						IntervalAgg       []agg.MetricAggregatorIntervalConfig `json:"interval_aggregates"` // List of aggregation function processed at the end of an interval
 | 
				
			||||||
	DropMetrics       []string                         `json:"drop_metrics"`        // List of metric names to drop. For fine-grained dropping use drop_metrics_if
 | 
						DropMetrics       []string                             `json:"drop_metrics"`        // List of metric names to drop. For fine-grained dropping use drop_metrics_if
 | 
				
			||||||
	DropMetricsIf     []string                         `json:"drop_metrics_if"`     // List of evaluatable terms to drop metrics
 | 
						DropMetricsIf     []string                             `json:"drop_metrics_if"`     // List of evaluatable terms to drop metrics
 | 
				
			||||||
	RenameMetrics     map[string]string                `json:"rename_metrics"`      // Map to rename metric name from key to value
 | 
						RenameMetrics     map[string]string                    `json:"rename_metrics"`      // Map to rename metric name from key to value
 | 
				
			||||||
	IntervalStamp     bool                             `json:"interval_timestamp"`  // Update timestamp periodically by ticker each interval?
 | 
						IntervalStamp     bool                                 `json:"interval_timestamp"`  // Update timestamp periodically by ticker each interval?
 | 
				
			||||||
	NumCacheIntervals int                              `json:"num_cache_intervals"` // Number of intervals of cached metrics for evaluation
 | 
						NumCacheIntervals int                                  `json:"num_cache_intervals"` // Number of intervals of cached metrics for evaluation
 | 
				
			||||||
	dropMetrics       map[string]bool                  // Internal map for O(1) lookup
 | 
						dropMetrics       map[string]bool                      // Internal map for O(1) lookup
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
// Metric router data structure
 | 
					// Metric router data structure
 | 
				
			||||||
@@ -161,7 +162,7 @@ func (r *metricRouter) DoAddTags(point lp.CCMetric) {
 | 
				
			|||||||
			conditionMatches = true
 | 
								conditionMatches = true
 | 
				
			||||||
		} else {
 | 
							} else {
 | 
				
			||||||
			var err error
 | 
								var err error
 | 
				
			||||||
			conditionMatches, err = EvalBoolCondition(m.Condition, getParamMap(point))
 | 
								conditionMatches, err = agg.EvalBoolCondition(m.Condition, getParamMap(point))
 | 
				
			||||||
			if err != nil {
 | 
								if err != nil {
 | 
				
			||||||
				cclog.ComponentError("MetricRouter", err.Error())
 | 
									cclog.ComponentError("MetricRouter", err.Error())
 | 
				
			||||||
				conditionMatches = false
 | 
									conditionMatches = false
 | 
				
			||||||
@@ -182,7 +183,7 @@ func (r *metricRouter) DoDelTags(point lp.CCMetric) {
 | 
				
			|||||||
			conditionMatches = true
 | 
								conditionMatches = true
 | 
				
			||||||
		} else {
 | 
							} else {
 | 
				
			||||||
			var err error
 | 
								var err error
 | 
				
			||||||
			conditionMatches, err = EvalBoolCondition(m.Condition, getParamMap(point))
 | 
								conditionMatches, err = agg.EvalBoolCondition(m.Condition, getParamMap(point))
 | 
				
			||||||
			if err != nil {
 | 
								if err != nil {
 | 
				
			||||||
				cclog.ComponentError("MetricRouter", err.Error())
 | 
									cclog.ComponentError("MetricRouter", err.Error())
 | 
				
			||||||
				conditionMatches = false
 | 
									conditionMatches = false
 | 
				
			||||||
@@ -202,7 +203,7 @@ func (r *metricRouter) dropMetric(point lp.CCMetric) bool {
 | 
				
			|||||||
	}
 | 
						}
 | 
				
			||||||
	// Checking the dropping conditions
 | 
						// Checking the dropping conditions
 | 
				
			||||||
	for _, m := range r.config.DropMetricsIf {
 | 
						for _, m := range r.config.DropMetricsIf {
 | 
				
			||||||
		conditionMatches, err := EvalBoolCondition(m, getParamMap(point))
 | 
							conditionMatches, err := agg.EvalBoolCondition(m, getParamMap(point))
 | 
				
			||||||
		if conditionMatches || err != nil {
 | 
							if conditionMatches || err != nil {
 | 
				
			||||||
			return true
 | 
								return true
 | 
				
			||||||
		}
 | 
							}
 | 
				
			||||||
 
 | 
				
			|||||||
		Reference in New Issue
	
	Block a user