mirror of
https://github.com/ClusterCockpit/cc-metric-collector.git
synced 2025-07-20 11:51:40 +02:00
Modularize the whole thing (#16)
* Use channels, add a metric router, split up configuration and use extended version of Influx line protocol internally * Use central timer for collectors and router. Add expressions to router * Add expression to router config * Update entry points * Start with README * Update README for CCMetric * Formatting * Update README.md * Add README for MultiChanTicker * Add README for MultiChanTicker * Update README.md * Add README to metric router * Update main README * Remove SinkEntity type * Update README for sinks * Update go files * Update README for receivers * Update collectors README * Update collectors README * Use seperate page per collector * Fix for tempstat page * Add docs for customcmd collector * Add docs for ipmistat collector * Add docs for topprocs collector * Update customCmdMetric.md * Use seconds when calculating LIKWID metrics * Add IB metrics ib_recv_pkts and ib_xmit_pkts * Drop domain part of host name * Updated to latest stable version of likwid * Define source code dependencies in Makefile * Add GPFS / IBM Spectrum Scale collector * Add vet and staticcheck make targets * Add vet and staticcheck make targets * Avoid go vet warning: struct field tag `json:"..., omitempty"` not compatible with reflect.StructTag.Get: suspicious space in struct tag value struct field tag `json:"...", omitempty` not compatible with reflect.StructTag.Get: key:"value" pairs not separated by spaces * Add sample collector to README.md * Add CPU frequency collector * Avoid staticcheck warning: redundant return statement * Avoid staticcheck warning: unnecessary assignment to the blank identifier * Simplified code * Add CPUFreqCollectorCpuinfo a metric collector to measure the current frequency of the CPUs as obtained from /proc/cpuinfo Only measure on the first hyperthread * Add collector for NFS clients * Move publication of metrics into Flush() for NatsSink * Update GitHub actions * Refactoring * Avoid vet warning: Println arg list ends with redundant newline * Avoid vet 
warning struct field commands has json tag but is not exported * Avoid vet warning: return copies lock value. * Corrected typo * Refactoring * Add go sources in internal/... * Bad separator in Makefile * Fix Infiniband collector Co-authored-by: Holger Obermaier <40787752+ho-ob@users.noreply.github.com>
This commit is contained in:
50
internal/metricRouter/README.md
Normal file
50
internal/metricRouter/README.md
Normal file
@@ -0,0 +1,50 @@
|
||||
# CC Metric Router
|
||||
|
||||
The CCMetric router sits in between the collectors and the sinks and can be used to add and remove tags to/from traversing [CCMetrics](../ccMetric/README.md).
|
||||
|
||||
# Configuration
|
||||
|
||||
```json
|
||||
{
|
||||
"add_tags" : [
|
||||
{
|
||||
"key" : "cluster",
|
||||
"value" : "testcluster",
|
||||
"if" : "*"
|
||||
},
|
||||
{
|
||||
"key" : "test",
|
||||
"value" : "testing",
|
||||
"if" : "name == 'temp_package_id_0'"
|
||||
}
|
||||
],
|
||||
"delete_tags" : [
|
||||
{
|
||||
"key" : "unit",
|
||||
"value" : "*",
|
||||
"if" : "*"
|
||||
}
|
||||
],
|
||||
"interval_timestamp" : true
|
||||
}
|
||||
```
|
||||
|
||||
There are three main options: `add_tags`, `delete_tags` and `interval_timestamp`. `add_tags` and `delete_tags` are lists of dicts with the keys `key`, `value` and `if`. The `value` can be omitted in the `delete_tags` part, as only the `key` is used for removal. When `interval_timestamp` is enabled, all metrics traversing the router during an interval receive the same timestamp.
|
||||
|
||||
# Conditional manipulation of tags
|
||||
|
||||
The `if` setting allows conditional testing of a single metric like in the example:
|
||||
|
||||
```json
|
||||
{
|
||||
"key" : "test",
|
||||
"value" : "testing",
|
||||
"if" : "name == 'temp_package_id_0'"
|
||||
}
|
||||
```
|
||||
|
||||
If the CCMetric name is equal to 'temp_package_id_0', it adds an additional tag `test=testing` to the metric.
|
||||
|
||||
To match all metrics, use `*` as the condition. This is useful for adding a tag by default, like the `cluster=testcluster` tag in the example.
|
||||
|
||||
|
208
internal/metricRouter/metricRouter.go
Normal file
208
internal/metricRouter/metricRouter.go
Normal file
@@ -0,0 +1,208 @@
|
||||
package metricRouter
|
||||
|
||||
import (
	"encoding/json"
	"fmt"
	"log"
	"os"
	"sync"
	"time"

	lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric"
	mct "github.com/ClusterCockpit/cc-metric-collector/internal/multiChanTicker"
	"gopkg.in/Knetic/govaluate.v2"
)
|
||||
|
||||
// metricRouterTagConfig describes one tag manipulation rule from the router
// configuration file.
type metricRouterTagConfig struct {
	// Key is the name of the tag to add or remove.
	Key string `json:"key"`
	// Value is the tag value set by DoAddTags. DoDelTags ignores it and
	// removes the tag by Key only.
	Value string `json:"value"`
	// Condition is either "*" (always matches) or an expression that is
	// handed to EvalCondition for each metric.
	Condition string `json:"if"`
}
|
||||
|
||||
type metricRouterConfig struct {
|
||||
AddTags []metricRouterTagConfig `json:"add_tags"`
|
||||
DelTags []metricRouterTagConfig `json:"delete_tags"`
|
||||
IntervalStamp bool `json:"interval_timestamp"`
|
||||
}
|
||||
|
||||
type metricRouter struct {
|
||||
inputs []chan lp.CCMetric
|
||||
outputs []chan lp.CCMetric
|
||||
done chan bool
|
||||
wg *sync.WaitGroup
|
||||
timestamp time.Time
|
||||
ticker mct.MultiChanTicker
|
||||
config metricRouterConfig
|
||||
}
|
||||
|
||||
type MetricRouter interface {
|
||||
Init(ticker mct.MultiChanTicker, wg *sync.WaitGroup, routerConfigFile string) error
|
||||
AddInput(input chan lp.CCMetric)
|
||||
AddOutput(output chan lp.CCMetric)
|
||||
Start()
|
||||
Close()
|
||||
}
|
||||
|
||||
func (r *metricRouter) Init(ticker mct.MultiChanTicker, wg *sync.WaitGroup, routerConfigFile string) error {
|
||||
r.inputs = make([]chan lp.CCMetric, 0)
|
||||
r.outputs = make([]chan lp.CCMetric, 0)
|
||||
r.done = make(chan bool)
|
||||
r.wg = wg
|
||||
r.ticker = ticker
|
||||
configFile, err := os.Open(routerConfigFile)
|
||||
if err != nil {
|
||||
log.Print(err.Error())
|
||||
return err
|
||||
}
|
||||
defer configFile.Close()
|
||||
jsonParser := json.NewDecoder(configFile)
|
||||
err = jsonParser.Decode(&r.config)
|
||||
if err != nil {
|
||||
log.Print(err.Error())
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (r *metricRouter) StartTimer() {
|
||||
m := make(chan time.Time)
|
||||
r.ticker.AddChannel(m)
|
||||
go func() {
|
||||
for {
|
||||
select {
|
||||
case t := <-m:
|
||||
r.timestamp = t
|
||||
}
|
||||
}
|
||||
}()
|
||||
}
|
||||
|
||||
func (r *metricRouter) EvalCondition(Cond string, point lp.CCMetric) (bool, error) {
|
||||
expression, err := govaluate.NewEvaluableExpression(Cond)
|
||||
if err != nil {
|
||||
log.Print(Cond, " = ", err.Error())
|
||||
return false, err
|
||||
}
|
||||
params := make(map[string]interface{})
|
||||
params["name"] = point.Name()
|
||||
for _, t := range point.TagList() {
|
||||
params[t.Key] = t.Value
|
||||
}
|
||||
for _, m := range point.MetaList() {
|
||||
params[m.Key] = m.Value
|
||||
}
|
||||
for _, f := range point.FieldList() {
|
||||
params[f.Key] = f.Value
|
||||
}
|
||||
params["timestamp"] = point.Time()
|
||||
|
||||
result, err := expression.Evaluate(params)
|
||||
if err != nil {
|
||||
log.Print(Cond, " = ", err.Error())
|
||||
return false, err
|
||||
}
|
||||
return bool(result.(bool)), err
|
||||
}
|
||||
|
||||
func (r *metricRouter) DoAddTags(point lp.CCMetric) {
|
||||
for _, m := range r.config.AddTags {
|
||||
var conditionMatches bool
|
||||
|
||||
if m.Condition == "*" {
|
||||
conditionMatches = true
|
||||
} else {
|
||||
var err error
|
||||
conditionMatches, err = r.EvalCondition(m.Condition, point)
|
||||
if err != nil {
|
||||
log.Print(err.Error())
|
||||
conditionMatches = false
|
||||
}
|
||||
}
|
||||
if conditionMatches {
|
||||
point.AddTag(m.Key, m.Value)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (r *metricRouter) DoDelTags(point lp.CCMetric) {
|
||||
for _, m := range r.config.DelTags {
|
||||
var conditionMatches bool
|
||||
|
||||
if m.Condition == "*" {
|
||||
conditionMatches = true
|
||||
} else {
|
||||
var err error
|
||||
conditionMatches, err = r.EvalCondition(m.Condition, point)
|
||||
if err != nil {
|
||||
log.Print(err.Error())
|
||||
conditionMatches = false
|
||||
}
|
||||
}
|
||||
if conditionMatches {
|
||||
point.RemoveTag(m.Key)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (r *metricRouter) Start() {
|
||||
r.wg.Add(1)
|
||||
r.timestamp = time.Now()
|
||||
if r.config.IntervalStamp {
|
||||
r.StartTimer()
|
||||
}
|
||||
go func() {
|
||||
for {
|
||||
RouterLoop:
|
||||
select {
|
||||
case <-r.done:
|
||||
log.Print("[MetricRouter] DONE\n")
|
||||
r.wg.Done()
|
||||
break RouterLoop
|
||||
default:
|
||||
for _, c := range r.inputs {
|
||||
RouterInputLoop:
|
||||
select {
|
||||
case <-r.done:
|
||||
log.Print("[MetricRouter] DONE\n")
|
||||
r.wg.Done()
|
||||
break RouterInputLoop
|
||||
case p := <-c:
|
||||
log.Print("[MetricRouter] FORWARD ", p)
|
||||
r.DoAddTags(p)
|
||||
r.DoDelTags(p)
|
||||
if r.config.IntervalStamp {
|
||||
p.SetTime(r.timestamp)
|
||||
}
|
||||
for _, o := range r.outputs {
|
||||
o <- p
|
||||
}
|
||||
default:
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
log.Print("[MetricRouter] EXIT\n")
|
||||
}()
|
||||
log.Print("[MetricRouter] STARTED\n")
|
||||
}
|
||||
|
||||
func (r *metricRouter) AddInput(input chan lp.CCMetric) {
|
||||
r.inputs = append(r.inputs, input)
|
||||
}
|
||||
|
||||
func (r *metricRouter) AddOutput(output chan lp.CCMetric) {
|
||||
r.outputs = append(r.outputs, output)
|
||||
}
|
||||
|
||||
func (r *metricRouter) Close() {
|
||||
r.done <- true
|
||||
log.Print("[MetricRouter] CLOSE\n")
|
||||
}
|
||||
|
||||
func New(ticker mct.MultiChanTicker, wg *sync.WaitGroup, routerConfigFile string) (MetricRouter, error) {
|
||||
r := new(metricRouter)
|
||||
err := r.Init(ticker, wg, routerConfigFile)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return r, err
|
||||
}
|
Reference in New Issue
Block a user