Merge develop branch into main (#123)

* Add cpu_used (all-cpu_idle) to CpustatCollector

* Update cc-metric-collector.init

* Allow selection of timestamp precision in HttpSink

* Add comment about precision requirement for cc-metric-store

* Fix for API changes in gofish@v0.15.0

* Update requirements to latest version

* Read sensors through redfish

* Update golang toolchain to 1.21

* Remove stray error check

* Update main config in configuration.md

* Update Release action to use golang 1.22 stable release, no golang RPMs anymore

* Update runonce action to use golang 1.22 stable release, no golang RPMs anymore

* Update README.md

Use right JSON type in configuration

* Update sink's README

* Test whether ipmitool or ipmi-sensors can be executed without errors

* Little fixes to the prometheus sink (#115)

* Add uint64 to float64 cast option

* Add prometheus sink to the list of available sinks

* Add aggregated counters by gpu for nvlink errors

---------

Co-authored-by: Michael Schwarz <schwarz@uni-paderborn.de>

* Ccmessage migration (#119)

* Add cpu_used (all-cpu_idle) to CpustatCollector

* Update cc-metric-collector.init

* Allow selection of timestamp precision in HttpSink

* Add comment about precision requirement for cc-metric-store

* Fix for API changes in gofish@v0.15.0

* Update requirements to latest version

* Read sensors through redfish

* Update golang toolchain to 1.21

* Remove stray error check

* Update main config in configuration.md

* Update Release action to use golang 1.22 stable release, no golang RPMs anymore

* Update runonce action to use golang 1.22 stable release, no golang RPMs anymore

* Switch to CCMessage for all files.

---------

Co-authored-by: Holger Obermaier <Holger.Obermaier@kit.edu>
Co-authored-by: Holger Obermaier <40787752+ho-ob@users.noreply.github.com>

* Switch to ccmessage also for latest additions in nvidiaMetric

* New Message processor (#118)

* Add cpu_used (all-cpu_idle) to CpustatCollector

* Update cc-metric-collector.init

* Allow selection of timestamp precision in HttpSink

* Add comment about precision requirement for cc-metric-store

* Fix for API changes in gofish@v0.15.0

* Update requirements to latest version

* Read sensors through redfish

* Update golang toolchain to 1.21

* Remove stray error check

* Update main config in configuration.md

* Update Release action to use golang 1.22 stable release, no golang RPMs anymore

* Update runonce action to use golang 1.22 stable release, no golang RPMs anymore

* New message processor to check whether a message should be dropped or to manipulate it in flight

* Create a copy of message before manipulation

---------

Co-authored-by: Holger Obermaier <Holger.Obermaier@kit.edu>
Co-authored-by: Holger Obermaier <40787752+ho-ob@users.noreply.github.com>

* Update collector's Makefile and go.mod/sum files

* Use message processor in router, all sinks and all receivers

* Add support for credential file (NKEY) to NATS sink and receiver

* Fix JSON keys in message processor configuration

* Update docs for message processor, router and the default router config file

* Add link to expr syntax and fix regex matching docs

* Update sample collectors

* Minor style change in collector manager

* Some helpers for ccTopology

* LIKWID collector: write log owner change only once

* Fix for metrics without units and reduce debugging messages for messageProcessor

* Use shortened hostname for hostname added by router

* Define default port for NATS

* CPUstat collector: only add unit for applicable metrics

* Add precision option to all sinks using Influx's encoder

* Add message processor to all sink documentation

* Add units to documentation of cpustat collector

---------

Co-authored-by: Holger Obermaier <Holger.Obermaier@kit.edu>
Co-authored-by: Holger Obermaier <40787752+ho-ob@users.noreply.github.com>
Co-authored-by: oscarminus <me@oscarminus.de>
Co-authored-by: Michael Schwarz <schwarz@uni-paderborn.de>
This commit is contained in:
Thomas Gruber
2024-12-19 23:00:14 +01:00
committed by GitHub
parent 21646e1edf
commit 7840de7b82
74 changed files with 1902 additions and 1017 deletions

View File

@@ -6,18 +6,19 @@ import (
"strings"
"sync"
lp2 "github.com/ClusterCockpit/cc-energy-manager/pkg/cc-message"
lp "github.com/ClusterCockpit/cc-energy-manager/pkg/cc-message"
cclog "github.com/ClusterCockpit/cc-metric-collector/pkg/ccLogger"
lp "github.com/ClusterCockpit/cc-metric-collector/pkg/ccMetric"
lplegacy "github.com/ClusterCockpit/cc-metric-collector/pkg/ccMetric"
"github.com/expr-lang/expr"
"github.com/expr-lang/expr/vm"
)
// Message processor add/delete tag/meta configuration
type messageProcessorTagConfig struct {
Key string `json:"key"` // Tag name
Value string `json:"value"` // Tag value
Condition string `json:"if"` // Condition for adding or removing corresponding tag
Key string `json:"key"` // Tag name
Value string `json:"value,omitempty"` // Tag value
Condition string `json:"if"` // Condition for adding or removing corresponding tag
}
type messageProcessorConfig struct {
@@ -32,8 +33,8 @@ type messageProcessorConfig struct {
DelTagsIf []messageProcessorTagConfig `json:"delete_tags_if"` // List of tags that are removed when the condition is met
AddMetaIf []messageProcessorTagConfig `json:"add_meta_if"` // List of meta infos that are added when the condition is met
DelMetaIf []messageProcessorTagConfig `json:"delete_meta_if"` // List of meta infos that are removed when the condition is met
AddFieldIf []messageProcessorTagConfig `json:"add_fields_if"` // List of fields that are added when the condition is met
DelFieldIf []messageProcessorTagConfig `json:"delete_fields_if"` // List of fields that are removed when the condition is met
AddFieldIf []messageProcessorTagConfig `json:"add_field_if"` // List of fields that are added when the condition is met
DelFieldIf []messageProcessorTagConfig `json:"delete_field_if"` // List of fields that are removed when the condition is met
DropByType []string `json:"drop_by_message_type"` // List of message types that should be dropped
MoveTagToMeta []messageProcessorTagConfig `json:"move_tag_to_meta_if"`
MoveTagToField []messageProcessorTagConfig `json:"move_tag_to_field_if"`
@@ -117,8 +118,8 @@ type MessageProcessor interface {
// Read in a JSON configuration
FromConfigJSON(config json.RawMessage) error
// Processing functions for legacy CCMetric and current CCMessage
ProcessMetric(m lp.CCMetric) (lp2.CCMessage, error)
ProcessMessage(m lp2.CCMessage) (lp2.CCMessage, error)
ProcessMetric(m lplegacy.CCMetric) (lp.CCMessage, error)
ProcessMessage(m lp.CCMessage) (lp.CCMessage, error)
//EvalToBool(condition string, parameters map[string]interface{}) (bool, error)
//EvalToFloat64(condition string, parameters map[string]interface{}) (float64, error)
//EvalToString(condition string, parameters map[string]interface{}) (string, error)
@@ -261,8 +262,8 @@ var baseenv = map[string]interface{}{
"log": "",
},
"timestamp": 1234567890,
"msg": lp2.EmptyMessage(),
"message": lp2.EmptyMessage(),
"msg": lp.EmptyMessage(),
"message": lp.EmptyMessage(),
}
func addBaseEnvWalker(values map[string]interface{}) map[string]interface{} {
@@ -759,8 +760,8 @@ func (mp *messageProcessor) FromConfigJSON(config json.RawMessage) error {
return nil
}
func (mp *messageProcessor) ProcessMetric(metric lp.CCMetric) (lp2.CCMessage, error) {
m, err := lp2.NewMessage(
func (mp *messageProcessor) ProcessMetric(metric lplegacy.CCMetric) (lp.CCMessage, error) {
m, err := lp.NewMessage(
metric.Name(),
metric.Tags(),
metric.Meta(),
@@ -774,9 +775,9 @@ func (mp *messageProcessor) ProcessMetric(metric lp.CCMetric) (lp2.CCMessage, er
}
func (mp *messageProcessor) ProcessMessage(m lp2.CCMessage) (lp2.CCMessage, error) {
func (mp *messageProcessor) ProcessMessage(m lp.CCMessage) (lp.CCMessage, error) {
var err error = nil
var out lp2.CCMessage = lp2.FromMessage(m)
var out lp.CCMessage = lp.FromMessage(m)
name := out.Name()
@@ -802,45 +803,45 @@ func (mp *messageProcessor) ProcessMessage(m lp2.CCMessage) (lp2.CCMessage, erro
switch s {
case STAGENAME_DROP_BY_NAME:
if len(mp.dropMessages) > 0 {
cclog.ComponentDebug("MessageProcessor", "Dropping by message name ", name)
//cclog.ComponentDebug("MessageProcessor", "Dropping by message name ", name)
if _, ok := mp.dropMessages[name]; ok {
cclog.ComponentDebug("MessageProcessor", "Drop")
//cclog.ComponentDebug("MessageProcessor", "Drop")
return nil, nil
}
}
case STAGENAME_DROP_BY_TYPE:
if len(mp.dropTypes) > 0 {
cclog.ComponentDebug("MessageProcessor", "Dropping by message type")
//cclog.ComponentDebug("MessageProcessor", "Dropping by message type")
if _, ok := mp.dropTypes[params["messagetype"].(string)]; ok {
cclog.ComponentDebug("MessageProcessor", "Drop")
//cclog.ComponentDebug("MessageProcessor", "Drop")
return nil, nil
}
}
case STAGENAME_DROP_IF:
if len(mp.dropMessagesIf) > 0 {
cclog.ComponentDebug("MessageProcessor", "Dropping by condition")
//cclog.ComponentDebug("MessageProcessor", "Dropping by condition")
drop, err := dropMessagesIf(&params, &mp.dropMessagesIf)
if err != nil {
return out, fmt.Errorf("failed to evaluate: %v", err.Error())
}
if drop {
cclog.ComponentDebug("MessageProcessor", "Drop")
//cclog.ComponentDebug("MessageProcessor", "Drop")
return nil, nil
}
}
case STAGENAME_RENAME_BY_NAME:
if len(mp.renameMessages) > 0 {
cclog.ComponentDebug("MessageProcessor", "Renaming by name match")
//cclog.ComponentDebug("MessageProcessor", "Renaming by name match")
if newname, ok := mp.renameMessages[name]; ok {
cclog.ComponentDebug("MessageProcessor", "Rename to", newname)
//cclog.ComponentDebug("MessageProcessor", "Rename to", newname)
out.SetName(newname)
cclog.ComponentDebug("MessageProcessor", "Add old name as 'oldname' to meta", name)
//cclog.ComponentDebug("MessageProcessor", "Add old name as 'oldname' to meta", name)
out.AddMeta("oldname", name)
}
}
case STAGENAME_RENAME_IF:
if len(mp.renameMessagesIf) > 0 {
cclog.ComponentDebug("MessageProcessor", "Renaming by condition")
//cclog.ComponentDebug("MessageProcessor", "Renaming by condition")
_, err := renameMessagesIf(out, &params, &mp.renameMessagesIf)
if err != nil {
return out, fmt.Errorf("failed to evaluate: %v", err.Error())
@@ -848,7 +849,7 @@ func (mp *messageProcessor) ProcessMessage(m lp2.CCMessage) (lp2.CCMessage, erro
}
case STAGENAME_ADD_TAG:
if len(mp.addTagsIf) > 0 {
cclog.ComponentDebug("MessageProcessor", "Adding tags")
//cclog.ComponentDebug("MessageProcessor", "Adding tags")
_, err = addTagIf(out, &params, &mp.addTagsIf)
if err != nil {
return out, fmt.Errorf("failed to evaluate: %v", err.Error())
@@ -856,7 +857,7 @@ func (mp *messageProcessor) ProcessMessage(m lp2.CCMessage) (lp2.CCMessage, erro
}
case STAGENAME_DELETE_TAG:
if len(mp.deleteTagsIf) > 0 {
cclog.ComponentDebug("MessageProcessor", "Delete tags")
//cclog.ComponentDebug("MessageProcessor", "Delete tags")
_, err = deleteTagIf(out, &params, &mp.deleteTagsIf)
if err != nil {
return out, fmt.Errorf("failed to evaluate: %v", err.Error())
@@ -864,7 +865,7 @@ func (mp *messageProcessor) ProcessMessage(m lp2.CCMessage) (lp2.CCMessage, erro
}
case STAGENAME_ADD_META:
if len(mp.addMetaIf) > 0 {
cclog.ComponentDebug("MessageProcessor", "Adding meta information")
//cclog.ComponentDebug("MessageProcessor", "Adding meta information")
_, err = addMetaIf(out, &params, &mp.addMetaIf)
if err != nil {
return out, fmt.Errorf("failed to evaluate: %v", err.Error())
@@ -872,7 +873,7 @@ func (mp *messageProcessor) ProcessMessage(m lp2.CCMessage) (lp2.CCMessage, erro
}
case STAGENAME_DELETE_META:
if len(mp.deleteMetaIf) > 0 {
cclog.ComponentDebug("MessageProcessor", "Delete meta information")
//cclog.ComponentDebug("MessageProcessor", "Delete meta information")
_, err = deleteMetaIf(out, &params, &mp.deleteMetaIf)
if err != nil {
return out, fmt.Errorf("failed to evaluate: %v", err.Error())
@@ -880,7 +881,7 @@ func (mp *messageProcessor) ProcessMessage(m lp2.CCMessage) (lp2.CCMessage, erro
}
case STAGENAME_ADD_FIELD:
if len(mp.addFieldIf) > 0 {
cclog.ComponentDebug("MessageProcessor", "Adding fields")
//cclog.ComponentDebug("MessageProcessor", "Adding fields")
_, err = addFieldIf(out, &params, &mp.addFieldIf)
if err != nil {
return out, fmt.Errorf("failed to evaluate: %v", err.Error())
@@ -888,7 +889,7 @@ func (mp *messageProcessor) ProcessMessage(m lp2.CCMessage) (lp2.CCMessage, erro
}
case STAGENAME_DELETE_FIELD:
if len(mp.deleteFieldIf) > 0 {
cclog.ComponentDebug("MessageProcessor", "Delete fields")
//cclog.ComponentDebug("MessageProcessor", "Delete fields")
_, err = deleteFieldIf(out, &params, &mp.deleteFieldIf)
if err != nil {
return out, fmt.Errorf("failed to evaluate: %v", err.Error())
@@ -896,7 +897,7 @@ func (mp *messageProcessor) ProcessMessage(m lp2.CCMessage) (lp2.CCMessage, erro
}
case STAGENAME_MOVE_TAG_META:
if len(mp.moveTagToMeta) > 0 {
cclog.ComponentDebug("MessageProcessor", "Move tag to meta")
//cclog.ComponentDebug("MessageProcessor", "Move tag to meta")
_, err := moveTagToMeta(out, &params, &mp.moveTagToMeta)
if err != nil {
return out, fmt.Errorf("failed to evaluate: %v", err.Error())
@@ -904,7 +905,7 @@ func (mp *messageProcessor) ProcessMessage(m lp2.CCMessage) (lp2.CCMessage, erro
}
case STAGENAME_MOVE_TAG_FIELD:
if len(mp.moveTagToField) > 0 {
cclog.ComponentDebug("MessageProcessor", "Move tag to fields")
//cclog.ComponentDebug("MessageProcessor", "Move tag to fields")
_, err := moveTagToField(out, &params, &mp.moveTagToField)
if err != nil {
return out, fmt.Errorf("failed to evaluate: %v", err.Error())
@@ -912,7 +913,7 @@ func (mp *messageProcessor) ProcessMessage(m lp2.CCMessage) (lp2.CCMessage, erro
}
case STAGENAME_MOVE_META_TAG:
if len(mp.moveMetaToTag) > 0 {
cclog.ComponentDebug("MessageProcessor", "Move meta to tags")
//cclog.ComponentDebug("MessageProcessor", "Move meta to tags")
_, err := moveMetaToTag(out, &params, &mp.moveMetaToTag)
if err != nil {
return out, fmt.Errorf("failed to evaluate: %v", err.Error())
@@ -920,7 +921,7 @@ func (mp *messageProcessor) ProcessMessage(m lp2.CCMessage) (lp2.CCMessage, erro
}
case STAGENAME_MOVE_META_FIELD:
if len(mp.moveMetaToField) > 0 {
cclog.ComponentDebug("MessageProcessor", "Move meta to fields")
//cclog.ComponentDebug("MessageProcessor", "Move meta to fields")
_, err := moveMetaToField(out, &params, &mp.moveMetaToField)
if err != nil {
return out, fmt.Errorf("failed to evaluate: %v", err.Error())
@@ -928,7 +929,7 @@ func (mp *messageProcessor) ProcessMessage(m lp2.CCMessage) (lp2.CCMessage, erro
}
case STAGENAME_MOVE_FIELD_META:
if len(mp.moveFieldToMeta) > 0 {
cclog.ComponentDebug("MessageProcessor", "Move field to meta")
//cclog.ComponentDebug("MessageProcessor", "Move field to meta")
_, err := moveFieldToMeta(out, &params, &mp.moveFieldToMeta)
if err != nil {
return out, fmt.Errorf("failed to evaluate: %v", err.Error())
@@ -936,7 +937,7 @@ func (mp *messageProcessor) ProcessMessage(m lp2.CCMessage) (lp2.CCMessage, erro
}
case STAGENAME_MOVE_FIELD_TAG:
if len(mp.moveFieldToTag) > 0 {
cclog.ComponentDebug("MessageProcessor", "Move field to tags")
//cclog.ComponentDebug("MessageProcessor", "Move field to tags")
_, err := moveFieldToTag(out, &params, &mp.moveFieldToTag)
if err != nil {
return out, fmt.Errorf("failed to evaluate: %v", err.Error())
@@ -944,8 +945,8 @@ func (mp *messageProcessor) ProcessMessage(m lp2.CCMessage) (lp2.CCMessage, erro
}
case STAGENAME_NORMALIZE_UNIT:
if mp.normalizeUnits {
cclog.ComponentDebug("MessageProcessor", "Normalize units")
if lp2.IsMetric(out) {
//cclog.ComponentDebug("MessageProcessor", "Normalize units")
if lp.IsMetric(out) {
_, err := normalizeUnits(out)
if err != nil {
return out, fmt.Errorf("failed to evaluate: %v", err.Error())
@@ -957,8 +958,8 @@ func (mp *messageProcessor) ProcessMessage(m lp2.CCMessage) (lp2.CCMessage, erro
case STAGENAME_CHANGE_UNIT_PREFIX:
if len(mp.changeUnitPrefix) > 0 {
cclog.ComponentDebug("MessageProcessor", "Change unit prefix")
if lp2.IsMetric(out) {
//cclog.ComponentDebug("MessageProcessor", "Change unit prefix")
if lp.IsMetric(out) {
_, err := changeUnitPrefix(out, &params, &mp.changeUnitPrefix)
if err != nil {
return out, fmt.Errorf("failed to evaluate: %v", err.Error())