Merge develop branch into main (#123)

* Add cpu_used (all-cpu_idle) to CpustatCollector

* Update cc-metric-collector.init

* Allow selection of timestamp precision in HttpSink

* Add comment about precision requirement for cc-metric-store

* Fix for API changes in gofish@v0.15.0

* Update requirements to latest version

* Read sensors through redfish

* Update golang toolchain to 1.21

* Remove stray error check

* Update main config in configuration.md

* Update Release action to use golang 1.22 stable release, no golang RPMs anymore

* Update runonce action to use golang 1.22 stable release, no golang RPMs anymore

* Update README.md

Use right JSON type in configuration

* Update sink's README

* Test whether ipmitool or ipmi-sensors can be executed without errors

* Little fixes to the prometheus sink (#115)

* Add uint64 to float64 cast option

* Add prometheus sink to the list of available sinks

* Add aggregated counters by gpu for nvlink errors

---------

Co-authored-by: Michael Schwarz <schwarz@uni-paderborn.de>

* Ccmessage migration (#119)

* Add cpu_used (all-cpu_idle) to CpustatCollector

* Update cc-metric-collector.init

* Allow selection of timestamp precision in HttpSink

* Add comment about precision requirement for cc-metric-store

* Fix for API changes in gofish@v0.15.0

* Update requirements to latest version

* Read sensors through redfish

* Update golang toolchain to 1.21

* Remove stray error check

* Update main config in configuration.md

* Update Release action to use golang 1.22 stable release, no golang RPMs anymore

* Update runonce action to use golang 1.22 stable release, no golang RPMs anymore

* Switch to CCMessage for all files.

---------

Co-authored-by: Holger Obermaier <Holger.Obermaier@kit.edu>
Co-authored-by: Holger Obermaier <40787752+ho-ob@users.noreply.github.com>

* Switch to ccmessage also for latest additions in nvidiaMetric

* New Message processor (#118)

* Add cpu_used (all-cpu_idle) to CpustatCollector

* Update cc-metric-collector.init

* Allow selection of timestamp precision in HttpSink

* Add comment about precision requirement for cc-metric-store

* Fix for API changes in gofish@v0.15.0

* Update requirements to latest version

* Read sensors through redfish

* Update golang toolchain to 1.21

* Remove stray error check

* Update main config in configuration.md

* Update Release action to use golang 1.22 stable release, no golang RPMs anymore

* Update runonce action to use golang 1.22 stable release, no golang RPMs anymore

* New message processor to check whether a message should be dropped or manipulate it in flight

* Create a copy of message before manipulation

---------

Co-authored-by: Holger Obermaier <Holger.Obermaier@kit.edu>
Co-authored-by: Holger Obermaier <40787752+ho-ob@users.noreply.github.com>

* Update collector's Makefile and go.mod/sum files

* Use message processor in router, all sinks and all receivers

* Add support for credential file (NKEY) to NATS sink and receiver

* Fix JSON keys in message processor configuration

* Update docs for message processor, router and the default router config file

* Add link to expr syntax and fix regex matching docs

* Update sample collectors

* Minor style change in collector manager

* Some helpers for ccTopology

* LIKWID collector: write log owner change only once

* Fix for metrics without units and reduce debugging messages for messageProcessor

* Use shortened hostname for hostname added by router

* Define default port for NATS

* CPUstat collector: only add unit for applicable metrics

* Add precision option to all sinks using Influx's encoder

* Add message processor to all sink documentation

* Add units to documentation of cpustat collector

---------

Co-authored-by: Holger Obermaier <Holger.Obermaier@kit.edu>
Co-authored-by: Holger Obermaier <40787752+ho-ob@users.noreply.github.com>
Co-authored-by: oscarminus <me@oscarminus.de>
Co-authored-by: Michael Schwarz <schwarz@uni-paderborn.de>
This commit is contained in:
Thomas Gruber
2024-12-19 23:00:14 +01:00
committed by GitHub
parent 21646e1edf
commit 7840de7b82
74 changed files with 1902 additions and 1017 deletions

View File

@@ -4,10 +4,10 @@ import (
"fmt"
"strings"
lp "github.com/ClusterCockpit/cc-metric-collector/pkg/ccMetric"
lp "github.com/ClusterCockpit/cc-energy-manager/pkg/cc-message"
)
func GangliaMetricName(point lp.CCMetric) string {
func GangliaMetricName(point lp.CCMessage) string {
name := point.Name()
metricType, typeOK := point.GetTag("type")
metricTid, tidOk := point.GetTag("type-id")
@@ -39,7 +39,7 @@ func GangliaMetricRename(name string) string {
return name
}
func GangliaSlopeType(point lp.CCMetric) uint {
func GangliaSlopeType(point lp.CCMessage) uint {
name := point.Name()
if name == "mem_total" || name == "swap_total" {
return 0
@@ -151,7 +151,7 @@ type GangliaMetricConfig struct {
Name string
}
func GetCommonGangliaConfig(point lp.CCMetric) GangliaMetricConfig {
func GetCommonGangliaConfig(point lp.CCMessage) GangliaMetricConfig {
mname := GangliaMetricRename(point.Name())
if oldname, ok := point.GetMeta("oldname"); ok {
mname = GangliaMetricRename(oldname)
@@ -207,7 +207,7 @@ func GetCommonGangliaConfig(point lp.CCMetric) GangliaMetricConfig {
}
}
func GetGangliaConfig(point lp.CCMetric) GangliaMetricConfig {
func GetGangliaConfig(point lp.CCMessage) GangliaMetricConfig {
mname := GangliaMetricRename(point.Name())
if oldname, ok := point.GetMeta("oldname"); ok {
mname = GangliaMetricRename(oldname)

View File

@@ -10,8 +10,9 @@ import (
// "time"
"os/exec"
lp "github.com/ClusterCockpit/cc-energy-manager/pkg/cc-message"
cclog "github.com/ClusterCockpit/cc-metric-collector/pkg/ccLogger"
lp "github.com/ClusterCockpit/cc-metric-collector/pkg/ccMetric"
mp "github.com/ClusterCockpit/cc-metric-collector/pkg/messageProcessor"
)
const GMETRIC_EXEC = `gmetric`
@@ -35,50 +36,53 @@ type GangliaSink struct {
config GangliaSinkConfig
}
func (s *GangliaSink) Write(point lp.CCMetric) error {
func (s *GangliaSink) Write(msg lp.CCMessage) error {
var err error = nil
//var tagsstr []string
var argstr []string
// Get metric config (type, value, ... in suitable format)
conf := GetCommonGangliaConfig(point)
if len(conf.Type) == 0 {
conf = GetGangliaConfig(point)
}
if len(conf.Type) == 0 {
return fmt.Errorf("metric %q (Ganglia name %q) has no 'value' field", point.Name(), conf.Name)
}
point, err := s.mp.ProcessMessage(msg)
if err == nil && point != nil {
// Get metric config (type, value, ... in suitable format)
conf := GetCommonGangliaConfig(point)
if len(conf.Type) == 0 {
conf = GetGangliaConfig(point)
}
if len(conf.Type) == 0 {
return fmt.Errorf("metric %q (Ganglia name %q) has no 'value' field", point.Name(), conf.Name)
}
if s.config.AddGangliaGroup {
argstr = append(argstr, fmt.Sprintf("--group=%s", conf.Group))
}
if s.config.AddUnits && len(conf.Unit) > 0 {
argstr = append(argstr, fmt.Sprintf("--units=%s", conf.Unit))
}
if s.config.AddGangliaGroup {
argstr = append(argstr, fmt.Sprintf("--group=%s", conf.Group))
}
if s.config.AddUnits && len(conf.Unit) > 0 {
argstr = append(argstr, fmt.Sprintf("--units=%s", conf.Unit))
}
if len(s.config.ClusterName) > 0 {
argstr = append(argstr, fmt.Sprintf("--cluster=%s", s.config.ClusterName))
}
// if s.config.AddTagsAsDesc && len(tagsstr) > 0 {
// argstr = append(argstr, fmt.Sprintf("--desc=%q", strings.Join(tagsstr, ",")))
// }
if len(s.gmetric_config) > 0 {
argstr = append(argstr, fmt.Sprintf("--conf=%s", s.gmetric_config))
}
if s.config.AddTypeToName {
argstr = append(argstr, fmt.Sprintf("--name=%s", GangliaMetricName(point)))
} else {
argstr = append(argstr, fmt.Sprintf("--name=%s", conf.Name))
}
argstr = append(argstr, fmt.Sprintf("--slope=%s", conf.Slope))
argstr = append(argstr, fmt.Sprintf("--value=%s", conf.Value))
argstr = append(argstr, fmt.Sprintf("--type=%s", conf.Type))
argstr = append(argstr, fmt.Sprintf("--tmax=%d", conf.Tmax))
if len(s.config.ClusterName) > 0 {
argstr = append(argstr, fmt.Sprintf("--cluster=%s", s.config.ClusterName))
}
// if s.config.AddTagsAsDesc && len(tagsstr) > 0 {
// argstr = append(argstr, fmt.Sprintf("--desc=%q", strings.Join(tagsstr, ",")))
// }
if len(s.gmetric_config) > 0 {
argstr = append(argstr, fmt.Sprintf("--conf=%s", s.gmetric_config))
}
if s.config.AddTypeToName {
argstr = append(argstr, fmt.Sprintf("--name=%s", GangliaMetricName(point)))
} else {
argstr = append(argstr, fmt.Sprintf("--name=%s", conf.Name))
}
argstr = append(argstr, fmt.Sprintf("--slope=%s", conf.Slope))
argstr = append(argstr, fmt.Sprintf("--value=%s", conf.Value))
argstr = append(argstr, fmt.Sprintf("--type=%s", conf.Type))
argstr = append(argstr, fmt.Sprintf("--tmax=%d", conf.Tmax))
cclog.ComponentDebug(s.name, s.gmetric_path, strings.Join(argstr, " "))
command := exec.Command(s.gmetric_path, argstr...)
command.Wait()
_, err = command.Output()
cclog.ComponentDebug(s.name, s.gmetric_path, strings.Join(argstr, " "))
command := exec.Command(s.gmetric_path, argstr...)
command.Wait()
_, err = command.Output()
}
return err
}
@@ -104,6 +108,13 @@ func NewGangliaSink(name string, config json.RawMessage) (Sink, error) {
}
s.gmetric_path = ""
s.gmetric_config = ""
p, err := mp.NewMessageProcessor()
if err != nil {
return nil, fmt.Errorf("initialization of message processor failed: %v", err.Error())
}
s.mp = p
if len(s.config.GmetricPath) > 0 {
p, err := exec.LookPath(s.config.GmetricPath)
if err == nil {
@@ -122,5 +133,15 @@ func NewGangliaSink(name string, config json.RawMessage) (Sink, error) {
if len(s.config.GmetricConfig) > 0 {
s.gmetric_config = s.config.GmetricConfig
}
if len(s.config.MessageProcessor) > 0 {
err = s.mp.FromConfigJSON(s.config.MessageProcessor)
if err != nil {
return nil, fmt.Errorf("failed parsing JSON for message processor: %v", err.Error())
}
}
for _, k := range s.config.MetaAsTags {
s.mp.AddMoveMetaToTags("true", k, k)
}
return s, nil
}

View File

@@ -8,14 +8,18 @@ The `ganglia` sink uses the `gmetric` tool of the [Ganglia Monitoring System](ht
{
"<name>": {
"type": "ganglia",
"meta_as_tags" : true,
"gmetric_path" : "/path/to/gmetric",
"add_ganglia_group" : true
"add_ganglia_group" : true,
"process_messages" : {
"see" : "docs of message processor for valid fields"
},
"meta_as_tags" : []
}
}
```
- `type`: makes the sink a `ganglia` sink
- `meta_as_tags`: print all meta information as tags in the output (optional)
- `gmetric_path`: Path to `gmetric` executable (optional). If not given, the sink searches in `$PATH` for `gmetric`.
- `add_ganglia_group`: Add `--group=X` based on meta information to the `gmetric` call. Some old versions of `gmetric` do not support the `--group` option.
- `add_ganglia_group`: Add `--group=X` based on meta information to the `gmetric` call. Some old versions of `gmetric` do not support the `--group` option.
- `process_messages`: Process messages with given rules before progressing or dropping, see [here](../pkg/messageProcessor/README.md) (optional)
- `meta_as_tags`: print all meta information as tags in the output (deprecated, optional)

View File

@@ -9,8 +9,9 @@ import (
"sync"
"time"
lp "github.com/ClusterCockpit/cc-energy-manager/pkg/cc-message"
cclog "github.com/ClusterCockpit/cc-metric-collector/pkg/ccLogger"
lp "github.com/ClusterCockpit/cc-metric-collector/pkg/ccMetric"
mp "github.com/ClusterCockpit/cc-metric-collector/pkg/messageProcessor"
influx "github.com/influxdata/line-protocol/v2/lineprotocol"
"golang.org/x/exp/slices"
)
@@ -75,28 +76,20 @@ type HttpSink struct {
}
// Write sends metric m as http message
func (s *HttpSink) Write(m lp.CCMetric) error {
func (s *HttpSink) Write(msg lp.CCMessage) error {
// Lock for encoder usage
s.encoderLock.Lock()
// submit m only after applying processing/dropping rules
m, err := s.mp.ProcessMessage(msg)
if err == nil && m != nil {
// Lock for encoder usage
s.encoderLock.Lock()
// Encode measurement name
s.encoder.StartLine(m.Name())
// Encode measurement name
s.encoder.StartLine(m.Name())
// copy tags and meta data which should be used as tags
s.extended_tag_list = s.extended_tag_list[:0]
for key, value := range m.Tags() {
s.extended_tag_list =
append(
s.extended_tag_list,
key_value_pair{
key: key,
value: value,
},
)
}
for _, key := range s.config.MetaAsTags {
if value, ok := m.GetMeta(key); ok {
// copy tags and meta data which should be used as tags
s.extended_tag_list = s.extended_tag_list[:0]
for key, value := range m.Tags() {
s.extended_tag_list =
append(
s.extended_tag_list,
@@ -106,45 +99,57 @@ func (s *HttpSink) Write(m lp.CCMetric) error {
},
)
}
}
// for _, key := range s.config.MetaAsTags {
// if value, ok := m.GetMeta(key); ok {
// s.extended_tag_list =
// append(
// s.extended_tag_list,
// key_value_pair{
// key: key,
// value: value,
// },
// )
// }
// }
// Encode tags (they musts be in lexical order)
slices.SortFunc(
s.extended_tag_list,
func(a key_value_pair, b key_value_pair) int {
if a.key < b.key {
return -1
}
if a.key > b.key {
return +1
}
return 0
},
)
for i := range s.extended_tag_list {
s.encoder.AddTag(
s.extended_tag_list[i].key,
s.extended_tag_list[i].value,
// Encode tags (they musts be in lexical order)
slices.SortFunc(
s.extended_tag_list,
func(a key_value_pair, b key_value_pair) int {
if a.key < b.key {
return -1
}
if a.key > b.key {
return +1
}
return 0
},
)
}
for i := range s.extended_tag_list {
s.encoder.AddTag(
s.extended_tag_list[i].key,
s.extended_tag_list[i].value,
)
}
// Encode fields
for key, value := range m.Fields() {
s.encoder.AddField(key, influx.MustNewValue(value))
}
// Encode fields
for key, value := range m.Fields() {
s.encoder.AddField(key, influx.MustNewValue(value))
}
// Encode time stamp
s.encoder.EndLine(m.Time())
// Encode time stamp
s.encoder.EndLine(m.Time())
// Check for encoder errors
err := s.encoder.Err()
// Check for encoder errors
err := s.encoder.Err()
// Unlock encoder usage
s.encoderLock.Unlock()
// Unlock encoder usage
s.encoderLock.Unlock()
// Check that encoding worked
if err != nil {
return fmt.Errorf("encoding failed: %v", err)
// Check that encoding worked
if err != nil {
return fmt.Errorf("encoding failed: %v", err)
}
}
if s.config.flushDelay == 0 {
@@ -271,7 +276,7 @@ func NewHttpSink(name string, config json.RawMessage) (Sink, error) {
s.config.Timeout = "5s"
s.config.FlushDelay = "5s"
s.config.MaxRetries = 3
s.config.Precision = "ns"
s.config.Precision = "s"
cclog.ComponentDebug(s.name, "Init()")
// Read config
@@ -297,6 +302,11 @@ func NewHttpSink(name string, config json.RawMessage) (Sink, error) {
if s.config.useBasicAuth && len(s.config.Password) == 0 {
return nil, errors.New("basic authentication requires password")
}
p, err := mp.NewMessageProcessor()
if err != nil {
return nil, fmt.Errorf("initialization of message processor failed: %v", err.Error())
}
s.mp = p
if len(s.config.IdleConnTimeout) > 0 {
t, err := time.ParseDuration(s.config.IdleConnTimeout)
@@ -319,7 +329,17 @@ func NewHttpSink(name string, config json.RawMessage) (Sink, error) {
cclog.ComponentDebug(s.name, "Init(): flushDelay", t)
}
}
precision := influx.Nanosecond
if len(s.config.MessageProcessor) > 0 {
err = p.FromConfigJSON(s.config.MessageProcessor)
if err != nil {
return nil, fmt.Errorf("failed parsing JSON for message processor: %v", err.Error())
}
}
for _, k := range s.config.MetaAsTags {
s.mp.AddMoveMetaToTags("true", k, k)
}
precision := influx.Second
if len(s.config.Precision) > 0 {
switch s.config.Precision {
case "s":

View File

@@ -8,9 +8,6 @@ The `http` sink uses POST requests to a HTTP server to submit the metrics in the
{
"<name>": {
"type": "http",
"meta_as_tags" : [
"meta-key"
],
"url" : "https://my-monitoring.example.com:1234/api/write",
"jwt" : "blabla.blabla.blabla",
"username": "myUser",
@@ -19,13 +16,16 @@ The `http` sink uses POST requests to a HTTP server to submit the metrics in the
"idle_connection_timeout" : "5s",
"flush_delay": "2s",
"batch_size": 1000,
"precision": "s"
"precision": "s",
"process_messages" : {
"see" : "docs of message processor for valid fields"
},
"meta_as_tags" : []
}
}
```
- `type`: makes the sink an `http` sink
- `meta_as_tags`: Move specific meta information to the tags in the output (optional)
- `url`: The full URL of the endpoint
- `jwt`: JSON web tokens for authentication (Using the *Bearer* scheme)
- `username`: username for basic authentication
@@ -35,8 +35,10 @@ The `http` sink uses POST requests to a HTTP server to submit the metrics in the
- `idle_connection_timeout`: Timeout for idle connections (default '120s'). Should be larger than the measurement interval to keep the connection open
- `flush_delay`: Batch all writes arriving during this duration (default '1s', batching can be disabled by setting it to 0)
- `batch_size`: Maximal batch size. If `batch_size` is reached before the end of `flush_delay`, the metrics are sent without further delay
- `precision`: Precision of the timestamp. Valid values are 's', 'ms', 'us' and 'ns'. (default is 'ns')
- `precision`: Precision of the timestamp. Valid values are 's', 'ms', 'us' and 'ns'. (default is 's')
- `process_messages`: Process messages with given rules before progressing or dropping, see [here](../pkg/messageProcessor/README.md) (optional)
- `meta_as_tags`: print all meta information as tags in the output (deprecated, optional)
### Using HttpSink for communication with cc-metric-store
### Using `http` sink for communication with cc-metric-store
The cc-metric-store only accepts metrics with a timestamp precision in seconds, so it is required to set `"precision": "s"`.
The cc-metric-store only accepts metrics with a timestamp precision in seconds, so it is required to use `"precision": "s"`.

View File

@@ -10,8 +10,9 @@ import (
"strings"
"time"
lp "github.com/ClusterCockpit/cc-energy-manager/pkg/cc-message"
cclog "github.com/ClusterCockpit/cc-metric-collector/pkg/ccLogger"
lp "github.com/ClusterCockpit/cc-metric-collector/pkg/ccMetric"
mp "github.com/ClusterCockpit/cc-metric-collector/pkg/messageProcessor"
influxdb2 "github.com/influxdata/influxdb-client-go/v2"
influxdb2Api "github.com/influxdata/influxdb-client-go/v2/api"
influxdb2ApiHttp "github.com/influxdata/influxdb-client-go/v2/api/http"
@@ -36,6 +37,8 @@ type InfluxAsyncSinkConfig struct {
InfluxMaxRetryTime string `json:"max_retry_time,omitempty"`
CustomFlushInterval string `json:"custom_flush_interval,omitempty"`
MaxRetryAttempts uint `json:"max_retry_attempts,omitempty"`
// Timestamp precision
Precision string `json:"precision,omitempty"`
}
type InfluxAsyncSink struct {
@@ -93,7 +96,22 @@ func (s *InfluxAsyncSink) connect() error {
&tls.Config{
InsecureSkipVerify: true,
},
).SetPrecision(time.Second)
)
precision := time.Second
if len(s.config.Precision) > 0 {
switch s.config.Precision {
case "s":
precision = time.Second
case "ms":
precision = time.Millisecond
case "us":
precision = time.Microsecond
case "ns":
precision = time.Nanosecond
}
}
clientOptions.SetPrecision(precision)
s.client = influxdb2.NewClientWithOptions(uri, auth, clientOptions)
s.writeApi = s.client.WriteAPI(s.config.Organization, s.config.Database)
@@ -112,7 +130,7 @@ func (s *InfluxAsyncSink) connect() error {
return nil
}
func (s *InfluxAsyncSink) Write(m lp.CCMetric) error {
func (s *InfluxAsyncSink) Write(m lp.CCMessage) error {
if s.customFlushInterval != 0 && s.flushTimer == nil {
// Run a batched flush for all lines that have arrived in the defined interval
s.flushTimer = time.AfterFunc(s.customFlushInterval, func() {
@@ -121,9 +139,10 @@ func (s *InfluxAsyncSink) Write(m lp.CCMetric) error {
}
})
}
s.writeApi.WritePoint(
m.ToPoint(s.meta_as_tags),
)
msg, err := s.mp.ProcessMessage(m)
if err == nil && msg != nil {
s.writeApi.WritePoint(msg.ToPoint(nil))
}
return nil
}
@@ -158,6 +177,7 @@ func NewInfluxAsyncSink(name string, config json.RawMessage) (Sink, error) {
s.config.CustomFlushInterval = ""
s.customFlushInterval = time.Duration(0)
s.config.MaxRetryAttempts = 1
s.config.Precision = "s"
// Default retry intervals (in seconds)
// 1 2
@@ -200,10 +220,24 @@ func NewInfluxAsyncSink(name string, config json.RawMessage) (Sink, error) {
if len(s.config.Password) == 0 {
return nil, errors.New("missing password configuration required by InfluxSink")
}
p, err := mp.NewMessageProcessor()
if err != nil {
return nil, fmt.Errorf("initialization of message processor failed: %v", err.Error())
}
s.mp = p
if len(s.config.MessageProcessor) > 0 {
err = s.mp.FromConfigJSON(s.config.MessageProcessor)
if err != nil {
return nil, fmt.Errorf("failed parsing JSON for message processor: %v", err.Error())
}
}
// Create lookup map to use meta infos as tags in the output metric
s.meta_as_tags = make(map[string]bool)
// s.meta_as_tags = make(map[string]bool)
// for _, k := range s.config.MetaAsTags {
// s.meta_as_tags[k] = true
// }
for _, k := range s.config.MetaAsTags {
s.meta_as_tags[k] = true
s.mp.AddMoveMetaToTags("true", k, k)
}
toUint := func(duration string, def uint) uint {

View File

@@ -19,9 +19,13 @@ The `influxasync` sink uses the official [InfluxDB golang client](https://pkg.go
"batch_size": 200,
"retry_interval" : "1s",
"retry_exponential_base" : 2,
"precision": "s",
"max_retries": 20,
"max_retry_time" : "168h",
"meta_as_tags" : [],
"process_messages" : {
"see" : "docs of message processor for valid fields"
},
"meta_as_tags" : []
}
}
```
@@ -39,6 +43,12 @@ The `influxasync` sink uses the official [InfluxDB golang client](https://pkg.go
- `retry_exponential_base`: The retry interval is exponentially increased with this base, default 2
- `max_retries`: Maximal number of retry attempts
- `max_retry_time`: Maximal time to retry failed writes, default 168h (one week)
- `meta_as_tags`: move meta information keys to tags (optional)
- `precision`: Precision of the timestamp. Valid values are 's', 'ms', 'us' and 'ns'. (default is 's')
- `process_messages`: Process messages with given rules before progressing or dropping, see [here](../pkg/messageProcessor/README.md) (optional)
- `meta_as_tags`: print all meta information as tags in the output (deprecated, optional)
For information about the calculation of the retry interval settings, see the [official influxdb-client-go documentation](https://github.com/influxdata/influxdb-client-go#handling-of-failed-async-writes)
### Using `influxasync` sink for communication with cc-metric-store
The cc-metric-store only accepts metrics with a timestamp precision in seconds, so it is required to use `"precision": "s"`.

View File

@@ -10,8 +10,9 @@ import (
"sync"
"time"
lp "github.com/ClusterCockpit/cc-energy-manager/pkg/cc-message"
cclog "github.com/ClusterCockpit/cc-metric-collector/pkg/ccLogger"
lp "github.com/ClusterCockpit/cc-metric-collector/pkg/ccMetric"
mp "github.com/ClusterCockpit/cc-metric-collector/pkg/messageProcessor"
influxdb2 "github.com/influxdata/influxdb-client-go/v2"
influxdb2Api "github.com/influxdata/influxdb-client-go/v2/api"
influx "github.com/influxdata/line-protocol/v2/lineprotocol"
@@ -58,6 +59,8 @@ type InfluxSink struct {
InfluxMaxRetryTime string `json:"max_retry_time,omitempty"`
// Specify whether to use GZip compression in write requests
InfluxUseGzip bool `json:"use_gzip"`
// Timestamp precision
Precision string `json:"precision,omitempty"`
}
// influx line protocol encoder
@@ -206,7 +209,20 @@ func (s *InfluxSink) connect() error {
)
// Set time precision
clientOptions.SetPrecision(time.Nanosecond)
precision := time.Second
if len(s.config.Precision) > 0 {
switch s.config.Precision {
case "s":
precision = time.Second
case "ms":
precision = time.Millisecond
case "us":
precision = time.Microsecond
case "ns":
precision = time.Nanosecond
}
}
clientOptions.SetPrecision(precision)
// Create new writeAPI
s.client = influxdb2.NewClientWithOptions(uri, auth, clientOptions)
@@ -224,28 +240,19 @@ func (s *InfluxSink) connect() error {
}
// Write sends metric m in influxDB line protocol
func (s *InfluxSink) Write(m lp.CCMetric) error {
func (s *InfluxSink) Write(msg lp.CCMessage) error {
// Lock for encoder usage
s.encoderLock.Lock()
m, err := s.mp.ProcessMessage(msg)
if err == nil && m != nil {
// Lock for encoder usage
s.encoderLock.Lock()
// Encode measurement name
s.encoder.StartLine(m.Name())
// Encode measurement name
s.encoder.StartLine(m.Name())
// copy tags and meta data which should be used as tags
s.extended_tag_list = s.extended_tag_list[:0]
for key, value := range m.Tags() {
s.extended_tag_list =
append(
s.extended_tag_list,
key_value_pair{
key: key,
value: value,
},
)
}
for _, key := range s.config.MetaAsTags {
if value, ok := m.GetMeta(key); ok {
// copy tags and meta data which should be used as tags
s.extended_tag_list = s.extended_tag_list[:0]
for key, value := range m.Tags() {
s.extended_tag_list =
append(
s.extended_tag_list,
@@ -255,45 +262,57 @@ func (s *InfluxSink) Write(m lp.CCMetric) error {
},
)
}
}
// for _, key := range s.config.MetaAsTags {
// if value, ok := m.GetMeta(key); ok {
// s.extended_tag_list =
// append(
// s.extended_tag_list,
// key_value_pair{
// key: key,
// value: value,
// },
// )
// }
// }
// Encode tags (they musts be in lexical order)
slices.SortFunc(
s.extended_tag_list,
func(a key_value_pair, b key_value_pair) int {
if a.key < b.key {
return -1
}
if a.key > b.key {
return +1
}
return 0
},
)
for i := range s.extended_tag_list {
s.encoder.AddTag(
s.extended_tag_list[i].key,
s.extended_tag_list[i].value,
// Encode tags (they musts be in lexical order)
slices.SortFunc(
s.extended_tag_list,
func(a key_value_pair, b key_value_pair) int {
if a.key < b.key {
return -1
}
if a.key > b.key {
return +1
}
return 0
},
)
for i := range s.extended_tag_list {
s.encoder.AddTag(
s.extended_tag_list[i].key,
s.extended_tag_list[i].value,
)
}
// Encode fields
for key, value := range m.Fields() {
s.encoder.AddField(key, influx.MustNewValue(value))
}
// Encode time stamp
s.encoder.EndLine(m.Time())
// Check for encoder errors
if err := s.encoder.Err(); err != nil {
// Unlock encoder usage
s.encoderLock.Unlock()
return fmt.Errorf("encoding failed: %v", err)
}
s.numRecordsInEncoder++
}
// Encode fields
for key, value := range m.Fields() {
s.encoder.AddField(key, influx.MustNewValue(value))
}
// Encode time stamp
s.encoder.EndLine(m.Time())
// Check for encoder errors
if err := s.encoder.Err(); err != nil {
// Unlock encoder usage
s.encoderLock.Unlock()
return fmt.Errorf("Encoding failed: %v", err)
}
s.numRecordsInEncoder++
if s.config.flushDelay == 0 {
// Unlock encoder usage
s.encoderLock.Unlock()
@@ -417,6 +436,7 @@ func NewInfluxSink(name string, config json.RawMessage) (Sink, error) {
// Set config default values
s.config.BatchSize = 1000
s.config.FlushInterval = "1s"
s.config.Precision = "s"
// Read config
if len(config) > 0 {
@@ -443,11 +463,20 @@ func NewInfluxSink(name string, config json.RawMessage) (Sink, error) {
if len(s.config.Password) == 0 {
return s, errors.New("missing password configuration required by InfluxSink")
}
p, err := mp.NewMessageProcessor()
if err != nil {
return nil, fmt.Errorf("initialization of message processor failed: %v", err.Error())
}
s.mp = p
// Create lookup map to use meta infos as tags in the output metric
s.meta_as_tags = make(map[string]bool)
if len(s.config.MessageProcessor) > 0 {
err = p.FromConfigJSON(s.config.MessageProcessor)
if err != nil {
return nil, fmt.Errorf("failed parsing JSON for message processor: %v", err.Error())
}
}
for _, k := range s.config.MetaAsTags {
s.meta_as_tags[k] = true
s.mp.AddMoveMetaToTags("true", k, k)
}
// Configure flush delay duration

View File

@@ -17,14 +17,17 @@ The `influxdb` sink uses the official [InfluxDB golang client](https://pkg.go.de
"ssl": true,
"flush_delay" : "1s",
"batch_size" : 1000,
"use_gzip": true
"meta_as_tags" : [],
"use_gzip": true,
"precision": "s",
"process_messages" : {
"see" : "docs of message processor for valid fields"
},
"meta_as_tags" : []
}
}
```
- `type`: makes the sink an `influxdb` sink
- `meta_as_tags`: print all meta information as tags in the output (optional)
- `database`: All metrics are written to this bucket
- `host`: Hostname of the InfluxDB database server
- `port`: Port number (as string) of the InfluxDB database server
@@ -34,6 +37,9 @@ The `influxdb` sink uses the official [InfluxDB golang client](https://pkg.go.de
- `ssl`: Use SSL connection
- `flush_delay`: Group metrics coming in to a single batch
- `batch_size`: Maximal batch size. If `batch_size` is reached before the end of `flush_delay`, the metrics are sent without further delay
- `precision`: Precision of the timestamp. Valid values are 's', 'ms', 'us' and 'ns'. (default is 's')
- `process_messages`: Process messages with given rules before progressing or dropping, see [here](../pkg/messageProcessor/README.md) (optional)
- `meta_as_tags`: print all meta information as tags in the output (deprecated, optional)
Influx client options:
=======
@@ -46,3 +52,7 @@ Influx client options:
- `max_retries`: maximum count of retry attempts of failed writes
- `max_retry_time`: maximum total retry timeout
- `use_gzip`: Specify whether to use GZip compression in write requests
### Using `influxdb` sink for communication with cc-metric-store
The cc-metric-store only accepts metrics with a timestamp precision in seconds, so it is required to use `"precision": "s"`.

View File

@@ -72,8 +72,9 @@ import (
"fmt"
"unsafe"
lp "github.com/ClusterCockpit/cc-energy-manager/pkg/cc-message"
cclog "github.com/ClusterCockpit/cc-metric-collector/pkg/ccLogger"
lp "github.com/ClusterCockpit/cc-metric-collector/pkg/ccMetric"
mp "github.com/ClusterCockpit/cc-metric-collector/pkg/messageProcessor"
"github.com/NVIDIA/go-nvml/pkg/dl"
)
@@ -110,99 +111,102 @@ type LibgangliaSink struct {
cstrCache map[string]*C.char
}
func (s *LibgangliaSink) Write(point lp.CCMetric) error {
func (s *LibgangliaSink) Write(msg lp.CCMessage) error {
var err error = nil
var c_name *C.char
var c_value *C.char
var c_type *C.char
var c_unit *C.char
// helper function for looking up C strings in the cache
lookup := func(key string) *C.char {
if _, exist := s.cstrCache[key]; !exist {
s.cstrCache[key] = C.CString(key)
point, err := s.mp.ProcessMessage(msg)
if err == nil && point != nil {
// helper function for looking up C strings in the cache
lookup := func(key string) *C.char {
if _, exist := s.cstrCache[key]; !exist {
s.cstrCache[key] = C.CString(key)
}
return s.cstrCache[key]
}
return s.cstrCache[key]
}
conf := GetCommonGangliaConfig(point)
if len(conf.Type) == 0 {
conf = GetGangliaConfig(point)
}
if len(conf.Type) == 0 {
return fmt.Errorf("metric %q (Ganglia name %q) has no 'value' field", point.Name(), conf.Name)
}
conf := GetCommonGangliaConfig(point)
if len(conf.Type) == 0 {
conf = GetGangliaConfig(point)
}
if len(conf.Type) == 0 {
return fmt.Errorf("metric %q (Ganglia name %q) has no 'value' field", point.Name(), conf.Name)
}
if s.config.AddTypeToName {
conf.Name = GangliaMetricName(point)
}
if s.config.AddTypeToName {
conf.Name = GangliaMetricName(point)
}
c_value = C.CString(conf.Value)
c_type = lookup(conf.Type)
c_name = lookup(conf.Name)
c_value = C.CString(conf.Value)
c_type = lookup(conf.Type)
c_name = lookup(conf.Name)
// Add unit
unit := ""
if s.config.AddUnits {
unit = conf.Unit
}
c_unit = lookup(unit)
// Add unit
unit := ""
if s.config.AddUnits {
unit = conf.Unit
}
c_unit = lookup(unit)
// Determine the slope of the metric. Ganglia's own collector mostly use
// 'both' but the mem and swap total uses 'zero'.
slope_type := C.GANGLIA_SLOPE_BOTH
switch conf.Slope {
case "zero":
slope_type = C.GANGLIA_SLOPE_ZERO
case "both":
slope_type = C.GANGLIA_SLOPE_BOTH
}
// Determine the slope of the metric. Ganglia's own collector mostly use
// 'both' but the mem and swap total uses 'zero'.
slope_type := C.GANGLIA_SLOPE_BOTH
switch conf.Slope {
case "zero":
slope_type = C.GANGLIA_SLOPE_ZERO
case "both":
slope_type = C.GANGLIA_SLOPE_BOTH
}
// Create a new Ganglia metric
gmetric := C.Ganglia_metric_create(s.global_context)
// Set name, value, type and unit in the Ganglia metric
// The default slope_type is both directions, so up and down. Some metrics want 'zero' slope, probably constant.
// The 'tmax' value is by default 300.
rval := C.int(0)
rval = C.Ganglia_metric_set(gmetric, c_name, c_value, c_type, c_unit, C.uint(slope_type), C.uint(conf.Tmax), 0)
switch rval {
case 1:
// Create a new Ganglia metric
gmetric := C.Ganglia_metric_create(s.global_context)
// Set name, value, type and unit in the Ganglia metric
// The default slope_type is both directions, so up and down. Some metrics want 'zero' slope, probably constant.
// The 'tmax' value is by default 300.
rval := C.int(0)
rval = C.Ganglia_metric_set(gmetric, c_name, c_value, c_type, c_unit, C.uint(slope_type), C.uint(conf.Tmax), 0)
switch rval {
case 1:
C.free(unsafe.Pointer(c_value))
return errors.New("invalid parameters")
case 2:
C.free(unsafe.Pointer(c_value))
return errors.New("one of your parameters has an invalid character '\"'")
case 3:
C.free(unsafe.Pointer(c_value))
return fmt.Errorf("the type parameter \"%s\" is not a valid type", conf.Type)
case 4:
C.free(unsafe.Pointer(c_value))
return fmt.Errorf("the value parameter \"%s\" does not represent a number", conf.Value)
default:
}
// Set the cluster name, otherwise it takes it from the configuration file
if len(s.config.ClusterName) > 0 {
C.Ganglia_metadata_add(gmetric, lookup("CLUSTER"), lookup(s.config.ClusterName))
}
// Set the group metadata in the Ganglia metric if configured
if s.config.AddGangliaGroup {
c_group := lookup(conf.Group)
C.Ganglia_metadata_add(gmetric, lookup("GROUP"), c_group)
}
// Now we send the metric
// gmetric does provide some more options like description and other options
// but they are not provided by the collectors
rval = C.Ganglia_metric_send(gmetric, s.send_channels)
if rval != 0 {
err = fmt.Errorf("there was an error sending metric %s to %d of the send channels ", point.Name(), rval)
// fall throuph to use Ganglia_metric_destroy from common cleanup
}
// Cleanup Ganglia metric
C.Ganglia_metric_destroy(gmetric)
// Free the value C string, the only one not stored in the cache
C.free(unsafe.Pointer(c_value))
return errors.New("invalid parameters")
case 2:
C.free(unsafe.Pointer(c_value))
return errors.New("one of your parameters has an invalid character '\"'")
case 3:
C.free(unsafe.Pointer(c_value))
return fmt.Errorf("the type parameter \"%s\" is not a valid type", conf.Type)
case 4:
C.free(unsafe.Pointer(c_value))
return fmt.Errorf("the value parameter \"%s\" does not represent a number", conf.Value)
default:
}
// Set the cluster name, otherwise it takes it from the configuration file
if len(s.config.ClusterName) > 0 {
C.Ganglia_metadata_add(gmetric, lookup("CLUSTER"), lookup(s.config.ClusterName))
}
// Set the group metadata in the Ganglia metric if configured
if s.config.AddGangliaGroup {
c_group := lookup(conf.Group)
C.Ganglia_metadata_add(gmetric, lookup("GROUP"), c_group)
}
// Now we send the metric
// gmetric does provide some more options like description and other options
// but they are not provided by the collectors
rval = C.Ganglia_metric_send(gmetric, s.send_channels)
if rval != 0 {
err = fmt.Errorf("there was an error sending metric %s to %d of the send channels ", point.Name(), rval)
// fall throuph to use Ganglia_metric_destroy from common cleanup
}
// Cleanup Ganglia metric
C.Ganglia_metric_destroy(gmetric)
// Free the value C string, the only one not stored in the cache
C.free(unsafe.Pointer(c_value))
return err
}
@@ -241,6 +245,20 @@ func NewLibgangliaSink(name string, config json.RawMessage) (Sink, error) {
return nil, err
}
}
p, err := mp.NewMessageProcessor()
if err != nil {
return nil, fmt.Errorf("initialization of message processor failed: %v", err.Error())
}
s.mp = p
if len(s.config.MessageProcessor) > 0 {
err = s.mp.FromConfigJSON(s.config.MessageProcessor)
if err != nil {
return nil, fmt.Errorf("failed parsing JSON for message processor: %v", err.Error())
}
}
for _, k := range s.config.MetaAsTags {
s.mp.AddMoveMetaToTags("true", k, k)
}
lib := dl.New(s.config.GangliaLib, GANGLIA_LIB_DL_FLAGS)
if lib == nil {
return nil, fmt.Errorf("error instantiating DynamicLibrary for %s", s.config.GangliaLib)

View File

@@ -15,18 +15,23 @@ The `libganglia` sink has probably less overhead compared to the `ganglia` sink
"cluster_name": "MyCluster",
"add_ganglia_group" : true,
"add_type_to_name": true,
"add_units" : true
"add_units" : true,
"process_messages" : {
"see" : "docs of message processor for valid fields"
},
"meta_as_tags" : []
}
}
```
- `type`: makes the sink a `libganglia` sink
- `meta_as_tags`: print all meta information as tags in the output (optional)
- `gmond_config`: Path to the Ganglia configuration file `gmond.conf` (default: `/etc/ganglia/gmond.conf`)
- `cluster_name`: Set a cluster name for the metric. If not set, it is taken from `gmond_config`
- `add_ganglia_group`: Add a Ganglia metric group based on meta information. Some old versions of `gmetric` do not support the `--group` option
- `add_type_to_name`: Ganglia commonly uses only node-level metrics but with cc-metric-collector, there are metrics for cpus, memory domains, CPU sockets and the whole node. In order to tell these metrics apart, this option prefixes the metric name with `<type><type-id>_` or `device_` depending on the metric tags and meta information. For metrics of the whole node `type=node`, no prefix is added
- `add_units`: Add metric value unit if there is a `unit` entry in the metric tags or meta information
- `process_messages`: Process messages with given rules before progressing or dropping, see [here](../pkg/messageProcessor/README.md) (optional)
- `meta_as_tags`: print all meta information as tags in the output (deprecated, optional)
### Ganglia Installation

View File

@@ -1,24 +1,29 @@
package sinks
import (
lp "github.com/ClusterCockpit/cc-metric-collector/pkg/ccMetric"
"encoding/json"
lp "github.com/ClusterCockpit/cc-energy-manager/pkg/cc-message"
mp "github.com/ClusterCockpit/cc-metric-collector/pkg/messageProcessor"
)
type defaultSinkConfig struct {
MetaAsTags []string `json:"meta_as_tags,omitempty"`
Type string `json:"type"`
MetaAsTags []string `json:"meta_as_tags,omitempty"`
MessageProcessor json.RawMessage `json:"process_messages,omitempty"`
Type string `json:"type"`
}
type sink struct {
meta_as_tags map[string]bool // Use meta data tags as tags
name string // Name of the sink
meta_as_tags map[string]bool // Use meta data tags as tags
mp mp.MessageProcessor // message processor for the sink
name string // Name of the sink
}
type Sink interface {
Write(point lp.CCMetric) error // Write metric to the sink
Flush() error // Flush buffered metrics
Close() // Close / finish metric sink
Name() string // Name of the metric sink
Write(point lp.CCMessage) error // Write metric to the sink
Flush() error // Flush buffered metrics
Close() // Close / finish metric sink
Name() string // Name of the metric sink
}
// Name returns the name of the metric sink

View File

@@ -5,13 +5,16 @@ import (
"encoding/json"
"errors"
"fmt"
"os"
"sync"
"time"
lp "github.com/ClusterCockpit/cc-energy-manager/pkg/cc-message"
cclog "github.com/ClusterCockpit/cc-metric-collector/pkg/ccLogger"
lp "github.com/ClusterCockpit/cc-metric-collector/pkg/ccMetric"
influx "github.com/influxdata/line-protocol"
mp "github.com/ClusterCockpit/cc-metric-collector/pkg/messageProcessor"
influx "github.com/influxdata/line-protocol/v2/lineprotocol"
nats "github.com/nats-io/nats.go"
"golang.org/x/exp/slices"
)
type NatsSinkConfig struct {
@@ -22,18 +25,24 @@ type NatsSinkConfig struct {
User string `json:"user,omitempty"`
Password string `json:"password,omitempty"`
FlushDelay string `json:"flush_delay,omitempty"`
NkeyFile string `json:"nkey_file,omitempty"`
// Timestamp precision
Precision string `json:"precision,omitempty"`
}
type NatsSink struct {
sink
client *nats.Conn
encoder *influx.Encoder
encoder influx.Encoder
buffer *bytes.Buffer
config NatsSinkConfig
lock sync.Mutex
flushDelay time.Duration
flushTimer *time.Timer
extended_tag_list []key_value_pair
}
func (s *NatsSink) connect() error {
@@ -42,6 +51,13 @@ func (s *NatsSink) connect() error {
var nc *nats.Conn
if len(s.config.User) > 0 && len(s.config.Password) > 0 {
uinfo = nats.UserInfo(s.config.User, s.config.Password)
} else if len(s.config.NkeyFile) > 0 {
if _, err := os.Stat(s.config.NkeyFile); err == nil {
uinfo = nats.UserCredentials(s.config.NkeyFile)
} else {
cclog.ComponentError(s.name, "NKEY file", s.config.NkeyFile, "does not exist: %v", err.Error())
return err
}
}
uri := fmt.Sprintf("nats://%s:%s", s.config.Host, s.config.Port)
cclog.ComponentDebug(s.name, "Connect to", uri)
@@ -59,13 +75,61 @@ func (s *NatsSink) connect() error {
return nil
}
func (s *NatsSink) Write(m lp.CCMetric) error {
s.lock.Lock()
_, err := s.encoder.Encode(m.ToPoint(s.meta_as_tags))
s.lock.Unlock()
if err != nil {
cclog.ComponentError(s.name, "Write:", err.Error())
return err
func (s *NatsSink) Write(m lp.CCMessage) error {
msg, err := s.mp.ProcessMessage(m)
if err == nil && msg != nil {
s.lock.Lock()
// Encode measurement name
s.encoder.StartLine(msg.Name())
// copy tags and meta data which should be used as tags
s.extended_tag_list = s.extended_tag_list[:0]
for key, value := range m.Tags() {
s.extended_tag_list =
append(
s.extended_tag_list,
key_value_pair{
key: key,
value: value,
},
)
}
// Encode tags (they musts be in lexical order)
slices.SortFunc(
s.extended_tag_list,
func(a key_value_pair, b key_value_pair) int {
if a.key < b.key {
return -1
}
if a.key > b.key {
return +1
}
return 0
},
)
for i := range s.extended_tag_list {
s.encoder.AddTag(
s.extended_tag_list[i].key,
s.extended_tag_list[i].value,
)
}
// Encode fields
for key, value := range msg.Fields() {
s.encoder.AddField(key, influx.MustNewValue(value))
}
// Encode time stamp
s.encoder.EndLine(msg.Time())
// Check for encoder errors
err := s.encoder.Err()
s.lock.Unlock()
if err != nil {
cclog.ComponentError(s.name, "Write:", err.Error())
return err
}
}
if s.flushDelay == 0 {
@@ -83,14 +147,13 @@ func (s *NatsSink) Write(m lp.CCMetric) error {
func (s *NatsSink) Flush() error {
s.lock.Lock()
buf := append([]byte{}, s.buffer.Bytes()...) // copy bytes
s.buffer.Reset()
buf := slices.Clone(s.encoder.Bytes())
s.encoder.Reset()
s.lock.Unlock()
if len(buf) == 0 {
return nil
}
if err := s.client.Publish(s.config.Subject, buf); err != nil {
cclog.ComponentError(s.name, "Flush:", err.Error())
return err
@@ -107,6 +170,8 @@ func NewNatsSink(name string, config json.RawMessage) (Sink, error) {
s := new(NatsSink)
s.name = fmt.Sprintf("NatsSink(%s)", name)
s.flushDelay = 10 * time.Second
s.config.Port = "4222"
s.config.Precision = "s"
if len(config) > 0 {
d := json.NewDecoder(bytes.NewReader(config))
d.DisallowUnknownFields()
@@ -120,17 +185,41 @@ func NewNatsSink(name string, config json.RawMessage) (Sink, error) {
len(s.config.Subject) == 0 {
return nil, errors.New("not all configuration variables set required by NatsSink")
}
// Create lookup map to use meta infos as tags in the output metric
s.meta_as_tags = make(map[string]bool)
for _, k := range s.config.MetaAsTags {
s.meta_as_tags[k] = true
p, err := mp.NewMessageProcessor()
if err != nil {
return nil, fmt.Errorf("initialization of message processor failed: %v", err.Error())
}
s.mp = p
if len(s.config.MessageProcessor) > 0 {
err = s.mp.FromConfigJSON(s.config.MessageProcessor)
if err != nil {
return nil, fmt.Errorf("failed parsing JSON for message processor: %v", err.Error())
}
}
// Create lookup map to use meta infos as tags in the output metric
for _, k := range s.config.MetaAsTags {
s.mp.AddMoveMetaToTags("true", k, k)
}
precision := influx.Second
if len(s.config.Precision) > 0 {
switch s.config.Precision {
case "s":
precision = influx.Second
case "ms":
precision = influx.Millisecond
case "us":
precision = influx.Microsecond
case "ns":
precision = influx.Nanosecond
}
}
// s.meta_as_tags = make(map[string]bool)
// for _, k := range s.config.MetaAsTags {
// s.meta_as_tags[k] = true
// }
// Setup Influx line protocol
s.buffer = &bytes.Buffer{}
s.buffer.Grow(1025)
s.encoder = influx.NewEncoder(s.buffer)
s.encoder.SetPrecision(time.Second)
s.encoder.SetMaxLineBytes(1024)
s.encoder.SetPrecision(precision)
// Setup infos for connection
if err := s.connect(); err != nil {
return nil, fmt.Errorf("unable to connect: %v", err)
@@ -144,6 +233,7 @@ func NewNatsSink(name string, config json.RawMessage) (Sink, error) {
return nil, err
}
}
s.extended_tag_list = make([]key_value_pair, 0)
return s, nil
}

View File

@@ -13,7 +13,13 @@ The `nats` sink publishes all metrics into a NATS network. The publishing key is
"port": "4222",
"user": "exampleuser",
"password" : "examplepw",
"meta_as_tags" : [],
"nkey_file": "/path/to/nkey_file",
"flush_delay": "10s",
"precision": "s",
"process_messages" : {
"see" : "docs of message processor for valid fields"
},
"meta_as_tags" : []
}
}
```
@@ -24,4 +30,12 @@ The `nats` sink publishes all metrics into a NATS network. The publishing key is
- `port`: Port number (as string) of the NATS server
- `user`: Username for basic authentication
- `password`: Password for basic authentication
- `meta_as_tags`: print all meta information as tags in the output (optional)
- `nkey_file`: Path to credentials file with NKEY
- `flush_delay`: Maximum time until metrics are sent out
- `precision`: Precision of the timestamp. Valid values are 's', 'ms', 'us' and 'ns'. (default is 's')
- `process_messages`: Process messages with given rules before progressing or dropping, see [here](../pkg/messageProcessor/README.md) (optional)
- `meta_as_tags`: print all meta information as tags in the output (deprecated, optional)
### Using `nats` sink for communication with cc-metric-store
The cc-metric-store only accepts metrics with a timestamp precision in seconds, so it is required to use `"precision": "s"`.

View File

@@ -10,8 +10,9 @@ import (
"strings"
"sync"
lp "github.com/ClusterCockpit/cc-energy-manager/pkg/cc-message"
cclog "github.com/ClusterCockpit/cc-metric-collector/pkg/ccLogger"
lp "github.com/ClusterCockpit/cc-metric-collector/pkg/ccMetric"
mp "github.com/ClusterCockpit/cc-metric-collector/pkg/messageProcessor"
"github.com/gorilla/mux"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promhttp"
@@ -49,11 +50,13 @@ func intToFloat64(input interface{}) (float64, error) {
return float64(value), nil
case int64:
return float64(value), nil
case uint64:
return float64(value), nil
}
return 0, errors.New("cannot cast value to float64")
}
func getLabelValue(metric lp.CCMetric) []string {
func getLabelValue(metric lp.CCMessage) []string {
labelValues := []string{}
if tid, tidok := metric.GetTag("type-id"); tidok && metric.HasTag("type") {
labelValues = append(labelValues, tid)
@@ -66,7 +69,7 @@ func getLabelValue(metric lp.CCMetric) []string {
return labelValues
}
func getLabelNames(metric lp.CCMetric) []string {
func getLabelNames(metric lp.CCMessage) []string {
labelNames := []string{}
if t, tok := metric.GetTag("type"); tok && metric.HasTag("type-id") {
labelNames = append(labelNames, t)
@@ -79,7 +82,7 @@ func getLabelNames(metric lp.CCMetric) []string {
return labelNames
}
func (s *PrometheusSink) newMetric(metric lp.CCMetric) error {
func (s *PrometheusSink) newMetric(metric lp.CCMessage) error {
var value float64 = 0
name := metric.Name()
opts := prometheus.GaugeOpts{
@@ -117,7 +120,7 @@ func (s *PrometheusSink) newMetric(metric lp.CCMetric) error {
return nil
}
func (s *PrometheusSink) updateMetric(metric lp.CCMetric) error {
func (s *PrometheusSink) updateMetric(metric lp.CCMessage) error {
var value float64 = 0.0
name := metric.Name()
labelValues := getLabelValue(metric)
@@ -150,8 +153,12 @@ func (s *PrometheusSink) updateMetric(metric lp.CCMetric) error {
return nil
}
func (s *PrometheusSink) Write(m lp.CCMetric) error {
return s.updateMetric(m)
// Write submits the message m to the sink's message processor and updates
// the corresponding Prometheus gauge from the processed result.
//
// The gauge is updated from the message returned by ProcessMessage, not from
// the raw input m, so that the configured 'process_messages' rules and the
// legacy 'meta_as_tags' move rules take effect on the exported metric.
//
// Returns the error from the message processor or from updateMetric. A nil
// message without error (presumably a message dropped by a processor rule)
// is skipped and nil is returned.
func (s *PrometheusSink) Write(m lp.CCMessage) error {
	msg, err := s.mp.ProcessMessage(m)
	if err != nil {
		return err
	}
	if msg == nil {
		// Nothing left to export for this message
		return nil
	}
	return s.updateMetric(msg)
}
func (s *PrometheusSink) Flush() error {
@@ -180,6 +187,20 @@ func NewPrometheusSink(name string, config json.RawMessage) (Sink, error) {
cclog.ComponentError(s.name, err.Error())
return nil, err
}
p, err := mp.NewMessageProcessor()
if err != nil {
return nil, fmt.Errorf("initialization of message processor failed: %v", err.Error())
}
s.mp = p
if len(s.config.MessageProcessor) > 0 {
err = p.FromConfigJSON(s.config.MessageProcessor)
if err != nil {
return nil, fmt.Errorf("failed parsing JSON for message processor: %v", err.Error())
}
}
for _, k := range s.config.MetaAsTags {
s.mp.AddMoveMetaToTags("true", k, k)
}
s.labelMetrics = make(map[string]*prometheus.GaugeVec)
s.nodeMetrics = make(map[string]prometheus.Gauge)
s.promWg.Add(1)

View File

@@ -11,7 +11,11 @@ The `prometheus` sink publishes all metrics via an HTTP server ready to be scrap
"type": "prometheus",
"host": "localhost",
"port": "8080",
"path": "metrics"
"path": "metrics",
"process_messages" : {
"see" : "docs of message processor for valid fields"
},
"meta_as_tags" : []
}
}
```
@@ -21,3 +25,5 @@ The `prometheus` sink publishes all metrics via an HTTP server ready to be scrap
- `port`: Port number (as string) for the HTTP server
- `path`: Path where the metrics should be served. The metrics will be published at `host`:`port`/`path`
- `group_as_namespace`: Most metrics contain a group as meta information like 'memory', 'load'. With this the metric names are extended to `group`_`name` if possible.
- `process_messages`: Process messages with given rules before progressing or dropping, see [here](../pkg/messageProcessor/README.md) (optional)
- `meta_as_tags`: print all meta information as tags in the output (deprecated, optional)

View File

@@ -6,8 +6,9 @@ import (
"fmt"
"log"
lp "github.com/ClusterCockpit/cc-energy-manager/pkg/cc-message"
cclog "github.com/ClusterCockpit/cc-metric-collector/pkg/ccLogger"
lp "github.com/ClusterCockpit/cc-metric-collector/pkg/ccMetric"
mp "github.com/ClusterCockpit/cc-metric-collector/pkg/messageProcessor"
)
type SampleSinkConfig struct {
@@ -28,9 +29,14 @@ type SampleSink struct {
// See: metricSink.go
// Code to submit a single CCMetric to the sink
func (s *SampleSink) Write(point lp.CCMetric) error {
func (s *SampleSink) Write(point lp.CCMessage) error {
// based on s.meta_as_tags use meta infos as tags
log.Print(point)
// moreover, submit the point to the message processor
// to apply drop/modify rules
msg, err := s.mp.ProcessMessage(point)
if err == nil && msg != nil {
log.Print(msg)
}
return nil
}
@@ -66,10 +72,24 @@ func NewSampleSink(name string, config json.RawMessage) (Sink, error) {
}
}
// Create lookup map to use meta infos as tags in the output metric
s.meta_as_tags = make(map[string]bool)
// Initialize and configure the message processor
p, err := mp.NewMessageProcessor()
if err != nil {
return nil, fmt.Errorf("initialization of message processor failed: %v", err.Error())
}
s.mp = p
// Add message processor configuration
if len(s.config.MessageProcessor) > 0 {
err = p.FromConfigJSON(s.config.MessageProcessor)
if err != nil {
return nil, fmt.Errorf("failed parsing JSON for message processor: %v", err.Error())
}
}
// Add rules to move meta information to tag space
// Replacing the legacy 'meta_as_tags' configuration
for _, k := range s.config.MetaAsTags {
s.meta_as_tags[k] = true
s.mp.AddMoveMetaToTags("true", k, k)
}
// Check if all required fields in the config are set

View File

@@ -7,7 +7,7 @@ import (
"sync"
cclog "github.com/ClusterCockpit/cc-metric-collector/pkg/ccLogger"
lp "github.com/ClusterCockpit/cc-metric-collector/pkg/ccMetric"
lp "github.com/ClusterCockpit/cc-energy-manager/pkg/cc-message"
)
const SINK_MAX_FORWARD = 50
@@ -21,11 +21,12 @@ var AvailableSinks = map[string]func(name string, config json.RawMessage) (Sink,
"influxdb": NewInfluxSink,
"influxasync": NewInfluxAsyncSink,
"http": NewHttpSink,
"prometheus": NewPrometheusSink,
}
// Metric collector manager data structure
type sinkManager struct {
input chan lp.CCMetric // input channel
input chan lp.CCMessage // input channel
done chan bool // channel to finish / stop metric sink manager
wg *sync.WaitGroup // wait group for all goroutines in cc-metric-collector
sinks map[string]Sink // Mapping sink name to sink
@@ -35,7 +36,7 @@ type sinkManager struct {
// Sink manager access functions
type SinkManager interface {
Init(wg *sync.WaitGroup, sinkConfigFile string) error
AddInput(input chan lp.CCMetric)
AddInput(input chan lp.CCMessage)
AddOutput(name string, config json.RawMessage) error
Start()
Close()
@@ -107,7 +108,7 @@ func (sm *sinkManager) Start() {
cclog.ComponentDebug("SinkManager", "DONE")
}
toTheSinks := func(p lp.CCMetric) {
toTheSinks := func(p lp.CCMessage) {
// Send received metric to all outputs
cclog.ComponentDebug("SinkManager", "WRITE", p)
for _, s := range sm.sinks {
@@ -138,7 +139,7 @@ func (sm *sinkManager) Start() {
}
// AddInput adds the input channel to the sink manager
func (sm *sinkManager) AddInput(input chan lp.CCMetric) {
func (sm *sinkManager) AddInput(input chan lp.CCMessage) {
sm.input = input
}

View File

@@ -8,8 +8,9 @@ import (
"strings"
// "time"
lp "github.com/ClusterCockpit/cc-energy-manager/pkg/cc-message"
cclog "github.com/ClusterCockpit/cc-metric-collector/pkg/ccLogger"
lp "github.com/ClusterCockpit/cc-metric-collector/pkg/ccMetric"
mp "github.com/ClusterCockpit/cc-metric-collector/pkg/messageProcessor"
)
type StdoutSink struct {
@@ -21,11 +22,14 @@ type StdoutSink struct {
}
}
func (s *StdoutSink) Write(m lp.CCMetric) error {
fmt.Fprint(
s.output,
m.ToLineProtocol(s.meta_as_tags),
)
// Write runs the message m through the sink's message processor and, when a
// message comes back without error, prints it in line protocol format to the
// configured output. Processor errors are not propagated: Write always
// returns nil.
func (s *StdoutSink) Write(m lp.CCMessage) error {
	processed, procErr := s.mp.ProcessMessage(m)
	if procErr != nil || processed == nil {
		// Nothing to print for failed or absent messages
		return nil
	}
	line := processed.ToLineProtocol(s.meta_as_tags)
	fmt.Fprint(s.output, line)
	return nil
}
@@ -41,6 +45,7 @@ func (s *StdoutSink) Close() {
}
func NewStdoutSink(name string, config json.RawMessage) (Sink, error) {
s := new(StdoutSink)
s.name = fmt.Sprintf("StdoutSink(%s)", name)
if len(config) > 0 {
@@ -51,6 +56,11 @@ func NewStdoutSink(name string, config json.RawMessage) (Sink, error) {
return nil, err
}
}
p, err := mp.NewMessageProcessor()
if err != nil {
return nil, fmt.Errorf("initialization of message processor failed: %v", err.Error())
}
s.mp = p
s.output = os.Stdout
if len(s.config.Output) > 0 {
@@ -67,10 +77,21 @@ func NewStdoutSink(name string, config json.RawMessage) (Sink, error) {
s.output = f
}
}
// Add message processor configuration
if len(s.config.MessageProcessor) > 0 {
err = s.mp.FromConfigJSON(s.config.MessageProcessor)
if err != nil {
return nil, fmt.Errorf("failed parsing JSON for message processor: %v", err.Error())
}
}
// Create lookup map to use meta infos as tags in the output metric
s.meta_as_tags = make(map[string]bool)
// s.meta_as_tags = make(map[string]bool)
// for _, k := range s.config.MetaAsTags {
// s.meta_as_tags[k] = true
// }
for _, k := range s.config.MetaAsTags {
s.meta_as_tags[k] = true
s.mp.AddMoveMetaToTags("true", k, k)
}
return s, nil

View File

@@ -10,7 +10,11 @@ The `stdout` sink is the most simple sink provided by cc-metric-collector. It wr
"<name>": {
"type": "stdout",
"meta_as_tags" : [],
"output_file" : "mylogfile.log"
"output_file" : "mylogfile.log",
"process_messages" : {
"see" : "docs of message processor for valid fields"
},
"meta_as_tags" : []
}
}
```
@@ -18,5 +22,6 @@ The `stdout` sink is the most simple sink provided by cc-metric-collector. It wr
- `type`: makes the sink an `stdout` sink
- `meta_as_tags`: print meta information as tags in the output (optional)
- `output_file`: Write all data to the selected file (optional). There are two 'special' files: `stdout` and `stderr`. If this option is not provided, the default value is `stdout`
- `process_messages`: Process messages with given rules before progressing or dropping, see [here](../pkg/messageProcessor/README.md) (optional)
- `meta_as_tags`: print all meta information as tags in the output (deprecated, optional)