Merge latest developments into main (#67)

* Update configuration.md

Add an additional receiver to have better alignment of components

* Change default GpfsCollector command to `mmpmon` (#53)

* Set default cmd to 'mmpmon'

* Reuse looked up path

* Cast const to string

* Just download LIKWID to get the headers (#54)

* Just download LIKWID to get the headers

* Remove perl-Data-Dumper from BuildRequires, only required by LIKWID build

* Add HttpReceiver as counterpart to the HttpSink (#49)

* Use GBytes as unit for large memory numbers

* Make maxForward configurable, save old name in meta in rename metrics and make the hostname tag key configurable

* Single release action (#55)

Building all RPMs and releasing in a single workflow

* Makefile target to build binary-only Debian packages (#61)

* Add 'install' and 'DEB' make targets to build binary-only Debian packages

* Add control file for DEB builds

* Use a single line for bash loop in make clean

* Add config options for retry intervals of InfluxDB clients (#59)

* Refactoring of LikwidCollector and metric units (#62)

* Reduce complexity of LikwidCollector and allow metric units

* Add unit to LikwidCollector docu and fix some typos

* Make library path configurable

* Use old metric name in Ganglia if rename has happened in the router (#60)

* Use old metric name if rename has happened in the router

* Also check for Ganglia renames for the oldname

* Derived metrics (#57)

* Add time-based derivatives (e.g. bandwidth) to some collectors

* Add documentation

* Add comments

* Fix: Only compute rates with a valid previous state

* Only compute rates with a valid previous state

* Define const values for net/dev fields

* Set default config values

* Add comments

* Refactor: Consolidate data structures

* Refactor: Consolidate data structures

* Refactor: Avoid struct deep copy

* Refactor: Avoid redundant tag maps

* Refactor: Use int64 type for absolute values

Co-authored-by: Holger Obermaier <40787752+ho-ob@users.noreply.github.com>

* Simplified iota usage

* Move unit tag to meta data tags

* Derived metrics (#65)

* Add time-based derivatives (e.g. bandwidth) to some collectors

* Add documentation

* Add comments

* Fix: Only compute rates with a valid previous state

* Only compute rates with a valid previous state

* Define const values for net/dev fields

* Set default config values

* Add comments

* Refactor: Consolidate data structures

* Refactor: Consolidate data structures

* Refactor: Avoid struct deep copy

* Refactor: Avoid redundant tag maps

* Refactor: Use int64 type for absolute values

* Update LustreCollector

Co-authored-by: Holger Obermaier <40787752+ho-ob@users.noreply.github.com>

* Meta to tags list and map for sinks (#63)

* Change ccMetric->Influx functions

* Use a meta_as_tags string list in config but create a lookup map afterwards

* Add meta as tag logic to sampleSink

* Fix staticcheck warnings (#66)

Co-authored-by: Holger Obermaier <40787752+ho-ob@users.noreply.github.com>
This commit is contained in:
Thomas Gruber
2022-03-15 16:41:11 +01:00
committed by GitHub
parent 3157386b3e
commit 3f76947f54
45 changed files with 1329 additions and 714 deletions

View File

@@ -148,10 +148,14 @@ type GangliaMetricConfig struct {
Unit string
Group string
Value string
Name string
}
func GetCommonGangliaConfig(point lp.CCMetric) GangliaMetricConfig {
mname := GangliaMetricRename(point.Name())
if oldname, ok := point.GetMeta("oldname"); ok {
mname = GangliaMetricRename(oldname)
}
for _, group := range CommonGangliaMetrics {
for _, metric := range group.Metrics {
if metric.Name == mname {
@@ -187,6 +191,7 @@ func GetCommonGangliaConfig(point lp.CCMetric) GangliaMetricConfig {
Tmax: metric.Tmax,
Unit: metric.Unit,
Value: valueStr,
Name: GangliaMetricRename(mname),
}
}
}
@@ -198,10 +203,15 @@ func GetCommonGangliaConfig(point lp.CCMetric) GangliaMetricConfig {
Tmax: 0,
Unit: "",
Value: "",
Name: "",
}
}
func GetGangliaConfig(point lp.CCMetric) GangliaMetricConfig {
mname := GangliaMetricRename(point.Name())
if oldname, ok := point.GetMeta("oldname"); ok {
mname = GangliaMetricRename(oldname)
}
group := ""
if g, ok := point.GetMeta("group"); ok {
group = g
@@ -254,5 +264,6 @@ func GetGangliaConfig(point lp.CCMetric) GangliaMetricConfig {
Tmax: DEFAULT_GANGLIA_METRIC_TMAX,
Unit: unit,
Value: valueStr,
Name: GangliaMetricRename(mname),
}
}

View File

@@ -39,16 +39,13 @@ func (s *GangliaSink) Write(point lp.CCMetric) error {
//var tagsstr []string
var argstr []string
// Get metric name
metricname := GangliaMetricRename(point.Name())
// Get metric config (type, value, ... in suitable format)
conf := GetCommonGangliaConfig(point)
if len(conf.Type) == 0 {
conf = GetGangliaConfig(point)
}
if len(conf.Type) == 0 {
return fmt.Errorf("metric %s has no 'value' field", metricname)
return fmt.Errorf("metric %q (Ganglia name %q) has no 'value' field", point.Name(), conf.Name)
}
if s.config.AddGangliaGroup {
@@ -70,7 +67,7 @@ func (s *GangliaSink) Write(point lp.CCMetric) error {
if s.config.AddTypeToName {
argstr = append(argstr, fmt.Sprintf("--name=%s", GangliaMetricName(point)))
} else {
argstr = append(argstr, fmt.Sprintf("--name=%s", metricname))
argstr = append(argstr, fmt.Sprintf("--name=%s", conf.Name))
}
argstr = append(argstr, fmt.Sprintf("--slope=%s", conf.Slope))
argstr = append(argstr, fmt.Sprintf("--value=%s", conf.Value))

View File

@@ -53,7 +53,7 @@ func (s *HttpSink) Write(m lp.CCMetric) error {
})
}
p := m.ToPoint(s.config.MetaAsTags)
p := m.ToPoint(s.meta_as_tags)
s.lock.Lock()
_, err := s.encoder.Encode(p)
@@ -159,6 +159,11 @@ func NewHttpSink(name string, config json.RawMessage) (Sink, error) {
s.flushDelay = t
}
}
// Create lookup map to use meta infos as tags in the output metric
s.meta_as_tags = make(map[string]bool)
for _, k := range s.config.MetaAsTags {
s.meta_as_tags[k] = true
}
tr := &http.Transport{
MaxIdleConns: s.maxIdleConns,
IdleConnTimeout: s.idleConnTimeout,

View File

@@ -6,6 +6,7 @@ import (
"encoding/json"
"errors"
"fmt"
"time"
cclog "github.com/ClusterCockpit/cc-metric-collector/internal/ccLogger"
lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric"
@@ -26,15 +27,21 @@ type InfluxAsyncSinkConfig struct {
// Maximum number of points sent to server in single request. Default 5000
BatchSize uint `json:"batch_size,omitempty"`
// Interval, in ms, after which the buffer is flushed if it has not already been written (by reaching the batch size). Default 1000ms
FlushInterval uint `json:"flush_interval,omitempty"`
FlushInterval uint `json:"flush_interval,omitempty"`
InfluxRetryInterval string `json:"retry_interval"`
InfluxExponentialBase uint `json:"retry_exponential_base"`
InfluxMaxRetries uint `json:"max_retries"`
InfluxMaxRetryTime string `json:"max_retry_time"`
}
type InfluxAsyncSink struct {
sink
client influxdb2.Client
writeApi influxdb2Api.WriteAPI
errors <-chan error
config InfluxAsyncSinkConfig
client influxdb2.Client
writeApi influxdb2Api.WriteAPI
errors <-chan error
config InfluxAsyncSinkConfig
influxRetryInterval uint
influxMaxRetryTime uint
}
func (s *InfluxAsyncSink) connect() error {
@@ -63,6 +70,11 @@ func (s *InfluxAsyncSink) connect() error {
InsecureSkipVerify: true,
},
)
clientOptions.SetMaxRetryInterval(s.influxRetryInterval)
clientOptions.SetMaxRetryTime(s.influxMaxRetryTime)
clientOptions.SetExponentialBase(s.config.InfluxExponentialBase)
clientOptions.SetMaxRetries(s.config.InfluxMaxRetries)
s.client = influxdb2.NewClientWithOptions(uri, auth, clientOptions)
s.writeApi = s.client.WriteAPI(s.config.Organization, s.config.Database)
ok, err := s.client.Ping(context.Background())
@@ -77,7 +89,7 @@ func (s *InfluxAsyncSink) connect() error {
func (s *InfluxAsyncSink) Write(m lp.CCMetric) error {
s.writeApi.WritePoint(
m.ToPoint(s.config.MetaAsTags),
m.ToPoint(s.meta_as_tags),
)
return nil
}
@@ -99,6 +111,33 @@ func NewInfluxAsyncSink(name string, config json.RawMessage) (Sink, error) {
// Set default for maximum number of points sent to server in single request.
s.config.BatchSize = 100
s.influxRetryInterval = uint(time.Duration(1) * time.Second)
s.config.InfluxRetryInterval = "1s"
s.influxMaxRetryTime = uint(7 * time.Duration(24) * time.Hour)
s.config.InfluxMaxRetryTime = "168h"
s.config.InfluxMaxRetries = 20
s.config.InfluxExponentialBase = 2
// Default retry intervals (in seconds)
// 1 2
// 2 4
// 4 8
// 8 16
// 16 32
// 32 64
// 64 128
// 128 256
// 256 512
// 512 1024
// 1024 2048
// 2048 4096
// 4096 8192
// 8192 16384
// 16384 32768
// 32768 65536
// 65536 131072
// 131072 262144
// 262144 524288
if len(config) > 0 {
err := json.Unmarshal(config, &s.config)
@@ -113,6 +152,21 @@ func NewInfluxAsyncSink(name string, config json.RawMessage) (Sink, error) {
len(s.config.Password) == 0 {
return nil, errors.New("not all configuration variables set required by InfluxAsyncSink")
}
// Create lookup map to use meta infos as tags in the output metric
s.meta_as_tags = make(map[string]bool)
for _, k := range s.config.MetaAsTags {
s.meta_as_tags[k] = true
}
// toUint converts a duration string (e.g. "1s", "168h") into a number of
// milliseconds. If the string cannot be parsed by time.ParseDuration, the
// fallback def is returned unchanged.
// NOTE(review): the fallbacks passed in below were initialised as
// uint(time.Duration) values, i.e. nanosecond counts, whereas the success
// path returns milliseconds — confirm the intended unit of def.
toUint := func(duration string, def uint) uint {
t, err := time.ParseDuration(duration)
if err == nil {
return uint(t.Milliseconds())
}
return def
}
s.influxRetryInterval = toUint(s.config.InfluxRetryInterval, s.influxRetryInterval)
s.influxMaxRetryTime = toUint(s.config.InfluxMaxRetryTime, s.influxMaxRetryTime)
// Connect to InfluxDB server
if err := s.connect(); err != nil {

View File

@@ -18,6 +18,10 @@ The `influxasync` sink uses the official [InfluxDB golang client](https://pkg.go
"organization": "myorg",
"ssl": true,
"batch_size": 200,
"retry_interval" : "1s",
"retry_exponential_base" : 2,
"max_retries": 20,
"max_retry_time" : "168h"
}
}
```
@@ -31,4 +35,10 @@ The `influxasync` sink uses the official [InfluxDB golang client](https://pkg.go
- `password`: Password for basic authentication
- `organization`: Organization in the InfluxDB
- `ssl`: Use SSL connection
- `batch_size`: batch up metrics internally, default 100
- `batch_size`: batch up metrics internally, default 100
- `retry_interval`: Base retry interval for failed write requests, default 1s
- `retry_exponential_base`: The retry interval is exponentially increased with this base, default 2
- `max_retries`: Maximum number of retry attempts, default 20
- `max_retry_time`: Maximum time to retry failed writes, default 168h (one week)
For information about the calculation of the retry interval settings, see the [official influxdb-client-go documentation](https://github.com/influxdata/influxdb-client-go#handling-of-failed-async-writes)

View File

@@ -6,6 +6,7 @@ import (
"encoding/json"
"errors"
"fmt"
"time"
cclog "github.com/ClusterCockpit/cc-metric-collector/internal/ccLogger"
lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric"
@@ -15,21 +16,29 @@ import (
type InfluxSinkConfig struct {
defaultSinkConfig
Host string `json:"host,omitempty"`
Port string `json:"port,omitempty"`
Database string `json:"database,omitempty"`
User string `json:"user,omitempty"`
Password string `json:"password,omitempty"`
Organization string `json:"organization,omitempty"`
SSL bool `json:"ssl,omitempty"`
RetentionPol string `json:"retention_policy,omitempty"`
Host string `json:"host,omitempty"`
Port string `json:"port,omitempty"`
Database string `json:"database,omitempty"`
User string `json:"user,omitempty"`
Password string `json:"password,omitempty"`
Organization string `json:"organization,omitempty"`
SSL bool `json:"ssl,omitempty"`
RetentionPol string `json:"retention_policy,omitempty"`
InfluxRetryInterval string `json:"retry_interval"`
InfluxExponentialBase uint `json:"retry_exponential_base"`
InfluxMaxRetries uint `json:"max_retries"`
InfluxMaxRetryTime string `json:"max_retry_time"`
//InfluxMaxRetryDelay string `json:"max_retry_delay"` // It is mentioned in the docs but there is no way to set it
}
type InfluxSink struct {
sink
client influxdb2.Client
writeApi influxdb2Api.WriteAPIBlocking
config InfluxSinkConfig
client influxdb2.Client
writeApi influxdb2Api.WriteAPIBlocking
config InfluxSinkConfig
influxRetryInterval uint
influxMaxRetryTime uint
//influxMaxRetryDelay uint
}
func (s *InfluxSink) connect() error {
@@ -52,6 +61,12 @@ func (s *InfluxSink) connect() error {
InsecureSkipVerify: true,
},
)
clientOptions.SetMaxRetryInterval(s.influxRetryInterval)
clientOptions.SetMaxRetryTime(s.influxMaxRetryTime)
clientOptions.SetExponentialBase(s.config.InfluxExponentialBase)
clientOptions.SetMaxRetries(s.config.InfluxMaxRetries)
s.client = influxdb2.NewClientWithOptions(uri, auth, clientOptions)
s.writeApi = s.client.WriteAPIBlocking(s.config.Organization, s.config.Database)
ok, err := s.client.Ping(context.Background())
@@ -68,7 +83,7 @@ func (s *InfluxSink) Write(m lp.CCMetric) error {
err :=
s.writeApi.WritePoint(
context.Background(),
m.ToPoint(s.config.MetaAsTags),
m.ToPoint(s.meta_as_tags),
)
return err
}
@@ -91,6 +106,13 @@ func NewInfluxSink(name string, config json.RawMessage) (Sink, error) {
return nil, err
}
}
s.influxRetryInterval = uint(time.Duration(1) * time.Second)
s.config.InfluxRetryInterval = "1s"
s.influxMaxRetryTime = uint(7 * time.Duration(24) * time.Hour)
s.config.InfluxMaxRetryTime = "168h"
s.config.InfluxMaxRetries = 20
s.config.InfluxExponentialBase = 2
if len(s.config.Host) == 0 ||
len(s.config.Port) == 0 ||
len(s.config.Database) == 0 ||
@@ -98,6 +120,21 @@ func NewInfluxSink(name string, config json.RawMessage) (Sink, error) {
len(s.config.Password) == 0 {
return nil, errors.New("not all configuration variables set required by InfluxSink")
}
// Create lookup map to use meta infos as tags in the output metric
s.meta_as_tags = make(map[string]bool)
for _, k := range s.config.MetaAsTags {
s.meta_as_tags[k] = true
}
// toUint converts a duration string (e.g. "1s", "168h") into a number of
// milliseconds. If the string cannot be parsed by time.ParseDuration, the
// fallback def is returned unchanged.
// NOTE(review): the fallbacks passed in below were initialised as
// uint(time.Duration) values, i.e. nanosecond counts, whereas the success
// path returns milliseconds — confirm the intended unit of def.
toUint := func(duration string, def uint) uint {
t, err := time.ParseDuration(duration)
if err == nil {
return uint(t.Milliseconds())
}
return def
}
s.influxRetryInterval = toUint(s.config.InfluxRetryInterval, s.influxRetryInterval)
s.influxMaxRetryTime = toUint(s.config.InfluxMaxRetryTime, s.influxMaxRetryTime)
// Connect to InfluxDB server
if err := s.connect(); err != nil {

View File

@@ -17,6 +17,10 @@ The `influxdb` sink uses the official [InfluxDB golang client](https://pkg.go.de
"password" : "examplepw",
"organization": "myorg",
"ssl": true,
"retry_interval" : "1s",
"retry_exponential_base" : 2,
"max_retries": 20,
"max_retry_time" : "168h"
}
}
```
@@ -29,4 +33,10 @@ The `influxdb` sink uses the official [InfluxDB golang client](https://pkg.go.de
- `user`: Username for basic authentication
- `password`: Password for basic authentication
- `organization`: Organization in the InfluxDB
- `ssl`: Use SSL connection
- `ssl`: Use SSL connection
- `retry_interval`: Base retry interval for failed write requests, default 1s
- `retry_exponential_base`: The retry interval is exponentially increased with this base, default 2
- `max_retries`: Maximum number of retry attempts, default 20
- `max_retry_time`: Maximum time to retry failed writes, default 168h (one week)
For information about the calculation of the retry interval settings, see the [official influxdb-client-go documentation](https://github.com/influxdata/influxdb-client-go#handling-of-failed-async-writes)

View File

@@ -124,24 +124,21 @@ func (s *LibgangliaSink) Write(point lp.CCMetric) error {
return s.cstrCache[key]
}
// Get metric name
metricname := GangliaMetricRename(point.Name())
conf := GetCommonGangliaConfig(point)
if len(conf.Type) == 0 {
conf = GetGangliaConfig(point)
}
if len(conf.Type) == 0 {
return fmt.Errorf("metric %s has no 'value' field", metricname)
return fmt.Errorf("metric %q (Ganglia name %q) has no 'value' field", point.Name(), conf.Name)
}
if s.config.AddTypeToName {
metricname = GangliaMetricName(point)
conf.Name = GangliaMetricName(point)
}
c_value = C.CString(conf.Value)
c_type = lookup(conf.Type)
c_name = lookup(metricname)
c_name = lookup(conf.Name)
// Add unit
unit := ""

View File

@@ -5,13 +5,13 @@ import (
)
type defaultSinkConfig struct {
MetaAsTags bool `json:"meta_as_tags,omitempty"`
Type string `json:"type"`
MetaAsTags []string `json:"meta_as_tags,omitempty"`
Type string `json:"type"`
}
type sink struct {
meta_as_tags bool // Use meta data tags as tags
name string // Name of the sink
meta_as_tags map[string]bool // Use meta data tags as tags
name string // Name of the sink
}
type Sink interface {

View File

@@ -55,7 +55,7 @@ func (s *NatsSink) connect() error {
func (s *NatsSink) Write(m lp.CCMetric) error {
if s.client != nil {
_, err := s.encoder.Encode(m.ToPoint(s.config.MetaAsTags))
_, err := s.encoder.Encode(m.ToPoint(s.meta_as_tags))
if err != nil {
cclog.ComponentError(s.name, "Write:", err.Error())
return err
@@ -97,6 +97,11 @@ func NewNatsSink(name string, config json.RawMessage) (Sink, error) {
len(s.config.Database) == 0 {
return nil, errors.New("not all configuration variables set required by NatsSink")
}
// Create lookup map to use meta infos as tags in the output metric
s.meta_as_tags = make(map[string]bool)
for _, k := range s.config.MetaAsTags {
s.meta_as_tags[k] = true
}
// Setup Influx line protocol
s.buffer = &bytes.Buffer{}
s.buffer.Grow(1025)
@@ -105,7 +110,7 @@ func NewNatsSink(name string, config json.RawMessage) (Sink, error) {
s.encoder.SetMaxLineBytes(1024)
// Setup infos for connection
if err := s.connect(); err != nil {
return nil, fmt.Errorf("Unable to connect: %v", err)
return nil, fmt.Errorf("unable to connect: %v", err)
}
return s, nil
}

View File

@@ -10,14 +10,14 @@ import (
)
type SampleSinkConfig struct {
// defines JSON tags for 'type' and 'meta_as_tags'
// defines JSON tags for 'type' and 'meta_as_tags' (string list)
// See: metricSink.go
defaultSinkConfig
// Additional config options, for SampleSink
}
type SampleSink struct {
// declares elements 'name' and 'meta_as_tags'
// declares elements 'name' and 'meta_as_tags' (string to bool map!)
sink
config SampleSinkConfig // entry point to the SampleSinkConfig
}
@@ -28,6 +28,7 @@ type SampleSink struct {
// Code to submit a single CCMetric to the sink
func (s *SampleSink) Write(point lp.CCMetric) error {
// based on s.meta_as_tags use meta infos as tags
log.Print(point)
return nil
}
@@ -62,6 +63,12 @@ func NewSampleSink(name string, config json.RawMessage) (Sink, error) {
}
}
// Create lookup map to use meta infos as tags in the output metric
s.meta_as_tags = make(map[string]bool)
for _, k := range s.config.MetaAsTags {
s.meta_as_tags[k] = true
}
// Check if all required fields in the config are set
// E.g. use 'len(s.config.Option) > 0' for string settings

View File

@@ -63,7 +63,11 @@ func NewStdoutSink(name string, config json.RawMessage) (Sink, error) {
s.output = f
}
}
s.meta_as_tags = s.config.MetaAsTags
// Create lookup map to use meta infos as tags in the output metric
s.meta_as_tags = make(map[string]bool)
for _, k := range s.config.MetaAsTags {
s.meta_as_tags[k] = true
}
return s, nil
}