Merge branch 'develop' into main

Merge latest developments into main (#67)
* Update configuration.md Add an additional receiver to have better alignment of components * Change default GpfsCollector command to `mmpmon` (#53) * Set default cmd to 'mmpmon' * Reuse looked up path * Cast const to string * Just download LIKWID to get the headers (#54) * Just download LIKWID to get the headers * Remove perl-Data-Dumper from BuildRequires, only required by LIKWID build * Add HttpReceiver as counterpart to the HttpSink (#49) * Use GBytes as unit for large memory numbers * Make maxForward configurable, save old name in meta in rename metrics and make the hostname tag key configurable * Single release action (#55) Building all RPMs and releasing in a single workflow * Makefile target to build binary-only Debian packages (#61) * Add 'install' and 'DEB' make targets to build binary-only Debian packages * Add control file for DEB builds * Use a single line for bash loop in make clean * Add config options for retry intervals of InfluxDB clients (#59) * Refactoring of LikwidCollector and metric units (#62) * Reduce complexity of LikwidCollector and allow metric units * Add unit to LikwidCollector docu and fix some typos * Make library path configurable * Use old metric name in Ganglia if rename has happened in the router (#60) * Use old metric name if rename has happened in the router * Also check for Ganglia renames for the oldname * Derived metrics (#57) * Add time-based derivatives (e.g. 
bandwidth) to some collectors * Add documentation * Add comments * Fix: Only compute rates with a valid previous state * Only compute rates with a valid previous state * Define const values for net/dev fields * Set default config values * Add comments * Refactor: Consolidate data structures * Refactor: Consolidate data structures * Refactor: Avoid struct deep copy * Refactor: Avoid redundant tag maps * Refactor: Use int64 type for absolute values Co-authored-by: Holger Obermaier <40787752+ho-ob@users.noreply.github.com> * Simplified iota usage * Move unit tag to meta data tags * Derived metrics (#65) * Add time-based derivatives (e.g. bandwidth) to some collectors * Add documentation * Add comments * Fix: Only compute rates with a valid previous state * Only compute rates with a valid previous state * Define const values for net/dev fields * Set default config values * Add comments * Refactor: Consolidate data structures * Refactor: Consolidate data structures * Refactor: Avoid struct deep copy * Refactor: Avoid redundant tag maps * Refactor: Use int64 type for absolute values * Update LustreCollector Co-authored-by: Holger Obermaier <40787752+ho-ob@users.noreply.github.com> * Meta to tags list and map for sinks (#63) * Change ccMetric->Influx functions * Use a meta_as_tags string list in config but create a lookup map afterwards * Add meta as tag logic to sampleSink * Fix staticcheck warnings (#66) Co-authored-by: Holger Obermaier <40787752+ho-ob@users.noreply.github.com>
2025-07-19 19:31:41 +02:00 · 2022-03-16 19:08:13 +01:00 · 2022-03-15 16:41:11 +01:00 · 2022-03-04 23:34:28 +01:00 · 2022-03-04 11:49:55 +01:00 · 2022-03-04 11:37:45 +01:00
21 changed files with 392 additions and 1132 deletions
--- a/collectors/diskstatMetric.go
+++ b/collectors/diskstatMetric.go
@@ -3,6 +3,7 @@ package collectors
 import (
 	"bufio"
 	"encoding/json"
+	"fmt"
 	"os"
 	"strings"
 	"syscall"
@@ -80,7 +81,8 @@ func (m *DiskstatCollector) Read(interval time.Duration, output chan lp.CCMetric
 		stat := syscall.Statfs_t{}
 		err := syscall.Statfs(path, &stat)
 		if err != nil {
-			continue
+			fmt.Println(err.Error())
+			return
 		}
 		tags := map[string]string{"type": "node", "device": linefields[0]}
 		total := (stat.Blocks * uint64(stat.Bsize)) / uint64(1000000000)
--- a/collectors/gpfsMetric.go
+++ b/collectors/gpfsMetric.go
@@ -70,7 +70,6 @@ func (m *GpfsCollector) Init(config json.RawMessage) error {
 	for _, fs := range m.config.ExcludeFilesystem {
 		m.skipFS[fs] = struct{}{}
 	}
-	m.lastState = make(map[string]GpfsCollectorLastState)

 	// GPFS / IBM Spectrum Scale file system statistics can only be queried by user root
 	user, err := user.Current()
@@ -163,16 +162,11 @@ func (m *GpfsCollector) Read(interval time.Duration, output chan lp.CCMetric) {
 			continue
 		}

-		// Add filesystem tag
 		m.tags["filesystem"] = filesystem
-
-		// Create initial last state
-		if m.config.SendBandwidths {
-			if _, ok := m.lastState[filesystem]; !ok {
-				m.lastState[filesystem] = GpfsCollectorLastState{
-					bytesRead:    -1,
-					bytesWritten: -1,
-				}
+		if _, ok := m.lastState[filesystem]; !ok {
+			m.lastState[filesystem] = GpfsCollectorLastState{
+				bytesRead:    -1,
+				bytesWritten: -1,
 			}
 		}

--- a/collectors/infinibandMetric.go
+++ b/collectors/infinibandMetric.go
@@ -18,18 +18,13 @@ import (

 const IB_BASEPATH = "/sys/class/infiniband/"

-type InfinibandCollectorMetric struct {
-	path string
-	unit string
-}
-
 type InfinibandCollectorInfo struct {
-	LID              string                               // IB local Identifier (LID)
-	device           string                               // IB device
-	port             string                               // IB device port
-	portCounterFiles map[string]InfinibandCollectorMetric // mapping counter name -> InfinibandCollectorMetric
-	tagSet           map[string]string                    // corresponding tag list
-	lastState        map[string]int64                     // State from last measurement
+	LID              string            // IB local Identifier (LID)
+	device           string            // IB device
+	port             string            // IB device port
+	portCounterFiles map[string]string // mapping counter name -> sysfs file
+	tagSet           map[string]string // corresponding tag list
+	lastState        map[string]int64  // State from last measurement
 }

 type InfinibandCollector struct {
@@ -111,16 +106,16 @@ func (m *InfinibandCollector) Init(config json.RawMessage) error {

 		// Check access to counter files
 		countersDir := filepath.Join(path, "counters")
-		portCounterFiles := map[string]InfinibandCollectorMetric{
-			"ib_recv":      {path: filepath.Join(countersDir, "port_rcv_data"), unit: "bytes"},
-			"ib_xmit":      {path: filepath.Join(countersDir, "port_xmit_data"), unit: "bytes"},
-			"ib_recv_pkts": {path: filepath.Join(countersDir, "port_rcv_packets"), unit: "packets"},
-			"ib_xmit_pkts": {path: filepath.Join(countersDir, "port_xmit_packets"), unit: "packets"},
+		portCounterFiles := map[string]string{
+			"ib_recv":      filepath.Join(countersDir, "port_rcv_data"),
+			"ib_xmit":      filepath.Join(countersDir, "port_xmit_data"),
+			"ib_recv_pkts": filepath.Join(countersDir, "port_rcv_packets"),
+			"ib_xmit_pkts": filepath.Join(countersDir, "port_xmit_packets"),
 		}
-		for _, counter := range portCounterFiles {
-			err := unix.Access(counter.path, unix.R_OK)
+		for _, counterFile := range portCounterFiles {
+			err := unix.Access(counterFile, unix.R_OK)
 			if err != nil {
-				return fmt.Errorf("unable to access %s: %v", counter.path, err)
+				return fmt.Errorf("unable to access %s: %v", counterFile, err)
 			}
 		}

@@ -170,14 +165,14 @@ func (m *InfinibandCollector) Read(interval time.Duration, output chan lp.CCMetr
 	m.lastTimestamp = now

 	for _, info := range m.info {
-		for counterName, counterDef := range info.portCounterFiles {
+		for counterName, counterFile := range info.portCounterFiles {

 			// Read counter file
-			line, err := ioutil.ReadFile(counterDef.path)
+			line, err := ioutil.ReadFile(counterFile)
 			if err != nil {
 				cclog.ComponentError(
 					m.name,
-					fmt.Sprintf("Read(): Failed to read from file '%s': %v", counterDef.path, err))
+					fmt.Sprintf("Read(): Failed to read from file '%s': %v", counterFile, err))
 				continue
 			}
 			data := strings.TrimSpace(string(line))
@@ -194,7 +189,6 @@ func (m *InfinibandCollector) Read(interval time.Duration, output chan lp.CCMetr
 			// Send absolut values
 			if m.config.SendAbsoluteValues {
 				if y, err := lp.New(counterName, info.tagSet, m.meta, map[string]interface{}{"value": v}, now); err == nil {
-					y.AddMeta("unit", counterDef.unit)
 					output <- y
 				}
 			}
@@ -204,7 +198,6 @@ func (m *InfinibandCollector) Read(interval time.Duration, output chan lp.CCMetr
 				if info.lastState[counterName] >= 0 {
 					rate := float64((v - info.lastState[counterName])) / timeDiff
 					if y, err := lp.New(counterName+"_bw", info.tagSet, m.meta, map[string]interface{}{"value": rate}, now); err == nil {
-						y.AddMeta("unit", counterDef.unit+"/sec")
 						output <- y
 					}
 				}
--- a/collectors/likwidMetric.go
+++ b/collectors/likwidMetric.go
@@ -15,12 +15,8 @@ import (
 	"io/ioutil"
 	"math"
 	"os"
-	"os/signal"
-	"sort"
 	"strconv"
 	"strings"
-	"sync"
-	"syscall"
 	"time"
 	"unsafe"

@@ -50,16 +46,6 @@ type LikwidCollectorEventsetConfig struct {
 	Metrics []LikwidCollectorMetricConfig `json:"metrics"`
 }

-type LikwidEventsetConfig struct {
-	internal int
-	gid      C.int
-	eorder   []*C.char
-	estr     *C.char
-	go_estr  string
-	results  map[int]map[string]interface{}
-	metrics  map[int]map[string]float64
-}
-
 type LikwidCollectorConfig struct {
 	Eventsets      []LikwidCollectorEventsetConfig `json:"eventsets"`
 	Metrics        []LikwidCollectorMetricConfig   `json:"globalmetrics,omitempty"`
@@ -72,18 +58,17 @@ type LikwidCollectorConfig struct {

 type LikwidCollector struct {
 	metricCollector
-	cpulist      []C.int
-	cpu2tid      map[int]int
-	sock2tid     map[int]int
-	metrics      map[C.int]map[string]int
-	groups       []C.int
-	config       LikwidCollectorConfig
-	gmresults    map[int]map[string]float64
-	basefreq     float64
-	running      bool
-	initialized  bool
-	likwidGroups map[C.int]LikwidEventsetConfig
-	lock         sync.Mutex
+	cpulist   []C.int
+	cpu2tid   map[int]int
+	sock2tid  map[int]int
+	metrics   map[C.int]map[string]int
+	groups    []C.int
+	config    LikwidCollectorConfig
+	results   map[int]map[int]map[string]interface{}
+	mresults  map[int]map[int]map[string]float64
+	gmresults map[int]map[string]float64
+	basefreq  float64
+	running   bool
 }

 type LikwidMetric struct {
@@ -101,60 +86,14 @@ func eventsToEventStr(events map[string]string) string {
 	return strings.Join(elist, ",")
 }

-func genLikwidEventSet(input LikwidCollectorEventsetConfig) LikwidEventsetConfig {
-	tmplist := make([]string, 0)
-	clist := make([]string, 0)
-	for k := range input.Events {
-		clist = append(clist, k)
-	}
-	sort.Strings(clist)
-	elist := make([]*C.char, 0)
-	for _, k := range clist {
-		v := input.Events[k]
-		tmplist = append(tmplist, fmt.Sprintf("%s:%s", v, k))
-		c_counter := C.CString(k)
-		elist = append(elist, c_counter)
-	}
-	estr := strings.Join(tmplist, ",")
-	res := make(map[int]map[string]interface{})
-	met := make(map[int]map[string]float64)
-	for _, i := range topo.CpuList() {
-		res[i] = make(map[string]interface{})
-		for k := range input.Events {
-			res[i][k] = 0.0
-		}
-		met[i] = make(map[string]float64)
-		for _, v := range input.Metrics {
-			res[i][v.Name] = 0.0
-		}
-	}
-	return LikwidEventsetConfig{
-		gid:     -1,
-		eorder:  elist,
-		estr:    C.CString(estr),
-		go_estr: estr,
-		results: res,
-		metrics: met,
-	}
-}
-
-func testLikwidMetricFormula(formula string, params []string) bool {
-	myparams := make(map[string]interface{})
-	for _, p := range params {
-		myparams[p] = float64(1.0)
-	}
-	_, err := agg.EvalFloat64Condition(formula, myparams)
-	return err == nil
-}
-
 func getBaseFreq() float64 {
-	files := []string{
-		"/sys/devices/system/cpu/cpu0/cpufreq/bios_limit",
-		"/sys/devices/system/cpu/cpu0/cpufreq/base_frequency",
-	}
 	var freq float64 = math.NaN()
-	for _, f := range files {
-		buffer, err := ioutil.ReadFile(f)
+	C.power_init(0)
+	info := C.get_powerInfo()
+	if float64(info.baseFrequency) != 0 {
+		freq = float64(info.baseFrequency) * 1e6
+	} else {
+		buffer, err := ioutil.ReadFile("/sys/devices/system/cpu/cpu0/cpufreq/bios_limit")
 		if err == nil {
 			data := strings.Replace(string(buffer), "\n", "", -1)
 			x, err := strconv.ParseInt(data, 0, 64)
@@ -163,22 +102,12 @@ func getBaseFreq() float64 {
 			}
 		}
 	}
-
-	if math.IsNaN(freq) {
-		C.power_init(0)
-		info := C.get_powerInfo()
-		if float64(info.baseFrequency) != 0 {
-			freq = float64(info.baseFrequency) * 1e6
-		}
-		C.power_finalize()
-	}
 	return freq
 }

 func (m *LikwidCollector) Init(config json.RawMessage) error {
+	var ret C.int
 	m.name = "LikwidCollector"
-	m.initialized = false
-	m.running = false
 	m.config.AccessMode = LIKWID_DEF_ACCESSMODE
 	m.config.LibraryPath = LIKWID_LIB_NAME
 	if len(config) > 0 {
@@ -202,7 +131,7 @@ func (m *LikwidCollector) Init(config json.RawMessage) error {
 	}
 	m.setup()

-	m.meta = map[string]string{"group": "PerfCounter"}
+	m.meta = map[string]string{"source": m.name, "group": "PerfCounter"}
 	cclog.ComponentDebug(m.name, "Get cpulist and init maps and lists")
 	cpulist := topo.CpuList()
 	m.cpulist = make([]C.int, len(cpulist))
@@ -211,136 +140,172 @@ func (m *LikwidCollector) Init(config json.RawMessage) error {
 		m.cpulist[i] = C.int(c)
 		m.cpu2tid[c] = i
 	}
-
-	m.likwidGroups = make(map[C.int]LikwidEventsetConfig)
-
-	// m.results = make(map[int]map[int]map[string]interface{})
-	// m.mresults = make(map[int]map[int]map[string]float64)
+	m.sock2tid = make(map[int]int)
+	tmp := make([]C.int, 1)
+	for _, sid := range topo.SocketList() {
+		cstr := C.CString(fmt.Sprintf("S%d:0", sid))
+		ret = C.cpustr_to_cpulist(cstr, &tmp[0], 1)
+		if ret > 0 {
+			m.sock2tid[sid] = m.cpu2tid[int(tmp[0])]
+		}
+		C.free(unsafe.Pointer(cstr))
+	}
+	m.results = make(map[int]map[int]map[string]interface{})
+	m.mresults = make(map[int]map[int]map[string]float64)
 	m.gmresults = make(map[int]map[string]float64)
-	for _, tid := range m.cpu2tid {
-		m.gmresults[tid] = make(map[string]float64)
+	cclog.ComponentDebug(m.name, "initialize LIKWID topology")
+	ret = C.topology_init()
+	if ret != 0 {
+		err := errors.New("failed to initialize LIKWID topology")
+		cclog.ComponentError(m.name, err.Error())
+		return err
+	}
+
+	switch m.config.AccessMode {
+	case "direct":
+		C.HPMmode(0)
+	case "accessdaemon":
+		if len(m.config.DaemonPath) > 0 {
+			p := os.Getenv("PATH")
+			os.Setenv("PATH", m.config.DaemonPath+":"+p)
+		}
+		C.HPMmode(1)
+	}
+
+	cclog.ComponentDebug(m.name, "initialize LIKWID perfmon module")
+	ret = C.perfmon_init(C.int(len(m.cpulist)), &m.cpulist[0])
+	if ret != 0 {
+		C.topology_finalize()
+		err := errors.New("failed to initialize LIKWID topology")
+		cclog.ComponentError(m.name, err.Error())
+		return err
 	}

 	// This is for the global metrics computation test
-	totalMetrics := 0
-	// Generate parameter list for the metric computing test
-	params := make([]string, 0)
-	params = append(params, "time", "inverseClock")
-	// Generate parameter list for the global metric computing test
-	globalParams := make([]string, 0)
-	globalParams = append(globalParams, "time", "inverseClock")
-	// We test the eventset metrics whether they can be computed at all
-	for _, evset := range m.config.Eventsets {
+	globalParams := make(map[string]interface{})
+	globalParams["time"] = float64(1.0)
+	globalParams["inverseClock"] = float64(1.0)
+	// While adding the events, we test the metrics whether they can be computed at all
+	for i, evset := range m.config.Eventsets {
+		var gid C.int
+		var cstr *C.char
 		if len(evset.Events) > 0 {
-			params = params[:2]
+			estr := eventsToEventStr(evset.Events)
+			// Generate parameter list for the metric computing test
+			params := make(map[string]interface{})
+			params["time"] = float64(1.0)
+			params["inverseClock"] = float64(1.0)
 			for counter := range evset.Events {
-				params = append(params, counter)
+				params[counter] = float64(1.0)
 			}
 			for _, metric := range evset.Metrics {
 				// Try to evaluate the metric
-				if testLikwidMetricFormula(metric.Calc, params) {
-					// Add the computable metric to the parameter list for the global metrics
-					globalParams = append(globalParams, metric.Name)
-					totalMetrics++
-				} else {
-					metric.Calc = ""
+				_, err := agg.EvalFloat64Condition(metric.Calc, params)
+				if err != nil {
+					cclog.ComponentError(m.name, "Calculation for metric", metric.Name, "failed:", err.Error())
+					continue
+				}
+				// If the metric is not in the parameter list for the global metrics, add it
+				if _, ok := globalParams[metric.Name]; !ok {
+					globalParams[metric.Name] = float64(1.0)
 				}
 			}
+			// Now we add the list of events to likwid
+			cstr = C.CString(estr)
+			gid = C.perfmon_addEventSet(cstr)
 		} else {
 			cclog.ComponentError(m.name, "Invalid Likwid eventset config, no events given")
 			continue
 		}
+		if gid >= 0 {
+			m.groups = append(m.groups, gid)
+		}
+		C.free(unsafe.Pointer(cstr))
+		m.results[i] = make(map[int]map[string]interface{})
+		m.mresults[i] = make(map[int]map[string]float64)
+		for tid := range m.cpulist {
+			m.results[i][tid] = make(map[string]interface{})
+			m.mresults[i][tid] = make(map[string]float64)
+			if i == 0 {
+				m.gmresults[tid] = make(map[string]float64)
+			}
+		}
 	}
 	for _, metric := range m.config.Metrics {
 		// Try to evaluate the global metric
-		if !testLikwidMetricFormula(metric.Calc, globalParams) {
-			cclog.ComponentError(m.name, "Calculation for metric", metric.Name, "failed")
-			metric.Calc = ""
-		} else {
-			totalMetrics++
+		_, err := agg.EvalFloat64Condition(metric.Calc, globalParams)
+		if err != nil {
+			cclog.ComponentError(m.name, "Calculation for metric", metric.Name, "failed:", err.Error())
+			continue
 		}
 	}

 	// If no event set could be added, shut down LikwidCollector
-	if totalMetrics == 0 {
-		err := errors.New("no LIKWID eventset or metric usable")
+	if len(m.groups) == 0 {
+		C.perfmon_finalize()
+		C.topology_finalize()
+		err := errors.New("no LIKWID performance group initialized")
 		cclog.ComponentError(m.name, err.Error())
 		return err
 	}
+	m.basefreq = getBaseFreq()
+	cclog.ComponentDebug(m.name, "BaseFreq", m.basefreq)
 	m.init = true
 	return nil
 }

 // take a measurement for 'interval' seconds of event set index 'group'
-func (m *LikwidCollector) takeMeasurement(evset LikwidEventsetConfig, interval time.Duration) (bool, error) {
+func (m *LikwidCollector) takeMeasurement(group int, interval time.Duration) error {
 	var ret C.int
-	m.lock.Lock()
-	if m.initialized {
-		ret = C.perfmon_setupCounters(evset.gid)
-		if ret != 0 {
-			var err error = nil
-			var skip bool = false
-			if ret == -37 {
-				skip = true
-			} else {
-				err = fmt.Errorf("failed to setup performance group %d", evset.gid)
-			}
-			m.lock.Unlock()
-			return skip, err
-		}
-		ret = C.perfmon_startCounters()
-		if ret != 0 {
-			var err error = nil
-			var skip bool = false
-			if ret == -37 {
-				skip = true
-			} else {
-				err = fmt.Errorf("failed to setup performance group %d", evset.gid)
-			}
-			m.lock.Unlock()
-			return skip, err
-		}
-		m.running = true
-		time.Sleep(interval)
-		m.running = false
-		ret = C.perfmon_stopCounters()
-		if ret != 0 {
-			var err error = nil
-			var skip bool = false
-			if ret == -37 {
-				skip = true
-			} else {
-				err = fmt.Errorf("failed to setup performance group %d", evset.gid)
-			}
-			m.lock.Unlock()
-			return skip, err
-		}
+	gid := m.groups[group]
+	ret = C.perfmon_setupCounters(gid)
+	if ret != 0 {
+		gctr := C.GoString(C.perfmon_getGroupName(gid))
+		err := fmt.Errorf("failed to setup performance group %d (%s)", gid, gctr)
+		return err
 	}
-	m.lock.Unlock()
-	return false, nil
+	ret = C.perfmon_startCounters()
+	if ret != 0 {
+		gctr := C.GoString(C.perfmon_getGroupName(gid))
+		err := fmt.Errorf("failed to start performance group %d (%s)", gid, gctr)
+		return err
+	}
+	m.running = true
+	time.Sleep(interval)
+	m.running = false
+	ret = C.perfmon_stopCounters()
+	if ret != 0 {
+		gctr := C.GoString(C.perfmon_getGroupName(gid))
+		err := fmt.Errorf("failed to stop performance group %d (%s)", gid, gctr)
+		return err
+	}
+	return nil
 }

 // Get all measurement results for an event set, derive the metric values out of the measurement results and send it
-func (m *LikwidCollector) calcEventsetMetrics(evset LikwidEventsetConfig, interval time.Duration, output chan lp.CCMetric) error {
+func (m *LikwidCollector) calcEventsetMetrics(group int, interval time.Duration, output chan lp.CCMetric) error {
+	var eidx C.int
+	evset := m.config.Eventsets[group]
+	gid := m.groups[group]
 	invClock := float64(1.0 / m.basefreq)

 	// Go over events and get the results
-	for eidx, counter := range evset.eorder {
-		gctr := C.GoString(counter)
+	for eidx = 0; int(eidx) < len(evset.Events); eidx++ {
+		ctr := C.perfmon_getCounterName(gid, eidx)
+		gctr := C.GoString(ctr)
+
 		for _, tid := range m.cpu2tid {
-			res := C.perfmon_getLastResult(evset.gid, C.int(eidx), C.int(tid))
-			fres := float64(res)
-			if m.config.InvalidToZero && (math.IsNaN(fres) || math.IsInf(fres, 0)) {
-				fres = 0.0
+			if tid >= 0 {
+				m.results[group][tid]["time"] = interval.Seconds()
+				m.results[group][tid]["inverseClock"] = invClock
+				res := C.perfmon_getLastResult(gid, eidx, C.int(tid))
+				m.results[group][tid][gctr] = float64(res)
 			}
-			evset.results[tid][gctr] = fres
-			evset.results[tid]["time"] = interval.Seconds()
-			evset.results[tid]["inverseClock"] = invClock
 		}
 	}

 	// Go over the event set metrics, derive the value out of the event:counter values and send it
-	for _, metric := range m.config.Eventsets[evset.internal].Metrics {
+	for _, metric := range evset.Metrics {
 		// The metric scope is determined in the Init() function
 		// Get the map scope-id -> tids
 		scopemap := m.cpu2tid
@@ -348,16 +313,19 @@ func (m *LikwidCollector) calcEventsetMetrics(evset LikwidEventsetConfig, interv
 			scopemap = m.sock2tid
 		}
 		for domain, tid := range scopemap {
-			if tid >= 0 && len(metric.Calc) > 0 {
-				value, err := agg.EvalFloat64Condition(metric.Calc, evset.results[tid])
+			if tid >= 0 {
+				value, err := agg.EvalFloat64Condition(metric.Calc, m.results[group][tid])
 				if err != nil {
 					cclog.ComponentError(m.name, "Calculation for metric", metric.Name, "failed:", err.Error())
+					continue
+				}
+				m.mresults[group][tid][metric.Name] = value
+				if m.config.InvalidToZero && math.IsNaN(value) {
 					value = 0.0
 				}
-				if m.config.InvalidToZero && (math.IsNaN(value) || math.IsInf(value, 0)) {
+				if m.config.InvalidToZero && math.IsInf(value, 0) {
 					value = 0.0
 				}
-				evset.metrics[tid][metric.Name] = value
 				// Now we have the result, send it with the proper tags
 				if !math.IsNaN(value) {
 					if metric.Publish {
@@ -392,8 +360,8 @@ func (m *LikwidCollector) calcGlobalMetrics(interval time.Duration, output chan
 			if tid >= 0 {
 				// Here we generate parameter list
 				params := make(map[string]interface{})
-				for _, evset := range m.likwidGroups {
-					for mname, mres := range evset.metrics[tid] {
+				for j := range m.groups {
+					for mname, mres := range m.mresults[j][tid] {
 						params[mname] = mres
 					}
 				}
@@ -401,12 +369,15 @@ func (m *LikwidCollector) calcGlobalMetrics(interval time.Duration, output chan
 				value, err := agg.EvalFloat64Condition(metric.Calc, params)
 				if err != nil {
 					cclog.ComponentError(m.name, "Calculation for metric", metric.Name, "failed:", err.Error())
-					value = 0.0
-				}
-				if m.config.InvalidToZero && (math.IsNaN(value) || math.IsInf(value, 0)) {
-					value = 0.0
+					continue
 				}
 				m.gmresults[tid][metric.Name] = value
+				if m.config.InvalidToZero && math.IsNaN(value) {
+					value = 0.0
+				}
+				if m.config.InvalidToZero && math.IsInf(value, 0) {
+					value = 0.0
+				}
 				// Now we have the result, send it with the proper tags
 				if !math.IsNaN(value) {
 					if metric.Publish {
@@ -430,163 +401,38 @@ func (m *LikwidCollector) calcGlobalMetrics(interval time.Duration, output chan
 	return nil
 }

-func (m *LikwidCollector) LateInit() error {
-	var ret C.int
-	if m.initialized {
-		return nil
-	}
-	switch m.config.AccessMode {
-	case "direct":
-		C.HPMmode(0)
-	case "accessdaemon":
-		if len(m.config.DaemonPath) > 0 {
-			p := os.Getenv("PATH")
-			os.Setenv("PATH", m.config.DaemonPath+":"+p)
-		}
-		C.HPMmode(1)
-	}
-	cclog.ComponentDebug(m.name, "initialize LIKWID topology")
-	ret = C.topology_init()
-	if ret != 0 {
-		err := errors.New("failed to initialize LIKWID topology")
-		cclog.ComponentError(m.name, err.Error())
-		return err
-	}
-
-	m.sock2tid = make(map[int]int)
-	tmp := make([]C.int, 1)
-	for _, sid := range topo.SocketList() {
-		cstr := C.CString(fmt.Sprintf("S%d:0", sid))
-		ret = C.cpustr_to_cpulist(cstr, &tmp[0], 1)
-		if ret > 0 {
-			m.sock2tid[sid] = m.cpu2tid[int(tmp[0])]
-		}
-		C.free(unsafe.Pointer(cstr))
-	}
-
-	m.basefreq = getBaseFreq()
-	cclog.ComponentDebug(m.name, "BaseFreq", m.basefreq)
-
-	cclog.ComponentDebug(m.name, "initialize LIKWID perfmon module")
-	ret = C.perfmon_init(C.int(len(m.cpulist)), &m.cpulist[0])
-	if ret != 0 {
-		var err error = nil
-		C.topology_finalize()
-		if ret != -22 {
-			err = errors.New("failed to initialize LIKWID perfmon")
-			cclog.ComponentError(m.name, err.Error())
-		} else {
-			err = errors.New("access to LIKWID perfmon locked")
-		}
-		return err
-	}
-
-	// While adding the events, we test the metrics whether they can be computed at all
-	for i, evset := range m.config.Eventsets {
-		var gid C.int
-		if len(evset.Events) > 0 {
-			skip := false
-			likwidGroup := genLikwidEventSet(evset)
-			for _, g := range m.likwidGroups {
-				if likwidGroup.go_estr == g.go_estr {
-					skip = true
-					break
-				}
-			}
-			if skip {
-				continue
-			}
-			// Now we add the list of events to likwid
-			gid = C.perfmon_addEventSet(likwidGroup.estr)
-			if gid >= 0 {
-				likwidGroup.gid = gid
-				likwidGroup.internal = i
-				m.likwidGroups[gid] = likwidGroup
-			}
-		} else {
-			cclog.ComponentError(m.name, "Invalid Likwid eventset config, no events given")
-			continue
-		}
-
-	}
-
-	// If no event set could be added, shut down LikwidCollector
-	if len(m.likwidGroups) == 0 {
-		C.perfmon_finalize()
-		C.topology_finalize()
-		err := errors.New("no LIKWID performance group initialized")
-		cclog.ComponentError(m.name, err.Error())
-		return err
-	}
-	sigchan := make(chan os.Signal, 1)
-	signal.Notify(sigchan, syscall.SIGCHLD)
-	signal.Notify(sigchan, os.Interrupt)
-	go func() {
-		<-sigchan
-
-		signal.Stop(sigchan)
-		m.initialized = false
-	}()
-	m.initialized = true
-	return nil
-}
-
 // main read function taking multiple measurement rounds, each 'interval' seconds long
 func (m *LikwidCollector) Read(interval time.Duration, output chan lp.CCMetric) {
-	var skip bool = false
-	var err error
 	if !m.init {
 		return
 	}

-	if !m.initialized {
-		m.lock.Lock()
-		err = m.LateInit()
+	for i := range m.groups {
+		// measure event set 'i' for 'interval' seconds
+		err := m.takeMeasurement(i, interval)
 		if err != nil {
-			m.lock.Unlock()
+			cclog.ComponentError(m.name, err.Error())
 			return
 		}
-		m.initialized = true
-		m.lock.Unlock()
-	}
-
-	if m.initialized && !skip {
-		for _, evset := range m.likwidGroups {
-			if !skip {
-				// measure event set 'i' for 'interval' seconds
-				skip, err = m.takeMeasurement(evset, interval)
-				if err != nil {
-					cclog.ComponentError(m.name, err.Error())
-					return
-				}
-			}
-
-			if !skip {
-				// read measurements and derive event set metrics
-				m.calcEventsetMetrics(evset, interval, output)
-			}
-		}
-		if !skip {
-			// use the event set metrics to derive the global metrics
-			m.calcGlobalMetrics(interval, output)
-		}
+		// read measurements and derive event set metrics
+		m.calcEventsetMetrics(i, interval, output)
 	}
+	// use the event set metrics to derive the global metrics
+	m.calcGlobalMetrics(interval, output)
 }

 func (m *LikwidCollector) Close() {
 	if m.init {
-		m.init = false
 		cclog.ComponentDebug(m.name, "Closing ...")
-		m.lock.Lock()
-		if m.initialized {
-			cclog.ComponentDebug(m.name, "Finalize LIKWID perfmon module")
-			C.perfmon_finalize()
-			m.initialized = false
+		m.init = false
+		if m.running {
+			cclog.ComponentDebug(m.name, "Stopping counters")
+			C.perfmon_stopCounters()
 		}
-		m.lock.Unlock()
+		cclog.ComponentDebug(m.name, "Finalize LIKWID perfmon module")
+		C.perfmon_finalize()
 		cclog.ComponentDebug(m.name, "Finalize LIKWID topology module")
 		C.topology_finalize()
-
 		cclog.ComponentDebug(m.name, "Closing done")
 	}
 }
--- a/collectors/likwidMetric.md
+++ b/collectors/likwidMetric.md
@@ -3,63 +3,32 @@

 The `likwid` collector is probably the most complicated collector. The LIKWID library is included as static library with *direct* access mode. The *direct* access mode is suitable if the daemon is executed by a root user. The static library does not contain the performance groups, so all information needs to be provided in the configuration.

-```json
-  "likwid": {
-    "force_overwrite" : false,
-    "invalid_to_zero" : false,
-    "eventsets": [
-      {
-        "events" : {
-          "COUNTER0": "EVENT0",
-          "COUNTER1": "EVENT1",
-        },
-        "metrics" : [
-          {
-            "name": "sum_01",
-            "calc": "COUNTER0 + COUNTER1",
-            "publish": false,
-            "unit": "myunit",
-            "type": "cpu"
-          }
-        ]
-      }
-    ]
-    "globalmetrics" : [
-      {
-        "name": "global_sum",
-        "calc": "sum_01",
-        "publish": true,
-        "unit": "myunit",
-        "type": "cpu"
-      }
-    ]
-  }
-```
-
-The `likwid` configuration consists of two parts, the `eventsets` and `globalmetrics`:
- An event set list itself has two parts, the `events` and a set of derivable `metrics`. Each of the `events` is a `counter:event` pair in LIKWID's syntax. The `metrics` are a list of formulas to derive the metric value from the measurements of the `events`' values. Each metric has a name, the formula, a type and a publish flag. There is an optional `unit` field. Counter names can be used like variables in the formulas, so `PMC0+PMC1` sums the measurements for the both events configured in the counters `PMC0` and `PMC1`. You can optionally use `time` for the measurement time and `inverseClock` for `1.0/baseCpuFrequency`. The type tells the LikwidCollector whether it is a metric for each hardware thread (`cpu`) or each CPU socket (`socket`). You may specify a unit for the metric with `unit`. The last one is the publishing flag. It tells the LikwidCollector whether a metric should be sent to the router or is only used internally to compute a global metric.
- The `globalmetrics` are metrics which require data from multiple event set measurements to be derived. The inputs are the metrics in the event sets. Similar to the metrics in the event sets, the global metrics are defined by a name, a formula, a scope and a publish flag. See event set metrics for details. The only difference is that there is no access to the raw event measurements anymore but only to the metrics. Also `time` and `inverseClock` cannot be used anymore. So, the idea is to derive a metric in the `eventsets` section and reuse it in the `globalmetrics` part. If you need a metric only for deriving the global metrics, disable forwarding of the event set metrics (`"publish": false`). **Be aware** that the combination might be misleading because the "behavior" of a metric changes over time and the multiple measurements might count different computing phases. Similar to the metrics in the eventset, you can specify a metric unit with the `unit` field.
+The `likwid` configuration consists of two parts, the "eventsets" and "globalmetrics":
+- An event set list itself has two parts, the "events" and a set of derivable "metrics". Each of the "events" is a counter:event pair in LIKWID's syntax. The "metrics" are a list of formulas to derive the metric value from the measurements of the "events". Each metric has a name, the formula, a scope and a publish flag. Counter names can be used like variables in the formulas, so `PMC0+PMC1` sums the measurements for both events configured in the counters `PMC0` and `PMC1`. The scope tells the Collector whether it is a metric for each hardware thread (`cpu`) or each CPU socket (`socket`). You may specify a unit for the metric with `unit`. The last one is the publishing flag. It tells the collector whether a metric should be sent to the router.
+- The global metrics are metrics which require data from all event set measurements to be derived. The inputs are the metrics in the event sets. Similar to the metrics in the event sets, the global metrics are defined by a name, a formula, a scope and a publish flag. See event set metrics for details. The only difference is that there is no access to the raw event measurements anymore but only to the metrics. So, the idea is to derive a metric in the "eventsets" section and reuse it in the "globalmetrics" part. If you need a metric only for deriving the global metrics, disable forwarding of the event set metrics (`publish=false`). **Be aware** that the combination might be misleading because the "behavior" of a metric changes over time and the multiple measurements might count different computing phases. Similar to the metrics in the eventset, you can specify a metric unit with the `unit` field.

 Additional options:
+- `access_mode` : Method to use for hardware performance monitoring (`direct` access as root user, `accessdaemon` for the daemon mode)
+- `accessdaemon_path`: Folder with the access daemon `likwid-accessD`, commonly `$LIKWID_INSTALL_LOC/sbin`
 - `force_overwrite`: Same as setting `LIKWID_FORCE=1`. In case counters are already in-use, LIKWID overwrites their configuration to do its measurements
- `invalid_to_zero`: In some cases, the calculations result in `NaN` or `Inf`. With this option, all `NaN` and `Inf` values are replaces with `0.0`. See below in [seperate section](./likwidMetric.md#invalid_to_zero-option)
- `access_mode`: Specify LIKWID access mode: `direct` for direct register access as root user or `accessdaemon`. The access mode `perf_event` is current untested.
- `accessdaemon_path`: Folder of the accessDaemon `likwid-accessD` (like `/usr/local/sbin`)
- `liblikwid_path`: Location of `liblikwid.so` including file name like `/usr/local/lib/liblikwid.so`
+- `invalid_to_zero`: In some cases, the calculations result in `NaN` or `Inf`. With this option, all `NaN` and `Inf` values are replaced with `0.0`.
+- `access_mode`: Specify LIKWID access mode: `direct` for direct register access as root user or `accessdaemon`
+- `accessdaemon_path`: Folder of the accessDaemon `likwid-accessD`
+- `liblikwid_path`: Location of `liblikwid.so`

 ### Available metric scopes

-Hardware performance counters are scattered all over the system nowadays. A counter coveres a specific part of the system. While there are hardware thread specific counter for CPU cycles, instructions and so on, some others are specific for a whole CPU socket/package. To address that, the LikwidCollector provides the specification of a `type` for each metric.
+Hardware performance counters are scattered all over the system nowadays. A counter covers a specific part of the system. While there are hardware-thread-specific counters for CPU cycles, instructions and so on, some others are specific for a whole CPU socket/package. To address that, the collector provides the specification of a 'scope' for each metric.

 - `cpu` : One metric per CPU hardware thread with the tags `"type" : "cpu"` and `"type-id" : "$cpu_id"`
 - `socket` : One metric per CPU socket/package with the tags `"type" : "socket"` and `"type-id" : "$socket_id"`

-**Note:** You should not specify the `socket` type for a metric that is measured at `cpu` scope and vice versa, so some kind of expert knowledge or lookup work in the [Likwid Wiki](https://github.com/RRZE-HPC/likwid/wiki) is required. Get the scope of each counter from the *Architecture* pages and as soon as one counter in a metric is socket-specific, the whole metric is socket-specific.
+**Note:** You cannot specify `socket` scope for a metric that is measured at `cpu` scope, so some kind of expert knowledge or lookup work in the [Likwid Wiki](https://github.com/RRZE-HPC/likwid/wiki) is required. Get the scope of each counter from the *Architecture* pages and as soon as one counter in a metric is socket-specific, the whole metric is socket-specific.

 As a guideline:
 - All counters `FIXCx`, `PMCy` and `TMAz` have the scope `cpu`
 - All counters names containing `BOX` have the scope `socket`
- All `PWRx` counters have scope `socket`, except `"PWR1" : "RAPL_CORE_ENERGY"` has `cpu` scope (AMD Zen)
+- All `PWRx` counters have scope `socket`, except `"PWR1" : "RAPL_CORE_ENERGY"` has `cpu` scope
 - All `DFCx` counters have scope `socket`

 ### Help with the configuration
@@ -81,7 +50,6 @@ $ scripts/likwid_perfgroup_to_cc_config.py ICX MEM_DP
 {
  "events": {
    "FIXC0": "INSTR_RETIRED_ANY",
-    "FIXC1": "CPU_CLK_UNHALTED_CORE",
    "..." : "..."
  },
  "metrics" : [
@@ -107,28 +75,21 @@ LIKWID checks the file `/var/run/likwid.lock` before performing any interfering

 Before (SLURM prolog, ...)
 ```
-$ chown $JOBUSER /var/run/likwid.lock
+$ chown $JOBUSER /var/run/likwid.lock
 ```

 After (SLURM epilog, ...)
 ```
-$ chown $CCUSER /var/run/likwid.lock
+$ chown $CCUSER /var/run/likwid.lock
 ```

-### `invalid_to_zero` option
-In some cases LIKWID returns `0.0` for some events that are further used in processing and maybe used as divisor in a calculation. After evaluation of a metric, the result might be `NaN` or `+-Inf`. These resulting metrics are commonly not created and forwarded to the router because the [InfluxDB line protocol](https://docs.influxdata.com/influxdb/cloud/reference/syntax/line-protocol/#float) does not support these special floating-point values. If you want to have them sent, this option forces these metric values to be `0.0` instead.
-
-One might think this does not happen often but often used metrics in the world of performance engineering like Instructions-per-Cycle (IPC) or more frequently the actual CPU clock are derived with events like `CPU_CLK_UNHALTED_CORE` (Intel) which do not increment in halted state (as the name implies). In there are different power management systems in a chip which can cause a hardware thread to go in such a state. Moreover, if no cycles are executed by the core, also many other events are not incremented as well (like `INSTR_RETIRED_ANY` for retired instructions and part of IPC).
-
-
 ### Example configuration

-#### AMD Zen3

 ```json
  "likwid": {
    "force_overwrite" : false,
-    "invalid_to_zero" : false,
+    "invalid_to_zero" : false,
    "eventsets": [
      {
        "events": {
@@ -219,3 +180,33 @@ One might think this does not happen often but often used metrics in the world o
  }
 ```

+### How to get the eventsets and metrics from LIKWID
+
+The `likwid` collector reads hardware performance counters at a **cpu** and **socket** level. The configuration looks quite complicated but it is basically copy&paste from [LIKWID's performance groups](https://github.com/RRZE-HPC/likwid/tree/master/groups). The collector went through multiple iterations that tried to use the performance groups directly, but that approach lacked flexibility. The current way of configuration provides the most flexibility.
+
+The logic is as follows: There are multiple eventsets, each consisting of a list of counters+events and a list of metrics. If you compare a common performance group with the example setting above, there is not much difference:
+```
+EVENTSET                         ->   "events": {
+FIXC1 ACTUAL_CPU_CLOCK           ->     "FIXC1": "ACTUAL_CPU_CLOCK",
+FIXC2 MAX_CPU_CLOCK              ->     "FIXC2": "MAX_CPU_CLOCK",
+PMC0  RETIRED_INSTRUCTIONS       ->     "PMC0" : "RETIRED_INSTRUCTIONS",
+PMC1  CPU_CLOCKS_UNHALTED        ->     "PMC1" : "CPU_CLOCKS_UNHALTED",
+PMC2  RETIRED_SSE_AVX_FLOPS_ALL  ->     "PMC2": "RETIRED_SSE_AVX_FLOPS_ALL",
+PMC3  MERGE                      ->     "PMC3": "MERGE",
+                                 ->   }
+```
+
+The metrics follow the same procedure:
+
+```
+METRICS                          ->   "metrics": [
+IPC   PMC0/PMC1                  ->     {
+                                 ->       "name" : "IPC",
+                                 ->       "calc" : "PMC0/PMC1",
+                                 ->       "scope": "cpu",
+                                 ->       "publish": true
+                                 ->     }
+                                 ->   ]
+```
+
+The script `scripts/likwid_perfgroup_to_cc_config.py` might help you.
--- a/collectors/memstatMetric.go
+++ b/collectors/memstatMetric.go
@@ -40,13 +40,8 @@ type MemstatCollector struct {
 	sendMemUsed bool
 }

-type MemstatStats struct {
-	value float64
-	unit  string
-}
-
-func getStats(filename string) map[string]MemstatStats {
-	stats := make(map[string]MemstatStats)
+func getStats(filename string) map[string]float64 {
+	stats := make(map[string]float64)
 	file, err := os.Open(filename)
 	if err != nil {
 		cclog.Error(err.Error())
@@ -60,18 +55,12 @@ func getStats(filename string) map[string]MemstatStats {
 		if len(linefields) == 3 {
 			v, err := strconv.ParseFloat(linefields[1], 64)
 			if err == nil {
-				stats[strings.Trim(linefields[0], ":")] = MemstatStats{
-					value: v,
-					unit:  linefields[2],
-				}
+				stats[strings.Trim(linefields[0], ":")] = v
 			}
 		} else if len(linefields) == 5 {
 			v, err := strconv.ParseFloat(linefields[3], 64)
 			if err == nil {
-				stats[strings.Trim(linefields[0], ":")] = MemstatStats{
-					value: v,
-					unit:  linefields[4],
-				}
+				stats[strings.Trim(linefields[0], ":")] = v
 			}
 		}
 	}
@@ -89,7 +78,7 @@ func (m *MemstatCollector) Init(config json.RawMessage) error {
 			return err
 		}
 	}
-	m.meta = map[string]string{"source": m.name, "group": "Memory"}
+	m.meta = map[string]string{"source": m.name, "group": "Memory", "unit": "GByte"}
 	m.stats = make(map[string]int64)
 	m.matches = make(map[string]string)
 	m.tags = map[string]string{"type": "node"}
@@ -162,51 +151,30 @@ func (m *MemstatCollector) Read(interval time.Duration, output chan lp.CCMetric)
 		return
 	}

-	sendStats := func(stats map[string]MemstatStats, tags map[string]string) {
+	sendStats := func(stats map[string]float64, tags map[string]string) {
 		for match, name := range m.matches {
 			var value float64 = 0
-			var unit string = ""
 			if v, ok := stats[match]; ok {
-				value = v.value
-				if len(v.unit) > 0 {
-					unit = v.unit
-				}
+				value = v
 			}
-
-			y, err := lp.New(name, tags, m.meta, map[string]interface{}{"value": value}, time.Now())
+			y, err := lp.New(name, tags, m.meta, map[string]interface{}{"value": value * 1e-6}, time.Now())
 			if err == nil {
-				if len(unit) > 0 {
-					y.AddMeta("unit", unit)
-				}
 				output <- y
 			}
 		}
 		if m.sendMemUsed {
 			memUsed := 0.0
-			unit := ""
 			if totalVal, total := stats["MemTotal"]; total {
 				if freeVal, free := stats["MemFree"]; free {
 					if bufVal, buffers := stats["Buffers"]; buffers {
 						if cacheVal, cached := stats["Cached"]; cached {
-							memUsed = totalVal.value - (freeVal.value + bufVal.value + cacheVal.value)
-							if len(totalVal.unit) > 0 {
-								unit = totalVal.unit
-							} else if len(freeVal.unit) > 0 {
-								unit = freeVal.unit
-							} else if len(bufVal.unit) > 0 {
-								unit = bufVal.unit
-							} else if len(cacheVal.unit) > 0 {
-								unit = cacheVal.unit
-							}
+							memUsed = totalVal - (freeVal + bufVal + cacheVal)
 						}
 					}
 				}
 			}
-			y, err := lp.New("mem_used", tags, m.meta, map[string]interface{}{"value": memUsed}, time.Now())
+			y, err := lp.New("mem_used", tags, m.meta, map[string]interface{}{"value": memUsed * 1e-6}, time.Now())
 			if err == nil {
-				if len(unit) > 0 {
-					y.AddMeta("unit", unit)
-				}
 				output <- y
 			}
 		}
--- a/collectors/nfsMetric.go
+++ b/collectors/nfsMetric.go
@@ -36,7 +36,7 @@ type nfsCollector struct {
 }

 func (m *nfsCollector) initStats() error {
-	cmd := exec.Command(m.config.Nfsstats, `-l`, `--all`)
+	cmd := exec.Command(m.config.Nfsstats, `-l`)
 	cmd.Wait()
 	buffer, err := cmd.Output()
 	if err == nil {
@@ -52,7 +52,7 @@ func (m *nfsCollector) initStats() error {
 					if err == nil {
 						x := m.data[name]
 						x.current = value
-						x.last = value
+						x.last = 0
 						m.data[name] = x
 					}
 				}
@@ -63,7 +63,7 @@ func (m *nfsCollector) initStats() error {
 }

 func (m *nfsCollector) updateStats() error {
-	cmd := exec.Command(m.config.Nfsstats, `-l`, `--all`)
+	cmd := exec.Command(m.config.Nfsstats, `-l`)
 	cmd.Wait()
 	buffer, err := cmd.Output()
 	if err == nil {
--- a/go.mod
+++ b/go.mod
@@ -3,14 +3,17 @@ module github.com/ClusterCockpit/cc-metric-collector
 go 1.16

 require (
-	github.com/NVIDIA/go-nvml v0.11.6-0
-	github.com/PaesslerAG/gval v1.1.2
-	github.com/gorilla/mux v1.8.0
-	github.com/influxdata/influxdb-client-go/v2 v2.8.1
+	github.com/NVIDIA/go-nvml v0.11.1-0
+	github.com/influxdata/influxdb-client-go/v2 v2.7.0
 	github.com/influxdata/line-protocol v0.0.0-20210922203350-b1ad95c89adf
-	github.com/nats-io/nats-server/v2 v2.8.0 // indirect
-	github.com/nats-io/nats.go v1.14.0
-	github.com/prometheus/client_golang v1.12.1
-	github.com/stmcginnis/gofish v0.13.0
-	golang.org/x/sys v0.0.0-20220412211240-33da011f77ad
+	github.com/nats-io/nats.go v1.13.1-0.20211122170419-d7c1d78a50fc
+	golang.org/x/sys v0.0.0-20220114195835-da31bd327af9
+	gopkg.in/Knetic/govaluate.v2 v2.3.0
+)
+
+require (
+	github.com/PaesslerAG/gval v1.1.2
+	github.com/golang/protobuf v1.5.2 // indirect
+	github.com/nats-io/nats-server/v2 v2.7.0 // indirect
+	google.golang.org/protobuf v1.27.1 // indirect
 )
--- a/internal/ccTopology/ccTopology.go
+++ b/internal/ccTopology/ccTopology.go
@@ -169,10 +169,7 @@ func DieList() []int {
 			}
 		}
 	}
-	if len(dielist) > 0 {
-		return dielist
-	}
-	return SocketList()
+	return dielist
 }

 type CpuEntry struct {
@@ -264,7 +261,7 @@ func CpuData() []CpuEntry {
 	for _, c := range CpuList() {
 		clist = append(clist, CpuEntry{Cpuid: c})
 	}
-	for i, centry := range clist {
+	for _, centry := range clist {
 		centry.Socket = -1
 		centry.Numadomain = -1
 		centry.Die = -1
@@ -292,8 +289,6 @@ func CpuData() []CpuEntry {
 		// Lookup NUMA domain id
 		centry.Numadomain = getNumaDomain(base)

-		// Update values in output list
-		clist[i] = centry
 	}
 	return clist
 }
--- a/internal/metricRouter/README.md
+++ b/internal/metricRouter/README.md
@@ -8,8 +8,6 @@ The CCMetric router sits in between the collectors and the sinks and can be used
 {
    "num_cache_intervals" : 1,
    "interval_timestamp" : true,
-    "hostname_tag" : "hostname",
-    "max_forward" : 50,
    "add_tags" : [
        {
            "key" : "cluster",
@@ -57,20 +55,6 @@ The CCMetric router sits in between the collectors and the sinks and can be used
 ```

 There are three main options `add_tags`, `delete_tags` and `interval_timestamp`. `add_tags` and `delete_tags` are lists consisting of dicts with `key`, `value` and `if`. The `value` can be omitted in the `delete_tags` part as it only uses the `key` for removal. The `interval_timestamp` setting means that a unique timestamp is applied to all metrics traversing the router during an interval.
-
-# Processing order in the router
-
- Add the `hostname_tag` tag (if sent by collectors or cache)
- If `interval_timestamp == true`, change time of metrics
- Check if metric should be dropped (`drop_metrics` and `drop_metrics_if`)
- Add tags from `add_tags`
- Delete tags from `del_tags`
- Rename metric based on `rename_metrics` and store old name as `oldname` in meta information
- Add tags from `add_tags` (if you used the new name in the `if` condition)
- Delete tags from `del_tags` (if you used the new name in the `if` condition)
- Send to sinks
- Move to cache (if `num_cache_intervals > 0`)
-
 # The `interval_timestamp` option

 The collectors' `Read()` functions are not called simultaneously and therefore the metrics gathered in an interval can have different timestamps. If you want to avoid that and have a common timestamp (the beginning of the interval), set this option to `true` and the MetricRouter sets the time.
@@ -81,14 +65,6 @@ If the MetricRouter should buffer metrics of intervals in a MetricCache, this op

 A `num_cache_intervals > 0` is required to use the `interval_aggregates` option.

-# The `hostname_tag` option
-
-By default, the router tags metrics with the hostname for all locally created metrics. The default tag name is `hostname`, but it can be changed if your organization wants anything else
-
-# The `max_forward` option
-
-Every time the router receives a metric through any of the channels, it tries to directly read up to `max_forward` metrics from the same channel. This was done as the router thread would go to sleep and wake up with every arriving metric. The default are `50` metrics at once and `max_forward` needs to greater than `1`.
-
 # The `rename_metrics` option

 In the ClusterCockpit world we specified a set of standard metrics. Since some collectors determine the metric names based on files, execuables and libraries, they might change from system to system (or installation to installtion, OS to OS, ...). In order to get the common names, you can rename incoming metrics before sending them to the sink. If the metric name matches the `oldname`, it is changed to `newname`
--- a/internal/metricRouter/metricRouter.go
+++ b/internal/metricRouter/metricRouter.go
@@ -48,6 +48,7 @@ type metricRouter struct {
 	done        chan bool           // channel to finish / stop metric router
 	wg          *sync.WaitGroup     // wait group for all goroutines in cc-metric-collector
 	timestamp   time.Time           // timestamp periodically updated by ticker each interval
+	timerdone   chan bool           // channel to finish / stop timestamp updater
 	ticker      mct.MultiChanTicker // periodically ticking once each interval
 	config      metricRouterConfig  // json encoded config for metric router
 	cache       MetricCache         // pointer to MetricCache
@@ -102,10 +103,7 @@ func (r *metricRouter) Init(ticker mct.MultiChanTicker, wg *sync.WaitGroup, rout
 		cclog.ComponentError("MetricRouter", err.Error())
 		return err
 	}
-	r.maxForward = 1
-	if r.config.MaxForward > r.maxForward {
-		r.maxForward = r.config.MaxForward
-	}
+	r.maxForward = r.config.MaxForward
 	if r.config.NumCacheIntervals > 0 {
 		r.cache, err = NewCache(r.cache_input, r.ticker, &r.cachewg, r.config.NumCacheIntervals)
 		if err != nil {
@@ -123,6 +121,29 @@ func (r *metricRouter) Init(ticker mct.MultiChanTicker, wg *sync.WaitGroup, rout
 	return nil
 }

+// StartTimer starts a timer which updates timestamp periodically
+func (r *metricRouter) StartTimer() {
+	m := make(chan time.Time)
+	r.ticker.AddChannel(m)
+	r.timerdone = make(chan bool)
+
+	r.wg.Add(1)
+	go func() {
+		defer r.wg.Done()
+		for {
+			select {
+			case <-r.timerdone:
+				close(r.timerdone)
+				cclog.ComponentDebug("MetricRouter", "TIMER DONE")
+				return
+			case t := <-m:
+				r.timestamp = t
+			}
+		}
+	}()
+	cclog.ComponentDebug("MetricRouter", "TIMER START")
+}
+
 func getParamMap(point lp.CCMetric) map[string]interface{} {
 	params := make(map[string]interface{})
 	params["metric"] = point
@@ -211,9 +232,8 @@ func (r *metricRouter) dropMetric(point lp.CCMetric) bool {
 func (r *metricRouter) Start() {
 	// start timer if configured
 	r.timestamp = time.Now()
-	timeChan := make(chan time.Time)
 	if r.config.IntervalStamp {
-		r.ticker.AddChannel(timeChan)
+		r.StartTimer()
 	}

 	// Router manager is done
@@ -293,10 +313,6 @@ func (r *metricRouter) Start() {
 				done()
 				return

-			case timestamp := <-timeChan:
-				r.timestamp = timestamp
-				cclog.ComponentDebug("MetricRouter", "Update timestamp", r.timestamp.UnixNano())
-
 			case p := <-r.coll_input:
 				coll_forward(p)
 				for i := 0; len(r.coll_input) > 0 && i < (r.maxForward-1); i++ {
@@ -342,6 +358,14 @@ func (r *metricRouter) Close() {
 	// wait for close of channel r.done
 	<-r.done

+	// stop timer
+	if r.config.IntervalStamp {
+		cclog.ComponentDebug("MetricRouter", "TIMER CLOSE")
+		r.timerdone <- true
+		// wait for close of channel r.timerdone
+		<-r.timerdone
+	}
+
 	// stop metric cache
 	if r.config.NumCacheIntervals > 0 {
 		cclog.ComponentDebug("MetricRouter", "CACHE CLOSE")
--- a/receivers.json
+++ b/receivers.json
@@ -4,22 +4,5 @@
        "address": "nats://my-url",
        "port" : "4222",
        "database": "testcluster"
-    },
-    "redfish_recv": {
-        "type": "redfish",
-        "client_config": [
-            {
-                "hostname": "my-host-1",
-                "username": "username-1",
-                "password": "password-1",
-                "endpoint": "https://my-endpoint-1"
-            },
-            {
-                "hostname": "my-host-2",
-                "username": "username-2",
-                "password": "password-2",
-                "endpoint": "https://my-endpoint-2"
-            }
-        ]
    }
 }
--- a/receivers/receiveManager.go
+++ b/receivers/receiveManager.go
@@ -10,13 +10,14 @@ import (
 )

 var AvailableReceivers = map[string]func(name string, config json.RawMessage) (Receiver, error){
-	"nats":    NewNatsReceiver,
-	"redfish": NewRedfishReceiver,
+	"nats": NewNatsReceiver,
 }

 type receiveManager struct {
 	inputs []Receiver
 	output chan lp.CCMetric
+	done   chan bool
+	wg     *sync.WaitGroup
 	config []json.RawMessage
 }

@@ -32,6 +33,8 @@ func (rm *receiveManager) Init(wg *sync.WaitGroup, receiverConfigFile string) er
 	// Initialize struct fields
 	rm.inputs = make([]Receiver, 0)
 	rm.output = nil
+	rm.done = make(chan bool)
+	rm.wg = wg
 	rm.config = make([]json.RawMessage, 0)

 	configFile, err := os.Open(receiverConfigFile)
@@ -55,7 +58,7 @@ func (rm *receiveManager) Init(wg *sync.WaitGroup, receiverConfigFile string) er
 }

 func (rm *receiveManager) Start() {
-	cclog.ComponentDebug("ReceiveManager", "START")
+	rm.wg.Add(1)

 	for _, r := range rm.inputs {
 		cclog.ComponentDebug("ReceiveManager", "START", r.Name())
@@ -94,19 +97,16 @@ func (rm *receiveManager) AddOutput(output chan lp.CCMetric) {
 }

 func (rm *receiveManager) Close() {
-	cclog.ComponentDebug("ReceiveManager", "CLOSE")
-
-	// Close all receivers
 	for _, r := range rm.inputs {
 		cclog.ComponentDebug("ReceiveManager", "CLOSE", r.Name())
 		r.Close()
 	}
-
-	cclog.ComponentDebug("ReceiveManager", "DONE")
+	rm.wg.Done()
+	cclog.ComponentDebug("ReceiveManager", "CLOSE")
 }

 func New(wg *sync.WaitGroup, receiverConfigFile string) (ReceiveManager, error) {
-	r := new(receiveManager)
+	r := &receiveManager{}
 	err := r.Init(wg, receiverConfigFile)
 	if err != nil {
 		return nil, err
--- a/receivers/redfishReceiver.go
+++ b/receivers/redfishReceiver.go
@@ -1,324 +0,0 @@
-package receivers
-
-import (
-	"encoding/json"
-	"fmt"
-	"strconv"
-	"sync"
-	"time"
-
-	cclog "github.com/ClusterCockpit/cc-metric-collector/internal/ccLogger"
-	lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric"
-
-	// See: https://pkg.go.dev/github.com/stmcginnis/gofish
-	"github.com/stmcginnis/gofish"
-)
-
-// RedfishReceiver configuration:
-type RedfishReceiver struct {
-	receiver
-	config struct {
-		Type     string `json:"type"`
-		Fanout   int    `json:"fanout,omitempty"`   // Default fanout: 64
-		Interval int    `json:"interval,omitempty"` // Default interval: 30s
-
-		// Client config for each redfish service
-		ClientConfigs []struct {
-			Hostname       *string  `json:"hostname"`
-			Username       *string  `json:"username"`
-			Password       *string  `json:"password"`
-			Endpoint       *string  `json:"endpoint"`
-			Insecure       *bool    `json:"insecure,omitempty"`
-			ExcludeMetrics []string `json:"exclude_metrics,omitempty"`
-			gofish         gofish.ClientConfig
-		} `json:"client_config"`
-	}
-
-	done chan bool      // channel to finish / stop redfish receiver
-	wg   sync.WaitGroup // wait group for redfish receiver
-}
-
-// Start starts the redfish receiver
-func (r *RedfishReceiver) Start() {
-	cclog.ComponentDebug(r.name, "START")
-
-	// readPowerMetric reads readfish power metric from the endpoint configured in conf
-	readPowerMetric := func(clientConfigIndex int) error {
-
-		clientConfig := &r.config.ClientConfigs[clientConfigIndex]
-
-		// Connect to redfish service
-		c, err := gofish.Connect(clientConfig.gofish)
-		if err != nil {
-			c := struct {
-				Username  string
-				Endpoint  string
-				BasicAuth bool
-				Insecure  bool
-			}{
-				Username:  clientConfig.gofish.Username,
-				Endpoint:  clientConfig.gofish.Endpoint,
-				BasicAuth: clientConfig.gofish.BasicAuth,
-				Insecure:  clientConfig.gofish.Insecure,
-			}
-			return fmt.Errorf("readPowerMetric: gofish.Connect(%+v) failed: %v", c, err)
-		}
-		defer c.Logout()
-
-		// Get all chassis managed by this service
-		chassis_list, err := c.Service.Chassis()
-		if err != nil {
-			return fmt.Errorf("readPowerMetric: c.Service.Chassis() failed: %v", err)
-		}
-
-		for _, chassis := range chassis_list {
-			timestamp := time.Now()
-
-			// Get power information for each chassis
-			power, err := chassis.Power()
-			if err != nil {
-				return fmt.Errorf("readPowerMetric: chassis.Power() failed: %v", err)
-			}
-			if power == nil {
-				continue
-			}
-
-			// Read min, max and average consumed watts for each power control
-			for _, pc := range power.PowerControl {
-
-				// Map of collected metrics
-				metrics := map[string]float32{
-					// PowerConsumedWatts shall represent the actual power being consumed (in
-					// Watts) by the chassis
-					"consumed_watts": pc.PowerConsumedWatts,
-					// AverageConsumedWatts shall represent the
-					// average power level that occurred averaged over the last IntervalInMin
-					// minutes.
-					"average_consumed_watts": pc.PowerMetrics.AverageConsumedWatts,
-					// MinConsumedWatts shall represent the
-					// minimum power level in watts that occurred within the last
-					// IntervalInMin minutes.
-					"min_consumed_watts": pc.PowerMetrics.MinConsumedWatts,
-					// MaxConsumedWatts shall represent the
-					// maximum power level in watts that occurred within the last
-					// IntervalInMin minutes
-					"max_consumed_watts": pc.PowerMetrics.MaxConsumedWatts,
-				}
-				intervalInMin := strconv.FormatFloat(float64(pc.PowerMetrics.IntervalInMin), 'f', -1, 32)
-
-				// Metrics to exclude
-				for _, key := range clientConfig.ExcludeMetrics {
-					delete(metrics, key)
-				}
-
-				// Set tags
-				tags := map[string]string{
-					"hostname": *clientConfig.Hostname,
-					"type":     "node",
-					// ID uniquely identifies the resource
-					"id": pc.ID,
-					// MemberID shall uniquely identify the member within the collection. For
-					// services supporting Redfish v1.6 or higher, this value shall be the
-					// zero-based array index.
-					"member_id": pc.MemberID,
-					// PhysicalContext shall be a description of the affected device(s) or region
-					// within the chassis to which this power control applies.
-					"physical_context": string(pc.PhysicalContext),
-					// Name
-					"power_control_name": pc.Name,
-				}
-
-				// Delete empty tags
-				for key, value := range tags {
-					if value == "" {
-						delete(tags, key)
-					}
-				}
-
-				// Set meta data tags
-				meta := map[string]string{
-					"source":              r.name,
-					"group":               "Energy",
-					"interval_in_minutes": intervalInMin,
-					"unit":                "watts",
-				}
-
-				// Delete empty meta data tags
-				for key, value := range meta {
-					if value == "" {
-						delete(meta, key)
-					}
-				}
-
-				for name, value := range metrics {
-
-					y, err := lp.New(name, tags, meta,
-						map[string]interface{}{
-							"value": value,
-						},
-						timestamp)
-					if err == nil {
-						r.sink <- y
-					}
-				}
-			}
-		}
-
-		return nil
-	}
-
-	// doReadPowerMetric read power metrics for all configure redfish services.
-	// To compensate latencies of the Redfish services a fanout is used.
-	doReadPowerMetric := func() {
-
-		// Compute fanout to use
-		realFanout := r.config.Fanout
-		if len(r.config.ClientConfigs) < realFanout {
-			realFanout = len(r.config.ClientConfigs)
-		}
-
-		// Create wait group and input channel for workers
-		var workerWaitGroup sync.WaitGroup
-		workerInput := make(chan int, realFanout)
-
-		// Create worker go routines
-		for i := 0; i < realFanout; i++ {
-			// Increment worker wait group counter
-			workerWaitGroup.Add(1)
-			go func() {
-				// Decrement worker wait group counter
-				defer workerWaitGroup.Done()
-
-				// Read power metrics for each client config
-				for clientConfigIndex := range workerInput {
-					err := readPowerMetric(clientConfigIndex)
-					if err != nil {
-						cclog.ComponentError(r.name, err)
-					}
-				}
-			}()
-		}
-
-		// Distribute client configs to workers
-		for i := range r.config.ClientConfigs {
-			// Check done channel status
-			select {
-			case workerInput <- i:
-			case <-r.done:
-				// process done event
-				// Stop workers, clear channel and wait for all workers to finish
-				close(workerInput)
-				for range workerInput {
-				}
-				workerWaitGroup.Wait()
-				return
-			}
-		}
-
-		// Stop workers and wait for all workers to finish
-		close(workerInput)
-		workerWaitGroup.Wait()
-	}
-
-	// Start redfish receiver
-	r.wg.Add(1)
-	go func() {
-		defer r.wg.Done()
-
-		// Create ticker
-		ticker := time.NewTicker(time.Duration(r.config.Interval) * time.Second)
-		defer ticker.Stop()
-
-		for {
-			doReadPowerMetric()
-
-			select {
-			case <-ticker.C:
-				// process ticker event -> continue
-				continue
-			case <-r.done:
-				// process done event
-				return
-			}
-		}
-	}()
-
-	cclog.ComponentDebug(r.name, "STARTED")
-}
-
-// Close redfish receiver
-func (r *RedfishReceiver) Close() {
-	cclog.ComponentDebug(r.name, "CLOSE")
-
-	// Send the signal and wait
-	close(r.done)
-	r.wg.Wait()
-
-	cclog.ComponentDebug(r.name, "DONE")
-}
-
-// New function to create a new instance of the receiver
-// Initialize the receiver by giving it a name and reading in the config JSON
-func NewRedfishReceiver(name string, config json.RawMessage) (Receiver, error) {
-	r := new(RedfishReceiver)
-
-	// Set name
-	r.name = fmt.Sprintf("RedfishReceiver(%s)", name)
-
-	// Create done channel
-	r.done = make(chan bool)
-
-	// Set defaults in r.config
-	// Allow overwriting these defaults by reading config JSON
-	r.config.Fanout = 64
-	r.config.Interval = 30
-
-	// Read the redfish receiver specific JSON config
-	if len(config) > 0 {
-		err := json.Unmarshal(config, &r.config)
-		if err != nil {
-			cclog.ComponentError(r.name, "Error reading config:", err.Error())
-			return nil, err
-		}
-	}
-
-	// Create gofish client config
-	for i := range r.config.ClientConfigs {
-		clientConfig := &r.config.ClientConfigs[i]
-		gofishConfig := &clientConfig.gofish
-
-		if clientConfig.Hostname == nil {
-			err := fmt.Errorf("client config number %v requires hostname", i)
-			cclog.ComponentError(r.name, err)
-			return nil, err
-		}
-
-		if clientConfig.Endpoint == nil {
-			err := fmt.Errorf("client config number %v requires endpoint", i)
-			cclog.ComponentError(r.name, err)
-			return nil, err
-		}
-		gofishConfig.Endpoint = *clientConfig.Endpoint
-
-		if clientConfig.Username == nil {
-			err := fmt.Errorf("client config number %v requires username", i)
-			cclog.ComponentError(r.name, err)
-			return nil, err
-		}
-		gofishConfig.Username = *clientConfig.Username
-
-		if clientConfig.Password == nil {
-			err := fmt.Errorf("client config number %v requires password", i)
-			cclog.ComponentError(r.name, err)
-			return nil, err
-		}
-		gofishConfig.Password = *clientConfig.Password
-
-		gofishConfig.Insecure = true
-		if clientConfig.Insecure != nil {
-			gofishConfig.Insecure = *clientConfig.Insecure
-		}
-	}
-
-	return r, nil
-}
--- a/receivers/sampleReceiver.go
+++ b/receivers/sampleReceiver.go
@@ -36,26 +36,16 @@ func (r *SampleReceiver) Start() {

 	// or use own go routine but always make sure it exits
 	// as soon as it gets the signal of the r.done channel
-	//
-	// r.done = make(chan bool)
 	// r.wg.Add(1)
 	// go func() {
-	//      defer r.wg.Done()
-	//
-	//      // Create ticker
-	//      ticker := time.NewTicker(30 * time.Second)
-	//      defer ticker.Stop()
-	//
-	//      for {
-	//          readMetric()
-	//          select {
-	//          case <-ticker.C:
-	//              // process ticker event -> continue
-	//              continue
-	//          case <-r.done:
-	//              return
-	//          }
-	//      }
+	// 	for {
+	// 		select {
+	// 		case <-r.done:
+	// 			r.wg.Done()
+	// 			return
+	// 		}
+	// 	}
+	// 	r.wg.Done()
 	// }()
 }

--- a/sinks.json
+++ b/sinks.json
@@ -1,8 +1,6 @@
 {
-  "mystdout": {
-    "type": "stdout",
-    "meta_as_tags": [
-      "unit"
-    ]
+  "mystdout" : {
+    "type" : "stdout",
+    "meta_as_tags" : true
  }
-}
+}
--- a/sinks/httpSink.go
+++ b/sinks/httpSink.go
@@ -22,7 +22,6 @@ type HttpSinkConfig struct {
 	MaxIdleConns    int    `json:"max_idle_connections,omitempty"`
 	IdleConnTimeout string `json:"idle_connection_timeout,omitempty"`
 	FlushDelay      string `json:"flush_delay,omitempty"`
-	BatchSize       int    `json:"batch_size,omitempty"`
 }

 type HttpSink struct {
@@ -37,20 +36,19 @@ type HttpSink struct {
 	idleConnTimeout time.Duration
 	timeout         time.Duration
 	flushDelay      time.Duration
-	batchSize       int
 }

 func (s *HttpSink) Write(m lp.CCMetric) error {
 	if s.buffer.Len() == 0 && s.flushDelay != 0 {
 		// This is the first write since the last flush, start the flushTimer!
 		if s.flushTimer != nil && s.flushTimer.Stop() {
-			cclog.ComponentDebug(s.name, "unexpected: the flushTimer was already running?")
+			cclog.ComponentDebug("HttpSink", "unexpected: the flushTimer was already running?")
 		}

 		// Run a batched flush for all lines that have arrived in the last second
 		s.flushTimer = time.AfterFunc(s.flushDelay, func() {
 			if err := s.Flush(); err != nil {
-				cclog.ComponentError(s.name, "flush failed:", err.Error())
+				cclog.ComponentError("HttpSink", "flush failed:", err.Error())
 			}
 		})
 	}
@@ -59,11 +57,9 @@ func (s *HttpSink) Write(m lp.CCMetric) error {

 	s.lock.Lock()
 	_, err := s.encoder.Encode(p)
-	s.batchSize++
 	s.lock.Unlock() // defer does not work here as Flush() takes the lock as well

 	if err != nil {
-		cclog.ComponentError(s.name, "encoding failed:", err.Error())
 		return err
 	}

@@ -71,9 +67,6 @@ func (s *HttpSink) Write(m lp.CCMetric) error {
 	if s.flushDelay == 0 {
 		return s.Flush()
 	}
-	if s.batchSize == s.config.BatchSize {
-		return s.Flush()
-	}

 	return err
 }
@@ -91,7 +84,6 @@ func (s *HttpSink) Flush() error {
 	// Create new request to send buffer
 	req, err := http.NewRequest(http.MethodPost, s.config.URL, s.buffer)
 	if err != nil {
-		cclog.ComponentError(s.name, "failed to create request:", err.Error())
 		return err
 	}

@@ -105,19 +97,15 @@ func (s *HttpSink) Flush() error {

 	// Clear buffer
 	s.buffer.Reset()
-	s.batchSize = 0

 	// Handle transport/tcp errors
 	if err != nil {
-		cclog.ComponentError(s.name, "transport/tcp error:", err.Error())
 		return err
 	}

 	// Handle application errors
 	if res.StatusCode != http.StatusOK {
-		err = errors.New(res.Status)
-		cclog.ComponentError(s.name, "application error:", err.Error())
-		return err
+		return errors.New(res.Status)
 	}

 	return nil
@@ -126,7 +114,7 @@ func (s *HttpSink) Flush() error {
 func (s *HttpSink) Close() {
 	s.flushTimer.Stop()
 	if err := s.Flush(); err != nil {
-		cclog.ComponentError(s.name, "flush failed:", err.Error())
+		cclog.ComponentError("HttpSink", "flush failed:", err.Error())
 	}
 	s.client.CloseIdleConnections()
 }
@@ -139,7 +127,6 @@ func NewHttpSink(name string, config json.RawMessage) (Sink, error) {
 	s.config.IdleConnTimeout = "5s"
 	s.config.Timeout = "5s"
 	s.config.FlushDelay = "1s"
-	s.config.BatchSize = 100

 	// Read config
 	if len(config) > 0 {
--- a/sinks/httpSink.md
+++ b/sinks/httpSink.md
@@ -15,7 +15,6 @@ The `http` sink uses POST requests to a HTTP server to submit the metrics in the
    "max_idle_connections" : 10,
    "idle_connection_timeout" : "5s",
    "flush_delay": "2s",
-    "batch_size" : 100
  }
 }
 ```
@@ -28,4 +27,3 @@ The `http` sink uses POST requests to a HTTP server to submit the metrics in the
 - `max_idle_connections`: Maximally idle connections (default 10)
 - `idle_connection_timeout`: Timeout for idle connections (default '5s')
 - `flush_delay`: Batch all writes arriving in during this duration (default '1s', batching can be disabled by setting it to 0)
- `batch_size`: Maximal number of batched metrics. Either it is flushed because batch size or the `flush_delay` is reached
--- a/sinks/influxAsyncSink.go
+++ b/sinks/influxAsyncSink.go
@@ -6,14 +6,12 @@ import (
 	"encoding/json"
 	"errors"
 	"fmt"
-	"strings"
 	"time"

 	cclog "github.com/ClusterCockpit/cc-metric-collector/internal/ccLogger"
 	lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric"
 	influxdb2 "github.com/influxdata/influxdb-client-go/v2"
 	influxdb2Api "github.com/influxdata/influxdb-client-go/v2/api"
-	influxdb2ApiHttp "github.com/influxdata/influxdb-client-go/v2/api/http"
 )

 type InfluxAsyncSinkConfig struct {
@@ -30,12 +28,10 @@ type InfluxAsyncSinkConfig struct {
 	BatchSize uint `json:"batch_size,omitempty"`
 	// Interval, in ms, in which is buffer flushed if it has not been already written (by reaching batch size) . Default 1000ms
 	FlushInterval         uint   `json:"flush_interval,omitempty"`
-	InfluxRetryInterval   string `json:"retry_interval,omitempty"`
-	InfluxExponentialBase uint   `json:"retry_exponential_base,omitempty"`
-	InfluxMaxRetries      uint   `json:"max_retries,omitempty"`
-	InfluxMaxRetryTime    string `json:"max_retry_time,omitempty"`
-	CustomFlushInterval   string `json:"custom_flush_interval,omitempty"`
-	MaxRetryAttempts      uint   `json:"max_retry_attempts,omitempty"`
+	InfluxRetryInterval   string `json:"retry_interval"`
+	InfluxExponentialBase uint   `json:"retry_exponential_base"`
+	InfluxMaxRetries      uint   `json:"max_retries"`
+	InfluxMaxRetryTime    string `json:"max_retry_time"`
 }

 type InfluxAsyncSink struct {
@@ -46,8 +42,6 @@ type InfluxAsyncSink struct {
 	config              InfluxAsyncSinkConfig
 	influxRetryInterval uint
 	influxMaxRetryTime  uint
-	customFlushInterval time.Duration
-	flushTimer          *time.Timer
 }

 func (s *InfluxAsyncSink) connect() error {
@@ -66,34 +60,20 @@ func (s *InfluxAsyncSink) connect() error {
 	cclog.ComponentDebug(s.name, "Using URI", uri, "Org", s.config.Organization, "Bucket", s.config.Database)
 	clientOptions := influxdb2.DefaultOptions()
 	if s.config.BatchSize != 0 {
-		cclog.ComponentDebug(s.name, "Batch size", s.config.BatchSize)
 		clientOptions.SetBatchSize(s.config.BatchSize)
 	}
 	if s.config.FlushInterval != 0 {
-		cclog.ComponentDebug(s.name, "Flush interval", s.config.FlushInterval)
 		clientOptions.SetFlushInterval(s.config.FlushInterval)
 	}
-	if s.influxRetryInterval != 0 {
-		cclog.ComponentDebug(s.name, "MaxRetryInterval", s.influxRetryInterval)
-		clientOptions.SetMaxRetryInterval(s.influxRetryInterval)
-	}
-	if s.influxMaxRetryTime != 0 {
-		cclog.ComponentDebug(s.name, "MaxRetryTime", s.influxMaxRetryTime)
-		clientOptions.SetMaxRetryTime(s.influxMaxRetryTime)
-	}
-	if s.config.InfluxExponentialBase != 0 {
-		cclog.ComponentDebug(s.name, "Exponential Base", s.config.InfluxExponentialBase)
-		clientOptions.SetExponentialBase(s.config.InfluxExponentialBase)
-	}
-	if s.config.InfluxMaxRetries != 0 {
-		cclog.ComponentDebug(s.name, "Max Retries", s.config.InfluxMaxRetries)
-		clientOptions.SetMaxRetries(s.config.InfluxMaxRetries)
-	}
 	clientOptions.SetTLSConfig(
 		&tls.Config{
 			InsecureSkipVerify: true,
 		},
-	).SetPrecision(time.Second)
+	)
+	clientOptions.SetMaxRetryInterval(s.influxRetryInterval)
+	clientOptions.SetMaxRetryTime(s.influxMaxRetryTime)
+	clientOptions.SetExponentialBase(s.config.InfluxExponentialBase)
+	clientOptions.SetMaxRetries(s.config.InfluxMaxRetries)

 	s.client = influxdb2.NewClientWithOptions(uri, auth, clientOptions)
 	s.writeApi = s.client.WriteAPI(s.config.Organization, s.config.Database)
@@ -104,23 +84,10 @@ func (s *InfluxAsyncSink) connect() error {
 	if !ok {
 		return fmt.Errorf("connection to %s not healthy", uri)
 	}
-	s.writeApi.SetWriteFailedCallback(func(batch string, err influxdb2ApiHttp.Error, retryAttempts uint) bool {
-		mlist := strings.Split(batch, "\n")
-		cclog.ComponentError(s.name, fmt.Sprintf("Failed to write batch with %d metrics %d times (max: %d): %s", len(mlist), retryAttempts, s.config.MaxRetryAttempts, err.Error()))
-		return retryAttempts <= s.config.MaxRetryAttempts
-	})
 	return nil
 }

 func (s *InfluxAsyncSink) Write(m lp.CCMetric) error {
-	if s.customFlushInterval != 0 && s.flushTimer == nil {
-		// Run a batched flush for all lines that have arrived in the defined interval
-		s.flushTimer = time.AfterFunc(s.customFlushInterval, func() {
-			if err := s.Flush(); err != nil {
-				cclog.ComponentError(s.name, "flush failed:", err.Error())
-			}
-		})
-	}
 	s.writeApi.WritePoint(
 		m.ToPoint(s.meta_as_tags),
 	)
@@ -128,11 +95,7 @@ func (s *InfluxAsyncSink) Write(m lp.CCMetric) error {
 }

 func (s *InfluxAsyncSink) Flush() error {
-	cclog.ComponentDebug(s.name, "Flushing")
 	s.writeApi.Flush()
-	if s.customFlushInterval != 0 && s.flushTimer != nil {
-		s.flushTimer = nil
-	}
 	return nil
 }

@@ -147,17 +110,13 @@ func NewInfluxAsyncSink(name string, config json.RawMessage) (Sink, error) {
 	s.name = fmt.Sprintf("InfluxSink(%s)", name)

 	// Set default for maximum number of points sent to server in single request.
-	s.config.BatchSize = 0
-	s.influxRetryInterval = 0
-	//s.config.InfluxRetryInterval = "1s"
-	s.influxMaxRetryTime = 0
-	//s.config.InfluxMaxRetryTime = "168h"
-	s.config.InfluxMaxRetries = 0
-	s.config.InfluxExponentialBase = 0
-	s.config.FlushInterval = 0
-	s.config.CustomFlushInterval = ""
-	s.customFlushInterval = time.Duration(0)
-	s.config.MaxRetryAttempts = 1
+	s.config.BatchSize = 100
+	s.influxRetryInterval = uint(time.Duration(1) * time.Second)
+	s.config.InfluxRetryInterval = "1s"
+	s.influxMaxRetryTime = uint(7 * time.Duration(24) * time.Hour)
+	s.config.InfluxMaxRetryTime = "168h"
+	s.config.InfluxMaxRetries = 20
+	s.config.InfluxExponentialBase = 2

 	// Default retry intervals (in seconds)
 	// 1 2
@@ -209,15 +168,6 @@ func NewInfluxAsyncSink(name string, config json.RawMessage) (Sink, error) {
 	s.influxRetryInterval = toUint(s.config.InfluxRetryInterval, s.influxRetryInterval)
 	s.influxMaxRetryTime = toUint(s.config.InfluxMaxRetryTime, s.influxMaxRetryTime)

-	// Use a own timer for calling Flush()
-	if len(s.config.CustomFlushInterval) > 0 {
-		t, err := time.ParseDuration(s.config.CustomFlushInterval)
-		if err != nil {
-			return nil, fmt.Errorf("invalid duration in 'custom_flush_interval': %v", err)
-		}
-		s.customFlushInterval = t
-	}
-
 	// Connect to InfluxDB server
 	if err := s.connect(); err != nil {
 		return nil, fmt.Errorf("unable to connect: %v", err)
--- a/sinks/influxSink.go
+++ b/sinks/influxSink.go
@@ -6,32 +6,28 @@ import (
 	"encoding/json"
 	"errors"
 	"fmt"
-	"sync"
 	"time"

 	cclog "github.com/ClusterCockpit/cc-metric-collector/internal/ccLogger"
 	lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric"
 	influxdb2 "github.com/influxdata/influxdb-client-go/v2"
 	influxdb2Api "github.com/influxdata/influxdb-client-go/v2/api"
-	"github.com/influxdata/influxdb-client-go/v2/api/write"
 )

 type InfluxSinkConfig struct {
 	defaultSinkConfig
-	Host         string `json:"host,omitempty"`
-	Port         string `json:"port,omitempty"`
-	Database     string `json:"database,omitempty"`
-	User         string `json:"user,omitempty"`
-	Password     string `json:"password,omitempty"`
-	Organization string `json:"organization,omitempty"`
-	SSL          bool   `json:"ssl,omitempty"`
-	FlushDelay   string `json:"flush_delay,omitempty"`
-	BatchSize    int    `json:"batch_size,omitempty"`
-	RetentionPol string `json:"retention_policy,omitempty"`
-	// InfluxRetryInterval   string `json:"retry_interval"`
-	// InfluxExponentialBase uint   `json:"retry_exponential_base"`
-	// InfluxMaxRetries      uint   `json:"max_retries"`
-	// InfluxMaxRetryTime    string `json:"max_retry_time"`
+	Host                  string `json:"host,omitempty"`
+	Port                  string `json:"port,omitempty"`
+	Database              string `json:"database,omitempty"`
+	User                  string `json:"user,omitempty"`
+	Password              string `json:"password,omitempty"`
+	Organization          string `json:"organization,omitempty"`
+	SSL                   bool   `json:"ssl,omitempty"`
+	RetentionPol          string `json:"retention_policy,omitempty"`
+	InfluxRetryInterval   string `json:"retry_interval"`
+	InfluxExponentialBase uint   `json:"retry_exponential_base"`
+	InfluxMaxRetries      uint   `json:"max_retries"`
+	InfluxMaxRetryTime    string `json:"max_retry_time"`
 	//InfluxMaxRetryDelay  string `json:"max_retry_delay"` // It is mentioned in the docs but there is no way to set it
 }

@@ -42,71 +38,37 @@ type InfluxSink struct {
 	config              InfluxSinkConfig
 	influxRetryInterval uint
 	influxMaxRetryTime  uint
-	batch               []*write.Point
-	flushTimer          *time.Timer
-	flushDelay          time.Duration
-	lock                sync.Mutex // Flush() runs in another goroutine, so this lock has to protect the buffer
 	//influxMaxRetryDelay uint
 }

-// connect connects to the InfluxDB server
 func (s *InfluxSink) connect() error {
-
-	// URI options:
-	// * http://host:port
-	// * https://host:port
+	var auth string
 	var uri string
 	if s.config.SSL {
 		uri = fmt.Sprintf("https://%s:%s", s.config.Host, s.config.Port)
 	} else {
 		uri = fmt.Sprintf("http://%s:%s", s.config.Host, s.config.Port)
 	}
-
-	// Authentication options:
-	// * token
-	// * username:password
-	var auth string
 	if len(s.config.User) == 0 {
 		auth = s.config.Password
 	} else {
 		auth = fmt.Sprintf("%s:%s", s.config.User, s.config.Password)
 	}
 	cclog.ComponentDebug(s.name, "Using URI", uri, "Org", s.config.Organization, "Bucket", s.config.Database)
-
-	// Set influxDB client options
 	clientOptions := influxdb2.DefaultOptions()
-
-	// if s.influxRetryInterval != 0 {
-	// 	cclog.ComponentDebug(s.name, "MaxRetryInterval", s.influxRetryInterval)
-	// 	clientOptions.SetMaxRetryInterval(s.influxRetryInterval)
-	// }
-	// if s.influxMaxRetryTime != 0 {
-	// 	cclog.ComponentDebug(s.name, "MaxRetryTime", s.influxMaxRetryTime)
-	// 	clientOptions.SetMaxRetryTime(s.influxMaxRetryTime)
-	// }
-	// if s.config.InfluxExponentialBase != 0 {
-	// 	cclog.ComponentDebug(s.name, "Exponential Base", s.config.InfluxExponentialBase)
-	// 	clientOptions.SetExponentialBase(s.config.InfluxExponentialBase)
-	// }
-	// if s.config.InfluxMaxRetries != 0 {
-	// 	cclog.ComponentDebug(s.name, "Max Retries", s.config.InfluxMaxRetries)
-	// 	clientOptions.SetMaxRetries(s.config.InfluxMaxRetries)
-	// }
-
-	// Do not check InfluxDB certificate
 	clientOptions.SetTLSConfig(
 		&tls.Config{
 			InsecureSkipVerify: true,
 		},
 	)

-	clientOptions.SetPrecision(time.Second)
+	clientOptions.SetMaxRetryInterval(s.influxRetryInterval)
+	clientOptions.SetMaxRetryTime(s.influxMaxRetryTime)
+	clientOptions.SetExponentialBase(s.config.InfluxExponentialBase)
+	clientOptions.SetMaxRetries(s.config.InfluxMaxRetries)

-	// Create new writeAPI
 	s.client = influxdb2.NewClientWithOptions(uri, auth, clientOptions)
 	s.writeApi = s.client.WriteAPIBlocking(s.config.Organization, s.config.Database)
-
-	// Check InfluxDB server accessibility
 	ok, err := s.client.Ping(context.Background())
 	if err != nil {
 		return err
@@ -118,142 +80,61 @@ func (s *InfluxSink) connect() error {
 }

 func (s *InfluxSink) Write(m lp.CCMetric) error {
-
-	if len(s.batch) == 0 && s.flushDelay != 0 {
-		// This is the first write since the last flush, start the flushTimer!
-		if s.flushTimer != nil && s.flushTimer.Stop() {
-			cclog.ComponentDebug(s.name, "unexpected: the flushTimer was already running?")
-		}
-
-		// Run a batched flush for all lines that have arrived in the last flush delay interval
-		s.flushTimer = time.AfterFunc(s.flushDelay, func() {
-			if err := s.Flush(); err != nil {
-				cclog.ComponentError(s.name, "flush failed:", err.Error())
-			}
-		})
-	}
-
-	// Append metric to batch slice
-	p := m.ToPoint(s.meta_as_tags)
-	s.lock.Lock()
-	s.batch = append(s.batch, p)
-	s.lock.Unlock()
-
-	// Flush synchronously if "flush_delay" is zero
-	if s.flushDelay == 0 {
-		return s.Flush()
-	}
-
-	// Flush if batch size is reached
-	if len(s.batch) == s.config.BatchSize {
-		return s.Flush()
-	}
-
-	return nil
+	err :=
+		s.writeApi.WritePoint(
+			context.Background(),
+			m.ToPoint(s.meta_as_tags),
+		)
+	return err
 }

-// Flush sends all metrics buffered in batch slice to InfluxDB server
 func (s *InfluxSink) Flush() error {
-
-	// Lock access to batch slice
-	s.lock.Lock()
-	defer s.lock.Unlock()
-
-	// Nothing to do, batch slice is empty
-	if len(s.batch) == 0 {
-		return nil
-	}
-
-	// Send metrics from batch slice
-	err := s.writeApi.WritePoint(context.Background(), s.batch...)
-	if err != nil {
-		cclog.ComponentError(s.name, "flush failed:", err.Error())
-		return err
-	}
-
-	// Clear batch slice
-	for i := range s.batch {
-		s.batch[i] = nil
-	}
-	s.batch = s.batch[:0]
-
 	return nil
 }

 func (s *InfluxSink) Close() {
 	cclog.ComponentDebug(s.name, "Closing InfluxDB connection")
-	s.flushTimer.Stop()
-	s.Flush()
 	s.client.Close()
 }

-// NewInfluxSink create a new InfluxDB sink
 func NewInfluxSink(name string, config json.RawMessage) (Sink, error) {
 	s := new(InfluxSink)
 	s.name = fmt.Sprintf("InfluxSink(%s)", name)
-
-	// Set config default values
-	s.config.BatchSize = 100
-	s.config.FlushDelay = "1s"
-
-	// Read config
 	if len(config) > 0 {
 		err := json.Unmarshal(config, &s.config)
 		if err != nil {
 			return nil, err
 		}
 	}
-	s.influxRetryInterval = 0
-	s.influxMaxRetryTime = 0
-	// s.config.InfluxRetryInterval = ""
-	// s.config.InfluxMaxRetryTime = ""
-	// s.config.InfluxMaxRetries = 0
-	// s.config.InfluxExponentialBase = 0
+	s.influxRetryInterval = uint(time.Duration(1) * time.Second)
+	s.config.InfluxRetryInterval = "1s"
+	s.influxMaxRetryTime = uint(7 * time.Duration(24) * time.Hour)
+	s.config.InfluxMaxRetryTime = "168h"
+	s.config.InfluxMaxRetries = 20
+	s.config.InfluxExponentialBase = 2

-	if len(s.config.Host) == 0 {
-		return nil, errors.New("Missing host configuration required by InfluxSink")
+	if len(s.config.Host) == 0 ||
+		len(s.config.Port) == 0 ||
+		len(s.config.Database) == 0 ||
+		len(s.config.Organization) == 0 ||
+		len(s.config.Password) == 0 {
+		return nil, errors.New("not all configuration variables set required by InfluxSink")
 	}
-	if len(s.config.Port) == 0 {
-		return nil, errors.New("Missing port configuration required by InfluxSink")
-	}
-	if len(s.config.Database) == 0 {
-		return nil, errors.New("Missing database configuration required by InfluxSink")
-	}
-	if len(s.config.Organization) == 0 {
-		return nil, errors.New("Missing organization configuration required by InfluxSink")
-	}
-	if len(s.config.Password) == 0 {
-		return nil, errors.New("Missing password configuration required by InfluxSink")
-	}
-
 	// Create lookup map to use meta infos as tags in the output metric
 	s.meta_as_tags = make(map[string]bool)
 	for _, k := range s.config.MetaAsTags {
 		s.meta_as_tags[k] = true
 	}

-	// toUint := func(duration string, def uint) uint {
-	// 	if len(duration) > 0 {
-	// 		t, err := time.ParseDuration(duration)
-	// 		if err == nil {
-	// 			return uint(t.Milliseconds())
-	// 		}
-	// 	}
-	// 	return def
-	// }
-	// s.influxRetryInterval = toUint(s.config.InfluxRetryInterval, s.influxRetryInterval)
-	// s.influxMaxRetryTime = toUint(s.config.InfluxMaxRetryTime, s.influxMaxRetryTime)
-
-	// Configure flush delay duration
-	if len(s.config.FlushDelay) > 0 {
-		t, err := time.ParseDuration(s.config.FlushDelay)
+	toUint := func(duration string, def uint) uint {
+		t, err := time.ParseDuration(duration)
 		if err == nil {
-			s.flushDelay = t
+			return uint(t.Milliseconds())
 		}
+		return def
 	}
-
-	// allocate batch slice
-	s.batch = make([]*write.Point, 0, s.config.BatchSize)
+	s.influxRetryInterval = toUint(s.config.InfluxRetryInterval, s.influxRetryInterval)
+	s.influxMaxRetryTime = toUint(s.config.InfluxMaxRetryTime, s.influxMaxRetryTime)

 	// Connect to InfluxDB server
 	if err := s.connect(); err != nil {
--- a/sinks/influxSink.md
+++ b/sinks/influxSink.md
@@ -17,8 +17,10 @@ The `influxdb` sink uses the official [InfluxDB golang client](https://pkg.go.de
    "password" : "examplepw",
    "organization": "myorg",
    "ssl": true,
-    "flush_delay" : "1s",
-    "batch_size" : 100
+    "retry_interval" : "1s",
+    "retry_exponential_base" : 2,
+    "max_retries": 20,
+    "max_retry_time" : "168h"
  }
 }
 ```
@@ -32,6 +34,9 @@ The `influxdb` sink uses the official [InfluxDB golang client](https://pkg.go.de
 - `password`: Password for basic authentification
 - `organization`: Organization in the InfluxDB
 - `ssl`: Use SSL connection
- `flush_delay`: Group metrics coming in to a single batch
- `batch_size`: Maximal batch size
+- `retry_interval`: Base retry interval for failed write requests, default 1s
+- `retry_exponential_base`: The retry interval is exponentially increased with this base, default 2
+- `max_retries`: Maximum number of retry attempts, default 20
+- `max_retry_time`: Maximal time to retry failed writes, default 168h (one week)

+For information about the calculation of the retry interval settings, see the [official influxdb-client-go documentation](https://github.com/influxdata/influxdb-client-go#handling-of-failed-async-writes)
Author	SHA1	Message	Date
Thomas Roehl	4851382ad7	Merge branch 'develop' into main	2022-03-16 19:08:13 +01:00
Thomas Gruber	3f76947f54	Merge latest developments into main (#67 ) * Update configuration.md Add an additional receiver to have better alignment of components * Change default GpfsCollector command to `mmpmon` (#53) * Set default cmd to 'mmpmon' * Reuse looked up path * Cast const to string * Just download LIKWID to get the headers (#54) * Just download LIKWID to get the headers * Remove perl-Data-Dumper from BuildRequires, only required by LIKWID build * Add HttpReceiver as counterpart to the HttpSink (#49) * Use GBytes as unit for large memory numbers * Make maxForward configurable, save old name in meta in rename metrics and make the hostname tag key configurable * Single release action (#55) Building all RPMs and releasing in a single workflow * Makefile target to build binary-only Debian packages (#61) * Add 'install' and 'DEB' make targets to build binary-only Debian packages * Add control file for DEB builds * Use a single line for bash loop in make clean * Add config options for retry intervals of InfluxDB clients (#59) * Refactoring of LikwidCollector and metric units (#62) * Reduce complexity of LikwidCollector and allow metric units * Add unit to LikwidCollector docu and fix some typos * Make library path configurable * Use old metric name in Ganglia if rename has happened in the router (#60) * Use old metric name if rename has happened in the router * Also check for Ganglia renames for the oldname * Derived metrics (#57) * Add time-based derivatived (e.g. 
bandwidth) to some collectors * Add documentation * Add comments * Fix: Only compute rates with a valid previous state * Only compute rates with a valid previous state * Define const values for net/dev fields * Set default config values * Add comments * Refactor: Consolidate data structures * Refactor: Consolidate data structures * Refactor: Avoid struct deep copy * Refactor: Avoid redundant tag maps * Refactor: Use int64 type for absolut values Co-authored-by: Holger Obermaier <40787752+ho-ob@users.noreply.github.com> * Simplified iota usage * Move unit tag to meta data tags * Derived metrics (#65) * Add time-based derivatived (e.g. bandwidth) to some collectors * Add documentation * Add comments * Fix: Only compute rates with a valid previous state * Only compute rates with a valid previous state * Define const values for net/dev fields * Set default config values * Add comments * Refactor: Consolidate data structures * Refactor: Consolidate data structures * Refactor: Avoid struct deep copy * Refactor: Avoid redundant tag maps * Refactor: Use int64 type for absolut values * Update LustreCollector Co-authored-by: Holger Obermaier <40787752+ho-ob@users.noreply.github.com> * Meta to tags list and map for sinks (#63) * Change ccMetric->Influx functions * Use a meta_as_tags string list in config but create a lookup map afterwards * Add meta as tag logic to sampleSink * Fix staticcheck warnings (#66) Co-authored-by: Holger Obermaier <40787752+ho-ob@users.noreply.github.com>	2022-03-15 16:41:11 +01:00
Thomas Roehl	3157386b3e	Merge branch 'develop' into main	2022-03-04 23:34:28 +01:00
Thomas Roehl	ff08eaeb43	Set proper user for files	2022-03-04 11:49:55 +01:00
Thomas Roehl	64c41be34c	Fix name for ClusterCockpit	2022-03-04 11:37:45 +01:00
Thomas Roehl	f4af520b2a	Fix error print in LustreCollector	2022-03-04 11:32:39 +01:00