mirror of
https://github.com/ClusterCockpit/cc-metric-collector.git
synced 2025-01-13 07:39:05 +01:00
7840de7b82
* Add cpu_used (all-cpu_idle) to CpustatCollector * Update cc-metric-collector.init * Allow selection of timestamp precision in HttpSink * Add comment about precision requirement for cc-metric-store * Fix for API changes in gofish@v0.15.0 * Update requirements to latest version * Read sensors through redfish * Update golang toolchain to 1.21 * Remove stray error check * Update main config in configuration.md * Update Release action to use golang 1.22 stable release, no golang RPMs anymore * Update runonce action to use golang 1.22 stable release, no golang RPMs anymore * Update README.md Use right JSON type in configuration * Update sink's README * Test whether ipmitool or ipmi-sensors can be executed without errors * Little fixes to the prometheus sink (#115) * Add uint64 to float64 cast option * Add prometheus sink to the list of available sinks * Add aggregated counters by gpu for nvlink errors --------- Co-authored-by: Michael Schwarz <schwarz@uni-paderborn.de> * Ccmessage migration (#119) * Add cpu_used (all-cpu_idle) to CpustatCollector * Update cc-metric-collector.init * Allow selection of timestamp precision in HttpSink * Add comment about precision requirement for cc-metric-store * Fix for API changes in gofish@v0.15.0 * Update requirements to latest version * Read sensors through redfish * Update golang toolchain to 1.21 * Remove stray error check * Update main config in configuration.md * Update Release action to use golang 1.22 stable release, no golang RPMs anymore * Update runonce action to use golang 1.22 stable release, no golang RPMs anymore * Switch to CCMessage for all files. 
--------- Co-authored-by: Holger Obermaier <Holger.Obermaier@kit.edu> Co-authored-by: Holger Obermaier <40787752+ho-ob@users.noreply.github.com> * Switch to ccmessage also for latest additions in nvidiaMetric * New Message processor (#118) * Add cpu_used (all-cpu_idle) to CpustatCollector * Update cc-metric-collector.init * Allow selection of timestamp precision in HttpSink * Add comment about precision requirement for cc-metric-store * Fix for API changes in gofish@v0.15.0 * Update requirements to latest version * Read sensors through redfish * Update golang toolchain to 1.21 * Remove stray error check * Update main config in configuration.md * Update Release action to use golang 1.22 stable release, no golang RPMs anymore * Update runonce action to use golang 1.22 stable release, no golang RPMs anymore * New message processor to check whether a message should be dropped or manipulate it in flight * Create a copy of message before manipulation --------- Co-authored-by: Holger Obermaier <Holger.Obermaier@kit.edu> Co-authored-by: Holger Obermaier <40787752+ho-ob@users.noreply.github.com> * Update collector's Makefile and go.mod/sum files * Use message processor in router, all sinks and all receivers * Add support for credential file (NKEY) to NATS sink and receiver * Fix JSON keys in message processor configuration * Update docs for message processor, router and the default router config file * Add link to expr syntax and fix regex matching docs * Update sample collectors * Minor style change in collector manager * Some helpers for ccTopology * LIKWID collector: write log owner change only once * Fix for metrics without units and reduce debugging messages for messageProcessor * Use shorted hostname for hostname added by router * Define default port for NATS * CPUstat collector: only add unit for applicable metrics * Add precision option to all sinks using Influx's encoder * Add message processor to all sink documentation * Add units to documentation of cpustat 
collector --------- Co-authored-by: Holger Obermaier <Holger.Obermaier@kit.edu> Co-authored-by: Holger Obermaier <40787752+ho-ob@users.noreply.github.com> Co-authored-by: oscarminus <me@oscarminus.de> Co-authored-by: Michael Schwarz <schwarz@uni-paderborn.de>
123 lines
3.8 KiB
Go
123 lines
3.8 KiB
Go
package collectors
|
|
|
|
import (
	"encoding/json"
	"fmt"
	"sync"
	"time"

	lp "github.com/ClusterCockpit/cc-energy-manager/pkg/cc-message"
	cclog "github.com/ClusterCockpit/cc-metric-collector/pkg/ccLogger"
)
|
|
|
|
// SampleTimerCollectorConfig holds the settings that are decoded from the
// collector's JSON configuration.
type SampleTimerCollectorConfig struct {
	// Interval is the tick period of the collector's own timer. It is given
	// as a duration string and parsed with time.ParseDuration in Init.
	Interval string `json:"interval"`
}
|
|
|
|
// This contains all variables we need during execution and the variables
|
|
// defined by metricCollector (name, init, ...)
|
|
type SampleTimerCollector struct {
|
|
metricCollector
|
|
wg sync.WaitGroup // sync group for management
|
|
done chan bool // channel for management
|
|
meta map[string]string // default meta information
|
|
tags map[string]string // default tags
|
|
config SampleTimerCollectorConfig // the configuration structure
|
|
interval time.Duration // the interval parsed from configuration
|
|
ticker *time.Ticker // own timer
|
|
output chan lp.CCMessage // own internal output channel
|
|
}
|
|
|
|
func (m *SampleTimerCollector) Init(name string, config json.RawMessage) error {
|
|
var err error = nil
|
|
// Always set the name early in Init() to use it in cclog.Component* functions
|
|
m.name = "SampleTimerCollector"
|
|
// This is for later use, also call it early
|
|
m.setup()
|
|
// Define meta information sent with each metric
|
|
// (Can also be dynamic or this is the basic set with extension through AddMeta())
|
|
m.meta = map[string]string{"source": m.name, "group": "SAMPLE"}
|
|
// Define tags sent with each metric
|
|
// The 'type' tag is always needed, it defines the granularity of the metric
|
|
// node -> whole system
|
|
// socket -> CPU socket (requires socket ID as 'type-id' tag)
|
|
// cpu -> single CPU hardware thread (requires cpu ID as 'type-id' tag)
|
|
m.tags = map[string]string{"type": "node"}
|
|
// Read in the JSON configuration
|
|
if len(config) > 0 {
|
|
err = json.Unmarshal(config, &m.config)
|
|
if err != nil {
|
|
cclog.ComponentError(m.name, "Error reading config:", err.Error())
|
|
return err
|
|
}
|
|
}
|
|
// Parse the read interval duration
|
|
m.interval, err = time.ParseDuration(m.config.Interval)
|
|
if err != nil {
|
|
cclog.ComponentError(m.name, "Error parsing interval:", err.Error())
|
|
return err
|
|
}
|
|
|
|
// Storage for output channel
|
|
m.output = nil
|
|
// Management channel for the timer function.
|
|
m.done = make(chan bool)
|
|
// Create the own ticker
|
|
m.ticker = time.NewTicker(m.interval)
|
|
|
|
// Start the timer loop with return functionality by sending 'true' to the done channel
|
|
m.wg.Add(1)
|
|
go func() {
|
|
select {
|
|
case <-m.done:
|
|
// Exit the timer loop
|
|
cclog.ComponentDebug(m.name, "Closing...")
|
|
m.wg.Done()
|
|
return
|
|
case timestamp := <-m.ticker.C:
|
|
// This is executed every timer tick but we have to wait until the first
|
|
// Read() to get the output channel
|
|
if m.output != nil {
|
|
m.ReadMetrics(timestamp)
|
|
}
|
|
}
|
|
}()
|
|
|
|
// Set this flag only if everything is initialized properly, all required files exist, ...
|
|
m.init = true
|
|
return err
|
|
}
|
|
|
|
// This function is called at each interval timer tick
|
|
func (m *SampleTimerCollector) ReadMetrics(timestamp time.Time) {
|
|
// Create a sample metric
|
|
|
|
value := 1.0
|
|
|
|
// If you want to measure something for a specific amount of time, use interval
|
|
// start := readState()
|
|
// time.Sleep(interval)
|
|
// stop := readState()
|
|
// value = (stop - start) / interval.Seconds()
|
|
|
|
y, err := lp.NewMessage("sample_metric", m.tags, m.meta, map[string]interface{}{"value": value}, timestamp)
|
|
if err == nil && m.output != nil {
|
|
// Send it to output channel if we have a valid channel
|
|
m.output <- y
|
|
}
|
|
}
|
|
|
|
func (m *SampleTimerCollector) Read(interval time.Duration, output chan lp.CCMessage) {
|
|
// Capture output channel
|
|
m.output = output
|
|
}
|
|
|
|
func (m *SampleTimerCollector) Close() {
|
|
// Send signal to the timer loop to stop it
|
|
m.done <- true
|
|
// Wait until the timer loop is done
|
|
m.wg.Wait()
|
|
// Unset flag
|
|
m.init = false
|
|
}
|