mirror of
https://github.com/ClusterCockpit/cc-metric-collector.git
synced 2025-07-20 20:01:40 +02:00
Compare commits
8 Commits
http_stats
...
v0.5
Author | SHA1 | Date | |
---|---|---|---|
|
4763733d8d | ||
|
16e898ecca | ||
|
4851382ad7 | ||
|
3f76947f54 | ||
|
3157386b3e | ||
|
ff08eaeb43 | ||
|
64c41be34c | ||
|
f4af520b2a |
@@ -20,7 +20,6 @@ There is a main configuration file with basic settings that point to the other c
|
|||||||
"collectors" : "collectors.json",
|
"collectors" : "collectors.json",
|
||||||
"receivers" : "receivers.json",
|
"receivers" : "receivers.json",
|
||||||
"router" : "router.json",
|
"router" : "router.json",
|
||||||
"stats_api" : "api.json",
|
|
||||||
"interval": 10,
|
"interval": 10,
|
||||||
"duration": 1
|
"duration": 1
|
||||||
}
|
}
|
||||||
@@ -33,7 +32,6 @@ See the component READMEs for their configuration:
|
|||||||
* [`sinks`](./sinks/README.md)
|
* [`sinks`](./sinks/README.md)
|
||||||
* [`receivers`](./receivers/README.md)
|
* [`receivers`](./receivers/README.md)
|
||||||
* [`router`](./internal/metricRouter/README.md)
|
* [`router`](./internal/metricRouter/README.md)
|
||||||
* [`stats_api`](./internal/metricRouter/StatsApi.md)
|
|
||||||
|
|
||||||
|
|
||||||
# Installation
|
# Installation
|
||||||
|
@@ -28,7 +28,6 @@ type CentralConfigFile struct {
|
|||||||
RouterConfigFile string `json:"router"`
|
RouterConfigFile string `json:"router"`
|
||||||
SinkConfigFile string `json:"sinks"`
|
SinkConfigFile string `json:"sinks"`
|
||||||
ReceiverConfigFile string `json:"receivers,omitempty"`
|
ReceiverConfigFile string `json:"receivers,omitempty"`
|
||||||
StatsApiConfigFile string `json:"stats_api,omitempty"`
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func LoadCentralConfiguration(file string, config *CentralConfigFile) error {
|
func LoadCentralConfiguration(file string, config *CentralConfigFile) error {
|
||||||
@@ -53,7 +52,6 @@ type RuntimeConfig struct {
|
|||||||
CollectManager collectors.CollectorManager
|
CollectManager collectors.CollectorManager
|
||||||
SinkManager sinks.SinkManager
|
SinkManager sinks.SinkManager
|
||||||
ReceiveManager receivers.ReceiveManager
|
ReceiveManager receivers.ReceiveManager
|
||||||
StatsApi mr.StatsApi
|
|
||||||
MultiChanTicker mct.MultiChanTicker
|
MultiChanTicker mct.MultiChanTicker
|
||||||
|
|
||||||
Channels []chan lp.CCMetric
|
Channels []chan lp.CCMetric
|
||||||
@@ -154,16 +152,11 @@ func shutdownHandler(config *RuntimeConfig, shutdownSignal chan os.Signal) {
|
|||||||
cclog.Debug("Shutdown SinkManager...")
|
cclog.Debug("Shutdown SinkManager...")
|
||||||
config.SinkManager.Close()
|
config.SinkManager.Close()
|
||||||
}
|
}
|
||||||
if config.StatsApi != nil {
|
|
||||||
cclog.Debug("Shutdown StatsApi...")
|
|
||||||
config.StatsApi.Close()
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func mainFunc() int {
|
func mainFunc() int {
|
||||||
var err error
|
var err error
|
||||||
use_recv := false
|
use_recv := false
|
||||||
use_api := false
|
|
||||||
|
|
||||||
// Initialize runtime configuration
|
// Initialize runtime configuration
|
||||||
rcfg := RuntimeConfig{
|
rcfg := RuntimeConfig{
|
||||||
@@ -171,7 +164,6 @@ func mainFunc() int {
|
|||||||
CollectManager: nil,
|
CollectManager: nil,
|
||||||
SinkManager: nil,
|
SinkManager: nil,
|
||||||
ReceiveManager: nil,
|
ReceiveManager: nil,
|
||||||
StatsApi: nil,
|
|
||||||
CliArgs: ReadCli(),
|
CliArgs: ReadCli(),
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -261,16 +253,6 @@ func mainFunc() int {
|
|||||||
use_recv = true
|
use_recv = true
|
||||||
}
|
}
|
||||||
|
|
||||||
// Create new statistics API manager
|
|
||||||
if len(rcfg.ConfigFile.StatsApiConfigFile) > 0 {
|
|
||||||
rcfg.StatsApi, err = mr.NewStatsApi(rcfg.MultiChanTicker, &rcfg.Sync, rcfg.ConfigFile.StatsApiConfigFile)
|
|
||||||
if err != nil {
|
|
||||||
cclog.Error(err.Error())
|
|
||||||
return 1
|
|
||||||
}
|
|
||||||
use_api = true
|
|
||||||
}
|
|
||||||
|
|
||||||
// Create shutdown handler
|
// Create shutdown handler
|
||||||
shutdownSignal := make(chan os.Signal, 1)
|
shutdownSignal := make(chan os.Signal, 1)
|
||||||
signal.Notify(shutdownSignal, os.Interrupt)
|
signal.Notify(shutdownSignal, os.Interrupt)
|
||||||
@@ -278,11 +260,6 @@ func mainFunc() int {
|
|||||||
rcfg.Sync.Add(1)
|
rcfg.Sync.Add(1)
|
||||||
go shutdownHandler(&rcfg, shutdownSignal)
|
go shutdownHandler(&rcfg, shutdownSignal)
|
||||||
|
|
||||||
// Start the stats api early to be prepared for init settings
|
|
||||||
if use_api {
|
|
||||||
rcfg.StatsApi.Start()
|
|
||||||
}
|
|
||||||
|
|
||||||
// Start the managers
|
// Start the managers
|
||||||
rcfg.MetricRouter.Start()
|
rcfg.MetricRouter.Start()
|
||||||
rcfg.SinkManager.Start()
|
rcfg.SinkManager.Start()
|
||||||
|
@@ -16,7 +16,6 @@ import (
|
|||||||
|
|
||||||
cclog "github.com/ClusterCockpit/cc-metric-collector/internal/ccLogger"
|
cclog "github.com/ClusterCockpit/cc-metric-collector/internal/ccLogger"
|
||||||
lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric"
|
lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric"
|
||||||
stats "github.com/ClusterCockpit/cc-metric-collector/internal/metricRouter"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
const DEFAULT_BEEGFS_CMD = "beegfs-ctl"
|
const DEFAULT_BEEGFS_CMD = "beegfs-ctl"
|
||||||
@@ -34,7 +33,6 @@ type BeegfsMetaCollector struct {
|
|||||||
matches map[string]string
|
matches map[string]string
|
||||||
config BeegfsMetaCollectorConfig
|
config BeegfsMetaCollectorConfig
|
||||||
skipFS map[string]struct{}
|
skipFS map[string]struct{}
|
||||||
statsProcessedMetrics int64
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func (m *BeegfsMetaCollector) Init(config json.RawMessage) error {
|
func (m *BeegfsMetaCollector) Init(config json.RawMessage) error {
|
||||||
@@ -107,7 +105,6 @@ func (m *BeegfsMetaCollector) Init(config json.RawMessage) error {
|
|||||||
if err != nil {
|
if err != nil {
|
||||||
return fmt.Errorf("BeegfsMetaCollector.Init(): Failed to find beegfs-ctl binary '%s': %v", m.config.Beegfs, err)
|
return fmt.Errorf("BeegfsMetaCollector.Init(): Failed to find beegfs-ctl binary '%s': %v", m.config.Beegfs, err)
|
||||||
}
|
}
|
||||||
m.statsProcessedMetrics = 0
|
|
||||||
m.init = true
|
m.init = true
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
@@ -221,12 +218,10 @@ func (m *BeegfsMetaCollector) Read(interval time.Duration, output chan lp.CCMetr
|
|||||||
y, err := lp.New(key, m.tags, m.meta, map[string]interface{}{"value": value}, time.Now())
|
y, err := lp.New(key, m.tags, m.meta, map[string]interface{}{"value": value}, time.Now())
|
||||||
if err == nil {
|
if err == nil {
|
||||||
output <- y
|
output <- y
|
||||||
m.statsProcessedMetrics++
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
stats.ComponentStatInt(m.name, "processed_metrics", m.statsProcessedMetrics)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func (m *BeegfsMetaCollector) Close() {
|
func (m *BeegfsMetaCollector) Close() {
|
||||||
|
@@ -16,7 +16,6 @@ import (
|
|||||||
|
|
||||||
cclog "github.com/ClusterCockpit/cc-metric-collector/internal/ccLogger"
|
cclog "github.com/ClusterCockpit/cc-metric-collector/internal/ccLogger"
|
||||||
lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric"
|
lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric"
|
||||||
stats "github.com/ClusterCockpit/cc-metric-collector/internal/metricRouter"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
// Struct for the collector-specific JSON config
|
// Struct for the collector-specific JSON config
|
||||||
@@ -32,7 +31,6 @@ type BeegfsStorageCollector struct {
|
|||||||
matches map[string]string
|
matches map[string]string
|
||||||
config BeegfsStorageCollectorConfig
|
config BeegfsStorageCollectorConfig
|
||||||
skipFS map[string]struct{}
|
skipFS map[string]struct{}
|
||||||
statsProcessedMetrics int64
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func (m *BeegfsStorageCollector) Init(config json.RawMessage) error {
|
func (m *BeegfsStorageCollector) Init(config json.RawMessage) error {
|
||||||
@@ -100,7 +98,6 @@ func (m *BeegfsStorageCollector) Init(config json.RawMessage) error {
|
|||||||
if err != nil {
|
if err != nil {
|
||||||
return fmt.Errorf("BeegfsStorageCollector.Init(): Failed to find beegfs-ctl binary '%s': %v", m.config.Beegfs, err)
|
return fmt.Errorf("BeegfsStorageCollector.Init(): Failed to find beegfs-ctl binary '%s': %v", m.config.Beegfs, err)
|
||||||
}
|
}
|
||||||
m.statsProcessedMetrics = 0
|
|
||||||
m.init = true
|
m.init = true
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
@@ -213,12 +210,10 @@ func (m *BeegfsStorageCollector) Read(interval time.Duration, output chan lp.CCM
|
|||||||
y, err := lp.New(key, m.tags, m.meta, map[string]interface{}{"value": value}, time.Now())
|
y, err := lp.New(key, m.tags, m.meta, map[string]interface{}{"value": value}, time.Now())
|
||||||
if err == nil {
|
if err == nil {
|
||||||
output <- y
|
output <- y
|
||||||
m.statsProcessedMetrics++
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
stats.ComponentStatInt(m.name, "processed_metrics", m.statsProcessedMetrics)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func (m *BeegfsStorageCollector) Close() {
|
func (m *BeegfsStorageCollector) Close() {
|
||||||
|
@@ -12,7 +12,6 @@ import (
|
|||||||
|
|
||||||
cclog "github.com/ClusterCockpit/cc-metric-collector/internal/ccLogger"
|
cclog "github.com/ClusterCockpit/cc-metric-collector/internal/ccLogger"
|
||||||
lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric"
|
lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric"
|
||||||
stats "github.com/ClusterCockpit/cc-metric-collector/internal/metricRouter"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
//
|
//
|
||||||
@@ -38,7 +37,6 @@ type CPUFreqCpuInfoCollectorTopology struct {
|
|||||||
type CPUFreqCpuInfoCollector struct {
|
type CPUFreqCpuInfoCollector struct {
|
||||||
metricCollector
|
metricCollector
|
||||||
topology []*CPUFreqCpuInfoCollectorTopology
|
topology []*CPUFreqCpuInfoCollectorTopology
|
||||||
statsProcessedMetrics int64
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func (m *CPUFreqCpuInfoCollector) Init(config json.RawMessage) error {
|
func (m *CPUFreqCpuInfoCollector) Init(config json.RawMessage) error {
|
||||||
@@ -157,7 +155,7 @@ func (m *CPUFreqCpuInfoCollector) Init(config json.RawMessage) error {
|
|||||||
"package_id": t.physicalPackageID,
|
"package_id": t.physicalPackageID,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
m.statsProcessedMetrics = 0
|
|
||||||
m.init = true
|
m.init = true
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
@@ -198,7 +196,6 @@ func (m *CPUFreqCpuInfoCollector) Read(interval time.Duration, output chan lp.CC
|
|||||||
return
|
return
|
||||||
}
|
}
|
||||||
if y, err := lp.New("cpufreq", t.tagSet, m.meta, map[string]interface{}{"value": value}, now); err == nil {
|
if y, err := lp.New("cpufreq", t.tagSet, m.meta, map[string]interface{}{"value": value}, now); err == nil {
|
||||||
m.statsProcessedMetrics++
|
|
||||||
output <- y
|
output <- y
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -206,7 +203,6 @@ func (m *CPUFreqCpuInfoCollector) Read(interval time.Duration, output chan lp.CC
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
stats.ComponentStatInt(m.name, "processed_metrics", m.statsProcessedMetrics)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func (m *CPUFreqCpuInfoCollector) Close() {
|
func (m *CPUFreqCpuInfoCollector) Close() {
|
||||||
|
@@ -11,7 +11,6 @@ import (
|
|||||||
|
|
||||||
cclog "github.com/ClusterCockpit/cc-metric-collector/internal/ccLogger"
|
cclog "github.com/ClusterCockpit/cc-metric-collector/internal/ccLogger"
|
||||||
lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric"
|
lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric"
|
||||||
stats "github.com/ClusterCockpit/cc-metric-collector/internal/metricRouter"
|
|
||||||
"golang.org/x/sys/unix"
|
"golang.org/x/sys/unix"
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -41,7 +40,6 @@ type CPUFreqCollectorTopology struct {
|
|||||||
type CPUFreqCollector struct {
|
type CPUFreqCollector struct {
|
||||||
metricCollector
|
metricCollector
|
||||||
topology []CPUFreqCollectorTopology
|
topology []CPUFreqCollectorTopology
|
||||||
statsProcessedMetrics int64
|
|
||||||
config struct {
|
config struct {
|
||||||
ExcludeMetrics []string `json:"exclude_metrics,omitempty"`
|
ExcludeMetrics []string `json:"exclude_metrics,omitempty"`
|
||||||
}
|
}
|
||||||
@@ -168,7 +166,7 @@ func (m *CPUFreqCollector) Init(config json.RawMessage) error {
|
|||||||
"package_id": t.physicalPackageID,
|
"package_id": t.physicalPackageID,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
m.statsProcessedMetrics = 0
|
|
||||||
m.init = true
|
m.init = true
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
@@ -205,11 +203,9 @@ func (m *CPUFreqCollector) Read(interval time.Duration, output chan lp.CCMetric)
|
|||||||
}
|
}
|
||||||
|
|
||||||
if y, err := lp.New("cpufreq", t.tagSet, m.meta, map[string]interface{}{"value": cpuFreq}, now); err == nil {
|
if y, err := lp.New("cpufreq", t.tagSet, m.meta, map[string]interface{}{"value": cpuFreq}, now); err == nil {
|
||||||
m.statsProcessedMetrics++
|
|
||||||
output <- y
|
output <- y
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
stats.ComponentStatInt(m.name, "processed_metrics", m.statsProcessedMetrics)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func (m *CPUFreqCollector) Close() {
|
func (m *CPUFreqCollector) Close() {
|
||||||
|
@@ -11,7 +11,6 @@ import (
|
|||||||
|
|
||||||
cclog "github.com/ClusterCockpit/cc-metric-collector/internal/ccLogger"
|
cclog "github.com/ClusterCockpit/cc-metric-collector/internal/ccLogger"
|
||||||
lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric"
|
lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric"
|
||||||
stats "github.com/ClusterCockpit/cc-metric-collector/internal/metricRouter"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
const CPUSTATFILE = `/proc/stat`
|
const CPUSTATFILE = `/proc/stat`
|
||||||
@@ -26,7 +25,6 @@ type CpustatCollector struct {
|
|||||||
matches map[string]int
|
matches map[string]int
|
||||||
cputags map[string]map[string]string
|
cputags map[string]map[string]string
|
||||||
nodetags map[string]string
|
nodetags map[string]string
|
||||||
statsProcessedMetrics int64
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func (m *CpustatCollector) Init(config json.RawMessage) error {
|
func (m *CpustatCollector) Init(config json.RawMessage) error {
|
||||||
@@ -88,7 +86,6 @@ func (m *CpustatCollector) Init(config json.RawMessage) error {
|
|||||||
num_cpus++
|
num_cpus++
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
m.statsProcessedMetrics = 0
|
|
||||||
m.init = true
|
m.init = true
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
@@ -109,7 +106,6 @@ func (m *CpustatCollector) parseStatLine(linefields []string, tags map[string]st
|
|||||||
for name, value := range values {
|
for name, value := range values {
|
||||||
y, err := lp.New(name, tags, m.meta, map[string]interface{}{"value": (value * 100.0) / total}, t)
|
y, err := lp.New(name, tags, m.meta, map[string]interface{}{"value": (value * 100.0) / total}, t)
|
||||||
if err == nil {
|
if err == nil {
|
||||||
m.statsProcessedMetrics++
|
|
||||||
output <- y
|
output <- y
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -145,10 +141,8 @@ func (m *CpustatCollector) Read(interval time.Duration, output chan lp.CCMetric)
|
|||||||
time.Now(),
|
time.Now(),
|
||||||
)
|
)
|
||||||
if err == nil {
|
if err == nil {
|
||||||
m.statsProcessedMetrics++
|
|
||||||
output <- num_cpus_metric
|
output <- num_cpus_metric
|
||||||
}
|
}
|
||||||
stats.ComponentStatInt(m.name, "processed_metrics", m.statsProcessedMetrics)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func (m *CpustatCollector) Close() {
|
func (m *CpustatCollector) Close() {
|
||||||
|
@@ -10,7 +10,6 @@ import (
|
|||||||
"time"
|
"time"
|
||||||
|
|
||||||
lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric"
|
lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric"
|
||||||
stats "github.com/ClusterCockpit/cc-metric-collector/internal/metricRouter"
|
|
||||||
influx "github.com/influxdata/line-protocol"
|
influx "github.com/influxdata/line-protocol"
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -29,9 +28,6 @@ type CustomCmdCollector struct {
|
|||||||
config CustomCmdCollectorConfig
|
config CustomCmdCollectorConfig
|
||||||
commands []string
|
commands []string
|
||||||
files []string
|
files []string
|
||||||
statsProcessedMetrics int64
|
|
||||||
statsProcessedCommands int64
|
|
||||||
statsProcessedFiles int64
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func (m *CustomCmdCollector) Init(config json.RawMessage) error {
|
func (m *CustomCmdCollector) Init(config json.RawMessage) error {
|
||||||
@@ -70,9 +66,6 @@ func (m *CustomCmdCollector) Init(config json.RawMessage) error {
|
|||||||
m.handler = influx.NewMetricHandler()
|
m.handler = influx.NewMetricHandler()
|
||||||
m.parser = influx.NewParser(m.handler)
|
m.parser = influx.NewParser(m.handler)
|
||||||
m.parser.SetTimeFunc(DefaultTime)
|
m.parser.SetTimeFunc(DefaultTime)
|
||||||
m.statsProcessedMetrics = 0
|
|
||||||
m.statsProcessedFiles = 0
|
|
||||||
m.statsProcessedCommands = 0
|
|
||||||
m.init = true
|
m.init = true
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
@@ -107,13 +100,9 @@ func (m *CustomCmdCollector) Read(interval time.Duration, output chan lp.CCMetri
|
|||||||
|
|
||||||
y := lp.FromInfluxMetric(c)
|
y := lp.FromInfluxMetric(c)
|
||||||
if err == nil {
|
if err == nil {
|
||||||
m.statsProcessedMetrics++
|
|
||||||
stats.ComponentStatInt(m.name, "processed_metrics", m.statsProcessedMetrics)
|
|
||||||
output <- y
|
output <- y
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
m.statsProcessedCommands++
|
|
||||||
stats.ComponentStatInt(m.name, "processed_commands", m.statsProcessedCommands)
|
|
||||||
}
|
}
|
||||||
for _, file := range m.files {
|
for _, file := range m.files {
|
||||||
buffer, err := ioutil.ReadFile(file)
|
buffer, err := ioutil.ReadFile(file)
|
||||||
@@ -133,13 +122,9 @@ func (m *CustomCmdCollector) Read(interval time.Duration, output chan lp.CCMetri
|
|||||||
}
|
}
|
||||||
y := lp.FromInfluxMetric(f)
|
y := lp.FromInfluxMetric(f)
|
||||||
if err == nil {
|
if err == nil {
|
||||||
m.statsProcessedMetrics++
|
|
||||||
stats.ComponentStatInt(m.name, "processed_metrics", m.statsProcessedMetrics)
|
|
||||||
output <- y
|
output <- y
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
m.statsProcessedFiles++
|
|
||||||
stats.ComponentStatInt(m.name, "processed_files", m.statsProcessedFiles)
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@@ -11,7 +11,6 @@ import (
|
|||||||
|
|
||||||
cclog "github.com/ClusterCockpit/cc-metric-collector/internal/ccLogger"
|
cclog "github.com/ClusterCockpit/cc-metric-collector/internal/ccLogger"
|
||||||
lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric"
|
lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric"
|
||||||
stats "github.com/ClusterCockpit/cc-metric-collector/internal/metricRouter"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
// "log"
|
// "log"
|
||||||
@@ -24,8 +23,9 @@ type DiskstatCollectorConfig struct {
|
|||||||
|
|
||||||
type DiskstatCollector struct {
|
type DiskstatCollector struct {
|
||||||
metricCollector
|
metricCollector
|
||||||
config DiskstatCollectorConfig
|
//matches map[string]int
|
||||||
statsProcessedMetrics int64
|
config IOstatCollectorConfig
|
||||||
|
//devices map[string]IOstatCollectorEntry
|
||||||
}
|
}
|
||||||
|
|
||||||
func (m *DiskstatCollector) Init(config json.RawMessage) error {
|
func (m *DiskstatCollector) Init(config json.RawMessage) error {
|
||||||
@@ -44,7 +44,6 @@ func (m *DiskstatCollector) Init(config json.RawMessage) error {
|
|||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
defer file.Close()
|
defer file.Close()
|
||||||
m.statsProcessedMetrics = 0
|
|
||||||
m.init = true
|
m.init = true
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
@@ -90,16 +89,12 @@ func (m *DiskstatCollector) Read(interval time.Duration, output chan lp.CCMetric
|
|||||||
y, err := lp.New("disk_total", tags, m.meta, map[string]interface{}{"value": total}, time.Now())
|
y, err := lp.New("disk_total", tags, m.meta, map[string]interface{}{"value": total}, time.Now())
|
||||||
if err == nil {
|
if err == nil {
|
||||||
y.AddMeta("unit", "GBytes")
|
y.AddMeta("unit", "GBytes")
|
||||||
m.statsProcessedMetrics++
|
|
||||||
stats.ComponentStatInt(m.name, "processed_metrics", m.statsProcessedMetrics)
|
|
||||||
output <- y
|
output <- y
|
||||||
}
|
}
|
||||||
free := (stat.Bfree * uint64(stat.Bsize)) / uint64(1000000000)
|
free := (stat.Bfree * uint64(stat.Bsize)) / uint64(1000000000)
|
||||||
y, err = lp.New("disk_free", tags, m.meta, map[string]interface{}{"value": free}, time.Now())
|
y, err = lp.New("disk_free", tags, m.meta, map[string]interface{}{"value": free}, time.Now())
|
||||||
if err == nil {
|
if err == nil {
|
||||||
y.AddMeta("unit", "GBytes")
|
y.AddMeta("unit", "GBytes")
|
||||||
m.statsProcessedMetrics++
|
|
||||||
stats.ComponentStatInt(m.name, "processed_metrics", m.statsProcessedMetrics)
|
|
||||||
output <- y
|
output <- y
|
||||||
}
|
}
|
||||||
perc := (100 * (total - free)) / total
|
perc := (100 * (total - free)) / total
|
||||||
@@ -110,8 +105,6 @@ func (m *DiskstatCollector) Read(interval time.Duration, output chan lp.CCMetric
|
|||||||
y, err := lp.New("part_max_used", map[string]string{"type": "node"}, m.meta, map[string]interface{}{"value": int(part_max_used)}, time.Now())
|
y, err := lp.New("part_max_used", map[string]string{"type": "node"}, m.meta, map[string]interface{}{"value": int(part_max_used)}, time.Now())
|
||||||
if err == nil {
|
if err == nil {
|
||||||
y.AddMeta("unit", "percent")
|
y.AddMeta("unit", "percent")
|
||||||
m.statsProcessedMetrics++
|
|
||||||
stats.ComponentStatInt(m.name, "processed_metrics", m.statsProcessedMetrics)
|
|
||||||
output <- y
|
output <- y
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@@ -15,7 +15,6 @@ import (
|
|||||||
|
|
||||||
cclog "github.com/ClusterCockpit/cc-metric-collector/internal/ccLogger"
|
cclog "github.com/ClusterCockpit/cc-metric-collector/internal/ccLogger"
|
||||||
lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric"
|
lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric"
|
||||||
stats "github.com/ClusterCockpit/cc-metric-collector/internal/metricRouter"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
const DEFAULT_GPFS_CMD = "mmpmon"
|
const DEFAULT_GPFS_CMD = "mmpmon"
|
||||||
@@ -36,7 +35,6 @@ type GpfsCollector struct {
|
|||||||
skipFS map[string]struct{}
|
skipFS map[string]struct{}
|
||||||
lastTimestamp time.Time // Store time stamp of last tick to derive bandwidths
|
lastTimestamp time.Time // Store time stamp of last tick to derive bandwidths
|
||||||
lastState map[string]GpfsCollectorLastState
|
lastState map[string]GpfsCollectorLastState
|
||||||
statsProcessedMetrics int64
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func (m *GpfsCollector) Init(config json.RawMessage) error {
|
func (m *GpfsCollector) Init(config json.RawMessage) error {
|
||||||
@@ -88,7 +86,7 @@ func (m *GpfsCollector) Init(config json.RawMessage) error {
|
|||||||
return fmt.Errorf("failed to find mmpmon binary '%s': %v", m.config.Mmpmon, err)
|
return fmt.Errorf("failed to find mmpmon binary '%s': %v", m.config.Mmpmon, err)
|
||||||
}
|
}
|
||||||
m.config.Mmpmon = p
|
m.config.Mmpmon = p
|
||||||
m.statsProcessedMetrics = 0
|
|
||||||
m.init = true
|
m.init = true
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
@@ -213,14 +211,12 @@ func (m *GpfsCollector) Read(interval time.Duration, output chan lp.CCMetric) {
|
|||||||
}
|
}
|
||||||
if y, err := lp.New("gpfs_bytes_read", m.tags, m.meta, map[string]interface{}{"value": bytesRead}, timestamp); err == nil {
|
if y, err := lp.New("gpfs_bytes_read", m.tags, m.meta, map[string]interface{}{"value": bytesRead}, timestamp); err == nil {
|
||||||
output <- y
|
output <- y
|
||||||
m.statsProcessedMetrics++
|
|
||||||
}
|
}
|
||||||
if m.config.SendBandwidths {
|
if m.config.SendBandwidths {
|
||||||
if lastBytesRead := m.lastState[filesystem].bytesRead; lastBytesRead >= 0 {
|
if lastBytesRead := m.lastState[filesystem].bytesRead; lastBytesRead >= 0 {
|
||||||
bwRead := float64(bytesRead-lastBytesRead) / timeDiff
|
bwRead := float64(bytesRead-lastBytesRead) / timeDiff
|
||||||
if y, err := lp.New("gpfs_bw_read", m.tags, m.meta, map[string]interface{}{"value": bwRead}, timestamp); err == nil {
|
if y, err := lp.New("gpfs_bw_read", m.tags, m.meta, map[string]interface{}{"value": bwRead}, timestamp); err == nil {
|
||||||
output <- y
|
output <- y
|
||||||
m.statsProcessedMetrics++
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -235,14 +231,12 @@ func (m *GpfsCollector) Read(interval time.Duration, output chan lp.CCMetric) {
|
|||||||
}
|
}
|
||||||
if y, err := lp.New("gpfs_bytes_written", m.tags, m.meta, map[string]interface{}{"value": bytesWritten}, timestamp); err == nil {
|
if y, err := lp.New("gpfs_bytes_written", m.tags, m.meta, map[string]interface{}{"value": bytesWritten}, timestamp); err == nil {
|
||||||
output <- y
|
output <- y
|
||||||
m.statsProcessedMetrics++
|
|
||||||
}
|
}
|
||||||
if m.config.SendBandwidths {
|
if m.config.SendBandwidths {
|
||||||
if lastBytesWritten := m.lastState[filesystem].bytesRead; lastBytesWritten >= 0 {
|
if lastBytesWritten := m.lastState[filesystem].bytesRead; lastBytesWritten >= 0 {
|
||||||
bwWrite := float64(bytesWritten-lastBytesWritten) / timeDiff
|
bwWrite := float64(bytesWritten-lastBytesWritten) / timeDiff
|
||||||
if y, err := lp.New("gpfs_bw_write", m.tags, m.meta, map[string]interface{}{"value": bwWrite}, timestamp); err == nil {
|
if y, err := lp.New("gpfs_bw_write", m.tags, m.meta, map[string]interface{}{"value": bwWrite}, timestamp); err == nil {
|
||||||
output <- y
|
output <- y
|
||||||
m.statsProcessedMetrics++
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -264,7 +258,6 @@ func (m *GpfsCollector) Read(interval time.Duration, output chan lp.CCMetric) {
|
|||||||
}
|
}
|
||||||
if y, err := lp.New("gpfs_num_opens", m.tags, m.meta, map[string]interface{}{"value": numOpens}, timestamp); err == nil {
|
if y, err := lp.New("gpfs_num_opens", m.tags, m.meta, map[string]interface{}{"value": numOpens}, timestamp); err == nil {
|
||||||
output <- y
|
output <- y
|
||||||
m.statsProcessedMetrics++
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// number of closes
|
// number of closes
|
||||||
@@ -277,7 +270,6 @@ func (m *GpfsCollector) Read(interval time.Duration, output chan lp.CCMetric) {
|
|||||||
}
|
}
|
||||||
if y, err := lp.New("gpfs_num_closes", m.tags, m.meta, map[string]interface{}{"value": numCloses}, timestamp); err == nil {
|
if y, err := lp.New("gpfs_num_closes", m.tags, m.meta, map[string]interface{}{"value": numCloses}, timestamp); err == nil {
|
||||||
output <- y
|
output <- y
|
||||||
m.statsProcessedMetrics++
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// number of reads
|
// number of reads
|
||||||
@@ -290,7 +282,6 @@ func (m *GpfsCollector) Read(interval time.Duration, output chan lp.CCMetric) {
|
|||||||
}
|
}
|
||||||
if y, err := lp.New("gpfs_num_reads", m.tags, m.meta, map[string]interface{}{"value": numReads}, timestamp); err == nil {
|
if y, err := lp.New("gpfs_num_reads", m.tags, m.meta, map[string]interface{}{"value": numReads}, timestamp); err == nil {
|
||||||
output <- y
|
output <- y
|
||||||
m.statsProcessedMetrics++
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// number of writes
|
// number of writes
|
||||||
@@ -303,7 +294,6 @@ func (m *GpfsCollector) Read(interval time.Duration, output chan lp.CCMetric) {
|
|||||||
}
|
}
|
||||||
if y, err := lp.New("gpfs_num_writes", m.tags, m.meta, map[string]interface{}{"value": numWrites}, timestamp); err == nil {
|
if y, err := lp.New("gpfs_num_writes", m.tags, m.meta, map[string]interface{}{"value": numWrites}, timestamp); err == nil {
|
||||||
output <- y
|
output <- y
|
||||||
m.statsProcessedMetrics++
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// number of read directories
|
// number of read directories
|
||||||
@@ -316,7 +306,6 @@ func (m *GpfsCollector) Read(interval time.Duration, output chan lp.CCMetric) {
|
|||||||
}
|
}
|
||||||
if y, err := lp.New("gpfs_num_readdirs", m.tags, m.meta, map[string]interface{}{"value": numReaddirs}, timestamp); err == nil {
|
if y, err := lp.New("gpfs_num_readdirs", m.tags, m.meta, map[string]interface{}{"value": numReaddirs}, timestamp); err == nil {
|
||||||
output <- y
|
output <- y
|
||||||
m.statsProcessedMetrics++
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Number of inode updates
|
// Number of inode updates
|
||||||
@@ -328,11 +317,9 @@ func (m *GpfsCollector) Read(interval time.Duration, output chan lp.CCMetric) {
|
|||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
if y, err := lp.New("gpfs_num_inode_updates", m.tags, m.meta, map[string]interface{}{"value": numInodeUpdates}, timestamp); err == nil {
|
if y, err := lp.New("gpfs_num_inode_updates", m.tags, m.meta, map[string]interface{}{"value": numInodeUpdates}, timestamp); err == nil {
|
||||||
m.statsProcessedMetrics++
|
|
||||||
output <- y
|
output <- y
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
stats.ComponentStatInt(m.name, "processed_metrics", m.statsProcessedMetrics)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func (m *GpfsCollector) Close() {
|
func (m *GpfsCollector) Close() {
|
||||||
|
@@ -7,7 +7,6 @@ import (
|
|||||||
|
|
||||||
cclog "github.com/ClusterCockpit/cc-metric-collector/internal/ccLogger"
|
cclog "github.com/ClusterCockpit/cc-metric-collector/internal/ccLogger"
|
||||||
lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric"
|
lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric"
|
||||||
stats "github.com/ClusterCockpit/cc-metric-collector/internal/metricRouter"
|
|
||||||
"golang.org/x/sys/unix"
|
"golang.org/x/sys/unix"
|
||||||
|
|
||||||
"encoding/json"
|
"encoding/json"
|
||||||
@@ -19,16 +18,11 @@ import (
|
|||||||
|
|
||||||
const IB_BASEPATH = "/sys/class/infiniband/"
|
const IB_BASEPATH = "/sys/class/infiniband/"
|
||||||
|
|
||||||
type InfinibandCollectorMetric struct {
|
|
||||||
path string
|
|
||||||
unit string
|
|
||||||
}
|
|
||||||
|
|
||||||
type InfinibandCollectorInfo struct {
|
type InfinibandCollectorInfo struct {
|
||||||
LID string // IB local Identifier (LID)
|
LID string // IB local Identifier (LID)
|
||||||
device string // IB device
|
device string // IB device
|
||||||
port string // IB device port
|
port string // IB device port
|
||||||
portCounterFiles map[string]InfinibandCollectorMetric // mapping counter name -> InfinibandCollectorMetric
|
portCounterFiles map[string]string // mapping counter name -> sysfs file
|
||||||
tagSet map[string]string // corresponding tag list
|
tagSet map[string]string // corresponding tag list
|
||||||
lastState map[string]int64 // State from last measurement
|
lastState map[string]int64 // State from last measurement
|
||||||
}
|
}
|
||||||
@@ -42,7 +36,6 @@ type InfinibandCollector struct {
|
|||||||
}
|
}
|
||||||
info []*InfinibandCollectorInfo
|
info []*InfinibandCollectorInfo
|
||||||
lastTimestamp time.Time // Store time stamp of last tick to derive bandwidths
|
lastTimestamp time.Time // Store time stamp of last tick to derive bandwidths
|
||||||
statsProcessedMetrics int64
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Init initializes the Infiniband collector by walking through files below IB_BASEPATH
|
// Init initializes the Infiniband collector by walking through files below IB_BASEPATH
|
||||||
@@ -113,16 +106,16 @@ func (m *InfinibandCollector) Init(config json.RawMessage) error {
|
|||||||
|
|
||||||
// Check access to counter files
|
// Check access to counter files
|
||||||
countersDir := filepath.Join(path, "counters")
|
countersDir := filepath.Join(path, "counters")
|
||||||
portCounterFiles := map[string]InfinibandCollectorMetric{
|
portCounterFiles := map[string]string{
|
||||||
"ib_recv": {path: filepath.Join(countersDir, "port_rcv_data"), unit: "bytes"},
|
"ib_recv": filepath.Join(countersDir, "port_rcv_data"),
|
||||||
"ib_xmit": {path: filepath.Join(countersDir, "port_xmit_data"), unit: "bytes"},
|
"ib_xmit": filepath.Join(countersDir, "port_xmit_data"),
|
||||||
"ib_recv_pkts": {path: filepath.Join(countersDir, "port_rcv_packets"), unit: "packets"},
|
"ib_recv_pkts": filepath.Join(countersDir, "port_rcv_packets"),
|
||||||
"ib_xmit_pkts": {path: filepath.Join(countersDir, "port_xmit_packets"), unit: "packets"},
|
"ib_xmit_pkts": filepath.Join(countersDir, "port_xmit_packets"),
|
||||||
}
|
}
|
||||||
for _, counter := range portCounterFiles {
|
for _, counterFile := range portCounterFiles {
|
||||||
err := unix.Access(counter.path, unix.R_OK)
|
err := unix.Access(counterFile, unix.R_OK)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return fmt.Errorf("unable to access %s: %v", counter.path, err)
|
return fmt.Errorf("unable to access %s: %v", counterFile, err)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -151,7 +144,7 @@ func (m *InfinibandCollector) Init(config json.RawMessage) error {
|
|||||||
if len(m.info) == 0 {
|
if len(m.info) == 0 {
|
||||||
return fmt.Errorf("found no IB devices")
|
return fmt.Errorf("found no IB devices")
|
||||||
}
|
}
|
||||||
m.statsProcessedMetrics = 0
|
|
||||||
m.init = true
|
m.init = true
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
@@ -172,14 +165,14 @@ func (m *InfinibandCollector) Read(interval time.Duration, output chan lp.CCMetr
|
|||||||
m.lastTimestamp = now
|
m.lastTimestamp = now
|
||||||
|
|
||||||
for _, info := range m.info {
|
for _, info := range m.info {
|
||||||
for counterName, counterDef := range info.portCounterFiles {
|
for counterName, counterFile := range info.portCounterFiles {
|
||||||
|
|
||||||
// Read counter file
|
// Read counter file
|
||||||
line, err := ioutil.ReadFile(counterDef.path)
|
line, err := ioutil.ReadFile(counterFile)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
cclog.ComponentError(
|
cclog.ComponentError(
|
||||||
m.name,
|
m.name,
|
||||||
fmt.Sprintf("Read(): Failed to read from file '%s': %v", counterDef.path, err))
|
fmt.Sprintf("Read(): Failed to read from file '%s': %v", counterFile, err))
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
data := strings.TrimSpace(string(line))
|
data := strings.TrimSpace(string(line))
|
||||||
@@ -196,9 +189,7 @@ func (m *InfinibandCollector) Read(interval time.Duration, output chan lp.CCMetr
|
|||||||
// Send absolut values
|
// Send absolut values
|
||||||
if m.config.SendAbsoluteValues {
|
if m.config.SendAbsoluteValues {
|
||||||
if y, err := lp.New(counterName, info.tagSet, m.meta, map[string]interface{}{"value": v}, now); err == nil {
|
if y, err := lp.New(counterName, info.tagSet, m.meta, map[string]interface{}{"value": v}, now); err == nil {
|
||||||
y.AddMeta("unit", counterDef.unit)
|
|
||||||
output <- y
|
output <- y
|
||||||
m.statsProcessedMetrics++
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -207,9 +198,7 @@ func (m *InfinibandCollector) Read(interval time.Duration, output chan lp.CCMetr
|
|||||||
if info.lastState[counterName] >= 0 {
|
if info.lastState[counterName] >= 0 {
|
||||||
rate := float64((v - info.lastState[counterName])) / timeDiff
|
rate := float64((v - info.lastState[counterName])) / timeDiff
|
||||||
if y, err := lp.New(counterName+"_bw", info.tagSet, m.meta, map[string]interface{}{"value": rate}, now); err == nil {
|
if y, err := lp.New(counterName+"_bw", info.tagSet, m.meta, map[string]interface{}{"value": rate}, now); err == nil {
|
||||||
y.AddMeta("unit", counterDef.unit+"/sec")
|
|
||||||
output <- y
|
output <- y
|
||||||
m.statsProcessedMetrics++
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// Save current state
|
// Save current state
|
||||||
@@ -218,7 +207,6 @@ func (m *InfinibandCollector) Read(interval time.Duration, output chan lp.CCMetr
|
|||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
stats.ComponentStatInt(m.name, "processed_metrics", m.statsProcessedMetrics)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func (m *InfinibandCollector) Close() {
|
func (m *InfinibandCollector) Close() {
|
||||||
|
@@ -6,7 +6,6 @@ import (
|
|||||||
|
|
||||||
cclog "github.com/ClusterCockpit/cc-metric-collector/internal/ccLogger"
|
cclog "github.com/ClusterCockpit/cc-metric-collector/internal/ccLogger"
|
||||||
lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric"
|
lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric"
|
||||||
stats "github.com/ClusterCockpit/cc-metric-collector/internal/metricRouter"
|
|
||||||
|
|
||||||
// "log"
|
// "log"
|
||||||
"encoding/json"
|
"encoding/json"
|
||||||
@@ -33,7 +32,6 @@ type IOstatCollector struct {
|
|||||||
matches map[string]int
|
matches map[string]int
|
||||||
config IOstatCollectorConfig
|
config IOstatCollectorConfig
|
||||||
devices map[string]IOstatCollectorEntry
|
devices map[string]IOstatCollectorEntry
|
||||||
statsProcessedMetrics int64
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func (m *IOstatCollector) Init(config json.RawMessage) error {
|
func (m *IOstatCollector) Init(config json.RawMessage) error {
|
||||||
@@ -104,7 +102,6 @@ func (m *IOstatCollector) Init(config json.RawMessage) error {
|
|||||||
lastValues: values,
|
lastValues: values,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
m.statsProcessedMetrics = 0
|
|
||||||
m.init = true
|
m.init = true
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
@@ -144,7 +141,6 @@ func (m *IOstatCollector) Read(interval time.Duration, output chan lp.CCMetric)
|
|||||||
y, err := lp.New(name, entry.tags, m.meta, map[string]interface{}{"value": int(diff)}, time.Now())
|
y, err := lp.New(name, entry.tags, m.meta, map[string]interface{}{"value": int(diff)}, time.Now())
|
||||||
if err == nil {
|
if err == nil {
|
||||||
output <- y
|
output <- y
|
||||||
m.statsProcessedMetrics++
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
entry.lastValues[name] = x
|
entry.lastValues[name] = x
|
||||||
@@ -152,7 +148,6 @@ func (m *IOstatCollector) Read(interval time.Duration, output chan lp.CCMetric)
|
|||||||
}
|
}
|
||||||
m.devices[device] = entry
|
m.devices[device] = entry
|
||||||
}
|
}
|
||||||
stats.ComponentStatInt(m.name, "processed_metrics", m.statsProcessedMetrics)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func (m *IOstatCollector) Close() {
|
func (m *IOstatCollector) Close() {
|
||||||
|
@@ -11,7 +11,6 @@ import (
|
|||||||
"time"
|
"time"
|
||||||
|
|
||||||
lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric"
|
lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric"
|
||||||
stats "github.com/ClusterCockpit/cc-metric-collector/internal/metricRouter"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
const IPMITOOL_PATH = `ipmitool`
|
const IPMITOOL_PATH = `ipmitool`
|
||||||
@@ -30,7 +29,6 @@ type IpmiCollector struct {
|
|||||||
config IpmiCollectorConfig
|
config IpmiCollectorConfig
|
||||||
ipmitool string
|
ipmitool string
|
||||||
ipmisensors string
|
ipmisensors string
|
||||||
statsProcessedMetrics int64
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func (m *IpmiCollector) Init(config json.RawMessage) error {
|
func (m *IpmiCollector) Init(config json.RawMessage) error {
|
||||||
@@ -58,7 +56,6 @@ func (m *IpmiCollector) Init(config json.RawMessage) error {
|
|||||||
if len(m.ipmitool) == 0 && len(m.ipmisensors) == 0 {
|
if len(m.ipmitool) == 0 && len(m.ipmisensors) == 0 {
|
||||||
return errors.New("no IPMI reader found")
|
return errors.New("no IPMI reader found")
|
||||||
}
|
}
|
||||||
m.statsProcessedMetrics = 0
|
|
||||||
m.init = true
|
m.init = true
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
@@ -97,7 +94,6 @@ func (m *IpmiCollector) readIpmiTool(cmd string, output chan lp.CCMetric) {
|
|||||||
if err == nil {
|
if err == nil {
|
||||||
y.AddMeta("unit", unit)
|
y.AddMeta("unit", unit)
|
||||||
output <- y
|
output <- y
|
||||||
m.statsProcessedMetrics++
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -127,7 +123,6 @@ func (m *IpmiCollector) readIpmiSensors(cmd string, output chan lp.CCMetric) {
|
|||||||
y.AddMeta("unit", lv[4])
|
y.AddMeta("unit", lv[4])
|
||||||
}
|
}
|
||||||
output <- y
|
output <- y
|
||||||
m.statsProcessedMetrics++
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -146,7 +141,6 @@ func (m *IpmiCollector) Read(interval time.Duration, output chan lp.CCMetric) {
|
|||||||
m.readIpmiSensors(m.config.IpmisensorsPath, output)
|
m.readIpmiSensors(m.config.IpmisensorsPath, output)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
stats.ComponentStatInt(m.name, "processed_metrics", m.statsProcessedMetrics)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func (m *IpmiCollector) Close() {
|
func (m *IpmiCollector) Close() {
|
||||||
|
@@ -16,7 +16,6 @@ import (
|
|||||||
"math"
|
"math"
|
||||||
"os"
|
"os"
|
||||||
"os/signal"
|
"os/signal"
|
||||||
"sort"
|
|
||||||
"strconv"
|
"strconv"
|
||||||
"strings"
|
"strings"
|
||||||
"sync"
|
"sync"
|
||||||
@@ -28,7 +27,6 @@ import (
|
|||||||
lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric"
|
lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric"
|
||||||
topo "github.com/ClusterCockpit/cc-metric-collector/internal/ccTopology"
|
topo "github.com/ClusterCockpit/cc-metric-collector/internal/ccTopology"
|
||||||
agg "github.com/ClusterCockpit/cc-metric-collector/internal/metricAggregator"
|
agg "github.com/ClusterCockpit/cc-metric-collector/internal/metricAggregator"
|
||||||
stats "github.com/ClusterCockpit/cc-metric-collector/internal/metricRouter"
|
|
||||||
"github.com/NVIDIA/go-nvml/pkg/dl"
|
"github.com/NVIDIA/go-nvml/pkg/dl"
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -56,7 +54,6 @@ type LikwidEventsetConfig struct {
|
|||||||
gid C.int
|
gid C.int
|
||||||
eorder []*C.char
|
eorder []*C.char
|
||||||
estr *C.char
|
estr *C.char
|
||||||
go_estr string
|
|
||||||
results map[int]map[string]interface{}
|
results map[int]map[string]interface{}
|
||||||
metrics map[int]map[string]float64
|
metrics map[int]map[string]float64
|
||||||
}
|
}
|
||||||
@@ -85,9 +82,6 @@ type LikwidCollector struct {
|
|||||||
initialized bool
|
initialized bool
|
||||||
likwidGroups map[C.int]LikwidEventsetConfig
|
likwidGroups map[C.int]LikwidEventsetConfig
|
||||||
lock sync.Mutex
|
lock sync.Mutex
|
||||||
statsMeasurements int64
|
|
||||||
statsProcessedMetrics int64
|
|
||||||
statsPublishedMetrics int64
|
|
||||||
}
|
}
|
||||||
|
|
||||||
type LikwidMetric struct {
|
type LikwidMetric struct {
|
||||||
@@ -107,14 +101,8 @@ func eventsToEventStr(events map[string]string) string {
|
|||||||
|
|
||||||
func genLikwidEventSet(input LikwidCollectorEventsetConfig) LikwidEventsetConfig {
|
func genLikwidEventSet(input LikwidCollectorEventsetConfig) LikwidEventsetConfig {
|
||||||
tmplist := make([]string, 0)
|
tmplist := make([]string, 0)
|
||||||
clist := make([]string, 0)
|
|
||||||
for k := range input.Events {
|
|
||||||
clist = append(clist, k)
|
|
||||||
}
|
|
||||||
sort.Strings(clist)
|
|
||||||
elist := make([]*C.char, 0)
|
elist := make([]*C.char, 0)
|
||||||
for _, k := range clist {
|
for k, v := range input.Events {
|
||||||
v := input.Events[k]
|
|
||||||
tmplist = append(tmplist, fmt.Sprintf("%s:%s", v, k))
|
tmplist = append(tmplist, fmt.Sprintf("%s:%s", v, k))
|
||||||
c_counter := C.CString(k)
|
c_counter := C.CString(k)
|
||||||
elist = append(elist, c_counter)
|
elist = append(elist, c_counter)
|
||||||
@@ -136,7 +124,6 @@ func genLikwidEventSet(input LikwidCollectorEventsetConfig) LikwidEventsetConfig
|
|||||||
gid: -1,
|
gid: -1,
|
||||||
eorder: elist,
|
eorder: elist,
|
||||||
estr: C.CString(estr),
|
estr: C.CString(estr),
|
||||||
go_estr: estr,
|
|
||||||
results: res,
|
results: res,
|
||||||
metrics: met,
|
metrics: met,
|
||||||
}
|
}
|
||||||
@@ -206,7 +193,7 @@ func (m *LikwidCollector) Init(config json.RawMessage) error {
|
|||||||
}
|
}
|
||||||
m.setup()
|
m.setup()
|
||||||
|
|
||||||
m.meta = map[string]string{"group": "PerfCounter"}
|
m.meta = map[string]string{"source": m.name, "group": "PerfCounter"}
|
||||||
cclog.ComponentDebug(m.name, "Get cpulist and init maps and lists")
|
cclog.ComponentDebug(m.name, "Get cpulist and init maps and lists")
|
||||||
cpulist := topo.CpuList()
|
cpulist := topo.CpuList()
|
||||||
m.cpulist = make([]C.int, len(cpulist))
|
m.cpulist = make([]C.int, len(cpulist))
|
||||||
@@ -271,9 +258,6 @@ func (m *LikwidCollector) Init(config json.RawMessage) error {
|
|||||||
cclog.ComponentError(m.name, err.Error())
|
cclog.ComponentError(m.name, err.Error())
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
m.statsMeasurements = 0
|
|
||||||
m.statsProcessedMetrics = 0
|
|
||||||
m.statsPublishedMetrics = 0
|
|
||||||
m.init = true
|
m.init = true
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
@@ -281,7 +265,6 @@ func (m *LikwidCollector) Init(config json.RawMessage) error {
|
|||||||
// take a measurement for 'interval' seconds of event set index 'group'
|
// take a measurement for 'interval' seconds of event set index 'group'
|
||||||
func (m *LikwidCollector) takeMeasurement(evset LikwidEventsetConfig, interval time.Duration) (bool, error) {
|
func (m *LikwidCollector) takeMeasurement(evset LikwidEventsetConfig, interval time.Duration) (bool, error) {
|
||||||
var ret C.int
|
var ret C.int
|
||||||
|
|
||||||
m.lock.Lock()
|
m.lock.Lock()
|
||||||
if m.initialized {
|
if m.initialized {
|
||||||
ret = C.perfmon_setupCounters(evset.gid)
|
ret = C.perfmon_setupCounters(evset.gid)
|
||||||
@@ -325,8 +308,6 @@ func (m *LikwidCollector) takeMeasurement(evset LikwidEventsetConfig, interval t
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
m.lock.Unlock()
|
m.lock.Unlock()
|
||||||
m.statsMeasurements++
|
|
||||||
stats.ComponentStatInt(m.name, "measurements", m.statsMeasurements)
|
|
||||||
return false, nil
|
return false, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -367,8 +348,6 @@ func (m *LikwidCollector) calcEventsetMetrics(evset LikwidEventsetConfig, interv
|
|||||||
if m.config.InvalidToZero && math.IsInf(value, 0) {
|
if m.config.InvalidToZero && math.IsInf(value, 0) {
|
||||||
value = 0.0
|
value = 0.0
|
||||||
}
|
}
|
||||||
m.statsProcessedMetrics++
|
|
||||||
stats.ComponentStatInt(m.name, "processed_metrics", m.statsProcessedMetrics)
|
|
||||||
// Now we have the result, send it with the proper tags
|
// Now we have the result, send it with the proper tags
|
||||||
if !math.IsNaN(value) {
|
if !math.IsNaN(value) {
|
||||||
if metric.Publish {
|
if metric.Publish {
|
||||||
@@ -381,8 +360,6 @@ func (m *LikwidCollector) calcEventsetMetrics(evset LikwidEventsetConfig, interv
|
|||||||
if len(metric.Unit) > 0 {
|
if len(metric.Unit) > 0 {
|
||||||
y.AddMeta("unit", metric.Unit)
|
y.AddMeta("unit", metric.Unit)
|
||||||
}
|
}
|
||||||
m.statsPublishedMetrics++
|
|
||||||
stats.ComponentStatInt(m.name, "published_metrics", m.statsPublishedMetrics)
|
|
||||||
output <- y
|
output <- y
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -423,8 +400,6 @@ func (m *LikwidCollector) calcGlobalMetrics(interval time.Duration, output chan
|
|||||||
if m.config.InvalidToZero && math.IsInf(value, 0) {
|
if m.config.InvalidToZero && math.IsInf(value, 0) {
|
||||||
value = 0.0
|
value = 0.0
|
||||||
}
|
}
|
||||||
m.statsProcessedMetrics++
|
|
||||||
stats.ComponentStatInt(m.name, "processed_metrics", m.statsProcessedMetrics)
|
|
||||||
// Now we have the result, send it with the proper tags
|
// Now we have the result, send it with the proper tags
|
||||||
if !math.IsNaN(value) {
|
if !math.IsNaN(value) {
|
||||||
if metric.Publish {
|
if metric.Publish {
|
||||||
@@ -438,8 +413,6 @@ func (m *LikwidCollector) calcGlobalMetrics(interval time.Duration, output chan
|
|||||||
if len(metric.Unit) > 0 {
|
if len(metric.Unit) > 0 {
|
||||||
y.AddMeta("unit", metric.Unit)
|
y.AddMeta("unit", metric.Unit)
|
||||||
}
|
}
|
||||||
m.statsPublishedMetrics++
|
|
||||||
stats.ComponentStatInt(m.name, "published_metrics", m.statsPublishedMetrics)
|
|
||||||
output <- y
|
output <- y
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -452,9 +425,6 @@ func (m *LikwidCollector) calcGlobalMetrics(interval time.Duration, output chan
|
|||||||
|
|
||||||
func (m *LikwidCollector) LateInit() error {
|
func (m *LikwidCollector) LateInit() error {
|
||||||
var ret C.int
|
var ret C.int
|
||||||
if m.initialized {
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
switch m.config.AccessMode {
|
switch m.config.AccessMode {
|
||||||
case "direct":
|
case "direct":
|
||||||
C.HPMmode(0)
|
C.HPMmode(0)
|
||||||
@@ -505,17 +475,7 @@ func (m *LikwidCollector) LateInit() error {
|
|||||||
for i, evset := range m.config.Eventsets {
|
for i, evset := range m.config.Eventsets {
|
||||||
var gid C.int
|
var gid C.int
|
||||||
if len(evset.Events) > 0 {
|
if len(evset.Events) > 0 {
|
||||||
skip := false
|
|
||||||
likwidGroup := genLikwidEventSet(evset)
|
likwidGroup := genLikwidEventSet(evset)
|
||||||
for _, g := range m.likwidGroups {
|
|
||||||
if likwidGroup.go_estr == g.go_estr {
|
|
||||||
skip = true
|
|
||||||
break
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if skip {
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
// Now we add the list of events to likwid
|
// Now we add the list of events to likwid
|
||||||
gid = C.perfmon_addEventSet(likwidGroup.estr)
|
gid = C.perfmon_addEventSet(likwidGroup.estr)
|
||||||
if gid >= 0 {
|
if gid >= 0 {
|
||||||
@@ -560,14 +520,9 @@ func (m *LikwidCollector) Read(interval time.Duration, output chan lp.CCMetric)
|
|||||||
}
|
}
|
||||||
|
|
||||||
if !m.initialized {
|
if !m.initialized {
|
||||||
m.lock.Lock()
|
if m.LateInit() != nil {
|
||||||
err = m.LateInit()
|
|
||||||
if err != nil {
|
|
||||||
m.lock.Unlock()
|
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
m.initialized = true
|
|
||||||
m.lock.Unlock()
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if m.initialized && !skip {
|
if m.initialized && !skip {
|
||||||
|
@@ -10,7 +10,6 @@ import (
|
|||||||
|
|
||||||
cclog "github.com/ClusterCockpit/cc-metric-collector/internal/ccLogger"
|
cclog "github.com/ClusterCockpit/cc-metric-collector/internal/ccLogger"
|
||||||
lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric"
|
lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric"
|
||||||
stats "github.com/ClusterCockpit/cc-metric-collector/internal/metricRouter"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
//
|
//
|
||||||
@@ -33,7 +32,6 @@ type LoadavgCollector struct {
|
|||||||
config struct {
|
config struct {
|
||||||
ExcludeMetrics []string `json:"exclude_metrics,omitempty"`
|
ExcludeMetrics []string `json:"exclude_metrics,omitempty"`
|
||||||
}
|
}
|
||||||
statsProcessedMetrics int64
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func (m *LoadavgCollector) Init(config json.RawMessage) error {
|
func (m *LoadavgCollector) Init(config json.RawMessage) error {
|
||||||
@@ -65,7 +63,6 @@ func (m *LoadavgCollector) Init(config json.RawMessage) error {
|
|||||||
for i, name := range m.proc_matches {
|
for i, name := range m.proc_matches {
|
||||||
_, m.proc_skips[i] = stringArrayContains(m.config.ExcludeMetrics, name)
|
_, m.proc_skips[i] = stringArrayContains(m.config.ExcludeMetrics, name)
|
||||||
}
|
}
|
||||||
m.statsProcessedMetrics = 0
|
|
||||||
m.init = true
|
m.init = true
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
@@ -101,7 +98,6 @@ func (m *LoadavgCollector) Read(interval time.Duration, output chan lp.CCMetric)
|
|||||||
y, err := lp.New(name, m.tags, m.meta, map[string]interface{}{"value": x}, now)
|
y, err := lp.New(name, m.tags, m.meta, map[string]interface{}{"value": x}, now)
|
||||||
if err == nil {
|
if err == nil {
|
||||||
output <- y
|
output <- y
|
||||||
m.statsProcessedMetrics++
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -121,10 +117,9 @@ func (m *LoadavgCollector) Read(interval time.Duration, output chan lp.CCMetric)
|
|||||||
y, err := lp.New(name, m.tags, m.meta, map[string]interface{}{"value": x}, now)
|
y, err := lp.New(name, m.tags, m.meta, map[string]interface{}{"value": x}, now)
|
||||||
if err == nil {
|
if err == nil {
|
||||||
output <- y
|
output <- y
|
||||||
m.statsProcessedMetrics++
|
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
stats.ComponentStatInt(m.name, "processed_metrics", m.statsProcessedMetrics)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func (m *LoadavgCollector) Close() {
|
func (m *LoadavgCollector) Close() {
|
||||||
|
@@ -12,7 +12,6 @@ import (
|
|||||||
|
|
||||||
cclog "github.com/ClusterCockpit/cc-metric-collector/internal/ccLogger"
|
cclog "github.com/ClusterCockpit/cc-metric-collector/internal/ccLogger"
|
||||||
lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric"
|
lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric"
|
||||||
stats "github.com/ClusterCockpit/cc-metric-collector/internal/metricRouter"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
const LUSTRE_SYSFS = `/sys/fs/lustre`
|
const LUSTRE_SYSFS = `/sys/fs/lustre`
|
||||||
@@ -45,7 +44,6 @@ type LustreCollector struct {
|
|||||||
lastTimestamp time.Time // Store time stamp of last tick to derive bandwidths
|
lastTimestamp time.Time // Store time stamp of last tick to derive bandwidths
|
||||||
definitions []LustreMetricDefinition // Combined list without excluded metrics
|
definitions []LustreMetricDefinition // Combined list without excluded metrics
|
||||||
stats map[string]map[string]int64 // Data for last value per device and metric
|
stats map[string]map[string]int64 // Data for last value per device and metric
|
||||||
statsProcessedMetrics int64
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func (m *LustreCollector) getDeviceDataCommand(device string) []string {
|
func (m *LustreCollector) getDeviceDataCommand(device string) []string {
|
||||||
@@ -374,7 +372,6 @@ func (m *LustreCollector) Init(config json.RawMessage) error {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
m.lastTimestamp = time.Now()
|
m.lastTimestamp = time.Now()
|
||||||
m.statsProcessedMetrics = 0
|
|
||||||
m.init = true
|
m.init = true
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
@@ -421,13 +418,11 @@ func (m *LustreCollector) Read(interval time.Duration, output chan lp.CCMetric)
|
|||||||
y.AddMeta("unit", def.unit)
|
y.AddMeta("unit", def.unit)
|
||||||
}
|
}
|
||||||
output <- y
|
output <- y
|
||||||
m.statsProcessedMetrics++
|
|
||||||
}
|
}
|
||||||
devData[def.name] = use_x
|
devData[def.name] = use_x
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
m.lastTimestamp = now
|
m.lastTimestamp = now
|
||||||
stats.ComponentStatInt(m.name, "processed_metrics", m.statsProcessedMetrics)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func (m *LustreCollector) Close() {
|
func (m *LustreCollector) Close() {
|
||||||
|
@@ -14,7 +14,6 @@ import (
|
|||||||
|
|
||||||
cclog "github.com/ClusterCockpit/cc-metric-collector/internal/ccLogger"
|
cclog "github.com/ClusterCockpit/cc-metric-collector/internal/ccLogger"
|
||||||
lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric"
|
lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric"
|
||||||
stats "github.com/ClusterCockpit/cc-metric-collector/internal/metricRouter"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
const MEMSTATFILE = "/proc/meminfo"
|
const MEMSTATFILE = "/proc/meminfo"
|
||||||
@@ -39,7 +38,6 @@ type MemstatCollector struct {
|
|||||||
config MemstatCollectorConfig
|
config MemstatCollectorConfig
|
||||||
nodefiles map[int]MemstatCollectorNode
|
nodefiles map[int]MemstatCollectorNode
|
||||||
sendMemUsed bool
|
sendMemUsed bool
|
||||||
statsProcessedMetrics int64
|
|
||||||
}
|
}
|
||||||
|
|
||||||
type MemstatStats struct {
|
type MemstatStats struct {
|
||||||
@@ -155,7 +153,6 @@ func (m *MemstatCollector) Init(config json.RawMessage) error {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
m.statsProcessedMetrics = 0
|
|
||||||
m.init = true
|
m.init = true
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
@@ -181,7 +178,6 @@ func (m *MemstatCollector) Read(interval time.Duration, output chan lp.CCMetric)
|
|||||||
if len(unit) > 0 {
|
if len(unit) > 0 {
|
||||||
y.AddMeta("unit", unit)
|
y.AddMeta("unit", unit)
|
||||||
}
|
}
|
||||||
m.statsProcessedMetrics++
|
|
||||||
output <- y
|
output <- y
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -211,7 +207,6 @@ func (m *MemstatCollector) Read(interval time.Duration, output chan lp.CCMetric)
|
|||||||
if len(unit) > 0 {
|
if len(unit) > 0 {
|
||||||
y.AddMeta("unit", unit)
|
y.AddMeta("unit", unit)
|
||||||
}
|
}
|
||||||
m.statsProcessedMetrics++
|
|
||||||
output <- y
|
output <- y
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -228,7 +223,6 @@ func (m *MemstatCollector) Read(interval time.Duration, output chan lp.CCMetric)
|
|||||||
sendStats(stats, nodeConf.tags)
|
sendStats(stats, nodeConf.tags)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
stats.ComponentStatInt(m.name, "collected_metrics", m.statsProcessedMetrics)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func (m *MemstatCollector) Close() {
|
func (m *MemstatCollector) Close() {
|
||||||
|
@@ -11,7 +11,6 @@ import (
|
|||||||
|
|
||||||
cclog "github.com/ClusterCockpit/cc-metric-collector/internal/ccLogger"
|
cclog "github.com/ClusterCockpit/cc-metric-collector/internal/ccLogger"
|
||||||
lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric"
|
lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric"
|
||||||
stats "github.com/ClusterCockpit/cc-metric-collector/internal/metricRouter"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
const NETSTATFILE = "/proc/net/dev"
|
const NETSTATFILE = "/proc/net/dev"
|
||||||
@@ -36,7 +35,6 @@ type NetstatCollector struct {
|
|||||||
config NetstatCollectorConfig
|
config NetstatCollectorConfig
|
||||||
matches map[string][]NetstatCollectorMetric
|
matches map[string][]NetstatCollectorMetric
|
||||||
lastTimestamp time.Time
|
lastTimestamp time.Time
|
||||||
statsProcessedMetrics int64
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func (m *NetstatCollector) Init(config json.RawMessage) error {
|
func (m *NetstatCollector) Init(config json.RawMessage) error {
|
||||||
@@ -150,7 +148,6 @@ func (m *NetstatCollector) Init(config json.RawMessage) error {
|
|||||||
if len(m.matches) == 0 {
|
if len(m.matches) == 0 {
|
||||||
return errors.New("no devices to collector metrics found")
|
return errors.New("no devices to collector metrics found")
|
||||||
}
|
}
|
||||||
m.statsProcessedMetrics = 0
|
|
||||||
m.init = true
|
m.init = true
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
@@ -201,7 +198,6 @@ func (m *NetstatCollector) Read(interval time.Duration, output chan lp.CCMetric)
|
|||||||
if m.config.SendAbsoluteValues {
|
if m.config.SendAbsoluteValues {
|
||||||
if y, err := lp.New(metric.name, metric.tags, metric.meta, map[string]interface{}{"value": v}, now); err == nil {
|
if y, err := lp.New(metric.name, metric.tags, metric.meta, map[string]interface{}{"value": v}, now); err == nil {
|
||||||
output <- y
|
output <- y
|
||||||
m.statsProcessedMetrics++
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if m.config.SendDerivedValues {
|
if m.config.SendDerivedValues {
|
||||||
@@ -209,7 +205,6 @@ func (m *NetstatCollector) Read(interval time.Duration, output chan lp.CCMetric)
|
|||||||
rate := float64(v-metric.lastValue) / timeDiff
|
rate := float64(v-metric.lastValue) / timeDiff
|
||||||
if y, err := lp.New(metric.name+"_bw", metric.tags, metric.meta_rates, map[string]interface{}{"value": rate}, now); err == nil {
|
if y, err := lp.New(metric.name+"_bw", metric.tags, metric.meta_rates, map[string]interface{}{"value": rate}, now); err == nil {
|
||||||
output <- y
|
output <- y
|
||||||
m.statsProcessedMetrics++
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
metric.lastValue = v
|
metric.lastValue = v
|
||||||
@@ -217,7 +212,6 @@ func (m *NetstatCollector) Read(interval time.Duration, output chan lp.CCMetric)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
stats.ComponentStatInt(m.name, "processed_metrics", m.statsProcessedMetrics)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func (m *NetstatCollector) Close() {
|
func (m *NetstatCollector) Close() {
|
||||||
|
@@ -12,7 +12,6 @@ import (
|
|||||||
"time"
|
"time"
|
||||||
|
|
||||||
lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric"
|
lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric"
|
||||||
stats "github.com/ClusterCockpit/cc-metric-collector/internal/metricRouter"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
// First part contains the code for the general NfsCollector.
|
// First part contains the code for the general NfsCollector.
|
||||||
@@ -34,7 +33,6 @@ type nfsCollector struct {
|
|||||||
ExcludeMetrics []string `json:"exclude_metrics,omitempty"`
|
ExcludeMetrics []string `json:"exclude_metrics,omitempty"`
|
||||||
}
|
}
|
||||||
data map[string]NfsCollectorData
|
data map[string]NfsCollectorData
|
||||||
statsProcessedMetrics int64
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func (m *nfsCollector) initStats() error {
|
func (m *nfsCollector) initStats() error {
|
||||||
@@ -115,7 +113,6 @@ func (m *nfsCollector) MainInit(config json.RawMessage) error {
|
|||||||
}
|
}
|
||||||
m.data = make(map[string]NfsCollectorData)
|
m.data = make(map[string]NfsCollectorData)
|
||||||
m.initStats()
|
m.initStats()
|
||||||
m.statsProcessedMetrics = 0
|
|
||||||
m.init = true
|
m.init = true
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
@@ -146,10 +143,8 @@ func (m *nfsCollector) Read(interval time.Duration, output chan lp.CCMetric) {
|
|||||||
if err == nil {
|
if err == nil {
|
||||||
y.AddMeta("version", m.version)
|
y.AddMeta("version", m.version)
|
||||||
output <- y
|
output <- y
|
||||||
m.statsProcessedMetrics++
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
stats.ComponentStatInt(m.name, "processed_metrics", m.statsProcessedMetrics)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func (m *nfsCollector) Close() {
|
func (m *nfsCollector) Close() {
|
||||||
|
@@ -12,7 +12,6 @@ import (
|
|||||||
|
|
||||||
cclog "github.com/ClusterCockpit/cc-metric-collector/internal/ccLogger"
|
cclog "github.com/ClusterCockpit/cc-metric-collector/internal/ccLogger"
|
||||||
lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric"
|
lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric"
|
||||||
stats "github.com/ClusterCockpit/cc-metric-collector/internal/metricRouter"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
//
|
//
|
||||||
@@ -46,7 +45,6 @@ type NUMAStatsCollectorTopolgy struct {
|
|||||||
type NUMAStatsCollector struct {
|
type NUMAStatsCollector struct {
|
||||||
metricCollector
|
metricCollector
|
||||||
topology []NUMAStatsCollectorTopolgy
|
topology []NUMAStatsCollectorTopolgy
|
||||||
statsProcessedMetrics int64
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func (m *NUMAStatsCollector) Init(config json.RawMessage) error {
|
func (m *NUMAStatsCollector) Init(config json.RawMessage) error {
|
||||||
@@ -82,7 +80,7 @@ func (m *NUMAStatsCollector) Init(config json.RawMessage) error {
|
|||||||
tagSet: map[string]string{"memoryDomain": node},
|
tagSet: map[string]string{"memoryDomain": node},
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
m.statsProcessedMetrics = 0
|
|
||||||
m.init = true
|
m.init = true
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
@@ -129,13 +127,11 @@ func (m *NUMAStatsCollector) Read(interval time.Duration, output chan lp.CCMetri
|
|||||||
)
|
)
|
||||||
if err == nil {
|
if err == nil {
|
||||||
output <- y
|
output <- y
|
||||||
m.statsProcessedMetrics++
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
file.Close()
|
file.Close()
|
||||||
}
|
}
|
||||||
stats.ComponentStatInt(m.name, "collected_metrics", m.statsProcessedMetrics)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func (m *NUMAStatsCollector) Close() {
|
func (m *NUMAStatsCollector) Close() {
|
||||||
|
@@ -9,7 +9,6 @@ import (
|
|||||||
|
|
||||||
cclog "github.com/ClusterCockpit/cc-metric-collector/internal/ccLogger"
|
cclog "github.com/ClusterCockpit/cc-metric-collector/internal/ccLogger"
|
||||||
lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric"
|
lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric"
|
||||||
stats "github.com/ClusterCockpit/cc-metric-collector/internal/metricRouter"
|
|
||||||
"github.com/NVIDIA/go-nvml/pkg/nvml"
|
"github.com/NVIDIA/go-nvml/pkg/nvml"
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -30,7 +29,6 @@ type NvidiaCollector struct {
|
|||||||
num_gpus int
|
num_gpus int
|
||||||
config NvidiaCollectorConfig
|
config NvidiaCollectorConfig
|
||||||
gpus []NvidiaCollectorDevice
|
gpus []NvidiaCollectorDevice
|
||||||
statsProcessedMetrics int64
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func (m *NvidiaCollector) CatchPanic() {
|
func (m *NvidiaCollector) CatchPanic() {
|
||||||
@@ -122,7 +120,7 @@ func (m *NvidiaCollector) Init(config json.RawMessage) error {
|
|||||||
pciInfo.Device)
|
pciInfo.Device)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
m.statsProcessedMetrics = 0
|
|
||||||
m.init = true
|
m.init = true
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
@@ -153,7 +151,6 @@ func (m *NvidiaCollector) Read(interval time.Duration, output chan lp.CCMetric)
|
|||||||
if err == nil {
|
if err == nil {
|
||||||
y.AddMeta("unit", "%")
|
y.AddMeta("unit", "%")
|
||||||
output <- y
|
output <- y
|
||||||
m.statsProcessedMetrics++
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if !device.excludeMetrics["nv_mem_util"] {
|
if !device.excludeMetrics["nv_mem_util"] {
|
||||||
@@ -161,7 +158,6 @@ func (m *NvidiaCollector) Read(interval time.Duration, output chan lp.CCMetric)
|
|||||||
if err == nil {
|
if err == nil {
|
||||||
y.AddMeta("unit", "%")
|
y.AddMeta("unit", "%")
|
||||||
output <- y
|
output <- y
|
||||||
m.statsProcessedMetrics++
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -190,7 +186,6 @@ func (m *NvidiaCollector) Read(interval time.Duration, output chan lp.CCMetric)
|
|||||||
if err == nil {
|
if err == nil {
|
||||||
y.AddMeta("unit", "MByte")
|
y.AddMeta("unit", "MByte")
|
||||||
output <- y
|
output <- y
|
||||||
m.statsProcessedMetrics++
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -200,7 +195,6 @@ func (m *NvidiaCollector) Read(interval time.Duration, output chan lp.CCMetric)
|
|||||||
if err == nil {
|
if err == nil {
|
||||||
y.AddMeta("unit", "MByte")
|
y.AddMeta("unit", "MByte")
|
||||||
output <- y
|
output <- y
|
||||||
m.statsProcessedMetrics++
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -218,7 +212,6 @@ func (m *NvidiaCollector) Read(interval time.Duration, output chan lp.CCMetric)
|
|||||||
if err == nil {
|
if err == nil {
|
||||||
y.AddMeta("unit", "degC")
|
y.AddMeta("unit", "degC")
|
||||||
output <- y
|
output <- y
|
||||||
m.statsProcessedMetrics++
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -239,7 +232,6 @@ func (m *NvidiaCollector) Read(interval time.Duration, output chan lp.CCMetric)
|
|||||||
if err == nil {
|
if err == nil {
|
||||||
y.AddMeta("unit", "%")
|
y.AddMeta("unit", "%")
|
||||||
output <- y
|
output <- y
|
||||||
m.statsProcessedMetrics++
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -266,13 +258,11 @@ func (m *NvidiaCollector) Read(interval time.Duration, output chan lp.CCMetric)
|
|||||||
}
|
}
|
||||||
if err == nil {
|
if err == nil {
|
||||||
output <- y
|
output <- y
|
||||||
m.statsProcessedMetrics++
|
|
||||||
}
|
}
|
||||||
} else if ret == nvml.ERROR_NOT_SUPPORTED {
|
} else if ret == nvml.ERROR_NOT_SUPPORTED {
|
||||||
y, err := lp.New("nv_ecc_mode", device.tags, m.meta, map[string]interface{}{"value": "N/A"}, time.Now())
|
y, err := lp.New("nv_ecc_mode", device.tags, m.meta, map[string]interface{}{"value": "N/A"}, time.Now())
|
||||||
if err == nil {
|
if err == nil {
|
||||||
output <- y
|
output <- y
|
||||||
m.statsProcessedMetrics++
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -290,7 +280,6 @@ func (m *NvidiaCollector) Read(interval time.Duration, output chan lp.CCMetric)
|
|||||||
y, err := lp.New("nv_perf_state", device.tags, m.meta, map[string]interface{}{"value": fmt.Sprintf("P%d", int(pState))}, time.Now())
|
y, err := lp.New("nv_perf_state", device.tags, m.meta, map[string]interface{}{"value": fmt.Sprintf("P%d", int(pState))}, time.Now())
|
||||||
if err == nil {
|
if err == nil {
|
||||||
output <- y
|
output <- y
|
||||||
m.statsProcessedMetrics++
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -307,7 +296,6 @@ func (m *NvidiaCollector) Read(interval time.Duration, output chan lp.CCMetric)
|
|||||||
if err == nil {
|
if err == nil {
|
||||||
y.AddMeta("unit", "watts")
|
y.AddMeta("unit", "watts")
|
||||||
output <- y
|
output <- y
|
||||||
m.statsProcessedMetrics++
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -325,7 +313,6 @@ func (m *NvidiaCollector) Read(interval time.Duration, output chan lp.CCMetric)
|
|||||||
if err == nil {
|
if err == nil {
|
||||||
y.AddMeta("unit", "MHz")
|
y.AddMeta("unit", "MHz")
|
||||||
output <- y
|
output <- y
|
||||||
m.statsProcessedMetrics++
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -337,7 +324,6 @@ func (m *NvidiaCollector) Read(interval time.Duration, output chan lp.CCMetric)
|
|||||||
if err == nil {
|
if err == nil {
|
||||||
y.AddMeta("unit", "MHz")
|
y.AddMeta("unit", "MHz")
|
||||||
output <- y
|
output <- y
|
||||||
m.statsProcessedMetrics++
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -349,7 +335,6 @@ func (m *NvidiaCollector) Read(interval time.Duration, output chan lp.CCMetric)
|
|||||||
if err == nil {
|
if err == nil {
|
||||||
y.AddMeta("unit", "MHz")
|
y.AddMeta("unit", "MHz")
|
||||||
output <- y
|
output <- y
|
||||||
m.statsProcessedMetrics++
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -372,7 +357,6 @@ func (m *NvidiaCollector) Read(interval time.Duration, output chan lp.CCMetric)
|
|||||||
if err == nil {
|
if err == nil {
|
||||||
y.AddMeta("unit", "MHz")
|
y.AddMeta("unit", "MHz")
|
||||||
output <- y
|
output <- y
|
||||||
m.statsProcessedMetrics++
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -384,7 +368,6 @@ func (m *NvidiaCollector) Read(interval time.Duration, output chan lp.CCMetric)
|
|||||||
if err == nil {
|
if err == nil {
|
||||||
y.AddMeta("unit", "MHz")
|
y.AddMeta("unit", "MHz")
|
||||||
output <- y
|
output <- y
|
||||||
m.statsProcessedMetrics++
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -396,7 +379,6 @@ func (m *NvidiaCollector) Read(interval time.Duration, output chan lp.CCMetric)
|
|||||||
if err == nil {
|
if err == nil {
|
||||||
y.AddMeta("unit", "MHz")
|
y.AddMeta("unit", "MHz")
|
||||||
output <- y
|
output <- y
|
||||||
m.statsProcessedMetrics++
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -416,7 +398,6 @@ func (m *NvidiaCollector) Read(interval time.Duration, output chan lp.CCMetric)
|
|||||||
y, err := lp.New("nv_ecc_db_error", device.tags, m.meta, map[string]interface{}{"value": float64(ecc_db)}, time.Now())
|
y, err := lp.New("nv_ecc_db_error", device.tags, m.meta, map[string]interface{}{"value": float64(ecc_db)}, time.Now())
|
||||||
if err == nil {
|
if err == nil {
|
||||||
output <- y
|
output <- y
|
||||||
m.statsProcessedMetrics++
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -427,7 +408,6 @@ func (m *NvidiaCollector) Read(interval time.Duration, output chan lp.CCMetric)
|
|||||||
y, err := lp.New("nv_ecc_sb_error", device.tags, m.meta, map[string]interface{}{"value": float64(ecc_sb)}, time.Now())
|
y, err := lp.New("nv_ecc_sb_error", device.tags, m.meta, map[string]interface{}{"value": float64(ecc_sb)}, time.Now())
|
||||||
if err == nil {
|
if err == nil {
|
||||||
output <- y
|
output <- y
|
||||||
m.statsProcessedMetrics++
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -445,7 +425,6 @@ func (m *NvidiaCollector) Read(interval time.Duration, output chan lp.CCMetric)
|
|||||||
if err == nil {
|
if err == nil {
|
||||||
y.AddMeta("unit", "watts")
|
y.AddMeta("unit", "watts")
|
||||||
output <- y
|
output <- y
|
||||||
m.statsProcessedMetrics++
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -462,7 +441,6 @@ func (m *NvidiaCollector) Read(interval time.Duration, output chan lp.CCMetric)
|
|||||||
if err == nil {
|
if err == nil {
|
||||||
y.AddMeta("unit", "%")
|
y.AddMeta("unit", "%")
|
||||||
output <- y
|
output <- y
|
||||||
m.statsProcessedMetrics++
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -479,12 +457,11 @@ func (m *NvidiaCollector) Read(interval time.Duration, output chan lp.CCMetric)
|
|||||||
if err == nil {
|
if err == nil {
|
||||||
y.AddMeta("unit", "%")
|
y.AddMeta("unit", "%")
|
||||||
output <- y
|
output <- y
|
||||||
m.statsProcessedMetrics++
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
stats.ComponentStatInt(m.name, "collected_metrics", m.statsProcessedMetrics)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func (m *NvidiaCollector) Close() {
|
func (m *NvidiaCollector) Close() {
|
||||||
|
@@ -6,7 +6,6 @@ import (
|
|||||||
|
|
||||||
cclog "github.com/ClusterCockpit/cc-metric-collector/internal/ccLogger"
|
cclog "github.com/ClusterCockpit/cc-metric-collector/internal/ccLogger"
|
||||||
lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric"
|
lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric"
|
||||||
stats "github.com/ClusterCockpit/cc-metric-collector/internal/metricRouter"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
// These are the fields we read from the JSON configuration
|
// These are the fields we read from the JSON configuration
|
||||||
@@ -21,7 +20,6 @@ type SampleCollector struct {
|
|||||||
config SampleTimerCollectorConfig // the configuration structure
|
config SampleTimerCollectorConfig // the configuration structure
|
||||||
meta map[string]string // default meta information
|
meta map[string]string // default meta information
|
||||||
tags map[string]string // default tags
|
tags map[string]string // default tags
|
||||||
statsCount int64
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Functions to implement MetricCollector interface
|
// Functions to implement MetricCollector interface
|
||||||
@@ -60,9 +58,6 @@ func (m *SampleCollector) Init(config json.RawMessage) error {
|
|||||||
// for all topological entities (sockets, NUMA domains, ...)
|
// for all topological entities (sockets, NUMA domains, ...)
|
||||||
// Return some useful error message in case of any failures
|
// Return some useful error message in case of any failures
|
||||||
|
|
||||||
// Initialize counts for statistics
|
|
||||||
m.statsCount = 0
|
|
||||||
|
|
||||||
// Set this flag only if everything is initialized properly, all required files exist, ...
|
// Set this flag only if everything is initialized properly, all required files exist, ...
|
||||||
m.init = true
|
m.init = true
|
||||||
return err
|
return err
|
||||||
@@ -85,11 +80,8 @@ func (m *SampleCollector) Read(interval time.Duration, output chan lp.CCMetric)
|
|||||||
if err == nil {
|
if err == nil {
|
||||||
// Send it to output channel
|
// Send it to output channel
|
||||||
output <- y
|
output <- y
|
||||||
// increment count for each sent metric or any other operation
|
|
||||||
m.statsCount++
|
|
||||||
}
|
}
|
||||||
// Set stats for the component
|
|
||||||
stats.ComponentStatInt(m.name, "count", m.statsCount)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Close metric collector: close network connection, close files, close libraries, ...
|
// Close metric collector: close network connection, close files, close libraries, ...
|
||||||
|
@@ -11,7 +11,6 @@ import (
|
|||||||
|
|
||||||
cclog "github.com/ClusterCockpit/cc-metric-collector/internal/ccLogger"
|
cclog "github.com/ClusterCockpit/cc-metric-collector/internal/ccLogger"
|
||||||
lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric"
|
lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric"
|
||||||
stats "github.com/ClusterCockpit/cc-metric-collector/internal/metricRouter"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
// See: https://www.kernel.org/doc/html/latest/hwmon/sysfs-interface.html
|
// See: https://www.kernel.org/doc/html/latest/hwmon/sysfs-interface.html
|
||||||
@@ -42,7 +41,6 @@ type TempCollector struct {
|
|||||||
ReportCriticalTemp bool `json:"report_critical_temperature"`
|
ReportCriticalTemp bool `json:"report_critical_temperature"`
|
||||||
}
|
}
|
||||||
sensors []*TempCollectorSensor
|
sensors []*TempCollectorSensor
|
||||||
statsProcessedMetrics int64
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func (m *TempCollector) Init(config json.RawMessage) error {
|
func (m *TempCollector) Init(config json.RawMessage) error {
|
||||||
@@ -164,7 +162,6 @@ func (m *TempCollector) Init(config json.RawMessage) error {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Finished initialization
|
// Finished initialization
|
||||||
m.statsProcessedMetrics = 0
|
|
||||||
m.init = true
|
m.init = true
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
@@ -197,7 +194,6 @@ func (m *TempCollector) Read(interval time.Duration, output chan lp.CCMetric) {
|
|||||||
)
|
)
|
||||||
if err == nil {
|
if err == nil {
|
||||||
output <- y
|
output <- y
|
||||||
m.statsProcessedMetrics++
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// max temperature
|
// max temperature
|
||||||
@@ -211,7 +207,6 @@ func (m *TempCollector) Read(interval time.Duration, output chan lp.CCMetric) {
|
|||||||
)
|
)
|
||||||
if err == nil {
|
if err == nil {
|
||||||
output <- y
|
output <- y
|
||||||
m.statsProcessedMetrics++
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -226,11 +221,10 @@ func (m *TempCollector) Read(interval time.Duration, output chan lp.CCMetric) {
|
|||||||
)
|
)
|
||||||
if err == nil {
|
if err == nil {
|
||||||
output <- y
|
output <- y
|
||||||
m.statsProcessedMetrics++
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
stats.ComponentStatInt(m.name, "processed_metrics", m.statsProcessedMetrics)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func (m *TempCollector) Close() {
|
func (m *TempCollector) Close() {
|
||||||
|
@@ -10,7 +10,6 @@ import (
|
|||||||
"time"
|
"time"
|
||||||
|
|
||||||
lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric"
|
lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric"
|
||||||
stats "github.com/ClusterCockpit/cc-metric-collector/internal/metricRouter"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
const MAX_NUM_PROCS = 10
|
const MAX_NUM_PROCS = 10
|
||||||
@@ -24,7 +23,6 @@ type TopProcsCollector struct {
|
|||||||
metricCollector
|
metricCollector
|
||||||
tags map[string]string
|
tags map[string]string
|
||||||
config TopProcsCollectorConfig
|
config TopProcsCollectorConfig
|
||||||
statsProcessedMetrics int64
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func (m *TopProcsCollector) Init(config json.RawMessage) error {
|
func (m *TopProcsCollector) Init(config json.RawMessage) error {
|
||||||
@@ -50,7 +48,6 @@ func (m *TopProcsCollector) Init(config json.RawMessage) error {
|
|||||||
if err != nil {
|
if err != nil {
|
||||||
return errors.New("failed to execute command")
|
return errors.New("failed to execute command")
|
||||||
}
|
}
|
||||||
m.statsProcessedMetrics = 0
|
|
||||||
m.init = true
|
m.init = true
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
@@ -73,10 +70,8 @@ func (m *TopProcsCollector) Read(interval time.Duration, output chan lp.CCMetric
|
|||||||
y, err := lp.New(name, m.tags, m.meta, map[string]interface{}{"value": string(lines[i])}, time.Now())
|
y, err := lp.New(name, m.tags, m.meta, map[string]interface{}{"value": string(lines[i])}, time.Now())
|
||||||
if err == nil {
|
if err == nil {
|
||||||
output <- y
|
output <- y
|
||||||
m.statsProcessedMetrics++
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
stats.ComponentStatInt(m.name, "processed_metrics", m.statsProcessedMetrics)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func (m *TopProcsCollector) Close() {
|
func (m *TopProcsCollector) Close() {
|
||||||
|
@@ -1,17 +0,0 @@
|
|||||||
# Stats API
|
|
||||||
|
|
||||||
The Stats API can be used for debugging. It publishes counts from the different components of the CC Metric Collector as JSON at an HTTP endpoint.
|
|
||||||
|
|
||||||
# Configuration
|
|
||||||
|
|
||||||
The Stats API has its own configuration file to specify the listen host and port. The defaults are `localhost` and `8080`.
|
|
||||||
|
|
||||||
```json
|
|
||||||
{
|
|
||||||
"bindhost" : "",
|
|
||||||
"port" : "8080",
|
|
||||||
"publish_collectorstate" : true
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
The `bindhost` and `port` options specify the listen host and port. The `publish_collectorstate` option must be `true`; otherwise nothing is published. This option exists for future use, in case we need to publish more information using different domains.
|
|
@@ -1,232 +0,0 @@
|
|||||||
package metricRouter
|
|
||||||
|
|
||||||
import (
|
|
||||||
"context"
|
|
||||||
"encoding/json"
|
|
||||||
"fmt"
|
|
||||||
"io"
|
|
||||||
"net/http"
|
|
||||||
"os"
|
|
||||||
"sync"
|
|
||||||
"time"
|
|
||||||
|
|
||||||
cclog "github.com/ClusterCockpit/cc-metric-collector/internal/ccLogger"
|
|
||||||
lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric"
|
|
||||||
mct "github.com/ClusterCockpit/cc-metric-collector/internal/multiChanTicker"
|
|
||||||
"github.com/gorilla/mux"
|
|
||||||
)
|
|
||||||
|
|
||||||
// statsApiConfig holds the settings decoded from the Stats API JSON
// configuration file.
type statsApiConfig struct {
	// Host and Port are joined as "host:port" to form the listen address
	// of the HTTP server.
	Host string `json:"bindhost"`
	Port string `json:"port"`

	// PublishCollectorState controls whether the stats handler is
	// registered at "/".
	PublishCollectorState bool `json:"publish_collectorstate"`
}
|
|
||||||
|
|
||||||
// Metric cache data structure
|
|
||||||
type statsApi struct {
|
|
||||||
name string
|
|
||||||
input chan lp.CCMetric
|
|
||||||
indone chan bool
|
|
||||||
outdone chan bool
|
|
||||||
config statsApiConfig
|
|
||||||
wg *sync.WaitGroup
|
|
||||||
statsWg sync.WaitGroup
|
|
||||||
ticker mct.MultiChanTicker
|
|
||||||
tickchan chan time.Time
|
|
||||||
server *http.Server
|
|
||||||
router *mux.Router
|
|
||||||
lock sync.Mutex
|
|
||||||
baseurl string
|
|
||||||
stats map[string]map[string]int64
|
|
||||||
outStats map[string]map[string]int64
|
|
||||||
}
|
|
||||||
|
|
||||||
// StatsApi is the public interface of the Stats API component.
type StatsApi interface {
	// StatsFunc is the HTTP handler that writes the statistics as JSON.
	StatsFunc(w http.ResponseWriter, r *http.Request)
	// Start launches the background goroutines and the HTTP server.
	Start()
	// Close stops the goroutines and shuts the HTTP server down.
	Close()
}
|
|
||||||
|
|
||||||
// statsApiServer is the package-wide Stats API instance set by NewStatsApi.
// It stays nil until then, which makes ComponentStatInt and
// ComponentStatString no-ops.
var statsApiServer *statsApi
|
|
||||||
|
|
||||||
func (a *statsApi) updateStats(point lp.CCMetric) {
|
|
||||||
switch point.Name() {
|
|
||||||
case "_stats":
|
|
||||||
if name, nok := point.GetMeta("source"); nok {
|
|
||||||
var compStats map[string]int64
|
|
||||||
var ok bool
|
|
||||||
|
|
||||||
if compStats, ok = a.stats[name]; !ok {
|
|
||||||
a.stats[name] = make(map[string]int64)
|
|
||||||
compStats = a.stats[name]
|
|
||||||
}
|
|
||||||
for k, v := range point.Fields() {
|
|
||||||
switch value := v.(type) {
|
|
||||||
case int:
|
|
||||||
compStats[k] = int64(value)
|
|
||||||
case uint:
|
|
||||||
compStats[k] = int64(value)
|
|
||||||
case int32:
|
|
||||||
compStats[k] = int64(value)
|
|
||||||
case uint32:
|
|
||||||
compStats[k] = int64(value)
|
|
||||||
case int64:
|
|
||||||
compStats[k] = int64(value)
|
|
||||||
case uint64:
|
|
||||||
compStats[k] = int64(value)
|
|
||||||
default:
|
|
||||||
cclog.ComponentDebug(a.name, "Unusable stats for", k, ". Values should be int64")
|
|
||||||
}
|
|
||||||
}
|
|
||||||
a.stats[name] = compStats
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func (a *statsApi) Start() {
|
|
||||||
a.ticker.AddChannel(a.tickchan)
|
|
||||||
a.wg.Add(1)
|
|
||||||
a.statsWg.Add(1)
|
|
||||||
go func() {
|
|
||||||
a.stats = make(map[string]map[string]int64)
|
|
||||||
defer a.statsWg.Done()
|
|
||||||
for {
|
|
||||||
select {
|
|
||||||
case <-a.indone:
|
|
||||||
cclog.ComponentDebug(a.name, "INPUT DONE")
|
|
||||||
close(a.indone)
|
|
||||||
return
|
|
||||||
case p := <-a.input:
|
|
||||||
a.lock.Lock()
|
|
||||||
a.updateStats(p)
|
|
||||||
a.lock.Unlock()
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}()
|
|
||||||
a.statsWg.Add(1)
|
|
||||||
go func() {
|
|
||||||
a.outStats = make(map[string]map[string]int64)
|
|
||||||
defer a.statsWg.Done()
|
|
||||||
a.lock.Lock()
|
|
||||||
for comp, compData := range a.stats {
|
|
||||||
var outData map[string]int64
|
|
||||||
var ok bool
|
|
||||||
if outData, ok = a.outStats[comp]; !ok {
|
|
||||||
outData = make(map[string]int64)
|
|
||||||
}
|
|
||||||
for k, v := range compData {
|
|
||||||
outData[k] = v
|
|
||||||
}
|
|
||||||
a.outStats[comp] = outData
|
|
||||||
}
|
|
||||||
a.lock.Unlock()
|
|
||||||
for {
|
|
||||||
select {
|
|
||||||
case <-a.outdone:
|
|
||||||
cclog.ComponentDebug(a.name, "OUTPUT DONE")
|
|
||||||
close(a.outdone)
|
|
||||||
return
|
|
||||||
case <-a.tickchan:
|
|
||||||
a.lock.Lock()
|
|
||||||
for comp, compData := range a.stats {
|
|
||||||
var outData map[string]int64
|
|
||||||
var ok bool
|
|
||||||
if outData, ok = a.outStats[comp]; !ok {
|
|
||||||
outData = make(map[string]int64)
|
|
||||||
}
|
|
||||||
for k, v := range compData {
|
|
||||||
outData[k] = v
|
|
||||||
}
|
|
||||||
a.outStats[comp] = outData
|
|
||||||
}
|
|
||||||
a.lock.Unlock()
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}()
|
|
||||||
a.statsWg.Add(1)
|
|
||||||
go func() {
|
|
||||||
defer a.statsWg.Done()
|
|
||||||
err := a.server.ListenAndServe()
|
|
||||||
if err != nil && err.Error() != "http: Server closed" {
|
|
||||||
cclog.ComponentError(a.name, err.Error())
|
|
||||||
}
|
|
||||||
cclog.ComponentDebug(a.name, "SERVER DONE")
|
|
||||||
}()
|
|
||||||
cclog.ComponentDebug(a.name, "STARTED")
|
|
||||||
}
|
|
||||||
|
|
||||||
func (a *statsApi) StatsFunc(w http.ResponseWriter, r *http.Request) {
|
|
||||||
data, err := json.Marshal(a.outStats)
|
|
||||||
if err == nil {
|
|
||||||
w.Header().Set("Content-Type", "application/json")
|
|
||||||
io.WriteString(w, string(data))
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Close finishes / stops the metric cache
|
|
||||||
func (a *statsApi) Close() {
|
|
||||||
cclog.ComponentDebug(a.name, "CLOSE")
|
|
||||||
a.indone <- true
|
|
||||||
a.outdone <- true
|
|
||||||
a.server.Shutdown(context.Background())
|
|
||||||
// wait for close of channel r.done
|
|
||||||
<-a.indone
|
|
||||||
<-a.outdone
|
|
||||||
a.statsWg.Wait()
|
|
||||||
a.wg.Done()
|
|
||||||
|
|
||||||
//a.wg.Wait()
|
|
||||||
}
|
|
||||||
|
|
||||||
func NewStatsApi(ticker mct.MultiChanTicker, wg *sync.WaitGroup, statsApiConfigfile string) (StatsApi, error) {
|
|
||||||
a := new(statsApi)
|
|
||||||
a.name = "StatsApi"
|
|
||||||
a.config.Host = "localhost"
|
|
||||||
a.config.Port = "8080"
|
|
||||||
configFile, err := os.Open(statsApiConfigfile)
|
|
||||||
if err != nil {
|
|
||||||
cclog.ComponentError(a.name, err.Error())
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
defer configFile.Close()
|
|
||||||
jsonParser := json.NewDecoder(configFile)
|
|
||||||
err = jsonParser.Decode(&a.config)
|
|
||||||
if err != nil {
|
|
||||||
cclog.ComponentError(a.name, err.Error())
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
a.input = make(chan lp.CCMetric)
|
|
||||||
a.ticker = ticker
|
|
||||||
a.tickchan = make(chan time.Time)
|
|
||||||
a.wg = wg
|
|
||||||
a.indone = make(chan bool)
|
|
||||||
a.outdone = make(chan bool)
|
|
||||||
a.router = mux.NewRouter()
|
|
||||||
a.baseurl = fmt.Sprintf("%s:%s", a.config.Host, a.config.Port)
|
|
||||||
a.server = &http.Server{Addr: a.baseurl, Handler: a.router}
|
|
||||||
if a.config.PublishCollectorState {
|
|
||||||
a.router.HandleFunc("/", a.StatsFunc)
|
|
||||||
}
|
|
||||||
statsApiServer = a
|
|
||||||
return a, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func ComponentStatInt(component string, key string, value int64) {
|
|
||||||
if statsApiServer == nil {
|
|
||||||
return
|
|
||||||
}
|
|
||||||
y, err := lp.New("_stats", map[string]string{}, map[string]string{"source": component}, map[string]interface{}{key: value}, time.Now())
|
|
||||||
if err == nil {
|
|
||||||
statsApiServer.input <- y
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func ComponentStatString(component string, key string, value int64) {
|
|
||||||
if statsApiServer == nil {
|
|
||||||
return
|
|
||||||
}
|
|
||||||
y, err := lp.New("_stats", map[string]string{}, map[string]string{"source": component}, map[string]interface{}{key: value}, time.Now())
|
|
||||||
if err == nil {
|
|
||||||
statsApiServer.input <- y
|
|
||||||
}
|
|
||||||
}
|
|
@@ -54,12 +54,6 @@ type metricRouter struct {
|
|||||||
cache MetricCache // pointer to MetricCache
|
cache MetricCache // pointer to MetricCache
|
||||||
cachewg sync.WaitGroup // wait group for MetricCache
|
cachewg sync.WaitGroup // wait group for MetricCache
|
||||||
maxForward int // number of metrics to forward maximally in one iteration
|
maxForward int // number of metrics to forward maximally in one iteration
|
||||||
statsCollForward int64
|
|
||||||
statsRecvForward int64
|
|
||||||
statsCacheForward int64
|
|
||||||
statsTotalForward int64
|
|
||||||
statsDropped int64
|
|
||||||
statsRenamed int64
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// MetricRouter access functions
|
// MetricRouter access functions
|
||||||
@@ -127,12 +121,6 @@ func (r *metricRouter) Init(ticker mct.MultiChanTicker, wg *sync.WaitGroup, rout
|
|||||||
for _, mname := range r.config.DropMetrics {
|
for _, mname := range r.config.DropMetrics {
|
||||||
r.config.dropMetrics[mname] = true
|
r.config.dropMetrics[mname] = true
|
||||||
}
|
}
|
||||||
r.statsCollForward = 0
|
|
||||||
r.statsRecvForward = 0
|
|
||||||
r.statsCacheForward = 0
|
|
||||||
r.statsTotalForward = 0
|
|
||||||
r.statsDropped = 0
|
|
||||||
r.statsRenamed = 0
|
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -152,7 +140,6 @@ func (r *metricRouter) StartTimer() {
|
|||||||
cclog.ComponentDebug("MetricRouter", "TIMER DONE")
|
cclog.ComponentDebug("MetricRouter", "TIMER DONE")
|
||||||
return
|
return
|
||||||
case t := <-m:
|
case t := <-m:
|
||||||
cclog.ComponentDebug("MetricRouter", "INTERVAL_TICK", t.Unix())
|
|
||||||
r.timestamp = t
|
r.timestamp = t
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -266,8 +253,6 @@ func (r *metricRouter) Start() {
|
|||||||
r.DoDelTags(point)
|
r.DoDelTags(point)
|
||||||
name := point.Name()
|
name := point.Name()
|
||||||
if new, ok := r.config.RenameMetrics[name]; ok {
|
if new, ok := r.config.RenameMetrics[name]; ok {
|
||||||
r.statsRenamed++
|
|
||||||
ComponentStatInt("MetricRouter", "renamed", r.statsRenamed)
|
|
||||||
point.SetName(new)
|
point.SetName(new)
|
||||||
point.AddMeta("oldname", name)
|
point.AddMeta("oldname", name)
|
||||||
}
|
}
|
||||||
@@ -287,14 +272,7 @@ func (r *metricRouter) Start() {
|
|||||||
p.SetTime(r.timestamp)
|
p.SetTime(r.timestamp)
|
||||||
}
|
}
|
||||||
if !r.dropMetric(p) {
|
if !r.dropMetric(p) {
|
||||||
r.statsCollForward++
|
|
||||||
r.statsTotalForward++
|
|
||||||
ComponentStatInt("MetricRouter", "collector_forward", r.statsCollForward)
|
|
||||||
ComponentStatInt("MetricRouter", "total_forward", r.statsTotalForward)
|
|
||||||
forward(p)
|
forward(p)
|
||||||
} else {
|
|
||||||
r.statsDropped++
|
|
||||||
ComponentStatInt("MetricRouter", "dropped", r.statsDropped)
|
|
||||||
}
|
}
|
||||||
// even if the metric is dropped, it is stored in the cache for
|
// even if the metric is dropped, it is stored in the cache for
|
||||||
// aggregations
|
// aggregations
|
||||||
@@ -310,14 +288,7 @@ func (r *metricRouter) Start() {
|
|||||||
p.SetTime(r.timestamp)
|
p.SetTime(r.timestamp)
|
||||||
}
|
}
|
||||||
if !r.dropMetric(p) {
|
if !r.dropMetric(p) {
|
||||||
r.statsRecvForward++
|
|
||||||
r.statsTotalForward++
|
|
||||||
ComponentStatInt("MetricRouter", "receiver_forward", r.statsRecvForward)
|
|
||||||
ComponentStatInt("MetricRouter", "total_forward", r.statsTotalForward)
|
|
||||||
forward(p)
|
forward(p)
|
||||||
} else {
|
|
||||||
r.statsDropped++
|
|
||||||
ComponentStatInt("MetricRouter", "dropped", r.statsDropped)
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -326,14 +297,7 @@ func (r *metricRouter) Start() {
|
|||||||
// receive from metric collector
|
// receive from metric collector
|
||||||
if !r.dropMetric(p) {
|
if !r.dropMetric(p) {
|
||||||
p.AddTag(r.config.HostnameTagName, r.hostname)
|
p.AddTag(r.config.HostnameTagName, r.hostname)
|
||||||
r.statsCacheForward++
|
|
||||||
r.statsTotalForward++
|
|
||||||
ComponentStatInt("MetricRouter", "cache_forward", r.statsCacheForward)
|
|
||||||
ComponentStatInt("MetricRouter", "total_forward", r.statsTotalForward)
|
|
||||||
forward(p)
|
forward(p)
|
||||||
} else {
|
|
||||||
r.statsDropped++
|
|
||||||
ComponentStatInt("MetricRouter", "dropped", r.statsDropped)
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@@ -11,7 +11,6 @@ import (
|
|||||||
|
|
||||||
cclog "github.com/ClusterCockpit/cc-metric-collector/internal/ccLogger"
|
cclog "github.com/ClusterCockpit/cc-metric-collector/internal/ccLogger"
|
||||||
lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric"
|
lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric"
|
||||||
stats "github.com/ClusterCockpit/cc-metric-collector/internal/metricRouter"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
const GMETRIC_EXEC = `gmetric`
|
const GMETRIC_EXEC = `gmetric`
|
||||||
@@ -33,7 +32,6 @@ type GangliaSink struct {
|
|||||||
gmetric_path string
|
gmetric_path string
|
||||||
gmetric_config string
|
gmetric_config string
|
||||||
config GangliaSinkConfig
|
config GangliaSinkConfig
|
||||||
statsSentMetrics int64
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s *GangliaSink) Write(point lp.CCMetric) error {
|
func (s *GangliaSink) Write(point lp.CCMetric) error {
|
||||||
@@ -80,8 +78,6 @@ func (s *GangliaSink) Write(point lp.CCMetric) error {
|
|||||||
command := exec.Command(s.gmetric_path, argstr...)
|
command := exec.Command(s.gmetric_path, argstr...)
|
||||||
command.Wait()
|
command.Wait()
|
||||||
_, err = command.Output()
|
_, err = command.Output()
|
||||||
s.statsSentMetrics++
|
|
||||||
stats.ComponentStatInt(s.name, "sent_metrics", s.statsSentMetrics)
|
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -124,6 +120,5 @@ func NewGangliaSink(name string, config json.RawMessage) (Sink, error) {
|
|||||||
if len(s.config.GmetricConfig) > 0 {
|
if len(s.config.GmetricConfig) > 0 {
|
||||||
s.gmetric_config = s.config.GmetricConfig
|
s.gmetric_config = s.config.GmetricConfig
|
||||||
}
|
}
|
||||||
s.statsSentMetrics = 0
|
|
||||||
return s, nil
|
return s, nil
|
||||||
}
|
}
|
||||||
|
@@ -11,7 +11,6 @@ import (
|
|||||||
|
|
||||||
cclog "github.com/ClusterCockpit/cc-metric-collector/internal/ccLogger"
|
cclog "github.com/ClusterCockpit/cc-metric-collector/internal/ccLogger"
|
||||||
lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric"
|
lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric"
|
||||||
stats "github.com/ClusterCockpit/cc-metric-collector/internal/metricRouter"
|
|
||||||
influx "github.com/influxdata/line-protocol"
|
influx "github.com/influxdata/line-protocol"
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -37,21 +36,19 @@ type HttpSink struct {
|
|||||||
idleConnTimeout time.Duration
|
idleConnTimeout time.Duration
|
||||||
timeout time.Duration
|
timeout time.Duration
|
||||||
flushDelay time.Duration
|
flushDelay time.Duration
|
||||||
statsProcessed int64
|
|
||||||
statsFlushes int64
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s *HttpSink) Write(m lp.CCMetric) error {
|
func (s *HttpSink) Write(m lp.CCMetric) error {
|
||||||
if s.buffer.Len() == 0 && s.flushDelay != 0 {
|
if s.buffer.Len() == 0 && s.flushDelay != 0 {
|
||||||
// This is the first write since the last flush, start the flushTimer!
|
// This is the first write since the last flush, start the flushTimer!
|
||||||
if s.flushTimer != nil && s.flushTimer.Stop() {
|
if s.flushTimer != nil && s.flushTimer.Stop() {
|
||||||
cclog.ComponentDebug(s.name, "unexpected: the flushTimer was already running?")
|
cclog.ComponentDebug("HttpSink", "unexpected: the flushTimer was already running?")
|
||||||
}
|
}
|
||||||
|
|
||||||
// Run a batched flush for all lines that have arrived in the last second
|
// Run a batched flush for all lines that have arrived in the last second
|
||||||
s.flushTimer = time.AfterFunc(s.flushDelay, func() {
|
s.flushTimer = time.AfterFunc(s.flushDelay, func() {
|
||||||
if err := s.Flush(); err != nil {
|
if err := s.Flush(); err != nil {
|
||||||
cclog.ComponentError(s.name, "flush failed:", err.Error())
|
cclog.ComponentError("HttpSink", "flush failed:", err.Error())
|
||||||
}
|
}
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
@@ -63,11 +60,8 @@ func (s *HttpSink) Write(m lp.CCMetric) error {
|
|||||||
s.lock.Unlock() // defer does not work here as Flush() takes the lock as well
|
s.lock.Unlock() // defer does not work here as Flush() takes the lock as well
|
||||||
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
cclog.ComponentError(s.name, "encoding failed:", err.Error())
|
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
s.statsProcessed++
|
|
||||||
stats.ComponentStatInt(s.name, "processed_metrics", s.statsProcessed)
|
|
||||||
|
|
||||||
// Flush synchronously if "flush_delay" is zero
|
// Flush synchronously if "flush_delay" is zero
|
||||||
if s.flushDelay == 0 {
|
if s.flushDelay == 0 {
|
||||||
@@ -90,7 +84,6 @@ func (s *HttpSink) Flush() error {
|
|||||||
// Create new request to send buffer
|
// Create new request to send buffer
|
||||||
req, err := http.NewRequest(http.MethodPost, s.config.URL, s.buffer)
|
req, err := http.NewRequest(http.MethodPost, s.config.URL, s.buffer)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
cclog.ComponentError(s.name, "failed to create request:", err.Error())
|
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -107,18 +100,13 @@ func (s *HttpSink) Flush() error {
|
|||||||
|
|
||||||
// Handle transport/tcp errors
|
// Handle transport/tcp errors
|
||||||
if err != nil {
|
if err != nil {
|
||||||
cclog.ComponentError(s.name, "transport/tcp error:", err.Error())
|
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
// Handle application errors
|
// Handle application errors
|
||||||
if res.StatusCode != http.StatusOK {
|
if res.StatusCode != http.StatusOK {
|
||||||
err = errors.New(res.Status)
|
return errors.New(res.Status)
|
||||||
cclog.ComponentError(s.name, "application error:", err.Error())
|
|
||||||
return err
|
|
||||||
}
|
}
|
||||||
s.statsFlushes++
|
|
||||||
stats.ComponentStatInt(s.name, "flushes", s.statsFlushes)
|
|
||||||
|
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
@@ -126,7 +114,7 @@ func (s *HttpSink) Flush() error {
|
|||||||
func (s *HttpSink) Close() {
|
func (s *HttpSink) Close() {
|
||||||
s.flushTimer.Stop()
|
s.flushTimer.Stop()
|
||||||
if err := s.Flush(); err != nil {
|
if err := s.Flush(); err != nil {
|
||||||
cclog.ComponentError(s.name, "flush failed:", err.Error())
|
cclog.ComponentError("HttpSink", "flush failed:", err.Error())
|
||||||
}
|
}
|
||||||
s.client.CloseIdleConnections()
|
s.client.CloseIdleConnections()
|
||||||
}
|
}
|
||||||
@@ -184,7 +172,5 @@ func NewHttpSink(name string, config json.RawMessage) (Sink, error) {
|
|||||||
s.buffer = &bytes.Buffer{}
|
s.buffer = &bytes.Buffer{}
|
||||||
s.encoder = influx.NewEncoder(s.buffer)
|
s.encoder = influx.NewEncoder(s.buffer)
|
||||||
s.encoder.SetPrecision(time.Second)
|
s.encoder.SetPrecision(time.Second)
|
||||||
s.statsFlushes = 0
|
|
||||||
s.statsProcessed = 0
|
|
||||||
return s, nil
|
return s, nil
|
||||||
}
|
}
|
||||||
|
@@ -10,7 +10,6 @@ import (
|
|||||||
|
|
||||||
cclog "github.com/ClusterCockpit/cc-metric-collector/internal/ccLogger"
|
cclog "github.com/ClusterCockpit/cc-metric-collector/internal/ccLogger"
|
||||||
lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric"
|
lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric"
|
||||||
stats "github.com/ClusterCockpit/cc-metric-collector/internal/metricRouter"
|
|
||||||
influxdb2 "github.com/influxdata/influxdb-client-go/v2"
|
influxdb2 "github.com/influxdata/influxdb-client-go/v2"
|
||||||
influxdb2Api "github.com/influxdata/influxdb-client-go/v2/api"
|
influxdb2Api "github.com/influxdata/influxdb-client-go/v2/api"
|
||||||
)
|
)
|
||||||
@@ -29,10 +28,10 @@ type InfluxAsyncSinkConfig struct {
|
|||||||
BatchSize uint `json:"batch_size,omitempty"`
|
BatchSize uint `json:"batch_size,omitempty"`
|
||||||
// Interval, in ms, in which is buffer flushed if it has not been already written (by reaching batch size) . Default 1000ms
|
// Interval, in ms, in which is buffer flushed if it has not been already written (by reaching batch size) . Default 1000ms
|
||||||
FlushInterval uint `json:"flush_interval,omitempty"`
|
FlushInterval uint `json:"flush_interval,omitempty"`
|
||||||
InfluxRetryInterval string `json:"retry_interval,omitempty"`
|
InfluxRetryInterval string `json:"retry_interval"`
|
||||||
InfluxExponentialBase uint `json:"retry_exponential_base,omitempty"`
|
InfluxExponentialBase uint `json:"retry_exponential_base"`
|
||||||
InfluxMaxRetries uint `json:"max_retries,omitempty"`
|
InfluxMaxRetries uint `json:"max_retries"`
|
||||||
InfluxMaxRetryTime string `json:"max_retry_time,omitempty"`
|
InfluxMaxRetryTime string `json:"max_retry_time"`
|
||||||
}
|
}
|
||||||
|
|
||||||
type InfluxAsyncSink struct {
|
type InfluxAsyncSink struct {
|
||||||
@@ -43,9 +42,6 @@ type InfluxAsyncSink struct {
|
|||||||
config InfluxAsyncSinkConfig
|
config InfluxAsyncSinkConfig
|
||||||
influxRetryInterval uint
|
influxRetryInterval uint
|
||||||
influxMaxRetryTime uint
|
influxMaxRetryTime uint
|
||||||
sentMetrics int64
|
|
||||||
statsFlushes int64
|
|
||||||
statsErrors int64
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s *InfluxAsyncSink) connect() error {
|
func (s *InfluxAsyncSink) connect() error {
|
||||||
@@ -64,34 +60,20 @@ func (s *InfluxAsyncSink) connect() error {
|
|||||||
cclog.ComponentDebug(s.name, "Using URI", uri, "Org", s.config.Organization, "Bucket", s.config.Database)
|
cclog.ComponentDebug(s.name, "Using URI", uri, "Org", s.config.Organization, "Bucket", s.config.Database)
|
||||||
clientOptions := influxdb2.DefaultOptions()
|
clientOptions := influxdb2.DefaultOptions()
|
||||||
if s.config.BatchSize != 0 {
|
if s.config.BatchSize != 0 {
|
||||||
cclog.ComponentDebug(s.name, "Batch size", s.config.BatchSize)
|
|
||||||
clientOptions.SetBatchSize(s.config.BatchSize)
|
clientOptions.SetBatchSize(s.config.BatchSize)
|
||||||
}
|
}
|
||||||
if s.config.FlushInterval != 0 {
|
if s.config.FlushInterval != 0 {
|
||||||
cclog.ComponentDebug(s.name, "Flush interval", s.config.FlushInterval)
|
|
||||||
clientOptions.SetFlushInterval(s.config.FlushInterval)
|
clientOptions.SetFlushInterval(s.config.FlushInterval)
|
||||||
}
|
}
|
||||||
if s.influxRetryInterval != 0 {
|
|
||||||
cclog.ComponentDebug(s.name, "MaxRetryInterval", s.influxRetryInterval)
|
|
||||||
clientOptions.SetMaxRetryInterval(s.influxRetryInterval)
|
|
||||||
}
|
|
||||||
if s.influxMaxRetryTime != 0 {
|
|
||||||
cclog.ComponentDebug(s.name, "MaxRetryTime", s.influxMaxRetryTime)
|
|
||||||
clientOptions.SetMaxRetryTime(s.influxMaxRetryTime)
|
|
||||||
}
|
|
||||||
if s.config.InfluxExponentialBase != 0 {
|
|
||||||
cclog.ComponentDebug(s.name, "Exponential Base", s.config.InfluxExponentialBase)
|
|
||||||
clientOptions.SetExponentialBase(s.config.InfluxExponentialBase)
|
|
||||||
}
|
|
||||||
if s.config.InfluxMaxRetries != 0 {
|
|
||||||
cclog.ComponentDebug(s.name, "Max Retries", s.config.InfluxMaxRetries)
|
|
||||||
clientOptions.SetMaxRetries(s.config.InfluxMaxRetries)
|
|
||||||
}
|
|
||||||
clientOptions.SetTLSConfig(
|
clientOptions.SetTLSConfig(
|
||||||
&tls.Config{
|
&tls.Config{
|
||||||
InsecureSkipVerify: true,
|
InsecureSkipVerify: true,
|
||||||
},
|
},
|
||||||
).SetPrecision(time.Second)
|
)
|
||||||
|
clientOptions.SetMaxRetryInterval(s.influxRetryInterval)
|
||||||
|
clientOptions.SetMaxRetryTime(s.influxMaxRetryTime)
|
||||||
|
clientOptions.SetExponentialBase(s.config.InfluxExponentialBase)
|
||||||
|
clientOptions.SetMaxRetries(s.config.InfluxMaxRetries)
|
||||||
|
|
||||||
s.client = influxdb2.NewClientWithOptions(uri, auth, clientOptions)
|
s.client = influxdb2.NewClientWithOptions(uri, auth, clientOptions)
|
||||||
s.writeApi = s.client.WriteAPI(s.config.Organization, s.config.Database)
|
s.writeApi = s.client.WriteAPI(s.config.Organization, s.config.Database)
|
||||||
@@ -109,15 +91,11 @@ func (s *InfluxAsyncSink) Write(m lp.CCMetric) error {
|
|||||||
s.writeApi.WritePoint(
|
s.writeApi.WritePoint(
|
||||||
m.ToPoint(s.meta_as_tags),
|
m.ToPoint(s.meta_as_tags),
|
||||||
)
|
)
|
||||||
s.sentMetrics++
|
|
||||||
stats.ComponentStatInt(s.name, "send_metrics", s.sentMetrics)
|
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s *InfluxAsyncSink) Flush() error {
|
func (s *InfluxAsyncSink) Flush() error {
|
||||||
s.writeApi.Flush()
|
s.writeApi.Flush()
|
||||||
s.statsFlushes++
|
|
||||||
stats.ComponentStatInt(s.name, "flushes", s.statsFlushes)
|
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -132,14 +110,13 @@ func NewInfluxAsyncSink(name string, config json.RawMessage) (Sink, error) {
|
|||||||
s.name = fmt.Sprintf("InfluxSink(%s)", name)
|
s.name = fmt.Sprintf("InfluxSink(%s)", name)
|
||||||
|
|
||||||
// Set default for maximum number of points sent to server in single request.
|
// Set default for maximum number of points sent to server in single request.
|
||||||
s.config.BatchSize = 0
|
s.config.BatchSize = 100
|
||||||
s.influxRetryInterval = 0
|
s.influxRetryInterval = uint(time.Duration(1) * time.Second)
|
||||||
//s.config.InfluxRetryInterval = "1s"
|
s.config.InfluxRetryInterval = "1s"
|
||||||
s.influxMaxRetryTime = 0
|
s.influxMaxRetryTime = uint(7 * time.Duration(24) * time.Hour)
|
||||||
//s.config.InfluxMaxRetryTime = "168h"
|
s.config.InfluxMaxRetryTime = "168h"
|
||||||
s.config.InfluxMaxRetries = 0
|
s.config.InfluxMaxRetries = 20
|
||||||
s.config.InfluxExponentialBase = 0
|
s.config.InfluxExponentialBase = 2
|
||||||
s.config.FlushInterval = 0
|
|
||||||
|
|
||||||
// Default retry intervals (in seconds)
|
// Default retry intervals (in seconds)
|
||||||
// 1 2
|
// 1 2
|
||||||
@@ -197,17 +174,12 @@ func NewInfluxAsyncSink(name string, config json.RawMessage) (Sink, error) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Start background: Read from error channel
|
// Start background: Read from error channel
|
||||||
s.statsErrors = 0
|
|
||||||
s.errors = s.writeApi.Errors()
|
s.errors = s.writeApi.Errors()
|
||||||
go func() {
|
go func() {
|
||||||
for err := range s.errors {
|
for err := range s.errors {
|
||||||
s.statsErrors++
|
|
||||||
stats.ComponentStatInt(s.name, "errors", s.statsErrors)
|
|
||||||
cclog.ComponentError(s.name, err.Error())
|
cclog.ComponentError(s.name, err.Error())
|
||||||
}
|
}
|
||||||
}()
|
}()
|
||||||
|
|
||||||
s.sentMetrics = 0
|
|
||||||
s.statsFlushes = 0
|
|
||||||
return s, nil
|
return s, nil
|
||||||
}
|
}
|
||||||
|
@@ -6,15 +6,12 @@ import (
|
|||||||
"encoding/json"
|
"encoding/json"
|
||||||
"errors"
|
"errors"
|
||||||
"fmt"
|
"fmt"
|
||||||
"sync"
|
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
cclog "github.com/ClusterCockpit/cc-metric-collector/internal/ccLogger"
|
cclog "github.com/ClusterCockpit/cc-metric-collector/internal/ccLogger"
|
||||||
lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric"
|
lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric"
|
||||||
stats "github.com/ClusterCockpit/cc-metric-collector/internal/metricRouter"
|
|
||||||
influxdb2 "github.com/influxdata/influxdb-client-go/v2"
|
influxdb2 "github.com/influxdata/influxdb-client-go/v2"
|
||||||
influxdb2Api "github.com/influxdata/influxdb-client-go/v2/api"
|
influxdb2Api "github.com/influxdata/influxdb-client-go/v2/api"
|
||||||
"github.com/influxdata/influxdb-client-go/v2/api/write"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
type InfluxSinkConfig struct {
|
type InfluxSinkConfig struct {
|
||||||
@@ -26,13 +23,11 @@ type InfluxSinkConfig struct {
|
|||||||
Password string `json:"password,omitempty"`
|
Password string `json:"password,omitempty"`
|
||||||
Organization string `json:"organization,omitempty"`
|
Organization string `json:"organization,omitempty"`
|
||||||
SSL bool `json:"ssl,omitempty"`
|
SSL bool `json:"ssl,omitempty"`
|
||||||
FlushDelay string `json:"flush_delay,omitempty"`
|
|
||||||
BatchSize int `json:"batch_size,omitempty"`
|
|
||||||
RetentionPol string `json:"retention_policy,omitempty"`
|
RetentionPol string `json:"retention_policy,omitempty"`
|
||||||
// InfluxRetryInterval string `json:"retry_interval"`
|
InfluxRetryInterval string `json:"retry_interval"`
|
||||||
// InfluxExponentialBase uint `json:"retry_exponential_base"`
|
InfluxExponentialBase uint `json:"retry_exponential_base"`
|
||||||
// InfluxMaxRetries uint `json:"max_retries"`
|
InfluxMaxRetries uint `json:"max_retries"`
|
||||||
// InfluxMaxRetryTime string `json:"max_retry_time"`
|
InfluxMaxRetryTime string `json:"max_retry_time"`
|
||||||
//InfluxMaxRetryDelay string `json:"max_retry_delay"` // It is mentioned in the docs but there is no way to set it
|
//InfluxMaxRetryDelay string `json:"max_retry_delay"` // It is mentioned in the docs but there is no way to set it
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -43,12 +38,6 @@ type InfluxSink struct {
|
|||||||
config InfluxSinkConfig
|
config InfluxSinkConfig
|
||||||
influxRetryInterval uint
|
influxRetryInterval uint
|
||||||
influxMaxRetryTime uint
|
influxMaxRetryTime uint
|
||||||
batch []*write.Point
|
|
||||||
flushTimer *time.Timer
|
|
||||||
flushDelay time.Duration
|
|
||||||
lock sync.Mutex // Flush() runs in another goroutine, so this lock has to protect the buffer
|
|
||||||
statsSentMetrics int64
|
|
||||||
statsProcessedMetrics int64
|
|
||||||
//influxMaxRetryDelay uint
|
//influxMaxRetryDelay uint
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -67,31 +56,16 @@ func (s *InfluxSink) connect() error {
|
|||||||
}
|
}
|
||||||
cclog.ComponentDebug(s.name, "Using URI", uri, "Org", s.config.Organization, "Bucket", s.config.Database)
|
cclog.ComponentDebug(s.name, "Using URI", uri, "Org", s.config.Organization, "Bucket", s.config.Database)
|
||||||
clientOptions := influxdb2.DefaultOptions()
|
clientOptions := influxdb2.DefaultOptions()
|
||||||
|
|
||||||
// if s.influxRetryInterval != 0 {
|
|
||||||
// cclog.ComponentDebug(s.name, "MaxRetryInterval", s.influxRetryInterval)
|
|
||||||
// clientOptions.SetMaxRetryInterval(s.influxRetryInterval)
|
|
||||||
// }
|
|
||||||
// if s.influxMaxRetryTime != 0 {
|
|
||||||
// cclog.ComponentDebug(s.name, "MaxRetryTime", s.influxMaxRetryTime)
|
|
||||||
// clientOptions.SetMaxRetryTime(s.influxMaxRetryTime)
|
|
||||||
// }
|
|
||||||
// if s.config.InfluxExponentialBase != 0 {
|
|
||||||
// cclog.ComponentDebug(s.name, "Exponential Base", s.config.InfluxExponentialBase)
|
|
||||||
// clientOptions.SetExponentialBase(s.config.InfluxExponentialBase)
|
|
||||||
// }
|
|
||||||
// if s.config.InfluxMaxRetries != 0 {
|
|
||||||
// cclog.ComponentDebug(s.name, "Max Retries", s.config.InfluxMaxRetries)
|
|
||||||
// clientOptions.SetMaxRetries(s.config.InfluxMaxRetries)
|
|
||||||
// }
|
|
||||||
|
|
||||||
clientOptions.SetTLSConfig(
|
clientOptions.SetTLSConfig(
|
||||||
&tls.Config{
|
&tls.Config{
|
||||||
InsecureSkipVerify: true,
|
InsecureSkipVerify: true,
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
|
|
||||||
clientOptions.SetPrecision(time.Second)
|
clientOptions.SetMaxRetryInterval(s.influxRetryInterval)
|
||||||
|
clientOptions.SetMaxRetryTime(s.influxMaxRetryTime)
|
||||||
|
clientOptions.SetExponentialBase(s.config.InfluxExponentialBase)
|
||||||
|
clientOptions.SetMaxRetries(s.config.InfluxMaxRetries)
|
||||||
|
|
||||||
s.client = influxdb2.NewClientWithOptions(uri, auth, clientOptions)
|
s.client = influxdb2.NewClientWithOptions(uri, auth, clientOptions)
|
||||||
s.writeApi = s.client.WriteAPIBlocking(s.config.Organization, s.config.Database)
|
s.writeApi = s.client.WriteAPIBlocking(s.config.Organization, s.config.Database)
|
||||||
@@ -106,80 +80,38 @@ func (s *InfluxSink) connect() error {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func (s *InfluxSink) Write(m lp.CCMetric) error {
|
func (s *InfluxSink) Write(m lp.CCMetric) error {
|
||||||
// err :=
|
err :=
|
||||||
// s.writeApi.WritePoint(
|
s.writeApi.WritePoint(
|
||||||
// context.Background(),
|
context.Background(),
|
||||||
// m.ToPoint(s.meta_as_tags),
|
m.ToPoint(s.meta_as_tags),
|
||||||
// )
|
)
|
||||||
if len(s.batch) == 0 && s.flushDelay != 0 {
|
return err
|
||||||
// This is the first write since the last flush, start the flushTimer!
|
|
||||||
if s.flushTimer != nil && s.flushTimer.Stop() {
|
|
||||||
cclog.ComponentDebug(s.name, "unexpected: the flushTimer was already running?")
|
|
||||||
}
|
|
||||||
|
|
||||||
// Run a batched flush for all lines that have arrived in the last second
|
|
||||||
s.flushTimer = time.AfterFunc(s.flushDelay, func() {
|
|
||||||
if err := s.Flush(); err != nil {
|
|
||||||
cclog.ComponentError(s.name, "flush failed:", err.Error())
|
|
||||||
}
|
|
||||||
})
|
|
||||||
}
|
|
||||||
p := m.ToPoint(s.meta_as_tags)
|
|
||||||
s.lock.Lock()
|
|
||||||
s.statsProcessedMetrics++
|
|
||||||
s.batch = append(s.batch, p)
|
|
||||||
s.lock.Unlock()
|
|
||||||
stats.ComponentStatInt(s.name, "processed_metrics", s.statsProcessedMetrics)
|
|
||||||
|
|
||||||
// Flush synchronously if "flush_delay" is zero
|
|
||||||
if s.flushDelay == 0 {
|
|
||||||
return s.Flush()
|
|
||||||
}
|
|
||||||
|
|
||||||
return nil
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s *InfluxSink) Flush() error {
|
func (s *InfluxSink) Flush() error {
|
||||||
s.lock.Lock()
|
|
||||||
defer s.lock.Unlock()
|
|
||||||
if len(s.batch) == 0 {
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
err := s.writeApi.WritePoint(context.Background(), s.batch...)
|
|
||||||
if err != nil {
|
|
||||||
cclog.ComponentError(s.name, "flush failed:", err.Error())
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
s.statsSentMetrics += int64(len(s.batch))
|
|
||||||
stats.ComponentStatInt(s.name, "sent_metrics", s.statsSentMetrics)
|
|
||||||
s.batch = s.batch[:0]
|
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s *InfluxSink) Close() {
|
func (s *InfluxSink) Close() {
|
||||||
cclog.ComponentDebug(s.name, "Closing InfluxDB connection")
|
cclog.ComponentDebug(s.name, "Closing InfluxDB connection")
|
||||||
s.flushTimer.Stop()
|
|
||||||
s.Flush()
|
|
||||||
s.client.Close()
|
s.client.Close()
|
||||||
}
|
}
|
||||||
|
|
||||||
func NewInfluxSink(name string, config json.RawMessage) (Sink, error) {
|
func NewInfluxSink(name string, config json.RawMessage) (Sink, error) {
|
||||||
s := new(InfluxSink)
|
s := new(InfluxSink)
|
||||||
s.name = fmt.Sprintf("InfluxSink(%s)", name)
|
s.name = fmt.Sprintf("InfluxSink(%s)", name)
|
||||||
s.config.BatchSize = 100
|
|
||||||
s.config.FlushDelay = "1s"
|
|
||||||
if len(config) > 0 {
|
if len(config) > 0 {
|
||||||
err := json.Unmarshal(config, &s.config)
|
err := json.Unmarshal(config, &s.config)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
s.influxRetryInterval = 0
|
s.influxRetryInterval = uint(time.Duration(1) * time.Second)
|
||||||
s.influxMaxRetryTime = 0
|
s.config.InfluxRetryInterval = "1s"
|
||||||
// s.config.InfluxRetryInterval = ""
|
s.influxMaxRetryTime = uint(7 * time.Duration(24) * time.Hour)
|
||||||
// s.config.InfluxMaxRetryTime = ""
|
s.config.InfluxMaxRetryTime = "168h"
|
||||||
// s.config.InfluxMaxRetries = 0
|
s.config.InfluxMaxRetries = 20
|
||||||
// s.config.InfluxExponentialBase = 0
|
s.config.InfluxExponentialBase = 2
|
||||||
|
|
||||||
if len(s.config.Host) == 0 ||
|
if len(s.config.Host) == 0 ||
|
||||||
len(s.config.Port) == 0 ||
|
len(s.config.Port) == 0 ||
|
||||||
@@ -194,31 +126,19 @@ func NewInfluxSink(name string, config json.RawMessage) (Sink, error) {
|
|||||||
s.meta_as_tags[k] = true
|
s.meta_as_tags[k] = true
|
||||||
}
|
}
|
||||||
|
|
||||||
// toUint := func(duration string, def uint) uint {
|
toUint := func(duration string, def uint) uint {
|
||||||
// if len(duration) > 0 {
|
t, err := time.ParseDuration(duration)
|
||||||
// t, err := time.ParseDuration(duration)
|
|
||||||
// if err == nil {
|
|
||||||
// return uint(t.Milliseconds())
|
|
||||||
// }
|
|
||||||
// }
|
|
||||||
// return def
|
|
||||||
// }
|
|
||||||
// s.influxRetryInterval = toUint(s.config.InfluxRetryInterval, s.influxRetryInterval)
|
|
||||||
// s.influxMaxRetryTime = toUint(s.config.InfluxMaxRetryTime, s.influxMaxRetryTime)
|
|
||||||
|
|
||||||
if len(s.config.FlushDelay) > 0 {
|
|
||||||
t, err := time.ParseDuration(s.config.FlushDelay)
|
|
||||||
if err == nil {
|
if err == nil {
|
||||||
s.flushDelay = t
|
return uint(t.Milliseconds())
|
||||||
}
|
}
|
||||||
|
return def
|
||||||
}
|
}
|
||||||
s.batch = make([]*write.Point, 0, s.config.BatchSize)
|
s.influxRetryInterval = toUint(s.config.InfluxRetryInterval, s.influxRetryInterval)
|
||||||
|
s.influxMaxRetryTime = toUint(s.config.InfluxMaxRetryTime, s.influxMaxRetryTime)
|
||||||
|
|
||||||
// Connect to InfluxDB server
|
// Connect to InfluxDB server
|
||||||
if err := s.connect(); err != nil {
|
if err := s.connect(); err != nil {
|
||||||
return nil, fmt.Errorf("unable to connect: %v", err)
|
return nil, fmt.Errorf("unable to connect: %v", err)
|
||||||
}
|
}
|
||||||
s.statsSentMetrics = 0
|
|
||||||
s.statsProcessedMetrics = 0
|
|
||||||
return s, nil
|
return s, nil
|
||||||
}
|
}
|
||||||
|
@@ -17,8 +17,10 @@ The `influxdb` sink uses the official [InfluxDB golang client](https://pkg.go.de
|
|||||||
"password" : "examplepw",
|
"password" : "examplepw",
|
||||||
"organization": "myorg",
|
"organization": "myorg",
|
||||||
"ssl": true,
|
"ssl": true,
|
||||||
"flush_delay" : "1s",
|
"retry_interval" : "1s",
|
||||||
"batch_size" : 100
|
"retry_exponential_base" : 2,
|
||||||
|
"max_retries": 20,
|
||||||
|
"max_retry_time" : "168h"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
```
|
```
|
||||||
@@ -32,6 +34,9 @@ The `influxdb` sink uses the official [InfluxDB golang client](https://pkg.go.de
|
|||||||
- `password`: Password for basic authentication
|
- `password`: Password for basic authentication
|
||||||
- `organization`: Organization in the InfluxDB
|
- `organization`: Organization in the InfluxDB
|
||||||
- `ssl`: Use SSL connection
|
- `ssl`: Use SSL connection
|
||||||
- `flush_delay`: Group incoming metrics into a single batch
|
- `retry_interval`: Base retry interval for failed write requests, default 1s
|
||||||
- `batch_size`: Maximal batch size
|
- `retry_exponential_base`: The retry interval is exponentially increased with this base, default 2
|
||||||
|
- `max_retries`: Maximal number of retry attempts
|
||||||
|
- `max_retry_time`: Maximal time to retry failed writes, default 168h (one week)
|
||||||
|
|
||||||
|
For information about the calculation of the retry interval settings, see the [official influxdb-client-go documentation](https://github.com/influxdata/influxdb-client-go#handling-of-failed-async-writes).
|
@@ -73,7 +73,6 @@ import (
|
|||||||
|
|
||||||
cclog "github.com/ClusterCockpit/cc-metric-collector/internal/ccLogger"
|
cclog "github.com/ClusterCockpit/cc-metric-collector/internal/ccLogger"
|
||||||
lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric"
|
lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric"
|
||||||
stats "github.com/ClusterCockpit/cc-metric-collector/internal/metricRouter"
|
|
||||||
"github.com/NVIDIA/go-nvml/pkg/dl"
|
"github.com/NVIDIA/go-nvml/pkg/dl"
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -108,7 +107,6 @@ type LibgangliaSink struct {
|
|||||||
gmond_config C.Ganglia_gmond_config
|
gmond_config C.Ganglia_gmond_config
|
||||||
send_channels C.Ganglia_udp_send_channels
|
send_channels C.Ganglia_udp_send_channels
|
||||||
cstrCache map[string]*C.char
|
cstrCache map[string]*C.char
|
||||||
statsSentMetrics int64
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s *LibgangliaSink) Write(point lp.CCMetric) error {
|
func (s *LibgangliaSink) Write(point lp.CCMetric) error {
|
||||||
@@ -204,8 +202,6 @@ func (s *LibgangliaSink) Write(point lp.CCMetric) error {
|
|||||||
C.Ganglia_metric_destroy(gmetric)
|
C.Ganglia_metric_destroy(gmetric)
|
||||||
// Free the value C string, the only one not stored in the cache
|
// Free the value C string, the only one not stored in the cache
|
||||||
C.free(unsafe.Pointer(c_value))
|
C.free(unsafe.Pointer(c_value))
|
||||||
s.statsSentMetrics++
|
|
||||||
stats.ComponentStatInt(s.name, "sent_metrics", s.statsSentMetrics)
|
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -251,7 +247,7 @@ func NewLibgangliaSink(name string, config json.RawMessage) (Sink, error) {
|
|||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, fmt.Errorf("error opening %s: %v", s.config.GangliaLib, err)
|
return nil, fmt.Errorf("error opening %s: %v", s.config.GangliaLib, err)
|
||||||
}
|
}
|
||||||
s.statsSentMetrics = 0
|
|
||||||
// Set up cache for the C strings
|
// Set up cache for the C strings
|
||||||
s.cstrCache = make(map[string]*C.char)
|
s.cstrCache = make(map[string]*C.char)
|
||||||
// s.cstrCache["globals"] = C.CString("globals")
|
// s.cstrCache["globals"] = C.CString("globals")
|
||||||
|
@@ -11,7 +11,6 @@ import (
|
|||||||
|
|
||||||
cclog "github.com/ClusterCockpit/cc-metric-collector/internal/ccLogger"
|
cclog "github.com/ClusterCockpit/cc-metric-collector/internal/ccLogger"
|
||||||
lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric"
|
lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric"
|
||||||
stats "github.com/ClusterCockpit/cc-metric-collector/internal/metricRouter"
|
|
||||||
"github.com/gorilla/mux"
|
"github.com/gorilla/mux"
|
||||||
"github.com/prometheus/client_golang/prometheus"
|
"github.com/prometheus/client_golang/prometheus"
|
||||||
"github.com/prometheus/client_golang/prometheus/promhttp"
|
"github.com/prometheus/client_golang/prometheus/promhttp"
|
||||||
@@ -35,7 +34,6 @@ type PrometheusSink struct {
|
|||||||
nodeMetrics map[string]prometheus.Gauge
|
nodeMetrics map[string]prometheus.Gauge
|
||||||
promWg sync.WaitGroup
|
promWg sync.WaitGroup
|
||||||
promServer *http.Server
|
promServer *http.Server
|
||||||
statsSentMetrics int64
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func intToFloat64(input interface{}) (float64, error) {
|
func intToFloat64(input interface{}) (float64, error) {
|
||||||
@@ -115,8 +113,6 @@ func (s *PrometheusSink) newMetric(metric lp.CCMetric) error {
|
|||||||
s.nodeMetrics[name] = new
|
s.nodeMetrics[name] = new
|
||||||
prometheus.Register(new)
|
prometheus.Register(new)
|
||||||
}
|
}
|
||||||
s.statsSentMetrics++
|
|
||||||
stats.ComponentStatInt(s.name, "sent_metrics", s.statsSentMetrics)
|
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -150,8 +146,6 @@ func (s *PrometheusSink) updateMetric(metric lp.CCMetric) error {
|
|||||||
}
|
}
|
||||||
s.nodeMetrics[name].Set(value)
|
s.nodeMetrics[name].Set(value)
|
||||||
}
|
}
|
||||||
s.statsSentMetrics++
|
|
||||||
stats.ComponentStatInt(s.name, "sent_metrics", s.statsSentMetrics)
|
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@@ -7,7 +7,6 @@ import (
|
|||||||
|
|
||||||
cclog "github.com/ClusterCockpit/cc-metric-collector/internal/ccLogger"
|
cclog "github.com/ClusterCockpit/cc-metric-collector/internal/ccLogger"
|
||||||
lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric"
|
lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric"
|
||||||
stats "github.com/ClusterCockpit/cc-metric-collector/internal/metricRouter"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
type SampleSinkConfig struct {
|
type SampleSinkConfig struct {
|
||||||
@@ -15,15 +14,12 @@ type SampleSinkConfig struct {
|
|||||||
// See: metricSink.go
|
// See: metricSink.go
|
||||||
defaultSinkConfig
|
defaultSinkConfig
|
||||||
// Additional config options, for SampleSink
|
// Additional config options, for SampleSink
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
type SampleSink struct {
|
type SampleSink struct {
|
||||||
// declares elements 'name' and 'meta_as_tags' (string to bool map!)
|
// declares elements 'name' and 'meta_as_tags' (string to bool map!)
|
||||||
sink
|
sink
|
||||||
config SampleSinkConfig // entry point to the SampleSinkConfig
|
config SampleSinkConfig // entry point to the SampleSinkConfig
|
||||||
// Stats counters
|
|
||||||
statsSentMetrics int64
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Implement functions required for Sink interface
|
// Implement functions required for Sink interface
|
||||||
@@ -34,8 +30,6 @@ type SampleSink struct {
|
|||||||
func (s *SampleSink) Write(point lp.CCMetric) error {
|
func (s *SampleSink) Write(point lp.CCMetric) error {
|
||||||
// based on s.meta_as_tags use meta infos as tags
|
// based on s.meta_as_tags use meta infos as tags
|
||||||
log.Print(point)
|
log.Print(point)
|
||||||
s.statsSentMetrics++
|
|
||||||
stats.ComponentStatInt(s.name, "sent_metrics", s.statsSentMetrics)
|
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -69,9 +63,6 @@ func NewSampleSink(name string, config json.RawMessage) (Sink, error) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Initialize stats counters
|
|
||||||
s.statsSentMetrics = 0
|
|
||||||
|
|
||||||
// Create lookup map to use meta infos as tags in the output metric
|
// Create lookup map to use meta infos as tags in the output metric
|
||||||
s.meta_as_tags = make(map[string]bool)
|
s.meta_as_tags = make(map[string]bool)
|
||||||
for _, k := range s.config.MetaAsTags {
|
for _, k := range s.config.MetaAsTags {
|
||||||
|
@@ -102,19 +102,13 @@ func (sm *sinkManager) Start() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
toTheSinks := func(p lp.CCMetric) {
|
toTheSinks := func(p lp.CCMetric) {
|
||||||
var wg sync.WaitGroup
|
|
||||||
// Send received metric to all outputs
|
// Send received metric to all outputs
|
||||||
cclog.ComponentDebug("SinkManager", "WRITE", p)
|
cclog.ComponentDebug("SinkManager", "WRITE", p)
|
||||||
for _, s := range sm.sinks {
|
for _, s := range sm.sinks {
|
||||||
wg.Add(1)
|
|
||||||
go func(s Sink) {
|
|
||||||
if err := s.Write(p); err != nil {
|
if err := s.Write(p); err != nil {
|
||||||
cclog.ComponentError("SinkManager", "WRITE", s.Name(), "write failed:", err.Error())
|
cclog.ComponentError("SinkManager", "WRITE", s.Name(), "write failed:", err.Error())
|
||||||
}
|
}
|
||||||
wg.Done()
|
|
||||||
}(s)
|
|
||||||
}
|
}
|
||||||
wg.Wait()
|
|
||||||
}
|
}
|
||||||
|
|
||||||
for {
|
for {
|
||||||
|
@@ -8,7 +8,6 @@ import (
|
|||||||
|
|
||||||
// "time"
|
// "time"
|
||||||
lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric"
|
lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric"
|
||||||
stats "github.com/ClusterCockpit/cc-metric-collector/internal/metricRouter"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
type StdoutSink struct {
|
type StdoutSink struct {
|
||||||
@@ -18,7 +17,6 @@ type StdoutSink struct {
|
|||||||
defaultSinkConfig
|
defaultSinkConfig
|
||||||
Output string `json:"output_file,omitempty"`
|
Output string `json:"output_file,omitempty"`
|
||||||
}
|
}
|
||||||
sentMetrics int64
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s *StdoutSink) Write(m lp.CCMetric) error {
|
func (s *StdoutSink) Write(m lp.CCMetric) error {
|
||||||
@@ -26,8 +24,6 @@ func (s *StdoutSink) Write(m lp.CCMetric) error {
|
|||||||
s.output,
|
s.output,
|
||||||
m.ToLineProtocol(s.meta_as_tags),
|
m.ToLineProtocol(s.meta_as_tags),
|
||||||
)
|
)
|
||||||
s.sentMetrics++
|
|
||||||
stats.ComponentStatInt(s.name, "sent_metrics", s.sentMetrics)
|
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -72,7 +68,6 @@ func NewStdoutSink(name string, config json.RawMessage) (Sink, error) {
|
|||||||
for _, k := range s.config.MetaAsTags {
|
for _, k := range s.config.MetaAsTags {
|
||||||
s.meta_as_tags[k] = true
|
s.meta_as_tags[k] = true
|
||||||
}
|
}
|
||||||
s.sentMetrics = 0
|
|
||||||
|
|
||||||
return s, nil
|
return s, nil
|
||||||
}
|
}
|
||||||
|
Reference in New Issue
Block a user