mirror of
https://github.com/ClusterCockpit/cc-metric-collector.git
synced 2025-10-24 06:45:07 +02:00
Skip collectors that fail at init. Only write metrics with fields
This commit is contained in:
@@ -2,6 +2,7 @@ package main
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"encoding/json"
|
"encoding/json"
|
||||||
|
"flag"
|
||||||
"fmt"
|
"fmt"
|
||||||
"github.com/ClusterCockpit/cc-metric-collector/collectors"
|
"github.com/ClusterCockpit/cc-metric-collector/collectors"
|
||||||
"github.com/ClusterCockpit/cc-metric-collector/sinks"
|
"github.com/ClusterCockpit/cc-metric-collector/sinks"
|
||||||
@@ -11,7 +12,6 @@ import (
|
|||||||
"strings"
|
"strings"
|
||||||
"sync"
|
"sync"
|
||||||
"time"
|
"time"
|
||||||
"flag"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
// List of provided collectors. Which collector should be run can be
|
// List of provided collectors. Which collector should be run can be
|
||||||
@@ -25,6 +25,7 @@ var Collectors = map[string]collectors.MetricGetter{
|
|||||||
"lustrestat": &collectors.LustreCollector{},
|
"lustrestat": &collectors.LustreCollector{},
|
||||||
"cpustat": &collectors.CpustatCollector{},
|
"cpustat": &collectors.CpustatCollector{},
|
||||||
"topprocs": &collectors.TopProcsCollector{},
|
"topprocs": &collectors.TopProcsCollector{},
|
||||||
|
"nvidia": &collectors.NvidiaCollector{},
|
||||||
}
|
}
|
||||||
|
|
||||||
var Sinks = map[string]sinks.SinkFuncs{
|
var Sinks = map[string]sinks.SinkFuncs{
|
||||||
@@ -75,7 +76,7 @@ func ReadCli() map[string]string {
|
|||||||
func SetLogging(logfile string) error {
|
func SetLogging(logfile string) error {
|
||||||
var file *os.File
|
var file *os.File
|
||||||
var err error
|
var err error
|
||||||
if (logfile != "stderr") {
|
if logfile != "stderr" {
|
||||||
file, err = os.OpenFile(logfile, os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0600)
|
file, err = os.OpenFile(logfile, os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0600)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Fatal(err)
|
log.Fatal(err)
|
||||||
@@ -119,14 +120,14 @@ func main() {
|
|||||||
}
|
}
|
||||||
clicfg := ReadCli()
|
clicfg := ReadCli()
|
||||||
err = SetLogging(clicfg["logfile"])
|
err = SetLogging(clicfg["logfile"])
|
||||||
if (err != nil) {
|
if err != nil {
|
||||||
log.Print("Error setting up logging system to ", clicfg["logfile"])
|
log.Print("Error setting up logging system to ", clicfg["logfile"])
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
// Load and check configuration
|
// Load and check configuration
|
||||||
err = LoadConfiguration(clicfg["configfile"], &config)
|
err = LoadConfiguration(clicfg["configfile"], &config)
|
||||||
if (err != nil) {
|
if err != nil {
|
||||||
log.Print("Error reading configuration file ", clicfg["configfile"])
|
log.Print("Error reading configuration file ", clicfg["configfile"])
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
@@ -167,11 +168,18 @@ func main() {
|
|||||||
shutdown(&wg, &config, sink)
|
shutdown(&wg, &config, sink)
|
||||||
|
|
||||||
// Initialize all collectors
|
// Initialize all collectors
|
||||||
|
tmp := make([]string, 0)
|
||||||
for _, c := range config.Collectors {
|
for _, c := range config.Collectors {
|
||||||
col := Collectors[c]
|
col := Collectors[c]
|
||||||
col.Init()
|
err = col.Init()
|
||||||
|
if err != nil {
|
||||||
|
log.Print("SKIP ", col.Name())
|
||||||
|
} else {
|
||||||
log.Print("Start ", col.Name())
|
log.Print("Start ", col.Name())
|
||||||
|
tmp = append(tmp, c)
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
config.Collectors = tmp
|
||||||
|
|
||||||
// Setup up ticker loop
|
// Setup up ticker loop
|
||||||
log.Print("Running loop every ", time.Duration(config.Interval)*time.Second)
|
log.Print("Running loop every ", time.Duration(config.Interval)*time.Second)
|
||||||
@@ -229,23 +237,29 @@ func main() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Send out node metrics
|
// Send out node metrics
|
||||||
|
if len(nodeFields) > 0 {
|
||||||
sink.Write("node", map[string]string{"host": host}, nodeFields, t)
|
sink.Write("node", map[string]string{"host": host}, nodeFields, t)
|
||||||
|
}
|
||||||
|
|
||||||
// Send out socket metrics (if any)
|
// Send out socket metrics (if any)
|
||||||
if scount > 0 {
|
if scount > 0 {
|
||||||
for sid, socket := range socketsFields {
|
for sid, socket := range socketsFields {
|
||||||
|
if len(socket) > 0 {
|
||||||
sink.Write("socket", map[string]string{"socket": fmt.Sprintf("%d", sid), "host": host}, socket, t)
|
sink.Write("socket", map[string]string{"socket": fmt.Sprintf("%d", sid), "host": host}, socket, t)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Send out CPU metrics (if any)
|
// Send out CPU metrics (if any)
|
||||||
if ccount > 0 {
|
if ccount > 0 {
|
||||||
for cid, cpu := range cpuFields {
|
for cid, cpu := range cpuFields {
|
||||||
|
if len(cpu) > 0 {
|
||||||
sink.Write("cpu", map[string]string{"cpu": fmt.Sprintf("%d", cid), "host": host}, cpu, t)
|
sink.Write("cpu", map[string]string{"cpu": fmt.Sprintf("%d", cid), "host": host}, cpu, t)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
}()
|
}()
|
||||||
|
|
||||||
// Wait until receiving an interrupt
|
// Wait until receiving an interrupt
|
||||||
|
Reference in New Issue
Block a user