cc-metric-collector/metric-collector.go

213 lines
5.4 KiB
Go
Raw Normal View History

2021-03-25 14:46:25 +01:00
package main
import (
"encoding/json"
"fmt"
"github.com/ClusterCockpit/cc-metric-collector/collectors"
"github.com/ClusterCockpit/cc-metric-collector/sinks"
2021-03-25 14:46:25 +01:00
"log"
"os"
"os/signal"
"strings"
2021-03-25 14:46:25 +01:00
"sync"
"time"
)
// List of provided collectors. Which collector should be run can be
// configured at 'collectors' list in 'config.json'.
2021-03-25 14:46:25 +01:00
var Collectors = map[string]collectors.MetricGetter{
2021-03-25 17:47:08 +01:00
"likwid": &collectors.LikwidCollector{},
"loadavg": &collectors.LoadavgCollector{},
"memstat": &collectors.MemstatCollector{},
"netstat": &collectors.NetstatCollector{},
"ibstat": &collectors.InfinibandCollector{},
"lustrestat": &collectors.LustreCollector{},
2021-03-25 14:46:25 +01:00
}
2021-03-25 17:47:08 +01:00
var Sinks = map[string]sinks.SinkFuncs{
"influxdb": &sinks.InfluxSink{},
2021-03-26 17:03:46 +01:00
"stdout": &sinks.StdoutSink{},
"nats": &sinks.NatsSink{},
}
// GlobalConfig mirrors the structure of the JSON configuration file.
type GlobalConfig struct {
	// Sink selects the metric sink (Type must be a key of Sinks) and carries
	// the connection settings that are handed to the sink's Init method.
	Sink struct {
		User     string `json:"user"`
		Password string `json:"password"`
		Host     string `json:"host"`
		Port     string `json:"port"`
		Database string `json:"database"`
		Type     string `json:"type"`
	} `json:"sink"`
	// Interval is the time between two collection rounds in seconds.
	// It must be greater than zero.
	Interval int `json:"interval"`
	// Duration is handed to every collector's Read call.
	// It must be greater than zero.
	Duration int `json:"duration"`
	// Collectors names the collectors to run; every entry must be a key of
	// Collectors and the list must not be empty.
	Collectors []string `json:"collectors"`
}
// Load JSON configuration file
2021-03-25 14:46:25 +01:00
func LoadConfiguration(file string, config *GlobalConfig) error {
configFile, err := os.Open(file)
defer configFile.Close()
if err != nil {
fmt.Println(err.Error())
}
jsonParser := json.NewDecoder(configFile)
jsonParser.Decode(config)
return err
}
// Register an interrupt handler for Ctrl+C and similar. At signal,
// all collectors are closed
func shutdown(wg *sync.WaitGroup, config *GlobalConfig, sink sinks.SinkFuncs) {
2021-03-25 14:46:25 +01:00
sigs := make(chan os.Signal, 1)
signal.Notify(sigs, os.Interrupt)
go func(wg *sync.WaitGroup) {
<-sigs
log.Print("Shutdown...")
for _, c := range config.Collectors {
col := Collectors[c]
log.Print("Stop ", col.Name())
col.Close()
}
time.Sleep(1 * time.Second)
sink.Close()
2021-03-25 14:46:25 +01:00
wg.Done()
}(wg)
}
func main() {
var config GlobalConfig
var wg sync.WaitGroup
wg.Add(1)
host, err := os.Hostname()
2021-03-25 17:47:08 +01:00
if err != nil {
log.Print(err)
return
}
2021-03-25 14:46:25 +01:00
// Load and check configuration
2021-03-25 14:46:25 +01:00
LoadConfiguration("config.json", &config)
2021-03-25 17:47:08 +01:00
if config.Interval <= 0 || time.Duration(config.Interval)*time.Second <= 0 {
log.Print("Configuration value 'interval' must be greater than zero")
return
2021-03-25 14:46:25 +01:00
}
if config.Duration <= 0 {
2021-03-25 17:47:08 +01:00
log.Print("Configuration value 'duration' must be greater than zero")
return
2021-03-25 14:46:25 +01:00
}
if len(config.Collectors) == 0 {
var keys []string
for k := range Collectors {
keys = append(keys, k)
}
log.Print("Configuration value 'collectors' does not contain any collector. Available: ", strings.Join(keys, ", "))
return
}
for _, name := range config.Collectors {
if _, found := Collectors[name]; !found {
log.Print("Invalid collector '", name, "' in configuration")
return
}
}
if _, found := Sinks[config.Sink.Type]; !found {
log.Print("Invalid sink type '", config.Sink.Type, "' in configuration")
return
}
// Setup sink
sink := Sinks[config.Sink.Type]
err = sink.Init(config.Sink.Host, config.Sink.Port, config.Sink.User, config.Sink.Password, config.Sink.Database)
if err != nil {
return
}
// Register interrupt handler
shutdown(&wg, &config, sink)
// Initialize all collectors
2021-03-25 14:46:25 +01:00
for _, c := range config.Collectors {
col := Collectors[c]
col.Init()
log.Print("Start ", col.Name())
}
// Setup up ticker loop
log.Print("Running loop every ", time.Duration(config.Interval)*time.Second)
2021-03-25 14:46:25 +01:00
ticker := time.NewTicker(time.Duration(config.Interval) * time.Second)
done := make(chan bool)
// Storage for all node metrics
nodeFields := make(map[string]interface{})
// Storage for all socket metrics
2021-03-25 14:46:25 +01:00
slist := collectors.SocketList()
socketsFields := make(map[int]map[string]interface{}, len(slist))
2021-03-25 17:47:08 +01:00
for _, s := range slist {
socketsFields[s] = make(map[string]interface{})
2021-03-25 17:47:08 +01:00
}
// Storage for all CPU metrics
2021-03-25 17:47:08 +01:00
clist := collectors.CpuList()
cpuFields := make(map[int]map[string]interface{}, len(clist))
2021-03-25 17:47:08 +01:00
for _, s := range clist {
cpuFields[s] = make(map[string]interface{})
2021-03-25 17:47:08 +01:00
}
2021-03-25 14:46:25 +01:00
go func() {
for {
select {
case <-done:
return
2021-03-25 17:47:08 +01:00
case t := <-ticker.C:
// Count how many socket and cpu metrics are returned
2021-03-25 17:47:08 +01:00
scount := 0
ccount := 0
// Read all collectors are sort the results in the right
// storage locations
2021-03-25 17:47:08 +01:00
for _, c := range config.Collectors {
col := Collectors[c]
col.Read(time.Duration(config.Duration))
2021-03-25 17:47:08 +01:00
for key, val := range col.GetNodeMetric() {
nodeFields[key] = val
2021-03-25 17:47:08 +01:00
}
for sid, socket := range col.GetSocketMetrics() {
for key, val := range socket {
socketsFields[sid][key] = val
2021-03-25 17:47:08 +01:00
scount++
}
}
for cid, cpu := range col.GetCpuMetrics() {
for key, val := range cpu {
cpuFields[cid][key] = val
2021-03-25 17:47:08 +01:00
ccount++
}
}
2021-03-25 14:46:25 +01:00
}
// Send out node metrics
sink.Write("node", map[string]string{"host": host}, nodeFields, t)
// Send out socket metrics (if any)
2021-03-25 17:47:08 +01:00
if scount > 0 {
for sid, socket := range socketsFields {
sink.Write("socket", map[string]string{"socket": fmt.Sprintf("%d", sid), "host": host}, socket, t)
2021-03-25 17:47:08 +01:00
}
}
// Send out CPU metrics (if any)
2021-03-25 17:47:08 +01:00
if ccount > 0 {
for cid, cpu := range cpuFields {
sink.Write("cpu", map[string]string{"cpu": fmt.Sprintf("%d", cid), "host": host}, cpu, t)
2021-03-25 17:47:08 +01:00
}
}
2021-03-25 14:46:25 +01:00
}
}
}()
// Wait until receiving an interrupt
2021-03-25 14:46:25 +01:00
wg.Wait()
}