cc-metric-collector/metric-collector.go

268 lines
6.7 KiB
Go

package main
import (
"encoding/json"
"flag"
"fmt"
"github.com/ClusterCockpit/cc-metric-collector/collectors"
"github.com/ClusterCockpit/cc-metric-collector/sinks"
"log"
"os"
"os/signal"
"strings"
"sync"
"time"
)
// List of provided collectors. Which collector should be run can be
// configured at 'collectors' list in 'config.json'.
var Collectors = map[string]collectors.MetricGetter{
"likwid": &collectors.LikwidCollector{},
"loadavg": &collectors.LoadavgCollector{},
"memstat": &collectors.MemstatCollector{},
"netstat": &collectors.NetstatCollector{},
"ibstat": &collectors.InfinibandCollector{},
"lustrestat": &collectors.LustreCollector{},
"cpustat": &collectors.CpustatCollector{},
"topprocs": &collectors.TopProcsCollector{},
"nvidia": &collectors.NvidiaCollector{},
}
var Sinks = map[string]sinks.SinkFuncs{
"influxdb": &sinks.InfluxSink{},
"stdout": &sinks.StdoutSink{},
"nats": &sinks.NatsSink{},
}
// Structure of the configuration file
type GlobalConfig struct {
Sink struct {
User string `json:"user"`
Password string `json:"password"`
Host string `json:"host"`
Port string `json:"port"`
Database string `json:"database"`
Type string `json:"type"`
} `json:"sink"`
Interval int `json:"interval"`
Duration int `json:"duration"`
Collectors []string `json:"collectors"`
}
// Load JSON configuration file
func LoadConfiguration(file string, config *GlobalConfig) error {
configFile, err := os.Open(file)
defer configFile.Close()
if err != nil {
fmt.Println(err.Error())
return err
}
jsonParser := json.NewDecoder(configFile)
jsonParser.Decode(config)
return err
}
func ReadCli() map[string]string {
var m map[string]string
cfg := flag.String("config", "./config.json", "Path to configuration file")
logfile := flag.String("log", "stderr", "Path for logfile")
flag.Parse()
m = make(map[string]string)
m["configfile"] = *cfg
m["logfile"] = *logfile
return m
}
func SetLogging(logfile string) error {
var file *os.File
var err error
if logfile != "stderr" {
file, err = os.OpenFile(logfile, os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0600)
if err != nil {
log.Fatal(err)
return err
}
} else {
file = os.Stderr
}
log.SetOutput(file)
return nil
}
// Register an interrupt handler for Ctrl+C and similar. At signal,
// all collectors are closed
func shutdown(wg *sync.WaitGroup, config *GlobalConfig, sink sinks.SinkFuncs) {
sigs := make(chan os.Signal, 1)
signal.Notify(sigs, os.Interrupt)
go func(wg *sync.WaitGroup) {
<-sigs
log.Print("Shutdown...")
for _, c := range config.Collectors {
col := Collectors[c]
log.Print("Stop ", col.Name())
col.Close()
}
time.Sleep(1 * time.Second)
sink.Close()
wg.Done()
}(wg)
}
func main() {
var config GlobalConfig
var wg sync.WaitGroup
wg.Add(1)
host, err := os.Hostname()
if err != nil {
log.Print(err)
return
}
clicfg := ReadCli()
err = SetLogging(clicfg["logfile"])
if err != nil {
log.Print("Error setting up logging system to ", clicfg["logfile"])
return
}
// Load and check configuration
err = LoadConfiguration(clicfg["configfile"], &config)
if err != nil {
log.Print("Error reading configuration file ", clicfg["configfile"])
return
}
if config.Interval <= 0 || time.Duration(config.Interval)*time.Second <= 0 {
log.Print("Configuration value 'interval' must be greater than zero")
return
}
if config.Duration <= 0 {
log.Print("Configuration value 'duration' must be greater than zero")
return
}
if len(config.Collectors) == 0 {
var keys []string
for k := range Collectors {
keys = append(keys, k)
}
log.Print("Configuration value 'collectors' does not contain any collector. Available: ", strings.Join(keys, ", "))
return
}
for _, name := range config.Collectors {
if _, found := Collectors[name]; !found {
log.Print("Invalid collector '", name, "' in configuration")
return
}
}
if _, found := Sinks[config.Sink.Type]; !found {
log.Print("Invalid sink type '", config.Sink.Type, "' in configuration")
return
}
// Setup sink
sink := Sinks[config.Sink.Type]
err = sink.Init(config.Sink.Host, config.Sink.Port, config.Sink.User, config.Sink.Password, config.Sink.Database)
if err != nil {
return
}
// Register interrupt handler
shutdown(&wg, &config, sink)
// Initialize all collectors
tmp := make([]string, 0)
for _, c := range config.Collectors {
col := Collectors[c]
err = col.Init()
if err != nil {
log.Print("SKIP ", col.Name())
} else {
log.Print("Start ", col.Name())
tmp = append(tmp, c)
}
}
config.Collectors = tmp
// Setup up ticker loop
log.Print("Running loop every ", time.Duration(config.Interval)*time.Second)
ticker := time.NewTicker(time.Duration(config.Interval) * time.Second)
done := make(chan bool)
// Storage for all node metrics
nodeFields := make(map[string]interface{})
// Storage for all socket metrics
slist := collectors.SocketList()
socketsFields := make(map[int]map[string]interface{}, len(slist))
for _, s := range slist {
socketsFields[s] = make(map[string]interface{})
}
// Storage for all CPU metrics
clist := collectors.CpuList()
cpuFields := make(map[int]map[string]interface{}, len(clist))
for _, s := range clist {
cpuFields[s] = make(map[string]interface{})
}
go func() {
for {
select {
case <-done:
return
case t := <-ticker.C:
// Count how many socket and cpu metrics are returned
scount := 0
ccount := 0
// Read all collectors are sort the results in the right
// storage locations
for _, c := range config.Collectors {
col := Collectors[c]
col.Read(time.Duration(config.Duration))
for key, val := range col.GetNodeMetric() {
nodeFields[key] = val
}
for sid, socket := range col.GetSocketMetrics() {
for key, val := range socket {
socketsFields[sid][key] = val
scount++
}
}
for cid, cpu := range col.GetCpuMetrics() {
for key, val := range cpu {
cpuFields[cid][key] = val
ccount++
}
}
}
// Send out node metrics
if len(nodeFields) > 0 {
sink.Write("node", map[string]string{"host": host}, nodeFields, t)
}
// Send out socket metrics (if any)
if scount > 0 {
for sid, socket := range socketsFields {
if len(socket) > 0 {
sink.Write("socket", map[string]string{"socket": fmt.Sprintf("%d", sid), "host": host}, socket, t)
}
}
}
// Send out CPU metrics (if any)
if ccount > 0 {
for cid, cpu := range cpuFields {
if len(cpu) > 0 {
sink.Write("cpu", map[string]string{"cpu": fmt.Sprintf("%d", cid), "host": host}, cpu, t)
}
}
}
}
}
}()
// Wait until receiving an interrupt
wg.Wait()
}