2021-03-25 14:46:25 +01:00
|
|
|
package main
|
|
|
|
|
|
|
|
import (
	"encoding/json"
	"fmt"
	"log"
	"os"
	"os/signal"
	"strings"
	"sync"
	"syscall"
	"time"

	"github.com/ClusterCockpit/cc-metric-collector/collectors"
	"github.com/ClusterCockpit/cc-metric-collector/sinks"
)
|
|
|
|
|
2021-03-26 13:08:44 +01:00
|
|
|
// List of provided collectors. Which collector should be run can be
|
|
|
|
// configured at 'collectors' list in 'config.json'.
|
2021-03-25 14:46:25 +01:00
|
|
|
var Collectors = map[string]collectors.MetricGetter{
|
2021-03-25 17:47:08 +01:00
|
|
|
"likwid": &collectors.LikwidCollector{},
|
|
|
|
"loadavg": &collectors.LoadavgCollector{},
|
|
|
|
"memstat": &collectors.MemstatCollector{},
|
|
|
|
"netstat": &collectors.NetstatCollector{},
|
|
|
|
"ibstat": &collectors.InfinibandCollector{},
|
|
|
|
"lustrestat": &collectors.LustreCollector{},
|
2021-03-25 14:46:25 +01:00
|
|
|
}
|
2021-03-25 17:47:08 +01:00
|
|
|
|
2021-03-26 16:48:09 +01:00
|
|
|
var Sinks = map[string]sinks.SinkFuncs{
|
|
|
|
"influxdb": &sinks.InfluxSink{},
|
2021-03-26 17:03:46 +01:00
|
|
|
"stdout": &sinks.StdoutSink{},
|
2021-03-26 16:48:09 +01:00
|
|
|
}
|
2021-03-26 10:19:54 +01:00
|
|
|
|
2021-03-26 13:08:44 +01:00
|
|
|
// GlobalConfig mirrors the layout of the JSON configuration file.
type GlobalConfig struct {
	// Sink holds the settings of the metric sink. All fields except Type
	// are handed to the Init method of the selected sink.
	Sink struct {
		User     string `json:"user"`
		Password string `json:"password"`
		Host     string `json:"host"`
		Port     string `json:"port"`
		Database string `json:"database"`
		// Type selects the sink implementation; it has to be a key of Sinks.
		Type string `json:"type"`
	} `json:"sink"`

	// Interval is the period of the read loop in seconds.
	Interval int `json:"interval"`

	// Duration is forwarded to the Read method of every collector.
	// NOTE(review): presumably the measurement duration in seconds; confirm
	// against the collector implementations.
	Duration int `json:"duration"`

	// Collectors lists the names of the collectors to run; every entry has
	// to be a key of Collectors.
	Collectors []string `json:"collectors"`
}
|
|
|
|
|
2021-03-26 13:08:44 +01:00
|
|
|
// Load JSON configuration file
|
2021-03-25 14:46:25 +01:00
|
|
|
func LoadConfiguration(file string, config *GlobalConfig) error {
|
|
|
|
configFile, err := os.Open(file)
|
|
|
|
defer configFile.Close()
|
|
|
|
if err != nil {
|
|
|
|
fmt.Println(err.Error())
|
|
|
|
}
|
|
|
|
jsonParser := json.NewDecoder(configFile)
|
|
|
|
jsonParser.Decode(config)
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
2021-03-26 13:08:44 +01:00
|
|
|
// Register an interrupt handler for Ctrl+C and similar. At signal,
|
|
|
|
// all collectors are closed
|
2021-03-26 16:48:09 +01:00
|
|
|
func shutdown(wg *sync.WaitGroup, config *GlobalConfig, sink sinks.SinkFuncs) {
|
2021-03-25 14:46:25 +01:00
|
|
|
sigs := make(chan os.Signal, 1)
|
|
|
|
signal.Notify(sigs, os.Interrupt)
|
|
|
|
|
|
|
|
go func(wg *sync.WaitGroup) {
|
|
|
|
<-sigs
|
|
|
|
log.Print("Shutdown...")
|
|
|
|
for _, c := range config.Collectors {
|
|
|
|
col := Collectors[c]
|
|
|
|
log.Print("Stop ", col.Name())
|
|
|
|
col.Close()
|
|
|
|
}
|
|
|
|
time.Sleep(1 * time.Second)
|
2021-03-26 16:48:09 +01:00
|
|
|
sink.Close()
|
2021-03-25 14:46:25 +01:00
|
|
|
wg.Done()
|
|
|
|
}(wg)
|
|
|
|
}
|
|
|
|
|
|
|
|
func main() {
|
|
|
|
var config GlobalConfig
|
|
|
|
var wg sync.WaitGroup
|
|
|
|
wg.Add(1)
|
|
|
|
host, err := os.Hostname()
|
2021-03-25 17:47:08 +01:00
|
|
|
if err != nil {
|
|
|
|
log.Print(err)
|
|
|
|
return
|
|
|
|
}
|
2021-03-25 14:46:25 +01:00
|
|
|
|
2021-03-26 13:08:44 +01:00
|
|
|
// Load and check configuration
|
2021-03-25 14:46:25 +01:00
|
|
|
LoadConfiguration("config.json", &config)
|
2021-03-25 17:47:08 +01:00
|
|
|
if config.Interval <= 0 || time.Duration(config.Interval)*time.Second <= 0 {
|
|
|
|
log.Print("Configuration value 'interval' must be greater than zero")
|
|
|
|
return
|
2021-03-25 14:46:25 +01:00
|
|
|
}
|
|
|
|
if config.Duration <= 0 {
|
2021-03-25 17:47:08 +01:00
|
|
|
log.Print("Configuration value 'duration' must be greater than zero")
|
|
|
|
return
|
2021-03-25 14:46:25 +01:00
|
|
|
}
|
2021-03-26 13:08:44 +01:00
|
|
|
if len(config.Collectors) == 0 {
|
|
|
|
var keys []string
|
|
|
|
for k := range Collectors {
|
|
|
|
keys = append(keys, k)
|
|
|
|
}
|
|
|
|
log.Print("Configuration value 'collectors' does not contain any collector. Available: ", strings.Join(keys, ", "))
|
|
|
|
return
|
|
|
|
}
|
|
|
|
for _, name := range config.Collectors {
|
|
|
|
if _, found := Collectors[name]; !found {
|
|
|
|
log.Print("Invalid collector '", name, "' in configuration")
|
|
|
|
return
|
|
|
|
}
|
|
|
|
}
|
2021-03-26 16:48:09 +01:00
|
|
|
if _, found := Sinks[config.Sink.Type]; !found {
|
|
|
|
log.Print("Invalid sink type '", config.Sink.Type, "' in configuration")
|
|
|
|
return
|
|
|
|
}
|
|
|
|
// Setup sink
|
|
|
|
sink := Sinks[config.Sink.Type]
|
|
|
|
err = sink.Init(config.Sink.Host, config.Sink.Port, config.Sink.User, config.Sink.Password, config.Sink.Database)
|
|
|
|
if err != nil {
|
|
|
|
return
|
|
|
|
}
|
2021-03-26 10:19:54 +01:00
|
|
|
|
2021-03-26 16:48:09 +01:00
|
|
|
// Register interrupt handler
|
|
|
|
shutdown(&wg, &config, sink)
|
2021-03-26 10:19:54 +01:00
|
|
|
|
2021-03-26 13:08:44 +01:00
|
|
|
// Initialize all collectors
|
2021-03-25 14:46:25 +01:00
|
|
|
for _, c := range config.Collectors {
|
|
|
|
col := Collectors[c]
|
|
|
|
col.Init()
|
|
|
|
log.Print("Start ", col.Name())
|
|
|
|
}
|
2021-03-26 10:19:54 +01:00
|
|
|
|
2021-03-26 13:08:44 +01:00
|
|
|
// Setup up ticker loop
|
|
|
|
log.Print("Running loop every ", time.Duration(config.Interval)*time.Second)
|
2021-03-25 14:46:25 +01:00
|
|
|
ticker := time.NewTicker(time.Duration(config.Interval) * time.Second)
|
|
|
|
done := make(chan bool)
|
2021-03-26 10:19:54 +01:00
|
|
|
|
2021-03-26 13:08:44 +01:00
|
|
|
// Storage for all node metrics
|
2021-03-26 10:19:54 +01:00
|
|
|
nodeFields := make(map[string]interface{})
|
|
|
|
|
2021-03-26 13:08:44 +01:00
|
|
|
// Storage for all socket metrics
|
2021-03-25 14:46:25 +01:00
|
|
|
slist := collectors.SocketList()
|
2021-03-26 10:19:54 +01:00
|
|
|
socketsFields := make(map[int]map[string]interface{}, len(slist))
|
2021-03-25 17:47:08 +01:00
|
|
|
for _, s := range slist {
|
2021-03-26 10:19:54 +01:00
|
|
|
socketsFields[s] = make(map[string]interface{})
|
2021-03-25 17:47:08 +01:00
|
|
|
}
|
2021-03-26 10:19:54 +01:00
|
|
|
|
2021-03-26 13:08:44 +01:00
|
|
|
// Storage for all CPU metrics
|
2021-03-25 17:47:08 +01:00
|
|
|
clist := collectors.CpuList()
|
2021-03-26 10:19:54 +01:00
|
|
|
cpuFields := make(map[int]map[string]interface{}, len(clist))
|
2021-03-25 17:47:08 +01:00
|
|
|
for _, s := range clist {
|
2021-03-26 10:19:54 +01:00
|
|
|
cpuFields[s] = make(map[string]interface{})
|
2021-03-25 17:47:08 +01:00
|
|
|
}
|
2021-03-25 14:46:25 +01:00
|
|
|
|
|
|
|
go func() {
|
|
|
|
for {
|
|
|
|
select {
|
|
|
|
case <-done:
|
|
|
|
return
|
2021-03-25 17:47:08 +01:00
|
|
|
case t := <-ticker.C:
|
2021-03-26 13:08:44 +01:00
|
|
|
// Count how many socket and cpu metrics are returned
|
2021-03-25 17:47:08 +01:00
|
|
|
scount := 0
|
|
|
|
ccount := 0
|
2021-03-26 10:19:54 +01:00
|
|
|
|
2021-03-26 13:08:44 +01:00
|
|
|
// Read all collectors are sort the results in the right
|
|
|
|
// storage locations
|
2021-03-25 17:47:08 +01:00
|
|
|
for _, c := range config.Collectors {
|
|
|
|
col := Collectors[c]
|
|
|
|
col.Read(time.Duration(config.Duration))
|
2021-03-26 10:19:54 +01:00
|
|
|
|
2021-03-25 17:47:08 +01:00
|
|
|
for key, val := range col.GetNodeMetric() {
|
2021-03-26 10:19:54 +01:00
|
|
|
nodeFields[key] = val
|
2021-03-25 17:47:08 +01:00
|
|
|
}
|
|
|
|
for sid, socket := range col.GetSocketMetrics() {
|
|
|
|
for key, val := range socket {
|
2021-03-26 10:19:54 +01:00
|
|
|
socketsFields[sid][key] = val
|
2021-03-25 17:47:08 +01:00
|
|
|
scount++
|
|
|
|
}
|
|
|
|
}
|
|
|
|
for cid, cpu := range col.GetCpuMetrics() {
|
|
|
|
for key, val := range cpu {
|
2021-03-26 10:19:54 +01:00
|
|
|
cpuFields[cid][key] = val
|
2021-03-25 17:47:08 +01:00
|
|
|
ccount++
|
|
|
|
}
|
|
|
|
}
|
2021-03-25 14:46:25 +01:00
|
|
|
}
|
2021-03-26 16:48:09 +01:00
|
|
|
|
2021-03-26 13:08:44 +01:00
|
|
|
// Send out node metrics
|
2021-03-26 16:48:09 +01:00
|
|
|
sink.Write("node", map[string]string{"host": host}, nodeFields, t)
|
2021-03-26 10:19:54 +01:00
|
|
|
|
2021-03-26 13:08:44 +01:00
|
|
|
// Send out socket metrics (if any)
|
2021-03-25 17:47:08 +01:00
|
|
|
if scount > 0 {
|
2021-03-26 10:19:54 +01:00
|
|
|
for sid, socket := range socketsFields {
|
2021-03-26 16:48:09 +01:00
|
|
|
sink.Write("socket", map[string]string{"socket": fmt.Sprintf("%d", sid), "host": host}, socket, t)
|
2021-03-25 17:47:08 +01:00
|
|
|
}
|
|
|
|
}
|
2021-03-26 10:19:54 +01:00
|
|
|
|
2021-03-26 13:08:44 +01:00
|
|
|
// Send out CPU metrics (if any)
|
2021-03-25 17:47:08 +01:00
|
|
|
if ccount > 0 {
|
2021-03-26 10:19:54 +01:00
|
|
|
for cid, cpu := range cpuFields {
|
2021-03-26 16:48:09 +01:00
|
|
|
sink.Write("cpu", map[string]string{"cpu": fmt.Sprintf("%d", cid), "host": host}, cpu, t)
|
2021-03-25 17:47:08 +01:00
|
|
|
}
|
|
|
|
}
|
2021-03-25 14:46:25 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}()
|
|
|
|
|
2021-03-26 13:08:44 +01:00
|
|
|
// Wait until receiving an interrupt
|
2021-03-25 14:46:25 +01:00
|
|
|
wg.Wait()
|
|
|
|
}
|