mirror of
https://github.com/ClusterCockpit/cc-metric-collector.git
synced 2025-08-14 14:52:58 +02:00
Use channels, add a metric router, split up configuration and use extended version of Influx line protocol internally
This commit is contained in:
138
collectors/collectorManager.go
Normal file
138
collectors/collectorManager.go
Normal file
@@ -0,0 +1,138 @@
|
||||
package collectors
|
||||
|
||||
import (
|
||||
lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric"
|
||||
"sync"
|
||||
"time"
|
||||
"log"
|
||||
"os"
|
||||
"encoding/json"
|
||||
)
|
||||
|
||||
|
||||
var AvailableCollectors = map[string]MetricCollector{
|
||||
"likwid": &LikwidCollector{},
|
||||
"loadavg": &LoadavgCollector{},
|
||||
"memstat": &MemstatCollector{},
|
||||
"netstat": &NetstatCollector{},
|
||||
"ibstat": &InfinibandCollector{},
|
||||
"lustrestat": &LustreCollector{},
|
||||
"cpustat": &CpustatCollector{},
|
||||
"topprocs": &TopProcsCollector{},
|
||||
"nvidia": &NvidiaCollector{},
|
||||
"customcmd": &CustomCmdCollector{},
|
||||
"diskstat": &DiskstatCollector{},
|
||||
"tempstat": &TempCollector{},
|
||||
"ipmistat": &IpmiCollector{},
|
||||
}
|
||||
|
||||
|
||||
type collectorManager struct {
|
||||
collectors []MetricCollector
|
||||
output chan lp.CCMetric
|
||||
done chan bool
|
||||
interval time.Duration
|
||||
duration time.Duration
|
||||
wg *sync.WaitGroup
|
||||
config map[string]json.RawMessage
|
||||
}
|
||||
|
||||
type CollectorManager interface {
|
||||
Init(interval time.Duration, duration time.Duration, wg *sync.WaitGroup, collectConfigFile string) error
|
||||
AddOutput(output chan lp.CCMetric)
|
||||
Start()
|
||||
Close()
|
||||
}
|
||||
|
||||
|
||||
func (cm *collectorManager) Init(interval time.Duration, duration time.Duration, wg *sync.WaitGroup, collectConfigFile string) error {
|
||||
cm.collectors = make([]MetricCollector, 0)
|
||||
cm.output = nil
|
||||
cm.done = make(chan bool)
|
||||
cm.wg = wg
|
||||
cm.interval = interval
|
||||
cm.duration = duration
|
||||
configFile, err := os.Open(collectConfigFile)
|
||||
if err != nil {
|
||||
log.Print(err.Error())
|
||||
return err
|
||||
}
|
||||
defer configFile.Close()
|
||||
jsonParser := json.NewDecoder(configFile)
|
||||
err = jsonParser.Decode(&cm.config)
|
||||
if err != nil {
|
||||
log.Print(err.Error())
|
||||
return err
|
||||
}
|
||||
for k, cfg := range cm.config {
|
||||
log.Print(k, " ", cfg)
|
||||
if _, found := AvailableCollectors[k]; !found {
|
||||
log.Print("[CollectorManager] SKIP unknown collector ", k)
|
||||
continue
|
||||
}
|
||||
c := AvailableCollectors[k]
|
||||
|
||||
err = c.Init(cfg)
|
||||
if err != nil {
|
||||
log.Print("[CollectorManager] Collector ", k, "initialization failed: ", err.Error())
|
||||
continue
|
||||
}
|
||||
cm.collectors = append(cm.collectors, c)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (cm *collectorManager) Start() {
|
||||
cm.wg.Add(1)
|
||||
ticker := time.NewTicker(cm.interval)
|
||||
go func() {
|
||||
for {
|
||||
CollectorManagerLoop:
|
||||
select {
|
||||
case <- cm.done:
|
||||
for _, c := range cm.collectors {
|
||||
c.Close()
|
||||
}
|
||||
cm.wg.Done()
|
||||
log.Print("[CollectorManager] DONE\n")
|
||||
break CollectorManagerLoop
|
||||
case t := <-ticker.C:
|
||||
for _, c := range cm.collectors {
|
||||
CollectorManagerInputLoop:
|
||||
select {
|
||||
case <- cm.done:
|
||||
for _, c := range cm.collectors {
|
||||
c.Close()
|
||||
}
|
||||
cm.wg.Done()
|
||||
log.Print("[CollectorManager] DONE\n")
|
||||
break CollectorManagerInputLoop
|
||||
default:
|
||||
log.Print("[CollectorManager] ", c.Name(), " ", t)
|
||||
c.Read(cm.duration, cm.output)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
log.Print("[CollectorManager] EXIT\n")
|
||||
}()
|
||||
log.Print("[CollectorManager] STARTED\n")
|
||||
}
|
||||
|
||||
func (cm *collectorManager) AddOutput(output chan lp.CCMetric) {
|
||||
cm.output = output
|
||||
}
|
||||
|
||||
func (cm *collectorManager) Close() {
|
||||
cm.done <- true
|
||||
log.Print("[CollectorManager] CLOSE")
|
||||
}
|
||||
|
||||
func New(interval time.Duration, duration time.Duration, wg *sync.WaitGroup, collectConfigFile string) (CollectorManager, error) {
|
||||
cm := &collectorManager{}
|
||||
err := cm.Init(interval, duration, wg, collectConfigFile)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return cm, err
|
||||
}
|
@@ -3,7 +3,7 @@ package collectors
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
lp "github.com/influxdata/line-protocol"
|
||||
lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric"
|
||||
"io/ioutil"
|
||||
"strconv"
|
||||
"strings"
|
||||
@@ -17,13 +17,14 @@ type CpustatCollectorConfig struct {
|
||||
}
|
||||
|
||||
type CpustatCollector struct {
|
||||
MetricCollector
|
||||
metricCollector
|
||||
config CpustatCollectorConfig
|
||||
}
|
||||
|
||||
func (m *CpustatCollector) Init(config []byte) error {
|
||||
func (m *CpustatCollector) Init(config json.RawMessage) error {
|
||||
m.name = "CpustatCollector"
|
||||
m.setup()
|
||||
m.meta = map[string]string{"source" : m.name, "group" : "CPU"}
|
||||
if len(config) > 0 {
|
||||
err := json.Unmarshal(config, &m.config)
|
||||
if err != nil {
|
||||
@@ -34,7 +35,7 @@ func (m *CpustatCollector) Init(config []byte) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func ParseStatLine(line string, cpu int, exclude []string, out *[]lp.MutableMetric) {
|
||||
func (c *CpustatCollector) parseStatLine(line string, cpu int, exclude []string, output chan lp.CCMetric) {
|
||||
ls := strings.Fields(line)
|
||||
matches := []string{"", "cpu_user", "cpu_nice", "cpu_system", "cpu_idle", "cpu_iowait", "cpu_irq", "cpu_softirq", "cpu_steal", "cpu_guest", "cpu_guest_nice"}
|
||||
for _, ex := range exclude {
|
||||
@@ -51,16 +52,16 @@ func ParseStatLine(line string, cpu int, exclude []string, out *[]lp.MutableMetr
|
||||
if len(m) > 0 {
|
||||
x, err := strconv.ParseInt(ls[i], 0, 64)
|
||||
if err == nil {
|
||||
y, err := lp.New(m, tags, map[string]interface{}{"value": int(x)}, time.Now())
|
||||
y, err := lp.New(m, tags, c.meta, map[string]interface{}{"value": int(x)}, time.Now())
|
||||
if err == nil {
|
||||
*out = append(*out, y)
|
||||
output <- y
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (m *CpustatCollector) Read(interval time.Duration, out *[]lp.MutableMetric) {
|
||||
func (m *CpustatCollector) Read(interval time.Duration, output chan lp.CCMetric) {
|
||||
if !m.init {
|
||||
return
|
||||
}
|
||||
@@ -77,11 +78,11 @@ func (m *CpustatCollector) Read(interval time.Duration, out *[]lp.MutableMetric)
|
||||
}
|
||||
ls := strings.Fields(line)
|
||||
if strings.Compare(ls[0], "cpu") == 0 {
|
||||
ParseStatLine(line, -1, m.config.ExcludeMetrics, out)
|
||||
m.parseStatLine(line, -1, m.config.ExcludeMetrics, output)
|
||||
} else if strings.HasPrefix(ls[0], "cpu") {
|
||||
cpustr := strings.TrimLeft(ls[0], "cpu")
|
||||
cpu, _ := strconv.Atoi(cpustr)
|
||||
ParseStatLine(line, cpu, m.config.ExcludeMetrics, out)
|
||||
m.parseStatLine(line, cpu, m.config.ExcludeMetrics, output)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@@ -3,7 +3,8 @@ package collectors
|
||||
import (
|
||||
"encoding/json"
|
||||
"errors"
|
||||
lp "github.com/influxdata/line-protocol"
|
||||
lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric"
|
||||
influx "github.com/influxdata/line-protocol"
|
||||
"io/ioutil"
|
||||
"log"
|
||||
"os/exec"
|
||||
@@ -20,17 +21,18 @@ type CustomCmdCollectorConfig struct {
|
||||
}
|
||||
|
||||
type CustomCmdCollector struct {
|
||||
MetricCollector
|
||||
handler *lp.MetricHandler
|
||||
parser *lp.Parser
|
||||
metricCollector
|
||||
handler *influx.MetricHandler
|
||||
parser *influx.Parser
|
||||
config CustomCmdCollectorConfig
|
||||
commands []string
|
||||
files []string
|
||||
}
|
||||
|
||||
func (m *CustomCmdCollector) Init(config []byte) error {
|
||||
func (m *CustomCmdCollector) Init(config json.RawMessage) error {
|
||||
var err error
|
||||
m.name = "CustomCmdCollector"
|
||||
m.meta = map[string]string{"source" : m.name, "group" : "Custom"}
|
||||
if len(config) > 0 {
|
||||
err = json.Unmarshal(config, &m.config)
|
||||
if err != nil {
|
||||
@@ -60,8 +62,8 @@ func (m *CustomCmdCollector) Init(config []byte) error {
|
||||
if len(m.files) == 0 && len(m.commands) == 0 {
|
||||
return errors.New("No metrics to collect")
|
||||
}
|
||||
m.handler = lp.NewMetricHandler()
|
||||
m.parser = lp.NewParser(m.handler)
|
||||
m.handler = influx.NewMetricHandler()
|
||||
m.parser = influx.NewParser(m.handler)
|
||||
m.parser.SetTimeFunc(DefaultTime)
|
||||
m.init = true
|
||||
return nil
|
||||
@@ -71,7 +73,7 @@ var DefaultTime = func() time.Time {
|
||||
return time.Unix(42, 0)
|
||||
}
|
||||
|
||||
func (m *CustomCmdCollector) Read(interval time.Duration, out *[]lp.MutableMetric) {
|
||||
func (m *CustomCmdCollector) Read(interval time.Duration, output chan lp.CCMetric) {
|
||||
if !m.init {
|
||||
return
|
||||
}
|
||||
@@ -94,9 +96,9 @@ func (m *CustomCmdCollector) Read(interval time.Duration, out *[]lp.MutableMetri
|
||||
if skip {
|
||||
continue
|
||||
}
|
||||
y, err := lp.New(c.Name(), Tags2Map(c), Fields2Map(c), c.Time())
|
||||
y, err := lp.New(c.Name(), Tags2Map(c), m.meta, Fields2Map(c), c.Time())
|
||||
if err == nil {
|
||||
*out = append(*out, y)
|
||||
output <- y
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -116,9 +118,9 @@ func (m *CustomCmdCollector) Read(interval time.Duration, out *[]lp.MutableMetri
|
||||
if skip {
|
||||
continue
|
||||
}
|
||||
y, err := lp.New(f.Name(), Tags2Map(f), Fields2Map(f), f.Time())
|
||||
y, err := lp.New(f.Name(), Tags2Map(f), m.meta, Fields2Map(f), f.Time())
|
||||
if err == nil {
|
||||
*out = append(*out, y)
|
||||
output <- y
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@@ -1,7 +1,7 @@
|
||||
package collectors
|
||||
|
||||
import (
|
||||
lp "github.com/influxdata/line-protocol"
|
||||
lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric"
|
||||
"io/ioutil"
|
||||
// "log"
|
||||
"encoding/json"
|
||||
@@ -19,14 +19,15 @@ type DiskstatCollectorConfig struct {
|
||||
}
|
||||
|
||||
type DiskstatCollector struct {
|
||||
MetricCollector
|
||||
metricCollector
|
||||
matches map[int]string
|
||||
config DiskstatCollectorConfig
|
||||
}
|
||||
|
||||
func (m *DiskstatCollector) Init(config []byte) error {
|
||||
func (m *DiskstatCollector) Init(config json.RawMessage) error {
|
||||
var err error
|
||||
m.name = "DiskstatCollector"
|
||||
m.meta = map[string]string{"source" : m.name, "group" : "Disk"}
|
||||
m.setup()
|
||||
if len(config) > 0 {
|
||||
err = json.Unmarshal(config, &m.config)
|
||||
@@ -71,7 +72,7 @@ func (m *DiskstatCollector) Init(config []byte) error {
|
||||
return err
|
||||
}
|
||||
|
||||
func (m *DiskstatCollector) Read(interval time.Duration, out *[]lp.MutableMetric) {
|
||||
func (m *DiskstatCollector) Read(interval time.Duration, output chan lp.CCMetric) {
|
||||
var lines []string
|
||||
if !m.init {
|
||||
return
|
||||
@@ -99,9 +100,9 @@ func (m *DiskstatCollector) Read(interval time.Duration, out *[]lp.MutableMetric
|
||||
if idx < len(f) {
|
||||
x, err := strconv.ParseInt(f[idx], 0, 64)
|
||||
if err == nil {
|
||||
y, err := lp.New(name, tags, map[string]interface{}{"value": int(x)}, time.Now())
|
||||
y, err := lp.New(name, tags, m.meta, map[string]interface{}{"value": int(x)}, time.Now())
|
||||
if err == nil {
|
||||
*out = append(*out, y)
|
||||
output <- y
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@@ -2,7 +2,7 @@ package collectors
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
lp "github.com/influxdata/line-protocol"
|
||||
lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric"
|
||||
"io/ioutil"
|
||||
"log"
|
||||
"os/exec"
|
||||
@@ -25,7 +25,7 @@ type InfinibandCollectorConfig struct {
|
||||
}
|
||||
|
||||
type InfinibandCollector struct {
|
||||
MetricCollector
|
||||
metricCollector
|
||||
tags map[string]string
|
||||
lids map[string]map[string]string
|
||||
config InfinibandCollectorConfig
|
||||
@@ -49,11 +49,12 @@ func (m *InfinibandCollector) Help() {
|
||||
fmt.Println("- ib_xmit")
|
||||
}
|
||||
|
||||
func (m *InfinibandCollector) Init(config []byte) error {
|
||||
func (m *InfinibandCollector) Init(config json.RawMessage) error {
|
||||
var err error
|
||||
m.name = "InfinibandCollector"
|
||||
m.use_perfquery = false
|
||||
m.setup()
|
||||
m.meta = map[string]string{"source" : m.name, "group" : "Network"}
|
||||
m.tags = map[string]string{"type": "node"}
|
||||
if len(config) > 0 {
|
||||
err = json.Unmarshal(config, &m.config)
|
||||
@@ -110,7 +111,7 @@ func (m *InfinibandCollector) Init(config []byte) error {
|
||||
return err
|
||||
}
|
||||
|
||||
func DoPerfQuery(cmd string, dev string, lid string, port string, tags map[string]string, out *[]lp.MutableMetric) error {
|
||||
func (m *InfinibandCollector) doPerfQuery(cmd string, dev string, lid string, port string, tags map[string]string, output chan lp.CCMetric) error {
|
||||
|
||||
args := fmt.Sprintf("-r %s %s 0xf000", lid, port)
|
||||
command := exec.Command(cmd, args)
|
||||
@@ -127,9 +128,9 @@ func DoPerfQuery(cmd string, dev string, lid string, port string, tags map[strin
|
||||
lv := strings.Fields(line)
|
||||
v, err := strconv.ParseFloat(lv[1], 64)
|
||||
if err == nil {
|
||||
y, err := lp.New("ib_recv", tags, map[string]interface{}{"value": float64(v)}, time.Now())
|
||||
y, err := lp.New("ib_recv", tags, m.meta, map[string]interface{}{"value": float64(v)}, time.Now())
|
||||
if err == nil {
|
||||
*out = append(*out, y)
|
||||
output <- y
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -137,9 +138,9 @@ func DoPerfQuery(cmd string, dev string, lid string, port string, tags map[strin
|
||||
lv := strings.Fields(line)
|
||||
v, err := strconv.ParseFloat(lv[1], 64)
|
||||
if err == nil {
|
||||
y, err := lp.New("ib_xmit", tags, map[string]interface{}{"value": float64(v)}, time.Now())
|
||||
y, err := lp.New("ib_xmit", tags, m.meta, map[string]interface{}{"value": float64(v)}, time.Now())
|
||||
if err == nil {
|
||||
*out = append(*out, y)
|
||||
output <- y
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -147,16 +148,16 @@ func DoPerfQuery(cmd string, dev string, lid string, port string, tags map[strin
|
||||
return nil
|
||||
}
|
||||
|
||||
func DoSysfsRead(dev string, lid string, port string, tags map[string]string, out *[]lp.MutableMetric) error {
|
||||
func (m *InfinibandCollector) doSysfsRead(dev string, lid string, port string, tags map[string]string, output chan lp.CCMetric) error {
|
||||
path := fmt.Sprintf("%s/%s/ports/%s/counters/", string(IBBASEPATH), dev, port)
|
||||
buffer, err := ioutil.ReadFile(fmt.Sprintf("%s/port_rcv_data", path))
|
||||
if err == nil {
|
||||
data := strings.Replace(string(buffer), "\n", "", -1)
|
||||
v, err := strconv.ParseFloat(data, 64)
|
||||
if err == nil {
|
||||
y, err := lp.New("ib_recv", tags, map[string]interface{}{"value": float64(v)}, time.Now())
|
||||
y, err := lp.New("ib_recv", tags, m.meta, map[string]interface{}{"value": float64(v)}, time.Now())
|
||||
if err == nil {
|
||||
*out = append(*out, y)
|
||||
output <- y
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -165,71 +166,29 @@ func DoSysfsRead(dev string, lid string, port string, tags map[string]string, ou
|
||||
data := strings.Replace(string(buffer), "\n", "", -1)
|
||||
v, err := strconv.ParseFloat(data, 64)
|
||||
if err == nil {
|
||||
y, err := lp.New("ib_xmit", tags, map[string]interface{}{"value": float64(v)}, time.Now())
|
||||
y, err := lp.New("ib_xmit", tags, m.meta, map[string]interface{}{"value": float64(v)}, time.Now())
|
||||
if err == nil {
|
||||
*out = append(*out, y)
|
||||
output <- y
|
||||
}
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *InfinibandCollector) Read(interval time.Duration, out *[]lp.MutableMetric) {
|
||||
func (m *InfinibandCollector) Read(interval time.Duration, output chan lp.CCMetric) {
|
||||
|
||||
if m.init {
|
||||
for dev, ports := range m.lids {
|
||||
for port, lid := range ports {
|
||||
tags := map[string]string{"type": "node", "device": dev, "port": port}
|
||||
if m.use_perfquery {
|
||||
DoPerfQuery(m.config.PerfQueryPath, dev, lid, port, tags, out)
|
||||
m.doPerfQuery(m.config.PerfQueryPath, dev, lid, port, tags, output)
|
||||
} else {
|
||||
DoSysfsRead(dev, lid, port, tags, out)
|
||||
m.doSysfsRead(dev, lid, port, tags, output)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// buffer, err := ioutil.ReadFile(string(LIDFILE))
|
||||
|
||||
// if err != nil {
|
||||
// log.Print(err)
|
||||
// return
|
||||
// }
|
||||
|
||||
// args := fmt.Sprintf("-r %s 1 0xf000", string(buffer))
|
||||
|
||||
// command := exec.Command(PERFQUERY, args)
|
||||
// command.Wait()
|
||||
// stdout, err := command.Output()
|
||||
// if err != nil {
|
||||
// log.Print(err)
|
||||
// return
|
||||
// }
|
||||
|
||||
// ll := strings.Split(string(stdout), "\n")
|
||||
|
||||
// for _, line := range ll {
|
||||
// if strings.HasPrefix(line, "PortRcvData") || strings.HasPrefix(line, "RcvData") {
|
||||
// lv := strings.Fields(line)
|
||||
// v, err := strconv.ParseFloat(lv[1], 64)
|
||||
// if err == nil {
|
||||
// y, err := lp.New("ib_recv", m.tags, map[string]interface{}{"value": float64(v)}, time.Now())
|
||||
// if err == nil {
|
||||
// *out = append(*out, y)
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
// if strings.HasPrefix(line, "PortXmitData") || strings.HasPrefix(line, "XmtData") {
|
||||
// lv := strings.Fields(line)
|
||||
// v, err := strconv.ParseFloat(lv[1], 64)
|
||||
// if err == nil {
|
||||
// y, err := lp.New("ib_xmit", m.tags, map[string]interface{}{"value": float64(v)}, time.Now())
|
||||
// if err == nil {
|
||||
// *out = append(*out, y)
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
}
|
||||
|
||||
func (m *InfinibandCollector) Close() {
|
||||
|
@@ -3,7 +3,7 @@ package collectors
|
||||
import (
|
||||
"encoding/json"
|
||||
"errors"
|
||||
lp "github.com/influxdata/line-protocol"
|
||||
lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric"
|
||||
"log"
|
||||
"os"
|
||||
"os/exec"
|
||||
@@ -22,15 +22,16 @@ type IpmiCollectorConfig struct {
|
||||
}
|
||||
|
||||
type IpmiCollector struct {
|
||||
MetricCollector
|
||||
metricCollector
|
||||
tags map[string]string
|
||||
matches map[string]string
|
||||
config IpmiCollectorConfig
|
||||
}
|
||||
|
||||
func (m *IpmiCollector) Init(config []byte) error {
|
||||
func (m *IpmiCollector) Init(config json.RawMessage) error {
|
||||
m.name = "IpmiCollector"
|
||||
m.setup()
|
||||
m.meta = map[string]string{"source" : m.name, "group" : "IPMI"}
|
||||
if len(config) > 0 {
|
||||
err := json.Unmarshal(config, &m.config)
|
||||
if err != nil {
|
||||
@@ -52,7 +53,7 @@ func (m *IpmiCollector) Init(config []byte) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func ReadIpmiTool(cmd string, out *[]lp.MutableMetric) {
|
||||
func (m *IpmiCollector) readIpmiTool(cmd string, output chan lp.CCMetric) {
|
||||
command := exec.Command(cmd, "sensor")
|
||||
command.Wait()
|
||||
stdout, err := command.Output()
|
||||
@@ -73,24 +74,25 @@ func ReadIpmiTool(cmd string, out *[]lp.MutableMetric) {
|
||||
name := strings.ToLower(strings.Replace(strings.Trim(lv[0], " "), " ", "_", -1))
|
||||
unit := strings.Trim(lv[2], " ")
|
||||
if unit == "Volts" {
|
||||
unit = "V"
|
||||
unit = "Volts"
|
||||
} else if unit == "degrees C" {
|
||||
unit = "C"
|
||||
unit = "degC"
|
||||
} else if unit == "degrees F" {
|
||||
unit = "F"
|
||||
unit = "degF"
|
||||
} else if unit == "Watts" {
|
||||
unit = "W"
|
||||
unit = "Watts"
|
||||
}
|
||||
|
||||
y, err := lp.New(name, map[string]string{"unit": unit, "type": "node"}, map[string]interface{}{"value": v}, time.Now())
|
||||
y, err := lp.New(name, map[string]string{"type": "node"}, m.meta, map[string]interface{}{"value": v}, time.Now())
|
||||
if err == nil {
|
||||
*out = append(*out, y)
|
||||
y.AddMeta("unit", unit)
|
||||
output <- y
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func ReadIpmiSensors(cmd string, out *[]lp.MutableMetric) {
|
||||
func (m *IpmiCollector) readIpmiSensors(cmd string, output chan lp.CCMetric) {
|
||||
|
||||
command := exec.Command(cmd, "--comma-separated-output", "--sdr-cache-recreate")
|
||||
command.Wait()
|
||||
@@ -108,25 +110,28 @@ func ReadIpmiSensors(cmd string, out *[]lp.MutableMetric) {
|
||||
v, err := strconv.ParseFloat(lv[3], 64)
|
||||
if err == nil {
|
||||
name := strings.ToLower(strings.Replace(lv[1], " ", "_", -1))
|
||||
y, err := lp.New(name, map[string]string{"unit": lv[4], "type": "node"}, map[string]interface{}{"value": v}, time.Now())
|
||||
y, err := lp.New(name, map[string]string{"type": "node"}, m.meta, map[string]interface{}{"value": v}, time.Now())
|
||||
if err == nil {
|
||||
*out = append(*out, y)
|
||||
if len(lv) > 4 {
|
||||
y.AddMeta("unit", lv[4])
|
||||
}
|
||||
output <- y
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (m *IpmiCollector) Read(interval time.Duration, out *[]lp.MutableMetric) {
|
||||
func (m *IpmiCollector) Read(interval time.Duration, output chan lp.CCMetric) {
|
||||
if len(m.config.IpmitoolPath) > 0 {
|
||||
_, err := os.Stat(m.config.IpmitoolPath)
|
||||
if err == nil {
|
||||
ReadIpmiTool(m.config.IpmitoolPath, out)
|
||||
m.readIpmiTool(m.config.IpmitoolPath, output)
|
||||
}
|
||||
} else if len(m.config.IpmisensorsPath) > 0 {
|
||||
_, err := os.Stat(m.config.IpmisensorsPath)
|
||||
if err == nil {
|
||||
ReadIpmiSensors(m.config.IpmisensorsPath, out)
|
||||
m.readIpmiSensors(m.config.IpmisensorsPath, output)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@@ -12,7 +12,7 @@ import (
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
lp "github.com/influxdata/line-protocol"
|
||||
lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric"
|
||||
"gopkg.in/Knetic/govaluate.v2"
|
||||
"io/ioutil"
|
||||
"log"
|
||||
@@ -24,10 +24,23 @@ import (
|
||||
"unsafe"
|
||||
)
|
||||
|
||||
type MetricScope int
|
||||
|
||||
const (
|
||||
METRIC_SCOPE_HWTHREAD = iota
|
||||
METRIC_SCOPE_SOCKET
|
||||
METRIC_SCOPE_NUMA
|
||||
METRIC_SCOPE_NODE
|
||||
)
|
||||
|
||||
func (ms MetricScope) String() string {
|
||||
return []string{"Head", "Shoulder", "Knee", "Toe"}[ms]
|
||||
}
|
||||
|
||||
type LikwidCollectorMetricConfig struct {
|
||||
Name string `json:"name"`
|
||||
Calc string `json:"calc"`
|
||||
Socket_scope bool `json:"socket_scope"`
|
||||
Scope MetricScope `json:"socket_scope"`
|
||||
Publish bool `json:"publish"`
|
||||
}
|
||||
|
||||
@@ -44,7 +57,7 @@ type LikwidCollectorConfig struct {
|
||||
}
|
||||
|
||||
type LikwidCollector struct {
|
||||
MetricCollector
|
||||
metricCollector
|
||||
cpulist []C.int
|
||||
sock2tid map[int]int
|
||||
metrics map[C.int]map[string]int
|
||||
@@ -104,7 +117,7 @@ func getSocketCpus() map[C.int]int {
|
||||
return outmap
|
||||
}
|
||||
|
||||
func (m *LikwidCollector) Init(config []byte) error {
|
||||
func (m *LikwidCollector) Init(config json.RawMessage) error {
|
||||
var ret C.int
|
||||
m.name = "LikwidCollector"
|
||||
if len(config) > 0 {
|
||||
@@ -114,11 +127,13 @@ func (m *LikwidCollector) Init(config []byte) error {
|
||||
}
|
||||
}
|
||||
m.setup()
|
||||
m.meta = map[string]string{"source" : m.name, "group" : "PerfCounter"}
|
||||
cpulist := CpuList()
|
||||
m.cpulist = make([]C.int, len(cpulist))
|
||||
slist := getSocketCpus()
|
||||
|
||||
m.sock2tid = make(map[int]int)
|
||||
// m.numa2tid = make(map[int]int)
|
||||
for i, c := range cpulist {
|
||||
m.cpulist[i] = C.int(c)
|
||||
if sid, found := slist[m.cpulist[i]]; found {
|
||||
@@ -168,7 +183,7 @@ func (m *LikwidCollector) Init(config []byte) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *LikwidCollector) Read(interval time.Duration, out *[]lp.MutableMetric) {
|
||||
func (m *LikwidCollector) Read(interval time.Duration, output chan lp.CCMetric) {
|
||||
if !m.init {
|
||||
return
|
||||
}
|
||||
@@ -245,24 +260,28 @@ func (m *LikwidCollector) Read(interval time.Duration, out *[]lp.MutableMetric)
|
||||
for _, metric := range evset.Metrics {
|
||||
_, skip := stringArrayContains(m.config.ExcludeMetrics, metric.Name)
|
||||
if metric.Publish && !skip {
|
||||
if metric.Socket_scope {
|
||||
if metric.Scope.String() == "socket" {
|
||||
for sid, tid := range m.sock2tid {
|
||||
y, err := lp.New(metric.Name,
|
||||
map[string]string{"type": "socket", "type-id": fmt.Sprintf("%d", int(sid))},
|
||||
map[string]string{"type": "socket",
|
||||
"type-id": fmt.Sprintf("%d", int(sid))},
|
||||
m.meta,
|
||||
map[string]interface{}{"value": m.mresults[i][tid][metric.Name]},
|
||||
time.Now())
|
||||
if err == nil {
|
||||
*out = append(*out, y)
|
||||
output <- y
|
||||
}
|
||||
}
|
||||
} else {
|
||||
} else if metric.Scope.String() == "hwthread" {
|
||||
for tid, cpu := range m.cpulist {
|
||||
y, err := lp.New(metric.Name,
|
||||
map[string]string{"type": "cpu", "type-id": fmt.Sprintf("%d", int(cpu))},
|
||||
map[string]string{"type": "cpu",
|
||||
"type-id": fmt.Sprintf("%d", int(cpu))},
|
||||
m.meta,
|
||||
map[string]interface{}{"value": m.mresults[i][tid][metric.Name]},
|
||||
time.Now())
|
||||
if err == nil {
|
||||
*out = append(*out, y)
|
||||
output <- y
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -272,24 +291,28 @@ func (m *LikwidCollector) Read(interval time.Duration, out *[]lp.MutableMetric)
|
||||
for _, metric := range m.config.Metrics {
|
||||
_, skip := stringArrayContains(m.config.ExcludeMetrics, metric.Name)
|
||||
if metric.Publish && !skip {
|
||||
if metric.Socket_scope {
|
||||
if metric.Scope.String() == "socket" {
|
||||
for sid, tid := range m.sock2tid {
|
||||
y, err := lp.New(metric.Name,
|
||||
map[string]string{"type": "socket", "type-id": fmt.Sprintf("%d", int(sid))},
|
||||
map[string]string{"type": "socket",
|
||||
"type-id": fmt.Sprintf("%d", int(sid))},
|
||||
m.meta,
|
||||
map[string]interface{}{"value": m.gmresults[tid][metric.Name]},
|
||||
time.Now())
|
||||
if err == nil {
|
||||
*out = append(*out, y)
|
||||
output <- y
|
||||
}
|
||||
}
|
||||
} else {
|
||||
for tid, cpu := range m.cpulist {
|
||||
y, err := lp.New(metric.Name,
|
||||
map[string]string{"type": "cpu", "type-id": fmt.Sprintf("%d", int(cpu))},
|
||||
map[string]string{"type": "cpu",
|
||||
"type-id": fmt.Sprintf("%d", int(cpu))},
|
||||
m.meta,
|
||||
map[string]interface{}{"value": m.gmresults[tid][metric.Name]},
|
||||
time.Now())
|
||||
if err == nil {
|
||||
*out = append(*out, y)
|
||||
output <- y
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@@ -2,7 +2,7 @@ package collectors
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
lp "github.com/influxdata/line-protocol"
|
||||
lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric"
|
||||
"io/ioutil"
|
||||
"strconv"
|
||||
"strings"
|
||||
@@ -16,14 +16,14 @@ type LoadavgCollectorConfig struct {
|
||||
}
|
||||
|
||||
type LoadavgCollector struct {
|
||||
MetricCollector
|
||||
metricCollector
|
||||
tags map[string]string
|
||||
load_matches []string
|
||||
proc_matches []string
|
||||
config LoadavgCollectorConfig
|
||||
}
|
||||
|
||||
func (m *LoadavgCollector) Init(config []byte) error {
|
||||
func (m *LoadavgCollector) Init(config json.RawMessage) error {
|
||||
m.name = "LoadavgCollector"
|
||||
m.setup()
|
||||
if len(config) > 0 {
|
||||
@@ -32,6 +32,7 @@ func (m *LoadavgCollector) Init(config []byte) error {
|
||||
return err
|
||||
}
|
||||
}
|
||||
m.meta = map[string]string{"source" : m.name, "group" : "LOAD"}
|
||||
m.tags = map[string]string{"type": "node"}
|
||||
m.load_matches = []string{"load_one", "load_five", "load_fifteen"}
|
||||
m.proc_matches = []string{"proc_run", "proc_total"}
|
||||
@@ -39,7 +40,7 @@ func (m *LoadavgCollector) Init(config []byte) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *LoadavgCollector) Read(interval time.Duration, out *[]lp.MutableMetric) {
|
||||
func (m *LoadavgCollector) Read(interval time.Duration, output chan lp.CCMetric) {
|
||||
var skip bool
|
||||
if !m.init {
|
||||
return
|
||||
@@ -55,9 +56,9 @@ func (m *LoadavgCollector) Read(interval time.Duration, out *[]lp.MutableMetric)
|
||||
x, err := strconv.ParseFloat(ls[i], 64)
|
||||
if err == nil {
|
||||
_, skip = stringArrayContains(m.config.ExcludeMetrics, name)
|
||||
y, err := lp.New(name, m.tags, map[string]interface{}{"value": float64(x)}, time.Now())
|
||||
y, err := lp.New(name, m.tags, m.meta, map[string]interface{}{"value": float64(x)}, time.Now())
|
||||
if err == nil && !skip {
|
||||
*out = append(*out, y)
|
||||
output <- y
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -66,9 +67,9 @@ func (m *LoadavgCollector) Read(interval time.Duration, out *[]lp.MutableMetric)
|
||||
x, err := strconv.ParseFloat(lv[i], 64)
|
||||
if err == nil {
|
||||
_, skip = stringArrayContains(m.config.ExcludeMetrics, name)
|
||||
y, err := lp.New(name, m.tags, map[string]interface{}{"value": float64(x)}, time.Now())
|
||||
y, err := lp.New(name, m.tags, m.meta, map[string]interface{}{"value": float64(x)}, time.Now())
|
||||
if err == nil && !skip {
|
||||
*out = append(*out, y)
|
||||
output <- y
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@@ -3,7 +3,7 @@ package collectors
|
||||
import (
|
||||
"encoding/json"
|
||||
"errors"
|
||||
lp "github.com/influxdata/line-protocol"
|
||||
lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric"
|
||||
"io/ioutil"
|
||||
"log"
|
||||
"strconv"
|
||||
@@ -19,14 +19,14 @@ type LustreCollectorConfig struct {
|
||||
}
|
||||
|
||||
type LustreCollector struct {
|
||||
MetricCollector
|
||||
metricCollector
|
||||
tags map[string]string
|
||||
matches map[string]map[string]int
|
||||
devices []string
|
||||
config LustreCollectorConfig
|
||||
}
|
||||
|
||||
func (m *LustreCollector) Init(config []byte) error {
|
||||
func (m *LustreCollector) Init(config json.RawMessage) error {
|
||||
var err error
|
||||
m.name = "LustreCollector"
|
||||
if len(config) > 0 {
|
||||
@@ -37,6 +37,7 @@ func (m *LustreCollector) Init(config []byte) error {
|
||||
}
|
||||
m.setup()
|
||||
m.tags = map[string]string{"type": "node"}
|
||||
m.meta = map[string]string{"source" : m.name, "group" : "Lustre"}
|
||||
m.matches = map[string]map[string]int{"read_bytes": {"read_bytes": 6, "read_requests": 1},
|
||||
"write_bytes": {"write_bytes": 6, "write_requests": 1},
|
||||
"open": {"open": 1},
|
||||
@@ -63,7 +64,7 @@ func (m *LustreCollector) Init(config []byte) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *LustreCollector) Read(interval time.Duration, out *[]lp.MutableMetric) {
|
||||
func (m *LustreCollector) Read(interval time.Duration, output chan lp.CCMetric) {
|
||||
if !m.init {
|
||||
return
|
||||
}
|
||||
@@ -87,9 +88,12 @@ func (m *LustreCollector) Read(interval time.Duration, out *[]lp.MutableMetric)
|
||||
}
|
||||
x, err := strconv.ParseInt(lf[idx], 0, 64)
|
||||
if err == nil {
|
||||
y, err := lp.New(name, m.tags, map[string]interface{}{"value": x}, time.Now())
|
||||
y, err := lp.New(name, m.tags, m.meta, map[string]interface{}{"value": x}, time.Now())
|
||||
if err == nil {
|
||||
*out = append(*out, y)
|
||||
if strings.Contains(name, "byte") {
|
||||
y.AddMeta("unit", "Byte")
|
||||
}
|
||||
output <- y
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@@ -4,7 +4,7 @@ import (
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
lp "github.com/influxdata/line-protocol"
|
||||
lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric"
|
||||
"io/ioutil"
|
||||
"log"
|
||||
"strconv"
|
||||
@@ -19,14 +19,14 @@ type MemstatCollectorConfig struct {
|
||||
}
|
||||
|
||||
type MemstatCollector struct {
|
||||
MetricCollector
|
||||
metricCollector
|
||||
stats map[string]int64
|
||||
tags map[string]string
|
||||
matches map[string]string
|
||||
config MemstatCollectorConfig
|
||||
}
|
||||
|
||||
func (m *MemstatCollector) Init(config []byte) error {
|
||||
func (m *MemstatCollector) Init(config json.RawMessage) error {
|
||||
var err error
|
||||
m.name = "MemstatCollector"
|
||||
if len(config) > 0 {
|
||||
@@ -35,6 +35,7 @@ func (m *MemstatCollector) Init(config []byte) error {
|
||||
return err
|
||||
}
|
||||
}
|
||||
m.meta = map[string]string{"source" : m.name, "group" : "Memory", "unit": "kByte"}
|
||||
m.stats = make(map[string]int64)
|
||||
m.matches = make(map[string]string)
|
||||
m.tags = map[string]string{"type": "node"}
|
||||
@@ -64,7 +65,7 @@ func (m *MemstatCollector) Init(config []byte) error {
|
||||
return err
|
||||
}
|
||||
|
||||
func (m *MemstatCollector) Read(interval time.Duration, out *[]lp.MutableMetric) {
|
||||
func (m *MemstatCollector) Read(interval time.Duration, output chan lp.CCMetric) {
|
||||
if !m.init {
|
||||
return
|
||||
}
|
||||
@@ -96,9 +97,9 @@ func (m *MemstatCollector) Read(interval time.Duration, out *[]lp.MutableMetric)
|
||||
log.Print(err)
|
||||
continue
|
||||
}
|
||||
y, err := lp.New(name, m.tags, map[string]interface{}{"value": int(float64(m.stats[match]) * 1.0e-3)}, time.Now())
|
||||
y, err := lp.New(name, m.tags, m.meta, map[string]interface{}{"value": int(float64(m.stats[match]) * 1.0e-3)}, time.Now())
|
||||
if err == nil {
|
||||
*out = append(*out, y)
|
||||
output <- y
|
||||
}
|
||||
}
|
||||
|
||||
@@ -107,18 +108,18 @@ func (m *MemstatCollector) Read(interval time.Duration, out *[]lp.MutableMetric)
|
||||
if _, cached := m.stats[`Cached`]; cached {
|
||||
memUsed := m.stats[`MemTotal`] - (m.stats[`MemFree`] + m.stats[`Buffers`] + m.stats[`Cached`])
|
||||
_, skip := stringArrayContains(m.config.ExcludeMetrics, "mem_used")
|
||||
y, err := lp.New("mem_used", m.tags, map[string]interface{}{"value": int(float64(memUsed) * 1.0e-3)}, time.Now())
|
||||
y, err := lp.New("mem_used", m.tags, m.meta, map[string]interface{}{"value": int(float64(memUsed) * 1.0e-3)}, time.Now())
|
||||
if err == nil && !skip {
|
||||
*out = append(*out, y)
|
||||
output <- y
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if _, found := m.stats[`MemShared`]; found {
|
||||
_, skip := stringArrayContains(m.config.ExcludeMetrics, "mem_shared")
|
||||
y, err := lp.New("mem_shared", m.tags, map[string]interface{}{"value": int(float64(m.stats[`MemShared`]) * 1.0e-3)}, time.Now())
|
||||
y, err := lp.New("mem_shared", m.tags, m.meta, map[string]interface{}{"value": int(float64(m.stats[`MemShared`]) * 1.0e-3)}, time.Now())
|
||||
if err == nil && !skip {
|
||||
*out = append(*out, y)
|
||||
output <- y
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@@ -2,36 +2,40 @@ package collectors
|
||||
|
||||
import (
|
||||
"errors"
|
||||
lp "github.com/influxdata/line-protocol"
|
||||
lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric"
|
||||
influx "github.com/influxdata/line-protocol"
|
||||
"io/ioutil"
|
||||
"log"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
"encoding/json"
|
||||
)
|
||||
|
||||
type MetricGetter interface {
|
||||
type MetricCollector interface {
|
||||
Name() string
|
||||
Init(config []byte) error
|
||||
Init(config json.RawMessage) error
|
||||
Initialized() bool
|
||||
Read(time.Duration, *[]lp.MutableMetric)
|
||||
Read(duration time.Duration, output chan lp.CCMetric)
|
||||
Close()
|
||||
}
|
||||
|
||||
type MetricCollector struct {
|
||||
type metricCollector struct {
|
||||
output chan lp.CCMetric
|
||||
name string
|
||||
init bool
|
||||
meta map[string]string
|
||||
}
|
||||
|
||||
func (c *MetricCollector) Name() string {
|
||||
func (c *metricCollector) Name() string {
|
||||
return c.name
|
||||
}
|
||||
|
||||
func (c *MetricCollector) setup() error {
|
||||
func (c *metricCollector) setup() error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (c *MetricCollector) Initialized() bool {
|
||||
func (c *metricCollector) Initialized() bool {
|
||||
return c.init == true
|
||||
}
|
||||
|
||||
@@ -103,7 +107,7 @@ func CpuList() []int {
|
||||
return cpulist
|
||||
}
|
||||
|
||||
func Tags2Map(metric lp.Metric) map[string]string {
|
||||
func Tags2Map(metric influx.Metric) map[string]string {
|
||||
tags := make(map[string]string)
|
||||
for _, t := range metric.TagList() {
|
||||
tags[t.Key] = t.Value
|
||||
@@ -111,7 +115,7 @@ func Tags2Map(metric lp.Metric) map[string]string {
|
||||
return tags
|
||||
}
|
||||
|
||||
func Fields2Map(metric lp.Metric) map[string]interface{} {
|
||||
func Fields2Map(metric influx.Metric) map[string]interface{} {
|
||||
fields := make(map[string]interface{})
|
||||
for _, f := range metric.FieldList() {
|
||||
fields[f.Key] = f.Value
|
||||
|
@@ -2,7 +2,7 @@ package collectors
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
lp "github.com/influxdata/line-protocol"
|
||||
lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric"
|
||||
"io/ioutil"
|
||||
"log"
|
||||
"strconv"
|
||||
@@ -17,14 +17,15 @@ type NetstatCollectorConfig struct {
|
||||
}
|
||||
|
||||
type NetstatCollector struct {
|
||||
MetricCollector
|
||||
metricCollector
|
||||
config NetstatCollectorConfig
|
||||
matches map[int]string
|
||||
}
|
||||
|
||||
func (m *NetstatCollector) Init(config []byte) error {
|
||||
func (m *NetstatCollector) Init(config json.RawMessage) error {
|
||||
m.name = "NetstatCollector"
|
||||
m.setup()
|
||||
m.meta = map[string]string{"source" : m.name, "group" : "Memory"}
|
||||
m.matches = map[int]string{
|
||||
1: "bytes_in",
|
||||
9: "bytes_out",
|
||||
@@ -45,7 +46,7 @@ func (m *NetstatCollector) Init(config []byte) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *NetstatCollector) Read(interval time.Duration, out *[]lp.MutableMetric) {
|
||||
func (m *NetstatCollector) Read(interval time.Duration, output chan lp.CCMetric) {
|
||||
data, err := ioutil.ReadFile(string(NETSTATFILE))
|
||||
if err != nil {
|
||||
log.Print(err.Error())
|
||||
@@ -72,9 +73,15 @@ func (m *NetstatCollector) Read(interval time.Duration, out *[]lp.MutableMetric)
|
||||
for i, name := range m.matches {
|
||||
v, err := strconv.ParseInt(f[i], 10, 0)
|
||||
if err == nil {
|
||||
y, err := lp.New(name, tags, map[string]interface{}{"value": int(float64(v) * 1.0e-3)}, time.Now())
|
||||
y, err := lp.New(name, tags, m.meta, map[string]interface{}{"value": int(float64(v) * 1.0e-3)}, time.Now())
|
||||
if err == nil {
|
||||
*out = append(*out, y)
|
||||
switch {
|
||||
case strings.Contains(name, "byte"):
|
||||
y.AddMeta("unit", "Byte")
|
||||
case strings.Contains(name, "pkt"):
|
||||
y.AddMeta("unit", "Packets")
|
||||
}
|
||||
output <- y
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@@ -5,7 +5,7 @@ import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"github.com/NVIDIA/go-nvml/pkg/nvml"
|
||||
lp "github.com/influxdata/line-protocol"
|
||||
lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric"
|
||||
"log"
|
||||
"time"
|
||||
)
|
||||
@@ -16,7 +16,7 @@ type NvidiaCollectorConfig struct {
|
||||
}
|
||||
|
||||
type NvidiaCollector struct {
|
||||
MetricCollector
|
||||
metricCollector
|
||||
num_gpus int
|
||||
config NvidiaCollectorConfig
|
||||
}
|
||||
@@ -28,10 +28,11 @@ func (m *NvidiaCollector) CatchPanic() {
|
||||
}
|
||||
}
|
||||
|
||||
func (m *NvidiaCollector) Init(config []byte) error {
|
||||
func (m *NvidiaCollector) Init(config json.RawMessage) error {
|
||||
var err error
|
||||
m.name = "NvidiaCollector"
|
||||
m.setup()
|
||||
m.meta = map[string]string{"source" : m.name, "group" : "Nvidia"}
|
||||
if len(config) > 0 {
|
||||
err = json.Unmarshal(config, &m.config)
|
||||
if err != nil {
|
||||
@@ -54,7 +55,7 @@ func (m *NvidiaCollector) Init(config []byte) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *NvidiaCollector) Read(interval time.Duration, out *[]lp.MutableMetric) {
|
||||
func (m *NvidiaCollector) Read(interval time.Duration, output chan lp.CCMetric) {
|
||||
if !m.init {
|
||||
return
|
||||
}
|
||||
@@ -73,14 +74,14 @@ func (m *NvidiaCollector) Read(interval time.Duration, out *[]lp.MutableMetric)
|
||||
util, ret := nvml.DeviceGetUtilizationRates(device)
|
||||
if ret == nvml.SUCCESS {
|
||||
_, skip = stringArrayContains(m.config.ExcludeMetrics, "util")
|
||||
y, err := lp.New("util", tags, map[string]interface{}{"value": float64(util.Gpu)}, time.Now())
|
||||
y, err := lp.New("util", tags, m.meta, map[string]interface{}{"value": float64(util.Gpu)}, time.Now())
|
||||
if err == nil && !skip {
|
||||
*out = append(*out, y)
|
||||
output <- y
|
||||
}
|
||||
_, skip = stringArrayContains(m.config.ExcludeMetrics, "mem_util")
|
||||
y, err = lp.New("mem_util", tags, map[string]interface{}{"value": float64(util.Memory)}, time.Now())
|
||||
y, err = lp.New("mem_util", tags, m.meta, map[string]interface{}{"value": float64(util.Memory)}, time.Now())
|
||||
if err == nil && !skip {
|
||||
*out = append(*out, y)
|
||||
output <- y
|
||||
}
|
||||
}
|
||||
|
||||
@@ -88,174 +89,177 @@ func (m *NvidiaCollector) Read(interval time.Duration, out *[]lp.MutableMetric)
|
||||
if ret == nvml.SUCCESS {
|
||||
t := float64(meminfo.Total) / (1024 * 1024)
|
||||
_, skip = stringArrayContains(m.config.ExcludeMetrics, "mem_total")
|
||||
y, err := lp.New("mem_total", tags, map[string]interface{}{"value": t}, time.Now())
|
||||
y, err := lp.New("mem_total", tags, m.meta, map[string]interface{}{"value": t}, time.Now())
|
||||
if err == nil && !skip {
|
||||
*out = append(*out, y)
|
||||
y.AddMeta("unit", "MByte")
|
||||
output <- y
|
||||
}
|
||||
f := float64(meminfo.Used) / (1024 * 1024)
|
||||
_, skip = stringArrayContains(m.config.ExcludeMetrics, "fb_memory")
|
||||
y, err = lp.New("fb_memory", tags, map[string]interface{}{"value": f}, time.Now())
|
||||
y, err = lp.New("fb_memory", tags, m.meta, map[string]interface{}{"value": f}, time.Now())
|
||||
if err == nil && !skip {
|
||||
*out = append(*out, y)
|
||||
y.AddMeta("unit", "MByte")
|
||||
output <- y
|
||||
}
|
||||
}
|
||||
|
||||
temp, ret := nvml.DeviceGetTemperature(device, nvml.TEMPERATURE_GPU)
|
||||
if ret == nvml.SUCCESS {
|
||||
_, skip = stringArrayContains(m.config.ExcludeMetrics, "temp")
|
||||
y, err := lp.New("temp", tags, map[string]interface{}{"value": float64(temp)}, time.Now())
|
||||
y, err := lp.New("temp", tags, m.meta, map[string]interface{}{"value": float64(temp)}, time.Now())
|
||||
if err == nil && !skip {
|
||||
*out = append(*out, y)
|
||||
y.AddMeta("unit", "degC")
|
||||
output <- y
|
||||
}
|
||||
}
|
||||
|
||||
fan, ret := nvml.DeviceGetFanSpeed(device)
|
||||
if ret == nvml.SUCCESS {
|
||||
_, skip = stringArrayContains(m.config.ExcludeMetrics, "fan")
|
||||
y, err := lp.New("fan", tags, map[string]interface{}{"value": float64(fan)}, time.Now())
|
||||
y, err := lp.New("fan", tags, m.meta, map[string]interface{}{"value": float64(fan)}, time.Now())
|
||||
if err == nil && !skip {
|
||||
*out = append(*out, y)
|
||||
output <- y
|
||||
}
|
||||
}
|
||||
|
||||
_, ecc_pend, ret := nvml.DeviceGetEccMode(device)
|
||||
if ret == nvml.SUCCESS {
|
||||
var y lp.MutableMetric
|
||||
var y lp.CCMetric
|
||||
var err error
|
||||
switch ecc_pend {
|
||||
case nvml.FEATURE_DISABLED:
|
||||
y, err = lp.New("ecc_mode", tags, map[string]interface{}{"value": string("OFF")}, time.Now())
|
||||
y, err = lp.New("ecc_mode", tags, m.meta, map[string]interface{}{"value": string("OFF")}, time.Now())
|
||||
case nvml.FEATURE_ENABLED:
|
||||
y, err = lp.New("ecc_mode", tags, map[string]interface{}{"value": string("ON")}, time.Now())
|
||||
y, err = lp.New("ecc_mode", tags, m.meta, map[string]interface{}{"value": string("ON")}, time.Now())
|
||||
default:
|
||||
y, err = lp.New("ecc_mode", tags, map[string]interface{}{"value": string("UNKNOWN")}, time.Now())
|
||||
y, err = lp.New("ecc_mode", tags, m.meta, map[string]interface{}{"value": string("UNKNOWN")}, time.Now())
|
||||
}
|
||||
_, skip = stringArrayContains(m.config.ExcludeMetrics, "ecc_mode")
|
||||
if err == nil && !skip {
|
||||
*out = append(*out, y)
|
||||
output <- y
|
||||
}
|
||||
} else if ret == nvml.ERROR_NOT_SUPPORTED {
|
||||
_, skip = stringArrayContains(m.config.ExcludeMetrics, "ecc_mode")
|
||||
y, err := lp.New("ecc_mode", tags, map[string]interface{}{"value": string("N/A")}, time.Now())
|
||||
y, err := lp.New("ecc_mode", tags, m.meta, map[string]interface{}{"value": string("N/A")}, time.Now())
|
||||
if err == nil && !skip {
|
||||
*out = append(*out, y)
|
||||
output <- y
|
||||
}
|
||||
}
|
||||
|
||||
pstate, ret := nvml.DeviceGetPerformanceState(device)
|
||||
if ret == nvml.SUCCESS {
|
||||
_, skip = stringArrayContains(m.config.ExcludeMetrics, "perf_state")
|
||||
y, err := lp.New("perf_state", tags, map[string]interface{}{"value": fmt.Sprintf("P%d", int(pstate))}, time.Now())
|
||||
y, err := lp.New("perf_state", tags, m.meta, map[string]interface{}{"value": fmt.Sprintf("P%d", int(pstate))}, time.Now())
|
||||
if err == nil && !skip {
|
||||
*out = append(*out, y)
|
||||
output <- y
|
||||
}
|
||||
}
|
||||
|
||||
power, ret := nvml.DeviceGetPowerUsage(device)
|
||||
if ret == nvml.SUCCESS {
|
||||
_, skip = stringArrayContains(m.config.ExcludeMetrics, "power_usage_report")
|
||||
y, err := lp.New("power_usage_report", tags, map[string]interface{}{"value": float64(power) / 1000}, time.Now())
|
||||
y, err := lp.New("power_usage_report", tags, m.meta, map[string]interface{}{"value": float64(power) / 1000}, time.Now())
|
||||
if err == nil && !skip {
|
||||
*out = append(*out, y)
|
||||
output <- y
|
||||
}
|
||||
}
|
||||
|
||||
gclk, ret := nvml.DeviceGetClockInfo(device, nvml.CLOCK_GRAPHICS)
|
||||
if ret == nvml.SUCCESS {
|
||||
_, skip = stringArrayContains(m.config.ExcludeMetrics, "graphics_clock_report")
|
||||
y, err := lp.New("graphics_clock_report", tags, map[string]interface{}{"value": float64(gclk)}, time.Now())
|
||||
y, err := lp.New("graphics_clock_report", tags, m.meta, map[string]interface{}{"value": float64(gclk)}, time.Now())
|
||||
if err == nil && !skip {
|
||||
*out = append(*out, y)
|
||||
output <- y
|
||||
}
|
||||
}
|
||||
|
||||
smclk, ret := nvml.DeviceGetClockInfo(device, nvml.CLOCK_SM)
|
||||
if ret == nvml.SUCCESS {
|
||||
_, skip = stringArrayContains(m.config.ExcludeMetrics, "sm_clock_report")
|
||||
y, err := lp.New("sm_clock_report", tags, map[string]interface{}{"value": float64(smclk)}, time.Now())
|
||||
y, err := lp.New("sm_clock_report", tags, m.meta, map[string]interface{}{"value": float64(smclk)}, time.Now())
|
||||
if err == nil && !skip {
|
||||
*out = append(*out, y)
|
||||
output <- y
|
||||
}
|
||||
}
|
||||
|
||||
memclk, ret := nvml.DeviceGetClockInfo(device, nvml.CLOCK_MEM)
|
||||
if ret == nvml.SUCCESS {
|
||||
_, skip = stringArrayContains(m.config.ExcludeMetrics, "mem_clock_report")
|
||||
y, err := lp.New("mem_clock_report", tags, map[string]interface{}{"value": float64(memclk)}, time.Now())
|
||||
y, err := lp.New("mem_clock_report", tags, m.meta, map[string]interface{}{"value": float64(memclk)}, time.Now())
|
||||
if err == nil && !skip {
|
||||
*out = append(*out, y)
|
||||
output <- y
|
||||
}
|
||||
}
|
||||
|
||||
max_gclk, ret := nvml.DeviceGetMaxClockInfo(device, nvml.CLOCK_GRAPHICS)
|
||||
if ret == nvml.SUCCESS {
|
||||
_, skip = stringArrayContains(m.config.ExcludeMetrics, "max_graphics_clock")
|
||||
y, err := lp.New("max_graphics_clock", tags, map[string]interface{}{"value": float64(max_gclk)}, time.Now())
|
||||
y, err := lp.New("max_graphics_clock", tags, m.meta, map[string]interface{}{"value": float64(max_gclk)}, time.Now())
|
||||
if err == nil && !skip {
|
||||
*out = append(*out, y)
|
||||
output <- y
|
||||
}
|
||||
}
|
||||
|
||||
max_smclk, ret := nvml.DeviceGetClockInfo(device, nvml.CLOCK_SM)
|
||||
if ret == nvml.SUCCESS {
|
||||
_, skip = stringArrayContains(m.config.ExcludeMetrics, "max_sm_clock")
|
||||
y, err := lp.New("max_sm_clock", tags, map[string]interface{}{"value": float64(max_smclk)}, time.Now())
|
||||
y, err := lp.New("max_sm_clock", tags, m.meta, map[string]interface{}{"value": float64(max_smclk)}, time.Now())
|
||||
if err == nil && !skip {
|
||||
*out = append(*out, y)
|
||||
output <- y
|
||||
}
|
||||
}
|
||||
|
||||
max_memclk, ret := nvml.DeviceGetClockInfo(device, nvml.CLOCK_MEM)
|
||||
if ret == nvml.SUCCESS {
|
||||
_, skip = stringArrayContains(m.config.ExcludeMetrics, "max_mem_clock")
|
||||
y, err := lp.New("max_mem_clock", tags, map[string]interface{}{"value": float64(max_memclk)}, time.Now())
|
||||
y, err := lp.New("max_mem_clock", tags, m.meta, map[string]interface{}{"value": float64(max_memclk)}, time.Now())
|
||||
if err == nil && !skip {
|
||||
*out = append(*out, y)
|
||||
output <- y
|
||||
}
|
||||
}
|
||||
|
||||
ecc_db, ret := nvml.DeviceGetTotalEccErrors(device, 1, 1)
|
||||
if ret == nvml.SUCCESS {
|
||||
_, skip = stringArrayContains(m.config.ExcludeMetrics, "ecc_db_error")
|
||||
y, err := lp.New("ecc_db_error", tags, map[string]interface{}{"value": float64(ecc_db)}, time.Now())
|
||||
y, err := lp.New("ecc_db_error", tags, m.meta, map[string]interface{}{"value": float64(ecc_db)}, time.Now())
|
||||
if err == nil && !skip {
|
||||
*out = append(*out, y)
|
||||
output <- y
|
||||
}
|
||||
}
|
||||
|
||||
ecc_sb, ret := nvml.DeviceGetTotalEccErrors(device, 0, 1)
|
||||
if ret == nvml.SUCCESS {
|
||||
_, skip = stringArrayContains(m.config.ExcludeMetrics, "ecc_sb_error")
|
||||
y, err := lp.New("ecc_sb_error", tags, map[string]interface{}{"value": float64(ecc_sb)}, time.Now())
|
||||
y, err := lp.New("ecc_sb_error", tags, m.meta, map[string]interface{}{"value": float64(ecc_sb)}, time.Now())
|
||||
if err == nil && !skip {
|
||||
*out = append(*out, y)
|
||||
output <- y
|
||||
}
|
||||
}
|
||||
|
||||
pwr_limit, ret := nvml.DeviceGetPowerManagementLimit(device)
|
||||
if ret == nvml.SUCCESS {
|
||||
_, skip = stringArrayContains(m.config.ExcludeMetrics, "power_man_limit")
|
||||
y, err := lp.New("power_man_limit", tags, map[string]interface{}{"value": float64(pwr_limit)}, time.Now())
|
||||
y, err := lp.New("power_man_limit", tags, m.meta, map[string]interface{}{"value": float64(pwr_limit)}, time.Now())
|
||||
if err == nil && !skip {
|
||||
*out = append(*out, y)
|
||||
output <- y
|
||||
}
|
||||
}
|
||||
|
||||
enc_util, _, ret := nvml.DeviceGetEncoderUtilization(device)
|
||||
if ret == nvml.SUCCESS {
|
||||
_, skip = stringArrayContains(m.config.ExcludeMetrics, "encoder_util")
|
||||
y, err := lp.New("encoder_util", tags, map[string]interface{}{"value": float64(enc_util)}, time.Now())
|
||||
y, err := lp.New("encoder_util", tags, m.meta, map[string]interface{}{"value": float64(enc_util)}, time.Now())
|
||||
if err == nil && !skip {
|
||||
*out = append(*out, y)
|
||||
output <- y
|
||||
}
|
||||
}
|
||||
|
||||
dec_util, _, ret := nvml.DeviceGetDecoderUtilization(device)
|
||||
if ret == nvml.SUCCESS {
|
||||
_, skip = stringArrayContains(m.config.ExcludeMetrics, "decoder_util")
|
||||
y, err := lp.New("decoder_util", tags, map[string]interface{}{"value": float64(dec_util)}, time.Now())
|
||||
y, err := lp.New("decoder_util", tags, m.meta, map[string]interface{}{"value": float64(dec_util)}, time.Now())
|
||||
if err == nil && !skip {
|
||||
*out = append(*out, y)
|
||||
output <- y
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@@ -3,13 +3,14 @@ package collectors
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
lp "github.com/influxdata/line-protocol"
|
||||
lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric"
|
||||
"io/ioutil"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
"log"
|
||||
)
|
||||
|
||||
const HWMON_PATH = `/sys/class/hwmon`
|
||||
@@ -20,20 +21,21 @@ type TempCollectorConfig struct {
|
||||
}
|
||||
|
||||
type TempCollector struct {
|
||||
MetricCollector
|
||||
metricCollector
|
||||
config TempCollectorConfig
|
||||
}
|
||||
|
||||
func (m *TempCollector) Init(config []byte) error {
|
||||
func (m *TempCollector) Init(config json.RawMessage) error {
|
||||
m.name = "TempCollector"
|
||||
m.setup()
|
||||
m.init = true
|
||||
m.meta = map[string]string{"source" : m.name, "group" : "IPMI", "unit": "degC"}
|
||||
if len(config) > 0 {
|
||||
err := json.Unmarshal(config, &m.config)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
m.init = true
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -73,7 +75,7 @@ func get_hwmon_sensors() (map[string]map[string]string, error) {
|
||||
return sensors, nil
|
||||
}
|
||||
|
||||
func (m *TempCollector) Read(interval time.Duration, out *[]lp.MutableMetric) {
|
||||
func (m *TempCollector) Read(interval time.Duration, output chan lp.CCMetric) {
|
||||
|
||||
sensors, err := get_hwmon_sensors()
|
||||
if err != nil {
|
||||
@@ -88,15 +90,20 @@ func (m *TempCollector) Read(interval time.Duration, out *[]lp.MutableMetric) {
|
||||
break
|
||||
}
|
||||
}
|
||||
mname := strings.Replace(name, " ", "_", -1)
|
||||
if !strings.Contains(mname, "temp") {
|
||||
mname = fmt.Sprintf("temp_%s", mname)
|
||||
}
|
||||
buffer, err := ioutil.ReadFile(string(file))
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
x, err := strconv.ParseInt(strings.Replace(string(buffer), "\n", "", -1), 0, 64)
|
||||
if err == nil {
|
||||
y, err := lp.New(strings.ToLower(name), tags, map[string]interface{}{"value": float64(x) / 1000}, time.Now())
|
||||
y, err := lp.New(strings.ToLower(mname), tags, m.meta, map[string]interface{}{"value": int(float64(x) / 1000)}, time.Now())
|
||||
if err == nil {
|
||||
*out = append(*out, y)
|
||||
log.Print("[", m.name, "] ",y)
|
||||
output <- y
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@@ -4,7 +4,7 @@ import (
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
lp "github.com/influxdata/line-protocol"
|
||||
lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric"
|
||||
"log"
|
||||
"os/exec"
|
||||
"strings"
|
||||
@@ -19,15 +19,16 @@ type TopProcsCollectorConfig struct {
|
||||
}
|
||||
|
||||
type TopProcsCollector struct {
|
||||
MetricCollector
|
||||
metricCollector
|
||||
tags map[string]string
|
||||
config TopProcsCollectorConfig
|
||||
}
|
||||
|
||||
func (m *TopProcsCollector) Init(config []byte) error {
|
||||
func (m *TopProcsCollector) Init(config json.RawMessage) error {
|
||||
var err error
|
||||
m.name = "TopProcsCollector"
|
||||
m.tags = map[string]string{"type": "node"}
|
||||
m.meta = map[string]string{"source" : m.name, "group" : "TopProcs"}
|
||||
if len(config) > 0 {
|
||||
err = json.Unmarshal(config, &m.config)
|
||||
if err != nil {
|
||||
@@ -50,7 +51,7 @@ func (m *TopProcsCollector) Init(config []byte) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *TopProcsCollector) Read(interval time.Duration, out *[]lp.MutableMetric) {
|
||||
func (m *TopProcsCollector) Read(interval time.Duration, output chan lp.CCMetric) {
|
||||
if !m.init {
|
||||
return
|
||||
}
|
||||
@@ -65,9 +66,9 @@ func (m *TopProcsCollector) Read(interval time.Duration, out *[]lp.MutableMetric
|
||||
lines := strings.Split(string(stdout), "\n")
|
||||
for i := 1; i < m.config.num_procs+1; i++ {
|
||||
name := fmt.Sprintf("topproc%d", i)
|
||||
y, err := lp.New(name, m.tags, map[string]interface{}{"value": string(lines[i])}, time.Now())
|
||||
y, err := lp.New(name, m.tags, m.meta, map[string]interface{}{"value": string(lines[i])}, time.Now())
|
||||
if err == nil {
|
||||
*out = append(*out, y)
|
||||
output <- y
|
||||
}
|
||||
}
|
||||
}
|
||||
|
Reference in New Issue
Block a user