mirror of
https://github.com/ClusterCockpit/cc-metric-collector.git
synced 2025-01-24 21:09:06 +01:00
200af84c54
* Use channels, add a metric router, split up configuration and use extended version of Influx line protocol internally * Use central timer for collectors and router. Add expressions to router * Add expression to router config * Update entry points * Start with README * Update README for CCMetric * Formatting * Update README.md * Add README for MultiChanTicker * Add README for MultiChanTicker * Update README.md * Add README to metric router * Update main README * Remove SinkEntity type * Update README for sinks * Update go files * Update README for receivers * Update collectors README * Update collectors README * Use seperate page per collector * Fix for tempstat page * Add docs for customcmd collector * Add docs for ipmistat collector * Add docs for topprocs collector * Update customCmdMetric.md * Use seconds when calculating LIKWID metrics * Add IB metrics ib_recv_pkts and ib_xmit_pkts * Drop domain part of host name * Updated to latest stable version of likwid * Define source code dependencies in Makefile * Add GPFS / IBM Spectrum Scale collector * Add vet and staticcheck make targets * Add vet and staticcheck make targets * Avoid go vet warning: struct field tag `json:"..., omitempty"` not compatible with reflect.StructTag.Get: suspicious space in struct tag value struct field tag `json:"...", omitempty` not compatible with reflect.StructTag.Get: key:"value" pairs not separated by spaces * Add sample collector to README.md * Add CPU frequency collector * Avoid staticcheck warning: redundant return statement * Avoid staticcheck warning: unnecessary assignment to the blank identifier * Simplified code * Add CPUFreqCollectorCpuinfo a metric collector to measure the current frequency of the CPUs as obtained from /proc/cpuinfo Only measure on the first hyperthread * Add collector for NFS clients * Move publication of metrics into Flush() for NatsSink * Update GitHub actions * Refactoring * Avoid vet warning: Println arg list ends with redundant newline * Avoid vet 
warning struct field commands has json tag but is not exported * Avoid vet warning: return copies lock value. * Corrected typo * Refactoring * Add go sources in internal/... * Bad separator in Makefile * Fix Infiniband collector Co-authored-by: Holger Obermaier <40787752+ho-ob@users.noreply.github.com>
264 lines
7.5 KiB
Go
264 lines
7.5 KiB
Go
package collectors
|
|
|
|
import (
|
|
"fmt"
|
|
"io/ioutil"
|
|
"log"
|
|
"os/exec"
|
|
lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric"
|
|
// "os"
|
|
"encoding/json"
|
|
"errors"
|
|
"path/filepath"
|
|
"strconv"
|
|
"strings"
|
|
"time"
|
|
)
|
|
|
|
// Filesystem locations used by the InfiniBand collector.
const (
	// IBBASEPATH is the sysfs directory below which the InfiniBand devices
	// are searched. The trailing slash is part of the value.
	IBBASEPATH = "/sys/class/infiniband/"

	// PERFQUERY is a fixed location of the perfquery binary.
	// NOTE(review): nothing in this file reads it; the collector resolves
	// perfquery from the configuration or $PATH instead.
	PERFQUERY = "/usr/sbin/perfquery"
)
|
|
|
|
// InfinibandCollectorConfig holds the options that can be set for the
// InfiniBand collector in the JSON configuration.
type InfinibandCollectorConfig struct {
	// PerfQueryPath is the path of the perfquery executable
	// (JSON key "perfquery_path").
	PerfQueryPath string `json:"perfquery_path"`

	// ExcludeDevices lists device names that must not be monitored
	// (JSON key "exclude_devices").
	ExcludeDevices []string `json:"exclude_devices,omitempty"`
}
|
|
|
|
type InfinibandCollector struct {
|
|
metricCollector
|
|
tags map[string]string
|
|
lids map[string]map[string]string
|
|
config InfinibandCollectorConfig
|
|
use_perfquery bool
|
|
}
|
|
|
|
func (m *InfinibandCollector) Help() {
|
|
fmt.Println("This collector includes all devices that can be found below ", IBBASEPATH)
|
|
fmt.Println("and where any of the ports provides a 'lid' file (glob ", IBBASEPATH, "/<dev>/ports/<port>/lid).")
|
|
fmt.Println("The devices can be filtered with the 'exclude_devices' option in the configuration.")
|
|
fmt.Println("For each found LIDs the collector calls the 'perfquery' command")
|
|
fmt.Println("The path to the 'perfquery' command can be configured with the 'perfquery_path' option")
|
|
fmt.Println("in the configuration")
|
|
fmt.Println("")
|
|
fmt.Println("Full configuration object:")
|
|
fmt.Println("\"ibstat\" : {")
|
|
fmt.Println(" \"perfquery_path\" : \"path/to/perfquery\" # if omitted, it searches in $PATH")
|
|
fmt.Println(" \"exclude_devices\" : [\"dev1\"]")
|
|
fmt.Println("}")
|
|
fmt.Println("")
|
|
fmt.Println("Metrics:")
|
|
fmt.Println("- ib_recv")
|
|
fmt.Println("- ib_xmit")
|
|
fmt.Println("- ib_recv_pkts")
|
|
fmt.Println("- ib_xmit_pkts")
|
|
}
|
|
|
|
func (m *InfinibandCollector) Init(config json.RawMessage) error {
|
|
var err error
|
|
m.name = "InfinibandCollector"
|
|
m.use_perfquery = false
|
|
m.setup()
|
|
m.meta = map[string]string{"source": m.name, "group": "Network"}
|
|
m.tags = map[string]string{"type": "node"}
|
|
if len(config) > 0 {
|
|
err = json.Unmarshal(config, &m.config)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
}
|
|
if len(m.config.PerfQueryPath) == 0 {
|
|
path, err := exec.LookPath("perfquery")
|
|
if err == nil {
|
|
m.config.PerfQueryPath = path
|
|
}
|
|
}
|
|
m.lids = make(map[string]map[string]string)
|
|
p := fmt.Sprintf("%s/*/ports/*/lid", string(IBBASEPATH))
|
|
files, err := filepath.Glob(p)
|
|
for _, f := range files {
|
|
lid, err := ioutil.ReadFile(f)
|
|
if err == nil {
|
|
plist := strings.Split(strings.Replace(f, string(IBBASEPATH), "", -1), "/")
|
|
skip := false
|
|
for _, d := range m.config.ExcludeDevices {
|
|
if d == plist[0] {
|
|
skip = true
|
|
}
|
|
}
|
|
if !skip {
|
|
m.lids[plist[0]] = make(map[string]string)
|
|
m.lids[plist[0]][plist[2]] = string(lid)
|
|
}
|
|
}
|
|
}
|
|
|
|
for _, ports := range m.lids {
|
|
for port, lid := range ports {
|
|
args := fmt.Sprintf("-r %s %s 0xf000", lid, port)
|
|
command := exec.Command(m.config.PerfQueryPath, args)
|
|
command.Wait()
|
|
_, err := command.Output()
|
|
if err == nil {
|
|
m.use_perfquery = true
|
|
}
|
|
break
|
|
}
|
|
break
|
|
}
|
|
|
|
if len(m.lids) > 0 {
|
|
m.init = true
|
|
} else {
|
|
err = errors.New("No usable devices")
|
|
}
|
|
|
|
return err
|
|
}
|
|
|
|
func (m *InfinibandCollector) doPerfQuery(cmd string, dev string, lid string, port string, tags map[string]string, output chan lp.CCMetric) error {
|
|
|
|
args := fmt.Sprintf("-r %s %s 0xf000", lid, port)
|
|
command := exec.Command(cmd, args)
|
|
command.Wait()
|
|
stdout, err := command.Output()
|
|
if err != nil {
|
|
log.Print(err)
|
|
return err
|
|
}
|
|
ll := strings.Split(string(stdout), "\n")
|
|
|
|
for _, line := range ll {
|
|
if strings.HasPrefix(line, "PortRcvData") || strings.HasPrefix(line, "RcvData") {
|
|
lv := strings.Fields(line)
|
|
v, err := strconv.ParseFloat(lv[1], 64)
|
|
if err == nil {
|
|
y, err := lp.New("ib_recv", tags, m.meta, map[string]interface{}{"value": float64(v)}, time.Now())
|
|
if err == nil {
|
|
output <- y
|
|
}
|
|
}
|
|
}
|
|
if strings.HasPrefix(line, "PortXmitData") || strings.HasPrefix(line, "XmtData") {
|
|
lv := strings.Fields(line)
|
|
v, err := strconv.ParseFloat(lv[1], 64)
|
|
if err == nil {
|
|
y, err := lp.New("ib_xmit", tags, m.meta, map[string]interface{}{"value": float64(v)}, time.Now())
|
|
if err == nil {
|
|
output <- y
|
|
}
|
|
}
|
|
}
|
|
if strings.HasPrefix(line, "PortRcvPkts") || strings.HasPrefix(line, "RcvPkts") {
|
|
lv := strings.Fields(line)
|
|
v, err := strconv.ParseFloat(lv[1], 64)
|
|
if err == nil {
|
|
y, err := lp.New("ib_recv_pkts", tags, m.meta, map[string]interface{}{"value": float64(v)}, time.Now())
|
|
if err == nil {
|
|
output <- y
|
|
}
|
|
}
|
|
}
|
|
if strings.HasPrefix(line, "PortXmitPkts") || strings.HasPrefix(line, "XmtPkts") {
|
|
lv := strings.Fields(line)
|
|
v, err := strconv.ParseFloat(lv[1], 64)
|
|
if err == nil {
|
|
y, err := lp.New("ib_xmit_pkts", tags, m.meta, map[string]interface{}{"value": float64(v)}, time.Now())
|
|
if err == nil {
|
|
output <- y
|
|
}
|
|
}
|
|
}
|
|
if strings.HasPrefix(line, "PortRcvPkts") || strings.HasPrefix(line, "RcvPkts") {
|
|
lv := strings.Fields(line)
|
|
v, err := strconv.ParseFloat(lv[1], 64)
|
|
if err == nil {
|
|
y, err := lp.New("ib_recv_pkts", tags, m.meta, map[string]interface{}{"value": float64(v)}, time.Now())
|
|
if err == nil {
|
|
output <- y
|
|
}
|
|
}
|
|
}
|
|
if strings.HasPrefix(line, "PortXmitPkts") || strings.HasPrefix(line, "XmtPkts") {
|
|
lv := strings.Fields(line)
|
|
v, err := strconv.ParseFloat(lv[1], 64)
|
|
if err == nil {
|
|
y, err := lp.New("ib_xmit_pkts", tags, m.meta, map[string]interface{}{"value": float64(v)}, time.Now())
|
|
if err == nil {
|
|
output <- y
|
|
}
|
|
}
|
|
}
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func (m *InfinibandCollector) doSysfsRead(dev string, lid string, port string, tags map[string]string, output chan lp.CCMetric) error {
|
|
path := fmt.Sprintf("%s/%s/ports/%s/counters/", string(IBBASEPATH), dev, port)
|
|
buffer, err := ioutil.ReadFile(fmt.Sprintf("%s/port_rcv_data", path))
|
|
if err == nil {
|
|
data := strings.Replace(string(buffer), "\n", "", -1)
|
|
v, err := strconv.ParseFloat(data, 64)
|
|
if err == nil {
|
|
y, err := lp.New("ib_recv", tags, m.meta, map[string]interface{}{"value": float64(v)}, time.Now())
|
|
if err == nil {
|
|
output <- y
|
|
}
|
|
}
|
|
}
|
|
buffer, err = ioutil.ReadFile(fmt.Sprintf("%s/port_xmit_data", path))
|
|
if err == nil {
|
|
data := strings.Replace(string(buffer), "\n", "", -1)
|
|
v, err := strconv.ParseFloat(data, 64)
|
|
if err == nil {
|
|
y, err := lp.New("ib_xmit", tags, m.meta, map[string]interface{}{"value": float64(v)}, time.Now())
|
|
if err == nil {
|
|
output <- y
|
|
}
|
|
}
|
|
}
|
|
buffer, err = ioutil.ReadFile(fmt.Sprintf("%s/port_rcv_packets", path))
|
|
if err == nil {
|
|
data := strings.Replace(string(buffer), "\n", "", -1)
|
|
v, err := strconv.ParseFloat(data, 64)
|
|
if err == nil {
|
|
y, err := lp.New("ib_recv_pkts", tags, m.meta, map[string]interface{}{"value": float64(v)}, time.Now())
|
|
if err == nil {
|
|
output <- y
|
|
}
|
|
}
|
|
}
|
|
buffer, err = ioutil.ReadFile(fmt.Sprintf("%s/port_xmit_packets", path))
|
|
if err == nil {
|
|
data := strings.Replace(string(buffer), "\n", "", -1)
|
|
v, err := strconv.ParseFloat(data, 64)
|
|
if err == nil {
|
|
y, err := lp.New("ib_xmit_pkts", tags, m.meta, map[string]interface{}{"value": float64(v)}, time.Now())
|
|
if err == nil {
|
|
output <- y
|
|
}
|
|
}
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func (m *InfinibandCollector) Read(interval time.Duration, output chan lp.CCMetric) {
|
|
|
|
if m.init {
|
|
for dev, ports := range m.lids {
|
|
for port, lid := range ports {
|
|
tags := map[string]string{"type": "node", "device": dev, "port": port}
|
|
if m.use_perfquery {
|
|
m.doPerfQuery(m.config.PerfQueryPath, dev, lid, port, tags, output)
|
|
} else {
|
|
m.doSysfsRead(dev, lid, port, tags, output)
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
func (m *InfinibandCollector) Close() {
|
|
m.init = false
|
|
}
|