Moved as much work as possible to Init()

This commit is contained in:
Holger Obermaier 2022-01-27 11:08:27 +01:00
parent 7077452a5d
commit e1d0aacd1e

View File

@ -2,12 +2,12 @@ package collectors
import ( import (
"fmt" "fmt"
"io/ioutil" "os"
lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric" lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric"
"golang.org/x/sys/unix"
"encoding/json" "encoding/json"
"errors"
"path/filepath" "path/filepath"
"strconv" "strconv"
"strings" "strings"
@ -16,13 +16,20 @@ import (
const IB_BASEPATH = `/sys/class/infiniband/` const IB_BASEPATH = `/sys/class/infiniband/`
// InfinibandCollectorInfo bundles everything the collector keeps for a single
// InfiniBand device port: its identifiers, the counter files to read for it,
// and the tags attached to the metrics generated from those counters.
type InfinibandCollectorInfo struct {
LID string // IB local identifier (LID), as read from the port's "lid" file
device string // IB device name (path component of the port directory)
port string // IB device port (path component of the port directory)
portCounterFiles map[string]string // mapping metric name (e.g. "ib_recv") -> path of the counter file
tagSet map[string]string // tags passed along with every metric created for this port
}
type InfinibandCollector struct { type InfinibandCollector struct {
metricCollector metricCollector
tags map[string]string
lids map[string]map[string]string
config struct { config struct {
ExcludeDevices []string `json:"exclude_devices,omitempty"` ExcludeDevices []string `json:"exclude_devices,omitempty"` // IB device to exclude e.g. mlx5_0
} }
info []InfinibandCollectorInfo
} }
func (m *InfinibandCollector) Help() { func (m *InfinibandCollector) Help() {
@ -43,99 +50,113 @@ func (m *InfinibandCollector) Help() {
fmt.Println("- ib_xmit_pkts") fmt.Println("- ib_xmit_pkts")
} }
// Init initializes the Infiniband collector by walking through files below IB_BASEPATH
func (m *InfinibandCollector) Init(config json.RawMessage) error { func (m *InfinibandCollector) Init(config json.RawMessage) error {
var err error var err error
m.name = "InfinibandCollector" m.name = "InfinibandCollector"
m.setup() m.setup()
m.meta = map[string]string{"source": m.name, "group": "Network"} m.meta = map[string]string{
m.tags = map[string]string{"type": "node"} "source": m.name,
"group": "Network",
}
if len(config) > 0 { if len(config) > 0 {
err = json.Unmarshal(config, &m.config) err = json.Unmarshal(config, &m.config)
if err != nil { if err != nil {
return err return err
} }
} }
m.lids = make(map[string]map[string]string)
p := fmt.Sprintf("%s/*/ports/*/lid", string(IB_BASEPATH)) // Loop for all InfiniBand directories
files, err := filepath.Glob(p) globPattern := filepath.Join(IB_BASEPATH, "*", "ports", "*")
for _, f := range files { ibDirs, err := filepath.Glob(globPattern)
lid, err := ioutil.ReadFile(f) if err != nil {
if err == nil { return fmt.Errorf("Unable to glob files with pattern %s: %v", globPattern, err)
plist := strings.Split(strings.Replace(f, string(IB_BASEPATH), "", -1), "/") }
skip := false if ibDirs == nil {
for _, d := range m.config.ExcludeDevices { return fmt.Errorf("Unable to find any directories with pattern %s", globPattern)
if d == plist[0] {
skip = true
}
}
if !skip {
m.lids[plist[0]] = make(map[string]string)
m.lids[plist[0]][plist[2]] = string(lid)
}
}
} }
if len(m.lids) == 0 { for _, path := range ibDirs {
return errors.New("No usable IB devices")
// Skip, when no LID is assigned
LID, ok := readOneLine(path + "/lid")
if !ok || LID == "0x0" {
continue
}
// Get device and port component
pathSplit := strings.Split(path, string(os.PathSeparator))
device := pathSplit[4]
port := pathSplit[6]
// Skip excluded devices
skip := false
for _, excludedDevice := range m.config.ExcludeDevices {
if excludedDevice == device {
skip = true
break
}
}
if skip {
continue
}
// Check access to counter files
countersDir := filepath.Join(path, "counters")
portCounterFiles := map[string]string{
"ib_recv": filepath.Join(countersDir, "port_rcv_data"),
"ib_xmit": filepath.Join(countersDir, "port_xmit_data"),
"ib_recv_pkts": filepath.Join(countersDir, "port_rcv_packets"),
"ib_xmit_pkts": filepath.Join(countersDir, "port_xmit_packets"),
}
for _, counterFile := range portCounterFiles {
err := unix.Access(counterFile, unix.R_OK)
if err != nil {
return fmt.Errorf("Unable to access %s: %v", counterFile, err)
}
}
m.info = append(m.info,
InfinibandCollectorInfo{
LID: LID,
device: device,
port: port,
portCounterFiles: portCounterFiles,
tagSet: map[string]string{
"type": "node",
"device": device,
"port": port,
"lid": LID,
},
})
}
if len(m.info) == 0 {
return fmt.Errorf("Found no IB devices")
} }
m.init = true m.init = true
return nil return nil
} }
// Read reads Infiniband counter files below IB_BASEPATH
func (m *InfinibandCollector) Read(interval time.Duration, output chan lp.CCMetric) { func (m *InfinibandCollector) Read(interval time.Duration, output chan lp.CCMetric) {
if m.init { // Check if already initialized
for dev, ports := range m.lids { if !m.init {
for port, lid := range ports { return
tags := map[string]string{ }
"type": "node",
"device": dev, now := time.Now()
"port": port, for i := range m.info {
"lid": lid}
path := fmt.Sprintf("%s/%s/ports/%s/counters/", string(IB_BASEPATH), dev, port) // device info
buffer, err := ioutil.ReadFile(fmt.Sprintf("%s/port_rcv_data", path)) info := &m.info[i]
if err == nil { for counterName, counterFile := range info.portCounterFiles {
data := strings.Replace(string(buffer), "\n", "", -1) if data, ok := readOneLine(counterFile); ok {
v, err := strconv.ParseFloat(data, 64) if v, err := strconv.ParseInt(data, 10, 64); err == nil {
if err == nil { if y, err := lp.New(counterName, info.tagSet, m.meta, map[string]interface{}{"value": v}, now); err == nil {
y, err := lp.New("ib_recv", tags, m.meta, map[string]interface{}{"value": float64(v)}, time.Now()) output <- y
if err == nil {
output <- y
}
}
}
buffer, err = ioutil.ReadFile(fmt.Sprintf("%s/port_xmit_data", path))
if err == nil {
data := strings.Replace(string(buffer), "\n", "", -1)
v, err := strconv.ParseFloat(data, 64)
if err == nil {
y, err := lp.New("ib_xmit", tags, m.meta, map[string]interface{}{"value": float64(v)}, time.Now())
if err == nil {
output <- y
}
}
}
buffer, err = ioutil.ReadFile(fmt.Sprintf("%s/port_rcv_packets", path))
if err == nil {
data := strings.Replace(string(buffer), "\n", "", -1)
v, err := strconv.ParseFloat(data, 64)
if err == nil {
y, err := lp.New("ib_recv_pkts", tags, m.meta, map[string]interface{}{"value": float64(v)}, time.Now())
if err == nil {
output <- y
}
}
}
buffer, err = ioutil.ReadFile(fmt.Sprintf("%s/port_xmit_packets", path))
if err == nil {
data := strings.Replace(string(buffer), "\n", "", -1)
v, err := strconv.ParseFloat(data, 64)
if err == nil {
y, err := lp.New("ib_xmit_pkts", tags, m.meta, map[string]interface{}{"value": float64(v)}, time.Now())
if err == nil {
output <- y
}
} }
} }
} }