Make Infiniband collector more configurable

This commit is contained in:
Thomas Roehl 2021-11-25 16:25:20 +01:00
parent 71e86fe79d
commit 156664d78f

View File

@ -15,22 +15,40 @@ import (
"time" "time"
) )
const BASEPATH = `/sys/class/infiniband/` const IBBASEPATH = `/sys/class/infiniband/`
const LIDFILE = `/sys/class/infiniband/mlx4_0/ports/1/lid` const LIDFILE = `/sys/class/infiniband/mlx4_0/ports/1/lid`
const PERFQUERY = `/usr/sbin/perfquery` const PERFQUERY = `/usr/sbin/perfquery`
type InfinibandCollectorConfig struct { type InfinibandCollectorConfig struct {
ExcludeDevices []string `json:"exclude_devices, omitempty"` ExcludeDevices []string `json:"exclude_devices, omitempty"`
PerfQueryPath string `json:"perfquery_path"`
} }
type InfinibandCollector struct { type InfinibandCollector struct {
MetricCollector MetricCollector
tags map[string]string tags map[string]string
lids map[string]map[string]string lids map[string]map[string]string
config NetstatCollectorConfig config InfinibandCollectorConfig
use_perfquery bool use_perfquery bool
} }
func (m *InfinibandCollector) Help() {
fmt.Println("This collector includes all devices that can be found below ", IBBASEPATH)
fmt.Println("and where any of the ports provides a 'lid' file (glob ", IBBASEPATH, "/<dev>/ports/<port>/lid).")
fmt.Println("The devices can be filtered with the 'exclude_devices' option in the configuration.")
fmt.Println("For each found LIDs the collector calls the 'perfquery' command")
fmt.Println("The path to the 'perfquery' command can be configured with the 'perfquery_path' option")
fmt.Println("in the configuration\n")
fmt.Println("Full configuration object:")
fmt.Println("\"ibstat\" : {")
fmt.Println(" \"perfquery_path\" : \"path/to/perfquery\" # if omitted, it searches in $PATH")
fmt.Println(" \"exclude_devices\" : [\"dev1\"]")
fmt.Println("}\n")
fmt.Println("Metrics:")
fmt.Println("- ib_recv")
fmt.Println("- ib_xmit")
}
func (m *InfinibandCollector) Init(config []byte) error { func (m *InfinibandCollector) Init(config []byte) error {
var err error var err error
m.name = "InfinibandCollector" m.name = "InfinibandCollector"
@ -43,13 +61,19 @@ func (m *InfinibandCollector) Init(config []byte) error {
return err return err
} }
} }
if len(m.config.PerfQueryPath) == 0 {
path, err := exec.LookPath("perfquery")
if err == nil {
m.config.PerfQueryPath = path
}
}
m.lids = make(map[string]map[string]string) m.lids = make(map[string]map[string]string)
p := fmt.Sprintf("%s/*/ports/*/lid", string(BASEPATH)) p := fmt.Sprintf("%s/*/ports/*/lid", string(IBBASEPATH))
files, err := filepath.Glob(p) files, err := filepath.Glob(p)
for _, f := range files { for _, f := range files {
lid, err := ioutil.ReadFile(f) lid, err := ioutil.ReadFile(f)
if err == nil { if err == nil {
plist := strings.Split(strings.Replace(f, string(BASEPATH), "", -1), "/") plist := strings.Split(strings.Replace(f, string(IBBASEPATH), "", -1), "/")
skip := false skip := false
for _, d := range m.config.ExcludeDevices { for _, d := range m.config.ExcludeDevices {
if d == plist[0] { if d == plist[0] {
@ -66,7 +90,7 @@ func (m *InfinibandCollector) Init(config []byte) error {
for _, ports := range m.lids { for _, ports := range m.lids {
for port, lid := range ports { for port, lid := range ports {
args := fmt.Sprintf("-r %s %s 0xf000", lid, port) args := fmt.Sprintf("-r %s %s 0xf000", lid, port)
command := exec.Command(PERFQUERY, args) command := exec.Command(m.config.PerfQueryPath, args)
command.Wait() command.Wait()
_, err := command.Output() _, err := command.Output()
if err == nil { if err == nil {
@ -86,10 +110,10 @@ func (m *InfinibandCollector) Init(config []byte) error {
return err return err
} }
func DoPerfQuery(dev string, lid string, port string, tags map[string]string, out *[]lp.MutableMetric) error { func DoPerfQuery(cmd string, dev string, lid string, port string, tags map[string]string, out *[]lp.MutableMetric) error {
args := fmt.Sprintf("-r %s %s 0xf000", lid, port) args := fmt.Sprintf("-r %s %s 0xf000", lid, port)
command := exec.Command(PERFQUERY, args) command := exec.Command(cmd, args)
command.Wait() command.Wait()
stdout, err := command.Output() stdout, err := command.Output()
if err != nil { if err != nil {
@ -124,7 +148,7 @@ func DoPerfQuery(dev string, lid string, port string, tags map[string]string, ou
} }
func DoSysfsRead(dev string, lid string, port string, tags map[string]string, out *[]lp.MutableMetric) error { func DoSysfsRead(dev string, lid string, port string, tags map[string]string, out *[]lp.MutableMetric) error {
path := fmt.Sprintf("%s/%s/ports/%s/counters/", string(BASEPATH), dev, port) path := fmt.Sprintf("%s/%s/ports/%s/counters/", string(IBBASEPATH), dev, port)
buffer, err := ioutil.ReadFile(fmt.Sprintf("%s/port_rcv_data", path)) buffer, err := ioutil.ReadFile(fmt.Sprintf("%s/port_rcv_data", path))
if err == nil { if err == nil {
data := strings.Replace(string(buffer), "\n", "", -1) data := strings.Replace(string(buffer), "\n", "", -1)
@ -157,7 +181,7 @@ func (m *InfinibandCollector) Read(interval time.Duration, out *[]lp.MutableMetr
for port, lid := range ports { for port, lid := range ports {
tags := map[string]string{"type": "node", "device": dev, "port": port} tags := map[string]string{"type": "node", "device": dev, "port": port}
if m.use_perfquery { if m.use_perfquery {
DoPerfQuery(dev, lid, port, tags, out) DoPerfQuery(m.config.PerfQueryPath, dev, lid, port, tags, out)
} else { } else {
DoSysfsRead(dev, lid, port, tags, out) DoSysfsRead(dev, lid, port, tags, out)
} }