Compare commits

..

2 Commits

Author SHA1 Message Date
Michael Panzlaff
41237af7bd ipmi: refactor and add sudo support 2026-03-23 18:19:17 +01:00
Michael Panzlaff
933272a8e2 update cc-lib to v2.11.0 2026-03-23 16:09:38 +01:00
3 changed files with 107 additions and 96 deletions

View File

@@ -31,7 +31,9 @@ type IpmiCollector struct {
ExcludeDevices []string `json:"exclude_devices"`
IpmitoolPath string `json:"ipmitool_path"`
IpmisensorsPath string `json:"ipmisensors_path"`
Sudo bool `json:"use_sudo"`
}
ipmitool string
ipmisensors string
}
@@ -54,6 +56,7 @@ func (m *IpmiCollector) Init(config json.RawMessage) error {
// default path to IPMI tools
m.config.IpmitoolPath = "ipmitool"
m.config.IpmisensorsPath = "ipmi-sensors"
if len(config) > 0 {
d := json.NewDecoder(bytes.NewReader(config))
d.DisallowUnknownFields()
@@ -61,51 +64,57 @@ func (m *IpmiCollector) Init(config json.RawMessage) error {
return fmt.Errorf("%s Init(): Error decoding JSON config: %w", m.name, err)
}
}
// Check if executables ipmitool or ipmisensors are found
p, err := exec.LookPath(m.config.IpmitoolPath)
if err == nil {
command := exec.Command(p)
err := command.Run()
if len(m.config.IpmitoolPath) != 0 && len(m.config.IpmisensorsPath) != 0 {
return fmt.Errorf("ipmitool_path and ipmisensors_path cannot be used at the same time. Please disable one of them")
}
// Test if the configured commands actually work
if len(m.config.IpmitoolPath) != 0 {
dummyChan := make(chan lp.CCMessage)
go func() {
for range dummyChan {
}
}()
err := m.readIpmiTool(dummyChan)
close(dummyChan)
if err != nil {
cclog.ComponentError(m.name, fmt.Sprintf("Failed to execute %s: %s", p, err.Error()))
m.ipmitool = ""
} else {
m.ipmitool = p
return fmt.Errorf("Cannot execute '%s' (sudo=%t): %v", m.config.IpmitoolPath, m.config.Sudo, err)
}
} else if len(m.config.IpmisensorsPath) != 0 {
dummyChan := make(chan lp.CCMessage)
go func() {
for range dummyChan {
}
p, err = exec.LookPath(m.config.IpmisensorsPath)
if err == nil {
command := exec.Command(p)
err := command.Run()
}()
err := m.readIpmiSensors(dummyChan)
close(dummyChan)
if err != nil {
cclog.ComponentError(m.name, fmt.Sprintf("Failed to execute %s: %s", p, err.Error()))
m.ipmisensors = ""
return fmt.Errorf("Cannot execute '%s' (sudo=%t): %v", m.config.IpmisensorsPath, m.config.Sudo, err)
}
} else {
m.ipmisensors = p
}
}
if len(m.ipmitool) == 0 && len(m.ipmisensors) == 0 {
return fmt.Errorf("%s Init(): no usable IPMI reader found", m.name)
return fmt.Errorf("IpmiCollector enabled, but neither ipmitool nor ipmi-sensors are configured.")
}
m.init = true
return nil
}
func (m *IpmiCollector) readIpmiTool(cmd string, output chan lp.CCMessage) {
func (m *IpmiCollector) readIpmiTool(output chan lp.CCMessage) error {
// Setup ipmitool command
command := exec.Command(cmd, "sensor")
argv := make([]string, 0)
if m.config.Sudo {
argv = append(argv, "sudo")
}
argv = append(argv, m.config.IpmitoolPath, "sensor")
command := exec.Command(argv[0], argv[1:]...)
stdout, _ := command.StdoutPipe()
errBuf := new(bytes.Buffer)
command.Stderr = errBuf
// start command
if err := command.Start(); err != nil {
cclog.ComponentError(
m.name,
fmt.Sprintf("readIpmiTool(): Failed to start command \"%s\": %v", command.String(), err),
)
return
return fmt.Errorf("Failed to start command '%s': %v", command.String(), err)
}
// Read command output
@@ -116,7 +125,10 @@ func (m *IpmiCollector) readIpmiTool(cmd string, output chan lp.CCMessage) {
continue
}
v, err := strconv.ParseFloat(strings.TrimSpace(lv[1]), 64)
if err == nil {
if err != nil {
cclog.ComponentErrorf(m.name, "Failed to parse float '%s': %v", lv[1], err)
continue
}
name := strings.ToLower(strings.ReplaceAll(strings.TrimSpace(lv[0]), " ", "_"))
unit := strings.TrimSpace(lv[2])
switch unit {
@@ -131,70 +143,71 @@ func (m *IpmiCollector) readIpmiTool(cmd string, output chan lp.CCMessage) {
}
y, err := lp.NewMessage(name, map[string]string{"type": "node"}, m.meta, map[string]any{"value": v}, time.Now())
if err == nil {
if err != nil {
cclog.ComponentErrorf(m.name, "Failed to create message: %v", err)
continue
}
y.AddMeta("unit", unit)
output <- y
}
}
}
// Wait for command end
if err := command.Wait(); err != nil {
errMsg, _ := io.ReadAll(errBuf)
cclog.ComponentError(
m.name,
fmt.Sprintf("readIpmiTool(): Failed to wait for the end of command \"%s\": %v\n", command.String(), err),
)
cclog.ComponentError(m.name, fmt.Sprintf("readIpmiTool(): command stderr: \"%s\"\n", strings.TrimSpace(string(errMsg))))
return
return fmt.Errorf("Failed to complete command '%s': %v (stderr: %s)", command.String(), err, strings.TrimSpace(string(errMsg)))
}
return nil
}
func (m *IpmiCollector) readIpmiSensors(cmd string, output chan lp.CCMessage) {
func (m *IpmiCollector) readIpmiSensors(output chan lp.CCMessage) error {
// Setup ipmisensors command
command := exec.Command(cmd, "--comma-separated-output", "--sdr-cache-recreate")
argv := make([]string, 0)
if m.config.Sudo {
argv = append(argv, "sudo")
}
argv = append(argv, m.config.IpmisensorsPath, "--comma-separated-output", "--sdr-cache-recreate")
command := exec.Command(argv[0], argv[1:]...)
stdout, _ := command.StdoutPipe()
errBuf := new(bytes.Buffer)
command.Stderr = errBuf
// start command
if err := command.Start(); err != nil {
cclog.ComponentError(
m.name,
fmt.Sprintf("readIpmiSensors(): Failed to start command \"%s\": %v", command.String(), err),
)
return
return fmt.Errorf("Failed to start command '%s': %v", command.String(), err)
}
// Read command output
scanner := bufio.NewScanner(stdout)
for scanner.Scan() {
lv := strings.Split(scanner.Text(), ",")
if len(lv) > 3 {
if len(lv) <= 3 {
continue
}
v, err := strconv.ParseFloat(lv[3], 64)
if err == nil {
if err != nil {
cclog.ComponentErrorf(m.name, "Failed to parse float '%s': %v", lv[3], err)
continue
}
name := strings.ToLower(strings.ReplaceAll(lv[1], " ", "_"))
y, err := lp.NewMessage(name, map[string]string{"type": "node"}, m.meta, map[string]any{"value": v}, time.Now())
if err == nil {
if err != nil {
cclog.ComponentErrorf(m.name, "Failed to create message: %v", err)
continue
}
if len(lv) > 4 {
y.AddMeta("unit", lv[4])
}
output <- y
}
}
}
}
// Wait for command end
if err := command.Wait(); err != nil {
errMsg, _ := io.ReadAll(errBuf)
cclog.ComponentError(
m.name,
fmt.Sprintf("readIpmiSensors(): Failed to wait for the end of command \"%s\": %v\n", command.String(), err),
)
cclog.ComponentError(m.name, fmt.Sprintf("readIpmiSensors(): command stderr: \"%s\"\n", strings.TrimSpace(string(errMsg))))
return
return fmt.Errorf("Failed to complete command '%s': %v (stderr: %s)", command.String(), err, strings.TrimSpace(string(errMsg)))
}
return nil
}
func (m *IpmiCollector) Read(interval time.Duration, output chan lp.CCMessage) {
@@ -204,9 +217,15 @@ func (m *IpmiCollector) Read(interval time.Duration, output chan lp.CCMessage) {
}
if len(m.config.IpmitoolPath) > 0 {
m.readIpmiTool(m.config.IpmitoolPath, output)
err := m.readIpmiTool(output)
if err != nil {
cclog.ComponentErrorf(m.name, "readIpmiTool() failed: %v", err)
}
} else if len(m.config.IpmisensorsPath) > 0 {
m.readIpmiSensors(m.config.IpmisensorsPath, output)
err := m.readIpmiSensors(output)
if err != nil {
cclog.ComponentErrorf(m.name, "readIpmiSensors() failed: %v", err)
}
}
}

4
go.mod
View File

@@ -3,8 +3,8 @@ module github.com/ClusterCockpit/cc-metric-collector
go 1.25.0
require (
github.com/ClusterCockpit/cc-lib/v2 v2.10.0
github.com/ClusterCockpit/go-rocm-smi v0.4.0
github.com/ClusterCockpit/cc-lib/v2 v2.11.0
github.com/ClusterCockpit/go-rocm-smi v0.3.0
github.com/NVIDIA/go-nvml v0.13.0-1
github.com/PaesslerAG/gval v1.2.4
github.com/fsnotify/fsnotify v1.9.0

18
go.sum
View File

@@ -1,9 +1,10 @@
github.com/ClusterCockpit/cc-lib/v2 v2.10.0 h1:McCcEwc1j942hV54JAzsB/pcArP6A5FoQtjzW2H7K9U=
github.com/ClusterCockpit/cc-lib/v2 v2.10.0/go.mod h1:Oj+N2lpFqiBOBzjfrLIGJ2YSWT400TX4M0ii4lNl81A=
github.com/ClusterCockpit/cc-lib/v2 v2.11.0 h1:LaLs4J0b7FArIXT8byMUcIcUr55R5obATjVi7qI02r4=
github.com/ClusterCockpit/cc-lib/v2 v2.11.0/go.mod h1:Oj+N2lpFqiBOBzjfrLIGJ2YSWT400TX4M0ii4lNl81A=
github.com/ClusterCockpit/cc-line-protocol/v2 v2.4.0 h1:hIzxgTBWcmCIHtoDKDkSCsKCOCOwUC34sFsbD2wcW0Q=
github.com/ClusterCockpit/cc-line-protocol/v2 v2.4.0/go.mod h1:y42qUu+YFmu5fdNuUAS4VbbIKxVjxCvbVqFdpdh8ahY=
github.com/ClusterCockpit/go-rocm-smi v0.4.0 h1:3+bEPrSkjEJcOtt+qBUX48ugDVlOFaKUnXHTef2Ve2Q=
github.com/ClusterCockpit/go-rocm-smi v0.4.0/go.mod h1:c19u5vBCcgb7DjL4EWTGSGpo6c79d07r4rxD50z25ng=
github.com/ClusterCockpit/go-rocm-smi v0.3.0 h1:1qZnSpG7/NyLtc7AjqnUL9Jb8xtqG1nMVgp69rJfaR8=
github.com/ClusterCockpit/go-rocm-smi v0.3.0/go.mod h1:+I3UMeX3OlizXDf1WpGD43W4KGZZGVSGmny6rTeOnWA=
github.com/NVIDIA/go-nvml v0.11.6-0/go.mod h1:hy7HYeQy335x6nEss0Ne3PYqleRa6Ct+VKD9RQ4nyFs=
github.com/NVIDIA/go-nvml v0.13.0-1 h1:OLX8Jq3dONuPOQPC7rndB6+iDmDakw0XTYgzMxObkEw=
github.com/NVIDIA/go-nvml v0.13.0-1/go.mod h1:+KNA7c7gIBH7SKSJ1ntlwkfN80zdx8ovl4hrK3LmPt4=
github.com/PaesslerAG/gval v1.2.4 h1:rhX7MpjJlcxYwL2eTTYIOBUyEKZ+A96T9vQySWkVUiU=
@@ -89,14 +90,7 @@ github.com/spkg/bom v0.0.0-20160624110644-59b7046e48ad/go.mod h1:qLr4V1qq6nMqFKk
github.com/stmcginnis/gofish v0.21.4 h1:daexK8sh31CgeSMkPUNs21HWHHA9ecCPJPyLCTxukCg=
github.com/stmcginnis/gofish v0.21.4/go.mod h1:PzF5i8ecRG9A2ol8XT64npKUunyraJ+7t0kYMpQAtqU=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw=
github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo=
github.com/stretchr/objx v0.5.2/go.mod h1:FRsXN1f5AsAjCGJKqEizvkpNtU+EGNCLh3NxZ/8L+MA=
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU=
github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo=
github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U=
github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U=
github.com/tklauser/go-sysconf v0.3.16 h1:frioLaCQSsF5Cy1jgRBrzr6t502KIIwQ0MArYICU0nA=
@@ -120,7 +114,5 @@ golang.org/x/time v0.14.0 h1:MRx4UaLrDotUKUdCIqzPC48t1Y9hANFKIRpNx+Te8PI=
golang.org/x/time v0.14.0/go.mod h1:eL/Oa2bBBK0TkX57Fyni+NgnyQQN4LitPmob2Hjnqw4=
google.golang.org/protobuf v1.36.11 h1:fV6ZwhNocDyBLK0dj+fg8ektcVegBBuEolpbTQyBNVE=
google.golang.org/protobuf v1.36.11/go.mod h1:HTf+CrKn2C3g5S8VImy6tdcUvCska2kB7j23XfzDpco=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=