Merge pull request #207 from ClusterCockpit/ipmi-sudo

Add IPMI sudo support
This commit is contained in:
Michael Panzlaff
2026-03-24 15:32:37 +01:00
committed by GitHub
4 changed files with 149 additions and 103 deletions

View File

@@ -31,7 +31,9 @@ type IpmiCollector struct {
ExcludeDevices []string `json:"exclude_devices"` ExcludeDevices []string `json:"exclude_devices"`
IpmitoolPath string `json:"ipmitool_path"` IpmitoolPath string `json:"ipmitool_path"`
IpmisensorsPath string `json:"ipmisensors_path"` IpmisensorsPath string `json:"ipmisensors_path"`
Sudo bool `json:"use_sudo"`
} }
ipmitool string ipmitool string
ipmisensors string ipmisensors string
} }
@@ -54,6 +56,7 @@ func (m *IpmiCollector) Init(config json.RawMessage) error {
// default path to IPMI tools // default path to IPMI tools
m.config.IpmitoolPath = "ipmitool" m.config.IpmitoolPath = "ipmitool"
m.config.IpmisensorsPath = "ipmi-sensors" m.config.IpmisensorsPath = "ipmi-sensors"
if len(config) > 0 { if len(config) > 0 {
d := json.NewDecoder(bytes.NewReader(config)) d := json.NewDecoder(bytes.NewReader(config))
d.DisallowUnknownFields() d.DisallowUnknownFields()
@@ -61,51 +64,67 @@ func (m *IpmiCollector) Init(config json.RawMessage) error {
return fmt.Errorf("%s Init(): Error decoding JSON config: %w", m.name, err) return fmt.Errorf("%s Init(): Error decoding JSON config: %w", m.name, err)
} }
} }
// Check if executables ipmitool or ipmisensors are found
p, err := exec.LookPath(m.config.IpmitoolPath) m.ipmitool = m.config.IpmitoolPath
if err == nil { m.ipmisensors = m.config.IpmisensorsPath
command := exec.Command(p)
err := command.Run() // Test if any of the supported backends work
if err != nil { var dummyChan chan lp.CCMessage
cclog.ComponentError(m.name, fmt.Sprintf("Failed to execute %s: %s", p, err.Error())) dummyConsumer := func() {
m.ipmitool = "" for range dummyChan {
} else {
m.ipmitool = p
} }
} }
p, err = exec.LookPath(m.config.IpmisensorsPath)
if err == nil {
command := exec.Command(p)
err := command.Run()
if err != nil {
cclog.ComponentError(m.name, fmt.Sprintf("Failed to execute %s: %s", p, err.Error()))
m.ipmisensors = ""
} else {
m.ipmisensors = p
}
}
if len(m.ipmitool) == 0 && len(m.ipmisensors) == 0 {
return fmt.Errorf("%s Init(): no usable IPMI reader found", m.name)
}
// Test if ipmi-sensors works (preferred over ipmitool, because it's faster)
var ipmiSensorsErr error
if _, ipmiSensorsErr = exec.LookPath(m.ipmisensors); ipmiSensorsErr == nil {
dummyChan = make(chan lp.CCMessage)
go dummyConsumer()
ipmiSensorsErr = m.readIpmiSensors(dummyChan)
close(dummyChan)
if ipmiSensorsErr == nil {
cclog.ComponentDebugf(m.name, "Using ipmi-sensors for ipmistat collector")
m.init = true m.init = true
return nil return nil
} }
}
cclog.ComponentDebugf(m.name, "Unable to use ipmi-sensors for ipmistat collector: %v", ipmiSensorsErr)
m.ipmisensors = ""
func (m *IpmiCollector) readIpmiTool(cmd string, output chan lp.CCMessage) { // Test if ipmitool works (may be very slow)
var ipmiToolErr error
if _, ipmiToolErr = exec.LookPath(m.ipmitool); ipmiToolErr == nil {
dummyChan = make(chan lp.CCMessage)
go dummyConsumer()
ipmiToolErr = m.readIpmiTool(dummyChan)
close(dummyChan)
if ipmiToolErr == nil {
cclog.ComponentDebugf(m.name, "Using ipmitool for ipmistat collector")
m.init = true
return nil
}
}
m.ipmitool = ""
cclog.ComponentDebugf(m.name, "Unable to use ipmitool for ipmistat collector: %v", ipmiToolErr)
return fmt.Errorf("unable to init neither ipmitool (%w) nor ipmi-sensors (%w)", ipmiToolErr, ipmiSensorsErr)
}
func (m *IpmiCollector) readIpmiTool(output chan lp.CCMessage) error {
// Setup ipmitool command // Setup ipmitool command
command := exec.Command(cmd, "sensor") argv := make([]string, 0)
if m.config.Sudo {
argv = append(argv, "sudo", "-n")
}
argv = append(argv, m.ipmitool, "sensor")
command := exec.Command(argv[0], argv[1:]...)
stdout, _ := command.StdoutPipe() stdout, _ := command.StdoutPipe()
errBuf := new(bytes.Buffer) errBuf := new(bytes.Buffer)
command.Stderr = errBuf command.Stderr = errBuf
// start command // start command
if err := command.Start(); err != nil { if err := command.Start(); err != nil {
cclog.ComponentError( return fmt.Errorf("failed to start command '%s': %w", command.String(), err)
m.name,
fmt.Sprintf("readIpmiTool(): Failed to start command \"%s\": %v", command.String(), err),
)
return
} }
// Read command output // Read command output
@@ -115,8 +134,17 @@ func (m *IpmiCollector) readIpmiTool(cmd string, output chan lp.CCMessage) {
if len(lv) < 3 { if len(lv) < 3 {
continue continue
} }
if strings.TrimSpace(lv[1]) == "0x0" || strings.TrimSpace(lv[1]) == "na" {
// Ignore known non-float values
continue
}
v, err := strconv.ParseFloat(strings.TrimSpace(lv[1]), 64) v, err := strconv.ParseFloat(strings.TrimSpace(lv[1]), 64)
if err == nil { if err != nil {
cclog.ComponentErrorf(m.name, "Failed to parse float '%s': %v", lv[1], err)
continue
}
name := strings.ToLower(strings.ReplaceAll(strings.TrimSpace(lv[0]), " ", "_")) name := strings.ToLower(strings.ReplaceAll(strings.TrimSpace(lv[0]), " ", "_"))
unit := strings.TrimSpace(lv[2]) unit := strings.TrimSpace(lv[2])
switch unit { switch unit {
@@ -131,70 +159,75 @@ func (m *IpmiCollector) readIpmiTool(cmd string, output chan lp.CCMessage) {
} }
y, err := lp.NewMessage(name, map[string]string{"type": "node"}, m.meta, map[string]any{"value": v}, time.Now()) y, err := lp.NewMessage(name, map[string]string{"type": "node"}, m.meta, map[string]any{"value": v}, time.Now())
if err == nil { if err != nil {
cclog.ComponentErrorf(m.name, "Failed to create message: %v", err)
continue
}
y.AddMeta("unit", unit) y.AddMeta("unit", unit)
output <- y output <- y
} }
}
}
// Wait for command end // Wait for command end
if err := command.Wait(); err != nil { if err := command.Wait(); err != nil {
errMsg, _ := io.ReadAll(errBuf) errMsg, _ := io.ReadAll(errBuf)
cclog.ComponentError( return fmt.Errorf("failed to complete command '%s': %w (stderr: %s)", command.String(), err, strings.TrimSpace(string(errMsg)))
m.name,
fmt.Sprintf("readIpmiTool(): Failed to wait for the end of command \"%s\": %v\n", command.String(), err),
)
cclog.ComponentError(m.name, fmt.Sprintf("readIpmiTool(): command stderr: \"%s\"\n", strings.TrimSpace(string(errMsg))))
return
}
} }
func (m *IpmiCollector) readIpmiSensors(cmd string, output chan lp.CCMessage) { return nil
}
func (m *IpmiCollector) readIpmiSensors(output chan lp.CCMessage) error {
// Setup ipmisensors command // Setup ipmisensors command
command := exec.Command(cmd, "--comma-separated-output", "--sdr-cache-recreate") argv := make([]string, 0)
if m.config.Sudo {
argv = append(argv, "sudo", "-n")
}
argv = append(argv, m.ipmisensors, "--comma-separated-output", "--sdr-cache-recreate")
command := exec.Command(argv[0], argv[1:]...)
stdout, _ := command.StdoutPipe() stdout, _ := command.StdoutPipe()
errBuf := new(bytes.Buffer) errBuf := new(bytes.Buffer)
command.Stderr = errBuf command.Stderr = errBuf
// start command // start command
if err := command.Start(); err != nil { if err := command.Start(); err != nil {
cclog.ComponentError( return fmt.Errorf("failed to start command '%s': %w", command.String(), err)
m.name,
fmt.Sprintf("readIpmiSensors(): Failed to start command \"%s\": %v", command.String(), err),
)
return
} }
// Read command output // Read command output
scanner := bufio.NewScanner(stdout) scanner := bufio.NewScanner(stdout)
for scanner.Scan() { for scanner.Scan() {
lv := strings.Split(scanner.Text(), ",") lv := strings.Split(scanner.Text(), ",")
if len(lv) > 3 { if len(lv) <= 3 {
v, err := strconv.ParseFloat(lv[3], 64) continue
if err == nil { }
if lv[3] == "N/A" || lv[3] == "Reading" {
// Ignore known non-float values
continue
}
v, err := strconv.ParseFloat(strings.TrimSpace(lv[3]), 64)
if err != nil {
cclog.ComponentErrorf(m.name, "Failed to parse float '%s': %v", lv[3], err)
continue
}
name := strings.ToLower(strings.ReplaceAll(lv[1], " ", "_")) name := strings.ToLower(strings.ReplaceAll(lv[1], " ", "_"))
y, err := lp.NewMessage(name, map[string]string{"type": "node"}, m.meta, map[string]any{"value": v}, time.Now()) y, err := lp.NewMessage(name, map[string]string{"type": "node"}, m.meta, map[string]any{"value": v}, time.Now())
if err == nil { if err != nil {
cclog.ComponentErrorf(m.name, "Failed to create message: %v", err)
continue
}
if len(lv) > 4 { if len(lv) > 4 {
y.AddMeta("unit", lv[4]) y.AddMeta("unit", lv[4])
} }
output <- y output <- y
} }
}
}
}
// Wait for command end // Wait for command end
if err := command.Wait(); err != nil { if err := command.Wait(); err != nil {
errMsg, _ := io.ReadAll(errBuf) errMsg, _ := io.ReadAll(errBuf)
cclog.ComponentError( return fmt.Errorf("failed to complete command '%s': %w (stderr: %s)", command.String(), err, strings.TrimSpace(string(errMsg)))
m.name,
fmt.Sprintf("readIpmiSensors(): Failed to wait for the end of command \"%s\": %v\n", command.String(), err),
)
cclog.ComponentError(m.name, fmt.Sprintf("readIpmiSensors(): command stderr: \"%s\"\n", strings.TrimSpace(string(errMsg))))
return
} }
return nil
} }
func (m *IpmiCollector) Read(interval time.Duration, output chan lp.CCMessage) { func (m *IpmiCollector) Read(interval time.Duration, output chan lp.CCMessage) {
@@ -203,10 +236,16 @@ func (m *IpmiCollector) Read(interval time.Duration, output chan lp.CCMessage) {
return return
} }
if len(m.config.IpmitoolPath) > 0 { if len(m.ipmisensors) > 0 {
m.readIpmiTool(m.config.IpmitoolPath, output) err := m.readIpmiSensors(output)
} else if len(m.config.IpmisensorsPath) > 0 { if err != nil {
m.readIpmiSensors(m.config.IpmisensorsPath, output) cclog.ComponentErrorf(m.name, "readIpmiSensors() failed: %v", err)
}
} else if len(m.ipmitool) > 0 {
err := m.readIpmiTool(output)
if err != nil {
cclog.ComponentErrorf(m.name, "readIpmiTool() failed: %v", err)
}
} }
} }

View File

@@ -14,10 +14,25 @@ hugo_path: docs/reference/cc-metric-collector/collectors/ipmi.md
```json ```json
"ipmistat": { "ipmistat": {
"ipmitool_path": "/path/to/ipmitool", "ipmitool_path": "/path/to/ipmitool",
"ipmisensors_path": "/path/to/ipmi-sensors" "ipmisensors_path": "/path/to/ipmi-sensors",
"use_sudo": true
} }
``` ```
The `ipmistat` collector reads data from `ipmitool` (`ipmitool sensor`) or `ipmi-sensors` (`ipmi-sensors --sdr-cache-recreate --comma-separated-output`). The `ipmistat` collector reads data from `ipmitool` (`ipmitool sensor`) or `ipmi-sensors` (`ipmi-sensors --sdr-cache-recreate --comma-separated-output`).
The metrics depend on the output of the underlying tools but contain temperature, power and energy metrics. The metrics depend on the output of the underlying tools but contain temperature, power and energy metrics.
ipmitool and ipmi-sensors typically require root to run.
In order to cc-metric-collector without root priviliges, you can enable `use_sudo`.
Add a file like this in /etc/sudoers.d/ to allow cc-metric-collector to run this command:
```
# Do not log the following sudo commands from monitoring, since this causes a lot of log spam.
# However keep log_denied enabled, to detect failures
Defaults: monitoring !log_allowed, !pam_session
# Allow to use ipmitool and ipmi-sensors
monitoring ALL = (root) NOPASSWD:/usr/bin/ipmitool sensor
monitoring ALL = (root) NOPASSWD:/usr/sbin/ipmi-sensors --comma-separated-output --sdr-cache-recreate
```

4
go.mod
View File

@@ -3,8 +3,8 @@ module github.com/ClusterCockpit/cc-metric-collector
go 1.25.0 go 1.25.0
require ( require (
github.com/ClusterCockpit/cc-lib/v2 v2.10.0 github.com/ClusterCockpit/cc-lib/v2 v2.11.0
github.com/ClusterCockpit/go-rocm-smi v0.4.0 github.com/ClusterCockpit/go-rocm-smi v0.3.0
github.com/NVIDIA/go-nvml v0.13.0-1 github.com/NVIDIA/go-nvml v0.13.0-1
github.com/PaesslerAG/gval v1.2.4 github.com/PaesslerAG/gval v1.2.4
github.com/fsnotify/fsnotify v1.9.0 github.com/fsnotify/fsnotify v1.9.0

18
go.sum
View File

@@ -1,9 +1,10 @@
github.com/ClusterCockpit/cc-lib/v2 v2.10.0 h1:McCcEwc1j942hV54JAzsB/pcArP6A5FoQtjzW2H7K9U= github.com/ClusterCockpit/cc-lib/v2 v2.11.0 h1:LaLs4J0b7FArIXT8byMUcIcUr55R5obATjVi7qI02r4=
github.com/ClusterCockpit/cc-lib/v2 v2.10.0/go.mod h1:Oj+N2lpFqiBOBzjfrLIGJ2YSWT400TX4M0ii4lNl81A= github.com/ClusterCockpit/cc-lib/v2 v2.11.0/go.mod h1:Oj+N2lpFqiBOBzjfrLIGJ2YSWT400TX4M0ii4lNl81A=
github.com/ClusterCockpit/cc-line-protocol/v2 v2.4.0 h1:hIzxgTBWcmCIHtoDKDkSCsKCOCOwUC34sFsbD2wcW0Q= github.com/ClusterCockpit/cc-line-protocol/v2 v2.4.0 h1:hIzxgTBWcmCIHtoDKDkSCsKCOCOwUC34sFsbD2wcW0Q=
github.com/ClusterCockpit/cc-line-protocol/v2 v2.4.0/go.mod h1:y42qUu+YFmu5fdNuUAS4VbbIKxVjxCvbVqFdpdh8ahY= github.com/ClusterCockpit/cc-line-protocol/v2 v2.4.0/go.mod h1:y42qUu+YFmu5fdNuUAS4VbbIKxVjxCvbVqFdpdh8ahY=
github.com/ClusterCockpit/go-rocm-smi v0.4.0 h1:3+bEPrSkjEJcOtt+qBUX48ugDVlOFaKUnXHTef2Ve2Q= github.com/ClusterCockpit/go-rocm-smi v0.3.0 h1:1qZnSpG7/NyLtc7AjqnUL9Jb8xtqG1nMVgp69rJfaR8=
github.com/ClusterCockpit/go-rocm-smi v0.4.0/go.mod h1:c19u5vBCcgb7DjL4EWTGSGpo6c79d07r4rxD50z25ng= github.com/ClusterCockpit/go-rocm-smi v0.3.0/go.mod h1:+I3UMeX3OlizXDf1WpGD43W4KGZZGVSGmny6rTeOnWA=
github.com/NVIDIA/go-nvml v0.11.6-0/go.mod h1:hy7HYeQy335x6nEss0Ne3PYqleRa6Ct+VKD9RQ4nyFs=
github.com/NVIDIA/go-nvml v0.13.0-1 h1:OLX8Jq3dONuPOQPC7rndB6+iDmDakw0XTYgzMxObkEw= github.com/NVIDIA/go-nvml v0.13.0-1 h1:OLX8Jq3dONuPOQPC7rndB6+iDmDakw0XTYgzMxObkEw=
github.com/NVIDIA/go-nvml v0.13.0-1/go.mod h1:+KNA7c7gIBH7SKSJ1ntlwkfN80zdx8ovl4hrK3LmPt4= github.com/NVIDIA/go-nvml v0.13.0-1/go.mod h1:+KNA7c7gIBH7SKSJ1ntlwkfN80zdx8ovl4hrK3LmPt4=
github.com/PaesslerAG/gval v1.2.4 h1:rhX7MpjJlcxYwL2eTTYIOBUyEKZ+A96T9vQySWkVUiU= github.com/PaesslerAG/gval v1.2.4 h1:rhX7MpjJlcxYwL2eTTYIOBUyEKZ+A96T9vQySWkVUiU=
@@ -89,14 +90,7 @@ github.com/spkg/bom v0.0.0-20160624110644-59b7046e48ad/go.mod h1:qLr4V1qq6nMqFKk
github.com/stmcginnis/gofish v0.21.4 h1:daexK8sh31CgeSMkPUNs21HWHHA9ecCPJPyLCTxukCg= github.com/stmcginnis/gofish v0.21.4 h1:daexK8sh31CgeSMkPUNs21HWHHA9ecCPJPyLCTxukCg=
github.com/stmcginnis/gofish v0.21.4/go.mod h1:PzF5i8ecRG9A2ol8XT64npKUunyraJ+7t0kYMpQAtqU= github.com/stmcginnis/gofish v0.21.4/go.mod h1:PzF5i8ecRG9A2ol8XT64npKUunyraJ+7t0kYMpQAtqU=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw=
github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo=
github.com/stretchr/objx v0.5.2/go.mod h1:FRsXN1f5AsAjCGJKqEizvkpNtU+EGNCLh3NxZ/8L+MA=
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU=
github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo=
github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U= github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U=
github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U= github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U=
github.com/tklauser/go-sysconf v0.3.16 h1:frioLaCQSsF5Cy1jgRBrzr6t502KIIwQ0MArYICU0nA= github.com/tklauser/go-sysconf v0.3.16 h1:frioLaCQSsF5Cy1jgRBrzr6t502KIIwQ0MArYICU0nA=
@@ -120,7 +114,5 @@ golang.org/x/time v0.14.0 h1:MRx4UaLrDotUKUdCIqzPC48t1Y9hANFKIRpNx+Te8PI=
golang.org/x/time v0.14.0/go.mod h1:eL/Oa2bBBK0TkX57Fyni+NgnyQQN4LitPmob2Hjnqw4= golang.org/x/time v0.14.0/go.mod h1:eL/Oa2bBBK0TkX57Fyni+NgnyQQN4LitPmob2Hjnqw4=
google.golang.org/protobuf v1.36.11 h1:fV6ZwhNocDyBLK0dj+fg8ektcVegBBuEolpbTQyBNVE= google.golang.org/protobuf v1.36.11 h1:fV6ZwhNocDyBLK0dj+fg8ektcVegBBuEolpbTQyBNVE=
google.golang.org/protobuf v1.36.11/go.mod h1:HTf+CrKn2C3g5S8VImy6tdcUvCska2kB7j23XfzDpco= google.golang.org/protobuf v1.36.11/go.mod h1:HTf+CrKn2C3g5S8VImy6tdcUvCska2kB7j23XfzDpco=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=