Compare commits

..

2 Commits

Author SHA1 Message Date
Michael Panzlaff
41237af7bd ipmi: refactor and add sudo support 2026-03-23 18:19:17 +01:00
Michael Panzlaff
933272a8e2 update cc-lib to v2.11.0 2026-03-23 16:09:38 +01:00
3 changed files with 107 additions and 96 deletions

View File

@@ -31,7 +31,9 @@ type IpmiCollector struct {
ExcludeDevices []string `json:"exclude_devices"` ExcludeDevices []string `json:"exclude_devices"`
IpmitoolPath string `json:"ipmitool_path"` IpmitoolPath string `json:"ipmitool_path"`
IpmisensorsPath string `json:"ipmisensors_path"` IpmisensorsPath string `json:"ipmisensors_path"`
Sudo bool `json:"use_sudo"`
} }
ipmitool string ipmitool string
ipmisensors string ipmisensors string
} }
@@ -54,6 +56,7 @@ func (m *IpmiCollector) Init(config json.RawMessage) error {
// default path to IPMI tools // default path to IPMI tools
m.config.IpmitoolPath = "ipmitool" m.config.IpmitoolPath = "ipmitool"
m.config.IpmisensorsPath = "ipmi-sensors" m.config.IpmisensorsPath = "ipmi-sensors"
if len(config) > 0 { if len(config) > 0 {
d := json.NewDecoder(bytes.NewReader(config)) d := json.NewDecoder(bytes.NewReader(config))
d.DisallowUnknownFields() d.DisallowUnknownFields()
@@ -61,51 +64,57 @@ func (m *IpmiCollector) Init(config json.RawMessage) error {
return fmt.Errorf("%s Init(): Error decoding JSON config: %w", m.name, err) return fmt.Errorf("%s Init(): Error decoding JSON config: %w", m.name, err)
} }
} }
// Check if executables ipmitool or ipmisensors are found
p, err := exec.LookPath(m.config.IpmitoolPath) if len(m.config.IpmitoolPath) != 0 && len(m.config.IpmisensorsPath) != 0 {
if err == nil { return fmt.Errorf("ipmitool_path and ipmisensors_path cannot be used at the same time. Please disable one of them")
command := exec.Command(p) }
err := command.Run()
// Test if the configured commands actually work
if len(m.config.IpmitoolPath) != 0 {
dummyChan := make(chan lp.CCMessage)
go func() {
for range dummyChan {
}
}()
err := m.readIpmiTool(dummyChan)
close(dummyChan)
if err != nil { if err != nil {
cclog.ComponentError(m.name, fmt.Sprintf("Failed to execute %s: %s", p, err.Error())) return fmt.Errorf("Cannot execute '%s' (sudo=%t): %v", m.config.IpmitoolPath, m.config.Sudo, err)
m.ipmitool = ""
} else {
m.ipmitool = p
} }
} else if len(m.config.IpmisensorsPath) != 0 {
dummyChan := make(chan lp.CCMessage)
go func() {
for range dummyChan {
} }
p, err = exec.LookPath(m.config.IpmisensorsPath) }()
if err == nil { err := m.readIpmiSensors(dummyChan)
command := exec.Command(p) close(dummyChan)
err := command.Run()
if err != nil { if err != nil {
cclog.ComponentError(m.name, fmt.Sprintf("Failed to execute %s: %s", p, err.Error())) return fmt.Errorf("Cannot execute '%s' (sudo=%t): %v", m.config.IpmisensorsPath, m.config.Sudo, err)
m.ipmisensors = "" }
} else { } else {
m.ipmisensors = p return fmt.Errorf("IpmiCollector enabled, but neither ipmitool nor ipmi-sensors are configured.")
}
}
if len(m.ipmitool) == 0 && len(m.ipmisensors) == 0 {
return fmt.Errorf("%s Init(): no usable IPMI reader found", m.name)
} }
m.init = true m.init = true
return nil return nil
} }
func (m *IpmiCollector) readIpmiTool(cmd string, output chan lp.CCMessage) { func (m *IpmiCollector) readIpmiTool(output chan lp.CCMessage) error {
// Setup ipmitool command // Setup ipmitool command
command := exec.Command(cmd, "sensor") argv := make([]string, 0)
if m.config.Sudo {
argv = append(argv, "sudo")
}
argv = append(argv, m.config.IpmitoolPath, "sensor")
command := exec.Command(argv[0], argv[1:]...)
stdout, _ := command.StdoutPipe() stdout, _ := command.StdoutPipe()
errBuf := new(bytes.Buffer) errBuf := new(bytes.Buffer)
command.Stderr = errBuf command.Stderr = errBuf
// start command // start command
if err := command.Start(); err != nil { if err := command.Start(); err != nil {
cclog.ComponentError( return fmt.Errorf("Failed to start command '%s': %v", command.String(), err)
m.name,
fmt.Sprintf("readIpmiTool(): Failed to start command \"%s\": %v", command.String(), err),
)
return
} }
// Read command output // Read command output
@@ -116,7 +125,10 @@ func (m *IpmiCollector) readIpmiTool(cmd string, output chan lp.CCMessage) {
continue continue
} }
v, err := strconv.ParseFloat(strings.TrimSpace(lv[1]), 64) v, err := strconv.ParseFloat(strings.TrimSpace(lv[1]), 64)
if err == nil { if err != nil {
cclog.ComponentErrorf(m.name, "Failed to parse float '%s': %v", lv[1], err)
continue
}
name := strings.ToLower(strings.ReplaceAll(strings.TrimSpace(lv[0]), " ", "_")) name := strings.ToLower(strings.ReplaceAll(strings.TrimSpace(lv[0]), " ", "_"))
unit := strings.TrimSpace(lv[2]) unit := strings.TrimSpace(lv[2])
switch unit { switch unit {
@@ -131,70 +143,71 @@ func (m *IpmiCollector) readIpmiTool(cmd string, output chan lp.CCMessage) {
} }
y, err := lp.NewMessage(name, map[string]string{"type": "node"}, m.meta, map[string]any{"value": v}, time.Now()) y, err := lp.NewMessage(name, map[string]string{"type": "node"}, m.meta, map[string]any{"value": v}, time.Now())
if err == nil { if err != nil {
cclog.ComponentErrorf(m.name, "Failed to create message: %v", err)
continue
}
y.AddMeta("unit", unit) y.AddMeta("unit", unit)
output <- y output <- y
} }
}
}
// Wait for command end // Wait for command end
if err := command.Wait(); err != nil { if err := command.Wait(); err != nil {
errMsg, _ := io.ReadAll(errBuf) errMsg, _ := io.ReadAll(errBuf)
cclog.ComponentError( return fmt.Errorf("Failed to complete command '%s': %v (stderr: %s)", command.String(), err, strings.TrimSpace(string(errMsg)))
m.name,
fmt.Sprintf("readIpmiTool(): Failed to wait for the end of command \"%s\": %v\n", command.String(), err),
)
cclog.ComponentError(m.name, fmt.Sprintf("readIpmiTool(): command stderr: \"%s\"\n", strings.TrimSpace(string(errMsg))))
return
} }
return nil
} }
func (m *IpmiCollector) readIpmiSensors(cmd string, output chan lp.CCMessage) { func (m *IpmiCollector) readIpmiSensors(output chan lp.CCMessage) error {
// Setup ipmisensors command // Setup ipmisensors command
command := exec.Command(cmd, "--comma-separated-output", "--sdr-cache-recreate") argv := make([]string, 0)
if m.config.Sudo {
argv = append(argv, "sudo")
}
argv = append(argv, m.config.IpmisensorsPath, "--comma-separated-output", "--sdr-cache-recreate")
command := exec.Command(argv[0], argv[1:]...)
stdout, _ := command.StdoutPipe() stdout, _ := command.StdoutPipe()
errBuf := new(bytes.Buffer) errBuf := new(bytes.Buffer)
command.Stderr = errBuf command.Stderr = errBuf
// start command // start command
if err := command.Start(); err != nil { if err := command.Start(); err != nil {
cclog.ComponentError( return fmt.Errorf("Failed to start command '%s': %v", command.String(), err)
m.name,
fmt.Sprintf("readIpmiSensors(): Failed to start command \"%s\": %v", command.String(), err),
)
return
} }
// Read command output // Read command output
scanner := bufio.NewScanner(stdout) scanner := bufio.NewScanner(stdout)
for scanner.Scan() { for scanner.Scan() {
lv := strings.Split(scanner.Text(), ",") lv := strings.Split(scanner.Text(), ",")
if len(lv) > 3 { if len(lv) <= 3 {
continue
}
v, err := strconv.ParseFloat(lv[3], 64) v, err := strconv.ParseFloat(lv[3], 64)
if err == nil { if err != nil {
cclog.ComponentErrorf(m.name, "Failed to parse float '%s': %v", lv[3], err)
continue
}
name := strings.ToLower(strings.ReplaceAll(lv[1], " ", "_")) name := strings.ToLower(strings.ReplaceAll(lv[1], " ", "_"))
y, err := lp.NewMessage(name, map[string]string{"type": "node"}, m.meta, map[string]any{"value": v}, time.Now()) y, err := lp.NewMessage(name, map[string]string{"type": "node"}, m.meta, map[string]any{"value": v}, time.Now())
if err == nil { if err != nil {
cclog.ComponentErrorf(m.name, "Failed to create message: %v", err)
continue
}
if len(lv) > 4 { if len(lv) > 4 {
y.AddMeta("unit", lv[4]) y.AddMeta("unit", lv[4])
} }
output <- y output <- y
} }
}
}
}
// Wait for command end // Wait for command end
if err := command.Wait(); err != nil { if err := command.Wait(); err != nil {
errMsg, _ := io.ReadAll(errBuf) errMsg, _ := io.ReadAll(errBuf)
cclog.ComponentError( return fmt.Errorf("Failed to complete command '%s': %v (stderr: %s)", command.String(), err, strings.TrimSpace(string(errMsg)))
m.name,
fmt.Sprintf("readIpmiSensors(): Failed to wait for the end of command \"%s\": %v\n", command.String(), err),
)
cclog.ComponentError(m.name, fmt.Sprintf("readIpmiSensors(): command stderr: \"%s\"\n", strings.TrimSpace(string(errMsg))))
return
} }
return nil
} }
func (m *IpmiCollector) Read(interval time.Duration, output chan lp.CCMessage) { func (m *IpmiCollector) Read(interval time.Duration, output chan lp.CCMessage) {
@@ -204,9 +217,15 @@ func (m *IpmiCollector) Read(interval time.Duration, output chan lp.CCMessage) {
} }
if len(m.config.IpmitoolPath) > 0 { if len(m.config.IpmitoolPath) > 0 {
m.readIpmiTool(m.config.IpmitoolPath, output) err := m.readIpmiTool(output)
if err != nil {
cclog.ComponentErrorf(m.name, "readIpmiTool() failed: %v", err)
}
} else if len(m.config.IpmisensorsPath) > 0 { } else if len(m.config.IpmisensorsPath) > 0 {
m.readIpmiSensors(m.config.IpmisensorsPath, output) err := m.readIpmiSensors(output)
if err != nil {
cclog.ComponentErrorf(m.name, "readIpmiSensors() failed: %v", err)
}
} }
} }

4
go.mod
View File

@@ -3,8 +3,8 @@ module github.com/ClusterCockpit/cc-metric-collector
go 1.25.0 go 1.25.0
require ( require (
github.com/ClusterCockpit/cc-lib/v2 v2.10.0 github.com/ClusterCockpit/cc-lib/v2 v2.11.0
github.com/ClusterCockpit/go-rocm-smi v0.4.0 github.com/ClusterCockpit/go-rocm-smi v0.3.0
github.com/NVIDIA/go-nvml v0.13.0-1 github.com/NVIDIA/go-nvml v0.13.0-1
github.com/PaesslerAG/gval v1.2.4 github.com/PaesslerAG/gval v1.2.4
github.com/fsnotify/fsnotify v1.9.0 github.com/fsnotify/fsnotify v1.9.0

18
go.sum
View File

@@ -1,9 +1,10 @@
github.com/ClusterCockpit/cc-lib/v2 v2.10.0 h1:McCcEwc1j942hV54JAzsB/pcArP6A5FoQtjzW2H7K9U= github.com/ClusterCockpit/cc-lib/v2 v2.11.0 h1:LaLs4J0b7FArIXT8byMUcIcUr55R5obATjVi7qI02r4=
github.com/ClusterCockpit/cc-lib/v2 v2.10.0/go.mod h1:Oj+N2lpFqiBOBzjfrLIGJ2YSWT400TX4M0ii4lNl81A= github.com/ClusterCockpit/cc-lib/v2 v2.11.0/go.mod h1:Oj+N2lpFqiBOBzjfrLIGJ2YSWT400TX4M0ii4lNl81A=
github.com/ClusterCockpit/cc-line-protocol/v2 v2.4.0 h1:hIzxgTBWcmCIHtoDKDkSCsKCOCOwUC34sFsbD2wcW0Q= github.com/ClusterCockpit/cc-line-protocol/v2 v2.4.0 h1:hIzxgTBWcmCIHtoDKDkSCsKCOCOwUC34sFsbD2wcW0Q=
github.com/ClusterCockpit/cc-line-protocol/v2 v2.4.0/go.mod h1:y42qUu+YFmu5fdNuUAS4VbbIKxVjxCvbVqFdpdh8ahY= github.com/ClusterCockpit/cc-line-protocol/v2 v2.4.0/go.mod h1:y42qUu+YFmu5fdNuUAS4VbbIKxVjxCvbVqFdpdh8ahY=
github.com/ClusterCockpit/go-rocm-smi v0.4.0 h1:3+bEPrSkjEJcOtt+qBUX48ugDVlOFaKUnXHTef2Ve2Q= github.com/ClusterCockpit/go-rocm-smi v0.3.0 h1:1qZnSpG7/NyLtc7AjqnUL9Jb8xtqG1nMVgp69rJfaR8=
github.com/ClusterCockpit/go-rocm-smi v0.4.0/go.mod h1:c19u5vBCcgb7DjL4EWTGSGpo6c79d07r4rxD50z25ng= github.com/ClusterCockpit/go-rocm-smi v0.3.0/go.mod h1:+I3UMeX3OlizXDf1WpGD43W4KGZZGVSGmny6rTeOnWA=
github.com/NVIDIA/go-nvml v0.11.6-0/go.mod h1:hy7HYeQy335x6nEss0Ne3PYqleRa6Ct+VKD9RQ4nyFs=
github.com/NVIDIA/go-nvml v0.13.0-1 h1:OLX8Jq3dONuPOQPC7rndB6+iDmDakw0XTYgzMxObkEw= github.com/NVIDIA/go-nvml v0.13.0-1 h1:OLX8Jq3dONuPOQPC7rndB6+iDmDakw0XTYgzMxObkEw=
github.com/NVIDIA/go-nvml v0.13.0-1/go.mod h1:+KNA7c7gIBH7SKSJ1ntlwkfN80zdx8ovl4hrK3LmPt4= github.com/NVIDIA/go-nvml v0.13.0-1/go.mod h1:+KNA7c7gIBH7SKSJ1ntlwkfN80zdx8ovl4hrK3LmPt4=
github.com/PaesslerAG/gval v1.2.4 h1:rhX7MpjJlcxYwL2eTTYIOBUyEKZ+A96T9vQySWkVUiU= github.com/PaesslerAG/gval v1.2.4 h1:rhX7MpjJlcxYwL2eTTYIOBUyEKZ+A96T9vQySWkVUiU=
@@ -89,14 +90,7 @@ github.com/spkg/bom v0.0.0-20160624110644-59b7046e48ad/go.mod h1:qLr4V1qq6nMqFKk
github.com/stmcginnis/gofish v0.21.4 h1:daexK8sh31CgeSMkPUNs21HWHHA9ecCPJPyLCTxukCg= github.com/stmcginnis/gofish v0.21.4 h1:daexK8sh31CgeSMkPUNs21HWHHA9ecCPJPyLCTxukCg=
github.com/stmcginnis/gofish v0.21.4/go.mod h1:PzF5i8ecRG9A2ol8XT64npKUunyraJ+7t0kYMpQAtqU= github.com/stmcginnis/gofish v0.21.4/go.mod h1:PzF5i8ecRG9A2ol8XT64npKUunyraJ+7t0kYMpQAtqU=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw=
github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo=
github.com/stretchr/objx v0.5.2/go.mod h1:FRsXN1f5AsAjCGJKqEizvkpNtU+EGNCLh3NxZ/8L+MA=
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU=
github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo=
github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U= github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U=
github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U= github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U=
github.com/tklauser/go-sysconf v0.3.16 h1:frioLaCQSsF5Cy1jgRBrzr6t502KIIwQ0MArYICU0nA= github.com/tklauser/go-sysconf v0.3.16 h1:frioLaCQSsF5Cy1jgRBrzr6t502KIIwQ0MArYICU0nA=
@@ -120,7 +114,5 @@ golang.org/x/time v0.14.0 h1:MRx4UaLrDotUKUdCIqzPC48t1Y9hANFKIRpNx+Te8PI=
golang.org/x/time v0.14.0/go.mod h1:eL/Oa2bBBK0TkX57Fyni+NgnyQQN4LitPmob2Hjnqw4= golang.org/x/time v0.14.0/go.mod h1:eL/Oa2bBBK0TkX57Fyni+NgnyQQN4LitPmob2Hjnqw4=
google.golang.org/protobuf v1.36.11 h1:fV6ZwhNocDyBLK0dj+fg8ektcVegBBuEolpbTQyBNVE= google.golang.org/protobuf v1.36.11 h1:fV6ZwhNocDyBLK0dj+fg8ektcVegBBuEolpbTQyBNVE=
google.golang.org/protobuf v1.36.11/go.mod h1:HTf+CrKn2C3g5S8VImy6tdcUvCska2kB7j23XfzDpco= google.golang.org/protobuf v1.36.11/go.mod h1:HTf+CrKn2C3g5S8VImy6tdcUvCska2kB7j23XfzDpco=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=