From b947f984598ec3a477c6ebfb78598961ca61a433 Mon Sep 17 00:00:00 2001 From: Michael Panzlaff Date: Mon, 23 Mar 2026 16:09:38 +0100 Subject: [PATCH 1/3] update cc-lib to v2.11.0 --- go.mod | 4 ++-- go.sum | 18 +++++------------- 2 files changed, 7 insertions(+), 15 deletions(-) diff --git a/go.mod b/go.mod index 9bf0b37..d79879e 100644 --- a/go.mod +++ b/go.mod @@ -3,8 +3,8 @@ module github.com/ClusterCockpit/cc-metric-collector go 1.25.0 require ( - github.com/ClusterCockpit/cc-lib/v2 v2.10.0 - github.com/ClusterCockpit/go-rocm-smi v0.4.0 + github.com/ClusterCockpit/cc-lib/v2 v2.11.0 + github.com/ClusterCockpit/go-rocm-smi v0.3.0 github.com/NVIDIA/go-nvml v0.13.0-1 github.com/PaesslerAG/gval v1.2.4 github.com/fsnotify/fsnotify v1.9.0 diff --git a/go.sum b/go.sum index 305a63c..439c7f6 100644 --- a/go.sum +++ b/go.sum @@ -1,9 +1,10 @@ -github.com/ClusterCockpit/cc-lib/v2 v2.10.0 h1:McCcEwc1j942hV54JAzsB/pcArP6A5FoQtjzW2H7K9U= -github.com/ClusterCockpit/cc-lib/v2 v2.10.0/go.mod h1:Oj+N2lpFqiBOBzjfrLIGJ2YSWT400TX4M0ii4lNl81A= +github.com/ClusterCockpit/cc-lib/v2 v2.11.0 h1:LaLs4J0b7FArIXT8byMUcIcUr55R5obATjVi7qI02r4= +github.com/ClusterCockpit/cc-lib/v2 v2.11.0/go.mod h1:Oj+N2lpFqiBOBzjfrLIGJ2YSWT400TX4M0ii4lNl81A= github.com/ClusterCockpit/cc-line-protocol/v2 v2.4.0 h1:hIzxgTBWcmCIHtoDKDkSCsKCOCOwUC34sFsbD2wcW0Q= github.com/ClusterCockpit/cc-line-protocol/v2 v2.4.0/go.mod h1:y42qUu+YFmu5fdNuUAS4VbbIKxVjxCvbVqFdpdh8ahY= -github.com/ClusterCockpit/go-rocm-smi v0.4.0 h1:3+bEPrSkjEJcOtt+qBUX48ugDVlOFaKUnXHTef2Ve2Q= -github.com/ClusterCockpit/go-rocm-smi v0.4.0/go.mod h1:c19u5vBCcgb7DjL4EWTGSGpo6c79d07r4rxD50z25ng= +github.com/ClusterCockpit/go-rocm-smi v0.3.0 h1:1qZnSpG7/NyLtc7AjqnUL9Jb8xtqG1nMVgp69rJfaR8= +github.com/ClusterCockpit/go-rocm-smi v0.3.0/go.mod h1:+I3UMeX3OlizXDf1WpGD43W4KGZZGVSGmny6rTeOnWA= +github.com/NVIDIA/go-nvml v0.11.6-0/go.mod h1:hy7HYeQy335x6nEss0Ne3PYqleRa6Ct+VKD9RQ4nyFs= github.com/NVIDIA/go-nvml v0.13.0-1 
h1:OLX8Jq3dONuPOQPC7rndB6+iDmDakw0XTYgzMxObkEw= github.com/NVIDIA/go-nvml v0.13.0-1/go.mod h1:+KNA7c7gIBH7SKSJ1ntlwkfN80zdx8ovl4hrK3LmPt4= github.com/PaesslerAG/gval v1.2.4 h1:rhX7MpjJlcxYwL2eTTYIOBUyEKZ+A96T9vQySWkVUiU= @@ -89,14 +90,7 @@ github.com/spkg/bom v0.0.0-20160624110644-59b7046e48ad/go.mod h1:qLr4V1qq6nMqFKk github.com/stmcginnis/gofish v0.21.4 h1:daexK8sh31CgeSMkPUNs21HWHHA9ecCPJPyLCTxukCg= github.com/stmcginnis/gofish v0.21.4/go.mod h1:PzF5i8ecRG9A2ol8XT64npKUunyraJ+7t0kYMpQAtqU= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= -github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= -github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo= -github.com/stretchr/objx v0.5.2/go.mod h1:FRsXN1f5AsAjCGJKqEizvkpNtU+EGNCLh3NxZ/8L+MA= github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= -github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= -github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= -github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo= -github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U= github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U= github.com/tklauser/go-sysconf v0.3.16 h1:frioLaCQSsF5Cy1jgRBrzr6t502KIIwQ0MArYICU0nA= @@ -120,7 +114,5 @@ golang.org/x/time v0.14.0 h1:MRx4UaLrDotUKUdCIqzPC48t1Y9hANFKIRpNx+Te8PI= golang.org/x/time v0.14.0/go.mod h1:eL/Oa2bBBK0TkX57Fyni+NgnyQQN4LitPmob2Hjnqw4= google.golang.org/protobuf v1.36.11 h1:fV6ZwhNocDyBLK0dj+fg8ektcVegBBuEolpbTQyBNVE= google.golang.org/protobuf v1.36.11/go.mod h1:HTf+CrKn2C3g5S8VImy6tdcUvCska2kB7j23XfzDpco= -gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod 
h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= -gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= From e40816eb17717ee8b86093c5b756aa5867925d5e Mon Sep 17 00:00:00 2001 From: Michael Panzlaff Date: Mon, 23 Mar 2026 18:19:17 +0100 Subject: [PATCH 2/3] ipmi: refactor and add sudo support --- collectors/ipmiMetric.go | 181 +++++++++++++++++++++------------------ 1 file changed, 100 insertions(+), 81 deletions(-) diff --git a/collectors/ipmiMetric.go b/collectors/ipmiMetric.go index 86b9e28..856a7c9 100644 --- a/collectors/ipmiMetric.go +++ b/collectors/ipmiMetric.go @@ -31,7 +31,9 @@ type IpmiCollector struct { ExcludeDevices []string `json:"exclude_devices"` IpmitoolPath string `json:"ipmitool_path"` IpmisensorsPath string `json:"ipmisensors_path"` + Sudo bool `json:"use_sudo"` } + ipmitool string ipmisensors string } @@ -54,6 +56,7 @@ func (m *IpmiCollector) Init(config json.RawMessage) error { // default path to IPMI tools m.config.IpmitoolPath = "ipmitool" m.config.IpmisensorsPath = "ipmi-sensors" + if len(config) > 0 { d := json.NewDecoder(bytes.NewReader(config)) d.DisallowUnknownFields() @@ -61,51 +64,57 @@ func (m *IpmiCollector) Init(config json.RawMessage) error { return fmt.Errorf("%s Init(): Error decoding JSON config: %w", m.name, err) } } - // Check if executables ipmitool or ipmisensors are found - p, err := exec.LookPath(m.config.IpmitoolPath) - if err == nil { - command := exec.Command(p) - err := command.Run() - if err != nil { - cclog.ComponentError(m.name, fmt.Sprintf("Failed to execute %s: %s", p, err.Error())) - m.ipmitool = "" - } else { - m.ipmitool = p - } + + if len(m.config.IpmitoolPath) != 0 && len(m.config.IpmisensorsPath) != 0 { + return fmt.Errorf("ipmitool_path and ipmisensors_path cannot be used at the same time. 
Please disable one of them") } - p, err = exec.LookPath(m.config.IpmisensorsPath) - if err == nil { - command := exec.Command(p) - err := command.Run() + + // Test if the configured commands actually work + if len(m.config.IpmitoolPath) != 0 { + dummyChan := make(chan lp.CCMessage) + go func() { + for range dummyChan { + } + }() + err := m.readIpmiTool(dummyChan) + close(dummyChan) if err != nil { - cclog.ComponentError(m.name, fmt.Sprintf("Failed to execute %s: %s", p, err.Error())) - m.ipmisensors = "" - } else { - m.ipmisensors = p + return fmt.Errorf("Cannot execute '%s' (sudo=%t): %v", m.config.IpmitoolPath, m.config.Sudo, err) } - } - if len(m.ipmitool) == 0 && len(m.ipmisensors) == 0 { - return fmt.Errorf("%s Init(): no usable IPMI reader found", m.name) + } else if len(m.config.IpmisensorsPath) != 0 { + dummyChan := make(chan lp.CCMessage) + go func() { + for range dummyChan { + } + }() + err := m.readIpmiSensors(dummyChan) + close(dummyChan) + if err != nil { + return fmt.Errorf("Cannot execute '%s' (sudo=%t): %v", m.config.IpmisensorsPath, m.config.Sudo, err) + } + } else { + return fmt.Errorf("IpmiCollector enabled, but neither ipmitool nor ipmi-sensors are configured.") } m.init = true return nil } -func (m *IpmiCollector) readIpmiTool(cmd string, output chan lp.CCMessage) { +func (m *IpmiCollector) readIpmiTool(output chan lp.CCMessage) error { // Setup ipmitool command - command := exec.Command(cmd, "sensor") + argv := make([]string, 0) + if m.config.Sudo { + argv = append(argv, "sudo") + } + argv = append(argv, m.config.IpmitoolPath, "sensor") + command := exec.Command(argv[0], argv[1:]...) 
stdout, _ := command.StdoutPipe() errBuf := new(bytes.Buffer) command.Stderr = errBuf // start command if err := command.Start(); err != nil { - cclog.ComponentError( - m.name, - fmt.Sprintf("readIpmiTool(): Failed to start command \"%s\": %v", command.String(), err), - ) - return + return fmt.Errorf("Failed to start command '%s': %v", command.String(), err) } // Read command output @@ -116,85 +125,89 @@ func (m *IpmiCollector) readIpmiTool(cmd string, output chan lp.CCMessage) { continue } v, err := strconv.ParseFloat(strings.TrimSpace(lv[1]), 64) - if err == nil { - name := strings.ToLower(strings.ReplaceAll(strings.TrimSpace(lv[0]), " ", "_")) - unit := strings.TrimSpace(lv[2]) - switch unit { - case "Volts": - unit = "Volts" - case "degrees C": - unit = "degC" - case "degrees F": - unit = "degF" - case "Watts": - unit = "Watts" - } - - y, err := lp.NewMessage(name, map[string]string{"type": "node"}, m.meta, map[string]any{"value": v}, time.Now()) - if err == nil { - y.AddMeta("unit", unit) - output <- y - } + if err != nil { + cclog.ComponentErrorf(m.name, "Failed to parse float '%s': %v", lv[1], err) + continue } + name := strings.ToLower(strings.ReplaceAll(strings.TrimSpace(lv[0]), " ", "_")) + unit := strings.TrimSpace(lv[2]) + switch unit { + case "Volts": + unit = "Volts" + case "degrees C": + unit = "degC" + case "degrees F": + unit = "degF" + case "Watts": + unit = "Watts" + } + + y, err := lp.NewMessage(name, map[string]string{"type": "node"}, m.meta, map[string]any{"value": v}, time.Now()) + if err != nil { + cclog.ComponentErrorf(m.name, "Failed to create message: %v", err) + continue + } + y.AddMeta("unit", unit) + output <- y } // Wait for command end if err := command.Wait(); err != nil { errMsg, _ := io.ReadAll(errBuf) - cclog.ComponentError( - m.name, - fmt.Sprintf("readIpmiTool(): Failed to wait for the end of command \"%s\": %v\n", command.String(), err), - ) - cclog.ComponentError(m.name, fmt.Sprintf("readIpmiTool(): command stderr: \"%s\"\n", 
strings.TrimSpace(string(errMsg)))) - return + return fmt.Errorf("Failed to complete command '%s': %v (stderr: %s)", command.String(), err, strings.TrimSpace(string(errMsg))) } + + return nil } -func (m *IpmiCollector) readIpmiSensors(cmd string, output chan lp.CCMessage) { +func (m *IpmiCollector) readIpmiSensors(output chan lp.CCMessage) error { // Setup ipmisensors command - command := exec.Command(cmd, "--comma-separated-output", "--sdr-cache-recreate") + argv := make([]string, 0) + if m.config.Sudo { + argv = append(argv, "sudo") + } + argv = append(argv, m.config.IpmisensorsPath, "--comma-separated-output", "--sdr-cache-recreate") + command := exec.Command(argv[0], argv[1:]...) stdout, _ := command.StdoutPipe() errBuf := new(bytes.Buffer) command.Stderr = errBuf // start command if err := command.Start(); err != nil { - cclog.ComponentError( - m.name, - fmt.Sprintf("readIpmiSensors(): Failed to start command \"%s\": %v", command.String(), err), - ) - return + return fmt.Errorf("Failed to start command '%s': %v", command.String(), err) } // Read command output scanner := bufio.NewScanner(stdout) for scanner.Scan() { lv := strings.Split(scanner.Text(), ",") - if len(lv) > 3 { - v, err := strconv.ParseFloat(lv[3], 64) - if err == nil { - name := strings.ToLower(strings.ReplaceAll(lv[1], " ", "_")) - y, err := lp.NewMessage(name, map[string]string{"type": "node"}, m.meta, map[string]any{"value": v}, time.Now()) - if err == nil { - if len(lv) > 4 { - y.AddMeta("unit", lv[4]) - } - output <- y - } - } + if len(lv) <= 3 { + continue } + v, err := strconv.ParseFloat(lv[3], 64) + if err != nil { + cclog.ComponentErrorf(m.name, "Failed to parse float '%s': %v", lv[3], err) + continue + } + name := strings.ToLower(strings.ReplaceAll(lv[1], " ", "_")) + y, err := lp.NewMessage(name, map[string]string{"type": "node"}, m.meta, map[string]any{"value": v}, time.Now()) + if err != nil { + cclog.ComponentErrorf(m.name, "Failed to create message: %v", err) + continue + } + if 
len(lv) > 4 { + y.AddMeta("unit", lv[4]) + } + output <- y } // Wait for command end if err := command.Wait(); err != nil { errMsg, _ := io.ReadAll(errBuf) - cclog.ComponentError( - m.name, - fmt.Sprintf("readIpmiSensors(): Failed to wait for the end of command \"%s\": %v\n", command.String(), err), - ) - cclog.ComponentError(m.name, fmt.Sprintf("readIpmiSensors(): command stderr: \"%s\"\n", strings.TrimSpace(string(errMsg)))) - return + return fmt.Errorf("Failed to complete command '%s': %v (stderr: %s)", command.String(), err, strings.TrimSpace(string(errMsg))) } + + return nil } func (m *IpmiCollector) Read(interval time.Duration, output chan lp.CCMessage) { @@ -204,9 +217,15 @@ func (m *IpmiCollector) Read(interval time.Duration, output chan lp.CCMessage) { } if len(m.config.IpmitoolPath) > 0 { - m.readIpmiTool(m.config.IpmitoolPath, output) + err := m.readIpmiTool(output) + if err != nil { + cclog.ComponentErrorf(m.name, "readIpmiTool() failed: %v", err) + } } else if len(m.config.IpmisensorsPath) > 0 { - m.readIpmiSensors(m.config.IpmisensorsPath, output) + err := m.readIpmiSensors(output) + if err != nil { + cclog.ComponentErrorf(m.name, "readIpmiSensors() failed: %v", err) + } } } From f816f4991b8dea970198eb4d75a57eb26001c6fc Mon Sep 17 00:00:00 2001 From: Michael Panzlaff Date: Mon, 23 Mar 2026 18:19:17 +0100 Subject: [PATCH 3/3] ipmi: refactor and add sudo support --- collectors/ipmiMetric.go | 104 +++++++++++++++++++++++---------------- collectors/ipmiMetric.md | 17 ++++++- 2 files changed, 78 insertions(+), 43 deletions(-) diff --git a/collectors/ipmiMetric.go b/collectors/ipmiMetric.go index 856a7c9..38fde8b 100644 --- a/collectors/ipmiMetric.go +++ b/collectors/ipmiMetric.go @@ -65,48 +65,58 @@ func (m *IpmiCollector) Init(config json.RawMessage) error { } } - if len(m.config.IpmitoolPath) != 0 && len(m.config.IpmisensorsPath) != 0 { - return fmt.Errorf("ipmitool_path and ipmisensors_path cannot be used at the same time. 
Please disable one of them") + m.ipmitool = m.config.IpmitoolPath + m.ipmisensors = m.config.IpmisensorsPath + + // Test if any of the supported backends work + var dummyChan chan lp.CCMessage + dummyConsumer := func() { + for range dummyChan { + } } - // Test if the configured commands actually work - if len(m.config.IpmitoolPath) != 0 { - dummyChan := make(chan lp.CCMessage) - go func() { - for range dummyChan { - } - }() - err := m.readIpmiTool(dummyChan) + // Test if ipmi-sensors works (preferred over ipmitool, because it's faster) + var ipmiSensorsErr error + if _, ipmiSensorsErr = exec.LookPath(m.ipmisensors); ipmiSensorsErr == nil { + dummyChan = make(chan lp.CCMessage) + go dummyConsumer() + ipmiSensorsErr = m.readIpmiSensors(dummyChan) close(dummyChan) - if err != nil { - return fmt.Errorf("Cannot execute '%s' (sudo=%t): %v", m.config.IpmitoolPath, m.config.Sudo, err) + if ipmiSensorsErr == nil { + cclog.ComponentDebugf(m.name, "Using ipmi-sensors for ipmistat collector") + m.init = true + return nil } - } else if len(m.config.IpmisensorsPath) != 0 { - dummyChan := make(chan lp.CCMessage) - go func() { - for range dummyChan { - } - }() - err := m.readIpmiSensors(dummyChan) - close(dummyChan) - if err != nil { - return fmt.Errorf("Cannot execute '%s' (sudo=%t): %v", m.config.IpmisensorsPath, m.config.Sudo, err) - } - } else { - return fmt.Errorf("IpmiCollector enabled, but neither ipmitool nor ipmi-sensors are configured.") } + cclog.ComponentDebugf(m.name, "Unable to use ipmi-sensors for ipmistat collector: %v", ipmiSensorsErr) + m.ipmisensors = "" - m.init = true - return nil + // Test if ipmitool works (may be very slow) + var ipmiToolErr error + if _, ipmiToolErr = exec.LookPath(m.ipmitool); ipmiToolErr == nil { + dummyChan = make(chan lp.CCMessage) + go dummyConsumer() + ipmiToolErr = m.readIpmiTool(dummyChan) + close(dummyChan) + if ipmiToolErr == nil { + cclog.ComponentDebugf(m.name, "Using ipmitool for ipmistat collector") + m.init = true + return 
nil + } + } + m.ipmitool = "" + cclog.ComponentDebugf(m.name, "Unable to use ipmitool for ipmistat collector: %v", ipmiToolErr) + + return fmt.Errorf("unable to init neither ipmitool (%w) nor ipmi-sensors (%w)", ipmiToolErr, ipmiSensorsErr) } func (m *IpmiCollector) readIpmiTool(output chan lp.CCMessage) error { // Setup ipmitool command argv := make([]string, 0) if m.config.Sudo { - argv = append(argv, "sudo") + argv = append(argv, "sudo", "-n") } - argv = append(argv, m.config.IpmitoolPath, "sensor") + argv = append(argv, m.ipmitool, "sensor") command := exec.Command(argv[0], argv[1:]...) stdout, _ := command.StdoutPipe() errBuf := new(bytes.Buffer) @@ -114,7 +124,7 @@ func (m *IpmiCollector) readIpmiTool(output chan lp.CCMessage) error { // start command if err := command.Start(); err != nil { - return fmt.Errorf("Failed to start command '%s': %v", command.String(), err) + return fmt.Errorf("failed to start command '%s': %w", command.String(), err) } // Read command output @@ -124,6 +134,12 @@ func (m *IpmiCollector) readIpmiTool(output chan lp.CCMessage) error { if len(lv) < 3 { continue } + + if strings.TrimSpace(lv[1]) == "0x0" || strings.TrimSpace(lv[1]) == "na" { + // Ignore known non-float values + continue + } + v, err := strconv.ParseFloat(strings.TrimSpace(lv[1]), 64) if err != nil { cclog.ComponentErrorf(m.name, "Failed to parse float '%s': %v", lv[1], err) @@ -154,7 +170,7 @@ func (m *IpmiCollector) readIpmiTool(output chan lp.CCMessage) error { // Wait for command end if err := command.Wait(); err != nil { errMsg, _ := io.ReadAll(errBuf) - return fmt.Errorf("Failed to complete command '%s': %v (stderr: %s)", command.String(), err, strings.TrimSpace(string(errMsg))) + return fmt.Errorf("failed to complete command '%s': %w (stderr: %s)", command.String(), err, strings.TrimSpace(string(errMsg))) } return nil @@ -164,9 +180,9 @@ func (m *IpmiCollector) readIpmiSensors(output chan lp.CCMessage) error { // Setup ipmisensors command argv := make([]string, 
0) if m.config.Sudo { - argv = append(argv, "sudo") + argv = append(argv, "sudo", "-n") } - argv = append(argv, m.config.IpmisensorsPath, "--comma-separated-output", "--sdr-cache-recreate") + argv = append(argv, m.ipmisensors, "--comma-separated-output", "--sdr-cache-recreate") command := exec.Command(argv[0], argv[1:]...) stdout, _ := command.StdoutPipe() errBuf := new(bytes.Buffer) @@ -174,7 +190,7 @@ func (m *IpmiCollector) readIpmiSensors(output chan lp.CCMessage) error { // start command if err := command.Start(); err != nil { - return fmt.Errorf("Failed to start command '%s': %v", command.String(), err) + return fmt.Errorf("failed to start command '%s': %w", command.String(), err) } // Read command output @@ -184,7 +200,11 @@ func (m *IpmiCollector) readIpmiSensors(output chan lp.CCMessage) error { if len(lv) <= 3 { continue } - v, err := strconv.ParseFloat(lv[3], 64) + if lv[3] == "N/A" || lv[3] == "Reading" { + // Ignore known non-float values + continue + } + v, err := strconv.ParseFloat(strings.TrimSpace(lv[3]), 64) if err != nil { cclog.ComponentErrorf(m.name, "Failed to parse float '%s': %v", lv[3], err) continue @@ -204,7 +224,7 @@ func (m *IpmiCollector) readIpmiSensors(output chan lp.CCMessage) error { // Wait for command end if err := command.Wait(); err != nil { errMsg, _ := io.ReadAll(errBuf) - return fmt.Errorf("Failed to complete command '%s': %v (stderr: %s)", command.String(), err, strings.TrimSpace(string(errMsg))) + return fmt.Errorf("failed to complete command '%s': %w (stderr: %s)", command.String(), err, strings.TrimSpace(string(errMsg))) } return nil @@ -216,16 +236,16 @@ func (m *IpmiCollector) Read(interval time.Duration, output chan lp.CCMessage) { return } - if len(m.config.IpmitoolPath) > 0 { - err := m.readIpmiTool(output) - if err != nil { - cclog.ComponentErrorf(m.name, "readIpmiTool() failed: %v", err) - } - } else if len(m.config.IpmisensorsPath) > 0 { + if len(m.ipmisensors) > 0 { err := m.readIpmiSensors(output) if err != nil 
{ cclog.ComponentErrorf(m.name, "readIpmiSensors() failed: %v", err) } + } else if len(m.ipmitool) > 0 { + err := m.readIpmiTool(output) + if err != nil { + cclog.ComponentErrorf(m.name, "readIpmiTool() failed: %v", err) + } } } diff --git a/collectors/ipmiMetric.md b/collectors/ipmiMetric.md index 5d21e86..3407a70 100644 --- a/collectors/ipmiMetric.md +++ b/collectors/ipmiMetric.md @@ -14,10 +14,25 @@ hugo_path: docs/reference/cc-metric-collector/collectors/ipmi.md ```json "ipmistat": { "ipmitool_path": "/path/to/ipmitool", - "ipmisensors_path": "/path/to/ipmi-sensors" + "ipmisensors_path": "/path/to/ipmi-sensors", + "use_sudo": true } ``` The `ipmistat` collector reads data from `ipmitool` (`ipmitool sensor`) or `ipmi-sensors` (`ipmi-sensors --sdr-cache-recreate --comma-separated-output`). The metrics depend on the output of the underlying tools but contain temperature, power and energy metrics. + +ipmitool and ipmi-sensors typically require root to run. +In order to run cc-metric-collector without root privileges, you can enable `use_sudo`. +Add a file like this in /etc/sudoers.d/ to allow cc-metric-collector to run these commands: + +``` +# Do not log the following sudo commands from monitoring, since this causes a lot of log spam. +# However keep log_denied enabled, to detect failures +Defaults: monitoring !log_allowed, !pam_session + +# Allow to use ipmitool and ipmi-sensors +monitoring ALL = (root) NOPASSWD:/usr/bin/ipmitool sensor +monitoring ALL = (root) NOPASSWD:/usr/sbin/ipmi-sensors --comma-separated-output --sdr-cache-recreate +```