cc-metric-collector/collectors/ipmiMetric.go
Thomas Gruber 8d85bd53f1
Merge latest development changes to main branch (#79)
* Cleanup: Remove unused code

* Use Golang duration parser for 'interval' and 'duration'
 in main config

* Update handling of LIKWID headers. Download only if not already present in the system. Fixes #73

* Units with cc-units (#64)

* Add option to normalize units with cc-unit

* Add unit conversion to router

* Add option to change unit prefix in the router

* Add to MetricRouter README

* Add order of operations in router to README

* Use second add_tags/del_tags only if metric gets renamed

* Skip disks in DiskstatCollector that have size=0

* Check readability of sensor files in TempCollector

* Fix for --once option

* Rename `cpu` type to `hwthread` (#69)

* Rename 'cpu' type to 'hwthread' to avoid naming clashes with MetricStore and CC-Webfrontend

* Collectors in parallel (#74)

* Provide info to CollectorManager whether the collector can be executed in parallel with others

* Split serial and parallel collectors. Read in parallel first

* Update NvidiaCollector with new metrics, MIG and NvLink support (#75)

* CC topology module update (#76)

* Rename CPU to hardware thread, write some comments

* Do renaming in other parts

* Remove CpuList and SocketList function from metricCollector. Available in ccTopology

* Option to use MIG UUID as subtype-id in NvidiaCollector

* Option to use MIG slice name as subtype-id in NvidiaCollector

* MetricRouter: Fix JSON in README

* Fix for Github Action to really use the selected version

* Remove Ganglia installation in runonce Action and add Go 1.18

* Fix daemon options in init script

* Add separate go.mod files to use it with deprecated 1.16

* Minor updates for Makefiles

* fix string comparison

* AMD ROCm SMI collector (#77)

* Add collector for AMD ROCm SMI metrics

* Fix import path

* Fix imports

* Remove Board Number

* store GPU index explicitly

* Remove board number from description

* Use http instead of ftp to download likwid

* Fix serial number in rocmCollector

* Improved http sink (#78)

* automatic flush in NatsSink

* tweak default options of HttpSink

* shorter cirt. section and retries for HttpSink

* fix error handling

* Remove file added by mistake.

* Use http instead of ftp to download likwid

* Fix serial number in rocmCollector

Co-authored-by: Thomas Roehl <thomas.roehl@fau.de>

Co-authored-by: Holger Obermaier <40787752+ho-ob@users.noreply.github.com>
Co-authored-by: Lou <lou.knauer@gmx.de>
2022-06-08 15:25:40 +02:00

150 lines
3.4 KiB
Go

package collectors
import (
"encoding/json"
"errors"
"log"
"os"
"os/exec"
"strconv"
"strings"
"time"
lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric"
)
const IPMITOOL_PATH = `ipmitool`
const IPMISENSORS_PATH = `ipmi-sensors`
type IpmiCollectorConfig struct {
ExcludeDevices []string `json:"exclude_devices"`
IpmitoolPath string `json:"ipmitool_path"`
IpmisensorsPath string `json:"ipmisensors_path"`
}
type IpmiCollector struct {
metricCollector
//tags map[string]string
//matches map[string]string
config IpmiCollectorConfig
ipmitool string
ipmisensors string
}
func (m *IpmiCollector) Init(config json.RawMessage) error {
m.name = "IpmiCollector"
m.setup()
m.parallel = true
m.meta = map[string]string{"source": m.name, "group": "IPMI"}
m.config.IpmitoolPath = string(IPMITOOL_PATH)
m.config.IpmisensorsPath = string(IPMISENSORS_PATH)
m.ipmitool = ""
m.ipmisensors = ""
if len(config) > 0 {
err := json.Unmarshal(config, &m.config)
if err != nil {
return err
}
}
p, err := exec.LookPath(m.config.IpmitoolPath)
if err == nil {
m.ipmitool = p
}
p, err = exec.LookPath(m.config.IpmisensorsPath)
if err == nil {
m.ipmisensors = p
}
if len(m.ipmitool) == 0 && len(m.ipmisensors) == 0 {
return errors.New("no IPMI reader found")
}
m.init = true
return nil
}
func (m *IpmiCollector) readIpmiTool(cmd string, output chan lp.CCMetric) {
command := exec.Command(cmd, "sensor")
command.Wait()
stdout, err := command.Output()
if err != nil {
log.Print(err)
return
}
ll := strings.Split(string(stdout), "\n")
for _, line := range ll {
lv := strings.Split(line, "|")
if len(lv) < 3 {
continue
}
v, err := strconv.ParseFloat(strings.Trim(lv[1], " "), 64)
if err == nil {
name := strings.ToLower(strings.Replace(strings.Trim(lv[0], " "), " ", "_", -1))
unit := strings.Trim(lv[2], " ")
if unit == "Volts" {
unit = "Volts"
} else if unit == "degrees C" {
unit = "degC"
} else if unit == "degrees F" {
unit = "degF"
} else if unit == "Watts" {
unit = "Watts"
}
y, err := lp.New(name, map[string]string{"type": "node"}, m.meta, map[string]interface{}{"value": v}, time.Now())
if err == nil {
y.AddMeta("unit", unit)
output <- y
}
}
}
}
func (m *IpmiCollector) readIpmiSensors(cmd string, output chan lp.CCMetric) {
command := exec.Command(cmd, "--comma-separated-output", "--sdr-cache-recreate")
command.Wait()
stdout, err := command.Output()
if err != nil {
log.Print(err)
return
}
ll := strings.Split(string(stdout), "\n")
for _, line := range ll {
lv := strings.Split(line, ",")
if len(lv) > 3 {
v, err := strconv.ParseFloat(lv[3], 64)
if err == nil {
name := strings.ToLower(strings.Replace(lv[1], " ", "_", -1))
y, err := lp.New(name, map[string]string{"type": "node"}, m.meta, map[string]interface{}{"value": v}, time.Now())
if err == nil {
if len(lv) > 4 {
y.AddMeta("unit", lv[4])
}
output <- y
}
}
}
}
}
func (m *IpmiCollector) Read(interval time.Duration, output chan lp.CCMetric) {
if len(m.config.IpmitoolPath) > 0 {
_, err := os.Stat(m.config.IpmitoolPath)
if err == nil {
m.readIpmiTool(m.config.IpmitoolPath, output)
}
} else if len(m.config.IpmisensorsPath) > 0 {
_, err := os.Stat(m.config.IpmisensorsPath)
if err == nil {
m.readIpmiSensors(m.config.IpmisensorsPath, output)
}
}
}
func (m *IpmiCollector) Close() {
m.init = false
}