mirror of
https://github.com/ClusterCockpit/cc-metric-collector.git
synced 2024-12-25 23:19:06 +01:00
Add receiver to gather remote IPMI sensor metrics
This commit is contained in:
parent
234ad3c54e
commit
b1a8674c4c
427
receivers/ipmiReceiver.go
Normal file
427
receivers/ipmiReceiver.go
Normal file
@ -0,0 +1,427 @@
|
|||||||
|
package receivers
|
||||||
|
|
||||||
|
import (
|
||||||
|
"bufio"
|
||||||
|
"bytes"
|
||||||
|
"encoding/json"
|
||||||
|
"fmt"
|
||||||
|
"io"
|
||||||
|
"os/exec"
|
||||||
|
"strconv"
|
||||||
|
"strings"
|
||||||
|
"sync"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
cclog "github.com/ClusterCockpit/cc-metric-collector/pkg/ccLogger"
|
||||||
|
lp "github.com/ClusterCockpit/cc-metric-collector/pkg/ccMetric"
|
||||||
|
)
|
||||||
|
|
||||||
|
type IPMIReceiverClientConfig struct {
|
||||||
|
|
||||||
|
// Hostname the IPMI service belongs to
|
||||||
|
Protocol string
|
||||||
|
DriverType string
|
||||||
|
Fanout int
|
||||||
|
NumHosts int
|
||||||
|
IPMIHosts string
|
||||||
|
IPMI2HostMapping map[string]string
|
||||||
|
Username string
|
||||||
|
Password string
|
||||||
|
isExcluded map[string]bool
|
||||||
|
}
|
||||||
|
|
||||||
|
type IPMIReceiver struct {
|
||||||
|
receiver
|
||||||
|
config struct {
|
||||||
|
Interval time.Duration
|
||||||
|
|
||||||
|
// Client config for each IPMI hosts
|
||||||
|
ClientConfigs []IPMIReceiverClientConfig
|
||||||
|
}
|
||||||
|
|
||||||
|
// Storage for static information
|
||||||
|
meta map[string]string
|
||||||
|
|
||||||
|
done chan bool // channel to finish / stop IPMI receiver
|
||||||
|
wg sync.WaitGroup // wait group for IPMI receiver
|
||||||
|
}
|
||||||
|
|
||||||
|
// doReadMetrics reads metrics from all configure IPMI hosts.
|
||||||
|
func (r *IPMIReceiver) doReadMetric() {
|
||||||
|
for i := range r.config.ClientConfigs {
|
||||||
|
clientConfig := &r.config.ClientConfigs[i]
|
||||||
|
var cmd_options []string
|
||||||
|
if clientConfig.Protocol == "ipmi-sensors" {
|
||||||
|
cmd_options = append(cmd_options,
|
||||||
|
"--always-prefix",
|
||||||
|
"--sdr-cache-recreate",
|
||||||
|
// Attempt to interpret OEM data, such as event data, sensor readings, or general extra info
|
||||||
|
"--interpret-oem-data",
|
||||||
|
// Ignore not-available (i.e. N/A) sensors in output
|
||||||
|
"--ignore-not-available-sensors",
|
||||||
|
// Ignore unrecognized sensor events
|
||||||
|
"--ignore-unrecognized-events",
|
||||||
|
// Output fields in comma separated format
|
||||||
|
"--comma-separated-output",
|
||||||
|
// Do not output column headers
|
||||||
|
"--no-header-output",
|
||||||
|
// Output non-abbreviated units (e.g. 'Amps' instead of 'A').
|
||||||
|
// May aid in disambiguation of units (e.g. 'C' for Celsius or Coulombs).
|
||||||
|
"--non-abbreviated-units",
|
||||||
|
"--fanout", fmt.Sprint(clientConfig.Fanout),
|
||||||
|
"--driver-type", clientConfig.DriverType,
|
||||||
|
"--host", clientConfig.IPMIHosts,
|
||||||
|
"--user", clientConfig.Username,
|
||||||
|
"--password", clientConfig.Password,
|
||||||
|
)
|
||||||
|
|
||||||
|
command := exec.Command("ipmi-sensors", cmd_options...)
|
||||||
|
stdout, _ := command.StdoutPipe()
|
||||||
|
errBuf := new(bytes.Buffer)
|
||||||
|
command.Stderr = errBuf
|
||||||
|
|
||||||
|
// start command
|
||||||
|
if err := command.Start(); err != nil {
|
||||||
|
cclog.ComponentError(
|
||||||
|
r.name,
|
||||||
|
fmt.Sprintf("doReadMetric(): Failed to start command \"%s\": %v", command.String(), err),
|
||||||
|
)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
// Read command output
|
||||||
|
const (
|
||||||
|
idxID = iota
|
||||||
|
idxName
|
||||||
|
idxType
|
||||||
|
idxReading
|
||||||
|
idxUnits
|
||||||
|
idxEvent
|
||||||
|
)
|
||||||
|
scanner := bufio.NewScanner(stdout)
|
||||||
|
for scanner.Scan() {
|
||||||
|
// Read host
|
||||||
|
v1 := strings.Split(scanner.Text(), ": ")
|
||||||
|
if len(v1) != 2 {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
host, ok := clientConfig.IPMI2HostMapping[v1[0]]
|
||||||
|
if !ok {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
// Read sensors
|
||||||
|
v2 := strings.Split(v1[1], ",")
|
||||||
|
if len(v2) != 6 {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
// Skip sensors with non available sensor readings
|
||||||
|
if v2[idxReading] == "N/A" {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
name := strings.ToLower(
|
||||||
|
strings.Replace(v2[idxName], " ", "_", -1))
|
||||||
|
metric := strings.ToLower(v2[idxType])
|
||||||
|
unit := v2[idxUnits]
|
||||||
|
if unit == "Watts" {
|
||||||
|
metric = "power"
|
||||||
|
} else if metric == "voltage" && unit == "Volts" {
|
||||||
|
} else if metric == "temperature" && unit == "degrees C" {
|
||||||
|
unit = "degC"
|
||||||
|
} else if metric == "temperature" && unit == "degrees F" {
|
||||||
|
unit = "degF"
|
||||||
|
} else if metric == "fan" && unit == "RPM" {
|
||||||
|
metric = "fan_speed"
|
||||||
|
} else if metric == "other units based sensor" &&
|
||||||
|
(unit == "unspecified" ||
|
||||||
|
unit == "%") &&
|
||||||
|
(name == "cpu_utilization" ||
|
||||||
|
name == "io_utilization" ||
|
||||||
|
name == "mem_utilization" ||
|
||||||
|
name == "sys_utilization") {
|
||||||
|
metric = "utilization"
|
||||||
|
unit = "percent"
|
||||||
|
} else {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
// Skip excluded metrics
|
||||||
|
if clientConfig.isExcluded[metric] {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
// Parse sensor value
|
||||||
|
value, err := strconv.ParseFloat(v2[idxReading], 64)
|
||||||
|
if err != nil {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
y, err := lp.New(
|
||||||
|
metric,
|
||||||
|
map[string]string{
|
||||||
|
"hostname": host,
|
||||||
|
"type": "node",
|
||||||
|
"name": name,
|
||||||
|
},
|
||||||
|
map[string]string{
|
||||||
|
"source": r.name,
|
||||||
|
"group": "IPMI",
|
||||||
|
"unit": unit,
|
||||||
|
},
|
||||||
|
map[string]interface{}{
|
||||||
|
"value": value,
|
||||||
|
},
|
||||||
|
time.Now())
|
||||||
|
if err == nil {
|
||||||
|
r.sink <- y
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
// Wait for command end
|
||||||
|
if err := command.Wait(); err != nil {
|
||||||
|
errMsg, _ := io.ReadAll(errBuf)
|
||||||
|
cclog.ComponentError(
|
||||||
|
r.name,
|
||||||
|
fmt.Sprintf("doReadMetric(): Failed to wait for the end of command \"%s\": %v\n",
|
||||||
|
strings.Replace(command.String(), clientConfig.Password, "<PW>", -1), err),
|
||||||
|
fmt.Sprintf("doReadMetric(): command stderr: \"%s\"\n", string(errMsg)),
|
||||||
|
)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (r *IPMIReceiver) Start() {
|
||||||
|
cclog.ComponentDebug(r.name, "START")
|
||||||
|
|
||||||
|
// Start IPMI receiver
|
||||||
|
r.wg.Add(1)
|
||||||
|
go func() {
|
||||||
|
defer r.wg.Done()
|
||||||
|
|
||||||
|
// Create ticker
|
||||||
|
ticker := time.NewTicker(r.config.Interval)
|
||||||
|
defer ticker.Stop()
|
||||||
|
|
||||||
|
for {
|
||||||
|
r.doReadMetric()
|
||||||
|
|
||||||
|
select {
|
||||||
|
case tickerTime := <-ticker.C:
|
||||||
|
// Check if we missed the ticker event
|
||||||
|
if since := time.Since(tickerTime); since > 5*time.Second {
|
||||||
|
cclog.ComponentInfo(r.name, "Missed ticker event for more then", since)
|
||||||
|
}
|
||||||
|
|
||||||
|
// process ticker event -> continue
|
||||||
|
continue
|
||||||
|
case <-r.done:
|
||||||
|
// process done event
|
||||||
|
return
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}()
|
||||||
|
|
||||||
|
cclog.ComponentDebug(r.name, "STARTED")
|
||||||
|
}
|
||||||
|
|
||||||
|
// Close receiver: close network connection, close files, close libraries, ...
|
||||||
|
func (r *IPMIReceiver) Close() {
|
||||||
|
cclog.ComponentDebug(r.name, "CLOSE")
|
||||||
|
|
||||||
|
// Send the signal and wait
|
||||||
|
close(r.done)
|
||||||
|
r.wg.Wait()
|
||||||
|
|
||||||
|
cclog.ComponentDebug(r.name, "DONE")
|
||||||
|
}
|
||||||
|
|
||||||
|
// NewIPMIReceiver creates a new instance of the redfish receiver
|
||||||
|
// Initialize the receiver by giving it a name and reading in the config JSON
|
||||||
|
func NewIPMIReceiver(name string, config json.RawMessage) (Receiver, error) {
|
||||||
|
r := new(IPMIReceiver)
|
||||||
|
|
||||||
|
// Config options from config file
|
||||||
|
configJSON := struct {
|
||||||
|
Type string `json:"type"`
|
||||||
|
|
||||||
|
// Maximum number of simultaneous IPMI connections (default: 64)
|
||||||
|
Fanout int `json:"fanout,omitempty"`
|
||||||
|
// Out of band IPMI driver (default: LAN_2_0)
|
||||||
|
DriverType string `json:"driver_type,omitempty"`
|
||||||
|
|
||||||
|
// How often the IPMI sensor metrics should be read and send to the sink (default: 30 s)
|
||||||
|
IntervalString string `json:"interval,omitempty"`
|
||||||
|
|
||||||
|
// Default client username, password and endpoint
|
||||||
|
Username *string `json:"username"` // User name to authenticate with
|
||||||
|
Password *string `json:"password"` // Password to use for authentication
|
||||||
|
Endpoint *string `json:"endpoint"` // URL of the IPMI service
|
||||||
|
|
||||||
|
// Globally excluded metrics
|
||||||
|
ExcludeMetrics []string `json:"exclude_metrics,omitempty"`
|
||||||
|
|
||||||
|
ClientConfigs []struct {
|
||||||
|
Endpoint *string `json:"endpoint"` // URL of the IPMI service
|
||||||
|
Fanout int `json:"fanout,omitempty"` // Maximum number of simultaneous IPMI connections (default: 64)
|
||||||
|
DriverType string `json:"driver_type,omitempty"` // Out of band IPMI driver (default: LAN_2_0)
|
||||||
|
HostList []string `json:"host_list"` // List of hosts with the same client configuration
|
||||||
|
Username *string `json:"username"` // User name to authenticate with
|
||||||
|
Password *string `json:"password"` // Password to use for authentication
|
||||||
|
|
||||||
|
// Per client excluded metrics
|
||||||
|
ExcludeMetrics []string `json:"exclude_metrics,omitempty"`
|
||||||
|
} `json:"client_config"`
|
||||||
|
}{
|
||||||
|
// Set defaults values
|
||||||
|
// Allow overwriting these defaults by reading config JSON
|
||||||
|
Fanout: 64,
|
||||||
|
DriverType: "LAN_2_0",
|
||||||
|
IntervalString: "30s",
|
||||||
|
}
|
||||||
|
|
||||||
|
// Set name of IPMIReceiver
|
||||||
|
r.name = fmt.Sprintf("IPMIReceiver(%s)", name)
|
||||||
|
|
||||||
|
// Create done channel
|
||||||
|
r.done = make(chan bool)
|
||||||
|
|
||||||
|
// Set static information
|
||||||
|
r.meta = map[string]string{"source": r.name}
|
||||||
|
|
||||||
|
// Read the IPMI receiver specific JSON config
|
||||||
|
if len(config) > 0 {
|
||||||
|
err := json.Unmarshal(config, &configJSON)
|
||||||
|
if err != nil {
|
||||||
|
cclog.ComponentError(r.name, "Error reading config:", err.Error())
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Convert interval string representation to duration
|
||||||
|
var err error
|
||||||
|
r.config.Interval, err = time.ParseDuration(configJSON.IntervalString)
|
||||||
|
if err != nil {
|
||||||
|
err := fmt.Errorf(
|
||||||
|
"Failed to parse duration string interval='%s': %w",
|
||||||
|
configJSON.IntervalString,
|
||||||
|
err,
|
||||||
|
)
|
||||||
|
cclog.Error(r.name, err)
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
// Create client config from JSON config
|
||||||
|
totalNumHosts := 0
|
||||||
|
for i := range configJSON.ClientConfigs {
|
||||||
|
clientConfigJSON := &configJSON.ClientConfigs[i]
|
||||||
|
|
||||||
|
var endpoint string
|
||||||
|
if clientConfigJSON.Endpoint != nil {
|
||||||
|
endpoint = *clientConfigJSON.Endpoint
|
||||||
|
} else if configJSON.Endpoint != nil {
|
||||||
|
endpoint = *configJSON.Endpoint
|
||||||
|
} else {
|
||||||
|
err := fmt.Errorf("client config number %v requires endpoint", i)
|
||||||
|
cclog.ComponentError(r.name, err)
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
fanout := configJSON.Fanout
|
||||||
|
if clientConfigJSON.Fanout != 0 {
|
||||||
|
fanout = clientConfigJSON.Fanout
|
||||||
|
}
|
||||||
|
|
||||||
|
driverType := configJSON.DriverType
|
||||||
|
if clientConfigJSON.DriverType != "" {
|
||||||
|
driverType = clientConfigJSON.DriverType
|
||||||
|
}
|
||||||
|
if driverType != "LAN" && driverType != "LAN_2_0" {
|
||||||
|
err := fmt.Errorf("client config number %v has invalid driver type %s", i, driverType)
|
||||||
|
cclog.ComponentError(r.name, err)
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
var protocol string
|
||||||
|
var host_pattern string
|
||||||
|
if e := strings.Split(endpoint, "://"); len(e) == 2 {
|
||||||
|
protocol = e[0]
|
||||||
|
host_pattern = e[1]
|
||||||
|
} else {
|
||||||
|
err := fmt.Errorf("client config number %v has invalid endpoint %s", i, endpoint)
|
||||||
|
cclog.ComponentError(r.name, err)
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
var username string
|
||||||
|
if clientConfigJSON.Username != nil {
|
||||||
|
username = *clientConfigJSON.Username
|
||||||
|
} else if configJSON.Username != nil {
|
||||||
|
username = *configJSON.Username
|
||||||
|
} else {
|
||||||
|
err := fmt.Errorf("client config number %v requires username", i)
|
||||||
|
cclog.ComponentError(r.name, err)
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
var password string
|
||||||
|
if clientConfigJSON.Password != nil {
|
||||||
|
password = *clientConfigJSON.Password
|
||||||
|
} else if configJSON.Password != nil {
|
||||||
|
password = *configJSON.Password
|
||||||
|
} else {
|
||||||
|
err := fmt.Errorf("client config number %v requires password", i)
|
||||||
|
cclog.ComponentError(r.name, err)
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
// Create mapping between ipmi hostname and node hostname
|
||||||
|
// This also guaranties that all ipmi hostnames are uniqu
|
||||||
|
ipmi2HostMapping := make(map[string]string)
|
||||||
|
for _, host := range clientConfigJSON.HostList {
|
||||||
|
ipmiHost := strings.Replace(host_pattern, "%h", host, -1)
|
||||||
|
ipmi2HostMapping[ipmiHost] = host
|
||||||
|
}
|
||||||
|
|
||||||
|
numHosts := len(ipmi2HostMapping)
|
||||||
|
totalNumHosts += numHosts
|
||||||
|
ipmiHostList := make([]string, 0, numHosts)
|
||||||
|
for ipmiHost := range ipmi2HostMapping {
|
||||||
|
ipmiHostList = append(ipmiHostList, ipmiHost)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Is metrics excluded globally or per client
|
||||||
|
isExcluded := make(map[string]bool)
|
||||||
|
for _, key := range clientConfigJSON.ExcludeMetrics {
|
||||||
|
isExcluded[key] = true
|
||||||
|
}
|
||||||
|
for _, key := range configJSON.ExcludeMetrics {
|
||||||
|
isExcluded[key] = true
|
||||||
|
}
|
||||||
|
|
||||||
|
r.config.ClientConfigs = append(
|
||||||
|
r.config.ClientConfigs,
|
||||||
|
IPMIReceiverClientConfig{
|
||||||
|
Protocol: protocol,
|
||||||
|
Fanout: fanout,
|
||||||
|
DriverType: driverType,
|
||||||
|
NumHosts: numHosts,
|
||||||
|
IPMIHosts: strings.Join(ipmiHostList, ","),
|
||||||
|
IPMI2HostMapping: ipmi2HostMapping,
|
||||||
|
Username: username,
|
||||||
|
Password: password,
|
||||||
|
isExcluded: isExcluded,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
if totalNumHosts == 0 {
|
||||||
|
err := fmt.Errorf("at least one IPMI host config is required")
|
||||||
|
cclog.ComponentError(r.name, err)
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
cclog.ComponentInfo(r.name, "monitoring", totalNumHosts, "IPMI hosts")
|
||||||
|
return r, nil
|
||||||
|
}
|
@ -11,6 +11,7 @@ import (
|
|||||||
)
|
)
|
||||||
|
|
||||||
var AvailableReceivers = map[string]func(name string, config json.RawMessage) (Receiver, error){
|
var AvailableReceivers = map[string]func(name string, config json.RawMessage) (Receiver, error){
|
||||||
|
"ipmi": NewIPMIReceiver,
|
||||||
"nats": NewNatsReceiver,
|
"nats": NewNatsReceiver,
|
||||||
"redfish": NewRedfishReceiver,
|
"redfish": NewRedfishReceiver,
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user