Refactor redfishReceiver.

This commit is contained in:
Holger Obermaier 2022-08-16 15:14:20 +02:00
parent f7b39d027b
commit 0dd430e7e9

View File

@ -18,71 +18,42 @@ import (
"github.com/stmcginnis/gofish/redfish" "github.com/stmcginnis/gofish/redfish"
) )
// RedfishReceiver configuration: type RedfishReceiverClientConfig struct {
type RedfishReceiver struct {
receiver
config struct {
Type string `json:"type"`
// Maximum number of simultaneous redfish connections (default: 64) // Hostname the redfish service belongs to
Fanout int `json:"fanout,omitempty"` Hostname string
// How often the redfish power metrics should be read and sent to the sink (default: 30 s)
IntervalString string `json:"interval,omitempty"`
Interval time.Duration
// Control whether a client verifies the server's certificate
// (default: true == do not verify server's certificate)
HttpInsecure bool `json:"http_insecure,omitempty"`
// Time limit for requests made by this HTTP client (default: 10 s)
HttpTimeoutString string `json:"http_timeout,omitempty"`
HttpTimeout time.Duration
// Globally disable collection of power, processor or thermal metrics
DisablePowerMetrics bool `json:"disable_power_metrics"`
DisableProcessorMetrics bool `json:"disable_processor_metrics"`
DisableThermalMetrics bool `json:"disable_thermal_metrics"`
// Globally excluded metrics
ExcludeMetrics []string `json:"exclude_metrics,omitempty"`
// Client config for each redfish service
ClientConfigs []struct {
Hostname *string `json:"hostname"` // Hostname the redfish service belongs to
Username *string `json:"username"` // User name to authenticate with
Password *string `json:"password"` // Password to use for authentication
Endpoint *string `json:"endpoint"` // URL of the redfish service
// Per client disable collection of power,processor or thermal metrics
DisablePowerMetrics bool `json:"disable_power_metrics"`
DisableProcessorMetrics bool `json:"disable_processor_metrics"`
DisableThermalMetrics bool `json:"disable_thermal_metrics"`
// Per client excluded metrics
ExcludeMetrics []string `json:"exclude_metrics,omitempty"`
// is metric excluded globally or per client // is metric excluded globally or per client
isExcluded map[string](bool) isExcluded map[string](bool)
doPowerMetric bool
doProcessorMetrics bool
doThermalMetrics bool
gofish gofish.ClientConfig gofish gofish.ClientConfig
} `json:"client_config"` }
// RedfishReceiver configuration:
type RedfishReceiver struct {
receiver
config struct {
fanout int
Interval time.Duration
HttpTimeout time.Duration
// Client config for each redfish service
ClientConfigs []RedfishReceiverClientConfig
} }
done chan bool // channel to finish / stop redfish receiver done chan bool // channel to finish / stop redfish receiver
wg sync.WaitGroup // wait group for redfish receiver wg sync.WaitGroup // wait group for redfish receiver
} }
// Start starts the redfish receiver // readThermalMetrics reads thermal metrics from a redfish device
func (r *RedfishReceiver) Start() { func (r *RedfishReceiver) readThermalMetrics(
cclog.ComponentDebug(r.name, "START") clientConfig *RedfishReceiverClientConfig,
chassis *redfish.Chassis) error {
// Read redfish thermal metrics
readThermalMetrics := func(clientConfigIndex int, chassis *redfish.Chassis) error {
clientConfig := &r.config.ClientConfigs[clientConfigIndex]
// Skip collection of thermal metrics when disabled by config
if r.config.DisableThermalMetrics || clientConfig.DisableThermalMetrics {
return nil
}
// Get thermal information for each chassis // Get thermal information for each chassis
thermal, err := chassis.Thermal() thermal, err := chassis.Thermal()
@ -110,7 +81,7 @@ func (r *RedfishReceiver) Start() {
} }
tags := map[string]string{ tags := map[string]string{
"hostname": *clientConfig.Hostname, "hostname": clientConfig.Hostname,
"type": "node", "type": "node",
// ChassisType shall indicate the physical form factor for the type of chassis // ChassisType shall indicate the physical form factor for the type of chassis
"chassis_typ": string(chassis.ChassisType), "chassis_typ": string(chassis.ChassisType),
@ -168,7 +139,7 @@ func (r *RedfishReceiver) Start() {
} }
tags := map[string]string{ tags := map[string]string{
"hostname": *clientConfig.Hostname, "hostname": clientConfig.Hostname,
"type": "node", "type": "node",
// ChassisType shall indicate the physical form factor for the type of chassis // ChassisType shall indicate the physical form factor for the type of chassis
"chassis_typ": string(chassis.ChassisType), "chassis_typ": string(chassis.ChassisType),
@ -215,16 +186,12 @@ func (r *RedfishReceiver) Start() {
} }
return nil return nil
} }
// Read redfish power metrics // readPowerMetrics reads power metrics from a redfish device
readPowerMetrics := func(clientConfigIndex int, chassis *redfish.Chassis) error { func (r *RedfishReceiver) readPowerMetrics(
clientConfig := &r.config.ClientConfigs[clientConfigIndex] clientConfig *RedfishReceiverClientConfig,
chassis *redfish.Chassis) error {
// Skip collection of power metrics when disabled by config
if r.config.DisablePowerMetrics || clientConfig.DisablePowerMetrics {
return nil
}
// Get power information for each chassis // Get power information for each chassis
power, err := chassis.Power() power, err := chassis.Power()
@ -282,7 +249,7 @@ func (r *RedfishReceiver) Start() {
// Set tags // Set tags
tags := map[string]string{ tags := map[string]string{
"hostname": *clientConfig.Hostname, "hostname": clientConfig.Hostname,
"type": "node", "type": "node",
// ChassisType shall indicate the physical form factor for the type of chassis // ChassisType shall indicate the physical form factor for the type of chassis
"chassis_typ": string(chassis.ChassisType), "chassis_typ": string(chassis.ChassisType),
@ -337,17 +304,13 @@ func (r *RedfishReceiver) Start() {
} }
return nil return nil
} }
// Read redfish processor metrics // readProcessorMetrics reads processor metrics from a redfish device
// See: https://redfish.dmtf.org/schemas/v1/ProcessorMetrics.json // See: https://redfish.dmtf.org/schemas/v1/ProcessorMetrics.json
readProcessorMetrics := func(clientConfigIndex int, processor *redfish.Processor) error { func (r *RedfishReceiver) readProcessorMetrics(
clientConfig := &r.config.ClientConfigs[clientConfigIndex] clientConfig *RedfishReceiverClientConfig,
processor *redfish.Processor) error {
// Skip collection of processor metrics when disabled by config
if r.config.DisableProcessorMetrics || clientConfig.DisableProcessorMetrics {
return nil
}
timestamp := time.Now() timestamp := time.Now()
@ -377,7 +340,7 @@ func (r *RedfishReceiver) Start() {
// Set tags // Set tags
tags := map[string]string{ tags := map[string]string{
"hostname": *clientConfig.Hostname, "hostname": clientConfig.Hostname,
"type": "socket", "type": "socket",
// ProcessorType shall contain the string which identifies the type of processor contained in this Socket // ProcessorType shall contain the string which identifies the type of processor contained in this Socket
"processor_typ": string(processor.ProcessorType), "processor_typ": string(processor.ProcessorType),
@ -433,13 +396,11 @@ func (r *RedfishReceiver) Start() {
} }
} }
return nil return nil
} }
// readMetrics reads redfish temperature and power metrics from the endpoint configured in conf // readMetrics reads redfish thermal, power and processor metrics from the redfish device
readMetrics := func(clientConfigIndex int) error { // configured in clientConfig
func (r *RedfishReceiver) readMetrics(clientConfig *RedfishReceiverClientConfig) error {
// access client config
clientConfig := &r.config.ClientConfigs[clientConfigIndex]
// Connect to redfish service // Connect to redfish service
c, err := gofish.Connect(clientConfig.gofish) c, err := gofish.Connect(clientConfig.gofish)
@ -464,30 +425,51 @@ func (r *RedfishReceiver) Start() {
} }
// Get all chassis managed by this service // Get all chassis managed by this service
chassis_list, err := c.Service.Chassis() isChassisListRequired :=
clientConfig.doThermalMetrics ||
clientConfig.doPowerMetric
var chassisList []*redfish.Chassis
if isChassisListRequired {
chassisList, err = c.Service.Chassis()
if err != nil { if err != nil {
return fmt.Errorf("readMetrics: c.Service.Chassis() failed: %v", err) return fmt.Errorf("readMetrics: c.Service.Chassis() failed: %v", err)
} }
for _, chassis := range chassis_list {
err := readThermalMetrics(clientConfigIndex, chassis)
if err != nil {
return err
} }
err = readPowerMetrics(clientConfigIndex, chassis) // Get all computer systems managed by this service
if err != nil { isComputerSystemListRequired := clientConfig.doProcessorMetrics
return err var computerSystemList []*redfish.ComputerSystem
} if isComputerSystemListRequired {
} computerSystemList, err = c.Service.Systems()
// loop for all computer systems
systems, err := c.Service.Systems()
if err != nil { if err != nil {
return fmt.Errorf("readMetrics: c.Service.Systems() failed: %v", err) return fmt.Errorf("readMetrics: c.Service.Systems() failed: %v", err)
} }
for _, system := range systems { }
// read thermal metrics
if clientConfig.doThermalMetrics {
for _, chassis := range chassisList {
err := r.readThermalMetrics(clientConfig, chassis)
if err != nil {
return err
}
}
}
// read power metrics
if clientConfig.doPowerMetric {
for _, chassis := range chassisList {
err = r.readPowerMetrics(clientConfig, chassis)
if err != nil {
return err
}
}
}
// read processor metrics
if clientConfig.doProcessorMetrics {
// loop for all computer systems
for _, system := range computerSystemList {
// loop for all processors // loop for all processors
processors, err := system.Processors() processors, err := system.Processors()
@ -495,32 +477,27 @@ func (r *RedfishReceiver) Start() {
return fmt.Errorf("readMetrics: system.Processors() failed: %v", err) return fmt.Errorf("readMetrics: system.Processors() failed: %v", err)
} }
for _, processor := range processors { for _, processor := range processors {
err := readProcessorMetrics(clientConfigIndex, processor) err := r.readProcessorMetrics(clientConfig, processor)
if err != nil { if err != nil {
return err return err
} }
} }
} }
}
return nil return nil
} }
// doReadMetric reads power and temperature metrics for all configured redfish services. // doReadMetric reads metrics from all configured redfish devices.
// To compensate latencies of the Redfish services a fanout is used. // To compensate latencies of the Redfish devices a fanout is used.
doReadMetric := func() { func (r *RedfishReceiver) doReadMetric() {
// Compute fanout to use
realFanout := r.config.Fanout
if len(r.config.ClientConfigs) < realFanout {
realFanout = len(r.config.ClientConfigs)
}
// Create wait group and input channel for workers // Create wait group and input channel for workers
var workerWaitGroup sync.WaitGroup var workerWaitGroup sync.WaitGroup
workerInput := make(chan int, realFanout) workerInput := make(chan *RedfishReceiverClientConfig, r.config.fanout)
// Create worker go routines // Create worker go routines
for i := 0; i < realFanout; i++ { for i := 0; i < r.config.fanout; i++ {
// Increment worker wait group counter // Increment worker wait group counter
workerWaitGroup.Add(1) workerWaitGroup.Add(1)
go func() { go func() {
@ -528,8 +505,8 @@ func (r *RedfishReceiver) Start() {
defer workerWaitGroup.Done() defer workerWaitGroup.Done()
// Read metrics for each client config // Read metrics for each client config
for clientConfigIndex := range workerInput { for clientConfig := range workerInput {
err := readMetrics(clientConfigIndex) err := r.readMetrics(clientConfig)
if err != nil { if err != nil {
cclog.ComponentError(r.name, err) cclog.ComponentError(r.name, err)
} }
@ -539,9 +516,10 @@ func (r *RedfishReceiver) Start() {
// Distribute client configs to workers // Distribute client configs to workers
for i := range r.config.ClientConfigs { for i := range r.config.ClientConfigs {
// Check done channel status // Check done channel status
select { select {
case workerInput <- i: case workerInput <- &r.config.ClientConfigs[i]:
case <-r.done: case <-r.done:
// process done event // process done event
// Stop workers, clear channel and wait for all workers to finish // Stop workers, clear channel and wait for all workers to finish
@ -556,7 +534,11 @@ func (r *RedfishReceiver) Start() {
// Stop workers and wait for all workers to finish // Stop workers and wait for all workers to finish
close(workerInput) close(workerInput)
workerWaitGroup.Wait() workerWaitGroup.Wait()
} }
// Start starts the redfish receiver
func (r *RedfishReceiver) Start() {
cclog.ComponentDebug(r.name, "START")
// Start redfish receiver // Start redfish receiver
r.wg.Add(1) r.wg.Add(1)
@ -568,10 +550,15 @@ func (r *RedfishReceiver) Start() {
defer ticker.Stop() defer ticker.Stop()
for { for {
doReadMetric() r.doReadMetric()
select { select {
case <-ticker.C: case tickerTime := <-ticker.C:
// Check if we missed the ticker event
if since := time.Since(tickerTime); since > 5*time.Second {
cclog.ComponentInfo(r.name, "Missed ticker event for more then", since)
}
// process ticker event -> continue // process ticker event -> continue
continue continue
case <-r.done: case <-r.done:
@ -584,7 +571,7 @@ func (r *RedfishReceiver) Start() {
cclog.ComponentDebug(r.name, "STARTED") cclog.ComponentDebug(r.name, "STARTED")
} }
// Close redfish receiver // Close closes the redfish receiver
func (r *RedfishReceiver) Close() { func (r *RedfishReceiver) Close() {
cclog.ComponentDebug(r.name, "CLOSE") cclog.ComponentDebug(r.name, "CLOSE")
@ -595,27 +582,66 @@ func (r *RedfishReceiver) Close() {
cclog.ComponentDebug(r.name, "DONE") cclog.ComponentDebug(r.name, "DONE")
} }
// New function to create a new instance of the receiver // NewRedfishReceiver creates a new instance of the redfish receiver
// Initialize the receiver by giving it a name and reading in the config JSON // Initialize the receiver by giving it a name and reading in the config JSON
func NewRedfishReceiver(name string, config json.RawMessage) (Receiver, error) { func NewRedfishReceiver(name string, config json.RawMessage) (Receiver, error) {
r := new(RedfishReceiver) r := new(RedfishReceiver)
// Config options from config file
configJSON := struct {
Type string `json:"type"`
// Maximum number of simultaneous redfish connections (default: 64)
Fanout int `json:"fanout,omitempty"`
// How often the redfish power metrics should be read and sent to the sink (default: 30 s)
IntervalString string `json:"interval,omitempty"`
// Control whether a client verifies the server's certificate
// (default: true == do not verify server's certificate)
HttpInsecure bool `json:"http_insecure,omitempty"`
// Time limit for requests made by this HTTP client (default: 10 s)
HttpTimeoutString string `json:"http_timeout,omitempty"`
// Globally disable collection of power, processor or thermal metrics
DisablePowerMetrics bool `json:"disable_power_metrics"`
DisableProcessorMetrics bool `json:"disable_processor_metrics"`
DisableThermalMetrics bool `json:"disable_thermal_metrics"`
// Globally excluded metrics
ExcludeMetrics []string `json:"exclude_metrics,omitempty"`
ClientConfigs []struct {
Hostname *string `json:"hostname"` // Hostname the redfish service belongs to
Username *string `json:"username"` // User name to authenticate with
Password *string `json:"password"` // Password to use for authentication
Endpoint *string `json:"endpoint"` // URL of the redfish service
// Per client disable collection of power,processor or thermal metrics
DisablePowerMetrics bool `json:"disable_power_metrics"`
DisableProcessorMetrics bool `json:"disable_processor_metrics"`
DisableThermalMetrics bool `json:"disable_thermal_metrics"`
// Per client excluded metrics
ExcludeMetrics []string `json:"exclude_metrics,omitempty"`
} `json:"client_config"`
}{
// Set default values
// Allow overwriting these defaults by reading config JSON
Fanout: 64,
IntervalString: "30s",
HttpTimeoutString: "10s",
HttpInsecure: true,
}
// Set name // Set name
r.name = fmt.Sprintf("RedfishReceiver(%s)", name) r.name = fmt.Sprintf("RedfishReceiver(%s)", name)
// Create done channel // Create done channel
r.done = make(chan bool) r.done = make(chan bool)
// Set defaults in r.config
// Allow overwriting these defaults by reading config JSON
r.config.Fanout = 64
r.config.IntervalString = "30s"
r.config.HttpTimeoutString = "10s"
r.config.HttpInsecure = true
// Read the redfish receiver specific JSON config // Read the redfish receiver specific JSON config
if len(config) > 0 { if len(config) > 0 {
err := json.Unmarshal(config, &r.config) err := json.Unmarshal(config, &configJSON)
if err != nil { if err != nil {
cclog.ComponentError(r.name, "Error reading config:", err.Error()) cclog.ComponentError(r.name, "Error reading config:", err.Error())
return nil, err return nil, err
@ -624,11 +650,11 @@ func NewRedfishReceiver(name string, config json.RawMessage) (Receiver, error) {
// interval duration // interval duration
var err error var err error
r.config.Interval, err = time.ParseDuration(r.config.IntervalString) r.config.Interval, err = time.ParseDuration(configJSON.IntervalString)
if err != nil { if err != nil {
err := fmt.Errorf( err := fmt.Errorf(
"Failed to parse duration string interval='%s': %w", "Failed to parse duration string interval='%s': %w",
r.config.IntervalString, configJSON.IntervalString,
err, err,
) )
cclog.Error(r.name, err) cclog.Error(r.name, err)
@ -636,11 +662,11 @@ func NewRedfishReceiver(name string, config json.RawMessage) (Receiver, error) {
} }
// HTTP timeout duration // HTTP timeout duration
r.config.HttpTimeout, err = time.ParseDuration(r.config.HttpTimeoutString) r.config.HttpTimeout, err = time.ParseDuration(configJSON.HttpTimeoutString)
if err != nil { if err != nil {
err := fmt.Errorf( err := fmt.Errorf(
"Failed to parse duration string http_timeout='%s': %w", "Failed to parse duration string http_timeout='%s': %w",
r.config.HttpTimeoutString, configJSON.HttpTimeoutString,
err, err,
) )
cclog.Error(r.name, err) cclog.Error(r.name, err)
@ -650,54 +676,77 @@ func NewRedfishReceiver(name string, config json.RawMessage) (Receiver, error) {
// Create new http client // Create new http client
customTransport := http.DefaultTransport.(*http.Transport).Clone() customTransport := http.DefaultTransport.(*http.Transport).Clone()
customTransport.TLSClientConfig = &tls.Config{ customTransport.TLSClientConfig = &tls.Config{
InsecureSkipVerify: r.config.HttpInsecure, InsecureSkipVerify: configJSON.HttpInsecure,
} }
httpClient := &http.Client{ httpClient := &http.Client{
Timeout: r.config.HttpTimeout, Timeout: r.config.HttpTimeout,
Transport: customTransport, Transport: customTransport,
} }
// Compute fanout to use
numClients := len(configJSON.ClientConfigs)
r.config.fanout = configJSON.Fanout
if numClients < r.config.fanout {
r.config.fanout = numClients
}
// Initialize derived configuration
r.config.ClientConfigs = make([]RedfishReceiverClientConfig, numClients)
// Create gofish client config // Create gofish client config
for i := range r.config.ClientConfigs { for i := 0; i < numClients; i++ {
clientConfig := &r.config.ClientConfigs[i] clientConfig := &r.config.ClientConfigs[i]
clientConfigJSON := &configJSON.ClientConfigs[i]
gofishConfig := &clientConfig.gofish gofishConfig := &clientConfig.gofish
if clientConfig.Hostname == nil { if clientConfigJSON.Hostname == nil {
err := fmt.Errorf("client config number %v requires hostname", i) err := fmt.Errorf("client config number %v requires hostname", i)
cclog.ComponentError(r.name, err) cclog.ComponentError(r.name, err)
return nil, err return nil, err
} }
clientConfig.Hostname = *clientConfigJSON.Hostname
if clientConfig.Endpoint == nil { if clientConfigJSON.Endpoint == nil {
err := fmt.Errorf("client config number %v requires endpoint", i) err := fmt.Errorf("client config number %v requires endpoint", i)
cclog.ComponentError(r.name, err) cclog.ComponentError(r.name, err)
return nil, err return nil, err
} }
gofishConfig.Endpoint = *clientConfig.Endpoint gofishConfig.Endpoint = *clientConfigJSON.Endpoint
if clientConfig.Username == nil { if clientConfigJSON.Username == nil {
err := fmt.Errorf("client config number %v requires username", i) err := fmt.Errorf("client config number %v requires username", i)
cclog.ComponentError(r.name, err) cclog.ComponentError(r.name, err)
return nil, err return nil, err
} }
gofishConfig.Username = *clientConfig.Username gofishConfig.Username = *clientConfigJSON.Username
if clientConfig.Password == nil { if clientConfigJSON.Password == nil {
err := fmt.Errorf("client config number %v requires password", i) err := fmt.Errorf("client config number %v requires password", i)
cclog.ComponentError(r.name, err) cclog.ComponentError(r.name, err)
return nil, err return nil, err
} }
gofishConfig.Password = *clientConfig.Password gofishConfig.Password = *clientConfigJSON.Password
// Reuse existing http client // Reuse existing http client
gofishConfig.HTTPClient = httpClient gofishConfig.HTTPClient = httpClient
// Which metrics should be collected
clientConfig.doPowerMetric =
!(configJSON.DisablePowerMetrics ||
clientConfigJSON.DisablePowerMetrics)
clientConfig.doProcessorMetrics =
!(configJSON.DisableProcessorMetrics ||
clientConfigJSON.DisableProcessorMetrics)
clientConfig.doThermalMetrics =
!(configJSON.DisableThermalMetrics ||
clientConfigJSON.DisableThermalMetrics)
// Is metric excluded globally or per client // Is metric excluded globally or per client
clientConfig.isExcluded = make(map[string]bool) clientConfig.isExcluded = make(map[string]bool)
for _, key := range clientConfig.ExcludeMetrics { for _, key := range clientConfigJSON.ExcludeMetrics {
clientConfig.isExcluded[key] = true clientConfig.isExcluded[key] = true
} }
for _, key := range r.config.ExcludeMetrics { for _, key := range configJSON.ExcludeMetrics {
clientConfig.isExcluded[key] = true clientConfig.isExcluded[key] = true
} }
} }