mirror of
https://github.com/ClusterCockpit/cc-metric-collector.git
synced 2026-04-05 15:37:29 +02:00
Merge develop branch into main (#96)
* InfiniBandCollector: Scale raw readings from octets to bytes * Fix clock frequency coming from LikwidCollector and update docs * Build DEB package for Ubuntu 20.04 for releases * Fix memstat collector with numa_stats option * Remove useless prints from MemstatCollector * Replace ioutils with os and io (#87) * Use lower case for error strings in RocmSmiCollector * move maybe-usable-by-other-cc-components to pkg. Fix all files to use the new paths (#88) * Add collector for monitoring the execution of cc-metric-collector itself (#81) * Add collector to monitor execution of cc-metric-collector itself * Register SelfCollector * Fix import paths for moved packages * Check if at least one CPU with frequency information was detected * Correct type: /proc/stats -> /proc/stat * Update README.md * Run ipmitool asynchron. Improved error handling. * Corrected some typos * Add running average power limit (RAPL) metric collector * Add running average power limit (RAPL) metric collector * Do not mess up with the orignal configuration * * Corrected json config in numastatsMetric.md * Added some debug output to numastatsMetric.go * Fixed computing number of physical packages for non continous physical package IDs (e.g. on Ampere Altra Q80-30) * Fix kernel panic for receiver config with missing receiver type * Add receiver to gather remote IPMI sensor metrics * Added config option to add ipmi-sensors command line options * Add documentaion for IPMI receiver * Update to latest version of included go modules * Add go.mod to App dependency * Try to use common metric tags across hardware vendors * Add IPMI metric: current * remove prefix enumeration like 01-... * Add IPMI receiver example configuration to receivers.json * Minimal formating changes * Add hostlist package * Added tests for hostlist Expand() * Use package hostlist to expand a host list * Use package hostlist to expand a host list * Some servers return "ConsumedPowerWatt":65535 instead of "ConsumedPowerWatt":null * Updated to latest package versions * Do not allow unknown fields in JSON configuration file * Add workflow to customize packages to docs * NFS I/O Stats Collector (#91) * Initial version * Delete values for vanished mount points and comments * Fix for Likwid collector (#95) * Run LIKWID in separate thread and check metric type * Change LIKWID collector documentation to use 'type' instead of 'scope' * Re-initialize LIKWID after one read is missing due to lock toggle * Register cc-metric-collector at Zenodo (#93) * Add initial version of Zenodo project file * Orcid ID added * Update .zenodo.json Co-authored-by: Holger Obermaier <holger.obermaier@kit.edu> * Update ipmiMetric.go Co-authored-by: Holger Obermaier <40787752+ho-ob@users.noreply.github.com> Co-authored-by: Holger Obermaier <Holger.Obermaier@kit.edu>
This commit is contained in:
@@ -1,9 +1,11 @@
|
||||
package receivers
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"crypto/tls"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io"
|
||||
"net/http"
|
||||
"strconv"
|
||||
"strings"
|
||||
@@ -12,6 +14,7 @@ import (
|
||||
|
||||
cclog "github.com/ClusterCockpit/cc-metric-collector/pkg/ccLogger"
|
||||
lp "github.com/ClusterCockpit/cc-metric-collector/pkg/ccMetric"
|
||||
"github.com/ClusterCockpit/cc-metric-collector/pkg/hostlist"
|
||||
|
||||
// See: https://pkg.go.dev/github.com/stmcginnis/gofish
|
||||
"github.com/stmcginnis/gofish"
|
||||
@@ -346,9 +349,17 @@ func (r *RedfishReceiver) readProcessorMetrics(
|
||||
// This property shall contain the temperature, in Celsius, of the processor.
|
||||
TemperatureCelsius float32 `json:"TemperatureCelsius"`
|
||||
}
|
||||
err = json.NewDecoder(resp.Body).Decode(&processorMetrics)
|
||||
body, err := io.ReadAll(resp.Body)
|
||||
if err != nil {
|
||||
return fmt.Errorf("unable to decode JSON for processor metrics: %+w", err)
|
||||
return fmt.Errorf("unable to read JSON for processor metrics: %+w", err)
|
||||
}
|
||||
err = json.Unmarshal(body, &processorMetrics)
|
||||
if err != nil {
|
||||
return fmt.Errorf(
|
||||
"unable to unmarshal JSON='%s' for processor metrics: %+w",
|
||||
string(body),
|
||||
err,
|
||||
)
|
||||
}
|
||||
processorMetrics.SetClient(processor.Client)
|
||||
|
||||
@@ -380,7 +391,9 @@ func (r *RedfishReceiver) readProcessorMetrics(
|
||||
|
||||
namePower := "consumed_power"
|
||||
|
||||
if !clientConfig.isExcluded[namePower] {
|
||||
if !clientConfig.isExcluded[namePower] &&
|
||||
// Some servers return "ConsumedPowerWatt":65535 instead of "ConsumedPowerWatt":null
|
||||
processorMetrics.ConsumedPowerWatt != 65535 {
|
||||
y, err := lp.New(namePower, tags, metaPower,
|
||||
map[string]interface{}{
|
||||
"value": processorMetrics.ConsumedPowerWatt,
|
||||
@@ -630,10 +643,10 @@ func NewRedfishReceiver(name string, config json.RawMessage) (Receiver, error) {
|
||||
ExcludeMetrics []string `json:"exclude_metrics,omitempty"`
|
||||
|
||||
ClientConfigs []struct {
|
||||
HostList []string `json:"host_list"` // List of hosts with the same client configuration
|
||||
Username *string `json:"username"` // User name to authenticate with
|
||||
Password *string `json:"password"` // Password to use for authentication
|
||||
Endpoint *string `json:"endpoint"` // URL of the redfish service
|
||||
HostList string `json:"host_list"` // List of hosts with the same client configuration
|
||||
Username *string `json:"username"` // User name to authenticate with
|
||||
Password *string `json:"password"` // Password to use for authentication
|
||||
Endpoint *string `json:"endpoint"` // URL of the redfish service
|
||||
|
||||
// Per client disable collection of power,processor or thermal metrics
|
||||
DisablePowerMetrics bool `json:"disable_power_metrics"`
|
||||
@@ -660,8 +673,9 @@ func NewRedfishReceiver(name string, config json.RawMessage) (Receiver, error) {
|
||||
|
||||
// Read the redfish receiver specific JSON config
|
||||
if len(config) > 0 {
|
||||
err := json.Unmarshal(config, &configJSON)
|
||||
if err != nil {
|
||||
d := json.NewDecoder(bytes.NewReader(config))
|
||||
d.DisallowUnknownFields()
|
||||
if err := d.Decode(&configJSON); err != nil {
|
||||
cclog.ComponentError(r.name, "Error reading config:", err.Error())
|
||||
return nil, err
|
||||
}
|
||||
@@ -763,7 +777,14 @@ func NewRedfishReceiver(name string, config json.RawMessage) (Receiver, error) {
|
||||
isExcluded[key] = true
|
||||
}
|
||||
|
||||
for _, host := range clientConfigJSON.HostList {
|
||||
hostList, err := hostlist.Expand(clientConfigJSON.HostList)
|
||||
if err != nil {
|
||||
err := fmt.Errorf("client config number %d failed to parse host list %s: %v",
|
||||
i, clientConfigJSON.HostList, err)
|
||||
cclog.ComponentError(r.name, err)
|
||||
return nil, err
|
||||
}
|
||||
for _, host := range hostList {
|
||||
|
||||
// Endpoint of the redfish service
|
||||
endpoint := strings.Replace(endpoint_pattern, "%h", host, -1)
|
||||
|
||||
Reference in New Issue
Block a user