mirror of
https://github.com/ClusterCockpit/cc-metric-collector.git
synced 2025-07-19 11:21:41 +02:00
Compare commits
1 Commits
rapl_colle
...
perf_colle
Author | SHA1 | Date | |
---|---|---|---|
|
1edddc3dc2 |
@@ -41,6 +41,8 @@ var AvailableCollectors = map[string]MetricCollector{
|
||||
"self": new(SelfCollector),
|
||||
"schedstat": new(SchedstatCollector),
|
||||
"nfsiostat": new(NfsIOStatCollector),
|
||||
"perf_event": new(PerfEventCollector),
|
||||
"perf_cmd": new(PerfCmdCollector),
|
||||
}
|
||||
|
||||
// Metric collector manager data structure
|
||||
|
@@ -8,21 +8,23 @@ import (
|
||||
"syscall"
|
||||
"time"
|
||||
|
||||
lp "github.com/ClusterCockpit/cc-energy-manager/pkg/cc-message"
|
||||
cclog "github.com/ClusterCockpit/cc-metric-collector/pkg/ccLogger"
|
||||
lp "github.com/ClusterCockpit/cc-energy-manager/pkg/cc-message"
|
||||
)
|
||||
|
||||
// "log"
|
||||
|
||||
const MOUNTFILE = `/proc/self/mounts`
|
||||
|
||||
type DiskstatCollectorConfig struct {
|
||||
ExcludeMetrics []string `json:"exclude_metrics,omitempty"`
|
||||
ExcludeMounts []string `json:"exclude_mounts,omitempty"`
|
||||
}
|
||||
|
||||
type DiskstatCollector struct {
|
||||
metricCollector
|
||||
config DiskstatCollectorConfig
|
||||
allowedMetrics map[string]bool
|
||||
//matches map[string]int
|
||||
config IOstatCollectorConfig
|
||||
//devices map[string]IOstatCollectorEntry
|
||||
}
|
||||
|
||||
func (m *DiskstatCollector) Init(config json.RawMessage) error {
|
||||
@@ -31,21 +33,12 @@ func (m *DiskstatCollector) Init(config json.RawMessage) error {
|
||||
m.meta = map[string]string{"source": m.name, "group": "Disk"}
|
||||
m.setup()
|
||||
if len(config) > 0 {
|
||||
if err := json.Unmarshal(config, &m.config); err != nil {
|
||||
err := json.Unmarshal(config, &m.config)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
m.allowedMetrics = map[string]bool{
|
||||
"disk_total": true,
|
||||
"disk_free": true,
|
||||
"part_max_used": true,
|
||||
}
|
||||
for _, excl := range m.config.ExcludeMetrics {
|
||||
if _, ok := m.allowedMetrics[excl]; ok {
|
||||
m.allowedMetrics[excl] = false
|
||||
}
|
||||
}
|
||||
file, err := os.Open(MOUNTFILE)
|
||||
file, err := os.Open(string(MOUNTFILE))
|
||||
if err != nil {
|
||||
cclog.ComponentError(m.name, err.Error())
|
||||
return err
|
||||
@@ -60,7 +53,7 @@ func (m *DiskstatCollector) Read(interval time.Duration, output chan lp.CCMessag
|
||||
return
|
||||
}
|
||||
|
||||
file, err := os.Open(MOUNTFILE)
|
||||
file, err := os.Open(string(MOUNTFILE))
|
||||
if err != nil {
|
||||
cclog.ComponentError(m.name, err.Error())
|
||||
return
|
||||
@@ -69,7 +62,6 @@ func (m *DiskstatCollector) Read(interval time.Duration, output chan lp.CCMessag
|
||||
|
||||
part_max_used := uint64(0)
|
||||
scanner := bufio.NewScanner(file)
|
||||
mountLoop:
|
||||
for scanner.Scan() {
|
||||
line := scanner.Text()
|
||||
if len(line) == 0 {
|
||||
@@ -85,17 +77,13 @@ mountLoop:
|
||||
if strings.Contains(linefields[1], "boot") {
|
||||
continue
|
||||
}
|
||||
|
||||
mountPath := strings.Replace(linefields[1], `\040`, " ", -1)
|
||||
|
||||
for _, excl := range m.config.ExcludeMounts {
|
||||
if strings.Contains(mountPath, excl) {
|
||||
continue mountLoop
|
||||
}
|
||||
path := strings.Replace(linefields[1], `\040`, " ", -1)
|
||||
stat := syscall.Statfs_t{
|
||||
Blocks: 0,
|
||||
Bsize: 0,
|
||||
Bfree: 0,
|
||||
}
|
||||
|
||||
stat := syscall.Statfs_t{}
|
||||
err := syscall.Statfs(mountPath, &stat)
|
||||
err := syscall.Statfs(path, &stat)
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
@@ -104,20 +92,16 @@ mountLoop:
|
||||
}
|
||||
tags := map[string]string{"type": "node", "device": linefields[0]}
|
||||
total := (stat.Blocks * uint64(stat.Bsize)) / uint64(1000000000)
|
||||
if m.allowedMetrics["disk_total"] {
|
||||
y, err := lp.NewMessage("disk_total", tags, m.meta, map[string]interface{}{"value": total}, time.Now())
|
||||
if err == nil {
|
||||
y.AddMeta("unit", "GBytes")
|
||||
output <- y
|
||||
}
|
||||
y, err := lp.NewMessage("disk_total", tags, m.meta, map[string]interface{}{"value": total}, time.Now())
|
||||
if err == nil {
|
||||
y.AddMeta("unit", "GBytes")
|
||||
output <- y
|
||||
}
|
||||
free := (stat.Bfree * uint64(stat.Bsize)) / uint64(1000000000)
|
||||
if m.allowedMetrics["disk_free"] {
|
||||
y, err := lp.NewMessage("disk_free", tags, m.meta, map[string]interface{}{"value": free}, time.Now())
|
||||
if err == nil {
|
||||
y.AddMeta("unit", "GBytes")
|
||||
output <- y
|
||||
}
|
||||
y, err = lp.NewMessage("disk_free", tags, m.meta, map[string]interface{}{"value": free}, time.Now())
|
||||
if err == nil {
|
||||
y.AddMeta("unit", "GBytes")
|
||||
output <- y
|
||||
}
|
||||
if total > 0 {
|
||||
perc := (100 * (total - free)) / total
|
||||
@@ -126,12 +110,10 @@ mountLoop:
|
||||
}
|
||||
}
|
||||
}
|
||||
if m.allowedMetrics["part_max_used"] {
|
||||
y, err := lp.NewMessage("part_max_used", map[string]string{"type": "node"}, m.meta, map[string]interface{}{"value": int(part_max_used)}, time.Now())
|
||||
if err == nil {
|
||||
y.AddMeta("unit", "percent")
|
||||
output <- y
|
||||
}
|
||||
y, err := lp.NewMessage("part_max_used", map[string]string{"type": "node"}, m.meta, map[string]interface{}{"value": int(part_max_used)}, time.Now())
|
||||
if err == nil {
|
||||
y.AddMeta("unit", "percent")
|
||||
output <- y
|
||||
}
|
||||
}
|
||||
|
||||
|
@@ -6,13 +6,10 @@
|
||||
"exclude_metrics": [
|
||||
"disk_total"
|
||||
],
|
||||
"exclude_mounts": [
|
||||
"slurm-tmpfs"
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
The `diskstat` collector reads data from `/proc/self/mounts` and outputs a handful **node** metrics. If a metric is not required, it can be excluded from forwarding it to the sink. Additionally, any mount point containing one of the strings specified in `exclude_mounts` will be skipped during metric collection.
|
||||
The `diskstat` collector reads data from `/proc/self/mounts` and outputs a handful **node** metrics. If a metric is not required, it can be excluded from forwarding it to the sink.
|
||||
|
||||
Metrics per device (with `device` tag):
|
||||
* `disk_total` (unit `GBytes`)
|
||||
|
@@ -4,8 +4,8 @@ import (
|
||||
"fmt"
|
||||
"os"
|
||||
|
||||
lp "github.com/ClusterCockpit/cc-energy-manager/pkg/cc-message"
|
||||
cclog "github.com/ClusterCockpit/cc-metric-collector/pkg/ccLogger"
|
||||
lp "github.com/ClusterCockpit/cc-energy-manager/pkg/cc-message"
|
||||
"golang.org/x/sys/unix"
|
||||
|
||||
"encoding/json"
|
||||
|
@@ -2,24 +2,24 @@ package collectors
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"os"
|
||||
|
||||
cclog "github.com/ClusterCockpit/cc-metric-collector/pkg/ccLogger"
|
||||
lp "github.com/ClusterCockpit/cc-energy-manager/pkg/cc-message"
|
||||
|
||||
// "log"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"os"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
lp "github.com/ClusterCockpit/cc-energy-manager/pkg/cc-message"
|
||||
cclog "github.com/ClusterCockpit/cc-metric-collector/pkg/ccLogger"
|
||||
)
|
||||
|
||||
// Konstante für den Pfad zu /proc/diskstats
|
||||
const IOSTATFILE = `/proc/diskstats`
|
||||
const IOSTAT_SYSFSPATH = `/sys/block`
|
||||
|
||||
type IOstatCollectorConfig struct {
|
||||
ExcludeMetrics []string `json:"exclude_metrics,omitempty"`
|
||||
// Neues Feld zum Ausschließen von Devices per JSON-Konfiguration
|
||||
ExcludeDevices []string `json:"exclude_devices,omitempty"`
|
||||
}
|
||||
|
||||
type IOstatCollectorEntry struct {
|
||||
@@ -76,7 +76,7 @@ func (m *IOstatCollector) Init(config json.RawMessage) error {
|
||||
if len(m.matches) == 0 {
|
||||
return errors.New("no metrics to collect")
|
||||
}
|
||||
file, err := os.Open(IOSTATFILE)
|
||||
file, err := os.Open(string(IOSTATFILE))
|
||||
if err != nil {
|
||||
cclog.ComponentError(m.name, err.Error())
|
||||
return err
|
||||
@@ -87,24 +87,17 @@ func (m *IOstatCollector) Init(config json.RawMessage) error {
|
||||
for scanner.Scan() {
|
||||
line := scanner.Text()
|
||||
linefields := strings.Fields(line)
|
||||
if len(linefields) < 3 {
|
||||
continue
|
||||
}
|
||||
device := linefields[2]
|
||||
|
||||
if strings.Contains(device, "loop") {
|
||||
continue
|
||||
}
|
||||
if _, skip := stringArrayContains(m.config.ExcludeDevices, device); skip {
|
||||
continue
|
||||
}
|
||||
values := make(map[string]int64)
|
||||
for m := range m.matches {
|
||||
values[m] = 0
|
||||
}
|
||||
m.devices[device] = IOstatCollectorEntry{
|
||||
tags: map[string]string{
|
||||
"device": device,
|
||||
"device": linefields[2],
|
||||
"type": "node",
|
||||
},
|
||||
lastValues: values,
|
||||
@@ -119,7 +112,7 @@ func (m *IOstatCollector) Read(interval time.Duration, output chan lp.CCMessage)
|
||||
return
|
||||
}
|
||||
|
||||
file, err := os.Open(IOSTATFILE)
|
||||
file, err := os.Open(string(IOSTATFILE))
|
||||
if err != nil {
|
||||
cclog.ComponentError(m.name, err.Error())
|
||||
return
|
||||
@@ -133,16 +126,10 @@ func (m *IOstatCollector) Read(interval time.Duration, output chan lp.CCMessage)
|
||||
continue
|
||||
}
|
||||
linefields := strings.Fields(line)
|
||||
if len(linefields) < 3 {
|
||||
continue
|
||||
}
|
||||
device := linefields[2]
|
||||
if strings.Contains(device, "loop") {
|
||||
continue
|
||||
}
|
||||
if _, skip := stringArrayContains(m.config.ExcludeDevices, device); skip {
|
||||
continue
|
||||
}
|
||||
if _, ok := m.devices[device]; !ok {
|
||||
continue
|
||||
}
|
||||
|
@@ -4,17 +4,12 @@
|
||||
```json
|
||||
"iostat": {
|
||||
"exclude_metrics": [
|
||||
"io_read_ms"
|
||||
"read_ms"
|
||||
],
|
||||
"exclude_devices": [
|
||||
"nvme0n1p1",
|
||||
"nvme0n1p2",
|
||||
"md127"
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
The `iostat` collector reads data from `/proc/diskstats` and outputs a handful **node** metrics. If a metric or device is not required, it can be excluded from forwarding it to the sink.
|
||||
The `iostat` collector reads data from `/proc/diskstats` and outputs a handful **node** metrics. If a metric is not required, it can be excluded from forwarding it to the sink.
|
||||
|
||||
Metrics:
|
||||
* `io_reads`
|
||||
|
@@ -12,8 +12,8 @@ import (
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
lp "github.com/ClusterCockpit/cc-energy-manager/pkg/cc-message"
|
||||
cclog "github.com/ClusterCockpit/cc-metric-collector/pkg/ccLogger"
|
||||
lp "github.com/ClusterCockpit/cc-energy-manager/pkg/cc-message"
|
||||
)
|
||||
|
||||
const MEMSTATFILE = "/proc/meminfo"
|
||||
|
@@ -9,17 +9,16 @@ import (
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
lp "github.com/ClusterCockpit/cc-energy-manager/pkg/cc-message"
|
||||
cclog "github.com/ClusterCockpit/cc-metric-collector/pkg/ccLogger"
|
||||
lp "github.com/ClusterCockpit/cc-energy-manager/pkg/cc-message"
|
||||
)
|
||||
|
||||
const NETSTATFILE = "/proc/net/dev"
|
||||
|
||||
type NetstatCollectorConfig struct {
|
||||
IncludeDevices []string `json:"include_devices"`
|
||||
SendAbsoluteValues bool `json:"send_abs_values"`
|
||||
SendDerivedValues bool `json:"send_derived_values"`
|
||||
InterfaceAliases map[string][]string `json:"interface_aliases,omitempty"`
|
||||
IncludeDevices []string `json:"include_devices"`
|
||||
SendAbsoluteValues bool `json:"send_abs_values"`
|
||||
SendDerivedValues bool `json:"send_derived_values"`
|
||||
}
|
||||
|
||||
type NetstatCollectorMetric struct {
|
||||
@@ -33,26 +32,9 @@ type NetstatCollectorMetric struct {
|
||||
|
||||
type NetstatCollector struct {
|
||||
metricCollector
|
||||
config NetstatCollectorConfig
|
||||
aliasToCanonical map[string]string
|
||||
matches map[string][]NetstatCollectorMetric
|
||||
lastTimestamp time.Time
|
||||
}
|
||||
|
||||
func (m *NetstatCollector) buildAliasMapping() {
|
||||
m.aliasToCanonical = make(map[string]string)
|
||||
for canon, aliases := range m.config.InterfaceAliases {
|
||||
for _, alias := range aliases {
|
||||
m.aliasToCanonical[alias] = canon
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func getCanonicalName(raw string, aliasToCanonical map[string]string) string {
|
||||
if canon, ok := aliasToCanonical[raw]; ok {
|
||||
return canon
|
||||
}
|
||||
return raw
|
||||
config NetstatCollectorConfig
|
||||
matches map[string][]NetstatCollectorMetric
|
||||
lastTimestamp time.Time
|
||||
}
|
||||
|
||||
func (m *NetstatCollector) Init(config json.RawMessage) error {
|
||||
@@ -95,8 +77,6 @@ func (m *NetstatCollector) Init(config json.RawMessage) error {
|
||||
}
|
||||
}
|
||||
|
||||
m.buildAliasMapping()
|
||||
|
||||
// Check access to net statistic file
|
||||
file, err := os.Open(NETSTATFILE)
|
||||
if err != nil {
|
||||
@@ -117,20 +97,18 @@ func (m *NetstatCollector) Init(config json.RawMessage) error {
|
||||
// Split line into fields
|
||||
f := strings.Fields(l)
|
||||
|
||||
// Get raw and canonical names
|
||||
raw := strings.Trim(f[0], ": ")
|
||||
canonical := getCanonicalName(raw, m.aliasToCanonical)
|
||||
// Get net device entry
|
||||
dev := strings.Trim(f[0], ": ")
|
||||
|
||||
// Check if device is a included device
|
||||
if _, ok := stringArrayContains(m.config.IncludeDevices, canonical); ok {
|
||||
// Tag will contain original device name (raw).
|
||||
tags := map[string]string{"stype": "network", "stype-id": raw, "type": "node"}
|
||||
if _, ok := stringArrayContains(m.config.IncludeDevices, dev); ok {
|
||||
tags := map[string]string{"stype": "network", "stype-id": dev, "type": "node"}
|
||||
meta_unit_byte := map[string]string{"source": m.name, "group": "Network", "unit": "bytes"}
|
||||
meta_unit_byte_per_sec := map[string]string{"source": m.name, "group": "Network", "unit": "bytes/sec"}
|
||||
meta_unit_pkts := map[string]string{"source": m.name, "group": "Network", "unit": "packets"}
|
||||
meta_unit_pkts_per_sec := map[string]string{"source": m.name, "group": "Network", "unit": "packets/sec"}
|
||||
|
||||
m.matches[canonical] = []NetstatCollectorMetric{
|
||||
m.matches[dev] = []NetstatCollectorMetric{
|
||||
{
|
||||
name: "net_bytes_in",
|
||||
index: fieldReceiveBytes,
|
||||
@@ -165,6 +143,7 @@ func (m *NetstatCollector) Init(config json.RawMessage) error {
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
if len(m.matches) == 0 {
|
||||
@@ -185,7 +164,7 @@ func (m *NetstatCollector) Read(interval time.Duration, output chan lp.CCMessage
|
||||
// Save current timestamp
|
||||
m.lastTimestamp = now
|
||||
|
||||
file, err := os.Open(NETSTATFILE)
|
||||
file, err := os.Open(string(NETSTATFILE))
|
||||
if err != nil {
|
||||
cclog.ComponentError(m.name, err.Error())
|
||||
return
|
||||
@@ -204,12 +183,11 @@ func (m *NetstatCollector) Read(interval time.Duration, output chan lp.CCMessage
|
||||
// Split line into fields
|
||||
f := strings.Fields(l)
|
||||
|
||||
// Get raw and canonical names
|
||||
raw := strings.Trim(f[0], ":")
|
||||
canonical := getCanonicalName(raw, m.aliasToCanonical)
|
||||
// Get net device entry
|
||||
dev := strings.Trim(f[0], ":")
|
||||
|
||||
// Check if device is a included device
|
||||
if devmetrics, ok := m.matches[canonical]; ok {
|
||||
if devmetrics, ok := m.matches[dev]; ok {
|
||||
for i := range devmetrics {
|
||||
metric := &devmetrics[i]
|
||||
|
||||
|
@@ -4,19 +4,14 @@
|
||||
```json
|
||||
"netstat": {
|
||||
"include_devices": [
|
||||
"eth0",
|
||||
"eno1"
|
||||
"eth0"
|
||||
],
|
||||
"send_abs_values": true,
|
||||
"send_derived_values": true,
|
||||
"interface_aliases": {
|
||||
"eno1": ["eno1np0", "eno1_alt"],
|
||||
"eth0": ["eth0_alias"]
|
||||
}
|
||||
"send_abs_values" : true,
|
||||
"send_derived_values" : true
|
||||
}
|
||||
```
|
||||
|
||||
The `netstat` collector reads data from `/proc/net/dev` and outputs a handful **node** metrics. With the `include_devices` list you can specify which network devices should be measured. **Note**: Most other collectors use an _exclude_ list instead of an include list. Optionally, you can define an interface_aliases mapping. For each canonical device (as listed in include_devices), you may provide an array of aliases that may be reported by the system. When an alias is detected, it is preferred for matching, while the output tag stype-id always shows the actual system-reported name.
|
||||
The `netstat` collector reads data from `/proc/net/dev` and outputs a handful **node** metrics. With the `include_devices` list you can specify which network devices should be measured. **Note**: Most other collectors use an _exclude_ list instead of an include list.
|
||||
|
||||
Metrics:
|
||||
* `net_bytes_in` (`unit=bytes`)
|
||||
@@ -28,4 +23,5 @@ Metrics:
|
||||
* `net_pkts_in_bw` (`unit=packets/sec` if `send_derived_values == true`)
|
||||
* `net_pkts_out_bw` (`unit=packets/sec` if `send_derived_values == true`)
|
||||
|
||||
The device name is added as tag `stype=network,stype-id=<device>`.
|
||||
The device name is added as tag `stype=network,stype-id=<device>`.
|
||||
|
||||
|
@@ -9,8 +9,8 @@ import (
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
lp "github.com/ClusterCockpit/cc-energy-manager/pkg/cc-message"
|
||||
cclog "github.com/ClusterCockpit/cc-metric-collector/pkg/ccLogger"
|
||||
lp "github.com/ClusterCockpit/cc-energy-manager/pkg/cc-message"
|
||||
)
|
||||
|
||||
// These are the fields we read from the JSON configuration
|
||||
@@ -18,20 +18,17 @@ type NfsIOStatCollectorConfig struct {
|
||||
ExcludeMetrics []string `json:"exclude_metrics,omitempty"`
|
||||
ExcludeFilesystem []string `json:"exclude_filesystem,omitempty"`
|
||||
UseServerAddressAsSType bool `json:"use_server_as_stype,omitempty"`
|
||||
SendAbsoluteValues bool `json:"send_abs_values"`
|
||||
SendDerivedValues bool `json:"send_derived_values"`
|
||||
}
|
||||
|
||||
// This contains all variables we need during execution and the variables
|
||||
// defined by metricCollector (name, init, ...)
|
||||
type NfsIOStatCollector struct {
|
||||
metricCollector
|
||||
config NfsIOStatCollectorConfig // the configuration structure
|
||||
meta map[string]string // default meta information
|
||||
tags map[string]string // default tags
|
||||
data map[string]map[string]int64 // data storage for difference calculation
|
||||
key string // which device info should be used as subtype ID? 'server' or 'mntpoint'
|
||||
lastTimestamp time.Time
|
||||
config NfsIOStatCollectorConfig // the configuration structure
|
||||
meta map[string]string // default meta information
|
||||
tags map[string]string // default tags
|
||||
data map[string]map[string]int64 // data storage for difference calculation
|
||||
key string // which device info should be used as subtype ID? 'server' or 'mntpoint', see NfsIOStatCollectorConfig.UseServerAddressAsSType
|
||||
}
|
||||
|
||||
var deviceRegex = regexp.MustCompile(`device (?P<server>[^ ]+) mounted on (?P<mntpoint>[^ ]+) with fstype nfs(?P<version>\d*) statvers=[\d\.]+`)
|
||||
@@ -84,6 +81,7 @@ func (m *NfsIOStatCollector) readNfsiostats() map[string]map[string]int64 {
|
||||
data[current[m.key]][name] = val
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
current = nil
|
||||
}
|
||||
@@ -100,9 +98,6 @@ func (m *NfsIOStatCollector) Init(config json.RawMessage) error {
|
||||
m.meta = map[string]string{"source": m.name, "group": "NFS", "unit": "bytes"}
|
||||
m.tags = map[string]string{"type": "node"}
|
||||
m.config.UseServerAddressAsSType = false
|
||||
// Set default configuration
|
||||
m.config.SendAbsoluteValues = true
|
||||
m.config.SendDerivedValues = false
|
||||
if len(config) > 0 {
|
||||
err = json.Unmarshal(config, &m.config)
|
||||
if err != nil {
|
||||
@@ -115,15 +110,12 @@ func (m *NfsIOStatCollector) Init(config json.RawMessage) error {
|
||||
m.key = "server"
|
||||
}
|
||||
m.data = m.readNfsiostats()
|
||||
m.lastTimestamp = time.Now()
|
||||
m.init = true
|
||||
return err
|
||||
}
|
||||
|
||||
func (m *NfsIOStatCollector) Read(interval time.Duration, output chan lp.CCMessage) {
|
||||
now := time.Now()
|
||||
timeDiff := now.Sub(m.lastTimestamp).Seconds()
|
||||
m.lastTimestamp = now
|
||||
timestamp := time.Now()
|
||||
|
||||
// Get the current values for all mountpoints
|
||||
newdata := m.readNfsiostats()
|
||||
@@ -131,30 +123,21 @@ func (m *NfsIOStatCollector) Read(interval time.Duration, output chan lp.CCMessa
|
||||
for mntpoint, values := range newdata {
|
||||
// Was the mount point already present in the last iteration
|
||||
if old, ok := m.data[mntpoint]; ok {
|
||||
for name, newVal := range values {
|
||||
if m.config.SendAbsoluteValues {
|
||||
msg, err := lp.NewMessage(fmt.Sprintf("nfsio_%s", name), m.tags, m.meta, map[string]interface{}{"value": newVal}, now)
|
||||
if err == nil {
|
||||
msg.AddTag("stype", "filesystem")
|
||||
msg.AddTag("stype-id", mntpoint)
|
||||
output <- msg
|
||||
// Calculate the difference of old and new values
|
||||
for i := range values {
|
||||
x := values[i] - old[i]
|
||||
y, err := lp.NewMessage(fmt.Sprintf("nfsio_%s", i), m.tags, m.meta, map[string]interface{}{"value": x}, timestamp)
|
||||
if err == nil {
|
||||
if strings.HasPrefix(i, "page") {
|
||||
y.AddMeta("unit", "4K_Pages")
|
||||
}
|
||||
y.AddTag("stype", "filesystem")
|
||||
y.AddTag("stype-id", mntpoint)
|
||||
// Send it to output channel
|
||||
output <- y
|
||||
}
|
||||
if m.config.SendDerivedValues {
|
||||
rate := float64(newVal-old[name]) / timeDiff
|
||||
msg, err := lp.NewMessage(fmt.Sprintf("nfsio_%s_bw", name), m.tags, m.meta, map[string]interface{}{"value": rate}, now)
|
||||
if err == nil {
|
||||
if strings.HasPrefix(name, "page") {
|
||||
msg.AddMeta("unit", "4K_pages/s")
|
||||
} else {
|
||||
msg.AddMeta("unit", "bytes/sec")
|
||||
}
|
||||
msg.AddTag("stype", "filesystem")
|
||||
msg.AddTag("stype-id", mntpoint)
|
||||
output <- msg
|
||||
}
|
||||
}
|
||||
old[name] = newVal
|
||||
// Update old to the new value for the next iteration
|
||||
old[i] = values[i]
|
||||
}
|
||||
} else {
|
||||
// First time we see this mount point, store all values
|
||||
@@ -174,6 +157,7 @@ func (m *NfsIOStatCollector) Read(interval time.Duration, output chan lp.CCMessa
|
||||
m.data[mntpoint] = nil
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
func (m *NfsIOStatCollector) Close() {
|
||||
|
@@ -3,18 +3,16 @@
|
||||
```json
|
||||
"nfsiostat": {
|
||||
"exclude_metrics": [
|
||||
"oread", "pageread"
|
||||
"nfsio_oread"
|
||||
],
|
||||
"exclude_filesystems": [
|
||||
"/mnt"
|
||||
"exclude_filesystems" : [
|
||||
"/mnt",
|
||||
],
|
||||
"use_server_as_stype": false,
|
||||
"send_abs_values": false,
|
||||
"send_derived_values": true
|
||||
"use_server_as_stype": false
|
||||
}
|
||||
```
|
||||
|
||||
The `nfsiostat` collector reads data from `/proc/self/mountstats` and outputs a handful **node** metrics for each NFS filesystem. If a metric or filesystem is not required, it can be excluded from forwarding it to the sink. **Note:** When excluding metrics, you must provide the base metric name (e.g. pageread) without the nfsio_ prefix. This exclusion applies to both absolute and derived values.
|
||||
The `nfsiostat` collector reads data from `/proc/self/mountstats` and outputs a handful **node** metrics for each NFS filesystem. If a metric or filesystem is not required, it can be excluded from forwarding it to the sink.
|
||||
|
||||
Metrics:
|
||||
* `nfsio_nread`: Bytes transferred by normal `read()` calls
|
||||
@@ -26,9 +24,4 @@ Metrics:
|
||||
* `nfsio_nfsread`: Bytes transferred for reading from the server
|
||||
* `nfsio_nfswrite`: Pages transferred by writing to the server
|
||||
|
||||
For each of these, if derived values are enabled, an additional metric is sent with the `_bw` suffix, which represents the rate:
|
||||
|
||||
* For normal byte metrics: `unit=bytes/sec`
|
||||
* For page metrics: `unit=4K_pages/s`
|
||||
|
||||
The `nfsiostat` collector adds the mountpoint to the tags as `stype=filesystem,stype-id=<mountpoint>`. If the server address should be used instead of the mountpoint, use the `use_server_as_stype` config setting.
|
||||
The `nfsiostat` collector adds the mountpoint to the tags as `stype=filesystem,stype-id=<mountpoint>`. If the server address should be used instead of the mountpoint, use the `use_server_as_stype` config setting.
|
@@ -10,15 +10,10 @@ import (
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
lp "github.com/ClusterCockpit/cc-energy-manager/pkg/cc-message"
|
||||
cclog "github.com/ClusterCockpit/cc-metric-collector/pkg/ccLogger"
|
||||
lp "github.com/ClusterCockpit/cc-energy-manager/pkg/cc-message"
|
||||
)
|
||||
|
||||
type NUMAStatsCollectorConfig struct {
|
||||
SendAbsoluteValues bool `json:"send_abs_values"`
|
||||
SendDerivedValues bool `json:"send_derived_values"`
|
||||
}
|
||||
|
||||
// Non-Uniform Memory Access (NUMA) policy hit/miss statistics
|
||||
//
|
||||
// numa_hit:
|
||||
@@ -52,16 +47,13 @@ type NUMAStatsCollectorConfig struct {
|
||||
//
|
||||
// See: https://www.kernel.org/doc/html/latest/admin-guide/numastat.html
|
||||
type NUMAStatsCollectorTopolgy struct {
|
||||
file string
|
||||
tagSet map[string]string
|
||||
previousValues map[string]int64
|
||||
file string
|
||||
tagSet map[string]string
|
||||
}
|
||||
|
||||
type NUMAStatsCollector struct {
|
||||
metricCollector
|
||||
topology []NUMAStatsCollectorTopolgy
|
||||
config NUMAStatsCollectorConfig
|
||||
lastTimestamp time.Time
|
||||
topology []NUMAStatsCollectorTopolgy
|
||||
}
|
||||
|
||||
func (m *NUMAStatsCollector) Init(config json.RawMessage) error {
|
||||
@@ -94,9 +86,8 @@ func (m *NUMAStatsCollector) Init(config json.RawMessage) error {
|
||||
file := filepath.Join(dir, "numastat")
|
||||
m.topology = append(m.topology,
|
||||
NUMAStatsCollectorTopolgy{
|
||||
file: file,
|
||||
tagSet: map[string]string{"memoryDomain": node},
|
||||
previousValues: make(map[string]int64),
|
||||
file: file,
|
||||
tagSet: map[string]string{"memoryDomain": node},
|
||||
})
|
||||
}
|
||||
|
||||
@@ -111,27 +102,23 @@ func (m *NUMAStatsCollector) Read(interval time.Duration, output chan lp.CCMessa
|
||||
return
|
||||
}
|
||||
|
||||
now := time.Now()
|
||||
timeDiff := now.Sub(m.lastTimestamp).Seconds()
|
||||
m.lastTimestamp = now
|
||||
|
||||
for i := range m.topology {
|
||||
// Loop for all NUMA domains
|
||||
t := &m.topology[i]
|
||||
|
||||
now := time.Now()
|
||||
file, err := os.Open(t.file)
|
||||
if err != nil {
|
||||
cclog.ComponentError(
|
||||
m.name,
|
||||
fmt.Sprintf("Read(): Failed to open file '%s': %v", t.file, err))
|
||||
continue
|
||||
return
|
||||
}
|
||||
scanner := bufio.NewScanner(file)
|
||||
|
||||
// Read line by line
|
||||
for scanner.Scan() {
|
||||
line := scanner.Text()
|
||||
split := strings.Fields(line)
|
||||
split := strings.Fields(scanner.Text())
|
||||
if len(split) != 2 {
|
||||
continue
|
||||
}
|
||||
@@ -143,38 +130,18 @@ func (m *NUMAStatsCollector) Read(interval time.Duration, output chan lp.CCMessa
|
||||
fmt.Sprintf("Read(): Failed to convert %s='%s' to int64: %v", key, split[1], err))
|
||||
continue
|
||||
}
|
||||
|
||||
if m.config.SendAbsoluteValues {
|
||||
msg, err := lp.NewMessage(
|
||||
"numastats_"+key,
|
||||
t.tagSet,
|
||||
m.meta,
|
||||
map[string]interface{}{"value": value},
|
||||
now,
|
||||
)
|
||||
if err == nil {
|
||||
output <- msg
|
||||
}
|
||||
}
|
||||
|
||||
if m.config.SendDerivedValues {
|
||||
prev, ok := t.previousValues[key]
|
||||
if ok {
|
||||
rate := float64(value-prev) / timeDiff
|
||||
msg, err := lp.NewMessage(
|
||||
"numastats_"+key+"_rate",
|
||||
t.tagSet,
|
||||
m.meta,
|
||||
map[string]interface{}{"value": rate},
|
||||
now,
|
||||
)
|
||||
if err == nil {
|
||||
output <- msg
|
||||
}
|
||||
}
|
||||
t.previousValues[key] = value
|
||||
y, err := lp.NewMessage(
|
||||
"numastats_"+key,
|
||||
t.tagSet,
|
||||
m.meta,
|
||||
map[string]interface{}{"value": value},
|
||||
now,
|
||||
)
|
||||
if err == nil {
|
||||
output <- y
|
||||
}
|
||||
}
|
||||
|
||||
file.Close()
|
||||
}
|
||||
}
|
||||
|
@@ -2,10 +2,7 @@
|
||||
## `numastat` collector
|
||||
|
||||
```json
|
||||
"numastats": {
|
||||
"send_abs_values" : true,
|
||||
"send_derived_values" : true
|
||||
}
|
||||
"numastats": {}
|
||||
```
|
||||
|
||||
The `numastat` collector reads data from `/sys/devices/system/node/node*/numastat` and outputs a handful **memoryDomain** metrics. See: <https://www.kernel.org/doc/html/latest/admin-guide/numastat.html>
|
||||
@@ -18,9 +15,3 @@ Metrics:
|
||||
* `numastats_local_node`: A process ran on this node's CPU, and got memory from this node.
|
||||
* `numastats_other_node`: A process ran on a different node's CPU, and got memory from this node.
|
||||
* `numastats_interleave_hit`: Interleaving wanted to allocate from this node and succeeded.
|
||||
* `numastats_numa_hit_rate` (if `send_derived_values == true`): Derived rate value per second.
|
||||
* `numastats_numa_miss_rate` (if `send_derived_values == true`): Derived rate value per second.
|
||||
* `numastats_numa_foreign_rate` (if `send_derived_values == true`): Derived rate value per second.
|
||||
* `numastats_local_node_rate` (if `send_derived_values == true`): Derived rate value per second.
|
||||
* `numastats_other_node_rate` (if `send_derived_values == true`): Derived rate value per second.
|
||||
* `numastats_interleave_hit_rate` (if `send_derived_values == true`): Derived rate value per second.
|
||||
|
384
collectors/perfCmdMetric.go
Normal file
384
collectors/perfCmdMetric.go
Normal file
@@ -0,0 +1,384 @@
|
||||
package collectors
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
"math"
|
||||
"os"
|
||||
"os/exec"
|
||||
"regexp"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
lp "github.com/ClusterCockpit/cc-energy-manager/pkg/cc-message"
|
||||
cclog "github.com/ClusterCockpit/cc-metric-collector/pkg/ccLogger"
|
||||
topo "github.com/ClusterCockpit/cc-metric-collector/pkg/ccTopology"
|
||||
)
|
||||
|
||||
var perf_number_regex = regexp.MustCompile(`(\d+),(\d+)`)
|
||||
|
||||
const PERF_NOT_COUNTED = "<not counted>"
|
||||
const PERF_UNIT_NULL = "(null)"
|
||||
|
||||
var VALID_METRIC_TYPES = []string{
|
||||
"hwthread",
|
||||
"core",
|
||||
"llc",
|
||||
"socket",
|
||||
"die",
|
||||
"node",
|
||||
"memoryDomain",
|
||||
}
|
||||
|
||||
type PerfCmdCollectorEventConfig struct {
|
||||
Metric string `json:"metric"` // metric name
|
||||
Event string `json:"event"` // perf event configuration
|
||||
Type string `json:"type"` // Metric type (aka node, socket, hwthread, ...)
|
||||
Tags map[string]string `json:"tags,omitempty"` // extra tags for the metric
|
||||
Meta map[string]string `json:"meta,omitempty"` // extra meta information for the metric
|
||||
Unit string `json:"unit,omitempty"` // unit of metric (if any)
|
||||
UsePerfUnit bool `json:"use_perf_unit,omitempty"` // for some events perf tells a metric
|
||||
TypeAgg string `json:"type_aggregation,omitempty"` // how to aggregate cpu-data to metric type
|
||||
Publish bool `json:"publish,omitempty"`
|
||||
//lastCounterValue float64
|
||||
//lastMetricValue float64
|
||||
collectorTags *map[string]string
|
||||
collectorMeta *map[string]string
|
||||
useCpus map[int][]int
|
||||
}
|
||||
|
||||
type PerfCmdCollectorExpression struct {
|
||||
Metric string `json:"metric"` // metric name
|
||||
Expression string `json:"expression"` // expression based on metrics
|
||||
Type string `json:"type"` // Metric type (aka node, socket, hwthread, ...)
|
||||
TypeAgg string `json:"type_aggregation,omitempty"` // how to aggregate cpu-data to metric type
|
||||
Publish bool `json:"publish,omitempty"`
|
||||
}
|
||||
|
||||
// These are the fields we read from the JSON configuration
|
||||
type PerfCmdCollectorConfig struct {
|
||||
Metrics []PerfCmdCollectorEventConfig `json:"metrics"`
|
||||
Expressions []PerfCmdCollectorExpression `json:"expressions"`
|
||||
PerfCmd string `json:"perf_command,omitempty"`
|
||||
}
|
||||
|
||||
// PerfCmdCollector runs the external 'perf stat' command each interval and
// publishes the measured event counts as metrics.
// This contains all variables we need during execution and the variables
// defined by metricCollector (name, init, ...)
type PerfCmdCollector struct {
	metricCollector
	config          PerfCmdCollectorConfig                  // the configuration structure
	meta            map[string]string                       // default meta information
	tags            map[string]string                       // default tags
	metrics         map[string]*PerfCmdCollectorEventConfig // valid events for internal data, keyed by perf event string
	perfEventString string                                  // comma-separated event list passed to 'perf stat -e'
}
|
||||
|
||||
// Functions to implement MetricCollector interface
|
||||
// Init(...), Read(...), Close()
|
||||
// See: metricCollector.go
|
||||
|
||||
// Init initializes the sample collector
|
||||
// Called once by the collector manager
|
||||
// All tags, meta data tags and metrics that do not change over the runtime should be set here
|
||||
func (m *PerfCmdCollector) Init(config json.RawMessage) error {
|
||||
var err error = nil
|
||||
// Always set the name early in Init() to use it in cclog.Component* functions
|
||||
m.name = "PerfCmdCollector"
|
||||
m.parallel = false
|
||||
// This is for later use, also call it early
|
||||
m.setup()
|
||||
// Tell whether the collector should be run in parallel with others (reading files, ...)
|
||||
// or it should be run serially, mostly for collectors actually doing measurements
|
||||
// because they should not measure the execution of the other collectors
|
||||
m.parallel = true
|
||||
// Define meta information sent with each metric
|
||||
// (Can also be dynamic or this is the basic set with extension through AddMeta())
|
||||
m.meta = map[string]string{"source": m.name, "group": "PerfCounter"}
|
||||
// Define tags sent with each metric
|
||||
// The 'type' tag is always needed, it defines the granularity of the metric
|
||||
// node -> whole system
|
||||
// socket -> CPU socket (requires socket ID as 'type-id' tag)
|
||||
// die -> CPU die (requires CPU die ID as 'type-id' tag)
|
||||
// memoryDomain -> NUMA domain (requires NUMA domain ID as 'type-id' tag)
|
||||
// llc -> Last level cache (requires last level cache ID as 'type-id' tag)
|
||||
// core -> single CPU core that may consist of multiple hardware threads (SMT) (requires core ID as 'type-id' tag)
|
||||
// hwthtread -> single CPU hardware thread (requires hardware thread ID as 'type-id' tag)
|
||||
// accelerator -> A accelerator device like GPU or FPGA (requires an accelerator ID as 'type-id' tag)
|
||||
m.tags = map[string]string{"type": "node"}
|
||||
// Read in the JSON configuration
|
||||
if len(config) > 0 {
|
||||
err = json.Unmarshal(config, &m.config)
|
||||
if err != nil {
|
||||
cclog.ComponentError(m.name, "Error reading config:", err.Error())
|
||||
return err
|
||||
}
|
||||
}
|
||||
m.config.PerfCmd = "perf"
|
||||
if len(m.config.PerfCmd) > 0 {
|
||||
_, err := os.Stat(m.config.PerfCmd)
|
||||
if err != nil {
|
||||
abs, err := exec.LookPath(m.config.PerfCmd)
|
||||
if err != nil {
|
||||
cclog.ComponentError(m.name, "Error looking up perf command", m.config.PerfCmd, ":", err.Error())
|
||||
return err
|
||||
}
|
||||
m.config.PerfCmd = abs
|
||||
}
|
||||
}
|
||||
|
||||
// Set up everything that the collector requires during the Read() execution
|
||||
// Check files required, test execution of some commands, create data structure
|
||||
// for all topological entities (sockets, NUMA domains, ...)
|
||||
// Return some useful error message in case of any failures
|
||||
|
||||
valid_metrics := make([]*PerfCmdCollectorEventConfig, 0)
|
||||
valid_events := make([]string, 0)
|
||||
test_type := func(Type string) bool {
|
||||
for _, t := range VALID_METRIC_TYPES {
|
||||
if Type == t {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
for i, metric := range m.config.Metrics {
|
||||
if !test_type(metric.Type) {
|
||||
cclog.ComponentError(m.name, "Metric", metric.Metric, "has an invalid type")
|
||||
continue
|
||||
}
|
||||
cmd := exec.Command(m.config.PerfCmd, "stat", "--null", "-e", metric.Event, "hostname")
|
||||
cclog.ComponentDebug(m.name, "Running", cmd.String())
|
||||
err := cmd.Run()
|
||||
if err != nil {
|
||||
cclog.ComponentError(m.name, "Event", metric.Event, "not available in perf", err.Error())
|
||||
} else {
|
||||
valid_metrics = append(valid_metrics, &m.config.Metrics[i])
|
||||
}
|
||||
}
|
||||
if len(valid_metrics) == 0 {
|
||||
return errors.New("no configured metric available through perf")
|
||||
}
|
||||
|
||||
IntToStringList := func(ilist []int) []string {
|
||||
list := make([]string, 0)
|
||||
for _, i := range ilist {
|
||||
list = append(list, fmt.Sprintf("%v", i))
|
||||
}
|
||||
return list
|
||||
}
|
||||
|
||||
m.metrics = make(map[string]*PerfCmdCollectorEventConfig, 0)
|
||||
for _, metric := range valid_metrics {
|
||||
metric.collectorMeta = &m.meta
|
||||
metric.collectorTags = &m.tags
|
||||
metric.useCpus = make(map[int][]int)
|
||||
tlist := topo.GetTypeList(metric.Type)
|
||||
cclog.ComponentDebug(m.name, "Metric", metric.Metric, "with type", metric.Type, ":", strings.Join(IntToStringList(tlist), ","))
|
||||
|
||||
for _, t := range tlist {
|
||||
metric.useCpus[t] = topo.GetTypeHwthreads(metric.Type, t)
|
||||
cclog.ComponentDebug(m.name, "Metric", metric.Metric, "with type", metric.Type, "and ID", t, ":", strings.Join(IntToStringList(metric.useCpus[t]), ","))
|
||||
}
|
||||
|
||||
m.metrics[metric.Event] = metric
|
||||
valid_events = append(valid_events, metric.Event)
|
||||
}
|
||||
m.perfEventString = strings.Join(valid_events, ",")
|
||||
cclog.ComponentDebug(m.name, "perfEventString", m.perfEventString)
|
||||
|
||||
// Set this flag only if everything is initialized properly, all required files exist, ...
|
||||
m.init = true
|
||||
return err
|
||||
}
|
||||
|
||||
// PerfEventJson mirrors one line of 'perf stat -j' JSON output.
// Exported fields are filled by json.Unmarshal; the unexported fields hold
// parsed or derived values used internally by the collector.
type PerfEventJson struct {
	CounterValue string `json:"counter-value"` // raw counter value as printed by perf
	counterValue float64 // parsed counter value, scaled to the full runtime
	MetricValue  string `json:"metric-value"` // raw derived metric value as printed by perf
	metricValue  float64 // parsed metric value, scaled to the full runtime
	CounterUnit  string `json:"unit"` // counter unit, may be "(null)"
	counterUnit  string // counter unit, empty if perf reported none
	MetricUnit   string `json:"metric-unit"` // metric unit, may be "(null)"
	metricUnit   string // metric unit, empty if perf reported none
	Cpu          string `json:"cpu,omitempty"` // CPU the value belongs to, as printed by perf
	cpu          int // parsed CPU ID
	Event        string `json:"event"` // perf event name
	Runtime      uint64 `json:"event-runtime"` // runtime reported by perf for this event
	PcntRunning  float64 `json:"pcnt-running"` // percentage of time the event was actually counted (multiplexing)
	metrictypeid string // topological entity ID the aggregated value belongs to
	metrictype   string // topological entity type (node, socket, ...)
	metricname   string // name of the output metric
	publish      bool // whether the metric should be sent to the sinks
}
|
||||
|
||||
func parseEvent(line string) (*PerfEventJson, error) {
|
||||
data := PerfEventJson{}
|
||||
|
||||
tmp := perf_number_regex.ReplaceAllString(line, `$1.$2`)
|
||||
err := json.Unmarshal([]byte(tmp), &data)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if len(data.CounterValue) > 0 && data.CounterValue != PERF_NOT_COUNTED {
|
||||
val, err := strconv.ParseFloat(data.CounterValue, 64)
|
||||
if err == nil {
|
||||
if data.PcntRunning != 100.0 {
|
||||
val = (val / data.PcntRunning) * 100
|
||||
}
|
||||
data.counterValue = val
|
||||
}
|
||||
}
|
||||
if len(data.MetricValue) > 0 && data.MetricValue != PERF_NOT_COUNTED {
|
||||
val, err := strconv.ParseFloat(data.MetricValue, 64)
|
||||
if err == nil {
|
||||
if data.PcntRunning != 100.0 {
|
||||
val = (val / data.PcntRunning) * 100
|
||||
}
|
||||
data.metricValue = val
|
||||
}
|
||||
}
|
||||
if len(data.CounterUnit) > 0 && data.CounterUnit != PERF_UNIT_NULL {
|
||||
data.counterUnit = data.CounterUnit
|
||||
}
|
||||
if len(data.MetricUnit) > 0 && data.MetricUnit != PERF_UNIT_NULL {
|
||||
data.metricUnit = data.MetricUnit
|
||||
}
|
||||
if len(data.Cpu) > 0 {
|
||||
val, err := strconv.ParseInt(data.Cpu, 10, 64)
|
||||
if err == nil {
|
||||
data.cpu = int(val)
|
||||
}
|
||||
}
|
||||
|
||||
return &data, nil
|
||||
}
|
||||
|
||||
func perfdataToMetric(data *PerfEventJson, config *PerfCmdCollectorEventConfig, timestamp time.Time) (lp.CCMetric, error) {
|
||||
metric, err := lp.NewMetric(config.Metric, *config.collectorTags, *config.collectorMeta, data.counterValue, timestamp)
|
||||
if err == nil {
|
||||
metric.AddTag("type", data.metrictype)
|
||||
if data.metrictype != "node" {
|
||||
metric.AddTag("type-id", data.metrictypeid)
|
||||
}
|
||||
for k, v := range config.Tags {
|
||||
metric.AddTag(k, v)
|
||||
}
|
||||
for k, v := range config.Meta {
|
||||
metric.AddMeta(k, v)
|
||||
}
|
||||
if len(config.Unit) > 0 {
|
||||
metric.AddMeta("unit", config.Unit)
|
||||
}
|
||||
if config.UsePerfUnit && (!metric.HasMeta("unit")) && (!metric.HasTag("unit")) {
|
||||
var unit string = ""
|
||||
if len(data.counterUnit) > 0 {
|
||||
unit = data.counterUnit
|
||||
} else if len(data.metricUnit) > 0 {
|
||||
unit = data.metricUnit
|
||||
}
|
||||
if len(unit) > 0 {
|
||||
metric.AddMeta("unit", unit)
|
||||
}
|
||||
}
|
||||
return metric, nil
|
||||
}
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// Read collects all metrics belonging to the sample collector
|
||||
// and sends them through the output channel to the collector manager
|
||||
func (m *PerfCmdCollector) Read(interval time.Duration, output chan lp.CCMessage) {
|
||||
perfdata := make([]*PerfEventJson, 0)
|
||||
// Create a sample metric
|
||||
timestamp := time.Now()
|
||||
|
||||
cmd := exec.Command(m.config.PerfCmd, "stat", "-A", "-a", "-j", "-e", m.perfEventString, "/usr/bin/sleep", fmt.Sprintf("%d", int(interval.Seconds())))
|
||||
|
||||
cclog.ComponentDebug(m.name, "Running", cmd.String())
|
||||
out, err := cmd.CombinedOutput()
|
||||
if err == nil {
|
||||
sout := strings.TrimSpace(string(out))
|
||||
for _, l := range strings.Split(sout, "\n") {
|
||||
d, err := parseEvent(l)
|
||||
if err == nil {
|
||||
perfdata = append(perfdata, d)
|
||||
}
|
||||
}
|
||||
} else {
|
||||
cclog.ComponentError(m.name, "Execution of", cmd.String(), "failed with", err.Error())
|
||||
}
|
||||
|
||||
metricData := make([]*PerfEventJson, 0)
|
||||
for _, metricTmp := range m.config.Metrics {
|
||||
metricConfig := m.metrics[metricTmp.Event]
|
||||
for t, clist := range metricConfig.useCpus {
|
||||
val := float64(0)
|
||||
sum := float64(0)
|
||||
min := math.MaxFloat64
|
||||
max := float64(0)
|
||||
count := 0
|
||||
cunit := ""
|
||||
munit := ""
|
||||
for _, c := range clist {
|
||||
for _, d := range perfdata {
|
||||
if strings.HasPrefix(d.Event, metricConfig.Event) && d.cpu == c {
|
||||
//cclog.ComponentDebug(m.name, "do calc on CPU", c, ":", d.counterValue)
|
||||
sum += d.counterValue
|
||||
if d.counterValue < min {
|
||||
min = d.counterValue
|
||||
}
|
||||
if d.counterValue > max {
|
||||
max = d.counterValue
|
||||
}
|
||||
count++
|
||||
cunit = d.counterUnit
|
||||
munit = d.metricUnit
|
||||
}
|
||||
}
|
||||
}
|
||||
if metricConfig.TypeAgg == "sum" {
|
||||
val = sum
|
||||
} else if metricConfig.TypeAgg == "min" {
|
||||
val = min
|
||||
} else if metricConfig.TypeAgg == "max" {
|
||||
val = max
|
||||
} else if metricConfig.TypeAgg == "avg" || metricConfig.TypeAgg == "mean" {
|
||||
val = sum / float64(count)
|
||||
} else {
|
||||
val = sum
|
||||
}
|
||||
//cclog.ComponentDebug(m.name, "Metric", metricConfig.Metric, "type", metricConfig.Type, "ID", t, ":", val)
|
||||
metricData = append(metricData, &PerfEventJson{
|
||||
Event: metricConfig.Event,
|
||||
metricname: metricConfig.Metric,
|
||||
metrictype: metricConfig.Type,
|
||||
metrictypeid: fmt.Sprintf("%v", t),
|
||||
counterValue: val,
|
||||
metricValue: 0,
|
||||
metricUnit: munit,
|
||||
counterUnit: cunit,
|
||||
publish: metricConfig.Publish,
|
||||
})
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
for _, d := range metricData {
|
||||
if d.publish {
|
||||
m, err := perfdataToMetric(d, m.metrics[d.Event], timestamp)
|
||||
if err == nil {
|
||||
output <- m
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
// Close metric collector: close network connection, close files, close libraries, ...
// Called once by the collector manager.
// The perf command collector holds no resources between Read() calls, so
// only the init flag has to be reset.
func (m *PerfCmdCollector) Close() {
	// Unset flag
	m.init = false
}
|
54
collectors/perfCmdMetric.md
Normal file
54
collectors/perfCmdMetric.md
Normal file
@@ -0,0 +1,54 @@
|
||||
# PerfCmdMetric collector
|
||||
|
||||
|
||||
## Configuration
|
||||
|
||||
```json
|
||||
{
|
||||
"perf_command": "perf",
|
||||
"metrics" : [
|
||||
{
|
||||
"metric": "cpu_cycles",
|
||||
"event": "cycles",
|
||||
"unit": "Hz",
|
||||
"type": "hwthread",
|
||||
"publish": true,
|
||||
"use_perf_unit": false,
|
||||
"type_aggregation": "socket",
|
||||
"tags": {
|
||||
"tags_just" : "for_the_event"
|
||||
},
|
||||
"meta": {
|
||||
"meta_info_just" : "for_the_event"
|
||||
}
|
||||
}
|
||||
],
|
||||
"expressions": [
|
||||
{
|
||||
"metric": "avg_cycles_per_second",
|
||||
"expression": "cpu_cycles / time",
|
||||
"type": "node",
|
||||
"type_aggregation": "avg",
|
||||
"publish": true
|
||||
}
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
- `perf_command`: Path to the `perf` command. If it is not an absolute path, the command is looked up in `$PATH`.
|
||||
- `metrics`: List of metrics to measure
|
||||
- `metric`: Name of metric for output and expressions
|
||||
- `event`: Event as supplied to `perf stat -e <event>` like `cycles` or `uncore_imc_0/event=0x01,umask=0x00/`
|
||||
- `unit` : Unit for the metric. It is added as meta information, similar to adding `"meta" : {"unit": "myunit"}`.
|
||||
- `type`: Do measurements at this level (`hwthread` and `socket` are the most common ones).
|
||||
- `publish`: Publish the metric or use it only for expressions.
|
||||
- `use_perf_unit`: For some events, `perf` outputs a unit. With this switch, the unit provided by `perf` is added as meta information.
|
||||
- `type_aggregation`: Sum the metric values to the given type
|
||||
- `tags`: Tags just for this metric
|
||||
- `meta`: Meta informations just for this metric
|
||||
- `expressions`: Calculate metrics out of multiple measurements
|
||||
- `metric`: Name of metric for output
|
||||
- `expression`: What should be calculated
|
||||
- `type`: Aggregate the expression results to this level
|
||||
- `type_aggregation`: Aggregate the expression results with `sum`, `min`, `max`, `avg` or `mean`
|
||||
- `publish`: Publish metric
|
481
collectors/perfEventMetric.go
Normal file
481
collectors/perfEventMetric.go
Normal file
@@ -0,0 +1,481 @@
|
||||
package collectors
|
||||
|
||||
/*
|
||||
#cgo CFLAGS: -I/usr/include
|
||||
#cgo LDFLAGS: -Wl,--unresolved-symbols=ignore-in-object-files
|
||||
#include <stdlib.h>
|
||||
#include <unistd.h>
|
||||
#include <stdint.h>
|
||||
#include <linux/perf_event.h>
|
||||
#include <linux/hw_breakpoint.h>
|
||||
#include <sys/ioctl.h>
|
||||
#include <syscall.h>
|
||||
#include <string.h>
|
||||
#include <errno.h>
|
||||
|
||||
typedef enum {
|
||||
PERF_EVENT_WITH_CONFIG1 = (1<<0),
|
||||
PERF_EVENT_WITH_CONFIG2 = (1<<1),
|
||||
PERF_EVENT_WITH_EXCLUDE_KERNEL = (1<<2),
|
||||
PERF_EVENT_WITH_EXCLUDE_HV = (1<<3),
|
||||
} PERF_EVENT_FLAG;
|
||||
|
||||
int perf_event_open(int type, uint64_t config, int cpu, uint64_t config1, uint64_t config2, int uncore)
|
||||
{
|
||||
int ret;
|
||||
struct perf_event_attr attr;
|
||||
|
||||
memset(&attr, 0, sizeof(struct perf_event_attr));
|
||||
attr.type = type;
|
||||
attr.config = config;
|
||||
if (!uncore) {
|
||||
attr.exclude_kernel = 1;
|
||||
attr.exclude_hv = 1;
|
||||
}
|
||||
//attr.disabled = 1;
|
||||
//
|
||||
// if (config1 > 0)
|
||||
// {
|
||||
// attr.config1 = config1;
|
||||
// }
|
||||
// if (config2 > 0)
|
||||
// {
|
||||
// attr.config2 = config2;
|
||||
// }
|
||||
// if (flags & PERF_EVENT_WITH_CONFIG1)
|
||||
// {
|
||||
// attr.config1 = config1;
|
||||
// }
|
||||
// if (flags & PERF_EVENT_WITH_CONFIG2)
|
||||
// {
|
||||
// attr.config2 = config2;
|
||||
// }
|
||||
// if (flags & PERF_EVENT_WITH_EXCLUDE_KERNEL)
|
||||
// {
|
||||
// attr.exclude_kernel = 1;
|
||||
// }
|
||||
// if (flags & PERF_EVENT_WITH_EXCLUDE_HV)
|
||||
// {
|
||||
// attr.exclude_hv = 1;
|
||||
// }
|
||||
|
||||
|
||||
|
||||
ret = syscall(__NR_perf_event_open, &attr, -1, cpu, -1, 0);
|
||||
if (ret < 0)
|
||||
{
|
||||
return -errno;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
int perf_event_stop(int fd)
|
||||
{
|
||||
return ioctl(fd, PERF_EVENT_IOC_DISABLE, 0);
|
||||
}
|
||||
|
||||
|
||||
int perf_event_start(int fd)
|
||||
{
|
||||
return ioctl(fd, PERF_EVENT_IOC_ENABLE, 0);
|
||||
}
|
||||
|
||||
int perf_event_reset(int fd)
|
||||
{
|
||||
return ioctl(fd, PERF_EVENT_IOC_RESET, 0);
|
||||
}
|
||||
|
||||
int perf_event_read(int fd, uint64_t *data)
|
||||
{
|
||||
int ret = 0;
|
||||
|
||||
ret = read(fd, data, sizeof(uint64_t));
|
||||
if (ret != sizeof(uint64_t))
|
||||
{
|
||||
return -errno;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
int perf_event_close(int fd)
|
||||
{
|
||||
close(fd);
|
||||
}
|
||||
|
||||
*/
|
||||
import "C"
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
"os"
|
||||
"path"
|
||||
"strconv"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
lp "github.com/ClusterCockpit/cc-energy-manager/pkg/cc-message"
|
||||
cclog "github.com/ClusterCockpit/cc-metric-collector/pkg/ccLogger"
|
||||
"github.com/ClusterCockpit/cc-metric-collector/pkg/ccTopology"
|
||||
)
|
||||
|
||||
// SYSFS_PERF_EVENT_PATH is the sysfs directory listing the available perf
// event units (PMUs) with their type IDs, event descriptions and cpumasks.
const SYSFS_PERF_EVENT_PATH = `/sys/devices`
|
||||
|
||||
// PerfEventCollectorEventConfig describes a single event for the
// perf_event_open based collector. Exported fields come from the JSON
// configuration; the unexported fields are resolved from sysfs by
// UpdateEventConfig().
type PerfEventCollectorEventConfig struct {
	Name string `json:"name"` // name of the output metric
	Unit string `json:"unit,omitempty"` // PMU unit below /sys/devices, defaults to "cpu"
	unitType int // type ID read from /sys/devices/<unit>/type
	Config string `json:"config"` // hex value for perf_event_attr.config
	config C.uint64_t // parsed Config
	Config1 string `json:"config1,omitempty"` // hex value for perf_event_attr.config1
	config1 C.uint64_t // parsed Config1
	Config2 string `json:"config2,omitempty"` // hex value for perf_event_attr.config2
	config2 C.uint64_t // parsed Config2
	ExcludeKernel bool `json:"exclude_kernel,omitempty"` // exclude kernel from measurement (NOTE(review): not applied in the visible perf_event_open call — confirm)
	ExcludeHypervisor bool `json:"exclude_hypervisor,omitempty"` // exclude hypervisor from measurement (NOTE(review): see above)
	Tags map[string]string `json:"tags,omitempty"` // extra tags for the event's metric
	Meta map[string]string `json:"meta,omitempty"` // extra meta information for the event's metric
	PerHwthread bool `json:"per_hwthread,omitempty"` // generate per-hwthread metrics
	PerSocket bool `json:"per_socket,omitempty"` // generate per-socket metrics
	ScaleFile string `json:"scale_file,omitempty"` // sysfs .scale file providing a scaling factor
	scaling_factor float64 // value read from ScaleFile, 0 if none
	flags uint64 // PERF_EVENT_FLAG bits (unused in the visible code)
	valid bool // set by UpdateEventConfig() on success
	cpumask []int // hardware threads the unit can be measured on
}
|
||||
|
||||
// PerfEventCollectorEventData holds the runtime state of one opened perf
// event on one hardware thread.
type PerfEventCollectorEventData struct {
	fd C.int // perf event file descriptor, -1 if the CPU is not in the unit's cpumask
	last uint64 // counter value of the previous reading
	last_diff uint64 // difference between the last two readings
	idx int // index into PerfEventCollectorConfig.Events
}
|
||||
|
||||
// PerfEventCollectorConfig contains the fields read from the JSON
// configuration of the perf_event collector.
type PerfEventCollectorConfig struct {
	Events []PerfEventCollectorEventConfig `json:"events"` // list of events to measure
	events []PerfEventCollectorEventConfig // NOTE(review): unused in the visible code
}
|
||||
|
||||
// PerfEventCollector measures hardware events directly through the
// perf_event_open system call.
type PerfEventCollector struct {
	metricCollector
	config PerfEventCollectorConfig // the configuration structure
	meta map[string]string // default meta information
	tags map[string]string // default tags
	events map[int]map[int]PerfEventCollectorEventData // runtime state per CPU ID, per event index
}
|
||||
|
||||
func UpdateEventConfig(event *PerfEventCollectorEventConfig) error {
|
||||
parseHexNumber := func(number string) (uint64, error) {
|
||||
snum := strings.Trim(number, "\n")
|
||||
snum = strings.Replace(snum, "0x", "", -1)
|
||||
snum = strings.Replace(snum, "0X", "", -1)
|
||||
return strconv.ParseUint(snum, 16, 64)
|
||||
}
|
||||
if len(event.Unit) == 0 {
|
||||
event.Unit = "cpu"
|
||||
}
|
||||
|
||||
unitpath := path.Join(SYSFS_PERF_EVENT_PATH, event.Unit)
|
||||
if _, err := os.Stat(unitpath); err != nil {
|
||||
return err
|
||||
}
|
||||
typefile := path.Join(unitpath, "type")
|
||||
if _, err := os.Stat(typefile); err != nil {
|
||||
return err
|
||||
}
|
||||
typebytes, err := os.ReadFile(typefile)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
typestring := string(typebytes)
|
||||
ut, err := strconv.ParseUint(strings.Trim(typestring, "\n"), 10, 64)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
event.unitType = int(ut)
|
||||
|
||||
if len(event.Config) > 0 {
|
||||
x, err := parseHexNumber(event.Config)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
event.config = C.uint64_t(x)
|
||||
}
|
||||
if len(event.Config1) > 0 {
|
||||
x, err := parseHexNumber(event.Config1)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
event.config1 = C.uint64_t(x)
|
||||
}
|
||||
if len(event.Config2) > 0 {
|
||||
x, err := parseHexNumber(event.Config2)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
event.config2 = C.uint64_t(x)
|
||||
}
|
||||
if len(event.ScaleFile) > 0 {
|
||||
if _, err := os.Stat(event.ScaleFile); err != nil {
|
||||
return err
|
||||
}
|
||||
scalebytes, err := os.ReadFile(event.ScaleFile)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
x, err := strconv.ParseFloat(string(scalebytes), 64)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
event.scaling_factor = x
|
||||
}
|
||||
event.cpumask = make([]int, 0)
|
||||
cpumaskfile := path.Join(unitpath, "cpumask")
|
||||
if _, err := os.Stat(cpumaskfile); err == nil {
|
||||
|
||||
cpumaskbytes, err := os.ReadFile(cpumaskfile)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
cpumaskstring := strings.Trim(string(cpumaskbytes), "\n")
|
||||
cclog.Debug("cpumask", cpumaskstring)
|
||||
for _, part := range strings.Split(cpumaskstring, ",") {
|
||||
start := 0
|
||||
end := 0
|
||||
count, _ := fmt.Sscanf(part, "%d-%d", &start, &end)
|
||||
cclog.Debug("scanf", count, " s ", start, " e ", end)
|
||||
|
||||
if count == 1 {
|
||||
cclog.Debug("adding ", start)
|
||||
event.cpumask = append(event.cpumask, start)
|
||||
} else if count == 2 {
|
||||
for i := start; i <= end; i++ {
|
||||
cclog.Debug("adding ", i)
|
||||
event.cpumask = append(event.cpumask, i)
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
} else {
|
||||
event.cpumask = append(event.cpumask, ccTopology.CpuList()...)
|
||||
}
|
||||
|
||||
event.valid = true
|
||||
return nil
|
||||
}
|
||||
|
||||
// Init initializes the perf_event collector.
// Called once by the collector manager.
// It parses the configuration, resolves each event's sysfs settings via
// UpdateEventConfig() and opens one perf event file descriptor per valid
// event and hardware thread. CPUs outside a unit's cpumask get a
// placeholder entry with fd -1 so Read() can skip them.
func (m *PerfEventCollector) Init(config json.RawMessage) error {
	var err error = nil

	m.name = "PerfEventCollector"

	m.setup()

	// the collector performs actual measurements, do not run it in
	// parallel with other collectors
	m.parallel = false

	// default meta information sent with each metric
	m.meta = map[string]string{"source": m.name, "group": "PerfCounter"}

	// default tags; 'type'/'type-id' are overwritten per metric in Read()
	m.tags = map[string]string{"type": "node"}

	cpudata := ccTopology.CpuData()

	if len(config) > 0 {
		err = json.Unmarshal(config, &m.config)
		if err != nil {
			cclog.ComponentError(m.name, "Error reading config:", err.Error())
			return err
		}
	}

	// resolve unit type, config values, scaling factor and cpumask for
	// every configured event; failing events stay marked invalid
	for i, e := range m.config.Events {
		err = UpdateEventConfig(&e)
		if err != nil {
			cclog.ComponentError(m.name, "Checks for event unit", e.Name, "failed:", err.Error())
		}
		m.config.Events[i] = e
	}
	// total counts successfully opened file descriptors across all CPUs
	total := 0
	m.events = make(map[int]map[int]PerfEventCollectorEventData)
	for _, hwt := range cpudata {
		cclog.ComponentDebug(m.name, "Adding events for cpuid", hwt.CpuID)
		hwt_events := make(map[int]PerfEventCollectorEventData)
		for j, e := range m.config.Events {
			if e.valid {
				if _, ok := intArrayContains(e.cpumask, hwt.CpuID); ok {
					cclog.ComponentDebug(m.name, "Adding event", e.Name, fmt.Sprintf("(cpuid %d unit %s(%d) config %s config1 %s config2 %s)",
						hwt.CpuID,
						e.Unit,
						e.unitType,
						e.Config,
						e.Config1,
						e.Config2,
					))
					// (int type, uint64_t config, int cpu, uint64_t config1, uint64_t config2, int uncore)
					fd := C.perf_event_open(C.int(e.unitType), e.config, C.int(hwt.CpuID), e.config1, e.config2, C.int(1))
					if fd < 0 {
						cclog.ComponentError(m.name, "Failed to create event", e.Name, ":", fd)
						continue
					}
					hwt_events[j] = PerfEventCollectorEventData{
						idx:  j,
						fd:   fd,
						last: 0,
					}
					total++
				} else {
					// CPU not covered by the unit's cpumask: store a
					// placeholder with fd -1 so Read() can skip it
					cclog.ComponentDebug(m.name, "Cpu not in cpumask of unit", e.cpumask)
					hwt_events[j] = PerfEventCollectorEventData{
						idx:  j,
						fd:   -1,
						last: 0,
					}
				}
			} else {
				cclog.ComponentError(m.name, "Event", e.Name, "not valid")
			}
		}
		cclog.ComponentDebug(m.name, "Adding", len(hwt_events), "events for cpuid", hwt.CpuID)
		m.events[hwt.CpuID] = hwt_events
	}
	if total == 0 {
		cclog.ComponentError(m.name, "Failed to add events")
		return errors.New("failed to add events")
	}

	// mark the collector usable only after at least one event was opened
	m.init = true
	return err
}
|
||||
|
||||
func (m *PerfEventCollector) CalcSocketData() map[int]map[int]interface{} {
|
||||
out := make(map[int]map[int]interface{})
|
||||
|
||||
for cpuid, cpudata := range m.events {
|
||||
for i, eventdata := range cpudata {
|
||||
eventconfig := m.config.Events[i]
|
||||
sid := ccTopology.GetHwthreadSocket(cpuid)
|
||||
if _, ok := out[sid]; !ok {
|
||||
out[sid] = make(map[int]interface{})
|
||||
for i := range cpudata {
|
||||
out[sid][i] = 0
|
||||
}
|
||||
}
|
||||
if eventconfig.scaling_factor != 0 {
|
||||
out[sid][i] = out[sid][i].(float64) + (float64(eventdata.last_diff) * eventconfig.scaling_factor)
|
||||
} else {
|
||||
out[sid][i] = out[sid][i].(uint64) + eventdata.last_diff
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return out
|
||||
}
|
||||
|
||||
func (m *PerfEventCollector) Read(interval time.Duration, output chan lp.CCMessage) {
|
||||
|
||||
timestamp := time.Now()
|
||||
|
||||
var wg sync.WaitGroup
|
||||
|
||||
for cpuid := range m.events {
|
||||
wg.Add(1)
|
||||
go func(cpuid int, data map[int]map[int]PerfEventCollectorEventData, wg *sync.WaitGroup) {
|
||||
var err error = nil
|
||||
var events map[int]PerfEventCollectorEventData = data[cpuid]
|
||||
for i, e := range events {
|
||||
|
||||
var data C.uint64_t = 0
|
||||
if e.fd < 0 {
|
||||
continue
|
||||
}
|
||||
ret := C.perf_event_read(e.fd, &data)
|
||||
if ret < 0 {
|
||||
event := m.config.Events[i]
|
||||
cclog.ComponentError(m.name, "Failed to read event", event.Name, ":", ret)
|
||||
}
|
||||
if e.last == 0 {
|
||||
cclog.ComponentDebug(m.name, "Updating last value on first iteration")
|
||||
e.last = uint64(data)
|
||||
|
||||
} else {
|
||||
var metric lp.CCMetric
|
||||
event := m.config.Events[i]
|
||||
value := uint64(data) - e.last
|
||||
cclog.ComponentDebug(m.name, "Calculating difference", uint64(data), "-", e.last, "=", uint64(data)-e.last)
|
||||
e.last = uint64(data)
|
||||
e.last_diff = value
|
||||
|
||||
if event.scaling_factor == 0 {
|
||||
metric, err = lp.NewMetric(event.Name, m.tags, m.meta, value, timestamp)
|
||||
} else {
|
||||
var f64_value float64 = float64(value) * event.scaling_factor
|
||||
metric, err = lp.NewMetric(event.Name, m.tags, m.meta, f64_value, timestamp)
|
||||
}
|
||||
//if event.PerHwthread {
|
||||
if err == nil {
|
||||
metric.AddTag("type", "hwthread")
|
||||
metric.AddTag("type-id", fmt.Sprintf("%d", cpuid))
|
||||
for k, v := range event.Tags {
|
||||
metric.AddTag(k, v)
|
||||
}
|
||||
for k, v := range event.Meta {
|
||||
metric.AddMeta(k, v)
|
||||
}
|
||||
output <- metric
|
||||
} else {
|
||||
cclog.ComponentError(m.name, "Failed to create CCMetric for event", event.Name)
|
||||
}
|
||||
//}
|
||||
}
|
||||
events[i] = e
|
||||
}
|
||||
data[cpuid] = events
|
||||
wg.Done()
|
||||
}(cpuid, m.events, &wg)
|
||||
}
|
||||
wg.Wait()
|
||||
|
||||
// var data C.uint64_t = 0
|
||||
// event := m.config.Events[e.idx]
|
||||
// cclog.ComponentDebug(m.name, "Reading event", event.Name)
|
||||
// ret := C.perf_event_read(e.fd, &data)
|
||||
// if ret < 0 {
|
||||
// cclog.ComponentError(m.name, "Failed to read event", event.Name, ":", ret)
|
||||
// }
|
||||
// if e.last == 0 {
|
||||
// cclog.ComponentDebug(m.name, "Updating last value on first iteration")
|
||||
// e.last = uint64(data)
|
||||
|
||||
// } else {
|
||||
// value := uint64(data) - e.last
|
||||
// cclog.ComponentDebug(m.name, "Calculating difference", uint64(data), "-", e.last, "=", uint64(data)-e.last)
|
||||
// e.last = uint64(data)
|
||||
|
||||
// y, err := lp.NewMetric(event.Name, m.tags, m.meta, value, timestamp)
|
||||
// if err == nil {
|
||||
// for k, v := range event.Tags {
|
||||
// y.AddTag(k, v)
|
||||
// }
|
||||
// for k, v := range event.Meta {
|
||||
// y.AddMeta(k, v)
|
||||
// }
|
||||
// output <- y
|
||||
// } else {
|
||||
// cclog.ComponentError(m.name, "Failed to create CCMetric for event", event.Name)
|
||||
// }
|
||||
// }
|
||||
// m.events[i] = e
|
||||
// }
|
||||
|
||||
}
|
||||
|
||||
func (m *PerfEventCollector) Close() {
|
||||
|
||||
for _, events := range m.events {
|
||||
for _, e := range events {
|
||||
C.perf_event_close(e.fd)
|
||||
}
|
||||
}
|
||||
m.init = false
|
||||
}
|
44
collectors/perfEventMetric.md
Normal file
44
collectors/perfEventMetric.md
Normal file
@@ -0,0 +1,44 @@
|
||||
# `perf_event` collector
|
||||
|
||||
This collector uses directly the `perf_event_open` system call to measure events. There is no name to event translation, the configuration has to be as low-level as required by the system call. It allows to aggregate the measurements to topological entities like socket or the whole node.
|
||||
|
||||
## Configuration
|
||||
|
||||
```json
|
||||
{
|
||||
"events" : [
|
||||
{
|
||||
"name" : "instructions",
|
||||
"unit" : "uncore_imc_0",
|
||||
"config": "0x01",
|
||||
"scale_file" : "/sys/devices/<unit>/events/<event>.scale",
|
||||
"per_hwthread": true,
|
||||
"per_socket": true,
|
||||
"exclude_kernel": true,
|
||||
"exclude_hypervisor": true,
|
||||
"tags": {
|
||||
"tags": "just_for_the_event"
|
||||
},
|
||||
"meta": {
|
||||
"meta_info": "just_for_the_event"
|
||||
},
|
||||
"config1": "0x00",
|
||||
"config2": "0x00"
|
||||
}
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
- `events`: List of events to measure
|
||||
- `name`: Name for the metric
|
||||
- `unit`: Unit of the event or `cpu` if not given. The unit type ID is resolved by reading the file `/sys/devices/<unit>/type`. The unit type ID is then written to the `perf_event_attr` struct member `type`.
|
||||
- `config`: Hex value written to the `perf_event_attr` struct member `config`.
|
||||
- `config1`: Hex value written to the `perf_event_attr` struct member `config1` (optional).
|
||||
- `config2`: Hex value written to the `perf_event_attr` struct member `config2` (optional).
|
||||
- `scale_file`: If a measurement requires scaling, like the `power` unit aka RAPL, it is provided by the kernel in a `.scale` file at `/sys/devices/<unit>/events/<event>.scale`.
|
||||
- `exclude_kernel`: Exclude the kernel from measurements (default: `true`). It sets the `perf_event_attr` struct member `exclude_kernel`.
|
||||
- `exclude_hypervisor`: Exclude the hypervisors from measurements (default: `true`). It sets the `perf_event_attr` struct member `exclude_hypervisor`.
|
||||
- `per_hwthread`: Generate metrics per hardware thread (default: `false`)
|
||||
- `per_socket`: Generate metrics per socket (default: `false`)
|
||||
- `tags`: Tags just for the event.
|
||||
- `meta`: Meta information just for the event, often a `unit`
|
@@ -9,29 +9,20 @@ import (
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
lp "github.com/ClusterCockpit/cc-energy-manager/pkg/cc-message"
|
||||
cclog "github.com/ClusterCockpit/cc-metric-collector/pkg/ccLogger"
|
||||
lp "github.com/ClusterCockpit/cc-energy-manager/pkg/cc-message"
|
||||
)
|
||||
|
||||
// running average power limit (RAPL) monitoring attributes for a zone
|
||||
// Only for Intel systems
|
||||
|
||||
type RAPLZoneInfo struct {
|
||||
energy int64 // current reading of the energy counter in micro joules
|
||||
maxEnergyRange int64 // Range of the above energy counter in micro-joules
|
||||
energyTimestamp time.Time // timestamp when energy counter was read
|
||||
energyFilepath string // path to a file containing the zones current energy counter in micro joules
|
||||
shortTermFilepath string // path to short term power limit
|
||||
longTermFilepath string // path to long term power limit
|
||||
enabledFilepath string // path to check whether limits are enabled
|
||||
name string
|
||||
|
||||
// tags describing the RAPL zone:
|
||||
// * zone_name, subzone_name: e.g. psys, dram, core, uncore, package-0
|
||||
// * zone_id: e.g. 0:1 (zone 0 sub zone 1)
|
||||
// type=socket for dram, core, uncore, package-* and type=node for psys
|
||||
// type-id=socket id
|
||||
tags map[string]string
|
||||
tags map[string]string
|
||||
energyFilepath string // path to a file containing the zones current energy counter in micro joules
|
||||
energy int64 // current reading of the energy counter in micro joules
|
||||
energyTimestamp time.Time // timestamp when energy counter was read
|
||||
maxEnergyRange int64 // Range of the above energy counter in micro-joules
|
||||
}
|
||||
|
||||
type RAPLCollector struct {
|
||||
@@ -42,40 +33,12 @@ type RAPLCollector struct {
|
||||
// * 0:1 for zone 0 subzone 1
|
||||
ExcludeByID []string `json:"exclude_device_by_id,omitempty"`
|
||||
// Exclude names for RAPL zones, e.g. psys, dram, core, uncore, package-0
|
||||
ExcludeByName []string `json:"exclude_device_by_name,omitempty"`
|
||||
SkipEnergyReading bool `json:"skip_energy_reading,omitempty"`
|
||||
SkipLimitsReading bool `json:"skip_limits_reading,omitempty"`
|
||||
OnlyEnabledLimits bool `json:"only_enabled_limits,omitempty"`
|
||||
ExcludeByName []string `json:"exclude_device_by_name,omitempty"`
|
||||
}
|
||||
raplZoneInfo []RAPLZoneInfo
|
||||
RAPLZoneInfo []RAPLZoneInfo
|
||||
meta map[string]string // default meta information
|
||||
}
|
||||
|
||||
// Get the path to the power limit file for zone selectable by limit name
|
||||
// Common limit names for Intel systems are
|
||||
// - long_term
|
||||
// - short_term
|
||||
// Does not support AMD as AMD systems do not provide the power limits
|
||||
// through sysfs
|
||||
func ZoneLimitFile(folder string, limit_name string) string {
|
||||
nameGlob := filepath.Join(folder, "constraint_*_name")
|
||||
candidates, err := filepath.Glob(nameGlob)
|
||||
if err == nil {
|
||||
for _, c := range candidates {
|
||||
if v, err := os.ReadFile(c); err == nil {
|
||||
if strings.TrimSpace(string(v)) == limit_name {
|
||||
var i int
|
||||
n, err := fmt.Sscanf(filepath.Base(c), "constraint_%d_name", &i)
|
||||
if err == nil && n == 1 {
|
||||
return filepath.Join(folder, fmt.Sprintf("constraint_%d_power_limit_uw", i))
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return ""
|
||||
}
|
||||
|
||||
// Init initializes the running average power limit (RAPL) collector
|
||||
func (m *RAPLCollector) Init(config json.RawMessage) error {
|
||||
|
||||
@@ -95,9 +58,6 @@ func (m *RAPLCollector) Init(config json.RawMessage) error {
|
||||
}
|
||||
|
||||
// Read in the JSON configuration
|
||||
m.config.SkipEnergyReading = false
|
||||
m.config.SkipLimitsReading = false
|
||||
m.config.OnlyEnabledLimits = true
|
||||
if len(config) > 0 {
|
||||
err = json.Unmarshal(config, &m.config)
|
||||
if err != nil {
|
||||
@@ -123,62 +83,50 @@ func (m *RAPLCollector) Init(config json.RawMessage) error {
|
||||
// readZoneInfo reads RAPL monitoring attributes for a zone given by zonePath
|
||||
// See: https://www.kernel.org/doc/html/latest/power/powercap/powercap.html#monitoring-attributes
|
||||
readZoneInfo := func(zonePath string) (z struct {
|
||||
name string // zones name e.g. psys, dram, core, uncore, package-0
|
||||
energyFilepath string // path to a file containing the zones current energy counter in micro joules
|
||||
energy int64 // current reading of the energy counter in micro joules
|
||||
energyTimestamp time.Time // timestamp when energy counter was read
|
||||
maxEnergyRange int64 // Range of the above energy counter in micro-joules
|
||||
shortTermFilepath string
|
||||
longTermFilepath string
|
||||
enabledFilepath string
|
||||
name string // zones name e.g. psys, dram, core, uncore, package-0
|
||||
energyFilepath string // path to a file containing the zones current energy counter in micro joules
|
||||
energy int64 // current reading of the energy counter in micro joules
|
||||
energyTimestamp time.Time // timestamp when energy counter was read
|
||||
maxEnergyRange int64 // Range of the above energy counter in micro-joules
|
||||
ok bool // Are all information available?
|
||||
}) {
|
||||
// zones name e.g. psys, dram, core, uncore, package-0
|
||||
|
||||
foundName := false
|
||||
if v, err :=
|
||||
os.ReadFile(
|
||||
filepath.Join(zonePath, "name")); err == nil {
|
||||
foundName = true
|
||||
z.name = strings.TrimSpace(string(v))
|
||||
}
|
||||
|
||||
if os.Geteuid() == 0 && (!m.config.SkipEnergyReading) {
|
||||
// path to a file containing the zones current energy counter in micro joules
|
||||
z.energyFilepath = filepath.Join(zonePath, "energy_uj")
|
||||
// current reading of the energy counter in micro joules
|
||||
if v, err := os.ReadFile(z.energyFilepath); err == nil {
|
||||
if i, err := strconv.ParseInt(strings.TrimSpace(string(v)), 10, 64); err == nil {
|
||||
z.energy = i
|
||||
// timestamp when energy counter was read
|
||||
z.energyTimestamp = time.Now()
|
||||
}
|
||||
} else {
|
||||
cclog.ComponentError(m.name, "Cannot read energy file for ", z.name, ":", err.Error())
|
||||
// path to a file containing the zones current energy counter in micro joules
|
||||
z.energyFilepath = filepath.Join(zonePath, "energy_uj")
|
||||
|
||||
// current reading of the energy counter in micro joules
|
||||
foundEnergy := false
|
||||
if v, err := os.ReadFile(z.energyFilepath); err == nil {
|
||||
// timestamp when energy counter was read
|
||||
z.energyTimestamp = time.Now()
|
||||
if i, err := strconv.ParseInt(strings.TrimSpace(string(v)), 10, 64); err == nil {
|
||||
foundEnergy = true
|
||||
z.energy = i
|
||||
}
|
||||
// Range of the above energy counter in micro-joules
|
||||
if v, err :=
|
||||
os.ReadFile(
|
||||
filepath.Join(zonePath, "max_energy_range_uj")); err == nil {
|
||||
if i, err := strconv.ParseInt(strings.TrimSpace(string(v)), 10, 64); err == nil {
|
||||
z.maxEnergyRange = i
|
||||
}
|
||||
}
|
||||
} else {
|
||||
cclog.ComponentInfo(m.name, "Energy readings for", zonePath, "disabled")
|
||||
}
|
||||
|
||||
if !m.config.SkipLimitsReading {
|
||||
z.shortTermFilepath = ZoneLimitFile(zonePath, "short_term")
|
||||
if _, err := os.Stat(z.shortTermFilepath); err != nil {
|
||||
z.shortTermFilepath = ""
|
||||
// Range of the above energy counter in micro-joules
|
||||
foundMaxEnergyRange := false
|
||||
if v, err :=
|
||||
os.ReadFile(
|
||||
filepath.Join(zonePath, "max_energy_range_uj")); err == nil {
|
||||
if i, err := strconv.ParseInt(strings.TrimSpace(string(v)), 10, 64); err == nil {
|
||||
foundMaxEnergyRange = true
|
||||
z.maxEnergyRange = i
|
||||
}
|
||||
z.longTermFilepath = ZoneLimitFile(zonePath, "long_term")
|
||||
if _, err := os.Stat(z.longTermFilepath); err != nil {
|
||||
z.longTermFilepath = ""
|
||||
}
|
||||
z.enabledFilepath = filepath.Join(zonePath, "enabled")
|
||||
} else {
|
||||
cclog.ComponentInfo(m.name, "Power limit readings for", zonePath, "disabled")
|
||||
}
|
||||
|
||||
// Are all information available?
|
||||
z.ok = foundName && foundEnergy && foundMaxEnergyRange
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
@@ -195,42 +143,25 @@ func (m *RAPLCollector) Init(config json.RawMessage) error {
|
||||
|
||||
for _, zonePath := range zonesPath {
|
||||
zoneID := strings.TrimPrefix(zonePath, zonePrefix)
|
||||
zonetags := make(map[string]string)
|
||||
|
||||
z := readZoneInfo(zonePath)
|
||||
if !isIDExcluded[zoneID] &&
|
||||
if z.ok &&
|
||||
!isIDExcluded[zoneID] &&
|
||||
!isNameExcluded[z.name] {
|
||||
|
||||
si := RAPLZoneInfo{
|
||||
tags: make(map[string]string),
|
||||
energyFilepath: z.energyFilepath,
|
||||
energy: z.energy,
|
||||
energyTimestamp: z.energyTimestamp,
|
||||
maxEnergyRange: z.maxEnergyRange,
|
||||
shortTermFilepath: z.shortTermFilepath,
|
||||
longTermFilepath: z.longTermFilepath,
|
||||
enabledFilepath: z.enabledFilepath,
|
||||
name: z.name,
|
||||
}
|
||||
si.tags["type"] = "node"
|
||||
si.tags["type-id"] = "0"
|
||||
var pid int = 0
|
||||
if strings.HasPrefix(z.name, "package-") {
|
||||
n, err := fmt.Sscanf(z.name, "package-%d", &pid)
|
||||
if err == nil && n == 1 {
|
||||
si.tags["type-id"] = fmt.Sprintf("%d", pid)
|
||||
si.tags["type"] = "socket"
|
||||
}
|
||||
si.name = "pkg"
|
||||
}
|
||||
// Add RAPL monitoring attributes for a zone
|
||||
if _, ok1 := si.tags["type"]; ok1 {
|
||||
if _, ok2 := si.tags["type-id"]; ok2 {
|
||||
m.raplZoneInfo = append(m.raplZoneInfo, si)
|
||||
zonetags["type"] = si.tags["type"]
|
||||
zonetags["type-id"] = si.tags["type-id"]
|
||||
}
|
||||
}
|
||||
m.RAPLZoneInfo =
|
||||
append(
|
||||
m.RAPLZoneInfo,
|
||||
RAPLZoneInfo{
|
||||
tags: map[string]string{
|
||||
"id": zoneID,
|
||||
"zone_name": z.name,
|
||||
},
|
||||
energyFilepath: z.energyFilepath,
|
||||
energy: z.energy,
|
||||
energyTimestamp: z.energyTimestamp,
|
||||
maxEnergyRange: z.maxEnergyRange,
|
||||
})
|
||||
}
|
||||
|
||||
// find all sub zones for the given zone
|
||||
@@ -243,32 +174,29 @@ func (m *RAPLCollector) Init(config json.RawMessage) error {
|
||||
for _, subZonePath := range subZonesPath {
|
||||
subZoneID := strings.TrimPrefix(subZonePath, subZonePrefix)
|
||||
sz := readZoneInfo(subZonePath)
|
||||
|
||||
if len(zoneID) > 0 && len(z.name) > 0 &&
|
||||
sz.ok &&
|
||||
!isIDExcluded[zoneID+":"+subZoneID] &&
|
||||
!isNameExcluded[sz.name] {
|
||||
|
||||
si := RAPLZoneInfo{
|
||||
tags: zonetags,
|
||||
energyFilepath: sz.energyFilepath,
|
||||
energy: sz.energy,
|
||||
energyTimestamp: sz.energyTimestamp,
|
||||
maxEnergyRange: sz.maxEnergyRange,
|
||||
shortTermFilepath: sz.shortTermFilepath,
|
||||
longTermFilepath: sz.longTermFilepath,
|
||||
enabledFilepath: sz.enabledFilepath,
|
||||
name: sz.name,
|
||||
}
|
||||
if _, ok1 := si.tags["type"]; ok1 {
|
||||
if _, ok2 := si.tags["type-id"]; ok2 {
|
||||
m.raplZoneInfo = append(m.raplZoneInfo, si)
|
||||
}
|
||||
}
|
||||
m.RAPLZoneInfo =
|
||||
append(
|
||||
m.RAPLZoneInfo,
|
||||
RAPLZoneInfo{
|
||||
tags: map[string]string{
|
||||
"id": zoneID + ":" + subZoneID,
|
||||
"zone_name": z.name,
|
||||
"sub_zone_name": sz.name,
|
||||
},
|
||||
energyFilepath: sz.energyFilepath,
|
||||
energy: sz.energy,
|
||||
energyTimestamp: sz.energyTimestamp,
|
||||
maxEnergyRange: sz.maxEnergyRange,
|
||||
})
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if m.raplZoneInfo == nil {
|
||||
if m.RAPLZoneInfo == nil {
|
||||
return fmt.Errorf("no running average power limit (RAPL) device found in %s", controlTypePath)
|
||||
|
||||
}
|
||||
@@ -277,7 +205,7 @@ func (m *RAPLCollector) Init(config json.RawMessage) error {
|
||||
cclog.ComponentDebug(
|
||||
m.name,
|
||||
"initialized",
|
||||
len(m.raplZoneInfo),
|
||||
len(m.RAPLZoneInfo),
|
||||
"zones with running average power limit (RAPL) monitoring attributes")
|
||||
m.init = true
|
||||
|
||||
@@ -288,89 +216,40 @@ func (m *RAPLCollector) Init(config json.RawMessage) error {
|
||||
// See: https://www.kernel.org/doc/html/latest/power/powercap/powercap.html#monitoring-attributes
|
||||
func (m *RAPLCollector) Read(interval time.Duration, output chan lp.CCMessage) {
|
||||
|
||||
for i := range m.raplZoneInfo {
|
||||
p := &m.raplZoneInfo[i]
|
||||
for i := range m.RAPLZoneInfo {
|
||||
p := &m.RAPLZoneInfo[i]
|
||||
|
||||
if os.Geteuid() == 0 && (!m.config.SkipEnergyReading) {
|
||||
// Read current value of the energy counter in micro joules
|
||||
if v, err := os.ReadFile(p.energyFilepath); err == nil {
|
||||
energyTimestamp := time.Now()
|
||||
if i, err := strconv.ParseInt(strings.TrimSpace(string(v)), 10, 64); err == nil {
|
||||
energy := i
|
||||
// Read current value of the energy counter in micro joules
|
||||
if v, err := os.ReadFile(p.energyFilepath); err == nil {
|
||||
energyTimestamp := time.Now()
|
||||
if i, err := strconv.ParseInt(strings.TrimSpace(string(v)), 10, 64); err == nil {
|
||||
energy := i
|
||||
|
||||
// Compute average power (Δ energy / Δ time)
|
||||
energyDiff := energy - p.energy
|
||||
if energyDiff < 0 {
|
||||
// Handle overflow:
|
||||
// ( p.maxEnergyRange - p.energy ) + energy
|
||||
// = p.maxEnergyRange + ( energy - p.energy )
|
||||
// = p.maxEnergyRange + diffEnergy
|
||||
energyDiff += p.maxEnergyRange
|
||||
}
|
||||
timeDiff := energyTimestamp.Sub(p.energyTimestamp)
|
||||
averagePower := float64(energyDiff) / float64(timeDiff.Microseconds())
|
||||
|
||||
y, err := lp.NewMetric(
|
||||
fmt.Sprintf("rapl_%s_average_power", p.name),
|
||||
p.tags,
|
||||
m.meta,
|
||||
averagePower,
|
||||
energyTimestamp)
|
||||
if err == nil {
|
||||
output <- y
|
||||
}
|
||||
|
||||
e, err := lp.NewMetric(
|
||||
fmt.Sprintf("rapl_%s_energy", p.name),
|
||||
p.tags,
|
||||
m.meta,
|
||||
float64(energyDiff)*1e-3,
|
||||
energyTimestamp)
|
||||
if err == nil {
|
||||
e.AddMeta("unit", "Joules")
|
||||
output <- e
|
||||
}
|
||||
|
||||
// Save current energy counter state
|
||||
p.energy = energy
|
||||
p.energyTimestamp = energyTimestamp
|
||||
// Compute average power (Δ energy / Δ time)
|
||||
energyDiff := energy - p.energy
|
||||
if energyDiff < 0 {
|
||||
// Handle overflow:
|
||||
// ( p.maxEnergyRange - p.energy ) + energy
|
||||
// = p.maxEnergyRange + ( energy - p.energy )
|
||||
// = p.maxEnergyRange + diffEnergy
|
||||
energyDiff += p.maxEnergyRange
|
||||
}
|
||||
}
|
||||
}
|
||||
// https://www.kernel.org/doc/html/latest/power/powercap/powercap.html#constraints
|
||||
if !m.config.SkipLimitsReading {
|
||||
skip := false
|
||||
if m.config.OnlyEnabledLimits {
|
||||
if v, err := os.ReadFile(p.enabledFilepath); err == nil {
|
||||
if strings.TrimSpace(string(v)) == "0" {
|
||||
skip = true
|
||||
}
|
||||
}
|
||||
}
|
||||
if !skip {
|
||||
if len(p.shortTermFilepath) > 0 {
|
||||
if v, err := os.ReadFile(p.shortTermFilepath); err == nil {
|
||||
if i, err := strconv.ParseInt(strings.TrimSpace(string(v)), 10, 64); err == nil {
|
||||
name := fmt.Sprintf("rapl_%s_limit_short_term", p.name)
|
||||
y, err := lp.NewMetric(name, p.tags, m.meta, i/1e6, time.Now())
|
||||
if err == nil {
|
||||
output <- y
|
||||
}
|
||||
}
|
||||
}
|
||||
timeDiff := energyTimestamp.Sub(p.energyTimestamp)
|
||||
averagePower := float64(energyDiff) / float64(timeDiff.Microseconds())
|
||||
|
||||
y, err := lp.NewMessage(
|
||||
"rapl_average_power",
|
||||
p.tags,
|
||||
m.meta,
|
||||
map[string]interface{}{"value": averagePower},
|
||||
energyTimestamp)
|
||||
if err == nil {
|
||||
output <- y
|
||||
}
|
||||
|
||||
if len(p.longTermFilepath) > 0 {
|
||||
if v, err := os.ReadFile(p.longTermFilepath); err == nil {
|
||||
if i, err := strconv.ParseInt(strings.TrimSpace(string(v)), 10, 64); err == nil {
|
||||
name := fmt.Sprintf("rapl_%s_limit_long_term", p.name)
|
||||
y, err := lp.NewMetric(name, p.tags, m.meta, i/1e6, time.Now())
|
||||
if err == nil {
|
||||
output <- y
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
// Save current energy counter state
|
||||
p.energy = energy
|
||||
p.energyTimestamp = energyTimestamp
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@@ -1,23 +1,15 @@
|
||||
## `rapl` collector
|
||||
|
||||
This collector reads running average power limit (RAPL) monitoring attributes to compute average power consumption metrics. See <https://www.kernel.org/doc/html/latest/power/powercap/powercap.html>.
|
||||
This collector reads running average power limit (RAPL) monitoring attributes to compute average power consumption metrics. See <https://www.kernel.org/doc/html/latest/power/powercap/powercap.html#monitoring-attributes>.
|
||||
|
||||
The Likwid metric collector provides similar functionality.
|
||||
|
||||
```json
|
||||
"rapl": {
|
||||
"exclude_device_by_id": ["0:1", "0:2"],
|
||||
"exclude_device_by_name": ["psys"],
|
||||
"skip_energy_reading": false,
|
||||
"skip_limits_reading": false,
|
||||
"only_enabled_limits": true
|
||||
"exclude_device_by_name": ["psys"]
|
||||
}
|
||||
```
|
||||
|
||||
Metrics:
|
||||
* `rapl_<domain>_average_power`: average power consumption in Watt. The average is computed over the entire runtime from the last measurement to the current measurement
|
||||
* `rapl_<domain>_energy`: Difference from the last measurement
|
||||
* `rapl_<domain>_limit_short_term`: Short term powercap setting for the domain
|
||||
* `rapl_<domain>_limit_long_term`: Long term powercap setting for the domain
|
||||
|
||||
Only the `rapl_<domain>_average_power` and `rapl_<domain>_energy` metrics require root-permissions. The limits can be read as user. Some domains have limits available but they are not enabled. By default, only enabled domain limits are collected.
|
||||
|
||||
Energy and power measurments can also be done with the Likwid metric collector.
|
||||
* `rapl_average_power`: average power consumption in Watt. The average is computed over the entire runtime from the last measurement to the current measurement
|
||||
|
36
go.mod
36
go.mod
@@ -1,12 +1,11 @@
|
||||
module github.com/ClusterCockpit/cc-metric-collector
|
||||
|
||||
go 1.23.0
|
||||
go 1.21.1
|
||||
|
||||
toolchain go1.23.2
|
||||
toolchain go1.22.1
|
||||
|
||||
require (
|
||||
github.com/ClusterCockpit/cc-energy-manager v0.0.0-20240709142550-dd446f7ab900
|
||||
github.com/ClusterCockpit/cc-lib v0.1.0-beta.1
|
||||
github.com/ClusterCockpit/cc-units v0.4.0
|
||||
github.com/ClusterCockpit/go-rocm-smi v0.3.0
|
||||
github.com/NVIDIA/go-nvml v0.12.0-2
|
||||
@@ -14,36 +13,33 @@ require (
|
||||
github.com/expr-lang/expr v1.16.9
|
||||
github.com/fsnotify/fsnotify v1.7.0
|
||||
github.com/gorilla/mux v1.8.1
|
||||
github.com/influxdata/influxdb-client-go/v2 v2.14.0
|
||||
github.com/influxdata/influxdb-client-go/v2 v2.13.0
|
||||
github.com/influxdata/line-protocol v0.0.0-20210922203350-b1ad95c89adf
|
||||
github.com/influxdata/line-protocol/v2 v2.2.1
|
||||
github.com/nats-io/nats.go v1.39.0
|
||||
github.com/prometheus/client_golang v1.20.5
|
||||
github.com/nats-io/nats.go v1.36.0
|
||||
github.com/prometheus/client_golang v1.19.0
|
||||
github.com/stmcginnis/gofish v0.15.0
|
||||
github.com/tklauser/go-sysconf v0.3.13
|
||||
golang.design/x/thread v0.0.0-20210122121316-335e9adffdf1
|
||||
golang.org/x/exp v0.0.0-20250215185904-eff6e970281f
|
||||
golang.org/x/sys v0.28.0
|
||||
golang.org/x/exp v0.0.0-20240613232115-7f521ea00fb8
|
||||
golang.org/x/sys v0.18.0
|
||||
)
|
||||
|
||||
require (
|
||||
github.com/ClusterCockpit/cc-backend v1.4.2 // indirect
|
||||
github.com/apapsch/go-jsonmerge/v2 v2.0.0 // indirect
|
||||
github.com/beorn7/perks v1.0.1 // indirect
|
||||
github.com/cespare/xxhash/v2 v2.3.0 // indirect
|
||||
github.com/cespare/xxhash/v2 v2.2.0 // indirect
|
||||
github.com/google/uuid v1.6.0 // indirect
|
||||
github.com/klauspost/compress v1.17.9 // indirect
|
||||
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect
|
||||
github.com/nats-io/nkeys v0.4.9 // indirect
|
||||
github.com/klauspost/compress v1.17.7 // indirect
|
||||
github.com/nats-io/nkeys v0.4.7 // indirect
|
||||
github.com/nats-io/nuid v1.0.1 // indirect
|
||||
github.com/oapi-codegen/runtime v1.1.1 // indirect
|
||||
github.com/prometheus/client_model v0.6.1 // indirect
|
||||
github.com/prometheus/common v0.55.0 // indirect
|
||||
github.com/prometheus/procfs v0.15.1 // indirect
|
||||
github.com/santhosh-tekuri/jsonschema/v5 v5.3.1 // indirect
|
||||
github.com/prometheus/client_model v0.6.0 // indirect
|
||||
github.com/prometheus/common v0.49.0 // indirect
|
||||
github.com/prometheus/procfs v0.12.0 // indirect
|
||||
github.com/shopspring/decimal v1.3.1 // indirect
|
||||
github.com/tklauser/numcpus v0.7.0 // indirect
|
||||
golang.org/x/crypto v0.31.0 // indirect
|
||||
golang.org/x/net v0.31.0 // indirect
|
||||
google.golang.org/protobuf v1.35.2 // indirect
|
||||
golang.org/x/crypto v0.21.0 // indirect
|
||||
golang.org/x/net v0.22.0 // indirect
|
||||
google.golang.org/protobuf v1.33.0 // indirect
|
||||
)
|
||||
|
69
go.sum
69
go.sum
@@ -1,9 +1,5 @@
|
||||
github.com/ClusterCockpit/cc-backend v1.4.2 h1:kTOzqkh9N0564N9nqQThnSs7TAfg8RLgvSm00e5HtIc=
|
||||
github.com/ClusterCockpit/cc-backend v1.4.2/go.mod h1:g8TNHXe4AXej26snu2//jO3mUF980elT93iV/k11O/c=
|
||||
github.com/ClusterCockpit/cc-energy-manager v0.0.0-20240709142550-dd446f7ab900 h1:6+WNav16uWTEDC09hkZKEHfBhtc91p/ZcjgCtyntuIg=
|
||||
github.com/ClusterCockpit/cc-energy-manager v0.0.0-20240709142550-dd446f7ab900/go.mod h1:EbYeC5t+Y0kW1Q1pP2n9zMqbeYEJITG8YGvAUihXVn4=
|
||||
github.com/ClusterCockpit/cc-lib v0.1.0-beta.1 h1:dz9j0g2cod8+SMDjuoIY6ISpiHHeekhX6yQaeiwiwJw=
|
||||
github.com/ClusterCockpit/cc-lib v0.1.0-beta.1/go.mod h1:kXMskla1i5ZSfXW0vVRIHgGeXMU5zu2PzYOYnUaOr80=
|
||||
github.com/ClusterCockpit/cc-units v0.4.0 h1:zP5DOu99GmErW0tCDf0gcLrlWt42RQ9dpoONEOh4cI0=
|
||||
github.com/ClusterCockpit/cc-units v0.4.0/go.mod h1:3S3PAhAayS3pbgcT4q9Vn9VJw22Op51X0YimtG77zBw=
|
||||
github.com/ClusterCockpit/go-rocm-smi v0.3.0 h1:1qZnSpG7/NyLtc7AjqnUL9Jb8xtqG1nMVgp69rJfaR8=
|
||||
@@ -21,8 +17,8 @@ github.com/apapsch/go-jsonmerge/v2 v2.0.0/go.mod h1:lvDnEdqiQrp0O42VQGgmlKpxL1AP
|
||||
github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM=
|
||||
github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw=
|
||||
github.com/bmatcuk/doublestar v1.1.1/go.mod h1:UD6OnuiIn0yFxxA2le/rnRU1G4RaI4UvFv1sNto9p6w=
|
||||
github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs=
|
||||
github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=
|
||||
github.com/cespare/xxhash/v2 v2.2.0 h1:DC2CZ1Ep5Y4k3ZQ899DldepgrayRUGE6BBZ/cd9Cj44=
|
||||
github.com/cespare/xxhash/v2 v2.2.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=
|
||||
github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E=
|
||||
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
||||
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
|
||||
@@ -43,8 +39,8 @@ github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
|
||||
github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
|
||||
github.com/gorilla/mux v1.8.1 h1:TuBL49tXwgrFYWhqrNgrUNEY92u81SPhu7sTdzQEiWY=
|
||||
github.com/gorilla/mux v1.8.1/go.mod h1:AKf9I4AEqPTmMytcMc0KkNouC66V3BtZ4qD5fmWSiMQ=
|
||||
github.com/influxdata/influxdb-client-go/v2 v2.14.0 h1:AjbBfJuq+QoaXNcrova8smSjwJdUHnwvfjMF71M1iI4=
|
||||
github.com/influxdata/influxdb-client-go/v2 v2.14.0/go.mod h1:Ahpm3QXKMJslpXl3IftVLVezreAUtBOTZssDrjZEFHI=
|
||||
github.com/influxdata/influxdb-client-go/v2 v2.13.0 h1:ioBbLmR5NMbAjP4UVA5r9b5xGjpABD7j65pI8kFphDM=
|
||||
github.com/influxdata/influxdb-client-go/v2 v2.13.0/go.mod h1:k+spCbt9hcvqvUiz0sr5D8LolXHqAAOfPw9v/RIRHl4=
|
||||
github.com/influxdata/line-protocol v0.0.0-20210922203350-b1ad95c89adf h1:7JTmneyiNEwVBOHSjoMxiWAqB992atOeepeFYegn5RU=
|
||||
github.com/influxdata/line-protocol v0.0.0-20210922203350-b1ad95c89adf/go.mod h1:xaLFMmpvUxqXtVkUJfg9QmT88cDaCJ3ZKgdZ78oO8Qo=
|
||||
github.com/influxdata/line-protocol-corpus v0.0.0-20210519164801-ca6fa5da0184/go.mod h1:03nmhxzZ7Xk2pdG+lmMd7mHDfeVOYFyhOgwO61qWU98=
|
||||
@@ -55,8 +51,8 @@ github.com/influxdata/line-protocol/v2 v2.1.0/go.mod h1:QKw43hdUBg3GTk2iC3iyCxks
|
||||
github.com/influxdata/line-protocol/v2 v2.2.1 h1:EAPkqJ9Km4uAxtMRgUubJyqAr6zgWM0dznKMLRauQRE=
|
||||
github.com/influxdata/line-protocol/v2 v2.2.1/go.mod h1:DmB3Cnh+3oxmG6LOBIxce4oaL4CPj3OmMPgvauXh+tM=
|
||||
github.com/juju/gnuflag v0.0.0-20171113085948-2ce1bb71843d/go.mod h1:2PavIy+JPciBPrBUjwbNvtwB6RQlve+hkpll6QSNmOE=
|
||||
github.com/klauspost/compress v1.17.9 h1:6KIumPrER1LHsvBVuDa0r5xaG0Es51mhhB9BQB2qeMA=
|
||||
github.com/klauspost/compress v1.17.9/go.mod h1:Di0epgTjJY877eYKx5yC51cX2A2Vl2ibi7bDH9ttBbw=
|
||||
github.com/klauspost/compress v1.17.7 h1:ehO88t2UGzQK66LMdE8tibEd1ErmzZjNEqWkjLAKQQg=
|
||||
github.com/klauspost/compress v1.17.7/go.mod h1:Di0epgTjJY877eYKx5yC51cX2A2Vl2ibi7bDH9ttBbw=
|
||||
github.com/kr/pretty v0.2.1/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI=
|
||||
github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE=
|
||||
github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk=
|
||||
@@ -64,14 +60,10 @@ github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=
|
||||
github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI=
|
||||
github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
|
||||
github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
|
||||
github.com/kylelemons/godebug v1.1.0 h1:RPNrshWIDI6G2gRW9EHilWtl7Z6Sb1BR0xunSBf0SNc=
|
||||
github.com/kylelemons/godebug v1.1.0/go.mod h1:9/0rRGxNHcop5bhtWyNeEfOS8JIWk580+fNqagV/RAw=
|
||||
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq1c1nUAm88MOHcQC9l5mIlSMApZMrHA=
|
||||
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ=
|
||||
github.com/nats-io/nats.go v1.39.0 h1:2/yg2JQjiYYKLwDuBzV0FbB2sIV+eFNkEevlRi4n9lI=
|
||||
github.com/nats-io/nats.go v1.39.0/go.mod h1:MgRb8oOdigA6cYpEPhXJuRVH6UE/V4jblJ2jQ27IXYM=
|
||||
github.com/nats-io/nkeys v0.4.9 h1:qe9Faq2Gxwi6RZnZMXfmGMZkg3afLLOtrU+gDZJ35b0=
|
||||
github.com/nats-io/nkeys v0.4.9/go.mod h1:jcMqs+FLG+W5YO36OX6wFIFcmpdAns+w1Wm6D3I/evE=
|
||||
github.com/nats-io/nats.go v1.36.0 h1:suEUPuWzTSse/XhESwqLxXGuj8vGRuPRoG7MoRN/qyU=
|
||||
github.com/nats-io/nats.go v1.36.0/go.mod h1:Ubdu4Nh9exXdSz0RVWRFBbRfrbSxOYd26oF0wkWclB8=
|
||||
github.com/nats-io/nkeys v0.4.7 h1:RwNJbbIdYCoClSDNY7QVKZlyb/wfT6ugvFCiKy6vDvI=
|
||||
github.com/nats-io/nkeys v0.4.7/go.mod h1:kqXRgRDPlGy7nGaEDMuYzmiJCIAAWDK0IMBtDmGD0nc=
|
||||
github.com/nats-io/nuid v1.0.1 h1:5iA8DT8V7q8WK2EScv2padNa/rTESc1KdnPw4TC2paw=
|
||||
github.com/nats-io/nuid v1.0.1/go.mod h1:19wcPz3Ph3q0Jbyiqsd0kePYG7A95tJPxeL+1OSON2c=
|
||||
github.com/niemeyer/pretty v0.0.0-20200227124842-a10e7caefd8e/go.mod h1:zD1mROLANZcx1PVRCS0qkT7pwLkGfwJo4zjcN/Tysno=
|
||||
@@ -79,18 +71,16 @@ github.com/oapi-codegen/runtime v1.1.1 h1:EXLHh0DXIJnWhdRPN2w4MXAzFyE4CskzhNLUmt
|
||||
github.com/oapi-codegen/runtime v1.1.1/go.mod h1:SK9X900oXmPWilYR5/WKPzt3Kqxn/uS/+lbpREv+eCg=
|
||||
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
|
||||
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
|
||||
github.com/prometheus/client_golang v1.20.5 h1:cxppBPuYhUnsO6yo/aoRol4L7q7UFfdm+bR9r+8l63Y=
|
||||
github.com/prometheus/client_golang v1.20.5/go.mod h1:PIEt8X02hGcP8JWbeHyeZ53Y/jReSnHgO035n//V5WE=
|
||||
github.com/prometheus/client_model v0.6.1 h1:ZKSh/rekM+n3CeS952MLRAdFwIKqeY8b62p8ais2e9E=
|
||||
github.com/prometheus/client_model v0.6.1/go.mod h1:OrxVMOVHjw3lKMa8+x6HeMGkHMQyHDk9E3jmP2AmGiY=
|
||||
github.com/prometheus/common v0.55.0 h1:KEi6DK7lXW/m7Ig5i47x0vRzuBsHuvJdi5ee6Y3G1dc=
|
||||
github.com/prometheus/common v0.55.0/go.mod h1:2SECS4xJG1kd8XF9IcM1gMX6510RAEL65zxzNImwdc8=
|
||||
github.com/prometheus/procfs v0.15.1 h1:YagwOFzUgYfKKHX6Dr+sHT7km/hxC76UB0learggepc=
|
||||
github.com/prometheus/procfs v0.15.1/go.mod h1:fB45yRUv8NstnjriLhBQLuOUt+WW4BsoGhij/e3PBqk=
|
||||
github.com/prometheus/client_golang v1.19.0 h1:ygXvpU1AoN1MhdzckN+PyD9QJOSD4x7kmXYlnfbA6JU=
|
||||
github.com/prometheus/client_golang v1.19.0/go.mod h1:ZRM9uEAypZakd+q/x7+gmsvXdURP+DABIEIjnmDdp+k=
|
||||
github.com/prometheus/client_model v0.6.0 h1:k1v3CzpSRUTrKMppY35TLwPvxHqBu0bYgxZzqGIgaos=
|
||||
github.com/prometheus/client_model v0.6.0/go.mod h1:NTQHnmxFpouOD0DpvP4XujX3CdOAGQPoaGhyTchlyt8=
|
||||
github.com/prometheus/common v0.49.0 h1:ToNTdK4zSnPVJmh698mGFkDor9wBI/iGaJy5dbH1EgI=
|
||||
github.com/prometheus/common v0.49.0/go.mod h1:Kxm+EULxRbUkjGU6WFsQqo3ORzB4tyKvlWFOE9mB2sE=
|
||||
github.com/prometheus/procfs v0.12.0 h1:jluTpSng7V9hY0O2R9DzzJHYb2xULk9VTR1V1R/k6Bo=
|
||||
github.com/prometheus/procfs v0.12.0/go.mod h1:pcuDEFsWDnvcgNzo4EEweacyhjeA9Zk3cnaOZAZEfOo=
|
||||
github.com/rogpeppe/go-internal v1.10.0 h1:TMyTOH3F/DB16zRVcYyreMH6GnZZrwQVAoYjRBZyWFQ=
|
||||
github.com/rogpeppe/go-internal v1.10.0/go.mod h1:UQnix2H7Ngw/k4C5ijL5+65zddjncjaFoBhdsK/akog=
|
||||
github.com/santhosh-tekuri/jsonschema/v5 v5.3.1 h1:lZUw3E0/J3roVtGQ+SCrUrg3ON6NgVqpn3+iol9aGu4=
|
||||
github.com/santhosh-tekuri/jsonschema/v5 v5.3.1/go.mod h1:uToXkOrWAZ6/Oc07xWQrPOhJotwFIyu2bBVN41fcDUY=
|
||||
github.com/shopspring/decimal v1.3.1 h1:2Usl1nmF/WZucqkFZhnfFYxxxu8LG21F6nPQBE5gKV8=
|
||||
github.com/shopspring/decimal v1.3.1/go.mod h1:DKyhrW/HYNuLGql+MJL6WCR6knT2jwCFRcu2hWCYk4o=
|
||||
github.com/spkg/bom v0.0.0-20160624110644-59b7046e48ad/go.mod h1:qLr4V1qq6nMqFKkMo8ZTx3f+BZEkzsRUY10Xsm2mwU0=
|
||||
@@ -102,27 +92,26 @@ github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpE
|
||||
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
|
||||
github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
|
||||
github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU=
|
||||
github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk=
|
||||
github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo=
|
||||
github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg=
|
||||
github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
|
||||
github.com/tklauser/go-sysconf v0.3.13 h1:GBUpcahXSpR2xN01jhkNAbTLRk2Yzgggk8IM08lq3r4=
|
||||
github.com/tklauser/go-sysconf v0.3.13/go.mod h1:zwleP4Q4OehZHGn4CYZDipCgg9usW5IJePewFCGVEa0=
|
||||
github.com/tklauser/numcpus v0.7.0 h1:yjuerZP127QG9m5Zh/mSO4wqurYil27tHrqwRoRjpr4=
|
||||
github.com/tklauser/numcpus v0.7.0/go.mod h1:bb6dMVcj8A42tSE7i32fsIUCbQNllK5iDguyOZRUzAY=
|
||||
golang.design/x/thread v0.0.0-20210122121316-335e9adffdf1 h1:P7S/GeHBAFEZIYp0ePPs2kHXoazz8q2KsyxHyQVGCJg=
|
||||
golang.design/x/thread v0.0.0-20210122121316-335e9adffdf1/go.mod h1:9CWpnTUmlQkfdpdutA1nNf4iE5lAVt3QZOu0Z6hahBE=
|
||||
golang.org/x/crypto v0.31.0 h1:ihbySMvVjLAeSH1IbfcRTkD/iNscyz8rGzjF/E5hV6U=
|
||||
golang.org/x/crypto v0.31.0/go.mod h1:kDsLvtWBEx7MV9tJOj9bnXsPbxwJQ6csT/x4KIN4Ssk=
|
||||
golang.org/x/exp v0.0.0-20250215185904-eff6e970281f h1:oFMYAjX0867ZD2jcNiLBrI9BdpmEkvPyi5YrBGXbamg=
|
||||
golang.org/x/exp v0.0.0-20250215185904-eff6e970281f/go.mod h1:BHOTPb3L19zxehTsLoJXVaTktb06DFgmdW6Wb9s8jqk=
|
||||
golang.org/x/net v0.31.0 h1:68CPQngjLL0r2AlUKiSxtQFKvzRVbnzLwMUn5SzcLHo=
|
||||
golang.org/x/net v0.31.0/go.mod h1:P4fl1q7dY2hnZFxEk4pPSkDHF+QqjitcnDjUQyMM+pM=
|
||||
golang.org/x/crypto v0.21.0 h1:X31++rzVUdKhX5sWmSOFZxx8UW/ldWx55cbf08iNAMA=
|
||||
golang.org/x/crypto v0.21.0/go.mod h1:0BP7YvVV9gBbVKyeTG0Gyn+gZm94bibOW5BjDEYAOMs=
|
||||
golang.org/x/exp v0.0.0-20240613232115-7f521ea00fb8 h1:yixxcjnhBmY0nkL253HFVIm0JsFHwrHdT3Yh6szTnfY=
|
||||
golang.org/x/exp v0.0.0-20240613232115-7f521ea00fb8/go.mod h1:jj3sYF3dwk5D+ghuXyeI3r5MFf+NT2An6/9dOA95KSI=
|
||||
golang.org/x/net v0.22.0 h1:9sGLhx7iRIHEiX0oAJ3MRZMUCElJgy7Br1nO+AMN3Tc=
|
||||
golang.org/x/net v0.22.0/go.mod h1:JKghWKKOSdJwpW2GEx0Ja7fmaKnMsbu+MWVZTokSYmg=
|
||||
golang.org/x/sys v0.0.0-20210122093101-04d7465088b8/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
golang.org/x/sys v0.28.0 h1:Fksou7UEQUWlKvIdsqzJmUmCX3cZuD2+P3XyyzwMhlA=
|
||||
golang.org/x/sys v0.28.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
|
||||
golang.org/x/sys v0.18.0 h1:DBdB3niSjOA/O0blCZBqDefyWNYveAYMNF1Wum0DYQ4=
|
||||
golang.org/x/sys v0.18.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
|
||||
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
|
||||
google.golang.org/protobuf v1.35.2 h1:8Ar7bF+apOIoThw1EdZl0p1oWvMqTHmpA2fRTyZO8io=
|
||||
google.golang.org/protobuf v1.35.2/go.mod h1:9fA7Ob0pmnwhb644+1+CVWFRbNajQ6iRojtC/QF5bRE=
|
||||
google.golang.org/protobuf v1.33.0 h1:uNO2rsAINq/JlFpSdYEKIZ0uKD/R9cpdv0T+yoGwGmI=
|
||||
google.golang.org/protobuf v1.33.0/go.mod h1:c6P6GXX6sHbq/GpV6MGZEdwhWPcYBgnhAHhKbcUYpos=
|
||||
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
|
||||
gopkg.in/check.v1 v1.0.0-20200227125254-8fa46927fb4f/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
|
||||
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
|
||||
|
@@ -40,7 +40,7 @@ type metricRouterConfig struct {
|
||||
NormalizeUnits bool `json:"normalize_units"` // Check unit meta flag and normalize it using cc-units
|
||||
ChangeUnitPrefix map[string]string `json:"change_unit_prefix"` // Add prefix that should be applied to the metrics
|
||||
// dropMetrics map[string]bool // Internal map for O(1) lookup
|
||||
MessageProcessor json.RawMessage `json:"process_messages,omitempty"`
|
||||
MessageProcessor json.RawMessage `json:"process_message,omitempty"`
|
||||
}
|
||||
|
||||
// Metric router data structure
|
||||
|
Reference in New Issue
Block a user