Mirror of https://github.com/ClusterCockpit/cc-metric-collector.git (synced 2025-07-31 08:56:06 +02:00)
Ccmessage migration (#119)
* Add cpu_used (all-cpu_idle) to CpustatCollector
* Update cc-metric-collector.init
* Allow selection of timestamp precision in HttpSink
* Add comment about precision requirement for cc-metric-store
* Fix for API changes in gofish@v0.15.0
* Update requirements to latest version
* Read sensors through redfish
* Update golang toolchain to 1.21
* Remove stray error check
* Update main config in configuration.md
* Update Release action to use golang 1.22 stable release, no golang RPMs anymore
* Update runonce action to use golang 1.22 stable release, no golang RPMs anymore
* Switch to CCMessage for all files

---------

Co-authored-by: Holger Obermaier <Holger.Obermaier@kit.edu>
Co-authored-by: Holger Obermaier <40787752+ho-ob@users.noreply.github.com>
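The core of this commit is the collector-side switch from the in-tree ccMetric package to the shared cc-message package: every collector's output channel changes from chan lp.CCMetric to chan lp.CCMessage, and messages are created with lp.NewMessage instead of lp.New, as the hunks below show. A minimal sketch of the new pattern follows; the collector type and metric name are made up for illustration, only the lp import path and calls are taken from the diff:

```go
package collectors

import (
	"time"

	lp "github.com/ClusterCockpit/cc-energy-manager/pkg/cc-message"
)

// ExampleCollector is a hypothetical collector used only to illustrate the
// migrated API; the real collectors in this repository follow the same shape.
type ExampleCollector struct {
	tags map[string]string
	meta map[string]string
}

// Read now takes a chan lp.CCMessage instead of chan lp.CCMetric.
func (m *ExampleCollector) Read(interval time.Duration, output chan lp.CCMessage) {
	// lp.NewMessage replaces lp.New but keeps the same argument list:
	// name, tags, meta, fields, timestamp.
	y, err := lp.NewMessage("example_metric", m.tags, m.meta,
		map[string]interface{}{"value": 42.0}, time.Now())
	if err == nil {
		y.AddMeta("unit", "MHz") // metadata is attached exactly as before
		output <- y
	}
}
```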
@@ -15,7 +15,7 @@ import (
"time"

cclog "github.com/ClusterCockpit/cc-metric-collector/pkg/ccLogger"
- lp "github.com/ClusterCockpit/cc-metric-collector/pkg/ccMetric"
+ lp "github.com/ClusterCockpit/cc-energy-manager/pkg/cc-message"
)

const DEFAULT_BEEGFS_CMD = "beegfs-ctl"
@@ -110,7 +110,7 @@ func (m *BeegfsMetaCollector) Init(config json.RawMessage) error {
return nil
}

- func (m *BeegfsMetaCollector) Read(interval time.Duration, output chan lp.CCMetric) {
+ func (m *BeegfsMetaCollector) Read(interval time.Duration, output chan lp.CCMessage) {
if !m.init {
return
}
@@ -216,7 +216,7 @@ func (m *BeegfsMetaCollector) Read(interval time.Duration, output chan lp.CCMetr

for key, data := range m.matches {
value, _ := strconv.ParseFloat(data, 32)
- y, err := lp.New(key, m.tags, m.meta, map[string]interface{}{"value": value}, time.Now())
+ y, err := lp.NewMessage(key, m.tags, m.meta, map[string]interface{}{"value": value}, time.Now())
if err == nil {
output <- y
}

@@ -15,7 +15,7 @@ import (
"time"

cclog "github.com/ClusterCockpit/cc-metric-collector/pkg/ccLogger"
- lp "github.com/ClusterCockpit/cc-metric-collector/pkg/ccMetric"
+ lp "github.com/ClusterCockpit/cc-energy-manager/pkg/cc-message"
)

// Struct for the collector-specific JSON config
@@ -103,7 +103,7 @@ func (m *BeegfsStorageCollector) Init(config json.RawMessage) error {
return nil
}

- func (m *BeegfsStorageCollector) Read(interval time.Duration, output chan lp.CCMetric) {
+ func (m *BeegfsStorageCollector) Read(interval time.Duration, output chan lp.CCMessage) {
if !m.init {
return
}
@@ -208,7 +208,7 @@ func (m *BeegfsStorageCollector) Read(interval time.Duration, output chan lp.CCM

for key, data := range m.matches {
value, _ := strconv.ParseFloat(data, 32)
- y, err := lp.New(key, m.tags, m.meta, map[string]interface{}{"value": value}, time.Now())
+ y, err := lp.NewMessage(key, m.tags, m.meta, map[string]interface{}{"value": value}, time.Now())
if err == nil {
output <- y
}

@@ -7,7 +7,7 @@ import (
"time"

cclog "github.com/ClusterCockpit/cc-metric-collector/pkg/ccLogger"
- lp "github.com/ClusterCockpit/cc-metric-collector/pkg/ccMetric"
+ lp "github.com/ClusterCockpit/cc-energy-manager/pkg/cc-message"
mct "github.com/ClusterCockpit/cc-metric-collector/pkg/multiChanTicker"
)

@@ -47,7 +47,7 @@ var AvailableCollectors = map[string]MetricCollector{
type collectorManager struct {
collectors []MetricCollector // List of metric collectors to read in parallel
serial []MetricCollector // List of metric collectors to read serially
- output chan lp.CCMetric // Output channels
+ output chan lp.CCMessage // Output channels
done chan bool // channel to finish / stop metric collector manager
ticker mct.MultiChanTicker // periodically ticking once each interval
duration time.Duration // duration (for metrics that measure over a given duration)
@@ -60,7 +60,7 @@ type collectorManager struct {
// Metric collector manager access functions
type CollectorManager interface {
Init(ticker mct.MultiChanTicker, duration time.Duration, wg *sync.WaitGroup, collectConfigFile string) error
- AddOutput(output chan lp.CCMetric)
+ AddOutput(output chan lp.CCMessage)
Start()
Close()
}
@@ -187,7 +187,7 @@ func (cm *collectorManager) Start() {
}

// AddOutput adds the output channel to the metric collector manager
- func (cm *collectorManager) AddOutput(output chan lp.CCMetric) {
+ func (cm *collectorManager) AddOutput(output chan lp.CCMessage) {
cm.output = output
}

@@ -10,8 +10,8 @@ import (
"strings"
"time"

+ lp "github.com/ClusterCockpit/cc-energy-manager/pkg/cc-message"
cclog "github.com/ClusterCockpit/cc-metric-collector/pkg/ccLogger"
- lp "github.com/ClusterCockpit/cc-metric-collector/pkg/ccMetric"
)

// CPUFreqCollector
@@ -112,14 +112,14 @@ func (m *CPUFreqCpuInfoCollector) Init(config json.RawMessage) error {

// Check if at least one CPU with frequency information was detected
if len(m.topology) == 0 {
- return fmt.Errorf("No CPU frequency info found in %s", cpuInfoFile)
+ return fmt.Errorf("no CPU frequency info found in %s", cpuInfoFile)
}

m.init = true
return nil
}

- func (m *CPUFreqCpuInfoCollector) Read(interval time.Duration, output chan lp.CCMetric) {
+ func (m *CPUFreqCpuInfoCollector) Read(interval time.Duration, output chan lp.CCMessage) {
// Check if already initialized
if !m.init {
return
@@ -154,7 +154,7 @@ func (m *CPUFreqCpuInfoCollector) Read(interval time.Duration, output chan lp.CC
fmt.Sprintf("Read(): Failed to convert cpu MHz '%s' to float64: %v", lineSplit[1], err))
return
}
- if y, err := lp.New("cpufreq", t.tagSet, m.meta, map[string]interface{}{"value": value}, now); err == nil {
+ if y, err := lp.NewMessage("cpufreq", t.tagSet, m.meta, map[string]interface{}{"value": value}, now); err == nil {
output <- y
}
}

@@ -10,7 +10,7 @@ import (
"time"

cclog "github.com/ClusterCockpit/cc-metric-collector/pkg/ccLogger"
- lp "github.com/ClusterCockpit/cc-metric-collector/pkg/ccMetric"
+ lp "github.com/ClusterCockpit/cc-energy-manager/pkg/cc-message"
"github.com/ClusterCockpit/cc-metric-collector/pkg/ccTopology"
"golang.org/x/sys/unix"
)
@@ -91,7 +91,7 @@ func (m *CPUFreqCollector) Init(config json.RawMessage) error {
return nil
}

- func (m *CPUFreqCollector) Read(interval time.Duration, output chan lp.CCMetric) {
+ func (m *CPUFreqCollector) Read(interval time.Duration, output chan lp.CCMessage) {
// Check if already initialized
if !m.init {
return
@@ -117,7 +117,7 @@ func (m *CPUFreqCollector) Read(interval time.Duration, output chan lp.CCMetric)
continue
}

- if y, err := lp.New("cpufreq", t.tagSet, m.meta, map[string]interface{}{"value": cpuFreq}, now); err == nil {
+ if y, err := lp.NewMessage("cpufreq", t.tagSet, m.meta, map[string]interface{}{"value": cpuFreq}, now); err == nil {
output <- y
}
}

@@ -10,7 +10,7 @@ import (
"time"

cclog "github.com/ClusterCockpit/cc-metric-collector/pkg/ccLogger"
- lp "github.com/ClusterCockpit/cc-metric-collector/pkg/ccMetric"
+ lp "github.com/ClusterCockpit/cc-energy-manager/pkg/cc-message"
sysconf "github.com/tklauser/go-sysconf"
)

@@ -105,7 +105,7 @@ func (m *CpustatCollector) Init(config json.RawMessage) error {
return nil
}

- func (m *CpustatCollector) parseStatLine(linefields []string, tags map[string]string, output chan lp.CCMetric, now time.Time, tsdelta time.Duration) {
+ func (m *CpustatCollector) parseStatLine(linefields []string, tags map[string]string, output chan lp.CCMessage, now time.Time, tsdelta time.Duration) {
values := make(map[string]float64)
clktck, _ := sysconf.Sysconf(sysconf.SC_CLK_TCK)
for match, index := range m.matches {
@@ -122,21 +122,21 @@ func (m *CpustatCollector) parseStatLine(linefields []string, tags map[string]st
sum := float64(0)
for name, value := range values {
sum += value
- y, err := lp.New(name, tags, m.meta, map[string]interface{}{"value": value * 100}, now)
+ y, err := lp.NewMessage(name, tags, m.meta, map[string]interface{}{"value": value * 100}, now)
if err == nil {
output <- y
}
}
if v, ok := values["cpu_idle"]; ok {
sum -= v
- y, err := lp.New("cpu_used", tags, m.meta, map[string]interface{}{"value": sum * 100}, now)
+ y, err := lp.NewMessage("cpu_used", tags, m.meta, map[string]interface{}{"value": sum * 100}, now)
if err == nil {
output <- y
}
}
}

- func (m *CpustatCollector) Read(interval time.Duration, output chan lp.CCMetric) {
+ func (m *CpustatCollector) Read(interval time.Duration, output chan lp.CCMessage) {
if !m.init {
return
}
@@ -162,7 +162,7 @@ func (m *CpustatCollector) Read(interval time.Duration, output chan lp.CCMetric)
}
}

- num_cpus_metric, err := lp.New("num_cpus",
+ num_cpus_metric, err := lp.NewMessage("num_cpus",
m.nodetags,
m.meta,
map[string]interface{}{"value": int(num_cpus)},

@@ -9,7 +9,7 @@ import (
"strings"
"time"

- lp "github.com/ClusterCockpit/cc-metric-collector/pkg/ccMetric"
+ lp "github.com/ClusterCockpit/cc-energy-manager/pkg/cc-message"
influx "github.com/influxdata/line-protocol"
)

@@ -75,7 +75,7 @@ var DefaultTime = func() time.Time {
return time.Unix(42, 0)
}

- func (m *CustomCmdCollector) Read(interval time.Duration, output chan lp.CCMetric) {
+ func (m *CustomCmdCollector) Read(interval time.Duration, output chan lp.CCMessage) {
if !m.init {
return
}

@@ -9,7 +9,7 @@ import (
"time"

cclog "github.com/ClusterCockpit/cc-metric-collector/pkg/ccLogger"
- lp "github.com/ClusterCockpit/cc-metric-collector/pkg/ccMetric"
+ lp "github.com/ClusterCockpit/cc-energy-manager/pkg/cc-message"
)

// "log"
@@ -48,7 +48,7 @@ func (m *DiskstatCollector) Init(config json.RawMessage) error {
return nil
}

- func (m *DiskstatCollector) Read(interval time.Duration, output chan lp.CCMetric) {
+ func (m *DiskstatCollector) Read(interval time.Duration, output chan lp.CCMessage) {
if !m.init {
return
}
@@ -92,13 +92,13 @@ func (m *DiskstatCollector) Read(interval time.Duration, output chan lp.CCMetric
}
tags := map[string]string{"type": "node", "device": linefields[0]}
total := (stat.Blocks * uint64(stat.Bsize)) / uint64(1000000000)
- y, err := lp.New("disk_total", tags, m.meta, map[string]interface{}{"value": total}, time.Now())
+ y, err := lp.NewMessage("disk_total", tags, m.meta, map[string]interface{}{"value": total}, time.Now())
if err == nil {
y.AddMeta("unit", "GBytes")
output <- y
}
free := (stat.Bfree * uint64(stat.Bsize)) / uint64(1000000000)
- y, err = lp.New("disk_free", tags, m.meta, map[string]interface{}{"value": free}, time.Now())
+ y, err = lp.NewMessage("disk_free", tags, m.meta, map[string]interface{}{"value": free}, time.Now())
if err == nil {
y.AddMeta("unit", "GBytes")
output <- y
@@ -110,7 +110,7 @@ func (m *DiskstatCollector) Read(interval time.Duration, output chan lp.CCMetric
}
}
}
- y, err := lp.New("part_max_used", map[string]string{"type": "node"}, m.meta, map[string]interface{}{"value": int(part_max_used)}, time.Now())
+ y, err := lp.NewMessage("part_max_used", map[string]string{"type": "node"}, m.meta, map[string]interface{}{"value": int(part_max_used)}, time.Now())
if err == nil {
y.AddMeta("unit", "percent")
output <- y

@@ -14,7 +14,7 @@ import (
"time"

cclog "github.com/ClusterCockpit/cc-metric-collector/pkg/ccLogger"
- lp "github.com/ClusterCockpit/cc-metric-collector/pkg/ccMetric"
+ lp "github.com/ClusterCockpit/cc-energy-manager/pkg/cc-message"
)

const DEFAULT_GPFS_CMD = "mmpmon"
@@ -94,7 +94,7 @@ func (m *GpfsCollector) Init(config json.RawMessage) error {
return nil
}

- func (m *GpfsCollector) Read(interval time.Duration, output chan lp.CCMetric) {
+ func (m *GpfsCollector) Read(interval time.Duration, output chan lp.CCMessage) {
// Check if already initialized
if !m.init {
return
@@ -218,7 +218,7 @@ func (m *GpfsCollector) Read(interval time.Duration, output chan lp.CCMetric) {
continue
}
if y, err :=
- lp.New(
+ lp.NewMessage(
"gpfs_bytes_read",
m.tags,
m.meta,
@@ -234,7 +234,7 @@ func (m *GpfsCollector) Read(interval time.Duration, output chan lp.CCMetric) {
if lastBytesRead := m.lastState[filesystem].bytesRead; lastBytesRead >= 0 {
bwRead := float64(bytesRead-lastBytesRead) / timeDiff
if y, err :=
- lp.New(
+ lp.NewMessage(
"gpfs_bw_read",
m.tags,
m.meta,
@@ -258,7 +258,7 @@ func (m *GpfsCollector) Read(interval time.Duration, output chan lp.CCMetric) {
continue
}
if y, err :=
- lp.New(
+ lp.NewMessage(
"gpfs_bytes_written",
m.tags,
m.meta,
@@ -274,7 +274,7 @@ func (m *GpfsCollector) Read(interval time.Duration, output chan lp.CCMetric) {
if lastBytesWritten := m.lastState[filesystem].bytesRead; lastBytesWritten >= 0 {
bwWrite := float64(bytesWritten-lastBytesWritten) / timeDiff
if y, err :=
- lp.New(
+ lp.NewMessage(
"gpfs_bw_write",
m.tags,
m.meta,
@@ -304,7 +304,7 @@ func (m *GpfsCollector) Read(interval time.Duration, output chan lp.CCMetric) {
fmt.Sprintf("Read(): Failed to convert number of opens '%s' to int64: %v", key_value["_oc_"], err))
continue
}
- if y, err := lp.New("gpfs_num_opens", m.tags, m.meta, map[string]interface{}{"value": numOpens}, timestamp); err == nil {
+ if y, err := lp.NewMessage("gpfs_num_opens", m.tags, m.meta, map[string]interface{}{"value": numOpens}, timestamp); err == nil {
output <- y
}

@@ -316,7 +316,7 @@ func (m *GpfsCollector) Read(interval time.Duration, output chan lp.CCMetric) {
fmt.Sprintf("Read(): Failed to convert number of closes: '%s' to int64: %v", key_value["_cc_"], err))
continue
}
- if y, err := lp.New("gpfs_num_closes", m.tags, m.meta, map[string]interface{}{"value": numCloses}, timestamp); err == nil {
+ if y, err := lp.NewMessage("gpfs_num_closes", m.tags, m.meta, map[string]interface{}{"value": numCloses}, timestamp); err == nil {
output <- y
}

@@ -328,7 +328,7 @@ func (m *GpfsCollector) Read(interval time.Duration, output chan lp.CCMetric) {
fmt.Sprintf("Read(): Failed to convert number of reads: '%s' to int64: %v", key_value["_rdc_"], err))
continue
}
- if y, err := lp.New("gpfs_num_reads", m.tags, m.meta, map[string]interface{}{"value": numReads}, timestamp); err == nil {
+ if y, err := lp.NewMessage("gpfs_num_reads", m.tags, m.meta, map[string]interface{}{"value": numReads}, timestamp); err == nil {
output <- y
}

@@ -340,7 +340,7 @@ func (m *GpfsCollector) Read(interval time.Duration, output chan lp.CCMetric) {
fmt.Sprintf("Read(): Failed to convert number of writes: '%s' to int64: %v", key_value["_wc_"], err))
continue
}
- if y, err := lp.New("gpfs_num_writes", m.tags, m.meta, map[string]interface{}{"value": numWrites}, timestamp); err == nil {
+ if y, err := lp.NewMessage("gpfs_num_writes", m.tags, m.meta, map[string]interface{}{"value": numWrites}, timestamp); err == nil {
output <- y
}

@@ -352,7 +352,7 @@ func (m *GpfsCollector) Read(interval time.Duration, output chan lp.CCMetric) {
fmt.Sprintf("Read(): Failed to convert number of read directories: '%s' to int64: %v", key_value["_dir_"], err))
continue
}
- if y, err := lp.New("gpfs_num_readdirs", m.tags, m.meta, map[string]interface{}{"value": numReaddirs}, timestamp); err == nil {
+ if y, err := lp.NewMessage("gpfs_num_readdirs", m.tags, m.meta, map[string]interface{}{"value": numReaddirs}, timestamp); err == nil {
output <- y
}

@@ -364,7 +364,7 @@ func (m *GpfsCollector) Read(interval time.Duration, output chan lp.CCMetric) {
fmt.Sprintf("Read(): Failed to convert number of inode updates: '%s' to int: %v", key_value["_iu_"], err))
continue
}
- if y, err := lp.New("gpfs_num_inode_updates", m.tags, m.meta, map[string]interface{}{"value": numInodeUpdates}, timestamp); err == nil {
+ if y, err := lp.NewMessage("gpfs_num_inode_updates", m.tags, m.meta, map[string]interface{}{"value": numInodeUpdates}, timestamp); err == nil {
output <- y
}

@@ -372,7 +372,7 @@ func (m *GpfsCollector) Read(interval time.Duration, output chan lp.CCMetric) {
if m.config.SendTotalValues {
bytesTotal := bytesRead + bytesWritten
if y, err :=
- lp.New("gpfs_bytes_total",
+ lp.NewMessage("gpfs_bytes_total",
m.tags,
m.meta,
map[string]interface{}{
@@ -385,7 +385,7 @@ func (m *GpfsCollector) Read(interval time.Duration, output chan lp.CCMetric) {
}
iops := numReads + numWrites
if y, err :=
- lp.New("gpfs_iops",
+ lp.NewMessage("gpfs_iops",
m.tags,
m.meta,
map[string]interface{}{
@@ -397,7 +397,7 @@ func (m *GpfsCollector) Read(interval time.Duration, output chan lp.CCMetric) {
}
metaops := numInodeUpdates + numCloses + numOpens + numReaddirs
if y, err :=
- lp.New("gpfs_metaops",
+ lp.NewMessage("gpfs_metaops",
m.tags,
m.meta,
map[string]interface{}{

@@ -5,7 +5,7 @@ import (
"os"

cclog "github.com/ClusterCockpit/cc-metric-collector/pkg/ccLogger"
- lp "github.com/ClusterCockpit/cc-metric-collector/pkg/ccMetric"
+ lp "github.com/ClusterCockpit/cc-energy-manager/pkg/cc-message"
"golang.org/x/sys/unix"

"encoding/json"
@@ -182,7 +182,7 @@ func (m *InfinibandCollector) Init(config json.RawMessage) error {
}

// Read reads Infiniband counter files below IB_BASEPATH
- func (m *InfinibandCollector) Read(interval time.Duration, output chan lp.CCMetric) {
+ func (m *InfinibandCollector) Read(interval time.Duration, output chan lp.CCMessage) {

// Check if already initialized
if !m.init {
@@ -230,7 +230,7 @@ func (m *InfinibandCollector) Read(interval time.Duration, output chan lp.CCMetr
// Send absolut values
if m.config.SendAbsoluteValues {
if y, err :=
- lp.New(
+ lp.NewMessage(
counterDef.name,
info.tagSet,
m.meta,
@@ -248,7 +248,7 @@ func (m *InfinibandCollector) Read(interval time.Duration, output chan lp.CCMetr
if counterDef.lastState >= 0 {
rate := float64((counterDef.currentState - counterDef.lastState)) / timeDiff
if y, err :=
- lp.New(
+ lp.NewMessage(
counterDef.name+"_bw",
info.tagSet,
m.meta,
@@ -278,7 +278,7 @@ func (m *InfinibandCollector) Read(interval time.Duration, output chan lp.CCMetr
// Send total values
if m.config.SendTotalValues {
if y, err :=
- lp.New(
+ lp.NewMessage(
"ib_total",
info.tagSet,
m.meta,
@@ -291,7 +291,7 @@ func (m *InfinibandCollector) Read(interval time.Duration, output chan lp.CCMetr
}

if y, err :=
- lp.New(
+ lp.NewMessage(
"ib_total_pkts",
info.tagSet,
m.meta,

@@ -5,7 +5,7 @@ import (
"os"

cclog "github.com/ClusterCockpit/cc-metric-collector/pkg/ccLogger"
- lp "github.com/ClusterCockpit/cc-metric-collector/pkg/ccMetric"
+ lp "github.com/ClusterCockpit/cc-energy-manager/pkg/cc-message"

// "log"
"encoding/json"
@@ -107,7 +107,7 @@ func (m *IOstatCollector) Init(config json.RawMessage) error {
return err
}

- func (m *IOstatCollector) Read(interval time.Duration, output chan lp.CCMetric) {
+ func (m *IOstatCollector) Read(interval time.Duration, output chan lp.CCMessage) {
if !m.init {
return
}
@@ -139,7 +139,7 @@ func (m *IOstatCollector) Read(interval time.Duration, output chan lp.CCMetric)
x, err := strconv.ParseInt(linefields[idx], 0, 64)
if err == nil {
diff := x - entry.lastValues[name]
- y, err := lp.New(name, entry.tags, m.meta, map[string]interface{}{"value": int(diff)}, time.Now())
+ y, err := lp.NewMessage(name, entry.tags, m.meta, map[string]interface{}{"value": int(diff)}, time.Now())
if err == nil {
output <- y
}

@@ -14,7 +14,7 @@ import (
"time"

cclog "github.com/ClusterCockpit/cc-metric-collector/pkg/ccLogger"
- lp "github.com/ClusterCockpit/cc-metric-collector/pkg/ccMetric"
+ lp "github.com/ClusterCockpit/cc-energy-manager/pkg/cc-message"
)

const IPMISENSORS_PATH = `ipmi-sensors`
@@ -83,7 +83,7 @@ func (m *IpmiCollector) Init(config json.RawMessage) error {
return nil
}

- func (m *IpmiCollector) readIpmiTool(cmd string, output chan lp.CCMetric) {
+ func (m *IpmiCollector) readIpmiTool(cmd string, output chan lp.CCMessage) {

// Setup ipmitool command
command := exec.Command(cmd, "sensor")
@@ -121,7 +121,7 @@ func (m *IpmiCollector) readIpmiTool(cmd string, output chan lp.CCMetric) {
unit = "Watts"
}

- y, err := lp.New(name, map[string]string{"type": "node"}, m.meta, map[string]interface{}{"value": v}, time.Now())
+ y, err := lp.NewMessage(name, map[string]string{"type": "node"}, m.meta, map[string]interface{}{"value": v}, time.Now())
if err == nil {
y.AddMeta("unit", unit)
output <- y
@@ -141,7 +141,7 @@ func (m *IpmiCollector) readIpmiTool(cmd string, output chan lp.CCMetric) {
}
}

- func (m *IpmiCollector) readIpmiSensors(cmd string, output chan lp.CCMetric) {
+ func (m *IpmiCollector) readIpmiSensors(cmd string, output chan lp.CCMessage) {

command := exec.Command(cmd, "--comma-separated-output", "--sdr-cache-recreate")
command.Wait()
@@ -159,7 +159,7 @@ func (m *IpmiCollector) readIpmiSensors(cmd string, output chan lp.CCMetric) {
v, err := strconv.ParseFloat(lv[3], 64)
if err == nil {
name := strings.ToLower(strings.Replace(lv[1], " ", "_", -1))
- y, err := lp.New(name, map[string]string{"type": "node"}, m.meta, map[string]interface{}{"value": v}, time.Now())
+ y, err := lp.NewMessage(name, map[string]string{"type": "node"}, m.meta, map[string]interface{}{"value": v}, time.Now())
if err == nil {
if len(lv) > 4 {
y.AddMeta("unit", lv[4])
@@ -171,7 +171,7 @@ func (m *IpmiCollector) readIpmiSensors(cmd string, output chan lp.CCMetric) {
}
}

- func (m *IpmiCollector) Read(interval time.Duration, output chan lp.CCMetric) {
+ func (m *IpmiCollector) Read(interval time.Duration, output chan lp.CCMessage) {

// Check if already initialized
if !m.init {

@@ -24,9 +24,9 @@ import (
"time"
"unsafe"

+ lp "github.com/ClusterCockpit/cc-energy-manager/pkg/cc-message"
agg "github.com/ClusterCockpit/cc-metric-collector/internal/metricAggregator"
cclog "github.com/ClusterCockpit/cc-metric-collector/pkg/ccLogger"
- lp "github.com/ClusterCockpit/cc-metric-collector/pkg/ccMetric"
topo "github.com/ClusterCockpit/cc-metric-collector/pkg/ccTopology"
"github.com/NVIDIA/go-nvml/pkg/dl"
"github.com/fsnotify/fsnotify"
@@ -393,9 +393,9 @@ func (m *LikwidCollector) takeMeasurement(evidx int, evset LikwidEventsetConfig,
if uid != uint32(os.Getuid()) {
usr, err := user.LookupId(fmt.Sprint(uid))
if err == nil {
- return true, fmt.Errorf("Access to performance counters locked by %s", usr.Username)
+ return true, fmt.Errorf("access to performance counters locked by %s", usr.Username)
} else {
- return true, fmt.Errorf("Access to performance counters locked by %d", uid)
+ return true, fmt.Errorf("access to performance counters locked by %d", uid)
}
}
// Add the lock file to the watcher
@@ -436,9 +436,7 @@ func (m *LikwidCollector) takeMeasurement(evidx int, evset LikwidEventsetConfig,
gid = C.perfmon_addEventSet(evset.estr)
}
if gid < 0 {
- return true, fmt.Errorf("failed to add events %s, error %d", evset.go_estr, gid)
- } else {
- evset.gid = gid
+ return true, fmt.Errorf("failed to add events %s, id %d, error %d", evset.go_estr, evidx, gid)
}

// Setup all performance monitoring counters of an eventSet
@@ -549,11 +547,12 @@ func (m *LikwidCollector) takeMeasurement(evidx int, evset LikwidEventsetConfig,
}

// Get all measurement results for an event set, derive the metric values out of the measurement results and send it
- func (m *LikwidCollector) calcEventsetMetrics(evset LikwidEventsetConfig, interval time.Duration, output chan lp.CCMetric) error {
+ func (m *LikwidCollector) calcEventsetMetrics(evset LikwidEventsetConfig, interval time.Duration, output chan lp.CCMessage) error {
invClock := float64(1.0 / m.basefreq)

for _, tid := range m.cpu2tid {
evset.results[tid]["inverseClock"] = invClock
+ evset.results[tid]["gotime"] = interval.Seconds()
}

// Go over the event set metrics, derive the value out of the event:counter values and send it
@@ -582,7 +581,7 @@ func (m *LikwidCollector) calcEventsetMetrics(evset LikwidEventsetConfig, interv
if !math.IsNaN(value) && metric.Publish {
fields := map[string]interface{}{"value": value}
y, err :=
- lp.New(
+ lp.NewMessage(
metric.Name,
map[string]string{
"type": metric.Type,
@@ -619,7 +618,7 @@ func (m *LikwidCollector) calcEventsetMetrics(evset LikwidEventsetConfig, interv

for coreID, value := range totalCoreValues {
y, err :=
- lp.New(
+ lp.NewMessage(
metric.Name,
map[string]string{
"type": "core",
@@ -656,7 +655,7 @@ func (m *LikwidCollector) calcEventsetMetrics(evset LikwidEventsetConfig, interv

for socketID, value := range totalSocketValues {
y, err :=
- lp.New(
+ lp.NewMessage(
metric.Name,
map[string]string{
"type": "socket",
@@ -691,7 +690,7 @@ func (m *LikwidCollector) calcEventsetMetrics(evset LikwidEventsetConfig, interv
}

y, err :=
- lp.New(
+ lp.NewMessage(
metric.Name,
map[string]string{
"type": "node",
@@ -716,7 +715,7 @@ func (m *LikwidCollector) calcEventsetMetrics(evset LikwidEventsetConfig, interv
}

// Go over the global metrics, derive the value out of the event sets' metric values and send it
- func (m *LikwidCollector) calcGlobalMetrics(groups []LikwidEventsetConfig, interval time.Duration, output chan lp.CCMetric) error {
+ func (m *LikwidCollector) calcGlobalMetrics(groups []LikwidEventsetConfig, interval time.Duration, output chan lp.CCMessage) error {
// Send all metrics with same time stamp
// This function does only computiation, counter measurement is done before
now := time.Now()
@@ -737,6 +736,7 @@ func (m *LikwidCollector) calcGlobalMetrics(groups []LikwidEventsetConfig, inter
params[mname] = mres
}
}
+ params["gotime"] = interval.Seconds()
// Evaluate the metric
value, err := agg.EvalFloat64Condition(metric.Calc, params)
if err != nil {
@@ -750,7 +750,7 @@ func (m *LikwidCollector) calcGlobalMetrics(groups []LikwidEventsetConfig, inter
if !math.IsNaN(value) {
if metric.Publish {
y, err :=
- lp.New(
+ lp.NewMessage(
metric.Name,
map[string]string{
"type": metric.Type,
@@ -778,7 +778,7 @@ func (m *LikwidCollector) calcGlobalMetrics(groups []LikwidEventsetConfig, inter
return nil
}

- func (m *LikwidCollector) ReadThread(interval time.Duration, output chan lp.CCMetric) {
+ func (m *LikwidCollector) ReadThread(interval time.Duration, output chan lp.CCMessage) {
var err error = nil
groups := make([]LikwidEventsetConfig, 0)

@@ -806,7 +806,7 @@ func (m *LikwidCollector) ReadThread(interval time.Duration, output chan lp.CCMe
}

// main read function taking multiple measurement rounds, each 'interval' seconds long
- func (m *LikwidCollector) Read(interval time.Duration, output chan lp.CCMetric) {
+ func (m *LikwidCollector) Read(interval time.Duration, output chan lp.CCMessage) {
if !m.init {
return
}

@@ -8,18 +8,16 @@ import (
"strings"
"time"

+ lp "github.com/ClusterCockpit/cc-energy-manager/pkg/cc-message"
cclog "github.com/ClusterCockpit/cc-metric-collector/pkg/ccLogger"
- lp "github.com/ClusterCockpit/cc-metric-collector/pkg/ccMetric"
)

- //
// LoadavgCollector collects:
// * load average of last 1, 5 & 15 minutes
// * number of processes currently runnable
// * total number of processes in system
//
// See: https://www.kernel.org/doc/html/latest/filesystems/proc.html
- //
const LOADAVGFILE = "/proc/loadavg"

type LoadavgCollector struct {
@@ -68,17 +66,15 @@ func (m *LoadavgCollector) Init(config json.RawMessage) error {
return nil
}

- func (m *LoadavgCollector) Read(interval time.Duration, output chan lp.CCMetric) {
+ func (m *LoadavgCollector) Read(interval time.Duration, output chan lp.CCMessage) {
if !m.init {
return
}
buffer, err := os.ReadFile(LOADAVGFILE)
if err != nil {
- if err != nil {
- cclog.ComponentError(
- m.name,
- fmt.Sprintf("Read(): Failed to read file '%s': %v", LOADAVGFILE, err))
- }
+ cclog.ComponentError(
+ m.name,
+ fmt.Sprintf("Read(): Failed to read file '%s': %v", LOADAVGFILE, err))
return
}
now := time.Now()
@@ -96,7 +92,7 @@ func (m *LoadavgCollector) Read(interval time.Duration, output chan lp.CCMetric)
if m.load_skips[i] {
continue
}
- y, err := lp.New(name, m.tags, m.meta, map[string]interface{}{"value": x}, now)
+ y, err := lp.NewMessage(name, m.tags, m.meta, map[string]interface{}{"value": x}, now)
if err == nil {
output <- y
}
@@ -115,7 +111,7 @@ func (m *LoadavgCollector) Read(interval time.Duration, output chan lp.CCMetric)
if m.proc_skips[i] {
continue
}
- y, err := lp.New(name, m.tags, m.meta, map[string]interface{}{"value": x}, now)
+ y, err := lp.NewMessage(name, m.tags, m.meta, map[string]interface{}{"value": x}, now)
if err == nil {
output <- y
}

@@ -11,7 +11,7 @@ import (
"time"

cclog "github.com/ClusterCockpit/cc-metric-collector/pkg/ccLogger"
- lp "github.com/ClusterCockpit/cc-metric-collector/pkg/ccMetric"
+ lp "github.com/ClusterCockpit/cc-energy-manager/pkg/cc-message"
)

const LUSTRE_SYSFS = `/sys/fs/lustre`
@@ -377,7 +377,7 @@ func (m *LustreCollector) Init(config json.RawMessage) error {
return nil
}

- func (m *LustreCollector) Read(interval time.Duration, output chan lp.CCMetric) {
+ func (m *LustreCollector) Read(interval time.Duration, output chan lp.CCMessage) {
if !m.init {
return
}
@@ -388,7 +388,7 @@ func (m *LustreCollector) Read(interval time.Duration, output chan lp.CCMetric)
for _, def := range m.definitions {
var use_x int64
var err error
- var y lp.CCMetric
+ var y lp.CCMessage
x, err := getMetricData(data, def.lineprefix, def.lineoffset)
if err == nil {
use_x = x
@@ -399,19 +399,19 @@ func (m *LustreCollector) Read(interval time.Duration, output chan lp.CCMetric)
switch def.calc {
case "none":
value = use_x
- y, err = lp.New(def.name, m.tags, m.meta, map[string]interface{}{"value": value}, time.Now())
+ y, err = lp.NewMessage(def.name, m.tags, m.meta, map[string]interface{}{"value": value}, time.Now())
case "difference":
value = use_x - devData[def.name]
if value.(int64) < 0 {
value = 0
}
- y, err = lp.New(def.name, m.tags, m.meta, map[string]interface{}{"value": value}, time.Now())
+ y, err = lp.NewMessage(def.name, m.tags, m.meta, map[string]interface{}{"value": value}, time.Now())
case "derivative":
value = float64(use_x-devData[def.name]) / tdiff.Seconds()
if value.(float64) < 0 {
value = 0
}
- y, err = lp.New(def.name, m.tags, m.meta, map[string]interface{}{"value": value}, time.Now())
+ y, err = lp.NewMessage(def.name, m.tags, m.meta, map[string]interface{}{"value": value}, time.Now())
}
if err == nil {
y.AddTag("device", device)

@@ -13,7 +13,7 @@ import (
"time"

cclog "github.com/ClusterCockpit/cc-metric-collector/pkg/ccLogger"
- lp "github.com/ClusterCockpit/cc-metric-collector/pkg/ccMetric"
+ lp "github.com/ClusterCockpit/cc-energy-manager/pkg/cc-message"
)

const MEMSTATFILE = "/proc/meminfo"
@@ -159,7 +159,7 @@ func (m *MemstatCollector) Init(config json.RawMessage) error {
return err
}

- func (m *MemstatCollector) Read(interval time.Duration, output chan lp.CCMetric) {
+ func (m *MemstatCollector) Read(interval time.Duration, output chan lp.CCMessage) {
if !m.init {
return
}
@@ -175,7 +175,7 @@ func (m *MemstatCollector) Read(interval time.Duration, output chan lp.CCMetric)
}
}

- y, err := lp.New(name, tags, m.meta, map[string]interface{}{"value": value}, time.Now())
+ y, err := lp.NewMessage(name, tags, m.meta, map[string]interface{}{"value": value}, time.Now())
if err == nil {
if len(unit) > 0 {
y.AddMeta("unit", unit)
@@ -208,7 +208,7 @@ func (m *MemstatCollector) Read(interval time.Duration, output chan lp.CCMetric)
}
}
}
- y, err := lp.New("mem_used", tags, m.meta, map[string]interface{}{"value": memUsed}, time.Now())
+ y, err := lp.NewMessage("mem_used", tags, m.meta, map[string]interface{}{"value": memUsed}, time.Now())
if err == nil {
if len(unit) > 0 {
y.AddMeta("unit", unit)

@@ -5,7 +5,7 @@ import (
"fmt"
"time"

- lp "github.com/ClusterCockpit/cc-metric-collector/pkg/ccMetric"
+ lp "github.com/ClusterCockpit/cc-energy-manager/pkg/cc-message"
)

type MetricCollector interface {
@@ -13,7 +13,7 @@ type MetricCollector interface {
Init(config json.RawMessage) error // Initialize metric collector
Initialized() bool // Is metric collector initialized?
Parallel() bool
- Read(duration time.Duration, output chan lp.CCMetric) // Read metrics from metric collector
+ Read(duration time.Duration, output chan lp.CCMessage) // Read metrics from metric collector
Close() // Close / finish metric collector
}

@@ -10,7 +10,7 @@ import (
"time"

cclog "github.com/ClusterCockpit/cc-metric-collector/pkg/ccLogger"
- lp "github.com/ClusterCockpit/cc-metric-collector/pkg/ccMetric"
+ lp "github.com/ClusterCockpit/cc-energy-manager/pkg/cc-message"
)

const NETSTATFILE = "/proc/net/dev"
@@ -153,7 +153,7 @@ func (m *NetstatCollector) Init(config json.RawMessage) error {
return nil
}

- func (m *NetstatCollector) Read(interval time.Duration, output chan lp.CCMetric) {
+ func (m *NetstatCollector) Read(interval time.Duration, output chan lp.CCMessage) {
if !m.init {
return
}
@@ -197,14 +197,14 @@ func (m *NetstatCollector) Read(interval time.Duration, output chan lp.CCMetric)
continue
}
if m.config.SendAbsoluteValues {
- if y, err := lp.New(metric.name, metric.tags, metric.meta, map[string]interface{}{"value": v}, now); err == nil {
+ if y, err := lp.NewMessage(metric.name, metric.tags, metric.meta, map[string]interface{}{"value": v}, now); err == nil {
output <- y
}
}
if m.config.SendDerivedValues {
if metric.lastValue >= 0 {
rate := float64(v-metric.lastValue) / timeDiff
- if y, err := lp.New(metric.name+"_bw", metric.tags, metric.meta_rates, map[string]interface{}{"value": rate}, now); err == nil {
+ if y, err := lp.NewMessage(metric.name+"_bw", metric.tags, metric.meta_rates, map[string]interface{}{"value": rate}, now); err == nil {
output <- y
}
}

@@ -11,7 +11,7 @@ import (
"strings"
"time"

- lp "github.com/ClusterCockpit/cc-metric-collector/pkg/ccMetric"
+ lp "github.com/ClusterCockpit/cc-energy-manager/pkg/cc-message"
)

// First part contains the code for the general NfsCollector.
@@ -118,7 +118,7 @@ func (m *nfsCollector) MainInit(config json.RawMessage) error {
return nil
}

- func (m *nfsCollector) Read(interval time.Duration, output chan lp.CCMetric) {
+ func (m *nfsCollector) Read(interval time.Duration, output chan lp.CCMessage) {
if !m.init {
return
}
@@ -140,7 +140,7 @@ func (m *nfsCollector) Read(interval time.Duration, output chan lp.CCMetric) {
continue
}
value := data.current - data.last
- y, err := lp.New(fmt.Sprintf("%s_%s", prefix, name), m.tags, m.meta, map[string]interface{}{"value": value}, timestamp)
+ y, err := lp.NewMessage(fmt.Sprintf("%s_%s", prefix, name), m.tags, m.meta, map[string]interface{}{"value": value}, timestamp)
if err == nil {
y.AddMeta("version", m.version)
output <- y

@@ -10,7 +10,7 @@ import (
"time"

cclog "github.com/ClusterCockpit/cc-metric-collector/pkg/ccLogger"
- lp "github.com/ClusterCockpit/cc-metric-collector/pkg/ccMetric"
+ lp "github.com/ClusterCockpit/cc-energy-manager/pkg/cc-message"
)

// These are the fields we read from the JSON configuration
@@ -114,7 +114,7 @@ func (m *NfsIOStatCollector) Init(config json.RawMessage) error {
return err
}

- func (m *NfsIOStatCollector) Read(interval time.Duration, output chan lp.CCMetric) {
+ func (m *NfsIOStatCollector) Read(interval time.Duration, output chan lp.CCMessage) {
timestamp := time.Now()

// Get the current values for all mountpoints
@@ -126,7 +126,7 @@ func (m *NfsIOStatCollector) Read(interval time.Duration, output chan lp.CCMetri
// Calculate the difference of old and new values
for i := range values {
x := values[i] - old[i]
- y, err := lp.New(fmt.Sprintf("nfsio_%s", i), m.tags, m.meta, map[string]interface{}{"value": x}, timestamp)
+ y, err := lp.NewMessage(fmt.Sprintf("nfsio_%s", i), m.tags, m.meta, map[string]interface{}{"value": x}, timestamp)
if err == nil {
if strings.HasPrefix(i, "page") {
y.AddMeta("unit", "4K_Pages")

@@ -11,7 +11,7 @@ import (
"time"

cclog "github.com/ClusterCockpit/cc-metric-collector/pkg/ccLogger"
- lp "github.com/ClusterCockpit/cc-metric-collector/pkg/ccMetric"
+ lp "github.com/ClusterCockpit/cc-energy-manager/pkg/cc-message"
)

// Non-Uniform Memory Access (NUMA) policy hit/miss statistics
@@ -97,7 +97,7 @@ func (m *NUMAStatsCollector) Init(config json.RawMessage) error {
return nil
}

- func (m *NUMAStatsCollector) Read(interval time.Duration, output chan lp.CCMetric) {
+ func (m *NUMAStatsCollector) Read(interval time.Duration, output chan lp.CCMessage) {
if !m.init {
return
}
@@ -130,7 +130,7 @@ func (m *NUMAStatsCollector) Read(interval time.Duration, output chan lp.CCMetri
fmt.Sprintf("Read(): Failed to convert %s='%s' to int64: %v", key, split[1], err))
continue
}
- y, err := lp.New(
+ y, err := lp.NewMessage(
"numastats_"+key,
t.tagSet,
m.meta,

@@ -9,7 +9,7 @@ import (
"time"

cclog "github.com/ClusterCockpit/cc-metric-collector/pkg/ccLogger"
- lp "github.com/ClusterCockpit/cc-metric-collector/pkg/ccMetric"
+ lp "github.com/ClusterCockpit/cc-energy-manager/pkg/cc-message"
"github.com/NVIDIA/go-nvml/pkg/nvml"
)

@@ -206,7 +206,7 @@ func (m *NvidiaCollector) Init(config json.RawMessage) error {
return nil
}

- func readMemoryInfo(device NvidiaCollectorDevice, output chan lp.CCMetric) error {
+ func readMemoryInfo(device NvidiaCollectorDevice, output chan lp.CCMessage) error {
if !device.excludeMetrics["nv_fb_mem_total"] || !device.excludeMetrics["nv_fb_mem_used"] || !device.excludeMetrics["nv_fb_mem_reserved"] {
var total uint64
var used uint64
@@ -222,7 +222,7 @@ func readMemoryInfo(device NvidiaCollectorDevice, output chan lp.CCMetric) error

if !device.excludeMetrics["nv_fb_mem_total"] {
t := float64(total) / (1024 * 1024)
- y, err := lp.New("nv_fb_mem_total", device.tags, device.meta, map[string]interface{}{"value": t}, time.Now())
+ y, err := lp.NewMessage("nv_fb_mem_total", device.tags, device.meta, map[string]interface{}{"value": t}, time.Now())
if err == nil {
y.AddMeta("unit", "MByte")
output <- y
@@ -231,7 +231,7 @@ func readMemoryInfo(device NvidiaCollectorDevice, output chan lp.CCMetric) error

if !device.excludeMetrics["nv_fb_mem_used"] {
f := float64(used) / (1024 * 1024)
- y, err := lp.New("nv_fb_mem_used", device.tags, device.meta, map[string]interface{}{"value": f}, time.Now())
+ y, err := lp.NewMessage("nv_fb_mem_used", device.tags, device.meta, map[string]interface{}{"value": f}, time.Now())
if err == nil {
y.AddMeta("unit", "MByte")
output <- y
@@ -240,7 +240,7 @@ func readMemoryInfo(device NvidiaCollectorDevice, output chan lp.CCMetric) error

if v2 && !device.excludeMetrics["nv_fb_mem_reserved"] {
r := float64(reserved) / (1024 * 1024)
- y, err := lp.New("nv_fb_mem_reserved", device.tags, device.meta, map[string]interface{}{"value": r}, time.Now())
+ y, err := lp.NewMessage("nv_fb_mem_reserved", device.tags, device.meta, map[string]interface{}{"value": r}, time.Now())
if err == nil {
y.AddMeta("unit", "MByte")
output <- y
@@ -250,7 +250,7 @@ func readMemoryInfo(device NvidiaCollectorDevice, output chan lp.CCMetric) error
return nil
}

- func readBarMemoryInfo(device NvidiaCollectorDevice, output chan lp.CCMetric) error {
+ func readBarMemoryInfo(device NvidiaCollectorDevice, output chan lp.CCMessage) error {
if !device.excludeMetrics["nv_bar1_mem_total"] || !device.excludeMetrics["nv_bar1_mem_used"] {
meminfo, ret := nvml.DeviceGetBAR1MemoryInfo(device.device)
if ret != nvml.SUCCESS {
@@ -259,7 +259,7 @@ func readBarMemoryInfo(device NvidiaCollectorDevice, output chan lp.CCMetric) er
}
if !device.excludeMetrics["nv_bar1_mem_total"] {
t := float64(meminfo.Bar1Total) / (1024 * 1024)
- y, err := lp.New("nv_bar1_mem_total", device.tags, device.meta, map[string]interface{}{"value": t}, time.Now())
+ y, err := lp.NewMessage("nv_bar1_mem_total", device.tags, device.meta, map[string]interface{}{"value": t}, time.Now())
if err == nil {
y.AddMeta("unit", "MByte")
output <- y
@@ -267,7 +267,7 @@ func readBarMemoryInfo(device NvidiaCollectorDevice, output chan lp.CCMetric) er
}
if !device.excludeMetrics["nv_bar1_mem_used"] {
t := float64(meminfo.Bar1Used) / (1024 * 1024)
- y, err := lp.New("nv_bar1_mem_used", device.tags, device.meta, map[string]interface{}{"value": t}, time.Now())
+ y, err := lp.NewMessage("nv_bar1_mem_used", device.tags, device.meta, map[string]interface{}{"value": t}, time.Now())
if err == nil {
y.AddMeta("unit", "MByte")
output <- y
@@ -277,7 +277,7 @@ func readBarMemoryInfo(device NvidiaCollectorDevice, output chan lp.CCMetric) er
return nil
}

- func readUtilization(device NvidiaCollectorDevice, output chan lp.CCMetric) error {
+ func readUtilization(device NvidiaCollectorDevice, output chan lp.CCMessage) error {
isMig, ret := nvml.DeviceIsMigDeviceHandle(device.device)
if ret != nvml.SUCCESS {
err := errors.New(nvml.ErrorString(ret))
@@ -301,14 +301,14 @@ func readUtilization(device NvidiaCollectorDevice, output chan lp.CCMetric) erro
util, ret := nvml.DeviceGetUtilizationRates(device.device)
if ret == nvml.SUCCESS {
if !device.excludeMetrics["nv_util"] {
- y, err := lp.New("nv_util", device.tags, device.meta, map[string]interface{}{"value": float64(util.Gpu)}, time.Now())
+ y, err := lp.NewMessage("nv_util", device.tags, device.meta, map[string]interface{}{"value": float64(util.Gpu)}, time.Now())
if err == nil {
y.AddMeta("unit", "%")
output <- y
}
}
if !device.excludeMetrics["nv_mem_util"] {
- y, err := lp.New("nv_mem_util", device.tags, device.meta, map[string]interface{}{"value": float64(util.Memory)}, time.Now())
+ y, err := lp.NewMessage("nv_mem_util", device.tags, device.meta, map[string]interface{}{"value": float64(util.Memory)}, time.Now())
if err == nil {
y.AddMeta("unit", "%")
output <- y
@@ -319,7 +319,7 @@ func readUtilization(device NvidiaCollectorDevice, output chan lp.CCMetric) erro
return nil
}

- func readTemp(device NvidiaCollectorDevice, output chan lp.CCMetric) error {
+ func readTemp(device NvidiaCollectorDevice, output chan lp.CCMessage) error {
if !device.excludeMetrics["nv_temp"] {
// Retrieves the current temperature readings for the device, in degrees C.
//
@@ -328,7 +328,7 @@ func readTemp(device NvidiaCollectorDevice, output chan lp.CCMetric) error {
// * NVML_TEMPERATURE_COUNT
temp, ret := nvml.DeviceGetTemperature(device.device, nvml.TEMPERATURE_GPU)
if ret == nvml.SUCCESS {
- y, err := lp.New("nv_temp", device.tags, device.meta, map[string]interface{}{"value": float64(temp)}, time.Now())
+ y, err := lp.NewMessage("nv_temp", device.tags, device.meta, map[string]interface{}{"value": float64(temp)}, time.Now())
if err == nil {
y.AddMeta("unit", "degC")
output <- y
@@ -338,7 +338,7 @@ func readTemp(device NvidiaCollectorDevice, output chan lp.CCMetric) error {
return nil
}

- func readFan(device NvidiaCollectorDevice, output chan lp.CCMetric) error {
+ func readFan(device NvidiaCollectorDevice, output chan lp.CCMessage) error {
if !device.excludeMetrics["nv_fan"] {
// Retrieves the intended operating speed of the device's fan.
//
@@ -351,7 +351,7 @@ func readFan(device NvidiaCollectorDevice, output chan lp.CCMetric) error {
// This value may exceed 100% in certain cases.
fan, ret := nvml.DeviceGetFanSpeed(device.device)
if ret == nvml.SUCCESS {
- y, err := lp.New("nv_fan", device.tags, device.meta, map[string]interface{}{"value": float64(fan)}, time.Now())
+ y, err := lp.NewMessage("nv_fan", device.tags, device.meta, map[string]interface{}{"value": float64(fan)}, time.Now())
if err == nil {
y.AddMeta("unit", "%")
output <- y
@@ -361,14 +361,14 @@ func readFan(device NvidiaCollectorDevice, output chan lp.CCMetric) error {
return nil
}

- // func readFans(device NvidiaCollectorDevice, output chan lp.CCMetric) error {
+ // func readFans(device NvidiaCollectorDevice, output chan lp.CCMessage) error {
// if !device.excludeMetrics["nv_fan"] {
// numFans, ret := nvml.DeviceGetNumFans(device.device)
// if ret == nvml.SUCCESS {
// for i := 0; i < numFans; i++ {
// fan, ret := nvml.DeviceGetFanSpeed_v2(device.device, i)
// if ret == nvml.SUCCESS {
- // y, err := lp.New("nv_fan", device.tags, device.meta, map[string]interface{}{"value": float64(fan)}, time.Now())
+ // y, err := lp.NewMessage("nv_fan", device.tags, device.meta, map[string]interface{}{"value": float64(fan)}, time.Now())
// if err == nil {
// y.AddMeta("unit", "%")
// y.AddTag("stype", "fan")
@@ -382,7 +382,7 @@ func readFan(device NvidiaCollectorDevice, output chan lp.CCMetric) error {
// return nil
// }

- func readEccMode(device NvidiaCollectorDevice, output chan lp.CCMetric) error {
+ func readEccMode(device NvidiaCollectorDevice, output chan lp.CCMessage) error {
if !device.excludeMetrics["nv_ecc_mode"] {
// Retrieves the current and pending ECC modes for the device.
//
@@ -393,21 +393,21 @@ func readEccMode(device NvidiaCollectorDevice, output chan lp.CCMetric) error {
// The "pending" ECC mode refers to the target mode following the next reboot.
_, ecc_pend, ret := nvml.DeviceGetEccMode(device.device)
if ret == nvml.SUCCESS {
- var y lp.CCMetric
+ var y lp.CCMessage
var err error
switch ecc_pend {
case nvml.FEATURE_DISABLED:
- y, err = lp.New("nv_ecc_mode", device.tags, device.meta, map[string]interface{}{"value": "OFF"}, time.Now())
+ y, err = lp.NewMessage("nv_ecc_mode", device.tags, device.meta, map[string]interface{}{"value": "OFF"}, time.Now())
case nvml.FEATURE_ENABLED:
- y, err = lp.New("nv_ecc_mode", device.tags, device.meta, map[string]interface{}{"value": "ON"}, time.Now())
+ y, err = lp.NewMessage("nv_ecc_mode", device.tags, device.meta, map[string]interface{}{"value": "ON"}, time.Now())
default:
- y, err = lp.New("nv_ecc_mode", device.tags, device.meta, map[string]interface{}{"value": "UNKNOWN"}, time.Now())
+ y, err = lp.NewMessage("nv_ecc_mode", device.tags, device.meta, map[string]interface{}{"value": "UNKNOWN"}, time.Now())
}
if err == nil {
output <- y
}
} else if ret == nvml.ERROR_NOT_SUPPORTED {
- y, err := lp.New("nv_ecc_mode", device.tags, device.meta, map[string]interface{}{"value": "N/A"}, time.Now())
+ y, err := lp.NewMessage("nv_ecc_mode", device.tags, device.meta, map[string]interface{}{"value": "N/A"}, time.Now())
if err == nil {
output <- y
}
@@ -416,7 +416,7 @@ func readEccMode(device NvidiaCollectorDevice, output chan lp.CCMetric) error {
return nil
}

- func readPerfState(device NvidiaCollectorDevice, output chan lp.CCMetric) error {
+ func readPerfState(device NvidiaCollectorDevice, output chan lp.CCMessage) error {
if !device.excludeMetrics["nv_perf_state"] {
// Retrieves the current performance state for the device.
//
@@ -427,7 +427,7 @@ func readPerfState(device NvidiaCollectorDevice, output chan lp.CCMetric) error
// 32: Unknown performance state.
pState, ret := nvml.DeviceGetPerformanceState(device.device)
if ret == nvml.SUCCESS {
- y, err := lp.New("nv_perf_state", device.tags, device.meta, map[string]interface{}{"value": fmt.Sprintf("P%d", int(pState))}, time.Now())
+ y, err := lp.NewMessage("nv_perf_state", device.tags, device.meta, map[string]interface{}{"value": fmt.Sprintf("P%d", int(pState))}, time.Now())
if err == nil {
output <- y
}
@@ -436,7 +436,7 @@ func readPerfState(device NvidiaCollectorDevice, output chan lp.CCMetric) error
return nil
}

- func readPowerUsage(device NvidiaCollectorDevice, output chan lp.CCMetric) error {
+ func readPowerUsage(device NvidiaCollectorDevice, output chan lp.CCMessage) error {
if !device.excludeMetrics["nv_power_usage"] {
// Retrieves power usage for this GPU in milliwatts and its associated circuitry (e.g. memory)
//
@@ -450,7 +450,7 @@ func readPowerUsage(device NvidiaCollectorDevice, output chan lp.CCMetric) error
if mode == nvml.FEATURE_ENABLED {
power, ret := nvml.DeviceGetPowerUsage(device.device)
if ret == nvml.SUCCESS {
- y, err := lp.New("nv_power_usage", device.tags, device.meta, map[string]interface{}{"value": float64(power) / 1000}, time.Now())
+ y, err := lp.NewMessage("nv_power_usage", device.tags, device.meta, map[string]interface{}{"value": float64(power) / 1000}, time.Now())
if err == nil {
y.AddMeta("unit", "watts")
output <- y
@@ -461,7 +461,7 @@ func readPowerUsage(device NvidiaCollectorDevice, output chan lp.CCMetric) error
return nil
}

- func readClocks(device NvidiaCollectorDevice, output chan lp.CCMetric) error {
+ func readClocks(device NvidiaCollectorDevice, output chan lp.CCMessage) error {
// Retrieves the current clock speeds for the device.
//
// Available clock information:
@@ -471,7 +471,7 @@ func readClocks(device NvidiaCollectorDevice, output chan lp.CCMetric) error {
if !device.excludeMetrics["nv_graphics_clock"] {
graphicsClock, ret := nvml.DeviceGetClockInfo(device.device, nvml.CLOCK_GRAPHICS)
if ret == nvml.SUCCESS {
- y, err := lp.New("nv_graphics_clock", device.tags, device.meta, map[string]interface{}{"value": float64(graphicsClock)}, time.Now())
+ y, err := lp.NewMessage("nv_graphics_clock", device.tags, device.meta, map[string]interface{}{"value": float64(graphicsClock)}, time.Now())
if err == nil {
y.AddMeta("unit", "MHz")
output <- y
@@ -482,7 +482,7 @@ func readClocks(device NvidiaCollectorDevice, output chan lp.CCMetric) error {
if !device.excludeMetrics["nv_sm_clock"] {
smCock, ret := nvml.DeviceGetClockInfo(device.device, nvml.CLOCK_SM)
if ret == nvml.SUCCESS {
- y, err := lp.New("nv_sm_clock", device.tags, device.meta, map[string]interface{}{"value": float64(smCock)}, time.Now())
+ y, err := lp.NewMessage("nv_sm_clock", device.tags, device.meta, map[string]interface{}{"value": float64(smCock)}, time.Now())
if err == nil {
y.AddMeta("unit", "MHz")
output <- y
@@ -493,7 +493,7 @@ func readClocks(device NvidiaCollectorDevice, output chan lp.CCMetric) error {
if !device.excludeMetrics["nv_mem_clock"] {
memClock, ret := nvml.DeviceGetClockInfo(device.device, nvml.CLOCK_MEM)
if ret == nvml.SUCCESS {
- y, err := lp.New("nv_mem_clock", device.tags, device.meta, map[string]interface{}{"value": float64(memClock)}, time.Now())
+ y, err := lp.NewMessage("nv_mem_clock", device.tags, device.meta, map[string]interface{}{"value": float64(memClock)}, time.Now())
if err == nil {
y.AddMeta("unit", "MHz")
output <- y
@@ -503,7 +503,7 @@ func readClocks(device NvidiaCollectorDevice, output chan lp.CCMetric) error {
if !device.excludeMetrics["nv_video_clock"] {
memClock, ret := nvml.DeviceGetClockInfo(device.device, nvml.CLOCK_VIDEO)
if ret == nvml.SUCCESS {
- y, err := lp.New("nv_video_clock", device.tags, device.meta, map[string]interface{}{"value": float64(memClock)}, time.Now())
+ y, err := lp.NewMessage("nv_video_clock", device.tags, device.meta, map[string]interface{}{"value": float64(memClock)}, time.Now())
if err == nil {
y.AddMeta("unit", "MHz")
output <- y
@@ -513,7 +513,7 @@ func readClocks(device NvidiaCollectorDevice, output chan lp.CCMetric) error {
return nil
}

- func readMaxClocks(device NvidiaCollectorDevice, output chan lp.CCMetric) error {
+ func readMaxClocks(device NvidiaCollectorDevice, output chan lp.CCMessage) error {
// Retrieves the maximum clock speeds for the device.
//
// Available clock information:
@@ -528,7 +528,7 @@ func readMaxClocks(device NvidiaCollectorDevice, output chan lp.CCMetric) error
if !device.excludeMetrics["nv_max_graphics_clock"] {
max_gclk, ret := nvml.DeviceGetMaxClockInfo(device.device, nvml.CLOCK_GRAPHICS)
if ret == nvml.SUCCESS {
- y, err := lp.New("nv_max_graphics_clock", device.tags, device.meta, map[string]interface{}{"value": float64(max_gclk)}, time.Now())
+ y, err := lp.NewMessage("nv_max_graphics_clock", device.tags, device.meta, map[string]interface{}{"value": float64(max_gclk)}, time.Now())
if err == nil {
y.AddMeta("unit", "MHz")
output <- y
@@ -539,7 +539,7 @@ func readMaxClocks(device NvidiaCollectorDevice, output chan lp.CCMetric) error
if !device.excludeMetrics["nv_max_sm_clock"] {
maxSmClock, ret := nvml.DeviceGetClockInfo(device.device, nvml.CLOCK_SM)
if ret == nvml.SUCCESS {
- y, err := lp.New("nv_max_sm_clock", device.tags, device.meta, map[string]interface{}{"value": float64(maxSmClock)}, time.Now())
+ y, err := lp.NewMessage("nv_max_sm_clock", device.tags, device.meta, map[string]interface{}{"value": float64(maxSmClock)}, time.Now())
if err == nil {
y.AddMeta("unit", "MHz")
output <- y
@@ -550,7 +550,7 @@ func readMaxClocks(device NvidiaCollectorDevice, output chan lp.CCMetric) error
if !device.excludeMetrics["nv_max_mem_clock"] {
maxMemClock, ret := nvml.DeviceGetClockInfo(device.device, nvml.CLOCK_MEM)
if ret == nvml.SUCCESS {
- y, err := lp.New("nv_max_mem_clock", device.tags, device.meta, map[string]interface{}{"value": float64(maxMemClock)}, time.Now())
+ y, err := lp.NewMessage("nv_max_mem_clock", device.tags, device.meta, map[string]interface{}{"value": float64(maxMemClock)}, time.Now())
if err == nil {
y.AddMeta("unit", "MHz")
output <- y
@@ -561,7 +561,7 @@ func readMaxClocks(device NvidiaCollectorDevice, output chan lp.CCMetric) error
if !device.excludeMetrics["nv_max_video_clock"] {
maxMemClock, ret := nvml.DeviceGetClockInfo(device.device, nvml.CLOCK_VIDEO)
if ret == nvml.SUCCESS {
- y, err := lp.New("nv_max_video_clock", device.tags, device.meta, map[string]interface{}{"value": float64(maxMemClock)}, time.Now())
+ y, err := lp.NewMessage("nv_max_video_clock", device.tags, device.meta, map[string]interface{}{"value": float64(maxMemClock)}, time.Now())
|
||||
if err == nil {
|
||||
y.AddMeta("unit", "MHz")
|
||||
output <- y
|
||||
@@ -571,7 +571,7 @@ func readMaxClocks(device NvidiaCollectorDevice, output chan lp.CCMetric) error
|
||||
return nil
|
||||
}
|
||||
|
||||
func readEccErrors(device NvidiaCollectorDevice, output chan lp.CCMetric) error {
|
||||
func readEccErrors(device NvidiaCollectorDevice, output chan lp.CCMessage) error {
|
||||
if !device.excludeMetrics["nv_ecc_uncorrected_error"] {
|
||||
// Retrieves the total ECC error counts for the device.
|
||||
//
|
||||
@@ -584,7 +584,7 @@ func readEccErrors(device NvidiaCollectorDevice, output chan lp.CCMetric) error
|
||||
// i.e. the total set of errors across the entire device.
|
||||
ecc_db, ret := nvml.DeviceGetTotalEccErrors(device.device, nvml.MEMORY_ERROR_TYPE_UNCORRECTED, nvml.AGGREGATE_ECC)
|
||||
if ret == nvml.SUCCESS {
|
||||
y, err := lp.New("nv_ecc_uncorrected_error", device.tags, device.meta, map[string]interface{}{"value": float64(ecc_db)}, time.Now())
|
||||
y, err := lp.NewMessage("nv_ecc_uncorrected_error", device.tags, device.meta, map[string]interface{}{"value": float64(ecc_db)}, time.Now())
|
||||
if err == nil {
|
||||
output <- y
|
||||
}
|
||||
@@ -593,7 +593,7 @@ func readEccErrors(device NvidiaCollectorDevice, output chan lp.CCMetric) error
|
||||
if !device.excludeMetrics["nv_ecc_corrected_error"] {
|
||||
ecc_sb, ret := nvml.DeviceGetTotalEccErrors(device.device, nvml.MEMORY_ERROR_TYPE_CORRECTED, nvml.AGGREGATE_ECC)
|
||||
if ret == nvml.SUCCESS {
|
||||
y, err := lp.New("nv_ecc_corrected_error", device.tags, device.meta, map[string]interface{}{"value": float64(ecc_sb)}, time.Now())
|
||||
y, err := lp.NewMessage("nv_ecc_corrected_error", device.tags, device.meta, map[string]interface{}{"value": float64(ecc_sb)}, time.Now())
|
||||
if err == nil {
|
||||
output <- y
|
||||
}
|
||||
@@ -602,7 +602,7 @@ func readEccErrors(device NvidiaCollectorDevice, output chan lp.CCMetric) error
|
||||
return nil
|
||||
}
|
||||
|
||||
func readPowerLimit(device NvidiaCollectorDevice, output chan lp.CCMetric) error {
|
||||
func readPowerLimit(device NvidiaCollectorDevice, output chan lp.CCMessage) error {
|
||||
if !device.excludeMetrics["nv_power_max_limit"] {
|
||||
// Retrieves the power management limit associated with this device.
|
||||
//
|
||||
@@ -612,7 +612,7 @@ func readPowerLimit(device NvidiaCollectorDevice, output chan lp.CCMetric) error
|
||||
// If the card's total power draw reaches this limit the power management algorithm kicks in.
|
||||
pwr_limit, ret := nvml.DeviceGetPowerManagementLimit(device.device)
|
||||
if ret == nvml.SUCCESS {
|
||||
y, err := lp.New("nv_power_max_limit", device.tags, device.meta, map[string]interface{}{"value": float64(pwr_limit) / 1000}, time.Now())
|
||||
y, err := lp.NewMessage("nv_power_max_limit", device.tags, device.meta, map[string]interface{}{"value": float64(pwr_limit) / 1000}, time.Now())
|
||||
if err == nil {
|
||||
y.AddMeta("unit", "watts")
|
||||
output <- y
|
||||
@@ -622,7 +622,7 @@ func readPowerLimit(device NvidiaCollectorDevice, output chan lp.CCMetric) error
|
||||
return nil
|
||||
}
|
||||
|
||||
func readEncUtilization(device NvidiaCollectorDevice, output chan lp.CCMetric) error {
|
||||
func readEncUtilization(device NvidiaCollectorDevice, output chan lp.CCMessage) error {
|
||||
isMig, ret := nvml.DeviceIsMigDeviceHandle(device.device)
|
||||
if ret != nvml.SUCCESS {
|
||||
err := errors.New(nvml.ErrorString(ret))
|
||||
@@ -639,7 +639,7 @@ func readEncUtilization(device NvidiaCollectorDevice, output chan lp.CCMetric) e
|
||||
// Note: On MIG-enabled GPUs, querying encoder utilization is not currently supported.
|
||||
enc_util, _, ret := nvml.DeviceGetEncoderUtilization(device.device)
|
||||
if ret == nvml.SUCCESS {
|
||||
y, err := lp.New("nv_encoder_util", device.tags, device.meta, map[string]interface{}{"value": float64(enc_util)}, time.Now())
|
||||
y, err := lp.NewMessage("nv_encoder_util", device.tags, device.meta, map[string]interface{}{"value": float64(enc_util)}, time.Now())
|
||||
if err == nil {
|
||||
y.AddMeta("unit", "%")
|
||||
output <- y
|
||||
@@ -649,7 +649,7 @@ func readEncUtilization(device NvidiaCollectorDevice, output chan lp.CCMetric) e
|
||||
return nil
|
||||
}
|
||||
|
||||
func readDecUtilization(device NvidiaCollectorDevice, output chan lp.CCMetric) error {
|
||||
func readDecUtilization(device NvidiaCollectorDevice, output chan lp.CCMessage) error {
|
||||
isMig, ret := nvml.DeviceIsMigDeviceHandle(device.device)
|
||||
if ret != nvml.SUCCESS {
|
||||
err := errors.New(nvml.ErrorString(ret))
|
||||
@@ -666,7 +666,7 @@ func readDecUtilization(device NvidiaCollectorDevice, output chan lp.CCMetric) e
|
||||
// Note: On MIG-enabled GPUs, querying encoder utilization is not currently supported.
|
||||
dec_util, _, ret := nvml.DeviceGetDecoderUtilization(device.device)
|
||||
if ret == nvml.SUCCESS {
|
||||
y, err := lp.New("nv_decoder_util", device.tags, device.meta, map[string]interface{}{"value": float64(dec_util)}, time.Now())
|
||||
y, err := lp.NewMessage("nv_decoder_util", device.tags, device.meta, map[string]interface{}{"value": float64(dec_util)}, time.Now())
|
||||
if err == nil {
|
||||
y.AddMeta("unit", "%")
|
||||
output <- y
|
||||
@@ -676,7 +676,7 @@ func readDecUtilization(device NvidiaCollectorDevice, output chan lp.CCMetric) e
|
||||
return nil
|
||||
}
|
||||
|
||||
func readRemappedRows(device NvidiaCollectorDevice, output chan lp.CCMetric) error {
|
||||
func readRemappedRows(device NvidiaCollectorDevice, output chan lp.CCMessage) error {
|
||||
if !device.excludeMetrics["nv_remapped_rows_corrected"] ||
|
||||
!device.excludeMetrics["nv_remapped_rows_uncorrected"] ||
|
||||
!device.excludeMetrics["nv_remapped_rows_pending"] ||
|
||||
@@ -693,13 +693,13 @@ func readRemappedRows(device NvidiaCollectorDevice, output chan lp.CCMetric) err
|
||||
corrected, uncorrected, pending, failure, ret := nvml.DeviceGetRemappedRows(device.device)
|
||||
if ret == nvml.SUCCESS {
|
||||
if !device.excludeMetrics["nv_remapped_rows_corrected"] {
|
||||
y, err := lp.New("nv_remapped_rows_corrected", device.tags, device.meta, map[string]interface{}{"value": float64(corrected)}, time.Now())
|
||||
y, err := lp.NewMessage("nv_remapped_rows_corrected", device.tags, device.meta, map[string]interface{}{"value": float64(corrected)}, time.Now())
|
||||
if err == nil {
|
||||
output <- y
|
||||
}
|
||||
}
|
||||
if !device.excludeMetrics["nv_remapped_rows_uncorrected"] {
|
||||
y, err := lp.New("nv_remapped_rows_corrected", device.tags, device.meta, map[string]interface{}{"value": float64(uncorrected)}, time.Now())
|
||||
y, err := lp.NewMessage("nv_remapped_rows_corrected", device.tags, device.meta, map[string]interface{}{"value": float64(uncorrected)}, time.Now())
|
||||
if err == nil {
|
||||
output <- y
|
||||
}
|
||||
@@ -709,7 +709,7 @@ func readRemappedRows(device NvidiaCollectorDevice, output chan lp.CCMetric) err
|
||||
if pending {
|
||||
p = 1
|
||||
}
|
||||
y, err := lp.New("nv_remapped_rows_pending", device.tags, device.meta, map[string]interface{}{"value": p}, time.Now())
|
||||
y, err := lp.NewMessage("nv_remapped_rows_pending", device.tags, device.meta, map[string]interface{}{"value": p}, time.Now())
|
||||
if err == nil {
|
||||
output <- y
|
||||
}
|
||||
@@ -719,7 +719,7 @@ func readRemappedRows(device NvidiaCollectorDevice, output chan lp.CCMetric) err
|
||||
if failure {
|
||||
f = 1
|
||||
}
|
||||
y, err := lp.New("nv_remapped_rows_failure", device.tags, device.meta, map[string]interface{}{"value": f}, time.Now())
|
||||
y, err := lp.NewMessage("nv_remapped_rows_failure", device.tags, device.meta, map[string]interface{}{"value": f}, time.Now())
|
||||
if err == nil {
|
||||
output <- y
|
||||
}
|
||||
@@ -729,7 +729,7 @@ func readRemappedRows(device NvidiaCollectorDevice, output chan lp.CCMetric) err
|
||||
return nil
|
||||
}
|
||||
|
||||
func readProcessCounts(device NvidiaCollectorDevice, output chan lp.CCMetric) error {
|
||||
func readProcessCounts(device NvidiaCollectorDevice, output chan lp.CCMessage) error {
|
||||
if !device.excludeMetrics["nv_compute_processes"] {
|
||||
// Get information about processes with a compute context on a device
|
||||
//
|
||||
@@ -753,7 +753,7 @@ func readProcessCounts(device NvidiaCollectorDevice, output chan lp.CCMetric) er
|
||||
// Querying per-instance information using MIG device handles is not supported if the device is in vGPU Host virtualization mode.
|
||||
procList, ret := nvml.DeviceGetComputeRunningProcesses(device.device)
|
||||
if ret == nvml.SUCCESS {
|
||||
y, err := lp.New("nv_compute_processes", device.tags, device.meta, map[string]interface{}{"value": len(procList)}, time.Now())
|
||||
y, err := lp.NewMessage("nv_compute_processes", device.tags, device.meta, map[string]interface{}{"value": len(procList)}, time.Now())
|
||||
if err == nil {
|
||||
output <- y
|
||||
}
|
||||
@@ -782,7 +782,7 @@ func readProcessCounts(device NvidiaCollectorDevice, output chan lp.CCMetric) er
|
||||
// Querying per-instance information using MIG device handles is not supported if the device is in vGPU Host virtualization mode.
|
||||
procList, ret := nvml.DeviceGetGraphicsRunningProcesses(device.device)
|
||||
if ret == nvml.SUCCESS {
|
||||
y, err := lp.New("nv_graphics_processes", device.tags, device.meta, map[string]interface{}{"value": len(procList)}, time.Now())
|
||||
y, err := lp.NewMessage("nv_graphics_processes", device.tags, device.meta, map[string]interface{}{"value": len(procList)}, time.Now())
|
||||
if err == nil {
|
||||
output <- y
|
||||
}
|
||||
@@ -812,7 +812,7 @@ func readProcessCounts(device NvidiaCollectorDevice, output chan lp.CCMetric) er
|
||||
// // Querying per-instance information using MIG device handles is not supported if the device is in vGPU Host virtualization mode.
|
||||
// procList, ret := nvml.DeviceGetMPSComputeRunningProcesses(device.device)
|
||||
// if ret == nvml.SUCCESS {
|
||||
// y, err := lp.New("nv_mps_compute_processes", device.tags, device.meta, map[string]interface{}{"value": len(procList)}, time.Now())
|
||||
// y, err := lp.NewMessage("nv_mps_compute_processes", device.tags, device.meta, map[string]interface{}{"value": len(procList)}, time.Now())
|
||||
// if err == nil {
|
||||
// output <- y
|
||||
// }
|
||||
@@ -821,7 +821,7 @@ func readProcessCounts(device NvidiaCollectorDevice, output chan lp.CCMetric) er
|
||||
return nil
|
||||
}
|
||||
|
||||
func readViolationStats(device NvidiaCollectorDevice, output chan lp.CCMetric) error {
|
||||
func readViolationStats(device NvidiaCollectorDevice, output chan lp.CCMessage) error {
|
||||
var violTime nvml.ViolationTime
|
||||
var ret nvml.Return
|
||||
|
||||
@@ -840,7 +840,7 @@ func readViolationStats(device NvidiaCollectorDevice, output chan lp.CCMetric) e
|
||||
violTime, ret = nvml.DeviceGetViolationStatus(device.device, nvml.PERF_POLICY_POWER)
|
||||
if ret == nvml.SUCCESS {
|
||||
t := float64(violTime.ViolationTime) * 1e-9
|
||||
y, err := lp.New("nv_violation_power", device.tags, device.meta, map[string]interface{}{"value": t}, time.Now())
|
||||
y, err := lp.NewMessage("nv_violation_power", device.tags, device.meta, map[string]interface{}{"value": t}, time.Now())
|
||||
if err == nil {
|
||||
y.AddMeta("unit", "sec")
|
||||
output <- y
|
||||
@@ -852,7 +852,7 @@ func readViolationStats(device NvidiaCollectorDevice, output chan lp.CCMetric) e
|
||||
violTime, ret = nvml.DeviceGetViolationStatus(device.device, nvml.PERF_POLICY_THERMAL)
|
||||
if ret == nvml.SUCCESS {
|
||||
t := float64(violTime.ViolationTime) * 1e-9
|
||||
y, err := lp.New("nv_violation_thermal", device.tags, device.meta, map[string]interface{}{"value": t}, time.Now())
|
||||
y, err := lp.NewMessage("nv_violation_thermal", device.tags, device.meta, map[string]interface{}{"value": t}, time.Now())
|
||||
if err == nil {
|
||||
y.AddMeta("unit", "sec")
|
||||
output <- y
|
||||
@@ -864,7 +864,7 @@ func readViolationStats(device NvidiaCollectorDevice, output chan lp.CCMetric) e
|
||||
violTime, ret = nvml.DeviceGetViolationStatus(device.device, nvml.PERF_POLICY_SYNC_BOOST)
|
||||
if ret == nvml.SUCCESS {
|
||||
t := float64(violTime.ViolationTime) * 1e-9
|
||||
y, err := lp.New("nv_violation_sync_boost", device.tags, device.meta, map[string]interface{}{"value": t}, time.Now())
|
||||
y, err := lp.NewMessage("nv_violation_sync_boost", device.tags, device.meta, map[string]interface{}{"value": t}, time.Now())
|
||||
if err == nil {
|
||||
y.AddMeta("unit", "sec")
|
||||
output <- y
|
||||
@@ -876,7 +876,7 @@ func readViolationStats(device NvidiaCollectorDevice, output chan lp.CCMetric) e
|
||||
violTime, ret = nvml.DeviceGetViolationStatus(device.device, nvml.PERF_POLICY_BOARD_LIMIT)
|
||||
if ret == nvml.SUCCESS {
|
||||
t := float64(violTime.ViolationTime) * 1e-9
|
||||
y, err := lp.New("nv_violation_board_limit", device.tags, device.meta, map[string]interface{}{"value": t}, time.Now())
|
||||
y, err := lp.NewMessage("nv_violation_board_limit", device.tags, device.meta, map[string]interface{}{"value": t}, time.Now())
|
||||
if err == nil {
|
||||
y.AddMeta("unit", "sec")
|
||||
output <- y
|
||||
@@ -888,7 +888,7 @@ func readViolationStats(device NvidiaCollectorDevice, output chan lp.CCMetric) e
|
||||
violTime, ret = nvml.DeviceGetViolationStatus(device.device, nvml.PERF_POLICY_LOW_UTILIZATION)
|
||||
if ret == nvml.SUCCESS {
|
||||
t := float64(violTime.ViolationTime) * 1e-9
|
||||
y, err := lp.New("nv_violation_low_util", device.tags, device.meta, map[string]interface{}{"value": t}, time.Now())
|
||||
y, err := lp.NewMessage("nv_violation_low_util", device.tags, device.meta, map[string]interface{}{"value": t}, time.Now())
|
||||
if err == nil {
|
||||
y.AddMeta("unit", "sec")
|
||||
output <- y
|
||||
@@ -900,7 +900,7 @@ func readViolationStats(device NvidiaCollectorDevice, output chan lp.CCMetric) e
|
||||
violTime, ret = nvml.DeviceGetViolationStatus(device.device, nvml.PERF_POLICY_RELIABILITY)
|
||||
if ret == nvml.SUCCESS {
|
||||
t := float64(violTime.ViolationTime) * 1e-9
|
||||
y, err := lp.New("nv_violation_reliability", device.tags, device.meta, map[string]interface{}{"value": t}, time.Now())
|
||||
y, err := lp.NewMessage("nv_violation_reliability", device.tags, device.meta, map[string]interface{}{"value": t}, time.Now())
|
||||
if err == nil {
|
||||
y.AddMeta("unit", "sec")
|
||||
output <- y
|
||||
@@ -912,7 +912,7 @@ func readViolationStats(device NvidiaCollectorDevice, output chan lp.CCMetric) e
|
||||
violTime, ret = nvml.DeviceGetViolationStatus(device.device, nvml.PERF_POLICY_TOTAL_APP_CLOCKS)
|
||||
if ret == nvml.SUCCESS {
|
||||
t := float64(violTime.ViolationTime) * 1e-9
|
||||
y, err := lp.New("nv_violation_below_app_clock", device.tags, device.meta, map[string]interface{}{"value": t}, time.Now())
|
||||
y, err := lp.NewMessage("nv_violation_below_app_clock", device.tags, device.meta, map[string]interface{}{"value": t}, time.Now())
|
||||
if err == nil {
|
||||
y.AddMeta("unit", "sec")
|
||||
output <- y
|
||||
@@ -924,7 +924,7 @@ func readViolationStats(device NvidiaCollectorDevice, output chan lp.CCMetric) e
|
||||
violTime, ret = nvml.DeviceGetViolationStatus(device.device, nvml.PERF_POLICY_TOTAL_BASE_CLOCKS)
|
||||
if ret == nvml.SUCCESS {
|
||||
t := float64(violTime.ViolationTime) * 1e-9
|
||||
y, err := lp.New("nv_violation_below_base_clock", device.tags, device.meta, map[string]interface{}{"value": t}, time.Now())
|
||||
y, err := lp.NewMessage("nv_violation_below_base_clock", device.tags, device.meta, map[string]interface{}{"value": t}, time.Now())
|
||||
if err == nil {
|
||||
y.AddMeta("unit", "sec")
|
||||
output <- y
|
||||
@@ -935,7 +935,7 @@ func readViolationStats(device NvidiaCollectorDevice, output chan lp.CCMetric) e
|
||||
return nil
|
||||
}
|
||||
|
||||
func readNVLinkStats(device NvidiaCollectorDevice, output chan lp.CCMetric) error {
|
||||
func readNVLinkStats(device NvidiaCollectorDevice, output chan lp.CCMessage) error {
|
||||
// Retrieves the specified error counter value
|
||||
// Please refer to \a nvmlNvLinkErrorCounter_t for error counters that are available
|
||||
//
|
||||
@@ -956,7 +956,7 @@ func readNVLinkStats(device NvidiaCollectorDevice, output chan lp.CCMetric) erro
|
||||
count, ret := nvml.DeviceGetNvLinkErrorCounter(device.device, i, nvml.NVLINK_ERROR_DL_CRC_DATA)
|
||||
aggregate_crc_errors = aggregate_crc_errors + count
|
||||
if ret == nvml.SUCCESS {
|
||||
y, err := lp.New("nv_nvlink_crc_errors", device.tags, device.meta, map[string]interface{}{"value": count}, time.Now())
|
||||
y, err := lp.NewMessage("nv_nvlink_crc_errors", device.tags, device.meta, map[string]interface{}{"value": count}, time.Now())
|
||||
if err == nil {
|
||||
y.AddTag("stype", "nvlink")
|
||||
y.AddTag("stype-id", fmt.Sprintf("%d", i))
|
||||
@@ -969,7 +969,7 @@ func readNVLinkStats(device NvidiaCollectorDevice, output chan lp.CCMetric) erro
|
||||
count, ret := nvml.DeviceGetNvLinkErrorCounter(device.device, i, nvml.NVLINK_ERROR_DL_ECC_DATA)
|
||||
aggregate_ecc_errors = aggregate_ecc_errors + count
|
||||
if ret == nvml.SUCCESS {
|
||||
y, err := lp.New("nv_nvlink_ecc_errors", device.tags, device.meta, map[string]interface{}{"value": count}, time.Now())
|
||||
y, err := lp.NewMessage("nv_nvlink_ecc_errors", device.tags, device.meta, map[string]interface{}{"value": count}, time.Now())
|
||||
if err == nil {
|
||||
y.AddTag("stype", "nvlink")
|
||||
y.AddTag("stype-id", fmt.Sprintf("%d", i))
|
||||
@@ -982,7 +982,7 @@ func readNVLinkStats(device NvidiaCollectorDevice, output chan lp.CCMetric) erro
|
||||
count, ret := nvml.DeviceGetNvLinkErrorCounter(device.device, i, nvml.NVLINK_ERROR_DL_REPLAY)
|
||||
aggregate_replay_errors = aggregate_replay_errors + count
|
||||
if ret == nvml.SUCCESS {
|
||||
y, err := lp.New("nv_nvlink_replay_errors", device.tags, device.meta, map[string]interface{}{"value": count}, time.Now())
|
||||
y, err := lp.NewMessage("nv_nvlink_replay_errors", device.tags, device.meta, map[string]interface{}{"value": count}, time.Now())
|
||||
if err == nil {
|
||||
y.AddTag("stype", "nvlink")
|
||||
y.AddTag("stype-id", fmt.Sprintf("%d", i))
|
||||
@@ -995,7 +995,7 @@ func readNVLinkStats(device NvidiaCollectorDevice, output chan lp.CCMetric) erro
|
||||
count, ret := nvml.DeviceGetNvLinkErrorCounter(device.device, i, nvml.NVLINK_ERROR_DL_RECOVERY)
|
||||
aggregate_recovery_errors = aggregate_recovery_errors + count
|
||||
if ret == nvml.SUCCESS {
|
||||
y, err := lp.New("nv_nvlink_recovery_errors", device.tags, device.meta, map[string]interface{}{"value": count}, time.Now())
|
||||
y, err := lp.NewMessage("nv_nvlink_recovery_errors", device.tags, device.meta, map[string]interface{}{"value": count}, time.Now())
|
||||
if err == nil {
|
||||
y.AddTag("stype", "nvlink")
|
||||
y.AddTag("stype-id", fmt.Sprintf("%d", i))
|
||||
@@ -1008,7 +1008,7 @@ func readNVLinkStats(device NvidiaCollectorDevice, output chan lp.CCMetric) erro
|
||||
count, ret := nvml.DeviceGetNvLinkErrorCounter(device.device, i, nvml.NVLINK_ERROR_DL_CRC_FLIT)
|
||||
aggregate_crc_flit_errors = aggregate_crc_flit_errors + count
|
||||
if ret == nvml.SUCCESS {
|
||||
y, err := lp.New("nv_nvlink_crc_flit_errors", device.tags, device.meta, map[string]interface{}{"value": count}, time.Now())
|
||||
y, err := lp.NewMessage("nv_nvlink_crc_flit_errors", device.tags, device.meta, map[string]interface{}{"value": count}, time.Now())
|
||||
if err == nil {
|
||||
y.AddTag("stype", "nvlink")
|
||||
y.AddTag("stype-id", fmt.Sprintf("%d", i))
|
||||
@@ -1064,13 +1064,13 @@ func readNVLinkStats(device NvidiaCollectorDevice, output chan lp.CCMetric) erro
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *NvidiaCollector) Read(interval time.Duration, output chan lp.CCMetric) {
|
||||
func (m *NvidiaCollector) Read(interval time.Duration, output chan lp.CCMessage) {
|
||||
var err error
|
||||
if !m.init {
|
||||
return
|
||||
}
|
||||
|
||||
readAll := func(device NvidiaCollectorDevice, output chan lp.CCMetric) {
|
||||
readAll := func(device NvidiaCollectorDevice, output chan lp.CCMessage) {
|
||||
name, ret := nvml.DeviceGetName(device.device)
|
||||
if ret != nvml.SUCCESS {
|
||||
name = "NoName"
|
||||
|
@@ -10,7 +10,7 @@ import (
"time"

cclog "github.com/ClusterCockpit/cc-metric-collector/pkg/ccLogger"
-lp "github.com/ClusterCockpit/cc-metric-collector/pkg/ccMetric"
+lp "github.com/ClusterCockpit/cc-energy-manager/pkg/cc-message"
)

// running average power limit (RAPL) monitoring attributes for a zone
@@ -214,7 +214,7 @@ func (m *RAPLCollector) Init(config json.RawMessage) error {

// Read reads running average power limit (RAPL) monitoring attributes for all initialized zones
// See: https://www.kernel.org/doc/html/latest/power/powercap/powercap.html#monitoring-attributes
-func (m *RAPLCollector) Read(interval time.Duration, output chan lp.CCMetric) {
+func (m *RAPLCollector) Read(interval time.Duration, output chan lp.CCMessage) {

for i := range m.RAPLZoneInfo {
p := &m.RAPLZoneInfo[i]
@@ -237,7 +237,7 @@ func (m *RAPLCollector) Read(interval time.Duration, output chan lp.CCMetric) {
timeDiff := energyTimestamp.Sub(p.energyTimestamp)
averagePower := float64(energyDiff) / float64(timeDiff.Microseconds())

-y, err := lp.New(
+y, err := lp.NewMessage(
"rapl_average_power",
p.tags,
m.meta,

@@ -7,7 +7,7 @@ import (
"time"

cclog "github.com/ClusterCockpit/cc-metric-collector/pkg/ccLogger"
-lp "github.com/ClusterCockpit/cc-metric-collector/pkg/ccMetric"
+lp "github.com/ClusterCockpit/cc-energy-manager/pkg/cc-message"
"github.com/ClusterCockpit/go-rocm-smi/pkg/rocm_smi"
)

@@ -162,7 +162,7 @@ func (m *RocmSmiCollector) Init(config json.RawMessage) error {

// Read collects all metrics belonging to the sample collector
// and sends them through the output channel to the collector manager
-func (m *RocmSmiCollector) Read(interval time.Duration, output chan lp.CCMetric) {
+func (m *RocmSmiCollector) Read(interval time.Duration, output chan lp.CCMessage) {
// Create a sample metric
timestamp := time.Now()

@@ -175,119 +175,119 @@ func (m *RocmSmiCollector) Read(interval time.Duration, output chan lp.CCMetric)

if !dev.excludeMetrics["rocm_gfx_util"] {
value := metrics.Average_gfx_activity
-y, err := lp.New("rocm_gfx_util", dev.tags, dev.meta, map[string]interface{}{"value": value}, timestamp)
+y, err := lp.NewMessage("rocm_gfx_util", dev.tags, dev.meta, map[string]interface{}{"value": value}, timestamp)
if err == nil {
output <- y
}
}
if !dev.excludeMetrics["rocm_umc_util"] {
value := metrics.Average_umc_activity
-y, err := lp.New("rocm_umc_util", dev.tags, dev.meta, map[string]interface{}{"value": value}, timestamp)
+y, err := lp.NewMessage("rocm_umc_util", dev.tags, dev.meta, map[string]interface{}{"value": value}, timestamp)
if err == nil {
output <- y
}
}
if !dev.excludeMetrics["rocm_mm_util"] {
value := metrics.Average_mm_activity
-y, err := lp.New("rocm_mm_util", dev.tags, dev.meta, map[string]interface{}{"value": value}, timestamp)
+y, err := lp.NewMessage("rocm_mm_util", dev.tags, dev.meta, map[string]interface{}{"value": value}, timestamp)
if err == nil {
output <- y
}
}
if !dev.excludeMetrics["rocm_avg_power"] {
value := metrics.Average_socket_power
-y, err := lp.New("rocm_avg_power", dev.tags, dev.meta, map[string]interface{}{"value": value}, timestamp)
+y, err := lp.NewMessage("rocm_avg_power", dev.tags, dev.meta, map[string]interface{}{"value": value}, timestamp)
if err == nil {
output <- y
}
}
if !dev.excludeMetrics["rocm_temp_mem"] {
value := metrics.Temperature_mem
-y, err := lp.New("rocm_temp_mem", dev.tags, dev.meta, map[string]interface{}{"value": value}, timestamp)
+y, err := lp.NewMessage("rocm_temp_mem", dev.tags, dev.meta, map[string]interface{}{"value": value}, timestamp)
if err == nil {
output <- y
}
}
if !dev.excludeMetrics["rocm_temp_hotspot"] {
value := metrics.Temperature_hotspot
-y, err := lp.New("rocm_temp_hotspot", dev.tags, dev.meta, map[string]interface{}{"value": value}, timestamp)
+y, err := lp.NewMessage("rocm_temp_hotspot", dev.tags, dev.meta, map[string]interface{}{"value": value}, timestamp)
if err == nil {
output <- y
}
}
if !dev.excludeMetrics["rocm_temp_edge"] {
value := metrics.Temperature_edge
-y, err := lp.New("rocm_temp_edge", dev.tags, dev.meta, map[string]interface{}{"value": value}, timestamp)
+y, err := lp.NewMessage("rocm_temp_edge", dev.tags, dev.meta, map[string]interface{}{"value": value}, timestamp)
if err == nil {
output <- y
}
}
if !dev.excludeMetrics["rocm_temp_vrgfx"] {
value := metrics.Temperature_vrgfx
-y, err := lp.New("rocm_temp_vrgfx", dev.tags, dev.meta, map[string]interface{}{"value": value}, timestamp)
+y, err := lp.NewMessage("rocm_temp_vrgfx", dev.tags, dev.meta, map[string]interface{}{"value": value}, timestamp)
if err == nil {
output <- y
}
}
if !dev.excludeMetrics["rocm_temp_vrsoc"] {
value := metrics.Temperature_vrsoc
-y, err := lp.New("rocm_temp_vrsoc", dev.tags, dev.meta, map[string]interface{}{"value": value}, timestamp)
+y, err := lp.NewMessage("rocm_temp_vrsoc", dev.tags, dev.meta, map[string]interface{}{"value": value}, timestamp)
if err == nil {
output <- y
}
}
if !dev.excludeMetrics["rocm_temp_vrmem"] {
value := metrics.Temperature_vrmem
-y, err := lp.New("rocm_temp_vrmem", dev.tags, dev.meta, map[string]interface{}{"value": value}, timestamp)
+y, err := lp.NewMessage("rocm_temp_vrmem", dev.tags, dev.meta, map[string]interface{}{"value": value}, timestamp)
if err == nil {
output <- y
}
}
if !dev.excludeMetrics["rocm_gfx_clock"] {
value := metrics.Average_gfxclk_frequency
-y, err := lp.New("rocm_gfx_clock", dev.tags, dev.meta, map[string]interface{}{"value": value}, timestamp)
+y, err := lp.NewMessage("rocm_gfx_clock", dev.tags, dev.meta, map[string]interface{}{"value": value}, timestamp)
if err == nil {
output <- y
}
}
if !dev.excludeMetrics["rocm_soc_clock"] {
value := metrics.Average_socclk_frequency
-y, err := lp.New("rocm_soc_clock", dev.tags, dev.meta, map[string]interface{}{"value": value}, timestamp)
+y, err := lp.NewMessage("rocm_soc_clock", dev.tags, dev.meta, map[string]interface{}{"value": value}, timestamp)
if err == nil {
output <- y
}
}
if !dev.excludeMetrics["rocm_u_clock"] {
value := metrics.Average_uclk_frequency
-y, err := lp.New("rocm_u_clock", dev.tags, dev.meta, map[string]interface{}{"value": value}, timestamp)
+y, err := lp.NewMessage("rocm_u_clock", dev.tags, dev.meta, map[string]interface{}{"value": value}, timestamp)
if err == nil {
output <- y
}
}
if !dev.excludeMetrics["rocm_v0_clock"] {
value := metrics.Average_vclk0_frequency
-y, err := lp.New("rocm_v0_clock", dev.tags, dev.meta, map[string]interface{}{"value": value}, timestamp)
+y, err := lp.NewMessage("rocm_v0_clock", dev.tags, dev.meta, map[string]interface{}{"value": value}, timestamp)
if err == nil {
output <- y
}
}
if !dev.excludeMetrics["rocm_v1_clock"] {
value := metrics.Average_vclk1_frequency
-y, err := lp.New("rocm_v1_clock", dev.tags, dev.meta, map[string]interface{}{"value": value}, timestamp)
+y, err := lp.NewMessage("rocm_v1_clock", dev.tags, dev.meta, map[string]interface{}{"value": value}, timestamp)
if err == nil {
output <- y
}
}
if !dev.excludeMetrics["rocm_d0_clock"] {
value := metrics.Average_dclk0_frequency
-y, err := lp.New("rocm_d0_clock", dev.tags, dev.meta, map[string]interface{}{"value": value}, timestamp)
+y, err := lp.NewMessage("rocm_d0_clock", dev.tags, dev.meta, map[string]interface{}{"value": value}, timestamp)
if err == nil {
output <- y
}
}
if !dev.excludeMetrics["rocm_d1_clock"] {
value := metrics.Average_dclk1_frequency
-y, err := lp.New("rocm_d1_clock", dev.tags, dev.meta, map[string]interface{}{"value": value}, timestamp)
+y, err := lp.NewMessage("rocm_d1_clock", dev.tags, dev.meta, map[string]interface{}{"value": value}, timestamp)
if err == nil {
output <- y
}
@@ -295,7 +295,7 @@ func (m *RocmSmiCollector) Read(interval time.Duration, output chan lp.CCMetric)
if !dev.excludeMetrics["rocm_temp_hbm"] {
for i := 0; i < rocm_smi.NUM_HBM_INSTANCES; i++ {
value := metrics.Temperature_hbm[i]
-y, err := lp.New("rocm_temp_hbm", dev.tags, dev.meta, map[string]interface{}{"value": value}, timestamp)
+y, err := lp.NewMessage("rocm_temp_hbm", dev.tags, dev.meta, map[string]interface{}{"value": value}, timestamp)
if err == nil {
y.AddTag("stype", "device")
y.AddTag("stype-id", fmt.Sprintf("%d", i))

@@ -5,7 +5,7 @@ import (
"time"

cclog "github.com/ClusterCockpit/cc-metric-collector/pkg/ccLogger"
-lp "github.com/ClusterCockpit/cc-metric-collector/pkg/ccMetric"
+lp "github.com/ClusterCockpit/cc-energy-manager/pkg/cc-message"
)

// These are the fields we read from the JSON configuration
@@ -74,7 +74,7 @@ func (m *SampleCollector) Init(config json.RawMessage) error {

// Read collects all metrics belonging to the sample collector
// and sends them through the output channel to the collector manager
-func (m *SampleCollector) Read(interval time.Duration, output chan lp.CCMetric) {
+func (m *SampleCollector) Read(interval time.Duration, output chan lp.CCMessage) {
// Create a sample metric
timestamp := time.Now()

@@ -85,7 +85,7 @@ func (m *SampleCollector) Read(interval time.Duration, output chan lp.CCMetric)
// stop := readState()
// value = (stop - start) / interval.Seconds()

-y, err := lp.New("sample_metric", m.tags, m.meta, map[string]interface{}{"value": value}, timestamp)
+y, err := lp.NewMessage("sample_metric", m.tags, m.meta, map[string]interface{}{"value": value}, timestamp)
if err == nil {
// Send it to output channel
output <- y

@@ -6,7 +6,7 @@ import (
"time"

cclog "github.com/ClusterCockpit/cc-metric-collector/pkg/ccLogger"
-lp "github.com/ClusterCockpit/cc-metric-collector/pkg/ccMetric"
+lp "github.com/ClusterCockpit/cc-energy-manager/pkg/cc-message"
)

// These are the fields we read from the JSON configuration
@@ -25,7 +25,7 @@ type SampleTimerCollector struct {
config SampleTimerCollectorConfig // the configuration structure
interval time.Duration // the interval parsed from configuration
ticker *time.Ticker // own timer
-output chan lp.CCMetric // own internal output channel
+output chan lp.CCMessage // own internal output channel
}

func (m *SampleTimerCollector) Init(name string, config json.RawMessage) error {
@@ -100,14 +100,14 @@ func (m *SampleTimerCollector) ReadMetrics(timestamp time.Time) {
// stop := readState()
// value = (stop - start) / interval.Seconds()

-y, err := lp.New("sample_metric", m.tags, m.meta, map[string]interface{}{"value": value}, timestamp)
+y, err := lp.NewMessage("sample_metric", m.tags, m.meta, map[string]interface{}{"value": value}, timestamp)
if err == nil && m.output != nil {
// Send it to output channel if we have a valid channel
m.output <- y
}
}

-func (m *SampleTimerCollector) Read(interval time.Duration, output chan lp.CCMetric) {
+func (m *SampleTimerCollector) Read(interval time.Duration, output chan lp.CCMessage) {
// Capture output channel
m.output = output
}

@@ -11,7 +11,7 @@ import (
"time"

cclog "github.com/ClusterCockpit/cc-metric-collector/pkg/ccLogger"
-lp "github.com/ClusterCockpit/cc-metric-collector/pkg/ccMetric"
+lp "github.com/ClusterCockpit/cc-energy-manager/pkg/cc-message"
)

const SCHEDSTATFILE = `/proc/schedstat`
@@ -96,7 +96,7 @@ func (m *SchedstatCollector) Init(config json.RawMessage) error {
return err
}

-func (m *SchedstatCollector) ParseProcLine(linefields []string, tags map[string]string, output chan lp.CCMetric, now time.Time, tsdelta time.Duration) {
+func (m *SchedstatCollector) ParseProcLine(linefields []string, tags map[string]string, output chan lp.CCMessage, now time.Time, tsdelta time.Duration) {
running, _ := strconv.ParseInt(linefields[7], 10, 64)
waiting, _ := strconv.ParseInt(linefields[8], 10, 64)
diff_running := running - m.olddata[linefields[0]]["running"]
@@ -109,7 +109,7 @@ func (m *SchedstatCollector) ParseProcLine(linefields []string, tags map[string]
m.olddata[linefields[0]]["waiting"] = waiting
value := l_running + l_waiting

-y, err := lp.New("cpu_load_core", tags, m.meta, map[string]interface{}{"value": value}, now)
+y, err := lp.NewMessage("cpu_load_core", tags, m.meta, map[string]interface{}{"value": value}, now)
if err == nil {
// Send it to output channel
output <- y
@@ -118,7 +118,7 @@ func (m *SchedstatCollector) ParseProcLine(linefields []string, tags map[string]

// Read collects all metrics belonging to the sample collector
// and sends them through the output channel to the collector manager
-func (m *SchedstatCollector) Read(interval time.Duration, output chan lp.CCMetric) {
+func (m *SchedstatCollector) Read(interval time.Duration, output chan lp.CCMessage) {
if !m.init {
return
}

@@ -7,7 +7,7 @@ import (
"time"

cclog "github.com/ClusterCockpit/cc-metric-collector/pkg/ccLogger"
-lp "github.com/ClusterCockpit/cc-metric-collector/pkg/ccMetric"
+lp "github.com/ClusterCockpit/cc-energy-manager/pkg/cc-message"
)

type SelfCollectorConfig struct {
@@ -42,56 +42,56 @@ func (m *SelfCollector) Init(config json.RawMessage) error {
return err
}

-func (m *SelfCollector) Read(interval time.Duration, output chan lp.CCMetric) {
+func (m *SelfCollector) Read(interval time.Duration, output chan lp.CCMessage) {
timestamp := time.Now()

if m.config.MemStats {
var memstats runtime.MemStats
runtime.ReadMemStats(&memstats)

-y, err := lp.New("total_alloc", m.tags, m.meta, map[string]interface{}{"value": memstats.TotalAlloc}, timestamp)
+y, err := lp.NewMessage("total_alloc", m.tags, m.meta, map[string]interface{}{"value": memstats.TotalAlloc}, timestamp)
if err == nil {
y.AddMeta("unit", "Bytes")
output <- y
}
-y, err = lp.New("heap_alloc", m.tags, m.meta, map[string]interface{}{"value": memstats.HeapAlloc}, timestamp)
+y, err = lp.NewMessage("heap_alloc", m.tags, m.meta, map[string]interface{}{"value": memstats.HeapAlloc}, timestamp)
if err == nil {
y.AddMeta("unit", "Bytes")
output <- y
}
-y, err = lp.New("heap_sys", m.tags, m.meta, map[string]interface{}{"value": memstats.HeapSys}, timestamp)
+y, err = lp.NewMessage("heap_sys", m.tags, m.meta, map[string]interface{}{"value": memstats.HeapSys}, timestamp)
if err == nil {
y.AddMeta("unit", "Bytes")
output <- y
}
-y, err = lp.New("heap_idle", m.tags, m.meta, map[string]interface{}{"value": memstats.HeapIdle}, timestamp)
+y, err = lp.NewMessage("heap_idle", m.tags, m.meta, map[string]interface{}{"value": memstats.HeapIdle}, timestamp)
if err == nil {
y.AddMeta("unit", "Bytes")
output <- y
}
-y, err = lp.New("heap_inuse", m.tags, m.meta, map[string]interface{}{"value": memstats.HeapInuse}, timestamp)
+y, err = lp.NewMessage("heap_inuse", m.tags, m.meta, map[string]interface{}{"value": memstats.HeapInuse}, timestamp)
if err == nil {
y.AddMeta("unit", "Bytes")
output <- y
}
-y, err = lp.New("heap_released", m.tags, m.meta, map[string]interface{}{"value": memstats.HeapReleased}, timestamp)
+y, err = lp.NewMessage("heap_released", m.tags, m.meta, map[string]interface{}{"value": memstats.HeapReleased}, timestamp)
if err == nil {
y.AddMeta("unit", "Bytes")
output <- y
}
-y, err = lp.New("heap_objects", m.tags, m.meta, map[string]interface{}{"value": memstats.HeapObjects}, timestamp)
+y, err = lp.NewMessage("heap_objects", m.tags, m.meta, map[string]interface{}{"value": memstats.HeapObjects}, timestamp)
if err == nil {
output <- y
}
}
if m.config.GoRoutines {
-y, err := lp.New("num_goroutines", m.tags, m.meta, map[string]interface{}{"value": runtime.NumGoroutine()}, timestamp)
+y, err := lp.NewMessage("num_goroutines", m.tags, m.meta, map[string]interface{}{"value": runtime.NumGoroutine()}, timestamp)
if err == nil {
output <- y
}
}
if m.config.CgoCalls {
-y, err := lp.New("num_cgo_calls", m.tags, m.meta, map[string]interface{}{"value": runtime.NumCgoCall()}, timestamp)
+y, err := lp.NewMessage("num_cgo_calls", m.tags, m.meta, map[string]interface{}{"value": runtime.NumCgoCall()}, timestamp)
if err == nil {
output <- y
}
@@ -102,35 +102,35 @@ func (m *SelfCollector) Read(interval time.Duration, output chan lp.CCMetric) {
if err == nil {
sec, nsec := rusage.Utime.Unix()
t := float64(sec) + (float64(nsec) * 1e-9)
-y, err := lp.New("rusage_user_time", m.tags, m.meta, map[string]interface{}{"value": t}, timestamp)
+y, err := lp.NewMessage("rusage_user_time", m.tags, m.meta, map[string]interface{}{"value": t}, timestamp)
if err == nil {
y.AddMeta("unit", "seconds")
output <- y
}
sec, nsec = rusage.Stime.Unix()
t = float64(sec) + (float64(nsec) * 1e-9)
-y, err = lp.New("rusage_system_time", m.tags, m.meta, map[string]interface{}{"value": t}, timestamp)
+y, err = lp.NewMessage("rusage_system_time", m.tags, m.meta, map[string]interface{}{"value": t}, timestamp)
if err == nil {
y.AddMeta("unit", "seconds")
output <- y
}
-y, err = lp.New("rusage_vol_ctx_switch", m.tags, m.meta, map[string]interface{}{"value": rusage.Nvcsw}, timestamp)
+y, err = lp.NewMessage("rusage_vol_ctx_switch", m.tags, m.meta, map[string]interface{}{"value": rusage.Nvcsw}, timestamp)
if err == nil {
output <- y
}
-y, err = lp.New("rusage_invol_ctx_switch", m.tags, m.meta, map[string]interface{}{"value": rusage.Nivcsw}, timestamp)
+y, err = lp.NewMessage("rusage_invol_ctx_switch", m.tags, m.meta, map[string]interface{}{"value": rusage.Nivcsw}, timestamp)
if err == nil {
output <- y
}
-y, err = lp.New("rusage_signals", m.tags, m.meta, map[string]interface{}{"value": rusage.Nsignals}, timestamp)
+y, err = lp.NewMessage("rusage_signals", m.tags, m.meta, map[string]interface{}{"value": rusage.Nsignals}, timestamp)
if err == nil {
output <- y
}
-y, err = lp.New("rusage_major_pgfaults", m.tags, m.meta, map[string]interface{}{"value": rusage.Majflt}, timestamp)
+y, err = lp.NewMessage("rusage_major_pgfaults", m.tags, m.meta, map[string]interface{}{"value": rusage.Majflt}, timestamp)
if err == nil {
output <- y
}
-y, err = lp.New("rusage_minor_pgfaults", m.tags, m.meta, map[string]interface{}{"value": rusage.Minflt}, timestamp)
+y, err = lp.NewMessage("rusage_minor_pgfaults", m.tags, m.meta, map[string]interface{}{"value": rusage.Minflt}, timestamp)
if err == nil {
output <- y
}

@@ -10,7 +10,7 @@ import (
"time"

cclog "github.com/ClusterCockpit/cc-metric-collector/pkg/ccLogger"
-lp "github.com/ClusterCockpit/cc-metric-collector/pkg/ccMetric"
+lp "github.com/ClusterCockpit/cc-energy-manager/pkg/cc-message"
)

// See: https://www.kernel.org/doc/html/latest/hwmon/sysfs-interface.html
@@ -171,7 +171,7 @@ func (m *TempCollector) Init(config json.RawMessage) error {
return nil
}

-func (m *TempCollector) Read(interval time.Duration, output chan lp.CCMetric) {
+func (m *TempCollector) Read(interval time.Duration, output chan lp.CCMessage) {

for _, sensor := range m.sensors {
// Read sensor file
@@ -190,7 +190,7 @@ func (m *TempCollector) Read(interval time.Duration, output chan lp.CCMetric) {
continue
}
x /= 1000
-y, err := lp.New(
+y, err := lp.NewMessage(
sensor.metricName,
sensor.tags,
m.meta,
@@ -203,7 +203,7 @@ func (m *TempCollector) Read(interval time.Duration, output chan lp.CCMetric) {

// max temperature
if m.config.ReportMaxTemp && sensor.maxTemp != 0 {
-y, err := lp.New(
+y, err := lp.NewMessage(
sensor.maxTempName,
sensor.tags,
m.meta,
@@ -217,7 +217,7 @@ func (m *TempCollector) Read(interval time.Duration, output chan lp.CCMetric) {

// critical temperature
if m.config.ReportCriticalTemp && sensor.critTemp != 0 {
-y, err := lp.New(
+y, err := lp.NewMessage(
sensor.critTempName,
sensor.tags,
m.meta,

@@ -9,7 +9,7 @@ import (
"strings"
"time"

-lp "github.com/ClusterCockpit/cc-metric-collector/pkg/ccMetric"
+lp "github.com/ClusterCockpit/cc-energy-manager/pkg/cc-message"
)

const MAX_NUM_PROCS = 10
@@ -53,7 +53,7 @@ func (m *TopProcsCollector) Init(config json.RawMessage) error {
return nil
}

-func (m *TopProcsCollector) Read(interval time.Duration, output chan lp.CCMetric) {
+func (m *TopProcsCollector) Read(interval time.Duration, output chan lp.CCMessage) {
if !m.init {
return
}
@@ -68,7 +68,7 @@ func (m *TopProcsCollector) Read(interval time.Duration, output chan lp.CCMetric
lines := strings.Split(string(stdout), "\n")
for i := 1; i < m.config.Num_procs+1; i++ {
name := fmt.Sprintf("topproc%d", i)
-y, err := lp.New(name, m.tags, m.meta, map[string]interface{}{"value": string(lines[i])}, time.Now())
+y, err := lp.NewMessage(name, m.tags, m.meta, map[string]interface{}{"value": string(lines[i])}, time.Now())
if err == nil {
output <- y
}
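
Taken together, the hunks above apply one mechanical migration in every collector: the ccMetric import is replaced by lp "github.com/ClusterCockpit/cc-energy-manager/pkg/cc-message", the Read/ParseProcLine signatures change from chan lp.CCMetric to chan lp.CCMessage, and messages are constructed with lp.NewMessage instead of lp.New. A minimal sketch of the resulting collector-side pattern is shown below; the collector type, metric name, and value are purely illustrative (not part of the commit), and the helper signatures (NewMessage, AddMeta) are assumed to match their use in this diff.

// Illustrative sketch only: a collector Read method after the CCMessage
// migration, assuming the cc-message package provides NewMessage and AddMeta
// as they are used in the hunks above.
package collectors

import (
	"time"

	lp "github.com/ClusterCockpit/cc-energy-manager/pkg/cc-message"
)

// ExampleCollector is a hypothetical collector used only for this sketch.
type ExampleCollector struct {
	init bool
	tags map[string]string
	meta map[string]string
}

func (m *ExampleCollector) Read(interval time.Duration, output chan lp.CCMessage) {
	if !m.init {
		return
	}
	// Placeholder measurement; a real collector would read hardware or /proc here.
	value := 42.0
	y, err := lp.NewMessage("example_metric", m.tags, m.meta,
		map[string]interface{}{"value": value}, time.Now())
	if err == nil {
		y.AddMeta("unit", "count")
		output <- y
	}
}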