mirror of
				https://github.com/ClusterCockpit/cc-metric-collector.git
				synced 2025-11-04 02:35:07 +01:00 
			
		
		
		
	Ccmessage migration (#119)
* Add cpu_used (all-cpu_idle) to CpustatCollector * Update cc-metric-collector.init * Allow selection of timestamp precision in HttpSink * Add comment about precision requirement for cc-metric-store * Fix for API changes in gofish@v0.15.0 * Update requirements to latest version * Read sensors through redfish * Update golang toolchain to 1.21 * Remove stray error check * Update main config in configuration.md * Update Release action to use golang 1.22 stable release, no golang RPMs anymore * Update runonce action to use golang 1.22 stable release, no golang RPMs anymore * Switch to CCMessage for all files. --------- Co-authored-by: Holger Obermaier <Holger.Obermaier@kit.edu> Co-authored-by: Holger Obermaier <40787752+ho-ob@users.noreply.github.com>
This commit is contained in:
		@@ -15,7 +15,7 @@ import (
 | 
			
		||||
	"time"
 | 
			
		||||
 | 
			
		||||
	cclog "github.com/ClusterCockpit/cc-metric-collector/pkg/ccLogger"
 | 
			
		||||
	lp "github.com/ClusterCockpit/cc-metric-collector/pkg/ccMetric"
 | 
			
		||||
	lp "github.com/ClusterCockpit/cc-energy-manager/pkg/cc-message"
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
const DEFAULT_BEEGFS_CMD = "beegfs-ctl"
 | 
			
		||||
@@ -110,7 +110,7 @@ func (m *BeegfsMetaCollector) Init(config json.RawMessage) error {
 | 
			
		||||
	return nil
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func (m *BeegfsMetaCollector) Read(interval time.Duration, output chan lp.CCMetric) {
 | 
			
		||||
func (m *BeegfsMetaCollector) Read(interval time.Duration, output chan lp.CCMessage) {
 | 
			
		||||
	if !m.init {
 | 
			
		||||
		return
 | 
			
		||||
	}
 | 
			
		||||
@@ -216,7 +216,7 @@ func (m *BeegfsMetaCollector) Read(interval time.Duration, output chan lp.CCMetr
 | 
			
		||||
 | 
			
		||||
			for key, data := range m.matches {
 | 
			
		||||
				value, _ := strconv.ParseFloat(data, 32)
 | 
			
		||||
				y, err := lp.New(key, m.tags, m.meta, map[string]interface{}{"value": value}, time.Now())
 | 
			
		||||
				y, err := lp.NewMessage(key, m.tags, m.meta, map[string]interface{}{"value": value}, time.Now())
 | 
			
		||||
				if err == nil {
 | 
			
		||||
					output <- y
 | 
			
		||||
				}
 | 
			
		||||
 
 | 
			
		||||
@@ -15,7 +15,7 @@ import (
 | 
			
		||||
	"time"
 | 
			
		||||
 | 
			
		||||
	cclog "github.com/ClusterCockpit/cc-metric-collector/pkg/ccLogger"
 | 
			
		||||
	lp "github.com/ClusterCockpit/cc-metric-collector/pkg/ccMetric"
 | 
			
		||||
	lp "github.com/ClusterCockpit/cc-energy-manager/pkg/cc-message"
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
// Struct for the collector-specific JSON config
 | 
			
		||||
@@ -103,7 +103,7 @@ func (m *BeegfsStorageCollector) Init(config json.RawMessage) error {
 | 
			
		||||
	return nil
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func (m *BeegfsStorageCollector) Read(interval time.Duration, output chan lp.CCMetric) {
 | 
			
		||||
func (m *BeegfsStorageCollector) Read(interval time.Duration, output chan lp.CCMessage) {
 | 
			
		||||
	if !m.init {
 | 
			
		||||
		return
 | 
			
		||||
	}
 | 
			
		||||
@@ -208,7 +208,7 @@ func (m *BeegfsStorageCollector) Read(interval time.Duration, output chan lp.CCM
 | 
			
		||||
 | 
			
		||||
			for key, data := range m.matches {
 | 
			
		||||
				value, _ := strconv.ParseFloat(data, 32)
 | 
			
		||||
				y, err := lp.New(key, m.tags, m.meta, map[string]interface{}{"value": value}, time.Now())
 | 
			
		||||
				y, err := lp.NewMessage(key, m.tags, m.meta, map[string]interface{}{"value": value}, time.Now())
 | 
			
		||||
				if err == nil {
 | 
			
		||||
					output <- y
 | 
			
		||||
				}
 | 
			
		||||
 
 | 
			
		||||
@@ -7,7 +7,7 @@ import (
 | 
			
		||||
	"time"
 | 
			
		||||
 | 
			
		||||
	cclog "github.com/ClusterCockpit/cc-metric-collector/pkg/ccLogger"
 | 
			
		||||
	lp "github.com/ClusterCockpit/cc-metric-collector/pkg/ccMetric"
 | 
			
		||||
	lp "github.com/ClusterCockpit/cc-energy-manager/pkg/cc-message"
 | 
			
		||||
	mct "github.com/ClusterCockpit/cc-metric-collector/pkg/multiChanTicker"
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
@@ -47,7 +47,7 @@ var AvailableCollectors = map[string]MetricCollector{
 | 
			
		||||
type collectorManager struct {
 | 
			
		||||
	collectors   []MetricCollector          // List of metric collectors to read in parallel
 | 
			
		||||
	serial       []MetricCollector          // List of metric collectors to read serially
 | 
			
		||||
	output       chan lp.CCMetric           // Output channels
 | 
			
		||||
	output       chan lp.CCMessage           // Output channels
 | 
			
		||||
	done         chan bool                  // channel to finish / stop metric collector manager
 | 
			
		||||
	ticker       mct.MultiChanTicker        // periodically ticking once each interval
 | 
			
		||||
	duration     time.Duration              // duration (for metrics that measure over a given duration)
 | 
			
		||||
@@ -60,7 +60,7 @@ type collectorManager struct {
 | 
			
		||||
// Metric collector manager access functions
 | 
			
		||||
type CollectorManager interface {
 | 
			
		||||
	Init(ticker mct.MultiChanTicker, duration time.Duration, wg *sync.WaitGroup, collectConfigFile string) error
 | 
			
		||||
	AddOutput(output chan lp.CCMetric)
 | 
			
		||||
	AddOutput(output chan lp.CCMessage)
 | 
			
		||||
	Start()
 | 
			
		||||
	Close()
 | 
			
		||||
}
 | 
			
		||||
@@ -187,7 +187,7 @@ func (cm *collectorManager) Start() {
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// AddOutput adds the output channel to the metric collector manager
 | 
			
		||||
func (cm *collectorManager) AddOutput(output chan lp.CCMetric) {
 | 
			
		||||
func (cm *collectorManager) AddOutput(output chan lp.CCMessage) {
 | 
			
		||||
	cm.output = output
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -10,8 +10,8 @@ import (
 | 
			
		||||
	"strings"
 | 
			
		||||
	"time"
 | 
			
		||||
 | 
			
		||||
	lp "github.com/ClusterCockpit/cc-energy-manager/pkg/cc-message"
 | 
			
		||||
	cclog "github.com/ClusterCockpit/cc-metric-collector/pkg/ccLogger"
 | 
			
		||||
	lp "github.com/ClusterCockpit/cc-metric-collector/pkg/ccMetric"
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
// CPUFreqCollector
 | 
			
		||||
@@ -112,14 +112,14 @@ func (m *CPUFreqCpuInfoCollector) Init(config json.RawMessage) error {
 | 
			
		||||
 | 
			
		||||
	// Check if at least one CPU with frequency information was detected
 | 
			
		||||
	if len(m.topology) == 0 {
 | 
			
		||||
		return fmt.Errorf("No CPU frequency info found in %s", cpuInfoFile)
 | 
			
		||||
		return fmt.Errorf("no CPU frequency info found in %s", cpuInfoFile)
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	m.init = true
 | 
			
		||||
	return nil
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func (m *CPUFreqCpuInfoCollector) Read(interval time.Duration, output chan lp.CCMetric) {
 | 
			
		||||
func (m *CPUFreqCpuInfoCollector) Read(interval time.Duration, output chan lp.CCMessage) {
 | 
			
		||||
	// Check if already initialized
 | 
			
		||||
	if !m.init {
 | 
			
		||||
		return
 | 
			
		||||
@@ -154,7 +154,7 @@ func (m *CPUFreqCpuInfoCollector) Read(interval time.Duration, output chan lp.CC
 | 
			
		||||
							fmt.Sprintf("Read(): Failed to convert cpu MHz '%s' to float64: %v", lineSplit[1], err))
 | 
			
		||||
						return
 | 
			
		||||
					}
 | 
			
		||||
					if y, err := lp.New("cpufreq", t.tagSet, m.meta, map[string]interface{}{"value": value}, now); err == nil {
 | 
			
		||||
					if y, err := lp.NewMessage("cpufreq", t.tagSet, m.meta, map[string]interface{}{"value": value}, now); err == nil {
 | 
			
		||||
						output <- y
 | 
			
		||||
					}
 | 
			
		||||
				}
 | 
			
		||||
 
 | 
			
		||||
@@ -10,7 +10,7 @@ import (
 | 
			
		||||
	"time"
 | 
			
		||||
 | 
			
		||||
	cclog "github.com/ClusterCockpit/cc-metric-collector/pkg/ccLogger"
 | 
			
		||||
	lp "github.com/ClusterCockpit/cc-metric-collector/pkg/ccMetric"
 | 
			
		||||
	lp "github.com/ClusterCockpit/cc-energy-manager/pkg/cc-message"
 | 
			
		||||
	"github.com/ClusterCockpit/cc-metric-collector/pkg/ccTopology"
 | 
			
		||||
	"golang.org/x/sys/unix"
 | 
			
		||||
)
 | 
			
		||||
@@ -91,7 +91,7 @@ func (m *CPUFreqCollector) Init(config json.RawMessage) error {
 | 
			
		||||
	return nil
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func (m *CPUFreqCollector) Read(interval time.Duration, output chan lp.CCMetric) {
 | 
			
		||||
func (m *CPUFreqCollector) Read(interval time.Duration, output chan lp.CCMessage) {
 | 
			
		||||
	// Check if already initialized
 | 
			
		||||
	if !m.init {
 | 
			
		||||
		return
 | 
			
		||||
@@ -117,7 +117,7 @@ func (m *CPUFreqCollector) Read(interval time.Duration, output chan lp.CCMetric)
 | 
			
		||||
			continue
 | 
			
		||||
		}
 | 
			
		||||
 | 
			
		||||
		if y, err := lp.New("cpufreq", t.tagSet, m.meta, map[string]interface{}{"value": cpuFreq}, now); err == nil {
 | 
			
		||||
		if y, err := lp.NewMessage("cpufreq", t.tagSet, m.meta, map[string]interface{}{"value": cpuFreq}, now); err == nil {
 | 
			
		||||
			output <- y
 | 
			
		||||
		}
 | 
			
		||||
	}
 | 
			
		||||
 
 | 
			
		||||
@@ -10,7 +10,7 @@ import (
 | 
			
		||||
	"time"
 | 
			
		||||
 | 
			
		||||
	cclog "github.com/ClusterCockpit/cc-metric-collector/pkg/ccLogger"
 | 
			
		||||
	lp "github.com/ClusterCockpit/cc-metric-collector/pkg/ccMetric"
 | 
			
		||||
	lp "github.com/ClusterCockpit/cc-energy-manager/pkg/cc-message"
 | 
			
		||||
	sysconf "github.com/tklauser/go-sysconf"
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
@@ -105,7 +105,7 @@ func (m *CpustatCollector) Init(config json.RawMessage) error {
 | 
			
		||||
	return nil
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func (m *CpustatCollector) parseStatLine(linefields []string, tags map[string]string, output chan lp.CCMetric, now time.Time, tsdelta time.Duration) {
 | 
			
		||||
func (m *CpustatCollector) parseStatLine(linefields []string, tags map[string]string, output chan lp.CCMessage, now time.Time, tsdelta time.Duration) {
 | 
			
		||||
	values := make(map[string]float64)
 | 
			
		||||
	clktck, _ := sysconf.Sysconf(sysconf.SC_CLK_TCK)
 | 
			
		||||
	for match, index := range m.matches {
 | 
			
		||||
@@ -122,21 +122,21 @@ func (m *CpustatCollector) parseStatLine(linefields []string, tags map[string]st
 | 
			
		||||
	sum := float64(0)
 | 
			
		||||
	for name, value := range values {
 | 
			
		||||
		sum += value
 | 
			
		||||
		y, err := lp.New(name, tags, m.meta, map[string]interface{}{"value": value * 100}, now)
 | 
			
		||||
		y, err := lp.NewMessage(name, tags, m.meta, map[string]interface{}{"value": value * 100}, now)
 | 
			
		||||
		if err == nil {
 | 
			
		||||
			output <- y
 | 
			
		||||
		}
 | 
			
		||||
	}
 | 
			
		||||
	if v, ok := values["cpu_idle"]; ok {
 | 
			
		||||
		sum -= v
 | 
			
		||||
		y, err := lp.New("cpu_used", tags, m.meta, map[string]interface{}{"value": sum * 100}, now)
 | 
			
		||||
		y, err := lp.NewMessage("cpu_used", tags, m.meta, map[string]interface{}{"value": sum * 100}, now)
 | 
			
		||||
		if err == nil {
 | 
			
		||||
			output <- y
 | 
			
		||||
		}
 | 
			
		||||
	}
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func (m *CpustatCollector) Read(interval time.Duration, output chan lp.CCMetric) {
 | 
			
		||||
func (m *CpustatCollector) Read(interval time.Duration, output chan lp.CCMessage) {
 | 
			
		||||
	if !m.init {
 | 
			
		||||
		return
 | 
			
		||||
	}
 | 
			
		||||
@@ -162,7 +162,7 @@ func (m *CpustatCollector) Read(interval time.Duration, output chan lp.CCMetric)
 | 
			
		||||
		}
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	num_cpus_metric, err := lp.New("num_cpus",
 | 
			
		||||
	num_cpus_metric, err := lp.NewMessage("num_cpus",
 | 
			
		||||
		m.nodetags,
 | 
			
		||||
		m.meta,
 | 
			
		||||
		map[string]interface{}{"value": int(num_cpus)},
 | 
			
		||||
 
 | 
			
		||||
@@ -9,7 +9,7 @@ import (
 | 
			
		||||
	"strings"
 | 
			
		||||
	"time"
 | 
			
		||||
 | 
			
		||||
	lp "github.com/ClusterCockpit/cc-metric-collector/pkg/ccMetric"
 | 
			
		||||
	lp "github.com/ClusterCockpit/cc-energy-manager/pkg/cc-message"
 | 
			
		||||
	influx "github.com/influxdata/line-protocol"
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
@@ -75,7 +75,7 @@ var DefaultTime = func() time.Time {
 | 
			
		||||
	return time.Unix(42, 0)
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func (m *CustomCmdCollector) Read(interval time.Duration, output chan lp.CCMetric) {
 | 
			
		||||
func (m *CustomCmdCollector) Read(interval time.Duration, output chan lp.CCMessage) {
 | 
			
		||||
	if !m.init {
 | 
			
		||||
		return
 | 
			
		||||
	}
 | 
			
		||||
 
 | 
			
		||||
@@ -9,7 +9,7 @@ import (
 | 
			
		||||
	"time"
 | 
			
		||||
 | 
			
		||||
	cclog "github.com/ClusterCockpit/cc-metric-collector/pkg/ccLogger"
 | 
			
		||||
	lp "github.com/ClusterCockpit/cc-metric-collector/pkg/ccMetric"
 | 
			
		||||
	lp "github.com/ClusterCockpit/cc-energy-manager/pkg/cc-message"
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
//	"log"
 | 
			
		||||
@@ -48,7 +48,7 @@ func (m *DiskstatCollector) Init(config json.RawMessage) error {
 | 
			
		||||
	return nil
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func (m *DiskstatCollector) Read(interval time.Duration, output chan lp.CCMetric) {
 | 
			
		||||
func (m *DiskstatCollector) Read(interval time.Duration, output chan lp.CCMessage) {
 | 
			
		||||
	if !m.init {
 | 
			
		||||
		return
 | 
			
		||||
	}
 | 
			
		||||
@@ -92,13 +92,13 @@ func (m *DiskstatCollector) Read(interval time.Duration, output chan lp.CCMetric
 | 
			
		||||
		}
 | 
			
		||||
		tags := map[string]string{"type": "node", "device": linefields[0]}
 | 
			
		||||
		total := (stat.Blocks * uint64(stat.Bsize)) / uint64(1000000000)
 | 
			
		||||
		y, err := lp.New("disk_total", tags, m.meta, map[string]interface{}{"value": total}, time.Now())
 | 
			
		||||
		y, err := lp.NewMessage("disk_total", tags, m.meta, map[string]interface{}{"value": total}, time.Now())
 | 
			
		||||
		if err == nil {
 | 
			
		||||
			y.AddMeta("unit", "GBytes")
 | 
			
		||||
			output <- y
 | 
			
		||||
		}
 | 
			
		||||
		free := (stat.Bfree * uint64(stat.Bsize)) / uint64(1000000000)
 | 
			
		||||
		y, err = lp.New("disk_free", tags, m.meta, map[string]interface{}{"value": free}, time.Now())
 | 
			
		||||
		y, err = lp.NewMessage("disk_free", tags, m.meta, map[string]interface{}{"value": free}, time.Now())
 | 
			
		||||
		if err == nil {
 | 
			
		||||
			y.AddMeta("unit", "GBytes")
 | 
			
		||||
			output <- y
 | 
			
		||||
@@ -110,7 +110,7 @@ func (m *DiskstatCollector) Read(interval time.Duration, output chan lp.CCMetric
 | 
			
		||||
			}
 | 
			
		||||
		}
 | 
			
		||||
	}
 | 
			
		||||
	y, err := lp.New("part_max_used", map[string]string{"type": "node"}, m.meta, map[string]interface{}{"value": int(part_max_used)}, time.Now())
 | 
			
		||||
	y, err := lp.NewMessage("part_max_used", map[string]string{"type": "node"}, m.meta, map[string]interface{}{"value": int(part_max_used)}, time.Now())
 | 
			
		||||
	if err == nil {
 | 
			
		||||
		y.AddMeta("unit", "percent")
 | 
			
		||||
		output <- y
 | 
			
		||||
 
 | 
			
		||||
@@ -14,7 +14,7 @@ import (
 | 
			
		||||
	"time"
 | 
			
		||||
 | 
			
		||||
	cclog "github.com/ClusterCockpit/cc-metric-collector/pkg/ccLogger"
 | 
			
		||||
	lp "github.com/ClusterCockpit/cc-metric-collector/pkg/ccMetric"
 | 
			
		||||
	lp "github.com/ClusterCockpit/cc-energy-manager/pkg/cc-message"
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
const DEFAULT_GPFS_CMD = "mmpmon"
 | 
			
		||||
@@ -94,7 +94,7 @@ func (m *GpfsCollector) Init(config json.RawMessage) error {
 | 
			
		||||
	return nil
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func (m *GpfsCollector) Read(interval time.Duration, output chan lp.CCMetric) {
 | 
			
		||||
func (m *GpfsCollector) Read(interval time.Duration, output chan lp.CCMessage) {
 | 
			
		||||
	// Check if already initialized
 | 
			
		||||
	if !m.init {
 | 
			
		||||
		return
 | 
			
		||||
@@ -218,7 +218,7 @@ func (m *GpfsCollector) Read(interval time.Duration, output chan lp.CCMetric) {
 | 
			
		||||
			continue
 | 
			
		||||
		}
 | 
			
		||||
		if y, err :=
 | 
			
		||||
			lp.New(
 | 
			
		||||
			lp.NewMessage(
 | 
			
		||||
				"gpfs_bytes_read",
 | 
			
		||||
				m.tags,
 | 
			
		||||
				m.meta,
 | 
			
		||||
@@ -234,7 +234,7 @@ func (m *GpfsCollector) Read(interval time.Duration, output chan lp.CCMetric) {
 | 
			
		||||
			if lastBytesRead := m.lastState[filesystem].bytesRead; lastBytesRead >= 0 {
 | 
			
		||||
				bwRead := float64(bytesRead-lastBytesRead) / timeDiff
 | 
			
		||||
				if y, err :=
 | 
			
		||||
					lp.New(
 | 
			
		||||
					lp.NewMessage(
 | 
			
		||||
						"gpfs_bw_read",
 | 
			
		||||
						m.tags,
 | 
			
		||||
						m.meta,
 | 
			
		||||
@@ -258,7 +258,7 @@ func (m *GpfsCollector) Read(interval time.Duration, output chan lp.CCMetric) {
 | 
			
		||||
			continue
 | 
			
		||||
		}
 | 
			
		||||
		if y, err :=
 | 
			
		||||
			lp.New(
 | 
			
		||||
			lp.NewMessage(
 | 
			
		||||
				"gpfs_bytes_written",
 | 
			
		||||
				m.tags,
 | 
			
		||||
				m.meta,
 | 
			
		||||
@@ -274,7 +274,7 @@ func (m *GpfsCollector) Read(interval time.Duration, output chan lp.CCMetric) {
 | 
			
		||||
			if lastBytesWritten := m.lastState[filesystem].bytesRead; lastBytesWritten >= 0 {
 | 
			
		||||
				bwWrite := float64(bytesWritten-lastBytesWritten) / timeDiff
 | 
			
		||||
				if y, err :=
 | 
			
		||||
					lp.New(
 | 
			
		||||
					lp.NewMessage(
 | 
			
		||||
						"gpfs_bw_write",
 | 
			
		||||
						m.tags,
 | 
			
		||||
						m.meta,
 | 
			
		||||
@@ -304,7 +304,7 @@ func (m *GpfsCollector) Read(interval time.Duration, output chan lp.CCMetric) {
 | 
			
		||||
				fmt.Sprintf("Read(): Failed to convert number of opens '%s' to int64: %v", key_value["_oc_"], err))
 | 
			
		||||
			continue
 | 
			
		||||
		}
 | 
			
		||||
		if y, err := lp.New("gpfs_num_opens", m.tags, m.meta, map[string]interface{}{"value": numOpens}, timestamp); err == nil {
 | 
			
		||||
		if y, err := lp.NewMessage("gpfs_num_opens", m.tags, m.meta, map[string]interface{}{"value": numOpens}, timestamp); err == nil {
 | 
			
		||||
			output <- y
 | 
			
		||||
		}
 | 
			
		||||
 | 
			
		||||
@@ -316,7 +316,7 @@ func (m *GpfsCollector) Read(interval time.Duration, output chan lp.CCMetric) {
 | 
			
		||||
				fmt.Sprintf("Read(): Failed to convert number of closes: '%s' to int64: %v", key_value["_cc_"], err))
 | 
			
		||||
			continue
 | 
			
		||||
		}
 | 
			
		||||
		if y, err := lp.New("gpfs_num_closes", m.tags, m.meta, map[string]interface{}{"value": numCloses}, timestamp); err == nil {
 | 
			
		||||
		if y, err := lp.NewMessage("gpfs_num_closes", m.tags, m.meta, map[string]interface{}{"value": numCloses}, timestamp); err == nil {
 | 
			
		||||
			output <- y
 | 
			
		||||
		}
 | 
			
		||||
 | 
			
		||||
@@ -328,7 +328,7 @@ func (m *GpfsCollector) Read(interval time.Duration, output chan lp.CCMetric) {
 | 
			
		||||
				fmt.Sprintf("Read(): Failed to convert number of reads: '%s' to int64: %v", key_value["_rdc_"], err))
 | 
			
		||||
			continue
 | 
			
		||||
		}
 | 
			
		||||
		if y, err := lp.New("gpfs_num_reads", m.tags, m.meta, map[string]interface{}{"value": numReads}, timestamp); err == nil {
 | 
			
		||||
		if y, err := lp.NewMessage("gpfs_num_reads", m.tags, m.meta, map[string]interface{}{"value": numReads}, timestamp); err == nil {
 | 
			
		||||
			output <- y
 | 
			
		||||
		}
 | 
			
		||||
 | 
			
		||||
@@ -340,7 +340,7 @@ func (m *GpfsCollector) Read(interval time.Duration, output chan lp.CCMetric) {
 | 
			
		||||
				fmt.Sprintf("Read(): Failed to convert number of writes: '%s' to int64: %v", key_value["_wc_"], err))
 | 
			
		||||
			continue
 | 
			
		||||
		}
 | 
			
		||||
		if y, err := lp.New("gpfs_num_writes", m.tags, m.meta, map[string]interface{}{"value": numWrites}, timestamp); err == nil {
 | 
			
		||||
		if y, err := lp.NewMessage("gpfs_num_writes", m.tags, m.meta, map[string]interface{}{"value": numWrites}, timestamp); err == nil {
 | 
			
		||||
			output <- y
 | 
			
		||||
		}
 | 
			
		||||
 | 
			
		||||
@@ -352,7 +352,7 @@ func (m *GpfsCollector) Read(interval time.Duration, output chan lp.CCMetric) {
 | 
			
		||||
				fmt.Sprintf("Read(): Failed to convert number of read directories: '%s' to int64: %v", key_value["_dir_"], err))
 | 
			
		||||
			continue
 | 
			
		||||
		}
 | 
			
		||||
		if y, err := lp.New("gpfs_num_readdirs", m.tags, m.meta, map[string]interface{}{"value": numReaddirs}, timestamp); err == nil {
 | 
			
		||||
		if y, err := lp.NewMessage("gpfs_num_readdirs", m.tags, m.meta, map[string]interface{}{"value": numReaddirs}, timestamp); err == nil {
 | 
			
		||||
			output <- y
 | 
			
		||||
		}
 | 
			
		||||
 | 
			
		||||
@@ -364,7 +364,7 @@ func (m *GpfsCollector) Read(interval time.Duration, output chan lp.CCMetric) {
 | 
			
		||||
				fmt.Sprintf("Read(): Failed to convert number of inode updates: '%s' to int: %v", key_value["_iu_"], err))
 | 
			
		||||
			continue
 | 
			
		||||
		}
 | 
			
		||||
		if y, err := lp.New("gpfs_num_inode_updates", m.tags, m.meta, map[string]interface{}{"value": numInodeUpdates}, timestamp); err == nil {
 | 
			
		||||
		if y, err := lp.NewMessage("gpfs_num_inode_updates", m.tags, m.meta, map[string]interface{}{"value": numInodeUpdates}, timestamp); err == nil {
 | 
			
		||||
			output <- y
 | 
			
		||||
		}
 | 
			
		||||
 | 
			
		||||
@@ -372,7 +372,7 @@ func (m *GpfsCollector) Read(interval time.Duration, output chan lp.CCMetric) {
 | 
			
		||||
		if m.config.SendTotalValues {
 | 
			
		||||
			bytesTotal := bytesRead + bytesWritten
 | 
			
		||||
			if y, err :=
 | 
			
		||||
				lp.New("gpfs_bytes_total",
 | 
			
		||||
				lp.NewMessage("gpfs_bytes_total",
 | 
			
		||||
					m.tags,
 | 
			
		||||
					m.meta,
 | 
			
		||||
					map[string]interface{}{
 | 
			
		||||
@@ -385,7 +385,7 @@ func (m *GpfsCollector) Read(interval time.Duration, output chan lp.CCMetric) {
 | 
			
		||||
			}
 | 
			
		||||
			iops := numReads + numWrites
 | 
			
		||||
			if y, err :=
 | 
			
		||||
				lp.New("gpfs_iops",
 | 
			
		||||
				lp.NewMessage("gpfs_iops",
 | 
			
		||||
					m.tags,
 | 
			
		||||
					m.meta,
 | 
			
		||||
					map[string]interface{}{
 | 
			
		||||
@@ -397,7 +397,7 @@ func (m *GpfsCollector) Read(interval time.Duration, output chan lp.CCMetric) {
 | 
			
		||||
			}
 | 
			
		||||
			metaops := numInodeUpdates + numCloses + numOpens + numReaddirs
 | 
			
		||||
			if y, err :=
 | 
			
		||||
				lp.New("gpfs_metaops",
 | 
			
		||||
				lp.NewMessage("gpfs_metaops",
 | 
			
		||||
					m.tags,
 | 
			
		||||
					m.meta,
 | 
			
		||||
					map[string]interface{}{
 | 
			
		||||
 
 | 
			
		||||
@@ -5,7 +5,7 @@ import (
 | 
			
		||||
	"os"
 | 
			
		||||
 | 
			
		||||
	cclog "github.com/ClusterCockpit/cc-metric-collector/pkg/ccLogger"
 | 
			
		||||
	lp "github.com/ClusterCockpit/cc-metric-collector/pkg/ccMetric"
 | 
			
		||||
	lp "github.com/ClusterCockpit/cc-energy-manager/pkg/cc-message"
 | 
			
		||||
	"golang.org/x/sys/unix"
 | 
			
		||||
 | 
			
		||||
	"encoding/json"
 | 
			
		||||
@@ -182,7 +182,7 @@ func (m *InfinibandCollector) Init(config json.RawMessage) error {
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// Read reads Infiniband counter files below IB_BASEPATH
 | 
			
		||||
func (m *InfinibandCollector) Read(interval time.Duration, output chan lp.CCMetric) {
 | 
			
		||||
func (m *InfinibandCollector) Read(interval time.Duration, output chan lp.CCMessage) {
 | 
			
		||||
 | 
			
		||||
	// Check if already initialized
 | 
			
		||||
	if !m.init {
 | 
			
		||||
@@ -230,7 +230,7 @@ func (m *InfinibandCollector) Read(interval time.Duration, output chan lp.CCMetr
 | 
			
		||||
			// Send absolut values
 | 
			
		||||
			if m.config.SendAbsoluteValues {
 | 
			
		||||
				if y, err :=
 | 
			
		||||
					lp.New(
 | 
			
		||||
					lp.NewMessage(
 | 
			
		||||
						counterDef.name,
 | 
			
		||||
						info.tagSet,
 | 
			
		||||
						m.meta,
 | 
			
		||||
@@ -248,7 +248,7 @@ func (m *InfinibandCollector) Read(interval time.Duration, output chan lp.CCMetr
 | 
			
		||||
				if counterDef.lastState >= 0 {
 | 
			
		||||
					rate := float64((counterDef.currentState - counterDef.lastState)) / timeDiff
 | 
			
		||||
					if y, err :=
 | 
			
		||||
						lp.New(
 | 
			
		||||
						lp.NewMessage(
 | 
			
		||||
							counterDef.name+"_bw",
 | 
			
		||||
							info.tagSet,
 | 
			
		||||
							m.meta,
 | 
			
		||||
@@ -278,7 +278,7 @@ func (m *InfinibandCollector) Read(interval time.Duration, output chan lp.CCMetr
 | 
			
		||||
		// Send total values
 | 
			
		||||
		if m.config.SendTotalValues {
 | 
			
		||||
			if y, err :=
 | 
			
		||||
				lp.New(
 | 
			
		||||
				lp.NewMessage(
 | 
			
		||||
					"ib_total",
 | 
			
		||||
					info.tagSet,
 | 
			
		||||
					m.meta,
 | 
			
		||||
@@ -291,7 +291,7 @@ func (m *InfinibandCollector) Read(interval time.Duration, output chan lp.CCMetr
 | 
			
		||||
			}
 | 
			
		||||
 | 
			
		||||
			if y, err :=
 | 
			
		||||
				lp.New(
 | 
			
		||||
				lp.NewMessage(
 | 
			
		||||
					"ib_total_pkts",
 | 
			
		||||
					info.tagSet,
 | 
			
		||||
					m.meta,
 | 
			
		||||
 
 | 
			
		||||
@@ -5,7 +5,7 @@ import (
 | 
			
		||||
	"os"
 | 
			
		||||
 | 
			
		||||
	cclog "github.com/ClusterCockpit/cc-metric-collector/pkg/ccLogger"
 | 
			
		||||
	lp "github.com/ClusterCockpit/cc-metric-collector/pkg/ccMetric"
 | 
			
		||||
	lp "github.com/ClusterCockpit/cc-energy-manager/pkg/cc-message"
 | 
			
		||||
 | 
			
		||||
	//	"log"
 | 
			
		||||
	"encoding/json"
 | 
			
		||||
@@ -107,7 +107,7 @@ func (m *IOstatCollector) Init(config json.RawMessage) error {
 | 
			
		||||
	return err
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func (m *IOstatCollector) Read(interval time.Duration, output chan lp.CCMetric) {
 | 
			
		||||
func (m *IOstatCollector) Read(interval time.Duration, output chan lp.CCMessage) {
 | 
			
		||||
	if !m.init {
 | 
			
		||||
		return
 | 
			
		||||
	}
 | 
			
		||||
@@ -139,7 +139,7 @@ func (m *IOstatCollector) Read(interval time.Duration, output chan lp.CCMetric)
 | 
			
		||||
				x, err := strconv.ParseInt(linefields[idx], 0, 64)
 | 
			
		||||
				if err == nil {
 | 
			
		||||
					diff := x - entry.lastValues[name]
 | 
			
		||||
					y, err := lp.New(name, entry.tags, m.meta, map[string]interface{}{"value": int(diff)}, time.Now())
 | 
			
		||||
					y, err := lp.NewMessage(name, entry.tags, m.meta, map[string]interface{}{"value": int(diff)}, time.Now())
 | 
			
		||||
					if err == nil {
 | 
			
		||||
						output <- y
 | 
			
		||||
					}
 | 
			
		||||
 
 | 
			
		||||
@@ -14,7 +14,7 @@ import (
 | 
			
		||||
	"time"
 | 
			
		||||
 | 
			
		||||
	cclog "github.com/ClusterCockpit/cc-metric-collector/pkg/ccLogger"
 | 
			
		||||
	lp "github.com/ClusterCockpit/cc-metric-collector/pkg/ccMetric"
 | 
			
		||||
	lp "github.com/ClusterCockpit/cc-energy-manager/pkg/cc-message"
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
const IPMISENSORS_PATH = `ipmi-sensors`
 | 
			
		||||
@@ -83,7 +83,7 @@ func (m *IpmiCollector) Init(config json.RawMessage) error {
 | 
			
		||||
	return nil
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func (m *IpmiCollector) readIpmiTool(cmd string, output chan lp.CCMetric) {
 | 
			
		||||
func (m *IpmiCollector) readIpmiTool(cmd string, output chan lp.CCMessage) {
 | 
			
		||||
 | 
			
		||||
	// Setup ipmitool command
 | 
			
		||||
	command := exec.Command(cmd, "sensor")
 | 
			
		||||
@@ -121,7 +121,7 @@ func (m *IpmiCollector) readIpmiTool(cmd string, output chan lp.CCMetric) {
 | 
			
		||||
				unit = "Watts"
 | 
			
		||||
			}
 | 
			
		||||
 | 
			
		||||
			y, err := lp.New(name, map[string]string{"type": "node"}, m.meta, map[string]interface{}{"value": v}, time.Now())
 | 
			
		||||
			y, err := lp.NewMessage(name, map[string]string{"type": "node"}, m.meta, map[string]interface{}{"value": v}, time.Now())
 | 
			
		||||
			if err == nil {
 | 
			
		||||
				y.AddMeta("unit", unit)
 | 
			
		||||
				output <- y
 | 
			
		||||
@@ -141,7 +141,7 @@ func (m *IpmiCollector) readIpmiTool(cmd string, output chan lp.CCMetric) {
 | 
			
		||||
	}
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func (m *IpmiCollector) readIpmiSensors(cmd string, output chan lp.CCMetric) {
 | 
			
		||||
func (m *IpmiCollector) readIpmiSensors(cmd string, output chan lp.CCMessage) {
 | 
			
		||||
 | 
			
		||||
	command := exec.Command(cmd, "--comma-separated-output", "--sdr-cache-recreate")
 | 
			
		||||
	command.Wait()
 | 
			
		||||
@@ -159,7 +159,7 @@ func (m *IpmiCollector) readIpmiSensors(cmd string, output chan lp.CCMetric) {
 | 
			
		||||
			v, err := strconv.ParseFloat(lv[3], 64)
 | 
			
		||||
			if err == nil {
 | 
			
		||||
				name := strings.ToLower(strings.Replace(lv[1], " ", "_", -1))
 | 
			
		||||
				y, err := lp.New(name, map[string]string{"type": "node"}, m.meta, map[string]interface{}{"value": v}, time.Now())
 | 
			
		||||
				y, err := lp.NewMessage(name, map[string]string{"type": "node"}, m.meta, map[string]interface{}{"value": v}, time.Now())
 | 
			
		||||
				if err == nil {
 | 
			
		||||
					if len(lv) > 4 {
 | 
			
		||||
						y.AddMeta("unit", lv[4])
 | 
			
		||||
@@ -171,7 +171,7 @@ func (m *IpmiCollector) readIpmiSensors(cmd string, output chan lp.CCMetric) {
 | 
			
		||||
	}
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func (m *IpmiCollector) Read(interval time.Duration, output chan lp.CCMetric) {
 | 
			
		||||
func (m *IpmiCollector) Read(interval time.Duration, output chan lp.CCMessage) {
 | 
			
		||||
 | 
			
		||||
	// Check if already initialized
 | 
			
		||||
	if !m.init {
 | 
			
		||||
 
 | 
			
		||||
@@ -24,9 +24,9 @@ import (
 | 
			
		||||
	"time"
 | 
			
		||||
	"unsafe"
 | 
			
		||||
 | 
			
		||||
	lp "github.com/ClusterCockpit/cc-energy-manager/pkg/cc-message"
 | 
			
		||||
	agg "github.com/ClusterCockpit/cc-metric-collector/internal/metricAggregator"
 | 
			
		||||
	cclog "github.com/ClusterCockpit/cc-metric-collector/pkg/ccLogger"
 | 
			
		||||
	lp "github.com/ClusterCockpit/cc-metric-collector/pkg/ccMetric"
 | 
			
		||||
	topo "github.com/ClusterCockpit/cc-metric-collector/pkg/ccTopology"
 | 
			
		||||
	"github.com/NVIDIA/go-nvml/pkg/dl"
 | 
			
		||||
	"github.com/fsnotify/fsnotify"
 | 
			
		||||
@@ -393,9 +393,9 @@ func (m *LikwidCollector) takeMeasurement(evidx int, evset LikwidEventsetConfig,
 | 
			
		||||
		if uid != uint32(os.Getuid()) {
 | 
			
		||||
			usr, err := user.LookupId(fmt.Sprint(uid))
 | 
			
		||||
			if err == nil {
 | 
			
		||||
				return true, fmt.Errorf("Access to performance counters locked by %s", usr.Username)
 | 
			
		||||
				return true, fmt.Errorf("access to performance counters locked by %s", usr.Username)
 | 
			
		||||
			} else {
 | 
			
		||||
				return true, fmt.Errorf("Access to performance counters locked by %d", uid)
 | 
			
		||||
				return true, fmt.Errorf("access to performance counters locked by %d", uid)
 | 
			
		||||
			}
 | 
			
		||||
		}
 | 
			
		||||
		// Add the lock file to the watcher
 | 
			
		||||
@@ -436,9 +436,7 @@ func (m *LikwidCollector) takeMeasurement(evidx int, evset LikwidEventsetConfig,
 | 
			
		||||
		gid = C.perfmon_addEventSet(evset.estr)
 | 
			
		||||
	}
 | 
			
		||||
	if gid < 0 {
 | 
			
		||||
		return true, fmt.Errorf("failed to add events %s, error %d", evset.go_estr, gid)
 | 
			
		||||
	} else {
 | 
			
		||||
		evset.gid = gid
 | 
			
		||||
		return true, fmt.Errorf("failed to add events %s, id %d, error %d", evset.go_estr, evidx, gid)
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	// Setup all performance monitoring counters of an eventSet
 | 
			
		||||
@@ -549,11 +547,12 @@ func (m *LikwidCollector) takeMeasurement(evidx int, evset LikwidEventsetConfig,
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// Get all measurement results for an event set, derive the metric values out of the measurement results and send it
 | 
			
		||||
func (m *LikwidCollector) calcEventsetMetrics(evset LikwidEventsetConfig, interval time.Duration, output chan lp.CCMetric) error {
 | 
			
		||||
func (m *LikwidCollector) calcEventsetMetrics(evset LikwidEventsetConfig, interval time.Duration, output chan lp.CCMessage) error {
 | 
			
		||||
	invClock := float64(1.0 / m.basefreq)
 | 
			
		||||
 | 
			
		||||
	for _, tid := range m.cpu2tid {
 | 
			
		||||
		evset.results[tid]["inverseClock"] = invClock
 | 
			
		||||
		evset.results[tid]["gotime"] = interval.Seconds()
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	// Go over the event set metrics, derive the value out of the event:counter values and send it
 | 
			
		||||
@@ -582,7 +581,7 @@ func (m *LikwidCollector) calcEventsetMetrics(evset LikwidEventsetConfig, interv
 | 
			
		||||
				if !math.IsNaN(value) && metric.Publish {
 | 
			
		||||
					fields := map[string]interface{}{"value": value}
 | 
			
		||||
					y, err :=
 | 
			
		||||
						lp.New(
 | 
			
		||||
						lp.NewMessage(
 | 
			
		||||
							metric.Name,
 | 
			
		||||
							map[string]string{
 | 
			
		||||
								"type": metric.Type,
 | 
			
		||||
@@ -619,7 +618,7 @@ func (m *LikwidCollector) calcEventsetMetrics(evset LikwidEventsetConfig, interv
 | 
			
		||||
 | 
			
		||||
			for coreID, value := range totalCoreValues {
 | 
			
		||||
				y, err :=
 | 
			
		||||
					lp.New(
 | 
			
		||||
					lp.NewMessage(
 | 
			
		||||
						metric.Name,
 | 
			
		||||
						map[string]string{
 | 
			
		||||
							"type":    "core",
 | 
			
		||||
@@ -656,7 +655,7 @@ func (m *LikwidCollector) calcEventsetMetrics(evset LikwidEventsetConfig, interv
 | 
			
		||||
 | 
			
		||||
			for socketID, value := range totalSocketValues {
 | 
			
		||||
				y, err :=
 | 
			
		||||
					lp.New(
 | 
			
		||||
					lp.NewMessage(
 | 
			
		||||
						metric.Name,
 | 
			
		||||
						map[string]string{
 | 
			
		||||
							"type":    "socket",
 | 
			
		||||
@@ -691,7 +690,7 @@ func (m *LikwidCollector) calcEventsetMetrics(evset LikwidEventsetConfig, interv
 | 
			
		||||
			}
 | 
			
		||||
 | 
			
		||||
			y, err :=
 | 
			
		||||
				lp.New(
 | 
			
		||||
				lp.NewMessage(
 | 
			
		||||
					metric.Name,
 | 
			
		||||
					map[string]string{
 | 
			
		||||
						"type": "node",
 | 
			
		||||
@@ -716,7 +715,7 @@ func (m *LikwidCollector) calcEventsetMetrics(evset LikwidEventsetConfig, interv
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// Go over the global metrics, derive the value out of the event sets' metric values and send it
 | 
			
		||||
func (m *LikwidCollector) calcGlobalMetrics(groups []LikwidEventsetConfig, interval time.Duration, output chan lp.CCMetric) error {
 | 
			
		||||
func (m *LikwidCollector) calcGlobalMetrics(groups []LikwidEventsetConfig, interval time.Duration, output chan lp.CCMessage) error {
 | 
			
		||||
	// Send all metrics with same time stamp
 | 
			
		||||
	// This function only performs computation; counter measurement is done beforehand
 | 
			
		||||
	now := time.Now()
 | 
			
		||||
@@ -737,6 +736,7 @@ func (m *LikwidCollector) calcGlobalMetrics(groups []LikwidEventsetConfig, inter
 | 
			
		||||
						params[mname] = mres
 | 
			
		||||
					}
 | 
			
		||||
				}
 | 
			
		||||
				params["gotime"] = interval.Seconds()
 | 
			
		||||
				// Evaluate the metric
 | 
			
		||||
				value, err := agg.EvalFloat64Condition(metric.Calc, params)
 | 
			
		||||
				if err != nil {
 | 
			
		||||
@@ -750,7 +750,7 @@ func (m *LikwidCollector) calcGlobalMetrics(groups []LikwidEventsetConfig, inter
 | 
			
		||||
				if !math.IsNaN(value) {
 | 
			
		||||
					if metric.Publish {
 | 
			
		||||
						y, err :=
 | 
			
		||||
							lp.New(
 | 
			
		||||
							lp.NewMessage(
 | 
			
		||||
								metric.Name,
 | 
			
		||||
								map[string]string{
 | 
			
		||||
									"type": metric.Type,
 | 
			
		||||
@@ -778,7 +778,7 @@ func (m *LikwidCollector) calcGlobalMetrics(groups []LikwidEventsetConfig, inter
 | 
			
		||||
	return nil
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func (m *LikwidCollector) ReadThread(interval time.Duration, output chan lp.CCMetric) {
 | 
			
		||||
func (m *LikwidCollector) ReadThread(interval time.Duration, output chan lp.CCMessage) {
 | 
			
		||||
	var err error = nil
 | 
			
		||||
	groups := make([]LikwidEventsetConfig, 0)
 | 
			
		||||
 | 
			
		||||
@@ -806,7 +806,7 @@ func (m *LikwidCollector) ReadThread(interval time.Duration, output chan lp.CCMe
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// main read function taking multiple measurement rounds, each 'interval' seconds long
 | 
			
		||||
func (m *LikwidCollector) Read(interval time.Duration, output chan lp.CCMetric) {
 | 
			
		||||
func (m *LikwidCollector) Read(interval time.Duration, output chan lp.CCMessage) {
 | 
			
		||||
	if !m.init {
 | 
			
		||||
		return
 | 
			
		||||
	}
 | 
			
		||||
 
 | 
			
		||||
@@ -8,18 +8,16 @@ import (
 | 
			
		||||
	"strings"
 | 
			
		||||
	"time"
 | 
			
		||||
 | 
			
		||||
	lp "github.com/ClusterCockpit/cc-energy-manager/pkg/cc-message"
 | 
			
		||||
	cclog "github.com/ClusterCockpit/cc-metric-collector/pkg/ccLogger"
 | 
			
		||||
	lp "github.com/ClusterCockpit/cc-metric-collector/pkg/ccMetric"
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
//
 | 
			
		||||
// LoadavgCollector collects:
 | 
			
		||||
// * load average of last 1, 5 & 15 minutes
 | 
			
		||||
// * number of processes currently runnable
 | 
			
		||||
// * total number of processes in system
 | 
			
		||||
//
 | 
			
		||||
// See: https://www.kernel.org/doc/html/latest/filesystems/proc.html
 | 
			
		||||
//
 | 
			
		||||
const LOADAVGFILE = "/proc/loadavg"
 | 
			
		||||
 | 
			
		||||
type LoadavgCollector struct {
 | 
			
		||||
@@ -68,17 +66,15 @@ func (m *LoadavgCollector) Init(config json.RawMessage) error {
 | 
			
		||||
	return nil
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func (m *LoadavgCollector) Read(interval time.Duration, output chan lp.CCMetric) {
 | 
			
		||||
func (m *LoadavgCollector) Read(interval time.Duration, output chan lp.CCMessage) {
 | 
			
		||||
	if !m.init {
 | 
			
		||||
		return
 | 
			
		||||
	}
 | 
			
		||||
	buffer, err := os.ReadFile(LOADAVGFILE)
 | 
			
		||||
	if err != nil {
 | 
			
		||||
		if err != nil {
 | 
			
		||||
			cclog.ComponentError(
 | 
			
		||||
				m.name,
 | 
			
		||||
				fmt.Sprintf("Read(): Failed to read file '%s': %v", LOADAVGFILE, err))
 | 
			
		||||
		}
 | 
			
		||||
		cclog.ComponentError(
 | 
			
		||||
			m.name,
 | 
			
		||||
			fmt.Sprintf("Read(): Failed to read file '%s': %v", LOADAVGFILE, err))
 | 
			
		||||
		return
 | 
			
		||||
	}
 | 
			
		||||
	now := time.Now()
 | 
			
		||||
@@ -96,7 +92,7 @@ func (m *LoadavgCollector) Read(interval time.Duration, output chan lp.CCMetric)
 | 
			
		||||
		if m.load_skips[i] {
 | 
			
		||||
			continue
 | 
			
		||||
		}
 | 
			
		||||
		y, err := lp.New(name, m.tags, m.meta, map[string]interface{}{"value": x}, now)
 | 
			
		||||
		y, err := lp.NewMessage(name, m.tags, m.meta, map[string]interface{}{"value": x}, now)
 | 
			
		||||
		if err == nil {
 | 
			
		||||
			output <- y
 | 
			
		||||
		}
 | 
			
		||||
@@ -115,7 +111,7 @@ func (m *LoadavgCollector) Read(interval time.Duration, output chan lp.CCMetric)
 | 
			
		||||
		if m.proc_skips[i] {
 | 
			
		||||
			continue
 | 
			
		||||
		}
 | 
			
		||||
		y, err := lp.New(name, m.tags, m.meta, map[string]interface{}{"value": x}, now)
 | 
			
		||||
		y, err := lp.NewMessage(name, m.tags, m.meta, map[string]interface{}{"value": x}, now)
 | 
			
		||||
		if err == nil {
 | 
			
		||||
			output <- y
 | 
			
		||||
		}
 | 
			
		||||
 
 | 
			
		||||
@@ -11,7 +11,7 @@ import (
 | 
			
		||||
	"time"
 | 
			
		||||
 | 
			
		||||
	cclog "github.com/ClusterCockpit/cc-metric-collector/pkg/ccLogger"
 | 
			
		||||
	lp "github.com/ClusterCockpit/cc-metric-collector/pkg/ccMetric"
 | 
			
		||||
	lp "github.com/ClusterCockpit/cc-energy-manager/pkg/cc-message"
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
const LUSTRE_SYSFS = `/sys/fs/lustre`
 | 
			
		||||
@@ -377,7 +377,7 @@ func (m *LustreCollector) Init(config json.RawMessage) error {
 | 
			
		||||
	return nil
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func (m *LustreCollector) Read(interval time.Duration, output chan lp.CCMetric) {
 | 
			
		||||
func (m *LustreCollector) Read(interval time.Duration, output chan lp.CCMessage) {
 | 
			
		||||
	if !m.init {
 | 
			
		||||
		return
 | 
			
		||||
	}
 | 
			
		||||
@@ -388,7 +388,7 @@ func (m *LustreCollector) Read(interval time.Duration, output chan lp.CCMetric)
 | 
			
		||||
		for _, def := range m.definitions {
 | 
			
		||||
			var use_x int64
 | 
			
		||||
			var err error
 | 
			
		||||
			var y lp.CCMetric
 | 
			
		||||
			var y lp.CCMessage
 | 
			
		||||
			x, err := getMetricData(data, def.lineprefix, def.lineoffset)
 | 
			
		||||
			if err == nil {
 | 
			
		||||
				use_x = x
 | 
			
		||||
@@ -399,19 +399,19 @@ func (m *LustreCollector) Read(interval time.Duration, output chan lp.CCMetric)
 | 
			
		||||
			switch def.calc {
 | 
			
		||||
			case "none":
 | 
			
		||||
				value = use_x
 | 
			
		||||
				y, err = lp.New(def.name, m.tags, m.meta, map[string]interface{}{"value": value}, time.Now())
 | 
			
		||||
				y, err = lp.NewMessage(def.name, m.tags, m.meta, map[string]interface{}{"value": value}, time.Now())
 | 
			
		||||
			case "difference":
 | 
			
		||||
				value = use_x - devData[def.name]
 | 
			
		||||
				if value.(int64) < 0 {
 | 
			
		||||
					value = 0
 | 
			
		||||
				}
 | 
			
		||||
				y, err = lp.New(def.name, m.tags, m.meta, map[string]interface{}{"value": value}, time.Now())
 | 
			
		||||
				y, err = lp.NewMessage(def.name, m.tags, m.meta, map[string]interface{}{"value": value}, time.Now())
 | 
			
		||||
			case "derivative":
 | 
			
		||||
				value = float64(use_x-devData[def.name]) / tdiff.Seconds()
 | 
			
		||||
				if value.(float64) < 0 {
 | 
			
		||||
					value = 0
 | 
			
		||||
				}
 | 
			
		||||
				y, err = lp.New(def.name, m.tags, m.meta, map[string]interface{}{"value": value}, time.Now())
 | 
			
		||||
				y, err = lp.NewMessage(def.name, m.tags, m.meta, map[string]interface{}{"value": value}, time.Now())
 | 
			
		||||
			}
 | 
			
		||||
			if err == nil {
 | 
			
		||||
				y.AddTag("device", device)
 | 
			
		||||
 
 | 
			
		||||
@@ -13,7 +13,7 @@ import (
 | 
			
		||||
	"time"
 | 
			
		||||
 | 
			
		||||
	cclog "github.com/ClusterCockpit/cc-metric-collector/pkg/ccLogger"
 | 
			
		||||
	lp "github.com/ClusterCockpit/cc-metric-collector/pkg/ccMetric"
 | 
			
		||||
	lp "github.com/ClusterCockpit/cc-energy-manager/pkg/cc-message"
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
const MEMSTATFILE = "/proc/meminfo"
 | 
			
		||||
@@ -159,7 +159,7 @@ func (m *MemstatCollector) Init(config json.RawMessage) error {
 | 
			
		||||
	return err
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func (m *MemstatCollector) Read(interval time.Duration, output chan lp.CCMetric) {
 | 
			
		||||
func (m *MemstatCollector) Read(interval time.Duration, output chan lp.CCMessage) {
 | 
			
		||||
	if !m.init {
 | 
			
		||||
		return
 | 
			
		||||
	}
 | 
			
		||||
@@ -175,7 +175,7 @@ func (m *MemstatCollector) Read(interval time.Duration, output chan lp.CCMetric)
 | 
			
		||||
				}
 | 
			
		||||
			}
 | 
			
		||||
 | 
			
		||||
			y, err := lp.New(name, tags, m.meta, map[string]interface{}{"value": value}, time.Now())
 | 
			
		||||
			y, err := lp.NewMessage(name, tags, m.meta, map[string]interface{}{"value": value}, time.Now())
 | 
			
		||||
			if err == nil {
 | 
			
		||||
				if len(unit) > 0 {
 | 
			
		||||
					y.AddMeta("unit", unit)
 | 
			
		||||
@@ -208,7 +208,7 @@ func (m *MemstatCollector) Read(interval time.Duration, output chan lp.CCMetric)
 | 
			
		||||
					}
 | 
			
		||||
				}
 | 
			
		||||
			}
 | 
			
		||||
			y, err := lp.New("mem_used", tags, m.meta, map[string]interface{}{"value": memUsed}, time.Now())
 | 
			
		||||
			y, err := lp.NewMessage("mem_used", tags, m.meta, map[string]interface{}{"value": memUsed}, time.Now())
 | 
			
		||||
			if err == nil {
 | 
			
		||||
				if len(unit) > 0 {
 | 
			
		||||
					y.AddMeta("unit", unit)
 | 
			
		||||
 
 | 
			
		||||
@@ -5,7 +5,7 @@ import (
 | 
			
		||||
	"fmt"
 | 
			
		||||
	"time"
 | 
			
		||||
 | 
			
		||||
	lp "github.com/ClusterCockpit/cc-metric-collector/pkg/ccMetric"
 | 
			
		||||
	lp "github.com/ClusterCockpit/cc-energy-manager/pkg/cc-message"
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
type MetricCollector interface {
 | 
			
		||||
@@ -13,7 +13,7 @@ type MetricCollector interface {
 | 
			
		||||
	Init(config json.RawMessage) error // Initialize metric collector
 | 
			
		||||
	Initialized() bool                 // Is metric collector initialized?
 | 
			
		||||
	Parallel() bool
 | 
			
		||||
	Read(duration time.Duration, output chan lp.CCMetric) // Read metrics from metric collector
 | 
			
		||||
	Read(duration time.Duration, output chan lp.CCMessage) // Read metrics from metric collector
 | 
			
		||||
	Close()                                               // Close / finish metric collector
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -10,7 +10,7 @@ import (
 | 
			
		||||
	"time"
 | 
			
		||||
 | 
			
		||||
	cclog "github.com/ClusterCockpit/cc-metric-collector/pkg/ccLogger"
 | 
			
		||||
	lp "github.com/ClusterCockpit/cc-metric-collector/pkg/ccMetric"
 | 
			
		||||
	lp "github.com/ClusterCockpit/cc-energy-manager/pkg/cc-message"
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
const NETSTATFILE = "/proc/net/dev"
 | 
			
		||||
@@ -153,7 +153,7 @@ func (m *NetstatCollector) Init(config json.RawMessage) error {
 | 
			
		||||
	return nil
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func (m *NetstatCollector) Read(interval time.Duration, output chan lp.CCMetric) {
 | 
			
		||||
func (m *NetstatCollector) Read(interval time.Duration, output chan lp.CCMessage) {
 | 
			
		||||
	if !m.init {
 | 
			
		||||
		return
 | 
			
		||||
	}
 | 
			
		||||
@@ -197,14 +197,14 @@ func (m *NetstatCollector) Read(interval time.Duration, output chan lp.CCMetric)
 | 
			
		||||
					continue
 | 
			
		||||
				}
 | 
			
		||||
				if m.config.SendAbsoluteValues {
 | 
			
		||||
					if y, err := lp.New(metric.name, metric.tags, metric.meta, map[string]interface{}{"value": v}, now); err == nil {
 | 
			
		||||
					if y, err := lp.NewMessage(metric.name, metric.tags, metric.meta, map[string]interface{}{"value": v}, now); err == nil {
 | 
			
		||||
						output <- y
 | 
			
		||||
					}
 | 
			
		||||
				}
 | 
			
		||||
				if m.config.SendDerivedValues {
 | 
			
		||||
					if metric.lastValue >= 0 {
 | 
			
		||||
						rate := float64(v-metric.lastValue) / timeDiff
 | 
			
		||||
						if y, err := lp.New(metric.name+"_bw", metric.tags, metric.meta_rates, map[string]interface{}{"value": rate}, now); err == nil {
 | 
			
		||||
						if y, err := lp.NewMessage(metric.name+"_bw", metric.tags, metric.meta_rates, map[string]interface{}{"value": rate}, now); err == nil {
 | 
			
		||||
							output <- y
 | 
			
		||||
						}
 | 
			
		||||
					}
 | 
			
		||||
 
 | 
			
		||||
@@ -11,7 +11,7 @@ import (
 | 
			
		||||
	"strings"
 | 
			
		||||
	"time"
 | 
			
		||||
 | 
			
		||||
	lp "github.com/ClusterCockpit/cc-metric-collector/pkg/ccMetric"
 | 
			
		||||
	lp "github.com/ClusterCockpit/cc-energy-manager/pkg/cc-message"
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
// First part contains the code for the general NfsCollector.
 | 
			
		||||
@@ -118,7 +118,7 @@ func (m *nfsCollector) MainInit(config json.RawMessage) error {
 | 
			
		||||
	return nil
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func (m *nfsCollector) Read(interval time.Duration, output chan lp.CCMetric) {
 | 
			
		||||
func (m *nfsCollector) Read(interval time.Duration, output chan lp.CCMessage) {
 | 
			
		||||
	if !m.init {
 | 
			
		||||
		return
 | 
			
		||||
	}
 | 
			
		||||
@@ -140,7 +140,7 @@ func (m *nfsCollector) Read(interval time.Duration, output chan lp.CCMetric) {
 | 
			
		||||
			continue
 | 
			
		||||
		}
 | 
			
		||||
		value := data.current - data.last
 | 
			
		||||
		y, err := lp.New(fmt.Sprintf("%s_%s", prefix, name), m.tags, m.meta, map[string]interface{}{"value": value}, timestamp)
 | 
			
		||||
		y, err := lp.NewMessage(fmt.Sprintf("%s_%s", prefix, name), m.tags, m.meta, map[string]interface{}{"value": value}, timestamp)
 | 
			
		||||
		if err == nil {
 | 
			
		||||
			y.AddMeta("version", m.version)
 | 
			
		||||
			output <- y
 | 
			
		||||
 
 | 
			
		||||
@@ -10,7 +10,7 @@ import (
 | 
			
		||||
	"time"
 | 
			
		||||
 | 
			
		||||
	cclog "github.com/ClusterCockpit/cc-metric-collector/pkg/ccLogger"
 | 
			
		||||
	lp "github.com/ClusterCockpit/cc-metric-collector/pkg/ccMetric"
 | 
			
		||||
	lp "github.com/ClusterCockpit/cc-energy-manager/pkg/cc-message"
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
// These are the fields we read from the JSON configuration
 | 
			
		||||
@@ -114,7 +114,7 @@ func (m *NfsIOStatCollector) Init(config json.RawMessage) error {
 | 
			
		||||
	return err
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func (m *NfsIOStatCollector) Read(interval time.Duration, output chan lp.CCMetric) {
 | 
			
		||||
func (m *NfsIOStatCollector) Read(interval time.Duration, output chan lp.CCMessage) {
 | 
			
		||||
	timestamp := time.Now()
 | 
			
		||||
 | 
			
		||||
	// Get the current values for all mountpoints
 | 
			
		||||
@@ -126,7 +126,7 @@ func (m *NfsIOStatCollector) Read(interval time.Duration, output chan lp.CCMetri
 | 
			
		||||
			// Calculate the difference of old and new values
 | 
			
		||||
			for i := range values {
 | 
			
		||||
				x := values[i] - old[i]
 | 
			
		||||
				y, err := lp.New(fmt.Sprintf("nfsio_%s", i), m.tags, m.meta, map[string]interface{}{"value": x}, timestamp)
 | 
			
		||||
				y, err := lp.NewMessage(fmt.Sprintf("nfsio_%s", i), m.tags, m.meta, map[string]interface{}{"value": x}, timestamp)
 | 
			
		||||
				if err == nil {
 | 
			
		||||
					if strings.HasPrefix(i, "page") {
 | 
			
		||||
						y.AddMeta("unit", "4K_Pages")
 | 
			
		||||
 
 | 
			
		||||
@@ -11,7 +11,7 @@ import (
 | 
			
		||||
	"time"
 | 
			
		||||
 | 
			
		||||
	cclog "github.com/ClusterCockpit/cc-metric-collector/pkg/ccLogger"
 | 
			
		||||
	lp "github.com/ClusterCockpit/cc-metric-collector/pkg/ccMetric"
 | 
			
		||||
	lp "github.com/ClusterCockpit/cc-energy-manager/pkg/cc-message"
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
// Non-Uniform Memory Access (NUMA) policy hit/miss statistics
 | 
			
		||||
@@ -97,7 +97,7 @@ func (m *NUMAStatsCollector) Init(config json.RawMessage) error {
 | 
			
		||||
	return nil
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func (m *NUMAStatsCollector) Read(interval time.Duration, output chan lp.CCMetric) {
 | 
			
		||||
func (m *NUMAStatsCollector) Read(interval time.Duration, output chan lp.CCMessage) {
 | 
			
		||||
	if !m.init {
 | 
			
		||||
		return
 | 
			
		||||
	}
 | 
			
		||||
@@ -130,7 +130,7 @@ func (m *NUMAStatsCollector) Read(interval time.Duration, output chan lp.CCMetri
 | 
			
		||||
					fmt.Sprintf("Read(): Failed to convert %s='%s' to int64: %v", key, split[1], err))
 | 
			
		||||
				continue
 | 
			
		||||
			}
 | 
			
		||||
			y, err := lp.New(
 | 
			
		||||
			y, err := lp.NewMessage(
 | 
			
		||||
				"numastats_"+key,
 | 
			
		||||
				t.tagSet,
 | 
			
		||||
				m.meta,
 | 
			
		||||
 
 | 
			
		||||
@@ -9,7 +9,7 @@ import (
 | 
			
		||||
	"time"
 | 
			
		||||
 | 
			
		||||
	cclog "github.com/ClusterCockpit/cc-metric-collector/pkg/ccLogger"
 | 
			
		||||
	lp "github.com/ClusterCockpit/cc-metric-collector/pkg/ccMetric"
 | 
			
		||||
	lp "github.com/ClusterCockpit/cc-energy-manager/pkg/cc-message"
 | 
			
		||||
	"github.com/NVIDIA/go-nvml/pkg/nvml"
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
@@ -206,7 +206,7 @@ func (m *NvidiaCollector) Init(config json.RawMessage) error {
 | 
			
		||||
	return nil
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func readMemoryInfo(device NvidiaCollectorDevice, output chan lp.CCMetric) error {
 | 
			
		||||
func readMemoryInfo(device NvidiaCollectorDevice, output chan lp.CCMessage) error {
 | 
			
		||||
	if !device.excludeMetrics["nv_fb_mem_total"] || !device.excludeMetrics["nv_fb_mem_used"] || !device.excludeMetrics["nv_fb_mem_reserved"] {
 | 
			
		||||
		var total uint64
 | 
			
		||||
		var used uint64
 | 
			
		||||
@@ -222,7 +222,7 @@ func readMemoryInfo(device NvidiaCollectorDevice, output chan lp.CCMetric) error
 | 
			
		||||
 | 
			
		||||
		if !device.excludeMetrics["nv_fb_mem_total"] {
 | 
			
		||||
			t := float64(total) / (1024 * 1024)
 | 
			
		||||
			y, err := lp.New("nv_fb_mem_total", device.tags, device.meta, map[string]interface{}{"value": t}, time.Now())
 | 
			
		||||
			y, err := lp.NewMessage("nv_fb_mem_total", device.tags, device.meta, map[string]interface{}{"value": t}, time.Now())
 | 
			
		||||
			if err == nil {
 | 
			
		||||
				y.AddMeta("unit", "MByte")
 | 
			
		||||
				output <- y
 | 
			
		||||
@@ -231,7 +231,7 @@ func readMemoryInfo(device NvidiaCollectorDevice, output chan lp.CCMetric) error
 | 
			
		||||
 | 
			
		||||
		if !device.excludeMetrics["nv_fb_mem_used"] {
 | 
			
		||||
			f := float64(used) / (1024 * 1024)
 | 
			
		||||
			y, err := lp.New("nv_fb_mem_used", device.tags, device.meta, map[string]interface{}{"value": f}, time.Now())
 | 
			
		||||
			y, err := lp.NewMessage("nv_fb_mem_used", device.tags, device.meta, map[string]interface{}{"value": f}, time.Now())
 | 
			
		||||
			if err == nil {
 | 
			
		||||
				y.AddMeta("unit", "MByte")
 | 
			
		||||
				output <- y
 | 
			
		||||
@@ -240,7 +240,7 @@ func readMemoryInfo(device NvidiaCollectorDevice, output chan lp.CCMetric) error
 | 
			
		||||
 | 
			
		||||
		if v2 && !device.excludeMetrics["nv_fb_mem_reserved"] {
 | 
			
		||||
			r := float64(reserved) / (1024 * 1024)
 | 
			
		||||
			y, err := lp.New("nv_fb_mem_reserved", device.tags, device.meta, map[string]interface{}{"value": r}, time.Now())
 | 
			
		||||
			y, err := lp.NewMessage("nv_fb_mem_reserved", device.tags, device.meta, map[string]interface{}{"value": r}, time.Now())
 | 
			
		||||
			if err == nil {
 | 
			
		||||
				y.AddMeta("unit", "MByte")
 | 
			
		||||
				output <- y
 | 
			
		||||
@@ -250,7 +250,7 @@ func readMemoryInfo(device NvidiaCollectorDevice, output chan lp.CCMetric) error
 | 
			
		||||
	return nil
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func readBarMemoryInfo(device NvidiaCollectorDevice, output chan lp.CCMetric) error {
 | 
			
		||||
func readBarMemoryInfo(device NvidiaCollectorDevice, output chan lp.CCMessage) error {
 | 
			
		||||
	if !device.excludeMetrics["nv_bar1_mem_total"] || !device.excludeMetrics["nv_bar1_mem_used"] {
 | 
			
		||||
		meminfo, ret := nvml.DeviceGetBAR1MemoryInfo(device.device)
 | 
			
		||||
		if ret != nvml.SUCCESS {
 | 
			
		||||
@@ -259,7 +259,7 @@ func readBarMemoryInfo(device NvidiaCollectorDevice, output chan lp.CCMetric) er
 | 
			
		||||
		}
 | 
			
		||||
		if !device.excludeMetrics["nv_bar1_mem_total"] {
 | 
			
		||||
			t := float64(meminfo.Bar1Total) / (1024 * 1024)
 | 
			
		||||
			y, err := lp.New("nv_bar1_mem_total", device.tags, device.meta, map[string]interface{}{"value": t}, time.Now())
 | 
			
		||||
			y, err := lp.NewMessage("nv_bar1_mem_total", device.tags, device.meta, map[string]interface{}{"value": t}, time.Now())
 | 
			
		||||
			if err == nil {
 | 
			
		||||
				y.AddMeta("unit", "MByte")
 | 
			
		||||
				output <- y
 | 
			
		||||
@@ -267,7 +267,7 @@ func readBarMemoryInfo(device NvidiaCollectorDevice, output chan lp.CCMetric) er
 | 
			
		||||
		}
 | 
			
		||||
		if !device.excludeMetrics["nv_bar1_mem_used"] {
 | 
			
		||||
			t := float64(meminfo.Bar1Used) / (1024 * 1024)
 | 
			
		||||
			y, err := lp.New("nv_bar1_mem_used", device.tags, device.meta, map[string]interface{}{"value": t}, time.Now())
 | 
			
		||||
			y, err := lp.NewMessage("nv_bar1_mem_used", device.tags, device.meta, map[string]interface{}{"value": t}, time.Now())
 | 
			
		||||
			if err == nil {
 | 
			
		||||
				y.AddMeta("unit", "MByte")
 | 
			
		||||
				output <- y
 | 
			
		||||
@@ -277,7 +277,7 @@ func readBarMemoryInfo(device NvidiaCollectorDevice, output chan lp.CCMetric) er
 | 
			
		||||
	return nil
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func readUtilization(device NvidiaCollectorDevice, output chan lp.CCMetric) error {
 | 
			
		||||
func readUtilization(device NvidiaCollectorDevice, output chan lp.CCMessage) error {
 | 
			
		||||
	isMig, ret := nvml.DeviceIsMigDeviceHandle(device.device)
 | 
			
		||||
	if ret != nvml.SUCCESS {
 | 
			
		||||
		err := errors.New(nvml.ErrorString(ret))
 | 
			
		||||
@@ -301,14 +301,14 @@ func readUtilization(device NvidiaCollectorDevice, output chan lp.CCMetric) erro
 | 
			
		||||
		util, ret := nvml.DeviceGetUtilizationRates(device.device)
 | 
			
		||||
		if ret == nvml.SUCCESS {
 | 
			
		||||
			if !device.excludeMetrics["nv_util"] {
 | 
			
		||||
				y, err := lp.New("nv_util", device.tags, device.meta, map[string]interface{}{"value": float64(util.Gpu)}, time.Now())
 | 
			
		||||
				y, err := lp.NewMessage("nv_util", device.tags, device.meta, map[string]interface{}{"value": float64(util.Gpu)}, time.Now())
 | 
			
		||||
				if err == nil {
 | 
			
		||||
					y.AddMeta("unit", "%")
 | 
			
		||||
					output <- y
 | 
			
		||||
				}
 | 
			
		||||
			}
 | 
			
		||||
			if !device.excludeMetrics["nv_mem_util"] {
 | 
			
		||||
				y, err := lp.New("nv_mem_util", device.tags, device.meta, map[string]interface{}{"value": float64(util.Memory)}, time.Now())
 | 
			
		||||
				y, err := lp.NewMessage("nv_mem_util", device.tags, device.meta, map[string]interface{}{"value": float64(util.Memory)}, time.Now())
 | 
			
		||||
				if err == nil {
 | 
			
		||||
					y.AddMeta("unit", "%")
 | 
			
		||||
					output <- y
 | 
			
		||||
@@ -319,7 +319,7 @@ func readUtilization(device NvidiaCollectorDevice, output chan lp.CCMetric) erro
 | 
			
		||||
	return nil
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func readTemp(device NvidiaCollectorDevice, output chan lp.CCMetric) error {
 | 
			
		||||
func readTemp(device NvidiaCollectorDevice, output chan lp.CCMessage) error {
 | 
			
		||||
	if !device.excludeMetrics["nv_temp"] {
 | 
			
		||||
		// Retrieves the current temperature readings for the device, in degrees C.
 | 
			
		||||
		//
 | 
			
		||||
@@ -328,7 +328,7 @@ func readTemp(device NvidiaCollectorDevice, output chan lp.CCMetric) error {
 | 
			
		||||
		// * NVML_TEMPERATURE_COUNT
 | 
			
		||||
		temp, ret := nvml.DeviceGetTemperature(device.device, nvml.TEMPERATURE_GPU)
 | 
			
		||||
		if ret == nvml.SUCCESS {
 | 
			
		||||
			y, err := lp.New("nv_temp", device.tags, device.meta, map[string]interface{}{"value": float64(temp)}, time.Now())
 | 
			
		||||
			y, err := lp.NewMessage("nv_temp", device.tags, device.meta, map[string]interface{}{"value": float64(temp)}, time.Now())
 | 
			
		||||
			if err == nil {
 | 
			
		||||
				y.AddMeta("unit", "degC")
 | 
			
		||||
				output <- y
 | 
			
		||||
@@ -338,7 +338,7 @@ func readTemp(device NvidiaCollectorDevice, output chan lp.CCMetric) error {
 | 
			
		||||
	return nil
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func readFan(device NvidiaCollectorDevice, output chan lp.CCMetric) error {
 | 
			
		||||
func readFan(device NvidiaCollectorDevice, output chan lp.CCMessage) error {
 | 
			
		||||
	if !device.excludeMetrics["nv_fan"] {
 | 
			
		||||
		// Retrieves the intended operating speed of the device's fan.
 | 
			
		||||
		//
 | 
			
		||||
@@ -351,7 +351,7 @@ func readFan(device NvidiaCollectorDevice, output chan lp.CCMetric) error {
 | 
			
		||||
		// This value may exceed 100% in certain cases.
 | 
			
		||||
		fan, ret := nvml.DeviceGetFanSpeed(device.device)
 | 
			
		||||
		if ret == nvml.SUCCESS {
 | 
			
		||||
			y, err := lp.New("nv_fan", device.tags, device.meta, map[string]interface{}{"value": float64(fan)}, time.Now())
 | 
			
		||||
			y, err := lp.NewMessage("nv_fan", device.tags, device.meta, map[string]interface{}{"value": float64(fan)}, time.Now())
 | 
			
		||||
			if err == nil {
 | 
			
		||||
				y.AddMeta("unit", "%")
 | 
			
		||||
				output <- y
 | 
			
		||||
@@ -361,14 +361,14 @@ func readFan(device NvidiaCollectorDevice, output chan lp.CCMetric) error {
 | 
			
		||||
	return nil
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// func readFans(device NvidiaCollectorDevice, output chan lp.CCMetric) error {
 | 
			
		||||
// func readFans(device NvidiaCollectorDevice, output chan lp.CCMessage) error {
 | 
			
		||||
// 	if !device.excludeMetrics["nv_fan"] {
 | 
			
		||||
// 		numFans, ret := nvml.DeviceGetNumFans(device.device)
 | 
			
		||||
// 		if ret == nvml.SUCCESS {
 | 
			
		||||
// 			for i := 0; i < numFans; i++ {
 | 
			
		||||
// 				fan, ret := nvml.DeviceGetFanSpeed_v2(device.device, i)
 | 
			
		||||
// 				if ret == nvml.SUCCESS {
 | 
			
		||||
// 					y, err := lp.New("nv_fan", device.tags, device.meta, map[string]interface{}{"value": float64(fan)}, time.Now())
 | 
			
		||||
// 					y, err := lp.NewMessage("nv_fan", device.tags, device.meta, map[string]interface{}{"value": float64(fan)}, time.Now())
 | 
			
		||||
// 					if err == nil {
 | 
			
		||||
// 						y.AddMeta("unit", "%")
 | 
			
		||||
// 						y.AddTag("stype", "fan")
 | 
			
		||||
@@ -382,7 +382,7 @@ func readFan(device NvidiaCollectorDevice, output chan lp.CCMetric) error {
 | 
			
		||||
// 	return nil
 | 
			
		||||
// }
 | 
			
		||||
 | 
			
		||||
func readEccMode(device NvidiaCollectorDevice, output chan lp.CCMetric) error {
 | 
			
		||||
func readEccMode(device NvidiaCollectorDevice, output chan lp.CCMessage) error {
 | 
			
		||||
	if !device.excludeMetrics["nv_ecc_mode"] {
 | 
			
		||||
		// Retrieves the current and pending ECC modes for the device.
 | 
			
		||||
		//
 | 
			
		||||
@@ -393,21 +393,21 @@ func readEccMode(device NvidiaCollectorDevice, output chan lp.CCMetric) error {
 | 
			
		||||
		// The "pending" ECC mode refers to the target mode following the next reboot.
 | 
			
		||||
		_, ecc_pend, ret := nvml.DeviceGetEccMode(device.device)
 | 
			
		||||
		if ret == nvml.SUCCESS {
 | 
			
		||||
			var y lp.CCMetric
 | 
			
		||||
			var y lp.CCMessage
 | 
			
		||||
			var err error
 | 
			
		||||
			switch ecc_pend {
 | 
			
		||||
			case nvml.FEATURE_DISABLED:
 | 
			
		||||
				y, err = lp.New("nv_ecc_mode", device.tags, device.meta, map[string]interface{}{"value": "OFF"}, time.Now())
 | 
			
		||||
				y, err = lp.NewMessage("nv_ecc_mode", device.tags, device.meta, map[string]interface{}{"value": "OFF"}, time.Now())
 | 
			
		||||
			case nvml.FEATURE_ENABLED:
 | 
			
		||||
				y, err = lp.New("nv_ecc_mode", device.tags, device.meta, map[string]interface{}{"value": "ON"}, time.Now())
 | 
			
		||||
				y, err = lp.NewMessage("nv_ecc_mode", device.tags, device.meta, map[string]interface{}{"value": "ON"}, time.Now())
 | 
			
		||||
			default:
 | 
			
		||||
				y, err = lp.New("nv_ecc_mode", device.tags, device.meta, map[string]interface{}{"value": "UNKNOWN"}, time.Now())
 | 
			
		||||
				y, err = lp.NewMessage("nv_ecc_mode", device.tags, device.meta, map[string]interface{}{"value": "UNKNOWN"}, time.Now())
 | 
			
		||||
			}
 | 
			
		||||
			if err == nil {
 | 
			
		||||
				output <- y
 | 
			
		||||
			}
 | 
			
		||||
		} else if ret == nvml.ERROR_NOT_SUPPORTED {
 | 
			
		||||
			y, err := lp.New("nv_ecc_mode", device.tags, device.meta, map[string]interface{}{"value": "N/A"}, time.Now())
 | 
			
		||||
			y, err := lp.NewMessage("nv_ecc_mode", device.tags, device.meta, map[string]interface{}{"value": "N/A"}, time.Now())
 | 
			
		||||
			if err == nil {
 | 
			
		||||
				output <- y
 | 
			
		||||
			}
 | 
			
		||||
@@ -416,7 +416,7 @@ func readEccMode(device NvidiaCollectorDevice, output chan lp.CCMetric) error {
 | 
			
		||||
	return nil
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func readPerfState(device NvidiaCollectorDevice, output chan lp.CCMetric) error {
 | 
			
		||||
func readPerfState(device NvidiaCollectorDevice, output chan lp.CCMessage) error {
 | 
			
		||||
	if !device.excludeMetrics["nv_perf_state"] {
 | 
			
		||||
		// Retrieves the current performance state for the device.
 | 
			
		||||
		//
 | 
			
		||||
@@ -427,7 +427,7 @@ func readPerfState(device NvidiaCollectorDevice, output chan lp.CCMetric) error
 | 
			
		||||
		// 32: Unknown performance state.
 | 
			
		||||
		pState, ret := nvml.DeviceGetPerformanceState(device.device)
 | 
			
		||||
		if ret == nvml.SUCCESS {
 | 
			
		||||
			y, err := lp.New("nv_perf_state", device.tags, device.meta, map[string]interface{}{"value": fmt.Sprintf("P%d", int(pState))}, time.Now())
 | 
			
		||||
			y, err := lp.NewMessage("nv_perf_state", device.tags, device.meta, map[string]interface{}{"value": fmt.Sprintf("P%d", int(pState))}, time.Now())
 | 
			
		||||
			if err == nil {
 | 
			
		||||
				output <- y
 | 
			
		||||
			}
 | 
			
		||||
@@ -436,7 +436,7 @@ func readPerfState(device NvidiaCollectorDevice, output chan lp.CCMetric) error
 | 
			
		||||
	return nil
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func readPowerUsage(device NvidiaCollectorDevice, output chan lp.CCMetric) error {
 | 
			
		||||
func readPowerUsage(device NvidiaCollectorDevice, output chan lp.CCMessage) error {
 | 
			
		||||
	if !device.excludeMetrics["nv_power_usage"] {
 | 
			
		||||
		// Retrieves power usage for this GPU in milliwatts and its associated circuitry (e.g. memory)
 | 
			
		||||
		//
 | 
			
		||||
@@ -450,7 +450,7 @@ func readPowerUsage(device NvidiaCollectorDevice, output chan lp.CCMetric) error
 | 
			
		||||
		if mode == nvml.FEATURE_ENABLED {
 | 
			
		||||
			power, ret := nvml.DeviceGetPowerUsage(device.device)
 | 
			
		||||
			if ret == nvml.SUCCESS {
 | 
			
		||||
				y, err := lp.New("nv_power_usage", device.tags, device.meta, map[string]interface{}{"value": float64(power) / 1000}, time.Now())
 | 
			
		||||
				y, err := lp.NewMessage("nv_power_usage", device.tags, device.meta, map[string]interface{}{"value": float64(power) / 1000}, time.Now())
 | 
			
		||||
				if err == nil {
 | 
			
		||||
					y.AddMeta("unit", "watts")
 | 
			
		||||
					output <- y
 | 
			
		||||
@@ -461,7 +461,7 @@ func readPowerUsage(device NvidiaCollectorDevice, output chan lp.CCMetric) error
 | 
			
		||||
	return nil
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func readClocks(device NvidiaCollectorDevice, output chan lp.CCMetric) error {
 | 
			
		||||
func readClocks(device NvidiaCollectorDevice, output chan lp.CCMessage) error {
 | 
			
		||||
	// Retrieves the current clock speeds for the device.
 | 
			
		||||
	//
 | 
			
		||||
	// Available clock information:
 | 
			
		||||
@@ -471,7 +471,7 @@ func readClocks(device NvidiaCollectorDevice, output chan lp.CCMetric) error {
 | 
			
		||||
	if !device.excludeMetrics["nv_graphics_clock"] {
 | 
			
		||||
		graphicsClock, ret := nvml.DeviceGetClockInfo(device.device, nvml.CLOCK_GRAPHICS)
 | 
			
		||||
		if ret == nvml.SUCCESS {
 | 
			
		||||
			y, err := lp.New("nv_graphics_clock", device.tags, device.meta, map[string]interface{}{"value": float64(graphicsClock)}, time.Now())
 | 
			
		||||
			y, err := lp.NewMessage("nv_graphics_clock", device.tags, device.meta, map[string]interface{}{"value": float64(graphicsClock)}, time.Now())
 | 
			
		||||
			if err == nil {
 | 
			
		||||
				y.AddMeta("unit", "MHz")
 | 
			
		||||
				output <- y
 | 
			
		||||
@@ -482,7 +482,7 @@ func readClocks(device NvidiaCollectorDevice, output chan lp.CCMetric) error {
 | 
			
		||||
	if !device.excludeMetrics["nv_sm_clock"] {
 | 
			
		||||
		smCock, ret := nvml.DeviceGetClockInfo(device.device, nvml.CLOCK_SM)
 | 
			
		||||
		if ret == nvml.SUCCESS {
 | 
			
		||||
			y, err := lp.New("nv_sm_clock", device.tags, device.meta, map[string]interface{}{"value": float64(smCock)}, time.Now())
 | 
			
		||||
			y, err := lp.NewMessage("nv_sm_clock", device.tags, device.meta, map[string]interface{}{"value": float64(smCock)}, time.Now())
 | 
			
		||||
			if err == nil {
 | 
			
		||||
				y.AddMeta("unit", "MHz")
 | 
			
		||||
				output <- y
 | 
			
		||||
@@ -493,7 +493,7 @@ func readClocks(device NvidiaCollectorDevice, output chan lp.CCMetric) error {
 | 
			
		||||
	if !device.excludeMetrics["nv_mem_clock"] {
 | 
			
		||||
		memClock, ret := nvml.DeviceGetClockInfo(device.device, nvml.CLOCK_MEM)
 | 
			
		||||
		if ret == nvml.SUCCESS {
 | 
			
		||||
			y, err := lp.New("nv_mem_clock", device.tags, device.meta, map[string]interface{}{"value": float64(memClock)}, time.Now())
 | 
			
		||||
			y, err := lp.NewMessage("nv_mem_clock", device.tags, device.meta, map[string]interface{}{"value": float64(memClock)}, time.Now())
 | 
			
		||||
			if err == nil {
 | 
			
		||||
				y.AddMeta("unit", "MHz")
 | 
			
		||||
				output <- y
 | 
			
		||||
@@ -503,7 +503,7 @@ func readClocks(device NvidiaCollectorDevice, output chan lp.CCMetric) error {
 | 
			
		||||
	if !device.excludeMetrics["nv_video_clock"] {
 | 
			
		||||
		memClock, ret := nvml.DeviceGetClockInfo(device.device, nvml.CLOCK_VIDEO)
 | 
			
		||||
		if ret == nvml.SUCCESS {
 | 
			
		||||
			y, err := lp.New("nv_video_clock", device.tags, device.meta, map[string]interface{}{"value": float64(memClock)}, time.Now())
 | 
			
		||||
			y, err := lp.NewMessage("nv_video_clock", device.tags, device.meta, map[string]interface{}{"value": float64(memClock)}, time.Now())
 | 
			
		||||
			if err == nil {
 | 
			
		||||
				y.AddMeta("unit", "MHz")
 | 
			
		||||
				output <- y
 | 
			
		||||
@@ -513,7 +513,7 @@ func readClocks(device NvidiaCollectorDevice, output chan lp.CCMetric) error {
 | 
			
		||||
	return nil
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func readMaxClocks(device NvidiaCollectorDevice, output chan lp.CCMetric) error {
 | 
			
		||||
func readMaxClocks(device NvidiaCollectorDevice, output chan lp.CCMessage) error {
 | 
			
		||||
	// Retrieves the maximum clock speeds for the device.
 | 
			
		||||
	//
 | 
			
		||||
	// Available clock information:
 | 
			
		||||
@@ -528,7 +528,7 @@ func readMaxClocks(device NvidiaCollectorDevice, output chan lp.CCMetric) error
 | 
			
		||||
	if !device.excludeMetrics["nv_max_graphics_clock"] {
 | 
			
		||||
		max_gclk, ret := nvml.DeviceGetMaxClockInfo(device.device, nvml.CLOCK_GRAPHICS)
 | 
			
		||||
		if ret == nvml.SUCCESS {
 | 
			
		||||
			y, err := lp.New("nv_max_graphics_clock", device.tags, device.meta, map[string]interface{}{"value": float64(max_gclk)}, time.Now())
 | 
			
		||||
			y, err := lp.NewMessage("nv_max_graphics_clock", device.tags, device.meta, map[string]interface{}{"value": float64(max_gclk)}, time.Now())
 | 
			
		||||
			if err == nil {
 | 
			
		||||
				y.AddMeta("unit", "MHz")
 | 
			
		||||
				output <- y
 | 
			
		||||
@@ -539,7 +539,7 @@ func readMaxClocks(device NvidiaCollectorDevice, output chan lp.CCMetric) error
 | 
			
		||||
	if !device.excludeMetrics["nv_max_sm_clock"] {
 | 
			
		||||
		maxSmClock, ret := nvml.DeviceGetClockInfo(device.device, nvml.CLOCK_SM)
 | 
			
		||||
		if ret == nvml.SUCCESS {
 | 
			
		||||
			y, err := lp.New("nv_max_sm_clock", device.tags, device.meta, map[string]interface{}{"value": float64(maxSmClock)}, time.Now())
 | 
			
		||||
			y, err := lp.NewMessage("nv_max_sm_clock", device.tags, device.meta, map[string]interface{}{"value": float64(maxSmClock)}, time.Now())
 | 
			
		||||
			if err == nil {
 | 
			
		||||
				y.AddMeta("unit", "MHz")
 | 
			
		||||
				output <- y
 | 
			
		||||
@@ -550,7 +550,7 @@ func readMaxClocks(device NvidiaCollectorDevice, output chan lp.CCMetric) error
 | 
			
		||||
	if !device.excludeMetrics["nv_max_mem_clock"] {
 | 
			
		||||
		maxMemClock, ret := nvml.DeviceGetClockInfo(device.device, nvml.CLOCK_MEM)
 | 
			
		||||
		if ret == nvml.SUCCESS {
 | 
			
		||||
			y, err := lp.New("nv_max_mem_clock", device.tags, device.meta, map[string]interface{}{"value": float64(maxMemClock)}, time.Now())
 | 
			
		||||
			y, err := lp.NewMessage("nv_max_mem_clock", device.tags, device.meta, map[string]interface{}{"value": float64(maxMemClock)}, time.Now())
 | 
			
		||||
			if err == nil {
 | 
			
		||||
				y.AddMeta("unit", "MHz")
 | 
			
		||||
				output <- y
 | 
			
		||||
@@ -561,7 +561,7 @@ func readMaxClocks(device NvidiaCollectorDevice, output chan lp.CCMetric) error
 | 
			
		||||
	if !device.excludeMetrics["nv_max_video_clock"] {
 | 
			
		||||
		maxMemClock, ret := nvml.DeviceGetClockInfo(device.device, nvml.CLOCK_VIDEO)
 | 
			
		||||
		if ret == nvml.SUCCESS {
 | 
			
		||||
			y, err := lp.New("nv_max_video_clock", device.tags, device.meta, map[string]interface{}{"value": float64(maxMemClock)}, time.Now())
 | 
			
		||||
			y, err := lp.NewMessage("nv_max_video_clock", device.tags, device.meta, map[string]interface{}{"value": float64(maxMemClock)}, time.Now())
 | 
			
		||||
			if err == nil {
 | 
			
		||||
				y.AddMeta("unit", "MHz")
 | 
			
		||||
				output <- y
 | 
			
		||||
@@ -571,7 +571,7 @@ func readMaxClocks(device NvidiaCollectorDevice, output chan lp.CCMetric) error
 | 
			
		||||
	return nil
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func readEccErrors(device NvidiaCollectorDevice, output chan lp.CCMetric) error {
 | 
			
		||||
func readEccErrors(device NvidiaCollectorDevice, output chan lp.CCMessage) error {
 | 
			
		||||
	if !device.excludeMetrics["nv_ecc_uncorrected_error"] {
 | 
			
		||||
		// Retrieves the total ECC error counts for the device.
 | 
			
		||||
		//
 | 
			
		||||
@@ -584,7 +584,7 @@ func readEccErrors(device NvidiaCollectorDevice, output chan lp.CCMetric) error
 | 
			
		||||
		// i.e. the total set of errors across the entire device.
 | 
			
		||||
		ecc_db, ret := nvml.DeviceGetTotalEccErrors(device.device, nvml.MEMORY_ERROR_TYPE_UNCORRECTED, nvml.AGGREGATE_ECC)
 | 
			
		||||
		if ret == nvml.SUCCESS {
 | 
			
		||||
			y, err := lp.New("nv_ecc_uncorrected_error", device.tags, device.meta, map[string]interface{}{"value": float64(ecc_db)}, time.Now())
 | 
			
		||||
			y, err := lp.NewMessage("nv_ecc_uncorrected_error", device.tags, device.meta, map[string]interface{}{"value": float64(ecc_db)}, time.Now())
 | 
			
		||||
			if err == nil {
 | 
			
		||||
				output <- y
 | 
			
		||||
			}
 | 
			
		||||
@@ -593,7 +593,7 @@ func readEccErrors(device NvidiaCollectorDevice, output chan lp.CCMetric) error
 | 
			
		||||
	if !device.excludeMetrics["nv_ecc_corrected_error"] {
 | 
			
		||||
		ecc_sb, ret := nvml.DeviceGetTotalEccErrors(device.device, nvml.MEMORY_ERROR_TYPE_CORRECTED, nvml.AGGREGATE_ECC)
 | 
			
		||||
		if ret == nvml.SUCCESS {
 | 
			
		||||
			y, err := lp.New("nv_ecc_corrected_error", device.tags, device.meta, map[string]interface{}{"value": float64(ecc_sb)}, time.Now())
 | 
			
		||||
			y, err := lp.NewMessage("nv_ecc_corrected_error", device.tags, device.meta, map[string]interface{}{"value": float64(ecc_sb)}, time.Now())
 | 
			
		||||
			if err == nil {
 | 
			
		||||
				output <- y
 | 
			
		||||
			}
 | 
			
		||||
@@ -602,7 +602,7 @@ func readEccErrors(device NvidiaCollectorDevice, output chan lp.CCMetric) error
 | 
			
		||||
	return nil
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func readPowerLimit(device NvidiaCollectorDevice, output chan lp.CCMetric) error {
 | 
			
		||||
func readPowerLimit(device NvidiaCollectorDevice, output chan lp.CCMessage) error {
 | 
			
		||||
	if !device.excludeMetrics["nv_power_max_limit"] {
 | 
			
		||||
		// Retrieves the power management limit associated with this device.
 | 
			
		||||
		//
 | 
			
		||||
@@ -612,7 +612,7 @@ func readPowerLimit(device NvidiaCollectorDevice, output chan lp.CCMetric) error
 | 
			
		||||
		// If the card's total power draw reaches this limit the power management algorithm kicks in.
 | 
			
		||||
		pwr_limit, ret := nvml.DeviceGetPowerManagementLimit(device.device)
 | 
			
		||||
		if ret == nvml.SUCCESS {
 | 
			
		||||
			y, err := lp.New("nv_power_max_limit", device.tags, device.meta, map[string]interface{}{"value": float64(pwr_limit) / 1000}, time.Now())
 | 
			
		||||
			y, err := lp.NewMessage("nv_power_max_limit", device.tags, device.meta, map[string]interface{}{"value": float64(pwr_limit) / 1000}, time.Now())
 | 
			
		||||
			if err == nil {
 | 
			
		||||
				y.AddMeta("unit", "watts")
 | 
			
		||||
				output <- y
 | 
			
		||||
@@ -622,7 +622,7 @@ func readPowerLimit(device NvidiaCollectorDevice, output chan lp.CCMetric) error
 | 
			
		||||
	return nil
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func readEncUtilization(device NvidiaCollectorDevice, output chan lp.CCMetric) error {
 | 
			
		||||
func readEncUtilization(device NvidiaCollectorDevice, output chan lp.CCMessage) error {
 | 
			
		||||
	isMig, ret := nvml.DeviceIsMigDeviceHandle(device.device)
 | 
			
		||||
	if ret != nvml.SUCCESS {
 | 
			
		||||
		err := errors.New(nvml.ErrorString(ret))
 | 
			
		||||
@@ -639,7 +639,7 @@ func readEncUtilization(device NvidiaCollectorDevice, output chan lp.CCMetric) e
 | 
			
		||||
		// Note: On MIG-enabled GPUs, querying encoder utilization is not currently supported.
 | 
			
		||||
		enc_util, _, ret := nvml.DeviceGetEncoderUtilization(device.device)
 | 
			
		||||
		if ret == nvml.SUCCESS {
 | 
			
		||||
			y, err := lp.New("nv_encoder_util", device.tags, device.meta, map[string]interface{}{"value": float64(enc_util)}, time.Now())
 | 
			
		||||
			y, err := lp.NewMessage("nv_encoder_util", device.tags, device.meta, map[string]interface{}{"value": float64(enc_util)}, time.Now())
 | 
			
		||||
			if err == nil {
 | 
			
		||||
				y.AddMeta("unit", "%")
 | 
			
		||||
				output <- y
 | 
			
		||||
@@ -649,7 +649,7 @@ func readEncUtilization(device NvidiaCollectorDevice, output chan lp.CCMetric) e
 | 
			
		||||
	return nil
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func readDecUtilization(device NvidiaCollectorDevice, output chan lp.CCMetric) error {
 | 
			
		||||
func readDecUtilization(device NvidiaCollectorDevice, output chan lp.CCMessage) error {
 | 
			
		||||
	isMig, ret := nvml.DeviceIsMigDeviceHandle(device.device)
 | 
			
		||||
	if ret != nvml.SUCCESS {
 | 
			
		||||
		err := errors.New(nvml.ErrorString(ret))
 | 
			
		||||
@@ -666,7 +666,7 @@ func readDecUtilization(device NvidiaCollectorDevice, output chan lp.CCMetric) e
 | 
			
		||||
		// Note: On MIG-enabled GPUs, querying decoder utilization is not currently supported.
 | 
			
		||||
		dec_util, _, ret := nvml.DeviceGetDecoderUtilization(device.device)
 | 
			
		||||
		if ret == nvml.SUCCESS {
 | 
			
		||||
			y, err := lp.New("nv_decoder_util", device.tags, device.meta, map[string]interface{}{"value": float64(dec_util)}, time.Now())
 | 
			
		||||
			y, err := lp.NewMessage("nv_decoder_util", device.tags, device.meta, map[string]interface{}{"value": float64(dec_util)}, time.Now())
 | 
			
		||||
			if err == nil {
 | 
			
		||||
				y.AddMeta("unit", "%")
 | 
			
		||||
				output <- y
 | 
			
		||||
@@ -676,7 +676,7 @@ func readDecUtilization(device NvidiaCollectorDevice, output chan lp.CCMetric) e
 | 
			
		||||
	return nil
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func readRemappedRows(device NvidiaCollectorDevice, output chan lp.CCMetric) error {
 | 
			
		||||
func readRemappedRows(device NvidiaCollectorDevice, output chan lp.CCMessage) error {
 | 
			
		||||
	if !device.excludeMetrics["nv_remapped_rows_corrected"] ||
 | 
			
		||||
		!device.excludeMetrics["nv_remapped_rows_uncorrected"] ||
 | 
			
		||||
		!device.excludeMetrics["nv_remapped_rows_pending"] ||
 | 
			
		||||
@@ -693,13 +693,13 @@ func readRemappedRows(device NvidiaCollectorDevice, output chan lp.CCMetric) err
 | 
			
		||||
		corrected, uncorrected, pending, failure, ret := nvml.DeviceGetRemappedRows(device.device)
 | 
			
		||||
		if ret == nvml.SUCCESS {
 | 
			
		||||
			if !device.excludeMetrics["nv_remapped_rows_corrected"] {
 | 
			
		||||
				y, err := lp.New("nv_remapped_rows_corrected", device.tags, device.meta, map[string]interface{}{"value": float64(corrected)}, time.Now())
 | 
			
		||||
				y, err := lp.NewMessage("nv_remapped_rows_corrected", device.tags, device.meta, map[string]interface{}{"value": float64(corrected)}, time.Now())
 | 
			
		||||
				if err == nil {
 | 
			
		||||
					output <- y
 | 
			
		||||
				}
 | 
			
		||||
			}
 | 
			
		||||
			if !device.excludeMetrics["nv_remapped_rows_uncorrected"] {
 | 
			
		||||
				y, err := lp.New("nv_remapped_rows_corrected", device.tags, device.meta, map[string]interface{}{"value": float64(uncorrected)}, time.Now())
 | 
			
		||||
				y, err := lp.NewMessage("nv_remapped_rows_corrected", device.tags, device.meta, map[string]interface{}{"value": float64(uncorrected)}, time.Now())
 | 
			
		||||
				if err == nil {
 | 
			
		||||
					output <- y
 | 
			
		||||
				}
 | 
			
		||||
@@ -709,7 +709,7 @@ func readRemappedRows(device NvidiaCollectorDevice, output chan lp.CCMetric) err
 | 
			
		||||
				if pending {
 | 
			
		||||
					p = 1
 | 
			
		||||
				}
 | 
			
		||||
				y, err := lp.New("nv_remapped_rows_pending", device.tags, device.meta, map[string]interface{}{"value": p}, time.Now())
 | 
			
		||||
				y, err := lp.NewMessage("nv_remapped_rows_pending", device.tags, device.meta, map[string]interface{}{"value": p}, time.Now())
 | 
			
		||||
				if err == nil {
 | 
			
		||||
					output <- y
 | 
			
		||||
				}
 | 
			
		||||
@@ -719,7 +719,7 @@ func readRemappedRows(device NvidiaCollectorDevice, output chan lp.CCMetric) err
 | 
			
		||||
				if failure {
 | 
			
		||||
					f = 1
 | 
			
		||||
				}
 | 
			
		||||
				y, err := lp.New("nv_remapped_rows_failure", device.tags, device.meta, map[string]interface{}{"value": f}, time.Now())
 | 
			
		||||
				y, err := lp.NewMessage("nv_remapped_rows_failure", device.tags, device.meta, map[string]interface{}{"value": f}, time.Now())
 | 
			
		||||
				if err == nil {
 | 
			
		||||
					output <- y
 | 
			
		||||
				}
 | 
			
		||||
@@ -729,7 +729,7 @@ func readRemappedRows(device NvidiaCollectorDevice, output chan lp.CCMetric) err
 | 
			
		||||
	return nil
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func readProcessCounts(device NvidiaCollectorDevice, output chan lp.CCMetric) error {
 | 
			
		||||
func readProcessCounts(device NvidiaCollectorDevice, output chan lp.CCMessage) error {
 | 
			
		||||
	if !device.excludeMetrics["nv_compute_processes"] {
 | 
			
		||||
		// Get information about processes with a compute context on a device
 | 
			
		||||
		//
 | 
			
		||||
@@ -753,7 +753,7 @@ func readProcessCounts(device NvidiaCollectorDevice, output chan lp.CCMetric) er
 | 
			
		||||
		//        Querying per-instance information using MIG device handles is not supported if the device is in vGPU Host virtualization mode.
 | 
			
		||||
		procList, ret := nvml.DeviceGetComputeRunningProcesses(device.device)
 | 
			
		||||
		if ret == nvml.SUCCESS {
 | 
			
		||||
			y, err := lp.New("nv_compute_processes", device.tags, device.meta, map[string]interface{}{"value": len(procList)}, time.Now())
 | 
			
		||||
			y, err := lp.NewMessage("nv_compute_processes", device.tags, device.meta, map[string]interface{}{"value": len(procList)}, time.Now())
 | 
			
		||||
			if err == nil {
 | 
			
		||||
				output <- y
 | 
			
		||||
			}
 | 
			
		||||
@@ -782,7 +782,7 @@ func readProcessCounts(device NvidiaCollectorDevice, output chan lp.CCMetric) er
 | 
			
		||||
		//       Querying per-instance information using MIG device handles is not supported if the device is in vGPU Host virtualization mode.
 | 
			
		||||
		procList, ret := nvml.DeviceGetGraphicsRunningProcesses(device.device)
 | 
			
		||||
		if ret == nvml.SUCCESS {
 | 
			
		||||
			y, err := lp.New("nv_graphics_processes", device.tags, device.meta, map[string]interface{}{"value": len(procList)}, time.Now())
 | 
			
		||||
			y, err := lp.NewMessage("nv_graphics_processes", device.tags, device.meta, map[string]interface{}{"value": len(procList)}, time.Now())
 | 
			
		||||
			if err == nil {
 | 
			
		||||
				output <- y
 | 
			
		||||
			}
 | 
			
		||||
@@ -812,7 +812,7 @@ func readProcessCounts(device NvidiaCollectorDevice, output chan lp.CCMetric) er
 | 
			
		||||
	// 	//       Querying per-instance information using MIG device handles is not supported if the device is in vGPU Host virtualization mode.
 | 
			
		||||
	// 	procList, ret := nvml.DeviceGetMPSComputeRunningProcesses(device.device)
 | 
			
		||||
	// 	if ret == nvml.SUCCESS {
 | 
			
		||||
	// 		y, err := lp.New("nv_mps_compute_processes", device.tags, device.meta, map[string]interface{}{"value": len(procList)}, time.Now())
 | 
			
		||||
	// 		y, err := lp.NewMessage("nv_mps_compute_processes", device.tags, device.meta, map[string]interface{}{"value": len(procList)}, time.Now())
 | 
			
		||||
	// 		if err == nil {
 | 
			
		||||
	// 			output <- y
 | 
			
		||||
	// 		}
 | 
			
		||||
@@ -821,7 +821,7 @@ func readProcessCounts(device NvidiaCollectorDevice, output chan lp.CCMetric) er
 | 
			
		||||
	return nil
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func readViolationStats(device NvidiaCollectorDevice, output chan lp.CCMetric) error {
 | 
			
		||||
func readViolationStats(device NvidiaCollectorDevice, output chan lp.CCMessage) error {
 | 
			
		||||
	var violTime nvml.ViolationTime
 | 
			
		||||
	var ret nvml.Return
 | 
			
		||||
 | 
			
		||||
@@ -840,7 +840,7 @@ func readViolationStats(device NvidiaCollectorDevice, output chan lp.CCMetric) e
 | 
			
		||||
		violTime, ret = nvml.DeviceGetViolationStatus(device.device, nvml.PERF_POLICY_POWER)
 | 
			
		||||
		if ret == nvml.SUCCESS {
 | 
			
		||||
			t := float64(violTime.ViolationTime) * 1e-9
 | 
			
		||||
			y, err := lp.New("nv_violation_power", device.tags, device.meta, map[string]interface{}{"value": t}, time.Now())
 | 
			
		||||
			y, err := lp.NewMessage("nv_violation_power", device.tags, device.meta, map[string]interface{}{"value": t}, time.Now())
 | 
			
		||||
			if err == nil {
 | 
			
		||||
				y.AddMeta("unit", "sec")
 | 
			
		||||
				output <- y
 | 
			
		||||
@@ -852,7 +852,7 @@ func readViolationStats(device NvidiaCollectorDevice, output chan lp.CCMetric) e
 | 
			
		||||
		violTime, ret = nvml.DeviceGetViolationStatus(device.device, nvml.PERF_POLICY_THERMAL)
 | 
			
		||||
		if ret == nvml.SUCCESS {
 | 
			
		||||
			t := float64(violTime.ViolationTime) * 1e-9
 | 
			
		||||
			y, err := lp.New("nv_violation_thermal", device.tags, device.meta, map[string]interface{}{"value": t}, time.Now())
 | 
			
		||||
			y, err := lp.NewMessage("nv_violation_thermal", device.tags, device.meta, map[string]interface{}{"value": t}, time.Now())
 | 
			
		||||
			if err == nil {
 | 
			
		||||
				y.AddMeta("unit", "sec")
 | 
			
		||||
				output <- y
 | 
			
		||||
@@ -864,7 +864,7 @@ func readViolationStats(device NvidiaCollectorDevice, output chan lp.CCMetric) e
 | 
			
		||||
		violTime, ret = nvml.DeviceGetViolationStatus(device.device, nvml.PERF_POLICY_SYNC_BOOST)
 | 
			
		||||
		if ret == nvml.SUCCESS {
 | 
			
		||||
			t := float64(violTime.ViolationTime) * 1e-9
 | 
			
		||||
			y, err := lp.New("nv_violation_sync_boost", device.tags, device.meta, map[string]interface{}{"value": t}, time.Now())
 | 
			
		||||
			y, err := lp.NewMessage("nv_violation_sync_boost", device.tags, device.meta, map[string]interface{}{"value": t}, time.Now())
 | 
			
		||||
			if err == nil {
 | 
			
		||||
				y.AddMeta("unit", "sec")
 | 
			
		||||
				output <- y
 | 
			
		||||
@@ -876,7 +876,7 @@ func readViolationStats(device NvidiaCollectorDevice, output chan lp.CCMetric) e
 | 
			
		||||
		violTime, ret = nvml.DeviceGetViolationStatus(device.device, nvml.PERF_POLICY_BOARD_LIMIT)
 | 
			
		||||
		if ret == nvml.SUCCESS {
 | 
			
		||||
			t := float64(violTime.ViolationTime) * 1e-9
 | 
			
		||||
			y, err := lp.New("nv_violation_board_limit", device.tags, device.meta, map[string]interface{}{"value": t}, time.Now())
 | 
			
		||||
			y, err := lp.NewMessage("nv_violation_board_limit", device.tags, device.meta, map[string]interface{}{"value": t}, time.Now())
 | 
			
		||||
			if err == nil {
 | 
			
		||||
				y.AddMeta("unit", "sec")
 | 
			
		||||
				output <- y
 | 
			
		||||
@@ -888,7 +888,7 @@ func readViolationStats(device NvidiaCollectorDevice, output chan lp.CCMetric) e
 | 
			
		||||
		violTime, ret = nvml.DeviceGetViolationStatus(device.device, nvml.PERF_POLICY_LOW_UTILIZATION)
 | 
			
		||||
		if ret == nvml.SUCCESS {
 | 
			
		||||
			t := float64(violTime.ViolationTime) * 1e-9
 | 
			
		||||
			y, err := lp.New("nv_violation_low_util", device.tags, device.meta, map[string]interface{}{"value": t}, time.Now())
 | 
			
		||||
			y, err := lp.NewMessage("nv_violation_low_util", device.tags, device.meta, map[string]interface{}{"value": t}, time.Now())
 | 
			
		||||
			if err == nil {
 | 
			
		||||
				y.AddMeta("unit", "sec")
 | 
			
		||||
				output <- y
 | 
			
		||||
@@ -900,7 +900,7 @@ func readViolationStats(device NvidiaCollectorDevice, output chan lp.CCMetric) e
 | 
			
		||||
		violTime, ret = nvml.DeviceGetViolationStatus(device.device, nvml.PERF_POLICY_RELIABILITY)
 | 
			
		||||
		if ret == nvml.SUCCESS {
 | 
			
		||||
			t := float64(violTime.ViolationTime) * 1e-9
 | 
			
		||||
			y, err := lp.New("nv_violation_reliability", device.tags, device.meta, map[string]interface{}{"value": t}, time.Now())
 | 
			
		||||
			y, err := lp.NewMessage("nv_violation_reliability", device.tags, device.meta, map[string]interface{}{"value": t}, time.Now())
 | 
			
		||||
			if err == nil {
 | 
			
		||||
				y.AddMeta("unit", "sec")
 | 
			
		||||
				output <- y
 | 
			
		||||
@@ -912,7 +912,7 @@ func readViolationStats(device NvidiaCollectorDevice, output chan lp.CCMetric) e
 | 
			
		||||
		violTime, ret = nvml.DeviceGetViolationStatus(device.device, nvml.PERF_POLICY_TOTAL_APP_CLOCKS)
 | 
			
		||||
		if ret == nvml.SUCCESS {
 | 
			
		||||
			t := float64(violTime.ViolationTime) * 1e-9
 | 
			
		||||
			y, err := lp.New("nv_violation_below_app_clock", device.tags, device.meta, map[string]interface{}{"value": t}, time.Now())
 | 
			
		||||
			y, err := lp.NewMessage("nv_violation_below_app_clock", device.tags, device.meta, map[string]interface{}{"value": t}, time.Now())
 | 
			
		||||
			if err == nil {
 | 
			
		||||
				y.AddMeta("unit", "sec")
 | 
			
		||||
				output <- y
 | 
			
		||||
@@ -924,7 +924,7 @@ func readViolationStats(device NvidiaCollectorDevice, output chan lp.CCMetric) e
 | 
			
		||||
		violTime, ret = nvml.DeviceGetViolationStatus(device.device, nvml.PERF_POLICY_TOTAL_BASE_CLOCKS)
 | 
			
		||||
		if ret == nvml.SUCCESS {
 | 
			
		||||
			t := float64(violTime.ViolationTime) * 1e-9
 | 
			
		||||
			y, err := lp.New("nv_violation_below_base_clock", device.tags, device.meta, map[string]interface{}{"value": t}, time.Now())
 | 
			
		||||
			y, err := lp.NewMessage("nv_violation_below_base_clock", device.tags, device.meta, map[string]interface{}{"value": t}, time.Now())
 | 
			
		||||
			if err == nil {
 | 
			
		||||
				y.AddMeta("unit", "sec")
 | 
			
		||||
				output <- y
 | 
			
		||||
@@ -935,7 +935,7 @@ func readViolationStats(device NvidiaCollectorDevice, output chan lp.CCMetric) e
 | 
			
		||||
	return nil
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func readNVLinkStats(device NvidiaCollectorDevice, output chan lp.CCMetric) error {
 | 
			
		||||
func readNVLinkStats(device NvidiaCollectorDevice, output chan lp.CCMessage) error {
 | 
			
		||||
	// Retrieves the specified error counter value
 | 
			
		||||
	// Please refer to nvmlNvLinkErrorCounter_t for the error counters that are available
 | 
			
		||||
	//
 | 
			
		||||
@@ -956,7 +956,7 @@ func readNVLinkStats(device NvidiaCollectorDevice, output chan lp.CCMetric) erro
 | 
			
		||||
					count, ret := nvml.DeviceGetNvLinkErrorCounter(device.device, i, nvml.NVLINK_ERROR_DL_CRC_DATA)
 | 
			
		||||
					aggregate_crc_errors = aggregate_crc_errors + count
 | 
			
		||||
					if ret == nvml.SUCCESS {
 | 
			
		||||
						y, err := lp.New("nv_nvlink_crc_errors", device.tags, device.meta, map[string]interface{}{"value": count}, time.Now())
 | 
			
		||||
						y, err := lp.NewMessage("nv_nvlink_crc_errors", device.tags, device.meta, map[string]interface{}{"value": count}, time.Now())
 | 
			
		||||
						if err == nil {
 | 
			
		||||
							y.AddTag("stype", "nvlink")
 | 
			
		||||
							y.AddTag("stype-id", fmt.Sprintf("%d", i))
 | 
			
		||||
@@ -969,7 +969,7 @@ func readNVLinkStats(device NvidiaCollectorDevice, output chan lp.CCMetric) erro
 | 
			
		||||
					count, ret := nvml.DeviceGetNvLinkErrorCounter(device.device, i, nvml.NVLINK_ERROR_DL_ECC_DATA)
 | 
			
		||||
					aggregate_ecc_errors = aggregate_ecc_errors + count
 | 
			
		||||
					if ret == nvml.SUCCESS {
 | 
			
		||||
						y, err := lp.New("nv_nvlink_ecc_errors", device.tags, device.meta, map[string]interface{}{"value": count}, time.Now())
 | 
			
		||||
						y, err := lp.NewMessage("nv_nvlink_ecc_errors", device.tags, device.meta, map[string]interface{}{"value": count}, time.Now())
 | 
			
		||||
						if err == nil {
 | 
			
		||||
							y.AddTag("stype", "nvlink")
 | 
			
		||||
							y.AddTag("stype-id", fmt.Sprintf("%d", i))
 | 
			
		||||
@@ -982,7 +982,7 @@ func readNVLinkStats(device NvidiaCollectorDevice, output chan lp.CCMetric) erro
 | 
			
		||||
					count, ret := nvml.DeviceGetNvLinkErrorCounter(device.device, i, nvml.NVLINK_ERROR_DL_REPLAY)
 | 
			
		||||
					aggregate_replay_errors = aggregate_replay_errors + count
 | 
			
		||||
					if ret == nvml.SUCCESS {
 | 
			
		||||
						y, err := lp.New("nv_nvlink_replay_errors", device.tags, device.meta, map[string]interface{}{"value": count}, time.Now())
 | 
			
		||||
						y, err := lp.NewMessage("nv_nvlink_replay_errors", device.tags, device.meta, map[string]interface{}{"value": count}, time.Now())
 | 
			
		||||
						if err == nil {
 | 
			
		||||
							y.AddTag("stype", "nvlink")
 | 
			
		||||
							y.AddTag("stype-id", fmt.Sprintf("%d", i))
 | 
			
		||||
@@ -995,7 +995,7 @@ func readNVLinkStats(device NvidiaCollectorDevice, output chan lp.CCMetric) erro
 | 
			
		||||
					count, ret := nvml.DeviceGetNvLinkErrorCounter(device.device, i, nvml.NVLINK_ERROR_DL_RECOVERY)
 | 
			
		||||
					aggregate_recovery_errors = aggregate_recovery_errors + count
 | 
			
		||||
					if ret == nvml.SUCCESS {
 | 
			
		||||
						y, err := lp.New("nv_nvlink_recovery_errors", device.tags, device.meta, map[string]interface{}{"value": count}, time.Now())
 | 
			
		||||
						y, err := lp.NewMessage("nv_nvlink_recovery_errors", device.tags, device.meta, map[string]interface{}{"value": count}, time.Now())
 | 
			
		||||
						if err == nil {
 | 
			
		||||
							y.AddTag("stype", "nvlink")
 | 
			
		||||
							y.AddTag("stype-id", fmt.Sprintf("%d", i))
 | 
			
		||||
@@ -1008,7 +1008,7 @@ func readNVLinkStats(device NvidiaCollectorDevice, output chan lp.CCMetric) erro
 | 
			
		||||
					count, ret := nvml.DeviceGetNvLinkErrorCounter(device.device, i, nvml.NVLINK_ERROR_DL_CRC_FLIT)
 | 
			
		||||
					aggregate_crc_flit_errors = aggregate_crc_flit_errors + count
 | 
			
		||||
					if ret == nvml.SUCCESS {
 | 
			
		||||
						y, err := lp.New("nv_nvlink_crc_flit_errors", device.tags, device.meta, map[string]interface{}{"value": count}, time.Now())
 | 
			
		||||
						y, err := lp.NewMessage("nv_nvlink_crc_flit_errors", device.tags, device.meta, map[string]interface{}{"value": count}, time.Now())
 | 
			
		||||
						if err == nil {
 | 
			
		||||
							y.AddTag("stype", "nvlink")
 | 
			
		||||
							y.AddTag("stype-id", fmt.Sprintf("%d", i))
 | 
			
		||||
@@ -1064,13 +1064,13 @@ func readNVLinkStats(device NvidiaCollectorDevice, output chan lp.CCMetric) erro
 | 
			
		||||
	return nil
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func (m *NvidiaCollector) Read(interval time.Duration, output chan lp.CCMetric) {
 | 
			
		||||
func (m *NvidiaCollector) Read(interval time.Duration, output chan lp.CCMessage) {
 | 
			
		||||
	var err error
 | 
			
		||||
	if !m.init {
 | 
			
		||||
		return
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	readAll := func(device NvidiaCollectorDevice, output chan lp.CCMetric) {
 | 
			
		||||
	readAll := func(device NvidiaCollectorDevice, output chan lp.CCMessage) {
 | 
			
		||||
		name, ret := nvml.DeviceGetName(device.device)
 | 
			
		||||
		if ret != nvml.SUCCESS {
 | 
			
		||||
			name = "NoName"
 | 
			
		||||
 
 | 
			
		||||
@@ -10,7 +10,7 @@ import (
 | 
			
		||||
	"time"
 | 
			
		||||
 | 
			
		||||
	cclog "github.com/ClusterCockpit/cc-metric-collector/pkg/ccLogger"
 | 
			
		||||
	lp "github.com/ClusterCockpit/cc-metric-collector/pkg/ccMetric"
 | 
			
		||||
	lp "github.com/ClusterCockpit/cc-energy-manager/pkg/cc-message"
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
// running average power limit (RAPL) monitoring attributes for a zone
 | 
			
		||||
@@ -214,7 +214,7 @@ func (m *RAPLCollector) Init(config json.RawMessage) error {
 | 
			
		||||
 | 
			
		||||
// Read reads running average power limit (RAPL) monitoring attributes for all initialized zones
 | 
			
		||||
// See: https://www.kernel.org/doc/html/latest/power/powercap/powercap.html#monitoring-attributes
 | 
			
		||||
func (m *RAPLCollector) Read(interval time.Duration, output chan lp.CCMetric) {
 | 
			
		||||
func (m *RAPLCollector) Read(interval time.Duration, output chan lp.CCMessage) {
 | 
			
		||||
 | 
			
		||||
	for i := range m.RAPLZoneInfo {
 | 
			
		||||
		p := &m.RAPLZoneInfo[i]
 | 
			
		||||
@@ -237,7 +237,7 @@ func (m *RAPLCollector) Read(interval time.Duration, output chan lp.CCMetric) {
 | 
			
		||||
				timeDiff := energyTimestamp.Sub(p.energyTimestamp)
 | 
			
		||||
				averagePower := float64(energyDiff) / float64(timeDiff.Microseconds())
 | 
			
		||||
 | 
			
		||||
				y, err := lp.New(
 | 
			
		||||
				y, err := lp.NewMessage(
 | 
			
		||||
					"rapl_average_power",
 | 
			
		||||
					p.tags,
 | 
			
		||||
					m.meta,
 | 
			
		||||
 
 | 
			
		||||
@@ -7,7 +7,7 @@ import (
 | 
			
		||||
	"time"
 | 
			
		||||
 | 
			
		||||
	cclog "github.com/ClusterCockpit/cc-metric-collector/pkg/ccLogger"
 | 
			
		||||
	lp "github.com/ClusterCockpit/cc-metric-collector/pkg/ccMetric"
 | 
			
		||||
	lp "github.com/ClusterCockpit/cc-energy-manager/pkg/cc-message"
 | 
			
		||||
	"github.com/ClusterCockpit/go-rocm-smi/pkg/rocm_smi"
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
@@ -162,7 +162,7 @@ func (m *RocmSmiCollector) Init(config json.RawMessage) error {
 | 
			
		||||
 | 
			
		||||
// Read collects all metrics belonging to the sample collector
 | 
			
		||||
// and sends them through the output channel to the collector manager
 | 
			
		||||
func (m *RocmSmiCollector) Read(interval time.Duration, output chan lp.CCMetric) {
 | 
			
		||||
func (m *RocmSmiCollector) Read(interval time.Duration, output chan lp.CCMessage) {
 | 
			
		||||
	// Create a sample metric
 | 
			
		||||
	timestamp := time.Now()
 | 
			
		||||
 | 
			
		||||
@@ -175,119 +175,119 @@ func (m *RocmSmiCollector) Read(interval time.Duration, output chan lp.CCMetric)
 | 
			
		||||
 | 
			
		||||
		if !dev.excludeMetrics["rocm_gfx_util"] {
 | 
			
		||||
			value := metrics.Average_gfx_activity
 | 
			
		||||
			y, err := lp.New("rocm_gfx_util", dev.tags, dev.meta, map[string]interface{}{"value": value}, timestamp)
 | 
			
		||||
			y, err := lp.NewMessage("rocm_gfx_util", dev.tags, dev.meta, map[string]interface{}{"value": value}, timestamp)
 | 
			
		||||
			if err == nil {
 | 
			
		||||
				output <- y
 | 
			
		||||
			}
 | 
			
		||||
		}
 | 
			
		||||
		if !dev.excludeMetrics["rocm_umc_util"] {
 | 
			
		||||
			value := metrics.Average_umc_activity
 | 
			
		||||
			y, err := lp.New("rocm_umc_util", dev.tags, dev.meta, map[string]interface{}{"value": value}, timestamp)
 | 
			
		||||
			y, err := lp.NewMessage("rocm_umc_util", dev.tags, dev.meta, map[string]interface{}{"value": value}, timestamp)
 | 
			
		||||
			if err == nil {
 | 
			
		||||
				output <- y
 | 
			
		||||
			}
 | 
			
		||||
		}
 | 
			
		||||
		if !dev.excludeMetrics["rocm_mm_util"] {
 | 
			
		||||
			value := metrics.Average_mm_activity
 | 
			
		||||
			y, err := lp.New("rocm_mm_util", dev.tags, dev.meta, map[string]interface{}{"value": value}, timestamp)
 | 
			
		||||
			y, err := lp.NewMessage("rocm_mm_util", dev.tags, dev.meta, map[string]interface{}{"value": value}, timestamp)
 | 
			
		||||
			if err == nil {
 | 
			
		||||
				output <- y
 | 
			
		||||
			}
 | 
			
		||||
		}
 | 
			
		||||
		if !dev.excludeMetrics["rocm_avg_power"] {
 | 
			
		||||
			value := metrics.Average_socket_power
 | 
			
		||||
			y, err := lp.New("rocm_avg_power", dev.tags, dev.meta, map[string]interface{}{"value": value}, timestamp)
 | 
			
		||||
			y, err := lp.NewMessage("rocm_avg_power", dev.tags, dev.meta, map[string]interface{}{"value": value}, timestamp)
 | 
			
		||||
			if err == nil {
 | 
			
		||||
				output <- y
 | 
			
		||||
			}
 | 
			
		||||
		}
 | 
			
		||||
		if !dev.excludeMetrics["rocm_temp_mem"] {
 | 
			
		||||
			value := metrics.Temperature_mem
 | 
			
		||||
			y, err := lp.New("rocm_temp_mem", dev.tags, dev.meta, map[string]interface{}{"value": value}, timestamp)
 | 
			
		||||
			y, err := lp.NewMessage("rocm_temp_mem", dev.tags, dev.meta, map[string]interface{}{"value": value}, timestamp)
 | 
			
		||||
			if err == nil {
 | 
			
		||||
				output <- y
 | 
			
		||||
			}
 | 
			
		||||
		}
 | 
			
		||||
		if !dev.excludeMetrics["rocm_temp_hotspot"] {
 | 
			
		||||
			value := metrics.Temperature_hotspot
 | 
			
		||||
			y, err := lp.New("rocm_temp_hotspot", dev.tags, dev.meta, map[string]interface{}{"value": value}, timestamp)
 | 
			
		||||
			y, err := lp.NewMessage("rocm_temp_hotspot", dev.tags, dev.meta, map[string]interface{}{"value": value}, timestamp)
 | 
			
		||||
			if err == nil {
 | 
			
		||||
				output <- y
 | 
			
		||||
			}
 | 
			
		||||
		}
 | 
			
		||||
		if !dev.excludeMetrics["rocm_temp_edge"] {
 | 
			
		||||
			value := metrics.Temperature_edge
 | 
			
		||||
			y, err := lp.New("rocm_temp_edge", dev.tags, dev.meta, map[string]interface{}{"value": value}, timestamp)
 | 
			
		||||
			y, err := lp.NewMessage("rocm_temp_edge", dev.tags, dev.meta, map[string]interface{}{"value": value}, timestamp)
 | 
			
		||||
			if err == nil {
 | 
			
		||||
				output <- y
 | 
			
		||||
			}
 | 
			
		||||
		}
 | 
			
		||||
		if !dev.excludeMetrics["rocm_temp_vrgfx"] {
 | 
			
		||||
			value := metrics.Temperature_vrgfx
 | 
			
		||||
			y, err := lp.New("rocm_temp_vrgfx", dev.tags, dev.meta, map[string]interface{}{"value": value}, timestamp)
 | 
			
		||||
			y, err := lp.NewMessage("rocm_temp_vrgfx", dev.tags, dev.meta, map[string]interface{}{"value": value}, timestamp)
 | 
			
		||||
			if err == nil {
 | 
			
		||||
				output <- y
 | 
			
		||||
			}
 | 
			
		||||
		}
 | 
			
		||||
		if !dev.excludeMetrics["rocm_temp_vrsoc"] {
 | 
			
		||||
			value := metrics.Temperature_vrsoc
 | 
			
		||||
			y, err := lp.New("rocm_temp_vrsoc", dev.tags, dev.meta, map[string]interface{}{"value": value}, timestamp)
 | 
			
		||||
			y, err := lp.NewMessage("rocm_temp_vrsoc", dev.tags, dev.meta, map[string]interface{}{"value": value}, timestamp)
 | 
			
		||||
			if err == nil {
 | 
			
		||||
				output <- y
 | 
			
		||||
			}
 | 
			
		||||
		}
 | 
			
		||||
		if !dev.excludeMetrics["rocm_temp_vrmem"] {
 | 
			
		||||
			value := metrics.Temperature_vrmem
 | 
			
		||||
			y, err := lp.New("rocm_temp_vrmem", dev.tags, dev.meta, map[string]interface{}{"value": value}, timestamp)
 | 
			
		||||
			y, err := lp.NewMessage("rocm_temp_vrmem", dev.tags, dev.meta, map[string]interface{}{"value": value}, timestamp)
 | 
			
		||||
			if err == nil {
 | 
			
		||||
				output <- y
 | 
			
		||||
			}
 | 
			
		||||
		}
 | 
			
		||||
		if !dev.excludeMetrics["rocm_gfx_clock"] {
 | 
			
		||||
			value := metrics.Average_gfxclk_frequency
 | 
			
		||||
			y, err := lp.New("rocm_gfx_clock", dev.tags, dev.meta, map[string]interface{}{"value": value}, timestamp)
 | 
			
		||||
			y, err := lp.NewMessage("rocm_gfx_clock", dev.tags, dev.meta, map[string]interface{}{"value": value}, timestamp)
 | 
			
		||||
			if err == nil {
 | 
			
		||||
				output <- y
 | 
			
		||||
			}
 | 
			
		||||
		}
 | 
			
		||||
		if !dev.excludeMetrics["rocm_soc_clock"] {
 | 
			
		||||
			value := metrics.Average_socclk_frequency
 | 
			
		||||
			y, err := lp.New("rocm_soc_clock", dev.tags, dev.meta, map[string]interface{}{"value": value}, timestamp)
 | 
			
		||||
			y, err := lp.NewMessage("rocm_soc_clock", dev.tags, dev.meta, map[string]interface{}{"value": value}, timestamp)
 | 
			
		||||
			if err == nil {
 | 
			
		||||
				output <- y
 | 
			
		||||
			}
 | 
			
		||||
		}
 | 
			
		||||
		if !dev.excludeMetrics["rocm_u_clock"] {
 | 
			
		||||
			value := metrics.Average_uclk_frequency
 | 
			
		||||
			y, err := lp.New("rocm_u_clock", dev.tags, dev.meta, map[string]interface{}{"value": value}, timestamp)
 | 
			
		||||
			y, err := lp.NewMessage("rocm_u_clock", dev.tags, dev.meta, map[string]interface{}{"value": value}, timestamp)
 | 
			
		||||
			if err == nil {
 | 
			
		||||
				output <- y
 | 
			
		||||
			}
 | 
			
		||||
		}
 | 
			
		||||
		if !dev.excludeMetrics["rocm_v0_clock"] {
 | 
			
		||||
			value := metrics.Average_vclk0_frequency
 | 
			
		||||
			y, err := lp.New("rocm_v0_clock", dev.tags, dev.meta, map[string]interface{}{"value": value}, timestamp)
 | 
			
		||||
			y, err := lp.NewMessage("rocm_v0_clock", dev.tags, dev.meta, map[string]interface{}{"value": value}, timestamp)
 | 
			
		||||
			if err == nil {
 | 
			
		||||
				output <- y
 | 
			
		||||
			}
 | 
			
		||||
		}
 | 
			
		||||
		if !dev.excludeMetrics["rocm_v1_clock"] {
 | 
			
		||||
			value := metrics.Average_vclk1_frequency
 | 
			
		||||
			y, err := lp.New("rocm_v1_clock", dev.tags, dev.meta, map[string]interface{}{"value": value}, timestamp)
 | 
			
		||||
			y, err := lp.NewMessage("rocm_v1_clock", dev.tags, dev.meta, map[string]interface{}{"value": value}, timestamp)
 | 
			
		||||
			if err == nil {
 | 
			
		||||
				output <- y
 | 
			
		||||
			}
 | 
			
		||||
		}
 | 
			
		||||
		if !dev.excludeMetrics["rocm_d0_clock"] {
 | 
			
		||||
			value := metrics.Average_dclk0_frequency
 | 
			
		||||
			y, err := lp.New("rocm_d0_clock", dev.tags, dev.meta, map[string]interface{}{"value": value}, timestamp)
 | 
			
		||||
			y, err := lp.NewMessage("rocm_d0_clock", dev.tags, dev.meta, map[string]interface{}{"value": value}, timestamp)
 | 
			
		||||
			if err == nil {
 | 
			
		||||
				output <- y
 | 
			
		||||
			}
 | 
			
		||||
		}
 | 
			
		||||
		if !dev.excludeMetrics["rocm_d1_clock"] {
 | 
			
		||||
			value := metrics.Average_dclk1_frequency
 | 
			
		||||
			y, err := lp.New("rocm_d1_clock", dev.tags, dev.meta, map[string]interface{}{"value": value}, timestamp)
 | 
			
		||||
			y, err := lp.NewMessage("rocm_d1_clock", dev.tags, dev.meta, map[string]interface{}{"value": value}, timestamp)
 | 
			
		||||
			if err == nil {
 | 
			
		||||
				output <- y
 | 
			
		||||
			}
 | 
			
		||||
@@ -295,7 +295,7 @@ func (m *RocmSmiCollector) Read(interval time.Duration, output chan lp.CCMetric)
 | 
			
		||||
		if !dev.excludeMetrics["rocm_temp_hbm"] {
 | 
			
		||||
			for i := 0; i < rocm_smi.NUM_HBM_INSTANCES; i++ {
 | 
			
		||||
				value := metrics.Temperature_hbm[i]
 | 
			
		||||
				y, err := lp.New("rocm_temp_hbm", dev.tags, dev.meta, map[string]interface{}{"value": value}, timestamp)
 | 
			
		||||
				y, err := lp.NewMessage("rocm_temp_hbm", dev.tags, dev.meta, map[string]interface{}{"value": value}, timestamp)
 | 
			
		||||
				if err == nil {
 | 
			
		||||
					y.AddTag("stype", "device")
 | 
			
		||||
					y.AddTag("stype-id", fmt.Sprintf("%d", i))
 | 
			
		||||
 
 | 
			
		||||
@@ -5,7 +5,7 @@ import (
 | 
			
		||||
	"time"
 | 
			
		||||
 | 
			
		||||
	cclog "github.com/ClusterCockpit/cc-metric-collector/pkg/ccLogger"
 | 
			
		||||
	lp "github.com/ClusterCockpit/cc-metric-collector/pkg/ccMetric"
 | 
			
		||||
	lp "github.com/ClusterCockpit/cc-energy-manager/pkg/cc-message"
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
// These are the fields we read from the JSON configuration
 | 
			
		||||
@@ -74,7 +74,7 @@ func (m *SampleCollector) Init(config json.RawMessage) error {
 | 
			
		||||
 | 
			
		||||
// Read collects all metrics belonging to the sample collector
 | 
			
		||||
// and sends them through the output channel to the collector manager
 | 
			
		||||
func (m *SampleCollector) Read(interval time.Duration, output chan lp.CCMetric) {
 | 
			
		||||
func (m *SampleCollector) Read(interval time.Duration, output chan lp.CCMessage) {
 | 
			
		||||
	// Create a sample metric
 | 
			
		||||
	timestamp := time.Now()
 | 
			
		||||
 | 
			
		||||
@@ -85,7 +85,7 @@ func (m *SampleCollector) Read(interval time.Duration, output chan lp.CCMetric)
 | 
			
		||||
	// stop := readState()
 | 
			
		||||
	// value = (stop - start) / interval.Seconds()
 | 
			
		||||
 | 
			
		||||
	y, err := lp.New("sample_metric", m.tags, m.meta, map[string]interface{}{"value": value}, timestamp)
 | 
			
		||||
	y, err := lp.NewMessage("sample_metric", m.tags, m.meta, map[string]interface{}{"value": value}, timestamp)
 | 
			
		||||
	if err == nil {
 | 
			
		||||
		// Send it to output channel
 | 
			
		||||
		output <- y
 | 
			
		||||
 
 | 
			
		||||
@@ -6,7 +6,7 @@ import (
 | 
			
		||||
	"time"
 | 
			
		||||
 | 
			
		||||
	cclog "github.com/ClusterCockpit/cc-metric-collector/pkg/ccLogger"
 | 
			
		||||
	lp "github.com/ClusterCockpit/cc-metric-collector/pkg/ccMetric"
 | 
			
		||||
	lp "github.com/ClusterCockpit/cc-energy-manager/pkg/cc-message"
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
// These are the fields we read from the JSON configuration
 | 
			
		||||
@@ -25,7 +25,7 @@ type SampleTimerCollector struct {
 | 
			
		||||
	config   SampleTimerCollectorConfig // the configuration structure
 | 
			
		||||
	interval time.Duration              // the interval parsed from configuration
 | 
			
		||||
	ticker   *time.Ticker               // own timer
 | 
			
		||||
	output   chan lp.CCMetric           // own internal output channel
 | 
			
		||||
	output   chan lp.CCMessage           // own internal output channel
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func (m *SampleTimerCollector) Init(name string, config json.RawMessage) error {
 | 
			
		||||
@@ -100,14 +100,14 @@ func (m *SampleTimerCollector) ReadMetrics(timestamp time.Time) {
 | 
			
		||||
	// stop := readState()
 | 
			
		||||
	// value = (stop - start) / interval.Seconds()
 | 
			
		||||
 | 
			
		||||
	y, err := lp.New("sample_metric", m.tags, m.meta, map[string]interface{}{"value": value}, timestamp)
 | 
			
		||||
	y, err := lp.NewMessage("sample_metric", m.tags, m.meta, map[string]interface{}{"value": value}, timestamp)
 | 
			
		||||
	if err == nil && m.output != nil {
 | 
			
		||||
		// Send it to output channel if we have a valid channel
 | 
			
		||||
		m.output <- y
 | 
			
		||||
	}
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func (m *SampleTimerCollector) Read(interval time.Duration, output chan lp.CCMetric) {
 | 
			
		||||
func (m *SampleTimerCollector) Read(interval time.Duration, output chan lp.CCMessage) {
 | 
			
		||||
	// Capture output channel
 | 
			
		||||
	m.output = output
 | 
			
		||||
}
 | 
			
		||||
 
 | 
			
		||||
@@ -11,7 +11,7 @@ import (
 | 
			
		||||
	"time"
 | 
			
		||||
 | 
			
		||||
	cclog "github.com/ClusterCockpit/cc-metric-collector/pkg/ccLogger"
 | 
			
		||||
	lp "github.com/ClusterCockpit/cc-metric-collector/pkg/ccMetric"
 | 
			
		||||
	lp "github.com/ClusterCockpit/cc-energy-manager/pkg/cc-message"
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
const SCHEDSTATFILE = `/proc/schedstat`
 | 
			
		||||
@@ -96,7 +96,7 @@ func (m *SchedstatCollector) Init(config json.RawMessage) error {
 | 
			
		||||
	return err
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func (m *SchedstatCollector) ParseProcLine(linefields []string, tags map[string]string, output chan lp.CCMetric, now time.Time, tsdelta time.Duration) {
 | 
			
		||||
func (m *SchedstatCollector) ParseProcLine(linefields []string, tags map[string]string, output chan lp.CCMessage, now time.Time, tsdelta time.Duration) {
 | 
			
		||||
	running, _ := strconv.ParseInt(linefields[7], 10, 64)
 | 
			
		||||
	waiting, _ := strconv.ParseInt(linefields[8], 10, 64)
 | 
			
		||||
	diff_running := running - m.olddata[linefields[0]]["running"]
 | 
			
		||||
@@ -109,7 +109,7 @@ func (m *SchedstatCollector) ParseProcLine(linefields []string, tags map[string]
 | 
			
		||||
	m.olddata[linefields[0]]["waiting"] = waiting
 | 
			
		||||
	value := l_running + l_waiting
 | 
			
		||||
 | 
			
		||||
	y, err := lp.New("cpu_load_core", tags, m.meta, map[string]interface{}{"value": value}, now)
 | 
			
		||||
	y, err := lp.NewMessage("cpu_load_core", tags, m.meta, map[string]interface{}{"value": value}, now)
 | 
			
		||||
	if err == nil {
 | 
			
		||||
		// Send it to output channel
 | 
			
		||||
		output <- y
 | 
			
		||||
@@ -118,7 +118,7 @@ func (m *SchedstatCollector) ParseProcLine(linefields []string, tags map[string]
 | 
			
		||||
 | 
			
		||||
// Read collects all metrics belonging to the sample collector
 | 
			
		||||
// and sends them through the output channel to the collector manager
 | 
			
		||||
func (m *SchedstatCollector) Read(interval time.Duration, output chan lp.CCMetric) {
 | 
			
		||||
func (m *SchedstatCollector) Read(interval time.Duration, output chan lp.CCMessage) {
 | 
			
		||||
	if !m.init {
 | 
			
		||||
		return
 | 
			
		||||
	}
 | 
			
		||||
 
 | 
			
		||||
@@ -7,7 +7,7 @@ import (
 | 
			
		||||
	"time"
 | 
			
		||||
 | 
			
		||||
	cclog "github.com/ClusterCockpit/cc-metric-collector/pkg/ccLogger"
 | 
			
		||||
	lp "github.com/ClusterCockpit/cc-metric-collector/pkg/ccMetric"
 | 
			
		||||
	lp "github.com/ClusterCockpit/cc-energy-manager/pkg/cc-message"
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
type SelfCollectorConfig struct {
 | 
			
		||||
@@ -42,56 +42,56 @@ func (m *SelfCollector) Init(config json.RawMessage) error {
 | 
			
		||||
	return err
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func (m *SelfCollector) Read(interval time.Duration, output chan lp.CCMetric) {
 | 
			
		||||
func (m *SelfCollector) Read(interval time.Duration, output chan lp.CCMessage) {
 | 
			
		||||
	timestamp := time.Now()
 | 
			
		||||
 | 
			
		||||
	if m.config.MemStats {
 | 
			
		||||
		var memstats runtime.MemStats
 | 
			
		||||
		runtime.ReadMemStats(&memstats)
 | 
			
		||||
 | 
			
		||||
		y, err := lp.New("total_alloc", m.tags, m.meta, map[string]interface{}{"value": memstats.TotalAlloc}, timestamp)
 | 
			
		||||
		y, err := lp.NewMessage("total_alloc", m.tags, m.meta, map[string]interface{}{"value": memstats.TotalAlloc}, timestamp)
 | 
			
		||||
		if err == nil {
 | 
			
		||||
			y.AddMeta("unit", "Bytes")
 | 
			
		||||
			output <- y
 | 
			
		||||
		}
 | 
			
		||||
		y, err = lp.New("heap_alloc", m.tags, m.meta, map[string]interface{}{"value": memstats.HeapAlloc}, timestamp)
 | 
			
		||||
		y, err = lp.NewMessage("heap_alloc", m.tags, m.meta, map[string]interface{}{"value": memstats.HeapAlloc}, timestamp)
 | 
			
		||||
		if err == nil {
 | 
			
		||||
			y.AddMeta("unit", "Bytes")
 | 
			
		||||
			output <- y
 | 
			
		||||
		}
 | 
			
		||||
		y, err = lp.New("heap_sys", m.tags, m.meta, map[string]interface{}{"value": memstats.HeapSys}, timestamp)
 | 
			
		||||
		y, err = lp.NewMessage("heap_sys", m.tags, m.meta, map[string]interface{}{"value": memstats.HeapSys}, timestamp)
 | 
			
		||||
		if err == nil {
 | 
			
		||||
			y.AddMeta("unit", "Bytes")
 | 
			
		||||
			output <- y
 | 
			
		||||
		}
 | 
			
		||||
		y, err = lp.New("heap_idle", m.tags, m.meta, map[string]interface{}{"value": memstats.HeapIdle}, timestamp)
 | 
			
		||||
		y, err = lp.NewMessage("heap_idle", m.tags, m.meta, map[string]interface{}{"value": memstats.HeapIdle}, timestamp)
 | 
			
		||||
		if err == nil {
 | 
			
		||||
			y.AddMeta("unit", "Bytes")
 | 
			
		||||
			output <- y
 | 
			
		||||
		}
 | 
			
		||||
		y, err = lp.New("heap_inuse", m.tags, m.meta, map[string]interface{}{"value": memstats.HeapInuse}, timestamp)
 | 
			
		||||
		y, err = lp.NewMessage("heap_inuse", m.tags, m.meta, map[string]interface{}{"value": memstats.HeapInuse}, timestamp)
 | 
			
		||||
		if err == nil {
 | 
			
		||||
			y.AddMeta("unit", "Bytes")
 | 
			
		||||
			output <- y
 | 
			
		||||
		}
 | 
			
		||||
		y, err = lp.New("heap_released", m.tags, m.meta, map[string]interface{}{"value": memstats.HeapReleased}, timestamp)
 | 
			
		||||
		y, err = lp.NewMessage("heap_released", m.tags, m.meta, map[string]interface{}{"value": memstats.HeapReleased}, timestamp)
 | 
			
		||||
		if err == nil {
 | 
			
		||||
			y.AddMeta("unit", "Bytes")
 | 
			
		||||
			output <- y
 | 
			
		||||
		}
 | 
			
		||||
		y, err = lp.New("heap_objects", m.tags, m.meta, map[string]interface{}{"value": memstats.HeapObjects}, timestamp)
 | 
			
		||||
		y, err = lp.NewMessage("heap_objects", m.tags, m.meta, map[string]interface{}{"value": memstats.HeapObjects}, timestamp)
 | 
			
		||||
		if err == nil {
 | 
			
		||||
			output <- y
 | 
			
		||||
		}
 | 
			
		||||
	}
 | 
			
		||||
	if m.config.GoRoutines {
 | 
			
		||||
		y, err := lp.New("num_goroutines", m.tags, m.meta, map[string]interface{}{"value": runtime.NumGoroutine()}, timestamp)
 | 
			
		||||
		y, err := lp.NewMessage("num_goroutines", m.tags, m.meta, map[string]interface{}{"value": runtime.NumGoroutine()}, timestamp)
 | 
			
		||||
		if err == nil {
 | 
			
		||||
			output <- y
 | 
			
		||||
		}
 | 
			
		||||
	}
 | 
			
		||||
	if m.config.CgoCalls {
 | 
			
		||||
		y, err := lp.New("num_cgo_calls", m.tags, m.meta, map[string]interface{}{"value": runtime.NumCgoCall()}, timestamp)
 | 
			
		||||
		y, err := lp.NewMessage("num_cgo_calls", m.tags, m.meta, map[string]interface{}{"value": runtime.NumCgoCall()}, timestamp)
 | 
			
		||||
		if err == nil {
 | 
			
		||||
			output <- y
 | 
			
		||||
		}
 | 
			
		||||
@@ -102,35 +102,35 @@ func (m *SelfCollector) Read(interval time.Duration, output chan lp.CCMetric) {
 | 
			
		||||
		if err == nil {
 | 
			
		||||
			sec, nsec := rusage.Utime.Unix()
 | 
			
		||||
			t := float64(sec) + (float64(nsec) * 1e-9)
 | 
			
		||||
			y, err := lp.New("rusage_user_time", m.tags, m.meta, map[string]interface{}{"value": t}, timestamp)
 | 
			
		||||
			y, err := lp.NewMessage("rusage_user_time", m.tags, m.meta, map[string]interface{}{"value": t}, timestamp)
 | 
			
		||||
			if err == nil {
 | 
			
		||||
				y.AddMeta("unit", "seconds")
 | 
			
		||||
				output <- y
 | 
			
		||||
			}
 | 
			
		||||
			sec, nsec = rusage.Stime.Unix()
 | 
			
		||||
			t = float64(sec) + (float64(nsec) * 1e-9)
 | 
			
		||||
			y, err = lp.New("rusage_system_time", m.tags, m.meta, map[string]interface{}{"value": t}, timestamp)
 | 
			
		||||
			y, err = lp.NewMessage("rusage_system_time", m.tags, m.meta, map[string]interface{}{"value": t}, timestamp)
 | 
			
		||||
			if err == nil {
 | 
			
		||||
				y.AddMeta("unit", "seconds")
 | 
			
		||||
				output <- y
 | 
			
		||||
			}
 | 
			
		||||
			y, err = lp.New("rusage_vol_ctx_switch", m.tags, m.meta, map[string]interface{}{"value": rusage.Nvcsw}, timestamp)
 | 
			
		||||
			y, err = lp.NewMessage("rusage_vol_ctx_switch", m.tags, m.meta, map[string]interface{}{"value": rusage.Nvcsw}, timestamp)
 | 
			
		||||
			if err == nil {
 | 
			
		||||
				output <- y
 | 
			
		||||
			}
 | 
			
		||||
			y, err = lp.New("rusage_invol_ctx_switch", m.tags, m.meta, map[string]interface{}{"value": rusage.Nivcsw}, timestamp)
 | 
			
		||||
			y, err = lp.NewMessage("rusage_invol_ctx_switch", m.tags, m.meta, map[string]interface{}{"value": rusage.Nivcsw}, timestamp)
 | 
			
		||||
			if err == nil {
 | 
			
		||||
				output <- y
 | 
			
		||||
			}
 | 
			
		||||
			y, err = lp.New("rusage_signals", m.tags, m.meta, map[string]interface{}{"value": rusage.Nsignals}, timestamp)
 | 
			
		||||
			y, err = lp.NewMessage("rusage_signals", m.tags, m.meta, map[string]interface{}{"value": rusage.Nsignals}, timestamp)
 | 
			
		||||
			if err == nil {
 | 
			
		||||
				output <- y
 | 
			
		||||
			}
 | 
			
		||||
			y, err = lp.New("rusage_major_pgfaults", m.tags, m.meta, map[string]interface{}{"value": rusage.Majflt}, timestamp)
 | 
			
		||||
			y, err = lp.NewMessage("rusage_major_pgfaults", m.tags, m.meta, map[string]interface{}{"value": rusage.Majflt}, timestamp)
 | 
			
		||||
			if err == nil {
 | 
			
		||||
				output <- y
 | 
			
		||||
			}
 | 
			
		||||
			y, err = lp.New("rusage_minor_pgfaults", m.tags, m.meta, map[string]interface{}{"value": rusage.Minflt}, timestamp)
 | 
			
		||||
			y, err = lp.NewMessage("rusage_minor_pgfaults", m.tags, m.meta, map[string]interface{}{"value": rusage.Minflt}, timestamp)
 | 
			
		||||
			if err == nil {
 | 
			
		||||
				output <- y
 | 
			
		||||
			}
 | 
			
		||||
 
 | 
			
		||||
@@ -10,7 +10,7 @@ import (
 | 
			
		||||
	"time"
 | 
			
		||||
 | 
			
		||||
	cclog "github.com/ClusterCockpit/cc-metric-collector/pkg/ccLogger"
 | 
			
		||||
	lp "github.com/ClusterCockpit/cc-metric-collector/pkg/ccMetric"
 | 
			
		||||
	lp "github.com/ClusterCockpit/cc-energy-manager/pkg/cc-message"
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
// See: https://www.kernel.org/doc/html/latest/hwmon/sysfs-interface.html
 | 
			
		||||
@@ -171,7 +171,7 @@ func (m *TempCollector) Init(config json.RawMessage) error {
 | 
			
		||||
	return nil
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func (m *TempCollector) Read(interval time.Duration, output chan lp.CCMetric) {
 | 
			
		||||
func (m *TempCollector) Read(interval time.Duration, output chan lp.CCMessage) {
 | 
			
		||||
 | 
			
		||||
	for _, sensor := range m.sensors {
 | 
			
		||||
		// Read sensor file
 | 
			
		||||
@@ -190,7 +190,7 @@ func (m *TempCollector) Read(interval time.Duration, output chan lp.CCMetric) {
 | 
			
		||||
			continue
 | 
			
		||||
		}
 | 
			
		||||
		x /= 1000
 | 
			
		||||
		y, err := lp.New(
 | 
			
		||||
		y, err := lp.NewMessage(
 | 
			
		||||
			sensor.metricName,
 | 
			
		||||
			sensor.tags,
 | 
			
		||||
			m.meta,
 | 
			
		||||
@@ -203,7 +203,7 @@ func (m *TempCollector) Read(interval time.Duration, output chan lp.CCMetric) {
 | 
			
		||||
 | 
			
		||||
		// max temperature
 | 
			
		||||
		if m.config.ReportMaxTemp && sensor.maxTemp != 0 {
 | 
			
		||||
			y, err := lp.New(
 | 
			
		||||
			y, err := lp.NewMessage(
 | 
			
		||||
				sensor.maxTempName,
 | 
			
		||||
				sensor.tags,
 | 
			
		||||
				m.meta,
 | 
			
		||||
@@ -217,7 +217,7 @@ func (m *TempCollector) Read(interval time.Duration, output chan lp.CCMetric) {
 | 
			
		||||
 | 
			
		||||
		// critical temperature
 | 
			
		||||
		if m.config.ReportCriticalTemp && sensor.critTemp != 0 {
 | 
			
		||||
			y, err := lp.New(
 | 
			
		||||
			y, err := lp.NewMessage(
 | 
			
		||||
				sensor.critTempName,
 | 
			
		||||
				sensor.tags,
 | 
			
		||||
				m.meta,
 | 
			
		||||
 
 | 
			
		||||
@@ -9,7 +9,7 @@ import (
 | 
			
		||||
	"strings"
 | 
			
		||||
	"time"
 | 
			
		||||
 | 
			
		||||
	lp "github.com/ClusterCockpit/cc-metric-collector/pkg/ccMetric"
 | 
			
		||||
	lp "github.com/ClusterCockpit/cc-energy-manager/pkg/cc-message"
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
const MAX_NUM_PROCS = 10
 | 
			
		||||
@@ -53,7 +53,7 @@ func (m *TopProcsCollector) Init(config json.RawMessage) error {
 | 
			
		||||
	return nil
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func (m *TopProcsCollector) Read(interval time.Duration, output chan lp.CCMetric) {
 | 
			
		||||
func (m *TopProcsCollector) Read(interval time.Duration, output chan lp.CCMessage) {
 | 
			
		||||
	if !m.init {
 | 
			
		||||
		return
 | 
			
		||||
	}
 | 
			
		||||
@@ -68,7 +68,7 @@ func (m *TopProcsCollector) Read(interval time.Duration, output chan lp.CCMetric
 | 
			
		||||
	lines := strings.Split(string(stdout), "\n")
 | 
			
		||||
	for i := 1; i < m.config.Num_procs+1; i++ {
 | 
			
		||||
		name := fmt.Sprintf("topproc%d", i)
 | 
			
		||||
		y, err := lp.New(name, m.tags, m.meta, map[string]interface{}{"value": string(lines[i])}, time.Now())
 | 
			
		||||
		y, err := lp.NewMessage(name, m.tags, m.meta, map[string]interface{}{"value": string(lines[i])}, time.Now())
 | 
			
		||||
		if err == nil {
 | 
			
		||||
			output <- y
 | 
			
		||||
		}
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user