mirror of
https://github.com/ClusterCockpit/cc-metric-collector.git
synced 2026-02-13 14:41:45 +01:00
* Replace fmt.Sprintf("%d", i) by strconv.Itoa(i)
* Correct misspelled words
* Remove unused code
* Break up very long lines into multiple lines
* lp.NewMessage -> lp.NewMetric
This commit is contained in:
2
.github/workflows/runonce.yml
vendored
2
.github/workflows/runonce.yml
vendored
@@ -49,7 +49,7 @@ jobs:
|
|||||||
# Running the linter requires likwid.h, which gets downloaded in the build step
|
# Running the linter requires likwid.h, which gets downloaded in the build step
|
||||||
- name: Static Analysis with GolangCI-Lint and Upload Report with reviewdog
|
- name: Static Analysis with GolangCI-Lint and Upload Report with reviewdog
|
||||||
run: |
|
run: |
|
||||||
golangci-lint run --enable errorlint,govet,modernize,staticcheck | reviewdog -f=golangci-lint -name "Check golangci-lint on build-latest" -reporter=github-check -filter-mode=nofilter -fail-level none
|
golangci-lint run --enable errorlint,govet,misspell,modernize,staticcheck,unconvert,wastedassign | reviewdog -f=golangci-lint -name "Check golangci-lint on build-latest" -reporter=github-check -filter-mode=nofilter -fail-level none
|
||||||
env:
|
env:
|
||||||
REVIEWDOG_GITHUB_API_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
REVIEWDOG_GITHUB_API_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||||
|
|
||||||
|
|||||||
@@ -35,7 +35,7 @@ type CPUFreqCpuInfoCollector struct {
|
|||||||
topology []CPUFreqCpuInfoCollectorTopology
|
topology []CPUFreqCpuInfoCollectorTopology
|
||||||
}
|
}
|
||||||
|
|
||||||
func (m *CPUFreqCpuInfoCollector) Init(config json.RawMessage) error {
|
func (m *CPUFreqCpuInfoCollector) Init(_ json.RawMessage) error {
|
||||||
// Check if already initialized
|
// Check if already initialized
|
||||||
if m.init {
|
if m.init {
|
||||||
return nil
|
return nil
|
||||||
|
|||||||
@@ -83,8 +83,8 @@ func (m *CPUFreqCollector) Init(config json.RawMessage) error {
|
|||||||
CPUFreqCollectorTopology{
|
CPUFreqCollectorTopology{
|
||||||
tagSet: map[string]string{
|
tagSet: map[string]string{
|
||||||
"type": "hwthread",
|
"type": "hwthread",
|
||||||
"type-id": fmt.Sprint(c.CpuID),
|
"type-id": strconv.Itoa(c.CpuID),
|
||||||
"package_id": fmt.Sprint(c.Socket),
|
"package_id": strconv.Itoa(c.Socket),
|
||||||
},
|
},
|
||||||
scalingCurFreqFile: scalingCurFreqFile,
|
scalingCurFreqFile: scalingCurFreqFile,
|
||||||
},
|
},
|
||||||
|
|||||||
@@ -108,7 +108,9 @@ func (m *CpustatCollector) Init(config json.RawMessage) error {
|
|||||||
} else if strings.HasPrefix(linefields[0], "cpu") && strings.Compare(linefields[0], "cpu") != 0 {
|
} else if strings.HasPrefix(linefields[0], "cpu") && strings.Compare(linefields[0], "cpu") != 0 {
|
||||||
cpustr := strings.TrimLeft(linefields[0], "cpu")
|
cpustr := strings.TrimLeft(linefields[0], "cpu")
|
||||||
cpu, _ := strconv.Atoi(cpustr)
|
cpu, _ := strconv.Atoi(cpustr)
|
||||||
m.cputags[linefields[0]] = map[string]string{"type": "hwthread", "type-id": fmt.Sprintf("%d", cpu)}
|
m.cputags[linefields[0]] = map[string]string{
|
||||||
|
"type": "hwthread",
|
||||||
|
"type-id": strconv.Itoa(cpu)}
|
||||||
m.olddata[linefields[0]] = make(map[string]int64)
|
m.olddata[linefields[0]] = make(map[string]int64)
|
||||||
for k, v := range m.matches {
|
for k, v := range m.matches {
|
||||||
m.olddata[linefields[0]][k], _ = strconv.ParseInt(linefields[v], 0, 64)
|
m.olddata[linefields[0]][k], _ = strconv.ParseInt(linefields[v], 0, 64)
|
||||||
@@ -191,7 +193,7 @@ func (m *CpustatCollector) Read(interval time.Duration, output chan lp.CCMessage
|
|||||||
num_cpus_metric, err := lp.NewMessage("num_cpus",
|
num_cpus_metric, err := lp.NewMessage("num_cpus",
|
||||||
m.nodetags,
|
m.nodetags,
|
||||||
m.meta,
|
m.meta,
|
||||||
map[string]any{"value": int(num_cpus)},
|
map[string]any{"value": num_cpus},
|
||||||
now,
|
now,
|
||||||
)
|
)
|
||||||
if err == nil {
|
if err == nil {
|
||||||
|
|||||||
@@ -124,7 +124,13 @@ mountLoop:
|
|||||||
tags := map[string]string{"type": "node", "device": linefields[0]}
|
tags := map[string]string{"type": "node", "device": linefields[0]}
|
||||||
total := (stat.Blocks * uint64(stat.Bsize)) / uint64(1000000000)
|
total := (stat.Blocks * uint64(stat.Bsize)) / uint64(1000000000)
|
||||||
if m.allowedMetrics["disk_total"] {
|
if m.allowedMetrics["disk_total"] {
|
||||||
y, err := lp.NewMessage("disk_total", tags, m.meta, map[string]any{"value": total}, time.Now())
|
y, err := lp.NewMessage(
|
||||||
|
"disk_total",
|
||||||
|
tags,
|
||||||
|
m.meta,
|
||||||
|
map[string]any{
|
||||||
|
"value": total},
|
||||||
|
time.Now())
|
||||||
if err == nil {
|
if err == nil {
|
||||||
y.AddMeta("unit", "GBytes")
|
y.AddMeta("unit", "GBytes")
|
||||||
output <- y
|
output <- y
|
||||||
@@ -132,7 +138,13 @@ mountLoop:
|
|||||||
}
|
}
|
||||||
free := (stat.Bfree * uint64(stat.Bsize)) / uint64(1000000000)
|
free := (stat.Bfree * uint64(stat.Bsize)) / uint64(1000000000)
|
||||||
if m.allowedMetrics["disk_free"] {
|
if m.allowedMetrics["disk_free"] {
|
||||||
y, err := lp.NewMessage("disk_free", tags, m.meta, map[string]any{"value": free}, time.Now())
|
y, err := lp.NewMessage(
|
||||||
|
"disk_free",
|
||||||
|
tags,
|
||||||
|
m.meta,
|
||||||
|
map[string]any{
|
||||||
|
"value": free},
|
||||||
|
time.Now())
|
||||||
if err == nil {
|
if err == nil {
|
||||||
y.AddMeta("unit", "GBytes")
|
y.AddMeta("unit", "GBytes")
|
||||||
output <- y
|
output <- y
|
||||||
@@ -146,7 +158,14 @@ mountLoop:
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
if m.allowedMetrics["part_max_used"] {
|
if m.allowedMetrics["part_max_used"] {
|
||||||
y, err := lp.NewMessage("part_max_used", map[string]string{"type": "node"}, m.meta, map[string]any{"value": int(part_max_used)}, time.Now())
|
y, err := lp.NewMessage(
|
||||||
|
"part_max_used",
|
||||||
|
map[string]string{
|
||||||
|
"type": "node"},
|
||||||
|
m.meta,
|
||||||
|
map[string]any{
|
||||||
|
"value": int(part_max_used)},
|
||||||
|
time.Now())
|
||||||
if err == nil {
|
if err == nil {
|
||||||
y.AddMeta("unit", "percent")
|
y.AddMeta("unit", "percent")
|
||||||
output <- y
|
output <- y
|
||||||
|
|||||||
@@ -24,7 +24,7 @@ import (
|
|||||||
"os"
|
"os"
|
||||||
"os/signal"
|
"os/signal"
|
||||||
"os/user"
|
"os/user"
|
||||||
"sort"
|
"slices"
|
||||||
"strconv"
|
"strconv"
|
||||||
"strings"
|
"strings"
|
||||||
"sync"
|
"sync"
|
||||||
@@ -125,22 +125,14 @@ func checkMetricType(t string) bool {
|
|||||||
return ok
|
return ok
|
||||||
}
|
}
|
||||||
|
|
||||||
func eventsToEventStr(events map[string]string) string {
|
|
||||||
elist := make([]string, 0)
|
|
||||||
for k, v := range events {
|
|
||||||
elist = append(elist, fmt.Sprintf("%s:%s", v, k))
|
|
||||||
}
|
|
||||||
return strings.Join(elist, ",")
|
|
||||||
}
|
|
||||||
|
|
||||||
func genLikwidEventSet(input LikwidCollectorEventsetConfig) LikwidEventsetConfig {
|
func genLikwidEventSet(input LikwidCollectorEventsetConfig) LikwidEventsetConfig {
|
||||||
tmplist := make([]string, 0)
|
clist := make([]string, len(input.Events))
|
||||||
clist := make([]string, 0)
|
|
||||||
for k := range input.Events {
|
for k := range input.Events {
|
||||||
clist = append(clist, k)
|
clist = append(clist, k)
|
||||||
}
|
}
|
||||||
sort.Strings(clist)
|
slices.Sort(clist)
|
||||||
elist := make([]*C.char, 0)
|
tmplist := make([]string, len(clist))
|
||||||
|
elist := make([]*C.char, len(clist))
|
||||||
for _, k := range clist {
|
for _, k := range clist {
|
||||||
v := input.Events[k]
|
v := input.Events[k]
|
||||||
tmplist = append(tmplist, fmt.Sprintf("%s:%s", v, k))
|
tmplist = append(tmplist, fmt.Sprintf("%s:%s", v, k))
|
||||||
@@ -381,7 +373,6 @@ func (m *LikwidCollector) Init(config json.RawMessage) error {
|
|||||||
// take a measurement for 'interval' seconds of event set index 'group'
|
// take a measurement for 'interval' seconds of event set index 'group'
|
||||||
func (m *LikwidCollector) takeMeasurement(evidx int, evset LikwidEventsetConfig, interval time.Duration) (bool, error) {
|
func (m *LikwidCollector) takeMeasurement(evidx int, evset LikwidEventsetConfig, interval time.Duration) (bool, error) {
|
||||||
var ret C.int
|
var ret C.int
|
||||||
var gid C.int = -1
|
|
||||||
sigchan := make(chan os.Signal, 1)
|
sigchan := make(chan os.Signal, 1)
|
||||||
|
|
||||||
// Watch changes for the lock file ()
|
// Watch changes for the lock file ()
|
||||||
@@ -462,6 +453,7 @@ func (m *LikwidCollector) takeMeasurement(evidx int, evset LikwidEventsetConfig,
|
|||||||
signal.Notify(sigchan, syscall.SIGCHLD)
|
signal.Notify(sigchan, syscall.SIGCHLD)
|
||||||
|
|
||||||
// Add an event string to LIKWID
|
// Add an event string to LIKWID
|
||||||
|
var gid C.int
|
||||||
select {
|
select {
|
||||||
case <-sigchan:
|
case <-sigchan:
|
||||||
gid = -1
|
gid = -1
|
||||||
@@ -631,7 +623,7 @@ func (m *LikwidCollector) calcEventsetMetrics(evset LikwidEventsetConfig, interv
|
|||||||
)
|
)
|
||||||
if err == nil {
|
if err == nil {
|
||||||
if metric.Type != "node" {
|
if metric.Type != "node" {
|
||||||
y.AddTag("type-id", fmt.Sprintf("%d", domain))
|
y.AddTag("type-id", strconv.Itoa(domain))
|
||||||
}
|
}
|
||||||
if len(metric.Unit) > 0 {
|
if len(metric.Unit) > 0 {
|
||||||
y.AddMeta("unit", metric.Unit)
|
y.AddMeta("unit", metric.Unit)
|
||||||
@@ -661,7 +653,7 @@ func (m *LikwidCollector) calcEventsetMetrics(evset LikwidEventsetConfig, interv
|
|||||||
metric.Name,
|
metric.Name,
|
||||||
map[string]string{
|
map[string]string{
|
||||||
"type": "core",
|
"type": "core",
|
||||||
"type-id": fmt.Sprintf("%d", coreID),
|
"type-id": strconv.Itoa(coreID),
|
||||||
},
|
},
|
||||||
m.meta,
|
m.meta,
|
||||||
map[string]any{
|
map[string]any{
|
||||||
@@ -698,7 +690,7 @@ func (m *LikwidCollector) calcEventsetMetrics(evset LikwidEventsetConfig, interv
|
|||||||
metric.Name,
|
metric.Name,
|
||||||
map[string]string{
|
map[string]string{
|
||||||
"type": "socket",
|
"type": "socket",
|
||||||
"type-id": fmt.Sprintf("%d", socketID),
|
"type-id": strconv.Itoa(socketID),
|
||||||
},
|
},
|
||||||
m.meta,
|
m.meta,
|
||||||
map[string]any{
|
map[string]any{
|
||||||
@@ -800,7 +792,7 @@ func (m *LikwidCollector) calcGlobalMetrics(groups []LikwidEventsetConfig, inter
|
|||||||
)
|
)
|
||||||
if err == nil {
|
if err == nil {
|
||||||
if metric.Type != "node" {
|
if metric.Type != "node" {
|
||||||
y.AddTag("type-id", fmt.Sprintf("%d", domain))
|
y.AddTag("type-id", strconv.Itoa(domain))
|
||||||
}
|
}
|
||||||
if len(metric.Unit) > 0 {
|
if len(metric.Unit) > 0 {
|
||||||
y.AddMeta("unit", metric.Unit)
|
y.AddMeta("unit", metric.Unit)
|
||||||
@@ -816,7 +808,7 @@ func (m *LikwidCollector) calcGlobalMetrics(groups []LikwidEventsetConfig, inter
|
|||||||
}
|
}
|
||||||
|
|
||||||
func (m *LikwidCollector) ReadThread(interval time.Duration, output chan lp.CCMessage) {
|
func (m *LikwidCollector) ReadThread(interval time.Duration, output chan lp.CCMessage) {
|
||||||
var err error = nil
|
var err error
|
||||||
groups := make([]LikwidEventsetConfig, 0)
|
groups := make([]LikwidEventsetConfig, 0)
|
||||||
|
|
||||||
for evidx, evset := range m.config.Eventsets {
|
for evidx, evset := range m.config.Eventsets {
|
||||||
|
|||||||
@@ -159,7 +159,7 @@ func (m *MemstatCollector) Init(config json.RawMessage) error {
|
|||||||
file: f,
|
file: f,
|
||||||
tags: map[string]string{
|
tags: map[string]string{
|
||||||
"type": "memoryDomain",
|
"type": "memoryDomain",
|
||||||
"type-id": fmt.Sprintf("%d", id),
|
"type-id": strconv.Itoa(id),
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
m.nodefiles[id] = f
|
m.nodefiles[id] = f
|
||||||
|
|||||||
@@ -150,7 +150,7 @@ func (m *nfsCollector) Read(interval time.Duration, output chan lp.CCMessage) {
|
|||||||
)
|
)
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
prefix := ""
|
var prefix string
|
||||||
switch m.version {
|
switch m.version {
|
||||||
case "v3":
|
case "v3":
|
||||||
prefix = "nfs3"
|
prefix = "nfs3"
|
||||||
|
|||||||
@@ -143,7 +143,13 @@ func (m *NfsIOStatCollector) Read(interval time.Duration, output chan lp.CCMessa
|
|||||||
if old, ok := m.data[mntpoint]; ok {
|
if old, ok := m.data[mntpoint]; ok {
|
||||||
for name, newVal := range values {
|
for name, newVal := range values {
|
||||||
if m.config.SendAbsoluteValues {
|
if m.config.SendAbsoluteValues {
|
||||||
msg, err := lp.NewMessage(fmt.Sprintf("nfsio_%s", name), m.tags, m.meta, map[string]any{"value": newVal}, now)
|
msg, err := lp.NewMessage(
|
||||||
|
"nfsio_"+name,
|
||||||
|
m.tags,
|
||||||
|
m.meta,
|
||||||
|
map[string]any{
|
||||||
|
"value": newVal},
|
||||||
|
now)
|
||||||
if err == nil {
|
if err == nil {
|
||||||
msg.AddTag("stype", "filesystem")
|
msg.AddTag("stype", "filesystem")
|
||||||
msg.AddTag("stype-id", mntpoint)
|
msg.AddTag("stype-id", mntpoint)
|
||||||
|
|||||||
@@ -14,6 +14,7 @@ import (
|
|||||||
"log"
|
"log"
|
||||||
"maps"
|
"maps"
|
||||||
"slices"
|
"slices"
|
||||||
|
"strconv"
|
||||||
"strings"
|
"strings"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
@@ -112,7 +113,7 @@ func (m *NvidiaCollector) Init(config json.RawMessage) error {
|
|||||||
for i := range num_gpus {
|
for i := range num_gpus {
|
||||||
|
|
||||||
// Skip excluded devices by ID
|
// Skip excluded devices by ID
|
||||||
str_i := fmt.Sprintf("%d", i)
|
str_i := strconv.Itoa(i)
|
||||||
if slices.Contains(m.config.ExcludeDevices, str_i) {
|
if slices.Contains(m.config.ExcludeDevices, str_i) {
|
||||||
cclog.ComponentDebug(m.name, "Skipping excluded device", str_i)
|
cclog.ComponentDebug(m.name, "Skipping excluded device", str_i)
|
||||||
continue
|
continue
|
||||||
@@ -239,7 +240,7 @@ func readMemoryInfo(device *NvidiaCollectorDevice, output chan lp.CCMessage) err
|
|||||||
|
|
||||||
if !device.excludeMetrics["nv_fb_mem_total"] {
|
if !device.excludeMetrics["nv_fb_mem_total"] {
|
||||||
t := float64(total) / (1024 * 1024)
|
t := float64(total) / (1024 * 1024)
|
||||||
y, err := lp.NewMessage("nv_fb_mem_total", device.tags, device.meta, map[string]any{"value": t}, time.Now())
|
y, err := lp.NewMetric("nv_fb_mem_total", device.tags, device.meta, t, time.Now())
|
||||||
if err == nil {
|
if err == nil {
|
||||||
y.AddMeta("unit", "MByte")
|
y.AddMeta("unit", "MByte")
|
||||||
output <- y
|
output <- y
|
||||||
@@ -248,7 +249,7 @@ func readMemoryInfo(device *NvidiaCollectorDevice, output chan lp.CCMessage) err
|
|||||||
|
|
||||||
if !device.excludeMetrics["nv_fb_mem_used"] {
|
if !device.excludeMetrics["nv_fb_mem_used"] {
|
||||||
f := float64(used) / (1024 * 1024)
|
f := float64(used) / (1024 * 1024)
|
||||||
y, err := lp.NewMessage("nv_fb_mem_used", device.tags, device.meta, map[string]any{"value": f}, time.Now())
|
y, err := lp.NewMetric("nv_fb_mem_used", device.tags, device.meta, f, time.Now())
|
||||||
if err == nil {
|
if err == nil {
|
||||||
y.AddMeta("unit", "MByte")
|
y.AddMeta("unit", "MByte")
|
||||||
output <- y
|
output <- y
|
||||||
@@ -257,7 +258,7 @@ func readMemoryInfo(device *NvidiaCollectorDevice, output chan lp.CCMessage) err
|
|||||||
|
|
||||||
if v2 && !device.excludeMetrics["nv_fb_mem_reserved"] {
|
if v2 && !device.excludeMetrics["nv_fb_mem_reserved"] {
|
||||||
r := float64(reserved) / (1024 * 1024)
|
r := float64(reserved) / (1024 * 1024)
|
||||||
y, err := lp.NewMessage("nv_fb_mem_reserved", device.tags, device.meta, map[string]any{"value": r}, time.Now())
|
y, err := lp.NewMetric("nv_fb_mem_reserved", device.tags, device.meta, r, time.Now())
|
||||||
if err == nil {
|
if err == nil {
|
||||||
y.AddMeta("unit", "MByte")
|
y.AddMeta("unit", "MByte")
|
||||||
output <- y
|
output <- y
|
||||||
@@ -276,7 +277,7 @@ func readBarMemoryInfo(device *NvidiaCollectorDevice, output chan lp.CCMessage)
|
|||||||
}
|
}
|
||||||
if !device.excludeMetrics["nv_bar1_mem_total"] {
|
if !device.excludeMetrics["nv_bar1_mem_total"] {
|
||||||
t := float64(meminfo.Bar1Total) / (1024 * 1024)
|
t := float64(meminfo.Bar1Total) / (1024 * 1024)
|
||||||
y, err := lp.NewMessage("nv_bar1_mem_total", device.tags, device.meta, map[string]any{"value": t}, time.Now())
|
y, err := lp.NewMetric("nv_bar1_mem_total", device.tags, device.meta, t, time.Now())
|
||||||
if err == nil {
|
if err == nil {
|
||||||
y.AddMeta("unit", "MByte")
|
y.AddMeta("unit", "MByte")
|
||||||
output <- y
|
output <- y
|
||||||
@@ -284,7 +285,7 @@ func readBarMemoryInfo(device *NvidiaCollectorDevice, output chan lp.CCMessage)
|
|||||||
}
|
}
|
||||||
if !device.excludeMetrics["nv_bar1_mem_used"] {
|
if !device.excludeMetrics["nv_bar1_mem_used"] {
|
||||||
t := float64(meminfo.Bar1Used) / (1024 * 1024)
|
t := float64(meminfo.Bar1Used) / (1024 * 1024)
|
||||||
y, err := lp.NewMessage("nv_bar1_mem_used", device.tags, device.meta, map[string]any{"value": t}, time.Now())
|
y, err := lp.NewMetric("nv_bar1_mem_used", device.tags, device.meta, t, time.Now())
|
||||||
if err == nil {
|
if err == nil {
|
||||||
y.AddMeta("unit", "MByte")
|
y.AddMeta("unit", "MByte")
|
||||||
output <- y
|
output <- y
|
||||||
@@ -318,14 +319,14 @@ func readUtilization(device *NvidiaCollectorDevice, output chan lp.CCMessage) er
|
|||||||
util, ret := nvml.DeviceGetUtilizationRates(device.device)
|
util, ret := nvml.DeviceGetUtilizationRates(device.device)
|
||||||
if ret == nvml.SUCCESS {
|
if ret == nvml.SUCCESS {
|
||||||
if !device.excludeMetrics["nv_util"] {
|
if !device.excludeMetrics["nv_util"] {
|
||||||
y, err := lp.NewMessage("nv_util", device.tags, device.meta, map[string]any{"value": float64(util.Gpu)}, time.Now())
|
y, err := lp.NewMetric("nv_util", device.tags, device.meta, float64(util.Gpu), time.Now())
|
||||||
if err == nil {
|
if err == nil {
|
||||||
y.AddMeta("unit", "%")
|
y.AddMeta("unit", "%")
|
||||||
output <- y
|
output <- y
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if !device.excludeMetrics["nv_mem_util"] {
|
if !device.excludeMetrics["nv_mem_util"] {
|
||||||
y, err := lp.NewMessage("nv_mem_util", device.tags, device.meta, map[string]any{"value": float64(util.Memory)}, time.Now())
|
y, err := lp.NewMetric("nv_mem_util", device.tags, device.meta, float64(util.Memory), time.Now())
|
||||||
if err == nil {
|
if err == nil {
|
||||||
y.AddMeta("unit", "%")
|
y.AddMeta("unit", "%")
|
||||||
output <- y
|
output <- y
|
||||||
@@ -345,7 +346,7 @@ func readTemp(device *NvidiaCollectorDevice, output chan lp.CCMessage) error {
|
|||||||
// * NVML_TEMPERATURE_COUNT
|
// * NVML_TEMPERATURE_COUNT
|
||||||
temp, ret := nvml.DeviceGetTemperature(device.device, nvml.TEMPERATURE_GPU)
|
temp, ret := nvml.DeviceGetTemperature(device.device, nvml.TEMPERATURE_GPU)
|
||||||
if ret == nvml.SUCCESS {
|
if ret == nvml.SUCCESS {
|
||||||
y, err := lp.NewMessage("nv_temp", device.tags, device.meta, map[string]any{"value": float64(temp)}, time.Now())
|
y, err := lp.NewMetric("nv_temp", device.tags, device.meta, float64(temp), time.Now())
|
||||||
if err == nil {
|
if err == nil {
|
||||||
y.AddMeta("unit", "degC")
|
y.AddMeta("unit", "degC")
|
||||||
output <- y
|
output <- y
|
||||||
@@ -368,7 +369,7 @@ func readFan(device *NvidiaCollectorDevice, output chan lp.CCMessage) error {
|
|||||||
// This value may exceed 100% in certain cases.
|
// This value may exceed 100% in certain cases.
|
||||||
fan, ret := nvml.DeviceGetFanSpeed(device.device)
|
fan, ret := nvml.DeviceGetFanSpeed(device.device)
|
||||||
if ret == nvml.SUCCESS {
|
if ret == nvml.SUCCESS {
|
||||||
y, err := lp.NewMessage("nv_fan", device.tags, device.meta, map[string]any{"value": float64(fan)}, time.Now())
|
y, err := lp.NewMetric("nv_fan", device.tags, device.meta, float64(fan), time.Now())
|
||||||
if err == nil {
|
if err == nil {
|
||||||
y.AddMeta("unit", "%")
|
y.AddMeta("unit", "%")
|
||||||
output <- y
|
output <- y
|
||||||
@@ -378,27 +379,6 @@ func readFan(device *NvidiaCollectorDevice, output chan lp.CCMessage) error {
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// func readFans(device *NvidiaCollectorDevice, output chan lp.CCMessage) error {
|
|
||||||
// if !device.excludeMetrics["nv_fan"] {
|
|
||||||
// numFans, ret := nvml.DeviceGetNumFans(device.device)
|
|
||||||
// if ret == nvml.SUCCESS {
|
|
||||||
// for i := 0; i < numFans; i++ {
|
|
||||||
// fan, ret := nvml.DeviceGetFanSpeed_v2(device.device, i)
|
|
||||||
// if ret == nvml.SUCCESS {
|
|
||||||
// y, err := lp.NewMessage("nv_fan", device.tags, device.meta, map[string]interface{}{"value": float64(fan)}, time.Now())
|
|
||||||
// if err == nil {
|
|
||||||
// y.AddMeta("unit", "%")
|
|
||||||
// y.AddTag("stype", "fan")
|
|
||||||
// y.AddTag("stype-id", fmt.Sprintf("%d", i))
|
|
||||||
// output <- y
|
|
||||||
// }
|
|
||||||
// }
|
|
||||||
// }
|
|
||||||
// }
|
|
||||||
// }
|
|
||||||
// return nil
|
|
||||||
// }
|
|
||||||
|
|
||||||
func readEccMode(device *NvidiaCollectorDevice, output chan lp.CCMessage) error {
|
func readEccMode(device *NvidiaCollectorDevice, output chan lp.CCMessage) error {
|
||||||
if !device.excludeMetrics["nv_ecc_mode"] {
|
if !device.excludeMetrics["nv_ecc_mode"] {
|
||||||
// Retrieves the current and pending ECC modes for the device.
|
// Retrieves the current and pending ECC modes for the device.
|
||||||
@@ -415,17 +395,17 @@ func readEccMode(device *NvidiaCollectorDevice, output chan lp.CCMessage) error
|
|||||||
var err error
|
var err error
|
||||||
switch ecc_pend {
|
switch ecc_pend {
|
||||||
case nvml.FEATURE_DISABLED:
|
case nvml.FEATURE_DISABLED:
|
||||||
y, err = lp.NewMessage("nv_ecc_mode", device.tags, device.meta, map[string]any{"value": "OFF"}, time.Now())
|
y, err = lp.NewMetric("nv_ecc_mode", device.tags, device.meta, "OFF", time.Now())
|
||||||
case nvml.FEATURE_ENABLED:
|
case nvml.FEATURE_ENABLED:
|
||||||
y, err = lp.NewMessage("nv_ecc_mode", device.tags, device.meta, map[string]any{"value": "ON"}, time.Now())
|
y, err = lp.NewMetric("nv_ecc_mode", device.tags, device.meta, "ON", time.Now())
|
||||||
default:
|
default:
|
||||||
y, err = lp.NewMessage("nv_ecc_mode", device.tags, device.meta, map[string]any{"value": "UNKNOWN"}, time.Now())
|
y, err = lp.NewMetric("nv_ecc_mode", device.tags, device.meta, "UNKNOWN", time.Now())
|
||||||
}
|
}
|
||||||
if err == nil {
|
if err == nil {
|
||||||
output <- y
|
output <- y
|
||||||
}
|
}
|
||||||
case nvml.ERROR_NOT_SUPPORTED:
|
case nvml.ERROR_NOT_SUPPORTED:
|
||||||
y, err := lp.NewMessage("nv_ecc_mode", device.tags, device.meta, map[string]any{"value": "N/A"}, time.Now())
|
y, err := lp.NewMetric("nv_ecc_mode", device.tags, device.meta, "N/A", time.Now())
|
||||||
if err == nil {
|
if err == nil {
|
||||||
output <- y
|
output <- y
|
||||||
}
|
}
|
||||||
@@ -445,7 +425,7 @@ func readPerfState(device *NvidiaCollectorDevice, output chan lp.CCMessage) erro
|
|||||||
// 32: Unknown performance state.
|
// 32: Unknown performance state.
|
||||||
pState, ret := nvml.DeviceGetPerformanceState(device.device)
|
pState, ret := nvml.DeviceGetPerformanceState(device.device)
|
||||||
if ret == nvml.SUCCESS {
|
if ret == nvml.SUCCESS {
|
||||||
y, err := lp.NewMessage("nv_perf_state", device.tags, device.meta, map[string]any{"value": fmt.Sprintf("P%d", int(pState))}, time.Now())
|
y, err := lp.NewMetric("nv_perf_state", device.tags, device.meta, fmt.Sprintf("P%d", int(pState)), time.Now())
|
||||||
if err == nil {
|
if err == nil {
|
||||||
output <- y
|
output <- y
|
||||||
}
|
}
|
||||||
@@ -471,7 +451,7 @@ func readPowerUsage(device *NvidiaCollectorDevice, output chan lp.CCMessage) err
|
|||||||
if mode == nvml.FEATURE_ENABLED {
|
if mode == nvml.FEATURE_ENABLED {
|
||||||
power, ret := nvml.DeviceGetPowerUsage(device.device)
|
power, ret := nvml.DeviceGetPowerUsage(device.device)
|
||||||
if ret == nvml.SUCCESS {
|
if ret == nvml.SUCCESS {
|
||||||
y, err := lp.NewMessage("nv_power_usage", device.tags, device.meta, map[string]any{"value": float64(power) / 1000}, time.Now())
|
y, err := lp.NewMetric("nv_power_usage", device.tags, device.meta, float64(power)/1000, time.Now())
|
||||||
if err == nil {
|
if err == nil {
|
||||||
y.AddMeta("unit", "watts")
|
y.AddMeta("unit", "watts")
|
||||||
output <- y
|
output <- y
|
||||||
@@ -497,7 +477,12 @@ func readEnergyConsumption(device *NvidiaCollectorDevice, output chan lp.CCMessa
|
|||||||
if ret == nvml.SUCCESS {
|
if ret == nvml.SUCCESS {
|
||||||
if device.lastEnergyReading != 0 {
|
if device.lastEnergyReading != 0 {
|
||||||
if !device.excludeMetrics["nv_energy"] {
|
if !device.excludeMetrics["nv_energy"] {
|
||||||
y, err := lp.NewMetric("nv_energy", device.tags, device.meta, (energy-device.lastEnergyReading)/1000, now)
|
y, err := lp.NewMetric(
|
||||||
|
"nv_energy",
|
||||||
|
device.tags,
|
||||||
|
device.meta,
|
||||||
|
(energy-device.lastEnergyReading)/1000,
|
||||||
|
now)
|
||||||
if err == nil {
|
if err == nil {
|
||||||
y.AddMeta("unit", "Joules")
|
y.AddMeta("unit", "Joules")
|
||||||
output <- y
|
output <- y
|
||||||
@@ -539,7 +524,7 @@ func readClocks(device *NvidiaCollectorDevice, output chan lp.CCMessage) error {
|
|||||||
if !device.excludeMetrics["nv_graphics_clock"] {
|
if !device.excludeMetrics["nv_graphics_clock"] {
|
||||||
graphicsClock, ret := nvml.DeviceGetClockInfo(device.device, nvml.CLOCK_GRAPHICS)
|
graphicsClock, ret := nvml.DeviceGetClockInfo(device.device, nvml.CLOCK_GRAPHICS)
|
||||||
if ret == nvml.SUCCESS {
|
if ret == nvml.SUCCESS {
|
||||||
y, err := lp.NewMessage("nv_graphics_clock", device.tags, device.meta, map[string]any{"value": float64(graphicsClock)}, time.Now())
|
y, err := lp.NewMetric("nv_graphics_clock", device.tags, device.meta, float64(graphicsClock), time.Now())
|
||||||
if err == nil {
|
if err == nil {
|
||||||
y.AddMeta("unit", "MHz")
|
y.AddMeta("unit", "MHz")
|
||||||
output <- y
|
output <- y
|
||||||
@@ -550,7 +535,7 @@ func readClocks(device *NvidiaCollectorDevice, output chan lp.CCMessage) error {
|
|||||||
if !device.excludeMetrics["nv_sm_clock"] {
|
if !device.excludeMetrics["nv_sm_clock"] {
|
||||||
smCock, ret := nvml.DeviceGetClockInfo(device.device, nvml.CLOCK_SM)
|
smCock, ret := nvml.DeviceGetClockInfo(device.device, nvml.CLOCK_SM)
|
||||||
if ret == nvml.SUCCESS {
|
if ret == nvml.SUCCESS {
|
||||||
y, err := lp.NewMessage("nv_sm_clock", device.tags, device.meta, map[string]any{"value": float64(smCock)}, time.Now())
|
y, err := lp.NewMetric("nv_sm_clock", device.tags, device.meta, float64(smCock), time.Now())
|
||||||
if err == nil {
|
if err == nil {
|
||||||
y.AddMeta("unit", "MHz")
|
y.AddMeta("unit", "MHz")
|
||||||
output <- y
|
output <- y
|
||||||
@@ -561,7 +546,7 @@ func readClocks(device *NvidiaCollectorDevice, output chan lp.CCMessage) error {
|
|||||||
if !device.excludeMetrics["nv_mem_clock"] {
|
if !device.excludeMetrics["nv_mem_clock"] {
|
||||||
memClock, ret := nvml.DeviceGetClockInfo(device.device, nvml.CLOCK_MEM)
|
memClock, ret := nvml.DeviceGetClockInfo(device.device, nvml.CLOCK_MEM)
|
||||||
if ret == nvml.SUCCESS {
|
if ret == nvml.SUCCESS {
|
||||||
y, err := lp.NewMessage("nv_mem_clock", device.tags, device.meta, map[string]any{"value": float64(memClock)}, time.Now())
|
y, err := lp.NewMetric("nv_mem_clock", device.tags, device.meta, float64(memClock), time.Now())
|
||||||
if err == nil {
|
if err == nil {
|
||||||
y.AddMeta("unit", "MHz")
|
y.AddMeta("unit", "MHz")
|
||||||
output <- y
|
output <- y
|
||||||
@@ -571,7 +556,7 @@ func readClocks(device *NvidiaCollectorDevice, output chan lp.CCMessage) error {
|
|||||||
if !device.excludeMetrics["nv_video_clock"] {
|
if !device.excludeMetrics["nv_video_clock"] {
|
||||||
memClock, ret := nvml.DeviceGetClockInfo(device.device, nvml.CLOCK_VIDEO)
|
memClock, ret := nvml.DeviceGetClockInfo(device.device, nvml.CLOCK_VIDEO)
|
||||||
if ret == nvml.SUCCESS {
|
if ret == nvml.SUCCESS {
|
||||||
y, err := lp.NewMessage("nv_video_clock", device.tags, device.meta, map[string]any{"value": float64(memClock)}, time.Now())
|
y, err := lp.NewMetric("nv_video_clock", device.tags, device.meta, float64(memClock), time.Now())
|
||||||
if err == nil {
|
if err == nil {
|
||||||
y.AddMeta("unit", "MHz")
|
y.AddMeta("unit", "MHz")
|
||||||
output <- y
|
output <- y
|
||||||
@@ -652,7 +637,7 @@ func readEccErrors(device *NvidiaCollectorDevice, output chan lp.CCMessage) erro
|
|||||||
// i.e. the total set of errors across the entire device.
|
// i.e. the total set of errors across the entire device.
|
||||||
ecc_db, ret := nvml.DeviceGetTotalEccErrors(device.device, nvml.MEMORY_ERROR_TYPE_UNCORRECTED, nvml.AGGREGATE_ECC)
|
ecc_db, ret := nvml.DeviceGetTotalEccErrors(device.device, nvml.MEMORY_ERROR_TYPE_UNCORRECTED, nvml.AGGREGATE_ECC)
|
||||||
if ret == nvml.SUCCESS {
|
if ret == nvml.SUCCESS {
|
||||||
y, err := lp.NewMessage("nv_ecc_uncorrected_error", device.tags, device.meta, map[string]any{"value": float64(ecc_db)}, time.Now())
|
y, err := lp.NewMetric("nv_ecc_uncorrected_error", device.tags, device.meta, float64(ecc_db), time.Now())
|
||||||
if err == nil {
|
if err == nil {
|
||||||
output <- y
|
output <- y
|
||||||
}
|
}
|
||||||
@@ -661,7 +646,7 @@ func readEccErrors(device *NvidiaCollectorDevice, output chan lp.CCMessage) erro
|
|||||||
if !device.excludeMetrics["nv_ecc_corrected_error"] {
|
if !device.excludeMetrics["nv_ecc_corrected_error"] {
|
||||||
ecc_sb, ret := nvml.DeviceGetTotalEccErrors(device.device, nvml.MEMORY_ERROR_TYPE_CORRECTED, nvml.AGGREGATE_ECC)
|
ecc_sb, ret := nvml.DeviceGetTotalEccErrors(device.device, nvml.MEMORY_ERROR_TYPE_CORRECTED, nvml.AGGREGATE_ECC)
|
||||||
if ret == nvml.SUCCESS {
|
if ret == nvml.SUCCESS {
|
||||||
y, err := lp.NewMessage("nv_ecc_corrected_error", device.tags, device.meta, map[string]any{"value": float64(ecc_sb)}, time.Now())
|
y, err := lp.NewMetric("nv_ecc_corrected_error", device.tags, device.meta, float64(ecc_sb), time.Now())
|
||||||
if err == nil {
|
if err == nil {
|
||||||
output <- y
|
output <- y
|
||||||
}
|
}
|
||||||
@@ -680,7 +665,7 @@ func readPowerLimit(device *NvidiaCollectorDevice, output chan lp.CCMessage) err
|
|||||||
// If the card's total power draw reaches this limit the power management algorithm kicks in.
|
// If the card's total power draw reaches this limit the power management algorithm kicks in.
|
||||||
pwr_limit, ret := nvml.DeviceGetPowerManagementLimit(device.device)
|
pwr_limit, ret := nvml.DeviceGetPowerManagementLimit(device.device)
|
||||||
if ret == nvml.SUCCESS {
|
if ret == nvml.SUCCESS {
|
||||||
y, err := lp.NewMessage("nv_power_max_limit", device.tags, device.meta, map[string]any{"value": float64(pwr_limit) / 1000}, time.Now())
|
y, err := lp.NewMetric("nv_power_max_limit", device.tags, device.meta, float64(pwr_limit)/1000, time.Now())
|
||||||
if err == nil {
|
if err == nil {
|
||||||
y.AddMeta("unit", "watts")
|
y.AddMeta("unit", "watts")
|
||||||
output <- y
|
output <- y
|
||||||
@@ -707,7 +692,7 @@ func readEncUtilization(device *NvidiaCollectorDevice, output chan lp.CCMessage)
|
|||||||
// Note: On MIG-enabled GPUs, querying encoder utilization is not currently supported.
|
// Note: On MIG-enabled GPUs, querying encoder utilization is not currently supported.
|
||||||
enc_util, _, ret := nvml.DeviceGetEncoderUtilization(device.device)
|
enc_util, _, ret := nvml.DeviceGetEncoderUtilization(device.device)
|
||||||
if ret == nvml.SUCCESS {
|
if ret == nvml.SUCCESS {
|
||||||
y, err := lp.NewMessage("nv_encoder_util", device.tags, device.meta, map[string]any{"value": float64(enc_util)}, time.Now())
|
y, err := lp.NewMetric("nv_encoder_util", device.tags, device.meta, float64(enc_util), time.Now())
|
||||||
if err == nil {
|
if err == nil {
|
||||||
y.AddMeta("unit", "%")
|
y.AddMeta("unit", "%")
|
||||||
output <- y
|
output <- y
|
||||||
@@ -734,7 +719,7 @@ func readDecUtilization(device *NvidiaCollectorDevice, output chan lp.CCMessage)
|
|||||||
// Note: On MIG-enabled GPUs, querying decoder utilization is not currently supported.
|
// Note: On MIG-enabled GPUs, querying decoder utilization is not currently supported.
|
||||||
dec_util, _, ret := nvml.DeviceGetDecoderUtilization(device.device)
|
dec_util, _, ret := nvml.DeviceGetDecoderUtilization(device.device)
|
||||||
if ret == nvml.SUCCESS {
|
if ret == nvml.SUCCESS {
|
||||||
y, err := lp.NewMessage("nv_decoder_util", device.tags, device.meta, map[string]any{"value": float64(dec_util)}, time.Now())
|
y, err := lp.NewMetric("nv_decoder_util", device.tags, device.meta, float64(dec_util), time.Now())
|
||||||
if err == nil {
|
if err == nil {
|
||||||
y.AddMeta("unit", "%")
|
y.AddMeta("unit", "%")
|
||||||
output <- y
|
output <- y
|
||||||
@@ -761,13 +746,13 @@ func readRemappedRows(device *NvidiaCollectorDevice, output chan lp.CCMessage) e
|
|||||||
corrected, uncorrected, pending, failure, ret := nvml.DeviceGetRemappedRows(device.device)
|
corrected, uncorrected, pending, failure, ret := nvml.DeviceGetRemappedRows(device.device)
|
||||||
if ret == nvml.SUCCESS {
|
if ret == nvml.SUCCESS {
|
||||||
if !device.excludeMetrics["nv_remapped_rows_corrected"] {
|
if !device.excludeMetrics["nv_remapped_rows_corrected"] {
|
||||||
y, err := lp.NewMessage("nv_remapped_rows_corrected", device.tags, device.meta, map[string]any{"value": float64(corrected)}, time.Now())
|
y, err := lp.NewMetric("nv_remapped_rows_corrected", device.tags, device.meta, float64(corrected), time.Now())
|
||||||
if err == nil {
|
if err == nil {
|
||||||
output <- y
|
output <- y
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if !device.excludeMetrics["nv_remapped_rows_uncorrected"] {
|
if !device.excludeMetrics["nv_remapped_rows_uncorrected"] {
|
||||||
y, err := lp.NewMessage("nv_remapped_rows_corrected", device.tags, device.meta, map[string]any{"value": float64(uncorrected)}, time.Now())
|
y, err := lp.NewMetric("nv_remapped_rows_corrected", device.tags, device.meta, float64(uncorrected), time.Now())
|
||||||
if err == nil {
|
if err == nil {
|
||||||
output <- y
|
output <- y
|
||||||
}
|
}
|
||||||
@@ -777,7 +762,7 @@ func readRemappedRows(device *NvidiaCollectorDevice, output chan lp.CCMessage) e
|
|||||||
if pending {
|
if pending {
|
||||||
p = 1
|
p = 1
|
||||||
}
|
}
|
||||||
y, err := lp.NewMessage("nv_remapped_rows_pending", device.tags, device.meta, map[string]any{"value": p}, time.Now())
|
y, err := lp.NewMetric("nv_remapped_rows_pending", device.tags, device.meta, p, time.Now())
|
||||||
if err == nil {
|
if err == nil {
|
||||||
output <- y
|
output <- y
|
||||||
}
|
}
|
||||||
@@ -787,7 +772,7 @@ func readRemappedRows(device *NvidiaCollectorDevice, output chan lp.CCMessage) e
|
|||||||
if failure {
|
if failure {
|
||||||
f = 1
|
f = 1
|
||||||
}
|
}
|
||||||
y, err := lp.NewMessage("nv_remapped_rows_failure", device.tags, device.meta, map[string]any{"value": f}, time.Now())
|
y, err := lp.NewMetric("nv_remapped_rows_failure", device.tags, device.meta, f, time.Now())
|
||||||
if err == nil {
|
if err == nil {
|
||||||
output <- y
|
output <- y
|
||||||
}
|
}
|
||||||
@@ -821,7 +806,7 @@ func readProcessCounts(device *NvidiaCollectorDevice, output chan lp.CCMessage)
|
|||||||
// Querying per-instance information using MIG device handles is not supported if the device is in vGPU Host virtualization mode.
|
// Querying per-instance information using MIG device handles is not supported if the device is in vGPU Host virtualization mode.
|
||||||
procList, ret := nvml.DeviceGetComputeRunningProcesses(device.device)
|
procList, ret := nvml.DeviceGetComputeRunningProcesses(device.device)
|
||||||
if ret == nvml.SUCCESS {
|
if ret == nvml.SUCCESS {
|
||||||
y, err := lp.NewMessage("nv_compute_processes", device.tags, device.meta, map[string]any{"value": len(procList)}, time.Now())
|
y, err := lp.NewMetric("nv_compute_processes", device.tags, device.meta, len(procList), time.Now())
|
||||||
if err == nil {
|
if err == nil {
|
||||||
output <- y
|
output <- y
|
||||||
}
|
}
|
||||||
@@ -850,7 +835,7 @@ func readProcessCounts(device *NvidiaCollectorDevice, output chan lp.CCMessage)
|
|||||||
// Querying per-instance information using MIG device handles is not supported if the device is in vGPU Host virtualization mode.
|
// Querying per-instance information using MIG device handles is not supported if the device is in vGPU Host virtualization mode.
|
||||||
procList, ret := nvml.DeviceGetGraphicsRunningProcesses(device.device)
|
procList, ret := nvml.DeviceGetGraphicsRunningProcesses(device.device)
|
||||||
if ret == nvml.SUCCESS {
|
if ret == nvml.SUCCESS {
|
||||||
y, err := lp.NewMessage("nv_graphics_processes", device.tags, device.meta, map[string]any{"value": len(procList)}, time.Now())
|
y, err := lp.NewMetric("nv_graphics_processes", device.tags, device.meta, len(procList), time.Now())
|
||||||
if err == nil {
|
if err == nil {
|
||||||
output <- y
|
output <- y
|
||||||
}
|
}
|
||||||
@@ -880,7 +865,7 @@ func readProcessCounts(device *NvidiaCollectorDevice, output chan lp.CCMessage)
|
|||||||
// // Querying per-instance information using MIG device handles is not supported if the device is in vGPU Host virtualization mode.
|
// // Querying per-instance information using MIG device handles is not supported if the device is in vGPU Host virtualization mode.
|
||||||
// procList, ret := nvml.DeviceGetMPSComputeRunningProcesses(device.device)
|
// procList, ret := nvml.DeviceGetMPSComputeRunningProcesses(device.device)
|
||||||
// if ret == nvml.SUCCESS {
|
// if ret == nvml.SUCCESS {
|
||||||
// y, err := lp.NewMessage("nv_mps_compute_processes", device.tags, device.meta, map[string]interface{}{"value": len(procList)}, time.Now())
|
// y, err := lp.NewMetric("nv_mps_compute_processes", device.tags, device.meta, len(procList), time.Now())
|
||||||
// if err == nil {
|
// if err == nil {
|
||||||
// output <- y
|
// output <- y
|
||||||
// }
|
// }
|
||||||
@@ -908,7 +893,7 @@ func readViolationStats(device *NvidiaCollectorDevice, output chan lp.CCMessage)
|
|||||||
violTime, ret = nvml.DeviceGetViolationStatus(device.device, nvml.PERF_POLICY_POWER)
|
violTime, ret = nvml.DeviceGetViolationStatus(device.device, nvml.PERF_POLICY_POWER)
|
||||||
if ret == nvml.SUCCESS {
|
if ret == nvml.SUCCESS {
|
||||||
t := float64(violTime.ViolationTime) * 1e-9
|
t := float64(violTime.ViolationTime) * 1e-9
|
||||||
y, err := lp.NewMessage("nv_violation_power", device.tags, device.meta, map[string]any{"value": t}, time.Now())
|
y, err := lp.NewMetric("nv_violation_power", device.tags, device.meta, t, time.Now())
|
||||||
if err == nil {
|
if err == nil {
|
||||||
y.AddMeta("unit", "sec")
|
y.AddMeta("unit", "sec")
|
||||||
output <- y
|
output <- y
|
||||||
@@ -920,7 +905,7 @@ func readViolationStats(device *NvidiaCollectorDevice, output chan lp.CCMessage)
|
|||||||
violTime, ret = nvml.DeviceGetViolationStatus(device.device, nvml.PERF_POLICY_THERMAL)
|
violTime, ret = nvml.DeviceGetViolationStatus(device.device, nvml.PERF_POLICY_THERMAL)
|
||||||
if ret == nvml.SUCCESS {
|
if ret == nvml.SUCCESS {
|
||||||
t := float64(violTime.ViolationTime) * 1e-9
|
t := float64(violTime.ViolationTime) * 1e-9
|
||||||
y, err := lp.NewMessage("nv_violation_thermal", device.tags, device.meta, map[string]any{"value": t}, time.Now())
|
y, err := lp.NewMetric("nv_violation_thermal", device.tags, device.meta, t, time.Now())
|
||||||
if err == nil {
|
if err == nil {
|
||||||
y.AddMeta("unit", "sec")
|
y.AddMeta("unit", "sec")
|
||||||
output <- y
|
output <- y
|
||||||
@@ -932,7 +917,7 @@ func readViolationStats(device *NvidiaCollectorDevice, output chan lp.CCMessage)
|
|||||||
violTime, ret = nvml.DeviceGetViolationStatus(device.device, nvml.PERF_POLICY_SYNC_BOOST)
|
violTime, ret = nvml.DeviceGetViolationStatus(device.device, nvml.PERF_POLICY_SYNC_BOOST)
|
||||||
if ret == nvml.SUCCESS {
|
if ret == nvml.SUCCESS {
|
||||||
t := float64(violTime.ViolationTime) * 1e-9
|
t := float64(violTime.ViolationTime) * 1e-9
|
||||||
y, err := lp.NewMessage("nv_violation_sync_boost", device.tags, device.meta, map[string]any{"value": t}, time.Now())
|
y, err := lp.NewMetric("nv_violation_sync_boost", device.tags, device.meta, t, time.Now())
|
||||||
if err == nil {
|
if err == nil {
|
||||||
y.AddMeta("unit", "sec")
|
y.AddMeta("unit", "sec")
|
||||||
output <- y
|
output <- y
|
||||||
@@ -944,7 +929,7 @@ func readViolationStats(device *NvidiaCollectorDevice, output chan lp.CCMessage)
|
|||||||
violTime, ret = nvml.DeviceGetViolationStatus(device.device, nvml.PERF_POLICY_BOARD_LIMIT)
|
violTime, ret = nvml.DeviceGetViolationStatus(device.device, nvml.PERF_POLICY_BOARD_LIMIT)
|
||||||
if ret == nvml.SUCCESS {
|
if ret == nvml.SUCCESS {
|
||||||
t := float64(violTime.ViolationTime) * 1e-9
|
t := float64(violTime.ViolationTime) * 1e-9
|
||||||
y, err := lp.NewMessage("nv_violation_board_limit", device.tags, device.meta, map[string]any{"value": t}, time.Now())
|
y, err := lp.NewMetric("nv_violation_board_limit", device.tags, device.meta, t, time.Now())
|
||||||
if err == nil {
|
if err == nil {
|
||||||
y.AddMeta("unit", "sec")
|
y.AddMeta("unit", "sec")
|
||||||
output <- y
|
output <- y
|
||||||
@@ -956,7 +941,7 @@ func readViolationStats(device *NvidiaCollectorDevice, output chan lp.CCMessage)
|
|||||||
violTime, ret = nvml.DeviceGetViolationStatus(device.device, nvml.PERF_POLICY_LOW_UTILIZATION)
|
violTime, ret = nvml.DeviceGetViolationStatus(device.device, nvml.PERF_POLICY_LOW_UTILIZATION)
|
||||||
if ret == nvml.SUCCESS {
|
if ret == nvml.SUCCESS {
|
||||||
t := float64(violTime.ViolationTime) * 1e-9
|
t := float64(violTime.ViolationTime) * 1e-9
|
||||||
y, err := lp.NewMessage("nv_violation_low_util", device.tags, device.meta, map[string]any{"value": t}, time.Now())
|
y, err := lp.NewMetric("nv_violation_low_util", device.tags, device.meta, t, time.Now())
|
||||||
if err == nil {
|
if err == nil {
|
||||||
y.AddMeta("unit", "sec")
|
y.AddMeta("unit", "sec")
|
||||||
output <- y
|
output <- y
|
||||||
@@ -968,7 +953,7 @@ func readViolationStats(device *NvidiaCollectorDevice, output chan lp.CCMessage)
|
|||||||
violTime, ret = nvml.DeviceGetViolationStatus(device.device, nvml.PERF_POLICY_RELIABILITY)
|
violTime, ret = nvml.DeviceGetViolationStatus(device.device, nvml.PERF_POLICY_RELIABILITY)
|
||||||
if ret == nvml.SUCCESS {
|
if ret == nvml.SUCCESS {
|
||||||
t := float64(violTime.ViolationTime) * 1e-9
|
t := float64(violTime.ViolationTime) * 1e-9
|
||||||
y, err := lp.NewMessage("nv_violation_reliability", device.tags, device.meta, map[string]any{"value": t}, time.Now())
|
y, err := lp.NewMetric("nv_violation_reliability", device.tags, device.meta, t, time.Now())
|
||||||
if err == nil {
|
if err == nil {
|
||||||
y.AddMeta("unit", "sec")
|
y.AddMeta("unit", "sec")
|
||||||
output <- y
|
output <- y
|
||||||
@@ -980,7 +965,7 @@ func readViolationStats(device *NvidiaCollectorDevice, output chan lp.CCMessage)
|
|||||||
violTime, ret = nvml.DeviceGetViolationStatus(device.device, nvml.PERF_POLICY_TOTAL_APP_CLOCKS)
|
violTime, ret = nvml.DeviceGetViolationStatus(device.device, nvml.PERF_POLICY_TOTAL_APP_CLOCKS)
|
||||||
if ret == nvml.SUCCESS {
|
if ret == nvml.SUCCESS {
|
||||||
t := float64(violTime.ViolationTime) * 1e-9
|
t := float64(violTime.ViolationTime) * 1e-9
|
||||||
y, err := lp.NewMessage("nv_violation_below_app_clock", device.tags, device.meta, map[string]any{"value": t}, time.Now())
|
y, err := lp.NewMetric("nv_violation_below_app_clock", device.tags, device.meta, t, time.Now())
|
||||||
if err == nil {
|
if err == nil {
|
||||||
y.AddMeta("unit", "sec")
|
y.AddMeta("unit", "sec")
|
||||||
output <- y
|
output <- y
|
||||||
@@ -992,7 +977,7 @@ func readViolationStats(device *NvidiaCollectorDevice, output chan lp.CCMessage)
|
|||||||
violTime, ret = nvml.DeviceGetViolationStatus(device.device, nvml.PERF_POLICY_TOTAL_BASE_CLOCKS)
|
violTime, ret = nvml.DeviceGetViolationStatus(device.device, nvml.PERF_POLICY_TOTAL_BASE_CLOCKS)
|
||||||
if ret == nvml.SUCCESS {
|
if ret == nvml.SUCCESS {
|
||||||
t := float64(violTime.ViolationTime) * 1e-9
|
t := float64(violTime.ViolationTime) * 1e-9
|
||||||
y, err := lp.NewMessage("nv_violation_below_base_clock", device.tags, device.meta, map[string]any{"value": t}, time.Now())
|
y, err := lp.NewMetric("nv_violation_below_base_clock", device.tags, device.meta, t, time.Now())
|
||||||
if err == nil {
|
if err == nil {
|
||||||
y.AddMeta("unit", "sec")
|
y.AddMeta("unit", "sec")
|
||||||
output <- y
|
output <- y
|
||||||
@@ -1024,10 +1009,10 @@ func readNVLinkStats(device *NvidiaCollectorDevice, output chan lp.CCMessage) er
|
|||||||
count, ret := nvml.DeviceGetNvLinkErrorCounter(device.device, i, nvml.NVLINK_ERROR_DL_CRC_DATA)
|
count, ret := nvml.DeviceGetNvLinkErrorCounter(device.device, i, nvml.NVLINK_ERROR_DL_CRC_DATA)
|
||||||
aggregate_crc_errors += count
|
aggregate_crc_errors += count
|
||||||
if ret == nvml.SUCCESS {
|
if ret == nvml.SUCCESS {
|
||||||
y, err := lp.NewMessage("nv_nvlink_crc_errors", device.tags, device.meta, map[string]any{"value": count}, time.Now())
|
y, err := lp.NewMetric("nv_nvlink_crc_errors", device.tags, device.meta, count, time.Now())
|
||||||
if err == nil {
|
if err == nil {
|
||||||
y.AddTag("stype", "nvlink")
|
y.AddTag("stype", "nvlink")
|
||||||
y.AddTag("stype-id", fmt.Sprintf("%d", i))
|
y.AddTag("stype-id", strconv.Itoa(i))
|
||||||
output <- y
|
output <- y
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -1037,10 +1022,10 @@ func readNVLinkStats(device *NvidiaCollectorDevice, output chan lp.CCMessage) er
|
|||||||
count, ret := nvml.DeviceGetNvLinkErrorCounter(device.device, i, nvml.NVLINK_ERROR_DL_ECC_DATA)
|
count, ret := nvml.DeviceGetNvLinkErrorCounter(device.device, i, nvml.NVLINK_ERROR_DL_ECC_DATA)
|
||||||
aggregate_ecc_errors += count
|
aggregate_ecc_errors += count
|
||||||
if ret == nvml.SUCCESS {
|
if ret == nvml.SUCCESS {
|
||||||
y, err := lp.NewMessage("nv_nvlink_ecc_errors", device.tags, device.meta, map[string]any{"value": count}, time.Now())
|
y, err := lp.NewMetric("nv_nvlink_ecc_errors", device.tags, device.meta, count, time.Now())
|
||||||
if err == nil {
|
if err == nil {
|
||||||
y.AddTag("stype", "nvlink")
|
y.AddTag("stype", "nvlink")
|
||||||
y.AddTag("stype-id", fmt.Sprintf("%d", i))
|
y.AddTag("stype-id", strconv.Itoa(i))
|
||||||
output <- y
|
output <- y
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -1050,10 +1035,10 @@ func readNVLinkStats(device *NvidiaCollectorDevice, output chan lp.CCMessage) er
|
|||||||
count, ret := nvml.DeviceGetNvLinkErrorCounter(device.device, i, nvml.NVLINK_ERROR_DL_REPLAY)
|
count, ret := nvml.DeviceGetNvLinkErrorCounter(device.device, i, nvml.NVLINK_ERROR_DL_REPLAY)
|
||||||
aggregate_replay_errors += count
|
aggregate_replay_errors += count
|
||||||
if ret == nvml.SUCCESS {
|
if ret == nvml.SUCCESS {
|
||||||
y, err := lp.NewMessage("nv_nvlink_replay_errors", device.tags, device.meta, map[string]any{"value": count}, time.Now())
|
y, err := lp.NewMetric("nv_nvlink_replay_errors", device.tags, device.meta, count, time.Now())
|
||||||
if err == nil {
|
if err == nil {
|
||||||
y.AddTag("stype", "nvlink")
|
y.AddTag("stype", "nvlink")
|
||||||
y.AddTag("stype-id", fmt.Sprintf("%d", i))
|
y.AddTag("stype-id", strconv.Itoa(i))
|
||||||
output <- y
|
output <- y
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -1063,10 +1048,10 @@ func readNVLinkStats(device *NvidiaCollectorDevice, output chan lp.CCMessage) er
|
|||||||
count, ret := nvml.DeviceGetNvLinkErrorCounter(device.device, i, nvml.NVLINK_ERROR_DL_RECOVERY)
|
count, ret := nvml.DeviceGetNvLinkErrorCounter(device.device, i, nvml.NVLINK_ERROR_DL_RECOVERY)
|
||||||
aggregate_recovery_errors += count
|
aggregate_recovery_errors += count
|
||||||
if ret == nvml.SUCCESS {
|
if ret == nvml.SUCCESS {
|
||||||
y, err := lp.NewMessage("nv_nvlink_recovery_errors", device.tags, device.meta, map[string]any{"value": count}, time.Now())
|
y, err := lp.NewMetric("nv_nvlink_recovery_errors", device.tags, device.meta, count, time.Now())
|
||||||
if err == nil {
|
if err == nil {
|
||||||
y.AddTag("stype", "nvlink")
|
y.AddTag("stype", "nvlink")
|
||||||
y.AddTag("stype-id", fmt.Sprintf("%d", i))
|
y.AddTag("stype-id", strconv.Itoa(i))
|
||||||
output <- y
|
output <- y
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -1076,10 +1061,10 @@ func readNVLinkStats(device *NvidiaCollectorDevice, output chan lp.CCMessage) er
|
|||||||
count, ret := nvml.DeviceGetNvLinkErrorCounter(device.device, i, nvml.NVLINK_ERROR_DL_CRC_FLIT)
|
count, ret := nvml.DeviceGetNvLinkErrorCounter(device.device, i, nvml.NVLINK_ERROR_DL_CRC_FLIT)
|
||||||
aggregate_crc_flit_errors += count
|
aggregate_crc_flit_errors += count
|
||||||
if ret == nvml.SUCCESS {
|
if ret == nvml.SUCCESS {
|
||||||
y, err := lp.NewMessage("nv_nvlink_crc_flit_errors", device.tags, device.meta, map[string]any{"value": count}, time.Now())
|
y, err := lp.NewMetric("nv_nvlink_crc_flit_errors", device.tags, device.meta, count, time.Now())
|
||||||
if err == nil {
|
if err == nil {
|
||||||
y.AddTag("stype", "nvlink")
|
y.AddTag("stype", "nvlink")
|
||||||
y.AddTag("stype-id", fmt.Sprintf("%d", i))
|
y.AddTag("stype-id", strconv.Itoa(i))
|
||||||
output <- y
|
output <- y
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -1091,7 +1076,7 @@ func readNVLinkStats(device *NvidiaCollectorDevice, output chan lp.CCMessage) er
|
|||||||
// Export aggregated values
|
// Export aggregated values
|
||||||
if !device.excludeMetrics["nv_nvlink_crc_errors"] {
|
if !device.excludeMetrics["nv_nvlink_crc_errors"] {
|
||||||
// Data link receive data CRC error counter
|
// Data link receive data CRC error counter
|
||||||
y, err := lp.NewMessage("nv_nvlink_crc_errors_sum", device.tags, device.meta, map[string]any{"value": aggregate_crc_errors}, time.Now())
|
y, err := lp.NewMetric("nv_nvlink_crc_errors_sum", device.tags, device.meta, aggregate_crc_errors, time.Now())
|
||||||
if err == nil {
|
if err == nil {
|
||||||
y.AddTag("stype", "nvlink")
|
y.AddTag("stype", "nvlink")
|
||||||
output <- y
|
output <- y
|
||||||
@@ -1099,7 +1084,7 @@ func readNVLinkStats(device *NvidiaCollectorDevice, output chan lp.CCMessage) er
|
|||||||
}
|
}
|
||||||
if !device.excludeMetrics["nv_nvlink_ecc_errors"] {
|
if !device.excludeMetrics["nv_nvlink_ecc_errors"] {
|
||||||
// Data link receive data ECC error counter
|
// Data link receive data ECC error counter
|
||||||
y, err := lp.NewMessage("nv_nvlink_ecc_errors_sum", device.tags, device.meta, map[string]any{"value": aggregate_ecc_errors}, time.Now())
|
y, err := lp.NewMetric("nv_nvlink_ecc_errors_sum", device.tags, device.meta, aggregate_ecc_errors, time.Now())
|
||||||
if err == nil {
|
if err == nil {
|
||||||
y.AddTag("stype", "nvlink")
|
y.AddTag("stype", "nvlink")
|
||||||
output <- y
|
output <- y
|
||||||
@@ -1107,7 +1092,7 @@ func readNVLinkStats(device *NvidiaCollectorDevice, output chan lp.CCMessage) er
|
|||||||
}
|
}
|
||||||
if !device.excludeMetrics["nv_nvlink_replay_errors"] {
|
if !device.excludeMetrics["nv_nvlink_replay_errors"] {
|
||||||
// Data link transmit replay error counter
|
// Data link transmit replay error counter
|
||||||
y, err := lp.NewMessage("nv_nvlink_replay_errors_sum", device.tags, device.meta, map[string]any{"value": aggregate_replay_errors}, time.Now())
|
y, err := lp.NewMetric("nv_nvlink_replay_errors_sum", device.tags, device.meta, aggregate_replay_errors, time.Now())
|
||||||
if err == nil {
|
if err == nil {
|
||||||
y.AddTag("stype", "nvlink")
|
y.AddTag("stype", "nvlink")
|
||||||
output <- y
|
output <- y
|
||||||
@@ -1115,7 +1100,7 @@ func readNVLinkStats(device *NvidiaCollectorDevice, output chan lp.CCMessage) er
|
|||||||
}
|
}
|
||||||
if !device.excludeMetrics["nv_nvlink_recovery_errors"] {
|
if !device.excludeMetrics["nv_nvlink_recovery_errors"] {
|
||||||
// Data link transmit recovery error counter
|
// Data link transmit recovery error counter
|
||||||
y, err := lp.NewMessage("nv_nvlink_recovery_errors_sum", device.tags, device.meta, map[string]any{"value": aggregate_recovery_errors}, time.Now())
|
y, err := lp.NewMetric("nv_nvlink_recovery_errors_sum", device.tags, device.meta, aggregate_recovery_errors, time.Now())
|
||||||
if err == nil {
|
if err == nil {
|
||||||
y.AddTag("stype", "nvlink")
|
y.AddTag("stype", "nvlink")
|
||||||
output <- y
|
output <- y
|
||||||
@@ -1123,7 +1108,7 @@ func readNVLinkStats(device *NvidiaCollectorDevice, output chan lp.CCMessage) er
|
|||||||
}
|
}
|
||||||
if !device.excludeMetrics["nv_nvlink_crc_flit_errors"] {
|
if !device.excludeMetrics["nv_nvlink_crc_flit_errors"] {
|
||||||
// Data link receive flow control digit CRC error counter
|
// Data link receive flow control digit CRC error counter
|
||||||
y, err := lp.NewMessage("nv_nvlink_crc_flit_errors_sum", device.tags, device.meta, map[string]any{"value": aggregate_crc_flit_errors}, time.Now())
|
y, err := lp.NewMetric("nv_nvlink_crc_flit_errors_sum", device.tags, device.meta, aggregate_crc_flit_errors, time.Now())
|
||||||
if err == nil {
|
if err == nil {
|
||||||
y.AddTag("stype", "nvlink")
|
y.AddTag("stype", "nvlink")
|
||||||
output <- y
|
output <- y
|
||||||
@@ -1302,7 +1287,7 @@ func (m *NvidiaCollector) Read(interval time.Duration, output chan lp.CCMessage)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
if _, ok := migDevice.tags["stype-id"]; !ok {
|
if _, ok := migDevice.tags["stype-id"]; !ok {
|
||||||
migDevice.tags["stype-id"] = fmt.Sprintf("%d", j)
|
migDevice.tags["stype-id"] = strconv.Itoa(j)
|
||||||
}
|
}
|
||||||
maps.Copy(migDevice.meta, m.gpus[i].meta)
|
maps.Copy(migDevice.meta, m.gpus[i].meta)
|
||||||
if _, ok := migDevice.meta["uuid"]; ok && !m.config.UseUuidForMigDevices {
|
if _, ok := migDevice.meta["uuid"]; ok && !m.config.UseUuidForMigDevices {
|
||||||
|
|||||||
@@ -12,6 +12,7 @@ import (
|
|||||||
"errors"
|
"errors"
|
||||||
"fmt"
|
"fmt"
|
||||||
"slices"
|
"slices"
|
||||||
|
"strconv"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger"
|
cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger"
|
||||||
@@ -91,7 +92,7 @@ func (m *RocmSmiCollector) Init(config json.RawMessage) error {
|
|||||||
m.devices = make([]RocmSmiCollectorDevice, 0)
|
m.devices = make([]RocmSmiCollectorDevice, 0)
|
||||||
|
|
||||||
for i := range numDevs {
|
for i := range numDevs {
|
||||||
str_i := fmt.Sprintf("%d", i)
|
str_i := strconv.Itoa(i)
|
||||||
if slices.Contains(m.config.ExcludeDevices, str_i) {
|
if slices.Contains(m.config.ExcludeDevices, str_i) {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
@@ -297,7 +298,7 @@ func (m *RocmSmiCollector) Read(interval time.Duration, output chan lp.CCMessage
|
|||||||
y, err := lp.NewMessage("rocm_temp_hbm", dev.tags, dev.meta, map[string]any{"value": value}, timestamp)
|
y, err := lp.NewMessage("rocm_temp_hbm", dev.tags, dev.meta, map[string]any{"value": value}, timestamp)
|
||||||
if err == nil {
|
if err == nil {
|
||||||
y.AddTag("stype", "device")
|
y.AddTag("stype", "device")
|
||||||
y.AddTag("stype-id", fmt.Sprintf("%d", i))
|
y.AddTag("stype-id", strconv.Itoa(i))
|
||||||
output <- y
|
output <- y
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -37,11 +37,11 @@ type SampleTimerCollector struct {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func (m *SampleTimerCollector) Init(name string, config json.RawMessage) error {
|
func (m *SampleTimerCollector) Init(name string, config json.RawMessage) error {
|
||||||
var err error = nil
|
var err error
|
||||||
// Always set the name early in Init() to use it in cclog.Component* functions
|
// Always set the name early in Init() to use it in cclog.Component* functions
|
||||||
m.name = "SampleTimerCollector"
|
m.name = "SampleTimerCollector"
|
||||||
// This is for later use, also call it early
|
// This is for later use, also call it early
|
||||||
if err := m.setup(); err != nil {
|
if err = m.setup(); err != nil {
|
||||||
return fmt.Errorf("%s Init(): setup() call failed: %w", m.name, err)
|
return fmt.Errorf("%s Init(): setup() call failed: %w", m.name, err)
|
||||||
}
|
}
|
||||||
// Define meta information sent with each metric
|
// Define meta information sent with each metric
|
||||||
|
|||||||
@@ -53,7 +53,7 @@ func (m *SchedstatCollector) Init(config json.RawMessage) error {
|
|||||||
return fmt.Errorf("%s Init(): setup() call failed: %w", m.name, err)
|
return fmt.Errorf("%s Init(): setup() call failed: %w", m.name, err)
|
||||||
}
|
}
|
||||||
// Tell whether the collector should be run in parallel with others (reading files, ...)
|
// Tell whether the collector should be run in parallel with others (reading files, ...)
|
||||||
// or it should be run serially, mostly for collectors acutally doing measurements
|
// or it should be run serially, mostly for collectors actually doing measurements
|
||||||
// because they should not measure the execution of the other collectors
|
// because they should not measure the execution of the other collectors
|
||||||
m.parallel = true
|
m.parallel = true
|
||||||
// Define meta information sent with each metric
|
// Define meta information sent with each metric
|
||||||
@@ -90,7 +90,7 @@ func (m *SchedstatCollector) Init(config json.RawMessage) error {
|
|||||||
waiting, _ := strconv.ParseInt(linefields[8], 10, 64)
|
waiting, _ := strconv.ParseInt(linefields[8], 10, 64)
|
||||||
m.cputags[linefields[0]] = map[string]string{
|
m.cputags[linefields[0]] = map[string]string{
|
||||||
"type": "hwthread",
|
"type": "hwthread",
|
||||||
"type-id": fmt.Sprintf("%d", cpu),
|
"type-id": strconv.Itoa(cpu),
|
||||||
}
|
}
|
||||||
m.olddata[linefields[0]] = map[string]int64{
|
m.olddata[linefields[0]] = map[string]int64{
|
||||||
"running": running,
|
"running": running,
|
||||||
|
|||||||
@@ -107,8 +107,11 @@ func (m *SlurmCgroupCollector) Init(config json.RawMessage) error {
|
|||||||
return fmt.Errorf("%s Init(): setup() call failed: %w", m.name, err)
|
return fmt.Errorf("%s Init(): setup() call failed: %w", m.name, err)
|
||||||
}
|
}
|
||||||
m.parallel = true
|
m.parallel = true
|
||||||
m.meta = map[string]string{"source": m.name, "group": "SLURM"}
|
m.meta = map[string]string{
|
||||||
m.tags = map[string]string{"type": "hwthread"}
|
"source": m.name,
|
||||||
|
"group": "SLURM"}
|
||||||
|
m.tags = map[string]string{
|
||||||
|
"type": "hwthread"}
|
||||||
m.cpuUsed = make(map[int]bool)
|
m.cpuUsed = make(map[int]bool)
|
||||||
m.cgroupBase = defaultCgroupBase
|
m.cgroupBase = defaultCgroupBase
|
||||||
|
|
||||||
@@ -156,7 +159,9 @@ func (m *SlurmCgroupCollector) ReadJobData(jobdir string) (SlurmJobData, error)
|
|||||||
CpuSet: []int{},
|
CpuSet: []int{},
|
||||||
}
|
}
|
||||||
|
|
||||||
cg := func(f string) string { return filepath.Join(m.cgroupBase, jobdir, f) }
|
cg := func(f string) string {
|
||||||
|
return filepath.Join(m.cgroupBase, jobdir, f)
|
||||||
|
}
|
||||||
|
|
||||||
memUsage, err := m.readFile(cg("memory.current"))
|
memUsage, err := m.readFile(cg("memory.current"))
|
||||||
if err == nil {
|
if err == nil {
|
||||||
@@ -205,8 +210,8 @@ func (m *SlurmCgroupCollector) ReadJobData(jobdir string) (SlurmJobData, error)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
if usageUsec > 0 {
|
if usageUsec > 0 {
|
||||||
jobdata.CpuUsageUser = (userUsec * 100 / usageUsec)
|
jobdata.CpuUsageUser = (userUsec * 100.0 / usageUsec)
|
||||||
jobdata.CpuUsageSys = (systemUsec * 100 / usageUsec)
|
jobdata.CpuUsageSys = (systemUsec * 100.0 / usageUsec)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -249,12 +254,18 @@ func (m *SlurmCgroupCollector) Read(interval time.Duration, output chan lp.CCMes
|
|||||||
for _, cpu := range jobdata.CpuSet {
|
for _, cpu := range jobdata.CpuSet {
|
||||||
coreTags := map[string]string{
|
coreTags := map[string]string{
|
||||||
"type": "hwthread",
|
"type": "hwthread",
|
||||||
"type-id": fmt.Sprintf("%d", cpu),
|
"type-id": strconv.Itoa(cpu),
|
||||||
}
|
}
|
||||||
|
|
||||||
if coreCount > 0 && !m.isExcluded("job_mem_used") {
|
if coreCount > 0 && !m.isExcluded("job_mem_used") {
|
||||||
memPerCore := jobdata.MemoryUsage / coreCount
|
memPerCore := jobdata.MemoryUsage / coreCount
|
||||||
if y, err := lp.NewMessage("job_mem_used", coreTags, m.meta, map[string]any{"value": memPerCore}, timestamp); err == nil {
|
if y, err := lp.NewMessage(
|
||||||
|
"job_mem_used",
|
||||||
|
coreTags,
|
||||||
|
m.meta,
|
||||||
|
map[string]any{
|
||||||
|
"value": memPerCore},
|
||||||
|
timestamp); err == nil {
|
||||||
y.AddMeta("unit", "Bytes")
|
y.AddMeta("unit", "Bytes")
|
||||||
output <- y
|
output <- y
|
||||||
}
|
}
|
||||||
@@ -262,7 +273,13 @@ func (m *SlurmCgroupCollector) Read(interval time.Duration, output chan lp.CCMes
|
|||||||
|
|
||||||
if coreCount > 0 && !m.isExcluded("job_max_mem_used") {
|
if coreCount > 0 && !m.isExcluded("job_max_mem_used") {
|
||||||
maxMemPerCore := jobdata.MaxMemoryUsage / coreCount
|
maxMemPerCore := jobdata.MaxMemoryUsage / coreCount
|
||||||
if y, err := lp.NewMessage("job_max_mem_used", coreTags, m.meta, map[string]any{"value": maxMemPerCore}, timestamp); err == nil {
|
if y, err := lp.NewMessage(
|
||||||
|
"job_max_mem_used",
|
||||||
|
coreTags,
|
||||||
|
m.meta,
|
||||||
|
map[string]any{
|
||||||
|
"value": maxMemPerCore},
|
||||||
|
timestamp); err == nil {
|
||||||
y.AddMeta("unit", "Bytes")
|
y.AddMeta("unit", "Bytes")
|
||||||
output <- y
|
output <- y
|
||||||
}
|
}
|
||||||
@@ -270,7 +287,13 @@ func (m *SlurmCgroupCollector) Read(interval time.Duration, output chan lp.CCMes
|
|||||||
|
|
||||||
if coreCount > 0 && !m.isExcluded("job_mem_limit") {
|
if coreCount > 0 && !m.isExcluded("job_mem_limit") {
|
||||||
limitPerCore := jobdata.LimitMemoryUsage / coreCount
|
limitPerCore := jobdata.LimitMemoryUsage / coreCount
|
||||||
if y, err := lp.NewMessage("job_mem_limit", coreTags, m.meta, map[string]any{"value": limitPerCore}, timestamp); err == nil {
|
if y, err := lp.NewMessage(
|
||||||
|
"job_mem_limit",
|
||||||
|
coreTags,
|
||||||
|
m.meta,
|
||||||
|
map[string]any{
|
||||||
|
"value": limitPerCore},
|
||||||
|
timestamp); err == nil {
|
||||||
y.AddMeta("unit", "Bytes")
|
y.AddMeta("unit", "Bytes")
|
||||||
output <- y
|
output <- y
|
||||||
}
|
}
|
||||||
@@ -278,7 +301,13 @@ func (m *SlurmCgroupCollector) Read(interval time.Duration, output chan lp.CCMes
|
|||||||
|
|
||||||
if coreCount > 0 && !m.isExcluded("job_user_cpu") {
|
if coreCount > 0 && !m.isExcluded("job_user_cpu") {
|
||||||
cpuUserPerCore := jobdata.CpuUsageUser / coreCount
|
cpuUserPerCore := jobdata.CpuUsageUser / coreCount
|
||||||
if y, err := lp.NewMessage("job_user_cpu", coreTags, m.meta, map[string]any{"value": cpuUserPerCore}, timestamp); err == nil {
|
if y, err := lp.NewMessage(
|
||||||
|
"job_user_cpu",
|
||||||
|
coreTags,
|
||||||
|
m.meta,
|
||||||
|
map[string]any{
|
||||||
|
"value": cpuUserPerCore},
|
||||||
|
timestamp); err == nil {
|
||||||
y.AddMeta("unit", "%")
|
y.AddMeta("unit", "%")
|
||||||
output <- y
|
output <- y
|
||||||
}
|
}
|
||||||
@@ -286,7 +315,13 @@ func (m *SlurmCgroupCollector) Read(interval time.Duration, output chan lp.CCMes
|
|||||||
|
|
||||||
if coreCount > 0 && !m.isExcluded("job_sys_cpu") {
|
if coreCount > 0 && !m.isExcluded("job_sys_cpu") {
|
||||||
cpuSysPerCore := jobdata.CpuUsageSys / coreCount
|
cpuSysPerCore := jobdata.CpuUsageSys / coreCount
|
||||||
if y, err := lp.NewMessage("job_sys_cpu", coreTags, m.meta, map[string]any{"value": cpuSysPerCore}, timestamp); err == nil {
|
if y, err := lp.NewMessage(
|
||||||
|
"job_sys_cpu",
|
||||||
|
coreTags,
|
||||||
|
m.meta,
|
||||||
|
map[string]any{
|
||||||
|
"value": cpuSysPerCore},
|
||||||
|
timestamp); err == nil {
|
||||||
y.AddMeta("unit", "%")
|
y.AddMeta("unit", "%")
|
||||||
output <- y
|
output <- y
|
||||||
}
|
}
|
||||||
@@ -301,25 +336,43 @@ func (m *SlurmCgroupCollector) Read(interval time.Duration, output chan lp.CCMes
|
|||||||
if !m.cpuUsed[cpu] {
|
if !m.cpuUsed[cpu] {
|
||||||
coreTags := map[string]string{
|
coreTags := map[string]string{
|
||||||
"type": "hwthread",
|
"type": "hwthread",
|
||||||
"type-id": fmt.Sprintf("%d", cpu),
|
"type-id": strconv.Itoa(cpu),
|
||||||
}
|
}
|
||||||
|
|
||||||
if !m.isExcluded("job_mem_used") {
|
if !m.isExcluded("job_mem_used") {
|
||||||
if y, err := lp.NewMessage("job_mem_used", coreTags, m.meta, map[string]any{"value": 0}, timestamp); err == nil {
|
if y, err := lp.NewMessage(
|
||||||
|
"job_mem_used",
|
||||||
|
coreTags,
|
||||||
|
m.meta,
|
||||||
|
map[string]any{
|
||||||
|
"value": 0},
|
||||||
|
timestamp); err == nil {
|
||||||
y.AddMeta("unit", "Bytes")
|
y.AddMeta("unit", "Bytes")
|
||||||
output <- y
|
output <- y
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if !m.isExcluded("job_max_mem_used") {
|
if !m.isExcluded("job_max_mem_used") {
|
||||||
if y, err := lp.NewMessage("job_max_mem_used", coreTags, m.meta, map[string]any{"value": 0}, timestamp); err == nil {
|
if y, err := lp.NewMessage(
|
||||||
|
"job_max_mem_used",
|
||||||
|
coreTags,
|
||||||
|
m.meta,
|
||||||
|
map[string]any{
|
||||||
|
"value": 0},
|
||||||
|
timestamp); err == nil {
|
||||||
y.AddMeta("unit", "Bytes")
|
y.AddMeta("unit", "Bytes")
|
||||||
output <- y
|
output <- y
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if !m.isExcluded("job_mem_limit") {
|
if !m.isExcluded("job_mem_limit") {
|
||||||
if y, err := lp.NewMessage("job_mem_limit", coreTags, m.meta, map[string]any{"value": 0}, timestamp); err == nil {
|
if y, err := lp.NewMessage(
|
||||||
|
"job_mem_limit",
|
||||||
|
coreTags,
|
||||||
|
m.meta,
|
||||||
|
map[string]any{
|
||||||
|
"value": 0},
|
||||||
|
timestamp); err == nil {
|
||||||
y.AddMeta("unit", "Bytes")
|
y.AddMeta("unit", "Bytes")
|
||||||
output <- y
|
output <- y
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -81,7 +81,13 @@ func (m *TopProcsCollector) Read(interval time.Duration, output chan lp.CCMessag
|
|||||||
lines := strings.Split(string(stdout), "\n")
|
lines := strings.Split(string(stdout), "\n")
|
||||||
for i := 1; i < m.config.Num_procs+1; i++ {
|
for i := 1; i < m.config.Num_procs+1; i++ {
|
||||||
name := fmt.Sprintf("topproc%d", i)
|
name := fmt.Sprintf("topproc%d", i)
|
||||||
y, err := lp.NewMessage(name, m.tags, m.meta, map[string]any{"value": string(lines[i])}, time.Now())
|
y, err := lp.NewMessage(
|
||||||
|
name,
|
||||||
|
m.tags,
|
||||||
|
m.meta,
|
||||||
|
map[string]any{
|
||||||
|
"value": lines[i]},
|
||||||
|
time.Now())
|
||||||
if err == nil {
|
if err == nil {
|
||||||
output <- y
|
output <- y
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -170,22 +170,22 @@ func (c *metricAggregator) Eval(starttime time.Time, endtime time.Time, metrics
|
|||||||
// Check, that only values of one type were collected
|
// Check, that only values of one type were collected
|
||||||
countValueTypes := 0
|
countValueTypes := 0
|
||||||
if len(valuesFloat64) > 0 {
|
if len(valuesFloat64) > 0 {
|
||||||
countValueTypes += 1
|
countValueTypes++
|
||||||
}
|
}
|
||||||
if len(valuesFloat32) > 0 {
|
if len(valuesFloat32) > 0 {
|
||||||
countValueTypes += 1
|
countValueTypes++
|
||||||
}
|
}
|
||||||
if len(valuesInt) > 0 {
|
if len(valuesInt) > 0 {
|
||||||
countValueTypes += 1
|
countValueTypes++
|
||||||
}
|
}
|
||||||
if len(valuesInt32) > 0 {
|
if len(valuesInt32) > 0 {
|
||||||
countValueTypes += 1
|
countValueTypes++
|
||||||
}
|
}
|
||||||
if len(valuesInt64) > 0 {
|
if len(valuesInt64) > 0 {
|
||||||
countValueTypes += 1
|
countValueTypes++
|
||||||
}
|
}
|
||||||
if len(valuesBool) > 0 {
|
if len(valuesBool) > 0 {
|
||||||
countValueTypes += 1
|
countValueTypes++
|
||||||
}
|
}
|
||||||
if countValueTypes > 1 {
|
if countValueTypes > 1 {
|
||||||
cclog.ComponentError("MetricCache", "Collected values of different types")
|
cclog.ComponentError("MetricCache", "Collected values of different types")
|
||||||
|
|||||||
@@ -12,6 +12,7 @@ import (
|
|||||||
"fmt"
|
"fmt"
|
||||||
"regexp"
|
"regexp"
|
||||||
"slices"
|
"slices"
|
||||||
|
"strconv"
|
||||||
"strings"
|
"strings"
|
||||||
|
|
||||||
topo "github.com/ClusterCockpit/cc-metric-collector/pkg/ccTopology"
|
topo "github.com/ClusterCockpit/cc-metric-collector/pkg/ccTopology"
|
||||||
@@ -208,7 +209,7 @@ func infunc(a any, b any) (any, error) {
|
|||||||
case []int:
|
case []int:
|
||||||
return slices.Contains(total, match), nil
|
return slices.Contains(total, match), nil
|
||||||
case string:
|
case string:
|
||||||
smatch := fmt.Sprintf("%d", match)
|
smatch := strconv.Itoa(match)
|
||||||
return strings.Contains(total, smatch), nil
|
return strings.Contains(total, smatch), nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -186,10 +186,6 @@ func (r *metricRouter) Init(ticker mct.MultiChanTicker, wg *sync.WaitGroup, rout
|
|||||||
return fmt.Errorf("MessageProcessor AddAddTagsByCondition() failed: %w", err)
|
return fmt.Errorf("MessageProcessor AddAddTagsByCondition() failed: %w", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
// r.config.dropMetrics = make(map[string]bool)
|
|
||||||
// for _, mname := range r.config.DropMetrics {
|
|
||||||
// r.config.dropMetrics[mname] = true
|
|
||||||
// }
|
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -208,7 +204,7 @@ func getParamMap(point lp.CCMessage) map[string]any {
|
|||||||
return params
|
return params
|
||||||
}
|
}
|
||||||
|
|
||||||
// DoAddTags adds a tag when condition is fullfiled
|
// DoAddTags adds a tag when condition is fulfilled
|
||||||
func (r *metricRouter) DoAddTags(point lp.CCMessage) {
|
func (r *metricRouter) DoAddTags(point lp.CCMessage) {
|
||||||
var conditionMatches bool
|
var conditionMatches bool
|
||||||
for _, m := range r.config.AddTags {
|
for _, m := range r.config.AddTags {
|
||||||
@@ -230,83 +226,6 @@ func (r *metricRouter) DoAddTags(point lp.CCMessage) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// DoDelTags removes a tag when condition is fullfiled
|
|
||||||
// func (r *metricRouter) DoDelTags(point lp.CCMessage) {
|
|
||||||
// var conditionMatches bool
|
|
||||||
// for _, m := range r.config.DelTags {
|
|
||||||
// if m.Condition == "*" {
|
|
||||||
// // Condition is always matched
|
|
||||||
// conditionMatches = true
|
|
||||||
// } else {
|
|
||||||
// // Evaluate condition
|
|
||||||
// var err error
|
|
||||||
// conditionMatches, err = agg.EvalBoolCondition(m.Condition, getParamMap(point))
|
|
||||||
// if err != nil {
|
|
||||||
// cclog.ComponentError("MetricRouter", err.Error())
|
|
||||||
// conditionMatches = false
|
|
||||||
// }
|
|
||||||
// }
|
|
||||||
// if conditionMatches {
|
|
||||||
// point.RemoveTag(m.Key)
|
|
||||||
// }
|
|
||||||
// }
|
|
||||||
// }
|
|
||||||
|
|
||||||
// Conditional test whether a metric should be dropped
|
|
||||||
// func (r *metricRouter) dropMetric(point lp.CCMessage) bool {
|
|
||||||
// // Simple drop check
|
|
||||||
// if conditionMatches, ok := r.config.dropMetrics[point.Name()]; ok {
|
|
||||||
// return conditionMatches
|
|
||||||
// }
|
|
||||||
|
|
||||||
// // Checking the dropping conditions
|
|
||||||
// for _, m := range r.config.DropMetricsIf {
|
|
||||||
// conditionMatches, err := agg.EvalBoolCondition(m, getParamMap(point))
|
|
||||||
// if err != nil {
|
|
||||||
// cclog.ComponentError("MetricRouter", err.Error())
|
|
||||||
// conditionMatches = false
|
|
||||||
// }
|
|
||||||
// if conditionMatches {
|
|
||||||
// return conditionMatches
|
|
||||||
// }
|
|
||||||
// }
|
|
||||||
|
|
||||||
// // No dropping condition met
|
|
||||||
// return false
|
|
||||||
// }
|
|
||||||
|
|
||||||
// func (r *metricRouter) prepareUnit(point lp.CCMessage) bool {
|
|
||||||
// if r.config.NormalizeUnits {
|
|
||||||
// if in_unit, ok := point.GetMeta("unit"); ok {
|
|
||||||
// u := units.NewUnit(in_unit)
|
|
||||||
// if u.Valid() {
|
|
||||||
// point.AddMeta("unit", u.Short())
|
|
||||||
// }
|
|
||||||
// }
|
|
||||||
// }
|
|
||||||
// if newP, ok := r.config.ChangeUnitPrefix[point.Name()]; ok {
|
|
||||||
|
|
||||||
// newPrefix := units.NewPrefix(newP)
|
|
||||||
|
|
||||||
// if in_unit, ok := point.GetMeta("unit"); ok && newPrefix != units.InvalidPrefix {
|
|
||||||
// u := units.NewUnit(in_unit)
|
|
||||||
// if u.Valid() {
|
|
||||||
// cclog.ComponentDebug("MetricRouter", "Change prefix to", newP, "for metric", point.Name())
|
|
||||||
// conv, out_unit := units.GetUnitPrefixFactor(u, newPrefix)
|
|
||||||
// if conv != nil && out_unit.Valid() {
|
|
||||||
// if val, ok := point.GetField("value"); ok {
|
|
||||||
// point.AddField("value", conv(val))
|
|
||||||
// point.AddMeta("unit", out_unit.Short())
|
|
||||||
// }
|
|
||||||
// }
|
|
||||||
// }
|
|
||||||
|
|
||||||
// }
|
|
||||||
// }
|
|
||||||
|
|
||||||
// return true
|
|
||||||
// }
|
|
||||||
|
|
||||||
// Start starts the metric router
|
// Start starts the metric router
|
||||||
func (r *metricRouter) Start() {
|
func (r *metricRouter) Start() {
|
||||||
// start timer if configured
|
// start timer if configured
|
||||||
@@ -322,28 +241,7 @@ func (r *metricRouter) Start() {
|
|||||||
cclog.ComponentDebug("MetricRouter", "DONE")
|
cclog.ComponentDebug("MetricRouter", "DONE")
|
||||||
}
|
}
|
||||||
|
|
||||||
// Forward takes a received metric, adds or deletes tags
|
// Forward message received from collector channel
|
||||||
// and forwards it to the output channels
|
|
||||||
// forward := func(point lp.CCMessage) {
|
|
||||||
// cclog.ComponentDebug("MetricRouter", "FORWARD", point)
|
|
||||||
// r.DoAddTags(point)
|
|
||||||
// r.DoDelTags(point)
|
|
||||||
// name := point.Name()
|
|
||||||
// if new, ok := r.config.RenameMetrics[name]; ok {
|
|
||||||
// point.SetName(new)
|
|
||||||
// point.AddMeta("oldname", name)
|
|
||||||
// r.DoAddTags(point)
|
|
||||||
// r.DoDelTags(point)
|
|
||||||
// }
|
|
||||||
|
|
||||||
// r.prepareUnit(point)
|
|
||||||
|
|
||||||
// for _, o := range r.outputs {
|
|
||||||
// o <- point
|
|
||||||
// }
|
|
||||||
// }
|
|
||||||
|
|
||||||
// Foward message received from collector channel
|
|
||||||
coll_forward := func(p lp.CCMessage) {
|
coll_forward := func(p lp.CCMessage) {
|
||||||
// receive from metric collector
|
// receive from metric collector
|
||||||
//p.AddTag(r.config.HostnameTagName, r.hostname)
|
//p.AddTag(r.config.HostnameTagName, r.hostname)
|
||||||
@@ -356,11 +254,6 @@ func (r *metricRouter) Start() {
|
|||||||
o <- m
|
o <- m
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// if !r.dropMetric(p) {
|
|
||||||
// for _, o := range r.outputs {
|
|
||||||
// o <- point
|
|
||||||
// }
|
|
||||||
// }
|
|
||||||
// even if the metric is dropped, it is stored in the cache for
|
// even if the metric is dropped, it is stored in the cache for
|
||||||
// aggregations
|
// aggregations
|
||||||
if r.config.NumCacheIntervals > 0 {
|
if r.config.NumCacheIntervals > 0 {
|
||||||
@@ -380,9 +273,6 @@ func (r *metricRouter) Start() {
|
|||||||
o <- m
|
o <- m
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// if !r.dropMetric(p) {
|
|
||||||
// forward(p)
|
|
||||||
// }
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Forward message received from cache channel
|
// Forward message received from cache channel
|
||||||
|
|||||||
@@ -51,14 +51,13 @@ var cache struct {
|
|||||||
func fileToInt(path string) int {
|
func fileToInt(path string) int {
|
||||||
buffer, err := os.ReadFile(path)
|
buffer, err := os.ReadFile(path)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Print(err)
|
cclogger.ComponentError("ccTopology", fmt.Sprintf("fileToInt(): Reading \"%s\": %v", path, err))
|
||||||
cclogger.ComponentError("ccTopology", "fileToInt", "Reading", path, ":", err.Error())
|
|
||||||
return -1
|
return -1
|
||||||
}
|
}
|
||||||
stringBuffer := strings.TrimSpace(string(buffer))
|
stringBuffer := strings.TrimSpace(string(buffer))
|
||||||
id, err := strconv.Atoi(stringBuffer)
|
id, err := strconv.Atoi(stringBuffer)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
cclogger.ComponentError("ccTopology", "fileToInt", "Parsing", path, ":", stringBuffer, err.Error())
|
cclogger.ComponentError("ccTopology", fmt.Sprintf("fileToInt(): Parsing \"%s\": %v", stringBuffer, err))
|
||||||
return -1
|
return -1
|
||||||
}
|
}
|
||||||
return id
|
return id
|
||||||
@@ -304,20 +303,19 @@ func GetTypeList(topology_type string) []int {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func GetTypeId(hwt HwthreadEntry, topology_type string) (int, error) {
|
func GetTypeId(hwt HwthreadEntry, topology_type string) (int, error) {
|
||||||
var err error = nil
|
|
||||||
switch topology_type {
|
switch topology_type {
|
||||||
case "node":
|
case "node":
|
||||||
return 0, err
|
return 0, nil
|
||||||
case "socket":
|
case "socket":
|
||||||
return hwt.Socket, err
|
return hwt.Socket, nil
|
||||||
case "die":
|
case "die":
|
||||||
return hwt.Die, err
|
return hwt.Die, nil
|
||||||
case "memoryDomain":
|
case "memoryDomain":
|
||||||
return hwt.NumaDomain, err
|
return hwt.NumaDomain, nil
|
||||||
case "core":
|
case "core":
|
||||||
return hwt.Core, err
|
return hwt.Core, nil
|
||||||
case "hwthread":
|
case "hwthread":
|
||||||
return hwt.CpuID, err
|
return hwt.CpuID, nil
|
||||||
}
|
}
|
||||||
return -1, fmt.Errorf("unknown topology type '%s'", topology_type)
|
return -1, fmt.Errorf("unknown topology type '%s'", topology_type)
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user