Fix lint errors

This commit is contained in:
Thomas Roehl
2026-06-02 16:35:11 +02:00
parent 5938368a76
commit 5d55ee7a77

View File

@@ -130,6 +130,7 @@ type NvidiaGPMCollector struct {
func (m *NvidiaGPMCollector) Init(config json.RawMessage) error { func (m *NvidiaGPMCollector) Init(config json.RawMessage) error {
var err error = nil var err error = nil
m.name = "NvidiaGPMCollector" m.name = "NvidiaGPMCollector"
m.parallel = true
if err := m.setup(); err != nil { if err := m.setup(); err != nil {
return fmt.Errorf("%s Init(): setup() call failed: %w", m.name, err) return fmt.Errorf("%s Init(): setup() call failed: %w", m.name, err)
} }
@@ -166,7 +167,6 @@ func (m *NvidiaGPMCollector) Init(config json.RawMessage) error {
} }
// For all GPUs // For all GPUs
idx := 0
m.gpus = make([]NvidiaGPMCollectorDevice, 0, num_gpus) m.gpus = make([]NvidiaGPMCollectorDevice, 0, num_gpus)
for i := range num_gpus { for i := range num_gpus {
@@ -185,15 +185,31 @@ func (m *NvidiaGPMCollector) Init(config json.RawMessage) error {
continue continue
} }
//supportInfoFunc := nvml.GpmQueryDeviceSupportV(device)
supportInfo, ret := nvml.GpmQueryDeviceSupport(device) supportInfo, ret := nvml.GpmQueryDeviceSupport(device)
if supportInfo.IsSupportedDevice == uint32(nvml.FEATURE_DISABLED) { if ret != nvml.SUCCESS {
cclog.ComponentErrorf(m.name, "Device at index %d does not support GPM metrics", i) err = errors.New(nvml.ErrorString(ret))
cclog.ComponentErrorf(m.name, "Unable to query GPM support for device at index %d: %s", i, err.Error())
continue continue
} else {
if supportInfo.IsSupportedDevice == uint32(nvml.FEATURE_DISABLED) {
cclog.ComponentErrorf(m.name, "Device at index %d does not support GPM metrics", i)
continue
}
} }
stream, ret := nvml.GpmQueryIfStreamingEnabled(device) stream, ret := nvml.GpmQueryIfStreamingEnabled(device)
if stream == uint32(nvml.FEATURE_DISABLED) { if ret != nvml.SUCCESS {
nvml.GpmSetStreamingEnabled(device, uint32(nvml.FEATURE_ENABLED)) err = errors.New(nvml.ErrorString(ret))
cclog.ComponentErrorf(m.name, "Unable to query GPM streaming for device at index %d: %s", i, err.Error())
continue
} else {
if stream == uint32(nvml.FEATURE_DISABLED) {
ret = nvml.GpmSetStreamingEnabled(device, uint32(nvml.FEATURE_ENABLED))
if ret != nvml.SUCCESS {
err = errors.New(nvml.ErrorString(ret))
cclog.ComponentErrorf(m.name, "Unable to set streaming mode for device at index %d: %s", i, err.Error())
}
}
} }
// Get device's PCI info // Get device's PCI info
@@ -240,7 +256,7 @@ func (m *NvidiaGPMCollector) Init(config json.RawMessage) error {
} }
// Now we got all infos together, populate the device list // Now we got all infos together, populate the device list
g := &m.gpus[idx] g := NvidiaGPMCollectorDevice{}
// Add device handle // Add device handle
g.device = device g.device = device
@@ -296,7 +312,7 @@ func (m *NvidiaGPMCollector) Init(config json.RawMessage) error {
metIdx := 0 metIdx := 0
for _, inmetric := range m.config.Metrics { for _, inmetric := range m.config.Metrics {
for _, defmetric := range NvidiaGPMMetrics { for _, defmetric := range NvidiaGPMMetrics {
if inmetric == defmetric.outname { if inmetric == defmetric.outname || inmetric == defmetric.name {
g.measurement.Metrics[metIdx] = nvml.GpmMetric{ g.measurement.Metrics[metIdx] = nvml.GpmMetric{
MetricId: uint32(defmetric.id), MetricId: uint32(defmetric.id),
} }
@@ -306,8 +322,10 @@ func (m *NvidiaGPMCollector) Init(config json.RawMessage) error {
} }
} }
g.measurement.NumMetrics = uint32(metIdx) g.measurement.NumMetrics = uint32(metIdx)
m.gpus = append(m.gpus, g)
} }
cclog.ComponentDebugf(m.name, "Found %d Nvidia GPUs with GPM support", len(m.gpus)) cclog.ComponentDebugf(m.name, "Found %d Nvidia GPUs with GPM support", len(m.gpus))
m.num_gpus = len(m.gpus)
m.init = true m.init = true
return err return err
} }
@@ -358,9 +376,17 @@ func (m *NvidiaGPMCollector) Read(interval time.Duration, output chan lp.CCMessa
func (m *NvidiaGPMCollector) Close() { func (m *NvidiaGPMCollector) Close() {
if m.init { if m.init {
for _, gpu := range m.gpus { for i, gpu := range m.gpus {
gpu.measurement.Sample1.Free() ret := gpu.measurement.Sample1.Free()
gpu.measurement.Sample2.Free() if ret != nvml.SUCCESS {
err := errors.New(nvml.ErrorString(ret))
cclog.ComponentErrorf(m.name, "Unable to free start sample for device at index %d: %s", i, err.Error())
}
ret = gpu.measurement.Sample2.Free()
if ret != nvml.SUCCESS {
err := errors.New(nvml.ErrorString(ret))
cclog.ComponentErrorf(m.name, "Unable to free stop sample for device at index %d: %s", i, err.Error())
}
} }
if ret := nvml.Shutdown(); ret != nvml.SUCCESS { if ret := nvml.Shutdown(); ret != nvml.SUCCESS {
cclog.ComponentError(m.name, "nvml.Shutdown() not successful") cclog.ComponentError(m.name, "nvml.Shutdown() not successful")