Option to use MIG UUID as subtype-id in NvidiaCollector

This commit is contained in:
Thomas Roehl 2022-05-13 14:34:32 +02:00
parent 826f364772
commit d4c89a4206

View File

@ -13,14 +13,15 @@ import (
) )
// NvidiaCollectorConfig holds the options of the NvidiaCollector. Init fills
// it by JSON-decoding the collector configuration into this struct, so every
// field is addressed by the key given in its json tag.
//
// Init sets AddPciInfoTag, UsePciInfoAsTypeId, ProcessMigDevices and
// UseUuidForMigDevices to false before decoding, so these options are off
// unless the configuration enables them.
//
// UseUuidForMigDevices is the field added by this commit. When it is set, Read
// tags a MIG device with the UUID returned by nvml.DeviceGetUUID as its
// stype-id instead of the MIG index, and no longer replaces the uuid meta
// entry copied from the parent GPU with the MIG UUID.
// NOTE(review): the JSON key ends in the singular mig_device while the field
// name is plural - confirm this is intended before documenting the option.
type NvidiaCollectorConfig struct { type NvidiaCollectorConfig struct {
ExcludeMetrics []string `json:"exclude_metrics,omitempty"` ExcludeMetrics []string `json:"exclude_metrics,omitempty"`
ExcludeDevices []string `json:"exclude_devices,omitempty"` ExcludeDevices []string `json:"exclude_devices,omitempty"`
AddPciInfoTag bool `json:"add_pci_info_tag,omitempty"` AddPciInfoTag bool `json:"add_pci_info_tag,omitempty"`
UsePciInfoAsTypeId bool `json:"use_pci_info_as_type_id,omitempty"` UsePciInfoAsTypeId bool `json:"use_pci_info_as_type_id,omitempty"`
AddUuidMeta bool `json:"add_uuid_meta,omitempty"` AddUuidMeta bool `json:"add_uuid_meta,omitempty"`
AddBoardNumberMeta bool `json:"add_board_number_meta,omitempty"` AddBoardNumberMeta bool `json:"add_board_number_meta,omitempty"`
AddSerialMeta bool `json:"add_serial_meta,omitempty"` AddSerialMeta bool `json:"add_serial_meta,omitempty"`
ProcessMigDevices bool `json:"process_mig_devices,omitempty"` ProcessMigDevices bool `json:"process_mig_devices,omitempty"`
UseUuidForMigDevices bool `json:"use_uuid_for_mig_device,omitempty"`
} }
type NvidiaCollectorDevice struct { type NvidiaCollectorDevice struct {
@ -49,6 +50,8 @@ func (m *NvidiaCollector) Init(config json.RawMessage) error {
m.name = "NvidiaCollector" m.name = "NvidiaCollector"
m.config.AddPciInfoTag = false m.config.AddPciInfoTag = false
m.config.UsePciInfoAsTypeId = false m.config.UsePciInfoAsTypeId = false
m.config.ProcessMigDevices = false
m.config.UseUuidForMigDevices = false
m.setup() m.setup()
if len(config) > 0 { if len(config) > 0 {
err = json.Unmarshal(config, &m.config) err = json.Unmarshal(config, &m.config)
@ -1143,12 +1146,21 @@ func (m *NvidiaCollector) Read(interval time.Duration, output chan lp.CCMetric)
for k, v := range m.gpus[i].tags { for k, v := range m.gpus[i].tags {
migDevice.tags[k] = v migDevice.tags[k] = v
} }
m.gpus[i].tags["stype"] = "mig" migDevice.tags["stype"] = "mig"
m.gpus[i].tags["stype-id"] = fmt.Sprintf("%d", j) if !m.config.UseUuidForMigDevices {
migDevice.tags["stype-id"] = fmt.Sprintf("%d", j)
} else {
uuid, ret := nvml.DeviceGetUUID(mdev)
if ret != nvml.SUCCESS {
cclog.ComponentError(m.name, "Unable to get UUID for mig device at index", j, ":", err.Error())
} else {
migDevice.tags["stype-id"] = uuid
}
}
for k, v := range m.gpus[i].meta { for k, v := range m.gpus[i].meta {
migDevice.meta[k] = v migDevice.meta[k] = v
} }
if _, ok := migDevice.meta["uuid"]; ok { if _, ok := migDevice.meta["uuid"]; ok && !m.config.UseUuidForMigDevices {
uuid, ret := nvml.DeviceGetUUID(mdev) uuid, ret := nvml.DeviceGetUUID(mdev)
if ret == nvml.SUCCESS { if ret == nvml.SUCCESS {
migDevice.meta["uuid"] = uuid migDevice.meta["uuid"] = uuid