Merge branch 'dev' of github.com:ClusterCockpit/cc-backend into dev

This commit is contained in:
brinkcoder
2025-03-14 10:52:39 +01:00
23 changed files with 665 additions and 292 deletions

View File

@@ -22,6 +22,13 @@ type Topology struct {
Die [][]*int `json:"die,omitempty"`
Core [][]int `json:"core"`
Accelerators []*Accelerator `json:"accelerators,omitempty"`
// Cache maps for faster lookups
hwthreadToSocket map[int][]int
hwthreadToCore map[int][]int
hwthreadToMemoryDomain map[int][]int
coreToSocket map[int][]int
memoryDomainToSocket map[int]int // New: Direct mapping from memory domain to socket
}
type MetricValue struct {
@@ -92,156 +99,233 @@ type GlobalMetricListItem struct {
Availability []ClusterSupport `json:"availability"`
}
// Return a list of socket IDs given a list of hwthread IDs. Even if just one
// hwthread is in that socket, add it to the list. If no hwthreads other than
// those in the argument list are assigned to one of the sockets in the first
// return value, return true as the second value. TODO: Optimize this, there
// must be a more efficient way/algorithm.
// InitTopologyMaps initializes the topology mapping caches
func (topo *Topology) InitTopologyMaps() {
// Initialize maps
topo.hwthreadToSocket = make(map[int][]int)
topo.hwthreadToCore = make(map[int][]int)
topo.hwthreadToMemoryDomain = make(map[int][]int)
topo.coreToSocket = make(map[int][]int)
topo.memoryDomainToSocket = make(map[int]int)
// Build hwthread to socket mapping
for socketID, hwthreads := range topo.Socket {
for _, hwthread := range hwthreads {
topo.hwthreadToSocket[hwthread] = append(topo.hwthreadToSocket[hwthread], socketID)
}
}
// Build hwthread to core mapping
for coreID, hwthreads := range topo.Core {
for _, hwthread := range hwthreads {
topo.hwthreadToCore[hwthread] = append(topo.hwthreadToCore[hwthread], coreID)
}
}
// Build hwthread to memory domain mapping
for memDomID, hwthreads := range topo.MemoryDomain {
for _, hwthread := range hwthreads {
topo.hwthreadToMemoryDomain[hwthread] = append(topo.hwthreadToMemoryDomain[hwthread], memDomID)
}
}
// Build core to socket mapping
for coreID, hwthreads := range topo.Core {
socketSet := make(map[int]struct{})
for _, hwthread := range hwthreads {
for socketID := range topo.hwthreadToSocket[hwthread] {
socketSet[socketID] = struct{}{}
}
}
topo.coreToSocket[coreID] = make([]int, 0, len(socketSet))
for socketID := range socketSet {
topo.coreToSocket[coreID] = append(topo.coreToSocket[coreID], socketID)
}
}
// Build memory domain to socket mapping
for memDomID, hwthreads := range topo.MemoryDomain {
if len(hwthreads) > 0 {
// Use the first hwthread to determine the socket
if socketIDs, ok := topo.hwthreadToSocket[hwthreads[0]]; ok && len(socketIDs) > 0 {
topo.memoryDomainToSocket[memDomID] = socketIDs[0]
}
}
}
}
// EnsureTopologyMaps ensures that the topology maps are initialized
func (topo *Topology) EnsureTopologyMaps() {
if topo.hwthreadToSocket == nil {
topo.InitTopologyMaps()
}
}
func (topo *Topology) GetSocketsFromHWThreads(
hwthreads []int,
) (sockets []int, exclusive bool) {
socketsMap := map[int]int{}
topo.EnsureTopologyMaps()
socketsMap := make(map[int]int)
for _, hwthread := range hwthreads {
for socket, hwthreadsInSocket := range topo.Socket {
for _, hwthreadInSocket := range hwthreadsInSocket {
if hwthread == hwthreadInSocket {
socketsMap[socket] += 1
}
}
for _, socketID := range topo.hwthreadToSocket[hwthread] {
socketsMap[socketID]++
}
}
exclusive = true
hwthreadsPerSocket := len(topo.Node) / len(topo.Socket)
sockets = make([]int, 0, len(socketsMap))
for socket, count := range socketsMap {
sockets = append(sockets, socket)
exclusive = exclusive && count == hwthreadsPerSocket
// Check if all hwthreads in this socket are in our input list
exclusive = exclusive && count == len(topo.Socket[socket])
}
return sockets, exclusive
}
// Return a list of socket IDs given a list of core IDs. Even if just one
// core is in that socket, add it to the list. If no cores other than
// those in the argument list are assigned to one of the sockets in the first
// return value, return true as the second value. TODO: Optimize this, there
// must be a more efficient way/algorithm.
func (topo *Topology) GetSocketsFromCores (
func (topo *Topology) GetSocketsFromCores(
cores []int,
) (sockets []int, exclusive bool) {
socketsMap := map[int]int{}
topo.EnsureTopologyMaps()
socketsMap := make(map[int]int)
for _, core := range cores {
for _, hwthreadInCore := range topo.Core[core] {
for socket, hwthreadsInSocket := range topo.Socket {
for _, hwthreadInSocket := range hwthreadsInSocket {
if hwthreadInCore == hwthreadInSocket {
socketsMap[socket] += 1
for _, socketID := range topo.coreToSocket[core] {
socketsMap[socketID]++
}
}
exclusive = true
sockets = make([]int, 0, len(socketsMap))
for socket, count := range socketsMap {
sockets = append(sockets, socket)
// Count total cores in this socket
totalCoresInSocket := 0
for _, hwthreads := range topo.Core {
for _, hwthread := range hwthreads {
for _, sID := range topo.hwthreadToSocket[hwthread] {
if sID == socket {
totalCoresInSocket++
break
}
}
}
}
}
exclusive = true
hwthreadsPerSocket := len(topo.Node) / len(topo.Socket)
sockets = make([]int, 0, len(socketsMap))
for socket, count := range socketsMap {
sockets = append(sockets, socket)
exclusive = exclusive && count == hwthreadsPerSocket
exclusive = exclusive && count == totalCoresInSocket
}
return sockets, exclusive
}
// Return a list of core IDs given a list of hwthread IDs. Even if just one
// hwthread is in that core, add it to the list. If no hwthreads other than
// those in the argument list are assigned to one of the cores in the first
// return value, return true as the second value. TODO: Optimize this, there
// must be a more efficient way/algorithm.
func (topo *Topology) GetCoresFromHWThreads(
hwthreads []int,
) (cores []int, exclusive bool) {
coresMap := map[int]int{}
topo.EnsureTopologyMaps()
coresMap := make(map[int]int)
for _, hwthread := range hwthreads {
for core, hwthreadsInCore := range topo.Core {
for _, hwthreadInCore := range hwthreadsInCore {
if hwthread == hwthreadInCore {
coresMap[core] += 1
}
}
for _, coreID := range topo.hwthreadToCore[hwthread] {
coresMap[coreID]++
}
}
exclusive = true
hwthreadsPerCore := len(topo.Node) / len(topo.Core)
cores = make([]int, 0, len(coresMap))
for core, count := range coresMap {
cores = append(cores, core)
exclusive = exclusive && count == hwthreadsPerCore
// Check if all hwthreads in this core are in our input list
exclusive = exclusive && count == len(topo.Core[core])
}
return cores, exclusive
}
// Return a list of memory domain IDs given a list of hwthread IDs. Even if
// just one hwthread is in that memory domain, add it to the list. If no
// hwthreads other than those in the argument list are assigned to one of the
// memory domains in the first return value, return true as the second value.
// TODO: Optimize this, there must be a more efficient way/algorithm.
func (topo *Topology) GetMemoryDomainsFromHWThreads(
hwthreads []int,
) (memDoms []int, exclusive bool) {
memDomsMap := map[int]int{}
topo.EnsureTopologyMaps()
memDomsMap := make(map[int]int)
for _, hwthread := range hwthreads {
for memDom, hwthreadsInmemDom := range topo.MemoryDomain {
for _, hwthreadInmemDom := range hwthreadsInmemDom {
if hwthread == hwthreadInmemDom {
memDomsMap[memDom] += 1
}
}
for _, memDomID := range topo.hwthreadToMemoryDomain[hwthread] {
memDomsMap[memDomID]++
}
}
exclusive = true
hwthreadsPermemDom := len(topo.Node) / len(topo.MemoryDomain)
memDoms = make([]int, 0, len(memDomsMap))
for memDom, count := range memDomsMap {
memDoms = append(memDoms, memDom)
exclusive = exclusive && count == hwthreadsPermemDom
// Check if all hwthreads in this memory domain are in our input list
exclusive = exclusive && count == len(topo.MemoryDomain[memDom])
}
return memDoms, exclusive
}
// Temporary fix to convert back from int id to string id for accelerators
func (topo *Topology) GetAcceleratorID(id int) (string, error) {
if id < 0 {
fmt.Printf("ID smaller than 0!\n")
return topo.Accelerators[0].ID, nil
} else if id < len(topo.Accelerators) {
return topo.Accelerators[id].ID, nil
} else {
return "", fmt.Errorf("index %d out of range", id)
// GetMemoryDomainsBySocket can now use the direct mapping
func (topo *Topology) GetMemoryDomainsBySocket(domainIDs []int) (map[int][]int, error) {
socketToDomains := make(map[int][]int)
for _, domainID := range domainIDs {
if domainID < 0 || domainID >= len(topo.MemoryDomain) || len(topo.MemoryDomain[domainID]) == 0 {
return nil, fmt.Errorf("MemoryDomain %d is invalid or empty", domainID)
}
socketID, ok := topo.memoryDomainToSocket[domainID]
if !ok {
return nil, fmt.Errorf("MemoryDomain %d could not be assigned to any socket", domainID)
}
socketToDomains[socketID] = append(socketToDomains[socketID], domainID)
}
return socketToDomains, nil
}
// Return list of hardware (string) accelerator IDs
// GetAcceleratorID converts a numeric ID to the corresponding Accelerator ID as a string.
// This is useful when accelerators are stored in arrays and accessed by index.
func (topo *Topology) GetAcceleratorID(id int) (string, error) {
if id < 0 {
return "", fmt.Errorf("accelerator ID %d is negative", id)
}
if id >= len(topo.Accelerators) {
return "", fmt.Errorf("accelerator index %d out of valid range (max: %d)",
id, len(topo.Accelerators)-1)
}
return topo.Accelerators[id].ID, nil
}
// GetAcceleratorIDs returns a list of all Accelerator IDs (as strings).
// Capacity is pre-allocated to improve efficiency.
func (topo *Topology) GetAcceleratorIDs() []string {
accels := make([]string, 0)
if len(topo.Accelerators) == 0 {
return []string{}
}
accels := make([]string, 0, len(topo.Accelerators))
for _, accel := range topo.Accelerators {
accels = append(accels, accel.ID)
}
return accels
}
// Outdated? Or: Return indices of accelerators in parent array?
// GetAcceleratorIDsAsInt converts all Accelerator IDs to integer values.
// This function can fail if the IDs cannot be interpreted as numbers.
// Capacity is pre-allocated to improve efficiency.
func (topo *Topology) GetAcceleratorIDsAsInt() ([]int, error) {
accels := make([]int, 0)
for _, accel := range topo.Accelerators {
if len(topo.Accelerators) == 0 {
return []int{}, nil
}
accels := make([]int, 0, len(topo.Accelerators))
for i, accel := range topo.Accelerators {
id, err := strconv.Atoi(accel.ID)
if err != nil {
return nil, err
return nil, fmt.Errorf("accelerator ID at position %d (%s) cannot be converted to a number: %w",
i, accel.ID, err)
}
accels = append(accels, id)
}