Support for memoryDomain scoped metrics

This commit is contained in:
Lou Knauer 2022-02-15 13:19:26 +01:00
parent 53312c4882
commit f738a332c1
3 changed files with 71 additions and 12 deletions

View File

@ -79,6 +79,34 @@ func (topo *Topology) GetCoresFromHWThreads(hwthreads []int) (cores []int, exclu
return cores, exclusive
}
// Return a list of memory domain IDs given a list of hwthread IDs.
// Even if just one hwthread is in that memory domain, add it to the list.
// If no hwthreads other than those in the argument list are assigned to
// one of the memory domains in the first return value, return true as the second value.
// TODO: Optimize this, there must be a more efficient way/algorithm.
func (topo *Topology) GetMemoryDomainsFromHWThreads(hwthreads []int) (memDoms []int, exclusive bool) {
memDomsMap := map[int]int{}
for _, hwthread := range hwthreads {
for memDom, hwthreadsInmemDom := range topo.MemoryDomain {
for _, hwthreadInmemDom := range hwthreadsInmemDom {
if hwthread == hwthreadInmemDom {
memDomsMap[memDom] += 1
}
}
}
}
exclusive = true
hwthreadsPermemDom := len(topo.Node) / len(topo.MemoryDomain)
memDoms = make([]int, 0, len(memDomsMap))
for memDom, count := range memDomsMap {
memDoms = append(memDoms, memDom)
exclusive = exclusive && count == hwthreadsPermemDom
}
return memDoms, exclusive
}
func (topo *Topology) GetAcceleratorIDs() ([]int, error) {
accels := make([]int, 0)
for _, accel := range topo.Accelerators {

View File

@ -203,10 +203,11 @@ func (ccms *CCMetricStore) LoadData(job *schema.Job, metrics []string, scopes []
}
var (
hwthreadString = string("cpu") // TODO/FIXME: inconsistency between cc-metric-collector and ClusterCockpit
coreString = string(schema.MetricScopeCore)
socketString = string(schema.MetricScopeSocket)
acceleratorString = string(schema.MetricScopeAccelerator)
hwthreadString = string("cpu") // TODO/FIXME: inconsistency between cc-metric-collector and ClusterCockpit
coreString = string(schema.MetricScopeCore)
memoryDomainString = string(schema.MetricScopeMemoryDomain)
socketString = string(schema.MetricScopeSocket)
acceleratorString = string(schema.MetricScopeAccelerator)
)
func (ccms *CCMetricStore) buildQueries(job *schema.Job, metrics []string, scopes []schema.MetricScope) ([]ApiQuery, []schema.MetricScope, error) {
@ -359,6 +360,34 @@ func (ccms *CCMetricStore) buildQueries(job *schema.Job, metrics []string, scope
continue
}
// MemoryDomain -> MemoryDomain
if nativeScope == schema.MetricScopeMemoryDomain && scope == schema.MetricScopeMemoryDomain {
sockets, _ := topology.GetMemoryDomainsFromHWThreads(hwthreads)
queries = append(queries, ApiQuery{
Metric: remoteName,
Hostname: host.Hostname,
Aggregate: false,
Type: &memoryDomainString,
TypeIds: sockets,
})
assignedScope = append(assignedScope, scope)
continue
}
// MemoryDoman -> Node
if nativeScope == schema.MetricScopeMemoryDomain && scope == schema.MetricScopeNode {
sockets, _ := topology.GetMemoryDomainsFromHWThreads(hwthreads)
queries = append(queries, ApiQuery{
Metric: remoteName,
Hostname: host.Hostname,
Aggregate: true,
Type: &memoryDomainString,
TypeIds: sockets,
})
assignedScope = append(assignedScope, scope)
continue
}
// Socket -> Socket
if nativeScope == schema.MetricScopeSocket && scope == schema.MetricScopeSocket {
sockets, _ := topology.GetSocketsFromHWThreads(hwthreads)

View File

@ -43,19 +43,21 @@ type MetricScope string
const (
MetricScopeInvalid MetricScope = "invalid_scope"
MetricScopeNode MetricScope = "node"
MetricScopeSocket MetricScope = "socket"
MetricScopeCore MetricScope = "core"
MetricScopeHWThread MetricScope = "hwthread"
MetricScopeNode MetricScope = "node"
MetricScopeSocket MetricScope = "socket"
MetricScopeMemoryDomain MetricScope = "memoryDomain"
MetricScopeCore MetricScope = "core"
MetricScopeHWThread MetricScope = "hwthread"
MetricScopeAccelerator MetricScope = "accelerator"
)
var metricScopeGranularity map[MetricScope]int = map[MetricScope]int{
MetricScopeNode: 10,
MetricScopeSocket: 5,
MetricScopeCore: 2,
MetricScopeHWThread: 1,
MetricScopeNode: 10,
MetricScopeSocket: 5,
MetricScopeMemoryDomain: 3,
MetricScopeCore: 2,
MetricScopeHWThread: 1,
MetricScopeAccelerator: 5, // Special/Randomly choosen