mirror of
https://github.com/ClusterCockpit/cc-backend
synced 2025-01-26 03:19:06 +01:00
Adapt for accs in shared thresholds
This commit is contained in:
parent
ec581e3509
commit
58415ab5c3
@ -20,12 +20,12 @@
|
||||
const subclusterConfig = clusters.find((c) => c.name == job.cluster).subClusters.find((sc) => sc.name == job.subCluster)
|
||||
|
||||
const footprintMetrics = (job.numAcc !== 0)
|
||||
? (job.exclusive !== 1)
|
||||
? ['cpu_load', 'flops_any', 'acc_utilization']
|
||||
: ['cpu_load', 'flops_any', 'acc_utilization', 'mem_bw']
|
||||
: (job.exclusive !== 1)
|
||||
? ['cpu_load', 'flops_any', 'mem_used']
|
||||
: ['cpu_load', 'flops_any', 'mem_used', 'mem_bw']
|
||||
? (job.exclusive !== 1) // GPU
|
||||
? ['acc_utilization', 'acc_mem_used', 'nv_sm_clock', 'nv_mem_util'] // Shared
|
||||
: ['acc_utilization', 'acc_mem_used', 'nv_sm_clock', 'nv_mem_util'] // Exclusive
|
||||
: (job.exclusive !== 1) // CPU only
|
||||
? ['flops_any', 'mem_used'] // Shared
|
||||
: ['cpu_load', 'flops_any', 'mem_used', 'mem_bw'] // Exclusive
|
||||
|
||||
const footprintData = footprintMetrics.map((fm) => {
|
||||
// Mean: Primarily use backend sourced avgs from job.*, secondarily calculate/read from metricdata
|
||||
@ -155,7 +155,13 @@
|
||||
} else if (metricConfig.aggregation === 'avg' ){
|
||||
return defaultThresholds
|
||||
} else if (metricConfig.aggregation === 'sum' ){
|
||||
const jobFraction = job.numHWThreads / subClusterConfig.topology.node.length
|
||||
let jobFraction = 0.0
|
||||
if (job.numAcc > 0) {
|
||||
jobFraction = job.numAcc / subClusterConfig.topology.accelerators.length
|
||||
} else if (job.numHWThreads > 0) {
|
||||
jobFraction = job.numHWThreads / subClusterConfig.topology.node.length
|
||||
}
|
||||
|
||||
return {
|
||||
peak: round((defaultThresholds.peak * jobFraction), 0),
|
||||
normal: round((defaultThresholds.normal * jobFraction), 0),
|
||||
|
@ -163,7 +163,8 @@
|
||||
subCluster={job.subCluster}
|
||||
isShared={(job.exclusive != 1)}
|
||||
resources={job.resources}
|
||||
hwthreads={job.numHWThreads}
|
||||
numhwthreads={job.numHWThreads}
|
||||
numaccs={job.numAcc}
|
||||
/>
|
||||
{:else if metric.disabled == true && metric.data}
|
||||
<Card body color="info">Metric disabled for subcluster <code>{metric.data.name}:{job.subCluster}</code></Card>
|
||||
|
@ -39,7 +39,8 @@
|
||||
export let subCluster
|
||||
export let isShared = false
|
||||
export let forNode = false
|
||||
export let hwthreads = 0
|
||||
export let numhwthreads = 0
|
||||
export let numaccs = 0
|
||||
|
||||
if (useStatsSeries == null)
|
||||
useStatsSeries = statisticsSeries != null
|
||||
@ -54,7 +55,7 @@
|
||||
const lineWidth = clusterCockpitConfig.plot_general_lineWidth / window.devicePixelRatio
|
||||
const lineColors = clusterCockpitConfig.plot_general_colorscheme
|
||||
const backgroundColors = { normal: 'rgba(255, 255, 255, 1.0)', caution: 'rgba(255, 128, 0, 0.3)', alert: 'rgba(255, 0, 0, 0.3)' }
|
||||
const thresholds = findThresholds(metricConfig, scope, typeof subCluster == 'string' ? cluster.subClusters.find(sc => sc.name == subCluster) : subCluster, isShared, hwthreads)
|
||||
const thresholds = findThresholds(metricConfig, scope, typeof subCluster == 'string' ? cluster.subClusters.find(sc => sc.name == subCluster) : subCluster, isShared, numhwthreads, numaccs)
|
||||
|
||||
// converts the legend into a simple tooltip
|
||||
function legendAsTooltipPlugin({ className, style = { backgroundColor:"rgba(255, 249, 196, 0.92)", color: "black" } } = {}) {
|
||||
@ -381,7 +382,7 @@
|
||||
}
|
||||
}
|
||||
|
||||
export function findThresholds(metricConfig, scope, subCluster, isShared, hwthreads) {
|
||||
export function findThresholds(metricConfig, scope, subCluster, isShared, numhwthreads, numaccs) {
|
||||
// console.log('NAME ' + metricConfig.name + ' / SCOPE ' + scope + ' / SUBCLUSTER ' + subCluster.name)
|
||||
if (!metricConfig || !scope || !subCluster) {
|
||||
console.warn('Argument missing for findThresholds!')
|
||||
@ -409,9 +410,13 @@
|
||||
}
|
||||
|
||||
let divisor = 1
|
||||
if (isShared == true && hwthreads > 0) { // Shared
|
||||
divisor = subCluster.topology.node.length / hwthreads
|
||||
} else if (scope == 'socket')
|
||||
if (isShared == true) { // Shared
|
||||
if (numaccs > 0) {
|
||||
divisor = subCluster.topology.accelerators.length / numaccs
|
||||
} else if (numhwthreads > 0) {
|
||||
divisor = subCluster.topology.node.length / numhwthreads
|
||||
}
|
||||
else if (scope == 'socket')
|
||||
divisor = subCluster.topology.socket.length
|
||||
else if (scope == 'core')
|
||||
divisor = subCluster.topology.core.length
|
||||
@ -419,7 +424,7 @@
|
||||
divisor = subCluster.topology.accelerators.length
|
||||
else if (scope == 'hwthread')
|
||||
divisor = subCluster.topology.node.length
|
||||
else {
|
||||
else
|
||||
// console.log('TODO: how to calc thresholds for ', scope)
|
||||
return null
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user