mirror of
https://github.com/ClusterCockpit/cc-backend
synced 2024-12-26 13:29:05 +01:00
Adapt for accs in shared thresholds
This commit is contained in:
parent
ec581e3509
commit
58415ab5c3
@ -20,12 +20,12 @@
|
|||||||
const subclusterConfig = clusters.find((c) => c.name == job.cluster).subClusters.find((sc) => sc.name == job.subCluster)
|
const subclusterConfig = clusters.find((c) => c.name == job.cluster).subClusters.find((sc) => sc.name == job.subCluster)
|
||||||
|
|
||||||
const footprintMetrics = (job.numAcc !== 0)
|
const footprintMetrics = (job.numAcc !== 0)
|
||||||
? (job.exclusive !== 1)
|
? (job.exclusive !== 1) // GPU
|
||||||
? ['cpu_load', 'flops_any', 'acc_utilization']
|
? ['acc_utilization', 'acc_mem_used', 'nv_sm_clock', 'nv_mem_util'] // Shared
|
||||||
: ['cpu_load', 'flops_any', 'acc_utilization', 'mem_bw']
|
: ['acc_utilization', 'acc_mem_used', 'nv_sm_clock', 'nv_mem_util'] // Exclusive
|
||||||
: (job.exclusive !== 1)
|
: (job.exclusive !== 1) // CPU only
|
||||||
? ['cpu_load', 'flops_any', 'mem_used']
|
? ['flops_any', 'mem_used'] // Shared
|
||||||
: ['cpu_load', 'flops_any', 'mem_used', 'mem_bw']
|
: ['cpu_load', 'flops_any', 'mem_used', 'mem_bw'] // Exclusive
|
||||||
|
|
||||||
const footprintData = footprintMetrics.map((fm) => {
|
const footprintData = footprintMetrics.map((fm) => {
|
||||||
// Mean: Primarily use backend sourced avgs from job.*, secondarily calculate/read from metricdata
|
// Mean: Primarily use backend sourced avgs from job.*, secondarily calculate/read from metricdata
|
||||||
@ -155,7 +155,13 @@
|
|||||||
} else if (metricConfig.aggregation === 'avg' ){
|
} else if (metricConfig.aggregation === 'avg' ){
|
||||||
return defaultThresholds
|
return defaultThresholds
|
||||||
} else if (metricConfig.aggregation === 'sum' ){
|
} else if (metricConfig.aggregation === 'sum' ){
|
||||||
const jobFraction = job.numHWThreads / subClusterConfig.topology.node.length
|
let jobFraction = 0.0
|
||||||
|
if (job.numAcc > 0) {
|
||||||
|
jobFraction = job.numAcc / subClusterConfig.topology.accelerators.length
|
||||||
|
} else if (job.numHWThreads > 0) {
|
||||||
|
jobFraction = job.numHWThreads / subClusterConfig.topology.node.length
|
||||||
|
}
|
||||||
|
|
||||||
return {
|
return {
|
||||||
peak: round((defaultThresholds.peak * jobFraction), 0),
|
peak: round((defaultThresholds.peak * jobFraction), 0),
|
||||||
normal: round((defaultThresholds.normal * jobFraction), 0),
|
normal: round((defaultThresholds.normal * jobFraction), 0),
|
||||||
|
@ -163,7 +163,8 @@
|
|||||||
subCluster={job.subCluster}
|
subCluster={job.subCluster}
|
||||||
isShared={(job.exclusive != 1)}
|
isShared={(job.exclusive != 1)}
|
||||||
resources={job.resources}
|
resources={job.resources}
|
||||||
hwthreads={job.numHWThreads}
|
numhwthreads={job.numHWThreads}
|
||||||
|
numaccs={job.numAcc}
|
||||||
/>
|
/>
|
||||||
{:else if metric.disabled == true && metric.data}
|
{:else if metric.disabled == true && metric.data}
|
||||||
<Card body color="info">Metric disabled for subcluster <code>{metric.data.name}:{job.subCluster}</code></Card>
|
<Card body color="info">Metric disabled for subcluster <code>{metric.data.name}:{job.subCluster}</code></Card>
|
||||||
|
@ -39,7 +39,8 @@
|
|||||||
export let subCluster
|
export let subCluster
|
||||||
export let isShared = false
|
export let isShared = false
|
||||||
export let forNode = false
|
export let forNode = false
|
||||||
export let hwthreads = 0
|
export let numhwthreads = 0
|
||||||
|
export let numaccs = 0
|
||||||
|
|
||||||
if (useStatsSeries == null)
|
if (useStatsSeries == null)
|
||||||
useStatsSeries = statisticsSeries != null
|
useStatsSeries = statisticsSeries != null
|
||||||
@ -54,7 +55,7 @@
|
|||||||
const lineWidth = clusterCockpitConfig.plot_general_lineWidth / window.devicePixelRatio
|
const lineWidth = clusterCockpitConfig.plot_general_lineWidth / window.devicePixelRatio
|
||||||
const lineColors = clusterCockpitConfig.plot_general_colorscheme
|
const lineColors = clusterCockpitConfig.plot_general_colorscheme
|
||||||
const backgroundColors = { normal: 'rgba(255, 255, 255, 1.0)', caution: 'rgba(255, 128, 0, 0.3)', alert: 'rgba(255, 0, 0, 0.3)' }
|
const backgroundColors = { normal: 'rgba(255, 255, 255, 1.0)', caution: 'rgba(255, 128, 0, 0.3)', alert: 'rgba(255, 0, 0, 0.3)' }
|
||||||
const thresholds = findThresholds(metricConfig, scope, typeof subCluster == 'string' ? cluster.subClusters.find(sc => sc.name == subCluster) : subCluster, isShared, hwthreads)
|
const thresholds = findThresholds(metricConfig, scope, typeof subCluster == 'string' ? cluster.subClusters.find(sc => sc.name == subCluster) : subCluster, isShared, numhwthreads, numaccs)
|
||||||
|
|
||||||
// converts the legend into a simple tooltip
|
// converts the legend into a simple tooltip
|
||||||
function legendAsTooltipPlugin({ className, style = { backgroundColor:"rgba(255, 249, 196, 0.92)", color: "black" } } = {}) {
|
function legendAsTooltipPlugin({ className, style = { backgroundColor:"rgba(255, 249, 196, 0.92)", color: "black" } } = {}) {
|
||||||
@ -381,7 +382,7 @@
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
export function findThresholds(metricConfig, scope, subCluster, isShared, hwthreads) {
|
export function findThresholds(metricConfig, scope, subCluster, isShared, numhwthreads, numaccs) {
|
||||||
// console.log('NAME ' + metricConfig.name + ' / SCOPE ' + scope + ' / SUBCLUSTER ' + subCluster.name)
|
// console.log('NAME ' + metricConfig.name + ' / SCOPE ' + scope + ' / SUBCLUSTER ' + subCluster.name)
|
||||||
if (!metricConfig || !scope || !subCluster) {
|
if (!metricConfig || !scope || !subCluster) {
|
||||||
console.warn('Argument missing for findThresholds!')
|
console.warn('Argument missing for findThresholds!')
|
||||||
@ -409,9 +410,13 @@
|
|||||||
}
|
}
|
||||||
|
|
||||||
let divisor = 1
|
let divisor = 1
|
||||||
if (isShared == true && hwthreads > 0) { // Shared
|
if (isShared == true) { // Shared
|
||||||
divisor = subCluster.topology.node.length / hwthreads
|
if (numaccs > 0) {
|
||||||
} else if (scope == 'socket')
|
divisor = subCluster.topology.accelerators.length / numaccs
|
||||||
|
} else if (numhwthreads > 0) {
|
||||||
|
divisor = subCluster.topology.node.length / numhwthreads
|
||||||
|
}
|
||||||
|
else if (scope == 'socket')
|
||||||
divisor = subCluster.topology.socket.length
|
divisor = subCluster.topology.socket.length
|
||||||
else if (scope == 'core')
|
else if (scope == 'core')
|
||||||
divisor = subCluster.topology.core.length
|
divisor = subCluster.topology.core.length
|
||||||
@ -419,7 +424,7 @@
|
|||||||
divisor = subCluster.topology.accelerators.length
|
divisor = subCluster.topology.accelerators.length
|
||||||
else if (scope == 'hwthread')
|
else if (scope == 'hwthread')
|
||||||
divisor = subCluster.topology.node.length
|
divisor = subCluster.topology.node.length
|
||||||
else {
|
else
|
||||||
// console.log('TODO: how to calc thresholds for ', scope)
|
// console.log('TODO: how to calc thresholds for ', scope)
|
||||||
return null
|
return null
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user