diff --git a/web/frontend/src/JobFootprint.svelte b/web/frontend/src/JobFootprint.svelte index 30034c0..06597de 100644 --- a/web/frontend/src/JobFootprint.svelte +++ b/web/frontend/src/JobFootprint.svelte @@ -20,12 +20,12 @@ const subclusterConfig = clusters.find((c) => c.name == job.cluster).subClusters.find((sc) => sc.name == job.subCluster) const footprintMetrics = (job.numAcc !== 0) - ? (job.exclusive !== 1) - ? ['cpu_load', 'flops_any', 'acc_utilization'] - : ['cpu_load', 'flops_any', 'acc_utilization', 'mem_bw'] - : (job.exclusive !== 1) - ? ['cpu_load', 'flops_any', 'mem_used'] - : ['cpu_load', 'flops_any', 'mem_used', 'mem_bw'] + ? (job.exclusive !== 1) // GPU + ? ['acc_utilization', 'acc_mem_used', 'nv_sm_clock', 'nv_mem_util'] // Shared + : ['acc_utilization', 'acc_mem_used', 'nv_sm_clock', 'nv_mem_util'] // Exclusive + : (job.exclusive !== 1) // CPU only + ? ['flops_any', 'mem_used'] // Shared + : ['cpu_load', 'flops_any', 'mem_used', 'mem_bw'] // Exclusive const footprintData = footprintMetrics.map((fm) => { // Mean: Primarily use backend sourced avgs from job.*, secondarily calculate/read from metricdata @@ -155,7 +155,13 @@ } else if (metricConfig.aggregation === 'avg' ){ return defaultThresholds } else if (metricConfig.aggregation === 'sum' ){ - const jobFraction = job.numHWThreads / subClusterConfig.topology.node.length + let jobFraction = 0.0 + if (job.numAcc > 0) { + jobFraction = job.numAcc / subClusterConfig.topology.accelerators.length + } else if (job.numHWThreads > 0) { + jobFraction = job.numHWThreads / subClusterConfig.topology.node.length + } + return { peak: round((defaultThresholds.peak * jobFraction), 0), normal: round((defaultThresholds.normal * jobFraction), 0), diff --git a/web/frontend/src/joblist/Row.svelte b/web/frontend/src/joblist/Row.svelte index 4d9013c..c5bd515 100644 --- a/web/frontend/src/joblist/Row.svelte +++ b/web/frontend/src/joblist/Row.svelte @@ -163,7 +163,8 @@ subCluster={job.subCluster} isShared={(job.exclusive != 1)} resources={job.resources} - hwthreads={job.numHWThreads} + numhwthreads={job.numHWThreads} + numaccs={job.numAcc} /> {:else if metric.disabled == true && metric.data} Metric disabled for subcluster {metric.data.name}:{job.subCluster} diff --git a/web/frontend/src/plots/MetricPlot.svelte b/web/frontend/src/plots/MetricPlot.svelte index 7bd264c..e7ae5b1 100644 --- a/web/frontend/src/plots/MetricPlot.svelte +++ b/web/frontend/src/plots/MetricPlot.svelte @@ -39,7 +39,8 @@ export let subCluster export let isShared = false export let forNode = false - export let hwthreads = 0 + export let numhwthreads = 0 + export let numaccs = 0 if (useStatsSeries == null) useStatsSeries = statisticsSeries != null @@ -54,7 +55,7 @@ const lineWidth = clusterCockpitConfig.plot_general_lineWidth / window.devicePixelRatio const lineColors = clusterCockpitConfig.plot_general_colorscheme const backgroundColors = { normal: 'rgba(255, 255, 255, 1.0)', caution: 'rgba(255, 128, 0, 0.3)', alert: 'rgba(255, 0, 0, 0.3)' } - const thresholds = findThresholds(metricConfig, scope, typeof subCluster == 'string' ? cluster.subClusters.find(sc => sc.name == subCluster) : subCluster, isShared, hwthreads) + const thresholds = findThresholds(metricConfig, scope, typeof subCluster == 'string' ? cluster.subClusters.find(sc => sc.name == subCluster) : subCluster, isShared, numhwthreads, numaccs) // converts the legend into a simple tooltip function legendAsTooltipPlugin({ className, style = { backgroundColor:"rgba(255, 249, 196, 0.92)", color: "black" } } = {}) { @@ -381,7 +382,7 @@ } } - export function findThresholds(metricConfig, scope, subCluster, isShared, hwthreads) { + export function findThresholds(metricConfig, scope, subCluster, isShared, numhwthreads, numaccs) { // console.log('NAME ' + metricConfig.name + ' / SCOPE ' + scope + ' / SUBCLUSTER ' + subCluster.name) if (!metricConfig || !scope || !subCluster) { console.warn('Argument missing for findThresholds!') @@ -409,9 +410,13 @@ } let divisor = 1 - if (isShared == true && hwthreads > 0) { // Shared - divisor = subCluster.topology.node.length / hwthreads - } else if (scope == 'socket') + if (isShared == true) { // Shared + if (numaccs > 0) { + divisor = subCluster.topology.accelerators.length / numaccs + } else if (numhwthreads > 0) { + divisor = subCluster.topology.node.length / numhwthreads + } + else if (scope == 'socket') divisor = subCluster.topology.socket.length else if (scope == 'core') divisor = subCluster.topology.core.length @@ -419,7 +424,7 @@ divisor = subCluster.topology.accelerators.length else if (scope == 'hwthread') divisor = subCluster.topology.node.length - else { + else // console.log('TODO: how to calc thresholds for ', scope) return null }