Fix: Simplify footprint logic, fix aggregated sum values

This commit is contained in:
Christoph Kluge 2024-03-18 18:57:15 +01:00
parent 849b7e038d
commit 60b56bd41a

View File

@ -23,38 +23,25 @@
: metricConfig.alert, : metricConfig.alert,
}; };
if (job.exclusive === 1) { // Job_Exclusivity does not matter, only aggregation
// Exclusive: Use as defined if (metricConfig.aggregation === "avg") {
return defaultThresholds; return defaultThresholds;
} else if (metricConfig.aggregation === "sum") {
const jobFraction =
job.numHWThreads / subClusterConfig.topology.node.length;
return {
peak: round(defaultThresholds.peak * jobFraction, 0),
normal: round(defaultThresholds.normal * jobFraction, 0),
caution: round(defaultThresholds.caution * jobFraction, 0),
alert: round(defaultThresholds.alert * jobFraction, 0),
};
} else { } else {
// Shared: Handle specifically console.warn(
if (metricConfig.name === "cpu_load") { "Missing or unkown aggregation mode (sum/avg) for metric:",
// Special: Avg Aggregation BUT scaled based on #hwthreads metricConfig,
return { );
peak: job.numHWThreads, return defaultThresholds;
normal: job.numHWThreads, }
caution: defaultThresholds.caution,
alert: defaultThresholds.alert,
};
} else if (metricConfig.aggregation === "avg") {
return defaultThresholds;
} else if (metricConfig.aggregation === "sum") {
const jobFraction =
job.numHWThreads / subClusterConfig.topology.node.length;
return {
peak: round(defaultThresholds.peak * jobFraction, 0),
normal: round(defaultThresholds.normal * jobFraction, 0),
caution: round(defaultThresholds.caution * jobFraction, 0),
alert: round(defaultThresholds.alert * jobFraction, 0),
};
} else {
console.warn(
"Missing or unkown aggregation mode (sum/avg) for metric:",
metricConfig,
);
return null;
}
} // Other job.exclusive cases?
} }
</script> </script>
@ -91,29 +78,6 @@
: ["cpu_load", "flops_any", "mem_used", "mem_bw"]; // Exclusive : ["cpu_load", "flops_any", "mem_used", "mem_bw"]; // Exclusive
const footprintData = footprintMetrics.map((fm) => { const footprintData = footprintMetrics.map((fm) => {
// Mean: Primarily use backend sourced avgs from job.*, secondarily calculate/read from metricdata
let mv = null;
if (fm === "cpu_load" && job.loadAvg !== 0) {
mv = round(job.loadAvg, 2);
} else if (fm === "flops_any" && job.flopsAnyAvg !== 0) {
mv = round(job.flopsAnyAvg, 2);
} else if (fm === "mem_bw" && job.memBwAvg !== 0) {
mv = round(job.memBwAvg, 2);
} else {
// Calculate from jobMetrics
const jm = jobMetrics.find((jm) => jm.name === fm && jm.scope === "node");
if (jm?.metric?.statisticsSeries) {
mv = round(mean(jm.metric.statisticsSeries.mean), 2);
} else if (jm?.metric?.series?.length > 1) {
const avgs = jm.metric.series.map((jms) => jms.statistics.avg);
mv = round(mean(avgs), 2);
} else if (jm?.metric?.series) {
mv = round(jm.metric.series[0].statistics.avg, 2);
} else {
mv = 0.0;
}
}
// Unit // Unit
const fmc = getContext("metrics")(job.cluster, fm); const fmc = getContext("metrics")(job.cluster, fm);
let unit = ""; let unit = "";
@ -123,6 +87,44 @@
const fmt = findJobThresholds(job, fmc, subclusterConfig); const fmt = findJobThresholds(job, fmc, subclusterConfig);
if (fm === "flops_any") fmt.peak = round(fmt.peak * 0.85, 0); if (fm === "flops_any") fmt.peak = round(fmt.peak * 0.85, 0);
// Value: Primarily use backend sourced avgs from job.*, secondarily calculate/read from metricdata
// Exclusivity does not matter; the metric's aggregation mode (fmc.aggregation) alone selects the path.
// mv keeps its 0.0 default whenever no usable value is found.
let mv = 0.0;
if (fmc.aggregation === "avg") {
  if (fm === "cpu_load" && job.loadAvg !== 0) {
    mv = round(job.loadAvg, 2);
  } else if (fm === "flops_any" && job.flopsAnyAvg !== 0) {
    mv = round(job.flopsAnyAvg, 2);
  } else if (fm === "mem_bw" && job.memBwAvg !== 0) {
    mv = round(job.memBwAvg, 2);
  } else {
    // Calculate Avg from jobMetrics (node-scoped entry for this metric, if any)
    const nodeMetric = jobMetrics.find((m) => m.name === fm && m.scope === "node");
    const series = nodeMetric?.metric?.series;
    if (nodeMetric?.metric?.statisticsSeries) {
      mv = round(mean(nodeMetric.metric.statisticsSeries.mean), 2);
    } else if (series?.length > 1) {
      // Several series: average their per-series averages
      mv = round(mean(series.map((s) => s.statistics.avg)), 2);
    } else if (series?.length === 1) {
      // Checked by length (not truthiness) so an empty series array is never indexed
      mv = round(series[0].statistics.avg, 2);
    }
  }
} else if (fmc.aggregation === "sum") {
  // Calculate Sum from jobMetrics: Sum all node averages
  const nodeMetric = jobMetrics.find((m) => m.name === fm && m.scope === "node");
  const series = nodeMetric?.metric?.series;
  if (series?.length > 1) { // More than 1 node
    const avgs = series.map((s) => s.statistics.avg);
    mv = round(avgs.reduce((a, b) => a + b, 0));
  } else if (series?.length === 1) {
    // Checked by length (not truthiness) so an empty series array is never indexed
    mv = round(series[0].statistics.avg, 2);
  }
} else {
  // Log the metric config held in fmc; no `metricConfig` binding is declared
  // in the visible part of this callback, so referencing it would throw here.
  console.warn(
    "Missing or unkown aggregation mode (sum/avg) for metric:",
    fmc,
  );
}
// Define basic data // Define basic data
const fmBase = { const fmBase = {
name: fm, name: fm,