From b0c0d1550543d373c527dabb89e60876cef93e81 Mon Sep 17 00:00:00 2001 From: Christoph Kluge Date: Wed, 4 Dec 2024 10:55:29 +0100 Subject: [PATCH 1/5] fix stat filter url write --- web/frontend/src/generic/Filters.svelte | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/web/frontend/src/generic/Filters.svelte b/web/frontend/src/generic/Filters.svelte index 312135a..481211b 100644 --- a/web/frontend/src/generic/Filters.svelte +++ b/web/frontend/src/generic/Filters.svelte @@ -219,7 +219,7 @@ opts.push(`projectMatch=${filters.projectMatch}`); if (filters.stats.length != 0) for (let stat of filters.stats) { - opts.push(`stat=${stat?.field ? stat.field : stat.metricName}-${stat?.from ? stat.from : stat.range.from}-${stat?.to ? stat.to : stat.range.to}`); + opts.push(`stat=${stat.field}-${stat.from}-${stat.to}`); } if (opts.length == 0 && window.location.search.length <= 1) return; @@ -390,7 +390,7 @@ {#if filters.stats.length > 0} (isStatsOpen = true)}> {filters.stats - .map((stat) => `${stat?.text ? stat.text : stat.field}: ${stat?.from ? stat.from : stat.range.from} - ${stat?.to ? 
stat.to : stat.range.to}`) + .map((stat) => `${stat.field}: ${stat.from} - ${stat.to}`) .join(", ")} {/if} From ab07c7928f4b640fb9f751b51e472d74b2132654 Mon Sep 17 00:00:00 2001 From: Christoph Kluge Date: Wed, 4 Dec 2024 13:56:00 +0100 Subject: [PATCH 2/5] fix: fix footprint logic, do not scale thresholds on multi node jobs --- .../src/generic/helper/JobFootprint.svelte | 58 +++++++++++++------ web/frontend/src/job/JobSummary.svelte | 58 +++++++++++++------ 2 files changed, 78 insertions(+), 38 deletions(-) diff --git a/web/frontend/src/generic/helper/JobFootprint.svelte b/web/frontend/src/generic/helper/JobFootprint.svelte index b6087a7..dd56a95 100644 --- a/web/frontend/src/generic/helper/JobFootprint.svelte +++ b/web/frontend/src/generic/helper/JobFootprint.svelte @@ -23,26 +23,46 @@ alert: metricConfig.alert }; - // Job_Exclusivity does not matter, only aggregation - if (metricConfig.aggregation === "avg") { - return defaultThresholds; - } else if (metricConfig.aggregation === "sum") { + /* + NEW: Footprints should be comparable: Always use Unchanged Single Node Thresholds, except for shared jobs. + HW Clocks, HW Temperatures and File/Net IO Thresholds will be scaled down too, even if they are independent. + 'jf.stats' is one of: avg, min, max -> Always relative to one node's thresholds as configured. 
+ */ + if (job.exclusive === 1) { + return defaultThresholds + } else { const topol = getContext("getHardwareTopology")(job.cluster, job.subCluster) const jobFraction = job.numHWThreads / topol.node.length; - return { peak: round(defaultThresholds.peak * jobFraction, 0), normal: round(defaultThresholds.normal * jobFraction, 0), caution: round(defaultThresholds.caution * jobFraction, 0), alert: round(defaultThresholds.alert * jobFraction, 0), }; - } else { - console.warn( - "Missing or unkown aggregation mode (sum/avg) for metric:", - metricConfig, - ); - return defaultThresholds; } + + /* OLD: Based on Metric Aggregation Setting + // Job_Exclusivity does not matter, only aggregation + if (metricConfig.aggregation === "avg") { + return defaultThresholds; + } else if (metricConfig.aggregation === "sum") { + const topol = getContext("getHardwareTopology")(job.cluster, job.subCluster) + const jobFraction = job.numHWThreads / topol.node.length; + + return { + peak: round(defaultThresholds.peak * jobFraction, 0), + normal: round(defaultThresholds.normal * jobFraction, 0), + caution: round(defaultThresholds.caution * jobFraction, 0), + alert: round(defaultThresholds.alert * jobFraction, 0), + }; + } else { + console.warn( + "Missing or unkown aggregation mode (sum/avg) for metric:", + metricConfig, + ); + return defaultThresholds; + } + */ } @@ -136,25 +156,25 @@ return a.impact - b.impact || ((a.name > b.name) ? 1 : ((b.name > a.name) ? 
-1 : 0)); });; - function evalFootprint(mean, thresholds, lowerIsBetter, level) { + function evalFootprint(value, thresholds, lowerIsBetter, level) { // Handle Metrics in which less value is better switch (level) { case "peak": if (lowerIsBetter) return false; // metric over peak -> return false to trigger impact -1 - else return mean <= thresholds.peak && mean > thresholds.normal; + else return value <= thresholds.peak && value > thresholds.normal; case "alert": if (lowerIsBetter) - return mean <= thresholds.peak && mean >= thresholds.alert; - else return mean <= thresholds.alert && mean >= 0; + return value <= thresholds.peak && value >= thresholds.alert; + else return value <= thresholds.alert && value >= 0; case "caution": if (lowerIsBetter) - return mean < thresholds.alert && mean >= thresholds.caution; - else return mean <= thresholds.caution && mean > thresholds.alert; + return value < thresholds.alert && value >= thresholds.caution; + else return value <= thresholds.caution && value > thresholds.alert; case "normal": if (lowerIsBetter) - return mean < thresholds.caution && mean >= 0; - else return mean <= thresholds.normal && mean > thresholds.caution; + return value < thresholds.caution && value >= 0; + else return value <= thresholds.normal && value > thresholds.caution; default: return false; } diff --git a/web/frontend/src/job/JobSummary.svelte b/web/frontend/src/job/JobSummary.svelte index f2295f5..303782f 100644 --- a/web/frontend/src/job/JobSummary.svelte +++ b/web/frontend/src/job/JobSummary.svelte @@ -23,26 +23,46 @@ alert: metricConfig.alert }; - // Job_Exclusivity does not matter, only aggregation - if (metricConfig.aggregation === "avg") { - return defaultThresholds; - } else if (metricConfig.aggregation === "sum") { + /* + NEW: Footprints should be comparable: Always use Unchanged Single Node Thresholds, except for shared jobs. + HW Clocks, HW Temperatures and File/Net IO Thresholds will be scaled down too, even if they are independent. 
+ 'jf.stats' is one of: avg, min, max -> Always relative to one node's thresholds as configured. + */ + if (job.exclusive === 1) { + return defaultThresholds + } else { const topol = getContext("getHardwareTopology")(job.cluster, job.subCluster) const jobFraction = job.numHWThreads / topol.node.length; - return { peak: round(defaultThresholds.peak * jobFraction, 0), normal: round(defaultThresholds.normal * jobFraction, 0), caution: round(defaultThresholds.caution * jobFraction, 0), alert: round(defaultThresholds.alert * jobFraction, 0), }; - } else { - console.warn( - "Missing or unkown aggregation mode (sum/avg) for metric:", - metricConfig, - ); - return defaultThresholds; } + + /* OLD: Based on Metric Aggregation Setting + // Job_Exclusivity does not matter, only aggregation + if (metricConfig.aggregation === "avg") { + return defaultThresholds; + } else if (metricConfig.aggregation === "sum") { + const topol = getContext("getHardwareTopology")(job.cluster, job.subCluster) + const jobFraction = job.numHWThreads / topol.node.length; + + return { + peak: round(defaultThresholds.peak * jobFraction, 0), + normal: round(defaultThresholds.normal * jobFraction, 0), + caution: round(defaultThresholds.caution * jobFraction, 0), + alert: round(defaultThresholds.alert * jobFraction, 0), + }; + } else { + console.warn( + "Missing or unkown aggregation mode (sum/avg) for metric:", + metricConfig, + ); + return defaultThresholds; + } + */ } @@ -142,25 +162,25 @@ return a.impact - b.impact || ((a.name > b.name) ? 1 : ((b.name > a.name) ? 
-1 : 0)); });; - function evalFootprint(mean, thresholds, lowerIsBetter, level) { + function evalFootprint(value, thresholds, lowerIsBetter, level) { // Handle Metrics in which less value is better switch (level) { case "peak": if (lowerIsBetter) return false; // metric over peak -> return false to trigger impact -1 - else return mean <= thresholds.peak && mean > thresholds.normal; + else return value <= thresholds.peak && value > thresholds.normal; case "alert": if (lowerIsBetter) - return mean <= thresholds.peak && mean >= thresholds.alert; - else return mean <= thresholds.alert && mean >= 0; + return value <= thresholds.peak && value >= thresholds.alert; + else return value <= thresholds.alert && value >= 0; case "caution": if (lowerIsBetter) - return mean < thresholds.alert && mean >= thresholds.caution; - else return mean <= thresholds.caution && mean > thresholds.alert; + return value < thresholds.alert && value >= thresholds.caution; + else return value <= thresholds.caution && value > thresholds.alert; case "normal": if (lowerIsBetter) - return mean < thresholds.caution && mean >= 0; - else return mean <= thresholds.normal && mean > thresholds.caution; + return value < thresholds.caution && value >= 0; + else return value <= thresholds.normal && value > thresholds.caution; default: return false; } From a7395ed45bb3ea623550be4bbebf221757eb8cc3 Mon Sep 17 00:00:00 2001 From: Christoph Kluge Date: Wed, 4 Dec 2024 13:57:05 +0100 Subject: [PATCH 3/5] remove config for polarPlotMetrics --- internal/config/config.go | 1 - pkg/schema/schemas/config.schema.json | 9 --------- web/frontend/src/Job.root.svelte | 3 --- 3 files changed, 13 deletions(-) diff --git a/internal/config/config.go b/internal/config/config.go index 1ba49cf..4f1a8c3 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -29,7 +29,6 @@ var Keys schema.ProgramConfig = schema.ProgramConfig{ "analysis_view_histogramMetrics": []string{"flops_any", "mem_bw", "mem_used"}, 
"analysis_view_scatterPlotMetrics": [][]string{{"flops_any", "mem_bw"}, {"flops_any", "cpu_load"}, {"cpu_load", "mem_bw"}}, "job_view_nodestats_selectedMetrics": []string{"flops_any", "mem_bw", "mem_used"}, - "job_view_polarPlotMetrics": []string{"flops_any", "mem_bw", "mem_used"}, "job_view_selectedMetrics": []string{"flops_any", "mem_bw", "mem_used"}, "job_view_showFootprint": true, "job_list_usePaging": false, diff --git a/pkg/schema/schemas/config.schema.json b/pkg/schema/schemas/config.schema.json index c04dd74..0a3905a 100644 --- a/pkg/schema/schemas/config.schema.json +++ b/pkg/schema/schemas/config.schema.json @@ -445,14 +445,6 @@ "minItems": 1 } }, - "job_view_polarPlotMetrics": { - "description": "Metrics shown in polar plot of single job view", - "type": "array", - "items": { - "type": "string", - "minItems": 1 - } - }, "job_view_selectedMetrics": { "description": "", "type": "array", @@ -492,7 +484,6 @@ "analysis_view_histogramMetrics", "analysis_view_scatterPlotMetrics", "job_view_nodestats_selectedMetrics", - "job_view_polarPlotMetrics", "job_view_selectedMetrics", "plot_general_colorscheme", "plot_list_selectedMetrics" diff --git a/web/frontend/src/Job.root.svelte b/web/frontend/src/Job.root.svelte index bb48479..ad9a0c7 100644 --- a/web/frontend/src/Job.root.svelte +++ b/web/frontend/src/Job.root.svelte @@ -139,9 +139,6 @@ return names; }, []) ), - ...(ccconfig[`job_view_polarPlotMetrics:${job.cluster}`] || - ccconfig[`job_view_polarPlotMetrics`] - ), ...(ccconfig[`job_view_nodestats_selectedMetrics:${job.cluster}`] || ccconfig[`job_view_nodestats_selectedMetrics`] ), From 257250714d9a4835eca5316d5716a843a53a7527 Mon Sep 17 00:00:00 2001 From: Christoph Kluge Date: Wed, 4 Dec 2024 15:22:19 +0100 Subject: [PATCH 4/5] review polar plot component, adds min dataset --- web/frontend/src/generic/plots/Polar.svelte | 52 +++++++++++++++------ 1 file changed, 38 insertions(+), 14 deletions(-) diff --git a/web/frontend/src/generic/plots/Polar.svelte 
b/web/frontend/src/generic/plots/Polar.svelte index b55e4f0..7e6957c 100644 --- a/web/frontend/src/generic/plots/Polar.svelte +++ b/web/frontend/src/generic/plots/Polar.svelte @@ -45,7 +45,7 @@ if (footprintData) { return footprintData.filter(fpd => { if (!jobMetrics.find(m => m.name == fpd.name && m.scope == "node" || fpd.impact == 4)) { - console.warn(`PolarPlot: No metric data (or config) for '${fpd.name}'`) + console.warn(`PolarPlot: No metric data for '${fpd.name}'`) return false } return true @@ -72,6 +72,7 @@ const getMetricConfig = getContext("getMetricConfig"); const getValuesForStatGeneric = (getStat) => labels.map(name => { + // TODO: Requires Scaling if Shared Job const peak = getMetricConfig(cluster, subCluster, name).peak const metric = jobMetrics.find(m => m.name == name && m.scope == "node") const value = getStat(metric.metric) / peak @@ -79,6 +80,7 @@ }) const getValuesForStatFootprint = (getStat) => labels.map(name => { + // FootprintData 'Peak' is pre-scaled for Shared Jobs in JobSummary Component const peak = footprintData.find(fpd => fpd.name === name).peak const metric = jobMetrics.find(m => m.name == name && m.scope == "node") const value = getStat(metric.metric) / peak @@ -86,14 +88,21 @@ }) function getMax(metric) { - let max = 0 + let max = metric.series[0].statistics.max; for (let series of metric.series) max = Math.max(max, series.statistics.max) return max } + function getMin(metric) { + let min = metric.series[0].statistics.min; + for (let series of metric.series) + min = Math.min(min, series.statistics.min) + return min + } + function getAvg(metric) { - let avg = 0 + let avg = 0; for (let series of metric.series) avg += series.statistics.avg return avg / metric.series.length @@ -104,6 +113,8 @@ return getValuesForStatGeneric(getAvg) } else if (type === 'max') { return getValuesForStatGeneric(getMax) + } else if (type === 'min') { + return getValuesForStatGeneric(getMin) } console.log('Unknown Type For Polar Data') return [] @@ -114,6 
+125,8 @@ return getValuesForStatFootprint(getAvg) } else if (type === 'max') { return getValuesForStatFootprint(getMax) + } else if (type === 'min') { + return getValuesForStatFootprint(getMin) } console.log('Unknown Type For Polar Data') return [] @@ -124,25 +137,36 @@ datasets: [ { label: 'Max', - data: footprintData ? loadDataForFootprint('max') : loadDataGeneric('max'), // + data: footprintData ? loadDataForFootprint('max') : loadDataGeneric('max'), // Node Scope Only fill: 1, - backgroundColor: 'rgba(0, 102, 255, 0.25)', - borderColor: 'rgb(0, 102, 255)', - pointBackgroundColor: 'rgb(0, 102, 255)', + backgroundColor: 'rgba(0, 0, 255, 0.25)', + borderColor: 'rgb(0, 0, 255)', + pointBackgroundColor: 'rgb(0, 0, 255)', pointBorderColor: '#fff', pointHoverBackgroundColor: '#fff', - pointHoverBorderColor: 'rgb(0, 102, 255)' + pointHoverBorderColor: 'rgb(0, 0, 255)' }, { label: 'Avg', - data: footprintData ? loadDataForFootprint('avg') : loadDataGeneric('avg'), // getValuesForStat(getAvg) - fill: true, - backgroundColor: 'rgba(255, 153, 0, 0.25)', - borderColor: 'rgb(255, 153, 0)', - pointBackgroundColor: 'rgb(255, 153, 0)', + data: footprintData ? loadDataForFootprint('avg') : loadDataGeneric('avg'), // Node Scope Only + fill: 2, + backgroundColor: 'rgba(255, 210, 0, 0.25)', + borderColor: 'rgb(255, 210, 0)', + pointBackgroundColor: 'rgb(255, 210, 0)', pointBorderColor: '#fff', pointHoverBackgroundColor: '#fff', - pointHoverBorderColor: 'rgb(255, 153, 0)' + pointHoverBorderColor: 'rgb(255, 210, 0)' + }, + { + label: 'Min', + data: footprintData ? 
loadDataForFootprint('min') : loadDataGeneric('min'), // Node Scope Only + fill: true, + backgroundColor: 'rgba(255, 0, 0, 0.25)', + borderColor: 'rgb(255, 0, 0)', + pointBackgroundColor: 'rgb(255, 0, 0)', + pointBorderColor: '#fff', + pointHoverBackgroundColor: '#fff', + pointHoverBorderColor: 'rgb(255, 0, 0)' } ] } From 01c06728ebf6c4dc1957251dd1fc59d6b25ca79f Mon Sep 17 00:00:00 2001 From: Christoph Kluge Date: Wed, 4 Dec 2024 16:09:06 +0100 Subject: [PATCH 5/5] review footprint iconography and messages --- .../src/generic/helper/JobFootprint.svelte | 20 +++++++++++-------- web/frontend/src/job/JobSummary.svelte | 18 ++++++++++------- 2 files changed, 23 insertions(+), 15 deletions(-) diff --git a/web/frontend/src/generic/helper/JobFootprint.svelte b/web/frontend/src/generic/helper/JobFootprint.svelte index dd56a95..187eff9 100644 --- a/web/frontend/src/generic/helper/JobFootprint.svelte +++ b/web/frontend/src/generic/helper/JobFootprint.svelte @@ -109,21 +109,21 @@ return { ...fmBase, color: "danger", - message: `Metric average way ${fmc.lowerIsBetter ? "above" : "below"} expected normal thresholds.`, + message: `Footprint value way ${fmc.lowerIsBetter ? "above" : "below"} expected normal threshold.`, impact: 3 }; } else if (evalFootprint(jf.value, fmt, fmc.lowerIsBetter, "caution")) { return { ...fmBase, color: "warning", - message: `Metric average ${fmc.lowerIsBetter ? "above" : "below"} expected normal thresholds.`, + message: `Footprint value ${fmc.lowerIsBetter ? 
"above" : "below"} expected normal threshold.`, impact: 2, }; } else if (evalFootprint(jf.value, fmt, fmc.lowerIsBetter, "normal")) { return { ...fmBase, color: "success", - message: "Metric average within expected thresholds.", + message: "Footprint value within expected thresholds.", impact: 1, }; } else if (evalFootprint(jf.value, fmt, fmc.lowerIsBetter, "peak")) { @@ -131,7 +131,7 @@ ...fmBase, color: "info", message: - "Metric average above expected normal thresholds: Check for artifacts recommended.", + "Footprint value above expected normal threshold: Check for artifacts recommended.", impact: 0, }; } else { @@ -139,7 +139,7 @@ ...fmBase, color: "secondary", message: - "Metric average above expected peak threshold: Check for artifacts!", + "Footprint value above expected peak threshold: Check for artifacts!", impact: -1, }; } @@ -201,10 +201,14 @@ >
- {#if fpd.impact === 3 || fpd.impact === -1} - + {#if fpd.impact === 3} + {:else if fpd.impact === 2} + {:else if fpd.impact === 0} + + {:else if fpd.impact === -1} + {/if} {#if fpd.impact === 3} @@ -214,7 +218,7 @@ {:else if fpd.impact === 1} {:else if fpd.impact === 0} - + {:else if fpd.impact === -1} {/if} diff --git a/web/frontend/src/job/JobSummary.svelte b/web/frontend/src/job/JobSummary.svelte index 303782f..f7772c7 100644 --- a/web/frontend/src/job/JobSummary.svelte +++ b/web/frontend/src/job/JobSummary.svelte @@ -114,21 +114,21 @@ return { ...fmBase, color: "danger", - message: `Metric average way ${fmc.lowerIsBetter ? "above" : "below"} expected normal thresholds.`, + message: `Footprint value way ${fmc.lowerIsBetter ? "above" : "below"} expected normal threshold.`, impact: 3 }; } else if (evalFootprint(jf.value, fmt, fmc.lowerIsBetter, "caution")) { return { ...fmBase, color: "warning", - message: `Metric average ${fmc.lowerIsBetter ? "above" : "below"} expected normal thresholds.`, + message: `Footprint value ${fmc.lowerIsBetter ? "above" : "below"} expected normal threshold.`, impact: 2, }; } else if (evalFootprint(jf.value, fmt, fmc.lowerIsBetter, "normal")) { return { ...fmBase, color: "success", - message: "Metric average within expected thresholds.", + message: "Footprint value within expected thresholds.", impact: 1, }; } else if (evalFootprint(jf.value, fmt, fmc.lowerIsBetter, "peak")) { @@ -136,7 +136,7 @@ ...fmBase, color: "info", message: - "Metric average above expected normal thresholds: Check for artifacts recommended.", + "Footprint value above expected normal threshold: Check for artifacts recommended.", impact: 0, }; } else { @@ -144,7 +144,7 @@ ...fmBase, color: "secondary", message: - "Metric average above expected peak threshold: Check for artifacts!", + "Footprint value above expected peak threshold: Check for artifacts!", impact: -1, }; } @@ -264,10 +264,14 @@ id={`footprint-${job.jobId}-${index}`} >
- {#if fpd.impact === 3 || fpd.impact === -1} + {#if fpd.impact === 3} {:else if fpd.impact === 2} + {:else if fpd.impact === 0} + + {:else if fpd.impact === -1} + {/if} {#if fpd.impact === 3} @@ -276,7 +280,7 @@ {:else if fpd.impact === 1} {:else if fpd.impact === 0} - + {:else if fpd.impact === -1} {/if}