Merge pull request #310 from ClusterCockpit/hotfix

Hotfix
This commit is contained in:
Jan Eitzinger 2024-12-04 18:12:19 +01:00 committed by GitHub
commit 5d2c350ce2
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
7 changed files with 141 additions and 82 deletions

View File

@ -29,7 +29,6 @@ var Keys schema.ProgramConfig = schema.ProgramConfig{
"analysis_view_histogramMetrics": []string{"flops_any", "mem_bw", "mem_used"}, "analysis_view_histogramMetrics": []string{"flops_any", "mem_bw", "mem_used"},
"analysis_view_scatterPlotMetrics": [][]string{{"flops_any", "mem_bw"}, {"flops_any", "cpu_load"}, {"cpu_load", "mem_bw"}}, "analysis_view_scatterPlotMetrics": [][]string{{"flops_any", "mem_bw"}, {"flops_any", "cpu_load"}, {"cpu_load", "mem_bw"}},
"job_view_nodestats_selectedMetrics": []string{"flops_any", "mem_bw", "mem_used"}, "job_view_nodestats_selectedMetrics": []string{"flops_any", "mem_bw", "mem_used"},
"job_view_polarPlotMetrics": []string{"flops_any", "mem_bw", "mem_used"},
"job_view_selectedMetrics": []string{"flops_any", "mem_bw", "mem_used"}, "job_view_selectedMetrics": []string{"flops_any", "mem_bw", "mem_used"},
"job_view_showFootprint": true, "job_view_showFootprint": true,
"job_list_usePaging": false, "job_list_usePaging": false,

View File

@ -445,14 +445,6 @@
"minItems": 1 "minItems": 1
} }
}, },
"job_view_polarPlotMetrics": {
"description": "Metrics shown in polar plot of single job view",
"type": "array",
"items": {
"type": "string",
"minItems": 1
}
},
"job_view_selectedMetrics": { "job_view_selectedMetrics": {
"description": "", "description": "",
"type": "array", "type": "array",
@ -492,7 +484,6 @@
"analysis_view_histogramMetrics", "analysis_view_histogramMetrics",
"analysis_view_scatterPlotMetrics", "analysis_view_scatterPlotMetrics",
"job_view_nodestats_selectedMetrics", "job_view_nodestats_selectedMetrics",
"job_view_polarPlotMetrics",
"job_view_selectedMetrics", "job_view_selectedMetrics",
"plot_general_colorscheme", "plot_general_colorscheme",
"plot_list_selectedMetrics" "plot_list_selectedMetrics"

View File

@ -139,9 +139,6 @@
return names; return names;
}, []) }, [])
), ),
...(ccconfig[`job_view_polarPlotMetrics:${job.cluster}`] ||
ccconfig[`job_view_polarPlotMetrics`]
),
...(ccconfig[`job_view_nodestats_selectedMetrics:${job.cluster}`] || ...(ccconfig[`job_view_nodestats_selectedMetrics:${job.cluster}`] ||
ccconfig[`job_view_nodestats_selectedMetrics`] ccconfig[`job_view_nodestats_selectedMetrics`]
), ),

View File

@ -219,7 +219,7 @@
opts.push(`projectMatch=${filters.projectMatch}`); opts.push(`projectMatch=${filters.projectMatch}`);
if (filters.stats.length != 0) if (filters.stats.length != 0)
for (let stat of filters.stats) { for (let stat of filters.stats) {
opts.push(`stat=${stat?.field ? stat.field : stat.metricName}-${stat?.from ? stat.from : stat.range.from}-${stat?.to ? stat.to : stat.range.to}`); opts.push(`stat=${stat.field}-${stat.from}-${stat.to}`);
} }
if (opts.length == 0 && window.location.search.length <= 1) return; if (opts.length == 0 && window.location.search.length <= 1) return;
@ -390,7 +390,7 @@
{#if filters.stats.length > 0} {#if filters.stats.length > 0}
<Info icon="bar-chart" on:click={() => (isStatsOpen = true)}> <Info icon="bar-chart" on:click={() => (isStatsOpen = true)}>
{filters.stats {filters.stats
.map((stat) => `${stat?.text ? stat.text : stat.field}: ${stat?.from ? stat.from : stat.range.from} - ${stat?.to ? stat.to : stat.range.to}`) .map((stat) => `${stat.field}: ${stat.from} - ${stat.to}`)
.join(", ")} .join(", ")}
</Info> </Info>
{/if} {/if}

View File

@ -23,6 +23,25 @@
alert: metricConfig.alert alert: metricConfig.alert
}; };
/*
NEW: Footprints should be comparable: Always use Unchanged Single Node Thresholds, except for shared jobs.
HW Clocks, HW Temperatures and File/Net IO Thresholds will be scaled down too, even if they are independent.
'jf.stats' is one of: avg, min, max -> Always relative to one node's thresholds as configured.
*/
if (job.exclusive === 1) {
return defaultThresholds
} else {
const topol = getContext("getHardwareTopology")(job.cluster, job.subCluster)
const jobFraction = job.numHWThreads / topol.node.length;
return {
peak: round(defaultThresholds.peak * jobFraction, 0),
normal: round(defaultThresholds.normal * jobFraction, 0),
caution: round(defaultThresholds.caution * jobFraction, 0),
alert: round(defaultThresholds.alert * jobFraction, 0),
};
}
/* OLD: Based on Metric Aggregation Setting
// Job_Exclusivity does not matter, only aggregation // Job_Exclusivity does not matter, only aggregation
if (metricConfig.aggregation === "avg") { if (metricConfig.aggregation === "avg") {
return defaultThresholds; return defaultThresholds;
@ -43,6 +62,7 @@
); );
return defaultThresholds; return defaultThresholds;
} }
*/
} }
</script> </script>
@ -89,21 +109,21 @@
return { return {
...fmBase, ...fmBase,
color: "danger", color: "danger",
message: `Metric average way ${fmc.lowerIsBetter ? "above" : "below"} expected normal thresholds.`, message: `Footprint value way ${fmc.lowerIsBetter ? "above" : "below"} expected normal threshold.`,
impact: 3 impact: 3
}; };
} else if (evalFootprint(jf.value, fmt, fmc.lowerIsBetter, "caution")) { } else if (evalFootprint(jf.value, fmt, fmc.lowerIsBetter, "caution")) {
return { return {
...fmBase, ...fmBase,
color: "warning", color: "warning",
message: `Metric average ${fmc.lowerIsBetter ? "above" : "below"} expected normal thresholds.`, message: `Footprint value ${fmc.lowerIsBetter ? "above" : "below"} expected normal threshold.`,
impact: 2, impact: 2,
}; };
} else if (evalFootprint(jf.value, fmt, fmc.lowerIsBetter, "normal")) { } else if (evalFootprint(jf.value, fmt, fmc.lowerIsBetter, "normal")) {
return { return {
...fmBase, ...fmBase,
color: "success", color: "success",
message: "Metric average within expected thresholds.", message: "Footprint value within expected thresholds.",
impact: 1, impact: 1,
}; };
} else if (evalFootprint(jf.value, fmt, fmc.lowerIsBetter, "peak")) { } else if (evalFootprint(jf.value, fmt, fmc.lowerIsBetter, "peak")) {
@ -111,7 +131,7 @@
...fmBase, ...fmBase,
color: "info", color: "info",
message: message:
"Metric average above expected normal thresholds: Check for artifacts recommended.", "Footprint value above expected normal threshold: Check for artifacts recommended.",
impact: 0, impact: 0,
}; };
} else { } else {
@ -119,7 +139,7 @@
...fmBase, ...fmBase,
color: "secondary", color: "secondary",
message: message:
"Metric average above expected peak threshold: Check for artifacts!", "Footprint value above expected peak threshold: Check for artifacts!",
impact: -1, impact: -1,
}; };
} }
@ -136,25 +156,25 @@
return a.impact - b.impact || ((a.name > b.name) ? 1 : ((b.name > a.name) ? -1 : 0)); return a.impact - b.impact || ((a.name > b.name) ? 1 : ((b.name > a.name) ? -1 : 0));
});; });;
function evalFootprint(mean, thresholds, lowerIsBetter, level) { function evalFootprint(value, thresholds, lowerIsBetter, level) {
// Handle Metrics in which less value is better // Handle Metrics in which less value is better
switch (level) { switch (level) {
case "peak": case "peak":
if (lowerIsBetter) if (lowerIsBetter)
return false; // metric over peak -> return false to trigger impact -1 return false; // metric over peak -> return false to trigger impact -1
else return mean <= thresholds.peak && mean > thresholds.normal; else return value <= thresholds.peak && value > thresholds.normal;
case "alert": case "alert":
if (lowerIsBetter) if (lowerIsBetter)
return mean <= thresholds.peak && mean >= thresholds.alert; return value <= thresholds.peak && value >= thresholds.alert;
else return mean <= thresholds.alert && mean >= 0; else return value <= thresholds.alert && value >= 0;
case "caution": case "caution":
if (lowerIsBetter) if (lowerIsBetter)
return mean < thresholds.alert && mean >= thresholds.caution; return value < thresholds.alert && value >= thresholds.caution;
else return mean <= thresholds.caution && mean > thresholds.alert; else return value <= thresholds.caution && value > thresholds.alert;
case "normal": case "normal":
if (lowerIsBetter) if (lowerIsBetter)
return mean < thresholds.caution && mean >= 0; return value < thresholds.caution && value >= 0;
else return mean <= thresholds.normal && mean > thresholds.caution; else return value <= thresholds.normal && value > thresholds.caution;
default: default:
return false; return false;
} }
@ -181,10 +201,14 @@
> >
<div class="mx-1"> <div class="mx-1">
<!-- Alerts Only --> <!-- Alerts Only -->
{#if fpd.impact === 3 || fpd.impact === -1} {#if fpd.impact === 3}
<Icon name="exclamation-triangle-fill" class="text-danger" /> <Icon name="exclamation-triangle-fill" class="text-danger" />
{:else if fpd.impact === 2} {:else if fpd.impact === 2}
<Icon name="exclamation-triangle" class="text-warning" /> <Icon name="exclamation-triangle" class="text-warning" />
{:else if fpd.impact === 0}
<Icon name="info-circle" class="text-info" />
{:else if fpd.impact === -1}
<Icon name="info-circle-fill" class="text-danger" />
{/if} {/if}
<!-- Emoji for all states--> <!-- Emoji for all states-->
{#if fpd.impact === 3} {#if fpd.impact === 3}
@ -194,7 +218,7 @@
{:else if fpd.impact === 1} {:else if fpd.impact === 1}
<Icon name="emoji-smile" class="text-success" /> <Icon name="emoji-smile" class="text-success" />
{:else if fpd.impact === 0} {:else if fpd.impact === 0}
<Icon name="emoji-laughing" class="text-info" /> <Icon name="emoji-smile" class="text-info" />
{:else if fpd.impact === -1} {:else if fpd.impact === -1}
<Icon name="emoji-dizzy" class="text-danger" /> <Icon name="emoji-dizzy" class="text-danger" />
{/if} {/if}

View File

@ -45,7 +45,7 @@
if (footprintData) { if (footprintData) {
return footprintData.filter(fpd => { return footprintData.filter(fpd => {
if (!jobMetrics.find(m => m.name == fpd.name && m.scope == "node" || fpd.impact == 4)) { if (!jobMetrics.find(m => m.name == fpd.name && m.scope == "node" || fpd.impact == 4)) {
console.warn(`PolarPlot: No metric data (or config) for '${fpd.name}'`) console.warn(`PolarPlot: No metric data for '${fpd.name}'`)
return false return false
} }
return true return true
@ -72,6 +72,7 @@
const getMetricConfig = getContext("getMetricConfig"); const getMetricConfig = getContext("getMetricConfig");
const getValuesForStatGeneric = (getStat) => labels.map(name => { const getValuesForStatGeneric = (getStat) => labels.map(name => {
// TODO: Requires Scaling if Shared Job
const peak = getMetricConfig(cluster, subCluster, name).peak const peak = getMetricConfig(cluster, subCluster, name).peak
const metric = jobMetrics.find(m => m.name == name && m.scope == "node") const metric = jobMetrics.find(m => m.name == name && m.scope == "node")
const value = getStat(metric.metric) / peak const value = getStat(metric.metric) / peak
@ -79,6 +80,7 @@
}) })
const getValuesForStatFootprint = (getStat) => labels.map(name => { const getValuesForStatFootprint = (getStat) => labels.map(name => {
// FootprintData 'Peak' is pre-scaled for Shared Jobs in JobSummary Component
const peak = footprintData.find(fpd => fpd.name === name).peak const peak = footprintData.find(fpd => fpd.name === name).peak
const metric = jobMetrics.find(m => m.name == name && m.scope == "node") const metric = jobMetrics.find(m => m.name == name && m.scope == "node")
const value = getStat(metric.metric) / peak const value = getStat(metric.metric) / peak
@ -86,14 +88,21 @@
}) })
function getMax(metric) { function getMax(metric) {
let max = 0 let max = metric.series[0].statistics.max;
for (let series of metric.series) for (let series of metric.series)
max = Math.max(max, series.statistics.max) max = Math.max(max, series.statistics.max)
return max return max
} }
function getMin(metric) {
let min = metric.series[0].statistics.min;
for (let series of metric.series)
min = Math.min(min, series.statistics.min)
return min
}
function getAvg(metric) { function getAvg(metric) {
let avg = 0 let avg = 0;
for (let series of metric.series) for (let series of metric.series)
avg += series.statistics.avg avg += series.statistics.avg
return avg / metric.series.length return avg / metric.series.length
@ -104,6 +113,8 @@
return getValuesForStatGeneric(getAvg) return getValuesForStatGeneric(getAvg)
} else if (type === 'max') { } else if (type === 'max') {
return getValuesForStatGeneric(getMax) return getValuesForStatGeneric(getMax)
} else if (type === 'min') {
return getValuesForStatGeneric(getMin)
} }
console.log('Unknown Type For Polar Data') console.log('Unknown Type For Polar Data')
return [] return []
@ -114,6 +125,8 @@
return getValuesForStatFootprint(getAvg) return getValuesForStatFootprint(getAvg)
} else if (type === 'max') { } else if (type === 'max') {
return getValuesForStatFootprint(getMax) return getValuesForStatFootprint(getMax)
} else if (type === 'min') {
return getValuesForStatFootprint(getMin)
} }
console.log('Unknown Type For Polar Data') console.log('Unknown Type For Polar Data')
return [] return []
@ -124,25 +137,36 @@
datasets: [ datasets: [
{ {
label: 'Max', label: 'Max',
data: footprintData ? loadDataForFootprint('max') : loadDataGeneric('max'), // data: footprintData ? loadDataForFootprint('max') : loadDataGeneric('max'), // Node Scope Only
fill: 1, fill: 1,
backgroundColor: 'rgba(0, 102, 255, 0.25)', backgroundColor: 'rgba(0, 0, 255, 0.25)',
borderColor: 'rgb(0, 102, 255)', borderColor: 'rgb(0, 0, 255)',
pointBackgroundColor: 'rgb(0, 102, 255)', pointBackgroundColor: 'rgb(0, 0, 255)',
pointBorderColor: '#fff', pointBorderColor: '#fff',
pointHoverBackgroundColor: '#fff', pointHoverBackgroundColor: '#fff',
pointHoverBorderColor: 'rgb(0, 102, 255)' pointHoverBorderColor: 'rgb(0, 0, 255)'
}, },
{ {
label: 'Avg', label: 'Avg',
data: footprintData ? loadDataForFootprint('avg') : loadDataGeneric('avg'), // getValuesForStat(getAvg) data: footprintData ? loadDataForFootprint('avg') : loadDataGeneric('avg'), // Node Scope Only
fill: true, fill: 2,
backgroundColor: 'rgba(255, 153, 0, 0.25)', backgroundColor: 'rgba(255, 210, 0, 0.25)',
borderColor: 'rgb(255, 153, 0)', borderColor: 'rgb(255, 210, 0)',
pointBackgroundColor: 'rgb(255, 153, 0)', pointBackgroundColor: 'rgb(255, 210, 0)',
pointBorderColor: '#fff', pointBorderColor: '#fff',
pointHoverBackgroundColor: '#fff', pointHoverBackgroundColor: '#fff',
pointHoverBorderColor: 'rgb(255, 153, 0)' pointHoverBorderColor: 'rgb(255, 210, 0)'
},
{
label: 'Min',
data: footprintData ? loadDataForFootprint('min') : loadDataGeneric('min'), // Node Scope Only
fill: true,
backgroundColor: 'rgba(255, 0, 0, 0.25)',
borderColor: 'rgb(255, 0, 0)',
pointBackgroundColor: 'rgb(255, 0, 0)',
pointBorderColor: '#fff',
pointHoverBackgroundColor: '#fff',
pointHoverBorderColor: 'rgb(255, 0, 0)'
} }
] ]
} }

View File

@ -23,6 +23,25 @@
alert: metricConfig.alert alert: metricConfig.alert
}; };
/*
NEW: Footprints should be comparable: Always use Unchanged Single Node Thresholds, except for shared jobs.
HW Clocks, HW Temperatures and File/Net IO Thresholds will be scaled down too, even if they are independent.
'jf.stats' is one of: avg, min, max -> Always relative to one node's thresholds as configured.
*/
if (job.exclusive === 1) {
return defaultThresholds
} else {
const topol = getContext("getHardwareTopology")(job.cluster, job.subCluster)
const jobFraction = job.numHWThreads / topol.node.length;
return {
peak: round(defaultThresholds.peak * jobFraction, 0),
normal: round(defaultThresholds.normal * jobFraction, 0),
caution: round(defaultThresholds.caution * jobFraction, 0),
alert: round(defaultThresholds.alert * jobFraction, 0),
};
}
/* OLD: Based on Metric Aggregation Setting
// Job_Exclusivity does not matter, only aggregation // Job_Exclusivity does not matter, only aggregation
if (metricConfig.aggregation === "avg") { if (metricConfig.aggregation === "avg") {
return defaultThresholds; return defaultThresholds;
@ -43,6 +62,7 @@
); );
return defaultThresholds; return defaultThresholds;
} }
*/
} }
</script> </script>
@ -94,21 +114,21 @@
return { return {
...fmBase, ...fmBase,
color: "danger", color: "danger",
message: `Metric average way ${fmc.lowerIsBetter ? "above" : "below"} expected normal thresholds.`, message: `Footprint value way ${fmc.lowerIsBetter ? "above" : "below"} expected normal threshold.`,
impact: 3 impact: 3
}; };
} else if (evalFootprint(jf.value, fmt, fmc.lowerIsBetter, "caution")) { } else if (evalFootprint(jf.value, fmt, fmc.lowerIsBetter, "caution")) {
return { return {
...fmBase, ...fmBase,
color: "warning", color: "warning",
message: `Metric average ${fmc.lowerIsBetter ? "above" : "below"} expected normal thresholds.`, message: `Footprint value ${fmc.lowerIsBetter ? "above" : "below"} expected normal threshold.`,
impact: 2, impact: 2,
}; };
} else if (evalFootprint(jf.value, fmt, fmc.lowerIsBetter, "normal")) { } else if (evalFootprint(jf.value, fmt, fmc.lowerIsBetter, "normal")) {
return { return {
...fmBase, ...fmBase,
color: "success", color: "success",
message: "Metric average within expected thresholds.", message: "Footprint value within expected thresholds.",
impact: 1, impact: 1,
}; };
} else if (evalFootprint(jf.value, fmt, fmc.lowerIsBetter, "peak")) { } else if (evalFootprint(jf.value, fmt, fmc.lowerIsBetter, "peak")) {
@ -116,7 +136,7 @@
...fmBase, ...fmBase,
color: "info", color: "info",
message: message:
"Metric average above expected normal thresholds: Check for artifacts recommended.", "Footprint value above expected normal threshold: Check for artifacts recommended.",
impact: 0, impact: 0,
}; };
} else { } else {
@ -124,7 +144,7 @@
...fmBase, ...fmBase,
color: "secondary", color: "secondary",
message: message:
"Metric average above expected peak threshold: Check for artifacts!", "Footprint value above expected peak threshold: Check for artifacts!",
impact: -1, impact: -1,
}; };
} }
@ -142,25 +162,25 @@
return a.impact - b.impact || ((a.name > b.name) ? 1 : ((b.name > a.name) ? -1 : 0)); return a.impact - b.impact || ((a.name > b.name) ? 1 : ((b.name > a.name) ? -1 : 0));
});; });;
function evalFootprint(mean, thresholds, lowerIsBetter, level) { function evalFootprint(value, thresholds, lowerIsBetter, level) {
// Handle Metrics in which less value is better // Handle Metrics in which less value is better
switch (level) { switch (level) {
case "peak": case "peak":
if (lowerIsBetter) if (lowerIsBetter)
return false; // metric over peak -> return false to trigger impact -1 return false; // metric over peak -> return false to trigger impact -1
else return mean <= thresholds.peak && mean > thresholds.normal; else return value <= thresholds.peak && value > thresholds.normal;
case "alert": case "alert":
if (lowerIsBetter) if (lowerIsBetter)
return mean <= thresholds.peak && mean >= thresholds.alert; return value <= thresholds.peak && value >= thresholds.alert;
else return mean <= thresholds.alert && mean >= 0; else return value <= thresholds.alert && value >= 0;
case "caution": case "caution":
if (lowerIsBetter) if (lowerIsBetter)
return mean < thresholds.alert && mean >= thresholds.caution; return value < thresholds.alert && value >= thresholds.caution;
else return mean <= thresholds.caution && mean > thresholds.alert; else return value <= thresholds.caution && value > thresholds.alert;
case "normal": case "normal":
if (lowerIsBetter) if (lowerIsBetter)
return mean < thresholds.caution && mean >= 0; return value < thresholds.caution && value >= 0;
else return mean <= thresholds.normal && mean > thresholds.caution; else return value <= thresholds.normal && value > thresholds.caution;
default: default:
return false; return false;
} }
@ -244,10 +264,14 @@
id={`footprint-${job.jobId}-${index}`} id={`footprint-${job.jobId}-${index}`}
> >
<div class="mx-1"> <div class="mx-1">
{#if fpd.impact === 3 || fpd.impact === -1} {#if fpd.impact === 3}
<Icon name="exclamation-triangle-fill" class="text-danger" /> <Icon name="exclamation-triangle-fill" class="text-danger" />
{:else if fpd.impact === 2} {:else if fpd.impact === 2}
<Icon name="exclamation-triangle" class="text-warning" /> <Icon name="exclamation-triangle" class="text-warning" />
{:else if fpd.impact === 0}
<Icon name="info-circle" class="text-info" />
{:else if fpd.impact === -1}
<Icon name="info-circle-fill" class="text-danger" />
{/if} {/if}
{#if fpd.impact === 3} {#if fpd.impact === 3}
<Icon name="emoji-frown" class="text-danger" /> <Icon name="emoji-frown" class="text-danger" />
@ -256,7 +280,7 @@
{:else if fpd.impact === 1} {:else if fpd.impact === 1}
<Icon name="emoji-smile" class="text-success" /> <Icon name="emoji-smile" class="text-success" />
{:else if fpd.impact === 0} {:else if fpd.impact === 0}
<Icon name="emoji-laughing" class="text-info" /> <Icon name="emoji-smile" class="text-info" />
{:else if fpd.impact === -1} {:else if fpd.impact === -1}
<Icon name="emoji-dizzy" class="text-danger" /> <Icon name="emoji-dizzy" class="text-danger" />
{/if} {/if}