Merge pull request #310 from ClusterCockpit/hotfix

Hotfix
This commit is contained in:
Jan Eitzinger 2024-12-04 18:12:19 +01:00 committed by GitHub
commit 5d2c350ce2
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
7 changed files with 141 additions and 82 deletions

View File

@ -29,7 +29,6 @@ var Keys schema.ProgramConfig = schema.ProgramConfig{
"analysis_view_histogramMetrics": []string{"flops_any", "mem_bw", "mem_used"},
"analysis_view_scatterPlotMetrics": [][]string{{"flops_any", "mem_bw"}, {"flops_any", "cpu_load"}, {"cpu_load", "mem_bw"}},
"job_view_nodestats_selectedMetrics": []string{"flops_any", "mem_bw", "mem_used"},
"job_view_polarPlotMetrics": []string{"flops_any", "mem_bw", "mem_used"},
"job_view_selectedMetrics": []string{"flops_any", "mem_bw", "mem_used"},
"job_view_showFootprint": true,
"job_list_usePaging": false,

View File

@ -445,14 +445,6 @@
"minItems": 1
}
},
"job_view_polarPlotMetrics": {
"description": "Metrics shown in polar plot of single job view",
"type": "array",
"items": {
"type": "string",
"minItems": 1
}
},
"job_view_selectedMetrics": {
"description": "",
"type": "array",
@ -492,7 +484,6 @@
"analysis_view_histogramMetrics",
"analysis_view_scatterPlotMetrics",
"job_view_nodestats_selectedMetrics",
"job_view_polarPlotMetrics",
"job_view_selectedMetrics",
"plot_general_colorscheme",
"plot_list_selectedMetrics"

View File

@ -139,9 +139,6 @@
return names;
}, [])
),
...(ccconfig[`job_view_polarPlotMetrics:${job.cluster}`] ||
ccconfig[`job_view_polarPlotMetrics`]
),
...(ccconfig[`job_view_nodestats_selectedMetrics:${job.cluster}`] ||
ccconfig[`job_view_nodestats_selectedMetrics`]
),

View File

@ -219,7 +219,7 @@
opts.push(`projectMatch=${filters.projectMatch}`);
if (filters.stats.length != 0)
for (let stat of filters.stats) {
opts.push(`stat=${stat?.field ? stat.field : stat.metricName}-${stat?.from ? stat.from : stat.range.from}-${stat?.to ? stat.to : stat.range.to}`);
opts.push(`stat=${stat.field}-${stat.from}-${stat.to}`);
}
if (opts.length == 0 && window.location.search.length <= 1) return;
@ -390,7 +390,7 @@
{#if filters.stats.length > 0}
<Info icon="bar-chart" on:click={() => (isStatsOpen = true)}>
{filters.stats
.map((stat) => `${stat?.text ? stat.text : stat.field}: ${stat?.from ? stat.from : stat.range.from} - ${stat?.to ? stat.to : stat.range.to}`)
.map((stat) => `${stat.field}: ${stat.from} - ${stat.to}`)
.join(", ")}
</Info>
{/if}

View File

@ -23,6 +23,25 @@
alert: metricConfig.alert
};
/*
NEW: Footprints should be comparable: Always use Unchanged Single Node Thresholds, except for shared jobs.
HW Clocks, HW Temperatures and File/Net IO Thresholds will be scaled down too, even if they are independent.
'jf.stats' is one of: avg, min, max -> Always relative to one node's thresholds as configured.
*/
if (job.exclusive === 1) {
return defaultThresholds
} else {
const topol = getContext("getHardwareTopology")(job.cluster, job.subCluster)
const jobFraction = job.numHWThreads / topol.node.length;
return {
peak: round(defaultThresholds.peak * jobFraction, 0),
normal: round(defaultThresholds.normal * jobFraction, 0),
caution: round(defaultThresholds.caution * jobFraction, 0),
alert: round(defaultThresholds.alert * jobFraction, 0),
};
}
/* OLD: Based on Metric Aggregation Setting
// Job_Exclusivity does not matter, only aggregation
if (metricConfig.aggregation === "avg") {
return defaultThresholds;
@ -43,6 +62,7 @@
);
return defaultThresholds;
}
*/
}
</script>
@ -89,21 +109,21 @@
return {
...fmBase,
color: "danger",
message: `Metric average way ${fmc.lowerIsBetter ? "above" : "below"} expected normal thresholds.`,
message: `Footprint value way ${fmc.lowerIsBetter ? "above" : "below"} expected normal threshold.`,
impact: 3
};
} else if (evalFootprint(jf.value, fmt, fmc.lowerIsBetter, "caution")) {
return {
...fmBase,
color: "warning",
message: `Metric average ${fmc.lowerIsBetter ? "above" : "below"} expected normal thresholds.`,
message: `Footprint value ${fmc.lowerIsBetter ? "above" : "below"} expected normal threshold.`,
impact: 2,
};
} else if (evalFootprint(jf.value, fmt, fmc.lowerIsBetter, "normal")) {
return {
...fmBase,
color: "success",
message: "Metric average within expected thresholds.",
message: "Footprint value within expected thresholds.",
impact: 1,
};
} else if (evalFootprint(jf.value, fmt, fmc.lowerIsBetter, "peak")) {
@ -111,7 +131,7 @@
...fmBase,
color: "info",
message:
"Metric average above expected normal thresholds: Check for artifacts recommended.",
"Footprint value above expected normal threshold: Check for artifacts recommended.",
impact: 0,
};
} else {
@ -119,7 +139,7 @@
...fmBase,
color: "secondary",
message:
"Metric average above expected peak threshold: Check for artifacts!",
"Footprint value above expected peak threshold: Check for artifacts!",
impact: -1,
};
}
@ -136,25 +156,25 @@
return a.impact - b.impact || ((a.name > b.name) ? 1 : ((b.name > a.name) ? -1 : 0));
});;
function evalFootprint(mean, thresholds, lowerIsBetter, level) {
function evalFootprint(value, thresholds, lowerIsBetter, level) {
// Handle Metrics in which less value is better
switch (level) {
case "peak":
if (lowerIsBetter)
return false; // metric over peak -> return false to trigger impact -1
else return mean <= thresholds.peak && mean > thresholds.normal;
else return value <= thresholds.peak && value > thresholds.normal;
case "alert":
if (lowerIsBetter)
return mean <= thresholds.peak && mean >= thresholds.alert;
else return mean <= thresholds.alert && mean >= 0;
return value <= thresholds.peak && value >= thresholds.alert;
else return value <= thresholds.alert && value >= 0;
case "caution":
if (lowerIsBetter)
return mean < thresholds.alert && mean >= thresholds.caution;
else return mean <= thresholds.caution && mean > thresholds.alert;
return value < thresholds.alert && value >= thresholds.caution;
else return value <= thresholds.caution && value > thresholds.alert;
case "normal":
if (lowerIsBetter)
return mean < thresholds.caution && mean >= 0;
else return mean <= thresholds.normal && mean > thresholds.caution;
return value < thresholds.caution && value >= 0;
else return value <= thresholds.normal && value > thresholds.caution;
default:
return false;
}
@ -181,10 +201,14 @@
>
<div class="mx-1">
<!-- Alerts Only -->
{#if fpd.impact === 3 || fpd.impact === -1}
{#if fpd.impact === 3}
<Icon name="exclamation-triangle-fill" class="text-danger" />
{:else if fpd.impact === 2}
<Icon name="exclamation-triangle" class="text-warning" />
{:else if fpd.impact === 0}
<Icon name="info-circle" class="text-info" />
{:else if fpd.impact === -1}
<Icon name="info-circle-fill" class="text-danger" />
{/if}
<!-- Emoji for all states-->
{#if fpd.impact === 3}
@ -194,7 +218,7 @@
{:else if fpd.impact === 1}
<Icon name="emoji-smile" class="text-success" />
{:else if fpd.impact === 0}
<Icon name="emoji-laughing" class="text-info" />
<Icon name="emoji-smile" class="text-info" />
{:else if fpd.impact === -1}
<Icon name="emoji-dizzy" class="text-danger" />
{/if}

View File

@ -45,7 +45,7 @@
if (footprintData) {
return footprintData.filter(fpd => {
if (!jobMetrics.find(m => m.name == fpd.name && m.scope == "node" || fpd.impact == 4)) {
console.warn(`PolarPlot: No metric data (or config) for '${fpd.name}'`)
console.warn(`PolarPlot: No metric data for '${fpd.name}'`)
return false
}
return true
@ -72,6 +72,7 @@
const getMetricConfig = getContext("getMetricConfig");
const getValuesForStatGeneric = (getStat) => labels.map(name => {
// TODO: Requires Scaling if Shared Job
const peak = getMetricConfig(cluster, subCluster, name).peak
const metric = jobMetrics.find(m => m.name == name && m.scope == "node")
const value = getStat(metric.metric) / peak
@ -79,6 +80,7 @@
})
const getValuesForStatFootprint = (getStat) => labels.map(name => {
// FootprintData 'Peak' is pre-scaled for Shared Jobs in JobSummary Component
const peak = footprintData.find(fpd => fpd.name === name).peak
const metric = jobMetrics.find(m => m.name == name && m.scope == "node")
const value = getStat(metric.metric) / peak
@ -86,14 +88,21 @@
})
function getMax(metric) {
let max = 0
let max = metric.series[0].statistics.max;
for (let series of metric.series)
max = Math.max(max, series.statistics.max)
return max
}
/**
 * Smallest `statistics.min` found across all series of the given metric.
 * The first series seeds the comparison, so `metric.series` must hold at
 * least one entry; an empty list throws when the seed is read.
 */
function getMin(metric) {
  const seed = metric.series[0].statistics.min;
  return metric.series.reduce(
    (lowest, entry) => Math.min(lowest, entry.statistics.min),
    seed,
  );
}
function getAvg(metric) {
let avg = 0
let avg = 0;
for (let series of metric.series)
avg += series.statistics.avg
return avg / metric.series.length
@ -104,6 +113,8 @@
return getValuesForStatGeneric(getAvg)
} else if (type === 'max') {
return getValuesForStatGeneric(getMax)
} else if (type === 'min') {
return getValuesForStatGeneric(getMin)
}
console.log('Unknown Type For Polar Data')
return []
@ -114,6 +125,8 @@
return getValuesForStatFootprint(getAvg)
} else if (type === 'max') {
return getValuesForStatFootprint(getMax)
} else if (type === 'min') {
return getValuesForStatFootprint(getMin)
}
console.log('Unknown Type For Polar Data')
return []
@ -124,25 +137,36 @@
datasets: [
{
label: 'Max',
data: footprintData ? loadDataForFootprint('max') : loadDataGeneric('max'), //
data: footprintData ? loadDataForFootprint('max') : loadDataGeneric('max'), // Node Scope Only
fill: 1,
backgroundColor: 'rgba(0, 102, 255, 0.25)',
borderColor: 'rgb(0, 102, 255)',
pointBackgroundColor: 'rgb(0, 102, 255)',
backgroundColor: 'rgba(0, 0, 255, 0.25)',
borderColor: 'rgb(0, 0, 255)',
pointBackgroundColor: 'rgb(0, 0, 255)',
pointBorderColor: '#fff',
pointHoverBackgroundColor: '#fff',
pointHoverBorderColor: 'rgb(0, 102, 255)'
pointHoverBorderColor: 'rgb(0, 0, 255)'
},
{
label: 'Avg',
data: footprintData ? loadDataForFootprint('avg') : loadDataGeneric('avg'), // getValuesForStat(getAvg)
fill: true,
backgroundColor: 'rgba(255, 153, 0, 0.25)',
borderColor: 'rgb(255, 153, 0)',
pointBackgroundColor: 'rgb(255, 153, 0)',
data: footprintData ? loadDataForFootprint('avg') : loadDataGeneric('avg'), // Node Scope Only
fill: 2,
backgroundColor: 'rgba(255, 210, 0, 0.25)',
borderColor: 'rgb(255, 210, 0)',
pointBackgroundColor: 'rgb(255, 210, 0)',
pointBorderColor: '#fff',
pointHoverBackgroundColor: '#fff',
pointHoverBorderColor: 'rgb(255, 153, 0)'
pointHoverBorderColor: 'rgb(255, 210, 0)'
},
{
label: 'Min',
data: footprintData ? loadDataForFootprint('min') : loadDataGeneric('min'), // Node Scope Only
fill: true,
backgroundColor: 'rgba(255, 0, 0, 0.25)',
borderColor: 'rgb(255, 0, 0)',
pointBackgroundColor: 'rgb(255, 0, 0)',
pointBorderColor: '#fff',
pointHoverBackgroundColor: '#fff',
pointHoverBorderColor: 'rgb(255, 0, 0)'
}
]
}

View File

@ -23,6 +23,25 @@
alert: metricConfig.alert
};
/*
NEW: Footprints should be comparable: Always use Unchanged Single Node Thresholds, except for shared jobs.
HW Clocks, HW Temperatures and File/Net IO Thresholds will be scaled down too, even if they are independent.
'jf.stats' is one of: avg, min, max -> Always relative to one node's thresholds as configured.
*/
if (job.exclusive === 1) {
return defaultThresholds
} else {
const topol = getContext("getHardwareTopology")(job.cluster, job.subCluster)
const jobFraction = job.numHWThreads / topol.node.length;
return {
peak: round(defaultThresholds.peak * jobFraction, 0),
normal: round(defaultThresholds.normal * jobFraction, 0),
caution: round(defaultThresholds.caution * jobFraction, 0),
alert: round(defaultThresholds.alert * jobFraction, 0),
};
}
/* OLD: Based on Metric Aggregation Setting
// Job_Exclusivity does not matter, only aggregation
if (metricConfig.aggregation === "avg") {
return defaultThresholds;
@ -43,6 +62,7 @@
);
return defaultThresholds;
}
*/
}
</script>
@ -94,21 +114,21 @@
return {
...fmBase,
color: "danger",
message: `Metric average way ${fmc.lowerIsBetter ? "above" : "below"} expected normal thresholds.`,
message: `Footprint value way ${fmc.lowerIsBetter ? "above" : "below"} expected normal threshold.`,
impact: 3
};
} else if (evalFootprint(jf.value, fmt, fmc.lowerIsBetter, "caution")) {
return {
...fmBase,
color: "warning",
message: `Metric average ${fmc.lowerIsBetter ? "above" : "below"} expected normal thresholds.`,
message: `Footprint value ${fmc.lowerIsBetter ? "above" : "below"} expected normal threshold.`,
impact: 2,
};
} else if (evalFootprint(jf.value, fmt, fmc.lowerIsBetter, "normal")) {
return {
...fmBase,
color: "success",
message: "Metric average within expected thresholds.",
message: "Footprint value within expected thresholds.",
impact: 1,
};
} else if (evalFootprint(jf.value, fmt, fmc.lowerIsBetter, "peak")) {
@ -116,7 +136,7 @@
...fmBase,
color: "info",
message:
"Metric average above expected normal thresholds: Check for artifacts recommended.",
"Footprint value above expected normal threshold: Check for artifacts recommended.",
impact: 0,
};
} else {
@ -124,7 +144,7 @@
...fmBase,
color: "secondary",
message:
"Metric average above expected peak threshold: Check for artifacts!",
"Footprint value above expected peak threshold: Check for artifacts!",
impact: -1,
};
}
@ -142,25 +162,25 @@
return a.impact - b.impact || ((a.name > b.name) ? 1 : ((b.name > a.name) ? -1 : 0));
});;
function evalFootprint(mean, thresholds, lowerIsBetter, level) {
function evalFootprint(value, thresholds, lowerIsBetter, level) {
// Handle Metrics in which less value is better
switch (level) {
case "peak":
if (lowerIsBetter)
return false; // metric over peak -> return false to trigger impact -1
else return mean <= thresholds.peak && mean > thresholds.normal;
else return value <= thresholds.peak && value > thresholds.normal;
case "alert":
if (lowerIsBetter)
return mean <= thresholds.peak && mean >= thresholds.alert;
else return mean <= thresholds.alert && mean >= 0;
return value <= thresholds.peak && value >= thresholds.alert;
else return value <= thresholds.alert && value >= 0;
case "caution":
if (lowerIsBetter)
return mean < thresholds.alert && mean >= thresholds.caution;
else return mean <= thresholds.caution && mean > thresholds.alert;
return value < thresholds.alert && value >= thresholds.caution;
else return value <= thresholds.caution && value > thresholds.alert;
case "normal":
if (lowerIsBetter)
return mean < thresholds.caution && mean >= 0;
else return mean <= thresholds.normal && mean > thresholds.caution;
return value < thresholds.caution && value >= 0;
else return value <= thresholds.normal && value > thresholds.caution;
default:
return false;
}
@ -244,10 +264,14 @@
id={`footprint-${job.jobId}-${index}`}
>
<div class="mx-1">
{#if fpd.impact === 3 || fpd.impact === -1}
{#if fpd.impact === 3}
<Icon name="exclamation-triangle-fill" class="text-danger" />
{:else if fpd.impact === 2}
<Icon name="exclamation-triangle" class="text-warning" />
{:else if fpd.impact === 0}
<Icon name="info-circle" class="text-info" />
{:else if fpd.impact === -1}
<Icon name="info-circle-fill" class="text-danger" />
{/if}
{#if fpd.impact === 3}
<Icon name="emoji-frown" class="text-danger" />
@ -256,7 +280,7 @@
{:else if fpd.impact === 1}
<Icon name="emoji-smile" class="text-success" />
{:else if fpd.impact === 0}
<Icon name="emoji-laughing" class="text-info" />
<Icon name="emoji-smile" class="text-info" />
{:else if fpd.impact === -1}
<Icon name="emoji-dizzy" class="text-danger" />
{/if}