rework job view header, change footprint to summary component

This commit is contained in:
Christoph Kluge 2024-09-05 16:44:03 +02:00
parent 398e3c1b91
commit df484dc816
7 changed files with 626 additions and 177 deletions

View File

@ -37,10 +37,9 @@
import Metric from "./job/Metric.svelte";
import TagManagement from "./job/TagManagement.svelte";
import StatsTable from "./job/StatsTable.svelte";
import JobFootprint from "./generic/helper/JobFootprint.svelte";
import JobSummary from "./job/JobSummary.svelte";
import ConcurrentJobs from "./generic/helper/ConcurrentJobs.svelte";
import PlotTable from "./generic/PlotTable.svelte";
import Polar from "./generic/plots/Polar.svelte";
import Roofline from "./generic/plots/Roofline.svelte";
import JobInfo from "./generic/joblist/JobInfo.svelte";
import MetricSelection from "./generic/select/MetricSelection.svelte";
@ -232,40 +231,72 @@
}));
</script>
<Row>
<Col>
<Row cols={4} class="mb-2">
<!-- Column 1: Job Info, Concurrent Jobs, Admin Message if found-->
<Col xs={3}>
{#if $initq.error}
<Card body color="danger">{$initq.error.message}</Card>
{:else if $initq.data}
<Card class="overflow-auto" style="height: 400px;">
<TabContent> <!-- on:tab={(e) => (status = e.detail)} -->
<TabPane tabId="meta-info" tab="Job Info" active>
<CardBody class="pb-2">
<JobInfo job={$initq.data.job} {jobTags} />
</CardBody>
</TabPane>
{#if $initq.data.job.concurrentJobs != null && $initq.data.job.concurrentJobs.items.length != 0}
<TabPane tabId="shared-jobs">
<span slot="tab">
{$initq.data.job.concurrentJobs.items.length} Concurrent Jobs
</span>
<CardBody>
<ConcurrentJobs cJobs={$initq.data.job.concurrentJobs} showLinks={(authlevel > roles.manager)}/>
</CardBody>
</TabPane>
{/if}
{#if $initq.data?.job?.metaData?.message}
<TabPane tabId="admin-msg" tab="Admin Note">
<CardBody>
<p>This note was added by administrators:</p>
<hr/>
<p>{@html $initq.data.job.metaData.message}</p>
</CardBody>
</TabPane>
{/if}
</TabContent>
</Card>
{:else}
<Spinner secondary />
{/if}
</Col>
{#if $initq.data && showFootprint}
<Col>
<JobFootprint
job={$initq.data.job}
/>
</Col>
<!-- If enabled: Column 2: Job Footprint, Polar Representation, Heuristic Summary -->
{#if showFootprint}
<Col xs={3}>
{#if $initq.error}
<Card body color="danger">{$initq.error.message}</Card>
{:else if $initq?.data && $jobMetrics?.data}
<JobSummary job={$initq.data.job} jobMetrics={$jobMetrics.data.jobMetrics}/>
{:else}
<Spinner secondary />
{/if}
{#if $initq?.data && $jobMetrics?.data?.jobMetrics}
{#if $initq.data.job.concurrentJobs != null && $initq.data.job.concurrentJobs.items.length != 0}
<Col>
<ConcurrentJobs cJobs={$initq.data.job.concurrentJobs} showLinks={(authlevel > roles.manager)}/>
</Col>
{:else}
<Col xs={3}/>
{/if}
<Col>
<Polar
metrics={ccconfig[
`job_view_polarPlotMetrics:${$initq.data.job.cluster}`
] || ccconfig[`job_view_polarPlotMetrics`]}
cluster={$initq.data.job.cluster}
subCluster={$initq.data.job.subCluster}
jobMetrics={$jobMetrics.data.jobMetrics}
/>
</Col>
<Col>
<!-- Column 3: Spacer -->
<Col xs={2}/>
<!-- Column 4: Job Roofline -->
<Col xs={4}>
{#if $initq.error || $jobMetrics.error}
<Card body color="danger">
<p>Initq Error: {$initq.error?.message}</p>
<p>jobMetrics Error: {$jobMetrics.error?.message}</p>
</Card>
{:else if $initq?.data && $jobMetrics?.data}
<Card style="height: 400px;">
<Roofline
renderTime={true}
subCluster={$initq.data.clusters
@ -280,14 +311,16 @@
)?.metric,
)}
/>
</Col>
</Card>
{:else}
<Col />
<Spinner secondary />
<Col />
{/if}
</Col>
</Row>
<Row class="mb-3">
<hr/>
<Row class="mb-2">
<Col xs="auto">
{#if $initq.data}
<TagManagement job={$initq.data.job} bind:jobTags />
@ -344,9 +377,13 @@
{/if}
</Col>
</Row>
<Row class="mt-2">
<hr/>
<Row>
<Col>
{#if $initq.data}
<Card>
<TabContent>
{#if somethingMissing}
<TabPane tabId="resources" tab="Resources" active={somethingMissing}>
@ -414,6 +451,7 @@
</div>
</TabPane>
</TabContent>
</Card>
{/if}
</Col>
</Row>

View File

@ -13,31 +13,27 @@
import {
Card,
CardHeader,
CardTitle,
CardBody,
Icon
} from "@sveltestrap/sveltestrap";
export let cJobs;
export let showLinks = false;
export let displayTitle = true;
export let renderCard = false;
export let width = "auto";
export let height = "310px";
export let height = "400px";
</script>
<Card class="mt-1 overflow-auto" style="width: {width}; height: {height}">
{#if displayTitle}
<CardHeader>
<CardTitle class="mb-0 d-flex justify-content-center">
{#if renderCard}
<Card class="overflow-auto" style="width: {width}; height: {height}">
<CardHeader class="mb-0 d-flex justify-content-center">
{cJobs.items.length} Concurrent Jobs
<Icon
style="cursor:help; margin-left:0.5rem;"
name="info-circle"
title="Jobs running on the same node with overlapping runtimes using shared resources"
/>
</CardTitle>
</CardHeader>
{/if}
<CardBody>
{#if showLinks}
<ul>
@ -56,19 +52,47 @@
{/each}
</ul>
{:else}
{#if displayTitle}
<p>
Jobs running on the same node with overlapping runtimes using shared resources.
</p>
{:else}
<p>
<b>{cJobs.items.length} </b>
Jobs running on the same node with overlapping runtimes using shared resources.
</p>
{/if}
<ul>
{#each cJobs.items as cJob}
<li>
{cJob.jobId}
</li>
{/each}
</ul>
{/if}
</CardBody>
</Card>
{:else}
<p>
Jobs running on the same node with overlapping runtimes using shared resources.
</p>
<hr/>
{#if showLinks}
<ul>
<li>
<a
href="/monitoring/jobs/?{cJobs.listQuery}"
target="_blank">See All</a
>
</li>
{#each cJobs.items as cJob}
<li>
<a href="/monitoring/job/{cJob.id}" target="_blank"
>{cJob.jobId}</a
>
</li>
{/each}
</ul>
{:else}
<ul>
{#each cJobs.items as cJob}
<li>
{cJob.jobId}
</li>
{/each}
</ul>
{/if}
{/if}
<style>
ul {

View File

@ -117,7 +117,7 @@
{/if}
</p>
<p>
<p class="mb-2">
{#each jobTags as tag}
<Tag {tag} />
{/each}

View File

@ -2,10 +2,11 @@
@component Polar Plot based on chartJS Radar
Properties:
- `metrics [String]`: Metric names to display as polar plot
- `cluster GraphQL.Cluster`: Cluster Object of the parent job
- `subCluster GraphQL.SubCluster`: SubCluster Object of the parent job
- `jobMetrics [GraphQL.JobMetricWithName]`: Metric data
- `footprintData [Object]?`: job.footprint content, evaluated in regards to peak config in jobSummary.svelte [Default: null]
- `metrics [String]?`: Metric names to display as polar plot [Default: null]
- `cluster GraphQL.Cluster?`: Cluster Object of the parent job [Default: null]
- `subCluster GraphQL.SubCluster?`: SubCluster Object of the parent job [Default: null]
- `jobMetrics [GraphQL.JobMetricWithName]?`: Metric data [Default: null]
- `height Number?`: Plot height [Default: 350]
-->
@ -33,29 +34,57 @@
LineElement
);
export let metrics
export let cluster
export let subCluster
export let jobMetrics
export let height = 365
export let footprintData = null;
export let metrics = null;
export let cluster = null;
export let subCluster = null;
export let jobMetrics = null;
export let height = 350;
const getMetricConfig = getContext("getMetricConfig")
const labels = metrics.filter(name => {
// Determines the axis labels (metric names) of the polar plot, sorted alphabetically.
// - With `footprintData`: names of all footprint entries that can actually be plotted.
// - Otherwise: names from `metrics` for which node-scope data exists in `jobMetrics`.
// Entries that cannot be plotted are dropped with a console warning.
function getLabels() {
  // Same ordering as before: ascending comparison of the metric names
  const byName = (a, b) => ((a > b) ? 1 : ((b > a) ? -1 : 0));
  // `jobMetrics` defaults to null; treat missing metric data as "nothing available"
  const hasNodeScopeData = (name) =>
    (jobMetrics ?? []).some(m => m.name == name && m.scope == "node");

  if (footprintData) {
    return footprintData
      .filter(fpd => {
        // Impact 4 is assigned in JobSummary.svelte to footprint entries without metric config;
        // those carry no `peak` to scale against. Both conditions must exclude the entry:
        // previously the impact check sat inside the find() predicate, which made such
        // entries match the first job metric and stay in the plot.
        if (fpd.impact == 4 || !hasNodeScopeData(fpd.name)) {
          console.warn(`PolarPlot: No metric data (or config) for '${fpd.name}'`)
          return false
        }
        return true
      })
      .map(filtered => filtered.name)
      .sort(byName);
  }

  // `metrics` defaults to null as well; without any input there is nothing to label
  return (metrics ?? [])
    .filter(name => {
      if (!hasNodeScopeData(name)) {
        console.warn(`PolarPlot: No metric data for '${name}'`)
        return false
      }
      return true
    })
    .sort(byName);
}
const getValuesForStat = (getStat) => labels.map(name => {
const labels = getLabels();
const getMetricConfig = getContext("getMetricConfig");
// Generic mode: for every label, applies `getStat` to the node-scope metric data and
// relates the result to the peak from the metric config; results above 1 are capped at 1.
const getValuesForStatGeneric = (getStat) => labels.map((label) => {
  const { peak } = getMetricConfig(cluster, subCluster, label)
  const nodeMetric = jobMetrics.find((jm) => jm.name == label && jm.scope == "node")
  const ratio = getStat(nodeMetric.metric) / peak
  if (ratio <= 1.) {
    return ratio
  }
  return 1.
})
// Footprint mode: same as the generic variant, but the peak is read from the matching
// `footprintData` entry instead of the metric config; results above 1 are capped at 1.
const getValuesForStatFootprint = (getStat) => labels.map((label) => {
  const { peak } = footprintData.find((entry) => entry.name === label)
  const nodeMetric = jobMetrics.find((jm) => jm.name == label && jm.scope == "node")
  const ratio = getStat(nodeMetric.metric) / peak
  if (ratio <= 1.) {
    return ratio
  }
  return 1.
})
function getMax(metric) {
let max = 0
for (let series of metric.series)
@ -70,12 +99,32 @@
return avg / metric.series.length
}
// Returns the generic-mode plot values for the requested statistic ('avg' or 'max').
// Any other type is logged and yields an empty data array.
function loadDataGeneric(type) {
  switch (type) {
    case 'avg':
      return getValuesForStatGeneric(getAvg)
    case 'max':
      return getValuesForStatGeneric(getMax)
    default:
      console.log('Unknown Type For Polar Data')
      return []
  }
}
// Returns the footprint-mode plot values for the requested statistic ('avg' or 'max').
// Any other type is logged and yields an empty data array.
function loadDataForFootprint(type) {
  switch (type) {
    case 'avg':
      return getValuesForStatFootprint(getAvg)
    case 'max':
      return getValuesForStatFootprint(getMax)
    default:
      console.log('Unknown Type For Polar Data')
      return []
  }
}
const data = {
labels: labels,
datasets: [
{
label: 'Max',
data: getValuesForStat(getMax),
data: footprintData ? loadDataForFootprint('max') : loadDataGeneric('max'), //
fill: 1,
backgroundColor: 'rgba(0, 102, 255, 0.25)',
borderColor: 'rgb(0, 102, 255)',
@ -86,7 +135,7 @@
},
{
label: 'Avg',
data: getValuesForStat(getAvg),
data: footprintData ? loadDataForFootprint('avg') : loadDataGeneric('avg'), // getValuesForStat(getAvg)
fill: true,
backgroundColor: 'rgba(255, 153, 0, 0.25)',
borderColor: 'rgb(255, 153, 0)',
@ -100,7 +149,7 @@
// No custom defined options but keep for clarity
const options = {
maintainAspectRatio: false,
maintainAspectRatio: true,
animation: false,
scales: { // fix scale
r: {

View File

@ -7,7 +7,7 @@
- `allowSizeChange Bool?`: If dimensions of rendered plot can change [Default: false]
- `subCluster GraphQL.SubCluster?`: SubCluster Object; contains required topology information [Default: null]
- `width Number?`: Plot width (reactively adaptive) [Default: 600]
- `height Number?`: Plot height (reactively adaptive) [Default: 350]
- `height Number?`: Plot height (reactively adaptive) [Default: 380]
Data Format:
- `data = [null, [], []]`
@ -33,7 +33,7 @@
export let allowSizeChange = false;
export let subCluster = null;
export let width = 600;
export let height = 350;
export let height = 380;
let plotWrapper = null;
let uplot = null;
@ -317,7 +317,7 @@
// The Color Scale For Time Information
const posX = u.valToPos(0.1, "x", true)
const posXLimit = u.valToPos(100, "x", true)
const posY = u.valToPos(15000.0, "y", true)
const posY = u.valToPos(14000.0, "y", true)
u.ctx.fillStyle = 'black'
u.ctx.fillText('Start', posX, posY)
const start = posX + 10
@ -364,7 +364,7 @@
</script>
{#if data != null}
<div bind:this={plotWrapper} />
<div bind:this={plotWrapper} class="p-2"/>
{:else}
<Card class="mx-4" body color="warning">Cannot render roofline: No data!</Card
>

View File

@ -0,0 +1,340 @@
<!--
@component Job Summary component; Displays job.footprint data as bars in relation to thresholds, as polar plot, and as summarizing comment
Properties:
- `job Object`: The GQL job object
- `jobMetrics [GraphQL.JobMetricWithName]`: Metric data of the job, passed on to the polar plot
- `width String?`: Width of the card [Default: 'auto']
- `height String?`: Height of the card [Default: '400px']
-->
<script context="module">
/**
 * Resolves the thresholds (peak, normal, caution, alert) a job is evaluated against.
 *
 * - aggregation "avg": thresholds of the metric config are used as configured.
 * - aggregation "sum": thresholds are scaled by job.numHWThreads relative to the
 *   length of the hardware topology's `node` list, rounded to integers.
 * - anything else: a warning is logged and the configured thresholds are returned.
 *
 * Returns null (with a warning) if either argument is missing.
 */
function findJobThresholds(job, metricConfig) {
  if (!(job && metricConfig)) {
    console.warn("Argument missing for findJobThresholds!");
    return null;
  }

  // metricConfig is on subCluster-Level
  const configured = {
    peak: metricConfig.peak,
    normal: metricConfig.normal,
    caution: metricConfig.caution,
    alert: metricConfig.alert
  };

  // Job_Exclusivity does not matter, only aggregation
  switch (metricConfig.aggregation) {
    case "avg":
      return configured;
    case "sum": {
      const topology = getContext("getHardwareTopology")(job.cluster, job.subCluster)
      const share = job.numHWThreads / topology.node.length;
      const scale = (threshold) => round(threshold * share, 0);
      return {
        peak: scale(configured.peak),
        normal: scale(configured.normal),
        caution: scale(configured.caution),
        alert: scale(configured.alert),
      };
    }
    default:
      console.warn(
        "Missing or unkown aggregation mode (sum/avg) for metric:",
        metricConfig,
      );
      return configured;
  }
}
</script>
<script>
import { getContext } from "svelte";
import {
Card,
CardBody,
Progress,
Icon,
Tooltip,
Row,
Col,
TabContent,
TabPane
} from "@sveltestrap/sveltestrap";
import Polar from "../generic/plots/Polar.svelte";
import { round } from "mathjs";
export let job;
export let jobMetrics;
export let width = "auto";
export let height = "400px";
const ccconfig = getContext("cc-config")
// Evaluated footprint: one entry per job.footprint metric, rated against the job thresholds.
// Impact legend (also drives icons, colors and the summary):
//    3: alert range          2: caution range        1: normal range
//    0: above normal, up to peak                    -1: no range matched (e.g. beyond peak)
//    4: no metric config found -> value is displayed without rating
// A job without footprint yields an empty list; previously the optional chain short-circuited
// the whole map/sort expression to `undefined`, which broke the consumers of this list.
const footprintData = (job?.footprint ?? []).map((jf) => {
  const fmc = getContext("getMetricConfig")(job.cluster, job.subCluster, jf.name);
  if (fmc) {
    // Unit
    const unit = (fmc?.unit?.prefix ? fmc.unit.prefix : "") + (fmc?.unit?.base ? fmc.unit.base : "")

    // Threshold / -Differences
    const fmt = findJobThresholds(job, fmc);
    // flops_any is rated against 85% of the resolved peak
    if (jf.name === "flops_any") fmt.peak = round(fmt.peak * 0.85, 0);

    // Define basic data -> Value: Use as Provided
    const fmBase = {
      name: jf.name,
      stat: jf.stat,
      value: jf.value,
      unit: unit,
      peak: fmt.peak,
      dir: fmc.lowerIsBetter
    };

    // Checked from worst to best; the first matching range decides the rating
    if (evalFootprint(jf.value, fmt, fmc.lowerIsBetter, "alert")) {
      return {
        ...fmBase,
        color: "danger",
        message: `Metric average way ${fmc.lowerIsBetter ? "above" : "below"} expected normal thresholds.`,
        impact: 3
      };
    } else if (evalFootprint(jf.value, fmt, fmc.lowerIsBetter, "caution")) {
      return {
        ...fmBase,
        color: "warning",
        message: `Metric average ${fmc.lowerIsBetter ? "above" : "below"} expected normal thresholds.`,
        impact: 2,
      };
    } else if (evalFootprint(jf.value, fmt, fmc.lowerIsBetter, "normal")) {
      return {
        ...fmBase,
        color: "success",
        message: "Metric average within expected thresholds.",
        impact: 1,
      };
    } else if (evalFootprint(jf.value, fmt, fmc.lowerIsBetter, "peak")) {
      return {
        ...fmBase,
        color: "info",
        message:
          "Metric average above expected normal thresholds: Check for artifacts recommended.",
        impact: 0,
      };
    } else {
      return {
        ...fmBase,
        color: "secondary",
        message:
          "Metric average above expected peak threshold: Check for artifacts!",
        impact: -1,
      };
    }
  } else { // No matching metric config: display as single value
    return {
      name: jf.name,
      stat: jf.stat,
      value: jf.value,
      message:
        `No config for metric ${jf.name} found.`,
      impact: 4,
    };
  }
}).sort(function (a, b) { // Sort by impact value primarily, within impact sort name alphabetically
  return a.impact - b.impact || ((a.name > b.name) ? 1 : ((b.name > a.name) ? -1 : 0));
});
/**
 * Checks whether `mean` lies in the threshold range named by `level`
 * ("peak", "alert", "caution" or "normal"); any other level yields false.
 *
 * For metrics where a lower value is better the ranges are mirrored:
 * alert reaches up to the peak, normal starts at 0, and the "peak" level
 * never matches (so values beyond the peak fall through to impact -1).
 */
function evalFootprint(mean, thresholds, lowerIsBetter, level) {
  if (level === "peak") {
    // metric over peak -> return false to trigger impact -1
    return lowerIsBetter
      ? false
      : mean <= thresholds.peak && thresholds.normal < mean;
  }

  if (level === "alert") {
    return lowerIsBetter
      ? mean <= thresholds.peak && thresholds.alert <= mean
      : mean <= thresholds.alert && 0 <= mean;
  }

  if (level === "caution") {
    return lowerIsBetter
      ? mean < thresholds.alert && thresholds.caution <= mean
      : mean <= thresholds.caution && thresholds.alert < mean;
  }

  if (level === "normal") {
    return lowerIsBetter
      ? mean < thresholds.caution && 0 <= mean
      : mean <= thresholds.normal && thresholds.caution < mean;
  }

  return false;
}
/**
 * Builds the summary messages (HTML snippets) shown in the "Summary" tab.
 *
 * Footprint entries are grouped into performance, utilization and energy metrics;
 * per group the mean impact of the contained entries is calculated:
 *   - mean impact == 1            -> positive message
 *   - mean impact != 0 (and != 1) -> negative message
 *   - mean impact == 0 or no entry in the group -> no message
 *
 * Entries with impact 4 (no metric config found, hence never rated) are ignored,
 * so that an unrated value cannot drag a group's score down.
 *
 * @param fpd Evaluated footprint entries ({ name, impact, ... }); may be null/undefined
 * @returns Array of message strings, in the order performance, utilization, energy
 */
function writeSummary(fpd) {
  // Hardcoded! Needs to be retrieved from globalMetrics
  const categories = [
    {
      metrics: ['flops_any', 'mem_bw'],
      good: '<b>Performance:</b> Your job performs well.',
      bad: '<b>Performance:</b> Your job performs suboptimal.',
    },
    {
      metrics: ['cpu_load', 'acc_utilization'],
      good: '<b>Utilization:</b> Your job utilizes resources well.',
      bad: '<b>Utilization:</b> Your job utilizes resources suboptimal.',
    },
    {
      metrics: ['cpu_power'],
      good: '<b>Energy:</b> Your job has good energy values.',
      bad: '<b>Energy:</b> Your job consumes more energy than necessary.',
    },
  ];

  // Missing footprint -> nothing to summarize; impact 4 -> entry was never rated
  const rated = (fpd ?? []).filter((metric) => metric.impact !== 4);

  const res = [];
  for (const category of categories) {
    const impacts = rated
      .filter((metric) => category.metrics.includes(metric.name))
      .map((metric) => metric.impact);
    if (impacts.length === 0) continue;

    const score = impacts.reduce((sum, impact) => sum + impact, 0) / impacts.length;
    if (score === 1) {
      res.push(category.good)
    } else if (score !== 0) {
      res.push(category.bad)
    }
  }

  return res;
}
$: summaryMessages = writeSummary(footprintData)
</script>
<!-- Summary card with three tabs: footprint bars, polar plot, textual summary -->
<Card class="overflow-auto" style="width: {width}; height: {height}">
<TabContent> <!-- on:tab={(e) => (status = e.detail)} -->
<!-- Tab 1: one block per footprint entry, in the order of footprintData (sorted by impact, then name) -->
<TabPane tabId="foot" tab="Footprint" active>
<CardBody>
{#each footprintData as fpd, index}
<!-- Rated entry (impact other than 4): title, status icons, value / peak, progress bar -->
{#if fpd.impact !== 4}
<div class="mb-1 d-flex justify-content-between">
<div>&nbsp;<b>{fpd.name} ({fpd.stat})</b></div>
<div
class="cursor-help d-inline-flex"
id={`footprint-${job.jobId}-${index}`}
>
<div class="mx-1">
<!-- Warning sign: filled for impact 3 and -1, outlined for impact 2 -->
{#if fpd.impact === 3 || fpd.impact === -1}
<Icon name="exclamation-triangle-fill" class="text-danger" />
{:else if fpd.impact === 2}
<Icon name="exclamation-triangle" class="text-warning" />
{/if}
<!-- Emoji icon selected by impact value -->
{#if fpd.impact === 3}
<Icon name="emoji-frown" class="text-danger" />
{:else if fpd.impact === 2}
<Icon name="emoji-neutral" class="text-warning" />
{:else if fpd.impact === 1}
<Icon name="emoji-smile" class="text-success" />
{:else if fpd.impact === 0}
<Icon name="emoji-laughing" class="text-info" />
{:else if fpd.impact === -1}
<Icon name="emoji-dizzy" class="text-danger" />
{/if}
</div>
<div>
{fpd.value} / {fpd.peak}
{fpd.unit} &nbsp;
</div>
</div>
<!-- Tooltip is attached to the icon/value element via the shared id and shows the rating message -->
<Tooltip
target={`footprint-${job.jobId}-${index}`}
placement="right"
offset={[0, 20]}>{fpd.message}</Tooltip
>
</div>
<!-- Progress bar row; bottom margin is dropped for the last entry -->
<Row cols={12} class="{(footprintData.length == (index + 1)) ? 'mb-0' : 'mb-2'}">
<!-- Caret is placed left of the bar if fpd.dir (lowerIsBetter) is set, right of it otherwise -->
{#if fpd.dir}
<Col xs="1">
<Icon name="caret-left-fill" />
</Col>
{/if}
<Col xs="11" class="align-content-center">
<Progress value={fpd.value} max={fpd.peak} color={fpd.color} />
</Col>
{#if !fpd.dir}
<Col xs="1">
<Icon name="caret-right-fill" />
</Col>
{/if}
</Row>
{:else}
<!-- Entry without metric config (impact 4): plain value with info icon, no peak and no bar -->
<div class="mb-1 d-flex justify-content-between">
<div>
&nbsp;<b>{fpd.name} ({fpd.stat})</b>
</div>
<div
class="cursor-help d-inline-flex"
id={`footprint-${job.jobId}-${index}`}
>
<div class="mx-1">
<Icon name="info-circle"/>
</div>
<div>
{fpd.value}&nbsp;
</div>
</div>
</div>
<Tooltip
target={`footprint-${job.jobId}-${index}`}
placement="right"
offset={[0, 20]}>{fpd.message}</Tooltip
>
{/if}
{/each}
</CardBody>
</TabPane>
<!-- Tab 2: polar plot in footprint mode (footprintData and jobMetrics are handed to Polar) -->
<TabPane tabId="polar" tab="Polar">
<CardBody>
<Polar
{footprintData}
{jobMetrics}
/>
</CardBody>
</TabPane>
<!-- Tab 3: messages from writeSummary; rendered via @html since the messages contain <b> markup -->
<TabPane tabId="summary" tab="Summary">
<CardBody>
<p>Based on footprint data, this job performs as follows:</p>
<hr/>
<ul>
{#each summaryMessages as sm}
<li>
{@html sm}
</li>
{/each}
</ul>
</CardBody>
</TabPane>
</TabContent>
</Card>
<style>
.cursor-help {
cursor: help;
}
</style>

View File

@ -84,7 +84,7 @@
}
</script>
<Table>
<Table class="mb-0">
<thead>
<tr>
<th>
@ -146,8 +146,6 @@
</tbody>
</Table>
<br />
<MetricSelection
cluster={job.cluster}
configName="job_view_nodestats_selectedMetrics"