cc-backend/web/frontend/src/Job.root.svelte

453 lines
13 KiB
Svelte
Raw Normal View History

2022-06-22 11:20:57 +02:00
<script>
import {
init,
groupByScope,
fetchMetricsStore,
checkMetricDisabled,
transformDataForRoofline,
} from "./utils.js";
import {
Row,
Col,
Card,
Spinner,
TabContent,
TabPane,
CardBody,
CardHeader,
CardTitle,
Button,
Icon,
} from "@sveltestrap/sveltestrap";
import PlotTable from "./PlotTable.svelte";
import Metric from "./Metric.svelte";
import Polar from "./plots/Polar.svelte";
import Roofline from "./plots/Roofline.svelte";
import JobInfo from "./joblist/JobInfo.svelte";
import TagManagement from "./TagManagement.svelte";
import MetricSelection from "./MetricSelection.svelte";
import StatsTable from "./StatsTable.svelte";
import JobFootprint from "./JobFootprint.svelte";
import { getContext } from "svelte";
2022-06-22 11:20:57 +02:00
export let dbid;
export let authlevel;
export let roles;
2022-06-22 11:20:57 +02:00
const accMetrics = [
"acc_utilization",
"acc_mem_used",
"acc_power",
"nv_mem_util",
"nv_sm_clock",
"nv_temp",
];
let accNodeOnly;
const { query: initq } = init(`
2022-06-22 11:20:57 +02:00
job(id: "${dbid}") {
id, jobId, user, project, cluster, startTime,
duration, numNodes, numHWThreads, numAcc,
SMT, exclusive, partition, subCluster, arrayJobId,
monitoringStatus, state, walltime,
tags { id, type, name },
resources { hostname, hwthreads, accelerators },
metaData,
userData { name, email },
concurrentJobs { items { id, jobId }, count, listQuery },
flopsAnyAvg, memBwAvg, loadAvg
2022-06-22 11:20:57 +02:00
}
`);
2022-06-22 11:20:57 +02:00
const ccconfig = getContext("cc-config"),
clusters = getContext("clusters"),
metrics = getContext("metrics");
2022-06-22 11:20:57 +02:00
let isMetricsSelectionOpen = false,
selectedMetrics = [],
showFootprint = true,
isFetched = new Set();
const [jobMetrics, startFetching] = fetchMetricsStore();
getContext("on-init")(() => {
let job = $initq.data.job;
if (!job) return;
2022-06-22 11:20:57 +02:00
selectedMetrics =
ccconfig[`job_view_selectedMetrics:${job.cluster}`] ||
clusters
.find((c) => c.name == job.cluster)
.metricConfig.map((mc) => mc.name);
showFootprint =
ccconfig[`job_view_showFootprint`]
let toFetch = new Set([
"flops_any",
"mem_bw",
...selectedMetrics,
...(ccconfig[`job_view_polarPlotMetrics:${job.cluster}`] ||
ccconfig[`job_view_polarPlotMetrics`]),
...(ccconfig[`job_view_nodestats_selectedMetrics:${job.cluster}`] ||
ccconfig[`job_view_nodestats_selectedMetrics`]),
]);
// Select default Scopes to load: Check before if accelerator metrics are not on accelerator scope by default
accNodeOnly = [...toFetch].some(function (m) {
if (accMetrics.includes(m)) {
const mc = metrics(job.cluster, m);
return mc.scope !== "accelerator";
} else {
return false;
}
});
2022-06-22 11:20:57 +02:00
if (job.numAcc === 0 || accNodeOnly === true) {
// No Accels or Accels on Node Scope
startFetching(
job,
[...toFetch],
job.numNodes > 2 ? ["node"] : ["node", "socket", "core"],
);
} else {
// Accels and not on node scope
startFetching(
job,
[...toFetch],
job.numNodes > 2
? ["node", "accelerator"]
: ["node", "accelerator", "socket", "core"],
);
}
isFetched = toFetch;
});
const lazyFetchMoreMetrics = () => {
let notYetFetched = new Set();
for (let m of selectedMetrics) {
if (!isFetched.has(m)) {
notYetFetched.add(m);
isFetched.add(m);
}
}
if (notYetFetched.size > 0)
startFetching(
$initq.data.job,
[...notYetFetched],
$initq.data.job.numNodes > 2 ? ["node"] : ["node", "core"],
);
};
// Fetch more data once required:
$: if ($initq.data && $jobMetrics.data && selectedMetrics)
lazyFetchMoreMetrics();
2023-11-20 17:53:12 +01:00
let plots = {},
jobTags,
statsTable,
jobFootprint;
2022-06-22 11:20:57 +02:00
$: document.title = $initq.fetching
? "Loading..."
: $initq.error
? "Error"
: `Job ${$initq.data.job.jobId} - ClusterCockpit`;
2022-06-22 11:20:57 +02:00
// Find out what metrics or hosts are missing:
let missingMetrics = [],
missingHosts = [],
somethingMissing = false;
$: if ($initq.data && $jobMetrics.data) {
let job = $initq.data.job,
metrics = $jobMetrics.data.jobMetrics,
metricNames = clusters
.find((c) => c.name == job.cluster)
.metricConfig.map((mc) => mc.name);
2022-06-22 11:20:57 +02:00
// Metric not found in JobMetrics && Metric not explicitly disabled: Was expected, but is Missing
missingMetrics = metricNames.filter(
(metric) =>
!metrics.some((jm) => jm.name == metric) &&
!checkMetricDisabled(
metric,
$initq.data.job.cluster,
$initq.data.job.subCluster,
),
);
missingHosts = job.resources
.map(({ hostname }) => ({
hostname: hostname,
metrics: metricNames.filter(
(metric) =>
!metrics.some(
(jm) =>
jm.scope == "node" &&
jm.metric.series.some((series) => series.hostname == hostname),
),
),
}))
.filter(({ metrics }) => metrics.length > 0);
somethingMissing = missingMetrics.length > 0 || missingHosts.length > 0;
}
const orderAndMap = (grouped, selectedMetrics) =>
selectedMetrics.map((metric) => ({
metric: metric,
data: grouped.find((group) => group[0].name == metric),
disabled: checkMetricDisabled(
metric,
$initq.data.job.cluster,
$initq.data.job.subCluster,
),
}));
2022-06-22 11:20:57 +02:00
</script>
<Row>
<Col>
{#if $initq.error}
<Card body color="danger">{$initq.error.message}</Card>
{:else if $initq.data}
<JobInfo job={$initq.data.job} {jobTags} />
{:else}
<Spinner secondary />
2023-11-16 12:49:20 +01:00
{/if}
</Col>
{#if $jobMetrics.data && showFootprint}
{#key $jobMetrics.data}
<Col>
<JobFootprint
bind:this={jobFootprint}
job={$initq.data.job}
jobMetrics={$jobMetrics.data.jobMetrics}
/>
</Col>
{/key}
{/if}
{#if $jobMetrics.data && $initq.data}
{#if $initq.data.job.concurrentJobs != null && $initq.data.job.concurrentJobs.items.length != 0}
{#if authlevel > roles.manager}
2022-06-22 11:20:57 +02:00
<Col>
<h5>
Concurrent Jobs <Icon
name="info-circle"
style="cursor:help;"
title="Shared jobs running on the same node with overlapping runtimes"
/>
</h5>
<ul>
<li>
<a
href="/monitoring/jobs/?{$initq.data.job.concurrentJobs
.listQuery}"
target="_blank">See All</a
>
</li>
{#each $initq.data.job.concurrentJobs.items as pjob, index}
<li>
<a href="/monitoring/job/{pjob.id}" target="_blank"
>{pjob.jobId}</a
>
</li>
{/each}
</ul>
2022-06-22 11:20:57 +02:00
</Col>
{:else}
2022-06-22 11:20:57 +02:00
<Col>
<h5>
{$initq.data.job.concurrentJobs.items.length} Concurrent Jobs
</h5>
<p>
Number of shared jobs on the same node with overlapping runtimes.
</p>
2022-06-22 11:20:57 +02:00
</Col>
{/if}
2022-06-22 11:20:57 +02:00
{/if}
<Col>
<Polar
metrics={ccconfig[
`job_view_polarPlotMetrics:${$initq.data.job.cluster}`
] || ccconfig[`job_view_polarPlotMetrics`]}
cluster={$initq.data.job.cluster}
jobMetrics={$jobMetrics.data.jobMetrics}
/>
2022-06-22 11:20:57 +02:00
</Col>
<Col>
<Roofline
renderTime={true}
cluster={clusters
.find((c) => c.name == $initq.data.job.cluster)
.subClusters.find((sc) => sc.name == $initq.data.job.subCluster)}
data={transformDataForRoofline(
$jobMetrics.data.jobMetrics.find(
(m) => m.name == "flops_any" && m.scope == "node",
).metric,
$jobMetrics.data.jobMetrics.find(
(m) => m.name == "mem_bw" && m.scope == "node",
).metric,
)}
/>
2022-06-22 11:20:57 +02:00
</Col>
{:else}
<Col />
<Col />
{/if}
</Row>
<Row class="mb-3">
<Col xs="auto">
{#if $initq.data}
<TagManagement job={$initq.data.job} bind:jobTags />
{/if}
</Col>
<Col xs="auto">
{#if $initq.data}
<Button outline on:click={() => (isMetricsSelectionOpen = true)}>
<Icon name="graph-up" /> Metrics
</Button>
{/if}
</Col>
<!-- <Col xs="auto">
2022-06-22 11:20:57 +02:00
<Zoom timeseriesPlots={plots} />
</Col> -->
2022-06-22 11:20:57 +02:00
</Row>
<Row>
<Col>
{#if $jobMetrics.error}
{#if $initq.data.job.monitoringStatus == 0 || $initq.data.job.monitoringStatus == 2}
<Card body color="warning">Not monitored or archiving failed</Card>
<br />
{/if}
<Card body color="danger">{$jobMetrics.error.message}</Card>
{:else if $jobMetrics.fetching}
<Spinner secondary />
{:else if $jobMetrics.data && $initq.data}
<PlotTable
let:item
let:width
renderFor="job"
items={orderAndMap(
groupByScope($jobMetrics.data.jobMetrics),
selectedMetrics,
)}
itemsPerRow={ccconfig.plot_view_plotsPerRow}
>
{#if item.data}
<Metric
bind:this={plots[item.metric]}
on:more-loaded={({ detail }) => statsTable.moreLoaded(detail)}
job={$initq.data.job}
metricName={item.metric}
rawData={item.data.map((x) => x.metric)}
scopes={item.data.map((x) => x.scope)}
{width}
isShared={$initq.data.job.exclusive != 1}
resources={$initq.data.job.resources}
/>
{:else}
<Card body color="warning"
>No dataset returned for <code>{item.metric}</code></Card
>
2022-06-22 11:20:57 +02:00
{/if}
</PlotTable>
{/if}
</Col>
2022-06-22 11:20:57 +02:00
</Row>
2023-09-05 15:17:39 +02:00
<Row class="mt-2">
<Col>
{#if $initq.data}
<TabContent>
{#if somethingMissing}
<TabPane tabId="resources" tab="Resources" active={somethingMissing}>
<div style="margin: 10px;">
<Card color="warning">
<CardHeader>
<CardTitle>Missing Metrics/Reseources</CardTitle>
</CardHeader>
<CardBody>
{#if missingMetrics.length > 0}
<p>
No data at all is available for the metrics: {missingMetrics.join(
", ",
)}
</p>
{/if}
{#if missingHosts.length > 0}
<p>Some metrics are missing for the following hosts:</p>
<ul>
{#each missingHosts as missing}
<li>
{missing.hostname}: {missing.metrics.join(", ")}
</li>
{/each}
</ul>
{/if}
</CardBody>
</Card>
</div>
</TabPane>
2022-06-22 11:20:57 +02:00
{/if}
<TabPane
tabId="stats"
tab="Statistics Table"
active={!somethingMissing}
>
{#if $jobMetrics.data}
{#key $jobMetrics.data}
<StatsTable
bind:this={statsTable}
job={$initq.data.job}
jobMetrics={$jobMetrics.data.jobMetrics}
/>
{/key}
{/if}
</TabPane>
<TabPane tabId="job-script" tab="Job Script">
<div class="pre-wrapper">
{#if $initq.data.job.metaData?.jobScript}
<pre><code>{$initq.data.job.metaData?.jobScript}</code></pre>
{:else}
<Card body color="warning">No job script available</Card>
{/if}
</div>
</TabPane>
<TabPane tabId="slurm-info" tab="Slurm Info">
<div class="pre-wrapper">
{#if $initq.data.job.metaData?.slurmInfo}
<pre><code>{$initq.data.job.metaData?.slurmInfo}</code></pre>
{:else}
<Card body color="warning"
>No additional slurm information available</Card
>
{/if}
</div>
</TabPane>
</TabContent>
{/if}
</Col>
2022-06-22 11:20:57 +02:00
</Row>
{#if $initq.data}
<MetricSelection
cluster={$initq.data.job.cluster}
configName="job_view_selectedMetrics"
bind:metrics={selectedMetrics}
bind:isOpen={isMetricsSelectionOpen}
/>
2022-06-22 11:20:57 +02:00
{/if}
<style>
.pre-wrapper {
font-size: 1.1rem;
margin: 10px;
border: 1px solid #bbb;
border-radius: 5px;
padding: 5px;
}
ul {
columns: 2;
-webkit-columns: 2;
-moz-columns: 2;
}
2022-06-22 11:20:57 +02:00
</style>