cc-backend/web/frontend/src/Status.root.svelte

671 lines
19 KiB
Svelte

<!--
@component Main cluster status view component; renders current system-usage information
Properties:
- `cluster String`: The cluster to show status information for
-->
<script>
import { getContext } from "svelte";
import {
Row,
Col,
Spinner,
Card,
CardHeader,
CardTitle,
CardBody,
Table,
Progress,
Icon,
Button,
} from "@sveltestrap/sveltestrap";
import {
queryStore,
gql,
getContextClient,
mutationStore,
} from "@urql/svelte";
import {
init,
convert2uplot,
transformPerNodeDataForRoofline,
} from "./generic/utils.js";
import { scaleNumbers } from "./generic/units.js";
import PlotGrid from "./generic/PlotGrid.svelte";
import Roofline from "./generic/plots/Roofline.svelte";
import Pie, { colors } from "./generic/plots/Pie.svelte";
import Histogram from "./generic/plots/Histogram.svelte";
import Refresher from "./generic/helper/Refresher.svelte";
import HistogramSelection from "./generic/select/HistogramSelection.svelte";
const { query: initq } = init();
const ccconfig = getContext("cc-config");
export let cluster;
let plotWidths = [];
let colWidth;
let from = new Date(Date.now() - 5 * 60 * 1000),
to = new Date(Date.now());
const topOptions = [
{ key: "totalJobs", label: "Jobs" },
{ key: "totalNodes", label: "Nodes" },
{ key: "totalCores", label: "Cores" },
{ key: "totalAccs", label: "Accelerators" },
];
let topProjectSelection =
topOptions.find(
(option) =>
option.key ==
ccconfig[`status_view_selectedTopProjectCategory:${cluster}`],
) ||
topOptions.find(
(option) => option.key == ccconfig.status_view_selectedTopProjectCategory,
);
let topUserSelection =
topOptions.find(
(option) =>
option.key ==
ccconfig[`status_view_selectedTopUserCategory:${cluster}`],
) ||
topOptions.find(
(option) => option.key == ccconfig.status_view_selectedTopUserCategory,
);
let isHistogramSelectionOpen = false;
$: metricsInHistograms = cluster
? ccconfig[`user_view_histogramMetrics:${cluster}`] || []
: ccconfig.user_view_histogramMetrics || [];
const client = getContextClient();
$: mainQuery = queryStore({
client: client,
query: gql`
query (
$cluster: String!
$filter: [JobFilter!]!
$metrics: [String!]
$from: Time!
$to: Time!
$metricsInHistograms: [String!]
) {
nodeMetrics(
cluster: $cluster
metrics: $metrics
from: $from
to: $to
) {
host
subCluster
metrics {
name
scope
metric {
timestep
unit {
base
prefix
}
series {
data
}
}
}
}
stats: jobsStatistics(filter: $filter, metrics: $metricsInHistograms) {
histDuration {
count
value
}
histNumNodes {
count
value
}
histNumCores {
count
value
}
histNumAccs {
count
value
}
histMetrics {
metric
unit
data {
min
max
count
bin
}
}
}
allocatedNodes(cluster: $cluster) {
name
count
}
}
`,
variables: {
cluster: cluster,
metrics: ["flops_any", "mem_bw"], // Fixed names for roofline and status bars
from: from.toISOString(),
to: to.toISOString(),
filter: [{ state: ["running"] }, { cluster: { eq: cluster } }],
metricsInHistograms: metricsInHistograms,
},
});
const paging = { itemsPerPage: 10, page: 1 }; // Top 10
$: topUserQuery = queryStore({
client: client,
query: gql`
query (
$filter: [JobFilter!]!
$paging: PageRequest!
$sortBy: SortByAggregate!
) {
topUser: jobsStatistics(
filter: $filter
page: $paging
sortBy: $sortBy
groupBy: USER
) {
id
totalJobs
totalNodes
totalCores
totalAccs
}
}
`,
variables: {
filter: [{ state: ["running"] }, { cluster: { eq: cluster } }],
paging,
sortBy: topUserSelection.key.toUpperCase(),
},
});
$: topProjectQuery = queryStore({
client: client,
query: gql`
query (
$filter: [JobFilter!]!
$paging: PageRequest!
$sortBy: SortByAggregate!
) {
topProjects: jobsStatistics(
filter: $filter
page: $paging
sortBy: $sortBy
groupBy: PROJECT
) {
id
totalJobs
totalNodes
totalCores
totalAccs
}
}
`,
variables: {
filter: [{ state: ["running"] }, { cluster: { eq: cluster } }],
paging,
sortBy: topProjectSelection.key.toUpperCase(),
},
});
const sumUp = (data, subcluster, metric) =>
data.reduce(
(sum, node) =>
node.subCluster == subcluster
? sum +
(node.metrics
.find((m) => m.name == metric)
?.metric.series.reduce(
(sum, series) => sum + series.data[series.data.length - 1],
0,
) || 0)
: sum,
0,
);
let allocatedNodes = {},
flopRate = {},
flopRateUnitPrefix = {},
flopRateUnitBase = {},
memBwRate = {},
memBwRateUnitPrefix = {},
memBwRateUnitBase = {};
$: if ($initq.data && $mainQuery.data) {
let subClusters = $initq.data.clusters.find(
(c) => c.name == cluster,
).subClusters;
for (let subCluster of subClusters) {
allocatedNodes[subCluster.name] =
$mainQuery.data.allocatedNodes.find(
({ name }) => name == subCluster.name,
)?.count || 0;
flopRate[subCluster.name] =
Math.floor(
sumUp($mainQuery.data.nodeMetrics, subCluster.name, "flops_any") *
100,
) / 100;
flopRateUnitPrefix[subCluster.name] = subCluster.flopRateSimd.unit.prefix;
flopRateUnitBase[subCluster.name] = subCluster.flopRateSimd.unit.base;
memBwRate[subCluster.name] =
Math.floor(
sumUp($mainQuery.data.nodeMetrics, subCluster.name, "mem_bw") * 100,
) / 100;
memBwRateUnitPrefix[subCluster.name] =
subCluster.memoryBandwidth.unit.prefix;
memBwRateUnitBase[subCluster.name] = subCluster.memoryBandwidth.unit.base;
}
}
const updateConfigurationMutation = ({ name, value }) => {
return mutationStore({
client: client,
query: gql`
mutation ($name: String!, $value: String!) {
updateConfiguration(name: $name, value: $value)
}
`,
variables: { name, value },
});
};
function updateTopUserConfiguration(select) {
if (ccconfig[`status_view_selectedTopUserCategory:${cluster}`] != select) {
updateConfigurationMutation({
name: `status_view_selectedTopUserCategory:${cluster}`,
value: JSON.stringify(select),
}).subscribe((res) => {
if (res.fetching === false && !res.error) {
// console.log(`status_view_selectedTopUserCategory:${cluster}` + ' -> Updated!')
} else if (res.fetching === false && res.error) {
throw res.error;
}
});
} else {
// console.log('No Mutation Required: Top User')
}
}
function updateTopProjectConfiguration(select) {
if (
ccconfig[`status_view_selectedTopProjectCategory:${cluster}`] != select
) {
updateConfigurationMutation({
name: `status_view_selectedTopProjectCategory:${cluster}`,
value: JSON.stringify(select),
}).subscribe((res) => {
if (res.fetching === false && !res.error) {
// console.log(`status_view_selectedTopProjectCategory:${cluster}` + ' -> Updated!')
} else if (res.fetching === false && res.error) {
throw res.error;
}
});
} else {
// console.log('No Mutation Required: Top Project')
}
}
$: updateTopUserConfiguration(topUserSelection.key);
$: updateTopProjectConfiguration(topProjectSelection.key);
</script>
<!-- Loading indicator & Refresh -->
<Row cols={{ lg: 3, md: 3, sm: 1 }}>
<Col style="">
<h4 class="mb-0">Current utilization of cluster "{cluster}"</h4>
</Col>
<Col class="mt-2 mt-md-0 text-md-end">
<Button
outline
color="secondary"
on:click={() => (isHistogramSelectionOpen = true)}
>
<Icon name="bar-chart-line" /> Select Histograms
</Button>
</Col>
<Col class="mt-2 mt-md-0">
<Refresher
initially={120}
on:refresh={() => {
from = new Date(Date.now() - 5 * 60 * 1000);
to = new Date(Date.now());
}}
/>
</Col>
</Row>
<Row cols={1} class="text-center mt-3">
<Col>
{#if $initq.fetching || $mainQuery.fetching}
<Spinner />
{:else if $initq.error}
<Card body color="danger">{$initq.error.message}</Card>
{:else}
<!-- ... -->
{/if}
</Col>
</Row>
{#if $mainQuery.error}
<Row cols={1}>
<Col>
<Card body color="danger">{$mainQuery.error.message}</Card>
</Col>
</Row>
{/if}
<hr />
<!-- Gauges & Roofline per Subcluster-->
{#if $initq.data && $mainQuery.data}
{#each $initq.data.clusters.find((c) => c.name == cluster).subClusters as subCluster, i}
<Row cols={{ lg: 2, md: 1 , sm: 1}} class="mb-3 justify-content-center">
<Col class="px-3">
<Card class="h-auto mt-1">
<CardHeader>
<CardTitle class="mb-0">SubCluster "{subCluster.name}"</CardTitle>
</CardHeader>
<CardBody>
<Table borderless>
<tr class="py-2">
<th scope="col">Allocated Nodes</th>
<td style="min-width: 100px;"
><div class="col">
<Progress
value={allocatedNodes[subCluster.name]}
max={subCluster.numberOfNodes}
/>
</div></td
>
<td
>{allocatedNodes[subCluster.name]} / {subCluster.numberOfNodes}
Nodes</td
>
</tr>
<tr class="py-2">
<th scope="col"
>Flop Rate (Any) <Icon
name="info-circle"
class="p-1"
style="cursor: help;"
title="Flops[Any] = (Flops[Double] x 2) + Flops[Single]"
/></th
>
<td style="min-width: 100px;"
><div class="col">
<Progress
value={flopRate[subCluster.name]}
max={subCluster.flopRateSimd.value *
subCluster.numberOfNodes}
/>
</div></td
>
<td>
{scaleNumbers(
flopRate[subCluster.name],
subCluster.flopRateSimd.value * subCluster.numberOfNodes,
flopRateUnitPrefix[subCluster.name],
)}{flopRateUnitBase[subCluster.name]} [Max]
</td>
</tr>
<tr class="py-2">
<th scope="col">MemBw Rate</th>
<td style="min-width: 100px;"
><div class="col">
<Progress
value={memBwRate[subCluster.name]}
max={subCluster.memoryBandwidth.value *
subCluster.numberOfNodes}
/>
</div></td
>
<td>
{scaleNumbers(
memBwRate[subCluster.name],
subCluster.memoryBandwidth.value * subCluster.numberOfNodes,
memBwRateUnitPrefix[subCluster.name],
)}{memBwRateUnitBase[subCluster.name]} [Max]
</td>
</tr>
</Table>
</CardBody>
</Card>
</Col>
<Col class="px-3 mt-2 mt-lg-0">
<div bind:clientWidth={plotWidths[i]}>
{#key $mainQuery.data.nodeMetrics}
<Roofline
allowSizeChange={true}
width={plotWidths[i] - 10}
height={300}
subCluster={subCluster}
data={transformPerNodeDataForRoofline(
$mainQuery.data.nodeMetrics.filter(
(data) => data.subCluster == subCluster.name,
),
)}
/>
{/key}
</div>
</Col>
</Row>
{/each}
<hr />
<!-- Usage Stats as Histograms -->
<Row cols={{ lg: 4, md: 2, sm: 1 }}>
<Col class="p-2">
<div bind:clientWidth={colWidth}>
<h4 class="text-center">
Top Users on {cluster.charAt(0).toUpperCase() + cluster.slice(1)}
</h4>
{#key $topUserQuery.data}
{#if $topUserQuery.fetching}
<Spinner />
{:else if $topUserQuery.error}
<Card body color="danger">{$topUserQuery.error.message}</Card>
{:else}
<Pie
size={colWidth}
sliceLabel={topUserSelection.label}
quantities={$topUserQuery.data.topUser.map(
(tu) => tu[topUserSelection.key],
)}
entities={$topUserQuery.data.topUser.map((tu) => tu.id)}
/>
{/if}
{/key}
</div>
</Col>
<Col class="px-4 py-2">
{#key $topUserQuery.data}
{#if $topUserQuery.fetching}
<Spinner />
{:else if $topUserQuery.error}
<Card body color="danger">{$topUserQuery.error.message}</Card>
{:else}
<Table>
<tr class="mb-2">
<th>Legend</th>
<th>User Name</th>
<th
>Number of
<select class="p-0" bind:value={topUserSelection}>
{#each topOptions as option}
<option value={option}>
{option.label}
</option>
{/each}
</select>
</th>
</tr>
{#each $topUserQuery.data.topUser as tu, i}
<tr>
<td><Icon name="circle-fill" style="color: {colors[i]};" /></td>
<th scope="col"
><a
href="/monitoring/user/{tu.id}?cluster={cluster}&state=running"
>{tu.id}</a
></th
>
<td>{tu[topUserSelection.key]}</td>
</tr>
{/each}
</Table>
{/if}
{/key}
</Col>
<Col class="p-2">
<h4 class="text-center">
Top Projects on {cluster.charAt(0).toUpperCase() + cluster.slice(1)}
</h4>
{#key $topProjectQuery.data}
{#if $topProjectQuery.fetching}
<Spinner />
{:else if $topProjectQuery.error}
<Card body color="danger">{$topProjectQuery.error.message}</Card>
{:else}
<Pie
size={colWidth}
sliceLabel={topProjectSelection.label}
quantities={$topProjectQuery.data.topProjects.map(
(tp) => tp[topProjectSelection.key],
)}
entities={$topProjectQuery.data.topProjects.map((tp) => tp.id)}
/>
{/if}
{/key}
</Col>
<Col class="px-4 py-2">
{#key $topProjectQuery.data}
{#if $topProjectQuery.fetching}
<Spinner />
{:else if $topProjectQuery.error}
<Card body color="danger">{$topProjectQuery.error.message}</Card>
{:else}
<Table>
<tr class="mb-2">
<th>Legend</th>
<th>Project Code</th>
<th
>Number of
<select class="p-0" bind:value={topProjectSelection}>
{#each topOptions as option}
<option value={option}>
{option.label}
</option>
{/each}
</select>
</th>
</tr>
{#each $topProjectQuery.data.topProjects as tp, i}
<tr>
<td><Icon name="circle-fill" style="color: {colors[i]};" /></td>
<th scope="col"
><a
href="/monitoring/jobs/?cluster={cluster}&state=running&project={tp.id}&projectMatch=eq"
>{tp.id}</a
></th
>
<td>{tp[topProjectSelection.key]}</td>
</tr>
{/each}
</Table>
{/if}
{/key}
</Col>
</Row>
<hr class="my-2" />
<Row cols={{ lg: 2, md: 1 }}>
<Col class="p-2">
{#key $mainQuery.data.stats}
<Histogram
data={convert2uplot($mainQuery.data.stats[0].histDuration)}
title="Duration Distribution"
xlabel="Current Runtimes"
xunit="Hours"
ylabel="Number of Jobs"
yunit="Jobs"
/>
{/key}
</Col>
<Col class="p-2">
{#key $mainQuery.data.stats}
<Histogram
data={convert2uplot($mainQuery.data.stats[0].histNumNodes)}
title="Number of Nodes Distribution"
xlabel="Allocated Nodes"
xunit="Nodes"
ylabel="Number of Jobs"
yunit="Jobs"
/>
{/key}
</Col>
</Row>
<Row cols={{ lg: 2, md: 1 }}>
<Col class="p-2">
{#key $mainQuery.data.stats}
<Histogram
data={convert2uplot($mainQuery.data.stats[0].histNumCores)}
title="Number of Cores Distribution"
xlabel="Allocated Cores"
xunit="Cores"
ylabel="Number of Jobs"
yunit="Jobs"
/>
{/key}
</Col>
<Col class="p-2">
{#key $mainQuery.data.stats}
<Histogram
data={convert2uplot($mainQuery.data.stats[0].histNumAccs)}
title="Number of Accelerators Distribution"
xlabel="Allocated Accs"
xunit="Accs"
ylabel="Number of Jobs"
yunit="Jobs"
/>
{/key}
</Col>
</Row>
<hr class="my-2" />
{#if metricsInHistograms}
{#key $mainQuery.data.stats[0].histMetrics}
<PlotGrid
let:item
renderFor="user"
items={$mainQuery.data.stats[0].histMetrics}
itemsPerRow={2}
>
<Histogram
data={convert2uplot(item.data)}
usesBins={true}
title="Distribution of '{item.metric}' averages"
xlabel={`${item.metric} bin maximum ${item?.unit ? `[${item.unit}]` : ``}`}
xunit={item.unit}
ylabel="Number of Jobs"
yunit="Jobs"
/>
</PlotGrid>
{/key}
{/if}
{/if}
<HistogramSelection
bind:cluster
bind:metricsInHistograms
bind:isOpen={isHistogramSelectionOpen}
/>