mirror of
https://github.com/ClusterCockpit/cc-backend
synced 2025-12-16 20:26:16 +01:00
Add public dashboard and route, add DoubleMetricPlot and GQL queries
- add roofline legend display switch - small fixes
This commit is contained in:
@@ -74,5 +74,6 @@ export default [
|
||||
entrypoint('node', 'src/node.entrypoint.js'),
|
||||
entrypoint('analysis', 'src/analysis.entrypoint.js'),
|
||||
entrypoint('status', 'src/status.entrypoint.js'),
|
||||
entrypoint('dashpublic', 'src/dashpublic.entrypoint.js'),
|
||||
entrypoint('config', 'src/config.entrypoint.js')
|
||||
];
|
||||
|
||||
671
web/frontend/src/DashPublic.root.svelte
Normal file
671
web/frontend/src/DashPublic.root.svelte
Normal file
@@ -0,0 +1,671 @@
|
||||
<!--
|
||||
@component Public cluster dashboard component; renders current system-usage information for a single cluster
|
||||
|
||||
Properties:
|
||||
- `presetCluster String`: The cluster to show status information for
|
||||
-->
|
||||
|
||||
<script>
|
||||
import {
|
||||
getContext
|
||||
} from "svelte"
|
||||
import {
|
||||
queryStore,
|
||||
gql,
|
||||
getContextClient,
|
||||
} from "@urql/svelte";
|
||||
import {
|
||||
init,
|
||||
scramble,
|
||||
scrambleNames,
|
||||
convert2uplot
|
||||
} from "./generic/utils.js";
|
||||
import {
|
||||
formatDurationTime,
|
||||
formatNumber,
|
||||
} from "./generic/units.js";
|
||||
import {
|
||||
Row,
|
||||
Col,
|
||||
Card,
|
||||
CardTitle,
|
||||
CardHeader,
|
||||
CardBody,
|
||||
Spinner,
|
||||
Table,
|
||||
Progress,
|
||||
Icon,
|
||||
} from "@sveltestrap/sveltestrap";
|
||||
import Roofline from "./generic/plots/Roofline.svelte";
|
||||
import Pie, { colors } from "./generic/plots/Pie.svelte";
|
||||
import Stacked from "./generic/plots/Stacked.svelte";
|
||||
// import Histogram from "./generic/plots/Histogram.svelte";
|
||||
import DoubleMetric from "./generic/plots/DoubleMetricPlot.svelte";
|
||||
|
||||
/* Svelte 5 Props */
|
||||
let {
|
||||
presetCluster,
|
||||
} = $props();
|
||||
|
||||
/*Const Init */
|
||||
const { query: initq } = init();
|
||||
const client = getContextClient();
|
||||
const useCbColors = getContext("cc-config")?.plotConfiguration_colorblindMode || false
|
||||
|
||||
/* States */
|
||||
let pagingState = $state({page: 1, itemsPerPage: 10}) // Top 10
|
||||
let from = $state(new Date(Date.now() - (5 * 60 * 1000)));
|
||||
let clusterFrom = $state(new Date(Date.now() - (8 * 60 * 60 * 1000)));
|
||||
let to = $state(new Date(Date.now()));
|
||||
let stackedFrom = $state(Math.floor(Date.now() / 1000) - 14400);
|
||||
let colWidthStates = $state(0);
|
||||
let colWidthRoof = $state(0);
|
||||
let colWidthTotals = $state(0);
|
||||
let colWidthStacked = $state(0);
|
||||
|
||||
/* Derived */
|
||||
// Timed node and health states for the stacked charts.
// The time window starts at `stackedFrom` (unix seconds, declared with the other
// states above). The previous hard-coded debug timestamp pinned the charts to
// one fixed point in time regardless of when the dashboard was opened.
const statesTimed = $derived(queryStore({
  client: client,
  query: gql`
    query ($filter: [NodeFilter!], $typeNode: String!, $typeHealth: String!) {
      nodeStates: nodeStatesTimed(filter: $filter, type: $typeNode) {
        state
        counts
        times
      }
      healthStates: nodeStatesTimed(filter: $filter, type: $typeHealth) {
        state
        counts
        times
      }
    }
  `,
  variables: {
    filter: { cluster: { eq: presetCluster }, timeStart: stackedFrom },
    typeNode: "node",
    typeHealth: "health"
  },
  requestPolicy: "network-only"
}));
|
||||
|
||||
// Main status query: one request that collects everything the dashboard shows
// for `presetCluster` (node metric averages, running-job stats, job list,
// allocations, node states, node list, per-subCluster job statistics and the
// cluster-wide metric time series).
// Note: nodeMetrics are requested on configured $timestep resolution
// Result: The latest 5 minutes (datapoints) for each node independent of job
const statusQuery = $derived.by(() => {
  const statusVariables = {
    cluster: presetCluster,
    metrics: ["flops_any", "mem_bw"], // Fixed names for roofline and status bars
    from: from.toISOString(),
    clusterFrom: clusterFrom.toISOString(),
    to: to.toISOString(),
    jobFilter: [{ state: ["running"] }, { cluster: { eq: presetCluster } }],
    nodeFilter: { cluster: { eq: presetCluster }},
    paging: { itemsPerPage: -1, page: 1 }, // Get all: -1
    sorting: { field: "startTime", type: "col", order: "DESC" }
  };

  return queryStore({
    client,
    query: gql`
      query (
        $cluster: String!
        $metrics: [String!]
        $from: Time!
        $to: Time!
        $clusterFrom: Time!
        $jobFilter: [JobFilter!]!
        $nodeFilter: [NodeFilter!]!
        $paging: PageRequest!
        $sorting: OrderByInput!
      ) {
        # Node 5 Minute Averages for Roofline
        nodeMetrics(
          cluster: $cluster
          metrics: $metrics
          from: $from
          to: $to
        ) {
          host
          subCluster
          metrics {
            name
            metric {
              series {
                statistics {
                  avg
                }
              }
            }
          }
        }
        # Running Job Metric Average for Rooflines
        jobsMetricStats(filter: $jobFilter, metrics: $metrics) {
          id
          jobId
          duration
          numNodes
          numAccelerators
          subCluster
          stats {
            name
            data {
              avg
            }
          }
        }
        # Get Jobs for Per-Node Counts
        jobs(filter: $jobFilter, order: $sorting, page: $paging) {
          items {
            jobId
            resources {
              hostname
            }
          }
          count
        }
        # Only counts shared nodes once
        allocatedNodes(cluster: $cluster) {
          name
          count
        }
        # Get Current States fir Pie Charts
        nodeStates(filter: $nodeFilter) {
          state
          count
        }
        # Get States for Node Roofline; $sorting unused in backend: Use placeholder
        nodes(filter: $nodeFilter, order: $sorting) {
          count
          items {
            hostname
            cluster
            subCluster
            schedulerState
          }
        }
        # totalNodes includes multiples if shared jobs: Info-Card Data
        jobsStatistics(
          filter: $jobFilter
          page: $paging
          sortBy: TOTALJOBS
          groupBy: SUBCLUSTER
        ) {
          id
          totalJobs
          totalUsers
          totalCores
          totalAccs
        }
        # TEST
        clusterMetrics(
          cluster: $cluster
          metrics: $metrics
          from: $clusterFrom
          to: $to
        ) {
          nodeCount
          metrics {
            name
            unit {
              prefix
              base
            }
            timestep
            data
          }
        }
      }
    `,
    variables: statusVariables,
    requestPolicy: "network-only"
  });
});
|
||||
|
||||
// Running-job counts grouped by project for `presetCluster`, sorted by total
// jobs and limited by `pagingState` (Top 10).
const topJobsQuery = $derived.by(() => {
  const runningOnCluster = [{ state: ["running"] }, { cluster: { eq: presetCluster} }];

  return queryStore({
    client,
    query: gql`
      query (
        $filter: [JobFilter!]!
        $paging: PageRequest!
      ) {
        jobsStatistics(
          filter: $filter
          page: $paging
          sortBy: TOTALJOBS
          groupBy: PROJECT
        ) {
          id
          totalJobs
        }
      }
    `,
    variables: {
      filter: runningOnCluster,
      paging: pagingState // Top 10
    },
    requestPolicy: "network-only"
  });
});
|
||||
|
||||
// Core-count and accelerator-count histograms over the running jobs of
// `presetCluster`.
const nodeStatusQuery = $derived.by(() => {
  const histogramVariables = {
    filter: [{ state: ["running"] }, { cluster: { eq: presetCluster } }],
    selectedHistograms: [], // No Metrics requested for node hardware stats - Empty Array can be used for refresh
    numDurationBins: "1h", // Hardcode or selector?
  };

  return queryStore({
    client,
    query: gql`
      query (
        $filter: [JobFilter!]!
        $selectedHistograms: [String!]
        $numDurationBins: String
      ) {
        jobsStatistics(filter: $filter, metrics: $selectedHistograms, numDurationBins: $numDurationBins) {
          histNumCores {
            count
            value
          }
          histNumAccs {
            count
            value
          }
        }
      }
    `,
    variables: histogramVariables,
    requestPolicy: "network-only"
  });
});
|
||||
|
||||
// Cluster-wide key figures for the info card and the roofline plot, summed over
// all subClusters of `presetCluster`.
// Returns an empty object until the init query has delivered the cluster list.
const clusterInfo = $derived.by(() => {
  if (!$initq?.data?.clusters) return {};

  const subClusters = $initq.data.clusters.find((c) => c.name === presetCluster)?.subClusters || [];
  const allocations = $statusQuery?.data?.allocatedNodes;
  const jobStats = $statusQuery?.data?.jobsStatistics;
  const nodeMetrics = $statusQuery?.data?.nodeMetrics;

  // Counters start at zero and the type set starts empty, so every key exists
  // (and `processorTypes` can be spread) even if the cluster has no subClusters.
  const rawInfos = {
    allocatedNodes: 0,
    allocatedCores: 0,
    allocatedAccs: 0,
    processorTypes: new Set(),
    activeUsers: 0,
    runningJobs: 0,
    totalNodes: 0,
    totalCores: 0,
    totalAccs: 0,
  };

  for (const subCluster of subClusters) {
    // Look up this subCluster's query results once instead of once per field
    const allocated = allocations?.find(({ name }) => name === subCluster.name);
    const stats = jobStats?.find(({ id }) => id === subCluster.name);

    // Allocations
    rawInfos.allocatedNodes += allocated?.count || 0;
    rawInfos.allocatedCores += stats?.totalCores || 0;
    rawInfos.allocatedAccs += stats?.totalAccs || 0;

    // Infos: only record processor types that are actually set
    if (subCluster?.processorType) rawInfos.processorTypes.add(subCluster.processorType);
    rawInfos.activeUsers += stats?.totalUsers || 0;
    rawInfos.runningJobs += stats?.totalJobs || 0;
    rawInfos.totalNodes += subCluster?.numberOfNodes || 0;
    // `|| 0` also absorbs the NaN produced when one of the factors is missing
    rawInfos.totalCores += (subCluster?.socketsPerNode * subCluster?.coresPerSocket * subCluster?.numberOfNodes) || 0;
    rawInfos.totalAccs += (subCluster?.numberOfNodes * subCluster?.topology?.accelerators?.length) || 0;

    // Units (Set Once): taken from the first subCluster
    if (!rawInfos.flopRateUnit) rawInfos.flopRateUnit = subCluster.flopRateSimd.unit.prefix + subCluster.flopRateSimd.unit.base;
    if (!rawInfos.memBwRateUnit) rawInfos.memBwRateUnit = subCluster.memoryBandwidth.unit.prefix + subCluster.memoryBandwidth.unit.base;

    // Get Maxima For Roofline Knee Render
    if (!rawInfos.roofData) {
      rawInfos.roofData = {
        flopRateScalar: { value: subCluster.flopRateScalar.value },
        flopRateSimd: { value: subCluster.flopRateSimd.value },
        memoryBandwidth: { value: subCluster.memoryBandwidth.value },
      };
    } else {
      const roof = rawInfos.roofData;
      roof.flopRateScalar.value = Math.max(roof.flopRateScalar.value, subCluster.flopRateScalar.value);
      roof.flopRateSimd.value = Math.max(roof.flopRateSimd.value, subCluster.flopRateSimd.value);
      roof.memoryBandwidth.value = Math.max(roof.memoryBandwidth.value, subCluster.memoryBandwidth.value);
    }
  }

  // Keymetrics (Data on Cluster-Scope): sum of the per-node averages of one metric
  const sumNodeAverages = (metricName) =>
    nodeMetrics?.reduce(
      (sum, node) => sum + (node.metrics.find((m) => m.name === metricName)?.metric?.series[0]?.statistics?.avg || 0),
      0, // Initial Value
    ) || 0;

  // NOTE(review): the parenthesization floors to whole numbers; if rounding to
  // two decimals was intended this should be Math.floor(x * 100) / 100 - confirm.
  rawInfos.flopRate = Math.floor((sumNodeAverages('flops_any') * 100) / 100);
  rawInfos.memBwRate = Math.floor((sumNodeAverages('mem_bw') * 100) / 100);

  return rawInfos;
});
|
||||
|
||||
// Current node states for the pie chart and its legend table, restricted to the
// displayed scheduler states and ordered by descending node count.
// Falls back to an empty array while no state data is available, because the
// template calls `.map()` and `{#each}` on this value.
const refinedStateData = $derived.by(() => {
  const shownStates = ['allocated', 'reserved', 'idle', 'mixed', 'down', 'unknown'];
  return ($statusQuery?.data?.nodeStates ?? [])
    .filter((e) => shownStates.includes(e.state))
    .sort((a, b) => b.count - a.count);
});
|
||||
|
||||
/* Functions */
|
||||
/*
 * Returns the legend color for the entry at position `targetIdx`.
 * Palette precedence: colorblind mode (component-level `useCbColors`) wins over
 * the alternative scheme requested via `useAltColors`; otherwise the default
 * scheme is used.
 */
function legendColors(targetIdx, useAltColors) {
  let scheme = 'default';
  if (useCbColors) {
    scheme = 'colorblind';
  } else if (useAltColors) {
    scheme = 'alternative';
  }
  const palette = [...colors[scheme]];
  // Reuses first color if targetIdx overflows
  const wrappedIdx = (palette.length + targetIdx) % palette.length;
  return palette[wrappedIdx];
}
|
||||
|
||||
/*
 * Converts per-node metric averages into roofline plot data.
 *
 * For every node entry the averages of "flops_any" (f) and "mem_bw" (m) are read
 * and turned into one point: x = f / m (intensity), y = f.
 * A node whose metric entry, series or average is missing contributes the point
 * (0, 0) instead of throwing, so the result keeps one point per input node, in
 * input order.
 *
 * Returns `[null, [x, y]]` (for dataformat see roofline.svelte), or `null` if
 * there is no input or no points.
 */
function transformNodesStatsToData(subclusterData) {
  const x = [];
  const y = [];

  for (const node of subclusterData || []) {
    const averageOf = (metricName) =>
      node?.metrics?.find((s) => s.name === metricName)?.metric?.series?.[0]?.statistics?.avg ?? 0;

    const f = averageOf("flops_any");
    const m = averageOf("mem_bw");

    let intensity = f / m;
    if (!Number.isFinite(intensity)) {
      // Covers NaN and +/-Infinity (e.g. m == 0)
      intensity = 0.0; // Set to Float Zero: Will not show in Log-Plot (Always below render limit)
    }

    x.push(intensity);
    y.push(f);
  }

  return x.length > 0 ? [null, [x, y]] : null;
}
|
||||
|
||||
/*
 * Builds the per-node info list handed to the roofline plot: one
 * `{ nodeName, schedulerState, numJobs }` object per entry of `subClusterData`.
 * The scheduler state comes from the `nodes` result of the status query
 * ("notindb" if there is no match); `numJobs` counts the queried jobs that list
 * the node's hostname among their resources.
 */
function transformNodesStatsToInfo(subClusterData) {
  if (!subClusterData) return [];

  // Use Nodes as Returned from CCMS, *NOT* as saved in DB via SlurmState-API!
  return Array.from(subClusterData, (entry) => {
    const nodeName = entry?.host ? entry.host : "unknown";

    const dbNode = $statusQuery?.data?.nodes?.items?.find(
      (n) => n.hostname == nodeName && n.subCluster == entry.subCluster
    );
    const schedulerState = dbNode?.schedulerState ? dbNode.schedulerState : "notindb";

    // Jobs of which at least one resource runs on this node
    const jobsOnNode = $statusQuery?.data?.jobs?.items?.filter(
      (job) => job.resources.find((res) => res.hostname == nodeName)
    );
    const numJobs = jobsOnNode?.length || 0;

    return { nodeName, schedulerState, numJobs };
  });
}
|
||||
|
||||
/* Inspect */
|
||||
// Debug hooks: log `clusterInfo` and the clusterMetrics query result, together
// with the update type reported by $inspect.
$inspect(clusterInfo).with((updateType, value) => {
  console.log(updateType, 'clusterInfo', value)
});

$inspect($statusQuery?.data?.clusterMetrics).with((updateType, value) => {
  console.log(updateType, 'clusterMetrics', value)
});
|
||||
|
||||
</script>
|
||||
|
||||
<Card style="height: 98vh;">
|
||||
<CardHeader class="text-center">
|
||||
<h3 class="mb-0">{presetCluster.charAt(0).toUpperCase() + presetCluster.slice(1)} Dashboard</h3>
|
||||
</CardHeader>
|
||||
<CardBody>
|
||||
{#if $statusQuery.fetching || $statesTimed.fetching || $topJobsQuery.fetching || $nodeStatusQuery.fetching}
|
||||
<Row class="justify-content-center">
|
||||
<Col xs="auto">
|
||||
<Spinner />
|
||||
</Col>
|
||||
</Row>
|
||||
|
||||
{:else if $statusQuery.error || $statesTimed.error || $topJobsQuery.error || $nodeStatusQuery.error}
|
||||
<Row cols={{xs:1, md:2}}>
|
||||
{#if $statusQuery.error}
|
||||
<Col>
|
||||
<Card color="danger">Error Requesting StatusQuery: {$statusQuery.error.message}</Card>
|
||||
</Col>
|
||||
{/if}
|
||||
{#if $statesTimed.error}
|
||||
<Col>
|
||||
<Card color="danger">Error Requesting StatesTimed: {$statesTimed.error.message}</Card>
|
||||
</Col>
|
||||
{/if}
|
||||
{#if $topJobsQuery.error}
|
||||
<Col>
|
||||
<Card color="danger">Error Requesting TopJobsQuery: {$topJobsQuery.error.message}</Card>
|
||||
</Col>
|
||||
{/if}
|
||||
{#if $nodeStatusQuery.error}
|
||||
<Col>
|
||||
<Card color="danger">Error Requesting NodeStatusQuery: {$nodeStatusQuery.error.message}</Card>
|
||||
</Col>
|
||||
{/if}
|
||||
</Row>
|
||||
|
||||
{:else}
|
||||
<Row cols={{xs:1, md:2, xl: 3}}>
|
||||
<Col> <!-- Info Card -->
|
||||
<Card class="h-auto mt-1">
|
||||
<CardHeader>
|
||||
<CardTitle class="mb-0">Cluster "{presetCluster.charAt(0).toUpperCase() + presetCluster.slice(1)}"</CardTitle>
|
||||
<!-- processorTypes is undefined while clusterInfo is still empty; spread an empty list then -->
<span>{[...(clusterInfo?.processorTypes ?? [])].toString()}</span>
|
||||
</CardHeader>
|
||||
<CardBody>
|
||||
<Table borderless>
|
||||
<tr class="py-2">
|
||||
<td style="font-size:x-large;">{clusterInfo?.runningJobs} Running Jobs</td>
|
||||
<td colspan="2" style="font-size:x-large;">{clusterInfo?.activeUsers} Active Users</td>
|
||||
</tr>
|
||||
<hr class="my-1"/>
|
||||
<tr class="pt-2">
|
||||
<td style="font-size: large;">
|
||||
Flop Rate (<span style="cursor: help;" title="Flops[Any] = (Flops[Double] x 2) + Flops[Single]">Any</span>)
|
||||
</td>
|
||||
<td colspan="2" style="font-size: large;">
|
||||
Memory BW Rate
|
||||
</td>
|
||||
</tr>
|
||||
<tr class="pb-2">
|
||||
<td style="font-size:x-large;">
|
||||
{clusterInfo?.flopRate}
|
||||
{clusterInfo?.flopRateUnit}
|
||||
</td>
|
||||
<td colspan="2" style="font-size:x-large;">
|
||||
{clusterInfo?.memBwRate}
|
||||
{clusterInfo?.memBwRateUnit}
|
||||
</td>
|
||||
</tr>
|
||||
<hr class="my-1"/>
|
||||
<tr class="py-2">
|
||||
<th scope="col">Allocated Nodes</th>
|
||||
<td style="min-width: 100px;"
|
||||
><div class="col">
|
||||
<Progress
|
||||
value={clusterInfo?.allocatedNodes}
|
||||
max={clusterInfo?.totalNodes}
|
||||
/>
|
||||
</div></td
|
||||
>
|
||||
<td
|
||||
>{clusterInfo?.allocatedNodes} / {clusterInfo?.totalNodes}
|
||||
Nodes</td
|
||||
>
|
||||
</tr>
|
||||
<tr class="py-2">
|
||||
<th scope="col">Allocated Cores</th>
|
||||
<td style="min-width: 100px;"
|
||||
><div class="col">
|
||||
<Progress
|
||||
value={clusterInfo?.allocatedCores}
|
||||
max={clusterInfo?.totalCores}
|
||||
/>
|
||||
</div></td
|
||||
>
|
||||
<td
|
||||
>{formatNumber(clusterInfo?.allocatedCores)} / {formatNumber(clusterInfo?.totalCores)}
|
||||
Cores</td
|
||||
>
|
||||
</tr>
|
||||
{#if clusterInfo?.totalAccs !== 0}
|
||||
<tr class="py-2">
|
||||
<th scope="col">Allocated Accelerators</th>
|
||||
<td style="min-width: 100px;"
|
||||
><div class="col">
|
||||
<Progress
|
||||
value={clusterInfo?.allocatedAccs}
|
||||
max={clusterInfo?.totalAccs}
|
||||
/>
|
||||
</div></td
|
||||
>
|
||||
<td
|
||||
>{clusterInfo?.allocatedAccs} / {clusterInfo?.totalAccs}
|
||||
Accelerators</td
|
||||
>
|
||||
</tr>
|
||||
{/if}
|
||||
</Table>
|
||||
</CardBody>
|
||||
</Card>
|
||||
</Col>
|
||||
<Col> <!-- Pie Last States -->
|
||||
<Row>
|
||||
<Col class="px-3 mt-2 mt-lg-0">
|
||||
<div bind:clientWidth={colWidthStates}>
|
||||
{#key refinedStateData}
|
||||
<h4 class="text-center">
|
||||
Current Node States
|
||||
</h4>
|
||||
<Pie
|
||||
useAltColors
|
||||
canvasId="hpcpie-slurm"
|
||||
size={colWidthStates * 0.75}
|
||||
sliceLabel="Nodes"
|
||||
quantities={refinedStateData.map(
|
||||
(sd) => sd.count,
|
||||
)}
|
||||
entities={refinedStateData.map(
|
||||
(sd) => sd.state,
|
||||
)}
|
||||
/>
|
||||
{/key}
|
||||
</div>
|
||||
</Col>
|
||||
<Col class="px-4 py-2">
|
||||
{#key refinedStateData}
|
||||
<Table>
|
||||
<tr class="mb-2">
|
||||
<th></th>
|
||||
<th>Current State</th>
|
||||
<th>Nodes</th>
|
||||
</tr>
|
||||
{#each refinedStateData as sd, i}
|
||||
<tr>
|
||||
<td><Icon name="circle-fill" style="color: {legendColors(i, true)};"/></td>
|
||||
<td>{sd.state}</td>
|
||||
<td>{sd.count}</td>
|
||||
</tr>
|
||||
{/each}
|
||||
</Table>
|
||||
{/key}
|
||||
</Col>
|
||||
</Row>
|
||||
</Col>
|
||||
<Col> <!-- General Cluster Info Card? -->
|
||||
<!-- TODO -->
|
||||
<Card>
|
||||
<CardHeader>
|
||||
<CardTitle>Infos</CardTitle>
|
||||
</CardHeader>
|
||||
<CardBody>
|
||||
Contents
|
||||
</CardBody>
|
||||
</Card>
|
||||
</Col>
|
||||
<Col> <!-- Nodes Roofline -->
|
||||
<div bind:clientWidth={colWidthRoof}>
|
||||
{#key $statusQuery?.data?.nodeMetrics}
|
||||
<Roofline
|
||||
useColors={false}
|
||||
useLegend={false}
|
||||
allowSizeChange
|
||||
width={colWidthRoof - 10}
|
||||
height={300}
|
||||
cluster={presetCluster}
|
||||
subCluster={clusterInfo?.roofData ? clusterInfo.roofData : null}
|
||||
roofData={transformNodesStatsToData($statusQuery?.data?.nodeMetrics)}
|
||||
nodesData={transformNodesStatsToInfo($statusQuery?.data?.nodeMetrics)}
|
||||
/>
|
||||
{/key}
|
||||
</div>
|
||||
</Col>
|
||||
<Col> <!-- Resources/Job Histogram OR Total Cluster Metric in Time SUMS-->
|
||||
<div bind:clientWidth={colWidthTotals}>
  <!-- clusterMetrics is a single object (nodeCount, metrics[]); the timestep is
       queried per metric entry, so read it from the first metric -->
  <DoubleMetric
    width={colWidthTotals}
    timestep={$statusQuery?.data?.clusterMetrics?.metrics?.[0]?.timestep || 60}
    numNodes={$statusQuery?.data?.clusterMetrics?.nodeCount || 0}
    metricData={$statusQuery?.data?.clusterMetrics?.metrics || []}
    cluster={presetCluster}
  />
</div>
|
||||
<!-- {#if clusterInfo?.totalAccs == 0}
|
||||
<Histogram
|
||||
data={convert2uplot($nodeStatusQuery.data.jobsStatistics[0].histNumCores)}
|
||||
title="Number of Cores Distribution"
|
||||
xlabel="Allocated Cores"
|
||||
xunit="Nodes"
|
||||
ylabel="Number of Jobs"
|
||||
yunit="Jobs"
|
||||
height="275"
|
||||
enableFlip
|
||||
/>
|
||||
{:else}
|
||||
<Histogram
|
||||
data={convert2uplot($nodeStatusQuery.data.jobsStatistics[0].histNumAccs)}
|
||||
title="Number of Accelerators Distribution"
|
||||
xlabel="Allocated Accs"
|
||||
xunit="Accs"
|
||||
ylabel="Number of Jobs"
|
||||
yunit="Jobs"
|
||||
height="275"
|
||||
enableFlip
|
||||
/>
|
||||
{/if} -->
|
||||
</Col>
|
||||
<Col> <!-- Stacked SchedState -->
|
||||
<div bind:clientWidth={colWidthStacked}>
|
||||
{#key $statesTimed?.data?.nodeStates}
|
||||
<Stacked
|
||||
data={$statesTimed?.data?.nodeStates}
|
||||
width={colWidthStacked * 0.95}
|
||||
xlabel="Time"
|
||||
ylabel="Nodes"
|
||||
yunit = "#Count"
|
||||
title = "Node States"
|
||||
stateType = "Node"
|
||||
/>
|
||||
{/key}
|
||||
</div>
|
||||
</Col>
|
||||
</Row>
|
||||
{/if}
|
||||
</CardBody>
|
||||
</Card>
|
||||
@@ -120,7 +120,7 @@
|
||||
href: "/monitoring/status/",
|
||||
icon: "clipboard-data",
|
||||
perCluster: true,
|
||||
listOptions: false,
|
||||
listOptions: true,
|
||||
menu: "Info",
|
||||
},
|
||||
];
|
||||
|
||||
@@ -6,77 +6,43 @@
|
||||
-->
|
||||
|
||||
<script>
|
||||
import {
|
||||
getContext
|
||||
} from "svelte"
|
||||
import {
|
||||
init,
|
||||
} from "./generic/utils.js";
|
||||
import {
|
||||
Row,
|
||||
Col,
|
||||
Card,
|
||||
CardBody,
|
||||
TabContent,
|
||||
TabPane,
|
||||
Spinner
|
||||
} from "@sveltestrap/sveltestrap";
|
||||
|
||||
import StatusDash from "./status/StatusDash.svelte";
|
||||
import UsageDash from "./status/UsageDash.svelte";
|
||||
import StatisticsDash from "./status/StatisticsDash.svelte";
|
||||
import DashDetails from "./status/DashDetails.svelte";
|
||||
import DashInternal from "./status/DashInternal.svelte";
|
||||
|
||||
/* Svelte 5 Props */
|
||||
let {
|
||||
presetCluster
|
||||
presetCluster,
|
||||
displayType
|
||||
} = $props();
|
||||
|
||||
/*Const Init */
|
||||
const { query: initq } = init();
|
||||
const useCbColors = getContext("cc-config")?.plotConfiguration_colorblindMode || false
|
||||
const displayStatusDetail = (displayType === 'DETAILS');
|
||||
</script>
|
||||
|
||||
<!-- Loading indicator & Refresh -->
|
||||
|
||||
<Row cols={1} class="mb-2">
|
||||
<!-- <Row cols={1} class="mb-2">
|
||||
<Col>
|
||||
<h3 class="mb-0">Current Status of Cluster "{presetCluster.charAt(0).toUpperCase() + presetCluster.slice(1)}"</h3>
|
||||
</Col>
|
||||
</Row>
|
||||
</Row> -->
|
||||
|
||||
|
||||
{#if $initq.fetching}
|
||||
<Row cols={1} class="text-center mt-3">
|
||||
{#if displayType !== "DASHBOARD" && displayType !== "DETAILS"}
|
||||
<Row>
|
||||
<Col>
|
||||
<Spinner />
|
||||
</Col>
|
||||
</Row>
|
||||
{:else if $initq.error}
|
||||
<Row cols={1} class="text-center mt-3">
|
||||
<Col>
|
||||
<Card body color="danger">{$initq.error.message}</Card>
|
||||
<Card body color="danger">Unknown displayList type! </Card>
|
||||
</Col>
|
||||
</Row>
|
||||
{:else}
|
||||
<Card class="overflow-auto" style="height: auto;">
|
||||
<TabContent>
|
||||
<TabPane tabId="status-dash" tab="Status" active>
|
||||
<CardBody>
|
||||
<StatusDash clusters={$initq.data.clusters} {presetCluster} {useCbColors} useAltColors></StatusDash>
|
||||
</CardBody>
|
||||
</TabPane>
|
||||
|
||||
<TabPane tabId="usage-dash" tab="Usage">
|
||||
<CardBody>
|
||||
<UsageDash {presetCluster} {useCbColors}></UsageDash>
|
||||
</CardBody>
|
||||
</TabPane>
|
||||
|
||||
<TabPane tabId="metric-dash" tab="Statistics">
|
||||
<CardBody>
|
||||
<StatisticsDash {presetCluster} {useCbColors}></StatisticsDash>
|
||||
</CardBody>
|
||||
</TabPane>
|
||||
</TabContent>
|
||||
</Card>
|
||||
{#if displayStatusDetail}
|
||||
<!-- ROW2-1: Node Overview (Grid Included)-->
|
||||
<DashDetails {presetCluster}/>
|
||||
{:else}
|
||||
<!-- ROW2-2: Node List (Grid Included)-->
|
||||
<DashInternal {presetCluster}/>
|
||||
{/if}
|
||||
{/if}
|
||||
|
||||
13
web/frontend/src/dashpublic.entrypoint.js
Normal file
13
web/frontend/src/dashpublic.entrypoint.js
Normal file
@@ -0,0 +1,13 @@
|
||||
import { mount } from 'svelte';
|
||||
// import {} from './header.entrypoint.js'
|
||||
import DashPublic from './DashPublic.root.svelte'
|
||||
|
||||
// Mount the public dashboard into the element with id 'svelte-app'.
// `infos` and `clusterCockpitConfig` are not defined in this module -
// presumably globals injected by the embedding page; verify against the template.
const dashTarget = document.getElementById('svelte-app');
const dashProps = { presetCluster: infos.cluster };
const dashContext = new Map([['cc-config', clusterCockpitConfig]]);

mount(DashPublic, {
  target: dashTarget,
  props: dashProps,
  context: dashContext,
})
|
||||
640
web/frontend/src/generic/plots/DoubleMetricPlot.svelte
Normal file
640
web/frontend/src/generic/plots/DoubleMetricPlot.svelte
Normal file
@@ -0,0 +1,640 @@
|
||||
<!--
  @component Double metric plot component, based on uPlot; metricdata values of several metrics by time in one plot

  Only width/height should change reactively.

  Properties:
  - `width Number?`: The plot width [Default: 0]
  - `height Number?`: The plot height [Default: 300]
  - `timestep Number`: The timestep used for X-axis rendering
  - `numNodes Number`: Node count belonging to the displayed metric data
  - `metricData [Object]`: The metric data objects (`name`, `unit`, `data`); one plotted series per entry
  - `cluster String?`: Cluster name of the displayed data [Default: ""]
  - `forNode Bool?`: If this plot is used for node data display; will render x-axis as negative time with $now as maximum [Default: true]
  - `enableFlip Bool?`: Optional flip switch [Default: false]
-->
|
||||
|
||||
<script>
|
||||
import uPlot from "uplot";
|
||||
import { formatNumber, formatDurationTime } from "../units.js";
|
||||
import { getContext, onMount, onDestroy } from "svelte";
|
||||
import { Card } from "@sveltestrap/sveltestrap";
|
||||
|
||||
/* Svelte 5 Props */
|
||||
let {
|
||||
// metric,
|
||||
width = 0,
|
||||
height = 300,
|
||||
timestep,
|
||||
numNodes,
|
||||
metricData,
|
||||
// useStatsSeries = false,
|
||||
// statisticsSeries = null,
|
||||
cluster = "",
|
||||
forNode = true,
|
||||
// zoomState = null,
|
||||
// thresholdState = null,
|
||||
enableFlip = false,
|
||||
// onZoom
|
||||
} = $props();
|
||||
|
||||
/* Const Init */
|
||||
const clusterCockpitConfig = getContext("cc-config");
|
||||
// const resampleConfig = getContext("resampling");
|
||||
// const subClusterTopology = getContext("getHardwareTopology")(cluster, subCluster);
|
||||
// const metricConfig = getContext("getMetricConfig")(cluster, subCluster, metric);
|
||||
const lineColors = clusterCockpitConfig.plotConfiguration_colorScheme;
|
||||
const lineWidth = clusterCockpitConfig.plotConfiguration_lineWidth / window.devicePixelRatio;
|
||||
// const cbmode = clusterCockpitConfig?.plotConfiguration_colorblindMode || false;
|
||||
const renderSleepTime = 200;
|
||||
// const normalLineColor = "#000000";
|
||||
// const backgroundColors = {
|
||||
// normal: "rgba(255, 255, 255, 1.0)",
|
||||
// caution: cbmode ? "rgba(239, 230, 69, 0.3)" : "rgba(255, 128, 0, 0.3)",
|
||||
// alert: cbmode ? "rgba(225, 86, 44, 0.3)" : "rgba(255, 0, 0, 0.3)",
|
||||
// };
|
||||
|
||||
/* Var Init */
|
||||
let timeoutId = null;
|
||||
|
||||
/* State Init */
|
||||
let plotWrapper = $state(null);
|
||||
let uplot = $state(null);
|
||||
|
||||
/* Derived */
|
||||
// const usesMeanStatsSeries = $derived((statisticsSeries?.mean && statisticsSeries.mean.length != 0));
|
||||
// const resampleTrigger = $derived(resampleConfig?.trigger ? Number(resampleConfig.trigger) : null);
|
||||
// const resampleResolutions = $derived(resampleConfig?.resolutions ? [...resampleConfig.resolutions] : null);
|
||||
// const resampleMinimum = $derived(resampleConfig?.resolutions ? Math.min(...resampleConfig.resolutions) : null);
|
||||
// const thresholds = $derived(findJobAggregationThresholds(
|
||||
// subClusterTopology,
|
||||
// metricConfig,
|
||||
// scope,
|
||||
// numhwthreads,
|
||||
// numaccs
|
||||
// ));
|
||||
// Number of datapoints of the longest series in `metricData`.
// Entries without a `data` array count as length 0, matching the
// `metricData[i]?.data` guard used when the plot data is assembled.
const longestSeries = $derived.by(() => {
  return metricData.reduce((n, m) => Math.max(n, m?.data?.length ?? 0), 0);
});
|
||||
const maxX = $derived(longestSeries * timestep);
|
||||
// const maxY = $derived.by(() => {
|
||||
// let pendingY = 0;
|
||||
// // if (useStatsSeries) {
|
||||
// // pendingY = statisticsSeries.max.reduce(
|
||||
// // (max, x) => Math.max(max, x),
|
||||
// // thresholds?.normal,
|
||||
// // ) || thresholds?.normal
|
||||
// // } else {
|
||||
// pendingY = series.reduce(
|
||||
// (max, series) => Math.max(max, series?.statistics?.max),
|
||||
// thresholds?.normal,
|
||||
// ) || thresholds?.normal;
|
||||
// // }
|
||||
|
||||
// if (pendingY >= 10 * thresholds.peak) {
|
||||
// // Hard y-range render limit if outliers in series data
|
||||
// return (10 * thresholds.peak);
|
||||
// } else {
|
||||
// return pendingY;
|
||||
// }
|
||||
// });
|
||||
// const plotBands = $derived.by(() => {
|
||||
// if (useStatsSeries) {
|
||||
// return [
|
||||
// { series: [2, 3], fill: cbmode ? "rgba(0,0,255,0.1)" : "rgba(0,255,0,0.1)" },
|
||||
// { series: [3, 1], fill: cbmode ? "rgba(0,255,0,0.1)" : "rgba(255,0,0,0.1)" },
|
||||
// ];
|
||||
// };
|
||||
// return null;
|
||||
// })
|
||||
// Assembles the uPlot data array: index 0 holds the x-axis (time) values,
// followed by one y-value array per entry of `metricData`, in input order.
const plotData = $derived.by(() => {
  // X
  let timeAxis;
  if (forNode === true) {
    // Negative Timestamp Buildup: offsets counting up towards 0
    // NOTE(review): this yields longestSeries + 1 x-values, one more than the
    // longest y-series - confirm this is intended.
    timeAxis = Array.from(
      { length: longestSeries + 1 },
      (_, i) => (longestSeries - i) * timestep * -1
    );
  } else {
    // Positive Timestamp Buildup: offsets starting at 0
    timeAxis = Array.from({ length: longestSeries }, (_, j) => j * timestep);
  }

  // Y
  const valueSeries = metricData.map((m) => m?.data);

  return [timeAxis, ...valueSeries];
})
|
||||
// uPlot series configuration: the x-series first, then one y-series per entry of metricData.
// Every y-series is bound to its own scale ("y1", "y2", ...).
const plotSeries = $derived.by(() => {
  // Note: X-Legend will not be shown as soon as Y-Axis are in extendedMode
  const xSeries = {
    label: "Runtime",
    value: (u, ts, sidx, didx) =>
      (didx == null) ? null : formatDurationTime(ts, forNode),
  };

  const ySeries = Array.from(metricData, (metric, idx) => ({
    label: `${metric?.name} (${metric?.unit?.prefix}${metric?.unit?.base})`,
    scale: `y${idx + 1}`,
    width: lineWidth,
    stroke: lineColor(idx, metricData.length),
  }));

  return [xSeries, ...ySeries];
});
|
||||
|
||||
/* Effects */
|
||||
// $effect(() => {
|
||||
// if (!useStatsSeries && statisticsSeries != null) useStatsSeries = true;
|
||||
// })
|
||||
|
||||
// Forwards the current width/height to the debounced onSizeChange handler,
// but only once the wrapper element is bound (i.e. data exists)
$effect(() => {
  if (!plotWrapper) return;
  onSizeChange(width, height);
});
|
||||
|
||||
/* Functions */
|
||||
/**
 * Builds the list of tick increments offered to the x-axis.
 *
 * @param {number} timestep - Distance between two data points on the x-axis
 * @param {number} maxX - Upper bound of the x-range; increments are generated while below it
 * @param {boolean} forNode - If exactly `true`, a fixed increment list is returned
 * @returns {number[]} Increments in ascending order; empty if no increment fits below maxX
 */
function timeIncrs(timestep, maxX, forNode) {
  if (forNode === true) {
    return [60, 120, 240, 300, 360, 480, 600, 900, 1800, 3600, 7200, 14400, 21600]; // forNode fixed increments
  }

  // Multiplying a zero or negative timestep by 10 never reaches maxX:
  // return no increments instead of looping forever
  if (!(timestep > 0)) return [];

  const incrs = [];
  // Per decade: 1x, 2x, 3x and 5x of the current base step
  for (let t = timestep; t < maxX; t *= 10) {
    incrs.push(t, t * 2, t * 3, t * 5);
  }
  return incrs;
}
|
||||
|
||||
// removed arg "subcluster": input metricconfig and topology now directly derived from subcluster
|
||||
// function findJobAggregationThresholds(
|
||||
// subClusterTopology,
|
||||
// metricConfig,
|
||||
// scope,
|
||||
// numhwthreads,
|
||||
// numaccs
|
||||
// ) {
|
||||
|
||||
// if (!subClusterTopology || !metricConfig || !scope) {
|
||||
// console.warn("Argument missing for findJobAggregationThresholds!");
|
||||
// return null;
|
||||
// }
|
||||
|
||||
// // handle special *-stat scopes
|
||||
// if (scope.match(/(.*)-stat$/)) {
|
||||
// const statParts = scope.split('-');
|
||||
// scope = statParts[0]
|
||||
// }
|
||||
|
||||
// if (metricConfig?.aggregation == "avg") {
|
||||
// // Return as Configured
|
||||
// return {
|
||||
// normal: metricConfig.normal,
|
||||
// caution: metricConfig.caution,
|
||||
// alert: metricConfig.alert,
|
||||
// peak: metricConfig.peak,
|
||||
// };
|
||||
// }
|
||||
|
||||
// if (metricConfig?.aggregation == "sum") {
|
||||
// // Scale Thresholds
|
||||
// let fraction;
|
||||
// if (numaccs > 0) fraction = subClusterTopology.accelerators.length / numaccs;
|
||||
// else if (numhwthreads > 0) fraction = subClusterTopology.core.length / numhwthreads;
|
||||
// else fraction = 1; // Fallback
|
||||
|
||||
// let divisor;
|
||||
// // Exclusive: Fraction = 1; Shared: Fraction > 1
|
||||
// if (scope == 'node') divisor = fraction;
|
||||
// // Cap divisor at number of available sockets or domains
|
||||
// else if (scope == 'socket') divisor = (fraction < subClusterTopology.socket.length) ? subClusterTopology.socket.length : fraction;
|
||||
// else if (scope == "memoryDomain") divisor = (fraction < subClusterTopology.memoryDomain.length) ? subClusterTopology.socket.length : fraction;
|
||||
// // Use Maximum Division for Smallest Scopes
|
||||
// else if (scope == "core") divisor = subClusterTopology.core.length;
|
||||
// else if (scope == "hwthread") divisor = subClusterTopology.core.length; // alt. name for core
|
||||
// else if (scope == "accelerator") divisor = subClusterTopology.accelerators.length;
|
||||
// else {
|
||||
// console.log('Unknown scope, return default aggregation thresholds for sum', scope)
|
||||
// divisor = 1;
|
||||
// }
|
||||
|
||||
// return {
|
||||
// peak: metricConfig.peak / divisor,
|
||||
// normal: metricConfig.normal / divisor,
|
||||
// caution: metricConfig.caution / divisor,
|
||||
// alert: metricConfig.alert / divisor,
|
||||
// };
|
||||
// }
|
||||
|
||||
// console.warn(
|
||||
// "Missing or unkown aggregation mode (sum/avg) for metric:",
|
||||
// metricConfig,
|
||||
// );
|
||||
// return null;
|
||||
// }
|
||||
|
||||
// UPLOT PLUGIN // converts the legend into a simple tooltip
|
||||
/**
 * uPlot plugin factory: turns the built-in legend element into a tooltip that follows the cursor.
 *
 * @param {Object} [options]
 * @param {string} [options.className] - Extra CSS class added to the legend element
 * @param {Object} [options.style] - Style overrides merged over the tooltip base style
 * @returns {Object} Plugin object with `init` and `setCursor` hooks; an empty object
 *   if more than 12 data series are plotted
 */
function legendAsTooltipPlugin({
  className,
  style = { backgroundColor: "rgba(255, 249, 196, 0.92)", color: "black" },
} = {}) {
  const dataSize = metricData.length;
  let legendEl;

  // init hook: restyle the legend and move it into the plot overlay
  const setupTooltip = (u, opts) => {
    legendEl = u.root.querySelector(".u-legend");

    legendEl.classList.remove("u-inline");
    if (className) legendEl.classList.add(className);

    uPlot.assign(legendEl.style, {
      minWidth: "100px",
      textAlign: "left",
      pointerEvents: "none",
      display: "none",
      position: "absolute",
      left: 0,
      top: 0,
      zIndex: 100,
      boxShadow: "2px 2px 10px rgba(0,0,0,0.5)",
      ...style,
    });

    // Hide series color markers for a single y-series or for more than 8 of them
    const hideMarkers = dataSize === 1 || dataSize > 8;
    if (hideMarkers) {
      for (const marker of legendEl.querySelectorAll(".u-marker")) {
        marker.style.display = "none";
      }
    }

    const overlay = u.over;
    overlay.style.overflow = "visible";

    // Move legend into plot bounds
    overlay.appendChild(legendEl);

    // Show/hide tooltip on enter/exit
    overlay.addEventListener("mouseenter", () => {
      legendEl.style.display = null;
    });
    overlay.addEventListener("mouseleave", () => {
      legendEl.style.display = "none";
    });
  };

  // setCursor hook: place the tooltip right of the cursor in the left plot half
  // (if flipping is enabled), otherwise left of the cursor
  const followCursor = (u) => {
    const { left, top } = u.cursor;
    const tooltipWidth = u?.over?.querySelector(".u-legend")?.offsetWidth || 0;
    const offsetX = (enableFlip && (left < (width / 2)))
      ? left + 15
      : left - tooltipWidth - 15;
    legendEl.style.transform = `translate(${offsetX}px, ${top + 15}px)`;
  };

  if (dataSize > 12) {
    // Setting legend-opts show/live as object with false here will not work ...
    return {};
  }

  return {
    hooks: {
      init: setupTooltip,
      setCursor: followCursor,
    },
  };
}
|
||||
|
||||
// RETURN BG COLOR FROM THRESHOLD
|
||||
// function backgroundColor() {
|
||||
// if (
|
||||
// clusterCockpitConfig.plotConfiguration_colorBackground == false ||
|
||||
// // !thresholds ||
|
||||
// !(series && series.every((s) => s.statistics != null))
|
||||
// )
|
||||
// return backgroundColors.normal;
|
||||
|
||||
// let cond =
|
||||
// thresholds.alert < thresholds.caution
|
||||
// ? (a, b) => a <= b
|
||||
// : (a, b) => a >= b;
|
||||
|
||||
// let avg =
|
||||
// series.reduce((sum, series) => sum + series.statistics.avg, 0) /
|
||||
// series.length;
|
||||
|
||||
// if (Number.isNaN(avg)) return backgroundColors.normal;
|
||||
|
||||
// if (cond(avg, thresholds.alert)) return backgroundColors.alert;
|
||||
|
||||
// if (cond(avg, thresholds.caution)) return backgroundColors.caution;
|
||||
|
||||
// return backgroundColors.normal;
|
||||
// }
|
||||
|
||||
/**
 * Picks the stroke color for series `i` out of `n` series from `lineColors`.
 * With at least as many series as colors the palette is cycled; with fewer
 * series the picks are spread across the palette.
 */
function lineColor(i, n) {
  const paletteSize = lineColors.length;
  const slot = (n && n >= paletteSize)
    ? i % paletteSize
    : Math.floor((i / n) * paletteSize);
  return lineColors[slot];
}
|
||||
|
||||
/**
 * Creates the uPlot instance on the first call; on later calls only resizes the existing plot.
 *
 * @param {number} ren_width - Plot width to render with
 * @param {number} ren_height - Plot height to render with
 */
function render(ren_width, ren_height) {
  // Plot already exists: a size update is all that is needed
  if (uplot) {
    uplot.setSize({ width: ren_width, height: ren_height });
    return;
  }

  // Axis label "<name> (<prefix><base>)" for the metric at the given index
  const axisLabel = (idx) =>
    `${metricData[idx]?.name} (${metricData[idx]?.unit?.prefix}${metricData[idx]?.unit?.base})`;

  // Set Options
  const opts = {
    width: ren_width,
    height: ren_height,
    plugins: [legendAsTooltipPlugin()],
    series: plotSeries,
    axes: [
      {
        scale: "x",
        space: 35,
        incrs: timeIncrs(timestep, maxX, forNode),
        label: "Time",
        values: (_, vals) => vals.map((v) => formatDurationTime(v, forNode)),
      },
      {
        // Left axis: first metric
        scale: "y1",
        grid: { show: true },
        label: axisLabel(0),
        values: (u, vals) => vals.map((v) => formatNumber(v)),
      },
      {
        // Right axis: second metric
        side: 1,
        scale: "y2",
        grid: { show: false },
        label: axisLabel(1),
        values: (u, vals) => vals.map((v) => formatNumber(v)),
      },
    ],
    padding: [5, 10, -20, 0],
    hooks: {
      draw: [
        (u) => {
          // Draw plot info labels: cluster name top left, node count top right
          const textl = `Cluster ${cluster}`;
          const textr = `Sums of ${numNodes} nodes`;
          const textY = u.bbox.top + (forNode ? 0 : 10);

          u.ctx.save();
          u.ctx.textAlign = "start";
          u.ctx.fillStyle = "black";
          u.ctx.fillText(textl, u.bbox.left + 10, textY);
          u.ctx.textAlign = "end";
          u.ctx.fillStyle = "black";
          u.ctx.fillText(textr, u.bbox.left + u.bbox.width - 10, textY);
          u.ctx.restore();
        },
      ],
    },
    scales: {
      x: { time: false },
      // One scale per y-series; keys must match the "y1"/"y2" names used by series and axes
      y1: { auto: true },
      y2: { auto: true },
    },
    legend: {
      show: true,
      live: true, // This plot always renders two data series
    },
    cursor: {
      drag: { x: true, y: true },
    },
  };

  uplot = new uPlot(opts, plotData, plotWrapper);
}
|
||||
|
||||
/**
 * Debounced resize handler: schedules a re-render after `renderSleepTime`
 * and cancels a still pending one, so only the last size of a burst is rendered.
 * Does nothing until the plot has been created.
 */
function onSizeChange(chg_width, chg_height) {
  if (uplot) {
    // Restart the timer if a render is still pending
    if (timeoutId != null) clearTimeout(timeoutId);

    const rerender = () => {
      timeoutId = null;
      render(chg_width, chg_height);
    };
    timeoutId = setTimeout(rerender, renderSleepTime);
  }
}
|
||||
|
||||
/* On Mount */
|
||||
// Initial render; skipped if the wrapper element is not bound
onMount(() => {
  if (!plotWrapper) return;
  render(width, height);
});
|
||||
|
||||
/* On Destroy */
|
||||
// Cleanup: cancel a pending debounced render and dispose the plot instance
onDestroy(() => {
  if (timeoutId != null) {
    clearTimeout(timeoutId);
  }
  if (uplot) {
    uplot.destroy();
  }
});
|
||||
|
||||
</script>
|
||||
|
||||
<!-- Define $width Wrapper and NoData Card -->
|
||||
{#if metricData[0]?.data && metricData[0]?.data?.length > 0}
|
||||
<div bind:this={plotWrapper} bind:clientWidth={width}
|
||||
class={forNode ? 'py-2 rounded' : 'rounded'}
|
||||
></div>
|
||||
{:else}
|
||||
<Card body color="warning" class="mx-4"
|
||||
>Cannot render plot: No series data returned for <code>{cluster}</code></Card
|
||||
>
|
||||
{/if}
|
||||
@@ -36,6 +36,7 @@
|
||||
subCluster = null,
|
||||
allowSizeChange = false,
|
||||
useColors = true,
|
||||
useLegend = true,
|
||||
width = 600,
|
||||
height = 380,
|
||||
} = $props();
|
||||
@@ -534,7 +535,7 @@
|
||||
width: width,
|
||||
height: height,
|
||||
legend: {
|
||||
show: true,
|
||||
show: useLegend,
|
||||
},
|
||||
cursor: {
|
||||
dataIdx: (u, seriesIdx) => {
|
||||
|
||||
@@ -156,7 +156,7 @@
|
||||
{
|
||||
scale: "y",
|
||||
grid: { show: true },
|
||||
labelFont: "sans-serif",
|
||||
// labelFont: "sans-serif",
|
||||
label: ylabel + (yunit ? ` (${yunit})` : ''),
|
||||
// values: (u, vals) => vals.map((v) => formatNumber(v)),
|
||||
},
|
||||
|
||||
@@ -64,6 +64,34 @@
|
||||
{/each}
|
||||
</DropdownMenu>
|
||||
</Dropdown>
|
||||
{:else if item.title === 'Status'}
|
||||
<Dropdown nav inNavbar {direction}>
|
||||
<DropdownToggle nav caret>
|
||||
<Icon name={item.icon} />
|
||||
{item.title}
|
||||
</DropdownToggle>
|
||||
<DropdownMenu class="dropdown-menu-lg-end">
|
||||
{#each clusters as cluster}
|
||||
<Dropdown nav direction="right">
|
||||
<DropdownToggle nav caret class="dropdown-item py-1 px-2">
|
||||
{cluster.name}
|
||||
</DropdownToggle>
|
||||
<DropdownMenu>
|
||||
<DropdownItem class="py-1 px-2"
|
||||
href={item.href + cluster.name}
|
||||
>
|
||||
Status Dashboard
|
||||
</DropdownItem>
|
||||
<DropdownItem class="py-1 px-2"
|
||||
href={item.href + 'detail/' + cluster.name}
|
||||
>
|
||||
Status Details
|
||||
</DropdownItem>
|
||||
</DropdownMenu>
|
||||
</Dropdown>
|
||||
{/each}
|
||||
</DropdownMenu>
|
||||
</Dropdown>
|
||||
{:else}
|
||||
<Dropdown nav inNavbar {direction}>
|
||||
<DropdownToggle nav caret>
|
||||
|
||||
@@ -6,6 +6,7 @@ mount(Status, {
|
||||
target: document.getElementById('svelte-app'),
|
||||
props: {
|
||||
presetCluster: infos.cluster,
|
||||
displayType: displayType,
|
||||
},
|
||||
context: new Map([
|
||||
['cc-config', clusterCockpitConfig]
|
||||
|
||||
82
web/frontend/src/status/DashDetails.svelte
Normal file
82
web/frontend/src/status/DashDetails.svelte
Normal file
@@ -0,0 +1,82 @@
|
||||
<!--
|
||||
@component Cluster status detail view; renders the Status, Usage and Statistics dashboards as tabs
|
||||
|
||||
Properties:
|
||||
- `presetCluster String`: The cluster to show status information for
|
||||
-->
|
||||
|
||||
<script>
|
||||
import {
|
||||
getContext
|
||||
} from "svelte"
|
||||
import {
|
||||
init,
|
||||
} from "../generic/utils.js";
|
||||
import {
|
||||
Row,
|
||||
Col,
|
||||
Card,
|
||||
CardBody,
|
||||
TabContent,
|
||||
TabPane,
|
||||
Spinner
|
||||
} from "@sveltestrap/sveltestrap";
|
||||
|
||||
import StatusDash from "./dashdetails/StatusDash.svelte";
|
||||
import UsageDash from "./dashdetails/UsageDash.svelte";
|
||||
import StatisticsDash from "./dashdetails/StatisticsDash.svelte";
|
||||
|
||||
/* Svelte 5 Props */
|
||||
let {
|
||||
presetCluster,
|
||||
} = $props();
|
||||
|
||||
/*Const Init */
|
||||
const { query: initq } = init();
|
||||
const useCbColors = getContext("cc-config")?.plotConfiguration_colorblindMode || false
|
||||
</script>
|
||||
|
||||
<!-- Loading indicator & Refresh -->
|
||||
|
||||
<Row cols={1} class="mb-2">
|
||||
<Col>
|
||||
<h3 class="mb-0">Current Status of Cluster "{presetCluster.charAt(0).toUpperCase() + presetCluster.slice(1)}"</h3>
|
||||
</Col>
|
||||
</Row>
|
||||
|
||||
|
||||
{#if $initq.fetching}
|
||||
<Row cols={1} class="text-center mt-3">
|
||||
<Col>
|
||||
<Spinner />
|
||||
</Col>
|
||||
</Row>
|
||||
{:else if $initq.error}
|
||||
<Row cols={1} class="text-center mt-3">
|
||||
<Col>
|
||||
<Card body color="danger">{$initq.error.message}</Card>
|
||||
</Col>
|
||||
</Row>
|
||||
{:else}
|
||||
<Card class="overflow-auto" style="height: auto;">
|
||||
<TabContent>
|
||||
<TabPane tabId="status-dash" tab="Status" active>
|
||||
<CardBody>
|
||||
<StatusDash clusters={$initq.data.clusters} {presetCluster} {useCbColors} useAltColors></StatusDash>
|
||||
</CardBody>
|
||||
</TabPane>
|
||||
|
||||
<TabPane tabId="usage-dash" tab="Usage">
|
||||
<CardBody>
|
||||
<UsageDash {presetCluster} {useCbColors}></UsageDash>
|
||||
</CardBody>
|
||||
</TabPane>
|
||||
|
||||
<TabPane tabId="metric-dash" tab="Statistics">
|
||||
<CardBody>
|
||||
<StatisticsDash {presetCluster} {useCbColors}></StatisticsDash>
|
||||
</CardBody>
|
||||
</TabPane>
|
||||
</TabContent>
|
||||
</Card>
|
||||
{/if}
|
||||
605
web/frontend/src/status/DashInternal.svelte
Normal file
605
web/frontend/src/status/DashInternal.svelte
Normal file
@@ -0,0 +1,605 @@
|
||||
<!--
|
||||
@component Main cluster status view component; renders current system-usage information
|
||||
|
||||
Properties:
|
||||
- `presetCluster String`: The cluster to show status information for
|
||||
-->
|
||||
|
||||
<script>
|
||||
import {
|
||||
getContext
|
||||
} from "svelte"
|
||||
import {
|
||||
queryStore,
|
||||
gql,
|
||||
getContextClient,
|
||||
} from "@urql/svelte";
|
||||
import {
|
||||
init,
|
||||
scramble,
|
||||
scrambleNames,
|
||||
convert2uplot
|
||||
} from "../generic/utils.js";
|
||||
import {
|
||||
formatDurationTime,
|
||||
formatNumber,
|
||||
} from "../generic/units.js";
|
||||
import {
|
||||
Row,
|
||||
Col,
|
||||
Card,
|
||||
CardTitle,
|
||||
CardHeader,
|
||||
CardBody,
|
||||
Spinner,
|
||||
Table,
|
||||
Progress,
|
||||
Icon,
|
||||
} from "@sveltestrap/sveltestrap";
|
||||
import Roofline from "../generic/plots/Roofline.svelte";
|
||||
import Pie, { colors } from "../generic/plots/Pie.svelte";
|
||||
import Stacked from "../generic/plots/Stacked.svelte";
|
||||
import Histogram from "../generic/plots/Histogram.svelte";
|
||||
|
||||
/* Svelte 5 Props */
|
||||
let {
|
||||
presetCluster,
|
||||
} = $props();
|
||||
|
||||
/*Const Init */
|
||||
const { query: initq } = init();
|
||||
const client = getContextClient();
|
||||
const useCbColors = getContext("cc-config")?.plotConfiguration_colorblindMode || false
|
||||
|
||||
/* States */
|
||||
let pagingState = $state({page: 1, itemsPerPage: 10}) // Top 10
|
||||
let from = $state(new Date(Date.now() - 5 * 60 * 1000));
|
||||
let to = $state(new Date(Date.now()));
|
||||
let stackedFrom = $state(Math.floor(Date.now() / 1000) - 14400);
|
||||
let colWidthJobs = $state(0);
|
||||
let colWidthRoof = $state(0);
|
||||
let colWidthStacked1 = $state(0);
|
||||
let colWidthStacked2 = $state(0);
|
||||
|
||||
/* Derived */
|
||||
// States for Stacked charts
|
||||
// Timed node and health state counts of the preset cluster, starting at `stackedFrom`
const statesTimed = $derived(queryStore({
  client: client,
  query: gql`
    query ($filter: [NodeFilter!], $typeNode: String!, $typeHealth: String!) {
      nodeStates: nodeStatesTimed(filter: $filter, type: $typeNode) {
        state
        counts
        times
      }
      healthStates: nodeStatesTimed(filter: $filter, type: $typeHealth) {
        state
        counts
        times
      }
    }
  `,
  variables: {
    // Start of the time window comes from state instead of a hard-coded debug timestamp
    filter: { cluster: { eq: presetCluster }, timeStart: stackedFrom },
    typeNode: "node",
    typeHealth: "health"
  },
  requestPolicy: "network-only"
}));
|
||||
|
||||
// Note: nodeMetrics are requested on configured $timestep resolution
|
||||
// Result: The latest 5 minutes (datapoints) for each node independent of job
|
||||
// Combined status query for the preset cluster: per-node metric averages between `from` and `to`,
// metric averages and the job list of running jobs, allocated node counts and
// per-subcluster job statistics
const statusQuery = $derived.by(() => {
  const runningOnCluster = [{ state: ["running"] }, { cluster: { eq: presetCluster } }];

  return queryStore({
    client,
    query: gql`
      query (
        $cluster: String!
        $metrics: [String!]
        $from: Time!
        $to: Time!
        $jobFilter: [JobFilter!]!
        $paging: PageRequest!
        $sorting: OrderByInput!
      ) {
        # Node 5 Minute Averages for Roofline
        nodeMetrics(
          cluster: $cluster
          metrics: $metrics
          from: $from
          to: $to
        ) {
          host
          subCluster
          metrics {
            name
            metric {
              series {
                statistics {
                  avg
                }
              }
            }
          }
        }
        # Running Job Metric Average for Rooflines
        jobsMetricStats(filter: $jobFilter, metrics: $metrics) {
          id
          jobId
          duration
          numNodes
          numAccelerators
          subCluster
          stats {
            name
            data {
              avg
            }
          }
        }
        # Get Jobs for Per-Node Counts
        jobs(filter: $jobFilter, order: $sorting, page: $paging) {
          items {
            jobId
            resources {
              hostname
            }
          }
          count
        }
        # Only counts shared nodes once
        allocatedNodes(cluster: $cluster) {
          name
          count
        }
        # totalNodes includes multiples if shared jobs: Info-Card Data
        jobsStatistics(
          filter: $jobFilter
          page: $paging
          sortBy: TOTALJOBS
          groupBy: SUBCLUSTER
        ) {
          id
          totalJobs
          totalUsers
          totalCores
          totalAccs
        }
      }
    `,
    variables: {
      cluster: presetCluster,
      metrics: ["flops_any", "mem_bw"], // Fixed names for roofline and status bars
      from: from.toISOString(),
      to: to.toISOString(),
      jobFilter: runningOnCluster,
      paging: { itemsPerPage: -1, page: 1 }, // Get all: -1
      sorting: { field: "startTime", type: "col", order: "DESC" },
    },
    requestPolicy: "network-only",
  });
});
|
||||
|
||||
// Job counts of running jobs on the preset cluster, grouped by project and paged via `pagingState`
const topJobsQuery = $derived.by(() => {
  const runningOnCluster = [{ state: ["running"] }, { cluster: { eq: presetCluster } }];

  return queryStore({
    client,
    query: gql`
      query (
        $filter: [JobFilter!]!
        $paging: PageRequest!
      ) {
        jobsStatistics(
          filter: $filter
          page: $paging
          sortBy: TOTALJOBS
          groupBy: PROJECT
        ) {
          id
          totalJobs
        }
      }
    `,
    variables: {
      filter: runningOnCluster,
      paging: pagingState, // Top 10
    },
    requestPolicy: "network-only",
  });
});
|
||||
|
||||
// Histogram query: core and accelerator count distributions of running jobs (no metric histograms requested)
|
||||
// Core-count and accelerator-count histograms of running jobs on the preset cluster
const nodeStatusQuery = $derived.by(() => {
  const runningOnCluster = [{ state: ["running"] }, { cluster: { eq: presetCluster } }];

  return queryStore({
    client,
    query: gql`
      query (
        $filter: [JobFilter!]!
        $selectedHistograms: [String!]
        $numDurationBins: String
      ) {
        jobsStatistics(filter: $filter, metrics: $selectedHistograms, numDurationBins: $numDurationBins) {
          histNumCores {
            count
            value
          }
          histNumAccs {
            count
            value
          }
        }
      }
    `,
    variables: {
      filter: runningOnCluster,
      selectedHistograms: [], // No Metrics requested for node hardware stats - Empty Array can be used for refresh
      numDurationBins: "1h", // Hardcode or selector?
    },
    requestPolicy: "network-only",
  });
});
|
||||
|
||||
// Aggregated info-card data of the preset cluster: allocation and capacity sums over all
// subclusters, processor types, units, roofline maxima and summed node metric averages.
// Resolves to an empty object until the init query delivered the cluster list.
const clusterInfo = $derived.by(() => {
  if (!$initq?.data?.clusters) return {};

  const rawInfos = {};

  // Adds `value` to the numeric entry `key`, creating the entry on first use
  const accumulate = (key, value) => {
    rawInfos[key] = (rawInfos[key] || 0) + value;
  };

  // Sum over all nodes of the average of the first series of the named metric
  const sumNodeAverages = (metricName) =>
    $statusQuery?.data?.nodeMetrics?.reduce(
      (sum, node) =>
        sum + (node.metrics.find((m) => m.name == metricName)?.metric?.series[0]?.statistics?.avg || 0),
      0, // Initial Value
    ) || 0;

  const subClusters = $initq?.data?.clusters?.find((c) => c.name == presetCluster)?.subClusters || [];

  for (const subCluster of subClusters) {
    // Query results belonging to this subcluster
    const allocation = $statusQuery?.data?.allocatedNodes?.find(({ name }) => name == subCluster.name);
    const jobStats = $statusQuery?.data?.jobsStatistics?.find(({ id }) => id == subCluster.name);

    // Allocations
    accumulate('allocatedNodes', allocation?.count || 0);
    accumulate('allocatedCores', jobStats?.totalCores || 0);
    accumulate('allocatedAccs', jobStats?.totalAccs || 0);

    // Infos
    if (!rawInfos['processorTypes']) {
      rawInfos['processorTypes'] = new Set(subCluster?.processorType ? [subCluster.processorType] : []);
    } else {
      rawInfos['processorTypes'].add(subCluster.processorType);
    }

    accumulate('activeUsers', jobStats?.totalUsers || 0);
    accumulate('runningJobs', jobStats?.totalJobs || 0);
    accumulate('totalNodes', subCluster?.numberOfNodes || 0);
    accumulate('totalCores', (subCluster?.socketsPerNode * subCluster?.coresPerSocket * subCluster?.numberOfNodes) || 0);
    accumulate('totalAccs', (subCluster?.numberOfNodes * subCluster?.topology?.accelerators?.length) || 0);

    // Units (Set Once)
    if (!rawInfos['flopRateUnit']) {
      rawInfos['flopRateUnit'] = subCluster.flopRateSimd.unit.prefix + subCluster.flopRateSimd.unit.base;
    }
    if (!rawInfos['memBwRateUnit']) {
      rawInfos['memBwRateUnit'] = subCluster.memoryBandwidth.unit.prefix + subCluster.memoryBandwidth.unit.base;
    }

    // Get Maxima For Roofline Knee Render
    const peaks = rawInfos['roofData'];
    if (!peaks) {
      rawInfos['roofData'] = {
        flopRateScalar: { value: subCluster.flopRateScalar.value },
        flopRateSimd: { value: subCluster.flopRateSimd.value },
        memoryBandwidth: { value: subCluster.memoryBandwidth.value },
      };
    } else {
      for (const key of ['flopRateScalar', 'flopRateSimd', 'memoryBandwidth']) {
        peaks[key]['value'] = Math.max(peaks[key]['value'], subCluster[key].value);
      }
    }
  }

  // Keymetrics (Data on Cluster-Scope)
  // NOTE(review): as parenthesised, `* 100` and `/ 100` cancel before flooring, so the value is
  // floored to an integer — confirm whether two-decimal rounding was intended
  rawInfos['flopRate'] = Math.floor((sumNodeAverages('flops_any') * 100) / 100);
  rawInfos['memBwRate'] = Math.floor((sumNodeAverages('mem_bw') * 100) / 100);

  return rawInfos;
});
|
||||
|
||||
/* Functions */
|
||||
/**
 * Pick the legend color for the entry at position `targetIdx`.
 *
 * The palette is taken from the `colors` export of Pie.svelte: the
 * 'colorblind' palette when the component-level `useCbColors` flag is set,
 * the 'default' palette otherwise.
 *
 * @param {number} targetIdx - Position of the entry (e.g. the `#each` index).
 * @returns {string} Color of the palette slot; indices outside the palette
 *   wrap around (modulo palette length) instead of yielding `undefined`.
 */
function legendColors(targetIdx) {
  const palette = useCbColors ? colors['colorblind'] : colors['default'];
  const size = palette.length;
  // Double modulo keeps the slot within [0, size) for negative indices as well;
  // a plain `%` would return a negative slot and therefore `undefined`.
  return palette[((targetIdx % size) + size) % size];
}
|
||||
|
||||
/**
 * Build the scatter payload for the job roofline from per-job metric statistics.
 *
 * For every job the average of the 'flops_any' stat becomes the y value and
 * the ratio avg('flops_any') / avg('mem_bw') becomes the x value (intensity).
 * The color value is the job duration as a fraction of one day, capped at 1.0.
 *
 * Jobs are skipped when the intensity is not a finite number. This covers a
 * zero 'mem_bw' average as well as jobs where one of the two stats is absent
 * (previously the latter threw a TypeError and aborted the whole transform).
 *
 * @param {Array<Object>|null|undefined} clusterData - Job entries, each read
 *   for `stats` (list of `{name, data: {avg}}`) and `duration` (seconds).
 * @returns {Array|null} `[null, [x, y], c]` (see Roofline.svelte for the data
 *   format), or `null` when no job produced a usable data point.
 */
function transformJobsStatsToData(clusterData) {
  const SECONDS_PER_DAY = 86400.0;

  if (!clusterData) {
    console.warn("transformJobsStatsToData: metrics for 'mem_bw' and/or 'flops_any' missing!");
    return null;
  }

  const x = [];
  const y = [];
  const c = []; // values from 0 to 1 representing the duration
  let jobsWithoutStats = 0;

  for (const job of clusterData) {
    const flopsEntry = job?.stats?.find((s) => s.name === "flops_any");
    const memBwEntry = job?.stats?.find((s) => s.name === "mem_bw");
    if (!flopsEntry || !memBwEntry) {
      jobsWithoutStats += 1;
      continue;
    }

    const f = flopsEntry.data?.avg;
    const m = memBwEntry.data?.avg;
    const intensity = f / m;
    // Rejects NaN (0/0, missing avg) and +/-Infinity (division by zero)
    if (!Number.isFinite(intensity)) continue;

    x.push(intensity);
    y.push(f);
    // Long Jobs > 1 Day: Use max Color
    c.push(Math.min(job.duration / SECONDS_PER_DAY, 1.0));
  }

  if (jobsWithoutStats > 0) {
    console.warn(`transformJobsStatsToData: skipped ${jobsWithoutStats} job(s) without 'mem_bw' and/or 'flops_any' stats`);
  }

  // x, y and c are always pushed together, so one length check covers all three
  return x.length > 0 ? [null, [x, y], c] : null;
}
|
||||
|
||||
/**
 * Reduce per-job statistics to the compact info records handed to the
 * roofline plot as `jobsData`.
 *
 * @param {Array<Object>|null|undefined} clusterData - Job entries; `id`,
 *   `jobId`, `numNodes`, `numAccelerators` and `duration` are read.
 * @returns {Array<Object>} One `{id, jobId, numNodes, numAcc, duration}`
 *   record per job, with `duration` passed through `formatDurationTime` and
 *   `numAcc` falling back to 0; an empty array (plus a console warning) when
 *   no data was given.
 */
function transformJobsStatsToInfo(clusterData) {
  if (!clusterData) {
    console.warn("transformJobsStatsToInfo: jobInfo missing!");
    return [];
  }

  const toInfo = ({ id, jobId, numNodes, numAccelerators, duration }) => ({
    id,
    jobId,
    numNodes,
    numAcc: numAccelerators || 0,
    duration: formatDurationTime(duration),
  });

  return clusterData.map((job) => toInfo(job));
}
|
||||
|
||||
/* Inspect */
|
||||
$inspect(clusterInfo).with((type, clusterInfo) => {
|
||||
console.log(type, 'clusterInfo', clusterInfo)
|
||||
});
|
||||
|
||||
</script>
|
||||
|
||||
<Card>
|
||||
<CardHeader class="text-center">
|
||||
<h3 class="mb-0">{presetCluster.charAt(0).toUpperCase() + presetCluster.slice(1)} Dashboard</h3>
|
||||
</CardHeader>
|
||||
<CardBody>
|
||||
{#if $statusQuery.fetching || $statesTimed.fetching || $topJobsQuery.fetching || $nodeStatusQuery.fetching}
|
||||
<Row class="justify-content-center">
|
||||
<Col xs="auto">
|
||||
<Spinner />
|
||||
</Col>
|
||||
</Row>
|
||||
|
||||
{:else if $statusQuery.error || $statesTimed.error || $topJobsQuery.error || $nodeStatusQuery.error}
|
||||
<Row cols={{xs:1, md:2}}>
|
||||
{#if $statusQuery.error}
|
||||
<Col>
|
||||
<Card color="danger">Error Requesting StatusQuery: {$statusQuery.error.message}</Card>
|
||||
</Col>
|
||||
{/if}
|
||||
{#if $statesTimed.error}
|
||||
<Col>
|
||||
<Card color="danger">Error Requesting StatesTimed: {$statesTimed.error.message}</Card>
|
||||
</Col>
|
||||
{/if}
|
||||
{#if $topJobsQuery.error}
|
||||
<Col>
|
||||
<Card color="danger">Error Requesting TopJobsQuery: {$topJobsQuery.error.message}</Card>
|
||||
</Col>
|
||||
{/if}
|
||||
{#if $nodeStatusQuery.error}
|
||||
<Col>
|
||||
<Card color="danger">Error Requesting NodeStatusQuery: {$nodeStatusQuery.error.message}</Card>
|
||||
</Col>
|
||||
{/if}
|
||||
</Row>
|
||||
|
||||
{:else}
|
||||
<Row cols={{xs:1, md:2, xl: 3}}>
|
||||
<Col> <!-- Info Card -->
|
||||
<Card class="h-auto mt-1">
|
||||
<CardHeader>
|
||||
<CardTitle class="mb-0">Cluster "{presetCluster.charAt(0).toUpperCase() + presetCluster.slice(1)}"</CardTitle>
|
||||
<span>{[...clusterInfo?.processorTypes].toString()}</span>
|
||||
</CardHeader>
|
||||
<CardBody>
|
||||
<Table borderless>
|
||||
<tr class="py-2">
|
||||
<td style="font-size:x-large;">{clusterInfo?.runningJobs} Running Jobs</td>
|
||||
<td colspan="2" style="font-size:x-large;">{clusterInfo?.activeUsers} Active Users</td>
|
||||
</tr>
|
||||
<hr class="my-1"/>
|
||||
<tr class="pt-2">
|
||||
<td style="font-size: large;">
|
||||
Flop Rate (<span style="cursor: help;" title="Flops[Any] = (Flops[Double] x 2) + Flops[Single]">Any</span>)
|
||||
</td>
|
||||
<td colspan="2" style="font-size: large;">
|
||||
Memory BW Rate
|
||||
</td>
|
||||
</tr>
|
||||
<tr class="pb-2">
|
||||
<td style="font-size:x-large;">
|
||||
{clusterInfo?.flopRate}
|
||||
{clusterInfo?.flopRateUnit}
|
||||
</td>
|
||||
<td colspan="2" style="font-size:x-large;">
|
||||
{clusterInfo?.memBwRate}
|
||||
{clusterInfo?.memBwRateUnit}
|
||||
</td>
|
||||
</tr>
|
||||
<hr class="my-1"/>
|
||||
<tr class="py-2">
|
||||
<th scope="col">Allocated Nodes</th>
|
||||
<td style="min-width: 100px;"
|
||||
><div class="col">
|
||||
<Progress
|
||||
value={clusterInfo?.allocatedNodes}
|
||||
max={clusterInfo?.totalNodes}
|
||||
/>
|
||||
</div></td
|
||||
>
|
||||
<td
|
||||
>{clusterInfo?.allocatedNodes} / {clusterInfo?.totalNodes}
|
||||
Nodes</td
|
||||
>
|
||||
</tr>
|
||||
<tr class="py-2">
|
||||
<th scope="col">Allocated Cores</th>
|
||||
<td style="min-width: 100px;"
|
||||
><div class="col">
|
||||
<Progress
|
||||
value={clusterInfo?.allocatedCores}
|
||||
max={clusterInfo?.totalCores}
|
||||
/>
|
||||
</div></td
|
||||
>
|
||||
<td
|
||||
>{formatNumber(clusterInfo?.allocatedCores)} / {formatNumber(clusterInfo?.totalCores)}
|
||||
Cores</td
|
||||
>
|
||||
</tr>
|
||||
{#if clusterInfo?.totalAccs !== 0}
|
||||
<tr class="py-2">
|
||||
<th scope="col">Allocated Accelerators</th>
|
||||
<td style="min-width: 100px;"
|
||||
><div class="col">
|
||||
<Progress
|
||||
value={clusterInfo?.allocatedAccs}
|
||||
max={clusterInfo?.totalAccs}
|
||||
/>
|
||||
</div></td
|
||||
>
|
||||
<td
|
||||
>{clusterInfo?.allocatedAccs} / {clusterInfo?.totalAccs}
|
||||
Accelerators</td
|
||||
>
|
||||
</tr>
|
||||
{/if}
|
||||
</Table>
|
||||
</CardBody>
|
||||
</Card>
|
||||
</Col>
|
||||
<Col> <!-- Pie Jobs -->
|
||||
<Row cols={{xs:1, md:2}}>
|
||||
<Col class="p-2">
|
||||
<div bind:clientWidth={colWidthJobs}>
|
||||
<h4 class="text-center">
|
||||
Top Projects: Jobs
|
||||
</h4>
|
||||
<Pie
|
||||
{useCbColors}
|
||||
canvasId="hpcpie-jobs-projects"
|
||||
size={colWidthJobs * 0.75}
|
||||
sliceLabel={'Jobs'}
|
||||
quantities={$topJobsQuery.data.jobsStatistics.map(
|
||||
(tp) => tp['totalJobs'],
|
||||
)}
|
||||
entities={$topJobsQuery.data.jobsStatistics.map((tp) => scrambleNames ? scramble(tp.id) : tp.id)}
|
||||
/>
|
||||
</div>
|
||||
</Col>
|
||||
<Col class="p-2">
|
||||
<Table>
|
||||
<tr class="mb-2">
|
||||
<th></th>
|
||||
<th style="padding-left: 0.5rem;">Project</th>
|
||||
<th>Jobs</th>
|
||||
</tr>
|
||||
{#each $topJobsQuery.data.jobsStatistics as tp, i}
|
||||
<tr>
|
||||
<td><Icon name="circle-fill" style="color: {legendColors(i)};" /></td>
|
||||
<td>
|
||||
<a target="_blank" href="/monitoring/jobs/?cluster={presetCluster}&state=running&project={tp.id}&projectMatch=eq"
|
||||
>{scrambleNames ? scramble(tp.id) : tp.id}
|
||||
</a>
|
||||
</td>
|
||||
<td>{tp['totalJobs']}</td>
|
||||
</tr>
|
||||
{/each}
|
||||
</Table>
|
||||
</Col>
|
||||
</Row>
|
||||
</Col>
|
||||
<Col> <!-- Job Roofline -->
|
||||
<div bind:clientWidth={colWidthRoof}>
|
||||
{#key $statusQuery?.data?.jobsMetricStats}
|
||||
<Roofline
|
||||
useColors={true}
|
||||
allowSizeChange
|
||||
width={colWidthRoof - 10}
|
||||
height={300}
|
||||
subCluster={clusterInfo?.roofData ? clusterInfo.roofData : null}
|
||||
roofData={transformJobsStatsToData($statusQuery?.data?.jobsMetricStats)}
|
||||
jobsData={transformJobsStatsToInfo($statusQuery?.data?.jobsMetricStats)}
|
||||
/>
|
||||
{/key}
|
||||
</div>
|
||||
</Col>
|
||||
<Col> <!-- Resources/Job Histogram -->
|
||||
{#if clusterInfo?.totalAccs == 0}
|
||||
<Histogram
|
||||
data={convert2uplot($nodeStatusQuery.data.jobsStatistics[0].histNumCores)}
|
||||
title="Number of Cores Distribution"
|
||||
xlabel="Allocated Cores"
|
||||
xunit="Nodes"
|
||||
ylabel="Number of Jobs"
|
||||
yunit="Jobs"
|
||||
height="275"
|
||||
enableFlip
|
||||
/>
|
||||
{:else}
|
||||
<Histogram
|
||||
data={convert2uplot($nodeStatusQuery.data.jobsStatistics[0].histNumAccs)}
|
||||
title="Number of Accelerators Distribution"
|
||||
xlabel="Allocated Accs"
|
||||
xunit="Accs"
|
||||
ylabel="Number of Jobs"
|
||||
yunit="Jobs"
|
||||
height="275"
|
||||
enableFlip
|
||||
/>
|
||||
{/if}
|
||||
</Col>
|
||||
<Col> <!-- Stacked SchedState -->
|
||||
<div bind:clientWidth={colWidthStacked1}>
|
||||
{#key $statesTimed?.data?.nodeStates}
|
||||
<Stacked
|
||||
data={$statesTimed?.data?.nodeStates}
|
||||
width={colWidthStacked1 * 0.95}
|
||||
xlabel="Time"
|
||||
ylabel="Nodes"
|
||||
yunit = "#Count"
|
||||
title = "Node States"
|
||||
stateType = "Node"
|
||||
/>
|
||||
{/key}
|
||||
</div>
|
||||
</Col>
|
||||
<Col> <!-- Stacked Healthstate -->
|
||||
<div bind:clientWidth={colWidthStacked2}>
|
||||
{#key $statesTimed?.data?.healthStates}
|
||||
<Stacked
|
||||
data={$statesTimed?.data?.healthStates}
|
||||
width={colWidthStacked2 * 0.95}
|
||||
xlabel="Time"
|
||||
ylabel="Nodes"
|
||||
yunit = "#Count"
|
||||
title = "Health States"
|
||||
stateType = "Health"
|
||||
/>
|
||||
{/key}
|
||||
</div>
|
||||
</Col>
|
||||
</Row>
|
||||
{/if}
|
||||
</CardBody>
|
||||
</Card>
|
||||
@@ -22,11 +22,11 @@
|
||||
} from "@urql/svelte";
|
||||
import {
|
||||
convert2uplot,
|
||||
} from "../generic/utils.js";
|
||||
import PlotGrid from "../generic/PlotGrid.svelte";
|
||||
import Histogram from "../generic/plots/Histogram.svelte";
|
||||
import HistogramSelection from "../generic/select/HistogramSelection.svelte";
|
||||
import Refresher from "../generic/helper/Refresher.svelte";
|
||||
} from "../../generic/utils.js";
|
||||
import PlotGrid from "../../generic/PlotGrid.svelte";
|
||||
import Histogram from "../../generic/plots/Histogram.svelte";
|
||||
import HistogramSelection from "../../generic/select/HistogramSelection.svelte";
|
||||
import Refresher from "../../generic/helper/Refresher.svelte";
|
||||
|
||||
/* Svelte 5 Props */
|
||||
let {
|
||||
@@ -22,12 +22,12 @@
|
||||
gql,
|
||||
getContextClient,
|
||||
} from "@urql/svelte";
|
||||
import { formatDurationTime } from "../generic/units.js";
|
||||
import Refresher from "../generic/helper/Refresher.svelte";
|
||||
import TimeSelection from "../generic/select/TimeSelection.svelte";
|
||||
import Roofline from "../generic/plots/Roofline.svelte";
|
||||
import Pie, { colors } from "../generic/plots/Pie.svelte";
|
||||
import Stacked from "../generic/plots/Stacked.svelte";
|
||||
import { formatDurationTime } from "../../generic/units.js";
|
||||
import Refresher from "../../generic/helper/Refresher.svelte";
|
||||
import TimeSelection from "../../generic/select/TimeSelection.svelte";
|
||||
import Roofline from "../../generic/plots/Roofline.svelte";
|
||||
import Pie, { colors } from "../../generic/plots/Pie.svelte";
|
||||
import Stacked from "../../generic/plots/Stacked.svelte";
|
||||
|
||||
/* Svelte 5 Props */
|
||||
let {
|
||||
@@ -83,7 +83,7 @@
|
||||
}
|
||||
`,
|
||||
variables: {
|
||||
filter: { cluster: { eq: cluster }, timeStart: stackedFrom},
|
||||
filter: { cluster: { eq: cluster }, timeStart: 1760096999},
|
||||
typeNode: "node",
|
||||
typeHealth: "health"
|
||||
},
|
||||
@@ -27,10 +27,10 @@
|
||||
scramble,
|
||||
scrambleNames,
|
||||
convert2uplot,
|
||||
} from "../generic/utils.js";
|
||||
import Pie, { colors } from "../generic/plots/Pie.svelte";
|
||||
import Histogram from "../generic/plots/Histogram.svelte";
|
||||
import Refresher from "../generic/helper/Refresher.svelte";
|
||||
} from "../../generic/utils.js";
|
||||
import Pie, { colors } from "../../generic/plots/Pie.svelte";
|
||||
import Histogram from "../../generic/plots/Histogram.svelte";
|
||||
import Refresher from "../../generic/helper/Refresher.svelte";
|
||||
|
||||
/* Svelte 5 Props */
|
||||
let {
|
||||
Reference in New Issue
Block a user