Add public dashboard and route, add DoubleMetricPlot and GQL queries

- add roofline legend display switch
- small fixes
This commit is contained in:
Christoph Kluge
2025-12-09 10:26:55 +01:00
parent 967f0a3294
commit 4083de2a51
23 changed files with 2918 additions and 96 deletions

View File

@@ -74,5 +74,6 @@ export default [
entrypoint('node', 'src/node.entrypoint.js'),
entrypoint('analysis', 'src/analysis.entrypoint.js'),
entrypoint('status', 'src/status.entrypoint.js'),
entrypoint('dashpublic', 'src/dashpublic.entrypoint.js'),
entrypoint('config', 'src/config.entrypoint.js')
];

View File

@@ -0,0 +1,671 @@
<!--
@component Main cluster status view component; renders current system-usage information
Properties:
- `presetCluster String`: The cluster to show status information for
-->
<script>
import {
getContext
} from "svelte"
import {
queryStore,
gql,
getContextClient,
} from "@urql/svelte";
import {
init,
scramble,
scrambleNames,
convert2uplot
} from "./generic/utils.js";
import {
formatDurationTime,
formatNumber,
} from "./generic/units.js";
import {
Row,
Col,
Card,
CardTitle,
CardHeader,
CardBody,
Spinner,
Table,
Progress,
Icon,
} from "@sveltestrap/sveltestrap";
import Roofline from "./generic/plots/Roofline.svelte";
import Pie, { colors } from "./generic/plots/Pie.svelte";
import Stacked from "./generic/plots/Stacked.svelte";
// import Histogram from "./generic/plots/Histogram.svelte";
import DoubleMetric from "./generic/plots/DoubleMetricPlot.svelte";
/* Svelte 5 Props */
let {
// Name of the cluster this dashboard shows; used in every query filter below
presetCluster,
} = $props();
/*Const Init */
// `query` store returned by init(); $initq.data.clusters is read for the subCluster metadata
const { query: initq } = init();
// urql client handed to every queryStore() call in this component
const client = getContextClient();
// Colorblind palette switch from the "cc-config" context; false when the context or key is missing
const useCbColors = getContext("cc-config")?.plotConfiguration_colorblindMode || false
/* States */
// Paging passed to topJobsQuery
let pagingState = $state({page: 1, itemsPerPage: 10}) // Top 10
// Start of the node-metrics window: 5 minutes before component init
let from = $state(new Date(Date.now() - (5 * 60 * 1000)));
// Start of the cluster-metrics window: 8 hours before component init
let clusterFrom = $state(new Date(Date.now() - (8 * 60 * 60 * 1000)));
// Common end of both windows: component init time
let to = $state(new Date(Date.now()));
// Unix timestamp in seconds, 14400 s (4 hours) before component init
let stackedFrom = $state(Math.floor(Date.now() / 1000) - 14400);
// Column widths in px, filled by the bind:clientWidth directives in the markup
let colWidthStates = $state(0);
let colWidthRoof = $state(0);
let colWidthTotals = $state(0);
let colWidthStacked = $state(0);
/* Derived */
// States for Stacked charts
// Timed node-state and health-state counts of `presetCluster` for the stacked charts.
// Both aliases call nodeStatesTimed with the same filter; only the `type` argument differs.
const statesTimed = $derived(queryStore({
  client: client,
  query: gql`
query ($filter: [NodeFilter!], $typeNode: String!, $typeHealth: String!) {
nodeStates: nodeStatesTimed(filter: $filter, type: $typeNode) {
state
counts
times
}
healthStates: nodeStatesTimed(filter: $filter, type: $typeHealth) {
state
counts
times
}
}
`,
  variables: {
    // Window start comes from the `stackedFrom` state (Unix seconds, 4 hours back)
    // instead of the fixed debug timestamp that was sent before
    filter: { cluster: { eq: presetCluster }, timeStart: stackedFrom },
    typeNode: "node",
    typeHealth: "health"
  },
  requestPolicy: "network-only"
}));
// Note: nodeMetrics are requested on configured $timestep resolution
// Result: The latest 5 minutes (datapoints) for each node independent of job
// Combined status query for `presetCluster`. One request returns:
// - nodeMetrics: per-node averages between `from` and `to` (roofline + key metrics)
// - jobsMetricStats / jobs: running jobs (metric averages, hostnames per job)
// - allocatedNodes, nodeStates, nodes: allocation and scheduler state per node
// - jobsStatistics: running-job totals grouped by subCluster (info card)
// - clusterMetrics: cluster-wide metric data between `clusterFrom` and `to`
const statusQuery = $derived(queryStore({
client: client,
query: gql`
query (
$cluster: String!
$metrics: [String!]
$from: Time!
$to: Time!
$clusterFrom: Time!
$jobFilter: [JobFilter!]!
$nodeFilter: [NodeFilter!]!
$paging: PageRequest!
$sorting: OrderByInput!
) {
# Node 5 Minute Averages for Roofline
nodeMetrics(
cluster: $cluster
metrics: $metrics
from: $from
to: $to
) {
host
subCluster
metrics {
name
metric {
series {
statistics {
avg
}
}
}
}
}
# Running Job Metric Average for Rooflines
jobsMetricStats(filter: $jobFilter, metrics: $metrics) {
id
jobId
duration
numNodes
numAccelerators
subCluster
stats {
name
data {
avg
}
}
}
# Get Jobs for Per-Node Counts
jobs(filter: $jobFilter, order: $sorting, page: $paging) {
items {
jobId
resources {
hostname
}
}
count
}
# Only counts shared nodes once
allocatedNodes(cluster: $cluster) {
name
count
}
# Get Current States fir Pie Charts
nodeStates(filter: $nodeFilter) {
state
count
}
# Get States for Node Roofline; $sorting unused in backend: Use placeholder
nodes(filter: $nodeFilter, order: $sorting) {
count
items {
hostname
cluster
subCluster
schedulerState
}
}
# totalNodes includes multiples if shared jobs: Info-Card Data
jobsStatistics(
filter: $jobFilter
page: $paging
sortBy: TOTALJOBS
groupBy: SUBCLUSTER
) {
id
totalJobs
totalUsers
totalCores
totalAccs
}
# TEST
clusterMetrics(
cluster: $cluster
metrics: $metrics
from: $clusterFrom
to: $to
) {
nodeCount
metrics {
name
unit {
prefix
base
}
timestep
data
}
}
}
`,
variables: {
cluster: presetCluster,
metrics: ["flops_any", "mem_bw"], // Fixed names for roofline and status bars
// Time bounds are sent as ISO-8601 strings
from: from.toISOString(),
clusterFrom: clusterFrom.toISOString(),
to: to.toISOString(),
// Only running jobs of this cluster
jobFilter: [{ state: ["running"] }, { cluster: { eq: presetCluster } }],
nodeFilter: { cluster: { eq: presetCluster }},
paging: { itemsPerPage: -1, page: 1 }, // Get all: -1
sorting: { field: "startTime", type: "col", order: "DESC" }
},
// Always fetch from the network instead of the urql cache
requestPolicy: "network-only"
}));
// Running-job totals of `presetCluster` grouped by project,
// limited to the page described by `pagingState`.
const topJobsQuery = $derived.by(() => {
  const runningOnCluster = [
    { state: ["running"] },
    { cluster: { eq: presetCluster} },
  ];
  return queryStore({
    client,
    query: gql`
query (
$filter: [JobFilter!]!
$paging: PageRequest!
) {
jobsStatistics(
filter: $filter
page: $paging
sortBy: TOTALJOBS
groupBy: PROJECT
) {
id
totalJobs
}
}
`,
    variables: {
      filter: runningOnCluster,
      paging: pagingState, // Top 10
    },
    requestPolicy: "network-only",
  });
});
// Core- and accelerator-count histograms over the running jobs of `presetCluster`.
const nodeStatusQuery = $derived.by(() => {
  const runningOnCluster = [
    { state: ["running"] },
    { cluster: { eq: presetCluster } },
  ];
  return queryStore({
    client,
    query: gql`
query (
$filter: [JobFilter!]!
$selectedHistograms: [String!]
$numDurationBins: String
) {
jobsStatistics(filter: $filter, metrics: $selectedHistograms, numDurationBins: $numDurationBins) {
histNumCores {
count
value
}
histNumAccs {
count
value
}
}
}
`,
    variables: {
      filter: runningOnCluster,
      // No metrics requested for node hardware stats - an empty array can be used for refresh
      selectedHistograms: [],
      // Hardcode or selector?
      numDurationBins: "1h",
    },
    requestPolicy: "network-only",
  });
});
// Cluster-level figures for the info card and the roofline, accumulated over
// all subClusters of `presetCluster`:
// - allocated*/total* counters, activeUsers and runningJobs are sums
// - processorTypes is a Set of the distinct processor type names
// - flopRateUnit/memBwRateUnit are taken from the first subCluster that yields one
// - roofData holds the per-field maximum over all subClusters
// - flopRate/memBwRate are sums of the per-node averages from $statusQuery
// Yields {} as long as the init query has not delivered the cluster list.
const clusterInfo = $derived.by(() => {
  if (!$initq?.data?.clusters) return {};

  const rawInfos = {};
  // Adds `amount` to a numeric field; a missing or NaN amount counts as 0
  const addTo = (key, amount) => {
    rawInfos[key] = (rawInfos[key] || 0) + (amount || 0);
  };
  // Sum of the first-series average of one metric over all returned nodes
  const sumNodeAverages = (metricName) =>
    $statusQuery?.data?.nodeMetrics?.reduce(
      (sum, node) =>
        sum + (node.metrics.find((m) => m.name === metricName)?.metric?.series?.[0]?.statistics?.avg || 0),
      0, // Initial Value
    ) || 0;

  const subClusters = $initq.data.clusters.find((c) => c.name === presetCluster)?.subClusters || [];
  for (const subCluster of subClusters) {
    // Resolve the result rows of this subCluster once instead of once per field
    const allocation = $statusQuery?.data?.allocatedNodes?.find(({ name }) => name === subCluster.name);
    const jobStats = $statusQuery?.data?.jobsStatistics?.find(({ id }) => id === subCluster.name);

    // Allocations
    addTo('allocatedNodes', allocation?.count);
    addTo('allocatedCores', jobStats?.totalCores);
    addTo('allocatedAccs', jobStats?.totalAccs);

    // Infos
    if (!rawInfos['processorTypes']) rawInfos['processorTypes'] = new Set();
    // Only known types are recorded, so `undefined` never ends up in the set
    if (subCluster?.processorType) rawInfos['processorTypes'].add(subCluster.processorType);
    addTo('activeUsers', jobStats?.totalUsers);
    addTo('runningJobs', jobStats?.totalJobs);
    addTo('totalNodes', subCluster?.numberOfNodes);
    addTo('totalCores', subCluster?.socketsPerNode * subCluster?.coresPerSocket * subCluster?.numberOfNodes);
    addTo('totalAccs', subCluster?.numberOfNodes * subCluster?.topology?.accelerators?.length);

    // Units (Set Once)
    if (!rawInfos['flopRateUnit']) rawInfos['flopRateUnit'] = subCluster.flopRateSimd.unit.prefix + subCluster.flopRateSimd.unit.base;
    if (!rawInfos['memBwRateUnit']) rawInfos['memBwRateUnit'] = subCluster.memoryBandwidth.unit.prefix + subCluster.memoryBandwidth.unit.base;

    // Get Maxima For Roofline Knee Render
    const roof = rawInfos['roofData'];
    if (!roof) {
      rawInfos['roofData'] = {
        flopRateScalar: { value: subCluster.flopRateScalar.value },
        flopRateSimd: { value: subCluster.flopRateSimd.value },
        memoryBandwidth: { value: subCluster.memoryBandwidth.value },
      };
    } else {
      for (const key of ['flopRateScalar', 'flopRateSimd', 'memoryBandwidth']) {
        roof[key].value = Math.max(roof[key].value, subCluster[key].value);
      }
    }
  }

  // Keymetrics (Data on Cluster-Scope)
  // NOTE(review): the parentheses make this floor to a whole number; if two
  // decimals were intended it should be Math.floor(x * 100) / 100 - confirm
  rawInfos['flopRate'] = Math.floor((sumNodeAverages('flops_any') * 100) / 100);
  rawInfos['memBwRate'] = Math.floor((sumNodeAverages('mem_bw') * 100) / 100);

  return rawInfos;
});
// Current node states for the pie chart and its legend table:
// restricted to the displayed scheduler states, largest count first.
// Always yields an array, so the markup can call .map()/#each on it safely.
const refinedStateData = $derived.by(() => {
  const shownStates = ['allocated', 'reserved', 'idle', 'mixed', 'down', 'unknown'];
  // Fallback to an empty list while the status query carries no nodeStates
  return ($statusQuery?.data?.nodeStates ?? [])
    .filter((e) => shownStates.includes(e.state))
    .sort((a, b) => b.count - a.count);
});
/* Functions */
/*
 * Picks the legend color for entry `targetIdx`.
 * Palette: 'colorblind' when colorblind mode is configured, otherwise
 * 'alternative' when `useAltColors` is truthy, otherwise 'default'.
 * Indices beyond the palette length wrap around to its start.
 */
function legendColors(targetIdx, useAltColors) {
  const paletteName = useCbColors
    ? 'colorblind'
    : (useAltColors ? 'alternative' : 'default');
  const palette = colors[paletteName];
  const slot = (palette.length + targetIdx) % palette.length;
  return palette[slot];
}
/*
 * Converts per-node metric averages into roofline plot data.
 *
 * @param subclusterData  List of nodes, each with `metrics[]` entries carrying
 *                        `name` and `metric.series[0].statistics.avg`.
 * @returns `[null, [x, y]]` with x = flops_any / mem_bw (intensity) and
 *          y = flops_any per node, or `null` when there are no nodes.
 *          For dataformat see roofline.svelte.
 *
 * Every node yields exactly one x/y entry, so indices stay aligned with the
 * node list. A missing metric or empty series counts as 0 instead of throwing.
 */
function transformNodesStatsToData(subclusterData) {
  if (!Array.isArray(subclusterData)) return null;

  // Average of the first series of `metricName`; 0 when anything is missing
  const nodeAverage = (node, metricName) =>
    node?.metrics?.find((s) => s.name === metricName)?.metric?.series?.[0]?.statistics?.avg ?? 0;

  const x = [];
  const y = [];
  for (const node of subclusterData) {
    const f = nodeAverage(node, "flops_any");
    const m = nodeAverage(node, "mem_bw");
    const intensity = f / m;
    // NaN/Infinity (division by zero) become float zero:
    // will not show in Log-Plot (always below render limit)
    x.push(Number.isFinite(intensity) ? intensity : 0.0);
    y.push(f);
  }

  return x.length > 0 ? [null, [x, y]] : null;
}
/*
 * Builds the per-node info list shown alongside the node roofline.
 *
 * @param subClusterData  Nodes as returned by the nodeMetrics query
 *                        (`host`, `subCluster`).
 * @returns One `{ nodeName, schedulerState, numJobs }` entry per node, in
 *          input order; `[]` without input.
 *   - nodeName: `host`, or "unknown" when missing
 *   - schedulerState: state of the matching entry in $statusQuery nodes
 *     (same hostname and subCluster), or "notindb" when there is none
 *   - numJobs: number of jobs in $statusQuery that list the host as a resource
 *
 * Job counts are collected once up front, so the jobs list is not rescanned
 * for every node.
 */
function transformNodesStatsToInfo(subClusterData) {
  if (!subClusterData) return [];

  // Use Nodes as Returned from CCMS, *NOT* as saved in DB via SlurmState-API!
  const dbNodes = $statusQuery?.data?.nodes?.items ?? [];

  // hostname -> number of jobs using that host (a job counts once per host)
  const jobsPerHost = new Map();
  for (const job of $statusQuery?.data?.jobs?.items ?? []) {
    const jobHosts = new Set((job?.resources ?? []).map((res) => res?.hostname));
    for (const host of jobHosts) {
      jobsPerHost.set(host, (jobsPerHost.get(host) ?? 0) + 1);
    }
  }

  return subClusterData.map((node) => {
    const nodeName = node?.host || "unknown";
    const nodeMatch = dbNodes.find(
      (n) => n.hostname === nodeName && n.subCluster === node?.subCluster,
    );
    return {
      nodeName,
      schedulerState: nodeMatch?.schedulerState || "notindb",
      numJobs: jobsPerHost.get(nodeName) ?? 0,
    };
  });
}
/* Inspect */
// Debug output: logs the derived clusterInfo object to the browser console
// NOTE(review): looks like leftover debugging - consider removing before release
$inspect(clusterInfo).with((type, clusterInfo) => {
console.log(type, 'clusterInfo', clusterInfo)
});
// Debug output: logs the clusterMetrics part of the status query result
$inspect($statusQuery?.data?.clusterMetrics).with((type, clusterMetrics) => {
console.log(type, 'clusterMetrics', clusterMetrics)
});
</script>
<Card style="height: 98vh;">
<CardHeader class="text-center">
<h3 class="mb-0">{presetCluster.charAt(0).toUpperCase() + presetCluster.slice(1)} Dashboard</h3>
</CardHeader>
<CardBody>
{#if $statusQuery.fetching || $statesTimed.fetching || $topJobsQuery.fetching || $nodeStatusQuery.fetching}
<Row class="justify-content-center">
<Col xs="auto">
<Spinner />
</Col>
</Row>
{:else if $statusQuery.error || $statesTimed.error || $topJobsQuery.error || $nodeStatusQuery.error}
<Row cols={{xs:1, md:2}}>
{#if $statusQuery.error}
<Col>
<Card color="danger">Error Requesting StatusQuery: {$statusQuery.error.message}</Card>
</Col>
{/if}
{#if $statesTimed.error}
<Col>
<Card color="danger">Error Requesting StatesTimed: {$statesTimed.error.message}</Card>
</Col>
{/if}
{#if $topJobsQuery.error}
<Col>
<Card color="danger">Error Requesting TopJobsQuery: {$topJobsQuery.error.message}</Card>
</Col>
{/if}
{#if $nodeStatusQuery.error}
<Col>
<Card color="danger">Error Requesting NodeStatusQuery: {$nodeStatusQuery.error.message}</Card>
</Col>
{/if}
</Row>
{:else}
<Row cols={{xs:1, md:2, xl: 3}}>
<Col> <!-- Info Card -->
<Card class="h-auto mt-1">
<CardHeader>
<CardTitle class="mb-0">Cluster "{presetCluster.charAt(0).toUpperCase() + presetCluster.slice(1)}"</CardTitle>
<!-- processorTypes is absent while clusterInfo is still {}; spread an empty list then instead of throwing -->
<span>{[...(clusterInfo?.processorTypes ?? [])].toString()}</span>
</CardHeader>
<CardBody>
<Table borderless>
<tr class="py-2">
<td style="font-size:x-large;">{clusterInfo?.runningJobs} Running Jobs</td>
<td colspan="2" style="font-size:x-large;">{clusterInfo?.activeUsers} Active Users</td>
</tr>
<hr class="my-1"/>
<tr class="pt-2">
<td style="font-size: large;">
Flop Rate (<span style="cursor: help;" title="Flops[Any] = (Flops[Double] x 2) + Flops[Single]">Any</span>)
</td>
<td colspan="2" style="font-size: large;">
Memory BW Rate
</td>
</tr>
<tr class="pb-2">
<td style="font-size:x-large;">
{clusterInfo?.flopRate}
{clusterInfo?.flopRateUnit}
</td>
<td colspan="2" style="font-size:x-large;">
{clusterInfo?.memBwRate}
{clusterInfo?.memBwRateUnit}
</td>
</tr>
<hr class="my-1"/>
<tr class="py-2">
<th scope="col">Allocated Nodes</th>
<td style="min-width: 100px;"
><div class="col">
<Progress
value={clusterInfo?.allocatedNodes}
max={clusterInfo?.totalNodes}
/>
</div></td
>
<td
>{clusterInfo?.allocatedNodes} / {clusterInfo?.totalNodes}
Nodes</td
>
</tr>
<tr class="py-2">
<th scope="col">Allocated Cores</th>
<td style="min-width: 100px;"
><div class="col">
<Progress
value={clusterInfo?.allocatedCores}
max={clusterInfo?.totalCores}
/>
</div></td
>
<td
>{formatNumber(clusterInfo?.allocatedCores)} / {formatNumber(clusterInfo?.totalCores)}
Cores</td
>
</tr>
{#if clusterInfo?.totalAccs !== 0}
<tr class="py-2">
<th scope="col">Allocated Accelerators</th>
<td style="min-width: 100px;"
><div class="col">
<Progress
value={clusterInfo?.allocatedAccs}
max={clusterInfo?.totalAccs}
/>
</div></td
>
<td
>{clusterInfo?.allocatedAccs} / {clusterInfo?.totalAccs}
Accelerators</td
>
</tr>
{/if}
</Table>
</CardBody>
</Card>
</Col>
<Col> <!-- Pie Last States -->
<Row>
<Col class="px-3 mt-2 mt-lg-0">
<div bind:clientWidth={colWidthStates}>
{#key refinedStateData}
<h4 class="text-center">
Current Node States
</h4>
<Pie
useAltColors
canvasId="hpcpie-slurm"
size={colWidthStates * 0.75}
sliceLabel="Nodes"
quantities={refinedStateData.map(
(sd) => sd.count,
)}
entities={refinedStateData.map(
(sd) => sd.state,
)}
/>
{/key}
</div>
</Col>
<Col class="px-4 py-2">
{#key refinedStateData}
<Table>
<tr class="mb-2">
<th></th>
<th>Current State</th>
<th>Nodes</th>
</tr>
{#each refinedStateData as sd, i}
<tr>
<td><Icon name="circle-fill" style="color: {legendColors(i, true)};"/></td>
<td>{sd.state}</td>
<td>{sd.count}</td>
</tr>
{/each}
</Table>
{/key}
</Col>
</Row>
</Col>
<Col> <!-- General Cluster Info Card? -->
<!-- TODO -->
<Card>
<CardHeader>
<CardTitle>Infos</CardTitle>
</CardHeader>
<CardBody>
Contents
</CardBody>
</Card>
</Col>
<Col> <!-- Nodes Roofline -->
<div bind:clientWidth={colWidthRoof}>
{#key $statusQuery?.data?.nodeMetrics}
<Roofline
useColors={false}
useLegend={false}
allowSizeChange
width={colWidthRoof - 10}
height={300}
cluster={presetCluster}
subCluster={clusterInfo?.roofData ? clusterInfo.roofData : null}
roofData={transformNodesStatsToData($statusQuery?.data?.nodeMetrics)}
nodesData={transformNodesStatsToInfo($statusQuery?.data?.nodeMetrics)}
/>
{/key}
</div>
</Col>
<Col> <!-- Resources/Job Histogram OR Total Cluster Metric in Time SUMS-->
<div bind:clientWidth={colWidthTotals}>
<!-- clusterMetrics is a single object (nodeCount + metrics[]); timestep is a field of each metrics entry -->
<DoubleMetric
  width={colWidthTotals}
  timestep={$statusQuery?.data?.clusterMetrics?.metrics?.[0]?.timestep || 60}
  numNodes={$statusQuery?.data?.clusterMetrics?.nodeCount || 0}
  metricData={$statusQuery?.data?.clusterMetrics?.metrics || []}
  cluster={presetCluster}
/>
</div>
<!-- {#if clusterInfo?.totalAccs == 0}
<Histogram
data={convert2uplot($nodeStatusQuery.data.jobsStatistics[0].histNumCores)}
title="Number of Cores Distribution"
xlabel="Allocated Cores"
xunit="Nodes"
ylabel="Number of Jobs"
yunit="Jobs"
height="275"
enableFlip
/>
{:else}
<Histogram
data={convert2uplot($nodeStatusQuery.data.jobsStatistics[0].histNumAccs)}
title="Number of Accelerators Distribution"
xlabel="Allocated Accs"
xunit="Accs"
ylabel="Number of Jobs"
yunit="Jobs"
height="275"
enableFlip
/>
{/if} -->
</Col>
<Col> <!-- Stacked SchedState -->
<div bind:clientWidth={colWidthStacked}>
{#key $statesTimed?.data?.nodeStates}
<Stacked
data={$statesTimed?.data?.nodeStates}
width={colWidthStacked * 0.95}
xlabel="Time"
ylabel="Nodes"
yunit = "#Count"
title = "Node States"
stateType = "Node"
/>
{/key}
</div>
</Col>
</Row>
{/if}
</CardBody>
</Card>

View File

@@ -120,7 +120,7 @@
href: "/monitoring/status/",
icon: "clipboard-data",
perCluster: true,
listOptions: false,
listOptions: true,
menu: "Info",
},
];

View File

@@ -6,77 +6,43 @@
-->
<script>
import {
getContext
} from "svelte"
import {
init,
} from "./generic/utils.js";
import {
Row,
Col,
Card,
CardBody,
TabContent,
TabPane,
Spinner
} from "@sveltestrap/sveltestrap";
import StatusDash from "./status/StatusDash.svelte";
import UsageDash from "./status/UsageDash.svelte";
import StatisticsDash from "./status/StatisticsDash.svelte";
import DashDetails from "./status/DashDetails.svelte";
import DashInternal from "./status/DashInternal.svelte";
/* Svelte 5 Props */
let {
presetCluster
presetCluster,
displayType
} = $props();
/*Const Init */
const { query: initq } = init();
const useCbColors = getContext("cc-config")?.plotConfiguration_colorblindMode || false
const displayStatusDetail = (displayType === 'DETAILS');
</script>
<!-- Loading indicator & Refresh -->
<Row cols={1} class="mb-2">
<!-- <Row cols={1} class="mb-2">
<Col>
<h3 class="mb-0">Current Status of Cluster "{presetCluster.charAt(0).toUpperCase() + presetCluster.slice(1)}"</h3>
</Col>
</Row>
</Row> -->
{#if $initq.fetching}
<Row cols={1} class="text-center mt-3">
{#if displayType !== "DASHBOARD" && displayType !== "DETAILS"}
<Row>
<Col>
<Spinner />
</Col>
</Row>
{:else if $initq.error}
<Row cols={1} class="text-center mt-3">
<Col>
<Card body color="danger">{$initq.error.message}</Card>
<Card body color="danger">Unknown displayList type! </Card>
</Col>
</Row>
{:else}
<Card class="overflow-auto" style="height: auto;">
<TabContent>
<TabPane tabId="status-dash" tab="Status" active>
<CardBody>
<StatusDash clusters={$initq.data.clusters} {presetCluster} {useCbColors} useAltColors></StatusDash>
</CardBody>
</TabPane>
<TabPane tabId="usage-dash" tab="Usage">
<CardBody>
<UsageDash {presetCluster} {useCbColors}></UsageDash>
</CardBody>
</TabPane>
<TabPane tabId="metric-dash" tab="Statistics">
<CardBody>
<StatisticsDash {presetCluster} {useCbColors}></StatisticsDash>
</CardBody>
</TabPane>
</TabContent>
</Card>
{#if displayStatusDetail}
<!-- ROW2-1: Node Overview (Grid Included)-->
<DashDetails {presetCluster}/>
{:else}
<!-- ROW2-2: Node List (Grid Included)-->
<DashInternal {presetCluster}/>
{/if}
{/if}

View File

@@ -0,0 +1,13 @@
import { mount } from 'svelte';
// import {} from './header.entrypoint.js'
import DashPublic from './DashPublic.root.svelte'
// Mounts the public dashboard into the #svelte-app element.
// `infos` and `clusterCockpitConfig` are not defined in this module -
// presumably globals provided by the hosting page; TODO confirm.
const dashTarget = document.getElementById('svelte-app');
const dashProps = { presetCluster: infos.cluster };
// Component context: makes the UI configuration available via getContext('cc-config')
const dashContext = new Map();
dashContext.set('cc-config', clusterCockpitConfig);

mount(DashPublic, {
  target: dashTarget,
  props: dashProps,
  context: dashContext,
});

View File

@@ -0,0 +1,640 @@
<!--
@component Main plot component, based on uPlot; metricdata values by time
Only width/height should change reactively.
Properties:
- `metric String`: The metric name
- `scope String?`: Scope of the displayed data [Default: node]
- `height Number?`: The plot height [Default: 300]
- `timestep Number`: The timestep used for X-axis rendering
- `series [GraphQL.Series]`: The metric data object
- `statisticsSeries [GraphQL.StatisticsSeries]?`: Min/Max/Median representation of metric data [Default: null]
- `cluster String?`: Cluster name of the parent job / data [Default: ""]
- `subCluster String`: Name of the subCluster of the parent job
- `isShared Bool?`: If this job used shared resources; for additional legend display [Default: false]
- `forNode Bool?`: If this plot is used for node data display; will render x-axis as negative time with $now as maximum [Default: false]
- `numhwthreads Number?`: Number of job HWThreads [Default: 0]
- `numaccs Number?`: Number of job Accelerators [Default: 0]
- `zoomState Object?`: The last zoom state to preserve on user zoom [Default: null]
- `thresholdState Object?`: The last threshold state to preserve on user zoom [Default: null]
- `extendedLegendData Object?`: Additional information to be rendered in an extended legend [Default: null]
- `onZoom Func`: Callback function to handle zoom-in event
-->
<script>
import uPlot from "uplot";
import { formatNumber, formatDurationTime } from "../units.js";
import { getContext, onMount, onDestroy } from "svelte";
import { Card } from "@sveltestrap/sveltestrap";
/* Svelte 5 Props */
let {
// metric,
// Plot width, handed to uPlot [Default: 0]
width = 0,
// Plot height, handed to uPlot [Default: 300]
height = 300,
// Distance between two data points; x values are built as index * timestep
timestep,
// NOTE(review): not referenced in the visible part of this component - confirm usage
numNodes,
// List of metrics to plot; each entry is read as { name, unit: { prefix, base }, data }
metricData,
// useStatsSeries = false,
// statisticsSeries = null,
// NOTE(review): not referenced in the visible part of this component - confirm usage
cluster = "",
// true: x-axis is built as negative time ending at 0, with fixed axis increments
forNode = true,
// zoomState = null,
// thresholdState = null,
// true: tooltip is placed right of the cursor while the cursor is in the left half of the plot
enableFlip = false,
// onZoom
} = $props();
/* Const Init */
// UI configuration object provided through the "cc-config" context
const clusterCockpitConfig = getContext("cc-config");
// const resampleConfig = getContext("resampling");
// const subClusterTopology = getContext("getHardwareTopology")(cluster, subCluster);
// const metricConfig = getContext("getMetricConfig")(cluster, subCluster, metric);
// Configured color scheme; indexed by lineColor() to pick series strokes
const lineColors = clusterCockpitConfig.plotConfiguration_colorScheme;
// Configured line width divided by the device pixel ratio
const lineWidth = clusterCockpitConfig.plotConfiguration_lineWidth / window.devicePixelRatio;
// const cbmode = clusterCockpitConfig?.plotConfiguration_colorblindMode || false;
// NOTE(review): presumably a delay in ms before (re-)rendering; used outside the visible part - confirm
const renderSleepTime = 200;
// const normalLineColor = "#000000";
// const backgroundColors = {
// normal: "rgba(255, 255, 255, 1.0)",
// caution: cbmode ? "rgba(239, 230, 69, 0.3)" : "rgba(255, 128, 0, 0.3)",
// alert: cbmode ? "rgba(225, 86, 44, 0.3)" : "rgba(255, 0, 0, 0.3)",
// };
/* Var Init */
// NOTE(review): presumably the handle of a pending render timeout; used outside the visible part - confirm
let timeoutId = null;
/* State Init */
// Wrapper element of the plot; the size effect only runs once it is set
let plotWrapper = $state(null);
// NOTE(review): presumably the uPlot instance; assigned outside the visible part - confirm
let uplot = $state(null);
/* Derived */
// const usesMeanStatsSeries = $derived((statisticsSeries?.mean && statisticsSeries.mean.length != 0));
// const resampleTrigger = $derived(resampleConfig?.trigger ? Number(resampleConfig.trigger) : null);
// const resampleResolutions = $derived(resampleConfig?.resolutions ? [...resampleConfig.resolutions] : null);
// const resampleMinimum = $derived(resampleConfig?.resolutions ? Math.min(...resampleConfig.resolutions) : null);
// const thresholds = $derived(findJobAggregationThresholds(
// subClusterTopology,
// metricConfig,
// scope,
// numhwthreads,
// numaccs
// ));
// Number of data points of the longest metric series; 0 when there is no data.
const longestSeries = $derived.by(() => {
  // A series without a `data` array counts as empty instead of throwing,
  // in line with the `metricData[i]?.data` access used for the plot data
  return metricData.reduce((n, m) => Math.max(n, m?.data?.length ?? 0), 0);
});
// Length of the x-axis range: number of points times the timestep
const maxX = $derived(longestSeries * timestep);
// const maxY = $derived.by(() => {
// let pendingY = 0;
// // if (useStatsSeries) {
// // pendingY = statisticsSeries.max.reduce(
// // (max, x) => Math.max(max, x),
// // thresholds?.normal,
// // ) || thresholds?.normal
// // } else {
// pendingY = series.reduce(
// (max, series) => Math.max(max, series?.statistics?.max),
// thresholds?.normal,
// ) || thresholds?.normal;
// // }
// if (pendingY >= 10 * thresholds.peak) {
// // Hard y-range render limit if outliers in series data
// return (10 * thresholds.peak);
// } else {
// return pendingY;
// }
// });
// const plotBands = $derived.by(() => {
// if (useStatsSeries) {
// return [
// { series: [2, 3], fill: cbmode ? "rgba(0,0,255,0.1)" : "rgba(0,255,0,0.1)" },
// { series: [3, 1], fill: cbmode ? "rgba(0,255,0,0.1)" : "rgba(255,0,0,0.1)" },
// ];
// };
// return null;
// })
// uPlot data array: [xValues, ...one data array per metric in metricData order].
const plotData = $derived.by(() => {
  const relativeToNow = forNode === true;
  // The forNode variant also fills index `longestSeries`, i.e. one extra x value of 0
  const pointCount = relativeToNow ? longestSeries + 1 : longestSeries;

  // X
  const xValues = new Array(longestSeries);
  for (let idx = 0; idx < pointCount; idx++) {
    xValues[idx] = relativeToNow
      ? (longestSeries - idx) * timestep * -1 // Negative Timestamp Buildup
      : idx * timestep; // Positive Timestamp Buildup
  }

  // Y
  const yValues = Array.from(metricData, (metric) => metric?.data);
  return [xValues, ...yValues];
})
// uPlot series config: the x (time) series first, then one line series per metric.
// Metric i is bound to scale "y<i+1>" and labelled "<name> (<prefix><base>)".
const plotSeries = $derived.by(() => {
  // X
  // Note: X-Legend Will not be shown as soon as Y-Axis are in extendedMode
  const timeSeries = {
    label: "Runtime",
    value: (u, ts, sidx, didx) => {
      if (didx == null) return null;
      return formatDurationTime(ts, forNode);
    },
  };

  // Y
  const metricSeries = Array.from(metricData, (metric, idx) => ({
    label: `${metric?.name} (${metric?.unit?.prefix}${metric?.unit?.base})`,
    scale: `y${idx + 1}`,
    width: lineWidth,
    stroke: lineColor(idx, metricData.length),
  }));

  return [timeSeries, ...metricSeries];
})
/* Effects */
// $effect(() => {
// if (!useStatsSeries && statisticsSeries != null) useStatsSeries = true;
// })
// Forwards every width/height change to onSizeChange,
// but only once the plot wrapper (== data) exists
$effect(() => {
  if (!plotWrapper) return;
  onSizeChange(width, height);
});
/* Functions */
/*
 * Returns the candidate tick increments for the time axis.
 * - forNode === true: a fixed list of increments.
 * - otherwise: t, 2t, 3t and 5t for t = timestep, 10 * timestep, ...
 *   as long as t stays below maxX.
 */
function timeIncrs(timestep, maxX, forNode) {
  if (forNode === true) {
    // forNode fixed increments
    return [60, 120, 240, 300, 360, 480, 600, 900, 1800, 3600, 7200, 14400, 21600];
  }

  const steps = [];
  let base = timestep;
  while (base < maxX) {
    steps.push(base, base * 2, base * 3, base * 5);
    base *= 10;
  }
  return steps;
}
// removed arg "subcluster": input metricconfig and topology now directly derived from subcluster
// function findJobAggregationThresholds(
// subClusterTopology,
// metricConfig,
// scope,
// numhwthreads,
// numaccs
// ) {
// if (!subClusterTopology || !metricConfig || !scope) {
// console.warn("Argument missing for findJobAggregationThresholds!");
// return null;
// }
// // handle special *-stat scopes
// if (scope.match(/(.*)-stat$/)) {
// const statParts = scope.split('-');
// scope = statParts[0]
// }
// if (metricConfig?.aggregation == "avg") {
// // Return as Configured
// return {
// normal: metricConfig.normal,
// caution: metricConfig.caution,
// alert: metricConfig.alert,
// peak: metricConfig.peak,
// };
// }
// if (metricConfig?.aggregation == "sum") {
// // Scale Thresholds
// let fraction;
// if (numaccs > 0) fraction = subClusterTopology.accelerators.length / numaccs;
// else if (numhwthreads > 0) fraction = subClusterTopology.core.length / numhwthreads;
// else fraction = 1; // Fallback
// let divisor;
// // Exclusive: Fraction = 1; Shared: Fraction > 1
// if (scope == 'node') divisor = fraction;
// // Cap divisor at number of available sockets or domains
// else if (scope == 'socket') divisor = (fraction < subClusterTopology.socket.length) ? subClusterTopology.socket.length : fraction;
// else if (scope == "memoryDomain") divisor = (fraction < subClusterTopology.memoryDomain.length) ? subClusterTopology.socket.length : fraction;
// // Use Maximum Division for Smallest Scopes
// else if (scope == "core") divisor = subClusterTopology.core.length;
// else if (scope == "hwthread") divisor = subClusterTopology.core.length; // alt. name for core
// else if (scope == "accelerator") divisor = subClusterTopology.accelerators.length;
// else {
// console.log('Unknown scope, return default aggregation thresholds for sum', scope)
// divisor = 1;
// }
// return {
// peak: metricConfig.peak / divisor,
// normal: metricConfig.normal / divisor,
// caution: metricConfig.caution / divisor,
// alert: metricConfig.alert / divisor,
// };
// }
// console.warn(
// "Missing or unkown aggregation mode (sum/avg) for metric:",
// metricConfig,
// );
// return null;
// }
// UPLOT PLUGIN // converts the legend into a simple tooltip
/*
 * uPlot plugin factory: turns the built-in legend into a tooltip that follows the cursor.
 *
 * Options:
 * - className: optional extra CSS class for the legend element
 * - style: inline styles merged over the tooltip defaults
 *
 * Returns `{ hooks: { init, setCursor } }`, or `{}` (no hooks at all) when
 * more than 12 data series are plotted.
 */
function legendAsTooltipPlugin({
  className,
  style = { backgroundColor: "rgba(255, 249, 196, 0.92)", color: "black" },
} = {}) {
  const dataSize = metricData.length;

  // Setting legend-opts show/live as object with false here will not work ...
  if (!(dataSize <= 12)) {
    return {};
  }

  let legendEl;

  // init hook: restyle the legend and move it into the plot overlay
  const init = (u, opts) => {
    legendEl = u.root.querySelector(".u-legend");
    legendEl.classList.remove("u-inline");
    if (className) {
      legendEl.classList.add(className);
    }

    const tooltipStyle = {
      minWidth: "100px",
      textAlign: "left",
      pointerEvents: "none",
      display: "none",
      position: "absolute",
      left: 0,
      top: 0,
      zIndex: 100,
      boxShadow: "2px 2px 10px rgba(0,0,0,0.5)",
      ...style,
    };
    uPlot.assign(legendEl.style, tooltipStyle);

    // conditional hide series color markers:
    // only one Y-Dataseries, or more than 8 Y-Dataseries
    const hideMarkers = dataSize === 1 || dataSize > 8;
    if (hideMarkers) {
      for (const marker of legendEl.querySelectorAll(".u-marker")) {
        marker.style.display = "none";
      }
    }

    // let tooltip exit plot, then move legend into plot bounds
    const overlay = u.over;
    overlay.style.overflow = "visible";
    overlay.appendChild(legendEl);

    // show/hide tooltip on enter/exit
    overlay.addEventListener("mouseenter", () => {
      legendEl.style.display = null;
    });
    overlay.addEventListener("mouseleave", () => {
      legendEl.style.display = "none";
    });
  };

  // setCursor hook: position the tooltip next to the cursor
  const update = (u) => {
    const { left, top } = u.cursor;
    const legendWidth = u?.over?.querySelector(".u-legend")?.offsetWidth || 0;
    const placeRight = enableFlip && (left < (width / 2));
    const offsetX = placeRight ? left + 15 : left - legendWidth - 15;
    legendEl.style.transform = `translate(${offsetX}px, ${top + 15}px)`;
  };

  return {
    hooks: {
      init,
      setCursor: update,
    },
  };
}
// RETURN BG COLOR FROM THRESHOLD
// function backgroundColor() {
// if (
// clusterCockpitConfig.plotConfiguration_colorBackground == false ||
// // !thresholds ||
// !(series && series.every((s) => s.statistics != null))
// )
// return backgroundColors.normal;
// let cond =
// thresholds.alert < thresholds.caution
// ? (a, b) => a <= b
// : (a, b) => a >= b;
// let avg =
// series.reduce((sum, series) => sum + series.statistics.avg, 0) /
// series.length;
// if (Number.isNaN(avg)) return backgroundColors.normal;
// if (cond(avg, thresholds.alert)) return backgroundColors.alert;
// if (cond(avg, thresholds.caution)) return backgroundColors.caution;
// return backgroundColors.normal;
// }
// Pick the color for series `i` out of `n` series from the shared `lineColors` palette.
// - n unknown/zero, or at least as many series as colors: cycle through the palette.
// - fewer series than colors: spread the series evenly across the palette.
function lineColor(i, n) {
  // A falsy n used to reach the division below and produced a NaN/Infinity index
  // (i.e. an undefined color); fall back to cycling instead.
  if (!n || n >= lineColors.length) return lineColors[i % lineColors.length];
  return lineColors[Math.floor((i / n) * lineColors.length)];
}
// Create the uPlot instance on first call, or resize the existing one on later calls.
// - ren_width / ren_height: target plot size in pixels.
// The plot has one shared x axis and two independent y axes/scales:
// "y1" (left, first entry of metricData) and "y2" (right, second entry of metricData).
function render(ren_width, ren_height) {
  // Set Options
  const opts = {
    width,
    height,
    plugins: [legendAsTooltipPlugin()],
    series: plotSeries,
    axes: [
      {
        scale: "x",
        space: 35,
        incrs: timeIncrs(timestep, maxX, forNode),
        label: "Time",
        values: (_, vals) => vals.map((v) => formatDurationTime(v, forNode)),
      },
      {
        scale: "y1",
        grid: { show: true },
        label: `${metricData[0]?.name} (${metricData[0]?.unit?.prefix}${metricData[0]?.unit?.base})`,
        values: (u, vals) => vals.map((v) => formatNumber(v)),
      },
      {
        side: 1,
        scale: "y2",
        grid: { show: false },
        label: `${metricData[1]?.name} (${metricData[1]?.unit?.prefix}${metricData[1]?.unit?.base})`,
        values: (u, vals) => vals.map((v) => formatNumber(v)),
      },
    ],
    // bands: plotBands,
    padding: [5, 10, -20, 0],
    hooks: {
      // init: [
      //   (u) => {
      //     /* IF Zoom Enabled */
      //     if (resampleConfig && !forNode) {
      //       u.over.addEventListener("dblclick", (e) => {
      //         // console.log('Dispatch: Zoom Reset')
      //         onZoom({
      //           lastZoomState: {
      //             x: { time: false },
      //             y: { auto: true }
      //           }
      //         });
      //       });
      //     };
      //   },
      // ],
      draw: [
        (u) => {
          // Draw plot type label:
          let textl = `Cluster ${cluster}`
          // let textl = `${scope}${plotSeries.length > 2 ? "s" : ""}${
          //   useStatsSeries
          //     ? (usesMeanStatsSeries ? ": min/mean/max" : ": min/median/max")
          //     : metricConfig != null && scope != metricConfig.scope
          //       ? ` (${metricConfig.aggregation})`
          //       : ""
          // }`;
          let textr = `Sums of ${numNodes} nodes`
          //let textr = `${isShared && scope != "core" && scope != "accelerator" ? "[Shared]" : ""}`;
          u.ctx.save();
          // Left label: anchored at the top-left corner of the plot area
          u.ctx.textAlign = "start"; // 'end'
          u.ctx.fillStyle = "black";
          u.ctx.fillText(textl, u.bbox.left + 10, u.bbox.top + (forNode ? 0 : 10));
          // Right label: anchored at the top-right corner of the plot area
          u.ctx.textAlign = "end";
          u.ctx.fillStyle = "black";
          u.ctx.fillText(
            textr,
            u.bbox.left + u.bbox.width - 10,
            u.bbox.top + (forNode ? 0 : 10),
          );
          // u.ctx.fillText(text, u.bbox.left + u.bbox.width - 10, u.bbox.top + u.bbox.height - 10) // Recipe for bottom right
          // if (!thresholds) {
          u.ctx.restore();
          return;
          // }
          // let y = u.valToPos(thresholds.normal, "y", true);
          // u.ctx.save();
          // u.ctx.lineWidth = lineWidth;
          // u.ctx.strokeStyle = normalLineColor;
          // u.ctx.setLineDash([5, 5]);
          // u.ctx.beginPath();
          // u.ctx.moveTo(u.bbox.left, y);
          // u.ctx.lineTo(u.bbox.left + u.bbox.width, y);
          // u.ctx.stroke();
          // u.ctx.restore();
        },
      ],
      // setScale: [
      //   (u, key) => { // If ZoomResample is Configured && Not System/Node View
      //     if (resampleConfig && !forNode && key === 'x') {
      //       const numX = (u.series[0].idxs[1] - u.series[0].idxs[0])
      //       if (numX <= resampleTrigger && timestep !== resampleMinimum) {
      //         /* Get closest zoom level; prevents multiple iterative zoom requests for big zoom-steps (e.g. 600 -> 300 -> 120 -> 60) */
      //         // Which resolution to theoretically request to achieve 30 or more visible data points:
      //         const target = (numX * timestep) / resampleTrigger
      //         // Which configured resolution actually matches the closest to theoretical target:
      //         const closest = resampleResolutions.reduce(function(prev, curr) {
      //           return (Math.abs(curr - target) < Math.abs(prev - target) ? curr : prev);
      //         });
      //         // Prevents non-required dispatches
      //         if (timestep !== closest) {
      //           // console.log('Dispatch: Zoom with Res from / to', timestep, closest)
      //           onZoom({
      //             newRes: closest,
      //             lastZoomState: u?.scales,
      //             lastThreshold: thresholds?.normal
      //           });
      //         }
      //       } else {
      //         // console.log('Dispatch: Zoom Update States')
      //         onZoom({
      //           lastZoomState: u?.scales,
      //           lastThreshold: thresholds?.normal
      //         });
      //       };
      //     };
      //   },
      // ]
    },
    scales: {
      x: { time: false },
      y1: { auto: true },
      // Was a second "y1" key (duplicate object key); the right axis uses scale "y2"
      y2: { auto: true },
    },
    legend: {
      // Display legend until max 12 Y-dataseries
      show: true, // metricData.length <= 12 || useStatsSeries,
      live: true  // But This Plot always for 2 Data-Series
    },
    cursor: {
      drag: { x: true, y: true },
    }
  };
  // Handle Render
  if (!uplot) {
    // First render: the requested size overrides the component-level width/height
    opts.width = ren_width;
    opts.height = ren_height;
    // if (plotSync) {
    //   opts.cursor.sync = {
    //     key: plotSync.key,
    //     scales: ["x", null],
    //   }
    // }
    // if (zoomState && metricConfig?.aggregation == "avg") {
    //   opts.scales = {...zoomState}
    // } else if (zoomState && metricConfig?.aggregation == "sum") {
    //   // Allow Zoom In === Ymin changed
    //   if (zoomState.y.min !== 0) { // scope change?: only use zoomState if thresholds match
    //     if ((thresholdState === thresholds?.normal)) { opts.scales = {...zoomState} };
    //   } // else: reset scaling to default
    // }
    uplot = new uPlot(opts, plotData, plotWrapper);
  } else {
    // Plot already exists: only its size is updated
    uplot.setSize({ width: ren_width, height: ren_height });
  }
}
// Debounced resize: re-renders with the new size after `renderSleepTime` ms,
// cancelling any re-render that is still pending.
function onSizeChange(chg_width, chg_height) {
  // Without an existing plot there is nothing to resize
  if (uplot) {
    if (timeoutId != null) clearTimeout(timeoutId);
    const redraw = () => {
      timeoutId = null;
      render(chg_width, chg_height);
    };
    timeoutId = setTimeout(redraw, renderSleepTime);
  }
}
/* On Mount */
// Initial draw; skipped while the wrapper element is not bound
onMount(() => {
  if (!plotWrapper) return;
  render(width, height);
});
/* On Destroy */
// Drop a pending debounced re-render and release the plot instance
onDestroy(() => {
  if (timeoutId != null) {
    clearTimeout(timeoutId);
  }
  if (uplot) {
    uplot.destroy();
  }
});
</script>
<!-- Define $width Wrapper and NoData Card -->
{#if metricData[0]?.data && metricData[0]?.data?.length > 0}
<div bind:this={plotWrapper} bind:clientWidth={width}
class={forNode ? 'py-2 rounded' : 'rounded'}
></div>
{:else}
<Card body color="warning" class="mx-4"
>Cannot render plot: No series data returned for <code>{cluster}</code></Card
>
{/if}

View File

@@ -36,6 +36,7 @@
subCluster = null,
allowSizeChange = false,
useColors = true,
useLegend = true,
width = 600,
height = 380,
} = $props();
@@ -534,7 +535,7 @@
width: width,
height: height,
legend: {
show: true,
show: useLegend,
},
cursor: {
dataIdx: (u, seriesIdx) => {

View File

@@ -156,7 +156,7 @@
{
scale: "y",
grid: { show: true },
labelFont: "sans-serif",
// labelFont: "sans-serif",
label: ylabel + (yunit ? ` (${yunit})` : ''),
// values: (u, vals) => vals.map((v) => formatNumber(v)),
},

View File

@@ -64,6 +64,34 @@
{/each}
</DropdownMenu>
</Dropdown>
{:else if item.title === 'Status'}
<Dropdown nav inNavbar {direction}>
<DropdownToggle nav caret>
<Icon name={item.icon} />
{item.title}
</DropdownToggle>
<DropdownMenu class="dropdown-menu-lg-end">
{#each clusters as cluster}
<Dropdown nav direction="right">
<DropdownToggle nav caret class="dropdown-item py-1 px-2">
{cluster.name}
</DropdownToggle>
<DropdownMenu>
<DropdownItem class="py-1 px-2"
href={item.href + cluster.name}
>
Status Dashboard
</DropdownItem>
<DropdownItem class="py-1 px-2"
href={item.href + 'detail/' + cluster.name}
>
Status Details
</DropdownItem>
</DropdownMenu>
</Dropdown>
{/each}
</DropdownMenu>
</Dropdown>
{:else}
<Dropdown nav inNavbar {direction}>
<DropdownToggle nav caret>

View File

@@ -6,6 +6,7 @@ mount(Status, {
target: document.getElementById('svelte-app'),
props: {
presetCluster: infos.cluster,
displayType: displayType,
},
context: new Map([
['cc-config', clusterCockpitConfig]

View File

@@ -0,0 +1,82 @@
<!--
@component Cluster status details view; renders the Status, Usage and Statistics dashboards as tabs for one cluster
Properties:
- `presetCluster String`: The cluster to show status information for
-->
<script>
import {
getContext
} from "svelte"
import {
init,
} from "../generic/utils.js";
import {
Row,
Col,
Card,
CardBody,
TabContent,
TabPane,
Spinner
} from "@sveltestrap/sveltestrap";
import StatusDash from "./dashdetails/StatusDash.svelte";
import UsageDash from "./dashdetails/UsageDash.svelte";
import StatisticsDash from "./dashdetails/StatisticsDash.svelte";
/* Svelte 5 Props */
// presetCluster: name of the cluster shown in the heading and handed to every tab component
let {
presetCluster,
} = $props();
/*Const Init */
// initq is consumed as a store by the markup below ($initq.fetching / $initq.error / $initq.data.clusters)
const { query: initq } = init();
// Colorblind plot palette flag read from the "cc-config" context; false when the context or key is missing
const useCbColors = getContext("cc-config")?.plotConfiguration_colorblindMode || false
</script>
<!-- Loading indicator & Refresh -->
<Row cols={1} class="mb-2">
<Col>
<h3 class="mb-0">Current Status of Cluster "{presetCluster.charAt(0).toUpperCase() + presetCluster.slice(1)}"</h3>
</Col>
</Row>
{#if $initq.fetching}
<Row cols={1} class="text-center mt-3">
<Col>
<Spinner />
</Col>
</Row>
{:else if $initq.error}
<Row cols={1} class="text-center mt-3">
<Col>
<Card body color="danger">{$initq.error.message}</Card>
</Col>
</Row>
{:else}
<Card class="overflow-auto" style="height: auto;">
<TabContent>
<TabPane tabId="status-dash" tab="Status" active>
<CardBody>
<StatusDash clusters={$initq.data.clusters} {presetCluster} {useCbColors} useAltColors></StatusDash>
</CardBody>
</TabPane>
<TabPane tabId="usage-dash" tab="Usage">
<CardBody>
<UsageDash {presetCluster} {useCbColors}></UsageDash>
</CardBody>
</TabPane>
<TabPane tabId="metric-dash" tab="Statistics">
<CardBody>
<StatisticsDash {presetCluster} {useCbColors}></StatisticsDash>
</CardBody>
</TabPane>
</TabContent>
</Card>
{/if}

View File

@@ -0,0 +1,605 @@
<!--
@component Cluster dashboard component; renders info card, top projects, job roofline, resource histogram and node/health state charts
Properties:
- `presetCluster String`: The cluster to show status information for
-->
<script>
import {
getContext
} from "svelte"
import {
queryStore,
gql,
getContextClient,
} from "@urql/svelte";
import {
init,
scramble,
scrambleNames,
convert2uplot
} from "../generic/utils.js";
import {
formatDurationTime,
formatNumber,
} from "../generic/units.js";
import {
Row,
Col,
Card,
CardTitle,
CardHeader,
CardBody,
Spinner,
Table,
Progress,
Icon,
} from "@sveltestrap/sveltestrap";
import Roofline from "../generic/plots/Roofline.svelte";
import Pie, { colors } from "../generic/plots/Pie.svelte";
import Stacked from "../generic/plots/Stacked.svelte";
import Histogram from "../generic/plots/Histogram.svelte";
/* Svelte 5 Props */
// presetCluster: name of the cluster all queries below are filtered by
let {
presetCluster,
} = $props();
/*Const Init */
// initq is read as a store ($initq.data.clusters) when deriving the cluster info
const { query: initq } = init();
// urql client taken from context; passed to every queryStore below
const client = getContextClient();
// Colorblind plot palette flag read from the "cc-config" context; false when the context or key is missing
const useCbColors = getContext("cc-config")?.plotConfiguration_colorblindMode || false
/* States */
let pagingState = $state({page: 1, itemsPerPage: 10}) // Top 10
// Time window of the status query: from five minutes before load until load time
let from = $state(new Date(Date.now() - 5 * 60 * 1000));
let to = $state(new Date(Date.now()));
// Unix timestamp in seconds, 14400 s (4 hours) before load time
let stackedFrom = $state(Math.floor(Date.now() / 1000) - 14400);
// Column widths in pixels; each is bound to a wrapper's clientWidth in the markup to size its plot
let colWidthJobs = $state(0);
let colWidthRoof = $state(0);
let colWidthStacked1 = $state(0);
let colWidthStacked2 = $state(0);
/* Derived */
// States for Stacked charts
// One request that fetches `nodeStatesTimed` twice under different aliases:
// - nodeStates:   type "node"   -> data for the "Node States" stacked chart
// - healthStates: type "health" -> data for the "Health States" stacked chart
// The time range starts at `stackedFrom` (unix seconds); a hard-coded debug
// timestamp was used here before, which pinned the charts to a fixed date.
const statesTimed = $derived(queryStore({
  client: client,
  query: gql`
    query ($filter: [NodeFilter!], $typeNode: String!, $typeHealth: String!) {
      nodeStates: nodeStatesTimed(filter: $filter, type: $typeNode) {
        state
        counts
        times
      }
      healthStates: nodeStatesTimed(filter: $filter, type: $typeHealth) {
        state
        counts
        times
      }
    }
  `,
  variables: {
    filter: { cluster: { eq: presetCluster }, timeStart: stackedFrom },
    typeNode: "node",
    typeHealth: "health"
  },
  requestPolicy: "network-only"
}));
// Main status query for the info card and the job roofline. A single request returns:
// - nodeMetrics:     per-host average of "flops_any" and "mem_bw" within [from, to]
// - jobsMetricStats: per running job average of the same two metrics plus job size/duration
// - jobs:            all running jobs with their hostnames (itemsPerPage -1 requests every item)
// - allocatedNodes:  allocated node count per subcluster
// - jobsStatistics:  job/user/core/accelerator totals grouped by subcluster
// Note: nodeMetrics are requested on configured $timestep resolution
// Result: The latest 5 minutes (datapoints) for each node independent of job
const statusQuery = $derived(queryStore({
client: client,
query: gql`
query (
$cluster: String!
$metrics: [String!]
$from: Time!
$to: Time!
$jobFilter: [JobFilter!]!
$paging: PageRequest!
$sorting: OrderByInput!
) {
# Node 5 Minute Averages for Roofline
nodeMetrics(
cluster: $cluster
metrics: $metrics
from: $from
to: $to
) {
host
subCluster
metrics {
name
metric {
series {
statistics {
avg
}
}
}
}
}
# Running Job Metric Average for Rooflines
jobsMetricStats(filter: $jobFilter, metrics: $metrics) {
id
jobId
duration
numNodes
numAccelerators
subCluster
stats {
name
data {
avg
}
}
}
# Get Jobs for Per-Node Counts
jobs(filter: $jobFilter, order: $sorting, page: $paging) {
items {
jobId
resources {
hostname
}
}
count
}
# Only counts shared nodes once
allocatedNodes(cluster: $cluster) {
name
count
}
# totalNodes includes multiples if shared jobs: Info-Card Data
jobsStatistics(
filter: $jobFilter
page: $paging
sortBy: TOTALJOBS
groupBy: SUBCLUSTER
) {
id
totalJobs
totalUsers
totalCores
totalAccs
}
}
`,
variables: {
cluster: presetCluster,
metrics: ["flops_any", "mem_bw"], // Fixed names for roofline and status bars
// Time! variables are sent as ISO-8601 strings
from: from.toISOString(),
to: to.toISOString(),
// Same filter for jobsMetricStats, jobs and jobsStatistics: running jobs of this cluster
jobFilter: [{ state: ["running"] }, { cluster: { eq: presetCluster } }],
paging: { itemsPerPage: -1, page: 1 }, // Get all: -1
sorting: { field: "startTime", type: "col", order: "DESC" }
},
requestPolicy: "network-only"
}));
// Running-job counts grouped by project, sorted by total jobs, for the
// "Top Projects: Jobs" pie chart and its legend table.
// The page size comes from pagingState (page 1, 10 items).
const topJobsQuery = $derived(queryStore({
client: client,
query: gql`
query (
$filter: [JobFilter!]!
$paging: PageRequest!
) {
jobsStatistics(
filter: $filter
page: $paging
sortBy: TOTALJOBS
groupBy: PROJECT
) {
id
totalJobs
}
}
`,
variables: {
// Only running jobs of the preset cluster
filter: [{ state: ["running"] }, { cluster: { eq: presetCluster} }],
paging: pagingState // Top 10
},
requestPolicy: "network-only"
}));
// Histogram data for the resources-per-job chart: distribution of allocated cores
// (histNumCores) and allocated accelerators (histNumAccs) over the running jobs of
// the preset cluster. No metric histograms are requested (empty selectedHistograms).
const nodeStatusQuery = $derived(queryStore({
client: client,
query: gql`
query (
$filter: [JobFilter!]!
$selectedHistograms: [String!]
$numDurationBins: String
) {
jobsStatistics(filter: $filter, metrics: $selectedHistograms, numDurationBins: $numDurationBins) {
histNumCores {
count
value
}
histNumAccs {
count
value
}
}
}
`,
variables: {
filter: [{ state: ["running"] }, { cluster: { eq: presetCluster } }],
selectedHistograms: [], // No Metrics requested for node hardware stats - Empty Array can be used for refresh
numDurationBins: "1h", // Hardcode or selector?
},
requestPolicy: "network-only"
}));
// Cluster-wide figures for the info card and the roofline plot, summed over all
// subclusters of `presetCluster`.
// Returns {} while the cluster list from `initq` is not available. Otherwise an object with:
// - allocatedNodes / allocatedCores / allocatedAccs: currently allocated resources
// - totalNodes / totalCores / totalAccs: configured resources
// - runningJobs / activeUsers: sums of the per-subcluster job statistics
// - processorTypes: Set of the distinct processor type strings
// - flopRateUnit / memBwRateUnit: unit strings taken from the first subcluster
// - roofData: per-field maxima over all subclusters (only set if a subcluster exists)
// - flopRate / memBwRate: sum of the per-node averages of "flops_any" / "mem_bw"
// Counters and processorTypes are always present, also for a cluster without subclusters.
const clusterInfo = $derived.by(() => {
  if (!$initq?.data?.clusters) return {};

  const subClusters = $initq.data.clusters.find((c) => c.name == presetCluster)?.subClusters || [];
  const allocatedNodes = $statusQuery?.data?.allocatedNodes || [];
  const jobsStatistics = $statusQuery?.data?.jobsStatistics || [];
  const nodeMetrics = $statusQuery?.data?.nodeMetrics || [];

  const rawInfos = {
    // Allocations
    allocatedNodes: 0,
    allocatedCores: 0,
    allocatedAccs: 0,
    // Infos
    processorTypes: new Set(),
    activeUsers: 0,
    runningJobs: 0,
    totalNodes: 0,
    totalCores: 0,
    totalAccs: 0,
  };

  for (const subCluster of subClusters) {
    // Look up the statistics row of this subcluster once instead of once per field
    const scStats = jobsStatistics.find(({ id }) => id == subCluster.name);

    // Allocations
    rawInfos.allocatedNodes += allocatedNodes.find(({ name }) => name == subCluster.name)?.count || 0;
    rawInfos.allocatedCores += scStats?.totalCores || 0;
    rawInfos.allocatedAccs += scStats?.totalAccs || 0;

    // Infos; subclusters without a processor type no longer add `undefined` to the set
    if (subCluster?.processorType) rawInfos.processorTypes.add(subCluster.processorType);
    rawInfos.activeUsers += scStats?.totalUsers || 0;
    rawInfos.runningJobs += scStats?.totalJobs || 0;
    rawInfos.totalNodes += subCluster?.numberOfNodes || 0;
    rawInfos.totalCores += (subCluster?.socketsPerNode * subCluster?.coresPerSocket * subCluster?.numberOfNodes) || 0;
    // `|| 0` also covers subclusters without accelerators (product is NaN there)
    rawInfos.totalAccs += (subCluster?.numberOfNodes * subCluster?.topology?.accelerators?.length) || 0;

    // Units (Set Once)
    if (!rawInfos.flopRateUnit) rawInfos.flopRateUnit = subCluster.flopRateSimd.unit.prefix + subCluster.flopRateSimd.unit.base
    if (!rawInfos.memBwRateUnit) rawInfos.memBwRateUnit = subCluster.memoryBandwidth.unit.prefix + subCluster.memoryBandwidth.unit.base

    // Get Maxima For Roofline Knee Render
    if (!rawInfos.roofData) {
      rawInfos.roofData = {
        flopRateScalar: {value: subCluster.flopRateScalar.value},
        flopRateSimd: {value: subCluster.flopRateSimd.value},
        memoryBandwidth: {value: subCluster.memoryBandwidth.value}
      };
    } else {
      const roof = rawInfos.roofData;
      roof.flopRateScalar.value = Math.max(roof.flopRateScalar.value, subCluster.flopRateScalar.value)
      roof.flopRateSimd.value = Math.max(roof.flopRateSimd.value, subCluster.flopRateSimd.value)
      roof.memoryBandwidth.value = Math.max(roof.memoryBandwidth.value, subCluster.memoryBandwidth.value)
    }
  }

  // Keymetrics (Data on Cluster-Scope): sum of the first series' average of every node
  const sumNodeAverages = (metricName) => nodeMetrics.reduce((sum, node) =>
    sum + (node.metrics.find((m) => m.name == metricName)?.metric?.series[0]?.statistics?.avg || 0),
    0, // Initial Value
  ) || 0;

  // NOTE(review): as written this floors to a whole number; if two decimals were
  // intended it would have to be Math.floor(x * 100) / 100 - confirm before changing.
  rawInfos.flopRate = Math.floor((sumNodeAverages('flops_any') * 100) / 100)
  rawInfos.memBwRate = Math.floor((sumNodeAverages('mem_bw') * 100) / 100)

  return rawInfos
});
/* Functions */
// Color for legend entry `targetIdx`, taken from the Pie plot palettes.
// The index wraps around the palette length, so colors repeat once the palette is exhausted.
function legendColors(targetIdx) {
  // Palette follows the colorblind setting (an 'alternative' palette branch is currently disabled)
  const paletteKey = useCbColors ? 'colorblind' : 'default';
  const palette = [...colors[paletteKey]];
  const slot = (palette.length + targetIdx) % palette.length;
  return palette[slot];
}
// Convert per-job metric averages into the data layout of the roofline plot.
// - clusterData: list of jobs, each with `duration` (seconds) and `stats` entries
//   named "flops_any" and "mem_bw" carrying `data.avg`.
// Returns `[null, [x, y], c]` (for dataformat see roofline.svelte) where
//   x = flops / memory bandwidth, y = flops, c = duration in days capped at 1.0,
// or null if no job yields a usable point.
// Jobs lacking one of the two metrics, or with a non-finite ratio, are skipped
// (a missing stats entry used to throw a TypeError).
function transformJobsStatsToData(clusterData) {
  if (!clusterData) {
    console.warn("transformJobsStatsToData: metrics for 'mem_bw' and/or 'flops_any' missing!")
    return null
  }

  const day = 86400.0
  /* c will contain values from 0 to 1 representing the duration */
  const x = [], y = [], c = []

  for (const job of clusterData) {
    const f = job.stats?.find((s) => s.name == "flops_any")?.data?.avg
    const m = job.stats?.find((s) => s.name == "mem_bw")?.data?.avg
    if (f == null || m == null) continue

    const intensity = f / m
    // isFinite is false for NaN as well as +/-Infinity (e.g. division by zero)
    if (!Number.isFinite(intensity)) continue

    x.push(intensity)
    y.push(f)
    // Long Jobs > 1 Day: Use max Color
    c.push(Math.min(job.duration / day, 1.0))
  }

  return x.length > 0 ? [null, [x, y], c] : null
}
// Reduce per-job statistics to the fields handed to the roofline as `jobsData`:
// id, jobId, numNodes, numAcc (0 when the job has no accelerators) and the formatted duration.
// Returns an empty list (and warns) when no job data is given.
function transformJobsStatsToInfo(clusterData) {
  if (!clusterData) {
    console.warn("transformJobsStatsToInfo: jobInfo missing!")
    return []
  }
  return clusterData.map((job) => ({
    id: job.id,
    jobId: job.jobId,
    numNodes: job.numNodes,
    numAcc: job?.numAccelerators || 0,
    duration: formatDurationTime(job.duration),
  }))
}
/* Inspect */
// Debug aid: logs the derived clusterInfo object to the console together with the
// event type passed by $inspect.
// NOTE(review): per the Svelte docs $inspect is active in development builds only - confirm
// whether this logging should stay in the component.
$inspect(clusterInfo).with((type, clusterInfo) => {
console.log(type, 'clusterInfo', clusterInfo)
});
</script>
<Card>
<CardHeader class="text-center">
<h3 class="mb-0">{presetCluster.charAt(0).toUpperCase() + presetCluster.slice(1)} Dashboard</h3>
</CardHeader>
<CardBody>
{#if $statusQuery.fetching || $statesTimed.fetching || $topJobsQuery.fetching || $nodeStatusQuery.fetching}
<Row class="justify-content-center">
<Col xs="auto">
<Spinner />
</Col>
</Row>
{:else if $statusQuery.error || $statesTimed.error || $topJobsQuery.error || $nodeStatusQuery.error}
<Row cols={{xs:1, md:2}}>
{#if $statusQuery.error}
<Col>
<Card color="danger">Error Requesting StatusQuery: {$statusQuery.error.message}</Card>
</Col>
{/if}
{#if $statesTimed.error}
<Col>
<Card color="danger">Error Requesting StatesTimed: {$statesTimed.error.message}</Card>
</Col>
{/if}
{#if $topJobsQuery.error}
<Col>
<Card color="danger">Error Requesting TopJobsQuery: {$topJobsQuery.error.message}</Card>
</Col>
{/if}
{#if $nodeStatusQuery.error}
<Col>
<Card color="danger">Error Requesting NodeStatusQuery: {$nodeStatusQuery.error.message}</Card>
</Col>
{/if}
</Row>
{:else}
<Row cols={{xs:1, md:2, xl: 3}}>
<Col> <!-- Info Card -->
<Card class="h-auto mt-1">
<CardHeader>
<CardTitle class="mb-0">Cluster "{presetCluster.charAt(0).toUpperCase() + presetCluster.slice(1)}"</CardTitle>
<span>{[...clusterInfo?.processorTypes].toString()}</span>
</CardHeader>
<CardBody>
<Table borderless>
<tr class="py-2">
<td style="font-size:x-large;">{clusterInfo?.runningJobs} Running Jobs</td>
<td colspan="2" style="font-size:x-large;">{clusterInfo?.activeUsers} Active Users</td>
</tr>
<hr class="my-1"/>
<tr class="pt-2">
<td style="font-size: large;">
Flop Rate (<span style="cursor: help;" title="Flops[Any] = (Flops[Double] x 2) + Flops[Single]">Any</span>)
</td>
<td colspan="2" style="font-size: large;">
Memory BW Rate
</td>
</tr>
<tr class="pb-2">
<td style="font-size:x-large;">
{clusterInfo?.flopRate}
{clusterInfo?.flopRateUnit}
</td>
<td colspan="2" style="font-size:x-large;">
{clusterInfo?.memBwRate}
{clusterInfo?.memBwRateUnit}
</td>
</tr>
<hr class="my-1"/>
<tr class="py-2">
<th scope="col">Allocated Nodes</th>
<td style="min-width: 100px;"
><div class="col">
<Progress
value={clusterInfo?.allocatedNodes}
max={clusterInfo?.totalNodes}
/>
</div></td
>
<td
>{clusterInfo?.allocatedNodes} / {clusterInfo?.totalNodes}
Nodes</td
>
</tr>
<tr class="py-2">
<th scope="col">Allocated Cores</th>
<td style="min-width: 100px;"
><div class="col">
<Progress
value={clusterInfo?.allocatedCores}
max={clusterInfo?.totalCores}
/>
</div></td
>
<td
>{formatNumber(clusterInfo?.allocatedCores)} / {formatNumber(clusterInfo?.totalCores)}
Cores</td
>
</tr>
{#if clusterInfo?.totalAccs !== 0}
<tr class="py-2">
<th scope="col">Allocated Accelerators</th>
<td style="min-width: 100px;"
><div class="col">
<Progress
value={clusterInfo?.allocatedAccs}
max={clusterInfo?.totalAccs}
/>
</div></td
>
<td
>{clusterInfo?.allocatedAccs} / {clusterInfo?.totalAccs}
Accelerators</td
>
</tr>
{/if}
</Table>
</CardBody>
</Card>
</Col>
<Col> <!-- Pie Jobs -->
<Row cols={{xs:1, md:2}}>
<Col class="p-2">
<div bind:clientWidth={colWidthJobs}>
<h4 class="text-center">
Top Projects: Jobs
</h4>
<Pie
{useCbColors}
canvasId="hpcpie-jobs-projects"
size={colWidthJobs * 0.75}
sliceLabel={'Jobs'}
quantities={$topJobsQuery.data.jobsStatistics.map(
(tp) => tp['totalJobs'],
)}
entities={$topJobsQuery.data.jobsStatistics.map((tp) => scrambleNames ? scramble(tp.id) : tp.id)}
/>
</div>
</Col>
<Col class="p-2">
<Table>
<tr class="mb-2">
<th></th>
<th style="padding-left: 0.5rem;">Project</th>
<th>Jobs</th>
</tr>
{#each $topJobsQuery.data.jobsStatistics as tp, i}
<tr>
<td><Icon name="circle-fill" style="color: {legendColors(i)};" /></td>
<td>
<a target="_blank" href="/monitoring/jobs/?cluster={presetCluster}&state=running&project={tp.id}&projectMatch=eq"
>{scrambleNames ? scramble(tp.id) : tp.id}
</a>
</td>
<td>{tp['totalJobs']}</td>
</tr>
{/each}
</Table>
</Col>
</Row>
</Col>
<Col> <!-- Job Roofline -->
<div bind:clientWidth={colWidthRoof}>
{#key $statusQuery?.data?.jobsMetricStats}
<Roofline
useColors={true}
allowSizeChange
width={colWidthRoof - 10}
height={300}
subCluster={clusterInfo?.roofData ? clusterInfo.roofData : null}
roofData={transformJobsStatsToData($statusQuery?.data?.jobsMetricStats)}
jobsData={transformJobsStatsToInfo($statusQuery?.data?.jobsMetricStats)}
/>
{/key}
</div>
</Col>
<Col> <!-- Resources/Job Histogram -->
{#if clusterInfo?.totalAccs == 0}
<Histogram
data={convert2uplot($nodeStatusQuery.data.jobsStatistics[0].histNumCores)}
title="Number of Cores Distribution"
xlabel="Allocated Cores"
xunit="Nodes"
ylabel="Number of Jobs"
yunit="Jobs"
height="275"
enableFlip
/>
{:else}
<Histogram
data={convert2uplot($nodeStatusQuery.data.jobsStatistics[0].histNumAccs)}
title="Number of Accelerators Distribution"
xlabel="Allocated Accs"
xunit="Accs"
ylabel="Number of Jobs"
yunit="Jobs"
height="275"
enableFlip
/>
{/if}
</Col>
<Col> <!-- Stacked SchedState -->
<div bind:clientWidth={colWidthStacked1}>
{#key $statesTimed?.data?.nodeStates}
<Stacked
data={$statesTimed?.data?.nodeStates}
width={colWidthStacked1 * 0.95}
xlabel="Time"
ylabel="Nodes"
yunit = "#Count"
title = "Node States"
stateType = "Node"
/>
{/key}
</div>
</Col>
<Col> <!-- Stacked Healthstate -->
<div bind:clientWidth={colWidthStacked2}>
{#key $statesTimed?.data?.healthStates}
<Stacked
data={$statesTimed?.data?.healthStates}
width={colWidthStacked2 * 0.95}
xlabel="Time"
ylabel="Nodes"
yunit = "#Count"
title = "Health States"
stateType = "Health"
/>
{/key}
</div>
</Col>
</Row>
{/if}
</CardBody>
</Card>

View File

@@ -22,11 +22,11 @@
} from "@urql/svelte";
import {
convert2uplot,
} from "../generic/utils.js";
import PlotGrid from "../generic/PlotGrid.svelte";
import Histogram from "../generic/plots/Histogram.svelte";
import HistogramSelection from "../generic/select/HistogramSelection.svelte";
import Refresher from "../generic/helper/Refresher.svelte";
} from "../../generic/utils.js";
import PlotGrid from "../../generic/PlotGrid.svelte";
import Histogram from "../../generic/plots/Histogram.svelte";
import HistogramSelection from "../../generic/select/HistogramSelection.svelte";
import Refresher from "../../generic/helper/Refresher.svelte";
/* Svelte 5 Props */
let {

View File

@@ -22,12 +22,12 @@
gql,
getContextClient,
} from "@urql/svelte";
import { formatDurationTime } from "../generic/units.js";
import Refresher from "../generic/helper/Refresher.svelte";
import TimeSelection from "../generic/select/TimeSelection.svelte";
import Roofline from "../generic/plots/Roofline.svelte";
import Pie, { colors } from "../generic/plots/Pie.svelte";
import Stacked from "../generic/plots/Stacked.svelte";
import { formatDurationTime } from "../../generic/units.js";
import Refresher from "../../generic/helper/Refresher.svelte";
import TimeSelection from "../../generic/select/TimeSelection.svelte";
import Roofline from "../../generic/plots/Roofline.svelte";
import Pie, { colors } from "../../generic/plots/Pie.svelte";
import Stacked from "../../generic/plots/Stacked.svelte";
/* Svelte 5 Props */
let {
@@ -83,7 +83,7 @@
}
`,
variables: {
filter: { cluster: { eq: cluster }, timeStart: stackedFrom},
filter: { cluster: { eq: cluster }, timeStart: 1760096999},
typeNode: "node",
typeHealth: "health"
},

View File

@@ -27,10 +27,10 @@
scramble,
scrambleNames,
convert2uplot,
} from "../generic/utils.js";
import Pie, { colors } from "../generic/plots/Pie.svelte";
import Histogram from "../generic/plots/Histogram.svelte";
import Refresher from "../generic/helper/Refresher.svelte";
} from "../../generic/utils.js";
import Pie, { colors } from "../../generic/plots/Pie.svelte";
import Histogram from "../../generic/plots/Histogram.svelte";
import Refresher from "../../generic/helper/Refresher.svelte";
/* Svelte 5 Props */
let {