mirror of
https://github.com/ClusterCockpit/cc-backend
synced 2025-11-01 00:15:05 +01:00
Build new statusDash, refine newRoofline data render
This commit is contained in:
@@ -15,7 +15,7 @@
|
||||
CardBody,
|
||||
Table,
|
||||
Progress,
|
||||
Icon,
|
||||
// Icon,
|
||||
} from "@sveltestrap/sveltestrap";
|
||||
import {
|
||||
queryStore,
|
||||
@@ -24,11 +24,11 @@
|
||||
} from "@urql/svelte";
|
||||
import {
|
||||
init,
|
||||
transformPerNodeDataForRoofline,
|
||||
// transformPerNodeDataForRoofline,
|
||||
|
||||
} from "../generic/utils.js";
|
||||
import { scaleNumbers } from "../generic/units.js";
|
||||
import Roofline from "../generic/plots/Roofline.svelte";
|
||||
import { scaleNumbers, formatTime } from "../generic/units.js";
|
||||
import NewBubbleRoofline from "../generic/plots/NewBubbleRoofline.svelte";
|
||||
|
||||
/* Svelte 5 Props */
|
||||
let {
|
||||
@@ -68,9 +68,12 @@
|
||||
$metrics: [String!]
|
||||
$from: Time!
|
||||
$to: Time!
|
||||
$filter: [JobFilter!]!
|
||||
$jobFilter: [JobFilter!]!
|
||||
$nodeFilter: [NodeFilter!]!
|
||||
$paging: PageRequest!
|
||||
$sorting: OrderByInput!
|
||||
) {
|
||||
# Node 5 Minute Averages for Roofline
|
||||
nodeMetrics(
|
||||
cluster: $cluster
|
||||
metrics: $metrics
|
||||
@@ -81,27 +84,58 @@
|
||||
subCluster
|
||||
metrics {
|
||||
name
|
||||
scope
|
||||
metric {
|
||||
timestep
|
||||
unit {
|
||||
base
|
||||
prefix
|
||||
}
|
||||
series {
|
||||
data
|
||||
statistics {
|
||||
avg
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
# Running Job Metric Average for Rooflines
|
||||
jobsMetricStats(filter: $jobFilter, metrics: $metrics) {
|
||||
id
|
||||
jobId
|
||||
duration
|
||||
numNodes
|
||||
numAccelerators
|
||||
subCluster
|
||||
stats {
|
||||
name
|
||||
data {
|
||||
avg
|
||||
}
|
||||
}
|
||||
}
|
||||
# Get Jobs for Per-Node Counts
|
||||
jobs(filter: $jobFilter, order: $sorting, page: $paging) {
|
||||
items {
|
||||
jobId
|
||||
resources {
|
||||
hostname
|
||||
}
|
||||
}
|
||||
count
|
||||
}
|
||||
# Only counts shared nodes once
|
||||
allocatedNodes(cluster: $cluster) {
|
||||
name
|
||||
count
|
||||
}
|
||||
# Get States for Node Roofline; $sorting unused in backend: Use placeholder
|
||||
nodes(filter: $nodeFilter, order: $sorting) {
|
||||
count
|
||||
items {
|
||||
hostname
|
||||
cluster
|
||||
subCluster
|
||||
nodeState
|
||||
}
|
||||
}
|
||||
# totalNodes includes multiples if shared jobs
|
||||
jobsStatistics(
|
||||
filter: $filter
|
||||
filter: $jobFilter
|
||||
page: $paging
|
||||
sortBy: TOTALJOBS
|
||||
groupBy: SUBCLUSTER
|
||||
@@ -118,8 +152,10 @@
|
||||
metrics: ["flops_any", "mem_bw"], // Fixed names for roofline and status bars
|
||||
from: from.toISOString(),
|
||||
to: to.toISOString(),
|
||||
filter: [{ state: ["running"] }, { cluster: { eq: cluster } }],
|
||||
jobFilter: [{ state: ["running"] }, { cluster: { eq: cluster } }],
|
||||
nodeFilter: { cluster: { eq: cluster }},
|
||||
paging: { itemsPerPage: -1, page: 1 }, // Get all: -1
|
||||
sorting: { field: "startTime", type: "col", order: "DESC" }
|
||||
},
|
||||
}));
|
||||
|
||||
@@ -170,6 +206,7 @@
|
||||
});
|
||||
|
||||
/* Const Functions */
|
||||
// New: Sum Up Node Averages
|
||||
const sumUp = (data, subcluster, metric) =>
|
||||
data.reduce(
|
||||
(sum, node) =>
|
||||
@@ -177,20 +214,132 @@
|
||||
? sum +
|
||||
(node.metrics
|
||||
.find((m) => m.name == metric)
|
||||
?.metric.series.reduce(
|
||||
(sum, series) => sum + series.data[series.data.length - 1],
|
||||
0,
|
||||
) || 0)
|
||||
?.metric?.series[0]?.statistics?.avg || 0
|
||||
)
|
||||
: sum,
|
||||
0,
|
||||
);
|
||||
|
||||
// Old: SumUp Metric Time Data
|
||||
// const sumUp = (data, subcluster, metric) =>
|
||||
// data.reduce(
|
||||
// (sum, node) =>
|
||||
// node.subCluster == subcluster
|
||||
// ? sum +
|
||||
// (node.metrics
|
||||
// .find((m) => m.name == metric)
|
||||
// ?.metric.series.reduce(
|
||||
// (sum, series) => sum + series.data[series.data.length - 1],
|
||||
// 0,
|
||||
// ) || 0)
|
||||
// : sum,
|
||||
// 0,
|
||||
// );
|
||||
|
||||
/* Functions */
|
||||
/**
 * Build the roofline point data for the jobs of one subcluster.
 *
 * For every job the averages of the "flops_any" and "mem_bw" stats are read and
 * turned into one plot point. Jobs that lack one of the two stats, or whose
 * intensity (flops / memBw) is not a finite number, are skipped.
 *
 * @param {Array<Object>|null|undefined} subclusterData - Job entries; each is read
 *   for `duration` and for `stats[]` items with `name` and `data.avg`.
 * @returns {Array|null} `[null, [x, y], c]` with x = intensity, y = flops average and
 *   c = duration as a fraction of one day, capped at 1.0 (used for coloring);
 *   `null` if no point could be built. For the data format see roofline.svelte.
 */
function transformJobsStatsToData(subclusterData) {
  const day = 86400.0;
  const x = [];
  const y = [];
  /* c will contain values from 0 to 1 representing the duration */
  const c = [];

  if (!subclusterData) {
    console.warn("transformJobsStatsToData: metrics for 'mem_bw' and/or 'flops_any' missing!");
    return null;
  }

  for (const job of subclusterData) {
    // Optional chaining: a job without one of the two stats yields `undefined`
    // here instead of throwing, and is dropped by the finite check below.
    const f = job?.stats?.find((s) => s.name === "flops_any")?.data?.avg;
    const m = job?.stats?.find((s) => s.name === "mem_bw")?.data?.avg;

    const intensity = f / m;
    // Number.isFinite is false for NaN and +/-Infinity alike
    if (!Number.isFinite(intensity)) continue;

    const d = job.duration / day;

    x.push(intensity);
    y.push(f);
    // Long Jobs > 1 Day: Use max Color
    if (d > 1.0) c.push(1.0);
    else c.push(d);
  }

  // x, y and c always grow together, so checking one length is sufficient
  return x.length > 0 ? [null, [x, y], c] : null;
}
|
||||
|
||||
/**
 * Build the roofline point data for the nodes of one subcluster.
 *
 * Exactly one point is emitted per input node, in input order, so that the
 * result stays index-aligned with a per-node info array built from the same
 * input. Nodes whose intensity (flops / memBw) cannot be computed get
 * intensity 0.0 instead of being dropped.
 *
 * @param {Array<Object>|null|undefined} subclusterData - Node entries; each is read
 *   for `metrics[]` items named "flops_any" and "mem_bw", using
 *   `metric.series[0].statistics.avg` of each.
 * @returns {Array|null} `[null, [x, y]]` with x = intensity and y = flops average
 *   (`undefined` where the flops metric is missing); `null` for missing or
 *   empty input. For the data format see roofline.svelte.
 */
function transformNodesStatsToData(subclusterData) {
  const x = [];
  const y = [];

  if (!subclusterData) return null;

  // First-series average of the named metric; `undefined` if the metric,
  // its series or its statistics are missing.
  const firstSeriesAvg = (node, metricName) =>
    node?.metrics?.find((s) => s.name === metricName)?.metric?.series?.[0]?.statistics?.avg;

  for (const node of subclusterData) {
    const f = firstSeriesAvg(node, "flops_any");
    const m = firstSeriesAvg(node, "mem_bw");

    let intensity = f / m;
    if (!Number.isFinite(intensity)) {
      // Do not skip the node: skipping introduces a mismatch between Data and Info Arrays.
      // Float Zero will not show in Log-Plot (always below render limit).
      intensity = 0.0;
    }

    x.push(intensity);
    y.push(f);
  }

  // x and y always grow together, so checking one length is sufficient
  return x.length > 0 ? [null, [x, y]] : null;
}
|
||||
|
||||
/**
 * Derive the per-job info records for the jobs of one subcluster.
 *
 * @param {Array<Object>|null|undefined} subclusterData - Job entries; each is read
 *   for `id`, `jobId`, `numNodes`, `numAccelerators` and `duration`.
 * @returns {Array<Object>} One `{id, jobId, numNodes, numAcc, duration}` record per
 *   job, with `duration` passed through `formatTime`; an empty array (plus a
 *   console warning) when no input is given.
 */
function transformJobsStatsToInfo(subclusterData) {
  // Guard clause: nothing to describe without job data
  if (!subclusterData) {
    console.warn("transformJobsStatsToInfo: jobInfo missing!")
    return []
  }

  const toInfo = (job) => ({
    id: job.id,
    jobId: job.jobId,
    numNodes: job.numNodes,
    // Falsy accelerator counts (missing, null, 0) are all reported as 0
    numAcc: job.numAccelerators || 0,
    duration: formatTime(job.duration),
  })

  return subclusterData.map((job) => toInfo(job))
}
|
||||
|
||||
/**
 * Derive the per-node info records (name, state, job count) for the nodes of
 * one subcluster.
 *
 * The node list is taken from the metric data passed in, *not* from the node
 * table of the status query: nodes are used as returned from CCMS, not as saved
 * in the DB via the SlurmState-API. The status query is only consulted to look
 * up each node's state and to count the jobs that list the node as a resource.
 *
 * The lookup tables are built once per call, so the job list and the DB node
 * list are not rescanned for every node.
 *
 * @param {Array<Object>|null|undefined} subClusterData - Node entries; each is read
 *   for `host` and `subCluster`.
 * @returns {Array<Object>} One `{nodeName, nodeState, numJobs}` record per input
 *   entry, in input order. `nodeName` is "unknown" if `host` is missing,
 *   `nodeState` is "notindb" if no matching node is found in the status query.
 */
function transformNodesStatsToInfo(subClusterData) {
  const result = [];
  if (!subClusterData || subClusterData.length === 0) return result;

  const dbNodes = $statusQuery?.data?.nodes?.items ?? [];
  const jobs = $statusQuery?.data?.jobs?.items ?? [];

  // hostname -> DB node entries with that hostname (original order kept, so the
  // first entry matching the subcluster wins, as with a linear search)
  const dbNodesByHost = new Map();
  for (const node of dbNodes) {
    const sameHost = dbNodesByHost.get(node.hostname);
    if (sameHost) sameHost.push(node);
    else dbNodesByHost.set(node.hostname, [node]);
  }

  // hostname -> number of jobs using that host; a job listing the same host
  // several times in its resources is counted once
  const jobsPerHost = new Map();
  for (const job of jobs) {
    const hosts = new Set((job.resources ?? []).map((res) => res.hostname));
    for (const host of hosts) {
      jobsPerHost.set(host, (jobsPerHost.get(host) ?? 0) + 1);
    }
  }

  for (const entry of subClusterData) {
    const nodeName = entry?.host ? entry.host : "unknown";
    const nodeMatch = dbNodesByHost
      .get(nodeName)
      ?.find((n) => n.subCluster === entry?.subCluster);
    const nodeState = nodeMatch?.nodeState ? nodeMatch.nodeState : "notindb";
    const numJobs = jobsPerHost.get(nodeName) ?? 0;

    result.push({ nodeName, nodeState, numJobs });
  }

  return result;
}
|
||||
|
||||
</script>
|
||||
|
||||
<!-- Gauges & Roofline per Subcluster-->
|
||||
{#if $initq.data && $statusQuery.data}
|
||||
{#each $initq.data.clusters.find((c) => c.name == cluster).subClusters as subCluster, i}
|
||||
<Row cols={{ lg: 2, md: 1 , sm: 1}} class="mb-3 justify-content-center">
|
||||
<Row cols={{ lg: 3, md: 1 , sm: 1}} class="mb-3 justify-content-center">
|
||||
<Col class="px-3">
|
||||
<Card class="h-auto mt-1">
|
||||
<CardHeader>
|
||||
@@ -204,6 +353,25 @@
|
||||
<td colspan="2" style="font-size:x-large;">{activeUsers[subCluster.name]} Active Users</td>
|
||||
</tr>
|
||||
<hr class="my-1"/>
|
||||
<tr class="pt-2">
|
||||
<td style="font-size: large;">
|
||||
Flop Rate (<span style="cursor: help;" title="Flops[Any] = (Flops[Double] x 2) + Flops[Single]">Any</span>)
|
||||
</td>
|
||||
<td colspan="2" style="font-size: large;">
|
||||
Memory BW Rate
|
||||
</td>
|
||||
</tr>
|
||||
<tr class="pb-2">
|
||||
<td style="font-size:x-large;">
|
||||
{flopRate[subCluster.name]}
|
||||
{flopRateUnitPrefix[subCluster.name]}{flopRateUnitBase[subCluster.name]}
|
||||
</td>
|
||||
<td colspan="2" style="font-size:x-large;">
|
||||
{memBwRate[subCluster.name]}
|
||||
{memBwRateUnitPrefix[subCluster.name]}{memBwRateUnitBase[subCluster.name]}
|
||||
</td>
|
||||
</tr>
|
||||
<hr class="my-1"/>
|
||||
<tr class="py-2">
|
||||
<th scope="col">Allocated Nodes</th>
|
||||
<td style="min-width: 100px;"
|
||||
@@ -236,7 +404,8 @@
|
||||
>
|
||||
</tr>
|
||||
{/if}
|
||||
<tr class="py-2">
|
||||
<hr class="my-1"/>
|
||||
<!-- <tr class="py-2">
|
||||
<th scope="col"
|
||||
>Flop Rate (Any) <Icon
|
||||
name="info-circle"
|
||||
@@ -280,23 +449,49 @@
|
||||
memBwRateUnitPrefix[subCluster.name],
|
||||
)}{memBwRateUnitBase[subCluster.name]} [Max]
|
||||
</td>
|
||||
</tr>
|
||||
</tr> -->
|
||||
</Table>
|
||||
</CardBody>
|
||||
</Card>
|
||||
</Col>
|
||||
<Col class="px-3 mt-2 mt-lg-0">
|
||||
<div bind:clientWidth={plotWidths[i]}>
|
||||
{#key $statusQuery.data.nodeMetrics}
|
||||
<Roofline
|
||||
{#key $statusQuery?.data?.nodeMetrics}
|
||||
<NewBubbleRoofline
|
||||
useColors={true}
|
||||
allowSizeChange
|
||||
width={plotWidths[i] - 10}
|
||||
height={300}
|
||||
cluster={cluster}
|
||||
subCluster={subCluster}
|
||||
roofData={transformNodesStatsToData($statusQuery?.data?.nodeMetrics.filter(
|
||||
(data) => data.subCluster == subCluster.name,
|
||||
)
|
||||
)}
|
||||
nodesData={transformNodesStatsToInfo($statusQuery?.data?.nodeMetrics.filter(
|
||||
(data) => data.subCluster == subCluster.name,
|
||||
)
|
||||
)}
|
||||
/>
|
||||
{/key}
|
||||
</div>
|
||||
</Col>
|
||||
<Col class="px-3 mt-2 mt-lg-0">
|
||||
<div bind:clientWidth={plotWidths[i]}>
|
||||
{#key $statusQuery?.data?.jobsMetricStats}
|
||||
<NewBubbleRoofline
|
||||
useColors={true}
|
||||
allowSizeChange
|
||||
width={plotWidths[i] - 10}
|
||||
height={300}
|
||||
subCluster={subCluster}
|
||||
data={transformPerNodeDataForRoofline(
|
||||
$statusQuery.data.nodeMetrics.filter(
|
||||
roofData={transformJobsStatsToData($statusQuery?.data?.jobsMetricStats.filter(
|
||||
(data) => data.subCluster == subCluster.name,
|
||||
),
|
||||
)
|
||||
)}
|
||||
jobsData={transformJobsStatsToInfo($statusQuery?.data?.jobsMetricStats.filter(
|
||||
(data) => data.subCluster == subCluster.name,
|
||||
)
|
||||
)}
|
||||
/>
|
||||
{/key}
|
||||
|
||||
Reference in New Issue
Block a user