Extend bubbleRoofline for nodeData, add column to node table, rename nodeStats query

This commit is contained in:
Christoph Kluge
2025-07-18 18:12:07 +02:00
parent 5cdb80b4d6
commit 697acd1d88
8 changed files with 518 additions and 128 deletions

View File

@@ -31,9 +31,11 @@
// GraphQL client taken from the component context; passed to every queryStore below
const client = getContextClient();
/* State Init */
// let from = $state(new Date(Date.now() - 5 * 60 * 1000));
// let to = $state(new Date(Date.now()));
// Query time window: starts five minutes before the moment of initialization ...
let from = $state(new Date(Date.now() - 5 * 60 * 1000));
// ... and ends at the moment of initialization
let to = $state(new Date(Date.now()));
// One measured width per rendered subcluster row (filled via bind:clientWidth in the markup)
let plotWidths = $state([]);
// NOTE(review): not written or read in the visible part of the file; presumably per-subcluster node counts — confirm
let nodesCounts = $state({});
// NOTE(review): name looks like a typo for "jobsCounts"; check all usages before renaming
let jobsJounts = $state({});
/* Derived */
// Note: nodeMetrics are requested on configured $timestep resolution
@@ -64,6 +66,123 @@
},
}));
// Optimal new query, does not exist
// const nodeRoofQuery = $derived(queryStore({
// client: client,
// query: gql`
// query ($filter: [JobFilter!]!, $metrics: [String!]!) {
// nodeRoofline(filter: $filter, metrics: $metrics) {
// nodeName
// nodeState
// numJobs
// stats {
// name
// data {
// avg
// }
// }
// }
// }
// `,
// variables: {
// filter: [{ state: ["running"] }, { cluster: { eq: cluster } }],
// metrics: ["flops_any", "mem_bw"], // Fixed names for job roofline
// },
// }));
// Load the averages required for the node roofline for all nodes of the cluster.
// This query only provides the per-node average data and the host name;
// slurm state and number of jobs come from secondary queries (nodesState, nodesJobs).
const nodesData = $derived(queryStore({
client: client,
query: gql`
query ($cluster: String!, $metrics: [String!], $from: Time!, $to: Time!) {
nodeMetrics(
cluster: $cluster
metrics: $metrics
from: $from
to: $to
) {
host
subCluster
metrics {
name
metric {
series {
statistics {
avg
}
}
}
}
}
}
`,
variables: {
cluster: cluster,
// Fixed metric names: transformNodesStatsToData looks up exactly these two
metrics: ["flops_any", "mem_bw"],
// Time window as defined in the state init section
from: from,
to: to,
},
}));
// Loaded for the job count per node only -- might be required for total running jobs anyway in the parent component!
// Also, think about an extra query with only TotalJobCount and Items [Resources, ...some meta infos], not including metric data
// NOTE(review): only a single page of 1500 items is requested; presumably per-node counts are incomplete beyond that — confirm
const paging = { itemsPerPage: 1500, page: 1 };
// Descending by start time; also reused as placeholder argument of the nodes query
const sorting = { field: "startTime", type: "col", order: "DESC" };
// Restrict to running jobs of the selected cluster
const filter = [
{ cluster: { eq: cluster } },
{ state: ["running"] },
];
// Only jobId and the resource hostnames are selected: enough to match jobs to nodes
const nodeJobsQuery = gql`
query (
$filter: [JobFilter!]!
$sorting: OrderByInput!
$paging: PageRequest!
) {
jobs(filter: $filter, order: $sorting, page: $paging) {
items {
jobId
resources {
hostname
}
}
count
}
}
`;
// Query store for the job list; read in transformNodesStatsToInfo to count jobs per host
const nodesJobs = $derived(queryStore({
client: client,
query: nodeJobsQuery,
variables: { paging, sorting, filter },
})
);
// Last required query: node state per host of the selected cluster.
// Read in transformNodesStatsToInfo, where entries are matched by hostname and subCluster.
const nodesState = $derived(queryStore({
client: client,
query: gql`
query (
$filter: [NodeFilter!]
$sorting: OrderByInput
) {
nodes(filter: $filter, order: $sorting) {
count
items {
hostname
cluster
subCluster
nodeState
}
}
}
`,
variables: {
// Restrict to the nodes of the selected cluster
filter: { cluster: { eq: cluster }},
sorting: sorting // Unused in backend: the job sorting object is passed as a placeholder only
// TODO: Add a subcluster filter?
},
}));
/* Function */
function transformJobsStatsToData(subclusterData) {
/* c will contain values from 0 to 1 representing the duration */
@@ -90,7 +209,7 @@
else c.push(d)
}
} else {
console.warn("transformData: metrics for 'mem_bw' and/or 'flops_any' missing!")
console.warn("transformJobsStatsToData: metrics for 'mem_bw' and/or 'flops_any' missing!")
}
if (x.length > 0 && y.length > 0 && c.length > 0) {
@@ -99,15 +218,69 @@
return data
}
/**
 * Build the roofline plot input for the nodes of one subcluster.
 *
 * For every node entry, the average of the first "flops_any" series becomes the
 * y value, and the ratio of the "flops_any" average to the "mem_bw" average
 * (the intensity) becomes the x value.
 *
 * Every input entry yields exactly one point, so the result stays index-aligned
 * with the info array built from the same entries. Nodes with a missing metric,
 * an empty series or a missing average therefore get 0 instead of being skipped
 * (previously a missing metric raised a TypeError and broke the whole plot).
 *
 * @param {Array<Object>|null|undefined} subclusterData nodeMetrics entries of one subcluster
 * @returns {Array|null} `[null, [x, y]]`, or null if there is no entry at all
 */
function transformNodesStatsToData(subclusterData) {
  if (!subclusterData) {
    // console.warn("transformNodesStatsToData: metrics for 'mem_bw' and/or 'flops_any' missing!")
    return null;
  }

  // Average of the first series of the named metric; undefined if any level is missing
  const firstSeriesAvg = (node, metricName) =>
    node?.metrics?.find((s) => s.name === metricName)?.metric?.series?.[0]?.statistics?.avg;

  const x = [];
  const y = [];
  for (const node of subclusterData) {
    const f = firstSeriesAvg(node, "flops_any") ?? 0.0;
    const m = firstSeriesAvg(node, "mem_bw") ?? 0.0;
    let intensity = f / m;
    if (!Number.isFinite(intensity)) {
      // Do not skip the node: that would introduce a mismatch between data and info arrays.
      // Float zero will not show in the log plot (always below render limit).
      intensity = 0.0;
    }
    x.push(intensity);
    y.push(f);
  }

  if (x.length === 0) {
    return null;
  }
  return [null, [x, y]]; // for dataformat see roofline.svelte
}
/**
 * Reduce job entries to the info fields handed to the job roofline plot.
 *
 * @param {Array<Object>|null|undefined} subclusterData job entries of one subcluster
 * @returns {Array<{id: *, jobId: *, numNodes: *, numAcc: *}>} one info object per
 *   entry, in input order; an empty array (plus a single console warning) if the
 *   input is missing
 */
function transformJobsStatsToInfo(subclusterData) {
  if (!subclusterData) {
    // Warn exactly once, using the current function name
    console.warn("transformJobsStatsToInfo: jobInfo missing!");
    return [];
  }
  return subclusterData.map((sc) => ({
    id: sc.id,
    jobId: sc.jobId,
    numNodes: sc.numNodes,
    // Jobs without accelerators report a falsy value or nothing: normalize to 0
    numAcc: sc.numAccelerators || 0,
  }));
}
/**
 * Build the per-node info objects that accompany the node roofline data.
 *
 * Reads the `$nodesState` and `$nodesJobs` store values. The node list itself is
 * taken from the metric data passed in (nodes as returned from CCMS, *NOT* as
 * saved in DB via SlurmState-API); state and job count are looked up per host.
 *
 * Lookup tables are built once per call instead of rescanning all node states
 * and all job resources for every single node.
 *
 * @param {Array<Object>|null|undefined} subClusterData nodeMetrics entries of one subcluster
 * @returns {Array<{nodeName: string, nodeState: string, numJobs: number}>} one entry
 *   per input node, in input order; empty if the input or the node state data is missing
 */
function transformNodesStatsToInfo(subClusterData) {
  if (!subClusterData || !$nodesState?.data) {
    return [];
  }

  // hostname -> node state entries with that hostname (in returned order)
  const statesByHost = new Map();
  for (const node of $nodesState.data.nodes?.items ?? []) {
    const sameHost = statesByHost.get(node.hostname);
    if (sameHost) {
      sameHost.push(node);
    } else {
      statesByHost.set(node.hostname, [node]);
    }
  }

  // hostname -> number of jobs using that host; stays empty while job data is missing
  const jobsPerHost = new Map();
  for (const job of $nodesJobs?.data?.jobs?.items ?? []) {
    // A job counts once per host, even if the host is listed several times
    const hosts = new Set((job.resources ?? []).map((res) => res.hostname));
    for (const host of hosts) {
      jobsPerHost.set(host, (jobsPerHost.get(host) ?? 0) + 1);
    }
  }

  return subClusterData.map((entry) => {
    const nodeName = entry?.host || "unknown";
    // First state entry matching both hostname and subcluster wins
    const nodeMatch = statesByHost
      .get(nodeName)
      ?.find((n) => n.subCluster === entry?.subCluster);
    return {
      nodeName: nodeName,
      // Hosts without a DB entry (or without a state) are marked explicitly
      nodeState: nodeMatch?.nodeState || "notindb",
      numJobs: jobsPerHost.get(nodeName) ?? 0,
    };
  });
}
</script>
<!-- Gauges & Roofline per Subcluster-->
@@ -115,19 +288,23 @@
{#each $initq.data.clusters.find((c) => c.name == cluster).subClusters as subCluster, i}
<Row cols={{ lg: 2, md: 2 , sm: 1}} class="mb-3 justify-content-center">
<Col class="px-3 mt-2 mt-lg-0">
<b>Classic</b>
<b>Bubble Node</b>
<div bind:clientWidth={plotWidths[i]}>
{#key $jobRoofQuery.data.jobsMetricStats}
{#key $nodesData?.data?.nodeMetrics || $nodesJobs?.data?.jobs}
<b>{subCluster.name} Total: {$jobRoofQuery.data.jobsMetricStats.filter(
(data) => data.subCluster == subCluster.name,
).length} Jobs</b>
<Roofline
<NewBubbleRoofline
allowSizeChange
renderTime
width={plotWidths[i] - 10}
height={300}
cluster={cluster}
subCluster={subCluster}
data={transformJobsStatsToData($jobRoofQuery?.data?.jobsMetricStats.filter(
roofData={transformNodesStatsToData($nodesData?.data?.nodeMetrics.filter(
(data) => data.subCluster == subCluster.name,
)
)}
nodesData={transformNodesStatsToInfo($nodesData?.data?.nodeMetrics.filter(
(data) => data.subCluster == subCluster.name,
)
)}
@@ -136,7 +313,7 @@
</div>
</Col>
<Col class="px-3 mt-2 mt-lg-0">
<b>Bubble</b>
<b>Bubble Jobs</b>
<div bind:clientWidth={plotWidths[i]}>
{#key $jobRoofQuery.data.jobsMetricStats}
<b>{subCluster.name} Total: {$jobRoofQuery.data.jobsMetricStats.filter(