Extend bubbleRoofline for nodeData, add column to node table, rename nodeStats query

This commit is contained in:
Christoph Kluge
2025-07-18 18:12:07 +02:00
parent 5cdb80b4d6
commit 697acd1d88
8 changed files with 518 additions and 128 deletions

View File

@@ -31,8 +31,10 @@
let {
roofData = null,
jobsData = null,
allowSizeChange = false,
nodesData = null,
cluster = null,
subCluster = null,
allowSizeChange = false,
width = 600,
height = 380,
} = $props();
@@ -264,16 +266,43 @@
let filtTop = u.posToVal(-maxSize / 2, scaleY.key);
for (let i = 0; i < d[0].length; i++) {
// Color based on Duration, check index for transparency highlighting
u.ctx.strokeStyle = getRGB(u.data[2][i]);
u.ctx.fillStyle = getRGB(u.data[2][i], transparentFill);
// Jobs: Color based on Duration
if (jobsData) {
u.ctx.strokeStyle = getRGB(u.data[2][i]);
u.ctx.fillStyle = getRGB(u.data[2][i], transparentFill);
// Nodes: Color based on Idle vs. Allocated
} else if (nodesData) {
// console.log('In Plot Handler NodesData', nodesData)
if (nodesData[i]?.nodeState == "idle") {
u.ctx.strokeStyle = "rgb(0, 0, 255)";
u.ctx.fillStyle = "rgba(0, 0, 255, 0.5)";
} else if (nodesData[i]?.nodeState == "allocated") {
u.ctx.strokeStyle = "rgb(0, 255, 0)";
u.ctx.fillStyle = "rgba(0, 255, 0, 0.5)";
} else if (nodesData[i]?.nodeState == "notindb") {
u.ctx.strokeStyle = "rgb(0, 0, 0)";
u.ctx.fillStyle = "rgba(0, 0, 0, 0.5)";
} else { // Fallback: All other DEFINED states
u.ctx.strokeStyle = "rgb(255, 0, 0)";
u.ctx.fillStyle = "rgba(255, 0, 0, 0.5)";
}
}
// Get Values
let xVal = d[0][i];
let yVal = d[1][i];
// Calc Size; Alt.: size = sizes[i] * pxRatio
const size = sizeBase + (jobsData[i]?.numAcc ? jobsData[i].numAcc / 2 : jobsData[i].numNodes); // In NodeMode: Scale with Number of Jobs?
// Calc Size; Alt.: size = sizes[i] * pxRatio
let size = 1;
// Jobs: Size based on Resourcecount
if (jobsData) {
size = sizeBase + (jobsData[i]?.numAcc ? jobsData[i].numAcc / 2 : jobsData[i].numNodes)
// Nodes: Size based on Jobcount
} else if (nodesData) {
size = sizeBase + nodesData[i]?.numJobs
};
if (xVal >= filtLft && xVal <= filtRgt && yVal >= filtBtm && yVal <= filtTop) {
let cx = valToPosX(xVal, scaleX, xDim, xOff);
let cy = valToPosY(yVal, scaleY, yDim, yOff);
@@ -338,7 +367,7 @@
};
// Tooltip Plugin
function tooltipPlugin({onclick, getJobData, shiftX = 10, shiftY = 10}) {
function tooltipPlugin({onclick, getLegendData, shiftX = 10, shiftY = 10}) {
let tooltipLeftOffset = 0;
let tooltipTopOffset = 0;
@@ -388,11 +417,34 @@
tooltip.style.top = (tooltipTopOffset + top + shiftX) + "px";
tooltip.style.left = (tooltipLeftOffset + lft + shiftY) + "px";
tooltip.style.borderColor = getRGB(u.data[2][i]);
tooltip.textContent = (
// Tooltip Content as String
`Job ID: ${getJobData(u, i).jobId}\nNodes: ${getJobData(u, i).numNodes}${getJobData(u, i)?.numAcc?`\nAccelerators: ${getJobData(u, i).numAcc}`:''}`
);
// Jobs: Color based on Duration
if (jobsData) {
tooltip.style.borderColor = getRGB(u.data[2][i]);
// Nodes: Color based on Idle vs. Allocated
} else if (nodesData) {
if (nodesData[i]?.nodeState == "idle") {
tooltip.style.borderColor = "rgb(0, 0, 255)";
} else if (nodesData[i]?.nodeState == "allocated") {
tooltip.style.borderColor = "rgb(0, 255, 0)";
} else if (nodesData[i]?.nodeState == "notindb") { // Missing from DB table
tooltip.style.borderColor = "rgb(0, 0, 0)";
} else { // Fallback: All other DEFINED states
tooltip.style.borderColor = "rgb(255, 0, 0)";
}
}
if (jobsData) {
tooltip.textContent = (
// Tooltip Content as String for Job
`Job ID: ${getLegendData(u, i).jobId}\nNodes: ${getLegendData(u, i).numNodes}${getLegendData(u, i)?.numAcc?`\nAccelerators: ${getLegendData(u, i).numAcc}`:''}`
);
} else if (nodesData) {
tooltip.textContent = (
// Tooltip Content as String for Node
`Host: ${getLegendData(u, i).nodeName}\nState: ${getLegendData(u, i).nodeState}\nJobs: ${getLegendData(u, i).numJobs}`
);
}
}
return {
@@ -444,14 +496,18 @@
timeoutId = setTimeout(() => {
timeoutId = null;
if (uplot) uplot.destroy();
render(roofData, jobsData);
render(roofData, jobsData, nodesData);
}, 200);
}
function render(roofdata, jobsData) {
if (roofdata) {
function render(roofData, jobsData, nodesData) {
let plotTitle = "CPU Roofline Diagram";
if (jobsData) plotTitle = "Job Average Roofline Diagram";
if (nodesData) plotTitle = "Node Average Roofline Diagram";
if (roofData) {
const opts = {
title: "Job Average Roofline Diagram",
title: plotTitle,
mode: 2,
width: width,
height: height,
@@ -669,35 +725,87 @@
u.ctx.lineWidth = 0.15;
}
// The Color Scale For Time Information
const posX = u.valToPos(0.1, "x", true)
const posXLimit = u.valToPos(100, "x", true)
const posY = u.valToPos(14000.0, "y", true)
u.ctx.fillStyle = 'black'
u.ctx.fillText('Short', posX, posY)
const start = posX + 10
for (let x = start; x < posXLimit; x += 10) {
let c = (x - start) / (posXLimit - start)
u.ctx.fillStyle = getRGB(c)
u.ctx.beginPath()
u.ctx.arc(x, posY, 3, 0, Math.PI * 2, false)
u.ctx.fill()
// Jobs: The Color Scale For Time Information
if (jobsData) {
const posX = u.valToPos(0.1, "x", true)
const posXLimit = u.valToPos(100, "x", true)
const posY = u.valToPos(14000.0, "y", true)
u.ctx.fillStyle = 'black'
u.ctx.fillText('Short', posX, posY)
const start = posX + 10
for (let x = start; x < posXLimit; x += 10) {
let c = (x - start) / (posXLimit - start)
u.ctx.fillStyle = getRGB(c)
u.ctx.beginPath()
u.ctx.arc(x, posY, 3, 0, Math.PI * 2, false)
u.ctx.fill()
}
u.ctx.fillStyle = 'black'
u.ctx.fillText('Long', posXLimit + 23, posY)
}
// Nodes: The Colors Of NodeStates (Just 3)
if (nodesData) {
const posY = u.valToPos(14000.0, "y", true)
const posAllocDot = u.valToPos(0.1, "x", true)
const posAllocText = posAllocDot + 60
u.ctx.fillStyle = "rgb(0, 255, 0)"
u.ctx.beginPath()
u.ctx.arc(posAllocDot, posY, 3, 0, Math.PI * 2, false)
u.ctx.fill()
u.ctx.fillStyle = 'black'
u.ctx.fillText('Allocated', posAllocText, posY)
const posIdleDot = posAllocDot + 150
const posIdleText = posAllocText + 120
u.ctx.fillStyle = "rgb(0, 0, 255)"
u.ctx.beginPath()
u.ctx.arc(posIdleDot, posY, 3, 0, Math.PI * 2, false)
u.ctx.fill()
u.ctx.fillStyle = 'black'
u.ctx.fillText('Idle', posIdleText, posY)
const posOtherDot = posIdleDot + 150
const posOtherText = posIdleText + 160
u.ctx.fillStyle = "rgb(255, 0, 0)"
u.ctx.beginPath()
u.ctx.arc(posOtherDot, posY, 3, 0, Math.PI * 2, false)
u.ctx.fill()
u.ctx.fillStyle = 'black'
u.ctx.fillText('Other', posOtherText, posY)
const posMissingDot = posOtherDot + 150
const posMissingText = posOtherText + 190
u.ctx.fillStyle = 'black'
u.ctx.beginPath()
u.ctx.arc(posMissingDot, posY, 3, 0, Math.PI * 2, false)
u.ctx.fill()
u.ctx.fillText('Missing in DB', posMissingText, posY)
}
u.ctx.fillStyle = 'black'
u.ctx.fillText('Long', posXLimit + 23, posY)
},
],
},
plugins: [
tooltipPlugin({
onclick(u, dataIdx) {
window.open(`/monitoring/job/${jobsData[dataIdx].id}`);
if (jobsData) {
window.open(`/monitoring/job/${jobsData[dataIdx].id}`)
} else if (nodesData) {
window.open(`/monitoring/node/${cluster}/${nodesData[dataIdx].nodeName}`)
}
},
getJobData: (u, dataIdx) => { return jobsData[dataIdx] }
getLegendData: (u, dataIdx) => {
if (jobsData) {
return jobsData[dataIdx]
} else if (nodesData) {
return nodesData[dataIdx]
}
}
}),
],
};
uplot = new uPlot(opts, roofdata, plotWrapper);
uplot = new uPlot(opts, roofData, plotWrapper);
} else {
// console.log("No data for roofline!");
}
@@ -705,7 +813,7 @@
/* On Mount */
onMount(() => {
render(roofData, jobsData);
render(roofData, jobsData, nodesData);
});
/* On Destroy */

View File

@@ -31,9 +31,11 @@
const client = getContextClient();
/* State Init */
// let from = $state(new Date(Date.now() - 5 * 60 * 1000));
// let to = $state(new Date(Date.now()));
let from = $state(new Date(Date.now() - 5 * 60 * 1000));
let to = $state(new Date(Date.now()));
let plotWidths = $state([]);
let nodesCounts = $state({});
let jobsJounts = $state({});
/* Derived */
// Note: nodeMetrics are requested on configured $timestep resolution
@@ -64,6 +66,123 @@
},
}));
// Optimal new query, does not exist
// const nodeRoofQuery = $derived(queryStore({
// client: client,
// query: gql`
// query ($filter: [JobFilter!]!, $metrics: [String!]!) {
// nodeRoofline(filter: $filter, metrics: $metrics) {
// nodeName
// nodeState
// numJobs
// stats {
// name
// data {
// avg
// }
// }
// }
// }
// `,
// variables: {
// filter: [{ state: ["running"] }, { cluster: { eq: cluster } }],
// metrics: ["flops_any", "mem_bw"], // Fixed names for job roofline
// },
// }));
// Query store for the roofline input data of all nodes of `cluster`:
// requests, per node, the host name, its subCluster and the average ("avg")
// of each requested metric series within the `from`..`to` time range.
// The result carries only names and metric averages; node state and job
// counts are not part of this query and have to come from other queries.
const nodesData = $derived(queryStore({
client: client,
query: gql`
query ($cluster: String!, $metrics: [String!], $from: Time!, $to: Time!) {
nodeMetrics(
cluster: $cluster
metrics: $metrics
from: $from
to: $to
) {
host
subCluster
metrics {
name
metric {
series {
statistics {
avg
}
}
}
}
}
}
`,
variables: {
cluster: cluster,
// Fixed metric names: the two metrics looked up by name in transformNodesStatsToData
metrics: ["flops_any", "mem_bw"],
from: from,
to: to,
},
}));
// Load running jobs only to derive the job count per node -- might be required
// for the total number of running jobs in the parent component anyway!
// Also, think about an extra query with only TotalJobCount and Items
// [Resources, ...some meta infos], not including metric data.

// Request a single page of at most 1500 jobs.
// NOTE(review): if more than 1500 jobs are running, later jobs are not fetched
// and per-node job counts would presumably be too low -- confirm.
const paging = { itemsPerPage: 1500, page: 1 };
const sorting = { field: "startTime", type: "col", order: "DESC" };
// Restrict to jobs of this cluster that are in state "running".
const filter = [
{ cluster: { eq: cluster } },
{ state: ["running"] },
];
// Only jobId and the hostnames of the job's resources are requested, plus the total count.
const nodeJobsQuery = gql`
query (
$filter: [JobFilter!]!
$sorting: OrderByInput!
$paging: PageRequest!
) {
jobs(filter: $filter, order: $sorting, page: $paging) {
items {
jobId
resources {
hostname
}
}
count
}
}
`;
// Query store for the running-jobs list defined above.
const nodesJobs = $derived(queryStore({
client: client,
query: nodeJobsQuery,
variables: { paging, sorting, filter },
})
);
// Last required query: node state.
// Requests hostname, cluster, subCluster and nodeState for the nodes of `cluster`,
// plus the total node count.
const nodesState = $derived(queryStore({
client: client,
query: gql`
query (
$filter: [NodeFilter!]
$sorting: OrderByInput
) {
nodes(filter: $filter, order: $sorting) {
count
items {
hostname
cluster
subCluster
nodeState
}
}
}
`,
variables: {
// NOTE(review): a single object is passed for the list-typed $filter variable;
// presumably this relies on GraphQL list input coercion -- confirm.
filter: { cluster: { eq: cluster }},
sorting: sorting // Unused in Backend: Use Placeholder
// Subcluster filter?
},
}));
/* Function */
function transformJobsStatsToData(subclusterData) {
/* c will contain values from 0 to 1 representing the duration */
@@ -90,7 +209,7 @@
else c.push(d)
}
} else {
console.warn("transformData: metrics for 'mem_bw' and/or 'flops_any' missing!")
console.warn("transformJobsStatsToData: metrics for 'mem_bw' and/or 'flops_any' missing!")
}
if (x.length > 0 && y.length > 0 && c.length > 0) {
@@ -99,15 +218,69 @@
return data
}
/**
 * Build the roofline point arrays from per-node metric averages.
 *
 * For every entry of `subclusterData` exactly one point is produced, so the
 * returned arrays stay index-aligned with the input (and with any info array
 * derived from the same input):
 *   x = arithmetic intensity (avg "flops_any" / avg "mem_bw")
 *   y = avg "flops_any"
 *
 * A node whose metrics, series or averages are missing no longer raises a
 * TypeError; its values fall back to 0.0 instead of being dropped, which
 * keeps the index alignment intact.
 *
 * @param {Array<Object>|null|undefined} subclusterData Node entries, each with a
 *   `metrics` array of `{ name, metric: { series: [{ statistics: { avg } }] } }`.
 * @returns {Array|null} `[null, [x, y]]`, or `null` if there is no input/no point.
 */
function transformNodesStatsToData(subclusterData) {
  if (!subclusterData) return null;

  // Average of the first series of the named metric; undefined if any level is missing.
  const averageOf = (node, metricName) =>
    node?.metrics?.find((entry) => entry.name === metricName)?.metric?.series?.[0]?.statistics?.avg;

  const x = [];
  const y = [];
  for (const node of subclusterData) {
    const flops = averageOf(node, "flops_any");
    const memBw = averageOf(node, "mem_bw");

    // NaN (missing value, 0/0) and +-Infinity (division by zero) are mapped to
    // float zero rather than skipped: skipping would introduce a mismatch
    // between the data and info arrays.
    const ratio = flops / memBw;
    x.push(Number.isFinite(ratio) ? ratio : 0.0);
    y.push(flops ?? 0.0);
  }

  // For the data format see roofline.svelte
  return x.length > 0 ? [null, [x, y]] : null;
}
/**
 * Reduce job entries to the info objects used alongside the job roofline data.
 *
 * @param {Array<Object>|null|undefined} subclusterData Job entries with
 *   `id`, `jobId`, `numNodes` and optionally `numAccelerators`.
 * @returns {Array<{id: *, jobId: *, numNodes: *, numAcc: *}>} One info object per
 *   job in input order; `numAcc` is 0 when `numAccelerators` is absent or falsy.
 *   Returns an empty array (and logs one warning) when no input is given.
 */
function transformJobsStatsToInfo(subclusterData) {
  if (!subclusterData) {
    // Warn exactly once, using this function's current name.
    console.warn("transformJobsStatsToInfo: jobInfo missing!");
    return [];
  }

  return subclusterData.map((sc) => ({
    id: sc.id,
    jobId: sc.jobId,
    numNodes: sc.numNodes,
    numAcc: sc.numAccelerators || 0,
  }));
}
/**
 * Build one info object per node of the given metric result.
 *
 * The node list is taken from the metric data passed in (not from the
 * node-state result); state and job count are looked up per node:
 *   - nodeName: the entry's `host`, or "unknown" if absent
 *   - nodeState: state of the `$nodesState` item matching hostname and
 *     subCluster, or "notindb" if there is no such item/state
 *   - numJobs: number of `$nodesJobs` items having a resource on that host,
 *     0 while the jobs result is not available
 *
 * @param {Array<Object>|null|undefined} subClusterData Node metric entries (`host`, `subCluster`).
 * @returns {Array<{nodeName: string, nodeState: string, numJobs: number}>}
 *   Empty if there is no input or no node-state data yet.
 */
function transformNodesStatsToInfo(subClusterData) {
  // Nothing to report without metric nodes or a loaded node-state result
  if (!(subClusterData && $nodesState?.data)) return [];

  return subClusterData.map((entry) => {
    const nodeName = entry?.host || "unknown";

    const stateItem = $nodesState.data.nodes.items.find(
      (item) => item.hostname == nodeName && item.subCluster == entry.subCluster,
    );
    const nodeState = stateItem?.nodeState || "notindb";

    // A job counts once for the node, no matter how many of its resources match
    const numJobs = $nodesJobs?.data
      ? $nodesJobs.data.jobs.items.filter(
          (job) => job.resources.some((res) => res.hostname == nodeName),
        ).length
      : 0;

    return { nodeName: nodeName, nodeState: nodeState, numJobs: numJobs };
  });
}
</script>
<!-- Gauges & Roofline per Subcluster-->
@@ -115,19 +288,23 @@
{#each $initq.data.clusters.find((c) => c.name == cluster).subClusters as subCluster, i}
<Row cols={{ lg: 2, md: 2 , sm: 1}} class="mb-3 justify-content-center">
<Col class="px-3 mt-2 mt-lg-0">
<b>Classic</b>
<b>Bubble Node</b>
<div bind:clientWidth={plotWidths[i]}>
{#key $jobRoofQuery.data.jobsMetricStats}
{#key $nodesData?.data?.nodeMetrics || $nodesJobs?.data?.jobs}
<b>{subCluster.name} Total: {$jobRoofQuery.data.jobsMetricStats.filter(
(data) => data.subCluster == subCluster.name,
).length} Jobs</b>
<Roofline
<NewBubbleRoofline
allowSizeChange
renderTime
width={plotWidths[i] - 10}
height={300}
cluster={cluster}
subCluster={subCluster}
data={transformJobsStatsToData($jobRoofQuery?.data?.jobsMetricStats.filter(
roofData={transformNodesStatsToData($nodesData?.data?.nodeMetrics.filter(
(data) => data.subCluster == subCluster.name,
)
)}
nodesData={transformNodesStatsToInfo($nodesData?.data?.nodeMetrics.filter(
(data) => data.subCluster == subCluster.name,
)
)}
@@ -136,7 +313,7 @@
</div>
</Col>
<Col class="px-3 mt-2 mt-lg-0">
<b>Bubble</b>
<b>Bubble Jobs</b>
<div bind:clientWidth={plotWidths[i]}>
{#key $jobRoofQuery.data.jobsMetricStats}
<b>{subCluster.name} Total: {$jobRoofQuery.data.jobsMetricStats.filter(