various small dashboard fixes

- Use fixed per-state pie chart colors, cap idle node/core/accelerator counts at 0, and raise the running-job metric histogram limit from 500 to 5000 jobs
This commit is contained in:
Christoph Kluge
2026-01-29 17:46:01 +01:00
parent 318dbd65e0
commit e8c81ba7d4
6 changed files with 45 additions and 26 deletions

View File

@@ -38,7 +38,7 @@
// - All queries use prepared statements via stmtCache // - All queries use prepared statements via stmtCache
// - Complex aggregations use SQL for efficiency // - Complex aggregations use SQL for efficiency
// - Histogram pre-initialization ensures consistent bin ranges // - Histogram pre-initialization ensures consistent bin ranges
// - Metric histogram queries limited to 500 jobs for running job analysis // - Metric histogram queries limited to 5000 jobs for running job analysis
package repository package repository
@@ -686,7 +686,7 @@ func (r *JobRepository) AddHistograms(
// - Pre-initialized with zeros for consistent visualization // - Pre-initialized with zeros for consistent visualization
// //
// Limitations: // Limitations:
// - Running jobs: Limited to 500 jobs for performance // - Running jobs: Limited to 5000 jobs for performance
// - Requires valid cluster configuration with metric peak values // - Requires valid cluster configuration with metric peak values
// - Uses footprint statistic (avg/max/min) configured per metric // - Uses footprint statistic (avg/max/min) configured per metric
func (r *JobRepository) AddMetricHistograms( func (r *JobRepository) AddMetricHistograms(
@@ -995,12 +995,12 @@ func (r *JobRepository) jobsMetricStatisticsHistogram(
// Returns slice of MetricHistoPoints, one per metric. // Returns slice of MetricHistoPoints, one per metric.
// //
// Limitations: // Limitations:
// - Maximum 500 jobs (returns nil if more jobs match) // - Maximum 5000 jobs (returns nil if more jobs match)
// - Requires metric backend availability // - Requires metric backend availability
// - Bins based on metric peak values from cluster configuration // - Bins based on metric peak values from cluster configuration
// //
// Algorithm: // Algorithm:
// 1. Query first 501 jobs to check count limit // 1. Query first 5001 jobs to check count limit
// 2. Load metric averages for all jobs via metricdispatch // 2. Load metric averages for all jobs via metricdispatch
// 3. For each metric, create bins based on peak value // 3. For each metric, create bins based on peak value
// 4. Iterate averages and count jobs per bin // 4. Iterate averages and count jobs per bin
@@ -1011,13 +1011,13 @@ func (r *JobRepository) runningJobsMetricStatisticsHistogram(
bins *int, bins *int,
) []*model.MetricHistoPoints { ) []*model.MetricHistoPoints {
// Get Jobs // Get Jobs
jobs, err := r.QueryJobs(ctx, filters, &model.PageRequest{Page: 1, ItemsPerPage: 500 + 1}, nil) jobs, err := r.QueryJobs(ctx, filters, &model.PageRequest{Page: 1, ItemsPerPage: 5000 + 1}, nil)
if err != nil { if err != nil {
cclog.Errorf("Error while querying jobs for footprint: %s", err) cclog.Errorf("Error while querying jobs for footprint: %s", err)
return nil return nil
} }
if len(jobs) > 500 { if len(jobs) > 5000 {
cclog.Errorf("too many jobs matched (max: %d)", 500) cclog.Errorf("too many jobs matched (max: %d)", 5000)
return nil return nil
} }

View File

@@ -242,10 +242,14 @@
} }
} }
// Get Idle Infos after Sums // Get Simple Idle Infos after Sums by Diff
if (!rawInfos['idleNodes']) rawInfos['idleNodes'] = rawInfos['totalNodes'] - rawInfos['allocatedNodes']; if (!rawInfos['idleNodes']) rawInfos['idleNodes'] = rawInfos['totalNodes'] - rawInfos['allocatedNodes'];
if (!rawInfos['idleCores']) rawInfos['idleCores'] = rawInfos['totalCores'] - rawInfos['allocatedCores']; if (!rawInfos['idleCores']) rawInfos['idleCores'] = rawInfos['totalCores'] - rawInfos['allocatedCores'];
if (!rawInfos['idleAccs']) rawInfos['idleAccs'] = rawInfos['totalAccs'] - rawInfos['allocatedAccs']; if (!rawInfos['idleAccs']) rawInfos['idleAccs'] = rawInfos['totalAccs'] - rawInfos['allocatedAccs'];
// Cap at 0 (Negative hints towards Config <> Reality Mismatch!)
if (rawInfos['idleNodes'] < 0) rawInfos['idleNodes'] = 0;
if (rawInfos['idleCores'] < 0) rawInfos['idleCores'] = 0;
if (rawInfos['idleAccs'] < 0) rawInfos['idleAccs'] = 0;
// Keymetrics (Data on Cluster-Scope) // Keymetrics (Data on Cluster-Scope)
let rawFlops = $statusQuery?.data?.nodeMetrics?.reduce((sum, node) => let rawFlops = $statusQuery?.data?.nodeMetrics?.reduce((sum, node) =>

View File

@@ -25,7 +25,7 @@
metricData, metricData,
timestep, timestep,
numNodes, numNodes,
cluster, cluster = "",
forNode = true, forNode = true,
enableFlip = false, enableFlip = false,
publicMode = false, publicMode = false,
@@ -316,12 +316,14 @@
<div bind:this={plotWrapper} bind:clientWidth={width} <div bind:this={plotWrapper} bind:clientWidth={width}
class={forNode ? 'py-2 rounded' : 'rounded'} class={forNode ? 'py-2 rounded' : 'rounded'}
></div> ></div>
{:else if cluster}
<Card body color="warning" class="mx-4"
>Cannot render plot: No series data returned for <code>{cluster}</code>.</Card
>
{:else} {:else}
<Card body color="warning" class="mx-4" <Card color="warning" class="mx-2 mt-2">
>Cannot render plot: No series data returned.</Card <CardHeader class="mb-0">
> <b>Empty Metrics</b>
</CardHeader>
<CardBody>
<p>Cannot render plot for cluster <b>{cluster}</b>.</p>
<p class="mb-1">Metrics found but returned without timeseries data.</p>
</CardBody>
</Card>
{/if} {/if}

View File

@@ -67,6 +67,11 @@
reserved: "rgba(255, 0, 255, 0.75)", reserved: "rgba(255, 0, 255, 0.75)",
mixed: "rgba(255, 215, 0, 0.75)", mixed: "rgba(255, 215, 0, 0.75)",
unknown: "rgba(0, 0, 0, 0.75)" unknown: "rgba(0, 0, 0, 0.75)"
},
healthStates: {
full: "rgba(0, 128, 0, 0.75)",
failed: "rgba(255, 0, 0, 0.75)",
partial: "rgba(255, 215, 0, 0.75)",
} }
} }
</script> </script>

View File

@@ -271,10 +271,14 @@
} }
} }
// Get Idle Infos after Sums // Get Simple Idle Infos after Sums by Diff
if (!rawInfos['idleNodes']) rawInfos['idleNodes'] = rawInfos['totalNodes'] - rawInfos['allocatedNodes']; if (!rawInfos['idleNodes']) rawInfos['idleNodes'] = rawInfos['totalNodes'] - rawInfos['allocatedNodes'];
if (!rawInfos['idleCores']) rawInfos['idleCores'] = rawInfos['totalCores'] - rawInfos['allocatedCores']; if (!rawInfos['idleCores']) rawInfos['idleCores'] = rawInfos['totalCores'] - rawInfos['allocatedCores'];
if (!rawInfos['idleAccs']) rawInfos['idleAccs'] = rawInfos['totalAccs'] - rawInfos['allocatedAccs']; if (!rawInfos['idleAccs']) rawInfos['idleAccs'] = rawInfos['totalAccs'] - rawInfos['allocatedAccs'];
// Cap at 0 (Negative hints towards Config <> Reality Mismatch!)
if (rawInfos['idleNodes'] < 0) rawInfos['idleNodes'] = 0;
if (rawInfos['idleCores'] < 0) rawInfos['idleCores'] = 0;
if (rawInfos['idleAccs'] < 0) rawInfos['idleAccs'] = 0;
// Keymetrics (Data on Cluster-Scope) // Keymetrics (Data on Cluster-Scope)
let rawFlops = $statusQuery?.data?.nodeMetrics?.reduce((sum, node) => let rawFlops = $statusQuery?.data?.nodeMetrics?.reduce((sum, node) =>

View File

@@ -418,7 +418,7 @@
{:else if $statesTimed.error} {:else if $statesTimed.error}
<Row cols={1} class="text-center mt-3"> <Row cols={1} class="text-center mt-3">
<Col> <Col>
<Card body color="danger">{$statesTimed.error.message}</Card> <Card body color="danger">States Timed: {$statesTimed.error.message}</Card>
</Col> </Col>
</Row> </Row>
{:else if $statesTimed.data} {:else if $statesTimed.data}
@@ -472,7 +472,7 @@
{:else if $statusQuery.error} {:else if $statusQuery.error}
<Row cols={1} class="text-center mt-3"> <Row cols={1} class="text-center mt-3">
<Col> <Col>
<Card body color="danger">{$statesTimed.error.message}</Card> <Card body color="danger">Status Query (States): {$statesTimed.error.message}</Card>
</Col> </Col>
</Row> </Row>
{:else if $statusQuery?.data?.nodeStates} {:else if $statusQuery?.data?.nodeStates}
@@ -484,7 +484,6 @@
Current {cluster.charAt(0).toUpperCase() + cluster.slice(1)} Node States Current {cluster.charAt(0).toUpperCase() + cluster.slice(1)} Node States
</h4> </h4>
<Pie <Pie
{useAltColors}
canvasId="hpcpie-slurm" canvasId="hpcpie-slurm"
size={pieWidth * 0.55} size={pieWidth * 0.55}
sliceLabel="Nodes" sliceLabel="Nodes"
@@ -494,6 +493,9 @@
entities={refinedStateData.map( entities={refinedStateData.map(
(sd) => sd.state, (sd) => sd.state,
)} )}
fixColors={refinedStateData.map(
(sd) => colors['nodeStates'][sd.state],
)}
/> />
{/key} {/key}
</div> </div>
@@ -508,7 +510,7 @@
</tr> </tr>
{#each refinedStateData as sd, i} {#each refinedStateData as sd, i}
<tr> <tr>
<td><Icon name="circle-fill" style="color: {legendColors(i)};"/></td> <td><Icon name="circle-fill" style="color: {colors['nodeStates'][sd.state]};"/></td>
<td>{sd.state}</td> <td>{sd.state}</td>
<td>{sd.count}</td> <td>{sd.count}</td>
</tr> </tr>
@@ -524,15 +526,17 @@
Current {cluster.charAt(0).toUpperCase() + cluster.slice(1)} Node Health Current {cluster.charAt(0).toUpperCase() + cluster.slice(1)} Node Health
</h4> </h4>
<Pie <Pie
{useAltColors}
canvasId="hpcpie-health" canvasId="hpcpie-health"
size={pieWidth * 0.55} size={pieWidth * 0.55}
sliceLabel="Nodes" sliceLabel="Nodes"
quantities={refinedHealthData.map( quantities={refinedHealthData.map(
(sd) => sd.count, (hd) => hd.count,
)} )}
entities={refinedHealthData.map( entities={refinedHealthData.map(
(sd) => sd.state, (hd) => hd.state,
)}
fixColors={refinedHealthData.map(
(hd) => colors['healthStates'][hd.state],
)} )}
/> />
{/key} {/key}
@@ -548,7 +552,7 @@
</tr> </tr>
{#each refinedHealthData as hd, i} {#each refinedHealthData as hd, i}
<tr> <tr>
<td><Icon name="circle-fill" style="color: {legendColors(i)};" /></td> <td><Icon name="circle-fill"style="color: {colors['healthStates'][hd.state]};" /></td>
<td>{hd.state}</td> <td>{hd.state}</td>
<td>{hd.count}</td> <td>{hd.count}</td>
</tr> </tr>
@@ -570,7 +574,7 @@
{:else if $statusQuery.error} {:else if $statusQuery.error}
<Row cols={1} class="text-center mt-3"> <Row cols={1} class="text-center mt-3">
<Col> <Col>
<Card body color="danger">{$statusQuery.error.message}</Card> <Card body color="danger">Status Query (Details): {$statusQuery.error.message}</Card>
</Col> </Col>
</Row> </Row>
{:else if $statusQuery.data} {:else if $statusQuery.data}