From e8c81ba7d493dc20c6be4683d7869595d31a6a57 Mon Sep 17 00:00:00 2001 From: Christoph Kluge Date: Thu, 29 Jan 2026 17:46:01 +0100 Subject: [PATCH] various small dashboard fixes - piechart color, idle count cap, metricHistoMaximum increased --- internal/repository/stats.go | 14 ++++++------ web/frontend/src/DashPublic.root.svelte | 6 ++++- .../src/generic/plots/DoubleMetricPlot.svelte | 18 ++++++++------- web/frontend/src/generic/plots/Pie.svelte | 5 +++++ web/frontend/src/status/DashInternal.svelte | 6 ++++- .../src/status/dashdetails/StatusDash.svelte | 22 +++++++++++-------- 6 files changed, 45 insertions(+), 26 deletions(-) diff --git a/internal/repository/stats.go b/internal/repository/stats.go index 851a4ca1..af764d46 100644 --- a/internal/repository/stats.go +++ b/internal/repository/stats.go @@ -38,7 +38,7 @@ // - All queries use prepared statements via stmtCache // - Complex aggregations use SQL for efficiency // - Histogram pre-initialization ensures consistent bin ranges -// - Metric histogram queries limited to 500 jobs for running job analysis +// - Metric histogram queries limited to 5000 jobs for running job analysis package repository @@ -686,7 +686,7 @@ func (r *JobRepository) AddHistograms( // - Pre-initialized with zeros for consistent visualization // // Limitations: -// - Running jobs: Limited to 500 jobs for performance +// - Running jobs: Limited to 5000 jobs for performance // - Requires valid cluster configuration with metric peak values // - Uses footprint statistic (avg/max/min) configured per metric func (r *JobRepository) AddMetricHistograms( @@ -995,12 +995,12 @@ func (r *JobRepository) jobsMetricStatisticsHistogram( // Returns slice of MetricHistoPoints, one per metric. // // Limitations: -// - Maximum 500 jobs (returns nil if more jobs match) +// - Maximum 5000 jobs (returns nil if more jobs match) // - Requires metric backend availability // - Bins based on metric peak values from cluster configuration // // Algorithm: -// 1. 
Query first 501 jobs to check count limit +// 1. Query first 5001 jobs to check count limit // 2. Load metric averages for all jobs via metricdispatch // 3. For each metric, create bins based on peak value // 4. Iterate averages and count jobs per bin @@ -1011,13 +1011,13 @@ func (r *JobRepository) runningJobsMetricStatisticsHistogram( bins *int, ) []*model.MetricHistoPoints { // Get Jobs - jobs, err := r.QueryJobs(ctx, filters, &model.PageRequest{Page: 1, ItemsPerPage: 500 + 1}, nil) + jobs, err := r.QueryJobs(ctx, filters, &model.PageRequest{Page: 1, ItemsPerPage: 5000 + 1}, nil) if err != nil { cclog.Errorf("Error while querying jobs for footprint: %s", err) return nil } - if len(jobs) > 500 { - cclog.Errorf("too many jobs matched (max: %d)", 500) + if len(jobs) > 5000 { + cclog.Errorf("too many jobs matched (max: %d)", 5000) return nil } diff --git a/web/frontend/src/DashPublic.root.svelte b/web/frontend/src/DashPublic.root.svelte index 988e2651..f66a6435 100644 --- a/web/frontend/src/DashPublic.root.svelte +++ b/web/frontend/src/DashPublic.root.svelte @@ -242,10 +242,14 @@ } } - // Get Idle Infos after Sums + // Get Simple Idle Infos after Sums by Diff if (!rawInfos['idleNodes']) rawInfos['idleNodes'] = rawInfos['totalNodes'] - rawInfos['allocatedNodes']; if (!rawInfos['idleCores']) rawInfos['idleCores'] = rawInfos['totalCores'] - rawInfos['allocatedCores']; if (!rawInfos['idleAccs']) rawInfos['idleAccs'] = rawInfos['totalAccs'] - rawInfos['allocatedAccs']; + // Cap at 0 (Negative hints towards Config <> Reality Mismatch!) 
+ if (rawInfos['idleNodes'] < 0) rawInfos['idleNodes'] = 0; + if (rawInfos['idleCores'] < 0) rawInfos['idleCores'] = 0; + if (rawInfos['idleAccs'] < 0) rawInfos['idleAccs'] = 0; // Keymetrics (Data on Cluster-Scope) let rawFlops = $statusQuery?.data?.nodeMetrics?.reduce((sum, node) => diff --git a/web/frontend/src/generic/plots/DoubleMetricPlot.svelte b/web/frontend/src/generic/plots/DoubleMetricPlot.svelte index e94e269d..10e01311 100644 --- a/web/frontend/src/generic/plots/DoubleMetricPlot.svelte +++ b/web/frontend/src/generic/plots/DoubleMetricPlot.svelte @@ -25,7 +25,7 @@ metricData, timestep, numNodes, - cluster, + cluster = "", forNode = true, enableFlip = false, publicMode = false, @@ -316,12 +316,14 @@
-{:else if cluster} - Cannot render plot: No series data returned for {cluster}. {:else} - Cannot render plot: No series data returned. + + + Empty Metrics + + +

Cannot render plot for cluster {cluster}.

+

Metrics found but returned without timeseries data.

+
+
{/if} diff --git a/web/frontend/src/generic/plots/Pie.svelte b/web/frontend/src/generic/plots/Pie.svelte index 3cfb1821..331ae904 100644 --- a/web/frontend/src/generic/plots/Pie.svelte +++ b/web/frontend/src/generic/plots/Pie.svelte @@ -67,6 +67,11 @@ reserved: "rgba(255, 0, 255, 0.75)", mixed: "rgba(255, 215, 0, 0.75)", unknown: "rgba(0, 0, 0, 0.75)" + }, + healthStates: { + full: "rgba(0, 128, 0, 0.75)", + failed: "rgba(255, 0, 0, 0.75)", + partial: "rgba(255, 215, 0, 0.75)", } } diff --git a/web/frontend/src/status/DashInternal.svelte b/web/frontend/src/status/DashInternal.svelte index b55e1459..145ac4dc 100644 --- a/web/frontend/src/status/DashInternal.svelte +++ b/web/frontend/src/status/DashInternal.svelte @@ -271,10 +271,14 @@ } } - // Get Idle Infos after Sums + // Get Simple Idle Infos after Sums by Diff if (!rawInfos['idleNodes']) rawInfos['idleNodes'] = rawInfos['totalNodes'] - rawInfos['allocatedNodes']; if (!rawInfos['idleCores']) rawInfos['idleCores'] = rawInfos['totalCores'] - rawInfos['allocatedCores']; if (!rawInfos['idleAccs']) rawInfos['idleAccs'] = rawInfos['totalAccs'] - rawInfos['allocatedAccs']; + // Cap at 0 (Negative hints towards Config <> Reality Mismatch!) 
+ if (rawInfos['idleNodes'] < 0) rawInfos['idleNodes'] = 0; + if (rawInfos['idleCores'] < 0) rawInfos['idleCores'] = 0; + if (rawInfos['idleAccs'] < 0) rawInfos['idleAccs'] = 0; // Keymetrics (Data on Cluster-Scope) let rawFlops = $statusQuery?.data?.nodeMetrics?.reduce((sum, node) => diff --git a/web/frontend/src/status/dashdetails/StatusDash.svelte b/web/frontend/src/status/dashdetails/StatusDash.svelte index 57da0f4d..730ecdcc 100644 --- a/web/frontend/src/status/dashdetails/StatusDash.svelte +++ b/web/frontend/src/status/dashdetails/StatusDash.svelte @@ -418,7 +418,7 @@ {:else if $statesTimed.error} - {$statesTimed.error.message} + States Timed: {$statesTimed.error.message} {:else if $statesTimed.data} @@ -472,7 +472,7 @@ {:else if $statusQuery.error} - {$statesTimed.error.message} + Status Query (States): {$statusQuery.error.message} {:else if $statusQuery?.data?.nodeStates} @@ -484,7 +484,6 @@ Current {cluster.charAt(0).toUpperCase() + cluster.slice(1)} Node States sd.state, )} + fixColors={refinedStateData.map( + (sd) => colors['nodeStates'][sd.state], + )} /> {/key} @@ -508,7 +510,7 @@ {#each refinedStateData as sd, i} - + {sd.state} {sd.count} @@ -524,15 +526,17 @@ Current {cluster.charAt(0).toUpperCase() + cluster.slice(1)} Node Health sd.count, + (hd) => hd.count, )} entities={refinedHealthData.map( - (sd) => sd.state, + (hd) => hd.state, + )} + fixColors={refinedHealthData.map( + (hd) => colors['healthStates'][hd.state], )} /> {/key} @@ -548,7 +552,7 @@ {#each refinedHealthData as hd, i} - + {hd.state} {hd.count} @@ -570,7 +574,7 @@ {:else if $statusQuery.error} - {$statusQuery.error.message} + Status Query (Details): {$statusQuery.error.message} {:else if $statusQuery.data}