From 5616801f3e117ac406fde845431e8ab93b27eba7 Mon Sep 17 00:00:00 2001 From: Christoph Kluge Date: Thu, 5 Feb 2026 10:33:20 +0100 Subject: [PATCH 1/9] review user vinformation block, reorder reactive var in jobList --- web/frontend/src/User.root.svelte | 56 +++++++++++++++++-- .../src/generic/joblist/JobListRow.svelte | 2 +- web/frontend/src/generic/units.js | 8 +-- 3 files changed, 55 insertions(+), 11 deletions(-) diff --git a/web/frontend/src/User.root.svelte b/web/frontend/src/User.root.svelte index 669ca8bb..bb83ef91 100644 --- a/web/frontend/src/User.root.svelte +++ b/web/frontend/src/User.root.svelte @@ -19,7 +19,8 @@ Spinner, Input, InputGroup, - InputGroupText + InputGroupText, + Tooltip } from "@sveltestrap/sveltestrap"; import { queryStore, @@ -32,6 +33,10 @@ scramble, scrambleNames, } from "./generic/utils.js"; + import { + formatNumber, + formatDurationTime + } from "./generic/units.js"; import JobList from "./generic/JobList.svelte"; import JobCompare from "./generic/JobCompare.svelte"; import Filters from "./generic/Filters.svelte"; @@ -56,6 +61,7 @@ const durationBinOptions = ["1m","10m","1h","6h","12h"]; const metricBinOptions = [10, 20, 50, 100]; const matchedJobCompareLimit = 500; + const shortDuration = ccconfig.jobList_hideShortRunningJobs; // Always configured /* State Init */ // List & Control Vars @@ -108,6 +114,7 @@ shortJobs totalWalltime totalCoreHours + totalAccHours histDuration { count value @@ -133,6 +140,7 @@ variables: { jobFilters, selectedHistograms, numDurationBins, numMetricBins }, }) ); + const hasAccHours = $derived($stats?.data?.jobsStatistics[0]?.totalAccHours != 0); /* Functions */ function resetJobSelection() { @@ -290,20 +298,54 @@ {/if} Total Jobs - {$stats.data.jobsStatistics[0].totalJobs} + + + {formatNumber($stats.data.jobsStatistics[0].totalJobs)} Jobs + + - Short Jobs - {$stats.data.jobsStatistics[0].shortJobs} + + + Short Jobs + + + + Job duration less than {formatDurationTime(shortDuration)} + + + + + {formatNumber($stats.data.jobsStatistics[0].shortJobs)} Jobs + + Total Walltime - {$stats.data.jobsStatistics[0].totalWalltime} + + + {formatNumber($stats.data.jobsStatistics[0].totalWalltime)} Hours + + Total Core Hours - {$stats.data.jobsStatistics[0].totalCoreHours} + + + {formatNumber($stats.data.jobsStatistics[0].totalCoreHours)} Hours + + + {#if hasAccHours} + + Total Accelerator Hours + + + {formatNumber($stats.data.jobsStatistics[0].totalAccHours)} Hours + + + + {/if} @@ -316,6 +358,7 @@ xunit="Runtime" ylabel="Number of Jobs" yunit="Jobs" + height={hasAccHours ? 290 : 250} usesBins xtime /> @@ -330,6 +373,7 @@ xunit="Nodes" ylabel="Number of Jobs" yunit="Jobs" + height={hasAccHours ? 290 : 250} /> {/key} diff --git a/web/frontend/src/generic/joblist/JobListRow.svelte b/web/frontend/src/generic/joblist/JobListRow.svelte index 9502a2f8..93fa496b 100644 --- a/web/frontend/src/generic/joblist/JobListRow.svelte +++ b/web/frontend/src/generic/joblist/JobListRow.svelte @@ -79,7 +79,6 @@ /* Derived */ const jobId = $derived(job?.id); - const refinedData = $derived($metricsQuery?.data?.jobMetrics ? sortAndSelectScope($metricsQuery.data.jobMetrics) : []); const scopes = $derived.by(() => { if (job.numNodes == 1) { if (job.numAcc >= 1) return ["core", "accelerator"]; @@ -95,6 +94,7 @@ variables: { id: jobId, metrics, scopes, selectedResolution }, }) ); + const refinedData = $derived($metricsQuery?.data?.jobMetrics ? sortAndSelectScope($metricsQuery.data.jobMetrics) : []); /* Effects */ $effect(() => { diff --git a/web/frontend/src/generic/units.js b/web/frontend/src/generic/units.js index 540a1a32..951ecbe8 100644 --- a/web/frontend/src/generic/units.js +++ b/web/frontend/src/generic/units.js @@ -32,10 +32,6 @@ export function scaleNumber(x, p = '') { } } -export function roundTwoDigits(x) { - return Math.round(x * 100) / 100 -} - export function scaleNumbers(x, y, p = '') { const oldPower = power[prefix.indexOf(p)] const rawXValue = x * oldPower @@ -77,6 +73,10 @@ export function formatUnixTime(t, withDate = false) { } } +export function roundTwoDigits(x) { + return Math.round(x * 100) / 100 +} + // const equalsCheck = (a, b) => { // return JSON.stringify(a) === JSON.stringify(b); // } From 84d7a7aa7d5aec71a713cc09cf1a129fc5fabcb1 Mon Sep 17 00:00:00 2001 From: Christoph Kluge Date: Thu, 5 Feb 2026 13:06:46 +0100 Subject: [PATCH 2/9] add running default filter to list name column links if useful --- web/frontend/src/List.root.svelte | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/web/frontend/src/List.root.svelte b/web/frontend/src/List.root.svelte index 6bc1cd8f..239bf5f1 100644 --- a/web/frontend/src/List.root.svelte +++ b/web/frontend/src/List.root.svelte @@ -319,11 +319,11 @@ {#if type == "USER"} - {scrambleNames ? scramble(row.id) : row.id} {:else if type == "PROJECT"} - {scrambleNames ? scramble(row.id) : row.id} {:else} From 4d13c37008521755547d71ac70cef90c49d4f285 Mon Sep 17 00:00:00 2001 From: Christoph Kluge Date: Thu, 5 Feb 2026 15:18:01 +0100 Subject: [PATCH 3/9] remove non-required return in updatenodehandler --- internal/repository/node.go | 1 - 1 file changed, 1 deletion(-) diff --git a/internal/repository/node.go b/internal/repository/node.go index 3fa041f6..2a22e8c4 100644 --- a/internal/repository/node.go +++ b/internal/repository/node.go @@ -198,7 +198,6 @@ func (r *NodeRepository) UpdateNodeState(hostname string, cluster string, nodeSt } cclog.Debugf("Added node '%s' to database", hostname) - return nil } else { cclog.Warnf("Error while querying node '%v' from database", id) return err From e3148b16eb989af8d57192c51463622de71e50e1 Mon Sep 17 00:00:00 2001 From: Christoph Kluge Date: Thu, 5 Feb 2026 15:24:11 +0100 Subject: [PATCH 4/9] add timers to updateNodeStates --- internal/api/node.go | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/internal/api/node.go b/internal/api/node.go index 37a8576c..930deb50 100644 --- a/internal/api/node.go +++ b/internal/api/node.go @@ -15,6 +15,7 @@ import ( "github.com/ClusterCockpit/cc-backend/internal/repository" "github.com/ClusterCockpit/cc-backend/pkg/archive" "github.com/ClusterCockpit/cc-backend/pkg/metricstore" + cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger" "github.com/ClusterCockpit/cc-lib/v2/schema" ) @@ -81,6 +82,8 @@ func (api *RestAPI) updateNodeStates(rw http.ResponseWriter, r *http.Request) { m := make(map[string][]string) healthStates := make(map[string]schema.MonitoringState) + startMs := time.Now() + for _, node := range req.Nodes { if sc, err := archive.GetSubClusterByNode(req.Cluster, node.Hostname); err == nil { m[sc] = append(m[sc], node.Hostname) @@ -97,6 +100,9 @@ func (api *RestAPI) updateNodeStates(rw http.ResponseWriter, r *http.Request) { } } + cclog.Infof("Timer updateNodeStates, MemStore HealthCheck: %s", time.Since(startMs)) + startDb := time.Now() + for _, node := range req.Nodes { state := determineState(node.States) healthState := schema.MonitoringStateFailed @@ -115,4 +121,6 @@ func (api *RestAPI) updateNodeStates(rw http.ResponseWriter, r *http.Request) { repo.UpdateNodeState(node.Hostname, req.Cluster, &nodeState) } + + cclog.Infof("Timer updateNodeStates, SQLite Inserts: %s", time.Since(startDb)) } From 276559d1205ba162e18759838b47ad6c57389abb Mon Sep 17 00:00:00 2001 From: Christoph Kluge Date: Thu, 5 Feb 2026 15:25:49 +0100 Subject: [PATCH 5/9] revert endpoint change --- internal/api/rest.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/internal/api/rest.go b/internal/api/rest.go index 3f6d9609..00ed1f55 100644 --- a/internal/api/rest.go +++ b/internal/api/rest.go @@ -81,7 +81,7 @@ func (api *RestAPI) MountAPIRoutes(r *mux.Router) { // Cluster List r.HandleFunc("/clusters/", api.getClusters).Methods(http.MethodGet) // Slurm node state - r.HandleFunc("/nodestates/", api.updateNodeStates).Methods(http.MethodPost, http.MethodPut) + r.HandleFunc("/nodestate/", api.updateNodeStates).Methods(http.MethodPost, http.MethodPut) // Job Handler if config.Keys.APISubjects == nil { cclog.Info("Enabling REST start/stop job API") From 0984c1d4316d94262aeb5882ec396c17b9039d3d Mon Sep 17 00:00:00 2001 From: Jan Eitzinger Date: Fri, 6 Feb 2026 07:21:04 +0100 Subject: [PATCH 6/9] Add debug log with degrade and missing metrics for healthcheck --- pkg/metricstore/healthcheck.go | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/pkg/metricstore/healthcheck.go b/pkg/metricstore/healthcheck.go index f390749d..cfb4688d 100644 --- a/pkg/metricstore/healthcheck.go +++ b/pkg/metricstore/healthcheck.go @@ -11,6 +11,7 @@ import ( "slices" "time" + cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger" "github.com/ClusterCockpit/cc-lib/v2/schema" ) @@ -246,6 +247,14 @@ func (m *MemoryStore) HealthCheck(cluster string, degradedCount = len(degradedList) healthyCount = len(expectedMetrics) - (missingCount + degradedCount) + // Debug log missing and degraded metrics + if missingCount > 0 { + cclog.ComponentDebug("metricstore", "HealthCheck: node", hostname, "missing metrics:", missingList) + } + if degradedCount > 0 { + cclog.ComponentDebug("metricstore", "HealthCheck: node", hostname, "degraded metrics:", degradedList) + } + // Determine overall health status if missingCount > 0 || degradedCount > 0 { if healthyCount == 0 { From fcb37b03672698116fd02c26eb9e665cd6c3dd67 Mon Sep 17 00:00:00 2001 From: Aditya Ujeniya Date: Fri, 6 Feb 2026 08:45:36 +0100 Subject: [PATCH 7/9] Update to count healthy metrics --- pkg/metricstore/healthcheck.go | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/pkg/metricstore/healthcheck.go b/pkg/metricstore/healthcheck.go index cfb4688d..801a3ae1 100644 --- a/pkg/metricstore/healthcheck.go +++ b/pkg/metricstore/healthcheck.go @@ -243,9 +243,8 @@ func (m *MemoryStore) HealthCheck(cluster string, continue } - missingCount = len(missingList) - degradedCount = len(degradedList) - healthyCount = len(expectedMetrics) - (missingCount + degradedCount) + uniqueList := mergeList(missingList, degradedList) + healthyCount = len(expectedMetrics) - len(uniqueList) // Debug log missing and degraded metrics if missingCount > 0 { From b160284a1b19a4eede37dfcca7d4e9c9e7883865 Mon Sep 17 00:00:00 2001 From: Jan Eitzinger Date: Fri, 6 Feb 2026 09:08:46 +0100 Subject: [PATCH 8/9] Update vasp example tagger rule --- configs/tagger/apps/vasp.txt | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/configs/tagger/apps/vasp.txt b/configs/tagger/apps/vasp.txt index 9f9b9d5d..79014e1d 100644 --- a/configs/tagger/apps/vasp.txt +++ b/configs/tagger/apps/vasp.txt @@ -1,2 +1,3 @@ -vasp -VASP +vasp_gam +vasp_ncl +vasp_std From f671d8df90c652c6b24d5451ac87e523f99d8cc3 Mon Sep 17 00:00:00 2001 From: Jan Eitzinger Date: Fri, 6 Feb 2026 09:25:09 +0100 Subject: [PATCH 9/9] Add counts in healthcheck for logging output --- pkg/metricstore/healthcheck.go | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pkg/metricstore/healthcheck.go b/pkg/metricstore/healthcheck.go index 801a3ae1..59c84f79 100644 --- a/pkg/metricstore/healthcheck.go +++ b/pkg/metricstore/healthcheck.go @@ -243,6 +243,8 @@ func (m *MemoryStore) HealthCheck(cluster string, continue } + missingCount = len(missingList) + degradedCount = len(degradedList) uniqueList := mergeList(missingList, degradedList) healthyCount = len(expectedMetrics) - len(uniqueList)