From 745c0357f3117bfd06c5ab9a313b421b00e7eef7 Mon Sep 17 00:00:00 2001 From: Jan Eitzinger Date: Thu, 19 Feb 2026 08:04:45 +0100 Subject: [PATCH 1/7] Handle NULL values on health_metrics column --- internal/repository/node.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/internal/repository/node.go b/internal/repository/node.go index 2e3c6746..4b10aea3 100644 --- a/internal/repository/node.go +++ b/internal/repository/node.go @@ -274,7 +274,7 @@ type NodeStateWithNode struct { func (r *NodeRepository) FindNodeStatesBefore(cutoff int64) ([]NodeStateWithNode, error) { rows, err := sq.Select( "node_state.id", "node_state.time_stamp", "node_state.node_state", - "node_state.health_state", "node_state.health_metrics", + "node_state.health_state", "COALESCE(node_state.health_metrics, '')", "node_state.cpus_allocated", "node_state.memory_allocated", "node_state.gpus_allocated", "node_state.jobs_running", "node.hostname", "node.cluster", "node.subcluster", From 90b52f997d0749d3c06cceda0cd3bf7916f50088 Mon Sep 17 00:00:00 2001 From: Jan Eitzinger Date: Thu, 19 Feb 2026 08:24:39 +0100 Subject: [PATCH 2/7] Cleanup and handle error in AppTagger --- cmd/cc-backend/main.go | 2 -- internal/tagger/detectApp.go | 4 +++- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/cmd/cc-backend/main.go b/cmd/cc-backend/main.go index 3ee05383..fde95fd3 100644 --- a/cmd/cc-backend/main.go +++ b/cmd/cc-backend/main.go @@ -279,8 +279,6 @@ func initSubsystems() error { return fmt.Errorf("initializing archive: %w", err) } - // Note: metricstore.Init() is called later in runServer() with proper configuration - // Handle database re-initialization if flagReinitDB { if err := importer.InitDB(); err != nil { diff --git a/internal/tagger/detectApp.go b/internal/tagger/detectApp.go index bd5faed8..21667a27 100644 --- a/internal/tagger/detectApp.go +++ b/internal/tagger/detectApp.go @@ -174,7 +174,9 @@ func (t *AppTagger) Match(job *schema.Job) { for _, re := range a.patterns { if re.MatchString(jobscriptLower) { if !r.HasTag(id, t.tagType, a.tag) { - r.AddTagOrCreateDirect(id, t.tagType, a.tag) + if _, err := r.AddTagOrCreateDirect(id, t.tagType, a.tag); err != nil { + cclog.Errorf("AppTagger: failed to add tag '%s' to job %d: %v", a.tag, id, err) + } } break out } From 62cd21eb8336c25728b6204f60983fbb3dc2c2dc Mon Sep 17 00:00:00 2001 From: Christoph Kluge Date: Thu, 19 Feb 2026 13:00:51 +0100 Subject: [PATCH 3/7] switch to select filters --- .../src/status/dashdetails/HealthDash.svelte | 43 ++++++++++++++----- 1 file changed, 33 insertions(+), 10 deletions(-) diff --git a/web/frontend/src/status/dashdetails/HealthDash.svelte b/web/frontend/src/status/dashdetails/HealthDash.svelte index 9bf0d5af..d9046187 100644 --- a/web/frontend/src/status/dashdetails/HealthDash.svelte +++ b/web/frontend/src/status/dashdetails/HealthDash.svelte @@ -32,12 +32,27 @@ /* Const Init */ const client = getContextClient(); + const stateOptions = [ + "all", + "allocated", + "idle", + "down", + "mixed", + "reserved", + "unknown", + ]; + const healthOptions = [ + "all", + "full", + "partial", + "failed", + ]; /* State Init */ let pieWidth = $state(0); let tableHostFilter = $state(""); - let tableStateFilter = $state(""); - let tableHealthFilter = $state(""); + let tableStateFilter = $state(stateOptions[0]); + let tableHealthFilter = $state(healthOptions[0]); let healthTableSorting = $state( { schedulerState: { dir: "down", active: true }, @@ -98,10 +113,10 @@ if (tableHostFilter != "") { pendingTableData = pendingTableData.filter((e) => e.hostname.includes(tableHostFilter)) } - if (tableStateFilter != "") { + if (tableStateFilter != "all") { pendingTableData = pendingTableData.filter((e) => e.schedulerState.includes(tableStateFilter)) } - if (tableHealthFilter != "") { + if (tableHealthFilter != "all") { pendingTableData = pendingTableData.filter((e) => e.healthState.includes(tableHealthFilter)) } return pendingTableData @@ -280,8 +295,8 @@ - sortBy('hostname')}> - Host + sortBy('hostname')}> + Hosts ({filteredTableData.length}) - sortBy('schedulerState')}> + sortBy('schedulerState')}> Scheduler State - sortBy('healthState')}> + sortBy('healthState')}> Health State - + + {#each stateOptions as so} + + {/each} + @@ -330,7 +349,11 @@ - + + {#each healthOptions as ho} + + {/each} + From 7789489d082527712e37ad6127fe4030331d7652 Mon Sep 17 00:00:00 2001 From: Christoph Kluge Date: Thu, 19 Feb 2026 14:25:03 +0100 Subject: [PATCH 4/7] fix automatic refresh in metric status tab --- web/frontend/src/status/dashdetails/HealthDash.svelte | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/web/frontend/src/status/dashdetails/HealthDash.svelte b/web/frontend/src/status/dashdetails/HealthDash.svelte index d9046187..11f1ef31 100644 --- a/web/frontend/src/status/dashdetails/HealthDash.svelte +++ b/web/frontend/src/status/dashdetails/HealthDash.svelte @@ -50,6 +50,7 @@ /* State Init */ let pieWidth = $state(0); + let querySorting = $state({ field: "startTime", type: "col", order: "DESC" }) let tableHostFilter = $state(""); let tableStateFilter = $state(stateOptions[0]); let tableHealthFilter = $state(healthOptions[0]); @@ -93,7 +94,7 @@ `, variables: { nodeFilter: { cluster: { eq: cluster }}, - sorting: { field: "startTime", type: "col", order: "DESC" }, + sorting: querySorting, }, requestPolicy: "network-only" })); @@ -163,7 +164,7 @@ { - sorting = { field: "startTime", type: "col", order: "DESC" } + querySorting = { field: "startTime", type: "col", order: "DESC" }; }} /> From 705d70ddc070427964c846174b46378d4414dbb7 Mon Sep 17 00:00:00 2001 From: Christoph Kluge Date: Thu, 19 Feb 2026 15:42:20 +0100 Subject: [PATCH 5/7] add row plot cursor syncing --- web/frontend/src/generic/joblist/JobListRow.svelte | 8 +++++++- web/frontend/src/systems/nodelist/NodeListRow.svelte | 1 + 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/web/frontend/src/generic/joblist/JobListRow.svelte b/web/frontend/src/generic/joblist/JobListRow.svelte index 9db340d4..3963708f 100644 --- a/web/frontend/src/generic/joblist/JobListRow.svelte +++ b/web/frontend/src/generic/joblist/JobListRow.svelte @@ -20,6 +20,7 @@ import { queryStore, gql, getContextClient } from "@urql/svelte"; import { Card, Spinner } from "@sveltestrap/sveltestrap"; import { maxScope, checkMetricAvailability } from "../utils.js"; + import uPlot from "uplot"; import JobInfo from "./JobInfo.svelte"; import MetricPlot from "../plots/MetricPlot.svelte"; import JobFootprint from "../helper/JobFootprint.svelte"; @@ -74,13 +75,17 @@ } `; + /* Var Init*/ + // svelte-ignore state_referenced_locally + let plotSync = uPlot.sync(`jobMetricStack-${job.cluster}-${job.id}`); + /* State Init */ let zoomStates = $state({}); let thresholdStates = $state({}); /* Derived */ const resampleDefault = $derived(resampleConfig ? Math.max(...resampleConfig.resolutions) : 0); - const jobId = $derived(job?.id); + const jobId = $derived(job.id); const scopes = $derived.by(() => { if (job.numNodes == 1) { if (job.numAcc >= 1) return ["core", "accelerator"]; @@ -233,6 +238,7 @@ numaccs={job.numAcc} zoomState={zoomStates[metric.data.name] || null} thresholdState={thresholdStates[metric.data.name] || null} + {plotSync} /> {:else} diff --git a/web/frontend/src/systems/nodelist/NodeListRow.svelte b/web/frontend/src/systems/nodelist/NodeListRow.svelte index 4689ed21..558d0642 100644 --- a/web/frontend/src/systems/nodelist/NodeListRow.svelte +++ b/web/frontend/src/systems/nodelist/NodeListRow.svelte @@ -211,6 +211,7 @@ timestep={metricData.data.metric.timestep} series={metricData.data.metric.series} height={375} + {plotSync} forNode /> {/if} From f00f9fcee08967340315c6918c4a125f633cceb6 Mon Sep 17 00:00:00 2001 From: Jan Eitzinger Date: Thu, 19 Feb 2026 17:42:45 +0100 Subject: [PATCH 6/7] Cleanup debug printf --- internal/api/node.go | 2 -- 1 file changed, 2 deletions(-) diff --git a/internal/api/node.go b/internal/api/node.go index cab33452..5032ed7b 100644 --- a/internal/api/node.go +++ b/internal/api/node.go @@ -113,8 +113,6 @@ func (api *RestAPI) updateNodeStates(rw http.ResponseWriter, r *http.Request) { } } - fmt.Printf("Result: %#v\n", healthResults) - cclog.Debugf("Timer updateNodeStates, MemStore HealthCheck: %s", time.Since(startMs)) startDB := time.Now() From 064aa0a238cd1f2ad1d153afd9ae27da42075309 Mon Sep 17 00:00:00 2001 From: Jan Eitzinger Date: Fri, 20 Feb 2026 07:44:00 +0100 Subject: [PATCH 7/7] Add logging for AppTagger --- internal/tagger/detectApp.go | 61 +++++++++++++++++++++++++++--------- 1 file changed, 46 insertions(+), 15 deletions(-) diff --git a/internal/tagger/detectApp.go b/internal/tagger/detectApp.go index 21667a27..c82c87bc 100644 --- a/internal/tagger/detectApp.go +++ b/internal/tagger/detectApp.go @@ -19,6 +19,14 @@ import ( "github.com/ClusterCockpit/cc-lib/v2/util" ) +func metadataKeys(m map[string]string) []string { + keys := make([]string, 0, len(m)) + for k := range m { + keys = append(keys, k) + } + return keys +} + const ( // defaultConfigPath is the default path for application tagging configuration defaultConfigPath = "./var/tagger/apps" @@ -158,31 +166,54 @@ func (t *AppTagger) Register() error { // Only the first matching application is tagged. func (t *AppTagger) Match(job *schema.Job) { r := repository.GetJobRepository() + + if len(t.apps) == 0 { + cclog.Warn("AppTagger: no app patterns loaded, skipping match") + return + } + metadata, err := r.FetchMetadata(job) if err != nil { - cclog.Infof("Cannot fetch metadata for job: %d on %s", job.JobID, job.Cluster) + cclog.Infof("AppTagger: cannot fetch metadata for job %d on %s: %v", job.JobID, job.Cluster, err) + return + } + + if metadata == nil { + cclog.Infof("AppTagger: metadata is nil for job %d on %s", job.JobID, job.Cluster) return } jobscript, ok := metadata["jobScript"] - if ok { - id := *job.ID - jobscriptLower := strings.ToLower(jobscript) + if !ok { + cclog.Infof("AppTagger: no 'jobScript' key in metadata for job %d on %s (keys: %v)", + job.JobID, job.Cluster, metadataKeys(metadata)) + return + } - out: - for _, a := range t.apps { - for _, re := range a.patterns { - if re.MatchString(jobscriptLower) { - if !r.HasTag(id, t.tagType, a.tag) { - if _, err := r.AddTagOrCreateDirect(id, t.tagType, a.tag); err != nil { - cclog.Errorf("AppTagger: failed to add tag '%s' to job %d: %v", a.tag, id, err) - } + if len(jobscript) == 0 { + cclog.Infof("AppTagger: empty jobScript for job %d on %s", job.JobID, job.Cluster) + return + } + + id := *job.ID + jobscriptLower := strings.ToLower(jobscript) + cclog.Debugf("AppTagger: matching job %d (script length: %d) against %d apps", id, len(jobscriptLower), len(t.apps)) + + for _, a := range t.apps { + for _, re := range a.patterns { + if re.MatchString(jobscriptLower) { + if r.HasTag(id, t.tagType, a.tag) { + cclog.Debugf("AppTagger: job %d already has tag %s:%s, skipping", id, t.tagType, a.tag) + } else { + cclog.Infof("AppTagger: pattern '%s' matched for app '%s' on job %d", re.String(), a.tag, id) + if _, err := r.AddTagOrCreateDirect(id, t.tagType, a.tag); err != nil { + cclog.Errorf("AppTagger: failed to add tag '%s' to job %d: %v", a.tag, id, err) } - break out } + return } } - } else { - cclog.Infof("Cannot extract job script for job: %d on %s", job.JobID, job.Cluster) } + + cclog.Debugf("AppTagger: no pattern matched for job %d on %s", id, job.Cluster) }