From 5567371ccdb302d738b955cdbb2a72f21a7d2bf7 Mon Sep 17 00:00:00 2001 From: Christoph Kluge Date: Fri, 23 Jan 2026 11:20:32 +0100 Subject: [PATCH 01/12] move list filter preset to url --- internal/routerConfig/routes.go | 6 +-- web/frontend/src/List.root.svelte | 14 +----- web/frontend/src/generic/Filters.svelte | 48 +++++++++---------- web/frontend/src/header/NavbarLinks.svelte | 8 ++-- .../src/systems/nodelist/NodeInfo.svelte | 4 +- 5 files changed, 34 insertions(+), 46 deletions(-) diff --git a/internal/routerConfig/routes.go b/internal/routerConfig/routes.go index e6a79095..88c38eb1 100644 --- a/internal/routerConfig/routes.go +++ b/internal/routerConfig/routes.go @@ -456,7 +456,7 @@ func HandleSearchBar(rw http.ResponseWriter, r *http.Request, buildInfo web.Buil http.Redirect(rw, r, "/monitoring/jobs/?startTime="+fromTime+"-"+untilTime+"&arrayJobId="+url.QueryEscape(strings.Trim(splitSearch[1], " ")), http.StatusFound) // All Users: Redirect to Tablequery case "username": if user.HasAnyRole([]schema.Role{schema.RoleAdmin, schema.RoleSupport, schema.RoleManager}) { - http.Redirect(rw, r, "/monitoring/users/?user="+url.QueryEscape(strings.Trim(splitSearch[1], " ")), http.StatusFound) + http.Redirect(rw, r, "/monitoring/users/?user="+url.QueryEscape(strings.Trim(splitSearch[1], " "))+"&startTime=last30d", http.StatusFound) } else { web.RenderTemplate(rw, "message.tmpl", &web.Page{Title: "Error", MsgType: "alert-danger", Message: "Missing Access Rights", User: *user, Roles: availableRoles, Build: buildInfo}) } @@ -464,10 +464,10 @@ func HandleSearchBar(rw http.ResponseWriter, r *http.Request, buildInfo web.Buil usernames, _ := repo.FindColumnValues(user, strings.Trim(splitSearch[1], " "), "user", "username", "name") if len(usernames) != 0 { joinedNames := strings.Join(usernames, "&user=") - http.Redirect(rw, r, "/monitoring/users/?user="+joinedNames, http.StatusFound) + http.Redirect(rw, r, "/monitoring/users/?user="+joinedNames+"&startTime=last30d", 
http.StatusFound) } else { if user.HasAnyRole([]schema.Role{schema.RoleAdmin, schema.RoleSupport, schema.RoleManager}) { - http.Redirect(rw, r, "/monitoring/users/?user=NoUserNameFound", http.StatusPermanentRedirect) + http.Redirect(rw, r, "/monitoring/users/?user=NoUserNameFound&startTime=last30d", http.StatusPermanentRedirect) } else { web.RenderTemplate(rw, "message.tmpl", &web.Page{Title: "Error", MsgType: "alert-danger", Message: "Missing Access Rights", User: *user, Roles: availableRoles, Build: buildInfo}) } diff --git a/web/frontend/src/List.root.svelte b/web/frontend/src/List.root.svelte index 0163cbce..108c42dd 100644 --- a/web/frontend/src/List.root.svelte +++ b/web/frontend/src/List.root.svelte @@ -95,19 +95,7 @@ } /* On Mount */ - onMount(() => { - // By default, look at the jobs of the last 30 days: - if (filterPresets?.startTime == null) { - if (filterPresets == null) filterPresets = {}; - - filterPresets.startTime = { - range: "last30d", - text: "Last 30 Days", - }; - }; - // Init Filter - filterComponent.updateFilters(); - }); + onMount(() => filterComponent.updateFilters()); diff --git a/web/frontend/src/generic/Filters.svelte b/web/frontend/src/generic/Filters.svelte index 8cb6e88a..adb865f3 100644 --- a/web/frontend/src/generic/Filters.svelte +++ b/web/frontend/src/generic/Filters.svelte @@ -91,38 +91,38 @@ /* State Init */ // svelte-ignore state_referenced_locally let filters = $state({ - dbId: filterPresets.dbId || [], - jobId: filterPresets.jobId || "", - jobIdMatch: filterPresets.jobIdMatch || "eq", - arrayJobId: filterPresets.arrayJobId || null, - jobName: filterPresets.jobName || "", - project: filterPresets.project || "", - projectMatch: filterPresets.projectMatch || "contains", - user: filterPresets.user || "", - userMatch: filterPresets.userMatch || "contains", - cluster: filterPresets.cluster || null, - partition: filterPresets.partition || null, + dbId: filterPresets?.dbId || [], + jobId: filterPresets?.jobId || "", + jobIdMatch: 
filterPresets?.jobIdMatch || "eq", + arrayJobId: filterPresets?.arrayJobId || null, + jobName: filterPresets?.jobName || "", + project: filterPresets?.project || "", + projectMatch: filterPresets?.projectMatch || "contains", + user: filterPresets?.user || "", + userMatch: filterPresets?.userMatch || "contains", + cluster: filterPresets?.cluster || null, + partition: filterPresets?.partition || null, states: - filterPresets.states || filterPresets.state + filterPresets?.states || filterPresets?.state ? [filterPresets.state].flat() : allJobStates, - shared: filterPresets.shared || "", - schedule: filterPresets.schedule || "", - startTime: filterPresets.startTime || { from: null, to: null, range: ""}, - duration: filterPresets.duration || { + shared: filterPresets?.shared || "", + schedule: filterPresets?.schedule || "", + startTime: filterPresets?.startTime || { from: null, to: null, range: ""}, + duration: filterPresets?.duration || { lessThan: null, moreThan: null, from: null, to: null, }, - tags: filterPresets.tags || [], - numNodes: filterPresets.numNodes || { from: null, to: null }, - numHWThreads: filterPresets.numHWThreads || { from: null, to: null }, - numAccelerators: filterPresets.numAccelerators || { from: null, to: null }, - node: filterPresets.node || null, - nodeMatch: filterPresets.nodeMatch || "eq", - energy: filterPresets.energy || { from: null, to: null }, - stats: filterPresets.stats || [], + tags: filterPresets?.tags || [], + numNodes: filterPresets?.numNodes || { from: null, to: null }, + numHWThreads: filterPresets?.numHWThreads || { from: null, to: null }, + numAccelerators: filterPresets?.numAccelerators || { from: null, to: null }, + node: filterPresets?.node || null, + nodeMatch: filterPresets?.nodeMatch || "eq", + energy: filterPresets?.energy || { from: null, to: null }, + stats: filterPresets?.stats || [], }); /* Opened States */ diff --git a/web/frontend/src/header/NavbarLinks.svelte b/web/frontend/src/header/NavbarLinks.svelte index 
ad0eb77e..bb6bd0f4 100644 --- a/web/frontend/src/header/NavbarLinks.svelte +++ b/web/frontend/src/header/NavbarLinks.svelte @@ -92,7 +92,7 @@ {/each} - {:else} + {:else} @@ -100,7 +100,7 @@ All Clusters @@ -112,12 +112,12 @@ All Jobs Running Jobs diff --git a/web/frontend/src/systems/nodelist/NodeInfo.svelte b/web/frontend/src/systems/nodelist/NodeInfo.svelte index de5ca14a..39716ca2 100644 --- a/web/frontend/src/systems/nodelist/NodeInfo.svelte +++ b/web/frontend/src/systems/nodelist/NodeInfo.svelte @@ -171,7 +171,7 @@ Users - + List @@ -192,7 +192,7 @@ Projects - + List From ea6b9d910b123bdc3ccd1419f5ad3305c5b5b654 Mon Sep 17 00:00:00 2001 From: Christoph Kluge Date: Fri, 23 Jan 2026 13:39:34 +0100 Subject: [PATCH 02/12] switch to cluster grouping in dashboard queries --- web/frontend/src/DashPublic.root.svelte | 28 ++++++++------------- web/frontend/src/status/DashInternal.svelte | 28 ++++++++------------- 2 files changed, 20 insertions(+), 36 deletions(-) diff --git a/web/frontend/src/DashPublic.root.svelte b/web/frontend/src/DashPublic.root.svelte index c758ed37..220e3403 100644 --- a/web/frontend/src/DashPublic.root.svelte +++ b/web/frontend/src/DashPublic.root.svelte @@ -149,7 +149,7 @@ filter: $jobFilter page: $paging sortBy: TOTALJOBS - groupBy: SUBCLUSTER + groupBy: CLUSTER ) { id totalJobs @@ -193,30 +193,25 @@ })); const clusterInfo = $derived.by(() => { + let rawInfos = {}; if ($initq?.data?.clusters) { - let rawInfos = {}; + // Grouped By Cluster + if (!rawInfos['allocatedCores']) rawInfos['allocatedCores'] = $statusQuery?.data?.jobsStatistics?.find(({ id }) => id == presetCluster)?.totalCores || 0; + if (!rawInfos['allocatedAccs']) rawInfos['allocatedAccs'] = $statusQuery?.data?.jobsStatistics?.find(({ id }) => id == presetCluster)?.totalAccs || 0; + if (!rawInfos['activeUsers']) rawInfos['activeUsers'] = $statusQuery?.data?.jobsStatistics?.find(({ id }) => id == presetCluster)?.totalUsers || 0; + if (!rawInfos['runningJobs']) rawInfos['runningJobs'] 
= $statusQuery?.data?.jobsStatistics?.find(({ id }) => id == presetCluster)?.totalJobs || 0; + + // Collected By Subcluster let subClusters = $initq?.data?.clusters?.find((c) => c.name == presetCluster)?.subClusters || []; for (let subCluster of subClusters) { // Allocations if (!rawInfos['allocatedNodes']) rawInfos['allocatedNodes'] = $statusQuery?.data?.allocatedNodes?.find(({ name }) => name == subCluster.name)?.count || 0; else rawInfos['allocatedNodes'] += $statusQuery?.data?.allocatedNodes?.find(({ name }) => name == subCluster.name)?.count || 0; - if (!rawInfos['allocatedCores']) rawInfos['allocatedCores'] = $statusQuery?.data?.jobsStatistics?.find(({ id }) => id == subCluster.name)?.totalCores || 0; - else rawInfos['allocatedCores'] += $statusQuery?.data?.jobsStatistics?.find(({ id }) => id == subCluster.name)?.totalCores || 0; - - if (!rawInfos['allocatedAccs']) rawInfos['allocatedAccs'] = $statusQuery?.data?.jobsStatistics?.find(({ id }) => id == subCluster.name)?.totalAccs || 0; - else rawInfos['allocatedAccs'] += $statusQuery?.data?.jobsStatistics?.find(({ id }) => id == subCluster.name)?.totalAccs || 0; - // Infos if (!rawInfos['processorTypes']) rawInfos['processorTypes'] = subCluster?.processorType ? 
new Set([subCluster.processorType]) : new Set([]); else rawInfos['processorTypes'].add(subCluster.processorType); - if (!rawInfos['activeUsers']) rawInfos['activeUsers'] = $statusQuery?.data?.jobsStatistics?.find(({ id }) => id == subCluster.name)?.totalUsers || 0; - else rawInfos['activeUsers'] += $statusQuery?.data?.jobsStatistics?.find(({ id }) => id == subCluster.name)?.totalUsers || 0; - - if (!rawInfos['runningJobs']) rawInfos['runningJobs'] = $statusQuery?.data?.jobsStatistics?.find(({ id }) => id == subCluster.name)?.totalJobs || 0; - else rawInfos['runningJobs'] += $statusQuery?.data?.jobsStatistics?.find(({ id }) => id == subCluster.name)?.totalJobs || 0; - if (!rawInfos['totalNodes']) rawInfos['totalNodes'] = subCluster?.numberOfNodes || 0; else rawInfos['totalNodes'] += subCluster?.numberOfNodes || 0; @@ -281,11 +276,8 @@ let rawGpuUnit = $statusQuery?.data?.nodeMetrics[0]?.metrics.find((m) => m.name == 'acc_power')?.metric?.unit || null rawInfos['gpuPwrUnit'] = rawGpuUnit ? 
rawGpuUnit.prefix + rawGpuUnit.base : '' } - - return rawInfos - } else { - return {}; } + return rawInfos; }); const refinedStateData = $derived.by(() => { diff --git a/web/frontend/src/status/DashInternal.svelte b/web/frontend/src/status/DashInternal.svelte index 87e3c6b3..b632f77e 100644 --- a/web/frontend/src/status/DashInternal.svelte +++ b/web/frontend/src/status/DashInternal.svelte @@ -155,7 +155,7 @@ filter: $jobFilter page: $paging sortBy: TOTALJOBS - groupBy: SUBCLUSTER + groupBy: CLUSTER ) { id totalJobs @@ -222,30 +222,25 @@ })); const clusterInfo = $derived.by(() => { + let rawInfos = {}; if ($initq?.data?.clusters) { - let rawInfos = {}; + // Grouped By Cluster + if (!rawInfos['allocatedCores']) rawInfos['allocatedCores'] = $statusQuery?.data?.jobsStatistics?.find(({ id }) => id == presetCluster)?.totalCores || 0; + if (!rawInfos['allocatedAccs']) rawInfos['allocatedAccs'] = $statusQuery?.data?.jobsStatistics?.find(({ id }) => id == presetCluster)?.totalAccs || 0; + if (!rawInfos['activeUsers']) rawInfos['activeUsers'] = $statusQuery?.data?.jobsStatistics?.find(({ id }) => id == presetCluster)?.totalUsers || 0; + if (!rawInfos['runningJobs']) rawInfos['runningJobs'] = $statusQuery?.data?.jobsStatistics?.find(({ id }) => id == presetCluster)?.totalJobs || 0; + + // Collected By Subcluster let subClusters = $initq?.data?.clusters?.find((c) => c.name == presetCluster)?.subClusters || []; for (let subCluster of subClusters) { // Allocations if (!rawInfos['allocatedNodes']) rawInfos['allocatedNodes'] = $statusQuery?.data?.allocatedNodes?.find(({ name }) => name == subCluster.name)?.count || 0; else rawInfos['allocatedNodes'] += $statusQuery?.data?.allocatedNodes?.find(({ name }) => name == subCluster.name)?.count || 0; - if (!rawInfos['allocatedCores']) rawInfos['allocatedCores'] = $statusQuery?.data?.jobsStatistics?.find(({ id }) => id == subCluster.name)?.totalCores || 0; - else rawInfos['allocatedCores'] += $statusQuery?.data?.jobsStatistics?.find(({ id 
}) => id == subCluster.name)?.totalCores || 0; - - if (!rawInfos['allocatedAccs']) rawInfos['allocatedAccs'] = $statusQuery?.data?.jobsStatistics?.find(({ id }) => id == subCluster.name)?.totalAccs || 0; - else rawInfos['allocatedAccs'] += $statusQuery?.data?.jobsStatistics?.find(({ id }) => id == subCluster.name)?.totalAccs || 0; - // Infos if (!rawInfos['processorTypes']) rawInfos['processorTypes'] = subCluster?.processorType ? new Set([subCluster.processorType]) : new Set([]); else rawInfos['processorTypes'].add(subCluster.processorType); - if (!rawInfos['activeUsers']) rawInfos['activeUsers'] = $statusQuery?.data?.jobsStatistics?.find(({ id }) => id == subCluster.name)?.totalUsers || 0; - else rawInfos['activeUsers'] += $statusQuery?.data?.jobsStatistics?.find(({ id }) => id == subCluster.name)?.totalUsers || 0; - - if (!rawInfos['runningJobs']) rawInfos['runningJobs'] = $statusQuery?.data?.jobsStatistics?.find(({ id }) => id == subCluster.name)?.totalJobs || 0; - else rawInfos['runningJobs'] += $statusQuery?.data?.jobsStatistics?.find(({ id }) => id == subCluster.name)?.totalJobs || 0; - if (!rawInfos['totalNodes']) rawInfos['totalNodes'] = subCluster?.numberOfNodes || 0; else rawInfos['totalNodes'] += subCluster?.numberOfNodes || 0; @@ -290,11 +285,8 @@ 0, // Initial Value ) || 0; rawInfos['memBwRate'] = Math.floor((rawMemBw * 100) / 100) - - return rawInfos - } else { - return {}; } + return rawInfos; }); /* Functions */ From 25c5457ef3324d2e1d7b5368fcad2574faa0ba76 Mon Sep 17 00:00:00 2001 From: Christoph Kluge Date: Fri, 23 Jan 2026 15:14:54 +0100 Subject: [PATCH 03/12] review internal dash info card layout --- web/frontend/src/status/DashInternal.svelte | 65 +++++++++++---------- 1 file changed, 34 insertions(+), 31 deletions(-) diff --git a/web/frontend/src/status/DashInternal.svelte b/web/frontend/src/status/DashInternal.svelte index b632f77e..c42c758e 100644 --- a/web/frontend/src/status/DashInternal.svelte +++ 
b/web/frontend/src/status/DashInternal.svelte @@ -402,17 +402,17 @@ {[...clusterInfo?.processorTypes].join(', ')} - +
- +
- @@ -421,49 +421,52 @@ {clusterInfo?.flopRate} {clusterInfo?.flopRateUnit} -
- + - - + + + + + + - + + + + + {#if clusterInfo?.totalAccs !== 0} - + - + + + + + {/if}
{clusterInfo?.runningJobs} Running Jobs{clusterInfo?.activeUsers} Active Users{clusterInfo?.activeUsers} Active Users
Flop Rate (Any) + Memory BW Rate
+ {clusterInfo?.memBwRate} {clusterInfo?.memBwRateUnit}
{formatNumber(clusterInfo?.allocatedNodes)} Active Nodes
- - - - -
{formatNumber(clusterInfo?.idleNodes)} Idle Nodes
{formatNumber(clusterInfo?.allocatedCores)} Active Cores
- - - - -
{formatNumber(clusterInfo?.idleCores)} Idle Cores
{formatNumber(clusterInfo?.allocatedAccs)} Active Accelerators
- - - - -
{formatNumber(clusterInfo?.idleAccs)} Idle Accelerators
From 22b1d4d276a77d362b69c6c8a35aa2e0f803b5e0 Mon Sep 17 00:00:00 2001 From: Christoph Kluge Date: Fri, 23 Jan 2026 15:25:00 +0100 Subject: [PATCH 04/12] review public dashboard layout --- web/frontend/src/DashPublic.root.svelte | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/web/frontend/src/DashPublic.root.svelte b/web/frontend/src/DashPublic.root.svelte index 220e3403..91f4664c 100644 --- a/web/frontend/src/DashPublic.root.svelte +++ b/web/frontend/src/DashPublic.root.svelte @@ -510,7 +510,7 @@
- +
@@ -529,6 +529,7 @@ timestep={$statusQuery?.data?.clusterMetrics[0]?.timestep || 60} numNodes={$statusQuery?.data?.clusterMetrics?.nodeCount || 0} metricData={$statusQuery?.data?.clusterMetrics?.metrics || []} + height={250} publicMode /> {/key} @@ -549,14 +550,14 @@ nodesData={transformNodesStatsToInfo($statusQuery?.data?.nodeMetrics)} fixTitle="Node Utilization" yMinimum={1.0} - height={330} + height={280} /> {/key} - + {#if refinedStateData.length > 0} @@ -613,7 +614,7 @@ {#key $statesTimed?.data?.nodeStatesTimed} Date: Fri, 23 Jan 2026 16:51:17 +0100 Subject: [PATCH 05/12] Adding dataGenerator script --- tools/dataGenerator.sh | 197 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 197 insertions(+) create mode 100644 tools/dataGenerator.sh diff --git a/tools/dataGenerator.sh b/tools/dataGenerator.sh new file mode 100644 index 00000000..6f488142 --- /dev/null +++ b/tools/dataGenerator.sh @@ -0,0 +1,197 @@ +#!/bin/bash + +# ========================================== +# CONFIGURATION & FLAGS +# ========================================== + +# MODE SETTINGS +TRANSPORT_MODE="REST" # Options: "REST" or "NATS" +CONNECTION_SCOPE="INTERNAL" # Options: "INTERNAL" or "EXTERNAL" +API_USER="demo" # User for JWT generation + +# BASE NETWORK CONFIG +SERVICE_ADDRESS="http://localhost:8080" +NATS_SERVER="nats://0.0.0.0:4222" + +# NATS CREDENTIALS +NATS_USER="root" +NATS_PASS="root" +NATS_SUBJECT="hpc-nats" + +# EXTERNAL JWT (Used if CONNECTION_SCOPE is EXTERNAL) +JWT_STATIC="eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJleHAiOjE3NzU3Nzg4NDQsImlhdCI6MTc2ODU3ODg0NCwicm9sZXMiOlsiYWRtaW4iLCJhcGkiXSwic3ViIjoiZGVtbyJ9._SDEW9WaUVXSBFmWqGhyIZXLoqoDU8F1hkfh4cXKIqF4yw7w50IUpfUBtwUFUOnoviFKoi563f6RAMC7XxeLDA" + +# ========================================== +# DATA DEFINITIONS +# ========================================== +ALEX_HOSTS="a0603 a0903 a0832 a0329 a0702 a0122 a1624 a0731 a0224 a0704 a0631 a0225 a0222 a0427 a0603 a0429 a0833 a0705 a0901 a0601 a0227 a0804 a0322 a0226 a0126 a0129 
a0605 a0801 a0934 a1622 a0902 a0428 a0537 a1623 a1722 a0228 a0701 a0326 a0327 a0123 a0321 a1621 a0323 a0124 a0534 a0931 a0324 a0933 a0424 a0905 a0128 a0532 a0805 a0521 a0535 a0932 a0127 a0325 a0633 a0831 a0803 a0426 a0425 a0229 a1721 a0602 a0632 a0223 a0422 a0423 a0536 a0328 a0703 anvme7 a0125 a0221 a0604 a0802 a0522 a0531 a0533 a0904" +FRITZ_HOSTS="f0201 f0202 f0203 f0204 f0205 f0206 f0207 f0208 f0209 f0210 f0211 f0212 f0213 f0214 f0215 f0217 f0218 f0219 f0220 f0221 f0222 f0223 f0224 f0225 f0226 f0227 f0228 f0229 f0230 f0231 f0232 f0233 f0234 f0235 f0236 f0237 f0238 f0239 f0240 f0241 f0242 f0243 f0244 f0245 f0246 f0247 f0248 f0249 f0250 f0251 f0252 f0253 f0254 f0255 f0256 f0257 f0258 f0259 f0260 f0261 f0262 f0263 f0264 f0378" + +METRICS_STD="cpu_load cpu_user flops_any cpu_irq cpu_system ipc cpu_idle cpu_iowait core_power clock" +METRICS_NODE="cpu_irq cpu_load mem_cached net_bytes_in cpu_user cpu_idle nfs4_read mem_used nfs4_write nfs4_total ib_xmit ib_xmit_pkts net_bytes_out cpu_iowait ib_recv cpu_system ib_recv_pkts" +ACCEL_IDS="00000000:49:00.0 00000000:0E:00.0 00000000:D1:00.0 00000000:90:00.0 00000000:13:00.0 00000000:96:00.0 00000000:CC:00.0 00000000:4F:00.0" + +# ========================================== +# SETUP ENV (URL & TOKEN) +# ========================================== + +if [ "$CONNECTION_SCOPE" == "INTERNAL" ]; then + # 1. Set URL for Internal Mode + REST_URL="${SERVICE_ADDRESS}/metricstore/api/write" + + # 2. Generate JWT dynamically + echo "Setup: INTERNAL mode selected." + echo "Generating JWT for user: $API_USER" + JWT=$(./cc-backend -jwt "$API_USER" | grep -oP "(?<=JWT: Successfully generated JWT for user '${API_USER}': ).*") + + if [ -z "$JWT" ]; then + echo "Error: Failed to generate JWT from cc-backend." + exit 1 + fi +else + # 1. Set URL for External Mode + REST_URL="${SERVICE_ADDRESS}/api/write" + + # 2. Use Static JWT + echo "Setup: EXTERNAL mode selected." + echo "Using static JWT." 
+ JWT="$JWT_STATIC" +fi + +echo "Target URL: $REST_URL" + +# ========================================== +# FUNCTIONS +# ========================================== + +send_payload() { + local file_path=$1 + local cluster_name=$2 + + if [ "$TRANSPORT_MODE" == "NATS" ]; then + # Piping file content directly to nats stdin + cat "$file_path" | nats pub "$NATS_SUBJECT" -s "$NATS_SERVER" --user "$NATS_USER" --password "$NATS_PASS" + else + # Sending via REST API + curl -s -X 'POST' "${REST_URL}/?cluster=${cluster_name}" \ + -H "Authorization: Bearer $JWT" \ + --data-binary "@$file_path" + fi + + # Clean up immediately + rm "$file_path" +} + +# ========================================== +# MAIN LOOP +# ========================================== + +# Clean up leftovers +rm -f sample_fritz.txt sample_alex.txt + +while [ true ]; do + timestamp="$(date '+%s')" + echo "--- Cycle Start: $timestamp [Mode: $TRANSPORT_MODE | Scope: $CONNECTION_SCOPE] ---" + + # 1. ALEX: HWTHREAD + echo "Generating Alex: hwthread" + { + for metric in $METRICS_STD; do + for hostname in $ALEX_HOSTS; do + for id in {0..127}; do + echo "$metric,cluster=alex,hostname=$hostname,type=hwthread,type-id=$id value=$((1 + RANDOM % 100)).0 $timestamp" + done + done + done + } > sample_alex.txt + send_payload "sample_alex.txt" "alex" + + # 2. FRITZ: HWTHREAD + echo "Generating Fritz: hwthread" + { + for metric in $METRICS_STD; do + for hostname in $FRITZ_HOSTS; do + for id in {0..71}; do + echo "$metric,cluster=fritz,hostname=$hostname,type=hwthread,type-id=$id value=$((1 + RANDOM % 100)).0 $timestamp" + done + done + done + } > sample_fritz.txt + send_payload "sample_fritz.txt" "fritz" + + # 3. 
ALEX: ACCELERATOR + echo "Generating Alex: accelerator" + { + for metric in $METRICS_STD; do + for hostname in $ALEX_HOSTS; do + for id in $ACCEL_IDS; do + echo "$metric,cluster=alex,hostname=$hostname,type=accelerator,type-id=$id value=$((1 + RANDOM % 100)).0 $timestamp" + done + done + done + } > sample_alex.txt + send_payload "sample_alex.txt" "alex" + + # 4. ALEX: MEMORY DOMAIN + echo "Generating Alex: memoryDomain" + { + for metric in $METRICS_STD; do + for hostname in $ALEX_HOSTS; do + for id in {0..7}; do + echo "$metric,cluster=alex,hostname=$hostname,type=memoryDomain,type-id=$id value=$((1 + RANDOM % 100)).0 $timestamp" + done + done + done + } > sample_alex.txt + send_payload "sample_alex.txt" "alex" + + # 5. ALEX: SOCKET + echo "Generating Alex: socket" + { + for metric in $METRICS_STD; do + for hostname in $ALEX_HOSTS; do + for id in {0..1}; do + echo "$metric,cluster=alex,hostname=$hostname,type=socket,type-id=$id value=$((1 + RANDOM % 100)).0 $timestamp" + done + done + done + } > sample_alex.txt + send_payload "sample_alex.txt" "alex" + + # 6. FRITZ: SOCKET + echo "Generating Fritz: socket" + { + for metric in $METRICS_STD; do + for hostname in $FRITZ_HOSTS; do + for id in {0..1}; do + echo "$metric,cluster=fritz,hostname=$hostname,type=socket,type-id=$id value=$((1 + RANDOM % 100)).0 $timestamp" + done + done + done + } > sample_fritz.txt + send_payload "sample_fritz.txt" "fritz" + + # 7. ALEX: NODE + echo "Generating Alex: node" + { + for metric in $METRICS_NODE; do + for hostname in $ALEX_HOSTS; do + echo "$metric,cluster=alex,hostname=$hostname,type=node value=$((1 + RANDOM % 100)).0 $timestamp" + done + done + } > sample_alex.txt + send_payload "sample_alex.txt" "alex" + + # 8. 
FRITZ: NODE + echo "Generating Fritz: node" + { + for metric in $METRICS_NODE; do + for hostname in $FRITZ_HOSTS; do + echo "$metric,cluster=fritz,hostname=$hostname,type=node value=$((1 + RANDOM % 100)).0 $timestamp" + done + done + } > sample_fritz.txt + send_payload "sample_fritz.txt" "fritz" + + sleep 1m +done \ No newline at end of file From 98dc8cf5b56daff34306937b74b9e2bc2ec4a822 Mon Sep 17 00:00:00 2001 From: Christoph Kluge Date: Fri, 23 Jan 2026 16:54:30 +0100 Subject: [PATCH 06/12] add loading indicators to status detail --- .../src/status/dashdetails/StatusDash.svelte | 43 +++++++++++++++++-- 1 file changed, 40 insertions(+), 3 deletions(-) diff --git a/web/frontend/src/status/dashdetails/StatusDash.svelte b/web/frontend/src/status/dashdetails/StatusDash.svelte index b80db96f..9968a9fd 100644 --- a/web/frontend/src/status/dashdetails/StatusDash.svelte +++ b/web/frontend/src/status/dashdetails/StatusDash.svelte @@ -16,6 +16,7 @@ Table, Progress, Icon, + Spinner } from "@sveltestrap/sveltestrap"; import { queryStore, @@ -408,7 +409,19 @@
-{#if $statesTimed.data} +{#if $statesTimed.fetching} + + + + + +{:else if $statesTimed.error} + + + {$statesTimed.error.message} + + +{:else if $statesTimed.data}
@@ -450,7 +463,19 @@
-{#if $statusQuery?.data?.nodeStates} +{#if $statusQuery.fetching} + + + + + +{:else if $statusQuery.error} + + + {$statusQuery.error.message} + + +{:else if $statusQuery?.data?.nodeStates}
@@ -536,7 +561,19 @@
-{#if $statusQuery.data} +{#if $statusQuery.fetching} + + + + + +{:else if $statusQuery.error} + + + {$statusQuery.error.message} + + +{:else if $statusQuery.data} {#each clusters.find((c) => c.name == cluster).subClusters as subCluster, i} From 49938bcef837f3518b67c83cdf63d5da64faed27 Mon Sep 17 00:00:00 2001 From: Christoph Kluge Date: Fri, 23 Jan 2026 17:41:21 +0100 Subject: [PATCH 07/12] remove blocking backend check - threw errors on expected and correctly handled behavior for nodeList queries --- internal/repository/node.go | 54 +++++++++++++++++++++++++++++++++++++ pkg/metricstore/query.go | 3 --- 2 files changed, 54 insertions(+), 3 deletions(-) diff --git a/internal/repository/node.go b/internal/repository/node.go index a81fc58d..b848c8a9 100644 --- a/internal/repository/node.go +++ b/internal/repository/node.go @@ -11,6 +11,7 @@ import ( "encoding/json" "fmt" "slices" + "sort" "strings" "sync" "time" @@ -683,6 +684,11 @@ func (r *NodeRepository) GetNodesForList( hasNextPage = len(nextNodes) == 1 } + // Fallback, ignores stateFilter + // if countNodes == 0 { + // nodes, countNodes, hasNextPage = getNodesFromTopol(cluster, subCluster, nodeFilter, page) + // } + return nodes, stateMap, countNodes, hasNextPage, nil } @@ -707,3 +713,51 @@ func AccessCheckWithUser(user *schema.User, query sq.SelectBuilder) (sq.SelectBu return qnil, fmt.Errorf("user has no or unknown roles") } } + +func getNodesFromTopol(cluster string, subCluster string, nodeFilter string, page *model.PageRequest) ([]string, int, bool) { + // 0) Init additional vars + var hasNextPage bool = false + var totalNodes int = 0 + + // 1) Get list of all nodes + var topolNodes []string + if subCluster != "" { + scNodes := archive.NodeLists[cluster][subCluster] + topolNodes = scNodes.PrintList() + } else { + subClusterNodeLists := archive.NodeLists[cluster] + for _, nodeList := range subClusterNodeLists { + topolNodes = append(topolNodes, nodeList.PrintList()...) 
+ } + } + + // 2) Filter nodes + if nodeFilter != "" { + filteredNodes := []string{} + for _, node := range topolNodes { + if strings.Contains(node, nodeFilter) { + filteredNodes = append(filteredNodes, node) + } + } + topolNodes = filteredNodes + } + + // 2.1) Count total nodes && Sort nodes -> Sorting invalidated after ccms return ... + totalNodes = len(topolNodes) + sort.Strings(topolNodes) + + // 3) Apply paging; a nil page or non-positive ItemsPerPage is treated as "no paging" + if page != nil && page.ItemsPerPage > 0 && len(topolNodes) > page.ItemsPerPage { + // Clamp start into [0, len] so an out-of-range page yields an empty result instead of a slice-bounds panic + start := min(max((page.Page-1)*page.ItemsPerPage, 0), len(topolNodes)) + end := start + page.ItemsPerPage + if end >= len(topolNodes) { + end = len(topolNodes) + hasNextPage = false + } else { + hasNextPage = true + } + topolNodes = topolNodes[start:end] + } + + return topolNodes, totalNodes, hasNextPage +} diff --git a/pkg/metricstore/query.go b/pkg/metricstore/query.go index a1656192..a031cb1e 100644 --- a/pkg/metricstore/query.go +++ b/pkg/metricstore/query.go @@ -912,9 +912,6 @@ func buildNodeQueries( scopes []schema.MetricScope, resolution int64, ) ([]APIQuery, []schema.MetricScope, error) { - if len(nodes) == 0 { - return nil, nil, fmt.Errorf("METRICDATA/CCMS > no nodes specified for query") - } queries := make([]APIQuery, 0, len(metrics)*len(scopes)*len(nodes)) assignedScope := []schema.MetricScope{} From 436194e46dd98670e37db4c04a5efe44661a835b Mon Sep 17 00:00:00 2001 From: Christoph Kluge Date: Fri, 23 Jan 2026 17:45:44 +0100 Subject: [PATCH 08/12] enable fallback for non initialized node table --- internal/repository/node.go | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/internal/repository/node.go b/internal/repository/node.go index b848c8a9..0d94e46f 100644 --- a/internal/repository/node.go +++ b/internal/repository/node.go @@ -684,10 +684,10 @@ func (r *NodeRepository) GetNodesForList( hasNextPage = len(nextNodes) == 1 } - // Fallback, ignores stateFilter - // if countNodes == 0 { - // nodes, countNodes, hasNextPage = getNodesFromTopol(cluster, subCluster, nodeFilter, page) - // } + // Fallback 
for non-init'd node table in DB; Ignores stateFilter + if stateFilter == "all" && countNodes == 0 { + nodes, countNodes, hasNextPage = getNodesFromTopol(cluster, subCluster, nodeFilter, page) + } return nodes, stateMap, countNodes, hasNextPage, nil } From 0af550bf4edd5eff382d58d870ce214fb48b5875 Mon Sep 17 00:00:00 2001 From: Christoph Kluge Date: Fri, 23 Jan 2026 18:26:10 +0100 Subject: [PATCH 09/12] fix nodelist paging --- web/frontend/src/systems/NodeList.svelte | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/web/frontend/src/systems/NodeList.svelte b/web/frontend/src/systems/NodeList.svelte index f512a6ea..8dda2770 100644 --- a/web/frontend/src/systems/NodeList.svelte +++ b/web/frontend/src/systems/NodeList.svelte @@ -95,7 +95,6 @@ let nodes = $state([]); let page = $state(1); let headerPaddingTop = $state(0); - let matchedNodes = $state(0); /* Derived */ let selectedMetrics = $derived(pendingSelectedMetrics); @@ -119,6 +118,8 @@ }, requestPolicy: "network-only", // Resolution queries are cached, but how to access them? 
For now: reload on every change })); + + const matchedNodes = $derived($nodesQuery?.data?.nodeMetricsList?.totalNodes || 0); /* Effects */ $effect(() => { @@ -142,7 +143,6 @@ if ($nodesQuery?.data) { untrack(() => { nodes = handleNodes($nodesQuery?.data?.nodeMetricsList); - matchedNodes = $nodesQuery?.data?.totalNodes || 0; }); selectedMetrics = [...pendingSelectedMetrics]; // Trigger Rerender in NodeListRow Only After Data is Fetched }; From a2c1b65f917e44d7889fadc7cb168db565be8204 Mon Sep 17 00:00:00 2001 From: Christoph Kluge Date: Fri, 23 Jan 2026 18:42:07 +0100 Subject: [PATCH 10/12] set variables in nodeitems handler --- web/frontend/src/systems/NodeList.svelte | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/web/frontend/src/systems/NodeList.svelte b/web/frontend/src/systems/NodeList.svelte index 8dda2770..850f5394 100644 --- a/web/frontend/src/systems/NodeList.svelte +++ b/web/frontend/src/systems/NodeList.svelte @@ -142,7 +142,7 @@ $effect(() => { if ($nodesQuery?.data) { untrack(() => { - nodes = handleNodes($nodesQuery?.data?.nodeMetricsList); + handleNodes($nodesQuery?.data?.nodeMetricsList?.items); }); selectedMetrics = [...pendingSelectedMetrics]; // Trigger Rerender in NodeListRow Only After Data is Fetched }; @@ -161,18 +161,18 @@ }); /* Functions */ - function handleNodes(data) { - if (data) { + function handleNodes(newNodes) { + if (newNodes) { if (usePaging) { // console.log('New Paging', $state.snapshot(paging)) - return [...data.items].sort((a, b) => a.host.localeCompare(b.host)); + nodes = [...newNodes].sort((a, b) => a.host.localeCompare(b.host)); } else { if ($state.snapshot(page) == 1) { // console.log('Page 1 Reset', [...data.items]) - return [...data.items].sort((a, b) => a.host.localeCompare(b.host)); + nodes = [...newNodes].sort((a, b) => a.host.localeCompare(b.host)); } else { // console.log('Add Nodes', $state.snapshot(nodes), [...data.items]) - return nodes.concat([...data.items]) + nodes = 
nodes.concat([...newNodes]) } } }; From fbf4004e92fb7e293aa4e43434fe1b17f58e31db Mon Sep 17 00:00:00 2001 From: Christoph Kluge Date: Fri, 23 Jan 2026 18:50:28 +0100 Subject: [PATCH 11/12] move load indicator to after job list render - prevents location reset when continuously scrolling --- web/frontend/src/systems/NodeList.svelte | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/web/frontend/src/systems/NodeList.svelte b/web/frontend/src/systems/NodeList.svelte index 850f5394..fdcaf211 100644 --- a/web/frontend/src/systems/NodeList.svelte +++ b/web/frontend/src/systems/NodeList.svelte @@ -169,10 +169,10 @@ } else { if ($state.snapshot(page) == 1) { // console.log('Page 1 Reset', [...data.items]) - nodes = [...newNodes].sort((a, b) => a.host.localeCompare(b.host)); + nodes = [...newNodes].sort((a, b) => a.host.localeCompare(b.host)); } else { // console.log('Add Nodes', $state.snapshot(nodes), [...data.items]) - nodes = nodes.concat([...newNodes]) + nodes = nodes.concat([...newNodes]) } } }; @@ -248,7 +248,16 @@ {$nodesQuery.error.message} - {:else if $nodesQuery.fetching || !$nodesQuery.data} + {:else} + {#each nodes as nodeData (nodeData.host)} + + {:else} + + No nodes found + + {/each} + {/if} + {#if $nodesQuery.fetching || !$nodesQuery.data}
@@ -265,14 +274,6 @@
- {:else} - {#each nodes as nodeData (nodeData.host)} - - {:else} - - No nodes found - - {/each} {/if} From c782043c6449d2c9f4612abbaa253f026e86cb88 Mon Sep 17 00:00:00 2001 From: Jan Eitzinger Date: Mon, 26 Jan 2026 08:38:53 +0100 Subject: [PATCH 12/12] Upgrade cclib and remove usage of obsolete util.Float --- go.mod | 4 ++-- go.sum | 4 ++-- pkg/metricstore/level.go | 3 ++- pkg/metricstore/metricstore.go | 5 ++--- pkg/metricstore/stats.go | 21 +++++++++++---------- 5 files changed, 19 insertions(+), 18 deletions(-) diff --git a/go.mod b/go.mod index 479f1644..9e1a5453 100644 --- a/go.mod +++ b/go.mod @@ -11,7 +11,7 @@ tool ( require ( github.com/99designs/gqlgen v0.17.85 - github.com/ClusterCockpit/cc-lib/v2 v2.1.0 + github.com/ClusterCockpit/cc-lib/v2 v2.2.0 github.com/Masterminds/squirrel v1.5.4 github.com/aws/aws-sdk-go-v2 v1.41.1 github.com/aws/aws-sdk-go-v2/config v1.32.6 @@ -32,7 +32,6 @@ require ( github.com/joho/godotenv v1.5.1 github.com/linkedin/goavro/v2 v2.14.1 github.com/mattn/go-sqlite3 v1.14.33 - github.com/nats-io/nats.go v1.47.0 github.com/qustavo/sqlhooks/v2 v2.1.0 github.com/santhosh-tekuri/jsonschema/v5 v5.3.1 github.com/stretchr/testify v1.11.1 @@ -95,6 +94,7 @@ require ( github.com/kr/pretty v0.3.1 // indirect github.com/lann/builder v0.0.0-20180802200727-47ae307949d0 // indirect github.com/lann/ps v0.0.0-20150810152359-62de8c46ede0 // indirect + github.com/nats-io/nats.go v1.47.0 // indirect github.com/nats-io/nkeys v0.4.12 // indirect github.com/nats-io/nuid v1.0.1 // indirect github.com/oapi-codegen/runtime v1.1.1 // indirect diff --git a/go.sum b/go.sum index ef89e2d2..0cb7e925 100644 --- a/go.sum +++ b/go.sum @@ -4,8 +4,8 @@ github.com/99designs/gqlgen v0.17.85 h1:EkGx3U2FDcxQm8YDLQSpXIAVmpDyZ3IcBMOJi2nH github.com/99designs/gqlgen v0.17.85/go.mod h1:yvs8s0bkQlRfqg03YXr3eR4OQUowVhODT/tHzCXnbOU= github.com/Azure/go-ntlmssp v0.0.0-20221128193559-754e69321358 h1:mFRzDkZVAjdal+s7s0MwaRv9igoPqLRdzOLzw/8Xvq8= github.com/Azure/go-ntlmssp 
v0.0.0-20221128193559-754e69321358/go.mod h1:chxPXzSsl7ZWRAuOIE23GDNzjWuZquvFlgA8xmpunjU= -github.com/ClusterCockpit/cc-lib/v2 v2.1.0 h1:B6l6h0IjfEuY9DU6aVM3fSsj24lQ1eudXK9QTKmJjqg= -github.com/ClusterCockpit/cc-lib/v2 v2.1.0/go.mod h1:JuxMAuEOaLLNEnnL9U3ejha8kMvsSatLdKPZEgJw6iw= +github.com/ClusterCockpit/cc-lib/v2 v2.2.0 h1:gqMsh7zsJMUhaXviXzaZ3gqXcLVgerjRJHzIcwX4FmQ= +github.com/ClusterCockpit/cc-lib/v2 v2.2.0/go.mod h1:JuxMAuEOaLLNEnnL9U3ejha8kMvsSatLdKPZEgJw6iw= github.com/KyleBanks/depth v1.2.1 h1:5h8fQADFrWtarTdtDudMmGsC7GPbOAu6RVB3ffsVFHc= github.com/KyleBanks/depth v1.2.1/go.mod h1:jzSb9d0L43HxTQfT+oSA1EEp2q+ne2uh6XgeJcm8brE= github.com/Masterminds/squirrel v1.5.4 h1:uUcX/aBc8O7Fg9kaISIUsHXdKuqehiXAMQTYX8afzqM= diff --git a/pkg/metricstore/level.go b/pkg/metricstore/level.go index b35137ef..bfa0ddf0 100644 --- a/pkg/metricstore/level.go +++ b/pkg/metricstore/level.go @@ -44,6 +44,7 @@ import ( "sync" "unsafe" + "github.com/ClusterCockpit/cc-lib/v2/schema" "github.com/ClusterCockpit/cc-lib/v2/util" ) @@ -272,7 +273,7 @@ func (l *Level) sizeInBytes() int64 { for _, b := range l.metrics { if b != nil { - size += b.count() * int64(unsafe.Sizeof(util.Float(0))) + size += b.count() * int64(unsafe.Sizeof(schema.Float(0))) } } diff --git a/pkg/metricstore/metricstore.go b/pkg/metricstore/metricstore.go index a50f4ab5..0d1f19c9 100644 --- a/pkg/metricstore/metricstore.go +++ b/pkg/metricstore/metricstore.go @@ -37,7 +37,7 @@ import ( "github.com/ClusterCockpit/cc-lib/v2/util" ) -// Define a struct to hold your globals and the mutex +// GlobalState holds the global state for the metric store with thread-safe access. type GlobalState struct { mu sync.RWMutex lastRetentionTime int64 @@ -740,8 +740,7 @@ func (m *MemoryStore) Free(selector []string, t int64) (int, error) { return m.GetLevel(selector).free(t) } -// Free releases all buffers for the selected level and all its children that -// contain only values older than `t`. 
+// ForceFree unconditionally removes the oldest buffer from each metric chain. func (m *MemoryStore) ForceFree() (int, error) { return m.GetLevel(nil).forceFree() } diff --git a/pkg/metricstore/stats.go b/pkg/metricstore/stats.go index 51ffafc1..8f7886a3 100644 --- a/pkg/metricstore/stats.go +++ b/pkg/metricstore/stats.go @@ -9,14 +9,15 @@ import ( "errors" "math" + "github.com/ClusterCockpit/cc-lib/v2/schema" "github.com/ClusterCockpit/cc-lib/v2/util" ) type Stats struct { Samples int - Avg util.Float - Min util.Float - Max util.Float + Avg schema.Float + Min schema.Float + Max schema.Float } func (b *buffer) stats(from, to int64) (Stats, int64, int64, error) { @@ -61,9 +62,9 @@ func (b *buffer) stats(from, to int64) (Stats, int64, int64, error) { return Stats{ Samples: samples, - Avg: util.Float(sum) / util.Float(samples), - Min: util.Float(min), - Max: util.Float(max), + Avg: schema.Float(sum) / schema.Float(samples), + Min: schema.Float(min), + Max: schema.Float(max), }, from, t, nil } @@ -81,7 +82,7 @@ func (m *MemoryStore) Stats(selector util.Selector, metric string, from, to int6 } n, samples := 0, 0 - avg, min, max := util.Float(0), math.MaxFloat32, -math.MaxFloat32 + avg, min, max := schema.Float(0), math.MaxFloat32, -math.MaxFloat32 err := m.root.findBuffers(selector, minfo.offset, func(b *buffer) error { stats, cfrom, cto, err := b.stats(from, to) if err != nil { @@ -110,7 +111,7 @@ func (m *MemoryStore) Stats(selector util.Selector, metric string, from, to int6 } if minfo.Aggregation == AvgAggregation { - avg /= util.Float(n) + avg /= schema.Float(n) } else if n > 1 && minfo.Aggregation != SumAggregation { return nil, 0, 0, errors.New("invalid aggregation") } @@ -118,7 +119,7 @@ func (m *MemoryStore) Stats(selector util.Selector, metric string, from, to int6 return &Stats{ Samples: samples, Avg: avg, - Min: util.Float(min), - Max: util.Float(max), + Min: schema.Float(min), + Max: schema.Float(max), }, from, to, nil }