Merge pull request #481 from ClusterCockpit/dev

Dev
Jan Eitzinger
2026-02-04 19:46:02 +01:00
committed by GitHub
19 changed files with 862 additions and 234 deletions

.gitignore
View File

@@ -13,7 +13,7 @@
 /var/checkpoints*
 migrateTimestamps.pl
-test_ccms_write_api*
+test_ccms_*
 /web/frontend/public/build
 /web/frontend/node_modules

View File

@@ -135,36 +135,3 @@ func debugMetrics(rw http.ResponseWriter, r *http.Request) {
 		return
 	}
 }
-
-// handleHealthCheck godoc
-// @summary HealthCheck endpoint
-// @tags healthcheck
-// @description This endpoint allows the users to check if a node is healthy
-// @produce json
-// @param selector query string false "Selector"
-// @success 200 {string} string "Debug dump"
-// @failure 400 {object} api.ErrorResponse "Bad Request"
-// @failure 401 {object} api.ErrorResponse "Unauthorized"
-// @failure 403 {object} api.ErrorResponse "Forbidden"
-// @failure 500 {object} api.ErrorResponse "Internal Server Error"
-// @security ApiKeyAuth
-// @router /healthcheck/ [get]
-func metricsHealth(rw http.ResponseWriter, r *http.Request) {
-	rawCluster := r.URL.Query().Get("cluster")
-	rawNode := r.URL.Query().Get("node")
-
-	if rawCluster == "" || rawNode == "" {
-		handleError(errors.New("'cluster' and 'node' are required query parameter"), http.StatusBadRequest, rw)
-		return
-	}
-
-	rw.Header().Add("Content-Type", "application/json")
-
-	selector := []string{rawCluster, rawNode}
-
-	ms := metricstore.GetMemoryStore()
-	if err := ms.HealthCheck(bufio.NewWriter(rw), selector); err != nil {
-		handleError(err, http.StatusBadRequest, rw)
-		return
-	}
-}

View File

@@ -324,11 +324,12 @@ func (api *NatsAPI) processNodestateEvent(msg lp.CCMessage) {
 	}
 	repo := repository.GetNodeRepository()
+	requestReceived := time.Now().Unix()
 	for _, node := range req.Nodes {
 		state := determineState(node.States)
 		nodeState := schema.NodeStateDB{
-			TimeStamp:       time.Now().Unix(),
+			TimeStamp:       requestReceived,
 			NodeState:       state,
 			CpusAllocated:   node.CpusAllocated,
 			MemoryAllocated: node.MemoryAllocated,

View File

@@ -7,11 +7,14 @@ package api
 import (
 	"fmt"
+	"maps"
 	"net/http"
 	"strings"
 	"time"
 	"github.com/ClusterCockpit/cc-backend/internal/repository"
+	"github.com/ClusterCockpit/cc-backend/pkg/archive"
+	"github.com/ClusterCockpit/cc-backend/pkg/metricstore"
 	"github.com/ClusterCockpit/cc-lib/v2/schema"
 )
@@ -20,6 +23,15 @@ type UpdateNodeStatesRequest struct {
 	Cluster string `json:"cluster" example:"fritz"`
 }
 
+// metricListToNames converts a map of metric configurations to a list of metric names
+func metricListToNames(metricList map[string]*schema.Metric) []string {
+	names := make([]string, 0, len(metricList))
+	for name := range metricList {
+		names = append(names, name)
+	}
+	return names
+}
+
 // this routine assumes that only one of them exists per node
 func determineState(states []string) schema.SchedulerState {
 	for _, state := range states {
@@ -62,16 +74,42 @@ func (api *RestAPI) updateNodeStates(rw http.ResponseWriter, r *http.Request) {
 			http.StatusBadRequest, rw)
 		return
 	}
 
+	requestReceived := time.Now().Unix()
 	repo := repository.GetNodeRepository()
+	ms := metricstore.GetMemoryStore()
+
+	m := make(map[string][]string)
+	healthStates := make(map[string]schema.MonitoringState)
+
+	for _, node := range req.Nodes {
+		if sc, err := archive.GetSubClusterByNode(req.Cluster, node.Hostname); err == nil {
+			m[sc] = append(m[sc], node.Hostname)
+		}
+	}
+
+	for sc, nl := range m {
+		if sc != "" {
+			metricList := archive.GetMetricConfigSubCluster(req.Cluster, sc)
+			metricNames := metricListToNames(metricList)
+			if states, err := ms.HealthCheck(req.Cluster, nl, metricNames); err == nil {
+				maps.Copy(healthStates, states)
+			}
+		}
+	}
+
 	for _, node := range req.Nodes {
 		state := determineState(node.States)
+
+		healthState := schema.MonitoringStateFailed
+		if hs, ok := healthStates[node.Hostname]; ok {
+			healthState = hs
+		}
+
 		nodeState := schema.NodeStateDB{
-			TimeStamp: time.Now().Unix(), NodeState: state,
+			TimeStamp:       requestReceived,
+			NodeState:       state,
 			CpusAllocated:   node.CpusAllocated,
 			MemoryAllocated: node.MemoryAllocated,
 			GpusAllocated:   node.GpusAllocated,
-			HealthState:     schema.MonitoringStateFull,
+			HealthState:     healthState,
 			JobsRunning:     node.JobsRunning,
 		}
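For illustration, a minimal client sketch against the reworked node-state endpoint. Assumptions not taken from this diff: the /api/nodestates/ base path and port, the JWT header, the JSON tags of the node entries, and the example scheduler state value.

package main

import (
	"bytes"
	"encoding/json"
	"fmt"
	"net/http"
)

// Request shape inferred from UpdateNodeStatesRequest and the node fields used
// above; the exact JSON tags of the node entries are an assumption here.
type nodeState struct {
	Hostname string   `json:"hostname"`
	States   []string `json:"states"`
}

type updateNodeStatesRequest struct {
	Cluster string      `json:"cluster"`
	Nodes   []nodeState `json:"nodes"`
}

func main() {
	body, _ := json.Marshal(updateNodeStatesRequest{
		Cluster: "fritz",
		Nodes:   []nodeState{{Hostname: "node001", States: []string{"allocated"}}},
	})
	// Endpoint path follows the renamed route registered below (/nodestates/);
	// base URL and auth header are placeholders.
	req, _ := http.NewRequest(http.MethodPost, "http://localhost:8080/api/nodestates/", bytes.NewReader(body))
	req.Header.Set("Content-Type", "application/json")
	req.Header.Set("Authorization", "Bearer <JWT>")
	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		fmt.Println("request failed:", err)
		return
	}
	defer resp.Body.Close()
	fmt.Println("status:", resp.Status)
}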

View File

@@ -81,7 +81,7 @@ func (api *RestAPI) MountAPIRoutes(r *mux.Router) {
 	// Cluster List
 	r.HandleFunc("/clusters/", api.getClusters).Methods(http.MethodGet)
 	// Slurm node state
-	r.HandleFunc("/nodestate/", api.updateNodeStates).Methods(http.MethodPost, http.MethodPut)
+	r.HandleFunc("/nodestates/", api.updateNodeStates).Methods(http.MethodPost, http.MethodPut)
 	// Job Handler
 	if config.Keys.APISubjects == nil {
 		cclog.Info("Enabling REST start/stop job API")
@@ -127,12 +127,12 @@ func (api *RestAPI) MountMetricStoreAPIRoutes(r *mux.Router) {
r.HandleFunc("/free", freeMetrics).Methods(http.MethodPost) r.HandleFunc("/free", freeMetrics).Methods(http.MethodPost)
r.HandleFunc("/write", writeMetrics).Methods(http.MethodPost) r.HandleFunc("/write", writeMetrics).Methods(http.MethodPost)
r.HandleFunc("/debug", debugMetrics).Methods(http.MethodGet) r.HandleFunc("/debug", debugMetrics).Methods(http.MethodGet)
r.HandleFunc("/healthcheck", metricsHealth).Methods(http.MethodGet) r.HandleFunc("/healthcheck", api.updateNodeStates).Methods(http.MethodPost)
// Same endpoints but with trailing slash // Same endpoints but with trailing slash
r.HandleFunc("/free/", freeMetrics).Methods(http.MethodPost) r.HandleFunc("/free/", freeMetrics).Methods(http.MethodPost)
r.HandleFunc("/write/", writeMetrics).Methods(http.MethodPost) r.HandleFunc("/write/", writeMetrics).Methods(http.MethodPost)
r.HandleFunc("/debug/", debugMetrics).Methods(http.MethodGet) r.HandleFunc("/debug/", debugMetrics).Methods(http.MethodGet)
r.HandleFunc("/healthcheck/", metricsHealth).Methods(http.MethodGet) r.HandleFunc("/healthcheck/", api.updateNodeStates).Methods(http.MethodPost)
} }
// MountConfigAPIRoutes registers configuration and user management endpoints. // MountConfigAPIRoutes registers configuration and user management endpoints.

View File

@@ -923,15 +923,19 @@ func (r *queryResolver) ClusterMetrics(ctx context.Context, cluster string, metr
 			if !okData && len(ser.Data) != 0 {
 				collectorData[metric] = make([]schema.Float, len(ser.Data))
 			} else if !okData {
-				cclog.Debugf("ClusterMetrics Skip Init: No Data -> %s at %s; Size %d", metric, ser.Hostname, len(ser.Data))
+				cclog.Debugf("[SCHEMARESOLVER] clusterMetrics skip init: no data -> %s at %s; size %d", metric, ser.Hostname, len(ser.Data))
 			}
 			// Sum if init'd and matching size
 			if okData && len(ser.Data) == len(collectorData[metric]) {
 				for i, val := range ser.Data {
+					if val.IsNaN() {
+						continue
+					} else {
 						collectorData[metric][i] += val
 					}
+				}
 			} else if okData {
-				cclog.Debugf("ClusterMetrics Skip Sum: Data Diff -> %s at %s; Want Size %d, Have Size %d", metric, ser.Hostname, len(collectorData[metric]), len(ser.Data))
+				cclog.Debugf("[SCHEMARESOLVER] clusterMetrics skip sum: data diff -> %s at %s; want size %d, have size %d", metric, ser.Hostname, len(collectorData[metric]), len(ser.Data))
 			}
 		}
 	}
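To isolate the NaN guard added above: a small sketch with plain float64 slices (schema.Float in the resolver wraps a float with an IsNaN helper; math.IsNaN stands in for it here).

package main

import (
	"fmt"
	"math"
)

// sumSeries adds src element-wise into dst, skipping NaN samples so that a
// single missing value does not poison the accumulated sum.
func sumSeries(dst, src []float64) {
	if len(dst) != len(src) {
		return // mirrors the "skip sum: data diff" branch above
	}
	for i, val := range src {
		if math.IsNaN(val) {
			continue
		}
		dst[i] += val
	}
}

func main() {
	total := make([]float64, 3)
	sumSeries(total, []float64{1, math.NaN(), 3})
	sumSeries(total, []float64{2, 2, math.NaN()})
	fmt.Println(total) // [3 2 3]
}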

View File

@@ -466,7 +466,7 @@ func (r *JobRepository) JobCountGrouped(
 // AddJobCountGrouped augments existing statistics with additional job counts by category.
 //
 // This method enriches JobsStatistics returned by JobsStatsGrouped or JobCountGrouped
-// with counts of running or short-running jobs, matched by group ID.
+// with counts of running or short-running (based on ShortRunningJobsDuration) jobs, matched by group ID.
 //
 // Parameters:
 //   - ctx: Context for security checks

View File

@@ -158,8 +158,7 @@ func cleanupCheckpoints(dir string, cleanupDir string, from int64, deleteInstead
 		return 0, err
 	}
 
-	extension := Keys.Checkpoints.FileFormat
-	files, err := findFiles(entries, from, extension, false)
+	files, err := findFiles(entries, from, false)
 	if err != nil {
 		return 0, err
 	}

View File

@@ -415,7 +415,7 @@ func enqueueCheckpointHosts(dir string, work chan<- [2]string) error {
 //
 // Uses worker pool to load cluster/host combinations. Periodically triggers GC
 // to prevent excessive heap growth. Returns number of files loaded and any errors.
-func (m *MemoryStore) FromCheckpoint(dir string, from int64, extension string) (int, error) {
+func (m *MemoryStore) FromCheckpoint(dir string, from int64) (int, error) {
 	var wg sync.WaitGroup
 	work := make(chan [2]string, Keys.NumWorkers*4)
 	n, errs := int32(0), int32(0)
@@ -426,7 +426,7 @@ func (m *MemoryStore) FromCheckpoint(dir string, from int64, extension string) (
 			defer wg.Done()
 			for host := range work {
 				lvl := m.root.findLevelOrCreate(host[:], len(m.Metrics))
-				nn, err := lvl.fromCheckpoint(m, filepath.Join(dir, host[0], host[1]), from, extension)
+				nn, err := lvl.fromCheckpoint(m, filepath.Join(dir, host[0], host[1]), from)
 				if err != nil {
 					cclog.Errorf("[METRICSTORE]> error while loading checkpoints for %s/%s: %s", host[0], host[1], err.Error())
 					atomic.AddInt32(&errs, 1)
@@ -465,57 +465,7 @@ func (m *MemoryStore) FromCheckpointFiles(dir string, from int64) (int, error) {
cclog.Debugf("[METRICSTORE]> %#v Directory created successfully", dir) cclog.Debugf("[METRICSTORE]> %#v Directory created successfully", dir)
} }
// Config read (replace with your actual config read) return m.FromCheckpoint(dir, from)
fileFormat := Keys.Checkpoints.FileFormat
if fileFormat == "" {
fileFormat = "avro"
}
// Map to easily get the fallback format
oppositeFormat := map[string]string{
"json": "avro",
"avro": "json",
}
// First, attempt to load the specified format
if found, err := checkFilesWithExtension(dir, fileFormat); err != nil {
return 0, fmt.Errorf("[METRICSTORE]> error checking files with extension: %v", err)
} else if found {
cclog.Infof("[METRICSTORE]> Loading %s files because fileformat is %s", fileFormat, fileFormat)
return m.FromCheckpoint(dir, from, fileFormat)
}
// If not found, attempt the opposite format
altFormat := oppositeFormat[fileFormat]
if found, err := checkFilesWithExtension(dir, altFormat); err != nil {
return 0, fmt.Errorf("[METRICSTORE]> error checking files with extension: %v", err)
} else if found {
cclog.Infof("[METRICSTORE]> Loading %s files but fileformat is %s", altFormat, fileFormat)
return m.FromCheckpoint(dir, from, altFormat)
}
return 0, nil
}
// checkFilesWithExtension walks a directory tree to check if files with the given extension exist.
func checkFilesWithExtension(dir string, extension string) (bool, error) {
found := false
err := filepath.Walk(dir, func(path string, info os.FileInfo, err error) error {
if err != nil {
return fmt.Errorf("[METRICSTORE]> error accessing path %s: %v", path, err)
}
if !info.IsDir() && filepath.Ext(info.Name()) == "."+extension {
found = true
return nil
}
return nil
})
if err != nil {
return false, fmt.Errorf("[METRICSTORE]> error walking through directories: %s", err)
}
return found, nil
} }
func (l *Level) loadAvroFile(m *MemoryStore, f *os.File, from int64) error { func (l *Level) loadAvroFile(m *MemoryStore, f *os.File, from int64) error {
@@ -729,7 +679,7 @@ func (l *Level) loadFile(cf *CheckpointFile, m *MemoryStore) error {
 	return nil
 }
 
-func (l *Level) fromCheckpoint(m *MemoryStore, dir string, from int64, extension string) (int, error) {
+func (l *Level) fromCheckpoint(m *MemoryStore, dir string, from int64) (int, error) {
 	direntries, err := os.ReadDir(dir)
 	if err != nil {
 		if os.IsNotExist(err) {
@@ -748,33 +698,38 @@ func (l *Level) fromCheckpoint(m *MemoryStore, dir string, from int64, extension
 				children: make(map[string]*Level),
 			}
 
-			files, err := child.fromCheckpoint(m, path.Join(dir, e.Name()), from, extension)
+			files, err := child.fromCheckpoint(m, path.Join(dir, e.Name()), from)
 			filesLoaded += files
 			if err != nil {
 				return filesLoaded, err
 			}
 			l.children[e.Name()] = child
-		} else if strings.HasSuffix(e.Name(), "."+extension) {
+		} else if strings.HasSuffix(e.Name(), ".json") || strings.HasSuffix(e.Name(), ".avro") {
 			allFiles = append(allFiles, e)
 		} else {
 			continue
 		}
 	}
 
-	files, err := findFiles(allFiles, from, extension, true)
+	files, err := findFiles(allFiles, from, true)
 	if err != nil {
 		return filesLoaded, err
 	}
 
 	loaders := map[string]func(*MemoryStore, *os.File, int64) error{
-		"json": l.loadJSONFile,
-		"avro": l.loadAvroFile,
+		".json": l.loadJSONFile,
+		".avro": l.loadAvroFile,
 	}
 
-	loader := loaders[extension]
-
 	for _, filename := range files {
+		ext := filepath.Ext(filename)
+		loader := loaders[ext]
+		if loader == nil {
+			cclog.Warnf("Unknown extension for file %s", filename)
+			continue
+		}
+
 		// Use a closure to ensure file is closed immediately after use
 		err := func() error {
 			f, err := os.Open(path.Join(dir, filename))
@@ -798,10 +753,12 @@ func (l *Level) fromCheckpoint(m *MemoryStore, dir string, from int64, extension
 // This will probably get very slow over time!
 // A solution could be some sort of an index file in which all other files
 // and the timespan they contain is listed.
-func findFiles(direntries []fs.DirEntry, t int64, extension string, findMoreRecentFiles bool) ([]string, error) {
+// NOTE: This now assumes that you have distinct timestamps for json and avro files
+// Also, it assumes that the timestamps are not overlapping/self-modified.
+func findFiles(direntries []fs.DirEntry, t int64, findMoreRecentFiles bool) ([]string, error) {
 	nums := map[string]int64{}
 	for _, e := range direntries {
-		if !strings.HasSuffix(e.Name(), "."+extension) {
+		if !strings.HasSuffix(e.Name(), ".json") && !strings.HasSuffix(e.Name(), ".avro") {
 			continue
 		}
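The per-file extension dispatch introduced above, sketched in isolation (file names and loader bodies here are placeholders, not the package's API): checkpoint directories may now contain .json and .avro files side by side, so each file's extension picks the loader instead of a global format setting.

package main

import (
	"fmt"
	"path/filepath"
)

func main() {
	// Loader table keyed by extension, mirroring the loaders map above.
	loaders := map[string]func(name string) error{
		".json": func(name string) error { fmt.Println("load JSON", name); return nil },
		".avro": func(name string) error { fmt.Println("load Avro", name); return nil },
	}
	for _, f := range []string{"1700000000.json", "1700000600.avro", "notes.txt"} {
		loader := loaders[filepath.Ext(f)]
		if loader == nil {
			fmt.Println("unknown extension for file", f) // mirrors the cclog.Warnf branch
			continue
		}
		_ = loader(f)
	}
}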

View File

@@ -6,87 +6,260 @@
 package metricstore
 
 import (
-	"bufio"
+	"cmp"
 	"fmt"
+	"slices"
 	"time"
+
+	"github.com/ClusterCockpit/cc-lib/v2/schema"
 )
// HealthCheckResponse represents the result of a health check operation.
//
// Status indicates the monitoring state (Full, Partial, Failed).
// Error contains any error encountered during the health check.
type HealthCheckResponse struct {
Status schema.MonitoringState
Error error
}
 // MaxMissingDataPoints is a threshold that allows a node to be healthy with certain number of data points missing.
 // Suppose a node does not receive last 5 data points, then healthCheck endpoint will still say a
 // node is healthy. Anything more than 5 missing points in metrics of the node will deem the node unhealthy.
 const MaxMissingDataPoints int64 = 5
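// Worked example (editor's sketch, not part of this file): with a metric
// frequency of 10 seconds, MaxMissingDataPoints = 5 tolerates data that is at
// most 5*10 = 50 seconds old; a buffer whose last sample is older than that
// is treated as stale by the checks below.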
-// MaxUnhealthyMetrics is a threshold which allows upto certain number of metrics in a node to be unhealthly.
-// Works with MaxMissingDataPoints. Say 5 metrics (including submetrics) do not receive the last
-// MaxMissingDataPoints data points, then the node will be deemed healthy. Any more metrics that does
-// not receive data for MaxMissingDataPoints data points will deem the node unhealthy.
-const MaxUnhealthyMetrics int64 = 5
-
-func (b *buffer) healthCheck() int64 {
+// bufferExists reports whether a buffer has been allocated and holds any data.
+//
+// A nil buffer or an empty buffer is considered missing.
+func (b *buffer) bufferExists() bool {
 	// Check if the buffer is empty
-	if b.data == nil {
-		return 1
+	if b == nil || b.data == nil || len(b.data) == 0 {
+		return false
 	}
+	return true
+}
// isBufferHealthy checks if a buffer has received data for the last MaxMissingDataPoints.
//
// Returns true if the buffer is healthy (recent data within threshold), false otherwise.
// A nil buffer or empty buffer is considered unhealthy.
func (b *buffer) isBufferHealthy() bool {
+	// Get the last endtime of the buffer
 	bufferEnd := b.start + b.frequency*int64(len(b.data))
 	t := time.Now().Unix()
-	// Check if the buffer is too old
+	// Check if the buffer has recent data (within MaxMissingDataPoints threshold)
 	if t-bufferEnd > MaxMissingDataPoints*b.frequency {
-		return 1
+		return false
 	}
-	return 0
+	return true
 }
-func (l *Level) healthCheck(m *MemoryStore, count int64) (int64, error) {
+// mergeList merges two lists, sorts them, and removes duplicates.
// Requires 'cmp.Ordered' because we need to sort the data.
func mergeList[string cmp.Ordered](list1, list2 []string) []string {
// 1. Combine both lists
result := append(list1, list2...)
// 2. Sort the combined list
slices.Sort(result)
// 3. Compact removes consecutive duplicates (standard in Go 1.21+)
// e.g. [1, 1, 2, 3, 3] -> [1, 2, 3]
result = slices.Compact(result)
return result
}
// getHealthyMetrics recursively collects healthy and degraded metrics at this level and below.
//
// A metric is considered:
// - Healthy: buffer has recent data within MaxMissingDataPoints threshold AND has few/no NaN values
// - Degraded: buffer exists and has recent data, but contains more than MaxMissingDataPoints NaN values
//
// This routine walks the entire subtree starting from the current level.
//
// Parameters:
// - m: MemoryStore containing the global metric configuration
//
// Returns:
// - []string: Flat list of healthy metric names from this level and all children
// - []string: Flat list of degraded metric names (exist but have too many missing values)
// - error: Non-nil only for internal errors during recursion
//
// The routine mirrors healthCheck() but provides more granular classification:
// - healthCheck() finds problems (stale/missing)
// - getHealthyMetrics() separates healthy from degraded metrics
func (l *Level) getHealthyMetrics(m *MemoryStore, expectedMetrics []string) ([]string, []string, error) {
 	l.lock.RLock()
 	defer l.lock.RUnlock()
-	for _, mc := range m.Metrics {
-		if b := l.metrics[mc.offset]; b != nil {
-			count += b.healthCheck()
+
+	globalMetrics := m.Metrics
+
+	missingList := make([]string, 0)
+	degradedList := make([]string, 0)
+
+	// Phase 1: Check metrics at this level
+	for _, metricName := range expectedMetrics {
+		offset := globalMetrics[metricName].offset
+		b := l.metrics[offset]
+
+		if !b.bufferExists() {
+			missingList = append(missingList, metricName)
+		} else if !b.isBufferHealthy() {
+			degradedList = append(degradedList, metricName)
 		}
 	}
+	// Phase 2: Recursively check child levels
 	for _, lvl := range l.children {
-		c, err := lvl.healthCheck(m, 0)
+		childMissing, childDegraded, err := lvl.getHealthyMetrics(m, expectedMetrics)
 		if err != nil {
-			return 0, err
+			return nil, nil, err
 		}
-		count += c
+
+		missingList = mergeList(missingList, childMissing)
+		degradedList = mergeList(degradedList, childDegraded)
 	}
-	return count, nil
+
+	return missingList, degradedList, nil
 }
-
-func (m *MemoryStore) HealthCheck(w *bufio.Writer, selector []string) error {
// GetHealthyMetrics returns healthy and degraded metrics for a specific node as flat lists.
//
// This routine walks the metric tree starting from the specified node selector
// and collects all metrics that have received data within the last MaxMissingDataPoints
// (default: 5 data points). Metrics are classified into two categories:
//
// - Healthy: Buffer has recent data AND contains few/no NaN (missing) values
// - Degraded: Buffer has recent data BUT contains more than MaxMissingDataPoints NaN values
//
// The returned lists include both node-level metrics (e.g., "load", "mem_used") and
// hardware-level metrics (e.g., "cpu_user", "gpu_temp") in flat slices.
//
// Parameters:
// - selector: Hierarchical path to the target node, typically []string{cluster, hostname}.
// Example: []string{"emmy", "node001"} navigates to the "node001" host in the "emmy" cluster.
// The selector must match the hierarchy used during metric ingestion.
//
// Returns:
// - []string: Flat list of healthy metric names (recent data, few missing values)
// - []string: Flat list of degraded metric names (recent data, many missing values)
// - error: Non-nil if the node is not found or internal errors occur
//
// Example usage:
//
// selector := []string{"emmy", "node001"}
// healthyMetrics, degradedMetrics, err := ms.GetHealthyMetrics(selector)
// if err != nil {
// // Node not found or internal error
// return err
// }
// fmt.Printf("Healthy metrics: %v\n", healthyMetrics)
// // Output: ["load", "mem_used", "cpu_user", ...]
// fmt.Printf("Degraded metrics: %v\n", degradedMetrics)
// // Output: ["gpu_temp", "network_rx", ...] (metrics with many NaN values)
//
// Note: This routine provides more granular classification than HealthCheck:
// - HealthCheck reports stale/missing metrics (problems)
// - GetHealthyMetrics separates fully healthy from degraded metrics (quality levels)
func (m *MemoryStore) GetHealthyMetrics(selector []string, expectedMetrics []string) ([]string, []string, error) {
 	lvl := m.root.findLevel(selector)
 	if lvl == nil {
-		return fmt.Errorf("[METRICSTORE]> not found: %#v", selector)
+		return nil, nil, fmt.Errorf("[METRICSTORE]> error while GetHealthyMetrics, host not found: %#v", selector)
 	}
-	buf := make([]byte, 0, 25)
-	// buf = append(buf, "{"...)
-	var count int64 = 0
-	unhealthyMetricsCount, err := lvl.healthCheck(m, count)
+
+	missingList, degradedList, err := lvl.getHealthyMetrics(m, expectedMetrics)
 	if err != nil {
-		return err
+		return nil, nil, err
 	}
-	if unhealthyMetricsCount < MaxUnhealthyMetrics {
-		buf = append(buf, "Healthy"...)
+
+	return missingList, degradedList, nil
+}
// HealthCheck performs health checks on multiple nodes and returns their monitoring states.
//
// This routine provides a batch health check interface that evaluates multiple nodes
// against a specific set of expected metrics. For each node, it determines the overall
// monitoring state based on which metrics are healthy, degraded, or missing.
//
// Health Status Classification:
// - MonitoringStateFull: All expected metrics are healthy (recent data, few missing values)
// - MonitoringStatePartial: Some metrics are degraded (many missing values) or missing
// - MonitoringStateFailed: Node not found or all expected metrics are missing/stale
//
// Parameters:
// - cluster: Cluster name (first element of selector path)
// - nodes: List of node hostnames to check
// - expectedMetrics: List of metric names that should be present on each node
//
// Returns:
// - map[string]schema.MonitoringState: Map keyed by hostname containing monitoring state for each node
// - error: Non-nil only for internal errors (individual node failures are captured as MonitoringStateFailed)
//
// Example usage:
//
// cluster := "emmy"
// nodes := []string{"node001", "node002", "node003"}
// expectedMetrics := []string{"load", "mem_used", "cpu_user", "cpu_system"}
// healthStates, err := ms.HealthCheck(cluster, nodes, expectedMetrics)
// if err != nil {
// return err
// }
// for hostname, state := range healthStates {
// fmt.Printf("Node %s: %s\n", hostname, state)
// }
//
// Note: This routine is optimized for batch operations where you need to check
// the same set of metrics across multiple nodes.
func (m *MemoryStore) HealthCheck(cluster string,
nodes []string, expectedMetrics []string,
) (map[string]schema.MonitoringState, error) {
results := make(map[string]schema.MonitoringState, len(nodes))
// Create a set of expected metrics for fast lookup
expectedSet := make(map[string]bool, len(expectedMetrics))
for _, metric := range expectedMetrics {
expectedSet[metric] = true
}
// Check each node
for _, hostname := range nodes {
selector := []string{cluster, hostname}
status := schema.MonitoringStateFull
healthyCount := 0
degradedCount := 0
missingCount := 0
// Get healthy and degraded metrics for this node
missingList, degradedList, err := m.GetHealthyMetrics(selector, expectedMetrics)
if err != nil {
// Node not found or internal error
results[hostname] = schema.MonitoringStateFailed
continue
}
missingCount = len(missingList)
degradedCount = len(degradedList)
healthyCount = len(expectedMetrics) - (missingCount + degradedCount)
// Determine overall health status
if missingCount > 0 || degradedCount > 0 {
if healthyCount == 0 {
// No healthy metrics at all
status = schema.MonitoringStateFailed
 			} else {
-				buf = append(buf, "Unhealthy"...)
+				// Some healthy, some degraded/missing
status = schema.MonitoringStatePartial
}
}
// else: all metrics healthy, status remains MonitoringStateFull
results[hostname] = status
 	}
-
-	// buf = append(buf, "}\n"...)
-	if _, err = w.Write(buf); err != nil {
-		return err
-	}
-	return w.Flush()
+
+	return results, nil
 }
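The Full/Partial/Failed rule implemented by HealthCheck above reduces to a small pure function; a sketch under the same rule (names here are illustrative, not part of this package):

package main

import "fmt"

// classify reproduces the decision rule from HealthCheck: all expected metrics
// healthy -> "full", no healthy metric left -> "failed", anything in between -> "partial".
func classify(expected, missing, degraded int) string {
	healthy := expected - (missing + degraded)
	if missing == 0 && degraded == 0 {
		return "full"
	}
	if healthy == 0 {
		return "failed"
	}
	return "partial"
}

func main() {
	fmt.Println(classify(4, 0, 0)) // full
	fmt.Println(classify(4, 1, 1)) // partial
	fmt.Println(classify(4, 2, 2)) // failed
}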

View File

@@ -7,6 +7,7 @@ package metricstore
 import (
 	"testing"
+	"time"
 
 	"github.com/ClusterCockpit/cc-lib/v2/schema"
 )
@@ -88,3 +89,378 @@ func TestBufferRead(t *testing.T) {
t.Errorf("buffer.read() len(result) = %d, want 3", len(result)) t.Errorf("buffer.read() len(result) = %d, want 3", len(result))
} }
} }
func TestHealthCheck(t *testing.T) {
// Create a test MemoryStore with some metrics
metrics := map[string]MetricConfig{
"load": {Frequency: 10, Aggregation: AvgAggregation, offset: 0},
"mem_used": {Frequency: 10, Aggregation: AvgAggregation, offset: 1},
"cpu_user": {Frequency: 10, Aggregation: AvgAggregation, offset: 2},
"cpu_system": {Frequency: 10, Aggregation: AvgAggregation, offset: 3},
}
ms := &MemoryStore{
Metrics: metrics,
root: Level{
metrics: make([]*buffer, len(metrics)),
children: make(map[string]*Level),
},
}
// Use recent timestamps (current time minus a small offset)
now := time.Now().Unix()
startTime := now - 100 // Start 100 seconds ago to have enough data points
// Setup test data for node001 - all metrics healthy (recent data)
node001 := ms.root.findLevelOrCreate([]string{"testcluster", "node001"}, len(metrics))
for i := 0; i < len(metrics); i++ {
node001.metrics[i] = newBuffer(startTime, 10)
// Write recent data up to now
for ts := startTime; ts <= now; ts += 10 {
node001.metrics[i].write(ts, schema.Float(float64(i+1)))
}
}
// Setup test data for node002 - some metrics stale (old data beyond MaxMissingDataPoints threshold)
node002 := ms.root.findLevelOrCreate([]string{"testcluster", "node002"}, len(metrics))
// MaxMissingDataPoints = 5, frequency = 10, so threshold is 50 seconds
staleTime := now - 100 // Data ends 100 seconds ago (well beyond 50 second threshold)
for i := 0; i < len(metrics); i++ {
node002.metrics[i] = newBuffer(staleTime-50, 10)
if i < 2 {
// First two metrics: healthy (recent data)
for ts := startTime; ts <= now; ts += 10 {
node002.metrics[i].write(ts, schema.Float(float64(i+1)))
}
} else {
// Last two metrics: stale (data ends 100 seconds ago)
for ts := staleTime - 50; ts <= staleTime; ts += 10 {
node002.metrics[i].write(ts, schema.Float(float64(i+1)))
}
}
}
// Setup test data for node003 - some metrics missing (no buffer)
node003 := ms.root.findLevelOrCreate([]string{"testcluster", "node003"}, len(metrics))
// Only create buffers for first two metrics
for i := 0; i < 2; i++ {
node003.metrics[i] = newBuffer(startTime, 10)
for ts := startTime; ts <= now; ts += 10 {
node003.metrics[i].write(ts, schema.Float(float64(i+1)))
}
}
// Leave metrics[2] and metrics[3] as nil (missing)
// Setup test data for node005 - all metrics stale
node005 := ms.root.findLevelOrCreate([]string{"testcluster", "node005"}, len(metrics))
for i := 0; i < len(metrics); i++ {
node005.metrics[i] = newBuffer(staleTime-50, 10)
// All metrics have stale data (ends 100 seconds ago)
for ts := staleTime - 50; ts <= staleTime; ts += 10 {
node005.metrics[i].write(ts, schema.Float(float64(i+1)))
}
}
// node004 doesn't exist at all
tests := []struct {
name string
cluster string
nodes []string
expectedMetrics []string
wantStates map[string]schema.MonitoringState
}{
{
name: "all metrics healthy",
cluster: "testcluster",
nodes: []string{"node001"},
expectedMetrics: []string{"load", "mem_used", "cpu_user", "cpu_system"},
wantStates: map[string]schema.MonitoringState{
"node001": schema.MonitoringStateFull,
},
},
{
name: "some metrics stale",
cluster: "testcluster",
nodes: []string{"node002"},
expectedMetrics: []string{"load", "mem_used", "cpu_user", "cpu_system"},
wantStates: map[string]schema.MonitoringState{
"node002": schema.MonitoringStatePartial,
},
},
{
name: "some metrics missing",
cluster: "testcluster",
nodes: []string{"node003"},
expectedMetrics: []string{"load", "mem_used", "cpu_user", "cpu_system"},
wantStates: map[string]schema.MonitoringState{
"node003": schema.MonitoringStatePartial,
},
},
{
name: "node not found",
cluster: "testcluster",
nodes: []string{"node004"},
expectedMetrics: []string{"load", "mem_used", "cpu_user", "cpu_system"},
wantStates: map[string]schema.MonitoringState{
"node004": schema.MonitoringStateFailed,
},
},
{
name: "all metrics stale",
cluster: "testcluster",
nodes: []string{"node005"},
expectedMetrics: []string{"load", "mem_used", "cpu_user", "cpu_system"},
wantStates: map[string]schema.MonitoringState{
"node005": schema.MonitoringStateFailed,
},
},
{
name: "multiple nodes mixed states",
cluster: "testcluster",
nodes: []string{"node001", "node002", "node003", "node004", "node005"},
expectedMetrics: []string{"load", "mem_used"},
wantStates: map[string]schema.MonitoringState{
"node001": schema.MonitoringStateFull,
"node002": schema.MonitoringStateFull, // Only checking first 2 metrics which are healthy
"node003": schema.MonitoringStateFull, // Only checking first 2 metrics which exist
"node004": schema.MonitoringStateFailed, // Node doesn't exist
"node005": schema.MonitoringStateFailed, // Both metrics are stale
},
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
results, err := ms.HealthCheck(tt.cluster, tt.nodes, tt.expectedMetrics)
if err != nil {
t.Errorf("HealthCheck() error = %v", err)
return
}
// Check that we got results for all nodes
if len(results) != len(tt.nodes) {
t.Errorf("HealthCheck() returned %d results, want %d", len(results), len(tt.nodes))
}
// Check each node's state
for _, node := range tt.nodes {
state, ok := results[node]
if !ok {
t.Errorf("HealthCheck() missing result for node %s", node)
continue
}
// Check status
if wantStatus, ok := tt.wantStates[node]; ok {
if state != wantStatus {
t.Errorf("HealthCheck() node %s status = %v, want %v", node, state, wantStatus)
}
}
}
})
}
}
// TestGetHealthyMetrics tests the GetHealthyMetrics function which returns lists of missing and degraded metrics
func TestGetHealthyMetrics(t *testing.T) {
metrics := map[string]MetricConfig{
"load": {Frequency: 10, Aggregation: AvgAggregation, offset: 0},
"mem_used": {Frequency: 10, Aggregation: AvgAggregation, offset: 1},
"cpu_user": {Frequency: 10, Aggregation: AvgAggregation, offset: 2},
}
ms := &MemoryStore{
Metrics: metrics,
root: Level{
metrics: make([]*buffer, len(metrics)),
children: make(map[string]*Level),
},
}
now := time.Now().Unix()
startTime := now - 100
staleTime := now - 100
// Setup node with mixed health states
node := ms.root.findLevelOrCreate([]string{"testcluster", "testnode"}, len(metrics))
// Metric 0 (load): healthy - recent data
node.metrics[0] = newBuffer(startTime, 10)
for ts := startTime; ts <= now; ts += 10 {
node.metrics[0].write(ts, schema.Float(1.0))
}
// Metric 1 (mem_used): degraded - stale data
node.metrics[1] = newBuffer(staleTime-50, 10)
for ts := staleTime - 50; ts <= staleTime; ts += 10 {
node.metrics[1].write(ts, schema.Float(2.0))
}
// Metric 2 (cpu_user): missing - no buffer (nil)
tests := []struct {
name string
selector []string
expectedMetrics []string
wantMissing []string
wantDegraded []string
wantErr bool
}{
{
name: "mixed health states",
selector: []string{"testcluster", "testnode"},
expectedMetrics: []string{"load", "mem_used", "cpu_user"},
wantMissing: []string{"cpu_user"},
wantDegraded: []string{"mem_used"},
wantErr: false,
},
{
name: "node not found",
selector: []string{"testcluster", "nonexistent"},
expectedMetrics: []string{"load"},
wantMissing: nil,
wantDegraded: nil,
wantErr: true,
},
{
name: "check only healthy metric",
selector: []string{"testcluster", "testnode"},
expectedMetrics: []string{"load"},
wantMissing: []string{},
wantDegraded: []string{},
wantErr: false,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
missing, degraded, err := ms.GetHealthyMetrics(tt.selector, tt.expectedMetrics)
if (err != nil) != tt.wantErr {
t.Errorf("GetHealthyMetrics() error = %v, wantErr %v", err, tt.wantErr)
return
}
if tt.wantErr {
return
}
// Check missing list
if len(missing) != len(tt.wantMissing) {
t.Errorf("GetHealthyMetrics() missing = %v, want %v", missing, tt.wantMissing)
} else {
for i, m := range tt.wantMissing {
if missing[i] != m {
t.Errorf("GetHealthyMetrics() missing[%d] = %v, want %v", i, missing[i], m)
}
}
}
// Check degraded list
if len(degraded) != len(tt.wantDegraded) {
t.Errorf("GetHealthyMetrics() degraded = %v, want %v", degraded, tt.wantDegraded)
} else {
for i, d := range tt.wantDegraded {
if degraded[i] != d {
t.Errorf("GetHealthyMetrics() degraded[%d] = %v, want %v", i, degraded[i], d)
}
}
}
})
}
}
// TestBufferHealthChecks tests the buffer-level health check functions
func TestBufferHealthChecks(t *testing.T) {
now := time.Now().Unix()
tests := []struct {
name string
setupBuffer func() *buffer
wantExists bool
wantHealthy bool
description string
}{
{
name: "nil buffer",
setupBuffer: func() *buffer {
return nil
},
wantExists: false,
wantHealthy: false,
description: "nil buffer should not exist and not be healthy",
},
{
name: "empty buffer",
setupBuffer: func() *buffer {
b := newBuffer(now, 10)
b.data = nil
return b
},
wantExists: false,
wantHealthy: false,
description: "empty buffer should not exist and not be healthy",
},
{
name: "healthy buffer with recent data",
setupBuffer: func() *buffer {
b := newBuffer(now-30, 10)
// Write data up to now (within MaxMissingDataPoints * frequency = 50 seconds)
for ts := now - 30; ts <= now; ts += 10 {
b.write(ts, schema.Float(1.0))
}
return b
},
wantExists: true,
wantHealthy: true,
description: "buffer with recent data should be healthy",
},
{
name: "stale buffer beyond threshold",
setupBuffer: func() *buffer {
b := newBuffer(now-200, 10)
// Write data that ends 100 seconds ago (beyond MaxMissingDataPoints * frequency = 50 seconds)
for ts := now - 200; ts <= now-100; ts += 10 {
b.write(ts, schema.Float(1.0))
}
return b
},
wantExists: true,
wantHealthy: false,
description: "buffer with stale data should exist but not be healthy",
},
{
name: "buffer at threshold boundary",
setupBuffer: func() *buffer {
b := newBuffer(now-50, 10)
// Write data that ends exactly at threshold (MaxMissingDataPoints * frequency = 50 seconds)
for ts := now - 50; ts <= now-50; ts += 10 {
b.write(ts, schema.Float(1.0))
}
return b
},
wantExists: true,
wantHealthy: true,
description: "buffer at threshold boundary should still be healthy",
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
b := tt.setupBuffer()
exists := b.bufferExists()
if exists != tt.wantExists {
t.Errorf("bufferExists() = %v, want %v: %s", exists, tt.wantExists, tt.description)
}
if b != nil && b.data != nil && len(b.data) > 0 {
healthy := b.isBufferHealthy()
if healthy != tt.wantHealthy {
t.Errorf("isBufferHealthy() = %v, want %v: %s", healthy, tt.wantHealthy, tt.description)
}
}
})
}
}

View File

@@ -7,7 +7,7 @@
--> -->
<script> <script>
import { onMount } from "svelte"; import { getContext, onMount } from "svelte";
import { import {
Row, Row,
Col, Col,
@@ -18,6 +18,7 @@
Spinner, Spinner,
InputGroup, InputGroup,
Input, Input,
Tooltip
} from "@sveltestrap/sveltestrap"; } from "@sveltestrap/sveltestrap";
import { import {
queryStore, queryStore,
@@ -29,6 +30,9 @@
scramble, scramble,
scrambleNames, scrambleNames,
} from "./generic/utils.js"; } from "./generic/utils.js";
import {
formatDurationTime
} from "./generic/units.js";
import Filters from "./generic/Filters.svelte"; import Filters from "./generic/Filters.svelte";
/* Svelte 5 Props */ /* Svelte 5 Props */
@@ -40,48 +44,70 @@
/* Const Init */ /* Const Init */
const {} = init(); const {} = init();
const client = getContextClient(); const client = getContextClient();
const shortDuration = getContext("cc-config").jobList_hideShortRunningJobs; // Always configured
/* State Init*/ /* State Init*/
let filterComponent = $state(); // see why here: https://stackoverflow.com/questions/58287729/how-can-i-export-a-function-from-a-svelte-component-that-changes-a-value-in-the let filterComponent = $state(); // see why here: https://stackoverflow.com/questions/58287729/how-can-i-export-a-function-from-a-svelte-component-that-changes-a-value-in-the
let jobFilters = $state([]); let jobFilters = $state([]);
let nameFilter = $state(""); let nameFilter = $state("");
let sorting = $state({ field: "totalJobs", direction: "down" }); let sorting = $state({ field: "totalJobs", direction: "desc" });
/* Derived Vars */ /* Derived Vars */
const fetchRunning = $derived(jobFilters.some(jf => jf?.state?.length == 1 && jf?.state?.includes("running")));
const numCols = $derived.by(() => {
let colbase = 6
if (fetchRunning) {
colbase += 2
}
return colbase
})
let stats = $derived( let stats = $derived(
queryStore({ queryStore({
client: client, client: client,
query: gql` query: gql`
query($jobFilters: [JobFilter!]!) { query($jobFilters: [JobFilter!]!, $fetchRunning: Boolean!) {
rows: jobsStatistics(filter: $jobFilters, groupBy: ${type}) { rows: jobsStatistics(filter: $jobFilters, groupBy: ${type}) {
id id
name name
totalJobs totalJobs
shortJobs
totalCores @include(if: $fetchRunning)
totalAccs @include(if: $fetchRunning)
totalWalltime totalWalltime
totalCoreHours totalCoreHours
totalAccHours totalAccHours
} }
}`, }`,
variables: { jobFilters }, variables: {
jobFilters,
fetchRunning
},
}) })
); );
/* Functions */ /* Functions */
function changeSorting(field) { function changeSorting(newField) {
sorting = { field, direction: sorting?.direction == "down" ? "up" : "down" }; if (sorting.field == newField) {
// Same Field, Change Direction
sorting = { field: newField, direction: sorting.direction == "desc" ? "asc" : "desc" };
} else {
// Change Field, Apply Default Direction
sorting = { field: newField, direction: "desc" };
}
} }
function sort(stats, sorting, nameFilter) { function sort(stats, sorting, nameFilter) {
const idCmp = sorting.direction == "up" const idCmp = sorting.direction == "asc"
? (a, b) => b.id.localeCompare(a.id) ? (a, b) => b.id.localeCompare(a.id)
: (a, b) => a.id.localeCompare(b.id) : (a, b) => a.id.localeCompare(b.id)
// Force empty or undefined strings to the end of the list // Force empty or undefined strings to the end of the list
const nameCmp = sorting.direction == "up" const nameCmp = sorting.direction == "asc"
? (a, b) => !a?.name ? 1 : (!b?.name ? -1 : (b.name.localeCompare(a.name))) ? (a, b) => !a?.name ? 1 : (!b?.name ? -1 : (b.name.localeCompare(a.name)))
: (a, b) => !a?.name ? 1 : (!b?.name ? -1 : (a.name.localeCompare(b.name))) : (a, b) => !a?.name ? 1 : (!b?.name ? -1 : (a.name.localeCompare(b.name)))
const intCmp = sorting.direction == "up" const intCmp = sorting.direction == "asc"
? (a, b) => a[sorting.field] - b[sorting.field] ? (a, b) => a[sorting.field] - b[sorting.field]
: (a, b) => b[sorting.field] - a[sorting.field]; : (a, b) => b[sorting.field] - a[sorting.field];
@@ -141,7 +167,7 @@
> >
{#if sorting?.field == "id"} {#if sorting?.field == "id"}
<!-- Note on Icon-Name: Arrow-indicator always down, only alpha-indicator switches --> <!-- Note on Icon-Name: Arrow-indicator always down, only alpha-indicator switches -->
<Icon name={`sort-alpha-${sorting?.direction == 'down' ? 'down' : 'down-alt'}`} /> <Icon name={`sort-alpha-${sorting?.direction == 'desc' ? 'down' : 'down-alt'}`} />
{:else} {:else}
<Icon name="three-dots-vertical" /> <Icon name="three-dots-vertical" />
{/if} {/if}
@@ -156,7 +182,7 @@
onclick={() => changeSorting("name")} onclick={() => changeSorting("name")}
> >
{#if sorting?.field == "name"} {#if sorting?.field == "name"}
<Icon name={`sort-alpha-${sorting?.direction == 'down' ? 'down' : 'down-alt'}`} /> <Icon name={`sort-alpha-${sorting?.direction == 'desc' ? 'down' : 'down-alt'}`} />
{:else} {:else}
<Icon name="three-dots-vertical" /> <Icon name="three-dots-vertical" />
{/if} {/if}
@@ -172,12 +198,66 @@
> >
{#if sorting?.field == "totalJobs"} {#if sorting?.field == "totalJobs"}
<!-- Note on Icon-Name: Arrow-indicator always down, only numeric-indicator switches --> <!-- Note on Icon-Name: Arrow-indicator always down, only numeric-indicator switches -->
<Icon name={`sort-numeric-${sorting?.direction == 'down' ? 'down-alt' : 'down'}`} /> <Icon name={`sort-numeric-${sorting?.direction == 'desc' ? 'down-alt' : 'down'}`} />
{:else} {:else}
<Icon name="three-dots-vertical" /> <Icon name="three-dots-vertical" />
{/if} {/if}
</Button> </Button>
</th> </th>
<th scope="col">
<span class="mr-1">
Short Jobs
<Icon id="shortjobs-info" style="cursor:help;" size="sm" name="info-circle"/>
</span>
<Tooltip target={`shortjobs-info`} placement="top">
Job duration less than {formatDurationTime(shortDuration)}
</Tooltip>
&#8239; <!-- Narrow Non-Breaking Space -->
<Button
color={sorting.field == "shortJobs" ? "primary" : "light"}
size="sm"
onclick={() => changeSorting("shortJobs")}
>
{#if sorting?.field == "shortJobs"}
<!-- Note on Icon-Name: Arrow-indicator always down, only numeric-indicator switches -->
<Icon name={`sort-numeric-${sorting?.direction == 'desc' ? 'down-alt' : 'down'}`} />
{:else}
<Icon name="three-dots-vertical" />
{/if}
</Button>
</th>
{#if fetchRunning}
<th scope="col">
Total Cores
<Button
color={sorting.field == "totalCores" ? "primary" : "light"}
size="sm"
onclick={() => changeSorting("totalCores")}
>
{#if sorting?.field == "totalJCores"}
<!-- Note on Icon-Name: Arrow-indicator always down, only numeric-indicator switches -->
<Icon name={`sort-numeric-${sorting?.direction == 'desc' ? 'down-alt' : 'down'}`} />
{:else}
<Icon name="three-dots-vertical" />
{/if}
</Button>
</th>
<th scope="col">
Total Accelerators
<Button
color={sorting.field == "totalAccs" ? "primary" : "light"}
size="sm"
onclick={() => changeSorting("totalAccs")}
>
{#if sorting?.field == "totalAccs"}
<!-- Note on Icon-Name: Arrow-indicator always down, only numeric-indicator switches -->
<Icon name={`sort-numeric-${sorting?.direction == 'desc' ? 'down-alt' : 'down'}`} />
{:else}
<Icon name="three-dots-vertical" />
{/if}
</Button>
</th>
{/if}
<th scope="col"> <th scope="col">
Total Walltime Total Walltime
<Button <Button
@@ -186,7 +266,7 @@
onclick={() => changeSorting("totalWalltime")} onclick={() => changeSorting("totalWalltime")}
> >
{#if sorting?.field == "totalWalltime"} {#if sorting?.field == "totalWalltime"}
<Icon name={`sort-numeric-${sorting?.direction == 'down' ? 'down-alt' : 'down'}`} /> <Icon name={`sort-numeric-${sorting?.direction == 'desc' ? 'down-alt' : 'down'}`} />
{:else} {:else}
<Icon name="three-dots-vertical" /> <Icon name="three-dots-vertical" />
{/if} {/if}
@@ -200,7 +280,7 @@
onclick={() => changeSorting("totalCoreHours")} onclick={() => changeSorting("totalCoreHours")}
> >
{#if sorting?.field == "totalCoreHours"} {#if sorting?.field == "totalCoreHours"}
<Icon name={`sort-numeric-${sorting?.direction == 'down' ? 'down-alt' : 'down'}`} /> <Icon name={`sort-numeric-${sorting?.direction == 'desc' ? 'down-alt' : 'down'}`} />
{:else} {:else}
<Icon name="three-dots-vertical" /> <Icon name="three-dots-vertical" />
{/if} {/if}
@@ -214,7 +294,7 @@
onclick={() => changeSorting("totalAccHours")} onclick={() => changeSorting("totalAccHours")}
> >
{#if sorting?.field == "totalAccHours"} {#if sorting?.field == "totalAccHours"}
<Icon name={`sort-numeric-${sorting?.direction == 'down' ? 'down-alt' : 'down'}`} /> <Icon name={`sort-numeric-${sorting?.direction == 'desc' ? 'down-alt' : 'down'}`} />
{:else} {:else}
<Icon name="three-dots-vertical" /> <Icon name="three-dots-vertical" />
{/if} {/if}
@@ -225,11 +305,11 @@
<tbody> <tbody>
{#if $stats.fetching} {#if $stats.fetching}
<tr> <tr>
<td colspan="4" style="text-align: center;"><Spinner secondary /></td> <td colspan={numCols} style="text-align: center;"><Spinner secondary /></td>
</tr> </tr>
{:else if $stats.error} {:else if $stats.error}
<tr> <tr>
<td colspan="4" <td colspan={numCols}
><Card body color="danger" class="mb-3">{$stats.error.message}</Card ><Card body color="danger" class="mb-3">{$stats.error.message}</Card
></td ></td
> >
@@ -260,13 +340,18 @@
> >
{/if} {/if}
<td>{row.totalJobs}</td> <td>{row.totalJobs}</td>
<td>{row.shortJobs}</td>
{#if fetchRunning}
<td>{row.totalCores}</td>
<td>{row.totalAccs}</td>
{/if}
<td>{row.totalWalltime}</td> <td>{row.totalWalltime}</td>
<td>{row.totalCoreHours}</td> <td>{row.totalCoreHours}</td>
<td>{row.totalAccHours}</td> <td>{row.totalAccHours}</td>
</tr> </tr>
{:else} {:else}
<tr> <tr>
<td colspan="4"><i>No {type.toLowerCase()}s/jobs found</i></td> <td colspan={numCols}><i>No {type.toLowerCase()}s/jobs found</i></td>
</tr> </tr>
{/each} {/each}
{/if} {/if}

View File

@@ -32,7 +32,7 @@
 	let {
 		matchedListJobs = $bindable(0),
 		selectedJobs = $bindable([]),
-		metrics = getContext("cc-config").metricConfig_jobListMetrics,
+		metrics = [],
 		sorting = { field: "startTime", type: "col", order: "DESC" },
 		showFootprint = false,
 		filterBuffer = [],
@@ -109,7 +109,7 @@
 	let paging = $derived({ itemsPerPage, page });
 	const plotWidth = $derived.by(() => {
 		return Math.floor(
-			(tableWidth - jobInfoColumnWidth) / (metrics.length + (showFootprint ? 1 : 0)) - 10,
+			(tableWidth - jobInfoColumnWidth) / (metrics.length + (showFootprint ? 2 : 1)) - 10,
 		);
 	});
 	let jobsStore = $derived(queryStore({

View File

@@ -133,7 +133,7 @@
 	}
 </script>
 
-<Card class="mt-1 overflow-auto" style="width: {width}; height: {height}">
+<Card class="mx-2 overflow-auto" style="width: {width}; height: {height}">
 	{#if displayTitle}
 		<CardHeader>
 			<CardTitle class="mb-0 d-flex justify-content-center">

View File

@@ -79,6 +79,7 @@
/* Derived */ /* Derived */
const jobId = $derived(job?.id); const jobId = $derived(job?.id);
const refinedData = $derived($metricsQuery?.data?.jobMetrics ? sortAndSelectScope($metricsQuery.data.jobMetrics) : []);
const scopes = $derived.by(() => { const scopes = $derived.by(() => {
if (job.numNodes == 1) { if (job.numNodes == 1) {
if (job.numAcc >= 1) return ["core", "accelerator"]; if (job.numAcc >= 1) return ["core", "accelerator"];
@@ -202,10 +203,15 @@
/> />
</td> </td>
{/if} {/if}
{#each sortAndSelectScope($metricsQuery.data.jobMetrics) as metric, i (metric?.name || i)} {#each refinedData as metric, i (metric?.name || i)}
<td> <td>
<!-- Subluster Metricconfig remove keyword for jobtables (joblist main, user joblist, project joblist) to be used here as toplevel case--> {#key metric}
{#if metric.disabled == false && metric.data} {#if metric?.data}
{#if metric?.disabled}
<Card body class="mx-2" color="info">
Metric <b>{metric.data.name}</b>: Disabled for subcluster <code>{job.subCluster}</code>
</Card>
{:else}
<MetricPlot <MetricPlot
onZoom={(detail) => handleZoom(detail, metric.data.name)} onZoom={(detail) => handleZoom(detail, metric.data.name)}
height={plotHeight} height={plotHeight}
@@ -222,12 +228,7 @@
zoomState={zoomStates[metric.data.name] || null} zoomState={zoomStates[metric.data.name] || null}
thresholdState={thresholdStates[metric.data.name] || null} thresholdState={thresholdStates[metric.data.name] || null}
/> />
{:else if metric.disabled == true && metric.data} {/if}
<Card body color="info"
>Metric disabled for subcluster <code
>{metric.data.name}:{job.subCluster}</code
></Card
>
{:else} {:else}
<Card body class="mx-2" color="warning"> <Card body class="mx-2" color="warning">
<p>No dataset(s) returned for <b>{metrics[i]}</b></p> <p>No dataset(s) returned for <b>{metrics[i]}</b></p>
@@ -236,6 +237,11 @@
<p class="mb-1">Identical messages in <i>job {job.jobId} row</i>: Host not found.</p> <p class="mb-1">Identical messages in <i>job {job.jobId} row</i>: Host not found.</p>
</Card> </Card>
{/if} {/if}
{/key}
</td>
{:else}
<td>
<Card body class="mx-2">No metrics selected for display.</Card>
</td> </td>
{/each} {/each}
{/if} {/if}

View File

@@ -79,7 +79,7 @@
 	// X
 	let pendingSeries = [
 		{
-			label: "Runtime",
+			label: "Time",
 			value: (u, ts, sidx, didx) =>
 				(didx == null) ? null : formatDurationTime(ts, forNode),
 		}

View File

@@ -34,6 +34,9 @@
/*Const Init */ /*Const Init */
const { query: initq } = init(); const { query: initq } = init();
const useCbColors = getContext("cc-config")?.plotConfiguration_colorblindMode || false const useCbColors = getContext("cc-config")?.plotConfiguration_colorblindMode || false
/* Derived */
const subClusters = $derived($initq?.data?.clusters?.find((c) => c.name == presetCluster)?.subClusters || []);
</script> </script>
<!-- Loading indicator & Refresh --> <!-- Loading indicator & Refresh -->
@@ -66,12 +69,22 @@
</CardBody> </CardBody>
</TabPane> </TabPane>
<TabPane tabId="usage-dash" tab="Usage"> <TabPane tabId="usage-dash" tab="Cluster Usage">
<CardBody> <CardBody>
<UsageDash {presetCluster} {useCbColors}></UsageDash> <UsageDash {presetCluster} {useCbColors}></UsageDash>
</CardBody> </CardBody>
</TabPane> </TabPane>
{#if subClusters?.length > 1}
{#each subClusters.map(sc => sc.name) as scn}
<TabPane tabId="{scn}-usage-dash" tab="{scn.charAt(0).toUpperCase() + scn.slice(1)} Usage">
<CardBody>
<UsageDash {presetCluster} presetSubCluster={scn} {useCbColors}></UsageDash>
</CardBody>
</TabPane>
{/each}
{/if}
<TabPane tabId="metric-dash" tab="Statistics"> <TabPane tabId="metric-dash" tab="Statistics">
<CardBody> <CardBody>
<StatisticsDash {presetCluster} {useCbColors}></StatisticsDash> <StatisticsDash {presetCluster} {useCbColors}></StatisticsDash>

View File

@@ -3,6 +3,9 @@
Properties: Properties:
- `presetCluster String`: The cluster to show status information for - `presetCluster String`: The cluster to show status information for
- `presetSubCluster String?`: The subCluster to show status information for [Default: null]
- `useCbColors Bool?`: Use colorblind friendly colors [Default: false]
- `useAltColors Bool?`: Use alternative color set [Default: false]
--> -->
<script> <script>
@@ -35,6 +38,7 @@
/* Svelte 5 Props */ /* Svelte 5 Props */
let { let {
presetCluster, presetCluster,
presetSubCluster = null,
useCbColors = false, useCbColors = false,
useAltColors = false useAltColors = false
} = $props(); } = $props();
@@ -52,7 +56,12 @@
let numDurationBins = $state("1h"); let numDurationBins = $state("1h");
/* Derived */ /* Derived */
let cluster = $derived(presetCluster) const canvasPrefix = $derived(`${presetCluster}-${presetSubCluster ? presetSubCluster : ''}`)
const statusFilter = $derived(presetSubCluster
? [{ state: ["running"] }, { cluster: { eq: presetCluster} }, { partition: { eq: presetSubCluster } }]
: [{ state: ["running"] }, { cluster: { eq: presetCluster} }]
);
const topJobsQuery = $derived(queryStore({ const topJobsQuery = $derived(queryStore({
client: client, client: client,
query: gql` query: gql`
@@ -82,7 +91,7 @@
} }
`, `,
variables: { variables: {
filter: [{ state: ["running"] }, { cluster: { eq: cluster} }], filter: statusFilter,
paging: pagingState // Top 10 paging: pagingState // Top 10
}, },
requestPolicy: "network-only" requestPolicy: "network-only"
@@ -117,7 +126,7 @@
} }
`, `,
variables: { variables: {
filter: [{ state: ["running"] }, { cluster: { eq: cluster } }], filter: statusFilter,
paging: pagingState paging: pagingState
}, },
requestPolicy: "network-only" requestPolicy: "network-only"
@@ -152,7 +161,7 @@
} }
`, `,
variables: { variables: {
filter: [{ state: ["running"] }, { cluster: { eq: cluster } }], filter: statusFilter,
paging: pagingState paging: pagingState
}, },
requestPolicy: "network-only" requestPolicy: "network-only"
@@ -184,7 +193,7 @@
} }
`, `,
variables: { variables: {
filter: [{ state: ["running"] }, { cluster: { eq: cluster } }], filter: statusFilter,
selectedHistograms: selectedHistograms, // No Metrics requested for node hardware stats selectedHistograms: selectedHistograms, // No Metrics requested for node hardware stats
numDurationBins: numDurationBins, numDurationBins: numDurationBins,
}, },
@@ -264,7 +273,7 @@
</h4> </h4>
<Pie <Pie
{useAltColors} {useAltColors}
canvasId="hpcpie-jobs-users" canvasId="{canvasPrefix}-hpcpie-jobs-users"
size={colWidthJobs * 0.75} size={colWidthJobs * 0.75}
sliceLabel="Jobs" sliceLabel="Jobs"
quantities={$topJobsQuery.data.topUser.map( quantities={$topJobsQuery.data.topUser.map(
@@ -284,14 +293,14 @@
{#each $topJobsQuery.data.topUser as tu, i} {#each $topJobsQuery.data.topUser as tu, i}
<tr> <tr>
<td><Icon name="circle-fill" style="color: {legendColors(i)};" /></td> <td><Icon name="circle-fill" style="color: {legendColors(i)};" /></td>
<td id="topName-jobs-{tu.id}"> <td id="{canvasPrefix}-topName-jobs-{tu.id}">
<a target="_blank" href="/monitoring/user/{tu.id}?cluster={cluster}&state=running" <a target="_blank" href="/monitoring/user/{tu.id}?cluster={presetCluster}{presetSubCluster ? '&partition='+presetSubCluster : ''}&state=running"
>{scrambleNames ? scramble(tu.id) : tu.id} >{scrambleNames ? scramble(tu.id) : tu.id}
</a> </a>
</td> </td>
{#if tu?.name} {#if tu?.name}
<Tooltip <Tooltip
target={`topName-jobs-${tu.id}`} target={`${canvasPrefix}-topName-jobs-${tu.id}`}
placement="left" placement="left"
>{scrambleNames ? scramble(tu.name) : tu.name}</Tooltip >{scrambleNames ? scramble(tu.name) : tu.name}</Tooltip
> >
@@ -308,7 +317,7 @@
</h4> </h4>
<Pie <Pie
{useAltColors} {useAltColors}
canvasId="hpcpie-jobs-projects" canvasId="{canvasPrefix}-hpcpie-jobs-projects"
size={colWidthJobs * 0.75} size={colWidthJobs * 0.75}
sliceLabel={'Jobs'} sliceLabel={'Jobs'}
quantities={$topJobsQuery.data.topProjects.map( quantities={$topJobsQuery.data.topProjects.map(
@@ -328,7 +337,7 @@
<tr> <tr>
<td><Icon name="circle-fill" style="color: {legendColors(i)};" /></td> <td><Icon name="circle-fill" style="color: {legendColors(i)};" /></td>
<td> <td>
<a target="_blank" href="/monitoring/jobs/?cluster={cluster}&state=running&project={tp.id}&projectMatch=eq" <a target="_blank" href="/monitoring/jobs/?cluster={presetCluster}{presetSubCluster ? '&partition='+presetSubCluster : ''}&state=running&project={tp.id}&projectMatch=eq"
>{scrambleNames ? scramble(tp.id) : tp.id} >{scrambleNames ? scramble(tp.id) : tp.id}
</a> </a>
</td> </td>
@@ -368,7 +377,7 @@
</h4> </h4>
<Pie <Pie
{useAltColors} {useAltColors}
canvasId="hpcpie-nodes-users" canvasId="{canvasPrefix}-hpcpie-nodes-users"
size={colWidthNodes * 0.75} size={colWidthNodes * 0.75}
sliceLabel="Nodes" sliceLabel="Nodes"
quantities={$topNodesQuery.data.topUser.map( quantities={$topNodesQuery.data.topUser.map(
@@ -388,14 +397,14 @@
{#each $topNodesQuery.data.topUser as tu, i} {#each $topNodesQuery.data.topUser as tu, i}
<tr> <tr>
<td><Icon name="circle-fill" style="color: {legendColors(i)};" /></td> <td><Icon name="circle-fill" style="color: {legendColors(i)};" /></td>
<td id="topName-nodes-{tu.id}"> <td id="{canvasPrefix}-topName-nodes-{tu.id}">
<a target="_blank" href="/monitoring/user/{tu.id}?cluster={cluster}&state=running" <a target="_blank" href="/monitoring/user/{tu.id}?cluster={presetCluster}{presetSubCluster ? '&partition='+presetSubCluster : ''}&state=running"
>{scrambleNames ? scramble(tu.id) : tu.id} >{scrambleNames ? scramble(tu.id) : tu.id}
</a> </a>
</td> </td>
{#if tu?.name} {#if tu?.name}
<Tooltip <Tooltip
target={`topName-nodes-${tu.id}`} target={`${canvasPrefix}-topName-nodes-${tu.id}`}
placement="left" placement="left"
>{scrambleNames ? scramble(tu.name) : tu.name}</Tooltip >{scrambleNames ? scramble(tu.name) : tu.name}</Tooltip
> >
@@ -412,7 +421,7 @@
</h4> </h4>
<Pie <Pie
{useAltColors} {useAltColors}
canvasId="hpcpie-nodes-projects" canvasId="{canvasPrefix}-hpcpie-nodes-projects"
size={colWidthNodes * 0.75} size={colWidthNodes * 0.75}
sliceLabel={'Nodes'} sliceLabel={'Nodes'}
quantities={$topNodesQuery.data.topProjects.map( quantities={$topNodesQuery.data.topProjects.map(
@@ -432,7 +441,7 @@
<tr> <tr>
<td><Icon name="circle-fill" style="color: {legendColors(i)};" /></td> <td><Icon name="circle-fill" style="color: {legendColors(i)};" /></td>
<td> <td>
<a target="_blank" href="/monitoring/jobs/?cluster={cluster}&state=running&project={tp.id}&projectMatch=eq" <a target="_blank" href="/monitoring/jobs/?cluster={presetCluster}{presetSubCluster ? '&partition='+presetSubCluster : ''}&state=running&project={tp.id}&projectMatch=eq"
>{scrambleNames ? scramble(tp.id) : tp.id} >{scrambleNames ? scramble(tp.id) : tp.id}
</a> </a>
</td> </td>
@@ -472,7 +481,7 @@
</h4> </h4>
<Pie <Pie
{useAltColors} {useAltColors}
canvasId="hpcpie-accs-users" canvasId="{canvasPrefix}-hpcpie-accs-users"
size={colWidthAccs * 0.75} size={colWidthAccs * 0.75}
sliceLabel="GPUs" sliceLabel="GPUs"
quantities={$topAccsQuery.data.topUser.map( quantities={$topAccsQuery.data.topUser.map(
@@ -492,14 +501,14 @@
{#each $topAccsQuery.data.topUser as tu, i} {#each $topAccsQuery.data.topUser as tu, i}
<tr> <tr>
<td><Icon name="circle-fill" style="color: {legendColors(i)};" /></td> <td><Icon name="circle-fill" style="color: {legendColors(i)};" /></td>
<td id="topName-accs-{tu.id}"> <td id="{canvasPrefix}-topName-accs-{tu.id}">
<a target="_blank" href="/monitoring/user/{tu.id}?cluster={cluster}&state=running" <a target="_blank" href="/monitoring/user/{tu.id}?cluster={presetCluster}{presetSubCluster ? '&partition='+presetSubCluster : ''}&state=running"
>{scrambleNames ? scramble(tu.id) : tu.id} >{scrambleNames ? scramble(tu.id) : tu.id}
</a> </a>
</td> </td>
{#if tu?.name} {#if tu?.name}
<Tooltip <Tooltip
target={`topName-accs-${tu.id}`} target={`${canvasPrefix}-topName-accs-${tu.id}`}
placement="left" placement="left"
>{scrambleNames ? scramble(tu.name) : tu.name}</Tooltip >{scrambleNames ? scramble(tu.name) : tu.name}</Tooltip
> >
@@ -516,7 +525,7 @@
</h4> </h4>
<Pie <Pie
{useAltColors} {useAltColors}
canvasId="hpcpie-accs-projects" canvasId="{canvasPrefix}-hpcpie-accs-projects"
size={colWidthAccs * 0.75} size={colWidthAccs * 0.75}
sliceLabel={'GPUs'} sliceLabel={'GPUs'}
quantities={$topAccsQuery.data.topProjects.map( quantities={$topAccsQuery.data.topProjects.map(
@@ -536,7 +545,7 @@
<tr> <tr>
<td><Icon name="circle-fill" style="color: {legendColors(i)};" /></td> <td><Icon name="circle-fill" style="color: {legendColors(i)};" /></td>
<td> <td>
<a target="_blank" href="/monitoring/jobs/?cluster={cluster}&state=running&project={tp.id}&projectMatch=eq" <a target="_blank" href="/monitoring/jobs/?cluster={presetCluster}{presetSubCluster ? '&partition='+presetSubCluster : ''}&state=running&project={tp.id}&projectMatch=eq"
>{scrambleNames ? scramble(tp.id) : tp.id} >{scrambleNames ? scramble(tp.id) : tp.id}
</a> </a>
</td> </td>

View File

@@ -69,9 +69,9 @@
 		})
 	);
 
-	let extendedLegendData = $derived($nodeJobsData?.data ? buildExtendedLegend() : null);
-	let refinedData = $derived(nodeData?.metrics ? sortAndSelectScope(nodeData.metrics) : null);
-	let dataHealth = $derived(refinedData.filter((rd) => rd.disabled === false).map((enabled) => (enabled?.data?.metric?.series?.length > 0)));
+	const extendedLegendData = $derived($nodeJobsData?.data ? buildExtendedLegend() : null);
+	const refinedData = $derived(nodeData?.metrics ? sortAndSelectScope(nodeData.metrics) : []);
+	const dataHealth = $derived(refinedData.filter((rd) => rd.disabled === false).map((enabled) => (enabled?.data?.metric?.series?.length > 0)));
 
 	/* Functions */
 	const selectScope = (nodeMetrics) =>