Merge pull request #484 from ClusterCockpit/dev

Dev
This commit is contained in:
Jan Eitzinger
2026-02-07 06:23:44 +01:00
committed by GitHub
25 changed files with 488 additions and 555 deletions

View File

@@ -6,9 +6,7 @@
package metricstore
import (
"cmp"
"fmt"
"slices"
"time"
cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger"
@@ -16,25 +14,18 @@ import (
)
// HealthCheckResponse represents the result of a health check operation.
//
// Status indicates the monitoring state (Full, Partial, Failed).
// Error contains any error encountered during the health check.
type HealthCheckResponse struct {
// Status is the monitoring state determined by the health check.
Status schema.MonitoringState
// Error is the error encountered during the health check, if any.
Error error
}
// MaxMissingDataPoints is the threshold for stale data detection.
// A buffer is considered healthy if the gap between the end of its data
// and the current time is at most MaxMissingDataPoints * frequency, i.e. a
// buffer may lag behind by up to five sampling intervals before it is
// treated as stale.
const MaxMissingDataPoints int64 = 5
// isBufferHealthy checks if a buffer has received data for the last MaxMissingDataPoints.
//
// Returns true if the buffer is healthy (recent data within threshold), false otherwise.
// A nil buffer or empty buffer is considered unhealthy.
// bufferExists returns true if the buffer is non-nil and contains data.
func (b *buffer) bufferExists() bool {
// Check if the buffer is empty
if b == nil || b.data == nil || len(b.data) == 0 {
return false
}
@@ -42,233 +33,139 @@ func (b *buffer) bufferExists() bool {
return true
}
// isBufferHealthy reports whether the buffer has recent data, i.e. whether
// the end of its data lies within MaxMissingDataPoints * frequency of the
// current time. A nil buffer is reported as unhealthy.
func (b *buffer) isBufferHealthy() bool {
	if b == nil {
		return false
	}
	// End of the buffered data: start plus one interval per stored point.
	bufferEnd := b.start + b.frequency*int64(len(b.data))
	return time.Now().Unix()-bufferEnd <= MaxMissingDataPoints*b.frequency
}
// mergeList returns the sorted union of list1 and list2 with duplicates
// removed. The element type must satisfy cmp.Ordered because the merged
// data is sorted.
//
// The result is built in freshly allocated storage, so neither input slice
// is reordered or overwritten. The returned slice is never nil.
func mergeList[T cmp.Ordered](list1, list2 []T) []T {
	merged := make([]T, 0, len(list1)+len(list2))
	merged = append(merged, list1...)
	merged = append(merged, list2...)

	slices.Sort(merged)
	// Compact drops consecutive duplicates, e.g. [1, 1, 2, 3, 3] -> [1, 2, 3].
	return slices.Compact(merged)
}
// getHealthyMetrics recursively collects healthy and degraded metrics at this level and below.
// collectMetricStatus walks the subtree rooted at l and classifies each
// expected metric into the healthy or degraded map.
//
// A metric is considered:
// - Healthy: buffer has recent data within MaxMissingDataPoints threshold AND has few/no NaN values
// - Degraded: buffer exists and has recent data, but contains more than MaxMissingDataPoints NaN values
//
// This routine walks the entire subtree starting from the current level.
//
// Parameters:
// - m: MemoryStore containing the global metric configuration
//
// Returns:
// - []string: Flat list of healthy metric names from this level and all children
// - []string: Flat list of degraded metric names (exist but have too many missing values)
// - error: Non-nil only for internal errors during recursion
//
// The routine mirrors healthCheck() but provides more granular classification:
// - healthCheck() finds problems (stale/missing)
// - getHealthyMetrics() separates healthy from degraded metrics
func (l *Level) getHealthyMetrics(m *MemoryStore, expectedMetrics []string) ([]string, []string, error) {
// Classification rules (evaluated per buffer, pessimistic):
// - A single stale buffer marks the metric as degraded permanently.
// - A healthy buffer only counts if no stale buffer has been seen.
// - Metrics absent from the global config or without any buffer remain
// in neither map and are later reported as missing.
func (l *Level) collectMetricStatus(m *MemoryStore, expectedMetrics []string, healthy, degraded map[string]bool) {
l.lock.RLock()
defer l.lock.RUnlock()
globalMetrics := m.Metrics
for _, metricName := range expectedMetrics {
if degraded[metricName] {
continue // already degraded, cannot improve
}
mc := m.Metrics[metricName]
b := l.metrics[mc.offset]
if b.bufferExists() {
if !b.isBufferHealthy() {
degraded[metricName] = true
delete(healthy, metricName)
} else if !degraded[metricName] {
healthy[metricName] = true
}
}
}
for _, lvl := range l.children {
lvl.collectMetricStatus(m, expectedMetrics, healthy, degraded)
}
}
// getHealthyMetrics walks the complete subtree rooted at l and classifies
// each expected metric by comparing the collected status against the
// expected list.
//
// Returns:
// - missingList: metrics not found in global config or without any buffer
// - degradedList: metrics with at least one stale buffer in the subtree
func (l *Level) getHealthyMetrics(m *MemoryStore, expectedMetrics []string) ([]string, []string) {
healthy := make(map[string]bool, len(expectedMetrics))
degraded := make(map[string]bool)
l.collectMetricStatus(m, expectedMetrics, healthy, degraded)
missingList := make([]string, 0)
degradedList := make([]string, 0)
// Phase 1: Check metrics at this level
for _, metricName := range expectedMetrics {
offset := globalMetrics[metricName].offset
b := l.metrics[offset]
if healthy[metricName] {
continue
}
if !b.bufferExists() {
missingList = append(missingList, metricName)
} else if !b.isBufferHealthy() {
if degraded[metricName] {
degradedList = append(degradedList, metricName)
} else {
missingList = append(missingList, metricName)
}
}
// Phase 2: Recursively check child levels
for _, lvl := range l.children {
childMissing, childDegraded, err := lvl.getHealthyMetrics(m, expectedMetrics)
if err != nil {
return nil, nil, err
}
missingList = mergeList(missingList, childMissing)
degradedList = mergeList(degradedList, childDegraded)
}
return missingList, degradedList, nil
return degradedList, missingList
}
// GetHealthyMetrics returns the degraded and missing metric lists for a node.
//
// It walks the metric tree starting from the node identified by selector
// and classifies each expected metric:
//   - Degraded: at least one stale buffer exists in the subtree
//   - Missing: no buffer holding data was found in the subtree
//
// Metrics present in expectedMetrics but absent from both returned lists
// are considered fully healthy.
//
// Returns, in this order: the degraded list, the missing list, and an error.
// The error is non-nil only when selector does not resolve to a level; both
// lists are nil in that case.
func (m *MemoryStore) GetHealthyMetrics(selector []string, expectedMetrics []string) ([]string, []string, error) {
	lvl := m.root.findLevel(selector)
	if lvl == nil {
		return nil, nil, fmt.Errorf("[METRICSTORE]> GetHealthyMetrics: host not found: %#v", selector)
	}

	degradedList, missingList := lvl.getHealthyMetrics(m, expectedMetrics)
	return degradedList, missingList, nil
}
// HealthCheck performs health checks on multiple nodes and returns their monitoring states.
// HealthCheck evaluates multiple nodes against a set of expected metrics
// and returns a monitoring state per node.
//
// This routine provides a batch health check interface that evaluates multiple nodes
// against a specific set of expected metrics. For each node, it determines the overall
// monitoring state based on which metrics are healthy, degraded, or missing.
//
// Health Status Classification:
// - MonitoringStateFull: All expected metrics are healthy (recent data, few missing values)
// - MonitoringStatePartial: Some metrics are degraded (many missing values) or missing
// - MonitoringStateFailed: Node not found or all expected metrics are missing/stale
//
// Parameters:
// - cluster: Cluster name (first element of selector path)
// - nodes: List of node hostnames to check
// - expectedMetrics: List of metric names that should be present on each node
//
// Returns:
// - map[string]schema.MonitoringState: Map keyed by hostname containing monitoring state for each node
// - error: Non-nil only for internal errors (individual node failures are captured as MonitoringStateFailed)
//
// Example usage:
//
// cluster := "emmy"
// nodes := []string{"node001", "node002", "node003"}
// expectedMetrics := []string{"load", "mem_used", "cpu_user", "cpu_system"}
// healthStates, err := ms.HealthCheck(cluster, nodes, expectedMetrics)
// if err != nil {
// return err
// }
// for hostname, state := range healthStates {
// fmt.Printf("Node %s: %s\n", hostname, state)
// }
//
// Note: This routine is optimized for batch operations where you need to check
// the same set of metrics across multiple nodes.
// States:
// - MonitoringStateFull: all expected metrics are healthy
// - MonitoringStatePartial: some metrics are missing or degraded
// - MonitoringStateFailed: node not found, or no healthy metrics at all
func (m *MemoryStore) HealthCheck(cluster string,
nodes []string, expectedMetrics []string,
) (map[string]schema.MonitoringState, error) {
results := make(map[string]schema.MonitoringState, len(nodes))
// Create a set of expected metrics for fast lookup
expectedSet := make(map[string]bool, len(expectedMetrics))
for _, metric := range expectedMetrics {
expectedSet[metric] = true
}
// Check each node
for _, hostname := range nodes {
selector := []string{cluster, hostname}
status := schema.MonitoringStateFull
healthyCount := 0
degradedCount := 0
missingCount := 0
// Get healthy and degraded metrics for this node
missingList, degradedList, err := m.GetHealthyMetrics(selector, expectedMetrics)
degradedList, missingList, err := m.GetHealthyMetrics(selector, expectedMetrics)
if err != nil {
// Node not found or internal error
results[hostname] = schema.MonitoringStateFailed
continue
}
missingCount = len(missingList)
degradedCount = len(degradedList)
uniqueList := mergeList(missingList, degradedList)
healthyCount = len(expectedMetrics) - len(uniqueList)
degradedCount := len(degradedList)
missingCount := len(missingList)
healthyCount := len(expectedMetrics) - degradedCount - missingCount
// Debug log missing and degraded metrics
if missingCount > 0 {
cclog.ComponentDebug("metricstore", "HealthCheck: node", hostname, "missing metrics:", missingList)
}
if degradedCount > 0 {
cclog.ComponentDebug("metricstore", "HealthCheck: node", hostname, "degraded metrics:", degradedList)
cclog.ComponentInfo("metricstore", "HealthCheck: node ", hostname, "degraded metrics:", degradedList)
}
if missingCount > 0 {
cclog.ComponentInfo("metricstore", "HealthCheck: node ", hostname, "missing metrics:", missingList)
}
// Determine overall health status
if missingCount > 0 || degradedCount > 0 {
if healthyCount == 0 {
// No healthy metrics at all
status = schema.MonitoringStateFailed
} else {
// Some healthy, some degraded/missing
status = schema.MonitoringStatePartial
}
switch {
case degradedCount == 0 && missingCount == 0:
results[hostname] = schema.MonitoringStateFull
case healthyCount == 0:
results[hostname] = schema.MonitoringStateFailed
default:
results[hostname] = schema.MonitoringStatePartial
}
// else: all metrics healthy, status remains MonitoringStateFull
results[hostname] = status
}
return results, nil

View File

@@ -303,39 +303,39 @@ func TestGetHealthyMetrics(t *testing.T) {
name string
selector []string
expectedMetrics []string
wantMissing []string
wantDegraded []string
wantMissing []string
wantErr bool
}{
{
name: "mixed health states",
selector: []string{"testcluster", "testnode"},
expectedMetrics: []string{"load", "mem_used", "cpu_user"},
wantMissing: []string{"cpu_user"},
wantDegraded: []string{"mem_used"},
wantMissing: []string{"cpu_user"},
wantErr: false,
},
{
name: "node not found",
selector: []string{"testcluster", "nonexistent"},
expectedMetrics: []string{"load"},
wantMissing: nil,
wantDegraded: nil,
wantMissing: nil,
wantErr: true,
},
{
name: "check only healthy metric",
selector: []string{"testcluster", "testnode"},
expectedMetrics: []string{"load"},
wantMissing: []string{},
wantDegraded: []string{},
wantMissing: []string{},
wantErr: false,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
missing, degraded, err := ms.GetHealthyMetrics(tt.selector, tt.expectedMetrics)
degraded, missing, err := ms.GetHealthyMetrics(tt.selector, tt.expectedMetrics)
if (err != nil) != tt.wantErr {
t.Errorf("GetHealthyMetrics() error = %v, wantErr %v", err, tt.wantErr)
@@ -346,17 +346,6 @@ func TestGetHealthyMetrics(t *testing.T) {
return
}
// Check missing list
if len(missing) != len(tt.wantMissing) {
t.Errorf("GetHealthyMetrics() missing = %v, want %v", missing, tt.wantMissing)
} else {
for i, m := range tt.wantMissing {
if missing[i] != m {
t.Errorf("GetHealthyMetrics() missing[%d] = %v, want %v", i, missing[i], m)
}
}
}
// Check degraded list
if len(degraded) != len(tt.wantDegraded) {
t.Errorf("GetHealthyMetrics() degraded = %v, want %v", degraded, tt.wantDegraded)
@@ -367,6 +356,17 @@ func TestGetHealthyMetrics(t *testing.T) {
}
}
}
// Check missing list
if len(missing) != len(tt.wantMissing) {
t.Errorf("GetHealthyMetrics() missing = %v, want %v", missing, tt.wantMissing)
} else {
for i, m := range tt.wantMissing {
if missing[i] != m {
t.Errorf("GetHealthyMetrics() missing[%d] = %v, want %v", i, missing[i], m)
}
}
}
})
}
}

View File

@@ -12,6 +12,7 @@ API_USER="demo" # User for JWT generation
# BASE NETWORK CONFIG
SERVICE_ADDRESS="http://localhost:8080"
NATS_SERVER="nats://0.0.0.0:4222"
REST_URL="${SERVICE_ADDRESS}/api/write"
# NATS CREDENTIALS
NATS_USER="root"
@@ -27,18 +28,22 @@ JWT_STATIC="eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJleHAiOjE3NzU3Nzg4NDQsImlhdCI
ALEX_HOSTS="a0603 a0903 a0832 a0329 a0702 a0122 a1624 a0731 a0224 a0704 a0631 a0225 a0222 a0427 a0603 a0429 a0833 a0705 a0901 a0601 a0227 a0804 a0322 a0226 a0126 a0129 a0605 a0801 a0934 a1622 a0902 a0428 a0537 a1623 a1722 a0228 a0701 a0326 a0327 a0123 a0321 a1621 a0323 a0124 a0534 a0931 a0324 a0933 a0424 a0905 a0128 a0532 a0805 a0521 a0535 a0932 a0127 a0325 a0633 a0831 a0803 a0426 a0425 a0229 a1721 a0602 a0632 a0223 a0422 a0423 a0536 a0328 a0703 anvme7 a0125 a0221 a0604 a0802 a0522 a0531 a0533 a0904"
FRITZ_HOSTS="f0201 f0202 f0203 f0204 f0205 f0206 f0207 f0208 f0209 f0210 f0211 f0212 f0213 f0214 f0215 f0217 f0218 f0219 f0220 f0221 f0222 f0223 f0224 f0225 f0226 f0227 f0228 f0229 f0230 f0231 f0232 f0233 f0234 f0235 f0236 f0237 f0238 f0239 f0240 f0241 f0242 f0243 f0244 f0245 f0246 f0247 f0248 f0249 f0250 f0251 f0252 f0253 f0254 f0255 f0256 f0257 f0258 f0259 f0260 f0261 f0262 f0263 f0264 f0378"
METRICS_STD="cpu_load cpu_user flops_any cpu_irq cpu_system ipc cpu_idle cpu_iowait core_power clock"
METRICS_NODE="cpu_irq cpu_load mem_cached net_bytes_in cpu_user cpu_idle nfs4_read mem_used nfs4_write nfs4_total ib_xmit ib_xmit_pkts net_bytes_out cpu_iowait ib_recv cpu_system ib_recv_pkts"
ALEX_METRICS_HWTHREAD="cpu_user flops_any clock core_power ipc"
ALEX_METRICS_SOCKET="mem_bw cpu_power"
ALEX_METRICS_ACC="acc_utilization acc_mem_used acc_power nv_mem_util nv_temp nv_sm_clock"
ALEX_METRICS_NODE="cpu_load mem_used net_bytes_in net_bytes_out"
FRITZ_METRICS_HWTHREAD="cpu_user flops_any flops_sp flops_dp clock ipc vectorization_ratio"
FRITZ_METRICS_SOCKET="mem_bw cpu_power mem_power"
FRITZ_METRICS_NODE="cpu_load mem_used ib_recv ib_xmit ib_recv_pkts ib_xmit_pkts nfs4_read nfs4_total"
ACCEL_IDS="00000000:49:00.0 00000000:0E:00.0 00000000:D1:00.0 00000000:90:00.0 00000000:13:00.0 00000000:96:00.0 00000000:CC:00.0 00000000:4F:00.0"
# ==========================================
# SETUP ENV (URL & TOKEN)
# ==========================================
if [ "$CONNECTION_SCOPE" == "INTERNAL" ]; then
# 1. Set URL for Internal Mode
REST_URL="${SERVICE_ADDRESS}/metricstore/api/write"
if [ "$CONNECTION_SCOPE" == "INTERNAL" ]; then
# 2. Generate JWT dynamically
echo "Setup: INTERNAL mode selected."
echo "Generating JWT for user: $API_USER"
@@ -48,10 +53,7 @@ if [ "$CONNECTION_SCOPE" == "INTERNAL" ]; then
echo "Error: Failed to generate JWT from cc-backend."
exit 1
fi
else
# 1. Set URL for External Mode
REST_URL="${SERVICE_ADDRESS}/api/write"
else
# 2. Use Static JWT
echo "Setup: EXTERNAL mode selected."
echo "Using static JWT."
@@ -96,7 +98,7 @@ while [ true ]; do
# 1. ALEX: HWTHREAD
echo "Generating Alex: hwthread"
{
for metric in $METRICS_STD; do
for metric in $ALEX_METRICS_HWTHREAD; do
for hostname in $ALEX_HOSTS; do
for id in {0..127}; do
echo "$metric,cluster=alex,hostname=$hostname,type=hwthread,type-id=$id value=$((1 + RANDOM % 100)).0 $timestamp"
@@ -109,7 +111,7 @@ while [ true ]; do
# 2. FRITZ: HWTHREAD
echo "Generating Fritz: hwthread"
{
for metric in $METRICS_STD; do
for metric in $FRITZ_METRICS_HWTHREAD; do
for hostname in $FRITZ_HOSTS; do
for id in {0..71}; do
echo "$metric,cluster=fritz,hostname=$hostname,type=hwthread,type-id=$id value=$((1 + RANDOM % 100)).0 $timestamp"
@@ -122,7 +124,7 @@ while [ true ]; do
# 3. ALEX: ACCELERATOR
echo "Generating Alex: accelerator"
{
for metric in $METRICS_STD; do
for metric in $ALEX_METRICS_ACC; do
for hostname in $ALEX_HOSTS; do
for id in $ACCEL_IDS; do
echo "$metric,cluster=alex,hostname=$hostname,type=accelerator,type-id=$id value=$((1 + RANDOM % 100)).0 $timestamp"
@@ -132,23 +134,10 @@ while [ true ]; do
} > sample_alex.txt
send_payload "sample_alex.txt" "alex"
# 4. ALEX: MEMORY DOMAIN
echo "Generating Alex: memoryDomain"
{
for metric in $METRICS_STD; do
for hostname in $ALEX_HOSTS; do
for id in {0..7}; do
echo "$metric,cluster=alex,hostname=$hostname,type=memoryDomain,type-id=$id value=$((1 + RANDOM % 100)).0 $timestamp"
done
done
done
} > sample_alex.txt
send_payload "sample_alex.txt" "alex"
# 5. ALEX: SOCKET
echo "Generating Alex: socket"
{
for metric in $METRICS_STD; do
for metric in $ALEX_METRICS_SOCKET; do
for hostname in $ALEX_HOSTS; do
for id in {0..1}; do
echo "$metric,cluster=alex,hostname=$hostname,type=socket,type-id=$id value=$((1 + RANDOM % 100)).0 $timestamp"
@@ -161,7 +150,7 @@ while [ true ]; do
# 6. FRITZ: SOCKET
echo "Generating Fritz: socket"
{
for metric in $METRICS_STD; do
for metric in $FRITZ_METRICS_SOCKET; do
for hostname in $FRITZ_HOSTS; do
for id in {0..1}; do
echo "$metric,cluster=fritz,hostname=$hostname,type=socket,type-id=$id value=$((1 + RANDOM % 100)).0 $timestamp"
@@ -174,7 +163,7 @@ while [ true ]; do
# 7. ALEX: NODE
echo "Generating Alex: node"
{
for metric in $METRICS_NODE; do
for metric in $ALEX_METRICS_NODE; do
for hostname in $ALEX_HOSTS; do
echo "$metric,cluster=alex,hostname=$hostname,type=node value=$((1 + RANDOM % 100)).0 $timestamp"
done
@@ -185,7 +174,7 @@ while [ true ]; do
# 8. FRITZ: NODE
echo "Generating Fritz: node"
{
for metric in $METRICS_NODE; do
for metric in $FRITZ_METRICS_NODE; do
for hostname in $FRITZ_HOSTS; do
echo "$metric,cluster=fritz,hostname=$hostname,type=node value=$((1 + RANDOM % 100)).0 $timestamp"
done

View File

@@ -68,12 +68,8 @@
energyFootprint { hardware, metric, value }
}
`);
const client = getContextClient();
const ccconfig = getContext("cc-config");
const showRoofline = !!ccconfig[`jobView_showRoofline`];
const showStatsTable = !!ccconfig[`jobView_showStatTable`];
/* Note: Actual metric data queried in <Metric> Component, only require base infos here -> reduce backend load by requesting just stats */
const client = getContextClient();
const query = gql`
query ($dbid: ID!, $selectedMetrics: [String!]!, $selectedScopes: [MetricScope!]!) {
scopedJobStats(id: $dbid, metrics: $selectedMetrics, scopes: $selectedScopes) {
@@ -89,25 +85,68 @@
/* State Init */
let plots = $state({});
let isMetricsSelectionOpen = $state(false);
let selectedMetrics = $state([]);
let selectedScopes = $state([]);
let totalMetrics = $state(0);
/* Derived */
const showSummary = $derived((!!ccconfig[`jobView_showFootprint`] || !!ccconfig[`jobView_showPolarPlot`]))
/* Derived Init Return */
const thisJob = $derived($initq?.data ? $initq.data.job : null);
/* Derived Settings */
const globalMetrics = $derived(thisJob ? getContext("globalMetrics") : null);
const clusterInfo = $derived(thisJob ? getContext("clusters") : null);
const ccconfig = $derived(thisJob ? getContext("cc-config") : null);
const showRoofline = $derived(ccconfig ? !!ccconfig[`jobView_showRoofline`] : false);
const showStatsTable = $derived(ccconfig ? !!ccconfig[`jobView_showStatTable`] : false);
const showSummary = $derived(ccconfig ? (!!ccconfig[`jobView_showFootprint`] || !!ccconfig[`jobView_showPolarPlot`]) : false)
/* Derived Var Preprocessing*/
/* Metrics preselected for the job view. The most specific configuration wins:
   cluster + subcluster, then cluster, then the global default. Always yields
   an array so consumers can safely read .length or spread the value. */
let selectedMetrics = $derived.by(() => {
  if (!thisJob || !ccconfig) return [];

  const baseKey = "metricConfig_jobViewPlotMetrics";
  const { cluster, subCluster } = thisJob;

  const candidates = [];
  if (cluster && subCluster) candidates.push(ccconfig[`${baseKey}:${cluster}:${subCluster}`]);
  if (cluster) candidates.push(ccconfig[`${baseKey}:${cluster}`]);
  candidates.push(ccconfig[baseKey]);

  // First configured (truthy) entry; fall back to an empty selection when
  // none of the keys is set instead of returning undefined.
  return candidates.find((entry) => entry) || [];
});
/* Scopes requested for the job's metrics: always "node", plus "accelerator"
   when any selected metric is accelerator-scoped in the job's subcluster,
   plus "socket" and "core" for single-node jobs. */
let selectedScopes = $derived.by(() => {
  const pendingScopes = ["node"];
  if (thisJob) {
    // Resolve the job's subcluster once instead of once per metric. Optional
    // chaining keeps an unknown cluster or subcluster from throwing.
    const jobCluster = clusterInfo?.find((c) => c.name == thisJob.cluster);
    const jobSubCluster = jobCluster?.subClusters?.find((sc) => sc.name == thisJob.subCluster);

    const accScopeDefault = (selectedMetrics ?? []).some(
      (m) => jobSubCluster?.metricConfig?.find((smc) => smc.name == m)?.scope === "accelerator",
    );

    if (accScopeDefault) pendingScopes.push("accelerator");
    if (thisJob.numNodes === 1) {
      pendingScopes.push("socket");
      pendingScopes.push("core");
    }
  }
  return [...new Set(pendingScopes)];
});
/* Derived Query and Postprocessing*/
const jobMetrics = $derived(queryStore({
client: client,
query: query,
variables: { dbid, selectedMetrics, selectedScopes },
})
);
const missingMetrics = $derived.by(() => {
if ($initq?.data && $jobMetrics?.data) {
let job = $initq.data.job;
if (thisJob && $jobMetrics?.data) {
let metrics = $jobMetrics.data.scopedJobStats;
let metricNames = $initq.data.globalMetrics.reduce((names, gm) => {
if (gm.availability.find((av) => av.cluster === job.cluster)) {
let metricNames = globalMetrics.reduce((names, gm) => {
if (gm.availability.find((av) => av.cluster === thisJob.cluster)) {
names.push(gm.name);
}
return names;
@@ -118,9 +157,10 @@
!metrics.some((jm) => jm.name == metric) &&
selectedMetrics.includes(metric) &&
!checkMetricDisabled(
globalMetrics,
metric,
$initq.data.job.cluster,
$initq.data.job.subCluster,
thisJob.cluster,
thisJob.subCluster,
),
);
} else {
@@ -129,17 +169,16 @@
});
const missingHosts = $derived.by(() => {
if ($initq?.data && $jobMetrics?.data) {
let job = $initq.data.job;
if (thisJob && $jobMetrics?.data) {
let metrics = $jobMetrics.data.scopedJobStats;
let metricNames = $initq.data.globalMetrics.reduce((names, gm) => {
if (gm.availability.find((av) => av.cluster === job.cluster)) {
let metricNames = globalMetrics.reduce((names, gm) => {
if (gm.availability.find((av) => av.cluster === thisJob.cluster)) {
names.push(gm.name);
}
return names;
}, []);
return job.resources
return thisJob.resources
.map(({ hostname }) => ({
hostname: hostname,
metrics: metricNames.filter(
@@ -165,51 +204,19 @@
? "Loading..."
: $initq?.error
? "Error"
: `Job ${$initq.data.job.jobId} - ClusterCockpit`;
});
/* On Init */
getContext("on-init")(() => {
let job = $initq.data.job;
if (!job) return;
const pendingMetrics = (
ccconfig[`metricConfig_jobViewPlotMetrics:${job.cluster}:${job.subCluster}`] ||
ccconfig[`metricConfig_jobViewPlotMetrics:${job.cluster}`]
) ||
$initq.data.globalMetrics.reduce((names, gm) => {
if (gm.availability.find((av) => av.cluster === job.cluster && av.subClusters.includes(job.subCluster))) {
names.push(gm.name);
}
return names;
}, [])
// Select default Scopes to load: Check before if any metric has accelerator scope by default
const accScopeDefault = [...pendingMetrics].some(function (m) {
const cluster = $initq.data.clusters.find((c) => c.name == job.cluster);
const subCluster = cluster.subClusters.find((sc) => sc.name == job.subCluster);
return subCluster.metricConfig.find((smc) => smc.name == m)?.scope === "accelerator";
});
const pendingScopes = ["node"]
if (accScopeDefault) pendingScopes.push("accelerator")
if (job.numNodes === 1) {
pendingScopes.push("socket")
pendingScopes.push("core")
}
selectedMetrics = [...new Set(pendingMetrics)];
selectedScopes = [...new Set(pendingScopes)];
: `Job ${thisJob.jobId} - ClusterCockpit`;
});
/* Functions */
const orderAndMap = (grouped, selectedMetrics) =>
selectedMetrics.map((metric) => ({
const orderAndMap = (grouped, inputMetrics) =>
inputMetrics.map((metric) => ({
metric: metric,
data: grouped.find((group) => group[0].name == metric),
disabled: checkMetricDisabled(
globalMetrics,
metric,
$initq.data.job.cluster,
$initq.data.job.subCluster,
thisJob.cluster,
thisJob.subCluster,
),
}));
</script>
@@ -219,34 +226,34 @@
<Col xs={12} md={6} xl={3} class="mb-3 mb-xxl-0">
{#if $initq.error}
<Card body color="danger">{$initq.error.message}</Card>
{:else if $initq?.data}
{:else if thisJob}
<Card class="overflow-auto" style="height: auto;">
<TabContent> <!-- on:tab={(e) => (status = e.detail)} -->
{#if $initq.data?.job?.metaData?.message}
{#if thisJob?.metaData?.message}
<TabPane tabId="admin-msg" tab="Admin Note" active>
<CardBody>
<Card body class="mb-2" color="warning">
<h5>Job {$initq.data?.job?.jobId} ({$initq.data?.job?.cluster})</h5>
<h5>Job {thisJob?.jobId} ({thisJob?.cluster})</h5>
The following note was added by administrators:
</Card>
<Card body>
{@html $initq.data.job.metaData.message}
{@html thisJob.metaData.message}
</Card>
</CardBody>
</TabPane>
{/if}
<TabPane tabId="meta-info" tab="Job Info" active={$initq.data?.job?.metaData?.message?false:true}>
<TabPane tabId="meta-info" tab="Job Info" active={thisJob?.metaData?.message?false:true}>
<CardBody class="pb-2">
<JobInfo job={$initq.data.job} {username} {authlevel} {roles} showTagEdit/>
<JobInfo job={thisJob} {username} {authlevel} {roles} showTagEdit/>
</CardBody>
</TabPane>
{#if $initq.data.job.concurrentJobs != null && $initq.data.job.concurrentJobs.items.length != 0}
{#if thisJob.concurrentJobs != null && thisJob.concurrentJobs.items.length != 0}
<TabPane tabId="shared-jobs">
<span slot="tab">
{$initq.data.job.concurrentJobs.items.length} Concurrent Jobs
{thisJob.concurrentJobs.items.length} Concurrent Jobs
</span>
<CardBody>
<ConcurrentJobs cJobs={$initq.data.job.concurrentJobs} showLinks={(authlevel > roles.manager)}/>
<ConcurrentJobs cJobs={thisJob.concurrentJobs} showLinks={(authlevel > roles.manager)}/>
</CardBody>
</TabPane>
{/if}
@@ -261,9 +268,9 @@
<Col xs={12} md={6} xl={4} xxl={3} class="mb-3 mb-xxl-0">
{#if $initq.error}
<Card body color="danger">{$initq.error.message}</Card>
{:else if $initq?.data}
{:else if thisJob}
{#if showSummary}
<JobSummary job={$initq.data.job}/>
<JobSummary job={thisJob}/>
{/if}
{:else}
<Spinner secondary />
@@ -274,9 +281,9 @@
<Col xs={12} md={12} xl={5} xxl={6}>
{#if $initq.error}
<Card body color="danger">{$initq.error.message}</Card>
{:else if $initq?.data}
{:else if thisJob}
{#if showRoofline}
<JobRoofline job={$initq.data.job} clusters={$initq.data.clusters}/>
<JobRoofline job={thisJob} {clusterInfo}/>
{/if}
{:else}
<Spinner secondary />
@@ -285,10 +292,10 @@
</Row>
<!-- Row 2: Energy Information if available -->
{#if $initq?.data && $initq.data.job.energyFootprint.length != 0}
{#if thisJob && thisJob?.energyFootprint?.length != 0}
<Row class="mb-3">
<Col>
<EnergySummary jobId={$initq.data.job.jobId} jobEnergy={$initq.data.job.energy} jobEnergyFootprint={$initq.data.job.energyFootprint}/>
<EnergySummary jobId={thisJob.jobId} jobEnergy={thisJob.energy} jobEnergyFootprint={thisJob.energyFootprint}/>
</Col>
</Row>
{/if}
@@ -297,7 +304,7 @@
<Card class="mb-3">
<CardBody>
<Row class="mb-2">
{#if $initq?.data}
{#if thisJob}
<Col xs="auto">
<Button outline onclick={() => (isMetricsSelectionOpen = true)} color="primary">
Select Metrics (Selected {selectedMetrics.length} of {totalMetrics} available)
@@ -310,7 +317,7 @@
{#if $jobMetrics.error}
<Row class="mt-2">
<Col>
{#if $initq?.data && ($initq.data.job?.monitoringStatus == 0 || $initq.data.job?.monitoringStatus == 2)}
{#if thisJob && (thisJob?.monitoringStatus == 0 || thisJob?.monitoringStatus == 2)}
<Card body color="warning">Not monitored or archiving failed</Card>
<br />
{/if}
@@ -323,18 +330,18 @@
<Spinner secondary />
</Col>
</Row>
{:else if $initq?.data && $jobMetrics?.data?.scopedJobStats}
{:else if thisJob && $jobMetrics?.data?.scopedJobStats}
<!-- Note: Ignore '#snippet' Error in IDE -->
{#snippet gridContent(item)}
{#if item.data}
<Metric
bind:this={plots[item.metric]}
job={$initq.data.job}
job={thisJob}
metricName={item.metric}
metricUnit={$initq.data.globalMetrics.find((gm) => gm.name == item.metric)?.unit}
nativeScope={$initq.data.globalMetrics.find((gm) => gm.name == item.metric)?.scope}
metricUnit={globalMetrics.find((gm) => gm.name == item.metric)?.unit}
nativeScope={globalMetrics.find((gm) => gm.name == item.metric)?.scope}
presetScopes={item.data.map((x) => x.scope)}
isShared={$initq.data.job.shared != "none"}
isShared={thisJob.shared != "none"}
/>
{:else if item.disabled == true}
<Card color="info">
@@ -342,7 +349,7 @@
<b>Disabled Metric</b>
</CardHeader>
<CardBody>
<p>Metric <b>{item.metric}</b> is disabled for cluster <b>{$initq.data.job.cluster}:{$initq.data.job.subCluster}</b>.</p>
<p>Metric <b>{item.metric}</b> is disabled for cluster <b>{thisJob.cluster}:{thisJob.subCluster}</b>.</p>
<p class="mb-1">To remove this card, open metric selection and press "Close and Apply".</p>
</CardBody>
</Card>
@@ -353,7 +360,7 @@
</CardHeader>
<CardBody>
<p>No dataset(s) returned for <b>{item.metric}</b>.</p>
<p class="mb-1">Metric was not found in metric store for cluster <b>{$initq.data.job.cluster}</b>.</p>
<p class="mb-1">Metric was not found in metric store for cluster <b>{thisJob.cluster}</b>.</p>
</CardBody>
</Card>
{/if}
@@ -374,7 +381,7 @@
<!-- Metadata && Statistics Table -->
<Row class="mb-3">
<Col>
{#if $initq?.data}
{#if thisJob}
<Card>
<TabContent>
{#if somethingMissing}
@@ -409,12 +416,12 @@
{/if}
{#if showStatsTable}
<!-- Includes <TabPane> Statistics Table with Independent GQL Query -->
<StatsTab job={$initq.data.job} clusters={$initq.data.clusters} tabActive={!somethingMissing}/>
<StatsTab job={thisJob} {clusterInfo} {globalMetrics} {ccconfig} tabActive={!somethingMissing}/>
{/if}
<TabPane tabId="job-script" tab="Job Script">
<div class="pre-wrapper">
{#if $initq.data.job.metaData?.jobScript}
<pre><code>{$initq.data.job.metaData?.jobScript}</code></pre>
{#if thisJob.metaData?.jobScript}
<pre><code>{thisJob.metaData?.jobScript}</code></pre>
{:else}
<Card body color="warning">No job script available</Card>
{/if}
@@ -422,8 +429,8 @@
</TabPane>
<TabPane tabId="slurm-info" tab="Slurm Info">
<div class="pre-wrapper">
{#if $initq.data.job.metaData?.slurmInfo}
<pre><code>{$initq.data.job.metaData?.slurmInfo}</code></pre>
{#if thisJob.metaData?.slurmInfo}
<pre><code>{thisJob.metaData?.slurmInfo}</code></pre>
{:else}
<Card body color="warning"
>No additional slurm information available</Card
@@ -437,15 +444,15 @@
</Col>
</Row>
{#if $initq?.data}
{#if thisJob}
<MetricSelection
bind:isOpen={isMetricsSelectionOpen}
bind:totalMetrics
presetMetrics={selectedMetrics}
cluster={$initq.data.job.cluster}
subCluster={$initq.data.job.subCluster}
cluster={thisJob.cluster}
subCluster={thisJob.subCluster}
configName="metricConfig_jobViewPlotMetrics"
preInitialized
{globalMetrics}
applyMetrics={(newMetrics) =>
selectedMetrics = [...newMetrics]
}

View File

@@ -36,7 +36,6 @@
/* Const Init */
const { query: initq } = init();
const ccconfig = getContext("cc-config");
const matchedJobCompareLimit = 500;
/* State Init */
@@ -52,26 +51,36 @@
let isMetricsSelectionOpen = $state(false);
let sorting = $state({ field: "startTime", type: "col", order: "DESC" });
/* Derived Init Return */
const thisInit = $derived($initq?.data ? true : false);
/* Derived */
const ccconfig = $derived(thisInit ? getContext("cc-config") : null);
const globalMetrics = $derived(thisInit ? getContext("globalMetrics") : null);
let presetProject = $derived(filterPresets?.project ? filterPresets.project : "");
let selectedCluster = $derived(filterPresets?.cluster ? filterPresets.cluster : null);
let selectedSubCluster = $derived(filterPresets?.partition ? filterPresets.partition : null);
let metrics = $derived.by(() => {
if (selectedCluster) {
if (selectedSubCluster) {
return ccconfig[`metricConfig_jobListMetrics:${selectedCluster}:${selectedSubCluster}`] ||
ccconfig[`metricConfig_jobListMetrics:${selectedCluster}`] ||
if (thisInit && ccconfig) {
if (selectedCluster) {
if (selectedSubCluster) {
return ccconfig[`metricConfig_jobListMetrics:${selectedCluster}:${selectedSubCluster}`] ||
ccconfig[`metricConfig_jobListMetrics:${selectedCluster}`] ||
ccconfig.metricConfig_jobListMetrics
}
return ccconfig[`metricConfig_jobListMetrics:${selectedCluster}`] ||
ccconfig.metricConfig_jobListMetrics
}
return ccconfig[`metricConfig_jobListMetrics:${selectedCluster}`] ||
ccconfig.metricConfig_jobListMetrics
return ccconfig.metricConfig_jobListMetrics
}
return ccconfig.metricConfig_jobListMetrics
return [];
});
let showFootprint = $derived(selectedCluster
? !!ccconfig[`jobList_showFootprint:${selectedCluster}`]
: !!ccconfig.jobList_showFootprint
let showFootprint = $derived((thisInit && ccconfig)
? selectedCluster
? ccconfig[`jobList_showFootprint:${selectedCluster}`]
: ccconfig.jobList_showFootprint
: {}
);
/* Functions */
@@ -219,6 +228,7 @@
<Sorting
bind:isOpen={isSortingOpen}
presetSorting={sorting}
{globalMetrics}
applySorting={(newSort) =>
sorting = {...newSort}
}
@@ -232,6 +242,7 @@
subCluster={selectedSubCluster}
configName="metricConfig_jobListMetrics"
footprintSelect
{globalMetrics}
applyMetrics={(newMetrics) =>
metrics = [...newMetrics]
}

View File

@@ -49,14 +49,10 @@
/* Const Init */
const { query: initq } = init();
const initialized = getContext("initialized")
const globalMetrics = getContext("globalMetrics")
const ccconfig = getContext("cc-config");
const clusters = getContext("clusters");
const client = getContextClient();
const nowEpoch = Date.now();
const paging = { itemsPerPage: 50, page: 1 };
const sorting = { field: "startTime", type: "col", order: "DESC" };
const client = getContextClient();
const nodeMetricsQuery = gql`
query ($cluster: String!, $nodes: [String!], $from: Time!, $to: Time!) {
nodeMetrics(cluster: $cluster, nodes: $nodes, from: $from, to: $to) {
@@ -112,14 +108,32 @@
let from = $state(presetFrom ? presetFrom : new Date(nowEpoch - (4 * 3600 * 1000)));
// svelte-ignore state_referenced_locally
let to = $state(presetTo ? presetTo : new Date(nowEpoch));
let systemUnits = $state({});
/* Derived Init Return */
const thisInit = $derived($initq?.data ? true : false);
/* Derived */
const ccconfig = $derived(thisInit ? getContext("cc-config") : null);
const globalMetrics = $derived(thisInit ? getContext("globalMetrics") : null);
const clusterInfos = $derived(thisInit ? getContext("clusters") : null);
const filter = $derived([
{ cluster: { eq: cluster } },
{ node: { contains: hostname } },
{ state: ["running"] },
]);
// Printable unit (prefix + base) for every metric that lists the current cluster in its availability.
// Stays an empty object until the init query has returned.
const systemUnits = $derived.by(() => {
  const pendingUnits = {};
  // Require the metric list itself as well: it is null before init and comes from a context lookup afterwards.
  if (thisInit && globalMetrics) {
    const systemMetrics = globalMetrics.filter((gm) => gm?.availability?.find((av) => av.cluster == cluster));
    for (const sm of systemMetrics) {
      pendingUnits[sm.name] = (sm?.unit?.prefix ? sm.unit.prefix : "") + (sm?.unit?.base ? sm.unit.base : "");
    }
  }
  return pendingUnits;
});
const nodeMetricsData = $derived(queryStore({
client: client,
query: nodeMetricsQuery,
@@ -140,20 +154,6 @@
);
const thisNodeState = $derived($nodeMetricsData?.data?.nodeMetrics[0]?.state ? $nodeMetricsData.data.nodeMetrics[0].state : 'notindb');
/* Effect */
$effect(() => {
loadUnits($initialized);
});
/* Functions */
function loadUnits(isInitialized) {
if (!isInitialized) return
const systemMetrics = [...globalMetrics.filter((gm) => gm?.availability.find((av) => av.cluster == cluster))]
for (let sm of systemMetrics) {
systemUnits[sm.name] = (sm?.unit?.prefix ? sm.unit.prefix : "") + (sm?.unit?.base ? sm.unit.base : "")
}
}
</script>
<Row cols={{ xs: 2, lg: 5 }}>
@@ -246,7 +246,7 @@
<MetricPlot
metric={item.name}
timestep={item.metric.timestep}
cluster={clusters.find((c) => c.name == cluster)}
cluster={clusterInfos.find((c) => c.name == cluster)}
subCluster={$nodeMetricsData.data.nodeMetrics[0].subCluster}
series={item.metric.series}
enableFlip
@@ -277,6 +277,7 @@
.map((m) => ({
...m,
disabled: checkMetricDisabled(
globalMetrics,
m.name,
cluster,
$nodeMetricsData.data.nodeMetrics[0].subCluster,

View File

@@ -51,13 +51,6 @@
/* Const Init */
const { query: initq } = init();
const client = getContextClient();
const ccconfig = getContext("cc-config");
const initialized = getContext("initialized");
const globalMetrics = getContext("globalMetrics");
const resampleConfig = getContext("resampling") || null;
const resampleResolutions = resampleConfig ? [...resampleConfig.resolutions] : [];
const resampleDefault = resampleConfig ? Math.max(...resampleConfig.resolutions) : 0;
const stateOptions = ['all', 'allocated', 'idle', 'reserved', 'mixed', 'down', 'unknown', 'notindb'];
const nowDate = new Date(Date.now());
@@ -65,35 +58,55 @@
let timeoutId = null;
/* State Init */
let selectedResolution = $state(resampleConfig ? resampleDefault : 0);
let hostnameFilter = $state("");
let hoststateFilter = $state("all");
let pendingHostnameFilter = $state("");
let isMetricsSelectionOpen = $state(false);
/* Derived Init Return */
const thisInit = $derived($initq?.data ? true : false);
/* Derived States */
const ccconfig = $derived(thisInit ? getContext("cc-config") : null);
const globalMetrics = $derived(thisInit ? getContext("globalMetrics") : null);
const resampleConfig = $derived(thisInit ? getContext("resampling") : null);
const resampleResolutions = $derived(resampleConfig ? [...resampleConfig.resolutions] : []);
const resampleDefault = $derived(resampleConfig ? Math.max(...resampleConfig.resolutions) : 0);
const displayNodeOverview = $derived((displayType === 'OVERVIEW'));
const systemMetrics = $derived(globalMetrics ? [...globalMetrics.filter((gm) => gm?.availability.find((av) => av.cluster == cluster))] : []);
// Maps each cluster metric name to its printable unit (prefix + base); empty until init has finished.
const systemUnits = $derived.by(() => {
  if (!thisInit || systemMetrics.length === 0) return {};
  return systemMetrics.reduce((units, entry) => {
    const prefix = entry?.unit?.prefix ? entry.unit.prefix : "";
    const base = entry?.unit?.base ? entry.unit.base : "";
    units[entry.name] = prefix + base;
    return units;
  }, {});
});
let selectedResolution = $derived(resampleDefault);
let to = $derived(presetTo ? presetTo : new Date(Date.now()));
let from = $derived(presetFrom ? presetFrom : new Date(nowDate.setHours(nowDate.getHours() - 4)));
const displayNodeOverview = $derived((displayType === 'OVERVIEW'));
const systemMetrics = $derived($initialized ? [...globalMetrics.filter((gm) => gm?.availability.find((av) => av.cluster == cluster))] : []);
const presetSystemUnits = $derived(loadUnits(systemMetrics));
let selectedMetric = $derived.by(() => {
let configKey = `nodeOverview_selectedMetric`;
if (cluster) configKey += `:${cluster}`;
if (subCluster) configKey += `:${subCluster}`;
if ($initialized) {
if (thisInit) {
if (ccconfig[configKey]) return ccconfig[configKey]
else if (systemMetrics.length !== 0) return systemMetrics[0].name
}
return ""
});
let selectedMetrics = $derived.by(() => {
let configKey = `nodeList_selectedMetrics`;
if (cluster) configKey += `:${cluster}`;
if (subCluster) configKey += `:${subCluster}`;
if ($initialized) {
if (thisInit) {
if (ccconfig[configKey]) return ccconfig[configKey]
else if (systemMetrics.length >= 3) return [systemMetrics[0].name, systemMetrics[1].name, systemMetrics[2].name]
}
@@ -108,16 +121,6 @@
});
/* Functions */
function loadUnits(systemMetrics) {
let pendingUnits = {};
if (systemMetrics.length > 0) {
for (let sm of systemMetrics) {
pendingUnits[sm.name] = (sm?.unit?.prefix ? sm.unit.prefix : "") + (sm?.unit?.base ? sm.unit.base : "")
};
};
return {...pendingUnits};
};
// Wait after input for some time to prevent too many requests
function updateHostnameFilter() {
if (timeoutId != null) clearTimeout(timeoutId);
@@ -157,7 +160,7 @@
<!-- ROW1: Tools-->
<Row cols={{ xs: 2, lg: !displayNodeOverview ? (resampleConfig ? 6 : 5) : 5 }} class="mb-3">
{#if $initq?.data}
{#if thisInit}
<!-- List Metric Select Col-->
{#if !displayNodeOverview}
<Col>
@@ -234,7 +237,7 @@
<Input type="select" bind:value={selectedMetric}>
{#each systemMetrics as metric (metric.name)}
<option value={metric.name}
>{metric.name} {presetSystemUnits[metric.name] ? "("+presetSystemUnits[metric.name]+")" : ""}</option
>{metric.name} {systemUnits[metric.name] ? "("+systemUnits[metric.name]+")" : ""}</option
>
{:else}
<option disabled>No available options</option>
@@ -266,10 +269,11 @@
{:else}
{#if displayNodeOverview}
<!-- ROW2-1: Node Overview (Grid Included)-->
<NodeOverview {cluster} {ccconfig} {selectedMetric} {from} {to} {hostnameFilter} {hoststateFilter}/>
<NodeOverview {cluster} {ccconfig} {selectedMetric} {globalMetrics} {from} {to} {hostnameFilter} {hoststateFilter}/>
{:else}
<!-- ROW2-2: Node List (Grid Included)-->
<NodeList {cluster} {subCluster} {ccconfig} pendingSelectedMetrics={selectedMetrics} {selectedResolution} {hostnameFilter} {hoststateFilter} {from} {to} {presetSystemUnits}/>
<NodeList {cluster} {subCluster} {ccconfig} {globalMetrics}
pendingSelectedMetrics={selectedMetrics} {selectedResolution} {hostnameFilter} {hoststateFilter} {from} {to} {systemUnits}/>
{/if}
{/if}
@@ -279,6 +283,7 @@
presetMetrics={selectedMetrics}
{cluster}
{subCluster}
{globalMetrics}
configName="nodeList_selectedMetrics"
applyMetrics={(newMetrics) =>
selectedMetrics = [...newMetrics]

View File

@@ -56,12 +56,10 @@
/* Const Init */
const { query: initq } = init();
const ccconfig = getContext("cc-config");
const client = getContextClient();
const durationBinOptions = ["1m","10m","1h","6h","12h"];
const metricBinOptions = [10, 20, 50, 100];
const matchedJobCompareLimit = 500;
const shortDuration = ccconfig.jobList_hideShortRunningJobs; // Always configured
/* State Init */
// List & Control Vars
@@ -73,7 +71,6 @@
let isSortingOpen = $state(false);
let isMetricsSelectionOpen = $state(false);
let sorting = $state({ field: "startTime", type: "col", order: "DESC" });
let selectedHistogramsBuffer = $state({ all: (ccconfig['userView_histogramMetrics'] || []) })
let jobCompare = $state(null);
let matchedCompareJobs = $state(0);
let showCompare = $state(false);
@@ -84,26 +81,48 @@
let numDurationBins = $state("1h");
let numMetricBins = $state(10);
/* Derived Init Return */
const thisInit = $derived($initq?.data ? true : false);
/* Derived */
const ccconfig = $derived(thisInit ? getContext("cc-config") : null);
const globalMetrics = $derived(thisInit ? getContext("globalMetrics") : null);
const shortDuration = $derived(ccconfig?.jobList_hideShortRunningJobs);
let selectedCluster = $derived(filterPresets?.cluster ? filterPresets.cluster : null);
let selectedSubCluster = $derived(filterPresets?.partition ? filterPresets.partition : null);
let metrics = $derived.by(() => {
if (selectedCluster) {
if (selectedSubCluster) {
return ccconfig[`metricConfig_jobListMetrics:${selectedCluster}:${selectedSubCluster}`] ||
ccconfig[`metricConfig_jobListMetrics:${selectedCluster}`] ||
if (thisInit && ccconfig) {
if (selectedCluster) {
if (selectedSubCluster) {
return ccconfig[`metricConfig_jobListMetrics:${selectedCluster}:${selectedSubCluster}`] ||
ccconfig[`metricConfig_jobListMetrics:${selectedCluster}`] ||
ccconfig.metricConfig_jobListMetrics
}
return ccconfig[`metricConfig_jobListMetrics:${selectedCluster}`] ||
ccconfig.metricConfig_jobListMetrics
}
return ccconfig[`metricConfig_jobListMetrics:${selectedCluster}`] ||
ccconfig.metricConfig_jobListMetrics
return ccconfig.metricConfig_jobListMetrics
}
return ccconfig.metricConfig_jobListMetrics
return [];
});
let showFootprint = $derived(filterPresets.cluster
? !!ccconfig[`jobList_showFootprint:${filterPresets.cluster}`]
: !!ccconfig.jobList_showFootprint
let showFootprint = $derived((thisInit && ccconfig)
? filterPresets?.cluster
? ccconfig[`jobList_showFootprint:${filterPresets.cluster}`]
: ccconfig.jobList_showFootprint
: {}
);
let selectedHistograms = $derived(selectedCluster ? selectedHistogramsBuffer[selectedCluster] : selectedHistogramsBuffer['all']);
// Histogram metrics preselected from the user config; an empty list until init has finished and the config is present.
let selectedHistograms = $derived.by(() => {
  if (!(thisInit && ccconfig)) return [];
  // No fallback for a selected cluster: the unspecific list can include unavailable metrics
  return selectedCluster
    ? ccconfig[`userView_histogramMetrics:${selectedCluster}`]
    : ccconfig.userView_histogramMetrics;
});
let stats = $derived(
queryStore({
client: client,
@@ -159,19 +178,9 @@
});
});
$effect(() => {
if (!selectedHistogramsBuffer[selectedCluster]) {
selectedHistogramsBuffer[selectedCluster] = ccconfig[`userView_histogramMetrics:${selectedCluster}`];
};
});
/* On Mount */
onMount(() => {
filterComponent.updateFilters();
// Why? -> `$derived(ccconfig[$cluster])` only loads array from last Backend-Query if $cluster changed reactively (without reload)
if (filterPresets?.cluster) {
selectedHistogramsBuffer[filterPresets.cluster] = ccconfig[`userView_histogramMetrics:${filterPresets.cluster}`];
};
});
</script>
@@ -508,6 +517,7 @@
<Sorting
bind:isOpen={isSortingOpen}
presetSorting={sorting}
{globalMetrics}
applySorting={(newSort) =>
sorting = {...newSort}
}
@@ -521,6 +531,7 @@
subCluster={selectedSubCluster}
configName="metricConfig_jobListMetrics"
footprintSelect
{globalMetrics}
applyMetrics={(newMetrics) =>
metrics = [...newMetrics]
}
@@ -531,7 +542,8 @@
bind:isOpen={isHistogramSelectionOpen}
presetSelectedHistograms={selectedHistograms}
configName="userView_histogramMetrics"
{globalMetrics}
applyChange={(newSelection) => {
selectedHistogramsBuffer[selectedCluster || 'all'] = [...newSelection];
selectedHistograms = [...newSelection];
}}
/>

View File

@@ -39,10 +39,6 @@
} = $props();
/* Const Init */
const ccconfig = getContext("cc-config");
const initialized = getContext("initialized");
const globalMetrics = getContext("globalMetrics");
const usePaging = ccconfig?.jobList_usePaging || false;
const jobInfoColumnWidth = 250;
const client = getContextClient();
const query = gql`
@@ -100,11 +96,18 @@
let headerPaddingTop = $state(0);
let jobs = $state([]);
let page = $state(1);
let itemsPerPage = $state(usePaging ? (ccconfig?.jobList_jobsPerPage || 10) : 10);
let triggerMetricRefresh = $state(false);
let tableWidth = $state(0);
/* Derived */
// Context-backed values for the job list; each lookup is gated on `initialized`.
// NOTE(review): other components in this change set read the "initialized" context with the
// store prefix (`$initialized`). If it is a store here too, the bare object below is always
// truthy and the four lookups run immediately instead of waiting for init — confirm intent
// (switching to `$initialized` would make clusterInfos/globalMetrics null until init completes).
const initialized = $derived(getContext("initialized") || false);
const ccconfig = $derived(initialized ? getContext("cc-config") : null);
const globalMetrics = $derived(initialized ? getContext("globalMetrics") : null);
const clusterInfos = $derived(initialized ? getContext("clusters"): null);
const resampleConfig = $derived(initialized ? getContext("resampling") : null);
// Paging settings fall back to "no paging, 10 items" while the config is not available.
const usePaging = $derived(ccconfig?.jobList_usePaging || false);
let itemsPerPage = $derived(usePaging ? (ccconfig?.jobList_jobsPerPage || 10) : 10);
let filter = $derived([...filterBuffer]);
let paging = $derived({ itemsPerPage, page });
const plotWidth = $derived.by(() => {
@@ -274,7 +277,7 @@
style="width: {plotWidth}px; padding-top: {headerPaddingTop}px"
>
{metric}
{#if $initialized}
<!-- Read the store value with the $ prefix, as the previous version did, so the unit is only resolved after init -->
{#if $initialized}
({getUnit(metric)})
{/if}
</th>
@@ -292,7 +295,8 @@
</tr>
{:else}
{#each jobs as job (job.id)}
<JobListRow {triggerMetricRefresh} {job} {metrics} {plotWidth} {showFootprint} previousSelect={selectedJobs.includes(job.id)}
<JobListRow {triggerMetricRefresh} {job} {metrics} {plotWidth} {showFootprint} {globalMetrics} {clusterInfos} {resampleConfig}
previousSelect={selectedJobs.includes(job.id)}
selectJob={(detail) => selectedJobs = [...selectedJobs, detail]}
unselectJob={(detail) => selectedJobs = selectedJobs.filter(item => item !== detail)}
/>

View File

@@ -30,11 +30,10 @@
setFilter
} = $props();
/* Const Init */
const clusters = getContext("clusters");
const initialized = getContext("initialized");
/* Derived */
const initialized = $derived(getContext("initialized") || false);
const clusterInfos = $derived($initialized ? getContext("clusters") : null);
let pendingCluster = $derived(presetCluster);
let pendingPartition = $derived(presetPartition);
</script>
@@ -56,7 +55,7 @@
>
Any Cluster
</ListGroupItem>
{#each clusters as cluster}
{#each clusterInfos as cluster}
<ListGroupItem
disabled={disableClusterSelection}
active={pendingCluster == cluster.name}
@@ -80,7 +79,7 @@
>
Any Partition
</ListGroupItem>
{#each clusters?.find((c) => c.name == pendingCluster)?.partitions as partition}
{#each clusterInfos?.find((c) => c.name == pendingCluster)?.partitions as partition}
<ListGroupItem
active={pendingPartition == partition}
onclick={() => (pendingPartition = partition)}

View File

@@ -42,8 +42,8 @@
contains: "Contains",
}
const findMaxNumAccels = (clusters) =>
clusters.reduce(
const findMaxNumAccels = (infos) =>
infos.reduce(
(max, cluster) =>
Math.max(
max,
@@ -56,8 +56,8 @@
);
// Limited to Single-Node Thread Count
const findMaxNumHWThreadsPerNode = (clusters) =>
clusters.reduce(
const findMaxNumHWThreadsPerNode = (infos) =>
infos.reduce(
(max, cluster) =>
Math.max(
max,
@@ -92,8 +92,8 @@
let threadState = $derived(presetNumHWThreads);
let accState = $derived(presetNumAccelerators);
const clusters = $derived(getContext("clusters"));
const initialized = $derived(getContext("initialized"));
const initialized = $derived(getContext("initialized") || false);
const clusterInfos = $derived($initialized ? getContext("clusters") : null);
// Is Selection Active
const nodesActive = $derived(!(JSON.stringify(nodesState) === JSON.stringify({ from: 1, to: maxNumNodes })));
const threadActive = $derived(!(JSON.stringify(threadState) === JSON.stringify({ from: 1, to: maxNumHWThreads })));
@@ -109,12 +109,12 @@
$effect(() => {
if ($initialized) {
if (activeCluster != null) {
const { subClusters } = clusters.find((c) => c.name == activeCluster);
const { subClusters } = clusterInfos.find((c) => c.name == activeCluster);
maxNumAccelerators = findMaxNumAccels([{ subClusters }]);
maxNumHWThreads = findMaxNumHWThreadsPerNode([{ subClusters }]);
} else if (clusters.length > 0) {
maxNumAccelerators = findMaxNumAccels(clusters);
maxNumHWThreads = findMaxNumHWThreadsPerNode(clusters);
} else if (clusterInfos.length > 0) {
maxNumAccelerators = findMaxNumAccels(clusterInfos);
maxNumHWThreads = findMaxNumHWThreadsPerNode(clusterInfos);
}
}
});

View File

@@ -31,8 +31,8 @@
} = $props();
/* Derived */
const allTags = $derived(getContext("tags"))
const initialized = $derived(getContext("initialized"))
const initialized = $derived(getContext("initialized") || false)
const allTags = $derived($initialized ? getContext("tags") : [])
/* State Init */
let searchTerm = $state("");

View File

@@ -18,8 +18,8 @@
} = $props();
/* Derived */
const allTags = $derived(getContext('tags'));
const initialized = $derived(getContext('initialized'));
const initialized = $derived(getContext('initialized') || false);
const allTags = $derived($initialized ? getContext('tags') : []);
/* Effects */
$effect(() => {

View File

@@ -48,8 +48,6 @@
const client = getContextClient();
/* State Init */
let initialized = getContext("initialized")
let allTags = getContext("tags")
let newTagType = $state("");
let newTagName = $state("");
let filterTerm = $state("");
@@ -57,10 +55,13 @@
let isOpen = $state(false);
/* Derived */
// "initialized" is consumed as a store in this component (`$initialized`); keep the handle here.
const initialized = $derived(getContext("initialized") || false );
// Use the store value, not the store object, so the tag list is re-derived once init has completed.
let allTags = $derived($initialized ? getContext("tags") : [])
let newTagScope = $derived(username);
const isAdmin = $derived((roles && authlevel == roles.admin));
const isSupport = $derived((roles && authlevel == roles.support));
const allTagsFiltered = $derived(($initialized, jobTags, fuzzySearchTags(filterTerm, allTags))); // $init und JobTags only for triggering react
// $initialized and jobTags are referenced only so that this derivation re-runs when either of them changes.
const allTagsFiltered = $derived(($initialized, jobTags, fuzzySearchTags(filterTerm, allTags)));
const usedTagsFiltered = $derived(matchJobTags(jobTags, allTagsFiltered, 'used', isAdmin, isSupport));
const unusedTagsFiltered = $derived(matchJobTags(jobTags, allTagsFiltered, 'unused', isAdmin, isSupport));

View File

@@ -11,11 +11,13 @@
- `triggerMetricRefresh Bool?`: If changed to true from upstream, will trigger metric query [Default: false]
- `selectJob Func`: The callback function to select a job for comparison
- `unselectJob Func`: The callback function to unselect a job from comparison
- `globalMetrics [Obj]`: Includes the backend supplied availabilities for cluster and subCluster
- `clusterInfos [Obj]`: Includes the backend supplied cluster topology
- `resampleConfig [Obj]`: Includes the backend supplied resampling info
-->
<script>
import { queryStore, gql, getContextClient } from "@urql/svelte";
import { getContext } from "svelte";
import { Card, Spinner } from "@sveltestrap/sveltestrap";
import { maxScope, checkMetricDisabled } from "../utils.js";
import JobInfo from "./JobInfo.svelte";
@@ -33,13 +35,13 @@
triggerMetricRefresh = false,
selectJob,
unselectJob,
globalMetrics,
clusterInfos,
resampleConfig
} = $props();
/* Const Init */
const client = getContextClient();
const cluster = getContext("clusters");
const resampleConfig = getContext("resampling") || null;
const resampleDefault = resampleConfig ? Math.max(...resampleConfig.resolutions) : 0;
const query = gql`
query ($id: ID!, $metrics: [String!]!, $scopes: [MetricScope!]!, $selectedResolution: Int) {
jobMetrics(id: $id, metrics: $metrics, scopes: $scopes, resolution: $selectedResolution) {
@@ -73,11 +75,11 @@
`;
/* State Init */
let selectedResolution = $state(resampleDefault);
let zoomStates = $state({});
let thresholdStates = $state({});
/* Derived */
const resampleDefault = $derived(resampleConfig ? Math.max(...resampleConfig.resolutions) : 0);
const jobId = $derived(job?.id);
const scopes = $derived.by(() => {
if (job.numNodes == 1) {
@@ -87,6 +89,8 @@
return ["node"];
};
});
let selectedResolution = $derived(resampleDefault);
let isSelected = $derived(previousSelect);
let metricsQuery = $derived(queryStore({
client: client,
@@ -94,6 +98,7 @@
variables: { id: jobId, metrics, scopes, selectedResolution },
})
);
const refinedData = $derived($metricsQuery?.data?.jobMetrics ? sortAndSelectScope($metricsQuery.data.jobMetrics) : []);
/* Effects */
@@ -160,6 +165,7 @@
return {
name: jobMetric.data.name,
disabled: checkMetricDisabled(
globalMetrics,
jobMetric.data.name,
job.cluster,
job.subCluster,
@@ -220,7 +226,7 @@
series={metric.data.metric.series}
statisticsSeries={metric.data.metric.statisticsSeries}
metric={metric.data.name}
cluster={cluster.find((c) => c.name == job.cluster)}
cluster={clusterInfos.find((c) => c.name == job.cluster)}
subCluster={job.subCluster}
isShared={job.shared != "none"}
numhwthreads={job.numHWThreads}

View File

@@ -18,7 +18,7 @@
import uPlot from "uplot";
import { formatNumber, formatDurationTime } from "../units.js";
import { getContext, onDestroy } from "svelte";
import { Card } from "@sveltestrap/sveltestrap";
import { Card, CardHeader, CardBody } from "@sveltestrap/sveltestrap";
/* Svelte 5 Props */
let {

View File

@@ -6,6 +6,7 @@
- `isOpen Bool`: Is selection opened [Bindable]
- `configName String`: The config id string to be updated in database on selection change
- `presetSelectedHistograms [String]`: The currently selected metrics to display as histogram
- `globalMetrics [Obj]`: Includes the backend supplied availabilities for cluster and subCluster
- `applyChange Func`: The callback function to apply current selection
-->
@@ -24,10 +25,11 @@
/* Svelte 5 Props */
let {
cluster,
cluster = "",
isOpen = $bindable(),
configName,
presetSelectedHistograms,
globalMetrics,
applyChange
} = $props();
@@ -42,11 +44,11 @@
function loadHistoMetrics(thisCluster) {
// isInit Check Removed: Parent Component has finished Init-Query: Globalmetrics available here.
if (!thisCluster) {
return getContext("globalMetrics")
return globalMetrics
.filter((gm) => gm?.footprint)
.map((fgm) => { return fgm.name })
} else {
return getContext("globalMetrics")
return globalMetrics
.filter((gm) => gm?.availability.find((av) => av.cluster == thisCluster))
.filter((agm) => agm?.footprint)
.map((afgm) => { return afgm.name })

View File

@@ -9,13 +9,12 @@
- `cluster String?`: The currently selected cluster [Default: null]
- `subCluster String?`: The currently selected subCluster [Default: null]
- `footprintSelect Bool?`: Render checkbox for footprint display in upstream component [Default: false]
- `preInitialized Bool?`: If the parent component has a dedicated call to init() [Default: false]
- `configName String`: The config key for the last saved selection (constant)
- `globalMetrics [Obj]`: Includes the backend supplied availabilities for cluster and subCluster
- `applyMetrics Func`: The callback function to apply current selection
-->
<script>
import { getContext } from "svelte";
import {
Modal,
ModalBody,
@@ -35,14 +34,12 @@
cluster = null,
subCluster = null,
footprintSelect = false,
preInitialized = false, // Job View is Pre-Init'd: $initialized "alone" store returns false
configName,
globalMetrics,
applyMetrics
} = $props();
/* Const Init */
const globalMetrics = getContext("globalMetrics");
const initialized = getContext("initialized");
const client = getContextClient();
const updateConfigurationMutation = ({ name, value }) => {
return mutationStore({
@@ -58,27 +55,23 @@
/* State Init */
let pendingShowFootprint = $state(!!showFootprint);
let listedMetrics = $state([]);
let columnHovering = $state(null);
/* Derived States */
let pendingMetrics = $derived(presetMetrics);
const allMetrics = $derived(loadAvailable(preInitialized || $initialized));
const allMetrics = $derived(loadAvailable(globalMetrics));
let pendingMetrics = $derived(presetMetrics || []);
let listedMetrics = $derived([...presetMetrics, ...allMetrics.difference(new Set(presetMetrics))]); // List (preset) active metrics first, then list inactives
/* Reactive Effects */
$effect(() => {
totalMetrics = allMetrics?.size || 0;
});
$effect(() => {
listedMetrics = [...presetMetrics, ...allMetrics.difference(new Set(presetMetrics))]; // List (preset) active metrics first, then list inactives
});
/* Functions */
function loadAvailable(init) {
function loadAvailable(gms) {
const availableMetrics = new Set();
if (init) {
for (let gm of globalMetrics) {
if (gms) {
for (let gm of gms) {
if (!cluster) {
availableMetrics.add(gm.name)
} else {
@@ -90,7 +83,7 @@
}
}
}
return availableMetrics
return availableMetrics;
}
function printAvailability(metric, cluster) {

View File

@@ -5,11 +5,11 @@
- `presetSorting Object?`: The latest sort selection state
- Default { field: "startTime", type: "col", order: "DESC" }
- `isOpen Bool?`: Is modal opened [Bindable, Default: false]
- `globalMetrics [Obj]`: Includes the backend supplied availabilities for cluster and subCluster
- `applySorting Func`: The callback function to apply current selection
-->
<script>
import { getContext, onMount } from "svelte";
import {
Icon,
Button,
@@ -25,12 +25,11 @@
let {
isOpen = $bindable(false),
presetSorting = { field: "startTime", type: "col", order: "DESC" },
globalMetrics,
applySorting
} = $props();
/* Const Init */
const initialized = getContext("initialized");
const globalMetrics = getContext("globalMetrics");
const fixedSortables = $state([
{ field: "startTime", type: "col", text: "Start Time (Default)", order: "DESC" },
{ field: "duration", type: "col", text: "Duration", order: "DESC" },
@@ -42,22 +41,11 @@
/* State Init */
let activeColumnIdx = $state(0);
let metricSortables = $state([]);
/* Derived */
let sorting = $derived({...presetSorting})
let sortableColumns = $derived([...fixedSortables, ...metricSortables]);
/* Effect */
$effect(() => {
if ($initialized) {
loadMetricSortables();
};
});
/* Functions */
function loadMetricSortables() {
metricSortables = globalMetrics.map((gm) => {
let metricSortables = $derived.by(() => {
return globalMetrics.map((gm) => {
if (gm?.footprint) {
return {
field: gm.name + '_' + gm.footprint,
@@ -68,8 +56,10 @@
}
return null
}).filter((r) => r != null)
};
});
let sortableColumns = $derived([...fixedSortables, ...metricSortables]);
/* Functions */
function loadActiveIndex() {
activeColumnIdx = sortableColumns.findIndex(
(col) => col.field == sorting.field,

View File

@@ -302,19 +302,17 @@ export function stickyHeader(datatableHeaderSelector, updatePading) {
onDestroy(() => document.removeEventListener("scroll", onscroll));
}
export function checkMetricDisabled(m, c, s) { // [m]etric, [c]luster, [s]ubcluster
const metrics = getContext("globalMetrics");
const available = metrics?.find((gm) => gm.name === m)?.availability?.find((av) => av.cluster === c)?.subClusters?.includes(s)
export function checkMetricDisabled(gm, m, c, s) { // [g]lobal[m]etrics, [m]etric, [c]luster, [s]ubcluster
const available = gm?.find((gm) => gm.name === m)?.availability?.find((av) => av.cluster === c)?.subClusters?.includes(s)
// Return inverse logic
return !available
}
export function checkMetricsDisabled(ma, c, s) { // [m]etric[a]rray, [c]luster, [s]ubcluster
export function checkMetricsDisabled(gm, ma, c, s) { // [g]lobal[m]etrics, [m]etric[a]rray, [c]luster, [s]ubcluster
let result = {};
const metrics = getContext("globalMetrics");
ma.forEach((m) => {
// Return named inverse logic: !available
result[m] = !(metrics?.find((gm) => gm.name === m)?.availability?.find((av) => av.cluster === c)?.subClusters?.includes(s))
result[m] = !(gm?.find((gm) => gm.name === m)?.availability?.find((av) => av.cluster === c)?.subClusters?.includes(s))
});
return result
}

View File

@@ -3,7 +3,7 @@
Properties:
- `job Object`: The GQL job object
- `clusters Array`: The GQL clusters array
- `clusterInfo Array`: The GQL clusters array
-->
<script>
@@ -24,7 +24,7 @@
/* Svelte 5 Props */
let {
job,
clusters,
clusterInfo,
} = $props();
/* Const Init */
@@ -62,7 +62,7 @@
<div bind:clientWidth={roofWidth}>
<Roofline
width={roofWidth}
subCluster={clusters
subCluster={clusterInfo
.find((c) => c.name == job.cluster)
.subClusters.find((sc) => sc.name == job.subCluster)}
data={transformDataForRoofline(

View File

@@ -3,8 +3,10 @@
Properties:
- `job Object`: The job object
- `clusters Object`: The clusters object
- `clusterInfo Object`: The clusters object
- `tabActive bool`: Boolean if StatsTable Tab is Active on Creation
- `globalMetrics [Obj]`: Includes the backend supplied availabilities for cluster and subCluster
- `ccconfig Object?`: The ClusterCockpit Config Context
-->
<script>
@@ -13,7 +15,6 @@
gql,
getContextClient
} from "@urql/svelte";
import { getContext } from "svelte";
import {
Card,
Button,
@@ -29,8 +30,10 @@
/* Svelte 5 Props */
let {
job,
clusters,
clusterInfo,
tabActive,
globalMetrics,
ccconfig
} = $props();
/* Const Init */
@@ -55,65 +58,73 @@
/* State Init */
let moreScopes = $state(false);
let selectedScopes = $state([]);
let selectedMetrics = $state([]);
let totalMetrics = $state(0); // For Info Only, filled by MetricSelection Component
let isMetricSelectionOpen = $state(false);
/* Derived */
/* Derived Var Preprocessing*/
// Metrics preselected for the statistics table, resolved from the config
// from most to least specific key: cluster+subCluster, then cluster, then
// the global entry. Yields an empty list while job or config are missing.
let selectedTableMetrics = $derived.by(() => {
  if (!job || !ccconfig) return [];

  const baseKey = "metricConfig_jobViewTableMetrics";
  if (!job.cluster) return ccconfig[baseKey];

  const clusterKey = `${baseKey}:${job.cluster}`;
  if (!job.subCluster) return ccconfig[clusterKey] || ccconfig[baseKey];

  // Lookups stay lazy: a less specific key is only read if the more specific one is falsy
  return ccconfig[`${clusterKey}:${job.subCluster}`] || ccconfig[clusterKey] || ccconfig[baseKey];
});
// Scopes requested for the statistics table query.
// - No job: only "node" (fallback).
// - moreScopes set: always all scopes.
// - Otherwise: "node", plus "socket"/"core"/"hwthread" for single-node jobs,
//   plus "accelerator" if any selected metric is configured with accelerator
//   scope on the job's subcluster.
let selectedTableScopes = $derived.by(() => {
  if (!job) return ["node"]; // Fallback

  // If flag set: Always load all scopes
  if (moreScopes) return ["node", "socket", "core", "hwthread", "accelerator"];

  // Multi-node jobs only get node scope by default; the accelerator check is not needed
  if (job.numNodes !== 1) return ["node"];

  // Optional chaining throughout: missing cluster info, an unknown
  // cluster/subcluster or an absent metric list count as "no accelerator
  // scope" instead of throwing inside the derived computation.
  const subClusterMetricConfig = clusterInfo
    ?.find((c) => c.name == job.cluster)
    ?.subClusters?.find((sc) => sc.name == job.subCluster)
    ?.metricConfig;

  // Check if any selected metric has accelerator scope by default
  const accScopeDefault = (selectedTableMetrics ?? []).some(
    (m) => subClusterMetricConfig?.find((smc) => smc.name == m)?.scope === "accelerator",
  );

  const pendingScopes = ["node", "socket", "core", "hwthread"];
  if (accScopeDefault) pendingScopes.push("accelerator");
  return pendingScopes;
});
/* Derived Query */
const scopedStats = $derived(queryStore({
client: client,
query: query,
variables: { dbid: job.id, selectedMetrics, selectedScopes },
variables: {
dbid: job.id,
selectedMetrics: selectedTableMetrics,
selectedScopes: selectedTableScopes
},
})
);
/* Functions */
function loadScopes() {
// Archived Jobs Load All Scopes By Default (See Backend)
moreScopes = true;
selectedScopes = ["node", "socket", "core", "hwthread", "accelerator"];
};
/* On Init */
// Handle Job Query on Init -> is not executed anymore
getContext("on-init")(() => {
if (!job) return;
const pendingMetrics = (
getContext("cc-config")[`metricConfig_jobViewTableMetrics:${job.cluster}:${job.subCluster}`] ||
getContext("cc-config")[`metricConfig_jobViewTableMetrics:${job.cluster}`]
) || getContext("cc-config")["metricConfig_jobViewTableMetrics"];
// Select default Scopes to load: Check before if any metric has accelerator scope by default
const accScopeDefault = [...pendingMetrics].some(function (m) {
const cluster = clusters.find((c) => c.name == job.cluster);
const subCluster = cluster.subClusters.find((sc) => sc.name == job.subCluster);
return subCluster.metricConfig.find((smc) => smc.name == m)?.scope === "accelerator";
});
const pendingScopes = ["node"]
if (job.numNodes === 1) {
pendingScopes.push("socket")
pendingScopes.push("core")
pendingScopes.push("hwthread")
if (accScopeDefault) { pendingScopes.push("accelerator") }
}
selectedMetrics = [...pendingMetrics];
selectedScopes = [...pendingScopes];
});
</script>
<TabPane tabId="stats" tab="Statistics Table" class="overflow-x-auto" active={tabActive}>
<Row>
<Col class="m-2">
<Button outline onclick={() => (isMetricSelectionOpen = true)} class="px-2" color="primary" style="margin-right:0.5rem">
Select Metrics (Selected {selectedMetrics.length} of {totalMetrics} available)
Select Metrics (Selected {selectedTableMetrics.length} of {totalMetrics} available)
</Button>
{#if job.numNodes > 1 && job.state === "running"}
<Button class="px-2 ml-auto" color="success" outline onclick={loadScopes} disabled={moreScopes}>
<Button class="px-2 ml-auto" color="success" outline onclick={() => (moreScopes = !moreScopes)} disabled={moreScopes}>
{#if !moreScopes}
<Icon name="plus-square-fill" style="margin-right:0.25rem"/> Add More Scopes
{:else}
@@ -141,7 +152,7 @@
<StatsTable
hosts={job.resources.map((r) => r.hostname).sort()}
jobStats={$scopedStats?.data?.scopedJobStats}
{selectedMetrics}
selectedMetrics={selectedTableMetrics}
/>
{/if}
</TabPane>
@@ -149,12 +160,12 @@
<MetricSelection
bind:isOpen={isMetricSelectionOpen}
bind:totalMetrics
presetMetrics={selectedMetrics}
presetMetrics={selectedTableMetrics}
cluster={job.cluster}
subCluster={job.subCluster}
configName="metricConfig_jobViewTableMetrics"
preInitialized
{globalMetrics}
applyMetrics={(newMetrics) =>
selectedMetrics = [...newMetrics]
selectedTableMetrics = [...newMetrics]
}
/>

View File

@@ -5,11 +5,12 @@
- `cluster String`: The nodes' cluster
- `subCluster String`: The nodes' subCluster [Default: ""]
- `ccconfig Object?`: The ClusterCockpit Config Context [Default: null]
- `globalMetrics [Obj]`: Includes the backend supplied availabilities for cluster and subCluster
- `pendingSelectedMetrics [String]`: The array of selected metrics [Default []]
- `selectedResolution Number?`: The selected data resolution [Default: 0]
- `hostnameFilter String?`: The active hostnamefilter [Default: ""]
- `hoststateFilter String?`: The active hoststatefilter [Default: ""]
- `presetSystemUnits Object`: The object of metric units [Default: null]
- `systemUnits Object`: The object of metric units [Default: null]
- `from Date?`: The selected "from" date [Default: null]
- `to Date?`: The selected "to" date [Default: null]
-->
@@ -27,11 +28,12 @@
cluster,
subCluster = "",
ccconfig = null,
globalMetrics = null,
pendingSelectedMetrics = [],
selectedResolution = 0,
hostnameFilter = "",
hoststateFilter = "",
presetSystemUnits = null,
systemUnits = null,
from = null,
to = null
} = $props();
@@ -236,7 +238,7 @@
scope="col"
style="padding-top: {headerPaddingTop}px"
>
{metric} ({presetSystemUnits[metric]})
{metric} ({systemUnits[metric]})
</th>
{/each}
</tr>
@@ -250,7 +252,7 @@
</Row>
{:else}
{#each nodes as nodeData (nodeData.host)}
<NodeListRow {nodeData} {cluster} {selectedMetrics}/>
<NodeListRow {nodeData} {cluster} {selectedMetrics} {globalMetrics}/>
{:else}
<tr>
<td colspan={selectedMetrics.length + 1}> No nodes found </td>

View File

@@ -9,10 +9,10 @@
- `hoststateFilter String?`: The active hoststatefilter [Default: ""]
- `from Date?`: The selected "from" date [Default: null]
- `to Date?`: The selected "to" date [Default: null]
- `globalMetrics [Obj]`: Includes the backend supplied availabilities for cluster and subCluster
-->
<script>
import { getContext } from "svelte";
import { queryStore, gql, getContextClient } from "@urql/svelte";
import { Row, Col, Card, CardHeader, CardBody, Spinner, Badge } from "@sveltestrap/sveltestrap";
import { checkMetricDisabled } from "../generic/utils.js";
@@ -26,11 +26,11 @@
hostnameFilter = "",
hoststateFilter = "",
from = null,
to = null
to = null,
globalMetrics
} = $props();
/* Const Init */
const initialized = getContext("initialized");
const client = getContextClient();
// Node State Colors
const stateColors = {
@@ -87,7 +87,7 @@
},
}));
const mappedData = $derived(handleQueryData($initialized, $nodesQuery?.data));
const mappedData = $derived(handleQueryData($nodesQuery?.data));
const filteredData = $derived(mappedData.filter((h) => {
if (hostnameFilter) {
if (hoststateFilter == 'all') return h.host.includes(hostnameFilter)
@@ -99,7 +99,7 @@
}));
/* Functions */
function handleQueryData(isInitialized, queryData) {
function handleQueryData(queryData) {
let rawData = []
if (queryData) {
rawData = queryData.nodeMetrics.filter((h) => {
@@ -120,7 +120,8 @@
data: h.metrics.filter(
(m) => m?.name == selectedMetric && m.scope == "node",
),
disabled: isInitialized ? checkMetricDisabled(selectedMetric, cluster, h.subCluster) : null,
// TODO: Move To New Func Variant With Disabled Check on Whole Cluster Level: This never Triggers!
disabled: checkMetricDisabled(globalMetrics, selectedMetric, cluster, h.subCluster),
}))
.sort((a, b) => a.host.localeCompare(b.host))
}
@@ -163,6 +164,7 @@
</div>
{#if item?.data}
{#if item.disabled === true}
<!-- TODO: Will never be Shown: Overview Single Metric Return Will be Null, see Else Case-->
<Card body class="mx-3" color="info"
>Metric disabled for subcluster <code
>{selectedMetric}:{item.subCluster}</code
@@ -182,7 +184,7 @@
enableFlip
/>
{/key}
{:else if item.disabled === null}
{:else}
<Card body class="mx-3" color="info">
Global Metric List Not Initialized
Can not determine {selectedMetric} availability: Please Reload Page

View File

@@ -5,6 +5,7 @@
- `cluster String`: The nodes' cluster
- `nodeData Object`: The node data object including metric data
- `selectedMetrics [String]`: The array of selected metrics
- `globalMetrics [Obj]`: Includes the backend supplied availabilities for cluster and subCluster
-->
<script>
@@ -24,6 +25,7 @@
cluster,
nodeData,
selectedMetrics,
globalMetrics
} = $props();
/* Var Init*/
@@ -92,6 +94,7 @@
if (scopedNodeMetric?.data) {
return {
disabled: checkMetricDisabled(
globalMetrics,
scopedNodeMetric.data.name,
cluster,
nodeData.subCluster,