From 743a89c3a218d9ba75c0272cb8c62836e0f70fa9 Mon Sep 17 00:00:00 2001 From: Christoph Kluge Date: Fri, 4 Jul 2025 15:14:15 +0200 Subject: [PATCH 01/20] Finalize node query backend functions, fix migration issue --- api/schema.graphqls | 2 + internal/graph/generated/generated.go | 10 +- internal/graph/model/models_gen.go | 1 + internal/graph/schema.resolvers.go | 19 ++- internal/importer/initDB.go | 2 +- internal/repository/job.go | 4 +- .../migrations/sqlite3/10_node-table.up.sql | 12 +- internal/repository/node.go | 124 +++++++++++++++++- 8 files changed, 162 insertions(+), 12 deletions(-) diff --git a/api/schema.graphqls b/api/schema.graphqls index 794c630..5ff1a36 100644 --- a/api/schema.graphqls +++ b/api/schema.graphqls @@ -300,6 +300,7 @@ type Query { user(username: String!): User allocatedNodes(cluster: String!): [Count!]! + ## Node Queries New node(id: ID!): Node nodes(filter: [NodeFilter!], order: OrderByInput): NodeStateResultList! nodeStats(filter: [NodeFilter!]): [NodeStats!]! @@ -393,6 +394,7 @@ type TimeRangeOutput { input NodeFilter { hostname: StringInput cluster: StringInput + subcluster: StringInput nodeState: NodeState healthState: MonitoringState } diff --git a/internal/graph/generated/generated.go b/internal/graph/generated/generated.go index 238270f..b150423 100644 --- a/internal/graph/generated/generated.go +++ b/internal/graph/generated/generated.go @@ -2714,6 +2714,7 @@ type TimeRangeOutput { input NodeFilter { hostname: StringInput cluster: StringInput + subcluster: StringInput nodeState: NodeState healthState: MonitoringState } @@ -17745,7 +17746,7 @@ func (ec *executionContext) unmarshalInputNodeFilter(ctx context.Context, obj an asMap[k] = v } - fieldsInOrder := [...]string{"hostname", "cluster", "nodeState", "healthState"} + fieldsInOrder := [...]string{"hostname", "cluster", "subcluster", "nodeState", "healthState"} for _, k := range fieldsInOrder { v, ok := asMap[k] if !ok { @@ -17766,6 +17767,13 @@ func (ec *executionContext) unmarshalInputNodeFilter(ctx context.Context, obj an return it, err } it.Cluster = data + case "subcluster": + ctx := graphql.WithPathContext(ctx, graphql.NewPathWithField("subcluster")) + data, err := ec.unmarshalOStringInput2ᚖgithubᚗcomᚋClusterCockpitᚋccᚑbackendᚋinternalᚋgraphᚋmodelᚐStringInput(ctx, v) + if err != nil { + return it, err + } + it.Subcluster = data case "nodeState": ctx := graphql.WithPathContext(ctx, graphql.NewPathWithField("nodeState")) data, err := ec.unmarshalONodeState2ᚖstring(ctx, v) diff --git a/internal/graph/model/models_gen.go b/internal/graph/model/models_gen.go index 5a32ac9..c5cc79b 100644 --- a/internal/graph/model/models_gen.go +++ b/internal/graph/model/models_gen.go @@ -170,6 +170,7 @@ type NamedStatsWithScope struct { type NodeFilter struct { Hostname *StringInput `json:"hostname,omitempty"` Cluster *StringInput `json:"cluster,omitempty"` + Subcluster *StringInput `json:"subcluster,omitempty"` NodeState *string `json:"nodeState,omitempty"` HealthState *schema.NodeState `json:"healthState,omitempty"` } diff --git a/internal/graph/schema.resolvers.go b/internal/graph/schema.resolvers.go index 78a76ef..1284c09 100644 --- a/internal/graph/schema.resolvers.go +++ b/internal/graph/schema.resolvers.go @@ -380,7 +380,24 @@ func (r *queryResolver) Nodes(ctx context.Context, filter []*model.NodeFilter, o // NodeStats is the resolver for the nodeStats field. 
func (r *queryResolver) NodeStats(ctx context.Context, filter []*model.NodeFilter) ([]*model.NodeStats, error) { - panic(fmt.Errorf("not implemented: NodeStats - nodeStats")) + repo := repository.GetNodeRepository() + + stateCounts, serr := repo.CountNodeStates(ctx, filter) + if serr != nil { + cclog.Warnf("Error while counting nodeStates: %s", serr.Error()) + return nil, serr + } + + healthCounts, herr := repo.CountHealthStates(ctx, filter) + if herr != nil { + cclog.Warnf("Error while counting healthStates: %s", herr.Error()) + return nil, herr + } + + allCounts := make([]*model.NodeStats, 0) + allCounts = append(stateCounts, healthCounts...) + + return allCounts, nil } // Job is the resolver for the job field. diff --git a/internal/importer/initDB.go b/internal/importer/initDB.go index 179c21c..98dca03 100644 --- a/internal/importer/initDB.go +++ b/internal/importer/initDB.go @@ -40,7 +40,7 @@ func InitDB() error { } tags := make(map[string]int64) - // Not using log.Print because we want the line to end with `\r` and + // Not using cclog.Print because we want the line to end with `\r` and // this function is only ever called when a special command line flag // is passed anyways. fmt.Printf("%d jobs inserted...\r", 0) diff --git a/internal/repository/job.go b/internal/repository/job.go index b6aa323..2cde824 100644 --- a/internal/repository/job.go +++ b/internal/repository/job.go @@ -337,10 +337,10 @@ func (r *JobRepository) FindColumnValue(user *schema.User, searchterm string, ta // theSql, args, theErr := theQuery.ToSql() // if theErr != nil { - // log.Warn("Error while converting query to sql") + // cclog.Warn("Error while converting query to sql") // return "", err // } - // log.Debugf("SQL query (FindColumnValue): `%s`, args: %#v", theSql, args) + // cclog.Debugf("SQL query (FindColumnValue): `%s`, args: %#v", theSql, args) err := theQuery.RunWith(r.stmtCache).QueryRow().Scan(&result) diff --git a/internal/repository/migrations/sqlite3/10_node-table.up.sql b/internal/repository/migrations/sqlite3/10_node-table.up.sql index c208b32..52e6a05 100644 --- a/internal/repository/migrations/sqlite3/10_node-table.up.sql +++ b/internal/repository/migrations/sqlite3/10_node-table.up.sql @@ -3,12 +3,12 @@ CREATE TABLE "node" ( hostname VARCHAR(255) NOT NULL, cluster VARCHAR(255) NOT NULL, subcluster VARCHAR(255) NOT NULL, - cpus_allocated INTEGER NOT NULL, - cpus_total INTEGER NOT NULL, - memory_allocated INTEGER NOT NULL, - memory_total INTEGER NOT NULL, - gpus_allocated INTEGER NOT NULL, - gpus_total INTEGER NOT NULL, + cpus_allocated INTEGER DEFAULT 0 NOT NULL, + cpus_total INTEGER DEFAULT 0 NOT NULL, + memory_allocated INTEGER DEFAULT 0 NOT NULL, + memory_total INTEGER DEFAULT 0 NOT NULL, + gpus_allocated INTEGER DEFAULT 0 NOT NULL, + gpus_total INTEGER DEFAULT 0 NOT NULL, node_state VARCHAR(255) NOT NULL CHECK (node_state IN ( 'allocated', 'reserved', 'idle', 'mixed', diff --git a/internal/repository/node.go b/internal/repository/node.go index 83bf062..b4d0181 100644 --- a/internal/repository/node.go +++ b/internal/repository/node.go @@ -49,6 +49,11 @@ func GetNodeRepository() *NodeRepository { return nodeRepoInstance } +var nodeColumns []string = []string{ + "node.id", "node.hostname", "node.cluster", "node.subcluster", + "node.node_state", "node.health_state", "node.meta_data", +} + func (r *NodeRepository) FetchMetadata(node *schema.Node) (map[string]string, error) { start := time.Now() cachekey := fmt.Sprintf("metadata:%d", node.ID) @@ -220,7 +225,7 @@ func (r *NodeRepository) 
QueryNodes( filters []*model.NodeFilter, order *model.OrderByInput, ) ([]*schema.Node, error) { - query, qerr := SecurityCheck(ctx, sq.Select(jobColumns...).From("node")) + query, qerr := AccessCheck(ctx, sq.Select(nodeColumns...).From("node")) if qerr != nil { return nil, qerr } @@ -232,6 +237,9 @@ func (r *NodeRepository) QueryNodes( if f.Cluster != nil { query = buildStringCondition("node.cluster", f.Cluster, query) } + if f.Subcluster != nil { + query = buildStringCondition("node.subcluster", f.Subcluster, query) + } if f.NodeState != nil { query = query.Where("node.node_state = ?", f.NodeState) } @@ -287,3 +295,117 @@ func (r *NodeRepository) ListNodes(cluster string) ([]*schema.Node, error) { return nodeList, nil } + +func (r *NodeRepository) CountNodeStates(ctx context.Context, filters []*model.NodeFilter) ([]*model.NodeStats, error) { + query, qerr := AccessCheck(ctx, sq.Select("node_state AS state", "count(*) AS count").From("node")) + if qerr != nil { + return nil, qerr + } + + for _, f := range filters { + if f.Hostname != nil { + query = buildStringCondition("node.hostname", f.Hostname, query) + } + if f.Cluster != nil { + query = buildStringCondition("node.cluster", f.Cluster, query) + } + if f.Subcluster != nil { + query = buildStringCondition("node.subcluster", f.Subcluster, query) + } + if f.NodeState != nil { + query = query.Where("node.node_state = ?", f.NodeState) + } + if f.HealthState != nil { + query = query.Where("node.health_state = ?", f.HealthState) + } + } + + rows, err := query.RunWith(r.stmtCache).Query() + if err != nil { + queryString, queryVars, _ := query.ToSql() + cclog.Errorf("Error while running query '%s' %v: %v", queryString, queryVars, err) + return nil, err + } + + nodes := make([]*model.NodeStats, 0) + for rows.Next() { + node := model.NodeStats{} + + if err := rows.Scan(&node.State, &node.Count); err != nil { + rows.Close() + cclog.Warn("Error while scanning rows (NodeStats)") + return nil, err + } + nodes = append(nodes, &node) + } + + return nodes, nil +} + +func (r *NodeRepository) CountHealthStates(ctx context.Context, filters []*model.NodeFilter) ([]*model.NodeStats, error) { + query, qerr := AccessCheck(ctx, sq.Select("health_state AS state", "count(*) AS count").From("node")) + if qerr != nil { + return nil, qerr + } + + for _, f := range filters { + if f.Hostname != nil { + query = buildStringCondition("node.hostname", f.Hostname, query) + } + if f.Cluster != nil { + query = buildStringCondition("node.cluster", f.Cluster, query) + } + if f.Subcluster != nil { + query = buildStringCondition("node.subcluster", f.Subcluster, query) + } + if f.NodeState != nil { + query = query.Where("node.node_state = ?", f.NodeState) + } + if f.HealthState != nil { + query = query.Where("node.health_state = ?", f.HealthState) + } + } + + rows, err := query.RunWith(r.stmtCache).Query() + if err != nil { + queryString, queryVars, _ := query.ToSql() + cclog.Errorf("Error while running query '%s' %v: %v", queryString, queryVars, err) + return nil, err + } + + nodes := make([]*model.NodeStats, 0) + for rows.Next() { + node := model.NodeStats{} + + if err := rows.Scan(&node.State, &node.Count); err != nil { + rows.Close() + cclog.Warn("Error while scanning rows (NodeStats)") + return nil, err + } + nodes = append(nodes, &node) + } + + return nodes, nil +} + +func AccessCheck(ctx context.Context, query sq.SelectBuilder) (sq.SelectBuilder, error) { + user := GetUserFromContext(ctx) + return AccessCheckWithUser(user, query) +} + +func AccessCheckWithUser(user 
*schema.User, query sq.SelectBuilder) (sq.SelectBuilder, error) { + if user == nil { + var qnil sq.SelectBuilder + return qnil, fmt.Errorf("user context is nil") + } + + switch { + // case len(user.Roles) == 1 && user.HasRole(schema.RoleApi): // API-User : Access NodeInfos + // return query, nil + case user.HasAnyRole([]schema.Role{schema.RoleAdmin, schema.RoleSupport}): // Admin & Support : Access NodeInfos + return query, nil + default: // No known Role: No Access, return error + var qnil sq.SelectBuilder + return qnil, fmt.Errorf("user has no or unknown roles") + } +} From 57b43b7b60c9d6328beef505eecac3786c136b95 Mon Sep 17 00:00:00 2001 From: Christoph Kluge Date: Mon, 7 Jul 2025 18:44:24 +0200 Subject: [PATCH 02/20] Split status view into tabbed components --- web/frontend/src/Status.root.svelte | 687 +----------------- web/frontend/src/status/NodeDash.svelte | 127 ++++ web/frontend/src/status/StatisticsDash.svelte | 174 +++++ web/frontend/src/status/StatusDash.svelte | 246 +++++++ web/frontend/src/status/UsageDash.svelte | 319 ++++++++ 5 files changed, 902 insertions(+), 651 deletions(-) create mode 100644 web/frontend/src/status/NodeDash.svelte create mode 100644 web/frontend/src/status/StatisticsDash.svelte create mode 100644 web/frontend/src/status/StatusDash.svelte create mode 100644 web/frontend/src/status/UsageDash.svelte diff --git a/web/frontend/src/Status.root.svelte b/web/frontend/src/Status.root.svelte index 8cdd091..297e675 100644 --- a/web/frontend/src/Status.root.svelte +++ b/web/frontend/src/Status.root.svelte @@ -6,350 +6,38 @@ --> - - + +

Current utilization of cluster "{cluster}"

- - -
- + + {/if} @@ -377,334 +64,32 @@ {$mainQuery.error.message}
-{/if} +{/if} --> -
+ + + + + + + - + + + + + -{#if $initq.data && $mainQuery.data} - {#each $initq.data.clusters.find((c) => c.name == cluster).subClusters as subCluster, i} - - - - - SubCluster "{subCluster.name}" - - - - - - - - - - - - - - - - - - -
Allocated Nodes
- -
{allocatedNodes[subCluster.name]} / {subCluster.numberOfNodes} - Nodes
Flop Rate (Any)
- -
- {scaleNumbers( - flopRate[subCluster.name], - subCluster.flopRateSimd.value * subCluster.numberOfNodes, - flopRateUnitPrefix[subCluster.name], - )}{flopRateUnitBase[subCluster.name]} [Max] -
MemBw Rate
- -
- {scaleNumbers( - memBwRate[subCluster.name], - subCluster.memoryBandwidth.value * subCluster.numberOfNodes, - memBwRateUnitPrefix[subCluster.name], - )}{memBwRateUnitBase[subCluster.name]} [Max] -
-
-
- - -
- {#key $mainQuery.data.nodeMetrics} - data.subCluster == subCluster.name, - ), - )} - /> - {/key} -
- -
- {/each} + + + + + -
- - - - - -
-

- Top Users on {cluster.charAt(0).toUpperCase() + cluster.slice(1)} -

- {#key $topUserQuery.data} - {#if $topUserQuery.fetching} - - {:else if $topUserQuery.error} - {$topUserQuery.error.message} - {:else} - tu[topUserSelection.key], - )} - entities={$topUserQuery.data.topUser.map((tu) => scrambleNames ? scramble(tu.id) : tu.id)} - /> - {/if} - {/key} -
- - - {#key $topUserQuery.data} - {#if $topUserQuery.fetching} - - {:else if $topUserQuery.error} - {$topUserQuery.error.message} - {:else} - - - - - - - {#each $topUserQuery.data.topUser as tu, i} - - - - {#if tu?.name} - {scrambleNames ? scramble(tu.name) : tu.name} - {/if} - - - {/each} -
LegendUser NameNumber of - -
{scrambleNames ? scramble(tu.id) : tu.id}{tu[topUserSelection.key]}
- {/if} - {/key} - - -

- Top Projects on {cluster.charAt(0).toUpperCase() + cluster.slice(1)} -

- {#key $topProjectQuery.data} - {#if $topProjectQuery.fetching} - - {:else if $topProjectQuery.error} - {$topProjectQuery.error.message} - {:else} - tp[topProjectSelection.key], - )} - entities={$topProjectQuery.data.topProjects.map((tp) => scrambleNames ? scramble(tp.id) : tp.id)} - /> - {/if} - {/key} - - - {#key $topProjectQuery.data} - {#if $topProjectQuery.fetching} - - {:else if $topProjectQuery.error} - {$topProjectQuery.error.message} - {:else} - - - - - - - {#each $topProjectQuery.data.topProjects as tp, i} - - - - - - {/each} -
LegendProject CodeNumber of - -
{scrambleNames ? scramble(tp.id) : tp.id}{tp[topProjectSelection.key]}
- {/if} - {/key} - -
- -
- - - - - - {#key $mainQuery.data.stats} - - {/key} - - - {#key $mainQuery.data.stats} - - {/key} - - - - - {#key $mainQuery.data.stats} - - {/key} - - - {#key $mainQuery.data.stats} - - {/key} - - - -
- - - - {#if selectedHistograms} - - {#snippet gridContent(item)} - - {/snippet} - - {#key $mainQuery.data.stats[0].histMetrics} - - {/key} - {/if} -{/if} - - { - selectedHistograms = [...newSelection]; - }} -/> + + + + + +
+
\ No newline at end of file diff --git a/web/frontend/src/status/NodeDash.svelte b/web/frontend/src/status/NodeDash.svelte new file mode 100644 index 0000000..29a3cf8 --- /dev/null +++ b/web/frontend/src/status/NodeDash.svelte @@ -0,0 +1,127 @@ + + + + +{#if $initq.data && $nodeStatusQuery.data} + + + + {#key $nodeStatusQuery.data.jobsStatistics} + + {/key} + + + {#key $nodeStatusQuery.data.jobsStatistics} + + {/key} + + + + + {#key $nodeStatusQuery.data.jobsStatistics} + + {/key} + + + {#key $nodeStatusQuery.data.jobsStatistics} + + {/key} + + +{/if} + + diff --git a/web/frontend/src/status/StatisticsDash.svelte b/web/frontend/src/status/StatisticsDash.svelte new file mode 100644 index 0000000..d3b4236 --- /dev/null +++ b/web/frontend/src/status/StatisticsDash.svelte @@ -0,0 +1,174 @@ + + + + + + + + + + + + + {#if $initq.fetching || $metricStatusQuery.fetching} + + {:else if $initq.error} + {$initq.error.message} + {:else} + + {/if} + + +{#if $metricStatusQuery.error} + + + {$metricStatusQuery.error.message} + + +{/if} + +{#if $initq.data && $metricStatusQuery.data} + + {#if selectedHistograms} + + {#snippet gridContent(item)} + + {/snippet} + + {#key $metricStatusQuery.data.jobsStatistics[0].histMetrics} + + {/key} + {/if} +{/if} + + { + selectedHistograms = [...newSelection]; + }} +/> diff --git a/web/frontend/src/status/StatusDash.svelte b/web/frontend/src/status/StatusDash.svelte new file mode 100644 index 0000000..57951a5 --- /dev/null +++ b/web/frontend/src/status/StatusDash.svelte @@ -0,0 +1,246 @@ + + + + + +{#if $initq.data && $statusQuery.data} + {#each $initq.data.clusters.find((c) => c.name == cluster).subClusters as subCluster, i} + + + + + SubCluster "{subCluster.name}" + + + + + + + + + + + + + + + + + + +
Allocated Nodes
+ +
{allocatedNodes[subCluster.name]} / {subCluster.numberOfNodes} + Nodes
Flop Rate (Any)
+ +
+ {scaleNumbers( + flopRate[subCluster.name], + subCluster.flopRateSimd.value * subCluster.numberOfNodes, + flopRateUnitPrefix[subCluster.name], + )}{flopRateUnitBase[subCluster.name]} [Max] +
MemBw Rate
+ +
+ {scaleNumbers( + memBwRate[subCluster.name], + subCluster.memoryBandwidth.value * subCluster.numberOfNodes, + memBwRateUnitPrefix[subCluster.name], + )}{memBwRateUnitBase[subCluster.name]} [Max] +
+
+
+ + +
+ {#key $statusQuery.data.nodeMetrics} + data.subCluster == subCluster.name, + ), + )} + /> + {/key} +
+ +
+ {/each} +{/if} diff --git a/web/frontend/src/status/UsageDash.svelte b/web/frontend/src/status/UsageDash.svelte new file mode 100644 index 0000000..1cda6e8 --- /dev/null +++ b/web/frontend/src/status/UsageDash.svelte @@ -0,0 +1,319 @@ + + + + +{#if $initq.data} + + + +
+

+ Top Users on {cluster.charAt(0).toUpperCase() + cluster.slice(1)} +

+ {#key $topUserQuery.data} + {#if $topUserQuery.fetching} + + {:else if $topUserQuery.error} + {$topUserQuery.error.message} + {:else} + tu[topUserSelection.key], + )} + entities={$topUserQuery.data.topUser.map((tu) => scrambleNames ? scramble(tu.id) : tu.id)} + /> + {/if} + {/key} +
+ + + {#key $topUserQuery.data} + {#if $topUserQuery.fetching} + + {:else if $topUserQuery.error} + {$topUserQuery.error.message} + {:else} + + + + + + + {#each $topUserQuery.data.topUser as tu, i} + + + + {#if tu?.name} + {scrambleNames ? scramble(tu.name) : tu.name} + {/if} + + + {/each} +
LegendUser NameNumber of + +
{scrambleNames ? scramble(tu.id) : tu.id}{tu[topUserSelection.key]}
+ {/if} + {/key} + + +

+ Top Projects on {cluster.charAt(0).toUpperCase() + cluster.slice(1)} +

+ {#key $topProjectQuery.data} + {#if $topProjectQuery.fetching} + + {:else if $topProjectQuery.error} + {$topProjectQuery.error.message} + {:else} + tp[topProjectSelection.key], + )} + entities={$topProjectQuery.data.topProjects.map((tp) => scrambleNames ? scramble(tp.id) : tp.id)} + /> + {/if} + {/key} + + + {#key $topProjectQuery.data} + {#if $topProjectQuery.fetching} + + {:else if $topProjectQuery.error} + {$topProjectQuery.error.message} + {:else} + + + + + + + {#each $topProjectQuery.data.topProjects as tp, i} + + + + + + {/each} +
LegendProject CodeNumber of + +
{scrambleNames ? scramble(tp.id) : tp.id}{tp[topProjectSelection.key]}
+ {/if} + {/key} + +
+{/if} From b036c3903c4274e15cd5b559fa0f3a6d5a730907 Mon Sep 17 00:00:00 2001 From: Christoph Kluge Date: Thu, 10 Jul 2025 14:57:12 +0200 Subject: [PATCH 03/20] add config fallbacks and notes --- api/schema.graphqls | 1 + web/frontend/src/generic/JobCompare.svelte | 2 +- web/frontend/src/generic/plots/Comparogram.svelte | 2 +- web/frontend/src/status/StatisticsDash.svelte | 2 +- web/frontend/src/status/StatusDash.svelte | 1 + 5 files changed, 5 insertions(+), 3 deletions(-) diff --git a/api/schema.graphqls b/api/schema.graphqls index 5ff1a36..a95df84 100644 --- a/api/schema.graphqls +++ b/api/schema.graphqls @@ -358,6 +358,7 @@ type Query { from: Time! to: Time! ): [NodeMetrics!]! + nodeMetricsList( cluster: String! subCluster: String! diff --git a/web/frontend/src/generic/JobCompare.svelte b/web/frontend/src/generic/JobCompare.svelte index a1e7bfa..55966ac 100644 --- a/web/frontend/src/generic/JobCompare.svelte +++ b/web/frontend/src/generic/JobCompare.svelte @@ -26,7 +26,7 @@ /* Svelte 5 Props */ let { matchedCompareJobs = $bindable(0), - metrics = ccconfig?.plot_list_selectedMetrics, + metrics = getContext("cc-config")?.plot_list_selectedMetrics, filterBuffer = [], } = $props(); diff --git a/web/frontend/src/generic/plots/Comparogram.svelte b/web/frontend/src/generic/plots/Comparogram.svelte index b6f5fd1..2051088 100644 --- a/web/frontend/src/generic/plots/Comparogram.svelte +++ b/web/frontend/src/generic/plots/Comparogram.svelte @@ -44,7 +44,7 @@ /* Const Init */ const clusterCockpitConfig = getContext("cc-config"); - const lineWidth = clusterCockpitConfig.plot_general_lineWidth / window.devicePixelRatio; + const lineWidth = clusterCockpitConfig?.plot_general_lineWidth / window.devicePixelRatio || 2; const cbmode = clusterCockpitConfig?.plot_general_colorblindMode || false; // UPLOT SERIES INIT // diff --git a/web/frontend/src/status/StatisticsDash.svelte b/web/frontend/src/status/StatisticsDash.svelte index d3b4236..e573554 100644 --- a/web/frontend/src/status/StatisticsDash.svelte +++ b/web/frontend/src/status/StatisticsDash.svelte @@ -79,7 +79,7 @@ })); /* Functions */ - // TODO: Originally Uses User View Selection! -> Change to Status View + // TODO: Originally Uses User View Selection! 
-> Change to Status View : Adapt Mutations from TopUserSelect // function updateTopUserConfiguration(select) { // if (ccconfig[`status_view_selectedHistograms:${cluster}`] != select) { // updateConfigurationMutation({ diff --git a/web/frontend/src/status/StatusDash.svelte b/web/frontend/src/status/StatusDash.svelte index 57951a5..f98c1c3 100644 --- a/web/frontend/src/status/StatusDash.svelte +++ b/web/frontend/src/status/StatusDash.svelte @@ -54,6 +54,7 @@ /* Derived */ // Note: nodeMetrics are requested on configured $timestep resolution + // Result: The latest 5 minutes (datapoints) for each node independent of job const statusQuery = $derived(queryStore({ client: client, query: gql` From ed5290be86127d905b039971a8ecba5b808e2727 Mon Sep 17 00:00:00 2001 From: Christoph Kluge Date: Mon, 14 Jul 2025 18:12:34 +0200 Subject: [PATCH 04/20] adds new roofline component for job average based data - clickable, resource sized and duration colored bubbles --- web/frontend/src/Status.root.svelte | 9 +- .../generic/plots/NewBubbleRoofline.svelte | 739 ++++++++++++++++++ web/frontend/src/status/DevelDash.svelte | 164 ++++ 3 files changed, 911 insertions(+), 1 deletion(-) create mode 100644 web/frontend/src/generic/plots/NewBubbleRoofline.svelte create mode 100644 web/frontend/src/status/DevelDash.svelte diff --git a/web/frontend/src/Status.root.svelte b/web/frontend/src/Status.root.svelte index 297e675..ee8cde7 100644 --- a/web/frontend/src/Status.root.svelte +++ b/web/frontend/src/Status.root.svelte @@ -20,6 +20,7 @@ import UsageDash from "./status/UsageDash.svelte"; import NodeDash from "./status/NodeDash.svelte"; import StatisticsDash from "./status/StatisticsDash.svelte"; + import DevelDash from "./status/DevelDash.svelte"; /* Svelte 5 Props */ let { @@ -68,7 +69,13 @@ - + + + + + + + diff --git a/web/frontend/src/generic/plots/NewBubbleRoofline.svelte b/web/frontend/src/generic/plots/NewBubbleRoofline.svelte new file mode 100644 index 0000000..1c89433 --- /dev/null +++ b/web/frontend/src/generic/plots/NewBubbleRoofline.svelte @@ -0,0 +1,739 @@ + + + +{#if roofData != null} +
+{:else} + Cannot render roofline: No data! +{/if} \ No newline at end of file diff --git a/web/frontend/src/status/DevelDash.svelte b/web/frontend/src/status/DevelDash.svelte new file mode 100644 index 0000000..ea00803 --- /dev/null +++ b/web/frontend/src/status/DevelDash.svelte @@ -0,0 +1,164 @@ + + + + + +{#if $initq.data && $jobRoofQuery.data} + {#each $initq.data.clusters.find((c) => c.name == cluster).subClusters as subCluster, i} + + + Classic +
+ {#key $jobRoofQuery.data.jobsMetricStats} + {subCluster.name} Total: {$jobRoofQuery.data.jobsMetricStats.filter( + (data) => data.subCluster == subCluster.name, + ).length} Jobs + data.subCluster == subCluster.name, + ) + )} + /> + {/key} +
+ + + Bubble +
+ {#key $jobRoofQuery.data.jobsMetricStats} + {subCluster.name} Total: {$jobRoofQuery.data.jobsMetricStats.filter( + (data) => data.subCluster == subCluster.name, + ).length} Jobs + data.subCluster == subCluster.name, + ) + )} + jobsData={transformJobsStatsToInfo($jobRoofQuery?.data?.jobsMetricStats.filter( + (data) => data.subCluster == subCluster.name, + ) + )} + /> + {/key} +
+ +
+ {/each} +{/if} From 096217eea694b09968f650a23a26038a7d29ba71 Mon Sep 17 00:00:00 2001 From: Christoph Kluge Date: Tue, 15 Jul 2025 16:00:55 +0200 Subject: [PATCH 05/20] cleanup bubbleRoofline code, comment optional code parts --- .../generic/plots/NewBubbleRoofline.svelte | 182 ++++++++---------- 1 file changed, 82 insertions(+), 100 deletions(-) diff --git a/web/frontend/src/generic/plots/NewBubbleRoofline.svelte b/web/frontend/src/generic/plots/NewBubbleRoofline.svelte index 1c89433..857b49b 100644 --- a/web/frontend/src/generic/plots/NewBubbleRoofline.svelte +++ b/web/frontend/src/generic/plots/NewBubbleRoofline.svelte @@ -37,8 +37,6 @@ height = 380, } = $props(); - $inspect(jobsData) - /* Const Init */ const lineWidth = clusterCockpitConfig?.plot_general_lineWidth || 2; const cbmode = clusterCockpitConfig?.plot_general_colorblindMode || false; @@ -58,6 +56,7 @@ // Copied Example Vars for Uplot Bubble // https://developer.mozilla.org/en-US/docs/Web/API/CanvasRenderingContext2D/isPointInPath let qt; + let hRect; let pxRatio; function setPxRatio() { pxRatio = uPlot.pxRatio; @@ -66,7 +65,7 @@ window.addEventListener('dppxchange', setPxRatio); // let minSize = 6; let maxSize = 60; - let maxArea = Math.PI * (maxSize / 2) ** 2; + // let maxArea = Math.PI * (maxSize / 2) ** 2; // let minArea = Math.PI * (minSize / 2) ** 2; /* Functions */ @@ -94,7 +93,7 @@ return Math.floor(x * 255.0); } function getRGB(c, makeTransparent = false) { - if (makeTransparent) return `rgb(${cbmode ? '0' : getGradientR(c)}, ${getGradientG(c)}, ${getGradientB(c)}, 0.33)`; + if (makeTransparent) return `rgba(${cbmode ? '0' : getGradientR(c)}, ${getGradientG(c)}, ${getGradientB(c)}, 0.33)`; else return `rgb(${cbmode ? '0' : getGradientR(c)}, ${getGradientG(c)}, ${getGradientB(c)})`; } function nearestThousand(num) { @@ -110,28 +109,28 @@ } // quadratic scaling (px area) - function getSize(value, minValue, maxValue) { - let pct = value / maxValue; - // clamp to min area - //let area = Math.max(maxArea * pct, minArea); - let area = maxArea * pct; - return Math.sqrt(area / Math.PI) * 2; - } + // function getSize(value, minValue, maxValue) { + // let pct = value / maxValue; + // // clamp to min area + // //let area = Math.max(maxArea * pct, minArea); + // let area = maxArea * pct; + // return Math.sqrt(area / Math.PI) * 2; + // } - function getSizeMinMax(u) { - let minValue = Infinity; - let maxValue = -Infinity; - for (let i = 1; i < u.series.length; i++) { - let sizeData = u.data[i][2]; - for (let j = 0; j < sizeData.length; j++) { - minValue = Math.min(minValue, sizeData[j]); - maxValue = Math.max(maxValue, sizeData[j]); - } - } - return [minValue, maxValue]; - } + // function getSizeMinMax(u) { + // let minValue = Infinity; + // let maxValue = -Infinity; + // for (let i = 1; i < u.series.length; i++) { + // let sizeData = u.data[i][2]; + // for (let j = 0; j < sizeData.length; j++) { + // minValue = Math.min(minValue, sizeData[j]); + // maxValue = Math.max(maxValue, sizeData[j]); + // } + // } + // return [minValue, maxValue]; + // } - // Quadtree Object (How to import?) 
+ // Quadtree Object (TODO: Split and Import) class Quadtree { constructor (x, y, w, h, l) { let t = this; @@ -239,43 +238,24 @@ } } - // Dot Renderers + // Dot Renderer const makeDrawPoints = (opts) => { - let {/*size,*/ disp, each = () => {}} = opts; + let {/*size, disp,*/ each = () => {}} = opts; const sizeBase = 5 * pxRatio; return (u, seriesIdx, idx0, idx1) => { uPlot.orient(u, seriesIdx, (series, dataX, dataY, scaleX, scaleY, valToPosX, valToPosY, xOff, yOff, xDim, yDim, moveTo, lineTo, rect, arc) => { let d = u.data[seriesIdx]; - let strokeWidth = 2; - - u.ctx.save(); - - u.ctx.rect(u.bbox.left, u.bbox.top, u.bbox.width, u.bbox.height); - u.ctx.clip(); - - // u.ctx.fillStyle = series.fill(); - // u.ctx.strokeStyle = series.stroke(); - u.ctx.lineWidth = strokeWidth; - let deg360 = 2 * Math.PI; - - // console.time("points"); - - // let cir = new Path2D(); - // cir.moveTo(0, 0); - // arc(cir, 0, 0, 3, 0, deg360); - - // Create transformation matrix that moves 200 points to the right - // let m = document.createElementNS('http://www.w3.org/2000/svg', 'svg').createSVGMatrix(); - // m.a = 1; m.b = 0; - // m.c = 0; m.d = 1; - // m.e = 200; m.f = 0; - - // compute bubble dims + /* Alt.: Sizes based on other Data Rows */ // let sizes = disp.size.values(u, seriesIdx, idx0, idx1); + u.ctx.save(); + u.ctx.rect(u.bbox.left, u.bbox.top, u.bbox.width, u.bbox.height); + u.ctx.clip(); + u.ctx.lineWidth = strokeWidth; + // todo: this depends on direction & orientation // todo: calc once per redraw, not per path let filtLft = u.posToVal(-maxSize / 2, scaleX.key); @@ -284,15 +264,14 @@ let filtTop = u.posToVal(-maxSize / 2, scaleY.key); for (let i = 0; i < d[0].length; i++) { - // Import from Roofline + // Color based on Duration u.ctx.strokeStyle = getRGB(u.data[2][i]); u.ctx.fillStyle = getRGB(u.data[2][i], true); - // End - + // Get Values let xVal = d[0][i]; let yVal = d[1][i]; - const size = sizeBase + (jobsData[i]?.numAcc ? jobsData[i].numAcc / 2 : jobsData[i].numNodes); - // let size = sizes[i] * pxRatio; + // Calc Size; Alt.: size = sizes[i] * pxRatio + const size = sizeBase + (jobsData[i]?.numAcc ? jobsData[i].numAcc / 2 : jobsData[i].numNodes); // In NodeMode: Scale with Number of Jobs? if (xVal >= filtLft && xVal <= filtRgt && yVal >= filtBtm && yVal <= filtTop) { let cx = valToPosX(xVal, scaleX, xDim, xOff); @@ -312,28 +291,25 @@ ); } } - - // console.timeEnd("points"); - u.ctx.restore(); }); - return null; }; }; let drawPoints = makeDrawPoints({ - disp: { - size: { - unit: 3, // raw CSS pixels - // discr: true, - values: (u, seriesIdx, idx0, idx1) => { - // TODO: only run once per setData() call - let [minValue, maxValue] = getSizeMinMax(u); - return u.data[seriesIdx][2].map(v => getSize(v, minValue, maxValue)); - }, - }, - }, + // disp: { + // size: { + // // unit: 3, // raw CSS pixels + // // discr: true, + // values: (u, seriesIdx, idx0, idx1) => { + // /* Func to get sizes from additional subSeries [series][2...x] ([0,1] is [x,y]) */ + // // TODO: only run once per setData() call + // let [minValue, maxValue] = getSizeMinMax(u); + // return u.data[seriesIdx][2].map(v => getSize(v, minValue, maxValue)); + // }, + // }, + // }, each: (u, seriesIdx, dataIdx, lft, top, wid, hgt) => { // we get back raw canvas coords (included axes & padding). 
translate to the plotting area origin lft -= u.bbox.left; @@ -470,19 +446,17 @@ }, 200); } - let hRect; function render(roofdata, jobsData) { if (roofdata) { const opts = { - title: "Job Average Roofline Diagram (Bubble)", + title: "Job Average Roofline Diagram", mode: 2, width: width, height: height, legend: { - // show: true, + show: true, }, cursor: { - drag: { x: true, y: false }, // Activate zoom dataIdx: (u, seriesIdx) => { if (seriesIdx == 1) { hRect = null; @@ -521,22 +495,27 @@ } }); } - return hRect && seriesIdx == hRect.sidx ? hRect.didx : null; }, - points: { - size: (u, seriesIdx) => { - return hRect && seriesIdx == hRect.sidx ? hRect.w / pxRatio : 0; - } - }, - focus: { - prox: 1e3, - alpha: 0.3, - dist: (u, seriesIdx) => { - let prox = (hRect?.sidx === seriesIdx ? 0 : Infinity); - return prox; - }, - } + // /* Render "Fill" on Data Point Hover: Works in Example Bubble, does not work here? */ + // points: { + // size: (u, seriesIdx) => { + // return hRect && seriesIdx == hRect.sidx ? hRect.w / pxRatio : 0; + // } + // }, + /* Make all non-focused series semi-transparent: Useless unless more than one series rendered */ + // focus: { + // prox: 1e3, + // alpha: 0.3, + // dist: (u, seriesIdx) => { + // let prox = (hRect?.sidx === seriesIdx ? 0 : Infinity); + // return prox; + // }, + // }, + drag: { // Activates Zoom + x: true, + y: false + }, }, axes: [ { @@ -569,16 +548,17 @@ series: [ null, { - facets: [ - { - scale: 'x', - auto: true, - }, - { - scale: 'y', - auto: true, - } - ], + /* Facets: Define Purpose of Sub-Arrays in Series-Array, e.g. x, y, size, label, color, ... */ + // facets: [ + // { + // scale: 'x', + // auto: true, + // }, + // { + // scale: 'y', + // auto: true, + // } + // ], paths: drawPoints, values: legendValues } @@ -591,8 +571,10 @@ qt = qt || new Quadtree(0, 0, u.bbox.width, u.bbox.height); qt.clear(); + // force-clear the path cache to cause drawBars() to rebuild new quadtree u.series.forEach((s, i) => { - if (i > 0) s._paths = null; + if (i > 0) + s._paths = null; }); }, ], From e48ff8be7324e743a3f1aaec8d9dc28b741db7c1 Mon Sep 17 00:00:00 2001 From: Christoph Kluge Date: Tue, 15 Jul 2025 16:36:12 +0200 Subject: [PATCH 06/20] change bubble render parameters - Note: data points are hover highlighted by tooltip --- web/frontend/src/generic/plots/NewBubbleRoofline.svelte | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/web/frontend/src/generic/plots/NewBubbleRoofline.svelte b/web/frontend/src/generic/plots/NewBubbleRoofline.svelte index 857b49b..b693a92 100644 --- a/web/frontend/src/generic/plots/NewBubbleRoofline.svelte +++ b/web/frontend/src/generic/plots/NewBubbleRoofline.svelte @@ -93,7 +93,7 @@ return Math.floor(x * 255.0); } function getRGB(c, makeTransparent = false) { - if (makeTransparent) return `rgba(${cbmode ? '0' : getGradientR(c)}, ${getGradientG(c)}, ${getGradientB(c)}, 0.33)`; + if (makeTransparent) return `rgba(${cbmode ? '0' : getGradientR(c)}, ${getGradientG(c)}, ${getGradientB(c)}, 0.5)`; else return `rgb(${cbmode ? 
'0' : getGradientR(c)}, ${getGradientG(c)}, ${getGradientB(c)})`; } function nearestThousand(num) { @@ -246,7 +246,7 @@ return (u, seriesIdx, idx0, idx1) => { uPlot.orient(u, seriesIdx, (series, dataX, dataY, scaleX, scaleY, valToPosX, valToPosY, xOff, yOff, xDim, yDim, moveTo, lineTo, rect, arc) => { let d = u.data[seriesIdx]; - let strokeWidth = 2; + let strokeWidth = 1; let deg360 = 2 * Math.PI; /* Alt.: Sizes based on other Data Rows */ // let sizes = disp.size.values(u, seriesIdx, idx0, idx1); From 5cdb80b4d606465aac35eb27a398138ecb4baaaa Mon Sep 17 00:00:00 2001 From: Christoph Kluge Date: Tue, 15 Jul 2025 18:49:23 +0200 Subject: [PATCH 07/20] cleanup intends, add transparency switch to path renderer --- .../generic/plots/NewBubbleRoofline.svelte | 159 +++++++++--------- 1 file changed, 80 insertions(+), 79 deletions(-) diff --git a/web/frontend/src/generic/plots/NewBubbleRoofline.svelte b/web/frontend/src/generic/plots/NewBubbleRoofline.svelte index b693a92..4415791 100644 --- a/web/frontend/src/generic/plots/NewBubbleRoofline.svelte +++ b/web/frontend/src/generic/plots/NewBubbleRoofline.svelte @@ -62,7 +62,7 @@ pxRatio = uPlot.pxRatio; } setPxRatio(); - window.addEventListener('dppxchange', setPxRatio); + window.addEventListener('dppxchange', setPxRatio); // let minSize = 6; let maxSize = 60; // let maxArea = Math.PI * (maxSize / 2) ** 2; @@ -92,8 +92,8 @@ x = 1.0 - (x - 0.25) * 4.0; return Math.floor(x * 255.0); } - function getRGB(c, makeTransparent = false) { - if (makeTransparent) return `rgba(${cbmode ? '0' : getGradientR(c)}, ${getGradientG(c)}, ${getGradientB(c)}, 0.5)`; + function getRGB(c, transparent = false) { + if (transparent) return `rgba(${cbmode ? '0' : getGradientR(c)}, ${getGradientG(c)}, ${getGradientB(c)}, 0.5)`; else return `rgb(${cbmode ? '0' : getGradientR(c)}, ${getGradientG(c)}, ${getGradientB(c)})`; } function nearestThousand(num) { @@ -240,7 +240,7 @@ // Dot Renderer const makeDrawPoints = (opts) => { - let {/*size, disp,*/ each = () => {}} = opts; + let {/*size, disp,*/ transparentFill, each = () => {}} = opts; const sizeBase = 5 * pxRatio; return (u, seriesIdx, idx0, idx1) => { @@ -264,9 +264,10 @@ let filtTop = u.posToVal(-maxSize / 2, scaleY.key); for (let i = 0; i < d[0].length; i++) { - // Color based on Duration + // Color based on Duration, check index for transparency highlighting u.ctx.strokeStyle = getRGB(u.data[2][i]); - u.ctx.fillStyle = getRGB(u.data[2][i], true); + u.ctx.fillStyle = getRGB(u.data[2][i], transparentFill); + // Get Values let xVal = d[0][i]; let yVal = d[1][i]; @@ -310,6 +311,7 @@ // }, // }, // }, + transparentFill: true, each: (u, seriesIdx, dataIdx, lft, top, wid, hgt) => { // we get back raw canvas coords (included axes & padding). 
translate to the plotting area origin lft -= u.bbox.left; @@ -454,64 +456,64 @@ width: width, height: height, legend: { - show: true, + show: true, }, cursor: { dataIdx: (u, seriesIdx) => { - if (seriesIdx == 1) { - hRect = null; + if (seriesIdx == 1) { + hRect = null; - let dist = Infinity; - let area = Infinity; - let cx = u.cursor.left * pxRatio; - let cy = u.cursor.top * pxRatio; + let dist = Infinity; + let area = Infinity; + let cx = u.cursor.left * pxRatio; + let cy = u.cursor.top * pxRatio; - qt.get(cx, cy, 1, 1, o => { - if (pointWithin(cx, cy, o.x, o.y, o.x + o.w, o.y + o.h)) { - let ocx = o.x + o.w / 2; - let ocy = o.y + o.h / 2; + qt.get(cx, cy, 1, 1, o => { + if (pointWithin(cx, cy, o.x, o.y, o.x + o.w, o.y + o.h)) { + let ocx = o.x + o.w / 2; + let ocy = o.y + o.h / 2; - let dx = ocx - cx; - let dy = ocy - cy; + let dx = ocx - cx; + let dy = ocy - cy; - let d = Math.sqrt(dx ** 2 + dy ** 2); + let d = Math.sqrt(dx ** 2 + dy ** 2); - // test against radius for actual hover - if (d <= o.w / 2) { - let a = o.w * o.h; + // test against radius for actual hover + if (d <= o.w / 2) { + let a = o.w * o.h; - // prefer smallest - if (a < area) { - area = a; - dist = d; - hRect = o; - } - // only hover bbox with closest distance - else if (a == area && d <= dist) { - dist = d; - hRect = o; - } - } - } - }); - } - return hRect && seriesIdx == hRect.sidx ? hRect.didx : null; - }, - // /* Render "Fill" on Data Point Hover: Works in Example Bubble, does not work here? */ - // points: { - // size: (u, seriesIdx) => { - // return hRect && seriesIdx == hRect.sidx ? hRect.w / pxRatio : 0; - // } - // }, + // prefer smallest + if (a < area) { + area = a; + dist = d; + hRect = o; + } + // only hover bbox with closest distance + else if (a == area && d <= dist) { + dist = d; + hRect = o; + } + } + } + }); + } + return hRect && seriesIdx == hRect.sidx ? hRect.didx : null; + }, + /* Render "Fill" on Data Point Hover: Works in Example Bubble, does not work here? Guess: Interference with tooltip */ + // points: { + // size: (u, seriesIdx) => { + // return hRect && seriesIdx == hRect.sidx ? hRect.w / pxRatio : 0; + // } + // }, /* Make all non-focused series semi-transparent: Useless unless more than one series rendered */ - // focus: { - // prox: 1e3, - // alpha: 0.3, - // dist: (u, seriesIdx) => { - // let prox = (hRect?.sidx === seriesIdx ? 0 : Infinity); - // return prox; - // }, - // }, + // focus: { + // prox: 1e3, + // alpha: 0.3, + // dist: (u, seriesIdx) => { + // let prox = (hRect?.sidx === seriesIdx ? 0 : Infinity); + // return prox; + // }, + // }, drag: { // Activates Zoom x: true, y: false @@ -550,26 +552,26 @@ { /* Facets: Define Purpose of Sub-Arrays in Series-Array, e.g. x, y, size, label, color, ... 
*/ // facets: [ - // { - // scale: 'x', - // auto: true, - // }, - // { - // scale: 'y', - // auto: true, - // } - // ], + // { + // scale: 'x', + // auto: true, + // }, + // { + // scale: 'y', + // auto: true, + // } + // ], paths: drawPoints, values: legendValues } ], hooks: { // setSeries: [ (u, seriesIdx) => console.log('setSeries', seriesIdx) ], - // setLegend: [ u => console.log('setLegend', u.legend.idxs) ], + // setLegend: [ u => console.log('setLegend', u.legend.idxs) ], drawClear: [ (u) => { qt = qt || new Quadtree(0, 0, u.bbox.width, u.bbox.height); - qt.clear(); + qt.clear(); // force-clear the path cache to cause drawBars() to rebuild new quadtree u.series.forEach((s, i) => { @@ -675,11 +677,11 @@ u.ctx.fillText('Short', posX, posY) const start = posX + 10 for (let x = start; x < posXLimit; x += 10) { - let c = (x - start) / (posXLimit - start) - u.ctx.fillStyle = getRGB(c) - u.ctx.beginPath() - u.ctx.arc(x, posY, 3, 0, Math.PI * 2, false) - u.ctx.fill() + let c = (x - start) / (posXLimit - start) + u.ctx.fillStyle = getRGB(c) + u.ctx.beginPath() + u.ctx.arc(x, posY, 3, 0, Math.PI * 2, false) + u.ctx.fill() } u.ctx.fillStyle = 'black' u.ctx.fillText('Long', posXLimit + 23, posY) @@ -687,13 +689,13 @@ ], }, plugins: [ - tooltipPlugin({ - onclick(u, dataIdx) { - window.open(`/monitoring/job/${jobsData[dataIdx].id}`); - }, + tooltipPlugin({ + onclick(u, dataIdx) { + window.open(`/monitoring/job/${jobsData[dataIdx].id}`); + }, getJobData: (u, dataIdx) => { return jobsData[dataIdx] } - }), - ], + }), + ], }; uplot = new uPlot(opts, roofdata, plotWrapper); } else { @@ -716,6 +718,5 @@ {#if roofData != null}
{:else} - Cannot render roofline: No data! -{/if} \ No newline at end of file + Cannot render roofline: No data! +{/if} From 697acd1d8867a0b523864d9a8a42b746b297bbf7 Mon Sep 17 00:00:00 2001 From: Christoph Kluge Date: Fri, 18 Jul 2025 18:12:07 +0200 Subject: [PATCH 08/20] Extend bubbleRoofline for nodeData, add column to node table, rename nodeStats query --- api/schema.graphqls | 7 +- internal/graph/generated/generated.go | 214 +++++++++++++----- internal/graph/model/models_gen.go | 2 +- internal/graph/schema.resolvers.go | 22 +- .../migrations/sqlite3/10_node-table.up.sql | 1 + internal/repository/node.go | 23 +- .../generic/plots/NewBubbleRoofline.svelte | 180 ++++++++++++--- web/frontend/src/status/DevelDash.svelte | 197 +++++++++++++++- 8 files changed, 518 insertions(+), 128 deletions(-) diff --git a/api/schema.graphqls b/api/schema.graphqls index a95df84..b3dadb5 100644 --- a/api/schema.graphqls +++ b/api/schema.graphqls @@ -12,12 +12,13 @@ type Node { hostname: String! cluster: String! subCluster: String! + runningJobs: Int! nodeState: NodeState! - HealthState: MonitoringState! + healthState: MonitoringState! metaData: Any } -type NodeStats { +type NodeStates { state: String! count: Int! } @@ -303,7 +304,7 @@ type Query { ## Node Queries New node(id: ID!): Node nodes(filter: [NodeFilter!], order: OrderByInput): NodeStateResultList! - nodeStats(filter: [NodeFilter!]): [NodeStats!]! + nodeStates(filter: [NodeFilter!]): [NodeStates!]! job(id: ID!): Job jobMetrics( diff --git a/internal/graph/generated/generated.go b/internal/graph/generated/generated.go index b150423..a725802 100644 --- a/internal/graph/generated/generated.go +++ b/internal/graph/generated/generated.go @@ -276,6 +276,7 @@ type ComplexityRoot struct { ID func(childComplexity int) int MetaData func(childComplexity int) int NodeState func(childComplexity int) int + RunningJobs func(childComplexity int) int SubCluster func(childComplexity int) int } @@ -290,7 +291,7 @@ type ComplexityRoot struct { Items func(childComplexity int) int } - NodeStats struct { + NodeStates struct { Count func(childComplexity int) int State func(childComplexity int) int } @@ -318,7 +319,7 @@ type ComplexityRoot struct { Node func(childComplexity int, id string) int NodeMetrics func(childComplexity int, cluster string, nodes []string, scopes []schema.MetricScope, metrics []string, from time.Time, to time.Time) int NodeMetricsList func(childComplexity int, cluster string, subCluster string, nodeFilter string, scopes []schema.MetricScope, metrics []string, from time.Time, to time.Time, page *model.PageRequest, resolution *int) int - NodeStats func(childComplexity int, filter []*model.NodeFilter) int + NodeStates func(childComplexity int, filter []*model.NodeFilter) int Nodes func(childComplexity int, filter []*model.NodeFilter, order *model.OrderByInput) int RooflineHeatmap func(childComplexity int, filter []*model.JobFilter, rows int, cols int, minX float64, minY float64, maxX float64, maxY float64) int ScopedJobStats func(childComplexity int, id string, metrics []string, scopes []schema.MetricScope) int @@ -444,6 +445,7 @@ type MutationResolver interface { UpdateConfiguration(ctx context.Context, name string, value string) (*string, error) } type NodeResolver interface { + RunningJobs(ctx context.Context, obj *schema.Node) (int, error) NodeState(ctx context.Context, obj *schema.Node) (string, error) HealthState(ctx context.Context, obj *schema.Node) (schema.NodeState, error) MetaData(ctx context.Context, obj *schema.Node) (any, error) @@ -456,7 
+458,7 @@ type QueryResolver interface { AllocatedNodes(ctx context.Context, cluster string) ([]*model.Count, error) Node(ctx context.Context, id string) (*schema.Node, error) Nodes(ctx context.Context, filter []*model.NodeFilter, order *model.OrderByInput) (*model.NodeStateResultList, error) - NodeStats(ctx context.Context, filter []*model.NodeFilter) ([]*model.NodeStats, error) + NodeStates(ctx context.Context, filter []*model.NodeFilter) ([]*model.NodeStates, error) Job(ctx context.Context, id string) (*schema.Job, error) JobMetrics(ctx context.Context, id string, metrics []string, scopes []schema.MetricScope, resolution *int) ([]*model.JobMetricWithName, error) JobStats(ctx context.Context, id string, metrics []string) ([]*model.NamedStats, error) @@ -1474,7 +1476,7 @@ func (e *executableSchema) Complexity(typeName, field string, childComplexity in return e.complexity.Node.Cluster(childComplexity), true - case "Node.HealthState": + case "Node.healthState": if e.complexity.Node.HealthState == nil { break } @@ -1509,6 +1511,13 @@ func (e *executableSchema) Complexity(typeName, field string, childComplexity in return e.complexity.Node.NodeState(childComplexity), true + case "Node.runningJobs": + if e.complexity.Node.RunningJobs == nil { + break + } + + return e.complexity.Node.RunningJobs(childComplexity), true + case "Node.subCluster": if e.complexity.Node.SubCluster == nil { break @@ -1551,19 +1560,19 @@ func (e *executableSchema) Complexity(typeName, field string, childComplexity in return e.complexity.NodeStateResultList.Items(childComplexity), true - case "NodeStats.count": - if e.complexity.NodeStats.Count == nil { + case "NodeStates.count": + if e.complexity.NodeStates.Count == nil { break } - return e.complexity.NodeStats.Count(childComplexity), true + return e.complexity.NodeStates.Count(childComplexity), true - case "NodeStats.state": - if e.complexity.NodeStats.State == nil { + case "NodeStates.state": + if e.complexity.NodeStates.State == nil { break } - return e.complexity.NodeStats.State(childComplexity), true + return e.complexity.NodeStates.State(childComplexity), true case "NodesResultList.count": if e.complexity.NodesResultList.Count == nil { @@ -1753,17 +1762,17 @@ func (e *executableSchema) Complexity(typeName, field string, childComplexity in return e.complexity.Query.NodeMetricsList(childComplexity, args["cluster"].(string), args["subCluster"].(string), args["nodeFilter"].(string), args["scopes"].([]schema.MetricScope), args["metrics"].([]string), args["from"].(time.Time), args["to"].(time.Time), args["page"].(*model.PageRequest), args["resolution"].(*int)), true - case "Query.nodeStats": - if e.complexity.Query.NodeStats == nil { + case "Query.nodeStates": + if e.complexity.Query.NodeStates == nil { break } - args, err := ec.field_Query_nodeStats_args(context.TODO(), rawArgs) + args, err := ec.field_Query_nodeStates_args(context.TODO(), rawArgs) if err != nil { return 0, false } - return e.complexity.Query.NodeStats(childComplexity, args["filter"].([]*model.NodeFilter)), true + return e.complexity.Query.NodeStates(childComplexity, args["filter"].([]*model.NodeFilter)), true case "Query.nodes": if e.complexity.Query.Nodes == nil { @@ -2333,12 +2342,13 @@ type Node { hostname: String! cluster: String! subCluster: String! + runningJobs: Int! nodeState: NodeState! - HealthState: MonitoringState! + healthState: MonitoringState! metaData: Any } -type NodeStats { +type NodeStates { state: String! count: Int! 
} @@ -2621,9 +2631,10 @@ type Query { user(username: String!): User allocatedNodes(cluster: String!): [Count!]! + ## Node Queries New node(id: ID!): Node nodes(filter: [NodeFilter!], order: OrderByInput): NodeStateResultList! - nodeStats(filter: [NodeFilter!]): [NodeStats!]! + nodeStates(filter: [NodeFilter!]): [NodeStates!]! job(id: ID!): Job jobMetrics( @@ -2678,6 +2689,7 @@ type Query { from: Time! to: Time! ): [NodeMetrics!]! + nodeMetricsList( cluster: String! subCluster: String! @@ -4062,17 +4074,17 @@ func (ec *executionContext) field_Query_nodeMetrics_argsTo( return zeroVal, nil } -func (ec *executionContext) field_Query_nodeStats_args(ctx context.Context, rawArgs map[string]any) (map[string]any, error) { +func (ec *executionContext) field_Query_nodeStates_args(ctx context.Context, rawArgs map[string]any) (map[string]any, error) { var err error args := map[string]any{} - arg0, err := ec.field_Query_nodeStats_argsFilter(ctx, rawArgs) + arg0, err := ec.field_Query_nodeStates_argsFilter(ctx, rawArgs) if err != nil { return nil, err } args["filter"] = arg0 return args, nil } -func (ec *executionContext) field_Query_nodeStats_argsFilter( +func (ec *executionContext) field_Query_nodeStates_argsFilter( ctx context.Context, rawArgs map[string]any, ) ([]*model.NodeFilter, error) { @@ -10985,6 +10997,50 @@ func (ec *executionContext) fieldContext_Node_subCluster(_ context.Context, fiel return fc, nil } +func (ec *executionContext) _Node_runningJobs(ctx context.Context, field graphql.CollectedField, obj *schema.Node) (ret graphql.Marshaler) { + fc, err := ec.fieldContext_Node_runningJobs(ctx, field) + if err != nil { + return graphql.Null + } + ctx = graphql.WithFieldContext(ctx, fc) + defer func() { + if r := recover(); r != nil { + ec.Error(ctx, ec.Recover(ctx, r)) + ret = graphql.Null + } + }() + resTmp, err := ec.ResolverMiddleware(ctx, func(rctx context.Context) (any, error) { + ctx = rctx // use context from middleware stack in children + return ec.resolvers.Node().RunningJobs(rctx, obj) + }) + if err != nil { + ec.Error(ctx, err) + return graphql.Null + } + if resTmp == nil { + if !graphql.HasFieldError(ctx, fc) { + ec.Errorf(ctx, "must not be null") + } + return graphql.Null + } + res := resTmp.(int) + fc.Result = res + return ec.marshalNInt2int(ctx, field.Selections, res) +} + +func (ec *executionContext) fieldContext_Node_runningJobs(_ context.Context, field graphql.CollectedField) (fc *graphql.FieldContext, err error) { + fc = &graphql.FieldContext{ + Object: "Node", + Field: field, + IsMethod: true, + IsResolver: true, + Child: func(ctx context.Context, field graphql.CollectedField) (*graphql.FieldContext, error) { + return nil, errors.New("field of type Int does not have child fields") + }, + } + return fc, nil +} + func (ec *executionContext) _Node_nodeState(ctx context.Context, field graphql.CollectedField, obj *schema.Node) (ret graphql.Marshaler) { fc, err := ec.fieldContext_Node_nodeState(ctx, field) if err != nil { @@ -11029,8 +11085,8 @@ func (ec *executionContext) fieldContext_Node_nodeState(_ context.Context, field return fc, nil } -func (ec *executionContext) _Node_HealthState(ctx context.Context, field graphql.CollectedField, obj *schema.Node) (ret graphql.Marshaler) { - fc, err := ec.fieldContext_Node_HealthState(ctx, field) +func (ec *executionContext) _Node_healthState(ctx context.Context, field graphql.CollectedField, obj *schema.Node) (ret graphql.Marshaler) { + fc, err := ec.fieldContext_Node_healthState(ctx, field) if err != nil { return graphql.Null } @@ 
-11060,7 +11116,7 @@ func (ec *executionContext) _Node_HealthState(ctx context.Context, field graphql return ec.marshalNMonitoringState2githubᚗcomᚋClusterCockpitᚋccᚑlibᚋschemaᚐNodeState(ctx, field.Selections, res) } -func (ec *executionContext) fieldContext_Node_HealthState(_ context.Context, field graphql.CollectedField) (fc *graphql.FieldContext, err error) { +func (ec *executionContext) fieldContext_Node_healthState(_ context.Context, field graphql.CollectedField) (fc *graphql.FieldContext, err error) { fc = &graphql.FieldContext{ Object: "Node", Field: field, @@ -11301,10 +11357,12 @@ func (ec *executionContext) fieldContext_NodeStateResultList_items(_ context.Con return ec.fieldContext_Node_cluster(ctx, field) case "subCluster": return ec.fieldContext_Node_subCluster(ctx, field) + case "runningJobs": + return ec.fieldContext_Node_runningJobs(ctx, field) case "nodeState": return ec.fieldContext_Node_nodeState(ctx, field) - case "HealthState": - return ec.fieldContext_Node_HealthState(ctx, field) + case "healthState": + return ec.fieldContext_Node_healthState(ctx, field) case "metaData": return ec.fieldContext_Node_metaData(ctx, field) } @@ -11355,8 +11413,8 @@ func (ec *executionContext) fieldContext_NodeStateResultList_count(_ context.Con return fc, nil } -func (ec *executionContext) _NodeStats_state(ctx context.Context, field graphql.CollectedField, obj *model.NodeStats) (ret graphql.Marshaler) { - fc, err := ec.fieldContext_NodeStats_state(ctx, field) +func (ec *executionContext) _NodeStates_state(ctx context.Context, field graphql.CollectedField, obj *model.NodeStates) (ret graphql.Marshaler) { + fc, err := ec.fieldContext_NodeStates_state(ctx, field) if err != nil { return graphql.Null } @@ -11386,9 +11444,9 @@ func (ec *executionContext) _NodeStats_state(ctx context.Context, field graphql. return ec.marshalNString2string(ctx, field.Selections, res) } -func (ec *executionContext) fieldContext_NodeStats_state(_ context.Context, field graphql.CollectedField) (fc *graphql.FieldContext, err error) { +func (ec *executionContext) fieldContext_NodeStates_state(_ context.Context, field graphql.CollectedField) (fc *graphql.FieldContext, err error) { fc = &graphql.FieldContext{ - Object: "NodeStats", + Object: "NodeStates", Field: field, IsMethod: false, IsResolver: false, @@ -11399,8 +11457,8 @@ func (ec *executionContext) fieldContext_NodeStats_state(_ context.Context, fiel return fc, nil } -func (ec *executionContext) _NodeStats_count(ctx context.Context, field graphql.CollectedField, obj *model.NodeStats) (ret graphql.Marshaler) { - fc, err := ec.fieldContext_NodeStats_count(ctx, field) +func (ec *executionContext) _NodeStates_count(ctx context.Context, field graphql.CollectedField, obj *model.NodeStates) (ret graphql.Marshaler) { + fc, err := ec.fieldContext_NodeStates_count(ctx, field) if err != nil { return graphql.Null } @@ -11430,9 +11488,9 @@ func (ec *executionContext) _NodeStats_count(ctx context.Context, field graphql. 
return ec.marshalNInt2int(ctx, field.Selections, res) } -func (ec *executionContext) fieldContext_NodeStats_count(_ context.Context, field graphql.CollectedField) (fc *graphql.FieldContext, err error) { +func (ec *executionContext) fieldContext_NodeStates_count(_ context.Context, field graphql.CollectedField) (fc *graphql.FieldContext, err error) { fc = &graphql.FieldContext{ - Object: "NodeStats", + Object: "NodeStates", Field: field, IsMethod: false, IsResolver: false, @@ -12027,10 +12085,12 @@ func (ec *executionContext) fieldContext_Query_node(ctx context.Context, field g return ec.fieldContext_Node_cluster(ctx, field) case "subCluster": return ec.fieldContext_Node_subCluster(ctx, field) + case "runningJobs": + return ec.fieldContext_Node_runningJobs(ctx, field) case "nodeState": return ec.fieldContext_Node_nodeState(ctx, field) - case "HealthState": - return ec.fieldContext_Node_HealthState(ctx, field) + case "healthState": + return ec.fieldContext_Node_healthState(ctx, field) case "metaData": return ec.fieldContext_Node_metaData(ctx, field) } @@ -12112,8 +12172,8 @@ func (ec *executionContext) fieldContext_Query_nodes(ctx context.Context, field return fc, nil } -func (ec *executionContext) _Query_nodeStats(ctx context.Context, field graphql.CollectedField) (ret graphql.Marshaler) { - fc, err := ec.fieldContext_Query_nodeStats(ctx, field) +func (ec *executionContext) _Query_nodeStates(ctx context.Context, field graphql.CollectedField) (ret graphql.Marshaler) { + fc, err := ec.fieldContext_Query_nodeStates(ctx, field) if err != nil { return graphql.Null } @@ -12126,7 +12186,7 @@ func (ec *executionContext) _Query_nodeStats(ctx context.Context, field graphql. }() resTmp, err := ec.ResolverMiddleware(ctx, func(rctx context.Context) (any, error) { ctx = rctx // use context from middleware stack in children - return ec.resolvers.Query().NodeStats(rctx, fc.Args["filter"].([]*model.NodeFilter)) + return ec.resolvers.Query().NodeStates(rctx, fc.Args["filter"].([]*model.NodeFilter)) }) if err != nil { ec.Error(ctx, err) @@ -12138,12 +12198,12 @@ func (ec *executionContext) _Query_nodeStats(ctx context.Context, field graphql. 
} return graphql.Null } - res := resTmp.([]*model.NodeStats) + res := resTmp.([]*model.NodeStates) fc.Result = res - return ec.marshalNNodeStats2ᚕᚖgithubᚗcomᚋClusterCockpitᚋccᚑbackendᚋinternalᚋgraphᚋmodelᚐNodeStatsᚄ(ctx, field.Selections, res) + return ec.marshalNNodeStates2ᚕᚖgithubᚗcomᚋClusterCockpitᚋccᚑbackendᚋinternalᚋgraphᚋmodelᚐNodeStatesᚄ(ctx, field.Selections, res) } -func (ec *executionContext) fieldContext_Query_nodeStats(ctx context.Context, field graphql.CollectedField) (fc *graphql.FieldContext, err error) { +func (ec *executionContext) fieldContext_Query_nodeStates(ctx context.Context, field graphql.CollectedField) (fc *graphql.FieldContext, err error) { fc = &graphql.FieldContext{ Object: "Query", Field: field, @@ -12152,11 +12212,11 @@ func (ec *executionContext) fieldContext_Query_nodeStats(ctx context.Context, fi Child: func(ctx context.Context, field graphql.CollectedField) (*graphql.FieldContext, error) { switch field.Name { case "state": - return ec.fieldContext_NodeStats_state(ctx, field) + return ec.fieldContext_NodeStates_state(ctx, field) case "count": - return ec.fieldContext_NodeStats_count(ctx, field) + return ec.fieldContext_NodeStates_count(ctx, field) } - return nil, fmt.Errorf("no field named %q was found under type NodeStats", field.Name) + return nil, fmt.Errorf("no field named %q was found under type NodeStates", field.Name) }, } defer func() { @@ -12166,7 +12226,7 @@ func (ec *executionContext) fieldContext_Query_nodeStats(ctx context.Context, fi } }() ctx = graphql.WithFieldContext(ctx, fc) - if fc.Args, err = ec.field_Query_nodeStats_args(ctx, field.ArgumentMap(ec.Variables)); err != nil { + if fc.Args, err = ec.field_Query_nodeStates_args(ctx, field.ArgumentMap(ec.Variables)); err != nil { ec.Error(ctx, err) return fc, err } @@ -19829,6 +19889,42 @@ func (ec *executionContext) _Node(ctx context.Context, sel ast.SelectionSet, obj if out.Values[i] == graphql.Null { atomic.AddUint32(&out.Invalids, 1) } + case "runningJobs": + field := field + + innerFunc := func(ctx context.Context, fs *graphql.FieldSet) (res graphql.Marshaler) { + defer func() { + if r := recover(); r != nil { + ec.Error(ctx, ec.Recover(ctx, r)) + } + }() + res = ec._Node_runningJobs(ctx, field, obj) + if res == graphql.Null { + atomic.AddUint32(&fs.Invalids, 1) + } + return res + } + + if field.Deferrable != nil { + dfs, ok := deferred[field.Deferrable.Label] + di := 0 + if ok { + dfs.AddField(field) + di = len(dfs.Values) - 1 + } else { + dfs = graphql.NewFieldSet([]graphql.CollectedField{field}) + deferred[field.Deferrable.Label] = dfs + } + dfs.Concurrently(di, func(ctx context.Context) graphql.Marshaler { + return innerFunc(ctx, dfs) + }) + + // don't run the out.Concurrently() call below + out.Values[i] = graphql.Null + continue + } + + out.Concurrently(i, func(ctx context.Context) graphql.Marshaler { return innerFunc(ctx, out) }) case "nodeState": field := field @@ -19865,7 +19961,7 @@ func (ec *executionContext) _Node(ctx context.Context, sel ast.SelectionSet, obj } out.Concurrently(i, func(ctx context.Context) graphql.Marshaler { return innerFunc(ctx, out) }) - case "HealthState": + case "healthState": field := field innerFunc := func(ctx context.Context, fs *graphql.FieldSet) (res graphql.Marshaler) { @@ -19874,7 +19970,7 @@ func (ec *executionContext) _Node(ctx context.Context, sel ast.SelectionSet, obj ec.Error(ctx, ec.Recover(ctx, r)) } }() - res = ec._Node_HealthState(ctx, field, obj) + res = ec._Node_healthState(ctx, field, obj) if res == graphql.Null { 
atomic.AddUint32(&fs.Invalids, 1) } @@ -20047,24 +20143,24 @@ func (ec *executionContext) _NodeStateResultList(ctx context.Context, sel ast.Se return out } -var nodeStatsImplementors = []string{"NodeStats"} +var nodeStatesImplementors = []string{"NodeStates"} -func (ec *executionContext) _NodeStats(ctx context.Context, sel ast.SelectionSet, obj *model.NodeStats) graphql.Marshaler { - fields := graphql.CollectFields(ec.OperationContext, sel, nodeStatsImplementors) +func (ec *executionContext) _NodeStates(ctx context.Context, sel ast.SelectionSet, obj *model.NodeStates) graphql.Marshaler { + fields := graphql.CollectFields(ec.OperationContext, sel, nodeStatesImplementors) out := graphql.NewFieldSet(fields) deferred := make(map[string]*graphql.FieldSet) for i, field := range fields { switch field.Name { case "__typename": - out.Values[i] = graphql.MarshalString("NodeStats") + out.Values[i] = graphql.MarshalString("NodeStates") case "state": - out.Values[i] = ec._NodeStats_state(ctx, field, obj) + out.Values[i] = ec._NodeStates_state(ctx, field, obj) if out.Values[i] == graphql.Null { out.Invalids++ } case "count": - out.Values[i] = ec._NodeStats_count(ctx, field, obj) + out.Values[i] = ec._NodeStates_count(ctx, field, obj) if out.Values[i] == graphql.Null { out.Invalids++ } @@ -20307,7 +20403,7 @@ func (ec *executionContext) _Query(ctx context.Context, sel ast.SelectionSet) gr } out.Concurrently(i, func(ctx context.Context) graphql.Marshaler { return rrm(innerCtx) }) - case "nodeStats": + case "nodeStates": field := field innerFunc := func(ctx context.Context, fs *graphql.FieldSet) (res graphql.Marshaler) { @@ -20316,7 +20412,7 @@ func (ec *executionContext) _Query(ctx context.Context, sel ast.SelectionSet) gr ec.Error(ctx, ec.Recover(ctx, r)) } }() - res = ec._Query_nodeStats(ctx, field) + res = ec._Query_nodeStates(ctx, field) if res == graphql.Null { atomic.AddUint32(&fs.Invalids, 1) } @@ -22961,7 +23057,7 @@ func (ec *executionContext) marshalNNodeStateResultList2ᚖgithubᚗcomᚋCluste return ec._NodeStateResultList(ctx, sel, v) } -func (ec *executionContext) marshalNNodeStats2ᚕᚖgithubᚗcomᚋClusterCockpitᚋccᚑbackendᚋinternalᚋgraphᚋmodelᚐNodeStatsᚄ(ctx context.Context, sel ast.SelectionSet, v []*model.NodeStats) graphql.Marshaler { +func (ec *executionContext) marshalNNodeStates2ᚕᚖgithubᚗcomᚋClusterCockpitᚋccᚑbackendᚋinternalᚋgraphᚋmodelᚐNodeStatesᚄ(ctx context.Context, sel ast.SelectionSet, v []*model.NodeStates) graphql.Marshaler { ret := make(graphql.Array, len(v)) var wg sync.WaitGroup isLen1 := len(v) == 1 @@ -22985,7 +23081,7 @@ func (ec *executionContext) marshalNNodeStats2ᚕᚖgithubᚗcomᚋClusterCockpi if !isLen1 { defer wg.Done() } - ret[i] = ec.marshalNNodeStats2ᚖgithubᚗcomᚋClusterCockpitᚋccᚑbackendᚋinternalᚋgraphᚋmodelᚐNodeStats(ctx, sel, v[i]) + ret[i] = ec.marshalNNodeStates2ᚖgithubᚗcomᚋClusterCockpitᚋccᚑbackendᚋinternalᚋgraphᚋmodelᚐNodeStates(ctx, sel, v[i]) } if isLen1 { f(i) @@ -23005,14 +23101,14 @@ func (ec *executionContext) marshalNNodeStats2ᚕᚖgithubᚗcomᚋClusterCockpi return ret } -func (ec *executionContext) marshalNNodeStats2ᚖgithubᚗcomᚋClusterCockpitᚋccᚑbackendᚋinternalᚋgraphᚋmodelᚐNodeStats(ctx context.Context, sel ast.SelectionSet, v *model.NodeStats) graphql.Marshaler { +func (ec *executionContext) marshalNNodeStates2ᚖgithubᚗcomᚋClusterCockpitᚋccᚑbackendᚋinternalᚋgraphᚋmodelᚐNodeStates(ctx context.Context, sel ast.SelectionSet, v *model.NodeStates) graphql.Marshaler { if v == nil { if !graphql.HasFieldError(ctx, graphql.GetFieldContext(ctx)) { ec.Errorf(ctx, "the requested 
element is null which the schema does not allow") } return graphql.Null } - return ec._NodeStats(ctx, sel, v) + return ec._NodeStates(ctx, sel, v) } func (ec *executionContext) marshalNNodesResultList2githubᚗcomᚋClusterCockpitᚋccᚑbackendᚋinternalᚋgraphᚋmodelᚐNodesResultList(ctx context.Context, sel ast.SelectionSet, v model.NodesResultList) graphql.Marshaler { diff --git a/internal/graph/model/models_gen.go b/internal/graph/model/models_gen.go index c5cc79b..e6619b7 100644 --- a/internal/graph/model/models_gen.go +++ b/internal/graph/model/models_gen.go @@ -186,7 +186,7 @@ type NodeStateResultList struct { Count *int `json:"count,omitempty"` } -type NodeStats struct { +type NodeStates struct { State string `json:"state"` Count int `json:"count"` } diff --git a/internal/graph/schema.resolvers.go b/internal/graph/schema.resolvers.go index 1284c09..e0a7948 100644 --- a/internal/graph/schema.resolvers.go +++ b/internal/graph/schema.resolvers.go @@ -305,14 +305,20 @@ func (r *mutationResolver) UpdateConfiguration(ctx context.Context, name string, return nil, nil } -// NodeState is the resolver for the nodeState field. -func (r *nodeResolver) NodeState(ctx context.Context, obj *schema.Node) (string, error) { - panic(fmt.Errorf("not implemented: NodeState - nodeState")) +// RunningJobs is the resolver for the runningJobs field. +func (r *nodeResolver) RunningJobs(ctx context.Context, obj *schema.Node) (int, error) { + panic(fmt.Errorf("not implemented: RunningJobs - runningJobs")) } -// HealthState is the resolver for the HealthState field. +// NodeState is the resolver for the nodeState field. +func (r *nodeResolver) NodeState(ctx context.Context, obj *schema.Node) (string, error) { + return string(obj.NodeState), nil +} + +// HealthState is the resolver for the healthState field. func (r *nodeResolver) HealthState(ctx context.Context, obj *schema.Node) (schema.NodeState, error) { - panic(fmt.Errorf("not implemented: HealthState - HealthState")) + // FIXME: Why is Output of schema.NodeState Type? + panic(fmt.Errorf("not implemented: HealthState - healthState")) } // MetaData is the resolver for the metaData field. @@ -378,8 +384,8 @@ func (r *queryResolver) Nodes(ctx context.Context, filter []*model.NodeFilter, o return &model.NodeStateResultList{Items: nodes, Count: &count}, err } -// NodeStats is the resolver for the nodeStats field. -func (r *queryResolver) NodeStats(ctx context.Context, filter []*model.NodeFilter) ([]*model.NodeStats, error) { +// NodeStates is the resolver for the nodeStates field. +func (r *queryResolver) NodeStates(ctx context.Context, filter []*model.NodeFilter) ([]*model.NodeStates, error) { repo := repository.GetNodeRepository() stateCounts, serr := repo.CountNodeStates(ctx, filter) @@ -394,7 +400,7 @@ func (r *queryResolver) NodeStats(ctx context.Context, filter []*model.NodeFilte return nil, herr } - allCounts := make([]*model.NodeStats, 0) + allCounts := make([]*model.NodeStates, 0) allCounts = append(stateCounts, healthCounts...) 
return allCounts, nil diff --git a/internal/repository/migrations/sqlite3/10_node-table.up.sql b/internal/repository/migrations/sqlite3/10_node-table.up.sql index 52e6a05..1211ba9 100644 --- a/internal/repository/migrations/sqlite3/10_node-table.up.sql +++ b/internal/repository/migrations/sqlite3/10_node-table.up.sql @@ -3,6 +3,7 @@ CREATE TABLE "node" ( hostname VARCHAR(255) NOT NULL, cluster VARCHAR(255) NOT NULL, subcluster VARCHAR(255) NOT NULL, + jobs_running INTEGER DEFAULT 0 NOT NULL, cpus_allocated INTEGER DEFAULT 0 NOT NULL, cpus_total INTEGER DEFAULT 0 NOT NULL, memory_allocated INTEGER DEFAULT 0 NOT NULL, diff --git a/internal/repository/node.go b/internal/repository/node.go index b4d0181..277c1c5 100644 --- a/internal/repository/node.go +++ b/internal/repository/node.go @@ -50,8 +50,9 @@ func GetNodeRepository() *NodeRepository { } var nodeColumns []string = []string{ - "node.id", "node.hostname", "node.cluster", "node.subcluster", - "node.node_state", "node.health_state", "node.meta_data", + // "node.id," + "node.hostname", "node.cluster", "node.subcluster", + "node.node_state", "node.health_state", // "node.meta_data", } func (r *NodeRepository) FetchMetadata(node *schema.Node) (map[string]string, error) { @@ -223,7 +224,7 @@ func (r *NodeRepository) DeleteNode(id int64) error { func (r *NodeRepository) QueryNodes( ctx context.Context, filters []*model.NodeFilter, - order *model.OrderByInput, + order *model.OrderByInput, // Currently unused! ) ([]*schema.Node, error) { query, qerr := AccessCheck(ctx, sq.Select(nodeColumns...).From("node")) if qerr != nil { @@ -296,7 +297,7 @@ func (r *NodeRepository) ListNodes(cluster string) ([]*schema.Node, error) { return nodeList, nil } -func (r *NodeRepository) CountNodeStates(ctx context.Context, filters []*model.NodeFilter) ([]*model.NodeStats, error) { +func (r *NodeRepository) CountNodeStates(ctx context.Context, filters []*model.NodeFilter) ([]*model.NodeStates, error) { query, qerr := AccessCheck(ctx, sq.Select("node_state AS state", "count(*) AS count").From("node")) if qerr != nil { return nil, qerr @@ -327,13 +328,13 @@ func (r *NodeRepository) CountNodeStates(ctx context.Context, filters []*model.N return nil, err } - nodes := make([]*model.NodeStats, 0) + nodes := make([]*model.NodeStates, 0) for rows.Next() { - node := model.NodeStats{} + node := model.NodeStates{} if err := rows.Scan(&node.State, &node.Count); err != nil { rows.Close() - cclog.Warn("Error while scanning rows (NodeStats)") + cclog.Warn("Error while scanning rows (NodeStates)") return nil, err } nodes = append(nodes, &node) @@ -342,7 +343,7 @@ func (r *NodeRepository) CountNodeStates(ctx context.Context, filters []*model.N return nodes, nil } -func (r *NodeRepository) CountHealthStates(ctx context.Context, filters []*model.NodeFilter) ([]*model.NodeStats, error) { +func (r *NodeRepository) CountHealthStates(ctx context.Context, filters []*model.NodeFilter) ([]*model.NodeStates, error) { query, qerr := AccessCheck(ctx, sq.Select("health_state AS state", "count(*) AS count").From("node")) if qerr != nil { return nil, qerr @@ -373,13 +374,13 @@ func (r *NodeRepository) CountHealthStates(ctx context.Context, filters []*model return nil, err } - nodes := make([]*model.NodeStats, 0) + nodes := make([]*model.NodeStates, 0) for rows.Next() { - node := model.NodeStats{} + node := model.NodeStates{} if err := rows.Scan(&node.State, &node.Count); err != nil { rows.Close() - cclog.Warn("Error while scanning rows (NodeStats)") + cclog.Warn("Error while scanning rows 
(NodeStates)") return nil, err } nodes = append(nodes, &node) diff --git a/web/frontend/src/generic/plots/NewBubbleRoofline.svelte b/web/frontend/src/generic/plots/NewBubbleRoofline.svelte index 4415791..bf25347 100644 --- a/web/frontend/src/generic/plots/NewBubbleRoofline.svelte +++ b/web/frontend/src/generic/plots/NewBubbleRoofline.svelte @@ -31,8 +31,10 @@ let { roofData = null, jobsData = null, - allowSizeChange = false, + nodesData = null, + cluster = null, subCluster = null, + allowSizeChange = false, width = 600, height = 380, } = $props(); @@ -264,16 +266,43 @@ let filtTop = u.posToVal(-maxSize / 2, scaleY.key); for (let i = 0; i < d[0].length; i++) { - // Color based on Duration, check index for transparency highlighting - u.ctx.strokeStyle = getRGB(u.data[2][i]); - u.ctx.fillStyle = getRGB(u.data[2][i], transparentFill); - + // Jobs: Color based on Duration + if (jobsData) { + u.ctx.strokeStyle = getRGB(u.data[2][i]); + u.ctx.fillStyle = getRGB(u.data[2][i], transparentFill); + // Nodes: Color based on Idle vs. Allocated + } else if (nodesData) { + // console.log('In Plot Handler NodesData', nodesData) + if (nodesData[i]?.nodeState == "idle") { + u.ctx.strokeStyle = "rgb(0, 0, 255)"; + u.ctx.fillStyle = "rgba(0, 0, 255, 0.5)"; + } else if (nodesData[i]?.nodeState == "allocated") { + u.ctx.strokeStyle = "rgb(0, 255, 0)"; + u.ctx.fillStyle = "rgba(0, 255, 0, 0.5)"; + } else if (nodesData[i]?.nodeState == "notindb") { + u.ctx.strokeStyle = "rgb(0, 0, 0)"; + u.ctx.fillStyle = "rgba(0, 0, 0, 0.5)"; + } else { // Fallback: All other DEFINED states + u.ctx.strokeStyle = "rgb(255, 0, 0)"; + u.ctx.fillStyle = "rgba(255, 0, 0, 0.5)"; + } + } + // Get Values let xVal = d[0][i]; let yVal = d[1][i]; - // Calc Size; Alt.: size = sizes[i] * pxRatio - const size = sizeBase + (jobsData[i]?.numAcc ? jobsData[i].numAcc / 2 : jobsData[i].numNodes); // In NodeMode: Scale with Number of Jobs? + // Calc Size; Alt.: size = sizes[i] * pxRatio + let size = 1; + + // Jobs: Size based on Resourcecount + if (jobsData) { + size = sizeBase + (jobsData[i]?.numAcc ? jobsData[i].numAcc / 2 : jobsData[i].numNodes) + // Nodes: Size based on Jobcount + } else if (nodesData) { + size = sizeBase + nodesData[i]?.numJobs + }; + if (xVal >= filtLft && xVal <= filtRgt && yVal >= filtBtm && yVal <= filtTop) { let cx = valToPosX(xVal, scaleX, xDim, xOff); let cy = valToPosY(yVal, scaleY, yDim, yOff); @@ -338,7 +367,7 @@ }; // Tooltip Plugin - function tooltipPlugin({onclick, getJobData, shiftX = 10, shiftY = 10}) { + function tooltipPlugin({onclick, getLegendData, shiftX = 10, shiftY = 10}) { let tooltipLeftOffset = 0; let tooltipTopOffset = 0; @@ -388,11 +417,34 @@ tooltip.style.top = (tooltipTopOffset + top + shiftX) + "px"; tooltip.style.left = (tooltipLeftOffset + lft + shiftY) + "px"; - tooltip.style.borderColor = getRGB(u.data[2][i]); - tooltip.textContent = ( - // Tooltip Content as String - `Job ID: ${getJobData(u, i).jobId}\nNodes: ${getJobData(u, i).numNodes}${getJobData(u, i)?.numAcc?`\nAccelerators: ${getJobData(u, i).numAcc}`:''}` - ); + + // Jobs: Color based on Duration + if (jobsData) { + tooltip.style.borderColor = getRGB(u.data[2][i]); + // Nodes: Color based on Idle vs. 
Allocated + } else if (nodesData) { + if (nodesData[i]?.nodeState == "idle") { + tooltip.style.borderColor = "rgb(0, 0, 255)"; + } else if (nodesData[i]?.nodeState == "allocated") { + tooltip.style.borderColor = "rgb(0, 255, 0)"; + } else if (nodesData[i]?.nodeState == "notindb") { // Missing from DB table + tooltip.style.borderColor = "rgb(0, 0, 0)"; + } else { // Fallback: All other DEFINED states + tooltip.style.borderColor = "rgb(255, 0, 0)"; + } + } + + if (jobsData) { + tooltip.textContent = ( + // Tooltip Content as String for Job + `Job ID: ${getLegendData(u, i).jobId}\nNodes: ${getLegendData(u, i).numNodes}${getLegendData(u, i)?.numAcc?`\nAccelerators: ${getLegendData(u, i).numAcc}`:''}` + ); + } else if (nodesData) { + tooltip.textContent = ( + // Tooltip Content as String for Node + `Host: ${getLegendData(u, i).nodeName}\nState: ${getLegendData(u, i).nodeState}\nJobs: ${getLegendData(u, i).numJobs}` + ); + } } return { @@ -444,14 +496,18 @@ timeoutId = setTimeout(() => { timeoutId = null; if (uplot) uplot.destroy(); - render(roofData, jobsData); + render(roofData, jobsData, nodesData); }, 200); } - function render(roofdata, jobsData) { - if (roofdata) { + function render(roofData, jobsData, nodesData) { + let plotTitle = "CPU Roofline Diagram"; + if (jobsData) plotTitle = "Job Average Roofline Diagram"; + if (nodesData) plotTitle = "Node Average Roofline Diagram"; + + if (roofData) { const opts = { - title: "Job Average Roofline Diagram", + title: plotTitle, mode: 2, width: width, height: height, @@ -669,35 +725,87 @@ u.ctx.lineWidth = 0.15; } - // The Color Scale For Time Information - const posX = u.valToPos(0.1, "x", true) - const posXLimit = u.valToPos(100, "x", true) - const posY = u.valToPos(14000.0, "y", true) - u.ctx.fillStyle = 'black' - u.ctx.fillText('Short', posX, posY) - const start = posX + 10 - for (let x = start; x < posXLimit; x += 10) { - let c = (x - start) / (posXLimit - start) - u.ctx.fillStyle = getRGB(c) - u.ctx.beginPath() - u.ctx.arc(x, posY, 3, 0, Math.PI * 2, false) - u.ctx.fill() + // Jobs: The Color Scale For Time Information + if (jobsData) { + const posX = u.valToPos(0.1, "x", true) + const posXLimit = u.valToPos(100, "x", true) + const posY = u.valToPos(14000.0, "y", true) + u.ctx.fillStyle = 'black' + u.ctx.fillText('Short', posX, posY) + const start = posX + 10 + for (let x = start; x < posXLimit; x += 10) { + let c = (x - start) / (posXLimit - start) + u.ctx.fillStyle = getRGB(c) + u.ctx.beginPath() + u.ctx.arc(x, posY, 3, 0, Math.PI * 2, false) + u.ctx.fill() + } + u.ctx.fillStyle = 'black' + u.ctx.fillText('Long', posXLimit + 23, posY) + } + + // Nodes: The Colors Of NodeStates (Just 3) + if (nodesData) { + const posY = u.valToPos(14000.0, "y", true) + + const posAllocDot = u.valToPos(0.1, "x", true) + const posAllocText = posAllocDot + 60 + u.ctx.fillStyle = "rgb(0, 255, 0)" + u.ctx.beginPath() + u.ctx.arc(posAllocDot, posY, 3, 0, Math.PI * 2, false) + u.ctx.fill() + u.ctx.fillStyle = 'black' + u.ctx.fillText('Allocated', posAllocText, posY) + + const posIdleDot = posAllocDot + 150 + const posIdleText = posAllocText + 120 + u.ctx.fillStyle = "rgb(0, 0, 255)" + u.ctx.beginPath() + u.ctx.arc(posIdleDot, posY, 3, 0, Math.PI * 2, false) + u.ctx.fill() + u.ctx.fillStyle = 'black' + u.ctx.fillText('Idle', posIdleText, posY) + + const posOtherDot = posIdleDot + 150 + const posOtherText = posIdleText + 160 + u.ctx.fillStyle = "rgb(255, 0, 0)" + u.ctx.beginPath() + u.ctx.arc(posOtherDot, posY, 3, 0, Math.PI * 2, false) + u.ctx.fill() + 
u.ctx.fillStyle = 'black' + u.ctx.fillText('Other', posOtherText, posY) + + const posMissingDot = posOtherDot + 150 + const posMissingText = posOtherText + 190 + u.ctx.fillStyle = 'black' + u.ctx.beginPath() + u.ctx.arc(posMissingDot, posY, 3, 0, Math.PI * 2, false) + u.ctx.fill() + u.ctx.fillText('Missing in DB', posMissingText, posY) } - u.ctx.fillStyle = 'black' - u.ctx.fillText('Long', posXLimit + 23, posY) }, ], }, plugins: [ tooltipPlugin({ onclick(u, dataIdx) { - window.open(`/monitoring/job/${jobsData[dataIdx].id}`); + if (jobsData) { + window.open(`/monitoring/job/${jobsData[dataIdx].id}`) + } else if (nodesData) { + window.open(`/monitoring/node/${cluster}/${nodesData[dataIdx].nodeName}`) + } }, - getJobData: (u, dataIdx) => { return jobsData[dataIdx] } + getLegendData: (u, dataIdx) => { + if (jobsData) { + return jobsData[dataIdx] + } else if (nodesData) { + return nodesData[dataIdx] + } + } }), ], }; - uplot = new uPlot(opts, roofdata, plotWrapper); + uplot = new uPlot(opts, roofData, plotWrapper); } else { // console.log("No data for roofline!"); } @@ -705,7 +813,7 @@ /* On Mount */ onMount(() => { - render(roofData, jobsData); + render(roofData, jobsData, nodesData); }); /* On Destroy */ diff --git a/web/frontend/src/status/DevelDash.svelte b/web/frontend/src/status/DevelDash.svelte index ea00803..5ac1326 100644 --- a/web/frontend/src/status/DevelDash.svelte +++ b/web/frontend/src/status/DevelDash.svelte @@ -31,9 +31,11 @@ const client = getContextClient(); /* State Init */ - // let from = $state(new Date(Date.now() - 5 * 60 * 1000)); - // let to = $state(new Date(Date.now())); + let from = $state(new Date(Date.now() - 5 * 60 * 1000)); + let to = $state(new Date(Date.now())); let plotWidths = $state([]); + let nodesCounts = $state({}); + let jobsJounts = $state({}); /* Derived */ // Note: nodeMetrics are requested on configured $timestep resolution @@ -64,6 +66,123 @@ }, })); + // Optimal new query, does not exist + // const nodeRoofQuery = $derived(queryStore({ + // client: client, + // query: gql` + // query ($filter: [JobFilter!]!, $metrics: [String!]!) { + // nodeRoofline(filter: $filter, metrics: $metrics) { + // nodeName + // nodeState + // numJobs + // stats { + // name + // data { + // avg + // } + // } + // } + // } + // `, + // variables: { + // filter: [{ state: ["running"] }, { cluster: { eq: cluster } }], + // metrics: ["flops_any", "mem_bw"], // Fixed names for job roofline + // }, + // })); + + // Load Required Roofline Data Averages for all nodes of cluster: use for node avg data and name, use secondary (new?) querie(s) for slurmstate and numjobs + const nodesData = $derived(queryStore({ + client: client, + query: gql` + query ($cluster: String!, $metrics: [String!], $from: Time!, $to: Time!) { + nodeMetrics( + cluster: $cluster + metrics: $metrics + from: $from + to: $to + ) { + host + subCluster + metrics { + name + metric { + series { + statistics { + avg + } + } + } + } + } + } + `, + variables: { + cluster: cluster, + metrics: ["flops_any", "mem_bw"], + from: from, + to: to, + }, + })); + + // Load for jobcount per node only -- might me required for total running jobs anyways in parent component! 
+ // Also, think about extra query with only TotalJobCount and Items [Resources, ...some meta infos], not including metric data + const paging = { itemsPerPage: 1500, page: 1 }; + const sorting = { field: "startTime", type: "col", order: "DESC" }; + const filter = [ + { cluster: { eq: cluster } }, + { state: ["running"] }, + ]; + const nodeJobsQuery = gql` + query ( + $filter: [JobFilter!]! + $sorting: OrderByInput! + $paging: PageRequest! + ) { + jobs(filter: $filter, order: $sorting, page: $paging) { + items { + jobId + resources { + hostname + } + } + count + } + } + `; + + const nodesJobs = $derived(queryStore({ + client: client, + query: nodeJobsQuery, + variables: { paging, sorting, filter }, + }) + ); + + // Last required query: Node State + const nodesState = $derived(queryStore({ + client: client, + query: gql` + query ( + $filter: [NodeFilter!] + $sorting: OrderByInput + ) { + nodes(filter: $filter, order: $sorting) { + count + items { + hostname + cluster + subCluster + nodeState + } + } + } + `, + variables: { + filter: { cluster: { eq: cluster }}, + sorting: sorting // Unused in Backend: Use Placeholder + // Subcluster filter? + }, + })); + /* Function */ function transformJobsStatsToData(subclusterData) { /* c will contain values from 0 to 1 representing the duration */ @@ -90,7 +209,7 @@ else c.push(d) } } else { - console.warn("transformData: metrics for 'mem_bw' and/or 'flops_any' missing!") + console.warn("transformJobsStatsToData: metrics for 'mem_bw' and/or 'flops_any' missing!") } if (x.length > 0 && y.length > 0 && c.length > 0) { @@ -99,15 +218,69 @@ return data } + function transformNodesStatsToData(subclusterData) { + let data = null + const x = [], y = [] + + if (subclusterData) { + for (let i = 0; i < subclusterData.length; i++) { + const flopsData = subclusterData[i].metrics.find((s) => s.name == "flops_any") + const memBwData = subclusterData[i].metrics.find((s) => s.name == "mem_bw") + + const f = flopsData.metric.series[0].statistics.avg + const m = memBwData.metric.series[0].statistics.avg + + let intensity = f / m + if (Number.isNaN(intensity) || !Number.isFinite(intensity)) { + // continue // Old: Introduces mismatch between Data and Info Arrays + intensity = 0.0 // New: Set to Float Zero: Will not show in Log-Plot (Always below render limit) + } + + x.push(intensity) + y.push(f) + } + } else { + // console.warn("transformNodesStatsToData: metrics for 'mem_bw' and/or 'flops_any' missing!") + } + + if (x.length > 0 && y.length > 0) { + data = [null, [x, y]] // for dataformat see roofline.svelte + } + return data + } + function transformJobsStatsToInfo(subclusterData) { if (subclusterData) { return subclusterData.map((sc) => { return {id: sc.id, jobId: sc.jobId, numNodes: sc.numNodes, numAcc: sc?.numAccelerators? sc.numAccelerators : 0} }) } else { - console.warn("transformData: jobInfo missing!") + console.warn("transformJobsStatsToInfo: jobInfo missing!") return [] } } + function transformNodesStatsToInfo(subClusterData) { + let result = []; + if (subClusterData && $nodesState?.data) { + // Use Nodes as Returned from CCMS, *NOT* as saved in DB via SlurmState-API! + for (let j = 0; j < subClusterData.length; j++) { + // nodesCounts[subClusterData[i].subCluster] = $nodesState.data.nodes.count; // Probably better as own derived! + + const nodeName = subClusterData[j]?.host ? 
subClusterData[j].host : "unknown" + const nodeMatch = $nodesState.data.nodes.items.find((n) => n.hostname == nodeName && n.subCluster == subClusterData[j].subCluster); + const nodeState = nodeMatch?.nodeState ? nodeMatch.nodeState : "notindb" + let numJobs = 0 + + if ($nodesJobs?.data) { + const nodeJobs = $nodesJobs.data.jobs.items.filter((job) => job.resources.find((res) => res.hostname == nodeName)) + numJobs = nodeJobs?.length ? nodeJobs.length : 0 + } + + result.push({nodeName: nodeName, nodeState: nodeState, numJobs: numJobs}) + }; + }; + return result + } + @@ -115,19 +288,23 @@ {#each $initq.data.clusters.find((c) => c.name == cluster).subClusters as subCluster, i} - Classic + Bubble Node
- {#key $jobRoofQuery.data.jobsMetricStats} + {#key $nodesData?.data?.nodeMetrics || $nodesJobs?.data?.jobs} {subCluster.name} Total: {$jobRoofQuery.data.jobsMetricStats.filter( (data) => data.subCluster == subCluster.name, ).length} Jobs - data.subCluster == subCluster.name, + ) + )} + nodesData={transformNodesStatsToInfo($nodesData?.data?.nodeMetrics.filter( (data) => data.subCluster == subCluster.name, ) )} @@ -136,7 +313,7 @@
- Bubble + Bubble Jobs
{#key $jobRoofQuery.data.jobsMetricStats} {subCluster.name} Total: {$jobRoofQuery.data.jobsMetricStats.filter( From 35c0b0be58599abc88903adafe87fe1fcf77bb1d Mon Sep 17 00:00:00 2001 From: Christoph Kluge Date: Mon, 21 Jul 2025 16:03:07 +0200 Subject: [PATCH 09/20] add scheduler and health status pie charts --- internal/repository/node.go | 6 ++ web/frontend/src/status/DevelDash.svelte | 127 ++++++++++++++++++++++- 2 files changed, 130 insertions(+), 3 deletions(-) diff --git a/internal/repository/node.go b/internal/repository/node.go index 277c1c5..d7db2f4 100644 --- a/internal/repository/node.go +++ b/internal/repository/node.go @@ -321,6 +321,9 @@ func (r *NodeRepository) CountNodeStates(ctx context.Context, filters []*model.N } } + // Add Group and Order + query = query.GroupBy("state").OrderBy("count DESC") + rows, err := query.RunWith(r.stmtCache).Query() if err != nil { queryString, queryVars, _ := query.ToSql() @@ -367,6 +370,9 @@ func (r *NodeRepository) CountHealthStates(ctx context.Context, filters []*model } } + // Add Group and Order + query = query.GroupBy("state").OrderBy("count DESC") + rows, err := query.RunWith(r.stmtCache).Query() if err != nil { queryString, queryVars, _ := query.ToSql() diff --git a/web/frontend/src/status/DevelDash.svelte b/web/frontend/src/status/DevelDash.svelte index 5ac1326..f37976b 100644 --- a/web/frontend/src/status/DevelDash.svelte +++ b/web/frontend/src/status/DevelDash.svelte @@ -9,6 +9,8 @@ import { Row, Col, + Table, + Icon } from "@sveltestrap/sveltestrap"; import { queryStore, @@ -18,8 +20,9 @@ import { init, } from "../generic/utils.js"; - import Roofline from "../generic/plots/Roofline.svelte"; + //import Roofline from "../generic/plots/Roofline.svelte"; import NewBubbleRoofline from "../generic/plots/NewBubbleRoofline.svelte"; + import Pie, { colors } from "../generic/plots/Pie.svelte"; /* Svelte 5 Props */ let { @@ -34,8 +37,10 @@ let from = $state(new Date(Date.now() - 5 * 60 * 1000)); let to = $state(new Date(Date.now())); let plotWidths = $state([]); - let nodesCounts = $state({}); - let jobsJounts = $state({}); + let statesWidth = $state(0); + let healthWidth = $state(0); + // let nodesCounts = $state({}); + // let jobsJounts = $state({}); /* Derived */ // Note: nodeMetrics are requested on configured $timestep resolution @@ -183,6 +188,33 @@ }, })); + // Accumulated NodeStates for Piecharts + const nodesStateCounts = $derived(queryStore({ + client: client, + query: gql` + query ($filter: [NodeFilter!]) { + nodeStates(filter: $filter) { + state + count + } + } + `, + variables: { + filter: { cluster: { eq: cluster }} + }, + })); + + $inspect($nodesStateCounts?.data?.nodeStates) + + const refinedStateData = $derived.by(() => { + return $nodesStateCounts?.data?.nodeStates.filter((e) => ['allocated', 'reserved', 'idle', 'mixed','down', 'unknown'].includes(e.state)) + }); + + const refinedHealthData = $derived.by(() => { + return $nodesStateCounts?.data?.nodeStates.filter((e) => ['full', 'partial', 'failed'].includes(e.state)) + }); + + /* Function */ function transformJobsStatsToData(subclusterData) { /* c will contain values from 0 to 1 representing the duration */ @@ -339,3 +371,92 @@ {/each} {/if} + +
+
+ +{#if $initq.data && $nodesStateCounts.data} + + + Node State +
+ {#key refinedStateData} + Total: {refinedStateData.reduce((sum, item) => { + return sum + item.count; + }, 0)} Nodes + + sd.count, + )} + entities={refinedStateData.map( + (sd) => sd.state, + )} + /> + {/key} +
+ + + {#key refinedStateData} + + + + + + + {#each refinedStateData as sd, i} + + + + + + {/each} +
Legend | Current State | #Nodes
{sd.state} | {sd.count}
+ {/key} + + + + Node Health +
+ {#key refinedHealthData} + Total: {refinedStateData.reduce((sum, item) => { + return sum + item.count; + }, 0)} Nodes + + sd.count, + )} + entities={refinedHealthData.map( + (sd) => sd.state, + )} + /> + {/key} +
+ + + {#key refinedHealthData} + + + + + + + {#each refinedHealthData as hd, i} + + + + + + {/each} +
Legend | Current Health | #Nodes
{hd.state} | {hd.count}
+ {/key} + +
+{/if} From 4d2c64b012c7ecf3c08df86258012e981c0e5be1 Mon Sep 17 00:00:00 2001 From: Christoph Kluge Date: Wed, 23 Jul 2025 15:00:10 +0200 Subject: [PATCH 10/20] remove logging --- web/frontend/src/status/DevelDash.svelte | 2 -- 1 file changed, 2 deletions(-) diff --git a/web/frontend/src/status/DevelDash.svelte b/web/frontend/src/status/DevelDash.svelte index f37976b..8cd4627 100644 --- a/web/frontend/src/status/DevelDash.svelte +++ b/web/frontend/src/status/DevelDash.svelte @@ -204,8 +204,6 @@ }, })); - $inspect($nodesStateCounts?.data?.nodeStates) - const refinedStateData = $derived.by(() => { return $nodesStateCounts?.data?.nodeStates.filter((e) => ['allocated', 'reserved', 'idle', 'mixed','down', 'unknown'].includes(e.state)) }); From 98b9f8e62deed8efb500d81bf439285edbcb956c Mon Sep 17 00:00:00 2001 From: Christoph Kluge Date: Mon, 4 Aug 2025 14:50:53 +0200 Subject: [PATCH 11/20] Add more information to status dash --- api/schema.graphqls | 7 ++- internal/graph/generated/generated.go | 64 ++++++++++++++++++++++- internal/graph/model/models_gen.go | 15 ++++-- internal/graph/schema.resolvers.go | 2 +- internal/repository/stats.go | 42 ++++++++++----- web/frontend/src/status/StatusDash.svelte | 64 ++++++++++++++++++++++- 6 files changed, 169 insertions(+), 25 deletions(-) diff --git a/api/schema.graphqls b/api/schema.graphqls index b3dadb5..d1c78f3 100644 --- a/api/schema.graphqls +++ b/api/schema.graphqls @@ -237,10 +237,12 @@ enum Aggregate { USER PROJECT CLUSTER + SUBCLUSTER } enum SortByAggregate { TOTALWALLTIME TOTALJOBS + TOTALUSERS TOTALNODES TOTALNODEHOURS TOTALCORES @@ -501,11 +503,12 @@ type MetricHistoPoint { } type JobsStatistics { - id: ID! # If `groupBy` was used, ID of the user/project/cluster + id: ID! # If `groupBy` was used, ID of the user/project/cluster/subcluster name: String! # if User-Statistics: Given Name of Account (ID) Owner + totalUsers: Int! # if *not* User-Statistics: Number of active users (based on running jobs) totalJobs: Int! # Number of jobs runningJobs: Int! # Number of running jobs - shortJobs: Int! # Number of jobs with a duration of less than duration + shortJobs: Int! # Number of jobs with a duration of less than config'd ShortRunningJobsDuration totalWalltime: Int! # Sum of the duration of all matched jobs in hours totalNodes: Int! # Sum of the nodes of all matched jobs totalNodeHours: Int! 
# Sum of the node hours of all matched jobs diff --git a/internal/graph/generated/generated.go b/internal/graph/generated/generated.go index a725802..ff4469a 100644 --- a/internal/graph/generated/generated.go +++ b/internal/graph/generated/generated.go @@ -201,6 +201,7 @@ type ComplexityRoot struct { TotalJobs func(childComplexity int) int TotalNodeHours func(childComplexity int) int TotalNodes func(childComplexity int) int + TotalUsers func(childComplexity int) int TotalWalltime func(childComplexity int) int } @@ -1166,6 +1167,13 @@ func (e *executableSchema) Complexity(typeName, field string, childComplexity in return e.complexity.JobsStatistics.TotalNodes(childComplexity), true + case "JobsStatistics.totalUsers": + if e.complexity.JobsStatistics.TotalUsers == nil { + break + } + + return e.complexity.JobsStatistics.TotalUsers(childComplexity), true + case "JobsStatistics.totalWalltime": if e.complexity.JobsStatistics.TotalWalltime == nil { break @@ -2567,10 +2575,12 @@ enum Aggregate { USER PROJECT CLUSTER + SUBCLUSTER } enum SortByAggregate { TOTALWALLTIME TOTALJOBS + TOTALUSERS TOTALNODES TOTALNODEHOURS TOTALCORES @@ -2831,8 +2841,9 @@ type MetricHistoPoint { } type JobsStatistics { - id: ID! # If ` + "`" + `groupBy` + "`" + ` was used, ID of the user/project/cluster + id: ID! # If ` + "`" + `groupBy` + "`" + ` was used, ID of the user/project/cluster/subcluster name: String! # if User-Statistics: Given Name of Account (ID) Owner + totalUsers: Int! # if *not* User-Statistics: Number of active users (based on running jobs) totalJobs: Int! # Number of jobs runningJobs: Int! # Number of running jobs shortJobs: Int! # Number of jobs with a duration of less than duration @@ -8334,6 +8345,50 @@ func (ec *executionContext) fieldContext_JobsStatistics_name(_ context.Context, return fc, nil } +func (ec *executionContext) _JobsStatistics_totalUsers(ctx context.Context, field graphql.CollectedField, obj *model.JobsStatistics) (ret graphql.Marshaler) { + fc, err := ec.fieldContext_JobsStatistics_totalUsers(ctx, field) + if err != nil { + return graphql.Null + } + ctx = graphql.WithFieldContext(ctx, fc) + defer func() { + if r := recover(); r != nil { + ec.Error(ctx, ec.Recover(ctx, r)) + ret = graphql.Null + } + }() + resTmp, err := ec.ResolverMiddleware(ctx, func(rctx context.Context) (any, error) { + ctx = rctx // use context from middleware stack in children + return obj.TotalUsers, nil + }) + if err != nil { + ec.Error(ctx, err) + return graphql.Null + } + if resTmp == nil { + if !graphql.HasFieldError(ctx, fc) { + ec.Errorf(ctx, "must not be null") + } + return graphql.Null + } + res := resTmp.(int) + fc.Result = res + return ec.marshalNInt2int(ctx, field.Selections, res) +} + +func (ec *executionContext) fieldContext_JobsStatistics_totalUsers(_ context.Context, field graphql.CollectedField) (fc *graphql.FieldContext, err error) { + fc = &graphql.FieldContext{ + Object: "JobsStatistics", + Field: field, + IsMethod: false, + IsResolver: false, + Child: func(ctx context.Context, field graphql.CollectedField) (*graphql.FieldContext, error) { + return nil, errors.New("field of type Int does not have child fields") + }, + } + return fc, nil +} + func (ec *executionContext) _JobsStatistics_totalJobs(ctx context.Context, field graphql.CollectedField, obj *model.JobsStatistics) (ret graphql.Marshaler) { fc, err := ec.fieldContext_JobsStatistics_totalJobs(ctx, field) if err != nil { @@ -12636,6 +12691,8 @@ func (ec *executionContext) fieldContext_Query_jobsStatistics(ctx context.Contex return 
ec.fieldContext_JobsStatistics_id(ctx, field) case "name": return ec.fieldContext_JobsStatistics_name(ctx, field) + case "totalUsers": + return ec.fieldContext_JobsStatistics_totalUsers(ctx, field) case "totalJobs": return ec.fieldContext_JobsStatistics_totalJobs(ctx, field) case "runningJobs": @@ -19240,6 +19297,11 @@ func (ec *executionContext) _JobsStatistics(ctx context.Context, sel ast.Selecti if out.Values[i] == graphql.Null { out.Invalids++ } + case "totalUsers": + out.Values[i] = ec._JobsStatistics_totalUsers(ctx, field, obj) + if out.Values[i] == graphql.Null { + out.Invalids++ + } case "totalJobs": out.Values[i] = ec._JobsStatistics_totalJobs(ctx, field, obj) if out.Values[i] == graphql.Null { diff --git a/internal/graph/model/models_gen.go b/internal/graph/model/models_gen.go index e6619b7..a5fe2a2 100644 --- a/internal/graph/model/models_gen.go +++ b/internal/graph/model/models_gen.go @@ -112,6 +112,7 @@ type JobStats struct { type JobsStatistics struct { ID string `json:"id"` Name string `json:"name"` + TotalUsers int `json:"totalUsers"` TotalJobs int `json:"totalJobs"` RunningJobs int `json:"runningJobs"` ShortJobs int `json:"shortJobs"` @@ -247,20 +248,22 @@ type User struct { type Aggregate string const ( - AggregateUser Aggregate = "USER" - AggregateProject Aggregate = "PROJECT" - AggregateCluster Aggregate = "CLUSTER" + AggregateUser Aggregate = "USER" + AggregateProject Aggregate = "PROJECT" + AggregateCluster Aggregate = "CLUSTER" + AggregateSubcluster Aggregate = "SUBCLUSTER" ) var AllAggregate = []Aggregate{ AggregateUser, AggregateProject, AggregateCluster, + AggregateSubcluster, } func (e Aggregate) IsValid() bool { switch e { - case AggregateUser, AggregateProject, AggregateCluster: + case AggregateUser, AggregateProject, AggregateCluster, AggregateSubcluster: return true } return false @@ -292,6 +295,7 @@ type SortByAggregate string const ( SortByAggregateTotalwalltime SortByAggregate = "TOTALWALLTIME" SortByAggregateTotaljobs SortByAggregate = "TOTALJOBS" + SortByAggregateTotalusers SortByAggregate = "TOTALUSERS" SortByAggregateTotalnodes SortByAggregate = "TOTALNODES" SortByAggregateTotalnodehours SortByAggregate = "TOTALNODEHOURS" SortByAggregateTotalcores SortByAggregate = "TOTALCORES" @@ -303,6 +307,7 @@ const ( var AllSortByAggregate = []SortByAggregate{ SortByAggregateTotalwalltime, SortByAggregateTotaljobs, + SortByAggregateTotalusers, SortByAggregateTotalnodes, SortByAggregateTotalnodehours, SortByAggregateTotalcores, @@ -313,7 +318,7 @@ var AllSortByAggregate = []SortByAggregate{ func (e SortByAggregate) IsValid() bool { switch e { - case SortByAggregateTotalwalltime, SortByAggregateTotaljobs, SortByAggregateTotalnodes, SortByAggregateTotalnodehours, SortByAggregateTotalcores, SortByAggregateTotalcorehours, SortByAggregateTotalaccs, SortByAggregateTotalacchours: + case SortByAggregateTotalwalltime, SortByAggregateTotaljobs, SortByAggregateTotalusers, SortByAggregateTotalnodes, SortByAggregateTotalnodehours, SortByAggregateTotalcores, SortByAggregateTotalcorehours, SortByAggregateTotalaccs, SortByAggregateTotalacchours: return true } return false diff --git a/internal/graph/schema.resolvers.go b/internal/graph/schema.resolvers.go index e0a7948..b993ebb 100644 --- a/internal/graph/schema.resolvers.go +++ b/internal/graph/schema.resolvers.go @@ -581,7 +581,7 @@ func (r *queryResolver) JobsStatistics(ctx context.Context, filter []*model.JobF defaultDurationBins := "1h" defaultMetricBins := 10 - if requireField(ctx, "totalJobs") || requireField(ctx, 
"totalWalltime") || requireField(ctx, "totalNodes") || requireField(ctx, "totalCores") || + if requireField(ctx, "totalJobs") || requireField(ctx, "totalUsers") || requireField(ctx, "totalWalltime") || requireField(ctx, "totalNodes") || requireField(ctx, "totalCores") || requireField(ctx, "totalAccs") || requireField(ctx, "totalNodeHours") || requireField(ctx, "totalCoreHours") || requireField(ctx, "totalAccHours") { if groupBy == nil { stats, err = r.Repo.JobsStats(ctx, filter) diff --git a/internal/repository/stats.go b/internal/repository/stats.go index 7beb674..1aa3c55 100644 --- a/internal/repository/stats.go +++ b/internal/repository/stats.go @@ -21,13 +21,15 @@ import ( // GraphQL validation should make sure that no unkown values can be specified. var groupBy2column = map[model.Aggregate]string{ - model.AggregateUser: "job.hpc_user", - model.AggregateProject: "job.project", - model.AggregateCluster: "job.cluster", + model.AggregateUser: "job.hpc_user", + model.AggregateProject: "job.project", + model.AggregateCluster: "job.cluster", + model.AggregateSubcluster: "job.subcluster", } var sortBy2column = map[model.SortByAggregate]string{ model.SortByAggregateTotaljobs: "totalJobs", + model.SortByAggregateTotalusers: "totalUsers", model.SortByAggregateTotalwalltime: "totalWalltime", model.SortByAggregateTotalnodes: "totalNodes", model.SortByAggregateTotalnodehours: "totalNodeHours", @@ -76,8 +78,12 @@ func (r *JobRepository) buildStatsQuery( // fmt.Sprintf(`CAST(ROUND((CASE WHEN job.job_state = "running" THEN %d - job.start_time ELSE job.duration END) / 3600) as %s) as value`, time.Now().Unix(), castType) if col != "" { - // Scan columns: id, totalJobs, name, totalWalltime, totalNodes, totalNodeHours, totalCores, totalCoreHours, totalAccs, totalAccHours - query = sq.Select(col, "COUNT(job.id) as totalJobs", "name", + // Scan columns: id, name, totalJobs, totalUsers, totalWalltime, totalNodes, totalNodeHours, totalCores, totalCoreHours, totalAccs, totalAccHours + query = sq.Select( + col, + "name", + "COUNT(job.id) as totalJobs", + "COUNT(DISTINCT job.hpc_user) AS totalUsers", fmt.Sprintf(`CAST(ROUND(SUM((CASE WHEN job.job_state = "running" THEN %d - job.start_time ELSE job.duration END)) / 3600) as %s) as totalWalltime`, time.Now().Unix(), castType), fmt.Sprintf(`CAST(SUM(job.num_nodes) as %s) as totalNodes`, castType), fmt.Sprintf(`CAST(ROUND(SUM((CASE WHEN job.job_state = "running" THEN %d - job.start_time ELSE job.duration END) * job.num_nodes) / 3600) as %s) as totalNodeHours`, time.Now().Unix(), castType), @@ -87,8 +93,10 @@ func (r *JobRepository) buildStatsQuery( fmt.Sprintf(`CAST(ROUND(SUM((CASE WHEN job.job_state = "running" THEN %d - job.start_time ELSE job.duration END) * job.num_acc) / 3600) as %s) as totalAccHours`, time.Now().Unix(), castType), ).From("job").LeftJoin("hpc_user ON hpc_user.username = job.hpc_user").GroupBy(col) } else { - // Scan columns: totalJobs, name, totalWalltime, totalNodes, totalNodeHours, totalCores, totalCoreHours, totalAccs, totalAccHours - query = sq.Select("COUNT(job.id)", + // Scan columns: totalJobs, totalUsers, totalWalltime, totalNodes, totalNodeHours, totalCores, totalCoreHours, totalAccs, totalAccHours + query = sq.Select( + "COUNT(job.id) as totalJobs", + "COUNT(DISTINCT job.hpc_user) AS totalUsers", fmt.Sprintf(`CAST(ROUND(SUM((CASE WHEN job.job_state = "running" THEN %d - job.start_time ELSE job.duration END)) / 3600) as %s)`, time.Now().Unix(), castType), fmt.Sprintf(`CAST(SUM(job.num_nodes) as %s)`, castType), 
fmt.Sprintf(`CAST(ROUND(SUM((CASE WHEN job.job_state = "running" THEN %d - job.start_time ELSE job.duration END) * job.num_nodes) / 3600) as %s)`, time.Now().Unix(), castType), @@ -167,14 +175,14 @@ func (r *JobRepository) JobsStatsGrouped( for rows.Next() { var id sql.NullString var name sql.NullString - var jobs, walltime, nodes, nodeHours, cores, coreHours, accs, accHours sql.NullInt64 - if err := rows.Scan(&id, &jobs, &name, &walltime, &nodes, &nodeHours, &cores, &coreHours, &accs, &accHours); err != nil { + var jobs, users, walltime, nodes, nodeHours, cores, coreHours, accs, accHours sql.NullInt64 + if err := rows.Scan(&id, &name, &jobs, &users, &walltime, &nodes, &nodeHours, &cores, &coreHours, &accs, &accHours); err != nil { cclog.Warn("Error while scanning rows") return nil, err } if id.Valid { - var totalJobs, totalWalltime, totalNodes, totalNodeHours, totalCores, totalCoreHours, totalAccs, totalAccHours int + var totalJobs, totalUsers, totalWalltime, totalNodes, totalNodeHours, totalCores, totalCoreHours, totalAccs, totalAccHours int var personName string if name.Valid { @@ -185,6 +193,10 @@ func (r *JobRepository) JobsStatsGrouped( totalJobs = int(jobs.Int64) } + if users.Valid { + totalUsers = int(users.Int64) + } + if walltime.Valid { totalWalltime = int(walltime.Int64) } @@ -228,8 +240,9 @@ func (r *JobRepository) JobsStatsGrouped( stats = append(stats, &model.JobsStatistics{ ID: id.String, - TotalJobs: int(jobs.Int64), - TotalWalltime: int(walltime.Int64), + TotalJobs: totalJobs, + TotalUsers: totalUsers, + TotalWalltime: totalWalltime, TotalNodes: totalNodes, TotalNodeHours: totalNodeHours, TotalCores: totalCores, @@ -259,8 +272,8 @@ func (r *JobRepository) JobsStats( row := query.RunWith(r.DB).QueryRow() stats := make([]*model.JobsStatistics, 0, 1) - var jobs, walltime, nodes, nodeHours, cores, coreHours, accs, accHours sql.NullInt64 - if err := row.Scan(&jobs, &walltime, &nodes, &nodeHours, &cores, &coreHours, &accs, &accHours); err != nil { + var jobs, users, walltime, nodes, nodeHours, cores, coreHours, accs, accHours sql.NullInt64 + if err := row.Scan(&jobs, &users, &walltime, &nodes, &nodeHours, &cores, &coreHours, &accs, &accHours); err != nil { cclog.Warn("Error while scanning rows") return nil, err } @@ -280,6 +293,7 @@ func (r *JobRepository) JobsStats( stats = append(stats, &model.JobsStatistics{ TotalJobs: int(jobs.Int64), + TotalUsers: int(users.Int64), TotalWalltime: int(walltime.Int64), TotalNodeHours: totalNodeHours, TotalCoreHours: totalCoreHours, diff --git a/web/frontend/src/status/StatusDash.svelte b/web/frontend/src/status/StatusDash.svelte index f98c1c3..a1196e5 100644 --- a/web/frontend/src/status/StatusDash.svelte +++ b/web/frontend/src/status/StatusDash.svelte @@ -45,12 +45,17 @@ let plotWidths = $state([]); // Bar Gauges let allocatedNodes = $state({}); + let allocatedAccs = $state({}); let flopRate = $state({}); let flopRateUnitPrefix = $state({}); let flopRateUnitBase = $state({}); let memBwRate = $state({}); let memBwRateUnitPrefix = $state({}); let memBwRateUnitBase = $state({}); + // Plain Infos + let runningJobs = $state({}); + let activeUsers = $state({}); + let totalAccs = $state({}); /* Derived */ // Note: nodeMetrics are requested on configured $timestep resolution @@ -63,6 +68,8 @@ $metrics: [String!] $from: Time! $to: Time! + $filter: [JobFilter!]! + $paging: PageRequest! 
) { nodeMetrics( cluster: $cluster @@ -87,11 +94,23 @@ } } } - + # Only counts shared nodes once allocatedNodes(cluster: $cluster) { name count } + # totalNodes includes multiples if shared jobs + jobsStatistics( + filter: $filter + page: $paging + sortBy: TOTALJOBS + groupBy: SUBCLUSTER + ) { + id + totalJobs + totalUsers + totalAccs + } } `, variables: { @@ -99,7 +118,8 @@ metrics: ["flops_any", "mem_bw"], // Fixed names for roofline and status bars from: from.toISOString(), to: to.toISOString(), - // filter: [{ state: ["running"] }, { cluster: { eq: cluster } }], + filter: [{ state: ["running"] }, { cluster: { eq: cluster } }], + paging: { itemsPerPage: -1, page: 1 }, // Get all: -1 }, })); @@ -110,10 +130,27 @@ (c) => c.name == cluster, ).subClusters; for (let subCluster of subClusters) { + // Allocations allocatedNodes[subCluster.name] = $statusQuery.data.allocatedNodes.find( ({ name }) => name == subCluster.name, )?.count || 0; + allocatedAccs[subCluster.name] = + $statusQuery.data.jobsStatistics.find( + ({ id }) => id == subCluster.name, + )?.totalAccs || 0; + // Infos + activeUsers[subCluster.name] = + $statusQuery.data.jobsStatistics.find( + ({ id }) => id == subCluster.name, + )?.totalUsers || 0; + runningJobs[subCluster.name] = + $statusQuery.data.jobsStatistics.find( + ({ id }) => id == subCluster.name, + )?.totalJobs || 0; + totalAccs[subCluster.name] = + (subCluster?.numberOfNodes * subCluster?.topology?.accelerators?.length) || null; + // Keymetrics flopRate[subCluster.name] = Math.floor( sumUp($statusQuery.data.nodeMetrics, subCluster.name, "flops_any") * @@ -158,9 +195,15 @@ SubCluster "{subCluster.name}" + {subCluster.processorType} + + + + +
+ {#if totalAccs[subCluster.name] !== null} + + + + + + {/if} @@ -204,6 +353,25 @@
+ + + + + + + + +
{/if} - +
+
{runningJobs[subCluster.name]} Running Jobs | {activeUsers[subCluster.name]} Active Users
Allocated Nodes
Allocated Accelerators
+ +
{allocatedAccs[subCluster.name]} / {totalAccs[subCluster.name]} + Accelerators
Flop Rate (Any) Date: Tue, 5 Aug 2025 14:19:03 +0200 Subject: [PATCH 12/20] fix: fix metric availability subcluster list overflow --- web/frontend/src/generic/select/MetricSelection.svelte | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/web/frontend/src/generic/select/MetricSelection.svelte b/web/frontend/src/generic/select/MetricSelection.svelte index 469cc32..d6da4df 100644 --- a/web/frontend/src/generic/select/MetricSelection.svelte +++ b/web/frontend/src/generic/select/MetricSelection.svelte @@ -96,9 +96,9 @@ function printAvailability(metric, cluster) { const avail = globalMetrics.find((gm) => gm.name === metric)?.availability if (!cluster) { - return avail.map((av) => av.cluster).join(',') + return avail.map((av) => av.cluster).join(', ') } else { - return avail.find((av) => av.cluster === cluster).subClusters.join(',') + return avail.find((av) => av.cluster === cluster).subClusters.join(', ') } } @@ -208,7 +208,7 @@ /> {/if} {metric} - + {printAvailability(metric, cluster)} From bef832e45b0bf713d0ae759e21000eab2651d42b Mon Sep 17 00:00:00 2001 From: Christoph Kluge Date: Thu, 7 Aug 2025 16:10:11 +0200 Subject: [PATCH 13/20] Build new statusDash, refine newRoofline data render --- internal/api/job.go | 3 +- internal/auth/auth.go | 3 +- .../generic/plots/NewBubbleRoofline.svelte | 208 ++++++++------- web/frontend/src/status/DevelDash.svelte | 5 +- web/frontend/src/status/StatusDash.svelte | 247 ++++++++++++++++-- 5 files changed, 345 insertions(+), 121 deletions(-) diff --git a/internal/api/job.go b/internal/api/job.go index 4c8ca76..7c27a86 100644 --- a/internal/api/job.go +++ b/internal/api/job.go @@ -112,6 +112,7 @@ func (api *RestApi) getJobs(rw http.ResponseWriter, r *http.Request) { for key, vals := range r.URL.Query() { switch key { + // TODO: add project filter case "state": for _, s := range vals { state := schema.JobState(s) @@ -124,7 +125,7 @@ func (api *RestApi) getJobs(rw http.ResponseWriter, r *http.Request) { } case "cluster": filter.Cluster = &model.StringInput{Eq: &vals[0]} - case "start-time": + case "start-time": // ?startTime=1753707480-1754053139 st := strings.Split(vals[0], "-") if len(st) != 2 { handleError(fmt.Errorf("invalid query parameter value: startTime"), diff --git a/internal/auth/auth.go b/internal/auth/auth.go index ad78397..333efc0 100644 --- a/internal/auth/auth.go +++ b/internal/auth/auth.go @@ -381,7 +381,7 @@ func (auth *Authentication) AuthUserApi( return } case len(user.Roles) >= 2: - if user.HasRole(schema.RoleApi) && user.HasAnyRole([]schema.Role{schema.RoleUser, schema.RoleManager, schema.RoleAdmin}) { + if user.HasRole(schema.RoleApi) && user.HasAnyRole([]schema.Role{schema.RoleUser, schema.RoleManager, schema.RoleSupport, schema.RoleAdmin}) { ctx := context.WithValue(r.Context(), repository.ContextUserKey, user) onsuccess.ServeHTTP(rw, r.WithContext(ctx)) return @@ -473,6 +473,7 @@ func securedCheck(user *schema.User, r *http.Request) error { IPAddress = r.RemoteAddr } + // FIXME: IPV6 not handled if strings.Contains(IPAddress, ":") { IPAddress = strings.Split(IPAddress, ":")[0] } diff --git a/web/frontend/src/generic/plots/NewBubbleRoofline.svelte b/web/frontend/src/generic/plots/NewBubbleRoofline.svelte index bf25347..3a0e332 100644 --- a/web/frontend/src/generic/plots/NewBubbleRoofline.svelte +++ b/web/frontend/src/generic/plots/NewBubbleRoofline.svelte @@ -35,6 +35,7 @@ cluster = null, subCluster = null, allowSizeChange = false, + useColors = true, width = 600, height = 380, } = $props(); @@ -243,7 +244,7 @@ // 
Dot Renderer const makeDrawPoints = (opts) => { let {/*size, disp,*/ transparentFill, each = () => {}} = opts; - const sizeBase = 5 * pxRatio; + const sizeBase = 6 * pxRatio; return (u, seriesIdx, idx0, idx1) => { uPlot.orient(u, seriesIdx, (series, dataX, dataY, scaleX, scaleY, valToPosX, valToPosY, xOff, yOff, xDim, yDim, moveTo, lineTo, rect, arc) => { @@ -266,26 +267,33 @@ let filtTop = u.posToVal(-maxSize / 2, scaleY.key); for (let i = 0; i < d[0].length; i++) { - // Jobs: Color based on Duration - if (jobsData) { - u.ctx.strokeStyle = getRGB(u.data[2][i]); - u.ctx.fillStyle = getRGB(u.data[2][i], transparentFill); - // Nodes: Color based on Idle vs. Allocated - } else if (nodesData) { - // console.log('In Plot Handler NodesData', nodesData) - if (nodesData[i]?.nodeState == "idle") { - u.ctx.strokeStyle = "rgb(0, 0, 255)"; - u.ctx.fillStyle = "rgba(0, 0, 255, 0.5)"; - } else if (nodesData[i]?.nodeState == "allocated") { - u.ctx.strokeStyle = "rgb(0, 255, 0)"; - u.ctx.fillStyle = "rgba(0, 255, 0, 0.5)"; - } else if (nodesData[i]?.nodeState == "notindb") { - u.ctx.strokeStyle = "rgb(0, 0, 0)"; - u.ctx.fillStyle = "rgba(0, 0, 0, 0.5)"; - } else { // Fallback: All other DEFINED states - u.ctx.strokeStyle = "rgb(255, 0, 0)"; - u.ctx.fillStyle = "rgba(255, 0, 0, 0.5)"; + if (useColors) { + u.ctx.strokeStyle = "rgb(0, 0, 0)"; + // Jobs: Color based on Duration + if (jobsData) { + //u.ctx.strokeStyle = getRGB(u.data[2][i]); + u.ctx.fillStyle = getRGB(u.data[2][i], transparentFill); + // Nodes: Color based on Idle vs. Allocated + } else if (nodesData) { + // console.log('In Plot Handler NodesData', nodesData) + if (nodesData[i]?.nodeState == "idle") { + //u.ctx.strokeStyle = "rgb(0, 0, 255)"; + u.ctx.fillStyle = "rgba(0, 0, 255, 0.5)"; + } else if (nodesData[i]?.nodeState == "allocated") { + //u.ctx.strokeStyle = "rgb(0, 255, 0)"; + u.ctx.fillStyle = "rgba(0, 255, 0, 0.5)"; + } else if (nodesData[i]?.nodeState == "notindb") { + //u.ctx.strokeStyle = "rgb(0, 0, 0)"; + u.ctx.fillStyle = "rgba(0, 0, 0, 0.5)"; + } else { // Fallback: All other DEFINED states + //u.ctx.strokeStyle = "rgb(255, 0, 0)"; + u.ctx.fillStyle = "rgba(255, 0, 0, 0.5)"; + } } + } else { + // No Colors: Use Black + u.ctx.strokeStyle = "rgb(0, 0, 0)"; + u.ctx.fillStyle = "rgba(0, 0, 0, 0.5)"; } // Get Values @@ -297,10 +305,15 @@ // Jobs: Size based on Resourcecount if (jobsData) { - size = sizeBase + (jobsData[i]?.numAcc ? jobsData[i].numAcc / 2 : jobsData[i].numNodes) + const scaling = jobsData[i].numNodes > 12 + ? 24 // Capped Dot Size + : jobsData[i].numNodes > 1 + ? jobsData[i].numNodes * 2 // MultiNode Scaling + : jobsData[i]?.numAcc ? 
jobsData[i].numAcc : jobsData[i].numNodes * 2 // Single Node or Scale by Accs + size = sizeBase + scaling // Nodes: Size based on Jobcount } else if (nodesData) { - size = sizeBase + nodesData[i]?.numJobs + size = sizeBase + (nodesData[i]?.numJobs * 1.5) // Max Jobs Scale: 8 * 1.5 = 12 }; if (xVal >= filtLft && xVal <= filtRgt && yVal >= filtBtm && yVal <= filtTop) { @@ -377,7 +390,7 @@ tooltip.style.fontSize = "10pt"; tooltip.style.position = "absolute"; tooltip.style.background = "#fcfcfc"; - tooltip.style.display = "nonde"; + tooltip.style.display = "none"; tooltip.style.border = "2px solid black"; tooltip.style.padding = "4px"; tooltip.style.pointerEvents = "none"; @@ -417,33 +430,42 @@ tooltip.style.top = (tooltipTopOffset + top + shiftX) + "px"; tooltip.style.left = (tooltipLeftOffset + lft + shiftY) + "px"; - - // Jobs: Color based on Duration - if (jobsData) { - tooltip.style.borderColor = getRGB(u.data[2][i]); - // Nodes: Color based on Idle vs. Allocated - } else if (nodesData) { - if (nodesData[i]?.nodeState == "idle") { - tooltip.style.borderColor = "rgb(0, 0, 255)"; - } else if (nodesData[i]?.nodeState == "allocated") { - tooltip.style.borderColor = "rgb(0, 255, 0)"; - } else if (nodesData[i]?.nodeState == "notindb") { // Missing from DB table - tooltip.style.borderColor = "rgb(0, 0, 0)"; - } else { // Fallback: All other DEFINED states - tooltip.style.borderColor = "rgb(255, 0, 0)"; + if (useColors) { + // Jobs: Color based on Duration + if (jobsData) { + tooltip.style.borderColor = getRGB(u.data[2][i]); + // Nodes: Color based on Idle vs. Allocated + } else if (nodesData) { + if (nodesData[i]?.nodeState == "idle") { + tooltip.style.borderColor = "rgb(0, 0, 255)"; + } else if (nodesData[i]?.nodeState == "allocated") { + tooltip.style.borderColor = "rgb(0, 255, 0)"; + } else if (nodesData[i]?.nodeState == "notindb") { // Missing from DB table + tooltip.style.borderColor = "rgb(0, 0, 0)"; + } else { // Fallback: All other DEFINED states + tooltip.style.borderColor = "rgb(255, 0, 0)"; + } } + } else { + // No Colors: Use Black + tooltip.style.borderColor = "rgb(0, 0, 0)"; } if (jobsData) { tooltip.textContent = ( // Tooltip Content as String for Job - `Job ID: ${getLegendData(u, i).jobId}\nNodes: ${getLegendData(u, i).numNodes}${getLegendData(u, i)?.numAcc?`\nAccelerators: ${getLegendData(u, i).numAcc}`:''}` + `Job ID: ${getLegendData(u, i).jobId}\nRuntime: ${getLegendData(u, i).duration}\nNodes: ${getLegendData(u, i).numNodes}${getLegendData(u, i)?.numAcc?`\nAccelerators: ${getLegendData(u, i).numAcc}`:''}` ); - } else if (nodesData) { + } else if (nodesData && useColors) { tooltip.textContent = ( // Tooltip Content as String for Node `Host: ${getLegendData(u, i).nodeName}\nState: ${getLegendData(u, i).nodeState}\nJobs: ${getLegendData(u, i).numJobs}` ); + } else if (nodesData && !useColors) { + tooltip.textContent = ( + // Tooltip Content as String for Node + `Host: ${getLegendData(u, i).nodeName}\nJobs: ${getLegendData(u, i).numJobs}` + ); } } @@ -570,7 +592,7 @@ // return prox; // }, // }, - drag: { // Activates Zoom + drag: { // Activates Zoom: Only one Dimension; YX Breaks Zoom Reset (Reason TBD) x: true, y: false }, @@ -725,63 +747,67 @@ u.ctx.lineWidth = 0.15; } - // Jobs: The Color Scale For Time Information - if (jobsData) { - const posX = u.valToPos(0.1, "x", true) - const posXLimit = u.valToPos(100, "x", true) - const posY = u.valToPos(14000.0, "y", true) - u.ctx.fillStyle = 'black' - u.ctx.fillText('Short', posX, posY) - const start = posX + 10 - for (let x = start; 
x < posXLimit; x += 10) { - let c = (x - start) / (posXLimit - start) - u.ctx.fillStyle = getRGB(c) - u.ctx.beginPath() - u.ctx.arc(x, posY, 3, 0, Math.PI * 2, false) - u.ctx.fill() + /* Render Scales */ + if (useColors) { + // Jobs: The Color Scale For Time Information + if (jobsData) { + const posX = u.valToPos(0.1, "x", true) + const posXLimit = u.valToPos(100, "x", true) + const posY = u.valToPos(17500.0, "y", true) + u.ctx.fillStyle = 'black' + u.ctx.fillText('0 Hours', posX, posY) + const start = posX + 10 + for (let x = start; x < posXLimit; x += 10) { + let c = (x - start) / (posXLimit - start) + u.ctx.fillStyle = getRGB(c) + u.ctx.beginPath() + u.ctx.arc(x, posY, 3, 0, Math.PI * 2, false) + u.ctx.fill() + } + u.ctx.fillStyle = 'black' + u.ctx.fillText('24 Hours', posXLimit + 55, posY) } - u.ctx.fillStyle = 'black' - u.ctx.fillText('Long', posXLimit + 23, posY) - } - // Nodes: The Colors Of NodeStates (Just 3) - if (nodesData) { - const posY = u.valToPos(14000.0, "y", true) + // Nodes: The Colors Of NodeStates + if (nodesData) { + const posY = u.valToPos(17500.0, "y", true) - const posAllocDot = u.valToPos(0.1, "x", true) - const posAllocText = posAllocDot + 60 - u.ctx.fillStyle = "rgb(0, 255, 0)" - u.ctx.beginPath() - u.ctx.arc(posAllocDot, posY, 3, 0, Math.PI * 2, false) - u.ctx.fill() - u.ctx.fillStyle = 'black' - u.ctx.fillText('Allocated', posAllocText, posY) + const posAllocDot = u.valToPos(0.03, "x", true) + const posAllocText = posAllocDot + 60 + const posIdleDot = u.valToPos(0.3, "x", true) + const posIdleText = posIdleDot + 30 + const posOtherDot = u.valToPos(3, "x", true) + const posOtherText = posOtherDot + 40 + const posMissingDot = u.valToPos(30, "x", true) + const posMissingText = posMissingDot + 80 - const posIdleDot = posAllocDot + 150 - const posIdleText = posAllocText + 120 - u.ctx.fillStyle = "rgb(0, 0, 255)" - u.ctx.beginPath() - u.ctx.arc(posIdleDot, posY, 3, 0, Math.PI * 2, false) - u.ctx.fill() - u.ctx.fillStyle = 'black' - u.ctx.fillText('Idle', posIdleText, posY) + u.ctx.fillStyle = "rgb(0, 255, 0)" + u.ctx.beginPath() + u.ctx.arc(posAllocDot, posY, 3, 0, Math.PI * 2, false) + u.ctx.fill() + u.ctx.fillStyle = 'black' + u.ctx.fillText('Allocated', posAllocText, posY) - const posOtherDot = posIdleDot + 150 - const posOtherText = posIdleText + 160 - u.ctx.fillStyle = "rgb(255, 0, 0)" - u.ctx.beginPath() - u.ctx.arc(posOtherDot, posY, 3, 0, Math.PI * 2, false) - u.ctx.fill() - u.ctx.fillStyle = 'black' - u.ctx.fillText('Other', posOtherText, posY) + u.ctx.fillStyle = "rgb(0, 0, 255)" + u.ctx.beginPath() + u.ctx.arc(posIdleDot, posY, 3, 0, Math.PI * 2, false) + u.ctx.fill() + u.ctx.fillStyle = 'black' + u.ctx.fillText('Idle', posIdleText, posY) - const posMissingDot = posOtherDot + 150 - const posMissingText = posOtherText + 190 - u.ctx.fillStyle = 'black' - u.ctx.beginPath() - u.ctx.arc(posMissingDot, posY, 3, 0, Math.PI * 2, false) - u.ctx.fill() - u.ctx.fillText('Missing in DB', posMissingText, posY) + u.ctx.fillStyle = "rgb(255, 0, 0)" + u.ctx.beginPath() + u.ctx.arc(posOtherDot, posY, 3, 0, Math.PI * 2, false) + u.ctx.fill() + u.ctx.fillStyle = 'black' + u.ctx.fillText('Other', posOtherText, posY) + + u.ctx.fillStyle = 'black' + u.ctx.beginPath() + u.ctx.arc(posMissingDot, posY, 3, 0, Math.PI * 2, false) + u.ctx.fill() + u.ctx.fillText('Missing in DB', posMissingText, posY) + } } }, ], diff --git a/web/frontend/src/status/DevelDash.svelte b/web/frontend/src/status/DevelDash.svelte index 8cd4627..a4ee42c 100644 --- a/web/frontend/src/status/DevelDash.svelte 
+++ b/web/frontend/src/status/DevelDash.svelte @@ -23,6 +23,7 @@ //import Roofline from "../generic/plots/Roofline.svelte"; import NewBubbleRoofline from "../generic/plots/NewBubbleRoofline.svelte"; import Pie, { colors } from "../generic/plots/Pie.svelte"; + import { formatTime } from "../generic/units.js"; /* Svelte 5 Props */ let { @@ -131,7 +132,7 @@ // Load for jobcount per node only -- might be required for total running jobs anyways in parent component! // Also, think about extra query with only TotalJobCount and Items [Resources, ...some meta infos], not including metric data - const paging = { itemsPerPage: 1500, page: 1 }; + const paging = { itemsPerPage: -1, page: 1 }; const sorting = { field: "startTime", type: "col", order: "DESC" }; const filter = [ { cluster: { eq: cluster } }, @@ -281,7 +282,7 @@ function transformJobsStatsToInfo(subclusterData) { if (subclusterData) { - return subclusterData.map((sc) => { return {id: sc.id, jobId: sc.jobId, numNodes: sc.numNodes, numAcc: sc?.numAccelerators? sc.numAccelerators : 0} }) + return subclusterData.map((sc) => { return {id: sc.id, jobId: sc.jobId, numNodes: sc.numNodes, numAcc: sc?.numAccelerators? sc.numAccelerators : 0, duration: formatTime(sc.duration)} }) } else { console.warn("transformJobsStatsToInfo: jobInfo missing!") return [] diff --git a/web/frontend/src/status/StatusDash.svelte b/web/frontend/src/status/StatusDash.svelte index a1196e5..102026c 100644 --- a/web/frontend/src/status/StatusDash.svelte +++ b/web/frontend/src/status/StatusDash.svelte @@ -15,7 +15,7 @@ CardBody, Table, Progress, - Icon, + // Icon, } from "@sveltestrap/sveltestrap"; import { queryStore, @@ -24,11 +24,11 @@ } from "@urql/svelte"; import { init, - transformPerNodeDataForRoofline, + // transformPerNodeDataForRoofline, } from "../generic/utils.js"; - import { scaleNumbers } from "../generic/units.js"; - import Roofline from "../generic/plots/Roofline.svelte"; + import { scaleNumbers, formatTime } from "../generic/units.js"; + import NewBubbleRoofline from "../generic/plots/NewBubbleRoofline.svelte"; /* Svelte 5 Props */ let { @@ -68,9 +68,12 @@ $metrics: [String!] $from: Time! $to: Time! - $filter: [JobFilter!]! + $jobFilter: [JobFilter!]! + $nodeFilter: [NodeFilter!]! $paging: PageRequest! + $sorting: OrderByInput!
) { + # Node 5 Minute Averages for Roofline nodeMetrics( cluster: $cluster metrics: $metrics @@ -81,27 +84,58 @@ subCluster metrics { name - scope metric { - timestep - unit { - base - prefix - } series { - data + statistics { + avg + } } } } } + # Running Job Metric Average for Rooflines + jobsMetricStats(filter: $jobFilter, metrics: $metrics) { + id + jobId + duration + numNodes + numAccelerators + subCluster + stats { + name + data { + avg + } + } + } + # Get Jobs for Per-Node Counts + jobs(filter: $jobFilter, order: $sorting, page: $paging) { + items { + jobId + resources { + hostname + } + } + count + } # Only counts shared nodes once allocatedNodes(cluster: $cluster) { name count } + # Get States for Node Roofline; $sorting unused in backend: Use placeholder + nodes(filter: $nodeFilter, order: $sorting) { + count + items { + hostname + cluster + subCluster + nodeState + } + } # totalNodes includes multiples if shared jobs jobsStatistics( - filter: $filter + filter: $jobFilter page: $paging sortBy: TOTALJOBS groupBy: SUBCLUSTER @@ -118,8 +152,10 @@ metrics: ["flops_any", "mem_bw"], // Fixed names for roofline and status bars from: from.toISOString(), to: to.toISOString(), - filter: [{ state: ["running"] }, { cluster: { eq: cluster } }], + jobFilter: [{ state: ["running"] }, { cluster: { eq: cluster } }], + nodeFilter: { cluster: { eq: cluster }}, paging: { itemsPerPage: -1, page: 1 }, // Get all: -1 + sorting: { field: "startTime", type: "col", order: "DESC" } }, })); @@ -170,6 +206,7 @@ }); /* Const Functions */ + // New: Sum Up Node Averages const sumUp = (data, subcluster, metric) => data.reduce( (sum, node) => @@ -177,20 +214,132 @@ ? sum + (node.metrics .find((m) => m.name == metric) - ?.metric.series.reduce( - (sum, series) => sum + series.data[series.data.length - 1], - 0, - ) || 0) + ?.metric?.series[0]?.statistics?.avg || 0 + ) : sum, 0, ); + // Old: SumUp Metric Time Data + // const sumUp = (data, subcluster, metric) => + // data.reduce( + // (sum, node) => + // node.subCluster == subcluster + // ? 
sum + + // (node.metrics + // .find((m) => m.name == metric) + // ?.metric.series.reduce( + // (sum, series) => sum + series.data[series.data.length - 1], + // 0, + // ) || 0) + // : sum, + // 0, + // ); + + /* Functions */ + function transformJobsStatsToData(subclusterData) { + /* c will contain values from 0 to 1 representing the duration */ + let data = null + const x = [], y = [], c = [], day = 86400.0 + + if (subclusterData) { + for (let i = 0; i < subclusterData.length; i++) { + const flopsData = subclusterData[i].stats.find((s) => s.name == "flops_any") + const memBwData = subclusterData[i].stats.find((s) => s.name == "mem_bw") + + const f = flopsData.data.avg + const m = memBwData.data.avg + const d = subclusterData[i].duration / day + + const intensity = f / m + if (Number.isNaN(intensity) || !Number.isFinite(intensity)) + continue + + x.push(intensity) + y.push(f) + // Long Jobs > 1 Day: Use max Color + if (d > 1.0) c.push(1.0) + else c.push(d) + } + } else { + console.warn("transformJobsStatsToData: metrics for 'mem_bw' and/or 'flops_any' missing!") + } + + if (x.length > 0 && y.length > 0 && c.length > 0) { + data = [null, [x, y], c] // for dataformat see roofline.svelte + } + return data + } + + function transformNodesStatsToData(subclusterData) { + let data = null + const x = [], y = [] + + if (subclusterData) { + for (let i = 0; i < subclusterData.length; i++) { + const flopsData = subclusterData[i].metrics.find((s) => s.name == "flops_any") + const memBwData = subclusterData[i].metrics.find((s) => s.name == "mem_bw") + + const f = flopsData.metric.series[0].statistics.avg + const m = memBwData.metric.series[0].statistics.avg + + let intensity = f / m + if (Number.isNaN(intensity) || !Number.isFinite(intensity)) { + // continue // Old: Introduces mismatch between Data and Info Arrays + intensity = 0.0 // New: Set to Float Zero: Will not show in Log-Plot (Always below render limit) + } + + x.push(intensity) + y.push(f) + } + } else { + // console.warn("transformNodesStatsToData: metrics for 'mem_bw' and/or 'flops_any' missing!") + } + + if (x.length > 0 && y.length > 0) { + data = [null, [x, y]] // for dataformat see roofline.svelte + } + return data + } + + function transformJobsStatsToInfo(subclusterData) { + if (subclusterData) { + return subclusterData.map((sc) => { return {id: sc.id, jobId: sc.jobId, numNodes: sc.numNodes, numAcc: sc?.numAccelerators? sc.numAccelerators : 0, duration: formatTime(sc.duration)} }) + } else { + console.warn("transformJobsStatsToInfo: jobInfo missing!") + return [] + } + } + + function transformNodesStatsToInfo(subClusterData) { + let result = []; + if (subClusterData) { // && $nodesState?.data) { + // Use Nodes as Returned from CCMS, *NOT* as saved in DB via SlurmState-API! + for (let j = 0; j < subClusterData.length; j++) { + // nodesCounts[subClusterData[i].subCluster] = $nodesState.data.nodes.count; // Probably better as own derived! + + const nodeName = subClusterData[j]?.host ? subClusterData[j].host : "unknown" + const nodeMatch = $statusQuery?.data?.nodes?.items?.find((n) => n.hostname == nodeName && n.subCluster == subClusterData[j].subCluster); + const nodeState = nodeMatch?.nodeState ? nodeMatch.nodeState : "notindb" + let numJobs = 0 + + if ($statusQuery?.data) { + const nodeJobs = $statusQuery?.data?.jobs?.items?.filter((job) => job.resources.find((res) => res.hostname == nodeName)) + numJobs = nodeJobs?.length ? 
nodeJobs.length : 0 + } + + result.push({nodeName: nodeName, nodeState: nodeState, numJobs: numJobs}) + }; + }; + return result + } + {#if $initq.data && $statusQuery.data} {#each $initq.data.clusters.find((c) => c.name == cluster).subClusters as subCluster, i} - +
{activeUsers[subCluster.name]} Active Users
+ Flop Rate (Any) + + Memory BW Rate +
+ {flopRate[subCluster.name]} + {flopRateUnitPrefix[subCluster.name]}{flopRateUnitBase[subCluster.name]} + + {memBwRate[subCluster.name]} + {memBwRateUnitPrefix[subCluster.name]}{memBwRateUnitBase[subCluster.name]} +
Allocated Nodes
- {#key $statusQuery.data.nodeMetrics} - data.subCluster == subCluster.name, + ) + )} + nodesData={transformNodesStatsToInfo($statusQuery?.data?.nodeMetrics.filter( + (data) => data.subCluster == subCluster.name, + ) + )} + /> + {/key} +
+ + +
+ {#key $statusQuery?.data?.jobsMetricStats} data.subCluster == subCluster.name, - ), + ) + )} + jobsData={transformJobsStatsToInfo($statusQuery?.data?.jobsMetricStats.filter( + (data) => data.subCluster == subCluster.name, + ) )} /> {/key} From f338209f32b63f8c0461016f9c105c26efaf9352 Mon Sep 17 00:00:00 2001 From: Christoph Kluge Date: Thu, 7 Aug 2025 16:28:35 +0200 Subject: [PATCH 14/20] rename new roofline component --- .../generic/plots/NewBubbleRoofline.svelte | 856 ------------------ .../src/generic/plots/Roofline.svelte | 708 ++++++++++++--- .../src/generic/plots/RooflineLegacy.svelte | 384 ++++++++ web/frontend/src/job/JobRoofline.svelte | 2 +- web/frontend/src/status/DevelDash.svelte | 6 +- web/frontend/src/status/StatusDash.svelte | 6 +- 6 files changed, 981 insertions(+), 981 deletions(-) delete mode 100644 web/frontend/src/generic/plots/NewBubbleRoofline.svelte create mode 100644 web/frontend/src/generic/plots/RooflineLegacy.svelte diff --git a/web/frontend/src/generic/plots/NewBubbleRoofline.svelte b/web/frontend/src/generic/plots/NewBubbleRoofline.svelte deleted file mode 100644 index 3a0e332..0000000 --- a/web/frontend/src/generic/plots/NewBubbleRoofline.svelte +++ /dev/null @@ -1,856 +0,0 @@ - - - -{#if roofData != null} -
-{:else} - Cannot render roofline: No data! -{/if} diff --git a/web/frontend/src/generic/plots/Roofline.svelte b/web/frontend/src/generic/plots/Roofline.svelte index 8c6e9de..3a0e332 100644 --- a/web/frontend/src/generic/plots/Roofline.svelte +++ b/web/frontend/src/generic/plots/Roofline.svelte @@ -3,7 +3,6 @@ Properties: - `data [null, [], []]`: Roofline Data Structure, see below for details [Default: null] - - `renderTime Bool?`: If time information should be rendered as colored dots [Default: false] - `allowSizeChange Bool?`: If dimensions of rendered plot can change [Default: false] - `subCluster GraphQL.SubCluster?`: SubCluster Object; contains required topology information [Default: null] - `width Number?`: Plot width (reactively adaptive) [Default: 600] @@ -21,19 +20,22 @@ - `data[2] = [0.1, 0.15, 0.2, ...]` - Color Code: Time Information (Floats from 0 to 1) (Optional) --> - -{#if data != null} +{#if roofData != null}
{:else} - Cannot render roofline: No data! + Cannot render roofline: No data! {/if} - diff --git a/web/frontend/src/generic/plots/RooflineLegacy.svelte b/web/frontend/src/generic/plots/RooflineLegacy.svelte new file mode 100644 index 0000000..8c6e9de --- /dev/null +++ b/web/frontend/src/generic/plots/RooflineLegacy.svelte @@ -0,0 +1,384 @@ + + + + +{#if data != null} +
+{:else} + Cannot render roofline: No data! +{/if} + diff --git a/web/frontend/src/job/JobRoofline.svelte b/web/frontend/src/job/JobRoofline.svelte index ae33017..ae962f1 100644 --- a/web/frontend/src/job/JobRoofline.svelte +++ b/web/frontend/src/job/JobRoofline.svelte @@ -19,7 +19,7 @@ import { transformDataForRoofline, } from "../generic/utils.js"; - import Roofline from "../generic/plots/Roofline.svelte"; + import Roofline from "../generic/plots/RooflineLegacy.svelte"; /* Svelte 5 Props */ let { diff --git a/web/frontend/src/status/DevelDash.svelte b/web/frontend/src/status/DevelDash.svelte index a4ee42c..f54e51b 100644 --- a/web/frontend/src/status/DevelDash.svelte +++ b/web/frontend/src/status/DevelDash.svelte @@ -21,7 +21,7 @@ init, } from "../generic/utils.js"; //import Roofline from "../generic/plots/Roofline.svelte"; - import NewBubbleRoofline from "../generic/plots/NewBubbleRoofline.svelte"; + import Roofline from "../generic/plots/Roofline.svelte"; import Pie, { colors } from "../generic/plots/Pie.svelte"; import { formatTime } from "../generic/units.js"; @@ -325,7 +325,7 @@ {subCluster.name} Total: {$jobRoofQuery.data.jobsMetricStats.filter( (data) => data.subCluster == subCluster.name, ).length} Jobs - {subCluster.name} Total: {$jobRoofQuery.data.jobsMetricStats.filter( (data) => data.subCluster == subCluster.name, ).length} Jobs
-
{#key $statusQuery?.data?.nodeMetrics} -
{#key $statusQuery?.data?.jobsMetricStats} - Date: Thu, 7 Aug 2025 18:20:34 +0200 Subject: [PATCH 15/20] add cbmode to piecharts - old default colorscheme is now cb colorscheme --- web/frontend/src/generic/plots/Pie.svelte | 72 ++++++++++++++++++----- web/frontend/src/status/UsageDash.svelte | 7 ++- 2 files changed, 62 insertions(+), 17 deletions(-) diff --git a/web/frontend/src/generic/plots/Pie.svelte b/web/frontend/src/generic/plots/Pie.svelte index aed6026..a0312c4 100644 --- a/web/frontend/src/generic/plots/Pie.svelte +++ b/web/frontend/src/generic/plots/Pie.svelte @@ -14,8 +14,47 @@ --> {#if $initq.data} @@ -235,7 +244,7 @@ {#each $topUserQuery.data.topUser as tu, i} - + {#each $topProjectQuery.data.topProjects as tp, i} - + Date: Tue, 12 Aug 2025 17:04:31 +0200 Subject: [PATCH 17/20] reorganize plots, reduce tabs, --- web/frontend/src/Analysis.root.svelte | 2 +- web/frontend/src/Status.root.svelte | 28 +- web/frontend/src/generic/plots/Pie.svelte | 116 ++-- web/frontend/src/status/DevelDash.svelte | 20 +- web/frontend/src/status/StatusDash.svelte | 135 ++++- web/frontend/src/status/UsageDash.svelte | 630 ++++++++++++++-------- 6 files changed, 621 insertions(+), 310 deletions(-) diff --git a/web/frontend/src/Analysis.root.svelte b/web/frontend/src/Analysis.root.svelte index 689b7a2..122a67b 100644 --- a/web/frontend/src/Analysis.root.svelte +++ b/web/frontend/src/Analysis.root.svelte @@ -459,7 +459,7 @@ {#each $topQuery.data.topList as te, i} - + {#if groupSelection.key == "user"} -
+
diff --git a/web/frontend/src/status/DevelDash.svelte b/web/frontend/src/status/DevelDash.svelte index e0f4960..17426fc 100644 --- a/web/frontend/src/status/DevelDash.svelte +++ b/web/frontend/src/status/DevelDash.svelte @@ -22,12 +22,13 @@ } from "../generic/utils.js"; //import Roofline from "../generic/plots/Roofline.svelte"; import Roofline from "../generic/plots/Roofline.svelte"; - import Pie, { cbColors, colors } from "../generic/plots/Pie.svelte"; + import Pie, { colors } from "../generic/plots/Pie.svelte"; import { formatTime } from "../generic/units.js"; /* Svelte 5 Props */ let { - cluster + cluster, + useCbColors = false } = $props(); /* Const Init */ @@ -40,7 +41,6 @@ let plotWidths = $state([]); let statesWidth = $state(0); let healthWidth = $state(0); - let cbmode = $state(false); // let nodesCounts = $state({}); // let jobsJounts = $state({}); @@ -313,6 +313,12 @@ return result } + function legendColors(targetIdx) { + // Reuses first color if targetIdx overflows + let c = [...colors['default']]; + return c[(c.length + targetIdx) % c.length]; + } + @@ -386,7 +392,7 @@ }, 0)} Nodes {#each refinedStateData as sd, i} - + {sd.state} {sd.count} @@ -427,7 +433,7 @@ }, 0)} Nodes {#each refinedHealthData as hd, i} - + {hd.state} {hd.count} diff --git a/web/frontend/src/status/StatusDash.svelte b/web/frontend/src/status/StatusDash.svelte index f3fdd9b..44a0ab4 100644 --- a/web/frontend/src/status/StatusDash.svelte +++ b/web/frontend/src/status/StatusDash.svelte @@ -15,7 +15,7 @@ CardBody, Table, Progress, - // Icon, + Icon, } from "@sveltestrap/sveltestrap"; import { queryStore, @@ -24,15 +24,16 @@ } from "@urql/svelte"; import { init, - // transformPerNodeDataForRoofline, - } from "../generic/utils.js"; import { scaleNumbers, formatTime } from "../generic/units.js"; import Roofline from "../generic/plots/Roofline.svelte"; + import Pie, { colors } from "../generic/plots/Pie.svelte"; /* Svelte 5 Props */ let { - cluster + cluster, + useCbColors = false, + useAltColors = false, } = $props(); /* Const Init */ @@ -42,6 +43,7 @@ /* State Init */ let from = $state(new Date(Date.now() - 5 * 60 * 1000)); let to = $state(new Date(Date.now())); + let pieWidth = $state(0); let plotWidths = $state([]); // Bar Gauges let allocatedNodes = $state({}); @@ -58,6 +60,30 @@ let totalAccs = $state({}); /* Derived */ + // Accumulated NodeStates for Piecharts + const nodesStateCounts = $derived(queryStore({ + client: client, + query: gql` + query ($filter: [NodeFilter!]) { + nodeStates(filter: $filter) { + state + count + } + } + `, + variables: { + filter: { cluster: { eq: cluster }} + }, + })); + + const refinedStateData = $derived.by(() => { + return $nodesStateCounts?.data?.nodeStates.filter((e) => ['allocated', 'reserved', 'idle', 'mixed','down', 'unknown'].includes(e.state)) + }); + + const refinedHealthData = $derived.by(() => { + return $nodesStateCounts?.data?.nodeStates.filter((e) => ['full', 'partial', 'failed'].includes(e.state)) + }); + // Note: nodeMetrics are requested on configured $timestep resolution // Result: The latest 5 minutes (datapoints) for each node independent of job const statusQuery = $derived(queryStore({ @@ -334,8 +360,107 @@ return result } + function legendColors(targetIdx) { + // Reuses first color if targetIdx overflows + let c; + if (useCbColors) { + c = [...colors['colorblind']]; + } else if (useAltColors) { + c = [...colors['alternative']]; + } else { + c = [...colors['default']]; + } + return c[(c.length + targetIdx) % c.length]; + } + + +{#if $initq.data && 
$nodesStateCounts.data} + + +
+ {#key refinedStateData} +

+ {cluster.charAt(0).toUpperCase() + cluster.slice(1)} Node States +

+ sd.count, + )} + entities={refinedStateData.map( + (sd) => sd.state, + )} + /> + {/key} +
+ + + {#key refinedStateData} + + + + + + + {#each refinedStateData as sd, i} + + + + + + {/each} +
Current StateNodes
{sd.state}{sd.count}
+ {/key} + + + +
+ {#key refinedHealthData} +

+ {cluster.charAt(0).toUpperCase() + cluster.slice(1)} Node Health +

+ sd.count, + )} + entities={refinedHealthData.map( + (sd) => sd.state, + )} + /> + {/key} +
+ + + {#key refinedHealthData} + + + + + + + {#each refinedHealthData as hd, i} + + + + + + {/each} +
Current HealthNodes
{hd.state}{hd.count}
+ {/key} + +
+{/if} + +
{#if $initq.data && $statusQuery.data} {#each $initq.data.clusters.find((c) => c.name == cluster).subClusters as subCluster, i} @@ -454,5 +579,5 @@ {/each} {:else} - Cannot render status tab: No data! + Cannot render status rooflines: No data! {/if} diff --git a/web/frontend/src/status/UsageDash.svelte b/web/frontend/src/status/UsageDash.svelte index 93604ea..0de7da2 100644 --- a/web/frontend/src/status/UsageDash.svelte +++ b/web/frontend/src/status/UsageDash.svelte @@ -6,7 +6,6 @@ --> -{#if $initq.data} - - - -
+ +{#if $topJobsQuery.fetching || $nodeStatusQuery.fetching} + +{:else if $topJobsQuery.data && $nodeStatusQuery.data} + + + + + +

- Top Users on {cluster.charAt(0).toUpperCase() + cluster.slice(1)} + Top Users: Jobs

- {#key $topUserQuery.data} - {#if $topUserQuery.fetching} - - {:else if $topUserQuery.error} - {$topUserQuery.error.message} - {:else} - tu[topUserSelection.key], - )} - entities={$topUserQuery.data.topUser.map((tu) => scrambleNames ? scramble(tu.id) : tu.id)} - /> - {/if} - {/key} + tu['totalJobs'], + )} + entities={$topJobsQuery.data.topUser.map((tu) => scrambleNames ? scramble(tu.id) : tu.id)} + />
- - {#key $topUserQuery.data} - {#if $topUserQuery.fetching} - - {:else if $topUserQuery.error} - {$topUserQuery.error.message} - {:else} - - - - - - - {#each $topUserQuery.data.topUser as tu, i} - - - - {#if tu?.name} - {scrambleNames ? scramble(tu.name) : tu.name} - {/if} - - - {/each} -
LegendUser NameNumber of - -
{scrambleNames ? scramble(tu.id) : tu.id}{tu[topUserSelection.key]}
- {/if} - {/key} + + + + + + + + {#each $topJobsQuery.data.topUser as tu, i} + + + + {#if tu?.name} + {scrambleNames ? scramble(tu.name) : tu.name} + {/if} + + + {/each} +
UserActive Jobs
+ {scrambleNames ? scramble(tu.id) : tu.id} + + {tu['totalJobs']}
- + +

- Top Projects on {cluster.charAt(0).toUpperCase() + cluster.slice(1)} + Top Projects: Jobs

- {#key $topProjectQuery.data} - {#if $topProjectQuery.fetching} - - {:else if $topProjectQuery.error} - {$topProjectQuery.error.message} - {:else} - tp[topProjectSelection.key], - )} - entities={$topProjectQuery.data.topProjects.map((tp) => scrambleNames ? scramble(tp.id) : tp.id)} - /> - {/if} - {/key} + tp['totalJobs'], + )} + entities={$topJobsQuery.data.topProjects.map((tp) => scrambleNames ? scramble(tp.id) : tp.id)} + /> - - {#key $topProjectQuery.data} - {#if $topProjectQuery.fetching} - - {:else if $topProjectQuery.error} - {$topProjectQuery.error.message} - {:else} - - - - - - - {#each $topProjectQuery.data.topProjects as tp, i} - - - - - - {/each} -
LegendProject CodeNumber of - -
{scrambleNames ? scramble(tp.id) : tp.id}{tp[topProjectSelection.key]}
- {/if} - {/key} + + + + + + + + {#each $topJobsQuery.data.topProjects as tp, i} + + + + + + {/each} +
ProjectActive Jobs
+ {scrambleNames ? scramble(tp.id) : tp.id} + + {tp['totalJobs']}
+{:else} + Cannot render job status charts: No data! {/if} + +
+ + +{#if $topNodesQuery.fetching || $nodeStatusQuery.fetching} + +{:else if $topNodesQuery.data && $nodeStatusQuery.data} + + + + + +
+

+ Top Users: Nodes +

+ tu['totalNodes'], + )} + entities={$topNodesQuery.data.topUser.map((tu) => scrambleNames ? scramble(tu.id) : tu.id)} + /> +
+ + + + + + + + + {#each $topNodesQuery.data.topUser as tu, i} + + + + {#if tu?.name} + {scrambleNames ? scramble(tu.name) : tu.name} + {/if} + + + {/each} +
UserNodes
+ {scrambleNames ? scramble(tu.id) : tu.id} + + {tu['totalNodes']}
+ + + +

+ Top Projects: Nodes +

+ tp['totalNodes'], + )} + entities={$topNodesQuery.data.topProjects.map((tp) => scrambleNames ? scramble(tp.id) : tp.id)} + /> + + + + + + + + + {#each $topNodesQuery.data.topProjects as tp, i} + + + + + + {/each} +
ProjectNodes
+ {scrambleNames ? scramble(tp.id) : tp.id} + + {tp['totalNodes']}
+ +
+{:else} + Cannot render node status charts: No data! +{/if} + +
+ + +{#if $topAccsQuery.fetching || $nodeStatusQuery.fetching} + +{:else if $topAccsQuery.data && $nodeStatusQuery.data} + + + + + +
+

+ Top Users: GPUs +

+ tu['totalAccs'], + )} + entities={$topAccsQuery.data.topUser.map((tu) => scrambleNames ? scramble(tu.id) : tu.id)} + /> +
+ + + + + + + + + {#each $topAccsQuery.data.topUser as tu, i} + + + + {#if tu?.name} + {scrambleNames ? scramble(tu.name) : tu.name} + {/if} + + + {/each} +
UserGPUs
+ {scrambleNames ? scramble(tu.id) : tu.id} + + {tu['totalAccs']}
+ + + +

+ Top Projects: GPUs +

+ tp['totalAccs'], + )} + entities={$topAccsQuery.data.topProjects.map((tp) => scrambleNames ? scramble(tp.id) : tp.id)} + /> + + + + + + + + + {#each $topAccsQuery.data.topProjects as tp, i} + + + + + + {/each} +
ProjectGPUs
+ {scrambleNames ? scramble(tp.id) : tp.id} + + {tp['totalAccs']}
+ +
+{:else} + Cannot render accelerator status charts: No data! +{/if} \ No newline at end of file From 44d8254a0bbbe195ba3321b219a5ad51848aa3a4 Mon Sep 17 00:00:00 2001 From: Christoph Kluge Date: Tue, 12 Aug 2025 17:57:04 +0200 Subject: [PATCH 18/20] fix layout --- web/frontend/src/status/UsageDash.svelte | 30 ++++++++++++------------ 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/web/frontend/src/status/UsageDash.svelte b/web/frontend/src/status/UsageDash.svelte index 0de7da2..5afde37 100644 --- a/web/frontend/src/status/UsageDash.svelte +++ b/web/frontend/src/status/UsageDash.svelte @@ -198,7 +198,7 @@ {:else if $topJobsQuery.data && $nodeStatusQuery.data} - + - +

Top Users: Jobs @@ -228,7 +228,7 @@ />

- + @@ -256,7 +256,7 @@
- +

Top Projects: Jobs

@@ -271,7 +271,7 @@ entities={$topJobsQuery.data.topProjects.map((tp) => scrambleNames ? scramble(tp.id) : tp.id)} /> - + @@ -303,7 +303,7 @@ {:else if $topNodesQuery.data && $nodeStatusQuery.data} - + - +

Top Users: Nodes @@ -331,7 +331,7 @@ />

- +
@@ -359,7 +359,7 @@
- +

Top Projects: Nodes

@@ -374,7 +374,7 @@ entities={$topNodesQuery.data.topProjects.map((tp) => scrambleNames ? scramble(tp.id) : tp.id)} /> - + @@ -406,7 +406,7 @@ {:else if $topAccsQuery.data && $nodeStatusQuery.data} - + - +

Top Users: GPUs @@ -434,7 +434,7 @@ />

- +
@@ -462,7 +462,7 @@
- +

Top Projects: GPUs

@@ -477,7 +477,7 @@ entities={$topAccsQuery.data.topProjects.map((tp) => scrambleNames ? scramble(tp.id) : tp.id)} /> - + From 58ae476a3e3bab322055f45ed80600dbab7dc8bd Mon Sep 17 00:00:00 2001 From: Christoph Kluge Date: Wed, 13 Aug 2025 14:22:24 +0200 Subject: [PATCH 19/20] move and add interface options for status tabs --- web/frontend/src/Status.root.svelte | 47 ++-------- web/frontend/src/User.root.svelte | 1 + .../generic/select/HistogramSelection.svelte | 8 +- web/frontend/src/status.entrypoint.js | 2 +- web/frontend/src/status/StatisticsDash.svelte | 65 ++++++-------- web/frontend/src/status/StatusDash.svelte | 47 +++++----- web/frontend/src/status/UsageDash.svelte | 86 ++++++++++++++----- 7 files changed, 126 insertions(+), 130 deletions(-) diff --git a/web/frontend/src/Status.root.svelte b/web/frontend/src/Status.root.svelte index f0336f9..e28af8e 100644 --- a/web/frontend/src/Status.root.svelte +++ b/web/frontend/src/Status.root.svelte @@ -2,7 +2,7 @@ @component Main cluster status view component; renders current system-usage information Properties: - - `cluster String`: The cluster to show status information for + - `presetCluster String`: The cluster to show status information for --> - + -

Current utilization of cluster "{cluster}"

- - - { - from = new Date(Date.now() - 5 * 60 * 1000); - to = new Date(Date.now()); - }} - /> +

Current Status of Cluster "{presetCluster.charAt(0).toUpperCase() + presetCluster.slice(1)}"

- - - + - + - + diff --git a/web/frontend/src/User.root.svelte b/web/frontend/src/User.root.svelte index c1f0fb8..f675a0d 100644 --- a/web/frontend/src/User.root.svelte +++ b/web/frontend/src/User.root.svelte @@ -404,6 +404,7 @@ cluster={selectedCluster} bind:isOpen={isHistogramSelectionOpen} presetSelectedHistograms={selectedHistograms} + configName="user_view_histogramMetrics" applyChange={(newSelection) => { selectedHistogramsBuffer[selectedCluster || 'all'] = [...newSelection]; }} diff --git a/web/frontend/src/generic/select/HistogramSelection.svelte b/web/frontend/src/generic/select/HistogramSelection.svelte index 0468efd..a424ef4 100644 --- a/web/frontend/src/generic/select/HistogramSelection.svelte +++ b/web/frontend/src/generic/select/HistogramSelection.svelte @@ -3,8 +3,9 @@ Properties: - `cluster String`: Currently selected cluster - - `selectedHistograms [String]`: The currently selected metrics to display as histogram - `ìsOpen Bool`: Is selection opened [Bindable] + - `configName String`: The config id string to be updated in database on selection change + - `presetSelectedHistograms [String]`: The currently selected metrics to display as histogram - `applyChange Func`: The callback function to apply current selection --> @@ -25,6 +26,7 @@ let { cluster, isOpen = $bindable(), + configName, presetSelectedHistograms, applyChange } = $props(); @@ -67,8 +69,8 @@ applyChange(selectedHistograms) updateConfiguration({ name: cluster - ? `user_view_histogramMetrics:${cluster}` - : "user_view_histogramMetrics", + ? `${configName}:${cluster}` + : configName, value: selectedHistograms, }); } diff --git a/web/frontend/src/status.entrypoint.js b/web/frontend/src/status.entrypoint.js index 3e45cb7..c3407c1 100644 --- a/web/frontend/src/status.entrypoint.js +++ b/web/frontend/src/status.entrypoint.js @@ -5,7 +5,7 @@ import Status from './Status.root.svelte' mount(Status, { target: document.getElementById('svelte-app'), props: { - cluster: infos.cluster, + presetCluster: infos.cluster, }, context: new Map([ ['cc-config', clusterCockpitConfig] diff --git a/web/frontend/src/status/StatisticsDash.svelte b/web/frontend/src/status/StatisticsDash.svelte index e573554..8523c80 100644 --- a/web/frontend/src/status/StatisticsDash.svelte +++ b/web/frontend/src/status/StatisticsDash.svelte @@ -2,7 +2,7 @@ @component Main cluster status view component; renders current system-usage information Properties: - - `cluster String`: The cluster to show status information for + - `presetCluster String`: The cluster to show status information for --> - -
+ + + + { + from = new Date(Date.now() - (30 * 24 * 60 * 60 * 1000)); // Triggers GQL + to = new Date(Date.now()); + }} + /> + + {#if $initq.fetching || $metricStatusQuery.fetching} @@ -168,6 +152,7 @@ {cluster} bind:isOpen={isHistogramSelectionOpen} presetSelectedHistograms={selectedHistograms} + configName="status_view_selectedHistograms" applyChange={(newSelection) => { selectedHistograms = [...newSelection]; }} diff --git a/web/frontend/src/status/StatusDash.svelte b/web/frontend/src/status/StatusDash.svelte index 44a0ab4..280b04b 100644 --- a/web/frontend/src/status/StatusDash.svelte +++ b/web/frontend/src/status/StatusDash.svelte @@ -2,7 +2,7 @@ @component Main cluster status view component; renders current system-usage information Properties: - - `cluster String`: The cluster to show status information for + - `presetCluster String`: The cluster to show status information for --> + + + + { + from = new Date(Date.now() - 5 * 60 * 1000); + to = new Date(Date.now()); + }} + /> + + + +
+ {#if $initq.data && $nodesStateCounts.data} diff --git a/web/frontend/src/status/UsageDash.svelte b/web/frontend/src/status/UsageDash.svelte index 5afde37..16575e4 100644 --- a/web/frontend/src/status/UsageDash.svelte +++ b/web/frontend/src/status/UsageDash.svelte @@ -2,7 +2,7 @@ @component Main cluster status view component; renders current system-usage information Properties: - - `cluster String`: The cluster to show status information for + - `presetCluster String`: The cluster to show status information for --> + + +
+ + + + + + Duration Bin Size + + + {#each durationBinOptions as dbin} + + {/each} + + + + + { + from = new Date(Date.now() - (30 * 24 * 60 * 60 * 1000)); // Triggers GQL + to = new Date(Date.now()); + }} + /> + + + +
+ {#if $topJobsQuery.fetching || $nodeStatusQuery.fetching} {:else if $topJobsQuery.data && $nodeStatusQuery.data}
- + {#key $nodeStatusQuery.data.jobsStatistics[0].histDuration} + + {/key}
@@ -233,7 +277,7 @@
- + {#each $topJobsQuery.data.topUser as tu, i} @@ -276,7 +320,7 @@ - + {#each $topJobsQuery.data.topProjects as tp, i} From 19a75554b0fec818ab4a77cd38021e6a08280fa0 Mon Sep 17 00:00:00 2001 From: Christoph Kluge Date: Wed, 13 Aug 2025 14:23:19 +0200 Subject: [PATCH 20/20] remove outdated components --- web/frontend/src/status/DevelDash.svelte | 468 ----------------------- web/frontend/src/status/NodeDash.svelte | 127 ------ 2 files changed, 595 deletions(-) delete mode 100644 web/frontend/src/status/DevelDash.svelte delete mode 100644 web/frontend/src/status/NodeDash.svelte diff --git a/web/frontend/src/status/DevelDash.svelte b/web/frontend/src/status/DevelDash.svelte deleted file mode 100644 index 17426fc..0000000 --- a/web/frontend/src/status/DevelDash.svelte +++ /dev/null @@ -1,468 +0,0 @@ - - - - - -{#if $initq.data && $jobRoofQuery.data} - {#each $initq.data.clusters.find((c) => c.name == cluster).subClusters as subCluster, i} - - - Bubble Node -
- {#key $nodesData?.data?.nodeMetrics || $nodesJobs?.data?.jobs} - {subCluster.name} Total: {$jobRoofQuery.data.jobsMetricStats.filter( - (data) => data.subCluster == subCluster.name, - ).length} Jobs - data.subCluster == subCluster.name, - ) - )} - nodesData={transformNodesStatsToInfo($nodesData?.data?.nodeMetrics.filter( - (data) => data.subCluster == subCluster.name, - ) - )} - /> - {/key} -
- - - Bubble Jobs -
- {#key $jobRoofQuery.data.jobsMetricStats} - {subCluster.name} Total: {$jobRoofQuery.data.jobsMetricStats.filter( - (data) => data.subCluster == subCluster.name, - ).length} Jobs - data.subCluster == subCluster.name, - ) - )} - jobsData={transformJobsStatsToInfo($jobRoofQuery?.data?.jobsMetricStats.filter( - (data) => data.subCluster == subCluster.name, - ) - )} - /> - {/key} -
- - - {/each} -{/if} - -
-
- -{#if $initq.data && $nodesStateCounts.data} - - - Node State -
- {#key refinedStateData} - Total: {refinedStateData.reduce((sum, item) => { - return sum + item.count; - }, 0)} Nodes - - sd.count, - )} - entities={refinedStateData.map( - (sd) => sd.state, - )} - /> - {/key} -
- - - {#key refinedStateData} -
UserActive JobsJobs
ProjectActive JobsJobs
- - - - - - {#each refinedStateData as sd, i} - - - - - - {/each} -
LegendCurrent State#Nodes
{sd.state}{sd.count}
- {/key} - - - - Node Health -
- {#key refinedHealthData} - Total: {refinedStateData.reduce((sum, item) => { - return sum + item.count; - }, 0)} Nodes - - sd.count, - )} - entities={refinedHealthData.map( - (sd) => sd.state, - )} - /> - {/key} -
- - - {#key refinedHealthData} - - - - - - - {#each refinedHealthData as hd, i} - - - - - - {/each} -
LegendCurrent Health#Nodes
{hd.state}{hd.count}
- {/key} - -
-{/if} diff --git a/web/frontend/src/status/NodeDash.svelte b/web/frontend/src/status/NodeDash.svelte deleted file mode 100644 index 29a3cf8..0000000 --- a/web/frontend/src/status/NodeDash.svelte +++ /dev/null @@ -1,127 +0,0 @@ - - - - -{#if $initq.data && $nodeStatusQuery.data} - - - - {#key $nodeStatusQuery.data.jobsStatistics} - - {/key} - - - {#key $nodeStatusQuery.data.jobsStatistics} - - {/key} - - - - - {#key $nodeStatusQuery.data.jobsStatistics} - - {/key} - - - {#key $nodeStatusQuery.data.jobsStatistics} - - {/key} - - -{/if} - -