From 743a89c3a218d9ba75c0272cb8c62836e0f70fa9 Mon Sep 17 00:00:00 2001 From: Christoph Kluge Date: Fri, 4 Jul 2025 15:14:15 +0200 Subject: [PATCH 01/40] Finalize node query backend functions, fix migration issue --- api/schema.graphqls | 2 + internal/graph/generated/generated.go | 10 +- internal/graph/model/models_gen.go | 1 + internal/graph/schema.resolvers.go | 19 ++- internal/importer/initDB.go | 2 +- internal/repository/job.go | 4 +- .../migrations/sqlite3/10_node-table.up.sql | 12 +- internal/repository/node.go | 124 +++++++++++++++++- 8 files changed, 162 insertions(+), 12 deletions(-) diff --git a/api/schema.graphqls b/api/schema.graphqls index 794c630..5ff1a36 100644 --- a/api/schema.graphqls +++ b/api/schema.graphqls @@ -300,6 +300,7 @@ type Query { user(username: String!): User allocatedNodes(cluster: String!): [Count!]! + ## Node Queries New node(id: ID!): Node nodes(filter: [NodeFilter!], order: OrderByInput): NodeStateResultList! nodeStats(filter: [NodeFilter!]): [NodeStats!]! @@ -393,6 +394,7 @@ type TimeRangeOutput { input NodeFilter { hostname: StringInput cluster: StringInput + subcluster: StringInput nodeState: NodeState healthState: MonitoringState } diff --git a/internal/graph/generated/generated.go b/internal/graph/generated/generated.go index 238270f..b150423 100644 --- a/internal/graph/generated/generated.go +++ b/internal/graph/generated/generated.go @@ -2714,6 +2714,7 @@ type TimeRangeOutput { input NodeFilter { hostname: StringInput cluster: StringInput + subcluster: StringInput nodeState: NodeState healthState: MonitoringState } @@ -17745,7 +17746,7 @@ func (ec *executionContext) unmarshalInputNodeFilter(ctx context.Context, obj an asMap[k] = v } - fieldsInOrder := [...]string{"hostname", "cluster", "nodeState", "healthState"} + fieldsInOrder := [...]string{"hostname", "cluster", "subcluster", "nodeState", "healthState"} for _, k := range fieldsInOrder { v, ok := asMap[k] if !ok { @@ -17766,6 +17767,13 @@ func (ec *executionContext) unmarshalInputNodeFilter(ctx context.Context, obj an return it, err } it.Cluster = data + case "subcluster": + ctx := graphql.WithPathContext(ctx, graphql.NewPathWithField("subcluster")) + data, err := ec.unmarshalOStringInput2ᚖgithubᚗcomᚋClusterCockpitᚋccᚑbackendᚋinternalᚋgraphᚋmodelᚐStringInput(ctx, v) + if err != nil { + return it, err + } + it.Subcluster = data case "nodeState": ctx := graphql.WithPathContext(ctx, graphql.NewPathWithField("nodeState")) data, err := ec.unmarshalONodeState2ᚖstring(ctx, v) diff --git a/internal/graph/model/models_gen.go b/internal/graph/model/models_gen.go index 5a32ac9..c5cc79b 100644 --- a/internal/graph/model/models_gen.go +++ b/internal/graph/model/models_gen.go @@ -170,6 +170,7 @@ type NamedStatsWithScope struct { type NodeFilter struct { Hostname *StringInput `json:"hostname,omitempty"` Cluster *StringInput `json:"cluster,omitempty"` + Subcluster *StringInput `json:"subcluster,omitempty"` NodeState *string `json:"nodeState,omitempty"` HealthState *schema.NodeState `json:"healthState,omitempty"` } diff --git a/internal/graph/schema.resolvers.go b/internal/graph/schema.resolvers.go index 78a76ef..1284c09 100644 --- a/internal/graph/schema.resolvers.go +++ b/internal/graph/schema.resolvers.go @@ -380,7 +380,24 @@ func (r *queryResolver) Nodes(ctx context.Context, filter []*model.NodeFilter, o // NodeStats is the resolver for the nodeStats field. 
func (r *queryResolver) NodeStats(ctx context.Context, filter []*model.NodeFilter) ([]*model.NodeStats, error) { - panic(fmt.Errorf("not implemented: NodeStats - nodeStats")) + repo := repository.GetNodeRepository() + + stateCounts, serr := repo.CountNodeStates(ctx, filter) + if serr != nil { + cclog.Warnf("Error while counting nodeStates: %s", serr.Error()) + return nil, serr + } + + healthCounts, herr := repo.CountHealthStates(ctx, filter) + if herr != nil { + cclog.Warnf("Error while counting healthStates: %s", herr.Error()) + return nil, herr + } + + allCounts := make([]*model.NodeStats, 0) + allCounts = append(stateCounts, healthCounts...) + + return allCounts, nil } // Job is the resolver for the job field. diff --git a/internal/importer/initDB.go b/internal/importer/initDB.go index 179c21c..98dca03 100644 --- a/internal/importer/initDB.go +++ b/internal/importer/initDB.go @@ -40,7 +40,7 @@ func InitDB() error { } tags := make(map[string]int64) - // Not using log.Print because we want the line to end with `\r` and + // Not using cclog.Print because we want the line to end with `\r` and // this function is only ever called when a special command line flag // is passed anyways. fmt.Printf("%d jobs inserted...\r", 0) diff --git a/internal/repository/job.go b/internal/repository/job.go index b6aa323..2cde824 100644 --- a/internal/repository/job.go +++ b/internal/repository/job.go @@ -337,10 +337,10 @@ func (r *JobRepository) FindColumnValue(user *schema.User, searchterm string, ta // theSql, args, theErr := theQuery.ToSql() // if theErr != nil { - // log.Warn("Error while converting query to sql") + // cclog.Warn("Error while converting query to sql") // return "", err // } - // log.Debugf("SQL query (FindColumnValue): `%s`, args: %#v", theSql, args) + // cclog.Debugf("SQL query (FindColumnValue): `%s`, args: %#v", theSql, args) err := theQuery.RunWith(r.stmtCache).QueryRow().Scan(&result) diff --git a/internal/repository/migrations/sqlite3/10_node-table.up.sql b/internal/repository/migrations/sqlite3/10_node-table.up.sql index c208b32..52e6a05 100644 --- a/internal/repository/migrations/sqlite3/10_node-table.up.sql +++ b/internal/repository/migrations/sqlite3/10_node-table.up.sql @@ -3,12 +3,12 @@ CREATE TABLE "node" ( hostname VARCHAR(255) NOT NULL, cluster VARCHAR(255) NOT NULL, subcluster VARCHAR(255) NOT NULL, - cpus_allocated INTEGER NOT NULL, - cpus_total INTEGER NOT NULL, - memory_allocated INTEGER NOT NULL, - memory_total INTEGER NOT NULL, - gpus_allocated INTEGER NOT NULL, - gpus_total INTEGER NOT NULL, + cpus_allocated INTEGER DEFAULT 0 NOT NULL, + cpus_total INTEGER DEFAULT 0 NOT NULL, + memory_allocated INTEGER DEFAULT 0 NOT NULL, + memory_total INTEGER DEFAULT 0 NOT NULL, + gpus_allocated INTEGER DEFAULT 0 NOT NULL, + gpus_total INTEGER DEFAULT 0 NOT NULL, node_state VARCHAR(255) NOT NULL CHECK (node_state IN ( 'allocated', 'reserved', 'idle', 'mixed', diff --git a/internal/repository/node.go b/internal/repository/node.go index 83bf062..b4d0181 100644 --- a/internal/repository/node.go +++ b/internal/repository/node.go @@ -49,6 +49,11 @@ func GetNodeRepository() *NodeRepository { return nodeRepoInstance } +var nodeColumns []string = []string{ + "node.id", "node.hostname", "node.cluster", "node.subcluster", + "node.node_state", "node.health_state", "node.meta_data", +} + func (r *NodeRepository) FetchMetadata(node *schema.Node) (map[string]string, error) { start := time.Now() cachekey := fmt.Sprintf("metadata:%d", node.ID) @@ -220,7 +225,7 @@ func (r *NodeRepository) 
QueryNodes( filters []*model.NodeFilter, order *model.OrderByInput, ) ([]*schema.Node, error) { - query, qerr := SecurityCheck(ctx, sq.Select(jobColumns...).From("node")) + query, qerr := AccessCheck(ctx, sq.Select(nodeColumns...).From("node")) if qerr != nil { return nil, qerr } @@ -232,6 +237,9 @@ func (r *NodeRepository) QueryNodes( if f.Cluster != nil { query = buildStringCondition("node.cluster", f.Cluster, query) } + if f.Subcluster != nil { + query = buildStringCondition("node.subcluster", f.Subcluster, query) + } if f.NodeState != nil { query = query.Where("node.node_state = ?", f.NodeState) } @@ -287,3 +295,117 @@ func (r *NodeRepository) ListNodes(cluster string) ([]*schema.Node, error) { return nodeList, nil } + +func (r *NodeRepository) CountNodeStates(ctx context.Context, filters []*model.NodeFilter) ([]*model.NodeStats, error) { + query, qerr := AccessCheck(ctx, sq.Select("node_state AS state", "count(*) AS count").From("node")) + if qerr != nil { + return nil, qerr + } + + for _, f := range filters { + if f.Hostname != nil { + query = buildStringCondition("node.hostname", f.Hostname, query) + } + if f.Cluster != nil { + query = buildStringCondition("node.cluster", f.Cluster, query) + } + if f.Subcluster != nil { + query = buildStringCondition("node.subcluster", f.Subcluster, query) + } + if f.NodeState != nil { + query = query.Where("node.node_state = ?", f.NodeState) + } + if f.HealthState != nil { + query = query.Where("node.health_state = ?", f.HealthState) + } + } + + rows, err := query.RunWith(r.stmtCache).Query() + if err != nil { + queryString, queryVars, _ := query.ToSql() + cclog.Errorf("Error while running query '%s' %v: %v", queryString, queryVars, err) + return nil, err + } + + nodes := make([]*model.NodeStats, 0) + for rows.Next() { + node := model.NodeStats{} + + if err := rows.Scan(&node.State, &node.Count); err != nil { + rows.Close() + cclog.Warn("Error while scanning rows (NodeStats)") + return nil, err + } + nodes = append(nodes, &node) + } + + return nodes, nil +} + +func (r *NodeRepository) CountHealthStates(ctx context.Context, filters []*model.NodeFilter) ([]*model.NodeStats, error) { + query, qerr := AccessCheck(ctx, sq.Select("health_state AS state", "count(*) AS count").From("node")) + if qerr != nil { + return nil, qerr + } + + for _, f := range filters { + if f.Hostname != nil { + query = buildStringCondition("node.hostname", f.Hostname, query) + } + if f.Cluster != nil { + query = buildStringCondition("node.cluster", f.Cluster, query) + } + if f.Subcluster != nil { + query = buildStringCondition("node.subcluster", f.Subcluster, query) + } + if f.NodeState != nil { + query = query.Where("node.node_state = ?", f.NodeState) + } + if f.HealthState != nil { + query = query.Where("node.health_state = ?", f.HealthState) + } + } + + rows, err := query.RunWith(r.stmtCache).Query() + if err != nil { + queryString, queryVars, _ := query.ToSql() + cclog.Errorf("Error while running query '%s' %v: %v", queryString, queryVars, err) + return nil, err + } + + nodes := make([]*model.NodeStats, 0) + for rows.Next() { + node := model.NodeStats{} + + if err := rows.Scan(&node.State, &node.Count); err != nil { + rows.Close() + cclog.Warn("Error while scanning rows (NodeStats)") + return nil, err + } + nodes = append(nodes, &node) + } + + return nodes, nil +} + +func AccessCheck(ctx context.Context, query sq.SelectBuilder) (sq.SelectBuilder, error) { + user := GetUserFromContext(ctx) + return AccessCheckWithUser(user, query) +} + +func AccessCheckWithUser(user 
*schema.User, query sq.SelectBuilder) (sq.SelectBuilder, error) { + if user == nil { + var qnil sq.SelectBuilder + return qnil, fmt.Errorf("user context is nil") + } + + switch { + // case len(user.Roles) == 1 && user.HasRole(schema.RoleApi): // API-User : Access NodeInfos + // return query, nil + case user.HasAnyRole([]schema.Role{schema.RoleAdmin, schema.RoleSupport}): // Admin & Support : Access NodeInfos + return query, nil + default: // No known Role: No Access, return error + var qnil sq.SelectBuilder + return qnil, fmt.Errorf("user has no or unknown roles") + } +} From 57b43b7b60c9d6328beef505eecac3786c136b95 Mon Sep 17 00:00:00 2001 From: Christoph Kluge Date: Mon, 7 Jul 2025 18:44:24 +0200 Subject: [PATCH 02/40] Split status view into tabbed components --- web/frontend/src/Status.root.svelte | 687 +----------------- web/frontend/src/status/NodeDash.svelte | 127 ++++ web/frontend/src/status/StatisticsDash.svelte | 174 +++++ web/frontend/src/status/StatusDash.svelte | 246 +++++++ web/frontend/src/status/UsageDash.svelte | 319 ++++++++ 5 files changed, 902 insertions(+), 651 deletions(-) create mode 100644 web/frontend/src/status/NodeDash.svelte create mode 100644 web/frontend/src/status/StatisticsDash.svelte create mode 100644 web/frontend/src/status/StatusDash.svelte create mode 100644 web/frontend/src/status/UsageDash.svelte diff --git a/web/frontend/src/Status.root.svelte b/web/frontend/src/Status.root.svelte index 8cdd091..297e675 100644 --- a/web/frontend/src/Status.root.svelte +++ b/web/frontend/src/Status.root.svelte @@ -6,350 +6,38 @@ --> - - + +

Current utilization of cluster "{cluster}"

- - -
- + + {/if} @@ -377,334 +64,32 @@ {$mainQuery.error.message}
-{/if} +{/if} --> -
+ + + + + + + - + + + + + -{#if $initq.data && $mainQuery.data} - {#each $initq.data.clusters.find((c) => c.name == cluster).subClusters as subCluster, i} - - - - - SubCluster "{subCluster.name}" - - - - - - - - - - - - - - - - - - -
Allocated Nodes
- -
{allocatedNodes[subCluster.name]} / {subCluster.numberOfNodes} - Nodes
Flop Rate (Any)
- -
- {scaleNumbers( - flopRate[subCluster.name], - subCluster.flopRateSimd.value * subCluster.numberOfNodes, - flopRateUnitPrefix[subCluster.name], - )}{flopRateUnitBase[subCluster.name]} [Max] -
MemBw Rate
- -
- {scaleNumbers( - memBwRate[subCluster.name], - subCluster.memoryBandwidth.value * subCluster.numberOfNodes, - memBwRateUnitPrefix[subCluster.name], - )}{memBwRateUnitBase[subCluster.name]} [Max] -
-
-
- - -
- {#key $mainQuery.data.nodeMetrics} - data.subCluster == subCluster.name, - ), - )} - /> - {/key} -
- -
- {/each} + + + + + -
- - - - - -
-

- Top Users on {cluster.charAt(0).toUpperCase() + cluster.slice(1)} -

- {#key $topUserQuery.data} - {#if $topUserQuery.fetching} - - {:else if $topUserQuery.error} - {$topUserQuery.error.message} - {:else} - tu[topUserSelection.key], - )} - entities={$topUserQuery.data.topUser.map((tu) => scrambleNames ? scramble(tu.id) : tu.id)} - /> - {/if} - {/key} -
- - - {#key $topUserQuery.data} - {#if $topUserQuery.fetching} - - {:else if $topUserQuery.error} - {$topUserQuery.error.message} - {:else} - - - - - - - {#each $topUserQuery.data.topUser as tu, i} - - - - {#if tu?.name} - {scrambleNames ? scramble(tu.name) : tu.name} - {/if} - - - {/each} -
Legend | User Name | Number of - -
{scrambleNames ? scramble(tu.id) : tu.id} | {tu[topUserSelection.key]}
- {/if} - {/key} - - -

- Top Projects on {cluster.charAt(0).toUpperCase() + cluster.slice(1)} -

- {#key $topProjectQuery.data} - {#if $topProjectQuery.fetching} - - {:else if $topProjectQuery.error} - {$topProjectQuery.error.message} - {:else} - tp[topProjectSelection.key], - )} - entities={$topProjectQuery.data.topProjects.map((tp) => scrambleNames ? scramble(tp.id) : tp.id)} - /> - {/if} - {/key} - - - {#key $topProjectQuery.data} - {#if $topProjectQuery.fetching} - - {:else if $topProjectQuery.error} - {$topProjectQuery.error.message} - {:else} - - - - - - - {#each $topProjectQuery.data.topProjects as tp, i} - - - - - - {/each} -
Legend | Project Code | Number of - -
{scrambleNames ? scramble(tp.id) : tp.id} | {tp[topProjectSelection.key]}
- {/if} - {/key} - -
- -
- - - - - - {#key $mainQuery.data.stats} - - {/key} - - - {#key $mainQuery.data.stats} - - {/key} - - - - - {#key $mainQuery.data.stats} - - {/key} - - - {#key $mainQuery.data.stats} - - {/key} - - - -
- - - - {#if selectedHistograms} - - {#snippet gridContent(item)} - - {/snippet} - - {#key $mainQuery.data.stats[0].histMetrics} - - {/key} - {/if} -{/if} - - { - selectedHistograms = [...newSelection]; - }} -/> + + + + + +
+
\ No newline at end of file diff --git a/web/frontend/src/status/NodeDash.svelte b/web/frontend/src/status/NodeDash.svelte new file mode 100644 index 0000000..29a3cf8 --- /dev/null +++ b/web/frontend/src/status/NodeDash.svelte @@ -0,0 +1,127 @@ + + + + +{#if $initq.data && $nodeStatusQuery.data} + + + + {#key $nodeStatusQuery.data.jobsStatistics} + + {/key} + + + {#key $nodeStatusQuery.data.jobsStatistics} + + {/key} + + + + + {#key $nodeStatusQuery.data.jobsStatistics} + + {/key} + + + {#key $nodeStatusQuery.data.jobsStatistics} + + {/key} + + +{/if} + + diff --git a/web/frontend/src/status/StatisticsDash.svelte b/web/frontend/src/status/StatisticsDash.svelte new file mode 100644 index 0000000..d3b4236 --- /dev/null +++ b/web/frontend/src/status/StatisticsDash.svelte @@ -0,0 +1,174 @@ + + + + + + + + + + + + + {#if $initq.fetching || $metricStatusQuery.fetching} + + {:else if $initq.error} + {$initq.error.message} + {:else} + + {/if} + + +{#if $metricStatusQuery.error} + + + {$metricStatusQuery.error.message} + + +{/if} + +{#if $initq.data && $metricStatusQuery.data} + + {#if selectedHistograms} + + {#snippet gridContent(item)} + + {/snippet} + + {#key $metricStatusQuery.data.jobsStatistics[0].histMetrics} + + {/key} + {/if} +{/if} + + { + selectedHistograms = [...newSelection]; + }} +/> diff --git a/web/frontend/src/status/StatusDash.svelte b/web/frontend/src/status/StatusDash.svelte new file mode 100644 index 0000000..57951a5 --- /dev/null +++ b/web/frontend/src/status/StatusDash.svelte @@ -0,0 +1,246 @@ + + + + + +{#if $initq.data && $statusQuery.data} + {#each $initq.data.clusters.find((c) => c.name == cluster).subClusters as subCluster, i} + + + + + SubCluster "{subCluster.name}" + + + + + + + + + + + + + + + + + + +
Allocated Nodes
+ +
{allocatedNodes[subCluster.name]} / {subCluster.numberOfNodes} + Nodes
Flop Rate (Any)
+ +
+ {scaleNumbers( + flopRate[subCluster.name], + subCluster.flopRateSimd.value * subCluster.numberOfNodes, + flopRateUnitPrefix[subCluster.name], + )}{flopRateUnitBase[subCluster.name]} [Max] +
MemBw Rate
+ +
+ {scaleNumbers( + memBwRate[subCluster.name], + subCluster.memoryBandwidth.value * subCluster.numberOfNodes, + memBwRateUnitPrefix[subCluster.name], + )}{memBwRateUnitBase[subCluster.name]} [Max] +
+
+
+ + +
+ {#key $statusQuery.data.nodeMetrics} + data.subCluster == subCluster.name, + ), + )} + /> + {/key} +
+ +
+ {/each} +{/if} diff --git a/web/frontend/src/status/UsageDash.svelte b/web/frontend/src/status/UsageDash.svelte new file mode 100644 index 0000000..1cda6e8 --- /dev/null +++ b/web/frontend/src/status/UsageDash.svelte @@ -0,0 +1,319 @@ + + + + +{#if $initq.data} + + + +
+

+ Top Users on {cluster.charAt(0).toUpperCase() + cluster.slice(1)} +

+ {#key $topUserQuery.data} + {#if $topUserQuery.fetching} + + {:else if $topUserQuery.error} + {$topUserQuery.error.message} + {:else} + tu[topUserSelection.key], + )} + entities={$topUserQuery.data.topUser.map((tu) => scrambleNames ? scramble(tu.id) : tu.id)} + /> + {/if} + {/key} +
+ + + {#key $topUserQuery.data} + {#if $topUserQuery.fetching} + + {:else if $topUserQuery.error} + {$topUserQuery.error.message} + {:else} + + + + + + + {#each $topUserQuery.data.topUser as tu, i} + + + + {#if tu?.name} + {scrambleNames ? scramble(tu.name) : tu.name} + {/if} + + + {/each} +
Legend | User Name | Number of + +
{scrambleNames ? scramble(tu.id) : tu.id} | {tu[topUserSelection.key]}
+ {/if} + {/key} + + +

+ Top Projects on {cluster.charAt(0).toUpperCase() + cluster.slice(1)} +

+ {#key $topProjectQuery.data} + {#if $topProjectQuery.fetching} + + {:else if $topProjectQuery.error} + {$topProjectQuery.error.message} + {:else} + tp[topProjectSelection.key], + )} + entities={$topProjectQuery.data.topProjects.map((tp) => scrambleNames ? scramble(tp.id) : tp.id)} + /> + {/if} + {/key} + + + {#key $topProjectQuery.data} + {#if $topProjectQuery.fetching} + + {:else if $topProjectQuery.error} + {$topProjectQuery.error.message} + {:else} + + + + + + + {#each $topProjectQuery.data.topProjects as tp, i} + + + + + + {/each} +
Legend | Project Code | Number of + +
{scrambleNames ? scramble(tp.id) : tp.id} | {tp[topProjectSelection.key]}
+ {/if} + {/key} + +
+{/if} From b036c3903c4274e15cd5b559fa0f3a6d5a730907 Mon Sep 17 00:00:00 2001 From: Christoph Kluge Date: Thu, 10 Jul 2025 14:57:12 +0200 Subject: [PATCH 03/40] add config fallbacks and notes --- api/schema.graphqls | 1 + web/frontend/src/generic/JobCompare.svelte | 2 +- web/frontend/src/generic/plots/Comparogram.svelte | 2 +- web/frontend/src/status/StatisticsDash.svelte | 2 +- web/frontend/src/status/StatusDash.svelte | 1 + 5 files changed, 5 insertions(+), 3 deletions(-) diff --git a/api/schema.graphqls b/api/schema.graphqls index 5ff1a36..a95df84 100644 --- a/api/schema.graphqls +++ b/api/schema.graphqls @@ -358,6 +358,7 @@ type Query { from: Time! to: Time! ): [NodeMetrics!]! + nodeMetricsList( cluster: String! subCluster: String! diff --git a/web/frontend/src/generic/JobCompare.svelte b/web/frontend/src/generic/JobCompare.svelte index a1e7bfa..55966ac 100644 --- a/web/frontend/src/generic/JobCompare.svelte +++ b/web/frontend/src/generic/JobCompare.svelte @@ -26,7 +26,7 @@ /* Svelte 5 Props */ let { matchedCompareJobs = $bindable(0), - metrics = ccconfig?.plot_list_selectedMetrics, + metrics = getContext("cc-config")?.plot_list_selectedMetrics, filterBuffer = [], } = $props(); diff --git a/web/frontend/src/generic/plots/Comparogram.svelte b/web/frontend/src/generic/plots/Comparogram.svelte index b6f5fd1..2051088 100644 --- a/web/frontend/src/generic/plots/Comparogram.svelte +++ b/web/frontend/src/generic/plots/Comparogram.svelte @@ -44,7 +44,7 @@ /* Const Init */ const clusterCockpitConfig = getContext("cc-config"); - const lineWidth = clusterCockpitConfig.plot_general_lineWidth / window.devicePixelRatio; + const lineWidth = clusterCockpitConfig?.plot_general_lineWidth / window.devicePixelRatio || 2; const cbmode = clusterCockpitConfig?.plot_general_colorblindMode || false; // UPLOT SERIES INIT // diff --git a/web/frontend/src/status/StatisticsDash.svelte b/web/frontend/src/status/StatisticsDash.svelte index d3b4236..e573554 100644 --- a/web/frontend/src/status/StatisticsDash.svelte +++ b/web/frontend/src/status/StatisticsDash.svelte @@ -79,7 +79,7 @@ })); /* Functions */ - // TODO: Originally Uses User View Selection! -> Change to Status View + // TODO: Originally Uses User View Selection! 
-> Change to Status View : Adapt Mutations from TopUserSelect // function updateTopUserConfiguration(select) { // if (ccconfig[`status_view_selectedHistograms:${cluster}`] != select) { // updateConfigurationMutation({ diff --git a/web/frontend/src/status/StatusDash.svelte b/web/frontend/src/status/StatusDash.svelte index 57951a5..f98c1c3 100644 --- a/web/frontend/src/status/StatusDash.svelte +++ b/web/frontend/src/status/StatusDash.svelte @@ -54,6 +54,7 @@ /* Derived */ // Note: nodeMetrics are requested on configured $timestep resolution + // Result: The latest 5 minutes (datapoints) for each node independent of job const statusQuery = $derived(queryStore({ client: client, query: gql` From ed5290be86127d905b039971a8ecba5b808e2727 Mon Sep 17 00:00:00 2001 From: Christoph Kluge Date: Mon, 14 Jul 2025 18:12:34 +0200 Subject: [PATCH 04/40] adds new roofline component for job average based data - clickable, resource sized and duration colored bubbles --- web/frontend/src/Status.root.svelte | 9 +- .../generic/plots/NewBubbleRoofline.svelte | 739 ++++++++++++++++++ web/frontend/src/status/DevelDash.svelte | 164 ++++ 3 files changed, 911 insertions(+), 1 deletion(-) create mode 100644 web/frontend/src/generic/plots/NewBubbleRoofline.svelte create mode 100644 web/frontend/src/status/DevelDash.svelte diff --git a/web/frontend/src/Status.root.svelte b/web/frontend/src/Status.root.svelte index 297e675..ee8cde7 100644 --- a/web/frontend/src/Status.root.svelte +++ b/web/frontend/src/Status.root.svelte @@ -20,6 +20,7 @@ import UsageDash from "./status/UsageDash.svelte"; import NodeDash from "./status/NodeDash.svelte"; import StatisticsDash from "./status/StatisticsDash.svelte"; + import DevelDash from "./status/DevelDash.svelte"; /* Svelte 5 Props */ let { @@ -68,7 +69,13 @@ - + + + + + + + diff --git a/web/frontend/src/generic/plots/NewBubbleRoofline.svelte b/web/frontend/src/generic/plots/NewBubbleRoofline.svelte new file mode 100644 index 0000000..1c89433 --- /dev/null +++ b/web/frontend/src/generic/plots/NewBubbleRoofline.svelte @@ -0,0 +1,739 @@ + + + +{#if roofData != null} +
+{:else} + Cannot render roofline: No data! +{/if} \ No newline at end of file diff --git a/web/frontend/src/status/DevelDash.svelte b/web/frontend/src/status/DevelDash.svelte new file mode 100644 index 0000000..ea00803 --- /dev/null +++ b/web/frontend/src/status/DevelDash.svelte @@ -0,0 +1,164 @@ + + + + + +{#if $initq.data && $jobRoofQuery.data} + {#each $initq.data.clusters.find((c) => c.name == cluster).subClusters as subCluster, i} + + + Classic +
+ {#key $jobRoofQuery.data.jobsMetricStats} + {subCluster.name} Total: {$jobRoofQuery.data.jobsMetricStats.filter( + (data) => data.subCluster == subCluster.name, + ).length} Jobs + data.subCluster == subCluster.name, + ) + )} + /> + {/key} +
+ + + Bubble +
+ {#key $jobRoofQuery.data.jobsMetricStats} + {subCluster.name} Total: {$jobRoofQuery.data.jobsMetricStats.filter( + (data) => data.subCluster == subCluster.name, + ).length} Jobs + data.subCluster == subCluster.name, + ) + )} + jobsData={transformJobsStatsToInfo($jobRoofQuery?.data?.jobsMetricStats.filter( + (data) => data.subCluster == subCluster.name, + ) + )} + /> + {/key} +
+ +
+ {/each} +{/if} From 096217eea694b09968f650a23a26038a7d29ba71 Mon Sep 17 00:00:00 2001 From: Christoph Kluge Date: Tue, 15 Jul 2025 16:00:55 +0200 Subject: [PATCH 05/40] cleanup bubbleRoofline code, comment optional code parts --- .../generic/plots/NewBubbleRoofline.svelte | 182 ++++++++---------- 1 file changed, 82 insertions(+), 100 deletions(-) diff --git a/web/frontend/src/generic/plots/NewBubbleRoofline.svelte b/web/frontend/src/generic/plots/NewBubbleRoofline.svelte index 1c89433..857b49b 100644 --- a/web/frontend/src/generic/plots/NewBubbleRoofline.svelte +++ b/web/frontend/src/generic/plots/NewBubbleRoofline.svelte @@ -37,8 +37,6 @@ height = 380, } = $props(); - $inspect(jobsData) - /* Const Init */ const lineWidth = clusterCockpitConfig?.plot_general_lineWidth || 2; const cbmode = clusterCockpitConfig?.plot_general_colorblindMode || false; @@ -58,6 +56,7 @@ // Copied Example Vars for Uplot Bubble // https://developer.mozilla.org/en-US/docs/Web/API/CanvasRenderingContext2D/isPointInPath let qt; + let hRect; let pxRatio; function setPxRatio() { pxRatio = uPlot.pxRatio; @@ -66,7 +65,7 @@ window.addEventListener('dppxchange', setPxRatio); // let minSize = 6; let maxSize = 60; - let maxArea = Math.PI * (maxSize / 2) ** 2; + // let maxArea = Math.PI * (maxSize / 2) ** 2; // let minArea = Math.PI * (minSize / 2) ** 2; /* Functions */ @@ -94,7 +93,7 @@ return Math.floor(x * 255.0); } function getRGB(c, makeTransparent = false) { - if (makeTransparent) return `rgb(${cbmode ? '0' : getGradientR(c)}, ${getGradientG(c)}, ${getGradientB(c)}, 0.33)`; + if (makeTransparent) return `rgba(${cbmode ? '0' : getGradientR(c)}, ${getGradientG(c)}, ${getGradientB(c)}, 0.33)`; else return `rgb(${cbmode ? '0' : getGradientR(c)}, ${getGradientG(c)}, ${getGradientB(c)})`; } function nearestThousand(num) { @@ -110,28 +109,28 @@ } // quadratic scaling (px area) - function getSize(value, minValue, maxValue) { - let pct = value / maxValue; - // clamp to min area - //let area = Math.max(maxArea * pct, minArea); - let area = maxArea * pct; - return Math.sqrt(area / Math.PI) * 2; - } + // function getSize(value, minValue, maxValue) { + // let pct = value / maxValue; + // // clamp to min area + // //let area = Math.max(maxArea * pct, minArea); + // let area = maxArea * pct; + // return Math.sqrt(area / Math.PI) * 2; + // } - function getSizeMinMax(u) { - let minValue = Infinity; - let maxValue = -Infinity; - for (let i = 1; i < u.series.length; i++) { - let sizeData = u.data[i][2]; - for (let j = 0; j < sizeData.length; j++) { - minValue = Math.min(minValue, sizeData[j]); - maxValue = Math.max(maxValue, sizeData[j]); - } - } - return [minValue, maxValue]; - } + // function getSizeMinMax(u) { + // let minValue = Infinity; + // let maxValue = -Infinity; + // for (let i = 1; i < u.series.length; i++) { + // let sizeData = u.data[i][2]; + // for (let j = 0; j < sizeData.length; j++) { + // minValue = Math.min(minValue, sizeData[j]); + // maxValue = Math.max(maxValue, sizeData[j]); + // } + // } + // return [minValue, maxValue]; + // } - // Quadtree Object (How to import?) 
+ // Quadtree Object (TODO: Split and Import) class Quadtree { constructor (x, y, w, h, l) { let t = this; @@ -239,43 +238,24 @@ } } - // Dot Renderers + // Dot Renderer const makeDrawPoints = (opts) => { - let {/*size,*/ disp, each = () => {}} = opts; + let {/*size, disp,*/ each = () => {}} = opts; const sizeBase = 5 * pxRatio; return (u, seriesIdx, idx0, idx1) => { uPlot.orient(u, seriesIdx, (series, dataX, dataY, scaleX, scaleY, valToPosX, valToPosY, xOff, yOff, xDim, yDim, moveTo, lineTo, rect, arc) => { let d = u.data[seriesIdx]; - let strokeWidth = 2; - - u.ctx.save(); - - u.ctx.rect(u.bbox.left, u.bbox.top, u.bbox.width, u.bbox.height); - u.ctx.clip(); - - // u.ctx.fillStyle = series.fill(); - // u.ctx.strokeStyle = series.stroke(); - u.ctx.lineWidth = strokeWidth; - let deg360 = 2 * Math.PI; - - // console.time("points"); - - // let cir = new Path2D(); - // cir.moveTo(0, 0); - // arc(cir, 0, 0, 3, 0, deg360); - - // Create transformation matrix that moves 200 points to the right - // let m = document.createElementNS('http://www.w3.org/2000/svg', 'svg').createSVGMatrix(); - // m.a = 1; m.b = 0; - // m.c = 0; m.d = 1; - // m.e = 200; m.f = 0; - - // compute bubble dims + /* Alt.: Sizes based on other Data Rows */ // let sizes = disp.size.values(u, seriesIdx, idx0, idx1); + u.ctx.save(); + u.ctx.rect(u.bbox.left, u.bbox.top, u.bbox.width, u.bbox.height); + u.ctx.clip(); + u.ctx.lineWidth = strokeWidth; + // todo: this depends on direction & orientation // todo: calc once per redraw, not per path let filtLft = u.posToVal(-maxSize / 2, scaleX.key); @@ -284,15 +264,14 @@ let filtTop = u.posToVal(-maxSize / 2, scaleY.key); for (let i = 0; i < d[0].length; i++) { - // Import from Roofline + // Color based on Duration u.ctx.strokeStyle = getRGB(u.data[2][i]); u.ctx.fillStyle = getRGB(u.data[2][i], true); - // End - + // Get Values let xVal = d[0][i]; let yVal = d[1][i]; - const size = sizeBase + (jobsData[i]?.numAcc ? jobsData[i].numAcc / 2 : jobsData[i].numNodes); - // let size = sizes[i] * pxRatio; + // Calc Size; Alt.: size = sizes[i] * pxRatio + const size = sizeBase + (jobsData[i]?.numAcc ? jobsData[i].numAcc / 2 : jobsData[i].numNodes); // In NodeMode: Scale with Number of Jobs? if (xVal >= filtLft && xVal <= filtRgt && yVal >= filtBtm && yVal <= filtTop) { let cx = valToPosX(xVal, scaleX, xDim, xOff); @@ -312,28 +291,25 @@ ); } } - - // console.timeEnd("points"); - u.ctx.restore(); }); - return null; }; }; let drawPoints = makeDrawPoints({ - disp: { - size: { - unit: 3, // raw CSS pixels - // discr: true, - values: (u, seriesIdx, idx0, idx1) => { - // TODO: only run once per setData() call - let [minValue, maxValue] = getSizeMinMax(u); - return u.data[seriesIdx][2].map(v => getSize(v, minValue, maxValue)); - }, - }, - }, + // disp: { + // size: { + // // unit: 3, // raw CSS pixels + // // discr: true, + // values: (u, seriesIdx, idx0, idx1) => { + // /* Func to get sizes from additional subSeries [series][2...x] ([0,1] is [x,y]) */ + // // TODO: only run once per setData() call + // let [minValue, maxValue] = getSizeMinMax(u); + // return u.data[seriesIdx][2].map(v => getSize(v, minValue, maxValue)); + // }, + // }, + // }, each: (u, seriesIdx, dataIdx, lft, top, wid, hgt) => { // we get back raw canvas coords (included axes & padding). 
translate to the plotting area origin lft -= u.bbox.left; @@ -470,19 +446,17 @@ }, 200); } - let hRect; function render(roofdata, jobsData) { if (roofdata) { const opts = { - title: "Job Average Roofline Diagram (Bubble)", + title: "Job Average Roofline Diagram", mode: 2, width: width, height: height, legend: { - // show: true, + show: true, }, cursor: { - drag: { x: true, y: false }, // Activate zoom dataIdx: (u, seriesIdx) => { if (seriesIdx == 1) { hRect = null; @@ -521,22 +495,27 @@ } }); } - return hRect && seriesIdx == hRect.sidx ? hRect.didx : null; }, - points: { - size: (u, seriesIdx) => { - return hRect && seriesIdx == hRect.sidx ? hRect.w / pxRatio : 0; - } - }, - focus: { - prox: 1e3, - alpha: 0.3, - dist: (u, seriesIdx) => { - let prox = (hRect?.sidx === seriesIdx ? 0 : Infinity); - return prox; - }, - } + // /* Render "Fill" on Data Point Hover: Works in Example Bubble, does not work here? */ + // points: { + // size: (u, seriesIdx) => { + // return hRect && seriesIdx == hRect.sidx ? hRect.w / pxRatio : 0; + // } + // }, + /* Make all non-focused series semi-transparent: Useless unless more than one series rendered */ + // focus: { + // prox: 1e3, + // alpha: 0.3, + // dist: (u, seriesIdx) => { + // let prox = (hRect?.sidx === seriesIdx ? 0 : Infinity); + // return prox; + // }, + // }, + drag: { // Activates Zoom + x: true, + y: false + }, }, axes: [ { @@ -569,16 +548,17 @@ series: [ null, { - facets: [ - { - scale: 'x', - auto: true, - }, - { - scale: 'y', - auto: true, - } - ], + /* Facets: Define Purpose of Sub-Arrays in Series-Array, e.g. x, y, size, label, color, ... */ + // facets: [ + // { + // scale: 'x', + // auto: true, + // }, + // { + // scale: 'y', + // auto: true, + // } + // ], paths: drawPoints, values: legendValues } @@ -591,8 +571,10 @@ qt = qt || new Quadtree(0, 0, u.bbox.width, u.bbox.height); qt.clear(); + // force-clear the path cache to cause drawBars() to rebuild new quadtree u.series.forEach((s, i) => { - if (i > 0) s._paths = null; + if (i > 0) + s._paths = null; }); }, ], From e48ff8be7324e743a3f1aaec8d9dc28b741db7c1 Mon Sep 17 00:00:00 2001 From: Christoph Kluge Date: Tue, 15 Jul 2025 16:36:12 +0200 Subject: [PATCH 06/40] change bubble render parameters - Note: data points are hover highlighted by tooltip --- web/frontend/src/generic/plots/NewBubbleRoofline.svelte | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/web/frontend/src/generic/plots/NewBubbleRoofline.svelte b/web/frontend/src/generic/plots/NewBubbleRoofline.svelte index 857b49b..b693a92 100644 --- a/web/frontend/src/generic/plots/NewBubbleRoofline.svelte +++ b/web/frontend/src/generic/plots/NewBubbleRoofline.svelte @@ -93,7 +93,7 @@ return Math.floor(x * 255.0); } function getRGB(c, makeTransparent = false) { - if (makeTransparent) return `rgba(${cbmode ? '0' : getGradientR(c)}, ${getGradientG(c)}, ${getGradientB(c)}, 0.33)`; + if (makeTransparent) return `rgba(${cbmode ? '0' : getGradientR(c)}, ${getGradientG(c)}, ${getGradientB(c)}, 0.5)`; else return `rgb(${cbmode ? 
'0' : getGradientR(c)}, ${getGradientG(c)}, ${getGradientB(c)})`; } function nearestThousand(num) { @@ -246,7 +246,7 @@ return (u, seriesIdx, idx0, idx1) => { uPlot.orient(u, seriesIdx, (series, dataX, dataY, scaleX, scaleY, valToPosX, valToPosY, xOff, yOff, xDim, yDim, moveTo, lineTo, rect, arc) => { let d = u.data[seriesIdx]; - let strokeWidth = 2; + let strokeWidth = 1; let deg360 = 2 * Math.PI; /* Alt.: Sizes based on other Data Rows */ // let sizes = disp.size.values(u, seriesIdx, idx0, idx1); From 5cdb80b4d606465aac35eb27a398138ecb4baaaa Mon Sep 17 00:00:00 2001 From: Christoph Kluge Date: Tue, 15 Jul 2025 18:49:23 +0200 Subject: [PATCH 07/40] cleanup intends, add transparency switch to path renderer --- .../generic/plots/NewBubbleRoofline.svelte | 159 +++++++++--------- 1 file changed, 80 insertions(+), 79 deletions(-) diff --git a/web/frontend/src/generic/plots/NewBubbleRoofline.svelte b/web/frontend/src/generic/plots/NewBubbleRoofline.svelte index b693a92..4415791 100644 --- a/web/frontend/src/generic/plots/NewBubbleRoofline.svelte +++ b/web/frontend/src/generic/plots/NewBubbleRoofline.svelte @@ -62,7 +62,7 @@ pxRatio = uPlot.pxRatio; } setPxRatio(); - window.addEventListener('dppxchange', setPxRatio); + window.addEventListener('dppxchange', setPxRatio); // let minSize = 6; let maxSize = 60; // let maxArea = Math.PI * (maxSize / 2) ** 2; @@ -92,8 +92,8 @@ x = 1.0 - (x - 0.25) * 4.0; return Math.floor(x * 255.0); } - function getRGB(c, makeTransparent = false) { - if (makeTransparent) return `rgba(${cbmode ? '0' : getGradientR(c)}, ${getGradientG(c)}, ${getGradientB(c)}, 0.5)`; + function getRGB(c, transparent = false) { + if (transparent) return `rgba(${cbmode ? '0' : getGradientR(c)}, ${getGradientG(c)}, ${getGradientB(c)}, 0.5)`; else return `rgb(${cbmode ? '0' : getGradientR(c)}, ${getGradientG(c)}, ${getGradientB(c)})`; } function nearestThousand(num) { @@ -240,7 +240,7 @@ // Dot Renderer const makeDrawPoints = (opts) => { - let {/*size, disp,*/ each = () => {}} = opts; + let {/*size, disp,*/ transparentFill, each = () => {}} = opts; const sizeBase = 5 * pxRatio; return (u, seriesIdx, idx0, idx1) => { @@ -264,9 +264,10 @@ let filtTop = u.posToVal(-maxSize / 2, scaleY.key); for (let i = 0; i < d[0].length; i++) { - // Color based on Duration + // Color based on Duration, check index for transparency highlighting u.ctx.strokeStyle = getRGB(u.data[2][i]); - u.ctx.fillStyle = getRGB(u.data[2][i], true); + u.ctx.fillStyle = getRGB(u.data[2][i], transparentFill); + // Get Values let xVal = d[0][i]; let yVal = d[1][i]; @@ -310,6 +311,7 @@ // }, // }, // }, + transparentFill: true, each: (u, seriesIdx, dataIdx, lft, top, wid, hgt) => { // we get back raw canvas coords (included axes & padding). 
translate to the plotting area origin lft -= u.bbox.left; @@ -454,64 +456,64 @@ width: width, height: height, legend: { - show: true, + show: true, }, cursor: { dataIdx: (u, seriesIdx) => { - if (seriesIdx == 1) { - hRect = null; + if (seriesIdx == 1) { + hRect = null; - let dist = Infinity; - let area = Infinity; - let cx = u.cursor.left * pxRatio; - let cy = u.cursor.top * pxRatio; + let dist = Infinity; + let area = Infinity; + let cx = u.cursor.left * pxRatio; + let cy = u.cursor.top * pxRatio; - qt.get(cx, cy, 1, 1, o => { - if (pointWithin(cx, cy, o.x, o.y, o.x + o.w, o.y + o.h)) { - let ocx = o.x + o.w / 2; - let ocy = o.y + o.h / 2; + qt.get(cx, cy, 1, 1, o => { + if (pointWithin(cx, cy, o.x, o.y, o.x + o.w, o.y + o.h)) { + let ocx = o.x + o.w / 2; + let ocy = o.y + o.h / 2; - let dx = ocx - cx; - let dy = ocy - cy; + let dx = ocx - cx; + let dy = ocy - cy; - let d = Math.sqrt(dx ** 2 + dy ** 2); + let d = Math.sqrt(dx ** 2 + dy ** 2); - // test against radius for actual hover - if (d <= o.w / 2) { - let a = o.w * o.h; + // test against radius for actual hover + if (d <= o.w / 2) { + let a = o.w * o.h; - // prefer smallest - if (a < area) { - area = a; - dist = d; - hRect = o; - } - // only hover bbox with closest distance - else if (a == area && d <= dist) { - dist = d; - hRect = o; - } - } - } - }); - } - return hRect && seriesIdx == hRect.sidx ? hRect.didx : null; - }, - // /* Render "Fill" on Data Point Hover: Works in Example Bubble, does not work here? */ - // points: { - // size: (u, seriesIdx) => { - // return hRect && seriesIdx == hRect.sidx ? hRect.w / pxRatio : 0; - // } - // }, + // prefer smallest + if (a < area) { + area = a; + dist = d; + hRect = o; + } + // only hover bbox with closest distance + else if (a == area && d <= dist) { + dist = d; + hRect = o; + } + } + } + }); + } + return hRect && seriesIdx == hRect.sidx ? hRect.didx : null; + }, + /* Render "Fill" on Data Point Hover: Works in Example Bubble, does not work here? Guess: Interference with tooltip */ + // points: { + // size: (u, seriesIdx) => { + // return hRect && seriesIdx == hRect.sidx ? hRect.w / pxRatio : 0; + // } + // }, /* Make all non-focused series semi-transparent: Useless unless more than one series rendered */ - // focus: { - // prox: 1e3, - // alpha: 0.3, - // dist: (u, seriesIdx) => { - // let prox = (hRect?.sidx === seriesIdx ? 0 : Infinity); - // return prox; - // }, - // }, + // focus: { + // prox: 1e3, + // alpha: 0.3, + // dist: (u, seriesIdx) => { + // let prox = (hRect?.sidx === seriesIdx ? 0 : Infinity); + // return prox; + // }, + // }, drag: { // Activates Zoom x: true, y: false @@ -550,26 +552,26 @@ { /* Facets: Define Purpose of Sub-Arrays in Series-Array, e.g. x, y, size, label, color, ... 
*/ // facets: [ - // { - // scale: 'x', - // auto: true, - // }, - // { - // scale: 'y', - // auto: true, - // } - // ], + // { + // scale: 'x', + // auto: true, + // }, + // { + // scale: 'y', + // auto: true, + // } + // ], paths: drawPoints, values: legendValues } ], hooks: { // setSeries: [ (u, seriesIdx) => console.log('setSeries', seriesIdx) ], - // setLegend: [ u => console.log('setLegend', u.legend.idxs) ], + // setLegend: [ u => console.log('setLegend', u.legend.idxs) ], drawClear: [ (u) => { qt = qt || new Quadtree(0, 0, u.bbox.width, u.bbox.height); - qt.clear(); + qt.clear(); // force-clear the path cache to cause drawBars() to rebuild new quadtree u.series.forEach((s, i) => { @@ -675,11 +677,11 @@ u.ctx.fillText('Short', posX, posY) const start = posX + 10 for (let x = start; x < posXLimit; x += 10) { - let c = (x - start) / (posXLimit - start) - u.ctx.fillStyle = getRGB(c) - u.ctx.beginPath() - u.ctx.arc(x, posY, 3, 0, Math.PI * 2, false) - u.ctx.fill() + let c = (x - start) / (posXLimit - start) + u.ctx.fillStyle = getRGB(c) + u.ctx.beginPath() + u.ctx.arc(x, posY, 3, 0, Math.PI * 2, false) + u.ctx.fill() } u.ctx.fillStyle = 'black' u.ctx.fillText('Long', posXLimit + 23, posY) @@ -687,13 +689,13 @@ ], }, plugins: [ - tooltipPlugin({ - onclick(u, dataIdx) { - window.open(`/monitoring/job/${jobsData[dataIdx].id}`); - }, + tooltipPlugin({ + onclick(u, dataIdx) { + window.open(`/monitoring/job/${jobsData[dataIdx].id}`); + }, getJobData: (u, dataIdx) => { return jobsData[dataIdx] } - }), - ], + }), + ], }; uplot = new uPlot(opts, roofdata, plotWrapper); } else { @@ -716,6 +718,5 @@ {#if roofData != null}
{:else} - Cannot render roofline: No data! -{/if} \ No newline at end of file + Cannot render roofline: No data! +{/if} From 697acd1d8867a0b523864d9a8a42b746b297bbf7 Mon Sep 17 00:00:00 2001 From: Christoph Kluge Date: Fri, 18 Jul 2025 18:12:07 +0200 Subject: [PATCH 08/40] Extend bubbleRoofline for nodeData, add column to node table, rename nodeStats query --- api/schema.graphqls | 7 +- internal/graph/generated/generated.go | 214 +++++++++++++----- internal/graph/model/models_gen.go | 2 +- internal/graph/schema.resolvers.go | 22 +- .../migrations/sqlite3/10_node-table.up.sql | 1 + internal/repository/node.go | 23 +- .../generic/plots/NewBubbleRoofline.svelte | 180 ++++++++++++--- web/frontend/src/status/DevelDash.svelte | 197 +++++++++++++++- 8 files changed, 518 insertions(+), 128 deletions(-) diff --git a/api/schema.graphqls b/api/schema.graphqls index a95df84..b3dadb5 100644 --- a/api/schema.graphqls +++ b/api/schema.graphqls @@ -12,12 +12,13 @@ type Node { hostname: String! cluster: String! subCluster: String! + runningJobs: Int! nodeState: NodeState! - HealthState: MonitoringState! + healthState: MonitoringState! metaData: Any } -type NodeStats { +type NodeStates { state: String! count: Int! } @@ -303,7 +304,7 @@ type Query { ## Node Queries New node(id: ID!): Node nodes(filter: [NodeFilter!], order: OrderByInput): NodeStateResultList! - nodeStats(filter: [NodeFilter!]): [NodeStats!]! + nodeStates(filter: [NodeFilter!]): [NodeStates!]! job(id: ID!): Job jobMetrics( diff --git a/internal/graph/generated/generated.go b/internal/graph/generated/generated.go index b150423..a725802 100644 --- a/internal/graph/generated/generated.go +++ b/internal/graph/generated/generated.go @@ -276,6 +276,7 @@ type ComplexityRoot struct { ID func(childComplexity int) int MetaData func(childComplexity int) int NodeState func(childComplexity int) int + RunningJobs func(childComplexity int) int SubCluster func(childComplexity int) int } @@ -290,7 +291,7 @@ type ComplexityRoot struct { Items func(childComplexity int) int } - NodeStats struct { + NodeStates struct { Count func(childComplexity int) int State func(childComplexity int) int } @@ -318,7 +319,7 @@ type ComplexityRoot struct { Node func(childComplexity int, id string) int NodeMetrics func(childComplexity int, cluster string, nodes []string, scopes []schema.MetricScope, metrics []string, from time.Time, to time.Time) int NodeMetricsList func(childComplexity int, cluster string, subCluster string, nodeFilter string, scopes []schema.MetricScope, metrics []string, from time.Time, to time.Time, page *model.PageRequest, resolution *int) int - NodeStats func(childComplexity int, filter []*model.NodeFilter) int + NodeStates func(childComplexity int, filter []*model.NodeFilter) int Nodes func(childComplexity int, filter []*model.NodeFilter, order *model.OrderByInput) int RooflineHeatmap func(childComplexity int, filter []*model.JobFilter, rows int, cols int, minX float64, minY float64, maxX float64, maxY float64) int ScopedJobStats func(childComplexity int, id string, metrics []string, scopes []schema.MetricScope) int @@ -444,6 +445,7 @@ type MutationResolver interface { UpdateConfiguration(ctx context.Context, name string, value string) (*string, error) } type NodeResolver interface { + RunningJobs(ctx context.Context, obj *schema.Node) (int, error) NodeState(ctx context.Context, obj *schema.Node) (string, error) HealthState(ctx context.Context, obj *schema.Node) (schema.NodeState, error) MetaData(ctx context.Context, obj *schema.Node) (any, error) @@ -456,7 
+458,7 @@ type QueryResolver interface { AllocatedNodes(ctx context.Context, cluster string) ([]*model.Count, error) Node(ctx context.Context, id string) (*schema.Node, error) Nodes(ctx context.Context, filter []*model.NodeFilter, order *model.OrderByInput) (*model.NodeStateResultList, error) - NodeStats(ctx context.Context, filter []*model.NodeFilter) ([]*model.NodeStats, error) + NodeStates(ctx context.Context, filter []*model.NodeFilter) ([]*model.NodeStates, error) Job(ctx context.Context, id string) (*schema.Job, error) JobMetrics(ctx context.Context, id string, metrics []string, scopes []schema.MetricScope, resolution *int) ([]*model.JobMetricWithName, error) JobStats(ctx context.Context, id string, metrics []string) ([]*model.NamedStats, error) @@ -1474,7 +1476,7 @@ func (e *executableSchema) Complexity(typeName, field string, childComplexity in return e.complexity.Node.Cluster(childComplexity), true - case "Node.HealthState": + case "Node.healthState": if e.complexity.Node.HealthState == nil { break } @@ -1509,6 +1511,13 @@ func (e *executableSchema) Complexity(typeName, field string, childComplexity in return e.complexity.Node.NodeState(childComplexity), true + case "Node.runningJobs": + if e.complexity.Node.RunningJobs == nil { + break + } + + return e.complexity.Node.RunningJobs(childComplexity), true + case "Node.subCluster": if e.complexity.Node.SubCluster == nil { break @@ -1551,19 +1560,19 @@ func (e *executableSchema) Complexity(typeName, field string, childComplexity in return e.complexity.NodeStateResultList.Items(childComplexity), true - case "NodeStats.count": - if e.complexity.NodeStats.Count == nil { + case "NodeStates.count": + if e.complexity.NodeStates.Count == nil { break } - return e.complexity.NodeStats.Count(childComplexity), true + return e.complexity.NodeStates.Count(childComplexity), true - case "NodeStats.state": - if e.complexity.NodeStats.State == nil { + case "NodeStates.state": + if e.complexity.NodeStates.State == nil { break } - return e.complexity.NodeStats.State(childComplexity), true + return e.complexity.NodeStates.State(childComplexity), true case "NodesResultList.count": if e.complexity.NodesResultList.Count == nil { @@ -1753,17 +1762,17 @@ func (e *executableSchema) Complexity(typeName, field string, childComplexity in return e.complexity.Query.NodeMetricsList(childComplexity, args["cluster"].(string), args["subCluster"].(string), args["nodeFilter"].(string), args["scopes"].([]schema.MetricScope), args["metrics"].([]string), args["from"].(time.Time), args["to"].(time.Time), args["page"].(*model.PageRequest), args["resolution"].(*int)), true - case "Query.nodeStats": - if e.complexity.Query.NodeStats == nil { + case "Query.nodeStates": + if e.complexity.Query.NodeStates == nil { break } - args, err := ec.field_Query_nodeStats_args(context.TODO(), rawArgs) + args, err := ec.field_Query_nodeStates_args(context.TODO(), rawArgs) if err != nil { return 0, false } - return e.complexity.Query.NodeStats(childComplexity, args["filter"].([]*model.NodeFilter)), true + return e.complexity.Query.NodeStates(childComplexity, args["filter"].([]*model.NodeFilter)), true case "Query.nodes": if e.complexity.Query.Nodes == nil { @@ -2333,12 +2342,13 @@ type Node { hostname: String! cluster: String! subCluster: String! + runningJobs: Int! nodeState: NodeState! - HealthState: MonitoringState! + healthState: MonitoringState! metaData: Any } -type NodeStats { +type NodeStates { state: String! count: Int! 
} @@ -2621,9 +2631,10 @@ type Query { user(username: String!): User allocatedNodes(cluster: String!): [Count!]! + ## Node Queries New node(id: ID!): Node nodes(filter: [NodeFilter!], order: OrderByInput): NodeStateResultList! - nodeStats(filter: [NodeFilter!]): [NodeStats!]! + nodeStates(filter: [NodeFilter!]): [NodeStates!]! job(id: ID!): Job jobMetrics( @@ -2678,6 +2689,7 @@ type Query { from: Time! to: Time! ): [NodeMetrics!]! + nodeMetricsList( cluster: String! subCluster: String! @@ -4062,17 +4074,17 @@ func (ec *executionContext) field_Query_nodeMetrics_argsTo( return zeroVal, nil } -func (ec *executionContext) field_Query_nodeStats_args(ctx context.Context, rawArgs map[string]any) (map[string]any, error) { +func (ec *executionContext) field_Query_nodeStates_args(ctx context.Context, rawArgs map[string]any) (map[string]any, error) { var err error args := map[string]any{} - arg0, err := ec.field_Query_nodeStats_argsFilter(ctx, rawArgs) + arg0, err := ec.field_Query_nodeStates_argsFilter(ctx, rawArgs) if err != nil { return nil, err } args["filter"] = arg0 return args, nil } -func (ec *executionContext) field_Query_nodeStats_argsFilter( +func (ec *executionContext) field_Query_nodeStates_argsFilter( ctx context.Context, rawArgs map[string]any, ) ([]*model.NodeFilter, error) { @@ -10985,6 +10997,50 @@ func (ec *executionContext) fieldContext_Node_subCluster(_ context.Context, fiel return fc, nil } +func (ec *executionContext) _Node_runningJobs(ctx context.Context, field graphql.CollectedField, obj *schema.Node) (ret graphql.Marshaler) { + fc, err := ec.fieldContext_Node_runningJobs(ctx, field) + if err != nil { + return graphql.Null + } + ctx = graphql.WithFieldContext(ctx, fc) + defer func() { + if r := recover(); r != nil { + ec.Error(ctx, ec.Recover(ctx, r)) + ret = graphql.Null + } + }() + resTmp, err := ec.ResolverMiddleware(ctx, func(rctx context.Context) (any, error) { + ctx = rctx // use context from middleware stack in children + return ec.resolvers.Node().RunningJobs(rctx, obj) + }) + if err != nil { + ec.Error(ctx, err) + return graphql.Null + } + if resTmp == nil { + if !graphql.HasFieldError(ctx, fc) { + ec.Errorf(ctx, "must not be null") + } + return graphql.Null + } + res := resTmp.(int) + fc.Result = res + return ec.marshalNInt2int(ctx, field.Selections, res) +} + +func (ec *executionContext) fieldContext_Node_runningJobs(_ context.Context, field graphql.CollectedField) (fc *graphql.FieldContext, err error) { + fc = &graphql.FieldContext{ + Object: "Node", + Field: field, + IsMethod: true, + IsResolver: true, + Child: func(ctx context.Context, field graphql.CollectedField) (*graphql.FieldContext, error) { + return nil, errors.New("field of type Int does not have child fields") + }, + } + return fc, nil +} + func (ec *executionContext) _Node_nodeState(ctx context.Context, field graphql.CollectedField, obj *schema.Node) (ret graphql.Marshaler) { fc, err := ec.fieldContext_Node_nodeState(ctx, field) if err != nil { @@ -11029,8 +11085,8 @@ func (ec *executionContext) fieldContext_Node_nodeState(_ context.Context, field return fc, nil } -func (ec *executionContext) _Node_HealthState(ctx context.Context, field graphql.CollectedField, obj *schema.Node) (ret graphql.Marshaler) { - fc, err := ec.fieldContext_Node_HealthState(ctx, field) +func (ec *executionContext) _Node_healthState(ctx context.Context, field graphql.CollectedField, obj *schema.Node) (ret graphql.Marshaler) { + fc, err := ec.fieldContext_Node_healthState(ctx, field) if err != nil { return graphql.Null } @@ 
-11060,7 +11116,7 @@ func (ec *executionContext) _Node_HealthState(ctx context.Context, field graphql return ec.marshalNMonitoringState2githubᚗcomᚋClusterCockpitᚋccᚑlibᚋschemaᚐNodeState(ctx, field.Selections, res) } -func (ec *executionContext) fieldContext_Node_HealthState(_ context.Context, field graphql.CollectedField) (fc *graphql.FieldContext, err error) { +func (ec *executionContext) fieldContext_Node_healthState(_ context.Context, field graphql.CollectedField) (fc *graphql.FieldContext, err error) { fc = &graphql.FieldContext{ Object: "Node", Field: field, @@ -11301,10 +11357,12 @@ func (ec *executionContext) fieldContext_NodeStateResultList_items(_ context.Con return ec.fieldContext_Node_cluster(ctx, field) case "subCluster": return ec.fieldContext_Node_subCluster(ctx, field) + case "runningJobs": + return ec.fieldContext_Node_runningJobs(ctx, field) case "nodeState": return ec.fieldContext_Node_nodeState(ctx, field) - case "HealthState": - return ec.fieldContext_Node_HealthState(ctx, field) + case "healthState": + return ec.fieldContext_Node_healthState(ctx, field) case "metaData": return ec.fieldContext_Node_metaData(ctx, field) } @@ -11355,8 +11413,8 @@ func (ec *executionContext) fieldContext_NodeStateResultList_count(_ context.Con return fc, nil } -func (ec *executionContext) _NodeStats_state(ctx context.Context, field graphql.CollectedField, obj *model.NodeStats) (ret graphql.Marshaler) { - fc, err := ec.fieldContext_NodeStats_state(ctx, field) +func (ec *executionContext) _NodeStates_state(ctx context.Context, field graphql.CollectedField, obj *model.NodeStates) (ret graphql.Marshaler) { + fc, err := ec.fieldContext_NodeStates_state(ctx, field) if err != nil { return graphql.Null } @@ -11386,9 +11444,9 @@ func (ec *executionContext) _NodeStats_state(ctx context.Context, field graphql. return ec.marshalNString2string(ctx, field.Selections, res) } -func (ec *executionContext) fieldContext_NodeStats_state(_ context.Context, field graphql.CollectedField) (fc *graphql.FieldContext, err error) { +func (ec *executionContext) fieldContext_NodeStates_state(_ context.Context, field graphql.CollectedField) (fc *graphql.FieldContext, err error) { fc = &graphql.FieldContext{ - Object: "NodeStats", + Object: "NodeStates", Field: field, IsMethod: false, IsResolver: false, @@ -11399,8 +11457,8 @@ func (ec *executionContext) fieldContext_NodeStats_state(_ context.Context, fiel return fc, nil } -func (ec *executionContext) _NodeStats_count(ctx context.Context, field graphql.CollectedField, obj *model.NodeStats) (ret graphql.Marshaler) { - fc, err := ec.fieldContext_NodeStats_count(ctx, field) +func (ec *executionContext) _NodeStates_count(ctx context.Context, field graphql.CollectedField, obj *model.NodeStates) (ret graphql.Marshaler) { + fc, err := ec.fieldContext_NodeStates_count(ctx, field) if err != nil { return graphql.Null } @@ -11430,9 +11488,9 @@ func (ec *executionContext) _NodeStats_count(ctx context.Context, field graphql. 
return ec.marshalNInt2int(ctx, field.Selections, res) } -func (ec *executionContext) fieldContext_NodeStats_count(_ context.Context, field graphql.CollectedField) (fc *graphql.FieldContext, err error) { +func (ec *executionContext) fieldContext_NodeStates_count(_ context.Context, field graphql.CollectedField) (fc *graphql.FieldContext, err error) { fc = &graphql.FieldContext{ - Object: "NodeStats", + Object: "NodeStates", Field: field, IsMethod: false, IsResolver: false, @@ -12027,10 +12085,12 @@ func (ec *executionContext) fieldContext_Query_node(ctx context.Context, field g return ec.fieldContext_Node_cluster(ctx, field) case "subCluster": return ec.fieldContext_Node_subCluster(ctx, field) + case "runningJobs": + return ec.fieldContext_Node_runningJobs(ctx, field) case "nodeState": return ec.fieldContext_Node_nodeState(ctx, field) - case "HealthState": - return ec.fieldContext_Node_HealthState(ctx, field) + case "healthState": + return ec.fieldContext_Node_healthState(ctx, field) case "metaData": return ec.fieldContext_Node_metaData(ctx, field) } @@ -12112,8 +12172,8 @@ func (ec *executionContext) fieldContext_Query_nodes(ctx context.Context, field return fc, nil } -func (ec *executionContext) _Query_nodeStats(ctx context.Context, field graphql.CollectedField) (ret graphql.Marshaler) { - fc, err := ec.fieldContext_Query_nodeStats(ctx, field) +func (ec *executionContext) _Query_nodeStates(ctx context.Context, field graphql.CollectedField) (ret graphql.Marshaler) { + fc, err := ec.fieldContext_Query_nodeStates(ctx, field) if err != nil { return graphql.Null } @@ -12126,7 +12186,7 @@ func (ec *executionContext) _Query_nodeStats(ctx context.Context, field graphql. }() resTmp, err := ec.ResolverMiddleware(ctx, func(rctx context.Context) (any, error) { ctx = rctx // use context from middleware stack in children - return ec.resolvers.Query().NodeStats(rctx, fc.Args["filter"].([]*model.NodeFilter)) + return ec.resolvers.Query().NodeStates(rctx, fc.Args["filter"].([]*model.NodeFilter)) }) if err != nil { ec.Error(ctx, err) @@ -12138,12 +12198,12 @@ func (ec *executionContext) _Query_nodeStats(ctx context.Context, field graphql. 
} return graphql.Null } - res := resTmp.([]*model.NodeStats) + res := resTmp.([]*model.NodeStates) fc.Result = res - return ec.marshalNNodeStats2ᚕᚖgithubᚗcomᚋClusterCockpitᚋccᚑbackendᚋinternalᚋgraphᚋmodelᚐNodeStatsᚄ(ctx, field.Selections, res) + return ec.marshalNNodeStates2ᚕᚖgithubᚗcomᚋClusterCockpitᚋccᚑbackendᚋinternalᚋgraphᚋmodelᚐNodeStatesᚄ(ctx, field.Selections, res) } -func (ec *executionContext) fieldContext_Query_nodeStats(ctx context.Context, field graphql.CollectedField) (fc *graphql.FieldContext, err error) { +func (ec *executionContext) fieldContext_Query_nodeStates(ctx context.Context, field graphql.CollectedField) (fc *graphql.FieldContext, err error) { fc = &graphql.FieldContext{ Object: "Query", Field: field, @@ -12152,11 +12212,11 @@ func (ec *executionContext) fieldContext_Query_nodeStats(ctx context.Context, fi Child: func(ctx context.Context, field graphql.CollectedField) (*graphql.FieldContext, error) { switch field.Name { case "state": - return ec.fieldContext_NodeStats_state(ctx, field) + return ec.fieldContext_NodeStates_state(ctx, field) case "count": - return ec.fieldContext_NodeStats_count(ctx, field) + return ec.fieldContext_NodeStates_count(ctx, field) } - return nil, fmt.Errorf("no field named %q was found under type NodeStats", field.Name) + return nil, fmt.Errorf("no field named %q was found under type NodeStates", field.Name) }, } defer func() { @@ -12166,7 +12226,7 @@ func (ec *executionContext) fieldContext_Query_nodeStats(ctx context.Context, fi } }() ctx = graphql.WithFieldContext(ctx, fc) - if fc.Args, err = ec.field_Query_nodeStats_args(ctx, field.ArgumentMap(ec.Variables)); err != nil { + if fc.Args, err = ec.field_Query_nodeStates_args(ctx, field.ArgumentMap(ec.Variables)); err != nil { ec.Error(ctx, err) return fc, err } @@ -19829,6 +19889,42 @@ func (ec *executionContext) _Node(ctx context.Context, sel ast.SelectionSet, obj if out.Values[i] == graphql.Null { atomic.AddUint32(&out.Invalids, 1) } + case "runningJobs": + field := field + + innerFunc := func(ctx context.Context, fs *graphql.FieldSet) (res graphql.Marshaler) { + defer func() { + if r := recover(); r != nil { + ec.Error(ctx, ec.Recover(ctx, r)) + } + }() + res = ec._Node_runningJobs(ctx, field, obj) + if res == graphql.Null { + atomic.AddUint32(&fs.Invalids, 1) + } + return res + } + + if field.Deferrable != nil { + dfs, ok := deferred[field.Deferrable.Label] + di := 0 + if ok { + dfs.AddField(field) + di = len(dfs.Values) - 1 + } else { + dfs = graphql.NewFieldSet([]graphql.CollectedField{field}) + deferred[field.Deferrable.Label] = dfs + } + dfs.Concurrently(di, func(ctx context.Context) graphql.Marshaler { + return innerFunc(ctx, dfs) + }) + + // don't run the out.Concurrently() call below + out.Values[i] = graphql.Null + continue + } + + out.Concurrently(i, func(ctx context.Context) graphql.Marshaler { return innerFunc(ctx, out) }) case "nodeState": field := field @@ -19865,7 +19961,7 @@ func (ec *executionContext) _Node(ctx context.Context, sel ast.SelectionSet, obj } out.Concurrently(i, func(ctx context.Context) graphql.Marshaler { return innerFunc(ctx, out) }) - case "HealthState": + case "healthState": field := field innerFunc := func(ctx context.Context, fs *graphql.FieldSet) (res graphql.Marshaler) { @@ -19874,7 +19970,7 @@ func (ec *executionContext) _Node(ctx context.Context, sel ast.SelectionSet, obj ec.Error(ctx, ec.Recover(ctx, r)) } }() - res = ec._Node_HealthState(ctx, field, obj) + res = ec._Node_healthState(ctx, field, obj) if res == graphql.Null { 
atomic.AddUint32(&fs.Invalids, 1) } @@ -20047,24 +20143,24 @@ func (ec *executionContext) _NodeStateResultList(ctx context.Context, sel ast.Se return out } -var nodeStatsImplementors = []string{"NodeStats"} +var nodeStatesImplementors = []string{"NodeStates"} -func (ec *executionContext) _NodeStats(ctx context.Context, sel ast.SelectionSet, obj *model.NodeStats) graphql.Marshaler { - fields := graphql.CollectFields(ec.OperationContext, sel, nodeStatsImplementors) +func (ec *executionContext) _NodeStates(ctx context.Context, sel ast.SelectionSet, obj *model.NodeStates) graphql.Marshaler { + fields := graphql.CollectFields(ec.OperationContext, sel, nodeStatesImplementors) out := graphql.NewFieldSet(fields) deferred := make(map[string]*graphql.FieldSet) for i, field := range fields { switch field.Name { case "__typename": - out.Values[i] = graphql.MarshalString("NodeStats") + out.Values[i] = graphql.MarshalString("NodeStates") case "state": - out.Values[i] = ec._NodeStats_state(ctx, field, obj) + out.Values[i] = ec._NodeStates_state(ctx, field, obj) if out.Values[i] == graphql.Null { out.Invalids++ } case "count": - out.Values[i] = ec._NodeStats_count(ctx, field, obj) + out.Values[i] = ec._NodeStates_count(ctx, field, obj) if out.Values[i] == graphql.Null { out.Invalids++ } @@ -20307,7 +20403,7 @@ func (ec *executionContext) _Query(ctx context.Context, sel ast.SelectionSet) gr } out.Concurrently(i, func(ctx context.Context) graphql.Marshaler { return rrm(innerCtx) }) - case "nodeStats": + case "nodeStates": field := field innerFunc := func(ctx context.Context, fs *graphql.FieldSet) (res graphql.Marshaler) { @@ -20316,7 +20412,7 @@ func (ec *executionContext) _Query(ctx context.Context, sel ast.SelectionSet) gr ec.Error(ctx, ec.Recover(ctx, r)) } }() - res = ec._Query_nodeStats(ctx, field) + res = ec._Query_nodeStates(ctx, field) if res == graphql.Null { atomic.AddUint32(&fs.Invalids, 1) } @@ -22961,7 +23057,7 @@ func (ec *executionContext) marshalNNodeStateResultList2ᚖgithubᚗcomᚋCluste return ec._NodeStateResultList(ctx, sel, v) } -func (ec *executionContext) marshalNNodeStats2ᚕᚖgithubᚗcomᚋClusterCockpitᚋccᚑbackendᚋinternalᚋgraphᚋmodelᚐNodeStatsᚄ(ctx context.Context, sel ast.SelectionSet, v []*model.NodeStats) graphql.Marshaler { +func (ec *executionContext) marshalNNodeStates2ᚕᚖgithubᚗcomᚋClusterCockpitᚋccᚑbackendᚋinternalᚋgraphᚋmodelᚐNodeStatesᚄ(ctx context.Context, sel ast.SelectionSet, v []*model.NodeStates) graphql.Marshaler { ret := make(graphql.Array, len(v)) var wg sync.WaitGroup isLen1 := len(v) == 1 @@ -22985,7 +23081,7 @@ func (ec *executionContext) marshalNNodeStats2ᚕᚖgithubᚗcomᚋClusterCockpi if !isLen1 { defer wg.Done() } - ret[i] = ec.marshalNNodeStats2ᚖgithubᚗcomᚋClusterCockpitᚋccᚑbackendᚋinternalᚋgraphᚋmodelᚐNodeStats(ctx, sel, v[i]) + ret[i] = ec.marshalNNodeStates2ᚖgithubᚗcomᚋClusterCockpitᚋccᚑbackendᚋinternalᚋgraphᚋmodelᚐNodeStates(ctx, sel, v[i]) } if isLen1 { f(i) @@ -23005,14 +23101,14 @@ func (ec *executionContext) marshalNNodeStats2ᚕᚖgithubᚗcomᚋClusterCockpi return ret } -func (ec *executionContext) marshalNNodeStats2ᚖgithubᚗcomᚋClusterCockpitᚋccᚑbackendᚋinternalᚋgraphᚋmodelᚐNodeStats(ctx context.Context, sel ast.SelectionSet, v *model.NodeStats) graphql.Marshaler { +func (ec *executionContext) marshalNNodeStates2ᚖgithubᚗcomᚋClusterCockpitᚋccᚑbackendᚋinternalᚋgraphᚋmodelᚐNodeStates(ctx context.Context, sel ast.SelectionSet, v *model.NodeStates) graphql.Marshaler { if v == nil { if !graphql.HasFieldError(ctx, graphql.GetFieldContext(ctx)) { ec.Errorf(ctx, "the requested 
element is null which the schema does not allow") } return graphql.Null } - return ec._NodeStats(ctx, sel, v) + return ec._NodeStates(ctx, sel, v) } func (ec *executionContext) marshalNNodesResultList2githubᚗcomᚋClusterCockpitᚋccᚑbackendᚋinternalᚋgraphᚋmodelᚐNodesResultList(ctx context.Context, sel ast.SelectionSet, v model.NodesResultList) graphql.Marshaler { diff --git a/internal/graph/model/models_gen.go b/internal/graph/model/models_gen.go index c5cc79b..e6619b7 100644 --- a/internal/graph/model/models_gen.go +++ b/internal/graph/model/models_gen.go @@ -186,7 +186,7 @@ type NodeStateResultList struct { Count *int `json:"count,omitempty"` } -type NodeStats struct { +type NodeStates struct { State string `json:"state"` Count int `json:"count"` } diff --git a/internal/graph/schema.resolvers.go b/internal/graph/schema.resolvers.go index 1284c09..e0a7948 100644 --- a/internal/graph/schema.resolvers.go +++ b/internal/graph/schema.resolvers.go @@ -305,14 +305,20 @@ func (r *mutationResolver) UpdateConfiguration(ctx context.Context, name string, return nil, nil } -// NodeState is the resolver for the nodeState field. -func (r *nodeResolver) NodeState(ctx context.Context, obj *schema.Node) (string, error) { - panic(fmt.Errorf("not implemented: NodeState - nodeState")) +// RunningJobs is the resolver for the runningJobs field. +func (r *nodeResolver) RunningJobs(ctx context.Context, obj *schema.Node) (int, error) { + panic(fmt.Errorf("not implemented: RunningJobs - runningJobs")) } -// HealthState is the resolver for the HealthState field. +// NodeState is the resolver for the nodeState field. +func (r *nodeResolver) NodeState(ctx context.Context, obj *schema.Node) (string, error) { + return string(obj.NodeState), nil +} + +// HealthState is the resolver for the healthState field. func (r *nodeResolver) HealthState(ctx context.Context, obj *schema.Node) (schema.NodeState, error) { - panic(fmt.Errorf("not implemented: HealthState - HealthState")) + // FIXME: Why is Output of schema.NodeState Type? + panic(fmt.Errorf("not implemented: HealthState - healthState")) } // MetaData is the resolver for the metaData field. @@ -378,8 +384,8 @@ func (r *queryResolver) Nodes(ctx context.Context, filter []*model.NodeFilter, o return &model.NodeStateResultList{Items: nodes, Count: &count}, err } -// NodeStats is the resolver for the nodeStats field. -func (r *queryResolver) NodeStats(ctx context.Context, filter []*model.NodeFilter) ([]*model.NodeStats, error) { +// NodeStates is the resolver for the nodeStates field. +func (r *queryResolver) NodeStates(ctx context.Context, filter []*model.NodeFilter) ([]*model.NodeStates, error) { repo := repository.GetNodeRepository() stateCounts, serr := repo.CountNodeStates(ctx, filter) @@ -394,7 +400,7 @@ func (r *queryResolver) NodeStats(ctx context.Context, filter []*model.NodeFilte return nil, herr } - allCounts := make([]*model.NodeStats, 0) + allCounts := make([]*model.NodeStates, 0) allCounts = append(stateCounts, healthCounts...) 
return allCounts, nil diff --git a/internal/repository/migrations/sqlite3/10_node-table.up.sql b/internal/repository/migrations/sqlite3/10_node-table.up.sql index 52e6a05..1211ba9 100644 --- a/internal/repository/migrations/sqlite3/10_node-table.up.sql +++ b/internal/repository/migrations/sqlite3/10_node-table.up.sql @@ -3,6 +3,7 @@ CREATE TABLE "node" ( hostname VARCHAR(255) NOT NULL, cluster VARCHAR(255) NOT NULL, subcluster VARCHAR(255) NOT NULL, + jobs_running INTEGER DEFAULT 0 NOT NULL, cpus_allocated INTEGER DEFAULT 0 NOT NULL, cpus_total INTEGER DEFAULT 0 NOT NULL, memory_allocated INTEGER DEFAULT 0 NOT NULL, diff --git a/internal/repository/node.go b/internal/repository/node.go index b4d0181..277c1c5 100644 --- a/internal/repository/node.go +++ b/internal/repository/node.go @@ -50,8 +50,9 @@ func GetNodeRepository() *NodeRepository { } var nodeColumns []string = []string{ - "node.id", "node.hostname", "node.cluster", "node.subcluster", - "node.node_state", "node.health_state", "node.meta_data", + // "node.id," + "node.hostname", "node.cluster", "node.subcluster", + "node.node_state", "node.health_state", // "node.meta_data", } func (r *NodeRepository) FetchMetadata(node *schema.Node) (map[string]string, error) { @@ -223,7 +224,7 @@ func (r *NodeRepository) DeleteNode(id int64) error { func (r *NodeRepository) QueryNodes( ctx context.Context, filters []*model.NodeFilter, - order *model.OrderByInput, + order *model.OrderByInput, // Currently unused! ) ([]*schema.Node, error) { query, qerr := AccessCheck(ctx, sq.Select(nodeColumns...).From("node")) if qerr != nil { @@ -296,7 +297,7 @@ func (r *NodeRepository) ListNodes(cluster string) ([]*schema.Node, error) { return nodeList, nil } -func (r *NodeRepository) CountNodeStates(ctx context.Context, filters []*model.NodeFilter) ([]*model.NodeStats, error) { +func (r *NodeRepository) CountNodeStates(ctx context.Context, filters []*model.NodeFilter) ([]*model.NodeStates, error) { query, qerr := AccessCheck(ctx, sq.Select("node_state AS state", "count(*) AS count").From("node")) if qerr != nil { return nil, qerr @@ -327,13 +328,13 @@ func (r *NodeRepository) CountNodeStates(ctx context.Context, filters []*model.N return nil, err } - nodes := make([]*model.NodeStats, 0) + nodes := make([]*model.NodeStates, 0) for rows.Next() { - node := model.NodeStats{} + node := model.NodeStates{} if err := rows.Scan(&node.State, &node.Count); err != nil { rows.Close() - cclog.Warn("Error while scanning rows (NodeStats)") + cclog.Warn("Error while scanning rows (NodeStates)") return nil, err } nodes = append(nodes, &node) @@ -342,7 +343,7 @@ func (r *NodeRepository) CountNodeStates(ctx context.Context, filters []*model.N return nodes, nil } -func (r *NodeRepository) CountHealthStates(ctx context.Context, filters []*model.NodeFilter) ([]*model.NodeStats, error) { +func (r *NodeRepository) CountHealthStates(ctx context.Context, filters []*model.NodeFilter) ([]*model.NodeStates, error) { query, qerr := AccessCheck(ctx, sq.Select("health_state AS state", "count(*) AS count").From("node")) if qerr != nil { return nil, qerr @@ -373,13 +374,13 @@ func (r *NodeRepository) CountHealthStates(ctx context.Context, filters []*model return nil, err } - nodes := make([]*model.NodeStats, 0) + nodes := make([]*model.NodeStates, 0) for rows.Next() { - node := model.NodeStats{} + node := model.NodeStates{} if err := rows.Scan(&node.State, &node.Count); err != nil { rows.Close() - cclog.Warn("Error while scanning rows (NodeStats)") + cclog.Warn("Error while scanning rows 
(NodeStates)") return nil, err } nodes = append(nodes, &node) diff --git a/web/frontend/src/generic/plots/NewBubbleRoofline.svelte b/web/frontend/src/generic/plots/NewBubbleRoofline.svelte index 4415791..bf25347 100644 --- a/web/frontend/src/generic/plots/NewBubbleRoofline.svelte +++ b/web/frontend/src/generic/plots/NewBubbleRoofline.svelte @@ -31,8 +31,10 @@ let { roofData = null, jobsData = null, - allowSizeChange = false, + nodesData = null, + cluster = null, subCluster = null, + allowSizeChange = false, width = 600, height = 380, } = $props(); @@ -264,16 +266,43 @@ let filtTop = u.posToVal(-maxSize / 2, scaleY.key); for (let i = 0; i < d[0].length; i++) { - // Color based on Duration, check index for transparency highlighting - u.ctx.strokeStyle = getRGB(u.data[2][i]); - u.ctx.fillStyle = getRGB(u.data[2][i], transparentFill); - + // Jobs: Color based on Duration + if (jobsData) { + u.ctx.strokeStyle = getRGB(u.data[2][i]); + u.ctx.fillStyle = getRGB(u.data[2][i], transparentFill); + // Nodes: Color based on Idle vs. Allocated + } else if (nodesData) { + // console.log('In Plot Handler NodesData', nodesData) + if (nodesData[i]?.nodeState == "idle") { + u.ctx.strokeStyle = "rgb(0, 0, 255)"; + u.ctx.fillStyle = "rgba(0, 0, 255, 0.5)"; + } else if (nodesData[i]?.nodeState == "allocated") { + u.ctx.strokeStyle = "rgb(0, 255, 0)"; + u.ctx.fillStyle = "rgba(0, 255, 0, 0.5)"; + } else if (nodesData[i]?.nodeState == "notindb") { + u.ctx.strokeStyle = "rgb(0, 0, 0)"; + u.ctx.fillStyle = "rgba(0, 0, 0, 0.5)"; + } else { // Fallback: All other DEFINED states + u.ctx.strokeStyle = "rgb(255, 0, 0)"; + u.ctx.fillStyle = "rgba(255, 0, 0, 0.5)"; + } + } + // Get Values let xVal = d[0][i]; let yVal = d[1][i]; - // Calc Size; Alt.: size = sizes[i] * pxRatio - const size = sizeBase + (jobsData[i]?.numAcc ? jobsData[i].numAcc / 2 : jobsData[i].numNodes); // In NodeMode: Scale with Number of Jobs? + // Calc Size; Alt.: size = sizes[i] * pxRatio + let size = 1; + + // Jobs: Size based on Resourcecount + if (jobsData) { + size = sizeBase + (jobsData[i]?.numAcc ? jobsData[i].numAcc / 2 : jobsData[i].numNodes) + // Nodes: Size based on Jobcount + } else if (nodesData) { + size = sizeBase + nodesData[i]?.numJobs + }; + if (xVal >= filtLft && xVal <= filtRgt && yVal >= filtBtm && yVal <= filtTop) { let cx = valToPosX(xVal, scaleX, xDim, xOff); let cy = valToPosY(yVal, scaleY, yDim, yOff); @@ -338,7 +367,7 @@ }; // Tooltip Plugin - function tooltipPlugin({onclick, getJobData, shiftX = 10, shiftY = 10}) { + function tooltipPlugin({onclick, getLegendData, shiftX = 10, shiftY = 10}) { let tooltipLeftOffset = 0; let tooltipTopOffset = 0; @@ -388,11 +417,34 @@ tooltip.style.top = (tooltipTopOffset + top + shiftX) + "px"; tooltip.style.left = (tooltipLeftOffset + lft + shiftY) + "px"; - tooltip.style.borderColor = getRGB(u.data[2][i]); - tooltip.textContent = ( - // Tooltip Content as String - `Job ID: ${getJobData(u, i).jobId}\nNodes: ${getJobData(u, i).numNodes}${getJobData(u, i)?.numAcc?`\nAccelerators: ${getJobData(u, i).numAcc}`:''}` - ); + + // Jobs: Color based on Duration + if (jobsData) { + tooltip.style.borderColor = getRGB(u.data[2][i]); + // Nodes: Color based on Idle vs. 
Allocated + } else if (nodesData) { + if (nodesData[i]?.nodeState == "idle") { + tooltip.style.borderColor = "rgb(0, 0, 255)"; + } else if (nodesData[i]?.nodeState == "allocated") { + tooltip.style.borderColor = "rgb(0, 255, 0)"; + } else if (nodesData[i]?.nodeState == "notindb") { // Missing from DB table + tooltip.style.borderColor = "rgb(0, 0, 0)"; + } else { // Fallback: All other DEFINED states + tooltip.style.borderColor = "rgb(255, 0, 0)"; + } + } + + if (jobsData) { + tooltip.textContent = ( + // Tooltip Content as String for Job + `Job ID: ${getLegendData(u, i).jobId}\nNodes: ${getLegendData(u, i).numNodes}${getLegendData(u, i)?.numAcc?`\nAccelerators: ${getLegendData(u, i).numAcc}`:''}` + ); + } else if (nodesData) { + tooltip.textContent = ( + // Tooltip Content as String for Node + `Host: ${getLegendData(u, i).nodeName}\nState: ${getLegendData(u, i).nodeState}\nJobs: ${getLegendData(u, i).numJobs}` + ); + } } return { @@ -444,14 +496,18 @@ timeoutId = setTimeout(() => { timeoutId = null; if (uplot) uplot.destroy(); - render(roofData, jobsData); + render(roofData, jobsData, nodesData); }, 200); } - function render(roofdata, jobsData) { - if (roofdata) { + function render(roofData, jobsData, nodesData) { + let plotTitle = "CPU Roofline Diagram"; + if (jobsData) plotTitle = "Job Average Roofline Diagram"; + if (nodesData) plotTitle = "Node Average Roofline Diagram"; + + if (roofData) { const opts = { - title: "Job Average Roofline Diagram", + title: plotTitle, mode: 2, width: width, height: height, @@ -669,35 +725,87 @@ u.ctx.lineWidth = 0.15; } - // The Color Scale For Time Information - const posX = u.valToPos(0.1, "x", true) - const posXLimit = u.valToPos(100, "x", true) - const posY = u.valToPos(14000.0, "y", true) - u.ctx.fillStyle = 'black' - u.ctx.fillText('Short', posX, posY) - const start = posX + 10 - for (let x = start; x < posXLimit; x += 10) { - let c = (x - start) / (posXLimit - start) - u.ctx.fillStyle = getRGB(c) - u.ctx.beginPath() - u.ctx.arc(x, posY, 3, 0, Math.PI * 2, false) - u.ctx.fill() + // Jobs: The Color Scale For Time Information + if (jobsData) { + const posX = u.valToPos(0.1, "x", true) + const posXLimit = u.valToPos(100, "x", true) + const posY = u.valToPos(14000.0, "y", true) + u.ctx.fillStyle = 'black' + u.ctx.fillText('Short', posX, posY) + const start = posX + 10 + for (let x = start; x < posXLimit; x += 10) { + let c = (x - start) / (posXLimit - start) + u.ctx.fillStyle = getRGB(c) + u.ctx.beginPath() + u.ctx.arc(x, posY, 3, 0, Math.PI * 2, false) + u.ctx.fill() + } + u.ctx.fillStyle = 'black' + u.ctx.fillText('Long', posXLimit + 23, posY) + } + + // Nodes: The Colors Of NodeStates (Just 3) + if (nodesData) { + const posY = u.valToPos(14000.0, "y", true) + + const posAllocDot = u.valToPos(0.1, "x", true) + const posAllocText = posAllocDot + 60 + u.ctx.fillStyle = "rgb(0, 255, 0)" + u.ctx.beginPath() + u.ctx.arc(posAllocDot, posY, 3, 0, Math.PI * 2, false) + u.ctx.fill() + u.ctx.fillStyle = 'black' + u.ctx.fillText('Allocated', posAllocText, posY) + + const posIdleDot = posAllocDot + 150 + const posIdleText = posAllocText + 120 + u.ctx.fillStyle = "rgb(0, 0, 255)" + u.ctx.beginPath() + u.ctx.arc(posIdleDot, posY, 3, 0, Math.PI * 2, false) + u.ctx.fill() + u.ctx.fillStyle = 'black' + u.ctx.fillText('Idle', posIdleText, posY) + + const posOtherDot = posIdleDot + 150 + const posOtherText = posIdleText + 160 + u.ctx.fillStyle = "rgb(255, 0, 0)" + u.ctx.beginPath() + u.ctx.arc(posOtherDot, posY, 3, 0, Math.PI * 2, false) + u.ctx.fill() + 
u.ctx.fillStyle = 'black' + u.ctx.fillText('Other', posOtherText, posY) + + const posMissingDot = posOtherDot + 150 + const posMissingText = posOtherText + 190 + u.ctx.fillStyle = 'black' + u.ctx.beginPath() + u.ctx.arc(posMissingDot, posY, 3, 0, Math.PI * 2, false) + u.ctx.fill() + u.ctx.fillText('Missing in DB', posMissingText, posY) } - u.ctx.fillStyle = 'black' - u.ctx.fillText('Long', posXLimit + 23, posY) }, ], }, plugins: [ tooltipPlugin({ onclick(u, dataIdx) { - window.open(`/monitoring/job/${jobsData[dataIdx].id}`); + if (jobsData) { + window.open(`/monitoring/job/${jobsData[dataIdx].id}`) + } else if (nodesData) { + window.open(`/monitoring/node/${cluster}/${nodesData[dataIdx].nodeName}`) + } }, - getJobData: (u, dataIdx) => { return jobsData[dataIdx] } + getLegendData: (u, dataIdx) => { + if (jobsData) { + return jobsData[dataIdx] + } else if (nodesData) { + return nodesData[dataIdx] + } + } }), ], }; - uplot = new uPlot(opts, roofdata, plotWrapper); + uplot = new uPlot(opts, roofData, plotWrapper); } else { // console.log("No data for roofline!"); } @@ -705,7 +813,7 @@ /* On Mount */ onMount(() => { - render(roofData, jobsData); + render(roofData, jobsData, nodesData); }); /* On Destroy */ diff --git a/web/frontend/src/status/DevelDash.svelte b/web/frontend/src/status/DevelDash.svelte index ea00803..5ac1326 100644 --- a/web/frontend/src/status/DevelDash.svelte +++ b/web/frontend/src/status/DevelDash.svelte @@ -31,9 +31,11 @@ const client = getContextClient(); /* State Init */ - // let from = $state(new Date(Date.now() - 5 * 60 * 1000)); - // let to = $state(new Date(Date.now())); + let from = $state(new Date(Date.now() - 5 * 60 * 1000)); + let to = $state(new Date(Date.now())); let plotWidths = $state([]); + let nodesCounts = $state({}); + let jobsJounts = $state({}); /* Derived */ // Note: nodeMetrics are requested on configured $timestep resolution @@ -64,6 +66,123 @@ }, })); + // Optimal new query, does not exist + // const nodeRoofQuery = $derived(queryStore({ + // client: client, + // query: gql` + // query ($filter: [JobFilter!]!, $metrics: [String!]!) { + // nodeRoofline(filter: $filter, metrics: $metrics) { + // nodeName + // nodeState + // numJobs + // stats { + // name + // data { + // avg + // } + // } + // } + // } + // `, + // variables: { + // filter: [{ state: ["running"] }, { cluster: { eq: cluster } }], + // metrics: ["flops_any", "mem_bw"], // Fixed names for job roofline + // }, + // })); + + // Load Required Roofline Data Averages for all nodes of cluster: use for node avg data and name, use secondary (new?) querie(s) for slurmstate and numjobs + const nodesData = $derived(queryStore({ + client: client, + query: gql` + query ($cluster: String!, $metrics: [String!], $from: Time!, $to: Time!) { + nodeMetrics( + cluster: $cluster + metrics: $metrics + from: $from + to: $to + ) { + host + subCluster + metrics { + name + metric { + series { + statistics { + avg + } + } + } + } + } + } + `, + variables: { + cluster: cluster, + metrics: ["flops_any", "mem_bw"], + from: from, + to: to, + }, + })); + + // Load for jobcount per node only -- might me required for total running jobs anyways in parent component! 
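[Editor's note, not part of the patch] The comment above asks for a per-node running-job count without pulling full job metadata. Purely as an illustrative sketch: an earlier patch in this series adds a jobs_running column to the node table, so such a count could in principle come straight from that table instead of scanning all running jobs on the client. All values below (the cluster name in particular) are placeholders, and whether the column is kept in later revisions of the migration is an open question.

package main

import (
	"fmt"

	sq "github.com/Masterminds/squirrel"
)

func main() {
	// Sketch only: read hostname and the jobs_running counter per node,
	// filtered to one cluster. "fritz" is a made-up example value.
	q := sq.Select("hostname", "jobs_running").
		From("node").
		Where(sq.Eq{"cluster": "fritz"})

	sqlStr, args, err := q.ToSql()
	if err != nil {
		panic(err)
	}
	fmt.Println(sqlStr) // SELECT hostname, jobs_running FROM node WHERE cluster = ?
	fmt.Println(args)   // [fritz]
}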
+ // Also, think about extra query with only TotalJobCount and Items [Resources, ...some meta infos], not including metric data + const paging = { itemsPerPage: 1500, page: 1 }; + const sorting = { field: "startTime", type: "col", order: "DESC" }; + const filter = [ + { cluster: { eq: cluster } }, + { state: ["running"] }, + ]; + const nodeJobsQuery = gql` + query ( + $filter: [JobFilter!]! + $sorting: OrderByInput! + $paging: PageRequest! + ) { + jobs(filter: $filter, order: $sorting, page: $paging) { + items { + jobId + resources { + hostname + } + } + count + } + } + `; + + const nodesJobs = $derived(queryStore({ + client: client, + query: nodeJobsQuery, + variables: { paging, sorting, filter }, + }) + ); + + // Last required query: Node State + const nodesState = $derived(queryStore({ + client: client, + query: gql` + query ( + $filter: [NodeFilter!] + $sorting: OrderByInput + ) { + nodes(filter: $filter, order: $sorting) { + count + items { + hostname + cluster + subCluster + nodeState + } + } + } + `, + variables: { + filter: { cluster: { eq: cluster }}, + sorting: sorting // Unused in Backend: Use Placeholder + // Subcluster filter? + }, + })); + /* Function */ function transformJobsStatsToData(subclusterData) { /* c will contain values from 0 to 1 representing the duration */ @@ -90,7 +209,7 @@ else c.push(d) } } else { - console.warn("transformData: metrics for 'mem_bw' and/or 'flops_any' missing!") + console.warn("transformJobsStatsToData: metrics for 'mem_bw' and/or 'flops_any' missing!") } if (x.length > 0 && y.length > 0 && c.length > 0) { @@ -99,15 +218,69 @@ return data } + function transformNodesStatsToData(subclusterData) { + let data = null + const x = [], y = [] + + if (subclusterData) { + for (let i = 0; i < subclusterData.length; i++) { + const flopsData = subclusterData[i].metrics.find((s) => s.name == "flops_any") + const memBwData = subclusterData[i].metrics.find((s) => s.name == "mem_bw") + + const f = flopsData.metric.series[0].statistics.avg + const m = memBwData.metric.series[0].statistics.avg + + let intensity = f / m + if (Number.isNaN(intensity) || !Number.isFinite(intensity)) { + // continue // Old: Introduces mismatch between Data and Info Arrays + intensity = 0.0 // New: Set to Float Zero: Will not show in Log-Plot (Always below render limit) + } + + x.push(intensity) + y.push(f) + } + } else { + // console.warn("transformNodesStatsToData: metrics for 'mem_bw' and/or 'flops_any' missing!") + } + + if (x.length > 0 && y.length > 0) { + data = [null, [x, y]] // for dataformat see roofline.svelte + } + return data + } + function transformJobsStatsToInfo(subclusterData) { if (subclusterData) { return subclusterData.map((sc) => { return {id: sc.id, jobId: sc.jobId, numNodes: sc.numNodes, numAcc: sc?.numAccelerators? sc.numAccelerators : 0} }) } else { - console.warn("transformData: jobInfo missing!") + console.warn("transformJobsStatsToInfo: jobInfo missing!") return [] } } + function transformNodesStatsToInfo(subClusterData) { + let result = []; + if (subClusterData && $nodesState?.data) { + // Use Nodes as Returned from CCMS, *NOT* as saved in DB via SlurmState-API! + for (let j = 0; j < subClusterData.length; j++) { + // nodesCounts[subClusterData[i].subCluster] = $nodesState.data.nodes.count; // Probably better as own derived! + + const nodeName = subClusterData[j]?.host ? 
subClusterData[j].host : "unknown" + const nodeMatch = $nodesState.data.nodes.items.find((n) => n.hostname == nodeName && n.subCluster == subClusterData[j].subCluster); + const nodeState = nodeMatch?.nodeState ? nodeMatch.nodeState : "notindb" + let numJobs = 0 + + if ($nodesJobs?.data) { + const nodeJobs = $nodesJobs.data.jobs.items.filter((job) => job.resources.find((res) => res.hostname == nodeName)) + numJobs = nodeJobs?.length ? nodeJobs.length : 0 + } + + result.push({nodeName: nodeName, nodeState: nodeState, numJobs: numJobs}) + }; + }; + return result + } + @@ -115,19 +288,23 @@ {#each $initq.data.clusters.find((c) => c.name == cluster).subClusters as subCluster, i} - Classic + Bubble Node
- {#key $jobRoofQuery.data.jobsMetricStats} + {#key $nodesData?.data?.nodeMetrics || $nodesJobs?.data?.jobs} {subCluster.name} Total: {$jobRoofQuery.data.jobsMetricStats.filter( (data) => data.subCluster == subCluster.name, ).length} Jobs - data.subCluster == subCluster.name, + ) + )} + nodesData={transformNodesStatsToInfo($nodesData?.data?.nodeMetrics.filter( (data) => data.subCluster == subCluster.name, ) )} @@ -136,7 +313,7 @@
- Bubble + Bubble Jobs
{#key $jobRoofQuery.data.jobsMetricStats} {subCluster.name} Total: {$jobRoofQuery.data.jobsMetricStats.filter( From 35c0b0be58599abc88903adafe87fe1fcf77bb1d Mon Sep 17 00:00:00 2001 From: Christoph Kluge Date: Mon, 21 Jul 2025 16:03:07 +0200 Subject: [PATCH 09/40] add scheduler and health status pie charts --- internal/repository/node.go | 6 ++ web/frontend/src/status/DevelDash.svelte | 127 ++++++++++++++++++++++- 2 files changed, 130 insertions(+), 3 deletions(-) diff --git a/internal/repository/node.go b/internal/repository/node.go index 277c1c5..d7db2f4 100644 --- a/internal/repository/node.go +++ b/internal/repository/node.go @@ -321,6 +321,9 @@ func (r *NodeRepository) CountNodeStates(ctx context.Context, filters []*model.N } } + // Add Group and Order + query = query.GroupBy("state").OrderBy("count DESC") + rows, err := query.RunWith(r.stmtCache).Query() if err != nil { queryString, queryVars, _ := query.ToSql() @@ -367,6 +370,9 @@ func (r *NodeRepository) CountHealthStates(ctx context.Context, filters []*model } } + // Add Group and Order + query = query.GroupBy("state").OrderBy("count DESC") + rows, err := query.RunWith(r.stmtCache).Query() if err != nil { queryString, queryVars, _ := query.ToSql() diff --git a/web/frontend/src/status/DevelDash.svelte b/web/frontend/src/status/DevelDash.svelte index 5ac1326..f37976b 100644 --- a/web/frontend/src/status/DevelDash.svelte +++ b/web/frontend/src/status/DevelDash.svelte @@ -9,6 +9,8 @@ import { Row, Col, + Table, + Icon } from "@sveltestrap/sveltestrap"; import { queryStore, @@ -18,8 +20,9 @@ import { init, } from "../generic/utils.js"; - import Roofline from "../generic/plots/Roofline.svelte"; + //import Roofline from "../generic/plots/Roofline.svelte"; import NewBubbleRoofline from "../generic/plots/NewBubbleRoofline.svelte"; + import Pie, { colors } from "../generic/plots/Pie.svelte"; /* Svelte 5 Props */ let { @@ -34,8 +37,10 @@ let from = $state(new Date(Date.now() - 5 * 60 * 1000)); let to = $state(new Date(Date.now())); let plotWidths = $state([]); - let nodesCounts = $state({}); - let jobsJounts = $state({}); + let statesWidth = $state(0); + let healthWidth = $state(0); + // let nodesCounts = $state({}); + // let jobsJounts = $state({}); /* Derived */ // Note: nodeMetrics are requested on configured $timestep resolution @@ -183,6 +188,33 @@ }, })); + // Accumulated NodeStates for Piecharts + const nodesStateCounts = $derived(queryStore({ + client: client, + query: gql` + query ($filter: [NodeFilter!]) { + nodeStates(filter: $filter) { + state + count + } + } + `, + variables: { + filter: { cluster: { eq: cluster }} + }, + })); + + $inspect($nodesStateCounts?.data?.nodeStates) + + const refinedStateData = $derived.by(() => { + return $nodesStateCounts?.data?.nodeStates.filter((e) => ['allocated', 'reserved', 'idle', 'mixed','down', 'unknown'].includes(e.state)) + }); + + const refinedHealthData = $derived.by(() => { + return $nodesStateCounts?.data?.nodeStates.filter((e) => ['full', 'partial', 'failed'].includes(e.state)) + }); + + /* Function */ function transformJobsStatsToData(subclusterData) { /* c will contain values from 0 to 1 representing the duration */ @@ -339,3 +371,92 @@ {/each} {/if} + +
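[Editor's note, not part of the patch] The pie charts added below are fed by the new nodeStates query: the resolver appends the health-state counts to the scheduler-state counts and returns one flat list, and the dashboard then splits that list again by state name (the refinedStateData / refinedHealthData filters above). A minimal, self-contained Go sketch of that split, with made-up counts, assuming the state-name lists used in the Svelte code:

package main

import "fmt"

// nodeStates mirrors the shape of the generated model.NodeStates type.
type nodeStates struct {
	State string
	Count int
}

func main() {
	// Example payload as returned by the nodeStates query; values are invented.
	resp := []nodeStates{
		{"allocated", 120}, {"idle", 30}, {"down", 2},
		{"full", 140}, {"partial", 10}, {"failed", 2},
	}

	schedStates := map[string]bool{
		"allocated": true, "reserved": true, "idle": true,
		"mixed": true, "down": true, "unknown": true,
	}
	healthStates := map[string]bool{"full": true, "partial": true, "failed": true}

	var state, health []nodeStates
	for _, s := range resp {
		switch {
		case schedStates[s.State]:
			state = append(state, s)
		case healthStates[s.State]:
			health = append(health, s)
		}
	}

	fmt.Println(state)  // feeds the "Node State" pie
	fmt.Println(health) // feeds the "Node Health" pie
}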
+
+ +{#if $initq.data && $nodesStateCounts.data} + + + Node State +
+ {#key refinedStateData} + Total: {refinedStateData.reduce((sum, item) => { + return sum + item.count; + }, 0)} Nodes + + sd.count, + )} + entities={refinedStateData.map( + (sd) => sd.state, + )} + /> + {/key} +
+ + + {#key refinedStateData} + + + + + + + {#each refinedStateData as sd, i} + + + + + + {/each} +
LegendCurrent State#Nodes
{sd.state}{sd.count}
+ {/key} + + + + Node Health +
+ {#key refinedHealthData} + Total: {refinedStateData.reduce((sum, item) => { + return sum + item.count; + }, 0)} Nodes + + sd.count, + )} + entities={refinedHealthData.map( + (sd) => sd.state, + )} + /> + {/key} +
+ + + {#key refinedHealthData} + + + + + + + {#each refinedHealthData as hd, i} + + + + + + {/each} +
LegendCurrent Health#Nodes
{hd.state}{hd.count}
+ {/key} + +
+{/if} From 4d2c64b012c7ecf3c08df86258012e981c0e5be1 Mon Sep 17 00:00:00 2001 From: Christoph Kluge Date: Wed, 23 Jul 2025 15:00:10 +0200 Subject: [PATCH 10/40] remove logging --- web/frontend/src/status/DevelDash.svelte | 2 -- 1 file changed, 2 deletions(-) diff --git a/web/frontend/src/status/DevelDash.svelte b/web/frontend/src/status/DevelDash.svelte index f37976b..8cd4627 100644 --- a/web/frontend/src/status/DevelDash.svelte +++ b/web/frontend/src/status/DevelDash.svelte @@ -204,8 +204,6 @@ }, })); - $inspect($nodesStateCounts?.data?.nodeStates) - const refinedStateData = $derived.by(() => { return $nodesStateCounts?.data?.nodeStates.filter((e) => ['allocated', 'reserved', 'idle', 'mixed','down', 'unknown'].includes(e.state)) }); From 764b65d094f22ee0aa332228c2338bdb36b2aba6 Mon Sep 17 00:00:00 2001 From: Jan Eitzinger Date: Thu, 31 Jul 2025 12:10:01 +0200 Subject: [PATCH 11/40] Add timestamp column to node table --- internal/repository/migrations/sqlite3/10_node-table.up.sql | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/internal/repository/migrations/sqlite3/10_node-table.up.sql b/internal/repository/migrations/sqlite3/10_node-table.up.sql index c208b32..6b9afbe 100644 --- a/internal/repository/migrations/sqlite3/10_node-table.up.sql +++ b/internal/repository/migrations/sqlite3/10_node-table.up.sql @@ -1,5 +1,6 @@ CREATE TABLE "node" ( id INTEGER PRIMARY KEY, + time_stamp INTEGER NOT NULL, hostname VARCHAR(255) NOT NULL, cluster VARCHAR(255) NOT NULL, subcluster VARCHAR(255) NOT NULL, @@ -32,4 +33,4 @@ CREATE INDEX IF NOT EXISTS nodes_cluster_health ON node (cluster, health_state); -- Add Indices For Increased Amounts of Tags CREATE INDEX IF NOT EXISTS tags_jobid ON jobtag (job_id); -CREATE INDEX IF NOT EXISTS tags_tagid ON jobtag (tag_id); \ No newline at end of file +CREATE INDEX IF NOT EXISTS tags_tagid ON jobtag (tag_id); From 44cd8d258de097e45454cebd68215f366a8f4169 Mon Sep 17 00:00:00 2001 From: Jan Eitzinger Date: Thu, 31 Jul 2025 12:10:46 +0200 Subject: [PATCH 12/40] Fix and regenerate Swagger and GraphQL --- Makefile | 2 +- api/swagger.json | 63 +++- api/swagger.yaml | 50 +++- go.mod | 37 +-- go.sum | 85 +++--- gqlgen.yml | 3 +- internal/api/docs.go | 63 +++- internal/api/job.go | 3 +- internal/graph/generated/generated.go | 408 +++++++++++++------------- internal/graph/model/models_gen.go | 54 +++- internal/graph/schema.resolvers.go | 2 +- internal/repository/jobQuery.go | 5 +- internal/taskManager/taskManager.go | 5 +- 13 files changed, 459 insertions(+), 321 deletions(-) diff --git a/Makefile b/Makefile index 5702ba1..d7abb18 100644 --- a/Makefile +++ b/Makefile @@ -52,7 +52,7 @@ frontend: swagger: $(info ===> GENERATE swagger) - @go run github.com/swaggo/swag/cmd/swag init -d ./internal/api,./pkg/schema -g rest.go -o ./api + @go run github.com/swaggo/swag/cmd/swag init --parseDependency -d ./internal/api -g rest.go -o ./api @mv ./api/docs.go ./internal/api/docs.go graphql: diff --git a/api/swagger.json b/api/swagger.json index 310297f..87bf3ed 100644 --- a/api/swagger.json +++ b/api/swagger.json @@ -1254,9 +1254,27 @@ "api.Node": { "type": "object", "properties": { + "cpusAllocated": { + "type": "integer" + }, + "cpusTotal": { + "type": "integer" + }, + "gpusAllocated": { + "type": "integer" + }, + "gpusTotal": { + "type": "integer" + }, "hostname": { "type": "string" }, + "memoryAllocated": { + "type": "integer" + }, + "memoryTotal": { + "type": "integer" + }, "states": { "type": "array", "items": { @@ -1372,7 +1390,8 @@ "energyFootprint": { "type": 
"object", "additionalProperties": { - "type": "number" + "type": "number", + "format": "float64" } }, "exclusive": { @@ -1384,7 +1403,8 @@ "footprint": { "type": "object", "additionalProperties": { - "type": "number" + "type": "number", + "format": "float64" } }, "id": { @@ -1475,6 +1495,10 @@ "type": "string", "example": "main" }, + "submitTime": { + "type": "integer", + "example": 1649723812 + }, "tags": { "type": "array", "items": { @@ -1540,24 +1564,32 @@ "schema.JobState": { "type": "string", "enum": [ - "running", - "completed", - "failed", + "boot_fail", "cancelled", - "stopped", - "timeout", + "completed", + "deadline", + "failed", + "node_fail", + "out_of_memory", + "pending", "preempted", - "out_of_memory" + "running", + "suspended", + "timeout" ], "x-enum-varnames": [ - "JobStateRunning", - "JobStateCompleted", - "JobStateFailed", + "JobStateBootFail", "JobStateCancelled", - "JobStateStopped", - "JobStateTimeout", + "JobStateCompleted", + "JobStateDeadline", + "JobStateFailed", + "JobStateNodeFail", + "JobStateOutOfMemory", + "JobStatePending", "JobStatePreempted", - "JobStateOutOfMemory" + "JobStateRunning", + "JobStateSuspended", + "JobStateTimeout" ] }, "schema.JobStatistics": { @@ -1756,7 +1788,8 @@ "additionalProperties": { "type": "array", "items": { - "type": "number" + "type": "number", + "format": "float64" } } } diff --git a/api/swagger.yaml b/api/swagger.yaml index 1c57f2d..06caa56 100644 --- a/api/swagger.yaml +++ b/api/swagger.yaml @@ -113,8 +113,20 @@ definitions: type: object api.Node: properties: + cpusAllocated: + type: integer + cpusTotal: + type: integer + gpusAllocated: + type: integer + gpusTotal: + type: integer hostname: type: string + memoryAllocated: + type: integer + memoryTotal: + type: integer states: items: type: string @@ -192,6 +204,7 @@ definitions: type: number energyFootprint: additionalProperties: + format: float64 type: number type: object exclusive: @@ -201,6 +214,7 @@ definitions: type: integer footprint: additionalProperties: + format: float64 type: number type: object id: @@ -268,6 +282,9 @@ definitions: subCluster: example: main type: string + submitTime: + example: 1649723812 + type: integer tags: items: $ref: '#/definitions/schema.Tag' @@ -311,24 +328,32 @@ definitions: type: object schema.JobState: enum: - - running - - completed - - failed + - boot_fail - cancelled - - stopped - - timeout - - preempted + - completed + - deadline + - failed + - node_fail - out_of_memory + - pending + - preempted + - running + - suspended + - timeout type: string x-enum-varnames: - - JobStateRunning - - JobStateCompleted - - JobStateFailed + - JobStateBootFail - JobStateCancelled - - JobStateStopped - - JobStateTimeout - - JobStatePreempted + - JobStateCompleted + - JobStateDeadline + - JobStateFailed + - JobStateNodeFail - JobStateOutOfMemory + - JobStatePending + - JobStatePreempted + - JobStateRunning + - JobStateSuspended + - JobStateTimeout schema.JobStatistics: description: Specification for job metric statistics. 
properties: @@ -465,6 +490,7 @@ definitions: percentiles: additionalProperties: items: + format: float64 type: number type: array type: object diff --git a/go.mod b/go.mod index 1714807..df742d7 100644 --- a/go.mod +++ b/go.mod @@ -5,8 +5,8 @@ go 1.23.5 toolchain go1.24.1 require ( - github.com/99designs/gqlgen v0.17.66 - github.com/ClusterCockpit/cc-lib v0.5.0 + github.com/99designs/gqlgen v0.17.76 + github.com/ClusterCockpit/cc-lib v0.6.0 github.com/Masterminds/squirrel v1.5.4 github.com/coreos/go-oidc/v3 v3.12.0 github.com/expr-lang/expr v1.17.5 @@ -27,9 +27,9 @@ require ( github.com/qustavo/sqlhooks/v2 v2.1.0 github.com/santhosh-tekuri/jsonschema/v5 v5.3.1 github.com/swaggo/http-swagger v1.3.4 - github.com/swaggo/swag v1.16.4 - github.com/vektah/gqlparser/v2 v2.5.22 - golang.org/x/crypto v0.39.0 + github.com/swaggo/swag v1.16.5 + github.com/vektah/gqlparser/v2 v2.5.30 + golang.org/x/crypto v0.40.0 golang.org/x/oauth2 v0.30.0 golang.org/x/time v0.5.0 ) @@ -41,16 +41,16 @@ require ( github.com/agnivade/levenshtein v1.2.1 // indirect github.com/beorn7/perks v1.0.1 // indirect github.com/cespare/xxhash/v2 v2.3.0 // indirect - github.com/cpuguy83/go-md2man/v2 v2.0.6 // indirect + github.com/cpuguy83/go-md2man/v2 v2.0.7 // indirect github.com/felixge/httpsnoop v1.0.4 // indirect github.com/fsnotify/fsnotify v1.9.0 // indirect github.com/go-asn1-ber/asn1-ber v1.5.7 // indirect github.com/go-jose/go-jose/v4 v4.0.5 // indirect - github.com/go-openapi/jsonpointer v0.21.0 // indirect + github.com/go-openapi/jsonpointer v0.21.1 // indirect github.com/go-openapi/jsonreference v0.21.0 // indirect github.com/go-openapi/spec v0.21.0 // indirect - github.com/go-openapi/swag v0.23.0 // indirect - github.com/go-viper/mapstructure/v2 v2.2.1 // indirect + github.com/go-openapi/swag v0.23.1 // indirect + github.com/go-viper/mapstructure/v2 v2.3.0 // indirect github.com/google/uuid v1.6.0 // indirect github.com/gorilla/securecookie v1.1.2 // indirect github.com/gorilla/websocket v1.5.3 // indirect @@ -74,18 +74,19 @@ require ( github.com/russross/blackfriday/v2 v2.1.0 // indirect github.com/sosodev/duration v1.3.1 // indirect github.com/swaggo/files v1.0.1 // indirect - github.com/urfave/cli/v2 v2.27.5 // indirect - github.com/xrash/smetrics v0.0.0-20240521201337-686a1a2994c1 // indirect + github.com/urfave/cli/v2 v2.27.7 // indirect + github.com/xrash/smetrics v0.0.0-20250705151800-55b8f293f342 // indirect go.uber.org/atomic v1.11.0 // indirect + go.yaml.in/yaml/v2 v2.4.2 // indirect golang.org/x/exp v0.0.0-20250620022241-b7579e27df2b // indirect - golang.org/x/mod v0.25.0 // indirect - golang.org/x/net v0.41.0 // indirect - golang.org/x/sync v0.15.0 // indirect - golang.org/x/sys v0.33.0 // indirect - golang.org/x/text v0.26.0 // indirect - golang.org/x/tools v0.34.0 // indirect + golang.org/x/mod v0.26.0 // indirect + golang.org/x/net v0.42.0 // indirect + golang.org/x/sync v0.16.0 // indirect + golang.org/x/sys v0.34.0 // indirect + golang.org/x/text v0.27.0 // indirect + golang.org/x/tools v0.35.0 // indirect google.golang.org/protobuf v1.36.6 // indirect gopkg.in/yaml.v2 v2.4.0 // indirect gopkg.in/yaml.v3 v3.0.1 // indirect - sigs.k8s.io/yaml v1.4.0 // indirect + sigs.k8s.io/yaml v1.6.0 // indirect ) diff --git a/go.sum b/go.sum index 0cb3dd9..1524b5a 100644 --- a/go.sum +++ b/go.sum @@ -1,13 +1,13 @@ filippo.io/edwards25519 v1.1.0 h1:FNf4tywRC1HmFuKW5xopWpigGjJKiJSV0Cqo0cJWDaA= filippo.io/edwards25519 v1.1.0/go.mod h1:BxyFTGdWcka3PhytdK4V28tE5sGfRvvvRV7EaN4VDT4= -github.com/99designs/gqlgen v0.17.66 
h1:2/SRc+h3115fCOZeTtsqrB5R5gTGm+8qCAwcrZa+CXA= -github.com/99designs/gqlgen v0.17.66/go.mod h1:gucrb5jK5pgCKzAGuOMMVU9C8PnReecHEHd2UxLQwCg= +github.com/99designs/gqlgen v0.17.76 h1:YsJBcfACWmXWU2t1yCjoGdOmqcTfOFpjbLAE443fmYI= +github.com/99designs/gqlgen v0.17.76/go.mod h1:miiU+PkAnTIDKMQ1BseUOIVeQHoiwYDZGCswoxl7xec= github.com/Azure/go-ansiterm v0.0.0-20230124172434-306776ec8161 h1:L/gRVlceqvL25UVaW/CKtUDjefjrs0SPonmDGUVOYP0= github.com/Azure/go-ansiterm v0.0.0-20230124172434-306776ec8161/go.mod h1:xomTg63KZ2rFqZQzSB4Vz2SUXa1BpHTVz9L5PTmPC4E= github.com/Azure/go-ntlmssp v0.0.0-20221128193559-754e69321358 h1:mFRzDkZVAjdal+s7s0MwaRv9igoPqLRdzOLzw/8Xvq8= github.com/Azure/go-ntlmssp v0.0.0-20221128193559-754e69321358/go.mod h1:chxPXzSsl7ZWRAuOIE23GDNzjWuZquvFlgA8xmpunjU= -github.com/ClusterCockpit/cc-lib v0.5.0 h1:DSKAD1TxjVWyd1x3GWvxFeEkANF9o13T97nirj3CbRU= -github.com/ClusterCockpit/cc-lib v0.5.0/go.mod h1:0zLbJprwOWLA+OSNQ+OlUKLscZszwf9J2j8Ly5ztplk= +github.com/ClusterCockpit/cc-lib v0.6.0 h1:uK/6DLBkkLznSWD28wmGGumMAa5ovD9rPaGS4Miw9W8= +github.com/ClusterCockpit/cc-lib v0.6.0/go.mod h1:0zLbJprwOWLA+OSNQ+OlUKLscZszwf9J2j8Ly5ztplk= github.com/KyleBanks/depth v1.2.1 h1:5h8fQADFrWtarTdtDudMmGsC7GPbOAu6RVB3ffsVFHc= github.com/KyleBanks/depth v1.2.1/go.mod h1:jzSb9d0L43HxTQfT+oSA1EEp2q+ne2uh6XgeJcm8brE= github.com/Masterminds/squirrel v1.5.4 h1:uUcX/aBc8O7Fg9kaISIUsHXdKuqehiXAMQTYX8afzqM= @@ -16,16 +16,16 @@ github.com/Microsoft/go-winio v0.6.2 h1:F2VQgta7ecxGYO8k3ZZz3RS8fVIXVxONVUPlNERo github.com/Microsoft/go-winio v0.6.2/go.mod h1:yd8OoFMLzJbo9gZq8j5qaps8bJ9aShtEA8Ipt1oGCvU= github.com/NVIDIA/go-nvml v0.12.9-0 h1:e344UK8ZkeMeeLkdQtRhmXRxNf+u532LDZPGMtkdus0= github.com/NVIDIA/go-nvml v0.12.9-0/go.mod h1:+KNA7c7gIBH7SKSJ1ntlwkfN80zdx8ovl4hrK3LmPt4= -github.com/PuerkitoBio/goquery v1.9.3 h1:mpJr/ikUA9/GNJB/DBZcGeFDXUtosHRyRrwh7KGdTG0= -github.com/PuerkitoBio/goquery v1.9.3/go.mod h1:1ndLHPdTz+DyQPICCWYlYQMPl0oXZj0G6D4LCYA6u4U= +github.com/PuerkitoBio/goquery v1.10.3 h1:pFYcNSqHxBD06Fpj/KsbStFRsgRATgnf3LeXiUkhzPo= +github.com/PuerkitoBio/goquery v1.10.3/go.mod h1:tMUX0zDMHXYlAQk6p35XxQMqMweEKB7iK7iLNd4RH4Y= github.com/agnivade/levenshtein v1.2.1 h1:EHBY3UOn1gwdy/VbFwgo4cxecRznFk7fKWN1KOX7eoM= github.com/agnivade/levenshtein v1.2.1/go.mod h1:QVVI16kDrtSuwcpd0p1+xMC6Z/VfhtCyDIjcwga4/DU= github.com/alexbrainman/sspi v0.0.0-20231016080023-1a75b4708caa h1:LHTHcTQiSGT7VVbI0o4wBRNQIgn917usHWOd6VAffYI= github.com/alexbrainman/sspi v0.0.0-20231016080023-1a75b4708caa/go.mod h1:cEWa1LVoE5KvSD9ONXsZrj0z6KqySlCCNKHlLzbqAt4= github.com/andreyvit/diff v0.0.0-20170406064948-c7f18ee00883 h1:bvNMNQO63//z+xNgfBlViaCIJKLlCJ6/fmUseuG0wVQ= github.com/andreyvit/diff v0.0.0-20170406064948-c7f18ee00883/go.mod h1:rCTlJbsFo29Kk6CurOXKm700vrz8f0KW0JNfpkRJY/8= -github.com/andybalholm/cascadia v1.3.2 h1:3Xi6Dw5lHF15JtdcmAHD3i1+T8plmv7BQ/nsViSLyss= -github.com/andybalholm/cascadia v1.3.2/go.mod h1:7gtRlve5FxPPgIgX36uWBX58OdBsSS6lUvCFb+h7KvU= +github.com/andybalholm/cascadia v1.3.3 h1:AG2YHrzJIm4BZ19iwJ/DAua6Btl3IwJX+VI4kktS1LM= +github.com/andybalholm/cascadia v1.3.3/go.mod h1:xNd9bqTn98Ln4DwST8/nG+H0yuB8Hmgu1YHNnWw0GeA= github.com/apapsch/go-jsonmerge/v2 v2.0.0 h1:axGnT1gRIfimI7gJifB699GoE/oq+F2MU7Dml6nw9rQ= github.com/apapsch/go-jsonmerge/v2 v2.0.0/go.mod h1:lvDnEdqiQrp0O42VQGgmlKpxL1AP2+08jFMw88y4klk= github.com/arbovm/levenshtein v0.0.0-20160628152529-48b4e1c0c4d0 h1:jfIu9sQUG6Ig+0+Ap1h4unLjW6YQJpKZVmUzxsD4E/Q= @@ -36,8 +36,8 @@ github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UF 
github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= github.com/coreos/go-oidc/v3 v3.12.0 h1:sJk+8G2qq94rDI6ehZ71Bol3oUHy63qNYmkiSjrc/Jo= github.com/coreos/go-oidc/v3 v3.12.0/go.mod h1:gE3LgjOgFoHi9a4ce4/tJczr0Ai2/BoDhf0r5lltWI0= -github.com/cpuguy83/go-md2man/v2 v2.0.6 h1:XJtiaUW6dEEqVuZiMTn1ldk455QWwEIsMIJlo5vtkx0= -github.com/cpuguy83/go-md2man/v2 v2.0.6/go.mod h1:oOW0eioCTA6cOiMLiUPZOpcVxMig6NIQQ7OS05n1F4g= +github.com/cpuguy83/go-md2man/v2 v2.0.7 h1:zbFlGlXEAKlwXpmvle3d8Oe3YnkKIK4xSRTd3sHPnBo= +github.com/cpuguy83/go-md2man/v2 v2.0.7/go.mod h1:oOW0eioCTA6cOiMLiUPZOpcVxMig6NIQQ7OS05n1F4g= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= @@ -71,27 +71,26 @@ github.com/go-logr/logr v1.4.2 h1:6pFjapn8bFcIbiKo3XT4j/BhANplGihG6tvd+8rYgrY= github.com/go-logr/logr v1.4.2/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag= github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE= -github.com/go-openapi/jsonpointer v0.21.0 h1:YgdVicSA9vH5RiHs9TZW5oyafXZFc6+2Vc1rr/O9oNQ= -github.com/go-openapi/jsonpointer v0.21.0/go.mod h1:IUyH9l/+uyhIYQ/PXVA41Rexl+kOkAPDdXEYns6fzUY= +github.com/go-openapi/jsonpointer v0.21.1 h1:whnzv/pNXtK2FbX/W9yJfRmE2gsmkfahjMKB0fZvcic= +github.com/go-openapi/jsonpointer v0.21.1/go.mod h1:50I1STOfbY1ycR8jGz8DaMeLCdXiI6aDteEdRNNzpdk= github.com/go-openapi/jsonreference v0.21.0 h1:Rs+Y7hSXT83Jacb7kFyjn4ijOuVGSvOdF2+tg1TRrwQ= github.com/go-openapi/jsonreference v0.21.0/go.mod h1:LmZmgsrTkVg9LG4EaHeY8cBDslNPMo06cago5JNLkm4= github.com/go-openapi/spec v0.21.0 h1:LTVzPc3p/RzRnkQqLRndbAzjY0d0BCL72A6j3CdL9ZY= github.com/go-openapi/spec v0.21.0/go.mod h1:78u6VdPw81XU44qEWGhtr982gJ5BWg2c0I5XwVMotYk= -github.com/go-openapi/swag v0.23.0 h1:vsEVJDUo2hPJ2tu0/Xc+4noaxyEffXNIs3cOULZ+GrE= -github.com/go-openapi/swag v0.23.0/go.mod h1:esZ8ITTYEsH1V2trKHjAN8Ai7xHb8RV+YSZ577vPjgQ= +github.com/go-openapi/swag v0.23.1 h1:lpsStH0n2ittzTnbaSloVZLuB5+fvSY/+hnagBjSNZU= +github.com/go-openapi/swag v0.23.1/go.mod h1:STZs8TbRvEQQKUA+JZNAm3EWlgaOBGpyFDqQnDHMef0= github.com/go-sql-driver/mysql v1.4.1/go.mod h1:zAC/RDZ24gD3HViQzih4MyKcchzm+sOG5ZlKdlhCg5w= github.com/go-sql-driver/mysql v1.8.1/go.mod h1:wEBSXgmK//2ZFJyE+qWnIsVGmvmEKlqwuVSjsCm7DZg= github.com/go-sql-driver/mysql v1.9.0 h1:Y0zIbQXhQKmQgTp44Y1dp3wTXcn804QoTptLZT1vtvo= github.com/go-sql-driver/mysql v1.9.0/go.mod h1:pDetrLJeA3oMujJuvXc8RJoasr589B6A9fwzD3QMrqw= -github.com/go-viper/mapstructure/v2 v2.2.1 h1:ZAaOCxANMuZx5RCeg0mBdEZk7DZasvvZIxtHqx8aGss= -github.com/go-viper/mapstructure/v2 v2.2.1/go.mod h1:oJDH3BJKyqBA2TXFhDsKDGDTlndYOZ6rGS0BRZIxGhM= +github.com/go-viper/mapstructure/v2 v2.3.0 h1:27XbWsHIqhbdR5TIC911OfYvgSaW93HM+dX7970Q7jk= +github.com/go-viper/mapstructure/v2 v2.3.0/go.mod h1:oJDH3BJKyqBA2TXFhDsKDGDTlndYOZ6rGS0BRZIxGhM= github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q= github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q= github.com/golang-jwt/jwt/v5 v5.2.2 h1:Rl4B7itRWVtYIHFrSNd7vhTiz9UpLdi6gZhZ3wEeDy8= github.com/golang-jwt/jwt/v5 v5.2.2/go.mod h1:pqrtFR0X4osieyHYxtmOUWsAWrfe1Q5UVIyoH402zdk= github.com/golang-migrate/migrate/v4 v4.18.2 h1:2VSCMz7x7mjyTXx3m2zPokOY82LTRgxK1yQYKo6wWQ8= 
github.com/golang-migrate/migrate/v4 v4.18.2/go.mod h1:2CM6tJvn2kqPXwnXO/d3rAQYiyoIm180VsO8PRX6Rpk= -github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8= github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU= @@ -242,14 +241,14 @@ github.com/swaggo/files v1.0.1 h1:J1bVJ4XHZNq0I46UU90611i9/YzdrF7x92oX1ig5IdE= github.com/swaggo/files v1.0.1/go.mod h1:0qXmMNH6sXNf+73t65aKeB+ApmgxdnkQzVTAj2uaMUg= github.com/swaggo/http-swagger v1.3.4 h1:q7t/XLx0n15H1Q9/tk3Y9L4n210XzJF5WtnDX64a5ww= github.com/swaggo/http-swagger v1.3.4/go.mod h1:9dAh0unqMBAlbp1uE2Uc2mQTxNMU/ha4UbucIg1MFkQ= -github.com/swaggo/swag v1.16.4 h1:clWJtd9LStiG3VeijiCfOVODP6VpHtKdQy9ELFG3s1A= -github.com/swaggo/swag v1.16.4/go.mod h1:VBsHJRsDvfYvqoiMKnsdwhNV9LEMHgEDZcyVYX0sxPg= -github.com/urfave/cli/v2 v2.27.5 h1:WoHEJLdsXr6dDWoJgMq/CboDmyY/8HMMH1fTECbih+w= -github.com/urfave/cli/v2 v2.27.5/go.mod h1:3Sevf16NykTbInEnD0yKkjDAeZDS0A6bzhBH5hrMvTQ= -github.com/vektah/gqlparser/v2 v2.5.22 h1:yaaeJ0fu+nv1vUMW0Hl+aS1eiv1vMfapBNjpffAda1I= -github.com/vektah/gqlparser/v2 v2.5.22/go.mod h1:xMl+ta8a5M1Yo1A1Iwt/k7gSpscwSnHZdw7tfhEGfTM= -github.com/xrash/smetrics v0.0.0-20240521201337-686a1a2994c1 h1:gEOO8jv9F4OT7lGCjxCBTO/36wtF6j2nSip77qHd4x4= -github.com/xrash/smetrics v0.0.0-20240521201337-686a1a2994c1/go.mod h1:Ohn+xnUBiLI6FVj/9LpzZWtj1/D6lUovWYBkxHVV3aM= +github.com/swaggo/swag v1.16.5 h1:nMf2fEV1TetMTJb4XzD0Lz7jFfKJmJKGTygEey8NSxM= +github.com/swaggo/swag v1.16.5/go.mod h1:ngP2etMK5a0P3QBizic5MEwpRmluJZPHjXcMoj4Xesg= +github.com/urfave/cli/v2 v2.27.7 h1:bH59vdhbjLv3LAvIu6gd0usJHgoTTPhCFib8qqOwXYU= +github.com/urfave/cli/v2 v2.27.7/go.mod h1:CyNAG/xg+iAOg0N4MPGZqVmv2rCoP267496AOXUZjA4= +github.com/vektah/gqlparser/v2 v2.5.30 h1:EqLwGAFLIzt1wpx1IPpY67DwUujF1OfzgEyDsLrN6kE= +github.com/vektah/gqlparser/v2 v2.5.30/go.mod h1:D1/VCZtV3LPnQrcPBeR/q5jkSQIPti0uYCP/RI0gIeo= +github.com/xrash/smetrics v0.0.0-20250705151800-55b8f293f342 h1:FnBeRrxr7OU4VvAzt5X7s6266i6cSVkkFPS0TuXWbIg= +github.com/xrash/smetrics v0.0.0-20250705151800-55b8f293f342/go.mod h1:Ohn+xnUBiLI6FVj/9LpzZWtj1/D6lUovWYBkxHVV3aM= github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY= go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.54.0 h1:TT4fX+nBOA/+LUkobKGW1ydGcn+G3vRw9+g5HwCphpk= go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.54.0/go.mod h1:L7UH0GbB0p47T4Rri3uHjbpCFYrVrwc1I25QhNPiGK8= @@ -263,6 +262,10 @@ go.uber.org/atomic v1.11.0 h1:ZvwS0R+56ePWxUNi+Atn9dWONBPp/AUETXlHW0DxSjE= go.uber.org/atomic v1.11.0/go.mod h1:LUxbIzbOniOlMKjJjyPfpl4v+PKK2cNJn91OQbhoJI0= go.uber.org/goleak v1.3.0 h1:2K3zAYmnTNqV73imy9J1T3WC+gmCePx2hEGkimedGto= go.uber.org/goleak v1.3.0/go.mod h1:CoHD4mav9JJNrW/WLlf7HGZPjdw8EucARQHekz1X6bE= +go.yaml.in/yaml/v2 v2.4.2 h1:DzmwEr2rDGHl7lsFgAHxmNz/1NlQ7xLIrlN2h5d1eGI= +go.yaml.in/yaml/v2 v2.4.2/go.mod h1:081UH+NErpNdqlCXm3TtEran0rJZGxAYx9hb/ELlsPU= +go.yaml.in/yaml/v3 v3.0.3 h1:bXOww4E/J3f66rav3pX3m8w6jDE4knZjGOw8b5Y6iNE= +go.yaml.in/yaml/v3 v3.0.3/go.mod h1:tBHosrYAkRZjRAOREWbDnBXUf08JOwYq++0QNwQiWzI= golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc= golang.org/x/crypto v0.6.0/go.mod 
h1:OFC/31mSvZgRz0V1QTNCzfAI1aIRzbiufJtkMIlEp58= @@ -270,8 +273,8 @@ golang.org/x/crypto v0.13.0/go.mod h1:y6Z2r+Rw4iayiXXAIxJIDAJ1zMW4yaTpebo8fPOliY golang.org/x/crypto v0.19.0/go.mod h1:Iy9bg/ha4yyC70EfRS8jz+B6ybOBKMaSxLj6P6oBDfU= golang.org/x/crypto v0.23.0/go.mod h1:CKFgDieR+mRhux2Lsu27y0fO304Db0wZe70UKqHu0v8= golang.org/x/crypto v0.31.0/go.mod h1:kDsLvtWBEx7MV9tJOj9bnXsPbxwJQ6csT/x4KIN4Ssk= -golang.org/x/crypto v0.39.0 h1:SHs+kF4LP+f+p14esP5jAoDpHU8Gu/v9lFRK6IT5imM= -golang.org/x/crypto v0.39.0/go.mod h1:L+Xg3Wf6HoL4Bn4238Z6ft6KfEpN0tJGo53AAPC632U= +golang.org/x/crypto v0.40.0 h1:r4x+VvoG5Fm+eJcxMaY8CQM7Lb0l1lsmjGBQ6s8BfKM= +golang.org/x/crypto v0.40.0/go.mod h1:Qr1vMER5WyS2dfPHAlsOj01wgLbsyWtFn/aY+5+ZdxY= golang.org/x/exp v0.0.0-20250620022241-b7579e27df2b h1:M2rDM6z3Fhozi9O7NWsxAkg/yqS/lQJ6PmkyIV3YP+o= golang.org/x/exp v0.0.0-20250620022241-b7579e27df2b/go.mod h1:3//PLf8L/X+8b4vuAfHzxeRUl04Adcb341+IGKfnqS8= golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4= @@ -279,8 +282,8 @@ golang.org/x/mod v0.8.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs= golang.org/x/mod v0.12.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs= golang.org/x/mod v0.15.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c= golang.org/x/mod v0.17.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c= -golang.org/x/mod v0.25.0 h1:n7a+ZbQKQA/Ysbyb0/6IbB1H/X41mKgbhfv7AfG/44w= -golang.org/x/mod v0.25.0/go.mod h1:IXM97Txy2VM4PJ3gI61r1YEk/gAj6zAHN3AdZt6S9Ww= +golang.org/x/mod v0.26.0 h1:EGMPT//Ezu+ylkCijjPc+f4Aih7sZvaAr+O3EHBxvZg= +golang.org/x/mod v0.26.0/go.mod h1:/j6NAhSk8iQ723BGAUyoAcn7SlD7s15Dp9Nd/SfeaFQ= golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20200114155413-6afb5195e5aa/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= @@ -292,8 +295,8 @@ golang.org/x/net v0.15.0/go.mod h1:idbUs1IY1+zTqbi8yxTbhexhEEk5ur9LInksu6HrEpk= golang.org/x/net v0.21.0/go.mod h1:bIjVDfnllIU7BJ2DNgfnXvpSvtn8VRwhlsaeUTyUS44= golang.org/x/net v0.25.0/go.mod h1:JkAGAh7GEvH74S6FOH42FLoXpXbE/aqXSrIQjXgsiwM= golang.org/x/net v0.33.0/go.mod h1:HXLR5J+9DxmrqMwG9qjGCxZ+zKXxBru04zlTvWlWuN4= -golang.org/x/net v0.41.0 h1:vBTly1HeNPEn3wtREYfy4GZ/NECgw2Cnl+nK6Nz3uvw= -golang.org/x/net v0.41.0/go.mod h1:B/K4NNqkfmg07DQYrbwvSluqCJOOXwUjeb/5lOisjbA= +golang.org/x/net v0.42.0 h1:jzkYrhi3YQWD6MLBJcsklgQsoAcw89EcZbJw8Z614hs= +golang.org/x/net v0.42.0/go.mod h1:FF1RA5d3u7nAYA4z2TkclSCKh68eSXtiFwcWQpPXdt8= golang.org/x/oauth2 v0.30.0 h1:dnDm7JmhM45NNpd8FDDeLhK6FwqbOf4MLCM9zb1BOHI= golang.org/x/oauth2 v0.30.0/go.mod h1:B++QgG3ZKulg6sRPGD/mqlHQs5rB3Ml9erfeDY7xKlU= golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= @@ -303,8 +306,8 @@ golang.org/x/sync v0.3.0/go.mod h1:FU7BRWz2tNW+3quACPkgCx/L+uEAv1htQ0V83Z9Rj+Y= golang.org/x/sync v0.6.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= golang.org/x/sync v0.7.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= golang.org/x/sync v0.10.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= -golang.org/x/sync v0.15.0 h1:KWH3jNZsfyT6xfAfKiz6MRNmd46ByHDYaZ7KSkCtdW8= -golang.org/x/sync v0.15.0/go.mod h1:1dzgHSNfp02xaA81J2MS99Qcpr2w7fw1gpm99rleRqA= +golang.org/x/sync v0.16.0 h1:ycBJEhp9p4vXvUZNszeOq0kGTPghopOL8q0fq3vstxw= +golang.org/x/sync v0.16.0/go.mod 
h1:1dzgHSNfp02xaA81J2MS99Qcpr2w7fw1gpm99rleRqA= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= @@ -316,8 +319,8 @@ golang.org/x/sys v0.12.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.17.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/sys v0.20.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/sys v0.28.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= -golang.org/x/sys v0.33.0 h1:q3i8TbbEz+JRD9ywIRlyRAQbM0qF7hu24q3teo2hbuw= -golang.org/x/sys v0.33.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k= +golang.org/x/sys v0.34.0 h1:H5Y5sJ2L2JRdyv7ROF1he/lPdvFsd0mJHFw2ThKHxLA= +golang.org/x/sys v0.34.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k= golang.org/x/telemetry v0.0.0-20240228155512-f48c80bd79b2/go.mod h1:TeRTkGYfJXctD9OcfyVLyj2J3IxLnKwHJR8f4D8a3YE= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= @@ -336,8 +339,8 @@ golang.org/x/text v0.13.0/go.mod h1:TvPlkZtksWOMsz7fbANvkp4WM8x/WCo/om8BMLbz+aE= golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU= golang.org/x/text v0.15.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU= golang.org/x/text v0.21.0/go.mod h1:4IBbMaMmOPCJ8SecivzSH54+73PCFmPWxNTLm+vZkEQ= -golang.org/x/text v0.26.0 h1:P42AVeLghgTYr4+xUnTRKDMqpar+PtX7KWuNQL21L8M= -golang.org/x/text v0.26.0/go.mod h1:QK15LZJUUQVJxhz7wXgxSy/CJaTFjd0G+YLonydOVQA= +golang.org/x/text v0.27.0 h1:4fGWRpyh641NLlecmyl4LOe6yDdfaYNrGb2zdfo4JV4= +golang.org/x/text v0.27.0/go.mod h1:1D28KMCvyooCX9hBiosv5Tz/+YLxj0j7XhWjpSUF7CU= golang.org/x/time v0.5.0 h1:o7cqy6amK/52YcAKIPlM3a+Fpj35zvRj2TP+e1xFSfk= golang.org/x/time v0.5.0/go.mod h1:3BpzKBy/shNhVucY/MWOyx10tF3SFh9QdLuxbVysPQM= golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= @@ -346,8 +349,8 @@ golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc golang.org/x/tools v0.6.0/go.mod h1:Xwgl3UAJ/d3gWutnCtw505GrjyAbvKui8lOU390QaIU= golang.org/x/tools v0.13.0/go.mod h1:HvlwmtVNQAhOuCjW7xxvovg8wbNq7LwfXh/k7wXUl58= golang.org/x/tools v0.21.1-0.20240508182429-e35e4ccd0d2d/go.mod h1:aiJjzUbINMkxbQROHiO6hDPo2LHcIPhhQsa9DLh0yGk= -golang.org/x/tools v0.34.0 h1:qIpSLOxeCYGg9TrcJokLBG4KFA6d795g0xkBkiESGlo= -golang.org/x/tools v0.34.0/go.mod h1:pAP9OwEaY1CAW3HOmg3hLZC5Z0CCmzjAF2UQMSqNARg= +golang.org/x/tools v0.35.0 h1:mBffYraMEf7aa0sB+NuKnuCy8qI/9Bughn8dC2Gu5r0= +golang.org/x/tools v0.35.0/go.mod h1:NKdj5HkL/73byiZSJjqJgKn3ep7KjFkBOkR/Hps3VPw= golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= google.golang.org/protobuf v1.36.6 h1:z1NpPI8ku2WgiWnf+t9wTPsn6eP1L7ksHUlkfLvd9xY= google.golang.org/protobuf v1.36.6/go.mod h1:jduwjTPXsFjZGTmRluh+L6NjiWu7pchiJ2/5YcXBHnY= @@ -360,5 +363,5 @@ gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ= gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= gopkg.in/yaml.v3 v3.0.1/go.mod 
h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= -sigs.k8s.io/yaml v1.4.0 h1:Mk1wCc2gy/F0THH0TAp1QYyJNzRm2KCLy3o5ASXVI5E= -sigs.k8s.io/yaml v1.4.0/go.mod h1:Ejl7/uTz7PSA4eKMyQCUTnhZYNmLIl+5c2lQPGR2BPY= +sigs.k8s.io/yaml v1.6.0 h1:G8fkbMSAFqgEFgh4b1wmtzDnioxFCUgTZhlbj5P9QYs= +sigs.k8s.io/yaml v1.6.0/go.mod h1:796bPqUfzR/0jLAl6XjHl3Ck7MiyVv8dbTdyT3/pMf4= diff --git a/gqlgen.yml b/gqlgen.yml index 3118ec9..c4b3faf 100644 --- a/gqlgen.yml +++ b/gqlgen.yml @@ -32,6 +32,7 @@ resolver: autobind: - "github.com/99designs/gqlgen/graphql/introspection" - "github.com/ClusterCockpit/cc-backend/internal/graph/model" + - "github.com/ClusterCockpit/cc-backend/internal/config" # This section declares type mapping between the GraphQL and go type systems # @@ -83,8 +84,6 @@ models: { model: "github.com/ClusterCockpit/cc-lib/schema.NodeState" } HealthState: { model: "github.com/ClusterCockpit/cc-lib/schema.MonitoringState" } - TimeRange: { model: "github.com/ClusterCockpit/cc-lib/schema.TimeRange" } - IntRange: { model: "github.com/ClusterCockpit/cc-lib/schema.IntRange" } JobMetric: { model: "github.com/ClusterCockpit/cc-lib/schema.JobMetric" } Series: { model: "github.com/ClusterCockpit/cc-lib/schema.Series" } MetricStatistics: diff --git a/internal/api/docs.go b/internal/api/docs.go index d7b8464..50cab92 100644 --- a/internal/api/docs.go +++ b/internal/api/docs.go @@ -1261,9 +1261,27 @@ const docTemplate = `{ "api.Node": { "type": "object", "properties": { + "cpusAllocated": { + "type": "integer" + }, + "cpusTotal": { + "type": "integer" + }, + "gpusAllocated": { + "type": "integer" + }, + "gpusTotal": { + "type": "integer" + }, "hostname": { "type": "string" }, + "memoryAllocated": { + "type": "integer" + }, + "memoryTotal": { + "type": "integer" + }, "states": { "type": "array", "items": { @@ -1379,7 +1397,8 @@ const docTemplate = `{ "energyFootprint": { "type": "object", "additionalProperties": { - "type": "number" + "type": "number", + "format": "float64" } }, "exclusive": { @@ -1391,7 +1410,8 @@ const docTemplate = `{ "footprint": { "type": "object", "additionalProperties": { - "type": "number" + "type": "number", + "format": "float64" } }, "id": { @@ -1482,6 +1502,10 @@ const docTemplate = `{ "type": "string", "example": "main" }, + "submitTime": { + "type": "integer", + "example": 1649723812 + }, "tags": { "type": "array", "items": { @@ -1547,24 +1571,32 @@ const docTemplate = `{ "schema.JobState": { "type": "string", "enum": [ - "running", - "completed", - "failed", + "boot_fail", "cancelled", - "stopped", - "timeout", + "completed", + "deadline", + "failed", + "node_fail", + "out_of_memory", + "pending", "preempted", - "out_of_memory" + "running", + "suspended", + "timeout" ], "x-enum-varnames": [ - "JobStateRunning", - "JobStateCompleted", - "JobStateFailed", + "JobStateBootFail", "JobStateCancelled", - "JobStateStopped", - "JobStateTimeout", + "JobStateCompleted", + "JobStateDeadline", + "JobStateFailed", + "JobStateNodeFail", + "JobStateOutOfMemory", + "JobStatePending", "JobStatePreempted", - "JobStateOutOfMemory" + "JobStateRunning", + "JobStateSuspended", + "JobStateTimeout" ] }, "schema.JobStatistics": { @@ -1763,7 +1795,8 @@ const docTemplate = `{ "additionalProperties": { "type": "array", "items": { - "type": "number" + "type": "number", + "format": "float64" } } } diff --git a/internal/api/job.go b/internal/api/job.go index 4c8ca76..21e42f8 100644 --- a/internal/api/job.go +++ b/internal/api/job.go @@ -17,6 +17,7 @@ import ( "time" "github.com/ClusterCockpit/cc-backend/internal/archiver" 
+ "github.com/ClusterCockpit/cc-backend/internal/config" "github.com/ClusterCockpit/cc-backend/internal/graph" "github.com/ClusterCockpit/cc-backend/internal/graph/model" "github.com/ClusterCockpit/cc-backend/internal/importer" @@ -142,7 +143,7 @@ func (api *RestApi) getJobs(rw http.ResponseWriter, r *http.Request) { return } ufrom, uto := time.Unix(from, 0), time.Unix(to, 0) - filter.StartTime = &schema.TimeRange{From: &ufrom, To: &uto} + filter.StartTime = &config.TimeRange{From: &ufrom, To: &uto} case "page": x, err := strconv.Atoi(vals[0]) if err != nil { diff --git a/internal/graph/generated/generated.go b/internal/graph/generated/generated.go index 238270f..a6899a1 100644 --- a/internal/graph/generated/generated.go +++ b/internal/graph/generated/generated.go @@ -14,6 +14,7 @@ import ( "github.com/99designs/gqlgen/graphql" "github.com/99designs/gqlgen/graphql/introspection" + "github.com/ClusterCockpit/cc-backend/internal/config" "github.com/ClusterCockpit/cc-backend/internal/graph/model" "github.com/ClusterCockpit/cc-lib/schema" gqlparser "github.com/vektah/gqlparser/v2" @@ -487,7 +488,7 @@ func (e *executableSchema) Schema() *ast.Schema { return parsedSchema } -func (e *executableSchema) Complexity(typeName, field string, childComplexity int, rawArgs map[string]any) (int, bool) { +func (e *executableSchema) Complexity(ctx context.Context, typeName, field string, childComplexity int, rawArgs map[string]any) (int, bool) { ec := executionContext{nil, e, 0, 0, nil} _ = ec switch typeName + "." + field { @@ -1365,7 +1366,7 @@ func (e *executableSchema) Complexity(typeName, field string, childComplexity in break } - args, err := ec.field_Mutation_addTagsToJob_args(context.TODO(), rawArgs) + args, err := ec.field_Mutation_addTagsToJob_args(ctx, rawArgs) if err != nil { return 0, false } @@ -1377,7 +1378,7 @@ func (e *executableSchema) Complexity(typeName, field string, childComplexity in break } - args, err := ec.field_Mutation_createTag_args(context.TODO(), rawArgs) + args, err := ec.field_Mutation_createTag_args(ctx, rawArgs) if err != nil { return 0, false } @@ -1389,7 +1390,7 @@ func (e *executableSchema) Complexity(typeName, field string, childComplexity in break } - args, err := ec.field_Mutation_deleteTag_args(context.TODO(), rawArgs) + args, err := ec.field_Mutation_deleteTag_args(ctx, rawArgs) if err != nil { return 0, false } @@ -1401,7 +1402,7 @@ func (e *executableSchema) Complexity(typeName, field string, childComplexity in break } - args, err := ec.field_Mutation_removeTagFromList_args(context.TODO(), rawArgs) + args, err := ec.field_Mutation_removeTagFromList_args(ctx, rawArgs) if err != nil { return 0, false } @@ -1413,7 +1414,7 @@ func (e *executableSchema) Complexity(typeName, field string, childComplexity in break } - args, err := ec.field_Mutation_removeTagsFromJob_args(context.TODO(), rawArgs) + args, err := ec.field_Mutation_removeTagsFromJob_args(ctx, rawArgs) if err != nil { return 0, false } @@ -1425,7 +1426,7 @@ func (e *executableSchema) Complexity(typeName, field string, childComplexity in break } - args, err := ec.field_Mutation_updateConfiguration_args(context.TODO(), rawArgs) + args, err := ec.field_Mutation_updateConfiguration_args(ctx, rawArgs) if err != nil { return 0, false } @@ -1612,7 +1613,7 @@ func (e *executableSchema) Complexity(typeName, field string, childComplexity in break } - args, err := ec.field_Query_allocatedNodes_args(context.TODO(), rawArgs) + args, err := ec.field_Query_allocatedNodes_args(ctx, rawArgs) if err != nil { return 0, false } 
@@ -1638,7 +1639,7 @@ func (e *executableSchema) Complexity(typeName, field string, childComplexity in break } - args, err := ec.field_Query_job_args(context.TODO(), rawArgs) + args, err := ec.field_Query_job_args(ctx, rawArgs) if err != nil { return 0, false } @@ -1650,7 +1651,7 @@ func (e *executableSchema) Complexity(typeName, field string, childComplexity in break } - args, err := ec.field_Query_jobMetrics_args(context.TODO(), rawArgs) + args, err := ec.field_Query_jobMetrics_args(ctx, rawArgs) if err != nil { return 0, false } @@ -1662,7 +1663,7 @@ func (e *executableSchema) Complexity(typeName, field string, childComplexity in break } - args, err := ec.field_Query_jobStats_args(context.TODO(), rawArgs) + args, err := ec.field_Query_jobStats_args(ctx, rawArgs) if err != nil { return 0, false } @@ -1674,7 +1675,7 @@ func (e *executableSchema) Complexity(typeName, field string, childComplexity in break } - args, err := ec.field_Query_jobs_args(context.TODO(), rawArgs) + args, err := ec.field_Query_jobs_args(ctx, rawArgs) if err != nil { return 0, false } @@ -1686,7 +1687,7 @@ func (e *executableSchema) Complexity(typeName, field string, childComplexity in break } - args, err := ec.field_Query_jobsFootprints_args(context.TODO(), rawArgs) + args, err := ec.field_Query_jobsFootprints_args(ctx, rawArgs) if err != nil { return 0, false } @@ -1698,7 +1699,7 @@ func (e *executableSchema) Complexity(typeName, field string, childComplexity in break } - args, err := ec.field_Query_jobsMetricStats_args(context.TODO(), rawArgs) + args, err := ec.field_Query_jobsMetricStats_args(ctx, rawArgs) if err != nil { return 0, false } @@ -1710,7 +1711,7 @@ func (e *executableSchema) Complexity(typeName, field string, childComplexity in break } - args, err := ec.field_Query_jobsStatistics_args(context.TODO(), rawArgs) + args, err := ec.field_Query_jobsStatistics_args(ctx, rawArgs) if err != nil { return 0, false } @@ -1722,7 +1723,7 @@ func (e *executableSchema) Complexity(typeName, field string, childComplexity in break } - args, err := ec.field_Query_node_args(context.TODO(), rawArgs) + args, err := ec.field_Query_node_args(ctx, rawArgs) if err != nil { return 0, false } @@ -1734,7 +1735,7 @@ func (e *executableSchema) Complexity(typeName, field string, childComplexity in break } - args, err := ec.field_Query_nodeMetrics_args(context.TODO(), rawArgs) + args, err := ec.field_Query_nodeMetrics_args(ctx, rawArgs) if err != nil { return 0, false } @@ -1746,7 +1747,7 @@ func (e *executableSchema) Complexity(typeName, field string, childComplexity in break } - args, err := ec.field_Query_nodeMetricsList_args(context.TODO(), rawArgs) + args, err := ec.field_Query_nodeMetricsList_args(ctx, rawArgs) if err != nil { return 0, false } @@ -1758,7 +1759,7 @@ func (e *executableSchema) Complexity(typeName, field string, childComplexity in break } - args, err := ec.field_Query_nodeStats_args(context.TODO(), rawArgs) + args, err := ec.field_Query_nodeStats_args(ctx, rawArgs) if err != nil { return 0, false } @@ -1770,7 +1771,7 @@ func (e *executableSchema) Complexity(typeName, field string, childComplexity in break } - args, err := ec.field_Query_nodes_args(context.TODO(), rawArgs) + args, err := ec.field_Query_nodes_args(ctx, rawArgs) if err != nil { return 0, false } @@ -1782,7 +1783,7 @@ func (e *executableSchema) Complexity(typeName, field string, childComplexity in break } - args, err := ec.field_Query_rooflineHeatmap_args(context.TODO(), rawArgs) + args, err := ec.field_Query_rooflineHeatmap_args(ctx, rawArgs) if err 
!= nil { return 0, false } @@ -1794,7 +1795,7 @@ func (e *executableSchema) Complexity(typeName, field string, childComplexity in break } - args, err := ec.field_Query_scopedJobStats_args(context.TODO(), rawArgs) + args, err := ec.field_Query_scopedJobStats_args(ctx, rawArgs) if err != nil { return 0, false } @@ -1813,7 +1814,7 @@ func (e *executableSchema) Complexity(typeName, field string, childComplexity in break } - args, err := ec.field_Query_user_args(context.TODO(), rawArgs) + args, err := ec.field_Query_user_args(ctx, rawArgs) if err != nil { return 0, false } @@ -12987,6 +12988,8 @@ func (ec *executionContext) fieldContext_Query___type(ctx context.Context, field return ec.fieldContext___Type_name(ctx, field) case "description": return ec.fieldContext___Type_description(ctx, field) + case "specifiedByURL": + return ec.fieldContext___Type_specifiedByURL(ctx, field) case "fields": return ec.fieldContext___Type_fields(ctx, field) case "interfaces": @@ -12999,8 +13002,6 @@ func (ec *executionContext) fieldContext_Query___type(ctx context.Context, field return ec.fieldContext___Type_inputFields(ctx, field) case "ofType": return ec.fieldContext___Type_ofType(ctx, field) - case "specifiedByURL": - return ec.fieldContext___Type_specifiedByURL(ctx, field) case "isOneOf": return ec.fieldContext___Type_isOneOf(ctx, field) } @@ -15610,6 +15611,50 @@ func (ec *executionContext) fieldContext___Directive_description(_ context.Conte return fc, nil } +func (ec *executionContext) ___Directive_isRepeatable(ctx context.Context, field graphql.CollectedField, obj *introspection.Directive) (ret graphql.Marshaler) { + fc, err := ec.fieldContext___Directive_isRepeatable(ctx, field) + if err != nil { + return graphql.Null + } + ctx = graphql.WithFieldContext(ctx, fc) + defer func() { + if r := recover(); r != nil { + ec.Error(ctx, ec.Recover(ctx, r)) + ret = graphql.Null + } + }() + resTmp, err := ec.ResolverMiddleware(ctx, func(rctx context.Context) (any, error) { + ctx = rctx // use context from middleware stack in children + return obj.IsRepeatable, nil + }) + if err != nil { + ec.Error(ctx, err) + return graphql.Null + } + if resTmp == nil { + if !graphql.HasFieldError(ctx, fc) { + ec.Errorf(ctx, "must not be null") + } + return graphql.Null + } + res := resTmp.(bool) + fc.Result = res + return ec.marshalNBoolean2bool(ctx, field.Selections, res) +} + +func (ec *executionContext) fieldContext___Directive_isRepeatable(_ context.Context, field graphql.CollectedField) (fc *graphql.FieldContext, err error) { + fc = &graphql.FieldContext{ + Object: "__Directive", + Field: field, + IsMethod: false, + IsResolver: false, + Child: func(ctx context.Context, field graphql.CollectedField) (*graphql.FieldContext, error) { + return nil, errors.New("field of type Boolean does not have child fields") + }, + } + return fc, nil +} + func (ec *executionContext) ___Directive_locations(ctx context.Context, field graphql.CollectedField, obj *introspection.Directive) (ret graphql.Marshaler) { fc, err := ec.fieldContext___Directive_locations(ctx, field) if err != nil { @@ -15723,50 +15768,6 @@ func (ec *executionContext) fieldContext___Directive_args(ctx context.Context, f return fc, nil } -func (ec *executionContext) ___Directive_isRepeatable(ctx context.Context, field graphql.CollectedField, obj *introspection.Directive) (ret graphql.Marshaler) { - fc, err := ec.fieldContext___Directive_isRepeatable(ctx, field) - if err != nil { - return graphql.Null - } - ctx = graphql.WithFieldContext(ctx, fc) - defer func() { - if r := 
recover(); r != nil { - ec.Error(ctx, ec.Recover(ctx, r)) - ret = graphql.Null - } - }() - resTmp, err := ec.ResolverMiddleware(ctx, func(rctx context.Context) (any, error) { - ctx = rctx // use context from middleware stack in children - return obj.IsRepeatable, nil - }) - if err != nil { - ec.Error(ctx, err) - return graphql.Null - } - if resTmp == nil { - if !graphql.HasFieldError(ctx, fc) { - ec.Errorf(ctx, "must not be null") - } - return graphql.Null - } - res := resTmp.(bool) - fc.Result = res - return ec.marshalNBoolean2bool(ctx, field.Selections, res) -} - -func (ec *executionContext) fieldContext___Directive_isRepeatable(_ context.Context, field graphql.CollectedField) (fc *graphql.FieldContext, err error) { - fc = &graphql.FieldContext{ - Object: "__Directive", - Field: field, - IsMethod: false, - IsResolver: false, - Child: func(ctx context.Context, field graphql.CollectedField) (*graphql.FieldContext, error) { - return nil, errors.New("field of type Boolean does not have child fields") - }, - } - return fc, nil -} - func (ec *executionContext) ___EnumValue_name(ctx context.Context, field graphql.CollectedField, obj *introspection.EnumValue) (ret graphql.Marshaler) { fc, err := ec.fieldContext___EnumValue_name(ctx, field) if err != nil { @@ -16136,6 +16137,8 @@ func (ec *executionContext) fieldContext___Field_type(_ context.Context, field g return ec.fieldContext___Type_name(ctx, field) case "description": return ec.fieldContext___Type_description(ctx, field) + case "specifiedByURL": + return ec.fieldContext___Type_specifiedByURL(ctx, field) case "fields": return ec.fieldContext___Type_fields(ctx, field) case "interfaces": @@ -16148,8 +16151,6 @@ func (ec *executionContext) fieldContext___Field_type(_ context.Context, field g return ec.fieldContext___Type_inputFields(ctx, field) case "ofType": return ec.fieldContext___Type_ofType(ctx, field) - case "specifiedByURL": - return ec.fieldContext___Type_specifiedByURL(ctx, field) case "isOneOf": return ec.fieldContext___Type_isOneOf(ctx, field) } @@ -16374,6 +16375,8 @@ func (ec *executionContext) fieldContext___InputValue_type(_ context.Context, fi return ec.fieldContext___Type_name(ctx, field) case "description": return ec.fieldContext___Type_description(ctx, field) + case "specifiedByURL": + return ec.fieldContext___Type_specifiedByURL(ctx, field) case "fields": return ec.fieldContext___Type_fields(ctx, field) case "interfaces": @@ -16386,8 +16389,6 @@ func (ec *executionContext) fieldContext___InputValue_type(_ context.Context, fi return ec.fieldContext___Type_inputFields(ctx, field) case "ofType": return ec.fieldContext___Type_ofType(ctx, field) - case "specifiedByURL": - return ec.fieldContext___Type_specifiedByURL(ctx, field) case "isOneOf": return ec.fieldContext___Type_isOneOf(ctx, field) } @@ -16609,6 +16610,8 @@ func (ec *executionContext) fieldContext___Schema_types(_ context.Context, field return ec.fieldContext___Type_name(ctx, field) case "description": return ec.fieldContext___Type_description(ctx, field) + case "specifiedByURL": + return ec.fieldContext___Type_specifiedByURL(ctx, field) case "fields": return ec.fieldContext___Type_fields(ctx, field) case "interfaces": @@ -16621,8 +16624,6 @@ func (ec *executionContext) fieldContext___Schema_types(_ context.Context, field return ec.fieldContext___Type_inputFields(ctx, field) case "ofType": return ec.fieldContext___Type_ofType(ctx, field) - case "specifiedByURL": - return ec.fieldContext___Type_specifiedByURL(ctx, field) case "isOneOf": return 
ec.fieldContext___Type_isOneOf(ctx, field) } @@ -16677,6 +16678,8 @@ func (ec *executionContext) fieldContext___Schema_queryType(_ context.Context, f return ec.fieldContext___Type_name(ctx, field) case "description": return ec.fieldContext___Type_description(ctx, field) + case "specifiedByURL": + return ec.fieldContext___Type_specifiedByURL(ctx, field) case "fields": return ec.fieldContext___Type_fields(ctx, field) case "interfaces": @@ -16689,8 +16692,6 @@ func (ec *executionContext) fieldContext___Schema_queryType(_ context.Context, f return ec.fieldContext___Type_inputFields(ctx, field) case "ofType": return ec.fieldContext___Type_ofType(ctx, field) - case "specifiedByURL": - return ec.fieldContext___Type_specifiedByURL(ctx, field) case "isOneOf": return ec.fieldContext___Type_isOneOf(ctx, field) } @@ -16742,6 +16743,8 @@ func (ec *executionContext) fieldContext___Schema_mutationType(_ context.Context return ec.fieldContext___Type_name(ctx, field) case "description": return ec.fieldContext___Type_description(ctx, field) + case "specifiedByURL": + return ec.fieldContext___Type_specifiedByURL(ctx, field) case "fields": return ec.fieldContext___Type_fields(ctx, field) case "interfaces": @@ -16754,8 +16757,6 @@ func (ec *executionContext) fieldContext___Schema_mutationType(_ context.Context return ec.fieldContext___Type_inputFields(ctx, field) case "ofType": return ec.fieldContext___Type_ofType(ctx, field) - case "specifiedByURL": - return ec.fieldContext___Type_specifiedByURL(ctx, field) case "isOneOf": return ec.fieldContext___Type_isOneOf(ctx, field) } @@ -16807,6 +16808,8 @@ func (ec *executionContext) fieldContext___Schema_subscriptionType(_ context.Con return ec.fieldContext___Type_name(ctx, field) case "description": return ec.fieldContext___Type_description(ctx, field) + case "specifiedByURL": + return ec.fieldContext___Type_specifiedByURL(ctx, field) case "fields": return ec.fieldContext___Type_fields(ctx, field) case "interfaces": @@ -16819,8 +16822,6 @@ func (ec *executionContext) fieldContext___Schema_subscriptionType(_ context.Con return ec.fieldContext___Type_inputFields(ctx, field) case "ofType": return ec.fieldContext___Type_ofType(ctx, field) - case "specifiedByURL": - return ec.fieldContext___Type_specifiedByURL(ctx, field) case "isOneOf": return ec.fieldContext___Type_isOneOf(ctx, field) } @@ -16873,12 +16874,12 @@ func (ec *executionContext) fieldContext___Schema_directives(_ context.Context, return ec.fieldContext___Directive_name(ctx, field) case "description": return ec.fieldContext___Directive_description(ctx, field) + case "isRepeatable": + return ec.fieldContext___Directive_isRepeatable(ctx, field) case "locations": return ec.fieldContext___Directive_locations(ctx, field) case "args": return ec.fieldContext___Directive_args(ctx, field) - case "isRepeatable": - return ec.fieldContext___Directive_isRepeatable(ctx, field) } return nil, fmt.Errorf("no field named %q was found under type __Directive", field.Name) }, @@ -17012,6 +17013,47 @@ func (ec *executionContext) fieldContext___Type_description(_ context.Context, f return fc, nil } +func (ec *executionContext) ___Type_specifiedByURL(ctx context.Context, field graphql.CollectedField, obj *introspection.Type) (ret graphql.Marshaler) { + fc, err := ec.fieldContext___Type_specifiedByURL(ctx, field) + if err != nil { + return graphql.Null + } + ctx = graphql.WithFieldContext(ctx, fc) + defer func() { + if r := recover(); r != nil { + ec.Error(ctx, ec.Recover(ctx, r)) + ret = graphql.Null + } + }() + resTmp, err := 
ec.ResolverMiddleware(ctx, func(rctx context.Context) (any, error) { + ctx = rctx // use context from middleware stack in children + return obj.SpecifiedByURL(), nil + }) + if err != nil { + ec.Error(ctx, err) + return graphql.Null + } + if resTmp == nil { + return graphql.Null + } + res := resTmp.(*string) + fc.Result = res + return ec.marshalOString2ᚖstring(ctx, field.Selections, res) +} + +func (ec *executionContext) fieldContext___Type_specifiedByURL(_ context.Context, field graphql.CollectedField) (fc *graphql.FieldContext, err error) { + fc = &graphql.FieldContext{ + Object: "__Type", + Field: field, + IsMethod: true, + IsResolver: false, + Child: func(ctx context.Context, field graphql.CollectedField) (*graphql.FieldContext, error) { + return nil, errors.New("field of type String does not have child fields") + }, + } + return fc, nil +} + func (ec *executionContext) ___Type_fields(ctx context.Context, field graphql.CollectedField, obj *introspection.Type) (ret graphql.Marshaler) { fc, err := ec.fieldContext___Type_fields(ctx, field) if err != nil { @@ -17120,6 +17162,8 @@ func (ec *executionContext) fieldContext___Type_interfaces(_ context.Context, fi return ec.fieldContext___Type_name(ctx, field) case "description": return ec.fieldContext___Type_description(ctx, field) + case "specifiedByURL": + return ec.fieldContext___Type_specifiedByURL(ctx, field) case "fields": return ec.fieldContext___Type_fields(ctx, field) case "interfaces": @@ -17132,8 +17176,6 @@ func (ec *executionContext) fieldContext___Type_interfaces(_ context.Context, fi return ec.fieldContext___Type_inputFields(ctx, field) case "ofType": return ec.fieldContext___Type_ofType(ctx, field) - case "specifiedByURL": - return ec.fieldContext___Type_specifiedByURL(ctx, field) case "isOneOf": return ec.fieldContext___Type_isOneOf(ctx, field) } @@ -17185,6 +17227,8 @@ func (ec *executionContext) fieldContext___Type_possibleTypes(_ context.Context, return ec.fieldContext___Type_name(ctx, field) case "description": return ec.fieldContext___Type_description(ctx, field) + case "specifiedByURL": + return ec.fieldContext___Type_specifiedByURL(ctx, field) case "fields": return ec.fieldContext___Type_fields(ctx, field) case "interfaces": @@ -17197,8 +17241,6 @@ func (ec *executionContext) fieldContext___Type_possibleTypes(_ context.Context, return ec.fieldContext___Type_inputFields(ctx, field) case "ofType": return ec.fieldContext___Type_ofType(ctx, field) - case "specifiedByURL": - return ec.fieldContext___Type_specifiedByURL(ctx, field) case "isOneOf": return ec.fieldContext___Type_isOneOf(ctx, field) } @@ -17367,6 +17409,8 @@ func (ec *executionContext) fieldContext___Type_ofType(_ context.Context, field return ec.fieldContext___Type_name(ctx, field) case "description": return ec.fieldContext___Type_description(ctx, field) + case "specifiedByURL": + return ec.fieldContext___Type_specifiedByURL(ctx, field) case "fields": return ec.fieldContext___Type_fields(ctx, field) case "interfaces": @@ -17379,8 +17423,6 @@ func (ec *executionContext) fieldContext___Type_ofType(_ context.Context, field return ec.fieldContext___Type_inputFields(ctx, field) case "ofType": return ec.fieldContext___Type_ofType(ctx, field) - case "specifiedByURL": - return ec.fieldContext___Type_specifiedByURL(ctx, field) case "isOneOf": return ec.fieldContext___Type_isOneOf(ctx, field) } @@ -17390,47 +17432,6 @@ func (ec *executionContext) fieldContext___Type_ofType(_ context.Context, field return fc, nil } -func (ec *executionContext) ___Type_specifiedByURL(ctx 
context.Context, field graphql.CollectedField, obj *introspection.Type) (ret graphql.Marshaler) { - fc, err := ec.fieldContext___Type_specifiedByURL(ctx, field) - if err != nil { - return graphql.Null - } - ctx = graphql.WithFieldContext(ctx, fc) - defer func() { - if r := recover(); r != nil { - ec.Error(ctx, ec.Recover(ctx, r)) - ret = graphql.Null - } - }() - resTmp, err := ec.ResolverMiddleware(ctx, func(rctx context.Context) (any, error) { - ctx = rctx // use context from middleware stack in children - return obj.SpecifiedByURL(), nil - }) - if err != nil { - ec.Error(ctx, err) - return graphql.Null - } - if resTmp == nil { - return graphql.Null - } - res := resTmp.(*string) - fc.Result = res - return ec.marshalOString2ᚖstring(ctx, field.Selections, res) -} - -func (ec *executionContext) fieldContext___Type_specifiedByURL(_ context.Context, field graphql.CollectedField) (fc *graphql.FieldContext, err error) { - fc = &graphql.FieldContext{ - Object: "__Type", - Field: field, - IsMethod: true, - IsResolver: false, - Child: func(ctx context.Context, field graphql.CollectedField) (*graphql.FieldContext, error) { - return nil, errors.New("field of type String does not have child fields") - }, - } - return fc, nil -} - func (ec *executionContext) ___Type_isOneOf(ctx context.Context, field graphql.CollectedField, obj *introspection.Type) (ret graphql.Marshaler) { fc, err := ec.fieldContext___Type_isOneOf(ctx, field) if err != nil { @@ -17510,8 +17511,8 @@ func (ec *executionContext) unmarshalInputFloatRange(ctx context.Context, obj an return it, nil } -func (ec *executionContext) unmarshalInputIntRange(ctx context.Context, obj any) (schema.IntRange, error) { - var it schema.IntRange +func (ec *executionContext) unmarshalInputIntRange(ctx context.Context, obj any) (config.IntRange, error) { + var it config.IntRange asMap := map[string]any{} for k, v := range obj.(map[string]any) { asMap[k] = v @@ -17623,7 +17624,7 @@ func (ec *executionContext) unmarshalInputJobFilter(ctx context.Context, obj any it.Partition = data case "duration": ctx := graphql.WithPathContext(ctx, graphql.NewPathWithField("duration")) - data, err := ec.unmarshalOIntRange2ᚖgithubᚗcomᚋClusterCockpitᚋccᚑlibᚋschemaᚐIntRange(ctx, v) + data, err := ec.unmarshalOIntRange2ᚖgithubᚗcomᚋClusterCockpitᚋccᚑbackendᚋinternalᚋconfigᚐIntRange(ctx, v) if err != nil { return it, err } @@ -17644,28 +17645,28 @@ func (ec *executionContext) unmarshalInputJobFilter(ctx context.Context, obj any it.MinRunningFor = data case "numNodes": ctx := graphql.WithPathContext(ctx, graphql.NewPathWithField("numNodes")) - data, err := ec.unmarshalOIntRange2ᚖgithubᚗcomᚋClusterCockpitᚋccᚑlibᚋschemaᚐIntRange(ctx, v) + data, err := ec.unmarshalOIntRange2ᚖgithubᚗcomᚋClusterCockpitᚋccᚑbackendᚋinternalᚋconfigᚐIntRange(ctx, v) if err != nil { return it, err } it.NumNodes = data case "numAccelerators": ctx := graphql.WithPathContext(ctx, graphql.NewPathWithField("numAccelerators")) - data, err := ec.unmarshalOIntRange2ᚖgithubᚗcomᚋClusterCockpitᚋccᚑlibᚋschemaᚐIntRange(ctx, v) + data, err := ec.unmarshalOIntRange2ᚖgithubᚗcomᚋClusterCockpitᚋccᚑbackendᚋinternalᚋconfigᚐIntRange(ctx, v) if err != nil { return it, err } it.NumAccelerators = data case "numHWThreads": ctx := graphql.WithPathContext(ctx, graphql.NewPathWithField("numHWThreads")) - data, err := ec.unmarshalOIntRange2ᚖgithubᚗcomᚋClusterCockpitᚋccᚑlibᚋschemaᚐIntRange(ctx, v) + data, err := ec.unmarshalOIntRange2ᚖgithubᚗcomᚋClusterCockpitᚋccᚑbackendᚋinternalᚋconfigᚐIntRange(ctx, v) if err != nil { return it, err } 
it.NumHWThreads = data case "startTime": ctx := graphql.WithPathContext(ctx, graphql.NewPathWithField("startTime")) - data, err := ec.unmarshalOTimeRange2ᚖgithubᚗcomᚋClusterCockpitᚋccᚑlibᚋschemaᚐTimeRange(ctx, v) + data, err := ec.unmarshalOTimeRange2ᚖgithubᚗcomᚋClusterCockpitᚋccᚑbackendᚋinternalᚋconfigᚐTimeRange(ctx, v) if err != nil { return it, err } @@ -17927,8 +17928,8 @@ func (ec *executionContext) unmarshalInputStringInput(ctx context.Context, obj a return it, nil } -func (ec *executionContext) unmarshalInputTimeRange(ctx context.Context, obj any) (schema.TimeRange, error) { - var it schema.TimeRange +func (ec *executionContext) unmarshalInputTimeRange(ctx context.Context, obj any) (config.TimeRange, error) { + var it config.TimeRange asMap := map[string]any{} for k, v := range obj.(map[string]any) { asMap[k] = v @@ -21263,6 +21264,11 @@ func (ec *executionContext) ___Directive(ctx context.Context, sel ast.SelectionS } case "description": out.Values[i] = ec.___Directive_description(ctx, field, obj) + case "isRepeatable": + out.Values[i] = ec.___Directive_isRepeatable(ctx, field, obj) + if out.Values[i] == graphql.Null { + out.Invalids++ + } case "locations": out.Values[i] = ec.___Directive_locations(ctx, field, obj) if out.Values[i] == graphql.Null { @@ -21273,11 +21279,6 @@ func (ec *executionContext) ___Directive(ctx context.Context, sel ast.SelectionS if out.Values[i] == graphql.Null { out.Invalids++ } - case "isRepeatable": - out.Values[i] = ec.___Directive_isRepeatable(ctx, field, obj) - if out.Values[i] == graphql.Null { - out.Invalids++ - } default: panic("unknown field " + strconv.Quote(field.Name)) } @@ -21537,6 +21538,8 @@ func (ec *executionContext) ___Type(ctx context.Context, sel ast.SelectionSet, o out.Values[i] = ec.___Type_name(ctx, field, obj) case "description": out.Values[i] = ec.___Type_description(ctx, field, obj) + case "specifiedByURL": + out.Values[i] = ec.___Type_specifiedByURL(ctx, field, obj) case "fields": out.Values[i] = ec.___Type_fields(ctx, field, obj) case "interfaces": @@ -21549,8 +21552,6 @@ func (ec *executionContext) ___Type(ctx context.Context, sel ast.SelectionSet, o out.Values[i] = ec.___Type_inputFields(ctx, field, obj) case "ofType": out.Values[i] = ec.___Type_ofType(ctx, field, obj) - case "specifiedByURL": - out.Values[i] = ec.___Type_specifiedByURL(ctx, field, obj) case "isOneOf": out.Values[i] = ec.___Type_isOneOf(ctx, field, obj) default: @@ -21596,6 +21597,7 @@ func (ec *executionContext) unmarshalNBoolean2bool(ctx context.Context, v any) ( } func (ec *executionContext) marshalNBoolean2bool(ctx context.Context, sel ast.SelectionSet, v bool) graphql.Marshaler { + _ = sel res := graphql.MarshalBoolean(v) if res == graphql.Null { if !graphql.HasFieldError(ctx, graphql.GetFieldContext(ctx)) { @@ -21767,6 +21769,7 @@ func (ec *executionContext) unmarshalNFloat2float64(ctx context.Context, v any) } func (ec *executionContext) marshalNFloat2float64(ctx context.Context, sel ast.SelectionSet, v float64) graphql.Marshaler { + _ = sel res := graphql.MarshalFloatContext(v) if res == graphql.Null { if !graphql.HasFieldError(ctx, graphql.GetFieldContext(ctx)) { @@ -21778,9 +21781,7 @@ func (ec *executionContext) marshalNFloat2float64(ctx context.Context, sel ast.S func (ec *executionContext) unmarshalNFloat2ᚕfloat64ᚄ(ctx context.Context, v any) ([]float64, error) { var vSlice []any - if v != nil { - vSlice = graphql.CoerceList(v) - } + vSlice = graphql.CoerceList(v) var err error res := make([]float64, len(vSlice)) for i := range vSlice { @@ 
-21810,9 +21811,7 @@ func (ec *executionContext) marshalNFloat2ᚕfloat64ᚄ(ctx context.Context, sel func (ec *executionContext) unmarshalNFloat2ᚕᚕfloat64ᚄ(ctx context.Context, v any) ([][]float64, error) { var vSlice []any - if v != nil { - vSlice = graphql.CoerceList(v) - } + vSlice = graphql.CoerceList(v) var err error res := make([][]float64, len(vSlice)) for i := range vSlice { @@ -21959,6 +21958,7 @@ func (ec *executionContext) unmarshalNID2int64(ctx context.Context, v any) (int6 } func (ec *executionContext) marshalNID2int64(ctx context.Context, sel ast.SelectionSet, v int64) graphql.Marshaler { + _ = sel res := graphql.MarshalInt64(v) if res == graphql.Null { if !graphql.HasFieldError(ctx, graphql.GetFieldContext(ctx)) { @@ -21974,6 +21974,7 @@ func (ec *executionContext) unmarshalNID2string(ctx context.Context, v any) (str } func (ec *executionContext) marshalNID2string(ctx context.Context, sel ast.SelectionSet, v string) graphql.Marshaler { + _ = sel res := graphql.MarshalID(v) if res == graphql.Null { if !graphql.HasFieldError(ctx, graphql.GetFieldContext(ctx)) { @@ -21985,9 +21986,7 @@ func (ec *executionContext) marshalNID2string(ctx context.Context, sel ast.Selec func (ec *executionContext) unmarshalNID2ᚕstringᚄ(ctx context.Context, v any) ([]string, error) { var vSlice []any - if v != nil { - vSlice = graphql.CoerceList(v) - } + vSlice = graphql.CoerceList(v) var err error res := make([]string, len(vSlice)) for i := range vSlice { @@ -22027,6 +22026,7 @@ func (ec *executionContext) marshalNID2ᚖint64(ctx context.Context, sel ast.Sel } return graphql.Null } + _ = sel res := graphql.MarshalInt64(*v) if res == graphql.Null { if !graphql.HasFieldError(ctx, graphql.GetFieldContext(ctx)) { @@ -22042,6 +22042,7 @@ func (ec *executionContext) unmarshalNInt2int(ctx context.Context, v any) (int, } func (ec *executionContext) marshalNInt2int(ctx context.Context, sel ast.SelectionSet, v int) graphql.Marshaler { + _ = sel res := graphql.MarshalInt(v) if res == graphql.Null { if !graphql.HasFieldError(ctx, graphql.GetFieldContext(ctx)) { @@ -22057,6 +22058,7 @@ func (ec *executionContext) unmarshalNInt2int32(ctx context.Context, v any) (int } func (ec *executionContext) marshalNInt2int32(ctx context.Context, sel ast.SelectionSet, v int32) graphql.Marshaler { + _ = sel res := graphql.MarshalInt32(v) if res == graphql.Null { if !graphql.HasFieldError(ctx, graphql.GetFieldContext(ctx)) { @@ -22072,6 +22074,7 @@ func (ec *executionContext) unmarshalNInt2int64(ctx context.Context, v any) (int } func (ec *executionContext) marshalNInt2int64(ctx context.Context, sel ast.SelectionSet, v int64) graphql.Marshaler { + _ = sel res := graphql.MarshalInt64(v) if res == graphql.Null { if !graphql.HasFieldError(ctx, graphql.GetFieldContext(ctx)) { @@ -22083,9 +22086,7 @@ func (ec *executionContext) marshalNInt2int64(ctx context.Context, sel ast.Selec func (ec *executionContext) unmarshalNInt2ᚕintᚄ(ctx context.Context, v any) ([]int, error) { var vSlice []any - if v != nil { - vSlice = graphql.CoerceList(v) - } + vSlice = graphql.CoerceList(v) var err error res := make([]int, len(vSlice)) for i := range vSlice { @@ -22115,9 +22116,7 @@ func (ec *executionContext) marshalNInt2ᚕintᚄ(ctx context.Context, sel ast.S func (ec *executionContext) unmarshalNInt2ᚕᚖintᚄ(ctx context.Context, v any) ([]*int, error) { var vSlice []any - if v != nil { - vSlice = graphql.CoerceList(v) - } + vSlice = graphql.CoerceList(v) var err error res := make([]*int, len(vSlice)) for i := range vSlice { @@ -22157,6 +22156,7 @@ func (ec 
*executionContext) marshalNInt2ᚖint(ctx context.Context, sel ast.Sele } return graphql.Null } + _ = sel res := graphql.MarshalInt(*v) if res == graphql.Null { if !graphql.HasFieldError(ctx, graphql.GetFieldContext(ctx)) { @@ -22222,9 +22222,7 @@ func (ec *executionContext) marshalNJob2ᚖgithubᚗcomᚋClusterCockpitᚋccᚑ func (ec *executionContext) unmarshalNJobFilter2ᚕᚖgithubᚗcomᚋClusterCockpitᚋccᚑbackendᚋinternalᚋgraphᚋmodelᚐJobFilterᚄ(ctx context.Context, v any) ([]*model.JobFilter, error) { var vSlice []any - if v != nil { - vSlice = graphql.CoerceList(v) - } + vSlice = graphql.CoerceList(v) var err error res := make([]*model.JobFilter, len(vSlice)) for i := range vSlice { @@ -22694,6 +22692,7 @@ func (ec *executionContext) unmarshalNMonitoringState2githubᚗcomᚋClusterCock } func (ec *executionContext) marshalNMonitoringState2githubᚗcomᚋClusterCockpitᚋccᚑlibᚋschemaᚐNodeState(ctx context.Context, sel ast.SelectionSet, v schema.NodeState) graphql.Marshaler { + _ = sel res := graphql.MarshalString(string(v)) if res == graphql.Null { if !graphql.HasFieldError(ctx, graphql.GetFieldContext(ctx)) { @@ -22930,6 +22929,7 @@ func (ec *executionContext) unmarshalNNodeState2string(ctx context.Context, v an } func (ec *executionContext) marshalNNodeState2string(ctx context.Context, sel ast.SelectionSet, v string) graphql.Marshaler { + _ = sel res := graphql.MarshalString(v) if res == graphql.Null { if !graphql.HasFieldError(ctx, graphql.GetFieldContext(ctx)) { @@ -23033,9 +23033,7 @@ func (ec *executionContext) marshalNNullableFloat2githubᚗcomᚋClusterCockpit func (ec *executionContext) unmarshalNNullableFloat2ᚕgithubᚗcomᚋClusterCockpitᚋccᚑlibᚋschemaᚐFloatᚄ(ctx context.Context, v any) ([]schema.Float, error) { var vSlice []any - if v != nil { - vSlice = graphql.CoerceList(v) - } + vSlice = graphql.CoerceList(v) var err error res := make([]schema.Float, len(vSlice)) for i := range vSlice { @@ -23191,6 +23189,7 @@ func (ec *executionContext) unmarshalNString2string(ctx context.Context, v any) } func (ec *executionContext) marshalNString2string(ctx context.Context, sel ast.SelectionSet, v string) graphql.Marshaler { + _ = sel res := graphql.MarshalString(v) if res == graphql.Null { if !graphql.HasFieldError(ctx, graphql.GetFieldContext(ctx)) { @@ -23202,9 +23201,7 @@ func (ec *executionContext) marshalNString2string(ctx context.Context, sel ast.S func (ec *executionContext) unmarshalNString2ᚕstringᚄ(ctx context.Context, v any) ([]string, error) { var vSlice []any - if v != nil { - vSlice = graphql.CoerceList(v) - } + vSlice = graphql.CoerceList(v) var err error res := make([]string, len(vSlice)) for i := range vSlice { @@ -23404,6 +23401,7 @@ func (ec *executionContext) unmarshalNTime2timeᚐTime(ctx context.Context, v an } func (ec *executionContext) marshalNTime2timeᚐTime(ctx context.Context, sel ast.SelectionSet, v time.Time) graphql.Marshaler { + _ = sel res := graphql.MarshalTime(v) if res == graphql.Null { if !graphql.HasFieldError(ctx, graphql.GetFieldContext(ctx)) { @@ -23425,6 +23423,7 @@ func (ec *executionContext) marshalNTime2ᚖtimeᚐTime(ctx context.Context, sel } return graphql.Null } + _ = sel res := graphql.MarshalTime(*v) if res == graphql.Null { if !graphql.HasFieldError(ctx, graphql.GetFieldContext(ctx)) { @@ -23506,6 +23505,7 @@ func (ec *executionContext) unmarshalN__DirectiveLocation2string(ctx context.Con } func (ec *executionContext) marshalN__DirectiveLocation2string(ctx context.Context, sel ast.SelectionSet, v string) graphql.Marshaler { + _ = sel res := graphql.MarshalString(v) if res 
== graphql.Null { if !graphql.HasFieldError(ctx, graphql.GetFieldContext(ctx)) { @@ -23517,9 +23517,7 @@ func (ec *executionContext) marshalN__DirectiveLocation2string(ctx context.Conte func (ec *executionContext) unmarshalN__DirectiveLocation2ᚕstringᚄ(ctx context.Context, v any) ([]string, error) { var vSlice []any - if v != nil { - vSlice = graphql.CoerceList(v) - } + vSlice = graphql.CoerceList(v) var err error res := make([]string, len(vSlice)) for i := range vSlice { @@ -23696,6 +23694,7 @@ func (ec *executionContext) unmarshalN__TypeKind2string(ctx context.Context, v a } func (ec *executionContext) marshalN__TypeKind2string(ctx context.Context, sel ast.SelectionSet, v string) graphql.Marshaler { + _ = sel res := graphql.MarshalString(v) if res == graphql.Null { if !graphql.HasFieldError(ctx, graphql.GetFieldContext(ctx)) { @@ -23780,6 +23779,8 @@ func (ec *executionContext) marshalOAny2interface(ctx context.Context, sel ast.S if v == nil { return graphql.Null } + _ = sel + _ = ctx res := graphql.MarshalAny(v) return res } @@ -23790,6 +23791,8 @@ func (ec *executionContext) unmarshalOBoolean2bool(ctx context.Context, v any) ( } func (ec *executionContext) marshalOBoolean2bool(ctx context.Context, sel ast.SelectionSet, v bool) graphql.Marshaler { + _ = sel + _ = ctx res := graphql.MarshalBoolean(v) return res } @@ -23806,6 +23809,8 @@ func (ec *executionContext) marshalOBoolean2ᚖbool(ctx context.Context, sel ast if v == nil { return graphql.Null } + _ = sel + _ = ctx res := graphql.MarshalBoolean(*v) return res } @@ -23864,6 +23869,7 @@ func (ec *executionContext) unmarshalOFloat2float64(ctx context.Context, v any) } func (ec *executionContext) marshalOFloat2float64(ctx context.Context, sel ast.SelectionSet, v float64) graphql.Marshaler { + _ = sel res := graphql.MarshalFloatContext(v) return graphql.WrapContextMarshaler(ctx, res) } @@ -23936,9 +23942,7 @@ func (ec *executionContext) unmarshalOID2ᚕstringᚄ(ctx context.Context, v any return nil, nil } var vSlice []any - if v != nil { - vSlice = graphql.CoerceList(v) - } + vSlice = graphql.CoerceList(v) var err error res := make([]string, len(vSlice)) for i := range vSlice { @@ -23974,9 +23978,7 @@ func (ec *executionContext) unmarshalOInt2ᚕintᚄ(ctx context.Context, v any) return nil, nil } var vSlice []any - if v != nil { - vSlice = graphql.CoerceList(v) - } + vSlice = graphql.CoerceList(v) var err error res := make([]int, len(vSlice)) for i := range vSlice { @@ -24012,9 +24014,7 @@ func (ec *executionContext) unmarshalOInt2ᚕᚕintᚄ(ctx context.Context, v an return nil, nil } var vSlice []any - if v != nil { - vSlice = graphql.CoerceList(v) - } + vSlice = graphql.CoerceList(v) var err error res := make([][]int, len(vSlice)) for i := range vSlice { @@ -24050,9 +24050,7 @@ func (ec *executionContext) unmarshalOInt2ᚕᚕᚖintᚄ(ctx context.Context, v return nil, nil } var vSlice []any - if v != nil { - vSlice = graphql.CoerceList(v) - } + vSlice = graphql.CoerceList(v) var err error res := make([][]*int, len(vSlice)) for i := range vSlice { @@ -24095,11 +24093,13 @@ func (ec *executionContext) marshalOInt2ᚖint(ctx context.Context, sel ast.Sele if v == nil { return graphql.Null } + _ = sel + _ = ctx res := graphql.MarshalInt(*v) return res } -func (ec *executionContext) unmarshalOIntRange2ᚖgithubᚗcomᚋClusterCockpitᚋccᚑlibᚋschemaᚐIntRange(ctx context.Context, v any) (*schema.IntRange, error) { +func (ec *executionContext) unmarshalOIntRange2ᚖgithubᚗcomᚋClusterCockpitᚋccᚑbackendᚋinternalᚋconfigᚐIntRange(ctx context.Context, v any) (*config.IntRange, 
error) { if v == nil { return nil, nil } @@ -24119,9 +24119,7 @@ func (ec *executionContext) unmarshalOJobFilter2ᚕᚖgithubᚗcomᚋClusterCock return nil, nil } var vSlice []any - if v != nil { - vSlice = graphql.CoerceList(v) - } + vSlice = graphql.CoerceList(v) var err error res := make([]*model.JobFilter, len(vSlice)) for i := range vSlice { @@ -24146,9 +24144,7 @@ func (ec *executionContext) unmarshalOJobState2ᚕgithubᚗcomᚋClusterCockpit return nil, nil } var vSlice []any - if v != nil { - vSlice = graphql.CoerceList(v) - } + vSlice = graphql.CoerceList(v) var err error res := make([]schema.JobState, len(vSlice)) for i := range vSlice { @@ -24231,9 +24227,7 @@ func (ec *executionContext) unmarshalOMetricScope2ᚕgithubᚗcomᚋClusterCockp return nil, nil } var vSlice []any - if v != nil { - vSlice = graphql.CoerceList(v) - } + vSlice = graphql.CoerceList(v) var err error res := make([]schema.MetricScope, len(vSlice)) for i := range vSlice { @@ -24269,9 +24263,7 @@ func (ec *executionContext) unmarshalOMetricStatItem2ᚕᚖgithubᚗcomᚋCluste return nil, nil } var vSlice []any - if v != nil { - vSlice = graphql.CoerceList(v) - } + vSlice = graphql.CoerceList(v) var err error res := make([]*model.MetricStatItem, len(vSlice)) for i := range vSlice { @@ -24301,6 +24293,8 @@ func (ec *executionContext) marshalOMonitoringState2ᚖgithubᚗcomᚋClusterCoc if v == nil { return graphql.Null } + _ = sel + _ = ctx res := graphql.MarshalString(string(*v)) return res } @@ -24317,9 +24311,7 @@ func (ec *executionContext) unmarshalONodeFilter2ᚕᚖgithubᚗcomᚋClusterCoc return nil, nil } var vSlice []any - if v != nil { - vSlice = graphql.CoerceList(v) - } + vSlice = graphql.CoerceList(v) var err error res := make([]*model.NodeFilter, len(vSlice)) for i := range vSlice { @@ -24344,6 +24336,8 @@ func (ec *executionContext) marshalONodeState2ᚖstring(ctx context.Context, sel if v == nil { return graphql.Null } + _ = sel + _ = ctx res := graphql.MarshalString(*v) return res } @@ -24440,6 +24434,8 @@ func (ec *executionContext) unmarshalOString2string(ctx context.Context, v any) } func (ec *executionContext) marshalOString2string(ctx context.Context, sel ast.SelectionSet, v string) graphql.Marshaler { + _ = sel + _ = ctx res := graphql.MarshalString(v) return res } @@ -24449,9 +24445,7 @@ func (ec *executionContext) unmarshalOString2ᚕstringᚄ(ctx context.Context, v return nil, nil } var vSlice []any - if v != nil { - vSlice = graphql.CoerceList(v) - } + vSlice = graphql.CoerceList(v) var err error res := make([]string, len(vSlice)) for i := range vSlice { @@ -24494,6 +24488,8 @@ func (ec *executionContext) marshalOString2ᚖstring(ctx context.Context, sel as if v == nil { return graphql.Null } + _ = sel + _ = ctx res := graphql.MarshalString(*v) return res } @@ -24518,11 +24514,13 @@ func (ec *executionContext) marshalOTime2ᚖtimeᚐTime(ctx context.Context, sel if v == nil { return graphql.Null } + _ = sel + _ = ctx res := graphql.MarshalTime(*v) return res } -func (ec *executionContext) unmarshalOTimeRange2ᚖgithubᚗcomᚋClusterCockpitᚋccᚑlibᚋschemaᚐTimeRange(ctx context.Context, v any) (*schema.TimeRange, error) { +func (ec *executionContext) unmarshalOTimeRange2ᚖgithubᚗcomᚋClusterCockpitᚋccᚑbackendᚋinternalᚋconfigᚐTimeRange(ctx context.Context, v any) (*config.TimeRange, error) { if v == nil { return nil, nil } diff --git a/internal/graph/model/models_gen.go b/internal/graph/model/models_gen.go index 5a32ac9..e9abf0d 100644 --- a/internal/graph/model/models_gen.go +++ b/internal/graph/model/models_gen.go @@ -3,11 +3,13 @@ package model 
import ( + "bytes" "fmt" "io" "strconv" "time" + "github.com/ClusterCockpit/cc-backend/internal/config" "github.com/ClusterCockpit/cc-lib/schema" ) @@ -58,13 +60,13 @@ type JobFilter struct { JobName *StringInput `json:"jobName,omitempty"` Cluster *StringInput `json:"cluster,omitempty"` Partition *StringInput `json:"partition,omitempty"` - Duration *schema.IntRange `json:"duration,omitempty"` + Duration *config.IntRange `json:"duration,omitempty"` Energy *FloatRange `json:"energy,omitempty"` MinRunningFor *int `json:"minRunningFor,omitempty"` - NumNodes *schema.IntRange `json:"numNodes,omitempty"` - NumAccelerators *schema.IntRange `json:"numAccelerators,omitempty"` - NumHWThreads *schema.IntRange `json:"numHWThreads,omitempty"` - StartTime *schema.TimeRange `json:"startTime,omitempty"` + NumNodes *config.IntRange `json:"numNodes,omitempty"` + NumAccelerators *config.IntRange `json:"numAccelerators,omitempty"` + NumHWThreads *config.IntRange `json:"numHWThreads,omitempty"` + StartTime *config.TimeRange `json:"startTime,omitempty"` State []schema.JobState `json:"state,omitempty"` MetricStats []*MetricStatItem `json:"metricStats,omitempty"` Exclusive *int `json:"exclusive,omitempty"` @@ -286,6 +288,20 @@ func (e Aggregate) MarshalGQL(w io.Writer) { fmt.Fprint(w, strconv.Quote(e.String())) } +func (e *Aggregate) UnmarshalJSON(b []byte) error { + s, err := strconv.Unquote(string(b)) + if err != nil { + return err + } + return e.UnmarshalGQL(s) +} + +func (e Aggregate) MarshalJSON() ([]byte, error) { + var buf bytes.Buffer + e.MarshalGQL(&buf) + return buf.Bytes(), nil +} + type SortByAggregate string const ( @@ -339,6 +355,20 @@ func (e SortByAggregate) MarshalGQL(w io.Writer) { fmt.Fprint(w, strconv.Quote(e.String())) } +func (e *SortByAggregate) UnmarshalJSON(b []byte) error { + s, err := strconv.Unquote(string(b)) + if err != nil { + return err + } + return e.UnmarshalGQL(s) +} + +func (e SortByAggregate) MarshalJSON() ([]byte, error) { + var buf bytes.Buffer + e.MarshalGQL(&buf) + return buf.Bytes(), nil +} + type SortDirectionEnum string const ( @@ -379,3 +409,17 @@ func (e *SortDirectionEnum) UnmarshalGQL(v any) error { func (e SortDirectionEnum) MarshalGQL(w io.Writer) { fmt.Fprint(w, strconv.Quote(e.String())) } + +func (e *SortDirectionEnum) UnmarshalJSON(b []byte) error { + s, err := strconv.Unquote(string(b)) + if err != nil { + return err + } + return e.UnmarshalGQL(s) +} + +func (e SortDirectionEnum) MarshalJSON() ([]byte, error) { + var buf bytes.Buffer + e.MarshalGQL(&buf) + return buf.Bytes(), nil +} diff --git a/internal/graph/schema.resolvers.go b/internal/graph/schema.resolvers.go index 78a76ef..e0e74b5 100644 --- a/internal/graph/schema.resolvers.go +++ b/internal/graph/schema.resolvers.go @@ -2,7 +2,7 @@ package graph // This file will be automatically regenerated based on the schema, any resolver implementations // will be copied through when generating and any unknown code will be moved to the end. 
-// Code generated by github.com/99designs/gqlgen version v0.17.66 +// Code generated by github.com/99designs/gqlgen version v0.17.76 import ( "context" diff --git a/internal/repository/jobQuery.go b/internal/repository/jobQuery.go index c9ccb03..fdcc904 100644 --- a/internal/repository/jobQuery.go +++ b/internal/repository/jobQuery.go @@ -12,6 +12,7 @@ import ( "strings" "time" + "github.com/ClusterCockpit/cc-backend/internal/config" "github.com/ClusterCockpit/cc-backend/internal/graph/model" cclog "github.com/ClusterCockpit/cc-lib/ccLogger" "github.com/ClusterCockpit/cc-lib/schema" @@ -216,7 +217,7 @@ func BuildWhereClause(filter *model.JobFilter, query sq.SelectBuilder) sq.Select return query } -func buildIntCondition(field string, cond *schema.IntRange, query sq.SelectBuilder) sq.SelectBuilder { +func buildIntCondition(field string, cond *config.IntRange, query sq.SelectBuilder) sq.SelectBuilder { return query.Where(field+" BETWEEN ? AND ?", cond.From, cond.To) } @@ -224,7 +225,7 @@ func buildFloatCondition(field string, cond *model.FloatRange, query sq.SelectBu return query.Where(field+" BETWEEN ? AND ?", cond.From, cond.To) } -func buildTimeCondition(field string, cond *schema.TimeRange, query sq.SelectBuilder) sq.SelectBuilder { +func buildTimeCondition(field string, cond *config.TimeRange, query sq.SelectBuilder) sq.SelectBuilder { if cond.From != nil && cond.To != nil { return query.Where(field+" BETWEEN ? AND ?", cond.From.Unix(), cond.To.Unix()) } else if cond.From != nil { diff --git a/internal/taskManager/taskManager.go b/internal/taskManager/taskManager.go index 7ed5aac..7231d12 100644 --- a/internal/taskManager/taskManager.go +++ b/internal/taskManager/taskManager.go @@ -13,7 +13,6 @@ import ( "github.com/ClusterCockpit/cc-backend/internal/config" "github.com/ClusterCockpit/cc-backend/internal/repository" cclog "github.com/ClusterCockpit/cc-lib/ccLogger" - "github.com/ClusterCockpit/cc-lib/schema" "github.com/go-co-op/gocron/v2" ) @@ -73,8 +72,8 @@ func Start(cronCfg, archiveConfig json.RawMessage) { } var cfg struct { - Retention schema.Retention `json:"retention"` - Compression int `json:"compression"` + Retention Retention `json:"retention"` + Compression int `json:"compression"` } cfg.Retention.IncludeDB = true From 98b9f8e62deed8efb500d81bf439285edbcb956c Mon Sep 17 00:00:00 2001 From: Christoph Kluge Date: Mon, 4 Aug 2025 14:50:53 +0200 Subject: [PATCH 13/40] Add more information to status dash --- api/schema.graphqls | 7 ++- internal/graph/generated/generated.go | 64 ++++++++++++++++++++++- internal/graph/model/models_gen.go | 15 ++++-- internal/graph/schema.resolvers.go | 2 +- internal/repository/stats.go | 42 ++++++++++----- web/frontend/src/status/StatusDash.svelte | 64 ++++++++++++++++++++++- 6 files changed, 169 insertions(+), 25 deletions(-) diff --git a/api/schema.graphqls b/api/schema.graphqls index b3dadb5..d1c78f3 100644 --- a/api/schema.graphqls +++ b/api/schema.graphqls @@ -237,10 +237,12 @@ enum Aggregate { USER PROJECT CLUSTER + SUBCLUSTER } enum SortByAggregate { TOTALWALLTIME TOTALJOBS + TOTALUSERS TOTALNODES TOTALNODEHOURS TOTALCORES @@ -501,11 +503,12 @@ type MetricHistoPoint { } type JobsStatistics { - id: ID! # If `groupBy` was used, ID of the user/project/cluster + id: ID! # If `groupBy` was used, ID of the user/project/cluster/subcluster name: String! # if User-Statistics: Given Name of Account (ID) Owner + totalUsers: Int! # if *not* User-Statistics: Number of active users (based on running jobs) totalJobs: Int! 
# Number of jobs runningJobs: Int! # Number of running jobs - shortJobs: Int! # Number of jobs with a duration of less than duration + shortJobs: Int! # Number of jobs with a duration of less than config'd ShortRunningJobsDuration totalWalltime: Int! # Sum of the duration of all matched jobs in hours totalNodes: Int! # Sum of the nodes of all matched jobs totalNodeHours: Int! # Sum of the node hours of all matched jobs diff --git a/internal/graph/generated/generated.go b/internal/graph/generated/generated.go index a725802..ff4469a 100644 --- a/internal/graph/generated/generated.go +++ b/internal/graph/generated/generated.go @@ -201,6 +201,7 @@ type ComplexityRoot struct { TotalJobs func(childComplexity int) int TotalNodeHours func(childComplexity int) int TotalNodes func(childComplexity int) int + TotalUsers func(childComplexity int) int TotalWalltime func(childComplexity int) int } @@ -1166,6 +1167,13 @@ func (e *executableSchema) Complexity(typeName, field string, childComplexity in return e.complexity.JobsStatistics.TotalNodes(childComplexity), true + case "JobsStatistics.totalUsers": + if e.complexity.JobsStatistics.TotalUsers == nil { + break + } + + return e.complexity.JobsStatistics.TotalUsers(childComplexity), true + case "JobsStatistics.totalWalltime": if e.complexity.JobsStatistics.TotalWalltime == nil { break @@ -2567,10 +2575,12 @@ enum Aggregate { USER PROJECT CLUSTER + SUBCLUSTER } enum SortByAggregate { TOTALWALLTIME TOTALJOBS + TOTALUSERS TOTALNODES TOTALNODEHOURS TOTALCORES @@ -2831,8 +2841,9 @@ type MetricHistoPoint { } type JobsStatistics { - id: ID! # If ` + "`" + `groupBy` + "`" + ` was used, ID of the user/project/cluster + id: ID! # If ` + "`" + `groupBy` + "`" + ` was used, ID of the user/project/cluster/subcluster name: String! # if User-Statistics: Given Name of Account (ID) Owner + totalUsers: Int! # if *not* User-Statistics: Number of active users (based on running jobs) totalJobs: Int! # Number of jobs runningJobs: Int! # Number of running jobs shortJobs: Int! 
# Number of jobs with a duration of less than duration @@ -8334,6 +8345,50 @@ func (ec *executionContext) fieldContext_JobsStatistics_name(_ context.Context, return fc, nil } +func (ec *executionContext) _JobsStatistics_totalUsers(ctx context.Context, field graphql.CollectedField, obj *model.JobsStatistics) (ret graphql.Marshaler) { + fc, err := ec.fieldContext_JobsStatistics_totalUsers(ctx, field) + if err != nil { + return graphql.Null + } + ctx = graphql.WithFieldContext(ctx, fc) + defer func() { + if r := recover(); r != nil { + ec.Error(ctx, ec.Recover(ctx, r)) + ret = graphql.Null + } + }() + resTmp, err := ec.ResolverMiddleware(ctx, func(rctx context.Context) (any, error) { + ctx = rctx // use context from middleware stack in children + return obj.TotalUsers, nil + }) + if err != nil { + ec.Error(ctx, err) + return graphql.Null + } + if resTmp == nil { + if !graphql.HasFieldError(ctx, fc) { + ec.Errorf(ctx, "must not be null") + } + return graphql.Null + } + res := resTmp.(int) + fc.Result = res + return ec.marshalNInt2int(ctx, field.Selections, res) +} + +func (ec *executionContext) fieldContext_JobsStatistics_totalUsers(_ context.Context, field graphql.CollectedField) (fc *graphql.FieldContext, err error) { + fc = &graphql.FieldContext{ + Object: "JobsStatistics", + Field: field, + IsMethod: false, + IsResolver: false, + Child: func(ctx context.Context, field graphql.CollectedField) (*graphql.FieldContext, error) { + return nil, errors.New("field of type Int does not have child fields") + }, + } + return fc, nil +} + func (ec *executionContext) _JobsStatistics_totalJobs(ctx context.Context, field graphql.CollectedField, obj *model.JobsStatistics) (ret graphql.Marshaler) { fc, err := ec.fieldContext_JobsStatistics_totalJobs(ctx, field) if err != nil { @@ -12636,6 +12691,8 @@ func (ec *executionContext) fieldContext_Query_jobsStatistics(ctx context.Contex return ec.fieldContext_JobsStatistics_id(ctx, field) case "name": return ec.fieldContext_JobsStatistics_name(ctx, field) + case "totalUsers": + return ec.fieldContext_JobsStatistics_totalUsers(ctx, field) case "totalJobs": return ec.fieldContext_JobsStatistics_totalJobs(ctx, field) case "runningJobs": @@ -19240,6 +19297,11 @@ func (ec *executionContext) _JobsStatistics(ctx context.Context, sel ast.Selecti if out.Values[i] == graphql.Null { out.Invalids++ } + case "totalUsers": + out.Values[i] = ec._JobsStatistics_totalUsers(ctx, field, obj) + if out.Values[i] == graphql.Null { + out.Invalids++ + } case "totalJobs": out.Values[i] = ec._JobsStatistics_totalJobs(ctx, field, obj) if out.Values[i] == graphql.Null { diff --git a/internal/graph/model/models_gen.go b/internal/graph/model/models_gen.go index e6619b7..a5fe2a2 100644 --- a/internal/graph/model/models_gen.go +++ b/internal/graph/model/models_gen.go @@ -112,6 +112,7 @@ type JobStats struct { type JobsStatistics struct { ID string `json:"id"` Name string `json:"name"` + TotalUsers int `json:"totalUsers"` TotalJobs int `json:"totalJobs"` RunningJobs int `json:"runningJobs"` ShortJobs int `json:"shortJobs"` @@ -247,20 +248,22 @@ type User struct { type Aggregate string const ( - AggregateUser Aggregate = "USER" - AggregateProject Aggregate = "PROJECT" - AggregateCluster Aggregate = "CLUSTER" + AggregateUser Aggregate = "USER" + AggregateProject Aggregate = "PROJECT" + AggregateCluster Aggregate = "CLUSTER" + AggregateSubcluster Aggregate = "SUBCLUSTER" ) var AllAggregate = []Aggregate{ AggregateUser, AggregateProject, AggregateCluster, + AggregateSubcluster, } func (e Aggregate) 
IsValid() bool { switch e { - case AggregateUser, AggregateProject, AggregateCluster: + case AggregateUser, AggregateProject, AggregateCluster, AggregateSubcluster: return true } return false @@ -292,6 +295,7 @@ type SortByAggregate string const ( SortByAggregateTotalwalltime SortByAggregate = "TOTALWALLTIME" SortByAggregateTotaljobs SortByAggregate = "TOTALJOBS" + SortByAggregateTotalusers SortByAggregate = "TOTALUSERS" SortByAggregateTotalnodes SortByAggregate = "TOTALNODES" SortByAggregateTotalnodehours SortByAggregate = "TOTALNODEHOURS" SortByAggregateTotalcores SortByAggregate = "TOTALCORES" @@ -303,6 +307,7 @@ const ( var AllSortByAggregate = []SortByAggregate{ SortByAggregateTotalwalltime, SortByAggregateTotaljobs, + SortByAggregateTotalusers, SortByAggregateTotalnodes, SortByAggregateTotalnodehours, SortByAggregateTotalcores, @@ -313,7 +318,7 @@ var AllSortByAggregate = []SortByAggregate{ func (e SortByAggregate) IsValid() bool { switch e { - case SortByAggregateTotalwalltime, SortByAggregateTotaljobs, SortByAggregateTotalnodes, SortByAggregateTotalnodehours, SortByAggregateTotalcores, SortByAggregateTotalcorehours, SortByAggregateTotalaccs, SortByAggregateTotalacchours: + case SortByAggregateTotalwalltime, SortByAggregateTotaljobs, SortByAggregateTotalusers, SortByAggregateTotalnodes, SortByAggregateTotalnodehours, SortByAggregateTotalcores, SortByAggregateTotalcorehours, SortByAggregateTotalaccs, SortByAggregateTotalacchours: return true } return false diff --git a/internal/graph/schema.resolvers.go b/internal/graph/schema.resolvers.go index e0a7948..b993ebb 100644 --- a/internal/graph/schema.resolvers.go +++ b/internal/graph/schema.resolvers.go @@ -581,7 +581,7 @@ func (r *queryResolver) JobsStatistics(ctx context.Context, filter []*model.JobF defaultDurationBins := "1h" defaultMetricBins := 10 - if requireField(ctx, "totalJobs") || requireField(ctx, "totalWalltime") || requireField(ctx, "totalNodes") || requireField(ctx, "totalCores") || + if requireField(ctx, "totalJobs") || requireField(ctx, "totalUsers") || requireField(ctx, "totalWalltime") || requireField(ctx, "totalNodes") || requireField(ctx, "totalCores") || requireField(ctx, "totalAccs") || requireField(ctx, "totalNodeHours") || requireField(ctx, "totalCoreHours") || requireField(ctx, "totalAccHours") { if groupBy == nil { stats, err = r.Repo.JobsStats(ctx, filter) diff --git a/internal/repository/stats.go b/internal/repository/stats.go index 7beb674..1aa3c55 100644 --- a/internal/repository/stats.go +++ b/internal/repository/stats.go @@ -21,13 +21,15 @@ import ( // GraphQL validation should make sure that no unkown values can be specified. 
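The stats.go hunk below maps the new SUBCLUSTER aggregate onto job.subcluster and adds COUNT(DISTINCT job.hpc_user) to both the grouped and the ungrouped statistics query, which is where the new totalUsers number comes from. As a reading aid, here is a minimal, hypothetical sketch (not the repository code; the table and column names are taken from the hunks in this patch, the db handle and output are illustrative) of how such a grouped query and its matching Scan order fit together with squirrel:

package statsexample

import (
	"database/sql"
	"fmt"

	sq "github.com/Masterminds/squirrel"
)

// groupedUserCounts sketches the idea behind the JobsStatsGrouped change:
// one COUNT per group plus one COUNT(DISTINCT ...) for active users.
func groupedUserCounts(db *sql.DB) error {
	query := sq.Select(
		"job.subcluster",
		"COUNT(job.id) AS totalJobs",
		// Counts each user once per group, no matter how many jobs they ran.
		"COUNT(DISTINCT job.hpc_user) AS totalUsers",
	).From("job").GroupBy("job.subcluster")

	rows, err := query.RunWith(db).Query()
	if err != nil {
		return err
	}
	defer rows.Close()

	for rows.Next() {
		var id sql.NullString
		var jobs, users sql.NullInt64
		// The Scan order must match the SELECT order above; the hunk below
		// reorders both sides consistently (id, name, totalJobs, totalUsers, ...).
		if err := rows.Scan(&id, &jobs, &users); err != nil {
			return err
		}
		fmt.Printf("%s: %d jobs, %d active users\n", id.String, jobs.Int64, users.Int64)
	}
	return rows.Err()
}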
var groupBy2column = map[model.Aggregate]string{ - model.AggregateUser: "job.hpc_user", - model.AggregateProject: "job.project", - model.AggregateCluster: "job.cluster", + model.AggregateUser: "job.hpc_user", + model.AggregateProject: "job.project", + model.AggregateCluster: "job.cluster", + model.AggregateSubcluster: "job.subcluster", } var sortBy2column = map[model.SortByAggregate]string{ model.SortByAggregateTotaljobs: "totalJobs", + model.SortByAggregateTotalusers: "totalUsers", model.SortByAggregateTotalwalltime: "totalWalltime", model.SortByAggregateTotalnodes: "totalNodes", model.SortByAggregateTotalnodehours: "totalNodeHours", @@ -76,8 +78,12 @@ func (r *JobRepository) buildStatsQuery( // fmt.Sprintf(`CAST(ROUND((CASE WHEN job.job_state = "running" THEN %d - job.start_time ELSE job.duration END) / 3600) as %s) as value`, time.Now().Unix(), castType) if col != "" { - // Scan columns: id, totalJobs, name, totalWalltime, totalNodes, totalNodeHours, totalCores, totalCoreHours, totalAccs, totalAccHours - query = sq.Select(col, "COUNT(job.id) as totalJobs", "name", + // Scan columns: id, name, totalJobs, totalUsers, totalWalltime, totalNodes, totalNodeHours, totalCores, totalCoreHours, totalAccs, totalAccHours + query = sq.Select( + col, + "name", + "COUNT(job.id) as totalJobs", + "COUNT(DISTINCT job.hpc_user) AS totalUsers", fmt.Sprintf(`CAST(ROUND(SUM((CASE WHEN job.job_state = "running" THEN %d - job.start_time ELSE job.duration END)) / 3600) as %s) as totalWalltime`, time.Now().Unix(), castType), fmt.Sprintf(`CAST(SUM(job.num_nodes) as %s) as totalNodes`, castType), fmt.Sprintf(`CAST(ROUND(SUM((CASE WHEN job.job_state = "running" THEN %d - job.start_time ELSE job.duration END) * job.num_nodes) / 3600) as %s) as totalNodeHours`, time.Now().Unix(), castType), @@ -87,8 +93,10 @@ func (r *JobRepository) buildStatsQuery( fmt.Sprintf(`CAST(ROUND(SUM((CASE WHEN job.job_state = "running" THEN %d - job.start_time ELSE job.duration END) * job.num_acc) / 3600) as %s) as totalAccHours`, time.Now().Unix(), castType), ).From("job").LeftJoin("hpc_user ON hpc_user.username = job.hpc_user").GroupBy(col) } else { - // Scan columns: totalJobs, name, totalWalltime, totalNodes, totalNodeHours, totalCores, totalCoreHours, totalAccs, totalAccHours - query = sq.Select("COUNT(job.id)", + // Scan columns: totalJobs, totalUsers, totalWalltime, totalNodes, totalNodeHours, totalCores, totalCoreHours, totalAccs, totalAccHours + query = sq.Select( + "COUNT(job.id) as totalJobs", + "COUNT(DISTINCT job.hpc_user) AS totalUsers", fmt.Sprintf(`CAST(ROUND(SUM((CASE WHEN job.job_state = "running" THEN %d - job.start_time ELSE job.duration END)) / 3600) as %s)`, time.Now().Unix(), castType), fmt.Sprintf(`CAST(SUM(job.num_nodes) as %s)`, castType), fmt.Sprintf(`CAST(ROUND(SUM((CASE WHEN job.job_state = "running" THEN %d - job.start_time ELSE job.duration END) * job.num_nodes) / 3600) as %s)`, time.Now().Unix(), castType), @@ -167,14 +175,14 @@ func (r *JobRepository) JobsStatsGrouped( for rows.Next() { var id sql.NullString var name sql.NullString - var jobs, walltime, nodes, nodeHours, cores, coreHours, accs, accHours sql.NullInt64 - if err := rows.Scan(&id, &jobs, &name, &walltime, &nodes, &nodeHours, &cores, &coreHours, &accs, &accHours); err != nil { + var jobs, users, walltime, nodes, nodeHours, cores, coreHours, accs, accHours sql.NullInt64 + if err := rows.Scan(&id, &name, &jobs, &users, &walltime, &nodes, &nodeHours, &cores, &coreHours, &accs, &accHours); err != nil { cclog.Warn("Error while scanning rows") return 
nil, err } if id.Valid { - var totalJobs, totalWalltime, totalNodes, totalNodeHours, totalCores, totalCoreHours, totalAccs, totalAccHours int + var totalJobs, totalUsers, totalWalltime, totalNodes, totalNodeHours, totalCores, totalCoreHours, totalAccs, totalAccHours int var personName string if name.Valid { @@ -185,6 +193,10 @@ func (r *JobRepository) JobsStatsGrouped( totalJobs = int(jobs.Int64) } + if users.Valid { + totalUsers = int(users.Int64) + } + if walltime.Valid { totalWalltime = int(walltime.Int64) } @@ -228,8 +240,9 @@ func (r *JobRepository) JobsStatsGrouped( stats = append(stats, &model.JobsStatistics{ ID: id.String, - TotalJobs: int(jobs.Int64), - TotalWalltime: int(walltime.Int64), + TotalJobs: totalJobs, + TotalUsers: totalUsers, + TotalWalltime: totalWalltime, TotalNodes: totalNodes, TotalNodeHours: totalNodeHours, TotalCores: totalCores, @@ -259,8 +272,8 @@ func (r *JobRepository) JobsStats( row := query.RunWith(r.DB).QueryRow() stats := make([]*model.JobsStatistics, 0, 1) - var jobs, walltime, nodes, nodeHours, cores, coreHours, accs, accHours sql.NullInt64 - if err := row.Scan(&jobs, &walltime, &nodes, &nodeHours, &cores, &coreHours, &accs, &accHours); err != nil { + var jobs, users, walltime, nodes, nodeHours, cores, coreHours, accs, accHours sql.NullInt64 + if err := row.Scan(&jobs, &users, &walltime, &nodes, &nodeHours, &cores, &coreHours, &accs, &accHours); err != nil { cclog.Warn("Error while scanning rows") return nil, err } @@ -280,6 +293,7 @@ func (r *JobRepository) JobsStats( stats = append(stats, &model.JobsStatistics{ TotalJobs: int(jobs.Int64), + TotalUsers: int(users.Int64), TotalWalltime: int(walltime.Int64), TotalNodeHours: totalNodeHours, TotalCoreHours: totalCoreHours, diff --git a/web/frontend/src/status/StatusDash.svelte b/web/frontend/src/status/StatusDash.svelte index f98c1c3..a1196e5 100644 --- a/web/frontend/src/status/StatusDash.svelte +++ b/web/frontend/src/status/StatusDash.svelte @@ -45,12 +45,17 @@ let plotWidths = $state([]); // Bar Gauges let allocatedNodes = $state({}); + let allocatedAccs = $state({}); let flopRate = $state({}); let flopRateUnitPrefix = $state({}); let flopRateUnitBase = $state({}); let memBwRate = $state({}); let memBwRateUnitPrefix = $state({}); let memBwRateUnitBase = $state({}); + // Plain Infos + let runningJobs = $state({}); + let activeUsers = $state({}); + let totalAccs = $state({}); /* Derived */ // Note: nodeMetrics are requested on configured $timestep resolution @@ -63,6 +68,8 @@ $metrics: [String!] $from: Time! $to: Time! + $filter: [JobFilter!]! + $paging: PageRequest! 
) { nodeMetrics( cluster: $cluster @@ -87,11 +94,23 @@ } } } - + # Only counts shared nodes once allocatedNodes(cluster: $cluster) { name count } + # totalNodes includes multiples if shared jobs + jobsStatistics( + filter: $filter + page: $paging + sortBy: TOTALJOBS + groupBy: SUBCLUSTER + ) { + id + totalJobs + totalUsers + totalAccs + } } `, variables: { @@ -99,7 +118,8 @@ metrics: ["flops_any", "mem_bw"], // Fixed names for roofline and status bars from: from.toISOString(), to: to.toISOString(), - // filter: [{ state: ["running"] }, { cluster: { eq: cluster } }], + filter: [{ state: ["running"] }, { cluster: { eq: cluster } }], + paging: { itemsPerPage: -1, page: 1 }, // Get all: -1 }, })); @@ -110,10 +130,27 @@ (c) => c.name == cluster, ).subClusters; for (let subCluster of subClusters) { + // Allocations allocatedNodes[subCluster.name] = $statusQuery.data.allocatedNodes.find( ({ name }) => name == subCluster.name, )?.count || 0; + allocatedAccs[subCluster.name] = + $statusQuery.data.jobsStatistics.find( + ({ id }) => id == subCluster.name, + )?.totalAccs || 0; + // Infos + activeUsers[subCluster.name] = + $statusQuery.data.jobsStatistics.find( + ({ id }) => id == subCluster.name, + )?.totalUsers || 0; + runningJobs[subCluster.name] = + $statusQuery.data.jobsStatistics.find( + ({ id }) => id == subCluster.name, + )?.totalJobs || 0; + totalAccs[subCluster.name] = + (subCluster?.numberOfNodes * subCluster?.topology?.accelerators?.length) || null; + // Keymetrics flopRate[subCluster.name] = Math.floor( sumUp($statusQuery.data.nodeMetrics, subCluster.name, "flops_any") * @@ -158,9 +195,15 @@ SubCluster "{subCluster.name}" + {subCluster.processorType} + + + + +
+ {#if totalAccs[subCluster.name] !== null} + + + + + + {/if} @@ -204,6 +353,25 @@
+ + + + + + + + +
{/if} - +
+
{runningJobs[subCluster.name]} Running Jobs
{activeUsers[subCluster.name]} Active Users
Allocated Nodes
Allocated Accelerators
+ +
{allocatedAccs[subCluster.name]} / {totalAccs[subCluster.name]}
+ Accelerators
Flop Rate (Any) Date: Tue, 5 Aug 2025 10:23:54 +0200 Subject: [PATCH 14/40] Port to new job structs Backup commit: Does not build. --- go.mod | 10 +- go.sum | 24 +- internal/api/api_test.go | 1 - internal/api/job.go | 2 +- internal/graph/generated/generated.go | 1382 ++--------------- internal/graph/schema.resolvers.go | 9 +- internal/importer/handleImport.go | 2 +- internal/repository/job.go | 2 +- .../sqlite3/09_add-job-cache.up.sql | 53 +- internal/tagger/classifyJob.go | 14 +- pkg/archive/fsBackend_test.go | 8 +- 11 files changed, 195 insertions(+), 1312 deletions(-) diff --git a/go.mod b/go.mod index df742d7..554ea56 100644 --- a/go.mod +++ b/go.mod @@ -5,8 +5,8 @@ go 1.23.5 toolchain go1.24.1 require ( - github.com/99designs/gqlgen v0.17.76 - github.com/ClusterCockpit/cc-lib v0.6.0 + github.com/99designs/gqlgen v0.17.78 + github.com/ClusterCockpit/cc-lib v0.7.0 github.com/Masterminds/squirrel v1.5.4 github.com/coreos/go-oidc/v3 v3.12.0 github.com/expr-lang/expr v1.17.5 @@ -22,12 +22,12 @@ require ( github.com/jmoiron/sqlx v1.4.0 github.com/joho/godotenv v1.5.1 github.com/mattn/go-sqlite3 v1.14.24 - github.com/prometheus/client_golang v1.22.0 + github.com/prometheus/client_golang v1.23.0 github.com/prometheus/common v0.65.0 github.com/qustavo/sqlhooks/v2 v2.1.0 github.com/santhosh-tekuri/jsonschema/v5 v5.3.1 github.com/swaggo/http-swagger v1.3.4 - github.com/swaggo/swag v1.16.5 + github.com/swaggo/swag v1.16.6 github.com/vektah/gqlparser/v2 v2.5.30 golang.org/x/crypto v0.40.0 golang.org/x/oauth2 v0.30.0 @@ -50,7 +50,7 @@ require ( github.com/go-openapi/jsonreference v0.21.0 // indirect github.com/go-openapi/spec v0.21.0 // indirect github.com/go-openapi/swag v0.23.1 // indirect - github.com/go-viper/mapstructure/v2 v2.3.0 // indirect + github.com/go-viper/mapstructure/v2 v2.4.0 // indirect github.com/google/uuid v1.6.0 // indirect github.com/gorilla/securecookie v1.1.2 // indirect github.com/gorilla/websocket v1.5.3 // indirect diff --git a/go.sum b/go.sum index 1524b5a..6f61908 100644 --- a/go.sum +++ b/go.sum @@ -1,13 +1,13 @@ filippo.io/edwards25519 v1.1.0 h1:FNf4tywRC1HmFuKW5xopWpigGjJKiJSV0Cqo0cJWDaA= filippo.io/edwards25519 v1.1.0/go.mod h1:BxyFTGdWcka3PhytdK4V28tE5sGfRvvvRV7EaN4VDT4= -github.com/99designs/gqlgen v0.17.76 h1:YsJBcfACWmXWU2t1yCjoGdOmqcTfOFpjbLAE443fmYI= -github.com/99designs/gqlgen v0.17.76/go.mod h1:miiU+PkAnTIDKMQ1BseUOIVeQHoiwYDZGCswoxl7xec= +github.com/99designs/gqlgen v0.17.78 h1:bhIi7ynrc3js2O8wu1sMQj1YHPENDt3jQGyifoBvoVI= +github.com/99designs/gqlgen v0.17.78/go.mod h1:yI/o31IauG2kX0IsskM4R894OCCG1jXJORhtLQqB7Oc= github.com/Azure/go-ansiterm v0.0.0-20230124172434-306776ec8161 h1:L/gRVlceqvL25UVaW/CKtUDjefjrs0SPonmDGUVOYP0= github.com/Azure/go-ansiterm v0.0.0-20230124172434-306776ec8161/go.mod h1:xomTg63KZ2rFqZQzSB4Vz2SUXa1BpHTVz9L5PTmPC4E= github.com/Azure/go-ntlmssp v0.0.0-20221128193559-754e69321358 h1:mFRzDkZVAjdal+s7s0MwaRv9igoPqLRdzOLzw/8Xvq8= github.com/Azure/go-ntlmssp v0.0.0-20221128193559-754e69321358/go.mod h1:chxPXzSsl7ZWRAuOIE23GDNzjWuZquvFlgA8xmpunjU= -github.com/ClusterCockpit/cc-lib v0.6.0 h1:uK/6DLBkkLznSWD28wmGGumMAa5ovD9rPaGS4Miw9W8= -github.com/ClusterCockpit/cc-lib v0.6.0/go.mod h1:0zLbJprwOWLA+OSNQ+OlUKLscZszwf9J2j8Ly5ztplk= +github.com/ClusterCockpit/cc-lib v0.7.0 h1:THuSYrMcn9pSbrMditSI1LMOluq9TnM0/aVId4uK1Hc= +github.com/ClusterCockpit/cc-lib v0.7.0/go.mod h1:TD1PS8pL2RDvEWaqs8VNejoTSm5OawI9Dcc0CTY/yWQ= github.com/KyleBanks/depth v1.2.1 h1:5h8fQADFrWtarTdtDudMmGsC7GPbOAu6RVB3ffsVFHc= github.com/KyleBanks/depth v1.2.1/go.mod 
h1:jzSb9d0L43HxTQfT+oSA1EEp2q+ne2uh6XgeJcm8brE= github.com/Masterminds/squirrel v1.5.4 h1:uUcX/aBc8O7Fg9kaISIUsHXdKuqehiXAMQTYX8afzqM= @@ -83,8 +83,8 @@ github.com/go-sql-driver/mysql v1.4.1/go.mod h1:zAC/RDZ24gD3HViQzih4MyKcchzm+sOG github.com/go-sql-driver/mysql v1.8.1/go.mod h1:wEBSXgmK//2ZFJyE+qWnIsVGmvmEKlqwuVSjsCm7DZg= github.com/go-sql-driver/mysql v1.9.0 h1:Y0zIbQXhQKmQgTp44Y1dp3wTXcn804QoTptLZT1vtvo= github.com/go-sql-driver/mysql v1.9.0/go.mod h1:pDetrLJeA3oMujJuvXc8RJoasr589B6A9fwzD3QMrqw= -github.com/go-viper/mapstructure/v2 v2.3.0 h1:27XbWsHIqhbdR5TIC911OfYvgSaW93HM+dX7970Q7jk= -github.com/go-viper/mapstructure/v2 v2.3.0/go.mod h1:oJDH3BJKyqBA2TXFhDsKDGDTlndYOZ6rGS0BRZIxGhM= +github.com/go-viper/mapstructure/v2 v2.4.0 h1:EBsztssimR/CONLSZZ04E8qAkxNYq4Qp9LvH92wZUgs= +github.com/go-viper/mapstructure/v2 v2.4.0/go.mod h1:oJDH3BJKyqBA2TXFhDsKDGDTlndYOZ6rGS0BRZIxGhM= github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q= github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q= github.com/golang-jwt/jwt/v5 v5.2.2 h1:Rl4B7itRWVtYIHFrSNd7vhTiz9UpLdi6gZhZ3wEeDy8= @@ -187,8 +187,8 @@ github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ= github.com/mwitkow/go-conntrack v0.0.0-20190716064945-2f068394615f h1:KUppIJq7/+SVif2QVs3tOP0zanoHgBEVAwHxUSIzRqU= github.com/mwitkow/go-conntrack v0.0.0-20190716064945-2f068394615f/go.mod h1:qRWi+5nqEBWmkhHvq77mSJWrCKwh8bxhgT7d/eI7P4U= -github.com/nats-io/nats.go v1.43.0 h1:uRFZ2FEoRvP64+UUhaTokyS18XBCR/xM2vQZKO4i8ug= -github.com/nats-io/nats.go v1.43.0/go.mod h1:iRWIPokVIFbVijxuMQq4y9ttaBTMe0SFdlZfMDd+33g= +github.com/nats-io/nats.go v1.44.0 h1:ECKVrDLdh/kDPV1g0gAQ+2+m2KprqZK5O/eJAyAnH2M= +github.com/nats-io/nats.go v1.44.0/go.mod h1:iRWIPokVIFbVijxuMQq4y9ttaBTMe0SFdlZfMDd+33g= github.com/nats-io/nkeys v0.4.11 h1:q44qGV008kYd9W1b1nEBkNzvnWxtRSQ7A8BoqRrcfa0= github.com/nats-io/nkeys v0.4.11/go.mod h1:szDimtgmfOi9n25JpfIdGw12tZFYXqhGxjhVxsatHVE= github.com/nats-io/nuid v1.0.1 h1:5iA8DT8V7q8WK2EScv2padNa/rTESc1KdnPw4TC2paw= @@ -204,8 +204,8 @@ github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= -github.com/prometheus/client_golang v1.22.0 h1:rb93p9lokFEsctTys46VnV1kLCDpVZ0a/Y92Vm0Zc6Q= -github.com/prometheus/client_golang v1.22.0/go.mod h1:R7ljNsLXhuQXYZYtw6GAE9AZg8Y7vEW5scdCXrWRXC0= +github.com/prometheus/client_golang v1.23.0 h1:ust4zpdl9r4trLY/gSjlm07PuiBq2ynaXXlptpfy8Uc= +github.com/prometheus/client_golang v1.23.0/go.mod h1:i/o0R9ByOnHX0McrTMTyhYvKE4haaf2mW08I+jGAjEE= github.com/prometheus/client_model v0.6.2 h1:oBsgwpGs7iVziMvrGhE53c/GrLUsZdHnqNwqPLxwZyk= github.com/prometheus/client_model v0.6.2/go.mod h1:y3m2F6Gdpfy6Ut/GBsUqTWZqCUvMVzSfMLjcu6wAwpE= github.com/prometheus/common v0.65.0 h1:QDwzd+G1twt//Kwj/Ww6E9FQq1iVMmODnILtW1t2VzE= @@ -241,8 +241,8 @@ github.com/swaggo/files v1.0.1 h1:J1bVJ4XHZNq0I46UU90611i9/YzdrF7x92oX1ig5IdE= github.com/swaggo/files v1.0.1/go.mod h1:0qXmMNH6sXNf+73t65aKeB+ApmgxdnkQzVTAj2uaMUg= github.com/swaggo/http-swagger v1.3.4 h1:q7t/XLx0n15H1Q9/tk3Y9L4n210XzJF5WtnDX64a5ww= github.com/swaggo/http-swagger v1.3.4/go.mod 
h1:9dAh0unqMBAlbp1uE2Uc2mQTxNMU/ha4UbucIg1MFkQ= -github.com/swaggo/swag v1.16.5 h1:nMf2fEV1TetMTJb4XzD0Lz7jFfKJmJKGTygEey8NSxM= -github.com/swaggo/swag v1.16.5/go.mod h1:ngP2etMK5a0P3QBizic5MEwpRmluJZPHjXcMoj4Xesg= +github.com/swaggo/swag v1.16.6 h1:qBNcx53ZaX+M5dxVyTrgQ0PJ/ACK+NzhwcbieTt+9yI= +github.com/swaggo/swag v1.16.6/go.mod h1:ngP2etMK5a0P3QBizic5MEwpRmluJZPHjXcMoj4Xesg= github.com/urfave/cli/v2 v2.27.7 h1:bH59vdhbjLv3LAvIu6gd0usJHgoTTPhCFib8qqOwXYU= github.com/urfave/cli/v2 v2.27.7/go.mod h1:CyNAG/xg+iAOg0N4MPGZqVmv2rCoP267496AOXUZjA4= github.com/vektah/gqlparser/v2 v2.5.30 h1:EqLwGAFLIzt1wpx1IPpY67DwUujF1OfzgEyDsLrN6kE= diff --git a/internal/api/api_test.go b/internal/api/api_test.go index eeb093e..9f47a1f 100644 --- a/internal/api/api_test.go +++ b/internal/api/api_test.go @@ -297,7 +297,6 @@ func TestRestApi(t *testing.T) { job.NumNodes != 1 || job.NumHWThreads != 8 || job.NumAcc != 0 || - job.Exclusive != 1 || job.MonitoringStatus != 1 || job.SMT != 1 || !reflect.DeepEqual(job.Resources, []*schema.Resource{{Hostname: "host123", HWThreads: []int{0, 1, 2, 3, 4, 5, 6, 7}}}) || diff --git a/internal/api/job.go b/internal/api/job.go index 21e42f8..9367bcc 100644 --- a/internal/api/job.go +++ b/internal/api/job.go @@ -647,7 +647,7 @@ func (api *RestApi) removeTags(rw http.ResponseWriter, r *http.Request) { // @router /api/jobs/start_job/ [post] func (api *RestApi) startJob(rw http.ResponseWriter, r *http.Request) { req := schema.Job{ - Exclusive: 1, + Shared: "none", MonitoringStatus: schema.MonitoringStatusRunningOrArchiving, } if err := decode(r.Body, &req); err != nil { diff --git a/internal/graph/generated/generated.go b/internal/graph/generated/generated.go index a6899a1..3a85858 100644 --- a/internal/graph/generated/generated.go +++ b/internal/graph/generated/generated.go @@ -425,6 +425,8 @@ type ClusterResolver interface { type JobResolver interface { StartTime(ctx context.Context, obj *schema.Job) (*time.Time, error) + Exclusive(ctx context.Context, obj *schema.Job) (int, error) + Tags(ctx context.Context, obj *schema.Job) ([]*schema.Tag, error) ConcurrentJobs(ctx context.Context, obj *schema.Job) (*model.JobLinkResultList, error) @@ -2853,1701 +2855,504 @@ var parsedSchema = gqlparser.MustLoadSchema(sources...) 
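Two details in this patch are easy to miss in the noise: the REST layer now seeds new jobs with Shared: "none" instead of Exclusive: 1, and the generated JobResolver interface gains an Exclusive(ctx, *schema.Job) (int, error) method, i.e. the legacy integer is now resolved rather than stored. The resolver body is not part of this hunk; the snippet below is only a hedged sketch of what such a derivation could look like. Only the "none" ↔ 1 pair is backed by this diff; every other value is an assumption. (The large generated.go hunk that follows is an unrelated, mechanical argument-handling cleanup, sketched further below.)

// Hypothetical sketch, not part of this patch: derive the legacy Exclusive
// integer from the new string-valued Shared field.
func exclusiveFromShared(shared string) int {
	if shared == "none" {
		return 1 // backed by the diff: the old default Exclusive: 1 becomes Shared: "none"
	}
	return 0 // assumption: any other sharing mode maps to "not exclusive"
}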
func (ec *executionContext) field_Mutation_addTagsToJob_args(ctx context.Context, rawArgs map[string]any) (map[string]any, error) { var err error args := map[string]any{} - arg0, err := ec.field_Mutation_addTagsToJob_argsJob(ctx, rawArgs) + arg0, err := graphql.ProcessArgField(ctx, rawArgs, "job", ec.unmarshalNID2string) if err != nil { return nil, err } args["job"] = arg0 - arg1, err := ec.field_Mutation_addTagsToJob_argsTagIds(ctx, rawArgs) + arg1, err := graphql.ProcessArgField(ctx, rawArgs, "tagIds", ec.unmarshalNID2ᚕstringᚄ) if err != nil { return nil, err } args["tagIds"] = arg1 return args, nil } -func (ec *executionContext) field_Mutation_addTagsToJob_argsJob( - ctx context.Context, - rawArgs map[string]any, -) (string, error) { - if _, ok := rawArgs["job"]; !ok { - var zeroVal string - return zeroVal, nil - } - - ctx = graphql.WithPathContext(ctx, graphql.NewPathWithField("job")) - if tmp, ok := rawArgs["job"]; ok { - return ec.unmarshalNID2string(ctx, tmp) - } - - var zeroVal string - return zeroVal, nil -} - -func (ec *executionContext) field_Mutation_addTagsToJob_argsTagIds( - ctx context.Context, - rawArgs map[string]any, -) ([]string, error) { - if _, ok := rawArgs["tagIds"]; !ok { - var zeroVal []string - return zeroVal, nil - } - - ctx = graphql.WithPathContext(ctx, graphql.NewPathWithField("tagIds")) - if tmp, ok := rawArgs["tagIds"]; ok { - return ec.unmarshalNID2ᚕstringᚄ(ctx, tmp) - } - - var zeroVal []string - return zeroVal, nil -} func (ec *executionContext) field_Mutation_createTag_args(ctx context.Context, rawArgs map[string]any) (map[string]any, error) { var err error args := map[string]any{} - arg0, err := ec.field_Mutation_createTag_argsType(ctx, rawArgs) + arg0, err := graphql.ProcessArgField(ctx, rawArgs, "type", ec.unmarshalNString2string) if err != nil { return nil, err } args["type"] = arg0 - arg1, err := ec.field_Mutation_createTag_argsName(ctx, rawArgs) + arg1, err := graphql.ProcessArgField(ctx, rawArgs, "name", ec.unmarshalNString2string) if err != nil { return nil, err } args["name"] = arg1 - arg2, err := ec.field_Mutation_createTag_argsScope(ctx, rawArgs) + arg2, err := graphql.ProcessArgField(ctx, rawArgs, "scope", ec.unmarshalNString2string) if err != nil { return nil, err } args["scope"] = arg2 return args, nil } -func (ec *executionContext) field_Mutation_createTag_argsType( - ctx context.Context, - rawArgs map[string]any, -) (string, error) { - if _, ok := rawArgs["type"]; !ok { - var zeroVal string - return zeroVal, nil - } - - ctx = graphql.WithPathContext(ctx, graphql.NewPathWithField("type")) - if tmp, ok := rawArgs["type"]; ok { - return ec.unmarshalNString2string(ctx, tmp) - } - - var zeroVal string - return zeroVal, nil -} - -func (ec *executionContext) field_Mutation_createTag_argsName( - ctx context.Context, - rawArgs map[string]any, -) (string, error) { - if _, ok := rawArgs["name"]; !ok { - var zeroVal string - return zeroVal, nil - } - - ctx = graphql.WithPathContext(ctx, graphql.NewPathWithField("name")) - if tmp, ok := rawArgs["name"]; ok { - return ec.unmarshalNString2string(ctx, tmp) - } - - var zeroVal string - return zeroVal, nil -} - -func (ec *executionContext) field_Mutation_createTag_argsScope( - ctx context.Context, - rawArgs map[string]any, -) (string, error) { - if _, ok := rawArgs["scope"]; !ok { - var zeroVal string - return zeroVal, nil - } - - ctx = graphql.WithPathContext(ctx, graphql.NewPathWithField("scope")) - if tmp, ok := rawArgs["scope"]; ok { - return ec.unmarshalNString2string(ctx, tmp) - } - - var zeroVal 
string - return zeroVal, nil -} func (ec *executionContext) field_Mutation_deleteTag_args(ctx context.Context, rawArgs map[string]any) (map[string]any, error) { var err error args := map[string]any{} - arg0, err := ec.field_Mutation_deleteTag_argsID(ctx, rawArgs) + arg0, err := graphql.ProcessArgField(ctx, rawArgs, "id", ec.unmarshalNID2string) if err != nil { return nil, err } args["id"] = arg0 return args, nil } -func (ec *executionContext) field_Mutation_deleteTag_argsID( - ctx context.Context, - rawArgs map[string]any, -) (string, error) { - if _, ok := rawArgs["id"]; !ok { - var zeroVal string - return zeroVal, nil - } - - ctx = graphql.WithPathContext(ctx, graphql.NewPathWithField("id")) - if tmp, ok := rawArgs["id"]; ok { - return ec.unmarshalNID2string(ctx, tmp) - } - - var zeroVal string - return zeroVal, nil -} func (ec *executionContext) field_Mutation_removeTagFromList_args(ctx context.Context, rawArgs map[string]any) (map[string]any, error) { var err error args := map[string]any{} - arg0, err := ec.field_Mutation_removeTagFromList_argsTagIds(ctx, rawArgs) + arg0, err := graphql.ProcessArgField(ctx, rawArgs, "tagIds", ec.unmarshalNID2ᚕstringᚄ) if err != nil { return nil, err } args["tagIds"] = arg0 return args, nil } -func (ec *executionContext) field_Mutation_removeTagFromList_argsTagIds( - ctx context.Context, - rawArgs map[string]any, -) ([]string, error) { - if _, ok := rawArgs["tagIds"]; !ok { - var zeroVal []string - return zeroVal, nil - } - - ctx = graphql.WithPathContext(ctx, graphql.NewPathWithField("tagIds")) - if tmp, ok := rawArgs["tagIds"]; ok { - return ec.unmarshalNID2ᚕstringᚄ(ctx, tmp) - } - - var zeroVal []string - return zeroVal, nil -} func (ec *executionContext) field_Mutation_removeTagsFromJob_args(ctx context.Context, rawArgs map[string]any) (map[string]any, error) { var err error args := map[string]any{} - arg0, err := ec.field_Mutation_removeTagsFromJob_argsJob(ctx, rawArgs) + arg0, err := graphql.ProcessArgField(ctx, rawArgs, "job", ec.unmarshalNID2string) if err != nil { return nil, err } args["job"] = arg0 - arg1, err := ec.field_Mutation_removeTagsFromJob_argsTagIds(ctx, rawArgs) + arg1, err := graphql.ProcessArgField(ctx, rawArgs, "tagIds", ec.unmarshalNID2ᚕstringᚄ) if err != nil { return nil, err } args["tagIds"] = arg1 return args, nil } -func (ec *executionContext) field_Mutation_removeTagsFromJob_argsJob( - ctx context.Context, - rawArgs map[string]any, -) (string, error) { - if _, ok := rawArgs["job"]; !ok { - var zeroVal string - return zeroVal, nil - } - - ctx = graphql.WithPathContext(ctx, graphql.NewPathWithField("job")) - if tmp, ok := rawArgs["job"]; ok { - return ec.unmarshalNID2string(ctx, tmp) - } - - var zeroVal string - return zeroVal, nil -} - -func (ec *executionContext) field_Mutation_removeTagsFromJob_argsTagIds( - ctx context.Context, - rawArgs map[string]any, -) ([]string, error) { - if _, ok := rawArgs["tagIds"]; !ok { - var zeroVal []string - return zeroVal, nil - } - - ctx = graphql.WithPathContext(ctx, graphql.NewPathWithField("tagIds")) - if tmp, ok := rawArgs["tagIds"]; ok { - return ec.unmarshalNID2ᚕstringᚄ(ctx, tmp) - } - - var zeroVal []string - return zeroVal, nil -} func (ec *executionContext) field_Mutation_updateConfiguration_args(ctx context.Context, rawArgs map[string]any) (map[string]any, error) { var err error args := map[string]any{} - arg0, err := ec.field_Mutation_updateConfiguration_argsName(ctx, rawArgs) + arg0, err := graphql.ProcessArgField(ctx, rawArgs, "name", ec.unmarshalNString2string) if err != nil 
{ return nil, err } args["name"] = arg0 - arg1, err := ec.field_Mutation_updateConfiguration_argsValue(ctx, rawArgs) + arg1, err := graphql.ProcessArgField(ctx, rawArgs, "value", ec.unmarshalNString2string) if err != nil { return nil, err } args["value"] = arg1 return args, nil } -func (ec *executionContext) field_Mutation_updateConfiguration_argsName( - ctx context.Context, - rawArgs map[string]any, -) (string, error) { - if _, ok := rawArgs["name"]; !ok { - var zeroVal string - return zeroVal, nil - } - - ctx = graphql.WithPathContext(ctx, graphql.NewPathWithField("name")) - if tmp, ok := rawArgs["name"]; ok { - return ec.unmarshalNString2string(ctx, tmp) - } - - var zeroVal string - return zeroVal, nil -} - -func (ec *executionContext) field_Mutation_updateConfiguration_argsValue( - ctx context.Context, - rawArgs map[string]any, -) (string, error) { - if _, ok := rawArgs["value"]; !ok { - var zeroVal string - return zeroVal, nil - } - - ctx = graphql.WithPathContext(ctx, graphql.NewPathWithField("value")) - if tmp, ok := rawArgs["value"]; ok { - return ec.unmarshalNString2string(ctx, tmp) - } - - var zeroVal string - return zeroVal, nil -} func (ec *executionContext) field_Query___type_args(ctx context.Context, rawArgs map[string]any) (map[string]any, error) { var err error args := map[string]any{} - arg0, err := ec.field_Query___type_argsName(ctx, rawArgs) + arg0, err := graphql.ProcessArgField(ctx, rawArgs, "name", ec.unmarshalNString2string) if err != nil { return nil, err } args["name"] = arg0 return args, nil } -func (ec *executionContext) field_Query___type_argsName( - ctx context.Context, - rawArgs map[string]any, -) (string, error) { - if _, ok := rawArgs["name"]; !ok { - var zeroVal string - return zeroVal, nil - } - - ctx = graphql.WithPathContext(ctx, graphql.NewPathWithField("name")) - if tmp, ok := rawArgs["name"]; ok { - return ec.unmarshalNString2string(ctx, tmp) - } - - var zeroVal string - return zeroVal, nil -} func (ec *executionContext) field_Query_allocatedNodes_args(ctx context.Context, rawArgs map[string]any) (map[string]any, error) { var err error args := map[string]any{} - arg0, err := ec.field_Query_allocatedNodes_argsCluster(ctx, rawArgs) + arg0, err := graphql.ProcessArgField(ctx, rawArgs, "cluster", ec.unmarshalNString2string) if err != nil { return nil, err } args["cluster"] = arg0 return args, nil } -func (ec *executionContext) field_Query_allocatedNodes_argsCluster( - ctx context.Context, - rawArgs map[string]any, -) (string, error) { - if _, ok := rawArgs["cluster"]; !ok { - var zeroVal string - return zeroVal, nil - } - - ctx = graphql.WithPathContext(ctx, graphql.NewPathWithField("cluster")) - if tmp, ok := rawArgs["cluster"]; ok { - return ec.unmarshalNString2string(ctx, tmp) - } - - var zeroVal string - return zeroVal, nil -} func (ec *executionContext) field_Query_jobMetrics_args(ctx context.Context, rawArgs map[string]any) (map[string]any, error) { var err error args := map[string]any{} - arg0, err := ec.field_Query_jobMetrics_argsID(ctx, rawArgs) + arg0, err := graphql.ProcessArgField(ctx, rawArgs, "id", ec.unmarshalNID2string) if err != nil { return nil, err } args["id"] = arg0 - arg1, err := ec.field_Query_jobMetrics_argsMetrics(ctx, rawArgs) + arg1, err := graphql.ProcessArgField(ctx, rawArgs, "metrics", ec.unmarshalOString2ᚕstringᚄ) if err != nil { return nil, err } args["metrics"] = arg1 - arg2, err := ec.field_Query_jobMetrics_argsScopes(ctx, rawArgs) + arg2, err := graphql.ProcessArgField(ctx, rawArgs, "scopes", 
ec.unmarshalOMetricScope2ᚕgithubᚗcomᚋClusterCockpitᚋccᚑlibᚋschemaᚐMetricScopeᚄ) if err != nil { return nil, err } args["scopes"] = arg2 - arg3, err := ec.field_Query_jobMetrics_argsResolution(ctx, rawArgs) + arg3, err := graphql.ProcessArgField(ctx, rawArgs, "resolution", ec.unmarshalOInt2ᚖint) if err != nil { return nil, err } args["resolution"] = arg3 return args, nil } -func (ec *executionContext) field_Query_jobMetrics_argsID( - ctx context.Context, - rawArgs map[string]any, -) (string, error) { - if _, ok := rawArgs["id"]; !ok { - var zeroVal string - return zeroVal, nil - } - - ctx = graphql.WithPathContext(ctx, graphql.NewPathWithField("id")) - if tmp, ok := rawArgs["id"]; ok { - return ec.unmarshalNID2string(ctx, tmp) - } - - var zeroVal string - return zeroVal, nil -} - -func (ec *executionContext) field_Query_jobMetrics_argsMetrics( - ctx context.Context, - rawArgs map[string]any, -) ([]string, error) { - if _, ok := rawArgs["metrics"]; !ok { - var zeroVal []string - return zeroVal, nil - } - - ctx = graphql.WithPathContext(ctx, graphql.NewPathWithField("metrics")) - if tmp, ok := rawArgs["metrics"]; ok { - return ec.unmarshalOString2ᚕstringᚄ(ctx, tmp) - } - - var zeroVal []string - return zeroVal, nil -} - -func (ec *executionContext) field_Query_jobMetrics_argsScopes( - ctx context.Context, - rawArgs map[string]any, -) ([]schema.MetricScope, error) { - if _, ok := rawArgs["scopes"]; !ok { - var zeroVal []schema.MetricScope - return zeroVal, nil - } - - ctx = graphql.WithPathContext(ctx, graphql.NewPathWithField("scopes")) - if tmp, ok := rawArgs["scopes"]; ok { - return ec.unmarshalOMetricScope2ᚕgithubᚗcomᚋClusterCockpitᚋccᚑlibᚋschemaᚐMetricScopeᚄ(ctx, tmp) - } - - var zeroVal []schema.MetricScope - return zeroVal, nil -} - -func (ec *executionContext) field_Query_jobMetrics_argsResolution( - ctx context.Context, - rawArgs map[string]any, -) (*int, error) { - if _, ok := rawArgs["resolution"]; !ok { - var zeroVal *int - return zeroVal, nil - } - - ctx = graphql.WithPathContext(ctx, graphql.NewPathWithField("resolution")) - if tmp, ok := rawArgs["resolution"]; ok { - return ec.unmarshalOInt2ᚖint(ctx, tmp) - } - - var zeroVal *int - return zeroVal, nil -} func (ec *executionContext) field_Query_jobStats_args(ctx context.Context, rawArgs map[string]any) (map[string]any, error) { var err error args := map[string]any{} - arg0, err := ec.field_Query_jobStats_argsID(ctx, rawArgs) + arg0, err := graphql.ProcessArgField(ctx, rawArgs, "id", ec.unmarshalNID2string) if err != nil { return nil, err } args["id"] = arg0 - arg1, err := ec.field_Query_jobStats_argsMetrics(ctx, rawArgs) + arg1, err := graphql.ProcessArgField(ctx, rawArgs, "metrics", ec.unmarshalOString2ᚕstringᚄ) if err != nil { return nil, err } args["metrics"] = arg1 return args, nil } -func (ec *executionContext) field_Query_jobStats_argsID( - ctx context.Context, - rawArgs map[string]any, -) (string, error) { - if _, ok := rawArgs["id"]; !ok { - var zeroVal string - return zeroVal, nil - } - - ctx = graphql.WithPathContext(ctx, graphql.NewPathWithField("id")) - if tmp, ok := rawArgs["id"]; ok { - return ec.unmarshalNID2string(ctx, tmp) - } - - var zeroVal string - return zeroVal, nil -} - -func (ec *executionContext) field_Query_jobStats_argsMetrics( - ctx context.Context, - rawArgs map[string]any, -) ([]string, error) { - if _, ok := rawArgs["metrics"]; !ok { - var zeroVal []string - return zeroVal, nil - } - - ctx = graphql.WithPathContext(ctx, graphql.NewPathWithField("metrics")) - if tmp, ok := rawArgs["metrics"]; ok { - 
return ec.unmarshalOString2ᚕstringᚄ(ctx, tmp) - } - - var zeroVal []string - return zeroVal, nil -} func (ec *executionContext) field_Query_job_args(ctx context.Context, rawArgs map[string]any) (map[string]any, error) { var err error args := map[string]any{} - arg0, err := ec.field_Query_job_argsID(ctx, rawArgs) + arg0, err := graphql.ProcessArgField(ctx, rawArgs, "id", ec.unmarshalNID2string) if err != nil { return nil, err } args["id"] = arg0 return args, nil } -func (ec *executionContext) field_Query_job_argsID( - ctx context.Context, - rawArgs map[string]any, -) (string, error) { - if _, ok := rawArgs["id"]; !ok { - var zeroVal string - return zeroVal, nil - } - - ctx = graphql.WithPathContext(ctx, graphql.NewPathWithField("id")) - if tmp, ok := rawArgs["id"]; ok { - return ec.unmarshalNID2string(ctx, tmp) - } - - var zeroVal string - return zeroVal, nil -} func (ec *executionContext) field_Query_jobsFootprints_args(ctx context.Context, rawArgs map[string]any) (map[string]any, error) { var err error args := map[string]any{} - arg0, err := ec.field_Query_jobsFootprints_argsFilter(ctx, rawArgs) + arg0, err := graphql.ProcessArgField(ctx, rawArgs, "filter", ec.unmarshalOJobFilter2ᚕᚖgithubᚗcomᚋClusterCockpitᚋccᚑbackendᚋinternalᚋgraphᚋmodelᚐJobFilterᚄ) if err != nil { return nil, err } args["filter"] = arg0 - arg1, err := ec.field_Query_jobsFootprints_argsMetrics(ctx, rawArgs) + arg1, err := graphql.ProcessArgField(ctx, rawArgs, "metrics", ec.unmarshalNString2ᚕstringᚄ) if err != nil { return nil, err } args["metrics"] = arg1 return args, nil } -func (ec *executionContext) field_Query_jobsFootprints_argsFilter( - ctx context.Context, - rawArgs map[string]any, -) ([]*model.JobFilter, error) { - if _, ok := rawArgs["filter"]; !ok { - var zeroVal []*model.JobFilter - return zeroVal, nil - } - - ctx = graphql.WithPathContext(ctx, graphql.NewPathWithField("filter")) - if tmp, ok := rawArgs["filter"]; ok { - return ec.unmarshalOJobFilter2ᚕᚖgithubᚗcomᚋClusterCockpitᚋccᚑbackendᚋinternalᚋgraphᚋmodelᚐJobFilterᚄ(ctx, tmp) - } - - var zeroVal []*model.JobFilter - return zeroVal, nil -} - -func (ec *executionContext) field_Query_jobsFootprints_argsMetrics( - ctx context.Context, - rawArgs map[string]any, -) ([]string, error) { - if _, ok := rawArgs["metrics"]; !ok { - var zeroVal []string - return zeroVal, nil - } - - ctx = graphql.WithPathContext(ctx, graphql.NewPathWithField("metrics")) - if tmp, ok := rawArgs["metrics"]; ok { - return ec.unmarshalNString2ᚕstringᚄ(ctx, tmp) - } - - var zeroVal []string - return zeroVal, nil -} func (ec *executionContext) field_Query_jobsMetricStats_args(ctx context.Context, rawArgs map[string]any) (map[string]any, error) { var err error args := map[string]any{} - arg0, err := ec.field_Query_jobsMetricStats_argsFilter(ctx, rawArgs) + arg0, err := graphql.ProcessArgField(ctx, rawArgs, "filter", ec.unmarshalOJobFilter2ᚕᚖgithubᚗcomᚋClusterCockpitᚋccᚑbackendᚋinternalᚋgraphᚋmodelᚐJobFilterᚄ) if err != nil { return nil, err } args["filter"] = arg0 - arg1, err := ec.field_Query_jobsMetricStats_argsMetrics(ctx, rawArgs) + arg1, err := graphql.ProcessArgField(ctx, rawArgs, "metrics", ec.unmarshalOString2ᚕstringᚄ) if err != nil { return nil, err } args["metrics"] = arg1 return args, nil } -func (ec *executionContext) field_Query_jobsMetricStats_argsFilter( - ctx context.Context, - rawArgs map[string]any, -) ([]*model.JobFilter, error) { - if _, ok := rawArgs["filter"]; !ok { - var zeroVal []*model.JobFilter - return zeroVal, nil - } - - ctx = graphql.WithPathContext(ctx, 
graphql.NewPathWithField("filter")) - if tmp, ok := rawArgs["filter"]; ok { - return ec.unmarshalOJobFilter2ᚕᚖgithubᚗcomᚋClusterCockpitᚋccᚑbackendᚋinternalᚋgraphᚋmodelᚐJobFilterᚄ(ctx, tmp) - } - - var zeroVal []*model.JobFilter - return zeroVal, nil -} - -func (ec *executionContext) field_Query_jobsMetricStats_argsMetrics( - ctx context.Context, - rawArgs map[string]any, -) ([]string, error) { - if _, ok := rawArgs["metrics"]; !ok { - var zeroVal []string - return zeroVal, nil - } - - ctx = graphql.WithPathContext(ctx, graphql.NewPathWithField("metrics")) - if tmp, ok := rawArgs["metrics"]; ok { - return ec.unmarshalOString2ᚕstringᚄ(ctx, tmp) - } - - var zeroVal []string - return zeroVal, nil -} func (ec *executionContext) field_Query_jobsStatistics_args(ctx context.Context, rawArgs map[string]any) (map[string]any, error) { var err error args := map[string]any{} - arg0, err := ec.field_Query_jobsStatistics_argsFilter(ctx, rawArgs) + arg0, err := graphql.ProcessArgField(ctx, rawArgs, "filter", ec.unmarshalOJobFilter2ᚕᚖgithubᚗcomᚋClusterCockpitᚋccᚑbackendᚋinternalᚋgraphᚋmodelᚐJobFilterᚄ) if err != nil { return nil, err } args["filter"] = arg0 - arg1, err := ec.field_Query_jobsStatistics_argsMetrics(ctx, rawArgs) + arg1, err := graphql.ProcessArgField(ctx, rawArgs, "metrics", ec.unmarshalOString2ᚕstringᚄ) if err != nil { return nil, err } args["metrics"] = arg1 - arg2, err := ec.field_Query_jobsStatistics_argsPage(ctx, rawArgs) + arg2, err := graphql.ProcessArgField(ctx, rawArgs, "page", ec.unmarshalOPageRequest2ᚖgithubᚗcomᚋClusterCockpitᚋccᚑbackendᚋinternalᚋgraphᚋmodelᚐPageRequest) if err != nil { return nil, err } args["page"] = arg2 - arg3, err := ec.field_Query_jobsStatistics_argsSortBy(ctx, rawArgs) + arg3, err := graphql.ProcessArgField(ctx, rawArgs, "sortBy", ec.unmarshalOSortByAggregate2ᚖgithubᚗcomᚋClusterCockpitᚋccᚑbackendᚋinternalᚋgraphᚋmodelᚐSortByAggregate) if err != nil { return nil, err } args["sortBy"] = arg3 - arg4, err := ec.field_Query_jobsStatistics_argsGroupBy(ctx, rawArgs) + arg4, err := graphql.ProcessArgField(ctx, rawArgs, "groupBy", ec.unmarshalOAggregate2ᚖgithubᚗcomᚋClusterCockpitᚋccᚑbackendᚋinternalᚋgraphᚋmodelᚐAggregate) if err != nil { return nil, err } args["groupBy"] = arg4 - arg5, err := ec.field_Query_jobsStatistics_argsNumDurationBins(ctx, rawArgs) + arg5, err := graphql.ProcessArgField(ctx, rawArgs, "numDurationBins", ec.unmarshalOString2ᚖstring) if err != nil { return nil, err } args["numDurationBins"] = arg5 - arg6, err := ec.field_Query_jobsStatistics_argsNumMetricBins(ctx, rawArgs) + arg6, err := graphql.ProcessArgField(ctx, rawArgs, "numMetricBins", ec.unmarshalOInt2ᚖint) if err != nil { return nil, err } args["numMetricBins"] = arg6 return args, nil } -func (ec *executionContext) field_Query_jobsStatistics_argsFilter( - ctx context.Context, - rawArgs map[string]any, -) ([]*model.JobFilter, error) { - if _, ok := rawArgs["filter"]; !ok { - var zeroVal []*model.JobFilter - return zeroVal, nil - } - - ctx = graphql.WithPathContext(ctx, graphql.NewPathWithField("filter")) - if tmp, ok := rawArgs["filter"]; ok { - return ec.unmarshalOJobFilter2ᚕᚖgithubᚗcomᚋClusterCockpitᚋccᚑbackendᚋinternalᚋgraphᚋmodelᚐJobFilterᚄ(ctx, tmp) - } - - var zeroVal []*model.JobFilter - return zeroVal, nil -} - -func (ec *executionContext) field_Query_jobsStatistics_argsMetrics( - ctx context.Context, - rawArgs map[string]any, -) ([]string, error) { - if _, ok := rawArgs["metrics"]; !ok { - var zeroVal []string - return zeroVal, nil - } - - ctx = graphql.WithPathContext(ctx, 
graphql.NewPathWithField("metrics")) - if tmp, ok := rawArgs["metrics"]; ok { - return ec.unmarshalOString2ᚕstringᚄ(ctx, tmp) - } - - var zeroVal []string - return zeroVal, nil -} - -func (ec *executionContext) field_Query_jobsStatistics_argsPage( - ctx context.Context, - rawArgs map[string]any, -) (*model.PageRequest, error) { - if _, ok := rawArgs["page"]; !ok { - var zeroVal *model.PageRequest - return zeroVal, nil - } - - ctx = graphql.WithPathContext(ctx, graphql.NewPathWithField("page")) - if tmp, ok := rawArgs["page"]; ok { - return ec.unmarshalOPageRequest2ᚖgithubᚗcomᚋClusterCockpitᚋccᚑbackendᚋinternalᚋgraphᚋmodelᚐPageRequest(ctx, tmp) - } - - var zeroVal *model.PageRequest - return zeroVal, nil -} - -func (ec *executionContext) field_Query_jobsStatistics_argsSortBy( - ctx context.Context, - rawArgs map[string]any, -) (*model.SortByAggregate, error) { - if _, ok := rawArgs["sortBy"]; !ok { - var zeroVal *model.SortByAggregate - return zeroVal, nil - } - - ctx = graphql.WithPathContext(ctx, graphql.NewPathWithField("sortBy")) - if tmp, ok := rawArgs["sortBy"]; ok { - return ec.unmarshalOSortByAggregate2ᚖgithubᚗcomᚋClusterCockpitᚋccᚑbackendᚋinternalᚋgraphᚋmodelᚐSortByAggregate(ctx, tmp) - } - - var zeroVal *model.SortByAggregate - return zeroVal, nil -} - -func (ec *executionContext) field_Query_jobsStatistics_argsGroupBy( - ctx context.Context, - rawArgs map[string]any, -) (*model.Aggregate, error) { - if _, ok := rawArgs["groupBy"]; !ok { - var zeroVal *model.Aggregate - return zeroVal, nil - } - - ctx = graphql.WithPathContext(ctx, graphql.NewPathWithField("groupBy")) - if tmp, ok := rawArgs["groupBy"]; ok { - return ec.unmarshalOAggregate2ᚖgithubᚗcomᚋClusterCockpitᚋccᚑbackendᚋinternalᚋgraphᚋmodelᚐAggregate(ctx, tmp) - } - - var zeroVal *model.Aggregate - return zeroVal, nil -} - -func (ec *executionContext) field_Query_jobsStatistics_argsNumDurationBins( - ctx context.Context, - rawArgs map[string]any, -) (*string, error) { - if _, ok := rawArgs["numDurationBins"]; !ok { - var zeroVal *string - return zeroVal, nil - } - - ctx = graphql.WithPathContext(ctx, graphql.NewPathWithField("numDurationBins")) - if tmp, ok := rawArgs["numDurationBins"]; ok { - return ec.unmarshalOString2ᚖstring(ctx, tmp) - } - - var zeroVal *string - return zeroVal, nil -} - -func (ec *executionContext) field_Query_jobsStatistics_argsNumMetricBins( - ctx context.Context, - rawArgs map[string]any, -) (*int, error) { - if _, ok := rawArgs["numMetricBins"]; !ok { - var zeroVal *int - return zeroVal, nil - } - - ctx = graphql.WithPathContext(ctx, graphql.NewPathWithField("numMetricBins")) - if tmp, ok := rawArgs["numMetricBins"]; ok { - return ec.unmarshalOInt2ᚖint(ctx, tmp) - } - - var zeroVal *int - return zeroVal, nil -} func (ec *executionContext) field_Query_jobs_args(ctx context.Context, rawArgs map[string]any) (map[string]any, error) { var err error args := map[string]any{} - arg0, err := ec.field_Query_jobs_argsFilter(ctx, rawArgs) + arg0, err := graphql.ProcessArgField(ctx, rawArgs, "filter", ec.unmarshalOJobFilter2ᚕᚖgithubᚗcomᚋClusterCockpitᚋccᚑbackendᚋinternalᚋgraphᚋmodelᚐJobFilterᚄ) if err != nil { return nil, err } args["filter"] = arg0 - arg1, err := ec.field_Query_jobs_argsPage(ctx, rawArgs) + arg1, err := graphql.ProcessArgField(ctx, rawArgs, "page", ec.unmarshalOPageRequest2ᚖgithubᚗcomᚋClusterCockpitᚋccᚑbackendᚋinternalᚋgraphᚋmodelᚐPageRequest) if err != nil { return nil, err } args["page"] = arg1 - arg2, err := ec.field_Query_jobs_argsOrder(ctx, rawArgs) + arg2, err := 
graphql.ProcessArgField(ctx, rawArgs, "order", ec.unmarshalOOrderByInput2ᚖgithubᚗcomᚋClusterCockpitᚋccᚑbackendᚋinternalᚋgraphᚋmodelᚐOrderByInput) if err != nil { return nil, err } args["order"] = arg2 return args, nil } -func (ec *executionContext) field_Query_jobs_argsFilter( - ctx context.Context, - rawArgs map[string]any, -) ([]*model.JobFilter, error) { - if _, ok := rawArgs["filter"]; !ok { - var zeroVal []*model.JobFilter - return zeroVal, nil - } - - ctx = graphql.WithPathContext(ctx, graphql.NewPathWithField("filter")) - if tmp, ok := rawArgs["filter"]; ok { - return ec.unmarshalOJobFilter2ᚕᚖgithubᚗcomᚋClusterCockpitᚋccᚑbackendᚋinternalᚋgraphᚋmodelᚐJobFilterᚄ(ctx, tmp) - } - - var zeroVal []*model.JobFilter - return zeroVal, nil -} - -func (ec *executionContext) field_Query_jobs_argsPage( - ctx context.Context, - rawArgs map[string]any, -) (*model.PageRequest, error) { - if _, ok := rawArgs["page"]; !ok { - var zeroVal *model.PageRequest - return zeroVal, nil - } - - ctx = graphql.WithPathContext(ctx, graphql.NewPathWithField("page")) - if tmp, ok := rawArgs["page"]; ok { - return ec.unmarshalOPageRequest2ᚖgithubᚗcomᚋClusterCockpitᚋccᚑbackendᚋinternalᚋgraphᚋmodelᚐPageRequest(ctx, tmp) - } - - var zeroVal *model.PageRequest - return zeroVal, nil -} - -func (ec *executionContext) field_Query_jobs_argsOrder( - ctx context.Context, - rawArgs map[string]any, -) (*model.OrderByInput, error) { - if _, ok := rawArgs["order"]; !ok { - var zeroVal *model.OrderByInput - return zeroVal, nil - } - - ctx = graphql.WithPathContext(ctx, graphql.NewPathWithField("order")) - if tmp, ok := rawArgs["order"]; ok { - return ec.unmarshalOOrderByInput2ᚖgithubᚗcomᚋClusterCockpitᚋccᚑbackendᚋinternalᚋgraphᚋmodelᚐOrderByInput(ctx, tmp) - } - - var zeroVal *model.OrderByInput - return zeroVal, nil -} func (ec *executionContext) field_Query_nodeMetricsList_args(ctx context.Context, rawArgs map[string]any) (map[string]any, error) { var err error args := map[string]any{} - arg0, err := ec.field_Query_nodeMetricsList_argsCluster(ctx, rawArgs) + arg0, err := graphql.ProcessArgField(ctx, rawArgs, "cluster", ec.unmarshalNString2string) if err != nil { return nil, err } args["cluster"] = arg0 - arg1, err := ec.field_Query_nodeMetricsList_argsSubCluster(ctx, rawArgs) + arg1, err := graphql.ProcessArgField(ctx, rawArgs, "subCluster", ec.unmarshalNString2string) if err != nil { return nil, err } args["subCluster"] = arg1 - arg2, err := ec.field_Query_nodeMetricsList_argsNodeFilter(ctx, rawArgs) + arg2, err := graphql.ProcessArgField(ctx, rawArgs, "nodeFilter", ec.unmarshalNString2string) if err != nil { return nil, err } args["nodeFilter"] = arg2 - arg3, err := ec.field_Query_nodeMetricsList_argsScopes(ctx, rawArgs) + arg3, err := graphql.ProcessArgField(ctx, rawArgs, "scopes", ec.unmarshalOMetricScope2ᚕgithubᚗcomᚋClusterCockpitᚋccᚑlibᚋschemaᚐMetricScopeᚄ) if err != nil { return nil, err } args["scopes"] = arg3 - arg4, err := ec.field_Query_nodeMetricsList_argsMetrics(ctx, rawArgs) + arg4, err := graphql.ProcessArgField(ctx, rawArgs, "metrics", ec.unmarshalOString2ᚕstringᚄ) if err != nil { return nil, err } args["metrics"] = arg4 - arg5, err := ec.field_Query_nodeMetricsList_argsFrom(ctx, rawArgs) + arg5, err := graphql.ProcessArgField(ctx, rawArgs, "from", ec.unmarshalNTime2timeᚐTime) if err != nil { return nil, err } args["from"] = arg5 - arg6, err := ec.field_Query_nodeMetricsList_argsTo(ctx, rawArgs) + arg6, err := graphql.ProcessArgField(ctx, rawArgs, "to", ec.unmarshalNTime2timeᚐTime) if err != nil { return nil, 
err } args["to"] = arg6 - arg7, err := ec.field_Query_nodeMetricsList_argsPage(ctx, rawArgs) + arg7, err := graphql.ProcessArgField(ctx, rawArgs, "page", ec.unmarshalOPageRequest2ᚖgithubᚗcomᚋClusterCockpitᚋccᚑbackendᚋinternalᚋgraphᚋmodelᚐPageRequest) if err != nil { return nil, err } args["page"] = arg7 - arg8, err := ec.field_Query_nodeMetricsList_argsResolution(ctx, rawArgs) + arg8, err := graphql.ProcessArgField(ctx, rawArgs, "resolution", ec.unmarshalOInt2ᚖint) if err != nil { return nil, err } args["resolution"] = arg8 return args, nil } -func (ec *executionContext) field_Query_nodeMetricsList_argsCluster( - ctx context.Context, - rawArgs map[string]any, -) (string, error) { - if _, ok := rawArgs["cluster"]; !ok { - var zeroVal string - return zeroVal, nil - } - - ctx = graphql.WithPathContext(ctx, graphql.NewPathWithField("cluster")) - if tmp, ok := rawArgs["cluster"]; ok { - return ec.unmarshalNString2string(ctx, tmp) - } - - var zeroVal string - return zeroVal, nil -} - -func (ec *executionContext) field_Query_nodeMetricsList_argsSubCluster( - ctx context.Context, - rawArgs map[string]any, -) (string, error) { - if _, ok := rawArgs["subCluster"]; !ok { - var zeroVal string - return zeroVal, nil - } - - ctx = graphql.WithPathContext(ctx, graphql.NewPathWithField("subCluster")) - if tmp, ok := rawArgs["subCluster"]; ok { - return ec.unmarshalNString2string(ctx, tmp) - } - - var zeroVal string - return zeroVal, nil -} - -func (ec *executionContext) field_Query_nodeMetricsList_argsNodeFilter( - ctx context.Context, - rawArgs map[string]any, -) (string, error) { - if _, ok := rawArgs["nodeFilter"]; !ok { - var zeroVal string - return zeroVal, nil - } - - ctx = graphql.WithPathContext(ctx, graphql.NewPathWithField("nodeFilter")) - if tmp, ok := rawArgs["nodeFilter"]; ok { - return ec.unmarshalNString2string(ctx, tmp) - } - - var zeroVal string - return zeroVal, nil -} - -func (ec *executionContext) field_Query_nodeMetricsList_argsScopes( - ctx context.Context, - rawArgs map[string]any, -) ([]schema.MetricScope, error) { - if _, ok := rawArgs["scopes"]; !ok { - var zeroVal []schema.MetricScope - return zeroVal, nil - } - - ctx = graphql.WithPathContext(ctx, graphql.NewPathWithField("scopes")) - if tmp, ok := rawArgs["scopes"]; ok { - return ec.unmarshalOMetricScope2ᚕgithubᚗcomᚋClusterCockpitᚋccᚑlibᚋschemaᚐMetricScopeᚄ(ctx, tmp) - } - - var zeroVal []schema.MetricScope - return zeroVal, nil -} - -func (ec *executionContext) field_Query_nodeMetricsList_argsMetrics( - ctx context.Context, - rawArgs map[string]any, -) ([]string, error) { - if _, ok := rawArgs["metrics"]; !ok { - var zeroVal []string - return zeroVal, nil - } - - ctx = graphql.WithPathContext(ctx, graphql.NewPathWithField("metrics")) - if tmp, ok := rawArgs["metrics"]; ok { - return ec.unmarshalOString2ᚕstringᚄ(ctx, tmp) - } - - var zeroVal []string - return zeroVal, nil -} - -func (ec *executionContext) field_Query_nodeMetricsList_argsFrom( - ctx context.Context, - rawArgs map[string]any, -) (time.Time, error) { - if _, ok := rawArgs["from"]; !ok { - var zeroVal time.Time - return zeroVal, nil - } - - ctx = graphql.WithPathContext(ctx, graphql.NewPathWithField("from")) - if tmp, ok := rawArgs["from"]; ok { - return ec.unmarshalNTime2timeᚐTime(ctx, tmp) - } - - var zeroVal time.Time - return zeroVal, nil -} - -func (ec *executionContext) field_Query_nodeMetricsList_argsTo( - ctx context.Context, - rawArgs map[string]any, -) (time.Time, error) { - if _, ok := rawArgs["to"]; !ok { - var zeroVal time.Time - return zeroVal, nil - 
} - - ctx = graphql.WithPathContext(ctx, graphql.NewPathWithField("to")) - if tmp, ok := rawArgs["to"]; ok { - return ec.unmarshalNTime2timeᚐTime(ctx, tmp) - } - - var zeroVal time.Time - return zeroVal, nil -} - -func (ec *executionContext) field_Query_nodeMetricsList_argsPage( - ctx context.Context, - rawArgs map[string]any, -) (*model.PageRequest, error) { - if _, ok := rawArgs["page"]; !ok { - var zeroVal *model.PageRequest - return zeroVal, nil - } - - ctx = graphql.WithPathContext(ctx, graphql.NewPathWithField("page")) - if tmp, ok := rawArgs["page"]; ok { - return ec.unmarshalOPageRequest2ᚖgithubᚗcomᚋClusterCockpitᚋccᚑbackendᚋinternalᚋgraphᚋmodelᚐPageRequest(ctx, tmp) - } - - var zeroVal *model.PageRequest - return zeroVal, nil -} - -func (ec *executionContext) field_Query_nodeMetricsList_argsResolution( - ctx context.Context, - rawArgs map[string]any, -) (*int, error) { - if _, ok := rawArgs["resolution"]; !ok { - var zeroVal *int - return zeroVal, nil - } - - ctx = graphql.WithPathContext(ctx, graphql.NewPathWithField("resolution")) - if tmp, ok := rawArgs["resolution"]; ok { - return ec.unmarshalOInt2ᚖint(ctx, tmp) - } - - var zeroVal *int - return zeroVal, nil -} func (ec *executionContext) field_Query_nodeMetrics_args(ctx context.Context, rawArgs map[string]any) (map[string]any, error) { var err error args := map[string]any{} - arg0, err := ec.field_Query_nodeMetrics_argsCluster(ctx, rawArgs) + arg0, err := graphql.ProcessArgField(ctx, rawArgs, "cluster", ec.unmarshalNString2string) if err != nil { return nil, err } args["cluster"] = arg0 - arg1, err := ec.field_Query_nodeMetrics_argsNodes(ctx, rawArgs) + arg1, err := graphql.ProcessArgField(ctx, rawArgs, "nodes", ec.unmarshalOString2ᚕstringᚄ) if err != nil { return nil, err } args["nodes"] = arg1 - arg2, err := ec.field_Query_nodeMetrics_argsScopes(ctx, rawArgs) + arg2, err := graphql.ProcessArgField(ctx, rawArgs, "scopes", ec.unmarshalOMetricScope2ᚕgithubᚗcomᚋClusterCockpitᚋccᚑlibᚋschemaᚐMetricScopeᚄ) if err != nil { return nil, err } args["scopes"] = arg2 - arg3, err := ec.field_Query_nodeMetrics_argsMetrics(ctx, rawArgs) + arg3, err := graphql.ProcessArgField(ctx, rawArgs, "metrics", ec.unmarshalOString2ᚕstringᚄ) if err != nil { return nil, err } args["metrics"] = arg3 - arg4, err := ec.field_Query_nodeMetrics_argsFrom(ctx, rawArgs) + arg4, err := graphql.ProcessArgField(ctx, rawArgs, "from", ec.unmarshalNTime2timeᚐTime) if err != nil { return nil, err } args["from"] = arg4 - arg5, err := ec.field_Query_nodeMetrics_argsTo(ctx, rawArgs) + arg5, err := graphql.ProcessArgField(ctx, rawArgs, "to", ec.unmarshalNTime2timeᚐTime) if err != nil { return nil, err } args["to"] = arg5 return args, nil } -func (ec *executionContext) field_Query_nodeMetrics_argsCluster( - ctx context.Context, - rawArgs map[string]any, -) (string, error) { - if _, ok := rawArgs["cluster"]; !ok { - var zeroVal string - return zeroVal, nil - } - - ctx = graphql.WithPathContext(ctx, graphql.NewPathWithField("cluster")) - if tmp, ok := rawArgs["cluster"]; ok { - return ec.unmarshalNString2string(ctx, tmp) - } - - var zeroVal string - return zeroVal, nil -} - -func (ec *executionContext) field_Query_nodeMetrics_argsNodes( - ctx context.Context, - rawArgs map[string]any, -) ([]string, error) { - if _, ok := rawArgs["nodes"]; !ok { - var zeroVal []string - return zeroVal, nil - } - - ctx = graphql.WithPathContext(ctx, graphql.NewPathWithField("nodes")) - if tmp, ok := rawArgs["nodes"]; ok { - return ec.unmarshalOString2ᚕstringᚄ(ctx, tmp) - } - - var zeroVal 
[]string - return zeroVal, nil -} - -func (ec *executionContext) field_Query_nodeMetrics_argsScopes( - ctx context.Context, - rawArgs map[string]any, -) ([]schema.MetricScope, error) { - if _, ok := rawArgs["scopes"]; !ok { - var zeroVal []schema.MetricScope - return zeroVal, nil - } - - ctx = graphql.WithPathContext(ctx, graphql.NewPathWithField("scopes")) - if tmp, ok := rawArgs["scopes"]; ok { - return ec.unmarshalOMetricScope2ᚕgithubᚗcomᚋClusterCockpitᚋccᚑlibᚋschemaᚐMetricScopeᚄ(ctx, tmp) - } - - var zeroVal []schema.MetricScope - return zeroVal, nil -} - -func (ec *executionContext) field_Query_nodeMetrics_argsMetrics( - ctx context.Context, - rawArgs map[string]any, -) ([]string, error) { - if _, ok := rawArgs["metrics"]; !ok { - var zeroVal []string - return zeroVal, nil - } - - ctx = graphql.WithPathContext(ctx, graphql.NewPathWithField("metrics")) - if tmp, ok := rawArgs["metrics"]; ok { - return ec.unmarshalOString2ᚕstringᚄ(ctx, tmp) - } - - var zeroVal []string - return zeroVal, nil -} - -func (ec *executionContext) field_Query_nodeMetrics_argsFrom( - ctx context.Context, - rawArgs map[string]any, -) (time.Time, error) { - if _, ok := rawArgs["from"]; !ok { - var zeroVal time.Time - return zeroVal, nil - } - - ctx = graphql.WithPathContext(ctx, graphql.NewPathWithField("from")) - if tmp, ok := rawArgs["from"]; ok { - return ec.unmarshalNTime2timeᚐTime(ctx, tmp) - } - - var zeroVal time.Time - return zeroVal, nil -} - -func (ec *executionContext) field_Query_nodeMetrics_argsTo( - ctx context.Context, - rawArgs map[string]any, -) (time.Time, error) { - if _, ok := rawArgs["to"]; !ok { - var zeroVal time.Time - return zeroVal, nil - } - - ctx = graphql.WithPathContext(ctx, graphql.NewPathWithField("to")) - if tmp, ok := rawArgs["to"]; ok { - return ec.unmarshalNTime2timeᚐTime(ctx, tmp) - } - - var zeroVal time.Time - return zeroVal, nil -} func (ec *executionContext) field_Query_nodeStats_args(ctx context.Context, rawArgs map[string]any) (map[string]any, error) { var err error args := map[string]any{} - arg0, err := ec.field_Query_nodeStats_argsFilter(ctx, rawArgs) + arg0, err := graphql.ProcessArgField(ctx, rawArgs, "filter", ec.unmarshalONodeFilter2ᚕᚖgithubᚗcomᚋClusterCockpitᚋccᚑbackendᚋinternalᚋgraphᚋmodelᚐNodeFilterᚄ) if err != nil { return nil, err } args["filter"] = arg0 return args, nil } -func (ec *executionContext) field_Query_nodeStats_argsFilter( - ctx context.Context, - rawArgs map[string]any, -) ([]*model.NodeFilter, error) { - if _, ok := rawArgs["filter"]; !ok { - var zeroVal []*model.NodeFilter - return zeroVal, nil - } - - ctx = graphql.WithPathContext(ctx, graphql.NewPathWithField("filter")) - if tmp, ok := rawArgs["filter"]; ok { - return ec.unmarshalONodeFilter2ᚕᚖgithubᚗcomᚋClusterCockpitᚋccᚑbackendᚋinternalᚋgraphᚋmodelᚐNodeFilterᚄ(ctx, tmp) - } - - var zeroVal []*model.NodeFilter - return zeroVal, nil -} func (ec *executionContext) field_Query_node_args(ctx context.Context, rawArgs map[string]any) (map[string]any, error) { var err error args := map[string]any{} - arg0, err := ec.field_Query_node_argsID(ctx, rawArgs) + arg0, err := graphql.ProcessArgField(ctx, rawArgs, "id", ec.unmarshalNID2string) if err != nil { return nil, err } args["id"] = arg0 return args, nil } -func (ec *executionContext) field_Query_node_argsID( - ctx context.Context, - rawArgs map[string]any, -) (string, error) { - if _, ok := rawArgs["id"]; !ok { - var zeroVal string - return zeroVal, nil - } - - ctx = graphql.WithPathContext(ctx, graphql.NewPathWithField("id")) - if tmp, ok := 
rawArgs["id"]; ok { - return ec.unmarshalNID2string(ctx, tmp) - } - - var zeroVal string - return zeroVal, nil -} func (ec *executionContext) field_Query_nodes_args(ctx context.Context, rawArgs map[string]any) (map[string]any, error) { var err error args := map[string]any{} - arg0, err := ec.field_Query_nodes_argsFilter(ctx, rawArgs) + arg0, err := graphql.ProcessArgField(ctx, rawArgs, "filter", ec.unmarshalONodeFilter2ᚕᚖgithubᚗcomᚋClusterCockpitᚋccᚑbackendᚋinternalᚋgraphᚋmodelᚐNodeFilterᚄ) if err != nil { return nil, err } args["filter"] = arg0 - arg1, err := ec.field_Query_nodes_argsOrder(ctx, rawArgs) + arg1, err := graphql.ProcessArgField(ctx, rawArgs, "order", ec.unmarshalOOrderByInput2ᚖgithubᚗcomᚋClusterCockpitᚋccᚑbackendᚋinternalᚋgraphᚋmodelᚐOrderByInput) if err != nil { return nil, err } args["order"] = arg1 return args, nil } -func (ec *executionContext) field_Query_nodes_argsFilter( - ctx context.Context, - rawArgs map[string]any, -) ([]*model.NodeFilter, error) { - if _, ok := rawArgs["filter"]; !ok { - var zeroVal []*model.NodeFilter - return zeroVal, nil - } - - ctx = graphql.WithPathContext(ctx, graphql.NewPathWithField("filter")) - if tmp, ok := rawArgs["filter"]; ok { - return ec.unmarshalONodeFilter2ᚕᚖgithubᚗcomᚋClusterCockpitᚋccᚑbackendᚋinternalᚋgraphᚋmodelᚐNodeFilterᚄ(ctx, tmp) - } - - var zeroVal []*model.NodeFilter - return zeroVal, nil -} - -func (ec *executionContext) field_Query_nodes_argsOrder( - ctx context.Context, - rawArgs map[string]any, -) (*model.OrderByInput, error) { - if _, ok := rawArgs["order"]; !ok { - var zeroVal *model.OrderByInput - return zeroVal, nil - } - - ctx = graphql.WithPathContext(ctx, graphql.NewPathWithField("order")) - if tmp, ok := rawArgs["order"]; ok { - return ec.unmarshalOOrderByInput2ᚖgithubᚗcomᚋClusterCockpitᚋccᚑbackendᚋinternalᚋgraphᚋmodelᚐOrderByInput(ctx, tmp) - } - - var zeroVal *model.OrderByInput - return zeroVal, nil -} func (ec *executionContext) field_Query_rooflineHeatmap_args(ctx context.Context, rawArgs map[string]any) (map[string]any, error) { var err error args := map[string]any{} - arg0, err := ec.field_Query_rooflineHeatmap_argsFilter(ctx, rawArgs) + arg0, err := graphql.ProcessArgField(ctx, rawArgs, "filter", ec.unmarshalNJobFilter2ᚕᚖgithubᚗcomᚋClusterCockpitᚋccᚑbackendᚋinternalᚋgraphᚋmodelᚐJobFilterᚄ) if err != nil { return nil, err } args["filter"] = arg0 - arg1, err := ec.field_Query_rooflineHeatmap_argsRows(ctx, rawArgs) + arg1, err := graphql.ProcessArgField(ctx, rawArgs, "rows", ec.unmarshalNInt2int) if err != nil { return nil, err } args["rows"] = arg1 - arg2, err := ec.field_Query_rooflineHeatmap_argsCols(ctx, rawArgs) + arg2, err := graphql.ProcessArgField(ctx, rawArgs, "cols", ec.unmarshalNInt2int) if err != nil { return nil, err } args["cols"] = arg2 - arg3, err := ec.field_Query_rooflineHeatmap_argsMinX(ctx, rawArgs) + arg3, err := graphql.ProcessArgField(ctx, rawArgs, "minX", ec.unmarshalNFloat2float64) if err != nil { return nil, err } args["minX"] = arg3 - arg4, err := ec.field_Query_rooflineHeatmap_argsMinY(ctx, rawArgs) + arg4, err := graphql.ProcessArgField(ctx, rawArgs, "minY", ec.unmarshalNFloat2float64) if err != nil { return nil, err } args["minY"] = arg4 - arg5, err := ec.field_Query_rooflineHeatmap_argsMaxX(ctx, rawArgs) + arg5, err := graphql.ProcessArgField(ctx, rawArgs, "maxX", ec.unmarshalNFloat2float64) if err != nil { return nil, err } args["maxX"] = arg5 - arg6, err := ec.field_Query_rooflineHeatmap_argsMaxY(ctx, rawArgs) + arg6, err := graphql.ProcessArgField(ctx, rawArgs, "maxY", 
ec.unmarshalNFloat2float64) if err != nil { return nil, err } args["maxY"] = arg6 return args, nil } -func (ec *executionContext) field_Query_rooflineHeatmap_argsFilter( - ctx context.Context, - rawArgs map[string]any, -) ([]*model.JobFilter, error) { - if _, ok := rawArgs["filter"]; !ok { - var zeroVal []*model.JobFilter - return zeroVal, nil - } - - ctx = graphql.WithPathContext(ctx, graphql.NewPathWithField("filter")) - if tmp, ok := rawArgs["filter"]; ok { - return ec.unmarshalNJobFilter2ᚕᚖgithubᚗcomᚋClusterCockpitᚋccᚑbackendᚋinternalᚋgraphᚋmodelᚐJobFilterᚄ(ctx, tmp) - } - - var zeroVal []*model.JobFilter - return zeroVal, nil -} - -func (ec *executionContext) field_Query_rooflineHeatmap_argsRows( - ctx context.Context, - rawArgs map[string]any, -) (int, error) { - if _, ok := rawArgs["rows"]; !ok { - var zeroVal int - return zeroVal, nil - } - - ctx = graphql.WithPathContext(ctx, graphql.NewPathWithField("rows")) - if tmp, ok := rawArgs["rows"]; ok { - return ec.unmarshalNInt2int(ctx, tmp) - } - - var zeroVal int - return zeroVal, nil -} - -func (ec *executionContext) field_Query_rooflineHeatmap_argsCols( - ctx context.Context, - rawArgs map[string]any, -) (int, error) { - if _, ok := rawArgs["cols"]; !ok { - var zeroVal int - return zeroVal, nil - } - - ctx = graphql.WithPathContext(ctx, graphql.NewPathWithField("cols")) - if tmp, ok := rawArgs["cols"]; ok { - return ec.unmarshalNInt2int(ctx, tmp) - } - - var zeroVal int - return zeroVal, nil -} - -func (ec *executionContext) field_Query_rooflineHeatmap_argsMinX( - ctx context.Context, - rawArgs map[string]any, -) (float64, error) { - if _, ok := rawArgs["minX"]; !ok { - var zeroVal float64 - return zeroVal, nil - } - - ctx = graphql.WithPathContext(ctx, graphql.NewPathWithField("minX")) - if tmp, ok := rawArgs["minX"]; ok { - return ec.unmarshalNFloat2float64(ctx, tmp) - } - - var zeroVal float64 - return zeroVal, nil -} - -func (ec *executionContext) field_Query_rooflineHeatmap_argsMinY( - ctx context.Context, - rawArgs map[string]any, -) (float64, error) { - if _, ok := rawArgs["minY"]; !ok { - var zeroVal float64 - return zeroVal, nil - } - - ctx = graphql.WithPathContext(ctx, graphql.NewPathWithField("minY")) - if tmp, ok := rawArgs["minY"]; ok { - return ec.unmarshalNFloat2float64(ctx, tmp) - } - - var zeroVal float64 - return zeroVal, nil -} - -func (ec *executionContext) field_Query_rooflineHeatmap_argsMaxX( - ctx context.Context, - rawArgs map[string]any, -) (float64, error) { - if _, ok := rawArgs["maxX"]; !ok { - var zeroVal float64 - return zeroVal, nil - } - - ctx = graphql.WithPathContext(ctx, graphql.NewPathWithField("maxX")) - if tmp, ok := rawArgs["maxX"]; ok { - return ec.unmarshalNFloat2float64(ctx, tmp) - } - - var zeroVal float64 - return zeroVal, nil -} - -func (ec *executionContext) field_Query_rooflineHeatmap_argsMaxY( - ctx context.Context, - rawArgs map[string]any, -) (float64, error) { - if _, ok := rawArgs["maxY"]; !ok { - var zeroVal float64 - return zeroVal, nil - } - - ctx = graphql.WithPathContext(ctx, graphql.NewPathWithField("maxY")) - if tmp, ok := rawArgs["maxY"]; ok { - return ec.unmarshalNFloat2float64(ctx, tmp) - } - - var zeroVal float64 - return zeroVal, nil -} func (ec *executionContext) field_Query_scopedJobStats_args(ctx context.Context, rawArgs map[string]any) (map[string]any, error) { var err error args := map[string]any{} - arg0, err := ec.field_Query_scopedJobStats_argsID(ctx, rawArgs) + arg0, err := graphql.ProcessArgField(ctx, rawArgs, "id", ec.unmarshalNID2string) if err != nil { 
return nil, err } args["id"] = arg0 - arg1, err := ec.field_Query_scopedJobStats_argsMetrics(ctx, rawArgs) + arg1, err := graphql.ProcessArgField(ctx, rawArgs, "metrics", ec.unmarshalOString2ᚕstringᚄ) if err != nil { return nil, err } args["metrics"] = arg1 - arg2, err := ec.field_Query_scopedJobStats_argsScopes(ctx, rawArgs) + arg2, err := graphql.ProcessArgField(ctx, rawArgs, "scopes", ec.unmarshalOMetricScope2ᚕgithubᚗcomᚋClusterCockpitᚋccᚑlibᚋschemaᚐMetricScopeᚄ) if err != nil { return nil, err } args["scopes"] = arg2 return args, nil } -func (ec *executionContext) field_Query_scopedJobStats_argsID( - ctx context.Context, - rawArgs map[string]any, -) (string, error) { - if _, ok := rawArgs["id"]; !ok { - var zeroVal string - return zeroVal, nil - } - - ctx = graphql.WithPathContext(ctx, graphql.NewPathWithField("id")) - if tmp, ok := rawArgs["id"]; ok { - return ec.unmarshalNID2string(ctx, tmp) - } - - var zeroVal string - return zeroVal, nil -} - -func (ec *executionContext) field_Query_scopedJobStats_argsMetrics( - ctx context.Context, - rawArgs map[string]any, -) ([]string, error) { - if _, ok := rawArgs["metrics"]; !ok { - var zeroVal []string - return zeroVal, nil - } - - ctx = graphql.WithPathContext(ctx, graphql.NewPathWithField("metrics")) - if tmp, ok := rawArgs["metrics"]; ok { - return ec.unmarshalOString2ᚕstringᚄ(ctx, tmp) - } - - var zeroVal []string - return zeroVal, nil -} - -func (ec *executionContext) field_Query_scopedJobStats_argsScopes( - ctx context.Context, - rawArgs map[string]any, -) ([]schema.MetricScope, error) { - if _, ok := rawArgs["scopes"]; !ok { - var zeroVal []schema.MetricScope - return zeroVal, nil - } - - ctx = graphql.WithPathContext(ctx, graphql.NewPathWithField("scopes")) - if tmp, ok := rawArgs["scopes"]; ok { - return ec.unmarshalOMetricScope2ᚕgithubᚗcomᚋClusterCockpitᚋccᚑlibᚋschemaᚐMetricScopeᚄ(ctx, tmp) - } - - var zeroVal []schema.MetricScope - return zeroVal, nil -} func (ec *executionContext) field_Query_user_args(ctx context.Context, rawArgs map[string]any) (map[string]any, error) { var err error args := map[string]any{} - arg0, err := ec.field_Query_user_argsUsername(ctx, rawArgs) + arg0, err := graphql.ProcessArgField(ctx, rawArgs, "username", ec.unmarshalNString2string) if err != nil { return nil, err } args["username"] = arg0 return args, nil } -func (ec *executionContext) field_Query_user_argsUsername( - ctx context.Context, - rawArgs map[string]any, -) (string, error) { - if _, ok := rawArgs["username"]; !ok { - var zeroVal string - return zeroVal, nil - } - - ctx = graphql.WithPathContext(ctx, graphql.NewPathWithField("username")) - if tmp, ok := rawArgs["username"]; ok { - return ec.unmarshalNString2string(ctx, tmp) - } - - var zeroVal string - return zeroVal, nil -} func (ec *executionContext) field___Directive_args_args(ctx context.Context, rawArgs map[string]any) (map[string]any, error) { var err error args := map[string]any{} - arg0, err := ec.field___Directive_args_argsIncludeDeprecated(ctx, rawArgs) + arg0, err := graphql.ProcessArgField(ctx, rawArgs, "includeDeprecated", ec.unmarshalOBoolean2ᚖbool) if err != nil { return nil, err } args["includeDeprecated"] = arg0 return args, nil } -func (ec *executionContext) field___Directive_args_argsIncludeDeprecated( - ctx context.Context, - rawArgs map[string]any, -) (*bool, error) { - if _, ok := rawArgs["includeDeprecated"]; !ok { - var zeroVal *bool - return zeroVal, nil - } - - ctx = graphql.WithPathContext(ctx, graphql.NewPathWithField("includeDeprecated")) - if tmp, ok := 
rawArgs["includeDeprecated"]; ok { - return ec.unmarshalOBoolean2ᚖbool(ctx, tmp) - } - - var zeroVal *bool - return zeroVal, nil -} func (ec *executionContext) field___Field_args_args(ctx context.Context, rawArgs map[string]any) (map[string]any, error) { var err error args := map[string]any{} - arg0, err := ec.field___Field_args_argsIncludeDeprecated(ctx, rawArgs) + arg0, err := graphql.ProcessArgField(ctx, rawArgs, "includeDeprecated", ec.unmarshalOBoolean2ᚖbool) if err != nil { return nil, err } args["includeDeprecated"] = arg0 return args, nil } -func (ec *executionContext) field___Field_args_argsIncludeDeprecated( - ctx context.Context, - rawArgs map[string]any, -) (*bool, error) { - if _, ok := rawArgs["includeDeprecated"]; !ok { - var zeroVal *bool - return zeroVal, nil - } - - ctx = graphql.WithPathContext(ctx, graphql.NewPathWithField("includeDeprecated")) - if tmp, ok := rawArgs["includeDeprecated"]; ok { - return ec.unmarshalOBoolean2ᚖbool(ctx, tmp) - } - - var zeroVal *bool - return zeroVal, nil -} func (ec *executionContext) field___Type_enumValues_args(ctx context.Context, rawArgs map[string]any) (map[string]any, error) { var err error args := map[string]any{} - arg0, err := ec.field___Type_enumValues_argsIncludeDeprecated(ctx, rawArgs) + arg0, err := graphql.ProcessArgField(ctx, rawArgs, "includeDeprecated", ec.unmarshalOBoolean2bool) if err != nil { return nil, err } args["includeDeprecated"] = arg0 return args, nil } -func (ec *executionContext) field___Type_enumValues_argsIncludeDeprecated( - ctx context.Context, - rawArgs map[string]any, -) (bool, error) { - if _, ok := rawArgs["includeDeprecated"]; !ok { - var zeroVal bool - return zeroVal, nil - } - - ctx = graphql.WithPathContext(ctx, graphql.NewPathWithField("includeDeprecated")) - if tmp, ok := rawArgs["includeDeprecated"]; ok { - return ec.unmarshalOBoolean2bool(ctx, tmp) - } - - var zeroVal bool - return zeroVal, nil -} func (ec *executionContext) field___Type_fields_args(ctx context.Context, rawArgs map[string]any) (map[string]any, error) { var err error args := map[string]any{} - arg0, err := ec.field___Type_fields_argsIncludeDeprecated(ctx, rawArgs) + arg0, err := graphql.ProcessArgField(ctx, rawArgs, "includeDeprecated", ec.unmarshalOBoolean2bool) if err != nil { return nil, err } args["includeDeprecated"] = arg0 return args, nil } -func (ec *executionContext) field___Type_fields_argsIncludeDeprecated( - ctx context.Context, - rawArgs map[string]any, -) (bool, error) { - if _, ok := rawArgs["includeDeprecated"]; !ok { - var zeroVal bool - return zeroVal, nil - } - - ctx = graphql.WithPathContext(ctx, graphql.NewPathWithField("includeDeprecated")) - if tmp, ok := rawArgs["includeDeprecated"]; ok { - return ec.unmarshalOBoolean2bool(ctx, tmp) - } - - var zeroVal bool - return zeroVal, nil -} // endregion ***************************** args.gotpl ***************************** @@ -6426,7 +5231,7 @@ func (ec *executionContext) _Job_exclusive(ctx context.Context, field graphql.Co }() resTmp, err := ec.ResolverMiddleware(ctx, func(rctx context.Context) (any, error) { ctx = rctx // use context from middleware stack in children - return obj.Exclusive, nil + return ec.resolvers.Job().Exclusive(rctx, obj) }) if err != nil { ec.Error(ctx, err) @@ -6438,17 +5243,17 @@ func (ec *executionContext) _Job_exclusive(ctx context.Context, field graphql.Co } return graphql.Null } - res := resTmp.(int32) + res := resTmp.(int) fc.Result = res - return ec.marshalNInt2int32(ctx, field.Selections, res) + return ec.marshalNInt2int(ctx, 
field.Selections, res) } func (ec *executionContext) fieldContext_Job_exclusive(_ context.Context, field graphql.CollectedField) (fc *graphql.FieldContext, err error) { fc = &graphql.FieldContext{ Object: "Job", Field: field, - IsMethod: false, - IsResolver: false, + IsMethod: true, + IsResolver: true, Child: func(ctx context.Context, field graphql.CollectedField) (*graphql.FieldContext, error) { return nil, errors.New("field of type Int does not have child fields") }, @@ -18593,10 +17398,41 @@ func (ec *executionContext) _Job(ctx context.Context, sel ast.SelectionSet, obj atomic.AddUint32(&out.Invalids, 1) } case "exclusive": - out.Values[i] = ec._Job_exclusive(ctx, field, obj) - if out.Values[i] == graphql.Null { - atomic.AddUint32(&out.Invalids, 1) + field := field + + innerFunc := func(ctx context.Context, fs *graphql.FieldSet) (res graphql.Marshaler) { + defer func() { + if r := recover(); r != nil { + ec.Error(ctx, ec.Recover(ctx, r)) + } + }() + res = ec._Job_exclusive(ctx, field, obj) + if res == graphql.Null { + atomic.AddUint32(&fs.Invalids, 1) + } + return res } + + if field.Deferrable != nil { + dfs, ok := deferred[field.Deferrable.Label] + di := 0 + if ok { + dfs.AddField(field) + di = len(dfs.Values) - 1 + } else { + dfs = graphql.NewFieldSet([]graphql.CollectedField{field}) + deferred[field.Deferrable.Label] = dfs + } + dfs.Concurrently(di, func(ctx context.Context) graphql.Marshaler { + return innerFunc(ctx, dfs) + }) + + // don't run the out.Concurrently() call below + out.Values[i] = graphql.Null + continue + } + + out.Concurrently(i, func(ctx context.Context) graphql.Marshaler { return innerFunc(ctx, out) }) case "partition": out.Values[i] = ec._Job_partition(ctx, field, obj) if out.Values[i] == graphql.Null { diff --git a/internal/graph/schema.resolvers.go b/internal/graph/schema.resolvers.go index e0e74b5..8868497 100644 --- a/internal/graph/schema.resolvers.go +++ b/internal/graph/schema.resolvers.go @@ -2,7 +2,7 @@ package graph // This file will be automatically regenerated based on the schema, any resolver implementations // will be copied through when generating and any unknown code will be moved to the end. -// Code generated by github.com/99designs/gqlgen version v0.17.76 +// Code generated by github.com/99designs/gqlgen version v0.17.78 import ( "context" @@ -35,6 +35,11 @@ func (r *jobResolver) StartTime(ctx context.Context, obj *schema.Job) (*time.Tim return ×tamp, nil } +// Exclusive is the resolver for the exclusive field. +func (r *jobResolver) Exclusive(ctx context.Context, obj *schema.Job) (int, error) { + panic(fmt.Errorf("not implemented: Exclusive - exclusive")) +} + // Tags is the resolver for the tags field. func (r *jobResolver) Tags(ctx context.Context, obj *schema.Job) ([]*schema.Tag, error) { return r.Repo.GetTags(repository.GetUserFromContext(ctx), obj.ID) @@ -43,7 +48,7 @@ func (r *jobResolver) Tags(ctx context.Context, obj *schema.Job) ([]*schema.Tag, // ConcurrentJobs is the resolver for the concurrentJobs field. 
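// Note: the string-valued Shared field replaces the former Exclusive integer;
// the job table migrations below only allow "none", "single_user" and
// "multi_user", so the lookup below only runs for jobs that shared their
// nodes and exceeded the hardcoded 600-second threshold. A minimal sketch for
// the still unimplemented Exclusive resolver above could map the new field
// back to the legacy values (only "none" -> 1 is confirmed by the changed
// condition below; the other two assignments are an assumption):
//
//	switch obj.Shared {
//	case "none":
//		return 1, nil // formerly exclusive
//	case "single_user":
//		return 2, nil // assumed mapping
//	default:
//		return 0, nil // assumed mapping for "multi_user"
//	}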
func (r *jobResolver) ConcurrentJobs(ctx context.Context, obj *schema.Job) (*model.JobLinkResultList, error) { // FIXME: Make the hardcoded duration configurable - if obj.Exclusive != 1 && obj.Duration > 600 { + if obj.Shared != "none" && obj.Duration > 600 { return r.Repo.FindConcurrentJobs(ctx, obj) } diff --git a/internal/importer/handleImport.go b/internal/importer/handleImport.go index 71c4d24..f527781 100644 --- a/internal/importer/handleImport.go +++ b/internal/importer/handleImport.go @@ -43,7 +43,7 @@ func HandleImportFlag(flag string) error { dec := json.NewDecoder(bytes.NewReader(raw)) dec.DisallowUnknownFields() job := schema.Job{ - Exclusive: 1, + Shared: "none", MonitoringStatus: schema.MonitoringStatusRunningOrArchiving, } if err = dec.Decode(&job); err != nil { diff --git a/internal/repository/job.go b/internal/repository/job.go index b6aa323..dd40ebc 100644 --- a/internal/repository/job.go +++ b/internal/repository/job.go @@ -74,7 +74,7 @@ func scanJob(row interface{ Scan(...any) error }) (*schema.Job, error) { if err := row.Scan( &job.ID, &job.JobID, &job.User, &job.Project, &job.Cluster, &job.SubCluster, &job.StartTime, &job.Partition, &job.ArrayJobId, &job.NumNodes, &job.NumHWThreads, - &job.NumAcc, &job.Exclusive, &job.MonitoringStatus, &job.SMT, &job.State, + &job.NumAcc, &job.Shared, &job.MonitoringStatus, &job.SMT, &job.State, &job.Duration, &job.Walltime, &job.RawResources, &job.RawFootprint, &job.Energy); err != nil { cclog.Warnf("Error while scanning rows (Job): %v", err) return nil, err diff --git a/internal/repository/migrations/sqlite3/09_add-job-cache.up.sql b/internal/repository/migrations/sqlite3/09_add-job-cache.up.sql index 7840369..003eab0 100644 --- a/internal/repository/migrations/sqlite3/09_add-job-cache.up.sql +++ b/internal/repository/migrations/sqlite3/09_add-job-cache.up.sql @@ -1,9 +1,10 @@ CREATE TABLE "job_cache" ( id INTEGER PRIMARY KEY, job_id BIGINT NOT NULL, - cluster VARCHAR(255) NOT NULL, + hpc_cluster VARCHAR(255) NOT NULL, subcluster VARCHAR(255) NOT NULL, - start_time BIGINT NOT NULL, -- Unix timestamp + submit_time BIGINT NOT NULL, -- Unix timestamp + start_time BIGINT NOT NULL DEFAULT 0, -- Unix timestamp hpc_user VARCHAR(255) NOT NULL, project VARCHAR(255) NOT NULL, cluster_partition VARCHAR(255), @@ -12,8 +13,9 @@ CREATE TABLE "job_cache" ( walltime INT NOT NULL, job_state VARCHAR(255) NOT NULL CHECK (job_state IN ( - 'running', 'completed', 'failed', 'cancelled', - 'stopped', 'timeout', 'preempted', 'out_of_memory' + 'boot_fail', 'cancelled', 'completed', 'deadline', + 'failed', 'node_fail', 'out-of-memory', 'pending', + 'preempted', 'running', 'suspended', 'timeout' )), meta_data TEXT, -- JSON resources TEXT NOT NULL, -- JSON @@ -21,7 +23,8 @@ CREATE TABLE "job_cache" ( num_hwthreads INT, num_acc INT, smt TINYINT NOT NULL DEFAULT 1 CHECK (smt IN (0, 1)), - exclusive TINYINT NOT NULL DEFAULT 1 CHECK (exclusive IN (0, 1, 2)), + shared TEXT NOT NULL + CHECK (shared IN ("none", "single_user", "multi_user")), monitoring_status TINYINT NOT NULL DEFAULT 1 CHECK (monitoring_status IN (0, 1, 2, 3)), energy REAL NOT NULL DEFAULT 0.0, @@ -29,3 +32,43 @@ CREATE TABLE "job_cache" ( footprint TEXT DEFAULT NULL, UNIQUE (job_id, cluster, start_time) ); + +CREATE TABLE "job_new" ( + id INTEGER PRIMARY KEY, + job_id BIGINT NOT NULL, + hpc_cluster TEXT NOT NULL, + subcluster TEXT NOT NULL, + submit_time BIGINT NOT NULL DEFAULT 0, -- Unix timestamp + start_time BIGINT NOT NULL DEFAULT 0, -- Unix timestamp + hpc_user TEXT NOT NULL, + project TEXT NOT 
NULL, + cluster_partition TEXT, + array_job_id BIGINT, + duration INT NOT NULL, + walltime INT NOT NULL, + job_state TEXT NOT NULL + CHECK (job_state IN ( + 'boot_fail', 'cancelled', 'completed', 'deadline', + 'failed', 'node_fail', 'out-of-memory', 'pending', + 'preempted', 'running', 'suspended', 'timeout' + )), + meta_data TEXT, -- JSON + resources TEXT NOT NULL, -- JSON + num_nodes INT NOT NULL, + num_hwthreads INT, + num_acc INT, + smt INT NOT NULL DEFAULT 1, + shared TEXT NOT NULL + CHECK (shared IN ("none", "single_user", "multi_user")), + monitoring_status TINYINT NOT NULL DEFAULT 1 + CHECK (monitoring_status IN (0, 1, 2, 3)), + energy REAL NOT NULL DEFAULT 0.0, + energy_footprint TEXT DEFAULT NULL, + footprint TEXT DEFAULT NULL, + UNIQUE (job_id, cluster, start_time) +); + +ALTER TABLE job RENAME COLUMN cluster TO hpc_cluster; +INSERT INTO job_new SELECT * FROM job; +DROP TABLE job; +ALTER TABLE job_new RENAME TO job; diff --git a/internal/tagger/classifyJob.go b/internal/tagger/classifyJob.go index 32063cd..0317f81 100644 --- a/internal/tagger/classifyJob.go +++ b/internal/tagger/classifyJob.go @@ -240,13 +240,13 @@ func (t *JobClassTagger) Match(job *schema.Job) { // Initialize environment env["job"] = map[string]any{ - "exclusive": job.Exclusive, - "duration": job.Duration, - "numCores": job.NumHWThreads, - "numNodes": job.NumNodes, - "jobState": job.State, - "numAcc": job.NumAcc, - "smt": job.SMT, + "shared": job.Shared, + "duration": job.Duration, + "numCores": job.NumHWThreads, + "numNodes": job.NumNodes, + "jobState": job.State, + "numAcc": job.NumAcc, + "smt": job.SMT, } // add metrics to env diff --git a/pkg/archive/fsBackend_test.go b/pkg/archive/fsBackend_test.go index cdc892f..c872d0a 100644 --- a/pkg/archive/fsBackend_test.go +++ b/pkg/archive/fsBackend_test.go @@ -86,7 +86,7 @@ func TestLoadJobMeta(t *testing.T) { } jobIn := schema.Job{ - Exclusive: 1, + Shared: "none", MonitoringStatus: schema.MonitoringStatusRunningOrArchiving, } jobIn.StartTime = 1608923076 @@ -117,7 +117,7 @@ func TestLoadJobData(t *testing.T) { } jobIn := schema.Job{ - Exclusive: 1, + Shared: "none", MonitoringStatus: schema.MonitoringStatusRunningOrArchiving, } jobIn.StartTime = 1608923076 @@ -148,7 +148,7 @@ func BenchmarkLoadJobData(b *testing.B) { fsa.Init(json.RawMessage(archiveCfg)) jobIn := schema.Job{ - Exclusive: 1, + Shared: "none", MonitoringStatus: schema.MonitoringStatusRunningOrArchiving, } jobIn.StartTime = 1608923076 @@ -174,7 +174,7 @@ func BenchmarkLoadJobDataCompressed(b *testing.B) { fsa.Init(json.RawMessage(archiveCfg)) jobIn := schema.Job{ - Exclusive: 1, + Shared: "none", MonitoringStatus: schema.MonitoringStatusRunningOrArchiving, } jobIn.StartTime = 1608923076 From 71cfb4db7789a10ce87b5641aa232e1db23593af Mon Sep 17 00:00:00 2001 From: Christoph Kluge Date: Tue, 5 Aug 2025 14:19:03 +0200 Subject: [PATCH 15/40] fix: fix metric availability subcluster list overflow --- web/frontend/src/generic/select/MetricSelection.svelte | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/web/frontend/src/generic/select/MetricSelection.svelte b/web/frontend/src/generic/select/MetricSelection.svelte index 469cc32..d6da4df 100644 --- a/web/frontend/src/generic/select/MetricSelection.svelte +++ b/web/frontend/src/generic/select/MetricSelection.svelte @@ -96,9 +96,9 @@ function printAvailability(metric, cluster) { const avail = globalMetrics.find((gm) => gm.name === metric)?.availability if (!cluster) { - return avail.map((av) => av.cluster).join(',') + return 
avail.map((av) => av.cluster).join(', ') } else { - return avail.find((av) => av.cluster === cluster).subClusters.join(',') + return avail.find((av) => av.cluster === cluster).subClusters.join(', ') } } @@ -208,7 +208,7 @@ /> {/if} {metric} - + {printAvailability(metric, cluster)} From bef832e45b0bf713d0ae759e21000eab2651d42b Mon Sep 17 00:00:00 2001 From: Christoph Kluge Date: Thu, 7 Aug 2025 16:10:11 +0200 Subject: [PATCH 16/40] Build new statusDash, refine newRoofline data render --- internal/api/job.go | 3 +- internal/auth/auth.go | 3 +- .../generic/plots/NewBubbleRoofline.svelte | 208 ++++++++------- web/frontend/src/status/DevelDash.svelte | 5 +- web/frontend/src/status/StatusDash.svelte | 247 ++++++++++++++++-- 5 files changed, 345 insertions(+), 121 deletions(-) diff --git a/internal/api/job.go b/internal/api/job.go index 4c8ca76..7c27a86 100644 --- a/internal/api/job.go +++ b/internal/api/job.go @@ -112,6 +112,7 @@ func (api *RestApi) getJobs(rw http.ResponseWriter, r *http.Request) { for key, vals := range r.URL.Query() { switch key { + // TODO: add project filter case "state": for _, s := range vals { state := schema.JobState(s) @@ -124,7 +125,7 @@ func (api *RestApi) getJobs(rw http.ResponseWriter, r *http.Request) { } case "cluster": filter.Cluster = &model.StringInput{Eq: &vals[0]} - case "start-time": + case "start-time": // ?startTime=1753707480-1754053139 st := strings.Split(vals[0], "-") if len(st) != 2 { handleError(fmt.Errorf("invalid query parameter value: startTime"), diff --git a/internal/auth/auth.go b/internal/auth/auth.go index ad78397..333efc0 100644 --- a/internal/auth/auth.go +++ b/internal/auth/auth.go @@ -381,7 +381,7 @@ func (auth *Authentication) AuthUserApi( return } case len(user.Roles) >= 2: - if user.HasRole(schema.RoleApi) && user.HasAnyRole([]schema.Role{schema.RoleUser, schema.RoleManager, schema.RoleAdmin}) { + if user.HasRole(schema.RoleApi) && user.HasAnyRole([]schema.Role{schema.RoleUser, schema.RoleManager, schema.RoleSupport, schema.RoleAdmin}) { ctx := context.WithValue(r.Context(), repository.ContextUserKey, user) onsuccess.ServeHTTP(rw, r.WithContext(ctx)) return @@ -473,6 +473,7 @@ func securedCheck(user *schema.User, r *http.Request) error { IPAddress = r.RemoteAddr } + // FIXME: IPV6 not handled if strings.Contains(IPAddress, ":") { IPAddress = strings.Split(IPAddress, ":")[0] } diff --git a/web/frontend/src/generic/plots/NewBubbleRoofline.svelte b/web/frontend/src/generic/plots/NewBubbleRoofline.svelte index bf25347..3a0e332 100644 --- a/web/frontend/src/generic/plots/NewBubbleRoofline.svelte +++ b/web/frontend/src/generic/plots/NewBubbleRoofline.svelte @@ -35,6 +35,7 @@ cluster = null, subCluster = null, allowSizeChange = false, + useColors = true, width = 600, height = 380, } = $props(); @@ -243,7 +244,7 @@ // Dot Renderer const makeDrawPoints = (opts) => { let {/*size, disp,*/ transparentFill, each = () => {}} = opts; - const sizeBase = 5 * pxRatio; + const sizeBase = 6 * pxRatio; return (u, seriesIdx, idx0, idx1) => { uPlot.orient(u, seriesIdx, (series, dataX, dataY, scaleX, scaleY, valToPosX, valToPosY, xOff, yOff, xDim, yDim, moveTo, lineTo, rect, arc) => { @@ -266,26 +267,33 @@ let filtTop = u.posToVal(-maxSize / 2, scaleY.key); for (let i = 0; i < d[0].length; i++) { - // Jobs: Color based on Duration - if (jobsData) { - u.ctx.strokeStyle = getRGB(u.data[2][i]); - u.ctx.fillStyle = getRGB(u.data[2][i], transparentFill); - // Nodes: Color based on Idle vs. 
Allocated - } else if (nodesData) { - // console.log('In Plot Handler NodesData', nodesData) - if (nodesData[i]?.nodeState == "idle") { - u.ctx.strokeStyle = "rgb(0, 0, 255)"; - u.ctx.fillStyle = "rgba(0, 0, 255, 0.5)"; - } else if (nodesData[i]?.nodeState == "allocated") { - u.ctx.strokeStyle = "rgb(0, 255, 0)"; - u.ctx.fillStyle = "rgba(0, 255, 0, 0.5)"; - } else if (nodesData[i]?.nodeState == "notindb") { - u.ctx.strokeStyle = "rgb(0, 0, 0)"; - u.ctx.fillStyle = "rgba(0, 0, 0, 0.5)"; - } else { // Fallback: All other DEFINED states - u.ctx.strokeStyle = "rgb(255, 0, 0)"; - u.ctx.fillStyle = "rgba(255, 0, 0, 0.5)"; + if (useColors) { + u.ctx.strokeStyle = "rgb(0, 0, 0)"; + // Jobs: Color based on Duration + if (jobsData) { + //u.ctx.strokeStyle = getRGB(u.data[2][i]); + u.ctx.fillStyle = getRGB(u.data[2][i], transparentFill); + // Nodes: Color based on Idle vs. Allocated + } else if (nodesData) { + // console.log('In Plot Handler NodesData', nodesData) + if (nodesData[i]?.nodeState == "idle") { + //u.ctx.strokeStyle = "rgb(0, 0, 255)"; + u.ctx.fillStyle = "rgba(0, 0, 255, 0.5)"; + } else if (nodesData[i]?.nodeState == "allocated") { + //u.ctx.strokeStyle = "rgb(0, 255, 0)"; + u.ctx.fillStyle = "rgba(0, 255, 0, 0.5)"; + } else if (nodesData[i]?.nodeState == "notindb") { + //u.ctx.strokeStyle = "rgb(0, 0, 0)"; + u.ctx.fillStyle = "rgba(0, 0, 0, 0.5)"; + } else { // Fallback: All other DEFINED states + //u.ctx.strokeStyle = "rgb(255, 0, 0)"; + u.ctx.fillStyle = "rgba(255, 0, 0, 0.5)"; + } } + } else { + // No Colors: Use Black + u.ctx.strokeStyle = "rgb(0, 0, 0)"; + u.ctx.fillStyle = "rgba(0, 0, 0, 0.5)"; } // Get Values @@ -297,10 +305,15 @@ // Jobs: Size based on Resourcecount if (jobsData) { - size = sizeBase + (jobsData[i]?.numAcc ? jobsData[i].numAcc / 2 : jobsData[i].numNodes) + const scaling = jobsData[i].numNodes > 12 + ? 24 // Capped Dot Size + : jobsData[i].numNodes > 1 + ? jobsData[i].numNodes * 2 // MultiNode Scaling + : jobsData[i]?.numAcc ? jobsData[i].numAcc : jobsData[i].numNodes * 2 // Single Node or Scale by Accs + size = sizeBase + scaling // Nodes: Size based on Jobcount } else if (nodesData) { - size = sizeBase + nodesData[i]?.numJobs + size = sizeBase + (nodesData[i]?.numJobs * 1.5) // Max Jobs Scale: 8 * 1.5 = 12 }; if (xVal >= filtLft && xVal <= filtRgt && yVal >= filtBtm && yVal <= filtTop) { @@ -377,7 +390,7 @@ tooltip.style.fontSize = "10pt"; tooltip.style.position = "absolute"; tooltip.style.background = "#fcfcfc"; - tooltip.style.display = "nonde"; + tooltip.style.display = "none"; tooltip.style.border = "2px solid black"; tooltip.style.padding = "4px"; tooltip.style.pointerEvents = "none"; @@ -417,33 +430,42 @@ tooltip.style.top = (tooltipTopOffset + top + shiftX) + "px"; tooltip.style.left = (tooltipLeftOffset + lft + shiftY) + "px"; - - // Jobs: Color based on Duration - if (jobsData) { - tooltip.style.borderColor = getRGB(u.data[2][i]); - // Nodes: Color based on Idle vs. 
Allocated - } else if (nodesData) { - if (nodesData[i]?.nodeState == "idle") { - tooltip.style.borderColor = "rgb(0, 0, 255)"; - } else if (nodesData[i]?.nodeState == "allocated") { - tooltip.style.borderColor = "rgb(0, 255, 0)"; - } else if (nodesData[i]?.nodeState == "notindb") { // Missing from DB table - tooltip.style.borderColor = "rgb(0, 0, 0)"; - } else { // Fallback: All other DEFINED states - tooltip.style.borderColor = "rgb(255, 0, 0)"; + if (useColors) { + // Jobs: Color based on Duration + if (jobsData) { + tooltip.style.borderColor = getRGB(u.data[2][i]); + // Nodes: Color based on Idle vs. Allocated + } else if (nodesData) { + if (nodesData[i]?.nodeState == "idle") { + tooltip.style.borderColor = "rgb(0, 0, 255)"; + } else if (nodesData[i]?.nodeState == "allocated") { + tooltip.style.borderColor = "rgb(0, 255, 0)"; + } else if (nodesData[i]?.nodeState == "notindb") { // Missing from DB table + tooltip.style.borderColor = "rgb(0, 0, 0)"; + } else { // Fallback: All other DEFINED states + tooltip.style.borderColor = "rgb(255, 0, 0)"; + } } + } else { + // No Colors: Use Black + tooltip.style.borderColor = "rgb(0, 0, 0)"; } if (jobsData) { tooltip.textContent = ( // Tooltip Content as String for Job - `Job ID: ${getLegendData(u, i).jobId}\nNodes: ${getLegendData(u, i).numNodes}${getLegendData(u, i)?.numAcc?`\nAccelerators: ${getLegendData(u, i).numAcc}`:''}` + `Job ID: ${getLegendData(u, i).jobId}\nRuntime: ${getLegendData(u, i).duration}\nNodes: ${getLegendData(u, i).numNodes}${getLegendData(u, i)?.numAcc?`\nAccelerators: ${getLegendData(u, i).numAcc}`:''}` ); - } else if (nodesData) { + } else if (nodesData && useColors) { tooltip.textContent = ( // Tooltip Content as String for Node `Host: ${getLegendData(u, i).nodeName}\nState: ${getLegendData(u, i).nodeState}\nJobs: ${getLegendData(u, i).numJobs}` ); + } else if (nodesData && !useColors) { + tooltip.textContent = ( + // Tooltip Content as String for Node + `Host: ${getLegendData(u, i).nodeName}\nJobs: ${getLegendData(u, i).numJobs}` + ); } } @@ -570,7 +592,7 @@ // return prox; // }, // }, - drag: { // Activates Zoom + drag: { // Activates Zoom: Only one Dimension; YX Breaks Zoom Reset (Reason TBD) x: true, y: false }, @@ -725,63 +747,67 @@ u.ctx.lineWidth = 0.15; } - // Jobs: The Color Scale For Time Information - if (jobsData) { - const posX = u.valToPos(0.1, "x", true) - const posXLimit = u.valToPos(100, "x", true) - const posY = u.valToPos(14000.0, "y", true) - u.ctx.fillStyle = 'black' - u.ctx.fillText('Short', posX, posY) - const start = posX + 10 - for (let x = start; x < posXLimit; x += 10) { - let c = (x - start) / (posXLimit - start) - u.ctx.fillStyle = getRGB(c) - u.ctx.beginPath() - u.ctx.arc(x, posY, 3, 0, Math.PI * 2, false) - u.ctx.fill() + /* Render Scales */ + if (useColors) { + // Jobs: The Color Scale For Time Information + if (jobsData) { + const posX = u.valToPos(0.1, "x", true) + const posXLimit = u.valToPos(100, "x", true) + const posY = u.valToPos(17500.0, "y", true) + u.ctx.fillStyle = 'black' + u.ctx.fillText('0 Hours', posX, posY) + const start = posX + 10 + for (let x = start; x < posXLimit; x += 10) { + let c = (x - start) / (posXLimit - start) + u.ctx.fillStyle = getRGB(c) + u.ctx.beginPath() + u.ctx.arc(x, posY, 3, 0, Math.PI * 2, false) + u.ctx.fill() + } + u.ctx.fillStyle = 'black' + u.ctx.fillText('24 Hours', posXLimit + 55, posY) } - u.ctx.fillStyle = 'black' - u.ctx.fillText('Long', posXLimit + 23, posY) - } - // Nodes: The Colors Of NodeStates (Just 3) - if (nodesData) { - const posY = 
u.valToPos(14000.0, "y", true) + // Nodes: The Colors Of NodeStates + if (nodesData) { + const posY = u.valToPos(17500.0, "y", true) - const posAllocDot = u.valToPos(0.1, "x", true) - const posAllocText = posAllocDot + 60 - u.ctx.fillStyle = "rgb(0, 255, 0)" - u.ctx.beginPath() - u.ctx.arc(posAllocDot, posY, 3, 0, Math.PI * 2, false) - u.ctx.fill() - u.ctx.fillStyle = 'black' - u.ctx.fillText('Allocated', posAllocText, posY) + const posAllocDot = u.valToPos(0.03, "x", true) + const posAllocText = posAllocDot + 60 + const posIdleDot = u.valToPos(0.3, "x", true) + const posIdleText = posIdleDot + 30 + const posOtherDot = u.valToPos(3, "x", true) + const posOtherText = posOtherDot + 40 + const posMissingDot = u.valToPos(30, "x", true) + const posMissingText = posMissingDot + 80 - const posIdleDot = posAllocDot + 150 - const posIdleText = posAllocText + 120 - u.ctx.fillStyle = "rgb(0, 0, 255)" - u.ctx.beginPath() - u.ctx.arc(posIdleDot, posY, 3, 0, Math.PI * 2, false) - u.ctx.fill() - u.ctx.fillStyle = 'black' - u.ctx.fillText('Idle', posIdleText, posY) + u.ctx.fillStyle = "rgb(0, 255, 0)" + u.ctx.beginPath() + u.ctx.arc(posAllocDot, posY, 3, 0, Math.PI * 2, false) + u.ctx.fill() + u.ctx.fillStyle = 'black' + u.ctx.fillText('Allocated', posAllocText, posY) - const posOtherDot = posIdleDot + 150 - const posOtherText = posIdleText + 160 - u.ctx.fillStyle = "rgb(255, 0, 0)" - u.ctx.beginPath() - u.ctx.arc(posOtherDot, posY, 3, 0, Math.PI * 2, false) - u.ctx.fill() - u.ctx.fillStyle = 'black' - u.ctx.fillText('Other', posOtherText, posY) + u.ctx.fillStyle = "rgb(0, 0, 255)" + u.ctx.beginPath() + u.ctx.arc(posIdleDot, posY, 3, 0, Math.PI * 2, false) + u.ctx.fill() + u.ctx.fillStyle = 'black' + u.ctx.fillText('Idle', posIdleText, posY) - const posMissingDot = posOtherDot + 150 - const posMissingText = posOtherText + 190 - u.ctx.fillStyle = 'black' - u.ctx.beginPath() - u.ctx.arc(posMissingDot, posY, 3, 0, Math.PI * 2, false) - u.ctx.fill() - u.ctx.fillText('Missing in DB', posMissingText, posY) + u.ctx.fillStyle = "rgb(255, 0, 0)" + u.ctx.beginPath() + u.ctx.arc(posOtherDot, posY, 3, 0, Math.PI * 2, false) + u.ctx.fill() + u.ctx.fillStyle = 'black' + u.ctx.fillText('Other', posOtherText, posY) + + u.ctx.fillStyle = 'black' + u.ctx.beginPath() + u.ctx.arc(posMissingDot, posY, 3, 0, Math.PI * 2, false) + u.ctx.fill() + u.ctx.fillText('Missing in DB', posMissingText, posY) + } } }, ], diff --git a/web/frontend/src/status/DevelDash.svelte b/web/frontend/src/status/DevelDash.svelte index 8cd4627..a4ee42c 100644 --- a/web/frontend/src/status/DevelDash.svelte +++ b/web/frontend/src/status/DevelDash.svelte @@ -23,6 +23,7 @@ //import Roofline from "../generic/plots/Roofline.svelte"; import NewBubbleRoofline from "../generic/plots/NewBubbleRoofline.svelte"; import Pie, { colors } from "../generic/plots/Pie.svelte"; + import { formatTime } from "../generic/units.js"; /* Svelte 5 Props */ let { @@ -131,7 +132,7 @@ // Load for jobcount per node only -- might me required for total running jobs anyways in parent component! 
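// Note: itemsPerPage: -1 below requests the complete, unpaged job list, so
// per-node job counts can be derived client-side from each job's resources.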
// Also, think about extra query with only TotalJobCount and Items [Resources, ...some meta infos], not including metric data - const paging = { itemsPerPage: 1500, page: 1 }; + const paging = { itemsPerPage: -1, page: 1 }; const sorting = { field: "startTime", type: "col", order: "DESC" }; const filter = [ { cluster: { eq: cluster } }, @@ -281,7 +282,7 @@ function transformJobsStatsToInfo(subclusterData) { if (subclusterData) { - return subclusterData.map((sc) => { return {id: sc.id, jobId: sc.jobId, numNodes: sc.numNodes, numAcc: sc?.numAccelerators? sc.numAccelerators : 0} }) + return subclusterData.map((sc) => { return {id: sc.id, jobId: sc.jobId, numNodes: sc.numNodes, numAcc: sc?.numAccelerators? sc.numAccelerators : 0, duration: formatTime(sc.duration)} }) } else { console.warn("transformJobsStatsToInfo: jobInfo missing!") return [] diff --git a/web/frontend/src/status/StatusDash.svelte b/web/frontend/src/status/StatusDash.svelte index a1196e5..102026c 100644 --- a/web/frontend/src/status/StatusDash.svelte +++ b/web/frontend/src/status/StatusDash.svelte @@ -15,7 +15,7 @@ CardBody, Table, Progress, - Icon, + // Icon, } from "@sveltestrap/sveltestrap"; import { queryStore, @@ -24,11 +24,11 @@ } from "@urql/svelte"; import { init, - transformPerNodeDataForRoofline, + // transformPerNodeDataForRoofline, } from "../generic/utils.js"; - import { scaleNumbers } from "../generic/units.js"; - import Roofline from "../generic/plots/Roofline.svelte"; + import { scaleNumbers, formatTime } from "../generic/units.js"; + import NewBubbleRoofline from "../generic/plots/NewBubbleRoofline.svelte"; /* Svelte 5 Props */ let { @@ -68,9 +68,12 @@ $metrics: [String!] $from: Time! $to: Time! - $filter: [JobFilter!]! + $jobFilter: [JobFilter!]! + $nodeFilter: [NodeFilter!]! $paging: PageRequest! + $sorting: OrderByInput! ) { + # Node 5 Minute Averages for Roofline nodeMetrics( cluster: $cluster metrics: $metrics @@ -81,27 +84,58 @@ subCluster metrics { name - scope metric { - timestep - unit { - base - prefix - } series { - data + statistics { + avg + } } } } } + # Running Job Metric Average for Rooflines + jobsMetricStats(filter: $jobFilter, metrics: $metrics) { + id + jobId + duration + numNodes + numAccelerators + subCluster + stats { + name + data { + avg + } + } + } + # Get Jobs for Per-Node Counts + jobs(filter: $jobFilter, order: $sorting, page: $paging) { + items { + jobId + resources { + hostname + } + } + count + } # Only counts shared nodes once allocatedNodes(cluster: $cluster) { name count } + # Get States for Node Roofline; $sorting unused in backend: Use placeholder + nodes(filter: $nodeFilter, order: $sorting) { + count + items { + hostname + cluster + subCluster + nodeState + } + } # totalNodes includes multiples if shared jobs jobsStatistics( - filter: $filter + filter: $jobFilter page: $paging sortBy: TOTALJOBS groupBy: SUBCLUSTER @@ -118,8 +152,10 @@ metrics: ["flops_any", "mem_bw"], // Fixed names for roofline and status bars from: from.toISOString(), to: to.toISOString(), - filter: [{ state: ["running"] }, { cluster: { eq: cluster } }], + jobFilter: [{ state: ["running"] }, { cluster: { eq: cluster } }], + nodeFilter: { cluster: { eq: cluster }}, paging: { itemsPerPage: -1, page: 1 }, // Get all: -1 + sorting: { field: "startTime", type: "col", order: "DESC" } }, })); @@ -170,6 +206,7 @@ }); /* Const Functions */ + // New: Sum Up Node Averages const sumUp = (data, subcluster, metric) => data.reduce( (sum, node) => @@ -177,20 +214,132 @@ ? 
sum + (node.metrics .find((m) => m.name == metric) - ?.metric.series.reduce( - (sum, series) => sum + series.data[series.data.length - 1], - 0, - ) || 0) + ?.metric?.series[0]?.statistics?.avg || 0 + ) : sum, 0, ); + // Old: SumUp Metric Time Data + // const sumUp = (data, subcluster, metric) => + // data.reduce( + // (sum, node) => + // node.subCluster == subcluster + // ? sum + + // (node.metrics + // .find((m) => m.name == metric) + // ?.metric.series.reduce( + // (sum, series) => sum + series.data[series.data.length - 1], + // 0, + // ) || 0) + // : sum, + // 0, + // ); + + /* Functions */ + function transformJobsStatsToData(subclusterData) { + /* c will contain values from 0 to 1 representing the duration */ + let data = null + const x = [], y = [], c = [], day = 86400.0 + + if (subclusterData) { + for (let i = 0; i < subclusterData.length; i++) { + const flopsData = subclusterData[i].stats.find((s) => s.name == "flops_any") + const memBwData = subclusterData[i].stats.find((s) => s.name == "mem_bw") + + const f = flopsData.data.avg + const m = memBwData.data.avg + const d = subclusterData[i].duration / day + + const intensity = f / m + if (Number.isNaN(intensity) || !Number.isFinite(intensity)) + continue + + x.push(intensity) + y.push(f) + // Long Jobs > 1 Day: Use max Color + if (d > 1.0) c.push(1.0) + else c.push(d) + } + } else { + console.warn("transformJobsStatsToData: metrics for 'mem_bw' and/or 'flops_any' missing!") + } + + if (x.length > 0 && y.length > 0 && c.length > 0) { + data = [null, [x, y], c] // for dataformat see roofline.svelte + } + return data + } + + function transformNodesStatsToData(subclusterData) { + let data = null + const x = [], y = [] + + if (subclusterData) { + for (let i = 0; i < subclusterData.length; i++) { + const flopsData = subclusterData[i].metrics.find((s) => s.name == "flops_any") + const memBwData = subclusterData[i].metrics.find((s) => s.name == "mem_bw") + + const f = flopsData.metric.series[0].statistics.avg + const m = memBwData.metric.series[0].statistics.avg + + let intensity = f / m + if (Number.isNaN(intensity) || !Number.isFinite(intensity)) { + // continue // Old: Introduces mismatch between Data and Info Arrays + intensity = 0.0 // New: Set to Float Zero: Will not show in Log-Plot (Always below render limit) + } + + x.push(intensity) + y.push(f) + } + } else { + // console.warn("transformNodesStatsToData: metrics for 'mem_bw' and/or 'flops_any' missing!") + } + + if (x.length > 0 && y.length > 0) { + data = [null, [x, y]] // for dataformat see roofline.svelte + } + return data + } + + function transformJobsStatsToInfo(subclusterData) { + if (subclusterData) { + return subclusterData.map((sc) => { return {id: sc.id, jobId: sc.jobId, numNodes: sc.numNodes, numAcc: sc?.numAccelerators? sc.numAccelerators : 0, duration: formatTime(sc.duration)} }) + } else { + console.warn("transformJobsStatsToInfo: jobInfo missing!") + return [] + } + } + + function transformNodesStatsToInfo(subClusterData) { + let result = []; + if (subClusterData) { // && $nodesState?.data) { + // Use Nodes as Returned from CCMS, *NOT* as saved in DB via SlurmState-API! + for (let j = 0; j < subClusterData.length; j++) { + // nodesCounts[subClusterData[i].subCluster] = $nodesState.data.nodes.count; // Probably better as own derived! + + const nodeName = subClusterData[j]?.host ? 
subClusterData[j].host : "unknown" + const nodeMatch = $statusQuery?.data?.nodes?.items?.find((n) => n.hostname == nodeName && n.subCluster == subClusterData[j].subCluster); + const nodeState = nodeMatch?.nodeState ? nodeMatch.nodeState : "notindb" + let numJobs = 0 + + if ($statusQuery?.data) { + const nodeJobs = $statusQuery?.data?.jobs?.items?.filter((job) => job.resources.find((res) => res.hostname == nodeName)) + numJobs = nodeJobs?.length ? nodeJobs.length : 0 + } + + result.push({nodeName: nodeName, nodeState: nodeState, numJobs: numJobs}) + }; + }; + return result + } + {#if $initq.data && $statusQuery.data} {#each $initq.data.clusters.find((c) => c.name == cluster).subClusters as subCluster, i} - +
{activeUsers[subCluster.name]} Active Users
+ Flop Rate (Any) + + Memory BW Rate +
+ {flopRate[subCluster.name]} + {flopRateUnitPrefix[subCluster.name]}{flopRateUnitBase[subCluster.name]} + + {memBwRate[subCluster.name]} + {memBwRateUnitPrefix[subCluster.name]}{memBwRateUnitBase[subCluster.name]} +
Allocated Nodes
- {#key $statusQuery.data.nodeMetrics} - data.subCluster == subCluster.name, + ) + )} + nodesData={transformNodesStatsToInfo($statusQuery?.data?.nodeMetrics.filter( + (data) => data.subCluster == subCluster.name, + ) + )} + /> + {/key} +
+ + +
+ {#key $statusQuery?.data?.jobsMetricStats} + data.subCluster == subCluster.name, - ), + ) + )} + jobsData={transformJobsStatsToInfo($statusQuery?.data?.jobsMetricStats.filter( + (data) => data.subCluster == subCluster.name, + ) )} /> {/key} From f338209f32b63f8c0461016f9c105c26efaf9352 Mon Sep 17 00:00:00 2001 From: Christoph Kluge Date: Thu, 7 Aug 2025 16:28:35 +0200 Subject: [PATCH 17/40] rename new roofline compnent --- .../generic/plots/NewBubbleRoofline.svelte | 856 ------------------ .../src/generic/plots/Roofline.svelte | 708 ++++++++++++--- .../src/generic/plots/RooflineLegacy.svelte | 384 ++++++++ web/frontend/src/job/JobRoofline.svelte | 2 +- web/frontend/src/status/DevelDash.svelte | 6 +- web/frontend/src/status/StatusDash.svelte | 6 +- 6 files changed, 981 insertions(+), 981 deletions(-) delete mode 100644 web/frontend/src/generic/plots/NewBubbleRoofline.svelte create mode 100644 web/frontend/src/generic/plots/RooflineLegacy.svelte diff --git a/web/frontend/src/generic/plots/NewBubbleRoofline.svelte b/web/frontend/src/generic/plots/NewBubbleRoofline.svelte deleted file mode 100644 index 3a0e332..0000000 --- a/web/frontend/src/generic/plots/NewBubbleRoofline.svelte +++ /dev/null @@ -1,856 +0,0 @@ - - - -{#if roofData != null} -
-{:else} - Cannot render roofline: No data! -{/if} diff --git a/web/frontend/src/generic/plots/Roofline.svelte b/web/frontend/src/generic/plots/Roofline.svelte index 8c6e9de..3a0e332 100644 --- a/web/frontend/src/generic/plots/Roofline.svelte +++ b/web/frontend/src/generic/plots/Roofline.svelte @@ -3,7 +3,6 @@ Properties: - `data [null, [], []]`: Roofline Data Structure, see below for details [Default: null] - - `renderTime Bool?`: If time information should be rendered as colored dots [Default: false] - `allowSizeChange Bool?`: If dimensions of rendered plot can change [Default: false] - `subCluster GraphQL.SubCluster?`: SubCluster Object; contains required topology information [Default: null] - `width Number?`: Plot width (reactively adaptive) [Default: 600] @@ -21,19 +20,22 @@ - `data[2] = [0.1, 0.15, 0.2, ...]` - Color Code: Time Information (Floats from 0 to 1) (Optional) --> - -{#if data != null} +{#if roofData != null}
{:else} - Cannot render roofline: No data! + Cannot render roofline: No data! {/if} - diff --git a/web/frontend/src/generic/plots/RooflineLegacy.svelte b/web/frontend/src/generic/plots/RooflineLegacy.svelte new file mode 100644 index 0000000..8c6e9de --- /dev/null +++ b/web/frontend/src/generic/plots/RooflineLegacy.svelte @@ -0,0 +1,384 @@ + + + + +{#if data != null} +
+{:else} + Cannot render roofline: No data! +{/if} + diff --git a/web/frontend/src/job/JobRoofline.svelte b/web/frontend/src/job/JobRoofline.svelte index ae33017..ae962f1 100644 --- a/web/frontend/src/job/JobRoofline.svelte +++ b/web/frontend/src/job/JobRoofline.svelte @@ -19,7 +19,7 @@ import { transformDataForRoofline, } from "../generic/utils.js"; - import Roofline from "../generic/plots/Roofline.svelte"; + import Roofline from "../generic/plots/RooflineLegacy.svelte"; /* Svelte 5 Props */ let { diff --git a/web/frontend/src/status/DevelDash.svelte b/web/frontend/src/status/DevelDash.svelte index a4ee42c..f54e51b 100644 --- a/web/frontend/src/status/DevelDash.svelte +++ b/web/frontend/src/status/DevelDash.svelte @@ -21,7 +21,7 @@ init, } from "../generic/utils.js"; //import Roofline from "../generic/plots/Roofline.svelte"; - import NewBubbleRoofline from "../generic/plots/NewBubbleRoofline.svelte"; + import Roofline from "../generic/plots/Roofline.svelte"; import Pie, { colors } from "../generic/plots/Pie.svelte"; import { formatTime } from "../generic/units.js"; @@ -325,7 +325,7 @@ {subCluster.name} Total: {$jobRoofQuery.data.jobsMetricStats.filter( (data) => data.subCluster == subCluster.name, ).length} Jobs - {subCluster.name} Total: {$jobRoofQuery.data.jobsMetricStats.filter( (data) => data.subCluster == subCluster.name, ).length} Jobs
-
{#key $statusQuery?.data?.nodeMetrics} -
{#key $statusQuery?.data?.jobsMetricStats} - Date: Thu, 7 Aug 2025 18:20:34 +0200 Subject: [PATCH 18/40] add cbmode to piecharts - old default colorscheme is now cb colorscheme --- web/frontend/src/generic/plots/Pie.svelte | 72 ++++++++++++++++++----- web/frontend/src/status/UsageDash.svelte | 7 ++- 2 files changed, 62 insertions(+), 17 deletions(-) diff --git a/web/frontend/src/generic/plots/Pie.svelte b/web/frontend/src/generic/plots/Pie.svelte index aed6026..a0312c4 100644 --- a/web/frontend/src/generic/plots/Pie.svelte +++ b/web/frontend/src/generic/plots/Pie.svelte @@ -14,8 +14,47 @@ --> {#if $initq.data} @@ -235,7 +244,7 @@ {#each $topUserQuery.data.topUser as tu, i} - + {#each $topProjectQuery.data.topProjects as tp, i} - + Date: Fri, 8 Aug 2025 14:24:52 +0200 Subject: [PATCH 20/40] Import metric store packages --- internal/avro/avroCheckpoint.go | 473 +++++++++++++++++ internal/avro/avroHelper.go | 79 +++ internal/avro/avroStruct.go | 163 ++++++ internal/memorystore/archive.go | 190 +++++++ internal/memorystore/buffer.go | 233 +++++++++ internal/memorystore/checkpoint.go | 764 ++++++++++++++++++++++++++++ internal/memorystore/config.go | 26 + internal/memorystore/debug.go | 107 ++++ internal/memorystore/healthcheck.go | 88 ++++ internal/memorystore/level.go | 187 +++++++ internal/memorystore/memorystore.go | 372 ++++++++++++++ internal/memorystore/stats.go | 120 +++++ 12 files changed, 2802 insertions(+) create mode 100644 internal/avro/avroCheckpoint.go create mode 100644 internal/avro/avroHelper.go create mode 100644 internal/avro/avroStruct.go create mode 100644 internal/memorystore/archive.go create mode 100644 internal/memorystore/buffer.go create mode 100644 internal/memorystore/checkpoint.go create mode 100644 internal/memorystore/config.go create mode 100644 internal/memorystore/debug.go create mode 100644 internal/memorystore/healthcheck.go create mode 100644 internal/memorystore/level.go create mode 100644 internal/memorystore/memorystore.go create mode 100644 internal/memorystore/stats.go diff --git a/internal/avro/avroCheckpoint.go b/internal/avro/avroCheckpoint.go new file mode 100644 index 0000000..4a3cf19 --- /dev/null +++ b/internal/avro/avroCheckpoint.go @@ -0,0 +1,473 @@ +// Copyright (C) NHR@FAU, University Erlangen-Nuremberg. +// All rights reserved. This file is part of cc-backend. +// Use of this source code is governed by a MIT-style +// license that can be found in the LICENSE file. 
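+
+// Package avro buffers metric samples per cluster, node and measurement
+// frequency and periodically flushes them to Avro OCF checkpoint files named
+// "<resolution>_<startTimestamp>.avro". When new metric names appear, the
+// on-disk schema is merged with the generated one and existing records are
+// rewritten; samples younger than roughly CheckpointBufferMinutes stay
+// buffered in memory unless a full dump is forced.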
+package avro + +import ( + "bufio" + "encoding/json" + "errors" + "fmt" + "log" + "os" + "path" + "sort" + "strconv" + "strings" + "sync" + "sync/atomic" + "time" + + "github.com/ClusterCockpit/cc-lib/util" + "github.com/linkedin/goavro/v2" +) + +var NumWorkers int = 4 + +var ErrNoNewData error = errors.New("no data in the pool") + +func (as *AvroStore) ToCheckpoint(dir string, dumpAll bool) (int, error) { + levels := make([]*AvroLevel, 0) + selectors := make([][]string, 0) + as.root.lock.RLock() + // Cluster + for sel1, l1 := range as.root.children { + l1.lock.RLock() + // Node + for sel2, l2 := range l1.children { + l2.lock.RLock() + // Frequency + for sel3, l3 := range l2.children { + levels = append(levels, l3) + selectors = append(selectors, []string{sel1, sel2, sel3}) + } + l2.lock.RUnlock() + } + l1.lock.RUnlock() + } + as.root.lock.RUnlock() + + type workItem struct { + level *AvroLevel + dir string + selector []string + } + + n, errs := int32(0), int32(0) + + var wg sync.WaitGroup + wg.Add(NumWorkers) + work := make(chan workItem, NumWorkers*2) + for range NumWorkers { + go func() { + defer wg.Done() + + for workItem := range work { + var from int64 = getTimestamp(workItem.dir) + + if err := workItem.level.toCheckpoint(workItem.dir, from, dumpAll); err != nil { + if err == ErrNoNewData { + continue + } + + log.Printf("error while checkpointing %#v: %s", workItem.selector, err.Error()) + atomic.AddInt32(&errs, 1) + } else { + atomic.AddInt32(&n, 1) + } + } + }() + } + + for i := range len(levels) { + dir := path.Join(dir, path.Join(selectors[i]...)) + work <- workItem{ + level: levels[i], + dir: dir, + selector: selectors[i], + } + } + + close(work) + wg.Wait() + + if errs > 0 { + return int(n), fmt.Errorf("%d errors happend while creating avro checkpoints (%d successes)", errs, n) + } + return int(n), nil +} + +// getTimestamp returns the timestamp from the directory name +func getTimestamp(dir string) int64 { + // Extract the resolution and timestamp from the directory name + // The existing avro file will be in epoch timestamp format + // iterate over all the files in the directory and find the maximum timestamp + // and return it + + resolution := path.Base(dir) + dir = path.Dir(dir) + + files, err := os.ReadDir(dir) + if err != nil { + return 0 + } + var maxTs int64 = 0 + + if len(files) == 0 { + return 0 + } + + for _, file := range files { + if file.IsDir() { + continue + } + name := file.Name() + + if len(name) < 5 || !strings.HasSuffix(name, ".avro") || !strings.HasPrefix(name, resolution+"_") { + continue + } + + ts, err := strconv.ParseInt(name[strings.Index(name, "_")+1:len(name)-5], 10, 64) + if err != nil { + fmt.Printf("error while parsing timestamp: %s\n", err.Error()) + continue + } + + if ts > maxTs { + maxTs = ts + } + } + + interval, _ := time.ParseDuration(Keys.Checkpoints.Interval) + updateTime := time.Unix(maxTs, 0).Add(interval).Add(time.Duration(CheckpointBufferMinutes-1) * time.Minute).Unix() + + if updateTime < time.Now().Unix() { + return 0 + } + + return maxTs +} + +func (l *AvroLevel) toCheckpoint(dir string, from int64, dumpAll bool) error { + l.lock.Lock() + defer l.lock.Unlock() + + // fmt.Printf("Checkpointing directory: %s\n", dir) + // filepath contains the resolution + int_res, _ := strconv.Atoi(path.Base(dir)) + + // find smallest overall timestamp in l.data map and delete it from l.data + var minTs int64 = int64(1<<63 - 1) + for ts, dat := range l.data { + if ts < minTs && len(dat) != 0 { + minTs = ts + } + } + + if from == 0 && minTs != 
int64(1<<63-1) { + from = minTs + } + + if from == 0 { + return ErrNoNewData + } + + var schema string + var codec *goavro.Codec + record_list := make([]map[string]interface{}, 0) + + var f *os.File + + filePath := dir + fmt.Sprintf("_%d.avro", from) + + var err error + + fp_, err_ := os.Stat(filePath) + if errors.Is(err_, os.ErrNotExist) { + err = os.MkdirAll(path.Dir(dir), 0o755) + if err != nil { + return fmt.Errorf("failed to create directory: %v", err) + } + } else if fp_.Size() != 0 { + f, err = os.Open(filePath) + if err != nil { + return fmt.Errorf("failed to open existing avro file: %v", err) + } + + br := bufio.NewReader(f) + + reader, err := goavro.NewOCFReader(br) + if err != nil { + return fmt.Errorf("failed to create OCF reader: %v", err) + } + codec = reader.Codec() + schema = codec.Schema() + + f.Close() + } + + time_ref := time.Now().Add(time.Duration(-CheckpointBufferMinutes+1) * time.Minute).Unix() + + if dumpAll { + time_ref = time.Now().Unix() + } + + // Empty values + if len(l.data) == 0 { + // we checkpoint avro files every 60 seconds + repeat := 60 / int_res + + for range repeat { + record_list = append(record_list, make(map[string]interface{})) + } + } + + readFlag := true + + for ts := range l.data { + flag := false + if ts < time_ref { + data := l.data[ts] + + schema_gen, err := generateSchema(data) + if err != nil { + return err + } + + flag, schema, err = compareSchema(schema, schema_gen) + if err != nil { + return fmt.Errorf("failed to compare read and generated schema: %v", err) + } + if flag && readFlag && !errors.Is(err_, os.ErrNotExist) { + + f.Close() + + f, err = os.Open(filePath) + if err != nil { + return fmt.Errorf("failed to open Avro file: %v", err) + } + + br := bufio.NewReader(f) + + ocfReader, err := goavro.NewOCFReader(br) + if err != nil { + return fmt.Errorf("failed to create OCF reader while changing schema: %v", err) + } + + for ocfReader.Scan() { + record, err := ocfReader.Read() + if err != nil { + return fmt.Errorf("failed to read record: %v", err) + } + + record_list = append(record_list, record.(map[string]interface{})) + } + + f.Close() + + err = os.Remove(filePath) + if err != nil { + return fmt.Errorf("failed to delete file: %v", err) + } + + readFlag = false + } + codec, err = goavro.NewCodec(schema) + if err != nil { + return fmt.Errorf("failed to create codec after merged schema: %v", err) + } + + record_list = append(record_list, generateRecord(data)) + delete(l.data, ts) + } + } + + if len(record_list) == 0 { + return ErrNoNewData + } + + f, err = os.OpenFile(filePath, os.O_CREATE|os.O_APPEND|os.O_RDWR, 0o644) + if err != nil { + return fmt.Errorf("failed to append new avro file: %v", err) + } + + // fmt.Printf("Codec : %#v\n", codec) + + writer, err := goavro.NewOCFWriter(goavro.OCFConfig{ + W: f, + Codec: codec, + CompressionName: goavro.CompressionDeflateLabel, + }) + if err != nil { + return fmt.Errorf("failed to create OCF writer: %v", err) + } + + // Append the new record + if err := writer.Append(record_list); err != nil { + return fmt.Errorf("failed to append record: %v", err) + } + + f.Close() + + return nil +} + +func compareSchema(schemaRead, schemaGen string) (bool, string, error) { + var genSchema, readSchema AvroSchema + + if schemaRead == "" { + return false, schemaGen, nil + } + + // Unmarshal the schema strings into AvroSchema structs + if err := json.Unmarshal([]byte(schemaGen), &genSchema); err != nil { + return false, "", fmt.Errorf("failed to parse generated schema: %v", err) + } + if err := 
json.Unmarshal([]byte(schemaRead), &readSchema); err != nil { + return false, "", fmt.Errorf("failed to parse read schema: %v", err) + } + + sort.Slice(genSchema.Fields, func(i, j int) bool { + return genSchema.Fields[i].Name < genSchema.Fields[j].Name + }) + + sort.Slice(readSchema.Fields, func(i, j int) bool { + return readSchema.Fields[i].Name < readSchema.Fields[j].Name + }) + + // Check if schemas are identical + schemasEqual := true + if len(genSchema.Fields) <= len(readSchema.Fields) { + + for i := range genSchema.Fields { + if genSchema.Fields[i].Name != readSchema.Fields[i].Name { + schemasEqual = false + break + } + } + + // If schemas are identical, return the read schema + if schemasEqual { + return false, schemaRead, nil + } + } + + // Create a map to hold unique fields from both schemas + fieldMap := make(map[string]AvroField) + + // Add fields from the read schema + for _, field := range readSchema.Fields { + fieldMap[field.Name] = field + } + + // Add or update fields from the generated schema + for _, field := range genSchema.Fields { + fieldMap[field.Name] = field + } + + // Create a union schema by collecting fields from the map + var mergedFields []AvroField + for _, field := range fieldMap { + mergedFields = append(mergedFields, field) + } + + // Sort fields by name for consistency + sort.Slice(mergedFields, func(i, j int) bool { + return mergedFields[i].Name < mergedFields[j].Name + }) + + // Create the merged schema + mergedSchema := AvroSchema{ + Type: "record", + Name: genSchema.Name, + Fields: mergedFields, + } + + // Check if schemas are identical + schemasEqual = len(mergedSchema.Fields) == len(readSchema.Fields) + if schemasEqual { + for i := range mergedSchema.Fields { + if mergedSchema.Fields[i].Name != readSchema.Fields[i].Name { + schemasEqual = false + break + } + } + + if schemasEqual { + return false, schemaRead, nil + } + } + + // Marshal the merged schema back to JSON + mergedSchemaJson, err := json.Marshal(mergedSchema) + if err != nil { + return false, "", fmt.Errorf("failed to marshal merged schema: %v", err) + } + + return true, string(mergedSchemaJson), nil +} + +func generateSchema(data map[string]util.Float) (string, error) { + // Define the Avro schema structure + schema := map[string]interface{}{ + "type": "record", + "name": "DataRecord", + "fields": []map[string]interface{}{}, + } + + fieldTracker := make(map[string]struct{}) + + for key := range data { + if _, exists := fieldTracker[key]; !exists { + key = correctKey(key) + + field := map[string]interface{}{ + "name": key, + "type": "double", + "default": -1.0, + } + schema["fields"] = append(schema["fields"].([]map[string]interface{}), field) + fieldTracker[key] = struct{}{} + } + } + + schemaString, err := json.Marshal(schema) + if err != nil { + return "", fmt.Errorf("failed to marshal schema: %v", err) + } + + return string(schemaString), nil +} + +func generateRecord(data map[string]util.Float) map[string]interface{} { + record := make(map[string]interface{}) + + // Iterate through each map in data + for key, value := range data { + key = correctKey(key) + + // Set the value in the record + record[key] = value.Double() + } + + return record +} + +func correctKey(key string) string { + // Replace any invalid characters in the key + // For example, replace spaces with underscores + key = strings.ReplaceAll(key, ":", "___") + key = strings.ReplaceAll(key, ".", "__") + + return key +} + +func ReplaceKey(key string) string { + // Replace any invalid characters in the key + // For example, 
replace spaces with underscores + key = strings.ReplaceAll(key, "___", ":") + key = strings.ReplaceAll(key, "__", ".") + + return key +} diff --git a/internal/avro/avroHelper.go b/internal/avro/avroHelper.go new file mode 100644 index 0000000..ee09759 --- /dev/null +++ b/internal/avro/avroHelper.go @@ -0,0 +1,79 @@ +package avro + +import ( + "context" + "fmt" + "strconv" + "sync" + +) + +func DataStaging(wg *sync.WaitGroup, ctx context.Context) { + + // AvroPool is a pool of Avro writers. + go func() { + if Keys.Checkpoints.FileFormat == "json" { + wg.Done() // Mark this goroutine as done + return // Exit the goroutine + } + + defer wg.Done() + + var avroLevel *AvroLevel + oldSelector := make([]string, 0) + + for { + select { + case <-ctx.Done(): + return + case val := <-LineProtocolMessages: + //Fetch the frequency of the metric from the global configuration + freq, err := Keys.GetMetricFrequency(val.MetricName) + if err != nil { + fmt.Printf("Error fetching metric frequency: %s\n", err) + continue + } + + metricName := "" + + for _, selector_name := range val.Selector { + metricName += selector_name + Delimiter + } + + metricName += val.MetricName + + // Create a new selector for the Avro level + // The selector is a slice of strings that represents the path to the + // Avro level. It is created by appending the cluster, node, and metric + // name to the selector. + var selector []string + selector = append(selector, val.Cluster, val.Node, strconv.FormatInt(freq, 10)) + + if !testEq(oldSelector, selector) { + // Get the Avro level for the metric + avroLevel = avroStore.root.findAvroLevelOrCreate(selector) + + // If the Avro level is nil, create a new one + if avroLevel == nil { + fmt.Printf("Error creating or finding the level with cluster : %s, node : %s, metric : %s\n", val.Cluster, val.Node, val.MetricName) + } + oldSelector = append([]string{}, selector...) + } + + avroLevel.addMetric(metricName, val.Value, val.Timestamp, int(freq)) + } + } + }() +} + +func testEq(a, b []string) bool { + if len(a) != len(b) { + return false + } + for i := range a { + if a[i] != b[i] { + return false + } + } + return true +} diff --git a/internal/avro/avroStruct.go b/internal/avro/avroStruct.go new file mode 100644 index 0000000..27aac47 --- /dev/null +++ b/internal/avro/avroStruct.go @@ -0,0 +1,163 @@ +package avro + +import ( + "sync" + + "github.com/ClusterCockpit/cc-lib/util" +) + +var ( + LineProtocolMessages = make(chan *AvroStruct) + Delimiter = "ZZZZZ" +) + +// CheckpointBufferMinutes should always be in minutes. +// Its controls the amount of data to hold for given amount of time. +var CheckpointBufferMinutes = 3 + +type AvroStruct struct { + MetricName string + Cluster string + Node string + Selector []string + Value util.Float + Timestamp int64 +} + +type AvroStore struct { + root AvroLevel +} + +var avroStore AvroStore + +type AvroLevel struct { + children map[string]*AvroLevel + data map[int64]map[string]util.Float + lock sync.RWMutex +} + +type AvroField struct { + Name string `json:"name"` + Type interface{} `json:"type"` + Default interface{} `json:"default,omitempty"` +} + +type AvroSchema struct { + Type string `json:"type"` + Name string `json:"name"` + Fields []AvroField `json:"fields"` +} + +func (l *AvroLevel) findAvroLevelOrCreate(selector []string) *AvroLevel { + if len(selector) == 0 { + return l + } + + // Allow concurrent reads: + l.lock.RLock() + var child *AvroLevel + var ok bool + if l.children == nil { + // Children map needs to be created... 
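+		// Release the read lock here; the child is created below under the write lock.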
+ l.lock.RUnlock() + } else { + child, ok := l.children[selector[0]] + l.lock.RUnlock() + if ok { + return child.findAvroLevelOrCreate(selector[1:]) + } + } + + // The level does not exist, take write lock for unqiue access: + l.lock.Lock() + // While this thread waited for the write lock, another thread + // could have created the child node. + if l.children != nil { + child, ok = l.children[selector[0]] + if ok { + l.lock.Unlock() + return child.findAvroLevelOrCreate(selector[1:]) + } + } + + child = &AvroLevel{ + data: make(map[int64]map[string]util.Float, 0), + children: nil, + } + + if l.children != nil { + l.children[selector[0]] = child + } else { + l.children = map[string]*AvroLevel{selector[0]: child} + } + l.lock.Unlock() + return child.findAvroLevelOrCreate(selector[1:]) +} + +func (l *AvroLevel) addMetric(metricName string, value util.Float, timestamp int64, Freq int) { + l.lock.Lock() + defer l.lock.Unlock() + + KeyCounter := int(CheckpointBufferMinutes * 60 / Freq) + + // Create keys in advance for the given amount of time + if len(l.data) != KeyCounter { + if len(l.data) == 0 { + for i := range KeyCounter { + l.data[timestamp+int64(i*Freq)] = make(map[string]util.Float, 0) + } + } else { + // Get the last timestamp + var lastTs int64 + for ts := range l.data { + if ts > lastTs { + lastTs = ts + } + } + // Create keys for the next KeyCounter timestamps + l.data[lastTs+int64(Freq)] = make(map[string]util.Float, 0) + } + } + + closestTs := int64(0) + minDiff := int64(Freq) + 1 // Start with diff just outside the valid range + found := false + + // Iterate over timestamps and choose the one which is within range. + // Since its epoch time, we check if the difference is less than 60 seconds. + for ts, dat := range l.data { + // Check if timestamp is within range + diff := timestamp - ts + if diff < -int64(Freq) || diff > int64(Freq) { + continue + } + + // Metric already present at this timestamp — skip + if _, ok := dat[metricName]; ok { + continue + } + + // Check if this is the closest timestamp so far + if Abs(diff) < minDiff { + minDiff = Abs(diff) + closestTs = ts + found = true + } + } + + if found { + l.data[closestTs][metricName] = value + } +} + +func GetAvroStore() *AvroStore { + return &avroStore +} + +// Abs returns the absolute value of x. +func Abs(x int64) int64 { + if x < 0 { + return -x + } + return x +} diff --git a/internal/memorystore/archive.go b/internal/memorystore/archive.go new file mode 100644 index 0000000..6e25aff --- /dev/null +++ b/internal/memorystore/archive.go @@ -0,0 +1,190 @@ +// Copyright (C) NHR@FAU, University Erlangen-Nuremberg. +// All rights reserved. This file is part of cc-backend. +// Use of this source code is governed by a MIT-style +// license that can be found in the LICENSE file. 
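+
+// Archiving periodically moves old checkpoint files out of the checkpoint
+// directory: on every Keys.Archive.Interval tick it zips (or, when
+// DeleteInstead is set, deletes) all checkpoints older than that interval.
+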
+package memorystore + +import ( + "archive/zip" + "bufio" + "context" + "errors" + "fmt" + "io" + "os" + "path/filepath" + "sync" + "sync/atomic" + "time" + + cclog "github.com/ClusterCockpit/cc-lib/ccLogger" +) + +func Archiving(wg *sync.WaitGroup, ctx context.Context) { + go func() { + defer wg.Done() + d, err := time.ParseDuration(Keys.Archive.Interval) + if err != nil { + cclog.Fatalf("error parsing archive interval duration: %v\n", err) + } + if d <= 0 { + return + } + + ticks := func() <-chan time.Time { + if d <= 0 { + return nil + } + return time.NewTicker(d).C + }() + for { + select { + case <-ctx.Done(): + return + case <-ticks: + t := time.Now().Add(-d) + cclog.Infof("start archiving checkpoints (older than %s)...\n", t.Format(time.RFC3339)) + n, err := ArchiveCheckpoints(Keys.Checkpoints.RootDir, + Keys.Archive.RootDir, t.Unix(), Keys.Archive.DeleteInstead) + + if err != nil { + cclog.Warnf("archiving failed: %s\n", err.Error()) + } else { + cclog.Infof("done: %d files zipped and moved to archive\n", n) + } + } + } + }() +} + +var ErrNoNewData error = errors.New("all data already archived") + +// ZIP all checkpoint files older than `from` together and write them to the `archiveDir`, +// deleting them from the `checkpointsDir`. +func ArchiveCheckpoints(checkpointsDir, archiveDir string, from int64, deleteInstead bool) (int, error) { + entries1, err := os.ReadDir(checkpointsDir) + if err != nil { + return 0, err + } + + type workItem struct { + cdir, adir string + cluster, host string + } + + var wg sync.WaitGroup + n, errs := int32(0), int32(0) + work := make(chan workItem, NumWorkers) + + wg.Add(NumWorkers) + for worker := 0; worker < NumWorkers; worker++ { + go func() { + defer wg.Done() + for workItem := range work { + m, err := archiveCheckpoints(workItem.cdir, workItem.adir, from, deleteInstead) + if err != nil { + cclog.Errorf("error while archiving %s/%s: %s", workItem.cluster, workItem.host, err.Error()) + atomic.AddInt32(&errs, 1) + } + atomic.AddInt32(&n, int32(m)) + } + }() + } + + for _, de1 := range entries1 { + entries2, e := os.ReadDir(filepath.Join(checkpointsDir, de1.Name())) + if e != nil { + err = e + } + + for _, de2 := range entries2 { + cdir := filepath.Join(checkpointsDir, de1.Name(), de2.Name()) + adir := filepath.Join(archiveDir, de1.Name(), de2.Name()) + work <- workItem{ + adir: adir, cdir: cdir, + cluster: de1.Name(), host: de2.Name(), + } + } + } + + close(work) + wg.Wait() + + if err != nil { + return int(n), err + } + + if errs > 0 { + return int(n), fmt.Errorf("%d errors happend while archiving (%d successes)", errs, n) + } + return int(n), nil +} + +// Helper function for `ArchiveCheckpoints`. 
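+// It processes a single host directory: checkpoint files older than `from`
+// are written into "<archiveDir>/<from>.zip" (or deleted when `deleteInstead`
+// is set) and the number of handled files is returned.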
+func archiveCheckpoints(dir string, archiveDir string, from int64, deleteInstead bool) (int, error) { + entries, err := os.ReadDir(dir) + if err != nil { + return 0, err + } + + extension := Keys.Checkpoints.FileFormat + files, err := findFiles(entries, from, extension, false) + if err != nil { + return 0, err + } + + if deleteInstead { + n := 0 + for _, checkpoint := range files { + filename := filepath.Join(dir, checkpoint) + if err = os.Remove(filename); err != nil { + return n, err + } + n += 1 + } + return n, nil + } + + filename := filepath.Join(archiveDir, fmt.Sprintf("%d.zip", from)) + f, err := os.OpenFile(filename, os.O_CREATE|os.O_WRONLY, 0o644) + if err != nil && os.IsNotExist(err) { + err = os.MkdirAll(archiveDir, 0o755) + if err == nil { + f, err = os.OpenFile(filename, os.O_CREATE|os.O_WRONLY, 0o644) + } + } + if err != nil { + return 0, err + } + defer f.Close() + bw := bufio.NewWriter(f) + defer bw.Flush() + zw := zip.NewWriter(bw) + defer zw.Close() + + n := 0 + for _, checkpoint := range files { + filename := filepath.Join(dir, checkpoint) + r, err := os.Open(filename) + if err != nil { + return n, err + } + defer r.Close() + + w, err := zw.Create(checkpoint) + if err != nil { + return n, err + } + + if _, err = io.Copy(w, r); err != nil { + return n, err + } + + if err = os.Remove(filename); err != nil { + return n, err + } + n += 1 + } + + return n, nil +} diff --git a/internal/memorystore/buffer.go b/internal/memorystore/buffer.go new file mode 100644 index 0000000..d084c6d --- /dev/null +++ b/internal/memorystore/buffer.go @@ -0,0 +1,233 @@ +package memorystore + +import ( + "errors" + "sync" + + "github.com/ClusterCockpit/cc-lib/util" +) + +// Default buffer capacity. +// `buffer.data` will only ever grow up to it's capacity and a new link +// in the buffer chain will be created if needed so that no copying +// of data or reallocation needs to happen on writes. +const ( + BUFFER_CAP int = 512 +) + +// So that we can reuse allocations +var bufferPool sync.Pool = sync.Pool{ + New: func() interface{} { + return &buffer{ + data: make([]util.Float, 0, BUFFER_CAP), + } + }, +} + +var ( + ErrNoData error = errors.New("no data for this metric/level") + ErrDataDoesNotAlign error = errors.New("data from lower granularities does not align") +) + +// Each metric on each level has it's own buffer. +// This is where the actual values go. +// If `cap(data)` is reached, a new buffer is created and +// becomes the new head of a buffer list. +type buffer struct { + prev *buffer + next *buffer + data []util.Float + frequency int64 + start int64 + archived bool + closed bool +} + +func newBuffer(ts, freq int64) *buffer { + b := bufferPool.Get().(*buffer) + b.frequency = freq + b.start = ts - (freq / 2) + b.prev = nil + b.next = nil + b.archived = false + b.closed = false + b.data = b.data[:0] + return b +} + +// If a new buffer was created, the new head is returnd. +// Otherwise, the existing buffer is returnd. +// Normaly, only "newer" data should be written, but if the value would +// end up in the same buffer anyways it is allowed. 
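+//
+// Worked example: with frequency=60 and start=0, a write at ts=130 targets
+// slot idx=(130-0)/60=2; slots 0 and 1 that were never written get padded
+// with NaN first. Once idx would reach cap(data), a fresh buffer becomes the
+// new head of the chain.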
+func (b *buffer) write(ts int64, value util.Float) (*buffer, error) { + if ts < b.start { + return nil, errors.New("cannot write value to buffer from past") + } + + // idx := int((ts - b.start + (b.frequency / 3)) / b.frequency) + idx := int((ts - b.start) / b.frequency) + if idx >= cap(b.data) { + newbuf := newBuffer(ts, b.frequency) + newbuf.prev = b + b.next = newbuf + b.close() + b = newbuf + idx = 0 + } + + // Overwriting value or writing value from past + if idx < len(b.data) { + b.data[idx] = value + return b, nil + } + + // Fill up unwritten slots with NaN + for i := len(b.data); i < idx; i++ { + b.data = append(b.data, util.NaN) + } + + b.data = append(b.data, value) + return b, nil +} + +func (b *buffer) end() int64 { + return b.firstWrite() + int64(len(b.data))*b.frequency +} + +func (b *buffer) firstWrite() int64 { + return b.start + (b.frequency / 2) +} + +func (b *buffer) close() {} + +/* +func (b *buffer) close() { + if b.closed { + return + } + + b.closed = true + n, sum, min, max := 0, 0., math.MaxFloat64, -math.MaxFloat64 + for _, x := range b.data { + if x.IsNaN() { + continue + } + + n += 1 + f := float64(x) + sum += f + min = math.Min(min, f) + max = math.Max(max, f) + } + + b.statisticts.samples = n + if n > 0 { + b.statisticts.avg = Float(sum / float64(n)) + b.statisticts.min = Float(min) + b.statisticts.max = Float(max) + } else { + b.statisticts.avg = NaN + b.statisticts.min = NaN + b.statisticts.max = NaN + } +} +*/ + +// func interpolate(idx int, data []Float) Float { +// if idx == 0 || idx+1 == len(data) { +// return NaN +// } +// return (data[idx-1] + data[idx+1]) / 2.0 +// } + +// Return all known values from `from` to `to`. Gaps of information are represented as NaN. +// Simple linear interpolation is done between the two neighboring cells if possible. +// If values at the start or end are missing, instead of NaN values, the second and thrid +// return values contain the actual `from`/`to`. +// This function goes back the buffer chain if `from` is older than the currents buffer start. +// The loaded values are added to `data` and `data` is returned, possibly with a shorter length. +// If `data` is not long enough to hold all values, this function will panic! +func (b *buffer) read(from, to int64, data []util.Float) ([]util.Float, int64, int64, error) { + if from < b.firstWrite() { + if b.prev != nil { + return b.prev.read(from, to, data) + } + from = b.firstWrite() + } + + i := 0 + t := from + for ; t < to; t += b.frequency { + idx := int((t - b.start) / b.frequency) + if idx >= cap(b.data) { + if b.next == nil { + break + } + b = b.next + idx = 0 + } + + if idx >= len(b.data) { + if b.next == nil || to <= b.next.start { + break + } + data[i] += util.NaN + } else if t < b.start { + data[i] += util.NaN + // } else if b.data[idx].IsNaN() { + // data[i] += interpolate(idx, b.data) + } else { + data[i] += b.data[idx] + } + i++ + } + + return data[:i], from, t, nil +} + +// Returns true if this buffer needs to be freed. +func (b *buffer) free(t int64) (delme bool, n int) { + if b.prev != nil { + delme, m := b.prev.free(t) + n += m + if delme { + b.prev.next = nil + if cap(b.prev.data) == BUFFER_CAP { + bufferPool.Put(b.prev) + } + b.prev = nil + } + } + + end := b.end() + if end < t { + return true, n + 1 + } + + return false, n +} + +// Call `callback` on every buffer that contains data in the range from `from` to `to`. 
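+// Buffers are visited oldest-first: the chain is walked back to the oldest
+// predecessor before the callback runs on the newer buffers.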
+func (b *buffer) iterFromTo(from, to int64, callback func(b *buffer) error) error { + if b == nil { + return nil + } + + if err := b.prev.iterFromTo(from, to, callback); err != nil { + return err + } + + if from <= b.end() && b.start <= to { + return callback(b) + } + + return nil +} + +func (b *buffer) count() int64 { + res := int64(len(b.data)) + if b.prev != nil { + res += b.prev.count() + } + return res +} diff --git a/internal/memorystore/checkpoint.go b/internal/memorystore/checkpoint.go new file mode 100644 index 0000000..ecd6fb1 --- /dev/null +++ b/internal/memorystore/checkpoint.go @@ -0,0 +1,764 @@ +package memorystore + +import ( + "bufio" + "context" + "encoding/json" + "errors" + "fmt" + "io/fs" + "log" + "os" + "path" + "path/filepath" + "runtime" + "sort" + "strconv" + "strings" + "sync" + "sync/atomic" + "time" + + "github.com/ClusterCockpit/cc-backend/pkg/avro" + "github.com/ClusterCockpit/cc-lib/util" + "github.com/linkedin/goavro/v2" +) + +// Whenever changed, update MarshalJSON as well! +type CheckpointMetrics struct { + Data []util.Float `json:"data"` + Frequency int64 `json:"frequency"` + Start int64 `json:"start"` +} + +type CheckpointFile struct { + Metrics map[string]*CheckpointMetrics `json:"metrics"` + Children map[string]*CheckpointFile `json:"children"` + From int64 `json:"from"` + To int64 `json:"to"` +} + +var lastCheckpoint time.Time + +func Checkpointing(wg *sync.WaitGroup, ctx context.Context) { + lastCheckpoint = time.Now() + + if Keys.Checkpoints.FileFormat == "json" { + ms := GetMemoryStore() + + go func() { + defer wg.Done() + d, err := time.ParseDuration(Keys.Checkpoints.Interval) + if err != nil { + log.Fatal(err) + } + if d <= 0 { + return + } + + ticks := func() <-chan time.Time { + if d <= 0 { + return nil + } + return time.NewTicker(d).C + }() + for { + select { + case <-ctx.Done(): + return + case <-ticks: + log.Printf("start checkpointing (starting at %s)...\n", lastCheckpoint.Format(time.RFC3339)) + now := time.Now() + n, err := ms.ToCheckpoint(Keys.Checkpoints.RootDir, + lastCheckpoint.Unix(), now.Unix()) + if err != nil { + log.Printf("checkpointing failed: %s\n", err.Error()) + } else { + log.Printf("done: %d checkpoint files created\n", n) + lastCheckpoint = now + } + } + } + }() + } else { + go func() { + defer wg.Done() + d, _ := time.ParseDuration("1m") + + select { + case <-ctx.Done(): + return + case <-time.After(time.Duration(avro.CheckpointBufferMinutes) * time.Minute): + // This is the first tick untill we collect the data for given minutes. + avro.GetAvroStore().ToCheckpoint(Keys.Checkpoints.RootDir, false) + // log.Printf("Checkpointing %d avro files", count) + + } + + ticks := func() <-chan time.Time { + if d <= 0 { + return nil + } + return time.NewTicker(d).C + }() + + for { + select { + case <-ctx.Done(): + return + case <-ticks: + // Regular ticks of 1 minute to write data. + avro.GetAvroStore().ToCheckpoint(Keys.Checkpoints.RootDir, false) + // log.Printf("Checkpointing %d avro files", count) + } + } + }() + } +} + +// As `Float` implements a custom MarshalJSON() function, +// serializing an array of such types has more overhead +// than one would assume (because of extra allocations, interfaces and so on). +func (cm *CheckpointMetrics) MarshalJSON() ([]byte, error) { + buf := make([]byte, 0, 128+len(cm.Data)*8) + buf = append(buf, `{"frequency":`...) + buf = strconv.AppendInt(buf, cm.Frequency, 10) + buf = append(buf, `,"start":`...) + buf = strconv.AppendInt(buf, cm.Start, 10) + buf = append(buf, `,"data":[`...) 
+ for i, x := range cm.Data { + if i != 0 { + buf = append(buf, ',') + } + if x.IsNaN() { + buf = append(buf, `null`...) + } else { + buf = strconv.AppendFloat(buf, float64(x), 'f', 1, 32) + } + } + buf = append(buf, `]}`...) + return buf, nil +} + +// Metrics stored at the lowest 2 levels are not stored away (root and cluster)! +// On a per-host basis a new JSON file is created. I have no idea if this will scale. +// The good thing: Only a host at a time is locked, so this function can run +// in parallel to writes/reads. +func (m *MemoryStore) ToCheckpoint(dir string, from, to int64) (int, error) { + levels := make([]*Level, 0) + selectors := make([][]string, 0) + m.root.lock.RLock() + for sel1, l1 := range m.root.children { + l1.lock.RLock() + for sel2, l2 := range l1.children { + levels = append(levels, l2) + selectors = append(selectors, []string{sel1, sel2}) + } + l1.lock.RUnlock() + } + m.root.lock.RUnlock() + + type workItem struct { + level *Level + dir string + selector []string + } + + n, errs := int32(0), int32(0) + + var wg sync.WaitGroup + wg.Add(NumWorkers) + work := make(chan workItem, NumWorkers*2) + for worker := 0; worker < NumWorkers; worker++ { + go func() { + defer wg.Done() + + for workItem := range work { + if err := workItem.level.toCheckpoint(workItem.dir, from, to, m); err != nil { + if err == ErrNoNewData { + continue + } + + log.Printf("error while checkpointing %#v: %s", workItem.selector, err.Error()) + atomic.AddInt32(&errs, 1) + } else { + atomic.AddInt32(&n, 1) + } + } + }() + } + + for i := 0; i < len(levels); i++ { + dir := path.Join(dir, path.Join(selectors[i]...)) + work <- workItem{ + level: levels[i], + dir: dir, + selector: selectors[i], + } + } + + close(work) + wg.Wait() + + if errs > 0 { + return int(n), fmt.Errorf("%d errors happend while creating checkpoints (%d successes)", errs, n) + } + return int(n), nil +} + +func (l *Level) toCheckpointFile(from, to int64, m *MemoryStore) (*CheckpointFile, error) { + l.lock.RLock() + defer l.lock.RUnlock() + + retval := &CheckpointFile{ + From: from, + To: to, + Metrics: make(map[string]*CheckpointMetrics), + Children: make(map[string]*CheckpointFile), + } + + for metric, minfo := range m.Metrics { + b := l.metrics[minfo.Offset] + if b == nil { + continue + } + + allArchived := true + b.iterFromTo(from, to, func(b *buffer) error { + if !b.archived { + allArchived = false + } + return nil + }) + + if allArchived { + continue + } + + data := make([]util.Float, (to-from)/b.frequency+1) + data, start, end, err := b.read(from, to, data) + if err != nil { + return nil, err + } + + for i := int((end - start) / b.frequency); i < len(data); i++ { + data[i] = util.NaN + } + + retval.Metrics[metric] = &CheckpointMetrics{ + Frequency: b.frequency, + Start: start, + Data: data, + } + } + + for name, child := range l.children { + val, err := child.toCheckpointFile(from, to, m) + if err != nil { + return nil, err + } + + if val != nil { + retval.Children[name] = val + } + } + + if len(retval.Children) == 0 && len(retval.Metrics) == 0 { + return nil, nil + } + + return retval, nil +} + +func (l *Level) toCheckpoint(dir string, from, to int64, m *MemoryStore) error { + cf, err := l.toCheckpointFile(from, to, m) + if err != nil { + return err + } + + if cf == nil { + return ErrNoNewData + } + + filepath := path.Join(dir, fmt.Sprintf("%d.json", from)) + f, err := os.OpenFile(filepath, os.O_CREATE|os.O_WRONLY, 0o644) + if err != nil && os.IsNotExist(err) { + err = os.MkdirAll(dir, 0o755) + if err == nil { + f, err = 
os.OpenFile(filepath, os.O_CREATE|os.O_WRONLY, 0o644) + } + } + if err != nil { + return err + } + defer f.Close() + + bw := bufio.NewWriter(f) + if err = json.NewEncoder(bw).Encode(cf); err != nil { + return err + } + + return bw.Flush() +} + +func (m *MemoryStore) FromCheckpoint(dir string, from int64, extension string) (int, error) { + var wg sync.WaitGroup + work := make(chan [2]string, NumWorkers) + n, errs := int32(0), int32(0) + + wg.Add(NumWorkers) + for worker := 0; worker < NumWorkers; worker++ { + go func() { + defer wg.Done() + for host := range work { + lvl := m.root.findLevelOrCreate(host[:], len(m.Metrics)) + nn, err := lvl.fromCheckpoint(m, filepath.Join(dir, host[0], host[1]), from, extension) + if err != nil { + log.Fatalf("error while loading checkpoints: %s", err.Error()) + atomic.AddInt32(&errs, 1) + } + atomic.AddInt32(&n, int32(nn)) + } + }() + } + + i := 0 + clustersDir, err := os.ReadDir(dir) + for _, clusterDir := range clustersDir { + if !clusterDir.IsDir() { + err = errors.New("expected only directories at first level of checkpoints/ directory") + goto done + } + + hostsDir, e := os.ReadDir(filepath.Join(dir, clusterDir.Name())) + if e != nil { + err = e + goto done + } + + for _, hostDir := range hostsDir { + if !hostDir.IsDir() { + err = errors.New("expected only directories at second level of checkpoints/ directory") + goto done + } + + i++ + if i%NumWorkers == 0 && i > 100 { + // Forcing garbage collection runs here regulary during the loading of checkpoints + // will decrease the total heap size after loading everything back to memory is done. + // While loading data, the heap will grow fast, so the GC target size will double + // almost always. By forcing GCs here, we can keep it growing more slowly so that + // at the end, less memory is wasted. + runtime.GC() + } + + work <- [2]string{clusterDir.Name(), hostDir.Name()} + } + } +done: + close(work) + wg.Wait() + + if err != nil { + return int(n), err + } + + if errs > 0 { + return int(n), fmt.Errorf("%d errors happend while creating checkpoints (%d successes)", errs, n) + } + return int(n), nil +} + +// Metrics stored at the lowest 2 levels are not loaded (root and cluster)! +// This function can only be called once and before the very first write or read. +// Different host's data is loaded to memory in parallel. 
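+//
+// A minimal restore sketch for startup (path and retention window are
+// illustrative, not prescribed by this patch):
+//
+//	ms := GetMemoryStore()
+//	restoreFrom := time.Now().Add(-48 * time.Hour).Unix()
+//	if n, err := ms.FromCheckpointFiles("./var/checkpoints", restoreFrom); err != nil {
+//		log.Fatalf("loading checkpoints failed: %s", err.Error())
+//	} else {
+//		log.Printf("%d checkpoint files loaded\n", n)
+//	}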
+func (m *MemoryStore) FromCheckpointFiles(dir string, from int64) (int, error) { + if _, err := os.Stat(dir); os.IsNotExist(err) { + // The directory does not exist, so create it using os.MkdirAll() + err := os.MkdirAll(dir, 0755) // 0755 sets the permissions for the directory + if err != nil { + log.Fatalf("Error creating directory: %#v\n", err) + } + fmt.Printf("%#v Directory created successfully.\n", dir) + } + + // Config read (replace with your actual config read) + fileFormat := Keys.Checkpoints.FileFormat + if fileFormat == "" { + fileFormat = "avro" + } + + // Map to easily get the fallback format + oppositeFormat := map[string]string{ + "json": "avro", + "avro": "json", + } + + // First, attempt to load the specified format + if found, err := checkFilesWithExtension(dir, fileFormat); err != nil { + return 0, fmt.Errorf("error checking files with extension: %v", err) + } else if found { + log.Printf("Loading %s files because fileformat is %s\n", fileFormat, fileFormat) + return m.FromCheckpoint(dir, from, fileFormat) + } + + // If not found, attempt the opposite format + altFormat := oppositeFormat[fileFormat] + if found, err := checkFilesWithExtension(dir, altFormat); err != nil { + return 0, fmt.Errorf("error checking files with extension: %v", err) + } else if found { + log.Printf("Loading %s files but fileformat is %s\n", altFormat, fileFormat) + return m.FromCheckpoint(dir, from, altFormat) + } + + log.Println("No valid checkpoint files found in the directory.") + return 0, nil +} + +func checkFilesWithExtension(dir string, extension string) (bool, error) { + found := false + + err := filepath.Walk(dir, func(path string, info os.FileInfo, err error) error { + if err != nil { + return fmt.Errorf("error accessing path %s: %v", path, err) + } + if !info.IsDir() && filepath.Ext(info.Name()) == "."+extension { + found = true + return nil + } + return nil + }) + if err != nil { + return false, fmt.Errorf("error walking through directories: %s", err) + } + + return found, nil +} + +func (l *Level) loadAvroFile(m *MemoryStore, f *os.File, from int64) error { + br := bufio.NewReader(f) + + fileName := f.Name()[strings.LastIndex(f.Name(), "/")+1:] + resolution, err := strconv.ParseInt(fileName[0:strings.Index(fileName, "_")], 10, 64) + if err != nil { + return fmt.Errorf("error while reading avro file (resolution parsing) : %s", err) + } + + from_timestamp, err := strconv.ParseInt(fileName[strings.Index(fileName, "_")+1:len(fileName)-5], 10, 64) + + // Same logic according to lineprotocol + from_timestamp -= (resolution / 2) + + if err != nil { + return fmt.Errorf("error converting timestamp from the avro file : %s", err) + } + + // fmt.Printf("File : %s with resolution : %d\n", fileName, resolution) + + var recordCounter int64 = 0 + + // Create a new OCF reader from the buffered reader + ocfReader, err := goavro.NewOCFReader(br) + if err != nil { + panic(err) + } + + metricsData := make(map[string]util.FloatArray) + + for ocfReader.Scan() { + datum, err := ocfReader.Read() + if err != nil { + return fmt.Errorf("error while reading avro file : %s", err) + } + + record, ok := datum.(map[string]interface{}) + if !ok { + panic("failed to assert datum as map[string]interface{}") + } + + for key, value := range record { + metricsData[key] = append(metricsData[key], util.ConvertToFloat(value.(float64))) + } + + recordCounter += 1 + } + + to := (from_timestamp + (recordCounter / (60 / resolution) * 60)) + if to < from { + return nil + } + + for key, floatArray := range metricsData { + 
metricName := avro.ReplaceKey(key) + + if strings.Contains(metricName, avro.Delimiter) { + subString := strings.Split(metricName, avro.Delimiter) + + lvl := l + + for i := 0; i < len(subString)-1; i++ { + + sel := subString[i] + + if lvl.children == nil { + lvl.children = make(map[string]*Level) + } + + child, ok := lvl.children[sel] + if !ok { + child = &Level{ + metrics: make([]*buffer, len(m.Metrics)), + children: nil, + } + lvl.children[sel] = child + } + lvl = child + } + + leafMetricName := subString[len(subString)-1] + err = lvl.createBuffer(m, leafMetricName, floatArray, from_timestamp, resolution) + if err != nil { + return fmt.Errorf("error while creating buffers from avroReader : %s", err) + } + } else { + err = l.createBuffer(m, metricName, floatArray, from_timestamp, resolution) + if err != nil { + return fmt.Errorf("error while creating buffers from avroReader : %s", err) + } + } + + } + + return nil +} + +func (l *Level) createBuffer(m *MemoryStore, metricName string, floatArray util.FloatArray, from int64, resolution int64) error { + n := len(floatArray) + b := &buffer{ + frequency: resolution, + start: from, + data: floatArray[0:n:n], + prev: nil, + next: nil, + archived: true, + } + b.close() + + minfo, ok := m.Metrics[metricName] + if !ok { + return nil + // return errors.New("Unkown metric: " + name) + } + + prev := l.metrics[minfo.Offset] + if prev == nil { + l.metrics[minfo.Offset] = b + } else { + if prev.start > b.start { + return errors.New("wooops") + } + + b.prev = prev + prev.next = b + + missingCount := ((int(b.start) - int(prev.start)) - len(prev.data)*int(b.frequency)) + if missingCount > 0 { + missingCount /= int(b.frequency) + + for range missingCount { + prev.data = append(prev.data, util.NaN) + } + + prev.data = prev.data[0:len(prev.data):len(prev.data)] + } + } + l.metrics[minfo.Offset] = b + + return nil +} + +func (l *Level) loadJsonFile(m *MemoryStore, f *os.File, from int64) error { + br := bufio.NewReader(f) + cf := &CheckpointFile{} + if err := json.NewDecoder(br).Decode(cf); err != nil { + return err + } + + if cf.To != 0 && cf.To < from { + return nil + } + + if err := l.loadFile(cf, m); err != nil { + return err + } + + return nil +} + +func (l *Level) loadFile(cf *CheckpointFile, m *MemoryStore) error { + for name, metric := range cf.Metrics { + n := len(metric.Data) + b := &buffer{ + frequency: metric.Frequency, + start: metric.Start, + data: metric.Data[0:n:n], // Space is wasted here :( + prev: nil, + next: nil, + archived: true, + } + b.close() + + minfo, ok := m.Metrics[name] + if !ok { + continue + // return errors.New("Unkown metric: " + name) + } + + prev := l.metrics[minfo.Offset] + if prev == nil { + l.metrics[minfo.Offset] = b + } else { + if prev.start > b.start { + return errors.New("wooops") + } + + b.prev = prev + prev.next = b + } + l.metrics[minfo.Offset] = b + } + + if len(cf.Children) > 0 && l.children == nil { + l.children = make(map[string]*Level) + } + + for sel, childCf := range cf.Children { + child, ok := l.children[sel] + if !ok { + child = &Level{ + metrics: make([]*buffer, len(m.Metrics)), + children: nil, + } + l.children[sel] = child + } + + if err := child.loadFile(childCf, m); err != nil { + return err + } + } + + return nil +} + +func (l *Level) fromCheckpoint(m *MemoryStore, dir string, from int64, extension string) (int, error) { + direntries, err := os.ReadDir(dir) + if err != nil { + if os.IsNotExist(err) { + return 0, nil + } + + return 0, err + } + + allFiles := make([]fs.DirEntry, 0) + filesLoaded := 0 + 
for _, e := range direntries { + if e.IsDir() { + child := &Level{ + metrics: make([]*buffer, len(m.Metrics)), + children: make(map[string]*Level), + } + + files, err := child.fromCheckpoint(m, path.Join(dir, e.Name()), from, extension) + filesLoaded += files + if err != nil { + return filesLoaded, err + } + + l.children[e.Name()] = child + } else if strings.HasSuffix(e.Name(), "."+extension) { + allFiles = append(allFiles, e) + } else { + continue + } + } + + files, err := findFiles(allFiles, from, extension, true) + if err != nil { + return filesLoaded, err + } + + loaders := map[string]func(*MemoryStore, *os.File, int64) error{ + "json": l.loadJsonFile, + "avro": l.loadAvroFile, + } + + loader := loaders[extension] + + for _, filename := range files { + f, err := os.Open(path.Join(dir, filename)) + if err != nil { + return filesLoaded, err + } + defer f.Close() + + if err = loader(m, f, from); err != nil { + return filesLoaded, err + } + + filesLoaded += 1 + } + + return filesLoaded, nil +} + +// This will probably get very slow over time! +// A solution could be some sort of an index file in which all other files +// and the timespan they contain is listed. +func findFiles(direntries []fs.DirEntry, t int64, extension string, findMoreRecentFiles bool) ([]string, error) { + nums := map[string]int64{} + for _, e := range direntries { + if !strings.HasSuffix(e.Name(), "."+extension) { + continue + } + + ts, err := strconv.ParseInt(e.Name()[strings.Index(e.Name(), "_")+1:len(e.Name())-5], 10, 64) + if err != nil { + return nil, err + } + nums[e.Name()] = ts + } + + sort.Slice(direntries, func(i, j int) bool { + a, b := direntries[i], direntries[j] + return nums[a.Name()] < nums[b.Name()] + }) + + filenames := make([]string, 0) + for i := 0; i < len(direntries); i++ { + e := direntries[i] + ts1 := nums[e.Name()] + + if findMoreRecentFiles && t <= ts1 { + filenames = append(filenames, e.Name()) + } + if i == len(direntries)-1 { + continue + } + + enext := direntries[i+1] + ts2 := nums[enext.Name()] + + if findMoreRecentFiles { + if ts1 < t && t < ts2 { + filenames = append(filenames, e.Name()) + } + } else { + if ts2 < t { + filenames = append(filenames, e.Name()) + } + } + } + + return filenames, nil +} diff --git a/internal/memorystore/config.go b/internal/memorystore/config.go new file mode 100644 index 0000000..0d8a8ab --- /dev/null +++ b/internal/memorystore/config.go @@ -0,0 +1,26 @@ +// Copyright (C) NHR@FAU, University Erlangen-Nuremberg. +// All rights reserved. This file is part of cc-backend. +// Use of this source code is governed by a MIT-style +// license that can be found in the LICENSE file. 
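+
+// A hedged example of the JSON decoded into the MetricStoreConfig struct
+// below (keys follow the struct tags; the concrete values are only
+// illustrative, and the debug section is omitted):
+//
+//	{
+//		"checkpoints": {
+//			"file-format": "avro",
+//			"interval": "1h",
+//			"directory": "./var/checkpoints",
+//			"restore": "48h"
+//		},
+//		"retention-in-memory": "48h",
+//		"archive": {
+//			"interval": "24h",
+//			"directory": "./var/archive",
+//			"delete-instead": false
+//		}
+//	}
+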
+package memorystore + +type MetricStoreConfig struct { + Checkpoints struct { + FileFormat string `json:"file-format"` + Interval string `json:"interval"` + RootDir string `json:"directory"` + Restore string `json:"restore"` + } `json:"checkpoints"` + Debug struct { + DumpToFile string `json:"dump-to-file"` + EnableGops bool `json:"gops"` + } `json:"debug"` + RetentionInMemory string `json:"retention-in-memory"` + Archive struct { + Interval string `json:"interval"` + RootDir string `json:"directory"` + DeleteInstead bool `json:"delete-instead"` + } `json:"archive"` +} + +var Keys MetricStoreConfig diff --git a/internal/memorystore/debug.go b/internal/memorystore/debug.go new file mode 100644 index 0000000..2743a45 --- /dev/null +++ b/internal/memorystore/debug.go @@ -0,0 +1,107 @@ +package memorystore + +import ( + "bufio" + "fmt" + "strconv" +) + +func (b *buffer) debugDump(buf []byte) []byte { + if b.prev != nil { + buf = b.prev.debugDump(buf) + } + + start, len, end := b.start, len(b.data), b.start+b.frequency*int64(len(b.data)) + buf = append(buf, `{"start":`...) + buf = strconv.AppendInt(buf, start, 10) + buf = append(buf, `,"len":`...) + buf = strconv.AppendInt(buf, int64(len), 10) + buf = append(buf, `,"end":`...) + buf = strconv.AppendInt(buf, end, 10) + if b.archived { + buf = append(buf, `,"saved":true`...) + } + if b.next != nil { + buf = append(buf, `},`...) + } else { + buf = append(buf, `}`...) + } + return buf +} + +func (l *Level) debugDump(m *MemoryStore, w *bufio.Writer, lvlname string, buf []byte, depth int) ([]byte, error) { + l.lock.RLock() + defer l.lock.RUnlock() + for i := 0; i < depth; i++ { + buf = append(buf, '\t') + } + buf = append(buf, '"') + buf = append(buf, lvlname...) + buf = append(buf, "\":{\n"...) + depth += 1 + objitems := 0 + for name, mc := range m.Metrics { + if b := l.metrics[mc.Offset]; b != nil { + for i := 0; i < depth; i++ { + buf = append(buf, '\t') + } + + buf = append(buf, '"') + buf = append(buf, name...) + buf = append(buf, `":[`...) + buf = b.debugDump(buf) + buf = append(buf, "],\n"...) + objitems++ + } + } + + for name, lvl := range l.children { + _, err := w.Write(buf) + if err != nil { + return nil, err + } + + buf = buf[0:0] + buf, err = lvl.debugDump(m, w, name, buf, depth) + if err != nil { + return nil, err + } + + buf = append(buf, ',', '\n') + objitems++ + } + + // remove final `,`: + if objitems > 0 { + buf = append(buf[0:len(buf)-1], '\n') + } + + depth -= 1 + for i := 0; i < depth; i++ { + buf = append(buf, '\t') + } + buf = append(buf, '}') + return buf, nil +} + +func (m *MemoryStore) DebugDump(w *bufio.Writer, selector []string) error { + lvl := m.root.findLevel(selector) + if lvl == nil { + return fmt.Errorf("not found: %#v", selector) + } + + buf := make([]byte, 0, 2048) + buf = append(buf, "{"...) + + buf, err := lvl.debugDump(m, w, "data", buf, 0) + if err != nil { + return err + } + + buf = append(buf, "}\n"...) + if _, err = w.Write(buf); err != nil { + return err + } + + return w.Flush() +} diff --git a/internal/memorystore/healthcheck.go b/internal/memorystore/healthcheck.go new file mode 100644 index 0000000..cb22d49 --- /dev/null +++ b/internal/memorystore/healthcheck.go @@ -0,0 +1,88 @@ +package memorystore + +import ( + "bufio" + "fmt" + "time" +) + +// This is a threshold that allows a node to be healthy with certain number of data points missing. +// Suppose a node does not receive last 5 data points, then healthCheck endpoint will still say a +// node is healthy. 
Anything more than 5 missing points in metrics of the node will deem the node unhealthy. +const MaxMissingDataPoints int64 = 5 + +// This is a threshold which allows upto certain number of metrics in a node to be unhealthly. +// Works with MaxMissingDataPoints. Say 5 metrics (including submetrics) do not receive the last +// MaxMissingDataPoints data points, then the node will be deemed healthy. Any more metrics that does +// not receive data for MaxMissingDataPoints data points will deem the node unhealthy. +const MaxUnhealthyMetrics int64 = 5 + +func (b *buffer) healthCheck() int64 { + + // Check if the buffer is empty + if b.data == nil { + return 1 + } + + buffer_end := b.start + b.frequency*int64(len(b.data)) + t := time.Now().Unix() + + // Check if the buffer is too old + if t-buffer_end > MaxMissingDataPoints*b.frequency { + return 1 + } + + return 0 +} + +func (l *Level) healthCheck(m *MemoryStore, count int64) (int64, error) { + l.lock.RLock() + defer l.lock.RUnlock() + + for _, mc := range m.Metrics { + if b := l.metrics[mc.Offset]; b != nil { + count += b.healthCheck() + } + } + + for _, lvl := range l.children { + c, err := lvl.healthCheck(m, 0) + if err != nil { + return 0, err + } + count += c + } + + return count, nil +} + +func (m *MemoryStore) HealthCheck(w *bufio.Writer, selector []string) error { + lvl := m.root.findLevel(selector) + if lvl == nil { + return fmt.Errorf("not found: %#v", selector) + } + + buf := make([]byte, 0, 25) + // buf = append(buf, "{"...) + + var count int64 = 0 + + unhealthyMetricsCount, err := lvl.healthCheck(m, count) + if err != nil { + return err + } + + if unhealthyMetricsCount < MaxUnhealthyMetrics { + buf = append(buf, "Healthy"...) + } else { + buf = append(buf, "Unhealthy"...) + } + + // buf = append(buf, "}\n"...) + + if _, err = w.Write(buf); err != nil { + return err + } + + return w.Flush() +} diff --git a/internal/memorystore/level.go b/internal/memorystore/level.go new file mode 100644 index 0000000..76916e6 --- /dev/null +++ b/internal/memorystore/level.go @@ -0,0 +1,187 @@ +package memorystore + +import ( + "sync" + "unsafe" + + "github.com/ClusterCockpit/cc-lib/util" +) + +// Could also be called "node" as this forms a node in a tree structure. +// Called Level because "node" might be confusing here. +// Can be both a leaf or a inner node. In this tree structue, inner nodes can +// also hold data (in `metrics`). +type Level struct { + children map[string]*Level + metrics []*buffer + lock sync.RWMutex +} + +// Find the correct level for the given selector, creating it if +// it does not exist. Example selector in the context of the +// ClusterCockpit could be: []string{ "emmy", "host123", "cpu0" }. +// This function would probably benefit a lot from `level.children` beeing a `sync.Map`? +func (l *Level) findLevelOrCreate(selector []string, nMetrics int) *Level { + if len(selector) == 0 { + return l + } + + // Allow concurrent reads: + l.lock.RLock() + var child *Level + var ok bool + if l.children == nil { + // Children map needs to be created... + l.lock.RUnlock() + } else { + child, ok := l.children[selector[0]] + l.lock.RUnlock() + if ok { + return child.findLevelOrCreate(selector[1:], nMetrics) + } + } + + // The level does not exist, take write lock for unqiue access: + l.lock.Lock() + // While this thread waited for the write lock, another thread + // could have created the child node. 
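+	// Re-check under the write lock before allocating a new child.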
+ if l.children != nil { + child, ok = l.children[selector[0]] + if ok { + l.lock.Unlock() + return child.findLevelOrCreate(selector[1:], nMetrics) + } + } + + child = &Level{ + metrics: make([]*buffer, nMetrics), + children: nil, + } + + if l.children != nil { + l.children[selector[0]] = child + } else { + l.children = map[string]*Level{selector[0]: child} + } + l.lock.Unlock() + return child.findLevelOrCreate(selector[1:], nMetrics) +} + +func (l *Level) free(t int64) (int, error) { + l.lock.Lock() + defer l.lock.Unlock() + + n := 0 + for i, b := range l.metrics { + if b != nil { + delme, m := b.free(t) + n += m + if delme { + if cap(b.data) == BUFFER_CAP { + bufferPool.Put(b) + } + l.metrics[i] = nil + } + } + } + + for _, l := range l.children { + m, err := l.free(t) + n += m + if err != nil { + return n, err + } + } + + return n, nil +} + +func (l *Level) sizeInBytes() int64 { + l.lock.RLock() + defer l.lock.RUnlock() + size := int64(0) + + for _, b := range l.metrics { + if b != nil { + size += b.count() * int64(unsafe.Sizeof(util.Float(0))) + } + } + + for _, child := range l.children { + size += child.sizeInBytes() + } + + return size +} + +func (l *Level) findLevel(selector []string) *Level { + if len(selector) == 0 { + return l + } + + l.lock.RLock() + defer l.lock.RUnlock() + + lvl := l.children[selector[0]] + if lvl == nil { + return nil + } + + return lvl.findLevel(selector[1:]) +} + +func (l *Level) findBuffers(selector util.Selector, offset int, f func(b *buffer) error) error { + l.lock.RLock() + defer l.lock.RUnlock() + + if len(selector) == 0 { + b := l.metrics[offset] + if b != nil { + return f(b) + } + + for _, lvl := range l.children { + err := lvl.findBuffers(nil, offset, f) + if err != nil { + return err + } + } + return nil + } + + sel := selector[0] + if len(sel.String) != 0 && l.children != nil { + lvl, ok := l.children[sel.String] + if ok { + err := lvl.findBuffers(selector[1:], offset, f) + if err != nil { + return err + } + } + return nil + } + + if sel.Group != nil && l.children != nil { + for _, key := range sel.Group { + lvl, ok := l.children[key] + if ok { + err := lvl.findBuffers(selector[1:], offset, f) + if err != nil { + return err + } + } + } + return nil + } + + if sel.Any && l.children != nil { + for _, lvl := range l.children { + if err := lvl.findBuffers(selector[1:], offset, f); err != nil { + return err + } + } + return nil + } + + return nil +} diff --git a/internal/memorystore/memorystore.go b/internal/memorystore/memorystore.go new file mode 100644 index 0000000..7659a89 --- /dev/null +++ b/internal/memorystore/memorystore.go @@ -0,0 +1,372 @@ +package memorystore + +import ( + "context" + "errors" + "log" + "runtime" + "sync" + "time" + + "github.com/ClusterCockpit/cc-backend/pkg/avro" + "github.com/ClusterCockpit/cc-lib/resampler" + "github.com/ClusterCockpit/cc-lib/util" + "github.com/ClusterCockpit/cc-metric-store/internal/config" +) + +var ( + singleton sync.Once + msInstance *MemoryStore +) + +var NumWorkers int = 4 + +func init() { + maxWorkers := 10 + NumWorkers = runtime.NumCPU()/2 + 1 + if NumWorkers > maxWorkers { + NumWorkers = maxWorkers + } +} + +type Metric struct { + Name string + Value util.Float + MetricConfig config.MetricConfig +} + +type MemoryStore struct { + Metrics map[string]config.MetricConfig + root Level +} + +// Create a new, initialized instance of a MemoryStore. +// Will panic if values in the metric configurations are invalid. 
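+//
+// A hedged initialization sketch (metric names, frequencies and aggregations
+// are illustrative):
+//
+//	memorystore.Init(map[string]config.MetricConfig{
+//		"cpu_load": {Frequency: 60, Aggregation: config.AvgAggregation},
+//		"mem_used": {Frequency: 60, Aggregation: config.SumAggregation},
+//	})
+//	ms := memorystore.GetMemoryStore()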
+func Init(metrics map[string]config.MetricConfig) { + singleton.Do(func() { + offset := 0 + for key, cfg := range metrics { + if cfg.Frequency == 0 { + panic("invalid frequency") + } + + metrics[key] = config.MetricConfig{ + Frequency: cfg.Frequency, + Aggregation: cfg.Aggregation, + Offset: offset, + } + offset += 1 + } + + msInstance = &MemoryStore{ + root: Level{ + metrics: make([]*buffer, len(metrics)), + children: make(map[string]*Level), + }, + Metrics: metrics, + } + }) +} + +func GetMemoryStore() *MemoryStore { + if msInstance == nil { + log.Fatalf("MemoryStore not initialized!") + } + + return msInstance +} + +func Shutdown() { + log.Printf("Writing to '%s'...\n", config.Keys.Checkpoints.RootDir) + var files int + var err error + + ms := GetMemoryStore() + + if config.Keys.Checkpoints.FileFormat == "json" { + files, err = ms.ToCheckpoint(config.Keys.Checkpoints.RootDir, lastCheckpoint.Unix(), time.Now().Unix()) + } else { + files, err = avro.GetAvroStore().ToCheckpoint(config.Keys.Checkpoints.RootDir, true) + close(avro.LineProtocolMessages) + } + + if err != nil { + log.Printf("Writing checkpoint failed: %s\n", err.Error()) + } + log.Printf("Done! (%d files written)\n", files) + + // ms.PrintHeirarchy() +} + +// func (m *MemoryStore) PrintHeirarchy() { +// m.root.lock.Lock() +// defer m.root.lock.Unlock() + +// fmt.Printf("Root : \n") + +// for lvl1, sel1 := range m.root.children { +// fmt.Printf("\t%s\n", lvl1) +// for lvl2, sel2 := range sel1.children { +// fmt.Printf("\t\t%s\n", lvl2) +// if lvl1 == "fritz" && lvl2 == "f0201" { + +// for name, met := range m.Metrics { +// mt := sel2.metrics[met.Offset] + +// fmt.Printf("\t\t\t\t%s\n", name) +// fmt.Printf("\t\t\t\t") + +// for mt != nil { +// // if name == "cpu_load" { +// fmt.Printf("%d(%d) -> %#v", mt.start, len(mt.data), mt.data) +// // } +// mt = mt.prev +// } +// fmt.Printf("\n") + +// } +// } +// for lvl3, sel3 := range sel2.children { +// if lvl1 == "fritz" && lvl2 == "f0201" && lvl3 == "hwthread70" { + +// fmt.Printf("\t\t\t\t\t%s\n", lvl3) + +// for name, met := range m.Metrics { +// mt := sel3.metrics[met.Offset] + +// fmt.Printf("\t\t\t\t\t\t%s\n", name) + +// fmt.Printf("\t\t\t\t\t\t") + +// for mt != nil { +// // if name == "clock" { +// fmt.Printf("%d(%d) -> %#v", mt.start, len(mt.data), mt.data) + +// mt = mt.prev +// } +// fmt.Printf("\n") + +// } + +// // for i, _ := range sel3.metrics { +// // fmt.Printf("\t\t\t\t\t%s\n", getName(configmetrics, i)) +// // } +// } +// } +// } +// } + +// } + +func getName(m *MemoryStore, i int) string { + for key, val := range m.Metrics { + if val.Offset == i { + return key + } + } + return "" +} + +func Retention(wg *sync.WaitGroup, ctx context.Context) { + ms := GetMemoryStore() + + go func() { + defer wg.Done() + d, err := time.ParseDuration(config.Keys.RetentionInMemory) + if err != nil { + log.Fatal(err) + } + if d <= 0 { + return + } + + ticks := func() <-chan time.Time { + d := d / 2 + if d <= 0 { + return nil + } + return time.NewTicker(d).C + }() + for { + select { + case <-ctx.Done(): + return + case <-ticks: + t := time.Now().Add(-d) + log.Printf("start freeing buffers (older than %s)...\n", t.Format(time.RFC3339)) + freed, err := ms.Free(nil, t.Unix()) + if err != nil { + log.Printf("freeing up buffers failed: %s\n", err.Error()) + } else { + log.Printf("done: %d buffers freed\n", freed) + } + } + } + }() +} + +// Write all values in `metrics` to the level specified by `selector` for time `ts`. +// Look at `findLevelOrCreate` for how selectors work. 
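+//
+// A hedged example (cluster, host and metric values are illustrative):
+//
+//	err := ms.Write([]string{"emmy", "host123"}, time.Now().Unix(), []Metric{
+//		{Name: "cpu_load", Value: 0.42},
+//	})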
+func (m *MemoryStore) Write(selector []string, ts int64, metrics []Metric) error { + var ok bool + for i, metric := range metrics { + if metric.MetricConfig.Frequency == 0 { + metric.MetricConfig, ok = m.Metrics[metric.Name] + if !ok { + metric.MetricConfig.Frequency = 0 + } + metrics[i] = metric + } + } + + return m.WriteToLevel(&m.root, selector, ts, metrics) +} + +func (m *MemoryStore) GetLevel(selector []string) *Level { + return m.root.findLevelOrCreate(selector, len(m.Metrics)) +} + +// Assumes that `minfo` in `metrics` is filled in! +func (m *MemoryStore) WriteToLevel(l *Level, selector []string, ts int64, metrics []Metric) error { + l = l.findLevelOrCreate(selector, len(m.Metrics)) + l.lock.Lock() + defer l.lock.Unlock() + + for _, metric := range metrics { + if metric.MetricConfig.Frequency == 0 { + continue + } + + b := l.metrics[metric.MetricConfig.Offset] + if b == nil { + // First write to this metric and level + b = newBuffer(ts, metric.MetricConfig.Frequency) + l.metrics[metric.MetricConfig.Offset] = b + } + + nb, err := b.write(ts, metric.Value) + if err != nil { + return err + } + + // Last write created a new buffer... + if b != nb { + l.metrics[metric.MetricConfig.Offset] = nb + } + } + return nil +} + +// Returns all values for metric `metric` from `from` to `to` for the selected level(s). +// If the level does not hold the metric itself, the data will be aggregated recursively from the children. +// The second and third return value are the actual from/to for the data. Those can be different from +// the range asked for if no data was available. +func (m *MemoryStore) Read(selector util.Selector, metric string, from, to, resolution int64) ([]util.Float, int64, int64, int64, error) { + if from > to { + return nil, 0, 0, 0, errors.New("invalid time range") + } + + minfo, ok := m.Metrics[metric] + if !ok { + return nil, 0, 0, 0, errors.New("unkown metric: " + metric) + } + + n, data := 0, make([]util.Float, (to-from)/minfo.Frequency+1) + + err := m.root.findBuffers(selector, minfo.Offset, func(b *buffer) error { + cdata, cfrom, cto, err := b.read(from, to, data) + if err != nil { + return err + } + + if n == 0 { + from, to = cfrom, cto + } else if from != cfrom || to != cto || len(data) != len(cdata) { + missingfront, missingback := int((from-cfrom)/minfo.Frequency), int((to-cto)/minfo.Frequency) + if missingfront != 0 { + return ErrDataDoesNotAlign + } + + newlen := len(cdata) - missingback + if newlen < 1 { + return ErrDataDoesNotAlign + } + cdata = cdata[0:newlen] + if len(cdata) != len(data) { + return ErrDataDoesNotAlign + } + + from, to = cfrom, cto + } + + data = cdata + n += 1 + return nil + }) + + if err != nil { + return nil, 0, 0, 0, err + } else if n == 0 { + return nil, 0, 0, 0, errors.New("metric or host not found") + } else if n > 1 { + if minfo.Aggregation == config.AvgAggregation { + normalize := 1. / util.Float(n) + for i := 0; i < len(data); i++ { + data[i] *= normalize + } + } else if minfo.Aggregation != config.SumAggregation { + return nil, 0, 0, 0, errors.New("invalid aggregation") + } + } + + data, resolution, err = resampler.LargestTriangleThreeBucket(data, minfo.Frequency, resolution) + if err != nil { + return nil, 0, 0, 0, err + } + + return data, from, to, resolution, nil +} + +// Release all buffers for the selected level and all its children that contain only +// values older than `t`. 
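+// Passing a nil selector frees matching buffers for the whole tree; this is
+// how the periodic retention job calls it.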
+func (m *MemoryStore) Free(selector []string, t int64) (int, error) { + return m.GetLevel(selector).free(t) +} + +func (m *MemoryStore) FreeAll() error { + for k := range m.root.children { + delete(m.root.children, k) + } + + return nil +} + +func (m *MemoryStore) SizeInBytes() int64 { + return m.root.sizeInBytes() +} + +// Given a selector, return a list of all children of the level selected. +func (m *MemoryStore) ListChildren(selector []string) []string { + lvl := &m.root + for lvl != nil && len(selector) != 0 { + lvl.lock.RLock() + next := lvl.children[selector[0]] + lvl.lock.RUnlock() + lvl = next + selector = selector[1:] + } + + if lvl == nil { + return nil + } + + lvl.lock.RLock() + defer lvl.lock.RUnlock() + + children := make([]string, 0, len(lvl.children)) + for child := range lvl.children { + children = append(children, child) + } + + return children +} diff --git a/internal/memorystore/stats.go b/internal/memorystore/stats.go new file mode 100644 index 0000000..6682d62 --- /dev/null +++ b/internal/memorystore/stats.go @@ -0,0 +1,120 @@ +package memorystore + +import ( + "errors" + "math" + + "github.com/ClusterCockpit/cc-lib/util" + "github.com/ClusterCockpit/cc-metric-store/internal/config" +) + +type Stats struct { + Samples int + Avg util.Float + Min util.Float + Max util.Float +} + +func (b *buffer) stats(from, to int64) (Stats, int64, int64, error) { + if from < b.start { + if b.prev != nil { + return b.prev.stats(from, to) + } + from = b.start + } + + // TODO: Check if b.closed and if so and the full buffer is queried, + // use b.statistics instead of iterating over the buffer. + + samples := 0 + sum, min, max := 0.0, math.MaxFloat32, -math.MaxFloat32 + + var t int64 + for t = from; t < to; t += b.frequency { + idx := int((t - b.start) / b.frequency) + if idx >= cap(b.data) { + b = b.next + if b == nil { + break + } + idx = 0 + } + + if t < b.start || idx >= len(b.data) { + continue + } + + xf := float64(b.data[idx]) + if math.IsNaN(xf) { + continue + } + + samples += 1 + sum += xf + min = math.Min(min, xf) + max = math.Max(max, xf) + } + + return Stats{ + Samples: samples, + Avg: util.Float(sum) / util.Float(samples), + Min: util.Float(min), + Max: util.Float(max), + }, from, t, nil +} + +// Returns statistics for the requested metric on the selected node/level. +// Data is aggregated to the selected level the same way as in `MemoryStore.Read`. +// If `Stats.Samples` is zero, the statistics should not be considered as valid. 
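+// For metrics with AvgAggregation the accumulated average is divided by the
+// number of contributing buffers (n); samples are summed and min/max are
+// combined directly.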
+func (m *MemoryStore) Stats(selector util.Selector, metric string, from, to int64) (*Stats, int64, int64, error) { + if from > to { + return nil, 0, 0, errors.New("invalid time range") + } + + minfo, ok := m.Metrics[metric] + if !ok { + return nil, 0, 0, errors.New("unkown metric: " + metric) + } + + n, samples := 0, 0 + avg, min, max := util.Float(0), math.MaxFloat32, -math.MaxFloat32 + err := m.root.findBuffers(selector, minfo.Offset, func(b *buffer) error { + stats, cfrom, cto, err := b.stats(from, to) + if err != nil { + return err + } + + if n == 0 { + from, to = cfrom, cto + } else if from != cfrom || to != cto { + return ErrDataDoesNotAlign + } + + samples += stats.Samples + avg += stats.Avg + min = math.Min(min, float64(stats.Min)) + max = math.Max(max, float64(stats.Max)) + n += 1 + return nil + }) + if err != nil { + return nil, 0, 0, err + } + + if n == 0 { + return nil, 0, 0, ErrNoData + } + + if minfo.Aggregation == config.AvgAggregation { + avg /= util.Float(n) + } else if n > 1 && minfo.Aggregation != config.SumAggregation { + return nil, 0, 0, errors.New("invalid aggregation") + } + + return &Stats{ + Samples: samples, + Avg: avg, + Min: util.Float(min), + Max: util.Float(max), + }, from, to, nil +} From bd2cdfcef2b4bd2cd972c7e413fe47eb1f1addf6 Mon Sep 17 00:00:00 2001 From: Christoph Kluge Date: Tue, 12 Aug 2025 17:04:31 +0200 Subject: [PATCH 21/40] reorganize plots, reduce tabs, --- web/frontend/src/Analysis.root.svelte | 2 +- web/frontend/src/Status.root.svelte | 28 +- web/frontend/src/generic/plots/Pie.svelte | 116 ++-- web/frontend/src/status/DevelDash.svelte | 20 +- web/frontend/src/status/StatusDash.svelte | 135 ++++- web/frontend/src/status/UsageDash.svelte | 630 ++++++++++++++-------- 6 files changed, 621 insertions(+), 310 deletions(-) diff --git a/web/frontend/src/Analysis.root.svelte b/web/frontend/src/Analysis.root.svelte index 689b7a2..122a67b 100644 --- a/web/frontend/src/Analysis.root.svelte +++ b/web/frontend/src/Analysis.root.svelte @@ -459,7 +459,7 @@ {#each $topQuery.data.topList as te, i} - + {#if groupSelection.key == "user"} -
+
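A minimal usage sketch for the MemoryStore.Read function added in the memorystore
patch above (not part of any patch in this series; the helper name, the metric
name and the fixed 60-second resolution are illustrative, and the code assumes it
lives inside the cc-backend module, since internal packages cannot be imported
from elsewhere). Note that the requested resolution is only a hint: Read passes
the data through resampler.LargestTriangleThreeBucket and returns the resolution
it actually produced.

	package example

	import (
		"fmt"
		"time"

		"github.com/ClusterCockpit/cc-backend/internal/memorystore"
		"github.com/ClusterCockpit/cc-lib/util"
	)

	// readWindow reads the last hour of one metric for whatever the selector
	// addresses (e.g. cluster/host) and prints the window that was returned.
	func readWindow(ms *memorystore.MemoryStore, sel util.Selector, metric string) error {
		to := time.Now().Unix()
		from := to - 3600

		data, f, t, res, err := ms.Read(sel, metric, from, to, 60)
		if err != nil {
			return err
		}

		fmt.Printf("%s: %d points in [%d, %d] at %ds resolution\n", metric, len(data), f, t, res)
		return nil
	}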
diff --git a/web/frontend/src/status/DevelDash.svelte b/web/frontend/src/status/DevelDash.svelte index e0f4960..17426fc 100644 --- a/web/frontend/src/status/DevelDash.svelte +++ b/web/frontend/src/status/DevelDash.svelte @@ -22,12 +22,13 @@ } from "../generic/utils.js"; //import Roofline from "../generic/plots/Roofline.svelte"; import Roofline from "../generic/plots/Roofline.svelte"; - import Pie, { cbColors, colors } from "../generic/plots/Pie.svelte"; + import Pie, { colors } from "../generic/plots/Pie.svelte"; import { formatTime } from "../generic/units.js"; /* Svelte 5 Props */ let { - cluster + cluster, + useCbColors = false } = $props(); /* Const Init */ @@ -40,7 +41,6 @@ let plotWidths = $state([]); let statesWidth = $state(0); let healthWidth = $state(0); - let cbmode = $state(false); // let nodesCounts = $state({}); // let jobsJounts = $state({}); @@ -313,6 +313,12 @@ return result } + function legendColors(targetIdx) { + // Reuses first color if targetIdx overflows + let c = [...colors['default']]; + return c[(c.length + targetIdx) % c.length]; + } + @@ -386,7 +392,7 @@ }, 0)} Nodes {#each refinedStateData as sd, i} - + {sd.state} {sd.count} @@ -427,7 +433,7 @@ }, 0)} Nodes {#each refinedHealthData as hd, i} - + {hd.state} {hd.count} diff --git a/web/frontend/src/status/StatusDash.svelte b/web/frontend/src/status/StatusDash.svelte index f3fdd9b..44a0ab4 100644 --- a/web/frontend/src/status/StatusDash.svelte +++ b/web/frontend/src/status/StatusDash.svelte @@ -15,7 +15,7 @@ CardBody, Table, Progress, - // Icon, + Icon, } from "@sveltestrap/sveltestrap"; import { queryStore, @@ -24,15 +24,16 @@ } from "@urql/svelte"; import { init, - // transformPerNodeDataForRoofline, - } from "../generic/utils.js"; import { scaleNumbers, formatTime } from "../generic/units.js"; import Roofline from "../generic/plots/Roofline.svelte"; + import Pie, { colors } from "../generic/plots/Pie.svelte"; /* Svelte 5 Props */ let { - cluster + cluster, + useCbColors = false, + useAltColors = false, } = $props(); /* Const Init */ @@ -42,6 +43,7 @@ /* State Init */ let from = $state(new Date(Date.now() - 5 * 60 * 1000)); let to = $state(new Date(Date.now())); + let pieWidth = $state(0); let plotWidths = $state([]); // Bar Gauges let allocatedNodes = $state({}); @@ -58,6 +60,30 @@ let totalAccs = $state({}); /* Derived */ + // Accumulated NodeStates for Piecharts + const nodesStateCounts = $derived(queryStore({ + client: client, + query: gql` + query ($filter: [NodeFilter!]) { + nodeStates(filter: $filter) { + state + count + } + } + `, + variables: { + filter: { cluster: { eq: cluster }} + }, + })); + + const refinedStateData = $derived.by(() => { + return $nodesStateCounts?.data?.nodeStates.filter((e) => ['allocated', 'reserved', 'idle', 'mixed','down', 'unknown'].includes(e.state)) + }); + + const refinedHealthData = $derived.by(() => { + return $nodesStateCounts?.data?.nodeStates.filter((e) => ['full', 'partial', 'failed'].includes(e.state)) + }); + // Note: nodeMetrics are requested on configured $timestep resolution // Result: The latest 5 minutes (datapoints) for each node independent of job const statusQuery = $derived(queryStore({ @@ -334,8 +360,107 @@ return result } + function legendColors(targetIdx) { + // Reuses first color if targetIdx overflows + let c; + if (useCbColors) { + c = [...colors['colorblind']]; + } else if (useAltColors) { + c = [...colors['alternative']]; + } else { + c = [...colors['default']]; + } + return c[(c.length + targetIdx) % c.length]; + } + + +{#if $initq.data && 
$nodesStateCounts.data} + + +
+ {#key refinedStateData} +

+ {cluster.charAt(0).toUpperCase() + cluster.slice(1)} Node States +

+ sd.count, + )} + entities={refinedStateData.map( + (sd) => sd.state, + )} + /> + {/key} +
+ + + {#key refinedStateData} + + + + + + + {#each refinedStateData as sd, i} + + + + + + {/each} +
Current StateNodes
{sd.state}{sd.count}
+ {/key} + + + +
+ {#key refinedHealthData} +

+ {cluster.charAt(0).toUpperCase() + cluster.slice(1)} Node Health +

+ sd.count, + )} + entities={refinedHealthData.map( + (sd) => sd.state, + )} + /> + {/key} +
+ + + {#key refinedHealthData} + + + + + + + {#each refinedHealthData as hd, i} + + + + + + {/each} +
Current HealthNodes
{hd.state}{hd.count}
+ {/key} + +
+{/if} + +
{#if $initq.data && $statusQuery.data} {#each $initq.data.clusters.find((c) => c.name == cluster).subClusters as subCluster, i} @@ -454,5 +579,5 @@ {/each} {:else} - Cannot render status tab: No data! + Cannot render status rooflines: No data! {/if} diff --git a/web/frontend/src/status/UsageDash.svelte b/web/frontend/src/status/UsageDash.svelte index 93604ea..0de7da2 100644 --- a/web/frontend/src/status/UsageDash.svelte +++ b/web/frontend/src/status/UsageDash.svelte @@ -6,7 +6,6 @@ --> -{#if $initq.data} - - - -
+ +{#if $topJobsQuery.fetching || $nodeStatusQuery.fetching} + +{:else if $topJobsQuery.data && $nodeStatusQuery.data} + + + + + +

- Top Users on {cluster.charAt(0).toUpperCase() + cluster.slice(1)} + Top Users: Jobs

- {#key $topUserQuery.data} - {#if $topUserQuery.fetching} - - {:else if $topUserQuery.error} - {$topUserQuery.error.message} - {:else} - tu[topUserSelection.key], - )} - entities={$topUserQuery.data.topUser.map((tu) => scrambleNames ? scramble(tu.id) : tu.id)} - /> - {/if} - {/key} + tu['totalJobs'], + )} + entities={$topJobsQuery.data.topUser.map((tu) => scrambleNames ? scramble(tu.id) : tu.id)} + />
- - {#key $topUserQuery.data} - {#if $topUserQuery.fetching} - - {:else if $topUserQuery.error} - {$topUserQuery.error.message} - {:else} - - - - - - - {#each $topUserQuery.data.topUser as tu, i} - - - - {#if tu?.name} - {scrambleNames ? scramble(tu.name) : tu.name} - {/if} - - - {/each} -
LegendUser NameNumber of - -
{scrambleNames ? scramble(tu.id) : tu.id}{tu[topUserSelection.key]}
- {/if} - {/key} + + + + + + + + {#each $topJobsQuery.data.topUser as tu, i} + + + + {#if tu?.name} + {scrambleNames ? scramble(tu.name) : tu.name} + {/if} + + + {/each} +
UserActive Jobs
+ {scrambleNames ? scramble(tu.id) : tu.id} + + {tu['totalJobs']}
- + +

- Top Projects on {cluster.charAt(0).toUpperCase() + cluster.slice(1)} + Top Projects: Jobs

- {#key $topProjectQuery.data} - {#if $topProjectQuery.fetching} - - {:else if $topProjectQuery.error} - {$topProjectQuery.error.message} - {:else} - tp[topProjectSelection.key], - )} - entities={$topProjectQuery.data.topProjects.map((tp) => scrambleNames ? scramble(tp.id) : tp.id)} - /> - {/if} - {/key} + tp['totalJobs'], + )} + entities={$topJobsQuery.data.topProjects.map((tp) => scrambleNames ? scramble(tp.id) : tp.id)} + /> - - {#key $topProjectQuery.data} - {#if $topProjectQuery.fetching} - - {:else if $topProjectQuery.error} - {$topProjectQuery.error.message} - {:else} - - - - - - - {#each $topProjectQuery.data.topProjects as tp, i} - - - - - - {/each} -
LegendProject CodeNumber of - -
{scrambleNames ? scramble(tp.id) : tp.id}{tp[topProjectSelection.key]}
- {/if} - {/key} + + + + + + + + {#each $topJobsQuery.data.topProjects as tp, i} + + + + + + {/each} +
ProjectActive Jobs
+ {scrambleNames ? scramble(tp.id) : tp.id} + + {tp['totalJobs']}
+{:else} + Cannot render job status charts: No data! {/if} + +
+ + +{#if $topNodesQuery.fetching || $nodeStatusQuery.fetching} + +{:else if $topNodesQuery.data && $nodeStatusQuery.data} + + + + + +
+

+ Top Users: Nodes +

+ tu['totalNodes'], + )} + entities={$topNodesQuery.data.topUser.map((tu) => scrambleNames ? scramble(tu.id) : tu.id)} + /> +
+ + + + + + + + + {#each $topNodesQuery.data.topUser as tu, i} + + + + {#if tu?.name} + {scrambleNames ? scramble(tu.name) : tu.name} + {/if} + + + {/each} +
UserNodes
+ {scrambleNames ? scramble(tu.id) : tu.id} + + {tu['totalNodes']}
+ + + +

+ Top Projects: Nodes +

+ tp['totalNodes'], + )} + entities={$topNodesQuery.data.topProjects.map((tp) => scrambleNames ? scramble(tp.id) : tp.id)} + /> + + + + + + + + + {#each $topNodesQuery.data.topProjects as tp, i} + + + + + + {/each} +
ProjectNodes
+ {scrambleNames ? scramble(tp.id) : tp.id} + + {tp['totalNodes']}
+ +
+{:else} + Cannot render node status charts: No data! +{/if} + +
+ + +{#if $topAccsQuery.fetching || $nodeStatusQuery.fetching} + +{:else if $topAccsQuery.data && $nodeStatusQuery.data} + + + + + +
+

+ Top Users: GPUs +

+ tu['totalAccs'], + )} + entities={$topAccsQuery.data.topUser.map((tu) => scrambleNames ? scramble(tu.id) : tu.id)} + /> +
+ + + + + + + + + {#each $topAccsQuery.data.topUser as tu, i} + + + + {#if tu?.name} + {scrambleNames ? scramble(tu.name) : tu.name} + {/if} + + + {/each} +
UserGPUs
+ {scrambleNames ? scramble(tu.id) : tu.id} + + {tu['totalAccs']}
+ + + +

+ Top Projects: GPUs +

+ tp['totalAccs'], + )} + entities={$topAccsQuery.data.topProjects.map((tp) => scrambleNames ? scramble(tp.id) : tp.id)} + /> + + + + + + + + + {#each $topAccsQuery.data.topProjects as tp, i} + + + + + + {/each} +
ProjectGPUs
+ {scrambleNames ? scramble(tp.id) : tp.id} + + {tp['totalAccs']}
+ +
+{:else} + Cannot render accelerator status charts: No data! +{/if} \ No newline at end of file From 44d8254a0bbbe195ba3321b219a5ad51848aa3a4 Mon Sep 17 00:00:00 2001 From: Christoph Kluge Date: Tue, 12 Aug 2025 17:57:04 +0200 Subject: [PATCH 22/40] fix layouting --- web/frontend/src/status/UsageDash.svelte | 30 ++++++++++++------------ 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/web/frontend/src/status/UsageDash.svelte b/web/frontend/src/status/UsageDash.svelte index 0de7da2..5afde37 100644 --- a/web/frontend/src/status/UsageDash.svelte +++ b/web/frontend/src/status/UsageDash.svelte @@ -198,7 +198,7 @@ {:else if $topJobsQuery.data && $nodeStatusQuery.data} - + - +

Top Users: Jobs @@ -228,7 +228,7 @@ />

- + @@ -256,7 +256,7 @@
- +

Top Projects: Jobs

@@ -271,7 +271,7 @@ entities={$topJobsQuery.data.topProjects.map((tp) => scrambleNames ? scramble(tp.id) : tp.id)} /> - + @@ -303,7 +303,7 @@ {:else if $topNodesQuery.data && $nodeStatusQuery.data} - + - +

Top Users: Nodes @@ -331,7 +331,7 @@ />

- +
@@ -359,7 +359,7 @@
- +

Top Projects: Nodes

@@ -374,7 +374,7 @@ entities={$topNodesQuery.data.topProjects.map((tp) => scrambleNames ? scramble(tp.id) : tp.id)} /> - + @@ -406,7 +406,7 @@ {:else if $topAccsQuery.data && $nodeStatusQuery.data} - + - +

Top Users: GPUs @@ -434,7 +434,7 @@ />

- +
@@ -462,7 +462,7 @@
- +

Top Projects: GPUs

@@ -477,7 +477,7 @@ entities={$topAccsQuery.data.topProjects.map((tp) => scrambleNames ? scramble(tp.id) : tp.id)} /> - + From 58ae476a3e3bab322055f45ed80600dbab7dc8bd Mon Sep 17 00:00:00 2001 From: Christoph Kluge Date: Wed, 13 Aug 2025 14:22:24 +0200 Subject: [PATCH 23/40] move and add interface options for status tabs --- web/frontend/src/Status.root.svelte | 47 ++-------- web/frontend/src/User.root.svelte | 1 + .../generic/select/HistogramSelection.svelte | 8 +- web/frontend/src/status.entrypoint.js | 2 +- web/frontend/src/status/StatisticsDash.svelte | 65 ++++++-------- web/frontend/src/status/StatusDash.svelte | 47 +++++----- web/frontend/src/status/UsageDash.svelte | 86 ++++++++++++++----- 7 files changed, 126 insertions(+), 130 deletions(-) diff --git a/web/frontend/src/Status.root.svelte b/web/frontend/src/Status.root.svelte index f0336f9..e28af8e 100644 --- a/web/frontend/src/Status.root.svelte +++ b/web/frontend/src/Status.root.svelte @@ -2,7 +2,7 @@ @component Main cluster status view component; renders current system-usage information Properties: - - `cluster String`: The cluster to show status information for + - `presetCluster String`: The cluster to show status information for --> - + -

Current utilization of cluster "{cluster}"

- - - { - from = new Date(Date.now() - 5 * 60 * 1000); - to = new Date(Date.now()); - }} - /> +

Current Status of Cluster "{presetCluster.charAt(0).toUpperCase() + presetCluster.slice(1)}"

- - - + - + - + diff --git a/web/frontend/src/User.root.svelte b/web/frontend/src/User.root.svelte index c1f0fb8..f675a0d 100644 --- a/web/frontend/src/User.root.svelte +++ b/web/frontend/src/User.root.svelte @@ -404,6 +404,7 @@ cluster={selectedCluster} bind:isOpen={isHistogramSelectionOpen} presetSelectedHistograms={selectedHistograms} + configName="user_view_histogramMetrics" applyChange={(newSelection) => { selectedHistogramsBuffer[selectedCluster || 'all'] = [...newSelection]; }} diff --git a/web/frontend/src/generic/select/HistogramSelection.svelte b/web/frontend/src/generic/select/HistogramSelection.svelte index 0468efd..a424ef4 100644 --- a/web/frontend/src/generic/select/HistogramSelection.svelte +++ b/web/frontend/src/generic/select/HistogramSelection.svelte @@ -3,8 +3,9 @@ Properties: - `cluster String`: Currently selected cluster - - `selectedHistograms [String]`: The currently selected metrics to display as histogram - `ìsOpen Bool`: Is selection opened [Bindable] + - `configName String`: The config id string to be updated in database on selection change + - `presetSelectedHistograms [String]`: The currently selected metrics to display as histogram - `applyChange Func`: The callback function to apply current selection --> @@ -25,6 +26,7 @@ let { cluster, isOpen = $bindable(), + configName, presetSelectedHistograms, applyChange } = $props(); @@ -67,8 +69,8 @@ applyChange(selectedHistograms) updateConfiguration({ name: cluster - ? `user_view_histogramMetrics:${cluster}` - : "user_view_histogramMetrics", + ? `${configName}:${cluster}` + : configName, value: selectedHistograms, }); } diff --git a/web/frontend/src/status.entrypoint.js b/web/frontend/src/status.entrypoint.js index 3e45cb7..c3407c1 100644 --- a/web/frontend/src/status.entrypoint.js +++ b/web/frontend/src/status.entrypoint.js @@ -5,7 +5,7 @@ import Status from './Status.root.svelte' mount(Status, { target: document.getElementById('svelte-app'), props: { - cluster: infos.cluster, + presetCluster: infos.cluster, }, context: new Map([ ['cc-config', clusterCockpitConfig] diff --git a/web/frontend/src/status/StatisticsDash.svelte b/web/frontend/src/status/StatisticsDash.svelte index e573554..8523c80 100644 --- a/web/frontend/src/status/StatisticsDash.svelte +++ b/web/frontend/src/status/StatisticsDash.svelte @@ -2,7 +2,7 @@ @component Main cluster status view component; renders current system-usage information Properties: - - `cluster String`: The cluster to show status information for + - `presetCluster String`: The cluster to show status information for --> - -
+ + + + { + from = new Date(Date.now() - (30 * 24 * 60 * 60 * 1000)); // Triggers GQL + to = new Date(Date.now()); + }} + /> + + {#if $initq.fetching || $metricStatusQuery.fetching} @@ -168,6 +152,7 @@ {cluster} bind:isOpen={isHistogramSelectionOpen} presetSelectedHistograms={selectedHistograms} + configName="status_view_selectedHistograms" applyChange={(newSelection) => { selectedHistograms = [...newSelection]; }} diff --git a/web/frontend/src/status/StatusDash.svelte b/web/frontend/src/status/StatusDash.svelte index 44a0ab4..280b04b 100644 --- a/web/frontend/src/status/StatusDash.svelte +++ b/web/frontend/src/status/StatusDash.svelte @@ -2,7 +2,7 @@ @component Main cluster status view component; renders current system-usage information Properties: - - `cluster String`: The cluster to show status information for + - `presetCluster String`: The cluster to show status information for --> + + + + { + from = new Date(Date.now() - 5 * 60 * 1000); + to = new Date(Date.now()); + }} + /> + + + +
+ {#if $initq.data && $nodesStateCounts.data} diff --git a/web/frontend/src/status/UsageDash.svelte b/web/frontend/src/status/UsageDash.svelte index 5afde37..16575e4 100644 --- a/web/frontend/src/status/UsageDash.svelte +++ b/web/frontend/src/status/UsageDash.svelte @@ -2,7 +2,7 @@ @component Main cluster status view component; renders current system-usage information Properties: - - `cluster String`: The cluster to show status information for + - `presetCluster String`: The cluster to show status information for --> + + +
+ + + + + + Duration Bin Size + + + {#each durationBinOptions as dbin} + + {/each} + + + + + { + from = new Date(Date.now() - (30 * 24 * 60 * 60 * 1000)); // Triggers GQL + to = new Date(Date.now()); + }} + /> + + + +
+ {#if $topJobsQuery.fetching || $nodeStatusQuery.fetching} {:else if $topJobsQuery.data && $nodeStatusQuery.data}
- + {#key $nodeStatusQuery.data.jobsStatistics[0].histDuration} + + {/key}
@@ -233,7 +277,7 @@
- + {#each $topJobsQuery.data.topUser as tu, i} @@ -276,7 +320,7 @@ - + {#each $topJobsQuery.data.topProjects as tp, i} From 19a75554b0fec818ab4a77cd38021e6a08280fa0 Mon Sep 17 00:00:00 2001 From: Christoph Kluge Date: Wed, 13 Aug 2025 14:23:19 +0200 Subject: [PATCH 24/40] remove outdated components --- web/frontend/src/status/DevelDash.svelte | 468 ----------------------- web/frontend/src/status/NodeDash.svelte | 127 ------ 2 files changed, 595 deletions(-) delete mode 100644 web/frontend/src/status/DevelDash.svelte delete mode 100644 web/frontend/src/status/NodeDash.svelte diff --git a/web/frontend/src/status/DevelDash.svelte b/web/frontend/src/status/DevelDash.svelte deleted file mode 100644 index 17426fc..0000000 --- a/web/frontend/src/status/DevelDash.svelte +++ /dev/null @@ -1,468 +0,0 @@ - - - - - -{#if $initq.data && $jobRoofQuery.data} - {#each $initq.data.clusters.find((c) => c.name == cluster).subClusters as subCluster, i} - - - Bubble Node -
- {#key $nodesData?.data?.nodeMetrics || $nodesJobs?.data?.jobs} - {subCluster.name} Total: {$jobRoofQuery.data.jobsMetricStats.filter( - (data) => data.subCluster == subCluster.name, - ).length} Jobs - data.subCluster == subCluster.name, - ) - )} - nodesData={transformNodesStatsToInfo($nodesData?.data?.nodeMetrics.filter( - (data) => data.subCluster == subCluster.name, - ) - )} - /> - {/key} -
- - - Bubble Jobs -
- {#key $jobRoofQuery.data.jobsMetricStats} - {subCluster.name} Total: {$jobRoofQuery.data.jobsMetricStats.filter( - (data) => data.subCluster == subCluster.name, - ).length} Jobs - data.subCluster == subCluster.name, - ) - )} - jobsData={transformJobsStatsToInfo($jobRoofQuery?.data?.jobsMetricStats.filter( - (data) => data.subCluster == subCluster.name, - ) - )} - /> - {/key} -
- - - {/each} -{/if} - -
-
- -{#if $initq.data && $nodesStateCounts.data} - - - Node State -
- {#key refinedStateData} - Total: {refinedStateData.reduce((sum, item) => { - return sum + item.count; - }, 0)} Nodes - - sd.count, - )} - entities={refinedStateData.map( - (sd) => sd.state, - )} - /> - {/key} -
- - - {#key refinedStateData} -
UserActive JobsJobs
ProjectActive JobsJobs
- - - - - - {#each refinedStateData as sd, i} - - - - - - {/each} -
LegendCurrent State#Nodes
{sd.state}{sd.count}
- {/key} - - - - Node Health -
- {#key refinedHealthData} - Total: {refinedStateData.reduce((sum, item) => { - return sum + item.count; - }, 0)} Nodes - - sd.count, - )} - entities={refinedHealthData.map( - (sd) => sd.state, - )} - /> - {/key} -
- - - {#key refinedHealthData} - - - - - - - {#each refinedHealthData as hd, i} - - - - - - {/each} -
LegendCurrent Health#Nodes
{hd.state}{hd.count}
- {/key} - -
-{/if} diff --git a/web/frontend/src/status/NodeDash.svelte b/web/frontend/src/status/NodeDash.svelte deleted file mode 100644 index 29a3cf8..0000000 --- a/web/frontend/src/status/NodeDash.svelte +++ /dev/null @@ -1,127 +0,0 @@ - - - - -{#if $initq.data && $nodeStatusQuery.data} - - - - {#key $nodeStatusQuery.data.jobsStatistics} - - {/key} - - - {#key $nodeStatusQuery.data.jobsStatistics} - - {/key} - - - - - {#key $nodeStatusQuery.data.jobsStatistics} - - {/key} - - - {#key $nodeStatusQuery.data.jobsStatistics} - - {/key} - - -{/if} - - From bca176170c114b1492a0ea4c900ac3640fd10b39 Mon Sep 17 00:00:00 2001 From: Aditya Ujeniya Date: Wed, 3 Sep 2025 08:22:15 +0200 Subject: [PATCH 25/40] Migration SQL fix --- cmd/cc-backend/main.go | 18 ++- cmd/cc-backend/server.go | 4 + go.mod | 2 + go.sum | 6 + internal/config/config.go | 10 ++ internal/importer/initDB.go | 4 + internal/memorystore/checkpoint.go | 2 +- internal/memorystore/memorystore.go | 115 +++++++++++++++--- internal/memorystore/stats.go | 5 +- internal/repository/jobCreate.go | 16 +-- .../sqlite3/09_add-job-cache.up.sql | 19 ++- internal/taskManager/taskManager.go | 7 +- var/._job-archive | Bin 0 -> 163 bytes 13 files changed, 172 insertions(+), 36 deletions(-) create mode 100755 var/._job-archive diff --git a/cmd/cc-backend/main.go b/cmd/cc-backend/main.go index 56018c3..0790a0b 100644 --- a/cmd/cc-backend/main.go +++ b/cmd/cc-backend/main.go @@ -18,6 +18,7 @@ import ( "github.com/ClusterCockpit/cc-backend/internal/auth" "github.com/ClusterCockpit/cc-backend/internal/config" "github.com/ClusterCockpit/cc-backend/internal/importer" + "github.com/ClusterCockpit/cc-backend/internal/memorystore" "github.com/ClusterCockpit/cc-backend/internal/metricdata" "github.com/ClusterCockpit/cc-backend/internal/repository" "github.com/ClusterCockpit/cc-backend/internal/tagger" @@ -96,6 +97,12 @@ func main() { } else { cclog.Abort("Cluster configuration must be present") } + + if mscfg := ccconf.GetPackageConfig("metric-store"); mscfg != nil { + config.InitMetricStore(mscfg) + } else { + cclog.Abort("Metric Store configuration must be present") + } } else { cclog.Abort("Main configuration must be present") } @@ -201,7 +208,7 @@ func main() { if archiveCfg := ccconf.GetPackageConfig("archive"); archiveCfg != nil { err = archive.Init(archiveCfg, config.Keys.DisableArchive) } else { - err = archive.Init(json.RawMessage(`{\"kind\":\"file\",\"path\":\"./var/job-archive\"}`), config.Keys.DisableArchive) + err = archive.Init(json.RawMessage("{\"kind\":\"file\",\"path\":\"./var/job-archive\"}"), config.Keys.DisableArchive) } if err != nil { cclog.Abortf("Init: Failed to initialize archive.\nError: %s\n", err.Error()) @@ -241,10 +248,15 @@ func main() { cclog.Exit("No errors, server flag not set. 
Exiting cc-backend.") } + //Metric Store starts after all flags have been processes + memorystore.Init() + archiver.Start(repository.GetJobRepository()) - taskManager.Start(ccconf.GetPackageConfig("cron"), - ccconf.GetPackageConfig("archive")) + // // Comment out + // taskManager.Start(ccconf.GetPackageConfig("cron"), + // ccconf.GetPackageConfig("archive")) + serverInit() var wg sync.WaitGroup diff --git a/cmd/cc-backend/server.go b/cmd/cc-backend/server.go index 3983268..537270d 100644 --- a/cmd/cc-backend/server.go +++ b/cmd/cc-backend/server.go @@ -26,6 +26,7 @@ import ( "github.com/ClusterCockpit/cc-backend/internal/config" "github.com/ClusterCockpit/cc-backend/internal/graph" "github.com/ClusterCockpit/cc-backend/internal/graph/generated" + "github.com/ClusterCockpit/cc-backend/internal/memorystore" "github.com/ClusterCockpit/cc-backend/internal/routerConfig" "github.com/ClusterCockpit/cc-backend/web" cclog "github.com/ClusterCockpit/cc-lib/ccLogger" @@ -325,6 +326,9 @@ func serverShutdown() { // First shut down the server gracefully (waiting for all ongoing requests) server.Shutdown(context.Background()) + //Archive all the metric store data + memorystore.Shutdown() + // Then, wait for any async archivings still pending... archiver.WaitForArchiving() } diff --git a/go.mod b/go.mod index 554ea56..5858cff 100644 --- a/go.mod +++ b/go.mod @@ -51,6 +51,7 @@ require ( github.com/go-openapi/spec v0.21.0 // indirect github.com/go-openapi/swag v0.23.1 // indirect github.com/go-viper/mapstructure/v2 v2.4.0 // indirect + github.com/golang/snappy v0.0.4 // indirect github.com/google/uuid v1.6.0 // indirect github.com/gorilla/securecookie v1.1.2 // indirect github.com/gorilla/websocket v1.5.3 // indirect @@ -63,6 +64,7 @@ require ( github.com/json-iterator/go v1.1.12 // indirect github.com/lann/builder v0.0.0-20180802200727-47ae307949d0 // indirect github.com/lann/ps v0.0.0-20150810152359-62de8c46ede0 // indirect + github.com/linkedin/goavro/v2 v2.14.0 // indirect github.com/mailru/easyjson v0.9.0 // indirect github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect github.com/modern-go/reflect2 v1.0.2 // indirect diff --git a/go.sum b/go.sum index 6f61908..3c51770 100644 --- a/go.sum +++ b/go.sum @@ -91,6 +91,9 @@ github.com/golang-jwt/jwt/v5 v5.2.2 h1:Rl4B7itRWVtYIHFrSNd7vhTiz9UpLdi6gZhZ3wEeD github.com/golang-jwt/jwt/v5 v5.2.2/go.mod h1:pqrtFR0X4osieyHYxtmOUWsAWrfe1Q5UVIyoH402zdk= github.com/golang-migrate/migrate/v4 v4.18.2 h1:2VSCMz7x7mjyTXx3m2zPokOY82LTRgxK1yQYKo6wWQ8= github.com/golang-migrate/migrate/v4 v4.18.2/go.mod h1:2CM6tJvn2kqPXwnXO/d3rAQYiyoIm180VsO8PRX6Rpk= +github.com/golang/snappy v0.0.1/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= +github.com/golang/snappy v0.0.4 h1:yAGX7huGHXlcLOEtBnF4w7FQwA26wojNCwOYAEhLjQM= +github.com/golang/snappy v0.0.4/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8= github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU= @@ -166,6 +169,8 @@ github.com/lann/ps v0.0.0-20150810152359-62de8c46ede0/go.mod h1:vmVJ0l/dxyfGW6Fm github.com/lib/pq v1.2.0/go.mod h1:5WUZQaWbwv1U+lTReE5YruASi9Al49XbQIvNi/34Woo= github.com/lib/pq v1.10.9 h1:YXG7RB+JIjhP29X+OtkiDnYaXQwpS4JEWq7dtCCRUEw= github.com/lib/pq v1.10.9/go.mod h1:AlVN5x4E4T544tWzH6hKfbfQvm3HdbOxrmggDNAPY9o= +github.com/linkedin/goavro/v2 v2.14.0 
h1:aNO/js65U+Mwq4yB5f1h01c3wiM458qtRad1DN0CMUI= +github.com/linkedin/goavro/v2 v2.14.0/go.mod h1:KXx+erlq+RPlGSPmLF7xGo6SAbh8sCQ53x064+ioxhk= github.com/mailru/easyjson v0.9.0 h1:PrnmzHw7262yW8sTBwxi1PdJA3Iw/EKBa8psRf7d9a4= github.com/mailru/easyjson v0.9.0/go.mod h1:1+xMtQp2MRNVL/V1bOzuP3aP8VNwRW55fQUto+XFtTU= github.com/mattn/go-sqlite3 v1.10.0/go.mod h1:FPy6KqzDD04eiIsT53CuJW3U88zkxoIYsOqkbpncsNc= @@ -233,6 +238,7 @@ github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXf github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4= github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= +github.com/stretchr/testify v1.7.5/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOfJA= diff --git a/internal/config/config.go b/internal/config/config.go index 7332941..74ee9b0 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -9,6 +9,7 @@ import ( "encoding/json" "time" + "github.com/ClusterCockpit/cc-backend/internal/memorystore" cclog "github.com/ClusterCockpit/cc-lib/ccLogger" ) @@ -166,3 +167,12 @@ func Init(mainConfig json.RawMessage, clusterConfig json.RawMessage) { cclog.Abort("Config Init: At least one cluster required in config. Exited with error.") } } + +func InitMetricStore(msConfig json.RawMessage) { + // Validate(msConfigSchema, msConfig) + dec := json.NewDecoder(bytes.NewReader(msConfig)) + dec.DisallowUnknownFields() + if err := dec.Decode(&memorystore.Keys); err != nil { + cclog.Abortf("Metric Store Config Init: Could not decode config file '%s'.\nError: %s\n", msConfig, err.Error()) + } +} diff --git a/internal/importer/initDB.go b/internal/importer/initDB.go index 179c21c..79879b2 100644 --- a/internal/importer/initDB.go +++ b/internal/importer/initDB.go @@ -142,6 +142,10 @@ func InitDB() error { continue } + if jobMeta.Shared == "" { + jobMeta.Shared = "none" + } + id, err := r.TransactionAddNamed(t, repository.NamedJobInsert, jobMeta) if err != nil { diff --git a/internal/memorystore/checkpoint.go b/internal/memorystore/checkpoint.go index ecd6fb1..80a048b 100644 --- a/internal/memorystore/checkpoint.go +++ b/internal/memorystore/checkpoint.go @@ -19,7 +19,7 @@ import ( "sync/atomic" "time" - "github.com/ClusterCockpit/cc-backend/pkg/avro" + "github.com/ClusterCockpit/cc-backend/internal/avro" "github.com/ClusterCockpit/cc-lib/util" "github.com/linkedin/goavro/v2" ) diff --git a/internal/memorystore/memorystore.go b/internal/memorystore/memorystore.go index 7659a89..76079d4 100644 --- a/internal/memorystore/memorystore.go +++ b/internal/memorystore/memorystore.go @@ -2,16 +2,18 @@ package memorystore import ( "context" + "encoding/json" "errors" + "fmt" "log" "runtime" "sync" "time" - "github.com/ClusterCockpit/cc-backend/pkg/avro" + "github.com/ClusterCockpit/cc-backend/internal/avro" "github.com/ClusterCockpit/cc-lib/resampler" + "github.com/ClusterCockpit/cc-lib/schema" "github.com/ClusterCockpit/cc-lib/util" - "github.com/ClusterCockpit/cc-metric-store/internal/config" ) var ( @@ -29,20 +31,101 @@ func init() { } } +// For aggregation over multiple values at different cpus/sockets/..., not time! 
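+// For example (illustrative): a per-socket memory bandwidth would typically be
+// configured as "sum", so that the socket values add up to the node value,
+// while a per-core clock frequency would be "avg".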
+type AggregationStrategy int + +const ( + NoAggregation AggregationStrategy = iota + SumAggregation + AvgAggregation +) + +func (as *AggregationStrategy) UnmarshalJSON(data []byte) error { + var str string + if err := json.Unmarshal(data, &str); err != nil { + return err + } + + switch str { + case "": + *as = NoAggregation + case "sum": + *as = SumAggregation + case "avg": + *as = AvgAggregation + default: + return fmt.Errorf("invalid aggregation strategy: %#v", str) + } + return nil +} + +type MetricConfig struct { + // Interval in seconds at which measurements will arive. + Frequency int64 `json:"frequency"` + + // Can be 'sum', 'avg' or null. Describes how to aggregate metrics from the same timestep over the hierarchy. + Aggregation AggregationStrategy `json:"aggregation"` + + // Private, used internally... + Offset int +} + type Metric struct { Name string Value util.Float - MetricConfig config.MetricConfig + MetricConfig MetricConfig } type MemoryStore struct { - Metrics map[string]config.MetricConfig + Metrics map[string]MetricConfig root Level } +func Init() { + startupTime := time.Now() + + //Pass the keys from cluster config + InitMetrics() + + ms := GetMemoryStore() + + d, err := time.ParseDuration(Keys.Checkpoints.Restore) + if err != nil { + log.Fatal(err) + } + + restoreFrom := startupTime.Add(-d) + log.Printf("Loading checkpoints newer than %s\n", restoreFrom.Format(time.RFC3339)) + files, err := ms.FromCheckpointFiles(Keys.Checkpoints.RootDir, restoreFrom.Unix()) + loadedData := ms.SizeInBytes() / 1024 / 1024 // In MB + if err != nil { + log.Fatalf("Loading checkpoints failed: %s\n", err.Error()) + } else { + log.Printf("Checkpoints loaded (%d files, %d MB, that took %fs)\n", files, loadedData, time.Since(startupTime).Seconds()) + } + + // Try to use less memory by forcing a GC run here and then + // lowering the target percentage. The default of 100 means + // that only once the ratio of new allocations execeds the + // previously active heap, a GC is triggered. + // Forcing a GC here will set the "previously active heap" + // to a minumum. + runtime.GC() + + ctx, _ := context.WithCancel(context.Background()) + + var wg sync.WaitGroup + wg.Add(4) + + Retention(&wg, ctx) + Checkpointing(&wg, ctx) + Archiving(&wg, ctx) + avro.DataStaging(&wg, ctx) +} + // Create a new, initialized instance of a MemoryStore. // Will panic if values in the metric configurations are invalid. 
-func Init(metrics map[string]config.MetricConfig) { +func InitMetrics(metrics map[string]MetricConfig) { singleton.Do(func() { offset := 0 for key, cfg := range metrics { @@ -50,7 +133,7 @@ func Init(metrics map[string]config.MetricConfig) { panic("invalid frequency") } - metrics[key] = config.MetricConfig{ + metrics[key] = MetricConfig{ Frequency: cfg.Frequency, Aggregation: cfg.Aggregation, Offset: offset, @@ -77,16 +160,16 @@ func GetMemoryStore() *MemoryStore { } func Shutdown() { - log.Printf("Writing to '%s'...\n", config.Keys.Checkpoints.RootDir) + log.Printf("Writing to '%s'...\n", Keys.Checkpoints.RootDir) var files int var err error ms := GetMemoryStore() - if config.Keys.Checkpoints.FileFormat == "json" { - files, err = ms.ToCheckpoint(config.Keys.Checkpoints.RootDir, lastCheckpoint.Unix(), time.Now().Unix()) + if Keys.Checkpoints.FileFormat == "json" { + files, err = ms.ToCheckpoint(Keys.Checkpoints.RootDir, lastCheckpoint.Unix(), time.Now().Unix()) } else { - files, err = avro.GetAvroStore().ToCheckpoint(config.Keys.Checkpoints.RootDir, true) + files, err = avro.GetAvroStore().ToCheckpoint(Keys.Checkpoints.RootDir, true) close(avro.LineProtocolMessages) } @@ -172,7 +255,7 @@ func Retention(wg *sync.WaitGroup, ctx context.Context) { go func() { defer wg.Done() - d, err := time.ParseDuration(config.Keys.RetentionInMemory) + d, err := time.ParseDuration(Keys.RetentionInMemory) if err != nil { log.Fatal(err) } @@ -261,7 +344,7 @@ func (m *MemoryStore) WriteToLevel(l *Level, selector []string, ts int64, metric // If the level does not hold the metric itself, the data will be aggregated recursively from the children. // The second and third return value are the actual from/to for the data. Those can be different from // the range asked for if no data was available. -func (m *MemoryStore) Read(selector util.Selector, metric string, from, to, resolution int64) ([]util.Float, int64, int64, int64, error) { +func (m *MemoryStore) Read(selector util.Selector, metric string, from, to, resolution int64) ([]schema.Float, int64, int64, int64, error) { if from > to { return nil, 0, 0, 0, errors.New("invalid time range") } @@ -271,7 +354,7 @@ func (m *MemoryStore) Read(selector util.Selector, metric string, from, to, reso return nil, 0, 0, 0, errors.New("unkown metric: " + metric) } - n, data := 0, make([]util.Float, (to-from)/minfo.Frequency+1) + n, data := 0, make([]schema.Float, (to-from)/minfo.Frequency+1) err := m.root.findBuffers(selector, minfo.Offset, func(b *buffer) error { cdata, cfrom, cto, err := b.read(from, to, data) @@ -309,12 +392,12 @@ func (m *MemoryStore) Read(selector util.Selector, metric string, from, to, reso } else if n == 0 { return nil, 0, 0, 0, errors.New("metric or host not found") } else if n > 1 { - if minfo.Aggregation == config.AvgAggregation { - normalize := 1. / util.Float(n) + if minfo.Aggregation == AvgAggregation { + normalize := 1. 
/ schema.Float(n) for i := 0; i < len(data); i++ { data[i] *= normalize } - } else if minfo.Aggregation != config.SumAggregation { + } else if minfo.Aggregation != SumAggregation { return nil, 0, 0, 0, errors.New("invalid aggregation") } } diff --git a/internal/memorystore/stats.go b/internal/memorystore/stats.go index 6682d62..831e282 100644 --- a/internal/memorystore/stats.go +++ b/internal/memorystore/stats.go @@ -5,7 +5,6 @@ import ( "math" "github.com/ClusterCockpit/cc-lib/util" - "github.com/ClusterCockpit/cc-metric-store/internal/config" ) type Stats struct { @@ -105,9 +104,9 @@ func (m *MemoryStore) Stats(selector util.Selector, metric string, from, to int6 return nil, 0, 0, ErrNoData } - if minfo.Aggregation == config.AvgAggregation { + if minfo.Aggregation == AvgAggregation { avg /= util.Float(n) - } else if n > 1 && minfo.Aggregation != config.SumAggregation { + } else if n > 1 && minfo.Aggregation != SumAggregation { return nil, 0, 0, errors.New("invalid aggregation") } diff --git a/internal/repository/jobCreate.go b/internal/repository/jobCreate.go index aa2ea76..666313f 100644 --- a/internal/repository/jobCreate.go +++ b/internal/repository/jobCreate.go @@ -14,19 +14,19 @@ import ( ) const NamedJobCacheInsert string = `INSERT INTO job_cache ( - job_id, hpc_user, project, cluster, subcluster, cluster_partition, array_job_id, num_nodes, num_hwthreads, num_acc, - exclusive, monitoring_status, smt, job_state, start_time, duration, walltime, footprint, energy, energy_footprint, resources, meta_data + job_id, hpc_user, project, hpc_cluster, subcluster, cluster_partition, array_job_id, num_nodes, num_hwthreads, num_acc, + shared, monitoring_status, smt, job_state, start_time, duration, walltime, footprint, energy, energy_footprint, resources, meta_data ) VALUES ( - :job_id, :hpc_user, :project, :cluster, :subcluster, :cluster_partition, :array_job_id, :num_nodes, :num_hwthreads, :num_acc, - :exclusive, :monitoring_status, :smt, :job_state, :start_time, :duration, :walltime, :footprint, :energy, :energy_footprint, :resources, :meta_data + :job_id, :hpc_user, :project, :hpc_cluster, :subcluster, :cluster_partition, :array_job_id, :num_nodes, :num_hwthreads, :num_acc, + :shared, :monitoring_status, :smt, :job_state, :start_time, :duration, :walltime, :footprint, :energy, :energy_footprint, :resources, :meta_data );` const NamedJobInsert string = `INSERT INTO job ( - job_id, hpc_user, project, cluster, subcluster, cluster_partition, array_job_id, num_nodes, num_hwthreads, num_acc, - exclusive, monitoring_status, smt, job_state, start_time, duration, walltime, footprint, energy, energy_footprint, resources, meta_data + job_id, hpc_user, project, hpc_cluster, subcluster, cluster_partition, array_job_id, num_nodes, num_hwthreads, num_acc, + shared, monitoring_status, smt, job_state, start_time, duration, walltime, footprint, energy, energy_footprint, resources, meta_data ) VALUES ( - :job_id, :hpc_user, :project, :cluster, :subcluster, :cluster_partition, :array_job_id, :num_nodes, :num_hwthreads, :num_acc, - :exclusive, :monitoring_status, :smt, :job_state, :start_time, :duration, :walltime, :footprint, :energy, :energy_footprint, :resources, :meta_data + :job_id, :hpc_user, :project, :hpc_cluster, :subcluster, :cluster_partition, :array_job_id, :num_nodes, :num_hwthreads, :num_acc, + :shared, :monitoring_status, :smt, :job_state, :start_time, :duration, :walltime, :footprint, :energy, :energy_footprint, :resources, :meta_data );` func (r *JobRepository) InsertJob(job *schema.Job) (int64, 
error) { diff --git a/internal/repository/migrations/sqlite3/09_add-job-cache.up.sql b/internal/repository/migrations/sqlite3/09_add-job-cache.up.sql index 003eab0..2c25029 100644 --- a/internal/repository/migrations/sqlite3/09_add-job-cache.up.sql +++ b/internal/repository/migrations/sqlite3/09_add-job-cache.up.sql @@ -3,7 +3,7 @@ CREATE TABLE "job_cache" ( job_id BIGINT NOT NULL, hpc_cluster VARCHAR(255) NOT NULL, subcluster VARCHAR(255) NOT NULL, - submit_time BIGINT NOT NULL, -- Unix timestamp + submit_time BIGINT NOT NULL DEFAULT 0, -- Unix timestamp start_time BIGINT NOT NULL DEFAULT 0, -- Unix timestamp hpc_user VARCHAR(255) NOT NULL, project VARCHAR(255) NOT NULL, @@ -30,7 +30,7 @@ CREATE TABLE "job_cache" ( energy REAL NOT NULL DEFAULT 0.0, energy_footprint TEXT DEFAULT NULL, footprint TEXT DEFAULT NULL, - UNIQUE (job_id, cluster, start_time) + UNIQUE (job_id, hpc_cluster, start_time) ); CREATE TABLE "job_new" ( @@ -65,10 +65,21 @@ CREATE TABLE "job_new" ( energy REAL NOT NULL DEFAULT 0.0, energy_footprint TEXT DEFAULT NULL, footprint TEXT DEFAULT NULL, - UNIQUE (job_id, cluster, start_time) + UNIQUE (job_id, hpc_cluster, start_time) ); ALTER TABLE job RENAME COLUMN cluster TO hpc_cluster; -INSERT INTO job_new SELECT * FROM job; +INSERT INTO job_new ( + id, job_id, hpc_cluster, subcluster, submit_time, start_time, hpc_user, project, + cluster_partition, array_job_id, duration, walltime, job_state, meta_data, resources, + num_nodes, num_hwthreads, num_acc, smt, shared, monitoring_status, energy, + energy_footprint, footprint +) +SELECT + id, job_id, hpc_cluster, subcluster, 0, start_time, hpc_user, project, + cluster_partition, array_job_id, duration, walltime, job_state, meta_data, resources, + num_nodes, num_hwthreads, num_acc, smt, exclusive, monitoring_status, energy, + energy_footprint, footprint +FROM job; DROP TABLE job; ALTER TABLE job_new RENAME TO job; diff --git a/internal/taskManager/taskManager.go b/internal/taskManager/taskManager.go index 7231d12..df6c4d0 100644 --- a/internal/taskManager/taskManager.go +++ b/internal/taskManager/taskManager.go @@ -7,6 +7,7 @@ package taskManager import ( "bytes" "encoding/json" + "fmt" "time" "github.com/ClusterCockpit/cc-backend/internal/auth" @@ -65,10 +66,14 @@ func Start(cronCfg, archiveConfig json.RawMessage) { RegisterStopJobsExceedTime() } + fmt.Printf("Keys : %#v\n", Keys) + fmt.Printf("cronCfg : %#v\n", cronCfg) + fmt.Printf("archiveConfig : %#v\n", archiveConfig) + dec := json.NewDecoder(bytes.NewReader(cronCfg)) dec.DisallowUnknownFields() if err := dec.Decode(&Keys); err != nil { - cclog.Errorf("error while decoding ldap config: %v", err) + cclog.Errorf("error while decoding cron config: %v", err) } var cfg struct { diff --git a/var/._job-archive b/var/._job-archive new file mode 100755 index 0000000000000000000000000000000000000000..9d11b52bb7ed13ffc4799b7e3bcb26eb2c0b9b7a GIT binary patch literal 163 zcmZQz6=P>$Vqox1Ojhs@R)|o50+1L3ClDI}aUl?c_=|y<2;dkJ5(HHS(lG;wxzV&S oBE&_L^K Date: Mon, 8 Sep 2025 11:29:27 +0200 Subject: [PATCH 26/40] Combined metricstore api and functions --- cmd/cc-backend/main.go | 6 +- cmd/cc-backend/server.go | 10 + configs/config-demo.json | 27 +- configs/config.json | 4 + internal/api/rest.go | 15 + internal/auth/auth.go | 36 ++ internal/avro/avroCheckpoint.go | 9 +- internal/avro/avroHelper.go | 5 +- internal/avro/avroStruct.go | 14 +- internal/config/config.go | 12 +- internal/config/memorystore.go | 128 ++++++ internal/config/schema.go | 2 +- internal/memorystore/api.go | 419 
++++++++++++++++++++ internal/memorystore/archive.go | 17 +- internal/memorystore/buffer.go | 22 +- internal/memorystore/checkpoint.go | 81 ++-- internal/memorystore/config.go | 26 -- internal/memorystore/debug.go | 2 +- internal/memorystore/healthcheck.go | 2 +- internal/memorystore/lineprotocol.go | 349 ++++++++++++++++ internal/memorystore/memorystore.go | 139 +++---- internal/memorystore/stats.go | 5 +- internal/metricDataDispatcher/dataLoader.go | 6 +- internal/metricdata/cc-metric-store.go | 325 +++++---------- internal/metricdata/utils.go | 3 +- pkg/archive/clusterConfig.go | 14 + 26 files changed, 1248 insertions(+), 430 deletions(-) create mode 100644 internal/config/memorystore.go create mode 100644 internal/memorystore/api.go delete mode 100644 internal/memorystore/config.go create mode 100644 internal/memorystore/lineprotocol.go diff --git a/cmd/cc-backend/main.go b/cmd/cc-backend/main.go index 0790a0b..9c7ad1f 100644 --- a/cmd/cc-backend/main.go +++ b/cmd/cc-backend/main.go @@ -248,8 +248,10 @@ func main() { cclog.Exit("No errors, server flag not set. Exiting cc-backend.") } + var wg sync.WaitGroup + //Metric Store starts after all flags have been processes - memorystore.Init() + memorystore.Init(wg) archiver.Start(repository.GetJobRepository()) @@ -259,8 +261,6 @@ func main() { serverInit() - var wg sync.WaitGroup - wg.Add(1) go func() { defer wg.Done() diff --git a/cmd/cc-backend/server.go b/cmd/cc-backend/server.go index 537270d..18d7ea5 100644 --- a/cmd/cc-backend/server.go +++ b/cmd/cc-backend/server.go @@ -119,6 +119,7 @@ func serverInit() { userapi := router.PathPrefix("/userapi").Subrouter() configapi := router.PathPrefix("/config").Subrouter() frontendapi := router.PathPrefix("/frontend").Subrouter() + metricstoreapi := router.PathPrefix("/metricstore").Subrouter() if !config.Keys.DisableAuthentication { router.Handle("/login", authHandle.Login( @@ -199,6 +200,14 @@ func serverInit() { onFailureResponse) }) + metricstoreapi.Use(func(next http.Handler) http.Handler { + return authHandle.AuthMetricStoreApi( + // On success; + next, + // On failure: JSON Response + onFailureResponse) + }) + configapi.Use(func(next http.Handler) http.Handler { return authHandle.AuthConfigApi( // On success; @@ -232,6 +241,7 @@ func serverInit() { routerConfig.SetupRoutes(secured, buildInfo) apiHandle.MountApiRoutes(securedapi) apiHandle.MountUserApiRoutes(userapi) + apiHandle.MountMetricStoreApiRoutes(metricstoreapi) apiHandle.MountConfigApiRoutes(configapi) apiHandle.MountFrontendApiRoutes(frontendapi) diff --git a/configs/config-demo.json b/configs/config-demo.json index d388d78..a31d65d 100644 --- a/configs/config-demo.json +++ b/configs/config-demo.json @@ -9,6 +9,10 @@ "apiAllowedIPs": ["*"], "emission-constant": 317 }, + "archive": { + "kind": "file", + "path": "./var/job-archive" + }, "auth": { "jwts": { "max-age": "2000h" @@ -18,9 +22,7 @@ { "name": "fritz", "metricDataRepository": { - "kind": "cc-metric-store", - "url": "http://localhost:8082", - "token": "" + "kind": "cc-metric-store" }, "filterRanges": { "numNodes": { @@ -40,9 +42,7 @@ { "name": "alex", "metricDataRepository": { - "kind": "cc-metric-store", - "url": "http://localhost:8082", - "token": "" + "kind": "cc-metric-store" }, "filterRanges": { "numNodes": { @@ -59,5 +59,18 @@ } } } - ] + ], + "metric-store": { + "checkpoints": { + "file-format": "avro", + "interval": "2h", + "directory": "./var/checkpoints", + "restore": "48h" + }, + "archive": { + "interval": "48h", + "directory": "./var/archive" + }, + 
"retention-in-memory": "48h" + } } diff --git a/configs/config.json b/configs/config.json index 27c4ce2..ed7d546 100644 --- a/configs/config.json +++ b/configs/config.json @@ -13,6 +13,10 @@ "resolutions": [600, 300, 120, 60] } }, + "archive": { + "kind": "file", + "path": "./var/job-archive" + }, "clusters": [ { "name": "test", diff --git a/internal/api/rest.go b/internal/api/rest.go index e4411a4..8cefe48 100644 --- a/internal/api/rest.go +++ b/internal/api/rest.go @@ -15,6 +15,7 @@ import ( "github.com/ClusterCockpit/cc-backend/internal/auth" "github.com/ClusterCockpit/cc-backend/internal/config" + "github.com/ClusterCockpit/cc-backend/internal/memorystore" "github.com/ClusterCockpit/cc-backend/internal/repository" cclog "github.com/ClusterCockpit/cc-lib/ccLogger" "github.com/ClusterCockpit/cc-lib/schema" @@ -95,6 +96,20 @@ func (api *RestApi) MountUserApiRoutes(r *mux.Router) { r.HandleFunc("/jobs/metrics/{id}", api.getJobMetrics).Methods(http.MethodGet) } +func (api *RestApi) MountMetricStoreApiRoutes(r *mux.Router) { + r.StrictSlash(true) + // REST API Uses TokenAuth + r.HandleFunc("/api/free", memorystore.HandleFree).Methods(http.MethodPost) + r.HandleFunc("/api/write", memorystore.HandleWrite).Methods(http.MethodPost) + r.HandleFunc("/api/debug", memorystore.HandleDebug).Methods(http.MethodGet) + r.HandleFunc("/api/healthcheck", memorystore.HandleHealthCheck).Methods(http.MethodGet) + // Refactor + r.HandleFunc("/api/free/", memorystore.HandleFree).Methods(http.MethodPost) + r.HandleFunc("/api/write/", memorystore.HandleWrite).Methods(http.MethodPost) + r.HandleFunc("/api/debug/", memorystore.HandleDebug).Methods(http.MethodGet) + r.HandleFunc("/api/healthcheck/", memorystore.HandleHealthCheck).Methods(http.MethodGet) +} + func (api *RestApi) MountConfigApiRoutes(r *mux.Router) { r.StrictSlash(true) // Settings Frontend Uses SessionAuth diff --git a/internal/auth/auth.go b/internal/auth/auth.go index 6564878..5a80f7c 100644 --- a/internal/auth/auth.go +++ b/internal/auth/auth.go @@ -417,6 +417,42 @@ func (auth *Authentication) AuthUserApi( }) } +func (auth *Authentication) AuthMetricStoreApi( + onsuccess http.Handler, + onfailure func(rw http.ResponseWriter, r *http.Request, authErr error), +) http.Handler { + return http.HandlerFunc(func(rw http.ResponseWriter, r *http.Request) { + user, err := auth.JwtAuth.AuthViaJWT(rw, r) + if err != nil { + cclog.Infof("auth metricstore api -> authentication failed: %s", err.Error()) + onfailure(rw, r, err) + return + } + + if user != nil { + switch { + case len(user.Roles) == 1: + if user.HasRole(schema.RoleApi) { + ctx := context.WithValue(r.Context(), repository.ContextUserKey, user) + onsuccess.ServeHTTP(rw, r.WithContext(ctx)) + return + } + case len(user.Roles) >= 2: + if user.HasRole(schema.RoleApi) && user.HasAnyRole([]schema.Role{schema.RoleUser, schema.RoleManager, schema.RoleAdmin}) { + ctx := context.WithValue(r.Context(), repository.ContextUserKey, user) + onsuccess.ServeHTTP(rw, r.WithContext(ctx)) + return + } + default: + cclog.Info("auth metricstore api -> authentication failed: missing role") + onfailure(rw, r, errors.New("unauthorized")) + } + } + cclog.Info("auth metricstore api -> authentication failed: no auth") + onfailure(rw, r, errors.New("unauthorized")) + }) +} + func (auth *Authentication) AuthConfigApi( onsuccess http.Handler, onfailure func(rw http.ResponseWriter, r *http.Request, authErr error), diff --git a/internal/avro/avroCheckpoint.go b/internal/avro/avroCheckpoint.go index 4a3cf19..4d72d36 100644 --- 
a/internal/avro/avroCheckpoint.go +++ b/internal/avro/avroCheckpoint.go @@ -19,7 +19,8 @@ import ( "sync/atomic" "time" - "github.com/ClusterCockpit/cc-lib/util" + "github.com/ClusterCockpit/cc-backend/internal/config" + "github.com/ClusterCockpit/cc-lib/schema" "github.com/linkedin/goavro/v2" ) @@ -139,7 +140,7 @@ func getTimestamp(dir string) int64 { } } - interval, _ := time.ParseDuration(Keys.Checkpoints.Interval) + interval, _ := time.ParseDuration(config.MetricStoreKeys.Checkpoints.Interval) updateTime := time.Unix(maxTs, 0).Add(interval).Add(time.Duration(CheckpointBufferMinutes-1) * time.Minute).Unix() if updateTime < time.Now().Unix() { @@ -408,7 +409,7 @@ func compareSchema(schemaRead, schemaGen string) (bool, string, error) { return true, string(mergedSchemaJson), nil } -func generateSchema(data map[string]util.Float) (string, error) { +func generateSchema(data map[string]schema.Float) (string, error) { // Define the Avro schema structure schema := map[string]interface{}{ "type": "record", @@ -440,7 +441,7 @@ func generateSchema(data map[string]util.Float) (string, error) { return string(schemaString), nil } -func generateRecord(data map[string]util.Float) map[string]interface{} { +func generateRecord(data map[string]schema.Float) map[string]interface{} { record := make(map[string]interface{}) // Iterate through each map in data diff --git a/internal/avro/avroHelper.go b/internal/avro/avroHelper.go index ee09759..ea733cd 100644 --- a/internal/avro/avroHelper.go +++ b/internal/avro/avroHelper.go @@ -6,13 +6,14 @@ import ( "strconv" "sync" + "github.com/ClusterCockpit/cc-backend/internal/config" ) func DataStaging(wg *sync.WaitGroup, ctx context.Context) { // AvroPool is a pool of Avro writers. go func() { - if Keys.Checkpoints.FileFormat == "json" { + if config.MetricStoreKeys.Checkpoints.FileFormat == "json" { wg.Done() // Mark this goroutine as done return // Exit the goroutine } @@ -28,7 +29,7 @@ func DataStaging(wg *sync.WaitGroup, ctx context.Context) { return case val := <-LineProtocolMessages: //Fetch the frequency of the metric from the global configuration - freq, err := Keys.GetMetricFrequency(val.MetricName) + freq, err := config.MetricStoreKeys.GetMetricFrequency(val.MetricName) if err != nil { fmt.Printf("Error fetching metric frequency: %s\n", err) continue diff --git a/internal/avro/avroStruct.go b/internal/avro/avroStruct.go index 27aac47..ee65291 100644 --- a/internal/avro/avroStruct.go +++ b/internal/avro/avroStruct.go @@ -3,7 +3,7 @@ package avro import ( "sync" - "github.com/ClusterCockpit/cc-lib/util" + "github.com/ClusterCockpit/cc-lib/schema" ) var ( @@ -20,7 +20,7 @@ type AvroStruct struct { Cluster string Node string Selector []string - Value util.Float + Value schema.Float Timestamp int64 } @@ -32,7 +32,7 @@ var avroStore AvroStore type AvroLevel struct { children map[string]*AvroLevel - data map[int64]map[string]util.Float + data map[int64]map[string]schema.Float lock sync.RWMutex } @@ -81,7 +81,7 @@ func (l *AvroLevel) findAvroLevelOrCreate(selector []string) *AvroLevel { } child = &AvroLevel{ - data: make(map[int64]map[string]util.Float, 0), + data: make(map[int64]map[string]schema.Float, 0), children: nil, } @@ -94,7 +94,7 @@ func (l *AvroLevel) findAvroLevelOrCreate(selector []string) *AvroLevel { return child.findAvroLevelOrCreate(selector[1:]) } -func (l *AvroLevel) addMetric(metricName string, value util.Float, timestamp int64, Freq int) { +func (l *AvroLevel) addMetric(metricName string, value schema.Float, timestamp int64, Freq int) { 
l.lock.Lock() defer l.lock.Unlock() @@ -104,7 +104,7 @@ func (l *AvroLevel) addMetric(metricName string, value util.Float, timestamp int if len(l.data) != KeyCounter { if len(l.data) == 0 { for i := range KeyCounter { - l.data[timestamp+int64(i*Freq)] = make(map[string]util.Float, 0) + l.data[timestamp+int64(i*Freq)] = make(map[string]schema.Float, 0) } } else { // Get the last timestamp @@ -115,7 +115,7 @@ func (l *AvroLevel) addMetric(metricName string, value util.Float, timestamp int } } // Create keys for the next KeyCounter timestamps - l.data[lastTs+int64(Freq)] = make(map[string]util.Float, 0) + l.data[lastTs+int64(Freq)] = make(map[string]schema.Float, 0) } } diff --git a/internal/config/config.go b/internal/config/config.go index 74ee9b0..183608c 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -9,7 +9,6 @@ import ( "encoding/json" "time" - "github.com/ClusterCockpit/cc-backend/internal/memorystore" cclog "github.com/ClusterCockpit/cc-lib/ccLogger" ) @@ -163,16 +162,7 @@ func Init(mainConfig json.RawMessage, clusterConfig json.RawMessage) { cclog.Abortf("Config Init: Could not decode config file '%s'.\nError: %s\n", mainConfig, err.Error()) } - if Clusters == nil || len(Clusters) < 1 { + if len(Clusters) < 1 { cclog.Abort("Config Init: At least one cluster required in config. Exited with error.") } } - -func InitMetricStore(msConfig json.RawMessage) { - // Validate(msConfigSchema, msConfig) - dec := json.NewDecoder(bytes.NewReader(msConfig)) - dec.DisallowUnknownFields() - if err := dec.Decode(&memorystore.Keys); err != nil { - cclog.Abortf("Metric Store Config Init: Could not decode config file '%s'.\nError: %s\n", msConfig, err.Error()) - } -} diff --git a/internal/config/memorystore.go b/internal/config/memorystore.go new file mode 100644 index 0000000..b9273b4 --- /dev/null +++ b/internal/config/memorystore.go @@ -0,0 +1,128 @@ +package config + +import ( + "bytes" + "encoding/json" + "fmt" + + cclog "github.com/ClusterCockpit/cc-lib/ccLogger" +) + +// -------------------- +// Metric Store config +// -------------------- +type MetricStoreConfig struct { + Checkpoints struct { + FileFormat string `json:"file-format"` + Interval string `json:"interval"` + RootDir string `json:"directory"` + Restore string `json:"restore"` + } `json:"checkpoints"` + Debug struct { + DumpToFile string `json:"dump-to-file"` + EnableGops bool `json:"gops"` + } `json:"debug"` + RetentionInMemory string `json:"retention-in-memory"` + Archive struct { + Interval string `json:"interval"` + RootDir string `json:"directory"` + DeleteInstead bool `json:"delete-instead"` + } `json:"archive"` + Nats []*NatsConfig `json:"nats"` +} + +type NatsConfig struct { + // Address of the nats server + Address string `json:"address"` + + // Username/Password, optional + Username string `json:"username"` + Password string `json:"password"` + + //Creds file path + Credsfilepath string `json:"creds-file-path"` + + Subscriptions []struct { + // Channel name + SubscribeTo string `json:"subscribe-to"` + + // Allow lines without a cluster tag, use this as default, optional + ClusterTag string `json:"cluster-tag"` + } `json:"subscriptions"` +} + +var MetricStoreKeys MetricStoreConfig + +// For aggregation over multiple values at different cpus/sockets/..., not time! 
+type AggregationStrategy int
+
+const (
+	NoAggregation AggregationStrategy = iota
+	SumAggregation
+	AvgAggregation
+)
+
+func AssignAggregationStratergy(str string) (AggregationStrategy, error) {
+	switch str {
+	case "":
+		return NoAggregation, nil
+	case "sum":
+		return SumAggregation, nil
+	case "avg":
+		return AvgAggregation, nil
+	default:
+		return NoAggregation, fmt.Errorf("[METRICSTORE]> unknown aggregation strategy: %s", str)
+	}
+}
+
+type MetricConfig struct {
+	// Interval in seconds at which measurements will arrive.
+	Frequency int64
+
+	// Can be 'sum', 'avg' or null. Describes how to aggregate metrics from the same timestep over the hierarchy.
+	Aggregation AggregationStrategy
+
+	// Private, used internally...
+	Offset int
+}
+
+var Metrics map[string]MetricConfig
+
+func InitMetricStore(msConfig json.RawMessage) {
+	// Validate(msConfigSchema, msConfig)
+	dec := json.NewDecoder(bytes.NewReader(msConfig))
+	dec.DisallowUnknownFields()
+	if err := dec.Decode(&MetricStoreKeys); err != nil {
+		cclog.Abortf("[METRICSTORE]> Metric Store Config Init: Could not decode config file '%s'.\nError: %s\n", msConfig, err.Error())
+	}
+}
+
+// GetMetricFrequency returns the configured frequency of a metric, looked up
+// in the package-level Metrics map.
+func (c *MetricStoreConfig) GetMetricFrequency(metricName string) (int64, error) {
+	if metric, ok := Metrics[metricName]; ok {
+		return metric.Frequency, nil
+	}
+	return 0, fmt.Errorf("[METRICSTORE]> metric %s not found", metricName)
+}
+
+// AddMetric registers a metric in the package-level Metrics map.
+// If a metric with the same name is already registered, the higher frequency wins.
+func AddMetric(name string, metric MetricConfig) error {
+	if Metrics == nil {
+		Metrics = make(map[string]MetricConfig, 0)
+	}
+
+	if existingMetric, ok := Metrics[name]; ok {
+		if existingMetric.Frequency != metric.Frequency {
+			if existingMetric.Frequency < metric.Frequency {
+				existingMetric.Frequency = metric.Frequency
+				Metrics[name] = existingMetric
+			}
+		}
+	} else {
+		Metrics[name] = metric
+	}
+
+	return nil
+}
diff --git a/internal/config/schema.go b/internal/config/schema.go
index 37d662a..ca0440e 100644
--- a/internal/config/schema.go
+++ b/internal/config/schema.go
@@ -144,7 +144,7 @@ var clustersSchema = `
 					"type": "string"
 				}
 			},
-			"required": ["kind", "url"]
+			"required": ["kind"]
 		},
 		"filterRanges": {
 			"description": "This option controls the slider ranges for the UI controls of numNodes, duration, and startTime.",
diff --git a/internal/memorystore/api.go b/internal/memorystore/api.go
new file mode 100644
index 0000000..367f245
--- /dev/null
+++ b/internal/memorystore/api.go
@@ -0,0 +1,419 @@
+// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
+// All rights reserved.
+// Use of this source code is governed by a MIT-style
+// license that can be found in the LICENSE file.
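+
+// HTTP and query entry points of the embedded metric store (adapted from the
+// cc-metric-store REST API): freeing buffers, line-protocol ingest, data
+// queries, and debug/healthcheck dumps.
+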
+package memorystore
+
+import (
+	"bufio"
+	"encoding/json"
+	"errors"
+	"fmt"
+	"io"
+	"log"
+	"math"
+	"net/http"
+	"strconv"
+	"strings"
+
+	"github.com/ClusterCockpit/cc-lib/schema"
+	"github.com/ClusterCockpit/cc-lib/util"
+
+	"github.com/influxdata/line-protocol/v2/lineprotocol"
+)
+
+// @title cc-metric-store REST API
+// @version 1.0.0
+// @description API for cc-metric-store
+
+// @contact.name ClusterCockpit Project
+// @contact.url https://clustercockpit.org
+// @contact.email support@clustercockpit.org
+
+// @license.name MIT License
+// @license.url https://opensource.org/licenses/MIT
+
+// @host localhost:8082
+// @basePath /api/
+
+// @securityDefinitions.apikey ApiKeyAuth
+// @in header
+// @name X-Auth-Token
+
+// ErrorResponse model
+type ErrorResponse struct {
+	// Status text of the HTTP status code
+	Status string `json:"status"`
+	Error  string `json:"error"` // Error Message
+}
+
+type ApiMetricData struct {
+	Error      *string           `json:"error,omitempty"`
+	Data       schema.FloatArray `json:"data,omitempty"`
+	From       int64             `json:"from"`
+	To         int64             `json:"to"`
+	Resolution int64             `json:"resolution"`
+	Avg        schema.Float      `json:"avg"`
+	Min        schema.Float      `json:"min"`
+	Max        schema.Float      `json:"max"`
+}
+
+func handleError(err error, statusCode int, rw http.ResponseWriter) {
+	// log.Warnf("REST ERROR : %s", err.Error())
+	rw.Header().Add("Content-Type", "application/json")
+	rw.WriteHeader(statusCode)
+	json.NewEncoder(rw).Encode(ErrorResponse{
+		Status: http.StatusText(statusCode),
+		Error:  err.Error(),
+	})
+}
+
+// TODO: Optimize this, just like the stats endpoint!
+func (data *ApiMetricData) AddStats() {
+	n := 0
+	sum, min, max := 0.0, math.MaxFloat64, -math.MaxFloat64
+	for _, x := range data.Data {
+		if x.IsNaN() {
+			continue
+		}
+
+		n += 1
+		sum += float64(x)
+		min = math.Min(min, float64(x))
+		max = math.Max(max, float64(x))
+	}
+
+	if n > 0 {
+		avg := sum / float64(n)
+		data.Avg = schema.Float(avg)
+		data.Min = schema.Float(min)
+		data.Max = schema.Float(max)
+	} else {
+		data.Avg, data.Min, data.Max = schema.NaN, schema.NaN, schema.NaN
+	}
+}
+
+func (data *ApiMetricData) ScaleBy(f schema.Float) {
+	if f == 0 || f == 1 {
+		return
+	}
+
+	data.Avg *= f
+	data.Min *= f
+	data.Max *= f
+	for i := 0; i < len(data.Data); i++ {
+		data.Data[i] *= f
+	}
+}
+
+func (data *ApiMetricData) PadDataWithNull(ms *MemoryStore, from, to int64, metric string) {
+	minfo, ok := ms.Metrics[metric]
+	if !ok {
+		return
+	}
+
+	if (data.From / minfo.Frequency) > (from / minfo.Frequency) {
+		padfront := int((data.From / minfo.Frequency) - (from / minfo.Frequency))
+		ndata := make([]schema.Float, 0, padfront+len(data.Data))
+		for i := 0; i < padfront; i++ {
+			ndata = append(ndata, schema.NaN)
+		}
+		for j := 0; j < len(data.Data); j++ {
+			ndata = append(ndata, data.Data[j])
+		}
+		data.Data = ndata
+	}
+}
+
+// handleFree godoc
+// @summary Free buffers
+// @tags free
+// @description This endpoint allows users to free buffers in the metric store.
+// Buffers can be removed selectively, and the data below a single node can be
+// pruned without removing the whole node.
+// @produce json +// @param to query string false "up to timestamp" +// @success 200 {string} string "ok" +// @failure 400 {object} api.ErrorResponse "Bad Request" +// @failure 401 {object} api.ErrorResponse "Unauthorized" +// @failure 403 {object} api.ErrorResponse "Forbidden" +// @failure 500 {object} api.ErrorResponse "Internal Server Error" +// @security ApiKeyAuth +// @router /free/ [post] +func HandleFree(rw http.ResponseWriter, r *http.Request) { + rawTo := r.URL.Query().Get("to") + if rawTo == "" { + handleError(errors.New("'to' is a required query parameter"), http.StatusBadRequest, rw) + return + } + + to, err := strconv.ParseInt(rawTo, 10, 64) + if err != nil { + handleError(err, http.StatusInternalServerError, rw) + return + } + + // // TODO: lastCheckpoint might be modified by different go-routines. + // // Load it using the sync/atomic package? + // freeUpTo := lastCheckpoint.Unix() + // if to < freeUpTo { + // freeUpTo = to + // } + + bodyDec := json.NewDecoder(r.Body) + var selectors [][]string + err = bodyDec.Decode(&selectors) + if err != nil { + http.Error(rw, err.Error(), http.StatusBadRequest) + return + } + + ms := GetMemoryStore() + n := 0 + for _, sel := range selectors { + bn, err := ms.Free(sel, to) + if err != nil { + handleError(err, http.StatusInternalServerError, rw) + return + } + + n += bn + } + + rw.WriteHeader(http.StatusOK) + fmt.Fprintf(rw, "buffers freed: %d\n", n) +} + +// handleWrite godoc +// @summary Receive metrics in InfluxDB line-protocol +// @tags write +// @description Write data to the in-memory store in the InfluxDB line-protocol using [this format](https://github.com/ClusterCockpit/cc-specifications/blob/master/metrics/lineprotocol_alternative.md) + +// @accept plain +// @produce json +// @param cluster query string false "If the lines in the body do not have a cluster tag, use this value instead." 
+// @success 200 {string} string "ok"
+// @failure 400 {object} api.ErrorResponse "Bad Request"
+// @failure 401 {object} api.ErrorResponse "Unauthorized"
+// @failure 403 {object} api.ErrorResponse "Forbidden"
+// @failure 500 {object} api.ErrorResponse "Internal Server Error"
+// @security ApiKeyAuth
+// @router /write/ [post]
+func HandleWrite(rw http.ResponseWriter, r *http.Request) {
+	bytes, err := io.ReadAll(r.Body)
+	rw.Header().Add("Content-Type", "application/json")
+	if err != nil {
+		handleError(err, http.StatusInternalServerError, rw)
+		return
+	}
+
+	ms := GetMemoryStore()
+	dec := lineprotocol.NewDecoderWithBytes(bytes)
+	if err := decodeLine(dec, ms, r.URL.Query().Get("cluster")); err != nil {
+		log.Printf("/api/write error: %s", err.Error())
+		handleError(err, http.StatusBadRequest, rw)
+		return
+	}
+	rw.WriteHeader(http.StatusOK)
+}
+
+type ApiQueryRequest struct {
+	Cluster     string     `json:"cluster"`
+	Queries     []ApiQuery `json:"queries"`
+	ForAllNodes []string   `json:"for-all-nodes"`
+	From        int64      `json:"from"`
+	To          int64      `json:"to"`
+	WithStats   bool       `json:"with-stats"`
+	WithData    bool       `json:"with-data"`
+	WithPadding bool       `json:"with-padding"`
+}
+
+type ApiQueryResponse struct {
+	Queries []ApiQuery        `json:"queries,omitempty"`
+	Results [][]ApiMetricData `json:"results"`
+}
+
+type ApiQuery struct {
+	Type        *string      `json:"type,omitempty"`
+	SubType     *string      `json:"subtype,omitempty"`
+	Metric      string       `json:"metric"`
+	Hostname    string       `json:"host"`
+	Resolution  int64        `json:"resolution"`
+	TypeIds     []string     `json:"type-ids,omitempty"`
+	SubTypeIds  []string     `json:"subtype-ids,omitempty"`
+	ScaleFactor schema.Float `json:"scale-by,omitempty"`
+	Aggregate   bool         `json:"aggreg"`
+}
+
+// FetchData runs the given queries against the in-memory store and returns
+// one result slice per query, in request order. Data is always returned;
+// stats and padding are controlled by the request flags.
+func FetchData(req ApiQueryRequest) (*ApiQueryResponse, error) {
+	// Query results always include the raw data.
+	req.WithData = true
+
+	ms := GetMemoryStore()
+
+	response := ApiQueryResponse{
+		Results: make([][]ApiMetricData, 0, len(req.Queries)),
+	}
+	if req.ForAllNodes != nil {
+		nodes := ms.ListChildren([]string{req.Cluster})
+		for _, node := range nodes {
+			for _, metric := range req.ForAllNodes {
+				q := ApiQuery{
+					Metric:   metric,
+					Hostname: node,
+				}
+				req.Queries = append(req.Queries, q)
+				response.Queries = append(response.Queries, q)
+			}
+		}
+	}
+
+	for _, query := range req.Queries {
+		sels := make([]util.Selector, 0, 1)
+		if query.Aggregate || query.Type == nil {
+			sel := util.Selector{{String: req.Cluster}, {String: query.Hostname}}
+			if query.Type != nil {
+				if len(query.TypeIds) == 1 {
+					sel = append(sel, util.SelectorElement{String: *query.Type + query.TypeIds[0]})
+				} else {
+					ids := make([]string, len(query.TypeIds))
+					for i, id := range query.TypeIds {
+						ids[i] = *query.Type + id
+					}
+					sel = append(sel, util.SelectorElement{Group: ids})
+				}
+
+				if query.SubType != nil {
+					if len(query.SubTypeIds) == 1 {
+						sel = append(sel, util.SelectorElement{String: *query.SubType + query.SubTypeIds[0]})
+					} else {
+						ids := make([]string, len(query.SubTypeIds))
+						for i, id := range query.SubTypeIds {
+							ids[i] = *query.SubType + id
+						}
+						sel = append(sel, util.SelectorElement{Group: ids})
+					}
+				}
+			}
+			sels = append(sels, sel)
+		} else {
+			for _, typeId := range query.TypeIds {
+				if query.SubType != nil {
+					for _, subTypeId := range query.SubTypeIds {
+						sels = append(sels, util.Selector{
+							{String: req.Cluster},
+							{String: query.Hostname},
+							{String: *query.Type + typeId},
+							{String: *query.SubType + subTypeId},
+						})
+					}
+				} else {
+					sels = append(sels, util.Selector{
+						{String: req.Cluster},
+						
{String: query.Hostname}, + {String: *query.Type + typeId}, + }) + } + } + } + + // log.Printf("query: %#v\n", query) + // log.Printf("sels: %#v\n", sels) + var err error + res := make([]ApiMetricData, 0, len(sels)) + for _, sel := range sels { + data := ApiMetricData{} + + data.Data, data.From, data.To, data.Resolution, err = ms.Read(sel, query.Metric, req.From, req.To, query.Resolution) + + if err != nil { + msg := err.Error() + data.Error = &msg + res = append(res, data) + continue + } + + if req.WithStats { + data.AddStats() + } + if query.ScaleFactor != 0 { + data.ScaleBy(query.ScaleFactor) + } + if req.WithPadding { + data.PadDataWithNull(ms, req.From, req.To, query.Metric) + } + if !req.WithData { + data.Data = nil + } + res = append(res, data) + } + response.Results = append(response.Results, res) + } + + return &response, nil +} + +// handleDebug godoc +// @summary Debug endpoint +// @tags debug +// @description This endpoint allows the users to print the content of +// nodes/clusters/metrics to review the state of the data. +// @produce json +// @param selector query string false "Selector" +// @success 200 {string} string "Debug dump" +// @failure 400 {object} api.ErrorResponse "Bad Request" +// @failure 401 {object} api.ErrorResponse "Unauthorized" +// @failure 403 {object} api.ErrorResponse "Forbidden" +// @failure 500 {object} api.ErrorResponse "Internal Server Error" +// @security ApiKeyAuth +// @router /debug/ [post] +func HandleDebug(rw http.ResponseWriter, r *http.Request) { + raw := r.URL.Query().Get("selector") + rw.Header().Add("Content-Type", "application/json") + selector := []string{} + if len(raw) != 0 { + selector = strings.Split(raw, ":") + } + + ms := GetMemoryStore() + if err := ms.DebugDump(bufio.NewWriter(rw), selector); err != nil { + handleError(err, http.StatusBadRequest, rw) + return + } +} + +// handleHealthCheck godoc +// @summary HealthCheck endpoint +// @tags healthcheck +// @description This endpoint allows the users to check if a node is healthy +// @produce json +// @param selector query string false "Selector" +// @success 200 {string} string "Debug dump" +// @failure 400 {object} api.ErrorResponse "Bad Request" +// @failure 401 {object} api.ErrorResponse "Unauthorized" +// @failure 403 {object} api.ErrorResponse "Forbidden" +// @failure 500 {object} api.ErrorResponse "Internal Server Error" +// @security ApiKeyAuth +// @router /healthcheck/ [get] +func HandleHealthCheck(rw http.ResponseWriter, r *http.Request) { + rawCluster := r.URL.Query().Get("cluster") + rawNode := r.URL.Query().Get("node") + + if rawCluster == "" || rawNode == "" { + handleError(errors.New("'cluster' and 'node' are required query parameter"), http.StatusBadRequest, rw) + return + } + + rw.Header().Add("Content-Type", "application/json") + + selector := []string{rawCluster, rawNode} + + ms := GetMemoryStore() + if err := ms.HealthCheck(bufio.NewWriter(rw), selector); err != nil { + handleError(err, http.StatusBadRequest, rw) + return + } +} diff --git a/internal/memorystore/archive.go b/internal/memorystore/archive.go index 6e25aff..7857d71 100644 --- a/internal/memorystore/archive.go +++ b/internal/memorystore/archive.go @@ -17,15 +17,16 @@ import ( "sync/atomic" "time" + "github.com/ClusterCockpit/cc-backend/internal/config" cclog "github.com/ClusterCockpit/cc-lib/ccLogger" ) func Archiving(wg *sync.WaitGroup, ctx context.Context) { go func() { defer wg.Done() - d, err := time.ParseDuration(Keys.Archive.Interval) + d, err := 
time.ParseDuration(config.MetricStoreKeys.Archive.Interval) if err != nil { - cclog.Fatalf("error parsing archive interval duration: %v\n", err) + cclog.Fatalf("[METRICSTORE]> error parsing archive interval duration: %v\n", err) } if d <= 0 { return @@ -43,14 +44,14 @@ func Archiving(wg *sync.WaitGroup, ctx context.Context) { return case <-ticks: t := time.Now().Add(-d) - cclog.Infof("start archiving checkpoints (older than %s)...\n", t.Format(time.RFC3339)) - n, err := ArchiveCheckpoints(Keys.Checkpoints.RootDir, - Keys.Archive.RootDir, t.Unix(), Keys.Archive.DeleteInstead) + cclog.Infof("[METRICSTORE]> start archiving checkpoints (older than %s)...\n", t.Format(time.RFC3339)) + n, err := ArchiveCheckpoints(config.MetricStoreKeys.Checkpoints.RootDir, + config.MetricStoreKeys.Archive.RootDir, t.Unix(), config.MetricStoreKeys.Archive.DeleteInstead) if err != nil { - cclog.Warnf("archiving failed: %s\n", err.Error()) + cclog.Warnf("[METRICSTORE]> archiving failed: %s\n", err.Error()) } else { - cclog.Infof("done: %d files zipped and moved to archive\n", n) + cclog.Infof("[METRICSTORE]> done: %d files zipped and moved to archive\n", n) } } } @@ -127,7 +128,7 @@ func archiveCheckpoints(dir string, archiveDir string, from int64, deleteInstead return 0, err } - extension := Keys.Checkpoints.FileFormat + extension := config.MetricStoreKeys.Checkpoints.FileFormat files, err := findFiles(entries, from, extension, false) if err != nil { return 0, err diff --git a/internal/memorystore/buffer.go b/internal/memorystore/buffer.go index d084c6d..39e9abc 100644 --- a/internal/memorystore/buffer.go +++ b/internal/memorystore/buffer.go @@ -4,7 +4,7 @@ import ( "errors" "sync" - "github.com/ClusterCockpit/cc-lib/util" + "github.com/ClusterCockpit/cc-lib/schema" ) // Default buffer capacity. @@ -19,14 +19,14 @@ const ( var bufferPool sync.Pool = sync.Pool{ New: func() interface{} { return &buffer{ - data: make([]util.Float, 0, BUFFER_CAP), + data: make([]schema.Float, 0, BUFFER_CAP), } }, } var ( - ErrNoData error = errors.New("no data for this metric/level") - ErrDataDoesNotAlign error = errors.New("data from lower granularities does not align") + ErrNoData error = errors.New("[METRICSTORE]> no data for this metric/level") + ErrDataDoesNotAlign error = errors.New("[METRICSTORE]> data from lower granularities does not align") ) // Each metric on each level has it's own buffer. @@ -36,7 +36,7 @@ var ( type buffer struct { prev *buffer next *buffer - data []util.Float + data []schema.Float frequency int64 start int64 archived bool @@ -59,9 +59,9 @@ func newBuffer(ts, freq int64) *buffer { // Otherwise, the existing buffer is returnd. // Normaly, only "newer" data should be written, but if the value would // end up in the same buffer anyways it is allowed. 
-func (b *buffer) write(ts int64, value util.Float) (*buffer, error) { +func (b *buffer) write(ts int64, value schema.Float) (*buffer, error) { if ts < b.start { - return nil, errors.New("cannot write value to buffer from past") + return nil, errors.New("[METRICSTORE]> cannot write value to buffer from past") } // idx := int((ts - b.start + (b.frequency / 3)) / b.frequency) @@ -83,7 +83,7 @@ func (b *buffer) write(ts int64, value util.Float) (*buffer, error) { // Fill up unwritten slots with NaN for i := len(b.data); i < idx; i++ { - b.data = append(b.data, util.NaN) + b.data = append(b.data, schema.NaN) } b.data = append(b.data, value) @@ -147,7 +147,7 @@ func (b *buffer) close() { // This function goes back the buffer chain if `from` is older than the currents buffer start. // The loaded values are added to `data` and `data` is returned, possibly with a shorter length. // If `data` is not long enough to hold all values, this function will panic! -func (b *buffer) read(from, to int64, data []util.Float) ([]util.Float, int64, int64, error) { +func (b *buffer) read(from, to int64, data []schema.Float) ([]schema.Float, int64, int64, error) { if from < b.firstWrite() { if b.prev != nil { return b.prev.read(from, to, data) @@ -171,9 +171,9 @@ func (b *buffer) read(from, to int64, data []util.Float) ([]util.Float, int64, i if b.next == nil || to <= b.next.start { break } - data[i] += util.NaN + data[i] += schema.NaN } else if t < b.start { - data[i] += util.NaN + data[i] += schema.NaN // } else if b.data[idx].IsNaN() { // data[i] += interpolate(idx, b.data) } else { diff --git a/internal/memorystore/checkpoint.go b/internal/memorystore/checkpoint.go index 80a048b..76a5472 100644 --- a/internal/memorystore/checkpoint.go +++ b/internal/memorystore/checkpoint.go @@ -20,15 +20,16 @@ import ( "time" "github.com/ClusterCockpit/cc-backend/internal/avro" - "github.com/ClusterCockpit/cc-lib/util" + "github.com/ClusterCockpit/cc-backend/internal/config" + "github.com/ClusterCockpit/cc-lib/schema" "github.com/linkedin/goavro/v2" ) // Whenever changed, update MarshalJSON as well! 
type CheckpointMetrics struct { - Data []util.Float `json:"data"` - Frequency int64 `json:"frequency"` - Start int64 `json:"start"` + Data []schema.Float `json:"data"` + Frequency int64 `json:"frequency"` + Start int64 `json:"start"` } type CheckpointFile struct { @@ -43,12 +44,12 @@ var lastCheckpoint time.Time func Checkpointing(wg *sync.WaitGroup, ctx context.Context) { lastCheckpoint = time.Now() - if Keys.Checkpoints.FileFormat == "json" { + if config.MetricStoreKeys.Checkpoints.FileFormat == "json" { ms := GetMemoryStore() go func() { defer wg.Done() - d, err := time.ParseDuration(Keys.Checkpoints.Interval) + d, err := time.ParseDuration(config.MetricStoreKeys.Checkpoints.Interval) if err != nil { log.Fatal(err) } @@ -67,14 +68,14 @@ func Checkpointing(wg *sync.WaitGroup, ctx context.Context) { case <-ctx.Done(): return case <-ticks: - log.Printf("start checkpointing (starting at %s)...\n", lastCheckpoint.Format(time.RFC3339)) + log.Printf("[METRICSTORE]> start checkpointing (starting at %s)...\n", lastCheckpoint.Format(time.RFC3339)) now := time.Now() - n, err := ms.ToCheckpoint(Keys.Checkpoints.RootDir, + n, err := ms.ToCheckpoint(config.MetricStoreKeys.Checkpoints.RootDir, lastCheckpoint.Unix(), now.Unix()) if err != nil { - log.Printf("checkpointing failed: %s\n", err.Error()) + log.Printf("[METRICSTORE]> checkpointing failed: %s\n", err.Error()) } else { - log.Printf("done: %d checkpoint files created\n", n) + log.Printf("[METRICSTORE]> done: %d checkpoint files created\n", n) lastCheckpoint = now } } @@ -90,7 +91,7 @@ func Checkpointing(wg *sync.WaitGroup, ctx context.Context) { return case <-time.After(time.Duration(avro.CheckpointBufferMinutes) * time.Minute): // This is the first tick untill we collect the data for given minutes. - avro.GetAvroStore().ToCheckpoint(Keys.Checkpoints.RootDir, false) + avro.GetAvroStore().ToCheckpoint(config.MetricStoreKeys.Checkpoints.RootDir, false) // log.Printf("Checkpointing %d avro files", count) } @@ -108,7 +109,7 @@ func Checkpointing(wg *sync.WaitGroup, ctx context.Context) { return case <-ticks: // Regular ticks of 1 minute to write data. 
- avro.GetAvroStore().ToCheckpoint(Keys.Checkpoints.RootDir, false) + avro.GetAvroStore().ToCheckpoint(config.MetricStoreKeys.Checkpoints.RootDir, false) // log.Printf("Checkpointing %d avro files", count) } } @@ -179,7 +180,7 @@ func (m *MemoryStore) ToCheckpoint(dir string, from, to int64) (int, error) { continue } - log.Printf("error while checkpointing %#v: %s", workItem.selector, err.Error()) + log.Printf("[METRICSTORE]> error while checkpointing %#v: %s", workItem.selector, err.Error()) atomic.AddInt32(&errs, 1) } else { atomic.AddInt32(&n, 1) @@ -201,7 +202,7 @@ func (m *MemoryStore) ToCheckpoint(dir string, from, to int64) (int, error) { wg.Wait() if errs > 0 { - return int(n), fmt.Errorf("%d errors happend while creating checkpoints (%d successes)", errs, n) + return int(n), fmt.Errorf("[METRICSTORE]> %d errors happend while creating checkpoints (%d successes)", errs, n) } return int(n), nil } @@ -235,14 +236,14 @@ func (l *Level) toCheckpointFile(from, to int64, m *MemoryStore) (*CheckpointFil continue } - data := make([]util.Float, (to-from)/b.frequency+1) + data := make([]schema.Float, (to-from)/b.frequency+1) data, start, end, err := b.read(from, to, data) if err != nil { return nil, err } for i := int((end - start) / b.frequency); i < len(data); i++ { - data[i] = util.NaN + data[i] = schema.NaN } retval.Metrics[metric] = &CheckpointMetrics{ @@ -314,7 +315,7 @@ func (m *MemoryStore) FromCheckpoint(dir string, from int64, extension string) ( lvl := m.root.findLevelOrCreate(host[:], len(m.Metrics)) nn, err := lvl.fromCheckpoint(m, filepath.Join(dir, host[0], host[1]), from, extension) if err != nil { - log.Fatalf("error while loading checkpoints: %s", err.Error()) + log.Fatalf("[METRICSTORE]> error while loading checkpoints: %s", err.Error()) atomic.AddInt32(&errs, 1) } atomic.AddInt32(&n, int32(nn)) @@ -326,7 +327,7 @@ func (m *MemoryStore) FromCheckpoint(dir string, from int64, extension string) ( clustersDir, err := os.ReadDir(dir) for _, clusterDir := range clustersDir { if !clusterDir.IsDir() { - err = errors.New("expected only directories at first level of checkpoints/ directory") + err = errors.New("[METRICSTORE]> expected only directories at first level of checkpoints/ directory") goto done } @@ -338,7 +339,7 @@ func (m *MemoryStore) FromCheckpoint(dir string, from int64, extension string) ( for _, hostDir := range hostsDir { if !hostDir.IsDir() { - err = errors.New("expected only directories at second level of checkpoints/ directory") + err = errors.New("[METRICSTORE]> expected only directories at second level of checkpoints/ directory") goto done } @@ -364,7 +365,7 @@ done: } if errs > 0 { - return int(n), fmt.Errorf("%d errors happend while creating checkpoints (%d successes)", errs, n) + return int(n), fmt.Errorf("[METRICSTORE]> %d errors happend while creating checkpoints (%d successes)", errs, n) } return int(n), nil } @@ -377,13 +378,13 @@ func (m *MemoryStore) FromCheckpointFiles(dir string, from int64) (int, error) { // The directory does not exist, so create it using os.MkdirAll() err := os.MkdirAll(dir, 0755) // 0755 sets the permissions for the directory if err != nil { - log.Fatalf("Error creating directory: %#v\n", err) + log.Fatalf("[METRICSTORE]> Error creating directory: %#v\n", err) } - fmt.Printf("%#v Directory created successfully.\n", dir) + fmt.Printf("[METRICSTORE]> %#v Directory created successfully.\n", dir) } // Config read (replace with your actual config read) - fileFormat := Keys.Checkpoints.FileFormat + fileFormat := 
config.MetricStoreKeys.Checkpoints.FileFormat if fileFormat == "" { fileFormat = "avro" } @@ -396,22 +397,22 @@ func (m *MemoryStore) FromCheckpointFiles(dir string, from int64) (int, error) { // First, attempt to load the specified format if found, err := checkFilesWithExtension(dir, fileFormat); err != nil { - return 0, fmt.Errorf("error checking files with extension: %v", err) + return 0, fmt.Errorf("[METRICSTORE]> error checking files with extension: %v", err) } else if found { - log.Printf("Loading %s files because fileformat is %s\n", fileFormat, fileFormat) + log.Printf("[METRICSTORE]> Loading %s files because fileformat is %s\n", fileFormat, fileFormat) return m.FromCheckpoint(dir, from, fileFormat) } // If not found, attempt the opposite format altFormat := oppositeFormat[fileFormat] if found, err := checkFilesWithExtension(dir, altFormat); err != nil { - return 0, fmt.Errorf("error checking files with extension: %v", err) + return 0, fmt.Errorf("[METRICSTORE]> error checking files with extension: %v", err) } else if found { - log.Printf("Loading %s files but fileformat is %s\n", altFormat, fileFormat) + log.Printf("[METRICSTORE]> Loading %s files but fileformat is %s\n", altFormat, fileFormat) return m.FromCheckpoint(dir, from, altFormat) } - log.Println("No valid checkpoint files found in the directory.") + log.Println("[METRICSTORE]> No valid checkpoint files found in the directory.") return 0, nil } @@ -420,7 +421,7 @@ func checkFilesWithExtension(dir string, extension string) (bool, error) { err := filepath.Walk(dir, func(path string, info os.FileInfo, err error) error { if err != nil { - return fmt.Errorf("error accessing path %s: %v", path, err) + return fmt.Errorf("[METRICSTORE]> error accessing path %s: %v", path, err) } if !info.IsDir() && filepath.Ext(info.Name()) == "."+extension { found = true @@ -429,7 +430,7 @@ func checkFilesWithExtension(dir string, extension string) (bool, error) { return nil }) if err != nil { - return false, fmt.Errorf("error walking through directories: %s", err) + return false, fmt.Errorf("[METRICSTORE]> error walking through directories: %s", err) } return found, nil @@ -441,7 +442,7 @@ func (l *Level) loadAvroFile(m *MemoryStore, f *os.File, from int64) error { fileName := f.Name()[strings.LastIndex(f.Name(), "/")+1:] resolution, err := strconv.ParseInt(fileName[0:strings.Index(fileName, "_")], 10, 64) if err != nil { - return fmt.Errorf("error while reading avro file (resolution parsing) : %s", err) + return fmt.Errorf("[METRICSTORE]> error while reading avro file (resolution parsing) : %s", err) } from_timestamp, err := strconv.ParseInt(fileName[strings.Index(fileName, "_")+1:len(fileName)-5], 10, 64) @@ -450,7 +451,7 @@ func (l *Level) loadAvroFile(m *MemoryStore, f *os.File, from int64) error { from_timestamp -= (resolution / 2) if err != nil { - return fmt.Errorf("error converting timestamp from the avro file : %s", err) + return fmt.Errorf("[METRICSTORE]> error converting timestamp from the avro file : %s", err) } // fmt.Printf("File : %s with resolution : %d\n", fileName, resolution) @@ -463,21 +464,21 @@ func (l *Level) loadAvroFile(m *MemoryStore, f *os.File, from int64) error { panic(err) } - metricsData := make(map[string]util.FloatArray) + metricsData := make(map[string]schema.FloatArray) for ocfReader.Scan() { datum, err := ocfReader.Read() if err != nil { - return fmt.Errorf("error while reading avro file : %s", err) + return fmt.Errorf("[METRICSTORE]> error while reading avro file : %s", err) } record, ok := 
datum.(map[string]interface{}) if !ok { - panic("failed to assert datum as map[string]interface{}") + panic("[METRICSTORE]> failed to assert datum as map[string]interface{}") } for key, value := range record { - metricsData[key] = append(metricsData[key], util.ConvertToFloat(value.(float64))) + metricsData[key] = append(metricsData[key], schema.ConvertToFloat(value.(float64))) } recordCounter += 1 @@ -518,12 +519,12 @@ func (l *Level) loadAvroFile(m *MemoryStore, f *os.File, from int64) error { leafMetricName := subString[len(subString)-1] err = lvl.createBuffer(m, leafMetricName, floatArray, from_timestamp, resolution) if err != nil { - return fmt.Errorf("error while creating buffers from avroReader : %s", err) + return fmt.Errorf("[METRICSTORE]> error while creating buffers from avroReader : %s", err) } } else { err = l.createBuffer(m, metricName, floatArray, from_timestamp, resolution) if err != nil { - return fmt.Errorf("error while creating buffers from avroReader : %s", err) + return fmt.Errorf("[METRICSTORE]> error while creating buffers from avroReader : %s", err) } } @@ -532,7 +533,7 @@ func (l *Level) loadAvroFile(m *MemoryStore, f *os.File, from int64) error { return nil } -func (l *Level) createBuffer(m *MemoryStore, metricName string, floatArray util.FloatArray, from int64, resolution int64) error { +func (l *Level) createBuffer(m *MemoryStore, metricName string, floatArray schema.FloatArray, from int64, resolution int64) error { n := len(floatArray) b := &buffer{ frequency: resolution, @@ -566,7 +567,7 @@ func (l *Level) createBuffer(m *MemoryStore, metricName string, floatArray util. missingCount /= int(b.frequency) for range missingCount { - prev.data = append(prev.data, util.NaN) + prev.data = append(prev.data, schema.NaN) } prev.data = prev.data[0:len(prev.data):len(prev.data)] diff --git a/internal/memorystore/config.go b/internal/memorystore/config.go deleted file mode 100644 index 0d8a8ab..0000000 --- a/internal/memorystore/config.go +++ /dev/null @@ -1,26 +0,0 @@ -// Copyright (C) NHR@FAU, University Erlangen-Nuremberg. -// All rights reserved. This file is part of cc-backend. -// Use of this source code is governed by a MIT-style -// license that can be found in the LICENSE file. 
-package memorystore - -type MetricStoreConfig struct { - Checkpoints struct { - FileFormat string `json:"file-format"` - Interval string `json:"interval"` - RootDir string `json:"directory"` - Restore string `json:"restore"` - } `json:"checkpoints"` - Debug struct { - DumpToFile string `json:"dump-to-file"` - EnableGops bool `json:"gops"` - } `json:"debug"` - RetentionInMemory string `json:"retention-in-memory"` - Archive struct { - Interval string `json:"interval"` - RootDir string `json:"directory"` - DeleteInstead bool `json:"delete-instead"` - } `json:"archive"` -} - -var Keys MetricStoreConfig diff --git a/internal/memorystore/debug.go b/internal/memorystore/debug.go index 2743a45..0f85024 100644 --- a/internal/memorystore/debug.go +++ b/internal/memorystore/debug.go @@ -87,7 +87,7 @@ func (l *Level) debugDump(m *MemoryStore, w *bufio.Writer, lvlname string, buf [ func (m *MemoryStore) DebugDump(w *bufio.Writer, selector []string) error { lvl := m.root.findLevel(selector) if lvl == nil { - return fmt.Errorf("not found: %#v", selector) + return fmt.Errorf("[METRICSTORE]> not found: %#v", selector) } buf := make([]byte, 0, 2048) diff --git a/internal/memorystore/healthcheck.go b/internal/memorystore/healthcheck.go index cb22d49..d655db3 100644 --- a/internal/memorystore/healthcheck.go +++ b/internal/memorystore/healthcheck.go @@ -59,7 +59,7 @@ func (l *Level) healthCheck(m *MemoryStore, count int64) (int64, error) { func (m *MemoryStore) HealthCheck(w *bufio.Writer, selector []string) error { lvl := m.root.findLevel(selector) if lvl == nil { - return fmt.Errorf("not found: %#v", selector) + return fmt.Errorf("[METRICSTORE]> not found: %#v", selector) } buf := make([]byte, 0, 25) diff --git a/internal/memorystore/lineprotocol.go b/internal/memorystore/lineprotocol.go new file mode 100644 index 0000000..e12b9e2 --- /dev/null +++ b/internal/memorystore/lineprotocol.go @@ -0,0 +1,349 @@ +package memorystore + +import ( + "context" + "errors" + "fmt" + "log" + "net" + "sync" + "time" + + "github.com/ClusterCockpit/cc-backend/internal/avro" + "github.com/ClusterCockpit/cc-backend/internal/config" + "github.com/ClusterCockpit/cc-lib/schema" + "github.com/influxdata/line-protocol/v2/lineprotocol" + "github.com/nats-io/nats.go" +) + +// Each connection is handled in it's own goroutine. This is a blocking function. +func ReceiveRaw(ctx context.Context, + listener net.Listener, + handleLine func(*lineprotocol.Decoder, string) error, +) error { + var wg sync.WaitGroup + + wg.Add(1) + go func() { + defer wg.Done() + <-ctx.Done() + if err := listener.Close(); err != nil { + log.Printf("listener.Close(): %s", err.Error()) + } + }() + + for { + conn, err := listener.Accept() + if err != nil { + if errors.Is(err, net.ErrClosed) { + break + } + + log.Printf("listener.Accept(): %s", err.Error()) + } + + wg.Add(2) + go func() { + defer wg.Done() + defer conn.Close() + + dec := lineprotocol.NewDecoder(conn) + connctx, cancel := context.WithCancel(context.Background()) + defer cancel() + go func() { + defer wg.Done() + select { + case <-connctx.Done(): + conn.Close() + case <-ctx.Done(): + conn.Close() + } + }() + + if err := handleLine(dec, "default"); err != nil { + if errors.Is(err, net.ErrClosed) { + return + } + + log.Printf("%s: %s", conn.RemoteAddr().String(), err.Error()) + errmsg := make([]byte, 128) + errmsg = append(errmsg, `error: `...) + errmsg = append(errmsg, err.Error()...) 
+ errmsg = append(errmsg, '\n') + conn.Write(errmsg) + } + }() + } + + wg.Wait() + return nil +} + +// Connect to a nats server and subscribe to "updates". This is a blocking +// function. handleLine will be called for each line recieved via nats. +// Send `true` through the done channel for gracefull termination. +func ReceiveNats(conf *(config.NatsConfig), + ms *MemoryStore, + workers int, + ctx context.Context, +) error { + var opts []nats.Option + if conf.Username != "" && conf.Password != "" { + opts = append(opts, nats.UserInfo(conf.Username, conf.Password)) + } + + if conf.Credsfilepath != "" { + opts = append(opts, nats.UserCredentials(conf.Credsfilepath)) + } + + nc, err := nats.Connect(conf.Address, opts...) + if err != nil { + return err + } + defer nc.Close() + + var wg sync.WaitGroup + var subs []*nats.Subscription + + msgs := make(chan *nats.Msg, workers*2) + + for _, sc := range conf.Subscriptions { + clusterTag := sc.ClusterTag + var sub *nats.Subscription + if workers > 1 { + wg.Add(workers) + + for i := 0; i < workers; i++ { + go func() { + for m := range msgs { + dec := lineprotocol.NewDecoderWithBytes(m.Data) + if err := decodeLine(dec, ms, clusterTag); err != nil { + log.Printf("error: %s\n", err.Error()) + } + } + + wg.Done() + }() + } + + sub, err = nc.Subscribe(sc.SubscribeTo, func(m *nats.Msg) { + msgs <- m + }) + } else { + sub, err = nc.Subscribe(sc.SubscribeTo, func(m *nats.Msg) { + dec := lineprotocol.NewDecoderWithBytes(m.Data) + if err := decodeLine(dec, ms, clusterTag); err != nil { + log.Printf("error: %s\n", err.Error()) + } + }) + } + + if err != nil { + return err + } + log.Printf("NATS subscription to '%s' on '%s' established\n", sc.SubscribeTo, conf.Address) + subs = append(subs, sub) + } + + <-ctx.Done() + for _, sub := range subs { + err = sub.Unsubscribe() + if err != nil { + log.Printf("NATS unsubscribe failed: %s", err.Error()) + } + } + close(msgs) + wg.Wait() + + nc.Close() + log.Println("NATS connection closed") + return nil +} + +// Place `prefix` in front of `buf` but if possible, +// do that inplace in `buf`. +func reorder(buf, prefix []byte) []byte { + n := len(prefix) + m := len(buf) + if cap(buf) < m+n { + return append(prefix[:n:n], buf...) + } else { + buf = buf[:n+m] + for i := m - 1; i >= 0; i-- { + buf[i+n] = buf[i] + } + for i := 0; i < n; i++ { + buf[i] = prefix[i] + } + return buf + } +} + +// Decode lines using dec and make write calls to the MemoryStore. +// If a line is missing its cluster tag, use clusterDefault as default. +func decodeLine(dec *lineprotocol.Decoder, + ms *MemoryStore, + clusterDefault string, +) error { + // Reduce allocations in loop: + t := time.Now() + metric, metricBuf := Metric{}, make([]byte, 0, 16) + selector := make([]string, 0, 4) + typeBuf, subTypeBuf := make([]byte, 0, 16), make([]byte, 0) + + // Optimize for the case where all lines in a "batch" are about the same + // cluster and host. By using `WriteToLevel` (level = host), we do not need + // to take the root- and cluster-level lock as often. + var lvl *Level = nil + prevCluster, prevHost := "", "" + + var ok bool + for dec.Next() { + rawmeasurement, err := dec.Measurement() + if err != nil { + return err + } + + // Needs to be copied because another call to dec.* would + // invalidate the returned slice. + metricBuf = append(metricBuf[:0], rawmeasurement...) 
+ + // The go compiler optimizes map[string(byteslice)] lookups: + metric.MetricConfig, ok = ms.Metrics[string(rawmeasurement)] + if !ok { + continue + } + + typeBuf, subTypeBuf := typeBuf[:0], subTypeBuf[:0] + cluster, host := clusterDefault, "" + for { + key, val, err := dec.NextTag() + if err != nil { + return err + } + if key == nil { + break + } + + // The go compiler optimizes string([]byte{...}) == "...": + switch string(key) { + case "cluster": + if string(val) == prevCluster { + cluster = prevCluster + } else { + cluster = string(val) + lvl = nil + } + case "hostname", "host": + if string(val) == prevHost { + host = prevHost + } else { + host = string(val) + lvl = nil + } + case "type": + if string(val) == "node" { + break + } + + // We cannot be sure that the "type" tag comes before the "type-id" tag: + if len(typeBuf) == 0 { + typeBuf = append(typeBuf, val...) + } else { + typeBuf = reorder(typeBuf, val) + } + case "type-id": + typeBuf = append(typeBuf, val...) + case "subtype": + // We cannot be sure that the "subtype" tag comes before the "stype-id" tag: + if len(subTypeBuf) == 0 { + subTypeBuf = append(subTypeBuf, val...) + } else { + subTypeBuf = reorder(subTypeBuf, val) + // subTypeBuf = reorder(typeBuf, val) + } + case "stype-id": + subTypeBuf = append(subTypeBuf, val...) + default: + // Ignore unkown tags (cc-metric-collector might send us a unit for example that we do not need) + // return fmt.Errorf("unkown tag: '%s' (value: '%s')", string(key), string(val)) + } + } + + // If the cluster or host changed, the lvl was set to nil + if lvl == nil { + selector = selector[:2] + selector[0], selector[1] = cluster, host + lvl = ms.GetLevel(selector) + prevCluster, prevHost = cluster, host + } + + // subtypes: + selector = selector[:0] + if len(typeBuf) > 0 { + selector = append(selector, string(typeBuf)) // <- Allocation :( + if len(subTypeBuf) > 0 { + selector = append(selector, string(subTypeBuf)) + } + } + + for { + key, val, err := dec.NextField() + if err != nil { + return err + } + + if key == nil { + break + } + + if string(key) != "value" { + return fmt.Errorf("host %s: unknown field: '%s' (value: %#v)", host, string(key), val) + } + + if val.Kind() == lineprotocol.Float { + metric.Value = schema.Float(val.FloatV()) + } else if val.Kind() == lineprotocol.Int { + metric.Value = schema.Float(val.IntV()) + } else if val.Kind() == lineprotocol.Uint { + metric.Value = schema.Float(val.UintV()) + } else { + return fmt.Errorf("host %s: unsupported value type in message: %s", host, val.Kind().String()) + } + } + + if t, err = dec.Time(lineprotocol.Second, t); err != nil { + t = time.Now() + if t, err = dec.Time(lineprotocol.Millisecond, t); err != nil { + t = time.Now() + if t, err = dec.Time(lineprotocol.Microsecond, t); err != nil { + t = time.Now() + if t, err = dec.Time(lineprotocol.Nanosecond, t); err != nil { + return fmt.Errorf("host %s: timestamp : %#v with error : %#v", host, t, err.Error()) + } + } + } + } + + if err != nil { + return fmt.Errorf("host %s: timestamp : %#v with error : %#v", host, t, err.Error()) + } + + time := t.Unix() + + if config.MetricStoreKeys.Checkpoints.FileFormat != "json" { + avro.LineProtocolMessages <- &avro.AvroStruct{ + MetricName: string(metricBuf), + Cluster: cluster, + Node: host, + Selector: append([]string{}, selector...), + Value: metric.Value, + Timestamp: time} + } + + if err := ms.WriteToLevel(lvl, selector, time, []Metric{metric}); err != nil { + return err + } + } + return nil +} diff --git a/internal/memorystore/memorystore.go 
b/internal/memorystore/memorystore.go index 76079d4..efa4065 100644 --- a/internal/memorystore/memorystore.go +++ b/internal/memorystore/memorystore.go @@ -2,16 +2,19 @@ package memorystore import ( "context" - "encoding/json" "errors" - "fmt" "log" + "os" + "os/signal" "runtime" "sync" + "syscall" "time" "github.com/ClusterCockpit/cc-backend/internal/avro" + "github.com/ClusterCockpit/cc-backend/internal/config" "github.com/ClusterCockpit/cc-lib/resampler" + "github.com/ClusterCockpit/cc-lib/runtimeEnv" "github.com/ClusterCockpit/cc-lib/schema" "github.com/ClusterCockpit/cc-lib/util" ) @@ -21,6 +24,8 @@ var ( msInstance *MemoryStore ) +var Clusters = make([]string, 0) + var NumWorkers int = 4 func init() { @@ -31,77 +36,38 @@ func init() { } } -// For aggregation over multiple values at different cpus/sockets/..., not time! -type AggregationStrategy int - -const ( - NoAggregation AggregationStrategy = iota - SumAggregation - AvgAggregation -) - -func (as *AggregationStrategy) UnmarshalJSON(data []byte) error { - var str string - if err := json.Unmarshal(data, &str); err != nil { - return err - } - - switch str { - case "": - *as = NoAggregation - case "sum": - *as = SumAggregation - case "avg": - *as = AvgAggregation - default: - return fmt.Errorf("invalid aggregation strategy: %#v", str) - } - return nil -} - -type MetricConfig struct { - // Interval in seconds at which measurements will arive. - Frequency int64 `json:"frequency"` - - // Can be 'sum', 'avg' or null. Describes how to aggregate metrics from the same timestep over the hierarchy. - Aggregation AggregationStrategy `json:"aggregation"` - - // Private, used internally... - Offset int -} - type Metric struct { Name string - Value util.Float - MetricConfig MetricConfig + Value schema.Float + MetricConfig config.MetricConfig } type MemoryStore struct { - Metrics map[string]MetricConfig + Metrics map[string]config.MetricConfig root Level } -func Init() { +func Init(wg sync.WaitGroup) { startupTime := time.Now() - //Pass the keys from cluster config - InitMetrics() + //Pass the config.MetricStoreKeys + InitMetrics(config.Metrics) ms := GetMemoryStore() - d, err := time.ParseDuration(Keys.Checkpoints.Restore) + d, err := time.ParseDuration(config.MetricStoreKeys.Checkpoints.Restore) if err != nil { log.Fatal(err) } restoreFrom := startupTime.Add(-d) - log.Printf("Loading checkpoints newer than %s\n", restoreFrom.Format(time.RFC3339)) - files, err := ms.FromCheckpointFiles(Keys.Checkpoints.RootDir, restoreFrom.Unix()) + log.Printf("[METRICSTORE]> Loading checkpoints newer than %s\n", restoreFrom.Format(time.RFC3339)) + files, err := ms.FromCheckpointFiles(config.MetricStoreKeys.Checkpoints.RootDir, restoreFrom.Unix()) loadedData := ms.SizeInBytes() / 1024 / 1024 // In MB if err != nil { - log.Fatalf("Loading checkpoints failed: %s\n", err.Error()) + log.Fatalf("[METRICSTORE]> Loading checkpoints failed: %s\n", err.Error()) } else { - log.Printf("Checkpoints loaded (%d files, %d MB, that took %fs)\n", files, loadedData, time.Since(startupTime).Seconds()) + log.Printf("[METRICSTORE]> Checkpoints loaded (%d files, %d MB, that took %fs)\n", files, loadedData, time.Since(startupTime).Seconds()) } // Try to use less memory by forcing a GC run here and then @@ -112,28 +78,53 @@ func Init() { // to a minumum. 
runtime.GC() - ctx, _ := context.WithCancel(context.Background()) + ctx, shutdown := context.WithCancel(context.Background()) - var wg sync.WaitGroup wg.Add(4) Retention(&wg, ctx) Checkpointing(&wg, ctx) Archiving(&wg, ctx) avro.DataStaging(&wg, ctx) + + wg.Add(1) + sigs := make(chan os.Signal, 1) + signal.Notify(sigs, syscall.SIGINT, syscall.SIGTERM) + go func() { + defer wg.Done() + <-sigs + runtimeEnv.SystemdNotifiy(false, "[METRICSTORE]> Shutting down ...") + shutdown() + }() + + if config.MetricStoreKeys.Nats != nil { + for _, natsConf := range config.MetricStoreKeys.Nats { + // TODO: When multiple nats configs share a URL, do a single connect. + wg.Add(1) + nc := natsConf + go func() { + // err := ReceiveNats(conf.Nats, decodeLine, runtime.NumCPU()-1, ctx) + err := ReceiveNats(nc, ms, 1, ctx) + if err != nil { + log.Fatal(err) + } + wg.Done() + }() + } + } } // Create a new, initialized instance of a MemoryStore. // Will panic if values in the metric configurations are invalid. -func InitMetrics(metrics map[string]MetricConfig) { +func InitMetrics(metrics map[string]config.MetricConfig) { singleton.Do(func() { offset := 0 for key, cfg := range metrics { if cfg.Frequency == 0 { - panic("invalid frequency") + panic("[METRICSTORE]> invalid frequency") } - metrics[key] = MetricConfig{ + metrics[key] = config.MetricConfig{ Frequency: cfg.Frequency, Aggregation: cfg.Aggregation, Offset: offset, @@ -153,30 +144,30 @@ func InitMetrics(metrics map[string]MetricConfig) { func GetMemoryStore() *MemoryStore { if msInstance == nil { - log.Fatalf("MemoryStore not initialized!") + log.Fatalf("[METRICSTORE]> MemoryStore not initialized!") } return msInstance } func Shutdown() { - log.Printf("Writing to '%s'...\n", Keys.Checkpoints.RootDir) + log.Printf("[METRICSTORE]> Writing to '%s'...\n", config.MetricStoreKeys.Checkpoints.RootDir) var files int var err error ms := GetMemoryStore() - if Keys.Checkpoints.FileFormat == "json" { - files, err = ms.ToCheckpoint(Keys.Checkpoints.RootDir, lastCheckpoint.Unix(), time.Now().Unix()) + if config.MetricStoreKeys.Checkpoints.FileFormat == "json" { + files, err = ms.ToCheckpoint(config.MetricStoreKeys.Checkpoints.RootDir, lastCheckpoint.Unix(), time.Now().Unix()) } else { - files, err = avro.GetAvroStore().ToCheckpoint(Keys.Checkpoints.RootDir, true) + files, err = avro.GetAvroStore().ToCheckpoint(config.MetricStoreKeys.Checkpoints.RootDir, true) close(avro.LineProtocolMessages) } if err != nil { - log.Printf("Writing checkpoint failed: %s\n", err.Error()) + log.Printf("[METRICSTORE]> Writing checkpoint failed: %s\n", err.Error()) } - log.Printf("Done! (%d files written)\n", files) + log.Printf("[METRICSTORE]> Done! 
(%d files written)\n", files) // ms.PrintHeirarchy() } @@ -255,7 +246,7 @@ func Retention(wg *sync.WaitGroup, ctx context.Context) { go func() { defer wg.Done() - d, err := time.ParseDuration(Keys.RetentionInMemory) + d, err := time.ParseDuration(config.MetricStoreKeys.RetentionInMemory) if err != nil { log.Fatal(err) } @@ -276,12 +267,12 @@ func Retention(wg *sync.WaitGroup, ctx context.Context) { return case <-ticks: t := time.Now().Add(-d) - log.Printf("start freeing buffers (older than %s)...\n", t.Format(time.RFC3339)) + log.Printf("[METRICSTORE]> start freeing buffers (older than %s)...\n", t.Format(time.RFC3339)) freed, err := ms.Free(nil, t.Unix()) if err != nil { - log.Printf("freeing up buffers failed: %s\n", err.Error()) + log.Printf("[METRICSTORE]> freeing up buffers failed: %s\n", err.Error()) } else { - log.Printf("done: %d buffers freed\n", freed) + log.Printf("[METRICSTORE]> done: %d buffers freed\n", freed) } } } @@ -346,12 +337,12 @@ func (m *MemoryStore) WriteToLevel(l *Level, selector []string, ts int64, metric // the range asked for if no data was available. func (m *MemoryStore) Read(selector util.Selector, metric string, from, to, resolution int64) ([]schema.Float, int64, int64, int64, error) { if from > to { - return nil, 0, 0, 0, errors.New("invalid time range") + return nil, 0, 0, 0, errors.New("[METRICSTORE]> invalid time range") } minfo, ok := m.Metrics[metric] if !ok { - return nil, 0, 0, 0, errors.New("unkown metric: " + metric) + return nil, 0, 0, 0, errors.New("[METRICSTORE]> unkown metric: " + metric) } n, data := 0, make([]schema.Float, (to-from)/minfo.Frequency+1) @@ -390,15 +381,15 @@ func (m *MemoryStore) Read(selector util.Selector, metric string, from, to, reso if err != nil { return nil, 0, 0, 0, err } else if n == 0 { - return nil, 0, 0, 0, errors.New("metric or host not found") + return nil, 0, 0, 0, errors.New("[METRICSTORE]> metric or host not found") } else if n > 1 { - if minfo.Aggregation == AvgAggregation { + if minfo.Aggregation == config.AvgAggregation { normalize := 1. / schema.Float(n) for i := 0; i < len(data); i++ { data[i] *= normalize } - } else if minfo.Aggregation != SumAggregation { - return nil, 0, 0, 0, errors.New("invalid aggregation") + } else if minfo.Aggregation != config.SumAggregation { + return nil, 0, 0, 0, errors.New("[METRICSTORE]> invalid aggregation") } } diff --git a/internal/memorystore/stats.go b/internal/memorystore/stats.go index 831e282..1066dcb 100644 --- a/internal/memorystore/stats.go +++ b/internal/memorystore/stats.go @@ -4,6 +4,7 @@ import ( "errors" "math" + "github.com/ClusterCockpit/cc-backend/internal/config" "github.com/ClusterCockpit/cc-lib/util" ) @@ -104,9 +105,9 @@ func (m *MemoryStore) Stats(selector util.Selector, metric string, from, to int6 return nil, 0, 0, ErrNoData } - if minfo.Aggregation == AvgAggregation { + if minfo.Aggregation == config.AvgAggregation { avg /= util.Float(n) - } else if n > 1 && minfo.Aggregation != SumAggregation { + } else if n > 1 && minfo.Aggregation != config.SumAggregation { return nil, 0, 0, errors.New("invalid aggregation") } diff --git a/internal/metricDataDispatcher/dataLoader.go b/internal/metricDataDispatcher/dataLoader.go index 2b73e11..4f8e3b5 100644 --- a/internal/metricDataDispatcher/dataLoader.go +++ b/internal/metricDataDispatcher/dataLoader.go @@ -91,14 +91,14 @@ func LoadData(job *schema.Job, // Pass the resolution from frontend here. 
for _, v := range jd { for _, v_ := range v { - timestep := 0 + timestep := int64(0) for i := 0; i < len(v_.Series); i += 1 { - v_.Series[i].Data, timestep, err = resampler.LargestTriangleThreeBucket(v_.Series[i].Data, v_.Timestep, resolution) + v_.Series[i].Data, timestep, err = resampler.LargestTriangleThreeBucket(v_.Series[i].Data, int64(v_.Timestep), int64(resolution)) if err != nil { return err, 0, 0 } } - v_.Timestep = timestep + v_.Timestep = int(timestep) } } diff --git a/internal/metricdata/cc-metric-store.go b/internal/metricdata/cc-metric-store.go index 36c0dd7..d8cef4d 100644 --- a/internal/metricdata/cc-metric-store.go +++ b/internal/metricdata/cc-metric-store.go @@ -5,23 +5,22 @@ package metricdata import ( - "bufio" - "bytes" "context" "encoding/json" "fmt" - "net/http" "sort" "strconv" "strings" "time" "github.com/ClusterCockpit/cc-backend/internal/graph/model" + "github.com/ClusterCockpit/cc-backend/internal/memorystore" "github.com/ClusterCockpit/cc-backend/pkg/archive" cclog "github.com/ClusterCockpit/cc-lib/ccLogger" "github.com/ClusterCockpit/cc-lib/schema" ) +// Bloat Code type CCMetricStoreConfig struct { Kind string `json:"kind"` Url string `json:"url"` @@ -33,141 +32,16 @@ type CCMetricStoreConfig struct { Renamings map[string]string `json:"metricRenamings"` } +// Bloat Code type CCMetricStore struct { - here2there map[string]string - there2here map[string]string - client http.Client - jwt string - url string - queryEndpoint string -} - -type ApiQueryRequest struct { - Cluster string `json:"cluster"` - Queries []ApiQuery `json:"queries"` - ForAllNodes []string `json:"for-all-nodes"` - From int64 `json:"from"` - To int64 `json:"to"` - WithStats bool `json:"with-stats"` - WithData bool `json:"with-data"` -} - -type ApiQuery struct { - Type *string `json:"type,omitempty"` - SubType *string `json:"subtype,omitempty"` - Metric string `json:"metric"` - Hostname string `json:"host"` - Resolution int `json:"resolution"` - TypeIds []string `json:"type-ids,omitempty"` - SubTypeIds []string `json:"subtype-ids,omitempty"` - Aggregate bool `json:"aggreg"` -} - -type ApiQueryResponse struct { - Queries []ApiQuery `json:"queries,omitempty"` - Results [][]ApiMetricData `json:"results"` -} - -type ApiMetricData struct { - Error *string `json:"error"` - Data []schema.Float `json:"data"` - From int64 `json:"from"` - To int64 `json:"to"` - Resolution int `json:"resolution"` - Avg schema.Float `json:"avg"` - Min schema.Float `json:"min"` - Max schema.Float `json:"max"` } +// Bloat Code func (ccms *CCMetricStore) Init(rawConfig json.RawMessage) error { - var config CCMetricStoreConfig - if err := json.Unmarshal(rawConfig, &config); err != nil { - cclog.Warn("Error while unmarshaling raw json config") - return err - } - - ccms.url = config.Url - ccms.queryEndpoint = fmt.Sprintf("%s/api/query", config.Url) - ccms.jwt = config.Token - ccms.client = http.Client{ - Timeout: 10 * time.Second, - } - - if config.Renamings != nil { - ccms.here2there = config.Renamings - ccms.there2here = make(map[string]string, len(config.Renamings)) - for k, v := range ccms.here2there { - ccms.there2here[v] = k - } - } else { - ccms.here2there = make(map[string]string) - ccms.there2here = make(map[string]string) - } return nil } -func (ccms *CCMetricStore) toRemoteName(metric string) string { - if renamed, ok := ccms.here2there[metric]; ok { - return renamed - } - - return metric -} - -func (ccms *CCMetricStore) toLocalName(metric string) string { - if renamed, ok := ccms.there2here[metric]; ok { - return 
renamed - } - - return metric -} - -func (ccms *CCMetricStore) doRequest( - ctx context.Context, - body *ApiQueryRequest, -) (*ApiQueryResponse, error) { - buf := &bytes.Buffer{} - if err := json.NewEncoder(buf).Encode(body); err != nil { - cclog.Errorf("Error while encoding request body: %s", err.Error()) - return nil, err - } - - req, err := http.NewRequestWithContext(ctx, http.MethodGet, ccms.queryEndpoint, buf) - if err != nil { - cclog.Errorf("Error while building request body: %s", err.Error()) - return nil, err - } - if ccms.jwt != "" { - req.Header.Add("Authorization", fmt.Sprintf("Bearer %s", ccms.jwt)) - } - - // versioning the cc-metric-store query API. - // v2 = data with resampling - // v1 = data without resampling - q := req.URL.Query() - q.Add("version", "v2") - req.URL.RawQuery = q.Encode() - - res, err := ccms.client.Do(req) - if err != nil { - cclog.Errorf("Error while performing request: %s", err.Error()) - return nil, err - } - - if res.StatusCode != http.StatusOK { - return nil, fmt.Errorf("'%s': HTTP Status: %s", ccms.queryEndpoint, res.Status) - } - - var resBody ApiQueryResponse - if err := json.NewDecoder(bufio.NewReader(res.Body)).Decode(&resBody); err != nil { - cclog.Errorf("Error while decoding result body: %s", err.Error()) - return nil, err - } - - return &resBody, nil -} - func (ccms *CCMetricStore) LoadData( job *schema.Job, metrics []string, @@ -175,13 +49,13 @@ func (ccms *CCMetricStore) LoadData( ctx context.Context, resolution int, ) (schema.JobData, error) { - queries, assignedScope, err := ccms.buildQueries(job, metrics, scopes, resolution) + queries, assignedScope, err := ccms.buildQueries(job, metrics, scopes, int64(resolution)) if err != nil { cclog.Errorf("Error while building queries for jobId %d, Metrics %v, Scopes %v: %s", job.JobID, metrics, scopes, err.Error()) return nil, err } - req := ApiQueryRequest{ + req := memorystore.ApiQueryRequest{ Cluster: job.Cluster, From: job.StartTime, To: job.StartTime + int64(job.Duration), @@ -190,9 +64,9 @@ func (ccms *CCMetricStore) LoadData( WithData: true, } - resBody, err := ccms.doRequest(ctx, &req) + resBody, err := memorystore.FetchData(req) if err != nil { - cclog.Errorf("Error while performing request: %s", err.Error()) + cclog.Errorf("Error while fetching data : %s", err.Error()) return nil, err } @@ -200,7 +74,7 @@ func (ccms *CCMetricStore) LoadData( jobData := make(schema.JobData) for i, row := range resBody.Results { query := req.Queries[i] - metric := ccms.toLocalName(query.Metric) + metric := query.Metric scope := assignedScope[i] mc := archive.GetMetricConfig(job.Cluster, metric) if _, ok := jobData[metric]; !ok { @@ -209,7 +83,7 @@ func (ccms *CCMetricStore) LoadData( res := mc.Timestep if len(row) > 0 { - res = row[0].Resolution + res = int(row[0].Resolution) } jobMetric, ok := jobData[metric][scope] @@ -282,9 +156,9 @@ func (ccms *CCMetricStore) buildQueries( job *schema.Job, metrics []string, scopes []schema.MetricScope, - resolution int, -) ([]ApiQuery, []schema.MetricScope, error) { - queries := make([]ApiQuery, 0, len(metrics)*len(scopes)*len(job.Resources)) + resolution int64, +) ([]memorystore.ApiQuery, []schema.MetricScope, error) { + queries := make([]memorystore.ApiQuery, 0, len(metrics)*len(scopes)*len(job.Resources)) assignedScope := []schema.MetricScope{} subcluster, scerr := archive.GetSubCluster(job.Cluster, job.SubCluster) @@ -294,7 +168,6 @@ func (ccms *CCMetricStore) buildQueries( topology := subcluster.Topology for _, metric := range metrics { - remoteName := 
ccms.toRemoteName(metric) mc := archive.GetMetricConfig(job.Cluster, metric) if mc == nil { // return nil, fmt.Errorf("METRICDATA/CCMS > metric '%s' is not specified for cluster '%s'", metric, job.Cluster) @@ -306,7 +179,7 @@ func (ccms *CCMetricStore) buildQueries( if len(mc.SubClusters) != 0 { isRemoved := false for _, scConfig := range mc.SubClusters { - if scConfig.Name == job.SubCluster && scConfig.Remove == true { + if scConfig.Name == job.SubCluster && scConfig.Remove { isRemoved = true break } @@ -347,8 +220,8 @@ func (ccms *CCMetricStore) buildQueries( continue } - queries = append(queries, ApiQuery{ - Metric: remoteName, + queries = append(queries, memorystore.ApiQuery{ + Metric: metric, Hostname: host.Hostname, Aggregate: false, Type: &acceleratorString, @@ -365,8 +238,8 @@ func (ccms *CCMetricStore) buildQueries( continue } - queries = append(queries, ApiQuery{ - Metric: remoteName, + queries = append(queries, memorystore.ApiQuery{ + Metric: metric, Hostname: host.Hostname, Aggregate: true, Type: &acceleratorString, @@ -379,8 +252,8 @@ func (ccms *CCMetricStore) buildQueries( // HWThread -> HWThead if nativeScope == schema.MetricScopeHWThread && scope == schema.MetricScopeHWThread { - queries = append(queries, ApiQuery{ - Metric: remoteName, + queries = append(queries, memorystore.ApiQuery{ + Metric: metric, Hostname: host.Hostname, Aggregate: false, Type: &hwthreadString, @@ -395,8 +268,8 @@ func (ccms *CCMetricStore) buildQueries( if nativeScope == schema.MetricScopeHWThread && scope == schema.MetricScopeCore { cores, _ := topology.GetCoresFromHWThreads(hwthreads) for _, core := range cores { - queries = append(queries, ApiQuery{ - Metric: remoteName, + queries = append(queries, memorystore.ApiQuery{ + Metric: metric, Hostname: host.Hostname, Aggregate: true, Type: &hwthreadString, @@ -412,8 +285,8 @@ func (ccms *CCMetricStore) buildQueries( if nativeScope == schema.MetricScopeHWThread && scope == schema.MetricScopeSocket { sockets, _ := topology.GetSocketsFromHWThreads(hwthreads) for _, socket := range sockets { - queries = append(queries, ApiQuery{ - Metric: remoteName, + queries = append(queries, memorystore.ApiQuery{ + Metric: metric, Hostname: host.Hostname, Aggregate: true, Type: &hwthreadString, @@ -427,8 +300,8 @@ func (ccms *CCMetricStore) buildQueries( // HWThread -> Node if nativeScope == schema.MetricScopeHWThread && scope == schema.MetricScopeNode { - queries = append(queries, ApiQuery{ - Metric: remoteName, + queries = append(queries, memorystore.ApiQuery{ + Metric: metric, Hostname: host.Hostname, Aggregate: true, Type: &hwthreadString, @@ -442,8 +315,8 @@ func (ccms *CCMetricStore) buildQueries( // Core -> Core if nativeScope == schema.MetricScopeCore && scope == schema.MetricScopeCore { cores, _ := topology.GetCoresFromHWThreads(hwthreads) - queries = append(queries, ApiQuery{ - Metric: remoteName, + queries = append(queries, memorystore.ApiQuery{ + Metric: metric, Hostname: host.Hostname, Aggregate: false, Type: &coreString, @@ -458,8 +331,8 @@ func (ccms *CCMetricStore) buildQueries( if nativeScope == schema.MetricScopeCore && scope == schema.MetricScopeSocket { sockets, _ := topology.GetSocketsFromCores(hwthreads) for _, socket := range sockets { - queries = append(queries, ApiQuery{ - Metric: remoteName, + queries = append(queries, memorystore.ApiQuery{ + Metric: metric, Hostname: host.Hostname, Aggregate: true, Type: &coreString, @@ -474,8 +347,8 @@ func (ccms *CCMetricStore) buildQueries( // Core -> Node if nativeScope == schema.MetricScopeCore && scope 
== schema.MetricScopeNode { cores, _ := topology.GetCoresFromHWThreads(hwthreads) - queries = append(queries, ApiQuery{ - Metric: remoteName, + queries = append(queries, memorystore.ApiQuery{ + Metric: metric, Hostname: host.Hostname, Aggregate: true, Type: &coreString, @@ -489,8 +362,8 @@ func (ccms *CCMetricStore) buildQueries( // MemoryDomain -> MemoryDomain if nativeScope == schema.MetricScopeMemoryDomain && scope == schema.MetricScopeMemoryDomain { sockets, _ := topology.GetMemoryDomainsFromHWThreads(hwthreads) - queries = append(queries, ApiQuery{ - Metric: remoteName, + queries = append(queries, memorystore.ApiQuery{ + Metric: metric, Hostname: host.Hostname, Aggregate: false, Type: &memoryDomainString, @@ -504,8 +377,8 @@ func (ccms *CCMetricStore) buildQueries( // MemoryDoman -> Node if nativeScope == schema.MetricScopeMemoryDomain && scope == schema.MetricScopeNode { sockets, _ := topology.GetMemoryDomainsFromHWThreads(hwthreads) - queries = append(queries, ApiQuery{ - Metric: remoteName, + queries = append(queries, memorystore.ApiQuery{ + Metric: metric, Hostname: host.Hostname, Aggregate: true, Type: &memoryDomainString, @@ -519,8 +392,8 @@ func (ccms *CCMetricStore) buildQueries( // Socket -> Socket if nativeScope == schema.MetricScopeSocket && scope == schema.MetricScopeSocket { sockets, _ := topology.GetSocketsFromHWThreads(hwthreads) - queries = append(queries, ApiQuery{ - Metric: remoteName, + queries = append(queries, memorystore.ApiQuery{ + Metric: metric, Hostname: host.Hostname, Aggregate: false, Type: &socketString, @@ -534,8 +407,8 @@ func (ccms *CCMetricStore) buildQueries( // Socket -> Node if nativeScope == schema.MetricScopeSocket && scope == schema.MetricScopeNode { sockets, _ := topology.GetSocketsFromHWThreads(hwthreads) - queries = append(queries, ApiQuery{ - Metric: remoteName, + queries = append(queries, memorystore.ApiQuery{ + Metric: metric, Hostname: host.Hostname, Aggregate: true, Type: &socketString, @@ -548,8 +421,8 @@ func (ccms *CCMetricStore) buildQueries( // Node -> Node if nativeScope == schema.MetricScopeNode && scope == schema.MetricScopeNode { - queries = append(queries, ApiQuery{ - Metric: remoteName, + queries = append(queries, memorystore.ApiQuery{ + Metric: metric, Hostname: host.Hostname, Resolution: resolution, }) @@ -576,7 +449,7 @@ func (ccms *CCMetricStore) LoadStats( return nil, err } - req := ApiQueryRequest{ + req := memorystore.ApiQueryRequest{ Cluster: job.Cluster, From: job.StartTime, To: job.StartTime + int64(job.Duration), @@ -585,16 +458,16 @@ func (ccms *CCMetricStore) LoadStats( WithData: false, } - resBody, err := ccms.doRequest(ctx, &req) + resBody, err := memorystore.FetchData(req) if err != nil { - cclog.Errorf("Error while performing request: %s", err.Error()) + cclog.Errorf("Error while fetching data : %s", err.Error()) return nil, err } stats := make(map[string]map[string]schema.MetricStatistics, len(metrics)) for i, res := range resBody.Results { query := req.Queries[i] - metric := ccms.toLocalName(query.Metric) + metric := query.Metric data := res[0] if data.Error != nil { cclog.Errorf("fetching %s for node %s failed: %s", metric, query.Hostname, *data.Error) @@ -635,7 +508,7 @@ func (ccms *CCMetricStore) LoadScopedStats( return nil, err } - req := ApiQueryRequest{ + req := memorystore.ApiQueryRequest{ Cluster: job.Cluster, From: job.StartTime, To: job.StartTime + int64(job.Duration), @@ -644,9 +517,9 @@ func (ccms *CCMetricStore) LoadScopedStats( WithData: false, } - resBody, err := ccms.doRequest(ctx, &req) + 
resBody, err := memorystore.FetchData(req) if err != nil { - cclog.Errorf("Error while performing request: %s", err.Error()) + cclog.Errorf("Error while fetching data : %s", err.Error()) return nil, err } @@ -655,7 +528,7 @@ func (ccms *CCMetricStore) LoadScopedStats( for i, row := range resBody.Results { query := req.Queries[i] - metric := ccms.toLocalName(query.Metric) + metric := query.Metric scope := assignedScope[i] if _, ok := scopedJobStats[metric]; !ok { @@ -721,7 +594,7 @@ func (ccms *CCMetricStore) LoadNodeData( from, to time.Time, ctx context.Context, ) (map[string]map[string][]*schema.JobMetric, error) { - req := ApiQueryRequest{ + req := memorystore.ApiQueryRequest{ Cluster: cluster, From: from.Unix(), To: to.Unix(), @@ -730,38 +603,36 @@ func (ccms *CCMetricStore) LoadNodeData( } if nodes == nil { - for _, metric := range metrics { - req.ForAllNodes = append(req.ForAllNodes, ccms.toRemoteName(metric)) - } + req.ForAllNodes = append(req.ForAllNodes, metrics...) } else { for _, node := range nodes { for _, metric := range metrics { - req.Queries = append(req.Queries, ApiQuery{ + req.Queries = append(req.Queries, memorystore.ApiQuery{ Hostname: node, - Metric: ccms.toRemoteName(metric), + Metric: metric, Resolution: 0, // Default for Node Queries: Will return metric $Timestep Resolution }) } } } - resBody, err := ccms.doRequest(ctx, &req) + resBody, err := memorystore.FetchData(req) if err != nil { - cclog.Errorf("Error while performing request: %s", err.Error()) + cclog.Errorf("Error while fetching data : %s", err.Error()) return nil, err } var errors []string data := make(map[string]map[string][]*schema.JobMetric) for i, res := range resBody.Results { - var query ApiQuery + var query memorystore.ApiQuery if resBody.Queries != nil { query = resBody.Queries[i] } else { query = req.Queries[i] } - metric := ccms.toLocalName(query.Metric) + metric := query.Metric qdata := res[0] if qdata.Error != nil { /* Build list for "partial errors", if any */ @@ -861,13 +732,13 @@ func (ccms *CCMetricStore) LoadNodeListData( // Note: Order of node data is not guaranteed after this point, but contents match page and filter criteria - queries, assignedScope, err := ccms.buildNodeQueries(cluster, subCluster, nodes, metrics, scopes, resolution) + queries, assignedScope, err := ccms.buildNodeQueries(cluster, subCluster, nodes, metrics, scopes, int64(resolution)) if err != nil { cclog.Errorf("Error while building node queries for Cluster %s, SubCLuster %s, Metrics %v, Scopes %v: %s", cluster, subCluster, metrics, scopes, err.Error()) return nil, totalNodes, hasNextPage, err } - req := ApiQueryRequest{ + req := memorystore.ApiQueryRequest{ Cluster: cluster, Queries: queries, From: from.Unix(), @@ -876,29 +747,29 @@ func (ccms *CCMetricStore) LoadNodeListData( WithData: true, } - resBody, err := ccms.doRequest(ctx, &req) + resBody, err := memorystore.FetchData(req) if err != nil { - cclog.Errorf("Error while performing request: %s", err.Error()) + cclog.Errorf("Error while fetching data : %s", err.Error()) return nil, totalNodes, hasNextPage, err } var errors []string data := make(map[string]schema.JobData) for i, row := range resBody.Results { - var query ApiQuery + var query memorystore.ApiQuery if resBody.Queries != nil { query = resBody.Queries[i] } else { query = req.Queries[i] } // qdata := res[0] - metric := ccms.toLocalName(query.Metric) + metric := query.Metric scope := assignedScope[i] mc := archive.GetMetricConfig(cluster, metric) res := mc.Timestep if len(row) > 0 { - res = 
row[0].Resolution + res = int(row[0].Resolution) } // Init Nested Map Data Structures If Not Found @@ -971,9 +842,9 @@ func (ccms *CCMetricStore) buildNodeQueries( nodes []string, metrics []string, scopes []schema.MetricScope, - resolution int, -) ([]ApiQuery, []schema.MetricScope, error) { - queries := make([]ApiQuery, 0, len(metrics)*len(scopes)*len(nodes)) + resolution int64, +) ([]memorystore.ApiQuery, []schema.MetricScope, error) { + queries := make([]memorystore.ApiQuery, 0, len(metrics)*len(scopes)*len(nodes)) assignedScope := []schema.MetricScope{} // Get Topol before loop if subCluster given @@ -988,7 +859,7 @@ func (ccms *CCMetricStore) buildNodeQueries( } for _, metric := range metrics { - remoteName := ccms.toRemoteName(metric) + metric := metric mc := archive.GetMetricConfig(cluster, metric) if mc == nil { // return nil, fmt.Errorf("METRICDATA/CCMS > metric '%s' is not specified for cluster '%s'", metric, cluster) @@ -1000,7 +871,7 @@ func (ccms *CCMetricStore) buildNodeQueries( if mc.SubClusters != nil { isRemoved := false for _, scConfig := range mc.SubClusters { - if scConfig.Name == subCluster && scConfig.Remove == true { + if scConfig.Name == subCluster && scConfig.Remove { isRemoved = true break } @@ -1056,8 +927,8 @@ func (ccms *CCMetricStore) buildNodeQueries( continue } - queries = append(queries, ApiQuery{ - Metric: remoteName, + queries = append(queries, memorystore.ApiQuery{ + Metric: metric, Hostname: hostname, Aggregate: false, Type: &acceleratorString, @@ -1074,8 +945,8 @@ func (ccms *CCMetricStore) buildNodeQueries( continue } - queries = append(queries, ApiQuery{ - Metric: remoteName, + queries = append(queries, memorystore.ApiQuery{ + Metric: metric, Hostname: hostname, Aggregate: true, Type: &acceleratorString, @@ -1088,8 +959,8 @@ func (ccms *CCMetricStore) buildNodeQueries( // HWThread -> HWThead if nativeScope == schema.MetricScopeHWThread && scope == schema.MetricScopeHWThread { - queries = append(queries, ApiQuery{ - Metric: remoteName, + queries = append(queries, memorystore.ApiQuery{ + Metric: metric, Hostname: hostname, Aggregate: false, Type: &hwthreadString, @@ -1104,8 +975,8 @@ func (ccms *CCMetricStore) buildNodeQueries( if nativeScope == schema.MetricScopeHWThread && scope == schema.MetricScopeCore { cores, _ := topology.GetCoresFromHWThreads(topology.Node) for _, core := range cores { - queries = append(queries, ApiQuery{ - Metric: remoteName, + queries = append(queries, memorystore.ApiQuery{ + Metric: metric, Hostname: hostname, Aggregate: true, Type: &hwthreadString, @@ -1121,8 +992,8 @@ func (ccms *CCMetricStore) buildNodeQueries( if nativeScope == schema.MetricScopeHWThread && scope == schema.MetricScopeSocket { sockets, _ := topology.GetSocketsFromHWThreads(topology.Node) for _, socket := range sockets { - queries = append(queries, ApiQuery{ - Metric: remoteName, + queries = append(queries, memorystore.ApiQuery{ + Metric: metric, Hostname: hostname, Aggregate: true, Type: &hwthreadString, @@ -1136,8 +1007,8 @@ func (ccms *CCMetricStore) buildNodeQueries( // HWThread -> Node if nativeScope == schema.MetricScopeHWThread && scope == schema.MetricScopeNode { - queries = append(queries, ApiQuery{ - Metric: remoteName, + queries = append(queries, memorystore.ApiQuery{ + Metric: metric, Hostname: hostname, Aggregate: true, Type: &hwthreadString, @@ -1151,8 +1022,8 @@ func (ccms *CCMetricStore) buildNodeQueries( // Core -> Core if nativeScope == schema.MetricScopeCore && scope == schema.MetricScopeCore { cores, _ := 
topology.GetCoresFromHWThreads(topology.Node) - queries = append(queries, ApiQuery{ - Metric: remoteName, + queries = append(queries, memorystore.ApiQuery{ + Metric: metric, Hostname: hostname, Aggregate: false, Type: &coreString, @@ -1167,8 +1038,8 @@ func (ccms *CCMetricStore) buildNodeQueries( if nativeScope == schema.MetricScopeCore && scope == schema.MetricScopeSocket { sockets, _ := topology.GetSocketsFromCores(topology.Node) for _, socket := range sockets { - queries = append(queries, ApiQuery{ - Metric: remoteName, + queries = append(queries, memorystore.ApiQuery{ + Metric: metric, Hostname: hostname, Aggregate: true, Type: &coreString, @@ -1183,8 +1054,8 @@ func (ccms *CCMetricStore) buildNodeQueries( // Core -> Node if nativeScope == schema.MetricScopeCore && scope == schema.MetricScopeNode { cores, _ := topology.GetCoresFromHWThreads(topology.Node) - queries = append(queries, ApiQuery{ - Metric: remoteName, + queries = append(queries, memorystore.ApiQuery{ + Metric: metric, Hostname: hostname, Aggregate: true, Type: &coreString, @@ -1198,8 +1069,8 @@ func (ccms *CCMetricStore) buildNodeQueries( // MemoryDomain -> MemoryDomain if nativeScope == schema.MetricScopeMemoryDomain && scope == schema.MetricScopeMemoryDomain { sockets, _ := topology.GetMemoryDomainsFromHWThreads(topology.Node) - queries = append(queries, ApiQuery{ - Metric: remoteName, + queries = append(queries, memorystore.ApiQuery{ + Metric: metric, Hostname: hostname, Aggregate: false, Type: &memoryDomainString, @@ -1213,8 +1084,8 @@ func (ccms *CCMetricStore) buildNodeQueries( // MemoryDoman -> Node if nativeScope == schema.MetricScopeMemoryDomain && scope == schema.MetricScopeNode { sockets, _ := topology.GetMemoryDomainsFromHWThreads(topology.Node) - queries = append(queries, ApiQuery{ - Metric: remoteName, + queries = append(queries, memorystore.ApiQuery{ + Metric: metric, Hostname: hostname, Aggregate: true, Type: &memoryDomainString, @@ -1228,8 +1099,8 @@ func (ccms *CCMetricStore) buildNodeQueries( // Socket -> Socket if nativeScope == schema.MetricScopeSocket && scope == schema.MetricScopeSocket { sockets, _ := topology.GetSocketsFromHWThreads(topology.Node) - queries = append(queries, ApiQuery{ - Metric: remoteName, + queries = append(queries, memorystore.ApiQuery{ + Metric: metric, Hostname: hostname, Aggregate: false, Type: &socketString, @@ -1243,8 +1114,8 @@ func (ccms *CCMetricStore) buildNodeQueries( // Socket -> Node if nativeScope == schema.MetricScopeSocket && scope == schema.MetricScopeNode { sockets, _ := topology.GetSocketsFromHWThreads(topology.Node) - queries = append(queries, ApiQuery{ - Metric: remoteName, + queries = append(queries, memorystore.ApiQuery{ + Metric: metric, Hostname: hostname, Aggregate: true, Type: &socketString, @@ -1257,8 +1128,8 @@ func (ccms *CCMetricStore) buildNodeQueries( // Node -> Node if nativeScope == schema.MetricScopeNode && scope == schema.MetricScopeNode { - queries = append(queries, ApiQuery{ - Metric: remoteName, + queries = append(queries, memorystore.ApiQuery{ + Metric: metric, Hostname: hostname, Resolution: resolution, }) diff --git a/internal/metricdata/utils.go b/internal/metricdata/utils.go index 59e640e..2e0d423 100644 --- a/internal/metricdata/utils.go +++ b/internal/metricdata/utils.go @@ -74,9 +74,8 @@ func (tmdr *TestMetricDataRepository) LoadNodeListData( } func DeepCopy(jd_temp schema.JobData) schema.JobData { - var jd schema.JobData - jd = make(schema.JobData, len(jd_temp)) + jd := make(schema.JobData, len(jd_temp)) for k, v := range jd_temp { 
jd[k] = make(map[schema.MetricScope]*schema.JobMetric, len(jd_temp[k])) for k_, v_ := range v { diff --git a/pkg/archive/clusterConfig.go b/pkg/archive/clusterConfig.go index 51b89b1..3317487 100644 --- a/pkg/archive/clusterConfig.go +++ b/pkg/archive/clusterConfig.go @@ -8,6 +8,8 @@ import ( "errors" "fmt" + "github.com/ClusterCockpit/cc-backend/internal/config" + "github.com/ClusterCockpit/cc-backend/internal/memorystore" cclog "github.com/ClusterCockpit/cc-lib/ccLogger" "github.com/ClusterCockpit/cc-lib/schema" ) @@ -31,6 +33,8 @@ func initClusterConfig() error { return err } + memorystore.Clusters = append(memorystore.Clusters, cluster.Name) + if len(cluster.Name) == 0 || len(cluster.MetricConfig) == 0 || len(cluster.SubClusters) == 0 { @@ -122,6 +126,16 @@ func initClusterConfig() error { } ml.Availability = append(metricLookup[mc.Name].Availability, availability) metricLookup[mc.Name] = ml + + agg, err := config.AssignAggregationStratergy(mc.Aggregation) + if err != nil { + return fmt.Errorf("ARCHIVE/CLUSTERCONFIG > in %s/cluster.json: %w", cluster.Name, err) + } + + config.AddMetric(mc.Name, config.MetricConfig{ + Frequency: int64(mc.Timestep), + Aggregation: agg, + }) } Clusters = append(Clusters, cluster) From af43901ca3ddd54cdbb5377682fb4abb3bcf1bbc Mon Sep 17 00:00:00 2001 From: Aditya Ujeniya Date: Mon, 8 Sep 2025 22:54:13 +0200 Subject: [PATCH 27/40] Trial and Test MetricStore components --- .gitignore | 5 + api/schema.graphqls | 4 +- api/swagger.json | 30 +++-- api/swagger.yaml | 25 ++-- cmd/cc-backend/main.go | 6 +- configs/config-demo.json | 18 ++- configs/config.json | 18 ++- go.mod | 7 +- go.sum | 19 +++ internal/api/api_test.go | 4 +- internal/api/docs.go | 30 +++-- internal/api/rest.go | 1 - internal/avro/avroHelper.go | 2 +- internal/config/memorystore.go | 8 +- internal/graph/generated/generated.go | 97 +++++---------- internal/graph/model/models_gen.go | 2 +- internal/graph/schema.resolvers.go | 5 - internal/memorystore/checkpoint.go | 2 +- internal/memorystore/lineprotocol.go | 110 +++++++++--------- internal/memorystore/memorystore.go | 16 +-- internal/repository/job.go | 14 +-- internal/repository/jobCreate.go | 2 +- internal/repository/jobFind.go | 10 +- internal/repository/jobQuery.go | 6 +- internal/repository/stats.go | 2 +- internal/tagger/jobclasses/highload.json | 2 +- .../tagger/jobclasses/lowUtilization.json | 2 +- internal/tagger/jobclasses/lowload.json | 2 +- internal/taskManager/commitJobService.go | 4 +- internal/taskManager/taskManager.go | 5 - internal/taskManager/updateDurationService.go | 4 +- .../taskManager/updateFootprintService.go | 4 +- startDemo.sh | 37 +++++- test_ccms_write_api.sh.bak | 110 ++++++++++++++++++ 34 files changed, 394 insertions(+), 219 deletions(-) create mode 100755 test_ccms_write_api.sh.bak diff --git a/.gitignore b/.gitignore index 75cc004..963073d 100644 --- a/.gitignore +++ b/.gitignore @@ -9,6 +9,11 @@ /var/*.db /var/*.txt +/var/checkpoints* + +migrateTimestamps.pl +test_ccms_write_api.sh + /web/frontend/public/build /web/frontend/node_modules diff --git a/api/schema.graphqls b/api/schema.graphqls index 794c630..070b5b7 100644 --- a/api/schema.graphqls +++ b/api/schema.graphqls @@ -37,7 +37,7 @@ type Job { numAcc: Int! energy: Float! SMT: Int! - exclusive: Int! + shared: String! partition: String! arrayJobId: Int! monitoringStatus: Int! @@ -419,7 +419,7 @@ input JobFilter { startTime: TimeRange state: [JobState!] metricStats: [MetricStatItem!] 
- exclusive: Int + shared: StringInput node: StringInput } diff --git a/api/swagger.json b/api/swagger.json index 87bf3ed..c60810a 100644 --- a/api/swagger.json +++ b/api/swagger.json @@ -1394,12 +1394,6 @@ "format": "float64" } }, - "exclusive": { - "type": "integer", - "maximum": 2, - "minimum": 0, - "example": 1 - }, "footprint": { "type": "object", "additionalProperties": { @@ -1416,12 +1410,18 @@ }, "jobState": { "enum": [ - "completed", - "failed", + "boot_fail", "cancelled", - "stopped", - "timeout", - "out_of_memory" + "completed", + "deadline", + "failed", + "node_fail", + "out-of-memory", + "pending", + "preempted", + "running", + "suspended", + "timeout" ], "allOf": [ { @@ -1477,6 +1477,14 @@ "$ref": "#/definitions/schema.Resource" } }, + "shared": { + "type": "string", + "enum": [ + "none", + "single_user", + "multi_user" + ] + }, "smt": { "type": "integer", "example": 4 diff --git a/api/swagger.yaml b/api/swagger.yaml index 06caa56..6a4adbd 100644 --- a/api/swagger.yaml +++ b/api/swagger.yaml @@ -207,11 +207,6 @@ definitions: format: float64 type: number type: object - exclusive: - example: 1 - maximum: 2 - minimum: 0 - type: integer footprint: additionalProperties: format: float64 @@ -226,12 +221,18 @@ definitions: allOf: - $ref: '#/definitions/schema.JobState' enum: - - completed - - failed + - boot_fail - cancelled - - stopped + - completed + - deadline + - failed + - node_fail + - out-of-memory + - pending + - preempted + - running + - suspended - timeout - - out_of_memory example: completed metaData: additionalProperties: @@ -269,6 +270,12 @@ definitions: items: $ref: '#/definitions/schema.Resource' type: array + shared: + enum: + - none + - single_user + - multi_user + type: string smt: example: 4 type: integer diff --git a/cmd/cc-backend/main.go b/cmd/cc-backend/main.go index 9c7ad1f..0146118 100644 --- a/cmd/cc-backend/main.go +++ b/cmd/cc-backend/main.go @@ -251,13 +251,13 @@ func main() { var wg sync.WaitGroup //Metric Store starts after all flags have been processes - memorystore.Init(wg) + memorystore.Init(&wg) archiver.Start(repository.GetJobRepository()) // // Comment out - // taskManager.Start(ccconf.GetPackageConfig("cron"), - // ccconf.GetPackageConfig("archive")) + taskManager.Start(ccconf.GetPackageConfig("cron"), + ccconf.GetPackageConfig("archive")) serverInit() diff --git a/configs/config-demo.json b/configs/config-demo.json index a31d65d..3c0d858 100644 --- a/configs/config-demo.json +++ b/configs/config-demo.json @@ -4,11 +4,23 @@ "short-running-jobs-duration": 300, "resampling": { "trigger": 30, - "resolutions": [600, 300, 120, 60] + "resolutions": [ + 600, + 300, + 120, + 60 + ] }, - "apiAllowedIPs": ["*"], + "apiAllowedIPs": [ + "*" + ], "emission-constant": 317 }, + "cron": { + "commit-job-worker": "2m", + "duration-worker": "5m", + "footprint-worker": "10m" + }, "archive": { "kind": "file", "path": "./var/job-archive" @@ -73,4 +85,4 @@ }, "retention-in-memory": "48h" } -} +} \ No newline at end of file diff --git a/configs/config.json b/configs/config.json index ed7d546..505e446 100644 --- a/configs/config.json +++ b/configs/config.json @@ -6,13 +6,25 @@ "user": "clustercockpit", "group": "clustercockpit", "validate": false, - "apiAllowedIPs": ["*"], + "apiAllowedIPs": [ + "*" + ], "short-running-jobs-duration": 300, "resampling": { "trigger": 30, - "resolutions": [600, 300, 120, 60] + "resolutions": [ + 600, + 300, + 120, + 60 + ] } }, + "cron": { + "commit-job-worker": "2m", + "duration-worker": "5m", + "footprint-worker": "10m" + }, "archive": { 
"kind": "file", "path": "./var/job-archive" @@ -41,4 +53,4 @@ } } ] -} +} \ No newline at end of file diff --git a/go.mod b/go.mod index 5858cff..e0add97 100644 --- a/go.mod +++ b/go.mod @@ -19,9 +19,12 @@ require ( github.com/gorilla/handlers v1.5.2 github.com/gorilla/mux v1.8.1 github.com/gorilla/sessions v1.4.0 + github.com/influxdata/line-protocol/v2 v2.2.1 github.com/jmoiron/sqlx v1.4.0 github.com/joho/godotenv v1.5.1 + github.com/linkedin/goavro/v2 v2.14.0 github.com/mattn/go-sqlite3 v1.14.24 + github.com/nats-io/nats.go v1.44.0 github.com/prometheus/client_golang v1.23.0 github.com/prometheus/common v0.65.0 github.com/qustavo/sqlhooks/v2 v2.1.0 @@ -62,14 +65,16 @@ require ( github.com/josharian/intern v1.0.0 // indirect github.com/jpillora/backoff v1.0.0 // indirect github.com/json-iterator/go v1.1.12 // indirect + github.com/klauspost/compress v1.18.0 // indirect github.com/lann/builder v0.0.0-20180802200727-47ae307949d0 // indirect github.com/lann/ps v0.0.0-20150810152359-62de8c46ede0 // indirect - github.com/linkedin/goavro/v2 v2.14.0 // indirect github.com/mailru/easyjson v0.9.0 // indirect github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect github.com/modern-go/reflect2 v1.0.2 // indirect github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect github.com/mwitkow/go-conntrack v0.0.0-20190716064945-2f068394615f // indirect + github.com/nats-io/nkeys v0.4.11 // indirect + github.com/nats-io/nuid v1.0.1 // indirect github.com/prometheus/client_model v0.6.2 // indirect github.com/prometheus/procfs v0.16.1 // indirect github.com/robfig/cron/v3 v3.0.1 // indirect diff --git a/go.sum b/go.sum index 3c51770..792ec1c 100644 --- a/go.sum +++ b/go.sum @@ -38,6 +38,7 @@ github.com/coreos/go-oidc/v3 v3.12.0 h1:sJk+8G2qq94rDI6ehZ71Bol3oUHy63qNYmkiSjrc github.com/coreos/go-oidc/v3 v3.12.0/go.mod h1:gE3LgjOgFoHi9a4ce4/tJczr0Ai2/BoDhf0r5lltWI0= github.com/cpuguy83/go-md2man/v2 v2.0.7 h1:zbFlGlXEAKlwXpmvle3d8Oe3YnkKIK4xSRTd3sHPnBo= github.com/cpuguy83/go-md2man/v2 v2.0.7/go.mod h1:oOW0eioCTA6cOiMLiUPZOpcVxMig6NIQQ7OS05n1F4g= +github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= @@ -57,6 +58,10 @@ github.com/expr-lang/expr v1.17.5 h1:i1WrMvcdLF249nSNlpQZN1S6NXuW9WaOfF5tPi3aw3k github.com/expr-lang/expr v1.17.5/go.mod h1:8/vRC7+7HBzESEqt5kKpYXxrxkr31SaO8r40VO/1IT4= github.com/felixge/httpsnoop v1.0.4 h1:NFTV2Zj1bL4mc9sqWACXbQFVBBg2W3GPvqp8/ESS2Wg= github.com/felixge/httpsnoop v1.0.4/go.mod h1:m8KPJKqk1gH5J9DgRY2ASl2lWCfGKXixSwevea8zH2U= +github.com/frankban/quicktest v1.11.0/go.mod h1:K+q6oSqb0W0Ininfk863uOk1lMy69l/P6txr3mVT54s= +github.com/frankban/quicktest v1.11.2/go.mod h1:K+q6oSqb0W0Ininfk863uOk1lMy69l/P6txr3mVT54s= +github.com/frankban/quicktest v1.13.0 h1:yNZif1OkDfNoDfb9zZa9aXIpejNR4F23Wely0c+Qdqk= +github.com/frankban/quicktest v1.13.0/go.mod h1:qLE0fzW0VuyUAJgPU19zByoIr0HtCHN/r/VLSOOIySU= github.com/fsnotify/fsnotify v1.9.0 h1:2Ml+OJNzbYCTzsxtv8vKSFD9PbJjmhYF14k/jKC7S9k= github.com/fsnotify/fsnotify v1.9.0/go.mod h1:8jBTzvmWwFyi3Pb8djgCCO5IBqzKJ/Jwo8TRcHyHii0= github.com/go-asn1-ber/asn1-ber v1.5.7 h1:DTX+lbVTWaTw1hQ+PbZPlnDZPEIs0SS/GCZAl535dDk= @@ -94,6 +99,8 @@ github.com/golang-migrate/migrate/v4 v4.18.2/go.mod h1:2CM6tJvn2kqPXwnXO/d3rAQYi 
github.com/golang/snappy v0.0.1/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= github.com/golang/snappy v0.0.4 h1:yAGX7huGHXlcLOEtBnF4w7FQwA26wojNCwOYAEhLjQM= github.com/golang/snappy v0.0.4/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= +github.com/google/go-cmp v0.5.2/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= +github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8= github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU= @@ -130,6 +137,11 @@ github.com/influxdata/influxdb-client-go/v2 v2.14.0 h1:AjbBfJuq+QoaXNcrova8smSjw github.com/influxdata/influxdb-client-go/v2 v2.14.0/go.mod h1:Ahpm3QXKMJslpXl3IftVLVezreAUtBOTZssDrjZEFHI= github.com/influxdata/line-protocol v0.0.0-20210922203350-b1ad95c89adf h1:7JTmneyiNEwVBOHSjoMxiWAqB992atOeepeFYegn5RU= github.com/influxdata/line-protocol v0.0.0-20210922203350-b1ad95c89adf/go.mod h1:xaLFMmpvUxqXtVkUJfg9QmT88cDaCJ3ZKgdZ78oO8Qo= +github.com/influxdata/line-protocol-corpus v0.0.0-20210519164801-ca6fa5da0184/go.mod h1:03nmhxzZ7Xk2pdG+lmMd7mHDfeVOYFyhOgwO61qWU98= +github.com/influxdata/line-protocol-corpus v0.0.0-20210922080147-aa28ccfb8937 h1:MHJNQ+p99hFATQm6ORoLmpUCF7ovjwEFshs/NHzAbig= +github.com/influxdata/line-protocol-corpus v0.0.0-20210922080147-aa28ccfb8937/go.mod h1:BKR9c0uHSmRgM/se9JhFHtTT7JTO67X23MtKMHtZcpo= +github.com/influxdata/line-protocol/v2 v2.0.0-20210312151457-c52fdecb625a/go.mod h1:6+9Xt5Sq1rWx+glMgxhcg2c0DUaehK+5TDcPZ76GypY= +github.com/influxdata/line-protocol/v2 v2.1.0/go.mod h1:QKw43hdUBg3GTk2iC3iyCxksNj7PX9aUSeYOYE/ceHY= github.com/influxdata/line-protocol/v2 v2.2.1 h1:EAPkqJ9Km4uAxtMRgUubJyqAr6zgWM0dznKMLRauQRE= github.com/influxdata/line-protocol/v2 v2.2.1/go.mod h1:DmB3Cnh+3oxmG6LOBIxce4oaL4CPj3OmMPgvauXh+tM= github.com/jcmturner/aescts/v2 v2.0.0 h1:9YKLH6ey7H4eDBXW8khjYslgyqG2xZikXP0EQFKrle8= @@ -158,8 +170,11 @@ github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnr github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo= github.com/klauspost/compress v1.18.0 h1:c/Cqfb0r+Yi+JtIEq73FWXVkRonBlf0CRNYc8Zttxdo= github.com/klauspost/compress v1.18.0/go.mod h1:2Pp+KzxcywXVXMr50+X0Q/Lsb43OQHYWRCY2AiWywWQ= +github.com/kr/pretty v0.2.1/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI= github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE= github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk= +github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= +github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= github.com/lann/builder v0.0.0-20180802200727-47ae307949d0 h1:SOEGU9fKiNWd/HOJuq6+3iTQz8KNCLtVX6idSoTLdUw= @@ -198,6 +213,7 @@ github.com/nats-io/nkeys v0.4.11 h1:q44qGV008kYd9W1b1nEBkNzvnWxtRSQ7A8BoqRrcfa0= github.com/nats-io/nkeys v0.4.11/go.mod h1:szDimtgmfOi9n25JpfIdGw12tZFYXqhGxjhVxsatHVE= github.com/nats-io/nuid v1.0.1 h1:5iA8DT8V7q8WK2EScv2padNa/rTESc1KdnPw4TC2paw= github.com/nats-io/nuid v1.0.1/go.mod h1:19wcPz3Ph3q0Jbyiqsd0kePYG7A95tJPxeL+1OSON2c= +github.com/niemeyer/pretty v0.0.0-20200227124842-a10e7caefd8e/go.mod 
h1:zD1mROLANZcx1PVRCS0qkT7pwLkGfwJo4zjcN/Tysno= github.com/oapi-codegen/runtime v1.1.1 h1:EXLHh0DXIJnWhdRPN2w4MXAzFyE4CskzhNLUmtpMYro= github.com/oapi-codegen/runtime v1.1.1/go.mod h1:SK9X900oXmPWilYR5/WKPzt3Kqxn/uS/+lbpREv+eCg= github.com/opencontainers/go-digest v1.0.0 h1:apOUWs51W5PlhuyGyz9FCeeBIOUDA/6nW8Oi/yOhh5U= @@ -358,15 +374,18 @@ golang.org/x/tools v0.21.1-0.20240508182429-e35e4ccd0d2d/go.mod h1:aiJjzUbINMkxb golang.org/x/tools v0.35.0 h1:mBffYraMEf7aa0sB+NuKnuCy8qI/9Bughn8dC2Gu5r0= golang.org/x/tools v0.35.0/go.mod h1:NKdj5HkL/73byiZSJjqJgKn3ep7KjFkBOkR/Hps3VPw= golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= google.golang.org/protobuf v1.36.6 h1:z1NpPI8ku2WgiWnf+t9wTPsn6eP1L7ksHUlkfLvd9xY= google.golang.org/protobuf v1.36.6/go.mod h1:jduwjTPXsFjZGTmRluh+L6NjiWu7pchiJ2/5YcXBHnY= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/check.v1 v1.0.0-20200227125254-8fa46927fb4f/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q= gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY= gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ= gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= +gopkg.in/yaml.v3 v3.0.0-20200615113413-eeeca48fe776/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= sigs.k8s.io/yaml v1.6.0 h1:G8fkbMSAFqgEFgh4b1wmtzDnioxFCUgTZhlbj5P9QYs= diff --git a/internal/api/api_test.go b/internal/api/api_test.go index 9f47a1f..1c81fc9 100644 --- a/internal/api/api_test.go +++ b/internal/api/api_test.go @@ -241,7 +241,7 @@ func TestRestApi(t *testing.T) { "numNodes": 1, "numHwthreads": 8, "numAcc": 0, - "exclusive": 1, + "shared": "none", "monitoringStatus": 1, "smt": 1, "resources": [ @@ -396,7 +396,7 @@ func TestRestApi(t *testing.T) { "partition": "default", "walltime": 3600, "numNodes": 1, - "exclusive": 1, + "shared": "none", "monitoringStatus": 1, "smt": 1, "resources": [ diff --git a/internal/api/docs.go b/internal/api/docs.go index 50cab92..c10745c 100644 --- a/internal/api/docs.go +++ b/internal/api/docs.go @@ -1401,12 +1401,6 @@ const docTemplate = `{ "format": "float64" } }, - "exclusive": { - "type": "integer", - "maximum": 2, - "minimum": 0, - "example": 1 - }, "footprint": { "type": "object", "additionalProperties": { @@ -1423,12 +1417,18 @@ const docTemplate = `{ }, "jobState": { "enum": [ - "completed", - "failed", + "boot_fail", "cancelled", - "stopped", - "timeout", - "out_of_memory" + "completed", + "deadline", + "failed", + "node_fail", + "out-of-memory", + "pending", + "preempted", + "running", + "suspended", + "timeout" ], "allOf": [ { @@ -1484,6 +1484,14 @@ const docTemplate = `{ "$ref": "#/definitions/schema.Resource" } }, + "shared": { + "type": "string", + "enum": [ + "none", + "single_user", + "multi_user" + ] + }, "smt": { "type": "integer", "example": 4 diff --git 
a/internal/api/rest.go b/internal/api/rest.go index 8cefe48..fcadc90 100644 --- a/internal/api/rest.go +++ b/internal/api/rest.go @@ -97,7 +97,6 @@ func (api *RestApi) MountUserApiRoutes(r *mux.Router) { } func (api *RestApi) MountMetricStoreApiRoutes(r *mux.Router) { - r.StrictSlash(true) // REST API Uses TokenAuth r.HandleFunc("/api/free", memorystore.HandleFree).Methods(http.MethodPost) r.HandleFunc("/api/write", memorystore.HandleWrite).Methods(http.MethodPost) diff --git a/internal/avro/avroHelper.go b/internal/avro/avroHelper.go index ea733cd..21a5617 100644 --- a/internal/avro/avroHelper.go +++ b/internal/avro/avroHelper.go @@ -29,7 +29,7 @@ func DataStaging(wg *sync.WaitGroup, ctx context.Context) { return case val := <-LineProtocolMessages: //Fetch the frequency of the metric from the global configuration - freq, err := config.MetricStoreKeys.GetMetricFrequency(val.MetricName) + freq, err := config.GetMetricFrequency(val.MetricName) if err != nil { fmt.Printf("Error fetching metric frequency: %s\n", err) continue diff --git a/internal/config/memorystore.go b/internal/config/memorystore.go index b9273b4..c277045 100644 --- a/internal/config/memorystore.go +++ b/internal/config/memorystore.go @@ -97,10 +97,10 @@ func InitMetricStore(msConfig json.RawMessage) { } } -func (c *MetricStoreConfig) GetMetricFrequency(metricName string) (int64, error) { - // if metric, ok := c.Metrics[metricName]; ok { - // return metric.Frequency, nil - // } +func GetMetricFrequency(metricName string) (int64, error) { + if metric, ok := Metrics[metricName]; ok { + return metric.Frequency, nil + } return 0, fmt.Errorf("[METRICSTORE]> metric %s not found", metricName) } diff --git a/internal/graph/generated/generated.go b/internal/graph/generated/generated.go index 3a85858..eed946d 100644 --- a/internal/graph/generated/generated.go +++ b/internal/graph/generated/generated.go @@ -118,7 +118,6 @@ type ComplexityRoot struct { Duration func(childComplexity int) int Energy func(childComplexity int) int EnergyFootprint func(childComplexity int) int - Exclusive func(childComplexity int) int Footprint func(childComplexity int) int ID func(childComplexity int) int JobID func(childComplexity int) int @@ -131,6 +130,7 @@ type ComplexityRoot struct { Project func(childComplexity int) int Resources func(childComplexity int) int SMT func(childComplexity int) int + Shared func(childComplexity int) int StartTime func(childComplexity int) int State func(childComplexity int) int SubCluster func(childComplexity int) int @@ -425,8 +425,6 @@ type ClusterResolver interface { type JobResolver interface { StartTime(ctx context.Context, obj *schema.Job) (*time.Time, error) - Exclusive(ctx context.Context, obj *schema.Job) (int, error) - Tags(ctx context.Context, obj *schema.Job) ([]*schema.Tag, error) ConcurrentJobs(ctx context.Context, obj *schema.Job) (*model.JobLinkResultList, error) @@ -726,13 +724,6 @@ func (e *executableSchema) Complexity(ctx context.Context, typeName, field strin return e.complexity.Job.EnergyFootprint(childComplexity), true - case "Job.exclusive": - if e.complexity.Job.Exclusive == nil { - break - } - - return e.complexity.Job.Exclusive(childComplexity), true - case "Job.footprint": if e.complexity.Job.Footprint == nil { break @@ -817,6 +808,13 @@ func (e *executableSchema) Complexity(ctx context.Context, typeName, field strin return e.complexity.Job.SMT(childComplexity), true + case "Job.shared": + if e.complexity.Job.Shared == nil { + break + } + + return e.complexity.Job.Shared(childComplexity), true + 
case "Job.startTime": if e.complexity.Job.StartTime == nil { break @@ -2361,7 +2359,7 @@ type Job { numAcc: Int! energy: Float! SMT: Int! - exclusive: Int! + shared: String! partition: String! arrayJobId: Int! monitoringStatus: Int! @@ -2743,7 +2741,7 @@ input JobFilter { startTime: TimeRange state: [JobState!] metricStats: [MetricStatItem!] - exclusive: Int + shared: StringInput node: StringInput } @@ -5217,8 +5215,8 @@ func (ec *executionContext) fieldContext_Job_SMT(_ context.Context, field graphq return fc, nil } -func (ec *executionContext) _Job_exclusive(ctx context.Context, field graphql.CollectedField, obj *schema.Job) (ret graphql.Marshaler) { - fc, err := ec.fieldContext_Job_exclusive(ctx, field) +func (ec *executionContext) _Job_shared(ctx context.Context, field graphql.CollectedField, obj *schema.Job) (ret graphql.Marshaler) { + fc, err := ec.fieldContext_Job_shared(ctx, field) if err != nil { return graphql.Null } @@ -5231,7 +5229,7 @@ func (ec *executionContext) _Job_exclusive(ctx context.Context, field graphql.Co }() resTmp, err := ec.ResolverMiddleware(ctx, func(rctx context.Context) (any, error) { ctx = rctx // use context from middleware stack in children - return ec.resolvers.Job().Exclusive(rctx, obj) + return obj.Shared, nil }) if err != nil { ec.Error(ctx, err) @@ -5243,19 +5241,19 @@ func (ec *executionContext) _Job_exclusive(ctx context.Context, field graphql.Co } return graphql.Null } - res := resTmp.(int) + res := resTmp.(string) fc.Result = res - return ec.marshalNInt2int(ctx, field.Selections, res) + return ec.marshalNString2string(ctx, field.Selections, res) } -func (ec *executionContext) fieldContext_Job_exclusive(_ context.Context, field graphql.CollectedField) (fc *graphql.FieldContext, err error) { +func (ec *executionContext) fieldContext_Job_shared(_ context.Context, field graphql.CollectedField) (fc *graphql.FieldContext, err error) { fc = &graphql.FieldContext{ Object: "Job", Field: field, - IsMethod: true, - IsResolver: true, + IsMethod: false, + IsResolver: false, Child: func(ctx context.Context, field graphql.CollectedField) (*graphql.FieldContext, error) { - return nil, errors.New("field of type Int does not have child fields") + return nil, errors.New("field of type String does not have child fields") }, } return fc, nil @@ -6404,8 +6402,8 @@ func (ec *executionContext) fieldContext_JobResultList_items(_ context.Context, return ec.fieldContext_Job_energy(ctx, field) case "SMT": return ec.fieldContext_Job_SMT(ctx, field) - case "exclusive": - return ec.fieldContext_Job_exclusive(ctx, field) + case "shared": + return ec.fieldContext_Job_shared(ctx, field) case "partition": return ec.fieldContext_Job_partition(ctx, field) case "arrayJobId": @@ -11042,8 +11040,8 @@ func (ec *executionContext) fieldContext_Query_job(ctx context.Context, field gr return ec.fieldContext_Job_energy(ctx, field) case "SMT": return ec.fieldContext_Job_SMT(ctx, field) - case "exclusive": - return ec.fieldContext_Job_exclusive(ctx, field) + case "shared": + return ec.fieldContext_Job_shared(ctx, field) case "partition": return ec.fieldContext_Job_partition(ctx, field) case "arrayJobId": @@ -16357,7 +16355,7 @@ func (ec *executionContext) unmarshalInputJobFilter(ctx context.Context, obj any asMap[k] = v } - fieldsInOrder := [...]string{"tags", "dbId", "jobId", "arrayJobId", "user", "project", "jobName", "cluster", "partition", "duration", "energy", "minRunningFor", "numNodes", "numAccelerators", "numHWThreads", "startTime", "state", "metricStats", "exclusive", "node"} + 
fieldsInOrder := [...]string{"tags", "dbId", "jobId", "arrayJobId", "user", "project", "jobName", "cluster", "partition", "duration", "energy", "minRunningFor", "numNodes", "numAccelerators", "numHWThreads", "startTime", "state", "metricStats", "shared", "node"} for _, k := range fieldsInOrder { v, ok := asMap[k] if !ok { @@ -16490,13 +16488,13 @@ func (ec *executionContext) unmarshalInputJobFilter(ctx context.Context, obj any return it, err } it.MetricStats = data - case "exclusive": - ctx := graphql.WithPathContext(ctx, graphql.NewPathWithField("exclusive")) - data, err := ec.unmarshalOInt2ᚖint(ctx, v) + case "shared": + ctx := graphql.WithPathContext(ctx, graphql.NewPathWithField("shared")) + data, err := ec.unmarshalOStringInput2ᚖgithubᚗcomᚋClusterCockpitᚋccᚑbackendᚋinternalᚋgraphᚋmodelᚐStringInput(ctx, v) if err != nil { return it, err } - it.Exclusive = data + it.Shared = data case "node": ctx := graphql.WithPathContext(ctx, graphql.NewPathWithField("node")) data, err := ec.unmarshalOStringInput2ᚖgithubᚗcomᚋClusterCockpitᚋccᚑbackendᚋinternalᚋgraphᚋmodelᚐStringInput(ctx, v) @@ -17397,42 +17395,11 @@ func (ec *executionContext) _Job(ctx context.Context, sel ast.SelectionSet, obj if out.Values[i] == graphql.Null { atomic.AddUint32(&out.Invalids, 1) } - case "exclusive": - field := field - - innerFunc := func(ctx context.Context, fs *graphql.FieldSet) (res graphql.Marshaler) { - defer func() { - if r := recover(); r != nil { - ec.Error(ctx, ec.Recover(ctx, r)) - } - }() - res = ec._Job_exclusive(ctx, field, obj) - if res == graphql.Null { - atomic.AddUint32(&fs.Invalids, 1) - } - return res + case "shared": + out.Values[i] = ec._Job_shared(ctx, field, obj) + if out.Values[i] == graphql.Null { + atomic.AddUint32(&out.Invalids, 1) } - - if field.Deferrable != nil { - dfs, ok := deferred[field.Deferrable.Label] - di := 0 - if ok { - dfs.AddField(field) - di = len(dfs.Values) - 1 - } else { - dfs = graphql.NewFieldSet([]graphql.CollectedField{field}) - deferred[field.Deferrable.Label] = dfs - } - dfs.Concurrently(di, func(ctx context.Context) graphql.Marshaler { - return innerFunc(ctx, dfs) - }) - - // don't run the out.Concurrently() call below - out.Values[i] = graphql.Null - continue - } - - out.Concurrently(i, func(ctx context.Context) graphql.Marshaler { return innerFunc(ctx, out) }) case "partition": out.Values[i] = ec._Job_partition(ctx, field, obj) if out.Values[i] == graphql.Null { atomic.AddUint32(&out.Invalids, 1) } diff --git a/internal/graph/model/models_gen.go b/internal/graph/model/models_gen.go index e9abf0d..accc344 100644 --- a/internal/graph/model/models_gen.go +++ b/internal/graph/model/models_gen.go @@ -69,7 +69,7 @@ type JobFilter struct { StartTime *config.TimeRange `json:"startTime,omitempty"` State []schema.JobState `json:"state,omitempty"` MetricStats []*MetricStatItem `json:"metricStats,omitempty"` - Exclusive *int `json:"exclusive,omitempty"` + Shared *StringInput `json:"shared,omitempty"` Node *StringInput `json:"node,omitempty"` } diff --git a/internal/graph/schema.resolvers.go b/internal/graph/schema.resolvers.go index 8868497..315f1a3 100644 --- a/internal/graph/schema.resolvers.go +++ b/internal/graph/schema.resolvers.go @@ -35,11 +35,6 @@ func (r *jobResolver) StartTime(ctx context.Context, obj *schema.Job) (*time.Tim return &timestamp, nil } -// Exclusive is the resolver for the exclusive field. -func (r *jobResolver) Exclusive(ctx context.Context, obj *schema.Job) (int, error) { - panic(fmt.Errorf("not implemented: Exclusive - exclusive")) -} - // Tags is the resolver for the tags field.
func (r *jobResolver) Tags(ctx context.Context, obj *schema.Job) ([]*schema.Tag, error) { return r.Repo.GetTags(repository.GetUserFromContext(ctx), obj.ID) diff --git a/internal/memorystore/checkpoint.go b/internal/memorystore/checkpoint.go index 76a5472..adee443 100644 --- a/internal/memorystore/checkpoint.go +++ b/internal/memorystore/checkpoint.go @@ -380,7 +380,7 @@ func (m *MemoryStore) FromCheckpointFiles(dir string, from int64) (int, error) { if err != nil { log.Fatalf("[METRICSTORE]> Error creating directory: %#v\n", err) } - fmt.Printf("[METRICSTORE]> %#v Directory created successfully.\n", dir) + log.Printf("[METRICSTORE]> %#v Directory created successfully.\n", dir) } // Config read (replace with your actual config read) diff --git a/internal/memorystore/lineprotocol.go b/internal/memorystore/lineprotocol.go index e12b9e2..495197d 100644 --- a/internal/memorystore/lineprotocol.go +++ b/internal/memorystore/lineprotocol.go @@ -2,10 +2,8 @@ package memorystore import ( "context" - "errors" "fmt" "log" - "net" "sync" "time" @@ -17,67 +15,67 @@ import ( ) // Each connection is handled in it's own goroutine. This is a blocking function. -func ReceiveRaw(ctx context.Context, - listener net.Listener, - handleLine func(*lineprotocol.Decoder, string) error, -) error { - var wg sync.WaitGroup +// func ReceiveRaw(ctx context.Context, +// listener net.Listener, +// handleLine func(*lineprotocol.Decoder, string) error, +// ) error { +// var wg sync.WaitGroup - wg.Add(1) - go func() { - defer wg.Done() - <-ctx.Done() - if err := listener.Close(); err != nil { - log.Printf("listener.Close(): %s", err.Error()) - } - }() +// wg.Add(1) +// go func() { +// defer wg.Done() +// <-ctx.Done() +// if err := listener.Close(); err != nil { +// log.Printf("listener.Close(): %s", err.Error()) +// } +// }() - for { - conn, err := listener.Accept() - if err != nil { - if errors.Is(err, net.ErrClosed) { - break - } +// for { +// conn, err := listener.Accept() +// if err != nil { +// if errors.Is(err, net.ErrClosed) { +// break +// } - log.Printf("listener.Accept(): %s", err.Error()) - } +// log.Printf("listener.Accept(): %s", err.Error()) +// } - wg.Add(2) - go func() { - defer wg.Done() - defer conn.Close() +// wg.Add(2) +// go func() { +// defer wg.Done() +// defer conn.Close() - dec := lineprotocol.NewDecoder(conn) - connctx, cancel := context.WithCancel(context.Background()) - defer cancel() - go func() { - defer wg.Done() - select { - case <-connctx.Done(): - conn.Close() - case <-ctx.Done(): - conn.Close() - } - }() +// dec := lineprotocol.NewDecoder(conn) +// connctx, cancel := context.WithCancel(context.Background()) +// defer cancel() +// go func() { +// defer wg.Done() +// select { +// case <-connctx.Done(): +// conn.Close() +// case <-ctx.Done(): +// conn.Close() +// } +// }() - if err := handleLine(dec, "default"); err != nil { - if errors.Is(err, net.ErrClosed) { - return - } +// if err := handleLine(dec, "default"); err != nil { +// if errors.Is(err, net.ErrClosed) { +// return +// } - log.Printf("%s: %s", conn.RemoteAddr().String(), err.Error()) - errmsg := make([]byte, 128) - errmsg = append(errmsg, `error: `...) - errmsg = append(errmsg, err.Error()...) - errmsg = append(errmsg, '\n') - conn.Write(errmsg) - } - }() - } +// log.Printf("%s: %s", conn.RemoteAddr().String(), err.Error()) +// errmsg := make([]byte, 128) +// errmsg = append(errmsg, `error: `...) +// errmsg = append(errmsg, err.Error()...) 
+// errmsg = append(errmsg, '\n') +// conn.Write(errmsg) +// } +// }() +// } - wg.Wait() - return nil -} +// wg.Wait() +// return nil +// } // Connect to a nats server and subscribe to "updates". This is a blocking // function. handleLine will be called for each line recieved via nats. @@ -113,7 +111,7 @@ func ReceiveNats(conf *(config.NatsConfig), if workers > 1 { wg.Add(workers) - for i := 0; i < workers; i++ { + for range workers { go func() { for m := range msgs { dec := lineprotocol.NewDecoderWithBytes(m.Data) diff --git a/internal/memorystore/memorystore.go b/internal/memorystore/memorystore.go index efa4065..4a631c2 100644 --- a/internal/memorystore/memorystore.go +++ b/internal/memorystore/memorystore.go @@ -47,7 +47,7 @@ type MemoryStore struct { root Level } -func Init(wg sync.WaitGroup) { +func Init(wg *sync.WaitGroup) { startupTime := time.Now() //Pass the config.MetricStoreKeys @@ -82,10 +82,10 @@ func Init(wg sync.WaitGroup) { wg.Add(4) - Retention(&wg, ctx) - Checkpointing(&wg, ctx) - Archiving(&wg, ctx) - avro.DataStaging(&wg, ctx) + Retention(wg, ctx) + Checkpointing(wg, ctx) + Archiving(wg, ctx) + avro.DataStaging(wg, ctx) wg.Add(1) sigs := make(chan os.Signal, 1) @@ -337,12 +337,12 @@ func (m *MemoryStore) WriteToLevel(l *Level, selector []string, ts int64, metric // the range asked for if no data was available. func (m *MemoryStore) Read(selector util.Selector, metric string, from, to, resolution int64) ([]schema.Float, int64, int64, int64, error) { if from > to { - return nil, 0, 0, 0, errors.New("[METRICSTORE]> invalid time range") + return nil, 0, 0, 0, errors.New("[METRICSTORE]> invalid time range\n") } minfo, ok := m.Metrics[metric] if !ok { - return nil, 0, 0, 0, errors.New("[METRICSTORE]> unkown metric: " + metric) + return nil, 0, 0, 0, errors.New("[METRICSTORE]> unkown metric: \n" + metric) } n, data := 0, make([]schema.Float, (to-from)/minfo.Frequency+1) @@ -381,7 +381,7 @@ func (m *MemoryStore) Read(selector util.Selector, metric string, from, to, reso if err != nil { return nil, 0, 0, 0, err } else if n == 0 { - return nil, 0, 0, 0, errors.New("[METRICSTORE]> metric or host not found") + return nil, 0, 0, 0, errors.New("[METRICSTORE]> metric or host not found\n") } else if n > 1 { if minfo.Aggregation == config.AvgAggregation { normalize := 1. 
/ schema.Float(n) diff --git a/internal/repository/job.go b/internal/repository/job.go index dd40ebc..68778e1 100644 --- a/internal/repository/job.go +++ b/internal/repository/job.go @@ -52,18 +52,18 @@ func GetJobRepository() *JobRepository { } var jobColumns []string = []string{ - "job.id", "job.job_id", "job.hpc_user", "job.project", "job.cluster", "job.subcluster", + "job.id", "job.job_id", "job.hpc_user", "job.project", "job.hpc_cluster", "job.subcluster", "job.start_time", "job.cluster_partition", "job.array_job_id", "job.num_nodes", - "job.num_hwthreads", "job.num_acc", "job.exclusive", "job.monitoring_status", + "job.num_hwthreads", "job.num_acc", "job.shared", "job.monitoring_status", "job.smt", "job.job_state", "job.duration", "job.walltime", "job.resources", "job.footprint", "job.energy", } var jobCacheColumns []string = []string{ - "job_cache.id", "job_cache.job_id", "job_cache.hpc_user", "job_cache.project", "job_cache.cluster", + "job_cache.id", "job_cache.job_id", "job_cache.hpc_user", "job_cache.project", "job_cache.hpc_cluster", "job_cache.subcluster", "job_cache.start_time", "job_cache.cluster_partition", "job_cache.array_job_id", "job_cache.num_nodes", "job_cache.num_hwthreads", - "job_cache.num_acc", "job_cache.exclusive", "job_cache.monitoring_status", "job_cache.smt", + "job_cache.num_acc", "job_cache.shared", "job_cache.monitoring_status", "job_cache.smt", "job_cache.job_state", "job_cache.duration", "job_cache.walltime", "job_cache.resources", "job_cache.footprint", "job_cache.energy", } @@ -390,7 +390,7 @@ func (r *JobRepository) Partitions(cluster string) ([]string, error) { start := time.Now() partitions := r.cache.Get("partitions:"+cluster, func() (any, time.Duration, int) { parts := []string{} - if err = r.DB.Select(&parts, `SELECT DISTINCT job.cluster_partition FROM job WHERE job.cluster = ?;`, cluster); err != nil { + if err = r.DB.Select(&parts, `SELECT DISTINCT job.cluster_partition FROM job WHERE job.hpc_cluster = ?;`, cluster); err != nil { return nil, 0, 1000 } @@ -410,7 +410,7 @@ func (r *JobRepository) AllocatedNodes(cluster string) (map[string]map[string]in subclusters := make(map[string]map[string]int) rows, err := sq.Select("resources", "subcluster").From("job"). Where("job.job_state = 'running'"). - Where("job.cluster = ?", cluster). + Where("job.hpc_cluster = ?", cluster). RunWith(r.stmtCache).Query() if err != nil { cclog.Error("Error while running query") @@ -505,7 +505,7 @@ func (r *JobRepository) FindJobIdsByTag(tagId int64) ([]int64, error) { // FIXME: Reconsider filtering short jobs with harcoded threshold func (r *JobRepository) FindRunningJobs(cluster string) ([]*schema.Job, error) { query := sq.Select(jobColumns...).From("job"). - Where(fmt.Sprintf("job.cluster = '%s'", cluster)). + Where(fmt.Sprintf("job.hpc_cluster = '%s'", cluster)). Where("job.job_state = 'running'"). 
Where("job.duration > 600") diff --git a/internal/repository/jobCreate.go b/internal/repository/jobCreate.go index 666313f..f43be58 100644 --- a/internal/repository/jobCreate.go +++ b/internal/repository/jobCreate.go @@ -70,7 +70,7 @@ func (r *JobRepository) SyncJobs() ([]*schema.Job, error) { } _, err = r.DB.Exec( - "INSERT INTO job (job_id, cluster, subcluster, start_time, hpc_user, project, cluster_partition, array_job_id, num_nodes, num_hwthreads, num_acc, exclusive, monitoring_status, smt, job_state, duration, walltime, footprint, energy, energy_footprint, resources, meta_data) SELECT job_id, cluster, subcluster, start_time, hpc_user, project, cluster_partition, array_job_id, num_nodes, num_hwthreads, num_acc, exclusive, monitoring_status, smt, job_state, duration, walltime, footprint, energy, energy_footprint, resources, meta_data FROM job_cache") + "INSERT INTO job (job_id, hpc_cluster, subcluster, start_time, hpc_user, project, cluster_partition, array_job_id, num_nodes, num_hwthreads, num_acc, shared, monitoring_status, smt, job_state, duration, walltime, footprint, energy, energy_footprint, resources, meta_data) SELECT job_id, hpc_cluster, subcluster, start_time, hpc_user, project, cluster_partition, array_job_id, num_nodes, num_hwthreads, num_acc, shared, monitoring_status, smt, job_state, duration, walltime, footprint, energy, energy_footprint, resources, meta_data FROM job_cache") if err != nil { cclog.Warnf("Error while Job sync: %v", err) return nil, err diff --git a/internal/repository/jobFind.go b/internal/repository/jobFind.go index 39519d5..3abce8c 100644 --- a/internal/repository/jobFind.go +++ b/internal/repository/jobFind.go @@ -31,7 +31,7 @@ func (r *JobRepository) Find( Where("job.job_id = ?", *jobId) if cluster != nil { - q = q.Where("job.cluster = ?", *cluster) + q = q.Where("job.hpc_cluster = ?", *cluster) } if startTime != nil { q = q.Where("job.start_time = ?", *startTime) @@ -52,7 +52,7 @@ func (r *JobRepository) FindCached( Where("job_cache.job_id = ?", *jobId) if cluster != nil { - q = q.Where("job_cache.cluster = ?", *cluster) + q = q.Where("job_cache.hpc_cluster = ?", *cluster) } if startTime != nil { q = q.Where("job_cache.start_time = ?", *startTime) @@ -78,7 +78,7 @@ func (r *JobRepository) FindAll( Where("job.job_id = ?", *jobId) if cluster != nil { - q = q.Where("job.cluster = ?", *cluster) + q = q.Where("job.hpc_cluster = ?", *cluster) } if startTime != nil { q = q.Where("job.start_time = ?", *startTime) @@ -183,7 +183,7 @@ func (r *JobRepository) FindByJobId(ctx context.Context, jobId int64, startTime q := sq.Select(jobColumns...). From("job"). Where("job.job_id = ?", jobId). - Where("job.cluster = ?", cluster). + Where("job.hpc_cluster = ?", cluster). Where("job.start_time = ?", startTime) q, qerr := SecurityCheck(ctx, q) @@ -203,7 +203,7 @@ func (r *JobRepository) IsJobOwner(jobId int64, startTime int64, user string, cl From("job"). Where("job.job_id = ?", jobId). Where("job.hpc_user = ?", user). - Where("job.cluster = ?", cluster). + Where("job.hpc_cluster = ?", cluster). 
Where("job.start_time = ?", startTime) _, err := scanJob(q.RunWith(r.stmtCache).QueryRow()) diff --git a/internal/repository/jobQuery.go b/internal/repository/jobQuery.go index fdcc904..19cdd9a 100644 --- a/internal/repository/jobQuery.go +++ b/internal/repository/jobQuery.go @@ -168,7 +168,7 @@ func BuildWhereClause(filter *model.JobFilter, query sq.SelectBuilder) sq.Select query = buildMetaJsonCondition("jobName", filter.JobName, query) } if filter.Cluster != nil { - query = buildStringCondition("job.cluster", filter.Cluster, query) + query = buildStringCondition("job.hpc_cluster", filter.Cluster, query) } if filter.Partition != nil { query = buildStringCondition("job.cluster_partition", filter.Partition, query) @@ -183,8 +183,8 @@ func BuildWhereClause(filter *model.JobFilter, query sq.SelectBuilder) sq.Select now := time.Now().Unix() // There does not seam to be a portable way to get the current unix timestamp accross different DBs. query = query.Where("(job.job_state != 'running' OR (? - job.start_time) > ?)", now, *filter.MinRunningFor) } - if filter.Exclusive != nil { - query = query.Where("job.exclusive = ?", *filter.Exclusive) + if filter.Shared != nil { + query = query.Where("job.shared = ?", *filter.Shared) } if filter.State != nil { states := make([]string, len(filter.State)) diff --git a/internal/repository/stats.go b/internal/repository/stats.go index 7beb674..25c862f 100644 --- a/internal/repository/stats.go +++ b/internal/repository/stats.go @@ -23,7 +23,7 @@ import ( var groupBy2column = map[model.Aggregate]string{ model.AggregateUser: "job.hpc_user", model.AggregateProject: "job.project", - model.AggregateCluster: "job.cluster", + model.AggregateCluster: "job.hpc_cluster", } var sortBy2column = map[model.SortByAggregate]string{ diff --git a/internal/tagger/jobclasses/highload.json b/internal/tagger/jobclasses/highload.json index 0d16b45..9667011 100644 --- a/internal/tagger/jobclasses/highload.json +++ b/internal/tagger/jobclasses/highload.json @@ -8,7 +8,7 @@ ], "metrics": ["cpu_load"], "requirements": [ - "job.exclusive == 1", + "job.shared == \"none\"", "job.duration > job_min_duration_seconds" ], "variables": [ diff --git a/internal/tagger/jobclasses/lowUtilization.json b/internal/tagger/jobclasses/lowUtilization.json index 9613b06..e84b81d 100644 --- a/internal/tagger/jobclasses/lowUtilization.json +++ b/internal/tagger/jobclasses/lowUtilization.json @@ -4,7 +4,7 @@ "parameters": ["job_min_duration_seconds"], "metrics": ["flops_any", "mem_bw"], "requirements": [ - "job.exclusive == 1", + "job.shared == \"none\"", "job.duration > job_min_duration_seconds" ], "variables": [ diff --git a/internal/tagger/jobclasses/lowload.json b/internal/tagger/jobclasses/lowload.json index 2212bd1..f952da5 100644 --- a/internal/tagger/jobclasses/lowload.json +++ b/internal/tagger/jobclasses/lowload.json @@ -8,7 +8,7 @@ ], "metrics": ["cpu_load"], "requirements": [ - "job.exclusive == 1", + "job.shared == \"none\"", "job.duration > job_min_duration_seconds" ], "variables": [ diff --git a/internal/taskManager/commitJobService.go b/internal/taskManager/commitJobService.go index e7c169a..88c2708 100644 --- a/internal/taskManager/commitJobService.go +++ b/internal/taskManager/commitJobService.go @@ -26,9 +26,9 @@ func RegisterCommitJobService() { gocron.NewTask( func() { start := time.Now() - cclog.Printf("Jobcache sync started at %s", start.Format(time.RFC3339)) + cclog.Printf("Jobcache sync started at %s\n", start.Format(time.RFC3339)) jobs, _ := jobRepo.SyncJobs() 
repository.CallJobStartHooks(jobs) - cclog.Printf("Jobcache sync and job callbacks are done and took %s", time.Since(start)) + cclog.Printf("Jobcache sync and job callbacks are done and took %s\n", time.Since(start)) })) } diff --git a/internal/taskManager/taskManager.go b/internal/taskManager/taskManager.go index df6c4d0..35d6ea5 100644 --- a/internal/taskManager/taskManager.go +++ b/internal/taskManager/taskManager.go @@ -7,7 +7,6 @@ package taskManager import ( "bytes" "encoding/json" - "fmt" "time" "github.com/ClusterCockpit/cc-backend/internal/auth" @@ -66,10 +65,6 @@ func Start(cronCfg, archiveConfig json.RawMessage) { RegisterStopJobsExceedTime() } - fmt.Printf("Keys : %#v\n", Keys) - fmt.Printf("cronCfg : %#v\n", cronCfg) - fmt.Printf("archiveConfig : %#v\n", archiveConfig) - dec := json.NewDecoder(bytes.NewReader(cronCfg)) dec.DisallowUnknownFields() if err := dec.Decode(&Keys); err != nil { diff --git a/internal/taskManager/updateDurationService.go b/internal/taskManager/updateDurationService.go index d650afb..53882f0 100644 --- a/internal/taskManager/updateDurationService.go +++ b/internal/taskManager/updateDurationService.go @@ -25,8 +25,8 @@ func RegisterUpdateDurationWorker() { gocron.NewTask( func() { start := time.Now() - cclog.Printf("Update duration started at %s", start.Format(time.RFC3339)) + cclog.Printf("Update duration started at %s\n", start.Format(time.RFC3339)) jobRepo.UpdateDuration() - cclog.Printf("Update duration is done and took %s", time.Since(start)) + cclog.Printf("Update duration is done and took %s\n", time.Since(start)) })) } diff --git a/internal/taskManager/updateFootprintService.go b/internal/taskManager/updateFootprintService.go index 4025849..2ce9901 100644 --- a/internal/taskManager/updateFootprintService.go +++ b/internal/taskManager/updateFootprintService.go @@ -134,8 +134,8 @@ func RegisterFootprintWorker() { } jobRepo.TransactionEnd(t) } - cclog.Debugf("Finish Cluster %s, took %s", cluster.Name, time.Since(s_cluster)) + cclog.Debugf("Finish Cluster %s, took %s\n", cluster.Name, time.Since(s_cluster)) } - cclog.Printf("Updating %d (of %d; Skipped %d) Footprints is done and took %s", c, cl, ce, time.Since(s)) + cclog.Printf("Updating %d (of %d; Skipped %d) Footprints is done and took %s\n", c, cl, ce, time.Since(s)) })) } diff --git a/startDemo.sh b/startDemo.sh index faf6d35..b027bf5 100755 --- a/startDemo.sh +++ b/startDemo.sh @@ -12,6 +12,41 @@ else cp ./configs/env-template.txt .env cp ./configs/config-demo.json config.json + # mkdir -p ./var/checkpoints + # cp -rf ~/cc-metric-store/var/checkpoints ~/cc-backend/var + ./cc-backend -migrate-db - ./cc-backend -server -dev -init-db -add-user demo:admin:demo + ./cc-backend -dev -init-db -add-user demo:admin,api:demo + + # --- begin: generate JWT for demo and update test_ccms_write_api.sh --- + CC_BIN="./cc-backend" + TEST_FILE="./test_ccms_write_api.sh" + BACKUP_FILE="${TEST_FILE}.bak" + + if [ -x "$CC_BIN" ]; then + echo "Generating JWT for user 'demo'..." + output="$($CC_BIN -jwt demo 2>&1 || true)" + token="$(printf '%s\n' "$output" | grep -oE '[A-Za-z0-9_-]+\.[A-Za-z0-9_-]+\.[A-Za-z0-9_-]+' | head -n1 || true)" + + if [ -z "$token" ]; then + echo "Warning: could not extract JWT from output:" >&2 + printf '%s\n' "$output" >&2 + else + if [ -f "$TEST_FILE" ]; then + cp -a "$TEST_FILE" "$BACKUP_FILE" + # replace first line with JWT="..." 
+ sed -i "1s#.*#JWT=\"$token\"#" "$TEST_FILE" + echo "Updated JWT in $TEST_FILE (backup at $BACKUP_FILE)" + else + echo "Warning: $TEST_FILE not found; JWT not written." + fi + fi + else + echo "Warning: $CC_BIN not found or not executable; skipping JWT generation." + fi + # --- end: generate JWT for demo and update test_ccms_write_api.sh --- + + + ./cc-backend -server -dev + fi diff --git a/test_ccms_write_api.sh.bak b/test_ccms_write_api.sh.bak new file mode 100755 index 0000000..f76322f --- /dev/null +++ b/test_ccms_write_api.sh.bak @@ -0,0 +1,110 @@ +JWT="eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJleHAiOjE3NjQ1NjMzOTUsImlhdCI6MTc1NzM2MzM5NSwicm9sZXMiOlsiYWRtaW4iLCJhcGkiXSwic3ViIjoiZGVtbyJ9.uhtEbS-ty4xNc8GWTKjyh1b06j6b3vtEw7lzQy0Eht5LtISZwRfyRBfdKjbm_t25xGrNH9sxINq4qiYKBjAaDQ" + +# curl -X 'POST' 'http://localhost:8080/metricstore/api/write/?cluster=alex' -H "Authorization: Bearer $JWT" -d $'cpu_load,cluster=alex,hostname=a042,type=hwthread,type-id=0 value=35.0 1725827464642231296' + +rm sample_fritz.txt +rm sample_alex.txt + +while [ true ]; do + echo "Alex Metrics for hwthread types and type-ids" + timestamp="$(date '+%s')" + echo "Timestamp : "+$timestamp + for metric in cpu_load cpu_user flops_any cpu_irq cpu_system ipc cpu_idle cpu_iowait core_power clock; do + for hostname in a0603 a0903 a0832 a0329 a0702 a0122 a1624 a0731 a0224 a0704 a0631 a0225 a0222 a0427 a0603 a0429 a0833 a0705 a0901 a0601 a0227 a0804 a0322 a0226 a0126 a0129 a0605 a0801 a0934 a1622 a0902 a0428 a0537 a1623 a1722 a0228 a0701 a0326 a0327 a0123 a0321 a1621 a0323 a0124 a0534 a0931 a0324 a0933 a0424 a0905 a0128 a0532 a0805 a0521 a0535 a0932 a0127 a0325 a0633 a0831 a0803 a0426 a0425 a0229 a1721 a0602 a0632 a0223 a0422 a0423 a0536 a0328 a0703 anvme7 a0125 a0221 a0604 a0802 a0522 a0531 a0533 a0904; do + for id in {0..127}; do + echo "$metric,cluster=alex,hostname=$hostname,type=hwthread,type-id=$id value=$((1 + RANDOM % 100)).0 $timestamp" >>sample_alex.txt + done + done + done + + curl -X 'POST' 'http://localhost:8080/metricstore/api/write/?cluster=alex' -H "Authorization: Bearer $JWT" --data-binary @sample_alex.txt + + echo "Fritz Metrics for hwthread types and type-ids" + for metric in cpu_load cpu_user flops_any cpu_irq cpu_system ipc cpu_idle cpu_iowait core_power clock; do + for hostname in f0201 f0202 f0203 f0204 f0205 f0206 f0207 f0208 f0209 f0210 f0211 f0212 f0213 f0214 f0215 f0217 f0218 f0219 f0220 f0221 f0222 f0223 f0224 f0225 f0226 f0227 f0228 f0229 f0230 f0231 f0232 f0233 f0234 f0235 f0236 f0237 f0238 f0239 f0240 f0241 f0242 f0243 f0244 f0245 f0246 f0247 f0248 f0249 f0250 f0251 f0252 f0253 f0254 f0255 f0256 f0257 f0258 f0259 f0260 f0261 f0262 f0263 f0264 f0378; do + for id in {0..71}; do + echo "$metric,cluster=fritz,hostname=$hostname,type=hwthread,type-id=$id value=$((1 + RANDOM % 100)).0 $timestamp" >>sample_fritz.txt + done + done + done + + curl -X 'POST' 'http://localhost:8080/metricstore/api/write/?cluster=fritz' -H "Authorization: Bearer $JWT" --data-binary @sample_fritz.txt + + rm sample_fritz.txt + rm sample_alex.txt + + echo "Alex Metrics for accelerator types and type-ids" + for metric in cpu_load cpu_user flops_any cpu_irq cpu_system ipc cpu_idle cpu_iowait core_power clock; do + for hostname in a0603 a0903 a0832 a0329 a0702 a0122 a1624 a0731 a0224 a0704 a0631 a0225 a0222 a0427 a0603 a0429 a0833 a0705 a0901 a0601 a0227 a0804 a0322 a0226 a0126 a0129 a0605 a0801 a0934 a1622 a0902 a0428 a0537 a1623 a1722 a0228 a0701 a0326 a0327 a0123 a0321 a1621 a0323 a0124 a0534 a0931 a0324 a0933 a0424 a0905 a0128 
a0532 a0805 a0521 a0535 a0932 a0127 a0325 a0633 a0831 a0803 a0426 a0425 a0229 a1721 a0602 a0632 a0223 a0422 a0423 a0536 a0328 a0703 anvme7 a0125 a0221 a0604 a0802 a0522 a0531 a0533 a0904; do + for id in 00000000:49:00.0 00000000:0E:00.0 00000000:D1:00.0 00000000:90:00.0 00000000:13:00.0 00000000:96:00.0 00000000:CC:00.0 00000000:4F:00.0; do + echo "$metric,cluster=alex,hostname=$hostname,type=accelerator,type-id=$id value=$((1 + RANDOM % 100)).0 $timestamp" >>sample_alex.txt + done + done + done + + curl -X 'POST' 'http://localhost:8080/metricstore/api/write/?cluster=alex' -H "Authorization: Bearer $JWT" --data-binary @sample_alex.txt + + rm sample_alex.txt + + echo "Alex Metrics for memoryDomain types and type-ids" + for metric in cpu_load cpu_user flops_any cpu_irq cpu_system ipc cpu_idle cpu_iowait core_power clock; do + for hostname in a0603 a0903 a0832 a0329 a0702 a0122 a1624 a0731 a0224 a0704 a0631 a0225 a0222 a0427 a0603 a0429 a0833 a0705 a0901 a0601 a0227 a0804 a0322 a0226 a0126 a0129 a0605 a0801 a0934 a1622 a0902 a0428 a0537 a1623 a1722 a0228 a0701 a0326 a0327 a0123 a0321 a1621 a0323 a0124 a0534 a0931 a0324 a0933 a0424 a0905 a0128 a0532 a0805 a0521 a0535 a0932 a0127 a0325 a0633 a0831 a0803 a0426 a0425 a0229 a1721 a0602 a0632 a0223 a0422 a0423 a0536 a0328 a0703 anvme7 a0125 a0221 a0604 a0802 a0522 a0531 a0533 a0904; do + for id in {0..7}; do + echo "$metric,cluster=alex,hostname=$hostname,type=memoryDomain,type-id=$id value=$((1 + RANDOM % 100)).0 $timestamp" >>sample_alex.txt + done + done + done + + curl -X 'POST' 'http://localhost:8080/metricstore/api/write/?cluster=alex' -H "Authorization: Bearer $JWT" --data-binary @sample_alex.txt + + rm sample_alex.txt + + echo "Alex Metrics for socket types and type-ids" + for metric in cpu_load cpu_user flops_any cpu_irq cpu_system ipc cpu_idle cpu_iowait core_power clock; do + for hostname in a0603 a0903 a0832 a0329 a0702 a0122 a1624 a0731 a0224 a0704 a0631 a0225 a0222 a0427 a0603 a0429 a0833 a0705 a0901 a0601 a0227 a0804 a0322 a0226 a0126 a0129 a0605 a0801 a0934 a1622 a0902 a0428 a0537 a1623 a1722 a0228 a0701 a0326 a0327 a0123 a0321 a1621 a0323 a0124 a0534 a0931 a0324 a0933 a0424 a0905 a0128 a0532 a0805 a0521 a0535 a0932 a0127 a0325 a0633 a0831 a0803 a0426 a0425 a0229 a1721 a0602 a0632 a0223 a0422 a0423 a0536 a0328 a0703 anvme7 a0125 a0221 a0604 a0802 a0522 a0531 a0533 a0904; do + for id in {0..1}; do + echo "$metric,cluster=alex,hostname=$hostname,type=socket,type-id=$id value=$((1 + RANDOM % 100)).0 $timestamp" >>sample_alex.txt + done + done + done + + curl -X 'POST' 'http://localhost:8080/metricstore/api/write/?cluster=alex' -H "Authorization: Bearer $JWT" --data-binary @sample_alex.txt + + echo "Fritz Metrics for socket types and type-ids" + for metric in cpu_load cpu_user flops_any cpu_irq cpu_system ipc cpu_idle cpu_iowait core_power clock; do + for hostname in f0201 f0202 f0203 f0204 f0205 f0206 f0207 f0208 f0209 f0210 f0211 f0212 f0213 f0214 f0215 f0217 f0218 f0219 f0220 f0221 f0222 f0223 f0224 f0225 f0226 f0227 f0228 f0229 f0230 f0231 f0232 f0233 f0234 f0235 f0236 f0237 f0238 f0239 f0240 f0241 f0242 f0243 f0244 f0245 f0246 f0247 f0248 f0249 f0250 f0251 f0252 f0253 f0254 f0255 f0256 f0257 f0258 f0259 f0260 f0261 f0262 f0263 f0264 f0378; do + for id in {0..1}; do + echo "$metric,cluster=fritz,hostname=$hostname,type=socket,type-id=$id value=$((1 + RANDOM % 100)).0 $timestamp" >>sample_fritz.txt + done + done + done + + curl -X 'POST' 'http://localhost:8080/metricstore/api/write/?cluster=fritz' -H "Authorization: Bearer $JWT" 
--data-binary @sample_fritz.txt + + rm sample_fritz.txt + rm sample_alex.txt + + echo "Alex Metrics for nodes" + for metric in cpu_irq cpu_load mem_cached net_bytes_in cpu_user cpu_idle nfs4_read mem_used nfs4_write nfs4_total ib_xmit ib_xmit_pkts net_bytes_out cpu_iowait ib_recv cpu_system ib_recv_pkts; do + for hostname in a0603 a0903 a0832 a0329 a0702 a0122 a1624 a0731 a0224 a0704 a0631 a0225 a0222 a0427 a0603 a0429 a0833 a0705 a0901 a0601 a0227 a0804 a0322 a0226 a0126 a0129 a0605 a0801 a0934 a1622 a0902 a0428 a0537 a1623 a1722 a0228 a0701 a0326 a0327 a0123 a0321 a1621 a0323 a0124 a0534 a0931 a0324 a0933 a0424 a0905 a0128 a0532 a0805 a0521 a0535 a0932 a0127 a0325 a0633 a0831 a0803 a0426 a0425 a0229 a1721 a0602 a0632 a0223 a0422 a0423 a0536 a0328 a0703 anvme7 a0125 a0221 a0604 a0802 a0522 a0531 a0533 a0904; do + echo "$metric,cluster=alex,hostname=$hostname,type=node value=$((1 + RANDOM % 100)).0 $timestamp" >>sample_alex.txt + done + done + + curl -X 'POST' 'http://localhost:8080/metricstore/api/write/?cluster=alex' -H "Authorization: Bearer $JWT" --data-binary @sample_alex.txt + + echo "Fritz Metrics for nodes" + for metric in cpu_irq cpu_load mem_cached net_bytes_in cpu_user cpu_idle nfs4_read mem_used nfs4_write nfs4_total ib_xmit ib_xmit_pkts net_bytes_out cpu_iowait ib_recv cpu_system ib_recv_pkts; do + for hostname in f0201 f0202 f0203 f0204 f0205 f0206 f0207 f0208 f0209 f0210 f0211 f0212 f0213 f0214 f0215 f0217 f0218 f0219 f0220 f0221 f0222 f0223 f0224 f0225 f0226 f0227 f0228 f0229 f0230 f0231 f0232 f0233 f0234 f0235 f0236 f0237 f0238 f0239 f0240 f0241 f0242 f0243 f0244 f0245 f0246 f0247 f0248 f0249 f0250 f0251 f0252 f0253 f0254 f0255 f0256 f0257 f0258 f0259 f0260 f0261 f0262 f0263 f0264 f0378; do + echo "$metric,cluster=fritz,hostname=$hostname,type=node value=$((1 + RANDOM % 100)).0 $timestamp" >>sample_fritz.txt + done + done + + curl -X 'POST' 'http://localhost:8080/metricstore/api/write/?cluster=fritz' -H "Authorization: Bearer $JWT" --data-binary @sample_fritz.txt + + rm sample_fritz.txt + rm sample_alex.txt + + sleep 1m +done +# curl -X 'POST' 'http://localhost:8081/api/write/?cluster=alex' -H "Authorization: Bearer $JWT" -d $'cpu_load,cluster=alex,hostname=a042,type=hwthread,type-id=0 value=35.0 1725827464642231296' \ No newline at end of file From 39f21763e4ca13de09d447475de5c16caf3f3b6e Mon Sep 17 00:00:00 2001 From: Jan Eitzinger Date: Tue, 9 Sep 2025 11:30:20 +0200 Subject: [PATCH 28/40] Revert test database --- internal/repository/testdata/job.db | Bin 118784 -> 118784 bytes 1 file changed, 0 insertions(+), 0 deletions(-) diff --git a/internal/repository/testdata/job.db b/internal/repository/testdata/job.db index e9e20cebc366222e58e1ba375f78b4b57a3dd444..43ec9d3c7f36c7ea505a96cc1208c4ce7a148eed 100644 GIT binary patch delta 919 zcmZ`%O=uHA7@f(^?q-usU)Cf>X+z?no{A(|D|+%IUc|E(LyV~g8%?Cp(u0Wy!GfU8 zA_3_k1i`zj_SjxZ3PprakyHgSwHGmg+C!m&&h9pwfaxx?`@Q$QZ+_lkxh5D#*tG>vLl=4E-_dh5G^Y4SYU&sF9gxkZXC4NHi6CbgRS9ppU zld>OdEyet_Ei*#ooeB@5ONnFjl3Wan;ZTH5-p@|YCgwlI)&1x>ADX@fnn=?g`JpurtjgWkA$TB2wZv^m~_-cBC% zSO?t2E;<%BA}YK{RV4nE6y{j)8~o*p@9;}h;7K2qT(+iG@*7&bgN3SAtHy$TCEIA? zJyu}w!bue82f4!MC=B35&R^^Dst}=$0sX+`6(UMEVXWByt+xUtN ztn&@laQ%u?&R4&90Netj=^w#8sT+vhfLTFnQx@!?x#N-rrlecViethrY@&{Lc!@G! 
z;{ht`NqV+YPSW!|quTh4HM~I;Pq=0l#Q+OO?FLHU%jKss)0vyn08Sjhu_G)LwVK8$ s75=AO8dq8E;;5V*xA4|W|12N(at};0$j1mDyw8c*P(w?x75WYT01oNs3;+NC delta 2449 zcmeHJU1%d!6rMXXnVC$R+{Bp1!Is?qY?DB`Hc4xtyP(||+-hmDrlJp;y-jADtdpOe zOxj&RwyTKiJ`~qMim*b_Hy^iZUW8R}Uls%@E3)8&KlmoA)hAh{-g_sFiLKzPumfl2 z-gCb5Gv|DBll3imeM^4xLBGav+(1ht@;rVeo{DMQ(U;D;$PU1}M}Nx5B+ZMO}2**9dLzY)K z_%xWQOMdT_KY~bpP(j~Q(3h2b_KmgV0oiaw{#QPyY3LA z;Cf@ZQ|Dzm3!7HXDwTEnUDt?*s*1IIu2F9}_EJsFte(;H7K?O&Dq3N3HR4>dO4SN_ zW@a&ca%M4_oScj~>9J|z^SQ199YdjEV&-$O_x`oD6wCxevbCMw5qo-?R^^=N6CG#{fZLr>1dGsp>jl|?`Gdt21RPHL++L)>>hAT z$K4}9-9F`hpH~k*Cbk6Wp&lue9@zXP0Up>M?EOJ#iZ$>j)RZRYmNqze+k4CVj^{Pc z3(Bv`b!kJ{AYYUvO}zT@up9~o!Rya6!qZMs@pjXks=68Ou(4XVDwt4$(wMQalf#Gc zn(1gX#utY!&g5Hk>9BS2B|_L&&0|Y1MB*qyCa;^T&bm}EbgC=(KvEPe(=NLlw&N0Th|KT|X)FwCuWZy>$ c*t@RL|2X#%xt~ZskpK}t5nmI13ciJZ0j;Ni(f|Me From d8e85cf75d051c5ce1e878c4814c17635f8a7d0c Mon Sep 17 00:00:00 2001 From: Jan Eitzinger Date: Tue, 9 Sep 2025 11:35:34 +0200 Subject: [PATCH 29/40] Fix migration --- .../sqlite3/09_add-job-cache.up.sql | 28 +++++++++++++------ 1 file changed, 20 insertions(+), 8 deletions(-) diff --git a/internal/repository/migrations/sqlite3/09_add-job-cache.up.sql b/internal/repository/migrations/sqlite3/09_add-job-cache.up.sql index 2c25029..8c54622 100644 --- a/internal/repository/migrations/sqlite3/09_add-job-cache.up.sql +++ b/internal/repository/migrations/sqlite3/09_add-job-cache.up.sql @@ -69,17 +69,29 @@ CREATE TABLE "job_new" ( ); ALTER TABLE job RENAME COLUMN cluster TO hpc_cluster; + +CREATE TABLE IF NOT EXISTS lookup_exclusive ( + id INTEGER PRIMARY KEY, + name TEXT NOT NULL UNIQUE +); + +INSERT INTO lookup_exclusive (id, name) VALUES + (0, 'multi_user'), + (1, 'none'), + (2, 'single_user'); + INSERT INTO job_new ( - id, job_id, hpc_cluster, subcluster, submit_time, start_time, hpc_user, project, - cluster_partition, array_job_id, duration, walltime, job_state, meta_data, resources, - num_nodes, num_hwthreads, num_acc, smt, shared, monitoring_status, energy, + id, job_id, hpc_cluster, subcluster, submit_time, start_time, hpc_user, project, + cluster_partition, array_job_id, duration, walltime, job_state, meta_data, resources, + num_nodes, num_hwthreads, num_acc, smt, shared, monitoring_status, energy, energy_footprint, footprint -) -SELECT - id, job_id, hpc_cluster, subcluster, 0, start_time, hpc_user, project, - cluster_partition, array_job_id, duration, walltime, job_state, meta_data, resources, - num_nodes, num_hwthreads, num_acc, smt, exclusive, monitoring_status, energy, +) SELECT + id, job_id, hpc_cluster, subcluster, 0, start_time, hpc_user, project, + cluster_partition, array_job_id, duration, walltime, job_state, meta_data, resources, + num_nodes, num_hwthreads, num_acc, smt, (SELECT name FROM lookup_exclusive WHERE id=job.exclusive), monitoring_status, energy, energy_footprint, footprint FROM job; + +DROP TABLE lookup_exclusive; DROP TABLE job; ALTER TABLE job_new RENAME TO job; From d00881de2e96e3d5c4537cbc8e4cec8fbd5991d7 Mon Sep 17 00:00:00 2001 From: Jan Eitzinger Date: Tue, 9 Sep 2025 11:36:02 +0200 Subject: [PATCH 30/40] Refactor and update dependencies --- go.mod | 23 +++++++------- go.sum | 54 ++++++++++++++++----------------- internal/avro/avroCheckpoint.go | 23 +++++++------- internal/avro/avroHelper.go | 10 ++++-- internal/avro/avroStruct.go | 10 ++++-- 5 files changed, 63 insertions(+), 57 deletions(-) diff --git a/go.mod b/go.mod index e0add97..0725a30 100644 --- a/go.mod +++ b/go.mod @@ -6,10 +6,10 @@ toolchain go1.24.1 require ( github.com/99designs/gqlgen 
v0.17.78 - github.com/ClusterCockpit/cc-lib v0.7.0 + github.com/ClusterCockpit/cc-lib v0.8.0 github.com/Masterminds/squirrel v1.5.4 github.com/coreos/go-oidc/v3 v3.12.0 - github.com/expr-lang/expr v1.17.5 + github.com/expr-lang/expr v1.17.6 github.com/go-co-op/gocron/v2 v2.16.0 github.com/go-ldap/ldap/v3 v3.4.10 github.com/go-sql-driver/mysql v1.9.0 @@ -24,17 +24,17 @@ require ( github.com/joho/godotenv v1.5.1 github.com/linkedin/goavro/v2 v2.14.0 github.com/mattn/go-sqlite3 v1.14.24 - github.com/nats-io/nats.go v1.44.0 - github.com/prometheus/client_golang v1.23.0 - github.com/prometheus/common v0.65.0 + github.com/nats-io/nats.go v1.45.0 + github.com/prometheus/client_golang v1.23.2 + github.com/prometheus/common v0.66.1 github.com/qustavo/sqlhooks/v2 v2.1.0 github.com/santhosh-tekuri/jsonschema/v5 v5.3.1 github.com/swaggo/http-swagger v1.3.4 github.com/swaggo/swag v1.16.6 github.com/vektah/gqlparser/v2 v2.5.30 - golang.org/x/crypto v0.40.0 + golang.org/x/crypto v0.41.0 golang.org/x/oauth2 v0.30.0 - golang.org/x/time v0.5.0 + golang.org/x/time v0.12.0 ) require ( @@ -87,13 +87,12 @@ require ( go.yaml.in/yaml/v2 v2.4.2 // indirect golang.org/x/exp v0.0.0-20250620022241-b7579e27df2b // indirect golang.org/x/mod v0.26.0 // indirect - golang.org/x/net v0.42.0 // indirect + golang.org/x/net v0.43.0 // indirect golang.org/x/sync v0.16.0 // indirect - golang.org/x/sys v0.34.0 // indirect - golang.org/x/text v0.27.0 // indirect + golang.org/x/sys v0.35.0 // indirect + golang.org/x/text v0.28.0 // indirect golang.org/x/tools v0.35.0 // indirect - google.golang.org/protobuf v1.36.6 // indirect - gopkg.in/yaml.v2 v2.4.0 // indirect + google.golang.org/protobuf v1.36.8 // indirect gopkg.in/yaml.v3 v3.0.1 // indirect sigs.k8s.io/yaml v1.6.0 // indirect ) diff --git a/go.sum b/go.sum index 792ec1c..81ae22b 100644 --- a/go.sum +++ b/go.sum @@ -6,16 +6,16 @@ github.com/Azure/go-ansiterm v0.0.0-20230124172434-306776ec8161 h1:L/gRVlceqvL25 github.com/Azure/go-ansiterm v0.0.0-20230124172434-306776ec8161/go.mod h1:xomTg63KZ2rFqZQzSB4Vz2SUXa1BpHTVz9L5PTmPC4E= github.com/Azure/go-ntlmssp v0.0.0-20221128193559-754e69321358 h1:mFRzDkZVAjdal+s7s0MwaRv9igoPqLRdzOLzw/8Xvq8= github.com/Azure/go-ntlmssp v0.0.0-20221128193559-754e69321358/go.mod h1:chxPXzSsl7ZWRAuOIE23GDNzjWuZquvFlgA8xmpunjU= -github.com/ClusterCockpit/cc-lib v0.7.0 h1:THuSYrMcn9pSbrMditSI1LMOluq9TnM0/aVId4uK1Hc= -github.com/ClusterCockpit/cc-lib v0.7.0/go.mod h1:TD1PS8pL2RDvEWaqs8VNejoTSm5OawI9Dcc0CTY/yWQ= +github.com/ClusterCockpit/cc-lib v0.8.0 h1:kQRMOx30CJCy+Q6TgCK9rarJnJ/CKZPWlIEdIXYlxoA= +github.com/ClusterCockpit/cc-lib v0.8.0/go.mod h1:5xTwONu9pSp15mJ9CjBKGU9I3Jad8NfhrVHJZl50/yI= github.com/KyleBanks/depth v1.2.1 h1:5h8fQADFrWtarTdtDudMmGsC7GPbOAu6RVB3ffsVFHc= github.com/KyleBanks/depth v1.2.1/go.mod h1:jzSb9d0L43HxTQfT+oSA1EEp2q+ne2uh6XgeJcm8brE= github.com/Masterminds/squirrel v1.5.4 h1:uUcX/aBc8O7Fg9kaISIUsHXdKuqehiXAMQTYX8afzqM= github.com/Masterminds/squirrel v1.5.4/go.mod h1:NNaOrjSoIDfDA40n7sr2tPNZRfjzjA400rg+riTZj10= github.com/Microsoft/go-winio v0.6.2 h1:F2VQgta7ecxGYO8k3ZZz3RS8fVIXVxONVUPlNERoyfY= github.com/Microsoft/go-winio v0.6.2/go.mod h1:yd8OoFMLzJbo9gZq8j5qaps8bJ9aShtEA8Ipt1oGCvU= -github.com/NVIDIA/go-nvml v0.12.9-0 h1:e344UK8ZkeMeeLkdQtRhmXRxNf+u532LDZPGMtkdus0= -github.com/NVIDIA/go-nvml v0.12.9-0/go.mod h1:+KNA7c7gIBH7SKSJ1ntlwkfN80zdx8ovl4hrK3LmPt4= +github.com/NVIDIA/go-nvml v0.13.0-1 h1:OLX8Jq3dONuPOQPC7rndB6+iDmDakw0XTYgzMxObkEw= +github.com/NVIDIA/go-nvml v0.13.0-1/go.mod 
h1:+KNA7c7gIBH7SKSJ1ntlwkfN80zdx8ovl4hrK3LmPt4= github.com/PuerkitoBio/goquery v1.10.3 h1:pFYcNSqHxBD06Fpj/KsbStFRsgRATgnf3LeXiUkhzPo= github.com/PuerkitoBio/goquery v1.10.3/go.mod h1:tMUX0zDMHXYlAQk6p35XxQMqMweEKB7iK7iLNd4RH4Y= github.com/agnivade/levenshtein v1.2.1 h1:EHBY3UOn1gwdy/VbFwgo4cxecRznFk7fKWN1KOX7eoM= @@ -54,8 +54,8 @@ github.com/docker/go-connections v0.5.0 h1:USnMq7hx7gwdVZq1L49hLXaFtUdTADjXGp+uj github.com/docker/go-connections v0.5.0/go.mod h1:ov60Kzw0kKElRwhNs9UlUHAE/F9Fe6GLaXnqyDdmEXc= github.com/docker/go-units v0.5.0 h1:69rxXcBk27SvSaaxTtLh/8llcHD8vYHT7WSdRZ/jvr4= github.com/docker/go-units v0.5.0/go.mod h1:fgPhTUdO+D/Jk86RDLlptpiXQzgHJF7gydDDbaIK4Dk= -github.com/expr-lang/expr v1.17.5 h1:i1WrMvcdLF249nSNlpQZN1S6NXuW9WaOfF5tPi3aw3k= -github.com/expr-lang/expr v1.17.5/go.mod h1:8/vRC7+7HBzESEqt5kKpYXxrxkr31SaO8r40VO/1IT4= +github.com/expr-lang/expr v1.17.6 h1:1h6i8ONk9cexhDmowO/A64VPxHScu7qfSl2k8OlINec= +github.com/expr-lang/expr v1.17.6/go.mod h1:8/vRC7+7HBzESEqt5kKpYXxrxkr31SaO8r40VO/1IT4= github.com/felixge/httpsnoop v1.0.4 h1:NFTV2Zj1bL4mc9sqWACXbQFVBBg2W3GPvqp8/ESS2Wg= github.com/felixge/httpsnoop v1.0.4/go.mod h1:m8KPJKqk1gH5J9DgRY2ASl2lWCfGKXixSwevea8zH2U= github.com/frankban/quicktest v1.11.0/go.mod h1:K+q6oSqb0W0Ininfk863uOk1lMy69l/P6txr3mVT54s= @@ -207,8 +207,8 @@ github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ= github.com/mwitkow/go-conntrack v0.0.0-20190716064945-2f068394615f h1:KUppIJq7/+SVif2QVs3tOP0zanoHgBEVAwHxUSIzRqU= github.com/mwitkow/go-conntrack v0.0.0-20190716064945-2f068394615f/go.mod h1:qRWi+5nqEBWmkhHvq77mSJWrCKwh8bxhgT7d/eI7P4U= -github.com/nats-io/nats.go v1.44.0 h1:ECKVrDLdh/kDPV1g0gAQ+2+m2KprqZK5O/eJAyAnH2M= -github.com/nats-io/nats.go v1.44.0/go.mod h1:iRWIPokVIFbVijxuMQq4y9ttaBTMe0SFdlZfMDd+33g= +github.com/nats-io/nats.go v1.45.0 h1:/wGPbnYXDM0pLKFjZTX+2JOw9TQPoIgTFrUaH97giwA= +github.com/nats-io/nats.go v1.45.0/go.mod h1:iRWIPokVIFbVijxuMQq4y9ttaBTMe0SFdlZfMDd+33g= github.com/nats-io/nkeys v0.4.11 h1:q44qGV008kYd9W1b1nEBkNzvnWxtRSQ7A8BoqRrcfa0= github.com/nats-io/nkeys v0.4.11/go.mod h1:szDimtgmfOi9n25JpfIdGw12tZFYXqhGxjhVxsatHVE= github.com/nats-io/nuid v1.0.1 h1:5iA8DT8V7q8WK2EScv2padNa/rTESc1KdnPw4TC2paw= @@ -225,12 +225,12 @@ github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= -github.com/prometheus/client_golang v1.23.0 h1:ust4zpdl9r4trLY/gSjlm07PuiBq2ynaXXlptpfy8Uc= -github.com/prometheus/client_golang v1.23.0/go.mod h1:i/o0R9ByOnHX0McrTMTyhYvKE4haaf2mW08I+jGAjEE= +github.com/prometheus/client_golang v1.23.2 h1:Je96obch5RDVy3FDMndoUsjAhG5Edi49h0RJWRi/o0o= +github.com/prometheus/client_golang v1.23.2/go.mod h1:Tb1a6LWHB3/SPIzCoaDXI4I8UHKeFTEQ1YCr+0Gyqmg= github.com/prometheus/client_model v0.6.2 h1:oBsgwpGs7iVziMvrGhE53c/GrLUsZdHnqNwqPLxwZyk= github.com/prometheus/client_model v0.6.2/go.mod h1:y3m2F6Gdpfy6Ut/GBsUqTWZqCUvMVzSfMLjcu6wAwpE= -github.com/prometheus/common v0.65.0 h1:QDwzd+G1twt//Kwj/Ww6E9FQq1iVMmODnILtW1t2VzE= -github.com/prometheus/common v0.65.0/go.mod h1:0gZns+BLRQ3V6NdaerOhMbwwRbNh9hkGINtQAsP5GS8= +github.com/prometheus/common v0.66.1 h1:h5E0h5/Y8niHc5DlaLlWLArTQI7tMrsfQjHV+d9ZoGs= 
+github.com/prometheus/common v0.66.1/go.mod h1:gcaUsgf3KfRSwHY4dIMXLPV0K/Wg1oZ8+SbZk/HH/dA= github.com/prometheus/procfs v0.16.1 h1:hZ15bTNuirocR6u0JZ6BAHHmwS1p8B4P6MRqxtzMyRg= github.com/prometheus/procfs v0.16.1/go.mod h1:teAbpZRB1iIAJYREa1LsoWUXykVXA1KlTmWl8x/U+Is= github.com/qustavo/sqlhooks/v2 v2.1.0 h1:54yBemHnGHp/7xgT+pxwmIlMSDNYKx5JW5dfRAiCZi0= @@ -257,8 +257,8 @@ github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/ github.com/stretchr/testify v1.7.5/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= -github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOfJA= -github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= +github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U= +github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U= github.com/swaggo/files v1.0.1 h1:J1bVJ4XHZNq0I46UU90611i9/YzdrF7x92oX1ig5IdE= github.com/swaggo/files v1.0.1/go.mod h1:0qXmMNH6sXNf+73t65aKeB+ApmgxdnkQzVTAj2uaMUg= github.com/swaggo/http-swagger v1.3.4 h1:q7t/XLx0n15H1Q9/tk3Y9L4n210XzJF5WtnDX64a5ww= @@ -295,8 +295,8 @@ golang.org/x/crypto v0.13.0/go.mod h1:y6Z2r+Rw4iayiXXAIxJIDAJ1zMW4yaTpebo8fPOliY golang.org/x/crypto v0.19.0/go.mod h1:Iy9bg/ha4yyC70EfRS8jz+B6ybOBKMaSxLj6P6oBDfU= golang.org/x/crypto v0.23.0/go.mod h1:CKFgDieR+mRhux2Lsu27y0fO304Db0wZe70UKqHu0v8= golang.org/x/crypto v0.31.0/go.mod h1:kDsLvtWBEx7MV9tJOj9bnXsPbxwJQ6csT/x4KIN4Ssk= -golang.org/x/crypto v0.40.0 h1:r4x+VvoG5Fm+eJcxMaY8CQM7Lb0l1lsmjGBQ6s8BfKM= -golang.org/x/crypto v0.40.0/go.mod h1:Qr1vMER5WyS2dfPHAlsOj01wgLbsyWtFn/aY+5+ZdxY= +golang.org/x/crypto v0.41.0 h1:WKYxWedPGCTVVl5+WHSSrOBT0O8lx32+zxmHxijgXp4= +golang.org/x/crypto v0.41.0/go.mod h1:pO5AFd7FA68rFak7rOAGVuygIISepHftHnr8dr6+sUc= golang.org/x/exp v0.0.0-20250620022241-b7579e27df2b h1:M2rDM6z3Fhozi9O7NWsxAkg/yqS/lQJ6PmkyIV3YP+o= golang.org/x/exp v0.0.0-20250620022241-b7579e27df2b/go.mod h1:3//PLf8L/X+8b4vuAfHzxeRUl04Adcb341+IGKfnqS8= golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4= @@ -317,8 +317,8 @@ golang.org/x/net v0.15.0/go.mod h1:idbUs1IY1+zTqbi8yxTbhexhEEk5ur9LInksu6HrEpk= golang.org/x/net v0.21.0/go.mod h1:bIjVDfnllIU7BJ2DNgfnXvpSvtn8VRwhlsaeUTyUS44= golang.org/x/net v0.25.0/go.mod h1:JkAGAh7GEvH74S6FOH42FLoXpXbE/aqXSrIQjXgsiwM= golang.org/x/net v0.33.0/go.mod h1:HXLR5J+9DxmrqMwG9qjGCxZ+zKXxBru04zlTvWlWuN4= -golang.org/x/net v0.42.0 h1:jzkYrhi3YQWD6MLBJcsklgQsoAcw89EcZbJw8Z614hs= -golang.org/x/net v0.42.0/go.mod h1:FF1RA5d3u7nAYA4z2TkclSCKh68eSXtiFwcWQpPXdt8= +golang.org/x/net v0.43.0 h1:lat02VYK2j4aLzMzecihNvTlJNQUq316m2Mr9rnM6YE= +golang.org/x/net v0.43.0/go.mod h1:vhO1fvI4dGsIjh73sWfUVjj3N7CA9WkKJNQm2svM6Jg= golang.org/x/oauth2 v0.30.0 h1:dnDm7JmhM45NNpd8FDDeLhK6FwqbOf4MLCM9zb1BOHI= golang.org/x/oauth2 v0.30.0/go.mod h1:B++QgG3ZKulg6sRPGD/mqlHQs5rB3Ml9erfeDY7xKlU= golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= @@ -341,8 +341,8 @@ golang.org/x/sys v0.12.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.17.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/sys v0.20.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/sys v0.28.0/go.mod 
h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= -golang.org/x/sys v0.34.0 h1:H5Y5sJ2L2JRdyv7ROF1he/lPdvFsd0mJHFw2ThKHxLA= -golang.org/x/sys v0.34.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k= +golang.org/x/sys v0.35.0 h1:vz1N37gP5bs89s7He8XuIYXpyY0+QlsKmzipCbUtyxI= +golang.org/x/sys v0.35.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k= golang.org/x/telemetry v0.0.0-20240228155512-f48c80bd79b2/go.mod h1:TeRTkGYfJXctD9OcfyVLyj2J3IxLnKwHJR8f4D8a3YE= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= @@ -361,10 +361,10 @@ golang.org/x/text v0.13.0/go.mod h1:TvPlkZtksWOMsz7fbANvkp4WM8x/WCo/om8BMLbz+aE= golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU= golang.org/x/text v0.15.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU= golang.org/x/text v0.21.0/go.mod h1:4IBbMaMmOPCJ8SecivzSH54+73PCFmPWxNTLm+vZkEQ= -golang.org/x/text v0.27.0 h1:4fGWRpyh641NLlecmyl4LOe6yDdfaYNrGb2zdfo4JV4= -golang.org/x/text v0.27.0/go.mod h1:1D28KMCvyooCX9hBiosv5Tz/+YLxj0j7XhWjpSUF7CU= -golang.org/x/time v0.5.0 h1:o7cqy6amK/52YcAKIPlM3a+Fpj35zvRj2TP+e1xFSfk= -golang.org/x/time v0.5.0/go.mod h1:3BpzKBy/shNhVucY/MWOyx10tF3SFh9QdLuxbVysPQM= +golang.org/x/text v0.28.0 h1:rhazDwis8INMIwQ4tpjLDzUhx6RlXqZNPEM0huQojng= +golang.org/x/text v0.28.0/go.mod h1:U8nCwOR8jO/marOQ0QbDiOngZVEBB7MAiitBuMjXiNU= +golang.org/x/time v0.12.0 h1:ScB/8o8olJvc+CQPWrK3fPZNfh7qgwCrY0zJmoEQLSE= +golang.org/x/time v0.12.0/go.mod h1:CDIdPxbZBQxdj6cxyCIdrNogrJKMJ7pr37NYpMcMDSg= golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc= @@ -375,15 +375,13 @@ golang.org/x/tools v0.35.0 h1:mBffYraMEf7aa0sB+NuKnuCy8qI/9Bughn8dC2Gu5r0= golang.org/x/tools v0.35.0/go.mod h1:NKdj5HkL/73byiZSJjqJgKn3ep7KjFkBOkR/Hps3VPw= golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= -google.golang.org/protobuf v1.36.6 h1:z1NpPI8ku2WgiWnf+t9wTPsn6eP1L7ksHUlkfLvd9xY= -google.golang.org/protobuf v1.36.6/go.mod h1:jduwjTPXsFjZGTmRluh+L6NjiWu7pchiJ2/5YcXBHnY= +google.golang.org/protobuf v1.36.8 h1:xHScyCOEuuwZEc6UtSOvPbAT4zRh0xcNRYekJwfqyMc= +google.golang.org/protobuf v1.36.8/go.mod h1:fuxRtAxBytpl4zzqUh6/eyUujkJdNiuEkXntxiD/uRU= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20200227125254-8fa46927fb4f/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q= gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= -gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY= -gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ= gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= gopkg.in/yaml.v3 v3.0.0-20200615113413-eeeca48fe776/go.mod 
h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= diff --git a/internal/avro/avroCheckpoint.go b/internal/avro/avroCheckpoint.go index 4d72d36..b7c2ea1 100644 --- a/internal/avro/avroCheckpoint.go +++ b/internal/avro/avroCheckpoint.go @@ -65,7 +65,7 @@ func (as *AvroStore) ToCheckpoint(dir string, dumpAll bool) (int, error) { defer wg.Done() for workItem := range work { - var from int64 = getTimestamp(workItem.dir) + from := getTimestamp(workItem.dir) if err := workItem.level.toCheckpoint(workItem.dir, from, dumpAll); err != nil { if err == ErrNoNewData { @@ -159,7 +159,7 @@ func (l *AvroLevel) toCheckpoint(dir string, from int64, dumpAll bool) error { int_res, _ := strconv.Atoi(path.Base(dir)) // find smallest overall timestamp in l.data map and delete it from l.data - var minTs int64 = int64(1<<63 - 1) + minTs := int64(1<<63 - 1) for ts, dat := range l.data { if ts < minTs && len(dat) != 0 { minTs = ts @@ -176,7 +176,7 @@ func (l *AvroLevel) toCheckpoint(dir string, from int64, dumpAll bool) error { var schema string var codec *goavro.Codec - record_list := make([]map[string]interface{}, 0) + record_list := make([]map[string]any, 0) var f *os.File @@ -220,7 +220,7 @@ func (l *AvroLevel) toCheckpoint(dir string, from int64, dumpAll bool) error { repeat := 60 / int_res for range repeat { - record_list = append(record_list, make(map[string]interface{})) + record_list = append(record_list, make(map[string]any)) } } @@ -262,7 +262,7 @@ func (l *AvroLevel) toCheckpoint(dir string, from int64, dumpAll bool) error { return fmt.Errorf("failed to read record: %v", err) } - record_list = append(record_list, record.(map[string]interface{})) + record_list = append(record_list, record.(map[string]any)) } f.Close() @@ -411,10 +411,10 @@ func compareSchema(schemaRead, schemaGen string) (bool, string, error) { func generateSchema(data map[string]schema.Float) (string, error) { // Define the Avro schema structure - schema := map[string]interface{}{ + schema := map[string]any{ "type": "record", "name": "DataRecord", - "fields": []map[string]interface{}{}, + "fields": []map[string]any{}, } fieldTracker := make(map[string]struct{}) @@ -423,12 +423,12 @@ func generateSchema(data map[string]schema.Float) (string, error) { if _, exists := fieldTracker[key]; !exists { key = correctKey(key) - field := map[string]interface{}{ + field := map[string]any{ "name": key, "type": "double", "default": -1.0, } - schema["fields"] = append(schema["fields"].([]map[string]interface{}), field) + schema["fields"] = append(schema["fields"].([]map[string]any), field) fieldTracker[key] = struct{}{} } } @@ -441,14 +441,15 @@ func generateSchema(data map[string]schema.Float) (string, error) { return string(schemaString), nil } -func generateRecord(data map[string]schema.Float) map[string]interface{} { - record := make(map[string]interface{}) +func generateRecord(data map[string]schema.Float) map[string]any { + record := make(map[string]any) // Iterate through each map in data for key, value := range data { key = correctKey(key) // Set the value in the record + // avro only accepts basic types record[key] = value.Double() } diff --git a/internal/avro/avroHelper.go b/internal/avro/avroHelper.go index 21a5617..8ffc770 100644 --- a/internal/avro/avroHelper.go +++ b/internal/avro/avroHelper.go @@ -1,8 +1,13 @@ +// Copyright (C) NHR@FAU, University Erlangen-Nuremberg. +// All rights reserved. This file is part of cc-backend. 
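Note on the avroCheckpoint.go hunks above: generateSchema() builds the record schema dynamically (one "double" field per metric with a -1.0 default) and generateRecord() converts each schema.Float to a plain Go float before encoding. A minimal, self-contained sketch of that encode/decode round trip with goavro/v2 (the codec library already used there); the metric name and value are made up for illustration, and error handling is reduced to panics for brevity:

package main

import (
	"fmt"

	"github.com/linkedin/goavro/v2"
)

func main() {
	// Schema shaped like the one generateSchema() produces.
	schema := `{
	  "type": "record",
	  "name": "DataRecord",
	  "fields": [
	    {"name": "cpu_load", "type": "double", "default": -1.0}
	  ]
	}`

	codec, err := goavro.NewCodec(schema)
	if err != nil {
		panic(err)
	}

	// Records hold plain Go basic types, as the comment in generateRecord() notes.
	record := map[string]any{"cpu_load": 42.0} // hypothetical metric value

	binary, err := codec.BinaryFromNative(nil, record)
	if err != nil {
		panic(err)
	}

	native, _, err := codec.NativeFromBinary(binary)
	if err != nil {
		panic(err)
	}
	fmt.Println(native) // map[cpu_load:42]
}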
+// Use of this source code is governed by a MIT-style +// license that can be found in the LICENSE file. package avro import ( "context" "fmt" + "slices" "strconv" "sync" @@ -10,7 +15,6 @@ import ( ) func DataStaging(wg *sync.WaitGroup, ctx context.Context) { - // AvroPool is a pool of Avro writers. go func() { if config.MetricStoreKeys.Checkpoints.FileFormat == "json" { @@ -28,7 +32,7 @@ func DataStaging(wg *sync.WaitGroup, ctx context.Context) { case <-ctx.Done(): return case val := <-LineProtocolMessages: - //Fetch the frequency of the metric from the global configuration + // Fetch the frequency of the metric from the global configuration freq, err := config.GetMetricFrequency(val.MetricName) if err != nil { fmt.Printf("Error fetching metric frequency: %s\n", err) @@ -58,7 +62,7 @@ func DataStaging(wg *sync.WaitGroup, ctx context.Context) { if avroLevel == nil { fmt.Printf("Error creating or finding the level with cluster : %s, node : %s, metric : %s\n", val.Cluster, val.Node, val.MetricName) } - oldSelector = append([]string{}, selector...) + oldSelector = slices.Clone(selector) } avroLevel.addMetric(metricName, val.Value, val.Timestamp, int(freq)) diff --git a/internal/avro/avroStruct.go b/internal/avro/avroStruct.go index ee65291..b0ded94 100644 --- a/internal/avro/avroStruct.go +++ b/internal/avro/avroStruct.go @@ -1,3 +1,7 @@ +// Copyright (C) NHR@FAU, University Erlangen-Nuremberg. +// All rights reserved. This file is part of cc-backend. +// Use of this source code is governed by a MIT-style +// license that can be found in the LICENSE file. package avro import ( @@ -37,9 +41,9 @@ type AvroLevel struct { } type AvroField struct { - Name string `json:"name"` - Type interface{} `json:"type"` - Default interface{} `json:"default,omitempty"` + Name string `json:"name"` + Type any `json:"type"` + Default any `json:"default,omitempty"` } type AvroSchema struct { From 3b9d05cc6d5d24f8a6c7f9a2c40a802a34a25181 Mon Sep 17 00:00:00 2001 From: Aditya Ujeniya Date: Tue, 9 Sep 2025 14:57:05 +0200 Subject: [PATCH 31/40] Fix exclusive to shared in svlete and graphql --- api/schema.graphqls | 2 +- configs/config-demo.json | 2 +- internal/avro/avroHelper.go | 6 +- internal/graph/generated/generated.go | 4 +- internal/graph/model/models_gen.go | 2 +- internal/graph/schema.resolvers.go | 12 ++ internal/memorystore/archive.go | 9 +- .../taskManager/updateFootprintService.go | 2 +- test_ccms_write_api.sh.bak | 110 ------------------ var/._job-archive | Bin 163 -> 0 bytes web/frontend/src/Job.root.svelte | 4 +- web/frontend/src/generic/JobList.svelte | 2 +- .../src/generic/joblist/JobInfo.svelte | 2 +- .../src/generic/joblist/JobListRow.svelte | 2 +- .../src/systems/nodelist/NodeInfo.svelte | 4 +- .../src/systems/nodelist/NodeListRow.svelte | 4 +- 16 files changed, 35 insertions(+), 132 deletions(-) delete mode 100755 test_ccms_write_api.sh.bak delete mode 100755 var/._job-archive diff --git a/api/schema.graphqls b/api/schema.graphqls index 070b5b7..c19fc64 100644 --- a/api/schema.graphqls +++ b/api/schema.graphqls @@ -419,7 +419,7 @@ input JobFilter { startTime: TimeRange state: [JobState!] metricStats: [MetricStatItem!] 
- shared: StringInput + shared: String node: StringInput } diff --git a/configs/config-demo.json b/configs/config-demo.json index 3c0d858..d47f926 100644 --- a/configs/config-demo.json +++ b/configs/config-demo.json @@ -80,7 +80,7 @@ "restore": "48h" }, "archive": { - "interval": "48h", + "interval": "2h", "directory": "./var/archive" }, "retention-in-memory": "48h" diff --git a/internal/avro/avroHelper.go b/internal/avro/avroHelper.go index 8ffc770..7710f0f 100644 --- a/internal/avro/avroHelper.go +++ b/internal/avro/avroHelper.go @@ -6,7 +6,7 @@ package avro import ( "context" - "fmt" + "log" "slices" "strconv" "sync" @@ -35,7 +35,7 @@ func DataStaging(wg *sync.WaitGroup, ctx context.Context) { // Fetch the frequency of the metric from the global configuration freq, err := config.GetMetricFrequency(val.MetricName) if err != nil { - fmt.Printf("Error fetching metric frequency: %s\n", err) + log.Printf("Error fetching metric frequency: %s\n", err) continue } @@ -60,7 +60,7 @@ func DataStaging(wg *sync.WaitGroup, ctx context.Context) { // If the Avro level is nil, create a new one if avroLevel == nil { - fmt.Printf("Error creating or finding the level with cluster : %s, node : %s, metric : %s\n", val.Cluster, val.Node, val.MetricName) + log.Printf("Error creating or finding the level with cluster : %s, node : %s, metric : %s\n", val.Cluster, val.Node, val.MetricName) } oldSelector = slices.Clone(selector) } diff --git a/internal/graph/generated/generated.go b/internal/graph/generated/generated.go index eed946d..778f1d6 100644 --- a/internal/graph/generated/generated.go +++ b/internal/graph/generated/generated.go @@ -2741,7 +2741,7 @@ input JobFilter { startTime: TimeRange state: [JobState!] metricStats: [MetricStatItem!] - shared: StringInput + shared: String node: StringInput } @@ -16490,7 +16490,7 @@ func (ec *executionContext) unmarshalInputJobFilter(ctx context.Context, obj any it.MetricStats = data case "shared": ctx := graphql.WithPathContext(ctx, graphql.NewPathWithField("shared")) - data, err := ec.unmarshalOStringInput2ᚖgithubᚗcomᚋClusterCockpitᚋccᚑbackendᚋinternalᚋgraphᚋmodelᚐStringInput(ctx, v) + data, err := ec.unmarshalOString2ᚖstring(ctx, v) if err != nil { return it, err } diff --git a/internal/graph/model/models_gen.go b/internal/graph/model/models_gen.go index accc344..c4948d0 100644 --- a/internal/graph/model/models_gen.go +++ b/internal/graph/model/models_gen.go @@ -69,7 +69,7 @@ type JobFilter struct { StartTime *config.TimeRange `json:"startTime,omitempty"` State []schema.JobState `json:"state,omitempty"` MetricStats []*MetricStatItem `json:"metricStats,omitempty"` - Shared *StringInput `json:"shared,omitempty"` + Shared *string `json:"shared,omitempty"` Node *StringInput `json:"node,omitempty"` } diff --git a/internal/graph/schema.resolvers.go b/internal/graph/schema.resolvers.go index 315f1a3..b886c34 100644 --- a/internal/graph/schema.resolvers.go +++ b/internal/graph/schema.resolvers.go @@ -831,3 +831,15 @@ type mutationResolver struct{ *Resolver } type nodeResolver struct{ *Resolver } type queryResolver struct{ *Resolver } type subClusterResolver struct{ *Resolver } + +// !!! WARNING !!! +// The code below was going to be deleted when updating resolvers. It has been copied here so you have +// one last chance to move it out of harms way if you want. There are two reasons this happens: +// - When renaming or deleting a resolver the old code will be put in here. You can safely delete +// it when you're done. +// - You have helper methods in this file. 
Move them out to keep these resolver files clean. +/* + func (r *jobResolver) Exclusive(ctx context.Context, obj *schema.Job) (int, error) { + panic(fmt.Errorf("not implemented: Exclusive - exclusive")) +} +*/ diff --git a/internal/memorystore/archive.go b/internal/memorystore/archive.go index 7857d71..9720d20 100644 --- a/internal/memorystore/archive.go +++ b/internal/memorystore/archive.go @@ -11,6 +11,7 @@ import ( "errors" "fmt" "io" + "log" "os" "path/filepath" "sync" @@ -26,7 +27,7 @@ func Archiving(wg *sync.WaitGroup, ctx context.Context) { defer wg.Done() d, err := time.ParseDuration(config.MetricStoreKeys.Archive.Interval) if err != nil { - cclog.Fatalf("[METRICSTORE]> error parsing archive interval duration: %v\n", err) + log.Fatalf("[METRICSTORE]> error parsing archive interval duration: %v\n", err) } if d <= 0 { return @@ -44,14 +45,14 @@ func Archiving(wg *sync.WaitGroup, ctx context.Context) { return case <-ticks: t := time.Now().Add(-d) - cclog.Infof("[METRICSTORE]> start archiving checkpoints (older than %s)...\n", t.Format(time.RFC3339)) + log.Printf("[METRICSTORE]> start archiving checkpoints (older than %s)...\n", t.Format(time.RFC3339)) n, err := ArchiveCheckpoints(config.MetricStoreKeys.Checkpoints.RootDir, config.MetricStoreKeys.Archive.RootDir, t.Unix(), config.MetricStoreKeys.Archive.DeleteInstead) if err != nil { - cclog.Warnf("[METRICSTORE]> archiving failed: %s\n", err.Error()) + log.Printf("[METRICSTORE]> archiving failed: %s\n", err.Error()) } else { - cclog.Infof("[METRICSTORE]> done: %d files zipped and moved to archive\n", n) + log.Printf("[METRICSTORE]> done: %d files zipped and moved to archive\n", n) } } } diff --git a/internal/taskManager/updateFootprintService.go b/internal/taskManager/updateFootprintService.go index 2ce9901..4fb5e45 100644 --- a/internal/taskManager/updateFootprintService.go +++ b/internal/taskManager/updateFootprintService.go @@ -34,7 +34,7 @@ func RegisterFootprintWorker() { c := 0 ce := 0 cl := 0 - cclog.Printf("Update Footprints started at %s", s.Format(time.RFC3339)) + cclog.Printf("Update Footprints started at %s\n", s.Format(time.RFC3339)) for _, cluster := range archive.Clusters { s_cluster := time.Now() diff --git a/test_ccms_write_api.sh.bak b/test_ccms_write_api.sh.bak deleted file mode 100755 index f76322f..0000000 --- a/test_ccms_write_api.sh.bak +++ /dev/null @@ -1,110 +0,0 @@ -JWT="eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJleHAiOjE3NjQ1NjMzOTUsImlhdCI6MTc1NzM2MzM5NSwicm9sZXMiOlsiYWRtaW4iLCJhcGkiXSwic3ViIjoiZGVtbyJ9.uhtEbS-ty4xNc8GWTKjyh1b06j6b3vtEw7lzQy0Eht5LtISZwRfyRBfdKjbm_t25xGrNH9sxINq4qiYKBjAaDQ" - -# curl -X 'POST' 'http://localhost:8080/metricstore/api/write/?cluster=alex' -H "Authorization: Bearer $JWT" -d $'cpu_load,cluster=alex,hostname=a042,type=hwthread,type-id=0 value=35.0 1725827464642231296' - -rm sample_fritz.txt -rm sample_alex.txt - -while [ true ]; do - echo "Alex Metrics for hwthread types and type-ids" - timestamp="$(date '+%s')" - echo "Timestamp : "+$timestamp - for metric in cpu_load cpu_user flops_any cpu_irq cpu_system ipc cpu_idle cpu_iowait core_power clock; do - for hostname in a0603 a0903 a0832 a0329 a0702 a0122 a1624 a0731 a0224 a0704 a0631 a0225 a0222 a0427 a0603 a0429 a0833 a0705 a0901 a0601 a0227 a0804 a0322 a0226 a0126 a0129 a0605 a0801 a0934 a1622 a0902 a0428 a0537 a1623 a1722 a0228 a0701 a0326 a0327 a0123 a0321 a1621 a0323 a0124 a0534 a0931 a0324 a0933 a0424 a0905 a0128 a0532 a0805 a0521 a0535 a0932 a0127 a0325 a0633 a0831 a0803 a0426 a0425 a0229 a1721 a0602 a0632 a0223 a0422 a0423 
a0536 a0328 a0703 anvme7 a0125 a0221 a0604 a0802 a0522 a0531 a0533 a0904; do - for id in {0..127}; do - echo "$metric,cluster=alex,hostname=$hostname,type=hwthread,type-id=$id value=$((1 + RANDOM % 100)).0 $timestamp" >>sample_alex.txt - done - done - done - - curl -X 'POST' 'http://localhost:8080/metricstore/api/write/?cluster=alex' -H "Authorization: Bearer $JWT" --data-binary @sample_alex.txt - - echo "Fritz Metrics for hwthread types and type-ids" - for metric in cpu_load cpu_user flops_any cpu_irq cpu_system ipc cpu_idle cpu_iowait core_power clock; do - for hostname in f0201 f0202 f0203 f0204 f0205 f0206 f0207 f0208 f0209 f0210 f0211 f0212 f0213 f0214 f0215 f0217 f0218 f0219 f0220 f0221 f0222 f0223 f0224 f0225 f0226 f0227 f0228 f0229 f0230 f0231 f0232 f0233 f0234 f0235 f0236 f0237 f0238 f0239 f0240 f0241 f0242 f0243 f0244 f0245 f0246 f0247 f0248 f0249 f0250 f0251 f0252 f0253 f0254 f0255 f0256 f0257 f0258 f0259 f0260 f0261 f0262 f0263 f0264 f0378; do - for id in {0..71}; do - echo "$metric,cluster=fritz,hostname=$hostname,type=hwthread,type-id=$id value=$((1 + RANDOM % 100)).0 $timestamp" >>sample_fritz.txt - done - done - done - - curl -X 'POST' 'http://localhost:8080/metricstore/api/write/?cluster=fritz' -H "Authorization: Bearer $JWT" --data-binary @sample_fritz.txt - - rm sample_fritz.txt - rm sample_alex.txt - - echo "Alex Metrics for accelerator types and type-ids" - for metric in cpu_load cpu_user flops_any cpu_irq cpu_system ipc cpu_idle cpu_iowait core_power clock; do - for hostname in a0603 a0903 a0832 a0329 a0702 a0122 a1624 a0731 a0224 a0704 a0631 a0225 a0222 a0427 a0603 a0429 a0833 a0705 a0901 a0601 a0227 a0804 a0322 a0226 a0126 a0129 a0605 a0801 a0934 a1622 a0902 a0428 a0537 a1623 a1722 a0228 a0701 a0326 a0327 a0123 a0321 a1621 a0323 a0124 a0534 a0931 a0324 a0933 a0424 a0905 a0128 a0532 a0805 a0521 a0535 a0932 a0127 a0325 a0633 a0831 a0803 a0426 a0425 a0229 a1721 a0602 a0632 a0223 a0422 a0423 a0536 a0328 a0703 anvme7 a0125 a0221 a0604 a0802 a0522 a0531 a0533 a0904; do - for id in 00000000:49:00.0 00000000:0E:00.0 00000000:D1:00.0 00000000:90:00.0 00000000:13:00.0 00000000:96:00.0 00000000:CC:00.0 00000000:4F:00.0; do - echo "$metric,cluster=alex,hostname=$hostname,type=accelerator,type-id=$id value=$((1 + RANDOM % 100)).0 $timestamp" >>sample_alex.txt - done - done - done - - curl -X 'POST' 'http://localhost:8080/metricstore/api/write/?cluster=alex' -H "Authorization: Bearer $JWT" --data-binary @sample_alex.txt - - rm sample_alex.txt - - echo "Alex Metrics for memoryDomain types and type-ids" - for metric in cpu_load cpu_user flops_any cpu_irq cpu_system ipc cpu_idle cpu_iowait core_power clock; do - for hostname in a0603 a0903 a0832 a0329 a0702 a0122 a1624 a0731 a0224 a0704 a0631 a0225 a0222 a0427 a0603 a0429 a0833 a0705 a0901 a0601 a0227 a0804 a0322 a0226 a0126 a0129 a0605 a0801 a0934 a1622 a0902 a0428 a0537 a1623 a1722 a0228 a0701 a0326 a0327 a0123 a0321 a1621 a0323 a0124 a0534 a0931 a0324 a0933 a0424 a0905 a0128 a0532 a0805 a0521 a0535 a0932 a0127 a0325 a0633 a0831 a0803 a0426 a0425 a0229 a1721 a0602 a0632 a0223 a0422 a0423 a0536 a0328 a0703 anvme7 a0125 a0221 a0604 a0802 a0522 a0531 a0533 a0904; do - for id in {0..7}; do - echo "$metric,cluster=alex,hostname=$hostname,type=memoryDomain,type-id=$id value=$((1 + RANDOM % 100)).0 $timestamp" >>sample_alex.txt - done - done - done - - curl -X 'POST' 'http://localhost:8080/metricstore/api/write/?cluster=alex' -H "Authorization: Bearer $JWT" --data-binary @sample_alex.txt - - rm sample_alex.txt - - echo "Alex Metrics for 
socket types and type-ids" - for metric in cpu_load cpu_user flops_any cpu_irq cpu_system ipc cpu_idle cpu_iowait core_power clock; do - for hostname in a0603 a0903 a0832 a0329 a0702 a0122 a1624 a0731 a0224 a0704 a0631 a0225 a0222 a0427 a0603 a0429 a0833 a0705 a0901 a0601 a0227 a0804 a0322 a0226 a0126 a0129 a0605 a0801 a0934 a1622 a0902 a0428 a0537 a1623 a1722 a0228 a0701 a0326 a0327 a0123 a0321 a1621 a0323 a0124 a0534 a0931 a0324 a0933 a0424 a0905 a0128 a0532 a0805 a0521 a0535 a0932 a0127 a0325 a0633 a0831 a0803 a0426 a0425 a0229 a1721 a0602 a0632 a0223 a0422 a0423 a0536 a0328 a0703 anvme7 a0125 a0221 a0604 a0802 a0522 a0531 a0533 a0904; do - for id in {0..1}; do - echo "$metric,cluster=alex,hostname=$hostname,type=socket,type-id=$id value=$((1 + RANDOM % 100)).0 $timestamp" >>sample_alex.txt - done - done - done - - curl -X 'POST' 'http://localhost:8080/metricstore/api/write/?cluster=alex' -H "Authorization: Bearer $JWT" --data-binary @sample_alex.txt - - echo "Fritz Metrics for socket types and type-ids" - for metric in cpu_load cpu_user flops_any cpu_irq cpu_system ipc cpu_idle cpu_iowait core_power clock; do - for hostname in f0201 f0202 f0203 f0204 f0205 f0206 f0207 f0208 f0209 f0210 f0211 f0212 f0213 f0214 f0215 f0217 f0218 f0219 f0220 f0221 f0222 f0223 f0224 f0225 f0226 f0227 f0228 f0229 f0230 f0231 f0232 f0233 f0234 f0235 f0236 f0237 f0238 f0239 f0240 f0241 f0242 f0243 f0244 f0245 f0246 f0247 f0248 f0249 f0250 f0251 f0252 f0253 f0254 f0255 f0256 f0257 f0258 f0259 f0260 f0261 f0262 f0263 f0264 f0378; do - for id in {0..1}; do - echo "$metric,cluster=fritz,hostname=$hostname,type=socket,type-id=$id value=$((1 + RANDOM % 100)).0 $timestamp" >>sample_fritz.txt - done - done - done - - curl -X 'POST' 'http://localhost:8080/metricstore/api/write/?cluster=fritz' -H "Authorization: Bearer $JWT" --data-binary @sample_fritz.txt - - rm sample_fritz.txt - rm sample_alex.txt - - echo "Alex Metrics for nodes" - for metric in cpu_irq cpu_load mem_cached net_bytes_in cpu_user cpu_idle nfs4_read mem_used nfs4_write nfs4_total ib_xmit ib_xmit_pkts net_bytes_out cpu_iowait ib_recv cpu_system ib_recv_pkts; do - for hostname in a0603 a0903 a0832 a0329 a0702 a0122 a1624 a0731 a0224 a0704 a0631 a0225 a0222 a0427 a0603 a0429 a0833 a0705 a0901 a0601 a0227 a0804 a0322 a0226 a0126 a0129 a0605 a0801 a0934 a1622 a0902 a0428 a0537 a1623 a1722 a0228 a0701 a0326 a0327 a0123 a0321 a1621 a0323 a0124 a0534 a0931 a0324 a0933 a0424 a0905 a0128 a0532 a0805 a0521 a0535 a0932 a0127 a0325 a0633 a0831 a0803 a0426 a0425 a0229 a1721 a0602 a0632 a0223 a0422 a0423 a0536 a0328 a0703 anvme7 a0125 a0221 a0604 a0802 a0522 a0531 a0533 a0904; do - echo "$metric,cluster=alex,hostname=$hostname,type=node value=$((1 + RANDOM % 100)).0 $timestamp" >>sample_alex.txt - done - done - - curl -X 'POST' 'http://localhost:8080/metricstore/api/write/?cluster=alex' -H "Authorization: Bearer $JWT" --data-binary @sample_alex.txt - - echo "Fritz Metrics for nodes" - for metric in cpu_irq cpu_load mem_cached net_bytes_in cpu_user cpu_idle nfs4_read mem_used nfs4_write nfs4_total ib_xmit ib_xmit_pkts net_bytes_out cpu_iowait ib_recv cpu_system ib_recv_pkts; do - for hostname in f0201 f0202 f0203 f0204 f0205 f0206 f0207 f0208 f0209 f0210 f0211 f0212 f0213 f0214 f0215 f0217 f0218 f0219 f0220 f0221 f0222 f0223 f0224 f0225 f0226 f0227 f0228 f0229 f0230 f0231 f0232 f0233 f0234 f0235 f0236 f0237 f0238 f0239 f0240 f0241 f0242 f0243 f0244 f0245 f0246 f0247 f0248 f0249 f0250 f0251 f0252 f0253 f0254 f0255 f0256 f0257 f0258 f0259 f0260 f0261 f0262 f0263 f0264 
f0378; do - echo "$metric,cluster=fritz,hostname=$hostname,type=node value=$((1 + RANDOM % 100)).0 $timestamp" >>sample_fritz.txt - done - done - - curl -X 'POST' 'http://localhost:8080/metricstore/api/write/?cluster=fritz' -H "Authorization: Bearer $JWT" --data-binary @sample_fritz.txt - - rm sample_fritz.txt - rm sample_alex.txt - - sleep 1m -done -# curl -X 'POST' 'http://localhost:8081/api/write/?cluster=alex' -H "Authorization: Bearer $JWT" -d $'cpu_load,cluster=alex,hostname=a042,type=hwthread,type-id=0 value=35.0 1725827464642231296' \ No newline at end of file diff --git a/var/._job-archive b/var/._job-archive deleted file mode 100755 index 9d11b52bb7ed13ffc4799b7e3bcb26eb2c0b9b7a..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 163 zcmZQz6=P>$Vqox1Ojhs@R)|o50+1L3ClDI}aUl?c_=|y<2;dkJ5(HHS(lG;wxzV&S oBE&_L^K gm.name == item.metric)?.unit} nativeScope={$initq.data.globalMetrics.find((gm) => gm.name == item.metric)?.scope} presetScopes={item.data.map((x) => x.scope)} - isShared={$initq.data.job.exclusive != 1} + isShared={$initq.data.job.shared != "none"} /> {:else if item.disabled == true} diff --git a/web/frontend/src/generic/JobList.svelte b/web/frontend/src/generic/JobList.svelte index dc6def2..5ca8981 100644 --- a/web/frontend/src/generic/JobList.svelte +++ b/web/frontend/src/generic/JobList.svelte @@ -69,7 +69,7 @@ hostname } SMT - exclusive + shared partition arrayJobId monitoringStatus diff --git a/web/frontend/src/generic/joblist/JobInfo.svelte b/web/frontend/src/generic/joblist/JobInfo.svelte index f56d800..794efe9 100644 --- a/web/frontend/src/generic/joblist/JobInfo.svelte +++ b/web/frontend/src/generic/joblist/JobInfo.svelte @@ -172,7 +172,7 @@ {job.numNodes} {/if} - {#if job.exclusive != 1} + {#if job.shared != "none"} (shared) {/if} {#if job.numAcc > 0} diff --git a/web/frontend/src/generic/joblist/JobListRow.svelte b/web/frontend/src/generic/joblist/JobListRow.svelte index b17f66d..28574d9 100644 --- a/web/frontend/src/generic/joblist/JobListRow.svelte +++ b/web/frontend/src/generic/joblist/JobListRow.svelte @@ -213,7 +213,7 @@ metric={metric.data.name} cluster={cluster.find((c) => c.name == job.cluster)} subCluster={job.subCluster} - isShared={job.exclusive != 1} + isShared={job.shared != "none"} numhwthreads={job.numHWThreads} numaccs={job.numAcc} zoomState={zoomStates[metric.data.name] || null} diff --git a/web/frontend/src/systems/nodelist/NodeInfo.svelte b/web/frontend/src/systems/nodelist/NodeInfo.svelte index 363379f..77e7416 100644 --- a/web/frontend/src/systems/nodelist/NodeInfo.svelte +++ b/web/frontend/src/systems/nodelist/NodeInfo.svelte @@ -92,7 +92,7 @@ Missing Metric - {:else if nodeJobsData.jobs.count == 1 && nodeJobsData.jobs.items[0].exclusive} + {:else if nodeJobsData.jobs.count == 1 && nodeJobsData.jobs.items[0].shared == "none"} @@ -104,7 +104,7 @@ Exclusive - {:else if nodeJobsData.jobs.count >= 1 && !nodeJobsData.jobs.items[0].exclusive} + {:else if nodeJobsData.jobs.count >= 1 && !(nodeJobsData.jobs.items[0].shared == "none")} diff --git a/web/frontend/src/systems/nodelist/NodeListRow.svelte b/web/frontend/src/systems/nodelist/NodeListRow.svelte index 5cdf493..a9111f6 100644 --- a/web/frontend/src/systems/nodelist/NodeListRow.svelte +++ b/web/frontend/src/systems/nodelist/NodeListRow.svelte @@ -45,7 +45,7 @@ jobId user project - exclusive + shared resources { hostname accelerators @@ -101,7 +101,7 @@ function buildExtendedLegend() { let pendingExtendedLegendData = null // Build Extended for allocated nodes 
[Commented: Only Build extended Legend For Shared Nodes] - if ($nodeJobsData.data.jobs.count >= 1) { // "&& !$nodeJobsData.data.jobs.items[0].exclusive)" + if ($nodeJobsData.data.jobs.count >= 1) { const accSet = Array.from(new Set($nodeJobsData.data.jobs.items .map((i) => i.resources .filter((r) => (r.hostname === nodeData.host) && r?.accelerators) From eaca187032698f6c9740b0703e292b131354273e Mon Sep 17 00:00:00 2001 From: Jan Eitzinger Date: Tue, 9 Sep 2025 15:04:25 +0200 Subject: [PATCH 32/40] Fix testdata for new schema --- .../importer/testdata/meta-fritzError.input | 2 +- .../importer/testdata/meta-fritzMinimal.input | 2 +- internal/tagger/apps/vasp.txt | 1 + .../emmy/1403/244/1608923076/meta.json | 382 +++++++++--------- .../emmy/1404/397/1609300556/meta.json | 382 +++++++++--------- 5 files changed, 385 insertions(+), 384 deletions(-) diff --git a/internal/importer/testdata/meta-fritzError.input b/internal/importer/testdata/meta-fritzError.input index 2b8d0e8..90e46cf 100644 --- a/internal/importer/testdata/meta-fritzError.input +++ b/internal/importer/testdata/meta-fritzError.input @@ -1 +1 @@ -{"jobId":398955,"user":"k106eb10","project":"k106eb","cluster":"fritz","subCluster":"main","partition":"singlenode","arrayJobId":0,"numNodes":1,"numHwthreads":72,"numAcc":0,"exclusive":1,"monitoringStatus":1,"smt":0,"jobState":"completed","duration":260,"walltime":86340,"resources":[{"hostname":"f0720"}],"metaData":{"jobName":"ams_pipeline","jobScript":"#!/bin/bash -l\n#SBATCH --job-name=ams_pipeline\n#SBATCH --time=23:59:00\n#SBATCH --partition=singlenode\n#SBATCH --ntasks=72\n#SBATCH --hint=multithread\n#SBATCH --chdir=/home/atuin/k106eb/k106eb10/ACE/Ni-Al/DFT/VASP_PBE_500_0.125_0.1_NM/AlNi/binaries/bulk/base-hcp/occ-shaken/hcp16.occ.4.shake.0/cfg/NiAl3NiAl11\n#SBATCH --export=NONE\nunset SLURM_EXPORT_ENV\nuss=$(whoami)\nfind /dev/shm/ -user $uss -type f -mmin +30 -delete\ncd \"/home/atuin/k106eb/k106eb10/ACE/Ni-Al/DFT/VASP_PBE_500_0.125_0.1_NM/AlNi/binaries/bulk/base-hcp/occ-shaken/hcp16.occ.4.shake.0/cfg/NiAl3NiAl11\"\nams_pipeline pipeline.json \u003e \"/home/atuin/k106eb/k106eb10/ACE/Ni-Al/DFT/VASP_PBE_500_0.125_0.1_NM/AlNi/binaries/bulk/base-hcp/occ-shaken/hcp16.occ.4.shake.0/cfg/NiAl3NiAl11/ams_pipeline_job.sh.out\" 2\u003e \"/home/atuin/k106eb/k106eb10/ACE/Ni-Al/DFT/VASP_PBE_500_0.125_0.1_NM/AlNi/binaries/bulk/base-hcp/occ-shaken/hcp16.occ.4.shake.0/cfg/NiAl3NiAl11/ams_pipeline_job.sh.err\"\n","slurmInfo":"\nJobId=398955 JobName=ams_pipeline\n UserId=k106eb10(210387) GroupId=80111\n Account=k106eb QOS=normal \n Requeue=False Restarts=0 BatchFlag=True \n TimeLimit=1439\n SubmitTime=2023-02-09T14:11:22\n Partition=singlenode \n NodeList=f0720\n NumNodes=1 NumCPUs=72 NumTasks=72 CPUs/Task=1\n NTasksPerNode:Socket:Core=0:None:None\n TRES_req=cpu=72,mem=250000M,node=1,billing=72\n TRES_alloc=cpu=72,node=1,billing=72\n Command=/home/atuin/k106eb/k106eb10/ACE/Ni-Al/DFT/VASP_PBE_500_0.125_0.1_NM/AlNi/binaries/bulk/base-hcp/occ-shaken/hcp16.occ.4.shake.0/cfg/NiAl3NiAl11/ams_pipeline_job.sh\n WorkDir=/home/atuin/k106eb/k106eb10/ACE/Ni-Al/DFT/VASP_PBE_500_0.125_0.1_NM/AlNi/binaries/bulk/base-hcp/occ-shaken/hcp16.occ.4.shake.0/cfg/NiAl3NiAl11\n StdErr=\n 
StdOut=ams_pipeline.o%j\n"},"startTime":1675956725,"statistics":{"clock":{"unit":{"base":"Hz","prefix":"M"},"avg":2335.254,"min":800.418,"max":2734.922},"cpu_load":{"unit":{"base":""},"avg":52.72,"min":34.46,"max":71.91},"cpu_power":{"unit":{"base":"W"},"avg":407.767,"min":93.932,"max":497.636},"cpu_user":{"unit":{"base":""},"avg":63.678,"min":19.872,"max":96.633},"flops_any":{"unit":{"base":"F/s","prefix":"G"},"avg":635.672,"min":0,"max":1332.874},"flops_dp":{"unit":{"base":"F/s","prefix":"G"},"avg":261.006,"min":0,"max":382.294},"flops_sp":{"unit":{"base":"F/s","prefix":"G"},"avg":113.659,"min":0,"max":568.286},"ib_recv":{"unit":{"base":"B/s"},"avg":27981.111,"min":69.4,"max":48084.589},"ib_recv_pkts":{"unit":{"base":"packets/s"},"avg":398.939,"min":0.5,"max":693.817},"ib_xmit":{"unit":{"base":"B/s"},"avg":188.513,"min":39.597,"max":724.568},"ib_xmit_pkts":{"unit":{"base":"packets/s"},"avg":0.867,"min":0.2,"max":2.933},"ipc":{"unit":{"base":"IPC"},"avg":0.944,"min":0.564,"max":1.291},"mem_bw":{"unit":{"base":"B/s","prefix":"G"},"avg":79.565,"min":0.021,"max":116.02},"mem_power":{"unit":{"base":"W"},"avg":24.692,"min":7.883,"max":31.318},"mem_used":{"unit":{"base":"B","prefix":"G"},"avg":22.566,"min":8.225,"max":27.613},"nfs4_read":{"unit":{"base":"B/s","prefix":"M"},"avg":647,"min":0,"max":1946},"nfs4_total":{"unit":{"base":"B/s","prefix":"M"},"avg":6181.6,"min":1270,"max":11411},"nfs4_write":{"unit":{"base":"B/s","prefix":"M"},"avg":22.4,"min":11,"max":29},"vectorization_ratio":{"unit":{"base":"%"},"avg":77.351,"min":0,"max":98.837}}} +{"jobId":398955,"user":"k106eb10","project":"k106eb","cluster":"fritz","subCluster":"main","partition":"singlenode","arrayJobId":0,"numNodes":1,"numHwthreads":72,"numAcc":0,"shared":"none","monitoringStatus":1,"smt":0,"jobState":"completed","duration":260,"walltime":86340,"resources":[{"hostname":"f0720"}],"metaData":{"jobName":"ams_pipeline","jobScript":"#!/bin/bash -l\n#SBATCH --job-name=ams_pipeline\n#SBATCH --time=23:59:00\n#SBATCH --partition=singlenode\n#SBATCH --ntasks=72\n#SBATCH --hint=multithread\n#SBATCH --chdir=/home/atuin/k106eb/k106eb10/ACE/Ni-Al/DFT/VASP_PBE_500_0.125_0.1_NM/AlNi/binaries/bulk/base-hcp/occ-shaken/hcp16.occ.4.shake.0/cfg/NiAl3NiAl11\n#SBATCH --export=NONE\nunset SLURM_EXPORT_ENV\nuss=$(whoami)\nfind /dev/shm/ -user $uss -type f -mmin +30 -delete\ncd \"/home/atuin/k106eb/k106eb10/ACE/Ni-Al/DFT/VASP_PBE_500_0.125_0.1_NM/AlNi/binaries/bulk/base-hcp/occ-shaken/hcp16.occ.4.shake.0/cfg/NiAl3NiAl11\"\nams_pipeline pipeline.json \u003e \"/home/atuin/k106eb/k106eb10/ACE/Ni-Al/DFT/VASP_PBE_500_0.125_0.1_NM/AlNi/binaries/bulk/base-hcp/occ-shaken/hcp16.occ.4.shake.0/cfg/NiAl3NiAl11/ams_pipeline_job.sh.out\" 2\u003e \"/home/atuin/k106eb/k106eb10/ACE/Ni-Al/DFT/VASP_PBE_500_0.125_0.1_NM/AlNi/binaries/bulk/base-hcp/occ-shaken/hcp16.occ.4.shake.0/cfg/NiAl3NiAl11/ams_pipeline_job.sh.err\"\n","slurmInfo":"\nJobId=398955 JobName=ams_pipeline\n UserId=k106eb10(210387) GroupId=80111\n Account=k106eb QOS=normal \n Requeue=False Restarts=0 BatchFlag=True \n TimeLimit=1439\n SubmitTime=2023-02-09T14:11:22\n Partition=singlenode \n NodeList=f0720\n NumNodes=1 NumCPUs=72 NumTasks=72 CPUs/Task=1\n NTasksPerNode:Socket:Core=0:None:None\n TRES_req=cpu=72,mem=250000M,node=1,billing=72\n TRES_alloc=cpu=72,node=1,billing=72\n Command=/home/atuin/k106eb/k106eb10/ACE/Ni-Al/DFT/VASP_PBE_500_0.125_0.1_NM/AlNi/binaries/bulk/base-hcp/occ-shaken/hcp16.occ.4.shake.0/cfg/NiAl3NiAl11/ams_pipeline_job.sh\n 
WorkDir=/home/atuin/k106eb/k106eb10/ACE/Ni-Al/DFT/VASP_PBE_500_0.125_0.1_NM/AlNi/binaries/bulk/base-hcp/occ-shaken/hcp16.occ.4.shake.0/cfg/NiAl3NiAl11\n StdErr=\n StdOut=ams_pipeline.o%j\n"},"startTime":1675956725,"statistics":{"clock":{"unit":{"base":"Hz","prefix":"M"},"avg":2335.254,"min":800.418,"max":2734.922},"cpu_load":{"unit":{"base":""},"avg":52.72,"min":34.46,"max":71.91},"cpu_power":{"unit":{"base":"W"},"avg":407.767,"min":93.932,"max":497.636},"cpu_user":{"unit":{"base":""},"avg":63.678,"min":19.872,"max":96.633},"flops_any":{"unit":{"base":"F/s","prefix":"G"},"avg":635.672,"min":0,"max":1332.874},"flops_dp":{"unit":{"base":"F/s","prefix":"G"},"avg":261.006,"min":0,"max":382.294},"flops_sp":{"unit":{"base":"F/s","prefix":"G"},"avg":113.659,"min":0,"max":568.286},"ib_recv":{"unit":{"base":"B/s"},"avg":27981.111,"min":69.4,"max":48084.589},"ib_recv_pkts":{"unit":{"base":"packets/s"},"avg":398.939,"min":0.5,"max":693.817},"ib_xmit":{"unit":{"base":"B/s"},"avg":188.513,"min":39.597,"max":724.568},"ib_xmit_pkts":{"unit":{"base":"packets/s"},"avg":0.867,"min":0.2,"max":2.933},"ipc":{"unit":{"base":"IPC"},"avg":0.944,"min":0.564,"max":1.291},"mem_bw":{"unit":{"base":"B/s","prefix":"G"},"avg":79.565,"min":0.021,"max":116.02},"mem_power":{"unit":{"base":"W"},"avg":24.692,"min":7.883,"max":31.318},"mem_used":{"unit":{"base":"B","prefix":"G"},"avg":22.566,"min":8.225,"max":27.613},"nfs4_read":{"unit":{"base":"B/s","prefix":"M"},"avg":647,"min":0,"max":1946},"nfs4_total":{"unit":{"base":"B/s","prefix":"M"},"avg":6181.6,"min":1270,"max":11411},"nfs4_write":{"unit":{"base":"B/s","prefix":"M"},"avg":22.4,"min":11,"max":29},"vectorization_ratio":{"unit":{"base":"%"},"avg":77.351,"min":0,"max":98.837}}} diff --git a/internal/importer/testdata/meta-fritzMinimal.input b/internal/importer/testdata/meta-fritzMinimal.input index f2cce79..f0289fb 100644 --- a/internal/importer/testdata/meta-fritzMinimal.input +++ b/internal/importer/testdata/meta-fritzMinimal.input @@ -1 +1 @@ 
-{"jobId":398764,"user":"k106eb10","project":"k106eb","cluster":"fritz","subCluster":"main","numNodes":1,"exclusive":1,"jobState":"completed","duration":177,"resources":[{"hostname":"f0649"}],"startTime":1675954353,"statistics":{"clock":{"unit":{"base":"Hz","prefix":"M"},"avg":1336.519,"min":801.564,"max":2348.215},"cpu_load":{"unit":{"base":""},"avg":31.64,"min":17.36,"max":45.54},"cpu_power":{"unit":{"base":"W"},"avg":150.018,"min":93.672,"max":261.592},"cpu_user":{"unit":{"base":""},"avg":28.518,"min":0.09,"max":57.343},"flops_any":{"unit":{"base":"F/s","prefix":"G"},"avg":45.012,"min":0,"max":135.037},"flops_dp":{"unit":{"base":"F/s","prefix":"G"},"avg":22.496,"min":0,"max":67.488},"flops_sp":{"unit":{"base":"F/s","prefix":"G"},"avg":0.02,"min":0,"max":0.061},"ib_recv":{"unit":{"base":"B/s"},"avg":14442.82,"min":219.998,"max":42581.368},"ib_recv_pkts":{"unit":{"base":"packets/s"},"avg":201.532,"min":1.25,"max":601.345},"ib_xmit":{"unit":{"base":"B/s"},"avg":282.098,"min":56.2,"max":569.363},"ib_xmit_pkts":{"unit":{"base":"packets/s"},"avg":1.228,"min":0.433,"max":2},"ipc":{"unit":{"base":"IPC"},"avg":0.77,"min":0.564,"max":0.906},"mem_bw":{"unit":{"base":"B/s","prefix":"G"},"avg":4.872,"min":0.025,"max":14.552},"mem_power":{"unit":{"base":"W"},"avg":7.725,"min":6.286,"max":10.556},"mem_used":{"unit":{"base":"B","prefix":"G"},"avg":6.162,"min":6.103,"max":6.226},"nfs4_read":{"unit":{"base":"B/s","prefix":"M"},"avg":1045.333,"min":311,"max":1525},"nfs4_total":{"unit":{"base":"B/s","prefix":"M"},"avg":6430,"min":2796,"max":11518},"nfs4_write":{"unit":{"base":"B/s","prefix":"M"},"avg":24.333,"min":0,"max":38},"vectorization_ratio":{"unit":{"base":"%"},"avg":25.528,"min":0,"max":76.585}}} +{"jobId":398764,"user":"k106eb10","project":"k106eb","cluster":"fritz","subCluster":"main","numNodes":1,"shared":"none","jobState":"completed","duration":177,"resources":[{"hostname":"f0649"}],"startTime":1675954353,"statistics":{"clock":{"unit":{"base":"Hz","prefix":"M"},"avg":1336.519,"min":801.564,"max":2348.215},"cpu_load":{"unit":{"base":""},"avg":31.64,"min":17.36,"max":45.54},"cpu_power":{"unit":{"base":"W"},"avg":150.018,"min":93.672,"max":261.592},"cpu_user":{"unit":{"base":""},"avg":28.518,"min":0.09,"max":57.343},"flops_any":{"unit":{"base":"F/s","prefix":"G"},"avg":45.012,"min":0,"max":135.037},"flops_dp":{"unit":{"base":"F/s","prefix":"G"},"avg":22.496,"min":0,"max":67.488},"flops_sp":{"unit":{"base":"F/s","prefix":"G"},"avg":0.02,"min":0,"max":0.061},"ib_recv":{"unit":{"base":"B/s"},"avg":14442.82,"min":219.998,"max":42581.368},"ib_recv_pkts":{"unit":{"base":"packets/s"},"avg":201.532,"min":1.25,"max":601.345},"ib_xmit":{"unit":{"base":"B/s"},"avg":282.098,"min":56.2,"max":569.363},"ib_xmit_pkts":{"unit":{"base":"packets/s"},"avg":1.228,"min":0.433,"max":2},"ipc":{"unit":{"base":"IPC"},"avg":0.77,"min":0.564,"max":0.906},"mem_bw":{"unit":{"base":"B/s","prefix":"G"},"avg":4.872,"min":0.025,"max":14.552},"mem_power":{"unit":{"base":"W"},"avg":7.725,"min":6.286,"max":10.556},"mem_used":{"unit":{"base":"B","prefix":"G"},"avg":6.162,"min":6.103,"max":6.226},"nfs4_read":{"unit":{"base":"B/s","prefix":"M"},"avg":1045.333,"min":311,"max":1525},"nfs4_total":{"unit":{"base":"B/s","prefix":"M"},"avg":6430,"min":2796,"max":11518},"nfs4_write":{"unit":{"base":"B/s","prefix":"M"},"avg":24.333,"min":0,"max":38},"vectorization_ratio":{"unit":{"base":"%"},"avg":25.528,"min":0,"max":76.585}}} diff --git a/internal/tagger/apps/vasp.txt b/internal/tagger/apps/vasp.txt index bd537e4..9f9b9d5 100644 --- 
a/internal/tagger/apps/vasp.txt +++ b/internal/tagger/apps/vasp.txt @@ -1 +1,2 @@ vasp +VASP diff --git a/pkg/archive/testdata/archive/emmy/1403/244/1608923076/meta.json b/pkg/archive/testdata/archive/emmy/1403/244/1608923076/meta.json index 1ce3f87..aadf21c 100644 --- a/pkg/archive/testdata/archive/emmy/1403/244/1608923076/meta.json +++ b/pkg/archive/testdata/archive/emmy/1403/244/1608923076/meta.json @@ -1,194 +1,194 @@ { - "exclusive": 1, - "jobId": 1403244, - "statistics": { - "mem_bw": { - "avg": 63.57, - "min": 0, - "unit": { - "base": "B/s", - "prefix": "G" - }, - "max": 74.5 - }, - "rapl_power": { - "avg": 228.07, - "min": 0, - "unit": { - "base": "W" - }, - "max": 258.56 - }, - "ipc": { - "unit": { - "base": "IPC" - }, - "max": 0.510204081632653, - "avg": 1.53846153846154, - "min": 0.0 - }, - "clock": { - "min": 1380.32, - "avg": 2599.39, - "unit": { - "base": "Hz", - "prefix": "M" - }, - "max": 2634.46 - }, - "cpu_load": { - "avg": 18.4, - "min": 0, - "max": 23.58, - "unit": { - "base": "load" - } - }, - "flops_any": { - "max": 404.62, - "unit": { - "base": "F/s", - "prefix": "G" - }, - "avg": 225.59, - "min": 0 - }, - "flops_dp": { - "max": 0.24, - "unit": { - "base": "F/s", - "prefix": "G" - }, - "min": 0, - "avg": 0 - }, - "mem_used": { - "min": 1.55, - "avg": 27.84, - "unit": { - "base": "B", - "prefix": "G" - }, - "max": 37.5 - }, - "flops_sp": { - "min": 0, - "avg": 225.59, - "max": 404.62, - "unit": { - "base": "F/s", - "prefix": "G" - } - } + "shared": "none", + "jobId": 1403244, + "statistics": { + "mem_bw": { + "avg": 63.57, + "min": 0, + "unit": { + "base": "B/s", + "prefix": "G" + }, + "max": 74.5 }, - "resources": [ - { - "hostname": "e0102" - }, - { - "hostname": "e0103" - }, - { - "hostname": "e0105" - }, - { - "hostname": "e0106" - }, - { - "hostname": "e0107" - }, - { - "hostname": "e0108" - }, - { - "hostname": "e0114" - }, - { - "hostname": "e0320" - }, - { - "hostname": "e0321" - }, - { - "hostname": "e0325" - }, - { - "hostname": "e0404" - }, - { - "hostname": "e0415" - }, - { - "hostname": "e0433" - }, - { - "hostname": "e0437" - }, - { - "hostname": "e0439" - }, - { - "hostname": "e0501" - }, - { - "hostname": "e0503" - }, - { - "hostname": "e0505" - }, - { - "hostname": "e0506" - }, - { - "hostname": "e0512" - }, - { - "hostname": "e0513" - }, - { - "hostname": "e0514" - }, - { - "hostname": "e0653" - }, - { - "hostname": "e0701" - }, - { - "hostname": "e0716" - }, - { - "hostname": "e0727" - }, - { - "hostname": "e0728" - }, - { - "hostname": "e0925" - }, - { - "hostname": "e0926" - }, - { - "hostname": "e0929" - }, - { - "hostname": "e0934" - }, - { - "hostname": "e0951" - } - ], - "walltime": 10, - "jobState": "completed", - "cluster": "emmy", - "subCluster": "haswell", - "stopTime": 1609009562, - "user": "emmyUser6", - "startTime": 1608923076, - "partition": "work", - "tags": [], - "project": "no project", - "numNodes": 32, - "duration": 86486 + "rapl_power": { + "avg": 228.07, + "min": 0, + "unit": { + "base": "W" + }, + "max": 258.56 + }, + "ipc": { + "unit": { + "base": "IPC" + }, + "max": 0.510204081632653, + "avg": 1.53846153846154, + "min": 0.0 + }, + "clock": { + "min": 1380.32, + "avg": 2599.39, + "unit": { + "base": "Hz", + "prefix": "M" + }, + "max": 2634.46 + }, + "cpu_load": { + "avg": 18.4, + "min": 0, + "max": 23.58, + "unit": { + "base": "load" + } + }, + "flops_any": { + "max": 404.62, + "unit": { + "base": "F/s", + "prefix": "G" + }, + "avg": 225.59, + "min": 0 + }, + "flops_dp": { + "max": 0.24, + "unit": { + "base": "F/s", + 
"prefix": "G" + }, + "min": 0, + "avg": 0 + }, + "mem_used": { + "min": 1.55, + "avg": 27.84, + "unit": { + "base": "B", + "prefix": "G" + }, + "max": 37.5 + }, + "flops_sp": { + "min": 0, + "avg": 225.59, + "max": 404.62, + "unit": { + "base": "F/s", + "prefix": "G" + } + } + }, + "resources": [ + { + "hostname": "e0102" + }, + { + "hostname": "e0103" + }, + { + "hostname": "e0105" + }, + { + "hostname": "e0106" + }, + { + "hostname": "e0107" + }, + { + "hostname": "e0108" + }, + { + "hostname": "e0114" + }, + { + "hostname": "e0320" + }, + { + "hostname": "e0321" + }, + { + "hostname": "e0325" + }, + { + "hostname": "e0404" + }, + { + "hostname": "e0415" + }, + { + "hostname": "e0433" + }, + { + "hostname": "e0437" + }, + { + "hostname": "e0439" + }, + { + "hostname": "e0501" + }, + { + "hostname": "e0503" + }, + { + "hostname": "e0505" + }, + { + "hostname": "e0506" + }, + { + "hostname": "e0512" + }, + { + "hostname": "e0513" + }, + { + "hostname": "e0514" + }, + { + "hostname": "e0653" + }, + { + "hostname": "e0701" + }, + { + "hostname": "e0716" + }, + { + "hostname": "e0727" + }, + { + "hostname": "e0728" + }, + { + "hostname": "e0925" + }, + { + "hostname": "e0926" + }, + { + "hostname": "e0929" + }, + { + "hostname": "e0934" + }, + { + "hostname": "e0951" + } + ], + "walltime": 10, + "jobState": "completed", + "cluster": "emmy", + "subCluster": "haswell", + "stopTime": 1609009562, + "user": "emmyUser6", + "startTime": 1608923076, + "partition": "work", + "tags": [], + "project": "no project", + "numNodes": 32, + "duration": 86486 } diff --git a/pkg/archive/testdata/archive/emmy/1404/397/1609300556/meta.json b/pkg/archive/testdata/archive/emmy/1404/397/1609300556/meta.json index e1fff10..c1e603a 100644 --- a/pkg/archive/testdata/archive/emmy/1404/397/1609300556/meta.json +++ b/pkg/archive/testdata/archive/emmy/1404/397/1609300556/meta.json @@ -1,194 +1,194 @@ { - "stopTime": 1609387081, - "resources": [ - { - "hostname": "e0151" - }, - { - "hostname": "e0152" - }, - { - "hostname": "e0153" - }, - { - "hostname": "e0232" - }, - { - "hostname": "e0303" - }, - { - "hostname": "e0314" - }, - { - "hostname": "e0344" - }, - { - "hostname": "e0345" - }, - { - "hostname": "e0348" - }, - { - "hostname": "e0507" - }, - { - "hostname": "e0518" - }, - { - "hostname": "e0520" - }, - { - "hostname": "e0522" - }, - { - "hostname": "e0526" - }, - { - "hostname": "e0527" - }, - { - "hostname": "e0528" - }, - { - "hostname": "e0530" - }, - { - "hostname": "e0551" - }, - { - "hostname": "e0604" - }, - { - "hostname": "e0613" - }, - { - "hostname": "e0634" - }, - { - "hostname": "e0639" - }, - { - "hostname": "e0640" - }, - { - "hostname": "e0651" - }, - { - "hostname": "e0653" - }, - { - "hostname": "e0701" - }, - { - "hostname": "e0704" - }, - { - "hostname": "e0751" - }, - { - "hostname": "e0809" - }, - { - "hostname": "e0814" - }, - { - "hostname": "e0819" - }, - { - "hostname": "e0908" - } - ], - "walltime": 10, - "cluster": "emmy", - "subCluster": "haswell", - "jobState": "completed", - "statistics": { - "clock": { - "max": 2634.9, - "unit": { - "base": "Hz", - "prefix": "M" - }, - "min": 0, - "avg": 2597.8 - }, - "cpu_load": { - "max": 27.41, - "min": 0, - "avg": 18.39, - "unit": { - "base": "load" - } - }, - "mem_bw": { - "min": 0, - "avg": 63.23, - "unit": { - "base": "B/s", - "prefix": "G" - }, - "max": 75.06 - }, - "ipc": { - "min": 0.0, - "avg": 1.53846153846154, - "unit": { - "base": "IPC" - }, - "max": 0.490196078431373 - }, - "rapl_power": { - "min": 0, - "avg": 227.32, - "unit": { - 
"base": "W" - }, - "max": 256.22 - }, - "mem_used": { - "min": 1.5, - "avg": 27.77, - "unit": { - "base": "B", - "prefix": "G" - }, - "max": 37.43 - }, - "flops_sp": { - "unit": { - "base": "F/s", - "prefix": "G" - }, - "max": 413.21, - "min": 0, - "avg": 224.41 - }, - "flops_dp": { - "max": 5.72, - "unit": { - "base": "F/s", - "prefix": "G" - }, - "min": 0, - "avg": 0 - }, - "flops_any": { - "min": 0, - "avg": 224.42, - "max": 413.21, - "unit": { - "base": "F/s", - "prefix": "G" - } - } + "stopTime": 1609387081, + "resources": [ + { + "hostname": "e0151" }, - "exclusive": 1, - "jobId": 1404397, - "tags": [], - "partition": "work", - "project": "no project", - "user": "emmyUser6", - "startTime": 1609300556, - "duration": 86525, - "numNodes": 32 + { + "hostname": "e0152" + }, + { + "hostname": "e0153" + }, + { + "hostname": "e0232" + }, + { + "hostname": "e0303" + }, + { + "hostname": "e0314" + }, + { + "hostname": "e0344" + }, + { + "hostname": "e0345" + }, + { + "hostname": "e0348" + }, + { + "hostname": "e0507" + }, + { + "hostname": "e0518" + }, + { + "hostname": "e0520" + }, + { + "hostname": "e0522" + }, + { + "hostname": "e0526" + }, + { + "hostname": "e0527" + }, + { + "hostname": "e0528" + }, + { + "hostname": "e0530" + }, + { + "hostname": "e0551" + }, + { + "hostname": "e0604" + }, + { + "hostname": "e0613" + }, + { + "hostname": "e0634" + }, + { + "hostname": "e0639" + }, + { + "hostname": "e0640" + }, + { + "hostname": "e0651" + }, + { + "hostname": "e0653" + }, + { + "hostname": "e0701" + }, + { + "hostname": "e0704" + }, + { + "hostname": "e0751" + }, + { + "hostname": "e0809" + }, + { + "hostname": "e0814" + }, + { + "hostname": "e0819" + }, + { + "hostname": "e0908" + } + ], + "walltime": 10, + "cluster": "emmy", + "subCluster": "haswell", + "jobState": "completed", + "statistics": { + "clock": { + "max": 2634.9, + "unit": { + "base": "Hz", + "prefix": "M" + }, + "min": 0, + "avg": 2597.8 + }, + "cpu_load": { + "max": 27.41, + "min": 0, + "avg": 18.39, + "unit": { + "base": "load" + } + }, + "mem_bw": { + "min": 0, + "avg": 63.23, + "unit": { + "base": "B/s", + "prefix": "G" + }, + "max": 75.06 + }, + "ipc": { + "min": 0.0, + "avg": 1.53846153846154, + "unit": { + "base": "IPC" + }, + "max": 0.490196078431373 + }, + "rapl_power": { + "min": 0, + "avg": 227.32, + "unit": { + "base": "W" + }, + "max": 256.22 + }, + "mem_used": { + "min": 1.5, + "avg": 27.77, + "unit": { + "base": "B", + "prefix": "G" + }, + "max": 37.43 + }, + "flops_sp": { + "unit": { + "base": "F/s", + "prefix": "G" + }, + "max": 413.21, + "min": 0, + "avg": 224.41 + }, + "flops_dp": { + "max": 5.72, + "unit": { + "base": "F/s", + "prefix": "G" + }, + "min": 0, + "avg": 0 + }, + "flops_any": { + "min": 0, + "avg": 224.42, + "max": 413.21, + "unit": { + "base": "F/s", + "prefix": "G" + } + } + }, + "shared": "none", + "jobId": 1404397, + "tags": [], + "partition": "work", + "project": "no project", + "user": "emmyUser6", + "startTime": 1609300556, + "duration": 86525, + "numNodes": 32 } From 9b644119ae1e2e45ea375a9954e4e1cb74b6fce6 Mon Sep 17 00:00:00 2001 From: Aditya Ujeniya Date: Tue, 9 Sep 2025 18:34:10 +0200 Subject: [PATCH 33/40] Fix to testdata database --- internal/importer/initDB.go | 4 ---- internal/repository/testdata/job.db | Bin 118784 -> 122880 bytes startDemo.sh | 33 ---------------------------- 3 files changed, 37 deletions(-) diff --git a/internal/importer/initDB.go b/internal/importer/initDB.go index 79879b2..179c21c 100644 --- a/internal/importer/initDB.go +++ b/internal/importer/initDB.go 
@@ -142,10 +142,6 @@ func InitDB() error { continue } - if jobMeta.Shared == "" { - jobMeta.Shared = "none" - } - id, err := r.TransactionAddNamed(t, repository.NamedJobInsert, jobMeta) if err != nil { diff --git a/internal/repository/testdata/job.db b/internal/repository/testdata/job.db index 43ec9d3c7f36c7ea505a96cc1208c4ce7a148eed..c8911a6377687841be6a1f2ffce9a36de1bb38bb 100644 GIT binary patch delta 5331 zcmeHLU2NOd73L+W|2UMZ$%riD9pXjmeJT3Z9~TkUrxw^SV8DPO7%&Vguwm#PvWKP_umVGhW#?Q{ z5+%!SvLbt$VN&Ed=YHqhbM86kUY@<`IQx;~C%xS}7>3yeSN-qk<=9ws2ea?{FW7)Y zgxkp2F5K?={kDgQyd$vg zt{TY`&g zh^>5mQ6K*iVE#Y6n z4N&qYA>aUMV&gda%d)H;-d#5K+7jW#b^hv?p*>xmPZ$q>f)QR5_HHLe%o_1J!JjZ= zHo;F_U59Kb!l0V8?kCU@KXzk$;E`YIC*F`Ar3a^gJQ%j!e2IBA-0|vJ|Zuh z77xuFnVDM@=jP!$K06zuVwrp@nNmtcT`q`~TdshB1B#_(E6r!pa#=m8XLB+lRX%t4 z!^1H#9v6?R+0!D@i@KD{Q&e~=w8{={DvK7Bil+K)s48F3o|98LwWo{()0ku)P_ueg zQ^ltz7pA5s7b2tM<56QGQlTK7Nw!SYV5Cb0iH_Mg`BRdj&Aj`SD30^3vD&>peVyTGI1}sh! z(86>cQPXERS(lP&NteXM#50R@t)Mj`M)l}Z^K(?VAQ!dLY9S>TsZ?|Qv`DF?ToO$$ zf=}Al81gcw^h^OJPWi^fku0TBIKUtjb9&{VbD%6n;EDPc7R^ZA*kVR1$mv!atWh!f z7?QB6p?kpv;0i^i%L`v~B?!0iR!7ZgYF5_@AVwobK@B_|gnJ)eY zlw9;c!>L&d)oDH*H2xu&QG1M^EckR{2lh`HRq5c5Ra3!DOwE_O?$qSPs*n; zF|sG+_V@0V+R)BzDG)%b)YgKfpgk>F+_1@5Bo@!MnryD2X6b5~P=ot*idgxp zi&zJx^>WsMwXv|FkYQ_wf>t&-oh#ZC`|N=MXoOiv`O~_zT1>(%n+9LP?=I8zGjoR% z&xlQ~I6sHp27(xY8?=e&OFFDIzn@$@Mni~6jh71$f|iDZwARq=!_Z-!!A#2KF|QLx z5)19|(~3G>Z`f^Uxwp2OPVL8N0r^rf3FTT#Nl@uF!A_x&uIZApi9k-qI{KA3rkIJe z(s3TEcViG0+R7mGA}u@}+Kh$Q+GiSWz?uf-SuUJ3ihMS$(6X1yo<;(#VHlKfT05np za7le$#Yf7im%xlHDSBqpZf9Y)H@uaSLVF$U%MyJw8(E8jyJ|D_3YvlS(Sr*lR&V=J zchs_bEpOPpdLCpZzt&diiFkYFKuXjzr@t+8%8*!;ZA~2@VCt;I9D#8^SrcduV*HBr z%~jHHBhq?X5IadBG}6V0nH$hh>Om^Q`j5_j(;gV|lbl6Q6Mtt1e1qG>ZNMSg@`H}R z7?|y{z||`g?ty6j2TjznQ9@eHrczDxhSex#Ebr)a)E?OFC#x164eXu{7Pmn*kjipY zi8un^277%LtcJaf`i~?09e_7Vb}+oT1CGvw?SZhL$fmMGmY|}F2R2u+0XG_kZu*#? 
zc2hr{(FoqVVLL$2UqgGUS3e|uAzV&)JoBF41ML(KWDnKbDNuzcz%ggCr!_(vBHkanbV zeGQ~q?kG8D6E{HtoBHq=;;Y@6C&WwKHG-ch9Q+l=eUQHbroQsOHq~P>wfgrr$j%tc zcp0Y?Zt!=hWZ%hf0YA7cIDl^ZSz*76IGujMu^L@k`{(&|7)p#p_z0*uC~6;6e7tdzT+$`!ALF z&DB+*n2dXw1+{>Ce4UcKS$JTi|;D delta 4478 zcmc&&eQX=$8Nc^FeP<_6T*tXOiODrps3WD!=?86E6w*TY(r95rh)Q%9Cv}LKTPLym zNn0l^ARq`Qy)pmOR#mlW+N5nFMuY(x)ugU$g=vZyD12;!7N!MA)~-WH{1Kk_&Ubba z$KoH#UG9AMeEgpGdEWP)=Xo=9P8vERjn;WphGByF%Y(n0##+`ksmx7>1}gB)!=Gp1 zdAPBXRJ`q3=i&JC?hllAm6P&QawO-idV(`#e%NT?J<8K}c_r27BPaG71AM?D1@feT zP0C&4CAZL>NW`1RtLA{X#=F%`zgsf;4;Ihem)pj;bOUl zQ-d32;&S07NiKR!tPYcN(t55Vqlf$CosvtB!>76FebOJ5edvNFoW-x!I;r&6f<1c;2O-Dw0c2CC66rP zU6gqzp#K9rQQ~8$7YT>V3L>(W47I}YCq<>5KZz30@OO+fjjSA5G19=+GMwMX2mL;R z0XIU!eZ)H27^SN{?^(~KsjIekJ(2!Cbe172$o)gvv2o(heQ`M~Y$8>JVL0Zo#@=|M zKdDFd9MmKG9%MQU8EGR6>>R6i>=-+zgCaw;BD_Bmass2i|9|Ze>HyjdEocn>bXOYjRgfi{kUadB?Fabm8m5T+S;AKri$;S4;B5(gmd z{2v7=#CL zI9nlICj=4G4l-Skjuuyj$JSCkXL9E0)eAv%(GIqm1sqIq0ke|3h-7>K)fRURXi@X3V#=l3L1Zr*SRjPiS?4{7sg&A zxrn@vD8q;ABz4s)a(sjFk@7mfHyIr?e&b$~ezaCn*ZatDAhu^$^uVKuZap4LrgX|r z>d91OFoiVVZ9nSV@<69{_qN+RAJR$*Y1_9^x)w53^i*tjv_+#JEv7h9Q_kzE85D^&Ppb-%jNX4xtA{Y32V7qfA{kJpFMHI%N;J- z(1yMoSwj{m9;;uVxO4iLP{nni$y#bYz~fEM&CQak@gZJwv1}(cdTi$lV{6%_%;C(T z%z?~4K44cL+UH?o?X+KF>k!%wZ2bv3(46nD#Dq*s?XQ*`oDU=DMwZ|X~-DPAYDIdG+^bbg_i1UuF&=yPD zJo=cJAJ(9lA66{i9*lO0<%YE(i#wnY$JG5di&GV7w5L~Ga#W52o!dc@{pT7YPfWJr zh;A!8w-qqNc$CvD_RwvW|E_$pa$Utc6~h&NPqlkWIj@|?vqR~i{wU=u(YEIAwHBod zbq{me^{++gLTab^q$sh|Om7v?@3I!9?fUOj8?<{JYf-w;TwJGGgqdPT12zt_S&Z(n z!<@tieo;xI6Ogn^j$MQ8+MC?lZ9mevGv0THLis%8#$UQ?sm6<@Cz^w4#=?I^Fo0Lp MMCcs3KsTk|00&{a4gdfE diff --git a/startDemo.sh b/startDemo.sh index b027bf5..8087b1c 100755 --- a/startDemo.sh +++ b/startDemo.sh @@ -12,41 +12,8 @@ else cp ./configs/env-template.txt .env cp ./configs/config-demo.json config.json - # mkdir -p ./var/checkpoints - # cp -rf ~/cc-metric-store/var/checkpoints ~/cc-backend/var - ./cc-backend -migrate-db ./cc-backend -dev -init-db -add-user demo:admin,api:demo - - # --- begin: generate JWT for demo and update test_ccms_write_api.sh --- - CC_BIN="./cc-backend" - TEST_FILE="./test_ccms_write_api.sh" - BACKUP_FILE="${TEST_FILE}.bak" - - if [ -x "$CC_BIN" ]; then - echo "Generating JWT for user 'demo'..." - output="$($CC_BIN -jwt demo 2>&1 || true)" - token="$(printf '%s\n' "$output" | grep -oE '[A-Za-z0-9_-]+\.[A-Za-z0-9_-]+\.[A-Za-z0-9_-]+' | head -n1 || true)" - - if [ -z "$token" ]; then - echo "Warning: could not extract JWT from output:" >&2 - printf '%s\n' "$output" >&2 - else - if [ -f "$TEST_FILE" ]; then - cp -a "$TEST_FILE" "$BACKUP_FILE" - # replace first line with JWT="..." - sed -i "1s#.*#JWT=\"$token\"#" "$TEST_FILE" - echo "Updated JWT in $TEST_FILE (backup at $BACKUP_FILE)" - else - echo "Warning: $TEST_FILE not found; JWT not written." - fi - fi - else - echo "Warning: $CC_BIN not found or not executable; skipping JWT generation." 
- fi - # --- end: generate JWT for demo and update test_ccms_write_api.sh --- - - ./cc-backend -server -dev fi From 79605c8a9ea0d21d827567589d0bf939eece889a Mon Sep 17 00:00:00 2001 From: Jan Eitzinger Date: Wed, 10 Sep 2025 09:08:32 +0200 Subject: [PATCH 34/40] Update test pipeline to go 1.25 --- .github/workflows/test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index a8a7429..6974301 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -7,7 +7,7 @@ jobs: - name: Install Go uses: actions/setup-go@v4 with: - go-version: 1.24.x + go-version: 1.25.x - name: Checkout code uses: actions/checkout@v3 - name: Build, Vet & Test From 7948d5f773c3d826e830c857d8d3f798e43ac0bc Mon Sep 17 00:00:00 2001 From: Jan Eitzinger Date: Wed, 10 Sep 2025 10:34:11 +0200 Subject: [PATCH 35/40] Use different demo job-archive for dev branch --- startDemo.sh | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/startDemo.sh b/startDemo.sh index 8087b1c..6817dc8 100755 --- a/startDemo.sh +++ b/startDemo.sh @@ -1,19 +1,19 @@ #!/bin/sh if [ -d './var' ]; then - echo 'Directory ./var already exists! Skipping initialization.' - ./cc-backend -server -dev + echo 'Directory ./var already exists! Skipping initialization.' + ./cc-backend -server -dev else - make - wget https://hpc-mover.rrze.uni-erlangen.de/HPC-Data/0x7b58aefb/eig7ahyo6fo2bais0ephuf2aitohv1ai/job-archive-demo.tar - tar xf job-archive-demo.tar - rm ./job-archive-demo.tar + make + wget https://hpc-mover.rrze.uni-erlangen.de/HPC-Data/0x7b58aefb/eig7ahyo6fo2bais0ephuf2aitohv1ai/job-archive-dev.tar + tar xf job-archive-dev.tar + rm ./job-archive-dev.tar - cp ./configs/env-template.txt .env - cp ./configs/config-demo.json config.json + cp ./configs/env-template.txt .env + cp ./configs/config-demo.json config.json - ./cc-backend -migrate-db - ./cc-backend -dev -init-db -add-user demo:admin,api:demo - ./cc-backend -server -dev + ./cc-backend -migrate-db + ./cc-backend -dev -init-db -add-user demo:admin,api:demo + ./cc-backend -server -dev fi From b2368a075103157fdd5e4a1aa35a05ec79e6b896 Mon Sep 17 00:00:00 2001 From: Aditya Ujeniya Date: Wed, 10 Sep 2025 14:23:18 +0200 Subject: [PATCH 36/40] Connectivity to CCMS feature readded --- cmd/cc-backend/main.go | 15 +- cmd/cc-backend/server.go | 9 +- configs/config-demo.json | 8 +- internal/config/memorystore.go | 2 + internal/config/schema.go | 4 +- .../metricdata/cc-metric-store-internal.go | 1154 +++++++++++++++++ internal/metricdata/cc-metric-store.go | 345 +++-- internal/metricdata/metricdata.go | 3 + 8 files changed, 1412 insertions(+), 128 deletions(-) create mode 100644 internal/metricdata/cc-metric-store-internal.go diff --git a/cmd/cc-backend/main.go b/cmd/cc-backend/main.go index 0146118..eed0914 100644 --- a/cmd/cc-backend/main.go +++ b/cmd/cc-backend/main.go @@ -97,12 +97,6 @@ func main() { } else { cclog.Abort("Cluster configuration must be present") } - - if mscfg := ccconf.GetPackageConfig("metric-store"); mscfg != nil { - config.InitMetricStore(mscfg) - } else { - cclog.Abort("Metric Store configuration must be present") - } } else { cclog.Abort("Main configuration must be present") } @@ -251,8 +245,15 @@ func main() { var wg sync.WaitGroup //Metric Store starts after all flags have been processes - memorystore.Init(&wg) + if config.InternalCCMSFlag { + if mscfg := ccconf.GetPackageConfig("metric-store"); mscfg != nil { + config.InitMetricStore(mscfg) + } else { + 
cclog.Abort("Metric Store configuration must be present") + } + memorystore.Init(&wg) + } archiver.Start(repository.GetJobRepository()) // // Comment out diff --git a/cmd/cc-backend/server.go b/cmd/cc-backend/server.go index 18d7ea5..e2141ec 100644 --- a/cmd/cc-backend/server.go +++ b/cmd/cc-backend/server.go @@ -241,10 +241,13 @@ func serverInit() { routerConfig.SetupRoutes(secured, buildInfo) apiHandle.MountApiRoutes(securedapi) apiHandle.MountUserApiRoutes(userapi) - apiHandle.MountMetricStoreApiRoutes(metricstoreapi) apiHandle.MountConfigApiRoutes(configapi) apiHandle.MountFrontendApiRoutes(frontendapi) + if config.InternalCCMSFlag { + apiHandle.MountMetricStoreApiRoutes(metricstoreapi) + } + if config.Keys.EmbedStaticFiles { if i, err := os.Stat("./var/img"); err == nil { if i.IsDir() { @@ -337,7 +340,9 @@ func serverShutdown() { server.Shutdown(context.Background()) //Archive all the metric store data - memorystore.Shutdown() + if config.InternalCCMSFlag { + memorystore.Shutdown() + } // Then, wait for any async archivings still pending... archiver.WaitForArchiving() diff --git a/configs/config-demo.json b/configs/config-demo.json index d47f926..92e5b47 100644 --- a/configs/config-demo.json +++ b/configs/config-demo.json @@ -34,7 +34,9 @@ { "name": "fritz", "metricDataRepository": { - "kind": "cc-metric-store" + "kind": "cc-metric-store", + "url": "http://localhost:8082", + "token": "eyJ0eXAiOiJKV1QiLCJhbGciOiJFZERTQSJ9" }, "filterRanges": { "numNodes": { @@ -54,7 +56,9 @@ { "name": "alex", "metricDataRepository": { - "kind": "cc-metric-store" + "kind": "cc-metric-store", + "url": "http://localhost:8082", + "token": "eyJ0eXAiOiJKV1QiLCJhbGciOiJFZERTQSJ9" }, "filterRanges": { "numNodes": { diff --git a/internal/config/memorystore.go b/internal/config/memorystore.go index c277045..b18d26d 100644 --- a/internal/config/memorystore.go +++ b/internal/config/memorystore.go @@ -8,6 +8,8 @@ import ( cclog "github.com/ClusterCockpit/cc-lib/ccLogger" ) +var InternalCCMSFlag bool = false + // -------------------- // Metric Store config // -------------------- diff --git a/internal/config/schema.go b/internal/config/schema.go index ca0440e..bf8cf2b 100644 --- a/internal/config/schema.go +++ b/internal/config/schema.go @@ -135,7 +135,7 @@ var clustersSchema = ` "properties": { "kind": { "type": "string", - "enum": ["influxdb", "prometheus", "cc-metric-store", "test"] + "enum": ["influxdb", "prometheus", "cc-metric-store", "cc-metric-store-internal", "test"] }, "url": { "type": "string" @@ -144,7 +144,7 @@ var clustersSchema = ` "type": "string" } }, - "required": ["kind"] + "required": ["kind","url"] }, "filterRanges": { "description": "This option controls the slider ranges for the UI controls of numNodes, duration, and startTime.", diff --git a/internal/metricdata/cc-metric-store-internal.go b/internal/metricdata/cc-metric-store-internal.go new file mode 100644 index 0000000..c5b7e0e --- /dev/null +++ b/internal/metricdata/cc-metric-store-internal.go @@ -0,0 +1,1154 @@ +// Copyright (C) NHR@FAU, University Erlangen-Nuremberg. +// All rights reserved. This file is part of cc-backend. +// Use of this source code is governed by a MIT-style +// license that can be found in the LICENSE file. 
+package metricdata + +import ( + "context" + "encoding/json" + "fmt" + "sort" + "strconv" + "strings" + "time" + + "github.com/ClusterCockpit/cc-backend/internal/graph/model" + "github.com/ClusterCockpit/cc-backend/internal/memorystore" + "github.com/ClusterCockpit/cc-backend/pkg/archive" + cclog "github.com/ClusterCockpit/cc-lib/ccLogger" + "github.com/ClusterCockpit/cc-lib/schema" +) + +// Bloat Code +type CCMetricStoreConfigInternal struct { + Kind string `json:"kind"` + Url string `json:"url"` + Token string `json:"token"` + + // If metrics are known to this MetricDataRepository under a different + // name than in the `metricConfig` section of the 'cluster.json', + // provide this optional mapping of local to remote name for this metric. + Renamings map[string]string `json:"metricRenamings"` +} + +// Bloat Code +type CCMetricStoreInternal struct { +} + +// Bloat Code +func (ccms *CCMetricStoreInternal) Init(rawConfig json.RawMessage) error { + + return nil +} + +func (ccms *CCMetricStoreInternal) LoadData( + job *schema.Job, + metrics []string, + scopes []schema.MetricScope, + ctx context.Context, + resolution int, +) (schema.JobData, error) { + queries, assignedScope, err := ccms.buildQueries(job, metrics, scopes, int64(resolution)) + if err != nil { + cclog.Errorf("Error while building queries for jobId %d, Metrics %v, Scopes %v: %s", job.JobID, metrics, scopes, err.Error()) + return nil, err + } + + req := memorystore.ApiQueryRequest{ + Cluster: job.Cluster, + From: job.StartTime, + To: job.StartTime + int64(job.Duration), + Queries: queries, + WithStats: true, + WithData: true, + } + + resBody, err := memorystore.FetchData(req) + if err != nil { + cclog.Errorf("Error while fetching data : %s", err.Error()) + return nil, err + } + + var errors []string + jobData := make(schema.JobData) + for i, row := range resBody.Results { + query := req.Queries[i] + metric := query.Metric + scope := assignedScope[i] + mc := archive.GetMetricConfig(job.Cluster, metric) + if _, ok := jobData[metric]; !ok { + jobData[metric] = make(map[schema.MetricScope]*schema.JobMetric) + } + + res := mc.Timestep + if len(row) > 0 { + res = int(row[0].Resolution) + } + + jobMetric, ok := jobData[metric][scope] + if !ok { + jobMetric = &schema.JobMetric{ + Unit: mc.Unit, + Timestep: res, + Series: make([]schema.Series, 0), + } + jobData[metric][scope] = jobMetric + } + + for ndx, res := range row { + if res.Error != nil { + /* Build list for "partial errors", if any */ + errors = append(errors, fmt.Sprintf("failed to fetch '%s' from host '%s': %s", query.Metric, query.Hostname, *res.Error)) + continue + } + + id := (*string)(nil) + if query.Type != nil { + id = new(string) + *id = query.TypeIds[ndx] + } + + if res.Avg.IsNaN() || res.Min.IsNaN() || res.Max.IsNaN() { + // "schema.Float()" because regular float64 can not be JSONed when NaN. 
+ res.Avg = schema.Float(0) + res.Min = schema.Float(0) + res.Max = schema.Float(0) + } + + jobMetric.Series = append(jobMetric.Series, schema.Series{ + Hostname: query.Hostname, + Id: id, + Statistics: schema.MetricStatistics{ + Avg: float64(res.Avg), + Min: float64(res.Min), + Max: float64(res.Max), + }, + Data: res.Data, + }) + } + + // So that one can later check len(jobData): + if len(jobMetric.Series) == 0 { + delete(jobData[metric], scope) + if len(jobData[metric]) == 0 { + delete(jobData, metric) + } + } + } + + if len(errors) != 0 { + /* Returns list for "partial errors" */ + return jobData, fmt.Errorf("METRICDATA/CCMS > Errors: %s", strings.Join(errors, ", ")) + } + return jobData, nil +} + +var ( + hwthreadString = string(schema.MetricScopeHWThread) + coreString = string(schema.MetricScopeCore) + memoryDomainString = string(schema.MetricScopeMemoryDomain) + socketString = string(schema.MetricScopeSocket) + acceleratorString = string(schema.MetricScopeAccelerator) +) + +func (ccms *CCMetricStoreInternal) buildQueries( + job *schema.Job, + metrics []string, + scopes []schema.MetricScope, + resolution int64, +) ([]memorystore.ApiQuery, []schema.MetricScope, error) { + queries := make([]memorystore.ApiQuery, 0, len(metrics)*len(scopes)*len(job.Resources)) + assignedScope := []schema.MetricScope{} + + subcluster, scerr := archive.GetSubCluster(job.Cluster, job.SubCluster) + if scerr != nil { + return nil, nil, scerr + } + topology := subcluster.Topology + + for _, metric := range metrics { + mc := archive.GetMetricConfig(job.Cluster, metric) + if mc == nil { + // return nil, fmt.Errorf("METRICDATA/CCMS > metric '%s' is not specified for cluster '%s'", metric, job.Cluster) + cclog.Infof("metric '%s' is not specified for cluster '%s'", metric, job.Cluster) + continue + } + + // Skip if metric is removed for subcluster + if len(mc.SubClusters) != 0 { + isRemoved := false + for _, scConfig := range mc.SubClusters { + if scConfig.Name == job.SubCluster && scConfig.Remove { + isRemoved = true + break + } + } + if isRemoved { + continue + } + } + + // Avoid duplicates... 
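+ // Each requested scope is raised to the metric's native scope via
+ // nativeScope.Max(requestedScope); once a resulting scope has been handled
+ // for this metric, further requested scopes collapsing to it are skipped.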
+ handledScopes := make([]schema.MetricScope, 0, 3) + + scopesLoop: + for _, requestedScope := range scopes { + nativeScope := mc.Scope + if nativeScope == schema.MetricScopeAccelerator && job.NumAcc == 0 { + continue + } + + scope := nativeScope.Max(requestedScope) + for _, s := range handledScopes { + if scope == s { + continue scopesLoop + } + } + handledScopes = append(handledScopes, scope) + + for _, host := range job.Resources { + hwthreads := host.HWThreads + if hwthreads == nil { + hwthreads = topology.Node + } + + // Accelerator -> Accelerator (Use "accelerator" scope if requested scope is lower than node) + if nativeScope == schema.MetricScopeAccelerator && scope.LT(schema.MetricScopeNode) { + if scope != schema.MetricScopeAccelerator { + // Skip all other catched cases + continue + } + + queries = append(queries, memorystore.ApiQuery{ + Metric: metric, + Hostname: host.Hostname, + Aggregate: false, + Type: &acceleratorString, + TypeIds: host.Accelerators, + Resolution: resolution, + }) + assignedScope = append(assignedScope, schema.MetricScopeAccelerator) + continue + } + + // Accelerator -> Node + if nativeScope == schema.MetricScopeAccelerator && scope == schema.MetricScopeNode { + if len(host.Accelerators) == 0 { + continue + } + + queries = append(queries, memorystore.ApiQuery{ + Metric: metric, + Hostname: host.Hostname, + Aggregate: true, + Type: &acceleratorString, + TypeIds: host.Accelerators, + Resolution: resolution, + }) + assignedScope = append(assignedScope, scope) + continue + } + + // HWThread -> HWThead + if nativeScope == schema.MetricScopeHWThread && scope == schema.MetricScopeHWThread { + queries = append(queries, memorystore.ApiQuery{ + Metric: metric, + Hostname: host.Hostname, + Aggregate: false, + Type: &hwthreadString, + TypeIds: intToStringSlice(hwthreads), + Resolution: resolution, + }) + assignedScope = append(assignedScope, scope) + continue + } + + // HWThread -> Core + if nativeScope == schema.MetricScopeHWThread && scope == schema.MetricScopeCore { + cores, _ := topology.GetCoresFromHWThreads(hwthreads) + for _, core := range cores { + queries = append(queries, memorystore.ApiQuery{ + Metric: metric, + Hostname: host.Hostname, + Aggregate: true, + Type: &hwthreadString, + TypeIds: intToStringSlice(topology.Core[core]), + Resolution: resolution, + }) + assignedScope = append(assignedScope, scope) + } + continue + } + + // HWThread -> Socket + if nativeScope == schema.MetricScopeHWThread && scope == schema.MetricScopeSocket { + sockets, _ := topology.GetSocketsFromHWThreads(hwthreads) + for _, socket := range sockets { + queries = append(queries, memorystore.ApiQuery{ + Metric: metric, + Hostname: host.Hostname, + Aggregate: true, + Type: &hwthreadString, + TypeIds: intToStringSlice(topology.Socket[socket]), + Resolution: resolution, + }) + assignedScope = append(assignedScope, scope) + } + continue + } + + // HWThread -> Node + if nativeScope == schema.MetricScopeHWThread && scope == schema.MetricScopeNode { + queries = append(queries, memorystore.ApiQuery{ + Metric: metric, + Hostname: host.Hostname, + Aggregate: true, + Type: &hwthreadString, + TypeIds: intToStringSlice(hwthreads), + Resolution: resolution, + }) + assignedScope = append(assignedScope, scope) + continue + } + + // Core -> Core + if nativeScope == schema.MetricScopeCore && scope == schema.MetricScopeCore { + cores, _ := topology.GetCoresFromHWThreads(hwthreads) + queries = append(queries, memorystore.ApiQuery{ + Metric: metric, + Hostname: host.Hostname, + Aggregate: false, + Type: 
&coreString, + TypeIds: intToStringSlice(cores), + Resolution: resolution, + }) + assignedScope = append(assignedScope, scope) + continue + } + + // Core -> Socket + if nativeScope == schema.MetricScopeCore && scope == schema.MetricScopeSocket { + sockets, _ := topology.GetSocketsFromCores(hwthreads) + for _, socket := range sockets { + queries = append(queries, memorystore.ApiQuery{ + Metric: metric, + Hostname: host.Hostname, + Aggregate: true, + Type: &coreString, + TypeIds: intToStringSlice(topology.Socket[socket]), + Resolution: resolution, + }) + assignedScope = append(assignedScope, scope) + } + continue + } + + // Core -> Node + if nativeScope == schema.MetricScopeCore && scope == schema.MetricScopeNode { + cores, _ := topology.GetCoresFromHWThreads(hwthreads) + queries = append(queries, memorystore.ApiQuery{ + Metric: metric, + Hostname: host.Hostname, + Aggregate: true, + Type: &coreString, + TypeIds: intToStringSlice(cores), + Resolution: resolution, + }) + assignedScope = append(assignedScope, scope) + continue + } + + // MemoryDomain -> MemoryDomain + if nativeScope == schema.MetricScopeMemoryDomain && scope == schema.MetricScopeMemoryDomain { + sockets, _ := topology.GetMemoryDomainsFromHWThreads(hwthreads) + queries = append(queries, memorystore.ApiQuery{ + Metric: metric, + Hostname: host.Hostname, + Aggregate: false, + Type: &memoryDomainString, + TypeIds: intToStringSlice(sockets), + Resolution: resolution, + }) + assignedScope = append(assignedScope, scope) + continue + } + + // MemoryDoman -> Node + if nativeScope == schema.MetricScopeMemoryDomain && scope == schema.MetricScopeNode { + sockets, _ := topology.GetMemoryDomainsFromHWThreads(hwthreads) + queries = append(queries, memorystore.ApiQuery{ + Metric: metric, + Hostname: host.Hostname, + Aggregate: true, + Type: &memoryDomainString, + TypeIds: intToStringSlice(sockets), + Resolution: resolution, + }) + assignedScope = append(assignedScope, scope) + continue + } + + // Socket -> Socket + if nativeScope == schema.MetricScopeSocket && scope == schema.MetricScopeSocket { + sockets, _ := topology.GetSocketsFromHWThreads(hwthreads) + queries = append(queries, memorystore.ApiQuery{ + Metric: metric, + Hostname: host.Hostname, + Aggregate: false, + Type: &socketString, + TypeIds: intToStringSlice(sockets), + Resolution: resolution, + }) + assignedScope = append(assignedScope, scope) + continue + } + + // Socket -> Node + if nativeScope == schema.MetricScopeSocket && scope == schema.MetricScopeNode { + sockets, _ := topology.GetSocketsFromHWThreads(hwthreads) + queries = append(queries, memorystore.ApiQuery{ + Metric: metric, + Hostname: host.Hostname, + Aggregate: true, + Type: &socketString, + TypeIds: intToStringSlice(sockets), + Resolution: resolution, + }) + assignedScope = append(assignedScope, scope) + continue + } + + // Node -> Node + if nativeScope == schema.MetricScopeNode && scope == schema.MetricScopeNode { + queries = append(queries, memorystore.ApiQuery{ + Metric: metric, + Hostname: host.Hostname, + Resolution: resolution, + }) + assignedScope = append(assignedScope, scope) + continue + } + + return nil, nil, fmt.Errorf("METRICDATA/CCMS > TODO: unhandled case: native-scope=%s, requested-scope=%s", nativeScope, requestedScope) + } + } + } + + return queries, assignedScope, nil +} + +func (ccms *CCMetricStoreInternal) LoadStats( + job *schema.Job, + metrics []string, + ctx context.Context, +) (map[string]map[string]schema.MetricStatistics, error) { + queries, _, err := ccms.buildQueries(job, metrics, 
[]schema.MetricScope{schema.MetricScopeNode}, 0) // #166 Add scope shere for analysis view accelerator normalization? + if err != nil { + cclog.Errorf("Error while building queries for jobId %d, Metrics %v: %s", job.JobID, metrics, err.Error()) + return nil, err + } + + req := memorystore.ApiQueryRequest{ + Cluster: job.Cluster, + From: job.StartTime, + To: job.StartTime + int64(job.Duration), + Queries: queries, + WithStats: true, + WithData: false, + } + + resBody, err := memorystore.FetchData(req) + if err != nil { + cclog.Errorf("Error while fetching data : %s", err.Error()) + return nil, err + } + + stats := make(map[string]map[string]schema.MetricStatistics, len(metrics)) + for i, res := range resBody.Results { + query := req.Queries[i] + metric := query.Metric + data := res[0] + if data.Error != nil { + cclog.Errorf("fetching %s for node %s failed: %s", metric, query.Hostname, *data.Error) + continue + } + + metricdata, ok := stats[metric] + if !ok { + metricdata = make(map[string]schema.MetricStatistics, job.NumNodes) + stats[metric] = metricdata + } + + if data.Avg.IsNaN() || data.Min.IsNaN() || data.Max.IsNaN() { + cclog.Warnf("fetching %s for node %s failed: one of avg/min/max is NaN", metric, query.Hostname) + continue + } + + metricdata[query.Hostname] = schema.MetricStatistics{ + Avg: float64(data.Avg), + Min: float64(data.Min), + Max: float64(data.Max), + } + } + + return stats, nil +} + +// Used for Job-View Statistics Table +func (ccms *CCMetricStoreInternal) LoadScopedStats( + job *schema.Job, + metrics []string, + scopes []schema.MetricScope, + ctx context.Context, +) (schema.ScopedJobStats, error) { + queries, assignedScope, err := ccms.buildQueries(job, metrics, scopes, 0) + if err != nil { + cclog.Errorf("Error while building queries for jobId %d, Metrics %v, Scopes %v: %s", job.JobID, metrics, scopes, err.Error()) + return nil, err + } + + req := memorystore.ApiQueryRequest{ + Cluster: job.Cluster, + From: job.StartTime, + To: job.StartTime + int64(job.Duration), + Queries: queries, + WithStats: true, + WithData: false, + } + + resBody, err := memorystore.FetchData(req) + if err != nil { + cclog.Errorf("Error while fetching data : %s", err.Error()) + return nil, err + } + + var errors []string + scopedJobStats := make(schema.ScopedJobStats) + + for i, row := range resBody.Results { + query := req.Queries[i] + metric := query.Metric + scope := assignedScope[i] + + if _, ok := scopedJobStats[metric]; !ok { + scopedJobStats[metric] = make(map[schema.MetricScope][]*schema.ScopedStats) + } + + if _, ok := scopedJobStats[metric][scope]; !ok { + scopedJobStats[metric][scope] = make([]*schema.ScopedStats, 0) + } + + for ndx, res := range row { + if res.Error != nil { + /* Build list for "partial errors", if any */ + errors = append(errors, fmt.Sprintf("failed to fetch '%s' from host '%s': %s", query.Metric, query.Hostname, *res.Error)) + continue + } + + id := (*string)(nil) + if query.Type != nil { + id = new(string) + *id = query.TypeIds[ndx] + } + + if res.Avg.IsNaN() || res.Min.IsNaN() || res.Max.IsNaN() { + // "schema.Float()" because regular float64 can not be JSONed when NaN. 
+ res.Avg = schema.Float(0) + res.Min = schema.Float(0) + res.Max = schema.Float(0) + } + + scopedJobStats[metric][scope] = append(scopedJobStats[metric][scope], &schema.ScopedStats{ + Hostname: query.Hostname, + Id: id, + Data: &schema.MetricStatistics{ + Avg: float64(res.Avg), + Min: float64(res.Min), + Max: float64(res.Max), + }, + }) + } + + // So that one can later check len(scopedJobStats[metric][scope]): Remove from map if empty + if len(scopedJobStats[metric][scope]) == 0 { + delete(scopedJobStats[metric], scope) + if len(scopedJobStats[metric]) == 0 { + delete(scopedJobStats, metric) + } + } + } + + if len(errors) != 0 { + /* Returns list for "partial errors" */ + return scopedJobStats, fmt.Errorf("METRICDATA/CCMS > Errors: %s", strings.Join(errors, ", ")) + } + return scopedJobStats, nil +} + +// Used for Systems-View Node-Overview +func (ccms *CCMetricStoreInternal) LoadNodeData( + cluster string, + metrics, nodes []string, + scopes []schema.MetricScope, + from, to time.Time, + ctx context.Context, +) (map[string]map[string][]*schema.JobMetric, error) { + req := memorystore.ApiQueryRequest{ + Cluster: cluster, + From: from.Unix(), + To: to.Unix(), + WithStats: true, + WithData: true, + } + + if nodes == nil { + req.ForAllNodes = append(req.ForAllNodes, metrics...) + } else { + for _, node := range nodes { + for _, metric := range metrics { + req.Queries = append(req.Queries, memorystore.ApiQuery{ + Hostname: node, + Metric: metric, + Resolution: 0, // Default for Node Queries: Will return metric $Timestep Resolution + }) + } + } + } + + resBody, err := memorystore.FetchData(req) + if err != nil { + cclog.Errorf("Error while fetching data : %s", err.Error()) + return nil, err + } + + var errors []string + data := make(map[string]map[string][]*schema.JobMetric) + for i, res := range resBody.Results { + var query memorystore.ApiQuery + if resBody.Queries != nil { + query = resBody.Queries[i] + } else { + query = req.Queries[i] + } + + metric := query.Metric + qdata := res[0] + if qdata.Error != nil { + /* Build list for "partial errors", if any */ + errors = append(errors, fmt.Sprintf("fetching %s for node %s failed: %s", metric, query.Hostname, *qdata.Error)) + } + + if qdata.Avg.IsNaN() || qdata.Min.IsNaN() || qdata.Max.IsNaN() { + // return nil, fmt.Errorf("METRICDATA/CCMS > fetching %s for node %s failed: %s", metric, query.Hostname, "avg/min/max is NaN") + qdata.Avg, qdata.Min, qdata.Max = 0., 0., 0. 
+ } + + hostdata, ok := data[query.Hostname] + if !ok { + hostdata = make(map[string][]*schema.JobMetric) + data[query.Hostname] = hostdata + } + + mc := archive.GetMetricConfig(cluster, metric) + hostdata[metric] = append(hostdata[metric], &schema.JobMetric{ + Unit: mc.Unit, + Timestep: mc.Timestep, + Series: []schema.Series{ + { + Hostname: query.Hostname, + Data: qdata.Data, + Statistics: schema.MetricStatistics{ + Avg: float64(qdata.Avg), + Min: float64(qdata.Min), + Max: float64(qdata.Max), + }, + }, + }, + }) + } + + if len(errors) != 0 { + /* Returns list of "partial errors" */ + return data, fmt.Errorf("METRICDATA/CCMS > Errors: %s", strings.Join(errors, ", ")) + } + + return data, nil +} + +// Used for Systems-View Node-List +func (ccms *CCMetricStoreInternal) LoadNodeListData( + cluster, subCluster, nodeFilter string, + metrics []string, + scopes []schema.MetricScope, + resolution int, + from, to time.Time, + page *model.PageRequest, + ctx context.Context, +) (map[string]schema.JobData, int, bool, error) { + // 0) Init additional vars + var totalNodes int = 0 + var hasNextPage bool = false + + // 1) Get list of all nodes + var nodes []string + if subCluster != "" { + scNodes := archive.NodeLists[cluster][subCluster] + nodes = scNodes.PrintList() + } else { + subClusterNodeLists := archive.NodeLists[cluster] + for _, nodeList := range subClusterNodeLists { + nodes = append(nodes, nodeList.PrintList()...) + } + } + + // 2) Filter nodes + if nodeFilter != "" { + filteredNodes := []string{} + for _, node := range nodes { + if strings.Contains(node, nodeFilter) { + filteredNodes = append(filteredNodes, node) + } + } + nodes = filteredNodes + } + + // 2.1) Count total nodes && Sort nodes -> Sorting invalidated after ccms return ... + totalNodes = len(nodes) + sort.Strings(nodes) + + // 3) Apply paging + if len(nodes) > page.ItemsPerPage { + start := (page.Page - 1) * page.ItemsPerPage + end := start + page.ItemsPerPage + if end >= len(nodes) { + end = len(nodes) + hasNextPage = false + } else { + hasNextPage = true + } + nodes = nodes[start:end] + } + + // Note: Order of node data is not guaranteed after this point, but contents match page and filter criteria + + queries, assignedScope, err := ccms.buildNodeQueries(cluster, subCluster, nodes, metrics, scopes, int64(resolution)) + if err != nil { + cclog.Errorf("Error while building node queries for Cluster %s, SubCLuster %s, Metrics %v, Scopes %v: %s", cluster, subCluster, metrics, scopes, err.Error()) + return nil, totalNodes, hasNextPage, err + } + + req := memorystore.ApiQueryRequest{ + Cluster: cluster, + Queries: queries, + From: from.Unix(), + To: to.Unix(), + WithStats: true, + WithData: true, + } + + resBody, err := memorystore.FetchData(req) + if err != nil { + cclog.Errorf("Error while fetching data : %s", err.Error()) + return nil, totalNodes, hasNextPage, err + } + + var errors []string + data := make(map[string]schema.JobData) + for i, row := range resBody.Results { + var query memorystore.ApiQuery + if resBody.Queries != nil { + query = resBody.Queries[i] + } else { + query = req.Queries[i] + } + // qdata := res[0] + metric := query.Metric + scope := assignedScope[i] + mc := archive.GetMetricConfig(cluster, metric) + + res := mc.Timestep + if len(row) > 0 { + res = int(row[0].Resolution) + } + + // Init Nested Map Data Structures If Not Found + hostData, ok := data[query.Hostname] + if !ok { + hostData = make(schema.JobData) + data[query.Hostname] = hostData + } + + metricData, ok := hostData[metric] + if !ok { + 
metricData = make(map[schema.MetricScope]*schema.JobMetric) + data[query.Hostname][metric] = metricData + } + + scopeData, ok := metricData[scope] + if !ok { + scopeData = &schema.JobMetric{ + Unit: mc.Unit, + Timestep: res, + Series: make([]schema.Series, 0), + } + data[query.Hostname][metric][scope] = scopeData + } + + for ndx, res := range row { + if res.Error != nil { + /* Build list for "partial errors", if any */ + errors = append(errors, fmt.Sprintf("failed to fetch '%s' from host '%s': %s", query.Metric, query.Hostname, *res.Error)) + continue + } + + id := (*string)(nil) + if query.Type != nil { + id = new(string) + *id = query.TypeIds[ndx] + } + + if res.Avg.IsNaN() || res.Min.IsNaN() || res.Max.IsNaN() { + // "schema.Float()" because regular float64 can not be JSONed when NaN. + res.Avg = schema.Float(0) + res.Min = schema.Float(0) + res.Max = schema.Float(0) + } + + scopeData.Series = append(scopeData.Series, schema.Series{ + Hostname: query.Hostname, + Id: id, + Statistics: schema.MetricStatistics{ + Avg: float64(res.Avg), + Min: float64(res.Min), + Max: float64(res.Max), + }, + Data: res.Data, + }) + } + } + + if len(errors) != 0 { + /* Returns list of "partial errors" */ + return data, totalNodes, hasNextPage, fmt.Errorf("METRICDATA/CCMS > Errors: %s", strings.Join(errors, ", ")) + } + + return data, totalNodes, hasNextPage, nil +} + +func (ccms *CCMetricStoreInternal) buildNodeQueries( + cluster string, + subCluster string, + nodes []string, + metrics []string, + scopes []schema.MetricScope, + resolution int64, +) ([]memorystore.ApiQuery, []schema.MetricScope, error) { + queries := make([]memorystore.ApiQuery, 0, len(metrics)*len(scopes)*len(nodes)) + assignedScope := []schema.MetricScope{} + + // Get Topol before loop if subCluster given + var subClusterTopol *schema.SubCluster + var scterr error + if subCluster != "" { + subClusterTopol, scterr = archive.GetSubCluster(cluster, subCluster) + if scterr != nil { + cclog.Errorf("could not load cluster %s subCluster %s topology: %s", cluster, subCluster, scterr.Error()) + return nil, nil, scterr + } + } + + for _, metric := range metrics { + metric := metric + mc := archive.GetMetricConfig(cluster, metric) + if mc == nil { + // return nil, fmt.Errorf("METRICDATA/CCMS > metric '%s' is not specified for cluster '%s'", metric, cluster) + cclog.Warnf("metric '%s' is not specified for cluster '%s'", metric, cluster) + continue + } + + // Skip if metric is removed for subcluster + if mc.SubClusters != nil { + isRemoved := false + for _, scConfig := range mc.SubClusters { + if scConfig.Name == subCluster && scConfig.Remove { + isRemoved = true + break + } + } + if isRemoved { + continue + } + } + + // Avoid duplicates... 
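+ // Same scope collapsing as in buildQueries: requested scopes are raised to
+ // the metric's native scope and each resulting scope is queried only once
+ // per metric; node-list queries always cover the full node topology.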
+ handledScopes := make([]schema.MetricScope, 0, 3) + + scopesLoop: + for _, requestedScope := range scopes { + nativeScope := mc.Scope + + scope := nativeScope.Max(requestedScope) + for _, s := range handledScopes { + if scope == s { + continue scopesLoop + } + } + handledScopes = append(handledScopes, scope) + + for _, hostname := range nodes { + + // If no subCluster given, get it by node + if subCluster == "" { + subClusterName, scnerr := archive.GetSubClusterByNode(cluster, hostname) + if scnerr != nil { + return nil, nil, scnerr + } + subClusterTopol, scterr = archive.GetSubCluster(cluster, subClusterName) + if scterr != nil { + return nil, nil, scterr + } + } + + // Always full node hwthread id list, no partial queries expected -> Use "topology.Node" directly where applicable + // Always full accelerator id list, no partial queries expected -> Use "acceleratorIds" directly where applicable + topology := subClusterTopol.Topology + acceleratorIds := topology.GetAcceleratorIDs() + + // Moved check here if metric matches hardware specs + if nativeScope == schema.MetricScopeAccelerator && len(acceleratorIds) == 0 { + continue scopesLoop + } + + // Accelerator -> Accelerator (Use "accelerator" scope if requested scope is lower than node) + if nativeScope == schema.MetricScopeAccelerator && scope.LT(schema.MetricScopeNode) { + if scope != schema.MetricScopeAccelerator { + // Skip all other catched cases + continue + } + + queries = append(queries, memorystore.ApiQuery{ + Metric: metric, + Hostname: hostname, + Aggregate: false, + Type: &acceleratorString, + TypeIds: acceleratorIds, + Resolution: resolution, + }) + assignedScope = append(assignedScope, schema.MetricScopeAccelerator) + continue + } + + // Accelerator -> Node + if nativeScope == schema.MetricScopeAccelerator && scope == schema.MetricScopeNode { + if len(acceleratorIds) == 0 { + continue + } + + queries = append(queries, memorystore.ApiQuery{ + Metric: metric, + Hostname: hostname, + Aggregate: true, + Type: &acceleratorString, + TypeIds: acceleratorIds, + Resolution: resolution, + }) + assignedScope = append(assignedScope, scope) + continue + } + + // HWThread -> HWThead + if nativeScope == schema.MetricScopeHWThread && scope == schema.MetricScopeHWThread { + queries = append(queries, memorystore.ApiQuery{ + Metric: metric, + Hostname: hostname, + Aggregate: false, + Type: &hwthreadString, + TypeIds: intToStringSlice(topology.Node), + Resolution: resolution, + }) + assignedScope = append(assignedScope, scope) + continue + } + + // HWThread -> Core + if nativeScope == schema.MetricScopeHWThread && scope == schema.MetricScopeCore { + cores, _ := topology.GetCoresFromHWThreads(topology.Node) + for _, core := range cores { + queries = append(queries, memorystore.ApiQuery{ + Metric: metric, + Hostname: hostname, + Aggregate: true, + Type: &hwthreadString, + TypeIds: intToStringSlice(topology.Core[core]), + Resolution: resolution, + }) + assignedScope = append(assignedScope, scope) + } + continue + } + + // HWThread -> Socket + if nativeScope == schema.MetricScopeHWThread && scope == schema.MetricScopeSocket { + sockets, _ := topology.GetSocketsFromHWThreads(topology.Node) + for _, socket := range sockets { + queries = append(queries, memorystore.ApiQuery{ + Metric: metric, + Hostname: hostname, + Aggregate: true, + Type: &hwthreadString, + TypeIds: intToStringSlice(topology.Socket[socket]), + Resolution: resolution, + }) + assignedScope = append(assignedScope, scope) + } + continue + } + + // HWThread -> Node + if nativeScope == 
schema.MetricScopeHWThread && scope == schema.MetricScopeNode { + queries = append(queries, memorystore.ApiQuery{ + Metric: metric, + Hostname: hostname, + Aggregate: true, + Type: &hwthreadString, + TypeIds: intToStringSlice(topology.Node), + Resolution: resolution, + }) + assignedScope = append(assignedScope, scope) + continue + } + + // Core -> Core + if nativeScope == schema.MetricScopeCore && scope == schema.MetricScopeCore { + cores, _ := topology.GetCoresFromHWThreads(topology.Node) + queries = append(queries, memorystore.ApiQuery{ + Metric: metric, + Hostname: hostname, + Aggregate: false, + Type: &coreString, + TypeIds: intToStringSlice(cores), + Resolution: resolution, + }) + assignedScope = append(assignedScope, scope) + continue + } + + // Core -> Socket + if nativeScope == schema.MetricScopeCore && scope == schema.MetricScopeSocket { + sockets, _ := topology.GetSocketsFromCores(topology.Node) + for _, socket := range sockets { + queries = append(queries, memorystore.ApiQuery{ + Metric: metric, + Hostname: hostname, + Aggregate: true, + Type: &coreString, + TypeIds: intToStringSlice(topology.Socket[socket]), + Resolution: resolution, + }) + assignedScope = append(assignedScope, scope) + } + continue + } + + // Core -> Node + if nativeScope == schema.MetricScopeCore && scope == schema.MetricScopeNode { + cores, _ := topology.GetCoresFromHWThreads(topology.Node) + queries = append(queries, memorystore.ApiQuery{ + Metric: metric, + Hostname: hostname, + Aggregate: true, + Type: &coreString, + TypeIds: intToStringSlice(cores), + Resolution: resolution, + }) + assignedScope = append(assignedScope, scope) + continue + } + + // MemoryDomain -> MemoryDomain + if nativeScope == schema.MetricScopeMemoryDomain && scope == schema.MetricScopeMemoryDomain { + sockets, _ := topology.GetMemoryDomainsFromHWThreads(topology.Node) + queries = append(queries, memorystore.ApiQuery{ + Metric: metric, + Hostname: hostname, + Aggregate: false, + Type: &memoryDomainString, + TypeIds: intToStringSlice(sockets), + Resolution: resolution, + }) + assignedScope = append(assignedScope, scope) + continue + } + + // MemoryDoman -> Node + if nativeScope == schema.MetricScopeMemoryDomain && scope == schema.MetricScopeNode { + sockets, _ := topology.GetMemoryDomainsFromHWThreads(topology.Node) + queries = append(queries, memorystore.ApiQuery{ + Metric: metric, + Hostname: hostname, + Aggregate: true, + Type: &memoryDomainString, + TypeIds: intToStringSlice(sockets), + Resolution: resolution, + }) + assignedScope = append(assignedScope, scope) + continue + } + + // Socket -> Socket + if nativeScope == schema.MetricScopeSocket && scope == schema.MetricScopeSocket { + sockets, _ := topology.GetSocketsFromHWThreads(topology.Node) + queries = append(queries, memorystore.ApiQuery{ + Metric: metric, + Hostname: hostname, + Aggregate: false, + Type: &socketString, + TypeIds: intToStringSlice(sockets), + Resolution: resolution, + }) + assignedScope = append(assignedScope, scope) + continue + } + + // Socket -> Node + if nativeScope == schema.MetricScopeSocket && scope == schema.MetricScopeNode { + sockets, _ := topology.GetSocketsFromHWThreads(topology.Node) + queries = append(queries, memorystore.ApiQuery{ + Metric: metric, + Hostname: hostname, + Aggregate: true, + Type: &socketString, + TypeIds: intToStringSlice(sockets), + Resolution: resolution, + }) + assignedScope = append(assignedScope, scope) + continue + } + + // Node -> Node + if nativeScope == schema.MetricScopeNode && scope == schema.MetricScopeNode { + 
queries = append(queries, memorystore.ApiQuery{ + Metric: metric, + Hostname: hostname, + Resolution: resolution, + }) + assignedScope = append(assignedScope, scope) + continue + } + + return nil, nil, fmt.Errorf("METRICDATA/CCMS > TODO: unhandled case: native-scope=%s, requested-scope=%s", nativeScope, requestedScope) + } + } + } + + return queries, assignedScope, nil +} + +func intToStringSlice(is []int) []string { + ss := make([]string, len(is)) + for i, x := range is { + ss[i] = strconv.Itoa(x) + } + return ss +} diff --git a/internal/metricdata/cc-metric-store.go b/internal/metricdata/cc-metric-store.go index d8cef4d..a188686 100644 --- a/internal/metricdata/cc-metric-store.go +++ b/internal/metricdata/cc-metric-store.go @@ -1,26 +1,26 @@ // Copyright (C) NHR@FAU, University Erlangen-Nuremberg. -// All rights reserved. This file is part of cc-backend. +// All rights reserved. // Use of this source code is governed by a MIT-style // license that can be found in the LICENSE file. package metricdata import ( + "bufio" + "bytes" "context" "encoding/json" "fmt" + "net/http" "sort" - "strconv" "strings" "time" "github.com/ClusterCockpit/cc-backend/internal/graph/model" - "github.com/ClusterCockpit/cc-backend/internal/memorystore" "github.com/ClusterCockpit/cc-backend/pkg/archive" cclog "github.com/ClusterCockpit/cc-lib/ccLogger" "github.com/ClusterCockpit/cc-lib/schema" ) -// Bloat Code type CCMetricStoreConfig struct { Kind string `json:"kind"` Url string `json:"url"` @@ -32,16 +32,141 @@ type CCMetricStoreConfig struct { Renamings map[string]string `json:"metricRenamings"` } -// Bloat Code type CCMetricStore struct { + here2there map[string]string + there2here map[string]string + client http.Client + jwt string + url string + queryEndpoint string +} + +type ApiQueryRequest struct { + Cluster string `json:"cluster"` + Queries []ApiQuery `json:"queries"` + ForAllNodes []string `json:"for-all-nodes"` + From int64 `json:"from"` + To int64 `json:"to"` + WithStats bool `json:"with-stats"` + WithData bool `json:"with-data"` +} + +type ApiQuery struct { + Type *string `json:"type,omitempty"` + SubType *string `json:"subtype,omitempty"` + Metric string `json:"metric"` + Hostname string `json:"host"` + Resolution int `json:"resolution"` + TypeIds []string `json:"type-ids,omitempty"` + SubTypeIds []string `json:"subtype-ids,omitempty"` + Aggregate bool `json:"aggreg"` +} + +type ApiQueryResponse struct { + Queries []ApiQuery `json:"queries,omitempty"` + Results [][]ApiMetricData `json:"results"` +} + +type ApiMetricData struct { + Error *string `json:"error"` + Data []schema.Float `json:"data"` + From int64 `json:"from"` + To int64 `json:"to"` + Resolution int `json:"resolution"` + Avg schema.Float `json:"avg"` + Min schema.Float `json:"min"` + Max schema.Float `json:"max"` } -// Bloat Code func (ccms *CCMetricStore) Init(rawConfig json.RawMessage) error { + var config CCMetricStoreConfig + if err := json.Unmarshal(rawConfig, &config); err != nil { + cclog.Warn("Error while unmarshaling raw json config") + return err + } + + ccms.url = config.Url + ccms.queryEndpoint = fmt.Sprintf("%s/api/query", config.Url) + ccms.jwt = config.Token + ccms.client = http.Client{ + Timeout: 10 * time.Second, + } + + if config.Renamings != nil { + ccms.here2there = config.Renamings + ccms.there2here = make(map[string]string, len(config.Renamings)) + for k, v := range ccms.here2there { + ccms.there2here[v] = k + } + } else { + ccms.here2there = make(map[string]string) + ccms.there2here = make(map[string]string) + } 
return nil } +func (ccms *CCMetricStore) toRemoteName(metric string) string { + if renamed, ok := ccms.here2there[metric]; ok { + return renamed + } + + return metric +} + +func (ccms *CCMetricStore) toLocalName(metric string) string { + if renamed, ok := ccms.there2here[metric]; ok { + return renamed + } + + return metric +} + +func (ccms *CCMetricStore) doRequest( + ctx context.Context, + body *ApiQueryRequest, +) (*ApiQueryResponse, error) { + buf := &bytes.Buffer{} + if err := json.NewEncoder(buf).Encode(body); err != nil { + cclog.Errorf("Error while encoding request body: %s", err.Error()) + return nil, err + } + + req, err := http.NewRequestWithContext(ctx, http.MethodGet, ccms.queryEndpoint, buf) + if err != nil { + cclog.Errorf("Error while building request body: %s", err.Error()) + return nil, err + } + if ccms.jwt != "" { + req.Header.Add("Authorization", fmt.Sprintf("Bearer %s", ccms.jwt)) + } + + // versioning the cc-metric-store query API. + // v2 = data with resampling + // v1 = data without resampling + q := req.URL.Query() + q.Add("version", "v2") + req.URL.RawQuery = q.Encode() + + res, err := ccms.client.Do(req) + if err != nil { + cclog.Errorf("Error while performing request: %s", err.Error()) + return nil, err + } + + if res.StatusCode != http.StatusOK { + return nil, fmt.Errorf("'%s': HTTP Status: %s", ccms.queryEndpoint, res.Status) + } + + var resBody ApiQueryResponse + if err := json.NewDecoder(bufio.NewReader(res.Body)).Decode(&resBody); err != nil { + cclog.Errorf("Error while decoding result body: %s", err.Error()) + return nil, err + } + + return &resBody, nil +} + func (ccms *CCMetricStore) LoadData( job *schema.Job, metrics []string, @@ -49,13 +174,13 @@ func (ccms *CCMetricStore) LoadData( ctx context.Context, resolution int, ) (schema.JobData, error) { - queries, assignedScope, err := ccms.buildQueries(job, metrics, scopes, int64(resolution)) + queries, assignedScope, err := ccms.buildQueries(job, metrics, scopes, resolution) if err != nil { cclog.Errorf("Error while building queries for jobId %d, Metrics %v, Scopes %v: %s", job.JobID, metrics, scopes, err.Error()) return nil, err } - req := memorystore.ApiQueryRequest{ + req := ApiQueryRequest{ Cluster: job.Cluster, From: job.StartTime, To: job.StartTime + int64(job.Duration), @@ -64,9 +189,9 @@ func (ccms *CCMetricStore) LoadData( WithData: true, } - resBody, err := memorystore.FetchData(req) + resBody, err := ccms.doRequest(ctx, &req) if err != nil { - cclog.Errorf("Error while fetching data : %s", err.Error()) + cclog.Errorf("Error while performing request: %s", err.Error()) return nil, err } @@ -74,7 +199,7 @@ func (ccms *CCMetricStore) LoadData( jobData := make(schema.JobData) for i, row := range resBody.Results { query := req.Queries[i] - metric := query.Metric + metric := ccms.toLocalName(query.Metric) scope := assignedScope[i] mc := archive.GetMetricConfig(job.Cluster, metric) if _, ok := jobData[metric]; !ok { @@ -83,7 +208,7 @@ func (ccms *CCMetricStore) LoadData( res := mc.Timestep if len(row) > 0 { - res = int(row[0].Resolution) + res = row[0].Resolution } jobMetric, ok := jobData[metric][scope] @@ -144,21 +269,13 @@ func (ccms *CCMetricStore) LoadData( return jobData, nil } -var ( - hwthreadString = string(schema.MetricScopeHWThread) - coreString = string(schema.MetricScopeCore) - memoryDomainString = string(schema.MetricScopeMemoryDomain) - socketString = string(schema.MetricScopeSocket) - acceleratorString = string(schema.MetricScopeAccelerator) -) - func (ccms *CCMetricStore) buildQueries( 
job *schema.Job, metrics []string, scopes []schema.MetricScope, - resolution int64, -) ([]memorystore.ApiQuery, []schema.MetricScope, error) { - queries := make([]memorystore.ApiQuery, 0, len(metrics)*len(scopes)*len(job.Resources)) + resolution int, +) ([]ApiQuery, []schema.MetricScope, error) { + queries := make([]ApiQuery, 0, len(metrics)*len(scopes)*len(job.Resources)) assignedScope := []schema.MetricScope{} subcluster, scerr := archive.GetSubCluster(job.Cluster, job.SubCluster) @@ -168,6 +285,7 @@ func (ccms *CCMetricStore) buildQueries( topology := subcluster.Topology for _, metric := range metrics { + remoteName := ccms.toRemoteName(metric) mc := archive.GetMetricConfig(job.Cluster, metric) if mc == nil { // return nil, fmt.Errorf("METRICDATA/CCMS > metric '%s' is not specified for cluster '%s'", metric, job.Cluster) @@ -220,8 +338,8 @@ func (ccms *CCMetricStore) buildQueries( continue } - queries = append(queries, memorystore.ApiQuery{ - Metric: metric, + queries = append(queries, ApiQuery{ + Metric: remoteName, Hostname: host.Hostname, Aggregate: false, Type: &acceleratorString, @@ -238,8 +356,8 @@ func (ccms *CCMetricStore) buildQueries( continue } - queries = append(queries, memorystore.ApiQuery{ - Metric: metric, + queries = append(queries, ApiQuery{ + Metric: remoteName, Hostname: host.Hostname, Aggregate: true, Type: &acceleratorString, @@ -252,8 +370,8 @@ func (ccms *CCMetricStore) buildQueries( // HWThread -> HWThead if nativeScope == schema.MetricScopeHWThread && scope == schema.MetricScopeHWThread { - queries = append(queries, memorystore.ApiQuery{ - Metric: metric, + queries = append(queries, ApiQuery{ + Metric: remoteName, Hostname: host.Hostname, Aggregate: false, Type: &hwthreadString, @@ -268,8 +386,8 @@ func (ccms *CCMetricStore) buildQueries( if nativeScope == schema.MetricScopeHWThread && scope == schema.MetricScopeCore { cores, _ := topology.GetCoresFromHWThreads(hwthreads) for _, core := range cores { - queries = append(queries, memorystore.ApiQuery{ - Metric: metric, + queries = append(queries, ApiQuery{ + Metric: remoteName, Hostname: host.Hostname, Aggregate: true, Type: &hwthreadString, @@ -285,8 +403,8 @@ func (ccms *CCMetricStore) buildQueries( if nativeScope == schema.MetricScopeHWThread && scope == schema.MetricScopeSocket { sockets, _ := topology.GetSocketsFromHWThreads(hwthreads) for _, socket := range sockets { - queries = append(queries, memorystore.ApiQuery{ - Metric: metric, + queries = append(queries, ApiQuery{ + Metric: remoteName, Hostname: host.Hostname, Aggregate: true, Type: &hwthreadString, @@ -300,8 +418,8 @@ func (ccms *CCMetricStore) buildQueries( // HWThread -> Node if nativeScope == schema.MetricScopeHWThread && scope == schema.MetricScopeNode { - queries = append(queries, memorystore.ApiQuery{ - Metric: metric, + queries = append(queries, ApiQuery{ + Metric: remoteName, Hostname: host.Hostname, Aggregate: true, Type: &hwthreadString, @@ -315,8 +433,8 @@ func (ccms *CCMetricStore) buildQueries( // Core -> Core if nativeScope == schema.MetricScopeCore && scope == schema.MetricScopeCore { cores, _ := topology.GetCoresFromHWThreads(hwthreads) - queries = append(queries, memorystore.ApiQuery{ - Metric: metric, + queries = append(queries, ApiQuery{ + Metric: remoteName, Hostname: host.Hostname, Aggregate: false, Type: &coreString, @@ -331,8 +449,8 @@ func (ccms *CCMetricStore) buildQueries( if nativeScope == schema.MetricScopeCore && scope == schema.MetricScopeSocket { sockets, _ := topology.GetSocketsFromCores(hwthreads) for _, socket := 
range sockets { - queries = append(queries, memorystore.ApiQuery{ - Metric: metric, + queries = append(queries, ApiQuery{ + Metric: remoteName, Hostname: host.Hostname, Aggregate: true, Type: &coreString, @@ -347,8 +465,8 @@ func (ccms *CCMetricStore) buildQueries( // Core -> Node if nativeScope == schema.MetricScopeCore && scope == schema.MetricScopeNode { cores, _ := topology.GetCoresFromHWThreads(hwthreads) - queries = append(queries, memorystore.ApiQuery{ - Metric: metric, + queries = append(queries, ApiQuery{ + Metric: remoteName, Hostname: host.Hostname, Aggregate: true, Type: &coreString, @@ -362,8 +480,8 @@ func (ccms *CCMetricStore) buildQueries( // MemoryDomain -> MemoryDomain if nativeScope == schema.MetricScopeMemoryDomain && scope == schema.MetricScopeMemoryDomain { sockets, _ := topology.GetMemoryDomainsFromHWThreads(hwthreads) - queries = append(queries, memorystore.ApiQuery{ - Metric: metric, + queries = append(queries, ApiQuery{ + Metric: remoteName, Hostname: host.Hostname, Aggregate: false, Type: &memoryDomainString, @@ -377,8 +495,8 @@ func (ccms *CCMetricStore) buildQueries( // MemoryDoman -> Node if nativeScope == schema.MetricScopeMemoryDomain && scope == schema.MetricScopeNode { sockets, _ := topology.GetMemoryDomainsFromHWThreads(hwthreads) - queries = append(queries, memorystore.ApiQuery{ - Metric: metric, + queries = append(queries, ApiQuery{ + Metric: remoteName, Hostname: host.Hostname, Aggregate: true, Type: &memoryDomainString, @@ -392,8 +510,8 @@ func (ccms *CCMetricStore) buildQueries( // Socket -> Socket if nativeScope == schema.MetricScopeSocket && scope == schema.MetricScopeSocket { sockets, _ := topology.GetSocketsFromHWThreads(hwthreads) - queries = append(queries, memorystore.ApiQuery{ - Metric: metric, + queries = append(queries, ApiQuery{ + Metric: remoteName, Hostname: host.Hostname, Aggregate: false, Type: &socketString, @@ -407,8 +525,8 @@ func (ccms *CCMetricStore) buildQueries( // Socket -> Node if nativeScope == schema.MetricScopeSocket && scope == schema.MetricScopeNode { sockets, _ := topology.GetSocketsFromHWThreads(hwthreads) - queries = append(queries, memorystore.ApiQuery{ - Metric: metric, + queries = append(queries, ApiQuery{ + Metric: remoteName, Hostname: host.Hostname, Aggregate: true, Type: &socketString, @@ -421,8 +539,8 @@ func (ccms *CCMetricStore) buildQueries( // Node -> Node if nativeScope == schema.MetricScopeNode && scope == schema.MetricScopeNode { - queries = append(queries, memorystore.ApiQuery{ - Metric: metric, + queries = append(queries, ApiQuery{ + Metric: remoteName, Hostname: host.Hostname, Resolution: resolution, }) @@ -443,13 +561,14 @@ func (ccms *CCMetricStore) LoadStats( metrics []string, ctx context.Context, ) (map[string]map[string]schema.MetricStatistics, error) { + queries, _, err := ccms.buildQueries(job, metrics, []schema.MetricScope{schema.MetricScopeNode}, 0) // #166 Add scope shere for analysis view accelerator normalization? 
if err != nil { cclog.Errorf("Error while building queries for jobId %d, Metrics %v: %s", job.JobID, metrics, err.Error()) return nil, err } - req := memorystore.ApiQueryRequest{ + req := ApiQueryRequest{ Cluster: job.Cluster, From: job.StartTime, To: job.StartTime + int64(job.Duration), @@ -458,16 +577,16 @@ func (ccms *CCMetricStore) LoadStats( WithData: false, } - resBody, err := memorystore.FetchData(req) + resBody, err := ccms.doRequest(ctx, &req) if err != nil { - cclog.Errorf("Error while fetching data : %s", err.Error()) + cclog.Errorf("Error while performing request: %s", err.Error()) return nil, err } stats := make(map[string]map[string]schema.MetricStatistics, len(metrics)) for i, res := range resBody.Results { query := req.Queries[i] - metric := query.Metric + metric := ccms.toLocalName(query.Metric) data := res[0] if data.Error != nil { cclog.Errorf("fetching %s for node %s failed: %s", metric, query.Hostname, *data.Error) @@ -508,7 +627,7 @@ func (ccms *CCMetricStore) LoadScopedStats( return nil, err } - req := memorystore.ApiQueryRequest{ + req := ApiQueryRequest{ Cluster: job.Cluster, From: job.StartTime, To: job.StartTime + int64(job.Duration), @@ -517,9 +636,9 @@ func (ccms *CCMetricStore) LoadScopedStats( WithData: false, } - resBody, err := memorystore.FetchData(req) + resBody, err := ccms.doRequest(ctx, &req) if err != nil { - cclog.Errorf("Error while fetching data : %s", err.Error()) + cclog.Errorf("Error while performing request: %s", err.Error()) return nil, err } @@ -528,7 +647,7 @@ func (ccms *CCMetricStore) LoadScopedStats( for i, row := range resBody.Results { query := req.Queries[i] - metric := query.Metric + metric := ccms.toLocalName(query.Metric) scope := assignedScope[i] if _, ok := scopedJobStats[metric]; !ok { @@ -594,7 +713,7 @@ func (ccms *CCMetricStore) LoadNodeData( from, to time.Time, ctx context.Context, ) (map[string]map[string][]*schema.JobMetric, error) { - req := memorystore.ApiQueryRequest{ + req := ApiQueryRequest{ Cluster: cluster, From: from.Unix(), To: to.Unix(), @@ -603,36 +722,38 @@ func (ccms *CCMetricStore) LoadNodeData( } if nodes == nil { - req.ForAllNodes = append(req.ForAllNodes, metrics...) 
+ for _, metric := range metrics { + req.ForAllNodes = append(req.ForAllNodes, ccms.toRemoteName(metric)) + } } else { for _, node := range nodes { for _, metric := range metrics { - req.Queries = append(req.Queries, memorystore.ApiQuery{ + req.Queries = append(req.Queries, ApiQuery{ Hostname: node, - Metric: metric, + Metric: ccms.toRemoteName(metric), Resolution: 0, // Default for Node Queries: Will return metric $Timestep Resolution }) } } } - resBody, err := memorystore.FetchData(req) + resBody, err := ccms.doRequest(ctx, &req) if err != nil { - cclog.Errorf("Error while fetching data : %s", err.Error()) + cclog.Errorf("Error while performing request: %s", err.Error()) return nil, err } var errors []string data := make(map[string]map[string][]*schema.JobMetric) for i, res := range resBody.Results { - var query memorystore.ApiQuery + var query ApiQuery if resBody.Queries != nil { query = resBody.Queries[i] } else { query = req.Queries[i] } - metric := query.Metric + metric := ccms.toLocalName(query.Metric) qdata := res[0] if qdata.Error != nil { /* Build list for "partial errors", if any */ @@ -686,6 +807,7 @@ func (ccms *CCMetricStore) LoadNodeListData( page *model.PageRequest, ctx context.Context, ) (map[string]schema.JobData, int, bool, error) { + // 0) Init additional vars var totalNodes int = 0 var hasNextPage bool = false @@ -721,7 +843,7 @@ func (ccms *CCMetricStore) LoadNodeListData( if len(nodes) > page.ItemsPerPage { start := (page.Page - 1) * page.ItemsPerPage end := start + page.ItemsPerPage - if end >= len(nodes) { + if end > len(nodes) { end = len(nodes) hasNextPage = false } else { @@ -732,13 +854,13 @@ func (ccms *CCMetricStore) LoadNodeListData( // Note: Order of node data is not guaranteed after this point, but contents match page and filter criteria - queries, assignedScope, err := ccms.buildNodeQueries(cluster, subCluster, nodes, metrics, scopes, int64(resolution)) + queries, assignedScope, err := ccms.buildNodeQueries(cluster, subCluster, nodes, metrics, scopes, resolution) if err != nil { cclog.Errorf("Error while building node queries for Cluster %s, SubCLuster %s, Metrics %v, Scopes %v: %s", cluster, subCluster, metrics, scopes, err.Error()) return nil, totalNodes, hasNextPage, err } - req := memorystore.ApiQueryRequest{ + req := ApiQueryRequest{ Cluster: cluster, Queries: queries, From: from.Unix(), @@ -747,29 +869,29 @@ func (ccms *CCMetricStore) LoadNodeListData( WithData: true, } - resBody, err := memorystore.FetchData(req) + resBody, err := ccms.doRequest(ctx, &req) if err != nil { - cclog.Errorf("Error while fetching data : %s", err.Error()) + cclog.Errorf("Error while performing request: %s", err.Error()) return nil, totalNodes, hasNextPage, err } var errors []string data := make(map[string]schema.JobData) for i, row := range resBody.Results { - var query memorystore.ApiQuery + var query ApiQuery if resBody.Queries != nil { query = resBody.Queries[i] } else { query = req.Queries[i] } // qdata := res[0] - metric := query.Metric + metric := ccms.toLocalName(query.Metric) scope := assignedScope[i] mc := archive.GetMetricConfig(cluster, metric) res := mc.Timestep if len(row) > 0 { - res = int(row[0].Resolution) + res = row[0].Resolution } // Init Nested Map Data Structures If Not Found @@ -842,9 +964,10 @@ func (ccms *CCMetricStore) buildNodeQueries( nodes []string, metrics []string, scopes []schema.MetricScope, - resolution int64, -) ([]memorystore.ApiQuery, []schema.MetricScope, error) { - queries := make([]memorystore.ApiQuery, 0, 
len(metrics)*len(scopes)*len(nodes)) + resolution int, +) ([]ApiQuery, []schema.MetricScope, error) { + + queries := make([]ApiQuery, 0, len(metrics)*len(scopes)*len(nodes)) assignedScope := []schema.MetricScope{} // Get Topol before loop if subCluster given @@ -859,7 +982,7 @@ func (ccms *CCMetricStore) buildNodeQueries( } for _, metric := range metrics { - metric := metric + remoteName := ccms.toRemoteName(metric) mc := archive.GetMetricConfig(cluster, metric) if mc == nil { // return nil, fmt.Errorf("METRICDATA/CCMS > metric '%s' is not specified for cluster '%s'", metric, cluster) @@ -927,8 +1050,8 @@ func (ccms *CCMetricStore) buildNodeQueries( continue } - queries = append(queries, memorystore.ApiQuery{ - Metric: metric, + queries = append(queries, ApiQuery{ + Metric: remoteName, Hostname: hostname, Aggregate: false, Type: &acceleratorString, @@ -945,8 +1068,8 @@ func (ccms *CCMetricStore) buildNodeQueries( continue } - queries = append(queries, memorystore.ApiQuery{ - Metric: metric, + queries = append(queries, ApiQuery{ + Metric: remoteName, Hostname: hostname, Aggregate: true, Type: &acceleratorString, @@ -959,8 +1082,8 @@ func (ccms *CCMetricStore) buildNodeQueries( // HWThread -> HWThead if nativeScope == schema.MetricScopeHWThread && scope == schema.MetricScopeHWThread { - queries = append(queries, memorystore.ApiQuery{ - Metric: metric, + queries = append(queries, ApiQuery{ + Metric: remoteName, Hostname: hostname, Aggregate: false, Type: &hwthreadString, @@ -975,8 +1098,8 @@ func (ccms *CCMetricStore) buildNodeQueries( if nativeScope == schema.MetricScopeHWThread && scope == schema.MetricScopeCore { cores, _ := topology.GetCoresFromHWThreads(topology.Node) for _, core := range cores { - queries = append(queries, memorystore.ApiQuery{ - Metric: metric, + queries = append(queries, ApiQuery{ + Metric: remoteName, Hostname: hostname, Aggregate: true, Type: &hwthreadString, @@ -992,8 +1115,8 @@ func (ccms *CCMetricStore) buildNodeQueries( if nativeScope == schema.MetricScopeHWThread && scope == schema.MetricScopeSocket { sockets, _ := topology.GetSocketsFromHWThreads(topology.Node) for _, socket := range sockets { - queries = append(queries, memorystore.ApiQuery{ - Metric: metric, + queries = append(queries, ApiQuery{ + Metric: remoteName, Hostname: hostname, Aggregate: true, Type: &hwthreadString, @@ -1007,8 +1130,8 @@ func (ccms *CCMetricStore) buildNodeQueries( // HWThread -> Node if nativeScope == schema.MetricScopeHWThread && scope == schema.MetricScopeNode { - queries = append(queries, memorystore.ApiQuery{ - Metric: metric, + queries = append(queries, ApiQuery{ + Metric: remoteName, Hostname: hostname, Aggregate: true, Type: &hwthreadString, @@ -1022,8 +1145,8 @@ func (ccms *CCMetricStore) buildNodeQueries( // Core -> Core if nativeScope == schema.MetricScopeCore && scope == schema.MetricScopeCore { cores, _ := topology.GetCoresFromHWThreads(topology.Node) - queries = append(queries, memorystore.ApiQuery{ - Metric: metric, + queries = append(queries, ApiQuery{ + Metric: remoteName, Hostname: hostname, Aggregate: false, Type: &coreString, @@ -1038,8 +1161,8 @@ func (ccms *CCMetricStore) buildNodeQueries( if nativeScope == schema.MetricScopeCore && scope == schema.MetricScopeSocket { sockets, _ := topology.GetSocketsFromCores(topology.Node) for _, socket := range sockets { - queries = append(queries, memorystore.ApiQuery{ - Metric: metric, + queries = append(queries, ApiQuery{ + Metric: remoteName, Hostname: hostname, Aggregate: true, Type: &coreString, @@ -1054,8 +1177,8 @@ 
func (ccms *CCMetricStore) buildNodeQueries( // Core -> Node if nativeScope == schema.MetricScopeCore && scope == schema.MetricScopeNode { cores, _ := topology.GetCoresFromHWThreads(topology.Node) - queries = append(queries, memorystore.ApiQuery{ - Metric: metric, + queries = append(queries, ApiQuery{ + Metric: remoteName, Hostname: hostname, Aggregate: true, Type: &coreString, @@ -1069,8 +1192,8 @@ func (ccms *CCMetricStore) buildNodeQueries( // MemoryDomain -> MemoryDomain if nativeScope == schema.MetricScopeMemoryDomain && scope == schema.MetricScopeMemoryDomain { sockets, _ := topology.GetMemoryDomainsFromHWThreads(topology.Node) - queries = append(queries, memorystore.ApiQuery{ - Metric: metric, + queries = append(queries, ApiQuery{ + Metric: remoteName, Hostname: hostname, Aggregate: false, Type: &memoryDomainString, @@ -1084,8 +1207,8 @@ func (ccms *CCMetricStore) buildNodeQueries( // MemoryDoman -> Node if nativeScope == schema.MetricScopeMemoryDomain && scope == schema.MetricScopeNode { sockets, _ := topology.GetMemoryDomainsFromHWThreads(topology.Node) - queries = append(queries, memorystore.ApiQuery{ - Metric: metric, + queries = append(queries, ApiQuery{ + Metric: remoteName, Hostname: hostname, Aggregate: true, Type: &memoryDomainString, @@ -1099,8 +1222,8 @@ func (ccms *CCMetricStore) buildNodeQueries( // Socket -> Socket if nativeScope == schema.MetricScopeSocket && scope == schema.MetricScopeSocket { sockets, _ := topology.GetSocketsFromHWThreads(topology.Node) - queries = append(queries, memorystore.ApiQuery{ - Metric: metric, + queries = append(queries, ApiQuery{ + Metric: remoteName, Hostname: hostname, Aggregate: false, Type: &socketString, @@ -1114,8 +1237,8 @@ func (ccms *CCMetricStore) buildNodeQueries( // Socket -> Node if nativeScope == schema.MetricScopeSocket && scope == schema.MetricScopeNode { sockets, _ := topology.GetSocketsFromHWThreads(topology.Node) - queries = append(queries, memorystore.ApiQuery{ - Metric: metric, + queries = append(queries, ApiQuery{ + Metric: remoteName, Hostname: hostname, Aggregate: true, Type: &socketString, @@ -1128,8 +1251,8 @@ func (ccms *CCMetricStore) buildNodeQueries( // Node -> Node if nativeScope == schema.MetricScopeNode && scope == schema.MetricScopeNode { - queries = append(queries, memorystore.ApiQuery{ - Metric: metric, + queries = append(queries, ApiQuery{ + Metric: remoteName, Hostname: hostname, Resolution: resolution, }) @@ -1144,11 +1267,3 @@ func (ccms *CCMetricStore) buildNodeQueries( return queries, assignedScope, nil } - -func intToStringSlice(is []int) []string { - ss := make([]string, len(is)) - for i, x := range is { - ss[i] = strconv.Itoa(x) - } - return ss -} diff --git a/internal/metricdata/metricdata.go b/internal/metricdata/metricdata.go index 87867af..4cfff34 100644 --- a/internal/metricdata/metricdata.go +++ b/internal/metricdata/metricdata.go @@ -54,6 +54,9 @@ func Init() error { switch kind.Kind { case "cc-metric-store": mdr = &CCMetricStore{} + case "cc-metric-store-internal": + mdr = &CCMetricStoreInternal{} + config.InternalCCMSFlag = true case "prometheus": mdr = &PrometheusDataRepository{} case "test": From be92d5943dc0cb0e1c086ecae9cff258ec6c1523 Mon Sep 17 00:00:00 2001 From: Jan Eitzinger Date: Wed, 10 Sep 2025 15:13:53 +0200 Subject: [PATCH 37/40] Decrease verbosity in jobcache sync task --- internal/taskManager/commitJobService.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/internal/taskManager/commitJobService.go b/internal/taskManager/commitJobService.go index 
88c2708..8df29dd 100644 --- a/internal/taskManager/commitJobService.go +++ b/internal/taskManager/commitJobService.go @@ -20,15 +20,15 @@ func RegisterCommitJobService() { frequency = "2m" } d, _ := time.ParseDuration(frequency) - cclog.Infof("Register commitJob service with %s interval", frequency) + cclog.Infof("register commitJob service with %s interval", frequency) s.NewJob(gocron.DurationJob(d), gocron.NewTask( func() { start := time.Now() - cclog.Printf("Jobcache sync started at %s\n", start.Format(time.RFC3339)) + cclog.Debugf("jobcache sync started at %s\n", start.Format(time.RFC3339)) jobs, _ := jobRepo.SyncJobs() repository.CallJobStartHooks(jobs) - cclog.Printf("Jobcache sync and job callbacks are done and took %s\n", time.Since(start)) + cclog.Debugf("jobcache sync and job callbacks are done and took %s\n", time.Since(start)) })) } From 05c45c646800959bb72a0030a83ef70667e20571 Mon Sep 17 00:00:00 2001 From: Christoph Kluge Date: Wed, 10 Sep 2025 15:31:39 +0200 Subject: [PATCH 38/40] fix: add missing keys to node overview, solves load to empty overview --- web/frontend/src/systems/NodeOverview.svelte | 46 ++++++++++---------- 1 file changed, 23 insertions(+), 23 deletions(-) diff --git a/web/frontend/src/systems/NodeOverview.svelte b/web/frontend/src/systems/NodeOverview.svelte index 3d0f482..c3b9202 100644 --- a/web/frontend/src/systems/NodeOverview.svelte +++ b/web/frontend/src/systems/NodeOverview.svelte @@ -124,25 +124,25 @@ {:else if filteredData?.length > 0} - {#each filteredData as item (item.host)} - -

- {item.host} ({item.subCluster}) -

- {#if item?.disabled} - Metric disabled for subcluster {selectedMetric}:{item.subCluster} - {:else} - - - {#key item.data[0].metric.series[0].data.length} + {#key filteredData} + {#each filteredData as item (item.host)} + +

+ {item.host} ({item.subCluster}) +

+ {#if item?.disabled} + Metric disabled for subcluster {selectedMetric}:{item.subCluster} + {:else} + + - {/key} - {/if} - - {/each} + {/if} + + {/each} + {/key} {/if} \ No newline at end of file From d03e5b4562821a78bb5af4cdf32d1d1c91065f5b Mon Sep 17 00:00:00 2001 From: Christoph Kluge Date: Wed, 10 Sep 2025 15:42:13 +0200 Subject: [PATCH 39/40] handle metric disabled state explicitly in nodeOverview component --- web/frontend/src/systems/NodeOverview.svelte | 36 ++++++++++++-------- 1 file changed, 21 insertions(+), 15 deletions(-) diff --git a/web/frontend/src/systems/NodeOverview.svelte b/web/frontend/src/systems/NodeOverview.svelte index c3b9202..9554c5e 100644 --- a/web/frontend/src/systems/NodeOverview.svelte +++ b/web/frontend/src/systems/NodeOverview.svelte @@ -124,23 +124,23 @@ {:else if filteredData?.length > 0} - {#key filteredData} - {#each filteredData as item (item.host)} - -

- {item.host} ({item.subCluster}) -

- {#if item?.disabled} + {#each filteredData as item (item.host)} + +

+ {item.host} ({item.subCluster}) +

+ {#key item?.disabled} + {#if item?.disabled === true} Metric disabled for subcluster {selectedMetric}:{item.subCluster} - {:else} + {:else if item?.disabled === false} + {:else} + Metric Query Empty: Please Reload Page {selectedMetric}:{item.subCluster} {/if} - - {/each} - {/key} + {/key} + + {/each}
{/if} \ No newline at end of file From 935fb238a4314a3f21e2da9ee729ecb2e46cf4a8 Mon Sep 17 00:00:00 2001 From: Christoph Kluge Date: Wed, 10 Sep 2025 18:01:33 +0200 Subject: [PATCH 40/40] add init context to nodeOverview, add additional key for plot rerender --- web/frontend/src/systems/NodeOverview.svelte | 71 ++++++++++---------- 1 file changed, 35 insertions(+), 36 deletions(-) diff --git a/web/frontend/src/systems/NodeOverview.svelte b/web/frontend/src/systems/NodeOverview.svelte index 9554c5e..2fc7bde 100644 --- a/web/frontend/src/systems/NodeOverview.svelte +++ b/web/frontend/src/systems/NodeOverview.svelte @@ -11,6 +11,7 @@ -->