From 6f9737c2c2bdf3bd7e62244b421466ed152acdf5 Mon Sep 17 00:00:00 2001 From: Jan Eitzinger Date: Wed, 4 Jun 2025 13:44:37 +0200 Subject: [PATCH] Add node repository, extend GraphQL API Sync commit. --- api/schema.graphqls | 427 +++++++----- gqlgen.yml | 9 + internal/graph/generated/generated.go | 957 +++++++++++++++++++++----- internal/graph/schema.resolvers.go | 19 + internal/repository/node.go | 217 ++++++ 5 files changed, 1315 insertions(+), 314 deletions(-) create mode 100644 internal/repository/node.go diff --git a/api/schema.graphqls b/api/schema.graphqls index 268a579..6542464 100644 --- a/api/schema.graphqls +++ b/api/schema.graphqls @@ -4,61 +4,78 @@ scalar Any scalar NullableFloat scalar MetricScope scalar JobState +scalar NodeState +scalar MonitoringState + +type Node { + id: ID! + hostname: String! + cluster: String! + subCluster: String! + nodeState: NodeState! + HealthState: MonitoringState! + metaData: Any +} + +type NodeStats { + state: String! + count: Int! +} type Job { - id: ID! - jobId: Int! - user: String! - project: String! - cluster: String! - subCluster: String! - startTime: Time! - duration: Int! - walltime: Int! - numNodes: Int! - numHWThreads: Int! - numAcc: Int! - energy: Float! - SMT: Int! - exclusive: Int! - partition: String! - arrayJobId: Int! + id: ID! + jobId: Int! + user: String! + project: String! + cluster: String! + subCluster: String! + startTime: Time! + duration: Int! + walltime: Int! + numNodes: Int! + numHWThreads: Int! + numAcc: Int! + energy: Float! + SMT: Int! + exclusive: Int! + partition: String! + arrayJobId: Int! monitoringStatus: Int! - state: JobState! - tags: [Tag!]! - resources: [Resource!]! - concurrentJobs: JobLinkResultList - footprint: [FootprintValue] - energyFootprint: [EnergyFootprintValue] - metaData: Any - userData: User + state: JobState! + tags: [Tag!]! + resources: [Resource!]! + concurrentJobs: JobLinkResultList + footprint: [FootprintValue] + energyFootprint: [EnergyFootprintValue] + metaData: Any + userData: User } type JobLink { - id: ID! - jobId: Int! + id: ID! + jobId: Int! } type Cluster { - name: String! - partitions: [String!]! # Slurm partitions - subClusters: [SubCluster!]! # Hardware partitions/subclusters + name: String! + partitions: [String!]! # Slurm partitions + subClusters: [SubCluster!]! # Hardware partitions/subclusters } type SubCluster { - name: String! - nodes: String! - numberOfNodes: Int! - processorType: String! - socketsPerNode: Int! - coresPerSocket: Int! - threadsPerCore: Int! - flopRateScalar: MetricValue! - flopRateSimd: MetricValue! + name: String! + nodes: String! + numberOfNodes: Int! + processorType: String! + socketsPerNode: Int! + coresPerSocket: Int! + threadsPerCore: Int! + flopRateScalar: MetricValue! + flopRateSimd: MetricValue! memoryBandwidth: MetricValue! - topology: Topology! - metricConfig: [MetricConfig!]! - footprint: [String!]! + topology: Topology! + metricConfig: [MetricConfig!]! + footprint: [String!]! } type FootprintValue { @@ -80,94 +97,94 @@ type MetricValue { } type Topology { - node: [Int!] - socket: [[Int!]!] + node: [Int!] + socket: [[Int!]!] memoryDomain: [[Int!]!] - die: [[Int!]!] - core: [[Int!]!] + die: [[Int!]!] + core: [[Int!]!] accelerators: [Accelerator!] } type Accelerator { - id: String! - type: String! + id: String! + type: String! model: String! } type SubClusterConfig { - name: String! - peak: Float - normal: Float + name: String! 
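+  # Optional per-subcluster overrides for the thresholds below; unset fields
+  # are assumed to fall back to the parent MetricConfig, and `remove` to drop
+  # the metric for this subcluster entirely.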
+ peak: Float + normal: Float caution: Float - alert: Float - remove: Boolean + alert: Float + remove: Boolean } type MetricConfig { - name: String! - unit: Unit! - scope: MetricScope! + name: String! + unit: Unit! + scope: MetricScope! aggregation: String! - timestep: Int! - peak: Float! - normal: Float + timestep: Int! + peak: Float! + normal: Float caution: Float! - alert: Float! + alert: Float! lowerIsBetter: Boolean subClusters: [SubClusterConfig!]! } type Tag { - id: ID! + id: ID! type: String! name: String! scope: String! } type Resource { - hostname: String! - hwthreads: [Int!] - accelerators: [String!] + hostname: String! + hwthreads: [Int!] + accelerators: [String!] configuration: String } type JobMetricWithName { - name: String! - scope: MetricScope! + name: String! + scope: MetricScope! metric: JobMetric! } type JobMetric { - unit: Unit - timestep: Int! - series: [Series!] + unit: Unit + timestep: Int! + series: [Series!] statisticsSeries: StatsSeries } type Series { - hostname: String! - id: String + hostname: String! + id: String statistics: MetricStatistics - data: [NullableFloat!]! + data: [NullableFloat!]! } type StatsSeries { - mean: [NullableFloat!]! + mean: [NullableFloat!]! median: [NullableFloat!]! - min: [NullableFloat!]! - max: [NullableFloat!]! + min: [NullableFloat!]! + max: [NullableFloat!]! } type NamedStatsWithScope { - name: String! - scope: MetricScope! - stats: [ScopedStats!]! + name: String! + scope: MetricScope! + stats: [ScopedStats!]! } type ScopedStats { - hostname: String! - id: String - data: MetricStatistics! + hostname: String! + id: String + data: MetricStatistics! } type JobStats { @@ -184,8 +201,8 @@ type JobStats { } type NamedStats { - name: String! - data: MetricStatistics! + name: String! + data: MetricStatistics! } type Unit { @@ -201,12 +218,12 @@ type MetricStatistics { type MetricFootprints { metric: String! - data: [NullableFloat!]! + data: [NullableFloat!]! } type Footprints { timeWeights: TimeWeights! - metrics: [MetricFootprints!]! + metrics: [MetricFootprints!]! } type TimeWeights { @@ -215,20 +232,33 @@ type TimeWeights { coreHours: [NullableFloat!]! } -enum Aggregate { USER, PROJECT, CLUSTER } -enum SortByAggregate { TOTALWALLTIME, TOTALJOBS, TOTALNODES, TOTALNODEHOURS, TOTALCORES, TOTALCOREHOURS, TOTALACCS, TOTALACCHOURS } +enum Aggregate { + USER + PROJECT + CLUSTER +} +enum SortByAggregate { + TOTALWALLTIME + TOTALJOBS + TOTALNODES + TOTALNODEHOURS + TOTALCORES + TOTALCOREHOURS + TOTALACCS + TOTALACCHOURS +} type NodeMetrics { - host: String! + host: String! subCluster: String! - metrics: [JobMetricWithName!]! + metrics: [JobMetricWithName!]! } type NodesResultList { - items: [NodeMetrics!]! + items: [NodeMetrics!]! offset: Int - limit: Int - count: Int + limit: Int + count: Int totalNodes: Int hasNextPage: Boolean } @@ -247,14 +277,14 @@ type GlobalMetricListItem { } type Count { - name: String! + name: String! count: Int! } type User { username: String! - name: String! - email: String! + name: String! + email: String! } input MetricStatItem { @@ -263,27 +293,81 @@ input MetricStatItem { } type Query { - clusters: [Cluster!]! # List of all clusters - tags: [Tag!]! # List of all tags - globalMetrics: [GlobalMetricListItem!]! + clusters: [Cluster!]! # List of all clusters + tags: [Tag!]! # List of all tags + globalMetrics: [GlobalMetricListItem!]! user(username: String!): User allocatedNodes(cluster: String!): [Count!]! 
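+
+  # New in this patch: direct node queries backed by the node repository.
+  # An illustrative request, assuming a cluster named "fritz" exists:
+  #   { nodes(filter: [{ cluster: { eq: "fritz" } }]) { count items { hostname nodeState } } }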
- job(id: ID!): Job - jobMetrics(id: ID!, metrics: [String!], scopes: [MetricScope!], resolution: Int): [JobMetricWithName!]! - jobStats(id: ID!, metrics: [String!]): [NamedStats!]! - scopedJobStats(id: ID!, metrics: [String!], scopes: [MetricScope!]): [NamedStatsWithScope!]! + node(id: ID!): Node + nodes(filter: [NodeFilter!], order: OrderByInput): NodesResultList! + nodeStats(filter: [NodeFilter!]): [NodeStats!]! + + job(id: ID!): Job + jobMetrics( + id: ID! + metrics: [String!] + scopes: [MetricScope!] + resolution: Int + ): [JobMetricWithName!]! + + jobStats(id: ID!, metrics: [String!]): [NamedStats!]! + + scopedJobStats( + id: ID! + metrics: [String!] + scopes: [MetricScope!] + ): [NamedStatsWithScope!]! + + jobs( + filter: [JobFilter!] + page: PageRequest + order: OrderByInput + ): JobResultList! + + jobsStatistics( + filter: [JobFilter!] + metrics: [String!] + page: PageRequest + sortBy: SortByAggregate + groupBy: Aggregate + numDurationBins: String + numMetricBins: Int + ): [JobsStatistics!]! - jobs(filter: [JobFilter!], page: PageRequest, order: OrderByInput): JobResultList! - jobsStatistics(filter: [JobFilter!], metrics: [String!], page: PageRequest, sortBy: SortByAggregate, groupBy: Aggregate, numDurationBins: String, numMetricBins: Int): [JobsStatistics!]! jobsMetricStats(filter: [JobFilter!], metrics: [String!]): [JobStats!]! jobsFootprints(filter: [JobFilter!], metrics: [String!]!): Footprints - rooflineHeatmap(filter: [JobFilter!]!, rows: Int!, cols: Int!, minX: Float!, minY: Float!, maxX: Float!, maxY: Float!): [[Float!]!]! + rooflineHeatmap( + filter: [JobFilter!]! + rows: Int! + cols: Int! + minX: Float! + minY: Float! + maxX: Float! + maxY: Float! + ): [[Float!]!]! - nodeMetrics(cluster: String!, nodes: [String!], scopes: [MetricScope!], metrics: [String!], from: Time!, to: Time!): [NodeMetrics!]! - nodeMetricsList(cluster: String!, subCluster: String!, nodeFilter: String!, scopes: [MetricScope!], metrics: [String!], from: Time!, to: Time!, page: PageRequest, resolution: Int): NodesResultList! + nodeMetrics( + cluster: String! + nodes: [String!] + scopes: [MetricScope!] + metrics: [String!] + from: Time! + to: Time! + ): [NodeMetrics!]! + nodeMetricsList( + cluster: String! + subCluster: String! + nodeFilter: String! + scopes: [MetricScope!] + metrics: [String!] + from: Time! + to: Time! + page: PageRequest + resolution: Int + ): NodesResultList! } type Mutation { @@ -296,38 +380,53 @@ type Mutation { updateConfiguration(name: String!, value: String!): String } -type IntRangeOutput { from: Int!, to: Int! } -type TimeRangeOutput { range: String, from: Time!, to: Time! } +type IntRangeOutput { + from: Int! + to: Int! +} +type TimeRangeOutput { + range: String + from: Time! + to: Time! +} + +input NodeFilter { + hostname: StringInput + cluster: StringInput + subCluster: StringInput + nodeState: NodeState + healthState: MonitoringState +} input JobFilter { - tags: [ID!] - dbId: [ID!] - jobId: StringInput - arrayJobId: Int - user: StringInput - project: StringInput - jobName: StringInput - cluster: StringInput - partition: StringInput - duration: IntRange - energy: FloatRange + tags: [ID!] + dbId: [ID!] + jobId: StringInput + arrayJobId: Int + user: StringInput + project: StringInput + jobName: StringInput + cluster: StringInput + partition: StringInput + duration: IntRange + energy: FloatRange minRunningFor: Int - numNodes: IntRange + numNodes: IntRange numAccelerators: IntRange - numHWThreads: IntRange + numHWThreads: IntRange - startTime: TimeRange - state: [JobState!] 
+ startTime: TimeRange + state: [JobState!] metricStats: [MetricStatItem!] - exclusive: Int - node: StringInput + exclusive: Int + node: StringInput } input OrderByInput { field: String! - type: String!, + type: String! order: SortDirectionEnum! = ASC } @@ -337,34 +436,46 @@ enum SortDirectionEnum { } input StringInput { - eq: String - neq: String - contains: String + eq: String + neq: String + contains: String startsWith: String - endsWith: String - in: [String!] + endsWith: String + in: [String!] } -input IntRange { from: Int!, to: Int! } -input TimeRange { range: String, from: Time, to: Time } +input IntRange { + from: Int! + to: Int! +} +input TimeRange { + range: String + from: Time + to: Time +} input FloatRange { from: Float! to: Float! } +type NodesResultList { + items: [Node!]! + count: Int +} + type JobResultList { - items: [Job!]! + items: [Job!]! offset: Int - limit: Int - count: Int + limit: Int + count: Int hasNextPage: Boolean } type JobLinkResultList { listQuery: String - items: [JobLink!]! - count: Int + items: [JobLink!]! + count: Int } type HistoPoint { @@ -386,27 +497,27 @@ type MetricHistoPoint { max: Int } -type JobsStatistics { - id: ID! # If `groupBy` was used, ID of the user/project/cluster - name: String! # if User-Statistics: Given Name of Account (ID) Owner - totalJobs: Int! # Number of jobs - runningJobs: Int! # Number of running jobs - shortJobs: Int! # Number of jobs with a duration of less than duration - totalWalltime: Int! # Sum of the duration of all matched jobs in hours - totalNodes: Int! # Sum of the nodes of all matched jobs - totalNodeHours: Int! # Sum of the node hours of all matched jobs - totalCores: Int! # Sum of the cores of all matched jobs - totalCoreHours: Int! # Sum of the core hours of all matched jobs - totalAccs: Int! # Sum of the accs of all matched jobs - totalAccHours: Int! # Sum of the gpu hours of all matched jobs - histDuration: [HistoPoint!]! # value: hour, count: number of jobs with a rounded duration of value - histNumNodes: [HistoPoint!]! # value: number of nodes, count: number of jobs with that number of nodes - histNumCores: [HistoPoint!]! # value: number of cores, count: number of jobs with that number of cores - histNumAccs: [HistoPoint!]! # value: number of accs, count: number of jobs with that number of accs - histMetrics: [MetricHistoPoints!]! # metric: metricname, data array of histopoints: value: metric average bin, count: number of jobs with that metric average +type JobsStatistics { + id: ID! # If `groupBy` was used, ID of the user/project/cluster + name: String! # if User-Statistics: Given Name of Account (ID) Owner + totalJobs: Int! # Number of jobs + runningJobs: Int! # Number of running jobs + shortJobs: Int! # Number of jobs with a duration of less than duration + totalWalltime: Int! # Sum of the duration of all matched jobs in hours + totalNodes: Int! # Sum of the nodes of all matched jobs + totalNodeHours: Int! # Sum of the node hours of all matched jobs + totalCores: Int! # Sum of the cores of all matched jobs + totalCoreHours: Int! # Sum of the core hours of all matched jobs + totalAccs: Int! # Sum of the accs of all matched jobs + totalAccHours: Int! # Sum of the gpu hours of all matched jobs + histDuration: [HistoPoint!]! # value: hour, count: number of jobs with a rounded duration of value + histNumNodes: [HistoPoint!]! # value: number of nodes, count: number of jobs with that number of nodes + histNumCores: [HistoPoint!]! 
# value: number of cores, count: number of jobs with that number of cores + histNumAccs: [HistoPoint!]! # value: number of accs, count: number of jobs with that number of accs + histMetrics: [MetricHistoPoints!]! # metric: metricname, data array of histopoints: value: metric average bin, count: number of jobs with that metric average } input PageRequest { itemsPerPage: Int! - page: Int! + page: Int! } diff --git a/gqlgen.yml b/gqlgen.yml index ccd95ff..307a074 100644 --- a/gqlgen.yml +++ b/gqlgen.yml @@ -62,6 +62,11 @@ models: fields: partitions: resolver: true + Node: + model: "github.com/ClusterCockpit/cc-backend/pkg/schema.Node" + fields: + metaData: + resolver: true NullableFloat: { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.Float" } MetricScope: @@ -81,6 +86,10 @@ models: { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.Resource" } JobState: { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.JobState" } + MonitoringState: + { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.NodeState" } + HealthState: + { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.MonitoringState" } TimeRange: { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.TimeRange" } IntRange: diff --git a/internal/graph/generated/generated.go b/internal/graph/generated/generated.go index 60e3ca0..a1e9f92 100644 --- a/internal/graph/generated/generated.go +++ b/internal/graph/generated/generated.go @@ -44,6 +44,7 @@ type ResolverRoot interface { Job() JobResolver MetricValue() MetricValueResolver Mutation() MutationResolver + Node() NodeResolver Query() QueryResolver SubCluster() SubClusterResolver } @@ -268,6 +269,16 @@ type ComplexityRoot struct { Stats func(childComplexity int) int } + Node struct { + Cluster func(childComplexity int) int + HealthState func(childComplexity int) int + Hostname func(childComplexity int) int + ID func(childComplexity int) int + MetaData func(childComplexity int) int + NodeState func(childComplexity int) int + SubCluster func(childComplexity int) int + } + NodeMetrics struct { Host func(childComplexity int) int Metrics func(childComplexity int) int @@ -419,6 +430,11 @@ type MutationResolver interface { RemoveTagFromList(ctx context.Context, tagIds []string) ([]int, error) UpdateConfiguration(ctx context.Context, name string, value string) (*string, error) } +type NodeResolver interface { + NodeState(ctx context.Context, obj *schema.Node) (string, error) + HealthState(ctx context.Context, obj *schema.Node) (schema.NodeState, error) + MetaData(ctx context.Context, obj *schema.Node) (any, error) +} type QueryResolver interface { Clusters(ctx context.Context) ([]*schema.Cluster, error) Tags(ctx context.Context) ([]*schema.Tag, error) @@ -1435,6 +1451,55 @@ func (e *executableSchema) Complexity(typeName, field string, childComplexity in return e.complexity.NamedStatsWithScope.Stats(childComplexity), true + case "Node.cluster": + if e.complexity.Node.Cluster == nil { + break + } + + return e.complexity.Node.Cluster(childComplexity), true + + case "Node.HealthState": + if e.complexity.Node.HealthState == nil { + break + } + + return e.complexity.Node.HealthState(childComplexity), true + + case "Node.hostname": + if e.complexity.Node.Hostname == nil { + break + } + + return e.complexity.Node.Hostname(childComplexity), true + + case "Node.id": + if e.complexity.Node.ID == nil { + break + } + + return e.complexity.Node.ID(childComplexity), true + + case "Node.metaData": + if e.complexity.Node.MetaData == nil { + break + } + + return 
e.complexity.Node.MetaData(childComplexity), true + + case "Node.nodeState": + if e.complexity.Node.NodeState == nil { + break + } + + return e.complexity.Node.NodeState(childComplexity), true + + case "Node.subCluster": + if e.complexity.Node.SubCluster == nil { + break + } + + return e.complexity.Node.SubCluster(childComplexity), true + case "NodeMetrics.host": if e.complexity.NodeMetrics.Host == nil { break @@ -2179,61 +2244,73 @@ scalar Any scalar NullableFloat scalar MetricScope scalar JobState +scalar NodeState +scalar MonitoringState + +type Node { + id: ID! + hostname: String! + cluster: String! + subCluster: String! + nodeState: NodeState! + HealthState: MonitoringState! + metaData: Any +} type Job { - id: ID! - jobId: Int! - user: String! - project: String! - cluster: String! - subCluster: String! - startTime: Time! - duration: Int! - walltime: Int! - numNodes: Int! - numHWThreads: Int! - numAcc: Int! - energy: Float! - SMT: Int! - exclusive: Int! - partition: String! - arrayJobId: Int! + id: ID! + jobId: Int! + user: String! + project: String! + cluster: String! + subCluster: String! + startTime: Time! + duration: Int! + walltime: Int! + numNodes: Int! + numHWThreads: Int! + numAcc: Int! + energy: Float! + SMT: Int! + exclusive: Int! + partition: String! + arrayJobId: Int! monitoringStatus: Int! - state: JobState! - tags: [Tag!]! - resources: [Resource!]! - concurrentJobs: JobLinkResultList - footprint: [FootprintValue] - energyFootprint: [EnergyFootprintValue] - metaData: Any - userData: User + state: JobState! + tags: [Tag!]! + resources: [Resource!]! + concurrentJobs: JobLinkResultList + footprint: [FootprintValue] + energyFootprint: [EnergyFootprintValue] + metaData: Any + userData: User } type JobLink { - id: ID! - jobId: Int! + id: ID! + jobId: Int! } type Cluster { - name: String! - partitions: [String!]! # Slurm partitions - subClusters: [SubCluster!]! # Hardware partitions/subclusters + name: String! + partitions: [String!]! # Slurm partitions + subClusters: [SubCluster!]! # Hardware partitions/subclusters } type SubCluster { - name: String! - nodes: String! - numberOfNodes: Int! - processorType: String! - socketsPerNode: Int! - coresPerSocket: Int! - threadsPerCore: Int! - flopRateScalar: MetricValue! - flopRateSimd: MetricValue! + name: String! + nodes: String! + numberOfNodes: Int! + processorType: String! + socketsPerNode: Int! + coresPerSocket: Int! + threadsPerCore: Int! + flopRateScalar: MetricValue! + flopRateSimd: MetricValue! memoryBandwidth: MetricValue! - topology: Topology! - metricConfig: [MetricConfig!]! - footprint: [String!]! + topology: Topology! + metricConfig: [MetricConfig!]! + footprint: [String!]! } type FootprintValue { @@ -2255,94 +2332,94 @@ type MetricValue { } type Topology { - node: [Int!] - socket: [[Int!]!] + node: [Int!] + socket: [[Int!]!] memoryDomain: [[Int!]!] - die: [[Int!]!] - core: [[Int!]!] + die: [[Int!]!] + core: [[Int!]!] accelerators: [Accelerator!] } type Accelerator { - id: String! - type: String! + id: String! + type: String! model: String! } type SubClusterConfig { - name: String! - peak: Float - normal: Float + name: String! + peak: Float + normal: Float caution: Float - alert: Float - remove: Boolean + alert: Float + remove: Boolean } type MetricConfig { - name: String! - unit: Unit! - scope: MetricScope! + name: String! + unit: Unit! + scope: MetricScope! aggregation: String! - timestep: Int! - peak: Float! - normal: Float + timestep: Int! + peak: Float! + normal: Float caution: Float! - alert: Float! + alert: Float! 
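+  # Display thresholds for this metric; the subClusters list below may
+  # override them per hardware partition (see SubClusterConfig).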
lowerIsBetter: Boolean subClusters: [SubClusterConfig!]! } type Tag { - id: ID! + id: ID! type: String! name: String! scope: String! } type Resource { - hostname: String! - hwthreads: [Int!] - accelerators: [String!] + hostname: String! + hwthreads: [Int!] + accelerators: [String!] configuration: String } type JobMetricWithName { - name: String! - scope: MetricScope! + name: String! + scope: MetricScope! metric: JobMetric! } type JobMetric { - unit: Unit - timestep: Int! - series: [Series!] + unit: Unit + timestep: Int! + series: [Series!] statisticsSeries: StatsSeries } type Series { - hostname: String! - id: String + hostname: String! + id: String statistics: MetricStatistics - data: [NullableFloat!]! + data: [NullableFloat!]! } type StatsSeries { - mean: [NullableFloat!]! + mean: [NullableFloat!]! median: [NullableFloat!]! - min: [NullableFloat!]! - max: [NullableFloat!]! + min: [NullableFloat!]! + max: [NullableFloat!]! } type NamedStatsWithScope { - name: String! - scope: MetricScope! - stats: [ScopedStats!]! + name: String! + scope: MetricScope! + stats: [ScopedStats!]! } type ScopedStats { - hostname: String! - id: String - data: MetricStatistics! + hostname: String! + id: String + data: MetricStatistics! } type JobStats { @@ -2359,8 +2436,8 @@ type JobStats { } type NamedStats { - name: String! - data: MetricStatistics! + name: String! + data: MetricStatistics! } type Unit { @@ -2376,12 +2453,12 @@ type MetricStatistics { type MetricFootprints { metric: String! - data: [NullableFloat!]! + data: [NullableFloat!]! } type Footprints { timeWeights: TimeWeights! - metrics: [MetricFootprints!]! + metrics: [MetricFootprints!]! } type TimeWeights { @@ -2390,20 +2467,33 @@ type TimeWeights { coreHours: [NullableFloat!]! } -enum Aggregate { USER, PROJECT, CLUSTER } -enum SortByAggregate { TOTALWALLTIME, TOTALJOBS, TOTALNODES, TOTALNODEHOURS, TOTALCORES, TOTALCOREHOURS, TOTALACCS, TOTALACCHOURS } +enum Aggregate { + USER + PROJECT + CLUSTER +} +enum SortByAggregate { + TOTALWALLTIME + TOTALJOBS + TOTALNODES + TOTALNODEHOURS + TOTALCORES + TOTALCOREHOURS + TOTALACCS + TOTALACCHOURS +} type NodeMetrics { - host: String! + host: String! subCluster: String! - metrics: [JobMetricWithName!]! + metrics: [JobMetricWithName!]! } type NodesResultList { - items: [NodeMetrics!]! + items: [NodeMetrics!]! offset: Int - limit: Int - count: Int + limit: Int + count: Int totalNodes: Int hasNextPage: Boolean } @@ -2422,14 +2512,14 @@ type GlobalMetricListItem { } type Count { - name: String! + name: String! count: Int! } type User { username: String! - name: String! - email: String! + name: String! + email: String! } input MetricStatItem { @@ -2438,27 +2528,73 @@ input MetricStatItem { } type Query { - clusters: [Cluster!]! # List of all clusters - tags: [Tag!]! # List of all tags - globalMetrics: [GlobalMetricListItem!]! + clusters: [Cluster!]! # List of all clusters + tags: [Tag!]! # List of all tags + globalMetrics: [GlobalMetricListItem!]! user(username: String!): User allocatedNodes(cluster: String!): [Count!]! job(id: ID!): Job - jobMetrics(id: ID!, metrics: [String!], scopes: [MetricScope!], resolution: Int): [JobMetricWithName!]! + jobMetrics( + id: ID! + metrics: [String!] + scopes: [MetricScope!] + resolution: Int + ): [JobMetricWithName!]! jobStats(id: ID!, metrics: [String!]): [NamedStats!]! - scopedJobStats(id: ID!, metrics: [String!], scopes: [MetricScope!]): [NamedStatsWithScope!]! + scopedJobStats( + id: ID! + metrics: [String!] + scopes: [MetricScope!] + ): [NamedStatsWithScope!]! 
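+
+  # Illustrative only: per-node averages for a job with database ID "123" and
+  # a hypothetical metric "flops_any", assuming MetricStatistics exposes
+  # avg/min/max:
+  #   { scopedJobStats(id: "123", metrics: ["flops_any"], scopes: ["node"]) {
+  #       name stats { hostname data { avg } } } }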
- jobs(filter: [JobFilter!], page: PageRequest, order: OrderByInput): JobResultList! - jobsStatistics(filter: [JobFilter!], metrics: [String!], page: PageRequest, sortBy: SortByAggregate, groupBy: Aggregate, numDurationBins: String, numMetricBins: Int): [JobsStatistics!]! + jobs( + filter: [JobFilter!] + page: PageRequest + order: OrderByInput + ): JobResultList! + jobsStatistics( + filter: [JobFilter!] + metrics: [String!] + page: PageRequest + sortBy: SortByAggregate + groupBy: Aggregate + numDurationBins: String + numMetricBins: Int + ): [JobsStatistics!]! jobsMetricStats(filter: [JobFilter!], metrics: [String!]): [JobStats!]! jobsFootprints(filter: [JobFilter!], metrics: [String!]!): Footprints - rooflineHeatmap(filter: [JobFilter!]!, rows: Int!, cols: Int!, minX: Float!, minY: Float!, maxX: Float!, maxY: Float!): [[Float!]!]! + rooflineHeatmap( + filter: [JobFilter!]! + rows: Int! + cols: Int! + minX: Float! + minY: Float! + maxX: Float! + maxY: Float! + ): [[Float!]!]! - nodeMetrics(cluster: String!, nodes: [String!], scopes: [MetricScope!], metrics: [String!], from: Time!, to: Time!): [NodeMetrics!]! - nodeMetricsList(cluster: String!, subCluster: String!, nodeFilter: String!, scopes: [MetricScope!], metrics: [String!], from: Time!, to: Time!, page: PageRequest, resolution: Int): NodesResultList! + nodeMetrics( + cluster: String! + nodes: [String!] + scopes: [MetricScope!] + metrics: [String!] + from: Time! + to: Time! + ): [NodeMetrics!]! + nodeMetricsList( + cluster: String! + subCluster: String! + nodeFilter: String! + scopes: [MetricScope!] + metrics: [String!] + from: Time! + to: Time! + page: PageRequest + resolution: Int + ): NodesResultList! } type Mutation { @@ -2471,38 +2607,45 @@ type Mutation { updateConfiguration(name: String!, value: String!): String } -type IntRangeOutput { from: Int!, to: Int! } -type TimeRangeOutput { range: String, from: Time!, to: Time! } +type IntRangeOutput { + from: Int! + to: Int! +} +type TimeRangeOutput { + range: String + from: Time! + to: Time! +} input JobFilter { - tags: [ID!] - dbId: [ID!] - jobId: StringInput - arrayJobId: Int - user: StringInput - project: StringInput - jobName: StringInput - cluster: StringInput - partition: StringInput - duration: IntRange - energy: FloatRange + tags: [ID!] + dbId: [ID!] + jobId: StringInput + arrayJobId: Int + user: StringInput + project: StringInput + jobName: StringInput + cluster: StringInput + partition: StringInput + duration: IntRange + energy: FloatRange minRunningFor: Int - numNodes: IntRange + numNodes: IntRange numAccelerators: IntRange - numHWThreads: IntRange + numHWThreads: IntRange - startTime: TimeRange - state: [JobState!] + startTime: TimeRange + state: [JobState!] metricStats: [MetricStatItem!] - exclusive: Int - node: StringInput + exclusive: Int + node: StringInput } input OrderByInput { field: String! - type: String!, + type: String! order: SortDirectionEnum! = ASC } @@ -2512,16 +2655,23 @@ enum SortDirectionEnum { } input StringInput { - eq: String - neq: String - contains: String + eq: String + neq: String + contains: String startsWith: String - endsWith: String - in: [String!] + endsWith: String + in: [String!] } -input IntRange { from: Int!, to: Int! } -input TimeRange { range: String, from: Time, to: Time } +input IntRange { + from: Int! + to: Int! +} +input TimeRange { + range: String + from: Time + to: Time +} input FloatRange { from: Float! @@ -2529,17 +2679,17 @@ input FloatRange { } type JobResultList { - items: [Job!]! + items: [Job!]! 
offset: Int - limit: Int - count: Int + limit: Int + count: Int hasNextPage: Boolean } type JobLinkResultList { listQuery: String - items: [JobLink!]! - count: Int + items: [JobLink!]! + count: Int } type HistoPoint { @@ -2561,29 +2711,29 @@ type MetricHistoPoint { max: Int } -type JobsStatistics { - id: ID! # If ` + "`" + `groupBy` + "`" + ` was used, ID of the user/project/cluster - name: String! # if User-Statistics: Given Name of Account (ID) Owner - totalJobs: Int! # Number of jobs - runningJobs: Int! # Number of running jobs - shortJobs: Int! # Number of jobs with a duration of less than duration - totalWalltime: Int! # Sum of the duration of all matched jobs in hours - totalNodes: Int! # Sum of the nodes of all matched jobs - totalNodeHours: Int! # Sum of the node hours of all matched jobs - totalCores: Int! # Sum of the cores of all matched jobs - totalCoreHours: Int! # Sum of the core hours of all matched jobs - totalAccs: Int! # Sum of the accs of all matched jobs - totalAccHours: Int! # Sum of the gpu hours of all matched jobs - histDuration: [HistoPoint!]! # value: hour, count: number of jobs with a rounded duration of value - histNumNodes: [HistoPoint!]! # value: number of nodes, count: number of jobs with that number of nodes - histNumCores: [HistoPoint!]! # value: number of cores, count: number of jobs with that number of cores - histNumAccs: [HistoPoint!]! # value: number of accs, count: number of jobs with that number of accs - histMetrics: [MetricHistoPoints!]! # metric: metricname, data array of histopoints: value: metric average bin, count: number of jobs with that metric average +type JobsStatistics { + id: ID! # If ` + "`" + `groupBy` + "`" + ` was used, ID of the user/project/cluster + name: String! # if User-Statistics: Given Name of Account (ID) Owner + totalJobs: Int! # Number of jobs + runningJobs: Int! # Number of running jobs + shortJobs: Int! # Number of jobs with a duration of less than duration + totalWalltime: Int! # Sum of the duration of all matched jobs in hours + totalNodes: Int! # Sum of the nodes of all matched jobs + totalNodeHours: Int! # Sum of the node hours of all matched jobs + totalCores: Int! # Sum of the cores of all matched jobs + totalCoreHours: Int! # Sum of the core hours of all matched jobs + totalAccs: Int! # Sum of the accs of all matched jobs + totalAccHours: Int! # Sum of the gpu hours of all matched jobs + histDuration: [HistoPoint!]! # value: hour, count: number of jobs with a rounded duration of value + histNumNodes: [HistoPoint!]! # value: number of nodes, count: number of jobs with that number of nodes + histNumCores: [HistoPoint!]! # value: number of cores, count: number of jobs with that number of cores + histNumAccs: [HistoPoint!]! # value: number of accs, count: number of jobs with that number of accs + histMetrics: [MetricHistoPoints!]! # metric: metricname, data array of histopoints: value: metric average bin, count: number of jobs with that metric average } input PageRequest { itemsPerPage: Int! - page: Int! + page: Int! 
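+  # Assumed 1-based: page: 1 with itemsPerPage: 50 selects the first 50 items.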
} `, BuiltIn: false}, } @@ -10445,6 +10595,311 @@ func (ec *executionContext) fieldContext_NamedStatsWithScope_stats(_ context.Con return fc, nil } +func (ec *executionContext) _Node_id(ctx context.Context, field graphql.CollectedField, obj *schema.Node) (ret graphql.Marshaler) { + fc, err := ec.fieldContext_Node_id(ctx, field) + if err != nil { + return graphql.Null + } + ctx = graphql.WithFieldContext(ctx, fc) + defer func() { + if r := recover(); r != nil { + ec.Error(ctx, ec.Recover(ctx, r)) + ret = graphql.Null + } + }() + resTmp, err := ec.ResolverMiddleware(ctx, func(rctx context.Context) (any, error) { + ctx = rctx // use context from middleware stack in children + return obj.ID, nil + }) + if err != nil { + ec.Error(ctx, err) + return graphql.Null + } + if resTmp == nil { + if !graphql.HasFieldError(ctx, fc) { + ec.Errorf(ctx, "must not be null") + } + return graphql.Null + } + res := resTmp.(int64) + fc.Result = res + return ec.marshalNID2int64(ctx, field.Selections, res) +} + +func (ec *executionContext) fieldContext_Node_id(_ context.Context, field graphql.CollectedField) (fc *graphql.FieldContext, err error) { + fc = &graphql.FieldContext{ + Object: "Node", + Field: field, + IsMethod: false, + IsResolver: false, + Child: func(ctx context.Context, field graphql.CollectedField) (*graphql.FieldContext, error) { + return nil, errors.New("field of type ID does not have child fields") + }, + } + return fc, nil +} + +func (ec *executionContext) _Node_hostname(ctx context.Context, field graphql.CollectedField, obj *schema.Node) (ret graphql.Marshaler) { + fc, err := ec.fieldContext_Node_hostname(ctx, field) + if err != nil { + return graphql.Null + } + ctx = graphql.WithFieldContext(ctx, fc) + defer func() { + if r := recover(); r != nil { + ec.Error(ctx, ec.Recover(ctx, r)) + ret = graphql.Null + } + }() + resTmp, err := ec.ResolverMiddleware(ctx, func(rctx context.Context) (any, error) { + ctx = rctx // use context from middleware stack in children + return obj.Hostname, nil + }) + if err != nil { + ec.Error(ctx, err) + return graphql.Null + } + if resTmp == nil { + if !graphql.HasFieldError(ctx, fc) { + ec.Errorf(ctx, "must not be null") + } + return graphql.Null + } + res := resTmp.(string) + fc.Result = res + return ec.marshalNString2string(ctx, field.Selections, res) +} + +func (ec *executionContext) fieldContext_Node_hostname(_ context.Context, field graphql.CollectedField) (fc *graphql.FieldContext, err error) { + fc = &graphql.FieldContext{ + Object: "Node", + Field: field, + IsMethod: false, + IsResolver: false, + Child: func(ctx context.Context, field graphql.CollectedField) (*graphql.FieldContext, error) { + return nil, errors.New("field of type String does not have child fields") + }, + } + return fc, nil +} + +func (ec *executionContext) _Node_cluster(ctx context.Context, field graphql.CollectedField, obj *schema.Node) (ret graphql.Marshaler) { + fc, err := ec.fieldContext_Node_cluster(ctx, field) + if err != nil { + return graphql.Null + } + ctx = graphql.WithFieldContext(ctx, fc) + defer func() { + if r := recover(); r != nil { + ec.Error(ctx, ec.Recover(ctx, r)) + ret = graphql.Null + } + }() + resTmp, err := ec.ResolverMiddleware(ctx, func(rctx context.Context) (any, error) { + ctx = rctx // use context from middleware stack in children + return obj.Cluster, nil + }) + if err != nil { + ec.Error(ctx, err) + return graphql.Null + } + if resTmp == nil { + if !graphql.HasFieldError(ctx, fc) { + ec.Errorf(ctx, "must not be null") + } + return graphql.Null + } + res := 
resTmp.(string) + fc.Result = res + return ec.marshalNString2string(ctx, field.Selections, res) +} + +func (ec *executionContext) fieldContext_Node_cluster(_ context.Context, field graphql.CollectedField) (fc *graphql.FieldContext, err error) { + fc = &graphql.FieldContext{ + Object: "Node", + Field: field, + IsMethod: false, + IsResolver: false, + Child: func(ctx context.Context, field graphql.CollectedField) (*graphql.FieldContext, error) { + return nil, errors.New("field of type String does not have child fields") + }, + } + return fc, nil +} + +func (ec *executionContext) _Node_subCluster(ctx context.Context, field graphql.CollectedField, obj *schema.Node) (ret graphql.Marshaler) { + fc, err := ec.fieldContext_Node_subCluster(ctx, field) + if err != nil { + return graphql.Null + } + ctx = graphql.WithFieldContext(ctx, fc) + defer func() { + if r := recover(); r != nil { + ec.Error(ctx, ec.Recover(ctx, r)) + ret = graphql.Null + } + }() + resTmp, err := ec.ResolverMiddleware(ctx, func(rctx context.Context) (any, error) { + ctx = rctx // use context from middleware stack in children + return obj.SubCluster, nil + }) + if err != nil { + ec.Error(ctx, err) + return graphql.Null + } + if resTmp == nil { + if !graphql.HasFieldError(ctx, fc) { + ec.Errorf(ctx, "must not be null") + } + return graphql.Null + } + res := resTmp.(string) + fc.Result = res + return ec.marshalNString2string(ctx, field.Selections, res) +} + +func (ec *executionContext) fieldContext_Node_subCluster(_ context.Context, field graphql.CollectedField) (fc *graphql.FieldContext, err error) { + fc = &graphql.FieldContext{ + Object: "Node", + Field: field, + IsMethod: false, + IsResolver: false, + Child: func(ctx context.Context, field graphql.CollectedField) (*graphql.FieldContext, error) { + return nil, errors.New("field of type String does not have child fields") + }, + } + return fc, nil +} + +func (ec *executionContext) _Node_nodeState(ctx context.Context, field graphql.CollectedField, obj *schema.Node) (ret graphql.Marshaler) { + fc, err := ec.fieldContext_Node_nodeState(ctx, field) + if err != nil { + return graphql.Null + } + ctx = graphql.WithFieldContext(ctx, fc) + defer func() { + if r := recover(); r != nil { + ec.Error(ctx, ec.Recover(ctx, r)) + ret = graphql.Null + } + }() + resTmp, err := ec.ResolverMiddleware(ctx, func(rctx context.Context) (any, error) { + ctx = rctx // use context from middleware stack in children + return ec.resolvers.Node().NodeState(rctx, obj) + }) + if err != nil { + ec.Error(ctx, err) + return graphql.Null + } + if resTmp == nil { + if !graphql.HasFieldError(ctx, fc) { + ec.Errorf(ctx, "must not be null") + } + return graphql.Null + } + res := resTmp.(string) + fc.Result = res + return ec.marshalNNodeState2string(ctx, field.Selections, res) +} + +func (ec *executionContext) fieldContext_Node_nodeState(_ context.Context, field graphql.CollectedField) (fc *graphql.FieldContext, err error) { + fc = &graphql.FieldContext{ + Object: "Node", + Field: field, + IsMethod: true, + IsResolver: true, + Child: func(ctx context.Context, field graphql.CollectedField) (*graphql.FieldContext, error) { + return nil, errors.New("field of type NodeState does not have child fields") + }, + } + return fc, nil +} + +func (ec *executionContext) _Node_HealthState(ctx context.Context, field graphql.CollectedField, obj *schema.Node) (ret graphql.Marshaler) { + fc, err := ec.fieldContext_Node_HealthState(ctx, field) + if err != nil { + return graphql.Null + } + ctx = graphql.WithFieldContext(ctx, fc) + defer 
func() { + if r := recover(); r != nil { + ec.Error(ctx, ec.Recover(ctx, r)) + ret = graphql.Null + } + }() + resTmp, err := ec.ResolverMiddleware(ctx, func(rctx context.Context) (any, error) { + ctx = rctx // use context from middleware stack in children + return ec.resolvers.Node().HealthState(rctx, obj) + }) + if err != nil { + ec.Error(ctx, err) + return graphql.Null + } + if resTmp == nil { + if !graphql.HasFieldError(ctx, fc) { + ec.Errorf(ctx, "must not be null") + } + return graphql.Null + } + res := resTmp.(schema.NodeState) + fc.Result = res + return ec.marshalNMonitoringState2githubᚗcomᚋClusterCockpitᚋccᚑbackendᚋpkgᚋschemaᚐNodeState(ctx, field.Selections, res) +} + +func (ec *executionContext) fieldContext_Node_HealthState(_ context.Context, field graphql.CollectedField) (fc *graphql.FieldContext, err error) { + fc = &graphql.FieldContext{ + Object: "Node", + Field: field, + IsMethod: true, + IsResolver: true, + Child: func(ctx context.Context, field graphql.CollectedField) (*graphql.FieldContext, error) { + return nil, errors.New("field of type MonitoringState does not have child fields") + }, + } + return fc, nil +} + +func (ec *executionContext) _Node_metaData(ctx context.Context, field graphql.CollectedField, obj *schema.Node) (ret graphql.Marshaler) { + fc, err := ec.fieldContext_Node_metaData(ctx, field) + if err != nil { + return graphql.Null + } + ctx = graphql.WithFieldContext(ctx, fc) + defer func() { + if r := recover(); r != nil { + ec.Error(ctx, ec.Recover(ctx, r)) + ret = graphql.Null + } + }() + resTmp, err := ec.ResolverMiddleware(ctx, func(rctx context.Context) (any, error) { + ctx = rctx // use context from middleware stack in children + return ec.resolvers.Node().MetaData(rctx, obj) + }) + if err != nil { + ec.Error(ctx, err) + return graphql.Null + } + if resTmp == nil { + return graphql.Null + } + res := resTmp.(any) + fc.Result = res + return ec.marshalOAny2interface(ctx, field.Selections, res) +} + +func (ec *executionContext) fieldContext_Node_metaData(_ context.Context, field graphql.CollectedField) (fc *graphql.FieldContext, err error) { + fc = &graphql.FieldContext{ + Object: "Node", + Field: field, + IsMethod: true, + IsResolver: true, + Child: func(ctx context.Context, field graphql.CollectedField) (*graphql.FieldContext, error) { + return nil, errors.New("field of type Any does not have child fields") + }, + } + return fc, nil +} + func (ec *executionContext) _NodeMetrics_host(ctx context.Context, field graphql.CollectedField, obj *model.NodeMetrics) (ret graphql.Marshaler) { fc, err := ec.fieldContext_NodeMetrics_host(ctx, field) if err != nil { @@ -18695,6 +19150,165 @@ func (ec *executionContext) _NamedStatsWithScope(ctx context.Context, sel ast.Se return out } +var nodeImplementors = []string{"Node"} + +func (ec *executionContext) _Node(ctx context.Context, sel ast.SelectionSet, obj *schema.Node) graphql.Marshaler { + fields := graphql.CollectFields(ec.OperationContext, sel, nodeImplementors) + + out := graphql.NewFieldSet(fields) + deferred := make(map[string]*graphql.FieldSet) + for i, field := range fields { + switch field.Name { + case "__typename": + out.Values[i] = graphql.MarshalString("Node") + case "id": + out.Values[i] = ec._Node_id(ctx, field, obj) + if out.Values[i] == graphql.Null { + atomic.AddUint32(&out.Invalids, 1) + } + case "hostname": + out.Values[i] = ec._Node_hostname(ctx, field, obj) + if out.Values[i] == graphql.Null { + atomic.AddUint32(&out.Invalids, 1) + } + case "cluster": + out.Values[i] = ec._Node_cluster(ctx, 
field, obj) + if out.Values[i] == graphql.Null { + atomic.AddUint32(&out.Invalids, 1) + } + case "subCluster": + out.Values[i] = ec._Node_subCluster(ctx, field, obj) + if out.Values[i] == graphql.Null { + atomic.AddUint32(&out.Invalids, 1) + } + case "nodeState": + field := field + + innerFunc := func(ctx context.Context, fs *graphql.FieldSet) (res graphql.Marshaler) { + defer func() { + if r := recover(); r != nil { + ec.Error(ctx, ec.Recover(ctx, r)) + } + }() + res = ec._Node_nodeState(ctx, field, obj) + if res == graphql.Null { + atomic.AddUint32(&fs.Invalids, 1) + } + return res + } + + if field.Deferrable != nil { + dfs, ok := deferred[field.Deferrable.Label] + di := 0 + if ok { + dfs.AddField(field) + di = len(dfs.Values) - 1 + } else { + dfs = graphql.NewFieldSet([]graphql.CollectedField{field}) + deferred[field.Deferrable.Label] = dfs + } + dfs.Concurrently(di, func(ctx context.Context) graphql.Marshaler { + return innerFunc(ctx, dfs) + }) + + // don't run the out.Concurrently() call below + out.Values[i] = graphql.Null + continue + } + + out.Concurrently(i, func(ctx context.Context) graphql.Marshaler { return innerFunc(ctx, out) }) + case "HealthState": + field := field + + innerFunc := func(ctx context.Context, fs *graphql.FieldSet) (res graphql.Marshaler) { + defer func() { + if r := recover(); r != nil { + ec.Error(ctx, ec.Recover(ctx, r)) + } + }() + res = ec._Node_HealthState(ctx, field, obj) + if res == graphql.Null { + atomic.AddUint32(&fs.Invalids, 1) + } + return res + } + + if field.Deferrable != nil { + dfs, ok := deferred[field.Deferrable.Label] + di := 0 + if ok { + dfs.AddField(field) + di = len(dfs.Values) - 1 + } else { + dfs = graphql.NewFieldSet([]graphql.CollectedField{field}) + deferred[field.Deferrable.Label] = dfs + } + dfs.Concurrently(di, func(ctx context.Context) graphql.Marshaler { + return innerFunc(ctx, dfs) + }) + + // don't run the out.Concurrently() call below + out.Values[i] = graphql.Null + continue + } + + out.Concurrently(i, func(ctx context.Context) graphql.Marshaler { return innerFunc(ctx, out) }) + case "metaData": + field := field + + innerFunc := func(ctx context.Context, _ *graphql.FieldSet) (res graphql.Marshaler) { + defer func() { + if r := recover(); r != nil { + ec.Error(ctx, ec.Recover(ctx, r)) + } + }() + res = ec._Node_metaData(ctx, field, obj) + return res + } + + if field.Deferrable != nil { + dfs, ok := deferred[field.Deferrable.Label] + di := 0 + if ok { + dfs.AddField(field) + di = len(dfs.Values) - 1 + } else { + dfs = graphql.NewFieldSet([]graphql.CollectedField{field}) + deferred[field.Deferrable.Label] = dfs + } + dfs.Concurrently(di, func(ctx context.Context) graphql.Marshaler { + return innerFunc(ctx, dfs) + }) + + // don't run the out.Concurrently() call below + out.Values[i] = graphql.Null + continue + } + + out.Concurrently(i, func(ctx context.Context) graphql.Marshaler { return innerFunc(ctx, out) }) + default: + panic("unknown field " + strconv.Quote(field.Name)) + } + } + out.Dispatch(ctx) + if out.Invalids > 0 { + return graphql.Null + } + + atomic.AddInt32(&ec.deferred, int32(len(deferred))) + + for label, dfs := range deferred { + ec.processDeferredGroup(graphql.DeferredGroup{ + Label: label, + Path: graphql.GetPath(ctx), + FieldSet: dfs, + Context: ctx, + }) + } + + return out +} + var nodeMetricsImplementors = []string{"NodeMetrics"} func (ec *executionContext) _NodeMetrics(ctx context.Context, sel ast.SelectionSet, obj *model.NodeMetrics) graphql.Marshaler { @@ -21285,6 +21899,22 @@ func (ec 
*executionContext) marshalNMetricValue2githubᚗcomᚋClusterCockpitᚋ return ec._MetricValue(ctx, sel, &v) } +func (ec *executionContext) unmarshalNMonitoringState2githubᚗcomᚋClusterCockpitᚋccᚑbackendᚋpkgᚋschemaᚐNodeState(ctx context.Context, v any) (schema.NodeState, error) { + tmp, err := graphql.UnmarshalString(v) + res := schema.NodeState(tmp) + return res, graphql.ErrorOnPath(ctx, err) +} + +func (ec *executionContext) marshalNMonitoringState2githubᚗcomᚋClusterCockpitᚋccᚑbackendᚋpkgᚋschemaᚐNodeState(ctx context.Context, sel ast.SelectionSet, v schema.NodeState) graphql.Marshaler { + res := graphql.MarshalString(string(v)) + if res == graphql.Null { + if !graphql.HasFieldError(ctx, graphql.GetFieldContext(ctx)) { + ec.Errorf(ctx, "the requested element is null which the schema does not allow") + } + } + return res +} + func (ec *executionContext) marshalNNamedStats2ᚕᚖgithubᚗcomᚋClusterCockpitᚋccᚑbackendᚋinternalᚋgraphᚋmodelᚐNamedStatsᚄ(ctx context.Context, sel ast.SelectionSet, v []*model.NamedStats) graphql.Marshaler { ret := make(graphql.Array, len(v)) var wg sync.WaitGroup @@ -21447,6 +22077,21 @@ func (ec *executionContext) marshalNNodeMetrics2ᚖgithubᚗcomᚋClusterCockpit return ec._NodeMetrics(ctx, sel, v) } +func (ec *executionContext) unmarshalNNodeState2string(ctx context.Context, v any) (string, error) { + res, err := graphql.UnmarshalString(v) + return res, graphql.ErrorOnPath(ctx, err) +} + +func (ec *executionContext) marshalNNodeState2string(ctx context.Context, sel ast.SelectionSet, v string) graphql.Marshaler { + res := graphql.MarshalString(v) + if res == graphql.Null { + if !graphql.HasFieldError(ctx, graphql.GetFieldContext(ctx)) { + ec.Errorf(ctx, "the requested element is null which the schema does not allow") + } + } + return res +} + func (ec *executionContext) marshalNNodesResultList2githubᚗcomᚋClusterCockpitᚋccᚑbackendᚋinternalᚋgraphᚋmodelᚐNodesResultList(ctx context.Context, sel ast.SelectionSet, v model.NodesResultList) graphql.Marshaler { return ec._NodesResultList(ctx, sel, &v) } diff --git a/internal/graph/schema.resolvers.go b/internal/graph/schema.resolvers.go index 6b790a5..7d2331f 100644 --- a/internal/graph/schema.resolvers.go +++ b/internal/graph/schema.resolvers.go @@ -304,6 +304,21 @@ func (r *mutationResolver) UpdateConfiguration(ctx context.Context, name string, return nil, nil } +// NodeState is the resolver for the nodeState field. +func (r *nodeResolver) NodeState(ctx context.Context, obj *schema.Node) (string, error) { + panic(fmt.Errorf("not implemented: NodeState - nodeState")) +} + +// HealthState is the resolver for the HealthState field. +func (r *nodeResolver) HealthState(ctx context.Context, obj *schema.Node) (schema.NodeState, error) { + panic(fmt.Errorf("not implemented: HealthState - HealthState")) +} + +// MetaData is the resolver for the metaData field. +func (r *nodeResolver) MetaData(ctx context.Context, obj *schema.Node) (any, error) { + panic(fmt.Errorf("not implemented: MetaData - metaData")) +} + // Clusters is the resolver for the clusters field. func (r *queryResolver) Clusters(ctx context.Context) ([]*schema.Cluster, error) { return archive.Clusters, nil @@ -775,6 +790,9 @@ func (r *Resolver) MetricValue() generated.MetricValueResolver { return &metricV // Mutation returns generated.MutationResolver implementation. func (r *Resolver) Mutation() generated.MutationResolver { return &mutationResolver{r} } +// Node returns generated.NodeResolver implementation. 
+func (r *Resolver) Node() generated.NodeResolver { return &nodeResolver{r} } + // Query returns generated.QueryResolver implementation. func (r *Resolver) Query() generated.QueryResolver { return &queryResolver{r} } @@ -785,5 +803,6 @@ type clusterResolver struct{ *Resolver } type jobResolver struct{ *Resolver } type metricValueResolver struct{ *Resolver } type mutationResolver struct{ *Resolver } +type nodeResolver struct{ *Resolver } type queryResolver struct{ *Resolver } type subClusterResolver struct{ *Resolver } diff --git a/internal/repository/node.go b/internal/repository/node.go new file mode 100644 index 0000000..3713bbd --- /dev/null +++ b/internal/repository/node.go @@ -0,0 +1,217 @@ +// Copyright (C) NHR@FAU, University Erlangen-Nuremberg. +// All rights reserved. +// Use of this source code is governed by a MIT-style +// license that can be found in the LICENSE file. +package repository + +import ( + "encoding/json" + "fmt" + "maps" + "sync" + "time" + + "github.com/ClusterCockpit/cc-backend/pkg/log" + "github.com/ClusterCockpit/cc-backend/pkg/lrucache" + "github.com/ClusterCockpit/cc-backend/pkg/schema" + sq "github.com/Masterminds/squirrel" + "github.com/jmoiron/sqlx" +) + +var ( + nodeRepoOnce sync.Once + nodeRepoInstance *NodeRepository +) + +type NodeRepository struct { + DB *sqlx.DB + stmtCache *sq.StmtCache + cache *lrucache.Cache + driver string +} + +func GetNodeRepository() *NodeRepository { + nodeRepoOnce.Do(func() { + db := GetConnection() + + nodeRepoInstance = &NodeRepository{ + DB: db.DB, + driver: db.Driver, + + stmtCache: sq.NewStmtCache(db.DB), + cache: lrucache.New(1024 * 1024), + } + }) + return nodeRepoInstance +} + +func (r *NodeRepository) FetchMetadata(node *schema.Node) (map[string]string, error) { + start := time.Now() + cachekey := fmt.Sprintf("metadata:%d", node.ID) + if cached := r.cache.Get(cachekey, nil); cached != nil { + node.MetaData = cached.(map[string]string) + return node.MetaData, nil + } + + if err := sq.Select("node.meta_data").From("node").Where("node.id = ?", node.ID). + RunWith(r.stmtCache).QueryRow().Scan(&node.RawMetaData); err != nil { + log.Warn("Error while scanning for node metadata") + return nil, err + } + + if len(node.RawMetaData) == 0 { + return nil, nil + } + + if err := json.Unmarshal(node.RawMetaData, &node.MetaData); err != nil { + log.Warn("Error while unmarshaling raw metadata json") + return nil, err + } + + r.cache.Put(cachekey, node.MetaData, len(node.RawMetaData), 24*time.Hour) + log.Debugf("Timer FetchMetadata %s", time.Since(start)) + return node.MetaData, nil +} + +func (r *NodeRepository) UpdateMetadata(node *schema.Node, key, val string) (err error) { + cachekey := fmt.Sprintf("metadata:%d", node.ID) + r.cache.Del(cachekey) + if node.MetaData == nil { + if _, err = r.FetchMetadata(node); err != nil { + log.Warnf("Error while fetching metadata for node, DB ID '%v'", node.ID) + return err + } + } + + if node.MetaData != nil { + cpy := make(map[string]string, len(node.MetaData)+1) + maps.Copy(cpy, node.MetaData) + cpy[key] = val + node.MetaData = cpy + } else { + node.MetaData = map[string]string{key: val} + } + + if node.RawMetaData, err = json.Marshal(node.MetaData); err != nil { + log.Warnf("Error while marshaling metadata for node, DB ID '%v'", node.ID) + return err + } + + if _, err = sq.Update("node"). + Set("meta_data", node.RawMetaData). + Where("node.id = ?", node.ID). 
+ RunWith(r.stmtCache).Exec(); err != nil { + log.Warnf("Error while updating metadata for node, DB ID '%v'", node.ID) + return err + } + + r.cache.Put(cachekey, node.MetaData, len(node.RawMetaData), 24*time.Hour) + return nil +} + +func (r *NodeRepository) GetNode(id int64, withMeta bool) (*schema.Node, error) { + node := &schema.Node{} + if err := sq.Select("id", "hostname", "cluster", "subcluster", "node_state", + "health_state").From("node"). + Where("node.id = ?", id).RunWith(r.DB). + QueryRow().Scan(&node.ID, &node.Hostname, &node.Cluster, &node.SubCluster, &node.NodeState, + &node.HealthState); err != nil { + log.Warnf("Error while querying node '%v' from database", id) + return nil, err + } + + if withMeta { + var err error + var meta map[string]string + if meta, err = r.FetchMetadata(node); err != nil { + log.Warnf("Error while fetching metadata for node '%v'", id) + return nil, err + } + node.MetaData = meta + } + + return node, nil +} + +const NamedNodeInsert string = ` +INSERT INTO node (hostname, cluster, subcluster, node_state, health_state, raw_meta_data) + VALUES (:hostname, :cluster, :subcluster, :node_state, :health_state, :raw_meta_data);` + +func (r *NodeRepository) AddNode(node *schema.Node) (int64, error) { + var err error + node.RawMetaData, err = json.Marshal(node.MetaData) + if err != nil { + log.Errorf("Error while marshaling metadata for node '%v'", node.Hostname) + return 0, err + } + + res, err := r.DB.NamedExec(NamedNodeInsert, node) + if err != nil { + log.Errorf("Error while adding node '%v' to database", node.Hostname) + return 0, err + } + node.ID, err = res.LastInsertId() + if err != nil { + log.Errorf("Error while getting last insert id for node '%v' from database", node.Hostname) + return 0, err + } + + return node.ID, nil +} + +func (r *NodeRepository) UpdateNodeState(id int64, nodeState *schema.NodeState) error { + if _, err := sq.Update("node").Set("node_state", nodeState).Where("node.id = ?", id).RunWith(r.DB).Exec(); err != nil { + log.Errorf("error while updating node '%d'", id) + return err + } + + return nil +} + +func (r *NodeRepository) UpdateHealthState(id int64, healthState *schema.MonitoringState) error { + if _, err := sq.Update("node").Set("health_state", healthState).Where("node.id = ?", id).RunWith(r.DB).Exec(); err != nil { + log.Errorf("error while updating node '%d'", id) + return err + } + + return nil +} + +func (r *NodeRepository) DeleteNode(id int64) error { + _, err := r.DB.Exec(`DELETE FROM node WHERE node.id = ?`, id) + if err != nil { + log.Errorf("Error while deleting node '%d' from DB", id) + return err + } + log.Infof("deleted node '%d' from DB", id) + return nil +} + +func (r *NodeRepository) QueryNodes() ([]*schema.Node, error) { + return nil, nil +} + +func (r *NodeRepository) ListNodes(cluster string) ([]*schema.Node, error) { + q := sq.Select("hostname", "cluster", "subcluster", "node_state", + "health_state").From("node").Where("node.cluster = ?", cluster).OrderBy("node.hostname ASC") + + rows, err := q.RunWith(r.DB).Query() + if err != nil { + log.Warn("Error while querying user list") + return nil, err + } + nodeList := make([]*schema.Node, 0, 100) + defer rows.Close() + for rows.Next() { + node := &schema.Node{} + if err := rows.Scan(&node.Hostname, &node.Cluster, + &node.SubCluster, &node.NodeState, &node.HealthState); err != nil { + log.Warn("Error while scanning node list") + return nil, err + } + + nodeList = append(nodeList, node) + } + + return nodeList, nil +}
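
The three nodeResolver stubs added in schema.resolvers.go still panic. A minimal
sketch of how they could be filled in against the new NodeRepository — assuming
schema.Node exposes string-based NodeState and HealthState fields, as their use
in internal/repository/node.go suggests; illustrative only, not part of this
commit:

package graph

import (
	"context"

	"github.com/ClusterCockpit/cc-backend/internal/repository"
	"github.com/ClusterCockpit/cc-backend/pkg/schema"
)

// NodeState returns the state already scanned from the node table's
// node_state column; the generated code expects a plain string here.
func (r *nodeResolver) NodeState(ctx context.Context, obj *schema.Node) (string, error) {
	return string(obj.NodeState), nil
}

// HealthState converts the scanned health_state value. The generated
// marshaler expects a schema.NodeState because gqlgen.yml maps the
// MonitoringState scalar to that type (the MonitoringState/HealthState
// model entries in gqlgen.yml look swapped, which this conversion mirrors).
func (r *nodeResolver) HealthState(ctx context.Context, obj *schema.Node) (schema.NodeState, error) {
	return schema.NodeState(obj.HealthState), nil
}

// MetaData loads the node's JSON metadata through the repository, which
// caches the unmarshaled map for 24 hours (see FetchMetadata above).
func (r *nodeResolver) MetaData(ctx context.Context, obj *schema.Node) (any, error) {
	return repository.GetNodeRepository().FetchMetadata(obj)
}

A call site for the repository itself could then look like this (hostname,
cluster, and state values are hypothetical):

	repo := repository.GetNodeRepository()
	id, err := repo.AddNode(&schema.Node{Hostname: "n0001", Cluster: "fritz", SubCluster: "main"})
	if err == nil {
		state := schema.NodeState("allocated")
		_ = repo.UpdateNodeState(id, &state)
	}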