diff --git a/config/config.go b/config/config.go
index ef49a12..76adeb0 100644
--- a/config/config.go
+++ b/config/config.go
@@ -277,3 +277,22 @@ func AssignSubCluster(job *schema.BaseJob) error {
 
 	return fmt.Errorf("no subcluster found for cluster %#v and host %#v", job.Cluster, host0)
 }
+
+func GetSubClusterByNode(cluster, hostname string) (string, error) {
+	for sc, nl := range nodeLists[cluster] {
+		if nl != nil && nl.Contains(hostname) {
+			return sc, nil
+		}
+	}
+
+	c := GetCluster(cluster)
+	if c == nil {
+		return "", fmt.Errorf("unknown cluster: %#v", cluster)
+	}
+
+	if c.SubClusters[0].Nodes == "" {
+		return c.SubClusters[0].Name, nil
+	}
+
+	return "", fmt.Errorf("no subcluster found for cluster %#v and host %#v", cluster, hostname)
+}
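`GetSubClusterByNode` first consults the preparsed node lists and only falls back to the first subcluster when that subcluster carries no `nodes` pattern. A minimal usage sketch — the cluster and host names are placeholders, and `config.Init` is assumed to have run so that `nodeLists` is populated:

```go
package main

import (
	"log"

	"github.com/ClusterCockpit/cc-backend/config"
)

func main() {
	// "emmy" and "e0101" are placeholder names, not taken from this diff.
	sc, err := config.GetSubClusterByNode("emmy", "e0101")
	if err != nil {
		log.Fatalf("subcluster lookup failed: %v", err)
	}
	log.Printf("host e0101 belongs to subcluster %q", sc)
}
```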
diff --git a/graph/generated/generated.go b/graph/generated/generated.go
index 2d98de2..7e01446 100644
--- a/graph/generated/generated.go
+++ b/graph/generated/generated.go
@@ -174,8 +174,9 @@ type ComplexityRoot struct {
 	}
 
 	NodeMetrics struct {
-		Host    func(childComplexity int) int
-		Metrics func(childComplexity int) int
+		Host       func(childComplexity int) int
+		Metrics    func(childComplexity int) int
+		SubCluster func(childComplexity int) int
 	}
 
 	Query struct {
@@ -187,7 +188,7 @@ type ComplexityRoot struct {
 		JobsCount         func(childComplexity int, filter []*model.JobFilter, groupBy model.Aggregate, limit *int) int
 		JobsFootprints    func(childComplexity int, filter []*model.JobFilter, metrics []string) int
 		JobsStatistics    func(childComplexity int, filter []*model.JobFilter, groupBy *model.Aggregate) int
-		NodeMetrics       func(childComplexity int, cluster string, partition *string, nodes []string, scopes []schema.MetricScope, metrics []string, from time.Time, to time.Time) int
+		NodeMetrics       func(childComplexity int, cluster string, nodes []string, scopes []schema.MetricScope, metrics []string, from time.Time, to time.Time) int
 		RooflineHeatmap   func(childComplexity int, filter []*model.JobFilter, rows int, cols int, minX float64, minY float64, maxX float64, maxY float64) int
 		Tags              func(childComplexity int) int
 		User              func(childComplexity int, username string) int
@@ -220,6 +221,7 @@ type ComplexityRoot struct {
 		MemoryBandwidth func(childComplexity int) int
 		Name            func(childComplexity int) int
 		Nodes           func(childComplexity int) int
+		NumberOfNodes   func(childComplexity int) int
 		ProcessorType   func(childComplexity int) int
 		SocketsPerNode  func(childComplexity int) int
 		ThreadsPerCore  func(childComplexity int) int
@@ -281,7 +283,7 @@ type QueryResolver interface {
 	JobsStatistics(ctx context.Context, filter []*model.JobFilter, groupBy *model.Aggregate) ([]*model.JobsStatistics, error)
 	JobsCount(ctx context.Context, filter []*model.JobFilter, groupBy model.Aggregate, limit *int) ([]*model.Count, error)
 	RooflineHeatmap(ctx context.Context, filter []*model.JobFilter, rows int, cols int, minX float64, minY float64, maxX float64, maxY float64) ([][]float64, error)
-	NodeMetrics(ctx context.Context, cluster string, partition *string, nodes []string, scopes []schema.MetricScope, metrics []string, from time.Time, to time.Time) ([]*model.NodeMetrics, error)
+	NodeMetrics(ctx context.Context, cluster string, nodes []string, scopes []schema.MetricScope, metrics []string, from time.Time, to time.Time) ([]*model.NodeMetrics, error)
 }
 
 type executableSchema struct {
@@ -884,6 +886,13 @@ func (e *executableSchema) Complexity(typeName, field string, childComplexity in
 		return e.complexity.NodeMetrics.Metrics(childComplexity), true
 
+	case "NodeMetrics.subCluster":
+		if e.complexity.NodeMetrics.SubCluster == nil {
+			break
+		}
+
+		return e.complexity.NodeMetrics.SubCluster(childComplexity), true
+
 	case "Query.allocatedNodes":
 		if e.complexity.Query.AllocatedNodes == nil {
 			break
 		}
@@ -985,7 +994,7 @@ func (e *executableSchema) Complexity(typeName, field string, childComplexity in
 			return 0, false
 		}
-		return e.complexity.Query.NodeMetrics(childComplexity, args["cluster"].(string), args["partition"].(*string), args["nodes"].([]string), args["scopes"].([]schema.MetricScope), args["metrics"].([]string), args["from"].(time.Time), args["to"].(time.Time)), true
+		return e.complexity.Query.NodeMetrics(childComplexity, args["cluster"].(string), args["nodes"].([]string), args["scopes"].([]schema.MetricScope), args["metrics"].([]string), args["from"].(time.Time), args["to"].(time.Time)), true
 
 	case "Query.rooflineHeatmap":
 		if e.complexity.Query.RooflineHeatmap == nil {
 			break
@@ -1137,6 +1146,13 @@ func (e *executableSchema) Complexity(typeName, field string, childComplexity in
 
 		return e.complexity.SubCluster.Nodes(childComplexity), true
 
+	case "SubCluster.numberOfNodes":
+		if e.complexity.SubCluster.NumberOfNodes == nil {
+			break
+		}
+
+		return e.complexity.SubCluster.NumberOfNodes(childComplexity), true
+
 	case "SubCluster.processorType":
 		if e.complexity.SubCluster.ProcessorType == nil {
 			break
@@ -1371,6 +1387,7 @@ type Cluster {
 type SubCluster {
   name: String!
   nodes: String!
+  numberOfNodes: Int!
   processorType: String!
   socketsPerNode: Int!
   coresPerSocket: Int!
@@ -1466,8 +1483,9 @@ type Footprints {
 enum Aggregate { USER, PROJECT, CLUSTER }
 
 type NodeMetrics {
-  host:    String!
-  metrics: [JobMetricWithName!]!
+  host:       String!
+  subCluster: String!
+  metrics:    [JobMetricWithName!]!
 }
 
 type Count {
@@ -1498,7 +1516,7 @@ type Query {
 
   rooflineHeatmap(filter: [JobFilter!]!, rows: Int!, cols: Int!, minX: Float!, minY: Float!, maxX: Float!, maxY: Float!): [[Float!]!]!
 
-  nodeMetrics(cluster: String!, partition: String, nodes: [String!], scopes: [MetricScope!], metrics: [String!], from: Time!, to: Time!): [NodeMetrics!]!
+  nodeMetrics(cluster: String!, nodes: [String!], scopes: [MetricScope!], metrics: [String!], from: Time!, to: Time!): [NodeMetrics!]!
 }
 
 type Mutation {
@@ -1913,60 +1931,51 @@ func (ec *executionContext) field_Query_nodeMetrics_args(ctx context.Context, ra
 		}
 	}
 	args["cluster"] = arg0
-	var arg1 *string
-	if tmp, ok := rawArgs["partition"]; ok {
-		ctx := graphql.WithPathContext(ctx, graphql.NewPathWithField("partition"))
-		arg1, err = ec.unmarshalOString2ᚖstring(ctx, tmp)
-		if err != nil {
-			return nil, err
-		}
-	}
-	args["partition"] = arg1
-	var arg2 []string
+	var arg1 []string
 	if tmp, ok := rawArgs["nodes"]; ok {
 		ctx := graphql.WithPathContext(ctx, graphql.NewPathWithField("nodes"))
-		arg2, err = ec.unmarshalOString2ᚕstringᚄ(ctx, tmp)
+		arg1, err = ec.unmarshalOString2ᚕstringᚄ(ctx, tmp)
 		if err != nil {
 			return nil, err
 		}
 	}
-	args["nodes"] = arg2
-	var arg3 []schema.MetricScope
+	args["nodes"] = arg1
+	var arg2 []schema.MetricScope
 	if tmp, ok := rawArgs["scopes"]; ok {
 		ctx := graphql.WithPathContext(ctx, graphql.NewPathWithField("scopes"))
-		arg3, err = ec.unmarshalOMetricScope2ᚕgithubᚗcomᚋClusterCockpitᚋccᚑbackendᚋschemaᚐMetricScopeᚄ(ctx, tmp)
+		arg2, err = ec.unmarshalOMetricScope2ᚕgithubᚗcomᚋClusterCockpitᚋccᚑbackendᚋschemaᚐMetricScopeᚄ(ctx, tmp)
 		if err != nil {
 			return nil, err
 		}
 	}
-	args["scopes"] = arg3
-	var arg4 []string
+	args["scopes"] = arg2
+	var arg3 []string
 	if tmp, ok := rawArgs["metrics"]; ok {
 		ctx := graphql.WithPathContext(ctx, graphql.NewPathWithField("metrics"))
-		arg4, err = ec.unmarshalOString2ᚕstringᚄ(ctx, tmp)
+		arg3, err = ec.unmarshalOString2ᚕstringᚄ(ctx, tmp)
 		if err != nil {
 			return nil, err
 		}
 	}
-	args["metrics"] = arg4
-	var arg5 time.Time
+	args["metrics"] = arg3
+	var arg4 time.Time
 	if tmp, ok := rawArgs["from"]; ok {
 		ctx := graphql.WithPathContext(ctx, graphql.NewPathWithField("from"))
+		arg4, err = ec.unmarshalNTime2timeᚐTime(ctx, tmp)
+		if err != nil {
+			return nil, err
+		}
+	}
+	args["from"] = arg4
+	var arg5 time.Time
+	if tmp, ok := rawArgs["to"]; ok {
+		ctx := graphql.WithPathContext(ctx, graphql.NewPathWithField("to"))
 		arg5, err = ec.unmarshalNTime2timeᚐTime(ctx, tmp)
 		if err != nil {
 			return nil, err
 		}
 	}
-	args["from"] = arg5
-	var arg6 time.Time
-	if tmp, ok := rawArgs["to"]; ok {
-		ctx := graphql.WithPathContext(ctx, graphql.NewPathWithField("to"))
-		arg6, err = ec.unmarshalNTime2timeᚐTime(ctx, tmp)
-		if err != nil {
-			return nil, err
-		}
-	}
-	args["to"] = arg6
+	args["to"] = arg5
 	return args, nil
 }
 
@@ -4865,6 +4874,41 @@ func (ec *executionContext) _NodeMetrics_host(ctx context.Context, field graphql
 	return ec.marshalNString2string(ctx, field.Selections, res)
 }
 
+func (ec *executionContext) _NodeMetrics_subCluster(ctx context.Context, field graphql.CollectedField, obj *model.NodeMetrics) (ret graphql.Marshaler) {
+	defer func() {
+		if r := recover(); r != nil {
+			ec.Error(ctx, ec.Recover(ctx, r))
+			ret = graphql.Null
+		}
+	}()
+	fc := &graphql.FieldContext{
+		Object:     "NodeMetrics",
+		Field:      field,
+		Args:       nil,
+		IsMethod:   false,
+		IsResolver: false,
+	}
+
+	ctx = graphql.WithFieldContext(ctx, fc)
+	resTmp, err := ec.ResolverMiddleware(ctx, func(rctx context.Context) (interface{}, error) {
+		ctx = rctx // use context from middleware stack in children
+		return obj.SubCluster, nil
+	})
+	if err != nil {
+		ec.Error(ctx, err)
+		return graphql.Null
+	}
+	if resTmp == nil {
+		if !graphql.HasFieldError(ctx, fc) {
+			ec.Errorf(ctx, "must not be null")
+		}
+		return graphql.Null
+	}
+	res := resTmp.(string)
+	fc.Result = res
+	return ec.marshalNString2string(ctx, field.Selections, res)
+}
+
 func (ec *executionContext) _NodeMetrics_metrics(ctx context.Context, field graphql.CollectedField, obj *model.NodeMetrics) (ret graphql.Marshaler) {
 	defer func() {
 		if r := recover(); r != nil {
@@ -5364,7 +5408,7 @@ func (ec *executionContext) _Query_nodeMetrics(ctx context.Context, field graphq
 	fc.Args = args
 	resTmp, err := ec.ResolverMiddleware(ctx, func(rctx context.Context) (interface{}, error) {
 		ctx = rctx // use context from middleware stack in children
-		return ec.resolvers.Query().NodeMetrics(rctx, args["cluster"].(string), args["partition"].(*string), args["nodes"].([]string), args["scopes"].([]schema.MetricScope), args["metrics"].([]string), args["from"].(time.Time), args["to"].(time.Time))
+		return ec.resolvers.Query().NodeMetrics(rctx, args["cluster"].(string), args["nodes"].([]string), args["scopes"].([]schema.MetricScope), args["metrics"].([]string), args["from"].(time.Time), args["to"].(time.Time))
 	})
 	if err != nil {
 		ec.Error(ctx, err)
@@ -5892,6 +5936,41 @@ func (ec *executionContext) _SubCluster_nodes(ctx context.Context, field graphql
 	return ec.marshalNString2string(ctx, field.Selections, res)
 }
 
+func (ec *executionContext) _SubCluster_numberOfNodes(ctx context.Context, field graphql.CollectedField, obj *model.SubCluster) (ret graphql.Marshaler) {
+	defer func() {
+		if r := recover(); r != nil {
+			ec.Error(ctx, ec.Recover(ctx, r))
+			ret = graphql.Null
+		}
+	}()
+	fc := &graphql.FieldContext{
+		Object:     "SubCluster",
+		Field:      field,
+		Args:       nil,
+		IsMethod:   false,
+		IsResolver: false,
+	}
+
+	ctx = graphql.WithFieldContext(ctx, fc)
+	resTmp, err := ec.ResolverMiddleware(ctx, func(rctx context.Context) (interface{}, error) {
+		ctx = rctx // use context from middleware stack in children
+		return obj.NumberOfNodes, nil
+	})
+	if err != nil {
+		ec.Error(ctx, err)
+		return graphql.Null
+	}
+	if resTmp == nil {
+		if !graphql.HasFieldError(ctx, fc) {
+			ec.Errorf(ctx, "must not be null")
+		}
+		return graphql.Null
+	}
+	res := resTmp.(int)
+	fc.Result = res
+	return ec.marshalNInt2int(ctx, field.Selections, res)
+}
+
 func (ec *executionContext) _SubCluster_processorType(ctx context.Context, field graphql.CollectedField, obj *model.SubCluster) (ret graphql.Marshaler) {
 	defer func() {
 		if r := recover(); r != nil {
@@ -8854,6 +8933,11 @@ func (ec *executionContext) _NodeMetrics(ctx context.Context, sel ast.SelectionS
 			if out.Values[i] == graphql.Null {
 				invalids++
 			}
+		case "subCluster":
+			out.Values[i] = ec._NodeMetrics_subCluster(ctx, field, obj)
+			if out.Values[i] == graphql.Null {
+				invalids++
+			}
 		case "metrics":
 			out.Values[i] = ec._NodeMetrics_metrics(ctx, field, obj)
 			if out.Values[i] == graphql.Null {
@@ -9186,6 +9270,11 @@ func (ec *executionContext) _SubCluster(ctx context.Context, sel ast.SelectionSe
 			if out.Values[i] == graphql.Null {
 				invalids++
 			}
+		case "numberOfNodes":
+			out.Values[i] = ec._SubCluster_numberOfNodes(ctx, field, obj)
+			if out.Values[i] == graphql.Null {
+				invalids++
+			}
 		case "processorType":
 			out.Values[i] = ec._SubCluster_processorType(ctx, field, obj)
 			if out.Values[i] == graphql.Null {
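`graph/generated/generated.go` is gqlgen output, so the hunks above are best read as the result of regenerating after the schema and resolver signature changes rather than as hand-written edits. If the project wires regeneration up through a `go:generate` directive — an assumption about the tooling, not something shown in this diff — it would look like:

```go
//go:generate go run github.com/99designs/gqlgen
```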
diff --git a/graph/model/models_gen.go b/graph/model/models_gen.go
index acd50bb..16e6590 100644
--- a/graph/model/models_gen.go
+++ b/graph/model/models_gen.go
@@ -114,8 +114,9 @@ type MetricFootprints struct {
 }
 
 type NodeMetrics struct {
-	Host    string               `json:"host"`
-	Metrics []*JobMetricWithName `json:"metrics"`
+	Host       string               `json:"host"`
+	SubCluster string               `json:"subCluster"`
+	Metrics    []*JobMetricWithName `json:"metrics"`
 }
 
 type OrderByInput struct {
@@ -138,6 +139,7 @@ type StringInput struct {
 type SubCluster struct {
 	Name            string `json:"name"`
 	Nodes           string `json:"nodes"`
+	NumberOfNodes   int    `json:"numberOfNodes"`
 	ProcessorType   string `json:"processorType"`
 	SocketsPerNode  int    `json:"socketsPerNode"`
 	CoresPerSocket  int    `json:"coresPerSocket"`
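`models_gen.go` is generated as well, so the practical effect of the new fields is their JSON shape. A quick sketch of what the added tags produce (the host and subcluster values are made up):

```go
package main

import (
	"encoding/json"
	"fmt"

	"github.com/ClusterCockpit/cc-backend/graph/model"
)

func main() {
	nm := model.NodeMetrics{Host: "e0101", SubCluster: "main"} // placeholder values
	b, _ := json.Marshal(nm)
	fmt.Println(string(b)) // {"host":"e0101","subCluster":"main","metrics":null}
}
```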
diff --git a/graph/schema.graphqls b/graph/schema.graphqls
index 8ff12b2..c9f2cf5 100644
--- a/graph/schema.graphqls
+++ b/graph/schema.graphqls
@@ -42,6 +42,7 @@ type Cluster {
 type SubCluster {
   name: String!
   nodes: String!
+  numberOfNodes: Int!
   processorType: String!
   socketsPerNode: Int!
   coresPerSocket: Int!
@@ -137,8 +138,9 @@ type Footprints {
 enum Aggregate { USER, PROJECT, CLUSTER }
 
 type NodeMetrics {
-  host:    String!
-  metrics: [JobMetricWithName!]!
+  host:       String!
+  subCluster: String!
+  metrics:    [JobMetricWithName!]!
 }
 
 type Count {
@@ -169,7 +171,7 @@ type Query {
 
   rooflineHeatmap(filter: [JobFilter!]!, rows: Int!, cols: Int!, minX: Float!, minY: Float!, maxX: Float!, maxY: Float!): [[Float!]!]!
 
-  nodeMetrics(cluster: String!, partition: String, nodes: [String!], scopes: [MetricScope!], metrics: [String!], from: Time!, to: Time!): [NodeMetrics!]!
+  nodeMetrics(cluster: String!, nodes: [String!], scopes: [MetricScope!], metrics: [String!], from: Time!, to: Time!): [NodeMetrics!]!
 }
 
 type Mutation {
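For API consumers this is a breaking change: the `partition` argument is gone, and the subcluster is now reported per host via the new `subCluster` field. A rough client-side sketch of the new query shape — the endpoint path, port, auth header, and the metric/host names are assumptions, not part of this diff:

```go
package main

import (
	"bytes"
	"encoding/json"
	"fmt"
	"net/http"
	"time"
)

func main() {
	// Only fields visible in this diff are selected; the inner
	// JobMetricWithName selection ("name") is abbreviated.
	query := `query($cluster: String!, $nodes: [String!], $from: Time!, $to: Time!) {
  nodeMetrics(cluster: $cluster, nodes: $nodes, metrics: ["flops_any"], from: $from, to: $to) {
    host
    subCluster
    metrics { name }
  }
}`

	body, _ := json.Marshal(map[string]interface{}{
		"query": query,
		"variables": map[string]interface{}{
			"cluster": "emmy",                     // placeholder cluster name
			"nodes":   []string{"e0101", "e0102"}, // placeholder hostnames
			"from":    time.Now().Add(-time.Hour).Format(time.RFC3339),
			"to":      time.Now().Format(time.RFC3339),
		},
	})

	req, err := http.NewRequest("POST", "http://localhost:8080/query", bytes.NewReader(body))
	if err != nil {
		panic(err)
	}
	req.Header.Set("Content-Type", "application/json")
	// req.Header.Set("Authorization", "Bearer <JWT>") // the resolver requires an admin user

	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()
	fmt.Println(resp.Status)
}
```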
diff --git a/graph/schema.resolvers.go b/graph/schema.resolvers.go
index 2f4ffce..d886cfe 100644
--- a/graph/schema.resolvers.go
+++ b/graph/schema.resolvers.go
@@ -209,23 +209,19 @@ func (r *queryResolver) RooflineHeatmap(ctx context.Context, filter []*model.Job
 	return r.rooflineHeatmap(ctx, filter, rows, cols, minX, minY, maxX, maxY)
 }
 
-func (r *queryResolver) NodeMetrics(ctx context.Context, cluster string, partition *string, nodes []string, scopes []schema.MetricScope, metrics []string, from time.Time, to time.Time) ([]*model.NodeMetrics, error) {
+func (r *queryResolver) NodeMetrics(ctx context.Context, cluster string, nodes []string, scopes []schema.MetricScope, metrics []string, from time.Time, to time.Time) ([]*model.NodeMetrics, error) {
 	user := auth.GetUser(ctx)
 	if user != nil && !user.HasRole(auth.RoleAdmin) {
 		return nil, errors.New("you need to be an administrator for this query")
 	}
 
-	if partition == nil {
-		partition = new(string)
-	}
-
 	if metrics == nil {
 		for _, mc := range config.GetCluster(cluster).MetricConfig {
 			metrics = append(metrics, mc.Name)
 		}
 	}
 
-	data, err := metricdata.LoadNodeData(cluster, *partition, metrics, nodes, scopes, from, to, ctx)
+	data, err := metricdata.LoadNodeData(cluster, metrics, nodes, scopes, from, to, ctx)
 	if err != nil {
 		return nil, err
 	}
@@ -236,6 +232,7 @@ func (r *queryResolver) NodeMetrics(ctx context.Context, cluster string, partiti
 			Host:    hostname,
 			Metrics: make([]*model.JobMetricWithName, 0, len(metrics)*len(scopes)),
 		}
+		host.SubCluster, _ = config.GetSubClusterByNode(cluster, hostname)
 
 		for metric, scopedMetrics := range metrics {
 			for _, scopedMetric := range scopedMetrics {
diff --git a/metricdata/cc-metric-store.go b/metricdata/cc-metric-store.go
index c77d43d..2c3401d 100644
--- a/metricdata/cc-metric-store.go
+++ b/metricdata/cc-metric-store.go
@@ -499,7 +499,7 @@ func (ccms *CCMetricStore) LoadStats(job *schema.Job, metrics []string, ctx cont
 }
 
 // TODO: Support sub-node-scope metrics! For this, the partition of a node needs to be known!
-func (ccms *CCMetricStore) LoadNodeData(cluster, partition string, metrics, nodes []string, scopes []schema.MetricScope, from, to time.Time, ctx context.Context) (map[string]map[string][]*schema.JobMetric, error) {
+func (ccms *CCMetricStore) LoadNodeData(cluster string, metrics, nodes []string, scopes []schema.MetricScope, from, to time.Time, ctx context.Context) (map[string]map[string][]*schema.JobMetric, error) {
 	req := ApiQueryRequest{
 		Cluster: cluster,
 		From:    from.Unix(),
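The TODO above still stands: with `partition` no longer passed in, a per-node subcluster lookup has to come from somewhere else, and the new `config.GetSubClusterByNode` is the obvious candidate. A sketch of a helper the metric store could use (hypothetical, not part of this change):

```go
package metricdata

import (
	"github.com/ClusterCockpit/cc-backend/config"
)

// subClustersForNodes resolves the subcluster (formerly "partition") of every
// requested node, e.g. as a first step towards sub-node-scope queries.
func subClustersForNodes(cluster string, nodes []string) (map[string]string, error) {
	byNode := make(map[string]string, len(nodes))
	for _, node := range nodes {
		sc, err := config.GetSubClusterByNode(cluster, node)
		if err != nil {
			return nil, err
		}
		byNode[node] = sc
	}
	return byNode, nil
}
```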
diff --git a/metricdata/metricdata.go b/metricdata/metricdata.go
index 76fdcbe..edbce95 100644
--- a/metricdata/metricdata.go
+++ b/metricdata/metricdata.go
@@ -23,7 +23,7 @@ type MetricDataRepository interface {
 	LoadStats(job *schema.Job, metrics []string, ctx context.Context) (map[string]map[string]schema.MetricStatistics, error)
 
 	// Return a map of hosts to a map of metrics at the requested scopes for that node.
-	LoadNodeData(cluster, partition string, metrics, nodes []string, scopes []schema.MetricScope, from, to time.Time, ctx context.Context) (map[string]map[string][]*schema.JobMetric, error)
+	LoadNodeData(cluster string, metrics, nodes []string, scopes []schema.MetricScope, from, to time.Time, ctx context.Context) (map[string]map[string][]*schema.JobMetric, error)
 }
 
 var metricDataRepos map[string]MetricDataRepository = map[string]MetricDataRepository{}
@@ -173,7 +173,7 @@ func LoadAverages(job *schema.Job, metrics []string, data [][]schema.Float, ctx
 }
 
 // Used for the node/system view. Returns a map of nodes to a map of metrics.
-func LoadNodeData(cluster, partition string, metrics, nodes []string, scopes []schema.MetricScope, from, to time.Time, ctx context.Context) (map[string]map[string][]*schema.JobMetric, error) {
+func LoadNodeData(cluster string, metrics, nodes []string, scopes []schema.MetricScope, from, to time.Time, ctx context.Context) (map[string]map[string][]*schema.JobMetric, error) {
 	repo, ok := metricDataRepos[cluster]
 	if !ok {
 		return nil, fmt.Errorf("no metric data repository configured for '%s'", cluster)
@@ -185,7 +185,7 @@ func LoadNodeData(cluster, partition string, metrics, nodes []string, scopes []s
 		}
 	}
 
-	data, err := repo.LoadNodeData(cluster, partition, metrics, nodes, scopes, from, to, ctx)
+	data, err := repo.LoadNodeData(cluster, metrics, nodes, scopes, from, to, ctx)
 	if err != nil {
 		if len(data) != 0 {
 			log.Errorf("partial error: %s", err.Error())
diff --git a/metricdata/utils.go b/metricdata/utils.go
index de6c573..27c7d10 100644
--- a/metricdata/utils.go
+++ b/metricdata/utils.go
@@ -32,6 +32,6 @@ func (tmdr *TestMetricDataRepository) LoadStats(job *schema.Job, metrics []strin
 	panic("TODO")
 }
 
-func (tmdr *TestMetricDataRepository) LoadNodeData(cluster, partition string, metrics, nodes []string, scopes []schema.MetricScope, from, to time.Time, ctx context.Context) (map[string]map[string][]*schema.JobMetric, error) {
+func (tmdr *TestMetricDataRepository) LoadNodeData(cluster string, metrics, nodes []string, scopes []schema.MetricScope, from, to time.Time, ctx context.Context) (map[string]map[string][]*schema.JobMetric, error) {
 	panic("TODO")
 }
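`TestMetricDataRepository.LoadNodeData` keeps panicking, so tests that exercise the node/system view still need their own stub. One possible shape matching the new interface signature (hypothetical, not part of this diff):

```go
package metricdata

import (
	"context"
	"time"

	"github.com/ClusterCockpit/cc-backend/schema"
)

// FakeNodeDataRepository is a test double; the embedded TestMetricDataRepository
// still provides the panicking stubs for the remaining interface methods.
type FakeNodeDataRepository struct {
	TestMetricDataRepository
}

func (f *FakeNodeDataRepository) LoadNodeData(cluster string, metrics, nodes []string,
	scopes []schema.MetricScope, from, to time.Time,
	ctx context.Context) (map[string]map[string][]*schema.JobMetric, error) {

	// Return an empty (but non-nil) metric map per requested node.
	data := make(map[string]map[string][]*schema.JobMetric, len(nodes))
	for _, node := range nodes {
		data[node] = map[string][]*schema.JobMetric{}
	}
	return data, nil
}
```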