Update nodeMetrics query; Add numberOfNodes to SubCluster type

Lou Knauer 2022-03-24 14:34:42 +01:00
parent b572ef2aef
commit 0b83917294
8 changed files with 163 additions and 54 deletions

View File

@ -277,3 +277,22 @@ func AssignSubCluster(job *schema.BaseJob) error {
return fmt.Errorf("no subcluster found for cluster %#v and host %#v", job.Cluster, host0)
}
func GetSubClusterByNode(cluster, hostname string) (string, error) {
for sc, nl := range nodeLists[cluster] {
if nl != nil && nl.Contains(hostname) {
return sc, nil
}
}
c := GetCluster(cluster)
if c == nil {
return "", fmt.Errorf("unkown cluster: %#v", cluster)
}
if c.SubClusters[0].Nodes == "" {
return c.SubClusters[0].Name, nil
}
return "", fmt.Errorf("no subcluster found for cluster %#v and host %#v", cluster, hostname)
}
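
The new helper first checks the configured per-sub-cluster node lists and falls back to the cluster's first sub-cluster when that one has no node-list expression. A minimal usage sketch, assuming the helper lives in the config package (the resolver change below calls it as config.GetSubClusterByNode); the cluster and host names are placeholders:

    // Hypothetical cluster/host names, for illustration only.
    subCluster, err := config.GetSubClusterByNode("emmy", "e0101")
    if err != nil {
        fmt.Printf("sub-cluster lookup failed: %s\n", err.Error())
    } else {
        fmt.Printf("host e0101 belongs to sub-cluster %s\n", subCluster)
    }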

View File

@ -174,8 +174,9 @@ type ComplexityRoot struct {
}
NodeMetrics struct {
Host func(childComplexity int) int
Metrics func(childComplexity int) int
Host func(childComplexity int) int
Metrics func(childComplexity int) int
SubCluster func(childComplexity int) int
}
Query struct {
@ -187,7 +188,7 @@ type ComplexityRoot struct {
JobsCount func(childComplexity int, filter []*model.JobFilter, groupBy model.Aggregate, limit *int) int
JobsFootprints func(childComplexity int, filter []*model.JobFilter, metrics []string) int
JobsStatistics func(childComplexity int, filter []*model.JobFilter, groupBy *model.Aggregate) int
NodeMetrics func(childComplexity int, cluster string, partition *string, nodes []string, scopes []schema.MetricScope, metrics []string, from time.Time, to time.Time) int
NodeMetrics func(childComplexity int, cluster string, nodes []string, scopes []schema.MetricScope, metrics []string, from time.Time, to time.Time) int
RooflineHeatmap func(childComplexity int, filter []*model.JobFilter, rows int, cols int, minX float64, minY float64, maxX float64, maxY float64) int
Tags func(childComplexity int) int
User func(childComplexity int, username string) int
@ -220,6 +221,7 @@ type ComplexityRoot struct {
MemoryBandwidth func(childComplexity int) int
Name func(childComplexity int) int
Nodes func(childComplexity int) int
NumberOfNodes func(childComplexity int) int
ProcessorType func(childComplexity int) int
SocketsPerNode func(childComplexity int) int
ThreadsPerCore func(childComplexity int) int
@ -281,7 +283,7 @@ type QueryResolver interface {
JobsStatistics(ctx context.Context, filter []*model.JobFilter, groupBy *model.Aggregate) ([]*model.JobsStatistics, error)
JobsCount(ctx context.Context, filter []*model.JobFilter, groupBy model.Aggregate, limit *int) ([]*model.Count, error)
RooflineHeatmap(ctx context.Context, filter []*model.JobFilter, rows int, cols int, minX float64, minY float64, maxX float64, maxY float64) ([][]float64, error)
NodeMetrics(ctx context.Context, cluster string, partition *string, nodes []string, scopes []schema.MetricScope, metrics []string, from time.Time, to time.Time) ([]*model.NodeMetrics, error)
NodeMetrics(ctx context.Context, cluster string, nodes []string, scopes []schema.MetricScope, metrics []string, from time.Time, to time.Time) ([]*model.NodeMetrics, error)
}
type executableSchema struct {
@ -884,6 +886,13 @@ func (e *executableSchema) Complexity(typeName, field string, childComplexity in
return e.complexity.NodeMetrics.Metrics(childComplexity), true
case "NodeMetrics.subCluster":
if e.complexity.NodeMetrics.SubCluster == nil {
break
}
return e.complexity.NodeMetrics.SubCluster(childComplexity), true
case "Query.allocatedNodes":
if e.complexity.Query.AllocatedNodes == nil {
break
@ -985,7 +994,7 @@ func (e *executableSchema) Complexity(typeName, field string, childComplexity in
return 0, false
}
return e.complexity.Query.NodeMetrics(childComplexity, args["cluster"].(string), args["partition"].(*string), args["nodes"].([]string), args["scopes"].([]schema.MetricScope), args["metrics"].([]string), args["from"].(time.Time), args["to"].(time.Time)), true
return e.complexity.Query.NodeMetrics(childComplexity, args["cluster"].(string), args["nodes"].([]string), args["scopes"].([]schema.MetricScope), args["metrics"].([]string), args["from"].(time.Time), args["to"].(time.Time)), true
case "Query.rooflineHeatmap":
if e.complexity.Query.RooflineHeatmap == nil {
@ -1137,6 +1146,13 @@ func (e *executableSchema) Complexity(typeName, field string, childComplexity in
return e.complexity.SubCluster.Nodes(childComplexity), true
case "SubCluster.numberOfNodes":
if e.complexity.SubCluster.NumberOfNodes == nil {
break
}
return e.complexity.SubCluster.NumberOfNodes(childComplexity), true
case "SubCluster.processorType":
if e.complexity.SubCluster.ProcessorType == nil {
break
@ -1371,6 +1387,7 @@ type Cluster {
type SubCluster {
name: String!
nodes: String!
numberOfNodes: Int!
processorType: String!
socketsPerNode: Int!
coresPerSocket: Int!
@ -1466,8 +1483,9 @@ type Footprints {
enum Aggregate { USER, PROJECT, CLUSTER }
type NodeMetrics {
host: String!
metrics: [JobMetricWithName!]!
host: String!
subCluster: String!
metrics: [JobMetricWithName!]!
}
type Count {
@ -1498,7 +1516,7 @@ type Query {
rooflineHeatmap(filter: [JobFilter!]!, rows: Int!, cols: Int!, minX: Float!, minY: Float!, maxX: Float!, maxY: Float!): [[Float!]!]!
nodeMetrics(cluster: String!, partition: String, nodes: [String!], scopes: [MetricScope!], metrics: [String!], from: Time!, to: Time!): [NodeMetrics!]!
nodeMetrics(cluster: String!, nodes: [String!], scopes: [MetricScope!], metrics: [String!], from: Time!, to: Time!): [NodeMetrics!]!
}
type Mutation {
@ -1913,60 +1931,51 @@ func (ec *executionContext) field_Query_nodeMetrics_args(ctx context.Context, ra
}
}
args["cluster"] = arg0
var arg1 *string
if tmp, ok := rawArgs["partition"]; ok {
ctx := graphql.WithPathContext(ctx, graphql.NewPathWithField("partition"))
arg1, err = ec.unmarshalOString2ᚖstring(ctx, tmp)
if err != nil {
return nil, err
}
}
args["partition"] = arg1
var arg2 []string
var arg1 []string
if tmp, ok := rawArgs["nodes"]; ok {
ctx := graphql.WithPathContext(ctx, graphql.NewPathWithField("nodes"))
arg2, err = ec.unmarshalOString2ᚕstringᚄ(ctx, tmp)
arg1, err = ec.unmarshalOString2ᚕstringᚄ(ctx, tmp)
if err != nil {
return nil, err
}
}
args["nodes"] = arg2
var arg3 []schema.MetricScope
args["nodes"] = arg1
var arg2 []schema.MetricScope
if tmp, ok := rawArgs["scopes"]; ok {
ctx := graphql.WithPathContext(ctx, graphql.NewPathWithField("scopes"))
arg3, err = ec.unmarshalOMetricScope2ᚕgithubᚗcomᚋClusterCockpitᚋccᚑbackendᚋschemaᚐMetricScopeᚄ(ctx, tmp)
arg2, err = ec.unmarshalOMetricScope2ᚕgithubᚗcomᚋClusterCockpitᚋccᚑbackendᚋschemaᚐMetricScopeᚄ(ctx, tmp)
if err != nil {
return nil, err
}
}
args["scopes"] = arg3
var arg4 []string
args["scopes"] = arg2
var arg3 []string
if tmp, ok := rawArgs["metrics"]; ok {
ctx := graphql.WithPathContext(ctx, graphql.NewPathWithField("metrics"))
arg4, err = ec.unmarshalOString2ᚕstringᚄ(ctx, tmp)
arg3, err = ec.unmarshalOString2ᚕstringᚄ(ctx, tmp)
if err != nil {
return nil, err
}
}
args["metrics"] = arg4
var arg5 time.Time
args["metrics"] = arg3
var arg4 time.Time
if tmp, ok := rawArgs["from"]; ok {
ctx := graphql.WithPathContext(ctx, graphql.NewPathWithField("from"))
arg4, err = ec.unmarshalNTime2timeᚐTime(ctx, tmp)
if err != nil {
return nil, err
}
}
args["from"] = arg4
var arg5 time.Time
if tmp, ok := rawArgs["to"]; ok {
ctx := graphql.WithPathContext(ctx, graphql.NewPathWithField("to"))
arg5, err = ec.unmarshalNTime2timeᚐTime(ctx, tmp)
if err != nil {
return nil, err
}
}
args["from"] = arg5
var arg6 time.Time
if tmp, ok := rawArgs["to"]; ok {
ctx := graphql.WithPathContext(ctx, graphql.NewPathWithField("to"))
arg6, err = ec.unmarshalNTime2timeᚐTime(ctx, tmp)
if err != nil {
return nil, err
}
}
args["to"] = arg6
args["to"] = arg5
return args, nil
}
@ -4865,6 +4874,41 @@ func (ec *executionContext) _NodeMetrics_host(ctx context.Context, field graphql
return ec.marshalNString2string(ctx, field.Selections, res)
}
func (ec *executionContext) _NodeMetrics_subCluster(ctx context.Context, field graphql.CollectedField, obj *model.NodeMetrics) (ret graphql.Marshaler) {
defer func() {
if r := recover(); r != nil {
ec.Error(ctx, ec.Recover(ctx, r))
ret = graphql.Null
}
}()
fc := &graphql.FieldContext{
Object: "NodeMetrics",
Field: field,
Args: nil,
IsMethod: false,
IsResolver: false,
}
ctx = graphql.WithFieldContext(ctx, fc)
resTmp, err := ec.ResolverMiddleware(ctx, func(rctx context.Context) (interface{}, error) {
ctx = rctx // use context from middleware stack in children
return obj.SubCluster, nil
})
if err != nil {
ec.Error(ctx, err)
return graphql.Null
}
if resTmp == nil {
if !graphql.HasFieldError(ctx, fc) {
ec.Errorf(ctx, "must not be null")
}
return graphql.Null
}
res := resTmp.(string)
fc.Result = res
return ec.marshalNString2string(ctx, field.Selections, res)
}
func (ec *executionContext) _NodeMetrics_metrics(ctx context.Context, field graphql.CollectedField, obj *model.NodeMetrics) (ret graphql.Marshaler) {
defer func() {
if r := recover(); r != nil {
@ -5364,7 +5408,7 @@ func (ec *executionContext) _Query_nodeMetrics(ctx context.Context, field graphq
fc.Args = args
resTmp, err := ec.ResolverMiddleware(ctx, func(rctx context.Context) (interface{}, error) {
ctx = rctx // use context from middleware stack in children
return ec.resolvers.Query().NodeMetrics(rctx, args["cluster"].(string), args["partition"].(*string), args["nodes"].([]string), args["scopes"].([]schema.MetricScope), args["metrics"].([]string), args["from"].(time.Time), args["to"].(time.Time))
return ec.resolvers.Query().NodeMetrics(rctx, args["cluster"].(string), args["nodes"].([]string), args["scopes"].([]schema.MetricScope), args["metrics"].([]string), args["from"].(time.Time), args["to"].(time.Time))
})
if err != nil {
ec.Error(ctx, err)
@ -5892,6 +5936,41 @@ func (ec *executionContext) _SubCluster_nodes(ctx context.Context, field graphql
return ec.marshalNString2string(ctx, field.Selections, res)
}
func (ec *executionContext) _SubCluster_numberOfNodes(ctx context.Context, field graphql.CollectedField, obj *model.SubCluster) (ret graphql.Marshaler) {
defer func() {
if r := recover(); r != nil {
ec.Error(ctx, ec.Recover(ctx, r))
ret = graphql.Null
}
}()
fc := &graphql.FieldContext{
Object: "SubCluster",
Field: field,
Args: nil,
IsMethod: false,
IsResolver: false,
}
ctx = graphql.WithFieldContext(ctx, fc)
resTmp, err := ec.ResolverMiddleware(ctx, func(rctx context.Context) (interface{}, error) {
ctx = rctx // use context from middleware stack in children
return obj.NumberOfNodes, nil
})
if err != nil {
ec.Error(ctx, err)
return graphql.Null
}
if resTmp == nil {
if !graphql.HasFieldError(ctx, fc) {
ec.Errorf(ctx, "must not be null")
}
return graphql.Null
}
res := resTmp.(int)
fc.Result = res
return ec.marshalNInt2int(ctx, field.Selections, res)
}
func (ec *executionContext) _SubCluster_processorType(ctx context.Context, field graphql.CollectedField, obj *model.SubCluster) (ret graphql.Marshaler) {
defer func() {
if r := recover(); r != nil {
@ -8854,6 +8933,11 @@ func (ec *executionContext) _NodeMetrics(ctx context.Context, sel ast.SelectionS
if out.Values[i] == graphql.Null {
invalids++
}
case "subCluster":
out.Values[i] = ec._NodeMetrics_subCluster(ctx, field, obj)
if out.Values[i] == graphql.Null {
invalids++
}
case "metrics":
out.Values[i] = ec._NodeMetrics_metrics(ctx, field, obj)
if out.Values[i] == graphql.Null {
@ -9186,6 +9270,11 @@ func (ec *executionContext) _SubCluster(ctx context.Context, sel ast.SelectionSe
if out.Values[i] == graphql.Null {
invalids++
}
case "numberOfNodes":
out.Values[i] = ec._SubCluster_numberOfNodes(ctx, field, obj)
if out.Values[i] == graphql.Null {
invalids++
}
case "processorType":
out.Values[i] = ec._SubCluster_processorType(ctx, field, obj)
if out.Values[i] == graphql.Null {

View File

@ -114,8 +114,9 @@ type MetricFootprints struct {
}
type NodeMetrics struct {
Host string `json:"host"`
Metrics []*JobMetricWithName `json:"metrics"`
Host string `json:"host"`
SubCluster string `json:"subCluster"`
Metrics []*JobMetricWithName `json:"metrics"`
}
type OrderByInput struct {
@ -138,6 +139,7 @@ type StringInput struct {
type SubCluster struct {
Name string `json:"name"`
Nodes string `json:"nodes"`
NumberOfNodes int `json:"numberOfNodes"`
ProcessorType string `json:"processorType"`
SocketsPerNode int `json:"socketsPerNode"`
CoresPerSocket int `json:"coresPerSocket"`

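For illustration, a hedged sketch of the extended model as a Go literal; every value is a placeholder, not taken from a real cluster configuration:

    sc := &model.SubCluster{
        Name:           "main",          // placeholder sub-cluster name
        Nodes:          "node[001-064]", // node-list expression (placeholder)
        NumberOfNodes:  64,              // field added by this commit
        ProcessorType:  "ExampleCPU",
        SocketsPerNode: 2,
        CoresPerSocket: 16,
    }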
View File

@ -42,6 +42,7 @@ type Cluster {
type SubCluster {
name: String!
nodes: String!
numberOfNodes: Int!
processorType: String!
socketsPerNode: Int!
coresPerSocket: Int!
@ -137,8 +138,9 @@ type Footprints {
enum Aggregate { USER, PROJECT, CLUSTER }
type NodeMetrics {
host: String!
metrics: [JobMetricWithName!]!
host: String!
subCluster: String!
metrics: [JobMetricWithName!]!
}
type Count {
@ -169,7 +171,7 @@ type Query {
rooflineHeatmap(filter: [JobFilter!]!, rows: Int!, cols: Int!, minX: Float!, minY: Float!, maxX: Float!, maxY: Float!): [[Float!]!]!
nodeMetrics(cluster: String!, partition: String, nodes: [String!], scopes: [MetricScope!], metrics: [String!], from: Time!, to: Time!): [NodeMetrics!]!
nodeMetrics(cluster: String!, nodes: [String!], scopes: [MetricScope!], metrics: [String!], from: Time!, to: Time!): [NodeMetrics!]!
}
type Mutation {

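A hedged client-side example of the updated query: the partition argument is gone and each NodeMetrics entry now reports its subCluster. Shown as a Go raw string to stay in the backend's language; the metric name flops_any is only an illustrative placeholder:

    const exampleNodeMetricsQuery = `
    query ($cluster: String!, $from: Time!, $to: Time!) {
        nodeMetrics(cluster: $cluster, metrics: ["flops_any"], from: $from, to: $to) {
            host
            subCluster
        }
    }`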
View File

@ -209,23 +209,19 @@ func (r *queryResolver) RooflineHeatmap(ctx context.Context, filter []*model.Job
return r.rooflineHeatmap(ctx, filter, rows, cols, minX, minY, maxX, maxY)
}
func (r *queryResolver) NodeMetrics(ctx context.Context, cluster string, partition *string, nodes []string, scopes []schema.MetricScope, metrics []string, from time.Time, to time.Time) ([]*model.NodeMetrics, error) {
func (r *queryResolver) NodeMetrics(ctx context.Context, cluster string, nodes []string, scopes []schema.MetricScope, metrics []string, from time.Time, to time.Time) ([]*model.NodeMetrics, error) {
user := auth.GetUser(ctx)
if user != nil && !user.HasRole(auth.RoleAdmin) {
return nil, errors.New("you need to be an administrator for this query")
}
if partition == nil {
partition = new(string)
}
if metrics == nil {
for _, mc := range config.GetCluster(cluster).MetricConfig {
metrics = append(metrics, mc.Name)
}
}
data, err := metricdata.LoadNodeData(cluster, *partition, metrics, nodes, scopes, from, to, ctx)
data, err := metricdata.LoadNodeData(cluster, metrics, nodes, scopes, from, to, ctx)
if err != nil {
return nil, err
}
@ -236,6 +232,7 @@ func (r *queryResolver) NodeMetrics(ctx context.Context, cluster string, partiti
Host: hostname,
Metrics: make([]*model.JobMetricWithName, 0, len(metrics)*len(scopes)),
}
host.SubCluster, _ = config.GetSubClusterByNode(cluster, hostname)
for metric, scopedMetrics := range metrics {
for _, scopedMetric := range scopedMetrics {

View File

@ -499,7 +499,7 @@ func (ccms *CCMetricStore) LoadStats(job *schema.Job, metrics []string, ctx cont
}
// TODO: Support sub-node-scope metrics! For this, the partition of a node needs to be known!
func (ccms *CCMetricStore) LoadNodeData(cluster, partition string, metrics, nodes []string, scopes []schema.MetricScope, from, to time.Time, ctx context.Context) (map[string]map[string][]*schema.JobMetric, error) {
func (ccms *CCMetricStore) LoadNodeData(cluster string, metrics, nodes []string, scopes []schema.MetricScope, from, to time.Time, ctx context.Context) (map[string]map[string][]*schema.JobMetric, error) {
req := ApiQueryRequest{
Cluster: cluster,
From: from.Unix(),

View File

@ -23,7 +23,7 @@ type MetricDataRepository interface {
LoadStats(job *schema.Job, metrics []string, ctx context.Context) (map[string]map[string]schema.MetricStatistics, error)
// Return a map of hosts to a map of metrics at the requested scopes for that node.
LoadNodeData(cluster, partition string, metrics, nodes []string, scopes []schema.MetricScope, from, to time.Time, ctx context.Context) (map[string]map[string][]*schema.JobMetric, error)
LoadNodeData(cluster string, metrics, nodes []string, scopes []schema.MetricScope, from, to time.Time, ctx context.Context) (map[string]map[string][]*schema.JobMetric, error)
}
var metricDataRepos map[string]MetricDataRepository = map[string]MetricDataRepository{}
@ -173,7 +173,7 @@ func LoadAverages(job *schema.Job, metrics []string, data [][]schema.Float, ctx
}
// Used for the node/system view. Returns a map of nodes to a map of metrics.
func LoadNodeData(cluster, partition string, metrics, nodes []string, scopes []schema.MetricScope, from, to time.Time, ctx context.Context) (map[string]map[string][]*schema.JobMetric, error) {
func LoadNodeData(cluster string, metrics, nodes []string, scopes []schema.MetricScope, from, to time.Time, ctx context.Context) (map[string]map[string][]*schema.JobMetric, error) {
repo, ok := metricDataRepos[cluster]
if !ok {
return nil, fmt.Errorf("no metric data repository configured for '%s'", cluster)
@ -185,7 +185,7 @@ func LoadNodeData(cluster, partition string, metrics, nodes []string, scopes []s
}
}
data, err := repo.LoadNodeData(cluster, partition, metrics, nodes, scopes, from, to, ctx)
data, err := repo.LoadNodeData(cluster, metrics, nodes, scopes, from, to, ctx)
if err != nil {
if len(data) != 0 {
log.Errorf("partial error: %s", err.Error())

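A minimal sketch of calling the updated loader directly with the new signature; it assumes schema.MetricScopeNode exists as the node-level scope constant, and cluster, metric, and host names are placeholders:

    data, err := metricdata.LoadNodeData(
        "emmy",                     // placeholder cluster name
        []string{"flops_any"},      // placeholder metric list
        []string{"e0101", "e0102"}, // placeholder hostnames
        []schema.MetricScope{schema.MetricScopeNode},
        time.Now().Add(-6*time.Hour), time.Now(), context.Background())
    if err != nil {
        log.Errorf("loading node data failed: %s", err.Error())
    }
    _ = data // map of hostname -> metric name -> one JobMetric per requested scope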
View File

@ -32,6 +32,6 @@ func (tmdr *TestMetricDataRepository) LoadStats(job *schema.Job, metrics []strin
panic("TODO")
}
func (tmdr *TestMetricDataRepository) LoadNodeData(cluster, partition string, metrics, nodes []string, scopes []schema.MetricScope, from, to time.Time, ctx context.Context) (map[string]map[string][]*schema.JobMetric, error) {
func (tmdr *TestMetricDataRepository) LoadNodeData(cluster string, metrics, nodes []string, scopes []schema.MetricScope, from, to time.Time, ctx context.Context) (map[string]map[string][]*schema.JobMetric, error) {
panic("TODO")
}