diff --git a/api/schema.graphqls b/api/schema.graphqls index 4ee573c..8f5e1c7 100644 --- a/api/schema.graphqls +++ b/api/schema.graphqls @@ -28,9 +28,8 @@ type NodeStates { type NodeStatesTimed { state: String! - type: String! - count: Int! - time: Int! + counts: [Int!]! + times: [Int!]! } type Job { @@ -263,6 +262,7 @@ enum SortByAggregate { type NodeMetrics { host: String! + state: String! subCluster: String! metrics: [JobMetricWithName!]! } @@ -317,7 +317,7 @@ type Query { node(id: ID!): Node nodes(filter: [NodeFilter!], order: OrderByInput): NodeStateResultList! nodeStates(filter: [NodeFilter!]): [NodeStates!]! - nodeStatesTimed(filter: [NodeFilter!]): [NodeStatesTimed!]! + nodeStatesTimed(filter: [NodeFilter!], type: String!): [NodeStatesTimed!]! job(id: ID!): Job jobMetrics( @@ -376,6 +376,7 @@ type Query { nodeMetricsList( cluster: String! subCluster: String! + stateFilter: String! nodeFilter: String! scopes: [MetricScope!] metrics: [String!] diff --git a/internal/graph/generated/generated.go b/internal/graph/generated/generated.go index 52a938f..b4ade3b 100644 --- a/internal/graph/generated/generated.go +++ b/internal/graph/generated/generated.go @@ -50,8 +50,7 @@ type ResolverRoot interface { SubCluster() SubClusterResolver } -type DirectiveRoot struct { -} +type DirectiveRoot struct{} type ComplexityRoot struct { Accelerator struct { @@ -288,6 +287,7 @@ type ComplexityRoot struct { NodeMetrics struct { Host func(childComplexity int) int Metrics func(childComplexity int) int + State func(childComplexity int) int SubCluster func(childComplexity int) int } @@ -302,10 +302,9 @@ type ComplexityRoot struct { } NodeStatesTimed struct { - Count func(childComplexity int) int - State func(childComplexity int) int - Time func(childComplexity int) int - Type func(childComplexity int) int + Counts func(childComplexity int) int + State func(childComplexity int) int + Times func(childComplexity int) int } NodesResultList struct { @@ -330,9 +329,9 @@ type ComplexityRoot struct { JobsStatistics func(childComplexity int, filter []*model.JobFilter, metrics []string, page *model.PageRequest, sortBy *model.SortByAggregate, groupBy *model.Aggregate, numDurationBins *string, numMetricBins *int) int Node func(childComplexity int, id string) int NodeMetrics func(childComplexity int, cluster string, nodes []string, scopes []schema.MetricScope, metrics []string, from time.Time, to time.Time) int - NodeMetricsList func(childComplexity int, cluster string, subCluster string, nodeFilter string, scopes []schema.MetricScope, metrics []string, from time.Time, to time.Time, page *model.PageRequest, resolution *int) int + NodeMetricsList func(childComplexity int, cluster string, subCluster string, stateFilter string, nodeFilter string, scopes []schema.MetricScope, metrics []string, from time.Time, to time.Time, page *model.PageRequest, resolution *int) int NodeStates func(childComplexity int, filter []*model.NodeFilter) int - NodeStatesTimed func(childComplexity int, filter []*model.NodeFilter) int + NodeStatesTimed func(childComplexity int, filter []*model.NodeFilter, typeArg string) int Nodes func(childComplexity int, filter []*model.NodeFilter, order *model.OrderByInput) int RooflineHeatmap func(childComplexity int, filter []*model.JobFilter, rows int, cols int, minX float64, minY float64, maxX float64, maxY float64) int ScopedJobStats func(childComplexity int, id string, metrics []string, scopes []schema.MetricScope) int @@ -473,7 +472,7 @@ type QueryResolver interface { Node(ctx context.Context, id string) 
(*schema.Node, error) Nodes(ctx context.Context, filter []*model.NodeFilter, order *model.OrderByInput) (*model.NodeStateResultList, error) NodeStates(ctx context.Context, filter []*model.NodeFilter) ([]*model.NodeStates, error) - NodeStatesTimed(ctx context.Context, filter []*model.NodeFilter) ([]*model.NodeStatesTimed, error) + NodeStatesTimed(ctx context.Context, filter []*model.NodeFilter, typeArg string) ([]*model.NodeStatesTimed, error) Job(ctx context.Context, id string) (*schema.Job, error) JobMetrics(ctx context.Context, id string, metrics []string, scopes []schema.MetricScope, resolution *int) ([]*model.JobMetricWithName, error) JobStats(ctx context.Context, id string, metrics []string) ([]*model.NamedStats, error) @@ -484,7 +483,7 @@ type QueryResolver interface { JobsFootprints(ctx context.Context, filter []*model.JobFilter, metrics []string) (*model.Footprints, error) RooflineHeatmap(ctx context.Context, filter []*model.JobFilter, rows int, cols int, minX float64, minY float64, maxX float64, maxY float64) ([][]float64, error) NodeMetrics(ctx context.Context, cluster string, nodes []string, scopes []schema.MetricScope, metrics []string, from time.Time, to time.Time) ([]*model.NodeMetrics, error) - NodeMetricsList(ctx context.Context, cluster string, subCluster string, nodeFilter string, scopes []schema.MetricScope, metrics []string, from time.Time, to time.Time, page *model.PageRequest, resolution *int) (*model.NodesResultList, error) + NodeMetricsList(ctx context.Context, cluster string, subCluster string, stateFilter string, nodeFilter string, scopes []schema.MetricScope, metrics []string, from time.Time, to time.Time, page *model.PageRequest, resolution *int) (*model.NodesResultList, error) } type SubClusterResolver interface { NumberOfNodes(ctx context.Context, obj *schema.SubCluster) (int, error) @@ -1455,12 +1454,21 @@ func (e *executableSchema) Complexity(ctx context.Context, typeName, field strin } return e.complexity.NodeMetrics.Host(childComplexity), true + case "NodeMetrics.metrics": if e.complexity.NodeMetrics.Metrics == nil { break } return e.complexity.NodeMetrics.Metrics(childComplexity), true + + case "NodeMetrics.state": + if e.complexity.NodeMetrics.State == nil { + break + } + + return e.complexity.NodeMetrics.State(childComplexity), true + case "NodeMetrics.subCluster": if e.complexity.NodeMetrics.SubCluster == nil { break @@ -1494,30 +1502,26 @@ func (e *executableSchema) Complexity(ctx context.Context, typeName, field strin return e.complexity.NodeStates.State(childComplexity), true - case "NodeStatesTimed.count": - if e.complexity.NodeStatesTimed.Count == nil { + case "NodeStatesTimed.counts": + if e.complexity.NodeStatesTimed.Counts == nil { break } - return e.complexity.NodeStatesTimed.Count(childComplexity), true + return e.complexity.NodeStatesTimed.Counts(childComplexity), true + case "NodeStatesTimed.state": if e.complexity.NodeStatesTimed.State == nil { break } return e.complexity.NodeStatesTimed.State(childComplexity), true - case "NodeStatesTimed.time": - if e.complexity.NodeStatesTimed.Time == nil { + + case "NodeStatesTimed.times": + if e.complexity.NodeStatesTimed.Times == nil { break } - return e.complexity.NodeStatesTimed.Time(childComplexity), true - case "NodeStatesTimed.type": - if e.complexity.NodeStatesTimed.Type == nil { - break - } - - return e.complexity.NodeStatesTimed.Type(childComplexity), true + return e.complexity.NodeStatesTimed.Times(childComplexity), true case "NodesResultList.count": if e.complexity.NodesResultList.Count == 
nil { @@ -1688,7 +1692,8 @@ func (e *executableSchema) Complexity(ctx context.Context, typeName, field strin return 0, false } - return e.complexity.Query.NodeMetricsList(childComplexity, args["cluster"].(string), args["subCluster"].(string), args["nodeFilter"].(string), args["scopes"].([]schema.MetricScope), args["metrics"].([]string), args["from"].(time.Time), args["to"].(time.Time), args["page"].(*model.PageRequest), args["resolution"].(*int)), true + return e.complexity.Query.NodeMetricsList(childComplexity, args["cluster"].(string), args["subCluster"].(string), args["stateFilter"].(string), args["nodeFilter"].(string), args["scopes"].([]schema.MetricScope), args["metrics"].([]string), args["from"].(time.Time), args["to"].(time.Time), args["page"].(*model.PageRequest), args["resolution"].(*int)), true + case "Query.nodeStates": if e.complexity.Query.NodeStates == nil { break @@ -1710,7 +1715,8 @@ func (e *executableSchema) Complexity(ctx context.Context, typeName, field strin return 0, false } - return e.complexity.Query.NodeStatesTimed(childComplexity, args["filter"].([]*model.NodeFilter)), true + return e.complexity.Query.NodeStatesTimed(childComplexity, args["filter"].([]*model.NodeFilter), args["type"].(string)), true + case "Query.nodes": if e.complexity.Query.Nodes == nil { break @@ -2248,9 +2254,8 @@ type NodeStates { type NodeStatesTimed { state: String! - type: String! - count: Int! - time: Int! + counts: [Int!]! + times: [Int!]! } type Job { @@ -2483,6 +2488,7 @@ enum SortByAggregate { type NodeMetrics { host: String! + state: String! subCluster: String! metrics: [JobMetricWithName!]! } @@ -2537,7 +2543,7 @@ type Query { node(id: ID!): Node nodes(filter: [NodeFilter!], order: OrderByInput): NodeStateResultList! nodeStates(filter: [NodeFilter!]): [NodeStates!]! - nodeStatesTimed(filter: [NodeFilter!]): [NodeStatesTimed!]! + nodeStatesTimed(filter: [NodeFilter!], type: String!): [NodeStatesTimed!]! job(id: ID!): Job jobMetrics( @@ -2596,6 +2602,7 @@ type Query { nodeMetricsList( cluster: String! subCluster: String! + stateFilter: String! nodeFilter: String! scopes: [MetricScope!] metrics: [String!] 
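// Illustrative only (not part of this diff): a client query against the
// reworked nodeStatesTimed field defined in the schema above. The new
// parallel arrays replace the former per-row (type, count, time) triple:
// counts[i] is the number of nodes in `state` at epoch second times[i].
const nodeStatesTimedQuery = `
query ($filter: [NodeFilter!], $type: String!) {
  nodeStatesTimed(filter: $filter, type: $type) {
    state
    counts
    times
  }
}`
// With type: "node" the resolver aggregates node_state rows; with
// type: "health" it aggregates health_state rows (see schema.resolvers.go below).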
@@ -3040,41 +3047,46 @@ func (ec *executionContext) field_Query_nodeMetricsList_args(ctx context.Context return nil, err } args["subCluster"] = arg1 - arg2, err := graphql.ProcessArgField(ctx, rawArgs, "nodeFilter", ec.unmarshalNString2string) + arg2, err := graphql.ProcessArgField(ctx, rawArgs, "stateFilter", ec.unmarshalNString2string) if err != nil { return nil, err } - args["nodeFilter"] = arg2 - arg3, err := graphql.ProcessArgField(ctx, rawArgs, "scopes", ec.unmarshalOMetricScope2ᚕgithubᚗcomᚋClusterCockpitᚋccᚑlibᚋschemaᚐMetricScopeᚄ) + args["stateFilter"] = arg2 + arg3, err := graphql.ProcessArgField(ctx, rawArgs, "nodeFilter", ec.unmarshalNString2string) if err != nil { return nil, err } - args["scopes"] = arg3 - arg4, err := graphql.ProcessArgField(ctx, rawArgs, "metrics", ec.unmarshalOString2ᚕstringᚄ) + args["nodeFilter"] = arg3 + arg4, err := graphql.ProcessArgField(ctx, rawArgs, "scopes", ec.unmarshalOMetricScope2ᚕgithubᚗcomᚋClusterCockpitᚋccᚑlibᚋschemaᚐMetricScopeᚄ) if err != nil { return nil, err } - args["metrics"] = arg4 - arg5, err := graphql.ProcessArgField(ctx, rawArgs, "from", ec.unmarshalNTime2timeᚐTime) + args["scopes"] = arg4 + arg5, err := graphql.ProcessArgField(ctx, rawArgs, "metrics", ec.unmarshalOString2ᚕstringᚄ) if err != nil { return nil, err } - args["from"] = arg5 - arg6, err := graphql.ProcessArgField(ctx, rawArgs, "to", ec.unmarshalNTime2timeᚐTime) + args["metrics"] = arg5 + arg6, err := graphql.ProcessArgField(ctx, rawArgs, "from", ec.unmarshalNTime2timeᚐTime) if err != nil { return nil, err } - args["to"] = arg6 - arg7, err := graphql.ProcessArgField(ctx, rawArgs, "page", ec.unmarshalOPageRequest2ᚖgithubᚗcomᚋClusterCockpitᚋccᚑbackendᚋinternalᚋgraphᚋmodelᚐPageRequest) + args["from"] = arg6 + arg7, err := graphql.ProcessArgField(ctx, rawArgs, "to", ec.unmarshalNTime2timeᚐTime) if err != nil { return nil, err } - args["page"] = arg7 - arg8, err := graphql.ProcessArgField(ctx, rawArgs, "resolution", ec.unmarshalOInt2ᚖint) + args["to"] = arg7 + arg8, err := graphql.ProcessArgField(ctx, rawArgs, "page", ec.unmarshalOPageRequest2ᚖgithubᚗcomᚋClusterCockpitᚋccᚑbackendᚋinternalᚋgraphᚋmodelᚐPageRequest) if err != nil { return nil, err } - args["resolution"] = arg8 + args["page"] = arg8 + arg9, err := graphql.ProcessArgField(ctx, rawArgs, "resolution", ec.unmarshalOInt2ᚖint) + if err != nil { + return nil, err + } + args["resolution"] = arg9 return args, nil } @@ -3122,6 +3134,11 @@ func (ec *executionContext) field_Query_nodeStatesTimed_args(ctx context.Context return nil, err } args["filter"] = arg0 + arg1, err := graphql.ProcessArgField(ctx, rawArgs, "type", ec.unmarshalNString2string) + if err != nil { + return nil, err + } + args["type"] = arg1 return args, nil } @@ -7976,6 +7993,50 @@ func (ec *executionContext) fieldContext_NodeMetrics_host(_ context.Context, fie return fc, nil } +func (ec *executionContext) _NodeMetrics_state(ctx context.Context, field graphql.CollectedField, obj *model.NodeMetrics) (ret graphql.Marshaler) { + fc, err := ec.fieldContext_NodeMetrics_state(ctx, field) + if err != nil { + return graphql.Null + } + ctx = graphql.WithFieldContext(ctx, fc) + defer func() { + if r := recover(); r != nil { + ec.Error(ctx, ec.Recover(ctx, r)) + ret = graphql.Null + } + }() + resTmp, err := ec.ResolverMiddleware(ctx, func(rctx context.Context) (any, error) { + ctx = rctx // use context from middleware stack in children + return obj.State, nil + }) + if err != nil { + ec.Error(ctx, err) + return graphql.Null + } + if resTmp == nil { + if 
!graphql.HasFieldError(ctx, fc) { + ec.Errorf(ctx, "must not be null") + } + return graphql.Null + } + res := resTmp.(string) + fc.Result = res + return ec.marshalNString2string(ctx, field.Selections, res) +} + +func (ec *executionContext) fieldContext_NodeMetrics_state(_ context.Context, field graphql.CollectedField) (fc *graphql.FieldContext, err error) { + fc = &graphql.FieldContext{ + Object: "NodeMetrics", + Field: field, + IsMethod: false, + IsResolver: false, + Child: func(ctx context.Context, field graphql.CollectedField) (*graphql.FieldContext, error) { + return nil, errors.New("field of type String does not have child fields") + }, + } + return fc, nil +} + func (ec *executionContext) _NodeMetrics_subCluster(ctx context.Context, field graphql.CollectedField, obj *model.NodeMetrics) (ret graphql.Marshaler) { return graphql.ResolveField( ctx, @@ -8211,52 +8272,38 @@ func (ec *executionContext) fieldContext_NodeStatesTimed_state(_ context.Context return fc, nil } -func (ec *executionContext) _NodeStatesTimed_type(ctx context.Context, field graphql.CollectedField, obj *model.NodeStatesTimed) (ret graphql.Marshaler) { - return graphql.ResolveField( - ctx, - ec.OperationContext, - field, - ec.fieldContext_NodeStatesTimed_type, - func(ctx context.Context) (any, error) { - return obj.Type, nil - }, - nil, - ec.marshalNString2string, - true, - true, - ) -} - -func (ec *executionContext) fieldContext_NodeStatesTimed_type(_ context.Context, field graphql.CollectedField) (fc *graphql.FieldContext, err error) { - fc = &graphql.FieldContext{ - Object: "NodeStatesTimed", - Field: field, - IsMethod: false, - IsResolver: false, - Child: func(ctx context.Context, field graphql.CollectedField) (*graphql.FieldContext, error) { - return nil, errors.New("field of type String does not have child fields") - }, +func (ec *executionContext) _NodeStatesTimed_counts(ctx context.Context, field graphql.CollectedField, obj *model.NodeStatesTimed) (ret graphql.Marshaler) { + fc, err := ec.fieldContext_NodeStatesTimed_counts(ctx, field) + if err != nil { + return graphql.Null } - return fc, nil + ctx = graphql.WithFieldContext(ctx, fc) + defer func() { + if r := recover(); r != nil { + ec.Error(ctx, ec.Recover(ctx, r)) + ret = graphql.Null + } + }() + resTmp, err := ec.ResolverMiddleware(ctx, func(rctx context.Context) (any, error) { + ctx = rctx // use context from middleware stack in children + return obj.Counts, nil + }) + if err != nil { + ec.Error(ctx, err) + return graphql.Null + } + if resTmp == nil { + if !graphql.HasFieldError(ctx, fc) { + ec.Errorf(ctx, "must not be null") + } + return graphql.Null + } + res := resTmp.([]int) + fc.Result = res + return ec.marshalNInt2ᚕintᚄ(ctx, field.Selections, res) } -func (ec *executionContext) _NodeStatesTimed_count(ctx context.Context, field graphql.CollectedField, obj *model.NodeStatesTimed) (ret graphql.Marshaler) { - return graphql.ResolveField( - ctx, - ec.OperationContext, - field, - ec.fieldContext_NodeStatesTimed_count, - func(ctx context.Context) (any, error) { - return obj.Count, nil - }, - nil, - ec.marshalNInt2int, - true, - true, - ) -} - -func (ec *executionContext) fieldContext_NodeStatesTimed_count(_ context.Context, field graphql.CollectedField) (fc *graphql.FieldContext, err error) { +func (ec *executionContext) fieldContext_NodeStatesTimed_counts(_ context.Context, field graphql.CollectedField) (fc *graphql.FieldContext, err error) { fc = &graphql.FieldContext{ Object: "NodeStatesTimed", Field: field, @@ -8269,23 +8316,38 @@ func (ec 
*executionContext) fieldContext_NodeStatesTimed_count(_ context.Context return fc, nil } -func (ec *executionContext) _NodeStatesTimed_time(ctx context.Context, field graphql.CollectedField, obj *model.NodeStatesTimed) (ret graphql.Marshaler) { - return graphql.ResolveField( - ctx, - ec.OperationContext, - field, - ec.fieldContext_NodeStatesTimed_time, - func(ctx context.Context) (any, error) { - return obj.Time, nil - }, - nil, - ec.marshalNInt2int, - true, - true, - ) +func (ec *executionContext) _NodeStatesTimed_times(ctx context.Context, field graphql.CollectedField, obj *model.NodeStatesTimed) (ret graphql.Marshaler) { + fc, err := ec.fieldContext_NodeStatesTimed_times(ctx, field) + if err != nil { + return graphql.Null + } + ctx = graphql.WithFieldContext(ctx, fc) + defer func() { + if r := recover(); r != nil { + ec.Error(ctx, ec.Recover(ctx, r)) + ret = graphql.Null + } + }() + resTmp, err := ec.ResolverMiddleware(ctx, func(rctx context.Context) (any, error) { + ctx = rctx // use context from middleware stack in children + return obj.Times, nil + }) + if err != nil { + ec.Error(ctx, err) + return graphql.Null + } + if resTmp == nil { + if !graphql.HasFieldError(ctx, fc) { + ec.Errorf(ctx, "must not be null") + } + return graphql.Null + } + res := resTmp.([]int) + fc.Result = res + return ec.marshalNInt2ᚕintᚄ(ctx, field.Selections, res) } -func (ec *executionContext) fieldContext_NodeStatesTimed_time(_ context.Context, field graphql.CollectedField) (fc *graphql.FieldContext, err error) { +func (ec *executionContext) fieldContext_NodeStatesTimed_times(_ context.Context, field graphql.CollectedField) (fc *graphql.FieldContext, err error) { fc = &graphql.FieldContext{ Object: "NodeStatesTimed", Field: field, @@ -8324,6 +8386,8 @@ func (ec *executionContext) fieldContext_NodesResultList_items(_ context.Context switch field.Name { case "host": return ec.fieldContext_NodeMetrics_host(ctx, field) + case "state": + return ec.fieldContext_NodeMetrics_state(ctx, field) case "subCluster": return ec.fieldContext_NodeMetrics_subCluster(ctx, field) case "metrics": @@ -8853,20 +8917,34 @@ func (ec *executionContext) fieldContext_Query_nodeStates(ctx context.Context, f } func (ec *executionContext) _Query_nodeStatesTimed(ctx context.Context, field graphql.CollectedField) (ret graphql.Marshaler) { - return graphql.ResolveField( - ctx, - ec.OperationContext, - field, - ec.fieldContext_Query_nodeStatesTimed, - func(ctx context.Context) (any, error) { - fc := graphql.GetFieldContext(ctx) - return ec.resolvers.Query().NodeStatesTimed(ctx, fc.Args["filter"].([]*model.NodeFilter)) - }, - nil, - ec.marshalNNodeStatesTimed2ᚕᚖgithubᚗcomᚋClusterCockpitᚋccᚑbackendᚋinternalᚋgraphᚋmodelᚐNodeStatesTimedᚄ, - true, - true, - ) + fc, err := ec.fieldContext_Query_nodeStatesTimed(ctx, field) + if err != nil { + return graphql.Null + } + ctx = graphql.WithFieldContext(ctx, fc) + defer func() { + if r := recover(); r != nil { + ec.Error(ctx, ec.Recover(ctx, r)) + ret = graphql.Null + } + }() + resTmp, err := ec.ResolverMiddleware(ctx, func(rctx context.Context) (any, error) { + ctx = rctx // use context from middleware stack in children + return ec.resolvers.Query().NodeStatesTimed(rctx, fc.Args["filter"].([]*model.NodeFilter), fc.Args["type"].(string)) + }) + if err != nil { + ec.Error(ctx, err) + return graphql.Null + } + if resTmp == nil { + if !graphql.HasFieldError(ctx, fc) { + ec.Errorf(ctx, "must not be null") + } + return graphql.Null + } + res := resTmp.([]*model.NodeStatesTimed) + fc.Result = res + return 
ec.marshalNNodeStatesTimed2ᚕᚖgithubᚗcomᚋClusterCockpitᚋccᚑbackendᚋinternalᚋgraphᚋmodelᚐNodeStatesTimedᚄ(ctx, field.Selections, res) } func (ec *executionContext) fieldContext_Query_nodeStatesTimed(ctx context.Context, field graphql.CollectedField) (fc *graphql.FieldContext, err error) { @@ -8879,12 +8957,10 @@ func (ec *executionContext) fieldContext_Query_nodeStatesTimed(ctx context.Conte switch field.Name { case "state": return ec.fieldContext_NodeStatesTimed_state(ctx, field) - case "type": - return ec.fieldContext_NodeStatesTimed_type(ctx, field) - case "count": - return ec.fieldContext_NodeStatesTimed_count(ctx, field) - case "time": - return ec.fieldContext_NodeStatesTimed_time(ctx, field) + case "counts": + return ec.fieldContext_NodeStatesTimed_counts(ctx, field) + case "times": + return ec.fieldContext_NodeStatesTimed_times(ctx, field) } return nil, fmt.Errorf("no field named %q was found under type NodeStatesTimed", field.Name) }, @@ -9453,6 +9529,8 @@ func (ec *executionContext) fieldContext_Query_nodeMetrics(ctx context.Context, switch field.Name { case "host": return ec.fieldContext_NodeMetrics_host(ctx, field) + case "state": + return ec.fieldContext_NodeMetrics_state(ctx, field) case "subCluster": return ec.fieldContext_NodeMetrics_subCluster(ctx, field) case "metrics": @@ -9476,20 +9554,34 @@ func (ec *executionContext) fieldContext_Query_nodeMetrics(ctx context.Context, } func (ec *executionContext) _Query_nodeMetricsList(ctx context.Context, field graphql.CollectedField) (ret graphql.Marshaler) { - return graphql.ResolveField( - ctx, - ec.OperationContext, - field, - ec.fieldContext_Query_nodeMetricsList, - func(ctx context.Context) (any, error) { - fc := graphql.GetFieldContext(ctx) - return ec.resolvers.Query().NodeMetricsList(ctx, fc.Args["cluster"].(string), fc.Args["subCluster"].(string), fc.Args["nodeFilter"].(string), fc.Args["scopes"].([]schema.MetricScope), fc.Args["metrics"].([]string), fc.Args["from"].(time.Time), fc.Args["to"].(time.Time), fc.Args["page"].(*model.PageRequest), fc.Args["resolution"].(*int)) - }, - nil, - ec.marshalNNodesResultList2ᚖgithubᚗcomᚋClusterCockpitᚋccᚑbackendᚋinternalᚋgraphᚋmodelᚐNodesResultList, - true, - true, - ) + fc, err := ec.fieldContext_Query_nodeMetricsList(ctx, field) + if err != nil { + return graphql.Null + } + ctx = graphql.WithFieldContext(ctx, fc) + defer func() { + if r := recover(); r != nil { + ec.Error(ctx, ec.Recover(ctx, r)) + ret = graphql.Null + } + }() + resTmp, err := ec.ResolverMiddleware(ctx, func(rctx context.Context) (any, error) { + ctx = rctx // use context from middleware stack in children + return ec.resolvers.Query().NodeMetricsList(rctx, fc.Args["cluster"].(string), fc.Args["subCluster"].(string), fc.Args["stateFilter"].(string), fc.Args["nodeFilter"].(string), fc.Args["scopes"].([]schema.MetricScope), fc.Args["metrics"].([]string), fc.Args["from"].(time.Time), fc.Args["to"].(time.Time), fc.Args["page"].(*model.PageRequest), fc.Args["resolution"].(*int)) + }) + if err != nil { + ec.Error(ctx, err) + return graphql.Null + } + if resTmp == nil { + if !graphql.HasFieldError(ctx, fc) { + ec.Errorf(ctx, "must not be null") + } + return graphql.Null + } + res := resTmp.(*model.NodesResultList) + fc.Result = res + return ec.marshalNNodesResultList2ᚖgithubᚗcomᚋClusterCockpitᚋccᚑbackendᚋinternalᚋgraphᚋmodelᚐNodesResultList(ctx, field.Selections, res) } func (ec *executionContext) fieldContext_Query_nodeMetricsList(ctx context.Context, field graphql.CollectedField) (fc *graphql.FieldContext, err error) { @@ 
-15315,6 +15407,11 @@ func (ec *executionContext) _NodeMetrics(ctx context.Context, sel ast.SelectionS if out.Values[i] == graphql.Null { out.Invalids++ } + case "state": + out.Values[i] = ec._NodeMetrics_state(ctx, field, obj) + if out.Values[i] == graphql.Null { + out.Invalids++ + } case "subCluster": out.Values[i] = ec._NodeMetrics_subCluster(ctx, field, obj) if out.Values[i] == graphql.Null { @@ -15449,18 +15546,13 @@ func (ec *executionContext) _NodeStatesTimed(ctx context.Context, sel ast.Select if out.Values[i] == graphql.Null { out.Invalids++ } - case "type": - out.Values[i] = ec._NodeStatesTimed_type(ctx, field, obj) + case "counts": + out.Values[i] = ec._NodeStatesTimed_counts(ctx, field, obj) if out.Values[i] == graphql.Null { out.Invalids++ } - case "count": - out.Values[i] = ec._NodeStatesTimed_count(ctx, field, obj) - if out.Values[i] == graphql.Null { - out.Invalids++ - } - case "time": - out.Values[i] = ec._NodeStatesTimed_time(ctx, field, obj) + case "times": + out.Values[i] = ec._NodeStatesTimed_times(ctx, field, obj) if out.Values[i] == graphql.Null { out.Invalids++ } @@ -19234,7 +19326,7 @@ func (ec *executionContext) unmarshalOAggregate2ᚖgithubᚗcomᚋClusterCockpit if v == nil { return nil, nil } - var res = new(model.Aggregate) + res := new(model.Aggregate) err := res.UnmarshalGQL(v) return res, graphql.ErrorOnPath(ctx, err) } @@ -19900,7 +19992,7 @@ func (ec *executionContext) unmarshalOSortByAggregate2ᚖgithubᚗcomᚋClusterC if v == nil { return nil, nil } - var res = new(model.SortByAggregate) + res := new(model.SortByAggregate) err := res.UnmarshalGQL(v) return res, graphql.ErrorOnPath(ctx, err) } diff --git a/internal/graph/model/models_gen.go b/internal/graph/model/models_gen.go index 7b64464..4cb414e 100644 --- a/internal/graph/model/models_gen.go +++ b/internal/graph/model/models_gen.go @@ -181,6 +181,7 @@ type NodeFilter struct { type NodeMetrics struct { Host string `json:"host"` + State string `json:"state"` SubCluster string `json:"subCluster"` Metrics []*JobMetricWithName `json:"metrics"` } @@ -196,10 +197,9 @@ type NodeStates struct { } type NodeStatesTimed struct { - State string `json:"state"` - Type string `json:"type"` - Count int `json:"count"` - Time int `json:"time"` + State string `json:"state"` + Counts []int `json:"counts"` + Times []int `json:"times"` } type NodesResultList struct { diff --git a/internal/graph/schema.resolvers.go b/internal/graph/schema.resolvers.go index 7469fed..0de96cb 100644 --- a/internal/graph/schema.resolvers.go +++ b/internal/graph/schema.resolvers.go @@ -312,7 +312,11 @@ func (r *nodeResolver) ID(ctx context.Context, obj *schema.Node) (string, error) // SchedulerState is the resolver for the schedulerState field. func (r *nodeResolver) SchedulerState(ctx context.Context, obj *schema.Node) (schema.SchedulerState, error) { - panic(fmt.Errorf("not implemented: SchedulerState - schedulerState")) + if obj.NodeState != "" { + return obj.NodeState, nil + } else { + return "", fmt.Errorf("No SchedulerState (NodeState) on Object") + } } // HealthState is the resolver for the healthState field. 
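// Minimal unit-test sketch (not part of this diff) for the new pass-through
// SchedulerState resolver above. It assumes nodeResolver's zero value is
// usable and that schema.Node.NodeState is the string-based
// schema.SchedulerState type, as the resolver body implies.
package graph

import (
	"context"
	"testing"

	"github.com/ClusterCockpit/cc-lib/schema"
)

func TestSchedulerStateResolver(t *testing.T) {
	r := &nodeResolver{}
	node := &schema.Node{NodeState: schema.SchedulerState("allocated")}

	got, err := r.SchedulerState(context.Background(), node)
	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}
	if got != schema.SchedulerState("allocated") {
		t.Fatalf("want allocated, got %q", got)
	}
}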
@@ -387,13 +391,13 @@ func (r *queryResolver) Nodes(ctx context.Context, filter []*model.NodeFilter, o func (r *queryResolver) NodeStates(ctx context.Context, filter []*model.NodeFilter) ([]*model.NodeStates, error) { repo := repository.GetNodeRepository() - stateCounts, serr := repo.CountNodeStates(ctx, filter) + stateCounts, serr := repo.CountStates(ctx, filter, "node_state") if serr != nil { cclog.Warnf("Error while counting nodeStates: %s", serr.Error()) return nil, serr } - healthCounts, herr := repo.CountHealthStates(ctx, filter) + healthCounts, herr := repo.CountStates(ctx, filter, "health_state") if herr != nil { cclog.Warnf("Error while counting healthStates: %s", herr.Error()) return nil, herr @@ -406,26 +410,28 @@ func (r *queryResolver) NodeStates(ctx context.Context, filter []*model.NodeFilt } // NodeStatesTimed is the resolver for the nodeStatesTimed field. -func (r *queryResolver) NodeStatesTimed(ctx context.Context, filter []*model.NodeFilter) ([]*model.NodeStatesTimed, error) { - panic(fmt.Errorf("not implemented: NodeStatesTimed - NodeStatesTimed")) - // repo := repository.GetNodeRepository() +func (r *queryResolver) NodeStatesTimed(ctx context.Context, filter []*model.NodeFilter, typeArg string) ([]*model.NodeStatesTimed, error) { + repo := repository.GetNodeRepository() - // stateCounts, serr := repo.CountNodeStates(ctx, filter) - // if serr != nil { - // cclog.Warnf("Error while counting nodeStates: %s", serr.Error()) - // return nil, serr - // } + if typeArg == "node" { + stateCounts, serr := repo.CountStatesTimed(ctx, filter, "node_state") + if serr != nil { + cclog.Warnf("Error while counting nodeStates in time: %s", serr.Error()) + return nil, serr + } + return stateCounts, nil + } - // healthCounts, herr := repo.CountHealthStates(ctx, filter) - // if herr != nil { - // cclog.Warnf("Error while counting healthStates: %s", herr.Error()) - // return nil, herr - // } + if typeArg == "health" { + healthCounts, herr := repo.CountStatesTimed(ctx, filter, "health_state") + if herr != nil { + cclog.Warnf("Error while counting healthStates in time: %s", herr.Error()) + return nil, herr + } + return healthCounts, nil + } - // allCounts := make([]*model.NodeStates, 0) - // allCounts = append(stateCounts, healthCounts...) - - // return allCounts, nil + return nil, errors.New("Unknown Node State Query Type") } // Job is the resolver for the job field. @@ -750,10 +756,14 @@ func (r *queryResolver) NodeMetrics(ctx context.Context, cluster string, nodes [ return nil, err } + nodeRepo := repository.GetNodeRepository() + stateMap, _ := nodeRepo.MapNodes(cluster) + nodeMetrics := make([]*model.NodeMetrics, 0, len(data)) for hostname, metrics := range data { host := &model.NodeMetrics{ Host: hostname, + State: stateMap[hostname], Metrics: make([]*model.JobMetricWithName, 0, len(metrics)*len(scopes)), } host.SubCluster, err = archive.GetSubClusterByNode(cluster, hostname) @@ -778,7 +788,7 @@ func (r *queryResolver) NodeMetrics(ctx context.Context, cluster string, nodes [ } // NodeMetricsList is the resolver for the nodeMetricsList field. 
-func (r *queryResolver) NodeMetricsList(ctx context.Context, cluster string, subCluster string, nodeFilter string, scopes []schema.MetricScope, metrics []string, from time.Time, to time.Time, page *model.PageRequest, resolution *int) (*model.NodesResultList, error) { +func (r *queryResolver) NodeMetricsList(ctx context.Context, cluster string, subCluster string, stateFilter string, nodeFilter string, scopes []schema.MetricScope, metrics []string, from time.Time, to time.Time, page *model.PageRequest, resolution *int) (*model.NodesResultList, error) { if resolution == nil { // Load from Config if config.Keys.EnableResampling != nil { defaultRes := slices.Max(config.Keys.EnableResampling.Resolutions) @@ -800,9 +810,47 @@ func (r *queryResolver) NodeMetricsList(ctx context.Context, cluster string, sub } } - data, totalNodes, hasNextPage, err := metricDataDispatcher.LoadNodeListData(cluster, subCluster, nodeFilter, metrics, scopes, *resolution, from, to, page, ctx) + // Note: This Prefilter Logic Can Be Used To Completely Switch Node Source Of Truth To SQLite DB + // Adapt and extend filters/paging/sorting in QueryNodes Function to return []string array of hostnames, input array to LoadNodeListData + // LoadNodeListData, instead of building queried nodes from topoplogy anew, directly will use QueryNodes hostname array + // Caveat: "notindb" state will not be resolvable anymore by default, or needs reverse lookup by dedicated comparison to topology data after all + preFiltered := make([]string, 0) + stateMap := make(map[string]string) + if stateFilter != "all" { + nodeRepo := repository.GetNodeRepository() + stateQuery := make([]*model.NodeFilter, 0) + // Required Filters + stateQuery = append(stateQuery, &model.NodeFilter{Cluster: &model.StringInput{Eq: &cluster}}) + if subCluster != "" { + stateQuery = append(stateQuery, &model.NodeFilter{Subcluster: &model.StringInput{Eq: &subCluster}}) + } + + if stateFilter == "notindb" { + // Backward Filtering: Add Keyword, No Additional FIlters: Returns All Nodes For Cluster (and SubCluster) + preFiltered = append(preFiltered, "exclude") + } else { + // Workaround: If no nodes match, we need at least one element for trigger in LoadNodeListData + preFiltered = append(preFiltered, stateFilter) + // Forward Filtering: Match Only selected stateFilter + var queryState schema.SchedulerState = schema.SchedulerState(stateFilter) + stateQuery = append(stateQuery, &model.NodeFilter{SchedulerState: &queryState}) + } + + stateNodes, serr := nodeRepo.QueryNodes(ctx, stateQuery, &model.OrderByInput{}) // Order not Used + if serr != nil { + cclog.Warn("error while loading node database data (Resolver.NodeMetricsList)") + return nil, serr + } + + for _, node := range stateNodes { + preFiltered = append(preFiltered, node.Hostname) + stateMap[node.Hostname] = string(node.NodeState) + } + } + + data, totalNodes, hasNextPage, err := metricDataDispatcher.LoadNodeListData(cluster, subCluster, nodeFilter, preFiltered, metrics, scopes, *resolution, from, to, page, ctx) if err != nil { - cclog.Warn("error while loading node data") + cclog.Warn("error while loading node data (Resolver.NodeMetricsList") return nil, err } @@ -810,6 +858,7 @@ func (r *queryResolver) NodeMetricsList(ctx context.Context, cluster string, sub for hostname, metrics := range data { host := &model.NodeMetrics{ Host: hostname, + State: stateMap[hostname], Metrics: make([]*model.JobMetricWithName, 0, len(metrics)*len(scopes)), } host.SubCluster, err = archive.GetSubClusterByNode(cluster, hostname) diff 
--git a/internal/memorystore/configSchema.go b/internal/memorystore/configSchema.go index 133ba58..2616edc 100644 --- a/internal/memorystore/configSchema.go +++ b/internal/memorystore/configSchema.go @@ -51,35 +51,41 @@ const configSchema = `{ }, "nats": { "description": "Configuration for accepting published data through NATS.", - "type": "object", - "properties": { - "address": { - "description": "Address of the NATS server.", - "type": "string" - }, - "username": { - "description": "Optional: If configured with username/password method.", - "type": "string" - }, - "password": { - "description": "Optional: If configured with username/password method.", - "type": "string" - }, - "creds-file-path": { - "description": "Optional: If configured with Credential File method. Path to your NATS cred file.", - "type": "string" - }, - "subscriptions": { - "description": "Array of various subscriptions. Allows to subscibe to different subjects and publishers.", - "type": "object", - "properties": { - "subscribe-to": { - "description": "Channel name", - "type": "string" - }, - "cluster-tag": { - "description": "Optional: Allow lines without a cluster tag, use this as default", - "type": "string" + "type": "array", + "items": { + "type": "object", + "properties": { + "address": { + "description": "Address of the NATS server.", + "type": "string" + }, + "username": { + "description": "Optional: If configured with username/password method.", + "type": "string" + }, + "password": { + "description": "Optional: If configured with username/password method.", + "type": "string" + }, + "creds-file-path": { + "description": "Optional: If configured with Credential File method. Path to your NATS cred file.", + "type": "string" + }, + "subscriptions": { + "description": "Array of various subscriptions. 
Allows to subscibe to different subjects and publishers.", + "type": "array", + "items": { + "type": "object", + "properties": { + "subscribe-to": { + "description": "Channel name", + "type": "string" + }, + "cluster-tag": { + "description": "Optional: Allow lines without a cluster tag, use this as default", + "type": "string" + } + } } } } diff --git a/internal/metricDataDispatcher/dataLoader.go b/internal/metricDataDispatcher/dataLoader.go index 4f8e3b5..1257370 100644 --- a/internal/metricDataDispatcher/dataLoader.go +++ b/internal/metricDataDispatcher/dataLoader.go @@ -333,6 +333,7 @@ func LoadNodeData( func LoadNodeListData( cluster, subCluster, nodeFilter string, + preFiltered []string, metrics []string, scopes []schema.MetricScope, resolution int, @@ -351,7 +352,7 @@ func LoadNodeListData( } } - data, totalNodes, hasNextPage, err := repo.LoadNodeListData(cluster, subCluster, nodeFilter, metrics, scopes, resolution, from, to, page, ctx) + data, totalNodes, hasNextPage, err := repo.LoadNodeListData(cluster, subCluster, nodeFilter, preFiltered, metrics, scopes, resolution, from, to, page, ctx) if err != nil { if len(data) != 0 { cclog.Warnf("partial error: %s", err.Error()) diff --git a/internal/metricdata/cc-metric-store-internal.go b/internal/metricdata/cc-metric-store-internal.go index 01f4a05..54046d0 100644 --- a/internal/metricdata/cc-metric-store-internal.go +++ b/internal/metricdata/cc-metric-store-internal.go @@ -9,6 +9,7 @@ import ( "context" "encoding/json" "fmt" + "slices" "sort" "strconv" "strings" @@ -678,6 +679,7 @@ func (ccms *CCMetricStoreInternal) LoadNodeData( // Used for Systems-View Node-List func (ccms *CCMetricStoreInternal) LoadNodeListData( cluster, subCluster, nodeFilter string, + preFiltered []string, metrics []string, scopes []schema.MetricScope, resolution int, @@ -701,18 +703,37 @@ func (ccms *CCMetricStoreInternal) LoadNodeListData( } } - // 2) Filter nodes + // 2.1) Filter nodes by name if nodeFilter != "" { - filteredNodes := []string{} + filteredNodesByName := []string{} for _, node := range nodes { if strings.Contains(node, nodeFilter) { - filteredNodes = append(filteredNodes, node) + filteredNodesByName = append(filteredNodesByName, node) } } - nodes = filteredNodes + nodes = filteredNodesByName } - // 2.1) Count total nodes && Sort nodes -> Sorting invalidated after ccms return ... + // 2.2) Filter nodes by state using prefiltered match array + if len(preFiltered) > 0 { + filteredNodesByState := []string{} + if preFiltered[0] == "exclude" { // Backwards: PreFiltered contains all Nodes in DB > Return Missing Nodes + for _, node := range nodes { + if !slices.Contains(preFiltered, node) { + filteredNodesByState = append(filteredNodesByState, node) + } + } + } else { // Forwards: Prefiltered contains specific nodeState > Return Matches + for _, node := range nodes { + if slices.Contains(preFiltered, node) { + filteredNodesByState = append(filteredNodesByState, node) + } + } + } + nodes = filteredNodesByState + } + + // 2.3) Count total nodes && Sort nodes -> Sorting invalidated after return ... 
totalNodes = len(nodes) sort.Strings(nodes) diff --git a/internal/metricdata/cc-metric-store.go b/internal/metricdata/cc-metric-store.go index a188686..b36ad08 100644 --- a/internal/metricdata/cc-metric-store.go +++ b/internal/metricdata/cc-metric-store.go @@ -11,6 +11,7 @@ import ( "encoding/json" "fmt" "net/http" + "slices" "sort" "strings" "time" @@ -800,6 +801,7 @@ func (ccms *CCMetricStore) LoadNodeData( // Used for Systems-View Node-List func (ccms *CCMetricStore) LoadNodeListData( cluster, subCluster, nodeFilter string, + preFiltered []string, metrics []string, scopes []schema.MetricScope, resolution int, @@ -824,18 +826,37 @@ func (ccms *CCMetricStore) LoadNodeListData( } } - // 2) Filter nodes + // 2.1) Filter nodes by name if nodeFilter != "" { - filteredNodes := []string{} + filteredNodesByName := []string{} for _, node := range nodes { if strings.Contains(node, nodeFilter) { - filteredNodes = append(filteredNodes, node) + filteredNodesByName = append(filteredNodesByName, node) } } - nodes = filteredNodes + nodes = filteredNodesByName } - // 2.1) Count total nodes && Sort nodes -> Sorting invalidated after ccms return ... + // 2.2) Filter nodes by state using prefiltered match array + if len(preFiltered) > 0 { + filteredNodesByState := []string{} + if preFiltered[0] == "exclude" { // Backwards: PreFiltered contains all Nodes in DB > Return Missing Nodes + for _, node := range nodes { + if !slices.Contains(preFiltered, node) { + filteredNodesByState = append(filteredNodesByState, node) + } + } + } else { // Forwards: Prefiltered contains specific nodeState > Return Matches + for _, node := range nodes { + if slices.Contains(preFiltered, node) { + filteredNodesByState = append(filteredNodesByState, node) + } + } + } + nodes = filteredNodesByState + } + + // 2.3) Count total nodes && Sort nodes -> Sorting invalidated after return ... totalNodes = len(nodes) sort.Strings(nodes) diff --git a/internal/metricdata/metricdata.go b/internal/metricdata/metricdata.go index 3219611..cab7284 100644 --- a/internal/metricdata/metricdata.go +++ b/internal/metricdata/metricdata.go @@ -36,7 +36,7 @@ type MetricDataRepository interface { LoadNodeData(cluster string, metrics, nodes []string, scopes []schema.MetricScope, from, to time.Time, ctx context.Context) (map[string]map[string][]*schema.JobMetric, error) // Return a map of hosts to a map of metrics to a map of scopes for multiple nodes. 
- LoadNodeListData(cluster, subCluster, nodeFilter string, metrics []string, scopes []schema.MetricScope, resolution int, from, to time.Time, page *model.PageRequest, ctx context.Context) (map[string]schema.JobData, int, bool, error) + LoadNodeListData(cluster, subCluster, nodeFilter string, preFiltered []string, metrics []string, scopes []schema.MetricScope, resolution int, from, to time.Time, page *model.PageRequest, ctx context.Context) (map[string]schema.JobData, int, bool, error) } var metricDataRepos map[string]MetricDataRepository = map[string]MetricDataRepository{} diff --git a/internal/metricdata/prometheus.go b/internal/metricdata/prometheus.go index 2ec8558..d441d28 100644 --- a/internal/metricdata/prometheus.go +++ b/internal/metricdata/prometheus.go @@ -14,6 +14,7 @@ import ( "net/http" "os" "regexp" + "slices" "sort" "strings" "sync" @@ -495,6 +496,7 @@ func (pdb *PrometheusDataRepository) LoadScopedStats( // Implemented by NHR@FAU; Used in NodeList-View func (pdb *PrometheusDataRepository) LoadNodeListData( cluster, subCluster, nodeFilter string, + preFiltered []string, metrics []string, scopes []schema.MetricScope, resolution int, @@ -520,18 +522,37 @@ func (pdb *PrometheusDataRepository) LoadNodeListData( } } - // 2) Filter nodes + // 2.1) Filter nodes by name if nodeFilter != "" { - filteredNodes := []string{} + filteredNodesByName := []string{} for _, node := range nodes { if strings.Contains(node, nodeFilter) { - filteredNodes = append(filteredNodes, node) + filteredNodesByName = append(filteredNodesByName, node) } } - nodes = filteredNodes + nodes = filteredNodesByName } - // 2.1) Count total nodes && Sort nodes -> Sorting invalidated after return ... + // 2.2) Filter nodes by state using prefiltered match array + if len(preFiltered) > 0 { + filteredNodesByState := []string{} + if preFiltered[0] == "exclude" { // Backwards: PreFiltered contains all Nodes in DB > Return Missing Nodes + for _, node := range nodes { + if !slices.Contains(preFiltered, node) { + filteredNodesByState = append(filteredNodesByState, node) + } + } + } else { // Forwards: Prefiltered contains specific nodeState > Return Matches + for _, node := range nodes { + if slices.Contains(preFiltered, node) { + filteredNodesByState = append(filteredNodesByState, node) + } + } + } + nodes = filteredNodesByState + } + + // 2.3) Count total nodes && Sort nodes -> Sorting invalidated after return ... totalNodes = len(nodes) sort.Strings(nodes) diff --git a/internal/metricdata/utils.go b/internal/metricdata/utils.go index faae0dac..71f4a43 100644 --- a/internal/metricdata/utils.go +++ b/internal/metricdata/utils.go @@ -64,6 +64,7 @@ func (tmdr *TestMetricDataRepository) LoadNodeData( func (tmdr *TestMetricDataRepository) LoadNodeListData( cluster, subCluster, nodeFilter string, + preFiltered []string, metrics []string, scopes []schema.MetricScope, resolution int, diff --git a/internal/repository/node.go b/internal/repository/node.go index f9c056e..b6b89d1 100644 --- a/internal/repository/node.go +++ b/internal/repository/node.go @@ -79,6 +79,7 @@ func (r *NodeRepository) FetchMetadata(hostname string, cluster string) (map[str func (r *NodeRepository) GetNode(hostname string, cluster string, withMeta bool) (*schema.Node, error) { node := &schema.Node{} + var timestamp int if err := sq.Select("node.hostname", "node.cluster", "node.subcluster", "node_state.node_state", "node_state.health_state", "MAX(node_state.time_stamp) as time"). From("node_state"). 
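// The state prefilter above is duplicated verbatim in cc-metric-store-internal.go,
// cc-metric-store.go and prometheus.go. A possible shared helper (hypothetical,
// not part of this diff) capturing the "exclude" sentinel convention:
package metricdata

import "slices"

// filterNodesByState keeps the nodes matched (or, when the first element is
// the "exclude" sentinel, NOT matched) by the prefiltered hostname list.
func filterNodesByState(nodes, preFiltered []string) []string {
	if len(preFiltered) == 0 {
		return nodes // no state filter requested
	}
	exclude := preFiltered[0] == "exclude" // backwards mode: return nodes missing from the DB
	out := make([]string, 0, len(nodes))
	for _, node := range nodes {
		if slices.Contains(preFiltered, node) != exclude {
			out = append(out, node)
		}
	}
	return out
}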
@@ -87,8 +88,8 @@ func (r *NodeRepository) GetNode(hostname string, cluster string, withMeta bool) Where("node.cluster = ?", cluster). GroupBy("node_state.node_id"). RunWith(r.DB). - QueryRow().Scan(&node.Hostname, &node.Cluster, &node.SubCluster, &node.NodeState, &node.HealthState); err != nil { - cclog.Warnf("Error while querying node '%s' from database: %v", hostname, err) + QueryRow().Scan(&node.Hostname, &node.Cluster, &node.SubCluster, &node.NodeState, &node.HealthState, ×tamp); err != nil { + cclog.Warnf("Error while querying node '%s' at time '%d' from database: %v", hostname, timestamp, err) return nil, err } @@ -107,6 +108,7 @@ func (r *NodeRepository) GetNode(hostname string, cluster string, withMeta bool) func (r *NodeRepository) GetNodeById(id int64, withMeta bool) (*schema.Node, error) { node := &schema.Node{} + var timestamp int if err := sq.Select("node.hostname", "node.cluster", "node.subcluster", "node_state.node_state", "node_state.health_state", "MAX(node_state.time_stamp) as time"). From("node_state"). @@ -114,8 +116,8 @@ func (r *NodeRepository) GetNodeById(id int64, withMeta bool) (*schema.Node, err Where("node.id = ?", id). GroupBy("node_state.node_id"). RunWith(r.DB). - QueryRow().Scan(&node.Hostname, &node.Cluster, &node.SubCluster, &node.NodeState, &node.HealthState); err != nil { - cclog.Warnf("Error while querying node ID '%d' from database: %v", id, err) + QueryRow().Scan(&node.Hostname, &node.Cluster, &node.SubCluster, &node.NodeState, &node.HealthState, ×tamp); err != nil { + cclog.Warnf("Error while querying node ID '%d' at time '%d' from database: %v", id, timestamp, err) return nil, err } @@ -238,8 +240,8 @@ func (r *NodeRepository) QueryNodes( order *model.OrderByInput, // Currently unused! ) ([]*schema.Node, error) { query, qerr := AccessCheck(ctx, - sq.Select("node.hostname", "node.cluster", "node.subcluster", "node_state.node_state", - "node_state.health_state", "MAX(node_state.time_stamp) as time"). + sq.Select("hostname", "cluster", "subcluster", "node_state", + "health_state", "MAX(time_stamp) as time"). From("node"). Join("node_state ON node_state.node_id = node.id")) if qerr != nil { @@ -248,24 +250,31 @@ func (r *NodeRepository) QueryNodes( for _, f := range filters { if f.Hostname != nil { - query = buildStringCondition("node.hostname", f.Hostname, query) + query = buildStringCondition("hostname", f.Hostname, query) } if f.Cluster != nil { - query = buildStringCondition("node.cluster", f.Cluster, query) + query = buildStringCondition("cluster", f.Cluster, query) } if f.Subcluster != nil { - query = buildStringCondition("node.subcluster", f.Subcluster, query) + query = buildStringCondition("subcluster", f.Subcluster, query) } if f.SchedulerState != nil { - query = query.Where("node.node_state = ?", f.SchedulerState) + query = query.Where("node_state = ?", f.SchedulerState) + // Requires Additional time_stamp Filter: Else the last (past!) time_stamp with queried state will be returned + now := time.Now().Unix() + query = query.Where(sq.Gt{"time_stamp": (now - 60)}) } if f.HealthState != nil { - query = query.Where("node.health_state = ?", f.HealthState) + query = query.Where("health_state = ?", f.HealthState) + // Requires Additional time_stamp Filter: Else the last (past!) 
time_stamp with queried state will be returned + now := time.Now().Unix() + query = query.Where(sq.Gt{"time_stamp": (now - 60)}) } } - // Add Grouping after filters - query = query.GroupBy("node_state.node_id") + // Add Grouping and ORder after filters + query = query.GroupBy("node_id"). + OrderBy("hostname ASC") rows, err := query.RunWith(r.stmtCache).Query() if err != nil { @@ -277,11 +286,11 @@ func (r *NodeRepository) QueryNodes( nodes := make([]*schema.Node, 0, 50) for rows.Next() { node := schema.Node{} - + var timestamp int if err := rows.Scan(&node.Hostname, &node.Cluster, &node.SubCluster, - &node.NodeState, &node.HealthState); err != nil { + &node.NodeState, &node.HealthState, ×tamp); err != nil { rows.Close() - cclog.Warn("Error while scanning rows (Nodes)") + cclog.Warnf("Error while scanning rows (QueryNodes) at time '%d'", timestamp) return nil, err } nodes = append(nodes, &node) @@ -308,9 +317,10 @@ func (r *NodeRepository) ListNodes(cluster string) ([]*schema.Node, error) { defer rows.Close() for rows.Next() { node := &schema.Node{} + var timestamp int if err := rows.Scan(&node.Hostname, &node.Cluster, - &node.SubCluster, &node.NodeState, &node.HealthState); err != nil { - cclog.Warn("Error while scanning node list") + &node.SubCluster, &node.NodeState, &node.HealthState, ×tamp); err != nil { + cclog.Warnf("Error while scanning node list (ListNodes) at time '%d'", timestamp) return nil, err } @@ -320,8 +330,38 @@ func (r *NodeRepository) ListNodes(cluster string) ([]*schema.Node, error) { return nodeList, nil } -func (r *NodeRepository) CountNodeStates(ctx context.Context, filters []*model.NodeFilter) ([]*model.NodeStates, error) { - query, qerr := AccessCheck(ctx, sq.Select("hostname", "node_state", "MAX(time_stamp) as time").From("node")) +func (r *NodeRepository) MapNodes(cluster string) (map[string]string, error) { + q := sq.Select("node.hostname", "node_state.node_state", "MAX(node_state.time_stamp) as time"). + From("node"). + Join("node_state ON node_state.node_id = node.id"). + Where("node.cluster = ?", cluster). + GroupBy("node_state.node_id"). 
+ OrderBy("node.hostname ASC") + + rows, err := q.RunWith(r.DB).Query() + if err != nil { + cclog.Warn("Error while querying node list") + return nil, err + } + + stateMap := make(map[string]string) + defer rows.Close() + for rows.Next() { + var hostname, nodestate string + var timestamp int + if err := rows.Scan(&hostname, &nodestate, ×tamp); err != nil { + cclog.Warnf("Error while scanning node list (MapNodes) at time '%d'", timestamp) + return nil, err + } + + stateMap[hostname] = nodestate + } + + return stateMap, nil +} + +func (r *NodeRepository) CountStates(ctx context.Context, filters []*model.NodeFilter, column string) ([]*model.NodeStates, error) { + query, qerr := AccessCheck(ctx, sq.Select("hostname", column, "MAX(time_stamp) as time").From("node")) if qerr != nil { return nil, qerr } @@ -358,16 +398,16 @@ func (r *NodeRepository) CountNodeStates(ctx context.Context, filters []*model.N stateMap := map[string]int{} for rows.Next() { - var hostname, node_state string - var timestamp int64 + var hostname, state string + var timestamp int - if err := rows.Scan(&hostname, &node_state, ×tamp); err != nil { + if err := rows.Scan(&hostname, &state, ×tamp); err != nil { rows.Close() - cclog.Warn("Error while scanning rows (NodeStates)") + cclog.Warnf("Error while scanning rows (CountStates) at time '%d'", timestamp) return nil, err } - stateMap[node_state] += 1 + stateMap[state] += 1 } nodes := make([]*model.NodeStates, 0) @@ -379,8 +419,8 @@ func (r *NodeRepository) CountNodeStates(ctx context.Context, filters []*model.N return nodes, nil } -func (r *NodeRepository) CountHealthStates(ctx context.Context, filters []*model.NodeFilter) ([]*model.NodeStates, error) { - query, qerr := AccessCheck(ctx, sq.Select("hostname", "health_state", "MAX(time_stamp) as time").From("node")) +func (r *NodeRepository) CountStatesTimed(ctx context.Context, filters []*model.NodeFilter, column string) ([]*model.NodeStatesTimed, error) { + query, qerr := AccessCheck(ctx, sq.Select(column, "time_stamp", "count(*) as count").From("node")) // "cluster"? 
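// Illustrative shape of the final query (comment only, not part of this diff):
// with column = "node_state" and the join, filters and grouping added below,
// the builder yields roughly
//
//	SELECT node_state, time_stamp, count(*) AS count
//	FROM node JOIN node_state ON node_state.node_id = node.id
//	WHERE time_stamp > ?          -- f.TimeStart, required
//	GROUP BY node_state, time_stamp
//	ORDER BY time_stamp ASC
//
// i.e. one row per (state, timestamp) pair. The scan loop below then pivots
// hypothetical rows such as
//	("idle", 1000, 90), ("allocated", 1000, 10), ("idle", 1060, 85)
// into parallel arrays per state:
//	{State: "idle", Times: []int{1000, 1060}, Counts: []int{90, 85}}
//	{State: "allocated", Times: []int{1000}, Counts: []int{10}}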
if qerr != nil { return nil, qerr } @@ -388,6 +428,11 @@ func (r *NodeRepository) CountHealthStates(ctx context.Context, filters []*model query = query.Join("node_state ON node_state.node_id = node.id") for _, f := range filters { + // Required + if f.TimeStart != nil { + query = query.Where("time_stamp > ?", f.TimeStart) + } + // Optional if f.Hostname != nil { query = buildStringCondition("hostname", f.Hostname, query) } @@ -406,7 +451,7 @@ func (r *NodeRepository) CountHealthStates(ctx context.Context, filters []*model } // Add Group and Order - query = query.GroupBy("hostname").OrderBy("hostname DESC") + query = query.GroupBy(column + ", time_stamp").OrderBy("time_stamp ASC") rows, err := query.RunWith(r.stmtCache).Query() if err != nil { @@ -415,27 +460,32 @@ func (r *NodeRepository) CountHealthStates(ctx context.Context, filters []*model return nil, err } - stateMap := map[string]int{} + rawData := make(map[string][][]int) for rows.Next() { - var hostname, health_state string - var timestamp int64 + var state string + var timestamp, count int - if err := rows.Scan(&hostname, &health_state, ×tamp); err != nil { + if err := rows.Scan(&state, ×tamp, &count); err != nil { rows.Close() - cclog.Warn("Error while scanning rows (NodeStates)") + cclog.Warnf("Error while scanning rows (CountStatesTimed) at time '%d'", timestamp) return nil, err } - stateMap[health_state] += 1 + if rawData[state] == nil { + rawData[state] = [][]int{make([]int, 0), make([]int, 0)} + } + + rawData[state][0] = append(rawData[state][0], timestamp) + rawData[state][1] = append(rawData[state][1], count) } - nodes := make([]*model.NodeStates, 0) - for state, counts := range stateMap { - node := model.NodeStates{State: state, Count: counts} - nodes = append(nodes, &node) + timedStates := make([]*model.NodeStatesTimed, 0) + for state, data := range rawData { + entry := model.NodeStatesTimed{State: state, Times: data[0], Counts: data[1]} + timedStates = append(timedStates, &entry) } - return nodes, nil + return timedStates, nil } func AccessCheck(ctx context.Context, query sq.SelectBuilder) (sq.SelectBuilder, error) { diff --git a/pkg/archive/s3Backend.go b/pkg/archive/s3Backend.go index 61195ed..23e94e2 100644 --- a/pkg/archive/s3Backend.go +++ b/pkg/archive/s3Backend.go @@ -202,7 +202,7 @@ func (s3a *S3Archive) Info() { for _, cluster := range s3a.clusters { ci[cluster] = &clusterInfo{dateFirst: time.Now().Unix()} - + // List all jobs for this cluster prefix := cluster + "/" paginator := s3.NewListObjectsV2Paginator(s3a.client, &s3.ListObjectsV2Input{ @@ -260,19 +260,19 @@ func (s3a *S3Archive) Info() { func (s3a *S3Archive) Exists(job *schema.Job) bool { ctx := context.Background() key := getS3Key(job, "meta.json") - + _, err := s3a.client.HeadObject(ctx, &s3.HeadObjectInput{ Bucket: aws.String(s3a.bucket), Key: aws.String(key), }) - + return err == nil } func (s3a *S3Archive) LoadJobMeta(job *schema.Job) (*schema.Job, error) { ctx := context.Background() key := getS3Key(job, "meta.json") - + result, err := s3a.client.GetObject(ctx, &s3.GetObjectInput{ Bucket: aws.String(s3a.bucket), Key: aws.String(key), @@ -300,14 +300,13 @@ func (s3a *S3Archive) LoadJobMeta(job *schema.Job) (*schema.Job, error) { func (s3a *S3Archive) LoadJobData(job *schema.Job) (schema.JobData, error) { ctx := context.Background() - + // Try compressed file first keyGz := getS3Key(job, "data.json.gz") result, err := s3a.client.GetObject(ctx, &s3.GetObjectInput{ Bucket: aws.String(s3a.bucket), Key: aws.String(keyGz), }) - if err != nil { // Try 
uncompressed file key := getS3Key(job, "data.json") @@ -352,14 +351,13 @@ func (s3a *S3Archive) LoadJobData(job *schema.Job) (schema.JobData, error) { func (s3a *S3Archive) LoadJobStats(job *schema.Job) (schema.ScopedJobStats, error) { ctx := context.Background() - + // Try compressed file first keyGz := getS3Key(job, "data.json.gz") result, err := s3a.client.GetObject(ctx, &s3.GetObjectInput{ Bucket: aws.String(s3a.bucket), Key: aws.String(keyGz), }) - if err != nil { // Try uncompressed file key := getS3Key(job, "data.json") @@ -405,7 +403,7 @@ func (s3a *S3Archive) LoadJobStats(job *schema.Job) (schema.ScopedJobStats, erro func (s3a *S3Archive) LoadClusterCfg(name string) (*schema.Cluster, error) { ctx := context.Background() key := fmt.Sprintf("%s/cluster.json", name) - + result, err := s3a.client.GetObject(ctx, &s3.GetObjectInput{ Bucket: aws.String(s3a.bucket), Key: aws.String(key), @@ -433,7 +431,7 @@ func (s3a *S3Archive) LoadClusterCfg(name string) (*schema.Cluster, error) { func (s3a *S3Archive) StoreJobMeta(job *schema.Job) error { ctx := context.Background() key := getS3Key(job, "meta.json") - + var buf bytes.Buffer if err := EncodeJobMeta(&buf, job); err != nil { cclog.Error("S3Archive StoreJobMeta() > encoding error") @@ -445,7 +443,6 @@ func (s3a *S3Archive) StoreJobMeta(job *schema.Job) error { Key: aws.String(key), Body: bytes.NewReader(buf.Bytes()), }) - if err != nil { cclog.Errorf("S3Archive StoreJobMeta() > PutObject error: %v", err) return err @@ -503,16 +500,16 @@ func (s3a *S3Archive) GetClusters() []string { func (s3a *S3Archive) CleanUp(jobs []*schema.Job) { ctx := context.Background() start := time.Now() - + for _, job := range jobs { if job == nil { cclog.Errorf("S3Archive CleanUp() error: job is nil") continue } - + // Delete all files in the job directory prefix := getS3Directory(job) - + paginator := s3.NewListObjectsV2Paginator(s3a.client, &s3.ListObjectsV2Input{ Bucket: aws.String(s3a.bucket), Prefix: aws.String(prefix), @@ -544,10 +541,10 @@ func (s3a *S3Archive) CleanUp(jobs []*schema.Job) { func (s3a *S3Archive) Move(jobs []*schema.Job, targetPath string) { ctx := context.Background() - + for _, job := range jobs { sourcePrefix := getS3Directory(job) - + // List all objects in source paginator := s3.NewListObjectsV2Paginator(s3a.client, &s3.ListObjectsV2Input{ Bucket: aws.String(s3a.bucket), @@ -565,10 +562,10 @@ func (s3a *S3Archive) Move(jobs []*schema.Job, targetPath string) { if obj.Key == nil { continue } - + // Compute target key by replacing prefix targetKey := strings.Replace(*obj.Key, sourcePrefix, targetPath+"/", 1) - + // Copy object _, err := s3a.client.CopyObject(ctx, &s3.CopyObjectInput{ Bucket: aws.String(s3a.bucket), @@ -595,14 +592,14 @@ func (s3a *S3Archive) Move(jobs []*schema.Job, targetPath string) { func (s3a *S3Archive) Clean(before int64, after int64) { ctx := context.Background() - + if after == 0 { after = math.MaxInt64 } for _, cluster := range s3a.clusters { prefix := cluster + "/" - + paginator := s3.NewListObjectsV2Paginator(s3a.client, &s3.ListObjectsV2Input{ Bucket: aws.String(s3a.bucket), Prefix: aws.String(prefix), @@ -633,7 +630,7 @@ func (s3a *S3Archive) Clean(before int64, after int64) { if startTime < before || startTime > after { // Delete entire job directory jobPrefix := strings.Join(parts[:4], "/") + "/" - + jobPaginator := s3.NewListObjectsV2Paginator(s3a.client, &s3.ListObjectsV2Input{ Bucket: aws.String(s3a.bucket), Prefix: aws.String(jobPrefix), @@ -671,7 +668,7 @@ func (s3a *S3Archive) Compress(jobs 
[]*schema.Job) { for _, job := range jobs { dataKey := getS3Key(job, "data.json") - + // Check if uncompressed file exists and get its size headResult, err := s3a.client.HeadObject(ctx, &s3.HeadObjectInput{ Bucket: aws.String(s3a.bucket), @@ -742,13 +739,13 @@ func (s3a *S3Archive) Compress(jobs []*schema.Job) { func (s3a *S3Archive) CompressLast(starttime int64) int64 { ctx := context.Background() compressKey := "compress.txt" - + // Try to read existing compress.txt result, err := s3a.client.GetObject(ctx, &s3.GetObjectInput{ Bucket: aws.String(s3a.bucket), Key: aws.String(compressKey), }) - + var last int64 if err == nil { b, _ := io.ReadAll(result.Body) @@ -780,14 +777,14 @@ func (s3a *S3Archive) CompressLast(starttime int64) int64 { func (s3a *S3Archive) Iter(loadMetricData bool) <-chan JobContainer { ch := make(chan JobContainer) - + go func() { ctx := context.Background() defer close(ch) for _, cluster := range s3a.clusters { prefix := cluster + "/" - + paginator := s3.NewListObjectsV2Paginator(s3a.client, &s3.ListObjectsV2Input{ Bucket: aws.String(s3a.bucket), Prefix: aws.String(prefix), diff --git a/web/frontend/src/Analysis.root.svelte b/web/frontend/src/Analysis.root.svelte index 8d9972a..6c5d850 100644 --- a/web/frontend/src/Analysis.root.svelte +++ b/web/frontend/src/Analysis.root.svelte @@ -76,7 +76,7 @@ /* State Init */ let filterComponent = $state(); // see why here: https://stackoverflow.com/questions/58287729/how-can-i-export-a-function-from-a-svelte-component-that-changes-a-value-in-the - let cluster = $state(filterPresets?.cluster); + let cluster = $state({}); let rooflineMaxY = $state(0); let maxY = $state(-1); let colWidth1 = $state(0); @@ -370,7 +370,7 @@ {:else if $statsQuery.data} - + @@ -419,7 +419,7 @@ {:else} t[sortSelection.key], @@ -490,13 +490,21 @@ {:else if $rooflineQuery.error} {$rooflineQuery.error.message} {:else if $rooflineQuery.data && cluster} +
+
Job Roofline Heatmap
+ +
{#key $rooflineQuery.data} subCluster={cluster.subClusters.length >= 1 ? cluster.subClusters[0] : null} maxY={rooflineMaxY} diff --git a/web/frontend/src/Jobs.root.svelte b/web/frontend/src/Jobs.root.svelte index 8bbc5ad..0198b23 100644 --- a/web/frontend/src/Jobs.root.svelte +++ b/web/frontend/src/Jobs.root.svelte @@ -79,6 +79,11 @@ }); }); + $effect(() => { + // Load Metric-Selection for last selected cluster + metrics = selectedCluster ? ccconfig[`metricConfig_jobListMetrics:${selectedCluster}`] : ccconfig.metricConfig_jobListMetrics + }); + /* On Mount */ // The filterPresets are handled by the Filters component, // so we need to wait for it to be ready before we can start a query. diff --git a/web/frontend/src/Node.root.svelte b/web/frontend/src/Node.root.svelte index 5c37ad1..cf11902 100644 --- a/web/frontend/src/Node.root.svelte +++ b/web/frontend/src/Node.root.svelte @@ -16,6 +16,7 @@ Row, Col, Input, + Button, InputGroup, InputGroupText, Icon, @@ -63,6 +64,7 @@ query ($cluster: String!, $nodes: [String!], $from: Time!, $to: Time!) { nodeMetrics(cluster: $cluster, nodes: $nodes, from: $from, to: $to) { host + state subCluster metrics { name @@ -97,6 +99,16 @@ } } `; + // Node State Colors + const stateColors = { + allocated: 'success', + reserved: 'info', + idle: 'primary', + mixed: 'warning', + down: 'danger', + unknown: 'dark', + notindb: 'secondary' + } /* State Init */ let from = $state(presetFrom ? presetFrom : new Date(nowEpoch - (4 * 3600 * 1000))); @@ -123,6 +135,8 @@ }) ); + const thisNodeState = $derived($nodeMetricsData?.data?.nodeMetrics[0]?.state ? $nodeMetricsData.data.nodeMetrics[0].state : 'notindb'); + /* Effect */ $effect(() => { loadUnits($initialized); @@ -138,7 +152,7 @@ } - + {#if $initq.error} {$initq.error.message} {:else if $initq.fetching} @@ -149,19 +163,18 @@ Selected Node - + - +
- { - from = newFrom; - to = newTo; - }} - /> + + + Node State + + @@ -184,6 +197,17 @@ {/if} + + + { + from = newFrom; + to = newTo; + }} + /> + c.name == cluster)} subCluster={$nodeMetricsData.data.nodeMetrics[0].subCluster} series={item.metric.series} + enableFlip forNode /> {:else if item.disabled === true && item.metric} diff --git a/web/frontend/src/Status.root.svelte b/web/frontend/src/Status.root.svelte index d5ae0f7..3d9002a 100644 --- a/web/frontend/src/Status.root.svelte +++ b/web/frontend/src/Status.root.svelte @@ -9,13 +9,17 @@ import { getContext } from "svelte" + import { + init, + } from "./generic/utils.js"; import { Row, Col, Card, CardBody, TabContent, - TabPane + TabPane, + Spinner } from "@sveltestrap/sveltestrap"; import StatusDash from "./status/StatusDash.svelte"; @@ -28,8 +32,8 @@ } = $props(); /*Const Init */ + const { query: initq } = init(); const useCbColors = getContext("cc-config")?.plotConfiguration_colorblindMode || false - @@ -40,24 +44,39 @@ - - - - - - - - - - - - - - - - - - - - \ No newline at end of file +{#if $initq.fetching} + + + + + +{:else if $initq.error} + + + {$initq.error.message} + + +{:else} + + + + + + + + + + + + + + + + + + + + + +{/if} diff --git a/web/frontend/src/Systems.root.svelte b/web/frontend/src/Systems.root.svelte index 3c46adc..079f993 100644 --- a/web/frontend/src/Systems.root.svelte +++ b/web/frontend/src/Systems.root.svelte @@ -59,6 +59,7 @@ const resampleResolutions = resampleConfig ? [...resampleConfig.resolutions] : []; const resampleDefault = resampleConfig ? Math.max(...resampleConfig.resolutions) : 0; + const stateOptions = ['all', 'allocated', 'idle', 'reserved', 'mixed', 'down', 'unknown', 'notindb']; const nowDate = new Date(Date.now()); /* Var Init */ @@ -69,6 +70,7 @@ let from = $state(presetFrom || new Date(nowDate.setHours(nowDate.getHours() - 4))); let selectedResolution = $state(resampleConfig ? resampleDefault : 0); let hostnameFilter = $state(""); + let hoststateFilter = $state("all"); let pendingHostnameFilter = $state(""); let isMetricsSelectionOpen = $state(false); @@ -154,7 +156,7 @@ - + {#if $initq.data} {#if !displayNodeOverview} @@ -191,7 +193,7 @@ - Find Node(s) + Node(s) + + + + + State + + {#each stateOptions as so} + + {/each} + + + - + {:else} - + {/if} {/if} diff --git a/web/frontend/src/User.root.svelte b/web/frontend/src/User.root.svelte index f435846..3c59927 100644 --- a/web/frontend/src/User.root.svelte +++ b/web/frontend/src/User.root.svelte @@ -133,6 +133,11 @@ }; }); + $effect(() => { + // Load Metric-Selection for last selected cluster + metrics = selectedCluster ? 
ccconfig[`metricConfig_jobListMetrics:${selectedCluster}`] : ccconfig.metricConfig_jobListMetrics + }); + /* On Mount */ onMount(() => { filterComponent.updateFilters(); @@ -348,6 +353,7 @@ ylabel="Number of Jobs" yunit="Jobs" usesBins + enableFlip /> {/snippet} diff --git a/web/frontend/src/generic/helper/Refresher.svelte b/web/frontend/src/generic/helper/Refresher.svelte index bfa58dd..7f568bf 100644 --- a/web/frontend/src/generic/helper/Refresher.svelte +++ b/web/frontend/src/generic/helper/Refresher.svelte @@ -27,7 +27,7 @@ function refreshIntervalChanged() { if (refreshIntervalId != null) clearInterval(refreshIntervalId); if (refreshInterval == null) return; - refreshIntervalId = setInterval(() => onRefresh(), refreshInterval); + refreshIntervalId = setInterval(() => onRefresh(refreshInterval), refreshInterval); } /* Svelte 5 onMount */ @@ -51,7 +51,7 @@ - {#if $initq.fetching || $metricStatusQuery.fetching} - - {:else if $initq.error} - {$initq.error.message} - {:else} - - {/if} - - -{#if $metricStatusQuery.error} - + {#if $metricStatusQuery.fetching} + + + {:else if $metricStatusQuery.error} + {$metricStatusQuery.error.message} - -{/if} + {/if} + -{#if $initq.data && $metricStatusQuery.data} +{#if $metricStatusQuery.data} {#if selectedHistograms} @@ -132,6 +123,7 @@ ylabel="Number of Jobs" yunit="Jobs" usesBins + enableFlip /> {/snippet} diff --git a/web/frontend/src/status/StatusDash.svelte b/web/frontend/src/status/StatusDash.svelte index 758c563..03a8cc4 100644 --- a/web/frontend/src/status/StatusDash.svelte +++ b/web/frontend/src/status/StatusDash.svelte @@ -22,34 +22,36 @@ gql, getContextClient, } from "@urql/svelte"; - import { - init, - } from "../generic/utils.js"; - import { scaleNumbers, formatDurationTime } from "../generic/units.js"; + import { formatDurationTime } from "../generic/units.js"; import Refresher from "../generic/helper/Refresher.svelte"; + import TimeSelection from "../generic/select/TimeSelection.svelte"; import Roofline from "../generic/plots/Roofline.svelte"; import Pie, { colors } from "../generic/plots/Pie.svelte"; + import Stacked from "../generic/plots/Stacked.svelte"; /* Svelte 5 Props */ let { + clusters, presetCluster, useCbColors = false, useAltColors = false, } = $props(); /* Const Init */ - const { query: initq } = init(); const client = getContextClient(); /* State Init */ let cluster = $state(presetCluster); let pieWidth = $state(0); - let stackedWidth = $state(0); + let stackedWidth1 = $state(0); + let stackedWidth2 = $state(0); let plotWidths = $state([]); let from = $state(new Date(Date.now() - 5 * 60 * 1000)); let to = $state(new Date(Date.now())); + let stackedFrom = $state(Math.floor(Date.now() / 1000) - 14400); // Bar Gauges let allocatedNodes = $state({}); + let allocatedCores = $state({}); let allocatedAccs = $state({}); let flopRate = $state({}); let flopRateUnitPrefix = $state({}); @@ -63,46 +65,29 @@ let totalAccs = $state({}); /* Derived */ - // Accumulated NodeStates for Piecharts - const nodesStateCounts = $derived(queryStore({ + // States for Stacked charts + const statesTimed = $derived(queryStore({ client: client, query: gql` - query ($filter: [NodeFilter!]) { - nodeStates(filter: $filter) { + query ($filter: [NodeFilter!], $typeNode: String!, $typeHealth: String!) 
{ + nodeStates: nodeStatesTimed(filter: $filter, type: $typeNode) { state - count + counts + times + } + healthStates: nodeStatesTimed(filter: $filter, type: $typeHealth) { + state + counts + times } } `, variables: { - filter: { cluster: { eq: cluster }} - }, - })); - - const refinedStateData = $derived.by(() => { - return $nodesStateCounts?.data?.nodeStates.filter((e) => ['allocated', 'reserved', 'idle', 'mixed','down', 'unknown'].includes(e.state)) - }); - - const refinedHealthData = $derived.by(() => { - return $nodesStateCounts?.data?.nodeStates.filter((e) => ['full', 'partial', 'failed'].includes(e.state)) - }); - - // NodeStates for Stacked charts - const nodesStateTimes = $derived(queryStore({ - client: client, - query: gql` - query ($filter: [NodeFilter!]) { - nodeStatesTimed(filter: $filter) { - state - type - count - time - } - } - `, - variables: { - filter: { cluster: { eq: cluster }, timeStart: Date.now() - (24 * 3600 * 1000)} // Add Selector for Timeframe (4h, 12h, 24h)? + filter: { cluster: { eq: cluster }, timeStart: stackedFrom}, + typeNode: "node", + typeHealth: "health" }, + requestPolicy: "network-only" })); // Note: nodeMetrics are requested on configured $timestep resolution @@ -177,9 +162,14 @@ hostname cluster subCluster - nodeState + schedulerState } } + # Get Current States for Pie Charts + nodeStates(filter: $nodeFilter) { + state + count + } # totalNodes includes multiples if shared jobs jobsStatistics( filter: $jobFilter @@ -190,6 +180,7 @@ id totalJobs totalUsers + totalCores totalAccs } } @@ -207,10 +198,22 @@ requestPolicy: "network-only" })); + const refinedStateData = $derived.by(() => { + return $statusQuery?.data?.nodeStates. + filter((e) => ['allocated', 'reserved', 'idle', 'mixed','down', 'unknown'].includes(e.state)). + sort((a, b) => b.count - a.count) + }); + + const refinedHealthData = $derived.by(() => { + return $statusQuery?.data?.nodeStates. + filter((e) => ['full', 'partial', 'failed'].includes(e.state)). + sort((a, b) => b.count - a.count) + }); + /* Effects */ $effect(() => { - if ($initq.data && $statusQuery.data) { - let subClusters = $initq.data.clusters.find( + if ($statusQuery.data) { + let subClusters = clusters.find( (c) => c.name == cluster, ).subClusters; for (let subCluster of subClusters) { @@ -219,6 +222,10 @@ $statusQuery.data.allocatedNodes.find( ({ name }) => name == subCluster.name, )?.count || 0; + allocatedCores[subCluster.name] = + $statusQuery.data.jobsStatistics.find( + ({ id }) => id == subCluster.name, + )?.totalCores || 0; allocatedAccs[subCluster.name] = $statusQuery.data.jobsStatistics.find( ({ id }) => id == subCluster.name, @@ -348,7 +355,7 @@ for (let j = 0; j < subClusterData.length; j++) { const nodeName = subClusterData[j]?.host ? subClusterData[j].host : "unknown" const nodeMatch = $statusQuery?.data?.nodes?.items?.find((n) => n.hostname == nodeName && n.subCluster == subClusterData[j].subCluster); - const nodeState = nodeMatch?.nodeState ? nodeMatch.nodeState : "notindb" + const schedulerState = nodeMatch?.schedulerState ? nodeMatch.schedulerState : "notindb" let numJobs = 0 if ($statusQuery?.data) { @@ -356,7 +363,7 @@ numJobs = nodeJobs?.length ?
nodeJobs.length : 0 } - result.push({nodeName: nodeName, nodeState: nodeState, numJobs: numJobs}) + result.push({nodeName: nodeName, schedulerState: schedulerState, numJobs: numJobs}) }; }; return result @@ -378,14 +385,24 @@ - + + + { + stackedFrom = Math.floor(newFrom.getTime() / 1000); + }} + /> + { - console.log('Trigger Refresh StatusTab') + onRefresh={(interval) => { from = new Date(Date.now() - 5 * 60 * 1000); to = new Date(Date.now()); + + if (interval) stackedFrom += Math.floor(interval / 1000); + else stackedFrom += 1 // Workaround: TimeSelection not linked, just trigger new data on manual refresh }} /> @@ -394,43 +411,40 @@
- -{#if $initq.data && $nodesStateCounts.data} +{#if $statusQuery?.data?.nodeStates}
{#key refinedStateData}

- {cluster.charAt(0).toUpperCase() + cluster.slice(1)} Node States + Current {cluster.charAt(0).toUpperCase() + cluster.slice(1)} Node States

{#key refinedHealthData}

- {cluster.charAt(0).toUpperCase() + cluster.slice(1)} Node Health + Current {cluster.charAt(0).toUpperCase() + cluster.slice(1)} Node Health

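Aside: with `nodeStates` now fetched as part of `$statusQuery` (see the hunk above), the pies only need the plain per-state aggregate of `(state, count)` pairs. A minimal sketch of that grouped count, assuming the `node`/`node_state` join implied by the `CountHealthStates` hunk earlier in this patch; the function name, cluster column, and exact table layout are illustrative assumptions, not the repository's actual code:

```go
package repository

import (
	"database/sql"

	sq "github.com/Masterminds/squirrel"
)

// countCurrentStates sketches the aggregation behind the "Current Node
// States" pies: one (state, count) pair per distinct state of a cluster.
// Table and column names are assumptions based on the hunks above.
func countCurrentStates(db *sql.DB, cluster string) (map[string]int, error) {
	q := sq.Select("node_state.node_state", "COUNT(*)").
		From("node").
		Join("node_state ON node_state.node_id = node.id").
		Where(sq.Eq{"node.cluster": cluster}).
		GroupBy("node_state.node_state")

	rows, err := q.RunWith(db).Query()
	if err != nil {
		return nil, err
	}
	defer rows.Close()

	counts := map[string]int{}
	for rows.Next() {
		var state string
		var count int
		if err := rows.Scan(&state, &count); err != nil {
			return nil, err
		}
		counts[state] = count
	}
	return counts, rows.Err()
}
```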
-{#if $initq.data && $statusQuery.data} - {#each $initq.data.clusters.find((c) => c.name == cluster).subClusters as subCluster, i} +{#if $statusQuery.data} + {#each clusters.find((c) => c.name == cluster).subClusters as subCluster, i}
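The aliased `nodeStates: nodeStatesTimed(type: $typeNode)` / `healthStates: nodeStatesTimed(type: $typeHealth)` pair above hinges on the new `type` argument selecting which state column the repository groups by (`GroupBy(column + ", time_stamp")`). That mapping is not shown in this patch; a hedged sketch, with the helper name invented and the column names inferred from the `health_state` scan in the repository hunk:

```go
package graph

import "fmt"

// stateColumnFor maps the GraphQL `type` argument of nodeStatesTimed onto
// the database column the repository groups by. The "node"/"health" keys
// match the frontend's typeNode/typeHealth variables; the column names are
// assumptions based on the surrounding hunks.
func stateColumnFor(typeArg string) (string, error) {
	switch typeArg {
	case "node":
		return "node_state", nil
	case "health":
		return "health_state", nil
	default:
		return "", fmt.Errorf("nodeStatesTimed: unknown type %q", typeArg)
	}
}
```

The generated `NodeStatesTimed(ctx, filter, typeArg)` resolver would then hand the resolved column to the grouped repository query shown earlier.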
@@ -579,6 +591,21 @@ Nodes + + + + + {#if totalAccs[subCluster.name] !== null} diff --git a/web/frontend/src/status/UsageDash.svelte b/web/frontend/src/status/UsageDash.svelte index 3b39e55..74dd7a9 100644 --- a/web/frontend/src/status/UsageDash.svelte +++ b/web/frontend/src/status/UsageDash.svelte @@ -24,7 +24,6 @@ getContextClient, } from "@urql/svelte"; import { - init, scramble, scrambleNames, convert2uplot, @@ -41,7 +40,6 @@ } = $props(); /* Const Init */ - const { query: initq } = init(); const client = getContextClient(); const durationBinOptions = ["1m","10m","1h","6h","12h"]; @@ -255,6 +253,7 @@ height="275" usesBins xtime + enableFlip /> {/key} @@ -359,6 +358,7 @@ ylabel="Number of Jobs" yunit="Jobs" height="275" + enableFlip /> @@ -462,6 +462,7 @@ ylabel="Number of Jobs" yunit="Jobs" height="275" + enableFlip /> diff --git a/web/frontend/src/systems/NodeList.svelte b/web/frontend/src/systems/NodeList.svelte index a4cec02..66bedf2 100644 --- a/web/frontend/src/systems/NodeList.svelte +++ b/web/frontend/src/systems/NodeList.svelte @@ -8,6 +8,7 @@ - `selectedMetrics [String]`: The array of selected metrics [Default []] - `selectedResolution Number?`: The selected data resolution [Default: 0] - `hostnameFilter String?`: The active hostnamefilter [Default: ""] + - `hoststateFilter String?`: The active hoststatefilter [Default: ""] - `presetSystemUnits Object`: The object of metric units [Default: null] - `from Date?`: The selected "from" date [Default: null] - `to Date?`: The selected "to" date [Default: null] @@ -28,6 +29,7 @@ selectedMetrics = [], selectedResolution = 0, hostnameFilter = "", + hoststateFilter = "", presetSystemUnits = null, from = null, to = null @@ -37,11 +39,14 @@ const client = getContextClient(); const usePaging = ccconfig?.nodeList_usePaging || false; const nodeListQuery = gql` - query ($cluster: String!, $subCluster: String!, $nodeFilter: String!, $metrics: [String!], $scopes: [MetricScope!]!, $from: Time!, $to: Time!, $paging: PageRequest!, $selectedResolution: Int) { + query ($cluster: String!, $subCluster: String!, $nodeFilter: String!, $stateFilter: String!, $metrics: [String!], + $scopes: [MetricScope!]!, $from: Time!, $to: Time!, $paging: PageRequest!, $selectedResolution: Int + ) { nodeMetricsList( cluster: $cluster subCluster: $subCluster nodeFilter: $nodeFilter + stateFilter: $stateFilter, scopes: $scopes metrics: $metrics from: $from @@ -51,6 +56,7 @@ ) { items { host + state subCluster metrics { name @@ -100,6 +106,7 @@ variables: { cluster: cluster, subCluster: subCluster, + stateFilter: hoststateFilter, nodeFilter: hostnameFilter, scopes: ["core", "socket", "accelerator"], metrics: selectedMetrics, @@ -137,7 +144,7 @@ // Triggers (Except Paging) from, to selectedMetrics, selectedResolution - hostnameFilter + hostnameFilter, hoststateFilter // Continuous Scroll: Reset nodes and paging if parameters change: Existing entries will not match new selections if (!usePaging) { nodes = []; diff --git a/web/frontend/src/systems/NodeOverview.svelte b/web/frontend/src/systems/NodeOverview.svelte index 366f705..6e89398 100644 --- a/web/frontend/src/systems/NodeOverview.svelte +++ b/web/frontend/src/systems/NodeOverview.svelte @@ -6,6 +6,7 @@ - `cluster String`: The cluster to show status information for - `selectedMetric String?`: The selectedMetric input [Default: ""] - `hostnameFilter String?`: The active hostnamefilter [Default: ""] + - `hoststateFilter String?`: The active hoststatefilter [Default: ""] - `from Date?`: The selected "from" date [Default:
null] - `to Date?`: The selected "to" date [Default: null] --> @@ -13,7 +14,7 @@
Allocated Cores
+ +
{allocatedCores[subCluster.name]} / {subCluster.socketsPerNode * subCluster.coresPerSocket * subCluster.numberOfNodes} + Cores
Allocated Accelerators
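For reference, the central reshaping in the `CountHealthStates` hunk near the top of this patch, folding time-ordered `(state, time_stamp, count)` rows into one `NodeStatesTimed` entry per state with parallel `times`/`counts` arrays for the stacked charts, can be exercised in isolation. A self-contained sketch with the model type inlined and invented sample rows:

```go
package main

import "fmt"

// NodeStatesTimed mirrors the reworked GraphQL type: one entry per state,
// carrying parallel times/counts slices instead of one row per sample.
type NodeStatesTimed struct {
	State  string
	Times  []int
	Counts []int
}

// row mirrors one result of the grouped query:
// GROUP BY <state column>, time_stamp ORDER BY time_stamp ASC.
type row struct {
	state     string
	timestamp int
	count     int
}

// pivot folds the time-ordered rows into per-state series, following the
// rawData logic of the repository hunk.
func pivot(rows []row) []*NodeStatesTimed {
	rawData := make(map[string][][]int)
	for _, r := range rows {
		if rawData[r.state] == nil {
			rawData[r.state] = [][]int{{}, {}}
		}
		rawData[r.state][0] = append(rawData[r.state][0], r.timestamp)
		rawData[r.state][1] = append(rawData[r.state][1], r.count)
	}

	timedStates := make([]*NodeStatesTimed, 0, len(rawData))
	for state, data := range rawData {
		timedStates = append(timedStates, &NodeStatesTimed{
			State: state, Times: data[0], Counts: data[1],
		})
	}
	return timedStates
}

func main() {
	rows := []row{
		{"allocated", 1700000000, 120},
		{"idle", 1700000000, 8},
		{"allocated", 1700000600, 118},
		{"idle", 1700000600, 10},
	}
	for _, ts := range pivot(rows) {
		fmt.Printf("%s: times=%v counts=%v\n", ts.State, ts.Times, ts.Counts)
	}
}
```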