From 714d6af7cd5f72153b7503c73d48de379562082a Mon Sep 17 00:00:00 2001 From: Christoph Kluge Date: Fri, 17 Oct 2025 18:24:05 +0200 Subject: [PATCH] initial branch commit, improve countstate backend logic - stacked component rough sketch - gql data request pipeline laid out --- api/schema.graphqls | 9 + internal/graph/generated/generated.go | 449 +++++++++++++- internal/graph/model/models_gen.go | 8 + internal/graph/schema.resolvers.go | 55 +- internal/repository/node.go | 119 ++-- web/frontend/src/generic/JobCompare.svelte | 4 +- .../src/generic/plots/Comparogram.svelte | 4 +- .../src/generic/plots/Histogram.svelte | 10 +- .../src/generic/plots/MetricPlot.svelte | 10 +- web/frontend/src/generic/plots/Stacked.svelte | 570 ++++++++++++++++++ web/frontend/src/generic/units.js | 12 +- web/frontend/src/status/StatusDash.svelte | 72 ++- 12 files changed, 1267 insertions(+), 55 deletions(-) create mode 100644 web/frontend/src/generic/plots/Stacked.svelte diff --git a/api/schema.graphqls b/api/schema.graphqls index 8e32c64..4ee573c 100644 --- a/api/schema.graphqls +++ b/api/schema.graphqls @@ -26,6 +26,13 @@ type NodeStates { count: Int! } +type NodeStatesTimed { + state: String! + type: String! + count: Int! + time: Int! +} + type Job { id: ID! jobId: Int! @@ -310,6 +317,7 @@ type Query { node(id: ID!): Node nodes(filter: [NodeFilter!], order: OrderByInput): NodeStateResultList! nodeStates(filter: [NodeFilter!]): [NodeStates!]! + nodeStatesTimed(filter: [NodeFilter!]): [NodeStatesTimed!]! 
job(id: ID!): Job jobMetrics( @@ -404,6 +412,7 @@ input NodeFilter { subcluster: StringInput schedulerState: SchedulerState healthState: MonitoringState + timeStart: Int } input JobFilter { diff --git a/internal/graph/generated/generated.go b/internal/graph/generated/generated.go index 9c47a39..e1baf4c 100644 --- a/internal/graph/generated/generated.go +++ b/internal/graph/generated/generated.go @@ -301,6 +301,13 @@ type ComplexityRoot struct { State func(childComplexity int) int } + NodeStatesTimed struct { + Count func(childComplexity int) int + State func(childComplexity int) int + Time func(childComplexity int) int + Type func(childComplexity int) int + } + NodesResultList struct { Count func(childComplexity int) int HasNextPage func(childComplexity int) int @@ -325,6 +332,7 @@ type ComplexityRoot struct { NodeMetrics func(childComplexity int, cluster string, nodes []string, scopes []schema.MetricScope, metrics []string, from time.Time, to time.Time) int NodeMetricsList func(childComplexity int, cluster string, subCluster string, nodeFilter string, scopes []schema.MetricScope, metrics []string, from time.Time, to time.Time, page *model.PageRequest, resolution *int) int NodeStates func(childComplexity int, filter []*model.NodeFilter) int + NodeStatesTimed func(childComplexity int, filter []*model.NodeFilter) int Nodes func(childComplexity int, filter []*model.NodeFilter, order *model.OrderByInput) int RooflineHeatmap func(childComplexity int, filter []*model.JobFilter, rows int, cols int, minX float64, minY float64, maxX float64, maxY float64) int ScopedJobStats func(childComplexity int, id string, metrics []string, scopes []schema.MetricScope) int @@ -465,6 +473,7 @@ type QueryResolver interface { Node(ctx context.Context, id string) (*schema.Node, error) Nodes(ctx context.Context, filter []*model.NodeFilter, order *model.OrderByInput) (*model.NodeStateResultList, error) NodeStates(ctx context.Context, filter []*model.NodeFilter) ([]*model.NodeStates, error) + 
NodeStatesTimed(ctx context.Context, filter []*model.NodeFilter) ([]*model.NodeStatesTimed, error) Job(ctx context.Context, id string) (*schema.Job, error) JobMetrics(ctx context.Context, id string, metrics []string, scopes []schema.MetricScope, resolution *int) ([]*model.JobMetricWithName, error) JobStats(ctx context.Context, id string, metrics []string) ([]*model.NamedStats, error) @@ -1608,6 +1617,34 @@ func (e *executableSchema) Complexity(ctx context.Context, typeName, field strin return e.complexity.NodeStates.State(childComplexity), true + case "NodeStatesTimed.count": + if e.complexity.NodeStatesTimed.Count == nil { + break + } + + return e.complexity.NodeStatesTimed.Count(childComplexity), true + + case "NodeStatesTimed.state": + if e.complexity.NodeStatesTimed.State == nil { + break + } + + return e.complexity.NodeStatesTimed.State(childComplexity), true + + case "NodeStatesTimed.time": + if e.complexity.NodeStatesTimed.Time == nil { + break + } + + return e.complexity.NodeStatesTimed.Time(childComplexity), true + + case "NodeStatesTimed.type": + if e.complexity.NodeStatesTimed.Type == nil { + break + } + + return e.complexity.NodeStatesTimed.Type(childComplexity), true + case "NodesResultList.count": if e.complexity.NodesResultList.Count == nil { break @@ -1808,6 +1845,18 @@ func (e *executableSchema) Complexity(ctx context.Context, typeName, field strin return e.complexity.Query.NodeStates(childComplexity, args["filter"].([]*model.NodeFilter)), true + case "Query.nodeStatesTimed": + if e.complexity.Query.NodeStatesTimed == nil { + break + } + + args, err := ec.field_Query_nodeStatesTimed_args(ctx, rawArgs) + if err != nil { + return 0, false + } + + return e.complexity.Query.NodeStatesTimed(childComplexity, args["filter"].([]*model.NodeFilter)), true + case "Query.nodes": if e.complexity.Query.Nodes == nil { break @@ -2390,6 +2439,13 @@ type NodeStates { count: Int! } +type NodeStatesTimed { + state: String! + type: String! + count: Int! + time: Int! 
+} + type Job { id: ID! jobId: Int! @@ -2674,6 +2730,7 @@ type Query { node(id: ID!): Node nodes(filter: [NodeFilter!], order: OrderByInput): NodeStateResultList! nodeStates(filter: [NodeFilter!]): [NodeStates!]! + nodeStatesTimed(filter: [NodeFilter!]): [NodeStatesTimed!]! job(id: ID!): Job jobMetrics( @@ -2768,6 +2825,7 @@ input NodeFilter { subcluster: StringInput schedulerState: SchedulerState healthState: MonitoringState + timeStart: Int } input JobFilter { @@ -3249,6 +3307,17 @@ func (ec *executionContext) field_Query_nodeMetrics_args(ctx context.Context, ra return args, nil } +func (ec *executionContext) field_Query_nodeStatesTimed_args(ctx context.Context, rawArgs map[string]any) (map[string]any, error) { + var err error + args := map[string]any{} + arg0, err := graphql.ProcessArgField(ctx, rawArgs, "filter", ec.unmarshalONodeFilter2ᚕᚖgithubᚗcomᚋClusterCockpitᚋccᚑbackendᚋinternalᚋgraphᚋmodelᚐNodeFilterᚄ) + if err != nil { + return nil, err + } + args["filter"] = arg0 + return args, nil +} + func (ec *executionContext) field_Query_nodeStates_args(ctx context.Context, rawArgs map[string]any) (map[string]any, error) { var err error args := map[string]any{} @@ -10517,6 +10586,182 @@ func (ec *executionContext) fieldContext_NodeStates_count(_ context.Context, fie return fc, nil } +func (ec *executionContext) _NodeStatesTimed_state(ctx context.Context, field graphql.CollectedField, obj *model.NodeStatesTimed) (ret graphql.Marshaler) { + fc, err := ec.fieldContext_NodeStatesTimed_state(ctx, field) + if err != nil { + return graphql.Null + } + ctx = graphql.WithFieldContext(ctx, fc) + defer func() { + if r := recover(); r != nil { + ec.Error(ctx, ec.Recover(ctx, r)) + ret = graphql.Null + } + }() + resTmp, err := ec.ResolverMiddleware(ctx, func(rctx context.Context) (any, error) { + ctx = rctx // use context from middleware stack in children + return obj.State, nil + }) + if err != nil { + ec.Error(ctx, err) + return graphql.Null + } + if resTmp == nil { + if 
!graphql.HasFieldError(ctx, fc) { + ec.Errorf(ctx, "must not be null") + } + return graphql.Null + } + res := resTmp.(string) + fc.Result = res + return ec.marshalNString2string(ctx, field.Selections, res) +} + +func (ec *executionContext) fieldContext_NodeStatesTimed_state(_ context.Context, field graphql.CollectedField) (fc *graphql.FieldContext, err error) { + fc = &graphql.FieldContext{ + Object: "NodeStatesTimed", + Field: field, + IsMethod: false, + IsResolver: false, + Child: func(ctx context.Context, field graphql.CollectedField) (*graphql.FieldContext, error) { + return nil, errors.New("field of type String does not have child fields") + }, + } + return fc, nil +} + +func (ec *executionContext) _NodeStatesTimed_type(ctx context.Context, field graphql.CollectedField, obj *model.NodeStatesTimed) (ret graphql.Marshaler) { + fc, err := ec.fieldContext_NodeStatesTimed_type(ctx, field) + if err != nil { + return graphql.Null + } + ctx = graphql.WithFieldContext(ctx, fc) + defer func() { + if r := recover(); r != nil { + ec.Error(ctx, ec.Recover(ctx, r)) + ret = graphql.Null + } + }() + resTmp, err := ec.ResolverMiddleware(ctx, func(rctx context.Context) (any, error) { + ctx = rctx // use context from middleware stack in children + return obj.Type, nil + }) + if err != nil { + ec.Error(ctx, err) + return graphql.Null + } + if resTmp == nil { + if !graphql.HasFieldError(ctx, fc) { + ec.Errorf(ctx, "must not be null") + } + return graphql.Null + } + res := resTmp.(string) + fc.Result = res + return ec.marshalNString2string(ctx, field.Selections, res) +} + +func (ec *executionContext) fieldContext_NodeStatesTimed_type(_ context.Context, field graphql.CollectedField) (fc *graphql.FieldContext, err error) { + fc = &graphql.FieldContext{ + Object: "NodeStatesTimed", + Field: field, + IsMethod: false, + IsResolver: false, + Child: func(ctx context.Context, field graphql.CollectedField) (*graphql.FieldContext, error) { + return nil, errors.New("field of type String does 
not have child fields") + }, + } + return fc, nil +} + +func (ec *executionContext) _NodeStatesTimed_count(ctx context.Context, field graphql.CollectedField, obj *model.NodeStatesTimed) (ret graphql.Marshaler) { + fc, err := ec.fieldContext_NodeStatesTimed_count(ctx, field) + if err != nil { + return graphql.Null + } + ctx = graphql.WithFieldContext(ctx, fc) + defer func() { + if r := recover(); r != nil { + ec.Error(ctx, ec.Recover(ctx, r)) + ret = graphql.Null + } + }() + resTmp, err := ec.ResolverMiddleware(ctx, func(rctx context.Context) (any, error) { + ctx = rctx // use context from middleware stack in children + return obj.Count, nil + }) + if err != nil { + ec.Error(ctx, err) + return graphql.Null + } + if resTmp == nil { + if !graphql.HasFieldError(ctx, fc) { + ec.Errorf(ctx, "must not be null") + } + return graphql.Null + } + res := resTmp.(int) + fc.Result = res + return ec.marshalNInt2int(ctx, field.Selections, res) +} + +func (ec *executionContext) fieldContext_NodeStatesTimed_count(_ context.Context, field graphql.CollectedField) (fc *graphql.FieldContext, err error) { + fc = &graphql.FieldContext{ + Object: "NodeStatesTimed", + Field: field, + IsMethod: false, + IsResolver: false, + Child: func(ctx context.Context, field graphql.CollectedField) (*graphql.FieldContext, error) { + return nil, errors.New("field of type Int does not have child fields") + }, + } + return fc, nil +} + +func (ec *executionContext) _NodeStatesTimed_time(ctx context.Context, field graphql.CollectedField, obj *model.NodeStatesTimed) (ret graphql.Marshaler) { + fc, err := ec.fieldContext_NodeStatesTimed_time(ctx, field) + if err != nil { + return graphql.Null + } + ctx = graphql.WithFieldContext(ctx, fc) + defer func() { + if r := recover(); r != nil { + ec.Error(ctx, ec.Recover(ctx, r)) + ret = graphql.Null + } + }() + resTmp, err := ec.ResolverMiddleware(ctx, func(rctx context.Context) (any, error) { + ctx = rctx // use context from middleware stack in children + return 
obj.Time, nil + }) + if err != nil { + ec.Error(ctx, err) + return graphql.Null + } + if resTmp == nil { + if !graphql.HasFieldError(ctx, fc) { + ec.Errorf(ctx, "must not be null") + } + return graphql.Null + } + res := resTmp.(int) + fc.Result = res + return ec.marshalNInt2int(ctx, field.Selections, res) +} + +func (ec *executionContext) fieldContext_NodeStatesTimed_time(_ context.Context, field graphql.CollectedField) (fc *graphql.FieldContext, err error) { + fc = &graphql.FieldContext{ + Object: "NodeStatesTimed", + Field: field, + IsMethod: false, + IsResolver: false, + Child: func(ctx context.Context, field graphql.CollectedField) (*graphql.FieldContext, error) { + return nil, errors.New("field of type Int does not have child fields") + }, + } + return fc, nil +} + func (ec *executionContext) _NodesResultList_items(ctx context.Context, field graphql.CollectedField, obj *model.NodesResultList) (ret graphql.Marshaler) { fc, err := ec.fieldContext_NodesResultList_items(ctx, field) if err != nil { @@ -11255,6 +11500,71 @@ func (ec *executionContext) fieldContext_Query_nodeStates(ctx context.Context, f return fc, nil } +func (ec *executionContext) _Query_nodeStatesTimed(ctx context.Context, field graphql.CollectedField) (ret graphql.Marshaler) { + fc, err := ec.fieldContext_Query_nodeStatesTimed(ctx, field) + if err != nil { + return graphql.Null + } + ctx = graphql.WithFieldContext(ctx, fc) + defer func() { + if r := recover(); r != nil { + ec.Error(ctx, ec.Recover(ctx, r)) + ret = graphql.Null + } + }() + resTmp, err := ec.ResolverMiddleware(ctx, func(rctx context.Context) (any, error) { + ctx = rctx // use context from middleware stack in children + return ec.resolvers.Query().NodeStatesTimed(rctx, fc.Args["filter"].([]*model.NodeFilter)) + }) + if err != nil { + ec.Error(ctx, err) + return graphql.Null + } + if resTmp == nil { + if !graphql.HasFieldError(ctx, fc) { + ec.Errorf(ctx, "must not be null") + } + return graphql.Null + } + res := 
resTmp.([]*model.NodeStatesTimed) + fc.Result = res + return ec.marshalNNodeStatesTimed2ᚕᚖgithubᚗcomᚋClusterCockpitᚋccᚑbackendᚋinternalᚋgraphᚋmodelᚐNodeStatesTimedᚄ(ctx, field.Selections, res) +} + +func (ec *executionContext) fieldContext_Query_nodeStatesTimed(ctx context.Context, field graphql.CollectedField) (fc *graphql.FieldContext, err error) { + fc = &graphql.FieldContext{ + Object: "Query", + Field: field, + IsMethod: true, + IsResolver: true, + Child: func(ctx context.Context, field graphql.CollectedField) (*graphql.FieldContext, error) { + switch field.Name { + case "state": + return ec.fieldContext_NodeStatesTimed_state(ctx, field) + case "type": + return ec.fieldContext_NodeStatesTimed_type(ctx, field) + case "count": + return ec.fieldContext_NodeStatesTimed_count(ctx, field) + case "time": + return ec.fieldContext_NodeStatesTimed_time(ctx, field) + } + return nil, fmt.Errorf("no field named %q was found under type NodeStatesTimed", field.Name) + }, + } + defer func() { + if r := recover(); r != nil { + err = ec.Recover(ctx, r) + ec.Error(ctx, err) + } + }() + ctx = graphql.WithFieldContext(ctx, fc) + if fc.Args, err = ec.field_Query_nodeStatesTimed_args(ctx, field.ArgumentMap(ec.Variables)); err != nil { + ec.Error(ctx, err) + return fc, err + } + return fc, nil +} + func (ec *executionContext) _Query_job(ctx context.Context, field graphql.CollectedField) (ret graphql.Marshaler) { fc, err := ec.fieldContext_Query_job(ctx, field) if err != nil { @@ -16830,7 +17140,7 @@ func (ec *executionContext) unmarshalInputNodeFilter(ctx context.Context, obj an asMap[k] = v } - fieldsInOrder := [...]string{"hostname", "cluster", "subcluster", "schedulerState", "healthState"} + fieldsInOrder := [...]string{"hostname", "cluster", "subcluster", "schedulerState", "healthState", "timeStart"} for _, k := range fieldsInOrder { v, ok := asMap[k] if !ok { @@ -16872,6 +17182,13 @@ func (ec *executionContext) unmarshalInputNodeFilter(ctx context.Context, obj an return it, err 
} it.HealthState = data + case "timeStart": + ctx := graphql.WithPathContext(ctx, graphql.NewPathWithField("timeStart")) + data, err := ec.unmarshalOInt2ᚖint(ctx, v) + if err != nil { + return it, err + } + it.TimeStart = data } } @@ -19222,6 +19539,60 @@ func (ec *executionContext) _NodeStates(ctx context.Context, sel ast.SelectionSe return out } +var nodeStatesTimedImplementors = []string{"NodeStatesTimed"} + +func (ec *executionContext) _NodeStatesTimed(ctx context.Context, sel ast.SelectionSet, obj *model.NodeStatesTimed) graphql.Marshaler { + fields := graphql.CollectFields(ec.OperationContext, sel, nodeStatesTimedImplementors) + + out := graphql.NewFieldSet(fields) + deferred := make(map[string]*graphql.FieldSet) + for i, field := range fields { + switch field.Name { + case "__typename": + out.Values[i] = graphql.MarshalString("NodeStatesTimed") + case "state": + out.Values[i] = ec._NodeStatesTimed_state(ctx, field, obj) + if out.Values[i] == graphql.Null { + out.Invalids++ + } + case "type": + out.Values[i] = ec._NodeStatesTimed_type(ctx, field, obj) + if out.Values[i] == graphql.Null { + out.Invalids++ + } + case "count": + out.Values[i] = ec._NodeStatesTimed_count(ctx, field, obj) + if out.Values[i] == graphql.Null { + out.Invalids++ + } + case "time": + out.Values[i] = ec._NodeStatesTimed_time(ctx, field, obj) + if out.Values[i] == graphql.Null { + out.Invalids++ + } + default: + panic("unknown field " + strconv.Quote(field.Name)) + } + } + out.Dispatch(ctx) + if out.Invalids > 0 { + return graphql.Null + } + + atomic.AddInt32(&ec.deferred, int32(len(deferred))) + + for label, dfs := range deferred { + ec.processDeferredGroup(graphql.DeferredGroup{ + Label: label, + Path: graphql.GetPath(ctx), + FieldSet: dfs, + Context: ctx, + }) + } + + return out +} + var nodesResultListImplementors = []string{"NodesResultList"} func (ec *executionContext) _NodesResultList(ctx context.Context, sel ast.SelectionSet, obj *model.NodesResultList) graphql.Marshaler { @@ 
-19459,6 +19830,28 @@ func (ec *executionContext) _Query(ctx context.Context, sel ast.SelectionSet) gr func(ctx context.Context) graphql.Marshaler { return innerFunc(ctx, out) }) } + out.Concurrently(i, func(ctx context.Context) graphql.Marshaler { return rrm(innerCtx) }) + case "nodeStatesTimed": + field := field + + innerFunc := func(ctx context.Context, fs *graphql.FieldSet) (res graphql.Marshaler) { + defer func() { + if r := recover(); r != nil { + ec.Error(ctx, ec.Recover(ctx, r)) + } + }() + res = ec._Query_nodeStatesTimed(ctx, field) + if res == graphql.Null { + atomic.AddUint32(&fs.Invalids, 1) + } + return res + } + + rrm := func(ctx context.Context) graphql.Marshaler { + return ec.OperationContext.RootResolverMiddleware(ctx, + func(ctx context.Context) graphql.Marshaler { return innerFunc(ctx, out) }) + } + out.Concurrently(i, func(ctx context.Context) graphql.Marshaler { return rrm(innerCtx) }) case "job": field := field @@ -22128,6 +22521,60 @@ func (ec *executionContext) marshalNNodeStates2ᚖgithubᚗcomᚋClusterCockpit return ec._NodeStates(ctx, sel, v) } +func (ec *executionContext) marshalNNodeStatesTimed2ᚕᚖgithubᚗcomᚋClusterCockpitᚋccᚑbackendᚋinternalᚋgraphᚋmodelᚐNodeStatesTimedᚄ(ctx context.Context, sel ast.SelectionSet, v []*model.NodeStatesTimed) graphql.Marshaler { + ret := make(graphql.Array, len(v)) + var wg sync.WaitGroup + isLen1 := len(v) == 1 + if !isLen1 { + wg.Add(len(v)) + } + for i := range v { + i := i + fc := &graphql.FieldContext{ + Index: &i, + Result: &v[i], + } + ctx := graphql.WithFieldContext(ctx, fc) + f := func(i int) { + defer func() { + if r := recover(); r != nil { + ec.Error(ctx, ec.Recover(ctx, r)) + ret = nil + } + }() + if !isLen1 { + defer wg.Done() + } + ret[i] = ec.marshalNNodeStatesTimed2ᚖgithubᚗcomᚋClusterCockpitᚋccᚑbackendᚋinternalᚋgraphᚋmodelᚐNodeStatesTimed(ctx, sel, v[i]) + } + if isLen1 { + f(i) + } else { + go f(i) + } + + } + wg.Wait() + + for _, e := range ret { + if e == graphql.Null { + return graphql.Null 
+ } + } + + return ret +} + +func (ec *executionContext) marshalNNodeStatesTimed2ᚖgithubᚗcomᚋClusterCockpitᚋccᚑbackendᚋinternalᚋgraphᚋmodelᚐNodeStatesTimed(ctx context.Context, sel ast.SelectionSet, v *model.NodeStatesTimed) graphql.Marshaler { + if v == nil { + if !graphql.HasFieldError(ctx, graphql.GetFieldContext(ctx)) { + ec.Errorf(ctx, "the requested element is null which the schema does not allow") + } + return graphql.Null + } + return ec._NodeStatesTimed(ctx, sel, v) +} + func (ec *executionContext) marshalNNodesResultList2githubᚗcomᚋClusterCockpitᚋccᚑbackendᚋinternalᚋgraphᚋmodelᚐNodesResultList(ctx context.Context, sel ast.SelectionSet, v model.NodesResultList) graphql.Marshaler { return ec._NodesResultList(ctx, sel, &v) } diff --git a/internal/graph/model/models_gen.go b/internal/graph/model/models_gen.go index 1834eb9..7b64464 100644 --- a/internal/graph/model/models_gen.go +++ b/internal/graph/model/models_gen.go @@ -176,6 +176,7 @@ type NodeFilter struct { Subcluster *StringInput `json:"subcluster,omitempty"` SchedulerState *schema.SchedulerState `json:"schedulerState,omitempty"` HealthState *string `json:"healthState,omitempty"` + TimeStart *int `json:"timeStart,omitempty"` } type NodeMetrics struct { @@ -194,6 +195,13 @@ type NodeStates struct { Count int `json:"count"` } +type NodeStatesTimed struct { + State string `json:"state"` + Type string `json:"type"` + Count int `json:"count"` + Time int `json:"time"` +} + type NodesResultList struct { Items []*NodeMetrics `json:"items"` Offset *int `json:"offset,omitempty"` diff --git a/internal/graph/schema.resolvers.go b/internal/graph/schema.resolvers.go index e65fcdf..15bc6df 100644 --- a/internal/graph/schema.resolvers.go +++ b/internal/graph/schema.resolvers.go @@ -366,17 +366,66 @@ func (r *queryResolver) AllocatedNodes(ctx context.Context, cluster string) ([]* // Node is the resolver for the node field. 
func (r *queryResolver) Node(ctx context.Context, id string) (*schema.Node, error) { - panic(fmt.Errorf("not implemented: Node - node")) + repo := repository.GetNodeRepository() + numericId, err := strconv.ParseInt(id, 10, 64) + if err != nil { + cclog.Warn("Error while parsing job id") + return nil, err + } + return repo.GetNodeById(numericId, false) } // Nodes is the resolver for the nodes field. func (r *queryResolver) Nodes(ctx context.Context, filter []*model.NodeFilter, order *model.OrderByInput) (*model.NodeStateResultList, error) { - panic(fmt.Errorf("not implemented: Nodes - nodes")) + repo := repository.GetNodeRepository() + nodes, err := repo.QueryNodes(ctx, filter, order) + count := len(nodes) + return &model.NodeStateResultList{Items: nodes, Count: &count}, err } // NodeStates is the resolver for the nodeStates field. func (r *queryResolver) NodeStates(ctx context.Context, filter []*model.NodeFilter) ([]*model.NodeStates, error) { - panic(fmt.Errorf("not implemented: NodeStates - nodeStates")) + repo := repository.GetNodeRepository() + + stateCounts, serr := repo.CountNodeStates(ctx, filter) + if serr != nil { + cclog.Warnf("Error while counting nodeStates: %s", serr.Error()) + return nil, serr + } + + healthCounts, herr := repo.CountHealthStates(ctx, filter) + if herr != nil { + cclog.Warnf("Error while counting healthStates: %s", herr.Error()) + return nil, herr + } + + allCounts := make([]*model.NodeStates, 0) + allCounts = append(stateCounts, healthCounts...) + + return allCounts, nil +} + +// NodeStatesTimed is the resolver for the nodeStatesTimed field. 
+func (r *queryResolver) NodeStatesTimed(ctx context.Context, filter []*model.NodeFilter) ([]*model.NodeStatesTimed, error) { + panic(fmt.Errorf("not implemented: NodeStatesTimed - NodeStatesTimed")) + // repo := repository.GetNodeRepository() + + // stateCounts, serr := repo.CountNodeStates(ctx, filter) + // if serr != nil { + // cclog.Warnf("Error while counting nodeStates: %s", serr.Error()) + // return nil, serr + // } + + // healthCounts, herr := repo.CountHealthStates(ctx, filter) + // if herr != nil { + // cclog.Warnf("Error while counting healthStates: %s", herr.Error()) + // return nil, herr + // } + + // allCounts := make([]*model.NodeStates, 0) + // allCounts = append(stateCounts, healthCounts...) + + // return allCounts, nil } // Job is the resolver for the job field. diff --git a/internal/repository/node.go b/internal/repository/node.go index 91a37c6..c3152f4 100644 --- a/internal/repository/node.go +++ b/internal/repository/node.go @@ -117,9 +117,13 @@ func (r *NodeRepository) FetchMetadata(hostname string, cluster string) (map[str func (r *NodeRepository) GetNode(hostname string, cluster string, withMeta bool) (*schema.Node, error) { node := &schema.Node{} if err := sq.Select("node.hostname", "node.cluster", "node.subcluster", "node_state.node_state", - "node_state.health_state", "MAX(node_state.time_stamp)").From("node_state"). - Join("node ON nodes_state.node_id = node.id").GroupBy("node_state.node_id"). - Where("node.hostname = ?", hostname).Where("node.cluster = ?", cluster).RunWith(r.DB). + "node_state.health_state", "MAX(node_state.time_stamp) as time"). + From("node_state"). + Join("node ON nodes_state.node_id = node.id"). + Where("node.hostname = ?", hostname). + Where("node.cluster = ?", cluster). + GroupBy("node_state.node_id"). + RunWith(r.DB). 
QueryRow().Scan(&node.Hostname, &node.Cluster, &node.SubCluster, &node.NodeState, &node.HealthState); err != nil { cclog.Warnf("Error while querying node '%s' from database: %v", hostname, err) return nil, err @@ -138,6 +142,34 @@ func (r *NodeRepository) GetNode(hostname string, cluster string, withMeta bool) return node, nil } +func (r *NodeRepository) GetNodeById(id int64, withMeta bool) (*schema.Node, error) { + node := &schema.Node{} + if err := sq.Select("node.hostname", "node.cluster", "node.subcluster", "node_state.node_state", + "node_state.health_state", "MAX(node_state.time_stamp) as time"). + From("node_state"). + Join("node ON nodes_state.node_id = node.id"). + Where("node.id = ?", id). + GroupBy("node_state.node_id"). + RunWith(r.DB). + QueryRow().Scan(&node.Hostname, &node.Cluster, &node.SubCluster, &node.NodeState, &node.HealthState); err != nil { + cclog.Warnf("Error while querying node ID '%d' from database: %v", id, err) + return nil, err + } + + // NEEDS METADATA BY ID + // if withMeta { + // var err error + // var meta map[string]string + // if meta, err = r.FetchMetadata(hostname, cluster); err != nil { + // cclog.Warnf("Error while fetching metadata for node '%s'", hostname) + // return nil, err + // } + // node.MetaData = meta + // } + + return node, nil +} + // const NamedNodeInsert string = ` // INSERT INTO node (time_stamp, hostname, cluster, subcluster, node_state, health_state, // @@ -244,8 +276,9 @@ func (r *NodeRepository) QueryNodes( ) ([]*schema.Node, error) { query, qerr := AccessCheck(ctx, sq.Select("node.hostname", "node.cluster", "node.subcluster", "node_state.node_state", - "node_state.health_state", "MAX(node_state.time_stamp)").From("node"). - Join("node_state ON nodes_state.node_id = node.id").GroupBy("node_state.node_id")) + "node_state.health_state", "MAX(node_state.time_stamp) as time"). + From("node"). 
+ Join("node_state ON nodes_state.node_id = node.id")) if qerr != nil { return nil, qerr } @@ -268,6 +301,9 @@ func (r *NodeRepository) QueryNodes( } } + // Add Grouping after filters + query = query.GroupBy("node_state.node_id") + rows, err := query.RunWith(r.stmtCache).Query() if err != nil { queryString, queryVars, _ := query.ToSql() @@ -293,9 +329,12 @@ func (r *NodeRepository) QueryNodes( func (r *NodeRepository) ListNodes(cluster string) ([]*schema.Node, error) { q := sq.Select("node.hostname", "node.cluster", "node.subcluster", "node_state.node_state", - "node_state.health_state", "MAX(node_state.time_stamp)").From("node"). - Join("node_state ON node_state.node_id = node.id").GroupBy("node_state.node_id"). - Where("node.cluster = ?", cluster).OrderBy("node.hostname ASC") + "node_state.health_state", "MAX(node_state.time_stamp) as time"). + From("node"). + Join("node_state ON node_state.node_id = node.id"). + Where("node.cluster = ?", cluster). + GroupBy("node_state.node_id"). + OrderBy("node.hostname ASC") rows, err := q.RunWith(r.DB).Query() if err != nil { @@ -319,35 +358,33 @@ func (r *NodeRepository) ListNodes(cluster string) ([]*schema.Node, error) { } func (r *NodeRepository) CountNodeStates(ctx context.Context, filters []*model.NodeFilter) ([]*model.NodeStates, error) { - query, qerr := AccessCheck(ctx, sq.Select("node_state", "count(*) AS count").From("node")) + query, qerr := AccessCheck(ctx, sq.Select("hostname", "node_state", "MAX(time_stamp) as time").From("node")) if qerr != nil { return nil, qerr } - // Get latest Info aka closest Timestamp to $now - now := time.Now().Unix() - query = query.Join("node_state ON node_state.node_id = node.id").Where(sq.Gt{"node_state.time_stamp": (now - 60)}) // .Distinct() + query = query.Join("node_state ON node_state.node_id = node.id") for _, f := range filters { if f.Hostname != nil { - query = buildStringCondition("node.hostname", f.Hostname, query) + query = buildStringCondition("hostname", f.Hostname, 
query) } if f.Cluster != nil { - query = buildStringCondition("node.cluster", f.Cluster, query) + query = buildStringCondition("cluster", f.Cluster, query) } if f.Subcluster != nil { - query = buildStringCondition("node.subcluster", f.Subcluster, query) + query = buildStringCondition("subcluster", f.Subcluster, query) } if f.SchedulerState != nil { - query = query.Where("node.node_state = ?", f.SchedulerState) + query = query.Where("node_state = ?", f.SchedulerState) } if f.HealthState != nil { - query = query.Where("node.health_state = ?", f.HealthState) + query = query.Where("health_state = ?", f.HealthState) } } // Add Group and Order - query = query.GroupBy("node_state").OrderBy("count DESC") + query = query.GroupBy("hostname").OrderBy("hostname DESC") rows, err := query.RunWith(r.stmtCache).Query() if err != nil { @@ -356,15 +393,23 @@ func (r *NodeRepository) CountNodeStates(ctx context.Context, filters []*model.N return nil, err } - nodes := make([]*model.NodeStates, 0) + stateMap := map[string]int{} for rows.Next() { - node := model.NodeStates{} + var hostname, node_state string + var timestamp int64 - if err := rows.Scan(&node.State, &node.Count); err != nil { + if err := rows.Scan(&hostname, &node_state, ×tamp); err != nil { rows.Close() cclog.Warn("Error while scanning rows (NodeStates)") return nil, err } + + stateMap[node_state] += 1 + } + + nodes := make([]*model.NodeStates, 0) + for state, counts := range stateMap { + node := model.NodeStates{State: state, Count: counts} nodes = append(nodes, &node) } @@ -372,35 +417,33 @@ func (r *NodeRepository) CountNodeStates(ctx context.Context, filters []*model.N } func (r *NodeRepository) CountHealthStates(ctx context.Context, filters []*model.NodeFilter) ([]*model.NodeStates, error) { - query, qerr := AccessCheck(ctx, sq.Select("health_state", "count(*) AS count").From("node")) + query, qerr := AccessCheck(ctx, sq.Select("hostname", "health_state", "MAX(time_stamp) as time").From("node")) if qerr != nil { 
return nil, qerr } - // Get latest Info aka closest Timestamp to $now - now := time.Now().Unix() - query = query.Join("node_state ON node_state.node_id = node.id").Where(sq.Gt{"node_state.time_stamp": (now - 60)}) // .Distinct() + query = query.Join("node_state ON node_state.node_id = node.id") for _, f := range filters { if f.Hostname != nil { - query = buildStringCondition("node.hostname", f.Hostname, query) + query = buildStringCondition("hostname", f.Hostname, query) } if f.Cluster != nil { - query = buildStringCondition("node.cluster", f.Cluster, query) + query = buildStringCondition("cluster", f.Cluster, query) } if f.Subcluster != nil { - query = buildStringCondition("node.subcluster", f.Subcluster, query) + query = buildStringCondition("subcluster", f.Subcluster, query) } if f.SchedulerState != nil { - query = query.Where("node.node_state = ?", f.SchedulerState) + query = query.Where("node_state = ?", f.SchedulerState) } if f.HealthState != nil { - query = query.Where("node.health_state = ?", f.HealthState) + query = query.Where("health_state = ?", f.HealthState) } } // Add Group and Order - query = query.GroupBy("health_state").OrderBy("count DESC") + query = query.GroupBy("hostname").OrderBy("hostname DESC") rows, err := query.RunWith(r.stmtCache).Query() if err != nil { @@ -409,15 +452,23 @@ func (r *NodeRepository) CountHealthStates(ctx context.Context, filters []*model return nil, err } - nodes := make([]*model.NodeStates, 0) + stateMap := map[string]int{} for rows.Next() { - node := model.NodeStates{} + var hostname, health_state string + var timestamp int64 - if err := rows.Scan(&node.State, &node.Count); err != nil { + if err := rows.Scan(&hostname, &health_state, ×tamp); err != nil { rows.Close() cclog.Warn("Error while scanning rows (NodeStates)") return nil, err } + + stateMap[health_state] += 1 + } + + nodes := make([]*model.NodeStates, 0) + for state, counts := range stateMap { + node := model.NodeStates{State: state, Count: counts} nodes = 
append(nodes, &node) } diff --git a/web/frontend/src/generic/JobCompare.svelte b/web/frontend/src/generic/JobCompare.svelte index a504e98..dea324f 100644 --- a/web/frontend/src/generic/JobCompare.svelte +++ b/web/frontend/src/generic/JobCompare.svelte @@ -20,7 +20,7 @@ // mutationStore, } from "@urql/svelte"; import { Row, Col, Card, Spinner, Table, Input, InputGroup, InputGroupText, Icon } from "@sveltestrap/sveltestrap"; - import { formatTime, roundTwoDigits } from "./units.js"; + import { formatDurationTime, roundTwoDigits } from "./units.js"; import Comparogram from "./plots/Comparogram.svelte"; /* Svelte 5 Props */ @@ -373,7 +373,7 @@ {job.jobId} {new Date(job.startTime * 1000).toLocaleString()} - {formatTime(job.duration)} + {formatDurationTime(job.duration)} {job.cluster} ({job.subCluster}) {job.numNodes} {job.numHWThreads} diff --git a/web/frontend/src/generic/plots/Comparogram.svelte b/web/frontend/src/generic/plots/Comparogram.svelte index 5f50475..2a37417 100644 --- a/web/frontend/src/generic/plots/Comparogram.svelte +++ b/web/frontend/src/generic/plots/Comparogram.svelte @@ -20,7 +20,7 @@ + + +{#if data && data[0].length > 0} +
+{:else} + Cannot render plot: No series data returned for {metric?metric:'job resources'} +{/if} diff --git a/web/frontend/src/generic/units.js b/web/frontend/src/generic/units.js index 9193ff8..dbf220a 100644 --- a/web/frontend/src/generic/units.js +++ b/web/frontend/src/generic/units.js @@ -35,7 +35,7 @@ export function scaleNumbers(x, y , p = '') { return Math.abs(rawYValue) >= 1000 ? `${rawXValue.toExponential()} / ${rawYValue.toExponential()}` : `${rawYValue.toString()} / ${rawYValue.toString()}` } -export function formatTime(t, forNode = false) { +export function formatDurationTime(t, forNode = false) { if (t !== null) { if (isNaN(t)) { return t; @@ -51,6 +51,16 @@ export function formatTime(t, forNode = false) { } } +export function formatUnixTime(t) { + if (t !== null) { + if (isNaN(t)) { + return t; + } else { + return new Date(t * 1000).toLocaleString() + } + } +} + // const equalsCheck = (a, b) => { // return JSON.stringify(a) === JSON.stringify(b); // } diff --git a/web/frontend/src/status/StatusDash.svelte b/web/frontend/src/status/StatusDash.svelte index 9579c0f..758c563 100644 --- a/web/frontend/src/status/StatusDash.svelte +++ b/web/frontend/src/status/StatusDash.svelte @@ -25,7 +25,7 @@ import { init, } from "../generic/utils.js"; - import { scaleNumbers, formatTime } from "../generic/units.js"; + import { scaleNumbers, formatDurationTime } from "../generic/units.js"; import Refresher from "../generic/helper/Refresher.svelte"; import Roofline from "../generic/plots/Roofline.svelte"; import Pie, { colors } from "../generic/plots/Pie.svelte"; @@ -44,6 +44,7 @@ /* State Init */ let cluster = $state(presetCluster); let pieWidth = $state(0); + let stackedWidth = $state(0); let plotWidths = $state([]); let from = $state(new Date(Date.now() - 5 * 60 * 1000)); let to = $state(new Date(Date.now())); @@ -86,6 +87,24 @@ return $nodesStateCounts?.data?.nodeStates.filter((e) => ['full', 'partial', 'failed'].includes(e.state)) }); + // NodeStates for Stacked 
charts + const nodesStateTimes = $derived(queryStore({ + client: client, + query: gql` + query ($filter: [NodeFilter!]) { + nodeStatesTimed(filter: $filter) { + state + type + count + time + } + } + `, + variables: { + filter: { cluster: { eq: cluster }, timeStart: Date.now() - (24 * 3600 * 1000)} // Add Selector for Timeframe (4h, 12h, 24h)? + }, + })); + // Note: nodeMetrics are requested on configured $timestep resolution // Result: The latest 5 minutes (datapoints) for each node independent of job const statusQuery = $derived(queryStore({ @@ -315,7 +334,7 @@ function transformJobsStatsToInfo(subclusterData) { if (subclusterData) { - return subclusterData.map((sc) => { return {id: sc.id, jobId: sc.jobId, numNodes: sc.numNodes, numAcc: sc?.numAccelerators? sc.numAccelerators : 0, duration: formatTime(sc.duration)} }) + return subclusterData.map((sc) => { return {id: sc.id, jobId: sc.jobId, numNodes: sc.numNodes, numAcc: sc?.numAccelerators? sc.numAccelerators : 0, duration: formatDurationTime(sc.duration)} }) } else { console.warn("transformJobsStatsToInfo: jobInfo missing!") return [] @@ -374,6 +393,55 @@
+ + + {#if $initq.data && $nodesStateCounts.data}