add healthStatus tab to status details, add graphql endpoints and handlers

This commit is contained in:
Christoph Kluge
2026-02-17 14:38:06 +01:00
parent 9af44779aa
commit fe3ebe0abc
8 changed files with 676 additions and 158 deletions

View File

@@ -287,6 +287,7 @@ type ComplexityRoot struct {
Cluster func(childComplexity int) int
CpusAllocated func(childComplexity int) int
GpusAllocated func(childComplexity int) int
HealthData func(childComplexity int) int
HealthState func(childComplexity int) int
Hostname func(childComplexity int) int
ID func(childComplexity int) int
@@ -347,6 +348,7 @@ type ComplexityRoot struct {
NodeStates func(childComplexity int, filter []*model.NodeFilter) int
NodeStatesTimed func(childComplexity int, filter []*model.NodeFilter, typeArg string) int
Nodes func(childComplexity int, filter []*model.NodeFilter, order *model.OrderByInput) int
NodesWithMeta func(childComplexity int, filter []*model.NodeFilter, order *model.OrderByInput) int
RooflineHeatmap func(childComplexity int, filter []*model.JobFilter, rows int, cols int, minX float64, minY float64, maxX float64, maxY float64) int
ScopedJobStats func(childComplexity int, id string, metrics []string, scopes []schema.MetricScope) int
Tags func(childComplexity int) int
@@ -369,7 +371,7 @@ type ComplexityRoot struct {
Series struct {
Data func(childComplexity int) int
Hostname func(childComplexity int) int
Id func(childComplexity int) int
ID func(childComplexity int) int
Statistics func(childComplexity int) int
}
@@ -476,6 +478,7 @@ type NodeResolver interface {
SchedulerState(ctx context.Context, obj *schema.Node) (schema.SchedulerState, error)
HealthState(ctx context.Context, obj *schema.Node) (string, error)
MetaData(ctx context.Context, obj *schema.Node) (any, error)
HealthData(ctx context.Context, obj *schema.Node) (any, error)
}
type QueryResolver interface {
Clusters(ctx context.Context) ([]*schema.Cluster, error)
@@ -485,6 +488,7 @@ type QueryResolver interface {
AllocatedNodes(ctx context.Context, cluster string) ([]*model.Count, error)
Node(ctx context.Context, id string) (*schema.Node, error)
Nodes(ctx context.Context, filter []*model.NodeFilter, order *model.OrderByInput) (*model.NodeStateResultList, error)
NodesWithMeta(ctx context.Context, filter []*model.NodeFilter, order *model.OrderByInput) (*model.NodeStateResultList, error)
NodeStates(ctx context.Context, filter []*model.NodeFilter) ([]*model.NodeStates, error)
NodeStatesTimed(ctx context.Context, filter []*model.NodeFilter, typeArg string) ([]*model.NodeStatesTimed, error)
Job(ctx context.Context, id string) (*schema.Job, error)
@@ -1452,6 +1456,12 @@ func (e *executableSchema) Complexity(ctx context.Context, typeName, field strin
}
return e.complexity.Node.GpusAllocated(childComplexity), true
case "Node.healthData":
if e.complexity.Node.HealthData == nil {
break
}
return e.complexity.Node.HealthData(childComplexity), true
case "Node.healthState":
if e.complexity.Node.HealthState == nil {
break
@@ -1785,6 +1795,17 @@ func (e *executableSchema) Complexity(ctx context.Context, typeName, field strin
}
return e.complexity.Query.Nodes(childComplexity, args["filter"].([]*model.NodeFilter), args["order"].(*model.OrderByInput)), true
case "Query.nodesWithMeta":
if e.complexity.Query.NodesWithMeta == nil {
break
}
args, err := ec.field_Query_nodesWithMeta_args(ctx, rawArgs)
if err != nil {
return 0, false
}
return e.complexity.Query.NodesWithMeta(childComplexity, args["filter"].([]*model.NodeFilter), args["order"].(*model.OrderByInput)), true
case "Query.rooflineHeatmap":
if e.complexity.Query.RooflineHeatmap == nil {
break
@@ -1882,11 +1903,11 @@ func (e *executableSchema) Complexity(ctx context.Context, typeName, field strin
return e.complexity.Series.Hostname(childComplexity), true
case "Series.id":
if e.complexity.Series.Id == nil {
if e.complexity.Series.ID == nil {
break
}
return e.complexity.Series.Id(childComplexity), true
return e.complexity.Series.ID(childComplexity), true
case "Series.statistics":
if e.complexity.Series.Statistics == nil {
break
@@ -2302,6 +2323,7 @@ type Node {
schedulerState: SchedulerState!
healthState: MonitoringState!
metaData: Any
healthData: Any
}
type NodeStates {
@@ -2611,6 +2633,7 @@ type Query {
## Node Queries New
node(id: ID!): Node
nodes(filter: [NodeFilter!], order: OrderByInput): NodeStateResultList!
nodesWithMeta(filter: [NodeFilter!], order: OrderByInput): NodeStateResultList!
nodeStates(filter: [NodeFilter!]): [NodeStates!]!
nodeStatesTimed(filter: [NodeFilter!], type: String!): [NodeStatesTimed!]!
@@ -3268,6 +3291,22 @@ func (ec *executionContext) field_Query_node_args(ctx context.Context, rawArgs m
return args, nil
}
func (ec *executionContext) field_Query_nodesWithMeta_args(ctx context.Context, rawArgs map[string]any) (map[string]any, error) {
var err error
args := map[string]any{}
arg0, err := graphql.ProcessArgField(ctx, rawArgs, "filter", ec.unmarshalONodeFilter2ᚕᚖgithubᚗcomᚋClusterCockpitᚋccᚑbackendᚋinternalᚋgraphᚋmodelᚐNodeFilterᚄ)
if err != nil {
return nil, err
}
args["filter"] = arg0
arg1, err := graphql.ProcessArgField(ctx, rawArgs, "order", ec.unmarshalOOrderByInput2ᚖgithubᚗcomᚋClusterCockpitᚋccᚑbackendᚋinternalᚋgraphᚋmodelᚐOrderByInput)
if err != nil {
return nil, err
}
args["order"] = arg1
return args, nil
}
func (ec *executionContext) field_Query_nodes_args(ctx context.Context, rawArgs map[string]any) (map[string]any, error) {
var err error
args := map[string]any{}
@@ -8258,6 +8297,35 @@ func (ec *executionContext) fieldContext_Node_metaData(_ context.Context, field
return fc, nil
}
func (ec *executionContext) _Node_healthData(ctx context.Context, field graphql.CollectedField, obj *schema.Node) (ret graphql.Marshaler) {
return graphql.ResolveField(
ctx,
ec.OperationContext,
field,
ec.fieldContext_Node_healthData,
func(ctx context.Context) (any, error) {
return ec.resolvers.Node().HealthData(ctx, obj)
},
nil,
ec.marshalOAny2interface,
true,
false,
)
}
func (ec *executionContext) fieldContext_Node_healthData(_ context.Context, field graphql.CollectedField) (fc *graphql.FieldContext, err error) {
fc = &graphql.FieldContext{
Object: "Node",
Field: field,
IsMethod: true,
IsResolver: true,
Child: func(ctx context.Context, field graphql.CollectedField) (*graphql.FieldContext, error) {
return nil, errors.New("field of type Any does not have child fields")
},
}
return fc, nil
}
func (ec *executionContext) _NodeMetrics_host(ctx context.Context, field graphql.CollectedField, obj *model.NodeMetrics) (ret graphql.Marshaler) {
return graphql.ResolveField(
ctx,
@@ -8428,6 +8496,8 @@ func (ec *executionContext) fieldContext_NodeStateResultList_items(_ context.Con
return ec.fieldContext_Node_healthState(ctx, field)
case "metaData":
return ec.fieldContext_Node_metaData(ctx, field)
case "healthData":
return ec.fieldContext_Node_healthData(ctx, field)
}
return nil, fmt.Errorf("no field named %q was found under type Node", field.Name)
},
@@ -9053,6 +9123,8 @@ func (ec *executionContext) fieldContext_Query_node(ctx context.Context, field g
return ec.fieldContext_Node_healthState(ctx, field)
case "metaData":
return ec.fieldContext_Node_metaData(ctx, field)
case "healthData":
return ec.fieldContext_Node_healthData(ctx, field)
}
return nil, fmt.Errorf("no field named %q was found under type Node", field.Name)
},
@@ -9118,6 +9190,53 @@ func (ec *executionContext) fieldContext_Query_nodes(ctx context.Context, field
return fc, nil
}
func (ec *executionContext) _Query_nodesWithMeta(ctx context.Context, field graphql.CollectedField) (ret graphql.Marshaler) {
return graphql.ResolveField(
ctx,
ec.OperationContext,
field,
ec.fieldContext_Query_nodesWithMeta,
func(ctx context.Context) (any, error) {
fc := graphql.GetFieldContext(ctx)
return ec.resolvers.Query().NodesWithMeta(ctx, fc.Args["filter"].([]*model.NodeFilter), fc.Args["order"].(*model.OrderByInput))
},
nil,
ec.marshalNNodeStateResultList2ᚖgithubᚗcomᚋClusterCockpitᚋccᚑbackendᚋinternalᚋgraphᚋmodelᚐNodeStateResultList,
true,
true,
)
}
func (ec *executionContext) fieldContext_Query_nodesWithMeta(ctx context.Context, field graphql.CollectedField) (fc *graphql.FieldContext, err error) {
fc = &graphql.FieldContext{
Object: "Query",
Field: field,
IsMethod: true,
IsResolver: true,
Child: func(ctx context.Context, field graphql.CollectedField) (*graphql.FieldContext, error) {
switch field.Name {
case "items":
return ec.fieldContext_NodeStateResultList_items(ctx, field)
case "count":
return ec.fieldContext_NodeStateResultList_count(ctx, field)
}
return nil, fmt.Errorf("no field named %q was found under type NodeStateResultList", field.Name)
},
}
defer func() {
if r := recover(); r != nil {
err = ec.Recover(ctx, r)
ec.Error(ctx, err)
}
}()
ctx = graphql.WithFieldContext(ctx, fc)
if fc.Args, err = ec.field_Query_nodesWithMeta_args(ctx, field.ArgumentMap(ec.Variables)); err != nil {
ec.Error(ctx, err)
return fc, err
}
return fc, nil
}
func (ec *executionContext) _Query_nodeStates(ctx context.Context, field graphql.CollectedField) (ret graphql.Marshaler) {
return graphql.ResolveField(
ctx,
@@ -15744,6 +15863,39 @@ func (ec *executionContext) _Node(ctx context.Context, sel ast.SelectionSet, obj
continue
}
out.Concurrently(i, func(ctx context.Context) graphql.Marshaler { return innerFunc(ctx, out) })
case "healthData":
field := field
innerFunc := func(ctx context.Context, _ *graphql.FieldSet) (res graphql.Marshaler) {
defer func() {
if r := recover(); r != nil {
ec.Error(ctx, ec.Recover(ctx, r))
}
}()
res = ec._Node_healthData(ctx, field, obj)
return res
}
if field.Deferrable != nil {
dfs, ok := deferred[field.Deferrable.Label]
di := 0
if ok {
dfs.AddField(field)
di = len(dfs.Values) - 1
} else {
dfs = graphql.NewFieldSet([]graphql.CollectedField{field})
deferred[field.Deferrable.Label] = dfs
}
dfs.Concurrently(di, func(ctx context.Context) graphql.Marshaler {
return innerFunc(ctx, dfs)
})
// don't run the out.Concurrently() call below
out.Values[i] = graphql.Null
continue
}
out.Concurrently(i, func(ctx context.Context) graphql.Marshaler { return innerFunc(ctx, out) })
default:
panic("unknown field " + strconv.Quote(field.Name))
@@ -16171,6 +16323,28 @@ func (ec *executionContext) _Query(ctx context.Context, sel ast.SelectionSet) gr
func(ctx context.Context) graphql.Marshaler { return innerFunc(ctx, out) })
}
out.Concurrently(i, func(ctx context.Context) graphql.Marshaler { return rrm(innerCtx) })
case "nodesWithMeta":
field := field
innerFunc := func(ctx context.Context, fs *graphql.FieldSet) (res graphql.Marshaler) {
defer func() {
if r := recover(); r != nil {
ec.Error(ctx, ec.Recover(ctx, r))
}
}()
res = ec._Query_nodesWithMeta(ctx, field)
if res == graphql.Null {
atomic.AddUint32(&fs.Invalids, 1)
}
return res
}
rrm := func(ctx context.Context) graphql.Marshaler {
return ec.OperationContext.RootResolverMiddleware(ctx,
func(ctx context.Context) graphql.Marshaler { return innerFunc(ctx, out) })
}
out.Concurrently(i, func(ctx context.Context) graphql.Marshaler { return rrm(innerCtx) })
case "nodeStates":
field := field

View File

@@ -318,18 +318,39 @@ func (r *nodeResolver) SchedulerState(ctx context.Context, obj *schema.Node) (sc
if obj.NodeState != "" {
return obj.NodeState, nil
} else {
return "", fmt.Errorf("no SchedulerState (NodeState) on Object")
return "", fmt.Errorf("resolver: no SchedulerState (NodeState) on node object")
}
}
// HealthState is the resolver for the healthState field.
func (r *nodeResolver) HealthState(ctx context.Context, obj *schema.Node) (string, error) {
panic(fmt.Errorf("not implemented: HealthState - healthState"))
if obj.HealthState != "" {
return string(obj.HealthState), nil
} else {
return "", fmt.Errorf("resolver: no HealthState (NodeState) on node object")
}
}
// MetaData is the resolver for the metaData field.
func (r *nodeResolver) MetaData(ctx context.Context, obj *schema.Node) (any, error) {
panic(fmt.Errorf("not implemented: MetaData - metaData"))
if obj.MetaData != nil {
return obj.MetaData, nil
} else {
cclog.Debug("resolver: no MetaData (NodeState) on node object")
emptyMeta := make(map[string]string, 0)
return emptyMeta, nil
}
}
// HealthData is the resolver for the healthData field.
func (r *nodeResolver) HealthData(ctx context.Context, obj *schema.Node) (any, error) {
if obj.HealthData != nil {
return obj.HealthData, nil
} else {
cclog.Debug("resolver: no HealthData (NodeState) on node object")
emptyHealth := make(map[string][]string, 0)
return emptyHealth, nil
}
}
// Clusters is the resolver for the clusters field.
@@ -398,6 +419,15 @@ func (r *queryResolver) Nodes(ctx context.Context, filter []*model.NodeFilter, o
return &model.NodeStateResultList{Items: nodes, Count: &count}, err
}
// NodesWithMeta is the resolver for the nodesWithMeta field.
func (r *queryResolver) NodesWithMeta(ctx context.Context, filter []*model.NodeFilter, order *model.OrderByInput) (*model.NodeStateResultList, error) {
// Why Extra Handler? -> graphql.CollectAllFields(ctx) only returns toplevel fields (i.e.: items, count), and not subfields like item.metaData
repo := repository.GetNodeRepository()
nodes, err := repo.QueryNodesWithMeta(ctx, filter, nil, order) // Ignore Paging, Order Unused
count := len(nodes)
return &model.NodeStateResultList{Items: nodes, Count: &count}, err
}
// NodeStates is the resolver for the nodeStates field.
func (r *queryResolver) NodeStates(ctx context.Context, filter []*model.NodeFilter) ([]*model.NodeStates, error) {
repo := repository.GetNodeRepository()

View File

@@ -154,16 +154,14 @@ func (r *NodeRepository) GetNodeByID(id int64, withMeta bool) (*schema.Node, err
return nil, err
}
// NEEDS METADATA BY ID
// if withMeta {
// var err error
// var meta map[string]string
// if meta, err = r.FetchMetadata(hostname, cluster); err != nil {
// cclog.Warnf("Error while fetching metadata for node '%s'", hostname)
// return nil, err
// }
// node.MetaData = meta
// }
if withMeta {
meta, metaErr := r.FetchMetadata(node.Hostname, node.Cluster)
if metaErr != nil {
cclog.Warnf("Error while fetching metadata for node ID '%d': %v", id, metaErr)
return nil, metaErr
}
node.MetaData = meta
}
return node, nil
}
@@ -382,6 +380,81 @@ func (r *NodeRepository) QueryNodes(
return nodes, nil
}
// QueryNodesWithMeta returns a list of nodes based on a node filter. It always operates
// on the last state (largest timestamp). It includes both (!) optional JSON column data
func (r *NodeRepository) QueryNodesWithMeta(
ctx context.Context,
filters []*model.NodeFilter,
page *model.PageRequest,
order *model.OrderByInput, // Currently unused!
) ([]*schema.Node, error) {
query, qerr := AccessCheck(ctx,
sq.Select("node.hostname", "node.cluster", "node.subcluster",
"node_state.node_state", "node_state.health_state",
"node.meta_data", "node_state.health_metrics").
From("node").
Join("node_state ON node_state.node_id = node.id").
Where(latestStateCondition()))
if qerr != nil {
return nil, qerr
}
query = applyNodeFilters(query, filters)
query = query.OrderBy("node.hostname ASC")
if page != nil && page.ItemsPerPage != -1 {
limit := uint64(page.ItemsPerPage)
query = query.Offset((uint64(page.Page) - 1) * limit).Limit(limit)
}
rows, err := query.RunWith(r.stmtCache).Query()
if err != nil {
queryString, queryVars, _ := query.ToSql()
cclog.Errorf("Error while running query '%s' %v: %v", queryString, queryVars, err)
return nil, err
}
nodes := make([]*schema.Node, 0)
for rows.Next() {
node := schema.Node{}
RawMetaData := make([]byte, 0)
RawMetricHealth := make([]byte, 0)
if err := rows.Scan(&node.Hostname, &node.Cluster, &node.SubCluster,
&node.NodeState, &node.HealthState, &RawMetaData, &RawMetricHealth); err != nil {
rows.Close()
cclog.Warn("Error while scanning rows (QueryNodes)")
return nil, err
}
if len(RawMetaData) == 0 {
node.MetaData = nil
} else {
metaData := make(map[string]string)
if err := json.Unmarshal(RawMetaData, &metaData); err != nil {
cclog.Warn("Error while unmarshaling raw metadata json")
return nil, err
}
node.MetaData = metaData
}
if len(RawMetricHealth) == 0 {
node.HealthData = nil
} else {
healthData := make(map[string][]string)
if err := json.Unmarshal(RawMetricHealth, &healthData); err != nil {
cclog.Warn("Error while unmarshaling raw healthdata json")
return nil, err
}
node.HealthData = healthData
}
nodes = append(nodes, &node)
}
return nodes, nil
}
// CountNodes returns the total matched nodes based on a node filter. It always operates
// on the last state (largest timestamp) per node.
func (r *NodeRepository) CountNodes(