add healthStatus tab to status details, add graphql endpoints and handlers

This commit is contained in:
Christoph Kluge
2026-02-17 14:38:06 +01:00
parent 9af44779aa
commit fe3ebe0abc
8 changed files with 676 additions and 158 deletions

View File

@@ -19,6 +19,7 @@ type Node {
schedulerState: SchedulerState! schedulerState: SchedulerState!
healthState: MonitoringState! healthState: MonitoringState!
metaData: Any metaData: Any
healthData: Any
} }
type NodeStates { type NodeStates {
@@ -328,6 +329,7 @@ type Query {
## Node Queries New ## Node Queries New
node(id: ID!): Node node(id: ID!): Node
nodes(filter: [NodeFilter!], order: OrderByInput): NodeStateResultList! nodes(filter: [NodeFilter!], order: OrderByInput): NodeStateResultList!
nodesWithMeta(filter: [NodeFilter!], order: OrderByInput): NodeStateResultList!
nodeStates(filter: [NodeFilter!]): [NodeStates!]! nodeStates(filter: [NodeFilter!]): [NodeStates!]!
nodeStatesTimed(filter: [NodeFilter!], type: String!): [NodeStatesTimed!]! nodeStatesTimed(filter: [NodeFilter!], type: String!): [NodeStatesTimed!]!

2
go.sum
View File

@@ -4,8 +4,6 @@ github.com/99designs/gqlgen v0.17.85 h1:EkGx3U2FDcxQm8YDLQSpXIAVmpDyZ3IcBMOJi2nH
github.com/99designs/gqlgen v0.17.85/go.mod h1:yvs8s0bkQlRfqg03YXr3eR4OQUowVhODT/tHzCXnbOU= github.com/99designs/gqlgen v0.17.85/go.mod h1:yvs8s0bkQlRfqg03YXr3eR4OQUowVhODT/tHzCXnbOU=
github.com/Azure/go-ntlmssp v0.0.0-20221128193559-754e69321358 h1:mFRzDkZVAjdal+s7s0MwaRv9igoPqLRdzOLzw/8Xvq8= github.com/Azure/go-ntlmssp v0.0.0-20221128193559-754e69321358 h1:mFRzDkZVAjdal+s7s0MwaRv9igoPqLRdzOLzw/8Xvq8=
github.com/Azure/go-ntlmssp v0.0.0-20221128193559-754e69321358/go.mod h1:chxPXzSsl7ZWRAuOIE23GDNzjWuZquvFlgA8xmpunjU= github.com/Azure/go-ntlmssp v0.0.0-20221128193559-754e69321358/go.mod h1:chxPXzSsl7ZWRAuOIE23GDNzjWuZquvFlgA8xmpunjU=
github.com/ClusterCockpit/cc-lib/v2 v2.4.0 h1:OnZlvqSatg7yCQ2NtSR7AddpUVSiuSMZ8scF1a7nfOk=
github.com/ClusterCockpit/cc-lib/v2 v2.4.0/go.mod h1:JuxMAuEOaLLNEnnL9U3ejha8kMvsSatLdKPZEgJw6iw=
github.com/DATA-DOG/go-sqlmock v1.5.2 h1:OcvFkGmslmlZibjAjaHm3L//6LiuBgolP7OputlJIzU= github.com/DATA-DOG/go-sqlmock v1.5.2 h1:OcvFkGmslmlZibjAjaHm3L//6LiuBgolP7OputlJIzU=
github.com/DATA-DOG/go-sqlmock v1.5.2/go.mod h1:88MAG/4G7SMwSE3CeA0ZKzrT5CiOU3OJ+JlNzwDqpNU= github.com/DATA-DOG/go-sqlmock v1.5.2/go.mod h1:88MAG/4G7SMwSE3CeA0ZKzrT5CiOU3OJ+JlNzwDqpNU=
github.com/KyleBanks/depth v1.2.1 h1:5h8fQADFrWtarTdtDudMmGsC7GPbOAu6RVB3ffsVFHc= github.com/KyleBanks/depth v1.2.1 h1:5h8fQADFrWtarTdtDudMmGsC7GPbOAu6RVB3ffsVFHc=

View File

@@ -287,6 +287,7 @@ type ComplexityRoot struct {
Cluster func(childComplexity int) int Cluster func(childComplexity int) int
CpusAllocated func(childComplexity int) int CpusAllocated func(childComplexity int) int
GpusAllocated func(childComplexity int) int GpusAllocated func(childComplexity int) int
HealthData func(childComplexity int) int
HealthState func(childComplexity int) int HealthState func(childComplexity int) int
Hostname func(childComplexity int) int Hostname func(childComplexity int) int
ID func(childComplexity int) int ID func(childComplexity int) int
@@ -347,6 +348,7 @@ type ComplexityRoot struct {
NodeStates func(childComplexity int, filter []*model.NodeFilter) int NodeStates func(childComplexity int, filter []*model.NodeFilter) int
NodeStatesTimed func(childComplexity int, filter []*model.NodeFilter, typeArg string) int NodeStatesTimed func(childComplexity int, filter []*model.NodeFilter, typeArg string) int
Nodes func(childComplexity int, filter []*model.NodeFilter, order *model.OrderByInput) int Nodes func(childComplexity int, filter []*model.NodeFilter, order *model.OrderByInput) int
NodesWithMeta func(childComplexity int, filter []*model.NodeFilter, order *model.OrderByInput) int
RooflineHeatmap func(childComplexity int, filter []*model.JobFilter, rows int, cols int, minX float64, minY float64, maxX float64, maxY float64) int RooflineHeatmap func(childComplexity int, filter []*model.JobFilter, rows int, cols int, minX float64, minY float64, maxX float64, maxY float64) int
ScopedJobStats func(childComplexity int, id string, metrics []string, scopes []schema.MetricScope) int ScopedJobStats func(childComplexity int, id string, metrics []string, scopes []schema.MetricScope) int
Tags func(childComplexity int) int Tags func(childComplexity int) int
@@ -369,7 +371,7 @@ type ComplexityRoot struct {
Series struct { Series struct {
Data func(childComplexity int) int Data func(childComplexity int) int
Hostname func(childComplexity int) int Hostname func(childComplexity int) int
Id func(childComplexity int) int ID func(childComplexity int) int
Statistics func(childComplexity int) int Statistics func(childComplexity int) int
} }
@@ -476,6 +478,7 @@ type NodeResolver interface {
SchedulerState(ctx context.Context, obj *schema.Node) (schema.SchedulerState, error) SchedulerState(ctx context.Context, obj *schema.Node) (schema.SchedulerState, error)
HealthState(ctx context.Context, obj *schema.Node) (string, error) HealthState(ctx context.Context, obj *schema.Node) (string, error)
MetaData(ctx context.Context, obj *schema.Node) (any, error) MetaData(ctx context.Context, obj *schema.Node) (any, error)
HealthData(ctx context.Context, obj *schema.Node) (any, error)
} }
type QueryResolver interface { type QueryResolver interface {
Clusters(ctx context.Context) ([]*schema.Cluster, error) Clusters(ctx context.Context) ([]*schema.Cluster, error)
@@ -485,6 +488,7 @@ type QueryResolver interface {
AllocatedNodes(ctx context.Context, cluster string) ([]*model.Count, error) AllocatedNodes(ctx context.Context, cluster string) ([]*model.Count, error)
Node(ctx context.Context, id string) (*schema.Node, error) Node(ctx context.Context, id string) (*schema.Node, error)
Nodes(ctx context.Context, filter []*model.NodeFilter, order *model.OrderByInput) (*model.NodeStateResultList, error) Nodes(ctx context.Context, filter []*model.NodeFilter, order *model.OrderByInput) (*model.NodeStateResultList, error)
NodesWithMeta(ctx context.Context, filter []*model.NodeFilter, order *model.OrderByInput) (*model.NodeStateResultList, error)
NodeStates(ctx context.Context, filter []*model.NodeFilter) ([]*model.NodeStates, error) NodeStates(ctx context.Context, filter []*model.NodeFilter) ([]*model.NodeStates, error)
NodeStatesTimed(ctx context.Context, filter []*model.NodeFilter, typeArg string) ([]*model.NodeStatesTimed, error) NodeStatesTimed(ctx context.Context, filter []*model.NodeFilter, typeArg string) ([]*model.NodeStatesTimed, error)
Job(ctx context.Context, id string) (*schema.Job, error) Job(ctx context.Context, id string) (*schema.Job, error)
@@ -1452,6 +1456,12 @@ func (e *executableSchema) Complexity(ctx context.Context, typeName, field strin
} }
return e.complexity.Node.GpusAllocated(childComplexity), true return e.complexity.Node.GpusAllocated(childComplexity), true
case "Node.healthData":
if e.complexity.Node.HealthData == nil {
break
}
return e.complexity.Node.HealthData(childComplexity), true
case "Node.healthState": case "Node.healthState":
if e.complexity.Node.HealthState == nil { if e.complexity.Node.HealthState == nil {
break break
@@ -1785,6 +1795,17 @@ func (e *executableSchema) Complexity(ctx context.Context, typeName, field strin
} }
return e.complexity.Query.Nodes(childComplexity, args["filter"].([]*model.NodeFilter), args["order"].(*model.OrderByInput)), true return e.complexity.Query.Nodes(childComplexity, args["filter"].([]*model.NodeFilter), args["order"].(*model.OrderByInput)), true
case "Query.nodesWithMeta":
if e.complexity.Query.NodesWithMeta == nil {
break
}
args, err := ec.field_Query_nodesWithMeta_args(ctx, rawArgs)
if err != nil {
return 0, false
}
return e.complexity.Query.NodesWithMeta(childComplexity, args["filter"].([]*model.NodeFilter), args["order"].(*model.OrderByInput)), true
case "Query.rooflineHeatmap": case "Query.rooflineHeatmap":
if e.complexity.Query.RooflineHeatmap == nil { if e.complexity.Query.RooflineHeatmap == nil {
break break
@@ -1882,11 +1903,11 @@ func (e *executableSchema) Complexity(ctx context.Context, typeName, field strin
return e.complexity.Series.Hostname(childComplexity), true return e.complexity.Series.Hostname(childComplexity), true
case "Series.id": case "Series.id":
if e.complexity.Series.Id == nil { if e.complexity.Series.ID == nil {
break break
} }
return e.complexity.Series.Id(childComplexity), true return e.complexity.Series.ID(childComplexity), true
case "Series.statistics": case "Series.statistics":
if e.complexity.Series.Statistics == nil { if e.complexity.Series.Statistics == nil {
break break
@@ -2302,6 +2323,7 @@ type Node {
schedulerState: SchedulerState! schedulerState: SchedulerState!
healthState: MonitoringState! healthState: MonitoringState!
metaData: Any metaData: Any
healthData: Any
} }
type NodeStates { type NodeStates {
@@ -2611,6 +2633,7 @@ type Query {
## Node Queries New ## Node Queries New
node(id: ID!): Node node(id: ID!): Node
nodes(filter: [NodeFilter!], order: OrderByInput): NodeStateResultList! nodes(filter: [NodeFilter!], order: OrderByInput): NodeStateResultList!
nodesWithMeta(filter: [NodeFilter!], order: OrderByInput): NodeStateResultList!
nodeStates(filter: [NodeFilter!]): [NodeStates!]! nodeStates(filter: [NodeFilter!]): [NodeStates!]!
nodeStatesTimed(filter: [NodeFilter!], type: String!): [NodeStatesTimed!]! nodeStatesTimed(filter: [NodeFilter!], type: String!): [NodeStatesTimed!]!
@@ -3268,6 +3291,22 @@ func (ec *executionContext) field_Query_node_args(ctx context.Context, rawArgs m
return args, nil return args, nil
} }
// field_Query_nodesWithMeta_args converts the raw GraphQL arguments of the
// Query.nodesWithMeta field into the argument map used by the resolver call.
//
// The optional "filter" and "order" arguments are each passed through
// graphql.ProcessArgField with their respective unmarshal function and stored
// under the same key. The first processing error aborts and is returned with a
// nil map.
func (ec *executionContext) field_Query_nodesWithMeta_args(ctx context.Context, rawArgs map[string]any) (map[string]any, error) {
var err error
args := map[string]any{}
arg0, err := graphql.ProcessArgField(ctx, rawArgs, "filter", ec.unmarshalONodeFilter2ᚕᚖgithubᚗcomᚋClusterCockpitᚋccᚑbackendᚋinternalᚋgraphᚋmodelᚐNodeFilterᚄ)
if err != nil {
return nil, err
}
args["filter"] = arg0
arg1, err := graphql.ProcessArgField(ctx, rawArgs, "order", ec.unmarshalOOrderByInput2ᚖgithubᚗcomᚋClusterCockpitᚋccᚑbackendᚋinternalᚋgraphᚋmodelᚐOrderByInput)
if err != nil {
return nil, err
}
args["order"] = arg1
return args, nil
}
func (ec *executionContext) field_Query_nodes_args(ctx context.Context, rawArgs map[string]any) (map[string]any, error) { func (ec *executionContext) field_Query_nodes_args(ctx context.Context, rawArgs map[string]any) (map[string]any, error) {
var err error var err error
args := map[string]any{} args := map[string]any{}
@@ -8258,6 +8297,35 @@ func (ec *executionContext) fieldContext_Node_metaData(_ context.Context, field
return fc, nil return fc, nil
} }
// _Node_healthData resolves the Node.healthData field for obj.
//
// It delegates to graphql.ResolveField, handing it the field-context builder
// for this field, a closure that calls the user-implemented
// NodeResolver.HealthData, and the marshaler for the optional Any scalar.
// NOTE(review): the meaning of the trailing boolean flags (true, false) is
// defined by graphql.ResolveField and is not visible here.
func (ec *executionContext) _Node_healthData(ctx context.Context, field graphql.CollectedField, obj *schema.Node) (ret graphql.Marshaler) {
return graphql.ResolveField(
ctx,
ec.OperationContext,
field,
ec.fieldContext_Node_healthData,
func(ctx context.Context) (any, error) {
return ec.resolvers.Node().HealthData(ctx, obj)
},
nil,
ec.marshalOAny2interface,
true,
false,
)
}
// fieldContext_Node_healthData builds the graphql.FieldContext for the
// Node.healthData field. The field is marked as a method backed by a resolver.
// Because its type is the scalar Any, the Child lookup always fails with an
// error: there are no sub-fields to select.
func (ec *executionContext) fieldContext_Node_healthData(_ context.Context, field graphql.CollectedField) (fc *graphql.FieldContext, err error) {
fc = &graphql.FieldContext{
Object: "Node",
Field: field,
IsMethod: true,
IsResolver: true,
Child: func(ctx context.Context, field graphql.CollectedField) (*graphql.FieldContext, error) {
return nil, errors.New("field of type Any does not have child fields")
},
}
return fc, nil
}
func (ec *executionContext) _NodeMetrics_host(ctx context.Context, field graphql.CollectedField, obj *model.NodeMetrics) (ret graphql.Marshaler) { func (ec *executionContext) _NodeMetrics_host(ctx context.Context, field graphql.CollectedField, obj *model.NodeMetrics) (ret graphql.Marshaler) {
return graphql.ResolveField( return graphql.ResolveField(
ctx, ctx,
@@ -8428,6 +8496,8 @@ func (ec *executionContext) fieldContext_NodeStateResultList_items(_ context.Con
return ec.fieldContext_Node_healthState(ctx, field) return ec.fieldContext_Node_healthState(ctx, field)
case "metaData": case "metaData":
return ec.fieldContext_Node_metaData(ctx, field) return ec.fieldContext_Node_metaData(ctx, field)
case "healthData":
return ec.fieldContext_Node_healthData(ctx, field)
} }
return nil, fmt.Errorf("no field named %q was found under type Node", field.Name) return nil, fmt.Errorf("no field named %q was found under type Node", field.Name)
}, },
@@ -9053,6 +9123,8 @@ func (ec *executionContext) fieldContext_Query_node(ctx context.Context, field g
return ec.fieldContext_Node_healthState(ctx, field) return ec.fieldContext_Node_healthState(ctx, field)
case "metaData": case "metaData":
return ec.fieldContext_Node_metaData(ctx, field) return ec.fieldContext_Node_metaData(ctx, field)
case "healthData":
return ec.fieldContext_Node_healthData(ctx, field)
} }
return nil, fmt.Errorf("no field named %q was found under type Node", field.Name) return nil, fmt.Errorf("no field named %q was found under type Node", field.Name)
}, },
@@ -9118,6 +9190,53 @@ func (ec *executionContext) fieldContext_Query_nodes(ctx context.Context, field
return fc, nil return fc, nil
} }
// _Query_nodesWithMeta resolves the Query.nodesWithMeta field.
//
// It delegates to graphql.ResolveField. The resolver closure reads the already
// parsed "filter" and "order" arguments from the current field context (type
// assertions match what field_Query_nodesWithMeta_args stores) and calls the
// user-implemented QueryResolver.NodesWithMeta. The result is marshaled as a
// non-null NodeStateResultList.
// NOTE(review): the meaning of the trailing boolean flags (true, true) is
// defined by graphql.ResolveField and is not visible here.
func (ec *executionContext) _Query_nodesWithMeta(ctx context.Context, field graphql.CollectedField) (ret graphql.Marshaler) {
return graphql.ResolveField(
ctx,
ec.OperationContext,
field,
ec.fieldContext_Query_nodesWithMeta,
func(ctx context.Context) (any, error) {
fc := graphql.GetFieldContext(ctx)
return ec.resolvers.Query().NodesWithMeta(ctx, fc.Args["filter"].([]*model.NodeFilter), fc.Args["order"].(*model.OrderByInput))
},
nil,
ec.marshalNNodeStateResultList2ᚖgithubᚗcomᚋClusterCockpitᚋccᚑbackendᚋinternalᚋgraphᚋmodelᚐNodeStateResultList,
true,
true,
)
}
// fieldContext_Query_nodesWithMeta builds the graphql.FieldContext for the
// Query.nodesWithMeta field and parses its arguments into fc.Args.
//
// Child resolves the sub-fields of NodeStateResultList ("items", "count") and
// returns an error for any other name. If argument parsing fails, the error is
// reported via ec.Error and returned together with the partially built context.
func (ec *executionContext) fieldContext_Query_nodesWithMeta(ctx context.Context, field graphql.CollectedField) (fc *graphql.FieldContext, err error) {
fc = &graphql.FieldContext{
Object: "Query",
Field: field,
IsMethod: true,
IsResolver: true,
Child: func(ctx context.Context, field graphql.CollectedField) (*graphql.FieldContext, error) {
switch field.Name {
case "items":
return ec.fieldContext_NodeStateResultList_items(ctx, field)
case "count":
return ec.fieldContext_NodeStateResultList_count(ctx, field)
}
return nil, fmt.Errorf("no field named %q was found under type NodeStateResultList", field.Name)
},
}
// A panic raised below (e.g. during argument unmarshaling) is converted into
// the named return value err and reported on the execution context.
defer func() {
if r := recover(); r != nil {
err = ec.Recover(ctx, r)
ec.Error(ctx, err)
}
}()
// Attach fc to ctx before parsing so argument processing runs with this
// field context in scope.
ctx = graphql.WithFieldContext(ctx, fc)
if fc.Args, err = ec.field_Query_nodesWithMeta_args(ctx, field.ArgumentMap(ec.Variables)); err != nil {
ec.Error(ctx, err)
return fc, err
}
return fc, nil
}
func (ec *executionContext) _Query_nodeStates(ctx context.Context, field graphql.CollectedField) (ret graphql.Marshaler) { func (ec *executionContext) _Query_nodeStates(ctx context.Context, field graphql.CollectedField) (ret graphql.Marshaler) {
return graphql.ResolveField( return graphql.ResolveField(
ctx, ctx,
@@ -15744,6 +15863,39 @@ func (ec *executionContext) _Node(ctx context.Context, sel ast.SelectionSet, obj
continue continue
} }
out.Concurrently(i, func(ctx context.Context) graphql.Marshaler { return innerFunc(ctx, out) })
case "healthData":
field := field
innerFunc := func(ctx context.Context, _ *graphql.FieldSet) (res graphql.Marshaler) {
defer func() {
if r := recover(); r != nil {
ec.Error(ctx, ec.Recover(ctx, r))
}
}()
res = ec._Node_healthData(ctx, field, obj)
return res
}
if field.Deferrable != nil {
dfs, ok := deferred[field.Deferrable.Label]
di := 0
if ok {
dfs.AddField(field)
di = len(dfs.Values) - 1
} else {
dfs = graphql.NewFieldSet([]graphql.CollectedField{field})
deferred[field.Deferrable.Label] = dfs
}
dfs.Concurrently(di, func(ctx context.Context) graphql.Marshaler {
return innerFunc(ctx, dfs)
})
// don't run the out.Concurrently() call below
out.Values[i] = graphql.Null
continue
}
out.Concurrently(i, func(ctx context.Context) graphql.Marshaler { return innerFunc(ctx, out) }) out.Concurrently(i, func(ctx context.Context) graphql.Marshaler { return innerFunc(ctx, out) })
default: default:
panic("unknown field " + strconv.Quote(field.Name)) panic("unknown field " + strconv.Quote(field.Name))
@@ -16171,6 +16323,28 @@ func (ec *executionContext) _Query(ctx context.Context, sel ast.SelectionSet) gr
func(ctx context.Context) graphql.Marshaler { return innerFunc(ctx, out) }) func(ctx context.Context) graphql.Marshaler { return innerFunc(ctx, out) })
} }
out.Concurrently(i, func(ctx context.Context) graphql.Marshaler { return rrm(innerCtx) })
case "nodesWithMeta":
field := field
innerFunc := func(ctx context.Context, fs *graphql.FieldSet) (res graphql.Marshaler) {
defer func() {
if r := recover(); r != nil {
ec.Error(ctx, ec.Recover(ctx, r))
}
}()
res = ec._Query_nodesWithMeta(ctx, field)
if res == graphql.Null {
atomic.AddUint32(&fs.Invalids, 1)
}
return res
}
rrm := func(ctx context.Context) graphql.Marshaler {
return ec.OperationContext.RootResolverMiddleware(ctx,
func(ctx context.Context) graphql.Marshaler { return innerFunc(ctx, out) })
}
out.Concurrently(i, func(ctx context.Context) graphql.Marshaler { return rrm(innerCtx) }) out.Concurrently(i, func(ctx context.Context) graphql.Marshaler { return rrm(innerCtx) })
case "nodeStates": case "nodeStates":
field := field field := field

View File

@@ -318,18 +318,39 @@ func (r *nodeResolver) SchedulerState(ctx context.Context, obj *schema.Node) (sc
if obj.NodeState != "" { if obj.NodeState != "" {
return obj.NodeState, nil return obj.NodeState, nil
} else { } else {
return "", fmt.Errorf("no SchedulerState (NodeState) on Object") return "", fmt.Errorf("resolver: no SchedulerState (NodeState) on node object")
} }
} }
// HealthState is the resolver for the healthState field. // HealthState is the resolver for the healthState field.
func (r *nodeResolver) HealthState(ctx context.Context, obj *schema.Node) (string, error) { func (r *nodeResolver) HealthState(ctx context.Context, obj *schema.Node) (string, error) {
panic(fmt.Errorf("not implemented: HealthState - healthState")) if obj.HealthState != "" {
return string(obj.HealthState), nil
} else {
return "", fmt.Errorf("resolver: no HealthState (NodeState) on node object")
}
} }
// MetaData is the resolver for the metaData field. // MetaData is the resolver for the metaData field.
func (r *nodeResolver) MetaData(ctx context.Context, obj *schema.Node) (any, error) { func (r *nodeResolver) MetaData(ctx context.Context, obj *schema.Node) (any, error) {
panic(fmt.Errorf("not implemented: MetaData - metaData")) if obj.MetaData != nil {
return obj.MetaData, nil
} else {
cclog.Debug("resolver: no MetaData (NodeState) on node object")
emptyMeta := make(map[string]string, 0)
return emptyMeta, nil
}
}
// HealthData is the resolver for the healthData field.
func (r *nodeResolver) HealthData(ctx context.Context, obj *schema.Node) (any, error) {
if obj.HealthData != nil {
return obj.HealthData, nil
} else {
cclog.Debug("resolver: no HealthData (NodeState) on node object")
emptyHealth := make(map[string][]string, 0)
return emptyHealth, nil
}
} }
// Clusters is the resolver for the clusters field. // Clusters is the resolver for the clusters field.
@@ -398,6 +419,15 @@ func (r *queryResolver) Nodes(ctx context.Context, filter []*model.NodeFilter, o
return &model.NodeStateResultList{Items: nodes, Count: &count}, err return &model.NodeStateResultList{Items: nodes, Count: &count}, err
} }
// NodesWithMeta is the resolver for the nodesWithMeta field.
//
// A dedicated handler is used because graphql.CollectAllFields(ctx) only
// returns top-level fields (i.e. items, count) and not subfields like
// items.metaData.
//
// The result contains every node matching filter together with the number of
// returned items. On a repository error no result list is returned, only the
// error.
func (r *queryResolver) NodesWithMeta(ctx context.Context, filter []*model.NodeFilter, order *model.OrderByInput) (*model.NodeStateResultList, error) {
	repo := repository.GetNodeRepository()

	// Paging is deliberately skipped (nil page); order is passed through but is
	// currently unused by the repository.
	nodes, err := repo.QueryNodesWithMeta(ctx, filter, nil, order)
	if err != nil {
		return nil, err
	}

	count := len(nodes)
	return &model.NodeStateResultList{Items: nodes, Count: &count}, nil
}
// NodeStates is the resolver for the nodeStates field. // NodeStates is the resolver for the nodeStates field.
func (r *queryResolver) NodeStates(ctx context.Context, filter []*model.NodeFilter) ([]*model.NodeStates, error) { func (r *queryResolver) NodeStates(ctx context.Context, filter []*model.NodeFilter) ([]*model.NodeStates, error) {
repo := repository.GetNodeRepository() repo := repository.GetNodeRepository()

View File

@@ -154,16 +154,14 @@ func (r *NodeRepository) GetNodeByID(id int64, withMeta bool) (*schema.Node, err
return nil, err return nil, err
} }
// NEEDS METADATA BY ID if withMeta {
// if withMeta { meta, metaErr := r.FetchMetadata(node.Hostname, node.Cluster)
// var err error if metaErr != nil {
// var meta map[string]string cclog.Warnf("Error while fetching metadata for node ID '%d': %v", id, metaErr)
// if meta, err = r.FetchMetadata(hostname, cluster); err != nil { return nil, metaErr
// cclog.Warnf("Error while fetching metadata for node '%s'", hostname) }
// return nil, err node.MetaData = meta
// } }
// node.MetaData = meta
// }
return node, nil return node, nil
} }
@@ -382,6 +380,81 @@ func (r *NodeRepository) QueryNodes(
return nodes, nil return nodes, nil
} }
// QueryNodesWithMeta returns a list of nodes based on a node filter. It always
// operates on the last state (largest timestamp). In addition to the basic node
// columns it decodes both optional JSON columns: node.meta_data is unmarshaled
// into Node.MetaData and node_state.health_metrics into Node.HealthData. An
// empty column leaves the corresponding field at its zero value.
//
// page is optional: when nil, or when ItemsPerPage is -1, all matching nodes
// are returned. order is currently unused; results are always sorted by
// hostname in ascending order.
//
// An error is returned if the access check, the query, scanning a row,
// decoding a JSON column, or the row iteration itself fails.
func (r *NodeRepository) QueryNodesWithMeta(
	ctx context.Context,
	filters []*model.NodeFilter,
	page *model.PageRequest,
	order *model.OrderByInput, // Currently unused!
) ([]*schema.Node, error) {
	query, qerr := AccessCheck(ctx,
		sq.Select("node.hostname", "node.cluster", "node.subcluster",
			"node_state.node_state", "node_state.health_state",
			"node.meta_data", "node_state.health_metrics").
			From("node").
			Join("node_state ON node_state.node_id = node.id").
			Where(latestStateCondition()))
	if qerr != nil {
		return nil, qerr
	}

	query = applyNodeFilters(query, filters)
	query = query.OrderBy("node.hostname ASC")

	if page != nil && page.ItemsPerPage != -1 {
		limit := uint64(page.ItemsPerPage)
		query = query.Offset((uint64(page.Page) - 1) * limit).Limit(limit)
	}

	rows, err := query.RunWith(r.stmtCache).Query()
	if err != nil {
		queryString, queryVars, _ := query.ToSql()
		cclog.Errorf("Error while running query '%s' %v: %v", queryString, queryVars, err)
		return nil, err
	}
	// Release the result set on every exit path, including the early returns
	// for scan and JSON decoding errors below.
	defer rows.Close()

	nodes := make([]*schema.Node, 0)
	for rows.Next() {
		node := schema.Node{}
		var rawMetaData, rawMetricHealth []byte

		if err := rows.Scan(&node.Hostname, &node.Cluster, &node.SubCluster,
			&node.NodeState, &node.HealthState, &rawMetaData, &rawMetricHealth); err != nil {
			cclog.Warn("Error while scanning rows (QueryNodesWithMeta)")
			return nil, err
		}

		// Both JSON columns are optional; only decode when data is present.
		if len(rawMetaData) > 0 {
			metaData := make(map[string]string)
			if err := json.Unmarshal(rawMetaData, &metaData); err != nil {
				cclog.Warn("Error while unmarshaling raw metadata json")
				return nil, err
			}
			node.MetaData = metaData
		}

		if len(rawMetricHealth) > 0 {
			healthData := make(map[string][]string)
			if err := json.Unmarshal(rawMetricHealth, &healthData); err != nil {
				cclog.Warn("Error while unmarshaling raw healthdata json")
				return nil, err
			}
			node.HealthData = healthData
		}

		nodes = append(nodes, &node)
	}

	// rows.Next() also returns false when iteration aborts with an error;
	// surface that instead of returning a silently truncated list.
	if err := rows.Err(); err != nil {
		cclog.Warn("Error while iterating rows (QueryNodesWithMeta)")
		return nil, err
	}

	return nodes, nil
}
// CountNodes returns the total matched nodes based on a node filter. It always operates // CountNodes returns the total matched nodes based on a node filter. It always operates
// on the last state (largest timestamp) per node. // on the last state (largest timestamp) per node.
func (r *NodeRepository) CountNodes( func (r *NodeRepository) CountNodes(

View File

@@ -23,6 +23,7 @@
} from "@sveltestrap/sveltestrap"; } from "@sveltestrap/sveltestrap";
import StatusDash from "./dashdetails/StatusDash.svelte"; import StatusDash from "./dashdetails/StatusDash.svelte";
import HealthDash from "./dashdetails/HealthDash.svelte";
import UsageDash from "./dashdetails/UsageDash.svelte"; import UsageDash from "./dashdetails/UsageDash.svelte";
import StatisticsDash from "./dashdetails/StatisticsDash.svelte"; import StatisticsDash from "./dashdetails/StatisticsDash.svelte";
@@ -65,7 +66,13 @@
<TabContent> <TabContent>
<TabPane tabId="status-dash" tab="Status" active> <TabPane tabId="status-dash" tab="Status" active>
<CardBody> <CardBody>
<StatusDash clusters={$initq.data.clusters} {presetCluster} {useCbColors} useAltColors></StatusDash> <StatusDash clusters={$initq.data.clusters} {presetCluster}></StatusDash>
</CardBody>
</TabPane>
<TabPane tabId="health-dash" tab="Metric Status">
<CardBody>
<HealthDash {presetCluster}></HealthDash>
</CardBody> </CardBody>
</TabPane> </TabPane>

View File

@@ -0,0 +1,372 @@
<!--
@component Node health view component; renders current node-state and node-health pie charts plus a per-node table of metric availability and meta information
Properties:
- `presetCluster String`: The cluster to show status information for
-->
<script>
  import {
    Row,
    Col,
    Card,
    Input,
    InputGroup,
    InputGroupText,
    Table,
    Icon,
    Spinner
  } from "@sveltestrap/sveltestrap";
  import {
    queryStore,
    gql,
    getContextClient,
  } from "@urql/svelte";
  import Refresher from "../../generic/helper/Refresher.svelte";
  import Pie, { colors } from "../../generic/plots/Pie.svelte";

  /* Svelte 5 Props */
  let {
    presetCluster,
  } = $props();

  /* Const Init */
  const client = getContextClient();

  /* State Init */
  let pieWidth = $state(0);
  // Free-text filters of the table header (substring match per column)
  let tableHostFilter = $state("");
  let tableStateFilter = $state("");
  let tableHealthFilter = $state("");
  // Per-column sort direction and which column is currently active
  let healthTableSorting = $state(
    {
      schedulerState: { dir: "down", active: true },
      healthState: { dir: "down", active: false },
      hostname: { dir: "down", active: false },
    }
  );
  // Placeholder order argument (unused in backend). Kept as state so that
  // reassigning it, as the Refresher callback does, changes a dependency of
  // `statusQuery` and thereby re-runs the query.
  let sorting = $state({ field: "startTime", type: "col", order: "DESC" });

  /* Derived */
  let cluster = $derived(presetCluster);

  // Re-created whenever `cluster` or `sorting` changes; always hits the network.
  const statusQuery = $derived(queryStore({
    client: client,
    query: gql`
      query (
        $nodeFilter: [NodeFilter!]!
        $sorting: OrderByInput!
      ) {
        # $sorting unused in backend: Use placeholder
        nodes: nodesWithMeta(filter: $nodeFilter, order: $sorting) {
          count
          items {
            hostname
            cluster
            subCluster
            schedulerState
            healthState
            metaData
            healthData
          }
        }
        # Get Current States for Pie Charts
        nodeStates(filter: $nodeFilter) {
          state
          count
        },
      }
    `,
    variables: {
      nodeFilter: { cluster: { eq: cluster }},
      sorting: sorting,
    },
    requestPolicy: "network-only"
  }));

  // Table rows, initially ordered by scheduler state; empty until data arrived.
  let healthTableData = $derived.by(() => {
    if ($statusQuery?.data) {
      return [...$statusQuery.data.nodes.items].sort((n1, n2) => {
        return n1['schedulerState'].localeCompare(n2['schedulerState'])
      });
    } else {
      return [];
    }
  });

  // Table rows after applying all non-empty header filters.
  let filteredTableData = $derived.by(() => {
    let pendingTableData = [...healthTableData];
    if (tableHostFilter != "") {
      pendingTableData = pendingTableData.filter((e) => e.hostname.includes(tableHostFilter))
    }
    if (tableStateFilter != "") {
      pendingTableData = pendingTableData.filter((e) => e.schedulerState.includes(tableStateFilter))
    }
    if (tableHealthFilter != "") {
      pendingTableData = pendingTableData.filter((e) => e.healthState.includes(tableHealthFilter))
    }
    return pendingTableData
  });

  // Scheduler-state counts for the first pie chart, largest first.
  const refinedStateData = $derived.by(() => {
    return $statusQuery?.data?.nodeStates.
      filter((e) => ['allocated', 'reserved', 'idle', 'mixed','down', 'unknown'].includes(e.state)).
      sort((a, b) => b.count - a.count)
  });

  // Health-state counts for the second pie chart, largest first.
  const refinedHealthData = $derived.by(() => {
    return $statusQuery?.data?.nodeStates.
      filter((e) => ['full', 'partial', 'failed'].includes(e.state)).
      sort((a, b) => b.count - a.count)
  });

  /* Functions */
  // Sorts the table by `field`: clicking the active column toggles its
  // direction, clicking another column makes that column the active one.
  function sortBy(field) {
    const s = healthTableSorting[field];
    if (s.active) {
      s.dir = s.dir == "up" ? "down" : "up";
    } else {
      for (let key in healthTableSorting)
        healthTableSorting[key].active = false;
      s.active = true;
    }

    const pendingHealthData = healthTableData.sort((n1, n2) => {
      if (n1[field] == null || n2[field] == null) return -1;
      else if (s.dir == "down") return n1[field].localeCompare(n2[field])
      else return n2[field].localeCompare(n1[field])
    });

    // Reassign with fresh references so dependents are updated
    healthTableSorting = {...healthTableSorting};
    healthTableData = [...pendingHealthData];
  }
</script>
<!-- Refresher and space for other options -->
<Row class="justify-content-between">
<Col xs="12" md="5" lg="4" xl="3">
<Refresher
initially={120}
onRefresh={(interval) => {
sorting = { field: "startTime", type: "col", order: "DESC" }
}}
/>
</Col>
</Row>
<hr/>
<!-- Node Health Pies, later Charts -->
{#if $statusQuery.fetching}
<Row cols={1} class="text-center mt-3">
<Col>
<Spinner />
</Col>
</Row>
{:else if $statusQuery.error}
<Row cols={1} class="text-center mt-3">
<Col>
<Card body color="danger">Status Query (States): {$statusQuery.error.message}</Card>
</Col>
</Row>
{:else if $statusQuery?.data?.nodeStates}
<Row cols={{ lg: 4, md: 2 , sm: 1}} class="mb-3 justify-content-center">
<Col class="px-3 mt-2 mt-lg-0">
<div bind:clientWidth={pieWidth}>
{#key refinedStateData}
<h4 class="text-center">
Current {cluster.charAt(0).toUpperCase() + cluster.slice(1)} Node States
</h4>
<Pie
canvasId="hpcpie-slurm"
size={pieWidth * 0.55}
sliceLabel="Nodes"
quantities={refinedStateData.map(
(sd) => sd.count,
)}
entities={refinedStateData.map(
(sd) => sd.state,
)}
fixColors={refinedStateData.map(
(sd) => colors['nodeStates'][sd.state],
)}
/>
{/key}
</div>
</Col>
<Col class="px-4 py-2">
{#key refinedStateData}
<Table>
<tr class="mb-2">
<th></th>
<th>Current State</th>
<th>Nodes</th>
</tr>
{#each refinedStateData as sd, i}
<tr>
<td><Icon name="circle-fill" style="color: {colors['nodeStates'][sd.state]};"/></td>
<td>{sd.state}</td>
<td>{sd.count}</td>
</tr>
{/each}
</Table>
{/key}
</Col>
<Col class="px-3 mt-2 mt-lg-0">
<div bind:clientWidth={pieWidth}>
{#key refinedHealthData}
<h4 class="text-center">
Current {cluster.charAt(0).toUpperCase() + cluster.slice(1)} Node Health
</h4>
<Pie
canvasId="hpcpie-health"
size={pieWidth * 0.55}
sliceLabel="Nodes"
quantities={refinedHealthData.map(
(hd) => hd.count,
)}
entities={refinedHealthData.map(
(hd) => hd.state,
)}
fixColors={refinedHealthData.map(
(hd) => colors['healthStates'][hd.state],
)}
/>
{/key}
</div>
</Col>
<Col class="px-4 py-2">
{#key refinedHealthData}
<Table>
<tr class="mb-2">
<th></th>
<th>Current Health</th>
<th>Nodes</th>
</tr>
{#each refinedHealthData as hd, i}
<tr>
<td><Icon name="circle-fill"style="color: {colors['healthStates'][hd.state]};" /></td>
<td>{hd.state}</td>
<td>{hd.count}</td>
</tr>
{/each}
</Table>
{/key}
</Col>
</Row>
{/if}
<hr/>
<!-- Tabular Info About Node States and Missing Metrics -->
{#if $statusQuery.fetching}
<Row cols={1} class="text-center mt-3">
<Col>
<Spinner />
</Col>
</Row>
{:else if $statusQuery.error}
<Row cols={1} class="text-center mt-3">
<Col>
<Card body color="danger">Status Query (Details): {$statusQuery.error.message}</Card>
</Col>
</Row>
{:else if $statusQuery.data}
<Row>
<Col>
<Card>
<Table hover>
<thead>
<!-- Header Row 1: Titles and Sorting -->
<tr>
<th style="width:10%; max-width:12.5%;" onclick={() => sortBy('hostname')}>
Host
<Icon
name="caret-{healthTableSorting['hostname'].dir}{healthTableSorting['hostname']
.active
? '-fill'
: ''}"
/>
</th>
<th style="width:10%; max-width:12.5%;" onclick={() => sortBy('schedulerState')}>
Scheduler State
<Icon
name="caret-{healthTableSorting['schedulerState'].dir}{healthTableSorting['schedulerState']
.active
? '-fill'
: ''}"
/>
</th>
<th style="width:10%; max-width:12.5%;" onclick={() => sortBy('healthState')}>
Health State
<Icon
name="caret-{healthTableSorting['healthState'].dir}{healthTableSorting['healthState']
.active
? '-fill'
: ''}"
/>
</th>
<th>Metric Availability</th>
<th>Meta Information</th>
</tr>
<!-- Header Row 2: Filters -->
<tr>
<th>
<InputGroup size="sm">
<Input type="text" bind:value={tableHostFilter}/>
<InputGroupText>
<Icon name="search"></Icon>
</InputGroupText>
</InputGroup>
</th>
<th>
<InputGroup size="sm">
<Input type="text" bind:value={tableStateFilter}/>
<InputGroupText>
<Icon name="search"></Icon>
</InputGroupText>
</InputGroup>
</th>
<th>
<InputGroup size="sm">
<Input type="text" bind:value={tableHealthFilter}/>
<InputGroupText>
<Icon name="search"></Icon>
</InputGroupText>
</InputGroup>
</th>
<th></th>
<th></th>
</tr>
</thead>
<tbody>
{#each filteredTableData as host (host.hostname)}
<tr>
<th><b><a href="/monitoring/node/{cluster}/{host.hostname}" target="_blank">{host.hostname}</a></b></th>
<td>{host.schedulerState}</td>
<td>{host.healthState}</td>
<td>
{#each Object.keys(host.healthData) as hkey}
<p>
<b>{hkey}</b>: {host.healthData[hkey]}
</p>
{/each}
</td>
<td>
{#each Object.keys(host.metaData) as mkey}
<p>
<b>{mkey}</b>: {host.metaData[mkey]}
</p>
{/each}
</td>
</tr>
{/each}
</tbody>
</Table>
</Card>
</Col>
</Row>
{:else}
<Card class="mx-4" body color="warning">Cannot render metric health info: No data!</Card>
{/if}

View File

@@ -15,7 +15,6 @@
CardBody, CardBody,
Table, Table,
Progress, Progress,
Icon,
Spinner Spinner
} from "@sveltestrap/sveltestrap"; } from "@sveltestrap/sveltestrap";
import { import {
@@ -27,22 +26,18 @@
import Refresher from "../../generic/helper/Refresher.svelte"; import Refresher from "../../generic/helper/Refresher.svelte";
import TimeSelection from "../../generic/select/TimeSelection.svelte"; import TimeSelection from "../../generic/select/TimeSelection.svelte";
import Roofline from "../../generic/plots/Roofline.svelte"; import Roofline from "../../generic/plots/Roofline.svelte";
import Pie, { colors } from "../../generic/plots/Pie.svelte";
import Stacked from "../../generic/plots/Stacked.svelte"; import Stacked from "../../generic/plots/Stacked.svelte";
/* Svelte 5 Props */ /* Svelte 5 Props */
let { let {
clusters, clusters,
presetCluster, presetCluster,
useCbColors = false,
useAltColors = false,
} = $props(); } = $props();
/* Const Init */ /* Const Init */
const client = getContextClient(); const client = getContextClient();
/* State Init */ /* State Init */
let pieWidth = $state(0);
let from = $state(new Date(Date.now() - 5 * 60 * 1000)); let from = $state(new Date(Date.now() - 5 * 60 * 1000));
let to = $state(new Date(Date.now())); let to = $state(new Date(Date.now()));
let stackedFrom = $state(Math.floor(Date.now() / 1000) - 14400); let stackedFrom = $state(Math.floor(Date.now() / 1000) - 14400);
@@ -163,11 +158,6 @@
schedulerState schedulerState
} }
} }
# Get Current States fir Pie Charts
nodeStates(filter: $nodeFilter) {
state
count
}
# totalNodes includes multiples if shared jobs # totalNodes includes multiples if shared jobs
jobsStatistics( jobsStatistics(
filter: $jobFilter filter: $jobFilter
@@ -196,18 +186,6 @@
requestPolicy: "network-only" requestPolicy: "network-only"
})); }));
/* Scheduler-state entries of the status query's nodeStates, highest count first.
   Evaluates to undefined while the query has no data. */
const refinedStateData = $derived.by(() => {
  const schedulerStates = ['allocated', 'reserved', 'idle', 'mixed','down', 'unknown'];
  const byCountDesc = (lhs, rhs) => rhs.count - lhs.count;
  return $statusQuery?.data?.nodeStates
    .filter((entry) => schedulerStates.includes(entry.state))
    .sort(byCountDesc);
});
/* Health-state entries of the status query's nodeStates, highest count first.
   Evaluates to undefined while the query has no data. */
const refinedHealthData = $derived.by(() => {
  const healthStates = ['full', 'partial', 'failed'];
  const byCountDesc = (lhs, rhs) => rhs.count - lhs.count;
  return $statusQuery?.data?.nodeStates
    .filter((entry) => healthStates.includes(entry.state))
    .sort(byCountDesc);
});
/* Effects */ /* Effects */
$effect(() => { $effect(() => {
if ($statusQuery.data) { if ($statusQuery.data) {
@@ -367,19 +345,6 @@
return result return result
} }
/* Returns the legend color at targetIdx from the active palette.
   Palette choice: colorblind if useCbColors, else alternative if useAltColors,
   else default. Indices past the end of the palette wrap around to its start. */
function legendColors(targetIdx) {
  const paletteName = useCbColors
    ? 'colorblind'
    : useAltColors
      ? 'alternative'
      : 'default';
  const palette = [...colors[paletteName]];
  return palette[(palette.length + targetIdx) % palette.length];
}
</script> </script>
<!-- Refresher and space for other options --> <!-- Refresher and space for other options -->
@@ -408,7 +373,7 @@
<hr/> <hr/>
<!-- Node Stack Charts Dev--> <!-- Node Stack Charts -->
{#if $statesTimed.fetching} {#if $statesTimed.fetching}
<Row cols={1} class="text-center mt-3"> <Row cols={1} class="text-center mt-3">
<Col> <Col>
@@ -460,109 +425,6 @@
</Row> </Row>
{/if} {/if}
<hr/>
<!-- Node Health Pies, later Charts -->
{#if $statusQuery.fetching}
<Row cols={1} class="text-center mt-3">
<Col>
<Spinner />
</Col>
</Row>
{:else if $statusQuery.error}
<!-- Error card for the status query branch.
     Shows the status query's own error: this branch is entered on
     $statusQuery.error, so the message is read from that store and not from
     $statesTimed, whose error may be unset here. -->
<Row cols={1} class="text-center mt-3">
  <Col>
    <Card body color="danger">Status Query (States): {$statusQuery.error.message}</Card>
  </Col>
</Row>
{:else if $statusQuery?.data?.nodeStates}
<!--
  Current node states and node health for the selected cluster.
  Four columns: scheduler-state pie, its legend table, health-state pie, its legend table.
  Each pie and legend is wrapped in a {#key} block on its data set, so it is
  recreated whenever that data set changes.
  Both pie containers bind their clientWidth to the same pieWidth state; the
  pies are sized to 55% of that width.
-->
<Row cols={{ lg: 4, md: 2 , sm: 1}} class="mb-3 justify-content-center">
  <!-- Scheduler state pie -->
  <Col class="px-3 mt-2 mt-lg-0">
    <div bind:clientWidth={pieWidth}>
      {#key refinedStateData}
        <h4 class="text-center">
          Current {cluster.charAt(0).toUpperCase() + cluster.slice(1)} Node States
        </h4>
        <Pie
          canvasId="hpcpie-slurm"
          size={pieWidth * 0.55}
          sliceLabel="Nodes"
          quantities={refinedStateData.map(
            (sd) => sd.count,
          )}
          entities={refinedStateData.map(
            (sd) => sd.state,
          )}
          fixColors={refinedStateData.map(
            (sd) => colors['nodeStates'][sd.state],
          )}
        />
      {/key}
    </div>
  </Col>
  <!-- Scheduler state legend: color marker, state name, node count -->
  <Col class="px-4 py-2">
    {#key refinedStateData}
      <Table>
        <tr class="mb-2">
          <th></th>
          <th>Current State</th>
          <th>Nodes</th>
        </tr>
        {#each refinedStateData as sd}
          <tr>
            <td><Icon name="circle-fill" style="color: {colors['nodeStates'][sd.state]};"/></td>
            <td>{sd.state}</td>
            <td>{sd.count}</td>
          </tr>
        {/each}
      </Table>
    {/key}
  </Col>
  <!-- Health state pie -->
  <Col class="px-3 mt-2 mt-lg-0">
    <div bind:clientWidth={pieWidth}>
      {#key refinedHealthData}
        <h4 class="text-center">
          Current {cluster.charAt(0).toUpperCase() + cluster.slice(1)} Node Health
        </h4>
        <Pie
          canvasId="hpcpie-health"
          size={pieWidth * 0.55}
          sliceLabel="Nodes"
          quantities={refinedHealthData.map(
            (hd) => hd.count,
          )}
          entities={refinedHealthData.map(
            (hd) => hd.state,
          )}
          fixColors={refinedHealthData.map(
            (hd) => colors['healthStates'][hd.state],
          )}
        />
      {/key}
    </div>
  </Col>
  <!-- Health state legend: color marker, health name, node count -->
  <Col class="px-4 py-2">
    {#key refinedHealthData}
      <Table>
        <tr class="mb-2">
          <th></th>
          <th>Current Health</th>
          <th>Nodes</th>
        </tr>
        {#each refinedHealthData as hd}
          <tr>
            <td><Icon name="circle-fill" style="color: {colors['healthStates'][hd.state]};" /></td>
            <td>{hd.state}</td>
            <td>{hd.count}</td>
          </tr>
        {/each}
      </Table>
    {/key}
  </Col>
</Row>
{/if}
<hr/> <hr/>
<!-- Gauges & Roofline per Subcluster--> <!-- Gauges & Roofline per Subcluster-->
{#if $statusQuery.fetching} {#if $statusQuery.fetching}