Extend bubbleRoofline for nodeData, add column to node table, rename nodeStats query

This commit is contained in:
Christoph Kluge
2025-07-18 18:12:07 +02:00
parent 5cdb80b4d6
commit 697acd1d88
8 changed files with 518 additions and 128 deletions

View File

@@ -12,12 +12,13 @@ type Node {
hostname: String!
cluster: String!
subCluster: String!
runningJobs: Int!
nodeState: NodeState!
HealthState: MonitoringState!
healthState: MonitoringState!
metaData: Any
}
type NodeStats {
type NodeStates {
state: String!
count: Int!
}
@@ -303,7 +304,7 @@ type Query {
## Node Queries New
node(id: ID!): Node
nodes(filter: [NodeFilter!], order: OrderByInput): NodeStateResultList!
nodeStats(filter: [NodeFilter!]): [NodeStats!]!
nodeStates(filter: [NodeFilter!]): [NodeStates!]!
job(id: ID!): Job
jobMetrics(

View File

@@ -276,6 +276,7 @@ type ComplexityRoot struct {
ID func(childComplexity int) int
MetaData func(childComplexity int) int
NodeState func(childComplexity int) int
RunningJobs func(childComplexity int) int
SubCluster func(childComplexity int) int
}
@@ -290,7 +291,7 @@ type ComplexityRoot struct {
Items func(childComplexity int) int
}
NodeStats struct {
NodeStates struct {
Count func(childComplexity int) int
State func(childComplexity int) int
}
@@ -318,7 +319,7 @@ type ComplexityRoot struct {
Node func(childComplexity int, id string) int
NodeMetrics func(childComplexity int, cluster string, nodes []string, scopes []schema.MetricScope, metrics []string, from time.Time, to time.Time) int
NodeMetricsList func(childComplexity int, cluster string, subCluster string, nodeFilter string, scopes []schema.MetricScope, metrics []string, from time.Time, to time.Time, page *model.PageRequest, resolution *int) int
NodeStats func(childComplexity int, filter []*model.NodeFilter) int
NodeStates func(childComplexity int, filter []*model.NodeFilter) int
Nodes func(childComplexity int, filter []*model.NodeFilter, order *model.OrderByInput) int
RooflineHeatmap func(childComplexity int, filter []*model.JobFilter, rows int, cols int, minX float64, minY float64, maxX float64, maxY float64) int
ScopedJobStats func(childComplexity int, id string, metrics []string, scopes []schema.MetricScope) int
@@ -444,6 +445,7 @@ type MutationResolver interface {
UpdateConfiguration(ctx context.Context, name string, value string) (*string, error)
}
type NodeResolver interface {
RunningJobs(ctx context.Context, obj *schema.Node) (int, error)
NodeState(ctx context.Context, obj *schema.Node) (string, error)
HealthState(ctx context.Context, obj *schema.Node) (schema.NodeState, error)
MetaData(ctx context.Context, obj *schema.Node) (any, error)
@@ -456,7 +458,7 @@ type QueryResolver interface {
AllocatedNodes(ctx context.Context, cluster string) ([]*model.Count, error)
Node(ctx context.Context, id string) (*schema.Node, error)
Nodes(ctx context.Context, filter []*model.NodeFilter, order *model.OrderByInput) (*model.NodeStateResultList, error)
NodeStats(ctx context.Context, filter []*model.NodeFilter) ([]*model.NodeStats, error)
NodeStates(ctx context.Context, filter []*model.NodeFilter) ([]*model.NodeStates, error)
Job(ctx context.Context, id string) (*schema.Job, error)
JobMetrics(ctx context.Context, id string, metrics []string, scopes []schema.MetricScope, resolution *int) ([]*model.JobMetricWithName, error)
JobStats(ctx context.Context, id string, metrics []string) ([]*model.NamedStats, error)
@@ -1474,7 +1476,7 @@ func (e *executableSchema) Complexity(typeName, field string, childComplexity in
return e.complexity.Node.Cluster(childComplexity), true
case "Node.HealthState":
case "Node.healthState":
if e.complexity.Node.HealthState == nil {
break
}
@@ -1509,6 +1511,13 @@ func (e *executableSchema) Complexity(typeName, field string, childComplexity in
return e.complexity.Node.NodeState(childComplexity), true
case "Node.runningJobs":
if e.complexity.Node.RunningJobs == nil {
break
}
return e.complexity.Node.RunningJobs(childComplexity), true
case "Node.subCluster":
if e.complexity.Node.SubCluster == nil {
break
@@ -1551,19 +1560,19 @@ func (e *executableSchema) Complexity(typeName, field string, childComplexity in
return e.complexity.NodeStateResultList.Items(childComplexity), true
case "NodeStats.count":
if e.complexity.NodeStats.Count == nil {
case "NodeStates.count":
if e.complexity.NodeStates.Count == nil {
break
}
return e.complexity.NodeStats.Count(childComplexity), true
return e.complexity.NodeStates.Count(childComplexity), true
case "NodeStats.state":
if e.complexity.NodeStats.State == nil {
case "NodeStates.state":
if e.complexity.NodeStates.State == nil {
break
}
return e.complexity.NodeStats.State(childComplexity), true
return e.complexity.NodeStates.State(childComplexity), true
case "NodesResultList.count":
if e.complexity.NodesResultList.Count == nil {
@@ -1753,17 +1762,17 @@ func (e *executableSchema) Complexity(typeName, field string, childComplexity in
return e.complexity.Query.NodeMetricsList(childComplexity, args["cluster"].(string), args["subCluster"].(string), args["nodeFilter"].(string), args["scopes"].([]schema.MetricScope), args["metrics"].([]string), args["from"].(time.Time), args["to"].(time.Time), args["page"].(*model.PageRequest), args["resolution"].(*int)), true
case "Query.nodeStats":
if e.complexity.Query.NodeStats == nil {
case "Query.nodeStates":
if e.complexity.Query.NodeStates == nil {
break
}
args, err := ec.field_Query_nodeStats_args(context.TODO(), rawArgs)
args, err := ec.field_Query_nodeStates_args(context.TODO(), rawArgs)
if err != nil {
return 0, false
}
return e.complexity.Query.NodeStats(childComplexity, args["filter"].([]*model.NodeFilter)), true
return e.complexity.Query.NodeStates(childComplexity, args["filter"].([]*model.NodeFilter)), true
case "Query.nodes":
if e.complexity.Query.Nodes == nil {
@@ -2333,12 +2342,13 @@ type Node {
hostname: String!
cluster: String!
subCluster: String!
runningJobs: Int!
nodeState: NodeState!
HealthState: MonitoringState!
healthState: MonitoringState!
metaData: Any
}
type NodeStats {
type NodeStates {
state: String!
count: Int!
}
@@ -2621,9 +2631,10 @@ type Query {
user(username: String!): User
allocatedNodes(cluster: String!): [Count!]!
## Node Queries New
node(id: ID!): Node
nodes(filter: [NodeFilter!], order: OrderByInput): NodeStateResultList!
nodeStats(filter: [NodeFilter!]): [NodeStats!]!
nodeStates(filter: [NodeFilter!]): [NodeStates!]!
job(id: ID!): Job
jobMetrics(
@@ -2678,6 +2689,7 @@ type Query {
from: Time!
to: Time!
): [NodeMetrics!]!
nodeMetricsList(
cluster: String!
subCluster: String!
@@ -4062,17 +4074,17 @@ func (ec *executionContext) field_Query_nodeMetrics_argsTo(
return zeroVal, nil
}
func (ec *executionContext) field_Query_nodeStats_args(ctx context.Context, rawArgs map[string]any) (map[string]any, error) {
func (ec *executionContext) field_Query_nodeStates_args(ctx context.Context, rawArgs map[string]any) (map[string]any, error) {
var err error
args := map[string]any{}
arg0, err := ec.field_Query_nodeStats_argsFilter(ctx, rawArgs)
arg0, err := ec.field_Query_nodeStates_argsFilter(ctx, rawArgs)
if err != nil {
return nil, err
}
args["filter"] = arg0
return args, nil
}
func (ec *executionContext) field_Query_nodeStats_argsFilter(
func (ec *executionContext) field_Query_nodeStates_argsFilter(
ctx context.Context,
rawArgs map[string]any,
) ([]*model.NodeFilter, error) {
@@ -10985,6 +10997,50 @@ func (ec *executionContext) fieldContext_Node_subCluster(_ context.Context, fiel
return fc, nil
}
func (ec *executionContext) _Node_runningJobs(ctx context.Context, field graphql.CollectedField, obj *schema.Node) (ret graphql.Marshaler) {
fc, err := ec.fieldContext_Node_runningJobs(ctx, field)
if err != nil {
return graphql.Null
}
ctx = graphql.WithFieldContext(ctx, fc)
defer func() {
if r := recover(); r != nil {
ec.Error(ctx, ec.Recover(ctx, r))
ret = graphql.Null
}
}()
resTmp, err := ec.ResolverMiddleware(ctx, func(rctx context.Context) (any, error) {
ctx = rctx // use context from middleware stack in children
return ec.resolvers.Node().RunningJobs(rctx, obj)
})
if err != nil {
ec.Error(ctx, err)
return graphql.Null
}
if resTmp == nil {
if !graphql.HasFieldError(ctx, fc) {
ec.Errorf(ctx, "must not be null")
}
return graphql.Null
}
res := resTmp.(int)
fc.Result = res
return ec.marshalNInt2int(ctx, field.Selections, res)
}
func (ec *executionContext) fieldContext_Node_runningJobs(_ context.Context, field graphql.CollectedField) (fc *graphql.FieldContext, err error) {
fc = &graphql.FieldContext{
Object: "Node",
Field: field,
IsMethod: true,
IsResolver: true,
Child: func(ctx context.Context, field graphql.CollectedField) (*graphql.FieldContext, error) {
return nil, errors.New("field of type Int does not have child fields")
},
}
return fc, nil
}
func (ec *executionContext) _Node_nodeState(ctx context.Context, field graphql.CollectedField, obj *schema.Node) (ret graphql.Marshaler) {
fc, err := ec.fieldContext_Node_nodeState(ctx, field)
if err != nil {
@@ -11029,8 +11085,8 @@ func (ec *executionContext) fieldContext_Node_nodeState(_ context.Context, field
return fc, nil
}
func (ec *executionContext) _Node_HealthState(ctx context.Context, field graphql.CollectedField, obj *schema.Node) (ret graphql.Marshaler) {
fc, err := ec.fieldContext_Node_HealthState(ctx, field)
func (ec *executionContext) _Node_healthState(ctx context.Context, field graphql.CollectedField, obj *schema.Node) (ret graphql.Marshaler) {
fc, err := ec.fieldContext_Node_healthState(ctx, field)
if err != nil {
return graphql.Null
}
@@ -11060,7 +11116,7 @@ func (ec *executionContext) _Node_HealthState(ctx context.Context, field graphql
return ec.marshalNMonitoringState2githubᚗcomᚋClusterCockpitᚋccᚑlibᚋschemaᚐNodeState(ctx, field.Selections, res)
}
func (ec *executionContext) fieldContext_Node_HealthState(_ context.Context, field graphql.CollectedField) (fc *graphql.FieldContext, err error) {
func (ec *executionContext) fieldContext_Node_healthState(_ context.Context, field graphql.CollectedField) (fc *graphql.FieldContext, err error) {
fc = &graphql.FieldContext{
Object: "Node",
Field: field,
@@ -11301,10 +11357,12 @@ func (ec *executionContext) fieldContext_NodeStateResultList_items(_ context.Con
return ec.fieldContext_Node_cluster(ctx, field)
case "subCluster":
return ec.fieldContext_Node_subCluster(ctx, field)
case "runningJobs":
return ec.fieldContext_Node_runningJobs(ctx, field)
case "nodeState":
return ec.fieldContext_Node_nodeState(ctx, field)
case "HealthState":
return ec.fieldContext_Node_HealthState(ctx, field)
case "healthState":
return ec.fieldContext_Node_healthState(ctx, field)
case "metaData":
return ec.fieldContext_Node_metaData(ctx, field)
}
@@ -11355,8 +11413,8 @@ func (ec *executionContext) fieldContext_NodeStateResultList_count(_ context.Con
return fc, nil
}
func (ec *executionContext) _NodeStats_state(ctx context.Context, field graphql.CollectedField, obj *model.NodeStats) (ret graphql.Marshaler) {
fc, err := ec.fieldContext_NodeStats_state(ctx, field)
func (ec *executionContext) _NodeStates_state(ctx context.Context, field graphql.CollectedField, obj *model.NodeStates) (ret graphql.Marshaler) {
fc, err := ec.fieldContext_NodeStates_state(ctx, field)
if err != nil {
return graphql.Null
}
@@ -11386,9 +11444,9 @@ func (ec *executionContext) _NodeStats_state(ctx context.Context, field graphql.
return ec.marshalNString2string(ctx, field.Selections, res)
}
func (ec *executionContext) fieldContext_NodeStats_state(_ context.Context, field graphql.CollectedField) (fc *graphql.FieldContext, err error) {
func (ec *executionContext) fieldContext_NodeStates_state(_ context.Context, field graphql.CollectedField) (fc *graphql.FieldContext, err error) {
fc = &graphql.FieldContext{
Object: "NodeStats",
Object: "NodeStates",
Field: field,
IsMethod: false,
IsResolver: false,
@@ -11399,8 +11457,8 @@ func (ec *executionContext) fieldContext_NodeStats_state(_ context.Context, fiel
return fc, nil
}
func (ec *executionContext) _NodeStats_count(ctx context.Context, field graphql.CollectedField, obj *model.NodeStats) (ret graphql.Marshaler) {
fc, err := ec.fieldContext_NodeStats_count(ctx, field)
func (ec *executionContext) _NodeStates_count(ctx context.Context, field graphql.CollectedField, obj *model.NodeStates) (ret graphql.Marshaler) {
fc, err := ec.fieldContext_NodeStates_count(ctx, field)
if err != nil {
return graphql.Null
}
@@ -11430,9 +11488,9 @@ func (ec *executionContext) _NodeStats_count(ctx context.Context, field graphql.
return ec.marshalNInt2int(ctx, field.Selections, res)
}
func (ec *executionContext) fieldContext_NodeStats_count(_ context.Context, field graphql.CollectedField) (fc *graphql.FieldContext, err error) {
func (ec *executionContext) fieldContext_NodeStates_count(_ context.Context, field graphql.CollectedField) (fc *graphql.FieldContext, err error) {
fc = &graphql.FieldContext{
Object: "NodeStats",
Object: "NodeStates",
Field: field,
IsMethod: false,
IsResolver: false,
@@ -12027,10 +12085,12 @@ func (ec *executionContext) fieldContext_Query_node(ctx context.Context, field g
return ec.fieldContext_Node_cluster(ctx, field)
case "subCluster":
return ec.fieldContext_Node_subCluster(ctx, field)
case "runningJobs":
return ec.fieldContext_Node_runningJobs(ctx, field)
case "nodeState":
return ec.fieldContext_Node_nodeState(ctx, field)
case "HealthState":
return ec.fieldContext_Node_HealthState(ctx, field)
case "healthState":
return ec.fieldContext_Node_healthState(ctx, field)
case "metaData":
return ec.fieldContext_Node_metaData(ctx, field)
}
@@ -12112,8 +12172,8 @@ func (ec *executionContext) fieldContext_Query_nodes(ctx context.Context, field
return fc, nil
}
func (ec *executionContext) _Query_nodeStats(ctx context.Context, field graphql.CollectedField) (ret graphql.Marshaler) {
fc, err := ec.fieldContext_Query_nodeStats(ctx, field)
func (ec *executionContext) _Query_nodeStates(ctx context.Context, field graphql.CollectedField) (ret graphql.Marshaler) {
fc, err := ec.fieldContext_Query_nodeStates(ctx, field)
if err != nil {
return graphql.Null
}
@@ -12126,7 +12186,7 @@ func (ec *executionContext) _Query_nodeStats(ctx context.Context, field graphql.
}()
resTmp, err := ec.ResolverMiddleware(ctx, func(rctx context.Context) (any, error) {
ctx = rctx // use context from middleware stack in children
return ec.resolvers.Query().NodeStats(rctx, fc.Args["filter"].([]*model.NodeFilter))
return ec.resolvers.Query().NodeStates(rctx, fc.Args["filter"].([]*model.NodeFilter))
})
if err != nil {
ec.Error(ctx, err)
@@ -12138,12 +12198,12 @@ func (ec *executionContext) _Query_nodeStats(ctx context.Context, field graphql.
}
return graphql.Null
}
res := resTmp.([]*model.NodeStats)
res := resTmp.([]*model.NodeStates)
fc.Result = res
return ec.marshalNNodeStats2ᚕᚖgithubᚗcomᚋClusterCockpitᚋccᚑbackendᚋinternalᚋgraphᚋmodelᚐNodeStatsᚄ(ctx, field.Selections, res)
return ec.marshalNNodeStates2ᚕᚖgithubᚗcomᚋClusterCockpitᚋccᚑbackendᚋinternalᚋgraphᚋmodelᚐNodeStatesᚄ(ctx, field.Selections, res)
}
func (ec *executionContext) fieldContext_Query_nodeStats(ctx context.Context, field graphql.CollectedField) (fc *graphql.FieldContext, err error) {
func (ec *executionContext) fieldContext_Query_nodeStates(ctx context.Context, field graphql.CollectedField) (fc *graphql.FieldContext, err error) {
fc = &graphql.FieldContext{
Object: "Query",
Field: field,
@@ -12152,11 +12212,11 @@ func (ec *executionContext) fieldContext_Query_nodeStats(ctx context.Context, fi
Child: func(ctx context.Context, field graphql.CollectedField) (*graphql.FieldContext, error) {
switch field.Name {
case "state":
return ec.fieldContext_NodeStats_state(ctx, field)
return ec.fieldContext_NodeStates_state(ctx, field)
case "count":
return ec.fieldContext_NodeStats_count(ctx, field)
return ec.fieldContext_NodeStates_count(ctx, field)
}
return nil, fmt.Errorf("no field named %q was found under type NodeStats", field.Name)
return nil, fmt.Errorf("no field named %q was found under type NodeStates", field.Name)
},
}
defer func() {
@@ -12166,7 +12226,7 @@ func (ec *executionContext) fieldContext_Query_nodeStats(ctx context.Context, fi
}
}()
ctx = graphql.WithFieldContext(ctx, fc)
if fc.Args, err = ec.field_Query_nodeStats_args(ctx, field.ArgumentMap(ec.Variables)); err != nil {
if fc.Args, err = ec.field_Query_nodeStates_args(ctx, field.ArgumentMap(ec.Variables)); err != nil {
ec.Error(ctx, err)
return fc, err
}
@@ -19829,6 +19889,42 @@ func (ec *executionContext) _Node(ctx context.Context, sel ast.SelectionSet, obj
if out.Values[i] == graphql.Null {
atomic.AddUint32(&out.Invalids, 1)
}
case "runningJobs":
field := field
innerFunc := func(ctx context.Context, fs *graphql.FieldSet) (res graphql.Marshaler) {
defer func() {
if r := recover(); r != nil {
ec.Error(ctx, ec.Recover(ctx, r))
}
}()
res = ec._Node_runningJobs(ctx, field, obj)
if res == graphql.Null {
atomic.AddUint32(&fs.Invalids, 1)
}
return res
}
if field.Deferrable != nil {
dfs, ok := deferred[field.Deferrable.Label]
di := 0
if ok {
dfs.AddField(field)
di = len(dfs.Values) - 1
} else {
dfs = graphql.NewFieldSet([]graphql.CollectedField{field})
deferred[field.Deferrable.Label] = dfs
}
dfs.Concurrently(di, func(ctx context.Context) graphql.Marshaler {
return innerFunc(ctx, dfs)
})
// don't run the out.Concurrently() call below
out.Values[i] = graphql.Null
continue
}
out.Concurrently(i, func(ctx context.Context) graphql.Marshaler { return innerFunc(ctx, out) })
case "nodeState":
field := field
@@ -19865,7 +19961,7 @@ func (ec *executionContext) _Node(ctx context.Context, sel ast.SelectionSet, obj
}
out.Concurrently(i, func(ctx context.Context) graphql.Marshaler { return innerFunc(ctx, out) })
case "HealthState":
case "healthState":
field := field
innerFunc := func(ctx context.Context, fs *graphql.FieldSet) (res graphql.Marshaler) {
@@ -19874,7 +19970,7 @@ func (ec *executionContext) _Node(ctx context.Context, sel ast.SelectionSet, obj
ec.Error(ctx, ec.Recover(ctx, r))
}
}()
res = ec._Node_HealthState(ctx, field, obj)
res = ec._Node_healthState(ctx, field, obj)
if res == graphql.Null {
atomic.AddUint32(&fs.Invalids, 1)
}
@@ -20047,24 +20143,24 @@ func (ec *executionContext) _NodeStateResultList(ctx context.Context, sel ast.Se
return out
}
var nodeStatsImplementors = []string{"NodeStats"}
var nodeStatesImplementors = []string{"NodeStates"}
func (ec *executionContext) _NodeStats(ctx context.Context, sel ast.SelectionSet, obj *model.NodeStats) graphql.Marshaler {
fields := graphql.CollectFields(ec.OperationContext, sel, nodeStatsImplementors)
func (ec *executionContext) _NodeStates(ctx context.Context, sel ast.SelectionSet, obj *model.NodeStates) graphql.Marshaler {
fields := graphql.CollectFields(ec.OperationContext, sel, nodeStatesImplementors)
out := graphql.NewFieldSet(fields)
deferred := make(map[string]*graphql.FieldSet)
for i, field := range fields {
switch field.Name {
case "__typename":
out.Values[i] = graphql.MarshalString("NodeStats")
out.Values[i] = graphql.MarshalString("NodeStates")
case "state":
out.Values[i] = ec._NodeStats_state(ctx, field, obj)
out.Values[i] = ec._NodeStates_state(ctx, field, obj)
if out.Values[i] == graphql.Null {
out.Invalids++
}
case "count":
out.Values[i] = ec._NodeStats_count(ctx, field, obj)
out.Values[i] = ec._NodeStates_count(ctx, field, obj)
if out.Values[i] == graphql.Null {
out.Invalids++
}
@@ -20307,7 +20403,7 @@ func (ec *executionContext) _Query(ctx context.Context, sel ast.SelectionSet) gr
}
out.Concurrently(i, func(ctx context.Context) graphql.Marshaler { return rrm(innerCtx) })
case "nodeStats":
case "nodeStates":
field := field
innerFunc := func(ctx context.Context, fs *graphql.FieldSet) (res graphql.Marshaler) {
@@ -20316,7 +20412,7 @@ func (ec *executionContext) _Query(ctx context.Context, sel ast.SelectionSet) gr
ec.Error(ctx, ec.Recover(ctx, r))
}
}()
res = ec._Query_nodeStats(ctx, field)
res = ec._Query_nodeStates(ctx, field)
if res == graphql.Null {
atomic.AddUint32(&fs.Invalids, 1)
}
@@ -22961,7 +23057,7 @@ func (ec *executionContext) marshalNNodeStateResultList2ᚖgithubᚗcomᚋCluste
return ec._NodeStateResultList(ctx, sel, v)
}
func (ec *executionContext) marshalNNodeStats2ᚕᚖgithubᚗcomᚋClusterCockpitᚋccᚑbackendᚋinternalᚋgraphᚋmodelᚐNodeStatsᚄ(ctx context.Context, sel ast.SelectionSet, v []*model.NodeStats) graphql.Marshaler {
func (ec *executionContext) marshalNNodeStates2ᚕᚖgithubᚗcomᚋClusterCockpitᚋccᚑbackendᚋinternalᚋgraphᚋmodelᚐNodeStatesᚄ(ctx context.Context, sel ast.SelectionSet, v []*model.NodeStates) graphql.Marshaler {
ret := make(graphql.Array, len(v))
var wg sync.WaitGroup
isLen1 := len(v) == 1
@@ -22985,7 +23081,7 @@ func (ec *executionContext) marshalNNodeStats2ᚕᚖgithubᚗcomᚋClusterCockpi
if !isLen1 {
defer wg.Done()
}
ret[i] = ec.marshalNNodeStats2ᚖgithubᚗcomᚋClusterCockpitᚋccᚑbackendᚋinternalᚋgraphᚋmodelᚐNodeStats(ctx, sel, v[i])
ret[i] = ec.marshalNNodeStates2ᚖgithubᚗcomᚋClusterCockpitᚋccᚑbackendᚋinternalᚋgraphᚋmodelᚐNodeStates(ctx, sel, v[i])
}
if isLen1 {
f(i)
@@ -23005,14 +23101,14 @@ func (ec *executionContext) marshalNNodeStats2ᚕᚖgithubᚗcomᚋClusterCockpi
return ret
}
func (ec *executionContext) marshalNNodeStats2ᚖgithubᚗcomᚋClusterCockpitᚋccᚑbackendᚋinternalᚋgraphᚋmodelᚐNodeStats(ctx context.Context, sel ast.SelectionSet, v *model.NodeStats) graphql.Marshaler {
func (ec *executionContext) marshalNNodeStates2ᚖgithubᚗcomᚋClusterCockpitᚋccᚑbackendᚋinternalᚋgraphᚋmodelᚐNodeStates(ctx context.Context, sel ast.SelectionSet, v *model.NodeStates) graphql.Marshaler {
if v == nil {
if !graphql.HasFieldError(ctx, graphql.GetFieldContext(ctx)) {
ec.Errorf(ctx, "the requested element is null which the schema does not allow")
}
return graphql.Null
}
return ec._NodeStats(ctx, sel, v)
return ec._NodeStates(ctx, sel, v)
}
func (ec *executionContext) marshalNNodesResultList2githubᚗcomᚋClusterCockpitᚋccᚑbackendᚋinternalᚋgraphᚋmodelᚐNodesResultList(ctx context.Context, sel ast.SelectionSet, v model.NodesResultList) graphql.Marshaler {

View File

@@ -186,7 +186,7 @@ type NodeStateResultList struct {
Count *int `json:"count,omitempty"`
}
type NodeStats struct {
type NodeStates struct {
State string `json:"state"`
Count int `json:"count"`
}

View File

@@ -305,14 +305,20 @@ func (r *mutationResolver) UpdateConfiguration(ctx context.Context, name string,
return nil, nil
}
// NodeState is the resolver for the nodeState field.
func (r *nodeResolver) NodeState(ctx context.Context, obj *schema.Node) (string, error) {
panic(fmt.Errorf("not implemented: NodeState - nodeState"))
// RunningJobs is the resolver for the runningJobs field.
func (r *nodeResolver) RunningJobs(ctx context.Context, obj *schema.Node) (int, error) {
panic(fmt.Errorf("not implemented: RunningJobs - runningJobs"))
}
// HealthState is the resolver for the HealthState field.
// NodeState is the resolver for the nodeState field.
func (r *nodeResolver) NodeState(ctx context.Context, obj *schema.Node) (string, error) {
return string(obj.NodeState), nil
}
// HealthState is the resolver for the healthState field.
func (r *nodeResolver) HealthState(ctx context.Context, obj *schema.Node) (schema.NodeState, error) {
panic(fmt.Errorf("not implemented: HealthState - HealthState"))
// FIXME: Why is Output of schema.NodeState Type?
panic(fmt.Errorf("not implemented: HealthState - healthState"))
}
// MetaData is the resolver for the metaData field.
@@ -378,8 +384,8 @@ func (r *queryResolver) Nodes(ctx context.Context, filter []*model.NodeFilter, o
return &model.NodeStateResultList{Items: nodes, Count: &count}, err
}
// NodeStats is the resolver for the nodeStats field.
func (r *queryResolver) NodeStats(ctx context.Context, filter []*model.NodeFilter) ([]*model.NodeStats, error) {
// NodeStates is the resolver for the nodeStates field.
func (r *queryResolver) NodeStates(ctx context.Context, filter []*model.NodeFilter) ([]*model.NodeStates, error) {
repo := repository.GetNodeRepository()
stateCounts, serr := repo.CountNodeStates(ctx, filter)
@@ -394,7 +400,7 @@ func (r *queryResolver) NodeStats(ctx context.Context, filter []*model.NodeFilte
return nil, herr
}
allCounts := make([]*model.NodeStats, 0)
allCounts := make([]*model.NodeStates, 0)
allCounts = append(stateCounts, healthCounts...)
return allCounts, nil

View File

@@ -3,6 +3,7 @@ CREATE TABLE "node" (
hostname VARCHAR(255) NOT NULL,
cluster VARCHAR(255) NOT NULL,
subcluster VARCHAR(255) NOT NULL,
jobs_running INTEGER DEFAULT 0 NOT NULL,
cpus_allocated INTEGER DEFAULT 0 NOT NULL,
cpus_total INTEGER DEFAULT 0 NOT NULL,
memory_allocated INTEGER DEFAULT 0 NOT NULL,

View File

@@ -50,8 +50,9 @@ func GetNodeRepository() *NodeRepository {
}
var nodeColumns []string = []string{
"node.id", "node.hostname", "node.cluster", "node.subcluster",
"node.node_state", "node.health_state", "node.meta_data",
// "node.id,"
"node.hostname", "node.cluster", "node.subcluster",
"node.node_state", "node.health_state", // "node.meta_data",
}
func (r *NodeRepository) FetchMetadata(node *schema.Node) (map[string]string, error) {
@@ -223,7 +224,7 @@ func (r *NodeRepository) DeleteNode(id int64) error {
func (r *NodeRepository) QueryNodes(
ctx context.Context,
filters []*model.NodeFilter,
order *model.OrderByInput,
order *model.OrderByInput, // Currently unused!
) ([]*schema.Node, error) {
query, qerr := AccessCheck(ctx, sq.Select(nodeColumns...).From("node"))
if qerr != nil {
@@ -296,7 +297,7 @@ func (r *NodeRepository) ListNodes(cluster string) ([]*schema.Node, error) {
return nodeList, nil
}
func (r *NodeRepository) CountNodeStates(ctx context.Context, filters []*model.NodeFilter) ([]*model.NodeStats, error) {
func (r *NodeRepository) CountNodeStates(ctx context.Context, filters []*model.NodeFilter) ([]*model.NodeStates, error) {
query, qerr := AccessCheck(ctx, sq.Select("node_state AS state", "count(*) AS count").From("node"))
if qerr != nil {
return nil, qerr
@@ -327,13 +328,13 @@ func (r *NodeRepository) CountNodeStates(ctx context.Context, filters []*model.N
return nil, err
}
nodes := make([]*model.NodeStats, 0)
nodes := make([]*model.NodeStates, 0)
for rows.Next() {
node := model.NodeStats{}
node := model.NodeStates{}
if err := rows.Scan(&node.State, &node.Count); err != nil {
rows.Close()
cclog.Warn("Error while scanning rows (NodeStats)")
cclog.Warn("Error while scanning rows (NodeStates)")
return nil, err
}
nodes = append(nodes, &node)
@@ -342,7 +343,7 @@ func (r *NodeRepository) CountNodeStates(ctx context.Context, filters []*model.N
return nodes, nil
}
func (r *NodeRepository) CountHealthStates(ctx context.Context, filters []*model.NodeFilter) ([]*model.NodeStats, error) {
func (r *NodeRepository) CountHealthStates(ctx context.Context, filters []*model.NodeFilter) ([]*model.NodeStates, error) {
query, qerr := AccessCheck(ctx, sq.Select("health_state AS state", "count(*) AS count").From("node"))
if qerr != nil {
return nil, qerr
@@ -373,13 +374,13 @@ func (r *NodeRepository) CountHealthStates(ctx context.Context, filters []*model
return nil, err
}
nodes := make([]*model.NodeStats, 0)
nodes := make([]*model.NodeStates, 0)
for rows.Next() {
node := model.NodeStats{}
node := model.NodeStates{}
if err := rows.Scan(&node.State, &node.Count); err != nil {
rows.Close()
cclog.Warn("Error while scanning rows (NodeStats)")
cclog.Warn("Error while scanning rows (NodeStates)")
return nil, err
}
nodes = append(nodes, &node)

View File

@@ -31,8 +31,10 @@
let {
roofData = null,
jobsData = null,
allowSizeChange = false,
nodesData = null,
cluster = null,
subCluster = null,
allowSizeChange = false,
width = 600,
height = 380,
} = $props();
@@ -264,16 +266,43 @@
let filtTop = u.posToVal(-maxSize / 2, scaleY.key);
for (let i = 0; i < d[0].length; i++) {
// Color based on Duration, check index for transparency highlighting
u.ctx.strokeStyle = getRGB(u.data[2][i]);
u.ctx.fillStyle = getRGB(u.data[2][i], transparentFill);
// Jobs: Color based on Duration
if (jobsData) {
u.ctx.strokeStyle = getRGB(u.data[2][i]);
u.ctx.fillStyle = getRGB(u.data[2][i], transparentFill);
// Nodes: Color based on Idle vs. Allocated
} else if (nodesData) {
// console.log('In Plot Handler NodesData', nodesData)
if (nodesData[i]?.nodeState == "idle") {
u.ctx.strokeStyle = "rgb(0, 0, 255)";
u.ctx.fillStyle = "rgba(0, 0, 255, 0.5)";
} else if (nodesData[i]?.nodeState == "allocated") {
u.ctx.strokeStyle = "rgb(0, 255, 0)";
u.ctx.fillStyle = "rgba(0, 255, 0, 0.5)";
} else if (nodesData[i]?.nodeState == "notindb") {
u.ctx.strokeStyle = "rgb(0, 0, 0)";
u.ctx.fillStyle = "rgba(0, 0, 0, 0.5)";
} else { // Fallback: All other DEFINED states
u.ctx.strokeStyle = "rgb(255, 0, 0)";
u.ctx.fillStyle = "rgba(255, 0, 0, 0.5)";
}
}
// Get Values
let xVal = d[0][i];
let yVal = d[1][i];
// Calc Size; Alt.: size = sizes[i] * pxRatio
const size = sizeBase + (jobsData[i]?.numAcc ? jobsData[i].numAcc / 2 : jobsData[i].numNodes); // In NodeMode: Scale with Number of Jobs?
// Calc Size; Alt.: size = sizes[i] * pxRatio
let size = 1;
// Jobs: Size based on Resourcecount
if (jobsData) {
size = sizeBase + (jobsData[i]?.numAcc ? jobsData[i].numAcc / 2 : jobsData[i].numNodes)
// Nodes: Size based on Jobcount
} else if (nodesData) {
size = sizeBase + nodesData[i]?.numJobs
};
if (xVal >= filtLft && xVal <= filtRgt && yVal >= filtBtm && yVal <= filtTop) {
let cx = valToPosX(xVal, scaleX, xDim, xOff);
let cy = valToPosY(yVal, scaleY, yDim, yOff);
@@ -338,7 +367,7 @@
};
// Tooltip Plugin
function tooltipPlugin({onclick, getJobData, shiftX = 10, shiftY = 10}) {
function tooltipPlugin({onclick, getLegendData, shiftX = 10, shiftY = 10}) {
let tooltipLeftOffset = 0;
let tooltipTopOffset = 0;
@@ -388,11 +417,34 @@
tooltip.style.top = (tooltipTopOffset + top + shiftX) + "px";
tooltip.style.left = (tooltipLeftOffset + lft + shiftY) + "px";
tooltip.style.borderColor = getRGB(u.data[2][i]);
tooltip.textContent = (
// Tooltip Content as String
`Job ID: ${getJobData(u, i).jobId}\nNodes: ${getJobData(u, i).numNodes}${getJobData(u, i)?.numAcc?`\nAccelerators: ${getJobData(u, i).numAcc}`:''}`
);
// Jobs: Color based on Duration
if (jobsData) {
tooltip.style.borderColor = getRGB(u.data[2][i]);
// Nodes: Color based on Idle vs. Allocated
} else if (nodesData) {
if (nodesData[i]?.nodeState == "idle") {
tooltip.style.borderColor = "rgb(0, 0, 255)";
} else if (nodesData[i]?.nodeState == "allocated") {
tooltip.style.borderColor = "rgb(0, 255, 0)";
} else if (nodesData[i]?.nodeState == "notindb") { // Missing from DB table
tooltip.style.borderColor = "rgb(0, 0, 0)";
} else { // Fallback: All other DEFINED states
tooltip.style.borderColor = "rgb(255, 0, 0)";
}
}
if (jobsData) {
tooltip.textContent = (
// Tooltip Content as String for Job
`Job ID: ${getLegendData(u, i).jobId}\nNodes: ${getLegendData(u, i).numNodes}${getLegendData(u, i)?.numAcc?`\nAccelerators: ${getLegendData(u, i).numAcc}`:''}`
);
} else if (nodesData) {
tooltip.textContent = (
// Tooltip Content as String for Node
`Host: ${getLegendData(u, i).nodeName}\nState: ${getLegendData(u, i).nodeState}\nJobs: ${getLegendData(u, i).numJobs}`
);
}
}
return {
@@ -444,14 +496,18 @@
timeoutId = setTimeout(() => {
timeoutId = null;
if (uplot) uplot.destroy();
render(roofData, jobsData);
render(roofData, jobsData, nodesData);
}, 200);
}
function render(roofdata, jobsData) {
if (roofdata) {
function render(roofData, jobsData, nodesData) {
let plotTitle = "CPU Roofline Diagram";
if (jobsData) plotTitle = "Job Average Roofline Diagram";
if (nodesData) plotTitle = "Node Average Roofline Diagram";
if (roofData) {
const opts = {
title: "Job Average Roofline Diagram",
title: plotTitle,
mode: 2,
width: width,
height: height,
@@ -669,35 +725,87 @@
u.ctx.lineWidth = 0.15;
}
// The Color Scale For Time Information
const posX = u.valToPos(0.1, "x", true)
const posXLimit = u.valToPos(100, "x", true)
const posY = u.valToPos(14000.0, "y", true)
u.ctx.fillStyle = 'black'
u.ctx.fillText('Short', posX, posY)
const start = posX + 10
for (let x = start; x < posXLimit; x += 10) {
let c = (x - start) / (posXLimit - start)
u.ctx.fillStyle = getRGB(c)
u.ctx.beginPath()
u.ctx.arc(x, posY, 3, 0, Math.PI * 2, false)
u.ctx.fill()
// Jobs: The Color Scale For Time Information
if (jobsData) {
const posX = u.valToPos(0.1, "x", true)
const posXLimit = u.valToPos(100, "x", true)
const posY = u.valToPos(14000.0, "y", true)
u.ctx.fillStyle = 'black'
u.ctx.fillText('Short', posX, posY)
const start = posX + 10
for (let x = start; x < posXLimit; x += 10) {
let c = (x - start) / (posXLimit - start)
u.ctx.fillStyle = getRGB(c)
u.ctx.beginPath()
u.ctx.arc(x, posY, 3, 0, Math.PI * 2, false)
u.ctx.fill()
}
u.ctx.fillStyle = 'black'
u.ctx.fillText('Long', posXLimit + 23, posY)
}
// Nodes: The Colors Of NodeStates (Just 3)
if (nodesData) {
const posY = u.valToPos(14000.0, "y", true)
const posAllocDot = u.valToPos(0.1, "x", true)
const posAllocText = posAllocDot + 60
u.ctx.fillStyle = "rgb(0, 255, 0)"
u.ctx.beginPath()
u.ctx.arc(posAllocDot, posY, 3, 0, Math.PI * 2, false)
u.ctx.fill()
u.ctx.fillStyle = 'black'
u.ctx.fillText('Allocated', posAllocText, posY)
const posIdleDot = posAllocDot + 150
const posIdleText = posAllocText + 120
u.ctx.fillStyle = "rgb(0, 0, 255)"
u.ctx.beginPath()
u.ctx.arc(posIdleDot, posY, 3, 0, Math.PI * 2, false)
u.ctx.fill()
u.ctx.fillStyle = 'black'
u.ctx.fillText('Idle', posIdleText, posY)
const posOtherDot = posIdleDot + 150
const posOtherText = posIdleText + 160
u.ctx.fillStyle = "rgb(255, 0, 0)"
u.ctx.beginPath()
u.ctx.arc(posOtherDot, posY, 3, 0, Math.PI * 2, false)
u.ctx.fill()
u.ctx.fillStyle = 'black'
u.ctx.fillText('Other', posOtherText, posY)
const posMissingDot = posOtherDot + 150
const posMissingText = posOtherText + 190
u.ctx.fillStyle = 'black'
u.ctx.beginPath()
u.ctx.arc(posMissingDot, posY, 3, 0, Math.PI * 2, false)
u.ctx.fill()
u.ctx.fillText('Missing in DB', posMissingText, posY)
}
u.ctx.fillStyle = 'black'
u.ctx.fillText('Long', posXLimit + 23, posY)
},
],
},
plugins: [
tooltipPlugin({
onclick(u, dataIdx) {
window.open(`/monitoring/job/${jobsData[dataIdx].id}`);
if (jobsData) {
window.open(`/monitoring/job/${jobsData[dataIdx].id}`)
} else if (nodesData) {
window.open(`/monitoring/node/${cluster}/${nodesData[dataIdx].nodeName}`)
}
},
getJobData: (u, dataIdx) => { return jobsData[dataIdx] }
getLegendData: (u, dataIdx) => {
if (jobsData) {
return jobsData[dataIdx]
} else if (nodesData) {
return nodesData[dataIdx]
}
}
}),
],
};
uplot = new uPlot(opts, roofdata, plotWrapper);
uplot = new uPlot(opts, roofData, plotWrapper);
} else {
// console.log("No data for roofline!");
}
@@ -705,7 +813,7 @@
/* On Mount */
onMount(() => {
render(roofData, jobsData);
render(roofData, jobsData, nodesData);
});
/* On Destroy */

View File

@@ -31,9 +31,11 @@
const client = getContextClient();
/* State Init */
// let from = $state(new Date(Date.now() - 5 * 60 * 1000));
// let to = $state(new Date(Date.now()));
let from = $state(new Date(Date.now() - 5 * 60 * 1000));
let to = $state(new Date(Date.now()));
let plotWidths = $state([]);
let nodesCounts = $state({});
let jobsJounts = $state({});
/* Derived */
// Note: nodeMetrics are requested on configured $timestep resolution
@@ -64,6 +66,123 @@
},
}));
// Optimal new query, does not exist
// const nodeRoofQuery = $derived(queryStore({
// client: client,
// query: gql`
// query ($filter: [JobFilter!]!, $metrics: [String!]!) {
// nodeRoofline(filter: $filter, metrics: $metrics) {
// nodeName
// nodeState
// numJobs
// stats {
// name
// data {
// avg
// }
// }
// }
// }
// `,
// variables: {
// filter: [{ state: ["running"] }, { cluster: { eq: cluster } }],
// metrics: ["flops_any", "mem_bw"], // Fixed names for job roofline
// },
// }));
// Load Required Roofline Data Averages for all nodes of cluster: use for node avg data and name, use secondary (new?) querie(s) for slurmstate and numjobs
const nodesData = $derived(queryStore({
client: client,
query: gql`
query ($cluster: String!, $metrics: [String!], $from: Time!, $to: Time!) {
nodeMetrics(
cluster: $cluster
metrics: $metrics
from: $from
to: $to
) {
host
subCluster
metrics {
name
metric {
series {
statistics {
avg
}
}
}
}
}
}
`,
variables: {
cluster: cluster,
metrics: ["flops_any", "mem_bw"],
from: from,
to: to,
},
}));
// Load for jobcount per node only -- might me required for total running jobs anyways in parent component!
// Also, think about extra query with only TotalJobCount and Items [Resources, ...some meta infos], not including metric data
const paging = { itemsPerPage: 1500, page: 1 };
const sorting = { field: "startTime", type: "col", order: "DESC" };
const filter = [
{ cluster: { eq: cluster } },
{ state: ["running"] },
];
const nodeJobsQuery = gql`
query (
$filter: [JobFilter!]!
$sorting: OrderByInput!
$paging: PageRequest!
) {
jobs(filter: $filter, order: $sorting, page: $paging) {
items {
jobId
resources {
hostname
}
}
count
}
}
`;
const nodesJobs = $derived(queryStore({
client: client,
query: nodeJobsQuery,
variables: { paging, sorting, filter },
})
);
// Last required query: Node State
const nodesState = $derived(queryStore({
client: client,
query: gql`
query (
$filter: [NodeFilter!]
$sorting: OrderByInput
) {
nodes(filter: $filter, order: $sorting) {
count
items {
hostname
cluster
subCluster
nodeState
}
}
}
`,
variables: {
filter: { cluster: { eq: cluster }},
sorting: sorting // Unused in Backend: Use Placeholder
// Subcluster filter?
},
}));
/* Function */
function transformJobsStatsToData(subclusterData) {
/* c will contain values from 0 to 1 representing the duration */
@@ -90,7 +209,7 @@
else c.push(d)
}
} else {
console.warn("transformData: metrics for 'mem_bw' and/or 'flops_any' missing!")
console.warn("transformJobsStatsToData: metrics for 'mem_bw' and/or 'flops_any' missing!")
}
if (x.length > 0 && y.length > 0 && c.length > 0) {
@@ -99,15 +218,69 @@
return data
}
function transformNodesStatsToData(subclusterData) {
let data = null
const x = [], y = []
if (subclusterData) {
for (let i = 0; i < subclusterData.length; i++) {
const flopsData = subclusterData[i].metrics.find((s) => s.name == "flops_any")
const memBwData = subclusterData[i].metrics.find((s) => s.name == "mem_bw")
const f = flopsData.metric.series[0].statistics.avg
const m = memBwData.metric.series[0].statistics.avg
let intensity = f / m
if (Number.isNaN(intensity) || !Number.isFinite(intensity)) {
// continue // Old: Introduces mismatch between Data and Info Arrays
intensity = 0.0 // New: Set to Float Zero: Will not show in Log-Plot (Always below render limit)
}
x.push(intensity)
y.push(f)
}
} else {
// console.warn("transformNodesStatsToData: metrics for 'mem_bw' and/or 'flops_any' missing!")
}
if (x.length > 0 && y.length > 0) {
data = [null, [x, y]] // for dataformat see roofline.svelte
}
return data
}
function transformJobsStatsToInfo(subclusterData) {
if (subclusterData) {
return subclusterData.map((sc) => { return {id: sc.id, jobId: sc.jobId, numNodes: sc.numNodes, numAcc: sc?.numAccelerators? sc.numAccelerators : 0} })
} else {
console.warn("transformData: jobInfo missing!")
console.warn("transformJobsStatsToInfo: jobInfo missing!")
return []
}
}
function transformNodesStatsToInfo(subClusterData) {
let result = [];
if (subClusterData && $nodesState?.data) {
// Use Nodes as Returned from CCMS, *NOT* as saved in DB via SlurmState-API!
for (let j = 0; j < subClusterData.length; j++) {
// nodesCounts[subClusterData[i].subCluster] = $nodesState.data.nodes.count; // Probably better as own derived!
const nodeName = subClusterData[j]?.host ? subClusterData[j].host : "unknown"
const nodeMatch = $nodesState.data.nodes.items.find((n) => n.hostname == nodeName && n.subCluster == subClusterData[j].subCluster);
const nodeState = nodeMatch?.nodeState ? nodeMatch.nodeState : "notindb"
let numJobs = 0
if ($nodesJobs?.data) {
const nodeJobs = $nodesJobs.data.jobs.items.filter((job) => job.resources.find((res) => res.hostname == nodeName))
numJobs = nodeJobs?.length ? nodeJobs.length : 0
}
result.push({nodeName: nodeName, nodeState: nodeState, numJobs: numJobs})
};
};
return result
}
</script>
<!-- Gauges & Roofline per Subcluster-->
@@ -115,19 +288,23 @@
{#each $initq.data.clusters.find((c) => c.name == cluster).subClusters as subCluster, i}
<Row cols={{ lg: 2, md: 2 , sm: 1}} class="mb-3 justify-content-center">
<Col class="px-3 mt-2 mt-lg-0">
<b>Classic</b>
<b>Bubble Node</b>
<div bind:clientWidth={plotWidths[i]}>
{#key $jobRoofQuery.data.jobsMetricStats}
{#key $nodesData?.data?.nodeMetrics || $nodesJobs?.data?.jobs}
<b>{subCluster.name} Total: {$jobRoofQuery.data.jobsMetricStats.filter(
(data) => data.subCluster == subCluster.name,
).length} Jobs</b>
<Roofline
<NewBubbleRoofline
allowSizeChange
renderTime
width={plotWidths[i] - 10}
height={300}
cluster={cluster}
subCluster={subCluster}
data={transformJobsStatsToData($jobRoofQuery?.data?.jobsMetricStats.filter(
roofData={transformNodesStatsToData($nodesData?.data?.nodeMetrics.filter(
(data) => data.subCluster == subCluster.name,
)
)}
nodesData={transformNodesStatsToInfo($nodesData?.data?.nodeMetrics.filter(
(data) => data.subCluster == subCluster.name,
)
)}
@@ -136,7 +313,7 @@
</div>
</Col>
<Col class="px-3 mt-2 mt-lg-0">
<b>Bubble</b>
<b>Bubble Jobs</b>
<div bind:clientWidth={plotWidths[i]}>
{#key $jobRoofQuery.data.jobsMetricStats}
<b>{subCluster.name} Total: {$jobRoofQuery.data.jobsMetricStats.filter(