Add public dashboard and route, add DoubleMetricPlot and GQL queries

- add roofline legend display switch
- small fixes
This commit is contained in:
Christoph Kluge
2025-12-09 10:26:55 +01:00
parent 967f0a3294
commit 4083de2a51
23 changed files with 2918 additions and 96 deletions

View File

@@ -164,6 +164,13 @@ type JobMetricWithName {
metric: JobMetric! metric: JobMetric!
} }
# A single named metric aggregated over a whole cluster, returned as part of
# ClusterMetrics by the clusterMetrics query.
type ClusterMetricWithName {
# Name of the metric.
name: String!
# Unit of the values in data; optional.
unit: Unit
# Time step of the series (NOTE(review): presumably in seconds, confirm).
timestep: Int!
# The aggregated series values (NullableFloat scalar).
data: [NullableFloat!]!
}
type JobMetric { type JobMetric {
unit: Unit unit: Unit
timestep: Int! timestep: Int!
@@ -267,6 +274,11 @@ type NodeMetrics {
metrics: [JobMetricWithName!]! metrics: [JobMetricWithName!]!
} }
# Result of the clusterMetrics query: cluster-wide metric series plus the
# number of nodes that contributed to them.
type ClusterMetrics {
# Number of nodes for which metric data was loaded.
nodeCount: Int!
# One aggregated series per metric.
metrics: [ClusterMetricWithName!]!
}
type NodesResultList { type NodesResultList {
items: [NodeMetrics!]! items: [NodeMetrics!]!
offset: Int offset: Int
@@ -385,6 +397,13 @@ type Query {
page: PageRequest page: PageRequest
resolution: Int resolution: Int
): NodesResultList! ): NodesResultList!
clusterMetrics(
cluster: String!
metrics: [String!]
from: Time!
to: Time!
): ClusterMetrics!
} }
type Mutation { type Mutation {

View File

@@ -66,6 +66,18 @@ type ComplexityRoot struct {
SubClusters func(childComplexity int) int SubClusters func(childComplexity int) int
} }
ClusterMetricWithName struct {
Data func(childComplexity int) int
Name func(childComplexity int) int
Timestep func(childComplexity int) int
Unit func(childComplexity int) int
}
ClusterMetrics struct {
Metrics func(childComplexity int) int
NodeCount func(childComplexity int) int
}
ClusterSupport struct { ClusterSupport struct {
Cluster func(childComplexity int) int Cluster func(childComplexity int) int
SubClusters func(childComplexity int) int SubClusters func(childComplexity int) int
@@ -319,6 +331,7 @@ type ComplexityRoot struct {
Query struct { Query struct {
AllocatedNodes func(childComplexity int, cluster string) int AllocatedNodes func(childComplexity int, cluster string) int
ClusterMetrics func(childComplexity int, cluster string, metrics []string, from time.Time, to time.Time) int
Clusters func(childComplexity int) int Clusters func(childComplexity int) int
GlobalMetrics func(childComplexity int) int GlobalMetrics func(childComplexity int) int
Job func(childComplexity int, id string) int Job func(childComplexity int, id string) int
@@ -485,6 +498,7 @@ type QueryResolver interface {
RooflineHeatmap(ctx context.Context, filter []*model.JobFilter, rows int, cols int, minX float64, minY float64, maxX float64, maxY float64) ([][]float64, error) RooflineHeatmap(ctx context.Context, filter []*model.JobFilter, rows int, cols int, minX float64, minY float64, maxX float64, maxY float64) ([][]float64, error)
NodeMetrics(ctx context.Context, cluster string, nodes []string, scopes []schema.MetricScope, metrics []string, from time.Time, to time.Time) ([]*model.NodeMetrics, error) NodeMetrics(ctx context.Context, cluster string, nodes []string, scopes []schema.MetricScope, metrics []string, from time.Time, to time.Time) ([]*model.NodeMetrics, error)
NodeMetricsList(ctx context.Context, cluster string, subCluster string, stateFilter string, nodeFilter string, scopes []schema.MetricScope, metrics []string, from time.Time, to time.Time, page *model.PageRequest, resolution *int) (*model.NodesResultList, error) NodeMetricsList(ctx context.Context, cluster string, subCluster string, stateFilter string, nodeFilter string, scopes []schema.MetricScope, metrics []string, from time.Time, to time.Time, page *model.PageRequest, resolution *int) (*model.NodesResultList, error)
ClusterMetrics(ctx context.Context, cluster string, metrics []string, from time.Time, to time.Time) (*model.ClusterMetrics, error)
} }
type SubClusterResolver interface { type SubClusterResolver interface {
NumberOfNodes(ctx context.Context, obj *schema.SubCluster) (int, error) NumberOfNodes(ctx context.Context, obj *schema.SubCluster) (int, error)
@@ -551,6 +565,48 @@ func (e *executableSchema) Complexity(ctx context.Context, typeName, field strin
return e.complexity.Cluster.SubClusters(childComplexity), true return e.complexity.Cluster.SubClusters(childComplexity), true
case "ClusterMetricWithName.data":
if e.complexity.ClusterMetricWithName.Data == nil {
break
}
return e.complexity.ClusterMetricWithName.Data(childComplexity), true
case "ClusterMetricWithName.name":
if e.complexity.ClusterMetricWithName.Name == nil {
break
}
return e.complexity.ClusterMetricWithName.Name(childComplexity), true
case "ClusterMetricWithName.timestep":
if e.complexity.ClusterMetricWithName.Timestep == nil {
break
}
return e.complexity.ClusterMetricWithName.Timestep(childComplexity), true
case "ClusterMetricWithName.unit":
if e.complexity.ClusterMetricWithName.Unit == nil {
break
}
return e.complexity.ClusterMetricWithName.Unit(childComplexity), true
case "ClusterMetrics.metrics":
if e.complexity.ClusterMetrics.Metrics == nil {
break
}
return e.complexity.ClusterMetrics.Metrics(childComplexity), true
case "ClusterMetrics.nodeCount":
if e.complexity.ClusterMetrics.NodeCount == nil {
break
}
return e.complexity.ClusterMetrics.NodeCount(childComplexity), true
case "ClusterSupport.cluster": case "ClusterSupport.cluster":
if e.complexity.ClusterSupport.Cluster == nil { if e.complexity.ClusterSupport.Cluster == nil {
break break
@@ -1699,6 +1755,18 @@ func (e *executableSchema) Complexity(ctx context.Context, typeName, field strin
return e.complexity.Query.AllocatedNodes(childComplexity, args["cluster"].(string)), true return e.complexity.Query.AllocatedNodes(childComplexity, args["cluster"].(string)), true
case "Query.clusterMetrics":
if e.complexity.Query.ClusterMetrics == nil {
break
}
args, err := ec.field_Query_clusterMetrics_args(ctx, rawArgs)
if err != nil {
return 0, false
}
return e.complexity.Query.ClusterMetrics(childComplexity, args["cluster"].(string), args["metrics"].([]string), args["from"].(time.Time), args["to"].(time.Time)), true
case "Query.clusters": case "Query.clusters":
if e.complexity.Query.Clusters == nil { if e.complexity.Query.Clusters == nil {
break break
@@ -2577,6 +2645,13 @@ type JobMetricWithName {
metric: JobMetric! metric: JobMetric!
} }
type ClusterMetricWithName {
name: String!
unit: Unit
timestep: Int!
data: [NullableFloat!]!
}
type JobMetric { type JobMetric {
unit: Unit unit: Unit
timestep: Int! timestep: Int!
@@ -2680,6 +2755,11 @@ type NodeMetrics {
metrics: [JobMetricWithName!]! metrics: [JobMetricWithName!]!
} }
type ClusterMetrics {
nodeCount: Int!
metrics: [ClusterMetricWithName!]!
}
type NodesResultList { type NodesResultList {
items: [NodeMetrics!]! items: [NodeMetrics!]!
offset: Int offset: Int
@@ -2798,6 +2878,13 @@ type Query {
page: PageRequest page: PageRequest
resolution: Int resolution: Int
): NodesResultList! ): NodesResultList!
clusterMetrics(
cluster: String!
metrics: [String!]
from: Time!
to: Time!
): ClusterMetrics!
} }
type Mutation { type Mutation {
@@ -3074,6 +3161,32 @@ func (ec *executionContext) field_Query_allocatedNodes_args(ctx context.Context,
return args, nil return args, nil
} }
// field_Query_clusterMetrics_args decodes the raw arguments of the
// clusterMetrics query into typed values keyed by argument name
// ("cluster", "metrics", "from", "to"). The first argument that fails to
// unmarshal aborts processing and its error is returned with a nil map.
func (ec *executionContext) field_Query_clusterMetrics_args(ctx context.Context, rawArgs map[string]any) (map[string]any, error) {
var err error
args := map[string]any{}
// cluster: required String.
arg0, err := graphql.ProcessArgField(ctx, rawArgs, "cluster", ec.unmarshalNString2string)
if err != nil {
return nil, err
}
args["cluster"] = arg0
// metrics: optional list of non-null Strings.
arg1, err := graphql.ProcessArgField(ctx, rawArgs, "metrics", ec.unmarshalOString2ᚕstringᚄ)
if err != nil {
return nil, err
}
args["metrics"] = arg1
// from: required Time.
arg2, err := graphql.ProcessArgField(ctx, rawArgs, "from", ec.unmarshalNTime2timeᚐTime)
if err != nil {
return nil, err
}
args["from"] = arg2
// to: required Time.
arg3, err := graphql.ProcessArgField(ctx, rawArgs, "to", ec.unmarshalNTime2timeᚐTime)
if err != nil {
return nil, err
}
args["to"] = arg3
return args, nil
}
func (ec *executionContext) field_Query_jobMetrics_args(ctx context.Context, rawArgs map[string]any) (map[string]any, error) { func (ec *executionContext) field_Query_jobMetrics_args(ctx context.Context, rawArgs map[string]any) (map[string]any, error) {
var err error var err error
args := map[string]any{} args := map[string]any{}
@@ -3784,6 +3897,283 @@ func (ec *executionContext) fieldContext_Cluster_subClusters(_ context.Context,
return fc, nil return fc, nil
} }
// _ClusterMetricWithName_name resolves the non-null "name" field of a
// ClusterMetricWithName. It returns graphql.Null when the field context
// cannot be built, the middleware chain returns an error, the resolved value
// is nil, or a panic is recovered while resolving.
func (ec *executionContext) _ClusterMetricWithName_name(ctx context.Context, field graphql.CollectedField, obj *model.ClusterMetricWithName) (ret graphql.Marshaler) {
fc, err := ec.fieldContext_ClusterMetricWithName_name(ctx, field)
if err != nil {
return graphql.Null
}
ctx = graphql.WithFieldContext(ctx, fc)
// Turn a panic into a reported GraphQL error and a null result.
defer func() {
if r := recover(); r != nil {
ec.Error(ctx, ec.Recover(ctx, r))
ret = graphql.Null
}
}()
resTmp, err := ec.ResolverMiddleware(ctx, func(rctx context.Context) (any, error) {
ctx = rctx // use context from middleware stack in children
return obj.Name, nil
})
if err != nil {
ec.Error(ctx, err)
return graphql.Null
}
if resTmp == nil {
// Non-null field: report the violation unless an error was already recorded.
if !graphql.HasFieldError(ctx, fc) {
ec.Errorf(ctx, "must not be null")
}
return graphql.Null
}
res := resTmp.(string)
fc.Result = res
return ec.marshalNString2string(ctx, field.Selections, res)
}
// fieldContext_ClusterMetricWithName_name builds the field context for
// ClusterMetricWithName.name. The field is a String scalar, so any request
// for a child field context yields an error.
func (ec *executionContext) fieldContext_ClusterMetricWithName_name(_ context.Context, field graphql.CollectedField) (fc *graphql.FieldContext, err error) {
	noChildren := func(_ context.Context, _ graphql.CollectedField) (*graphql.FieldContext, error) {
		return nil, errors.New("field of type String does not have child fields")
	}
	return &graphql.FieldContext{
		Object:     "ClusterMetricWithName",
		Field:      field,
		IsMethod:   false,
		IsResolver: false,
		Child:      noChildren,
	}, nil
}
// _ClusterMetricWithName_unit resolves the nullable "unit" field of a
// ClusterMetricWithName. A nil result is returned as graphql.Null without
// recording an error; middleware errors and recovered panics are reported
// and also yield graphql.Null.
func (ec *executionContext) _ClusterMetricWithName_unit(ctx context.Context, field graphql.CollectedField, obj *model.ClusterMetricWithName) (ret graphql.Marshaler) {
fc, err := ec.fieldContext_ClusterMetricWithName_unit(ctx, field)
if err != nil {
return graphql.Null
}
ctx = graphql.WithFieldContext(ctx, fc)
// Turn a panic into a reported GraphQL error and a null result.
defer func() {
if r := recover(); r != nil {
ec.Error(ctx, ec.Recover(ctx, r))
ret = graphql.Null
}
}()
resTmp, err := ec.ResolverMiddleware(ctx, func(rctx context.Context) (any, error) {
ctx = rctx // use context from middleware stack in children
return obj.Unit, nil
})
if err != nil {
ec.Error(ctx, err)
return graphql.Null
}
// Optional field: null is a valid value, so no error is raised here.
if resTmp == nil {
return graphql.Null
}
res := resTmp.(*schema.Unit)
fc.Result = res
return ec.marshalOUnit2ᚖgithubᚗcomᚋClusterCockpitᚋccᚑlibᚋschemaᚐUnit(ctx, field.Selections, res)
}
// fieldContext_ClusterMetricWithName_unit builds the field context for
// ClusterMetricWithName.unit. Child lookups are delegated to the Unit field
// contexts "base" and "prefix"; any other child name is an error.
func (ec *executionContext) fieldContext_ClusterMetricWithName_unit(_ context.Context, field graphql.CollectedField) (fc *graphql.FieldContext, err error) {
	unitChild := func(ctx context.Context, child graphql.CollectedField) (*graphql.FieldContext, error) {
		if child.Name == "base" {
			return ec.fieldContext_Unit_base(ctx, child)
		}
		if child.Name == "prefix" {
			return ec.fieldContext_Unit_prefix(ctx, child)
		}
		return nil, fmt.Errorf("no field named %q was found under type Unit", child.Name)
	}
	return &graphql.FieldContext{
		Object:     "ClusterMetricWithName",
		Field:      field,
		IsMethod:   false,
		IsResolver: false,
		Child:      unitChild,
	}, nil
}
// _ClusterMetricWithName_timestep resolves the non-null "timestep" field of a
// ClusterMetricWithName. It returns graphql.Null when the field context
// cannot be built, the middleware chain returns an error, the resolved value
// is nil, or a panic is recovered while resolving.
func (ec *executionContext) _ClusterMetricWithName_timestep(ctx context.Context, field graphql.CollectedField, obj *model.ClusterMetricWithName) (ret graphql.Marshaler) {
fc, err := ec.fieldContext_ClusterMetricWithName_timestep(ctx, field)
if err != nil {
return graphql.Null
}
ctx = graphql.WithFieldContext(ctx, fc)
// Turn a panic into a reported GraphQL error and a null result.
defer func() {
if r := recover(); r != nil {
ec.Error(ctx, ec.Recover(ctx, r))
ret = graphql.Null
}
}()
resTmp, err := ec.ResolverMiddleware(ctx, func(rctx context.Context) (any, error) {
ctx = rctx // use context from middleware stack in children
return obj.Timestep, nil
})
if err != nil {
ec.Error(ctx, err)
return graphql.Null
}
if resTmp == nil {
// Non-null field: report the violation unless an error was already recorded.
if !graphql.HasFieldError(ctx, fc) {
ec.Errorf(ctx, "must not be null")
}
return graphql.Null
}
res := resTmp.(int)
fc.Result = res
return ec.marshalNInt2int(ctx, field.Selections, res)
}
// fieldContext_ClusterMetricWithName_timestep builds the field context for
// ClusterMetricWithName.timestep. The field is an Int scalar, so any request
// for a child field context yields an error.
func (ec *executionContext) fieldContext_ClusterMetricWithName_timestep(_ context.Context, field graphql.CollectedField) (fc *graphql.FieldContext, err error) {
	noChildren := func(_ context.Context, _ graphql.CollectedField) (*graphql.FieldContext, error) {
		return nil, errors.New("field of type Int does not have child fields")
	}
	return &graphql.FieldContext{
		Object:     "ClusterMetricWithName",
		Field:      field,
		IsMethod:   false,
		IsResolver: false,
		Child:      noChildren,
	}, nil
}
// _ClusterMetricWithName_data resolves the non-null "data" field of a
// ClusterMetricWithName and marshals it as a list of NullableFloat. It
// returns graphql.Null when the field context cannot be built, the
// middleware chain returns an error, the resolved value is nil, or a panic
// is recovered while resolving.
func (ec *executionContext) _ClusterMetricWithName_data(ctx context.Context, field graphql.CollectedField, obj *model.ClusterMetricWithName) (ret graphql.Marshaler) {
fc, err := ec.fieldContext_ClusterMetricWithName_data(ctx, field)
if err != nil {
return graphql.Null
}
ctx = graphql.WithFieldContext(ctx, fc)
// Turn a panic into a reported GraphQL error and a null result.
defer func() {
if r := recover(); r != nil {
ec.Error(ctx, ec.Recover(ctx, r))
ret = graphql.Null
}
}()
resTmp, err := ec.ResolverMiddleware(ctx, func(rctx context.Context) (any, error) {
ctx = rctx // use context from middleware stack in children
return obj.Data, nil
})
if err != nil {
ec.Error(ctx, err)
return graphql.Null
}
if resTmp == nil {
// Non-null field: report the violation unless an error was already recorded.
if !graphql.HasFieldError(ctx, fc) {
ec.Errorf(ctx, "must not be null")
}
return graphql.Null
}
res := resTmp.([]schema.Float)
fc.Result = res
return ec.marshalNNullableFloat2ᚕgithubᚗcomᚋClusterCockpitᚋccᚑlibᚋschemaᚐFloatᚄ(ctx, field.Selections, res)
}
// fieldContext_ClusterMetricWithName_data builds the field context for
// ClusterMetricWithName.data. The elements are NullableFloat scalars, so any
// request for a child field context yields an error.
func (ec *executionContext) fieldContext_ClusterMetricWithName_data(_ context.Context, field graphql.CollectedField) (fc *graphql.FieldContext, err error) {
	noChildren := func(_ context.Context, _ graphql.CollectedField) (*graphql.FieldContext, error) {
		return nil, errors.New("field of type NullableFloat does not have child fields")
	}
	return &graphql.FieldContext{
		Object:     "ClusterMetricWithName",
		Field:      field,
		IsMethod:   false,
		IsResolver: false,
		Child:      noChildren,
	}, nil
}
// _ClusterMetrics_nodeCount resolves the non-null "nodeCount" field of a
// ClusterMetrics object. It returns graphql.Null when the field context
// cannot be built, the middleware chain returns an error, the resolved value
// is nil, or a panic is recovered while resolving.
func (ec *executionContext) _ClusterMetrics_nodeCount(ctx context.Context, field graphql.CollectedField, obj *model.ClusterMetrics) (ret graphql.Marshaler) {
fc, err := ec.fieldContext_ClusterMetrics_nodeCount(ctx, field)
if err != nil {
return graphql.Null
}
ctx = graphql.WithFieldContext(ctx, fc)
// Turn a panic into a reported GraphQL error and a null result.
defer func() {
if r := recover(); r != nil {
ec.Error(ctx, ec.Recover(ctx, r))
ret = graphql.Null
}
}()
resTmp, err := ec.ResolverMiddleware(ctx, func(rctx context.Context) (any, error) {
ctx = rctx // use context from middleware stack in children
return obj.NodeCount, nil
})
if err != nil {
ec.Error(ctx, err)
return graphql.Null
}
if resTmp == nil {
// Non-null field: report the violation unless an error was already recorded.
if !graphql.HasFieldError(ctx, fc) {
ec.Errorf(ctx, "must not be null")
}
return graphql.Null
}
res := resTmp.(int)
fc.Result = res
return ec.marshalNInt2int(ctx, field.Selections, res)
}
// fieldContext_ClusterMetrics_nodeCount builds the field context for
// ClusterMetrics.nodeCount. The field is an Int scalar, so any request for a
// child field context yields an error.
func (ec *executionContext) fieldContext_ClusterMetrics_nodeCount(_ context.Context, field graphql.CollectedField) (fc *graphql.FieldContext, err error) {
	noChildren := func(_ context.Context, _ graphql.CollectedField) (*graphql.FieldContext, error) {
		return nil, errors.New("field of type Int does not have child fields")
	}
	return &graphql.FieldContext{
		Object:     "ClusterMetrics",
		Field:      field,
		IsMethod:   false,
		IsResolver: false,
		Child:      noChildren,
	}, nil
}
// _ClusterMetrics_metrics resolves the non-null "metrics" field of a
// ClusterMetrics object and marshals it as a list of ClusterMetricWithName.
// It returns graphql.Null when the field context cannot be built, the
// middleware chain returns an error, the resolved value is nil, or a panic
// is recovered while resolving.
func (ec *executionContext) _ClusterMetrics_metrics(ctx context.Context, field graphql.CollectedField, obj *model.ClusterMetrics) (ret graphql.Marshaler) {
fc, err := ec.fieldContext_ClusterMetrics_metrics(ctx, field)
if err != nil {
return graphql.Null
}
ctx = graphql.WithFieldContext(ctx, fc)
// Turn a panic into a reported GraphQL error and a null result.
defer func() {
if r := recover(); r != nil {
ec.Error(ctx, ec.Recover(ctx, r))
ret = graphql.Null
}
}()
resTmp, err := ec.ResolverMiddleware(ctx, func(rctx context.Context) (any, error) {
ctx = rctx // use context from middleware stack in children
return obj.Metrics, nil
})
if err != nil {
ec.Error(ctx, err)
return graphql.Null
}
if resTmp == nil {
// Non-null field: report the violation unless an error was already recorded.
if !graphql.HasFieldError(ctx, fc) {
ec.Errorf(ctx, "must not be null")
}
return graphql.Null
}
res := resTmp.([]*model.ClusterMetricWithName)
fc.Result = res
return ec.marshalNClusterMetricWithName2ᚕᚖgithubᚗcomᚋClusterCockpitᚋccᚑbackendᚋinternalᚋgraphᚋmodelᚐClusterMetricWithNameᚄ(ctx, field.Selections, res)
}
// fieldContext_ClusterMetrics_metrics builds the field context for
// ClusterMetrics.metrics. Child lookups are delegated to the field contexts
// of ClusterMetricWithName ("name", "unit", "timestep", "data"); any other
// child name is an error.
func (ec *executionContext) fieldContext_ClusterMetrics_metrics(_ context.Context, field graphql.CollectedField) (fc *graphql.FieldContext, err error) {
	metricChild := func(ctx context.Context, child graphql.CollectedField) (*graphql.FieldContext, error) {
		if child.Name == "name" {
			return ec.fieldContext_ClusterMetricWithName_name(ctx, child)
		}
		if child.Name == "unit" {
			return ec.fieldContext_ClusterMetricWithName_unit(ctx, child)
		}
		if child.Name == "timestep" {
			return ec.fieldContext_ClusterMetricWithName_timestep(ctx, child)
		}
		if child.Name == "data" {
			return ec.fieldContext_ClusterMetricWithName_data(ctx, child)
		}
		return nil, fmt.Errorf("no field named %q was found under type ClusterMetricWithName", child.Name)
	}
	return &graphql.FieldContext{
		Object:     "ClusterMetrics",
		Field:      field,
		IsMethod:   false,
		IsResolver: false,
		Child:      metricChild,
	}, nil
}
func (ec *executionContext) _ClusterSupport_cluster(ctx context.Context, field graphql.CollectedField, obj *schema.ClusterSupport) (ret graphql.Marshaler) { func (ec *executionContext) _ClusterSupport_cluster(ctx context.Context, field graphql.CollectedField, obj *schema.ClusterSupport) (ret graphql.Marshaler) {
fc, err := ec.fieldContext_ClusterSupport_cluster(ctx, field) fc, err := ec.fieldContext_ClusterSupport_cluster(ctx, field)
if err != nil { if err != nil {
@@ -12353,6 +12743,67 @@ func (ec *executionContext) fieldContext_Query_nodeMetricsList(ctx context.Conte
return fc, nil return fc, nil
} }
// _Query_clusterMetrics executes the clusterMetrics query: it calls the
// QueryResolver's ClusterMetrics method with the arguments stored in the
// field context and marshals the returned *model.ClusterMetrics. It returns
// graphql.Null when the field context (including argument parsing) fails,
// the resolver returns an error or nil, or a panic is recovered.
func (ec *executionContext) _Query_clusterMetrics(ctx context.Context, field graphql.CollectedField) (ret graphql.Marshaler) {
fc, err := ec.fieldContext_Query_clusterMetrics(ctx, field)
if err != nil {
return graphql.Null
}
ctx = graphql.WithFieldContext(ctx, fc)
// Turn a panic into a reported GraphQL error and a null result.
defer func() {
if r := recover(); r != nil {
ec.Error(ctx, ec.Recover(ctx, r))
ret = graphql.Null
}
}()
resTmp, err := ec.ResolverMiddleware(ctx, func(rctx context.Context) (any, error) {
ctx = rctx // use context from middleware stack in children
return ec.resolvers.Query().ClusterMetrics(rctx, fc.Args["cluster"].(string), fc.Args["metrics"].([]string), fc.Args["from"].(time.Time), fc.Args["to"].(time.Time))
})
if err != nil {
ec.Error(ctx, err)
return graphql.Null
}
if resTmp == nil {
// Non-null field: report the violation unless an error was already recorded.
if !graphql.HasFieldError(ctx, fc) {
ec.Errorf(ctx, "must not be null")
}
return graphql.Null
}
res := resTmp.(*model.ClusterMetrics)
fc.Result = res
return ec.marshalNClusterMetrics2ᚖgithubᚗcomᚋClusterCockpitᚋccᚑbackendᚋinternalᚋgraphᚋmodelᚐClusterMetrics(ctx, field.Selections, res)
}
// fieldContext_Query_clusterMetrics builds the field context for
// Query.clusterMetrics and parses the query arguments into fc.Args. Child
// lookups are delegated to the ClusterMetrics field contexts ("nodeCount",
// "metrics"). An argument error is reported via ec.Error and returned
// together with the partially initialised context; a panic is recovered
// and surfaced through the named err result.
func (ec *executionContext) fieldContext_Query_clusterMetrics(ctx context.Context, field graphql.CollectedField) (fc *graphql.FieldContext, err error) {
fc = &graphql.FieldContext{
Object: "Query",
Field: field,
IsMethod: true,
IsResolver: true,
Child: func(ctx context.Context, field graphql.CollectedField) (*graphql.FieldContext, error) {
switch field.Name {
case "nodeCount":
return ec.fieldContext_ClusterMetrics_nodeCount(ctx, field)
case "metrics":
return ec.fieldContext_ClusterMetrics_metrics(ctx, field)
}
return nil, fmt.Errorf("no field named %q was found under type ClusterMetrics", field.Name)
},
}
// Convert a panic into an error on the named return value.
defer func() {
if r := recover(); r != nil {
err = ec.Recover(ctx, r)
ec.Error(ctx, err)
}
}()
ctx = graphql.WithFieldContext(ctx, fc)
if fc.Args, err = ec.field_Query_clusterMetrics_args(ctx, field.ArgumentMap(ec.Variables)); err != nil {
ec.Error(ctx, err)
return fc, err
}
return fc, nil
}
func (ec *executionContext) _Query___type(ctx context.Context, field graphql.CollectedField) (ret graphql.Marshaler) { func (ec *executionContext) _Query___type(ctx context.Context, field graphql.CollectedField) (ret graphql.Marshaler) {
fc, err := ec.fieldContext_Query___type(ctx, field) fc, err := ec.fieldContext_Query___type(ctx, field)
if err != nil { if err != nil {
@@ -17527,6 +17978,101 @@ func (ec *executionContext) _Cluster(ctx context.Context, sel ast.SelectionSet,
return out return out
} }
// clusterMetricWithNameImplementors lists the type names used when
// collecting the selected fields for ClusterMetricWithName.
var clusterMetricWithNameImplementors = []string{"ClusterMetricWithName"}
// _ClusterMetricWithName marshals obj according to the selection set sel.
// Each selected field is resolved by its dedicated method; if any of the
// non-null fields (name, timestep, data) resolves to graphql.Null the whole
// object is returned as graphql.Null. An unknown field name panics.
func (ec *executionContext) _ClusterMetricWithName(ctx context.Context, sel ast.SelectionSet, obj *model.ClusterMetricWithName) graphql.Marshaler {
fields := graphql.CollectFields(ec.OperationContext, sel, clusterMetricWithNameImplementors)
out := graphql.NewFieldSet(fields)
deferred := make(map[string]*graphql.FieldSet)
for i, field := range fields {
switch field.Name {
case "__typename":
out.Values[i] = graphql.MarshalString("ClusterMetricWithName")
case "name":
out.Values[i] = ec._ClusterMetricWithName_name(ctx, field, obj)
if out.Values[i] == graphql.Null {
out.Invalids++
}
case "unit":
// unit is nullable, so a null value does not invalidate the object.
out.Values[i] = ec._ClusterMetricWithName_unit(ctx, field, obj)
case "timestep":
out.Values[i] = ec._ClusterMetricWithName_timestep(ctx, field, obj)
if out.Values[i] == graphql.Null {
out.Invalids++
}
case "data":
out.Values[i] = ec._ClusterMetricWithName_data(ctx, field, obj)
if out.Values[i] == graphql.Null {
out.Invalids++
}
default:
panic("unknown field " + strconv.Quote(field.Name))
}
}
out.Dispatch(ctx)
if out.Invalids > 0 {
return graphql.Null
}
// No entries are ever added to deferred in this function, so the loop
// below does not run and the counter is incremented by zero.
atomic.AddInt32(&ec.deferred, int32(len(deferred)))
for label, dfs := range deferred {
ec.processDeferredGroup(graphql.DeferredGroup{
Label: label,
Path: graphql.GetPath(ctx),
FieldSet: dfs,
Context: ctx,
})
}
return out
}
// clusterMetricsImplementors lists the type names used when collecting the
// selected fields for ClusterMetrics.
var clusterMetricsImplementors = []string{"ClusterMetrics"}
// _ClusterMetrics marshals obj according to the selection set sel. Both
// fields (nodeCount, metrics) are non-null: if either resolves to
// graphql.Null the whole object is returned as graphql.Null. An unknown
// field name panics.
func (ec *executionContext) _ClusterMetrics(ctx context.Context, sel ast.SelectionSet, obj *model.ClusterMetrics) graphql.Marshaler {
fields := graphql.CollectFields(ec.OperationContext, sel, clusterMetricsImplementors)
out := graphql.NewFieldSet(fields)
deferred := make(map[string]*graphql.FieldSet)
for i, field := range fields {
switch field.Name {
case "__typename":
out.Values[i] = graphql.MarshalString("ClusterMetrics")
case "nodeCount":
out.Values[i] = ec._ClusterMetrics_nodeCount(ctx, field, obj)
if out.Values[i] == graphql.Null {
out.Invalids++
}
case "metrics":
out.Values[i] = ec._ClusterMetrics_metrics(ctx, field, obj)
if out.Values[i] == graphql.Null {
out.Invalids++
}
default:
panic("unknown field " + strconv.Quote(field.Name))
}
}
out.Dispatch(ctx)
if out.Invalids > 0 {
return graphql.Null
}
// No entries are ever added to deferred in this function, so the loop
// below does not run and the counter is incremented by zero.
atomic.AddInt32(&ec.deferred, int32(len(deferred)))
for label, dfs := range deferred {
ec.processDeferredGroup(graphql.DeferredGroup{
Label: label,
Path: graphql.GetPath(ctx),
FieldSet: dfs,
Context: ctx,
})
}
return out
}
var clusterSupportImplementors = []string{"ClusterSupport"} var clusterSupportImplementors = []string{"ClusterSupport"}
func (ec *executionContext) _ClusterSupport(ctx context.Context, sel ast.SelectionSet, obj *schema.ClusterSupport) graphql.Marshaler { func (ec *executionContext) _ClusterSupport(ctx context.Context, sel ast.SelectionSet, obj *schema.ClusterSupport) graphql.Marshaler {
@@ -20101,6 +20647,28 @@ func (ec *executionContext) _Query(ctx context.Context, sel ast.SelectionSet) gr
func(ctx context.Context) graphql.Marshaler { return innerFunc(ctx, out) }) func(ctx context.Context) graphql.Marshaler { return innerFunc(ctx, out) })
} }
out.Concurrently(i, func(ctx context.Context) graphql.Marshaler { return rrm(innerCtx) })
case "clusterMetrics":
field := field
innerFunc := func(ctx context.Context, fs *graphql.FieldSet) (res graphql.Marshaler) {
defer func() {
if r := recover(); r != nil {
ec.Error(ctx, ec.Recover(ctx, r))
}
}()
res = ec._Query_clusterMetrics(ctx, field)
if res == graphql.Null {
atomic.AddUint32(&fs.Invalids, 1)
}
return res
}
rrm := func(ctx context.Context) graphql.Marshaler {
return ec.OperationContext.RootResolverMiddleware(ctx,
func(ctx context.Context) graphql.Marshaler { return innerFunc(ctx, out) })
}
out.Concurrently(i, func(ctx context.Context) graphql.Marshaler { return rrm(innerCtx) }) out.Concurrently(i, func(ctx context.Context) graphql.Marshaler { return rrm(innerCtx) })
case "__type": case "__type":
out.Values[i] = ec.OperationContext.RootResolverMiddleware(innerCtx, func(ctx context.Context) (res graphql.Marshaler) { out.Values[i] = ec.OperationContext.RootResolverMiddleware(innerCtx, func(ctx context.Context) (res graphql.Marshaler) {
@@ -21205,6 +21773,74 @@ func (ec *executionContext) marshalNCluster2ᚖgithubᚗcomᚋClusterCockpitᚋc
return ec._Cluster(ctx, sel, v) return ec._Cluster(ctx, sel, v)
} }
// marshalNClusterMetricWithName2ᚕᚖ…ᚄ marshals a non-null list of non-null
// ClusterMetricWithName values. A single element is marshalled inline;
// otherwise every element is marshalled in its own goroutine and the
// function waits for all of them. If any element marshals to graphql.Null
// the whole list is returned as graphql.Null.
func (ec *executionContext) marshalNClusterMetricWithName2ᚕᚖgithubᚗcomᚋClusterCockpitᚋccᚑbackendᚋinternalᚋgraphᚋmodelᚐClusterMetricWithNameᚄ(ctx context.Context, sel ast.SelectionSet, v []*model.ClusterMetricWithName) graphql.Marshaler {
ret := make(graphql.Array, len(v))
var wg sync.WaitGroup
isLen1 := len(v) == 1
if !isLen1 {
wg.Add(len(v))
}
for i := range v {
i := i
// Per-element field context carrying the list index.
fc := &graphql.FieldContext{
Index: &i,
Result: &v[i],
}
ctx := graphql.WithFieldContext(ctx, fc)
f := func(i int) {
// Report a panic from element marshalling as a GraphQL error and drop the result list.
defer func() {
if r := recover(); r != nil {
ec.Error(ctx, ec.Recover(ctx, r))
ret = nil
}
}()
if !isLen1 {
defer wg.Done()
}
ret[i] = ec.marshalNClusterMetricWithName2ᚖgithubᚗcomᚋClusterCockpitᚋccᚑbackendᚋinternalᚋgraphᚋmodelᚐClusterMetricWithName(ctx, sel, v[i])
}
if isLen1 {
f(i)
} else {
go f(i)
}
}
wg.Wait()
// Elements are non-null: one null element nulls the entire list.
for _, e := range ret {
if e == graphql.Null {
return graphql.Null
}
}
return ret
}
// marshalNClusterMetricWithName2ᚖ…ClusterMetricWithName marshals a non-null
// *model.ClusterMetricWithName. A nil pointer violates the schema: an error
// is recorded (unless the field already has one) and graphql.Null is
// returned.
func (ec *executionContext) marshalNClusterMetricWithName2ᚖgithubᚗcomᚋClusterCockpitᚋccᚑbackendᚋinternalᚋgraphᚋmodelᚐClusterMetricWithName(ctx context.Context, sel ast.SelectionSet, v *model.ClusterMetricWithName) graphql.Marshaler {
	if v != nil {
		return ec._ClusterMetricWithName(ctx, sel, v)
	}
	if fieldCtx := graphql.GetFieldContext(ctx); !graphql.HasFieldError(ctx, fieldCtx) {
		ec.Errorf(ctx, "the requested element is null which the schema does not allow")
	}
	return graphql.Null
}
// marshalNClusterMetrics2…ClusterMetrics marshals a ClusterMetrics value by
// delegating to _ClusterMetrics with a pointer to the local copy.
func (ec *executionContext) marshalNClusterMetrics2githubᚗcomᚋClusterCockpitᚋccᚑbackendᚋinternalᚋgraphᚋmodelᚐClusterMetrics(ctx context.Context, sel ast.SelectionSet, v model.ClusterMetrics) graphql.Marshaler {
	obj := &v
	return ec._ClusterMetrics(ctx, sel, obj)
}
// marshalNClusterMetrics2ᚖ…ClusterMetrics marshals a non-null
// *model.ClusterMetrics. A nil pointer violates the schema: an error is
// recorded (unless the field already has one) and graphql.Null is returned.
func (ec *executionContext) marshalNClusterMetrics2ᚖgithubᚗcomᚋClusterCockpitᚋccᚑbackendᚋinternalᚋgraphᚋmodelᚐClusterMetrics(ctx context.Context, sel ast.SelectionSet, v *model.ClusterMetrics) graphql.Marshaler {
	if v != nil {
		return ec._ClusterMetrics(ctx, sel, v)
	}
	if fieldCtx := graphql.GetFieldContext(ctx); !graphql.HasFieldError(ctx, fieldCtx) {
		ec.Errorf(ctx, "the requested element is null which the schema does not allow")
	}
	return graphql.Null
}
func (ec *executionContext) marshalNClusterSupport2githubᚗcomᚋClusterCockpitᚋccᚑlibᚋschemaᚐClusterSupport(ctx context.Context, sel ast.SelectionSet, v schema.ClusterSupport) graphql.Marshaler { func (ec *executionContext) marshalNClusterSupport2githubᚗcomᚋClusterCockpitᚋccᚑlibᚋschemaᚐClusterSupport(ctx context.Context, sel ast.SelectionSet, v schema.ClusterSupport) graphql.Marshaler {
return ec._ClusterSupport(ctx, sel, &v) return ec._ClusterSupport(ctx, sel, &v)
} }
@@ -24142,6 +24778,13 @@ func (ec *executionContext) marshalOUnit2githubᚗcomᚋClusterCockpitᚋccᚑli
return ec._Unit(ctx, sel, &v) return ec._Unit(ctx, sel, &v)
} }
// marshalOUnit2ᚖ…Unit marshals an optional *schema.Unit. A nil pointer is a
// valid value and is emitted as graphql.Null without recording an error.
func (ec *executionContext) marshalOUnit2ᚖgithubᚗcomᚋClusterCockpitᚋccᚑlibᚋschemaᚐUnit(ctx context.Context, sel ast.SelectionSet, v *schema.Unit) graphql.Marshaler {
	if v != nil {
		return ec._Unit(ctx, sel, v)
	}
	return graphql.Null
}
func (ec *executionContext) marshalOUser2ᚖgithubᚗcomᚋClusterCockpitᚋccᚑbackendᚋinternalᚋgraphᚋmodelᚐUser(ctx context.Context, sel ast.SelectionSet, v *model.User) graphql.Marshaler { func (ec *executionContext) marshalOUser2ᚖgithubᚗcomᚋClusterCockpitᚋccᚑbackendᚋinternalᚋgraphᚋmodelᚐUser(ctx context.Context, sel ast.SelectionSet, v *model.User) graphql.Marshaler {
if v == nil { if v == nil {
return graphql.Null return graphql.Null

View File

@@ -13,6 +13,18 @@ import (
"github.com/ClusterCockpit/cc-lib/schema" "github.com/ClusterCockpit/cc-lib/schema"
) )
// ClusterMetricWithName is the Go model of the GraphQL type
// ClusterMetricWithName: one named metric series aggregated over a cluster.
type ClusterMetricWithName struct {
// Name of the metric.
Name string `json:"name"`
// Unit of the values in Data; nil when no unit is set.
Unit *schema.Unit `json:"unit,omitempty"`
// Time step of the series (NOTE(review): presumably seconds, confirm).
Timestep int `json:"timestep"`
// Aggregated series values.
Data []schema.Float `json:"data"`
}
// ClusterMetrics is the Go model of the GraphQL type ClusterMetrics, the
// result of the clusterMetrics query.
type ClusterMetrics struct {
// Number of nodes whose data was aggregated.
NodeCount int `json:"nodeCount"`
// One aggregated series per metric.
Metrics []*ClusterMetricWithName `json:"metrics"`
}
type Count struct { type Count struct {
Name string `json:"name"` Name string `json:"name"`
Count int `json:"count"` Count int `json:"count"`

View File

@@ -8,6 +8,7 @@ import (
"context" "context"
"errors" "errors"
"fmt" "fmt"
"math"
"regexp" "regexp"
"slices" "slices"
"strconv" "strconv"
@@ -973,6 +974,86 @@ func (r *queryResolver) NodeMetricsList(ctx context.Context, cluster string, sub
return nodeMetricsListResult, nil return nodeMetricsListResult, nil
} }
// ClusterMetrics is the resolver for the clusterMetrics field.
//
// It loads node-scope data of every node of cluster for the interval
// [from, to] and adds the per-node series up index by index into one
// cluster-wide series per metric. The summed values are rounded to two
// decimal places. NodeCount is the number of nodes for which data was
// returned.
//
// If metrics is nil, all metrics configured for the cluster are used.
// A request carrying a user without the admin or support role is rejected;
// a request without a user in the context passes the check.
//
// The returned metrics are sorted by name so that responses are
// deterministic. Series of different length are handled by growing the sum
// to the longest series seen instead of indexing out of range.
func (r *queryResolver) ClusterMetrics(ctx context.Context, cluster string, metrics []string, from time.Time, to time.Time) (*model.ClusterMetrics, error) {
	user := repository.GetUserFromContext(ctx)
	if user != nil && !user.HasAnyRole([]schema.Role{schema.RoleAdmin, schema.RoleSupport}) {
		return nil, errors.New("you need to be administrator or support staff for this query")
	}

	if metrics == nil {
		// Guard against an unknown cluster name instead of dereferencing nil.
		clusterConfig := archive.GetCluster(cluster)
		if clusterConfig == nil {
			return nil, fmt.Errorf("unknown cluster %q", cluster)
		}
		for _, mc := range clusterConfig.MetricConfig {
			metrics = append(metrics, mc.Name)
		}
	}

	// 'nodes' == nil -> Defaults to all nodes of cluster for existing query workflow
	scopes := []schema.MetricScope{"node"}
	data, err := metricDataDispatcher.LoadNodeData(cluster, metrics, nil, scopes, from, to, ctx)
	if err != nil {
		cclog.Warn("error while loading node data")
		return nil, err
	}

	nodeCount := 0
	collectorTimestep := make(map[string]int)
	collectorUnit := make(map[string]schema.Unit)
	collectorData := make(map[string][]schema.Float)

	for _, nodeData := range data {
		nodeCount++
		for metric, scopedMetrics := range nodeData {
			sum := collectorData[metric]
			for _, scopedMetric := range scopedMetrics {
				// Take timestep and unit from the first series seen for this metric.
				if _, known := collectorTimestep[metric]; !known {
					collectorTimestep[metric] = scopedMetric.Timestep
					collectorUnit[metric] = scopedMetric.Unit
				}
				// Cluster-wide sum of node metrics, added up by sample index.
				for _, ser := range scopedMetric.Series {
					for i, val := range ser.Data {
						if i < len(sum) {
							sum[i] += val
						} else {
							// Series longer than anything seen so far: extend the sum.
							sum = append(sum, val)
						}
					}
				}
			}
			// Always store the entry so a metric without data still shows up.
			collectorData[metric] = sum
		}
	}

	// Map iteration order is random; sort names for a stable response.
	metricNames := make([]string, 0, len(collectorData))
	for name := range collectorData {
		metricNames = append(metricNames, name)
	}
	slices.Sort(metricNames)

	clusterMetrics := model.ClusterMetrics{
		NodeCount: nodeCount,
		Metrics:   make([]*model.ClusterMetricWithName, 0, len(metricNames)),
	}
	for _, name := range metricNames {
		summed := collectorData[name]
		rounded := make([]schema.Float, len(summed))
		for i, val := range summed {
			rounded[i] = schema.Float(math.Round(float64(val)*100.0) / 100.0)
		}
		unit := collectorUnit[name]
		clusterMetrics.Metrics = append(clusterMetrics.Metrics, &model.ClusterMetricWithName{
			Name:     name,
			Unit:     &unit,
			Timestep: collectorTimestep[name],
			Data:     rounded,
		})
	}

	return &clusterMetrics, nil
}
// NumberOfNodes is the resolver for the numberOfNodes field. // NumberOfNodes is the resolver for the numberOfNodes field.
func (r *subClusterResolver) NumberOfNodes(ctx context.Context, obj *schema.SubCluster) (int, error) { func (r *subClusterResolver) NumberOfNodes(ctx context.Context, obj *schema.SubCluster) (int, error) {
nodeList, err := archive.ParseNodeList(obj.Nodes) nodeList, err := archive.ParseNodeList(obj.Nodes)

View File

@@ -47,7 +47,9 @@ var routes []Route = []Route{
{"/monitoring/systems/list/{cluster}/{subcluster}", "monitoring/systems.tmpl", "Cluster <ID> <SID> Node List - ClusterCockpit", false, setupClusterListRoute}, {"/monitoring/systems/list/{cluster}/{subcluster}", "monitoring/systems.tmpl", "Cluster <ID> <SID> Node List - ClusterCockpit", false, setupClusterListRoute},
{"/monitoring/node/{cluster}/{hostname}", "monitoring/node.tmpl", "Node <ID> - ClusterCockpit", false, setupNodeRoute}, {"/monitoring/node/{cluster}/{hostname}", "monitoring/node.tmpl", "Node <ID> - ClusterCockpit", false, setupNodeRoute},
{"/monitoring/analysis/{cluster}", "monitoring/analysis.tmpl", "Analysis - ClusterCockpit", true, setupAnalysisRoute}, {"/monitoring/analysis/{cluster}", "monitoring/analysis.tmpl", "Analysis - ClusterCockpit", true, setupAnalysisRoute},
{"/monitoring/status/{cluster}", "monitoring/status.tmpl", "Status of <ID> - ClusterCockpit", false, setupClusterStatusRoute}, {"/monitoring/status/{cluster}", "monitoring/status.tmpl", "<ID> Dashboard - ClusterCockpit", false, setupClusterStatusRoute},
{"/monitoring/status/detail/{cluster}", "monitoring/status.tmpl", "Status of <ID> - ClusterCockpit", false, setupClusterDetailRoute},
{"/monitoring/dashboard/{cluster}", "monitoring/dashboard.tmpl", "<ID> Dashboard - ClusterCockpit", false, setupDashboardRoute},
} }
func setupHomeRoute(i InfoType, r *http.Request) InfoType { func setupHomeRoute(i InfoType, r *http.Request) InfoType {
@@ -117,6 +119,33 @@ func setupClusterStatusRoute(i InfoType, r *http.Request) InfoType {
vars := mux.Vars(r) vars := mux.Vars(r)
i["id"] = vars["cluster"] i["id"] = vars["cluster"]
i["cluster"] = vars["cluster"] i["cluster"] = vars["cluster"]
i["displayType"] = "DASHBOARD"
from, to := r.URL.Query().Get("from"), r.URL.Query().Get("to")
if from != "" || to != "" {
i["from"] = from
i["to"] = to
}
return i
}
// setupClusterDetailRoute populates the template info for the detailed
// cluster status view. The cluster name from the route is stored under both
// "id" and "cluster", the display type is fixed to "DETAILS", and the
// optional "from"/"to" URL query parameters are passed through as soon as at
// least one of them is set.
func setupClusterDetailRoute(i InfoType, r *http.Request) InfoType {
	cluster := mux.Vars(r)["cluster"]
	i["id"] = cluster
	i["cluster"] = cluster
	i["displayType"] = "DETAILS"

	query := r.URL.Query()
	from, to := query.Get("from"), query.Get("to")
	if from == "" && to == "" {
		// No time range requested: leave "from"/"to" unset.
		return i
	}

	// Both keys are written even if only one parameter was given.
	i["from"] = from
	i["to"] = to
	return i
}
func setupDashboardRoute(i InfoType, r *http.Request) InfoType {
vars := mux.Vars(r)
i["id"] = vars["cluster"]
i["cluster"] = vars["cluster"]
i["displayType"] = "PUBLIC"
from, to := r.URL.Query().Get("from"), r.URL.Query().Get("to") from, to := r.URL.Query().Get("from"), r.URL.Query().Get("to")
if from != "" || to != "" { if from != "" || to != "" {
i["from"] = from i["from"] = from

View File

@@ -74,5 +74,6 @@ export default [
entrypoint('node', 'src/node.entrypoint.js'), entrypoint('node', 'src/node.entrypoint.js'),
entrypoint('analysis', 'src/analysis.entrypoint.js'), entrypoint('analysis', 'src/analysis.entrypoint.js'),
entrypoint('status', 'src/status.entrypoint.js'), entrypoint('status', 'src/status.entrypoint.js'),
entrypoint('dashpublic', 'src/dashpublic.entrypoint.js'),
entrypoint('config', 'src/config.entrypoint.js') entrypoint('config', 'src/config.entrypoint.js')
]; ];

View File

@@ -0,0 +1,671 @@
<!--
@component Main cluster status view component; renders current system-usage information
Properties:
- `presetCluster String`: The cluster to show status information for
-->
<script>
import {
getContext
} from "svelte"
import {
queryStore,
gql,
getContextClient,
} from "@urql/svelte";
import {
init,
scramble,
scrambleNames,
convert2uplot
} from "./generic/utils.js";
import {
formatDurationTime,
formatNumber,
} from "./generic/units.js";
import {
Row,
Col,
Card,
CardTitle,
CardHeader,
CardBody,
Spinner,
Table,
Progress,
Icon,
} from "@sveltestrap/sveltestrap";
import Roofline from "./generic/plots/Roofline.svelte";
import Pie, { colors } from "./generic/plots/Pie.svelte";
import Stacked from "./generic/plots/Stacked.svelte";
// import Histogram from "./generic/plots/Histogram.svelte";
import DoubleMetric from "./generic/plots/DoubleMetricPlot.svelte";
/* Svelte 5 Props */
let {
presetCluster,
} = $props();
/*Const Init */
const { query: initq } = init();
const client = getContextClient();
const useCbColors = getContext("cc-config")?.plotConfiguration_colorblindMode || false
/* States */
let pagingState = $state({page: 1, itemsPerPage: 10}) // Top 10
let from = $state(new Date(Date.now() - (5 * 60 * 1000)));
let clusterFrom = $state(new Date(Date.now() - (8 * 60 * 60 * 1000)));
let to = $state(new Date(Date.now()));
let stackedFrom = $state(Math.floor(Date.now() / 1000) - 14400);
let colWidthStates = $state(0);
let colWidthRoof = $state(0);
let colWidthTotals = $state(0);
let colWidthStacked = $state(0);
/* Derived */
// Time-resolved node state counts for the stacked charts.
// Requests the scheduler ("node") and health ("health") state series of the
// preset cluster, starting at `stackedFrom` (unix seconds) instead of a
// hard-coded debug timestamp, so the window follows the current time.
const statesTimed = $derived(queryStore({
  client: client,
  query: gql`
    query ($filter: [NodeFilter!], $typeNode: String!, $typeHealth: String!) {
      nodeStates: nodeStatesTimed(filter: $filter, type: $typeNode) {
        state
        counts
        times
      }
      healthStates: nodeStatesTimed(filter: $filter, type: $typeHealth) {
        state
        counts
        times
      }
    }
  `,
  variables: {
    filter: { cluster: { eq: presetCluster }, timeStart: stackedFrom },
    typeNode: "node",
    typeHealth: "health"
  },
  requestPolicy: "network-only"
}));
// Note: nodeMetrics are requested on configured $timestep resolution
// Result: The latest 5 minutes (datapoints) for each node independent of job
const statusQuery = $derived(queryStore({
client: client,
query: gql`
query (
$cluster: String!
$metrics: [String!]
$from: Time!
$to: Time!
$clusterFrom: Time!
$jobFilter: [JobFilter!]!
$nodeFilter: [NodeFilter!]!
$paging: PageRequest!
$sorting: OrderByInput!
) {
# Node 5 Minute Averages for Roofline
nodeMetrics(
cluster: $cluster
metrics: $metrics
from: $from
to: $to
) {
host
subCluster
metrics {
name
metric {
series {
statistics {
avg
}
}
}
}
}
# Running Job Metric Average for Rooflines
jobsMetricStats(filter: $jobFilter, metrics: $metrics) {
id
jobId
duration
numNodes
numAccelerators
subCluster
stats {
name
data {
avg
}
}
}
# Get Jobs for Per-Node Counts
jobs(filter: $jobFilter, order: $sorting, page: $paging) {
items {
jobId
resources {
hostname
}
}
count
}
# Only counts shared nodes once
allocatedNodes(cluster: $cluster) {
name
count
}
# Get Current States fir Pie Charts
nodeStates(filter: $nodeFilter) {
state
count
}
# Get States for Node Roofline; $sorting unused in backend: Use placeholder
nodes(filter: $nodeFilter, order: $sorting) {
count
items {
hostname
cluster
subCluster
schedulerState
}
}
# totalNodes includes multiples if shared jobs: Info-Card Data
jobsStatistics(
filter: $jobFilter
page: $paging
sortBy: TOTALJOBS
groupBy: SUBCLUSTER
) {
id
totalJobs
totalUsers
totalCores
totalAccs
}
# TEST
clusterMetrics(
cluster: $cluster
metrics: $metrics
from: $clusterFrom
to: $to
) {
nodeCount
metrics {
name
unit {
prefix
base
}
timestep
data
}
}
}
`,
variables: {
cluster: presetCluster,
metrics: ["flops_any", "mem_bw"], // Fixed names for roofline and status bars
from: from.toISOString(),
clusterFrom: clusterFrom.toISOString(),
to: to.toISOString(),
jobFilter: [{ state: ["running"] }, { cluster: { eq: presetCluster } }],
nodeFilter: { cluster: { eq: presetCluster }},
paging: { itemsPerPage: -1, page: 1 }, // Get all: -1
sorting: { field: "startTime", type: "col", order: "DESC" }
},
requestPolicy: "network-only"
}));
const topJobsQuery = $derived(queryStore({
client: client,
query: gql`
query (
$filter: [JobFilter!]!
$paging: PageRequest!
) {
jobsStatistics(
filter: $filter
page: $paging
sortBy: TOTALJOBS
groupBy: PROJECT
) {
id
totalJobs
}
}
`,
variables: {
filter: [{ state: ["running"] }, { cluster: { eq: presetCluster} }],
paging: pagingState // Top 10
},
requestPolicy: "network-only"
}));
// Job statistics histograms for the currently running jobs of the preset cluster:
// distribution of allocated cores (histNumCores) and accelerators (histNumAccs).
// No metric histograms are requested (empty `selectedHistograms`).
// NOTE(review): in this component the result is only consumed by the
// commented-out <Histogram> markup; the query still gates the spinner/error state.
const nodeStatusQuery = $derived(queryStore({
client: client,
query: gql`
query (
$filter: [JobFilter!]!
$selectedHistograms: [String!]
$numDurationBins: String
) {
jobsStatistics(filter: $filter, metrics: $selectedHistograms, numDurationBins: $numDurationBins) {
histNumCores {
count
value
}
histNumAccs {
count
value
}
}
}
`,
variables: {
filter: [{ state: ["running"] }, { cluster: { eq: presetCluster } }],
selectedHistograms: [], // No Metrics requested for node hardware stats - Empty Array can be used for refresh
numDurationBins: "1h", // Hardcode or selector?
},
requestPolicy: "network-only"
}));
// Cluster-wide summary for the info card and the roofline:
// - allocation and total counts, summed over all subclusters of the preset cluster
// - set of processor types, units (taken from the first subcluster providing them)
// - per-roof maxima over all subclusters (`roofData`) for the roofline knee render
// - flopRate / memBwRate: sum of the per-node averages, cut to two decimals
// Returns an empty object as long as the cluster list is not loaded.
const clusterInfo = $derived.by(() => {
  if (!$initq?.data?.clusters) return {};

  const subClusters = $initq.data.clusters.find((c) => c.name == presetCluster)?.subClusters || [];
  const allocatedNodes = $statusQuery?.data?.allocatedNodes;
  const jobsStatistics = $statusQuery?.data?.jobsStatistics;

  // Counters start at zero so that a cluster without subclusters renders "0" instead of "undefined"
  const rawInfos = {
    allocatedNodes: 0,
    allocatedCores: 0,
    allocatedAccs: 0,
    activeUsers: 0,
    runningJobs: 0,
    totalNodes: 0,
    totalCores: 0,
    totalAccs: 0,
    processorTypes: new Set(),
  };

  for (const subCluster of subClusters) {
    // jobsStatistics are grouped by subcluster: `id` holds the subcluster name
    const stats = jobsStatistics?.find(({ id }) => id == subCluster.name);

    // Allocations
    rawInfos.allocatedNodes += allocatedNodes?.find(({ name }) => name == subCluster.name)?.count || 0;
    rawInfos.allocatedCores += stats?.totalCores || 0;
    rawInfos.allocatedAccs += stats?.totalAccs || 0;

    // Infos
    if (subCluster?.processorType) rawInfos.processorTypes.add(subCluster.processorType); // never add undefined
    rawInfos.activeUsers += stats?.totalUsers || 0;
    rawInfos.runningJobs += stats?.totalJobs || 0;
    rawInfos.totalNodes += subCluster?.numberOfNodes || 0;
    // `|| 0` also catches NaN from missing topology fields
    rawInfos.totalCores += (subCluster?.socketsPerNode * subCluster?.coresPerSocket * subCluster?.numberOfNodes) || 0;
    rawInfos.totalAccs += (subCluster?.numberOfNodes * subCluster?.topology?.accelerators?.length) || 0;

    // Units (Set Once)
    if (!rawInfos.flopRateUnit) rawInfos.flopRateUnit = subCluster.flopRateSimd.unit.prefix + subCluster.flopRateSimd.unit.base;
    if (!rawInfos.memBwRateUnit) rawInfos.memBwRateUnit = subCluster.memoryBandwidth.unit.prefix + subCluster.memoryBandwidth.unit.base;

    // Get Maxima For Roofline Knee Render
    if (!rawInfos.roofData) {
      rawInfos.roofData = {
        flopRateScalar: { value: subCluster.flopRateScalar.value },
        flopRateSimd: { value: subCluster.flopRateSimd.value },
        memoryBandwidth: { value: subCluster.memoryBandwidth.value },
      };
    } else {
      const roofs = rawInfos.roofData;
      roofs.flopRateScalar.value = Math.max(roofs.flopRateScalar.value, subCluster.flopRateScalar.value);
      roofs.flopRateSimd.value = Math.max(roofs.flopRateSimd.value, subCluster.flopRateSimd.value);
      roofs.memoryBandwidth.value = Math.max(roofs.memoryBandwidth.value, subCluster.memoryBandwidth.value);
    }
  }

  // Keymetrics (Data on Cluster-Scope): sum of the per-node averages of the first series
  const sumNodeAverages = (metricName) =>
    $statusQuery?.data?.nodeMetrics?.reduce(
      (sum, node) => sum + (node.metrics.find((m) => m.name == metricName)?.metric?.series[0]?.statistics?.avg || 0),
      0, // Initial Value
    ) || 0;

  // Cut to two decimals: the division has to happen *after* flooring the scaled value
  rawInfos.flopRate = Math.floor(sumNodeAverages('flops_any') * 100) / 100;
  rawInfos.memBwRate = Math.floor(sumNodeAverages('mem_bw') * 100) / 100;

  return rawInfos;
});
// Current node counts of the scheduler states shown in pie chart and table,
// sorted by count (largest first). Always yields an array, so consumers can
// call `.map()` / iterate without checking whether the query returned data.
const refinedStateData = $derived.by(() => {
  const shownStates = ['allocated', 'reserved', 'idle', 'mixed', 'down', 'unknown'];
  const nodeStates = $statusQuery?.data?.nodeStates || [];
  return nodeStates
    .filter((e) => shownStates.includes(e.state)) // filter() copies, so sort() does not touch the query result
    .sort((a, b) => b.count - a.count);
});
/* Functions */
// Returns the legend color for entry `targetIdx`.
// Palette precedence: colorblind mode (component-wide setting) first, then the
// alternative palette if `useAltColors` is set, otherwise the default palette.
// Indices beyond the palette length wrap around to the beginning.
function legendColors(targetIdx, useAltColors) {
  let scheme = 'default';
  if (useCbColors) {
    scheme = 'colorblind';
  } else if (useAltColors) {
    scheme = 'alternative';
  }
  const palette = Array.from(colors[scheme]);
  const wrappedIdx = (palette.length + targetIdx) % palette.length;
  return palette[wrappedIdx];
}
// Converts per-node metric averages into the roofline data format.
// - `subclusterData`: list of nodes, each with `metrics: [{ name, metric: { series: [{ statistics: { avg } }] } }]`
// Returns `[null, [x, y]]` with x = arithmetic intensity (flops_any / mem_bw) and
// y = flops_any average, one entry per input node in input order, or `null` if
// there is no input or no node.
// A node that does not report one of the two metrics contributes 0 for it
// instead of raising a TypeError; one point per node is kept so the result stays
// index-aligned with the input list.
function transformNodesStatsToData(subclusterData) {
  if (!subclusterData) return null;

  // Average of the first series of the named metric; 0 if the node did not report it
  const avgOf = (node, metricName) => {
    const avg = node?.metrics?.find((s) => s.name == metricName)?.metric?.series?.[0]?.statistics?.avg;
    return avg === undefined ? 0 : avg;
  };

  const x = [], y = [];
  for (const node of subclusterData) {
    const f = avgOf(node, "flops_any");
    const m = avgOf(node, "mem_bw");
    let intensity = f / m;
    if (!Number.isFinite(intensity)) { // covers NaN (0/0) and +-Infinity (x/0)
      intensity = 0.0; // Set to Float Zero: Will not show in Log-Plot (Always below render limit)
    }
    x.push(intensity);
    y.push(f);
  }

  return x.length > 0 ? [null, [x, y]] : null; // for dataformat see roofline.svelte
}
// Builds the per-node info list accompanying the roofline points:
// one `{ nodeName, schedulerState, numJobs }` entry per node in `subClusterData`.
// - nodeName: the node's `host`, or "unknown" if missing
// - schedulerState: state of the matching node (same hostname and subCluster)
//   from the status query, or "notindb" if there is no match
// - numJobs: number of queried jobs that list the node as a resource
// Note: the node list is taken from the metric data, *not* from the DB node table.
function transformNodesStatsToInfo(subClusterData) {
  const infos = [];
  if (!subClusterData) return infos;

  for (let idx = 0; idx < subClusterData.length; idx++) {
    const entry = subClusterData[idx];
    const nodeName = entry?.host ? entry.host : "unknown";

    const dbNode = $statusQuery?.data?.nodes?.items?.find(
      (n) => n.hostname == nodeName && n.subCluster == subClusterData[idx].subCluster,
    );
    const schedulerState = dbNode?.schedulerState ? dbNode.schedulerState : "notindb";

    let numJobs = 0;
    if ($statusQuery?.data) {
      const jobsOnNode = $statusQuery?.data?.jobs?.items?.filter(
        (job) => job.resources.find((res) => res.hostname == nodeName),
      );
      numJobs = jobsOnNode?.length ? jobsOnNode.length : 0;
    }

    infos.push({ nodeName: nodeName, schedulerState: schedulerState, numJobs: numJobs });
  }

  return infos;
}
/* Inspect */
$inspect(clusterInfo).with((type, clusterInfo) => {
console.log(type, 'clusterInfo', clusterInfo)
});
$inspect($statusQuery?.data?.clusterMetrics).with((type, clusterMetrics) => {
console.log(type, 'clusterMetrics', clusterMetrics)
});
</script>
<Card style="height: 98vh;">
<CardHeader class="text-center">
<h3 class="mb-0">{presetCluster.charAt(0).toUpperCase() + presetCluster.slice(1)} Dashboard</h3>
</CardHeader>
<CardBody>
{#if $statusQuery.fetching || $statesTimed.fetching || $topJobsQuery.fetching || $nodeStatusQuery.fetching}
<Row class="justify-content-center">
<Col xs="auto">
<Spinner />
</Col>
</Row>
{:else if $statusQuery.error || $statesTimed.error || $topJobsQuery.error || $nodeStatusQuery.error}
<Row cols={{xs:1, md:2}}>
{#if $statusQuery.error}
<Col>
<Card color="danger">Error Requesting StatusQuery: {$statusQuery.error.message}</Card>
</Col>
{/if}
{#if $statesTimed.error}
<Col>
<Card color="danger">Error Requesting StatesTimed: {$statesTimed.error.message}</Card>
</Col>
{/if}
{#if $topJobsQuery.error}
<Col>
<Card color="danger">Error Requesting TopJobsQuery: {$topJobsQuery.error.message}</Card>
</Col>
{/if}
{#if $nodeStatusQuery.error}
<Col>
<Card color="danger">Error Requesting NodeStatusQuery: {$nodeStatusQuery.error.message}</Card>
</Col>
{/if}
</Row>
{:else}
<Row cols={{xs:1, md:2, xl: 3}}>
<Col> <!-- Info Card -->
<Card class="h-auto mt-1">
<CardHeader>
<CardTitle class="mb-0">Cluster "{presetCluster.charAt(0).toUpperCase() + presetCluster.slice(1)}"</CardTitle>
<span>{[...clusterInfo?.processorTypes].toString()}</span>
</CardHeader>
<CardBody>
<Table borderless>
<tr class="py-2">
<td style="font-size:x-large;">{clusterInfo?.runningJobs} Running Jobs</td>
<td colspan="2" style="font-size:x-large;">{clusterInfo?.activeUsers} Active Users</td>
</tr>
<hr class="my-1"/>
<tr class="pt-2">
<td style="font-size: large;">
Flop Rate (<span style="cursor: help;" title="Flops[Any] = (Flops[Double] x 2) + Flops[Single]">Any</span>)
</td>
<td colspan="2" style="font-size: large;">
Memory BW Rate
</td>
</tr>
<tr class="pb-2">
<td style="font-size:x-large;">
{clusterInfo?.flopRate}
{clusterInfo?.flopRateUnit}
</td>
<td colspan="2" style="font-size:x-large;">
{clusterInfo?.memBwRate}
{clusterInfo?.memBwRateUnit}
</td>
</tr>
<hr class="my-1"/>
<tr class="py-2">
<th scope="col">Allocated Nodes</th>
<td style="min-width: 100px;"
><div class="col">
<Progress
value={clusterInfo?.allocatedNodes}
max={clusterInfo?.totalNodes}
/>
</div></td
>
<td
>{clusterInfo?.allocatedNodes} / {clusterInfo?.totalNodes}
Nodes</td
>
</tr>
<tr class="py-2">
<th scope="col">Allocated Cores</th>
<td style="min-width: 100px;"
><div class="col">
<Progress
value={clusterInfo?.allocatedCores}
max={clusterInfo?.totalCores}
/>
</div></td
>
<td
>{formatNumber(clusterInfo?.allocatedCores)} / {formatNumber(clusterInfo?.totalCores)}
Cores</td
>
</tr>
{#if clusterInfo?.totalAccs !== 0}
<tr class="py-2">
<th scope="col">Allocated Accelerators</th>
<td style="min-width: 100px;"
><div class="col">
<Progress
value={clusterInfo?.allocatedAccs}
max={clusterInfo?.totalAccs}
/>
</div></td
>
<td
>{clusterInfo?.allocatedAccs} / {clusterInfo?.totalAccs}
Accelerators</td
>
</tr>
{/if}
</Table>
</CardBody>
</Card>
</Col>
<Col> <!-- Pie Last States -->
<Row>
<Col class="px-3 mt-2 mt-lg-0">
<div bind:clientWidth={colWidthStates}>
{#key refinedStateData}
<h4 class="text-center">
Current Node States
</h4>
<Pie
useAltColors
canvasId="hpcpie-slurm"
size={colWidthStates * 0.75}
sliceLabel="Nodes"
quantities={refinedStateData.map(
(sd) => sd.count,
)}
entities={refinedStateData.map(
(sd) => sd.state,
)}
/>
{/key}
</div>
</Col>
<Col class="px-4 py-2">
{#key refinedStateData}
<Table>
<tr class="mb-2">
<th></th>
<th>Current State</th>
<th>Nodes</th>
</tr>
{#each refinedStateData as sd, i}
<tr>
<td><Icon name="circle-fill" style="color: {legendColors(i, true)};"/></td>
<td>{sd.state}</td>
<td>{sd.count}</td>
</tr>
{/each}
</Table>
{/key}
</Col>
</Row>
</Col>
<Col> <!-- General Cluster Info Card? -->
<!-- TODO -->
<Card>
<CardHeader>
<CardTitle>Infos</CardTitle>
</CardHeader>
<CardBody>
Contents
</CardBody>
</Card>
</Col>
<Col> <!-- Nodes Roofline -->
<div bind:clientWidth={colWidthRoof}>
{#key $statusQuery?.data?.nodeMetrics}
<Roofline
useColors={false}
useLegend={false}
allowSizeChange
width={colWidthRoof - 10}
height={300}
cluster={presetCluster}
subCluster={clusterInfo?.roofData ? clusterInfo.roofData : null}
roofData={transformNodesStatsToData($statusQuery?.data?.nodeMetrics)}
nodesData={transformNodesStatsToInfo($statusQuery?.data?.nodeMetrics)}
/>
{/key}
</div>
</Col>
<Col> <!-- Resources/Job Histogram OR Total Cluster Metric in Time SUMS-->
<div bind:clientWidth={colWidthTotals}>
  <!-- clusterMetrics is an object ({ nodeCount, metrics }): the timestep lives on the metric entries, not on clusterMetrics itself -->
  <DoubleMetric
    width={colWidthTotals}
    timestep={$statusQuery?.data?.clusterMetrics?.metrics?.[0]?.timestep || 60}
    numNodes={$statusQuery?.data?.clusterMetrics?.nodeCount || 0}
    metricData={$statusQuery?.data?.clusterMetrics?.metrics || []}
    cluster={presetCluster}
  />
</div>
<!-- {#if clusterInfo?.totalAccs == 0}
<Histogram
data={convert2uplot($nodeStatusQuery.data.jobsStatistics[0].histNumCores)}
title="Number of Cores Distribution"
xlabel="Allocated Cores"
xunit="Nodes"
ylabel="Number of Jobs"
yunit="Jobs"
height="275"
enableFlip
/>
{:else}
<Histogram
data={convert2uplot($nodeStatusQuery.data.jobsStatistics[0].histNumAccs)}
title="Number of Accelerators Distribution"
xlabel="Allocated Accs"
xunit="Accs"
ylabel="Number of Jobs"
yunit="Jobs"
height="275"
enableFlip
/>
{/if} -->
</Col>
<Col> <!-- Stacked SchedState -->
<div bind:clientWidth={colWidthStacked}>
{#key $statesTimed?.data?.nodeStates}
<Stacked
data={$statesTimed?.data?.nodeStates}
width={colWidthStacked * 0.95}
xlabel="Time"
ylabel="Nodes"
yunit = "#Count"
title = "Node States"
stateType = "Node"
/>
{/key}
</div>
</Col>
</Row>
{/if}
</CardBody>
</Card>

View File

@@ -120,7 +120,7 @@
href: "/monitoring/status/", href: "/monitoring/status/",
icon: "clipboard-data", icon: "clipboard-data",
perCluster: true, perCluster: true,
listOptions: false, listOptions: true,
menu: "Info", menu: "Info",
}, },
]; ];

View File

@@ -6,77 +6,43 @@
--> -->
<script> <script>
import {
getContext
} from "svelte"
import {
init,
} from "./generic/utils.js";
import { import {
Row, Row,
Col, Col,
Card, Card,
CardBody,
TabContent,
TabPane,
Spinner
} from "@sveltestrap/sveltestrap"; } from "@sveltestrap/sveltestrap";
import StatusDash from "./status/StatusDash.svelte"; import DashDetails from "./status/DashDetails.svelte";
import UsageDash from "./status/UsageDash.svelte"; import DashInternal from "./status/DashInternal.svelte";
import StatisticsDash from "./status/StatisticsDash.svelte";
/* Svelte 5 Props */ /* Svelte 5 Props */
let { let {
presetCluster presetCluster,
displayType
} = $props(); } = $props();
/*Const Init */ /*Const Init */
const { query: initq } = init(); const displayStatusDetail = (displayType === 'DETAILS');
const useCbColors = getContext("cc-config")?.plotConfiguration_colorblindMode || false
</script> </script>
<!-- Loading indicator & Refresh --> <!-- <Row cols={1} class="mb-2">
<Row cols={1} class="mb-2">
<Col> <Col>
<h3 class="mb-0">Current Status of Cluster "{presetCluster.charAt(0).toUpperCase() + presetCluster.slice(1)}"</h3> <h3 class="mb-0">Current Status of Cluster "{presetCluster.charAt(0).toUpperCase() + presetCluster.slice(1)}"</h3>
</Col> </Col>
</Row> </Row> -->
{#if displayType !== "DASHBOARD" && displayType !== "DETAILS"}
{#if $initq.fetching} <Row>
<Row cols={1} class="text-center mt-3">
<Col> <Col>
<Spinner /> <Card body color="danger">Unknown displayList type! </Card>
</Col>
</Row>
{:else if $initq.error}
<Row cols={1} class="text-center mt-3">
<Col>
<Card body color="danger">{$initq.error.message}</Card>
</Col> </Col>
</Row> </Row>
{:else} {:else}
<Card class="overflow-auto" style="height: auto;"> {#if displayStatusDetail}
<TabContent> <!-- ROW2-1: Node Overview (Grid Included)-->
<TabPane tabId="status-dash" tab="Status" active> <DashDetails {presetCluster}/>
<CardBody> {:else}
<StatusDash clusters={$initq.data.clusters} {presetCluster} {useCbColors} useAltColors></StatusDash> <!-- ROW2-2: Node List (Grid Included)-->
</CardBody> <DashInternal {presetCluster}/>
</TabPane> {/if}
<TabPane tabId="usage-dash" tab="Usage">
<CardBody>
<UsageDash {presetCluster} {useCbColors}></UsageDash>
</CardBody>
</TabPane>
<TabPane tabId="metric-dash" tab="Statistics">
<CardBody>
<StatisticsDash {presetCluster} {useCbColors}></StatisticsDash>
</CardBody>
</TabPane>
</TabContent>
</Card>
{/if} {/if}

View File

@@ -0,0 +1,13 @@
// Entrypoint of the public dashboard bundle: mounts the DashPublic root
// component into the `svelte-app` element of the page.
import { mount } from 'svelte';
// Header entrypoint is intentionally not loaded here (import kept for reference).
// import {} from './header.entrypoint.js'
import DashPublic from './DashPublic.root.svelte'
// NOTE(review): `infos` and `clusterCockpitConfig` are not defined in this file;
// presumably they are globals injected by the page template - confirm.
mount(DashPublic, {
target: document.getElementById('svelte-app'),
props: {
presetCluster: infos.cluster,
},
// Made available to all child components via getContext("cc-config")
context: new Map([
['cc-config', clusterCockpitConfig]
])
})

View File

@@ -0,0 +1,640 @@
<!--
@component Double metric plot component, based on uPlot; renders every entry of `metricData` as a line over a shared time axis, each on its own y-scale
Only width/height should change reactively.
Properties:
- `width Number?`: The plot width [Default: 0]
- `height Number?`: The plot height [Default: 300]
- `timestep Number`: The timestep used for X-axis rendering
- `numNodes Number`: Number of nodes the displayed cluster data was collected from
- `metricData [Object]`: The metric data objects to plot; each entry provides `name`, `unit { prefix, base }` and `data` (values by time)
- `cluster String?`: Cluster name of the displayed data [Default: ""]
- `forNode Bool?`: If this plot is used for node data display; will render x-axis as negative time with $now as maximum [Default: true]
- `enableFlip Bool?`: Optional flag [Default: false] (TODO: document its effect)
Note: The properties of the single-metric plot this component is derived from (`metric`, `scope`, `series`, `statisticsSeries`, `subCluster`, `isShared`, `numhwthreads`, `numaccs`, `zoomState`, `thresholdState`, `extendedLegendData`, `onZoom`) are currently disabled (commented out) and not accepted here.
-->
<script>
import uPlot from "uplot";
import { formatNumber, formatDurationTime } from "../units.js";
import { getContext, onMount, onDestroy } from "svelte";
import { Card } from "@sveltestrap/sveltestrap";
/* Svelte 5 Props */
let {
// metric,
width = 0,
height = 300,
timestep,
numNodes,
metricData,
// useStatsSeries = false,
// statisticsSeries = null,
cluster = "",
forNode = true,
// zoomState = null,
// thresholdState = null,
enableFlip = false,
// onZoom
} = $props();
/* Const Init */
const clusterCockpitConfig = getContext("cc-config");
// const resampleConfig = getContext("resampling");
// const subClusterTopology = getContext("getHardwareTopology")(cluster, subCluster);
// const metricConfig = getContext("getMetricConfig")(cluster, subCluster, metric);
const lineColors = clusterCockpitConfig.plotConfiguration_colorScheme;
const lineWidth = clusterCockpitConfig.plotConfiguration_lineWidth / window.devicePixelRatio;
// const cbmode = clusterCockpitConfig?.plotConfiguration_colorblindMode || false;
const renderSleepTime = 200;
// const normalLineColor = "#000000";
// const backgroundColors = {
// normal: "rgba(255, 255, 255, 1.0)",
// caution: cbmode ? "rgba(239, 230, 69, 0.3)" : "rgba(255, 128, 0, 0.3)",
// alert: cbmode ? "rgba(225, 86, 44, 0.3)" : "rgba(255, 0, 0, 0.3)",
// };
/* Var Init */
let timeoutId = null;
/* State Init */
let plotWrapper = $state(null);
let uplot = $state(null);
/* Derived */
// const usesMeanStatsSeries = $derived((statisticsSeries?.mean && statisticsSeries.mean.length != 0));
// const resampleTrigger = $derived(resampleConfig?.trigger ? Number(resampleConfig.trigger) : null);
// const resampleResolutions = $derived(resampleConfig?.resolutions ? [...resampleConfig.resolutions] : null);
// const resampleMinimum = $derived(resampleConfig?.resolutions ? Math.min(...resampleConfig.resolutions) : null);
// const thresholds = $derived(findJobAggregationThresholds(
// subClusterTopology,
// metricConfig,
// scope,
// numhwthreads,
// numaccs
// ));
const longestSeries = $derived.by(() => {
// if (useStatsSeries) {
// return usesMeanStatsSeries ? statisticsSeries?.mean?.length : statisticsSeries?.median?.length;
// } else {
return metricData.reduce((n, m) => Math.max(n, m.data.length), 0);
// }
});
const maxX = $derived(longestSeries * timestep);
// const maxY = $derived.by(() => {
// let pendingY = 0;
// // if (useStatsSeries) {
// // pendingY = statisticsSeries.max.reduce(
// // (max, x) => Math.max(max, x),
// // thresholds?.normal,
// // ) || thresholds?.normal
// // } else {
// pendingY = series.reduce(
// (max, series) => Math.max(max, series?.statistics?.max),
// thresholds?.normal,
// ) || thresholds?.normal;
// // }
// if (pendingY >= 10 * thresholds.peak) {
// // Hard y-range render limit if outliers in series data
// return (10 * thresholds.peak);
// } else {
// return pendingY;
// }
// });
// const plotBands = $derived.by(() => {
// if (useStatsSeries) {
// return [
// { series: [2, 3], fill: cbmode ? "rgba(0,0,255,0.1)" : "rgba(0,255,0,0.1)" },
// { series: [3, 1], fill: cbmode ? "rgba(0,255,0,0.1)" : "rgba(255,0,0,0.1)" },
// ];
// };
// return null;
// })
// Builds the data array handed to the plot: index 0 holds the x-values (time),
// followed by one y-array per entry of `metricData` (in the same order).
const plotData = $derived.by(() => {
let pendingData = [new Array(longestSeries)];
// X
if (forNode === true) {
// Negative Timestamp Buildup
// Runs from -(longestSeries * timestep) up to and including 0.
// NOTE(review): the `<=` bound writes longestSeries + 1 x-values, i.e. one more than
// the longest y-series has data points - presumably intended so the axis ends at 0 ("now"); confirm.
for (let i = 0; i <= longestSeries; i++) {
pendingData[0][i] = (longestSeries - i) * timestep * -1;
}
} else {
// Positive Timestamp Buildup
// Runs from 0 up to (longestSeries - 1) * timestep.
for (let j = 0; j < longestSeries; j++) {
pendingData[0][j] = j * timestep;
};
};
// Y
// if (useStatsSeries) {
// pendingData.push(statisticsSeries.min);
// pendingData.push(statisticsSeries.max);
// if (usesMeanStatsSeries) {
// pendingData.push(statisticsSeries.mean);
// } else {
// pendingData.push(statisticsSeries.median);
// }
// } else {
// One y-array per metric; the data arrays are passed on as-is (not copied).
for (let i = 0; i < metricData.length; i++) {
pendingData.push(metricData[i]?.data);
};
// };
return pendingData;
})
const plotSeries = $derived.by(() => {
let pendingSeries = [
// Note: X-Legend Will not be shown as soon as Y-Axis are in extendedMode
{
label: "Runtime",
value: (u, ts, sidx, didx) =>
(didx == null) ? null : formatDurationTime(ts, forNode),
}
];
// Y
// if (useStatsSeries) {
// pendingSeries.push({
// label: "min",
// scale: "y",
// width: lineWidth,
// stroke: cbmode ? "rgb(0,255,0)" : "red",
// });
// pendingSeries.push({
// label: "max",
// scale: "y",
// width: lineWidth,
// stroke: cbmode ? "rgb(0,0,255)" : "green",
// });
// pendingSeries.push({
// label: usesMeanStatsSeries ? "mean" : "median",
// scale: "y",
// width: lineWidth,
// stroke: "black",
// });
// } else {
for (let i = 0; i < metricData.length; i++) {
// Default
// if (!extendedLegendData) {
pendingSeries.push({
label: `${metricData[i]?.name} (${metricData[i]?.unit?.prefix}${metricData[i]?.unit?.base})`,
scale: `y${i+1}`,
width: lineWidth,
stroke: lineColor(i, metricData.length),
});
// }
// Extended Legend For NodeList
// else {
// pendingSeries.push({
// label:
// scope === "node"
// ? series[i].hostname
// : scope === "accelerator"
// ? 'Acc #' + (i + 1) // series[i].id.slice(9, 14) | Too Hardware Specific
// : scope + " #" + (i + 1),
// scale: "y",
// width: lineWidth,
// stroke: lineColor(i, series?.length),
// values: (u, sidx, idx) => {
// // "i" = "sidx - 1" : sidx contains x-axis-data
// if (idx == null)
// return {
// time: '-',
// value: '-',
// user: '-',
// job: '-'
// };
// if (series[i].id in extendedLegendData) {
// return {
// time: formatDurationTime(plotData[0][idx], forNode),
// value: plotData[sidx][idx],
// user: extendedLegendData[series[i].id].user,
// job: extendedLegendData[series[i].id].job,
// };
// } else {
// return {
// time: formatDurationTime(plotData[0][idx], forNode),
// value: plotData[sidx][idx],
// user: '-',
// job: '-',
// };
// }
// }
// });
// }
// };
};
return pendingSeries;
})
/* Effects */
// $effect(() => {
// if (!useStatsSeries && statisticsSeries != null) useStatsSeries = true;
// })
// This updates plot on all size changes if wrapper (== data) exists
$effect(() => {
if (plotWrapper) {
onSizeChange(width, height);
}
});
/* Functions */
// Returns the candidate x-axis tick increments (in seconds).
// - forNode === true: fixed list of increments from one minute up to six hours
// - otherwise: 1x, 2x, 3x and 5x of the timestep per decade, for every decade
//   (timestep, timestep * 10, ...) that is still below `maxX`
function timeIncrs(timestep, maxX, forNode) {
  if (forNode === true) {
    // forNode fixed increments
    return [60, 120, 240, 300, 360, 480, 600, 900, 1800, 3600, 7200, 14400, 21600];
  }

  const increments = [];
  let decade = timestep;
  while (decade < maxX) {
    increments.push(decade, decade * 2, decade * 3, decade * 5);
    decade *= 10;
  }
  return increments;
}
// removed arg "subcluster": input metricconfig and topology now directly derived from subcluster
// function findJobAggregationThresholds(
// subClusterTopology,
// metricConfig,
// scope,
// numhwthreads,
// numaccs
// ) {
// if (!subClusterTopology || !metricConfig || !scope) {
// console.warn("Argument missing for findJobAggregationThresholds!");
// return null;
// }
// // handle special *-stat scopes
// if (scope.match(/(.*)-stat$/)) {
// const statParts = scope.split('-');
// scope = statParts[0]
// }
// if (metricConfig?.aggregation == "avg") {
// // Return as Configured
// return {
// normal: metricConfig.normal,
// caution: metricConfig.caution,
// alert: metricConfig.alert,
// peak: metricConfig.peak,
// };
// }
// if (metricConfig?.aggregation == "sum") {
// // Scale Thresholds
// let fraction;
// if (numaccs > 0) fraction = subClusterTopology.accelerators.length / numaccs;
// else if (numhwthreads > 0) fraction = subClusterTopology.core.length / numhwthreads;
// else fraction = 1; // Fallback
// let divisor;
// // Exclusive: Fraction = 1; Shared: Fraction > 1
// if (scope == 'node') divisor = fraction;
// // Cap divisor at number of available sockets or domains
// else if (scope == 'socket') divisor = (fraction < subClusterTopology.socket.length) ? subClusterTopology.socket.length : fraction;
// else if (scope == "memoryDomain") divisor = (fraction < subClusterTopology.memoryDomain.length) ? subClusterTopology.socket.length : fraction;
// // Use Maximum Division for Smallest Scopes
// else if (scope == "core") divisor = subClusterTopology.core.length;
// else if (scope == "hwthread") divisor = subClusterTopology.core.length; // alt. name for core
// else if (scope == "accelerator") divisor = subClusterTopology.accelerators.length;
// else {
// console.log('Unknown scope, return default aggregation thresholds for sum', scope)
// divisor = 1;
// }
// return {
// peak: metricConfig.peak / divisor,
// normal: metricConfig.normal / divisor,
// caution: metricConfig.caution / divisor,
// alert: metricConfig.alert / divisor,
// };
// }
// console.warn(
// "Missing or unkown aggregation mode (sum/avg) for metric:",
// metricConfig,
// );
// return null;
// }
// UPLOT PLUGIN // turns uPlot's built-in legend into a cursor-following tooltip
// Options:
// - `className`: optional extra CSS class added to the legend element
// - `style`: inline style overrides merged into the tooltip's base style
// Returns a uPlot plugin object; it carries no hooks when more than 12 series are plotted.
function legendAsTooltipPlugin({
  className,
  style = { backgroundColor: "rgba(255, 249, 196, 0.92)", color: "black" },
} = {}) {
  const seriesCount = metricData.length;

  if (seriesCount > 12) { // || useStatsSeries) {
    // Setting legend-opts show/live as object with false here will not work ...
    return {};
  }

  // Assigned in the init hook, read by the cursor hook
  let tooltipEl;

  const setupTooltip = (u, opts) => {
    tooltipEl = u.root.querySelector(".u-legend");
    tooltipEl.classList.remove("u-inline");
    if (className) tooltipEl.classList.add(className);

    uPlot.assign(tooltipEl.style, {
      minWidth: "100px",
      textAlign: "left",
      pointerEvents: "none",
      display: "none",
      position: "absolute",
      left: 0,
      top: 0,
      zIndex: 100,
      boxShadow: "2px 2px 10px rgba(0,0,0,0.5)",
      ...style,
    });

    // conditional hide series color markers:
    // useStatsSeries (Min/Max/Median Self-Explanatory) is currently not considered
    const hideMarkers =
      seriesCount === 1 || // Only one Y-Dataseries
      seriesCount > 8; // More than 8 Y-Dataseries
    if (hideMarkers) {
      tooltipEl.querySelectorAll(".u-marker").forEach((marker) => {
        marker.style.display = "none";
      });
    }

    // let tooltip exit plot, then move legend into plot bounds
    const plotOverlay = u.over;
    plotOverlay.style.overflow = "visible";
    plotOverlay.appendChild(tooltipEl);

    // show/hide tooltip on enter/exit
    plotOverlay.addEventListener("mouseenter", () => {
      tooltipEl.style.display = null;
    });
    plotOverlay.addEventListener("mouseleave", () => {
      tooltipEl.style.display = "none";
    });
  };

  const followCursor = (u) => {
    const { left, top } = u.cursor;
    const tooltipWidth = u?.over?.querySelector(".u-legend")?.offsetWidth || 0;
    // With flipping enabled, the tooltip sits right of the cursor in the left plot half;
    // in every other case it is placed left of the cursor.
    const placeRight = enableFlip && (left < (width / 2));
    const offsetX = placeRight ? (left + 15) : (left - tooltipWidth - 15);
    tooltipEl.style.transform = `translate(${offsetX}px, ${top + 15}px)`;
  };

  return {
    hooks: {
      init: setupTooltip,
      setCursor: followCursor,
    },
  };
}
// RETURN BG COLOR FROM THRESHOLD
// function backgroundColor() {
// if (
// clusterCockpitConfig.plotConfiguration_colorBackground == false ||
// // !thresholds ||
// !(series && series.every((s) => s.statistics != null))
// )
// return backgroundColors.normal;
// let cond =
// thresholds.alert < thresholds.caution
// ? (a, b) => a <= b
// : (a, b) => a >= b;
// let avg =
// series.reduce((sum, series) => sum + series.statistics.avg, 0) /
// series.length;
// if (Number.isNaN(avg)) return backgroundColors.normal;
// if (cond(avg, thresholds.alert)) return backgroundColors.alert;
// if (cond(avg, thresholds.caution)) return backgroundColors.caution;
// return backgroundColors.normal;
// }
// Picks the color for series `i` out of `n` series from the `lineColors` palette:
// wraps around the palette when there are at least as many series as colors,
// otherwise spreads the series evenly across the palette.
function lineColor(i, n) {
  const paletteSize = lineColors.length;
  const slot = (n && n >= paletteSize)
    ? i % paletteSize
    : Math.floor((i / n) * paletteSize);
  return lineColors[slot];
}
/**
 * Creates the uPlot instance on the first call; later calls only resize it.
 *
 * The plot uses one shared x axis and two independent y scales: `y1` labelled
 * from `metricData[0]` and `y2` (axis with `side: 1`) labelled from `metricData[1]`.
 *
 * @param {number} ren_width - Plot width to render with.
 * @param {number} ren_height - Plot height to render with.
 */
function render(ren_width, ren_height) {
  // Plot already exists: only its size can change, so skip rebuilding the options
  if (uplot) {
    uplot.setSize({ width: ren_width, height: ren_height });
    return;
  }

  // Axis label "<name> (<prefix><base>)"; the unit is optional in the API schema,
  // so missing parts are dropped instead of being printed as "undefined"/"null".
  const axisLabel = (metric) => {
    const name = metric?.name ?? "";
    const unit = `${metric?.unit?.prefix ?? ""}${metric?.unit?.base ?? ""}`;
    return unit ? `${name} (${unit})` : name;
  };

  // Set Options
  const opts = {
    width: ren_width,
    height: ren_height,
    plugins: [legendAsTooltipPlugin()],
    series: plotSeries,
    axes: [
      {
        scale: "x",
        space: 35,
        incrs: timeIncrs(timestep, maxX, forNode),
        label: "Time",
        values: (_, vals) => vals.map((v) => formatDurationTime(v, forNode)),
      },
      {
        scale: "y1",
        grid: { show: true },
        label: axisLabel(metricData[0]),
        values: (u, vals) => vals.map((v) => formatNumber(v)),
      },
      {
        side: 1,
        scale: "y2",
        grid: { show: false },
        label: axisLabel(metricData[1]),
        values: (u, vals) => vals.map((v) => formatNumber(v)),
      },
    ],
    // bands: plotBands,
    padding: [5, 10, -20, 0],
    hooks: {
      // init: [
      //   (u) => {
      //     /* IF Zoom Enabled */
      //     if (resampleConfig && !forNode) {
      //       u.over.addEventListener("dblclick", (e) => {
      //         // console.log('Dispatch: Zoom Reset')
      //         onZoom({
      //           lastZoomState: {
      //             x: { time: false },
      //             y: { auto: true }
      //           }
      //         });
      //       });
      //     };
      //   },
      // ],
      draw: [
        (u) => {
          // Draw plot type label:
          let textl = `Cluster ${cluster}`
          // let textl = `${scope}${plotSeries.length > 2 ? "s" : ""}${
          //   useStatsSeries
          //     ? (usesMeanStatsSeries ? ": min/mean/max" : ": min/median/max")
          //     : metricConfig != null && scope != metricConfig.scope
          //       ? ` (${metricConfig.aggregation})`
          //       : ""
          // }`;
          let textr = `Sums of ${numNodes} nodes`
          //let textr = `${isShared && scope != "core" && scope != "accelerator" ? "[Shared]" : ""}`;
          u.ctx.save();
          u.ctx.textAlign = "start"; // 'end'
          u.ctx.fillStyle = "black";
          u.ctx.fillText(textl, u.bbox.left + 10, u.bbox.top + (forNode ? 0 : 10));
          u.ctx.textAlign = "end";
          u.ctx.fillStyle = "black";
          u.ctx.fillText(
            textr,
            u.bbox.left + u.bbox.width - 10,
            u.bbox.top + (forNode ? 0 : 10),
          );
          // u.ctx.fillText(text, u.bbox.left + u.bbox.width - 10, u.bbox.top + u.bbox.height - 10) // Recipe for bottom right
          // if (!thresholds) {
          u.ctx.restore();
          return;
          // }
          // let y = u.valToPos(thresholds.normal, "y", true);
          // u.ctx.save();
          // u.ctx.lineWidth = lineWidth;
          // u.ctx.strokeStyle = normalLineColor;
          // u.ctx.setLineDash([5, 5]);
          // u.ctx.beginPath();
          // u.ctx.moveTo(u.bbox.left, y);
          // u.ctx.lineTo(u.bbox.left + u.bbox.width, y);
          // u.ctx.stroke();
          // u.ctx.restore();
        },
      ],
      // setScale: [
      //   (u, key) => { // If ZoomResample is Configured && Not System/Node View
      //     if (resampleConfig && !forNode && key === 'x') {
      //       const numX = (u.series[0].idxs[1] - u.series[0].idxs[0])
      //       if (numX <= resampleTrigger && timestep !== resampleMinimum) {
      //         /* Get closest zoom level; prevents multiple iterative zoom requests for big zoom-steps (e.g. 600 -> 300 -> 120 -> 60) */
      //         // Which resolution to theoretically request to achieve 30 or more visible data points:
      //         const target = (numX * timestep) / resampleTrigger
      //         // Which configured resolution actually matches the closest to theoretical target:
      //         const closest = resampleResolutions.reduce(function(prev, curr) {
      //           return (Math.abs(curr - target) < Math.abs(prev - target) ? curr : prev);
      //         });
      //         // Prevents non-required dispatches
      //         if (timestep !== closest) {
      //           // console.log('Dispatch: Zoom with Res from / to', timestep, closest)
      //           onZoom({
      //             newRes: closest,
      //             lastZoomState: u?.scales,
      //             lastThreshold: thresholds?.normal
      //           });
      //         }
      //       } else {
      //         // console.log('Dispatch: Zoom Update States')
      //         onZoom({
      //           lastZoomState: u?.scales,
      //           lastThreshold: thresholds?.normal
      //         });
      //       };
      //     };
      //   },
      // ]
    },
    scales: {
      x: { time: false },
      // One auto-ranged scale per y axis; the second key was a duplicated "y1" before
      y1: { auto: true },
      y2: { auto: true },
    },
    legend: {
      // Display legend until max 12 Y-dataseries
      show: true, // metricData.length <= 12 || useStatsSeries,
      live: true  // But This Plot always for 2 Data-Series
    },
    cursor: {
      drag: { x: true, y: true },
    }
  };

  // Handle Render
  // if (plotSync) {
  //   opts.cursor.sync = {
  //     key: plotSync.key,
  //     scales: ["x", null],
  //   }
  // }
  // if (zoomState && metricConfig?.aggregation == "avg") {
  //   opts.scales = {...zoomState}
  // } else if (zoomState && metricConfig?.aggregation == "sum") {
  //   // Allow Zoom In === Ymin changed
  //   if (zoomState.y.min !== 0) { // scope change?: only use zoomState if thresholds match
  //     if ((thresholdState === thresholds?.normal)) { opts.scales = {...zoomState} };
  //   } // else: reset scaling to default
  // }
  uplot = new uPlot(opts, plotData, plotWrapper);
}
// Schedules a re-render with the new size after `renderSleepTime` ms.
// A still-pending re-render is cancelled first, so only the latest size is applied.
// Does nothing until the plot has been created.
function onSizeChange(chg_width, chg_height) {
  if (uplot) {
    if (timeoutId != null) clearTimeout(timeoutId);

    const rerender = () => {
      timeoutId = null;
      render(chg_width, chg_height);
    };
    timeoutId = setTimeout(rerender, renderSleepTime);
  }
}
/* On Mount: initial render, only possible if the wrapper element was bound */
onMount(() => {
  if (!plotWrapper) return;
  render(width, height);
});

/* On Destroy: cancel a pending re-render and release the plot */
onDestroy(() => {
  if (timeoutId != null) {
    clearTimeout(timeoutId);
  }
  uplot?.destroy();
});
</script>
<!-- Define $width Wrapper and NoData Card -->
{#if metricData[0]?.data && metricData[0]?.data?.length > 0}
<div bind:this={plotWrapper} bind:clientWidth={width}
class={forNode ? 'py-2 rounded' : 'rounded'}
></div>
{:else}
<Card body color="warning" class="mx-4"
>Cannot render plot: No series data returned for <code>{cluster}</code></Card
>
{/if}

View File

@@ -36,6 +36,7 @@
subCluster = null, subCluster = null,
allowSizeChange = false, allowSizeChange = false,
useColors = true, useColors = true,
useLegend = true,
width = 600, width = 600,
height = 380, height = 380,
} = $props(); } = $props();
@@ -534,7 +535,7 @@
width: width, width: width,
height: height, height: height,
legend: { legend: {
show: true, show: useLegend,
}, },
cursor: { cursor: {
dataIdx: (u, seriesIdx) => { dataIdx: (u, seriesIdx) => {

View File

@@ -156,7 +156,7 @@
{ {
scale: "y", scale: "y",
grid: { show: true }, grid: { show: true },
labelFont: "sans-serif", // labelFont: "sans-serif",
label: ylabel + (yunit ? ` (${yunit})` : ''), label: ylabel + (yunit ? ` (${yunit})` : ''),
// values: (u, vals) => vals.map((v) => formatNumber(v)), // values: (u, vals) => vals.map((v) => formatNumber(v)),
}, },

View File

@@ -64,6 +64,34 @@
{/each} {/each}
</DropdownMenu> </DropdownMenu>
</Dropdown> </Dropdown>
{:else if item.title === 'Status'}
<Dropdown nav inNavbar {direction}>
<DropdownToggle nav caret>
<Icon name={item.icon} />
{item.title}
</DropdownToggle>
<DropdownMenu class="dropdown-menu-lg-end">
{#each clusters as cluster}
<Dropdown nav direction="right">
<DropdownToggle nav caret class="dropdown-item py-1 px-2">
{cluster.name}
</DropdownToggle>
<DropdownMenu>
<DropdownItem class="py-1 px-2"
href={item.href + cluster.name}
>
Status Dashboard
</DropdownItem>
<DropdownItem class="py-1 px-2"
href={item.href + 'detail/' + cluster.name}
>
Status Details
</DropdownItem>
</DropdownMenu>
</Dropdown>
{/each}
</DropdownMenu>
</Dropdown>
{:else} {:else}
<Dropdown nav inNavbar {direction}> <Dropdown nav inNavbar {direction}>
<DropdownToggle nav caret> <DropdownToggle nav caret>

View File

@@ -6,6 +6,7 @@ mount(Status, {
target: document.getElementById('svelte-app'), target: document.getElementById('svelte-app'),
props: { props: {
presetCluster: infos.cluster, presetCluster: infos.cluster,
displayType: displayType,
}, },
context: new Map([ context: new Map([
['cc-config', clusterCockpitConfig] ['cc-config', clusterCockpitConfig]

View File

@@ -0,0 +1,82 @@
<!--
@component Main cluster status view component; renders current system-usage information
Properties:
- `presetCluster String`: The cluster to show status information for
-->
<script>
import {
getContext
} from "svelte"
import {
init,
} from "../generic/utils.js";
import {
Row,
Col,
Card,
CardBody,
TabContent,
TabPane,
Spinner
} from "@sveltestrap/sveltestrap";
import StatusDash from "./dashdetails/StatusDash.svelte";
import UsageDash from "./dashdetails/UsageDash.svelte";
import StatisticsDash from "./dashdetails/StatisticsDash.svelte";
/* Svelte 5 Props */
let { presetCluster } = $props();

/* Const Init */
// Store returned by the shared init() helper; the markup reads its fetching/error/data fields
const { query: initq } = init();
// Colorblind palette switch taken from the "cc-config" context; off when not configured
const ccConfig = getContext("cc-config");
const useCbColors = ccConfig?.plotConfiguration_colorblindMode || false;
</script>
<!-- Loading indicator & Refresh -->
<Row cols={1} class="mb-2">
<Col>
<h3 class="mb-0">Current Status of Cluster "{presetCluster.charAt(0).toUpperCase() + presetCluster.slice(1)}"</h3>
</Col>
</Row>
{#if $initq.fetching}
<Row cols={1} class="text-center mt-3">
<Col>
<Spinner />
</Col>
</Row>
{:else if $initq.error}
<Row cols={1} class="text-center mt-3">
<Col>
<Card body color="danger">{$initq.error.message}</Card>
</Col>
</Row>
{:else}
<Card class="overflow-auto" style="height: auto;">
<TabContent>
<TabPane tabId="status-dash" tab="Status" active>
<CardBody>
<StatusDash clusters={$initq.data.clusters} {presetCluster} {useCbColors} useAltColors></StatusDash>
</CardBody>
</TabPane>
<TabPane tabId="usage-dash" tab="Usage">
<CardBody>
<UsageDash {presetCluster} {useCbColors}></UsageDash>
</CardBody>
</TabPane>
<TabPane tabId="metric-dash" tab="Statistics">
<CardBody>
<StatisticsDash {presetCluster} {useCbColors}></StatisticsDash>
</CardBody>
</TabPane>
</TabContent>
</Card>
{/if}

View File

@@ -0,0 +1,605 @@
<!--
@component Main cluster status view component; renders current system-usage information
Properties:
- `presetCluster String`: The cluster to show status information for
-->
<script>
import {
getContext
} from "svelte"
import {
queryStore,
gql,
getContextClient,
} from "@urql/svelte";
import {
init,
scramble,
scrambleNames,
convert2uplot
} from "../generic/utils.js";
import {
formatDurationTime,
formatNumber,
} from "../generic/units.js";
import {
Row,
Col,
Card,
CardTitle,
CardHeader,
CardBody,
Spinner,
Table,
Progress,
Icon,
} from "@sveltestrap/sveltestrap";
import Roofline from "../generic/plots/Roofline.svelte";
import Pie, { colors } from "../generic/plots/Pie.svelte";
import Stacked from "../generic/plots/Stacked.svelte";
import Histogram from "../generic/plots/Histogram.svelte";
/* Svelte 5 Props */
// `presetCluster`: name of the cluster this dashboard queries and displays
let {
presetCluster,
} = $props();
/*Const Init */
// Store returned by the shared init() helper; its data.clusters list feeds `clusterInfo`
const { query: initq } = init();
// urql client taken from context, used by all query stores below
const client = getContextClient();
// Colorblind palette switch from the "cc-config" context; false when not configured
const useCbColors = getContext("cc-config")?.plotConfiguration_colorblindMode || false
/* States */
// Paging for the top-projects query
let pagingState = $state({page: 1, itemsPerPage: 10}) // Top 10
// Time window for the status query: from five minutes before initialization ...
let from = $state(new Date(Date.now() - 5 * 60 * 1000));
// ... up to the moment of initialization
let to = $state(new Date(Date.now()));
// Unix timestamp (seconds) 14400 s (4 h) before initialization
let stackedFrom = $state(Math.floor(Date.now() / 1000) - 14400);
// Measured column widths, written by `bind:clientWidth` in the markup and used to size plots
let colWidthJobs = $state(0);
let colWidthRoof = $state(0);
let colWidthStacked1 = $state(0);
let colWidthStacked2 = $state(0);
/* Derived */
// States for Stacked charts
// Requests the timed node-state and health-state counts of `presetCluster`,
// starting at `stackedFrom` (a hard-coded debug timestamp was used here before).
// Both result sets come from the same `nodeStatesTimed` field, aliased by `type`.
const statesTimed = $derived(queryStore({
  client: client,
  query: gql`
query ($filter: [NodeFilter!], $typeNode: String!, $typeHealth: String!) {
nodeStates: nodeStatesTimed(filter: $filter, type: $typeNode) {
state
counts
times
}
healthStates: nodeStatesTimed(filter: $filter, type: $typeHealth) {
state
counts
times
}
}
`,
  variables: {
    filter: { cluster: { eq: presetCluster }, timeStart: stackedFrom },
    typeNode: "node",
    typeHealth: "health"
  },
  // Always fetch from the API instead of serving a cached result
  requestPolicy: "network-only"
}));
// Combined status query for the selected cluster. One request returns:
// - nodeMetrics: per host, the average of each requested metric within [from, to]
// - jobsMetricStats: per running job, the average of each requested metric plus job size/duration
// - jobs: all running jobs with their allocated hostnames
// - allocatedNodes: allocated node count per name (matched against subcluster names in `clusterInfo`)
// - jobsStatistics: job/user/core/accelerator totals grouped by subcluster
// Note: nodeMetrics are requested on configured $timestep resolution
// Result: The latest 5 minutes (datapoints) for each node independent of job
const statusQuery = $derived(queryStore({
client: client,
query: gql`
query (
$cluster: String!
$metrics: [String!]
$from: Time!
$to: Time!
$jobFilter: [JobFilter!]!
$paging: PageRequest!
$sorting: OrderByInput!
) {
# Node 5 Minute Averages for Roofline
nodeMetrics(
cluster: $cluster
metrics: $metrics
from: $from
to: $to
) {
host
subCluster
metrics {
name
metric {
series {
statistics {
avg
}
}
}
}
}
# Running Job Metric Average for Rooflines
jobsMetricStats(filter: $jobFilter, metrics: $metrics) {
id
jobId
duration
numNodes
numAccelerators
subCluster
stats {
name
data {
avg
}
}
}
# Get Jobs for Per-Node Counts
jobs(filter: $jobFilter, order: $sorting, page: $paging) {
items {
jobId
resources {
hostname
}
}
count
}
# Only counts shared nodes once
allocatedNodes(cluster: $cluster) {
name
count
}
# totalNodes includes multiples if shared jobs: Info-Card Data
jobsStatistics(
filter: $jobFilter
page: $paging
sortBy: TOTALJOBS
groupBy: SUBCLUSTER
) {
id
totalJobs
totalUsers
totalCores
totalAccs
}
}
`,
variables: {
cluster: presetCluster,
metrics: ["flops_any", "mem_bw"], // Fixed names for roofline and status bars
// Time window is sent as ISO-8601 strings
from: from.toISOString(),
to: to.toISOString(),
// Restrict all job-based parts to running jobs of this cluster
jobFilter: [{ state: ["running"] }, { cluster: { eq: presetCluster } }],
paging: { itemsPerPage: -1, page: 1 }, // Get all: -1
sorting: { field: "startTime", type: "col", order: "DESC" }
},
// Always fetch from the API instead of serving a cached result
requestPolicy: "network-only"
}));
// Job counts per project for the running jobs of the selected cluster,
// sorted by job count and limited by `pagingState`.
const topJobsQuery = $derived.by(() => {
  const runningOnCluster = [{ state: ["running"] }, { cluster: { eq: presetCluster} }];

  return queryStore({
    client,
    query: gql`
query (
$filter: [JobFilter!]!
$paging: PageRequest!
) {
jobsStatistics(
filter: $filter
page: $paging
sortBy: TOTALJOBS
groupBy: PROJECT
) {
id
totalJobs
}
}
`,
    variables: {
      filter: runningOnCluster,
      paging: pagingState // Top 10
    },
    requestPolicy: "network-only"
  });
});
// Histogram query: distribution of allocated cores and allocated accelerators
// over the running jobs of the selected cluster. No node metrics are requested here.
const nodeStatusQuery = $derived(queryStore({
client: client,
query: gql`
query (
$filter: [JobFilter!]!
$selectedHistograms: [String!]
$numDurationBins: String
) {
jobsStatistics(filter: $filter, metrics: $selectedHistograms, numDurationBins: $numDurationBins) {
histNumCores {
count
value
}
histNumAccs {
count
value
}
}
}
`,
variables: {
// Only running jobs of this cluster are considered
filter: [{ state: ["running"] }, { cluster: { eq: presetCluster } }],
selectedHistograms: [], // No Metrics requested for node hardware stats - Empty Array can be used for refresh
numDurationBins: "1h", // Hardcode or selector?
},
// Always fetch from the API instead of serving a cached result
requestPolicy: "network-only"
}));
// Aggregated info-card data for the selected cluster: allocation and capacity totals
// summed over all of its subclusters, display units, roofline maxima and the
// cluster-wide flop / memory-bandwidth rates. Empty object while cluster data is missing.
const clusterInfo = $derived.by(() => {
  const clusters = $initq?.data?.clusters;
  if (!clusters) return {};

  const info = {};
  // Adds `amount` onto info[key]; a missing or zero entry counts as 0
  const accumulate = (key, amount) => {
    info[key] = (info[key] || 0) + amount;
  };

  const subClusters = clusters.find((c) => c.name == presetCluster)?.subClusters || [];
  for (const sc of subClusters) {
    // Per-subcluster rows of the status query (matched by subcluster name)
    const allocation = $statusQuery?.data?.allocatedNodes?.find(({ name }) => name == sc.name);
    const jobStats = $statusQuery?.data?.jobsStatistics?.find(({ id }) => id == sc.name);

    // Allocations
    accumulate('allocatedNodes', allocation?.count || 0);
    accumulate('allocatedCores', jobStats?.totalCores || 0);
    accumulate('allocatedAccs', jobStats?.totalAccs || 0);

    // Infos
    if (info['processorTypes']) {
      info['processorTypes'].add(sc.processorType);
    } else {
      info['processorTypes'] = sc?.processorType ? new Set([sc.processorType]) : new Set([]);
    }
    accumulate('activeUsers', jobStats?.totalUsers || 0);
    accumulate('runningJobs', jobStats?.totalJobs || 0);
    accumulate('totalNodes', sc?.numberOfNodes || 0);
    accumulate('totalCores', (sc?.socketsPerNode * sc?.coresPerSocket * sc?.numberOfNodes) || 0);
    accumulate('totalAccs', (sc?.numberOfNodes * sc?.topology?.accelerators?.length) || 0);

    // Units (Set Once): taken from the first subcluster that yields a non-empty unit
    if (!info['flopRateUnit']) info['flopRateUnit'] = sc.flopRateSimd.unit.prefix + sc.flopRateSimd.unit.base
    if (!info['memBwRateUnit']) info['memBwRateUnit'] = sc.memoryBandwidth.unit.prefix + sc.memoryBandwidth.unit.base

    // Get Maxima For Roofline Knee Render
    if (!info['roofData']) {
      info['roofData'] = {
        flopRateScalar: {value: sc.flopRateScalar.value},
        flopRateSimd: {value: sc.flopRateSimd.value},
        memoryBandwidth: {value: sc.memoryBandwidth.value}
      };
    } else {
      for (const peak of ['flopRateScalar', 'flopRateSimd', 'memoryBandwidth']) {
        info['roofData'][peak]['value'] = Math.max(info['roofData'][peak]['value'], sc[peak].value)
      }
    }
  }

  // Keymetrics (Data on Cluster-Scope): sum of the per-node averages of one metric
  const sumOfNodeAverages = (metricName) =>
    $statusQuery?.data?.nodeMetrics?.reduce((sum, node) =>
      sum + (node.metrics.find((m) => m.name == metricName)?.metric?.series[0]?.statistics?.avg || 0),
      0, // Initial Value
    ) || 0;

  info['flopRate'] = Math.floor((sumOfNodeAverages('flops_any') * 100) / 100)
  info['memBwRate'] = Math.floor((sumOfNodeAverages('mem_bw') * 100) / 100)

  return info
});
/* Functions */
// Returns the legend color for entry `targetIdx` from the active Pie palette
// (colorblind or default). Indices beyond the palette wrap around to its start.
function legendColors(targetIdx) {
  // An 'alternative' palette (useAltColors) is not used in this view
  const paletteName = useCbColors ? 'colorblind' : 'default';
  const palette = [...colors[paletteName]];
  return palette[(palette.length + targetIdx) % palette.length];
}
/**
 * Converts per-job metric statistics into the roofline data format.
 *
 * For every job the arithmetic intensity (avg `flops_any` / avg `mem_bw`) becomes
 * the x value, the avg `flops_any` the y value, and the job duration relative to
 * one day (capped at 1.0) the color value. Jobs for which no finite intensity can
 * be computed (metric entry missing, zero or missing bandwidth) are skipped
 * instead of aborting the whole transformation.
 *
 * @param {Array|null|undefined} clusterData - Job entries with `duration` and `stats[{name, data: {avg}}]`.
 * @returns {Array|null} `[null, [x, y], c]`, or `null` if no job produced a data point.
 */
function transformJobsStatsToData(clusterData) {
  /* c will contain values from 0 to 1 representing the duration */
  let data = null
  const x = [], y = [], c = [], day = 86400.0

  if (clusterData) {
    for (const job of clusterData) {
      // Optional chaining: a job without one of the two metric entries must not throw
      const f = job?.stats?.find((s) => s.name == "flops_any")?.data?.avg
      const m = job?.stats?.find((s) => s.name == "mem_bw")?.data?.avg
      const d = job?.duration / day

      const intensity = f / m
      // Covers NaN (missing values, 0/0) as well as +/-Infinity (division by zero)
      if (!Number.isFinite(intensity))
        continue

      x.push(intensity)
      y.push(f)
      // Long Jobs > 1 Day: Use max Color
      c.push(d > 1.0 ? 1.0 : d)
    }
  } else {
    console.warn("transformJobsStatsToData: metrics for 'mem_bw' and/or 'flops_any' missing!")
  }

  if (x.length > 0 && y.length > 0 && c.length > 0) {
    data = [null, [x, y], c] // for dataformat see roofline.svelte
  }
  return data
}
// Maps per-job metric statistics to the info entries passed to the roofline plot
// as `jobsData`: ids, node count, accelerator count (0 when absent) and a formatted duration.
// Returns an empty list (and warns) when no job data is given.
function transformJobsStatsToInfo(clusterData) {
  if (!clusterData) {
    console.warn("transformJobsStatsToInfo: jobInfo missing!")
    return []
  }

  return clusterData.map((job) => {
    const accCount = job?.numAccelerators ? job.numAccelerators : 0
    return {
      id: job.id,
      jobId: job.jobId,
      numNodes: job.numNodes,
      numAcc: accCount,
      duration: formatDurationTime(job.duration),
    }
  })
}
/* Inspect */
// Debug aid: logs the derived `clusterInfo` object to the browser console.
// NOTE(review): presumably a development leftover — confirm whether it should ship.
$inspect(clusterInfo).with((type, clusterInfo) => {
console.log(type, 'clusterInfo', clusterInfo)
});
</script>
<Card>
<CardHeader class="text-center">
<h3 class="mb-0">{presetCluster.charAt(0).toUpperCase() + presetCluster.slice(1)} Dashboard</h3>
</CardHeader>
<CardBody>
{#if $statusQuery.fetching || $statesTimed.fetching || $topJobsQuery.fetching || $nodeStatusQuery.fetching}
<Row class="justify-content-center">
<Col xs="auto">
<Spinner />
</Col>
</Row>
{:else if $statusQuery.error || $statesTimed.error || $topJobsQuery.error || $nodeStatusQuery.error}
<Row cols={{xs:1, md:2}}>
{#if $statusQuery.error}
<Col>
<Card color="danger">Error Requesting StatusQuery: {$statusQuery.error.message}</Card>
</Col>
{/if}
{#if $statesTimed.error}
<Col>
<Card color="danger">Error Requesting StatesTimed: {$statesTimed.error.message}</Card>
</Col>
{/if}
{#if $topJobsQuery.error}
<Col>
<Card color="danger">Error Requesting TopJobsQuery: {$topJobsQuery.error.message}</Card>
</Col>
{/if}
{#if $nodeStatusQuery.error}
<Col>
<Card color="danger">Error Requesting NodeStatusQuery: {$nodeStatusQuery.error.message}</Card>
</Col>
{/if}
</Row>
{:else}
<Row cols={{xs:1, md:2, xl: 3}}>
<Col> <!-- Info Card -->
<Card class="h-auto mt-1">
<CardHeader>
<CardTitle class="mb-0">Cluster "{presetCluster.charAt(0).toUpperCase() + presetCluster.slice(1)}"</CardTitle>
<span>{[...clusterInfo?.processorTypes].toString()}</span>
</CardHeader>
<CardBody>
<Table borderless>
<tr class="py-2">
<td style="font-size:x-large;">{clusterInfo?.runningJobs} Running Jobs</td>
<td colspan="2" style="font-size:x-large;">{clusterInfo?.activeUsers} Active Users</td>
</tr>
<hr class="my-1"/>
<tr class="pt-2">
<td style="font-size: large;">
Flop Rate (<span style="cursor: help;" title="Flops[Any] = (Flops[Double] x 2) + Flops[Single]">Any</span>)
</td>
<td colspan="2" style="font-size: large;">
Memory BW Rate
</td>
</tr>
<tr class="pb-2">
<td style="font-size:x-large;">
{clusterInfo?.flopRate}
{clusterInfo?.flopRateUnit}
</td>
<td colspan="2" style="font-size:x-large;">
{clusterInfo?.memBwRate}
{clusterInfo?.memBwRateUnit}
</td>
</tr>
<hr class="my-1"/>
<tr class="py-2">
<th scope="col">Allocated Nodes</th>
<td style="min-width: 100px;"
><div class="col">
<Progress
value={clusterInfo?.allocatedNodes}
max={clusterInfo?.totalNodes}
/>
</div></td
>
<td
>{clusterInfo?.allocatedNodes} / {clusterInfo?.totalNodes}
Nodes</td
>
</tr>
<tr class="py-2">
<th scope="col">Allocated Cores</th>
<td style="min-width: 100px;"
><div class="col">
<Progress
value={clusterInfo?.allocatedCores}
max={clusterInfo?.totalCores}
/>
</div></td
>
<td
>{formatNumber(clusterInfo?.allocatedCores)} / {formatNumber(clusterInfo?.totalCores)}
Cores</td
>
</tr>
{#if clusterInfo?.totalAccs !== 0}
<tr class="py-2">
<th scope="col">Allocated Accelerators</th>
<td style="min-width: 100px;"
><div class="col">
<Progress
value={clusterInfo?.allocatedAccs}
max={clusterInfo?.totalAccs}
/>
</div></td
>
<td
>{clusterInfo?.allocatedAccs} / {clusterInfo?.totalAccs}
Accelerators</td
>
</tr>
{/if}
</Table>
</CardBody>
</Card>
</Col>
<Col> <!-- Pie Jobs -->
<Row cols={{xs:1, md:2}}>
<Col class="p-2">
<div bind:clientWidth={colWidthJobs}>
<h4 class="text-center">
Top Projects: Jobs
</h4>
<Pie
{useCbColors}
canvasId="hpcpie-jobs-projects"
size={colWidthJobs * 0.75}
sliceLabel={'Jobs'}
quantities={$topJobsQuery.data.jobsStatistics.map(
(tp) => tp['totalJobs'],
)}
entities={$topJobsQuery.data.jobsStatistics.map((tp) => scrambleNames ? scramble(tp.id) : tp.id)}
/>
</div>
</Col>
<Col class="p-2">
<Table>
<tr class="mb-2">
<th></th>
<th style="padding-left: 0.5rem;">Project</th>
<th>Jobs</th>
</tr>
{#each $topJobsQuery.data.jobsStatistics as tp, i}
<tr>
<td><Icon name="circle-fill" style="color: {legendColors(i)};" /></td>
<td>
<a target="_blank" href="/monitoring/jobs/?cluster={presetCluster}&state=running&project={tp.id}&projectMatch=eq"
>{scrambleNames ? scramble(tp.id) : tp.id}
</a>
</td>
<td>{tp['totalJobs']}</td>
</tr>
{/each}
</Table>
</Col>
</Row>
</Col>
<Col> <!-- Job Roofline -->
<div bind:clientWidth={colWidthRoof}>
{#key $statusQuery?.data?.jobsMetricStats}
<Roofline
useColors={true}
allowSizeChange
width={colWidthRoof - 10}
height={300}
subCluster={clusterInfo?.roofData ? clusterInfo.roofData : null}
roofData={transformJobsStatsToData($statusQuery?.data?.jobsMetricStats)}
jobsData={transformJobsStatsToInfo($statusQuery?.data?.jobsMetricStats)}
/>
{/key}
</div>
</Col>
<Col> <!-- Resources/Job Histogram -->
{#if clusterInfo?.totalAccs == 0}
<Histogram
data={convert2uplot($nodeStatusQuery.data.jobsStatistics[0].histNumCores)}
title="Number of Cores Distribution"
xlabel="Allocated Cores"
xunit="Nodes"
ylabel="Number of Jobs"
yunit="Jobs"
height="275"
enableFlip
/>
{:else}
<Histogram
data={convert2uplot($nodeStatusQuery.data.jobsStatistics[0].histNumAccs)}
title="Number of Accelerators Distribution"
xlabel="Allocated Accs"
xunit="Accs"
ylabel="Number of Jobs"
yunit="Jobs"
height="275"
enableFlip
/>
{/if}
</Col>
<Col> <!-- Stacked SchedState -->
<div bind:clientWidth={colWidthStacked1}>
{#key $statesTimed?.data?.nodeStates}
<Stacked
data={$statesTimed?.data?.nodeStates}
width={colWidthStacked1 * 0.95}
xlabel="Time"
ylabel="Nodes"
yunit = "#Count"
title = "Node States"
stateType = "Node"
/>
{/key}
</div>
</Col>
<Col> <!-- Stacked Healthstate -->
<div bind:clientWidth={colWidthStacked2}>
{#key $statesTimed?.data?.healthStates}
<Stacked
data={$statesTimed?.data?.healthStates}
width={colWidthStacked2 * 0.95}
xlabel="Time"
ylabel="Nodes"
yunit = "#Count"
title = "Health States"
stateType = "Health"
/>
{/key}
</div>
</Col>
</Row>
{/if}
</CardBody>
</Card>

View File

@@ -22,11 +22,11 @@
} from "@urql/svelte"; } from "@urql/svelte";
import { import {
convert2uplot, convert2uplot,
} from "../generic/utils.js"; } from "../../generic/utils.js";
import PlotGrid from "../generic/PlotGrid.svelte"; import PlotGrid from "../../generic/PlotGrid.svelte";
import Histogram from "../generic/plots/Histogram.svelte"; import Histogram from "../../generic/plots/Histogram.svelte";
import HistogramSelection from "../generic/select/HistogramSelection.svelte"; import HistogramSelection from "../../generic/select/HistogramSelection.svelte";
import Refresher from "../generic/helper/Refresher.svelte"; import Refresher from "../../generic/helper/Refresher.svelte";
/* Svelte 5 Props */ /* Svelte 5 Props */
let { let {

View File

@@ -22,12 +22,12 @@
gql, gql,
getContextClient, getContextClient,
} from "@urql/svelte"; } from "@urql/svelte";
import { formatDurationTime } from "../generic/units.js"; import { formatDurationTime } from "../../generic/units.js";
import Refresher from "../generic/helper/Refresher.svelte"; import Refresher from "../../generic/helper/Refresher.svelte";
import TimeSelection from "../generic/select/TimeSelection.svelte"; import TimeSelection from "../../generic/select/TimeSelection.svelte";
import Roofline from "../generic/plots/Roofline.svelte"; import Roofline from "../../generic/plots/Roofline.svelte";
import Pie, { colors } from "../generic/plots/Pie.svelte"; import Pie, { colors } from "../../generic/plots/Pie.svelte";
import Stacked from "../generic/plots/Stacked.svelte"; import Stacked from "../../generic/plots/Stacked.svelte";
/* Svelte 5 Props */ /* Svelte 5 Props */
let { let {
@@ -83,7 +83,7 @@
} }
`, `,
variables: { variables: {
filter: { cluster: { eq: cluster }, timeStart: stackedFrom}, filter: { cluster: { eq: cluster }, timeStart: 1760096999},
typeNode: "node", typeNode: "node",
typeHealth: "health" typeHealth: "health"
}, },

View File

@@ -27,10 +27,10 @@
scramble, scramble,
scrambleNames, scrambleNames,
convert2uplot, convert2uplot,
} from "../generic/utils.js"; } from "../../generic/utils.js";
import Pie, { colors } from "../generic/plots/Pie.svelte"; import Pie, { colors } from "../../generic/plots/Pie.svelte";
import Histogram from "../generic/plots/Histogram.svelte"; import Histogram from "../../generic/plots/Histogram.svelte";
import Refresher from "../generic/helper/Refresher.svelte"; import Refresher from "../../generic/helper/Refresher.svelte";
/* Svelte 5 Props */ /* Svelte 5 Props */
let { let {

View File

@@ -23,34 +23,49 @@
</script> </script>
</head> </head>
<body class="site"> <body class="site">
{{block "navigation" .}} {{if eq .Infos.displayType "PUBLIC"}}
<header id="svelte-header"></header> <main>
{{end}} <div class="container">
{{block "content-public" .}}
Whoops, you should not see this... [MAIN]
{{end}}
</div>
</main>
<main class="site-content"> {{block "javascript-public" .}}
<div class="container"> Whoops, you should not see this... [JS]
{{block "content" .}} {{end}}
Whoops, you should not see this...
{{end}}
</div>
</main>
{{block "footer" .}} {{else}}
<footer class="site-footer bg-light"> {{block "navigation" .}}
<ul class="footer-list"> <header id="svelte-header"></header>
<li class="footer-list-item"><a class="link-secondary fs-5" href="/imprint" title="Imprint" rel="nofollow">Imprint</a></li> {{end}}
<li class="footer-list-item"><a class="link-secondary fs-5" href="/privacy" title="Privacy Policy" rel="nofollow">Privacy Policy</a></li>
</ul>
<ul class="build-list">
<li class="build-list-item">Version {{ .Build.Version }}</li>
<li class="build-list-item">Hash {{ .Build.Hash }}</li>
<li class="build-list-item">Built {{ .Build.Buildtime }}</li>
</ul>
</footer>
{{end}}
{{block "javascript" .}} <main class="site-content">
<script src='/build/header.js'></script> <div class="container">
{{block "content" .}}
Whoops, you should not see this... [MAIN]
{{end}}
</div>
</main>
{{block "footer" .}}
<footer class="site-footer bg-light">
<ul class="footer-list">
<li class="footer-list-item"><a class="link-secondary fs-5" href="/imprint" title="Imprint" rel="nofollow">Imprint</a></li>
<li class="footer-list-item"><a class="link-secondary fs-5" href="/privacy" title="Privacy Policy" rel="nofollow">Privacy Policy</a></li>
</ul>
<ul class="build-list">
<li class="build-list-item">Version {{ .Build.Version }}</li>
<li class="build-list-item">Hash {{ .Build.Hash }}</li>
<li class="build-list-item">Built {{ .Build.Buildtime }}</li>
</ul>
</footer>
{{end}}
{{block "javascript" .}}
<script src='/build/header.js'></script>
{{end}}
{{end}} {{end}}
</body> </body>
</html> </html>

View File

@@ -0,0 +1,14 @@
{{define "content-public"}}
<div id="svelte-app"></div>
{{end}}
{{define "stylesheets"}}
<link rel='stylesheet' href='/build/dashpublic.css'>
{{end}}
{{define "javascript-public"}}
<script>
const infos = {{ .Infos }};
const clusterCockpitConfig = {{ .Config }};
</script>
<script src='/build/dashpublic.js'></script>
{{end}}

View File

@@ -8,6 +8,7 @@
{{define "javascript"}} {{define "javascript"}}
<script> <script>
const infos = {{ .Infos }}; const infos = {{ .Infos }};
const displayType = {{ .Infos.displayType }};
const clusterCockpitConfig = {{ .Config }}; const clusterCockpitConfig = {{ .Config }};
</script> </script>
<script src='/build/status.js'></script> <script src='/build/status.js'></script>