add healthStatus tab to status details, add graphql endpoints and handlers

This commit is contained in:
Christoph Kluge
2026-02-17 14:38:06 +01:00
parent 9af44779aa
commit fe3ebe0abc
8 changed files with 676 additions and 158 deletions

View File

@@ -19,6 +19,7 @@ type Node {
schedulerState: SchedulerState!
healthState: MonitoringState!
metaData: Any
healthData: Any
}
type NodeStates {
@@ -328,6 +329,7 @@ type Query {
## Node Queries New
node(id: ID!): Node
nodes(filter: [NodeFilter!], order: OrderByInput): NodeStateResultList!
nodesWithMeta(filter: [NodeFilter!], order: OrderByInput): NodeStateResultList!
nodeStates(filter: [NodeFilter!]): [NodeStates!]!
nodeStatesTimed(filter: [NodeFilter!], type: String!): [NodeStatesTimed!]!

2
go.sum
View File

@@ -4,8 +4,6 @@ github.com/99designs/gqlgen v0.17.85 h1:EkGx3U2FDcxQm8YDLQSpXIAVmpDyZ3IcBMOJi2nH
github.com/99designs/gqlgen v0.17.85/go.mod h1:yvs8s0bkQlRfqg03YXr3eR4OQUowVhODT/tHzCXnbOU=
github.com/Azure/go-ntlmssp v0.0.0-20221128193559-754e69321358 h1:mFRzDkZVAjdal+s7s0MwaRv9igoPqLRdzOLzw/8Xvq8=
github.com/Azure/go-ntlmssp v0.0.0-20221128193559-754e69321358/go.mod h1:chxPXzSsl7ZWRAuOIE23GDNzjWuZquvFlgA8xmpunjU=
github.com/ClusterCockpit/cc-lib/v2 v2.4.0 h1:OnZlvqSatg7yCQ2NtSR7AddpUVSiuSMZ8scF1a7nfOk=
github.com/ClusterCockpit/cc-lib/v2 v2.4.0/go.mod h1:JuxMAuEOaLLNEnnL9U3ejha8kMvsSatLdKPZEgJw6iw=
github.com/DATA-DOG/go-sqlmock v1.5.2 h1:OcvFkGmslmlZibjAjaHm3L//6LiuBgolP7OputlJIzU=
github.com/DATA-DOG/go-sqlmock v1.5.2/go.mod h1:88MAG/4G7SMwSE3CeA0ZKzrT5CiOU3OJ+JlNzwDqpNU=
github.com/KyleBanks/depth v1.2.1 h1:5h8fQADFrWtarTdtDudMmGsC7GPbOAu6RVB3ffsVFHc=

View File

@@ -287,6 +287,7 @@ type ComplexityRoot struct {
Cluster func(childComplexity int) int
CpusAllocated func(childComplexity int) int
GpusAllocated func(childComplexity int) int
HealthData func(childComplexity int) int
HealthState func(childComplexity int) int
Hostname func(childComplexity int) int
ID func(childComplexity int) int
@@ -347,6 +348,7 @@ type ComplexityRoot struct {
NodeStates func(childComplexity int, filter []*model.NodeFilter) int
NodeStatesTimed func(childComplexity int, filter []*model.NodeFilter, typeArg string) int
Nodes func(childComplexity int, filter []*model.NodeFilter, order *model.OrderByInput) int
NodesWithMeta func(childComplexity int, filter []*model.NodeFilter, order *model.OrderByInput) int
RooflineHeatmap func(childComplexity int, filter []*model.JobFilter, rows int, cols int, minX float64, minY float64, maxX float64, maxY float64) int
ScopedJobStats func(childComplexity int, id string, metrics []string, scopes []schema.MetricScope) int
Tags func(childComplexity int) int
@@ -369,7 +371,7 @@ type ComplexityRoot struct {
Series struct {
Data func(childComplexity int) int
Hostname func(childComplexity int) int
Id func(childComplexity int) int
ID func(childComplexity int) int
Statistics func(childComplexity int) int
}
@@ -476,6 +478,7 @@ type NodeResolver interface {
SchedulerState(ctx context.Context, obj *schema.Node) (schema.SchedulerState, error)
HealthState(ctx context.Context, obj *schema.Node) (string, error)
MetaData(ctx context.Context, obj *schema.Node) (any, error)
HealthData(ctx context.Context, obj *schema.Node) (any, error)
}
type QueryResolver interface {
Clusters(ctx context.Context) ([]*schema.Cluster, error)
@@ -485,6 +488,7 @@ type QueryResolver interface {
AllocatedNodes(ctx context.Context, cluster string) ([]*model.Count, error)
Node(ctx context.Context, id string) (*schema.Node, error)
Nodes(ctx context.Context, filter []*model.NodeFilter, order *model.OrderByInput) (*model.NodeStateResultList, error)
NodesWithMeta(ctx context.Context, filter []*model.NodeFilter, order *model.OrderByInput) (*model.NodeStateResultList, error)
NodeStates(ctx context.Context, filter []*model.NodeFilter) ([]*model.NodeStates, error)
NodeStatesTimed(ctx context.Context, filter []*model.NodeFilter, typeArg string) ([]*model.NodeStatesTimed, error)
Job(ctx context.Context, id string) (*schema.Job, error)
@@ -1452,6 +1456,12 @@ func (e *executableSchema) Complexity(ctx context.Context, typeName, field strin
}
return e.complexity.Node.GpusAllocated(childComplexity), true
case "Node.healthData":
if e.complexity.Node.HealthData == nil {
break
}
return e.complexity.Node.HealthData(childComplexity), true
case "Node.healthState":
if e.complexity.Node.HealthState == nil {
break
@@ -1785,6 +1795,17 @@ func (e *executableSchema) Complexity(ctx context.Context, typeName, field strin
}
return e.complexity.Query.Nodes(childComplexity, args["filter"].([]*model.NodeFilter), args["order"].(*model.OrderByInput)), true
case "Query.nodesWithMeta":
if e.complexity.Query.NodesWithMeta == nil {
break
}
args, err := ec.field_Query_nodesWithMeta_args(ctx, rawArgs)
if err != nil {
return 0, false
}
return e.complexity.Query.NodesWithMeta(childComplexity, args["filter"].([]*model.NodeFilter), args["order"].(*model.OrderByInput)), true
case "Query.rooflineHeatmap":
if e.complexity.Query.RooflineHeatmap == nil {
break
@@ -1882,11 +1903,11 @@ func (e *executableSchema) Complexity(ctx context.Context, typeName, field strin
return e.complexity.Series.Hostname(childComplexity), true
case "Series.id":
if e.complexity.Series.Id == nil {
if e.complexity.Series.ID == nil {
break
}
return e.complexity.Series.Id(childComplexity), true
return e.complexity.Series.ID(childComplexity), true
case "Series.statistics":
if e.complexity.Series.Statistics == nil {
break
@@ -2302,6 +2323,7 @@ type Node {
schedulerState: SchedulerState!
healthState: MonitoringState!
metaData: Any
healthData: Any
}
type NodeStates {
@@ -2611,6 +2633,7 @@ type Query {
## Node Queries New
node(id: ID!): Node
nodes(filter: [NodeFilter!], order: OrderByInput): NodeStateResultList!
nodesWithMeta(filter: [NodeFilter!], order: OrderByInput): NodeStateResultList!
nodeStates(filter: [NodeFilter!]): [NodeStates!]!
nodeStatesTimed(filter: [NodeFilter!], type: String!): [NodeStatesTimed!]!
@@ -3268,6 +3291,22 @@ func (ec *executionContext) field_Query_node_args(ctx context.Context, rawArgs m
return args, nil
}
// field_Query_nodesWithMeta_args converts the raw argument map of the
// nodesWithMeta query into a map holding the processed "filter" and "order"
// values. The first error reported while processing an argument aborts the
// conversion and is returned with a nil map.
//
// NOTE(review): this looks like gqlgen-generated code; prefer regenerating it
// from the schema over hand-editing — confirm.
func (ec *executionContext) field_Query_nodesWithMeta_args(ctx context.Context, rawArgs map[string]any) (map[string]any, error) {
var err error
args := map[string]any{}
// "filter" is processed with the NodeFilter list unmarshaller.
arg0, err := graphql.ProcessArgField(ctx, rawArgs, "filter", ec.unmarshalONodeFilter2ᚕᚖgithubᚗcomᚋClusterCockpitᚋccᚑbackendᚋinternalᚋgraphᚋmodelᚐNodeFilterᚄ)
if err != nil {
return nil, err
}
args["filter"] = arg0
// "order" is processed with the OrderByInput unmarshaller.
arg1, err := graphql.ProcessArgField(ctx, rawArgs, "order", ec.unmarshalOOrderByInput2ᚖgithubᚗcomᚋClusterCockpitᚋccᚑbackendᚋinternalᚋgraphᚋmodelᚐOrderByInput)
if err != nil {
return nil, err
}
args["order"] = arg1
return args, nil
}
func (ec *executionContext) field_Query_nodes_args(ctx context.Context, rawArgs map[string]any) (map[string]any, error) {
var err error
args := map[string]any{}
@@ -8258,6 +8297,35 @@ func (ec *executionContext) fieldContext_Node_metaData(_ context.Context, field
return fc, nil
}
// _Node_healthData resolves the healthData field of a Node. It hands the
// field to graphql.ResolveField together with a closure that calls the
// NodeResolver's HealthData method for obj, and marshals the result with
// marshalOAny2interface.
//
// NOTE(review): the meaning of the trailing boolean arguments (true, false)
// is defined by graphql.ResolveField and is not visible here — confirm
// against the gqlgen version in use.
func (ec *executionContext) _Node_healthData(ctx context.Context, field graphql.CollectedField, obj *schema.Node) (ret graphql.Marshaler) {
return graphql.ResolveField(
ctx,
ec.OperationContext,
field,
ec.fieldContext_Node_healthData,
func(ctx context.Context) (any, error) {
return ec.resolvers.Node().HealthData(ctx, obj)
},
nil,
ec.marshalOAny2interface,
true,
false,
)
}
// fieldContext_Node_healthData builds the field context for Node.healthData.
// The context is flagged as both a method and a resolver. Its Child callback
// always returns an error, because the field's type (Any) has no child
// fields to select.
func (ec *executionContext) fieldContext_Node_healthData(_ context.Context, field graphql.CollectedField) (fc *graphql.FieldContext, err error) {
fc = &graphql.FieldContext{
Object: "Node",
Field: field,
IsMethod: true,
IsResolver: true,
Child: func(ctx context.Context, field graphql.CollectedField) (*graphql.FieldContext, error) {
return nil, errors.New("field of type Any does not have child fields")
},
}
return fc, nil
}
func (ec *executionContext) _NodeMetrics_host(ctx context.Context, field graphql.CollectedField, obj *model.NodeMetrics) (ret graphql.Marshaler) {
return graphql.ResolveField(
ctx,
@@ -8428,6 +8496,8 @@ func (ec *executionContext) fieldContext_NodeStateResultList_items(_ context.Con
return ec.fieldContext_Node_healthState(ctx, field)
case "metaData":
return ec.fieldContext_Node_metaData(ctx, field)
case "healthData":
return ec.fieldContext_Node_healthData(ctx, field)
}
return nil, fmt.Errorf("no field named %q was found under type Node", field.Name)
},
@@ -9053,6 +9123,8 @@ func (ec *executionContext) fieldContext_Query_node(ctx context.Context, field g
return ec.fieldContext_Node_healthState(ctx, field)
case "metaData":
return ec.fieldContext_Node_metaData(ctx, field)
case "healthData":
return ec.fieldContext_Node_healthData(ctx, field)
}
return nil, fmt.Errorf("no field named %q was found under type Node", field.Name)
},
@@ -9118,6 +9190,53 @@ func (ec *executionContext) fieldContext_Query_nodes(ctx context.Context, field
return fc, nil
}
// _Query_nodesWithMeta resolves the nodesWithMeta query field. The closure
// passed to graphql.ResolveField reads the "filter" and "order" arguments
// from the current field context and forwards them to the QueryResolver's
// NodesWithMeta method. The result is marshalled as a NodeStateResultList.
//
// The type assertions on fc.Args panic if the stored values do not have the
// asserted types.
// NOTE(review): presumably the args were populated by
// fieldContext_Query_nodesWithMeta beforehand — confirm against
// graphql.ResolveField.
func (ec *executionContext) _Query_nodesWithMeta(ctx context.Context, field graphql.CollectedField) (ret graphql.Marshaler) {
return graphql.ResolveField(
ctx,
ec.OperationContext,
field,
ec.fieldContext_Query_nodesWithMeta,
func(ctx context.Context) (any, error) {
fc := graphql.GetFieldContext(ctx)
return ec.resolvers.Query().NodesWithMeta(ctx, fc.Args["filter"].([]*model.NodeFilter), fc.Args["order"].(*model.OrderByInput))
},
nil,
ec.marshalNNodeStateResultList2ᚖgithubᚗcomᚋClusterCockpitᚋccᚑbackendᚋinternalᚋgraphᚋmodelᚐNodeStateResultList,
true,
true,
)
}
// fieldContext_Query_nodesWithMeta builds the field context for the
// nodesWithMeta query and fills in its parsed arguments.
//
// The Child callback dispatches to the field contexts of the two
// NodeStateResultList fields ("items" and "count") and returns an error for
// any other field name. If argument parsing fails, the error is reported via
// ec.Error and returned together with the partially built context.
func (ec *executionContext) fieldContext_Query_nodesWithMeta(ctx context.Context, field graphql.CollectedField) (fc *graphql.FieldContext, err error) {
fc = &graphql.FieldContext{
Object: "Query",
Field: field,
IsMethod: true,
IsResolver: true,
Child: func(ctx context.Context, field graphql.CollectedField) (*graphql.FieldContext, error) {
switch field.Name {
case "items":
return ec.fieldContext_NodeStateResultList_items(ctx, field)
case "count":
return ec.fieldContext_NodeStateResultList_count(ctx, field)
}
return nil, fmt.Errorf("no field named %q was found under type NodeStateResultList", field.Name)
},
}
// Registered before argument parsing so that a panic raised while parsing
// is recovered, converted via ec.Recover and reported, and ends up in the
// named return value err.
defer func() {
if r := recover(); r != nil {
err = ec.Recover(ctx, r)
ec.Error(ctx, err)
}
}()
// Argument parsing runs with the new field context attached to ctx.
ctx = graphql.WithFieldContext(ctx, fc)
if fc.Args, err = ec.field_Query_nodesWithMeta_args(ctx, field.ArgumentMap(ec.Variables)); err != nil {
ec.Error(ctx, err)
return fc, err
}
return fc, nil
}
func (ec *executionContext) _Query_nodeStates(ctx context.Context, field graphql.CollectedField) (ret graphql.Marshaler) {
return graphql.ResolveField(
ctx,
@@ -15744,6 +15863,39 @@ func (ec *executionContext) _Node(ctx context.Context, sel ast.SelectionSet, obj
continue
}
out.Concurrently(i, func(ctx context.Context) graphql.Marshaler { return innerFunc(ctx, out) })
case "healthData":
field := field
innerFunc := func(ctx context.Context, _ *graphql.FieldSet) (res graphql.Marshaler) {
defer func() {
if r := recover(); r != nil {
ec.Error(ctx, ec.Recover(ctx, r))
}
}()
res = ec._Node_healthData(ctx, field, obj)
return res
}
if field.Deferrable != nil {
dfs, ok := deferred[field.Deferrable.Label]
di := 0
if ok {
dfs.AddField(field)
di = len(dfs.Values) - 1
} else {
dfs = graphql.NewFieldSet([]graphql.CollectedField{field})
deferred[field.Deferrable.Label] = dfs
}
dfs.Concurrently(di, func(ctx context.Context) graphql.Marshaler {
return innerFunc(ctx, dfs)
})
// don't run the out.Concurrently() call below
out.Values[i] = graphql.Null
continue
}
out.Concurrently(i, func(ctx context.Context) graphql.Marshaler { return innerFunc(ctx, out) })
default:
panic("unknown field " + strconv.Quote(field.Name))
@@ -16171,6 +16323,28 @@ func (ec *executionContext) _Query(ctx context.Context, sel ast.SelectionSet) gr
func(ctx context.Context) graphql.Marshaler { return innerFunc(ctx, out) })
}
out.Concurrently(i, func(ctx context.Context) graphql.Marshaler { return rrm(innerCtx) })
case "nodesWithMeta":
field := field
innerFunc := func(ctx context.Context, fs *graphql.FieldSet) (res graphql.Marshaler) {
defer func() {
if r := recover(); r != nil {
ec.Error(ctx, ec.Recover(ctx, r))
}
}()
res = ec._Query_nodesWithMeta(ctx, field)
if res == graphql.Null {
atomic.AddUint32(&fs.Invalids, 1)
}
return res
}
rrm := func(ctx context.Context) graphql.Marshaler {
return ec.OperationContext.RootResolverMiddleware(ctx,
func(ctx context.Context) graphql.Marshaler { return innerFunc(ctx, out) })
}
out.Concurrently(i, func(ctx context.Context) graphql.Marshaler { return rrm(innerCtx) })
case "nodeStates":
field := field

View File

@@ -318,18 +318,39 @@ func (r *nodeResolver) SchedulerState(ctx context.Context, obj *schema.Node) (sc
if obj.NodeState != "" {
return obj.NodeState, nil
} else {
return "", fmt.Errorf("no SchedulerState (NodeState) on Object")
return "", fmt.Errorf("resolver: no SchedulerState (NodeState) on node object")
}
}
// HealthState is the resolver for the healthState field.
//
// It returns the node's health state converted to a plain string. A node
// object without a health state yields an error instead of an empty value.
func (r *nodeResolver) HealthState(ctx context.Context, obj *schema.Node) (string, error) {
	// The former "not implemented" panic stub is gone: it ran before the real
	// implementation and made it unreachable.
	if obj.HealthState == "" {
		return "", fmt.Errorf("resolver: no HealthState (NodeState) on node object")
	}
	return string(obj.HealthState), nil
}
// MetaData is the resolver for the metaData field.
//
// It returns the node's metadata as-is. When the node object carries none, a
// debug message is logged and an empty map is returned, so that the caller
// receives an empty map value instead of a nil one.
func (r *nodeResolver) MetaData(ctx context.Context, obj *schema.Node) (any, error) {
	// The former "not implemented" panic stub is gone: it ran before the real
	// implementation and made it unreachable.
	if obj.MetaData == nil {
		cclog.Debug("resolver: no MetaData (NodeState) on node object")
		return map[string]string{}, nil
	}
	return obj.MetaData, nil
}
// HealthData is the resolver for the healthData field.
//
// Nodes that carry health data get it returned unchanged. For nodes without
// any, a debug message is logged and an empty map is handed back instead of
// nil.
func (r *nodeResolver) HealthData(ctx context.Context, obj *schema.Node) (any, error) {
	if obj.HealthData == nil {
		cclog.Debug("resolver: no HealthData (NodeState) on node object")
		return map[string][]string{}, nil
	}
	return obj.HealthData, nil
}
// Clusters is the resolver for the clusters field.
@@ -398,6 +419,15 @@ func (r *queryResolver) Nodes(ctx context.Context, filter []*model.NodeFilter, o
return &model.NodeStateResultList{Items: nodes, Count: &count}, err
}
// NodesWithMeta is the resolver for the nodesWithMeta field.
//
// It returns all nodes matching filter, including their metadata and health
// data, together with the number of returned nodes. Paging is not applied
// (nil is passed as page request); order is forwarded to the repository,
// which currently ignores it. On a repository error no result list is
// returned.
func (r *queryResolver) NodesWithMeta(ctx context.Context, filter []*model.NodeFilter, order *model.OrderByInput) (*model.NodeStateResultList, error) {
	// Why Extra Handler? -> graphql.CollectAllFields(ctx) only returns toplevel fields (i.e.: items, count), and not subfields like item.metaData
	repo := repository.GetNodeRepository()
	nodes, err := repo.QueryNodesWithMeta(ctx, filter, nil, order) // Ignore Paging, Order Unused
	if err != nil {
		// Do not pair a half-populated result list with an error.
		return nil, err
	}

	count := len(nodes)
	return &model.NodeStateResultList{Items: nodes, Count: &count}, nil
}
// NodeStates is the resolver for the nodeStates field.
func (r *queryResolver) NodeStates(ctx context.Context, filter []*model.NodeFilter) ([]*model.NodeStates, error) {
repo := repository.GetNodeRepository()

View File

@@ -154,16 +154,14 @@ func (r *NodeRepository) GetNodeByID(id int64, withMeta bool) (*schema.Node, err
return nil, err
}
// NEEDS METADATA BY ID
// if withMeta {
// var err error
// var meta map[string]string
// if meta, err = r.FetchMetadata(hostname, cluster); err != nil {
// cclog.Warnf("Error while fetching metadata for node '%s'", hostname)
// return nil, err
// }
// node.MetaData = meta
// }
if withMeta {
meta, metaErr := r.FetchMetadata(node.Hostname, node.Cluster)
if metaErr != nil {
cclog.Warnf("Error while fetching metadata for node ID '%d': %v", id, metaErr)
return nil, metaErr
}
node.MetaData = meta
}
return node, nil
}
@@ -382,6 +380,81 @@ func (r *NodeRepository) QueryNodes(
return nodes, nil
}
// QueryNodesWithMeta returns a list of nodes based on a node filter. It always operates
// on the last state (largest timestamp). It includes both (!) optional JSON column data:
// node.meta_data is decoded into Node.MetaData and node_state.health_metrics into
// Node.HealthData. An empty column leaves the respective field nil.
//
// Results are ordered by hostname ascending; the order argument is currently unused.
// Paging is applied only if page is non-nil and page.ItemsPerPage is not -1.
//
// An error is returned if the access check, the query, row scanning, row iteration
// or JSON decoding fails; no partial result is returned in that case.
func (r *NodeRepository) QueryNodesWithMeta(
	ctx context.Context,
	filters []*model.NodeFilter,
	page *model.PageRequest,
	order *model.OrderByInput, // Currently unused!
) ([]*schema.Node, error) {
	query, qerr := AccessCheck(ctx,
		sq.Select("node.hostname", "node.cluster", "node.subcluster",
			"node_state.node_state", "node_state.health_state",
			"node.meta_data", "node_state.health_metrics").
			From("node").
			Join("node_state ON node_state.node_id = node.id").
			Where(latestStateCondition()))
	if qerr != nil {
		return nil, qerr
	}

	query = applyNodeFilters(query, filters)
	query = query.OrderBy("node.hostname ASC")

	if page != nil && page.ItemsPerPage != -1 {
		limit := uint64(page.ItemsPerPage)
		query = query.Offset((uint64(page.Page) - 1) * limit).Limit(limit)
	}

	rows, err := query.RunWith(r.stmtCache).Query()
	if err != nil {
		queryString, queryVars, _ := query.ToSql()
		cclog.Errorf("Error while running query '%s' %v: %v", queryString, queryVars, err)
		return nil, err
	}
	// Release the result set on every exit path, including the early returns
	// taken when JSON decoding fails.
	defer rows.Close()

	nodes := make([]*schema.Node, 0)
	for rows.Next() {
		node := schema.Node{}
		var rawMetaData, rawHealthData []byte

		if err := rows.Scan(&node.Hostname, &node.Cluster, &node.SubCluster,
			&node.NodeState, &node.HealthState, &rawMetaData, &rawHealthData); err != nil {
			cclog.Warn("Error while scanning rows (QueryNodesWithMeta)")
			return nil, err
		}

		// Both JSON columns are optional: an empty value keeps the field nil.
		if len(rawMetaData) != 0 {
			metaData := make(map[string]string)
			if err := json.Unmarshal(rawMetaData, &metaData); err != nil {
				cclog.Warn("Error while unmarshaling raw metadata json")
				return nil, err
			}
			node.MetaData = metaData
		}

		if len(rawHealthData) != 0 {
			healthData := make(map[string][]string)
			if err := json.Unmarshal(rawHealthData, &healthData); err != nil {
				cclog.Warn("Error while unmarshaling raw healthdata json")
				return nil, err
			}
			node.HealthData = healthData
		}

		nodes = append(nodes, &node)
	}

	// rows.Next() also returns false when iteration was aborted by an error;
	// without this check a truncated list would be reported as success.
	if err := rows.Err(); err != nil {
		cclog.Warn("Error while iterating rows (QueryNodesWithMeta)")
		return nil, err
	}

	return nodes, nil
}
// CountNodes returns the total matched nodes based on a node filter. It always operates
// on the last state (largest timestamp) per node.
func (r *NodeRepository) CountNodes(

View File

@@ -23,6 +23,7 @@
} from "@sveltestrap/sveltestrap";
import StatusDash from "./dashdetails/StatusDash.svelte";
import HealthDash from "./dashdetails/HealthDash.svelte";
import UsageDash from "./dashdetails/UsageDash.svelte";
import StatisticsDash from "./dashdetails/StatisticsDash.svelte";
@@ -65,7 +66,13 @@
<TabContent>
<TabPane tabId="status-dash" tab="Status" active>
<CardBody>
<StatusDash clusters={$initq.data.clusters} {presetCluster} {useCbColors} useAltColors></StatusDash>
<StatusDash clusters={$initq.data.clusters} {presetCluster}></StatusDash>
</CardBody>
</TabPane>
<TabPane tabId="health-dash" tab="Metric Status">
<CardBody>
<HealthDash {presetCluster}></HealthDash>
</CardBody>
</TabPane>

View File

@@ -0,0 +1,372 @@
<!--
@component Metric health view component; renders current node states, node health, and per-node metric availability and meta information
Properties:
- `presetCluster String`: The cluster to show status information for
-->
<script>
  import {
    Row,
    Col,
    Card,
    Input,
    InputGroup,
    InputGroupText,
    Table,
    Icon,
    Spinner
  } from "@sveltestrap/sveltestrap";
  import {
    queryStore,
    gql,
    getContextClient,
  } from "@urql/svelte";
  import Refresher from "../../generic/helper/Refresher.svelte";
  import Pie, { colors } from "../../generic/plots/Pie.svelte";

  /* Svelte 5 Props */
  let {
    presetCluster,
  } = $props();

  /* Const Init */
  const client = getContextClient();

  /* State Init */
  let pieWidth = $state(0);
  let tableHostFilter = $state("");
  let tableStateFilter = $state("");
  let tableHealthFilter = $state("");
  let healthTableSorting = $state(
    {
      schedulerState: { dir: "down", active: true },
      healthState: { dir: "down", active: false },
      hostname: { dir: "down", active: false },
    }
  );
  // Placeholder ordering (unused by the backend). It is kept as raw state so
  // that the refresher can re-trigger the status query by assigning a new object.
  let sorting = $state.raw({ field: "startTime", type: "col", order: "DESC" });

  /* Derived */
  let cluster = $derived(presetCluster);

  // Re-created (and therefore re-fetched, see requestPolicy) whenever `cluster`
  // or `sorting` changes.
  const statusQuery = $derived(queryStore({
    client: client,
    query: gql`
      query (
        $nodeFilter: [NodeFilter!]!
        $sorting: OrderByInput!
      ) {
        # $sorting unused in backend: Use placeholder
        nodes: nodesWithMeta(filter: $nodeFilter, order: $sorting) {
          count
          items {
            hostname
            cluster
            subCluster
            schedulerState
            healthState
            metaData
            healthData
          }
        }
        # Get Current States for Pie Charts
        nodeStates(filter: $nodeFilter) {
          state
          count
        },
      }
    `,
    variables: {
      nodeFilter: { cluster: { eq: cluster }},
      sorting: sorting,
    },
    requestPolicy: "network-only"
  }));

  // Table rows; initially sorted by scheduler state, re-sorted by sortBy()
  let healthTableData = $derived.by(() => {
    if ($statusQuery?.data) {
      return [...$statusQuery.data.nodes.items].sort((n1, n2) => {
        return n1['schedulerState'].localeCompare(n2['schedulerState'])
      });
    } else {
      return [];
    }
  });

  // Table rows after applying the three substring filters of the header row
  let filteredTableData = $derived.by(() => {
    let pendingTableData = [...healthTableData];
    if (tableHostFilter != "") {
      pendingTableData = pendingTableData.filter((e) => e.hostname.includes(tableHostFilter))
    }
    if (tableStateFilter != "") {
      pendingTableData = pendingTableData.filter((e) => e.schedulerState.includes(tableStateFilter))
    }
    if (tableHealthFilter != "") {
      pendingTableData = pendingTableData.filter((e) => e.healthState.includes(tableHealthFilter))
    }
    return pendingTableData
  });

  // Scheduler-state counts for the first pie chart; empty list while no data is available
  const refinedStateData = $derived.by(() => {
    const states = $statusQuery?.data?.nodeStates ?? [];
    return states.
      filter((e) => ['allocated', 'reserved', 'idle', 'mixed','down', 'unknown'].includes(e.state)).
      sort((a, b) => b.count - a.count)
  });

  // Health-state counts for the second pie chart; empty list while no data is available
  const refinedHealthData = $derived.by(() => {
    const states = $statusQuery?.data?.nodeStates ?? [];
    return states.
      filter((e) => ['full', 'partial', 'failed'].includes(e.state)).
      sort((a, b) => b.count - a.count)
  });

  /* Functions */
  // Sorts the table by `field`: clicking the active column toggles its direction,
  // clicking another column activates it with its stored direction.
  function sortBy(field) {
    const s = healthTableSorting[field];
    if (s.active) {
      s.dir = s.dir == "up" ? "down" : "up";
    } else {
      for (let key in healthTableSorting)
        healthTableSorting[key].active = false;
      s.active = true;
    }

    // Sort a copy instead of mutating the derived array in place
    const pendingHealthData = [...healthTableData].sort((n1, n2) => {
      const a = n1[field];
      const b = n2[field];
      // Consistent ordering for missing values: always placed last
      if (a == null && b == null) return 0;
      if (a == null) return 1;
      if (b == null) return -1;
      return s.dir == "down" ? a.localeCompare(b) : b.localeCompare(a);
    });

    healthTableSorting = {...healthTableSorting};
    healthTableData = pendingHealthData;
  }
</script>

<!-- Refresher and space for other options -->
<Row class="justify-content-between">
  <Col xs="12" md="5" lg="4" xl="3">
    <Refresher
      initially={120}
      onRefresh={(interval) => {
        // New object reference invalidates statusQuery and triggers a refetch
        sorting = { field: "startTime", type: "col", order: "DESC" };
      }}
    />
  </Col>
</Row>
<hr/>
<!-- Node Health Pies, later Charts -->
{#if $statusQuery.fetching}
<Row cols={1} class="text-center mt-3">
<Col>
<Spinner />
</Col>
</Row>
{:else if $statusQuery.error}
<Row cols={1} class="text-center mt-3">
<Col>
<Card body color="danger">Status Query (States): {$statusQuery.error.message}</Card>
</Col>
</Row>
{:else if $statusQuery?.data?.nodeStates}
<Row cols={{ lg: 4, md: 2 , sm: 1}} class="mb-3 justify-content-center">
<Col class="px-3 mt-2 mt-lg-0">
<div bind:clientWidth={pieWidth}>
{#key refinedStateData}
<h4 class="text-center">
Current {cluster.charAt(0).toUpperCase() + cluster.slice(1)} Node States
</h4>
<Pie
canvasId="hpcpie-slurm"
size={pieWidth * 0.55}
sliceLabel="Nodes"
quantities={refinedStateData.map(
(sd) => sd.count,
)}
entities={refinedStateData.map(
(sd) => sd.state,
)}
fixColors={refinedStateData.map(
(sd) => colors['nodeStates'][sd.state],
)}
/>
{/key}
</div>
</Col>
<Col class="px-4 py-2">
{#key refinedStateData}
<Table>
<tr class="mb-2">
<th></th>
<th>Current State</th>
<th>Nodes</th>
</tr>
{#each refinedStateData as sd, i}
<tr>
<td><Icon name="circle-fill" style="color: {colors['nodeStates'][sd.state]};"/></td>
<td>{sd.state}</td>
<td>{sd.count}</td>
</tr>
{/each}
</Table>
{/key}
</Col>
<Col class="px-3 mt-2 mt-lg-0">
<div bind:clientWidth={pieWidth}>
{#key refinedHealthData}
<h4 class="text-center">
Current {cluster.charAt(0).toUpperCase() + cluster.slice(1)} Node Health
</h4>
<Pie
canvasId="hpcpie-health"
size={pieWidth * 0.55}
sliceLabel="Nodes"
quantities={refinedHealthData.map(
(hd) => hd.count,
)}
entities={refinedHealthData.map(
(hd) => hd.state,
)}
fixColors={refinedHealthData.map(
(hd) => colors['healthStates'][hd.state],
)}
/>
{/key}
</div>
</Col>
<Col class="px-4 py-2">
{#key refinedHealthData}
<Table>
<tr class="mb-2">
<th></th>
<th>Current Health</th>
<th>Nodes</th>
</tr>
{#each refinedHealthData as hd, i}
<tr>
<td><Icon name="circle-fill"style="color: {colors['healthStates'][hd.state]};" /></td>
<td>{hd.state}</td>
<td>{hd.count}</td>
</tr>
{/each}
</Table>
{/key}
</Col>
</Row>
{/if}
<hr/>
<!-- Tabular Info About Node States and Missing Metrics -->
{#if $statusQuery.fetching}
<Row cols={1} class="text-center mt-3">
<Col>
<Spinner />
</Col>
</Row>
{:else if $statusQuery.error}
<Row cols={1} class="text-center mt-3">
<Col>
<Card body color="danger">Status Query (Details): {$statusQuery.error.message}</Card>
</Col>
</Row>
{:else if $statusQuery.data}
<Row>
<Col>
<Card>
<Table hover>
<thead>
<!-- Header Row 1: Titles and Sorting -->
<tr>
<th style="width:10%; max-width:12.5%;" onclick={() => sortBy('hostname')}>
Host
<Icon
name="caret-{healthTableSorting['hostname'].dir}{healthTableSorting['hostname']
.active
? '-fill'
: ''}"
/>
</th>
<th style="width:10%; max-width:12.5%;" onclick={() => sortBy('schedulerState')}>
Scheduler State
<Icon
name="caret-{healthTableSorting['schedulerState'].dir}{healthTableSorting['schedulerState']
.active
? '-fill'
: ''}"
/>
</th>
<th style="width:10%; max-width:12.5%;" onclick={() => sortBy('healthState')}>
Health State
<Icon
name="caret-{healthTableSorting['healthState'].dir}{healthTableSorting['healthState']
.active
? '-fill'
: ''}"
/>
</th>
<th>Metric Availability</th>
<th>Meta Information</th>
</tr>
<!-- Header Row 2: Filters -->
<tr>
<th>
<InputGroup size="sm">
<Input type="text" bind:value={tableHostFilter}/>
<InputGroupText>
<Icon name="search"></Icon>
</InputGroupText>
</InputGroup>
</th>
<th>
<InputGroup size="sm">
<Input type="text" bind:value={tableStateFilter}/>
<InputGroupText>
<Icon name="search"></Icon>
</InputGroupText>
</InputGroup>
</th>
<th>
<InputGroup size="sm">
<Input type="text" bind:value={tableHealthFilter}/>
<InputGroupText>
<Icon name="search"></Icon>
</InputGroupText>
</InputGroup>
</th>
<th></th>
<th></th>
</tr>
</thead>
<tbody>
{#each filteredTableData as host (host.hostname)}
<tr>
<th><b><a href="/monitoring/node/{cluster}/{host.hostname}" target="_blank">{host.hostname}</a></b></th>
<td>{host.schedulerState}</td>
<td>{host.healthState}</td>
<td>
{#each Object.keys(host.healthData) as hkey}
<p>
<b>{hkey}</b>: {host.healthData[hkey]}
</p>
{/each}
</td>
<td>
{#each Object.keys(host.metaData) as mkey}
<p>
<b>{mkey}</b>: {host.metaData[mkey]}
</p>
{/each}
</td>
</tr>
{/each}
</tbody>
</Table>
</Card>
</Col>
</Row>
{:else}
<Card class="mx-4" body color="warning">Cannot render metric health info: No data!</Card>
{/if}

View File

@@ -15,7 +15,6 @@
CardBody,
Table,
Progress,
Icon,
Spinner
} from "@sveltestrap/sveltestrap";
import {
@@ -27,22 +26,18 @@
import Refresher from "../../generic/helper/Refresher.svelte";
import TimeSelection from "../../generic/select/TimeSelection.svelte";
import Roofline from "../../generic/plots/Roofline.svelte";
import Pie, { colors } from "../../generic/plots/Pie.svelte";
import Stacked from "../../generic/plots/Stacked.svelte";
/* Svelte 5 Props */
let {
clusters,
presetCluster,
useCbColors = false,
useAltColors = false,
} = $props();
/* Const Init */
const client = getContextClient();
/* State Init */
let pieWidth = $state(0);
let from = $state(new Date(Date.now() - 5 * 60 * 1000));
let to = $state(new Date(Date.now()));
let stackedFrom = $state(Math.floor(Date.now() / 1000) - 14400);
@@ -163,11 +158,6 @@
schedulerState
}
}
# Get Current States fir Pie Charts
nodeStates(filter: $nodeFilter) {
state
count
}
# totalNodes includes multiples if shared jobs
jobsStatistics(
filter: $jobFilter
@@ -196,18 +186,6 @@
requestPolicy: "network-only"
}));
const refinedStateData = $derived.by(() => {
return $statusQuery?.data?.nodeStates.
filter((e) => ['allocated', 'reserved', 'idle', 'mixed','down', 'unknown'].includes(e.state)).
sort((a, b) => b.count - a.count)
});
const refinedHealthData = $derived.by(() => {
return $statusQuery?.data?.nodeStates.
filter((e) => ['full', 'partial', 'failed'].includes(e.state)).
sort((a, b) => b.count - a.count)
});
/* Effects */
$effect(() => {
if ($statusQuery.data) {
@@ -367,19 +345,6 @@
return result
}
function legendColors(targetIdx) {
// Reuses first color if targetIdx overflows
let c;
if (useCbColors) {
c = [...colors['colorblind']];
} else if (useAltColors) {
c = [...colors['alternative']];
} else {
c = [...colors['default']];
}
return c[(c.length + targetIdx) % c.length];
}
</script>
<!-- Refresher and space for other options -->
@@ -408,7 +373,7 @@
<hr/>
<!-- Node Stack Charts Dev-->
<!-- Node Stack Charts -->
{#if $statesTimed.fetching}
<Row cols={1} class="text-center mt-3">
<Col>
@@ -460,109 +425,6 @@
</Row>
{/if}
<hr/>
<!-- Node Health Pis, later Charts -->
{#if $statusQuery.fetching}
<Row cols={1} class="text-center mt-3">
<Col>
<Spinner />
</Col>
</Row>
{:else if $statusQuery.error}
<Row cols={1} class="text-center mt-3">
<Col>
<Card body color="danger">Status Query (States): {$statesTimed.error.message}</Card>
</Col>
</Row>
{:else if $statusQuery?.data?.nodeStates}
<Row cols={{ lg: 4, md: 2 , sm: 1}} class="mb-3 justify-content-center">
<Col class="px-3 mt-2 mt-lg-0">
<div bind:clientWidth={pieWidth}>
{#key refinedStateData}
<h4 class="text-center">
Current {cluster.charAt(0).toUpperCase() + cluster.slice(1)} Node States
</h4>
<Pie
canvasId="hpcpie-slurm"
size={pieWidth * 0.55}
sliceLabel="Nodes"
quantities={refinedStateData.map(
(sd) => sd.count,
)}
entities={refinedStateData.map(
(sd) => sd.state,
)}
fixColors={refinedStateData.map(
(sd) => colors['nodeStates'][sd.state],
)}
/>
{/key}
</div>
</Col>
<Col class="px-4 py-2">
{#key refinedStateData}
<Table>
<tr class="mb-2">
<th></th>
<th>Current State</th>
<th>Nodes</th>
</tr>
{#each refinedStateData as sd, i}
<tr>
<td><Icon name="circle-fill" style="color: {colors['nodeStates'][sd.state]};"/></td>
<td>{sd.state}</td>
<td>{sd.count}</td>
</tr>
{/each}
</Table>
{/key}
</Col>
<Col class="px-3 mt-2 mt-lg-0">
<div bind:clientWidth={pieWidth}>
{#key refinedHealthData}
<h4 class="text-center">
Current {cluster.charAt(0).toUpperCase() + cluster.slice(1)} Node Health
</h4>
<Pie
canvasId="hpcpie-health"
size={pieWidth * 0.55}
sliceLabel="Nodes"
quantities={refinedHealthData.map(
(hd) => hd.count,
)}
entities={refinedHealthData.map(
(hd) => hd.state,
)}
fixColors={refinedHealthData.map(
(hd) => colors['healthStates'][hd.state],
)}
/>
{/key}
</div>
</Col>
<Col class="px-4 py-2">
{#key refinedHealthData}
<Table>
<tr class="mb-2">
<th></th>
<th>Current Health</th>
<th>Nodes</th>
</tr>
{#each refinedHealthData as hd, i}
<tr>
<td><Icon name="circle-fill"style="color: {colors['healthStates'][hd.state]};" /></td>
<td>{hd.state}</td>
<td>{hd.count}</td>
</tr>
{/each}
</Table>
{/key}
</Col>
</Row>
{/if}
<hr/>
<!-- Gauges & Roofline per Subcluster-->
{#if $statusQuery.fetching}