add nodeState info display and filtering to systems views

This commit is contained in:
Christoph Kluge
2025-11-18 15:56:55 +01:00
parent 21334c8026
commit 90c3381954
17 changed files with 447 additions and 148 deletions

View File

@@ -262,6 +262,7 @@ enum SortByAggregate {
# Metric data of a single node together with its identifying attributes.
type NodeMetrics {
host: String!
# Node state string; the resolvers fill it from a hostname-to-state lookup.
state: String!
subCluster: String!
metrics: [JobMetricWithName!]!
}
@@ -375,6 +376,7 @@ type Query {
nodeMetricsList(
cluster: String!
subCluster: String!
stateFilter: String!
nodeFilter: String!
scopes: [MetricScope!]
metrics: [String!]

View File

@@ -288,6 +288,7 @@ type ComplexityRoot struct {
NodeMetrics struct {
Host func(childComplexity int) int
Metrics func(childComplexity int) int
State func(childComplexity int) int
SubCluster func(childComplexity int) int
}
@@ -329,7 +330,7 @@ type ComplexityRoot struct {
JobsStatistics func(childComplexity int, filter []*model.JobFilter, metrics []string, page *model.PageRequest, sortBy *model.SortByAggregate, groupBy *model.Aggregate, numDurationBins *string, numMetricBins *int) int
Node func(childComplexity int, id string) int
NodeMetrics func(childComplexity int, cluster string, nodes []string, scopes []schema.MetricScope, metrics []string, from time.Time, to time.Time) int
NodeMetricsList func(childComplexity int, cluster string, subCluster string, nodeFilter string, scopes []schema.MetricScope, metrics []string, from time.Time, to time.Time, page *model.PageRequest, resolution *int) int
NodeMetricsList func(childComplexity int, cluster string, subCluster string, stateFilter string, nodeFilter string, scopes []schema.MetricScope, metrics []string, from time.Time, to time.Time, page *model.PageRequest, resolution *int) int
NodeStates func(childComplexity int, filter []*model.NodeFilter) int
NodeStatesTimed func(childComplexity int, filter []*model.NodeFilter, typeArg string) int
Nodes func(childComplexity int, filter []*model.NodeFilter, order *model.OrderByInput) int
@@ -483,7 +484,7 @@ type QueryResolver interface {
JobsFootprints(ctx context.Context, filter []*model.JobFilter, metrics []string) (*model.Footprints, error)
RooflineHeatmap(ctx context.Context, filter []*model.JobFilter, rows int, cols int, minX float64, minY float64, maxX float64, maxY float64) ([][]float64, error)
NodeMetrics(ctx context.Context, cluster string, nodes []string, scopes []schema.MetricScope, metrics []string, from time.Time, to time.Time) ([]*model.NodeMetrics, error)
NodeMetricsList(ctx context.Context, cluster string, subCluster string, nodeFilter string, scopes []schema.MetricScope, metrics []string, from time.Time, to time.Time, page *model.PageRequest, resolution *int) (*model.NodesResultList, error)
NodeMetricsList(ctx context.Context, cluster string, subCluster string, stateFilter string, nodeFilter string, scopes []schema.MetricScope, metrics []string, from time.Time, to time.Time, page *model.PageRequest, resolution *int) (*model.NodesResultList, error)
}
type SubClusterResolver interface {
NumberOfNodes(ctx context.Context, obj *schema.SubCluster) (int, error)
@@ -1581,6 +1582,13 @@ func (e *executableSchema) Complexity(ctx context.Context, typeName, field strin
return e.complexity.NodeMetrics.Metrics(childComplexity), true
case "NodeMetrics.state":
if e.complexity.NodeMetrics.State == nil {
break
}
return e.complexity.NodeMetrics.State(childComplexity), true
case "NodeMetrics.subCluster":
if e.complexity.NodeMetrics.SubCluster == nil {
break
@@ -1823,7 +1831,7 @@ func (e *executableSchema) Complexity(ctx context.Context, typeName, field strin
return 0, false
}
return e.complexity.Query.NodeMetricsList(childComplexity, args["cluster"].(string), args["subCluster"].(string), args["nodeFilter"].(string), args["scopes"].([]schema.MetricScope), args["metrics"].([]string), args["from"].(time.Time), args["to"].(time.Time), args["page"].(*model.PageRequest), args["resolution"].(*int)), true
return e.complexity.Query.NodeMetricsList(childComplexity, args["cluster"].(string), args["subCluster"].(string), args["stateFilter"].(string), args["nodeFilter"].(string), args["scopes"].([]schema.MetricScope), args["metrics"].([]string), args["from"].(time.Time), args["to"].(time.Time), args["page"].(*model.PageRequest), args["resolution"].(*int)), true
case "Query.nodeStates":
if e.complexity.Query.NodeStates == nil {
@@ -2667,6 +2675,7 @@ enum SortByAggregate {
type NodeMetrics {
host: String!
state: String!
subCluster: String!
metrics: [JobMetricWithName!]!
}
@@ -2780,6 +2789,7 @@ type Query {
nodeMetricsList(
cluster: String!
subCluster: String!
stateFilter: String!
nodeFilter: String!
scopes: [MetricScope!]
metrics: [String!]
@@ -3224,41 +3234,46 @@ func (ec *executionContext) field_Query_nodeMetricsList_args(ctx context.Context
return nil, err
}
args["subCluster"] = arg1
arg2, err := graphql.ProcessArgField(ctx, rawArgs, "nodeFilter", ec.unmarshalNString2string)
arg2, err := graphql.ProcessArgField(ctx, rawArgs, "stateFilter", ec.unmarshalNString2string)
if err != nil {
return nil, err
}
args["nodeFilter"] = arg2
arg3, err := graphql.ProcessArgField(ctx, rawArgs, "scopes", ec.unmarshalOMetricScope2ᚕgithubᚗcomᚋClusterCockpitᚋccᚑlibᚋschemaᚐMetricScopeᚄ)
args["stateFilter"] = arg2
arg3, err := graphql.ProcessArgField(ctx, rawArgs, "nodeFilter", ec.unmarshalNString2string)
if err != nil {
return nil, err
}
args["scopes"] = arg3
arg4, err := graphql.ProcessArgField(ctx, rawArgs, "metrics", ec.unmarshalOString2ᚕstring)
args["nodeFilter"] = arg3
arg4, err := graphql.ProcessArgField(ctx, rawArgs, "scopes", ec.unmarshalOMetricScope2ᚕgithubᚗcomᚋClusterCockpitᚋccᚑlibᚋschemaᚐMetricScope)
if err != nil {
return nil, err
}
args["metrics"] = arg4
arg5, err := graphql.ProcessArgField(ctx, rawArgs, "from", ec.unmarshalNTime2timeᚐTime)
args["scopes"] = arg4
arg5, err := graphql.ProcessArgField(ctx, rawArgs, "metrics", ec.unmarshalOString2ᚕstringᚄ)
if err != nil {
return nil, err
}
args["from"] = arg5
arg6, err := graphql.ProcessArgField(ctx, rawArgs, "to", ec.unmarshalNTime2timeᚐTime)
args["metrics"] = arg5
arg6, err := graphql.ProcessArgField(ctx, rawArgs, "from", ec.unmarshalNTime2timeᚐTime)
if err != nil {
return nil, err
}
args["to"] = arg6
arg7, err := graphql.ProcessArgField(ctx, rawArgs, "page", ec.unmarshalOPageRequest2ᚖgithubᚗcomᚋClusterCockpitᚋccᚑbackendᚋinternalᚋgraphᚋmodelᚐPageRequest)
args["from"] = arg6
arg7, err := graphql.ProcessArgField(ctx, rawArgs, "to", ec.unmarshalNTime2timeᚐTime)
if err != nil {
return nil, err
}
args["page"] = arg7
arg8, err := graphql.ProcessArgField(ctx, rawArgs, "resolution", ec.unmarshalOInt2ᚖint)
args["to"] = arg7
arg8, err := graphql.ProcessArgField(ctx, rawArgs, "page", ec.unmarshalOPageRequest2ᚖgithubᚗcomᚋClusterCockpitᚋccᚑbackendᚋinternalᚋgraphᚋmodelᚐPageRequest)
if err != nil {
return nil, err
}
args["resolution"] = arg8
args["page"] = arg8
arg9, err := graphql.ProcessArgField(ctx, rawArgs, "resolution", ec.unmarshalOInt2ᚖint)
if err != nil {
return nil, err
}
args["resolution"] = arg9
return args, nil
}
@@ -10289,6 +10304,50 @@ func (ec *executionContext) fieldContext_NodeMetrics_host(_ context.Context, fie
return fc, nil
}
// _NodeMetrics_state resolves the "state" field of a NodeMetrics object and
// marshals it as a GraphQL String. It returns graphql.Null when the field
// context cannot be built, when the resolver middleware returns an error or
// panics, or when the resolved value is nil.
func (ec *executionContext) _NodeMetrics_state(ctx context.Context, field graphql.CollectedField, obj *model.NodeMetrics) (ret graphql.Marshaler) {
fc, err := ec.fieldContext_NodeMetrics_state(ctx, field)
if err != nil {
return graphql.Null
}
ctx = graphql.WithFieldContext(ctx, fc)
// Turn a panic raised while resolving into a reported error and a null
// result instead of letting it propagate to the caller.
defer func() {
if r := recover(); r != nil {
ec.Error(ctx, ec.Recover(ctx, r))
ret = graphql.Null
}
}()
resTmp, err := ec.ResolverMiddleware(ctx, func(rctx context.Context) (any, error) {
ctx = rctx // use context from middleware stack in children
return obj.State, nil
})
if err != nil {
ec.Error(ctx, err)
return graphql.Null
}
// A nil result is reported as "must not be null" unless the field already
// carries an error.
if resTmp == nil {
if !graphql.HasFieldError(ctx, fc) {
ec.Errorf(ctx, "must not be null")
}
return graphql.Null
}
res := resTmp.(string)
fc.Result = res
return ec.marshalNString2string(ctx, field.Selections, res)
}
// fieldContext_NodeMetrics_state builds the field context for the "state"
// field of NodeMetrics. The field is marked as neither a method nor a
// resolver, and any request for a child field context is answered with an
// error because a String has no sub-fields.
func (ec *executionContext) fieldContext_NodeMetrics_state(_ context.Context, field graphql.CollectedField) (fc *graphql.FieldContext, err error) {
	// Scalar leaf: looking up a child context is always an error.
	noChildren := func(ctx context.Context, field graphql.CollectedField) (*graphql.FieldContext, error) {
		return nil, errors.New("field of type String does not have child fields")
	}

	fc = &graphql.FieldContext{
		Object:     "NodeMetrics",
		Field:      field,
		IsMethod:   false,
		IsResolver: false,
		Child:      noChildren,
	}
	return fc, nil
}
func (ec *executionContext) _NodeMetrics_subCluster(ctx context.Context, field graphql.CollectedField, obj *model.NodeMetrics) (ret graphql.Marshaler) {
fc, err := ec.fieldContext_NodeMetrics_subCluster(ctx, field)
if err != nil {
@@ -10755,6 +10814,8 @@ func (ec *executionContext) fieldContext_NodesResultList_items(_ context.Context
switch field.Name {
case "host":
return ec.fieldContext_NodeMetrics_host(ctx, field)
case "state":
return ec.fieldContext_NodeMetrics_state(ctx, field)
case "subCluster":
return ec.fieldContext_NodeMetrics_subCluster(ctx, field)
case "metrics":
@@ -12199,6 +12260,8 @@ func (ec *executionContext) fieldContext_Query_nodeMetrics(ctx context.Context,
switch field.Name {
case "host":
return ec.fieldContext_NodeMetrics_host(ctx, field)
case "state":
return ec.fieldContext_NodeMetrics_state(ctx, field)
case "subCluster":
return ec.fieldContext_NodeMetrics_subCluster(ctx, field)
case "metrics":
@@ -12235,7 +12298,7 @@ func (ec *executionContext) _Query_nodeMetricsList(ctx context.Context, field gr
}()
resTmp, err := ec.ResolverMiddleware(ctx, func(rctx context.Context) (any, error) {
ctx = rctx // use context from middleware stack in children
return ec.resolvers.Query().NodeMetricsList(rctx, fc.Args["cluster"].(string), fc.Args["subCluster"].(string), fc.Args["nodeFilter"].(string), fc.Args["scopes"].([]schema.MetricScope), fc.Args["metrics"].([]string), fc.Args["from"].(time.Time), fc.Args["to"].(time.Time), fc.Args["page"].(*model.PageRequest), fc.Args["resolution"].(*int))
return ec.resolvers.Query().NodeMetricsList(rctx, fc.Args["cluster"].(string), fc.Args["subCluster"].(string), fc.Args["stateFilter"].(string), fc.Args["nodeFilter"].(string), fc.Args["scopes"].([]schema.MetricScope), fc.Args["metrics"].([]string), fc.Args["from"].(time.Time), fc.Args["to"].(time.Time), fc.Args["page"].(*model.PageRequest), fc.Args["resolution"].(*int))
})
if err != nil {
ec.Error(ctx, err)
@@ -19371,6 +19434,11 @@ func (ec *executionContext) _NodeMetrics(ctx context.Context, sel ast.SelectionS
if out.Values[i] == graphql.Null {
out.Invalids++
}
case "state":
out.Values[i] = ec._NodeMetrics_state(ctx, field, obj)
if out.Values[i] == graphql.Null {
out.Invalids++
}
case "subCluster":
out.Values[i] = ec._NodeMetrics_subCluster(ctx, field, obj)
if out.Values[i] == graphql.Null {

View File

@@ -181,6 +181,7 @@ type NodeFilter struct {
// NodeMetrics is the Go model for the GraphQL NodeMetrics type: one host with
// its state, subcluster and list of named metrics.
type NodeMetrics struct {
Host string `json:"host"`
// State holds the node's state string and is serialized under the "state" key.
State string `json:"state"`
SubCluster string `json:"subCluster"`
Metrics []*JobMetricWithName `json:"metrics"`
}

View File

@@ -756,10 +756,14 @@ func (r *queryResolver) NodeMetrics(ctx context.Context, cluster string, nodes [
return nil, err
}
nodeRepo := repository.GetNodeRepository()
stateMap, _ := nodeRepo.MapNodes(cluster)
nodeMetrics := make([]*model.NodeMetrics, 0, len(data))
for hostname, metrics := range data {
host := &model.NodeMetrics{
Host: hostname,
State: stateMap[hostname],
Metrics: make([]*model.JobMetricWithName, 0, len(metrics)*len(scopes)),
}
host.SubCluster, err = archive.GetSubClusterByNode(cluster, hostname)
@@ -784,7 +788,7 @@ func (r *queryResolver) NodeMetrics(ctx context.Context, cluster string, nodes [
}
// NodeMetricsList is the resolver for the nodeMetricsList field.
func (r *queryResolver) NodeMetricsList(ctx context.Context, cluster string, subCluster string, nodeFilter string, scopes []schema.MetricScope, metrics []string, from time.Time, to time.Time, page *model.PageRequest, resolution *int) (*model.NodesResultList, error) {
func (r *queryResolver) NodeMetricsList(ctx context.Context, cluster string, subCluster string, stateFilter string, nodeFilter string, scopes []schema.MetricScope, metrics []string, from time.Time, to time.Time, page *model.PageRequest, resolution *int) (*model.NodesResultList, error) {
if resolution == nil { // Load from Config
if config.Keys.EnableResampling != nil {
defaultRes := slices.Max(config.Keys.EnableResampling.Resolutions)
@@ -806,9 +810,47 @@ func (r *queryResolver) NodeMetricsList(ctx context.Context, cluster string, sub
}
}
data, totalNodes, hasNextPage, err := metricDataDispatcher.LoadNodeListData(cluster, subCluster, nodeFilter, metrics, scopes, *resolution, from, to, page, ctx)
// Note: This Prefilter Logic Can Be Used To Completely Switch Node Source Of Truth To SQLite DB
// Adapt and extend filters/paging/sorting in QueryNodes Function to return []string array of hostnames, input array to LoadNodeListData
// LoadNodeListData, instead of building queried nodes from topology anew, will directly use QueryNodes hostname array
// Caveat: "notindb" state will not be resolvable anymore by default, or needs reverse lookup by dedicated comparison to topology data after all
preFiltered := make([]string, 0)
stateMap := make(map[string]string)
if stateFilter != "all" {
nodeRepo := repository.GetNodeRepository()
stateQuery := make([]*model.NodeFilter, 0)
// Required Filters
stateQuery = append(stateQuery, &model.NodeFilter{Cluster: &model.StringInput{Eq: &cluster}})
if subCluster != "" {
stateQuery = append(stateQuery, &model.NodeFilter{Subcluster: &model.StringInput{Eq: &subCluster}})
}
if stateFilter == "notindb" {
// Backward Filtering: Add Keyword, No Additional Filters: Returns All Nodes For Cluster (and SubCluster)
preFiltered = append(preFiltered, "exclude")
} else {
// Workaround: If no nodes match, we need at least one element for trigger in LoadNodeListData
preFiltered = append(preFiltered, stateFilter)
// Forward Filtering: Match Only selected stateFilter
var queryState schema.SchedulerState = schema.SchedulerState(stateFilter)
stateQuery = append(stateQuery, &model.NodeFilter{SchedulerState: &queryState})
}
stateNodes, serr := nodeRepo.QueryNodes(ctx, stateQuery, &model.OrderByInput{}) // Order not Used
if serr != nil {
cclog.Warn("error while loading node database data (Resolver.NodeMetricsList)")
return nil, serr
}
for _, node := range stateNodes {
preFiltered = append(preFiltered, node.Hostname)
stateMap[node.Hostname] = string(node.NodeState)
}
}
data, totalNodes, hasNextPage, err := metricDataDispatcher.LoadNodeListData(cluster, subCluster, nodeFilter, preFiltered, metrics, scopes, *resolution, from, to, page, ctx)
if err != nil {
cclog.Warn("error while loading node data")
cclog.Warn("error while loading node data (Resolver.NodeMetricsList")
return nil, err
}
@@ -816,6 +858,7 @@ func (r *queryResolver) NodeMetricsList(ctx context.Context, cluster string, sub
for hostname, metrics := range data {
host := &model.NodeMetrics{
Host: hostname,
State: stateMap[hostname],
Metrics: make([]*model.JobMetricWithName, 0, len(metrics)*len(scopes)),
}
host.SubCluster, err = archive.GetSubClusterByNode(cluster, hostname)

View File

@@ -333,6 +333,7 @@ func LoadNodeData(
func LoadNodeListData(
cluster, subCluster, nodeFilter string,
preFiltered []string,
metrics []string,
scopes []schema.MetricScope,
resolution int,
@@ -351,7 +352,7 @@ func LoadNodeListData(
}
}
data, totalNodes, hasNextPage, err := repo.LoadNodeListData(cluster, subCluster, nodeFilter, metrics, scopes, resolution, from, to, page, ctx)
data, totalNodes, hasNextPage, err := repo.LoadNodeListData(cluster, subCluster, nodeFilter, preFiltered, metrics, scopes, resolution, from, to, page, ctx)
if err != nil {
if len(data) != 0 {
cclog.Warnf("partial error: %s", err.Error())

View File

@@ -9,6 +9,7 @@ import (
"context"
"encoding/json"
"fmt"
"slices"
"sort"
"strconv"
"strings"
@@ -678,6 +679,7 @@ func (ccms *CCMetricStoreInternal) LoadNodeData(
// Used for Systems-View Node-List
func (ccms *CCMetricStoreInternal) LoadNodeListData(
cluster, subCluster, nodeFilter string,
preFiltered []string,
metrics []string,
scopes []schema.MetricScope,
resolution int,
@@ -701,18 +703,37 @@ func (ccms *CCMetricStoreInternal) LoadNodeListData(
}
}
// 2) Filter nodes
// 2.1) Filter nodes by name
if nodeFilter != "" {
filteredNodes := []string{}
filteredNodesByName := []string{}
for _, node := range nodes {
if strings.Contains(node, nodeFilter) {
filteredNodes = append(filteredNodes, node)
filteredNodesByName = append(filteredNodesByName, node)
}
}
nodes = filteredNodes
nodes = filteredNodesByName
}
// 2.1) Count total nodes && Sort nodes -> Sorting invalidated after ccms return ...
// 2.2) Filter nodes by state using prefiltered match array
if len(preFiltered) > 0 {
filteredNodesByState := []string{}
if preFiltered[0] == "exclude" { // Backwards: PreFiltered contains all Nodes in DB > Return Missing Nodes
for _, node := range nodes {
if !slices.Contains(preFiltered, node) {
filteredNodesByState = append(filteredNodesByState, node)
}
}
} else { // Forwards: Prefiltered contains specific nodeState > Return Matches
for _, node := range nodes {
if slices.Contains(preFiltered, node) {
filteredNodesByState = append(filteredNodesByState, node)
}
}
}
nodes = filteredNodesByState
}
// 2.3) Count total nodes && Sort nodes -> Sorting invalidated after return ...
totalNodes = len(nodes)
sort.Strings(nodes)

View File

@@ -11,6 +11,7 @@ import (
"encoding/json"
"fmt"
"net/http"
"slices"
"sort"
"strings"
"time"
@@ -800,6 +801,7 @@ func (ccms *CCMetricStore) LoadNodeData(
// Used for Systems-View Node-List
func (ccms *CCMetricStore) LoadNodeListData(
cluster, subCluster, nodeFilter string,
preFiltered []string,
metrics []string,
scopes []schema.MetricScope,
resolution int,
@@ -824,18 +826,37 @@ func (ccms *CCMetricStore) LoadNodeListData(
}
}
// 2) Filter nodes
// 2.1) Filter nodes by name
if nodeFilter != "" {
filteredNodes := []string{}
filteredNodesByName := []string{}
for _, node := range nodes {
if strings.Contains(node, nodeFilter) {
filteredNodes = append(filteredNodes, node)
filteredNodesByName = append(filteredNodesByName, node)
}
}
nodes = filteredNodes
nodes = filteredNodesByName
}
// 2.1) Count total nodes && Sort nodes -> Sorting invalidated after ccms return ...
// 2.2) Filter nodes by state using prefiltered match array
if len(preFiltered) > 0 {
filteredNodesByState := []string{}
if preFiltered[0] == "exclude" { // Backwards: PreFiltered contains all Nodes in DB > Return Missing Nodes
for _, node := range nodes {
if !slices.Contains(preFiltered, node) {
filteredNodesByState = append(filteredNodesByState, node)
}
}
} else { // Forwards: Prefiltered contains specific nodeState > Return Matches
for _, node := range nodes {
if slices.Contains(preFiltered, node) {
filteredNodesByState = append(filteredNodesByState, node)
}
}
}
nodes = filteredNodesByState
}
// 2.3) Count total nodes && Sort nodes -> Sorting invalidated after return ...
totalNodes = len(nodes)
sort.Strings(nodes)

View File

@@ -36,7 +36,7 @@ type MetricDataRepository interface {
LoadNodeData(cluster string, metrics, nodes []string, scopes []schema.MetricScope, from, to time.Time, ctx context.Context) (map[string]map[string][]*schema.JobMetric, error)
// Return a map of hosts to a map of metrics to a map of scopes for multiple nodes.
LoadNodeListData(cluster, subCluster, nodeFilter string, metrics []string, scopes []schema.MetricScope, resolution int, from, to time.Time, page *model.PageRequest, ctx context.Context) (map[string]schema.JobData, int, bool, error)
LoadNodeListData(cluster, subCluster, nodeFilter string, preFiltered []string, metrics []string, scopes []schema.MetricScope, resolution int, from, to time.Time, page *model.PageRequest, ctx context.Context) (map[string]schema.JobData, int, bool, error)
}
var metricDataRepos map[string]MetricDataRepository = map[string]MetricDataRepository{}

View File

@@ -14,6 +14,7 @@ import (
"net/http"
"os"
"regexp"
"slices"
"sort"
"strings"
"sync"
@@ -495,6 +496,7 @@ func (pdb *PrometheusDataRepository) LoadScopedStats(
// Implemented by NHR@FAU; Used in NodeList-View
func (pdb *PrometheusDataRepository) LoadNodeListData(
cluster, subCluster, nodeFilter string,
preFiltered []string,
metrics []string,
scopes []schema.MetricScope,
resolution int,
@@ -520,18 +522,37 @@ func (pdb *PrometheusDataRepository) LoadNodeListData(
}
}
// 2) Filter nodes
// 2.1) Filter nodes by name
if nodeFilter != "" {
filteredNodes := []string{}
filteredNodesByName := []string{}
for _, node := range nodes {
if strings.Contains(node, nodeFilter) {
filteredNodes = append(filteredNodes, node)
filteredNodesByName = append(filteredNodesByName, node)
}
}
nodes = filteredNodes
nodes = filteredNodesByName
}
// 2.1) Count total nodes && Sort nodes -> Sorting invalidated after return ...
// 2.2) Filter nodes by state using prefiltered match array
if len(preFiltered) > 0 {
filteredNodesByState := []string{}
if preFiltered[0] == "exclude" { // Backwards: PreFiltered contains all Nodes in DB > Return Missing Nodes
for _, node := range nodes {
if !slices.Contains(preFiltered, node) {
filteredNodesByState = append(filteredNodesByState, node)
}
}
} else { // Forwards: Prefiltered contains specific nodeState > Return Matches
for _, node := range nodes {
if slices.Contains(preFiltered, node) {
filteredNodesByState = append(filteredNodesByState, node)
}
}
}
nodes = filteredNodesByState
}
// 2.3) Count total nodes && Sort nodes -> Sorting invalidated after return ...
totalNodes = len(nodes)
sort.Strings(nodes)

View File

@@ -64,6 +64,7 @@ func (tmdr *TestMetricDataRepository) LoadNodeData(
func (tmdr *TestMetricDataRepository) LoadNodeListData(
cluster, subCluster, nodeFilter string,
preFiltered []string,
metrics []string,
scopes []schema.MetricScope,
resolution int,

View File

@@ -277,8 +277,8 @@ func (r *NodeRepository) QueryNodes(
order *model.OrderByInput, // Currently unused!
) ([]*schema.Node, error) {
query, qerr := AccessCheck(ctx,
sq.Select("node.hostname", "node.cluster", "node.subcluster", "node_state.node_state",
"node_state.health_state", "MAX(node_state.time_stamp) as time").
sq.Select("hostname", "cluster", "subcluster", "node_state",
"health_state", "MAX(time_stamp) as time").
From("node").
Join("node_state ON node_state.node_id = node.id"))
if qerr != nil {
@@ -287,24 +287,31 @@ func (r *NodeRepository) QueryNodes(
for _, f := range filters {
if f.Hostname != nil {
query = buildStringCondition("node.hostname", f.Hostname, query)
query = buildStringCondition("hostname", f.Hostname, query)
}
if f.Cluster != nil {
query = buildStringCondition("node.cluster", f.Cluster, query)
query = buildStringCondition("cluster", f.Cluster, query)
}
if f.Subcluster != nil {
query = buildStringCondition("node.subcluster", f.Subcluster, query)
query = buildStringCondition("subcluster", f.Subcluster, query)
}
if f.SchedulerState != nil {
query = query.Where("node.node_state = ?", f.SchedulerState)
query = query.Where("node_state = ?", f.SchedulerState)
// Requires Additional time_stamp Filter: Else the last (past!) time_stamp with queried state will be returned
now := time.Now().Unix()
query = query.Where(sq.Gt{"time_stamp": (now - 60)})
}
if f.HealthState != nil {
query = query.Where("node.health_state = ?", f.HealthState)
query = query.Where("health_state = ?", f.HealthState)
// Requires Additional time_stamp Filter: Else the last (past!) time_stamp with queried state will be returned
now := time.Now().Unix()
query = query.Where(sq.Gt{"time_stamp": (now - 60)})
}
}
// Add Grouping after filters
query = query.GroupBy("node_state.node_id")
// Add Grouping and Order after filters
query = query.GroupBy("node_id").
OrderBy("hostname ASC")
rows, err := query.RunWith(r.stmtCache).Query()
if err != nil {
@@ -360,6 +367,36 @@ func (r *NodeRepository) ListNodes(cluster string) ([]*schema.Node, error) {
return nodeList, nil
}
// MapNodes returns a map from hostname to node state string for the nodes of
// the given cluster.
//
// The query joins node with node_state, groups by node and selects
// MAX(time_stamp) next to node_state, so each hostname is intended to map to
// its most recent state. NOTE(review): this relies on the database returning
// the non-aggregated node_state column from the row holding the maximum
// time_stamp; confirm for the backend in use. Nodes without any node_state
// row do not appear in the result because of the join.
//
// On query, scan or row-iteration failure a warning is logged and a nil map
// is returned together with the error.
func (r *NodeRepository) MapNodes(cluster string) (map[string]string, error) {
	q := sq.Select("node.hostname", "node_state.node_state", "MAX(node_state.time_stamp) as time").
		From("node").
		Join("node_state ON node_state.node_id = node.id").
		Where("node.cluster = ?", cluster).
		GroupBy("node_state.node_id").
		OrderBy("node.hostname ASC")

	rows, err := q.RunWith(r.DB).Query()
	if err != nil {
		cclog.Warn("Error while querying node list")
		return nil, err
	}
	defer rows.Close()

	stateMap := make(map[string]string)
	for rows.Next() {
		var hostname, nodestate string
		var timestamp int
		if err := rows.Scan(&hostname, &nodestate, &timestamp); err != nil {
			cclog.Warnf("Error while scanning node list (MapNodes) at time '%d'", timestamp)
			return nil, err
		}
		stateMap[hostname] = nodestate
	}
	// rows.Next also returns false when iteration aborts with an error, so
	// check explicitly to avoid handing back a silently truncated map.
	if err := rows.Err(); err != nil {
		cclog.Warn("Error while iterating node list (MapNodes)")
		return nil, err
	}

	return stateMap, nil
}
func (r *NodeRepository) CountStates(ctx context.Context, filters []*model.NodeFilter, column string) ([]*model.NodeStates, error) {
query, qerr := AccessCheck(ctx, sq.Select("hostname", column, "MAX(time_stamp) as time").From("node"))
if qerr != nil {

View File

@@ -16,6 +16,7 @@
Row,
Col,
Input,
Button,
InputGroup,
InputGroupText,
Icon,
@@ -63,6 +64,7 @@
query ($cluster: String!, $nodes: [String!], $from: Time!, $to: Time!) {
nodeMetrics(cluster: $cluster, nodes: $nodes, from: $from, to: $to) {
host
state
subCluster
metrics {
name
@@ -97,6 +99,16 @@
}
}
`;
// Node State Colors
const stateColors = {
allocated: 'success',
reserved: 'info',
idle: 'primary',
mixed: 'warning',
down: 'danger',
unknown: 'dark',
notindb: 'secondary'
}
/* State Init */
let from = $state(presetFrom ? presetFrom : new Date(nowEpoch - (4 * 3600 * 1000)));
@@ -123,6 +135,8 @@
})
);
const thisNodeState = $derived($nodeMetricsData?.data?.nodeMetrics[0]?.state ? $nodeMetricsData.data.nodeMetrics[0].state : 'notindb');
/* Effect */
$effect(() => {
loadUnits($initialized);
@@ -138,7 +152,7 @@
}
</script>
<Row cols={{ xs: 2, lg: 4 }}>
<Row cols={{ xs: 2, lg: 5 }}>
{#if $initq.error}
<Card body color="danger">{$initq.error.message}</Card>
{:else if $initq.fetching}
@@ -149,19 +163,18 @@
<InputGroup>
<InputGroupText><Icon name="hdd" /></InputGroupText>
<InputGroupText>Selected Node</InputGroupText>
<Input style="background-color: white;"type="text" value="{hostname} [{cluster} ({$nodeMetricsData?.data ? $nodeMetricsData.data.nodeMetrics[0].subCluster : ''})]" disabled/>
<Input style="background-color: white;" type="text" value="{hostname} [{cluster} {$nodeMetricsData?.data ? `(${$nodeMetricsData.data.nodeMetrics[0].subCluster})` : ''}]" disabled/>
</InputGroup>
</Col>
<!-- Time Col -->
<!-- State Col -->
<Col>
<TimeSelection
presetFrom={from}
presetTo={to}
applyTime={(newFrom, newTo) => {
from = newFrom;
to = newTo;
}}
/>
<InputGroup>
<InputGroupText><Icon name="clipboard2-pulse" /></InputGroupText>
<InputGroupText>Node State</InputGroupText>
<Button class="flex-grow-1 text-center" color={stateColors[thisNodeState]} disabled>
{thisNodeState}
</Button>
</InputGroup>
</Col>
<!-- Concurrent Col -->
<Col class="mt-2 mt-lg-0">
@@ -184,6 +197,17 @@
</InputGroup>
{/if}
</Col>
<!-- Time Col -->
<Col>
<TimeSelection
presetFrom={from}
presetTo={to}
applyTime={(newFrom, newTo) => {
from = newFrom;
to = newTo;
}}
/>
</Col>
<!-- Refresh Col-->
<Col class="mt-2 mt-lg-0">
<Refresher

View File

@@ -59,6 +59,7 @@
const resampleResolutions = resampleConfig ? [...resampleConfig.resolutions] : [];
const resampleDefault = resampleConfig ? Math.max(...resampleConfig.resolutions) : 0;
const stateOptions = ['all', 'allocated', 'idle', 'reserved', 'mixed', 'down', 'unknown', 'notindb'];
const nowDate = new Date(Date.now());
/* Var Init */
@@ -69,6 +70,7 @@
let from = $state(presetFrom || new Date(nowDate.setHours(nowDate.getHours() - 4)));
let selectedResolution = $state(resampleConfig ? resampleDefault : 0);
let hostnameFilter = $state("");
let hoststateFilter = $state("all");
let pendingHostnameFilter = $state("");
let isMetricsSelectionOpen = $state(false);
@@ -154,7 +156,7 @@
</script>
<!-- ROW1: Tools-->
<Row cols={{ xs: 2, lg: !displayNodeOverview ? (resampleConfig ? 5 : 4) : 4 }} class="mb-3">
<Row cols={{ xs: 2, lg: !displayNodeOverview ? (resampleConfig ? 6 : 5) : 5 }} class="mb-3">
{#if $initq.data}
<!-- List Metric Select Col-->
{#if !displayNodeOverview}
@@ -191,7 +193,7 @@
<Col class="mt-2 mt-lg-0">
<InputGroup>
<InputGroupText><Icon name="hdd" /></InputGroupText>
<InputGroupText>Find Node(s)</InputGroupText>
<InputGroupText>Node(s)</InputGroupText>
<Input
placeholder="Filter hostname ..."
type="text"
@@ -200,6 +202,18 @@
/>
</InputGroup>
</Col>
<!-- State Col-->
<Col class="mt-2 mt-lg-0">
<InputGroup>
<InputGroupText><Icon name="clipboard2-pulse" /></InputGroupText>
<InputGroupText>State</InputGroupText>
<Input type="select" bind:value={hoststateFilter}>
{#each stateOptions as so}
<option value={so}>{so.charAt(0).toUpperCase() + so.slice(1)}</option>
{/each}
</Input>
</InputGroup>
</Col>
<!-- Range Col-->
<Col>
<TimeSelection
@@ -252,10 +266,10 @@
{:else}
{#if displayNodeOverview}
<!-- ROW2-1: Node Overview (Grid Included)-->
<NodeOverview {cluster} {ccconfig} {selectedMetric} {from} {to} {hostnameFilter}/>
<NodeOverview {cluster} {ccconfig} {selectedMetric} {from} {to} {hostnameFilter} {hoststateFilter}/>
{:else}
<!-- ROW2-2: Node List (Grid Included)-->
<NodeList {cluster} {subCluster} {ccconfig} {selectedMetrics} {selectedResolution} {hostnameFilter} {from} {to} {presetSystemUnits}/>
<NodeList {cluster} {subCluster} {ccconfig} {selectedMetrics} {selectedResolution} {hostnameFilter} {hoststateFilter} {from} {to} {presetSystemUnits}/>
{/if}
{/if}

View File

@@ -8,6 +8,7 @@
- `selectedMetrics [String]`: The array of selected metrics [Default []]
- `selectedResolution Number?`: The selected data resolution [Default: 0]
- `hostnameFilter String?`: The active hostnamefilter [Default: ""]
- `hoststateFilter String?`: The active hoststatefilter [Default: ""]
- `presetSystemUnits Object`: The object of metric units [Default: null]
- `from Date?`: The selected "from" date [Default: null]
- `to Date?`: The selected "to" date [Default: null]
@@ -28,6 +29,7 @@
selectedMetrics = [],
selectedResolution = 0,
hostnameFilter = "",
hoststateFilter = "",
presetSystemUnits = null,
from = null,
to = null
@@ -37,11 +39,14 @@
const client = getContextClient();
const usePaging = ccconfig?.nodeList_usePaging || false;
const nodeListQuery = gql`
query ($cluster: String!, $subCluster: String!, $nodeFilter: String!, $metrics: [String!], $scopes: [MetricScope!]!, $from: Time!, $to: Time!, $paging: PageRequest!, $selectedResolution: Int) {
query ($cluster: String!, $subCluster: String!, $nodeFilter: String!, $stateFilter: String!, $metrics: [String!],
$scopes: [MetricScope!]!, $from: Time!, $to: Time!, $paging: PageRequest!, $selectedResolution: Int
) {
nodeMetricsList(
cluster: $cluster
subCluster: $subCluster
nodeFilter: $nodeFilter
stateFilter: $stateFilter,
scopes: $scopes
metrics: $metrics
from: $from
@@ -51,6 +56,7 @@
) {
items {
host
state
subCluster
metrics {
name
@@ -100,6 +106,7 @@
variables: {
cluster: cluster,
subCluster: subCluster,
stateFilter: hoststateFilter,
nodeFilter: hostnameFilter,
scopes: ["core", "socket", "accelerator"],
metrics: selectedMetrics,
@@ -137,7 +144,7 @@
// Triggers (Except Paging)
from, to
selectedMetrics, selectedResolution
hostnameFilter
hostnameFilter, hoststateFilter
// Continuous Scroll: Reset nodes and paging if parameters change: Existing entries will not match new selections
if (!usePaging) {
nodes = [];

View File

@@ -6,6 +6,7 @@
- `cluster String`: The cluster to show status information for
- `selectedMetric String?`: The selectedMetric input [Default: ""]
- `hostnameFilter String?`: The active hostnamefilter [Default: ""]
- `hoststateFilter String?`: The active hoststatefilter [Default: ""]
- `from Date?`: The selected "from" date [Default: null]
- `to Date?`: The selected "to" date [Default: null]
-->
@@ -13,7 +14,7 @@
<script>
import { getContext } from "svelte";
import { queryStore, gql, getContextClient } from "@urql/svelte";
import { Row, Col, Card, Spinner } from "@sveltestrap/sveltestrap";
import { Row, Col, Card, Spinner, Badge } from "@sveltestrap/sveltestrap";
import { checkMetricDisabled } from "../generic/utils.js";
import MetricPlot from "../generic/plots/MetricPlot.svelte";
@@ -23,6 +24,7 @@
cluster = "",
selectedMetric = "",
hostnameFilter = "",
hoststateFilter = "",
from = null,
to = null
} = $props();
@@ -30,6 +32,16 @@
/* Const Init */
const initialized = getContext("initialized");
const client = getContextClient();
// Node State Colors
// Lookup table from a node's `state` string to the color name handed to the
// `color` prop of the sveltestrap <Badge> rendered next to each host title.
// `notindb` is not a value delivered by the query: it is the placeholder this
// component substitutes when a node comes back without a `state`.
// NOTE(review): a state string not listed here yields `undefined` as color — confirm
// that the backend only ever returns the keys below.
const stateColors = {
allocated: 'success',
reserved: 'info',
idle: 'primary',
mixed: 'warning',
down: 'danger',
unknown: 'dark',
notindb: 'secondary'
}
/* Derived */
const nodesQuery = $derived(queryStore({
@@ -43,6 +55,7 @@
to: $to
) {
host
state
subCluster
metrics {
name
@@ -75,7 +88,15 @@
}));
const mappedData = $derived(handleQueryData($initialized, $nodesQuery?.data));
const filteredData = $derived(mappedData.filter((h) => h.host.includes(hostnameFilter)));
// Nodes that pass both the hostname filter and the node-state filter.
// - hostnameFilter: substring match on `host`; an empty/unset filter matches every host.
// - hoststateFilter: exact match on `state`; 'all' disables state filtering.
//   An empty/unset value is treated like 'all', because the prop default is ""
//   and comparing every node's state against "" would hide all nodes.
const filteredData = $derived(mappedData.filter((h) => {
  const hostMatches = !hostnameFilter || h.host.includes(hostnameFilter);
  const stateMatches = !hoststateFilter || hoststateFilter == 'all' || h.state == hoststateFilter;
  return hostMatches && stateMatches;
}));
/* Functions */
function handleQueryData(isInitialized, queryData) {
@@ -94,6 +115,7 @@
if (rawData.length > 0) {
pendingMapped = rawData.map((h) => ({
host: h.host,
state: h?.state? h.state : 'notindb',
subCluster: h.subCluster,
data: h.metrics.filter(
(m) => m?.name == selectedMetric && m.scope == "node",
@@ -125,13 +147,18 @@
{#key selectedMetric}
{#each filteredData as item (item.host)}
<Col class="px-1">
<h4 style="width: 100%; text-align: center;">
<a
style="display: block;padding-top: 15px;"
href="/monitoring/node/{cluster}/{item.host}"
>{item.host} ({item.subCluster})</a
>
</h4>
<div class="d-flex align-items-baseline">
<h4 style="width: 100%; text-align: center;">
<a
style="display: block;padding-top: 15px;"
href="/monitoring/node/{cluster}/{item.host}"
>{item.host} ({item.subCluster})</a
>
</h4>
<span style="margin-right: 0.5rem;">
<Badge color={stateColors[item?.state? item.state : 'notindb']}>{item?.state? item.state : 'notindb'}</Badge>
</span>
</div>
{#if item.disabled === true}
<Card body class="mx-3" color="info"
>Metric disabled for subcluster <code

View File

@@ -13,6 +13,8 @@
import {
Icon,
Button,
Row,
Col,
Card,
CardHeader,
CardBody,
@@ -30,6 +32,7 @@
cluster,
subCluster,
hostname,
hoststate,
dataHealth,
nodeJobsData = null,
} = $props();
@@ -39,6 +42,16 @@
const healthWarn = !dataHealth.includes(true);
// At least one non-returned selected metric: Metric config error?
const metricWarn = dataHealth.includes(false);
// Node State Colors
// Lookup table from the `hoststate` prop to the color name used for the
// disabled "State" <Button> in the card body.
// NOTE(review): `notindb` presumably is the placeholder the parent passes when
// no state is known for the node — confirm against the caller. A `hoststate`
// value not listed here yields `undefined` as button color.
const stateColors = {
allocated: 'success',
reserved: 'info',
idle: 'primary',
mixed: 'warning',
down: 'danger',
unknown: 'dark',
notindb: 'secondary'
}
/* Derived */
const userList = $derived(nodeJobsData
@@ -68,80 +81,72 @@
</div>
</CardHeader>
<CardBody>
{#if healthWarn}
<InputGroup>
<InputGroupText>
<Icon name="exclamation-circle"/>
</InputGroupText>
<InputGroupText>
Status
</InputGroupText>
<Button color="danger" disabled>
Unhealthy
</Button>
</InputGroup>
{:else if metricWarn}
<InputGroup>
<InputGroupText>
<Icon name="info-circle"/>
</InputGroupText>
<InputGroupText>
Status
</InputGroupText>
<Button color="warning" disabled>
Missing Metric
</Button>
</InputGroup>
{:else if nodeJobsData.jobs.count == 1 && nodeJobsData.jobs.items[0].shared == "none"}
<InputGroup>
<InputGroupText>
<Icon name="circle-fill"/>
</InputGroupText>
<InputGroupText>
Status
</InputGroupText>
<Button color="success" disabled>
Exclusive
</Button>
</InputGroup>
{:else if nodeJobsData.jobs.count >= 1 && !(nodeJobsData.jobs.items[0].shared == "none")}
<InputGroup>
<InputGroupText>
<Icon name="circle-half"/>
</InputGroupText>
<InputGroupText>
Status
</InputGroupText>
<Button color="success" disabled>
Shared
</Button>
</InputGroup>
<!-- Fallback -->
{:else if nodeJobsData.jobs.count >= 1}
<InputGroup>
<InputGroupText>
<Icon name="circle-fill"/>
</InputGroupText>
<InputGroupText>
Status
</InputGroupText>
<Button color="success" disabled>
Allocated Jobs
</Button>
</InputGroup>
{:else}
<InputGroup>
<InputGroupText>
<Icon name="circle"/>
</InputGroupText>
<InputGroupText>
Status
</InputGroupText>
<Button color="secondary" disabled>
Idle
</Button>
</InputGroup>
{/if}
<Row cols={{xs: 1, lg: 2}}>
<Col class="mb-2 mb-lg-0">
<InputGroup size="sm">
{#if healthWarn}
<InputGroupText class="flex-grow-1 flex-lg-grow-0">
<Icon name="exclamation-circle" style="padding-right: 0.5rem;"/>
<span>Jobs</span>
</InputGroupText>
<Button class="flex-grow-1" color="danger" disabled>
No Metrics
</Button>
{:else if metricWarn}
<InputGroupText class="flex-grow-1 flex-lg-grow-0">
<Icon name="info-circle" style="padding-right: 0.5rem;"/>
<span>Jobs</span>
</InputGroupText>
<Button class="flex-grow-1" color="warning" disabled>
Missing Metric
</Button>
{:else if nodeJobsData.jobs.count == 1 && nodeJobsData.jobs.items[0].shared == "none"}
<InputGroupText class="flex-grow-1 flex-lg-grow-0">
<Icon name="circle-fill" style="padding-right: 0.5rem;"/>
<span>Jobs</span>
</InputGroupText>
<Button class="flex-grow-1" color="success" disabled>
Exclusive
</Button>
{:else if nodeJobsData.jobs.count >= 1 && !(nodeJobsData.jobs.items[0].shared == "none")}
<InputGroupText class="flex-grow-1 flex-lg-grow-0">
<Icon name="circle-half" style="padding-right: 0.5rem;"/>
<span>Jobs</span>
</InputGroupText>
<Button class="flex-grow-1" color="success" disabled>
Shared
</Button>
<!-- Fallback -->
{:else if nodeJobsData.jobs.count >= 1}
<InputGroupText class="flex-grow-1 flex-lg-grow-0">
<Icon name="circle-fill" style="padding-right: 0.5rem;"/>
<span>Jobs</span>
</InputGroupText>
<Button class="flex-grow-1" color="success" disabled>
Running
</Button>
{:else}
<InputGroupText class="flex-grow-1 flex-lg-grow-0">
<Icon name="circle" style="padding-right: 0.5rem;"/>
<span>Jobs</span>
</InputGroupText>
<Button class="flex-grow-1" color="secondary" disabled>
None
</Button>
{/if}
</InputGroup>
</Col>
<Col>
<InputGroup size="sm">
<InputGroupText class="flex-grow-1 flex-lg-grow-0">
State
</InputGroupText>
<Button class="flex-grow-1" color={stateColors[hoststate]} disabled>
{hoststate.charAt(0).toUpperCase() + hoststate.slice(1)}
</Button>
</InputGroup>
</Col>
</Row>
<hr class="my-3"/>
<!-- JOBS -->
<InputGroup size="sm" class="justify-content-between mb-3">

View File

@@ -139,7 +139,13 @@
</CardBody>
</Card>
{:else}
<NodeInfo nodeJobsData={$nodeJobsData.data} {cluster} subCluster={nodeData.subCluster} hostname={nodeData.host} {dataHealth}/>
<NodeInfo
{cluster}
{dataHealth}
nodeJobsData={$nodeJobsData.data}
subCluster={nodeData.subCluster}
hostname={nodeData.host}
hoststate={nodeData?.state? nodeData.state: 'notindb'}/>
{/if}
</td>
{#each refinedData as metricData (metricData.data.name)}