change: remove heuristic metricHealth, replace with DB metricHealth

- add metricHealth to single Node view
This commit is contained in:
Christoph Kluge
2026-03-19 15:55:58 +01:00
parent 886791cf8a
commit 10b4fa5a06
12 changed files with 171 additions and 104 deletions

View File

@@ -288,10 +288,11 @@ type ComplexityRoot struct {
}
NodeMetrics struct {
Host func(childComplexity int) int
Metrics func(childComplexity int) int
State func(childComplexity int) int
SubCluster func(childComplexity int) int
Host func(childComplexity int) int
MetricHealth func(childComplexity int) int
Metrics func(childComplexity int) int
NodeState func(childComplexity int) int
SubCluster func(childComplexity int) int
}
NodeStateResultList struct {
@@ -1501,18 +1502,24 @@ func (e *executableSchema) Complexity(ctx context.Context, typeName, field strin
}
return e.ComplexityRoot.NodeMetrics.Host(childComplexity), true
case "NodeMetrics.metricHealth":
if e.ComplexityRoot.NodeMetrics.MetricHealth == nil {
break
}
return e.ComplexityRoot.NodeMetrics.MetricHealth(childComplexity), true
case "NodeMetrics.metrics":
if e.ComplexityRoot.NodeMetrics.Metrics == nil {
break
}
return e.ComplexityRoot.NodeMetrics.Metrics(childComplexity), true
case "NodeMetrics.state":
if e.ComplexityRoot.NodeMetrics.State == nil {
case "NodeMetrics.nodeState":
if e.ComplexityRoot.NodeMetrics.NodeState == nil {
break
}
return e.ComplexityRoot.NodeMetrics.State(childComplexity), true
return e.ComplexityRoot.NodeMetrics.NodeState(childComplexity), true
case "NodeMetrics.subCluster":
if e.ComplexityRoot.NodeMetrics.SubCluster == nil {
break
@@ -2537,7 +2544,8 @@ enum SortByAggregate {
type NodeMetrics {
host: String!
state: String!
nodeState: String!
metricHealth: String!
subCluster: String!
metrics: [JobMetricWithName!]!
}
@@ -8316,14 +8324,14 @@ func (ec *executionContext) fieldContext_NodeMetrics_host(_ context.Context, fie
return fc, nil
}
func (ec *executionContext) _NodeMetrics_state(ctx context.Context, field graphql.CollectedField, obj *model.NodeMetrics) (ret graphql.Marshaler) {
func (ec *executionContext) _NodeMetrics_nodeState(ctx context.Context, field graphql.CollectedField, obj *model.NodeMetrics) (ret graphql.Marshaler) {
return graphql.ResolveField(
ctx,
ec.OperationContext,
field,
ec.fieldContext_NodeMetrics_state,
ec.fieldContext_NodeMetrics_nodeState,
func(ctx context.Context) (any, error) {
return obj.State, nil
return obj.NodeState, nil
},
nil,
ec.marshalNString2string,
@@ -8332,7 +8340,36 @@ func (ec *executionContext) _NodeMetrics_state(ctx context.Context, field graphq
)
}
func (ec *executionContext) fieldContext_NodeMetrics_state(_ context.Context, field graphql.CollectedField) (fc *graphql.FieldContext, err error) {
// fieldContext_NodeMetrics_nodeState builds the graphql.FieldContext for the
// "nodeState" field of the "NodeMetrics" object.
//
// The returned context marks the field as neither a method nor a resolver and
// installs a Child callback that always fails, because the field is a String
// and therefore has no child fields. The error result is always nil.
//
// NOTE(review): this looks like generator-emitted GraphQL plumbing; manual
// edits would presumably be overwritten on regeneration — confirm.
func (ec *executionContext) fieldContext_NodeMetrics_nodeState(_ context.Context, field graphql.CollectedField) (fc *graphql.FieldContext, err error) {
	// Scalar leaf: any attempt to look up a child field is rejected.
	noChildren := func(ctx context.Context, field graphql.CollectedField) (*graphql.FieldContext, error) {
		return nil, errors.New("field of type String does not have child fields")
	}

	return &graphql.FieldContext{
		Object:     "NodeMetrics",
		Field:      field,
		IsMethod:   false,
		IsResolver: false,
		Child:      noChildren,
	}, nil
}
// _NodeMetrics_metricHealth produces the marshalled value of the
// "metricHealth" field for the given NodeMetrics object.
//
// It delegates to graphql.ResolveField, handing over the operation context,
// the collected field, the matching field-context builder, a resolver closure
// that simply returns obj.MetricHealth, and marshalNString2string as the
// marshaller. The remaining nil/true/true arguments are forwarded exactly as
// in the original call; their meaning is defined by graphql.ResolveField and
// is not visible from this file.
//
// NOTE(review): this looks like generator-emitted GraphQL plumbing; manual
// edits would presumably be overwritten on regeneration — confirm.
func (ec *executionContext) _NodeMetrics_metricHealth(ctx context.Context, field graphql.CollectedField, obj *model.NodeMetrics) (ret graphql.Marshaler) {
	// The value is a plain struct field, so resolving it cannot fail.
	readMetricHealth := func(ctx context.Context) (any, error) {
		return obj.MetricHealth, nil
	}

	return graphql.ResolveField(
		ctx,
		ec.OperationContext,
		field,
		ec.fieldContext_NodeMetrics_metricHealth,
		readMetricHealth,
		nil,
		ec.marshalNString2string,
		true,
		true,
	)
}
func (ec *executionContext) fieldContext_NodeMetrics_metricHealth(_ context.Context, field graphql.CollectedField) (fc *graphql.FieldContext, err error) {
fc = &graphql.FieldContext{
Object: "NodeMetrics",
Field: field,
@@ -8666,8 +8703,10 @@ func (ec *executionContext) fieldContext_NodesResultList_items(_ context.Context
switch field.Name {
case "host":
return ec.fieldContext_NodeMetrics_host(ctx, field)
case "state":
return ec.fieldContext_NodeMetrics_state(ctx, field)
case "nodeState":
return ec.fieldContext_NodeMetrics_nodeState(ctx, field)
case "metricHealth":
return ec.fieldContext_NodeMetrics_metricHealth(ctx, field)
case "subCluster":
return ec.fieldContext_NodeMetrics_subCluster(ctx, field)
case "metrics":
@@ -9844,8 +9883,10 @@ func (ec *executionContext) fieldContext_Query_nodeMetrics(ctx context.Context,
switch field.Name {
case "host":
return ec.fieldContext_NodeMetrics_host(ctx, field)
case "state":
return ec.fieldContext_NodeMetrics_state(ctx, field)
case "nodeState":
return ec.fieldContext_NodeMetrics_nodeState(ctx, field)
case "metricHealth":
return ec.fieldContext_NodeMetrics_metricHealth(ctx, field)
case "subCluster":
return ec.fieldContext_NodeMetrics_subCluster(ctx, field)
case "metrics":
@@ -15917,8 +15958,13 @@ func (ec *executionContext) _NodeMetrics(ctx context.Context, sel ast.SelectionS
if out.Values[i] == graphql.Null {
out.Invalids++
}
case "state":
out.Values[i] = ec._NodeMetrics_state(ctx, field, obj)
case "nodeState":
out.Values[i] = ec._NodeMetrics_nodeState(ctx, field, obj)
if out.Values[i] == graphql.Null {
out.Invalids++
}
case "metricHealth":
out.Values[i] = ec._NodeMetrics_metricHealth(ctx, field, obj)
if out.Values[i] == graphql.Null {
out.Invalids++
}

View File

@@ -193,10 +193,11 @@ type NodeFilter struct {
}
type NodeMetrics struct {
Host string `json:"host"`
State string `json:"state"`
SubCluster string `json:"subCluster"`
Metrics []*JobMetricWithName `json:"metrics"`
Host string `json:"host"`
NodeState string `json:"nodeState"`
MetricHealth string `json:"metricHealth"`
SubCluster string `json:"subCluster"`
Metrics []*JobMetricWithName `json:"metrics"`
}
type NodeStateResultList struct {

View File

@@ -840,14 +840,15 @@ func (r *queryResolver) NodeMetrics(ctx context.Context, cluster string, nodes [
}
nodeRepo := repository.GetNodeRepository()
stateMap, _ := nodeRepo.MapNodes(cluster)
nodeStateMap, metricHealthMap, _ := nodeRepo.MapNodes(cluster)
nodeMetrics := make([]*model.NodeMetrics, 0, len(data))
for hostname, metrics := range data {
host := &model.NodeMetrics{
Host: hostname,
State: stateMap[hostname],
Metrics: make([]*model.JobMetricWithName, 0, len(metrics)*len(scopes)),
Host: hostname,
NodeState: nodeStateMap[hostname],
MetricHealth: metricHealthMap[hostname],
Metrics: make([]*model.JobMetricWithName, 0, len(metrics)*len(scopes)),
}
host.SubCluster, err = archive.GetSubClusterByNode(cluster, hostname)
if err != nil {
@@ -889,7 +890,7 @@ func (r *queryResolver) NodeMetricsList(ctx context.Context, cluster string, sub
nodeRepo := repository.GetNodeRepository()
// nodes -> array hostname
nodes, stateMap, countNodes, hasNextPage, nerr := nodeRepo.GetNodesForList(ctx, cluster, subCluster, stateFilter, nodeFilter, page)
nodes, nodeStateMap, metricHealthMap, countNodes, hasNextPage, nerr := nodeRepo.GetNodesForList(ctx, cluster, subCluster, stateFilter, nodeFilter, page)
if nerr != nil {
return nil, errors.New("could not retrieve node list required for resolving NodeMetricsList")
}
@@ -910,9 +911,10 @@ func (r *queryResolver) NodeMetricsList(ctx context.Context, cluster string, sub
nodeMetricsList := make([]*model.NodeMetrics, 0, len(data))
for _, hostname := range nodes {
host := &model.NodeMetrics{
Host: hostname,
State: stateMap[hostname],
Metrics: make([]*model.JobMetricWithName, 0),
Host: hostname,
NodeState: nodeStateMap[hostname],
MetricHealth: metricHealthMap[hostname],
Metrics: make([]*model.JobMetricWithName, 0),
}
host.SubCluster, err = archive.GetSubClusterByNode(cluster, hostname)
if err != nil {

View File

@@ -593,8 +593,8 @@ func (r *NodeRepository) ListNodes(cluster string) ([]*schema.Node, error) {
return nodeList, nil
}
func (r *NodeRepository) MapNodes(cluster string) (map[string]string, error) {
q := sq.Select("node.hostname", "node_state.node_state").
func (r *NodeRepository) MapNodes(cluster string) (map[string]string, map[string]string, error) {
q := sq.Select("node.hostname", "node_state.node_state", "node_state.health_state").
From("node").
Join("node_state ON node_state.node_id = node.id").
Where(latestStateCondition()).
@@ -604,22 +604,25 @@ func (r *NodeRepository) MapNodes(cluster string) (map[string]string, error) {
rows, err := q.RunWith(r.DB).Query()
if err != nil {
cclog.Warn("Error while querying node list")
return nil, err
return nil, nil, err
}
stateMap := make(map[string]string)
nodeStateMap := make(map[string]string)
metricHealthMap := make(map[string]string)
defer rows.Close()
for rows.Next() {
var hostname, nodestate string
if err := rows.Scan(&hostname, &nodestate); err != nil {
var hostname, nodeState, metricHealth string
if err := rows.Scan(&hostname, &nodeState, &metricHealth); err != nil {
cclog.Warn("Error while scanning node list (MapNodes)")
return nil, err
return nil, nil, err
}
stateMap[hostname] = nodestate
nodeStateMap[hostname] = nodeState
metricHealthMap[hostname] = metricHealth
}
return stateMap, nil
return nodeStateMap, metricHealthMap, nil
}
func (r *NodeRepository) CountStates(ctx context.Context, filters []*model.NodeFilter, column string) ([]*model.NodeStates, error) {
@@ -741,10 +744,11 @@ func (r *NodeRepository) GetNodesForList(
stateFilter string,
nodeFilter string,
page *model.PageRequest,
) ([]string, map[string]string, int, bool, error) {
) ([]string, map[string]string, map[string]string, int, bool, error) {
// Init Return Vars
nodes := make([]string, 0)
stateMap := make(map[string]string)
nodeStateMap := make(map[string]string)
metricHealthMap := make(map[string]string)
countNodes := 0
hasNextPage := false
@@ -778,7 +782,7 @@ func (r *NodeRepository) GetNodesForList(
rawNodes, serr := r.QueryNodes(ctx, queryFilters, page, nil) // Order not Used
if serr != nil {
cclog.Warn("error while loading node database data (Resolver.NodeMetricsList)")
return nil, nil, 0, false, serr
return nil, nil, nil, 0, false, serr
}
// Intermediate Node Result Info
@@ -787,7 +791,8 @@ func (r *NodeRepository) GetNodesForList(
continue
}
nodes = append(nodes, node.Hostname)
stateMap[node.Hostname] = string(node.NodeState)
nodeStateMap[node.Hostname] = string(node.NodeState)
metricHealthMap[node.Hostname] = string(node.HealthState)
}
// Special Case: Find Nodes not in DB node table but in metricStore only
@@ -847,7 +852,7 @@ func (r *NodeRepository) GetNodesForList(
countNodes, cerr = r.CountNodes(ctx, queryFilters)
if cerr != nil {
cclog.Warn("error while counting node database data (Resolver.NodeMetricsList)")
return nil, nil, 0, false, cerr
return nil, nil, nil, 0, false, cerr
}
hasNextPage = page.Page*page.ItemsPerPage < countNodes
}
@@ -857,7 +862,7 @@ func (r *NodeRepository) GetNodesForList(
nodes, countNodes, hasNextPage = getNodesFromTopol(cluster, subCluster, nodeFilter, page)
}
return nodes, stateMap, countNodes, hasNextPage, nil
return nodes, nodeStateMap, metricHealthMap, countNodes, hasNextPage, nil
}
func AccessCheck(ctx context.Context, query sq.SelectBuilder) (sq.SelectBuilder, error) {