Add monitoring healthstate support in nodestate API.

This commit is contained in:
2026-02-03 12:23:24 +01:00
parent e9cd6b4225
commit 00a41373e8
3 changed files with 611 additions and 2 deletions

View File

@@ -7,11 +7,14 @@ package api
import (
"fmt"
"maps"
"net/http"
"strings"
"time"
"github.com/ClusterCockpit/cc-backend/internal/repository"
"github.com/ClusterCockpit/cc-backend/pkg/archive"
"github.com/ClusterCockpit/cc-backend/pkg/metricstore"
"github.com/ClusterCockpit/cc-lib/v2/schema"
)
@@ -20,6 +23,15 @@ type UpdateNodeStatesRequest struct {
Cluster string `json:"cluster" example:"fritz"`
}
// metricListToNames returns the keys of metricList (the metric names) as a
// slice. The result has one entry per map key and is never nil, even for an
// empty or nil map. Because Go map iteration order is unspecified, the order
// of the returned names is not stable between calls.
func metricListToNames(metricList map[string]*schema.Metric) []string {
	// Allocate the exact final length up front and fill by position.
	keys := make([]string, len(metricList))
	idx := 0
	for key := range metricList {
		keys[idx] = key
		idx++
	}
	return keys
}
// this routine assumes that only one of them exists per node
func determineState(states []string) schema.SchedulerState {
for _, state := range states {
@@ -62,18 +74,42 @@ func (api *RestAPI) updateNodeStates(rw http.ResponseWriter, r *http.Request) {
http.StatusBadRequest, rw)
return
}
repo := repository.GetNodeRepository()
requestReceived := time.Now().Unix()
repo := repository.GetNodeRepository()
ms := metricstore.GetMemoryStore()
m := make(map[string][]string)
healthStates := make(map[string]metricstore.NodeHealthState)
for _, node := range req.Nodes {
if sc, err := archive.GetSubClusterByNode(req.Cluster, node.Hostname); err == nil {
m[sc] = append(m[sc], node.Hostname)
}
}
for sc, nl := range m {
if sc != "" {
metricList := archive.GetMetricConfigSubCluster(req.Cluster, sc)
metricNames := metricListToNames(metricList)
if states, err := ms.HealthCheckAlt(req.Cluster, nl, metricNames); err == nil {
maps.Copy(healthStates, states)
}
}
}
for _, node := range req.Nodes {
state := determineState(node.States)
healthState := schema.MonitoringStateFull
if hs, ok := healthStates[node.Hostname]; ok {
healthState = hs.Status
}
nodeState := schema.NodeStateDB{
TimeStamp: requestReceived,
NodeState: state,
CpusAllocated: node.CpusAllocated,
MemoryAllocated: node.MemoryAllocated,
GpusAllocated: node.GpusAllocated,
HealthState: schema.MonitoringStateFull,
HealthState: healthState,
JobsRunning: node.JobsRunning,
}