mirror of
https://github.com/ClusterCockpit/cc-backend
synced 2026-02-11 13:31:45 +01:00
Add monitoring healthstate support in nodestate API.
This commit is contained in:
@@ -7,11 +7,14 @@ package api
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"maps"
|
||||
"net/http"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/ClusterCockpit/cc-backend/internal/repository"
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/archive"
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/metricstore"
|
||||
"github.com/ClusterCockpit/cc-lib/v2/schema"
|
||||
)
|
||||
|
||||
@@ -20,6 +23,15 @@ type UpdateNodeStatesRequest struct {
|
||||
Cluster string `json:"cluster" example:"fritz"`
|
||||
}
|
||||
|
||||
// metricListToNames converts a map of metric configurations to a list of metric names
|
||||
func metricListToNames(metricList map[string]*schema.Metric) []string {
|
||||
names := make([]string, 0, len(metricList))
|
||||
for name := range metricList {
|
||||
names = append(names, name)
|
||||
}
|
||||
return names
|
||||
}
|
||||
|
||||
// determineState assumes that only one of the states it recognizes is present per node
|
||||
func determineState(states []string) schema.SchedulerState {
|
||||
for _, state := range states {
|
||||
@@ -62,18 +74,42 @@ func (api *RestAPI) updateNodeStates(rw http.ResponseWriter, r *http.Request) {
|
||||
http.StatusBadRequest, rw)
|
||||
return
|
||||
}
|
||||
repo := repository.GetNodeRepository()
|
||||
requestReceived := time.Now().Unix()
|
||||
repo := repository.GetNodeRepository()
|
||||
ms := metricstore.GetMemoryStore()
|
||||
|
||||
m := make(map[string][]string)
|
||||
healthStates := make(map[string]metricstore.NodeHealthState)
|
||||
|
||||
for _, node := range req.Nodes {
|
||||
if sc, err := archive.GetSubClusterByNode(req.Cluster, node.Hostname); err == nil {
|
||||
m[sc] = append(m[sc], node.Hostname)
|
||||
}
|
||||
}
|
||||
|
||||
for sc, nl := range m {
|
||||
if sc != "" {
|
||||
metricList := archive.GetMetricConfigSubCluster(req.Cluster, sc)
|
||||
metricNames := metricListToNames(metricList)
|
||||
if states, err := ms.HealthCheckAlt(req.Cluster, nl, metricNames); err == nil {
|
||||
maps.Copy(healthStates, states)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for _, node := range req.Nodes {
|
||||
state := determineState(node.States)
|
||||
healthState := schema.MonitoringStateFull
|
||||
if hs, ok := healthStates[node.Hostname]; ok {
|
||||
healthState = hs.Status
|
||||
}
|
||||
nodeState := schema.NodeStateDB{
|
||||
TimeStamp: requestReceived,
|
||||
NodeState: state,
|
||||
CpusAllocated: node.CpusAllocated,
|
||||
MemoryAllocated: node.MemoryAllocated,
|
||||
GpusAllocated: node.GpusAllocated,
|
||||
HealthState: schema.MonitoringStateFull,
|
||||
HealthState: healthState,
|
||||
JobsRunning: node.JobsRunning,
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user