mirror of
https://github.com/ClusterCockpit/cc-backend
synced 2026-02-11 13:31:45 +01:00
Intermediate state of the node health check
TODOs:
* Remove error handling from the routine and simplify the API call
* Use a map for hardware-level metrics
This commit is contained in:
@@ -99,7 +99,7 @@ func (api *RestAPI) updateNodeStates(rw http.ResponseWriter, r *http.Request) {
|
|||||||
|
|
||||||
for _, node := range req.Nodes {
|
for _, node := range req.Nodes {
|
||||||
state := determineState(node.States)
|
state := determineState(node.States)
|
||||||
healthState := schema.MonitoringStateFull
|
healthState := schema.MonitoringStateFailed
|
||||||
if hs, ok := healthStates[node.Hostname]; ok {
|
if hs, ok := healthStates[node.Hostname]; ok {
|
||||||
healthState = hs
|
healthState = hs
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -283,23 +283,20 @@ func (l *Level) getHealthyMetrics(m *MemoryStore) ([]string, []string, error) {
|
|||||||
for metricName, mc := range m.Metrics {
|
for metricName, mc := range m.Metrics {
|
||||||
b := l.metrics[mc.offset]
|
b := l.metrics[mc.offset]
|
||||||
if b.isBufferHealthy() {
|
if b.isBufferHealthy() {
|
||||||
// Buffer has recent data, now check for missing values
|
healthyList = append(healthyList, metricName)
|
||||||
missingCount := b.countMissingValues()
|
} else {
|
||||||
if missingCount > int(MaxMissingDataPoints) {
|
degradedList = append(degradedList, metricName)
|
||||||
degradedList = append(degradedList, metricName)
|
|
||||||
} else {
|
|
||||||
healthyList = append(healthyList, metricName)
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Phase 2: Recursively check child levels (hardware components)
|
// Phase 2: Recursively check child levels
|
||||||
for _, lvl := range l.children {
|
for _, lvl := range l.children {
|
||||||
childHealthy, childDegraded, err := lvl.getHealthyMetrics(m)
|
childHealthy, childDegraded, err := lvl.getHealthyMetrics(m)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, nil, err
|
return nil, nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// FIXME: Use a map to collect core level metrics
|
||||||
// Merge child metrics into flat lists
|
// Merge child metrics into flat lists
|
||||||
healthyList = append(healthyList, childHealthy...)
|
healthyList = append(healthyList, childHealthy...)
|
||||||
degradedList = append(degradedList, childDegraded...)
|
degradedList = append(degradedList, childDegraded...)
|
||||||
|
|||||||
Reference in New Issue
Block a user