Update unit test for new HealthCheck update
@@ -90,7 +90,7 @@ func TestBufferRead(t *testing.T) {
 	}
 }
 
-func TestHealthCheckAlt(t *testing.T) {
+func TestHealthCheck(t *testing.T) {
 	// Create a test MemoryStore with some metrics
 	metrics := map[string]MetricConfig{
 		"load": {Frequency: 10, Aggregation: AvgAggregation, offset: 0},
@@ -111,38 +111,31 @@ func TestHealthCheckAlt(t *testing.T) {
 	now := time.Now().Unix()
 	startTime := now - 100 // Start 100 seconds ago to have enough data points
 
-	// Setup test data for node001 - all metrics healthy
+	// Setup test data for node001 - all metrics healthy (recent data)
 	node001 := ms.root.findLevelOrCreate([]string{"testcluster", "node001"}, len(metrics))
 	for i := 0; i < len(metrics); i++ {
 		node001.metrics[i] = newBuffer(startTime, 10)
-		// Write recent data with no NaN values
+		// Write recent data up to now
 		for ts := startTime; ts <= now; ts += 10 {
 			node001.metrics[i].write(ts, schema.Float(float64(i+1)))
 		}
 	}
 
-	// Setup test data for node002 - some metrics degraded (many NaN values)
+	// Setup test data for node002 - some metrics stale (old data beyond MaxMissingDataPoints threshold)
 	node002 := ms.root.findLevelOrCreate([]string{"testcluster", "node002"}, len(metrics))
+	// MaxMissingDataPoints = 5, frequency = 10, so threshold is 50 seconds
+	staleTime := now - 100 // Data ends 100 seconds ago (well beyond 50 second threshold)
 	for i := 0; i < len(metrics); i++ {
-		node002.metrics[i] = newBuffer(startTime, 10)
+		node002.metrics[i] = newBuffer(staleTime-50, 10)
 		if i < 2 {
-			// First two metrics: healthy (no NaN)
+			// First two metrics: healthy (recent data)
 			for ts := startTime; ts <= now; ts += 10 {
 				node002.metrics[i].write(ts, schema.Float(float64(i+1)))
 			}
 		} else {
-			// Last two metrics: degraded (many NaN values in recent data)
-			// Write real values first, then NaN values at the end
-			count := 0
-			for ts := startTime; ts <= now; ts += 10 {
-				if count < 5 {
-					// Write first 5 real values
-					node002.metrics[i].write(ts, schema.Float(float64(i+1)))
-				} else {
-					// Write NaN for the rest (last ~6 values will be NaN)
-					node002.metrics[i].write(ts, schema.NaN)
-				}
-				count++
+			// Last two metrics: stale (data ends 100 seconds ago)
+			for ts := staleTime - 50; ts <= staleTime; ts += 10 {
+				node002.metrics[i].write(ts, schema.Float(float64(i+1)))
 			}
 		}
 	}
@@ -158,6 +151,16 @@ func TestHealthCheckAlt(t *testing.T) {
 	}
 	// Leave metrics[2] and metrics[3] as nil (missing)
 
+	// Setup test data for node005 - all metrics stale
+	node005 := ms.root.findLevelOrCreate([]string{"testcluster", "node005"}, len(metrics))
+	for i := 0; i < len(metrics); i++ {
+		node005.metrics[i] = newBuffer(staleTime-50, 10)
+		// All metrics have stale data (ends 100 seconds ago)
+		for ts := staleTime - 50; ts <= staleTime; ts += 10 {
+			node005.metrics[i].write(ts, schema.Float(float64(i+1)))
+		}
+	}
+
 	// node004 doesn't exist at all
 
 	tests := []struct {
@@ -177,7 +180,7 @@ func TestHealthCheckAlt(t *testing.T) {
 			},
 		},
 		{
-			name: "some metrics degraded",
+			name: "some metrics stale",
 			cluster: "testcluster",
 			nodes: []string{"node002"},
 			expectedMetrics: []string{"load", "mem_used", "cpu_user", "cpu_system"},
@@ -203,16 +206,26 @@ func TestHealthCheckAlt(t *testing.T) {
 				"node004": schema.MonitoringStateFailed,
 			},
 		},
+		{
+			name: "all metrics stale",
+			cluster: "testcluster",
+			nodes: []string{"node005"},
+			expectedMetrics: []string{"load", "mem_used", "cpu_user", "cpu_system"},
+			wantStates: map[string]schema.MonitoringState{
+				"node005": schema.MonitoringStateFailed,
+			},
+		},
 		{
 			name: "multiple nodes mixed states",
 			cluster: "testcluster",
-			nodes: []string{"node001", "node002", "node003", "node004"},
+			nodes: []string{"node001", "node002", "node003", "node004", "node005"},
 			expectedMetrics: []string{"load", "mem_used"},
 			wantStates: map[string]schema.MonitoringState{
 				"node001": schema.MonitoringStateFull,
-				"node002": schema.MonitoringStateFull,
-				"node003": schema.MonitoringStateFull,
-				"node004": schema.MonitoringStateFailed,
+				"node002": schema.MonitoringStateFull, // Only checking first 2 metrics which are healthy
+				"node003": schema.MonitoringStateFull, // Only checking first 2 metrics which exist
+				"node004": schema.MonitoringStateFailed, // Node doesn't exist
+				"node005": schema.MonitoringStateFailed, // Both metrics are stale
 			},
 		},
 	}
@@ -221,30 +234,233 @@ func TestHealthCheckAlt(t *testing.T) {
 		t.Run(tt.name, func(t *testing.T) {
 			results, err := ms.HealthCheck(tt.cluster, tt.nodes, tt.expectedMetrics)
 			if err != nil {
-				t.Errorf("HealthCheckAlt() error = %v", err)
+				t.Errorf("HealthCheck() error = %v", err)
 				return
 			}
 
 			// Check that we got results for all nodes
 			if len(results) != len(tt.nodes) {
-				t.Errorf("HealthCheckAlt() returned %d results, want %d", len(results), len(tt.nodes))
+				t.Errorf("HealthCheck() returned %d results, want %d", len(results), len(tt.nodes))
 			}
 
 			// Check each node's state
 			for _, node := range tt.nodes {
 				state, ok := results[node]
 				if !ok {
-					t.Errorf("HealthCheckAlt() missing result for node %s", node)
+					t.Errorf("HealthCheck() missing result for node %s", node)
 					continue
 				}
 
 				// Check status
 				if wantStatus, ok := tt.wantStates[node]; ok {
 					if state != wantStatus {
-						t.Errorf("HealthCheckAlt() node %s status = %v, want %v", node, state, wantStatus)
+						t.Errorf("HealthCheck() node %s status = %v, want %v", node, state, wantStatus)
 					}
 				}
 			}
 		})
 	}
 }
+
+// TestGetHealthyMetrics tests the GetHealthyMetrics function which returns lists of missing and degraded metrics
+func TestGetHealthyMetrics(t *testing.T) {
+	metrics := map[string]MetricConfig{
+		"load": {Frequency: 10, Aggregation: AvgAggregation, offset: 0},
+		"mem_used": {Frequency: 10, Aggregation: AvgAggregation, offset: 1},
+		"cpu_user": {Frequency: 10, Aggregation: AvgAggregation, offset: 2},
+	}
+
+	ms := &MemoryStore{
+		Metrics: metrics,
+		root: Level{
+			metrics: make([]*buffer, len(metrics)),
+			children: make(map[string]*Level),
+		},
+	}
+
+	now := time.Now().Unix()
+	startTime := now - 100
+	staleTime := now - 100
+
+	// Setup node with mixed health states
+	node := ms.root.findLevelOrCreate([]string{"testcluster", "testnode"}, len(metrics))
+
+	// Metric 0 (load): healthy - recent data
+	node.metrics[0] = newBuffer(startTime, 10)
+	for ts := startTime; ts <= now; ts += 10 {
+		node.metrics[0].write(ts, schema.Float(1.0))
+	}
+
+	// Metric 1 (mem_used): degraded - stale data
+	node.metrics[1] = newBuffer(staleTime-50, 10)
+	for ts := staleTime - 50; ts <= staleTime; ts += 10 {
+		node.metrics[1].write(ts, schema.Float(2.0))
+	}
+
+	// Metric 2 (cpu_user): missing - no buffer (nil)
+
+	tests := []struct {
+		name string
+		selector []string
+		expectedMetrics []string
+		wantMissing []string
+		wantDegraded []string
+		wantErr bool
+	}{
+		{
+			name: "mixed health states",
+			selector: []string{"testcluster", "testnode"},
+			expectedMetrics: []string{"load", "mem_used", "cpu_user"},
+			wantMissing: []string{"cpu_user"},
+			wantDegraded: []string{"mem_used"},
+			wantErr: false,
+		},
+		{
+			name: "node not found",
+			selector: []string{"testcluster", "nonexistent"},
+			expectedMetrics: []string{"load"},
+			wantMissing: nil,
+			wantDegraded: nil,
+			wantErr: true,
+		},
+		{
+			name: "check only healthy metric",
+			selector: []string{"testcluster", "testnode"},
+			expectedMetrics: []string{"load"},
+			wantMissing: []string{},
+			wantDegraded: []string{},
+			wantErr: false,
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			missing, degraded, err := ms.GetHealthyMetrics(tt.selector, tt.expectedMetrics)
+
+			if (err != nil) != tt.wantErr {
+				t.Errorf("GetHealthyMetrics() error = %v, wantErr %v", err, tt.wantErr)
+				return
+			}
+
+			if tt.wantErr {
+				return
+			}
+
+			// Check missing list
+			if len(missing) != len(tt.wantMissing) {
+				t.Errorf("GetHealthyMetrics() missing = %v, want %v", missing, tt.wantMissing)
+			} else {
+				for i, m := range tt.wantMissing {
+					if missing[i] != m {
+						t.Errorf("GetHealthyMetrics() missing[%d] = %v, want %v", i, missing[i], m)
+					}
+				}
+			}
+
+			// Check degraded list
+			if len(degraded) != len(tt.wantDegraded) {
+				t.Errorf("GetHealthyMetrics() degraded = %v, want %v", degraded, tt.wantDegraded)
+			} else {
+				for i, d := range tt.wantDegraded {
+					if degraded[i] != d {
+						t.Errorf("GetHealthyMetrics() degraded[%d] = %v, want %v", i, degraded[i], d)
+					}
+				}
+			}
+		})
+	}
+}
+
+// TestBufferHealthChecks tests the buffer-level health check functions
+func TestBufferHealthChecks(t *testing.T) {
+	now := time.Now().Unix()
+
+	tests := []struct {
+		name string
+		setupBuffer func() *buffer
+		wantExists bool
+		wantHealthy bool
+		description string
+	}{
+		{
+			name: "nil buffer",
+			setupBuffer: func() *buffer {
+				return nil
+			},
+			wantExists: false,
+			wantHealthy: false,
+			description: "nil buffer should not exist and not be healthy",
+		},
+		{
+			name: "empty buffer",
+			setupBuffer: func() *buffer {
+				b := newBuffer(now, 10)
+				b.data = nil
+				return b
+			},
+			wantExists: false,
+			wantHealthy: false,
+			description: "empty buffer should not exist and not be healthy",
+		},
+		{
+			name: "healthy buffer with recent data",
+			setupBuffer: func() *buffer {
+				b := newBuffer(now-30, 10)
+				// Write data up to now (within MaxMissingDataPoints * frequency = 50 seconds)
+				for ts := now - 30; ts <= now; ts += 10 {
+					b.write(ts, schema.Float(1.0))
+				}
+				return b
+			},
+			wantExists: true,
+			wantHealthy: true,
+			description: "buffer with recent data should be healthy",
+		},
+		{
+			name: "stale buffer beyond threshold",
+			setupBuffer: func() *buffer {
+				b := newBuffer(now-200, 10)
+				// Write data that ends 100 seconds ago (beyond MaxMissingDataPoints * frequency = 50 seconds)
+				for ts := now - 200; ts <= now-100; ts += 10 {
+					b.write(ts, schema.Float(1.0))
+				}
+				return b
+			},
+			wantExists: true,
+			wantHealthy: false,
+			description: "buffer with stale data should exist but not be healthy",
+		},
+		{
+			name: "buffer at threshold boundary",
+			setupBuffer: func() *buffer {
+				b := newBuffer(now-50, 10)
+				// Write data that ends exactly at threshold (MaxMissingDataPoints * frequency = 50 seconds)
+				for ts := now - 50; ts <= now-50; ts += 10 {
+					b.write(ts, schema.Float(1.0))
+				}
+				return b
+			},
+			wantExists: true,
+			wantHealthy: true,
+			description: "buffer at threshold boundary should still be healthy",
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			b := tt.setupBuffer()
+
+			exists := b.bufferExists()
+			if exists != tt.wantExists {
+				t.Errorf("bufferExists() = %v, want %v: %s", exists, tt.wantExists, tt.description)
+			}
+
+			if b != nil && b.data != nil && len(b.data) > 0 {
+				healthy := b.isBufferHealthy()
+				if healthy != tt.wantHealthy {
+					t.Errorf("isBufferHealthy() = %v, want %v: %s", healthy, tt.wantHealthy, tt.description)
+				}
+			}
+		})
+	}
+}
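The new TestBufferHealthChecks cases rely on buffer-level helpers (bufferExists, isBufferHealthy) and a MaxMissingDataPoints constant that this diff does not show. Below is a minimal sketch of the staleness rule those tests imply; the buffer field names (start, frequency, data) and the float64 element type are assumptions for illustration, not the actual cc-backend implementation.

package memorystore

import "time"

// Implied by the "threshold is 50 seconds" comments in the test above.
const MaxMissingDataPoints = 5

// Hypothetical buffer layout, for illustration only.
type buffer struct {
	start     int64     // timestamp of the first stored sample (assumed)
	frequency int64     // seconds between samples (assumed)
	data      []float64 // stored samples (assumed element type)
}

// bufferExists: a nil or empty buffer cannot be evaluated for health.
func (b *buffer) bufferExists() bool {
	return b != nil && len(b.data) > 0
}

// isBufferHealthy: the buffer counts as healthy while its newest sample
// is at most MaxMissingDataPoints * frequency seconds old.
func (b *buffer) isBufferHealthy() bool {
	if !b.bufferExists() {
		return false
	}
	lastTs := b.start + int64(len(b.data)-1)*b.frequency
	return time.Now().Unix()-lastTs <= MaxMissingDataPoints*b.frequency
}

With Frequency = 10 this yields the 50-second threshold referenced in the test comments: data written up to now passes, while data ending 100 seconds ago (node002's last two metrics, node005, and the "stale buffer beyond threshold" case) fails.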