Update unit tests for the reworked HealthCheck

2026-03-03 22:57:29 +01:00 · 2026-02-04 12:53:24 +01:00
parent 46fb52d67e
commit 5d7dd62b72
1 changed files with 243 additions and 27 deletions
--- a/pkg/metricstore/metricstore_test.go
+++ b/pkg/metricstore/metricstore_test.go
@@ -90,7 +90,7 @@ func TestBufferRead(t *testing.T) {
 	}
 }

-func TestHealthCheckAlt(t *testing.T) {
+func TestHealthCheck(t *testing.T) {
 	// Create a test MemoryStore with some metrics
 	metrics := map[string]MetricConfig{
 		"load":       {Frequency: 10, Aggregation: AvgAggregation, offset: 0},
@@ -111,38 +111,31 @@ func TestHealthCheckAlt(t *testing.T) {
 	now := time.Now().Unix()
 	startTime := now - 100 // Start 100 seconds ago to have enough data points

-	// Setup test data for node001 - all metrics healthy
+	// Setup test data for node001 - all metrics healthy (recent data)
 	node001 := ms.root.findLevelOrCreate([]string{"testcluster", "node001"}, len(metrics))
 	for i := 0; i < len(metrics); i++ {
 		node001.metrics[i] = newBuffer(startTime, 10)
-		// Write recent data with no NaN values
+		// Write recent data up to now
 		for ts := startTime; ts <= now; ts += 10 {
 			node001.metrics[i].write(ts, schema.Float(float64(i+1)))
 		}
 	}

-	// Setup test data for node002 - some metrics degraded (many NaN values)
+	// Setup test data for node002 - some metrics stale (old data beyond MaxMissingDataPoints threshold)
 	node002 := ms.root.findLevelOrCreate([]string{"testcluster", "node002"}, len(metrics))
+	// MaxMissingDataPoints = 5, frequency = 10, so threshold is 50 seconds
+	staleTime := now - 100 // Data ends 100 seconds ago (well beyond 50 second threshold)
 	for i := 0; i < len(metrics); i++ {
-		node002.metrics[i] = newBuffer(startTime, 10)
+		node002.metrics[i] = newBuffer(staleTime-50, 10)
 		if i < 2 {
-			// First two metrics: healthy (no NaN)
+			// First two metrics: healthy (recent data)
 			for ts := startTime; ts <= now; ts += 10 {
 				node002.metrics[i].write(ts, schema.Float(float64(i+1)))
 			}
 		} else {
-			// Last two metrics: degraded (many NaN values in recent data)
-			// Write real values first, then NaN values at the end
-			count := 0
-			for ts := startTime; ts <= now; ts += 10 {
-				if count < 5 {
-					// Write first 5 real values
-					node002.metrics[i].write(ts, schema.Float(float64(i+1)))
-				} else {
-					// Write NaN for the rest (last ~6 values will be NaN)
-					node002.metrics[i].write(ts, schema.NaN)
-				}
-				count++
+			// Last two metrics: stale (data ends 100 seconds ago)
+			for ts := staleTime - 50; ts <= staleTime; ts += 10 {
+				node002.metrics[i].write(ts, schema.Float(float64(i+1)))
 			}
 		}
 	}
@@ -158,6 +151,16 @@ func TestHealthCheckAlt(t *testing.T) {
 	}
 	// Leave metrics[2] and metrics[3] as nil (missing)

+	// Setup test data for node005 - all metrics stale
+	node005 := ms.root.findLevelOrCreate([]string{"testcluster", "node005"}, len(metrics))
+	for i := 0; i < len(metrics); i++ {
+		node005.metrics[i] = newBuffer(staleTime-50, 10)
+		// All metrics have stale data (ends 100 seconds ago)
+		for ts := staleTime - 50; ts <= staleTime; ts += 10 {
+			node005.metrics[i].write(ts, schema.Float(float64(i+1)))
+		}
+	}
+
 	// node004 doesn't exist at all

 	tests := []struct {
@@ -177,7 +180,7 @@ func TestHealthCheckAlt(t *testing.T) {
 			},
 		},
 		{
-			name:            "some metrics degraded",
+			name:            "some metrics stale",
 			cluster:         "testcluster",
 			nodes:           []string{"node002"},
 			expectedMetrics: []string{"load", "mem_used", "cpu_user", "cpu_system"},
@@ -203,16 +206,26 @@ func TestHealthCheckAlt(t *testing.T) {
 				"node004": schema.MonitoringStateFailed,
 			},
 		},
+		{
+			name:            "all metrics stale",
+			cluster:         "testcluster",
+			nodes:           []string{"node005"},
+			expectedMetrics: []string{"load", "mem_used", "cpu_user", "cpu_system"},
+			wantStates: map[string]schema.MonitoringState{
+				"node005": schema.MonitoringStateFailed,
+			},
+		},
 		{
 			name:            "multiple nodes mixed states",
 			cluster:         "testcluster",
-			nodes:           []string{"node001", "node002", "node003", "node004"},
+			nodes:           []string{"node001", "node002", "node003", "node004", "node005"},
 			expectedMetrics: []string{"load", "mem_used"},
 			wantStates: map[string]schema.MonitoringState{
 				"node001": schema.MonitoringStateFull,
-				"node002": schema.MonitoringStateFull,
-				"node003": schema.MonitoringStateFull,
-				"node004": schema.MonitoringStateFailed,
+				"node002": schema.MonitoringStateFull,   // Only checking first 2 metrics which are healthy
+				"node003": schema.MonitoringStateFull,   // Only checking first 2 metrics which exist
+				"node004": schema.MonitoringStateFailed, // Node doesn't exist
+				"node005": schema.MonitoringStateFailed, // Both metrics are stale
 			},
 		},
 	}
@@ -221,30 +234,233 @@ func TestHealthCheckAlt(t *testing.T) {
 		t.Run(tt.name, func(t *testing.T) {
 			results, err := ms.HealthCheck(tt.cluster, tt.nodes, tt.expectedMetrics)
 			if err != nil {
-				t.Errorf("HealthCheckAlt() error = %v", err)
+				t.Errorf("HealthCheck() error = %v", err)
 				return
 			}

 			// Check that we got results for all nodes
 			if len(results) != len(tt.nodes) {
-				t.Errorf("HealthCheckAlt() returned %d results, want %d", len(results), len(tt.nodes))
+				t.Errorf("HealthCheck() returned %d results, want %d", len(results), len(tt.nodes))
 			}

 			// Check each node's state
 			for _, node := range tt.nodes {
 				state, ok := results[node]
 				if !ok {
-					t.Errorf("HealthCheckAlt() missing result for node %s", node)
+					t.Errorf("HealthCheck() missing result for node %s", node)
 					continue
 				}

 				// Check status
 				if wantStatus, ok := tt.wantStates[node]; ok {
 					if state != wantStatus {
-						t.Errorf("HealthCheckAlt() node %s status = %v, want %v", node, state, wantStatus)
+						t.Errorf("HealthCheck() node %s status = %v, want %v", node, state, wantStatus)
 					}
 				}
 			}
 		})
 	}
 }
+
+// TestGetHealthyMetrics sets up one node with a fresh, a stale and an unset metric buffer and checks the missing/degraded lists returned by GetHealthyMetrics.
+func TestGetHealthyMetrics(t *testing.T) {
+	metrics := map[string]MetricConfig{
+		"load":     {Frequency: 10, Aggregation: AvgAggregation, offset: 0},
+		"mem_used": {Frequency: 10, Aggregation: AvgAggregation, offset: 1},
+		"cpu_user": {Frequency: 10, Aggregation: AvgAggregation, offset: 2},
+	}
+
+	ms := &MemoryStore{
+		Metrics: metrics,
+		root: Level{
+			metrics:  make([]*buffer, len(metrics)),
+			children: make(map[string]*Level),
+		},
+	}
+
+	now := time.Now().Unix()
+	startTime := now - 100 // first timestamp written to the healthy buffer
+	staleTime := now - 100 // last timestamp written to the stale buffer
+
+	// Set up testcluster/testnode with one healthy, one stale and one missing metric
+	node := ms.root.findLevelOrCreate([]string{"testcluster", "testnode"}, len(metrics))
+
+	// Metric 0 (load): healthy - one write every 10 seconds from startTime up to now
+	node.metrics[0] = newBuffer(startTime, 10)
+	for ts := startTime; ts <= now; ts += 10 {
+		node.metrics[0].write(ts, schema.Float(1.0))
+	}
+
+	// Metric 1 (mem_used): degraded - last write is at staleTime (now-100), nothing newer
+	node.metrics[1] = newBuffer(staleTime-50, 10)
+	for ts := staleTime - 50; ts <= staleTime; ts += 10 {
+		node.metrics[1].write(ts, schema.Float(2.0))
+	}
+
+	// Metric 2 (cpu_user): missing - node.metrics[2] is never assigned a buffer
+
+	tests := []struct {
+		name            string   // subtest name passed to t.Run
+		selector        []string // first argument to GetHealthyMetrics (cluster, node)
+		expectedMetrics []string // second argument: metric names to check
+		wantMissing     []string // expected first return value
+		wantDegraded    []string // expected second return value
+		wantErr         bool     // whether a non-nil error is expected
+	}{
+		{
+			name:            "mixed health states",
+			selector:        []string{"testcluster", "testnode"},
+			expectedMetrics: []string{"load", "mem_used", "cpu_user"},
+			wantMissing:     []string{"cpu_user"},
+			wantDegraded:    []string{"mem_used"},
+			wantErr:         false,
+		},
+		{
+			name:            "node not found",
+			selector:        []string{"testcluster", "nonexistent"},
+			expectedMetrics: []string{"load"},
+			wantMissing:     nil, // lists are not compared; the subtest returns once the expected error is seen
+			wantDegraded:    nil,
+			wantErr:         true,
+		},
+		{
+			name:            "check only healthy metric",
+			selector:        []string{"testcluster", "testnode"},
+			expectedMetrics: []string{"load"},
+			wantMissing:     []string{}, // compared by length first, so a nil or an empty result both pass
+			wantDegraded:    []string{},
+			wantErr:         false,
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			missing, degraded, err := ms.GetHealthyMetrics(tt.selector, tt.expectedMetrics)
+
+			if (err != nil) != tt.wantErr {
+				t.Errorf("GetHealthyMetrics() error = %v, wantErr %v", err, tt.wantErr)
+				return
+			}
+
+			if tt.wantErr {
+				return
+			}
+
+			// Check missing list: lengths must match, then entries are compared position by position (order matters)
+			if len(missing) != len(tt.wantMissing) {
+				t.Errorf("GetHealthyMetrics() missing = %v, want %v", missing, tt.wantMissing)
+			} else {
+				for i, m := range tt.wantMissing {
+					if missing[i] != m {
+						t.Errorf("GetHealthyMetrics() missing[%d] = %v, want %v", i, missing[i], m)
+					}
+				}
+			}
+
+			// Check degraded list: same length-then-position comparison as for the missing list
+			if len(degraded) != len(tt.wantDegraded) {
+				t.Errorf("GetHealthyMetrics() degraded = %v, want %v", degraded, tt.wantDegraded)
+			} else {
+				for i, d := range tt.wantDegraded {
+					if degraded[i] != d {
+						t.Errorf("GetHealthyMetrics() degraded[%d] = %v, want %v", i, degraded[i], d)
+					}
+				}
+			}
+		})
+	}
+}
+
+// TestBufferHealthChecks exercises bufferExists and isBufferHealthy on nil, empty, fresh, stale and boundary-aged buffers.
+func TestBufferHealthChecks(t *testing.T) {
+	now := time.Now().Unix()
+
+	tests := []struct {
+		name        string         // subtest name passed to t.Run
+		setupBuffer func() *buffer // builds the buffer under test; may return nil
+		wantExists  bool           // expected bufferExists() result
+		wantHealthy bool           // expected isBufferHealthy() result; only checked for buffers holding data
+		description string         // appended to failure messages
+	}{
+		{
+			name: "nil buffer",
+			setupBuffer: func() *buffer {
+				return nil
+			},
+			wantExists:  false,
+			wantHealthy: false,
+			description: "nil buffer should not exist and not be healthy",
+		},
+		{
+			name: "empty buffer",
+			setupBuffer: func() *buffer {
+				b := newBuffer(now, 10)
+				b.data = nil // drop the data so the buffer holds no samples
+				return b
+			},
+			wantExists:  false,
+			wantHealthy: false,
+			description: "empty buffer should not exist and not be healthy",
+		},
+		{
+			name: "healthy buffer with recent data",
+			setupBuffer: func() *buffer {
+				b := newBuffer(now-30, 10)
+				// Write data up to now (within MaxMissingDataPoints * frequency = 50 seconds)
+				for ts := now - 30; ts <= now; ts += 10 {
+					b.write(ts, schema.Float(1.0))
+				}
+				return b
+			},
+			wantExists:  true,
+			wantHealthy: true,
+			description: "buffer with recent data should be healthy",
+		},
+		{
+			name: "stale buffer beyond threshold",
+			setupBuffer: func() *buffer {
+				b := newBuffer(now-200, 10)
+				// Write data that ends 100 seconds ago (beyond MaxMissingDataPoints * frequency = 50 seconds)
+				for ts := now - 200; ts <= now-100; ts += 10 {
+					b.write(ts, schema.Float(1.0))
+				}
+				return b
+			},
+			wantExists:  true,
+			wantHealthy: false,
+			description: "buffer with stale data should exist but not be healthy",
+		},
+		{
+			name: "buffer at threshold boundary",
+			setupBuffer: func() *buffer {
+				b := newBuffer(now-50, 10)
+				// Single sample exactly 50 seconds old (MaxMissingDataPoints * frequency); NOTE(review): may be timing-sensitive if isBufferHealthy reads the clock itself - confirm
+				for ts := now - 50; ts <= now-50; ts += 10 { // runs exactly once (ts == now-50)
+					b.write(ts, schema.Float(1.0))
+				}
+				return b
+			},
+			wantExists:  true,
+			wantHealthy: true,
+			description: "buffer at threshold boundary should still be healthy",
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			b := tt.setupBuffer()
+
+			exists := b.bufferExists() // b may be nil here; the method is invoked on the nil receiver
+			if exists != tt.wantExists {
+				t.Errorf("bufferExists() = %v, want %v: %s", exists, tt.wantExists, tt.description)
+			}
+			// Only buffers that hold data get the health check; len of a nil slice is 0, so no separate nil test is needed
+			if b != nil && len(b.data) > 0 {
+				healthy := b.isBufferHealthy()
+				if healthy != tt.wantHealthy {
+					t.Errorf("isBufferHealthy() = %v, want %v: %s", healthy, tt.wantHealthy, tt.description)
+				}
+			}
+		})
+	}
+}