@@ -90,7 +90,7 @@ func TestBufferRead(t *testing.T) {
9090 }
9191}
9292
93- func TestHealthCheckAlt (t * testing.T ) {
93+ func TestHealthCheck (t * testing.T ) {
9494 // Create a test MemoryStore with some metrics
9595 metrics := map [string ]MetricConfig {
9696 "load" : {Frequency : 10 , Aggregation : AvgAggregation , offset : 0 },
@@ -111,38 +111,31 @@ func TestHealthCheckAlt(t *testing.T) {
111111 now := time .Now ().Unix ()
112112 startTime := now - 100 // Start 100 seconds ago to have enough data points
113113
114- // Setup test data for node001 - all metrics healthy
114+ // Setup test data for node001 - all metrics healthy (recent data)
115115 node001 := ms .root .findLevelOrCreate ([]string {"testcluster" , "node001" }, len (metrics ))
116116 for i := 0 ; i < len (metrics ); i ++ {
117117 node001 .metrics [i ] = newBuffer (startTime , 10 )
118- // Write recent data with no NaN values
118+ // Write recent data up to now
119119 for ts := startTime ; ts <= now ; ts += 10 {
120120 node001 .metrics [i ].write (ts , schema .Float (float64 (i + 1 )))
121121 }
122122 }
123123
124- // Setup test data for node002 - some metrics degraded (many NaN values )
124+ // Setup test data for node002 - some metrics stale (old data beyond MaxMissingDataPoints threshold )
125125 node002 := ms .root .findLevelOrCreate ([]string {"testcluster" , "node002" }, len (metrics ))
126+ // MaxMissingDataPoints = 5, frequency = 10, so threshold is 50 seconds
127+ staleTime := now - 100 // Data ends 100 seconds ago (well beyond 50 second threshold)
126128 for i := 0 ; i < len (metrics ); i ++ {
127- node002 .metrics [i ] = newBuffer (startTime , 10 )
129+ node002 .metrics [i ] = newBuffer (staleTime - 50 , 10 )
128130 if i < 2 {
129- // First two metrics: healthy (no NaN )
131+ // First two metrics: healthy (recent data )
130132 for ts := startTime ; ts <= now ; ts += 10 {
131133 node002 .metrics [i ].write (ts , schema .Float (float64 (i + 1 )))
132134 }
133135 } else {
134- // Last two metrics: degraded (many NaN values in recent data)
135- // Write real values first, then NaN values at the end
136- count := 0
137- for ts := startTime ; ts <= now ; ts += 10 {
138- if count < 5 {
139- // Write first 5 real values
140- node002 .metrics [i ].write (ts , schema .Float (float64 (i + 1 )))
141- } else {
142- // Write NaN for the rest (last ~6 values will be NaN)
143- node002 .metrics [i ].write (ts , schema .NaN )
144- }
145- count ++
136+ // Last two metrics: stale (data ends 100 seconds ago)
137+ for ts := staleTime - 50 ; ts <= staleTime ; ts += 10 {
138+ node002 .metrics [i ].write (ts , schema .Float (float64 (i + 1 )))
146139 }
147140 }
148141 }
@@ -158,6 +151,16 @@ func TestHealthCheckAlt(t *testing.T) {
158151 }
159152 // Leave metrics[2] and metrics[3] as nil (missing)
160153
154+ // Setup test data for node005 - all metrics stale
155+ node005 := ms .root .findLevelOrCreate ([]string {"testcluster" , "node005" }, len (metrics ))
156+ for i := 0 ; i < len (metrics ); i ++ {
157+ node005 .metrics [i ] = newBuffer (staleTime - 50 , 10 )
158+ // All metrics have stale data (ends 100 seconds ago)
159+ for ts := staleTime - 50 ; ts <= staleTime ; ts += 10 {
160+ node005 .metrics [i ].write (ts , schema .Float (float64 (i + 1 )))
161+ }
162+ }
163+
161164 // node004 doesn't exist at all
162165
163166 tests := []struct {
@@ -177,7 +180,7 @@ func TestHealthCheckAlt(t *testing.T) {
177180 },
178181 },
179182 {
180- name : "some metrics degraded " ,
183+ name : "some metrics stale " ,
181184 cluster : "testcluster" ,
182185 nodes : []string {"node002" },
183186 expectedMetrics : []string {"load" , "mem_used" , "cpu_user" , "cpu_system" },
@@ -203,16 +206,26 @@ func TestHealthCheckAlt(t *testing.T) {
203206 "node004" : schema .MonitoringStateFailed ,
204207 },
205208 },
209+ {
210+ name : "all metrics stale" ,
211+ cluster : "testcluster" ,
212+ nodes : []string {"node005" },
213+ expectedMetrics : []string {"load" , "mem_used" , "cpu_user" , "cpu_system" },
214+ wantStates : map [string ]schema.MonitoringState {
215+ "node005" : schema .MonitoringStateFailed ,
216+ },
217+ },
206218 {
207219 name : "multiple nodes mixed states" ,
208220 cluster : "testcluster" ,
209- nodes : []string {"node001" , "node002" , "node003" , "node004" },
221+ nodes : []string {"node001" , "node002" , "node003" , "node004" , "node005" },
210222 expectedMetrics : []string {"load" , "mem_used" },
211223 wantStates : map [string ]schema.MonitoringState {
212224 "node001" : schema .MonitoringStateFull ,
213- "node002" : schema .MonitoringStateFull ,
214- "node003" : schema .MonitoringStateFull ,
215- "node004" : schema .MonitoringStateFailed ,
225+ "node002" : schema .MonitoringStateFull , // Only checking first 2 metrics which are healthy
226+ "node003" : schema .MonitoringStateFull , // Only checking first 2 metrics which exist
227+ "node004" : schema .MonitoringStateFailed , // Node doesn't exist
228+ "node005" : schema .MonitoringStateFailed , // Both metrics are stale
216229 },
217230 },
218231 }
@@ -221,30 +234,233 @@ func TestHealthCheckAlt(t *testing.T) {
221234 t .Run (tt .name , func (t * testing.T ) {
222235 results , err := ms .HealthCheck (tt .cluster , tt .nodes , tt .expectedMetrics )
223236 if err != nil {
224- t .Errorf ("HealthCheckAlt () error = %v" , err )
237+ t .Errorf ("HealthCheck () error = %v" , err )
225238 return
226239 }
227240
228241 // Check that we got results for all nodes
229242 if len (results ) != len (tt .nodes ) {
230- t .Errorf ("HealthCheckAlt () returned %d results, want %d" , len (results ), len (tt .nodes ))
243+ t .Errorf ("HealthCheck () returned %d results, want %d" , len (results ), len (tt .nodes ))
231244 }
232245
233246 // Check each node's state
234247 for _ , node := range tt .nodes {
235248 state , ok := results [node ]
236249 if ! ok {
237- t .Errorf ("HealthCheckAlt () missing result for node %s" , node )
250+ t .Errorf ("HealthCheck () missing result for node %s" , node )
238251 continue
239252 }
240253
241254 // Check status
242255 if wantStatus , ok := tt .wantStates [node ]; ok {
243256 if state != wantStatus {
244- t .Errorf ("HealthCheckAlt() node %s status = %v, want %v" , node , state , wantStatus )
257+ t .Errorf ("HealthCheck() node %s status = %v, want %v" , node , state , wantStatus )
258+ }
259+ }
260+ }
261+ })
262+ }
263+ }
264+
265+ // TestGetHealthyMetrics tests the GetHealthyMetrics function which returns lists of missing and degraded metrics
266+ func TestGetHealthyMetrics (t * testing.T ) {
267+ metrics := map [string ]MetricConfig {
268+ "load" : {Frequency : 10 , Aggregation : AvgAggregation , offset : 0 },
269+ "mem_used" : {Frequency : 10 , Aggregation : AvgAggregation , offset : 1 },
270+ "cpu_user" : {Frequency : 10 , Aggregation : AvgAggregation , offset : 2 },
271+ }
272+
273+ ms := & MemoryStore {
274+ Metrics : metrics ,
275+ root : Level {
276+ metrics : make ([]* buffer , len (metrics )),
277+ children : make (map [string ]* Level ),
278+ },
279+ }
280+
281+ now := time .Now ().Unix ()
282+ startTime := now - 100
283+ staleTime := now - 100
284+
285+ // Setup node with mixed health states
286+ node := ms .root .findLevelOrCreate ([]string {"testcluster" , "testnode" }, len (metrics ))
287+
288+ // Metric 0 (load): healthy - recent data
289+ node .metrics [0 ] = newBuffer (startTime , 10 )
290+ for ts := startTime ; ts <= now ; ts += 10 {
291+ node .metrics [0 ].write (ts , schema .Float (1.0 ))
292+ }
293+
294+ // Metric 1 (mem_used): degraded - stale data
295+ node .metrics [1 ] = newBuffer (staleTime - 50 , 10 )
296+ for ts := staleTime - 50 ; ts <= staleTime ; ts += 10 {
297+ node .metrics [1 ].write (ts , schema .Float (2.0 ))
298+ }
299+
300+ // Metric 2 (cpu_user): missing - no buffer (nil)
301+
302+ tests := []struct {
303+ name string
304+ selector []string
305+ expectedMetrics []string
306+ wantMissing []string
307+ wantDegraded []string
308+ wantErr bool
309+ }{
310+ {
311+ name : "mixed health states" ,
312+ selector : []string {"testcluster" , "testnode" },
313+ expectedMetrics : []string {"load" , "mem_used" , "cpu_user" },
314+ wantMissing : []string {"cpu_user" },
315+ wantDegraded : []string {"mem_used" },
316+ wantErr : false ,
317+ },
318+ {
319+ name : "node not found" ,
320+ selector : []string {"testcluster" , "nonexistent" },
321+ expectedMetrics : []string {"load" },
322+ wantMissing : nil ,
323+ wantDegraded : nil ,
324+ wantErr : true ,
325+ },
326+ {
327+ name : "check only healthy metric" ,
328+ selector : []string {"testcluster" , "testnode" },
329+ expectedMetrics : []string {"load" },
330+ wantMissing : []string {},
331+ wantDegraded : []string {},
332+ wantErr : false ,
333+ },
334+ }
335+
336+ for _ , tt := range tests {
337+ t .Run (tt .name , func (t * testing.T ) {
338+ missing , degraded , err := ms .GetHealthyMetrics (tt .selector , tt .expectedMetrics )
339+
340+ if (err != nil ) != tt .wantErr {
341+ t .Errorf ("GetHealthyMetrics() error = %v, wantErr %v" , err , tt .wantErr )
342+ return
343+ }
344+
345+ if tt .wantErr {
346+ return
347+ }
348+
349+ // Check missing list
350+ if len (missing ) != len (tt .wantMissing ) {
351+ t .Errorf ("GetHealthyMetrics() missing = %v, want %v" , missing , tt .wantMissing )
352+ } else {
353+ for i , m := range tt .wantMissing {
354+ if missing [i ] != m {
355+ t .Errorf ("GetHealthyMetrics() missing[%d] = %v, want %v" , i , missing [i ], m )
356+ }
357+ }
358+ }
359+
360+ // Check degraded list
361+ if len (degraded ) != len (tt .wantDegraded ) {
362+ t .Errorf ("GetHealthyMetrics() degraded = %v, want %v" , degraded , tt .wantDegraded )
363+ } else {
364+ for i , d := range tt .wantDegraded {
365+ if degraded [i ] != d {
366+ t .Errorf ("GetHealthyMetrics() degraded[%d] = %v, want %v" , i , degraded [i ], d )
245367 }
246368 }
247369 }
248370 })
249371 }
250372}
373+
374+ // TestBufferHealthChecks tests the buffer-level health check functions
375+ func TestBufferHealthChecks (t * testing.T ) {
376+ now := time .Now ().Unix ()
377+
378+ tests := []struct {
379+ name string
380+ setupBuffer func () * buffer
381+ wantExists bool
382+ wantHealthy bool
383+ description string
384+ }{
385+ {
386+ name : "nil buffer" ,
387+ setupBuffer : func () * buffer {
388+ return nil
389+ },
390+ wantExists : false ,
391+ wantHealthy : false ,
392+ description : "nil buffer should not exist and not be healthy" ,
393+ },
394+ {
395+ name : "empty buffer" ,
396+ setupBuffer : func () * buffer {
397+ b := newBuffer (now , 10 )
398+ b .data = nil
399+ return b
400+ },
401+ wantExists : false ,
402+ wantHealthy : false ,
403+ description : "empty buffer should not exist and not be healthy" ,
404+ },
405+ {
406+ name : "healthy buffer with recent data" ,
407+ setupBuffer : func () * buffer {
408+ b := newBuffer (now - 30 , 10 )
409+ // Write data up to now (within MaxMissingDataPoints * frequency = 50 seconds)
410+ for ts := now - 30 ; ts <= now ; ts += 10 {
411+ b .write (ts , schema .Float (1.0 ))
412+ }
413+ return b
414+ },
415+ wantExists : true ,
416+ wantHealthy : true ,
417+ description : "buffer with recent data should be healthy" ,
418+ },
419+ {
420+ name : "stale buffer beyond threshold" ,
421+ setupBuffer : func () * buffer {
422+ b := newBuffer (now - 200 , 10 )
423+ // Write data that ends 100 seconds ago (beyond MaxMissingDataPoints * frequency = 50 seconds)
424+ for ts := now - 200 ; ts <= now - 100 ; ts += 10 {
425+ b .write (ts , schema .Float (1.0 ))
426+ }
427+ return b
428+ },
429+ wantExists : true ,
430+ wantHealthy : false ,
431+ description : "buffer with stale data should exist but not be healthy" ,
432+ },
433+ {
434+ name : "buffer at threshold boundary" ,
435+ setupBuffer : func () * buffer {
436+ b := newBuffer (now - 50 , 10 )
437+ // Write data that ends exactly at threshold (MaxMissingDataPoints * frequency = 50 seconds)
438+ for ts := now - 50 ; ts <= now - 50 ; ts += 10 {
439+ b .write (ts , schema .Float (1.0 ))
440+ }
441+ return b
442+ },
443+ wantExists : true ,
444+ wantHealthy : true ,
445+ description : "buffer at threshold boundary should still be healthy" ,
446+ },
447+ }
448+
449+ for _ , tt := range tests {
450+ t .Run (tt .name , func (t * testing.T ) {
451+ b := tt .setupBuffer ()
452+
453+ exists := b .bufferExists ()
454+ if exists != tt .wantExists {
455+ t .Errorf ("bufferExists() = %v, want %v: %s" , exists , tt .wantExists , tt .description )
456+ }
457+
458+ if b != nil && b .data != nil && len (b .data ) > 0 {
459+ healthy := b .isBufferHealthy ()
460+ if healthy != tt .wantHealthy {
461+ t .Errorf ("isBufferHealthy() = %v, want %v: %s" , healthy , tt .wantHealthy , tt .description )
462+ }
463+ }
464+ })
465+ }
466+ }
0 commit comments