@@ -199,18 +199,38 @@ func TestRestartSafetyV2(t *testing.T) {
 	// Drain the node; then we'll expect success.
 	require.NoError(t, drain(ctx, ts1, t))

-	req, err = http.NewRequest("GET", urlStr, nil)
-	require.NoError(t, err)
-	resp, err = client.Do(req)
-	require.NoError(t, err)
-	require.NotNil(t, resp)
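+	// Retry until the endpoint returns 200; the drain may take a moment
+	// to be reflected in the restart-safety status.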
+	testutils.SucceedsSoon(t, func() error {
+		req, err := http.NewRequest("GET", urlStr, nil)
+		if err != nil {
+			return err
+		}
+		resp, err := client.Do(req)
+		if err != nil {
+			return err
+		}
+		if resp == nil {
+			return fmt.Errorf("response is nil")
+		}

-	bodyBytes, err = io.ReadAll(resp.Body)
-	require.NoError(t, err)
-	require.NoError(t, resp.Body.Close())
+		bodyBytes, err := io.ReadAll(resp.Body)
+		if err != nil {
+			return err
+		}
+		if err := resp.Body.Close(); err != nil {
+			return err
+		}

-	require.Equal(t, 200, resp.StatusCode, "expected 200: %s", string(bodyBytes))
-	require.NoError(t, json.Unmarshal(bodyBytes, &response))
+		if resp.StatusCode != 200 {
+			return fmt.Errorf("expected 200 but got %d: %s", resp.StatusCode, string(bodyBytes))
+		}
+
+		var response RestartSafetyResponse
+		if err := json.Unmarshal(bodyBytes, &response); err != nil {
+			return err
+		}
+
+		return nil
+	})
 }

 // TestRulesV2 tests the /api/v2/rules endpoint to ensure it
@@ -373,6 +393,8 @@ func TestCheckRestartSafe_RangeStatus(t *testing.T) {
 	defer leaktest.AfterTest(t)()
 	defer log.Scope(t).Close(t)

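+	// Timing-sensitive test; skip under duress (e.g. race or stress builds).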
+	skip.UnderDuress(t)
+
 	ctx := context.Background()
 	var err error

@@ -392,14 +414,29 @@ func TestCheckRestartSafe_RangeStatus(t *testing.T) {
 	require.True(t, vitality.GetNodeVitalityFromCache(ts0.NodeID()).IsDraining())
 	require.False(t, vitality.GetNodeVitalityFromCache(ts1nodeID).IsLive(livenesspb.Metrics))

-	res, err := checkRestartSafe(ctx, ts0.NodeID(), vitality, ts0.GetStores().(storeVisitor), 3, false)
-	require.NoError(t, err)
-	require.False(t, res.IsRestartSafe, "expected unsafe since a different node is down")
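+	// Poll until the expected range status is reported; replica state
+	// updates asynchronously.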
+	testutils.SucceedsSoon(t, func() error {
+		res, err := checkRestartSafe(ctx, ts0.NodeID(), vitality, ts0.GetStores().(storeVisitor), 3, false)
+		if err != nil {
+			return err
+		}
+		if res.IsRestartSafe {
+			return fmt.Errorf("expected unsafe since a different node is down")
+		}

-	require.Equal(t, int32(0), res.UnavailableRangeCount)
-	require.Equal(t, int32(0), res.RaftLeadershipOnNodeCount)
-	require.Equal(t, int32(0), res.StoreNotDrainingCount)
-	require.Greater(t, res.UnderreplicatedRangeCount, int32(0))
+		if res.UnavailableRangeCount != 0 {
+			return fmt.Errorf("expected UnavailableRangeCount=0, got %d", res.UnavailableRangeCount)
+		}
+		if res.RaftLeadershipOnNodeCount != 0 {
+			return fmt.Errorf("expected RaftLeadershipOnNodeCount=0, got %d", res.RaftLeadershipOnNodeCount)
+		}
+		if res.StoreNotDrainingCount != 0 {
+			return fmt.Errorf("expected StoreNotDrainingCount=0, got %d", res.StoreNotDrainingCount)
+		}
+		if res.UnderreplicatedRangeCount <= 0 {
+			return fmt.Errorf("expected UnderreplicatedRangeCount>0, got %d", res.UnderreplicatedRangeCount)
+		}
+		return nil
+	})
 }

 func updateAllReplicaCounts(
@@ -455,20 +492,33 @@ func TestCheckRestartSafe_AllowMinimumQuorum_Pass(t *testing.T) {
 	require.NoError(t, err)

 	vitality.Draining(ts0.NodeID(), true)
-	res, err := checkRestartSafe(ctx, ts0.NodeID(), vitality, stores, testCluster.NumServers(), false)
-	require.NoError(t, err)
-	require.True(t, res.IsRestartSafe)
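+	// Poll until the drain is reflected in the restart-safety check.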
+	testutils.SucceedsSoon(t, func() error {
+		res, err := checkRestartSafe(ctx, ts0.NodeID(), vitality, stores, testCluster.NumServers(), false)
+		if err != nil {
+			return err
+		}
+		if !res.IsRestartSafe {
+			return fmt.Errorf("expected node to be restart safe")
+		}
+		return nil
+	})

 	ts1nodeID := testCluster.Server(1).NodeID()
 	vitality.DownNode(ts1nodeID)

 	require.True(t, vitality.GetNodeVitalityFromCache(ts0.NodeID()).IsDraining())
 	require.False(t, vitality.GetNodeVitalityFromCache(ts1nodeID).IsLive(livenesspb.Metrics))

-	res, err = checkRestartSafe(ctx, ts0.NodeID(), vitality, stores, testCluster.NumServers(), true)
-	require.NoError(t, err)
-
-	require.True(t, res.IsRestartSafe)
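+	// With a node down, poll until the check passes in minimum-quorum mode.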
+	testutils.SucceedsSoon(t, func() error {
+		res, err := checkRestartSafe(ctx, ts0.NodeID(), vitality, stores, testCluster.NumServers(), true)
+		if err != nil {
+			return err
+		}
+		if !res.IsRestartSafe {
+			return fmt.Errorf("expected node to be restart safe with minimum quorum")
+		}
+		return nil
+	})
 }

 // TestCheckRestartSafe_AllowMinimumQuorum_Fail verifies that with 2