Commit 7f45c4b

server: add SucceedsSoon to some CheckRestartSafe tests
These tests often wait on internal signals that are asynchronous and hence should not fail immediately.

Resolves: #152143
Resolves: #152198
Resolves: #151676
Epic: None
Release note: None
1 parent ac39c2e commit 7f45c4b
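For readers unfamiliar with the helper, testutils.SucceedsSoon retries a closure until it returns nil or the test times out, which is why the assertions in the diff below are rewritten to return errors instead of failing outright. A minimal standalone sketch of that retry pattern, with assumed timeout and backoff values (the real helper lives in pkg/testutils and uses exponential backoff), might look like this:

package example_test

import (
	"testing"
	"time"
)

// succeedsSoon is an illustrative stand-in for testutils.SucceedsSoon, not
// CockroachDB's actual implementation: it re-runs the condition until it
// returns nil or a deadline passes, then fails the test with the last error.
func succeedsSoon(t *testing.T, fn func() error) {
	t.Helper()
	deadline := time.Now().Add(45 * time.Second) // assumed timeout
	var lastErr error
	for time.Now().Before(deadline) {
		if lastErr = fn(); lastErr == nil {
			return
		}
		time.Sleep(100 * time.Millisecond) // assumed fixed backoff
	}
	t.Fatalf("condition never succeeded: %v", lastErr)
}

With that shape in mind, each converted test below returns an error from any check that can lag behind an asynchronous signal, and SucceedsSoon keeps retrying until the cluster state settles.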

File tree: 1 file changed (+74, -24 lines)


pkg/server/api_v2_test.go

Lines changed: 74 additions & 24 deletions
@@ -199,18 +199,38 @@ func TestRestartSafetyV2(t *testing.T) {
 	// Drain the node; then we'll expect success.
 	require.NoError(t, drain(ctx, ts1, t))

-	req, err = http.NewRequest("GET", urlStr, nil)
-	require.NoError(t, err)
-	resp, err = client.Do(req)
-	require.NoError(t, err)
-	require.NotNil(t, resp)
+	testutils.SucceedsSoon(t, func() error {
+		req, err := http.NewRequest("GET", urlStr, nil)
+		if err != nil {
+			return err
+		}
+		resp, err := client.Do(req)
+		if err != nil {
+			return err
+		}
+		if resp == nil {
+			return fmt.Errorf("response is nil")
+		}

-	bodyBytes, err = io.ReadAll(resp.Body)
-	require.NoError(t, err)
-	require.NoError(t, resp.Body.Close())
+		bodyBytes, err := io.ReadAll(resp.Body)
+		if err != nil {
+			return err
+		}
+		if err := resp.Body.Close(); err != nil {
+			return err
+		}

-	require.Equal(t, 200, resp.StatusCode, "expected 200: %s", string(bodyBytes))
-	require.NoError(t, json.Unmarshal(bodyBytes, &response))
+		if resp.StatusCode != 200 {
+			return fmt.Errorf("expected 200 but got %d: %s", resp.StatusCode, string(bodyBytes))
+		}
+
+		var response RestartSafetyResponse
+		if err := json.Unmarshal(bodyBytes, &response); err != nil {
+			return err
+		}
+
+		return nil
+	})
 }

 // TestRulesV2 tests the /api/v2/rules endpoint to ensure it
@@ -373,6 +393,8 @@ func TestCheckRestartSafe_RangeStatus(t *testing.T) {
 	defer leaktest.AfterTest(t)()
 	defer log.Scope(t).Close(t)

+	skip.UnderDuress(t)
+
 	ctx := context.Background()
 	var err error

@@ -392,14 +414,29 @@ func TestCheckRestartSafe_RangeStatus(t *testing.T) {
 	require.True(t, vitality.GetNodeVitalityFromCache(ts0.NodeID()).IsDraining())
 	require.False(t, vitality.GetNodeVitalityFromCache(ts1nodeID).IsLive(livenesspb.Metrics))

-	res, err := checkRestartSafe(ctx, ts0.NodeID(), vitality, ts0.GetStores().(storeVisitor), 3, false)
-	require.NoError(t, err)
-	require.False(t, res.IsRestartSafe, "expected unsafe since a different node is down")
+	testutils.SucceedsSoon(t, func() error {
+		res, err := checkRestartSafe(ctx, ts0.NodeID(), vitality, ts0.GetStores().(storeVisitor), 3, false)
+		if err != nil {
+			return err
+		}
+		if res.IsRestartSafe {
+			return fmt.Errorf("expected unsafe since a different node is down")
+		}

-	require.Equal(t, int32(0), res.UnavailableRangeCount)
-	require.Equal(t, int32(0), res.RaftLeadershipOnNodeCount)
-	require.Equal(t, int32(0), res.StoreNotDrainingCount)
-	require.Greater(t, res.UnderreplicatedRangeCount, int32(0))
+		if res.UnavailableRangeCount != 0 {
+			return fmt.Errorf("expected UnavailableRangeCount=0, got %d", res.UnavailableRangeCount)
+		}
+		if res.RaftLeadershipOnNodeCount != 0 {
+			return fmt.Errorf("expected RaftLeadershipOnNodeCount=0, got %d", res.RaftLeadershipOnNodeCount)
+		}
+		if res.StoreNotDrainingCount != 0 {
+			return fmt.Errorf("expected StoreNotDrainingCount=0, got %d", res.StoreNotDrainingCount)
+		}
+		if res.UnderreplicatedRangeCount <= 0 {
+			return fmt.Errorf("expected UnderreplicatedRangeCount>0, got %d", res.UnderreplicatedRangeCount)
+		}
+		return nil
+	})
 }

 func updateAllReplicaCounts(
@@ -455,20 +492,33 @@ func TestCheckRestartSafe_AllowMinimumQuorum_Pass(t *testing.T) {
 	require.NoError(t, err)

 	vitality.Draining(ts0.NodeID(), true)
-	res, err := checkRestartSafe(ctx, ts0.NodeID(), vitality, stores, testCluster.NumServers(), false)
-	require.NoError(t, err)
-	require.True(t, res.IsRestartSafe)
+	testutils.SucceedsSoon(t, func() error {
+		res, err := checkRestartSafe(ctx, ts0.NodeID(), vitality, stores, testCluster.NumServers(), false)
+		if err != nil {
+			return err
+		}
+		if !res.IsRestartSafe {
+			return fmt.Errorf("expected node to be restart safe")
+		}
+		return nil
+	})

 	ts1nodeID := testCluster.Server(1).NodeID()
 	vitality.DownNode(ts1nodeID)

 	require.True(t, vitality.GetNodeVitalityFromCache(ts0.NodeID()).IsDraining())
 	require.False(t, vitality.GetNodeVitalityFromCache(ts1nodeID).IsLive(livenesspb.Metrics))

-	res, err = checkRestartSafe(ctx, ts0.NodeID(), vitality, stores, testCluster.NumServers(), true)
-	require.NoError(t, err)
-
-	require.True(t, res.IsRestartSafe)
+	testutils.SucceedsSoon(t, func() error {
+		res, err := checkRestartSafe(ctx, ts0.NodeID(), vitality, stores, testCluster.NumServers(), true)
+		if err != nil {
+			return err
+		}
+		if !res.IsRestartSafe {
+			return fmt.Errorf("expected node to be restart safe with minimum quorum")
+		}
+		return nil
+	})
 }

 // TestCheckRestartSafe_AllowMinimumQuorum_Fail verifies that with 2
