Skip to content

Commit c472396

Browse files
committed
kvserver: make ensureLeaderStepsDown more resolute
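Addresses a TODO by blocking incoming Raft messages to the current leader while waiting for it to step down. Previously, responses from followers could keep the leader from stepping down promptly, which made this function flaky.

Closes #159133

Release note: None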
1 parent d5ccb9c commit c472396

File tree

2 files changed

+24
-5
lines changed

2 files changed

+24
-5
lines changed

pkg/testutils/testcluster/BUILD.bazel

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ go_library(
1616
"//pkg/kv/kvserver/liveness/livenesspb",
1717
"//pkg/kv/kvserver/storeliveness",
1818
"//pkg/kv/kvserver/storeliveness/storelivenesspb",
19+
"//pkg/kv/kvtestutils",
1920
"//pkg/multitenant/tenantcapabilitiespb",
2021
"//pkg/raft/raftpb",
2122
"//pkg/roachpb",

pkg/testutils/testcluster/testcluster.go

Lines changed: 23 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ import (
2929
"github.com/cockroachdb/cockroach/pkg/kv/kvserver/liveness/livenesspb"
3030
"github.com/cockroachdb/cockroach/pkg/kv/kvserver/storeliveness"
3131
"github.com/cockroachdb/cockroach/pkg/kv/kvserver/storeliveness/storelivenesspb"
32+
"github.com/cockroachdb/cockroach/pkg/kv/kvtestutils"
3233
"github.com/cockroachdb/cockroach/pkg/multitenant/tenantcapabilitiespb"
3334
"github.com/cockroachdb/cockroach/pkg/raft/raftpb"
3435
"github.com/cockroachdb/cockroach/pkg/roachpb"
@@ -1432,22 +1433,39 @@ func (tc *TestCluster) ensureLeaderStepsDown(
14321433
},
14331434
})
14341435

1436+
// Block Raft messages to the current leader. This prevents the leader from
1437+
// receiving MsgAppResp messages from followers, which could otherwise reset
1438+
// the leader's notion of RecentlyActive, which would then prevent it from
1439+
// stepping down due to CheckQuorum.
1440+
leaderNode.RaftTransport().(*kvserver.RaftTransport).
1441+
ListenIncomingRaftMessages(leaderStore.StoreID(),
1442+
&kvtestutils.UnreliableRaftHandler{
1443+
Name: "ensureLeaderStepsDown",
1444+
RangeID: rangeDesc.RangeID,
1445+
IncomingRaftMessageHandler: leaderStore,
1446+
UnreliableRaftHandlerFuncs: kvtestutils.UnreliableRaftHandlerFuncs{
1447+
DropReq: func(*kvserverpb.RaftMessageRequest) bool { return true },
1448+
DropHB: func(*kvserverpb.RaftHeartbeat) bool { return true },
1449+
DropResp: func(*kvserverpb.RaftMessageResponse) bool { return true },
1450+
},
1451+
})
1452+
14351453
// Advance the manual clock past the lease's expiration.
14361454
log.Dev.Infof(ctx, "test: advancing clock to lease expiration")
14371455
manual.Increment(leaderStore.GetStoreConfig().LeaseExpiration())
14381456

1439-
// Wait for the leader to step down. Sometimes this might take a while since
1440-
// the leader might be replicating to other followers, and it won't step down
1441-
// unless it doesn't receive anything from the followers for a while.
1442-
// TODO(ibrahim): This could be made faster by blocking Raft messages to
1443-
// the leader.
1457+
// Wait for the leader to step down.
14441458
testutils.SucceedsWithin(t, func() error {
14451459
if leaderReplica.RaftStatus().RaftState == raftpb.StateLeader {
14461460
return errors.Errorf("leader hasn't stepped down yet")
14471461
}
14481462
return nil
14491463
}, 2*testutils.SucceedsSoonDuration())
14501464

1465+
// Restore Raft message handling to normal.
1466+
leaderNode.RaftTransport().(*kvserver.RaftTransport).
1467+
ListenIncomingRaftMessages(leaderStore.StoreID(), leaderStore)
1468+
14511469
// Restore store liveness state to normal.
14521470
leaderNode.StoreLivenessTransport().(*storeliveness.Transport).
14531471
ListenMessages(leaderStore.StoreID(),

0 commit comments

Comments
 (0)