Skip to content

Commit edc6b41

Browse files
committed
kvserver: deflake TestClosedTimestampFrozenAfterSubsumption
This commit deflakes TestClosedTimestampFrozenAfterSubsumption by ensuring that, when running with leader leases, the leader is fortified before we stop storeliveness heartbeats. Otherwise, the leader may not have been fortified yet, in which case blocking storeliveness heartbeats won't necessarily cause it to step down: a leader that was never fortified for this term keeps sending normal raft heartbeats. Fixes: #153679 Release note: None
1 parent 73d1df8 commit edc6b41

File tree

2 files changed

+15
-2
lines changed

2 files changed

+15
-2
lines changed

pkg/kv/kvserver/closed_timestamp_test.go

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -624,8 +624,11 @@ func TestClosedTimestampFrozenAfterSubsumption(t *testing.T) {
624624
defer leaktest.AfterTest(t)()
625625
defer log.Scope(t).Close(t)
626626

627-
skip.UnderRace(t)
628-
skip.UnderDeadlock(t)
627+
skip.UnderDuress(t)
628+
629+
// Increase the verbosity of the logs to help debug the test if it fails, especially
630+
// raft-related logs when the test tries to transfer the lease non-cooperatively.
631+
require.NoError(t, log.SetVModule("raft=4,*=1"))
629632

630633
for _, test := range []struct {
631634
name string

pkg/testutils/testcluster/testcluster.go

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1267,6 +1267,7 @@ func (tc *TestCluster) MoveRangeLeaseNonCooperatively(
12671267
return nil, errors.Errorf("must set StoreTestingKnobs.AllowLeaseRequestProposalsWhenNotLeader")
12681268
}
12691269

1270+
log.Dev.Infof(ctx, "moving lease non-cooperatively of range %v to %v", rangeDesc, dest)
12701271
destServer, err := tc.FindMemberServer(dest.StoreID)
12711272
if err != nil {
12721273
return nil, err
@@ -1380,8 +1381,17 @@ func (tc *TestCluster) ensureLeaderStepsDown(
13801381
leaderStore = curStore
13811382
leaderNode = s
13821383
leaderReplica = curR
1384+
1385+
// Make sure that the leader is fortified because in the next step we
1386+
// will stop store liveness messages to the leader, and we want to cause
1387+
// it to step down. If the leader isn't fortified yet, stopping store
1388+
// liveness messages to it will not cause it to step down.
1389+
if curR.RaftStatus().LeadSupportUntil.IsEmpty() {
1390+
return errors.Errorf("leader is not fortified")
1391+
}
13831392
}
13841393
}
1394+
13851395
// At this point we have iterated over all nodes in the cluster, if we
13861396
// haven't found a leader, wait for a bit for one to step up.
13871397
if leaderStore == nil {

0 commit comments

Comments
 (0)