@@ -4554,3 +4554,122 @@ func TestSplitWithExternalFilesFastStats(t *testing.T) {
 		})
 	}
 }
+
+// TestStoreRangeSplitRaftSnapshotAfterRHSRebalanced tests that a replica that
+// learns about a split through a snapshot after the RHS has been rebalanced
+// away correctly deletes the RHS replica's on-disk data.
+//
+// Serves as a regression test for
+// https://github.com/cockroachdb/cockroach/issues/73462.
+func TestStoreRangeSplitRaftSnapshotAfterRHSRebalanced(t *testing.T) {
+	defer leaktest.AfterTest(t)()
+	defer log.Scope(t).Close(t)
+
+	skip.WithIssue(t, 73462)
+
+	ctx := context.Background()
+	// Start a 5 node cluster.
+	tc := testcluster.StartTestCluster(t, 5, base.TestClusterArgs{
+		ReplicationMode: base.ReplicationManual,
+	})
+	defer tc.Stopper().Stop(ctx)
+
+	store0 := tc.GetFirstStoreFromServer(t, 0)
+	store2 := tc.GetFirstStoreFromServer(t, 2)
+	distSender := tc.Servers[0].DistSenderI().(kv.Sender)
+
+	// Create a scratch range and upreplicate to stores 1 and 2.
+	keyStart := tc.ScratchRange(t)
+	repl := store0.LookupReplica(roachpb.RKey(keyStart))
+	keyEnd := repl.Desc().EndKey.AsRawKey()
+	keyD := keyStart.Next().Next().Next().Next()
+
+	tc.AddVotersOrFatal(t, keyStart, tc.Targets(1, 2)...)
+	tc.WaitForValues(t, keyStart, []int64{0, 0, 0, 0, 0})
+
+	// Put some keys in [keyD, /Max) to ensure that the post-split RHS will have
+	// some data. When we learn about the split through the snapshot, we expect
+	// to delete data in the uncontained key range of [keyD, /Max). Adding some
+	// data here allows us to make this assertion.
+	key := keyD
+	for i := 0; i < 10; i++ {
+		key = key.Next()
+		if _, pErr := kv.SendWrapped(ctx, distSender, incrementArgs(key, 1)); pErr != nil {
+			t.Fatal(pErr)
+		}
+		tc.WaitForValues(t, key, []int64{1, 1, 1, 0, 0})
+	}
+
+	// Start dropping all Raft traffic to store2.
+	aRepl0 := store0.LookupReplica(roachpb.RKey(keyStart))
+	tc.Servers[2].RaftTransport().(*kvserver.RaftTransport).ListenIncomingRaftMessages(store2.Ident.StoreID, &unreliableRaftHandler{
+		rangeID:                    aRepl0.RangeID,
+		IncomingRaftMessageHandler: store2,
+		unreliableRaftHandlerFuncs: unreliableRaftHandlerFuncs{
+			dropReq: func(request *kvserverpb.RaftMessageRequest) bool { return true },
+		},
+	})
+
+	// Split at keyD: [keyStart, keyD) and [keyD, /Max).
+	if _, pErr := kv.SendWrapped(ctx, distSender, adminSplitArgs(keyD)); pErr != nil {
+		t.Fatal(pErr)
+	}
+
+	// Move the [keyD, /Max) replica from store2 to store4.
+	rhsDesc := store0.LookupReplica(roachpb.RKey(keyD)).Desc()
+	tc.RemoveVotersOrFatal(t, keyD, tc.Target(2))
+	tc.AddVotersOrFatal(t, keyD, tc.Target(4))
+
+	// Transfer the lease for [keyD, /Max) to store4.
+	tc.TransferRangeLeaseOrFatal(t, *rhsDesc, tc.Target(4))
+
+	// Move the [keyD, /Max) replica from store0 to store3. This allows us to
+	// assert below that store0 and store2 have the same data, once we've allowed
+	// store2 to catch up via a snapshot.
+	tc.RemoveVotersOrFatal(t, keyD, tc.Target(0))
+	tc.AddVotersOrFatal(t, keyD, tc.Target(3))
+
+	// Restore Raft traffic to store2, but keep dropping MsgApps for old log
+	// entries so that store2 can only catch up via a Raft snapshot.
+	index := store0.LookupReplica(roachpb.RKey(keyStart)).GetLastIndex()
+	tc.Servers[2].RaftTransport().(*kvserver.RaftTransport).ListenIncomingRaftMessages(store2.Ident.StoreID, &unreliableRaftHandler{
+		rangeID:                    aRepl0.RangeID,
+		IncomingRaftMessageHandler: store2,
+		unreliableRaftHandlerFuncs: unreliableRaftHandlerFuncs{
+			dropReq: func(req *kvserverpb.RaftMessageRequest) bool {
+				return req.Message.Type == raftpb.MsgApp && kvpb.RaftIndex(req.Message.Index) < index
+			},
+			dropHB:   func(*kvserverpb.RaftHeartbeat) bool { return false },
+			dropResp: func(*kvserverpb.RaftMessageResponse) bool { return false },
+		},
+	})
+
+	// Wait for all replicas to catch up. This will require a Raft snapshot to
+	// store2.
+	testutils.SucceedsSoon(t, func() error {
+		getKeySet := func(engine storage.Engine) map[string]struct{} {
+			kvs, err := storage.Scan(context.Background(), engine, keyStart, keyEnd, 0)
+			if err != nil {
+				t.Fatal(err)
+			}
+			out := map[string]struct{}{}
+			for _, kv := range kvs {
+				out[string(kv.Key.Key)] = struct{}{}
+			}
+			return out
+		}
+		storeKeys0 := getKeySet(store0.TODOEngine())
+		storeKeys2 := getKeySet(store2.TODOEngine())
+		for k := range storeKeys0 {
+			if _, ok := storeKeys2[k]; !ok {
+				return fmt.Errorf("store2 missing key %s", roachpb.Key(k))
+			}
+		}
+		for k := range storeKeys2 {
+			if _, ok := storeKeys0[k]; !ok {
+				return fmt.Errorf("store2 has extra key %s", roachpb.Key(k))
+			}
+		}
+		return nil
+	})
+}