@@ -46,12 +46,14 @@ func splitPreApply(
 			r.StoreID(), split)
 	}

-	// Check on the RHS, we need to ensure that it exists and has a minReplicaID
-	// less than or equal to the replica we're about to initialize.
+	// Obtain the RHS replica. In the common case, it exists and its ReplicaID
+	// matches the one in the split trigger. It is the uninitialized replica that
+	// has just been created or obtained in Replica.acquireSplitLock, and its
+	// raftMu is locked.
 	//
-	// The right hand side of the split was already created (and its raftMu
-	// acquired) in Replica.acquireSplitLock. It must be present here if it hasn't
-	// been removed in the meantime (handled below).
+	// In the less common case, the ReplicaID is already removed from this Store,
+	// and rightRepl is either nil or an uninitialized replica with a higher
+	// ReplicaID. Its raftMu is not locked.
 	rightRepl := r.store.GetReplicaIfExists(split.RightDesc.RangeID)
 	// Check to see if we know that the RHS has already been removed from this
 	// store at the replica ID implied by the split.
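As an aside for reviewers, the two cases the new comment distinguishes can be summarized in a small standalone sketch. All types and names below are invented for illustration; this is not CockroachDB's `Replica` or store API.

```go
package main

import "fmt"

type replicaID int

// replica is a toy stand-in for the store's replica object; only the field
// needed to illustrate the two cases is modeled.
type replica struct {
	id replicaID
}

// classifyRHS mirrors the comment in the diff: the common case is a replica
// whose ID matches the split trigger (created in acquireSplitLock, raftMu
// held); the uncommon case is nil or a higher-ID replica (raftMu not held).
func classifyRHS(rightRepl *replica, splitID replicaID) string {
	switch {
	case rightRepl == nil:
		return "uncommon: RHS already removed from the store"
	case rightRepl.id == splitID:
		return "common: uninitialized RHS from acquireSplitLock, raftMu locked"
	default: // rightRepl.id > splitID; lower IDs are not expected here
		return "uncommon: RHS re-created at a higher ReplicaID, raftMu not locked"
	}
}

func main() {
	fmt.Println(classifyRHS(&replica{id: 3}, 3))
	fmt.Println(classifyRHS(nil, 3))
	fmt.Println(classifyRHS(&replica{id: 5}, 3))
}
```

The `default` arm assumes the invariant implied by the comment: a replica with a lower ReplicaID cannot survive to this point.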
@@ -76,6 +78,8 @@ func splitPreApply(
 		// it's always present).
 		var hs raftpb.HardState
 		if rightRepl != nil {
+			// TODO(pav-kv): rightRepl could have been destroyed by the time we get to
+			// lock it here. The HardState read-then-write appears risky in this case.
 			rightRepl.raftMu.Lock()
 			defer rightRepl.raftMu.Unlock()
 			// Assert that the rightRepl is not initialized. We're about to clear out
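To make the TODO concrete: `rightRepl` is fetched before its `raftMu` is taken, so the replica can be destroyed in the window before the lock is acquired. Below is a minimal sketch of the usual defense, re-checking liveness after locking. The types are toys invented for illustration, not the actual replica lifecycle API.

```go
package main

import (
	"errors"
	"fmt"
	"sync"
)

// replica is a toy stand-in; destroyed is assumed to be set under raftMu
// when the replica is torn down.
type replica struct {
	raftMu    sync.Mutex
	destroyed bool
}

var errDestroyed = errors.New("replica destroyed before raftMu was acquired")

// lockAndCheck acquires raftMu and then re-validates liveness. Without the
// re-check, a subsequent HardState read-then-write could operate on a
// replica that no longer exists, which is the risk the TODO points at.
func lockAndCheck(r *replica) error {
	r.raftMu.Lock()
	defer r.raftMu.Unlock()
	if r.destroyed {
		return errDestroyed
	}
	// ... a HardState read and later write-back would be safe here ...
	return nil
}

func main() {
	r := &replica{}
	fmt.Println(lockAndCheck(r)) // <nil>
	r.destroyed = true
	fmt.Println(lockAndCheck(r)) // replica destroyed before raftMu was acquired
}
```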
@@ -90,6 +94,10 @@ func splitPreApply(
 				log.Fatalf(ctx, "failed to load hard state for removed rhs: %v", err)
 			}
 		}
+		// TODO(#152199): the rightRepl == nil condition is flaky. There can be a
+		// racing replica creation for a higher ReplicaID, and it can subsequently
+		// update its HardState. Here, we can accidentally clear the HardState of
+		// that new replica.
 		if err := kvstorage.ClearRangeData(ctx, split.RightDesc.RangeID, readWriter, readWriter, kvstorage.ClearRangeDataOptions{
 			// We know there isn't anything in these two replicated spans below in the
 			// right-hand side (before the current batch), so setting these options
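The race in TODO(#152199) is a check-then-act problem. The interleaving is replayed deterministically below with a toy map standing in for the store; none of this is the real storage API.

```go
package main

import "fmt"

func main() {
	// Toy "store": RangeID -> persisted HardState commit index (0 = absent).
	store := map[int]uint64{}
	const rangeID = 7

	// Step 1: splitPreApply observes no RHS replica (rightRepl == nil).
	_, exists := store[rangeID]
	fmt.Println("splitPreApply sees a replica:", exists) // false

	// Step 2: before the clear happens, a racing replica creation for a
	// higher ReplicaID lands and persists a HardState.
	store[rangeID] = 42

	// Step 3: splitPreApply clears RangeID-local state based on its stale
	// nil observation, wiping the newer replica's HardState.
	delete(store, rangeID)
	fmt.Println("HardState after clear:", store[rangeID]) // 0, i.e. lost
}
```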
@@ -99,19 +107,17 @@ func splitPreApply(
 			ClearReplicatedByRangeID: true,
 			// See the HardState write-back dance above and below.
 			//
-			// TODO(tbg): we don't actually want to touch the raft state of the right
-			// hand side replica since it's absent or a more recent replica than the
-			// split. Now that we have a boolean targeting the unreplicated
-			// RangeID-based keyspace, we can set this to false and remove the
-			// HardState+ReplicaID write-back. (The WriteBatch does not contain
-			// any writes to the unreplicated RangeID keyspace for the RHS, see
-			// splitTriggerHelper[^1]).
+			// TODO(tbg): we don't actually want to touch the raft state of the RHS
+			// replica since it's absent or a more recent one than in the split. Now
+			// that we have a bool targeting unreplicated RangeID-local keys, we can
+			// set it to false and remove the HardState+ReplicaID write-back. However,
+			// there can be historical split proposals with the RaftTruncatedState key
+			// set in splitTriggerHelper[^1]. We must first make sure that such
+			// proposals no longer exist, e.g. with a below-raft migration.
 			//
 			// [^1]: https://github.com/cockroachdb/cockroach/blob/f263a765d750e41f2701da0a923a6e92d09159fa/pkg/kv/kvserver/batcheval/cmd_end_transaction.go#L1109-L1149
 			//
-			// See also:
-			//
-			// https://github.com/cockroachdb/cockroach/issues/94933
+			// See also: https://github.com/cockroachdb/cockroach/issues/94933
 			ClearUnreplicatedByRangeID: true,
 		}); err != nil {
 			log.Fatalf(ctx, "failed to clear range data for removed rhs: %v", err)