Skip to content

Commit 616f0dd

Browse files
craig[bot]arulajmani
andcommitted
Merge #156897
156897: kvserver: remove replica dependency in splitPreApply r=pav-kv a=arulajmani Refactor to remove the need to pass in a replica struct to splitPreApply. This will make testing this in subsequent patches easier. Epic: None Release note: None Co-authored-by: Arul Ajmani <[email protected]>
2 parents 56a6d37 + 9c4cf4a commit 616f0dd

File tree

3 files changed

+118
-92
lines changed

3 files changed

+118
-92
lines changed

pkg/kv/kvserver/replica_app_batch.go

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -322,10 +322,12 @@ func (b *replicaAppBatch) runPostAddTriggersReplicaOnly(
322322
// NB: another reason why we shouldn't write HardState at evaluation time is
323323
// that it belongs to the log engine, whereas the evaluated batch must
324324
// contain only state machine updates.
325-
splitPreApply(
326-
ctx, b.r, kvstorage.StateRW(b.batch), kvstorage.TODORaft(b.batch),
327-
res.Split.SplitTrigger, cmd.Cmd.ClosedTimestamp,
328-
)
325+
in, err := validateAndPrepareSplit(ctx, b.r, res.Split.SplitTrigger, cmd.Cmd.ClosedTimestamp)
326+
if err != nil {
327+
log.KvExec.Fatalf(ctx, "unable to validate split: %s", err)
328+
}
329+
330+
splitPreApply(ctx, kvstorage.StateRW(b.batch), kvstorage.TODORaft(b.batch), in)
329331

330332
// The rangefeed processor will no longer be provided logical ops for
331333
// its entire range, so it needs to be shut down and all registrations

pkg/kv/kvserver/store_split.go

Lines changed: 108 additions & 84 deletions
Original file line numberDiff line numberDiff line change
@@ -19,56 +19,108 @@ import (
1919
"github.com/cockroachdb/errors"
2020
)
2121

22-
// splitPreApply is called when the raft command is applied. Any
23-
// changes to the given ReadWriter will be written atomically with the
24-
// split commit.
22+
// splitPreApplyInput contains input for the RHS replica required for
23+
// splitPreApply.
24+
type splitPreApplyInput struct {
25+
// destroyed is set to true iff the RHS replica (the one with the matching
26+
// ReplicaID from the SplitTrigger used to construct a splitPreApplyInput) has
27+
// already been removed from the store.
28+
//
29+
// If the RHS replica has already been destroyed on the store, then split
30+
// application entails "throwing away" the data that would have belonged to
31+
// the RHS. Simply put, user data belonging to the RHS needs to be cleared and
32+
// any RangeID-local replicated state in the split batch also needs to be
33+
// cleared.
34+
destroyed bool
35+
// rhsDesc is the descriptor for the post split RHS range.
36+
rhsDesc roachpb.RangeDescriptor
37+
// initClosedTimestamp is the initial closed timestamp that the RHS replica
38+
// inherits from the pre-split range. Set iff destroyed is false.
39+
initClosedTimestamp hlc.Timestamp
40+
}
41+
42+
// validateAndPrepareSplit performs invariant checks on the supplied
43+
// splitTrigger and, assuming they hold, returns the corresponding input that
44+
// should be passed to splitPreApply.
2545
//
2646
// initClosedTS is the closed timestamp carried by the split command. It will be
27-
// used to initialize the new RHS range.
28-
func splitPreApply(
29-
ctx context.Context,
30-
r *Replica,
31-
stateRW kvstorage.StateRW,
32-
raftRW kvstorage.Raft,
33-
split roachpb.SplitTrigger,
34-
initClosedTS *hlc.Timestamp,
35-
) {
47+
// used to initialize the closed timestamp of the RHS replica.
48+
func validateAndPrepareSplit(
49+
ctx context.Context, r *Replica, split roachpb.SplitTrigger, initClosedTS *hlc.Timestamp,
50+
) (splitPreApplyInput, error) {
3651
// Sanity check that the store is in the split.
37-
//
38-
// The exception to that is if the DisableEagerReplicaRemoval testing flag is
39-
// enabled.
40-
_, hasRightDesc := split.RightDesc.GetReplicaDescriptor(r.StoreID())
52+
splitRightReplDesc, hasRightDesc := split.RightDesc.GetReplicaDescriptor(r.StoreID())
4153
_, hasLeftDesc := split.LeftDesc.GetReplicaDescriptor(r.StoreID())
4254
if !hasRightDesc || !hasLeftDesc {
43-
log.KvExec.Fatalf(ctx, "cannot process split on s%s which does not exist in the split: %+v",
55+
return splitPreApplyInput{}, errors.AssertionFailedf("cannot process split on s%s which does not exist in the split: %+v",
4456
r.StoreID(), split)
4557
}
4658

47-
// Obtain the RHS replica. In the common case, it exists and its ReplicaID
48-
// matches the one in the split trigger. It is the uninitialized replica that
49-
// has just been created or obtained in Replica.acquireSplitLock, and its
50-
// raftMu is locked.
51-
//
52-
// In the less common case, the ReplicaID is already removed from this Store,
53-
// and rightRepl is either nil (though the replica may be created concurrently
54-
// after we got this nil), or an uninitialized replica with a higher
55-
// ReplicaID. Its raftMu is not locked, so this replica might as well be in
56-
// the process of destruction or being replaced with another higher-ReplicaID
57-
// uninitialized replica.
58-
//
59-
// In any case, the RHS, if exists (one or multiple replicas, throughout this
60-
// splitPreApply call), is uninitialized. This is due to the Store invariant
61-
// that all initialized replicas don't overlap, thus the RHS one can only be
62-
// uninitialized while we still own the RHS part of the keyspace.
63-
//
64-
// Uninitialized replicas don't have replicated state, and only have non-empty
65-
// RaftReplicaID and RaftHardState keys in storage (at the time of writing),
66-
// or, more generally, only unreplicated keys. As a rule of thumb, all
67-
// unreplicated keys belong to the *current* ReplicaID in the store, rather
68-
// than the ReplicaID in the split trigger (which can be stale).
59+
// Try to obtain the RHS replica. In the common case, it exists and its
60+
// ReplicaID matches the one in the split trigger. In the less common case,
61+
// the ReplicaID has already been removed from this store, and it may have
62+
// been re-added with a higher ReplicaID one or more times. We use this to
63+
// inform the destroyed field.
6964
rightRepl := r.store.GetReplicaIfExists(split.RightDesc.RangeID)
65+
if rightRepl == nil || rightRepl.isNewerThanSplit(&split) {
66+
// We're in the rare case where we know that the RHS has been removed or
67+
// re-added with a higher replica ID (one or more times).
68+
//
69+
// NB: the rightRepl == nil condition is flaky, in a sense that the RHS
70+
// replica can be created or destroyed concurrently here, one or more times.
71+
// This is because the RHS replica is not locked if its ReplicaID does not
72+
// match the one in the SplitTrigger. But we only use it for a best-effort
73+
// assertion, so this is not critical.
74+
if rightRepl != nil {
75+
// Assert that the rightRepl is not initialized. We're about to clear out
76+
// the data of the RHS of the split; we cannot have already accepted a
77+
// snapshot to initialize this newer RHS.
78+
if rightRepl.IsInitialized() {
79+
return splitPreApplyInput{}, errors.AssertionFailedf(
80+
"unexpectedly found initialized newer RHS of split: %v", rightRepl.Desc(),
81+
)
82+
}
83+
}
84+
85+
return splitPreApplyInput{destroyed: true, rhsDesc: split.RightDesc}, nil
86+
}
87+
// Sanity check the common case -- the RHS replica that exists should match
88+
// the ReplicaID in the split trigger. In particular, it shouldn't older than
89+
// the one in the split trigger; we've already checked for the newer case
90+
// above.
91+
testingAssert(rightRepl.replicaID == splitRightReplDesc.ReplicaID,
92+
"expected RHS replica ID to match split trigger replica ID",
93+
)
7094

71-
rsl := kvstorage.MakeStateLoader(split.RightDesc.RangeID)
95+
// In order to tolerate a nil initClosedTS input, let's forward to
96+
// r.GetCurrentClosedTimestamp(). Generally, initClosedTS is not expected to
97+
// be nil (and is expected to be in advance of r.GetCurrentClosedTimestamp()
98+
// since it's coming hot off a Raft command), but let's not rely on the
99+
// non-nil. Note that r.GetCurrentClosedTimestamp() does not yet incorporate
100+
// initClosedTS because the split command has not been applied yet.
101+
//
102+
// TODO(arul): we should avoid this and have splits always carry a non-nil
103+
// initial closed timestamp; see
104+
// https://github.com/cockroachdb/cockroach/issues/148972.
105+
if initClosedTS == nil {
106+
initClosedTS = &hlc.Timestamp{}
107+
}
108+
initClosedTS.Forward(r.GetCurrentClosedTimestamp(ctx))
109+
110+
return splitPreApplyInput{
111+
destroyed: false,
112+
rhsDesc: split.RightDesc,
113+
initClosedTimestamp: *initClosedTS,
114+
}, nil
115+
}
116+
117+
// splitPreApply is called when the raft command is applied. Any
118+
// changes to the given ReadWriter will be written atomically with the
119+
// split commit.
120+
func splitPreApply(
121+
ctx context.Context, stateRW kvstorage.StateRW, raftRW kvstorage.Raft, in splitPreApplyInput,
122+
) {
123+
rsl := kvstorage.MakeStateLoader(in.rhsDesc.RangeID)
72124
// After PR #149620, the split trigger batch may only contain replicated state
73125
// machine keys, and never contains unreplicated / raft keys. One exception:
74126
// there can still be historical split proposals that write the initial
@@ -93,40 +145,23 @@ func splitPreApply(
93145
log.KvExec.Fatalf(ctx, "cannot clear RaftTruncatedState: %v", err)
94146
}
95147

96-
// Check to see if we know that the RHS has already been removed from this
97-
// store at the replica ID implied by the split.
98-
if rightRepl == nil || rightRepl.isNewerThanSplit(&split) {
99-
// We're in the rare case where we know that the RHS has been removed or
100-
// re-added with a higher replica ID (one or more times).
101-
//
102-
// If rightRepl is not nil, we are *not* holding raftMu.
148+
if in.destroyed {
149+
// The RHS replica has already been removed from the store. To apply the
150+
// split, we must clear the user data the RHS would have inherited from the
151+
// LHS due to the split. Additionally, we also want to clear any
152+
// RangeID-local replicated keys in the split batch.
103153
//
104-
// To apply the split, we need to "throw away" the data that would belong to
105-
// the RHS, i.e. we clear the user data the RHS would have inherited from
106-
// the LHS due to the split.
107-
//
108-
// Leave the RangeID-local state intact, since it either belongs to a newer
109-
// replica or does not exist. At the time of writing, it can be a non-empty
110-
// HardState and RaftReplicaID. It is important to preserve the HardState
111-
// because the replica might have already voted at a higher term. In general
112-
// this shouldn't happen because we add learners and then promote them only
113-
// after they apply a snapshot, but we're going to be extra careful in case
114-
// future versions of cockroach somehow promote replicas without ensuring
115-
// that a snapshot has been received.
116-
//
117-
// NB: the rightRepl == nil condition is flaky, in a sense that the RHS
118-
// replica can be created concurrently here, one or more times. But we only
119-
// use it for a best effort assertion, so this is not critical.
120-
if rightRepl != nil {
121-
// Assert that the rightRepl is not initialized. We're about to clear out
122-
// the data of the RHS of the split; we cannot have already accepted a
123-
// snapshot to initialize this newer RHS.
124-
if rightRepl.IsInitialized() {
125-
log.KvExec.Fatalf(ctx, "unexpectedly found initialized newer RHS of split: %v", rightRepl.Desc())
126-
}
127-
}
154+
// Note that we leave the RangeID-local state intact, since it either
155+
// belongs to a newer replica or does not exist. For the former case, when a
156+
// newer RHS replica exists on this store, it must be uninitialized (this
157+
// was asserted in validateAndPrepareSplit). Uninitialized replicas do not have any
158+
// replicated state; however, at the time of writing, they do have non-empty
159+
// RaftReplicaID and RaftHardState keys in storage. More generally, they are
160+
// allowed to have unreplicated keys. As a rule of thumb, all unreplicated
161+
// keys belong to the *current* ReplicaID in the store, rather than the
162+
// ReplicaID in the split trigger (which in this case, is stale).
128163
if err := kvstorage.RemoveStaleRHSFromSplit(
129-
ctx, kvstorage.WrapState(stateRW), split.RightDesc.RangeID, split.RightDesc.RSpan(),
164+
ctx, kvstorage.WrapState(stateRW), in.rhsDesc.RangeID, in.rhsDesc.RSpan(),
130165
); err != nil {
131166
log.KvExec.Fatalf(ctx, "failed to clear range data for removed rhs: %v", err)
132167
}
@@ -152,18 +187,7 @@ func splitPreApply(
152187
log.KvExec.Fatalf(ctx, "%v", err)
153188
}
154189
// Persist the closed timestamp.
155-
//
156-
// In order to tolerate a nil initClosedTS input, let's forward to
157-
// r.GetCurrentClosedTimestamp(). Generally, initClosedTS is not expected to
158-
// be nil (and is expected to be in advance of r.GetCurrentClosedTimestamp()
159-
// since it's coming hot off a Raft command), but let's not rely on the
160-
// non-nil. Note that r.GetCurrentClosedTimestamp() does not yet incorporate
161-
// initClosedTS because the split command has not been applied yet.
162-
if initClosedTS == nil {
163-
initClosedTS = &hlc.Timestamp{}
164-
}
165-
initClosedTS.Forward(r.GetCurrentClosedTimestamp(ctx))
166-
if err := rsl.SetClosedTimestamp(ctx, stateRW, *initClosedTS); err != nil {
190+
if err := rsl.SetClosedTimestamp(ctx, stateRW, in.initClosedTimestamp); err != nil {
167191
log.KvExec.Fatalf(ctx, "%s", err)
168192
}
169193
}

pkg/kv/kvserver/store_test.go

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4043,10 +4043,10 @@ func TestSplitPreApplyInitializesTruncatedState(t *testing.T) {
40434043
}, &rightDesc.InternalReplicas[0])
40444044
require.NoError(t, err)
40454045

4046-
splitPreApply(
4047-
ctx, lhsRepl, kvstorage.StateRW(batch), kvstorage.TODORaft(batch),
4048-
roachpb.SplitTrigger{LeftDesc: leftDesc, RightDesc: rightDesc}, nil,
4049-
)
4046+
in, err := validateAndPrepareSplit(ctx, lhsRepl, roachpb.SplitTrigger{LeftDesc: leftDesc, RightDesc: rightDesc}, nil)
4047+
require.NoError(t, err)
4048+
4049+
splitPreApply(ctx, kvstorage.StateRW(batch), kvstorage.TODORaft(batch), in)
40504050

40514051
// Verify that the RHS truncated state is initialized as expected.
40524052
rsl := kvstorage.MakeStateLoader(rightDesc.RangeID)

0 commit comments

Comments
 (0)