Skip to content

Commit 0381231

Browse files
committed
batcheval: pass in split trigger information as a struct
Instead of creating a stateloader and reading information about the LHS's lease, gcThrehsold etc. in the splitTriggerHelper, this patch lifts that two levels up. In reference to @pav-kv's comment on #149714. Epic: none Release note: None
1 parent cdd0b29 commit 0381231

File tree

2 files changed

+80
-51
lines changed

2 files changed

+80
-51
lines changed

pkg/kv/kvserver/batcheval/cmd_end_transaction.go

Lines changed: 72 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -882,45 +882,74 @@ func RunCommitTrigger(
882882
"commit wait. Was its timestamp bumped after acquiring latches?", txn, ct.Kind())
883883
}
884884

885+
// Used by both splits and merges.
886+
maybeWrapReplicaCorruptionError := func(ctx context.Context, err error) error {
887+
if err == nil {
888+
log.KvExec.Fatalf(ctx, "unexpected nil error")
889+
}
890+
if info := pebble.ExtractDataCorruptionInfo(err); info != nil {
891+
// Data corruption errors due to external SSTable references getting
892+
// deleted should not be wrapped in replica corruption errors. This
893+
// ensures that we simply fail the split or merge and propagate the error,
894+
// but don't crash the process. In such cases, an excise command should be
895+
// used to get out of this data corruption situation.
896+
return err
897+
}
898+
// Otherwise, fail the split or merge with a critical error that crashes the
899+
// process. Reporting a replica corruption error ensures this. See
900+
// setCorruptRaftMuLocked.
901+
return kvpb.MaybeWrapReplicaCorruptionError(ctx, err)
902+
}
903+
885904
// Stage the commit trigger's side-effects so that they will go into effect on
886905
// each Replica when the corresponding Raft log entry is applied. Only one
887906
// commit trigger can be set.
888907
if ct.GetSplitTrigger() != nil {
908+
sl := MakeStateLoader(rec)
909+
lhsLease, err := sl.LoadLease(ctx, batch)
910+
if err != nil {
911+
return result.Result{}, maybeWrapReplicaCorruptionError(
912+
ctx, errors.Wrap(err, "unable to load lease"),
913+
)
914+
}
915+
gcThreshold, err := sl.LoadGCThreshold(ctx, batch)
916+
if err != nil {
917+
return result.Result{}, maybeWrapReplicaCorruptionError(
918+
ctx, errors.Wrap(err, "unable to load GCThreshold"),
919+
)
920+
}
921+
gcHint, err := sl.LoadGCHint(ctx, batch)
922+
if err != nil {
923+
return result.Result{}, maybeWrapReplicaCorruptionError(
924+
ctx, errors.Wrap(err, "unable to load GCHint"),
925+
)
926+
}
927+
replicaVersion, err := sl.LoadVersion(ctx, batch)
928+
if err != nil {
929+
return result.Result{}, maybeWrapReplicaCorruptionError(
930+
ctx, errors.Wrap(err, "unable to load replica version"),
931+
)
932+
}
933+
in := splitTriggerHelperInput{
934+
leftLease: lhsLease,
935+
gcThreshold: gcThreshold,
936+
gcHint: gcHint,
937+
replicaVersion: replicaVersion,
938+
}
939+
889940
newMS, res, err := splitTrigger(
890-
ctx, rec, batch, *ms, ct.SplitTrigger, txn.WriteTimestamp,
941+
ctx, rec, batch, *ms, ct.SplitTrigger, in, txn.WriteTimestamp,
891942
)
892943
if err != nil {
893-
if info := pebble.ExtractDataCorruptionInfo(err); info != nil {
894-
// We want to handle the data corruption error here because it's possible
895-
// that a file that an external SSTable references got deleted. We want to
896-
// fail the split and propagate the error, but we don't want to crash the
897-
// process. An excise command could be used to get out of this data
898-
// corruption.
899-
return result.Result{}, err
900-
} else {
901-
// Otherwise, failing the split is a critical error. We should crash
902-
// the process and report a replica corruption.
903-
return result.Result{}, kvpb.MaybeWrapReplicaCorruptionError(ctx, err)
904-
}
944+
return result.Result{}, maybeWrapReplicaCorruptionError(ctx, err)
905945
}
906946
*ms = newMS
907947
return res, nil
908948
}
909949
if mt := ct.GetMergeTrigger(); mt != nil {
910950
res, err := mergeTrigger(ctx, rec, batch, ms, mt, txn.WriteTimestamp)
911951
if err != nil {
912-
if info := pebble.ExtractDataCorruptionInfo(err); info != nil {
913-
// We want to handle the data corruption error here because it's
914-
// possible that a file that an external SSTable references got deleted.
915-
// We want to fail the merge and propagate the error, but we don't want
916-
// to crash the process. An excise command could be used to get out of
917-
// this data corruption.
918-
return result.Result{}, err
919-
} else {
920-
// Otherwise, failing the merge is a critical error. We should crash
921-
// the process and report a replica corruption.
922-
return result.Result{}, kvpb.MaybeWrapReplicaCorruptionError(ctx, err)
923-
}
952+
return result.Result{}, maybeWrapReplicaCorruptionError(ctx, err)
924953
}
925954
return res, nil
926955
}
@@ -1135,6 +1164,7 @@ func splitTrigger(
11351164
batch storage.Batch,
11361165
bothDeltaMS enginepb.MVCCStats,
11371166
split *roachpb.SplitTrigger,
1167+
in splitTriggerHelperInput,
11381168
ts hlc.Timestamp,
11391169
) (enginepb.MVCCStats, result.Result, error) {
11401170
desc := rec.Desc()
@@ -1210,7 +1240,7 @@ func splitTrigger(
12101240
MaxBytesDiff: MaxMVCCStatBytesDiff.Get(&rec.ClusterSettings().SV),
12111241
UseEstimatesBecauseExternalBytesArePresent: split.UseEstimatesBecauseExternalBytesArePresent,
12121242
}
1213-
return splitTriggerHelper(ctx, rec, batch, h, split, ts)
1243+
return splitTriggerHelper(ctx, rec, batch, in, h, split, ts)
12141244
}
12151245

12161246
// splitScansRightForStatsFirst controls whether the left hand side or the right
@@ -1246,13 +1276,23 @@ func makeScanStatsFn(
12461276
}
12471277
}
12481278

1279+
// splitTriggerHelperInput contains metadata needed by the RHS when running the
1280+
// splitTriggerHelper.
1281+
type splitTriggerHelperInput struct {
1282+
leftLease roachpb.Lease
1283+
gcThreshold *hlc.Timestamp
1284+
gcHint *roachpb.GCHint
1285+
replicaVersion roachpb.Version
1286+
}
1287+
12491288
// splitTriggerHelper continues the work begun by splitTrigger, but has a
12501289
// reduced scope that has all stats-related concerns bundled into a
12511290
// splitStatsHelper.
12521291
func splitTriggerHelper(
12531292
ctx context.Context,
12541293
rec EvalContext,
12551294
batch storage.Batch,
1295+
in splitTriggerHelperInput,
12561296
statsInput splitStatsHelperInput,
12571297
split *roachpb.SplitTrigger,
12581298
ts hlc.Timestamp,
@@ -1394,27 +1434,22 @@ func splitTriggerHelper(
13941434
// - node two becomes the lease holder for [c,e). Its timestamp cache does
13951435
// not know about the read at 'd' which happened at the beginning.
13961436
// - node two can illegally propose a write to 'd' at a lower timestamp.
1397-
sl := MakeStateLoader(rec)
1398-
leftLease, err := sl.LoadLease(ctx, batch)
1399-
if err != nil {
1400-
return enginepb.MVCCStats{}, result.Result{}, errors.Wrap(err, "unable to load lease")
1401-
}
1402-
if leftLease.Empty() {
1437+
if in.leftLease.Empty() {
14031438
log.KvExec.Fatalf(ctx, "LHS of split has no lease")
14041439
}
14051440

14061441
// Copy the lease from the left-hand side of the split over to the
14071442
// right-hand side so that it can immediately start serving requests.
14081443
// When doing so, we need to make a few modifications.
1409-
rightLease := leftLease
1444+
rightLease := in.leftLease
14101445
// Rebind the lease to the existing leaseholder store's replica from the
14111446
// right-hand side's descriptor.
14121447
var ok bool
1413-
rightLease.Replica, ok = split.RightDesc.GetReplicaDescriptor(leftLease.Replica.StoreID)
1448+
rightLease.Replica, ok = split.RightDesc.GetReplicaDescriptor(in.leftLease.Replica.StoreID)
14141449
if !ok {
14151450
return enginepb.MVCCStats{}, result.Result{}, errors.Errorf(
14161451
"pre-split lease holder %+v not found in post-split descriptor %+v",
1417-
leftLease.Replica, split.RightDesc,
1452+
in.leftLease.Replica, split.RightDesc,
14181453
)
14191454
}
14201455
// Convert leader leases into expiration-based leases. A leader lease is
@@ -1429,18 +1464,9 @@ func splitTriggerHelper(
14291464
rightLease.Term = 0
14301465
rightLease.MinExpiration = hlc.Timestamp{}
14311466
}
1432-
1433-
gcThreshold, err := sl.LoadGCThreshold(ctx, batch)
1434-
if err != nil {
1435-
return enginepb.MVCCStats{}, result.Result{}, errors.Wrap(err, "unable to load GCThreshold")
1436-
}
1437-
if gcThreshold.IsEmpty() {
1467+
if in.gcThreshold.IsEmpty() {
14381468
log.VEventf(ctx, 1, "LHS's GCThreshold of split is not set")
14391469
}
1440-
gcHint, err := sl.LoadGCHint(ctx, batch)
1441-
if err != nil {
1442-
return enginepb.MVCCStats{}, result.Result{}, errors.Wrap(err, "unable to load GCHint")
1443-
}
14441470

14451471
// Writing the initial state is subtle since this also seeds the Raft
14461472
// group. It becomes more subtle due to proposer-evaluated Raft.
@@ -1471,13 +1497,9 @@ func splitTriggerHelper(
14711497
// HardState via a call to synthesizeRaftState. Here, we only call
14721498
// writeInitialReplicaState which essentially writes a ReplicaState
14731499
// only.
1474-
replicaVersion, err := sl.LoadVersion(ctx, batch)
1475-
if err != nil {
1476-
return enginepb.MVCCStats{}, result.Result{}, errors.Wrap(err, "unable to load replica version")
1477-
}
14781500
if *h.AbsPostSplitRight(), err = stateloader.WriteInitialReplicaState(
14791501
ctx, batch, *h.AbsPostSplitRight(), split.RightDesc, rightLease,
1480-
*gcThreshold, *gcHint, replicaVersion,
1502+
*in.gcThreshold, *in.gcHint, in.replicaVersion,
14811503
); err != nil {
14821504
return enginepb.MVCCStats{}, result.Result{}, errors.Wrap(err, "unable to write initial Replica state")
14831505
}

pkg/kv/kvserver/batcheval/cmd_end_transaction_test.go

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2203,9 +2203,16 @@ func TestSplitTriggerWritesInitialReplicaState(t *testing.T) {
22032203
err = sl.SetVersion(ctx, batch, nil, &version)
22042204
require.NoError(t, err)
22052205

2206+
in := splitTriggerHelperInput{
2207+
leftLease: lease,
2208+
gcThreshold: &gcThreshold,
2209+
gcHint: &gcHint,
2210+
replicaVersion: version,
2211+
}
2212+
22062213
// Run the split trigger, which is normally run as a subset of EndTxn request
22072214
// evaluation.
2208-
_, _, err = splitTrigger(ctx, rec, batch, enginepb.MVCCStats{}, split, hlc.Timestamp{})
2215+
_, _, err = splitTrigger(ctx, rec, batch, enginepb.MVCCStats{}, split, in, hlc.Timestamp{})
22092216
require.NoError(t, err)
22102217

22112218
// Verify that range state was migrated to the right-hand side properly.

0 commit comments

Comments
 (0)