@@ -882,45 +882,74 @@ func RunCommitTrigger(
882
882
"commit wait. Was its timestamp bumped after acquiring latches?" , txn , ct .Kind ())
883
883
}
884
884
885
+ // Used by both splits and merges.
886
+ maybeWrapReplicaCorruptionError := func (ctx context.Context , err error ) error {
887
+ if err == nil {
888
+ log .KvExec .Fatalf (ctx , "unexpected nil error" )
889
+ }
890
+ if info := pebble .ExtractDataCorruptionInfo (err ); info != nil {
891
+ // Data corruption errors due to external SSTable references getting
892
+ // deleted should not be wrapped in replica corruption errors. This
893
+ // ensures that we simply fail the split or merge and propagate the error,
894
+ // but don't crash the process. In such cases, an excise command should be
895
+ // used to get out of this data corruption situation.
896
+ return err
897
+ }
898
+ // Otherwise, fail the split or merge with a critical error that crashes the
899
+ // process. Reporting a replica corruption error ensures this. See
900
+ // setCorruptRaftMuLocked.
901
+ return kvpb .MaybeWrapReplicaCorruptionError (ctx , err )
902
+ }
903
+
885
904
// Stage the commit trigger's side-effects so that they will go into effect on
886
905
// each Replica when the corresponding Raft log entry is applied. Only one
887
906
// commit trigger can be set.
888
907
if ct .GetSplitTrigger () != nil {
908
+ sl := MakeStateLoader (rec )
909
+ lhsLease , err := sl .LoadLease (ctx , batch )
910
+ if err != nil {
911
+ return result.Result {}, maybeWrapReplicaCorruptionError (
912
+ ctx , errors .Wrap (err , "unable to load lease" ),
913
+ )
914
+ }
915
+ gcThreshold , err := sl .LoadGCThreshold (ctx , batch )
916
+ if err != nil {
917
+ return result.Result {}, maybeWrapReplicaCorruptionError (
918
+ ctx , errors .Wrap (err , "unable to load GCThreshold" ),
919
+ )
920
+ }
921
+ gcHint , err := sl .LoadGCHint (ctx , batch )
922
+ if err != nil {
923
+ return result.Result {}, maybeWrapReplicaCorruptionError (
924
+ ctx , errors .Wrap (err , "unable to load GCHint" ),
925
+ )
926
+ }
927
+ replicaVersion , err := sl .LoadVersion (ctx , batch )
928
+ if err != nil {
929
+ return result.Result {}, maybeWrapReplicaCorruptionError (
930
+ ctx , errors .Wrap (err , "unable to load replica version" ),
931
+ )
932
+ }
933
+ in := splitTriggerHelperInput {
934
+ leftLease : lhsLease ,
935
+ gcThreshold : gcThreshold ,
936
+ gcHint : gcHint ,
937
+ replicaVersion : replicaVersion ,
938
+ }
939
+
889
940
newMS , res , err := splitTrigger (
890
- ctx , rec , batch , * ms , ct .SplitTrigger , txn .WriteTimestamp ,
941
+ ctx , rec , batch , * ms , ct .SplitTrigger , in , txn .WriteTimestamp ,
891
942
)
892
943
if err != nil {
893
- if info := pebble .ExtractDataCorruptionInfo (err ); info != nil {
894
- // We want to handle the data corruption error here because it's possible
895
- // that a file that an external SSTable references got deleted. We want to
896
- // fail the split and propagate the error, but we don't want to crash the
897
- // process. An excise command could be used to get out of this data
898
- // corruption.
899
- return result.Result {}, err
900
- } else {
901
- // Otherwise, failing the split is a critical error. We should crash
902
- // the process and report a replica corruption.
903
- return result.Result {}, kvpb .MaybeWrapReplicaCorruptionError (ctx , err )
904
- }
944
+ return result.Result {}, maybeWrapReplicaCorruptionError (ctx , err )
905
945
}
906
946
* ms = newMS
907
947
return res , nil
908
948
}
909
949
if mt := ct .GetMergeTrigger (); mt != nil {
910
950
res , err := mergeTrigger (ctx , rec , batch , ms , mt , txn .WriteTimestamp )
911
951
if err != nil {
912
- if info := pebble .ExtractDataCorruptionInfo (err ); info != nil {
913
- // We want to handle the data corruption error here because it's
914
- // possible that a file that an external SSTable references got deleted.
915
- // We want to fail the merge and propagate the error, but we don't want
916
- // to crash the process. An excise command could be used to get out of
917
- // this data corruption.
918
- return result.Result {}, err
919
- } else {
920
- // Otherwise, failing the merge is a critical error. We should crash
921
- // the process and report a replica corruption.
922
- return result.Result {}, kvpb .MaybeWrapReplicaCorruptionError (ctx , err )
923
- }
952
+ return result.Result {}, maybeWrapReplicaCorruptionError (ctx , err )
924
953
}
925
954
return res , nil
926
955
}
@@ -1135,6 +1164,7 @@ func splitTrigger(
1135
1164
batch storage.Batch ,
1136
1165
bothDeltaMS enginepb.MVCCStats ,
1137
1166
split * roachpb.SplitTrigger ,
1167
+ in splitTriggerHelperInput ,
1138
1168
ts hlc.Timestamp ,
1139
1169
) (enginepb.MVCCStats , result.Result , error ) {
1140
1170
desc := rec .Desc ()
@@ -1210,7 +1240,7 @@ func splitTrigger(
1210
1240
MaxBytesDiff : MaxMVCCStatBytesDiff .Get (& rec .ClusterSettings ().SV ),
1211
1241
UseEstimatesBecauseExternalBytesArePresent : split .UseEstimatesBecauseExternalBytesArePresent ,
1212
1242
}
1213
- return splitTriggerHelper (ctx , rec , batch , h , split , ts )
1243
+ return splitTriggerHelper (ctx , rec , batch , in , h , split , ts )
1214
1244
}
1215
1245
1216
1246
// splitScansRightForStatsFirst controls whether the left hand side or the right
@@ -1246,13 +1276,23 @@ func makeScanStatsFn(
1246
1276
}
1247
1277
}
1248
1278
1279
+ // splitTriggerHelperInput contains metadata needed by the RHS when running the
1280
+ // splitTriggerHelper.
1281
+ type splitTriggerHelperInput struct {
1282
+ leftLease roachpb.Lease
1283
+ gcThreshold * hlc.Timestamp
1284
+ gcHint * roachpb.GCHint
1285
+ replicaVersion roachpb.Version
1286
+ }
1287
+
1249
1288
// splitTriggerHelper continues the work begun by splitTrigger, but has a
1250
1289
// reduced scope that has all stats-related concerns bundled into a
1251
1290
// splitStatsHelper.
1252
1291
func splitTriggerHelper (
1253
1292
ctx context.Context ,
1254
1293
rec EvalContext ,
1255
1294
batch storage.Batch ,
1295
+ in splitTriggerHelperInput ,
1256
1296
statsInput splitStatsHelperInput ,
1257
1297
split * roachpb.SplitTrigger ,
1258
1298
ts hlc.Timestamp ,
@@ -1394,27 +1434,22 @@ func splitTriggerHelper(
1394
1434
// - node two becomes the lease holder for [c,e). Its timestamp cache does
1395
1435
// not know about the read at 'd' which happened at the beginning.
1396
1436
// - node two can illegally propose a write to 'd' at a lower timestamp.
1397
- sl := MakeStateLoader (rec )
1398
- leftLease , err := sl .LoadLease (ctx , batch )
1399
- if err != nil {
1400
- return enginepb.MVCCStats {}, result.Result {}, errors .Wrap (err , "unable to load lease" )
1401
- }
1402
- if leftLease .Empty () {
1437
+ if in .leftLease .Empty () {
1403
1438
log .KvExec .Fatalf (ctx , "LHS of split has no lease" )
1404
1439
}
1405
1440
1406
1441
// Copy the lease from the left-hand side of the split over to the
1407
1442
// right-hand side so that it can immediately start serving requests.
1408
1443
// When doing so, we need to make a few modifications.
1409
- rightLease := leftLease
1444
+ rightLease := in . leftLease
1410
1445
// Rebind the lease to the existing leaseholder store's replica from the
1411
1446
// right-hand side's descriptor.
1412
1447
var ok bool
1413
- rightLease .Replica , ok = split .RightDesc .GetReplicaDescriptor (leftLease .Replica .StoreID )
1448
+ rightLease .Replica , ok = split .RightDesc .GetReplicaDescriptor (in . leftLease .Replica .StoreID )
1414
1449
if ! ok {
1415
1450
return enginepb.MVCCStats {}, result.Result {}, errors .Errorf (
1416
1451
"pre-split lease holder %+v not found in post-split descriptor %+v" ,
1417
- leftLease .Replica , split .RightDesc ,
1452
+ in . leftLease .Replica , split .RightDesc ,
1418
1453
)
1419
1454
}
1420
1455
// Convert leader leases into expiration-based leases. A leader lease is
@@ -1429,18 +1464,9 @@ func splitTriggerHelper(
1429
1464
rightLease .Term = 0
1430
1465
rightLease .MinExpiration = hlc.Timestamp {}
1431
1466
}
1432
-
1433
- gcThreshold , err := sl .LoadGCThreshold (ctx , batch )
1434
- if err != nil {
1435
- return enginepb.MVCCStats {}, result.Result {}, errors .Wrap (err , "unable to load GCThreshold" )
1436
- }
1437
- if gcThreshold .IsEmpty () {
1467
+ if in .gcThreshold .IsEmpty () {
1438
1468
log .VEventf (ctx , 1 , "LHS's GCThreshold of split is not set" )
1439
1469
}
1440
- gcHint , err := sl .LoadGCHint (ctx , batch )
1441
- if err != nil {
1442
- return enginepb.MVCCStats {}, result.Result {}, errors .Wrap (err , "unable to load GCHint" )
1443
- }
1444
1470
1445
1471
// Writing the initial state is subtle since this also seeds the Raft
1446
1472
// group. It becomes more subtle due to proposer-evaluated Raft.
@@ -1471,13 +1497,9 @@ func splitTriggerHelper(
1471
1497
// HardState via a call to synthesizeRaftState. Here, we only call
1472
1498
// writeInitialReplicaState which essentially writes a ReplicaState
1473
1499
// only.
1474
- replicaVersion , err := sl .LoadVersion (ctx , batch )
1475
- if err != nil {
1476
- return enginepb.MVCCStats {}, result.Result {}, errors .Wrap (err , "unable to load replica version" )
1477
- }
1478
1500
if * h .AbsPostSplitRight (), err = stateloader .WriteInitialReplicaState (
1479
1501
ctx , batch , * h .AbsPostSplitRight (), split .RightDesc , rightLease ,
1480
- * gcThreshold , * gcHint , replicaVersion ,
1502
+ * in . gcThreshold , * in . gcHint , in . replicaVersion ,
1481
1503
); err != nil {
1482
1504
return enginepb.MVCCStats {}, result.Result {}, errors .Wrap (err , "unable to write initial Replica state" )
1483
1505
}
0 commit comments