@@ -69,6 +69,7 @@ import (
69
69
"github.com/cockroachdb/cockroach/pkg/util/log/severity"
70
70
"github.com/cockroachdb/cockroach/pkg/util/metamorphic"
71
71
"github.com/cockroachdb/cockroach/pkg/util/metric"
72
+ "github.com/cockroachdb/cockroach/pkg/util/retry"
72
73
"github.com/cockroachdb/cockroach/pkg/util/timeutil"
73
74
"github.com/cockroachdb/cockroach/pkg/util/tracing"
74
75
"github.com/cockroachdb/cockroach/pkg/util/tracing/tracingpb"
@@ -2560,18 +2561,18 @@ func (ex *connExecutor) commitSQLTransactionInternal(ctx context.Context) (retEr
2560
2561
2561
2562
ex .extraTxnState .prepStmtsNamespace .closePortals (ctx , & ex .extraTxnState .prepStmtsNamespaceMemAcc )
2562
2563
2563
- // We need to step the transaction's internal read sequence before committing
2564
- // if it has stepping enabled. If it doesn't have stepping enabled, then we
2565
- // just set the stepping mode back to what it was.
2564
+ // We need to step the transaction's read sequence before committing if it has
2565
+ // stepping enabled. If it doesn't have stepping enabled, then we just set the
2566
+ // stepping mode back to what it was.
2566
2567
//
2567
- // Even if we do step the transaction's internal read sequence, we do not
2568
- // advance its external read timestamp (applicable only to read committed
2569
- // transactions). This is because doing so is not needed before committing,
2570
- // and it would cause the transaction to commit at a higher timestamp than
2571
- // necessary. On heavily contended workloads like the one from #109628, this
2572
- // can cause unnecessary write-write contention between transactions by
2573
- // inflating the contention footprint of each transaction (i.e. the duration
2574
- // measured in MVCC time that the transaction holds locks).
2568
+ // Even if we do step the transaction's read sequence, we do not advance its
2569
+ // read timestamp (applicable only to read committed transactions). This is
2570
+ // because doing so is not needed before committing, and it would cause the
2571
+ // transaction to commit at a higher timestamp than necessary. On heavily
2572
+ // contended workloads like the one from #109628, this can cause unnecessary
2573
+ // write-write contention between transactions by inflating the contention
2574
+ // footprint of each transaction (i.e. the duration measured in MVCC time that
2575
+ // the transaction holds locks).
2575
2576
prevSteppingMode := ex .state .mu .txn .ConfigureStepping (ctx , kv .SteppingEnabled )
2576
2577
if prevSteppingMode == kv .SteppingEnabled {
2577
2578
if err := ex .state .mu .txn .Step (ctx , false /* allowReadTimestampStep */ ); err != nil {
@@ -2722,6 +2723,14 @@ func (ex *connExecutor) rollbackSQLTransaction(
2722
2723
func (ex * connExecutor ) dispatchReadCommittedStmtToExecutionEngine (
2723
2724
ctx context.Context , p * planner , res RestrictedCommandResult ,
2724
2725
) error {
2726
+ if ex .executorType == executorTypeInternal {
2727
+ // Because we step the read timestamp below, this is not safe to call within
2728
+ // internal executor.
2729
+ return errors .AssertionFailedf (
2730
+ "call of dispatchReadCommittedStmtToExecutionEngine within internal executor" ,
2731
+ )
2732
+ }
2733
+
2725
2734
getPausablePortalInfo := func () * portalPauseInfo {
2726
2735
if p != nil && p .pausablePortal != nil {
2727
2736
return p .pausablePortal .pauseInfo
@@ -2738,8 +2747,19 @@ func (ex *connExecutor) dispatchReadCommittedStmtToExecutionEngine(
2738
2747
return err
2739
2748
}
2740
2749
2750
+ // Use retry with exponential backoff and full jitter to reduce collisions for
2751
+ // high-contention workloads. See https://en.wikipedia.org/wiki/Exponential_backoff and
2752
+ // https://aws.amazon.com/blogs/architecture/exponential-backoff-and-jitter/
2741
2753
maxRetries := int (ex .sessionData ().MaxRetriesForReadCommitted )
2742
- for attemptNum := 0 ; ; attemptNum ++ {
2754
+ initialBackoff := ex .sessionData ().InitialRetryBackoffForReadCommitted
2755
+ useBackoff := initialBackoff > 0
2756
+ opts := retry.Options {
2757
+ InitialBackoff : initialBackoff ,
2758
+ MaxBackoff : 1024 * initialBackoff ,
2759
+ Multiplier : 2.0 ,
2760
+ RandomizationFactor : 1.0 ,
2761
+ }
2762
+ for attemptNum , r := 0 , retry .StartWithCtx (ctx , opts ); ! useBackoff || r .Next (); attemptNum ++ {
2743
2763
// TODO(99410): Fix the phase time for pausable portals.
2744
2764
startExecTS := crtime .NowMono ()
2745
2765
ex .statsCollector .PhaseTimes ().SetSessionPhaseTime (sessionphase .PlannerMostRecentStartExecStmt , startExecTS )
@@ -2749,6 +2769,15 @@ func (ex *connExecutor) dispatchReadCommittedStmtToExecutionEngine(
2749
2769
ex .sessionTracing .TraceRetryInformation (
2750
2770
ctx , "statement" , p .autoRetryStmtCounter , p .autoRetryStmtReason ,
2751
2771
)
2772
+ // Step both the sequence number and the read timestamp so that we can see
2773
+ // the results of the conflicting transactions that caused us to fail and
2774
+ // any other transactions that occurred in the meantime.
2775
+ if err := ex .state .mu .txn .Step (ctx , true /* allowReadTimestampStep */ ); err != nil {
2776
+ return err
2777
+ }
2778
+ // Also step statement_timestamp so that any SQL using it is up-to-date.
2779
+ stmtTS := ex .server .cfg .Clock .PhysicalTime ()
2780
+ p .extendedEvalCtx .StmtTimestamp = stmtTS
2752
2781
}
2753
2782
bufferPos := res .BufferedResultsLen ()
2754
2783
if err = ex .dispatchToExecutionEngine (ctx , p , res ); err != nil {
@@ -2802,9 +2831,6 @@ func (ex *connExecutor) dispatchReadCommittedStmtToExecutionEngine(
2802
2831
if err := ex .state .mu .txn .PrepareForPartialRetry (ctx ); err != nil {
2803
2832
return err
2804
2833
}
2805
- if err := ex .state .mu .txn .Step (ctx , false /* allowReadTimestampStep */ ); err != nil {
2806
- return err
2807
- }
2808
2834
p .autoRetryStmtCounter ++
2809
2835
p .autoRetryStmtReason = maybeRetriableErr
2810
2836
if ppInfo := getPausablePortalInfo (); ppInfo != nil {
@@ -2813,6 +2839,14 @@ func (ex *connExecutor) dispatchReadCommittedStmtToExecutionEngine(
2813
2839
}
2814
2840
ex .metrics .EngineMetrics .StatementRetryCount .Inc (1 )
2815
2841
}
2842
+ // Check if we exited the loop due to cancelation.
2843
+ if useBackoff {
2844
+ select {
2845
+ case <- ctx .Done ():
2846
+ res .SetError (cancelchecker .QueryCanceledError )
2847
+ default :
2848
+ }
2849
+ }
2816
2850
return nil
2817
2851
}
2818
2852
0 commit comments