Skip to content

Commit e48e814

Browse files
committed
sql: add exponential backoff to read committed stmt retry loop
Testing has shown that adding exponential backoff significantly improves throughput of highly contentious read committed workloads.

Informs: #145377

Release note (sql change): Add session variable `initial_retry_backoff_for_read_committed`, which controls the initial backoff duration when retrying an individual statement in an explicit READ COMMITTED transaction. A duration of 0 disables exponential backoff.

If a statement in an explicit READ COMMITTED transaction is failing with the following 40001 error:

```
ERROR: restart transaction: read committed retry limit exceeded; set by max_retries_for_read_committed=...
```

then `initial_retry_backoff_for_read_committed` should be set to a duration proportional to the typical execution time of the statement (in addition to increasing `max_retries_for_read_committed`).
1 parent d6578e5 commit e48e814

File tree

8 files changed

+66
-5
lines changed

8 files changed

+66
-5
lines changed

pkg/sql/conn_executor_exec.go

Lines changed: 21 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,7 @@ import (
6969
"github.com/cockroachdb/cockroach/pkg/util/log/severity"
7070
"github.com/cockroachdb/cockroach/pkg/util/metamorphic"
7171
"github.com/cockroachdb/cockroach/pkg/util/metric"
72+
"github.com/cockroachdb/cockroach/pkg/util/retry"
7273
"github.com/cockroachdb/cockroach/pkg/util/timeutil"
7374
"github.com/cockroachdb/cockroach/pkg/util/tracing"
7475
"github.com/cockroachdb/cockroach/pkg/util/tracing/tracingpb"
@@ -2746,8 +2747,19 @@ func (ex *connExecutor) dispatchReadCommittedStmtToExecutionEngine(
27462747
return err
27472748
}
27482749

2750+
// Use retry with exponential backoff and full jitter to reduce collisions for
2751+
// high-contention workloads. See https://en.wikipedia.org/wiki/Exponential_backoff and
2752+
// https://aws.amazon.com/blogs/architecture/exponential-backoff-and-jitter/
27492753
maxRetries := int(ex.sessionData().MaxRetriesForReadCommitted)
2750-
for attemptNum := 0; ; attemptNum++ {
2754+
initialBackoff := ex.sessionData().InitialRetryBackoffForReadCommitted
2755+
useBackoff := initialBackoff > 0
2756+
opts := retry.Options{
2757+
InitialBackoff: initialBackoff,
2758+
MaxBackoff: 1024 * initialBackoff,
2759+
Multiplier: 2.0,
2760+
RandomizationFactor: 1.0,
2761+
}
2762+
for attemptNum, r := 0, retry.StartWithCtx(ctx, opts); !useBackoff || r.Next(); attemptNum++ {
27512763
// TODO(99410): Fix the phase time for pausable portals.
27522764
startExecTS := crtime.NowMono()
27532765
ex.statsCollector.PhaseTimes().SetSessionPhaseTime(sessionphase.PlannerMostRecentStartExecStmt, startExecTS)
@@ -2767,7 +2779,6 @@ func (ex *connExecutor) dispatchReadCommittedStmtToExecutionEngine(
27672779
stmtTS := ex.server.cfg.Clock.PhysicalTime()
27682780
p.extendedEvalCtx.StmtTimestamp = stmtTS
27692781
}
2770-
27712782
bufferPos := res.BufferedResultsLen()
27722783
if err = ex.dispatchToExecutionEngine(ctx, p, res); err != nil {
27732784
return err
@@ -2828,6 +2839,14 @@ func (ex *connExecutor) dispatchReadCommittedStmtToExecutionEngine(
28282839
}
28292840
ex.metrics.EngineMetrics.StatementRetryCount.Inc(1)
28302841
}
2842+
// Check if we exited the loop due to cancelation.
2843+
if useBackoff {
2844+
select {
2845+
case <-ctx.Done():
2846+
res.SetError(cancelchecker.QueryCanceledError)
2847+
default:
2848+
}
2849+
}
28312850
return nil
28322851
}
28332852

pkg/sql/exec_util.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4186,6 +4186,10 @@ func (m *sessionDataMutator) SetOptimizerUseExistsFilterHoistRule(val bool) {
41864186
m.data.OptimizerUseExistsFilterHoistRule = val
41874187
}
41884188

4189+
func (m *sessionDataMutator) SetInitialRetryBackoffForReadCommitted(val time.Duration) {
4190+
m.data.InitialRetryBackoffForReadCommitted = val
4191+
}
4192+
41894193
// Utility functions related to scrubbing sensitive information on SQL Stats.
41904194

41914195
// quantizeCounts ensures that the Count field in the

pkg/sql/explain_bundle.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1096,7 +1096,7 @@ func (c *stmtEnvCollector) PrintSessionSettings(w io.Writer, sv *settings.Values
10961096
switch varName {
10971097
case "idle_in_session_timeout", "idle_in_transaction_session_timeout",
10981098
"idle_session_timeout", "lock_timeout", "deadlock_timeout",
1099-
"statement_timeout", "transaction_timeout":
1099+
"statement_timeout", "transaction_timeout", "initial_retry_backoff_for_read_committed":
11001100
// Defaults for timeout settings are of the duration type (i.e.
11011101
// "0s"), so we'll parse it to extract the number of
11021102
// milliseconds (which is what the session variable uses).

pkg/sql/logictest/testdata/logic_test/information_schema

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4011,6 +4011,7 @@ idle_in_transaction_session_timeout 0
40114011
idle_session_timeout 0
40124012
index_join_streamer_batch_size 8.0 MiB
40134013
index_recommendations_enabled off
4014+
initial_retry_backoff_for_read_committed 0
40144015
inject_retry_errors_enabled off
40154016
inject_retry_errors_on_commit_enabled off
40164017
integer_datetimes on

pkg/sql/logictest/testdata/logic_test/pg_catalog

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3005,6 +3005,7 @@ idle_in_transaction_session_timeout 0 N
30053005
idle_session_timeout 0 NULL NULL NULL string
30063006
index_join_streamer_batch_size 8.0 MiB NULL NULL NULL string
30073007
index_recommendations_enabled off NULL NULL NULL string
3008+
initial_retry_backoff_for_read_committed 0 NULL NULL NULL string
30083009
inject_retry_errors_enabled off NULL NULL NULL string
30093010
inject_retry_errors_on_commit_enabled off NULL NULL NULL string
30103011
integer_datetimes on NULL NULL NULL string
@@ -3235,6 +3236,7 @@ idle_in_transaction_session_timeout 0 N
32353236
idle_session_timeout 0 NULL user NULL 0s 0s
32363237
index_join_streamer_batch_size 8.0 MiB NULL user NULL 8.0 MiB 8.0 MiB
32373238
index_recommendations_enabled off NULL user NULL on false
3239+
initial_retry_backoff_for_read_committed 0 NULL user NULL 0s 0s
32383240
inject_retry_errors_enabled off NULL user NULL off off
32393241
inject_retry_errors_on_commit_enabled off NULL user NULL off off
32403242
integer_datetimes on NULL user NULL on on
@@ -3453,6 +3455,7 @@ idle_in_transaction_session_timeout NULL NULL NULL
34533455
idle_session_timeout NULL NULL NULL NULL NULL
34543456
index_join_streamer_batch_size NULL NULL NULL NULL NULL
34553457
index_recommendations_enabled NULL NULL NULL NULL NULL
3458+
initial_retry_backoff_for_read_committed NULL NULL NULL NULL NULL
34563459
inject_retry_errors_enabled NULL NULL NULL NULL NULL
34573460
inject_retry_errors_on_commit_enabled NULL NULL NULL NULL NULL
34583461
integer_datetimes NULL NULL NULL NULL NULL

pkg/sql/logictest/testdata/logic_test/show_source

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -119,6 +119,7 @@ idle_in_transaction_session_timeout 0
119119
idle_session_timeout 0
120120
index_join_streamer_batch_size 8.0 MiB
121121
index_recommendations_enabled off
122+
initial_retry_backoff_for_read_committed 0
122123
inject_retry_errors_enabled off
123124
inject_retry_errors_on_commit_enabled off
124125
integer_datetimes on

pkg/sql/sessiondatapb/local_only_session_data.proto

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -670,6 +670,10 @@ message LocalOnlySessionData {
670670
// OptimizerUseExistsFilterHoistRule, when true, causes the optimizer to apply
671671
// the HoistUnboundFilterFromExistsSubquery rule to EXISTS conditions.
672672
bool optimizer_use_exists_filter_hoist_rule = 170;
673+
// InitialRetryBackoffForReadCommitted controls the initial backoff
674+
// duration for automatic retries of statements in explicit READ COMMITTED
675+
// transactions that see a transaction retry error. 0 disables backoff.
676+
int64 initial_retry_backoff_for_read_committed = 171 [(gogoproto.casttype) = "time.Duration"];
673677
///////////////////////////////////////////////////////////////////////////
674678
// WARNING: consider whether a session parameter you're adding needs to //
675679
// be propagated to the remote nodes. If so, that parameter should live //

pkg/sql/vars.go

Lines changed: 31 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2248,8 +2248,10 @@ var varGen = map[string]sessionVar{
22482248
},
22492249

22502250
// CockroachDB extension. Configures the maximum number of automatic retries
2251-
// to perform for statements in explicit READ COMMITTED transactions that
2252-
// see a transaction retry error.
2251+
// to perform for statements in explicit READ COMMITTED transactions that see
2252+
// a transaction retry error. (See also
2253+
// initial_retry_backoff_for_read_committed which should be tuned with
2254+
// max_retries_for_read_committed.)
22532255
`max_retries_for_read_committed`: {
22542256
GetStringVal: makeIntGetStringValFn(`max_retries_for_read_committed`),
22552257
Set: func(_ context.Context, m sessionDataMutator, s string) error {
@@ -4034,6 +4036,33 @@ var varGen = map[string]sessionVar{
40344036
},
40354037
GlobalDefault: globalTrue,
40364038
},
4039+
4040+
// CockroachDB extension. Configures the initial backoff duration for
4041+
// automatic retries of statements in explicit READ COMMITTED transactions
4042+
// that see a transaction retry error. For statements experiencing contention
4043+
// under READ COMMITTED isolation, this should be set to a duration
4044+
// proportional to the typical execution time of the statement (in addition to
4045+
// also increasing `max_retries_for_read_committed`).
4046+
`initial_retry_backoff_for_read_committed`: {
4047+
GetStringVal: makeTimeoutVarGetter(`initial_retry_backoff_for_read_committed`),
4048+
Set: func(_ context.Context, m sessionDataMutator, s string) error {
4049+
duration, err := validateTimeoutVar(m.data.GetIntervalStyle(), s,
4050+
"initial_retry_backoff_for_read_committed",
4051+
)
4052+
if err != nil {
4053+
return err
4054+
}
4055+
m.SetInitialRetryBackoffForReadCommitted(duration)
4056+
return nil
4057+
},
4058+
Get: func(evalCtx *extendedEvalContext, _ *kv.Txn) (string, error) {
4059+
ms := evalCtx.SessionData().InitialRetryBackoffForReadCommitted.Nanoseconds() / int64(time.Millisecond)
4060+
return strconv.FormatInt(ms, 10), nil
4061+
},
4062+
GlobalDefault: func(sv *settings.Values) string {
4063+
return "0s"
4064+
},
4065+
},
40374066
}
40384067

40394068
func ReplicationModeFromString(s string) (sessiondatapb.ReplicationMode, error) {

0 commit comments

Comments
 (0)