Skip to content

Commit 782ff75

Browse files
committed
retry: add comprehensive documentation and tests for retry policy
Document LimitBackoffRetryPolicy, ExpBackoffRetryPolicy, and Vargo adapter with detailed examples. Preserve backward compatibility by making WithMaxRetries(ctx, 0) mean unlimited retries (original behavior). Add WithNoRetries() for disabling retries and introduce sentinel constants. Enhance RetryFunc documentation to clarify return value semantics and add additional test cases.
1 parent d408129 commit 782ff75

File tree

5 files changed

+468
-16
lines changed

5 files changed

+468
-16
lines changed

crdb/common.go

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -89,8 +89,21 @@ func ExecuteInTx(ctx context.Context, tx Tx, fn func() error) (err error) {
8989

9090
// We have a retryable error. Check the retry policy.
9191
delay, retryErr := retryFunc(err)
92+
// Stop retrying if the context has been cancelled or its deadline has expired.
93+
if err := ctx.Err(); err != nil {
94+
return err
95+
}
9296
if delay > 0 && retryErr == nil {
93-
// We don't want to hold locks while waiting for a backoff, so restart the entire transaction
97+
// When backoff is needed, we don't want to hold locks while waiting for a backoff,
98+
// so restart the entire transaction:
99+
// - tx.Exec(ctx, "ROLLBACK") sends SQL to the server:
100+
// it doesn't call tx.Rollback() (which would close the Go sql.Tx object)
101+
// - The underlying connection remains open: the *sql.Tx wrapper maintains the database connection.
102+
// Only the server-side transaction is rolled back.
103+
// - tx.Exec(ctx, "BEGIN") starts a new server-side transaction on the same connection wrapped by the
104+
// same *sql.Tx object
105+
// - The deferred cleanup calls tx.Rollback() (the Go method) only on errors,
106+
// which closes the Go object and returns the connection to the pool.
94107
if restartErr := tx.Exec(ctx, "ROLLBACK"); restartErr != nil {
95108
return newTxnRestartError(restartErr, err)
96109
}

crdb/retry.go

Lines changed: 185 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,22 @@ import (
2020

2121
// RetryFunc owns the state for a transaction retry operation. Usually, this is
2222
// just the retry count. RetryFunc is not assumed to be safe for concurrent use.
23+
//
24+
// The function is called after each retryable error to determine whether to
25+
// retry and how long to wait. It receives the retryable error that triggered
26+
// the retry attempt.
27+
//
28+
// Return values:
29+
// - duration: The delay to wait before the next retry attempt. If 0, retry
30+
// immediately without delay.
31+
// - error: If non-nil, stops retrying and returns this error to the caller
32+
// (typically a MaxRetriesExceededError). If nil, the retry will proceed
33+
// after the specified duration.
34+
//
35+
// Example behavior:
36+
// - (100ms, nil): Wait 100ms, then retry
37+
// - (0, nil): Retry immediately
38+
// - (0, err): Stop retrying, return err to caller
2339
type RetryFunc func(err error) (time.Duration, error)
2440

2541
// RetryPolicy constructs a new instance of a RetryFunc for each transaction
@@ -29,31 +45,156 @@ type RetryPolicy interface {
2945
NewRetry() RetryFunc
3046
}
3147

48+
const (
49+
// NoRetries is a sentinel value for LimitBackoffRetryPolicy.RetryLimit
50+
// indicating that no retries should be attempted. When a policy has
51+
// RetryLimit set to NoRetries, the transaction will be attempted only
52+
// once, and any retryable error will immediately return a
53+
// MaxRetriesExceededError.
54+
//
55+
// Use WithNoRetries(ctx) to create a context with this behavior.
56+
NoRetries = -1
57+
58+
// UnlimitedRetries indicates that retries should continue indefinitely
59+
// until the transaction succeeds or a non-retryable error occurs. This
60+
// is represented by setting RetryLimit to 0.
61+
//
62+
// Use WithMaxRetries(ctx, 0) to create a context with unlimited retries,
63+
// though this is generally not recommended in production as it can lead
64+
// to infinite retry loops.
65+
UnlimitedRetries = 0
66+
)
67+
68+
// LimitBackoffRetryPolicy implements RetryPolicy with a configurable retry limit
69+
// and optional constant delay between retries.
70+
//
71+
// The RetryLimit field controls retry behavior:
72+
// - Positive value (e.g., 10): Retry up to that many times before failing
73+
// - UnlimitedRetries (0): Retry indefinitely until success or non-retryable error
74+
// - NoRetries (-1) or any negative value: Do not retry; fail immediately on first retryable error
75+
//
76+
// If Delay is greater than zero, the policy will wait for the specified duration
77+
// between retry attempts.
78+
//
79+
// Example usage with limited retries and no delay:
80+
//
81+
// policy := &LimitBackoffRetryPolicy{
82+
// RetryLimit: 10,
83+
// Delay: 0,
84+
// }
85+
// ctx := crdb.WithRetryPolicy(context.Background(), policy)
86+
// err := crdb.ExecuteTx(ctx, db, nil, func(tx *sql.Tx) error {
87+
// // transaction logic
88+
// })
89+
//
90+
// Example usage with fixed delay between retries:
91+
//
92+
// policy := &LimitBackoffRetryPolicy{
93+
// RetryLimit: 5,
94+
// Delay: 100 * time.Millisecond,
95+
// }
96+
// ctx := crdb.WithRetryPolicy(context.Background(), policy)
97+
//
98+
// Example usage with unlimited retries:
99+
//
100+
// policy := &LimitBackoffRetryPolicy{
101+
// RetryLimit: UnlimitedRetries, // or 0
102+
// Delay: 50 * time.Millisecond,
103+
// }
104+
//
105+
// Note: Convenience functions are available:
106+
// - WithMaxRetries(ctx, n) creates a LimitBackoffRetryPolicy with RetryLimit=n and Delay=0
107+
// - WithNoRetries(ctx) creates a LimitBackoffRetryPolicy with RetryLimit=NoRetries
32108
type LimitBackoffRetryPolicy struct {
109+
// RetryLimit controls the retry behavior:
110+
// - Positive value: Maximum number of retries before returning MaxRetriesExceededError
111+
// - UnlimitedRetries (0): Retry indefinitely
112+
// - NoRetries (-1) or any negative value: Do not retry, fail immediately
33113
RetryLimit int
34-
Delay time.Duration
114+
115+
// Delay is the fixed duration to wait between retry attempts. If 0,
116+
// retries happen immediately without delay.
117+
Delay time.Duration
35118
}
36119

120+
// NewRetry implements RetryPolicy.
37121
func (l *LimitBackoffRetryPolicy) NewRetry() RetryFunc {
38122
tryCount := 0
39123
return func(err error) (time.Duration, error) {
40124
tryCount++
41-
if tryCount > l.RetryLimit {
125+
// Any negative value (including NoRetries) means fail immediately
126+
if l.RetryLimit < UnlimitedRetries {
127+
return 0, newMaxRetriesExceededError(err, 0)
128+
}
129+
// UnlimitedRetries (0) means retry indefinitely, so skip the limit check
130+
// Any positive value enforces the retry limit
131+
if l.RetryLimit > UnlimitedRetries && tryCount > l.RetryLimit {
42132
return 0, newMaxRetriesExceededError(err, l.RetryLimit)
43133
}
44134
return l.Delay, nil
45135
}
46136
}
47137

48-
// ExpBackoffRetryPolicy implements RetryPolicy using an exponential backoff with optional
49-
// saturation.
138+
// ExpBackoffRetryPolicy implements RetryPolicy using an exponential backoff strategy
139+
// where delays double with each retry attempt, with an optional maximum delay cap.
140+
//
141+
// The delay between retries doubles with each attempt, starting from BaseDelay:
142+
// - Retry 1: BaseDelay
143+
// - Retry 2: BaseDelay * 2
144+
// - Retry 3: BaseDelay * 4
145+
// - Retry N: BaseDelay * 2^(N-1)
146+
//
147+
// If MaxDelay is set (> 0), the delay is capped at that value once reached.
148+
// This prevents excessive wait times during high retry counts and provides a
149+
// predictable upper bound for backoff duration.
150+
//
151+
// The policy will retry up to RetryLimit times. When the limit is exceeded or
152+
// if the delay calculation overflows without a MaxDelay set, it returns a
153+
// MaxRetriesExceededError.
154+
//
155+
// Example usage with capped exponential backoff:
156+
//
157+
// policy := &ExpBackoffRetryPolicy{
158+
// RetryLimit: 10,
159+
// BaseDelay: 100 * time.Millisecond,
160+
// MaxDelay: 5 * time.Second,
161+
// }
162+
// ctx := crdb.WithRetryPolicy(context.Background(), policy)
163+
// err := crdb.ExecuteTx(ctx, db, nil, func(tx *sql.Tx) error {
164+
// // transaction logic that may encounter retryable errors
165+
// _, err := tx.ExecContext(ctx, "UPDATE ...")
// return err
166+
// })
167+
//
168+
// This configuration produces delays: 100ms, 200ms, 400ms, 800ms, 1.6s, 3.2s,
169+
// then stays at 5s for all subsequent retries.
170+
//
171+
// Example usage with unbounded exponential backoff:
172+
//
173+
// policy := &ExpBackoffRetryPolicy{
174+
// RetryLimit: 5,
175+
// BaseDelay: 1 * time.Second,
176+
// MaxDelay: 0, // no cap
177+
// }
178+
//
179+
// This configuration produces delays: 1s, 2s, 4s, 8s, 16s.
180+
// Note: Setting MaxDelay to 0 means no cap, but be aware that delay overflow
181+
// will cause the policy to fail early.
50182
type ExpBackoffRetryPolicy struct {
183+
// RetryLimit is the maximum number of retries allowed. After this many
184+
// retries, a MaxRetriesExceededError is returned.
51185
RetryLimit int
52-
BaseDelay time.Duration
53-
MaxDelay time.Duration
186+
187+
// BaseDelay is the initial delay before the first retry. Each subsequent
188+
// retry doubles this value: delay = BaseDelay * 2^(attempt-1).
189+
BaseDelay time.Duration
190+
191+
// MaxDelay is the maximum delay cap. If > 0, delays are capped at this
192+
// value once reached. If 0, delays grow unbounded (until overflow, which
193+
// causes early termination).
194+
MaxDelay time.Duration
54195
}
55196

56-
// NewRetry implements RetryPolicy
197+
// NewRetry implements RetryPolicy.
57198
func (l *ExpBackoffRetryPolicy) NewRetry() RetryFunc {
58199
tryCount := 0
59200
return func(err error) (time.Duration, error) {
@@ -78,24 +219,57 @@ func (l *ExpBackoffRetryPolicy) NewRetry() RetryFunc {
78219
}
79220
}
80221

81-
// Vargo converts a go-retry style Delay provider into a RetryPolicy
222+
// Vargo adapts third-party backoff strategies (like those from github.com/sethvargo/go-retry)
223+
// into a RetryPolicy without creating a direct dependency on those libraries.
224+
//
225+
// This function allows you to use any backoff implementation that conforms to the
226+
// VargoBackoff interface, providing flexibility to integrate external retry strategies
227+
// with CockroachDB transaction retries.
228+
//
229+
// Example usage with github.com/sethvargo/go-retry:
230+
//
231+
// import "github.com/sethvargo/go-retry"
232+
//
233+
// // Create a retry policy using an external backoff strategy
234+
// policy := crdb.Vargo(func() crdb.VargoBackoff {
235+
// // Fibonacci backoff: 1s, 1s, 2s, 3s, 5s, 8s...
236+
// return retry.NewFibonacci(1 * time.Second)
237+
// })
238+
// ctx := crdb.WithRetryPolicy(context.Background(), policy)
239+
// err := crdb.ExecuteTx(ctx, db, nil, func(tx *sql.Tx) error {
240+
// // transaction logic
241+
// })
242+
//
243+
// The function parameter should return a fresh VargoBackoff instance for each
244+
// transaction, as backoff state is not safe for concurrent use.
82245
func Vargo(fn func() VargoBackoff) RetryPolicy {
83246
return &vargoAdapter{
84247
DelegateFactory: fn,
85248
}
86249
}
87250

88-
// VargoBackoff allow us to adapt sethvargo/go-retry Backoff policies
89-
// without also creating a transitive dependency on that library.
251+
// VargoBackoff is an interface for external backoff strategies that provide
252+
// delays through a Next() method. This allows adaptation of backoff policies
253+
// from libraries like github.com/sethvargo/go-retry without creating a direct
254+
// dependency.
255+
//
256+
// Next returns the next backoff duration and a boolean indicating whether to
257+
// stop retrying. When stop is true, the retry loop terminates with a
258+
// MaxRetriesExceededError.
90259
type VargoBackoff interface {
260+
// Next returns the next delay duration and whether to stop retrying.
261+
// When stop is true, no more retries will be attempted.
91262
Next() (next time.Duration, stop bool)
92263
}
93264

94-
// vargoAdapter adapts backoff policies in the style of sethvargo/go-retry
265+
// vargoAdapter adapts backoff policies in the style of github.com/sethvargo/go-retry.
95266
type vargoAdapter struct {
96267
DelegateFactory func() VargoBackoff
97268
}
98269

270+
// NewRetry implements RetryPolicy by delegating to the external backoff strategy.
271+
// It creates a fresh backoff instance using DelegateFactory and wraps its Next()
272+
// method to conform to the RetryFunc signature.
99273
func (b *vargoAdapter) NewRetry() RetryFunc {
100274
delegate := b.DelegateFactory()
101275
count := 0

0 commit comments

Comments
 (0)