Skip to content

Commit aa0a380

Browse files
authored
GODRIVER-2457 Retry as many times as possible when Timeout is set. (#1060)
1 parent b2af7b5 commit aa0a380

File tree

8 files changed

+12943
-29
lines changed

8 files changed

+12943
-29
lines changed

mongo/change_stream.go

Lines changed: 37 additions & 21 deletions
Original file line number | Diff line number | Diff line change
@@ -240,7 +240,6 @@ func (cs *ChangeStream) createOperationDeployment(server driver.Server, connecti
240240
func (cs *ChangeStream) executeOperation(ctx context.Context, resuming bool) error {
241241
var server driver.Server
242242
var conn driver.Connection
243-
var err error
244243

245244
if server, cs.err = cs.client.deployment.SelectServer(ctx, cs.selector); cs.err != nil {
246245
return cs.Err()
@@ -284,48 +283,65 @@ func (cs *ChangeStream) executeOperation(ctx context.Context, resuming bool) err
284283
// Cancel the timeout-derived context at the end of executeOperation to avoid a context leak.
285284
defer cancelFunc()
286285
}
287-
if original := cs.aggregate.Execute(ctx); original != nil {
288-
retryableRead := cs.client.retryReads && cs.wireVersion != nil && cs.wireVersion.Max >= 6
289-
if !retryableRead {
290-
cs.err = replaceErrors(original)
291-
return cs.err
286+
287+
// Execute the aggregate, retrying on retryable errors once (1) if retryable reads are enabled and
288+
// infinitely (-1) if context is a Timeout context.
289+
var retries int
290+
if cs.client.retryReads && cs.wireVersion != nil && cs.wireVersion.Max >= 6 {
291+
retries = 1
292+
}
293+
if internal.IsTimeoutContext(ctx) {
294+
retries = -1
295+
}
296+
297+
var err error
298+
AggregateExecuteLoop:
299+
for {
300+
err = cs.aggregate.Execute(ctx)
301+
// If no error or no retries remain, do not retry.
302+
if err == nil || retries == 0 {
303+
break AggregateExecuteLoop
292304
}
293305

294-
cs.err = original
295-
switch tt := original.(type) {
306+
switch tt := err.(type) {
296307
case driver.Error:
308+
// If error is not retryable, do not retry.
297309
if !tt.RetryableRead() {
298-
break
310+
break AggregateExecuteLoop
299311
}
300312

313+
// If error is retryable: subtract 1 from retries, redo server selection, checkout
314+
// a connection, and restart loop.
315+
retries--
301316
server, err = cs.client.deployment.SelectServer(ctx, cs.selector)
302317
if err != nil {
303-
break
318+
break AggregateExecuteLoop
304319
}
305320

306321
conn.Close()
307322
conn, err = server.Connection(ctx)
308323
if err != nil {
309-
break
324+
break AggregateExecuteLoop
310325
}
311326
defer conn.Close()
312-
cs.wireVersion = conn.Description().WireVersion
313327

328+
// If wire version is now < 6, do not retry.
329+
cs.wireVersion = conn.Description().WireVersion
314330
if cs.wireVersion == nil || cs.wireVersion.Max < 6 {
315-
break
331+
break AggregateExecuteLoop
316332
}
317333

334+
// Reset deployment.
318335
cs.aggregate.Deployment(cs.createOperationDeployment(server, conn))
319-
cs.err = cs.aggregate.Execute(ctx)
336+
default:
337+
// Do not retry if error is not a driver error.
338+
break AggregateExecuteLoop
320339
}
321-
322-
if cs.err != nil {
323-
cs.err = replaceErrors(cs.err)
324-
return cs.Err()
325-
}
326-
327340
}
328-
cs.err = nil
341+
if err != nil {
342+
cs.err = replaceErrors(err)
343+
return cs.err
344+
}
329345

330346
cr := cs.aggregate.ResultCursorResponse()
331347
cr.Server = server

mongo/integration/unified/client_entity.go

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -385,6 +385,8 @@ func setClientOptionsFromURIOptions(clientOpts *options.ClientOptions, uriOpts b
385385
clientOpts.SetRetryReads(value.(bool))
386386
case "retryWrites":
387387
clientOpts.SetRetryWrites(value.(bool))
388+
case "socketTimeoutMS":
389+
clientOpts.SetSocketTimeout(time.Duration(value.(int32)) * time.Millisecond)
388390
case "w":
389391
wc.W = value
390392
wcSet = true

0 commit comments

Comments
 (0)