@@ -24,6 +24,7 @@ import (
24
24
)
25
25
26
26
const minHeartbeatInterval = 500 * time .Millisecond
27
+ const wireVersion42 = 8 // Wire version for MongoDB 4.2
27
28
28
29
// Server state constants.
29
30
const (
@@ -294,6 +295,8 @@ func (s *Server) ProcessHandshakeError(err error, startingGenerationNumber uint6
294
295
return
295
296
}
296
297
298
+ // Unwrap any connection errors. If there is no wrapped connection error, then the error should
299
+ // not result in any Server state change (e.g. a command error from the database).
297
300
wrappedConnErr := unwrapConnectionError (err )
298
301
if wrappedConnErr == nil {
299
302
return
@@ -384,27 +387,58 @@ func getWriteConcernErrorForProcessing(err error) (*driver.WriteConcernError, bo
384
387
385
388
// ProcessError handles SDAM error handling and implements driver.ErrorProcessor.
386
389
func (s * Server ) ProcessError (err error , conn driver.Connection ) driver.ProcessErrorResult {
387
- // ignore nil error
390
+ // Ignore nil errors.
388
391
if err == nil {
389
392
return driver .NoChange
390
393
}
391
394
395
+ // Ignore errors from stale connections because the error came from a previous generation of the
396
+ // connection pool. The root cause of the error has aleady been handled, which is what caused
397
+ // the pool generation to increment. Processing errors for stale connections could result in
398
+ // handling the same error root cause multiple times (e.g. a temporary network interrupt causing
399
+ // all connections to the same server to return errors).
400
+ if conn .Stale () {
401
+ return driver .NoChange
402
+ }
403
+
392
404
// Must hold the processErrorLock while updating the server description and clearing the pool.
393
405
// Not holding the lock leads to possible out-of-order processing of pool.clear() and
394
406
// pool.ready() calls from concurrent server description updates.
395
407
s .processErrorLock .Lock ()
396
408
defer s .processErrorLock .Unlock ()
397
409
398
- // ignore stale error
399
- if conn .Stale () {
400
- return driver .NoChange
410
+ // Get the wire version and service ID from the connection description because they will never
411
+ // change for the lifetime of a connection and can possibly be different between connections to
412
+ // the same server.
413
+ connDesc := conn .Description ()
414
+ wireVersion := connDesc .WireVersion
415
+ serviceID := connDesc .ServiceID
416
+
417
+ // Get the topology version from the Server description because the Server description is
418
+ // updated by heartbeats and errors, so typically has a more up-to-date topology version.
419
+ serverDesc := s .desc .Load ().(description.Server )
420
+ topologyVersion := serverDesc .TopologyVersion
421
+
422
+ // We don't currently update the Server topology version when we create new application
423
+ // connections, so it's possible for a connection's topology version to be newer than the
424
+ // Server's topology version. Pick the "newest" of the two topology versions.
425
+ // Technically a nil topology version on a new database response should be considered a new
426
+ // topology version and replace the Server's topology version. However, we don't know if the
427
+ // connection's topology version is based on a new or old database response, so we ignore a nil
428
+ // topology version on the connection for now.
429
+ //
430
+ // TODO(GODRIVER-2841): Remove this logic once we set the Server description when we create
431
+ // TODO application connections because then the Server's topology version will always be the
432
+ // TODO latest known.
433
+ if tv := connDesc .TopologyVersion ; tv != nil && topologyVersion .CompareToIncoming (tv ) < 0 {
434
+ topologyVersion = tv
401
435
}
436
+
402
437
// Invalidate server description if not primary or node recovering error occurs.
403
438
// These errors can be reported as a command error or a write concern error.
404
- desc := conn .Description ()
405
439
if cerr , ok := err .(driver.Error ); ok && (cerr .NodeIsRecovering () || cerr .NotPrimary ()) {
406
- // ignore stale error
407
- if desc . TopologyVersion .CompareToIncoming (cerr .TopologyVersion ) >= 0 {
440
+ // Ignore errors that came from when the database was on a previous topology version.
441
+ if topologyVersion .CompareToIncoming (cerr .TopologyVersion ) >= 0 {
408
442
return driver .NoChange
409
443
}
410
444
@@ -414,16 +448,16 @@ func (s *Server) ProcessError(err error, conn driver.Connection) driver.ProcessE
414
448
415
449
res := driver .ServerMarkedUnknown
416
450
// If the node is shutting down or is older than 4.2, we synchronously clear the pool
417
- if cerr .NodeIsShuttingDown () || desc . WireVersion == nil || desc . WireVersion . Max < 8 {
451
+ if cerr .NodeIsShuttingDown () || wireVersion == nil || wireVersion . Max < wireVersion42 {
418
452
res = driver .ConnectionPoolCleared
419
- s .pool .clear (err , desc . ServiceID )
453
+ s .pool .clear (err , serviceID )
420
454
}
421
455
422
456
return res
423
457
}
424
458
if wcerr , ok := getWriteConcernErrorForProcessing (err ); ok {
425
- // ignore stale error
426
- if desc . TopologyVersion .CompareToIncoming (wcerr .TopologyVersion ) >= 0 {
459
+ // Ignore errors that came from when the database was on a previous topology version.
460
+ if topologyVersion .CompareToIncoming (wcerr .TopologyVersion ) >= 0 {
427
461
return driver .NoChange
428
462
}
429
463
@@ -433,9 +467,9 @@ func (s *Server) ProcessError(err error, conn driver.Connection) driver.ProcessE
433
467
434
468
res := driver .ServerMarkedUnknown
435
469
// If the node is shutting down or is older than 4.2, we synchronously clear the pool
436
- if wcerr .NodeIsShuttingDown () || desc . WireVersion == nil || desc . WireVersion . Max < 8 {
470
+ if wcerr .NodeIsShuttingDown () || wireVersion == nil || wireVersion . Max < wireVersion42 {
437
471
res = driver .ConnectionPoolCleared
438
- s .pool .clear (err , desc . ServiceID )
472
+ s .pool .clear (err , serviceID )
439
473
}
440
474
return res
441
475
}
@@ -457,7 +491,7 @@ func (s *Server) ProcessError(err error, conn driver.Connection) driver.ProcessE
457
491
// monitoring check. The check is cancelled last to avoid a post-cancellation reconnect racing with
458
492
// updateDescription.
459
493
s .updateDescription (description .NewServerFromError (s .address , err , nil ))
460
- s .pool .clear (err , desc . ServiceID )
494
+ s .pool .clear (err , serviceID )
461
495
s .cancelCheck ()
462
496
return driver .ConnectionPoolCleared
463
497
}
0 commit comments