Skip to content

Commit d797822

Browse files
GODRIVER-2828 Use topology version from Server instead of Connection in ProcessError. (#1252)
Co-authored-by: Preston Vasquez <[email protected]>
1 parent b1ab40e commit d797822

File tree

3 files changed

+439
-181
lines changed

3 files changed

+439
-181
lines changed

x/mongo/driver/driver.go

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -158,12 +158,6 @@ const (
158158
ConnectionPoolCleared
159159
)
160160

161-
// ServerChanged returns true if the ProcessErrorResult indicates that the server changed from an SDAM perspective
162-
// during a ProcessError() call.
163-
func (p ProcessErrorResult) ServerChanged() bool {
164-
return p != NoChange
165-
}
166-
167161
// ErrorProcessor implementations can handle processing errors, which may modify their internal state.
168162
// If this type is implemented by a Server, then Operation.Execute will call it's ProcessError
169163
// method after it decodes a wire message.

x/mongo/driver/topology/server.go

Lines changed: 48 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ import (
2424
)
2525

2626
const minHeartbeatInterval = 500 * time.Millisecond
27+
const wireVersion42 = 8 // Wire version for MongoDB 4.2
2728

2829
// Server state constants.
2930
const (
@@ -294,6 +295,8 @@ func (s *Server) ProcessHandshakeError(err error, startingGenerationNumber uint6
294295
return
295296
}
296297

298+
// Unwrap any connection errors. If there is no wrapped connection error, then the error should
299+
// not result in any Server state change (e.g. a command error from the database).
297300
wrappedConnErr := unwrapConnectionError(err)
298301
if wrappedConnErr == nil {
299302
return
@@ -384,27 +387,58 @@ func getWriteConcernErrorForProcessing(err error) (*driver.WriteConcernError, bo
384387

385388
// ProcessError handles SDAM error handling and implements driver.ErrorProcessor.
386389
func (s *Server) ProcessError(err error, conn driver.Connection) driver.ProcessErrorResult {
387-
// ignore nil error
390+
// Ignore nil errors.
388391
if err == nil {
389392
return driver.NoChange
390393
}
391394

395+
// Ignore errors from stale connections because the error came from a previous generation of the
396+
// connection pool. The root cause of the error has aleady been handled, which is what caused
397+
// the pool generation to increment. Processing errors for stale connections could result in
398+
// handling the same error root cause multiple times (e.g. a temporary network interrupt causing
399+
// all connections to the same server to return errors).
400+
if conn.Stale() {
401+
return driver.NoChange
402+
}
403+
392404
// Must hold the processErrorLock while updating the server description and clearing the pool.
393405
// Not holding the lock leads to possible out-of-order processing of pool.clear() and
394406
// pool.ready() calls from concurrent server description updates.
395407
s.processErrorLock.Lock()
396408
defer s.processErrorLock.Unlock()
397409

398-
// ignore stale error
399-
if conn.Stale() {
400-
return driver.NoChange
410+
// Get the wire version and service ID from the connection description because they will never
411+
// change for the lifetime of a connection and can possibly be different between connections to
412+
// the same server.
413+
connDesc := conn.Description()
414+
wireVersion := connDesc.WireVersion
415+
serviceID := connDesc.ServiceID
416+
417+
// Get the topology version from the Server description because the Server description is
418+
// updated by heartbeats and errors, so typically has a more up-to-date topology version.
419+
serverDesc := s.desc.Load().(description.Server)
420+
topologyVersion := serverDesc.TopologyVersion
421+
422+
// We don't currently update the Server topology version when we create new application
423+
// connections, so it's possible for a connection's topology version to be newer than the
424+
// Server's topology version. Pick the "newest" of the two topology versions.
425+
// Technically a nil topology version on a new database response should be considered a new
426+
// topology version and replace the Server's topology version. However, we don't know if the
427+
// connection's topology version is based on a new or old database response, so we ignore a nil
428+
// topology version on the connection for now.
429+
//
430+
// TODO(GODRIVER-2841): Remove this logic once we set the Server description when we create
431+
// TODO application connections because then the Server's topology version will always be the
432+
// TODO latest known.
433+
if tv := connDesc.TopologyVersion; tv != nil && topologyVersion.CompareToIncoming(tv) < 0 {
434+
topologyVersion = tv
401435
}
436+
402437
// Invalidate server description if not primary or node recovering error occurs.
403438
// These errors can be reported as a command error or a write concern error.
404-
desc := conn.Description()
405439
if cerr, ok := err.(driver.Error); ok && (cerr.NodeIsRecovering() || cerr.NotPrimary()) {
406-
// ignore stale error
407-
if desc.TopologyVersion.CompareToIncoming(cerr.TopologyVersion) >= 0 {
440+
// Ignore errors that came from when the database was on a previous topology version.
441+
if topologyVersion.CompareToIncoming(cerr.TopologyVersion) >= 0 {
408442
return driver.NoChange
409443
}
410444

@@ -414,16 +448,16 @@ func (s *Server) ProcessError(err error, conn driver.Connection) driver.ProcessE
414448

415449
res := driver.ServerMarkedUnknown
416450
// If the node is shutting down or is older than 4.2, we synchronously clear the pool
417-
if cerr.NodeIsShuttingDown() || desc.WireVersion == nil || desc.WireVersion.Max < 8 {
451+
if cerr.NodeIsShuttingDown() || wireVersion == nil || wireVersion.Max < wireVersion42 {
418452
res = driver.ConnectionPoolCleared
419-
s.pool.clear(err, desc.ServiceID)
453+
s.pool.clear(err, serviceID)
420454
}
421455

422456
return res
423457
}
424458
if wcerr, ok := getWriteConcernErrorForProcessing(err); ok {
425-
// ignore stale error
426-
if desc.TopologyVersion.CompareToIncoming(wcerr.TopologyVersion) >= 0 {
459+
// Ignore errors that came from when the database was on a previous topology version.
460+
if topologyVersion.CompareToIncoming(wcerr.TopologyVersion) >= 0 {
427461
return driver.NoChange
428462
}
429463

@@ -433,9 +467,9 @@ func (s *Server) ProcessError(err error, conn driver.Connection) driver.ProcessE
433467

434468
res := driver.ServerMarkedUnknown
435469
// If the node is shutting down or is older than 4.2, we synchronously clear the pool
436-
if wcerr.NodeIsShuttingDown() || desc.WireVersion == nil || desc.WireVersion.Max < 8 {
470+
if wcerr.NodeIsShuttingDown() || wireVersion == nil || wireVersion.Max < wireVersion42 {
437471
res = driver.ConnectionPoolCleared
438-
s.pool.clear(err, desc.ServiceID)
472+
s.pool.clear(err, serviceID)
439473
}
440474
return res
441475
}
@@ -457,7 +491,7 @@ func (s *Server) ProcessError(err error, conn driver.Connection) driver.ProcessE
457491
// monitoring check. The check is cancelled last to avoid a post-cancellation reconnect racing with
458492
// updateDescription.
459493
s.updateDescription(description.NewServerFromError(s.address, err, nil))
460-
s.pool.clear(err, desc.ServiceID)
494+
s.pool.clear(err, serviceID)
461495
s.cancelCheck()
462496
return driver.ConnectionPoolCleared
463497
}

0 commit comments

Comments
 (0)