@@ -7,7 +7,10 @@ import (
77 "strings"
88
99 "github.com/10gen/migration-verifier/internal/logger"
10+ "github.com/10gen/migration-verifier/mmongo"
11+ mapset "github.com/deckarep/golang-set/v2"
1012 "github.com/pkg/errors"
13+ "github.com/samber/lo"
1114 "go.mongodb.org/mongo-driver/bson"
1215 "go.mongodb.org/mongo-driver/mongo"
1316 "go.mongodb.org/mongo-driver/x/mongo/driver"
@@ -184,63 +187,83 @@ func isNetworkError(err error) bool {
184187 return mongo .IsNetworkError (err )
185188}
186189
190+ // transientErrorCodes is the set of server error codes that hasTransientErrorCode treats as transient.
191+ // The list was copied from mongosync; not every code here is known to happen in migration-verifier.
192+ var transientErrorCodes = mapset .NewSet (
193+ 6 , // HostUnreachable
194+ 7 , // HostNotFound
195+
196+ // CursorNotFound sometimes happens due to inconsistencies
197+ // in the server’s sharding abstraction. See REP-2440.
198+ 43 , // CursorNotFound
199+
200+ 50 , // MaxTimeMSExpired
201+ 63 , // OBSOLETE_StaleShardVersion
202+ 64 , // WriteConcernFailed
203+
204+ // This seems to be possible if a shard is unavailable due
205+ // to an election. See REP-2926.
206+ 70 , // ShardNotFound
207+
208+ 89 , // NetworkTimeout
209+ 90 , // CallbackCanceled
210+ 91 , // ShutdownInProgress
211+ 112 , // WriteConflict
212+ 117 , // ConflictingOperationInProgress
213+ 133 , // FailedToSatisfyReadPreference
214+ 134 , // ReadConcernMajorityNotAvailableYet
215+ 136 , // CappedPositionLost
216+ 175 , // QueryPlanKilled
217+ 187 , // LinearizableReadConcernError
218+ 189 , // PrimarySteppedDown
219+ 202 , // NetworkInterfaceExceededTimeLimit
220+ 211 , // KeyNotFound
221+ 251 , // NoSuchTransaction
222+ 262 , // ExceededTimeLimit
223+ 282 , // TransactionCoordinatorReachedAbortDecision
224+ 290 , // TransactionExceededLifetimeLimitSeconds
225+ 314 , // ObjectIsBusy
226+ 317 , // ConnectionPoolExpired
227+ 358 , // InternalTransactionNotSupported
228+ 365 , // TemporarilyUnavailable
229+ 384 , // ConnectionError
230+ 402 , // ResourceExhausted
231+ 406 , // MigrationBlockingOperationCoordinatorCleaningUp
232+ 407 , // PooledConnectionAcquisitionExceededTimeLimit
233+ 412 , // UpdatesStillPending
234+ 9001 , // SocketException
235+ 10107 , // NotWritablePrimary
236+ 11600 , // InterruptedAtShutdown
237+ 11601 , // Interrupted
238+ 11602 , // InterruptedDueToReplStateChange
239+ 12586 , // BackgroundOperationInProgressForDatabase
240+ 12587 , // BackgroundOperationInProgressForNamespace
241+ 13388 , // StaleConfig
242+ 13435 , // NotPrimaryNoSecondaryOk
243+ 13436 , // NotPrimaryOrSecondary
244+
245+ 50915 , // BackupCursorOpenConflictWithCheckpoint
246+ 91331 , // RemoteCommandFailed
247+ )
248+
187249// hasTransientErrorCode returns true if the error has one of a set of known-to-be-transient
188250// Mongo server error codes.
189251func hasTransientErrorCode (err error ) bool {
190- switch GetErrorCode (err ) {
191- case 6 , 7 , 64 , 89 , 91 , 112 , 136 , 175 , 189 , 202 , 262 , 290 , 314 , 317 ,
192- 9001 , 10107 , 11600 , 11601 , 11602 , 13388 , 13435 , 13436 :
193- // These error codes are either listed as retryable in the remote command retry
194- // scheduler, or have been added here deliberately, since they have been observed to be
195- // issued when applyOps/find/getMore is interrupted while the server is being shut
196- // down.
197- //
198- // There is a list of error codes at
199- // https://github.com/mongodb/mongo/blob/master/src/mongo/base/error_codes.yml. The
200- // list below includes all codes that are in the NetworkError and RetriableError
201- // categories, except 358 (InternalTransactionNotSupported) and 50915
202- // (BackupCursorOpenConflictWithCheckpoint), as these do not apply to any operations
203- // performed by mongosync.
204- //
205- // 6 HostUnreachable
206- // 7 HostNotFound
207- // 64 WriteConcernFailed
208- // 89 NetworkTimeout
209- // 91 ShutdownInProgress
210- // 112 WriteConflict
211- // 136 CappedPositionLost - XXX - there was some discussion over whether this should be included
212- // 175 QueryPlanKilled, e.g. when a collection is dropped/renamed while a cursor is open on it
213- // 189 PrimarySteppedDown
214- // 202 NetworkInterfaceExceededTimeLimit
215- // 262 ExceededTimeLimit
216- // 290 TransactionExceededLifetimeLimitSeconds
217- // 314 ObjectIsBusy
218- // 317 ConnectionPoolExpired
219- // 9001 SocketException
220- // 10107 NotWritablePrimary
221- // 11600 InterruptedAtShutdown
222- // 11601 Interrupted
223- // 11602 InterruptedDueToReplStateChange
224- // 13388 StaleConfig
225- // 13435 NotPrimaryNoSecondaryOk
226- // 13436 NotPrimaryOrSecondary
227- return true
228- case 0 :
252+ if GetErrorCode (err ) == 0 {
229253 // The server may send "not master" without an error code.
230254 if strings .Contains (err .Error (), "not master" ) {
231255 return true
232256 }
233- // These codes only apply to DDL operations. However, we decided that
234- // there's no harm in including them in the default list. See REP-1289 for
235- // more details.
236- case 63 , 117 , 12586 , 12587 :
237- // 63 OBSOLETE_StaleShardVersion
238- // 117 ConflictingOperationInProgress
239- // 12586 BackgroundOperationInProgressForDatabase
240- // 12587 BackgroundOperationInProgressForNamespace
241- return true
242257 }
243- return false
258+
259+ // Now check whether any of the transient error codes appears
260+ // in the error.
261+ return lo .SomeBy (
262+ transientErrorCodes .ToSlice (),
263+ func (code int ) bool {
264+ return mmongo .ErrorHasCode (err , code )
265+ },
266+ )
244267}
245268
246269// These labels come from the mongo source code at
@@ -271,22 +294,17 @@ func IsCollectionUUIDMismatchError(err error) bool {
271294 return GetErrorCode (err ) == 361
272295}
273296
274- // IsServerError returns true if the error implements the ServerError interface in driver.
275- func IsServerError (err error ) bool {
276- // Get the cause of the err.
277- cause := errors .Cause (err )
278- _ , ok := cause .(mongo.ServerError )
279-
280- return ok
281- }
282-
283297// IsCommandNotSupportedOnViewError returns true if GetErrorCode reports 166 (CommandNotSupportedOnView) for err.
284298func IsCommandNotSupportedOnViewError (err error ) bool {
285299 return GetErrorCode (err ) == 166
286300}
287301
288- // GetErrorCode returns the error code corresponding to the provided error.
302+ // GetErrorCode returns the provided error’s top-level error code .
289303// It returns 0 if the error is nil or not one of the supported error types.
304+ //
305+ // CAUTION: Server errors can contain multiple errors, and inspecting
306+ // just the top-level error code often doesn’t achieve proper error handling.
307+ // Instead consider mongo.ServerError.HasErrorCode().
290308func GetErrorCode (err error ) int {
291309 switch e := errors .Cause (err ).(type ) {
292310 case mongo.CommandError :
0 commit comments