Skip to content

Commit c55f11d

Browse files
committed
Merge branch 'REP-5329-retry-more-consistently' into REP-5329-retry-consistently
2 parents 8ea1813 + f130aab commit c55f11d

File tree

4 files changed

+108
-65
lines changed

4 files changed

+108
-65
lines changed

internal/util/error.go

Lines changed: 77 additions & 59 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,10 @@ import (
77
"strings"
88

99
"github.com/10gen/migration-verifier/internal/logger"
10+
"github.com/10gen/migration-verifier/mmongo"
11+
mapset "github.com/deckarep/golang-set/v2"
1012
"github.com/pkg/errors"
13+
"github.com/samber/lo"
1114
"go.mongodb.org/mongo-driver/bson"
1215
"go.mongodb.org/mongo-driver/mongo"
1316
"go.mongodb.org/mongo-driver/x/mongo/driver"
@@ -184,63 +187,83 @@ func isNetworkError(err error) bool {
184187
return mongo.IsNetworkError(err)
185188
}
186189

190+
// The below list was copied from mongosync.
191+
// (Not every error code is known to happen in migration-verifier.)
192+
var transientErrorCodes = mapset.NewSet(
193+
6, // HostUnreachable
194+
7, // HostNotFound
195+
196+
// CursorNotFound sometimes happens due to inconsistencies
197+
// in the server’s sharding abstraction. See REP-2440.
198+
43, // CursorNotFound
199+
200+
50, // MaxTimeMSExpired
201+
63, // OBSOLETE_StaleShardVersion
202+
64, // WriteConcernFailed
203+
204+
// This seems to be possible if a shard is unavailable due
205+
// to an election. See REP-2926.
206+
70, // ShardNotFound
207+
208+
89, // NetworkTimeout
209+
90, // CallbackCanceled
210+
91, // ShutdownInProgress
211+
112, // WriteConflict
212+
117, // ConflictingOperationInProgress
213+
133, // FailedToSatisfyReadPreference
214+
134, // ReadConcernMajorityNotAvailableYet
215+
136, // CappedPositionLost
216+
175, // QueryPlanKilled
217+
187, // LinearizableReadConcernError
218+
189, // PrimarySteppedDown
219+
202, // NetworkInterfaceExceededTimeLimit
220+
211, // KeyNotFound
221+
251, // NoSuchTransaction
222+
262, // ExceededTimeLimit
223+
282, // TransactionCoordinatorReachedAbortDecision
224+
290, // TransactionExceededLifetimeLimitSeconds
225+
314, // ObjectIsBusy
226+
317, // ConnectionPoolExpired
227+
358, // InternalTransactionNotSupported
228+
365, // TemporarilyUnavailable
229+
384, // ConnectionError
230+
402, // ResourceExhausted
231+
406, // MigrationBlockingOperationCoordinatorCleaningUp
232+
407, // PooledConnectionAcquisitionExceededTimeLimit
233+
412, // UpdatesStillPending
234+
9001, // SocketException
235+
10107, // NotWritablePrimary
236+
11600, // InterruptedAtShutdown
237+
11601, // Interrupted
238+
11602, // InterruptedDueToReplStateChange
239+
12586, // BackgroundOperationInProgressForDatabase
240+
12587, // BackgroundOperationInProgressForNamespace
241+
13388, // StaleConfig
242+
13435, // NotPrimaryNoSecondaryOk
243+
13436, // NotPrimaryOrSecondary
244+
245+
50915, // BackupCursorOpenConflictWithCheckpoint
246+
91331, // RemoteCommandFailed
247+
)
248+
187249
// hasTransientErrorCode returns true if the error has one of a set of known-to-be-transient
188250
// Mongo server error codes.
189251
func hasTransientErrorCode(err error) bool {
190-
switch GetErrorCode(err) {
191-
case 6, 7, 64, 89, 91, 112, 136, 175, 189, 202, 262, 290, 314, 317,
192-
9001, 10107, 11600, 11601, 11602, 13388, 13435, 13436:
193-
// These error codes are either listed as retryable in the remote command retry
194-
// scheduler, or have been added here deliberately, since they have been observed to be
195-
// issued when applyOps/find/getMore is interrupted while the server is being shut
196-
// down.
197-
//
198-
// There is a list of error codes at
199-
// https://github.com/mongodb/mongo/blob/master/src/mongo/base/error_codes.yml. The
200-
// list below includes all codes that are in the NetworkError and RetriableError
201-
// categories, except 358 (InternalTransactionNotSupported) and 50915
202-
// (BackupCursorOpenConflictWithCheckpoint), as these do not apply to any operations
203-
// performed by mongosync.
204-
//
205-
// 6 HostUnreachable
206-
// 7 HostNotFound
207-
// 64 WriteConcernFailed
208-
// 89 NetworkTimeout
209-
// 91 ShutdownInProgress
210-
// 112 WriteConflict
211-
// 136 CappedPositionLost - XXX - there was some discussion over whether this should be included
212-
// 175 QueryPlanKilled, e.g. when a collection is dropped/renamed while a cursor is open on it
213-
// 189 PrimarySteppedDown
214-
// 202 NetworkInterfaceExceededTimeLimit
215-
// 262 ExceededTimeLimit
216-
// 290 TransactionExceededLifetimeLimitSeconds
217-
// 314 ObjectIsBusy
218-
// 317 ConnectionPoolExpired
219-
// 9001 SocketException
220-
// 10107 NotWritablePrimary
221-
// 11600 InterruptedAtShutdown
222-
// 11601 Interrupted
223-
// 11602 InterruptedDueToReplStateChange
224-
// 13388 StaleConfig
225-
// 13435 NotPrimaryNoSecondaryOk
226-
// 13436 NotPrimaryOrSecondary
227-
return true
228-
case 0:
252+
if GetErrorCode(err) == 0 {
229253
// The server may send "not master" without an error code.
230254
if strings.Contains(err.Error(), "not master") {
231255
return true
232256
}
233-
// These codes only apply to DDL operations. However, we decided that
234-
// there's no harm in including them in the default list. See REP-1289 for
235-
// more details.
236-
case 63, 117, 12586, 12587:
237-
// 63 OBSOLETE_StaleShardVersion
238-
// 117 ConflictingOperationInProgress
239-
// 12586 BackgroundOperationInProgressForDatabase
240-
// 12587 BackgroundOperationInProgressForNamespace
241-
return true
242257
}
243-
return false
258+
259+
// Now check whether any of the transient error codes appears
260+
// in the error.
261+
return lo.SomeBy(
262+
transientErrorCodes.ToSlice(),
263+
func(code int) bool {
264+
return mmongo.ErrorHasCode(err, code)
265+
},
266+
)
244267
}
245268

246269
// These labels come from the mongo source code at
@@ -271,22 +294,17 @@ func IsCollectionUUIDMismatchError(err error) bool {
271294
return GetErrorCode(err) == 361
272295
}
273296

274-
// IsServerError returns true if the error implements the ServerError interface in driver.
275-
func IsServerError(err error) bool {
276-
// Get the cause of the err.
277-
cause := errors.Cause(err)
278-
_, ok := cause.(mongo.ServerError)
279-
280-
return ok
281-
}
282-
283297
// IsCommandNotSupportedOnViewError returns true if this is a CommandNotSupportedOnView error.
284298
func IsCommandNotSupportedOnViewError(err error) bool {
285299
return GetErrorCode(err) == 166
286300
}
287301

288-
// GetErrorCode returns the error code corresponding to the provided error.
302+
// GetErrorCode returns the provided error’s top-level error code.
289303
// It returns 0 if the error is nil or not one of the supported error types.
304+
//
305+
// CAUTION: Server errors can contain multiple errors, and inspecting
306+
// just the top-level error code often doesn’t achieve proper error handling.
307+
// Instead consider mongo.ServerError.HasErrorCode().
290308
func GetErrorCode(err error) int {
291309
switch e := errors.Cause(err).(type) {
292310
case mongo.CommandError:

internal/verifier/migration_verifier.go

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -904,12 +904,10 @@ func (verifier *Verifier) doIndexSpecsMatch(ctx context.Context, srcSpec bson.Ra
904904
ctx,
905905
mongo.Pipeline{
906906
{{"$documents", []bson.D{
907-
{{"spec", srcSpec}},
908-
}}},
909-
910-
// Add the destination spec.
911-
{{"$addFields", bson.D{
912-
{"dstSpec", dstSpec},
907+
{
908+
{"spec", bson.D{{"$literal", srcSpec}}},
909+
{"dstSpec", bson.D{{"$literal", dstSpec}}},
910+
},
913911
}}},
914912

915913
{{"$unset", lo.Reduce(

internal/verifier/migration_verifier_test.go

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1083,6 +1083,19 @@ func (suite *IntegrationTestSuite) TestVerifierCompareIndexSpecs() {
10831083
shouldMatch: true,
10841084
},
10851085

1086+
{
1087+
label: "ignore number types, deep",
1088+
src: bson.D{
1089+
{"name", "testIndex"},
1090+
{"key", bson.M{"foo.bar": float64(123)}},
1091+
},
1092+
dst: bson.D{
1093+
{"name", "testIndex"},
1094+
{"key", bson.M{"foo.bar": 123}},
1095+
},
1096+
shouldMatch: true,
1097+
},
1098+
10861099
{
10871100
label: "find number differences",
10881101
src: bson.D{

mmongo/error.go

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
package mmongo
2+
3+
import (
4+
"github.com/pkg/errors"
5+
"go.mongodb.org/mongo-driver/mongo"
6+
)
7+
8+
// ErrorHasCode returns true if (and only if) this error is a
9+
// mongo.ServerError that contains the given error code.
10+
func ErrorHasCode[T ~int](err error, code T) bool {
11+
var serverError mongo.ServerError
12+
13+
return errors.As(err, &serverError) && serverError.HasErrorCode(int(code))
14+
}

0 commit comments

Comments
 (0)