From 351c94cd1674566e7db42f47c25499eb6a757a25 Mon Sep 17 00:00:00 2001 From: Jian Guan <61915096+tdq45gj@users.noreply.github.com> Date: Fri, 22 Nov 2024 08:43:54 -0500 Subject: [PATCH 01/32] wip --- internal/verifier/change_stream.go | 195 +++++++++++++++++------- internal/verifier/change_stream_test.go | 4 +- internal/verifier/check.go | 50 +++--- internal/verifier/migration_verifier.go | 38 +++-- internal/verifier/recheck.go | 2 + 5 files changed, 187 insertions(+), 102 deletions(-) diff --git a/internal/verifier/change_stream.go b/internal/verifier/change_stream.go index ddf8f5ba..bad0262a 100644 --- a/internal/verifier/change_stream.go +++ b/internal/verifier/change_stream.go @@ -6,6 +6,7 @@ import ( "time" "github.com/10gen/migration-verifier/internal/keystring" + "github.com/10gen/migration-verifier/internal/logger" "github.com/pkg/errors" "github.com/rs/zerolog" "go.mongodb.org/mongo-driver/bson" @@ -50,6 +51,59 @@ func (uee UnknownEventError) Error() string { return fmt.Sprintf("Received event with unknown optype: %+v", uee.Event) } +type ChangeStreamReader struct { + readerType clusterType + + lastChangeEventTime *primitive.Timestamp + logger *logger.Logger + namespaces []string + metaDBName string + + metaClient *mongo.Client + watcherClient *mongo.Client + + changeStreamRunning bool + ChangeEventBatchChan chan []ParsedEvent + ChangeStreamEnderChan chan struct{} + ChangeStreamErrChan chan error + ChangeStreamDoneChan chan struct{} + + startAtTs *primitive.Timestamp +} + +func (verifier *Verifier) initializeChangeStreamReaders() { + verifier.srcChangeStreamReader = &ChangeStreamReader{ + readerType: srcReaderType, + lastChangeEventTime: nil, + logger: verifier.logger, + namespaces: verifier.srcNamespaces, + metaDBName: verifier.metaDBName, + metaClient: verifier.metaClient, + watcherClient: verifier.srcClient, + changeStreamRunning: false, + ChangeEventBatchChan: make(chan []ParsedEvent), + ChangeStreamEnderChan: make(chan struct{}), + ChangeStreamErrChan: make(chan error), + ChangeStreamDoneChan: make(chan struct{}), + startAtTs: nil, + } + verifier.dstChangeStreamReader = &ChangeStreamReader{ + readerType: dstReaderType, + lastChangeEventTime: nil, + logger: verifier.logger, + namespaces: verifier.dstNamespaces, + metaDBName: verifier.metaDBName, + metaClient: verifier.metaClient, + watcherClient: verifier.dstClient, + changeStreamRunning: false, + ChangeEventBatchChan: make(chan []ParsedEvent), + ChangeStreamEnderChan: make(chan struct{}), + ChangeStreamErrChan: make(chan error), + ChangeStreamDoneChan: make(chan struct{}), + startAtTs: nil, + } +} + // HandleChangeStreamEvents performs the necessary work for change stream events after receiving a batch. func (verifier *Verifier) HandleChangeStreamEvents(ctx context.Context, batch []ParsedEvent) error { if len(batch) == 0 { @@ -62,11 +116,6 @@ func (verifier *Verifier) HandleChangeStreamEvents(ctx context.Context, batch [] dataSizes := make([]int, len(batch)) for i, changeEvent := range batch { - if changeEvent.ClusterTime != nil && - (verifier.lastChangeEventTime == nil || - verifier.lastChangeEventTime.Compare(*changeEvent.ClusterTime) < 0) { - verifier.lastChangeEventTime = changeEvent.ClusterTime - } switch changeEvent.OpType { case "delete": fallthrough @@ -109,12 +158,12 @@ func (verifier *Verifier) HandleChangeStreamEvents(ctx context.Context, batch [] // and omit fullDocument, but $bsonSize was new in MongoDB 4.4, and we still // want to verify migrations from 4.2. fullDocument is unlikely to be a // bottleneck anyway. -func (verifier *Verifier) GetChangeStreamFilter() []bson.D { - if len(verifier.srcNamespaces) == 0 { - return []bson.D{{bson.E{"$match", bson.D{{"ns.db", bson.D{{"$ne", verifier.metaDBName}}}}}}} +func (csr *ChangeStreamReader) GetChangeStreamFilter() []bson.D { + if len(csr.namespaces) == 0 { + return []bson.D{{bson.E{"$match", bson.D{{"ns.db", bson.D{{"$ne", csr.metaDBName}}}}}}} } filter := bson.A{} - for _, ns := range verifier.srcNamespaces { + for _, ns := range csr.namespaces { db, coll := SplitNamespace(ns) filter = append(filter, bson.D{{"ns", bson.D{{"db", db}, {"coll", coll}}}}) } @@ -122,7 +171,7 @@ func (verifier *Verifier) GetChangeStreamFilter() []bson.D { return []bson.D{stage} } -func (verifier *Verifier) iterateChangeStream(ctx context.Context, cs *mongo.ChangeStream) { +func (csr *ChangeStreamReader) iterateChangeStream(ctx context.Context, cs *mongo.ChangeStream) { defer cs.Close(ctx) var lastPersistedTime time.Time @@ -132,7 +181,7 @@ func (verifier *Verifier) iterateChangeStream(ctx context.Context, cs *mongo.Cha return nil } - err := verifier.persistChangeStreamResumeToken(ctx, cs) + err := csr.persistChangeStreamResumeToken(ctx, cs) if err == nil { lastPersistedTime = time.Now() } @@ -159,11 +208,19 @@ func (verifier *Verifier) iterateChangeStream(ctx context.Context, cs *mongo.Cha return false, errors.Wrap(err, "failed to decode change event") } + if changeEventBatch[eventsRead].ClusterTime != nil && + (csr.lastChangeEventTime == nil || + csr.lastChangeEventTime.Compare(*changeEventBatch[eventsRead].ClusterTime) < 0) { + csr.lastChangeEventTime = changeEventBatch[eventsRead].ClusterTime + } + eventsRead++ } if eventsRead > 0 { - verifier.logger.Debug().Int("eventsCount", eventsRead).Msgf("Received a batch of events.") + csr.logger.Debug().Int("eventsCount", eventsRead).Msgf("Received a batch of events.") + csr.ChangeEventBatchChan <- changeEventBatch + // TODO: run change stream event handler in a verifier goroutine. err := verifier.HandleChangeStreamEvents(ctx, changeEventBatch) if err != nil { return false, errors.Wrap(err, "failed to handle change events") @@ -181,16 +238,16 @@ func (verifier *Verifier) iterateChangeStream(ctx context.Context, cs *mongo.Cha // If the context is canceled, return immmediately. case <-ctx.Done(): - verifier.logger.Debug(). + csr.logger.Debug(). Err(ctx.Err()). Msg("Change stream quitting.") return - // If the changeStreamEnderChan has a message, the user has indicated that + // If the ChangeStreamEnderChan has a message, the user has indicated that // source writes are ended. This means we should exit rather than continue // reading the change stream since there should be no more events. - case <-verifier.changeStreamEnderChan: - verifier.logger.Debug(). + case <-csr.ChangeStreamEnderChan: + csr.logger.Debug(). Msg("Change stream thread received shutdown request.") changeStreamEnded = true @@ -215,11 +272,11 @@ func (verifier *Verifier) iterateChangeStream(ctx context.Context, cs *mongo.Cha } if err != nil && !errors.Is(err, context.Canceled) { - verifier.logger.Debug(). + csr.logger.Debug(). Err(err). Msg("Sending change stream error.") - verifier.changeStreamErrChan <- err + csr.ChangeStreamErrChan <- err if !changeStreamEnded { break @@ -227,52 +284,51 @@ func (verifier *Verifier) iterateChangeStream(ctx context.Context, cs *mongo.Cha } if changeStreamEnded { - verifier.mux.Lock() - verifier.changeStreamRunning = false - if verifier.lastChangeEventTime != nil { - verifier.srcStartAtTs = verifier.lastChangeEventTime + csr.changeStreamRunning = false + if csr.lastChangeEventTime != nil { + csr.startAtTs = csr.lastChangeEventTime } - verifier.mux.Unlock() // since we have started Recheck, we must signal that we have // finished the change stream changes so that Recheck can continue. - verifier.changeStreamDoneChan <- struct{}{} + csr.ChangeStreamDoneChan <- struct{}{} break } } - infoLog := verifier.logger.Info() - if verifier.lastChangeEventTime == nil { + infoLog := csr.logger.Info() + if csr.lastChangeEventTime == nil { infoLog = infoLog.Str("changeStreamStopTime", "none") } else { - infoLog = infoLog.Interface("changeStreamStopTime", *verifier.lastChangeEventTime) + infoLog = infoLog.Interface("changeStreamStopTime", *csr.lastChangeEventTime) } infoLog.Msg("Change stream is done.") } // StartChangeStream starts the change stream. -func (verifier *Verifier) StartChangeStream(ctx context.Context) error { - pipeline := verifier.GetChangeStreamFilter() +func (csr *ChangeStreamReader) StartChangeStream(ctx context.Context) error { + pipeline := csr.GetChangeStreamFilter() + opts := options.ChangeStream(). SetMaxAwaitTime(1 * time.Second). SetFullDocument(options.UpdateLookup) - savedResumeToken, err := verifier.loadChangeStreamResumeToken(ctx) + savedResumeToken, err := csr.loadChangeStreamResumeToken(ctx) if err != nil { return errors.Wrap(err, "failed to load persisted change stream resume token") } - csStartLogEvent := verifier.logger.Info() + csStartLogEvent := csr.logger.Info() if savedResumeToken != nil { logEvent := csStartLogEvent. - Stringer("resumeToken", savedResumeToken) + Stringer(csr.resumeTokenDocID(), savedResumeToken) ts, err := extractTimestampFromResumeToken(savedResumeToken) if err == nil { logEvent = addUnixTimeToLogEvent(ts.T, logEvent) } else { - verifier.logger.Warn(). + csr.logger.Warn(). Err(err). Msg("Failed to extract timestamp from persisted resume token.") } @@ -281,25 +337,25 @@ func (verifier *Verifier) StartChangeStream(ctx context.Context) error { opts = opts.SetStartAfter(savedResumeToken) } else { - csStartLogEvent.Msg("Starting change stream from current source cluster time.") + csStartLogEvent.Msgf("Starting change stream from current %s cluster time.", csr.readerType) } - sess, err := verifier.srcClient.StartSession() + sess, err := csr.watcherClient.StartSession() if err != nil { return errors.Wrap(err, "failed to start session") } sctx := mongo.NewSessionContext(ctx, sess) - srcChangeStream, err := verifier.srcClient.Watch(sctx, pipeline, opts) + changeStream, err := csr.watcherClient.Watch(sctx, pipeline, opts) if err != nil { return errors.Wrap(err, "failed to open change stream") } - err = verifier.persistChangeStreamResumeToken(ctx, srcChangeStream) + err = csr.persistChangeStreamResumeToken(ctx, changeStream) if err != nil { return err } - csTimestamp, err := extractTimestampFromResumeToken(srcChangeStream.ResumeToken()) + csTimestamp, err := extractTimestampFromResumeToken(changeStream.ResumeToken()) if err != nil { return errors.Wrap(err, "failed to extract timestamp from change stream's resume token") } @@ -309,16 +365,14 @@ func (verifier *Verifier) StartChangeStream(ctx context.Context) error { return errors.Wrap(err, "failed to read cluster time from session") } - verifier.srcStartAtTs = &csTimestamp + csr.startAtTs = &csTimestamp if csTimestamp.After(clusterTime) { - verifier.srcStartAtTs = &clusterTime + csr.startAtTs = &clusterTime } - verifier.mux.Lock() - verifier.changeStreamRunning = true - verifier.mux.Unlock() + csr.changeStreamRunning = true - go verifier.iterateChangeStream(ctx, srcChangeStream) + go csr.iterateChangeStream(ctx, changeStream) return nil } @@ -327,16 +381,16 @@ func addUnixTimeToLogEvent[T constraints.Integer](unixTime T, event *zerolog.Eve return event.Time("timestampTime", time.Unix(int64(unixTime), int64(0))) } -func (v *Verifier) getChangeStreamMetadataCollection() *mongo.Collection { - return v.metaClient.Database(v.metaDBName).Collection(metadataChangeStreamCollectionName) +func (csr *ChangeStreamReader) getChangeStreamMetadataCollection() *mongo.Collection { + return csr.metaClient.Database(csr.metaDBName).Collection(metadataChangeStreamCollectionName) } -func (verifier *Verifier) loadChangeStreamResumeToken(ctx context.Context) (bson.Raw, error) { - coll := verifier.getChangeStreamMetadataCollection() +func (csr *ChangeStreamReader) loadChangeStreamResumeToken(ctx context.Context) (bson.Raw, error) { + coll := csr.getChangeStreamMetadataCollection() token, err := coll.FindOne( ctx, - bson.D{{"_id", "resumeToken"}}, + bson.D{{"_id", csr.resumeTokenDocID()}}, ).Raw() if errors.Is(err, mongo.ErrNoDocuments) { @@ -346,13 +400,46 @@ func (verifier *Verifier) loadChangeStreamResumeToken(ctx context.Context) (bson return token, err } -func (verifier *Verifier) persistChangeStreamResumeToken(ctx context.Context, cs *mongo.ChangeStream) error { +func (csr *ChangeStreamReader) String() string { + return fmt.Sprintf("%s change stream reader", csr.readerType) +} + +func (csr *ChangeStreamReader) resumeTokenDocID() string { + switch csr.readerType { + case srcReaderType: + return "srcResumeToken" + case dstReaderType: + return "dstResumeToken" + default: + panic("unknown readerType: " + csr.readerType) + } +} + +func (csr *ChangeStreamReader) Wait() error { + if csr.changeStreamRunning { + csr.logger.Debug().Msgf("%s still running, signalling that writes are done and waiting for change stream to exit", csr) + csr.ChangeStreamEnderChan <- struct{}{} + select { + case err := <-csr.ChangeStreamErrChan: + csr.logger.Warn().Err(err). + Msg("Received error from change stream.") + return err + case <-csr.ChangeEventBatchChan: + csr.logger.Debug(). + Msg("Received completion signal from change stream.") + break + } + } + return nil +} + +func (csr *ChangeStreamReader) persistChangeStreamResumeToken(ctx context.Context, cs *mongo.ChangeStream) error { token := cs.ResumeToken() - coll := verifier.getChangeStreamMetadataCollection() + coll := csr.getChangeStreamMetadataCollection() _, err := coll.ReplaceOne( ctx, - bson.D{{"_id", "resumeToken"}}, + bson.D{{"_id", csr.resumeTokenDocID()}}, token, options.Replace().SetUpsert(true), ) @@ -360,12 +447,12 @@ func (verifier *Verifier) persistChangeStreamResumeToken(ctx context.Context, cs if err == nil { ts, err := extractTimestampFromResumeToken(token) - logEvent := verifier.logger.Debug() + logEvent := csr.logger.Debug() if err == nil { logEvent = addUnixTimeToLogEvent(ts.T, logEvent) } else { - verifier.logger.Warn().Err(err). + csr.logger.Warn().Err(err). Msg("failed to extract resume token timestamp") } diff --git a/internal/verifier/change_stream_test.go b/internal/verifier/change_stream_test.go index 89bc7877..2d0ed131 100644 --- a/internal/verifier/change_stream_test.go +++ b/internal/verifier/change_stream_test.go @@ -16,7 +16,7 @@ func TestChangeStreamFilter(t *testing.T) { verifier := Verifier{} verifier.SetMetaDBName("metadb") require.Equal(t, []bson.D{{{"$match", bson.D{{"ns.db", bson.D{{"$ne", "metadb"}}}}}}}, - verifier.GetChangeStreamFilter()) + verifier.GetChangeStreamFilter(verifier.srcNamespaces)) verifier.srcNamespaces = []string{"foo.bar", "foo.baz", "test.car", "test.chaz"} require.Equal(t, []bson.D{ {{"$match", bson.D{ @@ -27,7 +27,7 @@ func TestChangeStreamFilter(t *testing.T) { bson.D{{"ns", bson.D{{"db", "test"}, {"coll", "chaz"}}}}, }}, }}}, - }, verifier.GetChangeStreamFilter()) + }, verifier.GetChangeStreamFilter(verifier.srcNamespaces)) } // TestChangeStreamResumability creates a verifier, starts its change stream, diff --git a/internal/verifier/check.go b/internal/verifier/check.go index 5a60bfb5..baf2d967 100644 --- a/internal/verifier/check.go +++ b/internal/verifier/check.go @@ -42,27 +42,6 @@ func (verifier *Verifier) Check(ctx context.Context, filter map[string]any) { verifier.MaybeStartPeriodicHeapProfileCollection(ctx) } -func (verifier *Verifier) waitForChangeStream() error { - verifier.mux.RLock() - csRunning := verifier.changeStreamRunning - verifier.mux.RUnlock() - if csRunning { - verifier.logger.Debug().Msg("Changestream still running, signalling that writes are done and waiting for change stream to exit") - verifier.changeStreamEnderChan <- struct{}{} - select { - case err := <-verifier.changeStreamErrChan: - verifier.logger.Warn().Err(err). - Msg("Received error from change stream.") - return err - case <-verifier.changeStreamDoneChan: - verifier.logger.Debug(). - Msg("Received completion signal from change stream.") - break - } - } - return nil -} - func (verifier *Verifier) CheckWorker(ctx context.Context) error { verifier.logger.Debug().Msgf("Starting %d verification workers", verifier.numWorkers) ctx, cancel := context.WithCancel(ctx) @@ -90,7 +69,10 @@ func (verifier *Verifier) CheckWorker(ctx context.Context) error { for { select { - case err := <-verifier.changeStreamErrChan: + case err := <-verifier.srcChangeStreamReader.ChangeStreamErrChan: + cancel() + return err + case err := <-verifier.dstChangeStreamReader.ChangeStreamErrChan: cancel() return err case <-ctx.Done(): @@ -135,6 +117,7 @@ func (verifier *Verifier) CheckDriver(ctx context.Context, filter map[string]any } verifier.running = true verifier.globalFilter = filter + verifier.initializeChangeStreamReaders() verifier.mux.Unlock() defer func() { verifier.mux.Lock() @@ -171,17 +154,22 @@ func (verifier *Verifier) CheckDriver(ctx context.Context, filter map[string]any verifier.phase = Idle }() - verifier.mux.RLock() - csRunning := verifier.changeStreamRunning - verifier.mux.RUnlock() - if !csRunning { - verifier.logger.Debug().Msg("Change stream not running; starting change stream") + if !verifier.srcChangeStreamReader.changeStreamRunning { + verifier.logger.Debug().Msg("Source change stream not running; starting change stream") - err = verifier.StartChangeStream(ctx) + err = verifier.srcChangeStreamReader.StartChangeStream(ctx) if err != nil { return errors.Wrap(err, "failed to start change stream on source") } } + if !verifier.dstChangeStreamReader.changeStreamRunning { + verifier.logger.Debug().Msg("Destination change stream not running; starting change stream") + + err = verifier.dstChangeStreamReader.StartChangeStream(ctx) + if err != nil { + return errors.Wrap(err, "failed to start change stream on destination") + } + } // Log out the verification status when initially booting up so it's easy to see the current state verificationStatus, err := verifier.GetVerificationStatus() if err != nil { @@ -242,8 +230,10 @@ func (verifier *Verifier) CheckDriver(ctx context.Context, filter map[string]any // It's necessary to wait for the change stream to finish before incrementing the // generation number, or the last changes will not be checked. verifier.mux.Unlock() - err := verifier.waitForChangeStream() - if err != nil { + if err = verifier.srcChangeStreamReader.Wait(); err != nil { + return err + } + if err = verifier.dstChangeStreamReader.Wait(); err != nil { return err } verifier.mux.Lock() diff --git a/internal/verifier/migration_verifier.go b/internal/verifier/migration_verifier.go index 809e4fe3..9c09bd35 100644 --- a/internal/verifier/migration_verifier.go +++ b/internal/verifier/migration_verifier.go @@ -74,6 +74,13 @@ const ( notOkSymbol = "\u2757" // heavy exclamation mark symbol ) +type clusterType string + +const ( + srcReaderType clusterType = "source" + dstReaderType clusterType = "destination" +) + var timeFormat = time.RFC3339 // Verifier is the main state for the migration verifier @@ -121,14 +128,15 @@ type Verifier struct { dstNamespaces []string nsMap map[string]string metaDBName string - srcStartAtTs *primitive.Timestamp - mux sync.RWMutex - changeStreamRunning bool - changeStreamEnderChan chan struct{} - changeStreamErrChan chan error - changeStreamDoneChan chan struct{} - lastChangeEventTime *primitive.Timestamp + mux sync.RWMutex + + srcChangeStreamReader *ChangeStreamReader + dstChangeStreamReader *ChangeStreamReader + + //ChangeStreamEnderChan chan struct{} + //ChangeStreamErrChan chan error + //ChangeStreamDoneChan chan struct{} readConcernSetting ReadConcernSetting @@ -188,15 +196,13 @@ func NewVerifier(settings VerifierSettings) *Verifier { } return &Verifier{ - phase: Idle, - numWorkers: NumWorkers, - readPreference: readpref.Primary(), - partitionSizeInBytes: 400 * 1024 * 1024, - failureDisplaySize: DefaultFailureDisplaySize, - changeStreamEnderChan: make(chan struct{}), - changeStreamErrChan: make(chan error), - changeStreamDoneChan: make(chan struct{}), - readConcernSetting: readConcern, + phase: Idle, + numWorkers: NumWorkers, + readPreference: readpref.Primary(), + partitionSizeInBytes: 400 * 1024 * 1024, + failureDisplaySize: DefaultFailureDisplaySize, + + readConcernSetting: readConcern, // This will get recreated once gen0 starts, but we want it // here in case the change streams gets an event before then. diff --git a/internal/verifier/recheck.go b/internal/verifier/recheck.go index 0f7ad651..49954580 100644 --- a/internal/verifier/recheck.go +++ b/internal/verifier/recheck.go @@ -29,6 +29,8 @@ const ( // NB: Order is important here so that, within a given generation, // sorting by _id will guarantee that all rechecks for a given // namespace appear consecutively. +// +// DatabaseName and CollectionName should be on the source. type RecheckPrimaryKey struct { Generation int `bson:"generation"` DatabaseName string `bson:"db"` From 53404bc12c29b00540fd5f63571d9b08288121ca Mon Sep 17 00:00:00 2001 From: Jian Guan <61915096+tdq45gj@users.noreply.github.com> Date: Fri, 22 Nov 2024 15:59:01 -0500 Subject: [PATCH 02/32] refactor --- internal/verifier/change_stream.go | 146 +++++++++++-------- internal/verifier/check.go | 35 +++-- internal/verifier/compare.go | 4 +- internal/verifier/migration_verifier.go | 70 ++++----- internal/verifier/migration_verifier_test.go | 12 +- internal/verifier/recheck_test.go | 2 +- internal/verifier/verification_task.go | 8 +- 7 files changed, 152 insertions(+), 125 deletions(-) diff --git a/internal/verifier/change_stream.go b/internal/verifier/change_stream.go index 6d5faf22..f83a73e3 100644 --- a/internal/verifier/change_stream.go +++ b/internal/verifier/change_stream.go @@ -61,50 +61,67 @@ type ChangeStreamReader struct { metaClient *mongo.Client watcherClient *mongo.Client - changeStreamRunning bool - ChangeEventBatchChan chan []ParsedEvent - ChangeStreamEnderChan chan struct{} - ChangeStreamErrChan chan error - ChangeStreamDoneChan chan struct{} + changeStreamRunning bool + ChangeEventBatchChan chan []ParsedEvent + ChangeStreamWritesOffTsChan chan primitive.Timestamp + ChangeStreamErrChan chan error + ChangeStreamDoneChan chan struct{} startAtTs *primitive.Timestamp } func (verifier *Verifier) initializeChangeStreamReaders() { verifier.srcChangeStreamReader = &ChangeStreamReader{ - readerType: srcReaderType, - lastChangeEventTime: nil, - logger: verifier.logger, - namespaces: verifier.srcNamespaces, - metaDBName: verifier.metaDBName, - metaClient: verifier.metaClient, - watcherClient: verifier.srcClient, - changeStreamRunning: false, - ChangeEventBatchChan: make(chan []ParsedEvent), - ChangeStreamEnderChan: make(chan struct{}), - ChangeStreamErrChan: make(chan error), - ChangeStreamDoneChan: make(chan struct{}), - startAtTs: nil, + readerType: srcReaderType, + lastChangeEventTime: nil, + logger: verifier.logger, + namespaces: verifier.srcNamespaces, + metaDBName: verifier.metaDBName, + metaClient: verifier.metaClient, + watcherClient: verifier.srcClient, + changeStreamRunning: false, + ChangeEventBatchChan: make(chan []ParsedEvent), + ChangeStreamWritesOffTsChan: make(chan primitive.Timestamp), + ChangeStreamErrChan: make(chan error), + ChangeStreamDoneChan: make(chan struct{}), + startAtTs: nil, } verifier.dstChangeStreamReader = &ChangeStreamReader{ - readerType: dstReaderType, - lastChangeEventTime: nil, - logger: verifier.logger, - namespaces: verifier.dstNamespaces, - metaDBName: verifier.metaDBName, - metaClient: verifier.metaClient, - watcherClient: verifier.dstClient, - changeStreamRunning: false, - ChangeEventBatchChan: make(chan []ParsedEvent), - ChangeStreamEnderChan: make(chan struct{}), - ChangeStreamErrChan: make(chan error), - ChangeStreamDoneChan: make(chan struct{}), - startAtTs: nil, + readerType: dstReaderType, + lastChangeEventTime: nil, + logger: verifier.logger, + namespaces: verifier.dstNamespaces, + metaDBName: verifier.metaDBName, + metaClient: verifier.metaClient, + watcherClient: verifier.dstClient, + changeStreamRunning: false, + ChangeEventBatchChan: make(chan []ParsedEvent), + ChangeStreamWritesOffTsChan: make(chan primitive.Timestamp), + ChangeStreamErrChan: make(chan error), + ChangeStreamDoneChan: make(chan struct{}), + startAtTs: nil, } } +// StartChangeEventHandler starts a goroutine that handles change event batches from the reader. +func (verifier *Verifier) StartChangeEventHandler(ctx context.Context, reader *ChangeStreamReader) { + go func() { + for { + select { + case <-ctx.Done(): + return + case batch := <-reader.ChangeEventBatchChan: + err := verifier.HandleChangeStreamEvents(ctx, batch, reader.readerType) + if err != nil { + reader.ChangeStreamErrChan <- err + } + } + } + }() +} + // HandleChangeStreamEvents performs the necessary work for change stream events after receiving a batch. -func (verifier *Verifier) HandleChangeStreamEvents(ctx context.Context, batch []ParsedEvent) error { +func (verifier *Verifier) HandleChangeStreamEvents(ctx context.Context, batch []ParsedEvent, eventOrigin clusterType) error { if len(batch) == 0 { return nil } @@ -126,8 +143,33 @@ func (verifier *Verifier) HandleChangeStreamEvents(ctx context.Context, batch [] if err := verifier.eventRecorder.AddEvent(&changeEvent); err != nil { return errors.Wrapf(err, "failed to augment stats with change event (%+v)", changeEvent) } - dbNames[i] = changeEvent.Ns.DB - collNames[i] = changeEvent.Ns.Coll + + var srcDBName, srcCollName string + + // Recheck Docs are keyed by source namespaces. + // We need to retrieve the source namespaces if change events are from the destination. + switch eventOrigin { + case dstReaderType: + if len(verifier.dstSrcNsMap) == 0 { + srcDBName = changeEvent.Ns.DB + srcCollName = changeEvent.Ns.Coll + } else { + dstNs := fmt.Sprintf("%s.%s", changeEvent.Ns.DB, changeEvent.Ns.Coll) + srcNs, exist := verifier.dstSrcNsMap[dstNs] + if !exist { + return errors.Errorf("no source namespace corresponding to the destination namepsace %s", dstNs) + } + srcDBName, srcCollName = SplitNamespace(srcNs) + } + case srcReaderType: + srcDBName = changeEvent.Ns.DB + srcCollName = changeEvent.Ns.Coll + default: + return errors.Errorf("unknown event origin: %s", eventOrigin) + } + + dbNames[i] = srcDBName + collNames[i] = srcCollName docIDs[i] = changeEvent.DocKey.ID if changeEvent.FullDocument == nil { @@ -183,7 +225,7 @@ func (csr *ChangeStreamReader) GetChangeStreamFilter() []bson.D { // the verifier will enqueue rechecks from those post-writesOff events. This // is unideal but shouldn’t impede correctness since post-writesOff events // shouldn’t really happen anyway by definition. -func (verifier *Verifier) readAndHandleOneChangeEventBatch( +func (csr *ChangeStreamReader) readAndHandleOneChangeEventBatch( ctx context.Context, cs *mongo.ChangeStream, ) error { @@ -222,11 +264,7 @@ func (verifier *Verifier) readAndHandleOneChangeEventBatch( return nil } - err := verifier.HandleChangeStreamEvents(ctx, changeEventBatch) - if err != nil { - return errors.Wrap(err, "failed to handle change events") - } - + csr.ChangeEventBatchChan <- changeEventBatch return nil } @@ -264,8 +302,8 @@ func (csr *ChangeStreamReader) iterateChangeStream(ctx context.Context, cs *mong // If the ChangeStreamEnderChan has a message, the user has indicated that // source writes are ended. This means we should exit rather than continue // reading the change stream since there should be no more events. - case writesOffTs := <-verifier.changeStreamWritesOffTsChan: - verifier.logger.Debug(). + case writesOffTs := <-csr.ChangeStreamWritesOffTsChan: + csr.logger.Debug(). Interface("writesOffTimestamp", writesOffTs). Msg("Change stream thread received writesOff timestamp. Finalizing change stream.") @@ -282,7 +320,7 @@ func (csr *ChangeStreamReader) iterateChangeStream(ctx context.Context, cs *mong } if curTs == writesOffTs || curTs.After(writesOffTs) { - verifier.logger.Debug(). + csr.logger.Debug(). Interface("currentTimestamp", curTs). Interface("writesOffTimestamp", writesOffTs). Msg("Change stream has reached the writesOff timestamp. Shutting down.") @@ -290,7 +328,7 @@ func (csr *ChangeStreamReader) iterateChangeStream(ctx context.Context, cs *mong break } - err = verifier.readAndHandleOneChangeEventBatch(ctx, cs) + err = csr.readAndHandleOneChangeEventBatch(ctx, cs) if err != nil { break @@ -298,7 +336,7 @@ func (csr *ChangeStreamReader) iterateChangeStream(ctx context.Context, cs *mong } default: - err = verifier.readAndHandleOneChangeEventBatch(ctx, cs) + err = csr.readAndHandleOneChangeEventBatch(ctx, cs) if err == nil { err = persistResumeTokenIfNeeded() @@ -451,24 +489,6 @@ func (csr *ChangeStreamReader) resumeTokenDocID() string { } } -func (csr *ChangeStreamReader) Wait() error { - if csr.changeStreamRunning { - csr.logger.Debug().Msgf("%s still running, signalling that writes are done and waiting for change stream to exit", csr) - csr.ChangeStreamEnderChan <- struct{}{} - select { - case err := <-csr.ChangeStreamErrChan: - csr.logger.Warn().Err(err). - Msg("Received error from change stream.") - return err - case <-csr.ChangeEventBatchChan: - csr.logger.Debug(). - Msg("Received completion signal from change stream.") - break - } - } - return nil -} - func (csr *ChangeStreamReader) persistChangeStreamResumeToken(ctx context.Context, cs *mongo.ChangeStream) error { token := cs.ResumeToken() diff --git a/internal/verifier/check.go b/internal/verifier/check.go index 9ba33609..183d975b 100644 --- a/internal/verifier/check.go +++ b/internal/verifier/check.go @@ -40,17 +40,17 @@ func (verifier *Verifier) Check(ctx context.Context, filter map[string]any) { verifier.MaybeStartPeriodicHeapProfileCollection(ctx) } -func (verifier *Verifier) waitForChangeStream(ctx context.Context) error { +func (verifier *Verifier) waitForChangeStream(ctx context.Context, csr *ChangeStreamReader) error { select { case <-ctx.Done(): return ctx.Err() - case err := <-verifier.changeStreamErrChan: + case err := <-csr.ChangeStreamErrChan: verifier.logger.Warn().Err(err). - Msg("Received error from change stream.") + Msgf("Received error from %s.", csr) return err - case <-verifier.changeStreamDoneChan: + case <-csr.ChangeStreamDoneChan: verifier.logger.Debug(). - Msg("Received completion signal from change stream.") + Msgf("Received completion signal from %s.", csr) break } @@ -87,10 +87,10 @@ func (verifier *Verifier) CheckWorker(ctx context.Context) error { select { case err := <-verifier.srcChangeStreamReader.ChangeStreamErrChan: cancel() - return err + return errors.Wrapf(err, "got an error from %s", verifier.srcChangeStreamReader) case err := <-verifier.dstChangeStreamReader.ChangeStreamErrChan: cancel() - return err + return errors.Wrapf(err, "got an error from %s", verifier.dstChangeStreamReader) case <-ctx.Done(): cancel() return nil @@ -170,27 +170,30 @@ func (verifier *Verifier) CheckDriver(ctx context.Context, filter map[string]any verifier.phase = Idle }() - verifier.mux.RLock() - csRunning := verifier.changeStreamRunning - verifier.mux.RUnlock() - if csRunning { - verifier.logger.Debug().Msg("Check: Change stream already running.") + if verifier.srcChangeStreamReader.changeStreamRunning { + verifier.logger.Debug().Msg("Check: Source change stream already running.") } else { - verifier.logger.Debug().Msg("Change stream not running; starting change stream") + verifier.logger.Debug().Msg("Source change stream not running; starting change stream") err = verifier.srcChangeStreamReader.StartChangeStream(ctx) if err != nil { return errors.Wrap(err, "failed to start change stream on source") } + verifier.StartChangeEventHandler(ctx, verifier.srcChangeStreamReader) } - if !verifier.dstChangeStreamReader.changeStreamRunning { + + if verifier.dstChangeStreamReader.changeStreamRunning { + verifier.logger.Debug().Msg("Check: Destination change stream already running.") + } else { verifier.logger.Debug().Msg("Destination change stream not running; starting change stream") err = verifier.dstChangeStreamReader.StartChangeStream(ctx) if err != nil { return errors.Wrap(err, "failed to start change stream on destination") } + verifier.StartChangeEventHandler(ctx, verifier.dstChangeStreamReader) } + // Log out the verification status when initially booting up so it's easy to see the current state verificationStatus, err := verifier.GetVerificationStatus() if err != nil { @@ -251,10 +254,10 @@ func (verifier *Verifier) CheckDriver(ctx context.Context, filter map[string]any // It's necessary to wait for the change stream to finish before incrementing the // generation number, or the last changes will not be checked. verifier.mux.Unlock() - if err = verifier.srcChangeStreamReader.Wait(); err != nil { + if err = verifier.waitForChangeStream(ctx, verifier.srcChangeStreamReader); err != nil { return err } - if err = verifier.dstChangeStreamReader.Wait(); err != nil { + if err = verifier.waitForChangeStream(ctx, verifier.dstChangeStreamReader); err != nil { return err } verifier.mux.Lock() diff --git a/internal/verifier/compare.go b/internal/verifier/compare.go index d6a50324..8e88b243 100644 --- a/internal/verifier/compare.go +++ b/internal/verifier/compare.go @@ -267,7 +267,7 @@ func (verifier *Verifier) getFetcherChannels( ctx, verifier.srcClientCollection(task), verifier.srcBuildInfo, - verifier.srcStartAtTs, + verifier.srcChangeStreamReader.startAtTs, task, ) @@ -291,7 +291,7 @@ func (verifier *Verifier) getFetcherChannels( ctx, verifier.dstClientCollection(task), verifier.dstBuildInfo, - nil, //startAtTs + verifier.dstChangeStreamReader.startAtTs, task, ) diff --git a/internal/verifier/migration_verifier.go b/internal/verifier/migration_verifier.go index 6d5c38b9..faebc829 100644 --- a/internal/verifier/migration_verifier.go +++ b/internal/verifier/migration_verifier.go @@ -124,17 +124,14 @@ type Verifier struct { srcNamespaces []string dstNamespaces []string - nsMap map[string]string + srcDstNsMap map[string]string + dstSrcNsMap map[string]string metaDBName string mux sync.RWMutex - changeStreamWritesOffTsChan chan primitive.Timestamp - srcChangeStreamReader *ChangeStreamReader - dstChangeStreamReader *ChangeStreamReader - - lastChangeEventTime *primitive.Timestamp - writesOffTimestamp *primitive.Timestamp + srcChangeStreamReader *ChangeStreamReader + dstChangeStreamReader *ChangeStreamReader readConcernSetting ReadConcernSetting @@ -196,12 +193,11 @@ func NewVerifier(settings VerifierSettings) *Verifier { } return &Verifier{ - phase: Idle, - numWorkers: NumWorkers, - readPreference: readpref.Primary(), - partitionSizeInBytes: 400 * 1024 * 1024, - failureDisplaySize: DefaultFailureDisplaySize, - changeStreamWritesOffTsChan: make(chan primitive.Timestamp), + phase: Idle, + numWorkers: NumWorkers, + readPreference: readpref.Primary(), + partitionSizeInBytes: 400 * 1024 * 1024, + failureDisplaySize: DefaultFailureDisplaySize, readConcernSetting: readConcern, @@ -242,33 +238,38 @@ func (verifier *Verifier) WritesOff(ctx context.Context) error { Msg("WritesOff called.") verifier.mux.Lock() + if verifier.writesOff { + verifier.mux.Unlock() + return errors.New("writesOff already set") + } verifier.writesOff = true + verifier.mux.Unlock() - if verifier.writesOffTimestamp == nil { - verifier.logger.Debug().Msg("Change stream still running. Signalling that writes are done.") - - finalTs, err := GetNewClusterTime( - ctx, - verifier.logger, - verifier.srcClient, - ) + verifier.logger.Debug().Msg("Signalling that writes are done.") - if err != nil { - return errors.Wrapf(err, "failed to fetch source's cluster time") - } + srcFinalTs, err := GetNewClusterTime( + ctx, + verifier.logger, + verifier.srcClient, + ) - verifier.writesOffTimestamp = &finalTs + if err != nil { + return errors.Wrapf(err, "failed to fetch source's cluster time") + } - verifier.mux.Unlock() + dstFinalTs, err := GetNewClusterTime( + ctx, + verifier.logger, + verifier.dstClient, + ) - // This has to happen under the lock because the change stream - // might be inserting docs into the recheck queue, which happens - // under the lock. - verifier.changeStreamWritesOffTsChan <- finalTs - } else { - verifier.mux.Unlock() + if err != nil { + return errors.Wrapf(err, "failed to fetch destination's cluster time") } + verifier.srcChangeStreamReader.ChangeStreamWritesOffTsChan <- srcFinalTs + verifier.dstChangeStreamReader.ChangeStreamWritesOffTsChan <- dstFinalTs + return nil } @@ -362,12 +363,13 @@ func (verifier *Verifier) SetDstNamespaces(arg []string) { } func (verifier *Verifier) SetNamespaceMap() { - verifier.nsMap = make(map[string]string) + verifier.srcDstNsMap = make(map[string]string) if len(verifier.dstNamespaces) == 0 { return } for i, ns := range verifier.srcNamespaces { - verifier.nsMap[ns] = verifier.dstNamespaces[i] + verifier.srcDstNsMap[ns] = verifier.dstNamespaces[i] + verifier.dstSrcNsMap[verifier.dstNamespaces[i]] = ns } } diff --git a/internal/verifier/migration_verifier_test.go b/internal/verifier/migration_verifier_test.go index d6e9d8e5..7e482d49 100644 --- a/internal/verifier/migration_verifier_test.go +++ b/internal/verifier/migration_verifier_test.go @@ -152,6 +152,7 @@ func (suite *IntegrationTestSuite) TestGetNamespaceStatistics_Recheck() { ID: "heyhey", }, }}, + srcReaderType, ) suite.Require().NoError(err) @@ -167,6 +168,7 @@ func (suite *IntegrationTestSuite) TestGetNamespaceStatistics_Recheck() { ID: "hoohoo", }, }}, + srcReaderType, ) suite.Require().NoError(err) @@ -408,19 +410,19 @@ func (suite *IntegrationTestSuite) TestFailedVerificationTaskInsertions() { }, } - err = verifier.HandleChangeStreamEvents(ctx, []ParsedEvent{event}) + err = verifier.HandleChangeStreamEvents(ctx, []ParsedEvent{event}, srcReaderType) suite.Require().NoError(err) event.OpType = "insert" - err = verifier.HandleChangeStreamEvents(ctx, []ParsedEvent{event}) + err = verifier.HandleChangeStreamEvents(ctx, []ParsedEvent{event}, srcReaderType) suite.Require().NoError(err) event.OpType = "replace" - err = verifier.HandleChangeStreamEvents(ctx, []ParsedEvent{event}) + err = verifier.HandleChangeStreamEvents(ctx, []ParsedEvent{event}, srcReaderType) suite.Require().NoError(err) event.OpType = "update" - err = verifier.HandleChangeStreamEvents(ctx, []ParsedEvent{event}) + err = verifier.HandleChangeStreamEvents(ctx, []ParsedEvent{event}, srcReaderType) suite.Require().NoError(err) event.OpType = "flibbity" - err = verifier.HandleChangeStreamEvents(ctx, []ParsedEvent{event}) + err = verifier.HandleChangeStreamEvents(ctx, []ParsedEvent{event}, srcReaderType) badEventErr := UnknownEventError{} suite.Require().ErrorAs(err, &badEventErr) suite.Assert().Equal("flibbity", badEventErr.Event.OpType) diff --git a/internal/verifier/recheck_test.go b/internal/verifier/recheck_test.go index e4b14b50..60e18b03 100644 --- a/internal/verifier/recheck_test.go +++ b/internal/verifier/recheck_test.go @@ -52,7 +52,7 @@ func (suite *IntegrationTestSuite) TestFailedCompareThenReplace() { FullDocument: testutil.MustMarshal(bson.D{{"foo", 1}}), } - err := verifier.HandleChangeStreamEvents(ctx, []ParsedEvent{event}) + err := verifier.HandleChangeStreamEvents(ctx, []ParsedEvent{event}, srcReaderType) suite.Require().NoError(err) recheckDocs = suite.fetchRecheckDocs(ctx, verifier) diff --git a/internal/verifier/verification_task.go b/internal/verifier/verification_task.go index dc1f4af9..911a559e 100644 --- a/internal/verifier/verification_task.go +++ b/internal/verifier/verification_task.go @@ -94,9 +94,9 @@ func (verifier *Verifier) insertCollectionVerificationTask( generation int) (*VerificationTask, error) { dstNamespace := srcNamespace - if len(verifier.nsMap) != 0 { + if len(verifier.srcDstNsMap) != 0 { var ok bool - dstNamespace, ok = verifier.nsMap[srcNamespace] + dstNamespace, ok = verifier.srcDstNsMap[srcNamespace] if !ok { return nil, fmt.Errorf("Could not find Namespace %s", srcNamespace) } @@ -155,9 +155,9 @@ func (verifier *Verifier) InsertPartitionVerificationTask(partition *partitions. func (verifier *Verifier) InsertDocumentRecheckTask(ids []interface{}, dataSize types.ByteCount, srcNamespace string) error { dstNamespace := srcNamespace - if len(verifier.nsMap) != 0 { + if len(verifier.srcDstNsMap) != 0 { var ok bool - dstNamespace, ok = verifier.nsMap[srcNamespace] + dstNamespace, ok = verifier.srcDstNsMap[srcNamespace] if !ok { return fmt.Errorf("Could not find Namespace %s", srcNamespace) } From 24d3881804b4cd08385b1161a337c55ca10d1ec4 Mon Sep 17 00:00:00 2001 From: Jian Guan <61915096+tdq45gj@users.noreply.github.com> Date: Fri, 22 Nov 2024 16:09:04 -0500 Subject: [PATCH 03/32] Update change_stream_test.go --- internal/verifier/change_stream_test.go | 40 ++++++++++++------------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/internal/verifier/change_stream_test.go b/internal/verifier/change_stream_test.go index 1f23ad32..9e3207f2 100644 --- a/internal/verifier/change_stream_test.go +++ b/internal/verifier/change_stream_test.go @@ -17,7 +17,7 @@ func TestChangeStreamFilter(t *testing.T) { verifier := Verifier{} verifier.SetMetaDBName("metadb") require.Equal(t, []bson.D{{{"$match", bson.D{{"ns.db", bson.D{{"$ne", "metadb"}}}}}}}, - verifier.GetChangeStreamFilter(verifier.srcNamespaces)) + verifier.srcChangeStreamReader.GetChangeStreamFilter()) verifier.srcNamespaces = []string{"foo.bar", "foo.baz", "test.car", "test.chaz"} require.Equal(t, []bson.D{ {{"$match", bson.D{ @@ -28,7 +28,7 @@ func TestChangeStreamFilter(t *testing.T) { bson.D{{"ns", bson.D{{"db", "test"}, {"coll", "chaz"}}}}, }}, }}}, - }, verifier.GetChangeStreamFilter(verifier.srcNamespaces)) + }, verifier.srcChangeStreamReader.GetChangeStreamFilter()) } // TestChangeStreamResumability creates a verifier, starts its change stream, @@ -39,7 +39,7 @@ func (suite *IntegrationTestSuite) TestChangeStreamResumability() { verifier1 := suite.BuildVerifier() ctx, cancel := context.WithCancel(context.Background()) defer cancel() - err := verifier1.StartChangeStream(ctx) + err := verifier1.srcChangeStreamReader.StartChangeStream(ctx) suite.Require().NoError(err) }() @@ -64,13 +64,13 @@ func (suite *IntegrationTestSuite) TestChangeStreamResumability() { newTime := suite.getClusterTime(ctx, suite.srcMongoClient) - err = verifier2.StartChangeStream(ctx) + err = verifier2.srcChangeStreamReader.StartChangeStream(ctx) suite.Require().NoError(err) - suite.Require().NotNil(verifier2.srcStartAtTs) + suite.Require().NotNil(verifier2.srcChangeStreamReader.startAtTs) suite.Assert().False( - verifier2.srcStartAtTs.After(newTime), + verifier2.srcChangeStreamReader.startAtTs.After(newTime), "verifier2's change stream should be no later than this new session", ) @@ -139,12 +139,12 @@ func (suite *IntegrationTestSuite) TestStartAtTimeNoChanges() { suite.Require().NoError(err) origStartTs := sess.OperationTime() suite.Require().NotNil(origStartTs) - err = verifier.StartChangeStream(ctx) + err = verifier.srcChangeStreamReader.StartChangeStream(ctx) suite.Require().NoError(err) - suite.Require().Equal(verifier.srcStartAtTs, origStartTs) - verifier.changeStreamWritesOffTsChan <- *origStartTs - <-verifier.changeStreamDoneChan - suite.Require().Equal(verifier.srcStartAtTs, origStartTs) + suite.Require().Equal(verifier.srcChangeStreamReader.startAtTs, origStartTs) + verifier.srcChangeStreamReader.ChangeStreamWritesOffTsChan <- *origStartTs + <-verifier.srcChangeStreamReader.ChangeStreamDoneChan + suite.Require().Equal(verifier.srcChangeStreamReader.startAtTs, origStartTs) } func (suite *IntegrationTestSuite) TestStartAtTimeWithChanges() { @@ -160,9 +160,9 @@ func (suite *IntegrationTestSuite) TestStartAtTimeWithChanges() { origSessionTime := sess.OperationTime() suite.Require().NotNil(origSessionTime) - err = verifier.StartChangeStream(ctx) + err = verifier.srcChangeStreamReader.StartChangeStream(ctx) suite.Require().NoError(err) - suite.Require().Equal(verifier.srcStartAtTs, origSessionTime) + suite.Require().Equal(verifier.srcChangeStreamReader.startAtTs, origSessionTime) _, err = suite.srcMongoClient.Database("testDb").Collection("testColl").InsertOne( sctx, bson.D{{"_id", 1}}) @@ -184,12 +184,12 @@ func (suite *IntegrationTestSuite) TestStartAtTimeWithChanges() { "session time after events should exceed the original", ) - verifier.changeStreamWritesOffTsChan <- *postEventsSessionTime - <-verifier.changeStreamDoneChan + verifier.srcChangeStreamReader.ChangeStreamWritesOffTsChan <- *postEventsSessionTime + <-verifier.srcChangeStreamReader.ChangeStreamDoneChan suite.Assert().Equal( *postEventsSessionTime, - *verifier.srcStartAtTs, + *verifier.srcChangeStreamReader.startAtTs, "verifier.srcStartAtTs should now be our session timestamp", ) } @@ -206,10 +206,10 @@ func (suite *IntegrationTestSuite) TestNoStartAtTime() { suite.Require().NoError(err) origStartTs := sess.OperationTime() suite.Require().NotNil(origStartTs) - err = verifier.StartChangeStream(ctx) + err = verifier.srcChangeStreamReader.StartChangeStream(ctx) suite.Require().NoError(err) - suite.Require().NotNil(verifier.srcStartAtTs) - suite.Require().LessOrEqual(origStartTs.Compare(*verifier.srcStartAtTs), 0) + suite.Require().NotNil(verifier.srcChangeStreamReader.startAtTs) + suite.Require().LessOrEqual(origStartTs.Compare(*verifier.srcChangeStreamReader.startAtTs), 0) } func (suite *IntegrationTestSuite) TestWithChangeEventsBatching() { @@ -218,7 +218,7 @@ func (suite *IntegrationTestSuite) TestWithChangeEventsBatching() { ctx, cancel := context.WithCancel(context.Background()) defer cancel() - suite.Require().NoError(verifier.StartChangeStream(ctx)) + suite.Require().NoError(verifier.srcChangeStreamReader.StartChangeStream(ctx)) _, err := suite.srcMongoClient.Database("testDb").Collection("testColl1").InsertOne(ctx, bson.D{{"_id", 1}}) suite.Require().NoError(err) From d66f25e77139d18ea2bd07719d29c98fc8935485 Mon Sep 17 00:00:00 2001 From: Jian Guan <61915096+tdq45gj@users.noreply.github.com> Date: Fri, 22 Nov 2024 16:14:35 -0500 Subject: [PATCH 04/32] Update change_stream_test.go --- internal/verifier/change_stream_test.go | 1 + 1 file changed, 1 insertion(+) diff --git a/internal/verifier/change_stream_test.go b/internal/verifier/change_stream_test.go index 9e3207f2..6d764e7c 100644 --- a/internal/verifier/change_stream_test.go +++ b/internal/verifier/change_stream_test.go @@ -15,6 +15,7 @@ import ( func TestChangeStreamFilter(t *testing.T) { verifier := Verifier{} + verifier.initializeChangeStreamReaders() verifier.SetMetaDBName("metadb") require.Equal(t, []bson.D{{{"$match", bson.D{{"ns.db", bson.D{{"$ne", "metadb"}}}}}}}, verifier.srcChangeStreamReader.GetChangeStreamFilter()) From 733220cfea917341f47a314adda5190eaca6f85b Mon Sep 17 00:00:00 2001 From: Jian Guan <61915096+tdq45gj@users.noreply.github.com> Date: Mon, 25 Nov 2024 10:42:43 -0500 Subject: [PATCH 05/32] Update migration_verifier.go --- internal/verifier/migration_verifier.go | 1 + 1 file changed, 1 insertion(+) diff --git a/internal/verifier/migration_verifier.go b/internal/verifier/migration_verifier.go index df2ec63c..6ce9ca80 100644 --- a/internal/verifier/migration_verifier.go +++ b/internal/verifier/migration_verifier.go @@ -367,6 +367,7 @@ func (verifier *Verifier) SetDstNamespaces(arg []string) { func (verifier *Verifier) SetNamespaceMap() { verifier.srcDstNsMap = make(map[string]string) + verifier.dstSrcNsMap = make(map[string]string) if len(verifier.dstNamespaces) == 0 { return } From 39af4e7bb25a64074556f041ba912e55cd176e42 Mon Sep 17 00:00:00 2001 From: Jian Guan <61915096+tdq45gj@users.noreply.github.com> Date: Mon, 25 Nov 2024 12:47:26 -0500 Subject: [PATCH 06/32] fix --- internal/verifier/change_stream.go | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/internal/verifier/change_stream.go b/internal/verifier/change_stream.go index e6b048de..28698130 100644 --- a/internal/verifier/change_stream.go +++ b/internal/verifier/change_stream.go @@ -7,6 +7,7 @@ import ( "github.com/10gen/migration-verifier/internal/keystring" "github.com/10gen/migration-verifier/internal/logger" + "github.com/10gen/migration-verifier/internal/util" "github.com/pkg/errors" "github.com/rs/zerolog" "go.mongodb.org/mongo-driver/bson" @@ -60,6 +61,7 @@ type ChangeStreamReader struct { metaClient *mongo.Client watcherClient *mongo.Client + buildInfo *util.BuildInfo changeStreamRunning bool ChangeEventBatchChan chan []ParsedEvent @@ -79,6 +81,7 @@ func (verifier *Verifier) initializeChangeStreamReaders() { metaDBName: verifier.metaDBName, metaClient: verifier.metaClient, watcherClient: verifier.srcClient, + buildInfo: verifier.srcBuildInfo, changeStreamRunning: false, ChangeEventBatchChan: make(chan []ParsedEvent), ChangeStreamWritesOffTsChan: make(chan primitive.Timestamp), @@ -94,6 +97,7 @@ func (verifier *Verifier) initializeChangeStreamReaders() { metaDBName: verifier.metaDBName, metaClient: verifier.metaClient, watcherClient: verifier.dstClient, + buildInfo: verifier.dstBuildInfo, changeStreamRunning: false, ChangeEventBatchChan: make(chan []ParsedEvent), ChangeStreamWritesOffTsChan: make(chan primitive.Timestamp), @@ -385,7 +389,7 @@ func (csr *ChangeStreamReader) StartChangeStream(ctx context.Context) error { SetMaxAwaitTime(1 * time.Second). SetFullDocument(options.UpdateLookup) - if verifier.srcBuildInfo.VersionArray[0] >= 6 { + if csr.buildInfo.VersionArray[0] >= 6 { opts = opts.SetCustomPipeline(bson.M{"showExpandedEvents": true}) } From 4ff9863f8314f65c62f96214ce21cfaeefa2ca23 Mon Sep 17 00:00:00 2001 From: Jian Guan <61915096+tdq45gj@users.noreply.github.com> Date: Mon, 25 Nov 2024 13:22:53 -0500 Subject: [PATCH 07/32] Update integration_test_suite.go --- internal/verifier/integration_test_suite.go | 1 + 1 file changed, 1 insertion(+) diff --git a/internal/verifier/integration_test_suite.go b/internal/verifier/integration_test_suite.go index 33e3a336..9ba20a18 100644 --- a/internal/verifier/integration_test_suite.go +++ b/internal/verifier/integration_test_suite.go @@ -185,6 +185,7 @@ func (suite *IntegrationTestSuite) BuildVerifier() *Verifier { ) verifier.SetLogger("stderr") verifier.SetMetaDBName(metaDBName) + verifier.initializeChangeStreamReaders() suite.Require().NoError(verifier.srcClientCollection(&task).Drop(ctx)) suite.Require().NoError(verifier.dstClientCollection(&task).Drop(ctx)) From 16224b17a763f0948cb1d2a74312a301012c1feb Mon Sep 17 00:00:00 2001 From: Jian Guan <61915096+tdq45gj@users.noreply.github.com> Date: Mon, 25 Nov 2024 15:54:29 -0500 Subject: [PATCH 08/32] fix tests --- internal/verifier/change_stream_test.go | 75 ++++++++++++++++++++- internal/verifier/integration_test_suite.go | 1 - 2 files changed, 72 insertions(+), 4 deletions(-) diff --git a/internal/verifier/change_stream_test.go b/internal/verifier/change_stream_test.go index 5e60bb3e..c720e8d8 100644 --- a/internal/verifier/change_stream_test.go +++ b/internal/verifier/change_stream_test.go @@ -17,11 +17,13 @@ import ( func TestChangeStreamFilter(t *testing.T) { verifier := Verifier{} - verifier.initializeChangeStreamReaders() verifier.SetMetaDBName("metadb") + verifier.srcNamespaces = []string{"foo.bar", "foo.baz", "test.car", "test.chaz"} + + verifier.initializeChangeStreamReaders() + require.Equal(t, []bson.D{{{"$match", bson.D{{"ns.db", bson.D{{"$ne", "metadb"}}}}}}}, verifier.srcChangeStreamReader.GetChangeStreamFilter()) - verifier.srcNamespaces = []string{"foo.bar", "foo.baz", "test.car", "test.chaz"} require.Equal(t, []bson.D{ {{"$match", bson.D{ {"$or", bson.A{ @@ -48,6 +50,7 @@ func (suite *IntegrationTestSuite) TestChangeStreamResumability() { verifier1 := suite.BuildVerifier() ctx, cancel := context.WithCancel(context.Background()) defer cancel() + verifier1.StartChangeEventHandler(ctx, verifier1.srcChangeStreamReader) err := verifier1.srcChangeStreamReader.StartChangeStream(ctx) suite.Require().NoError(err) }() @@ -73,6 +76,7 @@ func (suite *IntegrationTestSuite) TestChangeStreamResumability() { newTime := suite.getClusterTime(ctx, suite.srcMongoClient) + verifier2.StartChangeEventHandler(ctx, verifier2.srcChangeStreamReader) err = verifier2.srcChangeStreamReader.StartChangeStream(ctx) suite.Require().NoError(err) @@ -148,6 +152,7 @@ func (suite *IntegrationTestSuite) TestStartAtTimeNoChanges() { suite.Require().NoError(err) origStartTs := sess.OperationTime() suite.Require().NotNil(origStartTs) + verifier.StartChangeEventHandler(ctx, verifier.srcChangeStreamReader) err = verifier.srcChangeStreamReader.StartChangeStream(ctx) suite.Require().NoError(err) suite.Require().Equal(verifier.srcChangeStreamReader.startAtTs, origStartTs) @@ -169,6 +174,7 @@ func (suite *IntegrationTestSuite) TestStartAtTimeWithChanges() { origSessionTime := sess.OperationTime() suite.Require().NotNil(origSessionTime) + verifier.StartChangeEventHandler(ctx, verifier.srcChangeStreamReader) err = verifier.srcChangeStreamReader.StartChangeStream(ctx) suite.Require().NoError(err) @@ -221,6 +227,7 @@ func (suite *IntegrationTestSuite) TestNoStartAtTime() { suite.Require().NoError(err) origStartTs := sess.OperationTime() suite.Require().NotNil(origStartTs) + verifier.StartChangeEventHandler(ctx, verifier.srcChangeStreamReader) err = verifier.srcChangeStreamReader.StartChangeStream(ctx) suite.Require().NoError(err) suite.Require().NotNil(verifier.srcChangeStreamReader.startAtTs) @@ -240,6 +247,7 @@ func (suite *IntegrationTestSuite) TestWithChangeEventsBatching() { verifier := suite.BuildVerifier() + verifier.StartChangeEventHandler(ctx, verifier.srcChangeStreamReader) suite.Require().NoError(verifier.srcChangeStreamReader.StartChangeStream(ctx)) _, err := coll1.InsertOne(ctx, bson.D{{"_id", 1}}) @@ -261,7 +269,6 @@ func (suite *IntegrationTestSuite) TestWithChangeEventsBatching() { 500*time.Millisecond, "the verifier should flush a recheck doc after a batch", ) - } func (suite *IntegrationTestSuite) TestManyInsertsBeforeWritesOff() { @@ -358,3 +365,65 @@ func (suite *IntegrationTestSuite) TestCreateForbidden() { suite.Require().ErrorAs(err, &eventErr) suite.Assert().Equal("create", eventErr.Event.OpType) } + +func (suite *IntegrationTestSuite) TestRecheckDocsWithDstChangeEvents() { + ctx := suite.Context() + + db := suite.dstMongoClient.Database("dstDB") + coll1 := db.Collection("dstColl1") + coll2 := db.Collection("dstColl2") + + for _, coll := range mslices.Of(coll1, coll2) { + suite.Require().NoError(db.CreateCollection(ctx, coll.Name())) + } + + verifier := suite.BuildVerifier() + verifier.SetSrcNamespaces([]string{"srcDB.srcColl1", "srcDB.srcColl2"}) + verifier.SetDstNamespaces([]string{"dstDB.dstColl1", "dstDB.dstColl2"}) + verifier.SetNamespaceMap() + + verifier.StartChangeEventHandler(ctx, verifier.dstChangeStreamReader) + suite.Require().NoError(verifier.dstChangeStreamReader.StartChangeStream(ctx)) + + _, err := coll1.InsertOne(ctx, bson.D{{"_id", 1}}) + suite.Require().NoError(err) + _, err = coll1.InsertOne(ctx, bson.D{{"_id", 2}}) + suite.Require().NoError(err) + + _, err = coll2.InsertOne(ctx, bson.D{{"_id", 1}}) + suite.Require().NoError(err) + + var rechecks []RecheckDoc + require.Eventually( + suite.T(), + func() bool { + recheckColl := verifier.verificationDatabase().Collection(recheckQueue) + cursor, err := recheckColl.Find(ctx, bson.D{}) + if errors.Is(err, mongo.ErrNoDocuments) { + return false + } + + suite.Require().NoError(err) + suite.Require().NoError(cursor.All(ctx, &rechecks)) + return len(rechecks) == 3 + }, + time.Minute, + 500*time.Millisecond, + "the verifier should flush a recheck doc after a batch", + ) + + coll1RecheckCount, coll2RecheckCount := 0, 0 + for _, recheck := range rechecks { + suite.Require().Equal("srcDB", recheck.PrimaryKey.DatabaseName) + switch recheck.PrimaryKey.CollectionName { + case "srcColl1": + coll1RecheckCount++ + case "srcColl2": + coll2RecheckCount++ + default: + suite.T().Fatalf("unknown collection name: %v", recheck.PrimaryKey.CollectionName) + } + } + suite.Require().Equal(2, coll1RecheckCount) + suite.Require().Equal(1, coll2RecheckCount) +} diff --git a/internal/verifier/integration_test_suite.go b/internal/verifier/integration_test_suite.go index 9ba20a18..ff0a3567 100644 --- a/internal/verifier/integration_test_suite.go +++ b/internal/verifier/integration_test_suite.go @@ -123,7 +123,6 @@ func (suite *IntegrationTestSuite) TearDownTest() { suite.contextCanceller(errors.Errorf("tearing down test %#q", suite.T().Name())) suite.testContext, suite.contextCanceller = nil, nil - ctx := context.Background() for _, client := range []*mongo.Client{suite.srcMongoClient, suite.dstMongoClient} { dbNames, err := client.ListDatabaseNames(ctx, bson.D{}) From a9cb0c159eb1120bf41b46f4b06f56909ee75402 Mon Sep 17 00:00:00 2001 From: Jian Guan <61915096+tdq45gj@users.noreply.github.com> Date: Mon, 25 Nov 2024 17:11:34 -0500 Subject: [PATCH 09/32] Update change_stream.go --- internal/verifier/change_stream.go | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/internal/verifier/change_stream.go b/internal/verifier/change_stream.go index b6b5cd2c..1287baea 100644 --- a/internal/verifier/change_stream.go +++ b/internal/verifier/change_stream.go @@ -328,7 +328,7 @@ func (csr *ChangeStreamReader) iterateChangeStream( return ctx.Err() // If the ChangeStreamEnderChan has a message, the user has indicated that - // source writes are ended. This means we should exit rather than continue + // source and destination writes are ended. This means we should exit rather than continue // reading the change stream since there should be no more events. case writesOffTs := <-csr.ChangeStreamWritesOffTsChan: csr.logger.Debug(). @@ -337,7 +337,7 @@ func (csr *ChangeStreamReader) iterateChangeStream( gotwritesOffTimestamp = true - // Read all change events until the source reports no events. + // Read all change events until the source / destination reports no events. // (i.e., the `getMore` call returns empty) for { var curTs primitive.Timestamp @@ -454,7 +454,7 @@ func (csr *ChangeStreamReader) createChangeStream( return nil, primitive.Timestamp{}, err } - startTs, err := extractTimestampFromResumeToken(srcChangeStream.ResumeToken()) + startTs, err := extractTimestampFromResumeToken(changeStream.ResumeToken()) if err != nil { return nil, primitive.Timestamp{}, errors.Wrap(err, "failed to extract timestamp from change stream's resume token") } @@ -471,11 +471,11 @@ func (csr *ChangeStreamReader) createChangeStream( startTs = clusterTime } - return srcChangeStream, startTs, nil + return changeStream, startTs, nil } // StartChangeStream starts the change stream. -func (verifier *Verifier) StartChangeStream(ctx context.Context) error { +func (csr *ChangeStreamReader) StartChangeStream(ctx context.Context) error { // This channel holds the first change stream creation's result, whether // success or failure. Rather than using a Result we could make separate // Timestamp and error channels, but the single channel is cleaner since @@ -490,9 +490,9 @@ func (verifier *Verifier) StartChangeStream(ctx context.Context) error { err := retryer.RunForTransientErrorsOnly( ctx, - verifier.logger, + csr.logger, func(ri *retry.Info) error { - srcChangeStream, startTs, err := verifier.createChangeStream(ctx) + changeStream, startTs, err := csr.createChangeStream(ctx) if err != nil { if parentThreadWaiting { initialCreateResultChan <- mo.Err[primitive.Timestamp](err) @@ -502,7 +502,7 @@ func (verifier *Verifier) StartChangeStream(ctx context.Context) error { return err } - defer srcChangeStream.Close(ctx) + defer changeStream.Close(ctx) if parentThreadWaiting { initialCreateResultChan <- mo.Ok(startTs) @@ -510,15 +510,15 @@ func (verifier *Verifier) StartChangeStream(ctx context.Context) error { parentThreadWaiting = false } - return verifier.iterateChangeStream(ctx, ri, srcChangeStream) + return csr.iterateChangeStream(ctx, ri, changeStream) }, ) if err != nil { // NB: This failure always happens after the initial change stream // creation. - verifier.changeStreamErrChan <- err - close(verifier.changeStreamErrChan) + csr.ChangeStreamErrChan <- err + close(csr.ChangeStreamErrChan) } }() From 5bf64f60e0860117a46fbfe2f0a22e706c3f74d2 Mon Sep 17 00:00:00 2001 From: Jian Guan <61915096+tdq45gj@users.noreply.github.com> Date: Wed, 27 Nov 2024 13:56:40 -0500 Subject: [PATCH 10/32] Felipe's review --- internal/verifier/change_stream.go | 35 ++++++++---------- internal/verifier/change_stream_test.go | 15 +++++--- internal/verifier/check.go | 22 ++++++----- internal/verifier/migration_verifier.go | 16 ++------ internal/verifier/nsmap.go | 49 +++++++++++++++++++++++++ internal/verifier/nsmap_test.go | 38 +++++++++++++++++++ internal/verifier/recheck.go | 28 +++++++------- internal/verifier/recheck_test.go | 40 ++++++++++---------- internal/verifier/verification_task.go | 8 ++-- 9 files changed, 164 insertions(+), 87 deletions(-) create mode 100644 internal/verifier/nsmap.go create mode 100644 internal/verifier/nsmap_test.go diff --git a/internal/verifier/change_stream.go b/internal/verifier/change_stream.go index 1287baea..9122e3a9 100644 --- a/internal/verifier/change_stream.go +++ b/internal/verifier/change_stream.go @@ -59,11 +59,10 @@ type ChangeStreamReader struct { lastChangeEventTime *primitive.Timestamp logger *logger.Logger namespaces []string - metaDBName string - metaClient *mongo.Client + metaDB *mongo.Database watcherClient *mongo.Client - buildInfo *util.BuildInfo + buildInfo util.BuildInfo changeStreamRunning bool ChangeEventBatchChan chan []ParsedEvent @@ -77,35 +76,29 @@ type ChangeStreamReader struct { func (verifier *Verifier) initializeChangeStreamReaders() { verifier.srcChangeStreamReader = &ChangeStreamReader{ readerType: srcReaderType, - lastChangeEventTime: nil, logger: verifier.logger, namespaces: verifier.srcNamespaces, - metaDBName: verifier.metaDBName, - metaClient: verifier.metaClient, + metaDB: verifier.metaClient.Database(verifier.metaDBName), watcherClient: verifier.srcClient, - buildInfo: verifier.srcBuildInfo, + buildInfo: *verifier.srcBuildInfo, changeStreamRunning: false, ChangeEventBatchChan: make(chan []ParsedEvent), ChangeStreamWritesOffTsChan: make(chan primitive.Timestamp), ChangeStreamErrChan: make(chan error), ChangeStreamDoneChan: make(chan struct{}), - startAtTs: nil, } verifier.dstChangeStreamReader = &ChangeStreamReader{ readerType: dstReaderType, - lastChangeEventTime: nil, logger: verifier.logger, namespaces: verifier.dstNamespaces, - metaDBName: verifier.metaDBName, - metaClient: verifier.metaClient, + metaDB: verifier.metaClient.Database(verifier.metaDBName), watcherClient: verifier.dstClient, - buildInfo: verifier.dstBuildInfo, + buildInfo: *verifier.dstBuildInfo, changeStreamRunning: false, ChangeEventBatchChan: make(chan []ParsedEvent), ChangeStreamWritesOffTsChan: make(chan primitive.Timestamp), ChangeStreamErrChan: make(chan error), ChangeStreamDoneChan: make(chan struct{}), - startAtTs: nil, } } @@ -156,12 +149,13 @@ func (verifier *Verifier) HandleChangeStreamEvents(ctx context.Context, batch [] // We need to retrieve the source namespaces if change events are from the destination. switch eventOrigin { case dstReaderType: - if len(verifier.dstSrcNsMap) == 0 { + if verifier.nsMap.Len() == 0 { + // Namespace is not remapped. Source namespace is the same as the destination. srcDBName = changeEvent.Ns.DB srcCollName = changeEvent.Ns.Coll } else { dstNs := fmt.Sprintf("%s.%s", changeEvent.Ns.DB, changeEvent.Ns.Coll) - srcNs, exist := verifier.dstSrcNsMap[dstNs] + srcNs, exist := verifier.nsMap.GetSrcNamespace(dstNs) if !exist { return errors.Errorf("no source namespace corresponding to the destination namepsace %s", dstNs) } @@ -214,7 +208,7 @@ func (csr *ChangeStreamReader) GetChangeStreamFilter() (pipeline mongo.Pipeline) if len(csr.namespaces) == 0 { pipeline = mongo.Pipeline{ {{"$match", bson.D{ - {"ns.db", bson.D{{"$ne", csr.metaDBName}}}, + {"ns.db", bson.D{{"$ne", csr.metaDB.Name()}}}, }}}, } } else { @@ -328,8 +322,9 @@ func (csr *ChangeStreamReader) iterateChangeStream( return ctx.Err() // If the ChangeStreamEnderChan has a message, the user has indicated that - // source and destination writes are ended. This means we should exit rather than continue - // reading the change stream since there should be no more events. + // source writes are ended and the migration tool is finished / committed. + // This means we should exit rather than continue reading the change stream + // since there should be no more events. case writesOffTs := <-csr.ChangeStreamWritesOffTsChan: csr.logger.Debug(). Interface("writesOffTimestamp", writesOffTs). @@ -337,7 +332,7 @@ func (csr *ChangeStreamReader) iterateChangeStream( gotwritesOffTimestamp = true - // Read all change events until the source / destination reports no events. + // Read change events until the stream reaches the writesOffTs. // (i.e., the `getMore` call returns empty) for { var curTs primitive.Timestamp @@ -543,7 +538,7 @@ func addTimestampToLogEvent(ts primitive.Timestamp, event *zerolog.Event) *zerol } func (csr *ChangeStreamReader) getChangeStreamMetadataCollection() *mongo.Collection { - return csr.metaClient.Database(csr.metaDBName).Collection(metadataChangeStreamCollectionName) + return csr.metaDB.Collection(metadataChangeStreamCollectionName) } func (csr *ChangeStreamReader) loadChangeStreamResumeToken(ctx context.Context) (bson.Raw, error) { diff --git a/internal/verifier/change_stream_test.go b/internal/verifier/change_stream_test.go index 5d186ce8..7455c3b9 100644 --- a/internal/verifier/change_stream_test.go +++ b/internal/verifier/change_stream_test.go @@ -463,7 +463,10 @@ func (suite *IntegrationTestSuite) TestCreateForbidden() { func (suite *IntegrationTestSuite) TestRecheckDocsWithDstChangeEvents() { ctx := suite.Context() - db := suite.dstMongoClient.Database("dstDB") + srcDBName := suite.DBNameForTest() + "_src" + dstDBName := suite.DBNameForTest() + "_dst" + + db := suite.dstMongoClient.Database(dstDBName) coll1 := db.Collection("dstColl1") coll2 := db.Collection("dstColl2") @@ -472,8 +475,8 @@ func (suite *IntegrationTestSuite) TestRecheckDocsWithDstChangeEvents() { } verifier := suite.BuildVerifier() - verifier.SetSrcNamespaces([]string{"srcDB.srcColl1", "srcDB.srcColl2"}) - verifier.SetDstNamespaces([]string{"dstDB.dstColl1", "dstDB.dstColl2"}) + verifier.SetSrcNamespaces([]string{srcDBName + ".srcColl1", srcDBName + ".srcColl2"}) + verifier.SetDstNamespaces([]string{dstDBName + ".dstColl1", dstDBName + ".dstColl2"}) verifier.SetNamespaceMap() verifier.StartChangeEventHandler(ctx, verifier.dstChangeStreamReader) @@ -508,14 +511,14 @@ func (suite *IntegrationTestSuite) TestRecheckDocsWithDstChangeEvents() { coll1RecheckCount, coll2RecheckCount := 0, 0 for _, recheck := range rechecks { - suite.Require().Equal("srcDB", recheck.PrimaryKey.DatabaseName) - switch recheck.PrimaryKey.CollectionName { + suite.Require().Equal(srcDBName, recheck.PrimaryKey.SrcDatabaseName) + switch recheck.PrimaryKey.SrcCollectionName { case "srcColl1": coll1RecheckCount++ case "srcColl2": coll2RecheckCount++ default: - suite.T().Fatalf("unknown collection name: %v", recheck.PrimaryKey.CollectionName) + suite.T().Fatalf("unknown collection name: %v", recheck.PrimaryKey.SrcCollectionName) } } suite.Require().Equal(2, coll1RecheckCount) diff --git a/internal/verifier/check.go b/internal/verifier/check.go index 6caf0ed4..c1129841 100644 --- a/internal/verifier/check.go +++ b/internal/verifier/check.go @@ -87,10 +87,10 @@ func (verifier *Verifier) CheckWorker(ctx context.Context) error { select { case err := <-verifier.srcChangeStreamReader.ChangeStreamErrChan: cancel() - return errors.Wrapf(err, "got an error from %s", verifier.srcChangeStreamReader) + return errors.Wrapf(err, "%s failed", verifier.srcChangeStreamReader) case err := <-verifier.dstChangeStreamReader.ChangeStreamErrChan: cancel() - return errors.Wrapf(err, "got an error from %s", verifier.dstChangeStreamReader) + return errors.Wrapf(err, "%s failed", verifier.dstChangeStreamReader) case <-ctx.Done(): cancel() return nil @@ -170,16 +170,18 @@ func (verifier *Verifier) CheckDriver(ctx context.Context, filter map[string]any verifier.phase = Idle }() - if verifier.srcChangeStreamReader.changeStreamRunning { - verifier.logger.Debug().Msg("Check: Source change stream already running.") - } else { - verifier.logger.Debug().Msg("Source change stream not running; starting change stream") + for _, csReader := range []*ChangeStreamReader{verifier.srcChangeStreamReader, verifier.dstChangeStreamReader} { + if csReader.changeStreamRunning { + verifier.logger.Debug().Msgf("Check: %s already running.", csReader) + } else { + verifier.logger.Debug().Msgf("%s not running; starting change stream", csReader) - err = verifier.srcChangeStreamReader.StartChangeStream(ctx) - if err != nil { - return errors.Wrap(err, "failed to start change stream on source") + err = csReader.StartChangeStream(ctx) + if err != nil { + return errors.Wrapf(err, "failed to start %s", csReader) + } + verifier.StartChangeEventHandler(ctx, csReader) } - verifier.StartChangeEventHandler(ctx, verifier.srcChangeStreamReader) } if verifier.dstChangeStreamReader.changeStreamRunning { diff --git a/internal/verifier/migration_verifier.go b/internal/verifier/migration_verifier.go index 6134a960..b1d0e72d 100644 --- a/internal/verifier/migration_verifier.go +++ b/internal/verifier/migration_verifier.go @@ -128,8 +128,7 @@ type Verifier struct { srcNamespaces []string dstNamespaces []string - srcDstNsMap map[string]string - dstSrcNsMap map[string]string + nsMap *NSMap metaDBName string mux sync.RWMutex @@ -271,7 +270,7 @@ func (verifier *Verifier) WritesOff(ctx context.Context) error { return errors.Wrapf(err, "failed to fetch destination's cluster time") } - // This has to happen under the lock because the change stream + // This has to happen outside the lock because the change stream // might be inserting docs into the recheck queue, which happens // under the lock. select { @@ -391,15 +390,8 @@ func (verifier *Verifier) SetDstNamespaces(arg []string) { } func (verifier *Verifier) SetNamespaceMap() { - verifier.srcDstNsMap = make(map[string]string) - verifier.dstSrcNsMap = make(map[string]string) - if len(verifier.dstNamespaces) == 0 { - return - } - for i, ns := range verifier.srcNamespaces { - verifier.srcDstNsMap[ns] = verifier.dstNamespaces[i] - verifier.dstSrcNsMap[verifier.dstNamespaces[i]] = ns - } + verifier.nsMap = NewNSMap() + verifier.nsMap.PopulateWithNamespaces(verifier.srcNamespaces, verifier.dstNamespaces) } func (verifier *Verifier) SetMetaDBName(arg string) { diff --git a/internal/verifier/nsmap.go b/internal/verifier/nsmap.go new file mode 100644 index 00000000..ebf52777 --- /dev/null +++ b/internal/verifier/nsmap.go @@ -0,0 +1,49 @@ +package verifier + +type NSMap struct { + srcDstNsMap map[string]string + dstSrcNsMap map[string]string +} + +func NewNSMap() *NSMap { + return &NSMap{ + srcDstNsMap: make(map[string]string), + dstSrcNsMap: make(map[string]string), + } +} + +func (nsmap *NSMap) PopulateWithNamespaces(srcNamespaces []string, dstNamespaces []string) { + if len(srcNamespaces) != len(dstNamespaces) { + panic("source and destination namespaces are not the same length") + } + + for i, srcNs := range srcNamespaces { + dstNs := dstNamespaces[i] + if _, exist := nsmap.srcDstNsMap[srcNs]; exist { + panic("another mapping already exists for source namespace " + srcNs) + } + if _, exist := nsmap.dstSrcNsMap[dstNs]; exist { + panic("another mapping already exists for destination namespace " + dstNs) + } + nsmap.srcDstNsMap[srcNs] = dstNs + nsmap.dstSrcNsMap[dstNs] = srcNs + } +} + +func (nsmap *NSMap) Len() int { + if len(nsmap.srcDstNsMap) != len(nsmap.dstSrcNsMap) { + panic("source and destination namespaces are not the same length") + } + + return len(nsmap.srcDstNsMap) +} + +func (nsmap *NSMap) GetDstNamespace(srcNamespace string) (string, bool) { + ns, ok := nsmap.srcDstNsMap[srcNamespace] + return ns, ok +} + +func (nsmap *NSMap) GetSrcNamespace(dstNamespace string) (string, bool) { + ns, ok := nsmap.dstSrcNsMap[dstNamespace] + return ns, ok +} diff --git a/internal/verifier/nsmap_test.go b/internal/verifier/nsmap_test.go new file mode 100644 index 00000000..6094e755 --- /dev/null +++ b/internal/verifier/nsmap_test.go @@ -0,0 +1,38 @@ +package verifier + +import ( + "github.com/stretchr/testify/suite" + "testing" +) + +type UnitTestSuite struct { + suite.Suite +} + +func TestUnitTestSuite(t *testing.T) { + ts := new(UnitTestSuite) + suite.Run(t, ts) +} + +func (s *UnitTestSuite) Test_EmptyNsMap() { + nsMap := NewNSMap() + srcNamespaces := []string{"srcDB.A", "srcDB.B"} + dstNamespaces := []string{"dstDB.B", "dstDB.A"} + nsMap.PopulateWithNamespaces(srcNamespaces, dstNamespaces) + s.Require().Equal(2, nsMap.Len()) + + _, ok := nsMap.GetDstNamespace("non-existent.coll") + s.Require().False(ok) + + for i, srcNs := range srcNamespaces { + gotNs, ok := nsMap.GetDstNamespace(srcNs) + s.Require().True(ok) + s.Require().Equal(dstNamespaces[i], gotNs) + } + + for i, dstNs := range dstNamespaces { + gotNs, ok := nsMap.GetSrcNamespace(dstNs) + s.Require().True(ok) + s.Require().Equal(srcNamespaces[i], gotNs) + } +} diff --git a/internal/verifier/recheck.go b/internal/verifier/recheck.go index 49954580..b2c47f86 100644 --- a/internal/verifier/recheck.go +++ b/internal/verifier/recheck.go @@ -24,18 +24,16 @@ const ( // RecheckPrimaryKey stores the implicit type of recheck to perform // Currently, we only handle document mismatches/change stream updates, -// so DatabaseName, CollectionName, and DocumentID must always be specified. +// so SrcDatabaseName, SrcCollectionName, and DocumentID must always be specified. // // NB: Order is important here so that, within a given generation, // sorting by _id will guarantee that all rechecks for a given // namespace appear consecutively. -// -// DatabaseName and CollectionName should be on the source. type RecheckPrimaryKey struct { - Generation int `bson:"generation"` - DatabaseName string `bson:"db"` - CollectionName string `bson:"coll"` - DocumentID interface{} `bson:"docID"` + Generation int `bson:"generation"` + SrcDatabaseName string `bson:"db"` + SrcCollectionName string `bson:"coll"` + DocumentID interface{} `bson:"docID"` } // RecheckDoc stores the necessary information to know which documents must be rechecked. @@ -109,10 +107,10 @@ func (verifier *Verifier) insertRecheckDocs( models := make([]mongo.WriteModel, len(curThreadIndexes)) for m, i := range curThreadIndexes { pk := RecheckPrimaryKey{ - Generation: generation, - DatabaseName: dbNames[i], - CollectionName: collNames[i], - DocumentID: documentIDs[i], + Generation: generation, + SrcDatabaseName: dbNames[i], + SrcCollectionName: collNames[i], + DocumentID: documentIDs[i], } // The filter must exclude DataSize; otherwise, if a failed comparison @@ -302,8 +300,8 @@ func (verifier *Verifier) GenerateRecheckTasks(ctx context.Context) error { // - the buffered document IDs’ size exceeds the per-task maximum // - the buffered documents exceed the partition size // - if doc.PrimaryKey.DatabaseName != prevDBName || - doc.PrimaryKey.CollectionName != prevCollName || + if doc.PrimaryKey.SrcDatabaseName != prevDBName || + doc.PrimaryKey.SrcCollectionName != prevCollName || int64(len(idAccum)) > maxDocsPerTask || idLenAccum >= maxIdsPerRecheckTask || dataSizeAccum >= verifier.partitionSizeInBytes { @@ -313,8 +311,8 @@ func (verifier *Verifier) GenerateRecheckTasks(ctx context.Context) error { return err } - prevDBName = doc.PrimaryKey.DatabaseName - prevCollName = doc.PrimaryKey.CollectionName + prevDBName = doc.PrimaryKey.SrcDatabaseName + prevCollName = doc.PrimaryKey.SrcCollectionName idLenAccum = 0 dataSizeAccum = 0 idAccum = idAccum[:0] diff --git a/internal/verifier/recheck_test.go b/internal/verifier/recheck_test.go index 60e18b03..f5a67772 100644 --- a/internal/verifier/recheck_test.go +++ b/internal/verifier/recheck_test.go @@ -28,10 +28,10 @@ func (suite *IntegrationTestSuite) TestFailedCompareThenReplace() { []RecheckDoc{ { PrimaryKey: RecheckPrimaryKey{ - Generation: verifier.generation, - DatabaseName: "the", - CollectionName: "namespace", - DocumentID: "theDocID", + Generation: verifier.generation, + SrcDatabaseName: "the", + SrcCollectionName: "namespace", + DocumentID: "theDocID", }, DataSize: 1234, }, @@ -60,10 +60,10 @@ func (suite *IntegrationTestSuite) TestFailedCompareThenReplace() { []RecheckDoc{ { PrimaryKey: RecheckPrimaryKey{ - Generation: verifier.generation, - DatabaseName: "the", - CollectionName: "namespace", - DocumentID: "theDocID", + Generation: verifier.generation, + SrcDatabaseName: "the", + SrcCollectionName: "namespace", + DocumentID: "theDocID", }, DataSize: len(event.FullDocument), }, @@ -101,10 +101,10 @@ func (suite *IntegrationTestSuite) TestLargeIDInsertions() { d1 := RecheckDoc{ PrimaryKey: RecheckPrimaryKey{ - Generation: 0, - DatabaseName: "testDB", - CollectionName: "testColl", - DocumentID: id1, + Generation: 0, + SrcDatabaseName: "testDB", + SrcCollectionName: "testColl", + DocumentID: id1, }, DataSize: overlyLarge} d2 := d1 @@ -161,10 +161,10 @@ func (suite *IntegrationTestSuite) TestLargeDataInsertions() { suite.Require().NoError(err) d1 := RecheckDoc{ PrimaryKey: RecheckPrimaryKey{ - Generation: 0, - DatabaseName: "testDB", - CollectionName: "testColl", - DocumentID: id1, + Generation: 0, + SrcDatabaseName: "testDB", + SrcCollectionName: "testColl", + DocumentID: id1, }, DataSize: dataSizes[0]} d2 := d1 @@ -283,10 +283,10 @@ func (suite *IntegrationTestSuite) TestGenerationalClear() { d1 := RecheckDoc{ PrimaryKey: RecheckPrimaryKey{ - Generation: 0, - DatabaseName: "testDB", - CollectionName: "testColl", - DocumentID: id1, + Generation: 0, + SrcDatabaseName: "testDB", + SrcCollectionName: "testColl", + DocumentID: id1, }, DataSize: dataSizes[0]} d2 := d1 diff --git a/internal/verifier/verification_task.go b/internal/verifier/verification_task.go index 911a559e..dd72c93a 100644 --- a/internal/verifier/verification_task.go +++ b/internal/verifier/verification_task.go @@ -94,9 +94,9 @@ func (verifier *Verifier) insertCollectionVerificationTask( generation int) (*VerificationTask, error) { dstNamespace := srcNamespace - if len(verifier.srcDstNsMap) != 0 { + if verifier.nsMap.Len() != 0 { var ok bool - dstNamespace, ok = verifier.srcDstNsMap[srcNamespace] + dstNamespace, ok = verifier.nsMap.GetDstNamespace(srcNamespace) if !ok { return nil, fmt.Errorf("Could not find Namespace %s", srcNamespace) } @@ -155,9 +155,9 @@ func (verifier *Verifier) InsertPartitionVerificationTask(partition *partitions. func (verifier *Verifier) InsertDocumentRecheckTask(ids []interface{}, dataSize types.ByteCount, srcNamespace string) error { dstNamespace := srcNamespace - if len(verifier.srcDstNsMap) != 0 { + if verifier.nsMap.Len() != 0 { var ok bool - dstNamespace, ok = verifier.srcDstNsMap[srcNamespace] + dstNamespace, ok = verifier.nsMap.GetDstNamespace(srcNamespace) if !ok { return fmt.Errorf("Could not find Namespace %s", srcNamespace) } From 743dd6a6c5e01a6feb81636345d024dbc90f69f5 Mon Sep 17 00:00:00 2001 From: Jian Guan <61915096+tdq45gj@users.noreply.github.com> Date: Wed, 27 Nov 2024 14:30:50 -0500 Subject: [PATCH 11/32] Update change_stream_test.go --- internal/verifier/change_stream_test.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/internal/verifier/change_stream_test.go b/internal/verifier/change_stream_test.go index 53e9980e..d227daa1 100644 --- a/internal/verifier/change_stream_test.go +++ b/internal/verifier/change_stream_test.go @@ -460,8 +460,8 @@ func (suite *IntegrationTestSuite) TestCreateForbidden() { func (suite *IntegrationTestSuite) TestRecheckDocsWithDstChangeEvents() { ctx := suite.Context() - srcDBName := suite.DBNameForTest() + "_src" - dstDBName := suite.DBNameForTest() + "_dst" + srcDBName := suite.DBNameForTest("src") + dstDBName := suite.DBNameForTest("dst") db := suite.dstMongoClient.Database(dstDBName) coll1 := db.Collection("dstColl1") From cc34be478b64b978f304cd924fdbae14c00c786a Mon Sep 17 00:00:00 2001 From: Jian Guan <61915096+tdq45gj@users.noreply.github.com> Date: Wed, 27 Nov 2024 14:41:27 -0500 Subject: [PATCH 12/32] initialize nsmap --- internal/verifier/change_stream_test.go | 4 +++- internal/verifier/migration_verifier.go | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/internal/verifier/change_stream_test.go b/internal/verifier/change_stream_test.go index d227daa1..39c76c76 100644 --- a/internal/verifier/change_stream_test.go +++ b/internal/verifier/change_stream_test.go @@ -21,7 +21,9 @@ import ( ) func TestChangeStreamFilter(t *testing.T) { - verifier := Verifier{} + verifier := Verifier{ + nsMap: NewNSMap(), + } verifier.SetMetaDBName("metadb") verifier.initializeChangeStreamReaders() assert.Contains(t, diff --git a/internal/verifier/migration_verifier.go b/internal/verifier/migration_verifier.go index 3aae478c..1389fe1b 100644 --- a/internal/verifier/migration_verifier.go +++ b/internal/verifier/migration_verifier.go @@ -208,6 +208,7 @@ func NewVerifier(settings VerifierSettings) *Verifier { eventRecorder: NewEventRecorder(), verificationStatusCheckInterval: 15 * time.Second, + nsMap: NewNSMap(), } } @@ -389,7 +390,6 @@ func (verifier *Verifier) SetDstNamespaces(arg []string) { } func (verifier *Verifier) SetNamespaceMap() { - verifier.nsMap = NewNSMap() verifier.nsMap.PopulateWithNamespaces(verifier.srcNamespaces, verifier.dstNamespaces) } From 4ba0eda84c71190276caa9198cc18c69fe6a1480 Mon Sep 17 00:00:00 2001 From: Jian Guan <61915096+tdq45gj@users.noreply.github.com> Date: Wed, 27 Nov 2024 14:57:20 -0500 Subject: [PATCH 13/32] fix --- internal/verifier/change_stream_test.go | 16 +++++----------- internal/verifier/migration_verifier_test.go | 5 +++-- 2 files changed, 8 insertions(+), 13 deletions(-) diff --git a/internal/verifier/change_stream_test.go b/internal/verifier/change_stream_test.go index 39c76c76..decde993 100644 --- a/internal/verifier/change_stream_test.go +++ b/internal/verifier/change_stream_test.go @@ -3,7 +3,6 @@ package verifier import ( "context" "strings" - "testing" "time" "github.com/10gen/migration-verifier/internal/testutil" @@ -11,7 +10,6 @@ import ( "github.com/10gen/migration-verifier/mslices" "github.com/pkg/errors" "github.com/samber/lo" - "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" "go.mongodb.org/mongo-driver/bson" "go.mongodb.org/mongo-driver/bson/primitive" @@ -20,20 +18,16 @@ import ( "go.mongodb.org/mongo-driver/mongo/readconcern" ) -func TestChangeStreamFilter(t *testing.T) { - verifier := Verifier{ - nsMap: NewNSMap(), - } - verifier.SetMetaDBName("metadb") - verifier.initializeChangeStreamReaders() - assert.Contains(t, +func (suite *IntegrationTestSuite) TestChangeStreamFilter() { + verifier := suite.BuildVerifier() + suite.Assert().Contains( verifier.srcChangeStreamReader.GetChangeStreamFilter(), bson.D{ - {"$match", bson.D{{"ns.db", bson.D{{"$ne", "metadb"}}}}}, + {"$match", bson.D{{"ns.db", bson.D{{"$ne", metaDBName}}}}}, }, ) verifier.srcChangeStreamReader.namespaces = []string{"foo.bar", "foo.baz", "test.car", "test.chaz"} - assert.Contains(t, + suite.Assert().Contains( verifier.srcChangeStreamReader.GetChangeStreamFilter(), bson.D{{"$match", bson.D{ {"$or", []bson.D{ diff --git a/internal/verifier/migration_verifier_test.go b/internal/verifier/migration_verifier_test.go index 7e6996bc..2b967cf9 100644 --- a/internal/verifier/migration_verifier_test.go +++ b/internal/verifier/migration_verifier_test.go @@ -1387,7 +1387,7 @@ func (suite *IntegrationTestSuite) TestGenerationalRechecking() { // wait for generation to finish suite.Require().NoError(runner.AwaitGenerationEnd()) status = waitForTasks() - // there should be no failures now, since they are are equivalent at this point in time + // there should be no failures now, since they are equivalent at this point in time suite.Require().Equal(VerificationStatus{TotalTasks: 1, CompletedTasks: 1}, *status) // now insert in the source, this should come up next generation @@ -1415,7 +1415,7 @@ func (suite *IntegrationTestSuite) TestGenerationalRechecking() { suite.Require().NoError(runner.AwaitGenerationEnd()) status = waitForTasks() - // there should be no failures now, since they are are equivalent at this point in time + // there should be no failures now, since they are equivalent at this point in time suite.Assert().Equal(VerificationStatus{TotalTasks: 1, CompletedTasks: 1}, *status) // We could just abandon this verifier, but we might as well shut it down @@ -1613,6 +1613,7 @@ func (suite *IntegrationTestSuite) TestPartitionWithFilter() { // Set up the verifier for testing. verifier := suite.BuildVerifier() verifier.SetSrcNamespaces([]string{dbname + ".testColl1"}) + verifier.SetDstNamespaces([]string{dbname + ".testColl1"}) verifier.SetNamespaceMap() verifier.globalFilter = filter // Use a small partition size so that we can test creating multiple partitions. From b9d5f3797a6795a0d86884c0e543ab9a8a2dfd1c Mon Sep 17 00:00:00 2001 From: Jian Guan <61915096+tdq45gj@users.noreply.github.com> Date: Wed, 27 Nov 2024 16:21:45 -0500 Subject: [PATCH 14/32] fix generational rechecking test --- internal/verifier/migration_verifier_test.go | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/internal/verifier/migration_verifier_test.go b/internal/verifier/migration_verifier_test.go index 2b967cf9..0ba849ff 100644 --- a/internal/verifier/migration_verifier_test.go +++ b/internal/verifier/migration_verifier_test.go @@ -1390,6 +1390,12 @@ func (suite *IntegrationTestSuite) TestGenerationalRechecking() { // there should be no failures now, since they are equivalent at this point in time suite.Require().Equal(VerificationStatus{TotalTasks: 1, CompletedTasks: 1}, *status) + // The next generation should process the recheck task caused by inserting {_id: 2} on the destination. + suite.Require().NoError(runner.StartNextGeneration()) + suite.Require().NoError(runner.AwaitGenerationEnd()) + status = waitForTasks() + suite.Require().Equal(VerificationStatus{TotalTasks: 1, CompletedTasks: 1}, *status) + // now insert in the source, this should come up next generation _, err = srcColl.InsertOne(ctx, bson.M{"_id": 3, "x": 44}) suite.Require().NoError(err) From 9eed4548f4ff38e6065479ee07af649fd9162dbb Mon Sep 17 00:00:00 2001 From: Jian Guan <61915096+tdq45gj@users.noreply.github.com> Date: Wed, 27 Nov 2024 16:47:35 -0500 Subject: [PATCH 15/32] Add end-to-end test --- internal/verifier/migration_verifier_test.go | 57 ++++++++++++++++++++ 1 file changed, 57 insertions(+) diff --git a/internal/verifier/migration_verifier_test.go b/internal/verifier/migration_verifier_test.go index 0ba849ff..c0675ace 100644 --- a/internal/verifier/migration_verifier_test.go +++ b/internal/verifier/migration_verifier_test.go @@ -1553,6 +1553,63 @@ func (suite *IntegrationTestSuite) TestVerifierWithFilter() { <-checkDoneChan } +func (suite *IntegrationTestSuite) TestChangesOnDstBeforeSrc() { + ctx := suite.Context() + collName := "mycoll" + + srcDB := suite.srcMongoClient.Database(suite.DBNameForTest()) + dstDB := suite.dstMongoClient.Database(suite.DBNameForTest()) + suite.Require().NoError(srcDB.CreateCollection(ctx, collName)) + suite.Require().NoError(dstDB.CreateCollection(ctx, collName)) + + verifier := suite.BuildVerifier() + runner := RunVerifierCheck(ctx, suite.T(), verifier) + + // Dry run generation 0 to make sure change stream reader is started. + suite.Require().NoError(runner.AwaitGenerationEnd()) + + suite.Require().NoError(runner.StartNextGeneration()) + // Insert two documents in generation 1. They should be batched and become a verify task in generation 2. + _, err := dstDB.Collection(collName).InsertOne(ctx, bson.D{{"_id", 1}}) + suite.Require().NoError(err) + _, err = dstDB.Collection(collName).InsertOne(ctx, bson.D{{"_id", 2}}) + suite.Require().NoError(err) + suite.Require().NoError(runner.AwaitGenerationEnd()) + + // Run generation 2 and get verification status. + suite.Require().NoError(runner.StartNextGeneration()) + suite.Require().NoError(runner.AwaitGenerationEnd()) + status, err := verifier.GetVerificationStatus(ctx) + suite.Require().NoError(err) + suite.Assert().Equal( + 1, + status.FailedTasks, + ) + + // Patch up only one of the two mismatched documents in generation 3. + suite.Require().NoError(runner.StartNextGeneration()) + _, err = srcDB.Collection(collName).InsertOne(ctx, bson.D{{"_id", 1}}) + suite.Require().NoError(err) + suite.Require().NoError(runner.AwaitGenerationEnd()) + status, err = verifier.GetVerificationStatus(ctx) + suite.Require().NoError(err) + suite.Assert().Equal( + 1, + status.FailedTasks, + ) + + // Patch up both of the 2 mismatched documents in generation 4. + suite.Require().NoError(runner.StartNextGeneration()) + _, err = srcDB.Collection(collName).InsertOne(ctx, bson.D{{"_id", 2}}) + suite.Require().NoError(err) + suite.Require().NoError(runner.AwaitGenerationEnd()) + + // Everything should match by the end of it. + status, err = verifier.GetVerificationStatus(ctx) + suite.Require().NoError(err) + suite.Assert().Zero(status.FailedTasks) +} + func (suite *IntegrationTestSuite) TestBackgroundInIndexSpec() { ctx := suite.Context() From 281afc04cd2c90ede859233bbef776d1a2026181 Mon Sep 17 00:00:00 2001 From: Jian Guan <61915096+tdq45gj@users.noreply.github.com> Date: Wed, 27 Nov 2024 18:11:11 -0500 Subject: [PATCH 16/32] wait for change event handler --- internal/verifier/change_stream.go | 28 +++++++++++++++++-------- internal/verifier/change_stream_test.go | 16 +++++++------- internal/verifier/check.go | 4 +++- 3 files changed, 31 insertions(+), 17 deletions(-) diff --git a/internal/verifier/change_stream.go b/internal/verifier/change_stream.go index 9122e3a9..5218c25b 100644 --- a/internal/verifier/change_stream.go +++ b/internal/verifier/change_stream.go @@ -3,6 +3,7 @@ package verifier import ( "context" "fmt" + "golang.org/x/sync/errgroup" "time" "github.com/10gen/migration-verifier/internal/keystring" @@ -82,7 +83,6 @@ func (verifier *Verifier) initializeChangeStreamReaders() { watcherClient: verifier.srcClient, buildInfo: *verifier.srcBuildInfo, changeStreamRunning: false, - ChangeEventBatchChan: make(chan []ParsedEvent), ChangeStreamWritesOffTsChan: make(chan primitive.Timestamp), ChangeStreamErrChan: make(chan error), ChangeStreamDoneChan: make(chan struct{}), @@ -95,7 +95,6 @@ func (verifier *Verifier) initializeChangeStreamReaders() { watcherClient: verifier.dstClient, buildInfo: *verifier.dstBuildInfo, changeStreamRunning: false, - ChangeEventBatchChan: make(chan []ParsedEvent), ChangeStreamWritesOffTsChan: make(chan primitive.Timestamp), ChangeStreamErrChan: make(chan error), ChangeStreamDoneChan: make(chan struct{}), @@ -103,20 +102,24 @@ func (verifier *Verifier) initializeChangeStreamReaders() { } // StartChangeEventHandler starts a goroutine that handles change event batches from the reader. -func (verifier *Verifier) StartChangeEventHandler(ctx context.Context, reader *ChangeStreamReader) { - go func() { +func (verifier *Verifier) StartChangeEventHandler(ctx context.Context, reader *ChangeStreamReader, errGroup *errgroup.Group) { + errGroup.Go(func() error { for { select { case <-ctx.Done(): - return - case batch := <-reader.ChangeEventBatchChan: + return ctx.Err() + case batch, more := <-reader.ChangeEventBatchChan: + if !more { + // Change stream reader has closed the event batch channel because it has finished. + return nil + } err := verifier.HandleChangeStreamEvents(ctx, batch, reader.readerType) if err != nil { - reader.ChangeStreamErrChan <- err + return err } } } - }() + }) } // HandleChangeStreamEvents performs the necessary work for change stream events after receiving a batch. @@ -298,6 +301,8 @@ func (csr *ChangeStreamReader) iterateChangeStream( ) error { var lastPersistedTime time.Time + defer close(csr.ChangeEventBatchChan) + persistResumeTokenIfNeeded := func() error { if time.Since(lastPersistedTime) <= minChangeStreamPersistInterval { return nil @@ -328,7 +333,7 @@ func (csr *ChangeStreamReader) iterateChangeStream( case writesOffTs := <-csr.ChangeStreamWritesOffTsChan: csr.logger.Debug(). Interface("writesOffTimestamp", writesOffTs). - Msg("Change stream thread received writesOff timestamp. Finalizing change stream.") + Msgf("%s thread received writesOff timestamp. Finalizing change stream.", csr) gotwritesOffTimestamp = true @@ -477,6 +482,11 @@ func (csr *ChangeStreamReader) StartChangeStream(ctx context.Context) error { // there's no chance of "nonsense" like both channels returning a payload. initialCreateResultChan := make(chan mo.Result[primitive.Timestamp]) + // Change stream reader closes ChangeEventBatchChan each time after + // finish reading change events. + // It needs to be re-initialized when a change stream starts. + csr.ChangeEventBatchChan = make(chan []ParsedEvent) + go func() { retryer := retry.New(retry.DefaultDurationLimit) retryer = retryer.WithErrorCodes(util.CursorKilled) diff --git a/internal/verifier/change_stream_test.go b/internal/verifier/change_stream_test.go index decde993..6526a7d6 100644 --- a/internal/verifier/change_stream_test.go +++ b/internal/verifier/change_stream_test.go @@ -2,6 +2,7 @@ package verifier import ( "context" + "golang.org/x/sync/errgroup" "strings" "time" @@ -54,7 +55,7 @@ func (suite *IntegrationTestSuite) TestChangeStreamResumability() { verifier1 := suite.BuildVerifier() ctx, cancel := context.WithCancel(suite.Context()) defer cancel() - verifier1.StartChangeEventHandler(ctx, verifier1.srcChangeStreamReader) + verifier1.StartChangeEventHandler(ctx, verifier1.srcChangeStreamReader, &errgroup.Group{}) err := verifier1.srcChangeStreamReader.StartChangeStream(ctx) suite.Require().NoError(err) }() @@ -80,7 +81,7 @@ func (suite *IntegrationTestSuite) TestChangeStreamResumability() { newTime := suite.getClusterTime(ctx, suite.srcMongoClient) - verifier2.StartChangeEventHandler(ctx, verifier2.srcChangeStreamReader) + verifier2.StartChangeEventHandler(ctx, verifier2.srcChangeStreamReader, &errgroup.Group{}) err = verifier2.srcChangeStreamReader.StartChangeStream(ctx) suite.Require().NoError(err) @@ -155,7 +156,7 @@ func (suite *IntegrationTestSuite) TestStartAtTimeNoChanges() { suite.Require().NoError(err) origStartTs := sess.OperationTime() suite.Require().NotNil(origStartTs) - verifier.StartChangeEventHandler(ctx, verifier.srcChangeStreamReader) + verifier.StartChangeEventHandler(ctx, verifier.srcChangeStreamReader, &errgroup.Group{}) err = verifier.srcChangeStreamReader.StartChangeStream(ctx) suite.Require().NoError(err) suite.Require().Equal(verifier.srcChangeStreamReader.startAtTs, origStartTs) @@ -176,7 +177,7 @@ func (suite *IntegrationTestSuite) TestStartAtTimeWithChanges() { origSessionTime := sess.OperationTime() suite.Require().NotNil(origSessionTime) - verifier.StartChangeEventHandler(ctx, verifier.srcChangeStreamReader) + verifier.StartChangeEventHandler(ctx, verifier.srcChangeStreamReader, &errgroup.Group{}) err = verifier.srcChangeStreamReader.StartChangeStream(ctx) suite.Require().NoError(err) @@ -228,7 +229,7 @@ func (suite *IntegrationTestSuite) TestNoStartAtTime() { suite.Require().NoError(err) origStartTs := sess.OperationTime() suite.Require().NotNil(origStartTs) - verifier.StartChangeEventHandler(ctx, verifier.srcChangeStreamReader) + verifier.StartChangeEventHandler(ctx, verifier.srcChangeStreamReader, &errgroup.Group{}) err = verifier.srcChangeStreamReader.StartChangeStream(ctx) suite.Require().NoError(err) suite.Require().NotNil(verifier.srcChangeStreamReader.startAtTs) @@ -248,7 +249,7 @@ func (suite *IntegrationTestSuite) TestWithChangeEventsBatching() { verifier := suite.BuildVerifier() - verifier.StartChangeEventHandler(ctx, verifier.srcChangeStreamReader) + verifier.StartChangeEventHandler(ctx, verifier.srcChangeStreamReader, &errgroup.Group{}) suite.Require().NoError(verifier.srcChangeStreamReader.StartChangeStream(ctx)) _, err := coll1.InsertOne(ctx, bson.D{{"_id", 1}}) @@ -389,6 +390,7 @@ func (suite *IntegrationTestSuite) testInsertsBeforeWritesOff(docsCount int) { lo.ToAnySlice(docs), ) suite.Require().NoError(err) + //fmt.Println(fmt.Sprintf("src cluster time %v", suite.getClusterTime(ctx, suite.srcMongoClient))) suite.Require().NoError(verifier.WritesOff(ctx)) @@ -472,7 +474,7 @@ func (suite *IntegrationTestSuite) TestRecheckDocsWithDstChangeEvents() { verifier.SetDstNamespaces([]string{dstDBName + ".dstColl1", dstDBName + ".dstColl2"}) verifier.SetNamespaceMap() - verifier.StartChangeEventHandler(ctx, verifier.dstChangeStreamReader) + verifier.StartChangeEventHandler(ctx, verifier.dstChangeStreamReader, &errgroup.Group{}) suite.Require().NoError(verifier.dstChangeStreamReader.StartChangeStream(ctx)) _, err := coll1.InsertOne(ctx, bson.D{{"_id", 1}}) diff --git a/internal/verifier/check.go b/internal/verifier/check.go index de9ff9ac..bdf96288 100644 --- a/internal/verifier/check.go +++ b/internal/verifier/check.go @@ -207,6 +207,7 @@ func (verifier *Verifier) CheckDriver(ctx context.Context, filter map[string]any verifier.phase = Idle }() + ceHandlerGroup := &errgroup.Group{} for _, csReader := range []*ChangeStreamReader{verifier.srcChangeStreamReader, verifier.dstChangeStreamReader} { if csReader.changeStreamRunning { verifier.logger.Debug().Msgf("Check: %s already running.", csReader) @@ -217,7 +218,7 @@ func (verifier *Verifier) CheckDriver(ctx context.Context, filter map[string]any if err != nil { return errors.Wrapf(err, "failed to start %s", csReader) } - verifier.StartChangeEventHandler(ctx, csReader) + verifier.StartChangeEventHandler(ctx, csReader, ceHandlerGroup) } } @@ -293,6 +294,7 @@ func (verifier *Verifier) CheckDriver(ctx context.Context, filter map[string]any if err = verifier.waitForChangeStream(ctx, verifier.dstChangeStreamReader); err != nil { return err } + ceHandlerGroup.Wait() verifier.mux.Lock() verifier.lastGeneration = true } From c96a4956103c546c0cb5024287b394498e1db058 Mon Sep 17 00:00:00 2001 From: Jian Guan <61915096+tdq45gj@users.noreply.github.com> Date: Wed, 27 Nov 2024 18:27:05 -0500 Subject: [PATCH 17/32] fix race condition in tests --- internal/verifier/change_stream.go | 1 + internal/verifier/change_stream_test.go | 31 +++++++++++-------------- 2 files changed, 14 insertions(+), 18 deletions(-) diff --git a/internal/verifier/change_stream.go b/internal/verifier/change_stream.go index 5218c25b..e592cab4 100644 --- a/internal/verifier/change_stream.go +++ b/internal/verifier/change_stream.go @@ -102,6 +102,7 @@ func (verifier *Verifier) initializeChangeStreamReaders() { } // StartChangeEventHandler starts a goroutine that handles change event batches from the reader. +// It needs to be started after the reader starts. func (verifier *Verifier) StartChangeEventHandler(ctx context.Context, reader *ChangeStreamReader, errGroup *errgroup.Group) { errGroup.Go(func() error { for { diff --git a/internal/verifier/change_stream_test.go b/internal/verifier/change_stream_test.go index 6526a7d6..52a8b29a 100644 --- a/internal/verifier/change_stream_test.go +++ b/internal/verifier/change_stream_test.go @@ -41,6 +41,12 @@ func (suite *IntegrationTestSuite) TestChangeStreamFilter() { ) } +func (suite *IntegrationTestSuite) startSrcChangeStreamReaderAndHandler(ctx context.Context, verifier *Verifier) { + err := verifier.srcChangeStreamReader.StartChangeStream(ctx) + suite.Require().NoError(err) + verifier.StartChangeEventHandler(ctx, verifier.srcChangeStreamReader, &errgroup.Group{}) +} + // TestChangeStreamResumability creates a verifier, starts its change stream, // terminates that verifier, updates the source cluster, starts a new // verifier with change stream, and confirms that things look as they should. @@ -55,9 +61,7 @@ func (suite *IntegrationTestSuite) TestChangeStreamResumability() { verifier1 := suite.BuildVerifier() ctx, cancel := context.WithCancel(suite.Context()) defer cancel() - verifier1.StartChangeEventHandler(ctx, verifier1.srcChangeStreamReader, &errgroup.Group{}) - err := verifier1.srcChangeStreamReader.StartChangeStream(ctx) - suite.Require().NoError(err) + suite.startSrcChangeStreamReaderAndHandler(ctx, verifier1) }() ctx, cancel := context.WithCancel(suite.Context()) @@ -81,9 +85,7 @@ func (suite *IntegrationTestSuite) TestChangeStreamResumability() { newTime := suite.getClusterTime(ctx, suite.srcMongoClient) - verifier2.StartChangeEventHandler(ctx, verifier2.srcChangeStreamReader, &errgroup.Group{}) - err = verifier2.srcChangeStreamReader.StartChangeStream(ctx) - suite.Require().NoError(err) + suite.startSrcChangeStreamReaderAndHandler(ctx, verifier2) suite.Require().NotNil(verifier2.srcChangeStreamReader.startAtTs) @@ -156,9 +158,7 @@ func (suite *IntegrationTestSuite) TestStartAtTimeNoChanges() { suite.Require().NoError(err) origStartTs := sess.OperationTime() suite.Require().NotNil(origStartTs) - verifier.StartChangeEventHandler(ctx, verifier.srcChangeStreamReader, &errgroup.Group{}) - err = verifier.srcChangeStreamReader.StartChangeStream(ctx) - suite.Require().NoError(err) + suite.startSrcChangeStreamReaderAndHandler(ctx, verifier) suite.Require().Equal(verifier.srcChangeStreamReader.startAtTs, origStartTs) verifier.srcChangeStreamReader.ChangeStreamWritesOffTsChan <- *origStartTs <-verifier.srcChangeStreamReader.ChangeStreamDoneChan @@ -177,9 +177,7 @@ func (suite *IntegrationTestSuite) TestStartAtTimeWithChanges() { origSessionTime := sess.OperationTime() suite.Require().NotNil(origSessionTime) - verifier.StartChangeEventHandler(ctx, verifier.srcChangeStreamReader, &errgroup.Group{}) - err = verifier.srcChangeStreamReader.StartChangeStream(ctx) - suite.Require().NoError(err) + suite.startSrcChangeStreamReaderAndHandler(ctx, verifier) // srcStartAtTs derives from the change stream’s resume token, which can // postdate our session time but should not precede it. @@ -229,9 +227,7 @@ func (suite *IntegrationTestSuite) TestNoStartAtTime() { suite.Require().NoError(err) origStartTs := sess.OperationTime() suite.Require().NotNil(origStartTs) - verifier.StartChangeEventHandler(ctx, verifier.srcChangeStreamReader, &errgroup.Group{}) - err = verifier.srcChangeStreamReader.StartChangeStream(ctx) - suite.Require().NoError(err) + suite.startSrcChangeStreamReaderAndHandler(ctx, verifier) suite.Require().NotNil(verifier.srcChangeStreamReader.startAtTs) suite.Require().LessOrEqual(origStartTs.Compare(*verifier.srcChangeStreamReader.startAtTs), 0) } @@ -249,8 +245,7 @@ func (suite *IntegrationTestSuite) TestWithChangeEventsBatching() { verifier := suite.BuildVerifier() - verifier.StartChangeEventHandler(ctx, verifier.srcChangeStreamReader, &errgroup.Group{}) - suite.Require().NoError(verifier.srcChangeStreamReader.StartChangeStream(ctx)) + suite.startSrcChangeStreamReaderAndHandler(ctx, verifier) _, err := coll1.InsertOne(ctx, bson.D{{"_id", 1}}) suite.Require().NoError(err) @@ -474,8 +469,8 @@ func (suite *IntegrationTestSuite) TestRecheckDocsWithDstChangeEvents() { verifier.SetDstNamespaces([]string{dstDBName + ".dstColl1", dstDBName + ".dstColl2"}) verifier.SetNamespaceMap() - verifier.StartChangeEventHandler(ctx, verifier.dstChangeStreamReader, &errgroup.Group{}) suite.Require().NoError(verifier.dstChangeStreamReader.StartChangeStream(ctx)) + verifier.StartChangeEventHandler(ctx, verifier.dstChangeStreamReader, &errgroup.Group{}) _, err := coll1.InsertOne(ctx, bson.D{{"_id", 1}}) suite.Require().NoError(err) From e89d17595bf237cf18d7d949aaca95dfb64f6895 Mon Sep 17 00:00:00 2001 From: Jian Guan <61915096+tdq45gj@users.noreply.github.com> Date: Wed, 27 Nov 2024 18:35:11 -0500 Subject: [PATCH 18/32] Update change_stream.go --- internal/verifier/change_stream.go | 1 + 1 file changed, 1 insertion(+) diff --git a/internal/verifier/change_stream.go b/internal/verifier/change_stream.go index e592cab4..7bfd4451 100644 --- a/internal/verifier/change_stream.go +++ b/internal/verifier/change_stream.go @@ -116,6 +116,7 @@ func (verifier *Verifier) StartChangeEventHandler(ctx context.Context, reader *C } err := verifier.HandleChangeStreamEvents(ctx, batch, reader.readerType) if err != nil { + reader.ChangeStreamErrChan <- err return err } } From f14962fdaf71beb2f6fb061a522f1548c981b69d Mon Sep 17 00:00:00 2001 From: Jian Guan <61915096+tdq45gj@users.noreply.github.com> Date: Wed, 27 Nov 2024 18:36:54 -0500 Subject: [PATCH 19/32] Update check.go --- internal/verifier/check.go | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/internal/verifier/check.go b/internal/verifier/check.go index bdf96288..13cb017d 100644 --- a/internal/verifier/check.go +++ b/internal/verifier/check.go @@ -294,7 +294,9 @@ func (verifier *Verifier) CheckDriver(ctx context.Context, filter map[string]any if err = verifier.waitForChangeStream(ctx, verifier.dstChangeStreamReader); err != nil { return err } - ceHandlerGroup.Wait() + if err = ceHandlerGroup.Wait(); err != nil { + return err + } verifier.mux.Lock() verifier.lastGeneration = true } From be93d0583d1c380b55e1b29317b84f3298ffa4c9 Mon Sep 17 00:00:00 2001 From: Jian Guan <61915096+tdq45gj@users.noreply.github.com> Date: Fri, 29 Nov 2024 12:29:47 -0500 Subject: [PATCH 20/32] wait for recheck docs creation --- internal/verifier/change_stream.go | 5 ++++- internal/verifier/migration_verifier_test.go | 19 +++++++++++++++++++ 2 files changed, 23 insertions(+), 1 deletion(-) diff --git a/internal/verifier/change_stream.go b/internal/verifier/change_stream.go index 7bfd4451..854c0ebf 100644 --- a/internal/verifier/change_stream.go +++ b/internal/verifier/change_stream.go @@ -114,6 +114,7 @@ func (verifier *Verifier) StartChangeEventHandler(ctx context.Context, reader *C // Change stream reader has closed the event batch channel because it has finished. return nil } + verifier.logger.Trace().Msgf("Verifier is handling a change event batch from %s: %v", reader, batch) err := verifier.HandleChangeStreamEvents(ctx, batch, reader.readerType) if err != nil { reader.ChangeStreamErrChan <- err @@ -277,6 +278,8 @@ func (csr *ChangeStreamReader) readAndHandleOneChangeEventBatch( return errors.Wrapf(err, "failed to decode change event to %T", changeEventBatch[eventsRead]) } + csr.logger.Trace().Msgf("%s received a change event: %v", csr, changeEventBatch[eventsRead]) + if changeEventBatch[eventsRead].ClusterTime != nil && (csr.lastChangeEventTime == nil || csr.lastChangeEventTime.Before(*changeEventBatch[eventsRead].ClusterTime)) { @@ -606,7 +609,7 @@ func (csr *ChangeStreamReader) persistChangeStreamResumeToken(ctx context.Contex Msg("failed to extract resume token timestamp") } - logEvent.Msg("Persisted change stream resume token.") + logEvent.Msgf("Persisted %s's resume token.", csr) return nil } diff --git a/internal/verifier/migration_verifier_test.go b/internal/verifier/migration_verifier_test.go index c0675ace..0a615701 100644 --- a/internal/verifier/migration_verifier_test.go +++ b/internal/verifier/migration_verifier_test.go @@ -1553,8 +1553,22 @@ func (suite *IntegrationTestSuite) TestVerifierWithFilter() { <-checkDoneChan } +func (suite *IntegrationTestSuite) waitForRecheckDocs(verifier *Verifier) { + suite.Eventually(func() bool { + cursor, err := suite.metaMongoClient.Database(verifier.metaDBName).Collection(recheckQueue).Find(suite.Context(), bson.D{}) + var docs []bson.D + suite.Require().NoError(err) + suite.Require().NoError(cursor.All(suite.Context(), &docs)) + return len(docs) > 0 + }, 1*time.Minute, 100*time.Millisecond) +} + func (suite *IntegrationTestSuite) TestChangesOnDstBeforeSrc() { + zerolog.SetGlobalLevel(zerolog.TraceLevel) + defer zerolog.SetGlobalLevel(zerolog.DebugLevel) + ctx := suite.Context() + collName := "mycoll" srcDB := suite.srcMongoClient.Database(suite.DBNameForTest()) @@ -1574,7 +1588,9 @@ func (suite *IntegrationTestSuite) TestChangesOnDstBeforeSrc() { suite.Require().NoError(err) _, err = dstDB.Collection(collName).InsertOne(ctx, bson.D{{"_id", 2}}) suite.Require().NoError(err) + suite.Require().NoError(runner.AwaitGenerationEnd()) + suite.waitForRecheckDocs(verifier) // Run generation 2 and get verification status. suite.Require().NoError(runner.StartNextGeneration()) @@ -1591,6 +1607,8 @@ func (suite *IntegrationTestSuite) TestChangesOnDstBeforeSrc() { _, err = srcDB.Collection(collName).InsertOne(ctx, bson.D{{"_id", 1}}) suite.Require().NoError(err) suite.Require().NoError(runner.AwaitGenerationEnd()) + suite.waitForRecheckDocs(verifier) + status, err = verifier.GetVerificationStatus(ctx) suite.Require().NoError(err) suite.Assert().Equal( @@ -1603,6 +1621,7 @@ func (suite *IntegrationTestSuite) TestChangesOnDstBeforeSrc() { _, err = srcDB.Collection(collName).InsertOne(ctx, bson.D{{"_id", 2}}) suite.Require().NoError(err) suite.Require().NoError(runner.AwaitGenerationEnd()) + suite.waitForRecheckDocs(verifier) // Everything should match by the end of it. status, err = verifier.GetVerificationStatus(ctx) From c1b7ead518c89b0fc7f273629c8b00447e9464c3 Mon Sep 17 00:00:00 2001 From: Jian Guan <61915096+tdq45gj@users.noreply.github.com> Date: Fri, 29 Nov 2024 13:25:22 -0500 Subject: [PATCH 21/32] fix --- internal/verifier/change_stream.go | 20 ++++++++++---------- internal/verifier/change_stream_test.go | 2 +- internal/verifier/check.go | 2 +- internal/verifier/nsmap_test.go | 3 ++- 4 files changed, 14 insertions(+), 13 deletions(-) diff --git a/internal/verifier/change_stream.go b/internal/verifier/change_stream.go index 854c0ebf..baadf716 100644 --- a/internal/verifier/change_stream.go +++ b/internal/verifier/change_stream.go @@ -3,7 +3,6 @@ package verifier import ( "context" "fmt" - "golang.org/x/sync/errgroup" "time" "github.com/10gen/migration-verifier/internal/keystring" @@ -17,6 +16,7 @@ import ( "go.mongodb.org/mongo-driver/bson/primitive" "go.mongodb.org/mongo-driver/mongo" "go.mongodb.org/mongo-driver/mongo/options" + "golang.org/x/sync/errgroup" ) const fauxDocSizeForDeleteEvents = 1024 @@ -83,6 +83,7 @@ func (verifier *Verifier) initializeChangeStreamReaders() { watcherClient: verifier.srcClient, buildInfo: *verifier.srcBuildInfo, changeStreamRunning: false, + ChangeEventBatchChan: make(chan []ParsedEvent), ChangeStreamWritesOffTsChan: make(chan primitive.Timestamp), ChangeStreamErrChan: make(chan error), ChangeStreamDoneChan: make(chan struct{}), @@ -95,6 +96,7 @@ func (verifier *Verifier) initializeChangeStreamReaders() { watcherClient: verifier.dstClient, buildInfo: *verifier.dstBuildInfo, changeStreamRunning: false, + ChangeEventBatchChan: make(chan []ParsedEvent), ChangeStreamWritesOffTsChan: make(chan primitive.Timestamp), ChangeStreamErrChan: make(chan error), ChangeStreamDoneChan: make(chan struct{}), @@ -111,7 +113,7 @@ func (verifier *Verifier) StartChangeEventHandler(ctx context.Context, reader *C return ctx.Err() case batch, more := <-reader.ChangeEventBatchChan: if !more { - // Change stream reader has closed the event batch channel because it has finished. + verifier.logger.Debug().Msgf("Change Event Batch Channel has been closed by %s, returning...", reader) return nil } verifier.logger.Trace().Msgf("Verifier is handling a change event batch from %s: %v", reader, batch) @@ -279,6 +281,7 @@ func (csr *ChangeStreamReader) readAndHandleOneChangeEventBatch( } csr.logger.Trace().Msgf("%s received a change event: %v", csr, changeEventBatch[eventsRead]) + fmt.Printf("%d %d\n", changeEventBatch[eventsRead].ClusterTime.T, changeEventBatch[eventsRead].ClusterTime.I) if changeEventBatch[eventsRead].ClusterTime != nil && (csr.lastChangeEventTime == nil || @@ -306,8 +309,6 @@ func (csr *ChangeStreamReader) iterateChangeStream( ) error { var lastPersistedTime time.Time - defer close(csr.ChangeEventBatchChan) - persistResumeTokenIfNeeded := func() error { if time.Since(lastPersistedTime) <= minChangeStreamPersistInterval { return nil @@ -357,7 +358,7 @@ func (csr *ChangeStreamReader) iterateChangeStream( csr.logger.Debug(). Interface("currentTimestamp", curTs). Interface("writesOffTimestamp", writesOffTs). - Msg("Change stream has reached the writesOff timestamp. Shutting down.") + Msgf("%s has reached the writesOff timestamp. Shutting down.", csr) break } @@ -487,12 +488,11 @@ func (csr *ChangeStreamReader) StartChangeStream(ctx context.Context) error { // there's no chance of "nonsense" like both channels returning a payload. initialCreateResultChan := make(chan mo.Result[primitive.Timestamp]) - // Change stream reader closes ChangeEventBatchChan each time after - // finish reading change events. - // It needs to be re-initialized when a change stream starts. - csr.ChangeEventBatchChan = make(chan []ParsedEvent) - go func() { + // Closing ChangeEventBatchChan at the end of change stream goroutine + // notifies the verifier's change event handler to exit. + defer close(csr.ChangeEventBatchChan) + retryer := retry.New(retry.DefaultDurationLimit) retryer = retryer.WithErrorCodes(util.CursorKilled) diff --git a/internal/verifier/change_stream_test.go b/internal/verifier/change_stream_test.go index 52a8b29a..2be22fc9 100644 --- a/internal/verifier/change_stream_test.go +++ b/internal/verifier/change_stream_test.go @@ -2,7 +2,6 @@ package verifier import ( "context" - "golang.org/x/sync/errgroup" "strings" "time" @@ -17,6 +16,7 @@ import ( "go.mongodb.org/mongo-driver/mongo" "go.mongodb.org/mongo-driver/mongo/options" "go.mongodb.org/mongo-driver/mongo/readconcern" + "golang.org/x/sync/errgroup" ) func (suite *IntegrationTestSuite) TestChangeStreamFilter() { diff --git a/internal/verifier/check.go b/internal/verifier/check.go index 13cb017d..89f86e11 100644 --- a/internal/verifier/check.go +++ b/internal/verifier/check.go @@ -283,7 +283,7 @@ func (verifier *Verifier) CheckDriver(ctx context.Context, filter map[string]any // caught again on the next iteration. if verifier.writesOff { verifier.logger.Debug(). - Msg("Waiting for change stream to end.") + Msg("Waiting for change streams to end.") // It's necessary to wait for the change stream to finish before incrementing the // generation number, or the last changes will not be checked. diff --git a/internal/verifier/nsmap_test.go b/internal/verifier/nsmap_test.go index 6094e755..766fada3 100644 --- a/internal/verifier/nsmap_test.go +++ b/internal/verifier/nsmap_test.go @@ -1,8 +1,9 @@ package verifier import ( - "github.com/stretchr/testify/suite" "testing" + + "github.com/stretchr/testify/suite" ) type UnitTestSuite struct { From c119cb0bdb506d8cc3f23ee146c03cd9f3fd2c9c Mon Sep 17 00:00:00 2001 From: Jian Guan <61915096+tdq45gj@users.noreply.github.com> Date: Tue, 3 Dec 2024 09:23:50 -0500 Subject: [PATCH 22/32] Update change_stream_test.go --- internal/verifier/change_stream_test.go | 1 - 1 file changed, 1 deletion(-) diff --git a/internal/verifier/change_stream_test.go b/internal/verifier/change_stream_test.go index 2be22fc9..211b6231 100644 --- a/internal/verifier/change_stream_test.go +++ b/internal/verifier/change_stream_test.go @@ -385,7 +385,6 @@ func (suite *IntegrationTestSuite) testInsertsBeforeWritesOff(docsCount int) { lo.ToAnySlice(docs), ) suite.Require().NoError(err) - //fmt.Println(fmt.Sprintf("src cluster time %v", suite.getClusterTime(ctx, suite.srcMongoClient))) suite.Require().NoError(verifier.WritesOff(ctx)) From e302d9d309e366707f8221d2bf17d85da951ca33 Mon Sep 17 00:00:00 2001 From: Jian Guan <61915096+tdq45gj@users.noreply.github.com> Date: Tue, 3 Dec 2024 09:24:54 -0500 Subject: [PATCH 23/32] Update change_stream.go --- internal/verifier/change_stream.go | 1 - 1 file changed, 1 deletion(-) diff --git a/internal/verifier/change_stream.go b/internal/verifier/change_stream.go index baadf716..1fdc392a 100644 --- a/internal/verifier/change_stream.go +++ b/internal/verifier/change_stream.go @@ -281,7 +281,6 @@ func (csr *ChangeStreamReader) readAndHandleOneChangeEventBatch( } csr.logger.Trace().Msgf("%s received a change event: %v", csr, changeEventBatch[eventsRead]) - fmt.Printf("%d %d\n", changeEventBatch[eventsRead].ClusterTime.T, changeEventBatch[eventsRead].ClusterTime.I) if changeEventBatch[eventsRead].ClusterTime != nil && (csr.lastChangeEventTime == nil || From d15b0208805909b4ce506071df91e83bb75b38ca Mon Sep 17 00:00:00 2001 From: Jian Guan <61915096+tdq45gj@users.noreply.github.com> Date: Tue, 3 Dec 2024 11:51:22 -0500 Subject: [PATCH 24/32] refactors --- internal/verifier/change_stream.go | 100 ++++++++++++------------ internal/verifier/change_stream_test.go | 13 ++- internal/verifier/check.go | 14 ++-- internal/verifier/migration_verifier.go | 10 +-- 4 files changed, 68 insertions(+), 69 deletions(-) diff --git a/internal/verifier/change_stream.go b/internal/verifier/change_stream.go index 1fdc392a..ab40cd9d 100644 --- a/internal/verifier/change_stream.go +++ b/internal/verifier/change_stream.go @@ -16,7 +16,6 @@ import ( "go.mongodb.org/mongo-driver/bson/primitive" "go.mongodb.org/mongo-driver/mongo" "go.mongodb.org/mongo-driver/mongo/options" - "golang.org/x/sync/errgroup" ) const fauxDocSizeForDeleteEvents = 1024 @@ -65,66 +64,64 @@ type ChangeStreamReader struct { watcherClient *mongo.Client buildInfo util.BuildInfo - changeStreamRunning bool - ChangeEventBatchChan chan []ParsedEvent - ChangeStreamWritesOffTsChan chan primitive.Timestamp - ChangeStreamErrChan chan error - ChangeStreamDoneChan chan struct{} + changeStreamRunning bool + ChangeEventBatchChan chan []ParsedEvent + WritesOffTsChan chan primitive.Timestamp + ErrChan chan error + DoneChan chan struct{} startAtTs *primitive.Timestamp } func (verifier *Verifier) initializeChangeStreamReaders() { verifier.srcChangeStreamReader = &ChangeStreamReader{ - readerType: srcReaderType, - logger: verifier.logger, - namespaces: verifier.srcNamespaces, - metaDB: verifier.metaClient.Database(verifier.metaDBName), - watcherClient: verifier.srcClient, - buildInfo: *verifier.srcBuildInfo, - changeStreamRunning: false, - ChangeEventBatchChan: make(chan []ParsedEvent), - ChangeStreamWritesOffTsChan: make(chan primitive.Timestamp), - ChangeStreamErrChan: make(chan error), - ChangeStreamDoneChan: make(chan struct{}), + readerType: srcReaderType, + logger: verifier.logger, + namespaces: verifier.srcNamespaces, + metaDB: verifier.metaClient.Database(verifier.metaDBName), + watcherClient: verifier.srcClient, + buildInfo: *verifier.srcBuildInfo, + changeStreamRunning: false, + ChangeEventBatchChan: make(chan []ParsedEvent), + WritesOffTsChan: make(chan primitive.Timestamp), + ErrChan: make(chan error), + DoneChan: make(chan struct{}), } verifier.dstChangeStreamReader = &ChangeStreamReader{ - readerType: dstReaderType, - logger: verifier.logger, - namespaces: verifier.dstNamespaces, - metaDB: verifier.metaClient.Database(verifier.metaDBName), - watcherClient: verifier.dstClient, - buildInfo: *verifier.dstBuildInfo, - changeStreamRunning: false, - ChangeEventBatchChan: make(chan []ParsedEvent), - ChangeStreamWritesOffTsChan: make(chan primitive.Timestamp), - ChangeStreamErrChan: make(chan error), - ChangeStreamDoneChan: make(chan struct{}), + readerType: dstReaderType, + logger: verifier.logger, + namespaces: verifier.dstNamespaces, + metaDB: verifier.metaClient.Database(verifier.metaDBName), + watcherClient: verifier.dstClient, + buildInfo: *verifier.dstBuildInfo, + changeStreamRunning: false, + ChangeEventBatchChan: make(chan []ParsedEvent), + WritesOffTsChan: make(chan primitive.Timestamp), + ErrChan: make(chan error), + DoneChan: make(chan struct{}), } } // StartChangeEventHandler starts a goroutine that handles change event batches from the reader. // It needs to be started after the reader starts. -func (verifier *Verifier) StartChangeEventHandler(ctx context.Context, reader *ChangeStreamReader, errGroup *errgroup.Group) { - errGroup.Go(func() error { - for { - select { - case <-ctx.Done(): - return ctx.Err() - case batch, more := <-reader.ChangeEventBatchChan: - if !more { - verifier.logger.Debug().Msgf("Change Event Batch Channel has been closed by %s, returning...", reader) - return nil - } - verifier.logger.Trace().Msgf("Verifier is handling a change event batch from %s: %v", reader, batch) - err := verifier.HandleChangeStreamEvents(ctx, batch, reader.readerType) - if err != nil { - reader.ChangeStreamErrChan <- err - return err - } +func (verifier *Verifier) StartChangeEventHandler(ctx context.Context, reader *ChangeStreamReader) error { + for { + select { + case <-ctx.Done(): + return ctx.Err() + case batch, more := <-reader.ChangeEventBatchChan: + if !more { + verifier.logger.Debug().Msgf("Change Event Batch Channel has been closed by %s, returning...", reader) + return nil + } + verifier.logger.Trace().Msgf("Verifier is handling a change event batch from %s: %v", reader, batch) + err := verifier.HandleChangeStreamEvents(ctx, batch, reader.readerType) + if err != nil { + reader.ErrChan <- err + return err } } - }) + } } // HandleChangeStreamEvents performs the necessary work for change stream events after receiving a batch. @@ -280,7 +277,8 @@ func (csr *ChangeStreamReader) readAndHandleOneChangeEventBatch( return errors.Wrapf(err, "failed to decode change event to %T", changeEventBatch[eventsRead]) } - csr.logger.Trace().Msgf("%s received a change event: %v", csr, changeEventBatch[eventsRead]) + // This only logs in tests. + csr.logger.Trace().Interface("event", changeEventBatch[eventsRead]).Msgf("%s received a change event", csr) if changeEventBatch[eventsRead].ClusterTime != nil && (csr.lastChangeEventTime == nil || @@ -335,7 +333,7 @@ func (csr *ChangeStreamReader) iterateChangeStream( // source writes are ended and the migration tool is finished / committed. // This means we should exit rather than continue reading the change stream // since there should be no more events. - case writesOffTs := <-csr.ChangeStreamWritesOffTsChan: + case writesOffTs := <-csr.WritesOffTsChan: csr.logger.Debug(). Interface("writesOffTimestamp", writesOffTs). Msgf("%s thread received writesOff timestamp. Finalizing change stream.", csr) @@ -388,7 +386,7 @@ func (csr *ChangeStreamReader) iterateChangeStream( } // since we have started Recheck, we must signal that we have // finished the change stream changes so that Recheck can continue. - csr.ChangeStreamDoneChan <- struct{}{} + csr.DoneChan <- struct{}{} break } } @@ -526,8 +524,8 @@ func (csr *ChangeStreamReader) StartChangeStream(ctx context.Context) error { if err != nil { // NB: This failure always happens after the initial change stream // creation. - csr.ChangeStreamErrChan <- err - close(csr.ChangeStreamErrChan) + csr.ErrChan <- err + close(csr.ErrChan) } }() diff --git a/internal/verifier/change_stream_test.go b/internal/verifier/change_stream_test.go index 211b6231..d92b0c2c 100644 --- a/internal/verifier/change_stream_test.go +++ b/internal/verifier/change_stream_test.go @@ -16,7 +16,6 @@ import ( "go.mongodb.org/mongo-driver/mongo" "go.mongodb.org/mongo-driver/mongo/options" "go.mongodb.org/mongo-driver/mongo/readconcern" - "golang.org/x/sync/errgroup" ) func (suite *IntegrationTestSuite) TestChangeStreamFilter() { @@ -44,7 +43,7 @@ func (suite *IntegrationTestSuite) TestChangeStreamFilter() { func (suite *IntegrationTestSuite) startSrcChangeStreamReaderAndHandler(ctx context.Context, verifier *Verifier) { err := verifier.srcChangeStreamReader.StartChangeStream(ctx) suite.Require().NoError(err) - verifier.StartChangeEventHandler(ctx, verifier.srcChangeStreamReader, &errgroup.Group{}) + go verifier.StartChangeEventHandler(ctx, verifier.srcChangeStreamReader) } // TestChangeStreamResumability creates a verifier, starts its change stream, @@ -160,8 +159,8 @@ func (suite *IntegrationTestSuite) TestStartAtTimeNoChanges() { suite.Require().NotNil(origStartTs) suite.startSrcChangeStreamReaderAndHandler(ctx, verifier) suite.Require().Equal(verifier.srcChangeStreamReader.startAtTs, origStartTs) - verifier.srcChangeStreamReader.ChangeStreamWritesOffTsChan <- *origStartTs - <-verifier.srcChangeStreamReader.ChangeStreamDoneChan + verifier.srcChangeStreamReader.WritesOffTsChan <- *origStartTs + <-verifier.srcChangeStreamReader.DoneChan suite.Require().Equal(verifier.srcChangeStreamReader.startAtTs, origStartTs) } @@ -206,8 +205,8 @@ func (suite *IntegrationTestSuite) TestStartAtTimeWithChanges() { "session time after events should exceed the original", ) - verifier.srcChangeStreamReader.ChangeStreamWritesOffTsChan <- *postEventsSessionTime - <-verifier.srcChangeStreamReader.ChangeStreamDoneChan + verifier.srcChangeStreamReader.WritesOffTsChan <- *postEventsSessionTime + <-verifier.srcChangeStreamReader.DoneChan suite.Assert().Equal( *postEventsSessionTime, @@ -469,7 +468,7 @@ func (suite *IntegrationTestSuite) TestRecheckDocsWithDstChangeEvents() { verifier.SetNamespaceMap() suite.Require().NoError(verifier.dstChangeStreamReader.StartChangeStream(ctx)) - verifier.StartChangeEventHandler(ctx, verifier.dstChangeStreamReader, &errgroup.Group{}) + go verifier.StartChangeEventHandler(ctx, verifier.dstChangeStreamReader) _, err := coll1.InsertOne(ctx, bson.D{{"_id", 1}}) suite.Require().NoError(err) diff --git a/internal/verifier/check.go b/internal/verifier/check.go index 89f86e11..76ad661e 100644 --- a/internal/verifier/check.go +++ b/internal/verifier/check.go @@ -44,11 +44,11 @@ func (verifier *Verifier) waitForChangeStream(ctx context.Context, csr *ChangeSt select { case <-ctx.Done(): return ctx.Err() - case err := <-csr.ChangeStreamErrChan: + case err := <-csr.ErrChan: verifier.logger.Warn().Err(err). Msgf("Received error from %s.", csr) return err - case <-csr.ChangeStreamDoneChan: + case <-csr.DoneChan: verifier.logger.Debug(). Msgf("Received completion signal from %s.", csr) break @@ -82,9 +82,9 @@ func (verifier *Verifier) CheckWorker(ctxIn context.Context) error { // If the change stream fails, everything should stop. eg.Go(func() error { select { - case err := <-verifier.srcChangeStreamReader.ChangeStreamErrChan: + case err := <-verifier.srcChangeStreamReader.ErrChan: return errors.Wrapf(err, "%s failed", verifier.srcChangeStreamReader) - case err := <-verifier.dstChangeStreamReader.ChangeStreamErrChan: + case err := <-verifier.dstChangeStreamReader.ErrChan: return errors.Wrapf(err, "%s failed", verifier.dstChangeStreamReader) case <-ctx.Done(): return nil @@ -207,7 +207,7 @@ func (verifier *Verifier) CheckDriver(ctx context.Context, filter map[string]any verifier.phase = Idle }() - ceHandlerGroup := &errgroup.Group{} + ceHandlerGroup, groupCtx := errgroup.WithContext(ctx) for _, csReader := range []*ChangeStreamReader{verifier.srcChangeStreamReader, verifier.dstChangeStreamReader} { if csReader.changeStreamRunning { verifier.logger.Debug().Msgf("Check: %s already running.", csReader) @@ -218,7 +218,9 @@ func (verifier *Verifier) CheckDriver(ctx context.Context, filter map[string]any if err != nil { return errors.Wrapf(err, "failed to start %s", csReader) } - verifier.StartChangeEventHandler(ctx, csReader, ceHandlerGroup) + ceHandlerGroup.Go(func() error { + return verifier.StartChangeEventHandler(groupCtx, csReader) + }) } } diff --git a/internal/verifier/migration_verifier.go b/internal/verifier/migration_verifier.go index 1389fe1b..f6c898ba 100644 --- a/internal/verifier/migration_verifier.go +++ b/internal/verifier/migration_verifier.go @@ -270,18 +270,18 @@ func (verifier *Verifier) WritesOff(ctx context.Context) error { return errors.Wrapf(err, "failed to fetch destination's cluster time") } - // This has to happen outside the lock because the change stream + // This has to happen outside the lock because the change streams // might be inserting docs into the recheck queue, which happens // under the lock. select { - case verifier.srcChangeStreamReader.ChangeStreamWritesOffTsChan <- srcFinalTs: - case err := <-verifier.srcChangeStreamReader.ChangeStreamErrChan: + case verifier.srcChangeStreamReader.WritesOffTsChan <- srcFinalTs: + case err := <-verifier.srcChangeStreamReader.ErrChan: return errors.Wrapf(err, "tried to send writes-off timestamp to %s, but change stream already failed", verifier.srcChangeStreamReader) } select { - case verifier.dstChangeStreamReader.ChangeStreamWritesOffTsChan <- dstFinalTs: - case err := <-verifier.dstChangeStreamReader.ChangeStreamErrChan: + case verifier.dstChangeStreamReader.WritesOffTsChan <- dstFinalTs: + case err := <-verifier.dstChangeStreamReader.ErrChan: return errors.Wrapf(err, "tried to send writes-off timestamp to %s, but change stream already failed", verifier.dstChangeStreamReader) } From e56dcbc68d286d858a7dea606d963da47cdb1c6e Mon Sep 17 00:00:00 2001 From: Jian Guan <61915096+tdq45gj@users.noreply.github.com> Date: Tue, 3 Dec 2024 12:53:49 -0500 Subject: [PATCH 25/32] rename clusterType to whichCluster --- internal/verifier/change_stream.go | 16 ++++++++-------- internal/verifier/migration_verifier.go | 18 +++++++++--------- internal/verifier/migration_verifier_test.go | 14 +++++++------- internal/verifier/recheck_test.go | 2 +- 4 files changed, 25 insertions(+), 25 deletions(-) diff --git a/internal/verifier/change_stream.go b/internal/verifier/change_stream.go index cbbf52c4..b5d982d0 100644 --- a/internal/verifier/change_stream.go +++ b/internal/verifier/change_stream.go @@ -54,7 +54,7 @@ func (uee UnknownEventError) Error() string { } type ChangeStreamReader struct { - readerType clusterType + readerType whichCluster lastChangeEventTime *primitive.Timestamp logger *logger.Logger @@ -75,7 +75,7 @@ type ChangeStreamReader struct { func (verifier *Verifier) initializeChangeStreamReaders() { verifier.srcChangeStreamReader = &ChangeStreamReader{ - readerType: srcReaderType, + readerType: src, logger: verifier.logger, namespaces: verifier.srcNamespaces, metaDB: verifier.metaClient.Database(verifier.metaDBName), @@ -88,7 +88,7 @@ func (verifier *Verifier) initializeChangeStreamReaders() { DoneChan: make(chan struct{}), } verifier.dstChangeStreamReader = &ChangeStreamReader{ - readerType: dstReaderType, + readerType: dst, logger: verifier.logger, namespaces: verifier.dstNamespaces, metaDB: verifier.metaClient.Database(verifier.metaDBName), @@ -125,7 +125,7 @@ func (verifier *Verifier) StartChangeEventHandler(ctx context.Context, reader *C } // HandleChangeStreamEvents performs the necessary work for change stream events after receiving a batch. -func (verifier *Verifier) HandleChangeStreamEvents(ctx context.Context, batch []ParsedEvent, eventOrigin clusterType) error { +func (verifier *Verifier) HandleChangeStreamEvents(ctx context.Context, batch []ParsedEvent, eventOrigin whichCluster) error { if len(batch) == 0 { return nil } @@ -153,7 +153,7 @@ func (verifier *Verifier) HandleChangeStreamEvents(ctx context.Context, batch [] // Recheck Docs are keyed by source namespaces. // We need to retrieve the source namespaces if change events are from the destination. switch eventOrigin { - case dstReaderType: + case dst: if verifier.nsMap.Len() == 0 { // Namespace is not remapped. Source namespace is the same as the destination. srcDBName = changeEvent.Ns.DB @@ -166,7 +166,7 @@ func (verifier *Verifier) HandleChangeStreamEvents(ctx context.Context, batch [] } srcDBName, srcCollName = SplitNamespace(srcNs) } - case srcReaderType: + case src: srcDBName = changeEvent.Ns.DB srcCollName = changeEvent.Ns.Coll default: @@ -574,9 +574,9 @@ func (csr *ChangeStreamReader) String() string { func (csr *ChangeStreamReader) resumeTokenDocID() string { switch csr.readerType { - case srcReaderType: + case src: return "srcResumeToken" - case dstReaderType: + case dst: return "dstResumeToken" default: panic("unknown readerType: " + csr.readerType) diff --git a/internal/verifier/migration_verifier.go b/internal/verifier/migration_verifier.go index d5bc49b1..14d0ea96 100644 --- a/internal/verifier/migration_verifier.go +++ b/internal/verifier/migration_verifier.go @@ -76,11 +76,11 @@ const ( notOkSymbol = "\u2757" // heavy exclamation mark symbol ) -type clusterType string +type whichCluster string const ( - srcReaderType clusterType = "source" - dstReaderType clusterType = "destination" + src whichCluster = "source" + dst whichCluster = "destination" ) var timeFormat = time.RFC3339 @@ -202,13 +202,13 @@ func NewVerifier(settings VerifierSettings, logPath string) *Verifier { logger: logger, writer: logWriter, - phase: Idle, - numWorkers: NumWorkers, - readPreference: readpref.Primary(), - partitionSizeInBytes: 400 * 1024 * 1024, - failureDisplaySize: DefaultFailureDisplaySize, + phase: Idle, + numWorkers: NumWorkers, + readPreference: readpref.Primary(), + partitionSizeInBytes: 400 * 1024 * 1024, + failureDisplaySize: DefaultFailureDisplaySize, - readConcernSetting: readConcern, + readConcernSetting: readConcern, // This will get recreated once gen0 starts, but we want it // here in case the change streams gets an event before then. diff --git a/internal/verifier/migration_verifier_test.go b/internal/verifier/migration_verifier_test.go index f4855c4c..38ba0124 100644 --- a/internal/verifier/migration_verifier_test.go +++ b/internal/verifier/migration_verifier_test.go @@ -153,7 +153,7 @@ func (suite *IntegrationTestSuite) TestGetNamespaceStatistics_Recheck() { ID: "heyhey", }, }}, - srcReaderType, + src, ) suite.Require().NoError(err) @@ -169,7 +169,7 @@ func (suite *IntegrationTestSuite) TestGetNamespaceStatistics_Recheck() { ID: "hoohoo", }, }}, - srcReaderType, + src, ) suite.Require().NoError(err) @@ -413,19 +413,19 @@ func (suite *IntegrationTestSuite) TestFailedVerificationTaskInsertions() { }, } - err = verifier.HandleChangeStreamEvents(ctx, []ParsedEvent{event}, srcReaderType) + err = verifier.HandleChangeStreamEvents(ctx, []ParsedEvent{event}, src) suite.Require().NoError(err) event.OpType = "insert" - err = verifier.HandleChangeStreamEvents(ctx, []ParsedEvent{event}, srcReaderType) + err = verifier.HandleChangeStreamEvents(ctx, []ParsedEvent{event}, src) suite.Require().NoError(err) event.OpType = "replace" - err = verifier.HandleChangeStreamEvents(ctx, []ParsedEvent{event}, srcReaderType) + err = verifier.HandleChangeStreamEvents(ctx, []ParsedEvent{event}, src) suite.Require().NoError(err) event.OpType = "update" - err = verifier.HandleChangeStreamEvents(ctx, []ParsedEvent{event}, srcReaderType) + err = verifier.HandleChangeStreamEvents(ctx, []ParsedEvent{event}, src) suite.Require().NoError(err) event.OpType = "flibbity" - err = verifier.HandleChangeStreamEvents(ctx, []ParsedEvent{event}, srcReaderType) + err = verifier.HandleChangeStreamEvents(ctx, []ParsedEvent{event}, src) badEventErr := UnknownEventError{} suite.Require().ErrorAs(err, &badEventErr) suite.Assert().Equal("flibbity", badEventErr.Event.OpType) diff --git a/internal/verifier/recheck_test.go b/internal/verifier/recheck_test.go index 164dd61e..c9ae8b5e 100644 --- a/internal/verifier/recheck_test.go +++ b/internal/verifier/recheck_test.go @@ -53,7 +53,7 @@ func (suite *IntegrationTestSuite) TestFailedCompareThenReplace() { FullDocument: testutil.MustMarshal(bson.D{{"foo", 1}}), } - err := verifier.HandleChangeStreamEvents(ctx, []ParsedEvent{event}, srcReaderType) + err := verifier.HandleChangeStreamEvents(ctx, []ParsedEvent{event}, src) suite.Require().NoError(err) recheckDocs = suite.fetchRecheckDocs(ctx, verifier) From c91eb0d912e385b1295da4b76139398227ba24fb Mon Sep 17 00:00:00 2001 From: Jian Guan <61915096+tdq45gj@users.noreply.github.com> Date: Tue, 3 Dec 2024 13:08:57 -0500 Subject: [PATCH 26/32] reorder controls in TestChangesOnDstBeforeSrc --- internal/verifier/migration_verifier_test.go | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/internal/verifier/migration_verifier_test.go b/internal/verifier/migration_verifier_test.go index 38ba0124..dc90419c 100644 --- a/internal/verifier/migration_verifier_test.go +++ b/internal/verifier/migration_verifier_test.go @@ -1582,12 +1582,12 @@ func (suite *IntegrationTestSuite) TestChangesOnDstBeforeSrc() { // Dry run generation 0 to make sure change stream reader is started. suite.Require().NoError(runner.AwaitGenerationEnd()) - suite.Require().NoError(runner.StartNextGeneration()) // Insert two documents in generation 1. They should be batched and become a verify task in generation 2. _, err := dstDB.Collection(collName).InsertOne(ctx, bson.D{{"_id", 1}}) suite.Require().NoError(err) _, err = dstDB.Collection(collName).InsertOne(ctx, bson.D{{"_id", 2}}) suite.Require().NoError(err) + suite.Require().NoError(runner.StartNextGeneration()) suite.Require().NoError(runner.AwaitGenerationEnd()) suite.waitForRecheckDocs(verifier) @@ -1603,9 +1603,10 @@ func (suite *IntegrationTestSuite) TestChangesOnDstBeforeSrc() { ) // Patch up only one of the two mismatched documents in generation 3. - suite.Require().NoError(runner.StartNextGeneration()) _, err = srcDB.Collection(collName).InsertOne(ctx, bson.D{{"_id", 1}}) suite.Require().NoError(err) + + suite.Require().NoError(runner.StartNextGeneration()) suite.Require().NoError(runner.AwaitGenerationEnd()) suite.waitForRecheckDocs(verifier) @@ -1617,9 +1618,9 @@ func (suite *IntegrationTestSuite) TestChangesOnDstBeforeSrc() { ) // Patch up both of the 2 mismatched documents in generation 4. - suite.Require().NoError(runner.StartNextGeneration()) _, err = srcDB.Collection(collName).InsertOne(ctx, bson.D{{"_id", 2}}) suite.Require().NoError(err) + suite.Require().NoError(runner.StartNextGeneration()) suite.Require().NoError(runner.AwaitGenerationEnd()) suite.waitForRecheckDocs(verifier) From faa2e24909cb4ff33d4b2a4801a8c7a405015d2d Mon Sep 17 00:00:00 2001 From: Jian Guan <61915096+tdq45gj@users.noreply.github.com> Date: Tue, 3 Dec 2024 14:23:35 -0500 Subject: [PATCH 27/32] Update migration_verifier_test.go --- internal/verifier/migration_verifier_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/internal/verifier/migration_verifier_test.go b/internal/verifier/migration_verifier_test.go index dc90419c..a151b1e3 100644 --- a/internal/verifier/migration_verifier_test.go +++ b/internal/verifier/migration_verifier_test.go @@ -1335,7 +1335,7 @@ func (suite *IntegrationTestSuite) TestGenerationalRechecking() { dbname1 := suite.DBNameForTest("1") dbname2 := suite.DBNameForTest("2") - zerolog.SetGlobalLevel(zerolog.DebugLevel) + zerolog.SetGlobalLevel(zerolog.TraceLevel) verifier := suite.BuildVerifier() verifier.SetSrcNamespaces([]string{dbname1 + ".testColl1"}) verifier.SetDstNamespaces([]string{dbname2 + ".testColl3"}) From c71e9ef956a5f5f399ba1996116c99a4d60e0ca3 Mon Sep 17 00:00:00 2001 From: Jian Guan <61915096+tdq45gj@users.noreply.github.com> Date: Tue, 3 Dec 2024 15:39:48 -0500 Subject: [PATCH 28/32] Update change_stream_test.go --- internal/verifier/change_stream_test.go | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/internal/verifier/change_stream_test.go b/internal/verifier/change_stream_test.go index eec31c75..2a905431 100644 --- a/internal/verifier/change_stream_test.go +++ b/internal/verifier/change_stream_test.go @@ -43,7 +43,9 @@ func (suite *IntegrationTestSuite) TestChangeStreamFilter() { func (suite *IntegrationTestSuite) startSrcChangeStreamReaderAndHandler(ctx context.Context, verifier *Verifier) { err := verifier.srcChangeStreamReader.StartChangeStream(ctx) suite.Require().NoError(err) - go verifier.StartChangeEventHandler(ctx, verifier.srcChangeStreamReader) + go func() { + suite.Require().NoError(verifier.StartChangeEventHandler(ctx, verifier.srcChangeStreamReader)) + }() } // TestChangeStreamResumability creates a verifier, starts its change stream, @@ -468,7 +470,9 @@ func (suite *IntegrationTestSuite) TestRecheckDocsWithDstChangeEvents() { verifier.SetNamespaceMap() suite.Require().NoError(verifier.dstChangeStreamReader.StartChangeStream(ctx)) - go verifier.StartChangeEventHandler(ctx, verifier.dstChangeStreamReader) + go func() { + suite.Require().NoError(verifier.StartChangeEventHandler(ctx, verifier.dstChangeStreamReader)) + }() _, err := coll1.InsertOne(ctx, bson.D{{"_id", 1}}) suite.Require().NoError(err) From 18a4cb37da107bd7a83311f8fb0cc0acf3bc96a2 Mon Sep 17 00:00:00 2001 From: Jian Guan <61915096+tdq45gj@users.noreply.github.com> Date: Tue, 3 Dec 2024 16:18:04 -0500 Subject: [PATCH 29/32] reduce log level --- internal/verifier/change_stream_test.go | 6 +++++- internal/verifier/migration_verifier_test.go | 1 + 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/internal/verifier/change_stream_test.go b/internal/verifier/change_stream_test.go index 2a905431..6b3e0e96 100644 --- a/internal/verifier/change_stream_test.go +++ b/internal/verifier/change_stream_test.go @@ -44,7 +44,11 @@ func (suite *IntegrationTestSuite) startSrcChangeStreamReaderAndHandler(ctx cont err := verifier.srcChangeStreamReader.StartChangeStream(ctx) suite.Require().NoError(err) go func() { - suite.Require().NoError(verifier.StartChangeEventHandler(ctx, verifier.srcChangeStreamReader)) + err := verifier.StartChangeEventHandler(ctx, verifier.srcChangeStreamReader) + if errors.Is(err, context.Canceled) { + return + } + suite.Require().NoError(err) }() } diff --git a/internal/verifier/migration_verifier_test.go b/internal/verifier/migration_verifier_test.go index a151b1e3..2d3c666c 100644 --- a/internal/verifier/migration_verifier_test.go +++ b/internal/verifier/migration_verifier_test.go @@ -1336,6 +1336,7 @@ func (suite *IntegrationTestSuite) TestGenerationalRechecking() { dbname2 := suite.DBNameForTest("2") zerolog.SetGlobalLevel(zerolog.TraceLevel) + defer zerolog.SetGlobalLevel(zerolog.DebugLevel) verifier := suite.BuildVerifier() verifier.SetSrcNamespaces([]string{dbname1 + ".testColl1"}) verifier.SetDstNamespaces([]string{dbname2 + ".testColl3"}) From 3961b2dbcc52f6a7d78f0a93241e04b601b09f04 Mon Sep 17 00:00:00 2001 From: Jian Guan <61915096+tdq45gj@users.noreply.github.com> Date: Tue, 3 Dec 2024 16:22:49 -0500 Subject: [PATCH 30/32] Update change_stream_test.go --- internal/verifier/change_stream_test.go | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/internal/verifier/change_stream_test.go b/internal/verifier/change_stream_test.go index 6b3e0e96..7bbae085 100644 --- a/internal/verifier/change_stream_test.go +++ b/internal/verifier/change_stream_test.go @@ -475,7 +475,11 @@ func (suite *IntegrationTestSuite) TestRecheckDocsWithDstChangeEvents() { suite.Require().NoError(verifier.dstChangeStreamReader.StartChangeStream(ctx)) go func() { - suite.Require().NoError(verifier.StartChangeEventHandler(ctx, verifier.dstChangeStreamReader)) + err := verifier.StartChangeEventHandler(ctx, verifier.dstChangeStreamReader) + if errors.Is(err, context.Canceled) { + return + } + suite.Require().NoError(err) }() _, err := coll1.InsertOne(ctx, bson.D{{"_id", 1}}) From c2da8c46224e2764a6f4dd394c46e78fdab0081e Mon Sep 17 00:00:00 2001 From: Jian Guan <61915096+tdq45gj@users.noreply.github.com> Date: Tue, 3 Dec 2024 16:33:50 -0500 Subject: [PATCH 31/32] change to private fields --- internal/verifier/change_stream.go | 42 ++++++++++++------------- internal/verifier/change_stream_test.go | 8 ++--- internal/verifier/check.go | 8 ++--- internal/verifier/migration_verifier.go | 12 ++++--- 4 files changed, 36 insertions(+), 34 deletions(-) diff --git a/internal/verifier/change_stream.go b/internal/verifier/change_stream.go index b5d982d0..60c282a1 100644 --- a/internal/verifier/change_stream.go +++ b/internal/verifier/change_stream.go @@ -65,10 +65,10 @@ type ChangeStreamReader struct { clusterInfo util.ClusterInfo changeStreamRunning bool - ChangeEventBatchChan chan []ParsedEvent - WritesOffTsChan chan primitive.Timestamp - ErrChan chan error - DoneChan chan struct{} + changeEventBatchChan chan []ParsedEvent + writesOffTsChan chan primitive.Timestamp + errChan chan error + doneChan chan struct{} startAtTs *primitive.Timestamp } @@ -82,10 +82,10 @@ func (verifier *Verifier) initializeChangeStreamReaders() { watcherClient: verifier.srcClient, clusterInfo: *verifier.srcClusterInfo, changeStreamRunning: false, - ChangeEventBatchChan: make(chan []ParsedEvent), - WritesOffTsChan: make(chan primitive.Timestamp), - ErrChan: make(chan error), - DoneChan: make(chan struct{}), + changeEventBatchChan: make(chan []ParsedEvent), + writesOffTsChan: make(chan primitive.Timestamp), + errChan: make(chan error), + doneChan: make(chan struct{}), } verifier.dstChangeStreamReader = &ChangeStreamReader{ readerType: dst, @@ -95,10 +95,10 @@ func (verifier *Verifier) initializeChangeStreamReaders() { watcherClient: verifier.dstClient, clusterInfo: *verifier.dstClusterInfo, changeStreamRunning: false, - ChangeEventBatchChan: make(chan []ParsedEvent), - WritesOffTsChan: make(chan primitive.Timestamp), - ErrChan: make(chan error), - DoneChan: make(chan struct{}), + changeEventBatchChan: make(chan []ParsedEvent), + writesOffTsChan: make(chan primitive.Timestamp), + errChan: make(chan error), + doneChan: make(chan struct{}), } } @@ -109,7 +109,7 @@ func (verifier *Verifier) StartChangeEventHandler(ctx context.Context, reader *C select { case <-ctx.Done(): return ctx.Err() - case batch, more := <-reader.ChangeEventBatchChan: + case batch, more := <-reader.changeEventBatchChan: if !more { verifier.logger.Debug().Msgf("Change Event Batch Channel has been closed by %s, returning...", reader) return nil @@ -117,7 +117,7 @@ func (verifier *Verifier) StartChangeEventHandler(ctx context.Context, reader *C verifier.logger.Trace().Msgf("Verifier is handling a change event batch from %s: %v", reader, batch) err := verifier.HandleChangeStreamEvents(ctx, batch, reader.readerType) if err != nil { - reader.ErrChan <- err + reader.errChan <- err return err } } @@ -295,7 +295,7 @@ func (csr *ChangeStreamReader) readAndHandleOneChangeEventBatch( return nil } - csr.ChangeEventBatchChan <- changeEventBatch + csr.changeEventBatchChan <- changeEventBatch return nil } @@ -333,7 +333,7 @@ func (csr *ChangeStreamReader) iterateChangeStream( // source writes are ended and the migration tool is finished / committed. // This means we should exit rather than continue reading the change stream // since there should be no more events. - case writesOffTs := <-csr.WritesOffTsChan: + case writesOffTs := <-csr.writesOffTsChan: csr.logger.Debug(). Interface("writesOffTimestamp", writesOffTs). Msgf("%s thread received writesOff timestamp. Finalizing change stream.", csr) @@ -386,7 +386,7 @@ func (csr *ChangeStreamReader) iterateChangeStream( } // since we have started Recheck, we must signal that we have // finished the change stream changes so that Recheck can continue. - csr.DoneChan <- struct{}{} + csr.doneChan <- struct{}{} break } } @@ -486,9 +486,9 @@ func (csr *ChangeStreamReader) StartChangeStream(ctx context.Context) error { initialCreateResultChan := make(chan mo.Result[primitive.Timestamp]) go func() { - // Closing ChangeEventBatchChan at the end of change stream goroutine + // Closing changeEventBatchChan at the end of change stream goroutine // notifies the verifier's change event handler to exit. - defer close(csr.ChangeEventBatchChan) + defer close(csr.changeEventBatchChan) retryer := retry.New(retry.DefaultDurationLimit) retryer = retryer.WithErrorCodes(util.CursorKilled) @@ -524,8 +524,8 @@ func (csr *ChangeStreamReader) StartChangeStream(ctx context.Context) error { if err != nil { // NB: This failure always happens after the initial change stream // creation. - csr.ErrChan <- err - close(csr.ErrChan) + csr.errChan <- err + close(csr.errChan) } }() diff --git a/internal/verifier/change_stream_test.go b/internal/verifier/change_stream_test.go index 7bbae085..105046c1 100644 --- a/internal/verifier/change_stream_test.go +++ b/internal/verifier/change_stream_test.go @@ -165,8 +165,8 @@ func (suite *IntegrationTestSuite) TestStartAtTimeNoChanges() { suite.Require().NotNil(origStartTs) suite.startSrcChangeStreamReaderAndHandler(ctx, verifier) suite.Require().Equal(verifier.srcChangeStreamReader.startAtTs, origStartTs) - verifier.srcChangeStreamReader.WritesOffTsChan <- *origStartTs - <-verifier.srcChangeStreamReader.DoneChan + verifier.srcChangeStreamReader.writesOffTsChan <- *origStartTs + <-verifier.srcChangeStreamReader.doneChan suite.Require().Equal(verifier.srcChangeStreamReader.startAtTs, origStartTs) } @@ -211,8 +211,8 @@ func (suite *IntegrationTestSuite) TestStartAtTimeWithChanges() { "session time after events should exceed the original", ) - verifier.srcChangeStreamReader.WritesOffTsChan <- *postEventsSessionTime - <-verifier.srcChangeStreamReader.DoneChan + verifier.srcChangeStreamReader.writesOffTsChan <- *postEventsSessionTime + <-verifier.srcChangeStreamReader.doneChan suite.Assert().Equal( *postEventsSessionTime, diff --git a/internal/verifier/check.go b/internal/verifier/check.go index 1790c4b3..f3538d10 100644 --- a/internal/verifier/check.go +++ b/internal/verifier/check.go @@ -44,11 +44,11 @@ func (verifier *Verifier) waitForChangeStream(ctx context.Context, csr *ChangeSt select { case <-ctx.Done(): return ctx.Err() - case err := <-csr.ErrChan: + case err := <-csr.errChan: verifier.logger.Warn().Err(err). Msgf("Received error from %s.", csr) return err - case <-csr.DoneChan: + case <-csr.doneChan: verifier.logger.Debug(). Msgf("Received completion signal from %s.", csr) break @@ -82,9 +82,9 @@ func (verifier *Verifier) CheckWorker(ctxIn context.Context) error { // If the change stream fails, everything should stop. eg.Go(func() error { select { - case err := <-verifier.srcChangeStreamReader.ErrChan: + case err := <-verifier.srcChangeStreamReader.errChan: return errors.Wrapf(err, "%s failed", verifier.srcChangeStreamReader) - case err := <-verifier.dstChangeStreamReader.ErrChan: + case err := <-verifier.dstChangeStreamReader.errChan: return errors.Wrapf(err, "%s failed", verifier.dstChangeStreamReader) case <-ctx.Done(): return nil diff --git a/internal/verifier/migration_verifier.go b/internal/verifier/migration_verifier.go index 14d0ea96..a018e87e 100644 --- a/internal/verifier/migration_verifier.go +++ b/internal/verifier/migration_verifier.go @@ -255,7 +255,6 @@ func (verifier *Verifier) WritesOff(ctx context.Context) error { return errors.New("writesOff already set") } verifier.writesOff = true - verifier.mux.Unlock() verifier.logger.Debug().Msg("Signalling that writes are done.") @@ -266,6 +265,7 @@ func (verifier *Verifier) WritesOff(ctx context.Context) error { ) if err != nil { + verifier.mux.Unlock() return errors.Wrapf(err, "failed to fetch source's cluster time") } @@ -276,21 +276,23 @@ func (verifier *Verifier) WritesOff(ctx context.Context) error { ) if err != nil { + verifier.mux.Unlock() return errors.Wrapf(err, "failed to fetch destination's cluster time") } + verifier.mux.Unlock() // This has to happen outside the lock because the change streams // might be inserting docs into the recheck queue, which happens // under the lock. select { - case verifier.srcChangeStreamReader.WritesOffTsChan <- srcFinalTs: - case err := <-verifier.srcChangeStreamReader.ErrChan: + case verifier.srcChangeStreamReader.writesOffTsChan <- srcFinalTs: + case err := <-verifier.srcChangeStreamReader.errChan: return errors.Wrapf(err, "tried to send writes-off timestamp to %s, but change stream already failed", verifier.srcChangeStreamReader) } select { - case verifier.dstChangeStreamReader.WritesOffTsChan <- dstFinalTs: - case err := <-verifier.dstChangeStreamReader.ErrChan: + case verifier.dstChangeStreamReader.writesOffTsChan <- dstFinalTs: + case err := <-verifier.dstChangeStreamReader.errChan: return errors.Wrapf(err, "tried to send writes-off timestamp to %s, but change stream already failed", verifier.dstChangeStreamReader) } From e3e901cfdebc20c91a855e39e91c81737e968c99 Mon Sep 17 00:00:00 2001 From: Jian Guan <61915096+tdq45gj@users.noreply.github.com> Date: Tue, 3 Dec 2024 17:13:26 -0500 Subject: [PATCH 32/32] Update migration_verifier_test.go --- internal/verifier/migration_verifier_test.go | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/internal/verifier/migration_verifier_test.go b/internal/verifier/migration_verifier_test.go index 2d3c666c..97eb49b4 100644 --- a/internal/verifier/migration_verifier_test.go +++ b/internal/verifier/migration_verifier_test.go @@ -1584,12 +1584,11 @@ func (suite *IntegrationTestSuite) TestChangesOnDstBeforeSrc() { suite.Require().NoError(runner.AwaitGenerationEnd()) // Insert two documents in generation 1. They should be batched and become a verify task in generation 2. + suite.Require().NoError(runner.StartNextGeneration()) _, err := dstDB.Collection(collName).InsertOne(ctx, bson.D{{"_id", 1}}) suite.Require().NoError(err) _, err = dstDB.Collection(collName).InsertOne(ctx, bson.D{{"_id", 2}}) suite.Require().NoError(err) - suite.Require().NoError(runner.StartNextGeneration()) - suite.Require().NoError(runner.AwaitGenerationEnd()) suite.waitForRecheckDocs(verifier) @@ -1604,10 +1603,9 @@ func (suite *IntegrationTestSuite) TestChangesOnDstBeforeSrc() { ) // Patch up only one of the two mismatched documents in generation 3. + suite.Require().NoError(runner.StartNextGeneration()) _, err = srcDB.Collection(collName).InsertOne(ctx, bson.D{{"_id", 1}}) suite.Require().NoError(err) - - suite.Require().NoError(runner.StartNextGeneration()) suite.Require().NoError(runner.AwaitGenerationEnd()) suite.waitForRecheckDocs(verifier) @@ -1618,10 +1616,10 @@ func (suite *IntegrationTestSuite) TestChangesOnDstBeforeSrc() { status.FailedTasks, ) - // Patch up both of the 2 mismatched documents in generation 4. + // Patch up the other mismatched document in generation 4. + suite.Require().NoError(runner.StartNextGeneration()) _, err = srcDB.Collection(collName).InsertOne(ctx, bson.D{{"_id", 2}}) suite.Require().NoError(err) - suite.Require().NoError(runner.StartNextGeneration()) suite.Require().NoError(runner.AwaitGenerationEnd()) suite.waitForRecheckDocs(verifier)