Skip to content

Commit d25eb42

Browse files
committed
Revert "revert non-test changes to ensure that existing tests verify"
This reverts commit 293d7b4.
1 parent 0eda704 commit d25eb42

File tree

4 files changed

+119
-91
lines changed

4 files changed

+119
-91
lines changed

internal/verifier/change_stream.go

Lines changed: 75 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,6 @@ import (
1212
"go.mongodb.org/mongo-driver/bson/primitive"
1313
"go.mongodb.org/mongo-driver/mongo"
1414
"go.mongodb.org/mongo-driver/mongo/options"
15-
"golang.org/x/exp/constraints"
1615
)
1716

1817
const fauxDocSizeForDeleteEvents = 1024
@@ -105,6 +104,11 @@ func (verifier *Verifier) HandleChangeStreamEvents(ctx context.Context, batch []
105104
// GetChangeStreamFilter returns an aggregation pipeline that filters
106105
// namespaces as per configuration.
107106
//
107+
// Note that this omits verifier.globalFilter because we still need to
108+
// recheck any out-filter documents that may have changed in order to
109+
// account for filter traversals (i.e., updates that change whether a
110+
// document matches the filter).
111+
//
108112
// NB: Ideally we could make the change stream give $bsonSize(fullDocument)
109113
// and omit fullDocument, but $bsonSize was new in MongoDB 4.4, and we still
110114
// want to verify migrations from 4.2. fullDocument is unlikely to be a
@@ -122,55 +126,60 @@ func (verifier *Verifier) GetChangeStreamFilter() []bson.D {
122126
return []bson.D{stage}
123127
}
124128

125-
func (verifier *Verifier) iterateChangeStream(ctx context.Context, cs *mongo.ChangeStream) {
126-
defer cs.Close(ctx)
129+
func (verifier *Verifier) readAndHandleOneChangeEventBatch(ctx context.Context, cs *mongo.ChangeStream) error {
130+
eventsRead := 0
131+
var changeEventBatch []ParsedEvent
127132

128-
var lastPersistedTime time.Time
133+
for hasEventInBatch := true; hasEventInBatch; hasEventInBatch = cs.RemainingBatchLength() > 0 {
134+
gotEvent := cs.TryNext(ctx)
129135

130-
persistResumeTokenIfNeeded := func() error {
131-
if time.Since(lastPersistedTime) <= minChangeStreamPersistInterval {
132-
return nil
136+
if cs.Err() != nil {
137+
return errors.Wrap(cs.Err(), "change stream iteration failed")
133138
}
134139

135-
err := verifier.persistChangeStreamResumeToken(ctx, cs)
136-
if err == nil {
137-
lastPersistedTime = time.Now()
140+
if !gotEvent {
141+
break
138142
}
139143

140-
return err
144+
if changeEventBatch == nil {
145+
changeEventBatch = make([]ParsedEvent, cs.RemainingBatchLength()+1)
146+
}
147+
148+
if err := cs.Decode(&changeEventBatch[eventsRead]); err != nil {
149+
return errors.Wrap(err, "failed to decode change event")
150+
}
151+
152+
eventsRead++
141153
}
142154

143-
readAndHandleOneChangeEventBatch := func() (bool, error) {
144-
eventsRead := 0
145-
var changeEventBatch []ParsedEvent
155+
if eventsRead == 0 {
156+
return nil
157+
}
146158

147-
for hasEventInBatch := true; hasEventInBatch; hasEventInBatch = cs.RemainingBatchLength() > 0 {
148-
gotEvent := cs.TryNext(ctx)
159+
err := verifier.HandleChangeStreamEvents(ctx, changeEventBatch)
160+
if err != nil {
161+
return errors.Wrap(err, "failed to handle change events")
162+
}
149163

150-
if !gotEvent || cs.Err() != nil {
151-
break
152-
}
164+
return nil
165+
}
153166

154-
if changeEventBatch == nil {
155-
changeEventBatch = make([]ParsedEvent, cs.RemainingBatchLength()+1)
156-
}
167+
func (verifier *Verifier) iterateChangeStream(ctx context.Context, cs *mongo.ChangeStream) {
168+
defer cs.Close(ctx)
157169

158-
if err := cs.Decode(&changeEventBatch[eventsRead]); err != nil {
159-
return false, errors.Wrap(err, "failed to decode change event")
160-
}
170+
var lastPersistedTime time.Time
161171

162-
eventsRead++
172+
persistResumeTokenIfNeeded := func() error {
173+
if time.Since(lastPersistedTime) <= minChangeStreamPersistInterval {
174+
return nil
163175
}
164176

165-
if eventsRead > 0 {
166-
verifier.logger.Debug().Int("eventsCount", eventsRead).Msgf("Received a batch of events.")
167-
err := verifier.HandleChangeStreamEvents(ctx, changeEventBatch)
168-
if err != nil {
169-
return false, errors.Wrap(err, "failed to handle change events")
170-
}
177+
err := verifier.persistChangeStreamResumeToken(ctx, cs)
178+
if err == nil {
179+
lastPersistedTime = time.Now()
171180
}
172181

173-
return eventsRead > 0, errors.Wrap(cs.Err(), "change stream iteration failed")
182+
return err
174183
}
175184

176185
for {
@@ -189,29 +198,45 @@ func (verifier *Verifier) iterateChangeStream(ctx context.Context, cs *mongo.Cha
189198
// If the changeStreamEnderChan has a message, the user has indicated that
190199
// source writes are ended. This means we should exit rather than continue
191200
// reading the change stream since there should be no more events.
192-
case <-verifier.changeStreamEnderChan:
201+
case finalTs := <-verifier.changeStreamFinalTsChan:
193202
verifier.logger.Debug().
194-
Msg("Change stream thread received shutdown request.")
203+
Interface("finalTimestamp", finalTs).
204+
Msg("Change stream thread received final timestamp. Finalizing change stream.")
195205

196206
changeStreamEnded = true
197207

198208
// Read all change events until the source reports no events.
199209
// (i.e., the `getMore` call returns empty)
200210
for {
201-
var gotEvent bool
202-
gotEvent, err = readAndHandleOneChangeEventBatch()
211+
var curTs primitive.Timestamp
212+
curTs, err = extractTimestampFromResumeToken(cs.ResumeToken())
213+
if err != nil {
214+
err = errors.Wrap(err, "failed to extract timestamp from change stream's resume token")
215+
break
216+
}
217+
218+
if curTs == finalTs || curTs.After(finalTs) {
219+
verifier.logger.Debug().
220+
Interface("currentTimestamp", curTs).
221+
Interface("finalTimestamp", finalTs).
222+
Msg("Change stream has reached the final timestamp. Shutting down.")
203223

204-
if !gotEvent || err != nil {
224+
break
225+
}
226+
227+
err = verifier.readAndHandleOneChangeEventBatch(ctx, cs)
228+
229+
if err != nil {
205230
break
206231
}
207232
}
208233

209234
default:
210-
_, err = readAndHandleOneChangeEventBatch()
211-
}
235+
err = verifier.readAndHandleOneChangeEventBatch(ctx, cs)
212236

213-
if err == nil {
214-
err = persistResumeTokenIfNeeded()
237+
if err == nil {
238+
err = persistResumeTokenIfNeeded()
239+
}
215240
}
216241

217242
if err != nil && !errors.Is(err, context.Canceled) {
@@ -242,9 +267,9 @@ func (verifier *Verifier) iterateChangeStream(ctx context.Context, cs *mongo.Cha
242267

243268
infoLog := verifier.logger.Info()
244269
if verifier.lastChangeEventTime == nil {
245-
infoLog = infoLog.Str("changeStreamStopTime", "none")
270+
infoLog = infoLog.Str("lastEventTime", "none")
246271
} else {
247-
infoLog = infoLog.Interface("changeStreamStopTime", *verifier.lastChangeEventTime)
272+
infoLog = infoLog.Interface("lastEventTime", *verifier.lastChangeEventTime)
248273
}
249274

250275
infoLog.Msg("Change stream is done.")
@@ -270,7 +295,7 @@ func (verifier *Verifier) StartChangeStream(ctx context.Context) error {
270295

271296
ts, err := extractTimestampFromResumeToken(savedResumeToken)
272297
if err == nil {
273-
logEvent = addUnixTimeToLogEvent(ts.T, logEvent)
298+
logEvent = addTimestampToLogEvent(ts, logEvent)
274299
} else {
275300
verifier.logger.Warn().
276301
Err(err).
@@ -323,8 +348,10 @@ func (verifier *Verifier) StartChangeStream(ctx context.Context) error {
323348
return nil
324349
}
325350

326-
func addUnixTimeToLogEvent[T constraints.Integer](unixTime T, event *zerolog.Event) *zerolog.Event {
327-
return event.Time("timestampTime", time.Unix(int64(unixTime), int64(0)))
351+
func addTimestampToLogEvent(ts primitive.Timestamp, event *zerolog.Event) *zerolog.Event {
352+
return event.
353+
Interface("timestamp", ts).
354+
Time("time", time.Unix(int64(ts.T), int64(0)))
328355
}
329356

330357
func (v *Verifier) getChangeStreamMetadataCollection() *mongo.Collection {
@@ -363,7 +390,7 @@ func (verifier *Verifier) persistChangeStreamResumeToken(ctx context.Context, cs
363390
logEvent := verifier.logger.Debug()
364391

365392
if err == nil {
366-
logEvent = addUnixTimeToLogEvent(ts.T, logEvent)
393+
logEvent = addTimestampToLogEvent(ts, logEvent)
367394
} else {
368395
verifier.logger.Warn().Err(err).
369396
Msg("failed to extract resume token timestamp")

internal/verifier/change_stream_test.go

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -142,8 +142,7 @@ func (suite *IntegrationTestSuite) TestStartAtTimeNoChanges() {
142142
err = verifier.StartChangeStream(ctx)
143143
suite.Require().NoError(err)
144144
suite.Require().Equal(verifier.srcStartAtTs, origStartTs)
145-
//verifier.changeStreamFinalTsChan <- *origStartTs
146-
verifier.changeStreamEnderChan <- struct{}{}
145+
verifier.changeStreamFinalTsChan <- *origStartTs
147146
<-verifier.changeStreamDoneChan
148147
suite.Require().Equal(verifier.srcStartAtTs, origStartTs)
149148
}
@@ -185,8 +184,7 @@ func (suite *IntegrationTestSuite) TestStartAtTimeWithChanges() {
185184
"session time after events should exceed the original",
186185
)
187186

188-
//verifier.changeStreamFinalTsChan <- *postEventsSessionTime
189-
verifier.changeStreamEnderChan <- struct{}{}
187+
verifier.changeStreamFinalTsChan <- *postEventsSessionTime
190188
<-verifier.changeStreamDoneChan
191189

192190
suite.Assert().Equal(

internal/verifier/check.go

Lines changed: 26 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -42,13 +42,24 @@ func (verifier *Verifier) Check(ctx context.Context, filter map[string]any) {
4242
verifier.MaybeStartPeriodicHeapProfileCollection(ctx)
4343
}
4444

45-
func (verifier *Verifier) waitForChangeStream() error {
45+
func (verifier *Verifier) waitForChangeStream(ctx context.Context) error {
4646
verifier.mux.RLock()
4747
csRunning := verifier.changeStreamRunning
4848
verifier.mux.RUnlock()
4949
if csRunning {
5050
verifier.logger.Debug().Msg("Changestream still running, signalling that writes are done and waiting for change stream to exit")
51-
verifier.changeStreamEnderChan <- struct{}{}
51+
52+
finalTs, err := GetNewClusterTime(
53+
ctx,
54+
verifier.logger,
55+
verifier.srcClient,
56+
)
57+
58+
if err != nil {
59+
return errors.Wrapf(err, "failed to fetch source's cluster time")
60+
}
61+
62+
verifier.changeStreamFinalTsChan <- finalTs
5263
select {
5364
case err := <-verifier.changeStreamErrChan:
5465
verifier.logger.Warn().Err(err).
@@ -66,6 +77,7 @@ func (verifier *Verifier) waitForChangeStream() error {
6677
func (verifier *Verifier) CheckWorker(ctx context.Context) error {
6778
verifier.logger.Debug().Msgf("Starting %d verification workers", verifier.numWorkers)
6879
ctx, cancel := context.WithCancel(ctx)
80+
6981
wg := sync.WaitGroup{}
7082
for i := 0; i < verifier.numWorkers; i++ {
7183
wg.Add(1)
@@ -174,7 +186,9 @@ func (verifier *Verifier) CheckDriver(ctx context.Context, filter map[string]any
174186
verifier.mux.RLock()
175187
csRunning := verifier.changeStreamRunning
176188
verifier.mux.RUnlock()
177-
if !csRunning {
189+
if csRunning {
190+
verifier.logger.Debug().Msg("Check: Change stream already running.")
191+
} else {
178192
verifier.logger.Debug().Msg("Change stream not running; starting change stream")
179193

180194
err = verifier.StartChangeStream(ctx)
@@ -242,7 +256,7 @@ func (verifier *Verifier) CheckDriver(ctx context.Context, filter map[string]any
242256
// It's necessary to wait for the change stream to finish before incrementing the
243257
// generation number, or the last changes will not be checked.
244258
verifier.mux.Unlock()
245-
err := verifier.waitForChangeStream()
259+
err := verifier.waitForChangeStream(ctx)
246260
if err != nil {
247261
return err
248262
}
@@ -388,12 +402,15 @@ func (verifier *Verifier) Work(ctx context.Context, workerNum int, wg *sync.Wait
388402
if errors.Is(err, mongo.ErrNoDocuments) {
389403
duration := verifier.workerSleepDelayMillis * time.Millisecond
390404

391-
verifier.logger.Debug().
392-
Int("workerNum", workerNum).
393-
Stringer("duration", duration).
394-
Msg("No tasks found. Sleeping.")
405+
if duration > 0 {
406+
verifier.logger.Debug().
407+
Int("workerNum", workerNum).
408+
Stringer("duration", duration).
409+
Msg("No tasks found. Sleeping.")
410+
411+
time.Sleep(duration)
412+
}
395413

396-
time.Sleep(duration)
397414
continue
398415
} else if err != nil {
399416
panic(err)

internal/verifier/migration_verifier.go

Lines changed: 16 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,6 @@ import (
2424
"github.com/10gen/migration-verifier/internal/uuidutil"
2525
"github.com/olekukonko/tablewriter"
2626
"github.com/pkg/errors"
27-
"github.com/rs/zerolog"
2827
"go.mongodb.org/mongo-driver/bson"
2928
"go.mongodb.org/mongo-driver/bson/primitive"
3029
"go.mongodb.org/mongo-driver/mongo"
@@ -49,7 +48,6 @@ const (
4948
SrcNamespaceField = "query_filter.namespace"
5049
DstNamespaceField = "query_filter.to"
5150
NumWorkers = 10
52-
refetch = "TODO_CHANGE_ME_REFETCH"
5351
Idle = "idle"
5452
Check = "check"
5553
Recheck = "recheck"
@@ -123,12 +121,12 @@ type Verifier struct {
123121
metaDBName string
124122
srcStartAtTs *primitive.Timestamp
125123

126-
mux sync.RWMutex
127-
changeStreamRunning bool
128-
changeStreamEnderChan chan struct{}
129-
changeStreamErrChan chan error
130-
changeStreamDoneChan chan struct{}
131-
lastChangeEventTime *primitive.Timestamp
124+
mux sync.RWMutex
125+
changeStreamRunning bool
126+
changeStreamFinalTsChan chan primitive.Timestamp
127+
changeStreamErrChan chan error
128+
changeStreamDoneChan chan struct{}
129+
lastChangeEventTime *primitive.Timestamp
132130

133131
readConcernSetting ReadConcernSetting
134132

@@ -188,15 +186,15 @@ func NewVerifier(settings VerifierSettings) *Verifier {
188186
}
189187

190188
return &Verifier{
191-
phase: Idle,
192-
numWorkers: NumWorkers,
193-
readPreference: readpref.Primary(),
194-
partitionSizeInBytes: 400 * 1024 * 1024,
195-
failureDisplaySize: DefaultFailureDisplaySize,
196-
changeStreamEnderChan: make(chan struct{}),
197-
changeStreamErrChan: make(chan error),
198-
changeStreamDoneChan: make(chan struct{}),
199-
readConcernSetting: readConcern,
189+
phase: Idle,
190+
numWorkers: NumWorkers,
191+
readPreference: readpref.Primary(),
192+
partitionSizeInBytes: 400 * 1024 * 1024,
193+
failureDisplaySize: DefaultFailureDisplaySize,
194+
changeStreamFinalTsChan: make(chan primitive.Timestamp),
195+
changeStreamErrChan: make(chan error),
196+
changeStreamDoneChan: make(chan struct{}),
197+
readConcernSetting: readConcern,
200198

201199
// This will get recreated once gen0 starts, but we want it
202200
// here in case the change streams gets an event before then.
@@ -315,15 +313,7 @@ func (verifier *Verifier) SetPartitionSizeMB(partitionSizeMB uint32) {
315313
}
316314

317315
func (verifier *Verifier) SetLogger(logPath string) {
318-
writer := getLogWriter(logPath)
319-
verifier.writer = writer
320-
321-
consoleWriter := zerolog.ConsoleWriter{
322-
Out: writer,
323-
TimeFormat: timeFormat,
324-
}
325-
l := zerolog.New(consoleWriter).With().Timestamp().Logger()
326-
verifier.logger = logger.NewLogger(&l, writer)
316+
verifier.logger, verifier.writer = getLoggerAndWriter(logPath)
327317
}
328318

329319
func (verifier *Verifier) SetSrcNamespaces(arg []string) {
@@ -1172,10 +1162,6 @@ func (verifier *Verifier) verificationTaskCollection() *mongo.Collection {
11721162
return verifier.verificationDatabase().Collection(verificationTasksCollection)
11731163
}
11741164

1175-
func (verifier *Verifier) refetchCollection() *mongo.Collection {
1176-
return verifier.verificationDatabase().Collection(refetch)
1177-
}
1178-
11791165
func (verifier *Verifier) srcClientDatabase(dbName string) *mongo.Database {
11801166
db := verifier.srcClient.Database(dbName)
11811167
// No need to check the write concern because we do not write to the source database.

0 commit comments

Comments
 (0)