Skip to content

Commit 17f7c8d

Browse files
authored
Remove recheck deduplication logic (#160)
Commit 440bcb4 added logic to deduplicate rechecks on insert. With large inserts, though, that logic hurts more than it helps.
1 parent 5fb4f11 commit 17f7c8d

File tree

2 files changed

+34
-3
lines changed

2 files changed

+34
-3
lines changed

internal/verifier/recheck.go

Lines changed: 33 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -113,17 +113,23 @@ func (verifier *Verifier) insertRecheckDocs(
113113
verifier.mux.RLock()
114114
defer verifier.mux.RUnlock()
115115

116-
dbNames, collNames, rawDocIDs, dataSizes := deduplicateRechecks(
116+
start := time.Now()
117+
dbNames, collNames, documentIDs, dataSizes = deduplicateRechecks(
117118
dbNames,
118119
collNames,
119120
documentIDs,
120121
dataSizes,
121122
)
123+
fmt.Printf("----- deduplicate time: %s\n", time.Since(start))
122124

123125
generation, _ := verifier.getGenerationWhileLocked()
124126

125127
eg, groupCtx := contextplus.ErrGroup(ctx)
126128

129+
// MongoDB’s Go driver starts failing requests if we try to exceed
130+
// its connection pool’s size. To avoid that, we limit our concurrency.
131+
eg.SetLimit(100)
132+
127133
genCollection := verifier.getRecheckQueueCollection(generation)
128134

129135
sendRechecks := func(rechecks []bson.Raw) {
@@ -175,7 +181,7 @@ func (verifier *Verifier) insertRecheckDocs(
175181
PrimaryKey: RecheckPrimaryKey{
176182
SrcDatabaseName: dbName,
177183
SrcCollectionName: collNames[i],
178-
DocumentID: rawDocIDs[i],
184+
DocumentID: documentIDs[i],
179185
Rand: rand.Int32(),
180186
},
181187
DataSize: dataSizes[i],
@@ -225,6 +231,31 @@ func deduplicateRechecks(
225231
documentIDs []bson.RawValue,
226232
dataSizes []int,
227233
) ([]string, []string, []bson.RawValue, []int) {
234+
235+
/*
236+
for i := len(dbNames) - 1; i >= 0; i-- {
237+
for j := i - 1; j >= 0; j-- {
238+
if dbNames[i] != dbNames[j] {
239+
continue
240+
}
241+
242+
if collNames[i] != collNames[j] {
243+
continue
244+
}
245+
246+
if !documentIDs[i].Equal(documentIDs[j]) {
247+
continue
248+
}
249+
250+
dbNames = slices.Delete(dbNames, i, 1+i)
251+
collNames = slices.Delete(collNames, i, 1+i)
252+
documentIDs = slices.Delete(documentIDs, i, 1+i)
253+
dataSizes = slices.Delete(dataSizes, i, 1+i)
254+
break
255+
}
256+
}
257+
*/
258+
228259
dedupeMap := map[string]map[string]map[string]int{}
229260

230261
uniqueElems := 0

internal/verifier/recheck_test.go

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -253,7 +253,7 @@ func (suite *IntegrationTestSuite) TestManyManyRechecks() {
253253
verifier.SetNumWorkers(10)
254254
ctx := suite.Context()
255255

256-
docsCount := 20_000_000
256+
docsCount := 12_000_000
257257

258258
suite.T().Logf("Inserting %d rechecks …", docsCount)
259259

0 commit comments

Comments
 (0)