From 70ef3f55421fcee6a17b84998c926e7e61ac17c8 Mon Sep 17 00:00:00 2001 From: Aditi Khare Date: Tue, 9 Jul 2024 17:03:21 -0400 Subject: [PATCH 1/7] feat(NODE-5906): optimize toArray to batches --- src/cursor/abstract_cursor.ts | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/src/cursor/abstract_cursor.ts b/src/cursor/abstract_cursor.ts index da08f1a1a66..8d069787cb0 100644 --- a/src/cursor/abstract_cursor.ts +++ b/src/cursor/abstract_cursor.ts @@ -297,6 +297,7 @@ export abstract class AbstractCursor< return bufferedDocs; } + async *[Symbol.asyncIterator](): AsyncGenerator { if (this.isClosed) { return; @@ -457,9 +458,24 @@ export abstract class AbstractCursor< * cursor.rewind() can be used to reset the cursor. */ async toArray(): Promise { - const array = []; + const array: TSchema[] = []; + + // when each loop iteration ends,documents will be empty and a 'await (const document of this)' will run a getMore operation for await (const document of this) { array.push(document); + let docs = this.readBufferedDocuments(); + if (this.transform != null) { + docs = await Promise.all( + docs.map(async doc => { + if (doc != null) { + return await this.transformDocument(doc); + } else { + throw Error; + } + }) + ); + } + array.push(...docs); } return array; } From f888cd3531b1943b9326271439abd7cdf6ac19c1 Mon Sep 17 00:00:00 2001 From: Aditi Khare Date: Fri, 26 Jul 2024 16:07:21 -0400 Subject: [PATCH 2/7] add test --- .../node-specific/abstract_cursor.test.ts | 34 +++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/test/integration/node-specific/abstract_cursor.test.ts b/test/integration/node-specific/abstract_cursor.test.ts index bee2333db94..a5e7fba13dd 100644 --- a/test/integration/node-specific/abstract_cursor.test.ts +++ b/test/integration/node-specific/abstract_cursor.test.ts @@ -5,6 +5,7 @@ import { Transform } from 'stream'; import { inspect } from 'util'; import { + AbstractCursor, type Collection, type FindCursor, MongoAPIError, @@ -361,4 +362,37 @@ describe('class AbstractCursor', function () { }); }); }); + + describe('toArray', () => { + let nextSpy; + let client: MongoClient; + let cursor: AbstractCursor; + let col: Collection; + const numBatches = 10; + const batchSize = 4; + + beforeEach(async function () { + client = this.configuration.newClient(); + col = client.db().collection('test'); + await col.deleteMany({}); + for (let i = 0; i < numBatches; i++) { + await col.insertMany([{ a: 1 }, { a: 2 }, { a: 3 }, { a: 4 }]); + } + nextSpy = sinon.spy(AbstractCursor.prototype, 'next'); + }); + + afterEach(async function () { + sinon.restore(); + await cursor.close(); + await client.close(); + }); + + it('iterates per batch not per document', async () => { + cursor = client.db().collection('test').find({}, { batchSize }); + await cursor.toArray(); + expect(nextSpy.callCount).to.equal(numBatches + 1); + const numDocuments = numBatches * batchSize; + expect(nextSpy.callCount).to.be.lessThan(numDocuments); + }); + }); }); From 68b6ba673acf99bd45fa455ce5f3fcbcb0f95f18 Mon Sep 17 00:00:00 2001 From: Aditi Khare Date: Wed, 31 Jul 2024 16:09:16 -0400 Subject: [PATCH 3/7] requested changes --- src/cursor/abstract_cursor.ts | 28 +++++++++++----------------- 1 file changed, 11 insertions(+), 17 deletions(-) diff --git a/src/cursor/abstract_cursor.ts b/src/cursor/abstract_cursor.ts index 8d069787cb0..5b864de9fbe 100644 --- a/src/cursor/abstract_cursor.ts +++ b/src/cursor/abstract_cursor.ts @@ -281,8 +281,8 @@ export abstract class AbstractCursor< } /** Returns current buffered documents */ - readBufferedDocuments(number?: number): TSchema[] { - const bufferedDocs: TSchema[] = []; + readBufferedDocuments(number?: number): NonNullable[] { + const bufferedDocs: NonNullable[] = []; const documentsToRead = Math.min( number ?? this.documents?.length ?? 0, this.documents?.length ?? 0 @@ -459,27 +459,21 @@ export abstract class AbstractCursor< */ async toArray(): Promise { const array: TSchema[] = []; - - // when each loop iteration ends,documents will be empty and a 'await (const document of this)' will run a getMore operation + // at the end of the loop (since readBufferedDocuments is called) the buffer will be empty + // then, the 'await of' syntax will run a getMore call for await (const document of this) { array.push(document); - let docs = this.readBufferedDocuments(); + const docs = this.readBufferedDocuments(); if (this.transform != null) { - docs = await Promise.all( - docs.map(async doc => { - if (doc != null) { - return await this.transformDocument(doc); - } else { - throw Error; - } - }) - ); + for (const doc of docs) { + array.push(await this.transformDocument(doc)); + } + } else { + array.push(...docs); } - array.push(...docs); } return array; } - /** * Add a cursor flag to the cursor * @@ -820,7 +814,7 @@ export abstract class AbstractCursor< } /** @internal */ - private async transformDocument(document: NonNullable): Promise { + private async transformDocument(document: NonNullable): Promise> { if (this.transform == null) return document; try { From ad371c27758fb6b8596f131dc96c7016adf59032 Mon Sep 17 00:00:00 2001 From: Aditi Khare Date: Thu, 1 Aug 2024 17:32:19 -0400 Subject: [PATCH 4/7] add in more sensitive/granular tests - can revert if needed --- results.json | 16 ++++ t.mjs | 33 ++++++++ .../mongoBench/suites/multiBench.js | 80 +++++++++++++++++++ 3 files changed, 129 insertions(+) create mode 100644 results.json create mode 100644 t.mjs diff --git a/results.json b/results.json new file mode 100644 index 00000000000..998a65aafe7 --- /dev/null +++ b/results.json @@ -0,0 +1,16 @@ +[ + { + "info": { + "test_name": "findManyAndToArrayOld", + "tags": [ + "js-bson" + ] + }, + "metrics": [ + { + "name": "megabytes_per_second", + "value": 92.01875393495698 + } + ] + } +] \ No newline at end of file diff --git a/t.mjs b/t.mjs new file mode 100644 index 00000000000..bf8f09fec21 --- /dev/null +++ b/t.mjs @@ -0,0 +1,33 @@ +import * as fs from 'fs/promises'; + +import { MongoClient } from './lib/index.js'; + +const client = await MongoClient.connect('mongodb://localhost/'); +const cursor = client.db('test').aggregate([ + { + $documents: [ + JSON.parse( + await fs.readFile( + 'test/benchmarks/driverBench/spec/single_and_multi_document/tweet.json', + 'utf8' + ) + ) + ] + }, + { + $set: { + field: { + $reduce: { + input: [...Array(20).keys()], + initialValue: [0], + in: { $concatArrays: ['$$value', '$$value'] } + } + } + } + }, + { $unwind: '$field' }, + { $limit: 1000000 } +]); + +await cursor.toArray(); +await client.close(); diff --git a/test/benchmarks/mongoBench/suites/multiBench.js b/test/benchmarks/mongoBench/suites/multiBench.js index ae1f921f948..78aa904732c 100644 --- a/test/benchmarks/mongoBench/suites/multiBench.js +++ b/test/benchmarks/mongoBench/suites/multiBench.js @@ -155,6 +155,86 @@ function makeMultiBench(suite) { }) .teardown(dropDb) .teardown(disconnectClient) + ).benchmark('aggregateAMillionDocuments', benchmark => + benchmark + .taskSize(16) + .setup(makeMakeClient(mongodbDriver)) + .setup(connectClient) + .setup(initDb) + .setup(dropDb) + .task(async function () { + await this.db + .aggregate([ + { $documents: [{}] }, + { + $set: { + field: { + $reduce: { + input: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19], + initialValue: [0], + in: { $concatArrays: ['$$value', '$$value'] } + } + } + } + }, + { $unwind: '$field' }, + { $limit: 1000000 } + ]) + .toArray(); + }) + .teardown(dropDb) + .teardown(disconnectClient) + ) + .benchmark('aggregateAMillionTweets', benchmark => + benchmark + .taskSize(1500) + .setup(makeLoadJSON('tweet.json')) + .setup(makeMakeClient(mongodbDriver)) + .setup(connectClient) + .setup(initDb) + .setup(dropDb) + .task(async function () { + await this.db + .aggregate([ + { $documents: [this.doc] }, + { + $set: { + id: { + $reduce: { + input: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19], + initialValue: [0], + in: { $concatArrays: ['$$value', '$$value'] } + } + } + } + }, + { $unwind: '$id' }, + { $limit: 1000000 } + ]) + .toArray(); + }) + .teardown(dropDb) + .teardown(disconnectClient) + ) + .benchmark('findManyAndEmptyAMillionCursor', benchmark => + benchmark + .taskSize(16.22) + .setup(makeMakeClient(mongodbDriver)) + .setup(connectClient) + .setup(initDb) + .setup(dropDb) + .setup(initCollection) + .setup(async function () { + await this.collection.insertMany(Array.from({ length: 1_000_000 }, () => ({ field: 0 }))); + }) + .task(async function findManyAndEmptyCursor() { + // eslint-disable-next-line @typescript-eslint/no-unused-vars + for await (const _ of this.collection.find({})) { + // do nothing + } + }) + .teardown(dropDb) + .teardown(disconnectClient) ); } From 911bdc354aac296adc9ce641f4955838e2073664 Mon Sep 17 00:00:00 2001 From: Aditi Khare <106987683+aditi-khare-mongoDB@users.noreply.github.com> Date: Thu, 1 Aug 2024 17:36:43 -0400 Subject: [PATCH 5/7] Delete t.mjs --- t.mjs | 33 --------------------------------- 1 file changed, 33 deletions(-) delete mode 100644 t.mjs diff --git a/t.mjs b/t.mjs deleted file mode 100644 index bf8f09fec21..00000000000 --- a/t.mjs +++ /dev/null @@ -1,33 +0,0 @@ -import * as fs from 'fs/promises'; - -import { MongoClient } from './lib/index.js'; - -const client = await MongoClient.connect('mongodb://localhost/'); -const cursor = client.db('test').aggregate([ - { - $documents: [ - JSON.parse( - await fs.readFile( - 'test/benchmarks/driverBench/spec/single_and_multi_document/tweet.json', - 'utf8' - ) - ) - ] - }, - { - $set: { - field: { - $reduce: { - input: [...Array(20).keys()], - initialValue: [0], - in: { $concatArrays: ['$$value', '$$value'] } - } - } - } - }, - { $unwind: '$field' }, - { $limit: 1000000 } -]); - -await cursor.toArray(); -await client.close(); From be0806ade3bc632504388e7789c5e702752c5990 Mon Sep 17 00:00:00 2001 From: Aditi Khare <106987683+aditi-khare-mongoDB@users.noreply.github.com> Date: Thu, 1 Aug 2024 17:37:02 -0400 Subject: [PATCH 6/7] Delete results.json --- results.json | 16 ---------------- 1 file changed, 16 deletions(-) delete mode 100644 results.json diff --git a/results.json b/results.json deleted file mode 100644 index 998a65aafe7..00000000000 --- a/results.json +++ /dev/null @@ -1,16 +0,0 @@ -[ - { - "info": { - "test_name": "findManyAndToArrayOld", - "tags": [ - "js-bson" - ] - }, - "metrics": [ - { - "name": "megabytes_per_second", - "value": 92.01875393495698 - } - ] - } -] \ No newline at end of file From 2f3154d7e86914ba548849dd574c1f2795a79dae Mon Sep 17 00:00:00 2001 From: Aditi Khare Date: Thu, 1 Aug 2024 17:41:17 -0400 Subject: [PATCH 7/7] clean up additional testing --- .../mongoBench/suites/multiBench.js | 29 ++++--------------- 1 file changed, 5 insertions(+), 24 deletions(-) diff --git a/test/benchmarks/mongoBench/suites/multiBench.js b/test/benchmarks/mongoBench/suites/multiBench.js index 78aa904732c..c6afab962cc 100644 --- a/test/benchmarks/mongoBench/suites/multiBench.js +++ b/test/benchmarks/mongoBench/suites/multiBench.js @@ -155,10 +155,11 @@ function makeMultiBench(suite) { }) .teardown(dropDb) .teardown(disconnectClient) - ).benchmark('aggregateAMillionDocuments', benchmark => + ) + .benchmark('aggregateAMillionDocumentsAndToArray', benchmark => benchmark .taskSize(16) - .setup(makeMakeClient(mongodbDriver)) + .setup(makeClient) .setup(connectClient) .setup(initDb) .setup(dropDb) @@ -185,11 +186,11 @@ function makeMultiBench(suite) { .teardown(dropDb) .teardown(disconnectClient) ) - .benchmark('aggregateAMillionTweets', benchmark => + .benchmark('aggregateAMillionTweetsAndToArray', benchmark => benchmark .taskSize(1500) .setup(makeLoadJSON('tweet.json')) - .setup(makeMakeClient(mongodbDriver)) + .setup(makeClient) .setup(connectClient) .setup(initDb) .setup(dropDb) @@ -215,26 +216,6 @@ function makeMultiBench(suite) { }) .teardown(dropDb) .teardown(disconnectClient) - ) - .benchmark('findManyAndEmptyAMillionCursor', benchmark => - benchmark - .taskSize(16.22) - .setup(makeMakeClient(mongodbDriver)) - .setup(connectClient) - .setup(initDb) - .setup(dropDb) - .setup(initCollection) - .setup(async function () { - await this.collection.insertMany(Array.from({ length: 1_000_000 }, () => ({ field: 0 }))); - }) - .task(async function findManyAndEmptyCursor() { - // eslint-disable-next-line @typescript-eslint/no-unused-vars - for await (const _ of this.collection.find({})) { - // do nothing - } - }) - .teardown(dropDb) - .teardown(disconnectClient) ); }