Skip to content
Closed
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions test/benchmarks/driverBench/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ benchmarkRunner
const multiBench = average(Object.values(microBench.multiBench));

const parallelBench = average([
microBench.parallel.ldjsonMultiFileUpload,
microBench.parallel.ldjsonMultiFileUpload ?? 0,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I worry that setting it to 0 would skew the tracked result — could we just take it out? Or we could set it to the currently known average value, if we're concerned about preserving the parallel measurement history.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I agree — removing it makes more sense. I'd rather have accurate numbers than simulate a value here, if that sounds good to you.

microBench.parallel.ldjsonMultiFileExport,
microBench.parallel.gridfsMultiFileUpload,
microBench.parallel.gridfsMultiFileDownload
Expand All @@ -66,7 +66,7 @@ benchmarkRunner
microBench.multiBench.smallDocBulkInsert,
microBench.multiBench.largeDocBulkInsert,
microBench.multiBench.gridFsUpload,
microBench.parallel.ldjsonMultiFileUpload,
microBench.parallel.ldjsonMultiFileUpload ?? 0,
microBench.parallel.gridfsMultiFileUpload
]);

Expand Down
116 changes: 73 additions & 43 deletions test/benchmarks/mongoBench/runner.js
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,10 @@
const CONSTANTS = require('./constants');
const { performance } = require('perf_hooks');
const Suite = require('./suite');

const PERCENTILES = [10, 25, 50, 75, 95, 98, 99];
/**
 * Convert a percentile (0-100) into a zero-based index into a sorted sample
 * of `total` observations, clamping at 0 so tiny samples never go negative.
 *
 * @param {number} percentile - percentile to look up (0-100)
 * @param {number} total - number of observations in the sorted sample
 * @returns {number} zero-based index of the observation at that percentile
 */
function percentileIndex(percentile, total) {
  // floor((total * p) / 100 - 1) === floor((total * p) / 100) - 1, since 1 is an integer
  const zeroBased = Math.floor((total * percentile) / 100) - 1;
  return zeroBased < 0 ? 0 : zeroBased;
}
const fs = require('fs');
const child_process = require('child_process');
const stream = require('stream/promises');
const { Writable } = require('stream');

function timeSyncTask(task, ctx) {
const start = performance.now();
Expand All @@ -25,30 +24,54 @@ async function timeAsyncTask(task, ctx) {
return (end - start) / 1000;
}

// awk program that prints the middle line(s) of its input: the single middle
// line when the line count is odd, or the two middle lines (space separated,
// on one output line) when it is even. Fed sorted timing data to find the
// median; the array elements below are awk source, so they must stay byte-exact.
const awkFindMiddle = [
// Store every input line in array `a`, keyed by its 1-based line number (NR)
'{ a[NR] = $0 }',
// The END block runs after all input is consumed; NR then holds the total line count
'END {',
// `mid` is the integer half of the line count
' mid = int(NR / 2);',
// Odd count: print the single middle line (1-based index mid + 1)
' if (NR % 2)',
' print a[mid + 1];',
// Even count: print both middle lines (indices mid and mid + 1)
' else',
' print a[mid], a[mid + 1];',
'}'
].join(' ');

/**
* Returns the execution time for the benchmarks in mb/second
*
* This function internally calculates the 50th percentile execution time and uses
* that as the median.
*
* @param {Benchmark} benchmark
* @param {{ rawData: number[], count: number}} data
* @returns number
*/
function calculateMicroBench(benchmark, data) {
const rawData = data.rawData;
const count = data.count;
async function calculateMicroBench(benchmark) {
const pipeOptions = { stdio: ['pipe', 'pipe', 'inherit'] };
const sort = child_process.spawn('sort', ['-n'], pipeOptions);
const awk = child_process.spawn('awk', [awkFindMiddle], pipeOptions);

let lines = '';
const collect = new Writable({
write: (chunk, encoding, callback) => {
lines += encoding === 'buffer' ? chunk.toString('utf8') : chunk;
callback();
}
});

const sortedData = [].concat(rawData).sort();
await stream.pipeline(fs.createReadStream('raw.dat', 'utf8'), sort.stdin);
await stream.pipeline(sort.stdout, awk.stdin);
await stream.pipeline(awk.stdout, collect);

const percentiles = PERCENTILES.reduce((acc, pct) => {
acc[pct] = sortedData[percentileIndex(pct, count)];
return acc;
}, {});
fs.unlinkSync('raw.dat');

const medianExecution = percentiles[50];
const [value0, value1] = lines.trim().split(' ');
const median = value1 ? (Number(value0) + Number(value1)) / 2 : Number(value0);

return benchmark.taskSize / medianExecution;
return benchmark.taskSize / median;
}

class Runner {
Expand Down Expand Up @@ -126,6 +149,7 @@ class Runner {
for (const [name, benchmark] of benchmarks) {
this.reporter(` Executing Benchmark "${name}"`);
result[name] = await this._runBenchmark(benchmark);
this.reporter(` Executed Benchmark "${name}" =`, result[name]);
}

return result;
Expand All @@ -142,11 +166,12 @@ class Runner {
const ctx = {};
try {
await benchmark.setup.call(ctx);
const result = await this._loopTask(benchmark, ctx);
await this._loopTask(benchmark, ctx);
await benchmark.teardown.call(ctx);
return calculateMicroBench(benchmark, result);
return await calculateMicroBench(benchmark);
} catch (error) {
return this._errorHandler(error);
fs.unlinkSync('raw.dat');
this._errorHandler(error);
}
}

Expand All @@ -157,33 +182,38 @@ class Runner {
* @returns {{ rawData: number[], count: number}}
*/
async _loopTask(benchmark, ctx) {
const start = performance.now();
const rawData = [];
const minExecutionCount = this.minExecutionCount;
const minExecutionTime = this.minExecutionTime;
const maxExecutionTime = this.maxExecutionTime;
let time = performance.now() - start;
let count = 1;

const taskTimer = benchmark._taskType === 'sync' ? timeSyncTask : timeAsyncTask;

while (time < maxExecutionTime && (time < minExecutionTime || count < minExecutionCount)) {
await benchmark.beforeTask.call(ctx);
const executionTime = await taskTimer(benchmark.task, ctx);
rawData.push(executionTime);
count++;
time = performance.now();
const rawDataFile = fs.openSync('raw.dat', 'w');
try {
const start = performance.now();
const minExecutionCount = this.minExecutionCount;
const minExecutionTime = this.minExecutionTime;
const maxExecutionTime = this.maxExecutionTime;
let time = performance.now() - start;
let count = 1;

const taskTimer = benchmark._taskType === 'sync' ? timeSyncTask : timeAsyncTask;

while (time < maxExecutionTime && (time < minExecutionTime || count < minExecutionCount)) {
await benchmark.beforeTask.call(ctx);
const executionTime = await taskTimer(benchmark.task, ctx);
fs.writeSync(rawDataFile, String(executionTime) + '\n');
count++;
time = performance.now();
}
} finally {
fs.closeSync(rawDataFile);
}

return {
rawData,
count
};
}

_errorHandler(e) {
console.error(e);
return NaN;
/**
 * Report an error and its entire `cause` chain via the runner's reporter,
 * then rethrow the original error so the failure still propagates.
 *
 * @param {Error} error - the error caught while running a benchmark
 * @throws the same `error` it was given, after reporting
 */
_errorHandler(error) {
  // Walk the chain: the head is labelled "Error", every linked cause "Caused by".
  for (let err = error; err; err = err.cause) {
    const label = err !== error ? 'Caused by' : 'Error';
    this.reporter(`${label}: ${err.name} - ${err.message} - ${err.stack}`);
  }
  throw error;
}
}

Expand Down
164 changes: 83 additions & 81 deletions test/benchmarks/mongoBench/suites/parallelBench.js
Original file line number Diff line number Diff line change
Expand Up @@ -113,87 +113,89 @@ async function gridfsMultiFileDownload() {
* @returns Benchmark
*/
function makeParallelBenchmarks(suite) {
return suite
.benchmark('ldjsonMultiFileUpload', benchmark =>
// https://github.com/mongodb/specifications/blob/master/source/benchmarking/benchmarking.rst#ldjson-multi-file-import
benchmark
.taskSize(565)
.setup(makeClient)
.setup(connectClient)
.setup(initDb)
.setup(dropDb)
.beforeTask(initCollection)
.beforeTask(dropCollection)
.beforeTask(createCollection)
.task(ldjsonMultiUpload)
.teardown(dropDb)
.teardown(disconnectClient)
)
.benchmark('ldjsonMultiFileExport', benchmark =>
// https://github.com/mongodb/specifications/blob/master/source/benchmarking/benchmarking.rst#ldjson-multi-file-export
benchmark
.taskSize(565)
.setup(makeClient)
.setup(connectClient)
.setup(initDb)
.setup(dropDb)
.beforeTask(initCollection)
.beforeTask(dropCollection)
.beforeTask(createCollection)
.beforeTask(ldjsonMultiUpload)
.beforeTask(initTemporaryDirectory)
.task(ldjsonMultiExport)
.afterTask(clearTemporaryDirectory)
.teardown(dropDb)
.teardown(async function () {
await rm(this.temporaryDirectory, { recursive: true, force: true });
})
.teardown(disconnectClient)
)
.benchmark('gridfsMultiFileUpload', benchmark =>
// https://github.com/mongodb/specifications/blob/master/source/benchmarking/benchmarking.rst#gridfs-multi-file-upload
benchmark
.taskSize(262.144)
.setup(makeClient)
.setup(connectClient)
.setup(initDb)
.setup(dropDb)
.setup(initDb)
.setup(initCollection)
.beforeTask(dropBucket)
.beforeTask(initBucket)
.beforeTask(async function () {
const stream = this.bucket.openUploadStream('setup-file.txt');
const oneByteFile = Readable.from('a');
return pipeline(oneByteFile, stream);
})
.task(gridfsMultiFileUpload)
.teardown(dropDb)
.teardown(disconnectClient)
)
.benchmark('gridfsMultiFileDownload', benchmark =>
// https://github.com/mongodb/specifications/blob/master/source/benchmarking/benchmarking.rst#gridfs-multi-file-download
benchmark
.taskSize(262.144)
.setup(makeClient)
.setup(connectClient)
.setup(initDb)
.setup(dropDb)
.setup(initDb)
.setup(initCollection)
.setup(initTemporaryDirectory)
.setup(dropBucket)
.setup(initBucket)
.setup(gridfsMultiFileUpload)
.beforeTask(clearTemporaryDirectory)
.setup(initBucket)
.task(gridfsMultiFileDownload)
.teardown(dropDb)
.teardown(async function () {
await rm(this.temporaryDirectory, { recursive: true, force: true });
})
.teardown(disconnectClient)
);
return (
suite
// .benchmark('ldjsonMultiFileUpload', benchmark =>
// // https://github.com/mongodb/specifications/blob/master/source/benchmarking/benchmarking.rst#ldjson-multi-file-import
// benchmark
// .taskSize(565)
// .setup(makeClient)
// .setup(connectClient)
// .setup(initDb)
// .setup(dropDb)
// .beforeTask(initCollection)
// .beforeTask(dropCollection)
// .beforeTask(createCollection)
// .task(ldjsonMultiUpload)
// .teardown(dropDb)
// .teardown(disconnectClient)
// )
.benchmark('ldjsonMultiFileExport', benchmark =>
// https://github.com/mongodb/specifications/blob/master/source/benchmarking/benchmarking.rst#ldjson-multi-file-export
benchmark
.taskSize(565)
.setup(makeClient)
.setup(connectClient)
.setup(initDb)
.setup(dropDb)
.beforeTask(initCollection)
.beforeTask(dropCollection)
.beforeTask(createCollection)
.beforeTask(ldjsonMultiUpload)
.beforeTask(initTemporaryDirectory)
.task(ldjsonMultiExport)
.afterTask(clearTemporaryDirectory)
.teardown(dropDb)
.teardown(async function () {
await rm(this.temporaryDirectory, { recursive: true, force: true });
})
.teardown(disconnectClient)
)
.benchmark('gridfsMultiFileUpload', benchmark =>
// https://github.com/mongodb/specifications/blob/master/source/benchmarking/benchmarking.rst#gridfs-multi-file-upload
benchmark
.taskSize(262.144)
.setup(makeClient)
.setup(connectClient)
.setup(initDb)
.setup(dropDb)
.setup(initDb)
.setup(initCollection)
.beforeTask(dropBucket)
.beforeTask(initBucket)
.beforeTask(async function () {
const stream = this.bucket.openUploadStream('setup-file.txt');
const oneByteFile = Readable.from('a');
return pipeline(oneByteFile, stream);
})
.task(gridfsMultiFileUpload)
.teardown(dropDb)
.teardown(disconnectClient)
)
.benchmark('gridfsMultiFileDownload', benchmark =>
// https://github.com/mongodb/specifications/blob/master/source/benchmarking/benchmarking.rst#gridfs-multi-file-download
benchmark
.taskSize(262.144)
.setup(makeClient)
.setup(connectClient)
.setup(initDb)
.setup(dropDb)
.setup(initDb)
.setup(initCollection)
.setup(initTemporaryDirectory)
.setup(dropBucket)
.setup(initBucket)
.setup(gridfsMultiFileUpload)
.beforeTask(clearTemporaryDirectory)
.setup(initBucket)
.task(gridfsMultiFileDownload)
.teardown(dropDb)
.teardown(async function () {
await rm(this.temporaryDirectory, { recursive: true, force: true });
})
.teardown(disconnectClient)
)
);
}

module.exports = { makeParallelBenchmarks };