Skip to content
Closed
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions test/benchmarks/driverBench/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ benchmarkRunner
const multiBench = average(Object.values(microBench.multiBench));

const parallelBench = average([
microBench.parallel.ldjsonMultiFileUpload,
microBench.parallel.ldjsonMultiFileUpload ?? 0,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I worry that setting it to 0 would skew the tracked result — could we just take it out? Or we could set it to the currently known average value, if we're concerned about preserving the parallel measurement history.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I agree — removing it makes more sense. I'd rather have accurate numbers than simulate a value here, if that sounds good to you.

microBench.parallel.ldjsonMultiFileExport,
microBench.parallel.gridfsMultiFileUpload,
microBench.parallel.gridfsMultiFileDownload
Expand All @@ -66,7 +66,7 @@ benchmarkRunner
microBench.multiBench.smallDocBulkInsert,
microBench.multiBench.largeDocBulkInsert,
microBench.multiBench.gridFsUpload,
microBench.parallel.ldjsonMultiFileUpload,
microBench.parallel.ldjsonMultiFileUpload ?? 0,
microBench.parallel.gridfsMultiFileUpload
]);

Expand Down
116 changes: 73 additions & 43 deletions test/benchmarks/mongoBench/runner.js
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,10 @@
const CONSTANTS = require('./constants');
const { performance } = require('perf_hooks');
const Suite = require('./suite');

const PERCENTILES = [10, 25, 50, 75, 95, 98, 99];
/**
 * Convert a percentile (0-100) into a zero-based index into a sorted sample
 * of `total` observations, clamping at 0 so tiny samples never go negative.
 *
 * @param {number} percentile - percentile to look up (0-100)
 * @param {number} total - number of observations in the sorted sample
 * @returns {number} zero-based index of the observation at that percentile
 */
function percentileIndex(percentile, total) {
  // floor((total * p) / 100 - 1) === floor((total * p) / 100) - 1, since 1 is an integer
  const zeroBased = Math.floor((total * percentile) / 100) - 1;
  return zeroBased < 0 ? 0 : zeroBased;
}
const fs = require('fs');
const child_process = require('child_process');
const stream = require('stream/promises');
const { Writable } = require('stream');

function timeSyncTask(task, ctx) {
const start = performance.now();
Expand All @@ -25,30 +24,54 @@ async function timeAsyncTask(task, ctx) {
return (end - start) / 1000;
}

// awk program that prints the middle line(s) of its input: the single middle
// line when the line count is odd, or the two middle lines (space separated,
// on one output line) when it is even. Fed sorted timing data to find the
// median; the array elements below are awk source, so they must stay byte-exact.
const awkFindMiddle = [
// Store every input line in array `a`, keyed by its 1-based line number (NR)
'{ a[NR] = $0 }',
// The END block runs after all input is consumed; NR then holds the total line count
'END {',
// `mid` is the integer half of the line count
' mid = int(NR / 2);',
// Odd count: print the single middle line (1-based index mid + 1)
' if (NR % 2)',
' print a[mid + 1];',
// Even count: print both middle lines (indices mid and mid + 1)
' else',
' print a[mid], a[mid + 1];',
'}'
].join(' ');

/**
* Returns the execution time for the benchmarks in mb/second
*
* This function internally calculates the 50th percentile execution time and uses
* that as the median.
*
* @param {Benchmark} benchmark
* @param {{ rawData: number[], count: number}} data
* @returns number
*/
function calculateMicroBench(benchmark, data) {
const rawData = data.rawData;
const count = data.count;
async function calculateMicroBench(benchmark) {
const pipeOptions = { stdio: ['pipe', 'pipe', 'inherit'] };
const sort = child_process.spawn('sort', ['-n'], pipeOptions);
const awk = child_process.spawn('awk', [awkFindMiddle], pipeOptions);

let lines = '';
const collect = new Writable({
write: (chunk, encoding, callback) => {
lines += encoding === 'buffer' ? chunk.toString('utf8') : chunk;
callback();
}
});

const sortedData = [].concat(rawData).sort();
await stream.pipeline(fs.createReadStream('raw.dat', 'utf8'), sort.stdin);
await stream.pipeline(sort.stdout, awk.stdin);
await stream.pipeline(awk.stdout, collect);

const percentiles = PERCENTILES.reduce((acc, pct) => {
acc[pct] = sortedData[percentileIndex(pct, count)];
return acc;
}, {});
fs.unlinkSync('raw.dat');

const medianExecution = percentiles[50];
const [value0, value1] = lines.trim().split(' ');
const median = value1 ? (Number(value0) + Number(value1)) / 2 : Number(value0);

return benchmark.taskSize / medianExecution;
return benchmark.taskSize / median;
}

class Runner {
Expand Down Expand Up @@ -126,6 +149,7 @@ class Runner {
for (const [name, benchmark] of benchmarks) {
this.reporter(` Executing Benchmark "${name}"`);
result[name] = await this._runBenchmark(benchmark);
this.reporter(` Executed Benchmark "${name}" =`, result[name]);
}

return result;
Expand All @@ -142,11 +166,12 @@ class Runner {
const ctx = {};
try {
await benchmark.setup.call(ctx);
const result = await this._loopTask(benchmark, ctx);
await this._loopTask(benchmark, ctx);
await benchmark.teardown.call(ctx);
return calculateMicroBench(benchmark, result);
return await calculateMicroBench(benchmark);
} catch (error) {
return this._errorHandler(error);
fs.unlinkSync('raw.dat');
this._errorHandler(error);
}
}

Expand All @@ -157,33 +182,38 @@ class Runner {
* @returns {{ rawData: number[], count: number}}
*/
async _loopTask(benchmark, ctx) {
const start = performance.now();
const rawData = [];
const minExecutionCount = this.minExecutionCount;
const minExecutionTime = this.minExecutionTime;
const maxExecutionTime = this.maxExecutionTime;
let time = performance.now() - start;
let count = 1;

const taskTimer = benchmark._taskType === 'sync' ? timeSyncTask : timeAsyncTask;

while (time < maxExecutionTime && (time < minExecutionTime || count < minExecutionCount)) {
await benchmark.beforeTask.call(ctx);
const executionTime = await taskTimer(benchmark.task, ctx);
rawData.push(executionTime);
count++;
time = performance.now();
const rawDataFile = fs.openSync('raw.dat', 'w');
try {
const start = performance.now();
const minExecutionCount = this.minExecutionCount;
const minExecutionTime = this.minExecutionTime;
const maxExecutionTime = this.maxExecutionTime;
let time = performance.now() - start;
let count = 1;

const taskTimer = benchmark._taskType === 'sync' ? timeSyncTask : timeAsyncTask;

while (time < maxExecutionTime && (time < minExecutionTime || count < minExecutionCount)) {
await benchmark.beforeTask.call(ctx);
const executionTime = await taskTimer(benchmark.task, ctx);
fs.writeSync(rawDataFile, String(executionTime) + '\n');
count++;
time = performance.now();
}
} finally {
fs.closeSync(rawDataFile);
}

return {
rawData,
count
};
}

_errorHandler(e) {
console.error(e);
return NaN;
/**
 * Report an error and its entire `cause` chain via the runner's reporter,
 * then rethrow the original error so the failure still propagates.
 *
 * @param {Error} error - the error caught while running a benchmark
 * @throws the same `error` it was given, after reporting
 */
_errorHandler(error) {
  // Walk the chain: the head is labelled "Error", every linked cause "Caused by".
  for (let err = error; err; err = err.cause) {
    const label = err !== error ? 'Caused by' : 'Error';
    this.reporter(`${label}: ${err.name} - ${err.message} - ${err.stack}`);
  }
  throw error;
}
}

Expand Down
164 changes: 83 additions & 81 deletions test/benchmarks/mongoBench/suites/parallelBench.js
Original file line number Diff line number Diff line change
Expand Up @@ -113,87 +113,89 @@ async function gridfsMultiFileDownload() {
* @returns Benchmark
*/
function makeParallelBenchmarks(suite) {
return suite
.benchmark('ldjsonMultiFileUpload', benchmark =>
// https://github.com/mongodb/specifications/blob/master/source/benchmarking/benchmarking.rst#ldjson-multi-file-import
benchmark
.taskSize(565)
.setup(makeClient)
.setup(connectClient)
.setup(initDb)
.setup(dropDb)
.beforeTask(initCollection)
.beforeTask(dropCollection)
.beforeTask(createCollection)
.task(ldjsonMultiUpload)
.teardown(dropDb)
.teardown(disconnectClient)
)
.benchmark('ldjsonMultiFileExport', benchmark =>
// https://github.com/mongodb/specifications/blob/master/source/benchmarking/benchmarking.rst#ldjson-multi-file-export
benchmark
.taskSize(565)
.setup(makeClient)
.setup(connectClient)
.setup(initDb)
.setup(dropDb)
.beforeTask(initCollection)
.beforeTask(dropCollection)
.beforeTask(createCollection)
.beforeTask(ldjsonMultiUpload)
.beforeTask(initTemporaryDirectory)
.task(ldjsonMultiExport)
.afterTask(clearTemporaryDirectory)
.teardown(dropDb)
.teardown(async function () {
await rm(this.temporaryDirectory, { recursive: true, force: true });
})
.teardown(disconnectClient)
)
.benchmark('gridfsMultiFileUpload', benchmark =>
// https://github.com/mongodb/specifications/blob/master/source/benchmarking/benchmarking.rst#gridfs-multi-file-upload
benchmark
.taskSize(262.144)
.setup(makeClient)
.setup(connectClient)
.setup(initDb)
.setup(dropDb)
.setup(initDb)
.setup(initCollection)
.beforeTask(dropBucket)
.beforeTask(initBucket)
.beforeTask(async function () {
const stream = this.bucket.openUploadStream('setup-file.txt');
const oneByteFile = Readable.from('a');
return pipeline(oneByteFile, stream);
})
.task(gridfsMultiFileUpload)
.teardown(dropDb)
.teardown(disconnectClient)
)
.benchmark('gridfsMultiFileDownload', benchmark =>
// https://github.com/mongodb/specifications/blob/master/source/benchmarking/benchmarking.rst#gridfs-multi-file-download
benchmark
.taskSize(262.144)
.setup(makeClient)
.setup(connectClient)
.setup(initDb)
.setup(dropDb)
.setup(initDb)
.setup(initCollection)
.setup(initTemporaryDirectory)
.setup(dropBucket)
.setup(initBucket)
.setup(gridfsMultiFileUpload)
.beforeTask(clearTemporaryDirectory)
.setup(initBucket)
.task(gridfsMultiFileDownload)
.teardown(dropDb)
.teardown(async function () {
await rm(this.temporaryDirectory, { recursive: true, force: true });
})
.teardown(disconnectClient)
);
return (
suite
// .benchmark('ldjsonMultiFileUpload', benchmark =>
// // https://github.com/mongodb/specifications/blob/master/source/benchmarking/benchmarking.rst#ldjson-multi-file-import
// benchmark
// .taskSize(565)
// .setup(makeClient)
// .setup(connectClient)
// .setup(initDb)
// .setup(dropDb)
// .beforeTask(initCollection)
// .beforeTask(dropCollection)
// .beforeTask(createCollection)
// .task(ldjsonMultiUpload)
// .teardown(dropDb)
// .teardown(disconnectClient)
// )
.benchmark('ldjsonMultiFileExport', benchmark =>
// https://github.com/mongodb/specifications/blob/master/source/benchmarking/benchmarking.rst#ldjson-multi-file-export
benchmark
.taskSize(565)
.setup(makeClient)
.setup(connectClient)
.setup(initDb)
.setup(dropDb)
.beforeTask(initCollection)
.beforeTask(dropCollection)
.beforeTask(createCollection)
.beforeTask(ldjsonMultiUpload)
.beforeTask(initTemporaryDirectory)
.task(ldjsonMultiExport)
.afterTask(clearTemporaryDirectory)
.teardown(dropDb)
.teardown(async function () {
await rm(this.temporaryDirectory, { recursive: true, force: true });
})
.teardown(disconnectClient)
)
.benchmark('gridfsMultiFileUpload', benchmark =>
// https://github.com/mongodb/specifications/blob/master/source/benchmarking/benchmarking.rst#gridfs-multi-file-upload
benchmark
.taskSize(262.144)
.setup(makeClient)
.setup(connectClient)
.setup(initDb)
.setup(dropDb)
.setup(initDb)
.setup(initCollection)
.beforeTask(dropBucket)
.beforeTask(initBucket)
.beforeTask(async function () {
const stream = this.bucket.openUploadStream('setup-file.txt');
const oneByteFile = Readable.from('a');
return pipeline(oneByteFile, stream);
})
.task(gridfsMultiFileUpload)
.teardown(dropDb)
.teardown(disconnectClient)
)
.benchmark('gridfsMultiFileDownload', benchmark =>
// https://github.com/mongodb/specifications/blob/master/source/benchmarking/benchmarking.rst#gridfs-multi-file-download
benchmark
.taskSize(262.144)
.setup(makeClient)
.setup(connectClient)
.setup(initDb)
.setup(dropDb)
.setup(initDb)
.setup(initCollection)
.setup(initTemporaryDirectory)
.setup(dropBucket)
.setup(initBucket)
.setup(gridfsMultiFileUpload)
.beforeTask(clearTemporaryDirectory)
.setup(initBucket)
.task(gridfsMultiFileDownload)
.teardown(dropDb)
.teardown(async function () {
await rm(this.temporaryDirectory, { recursive: true, force: true });
})
.teardown(disconnectClient)
)
);
}

module.exports = { makeParallelBenchmarks };