RUST-617 add bson benchmarks

saghm · saghm · commit 4c03d8c36b02 · 2021-01-06T15:33:22.000-05:00
diff --git a/Cargo.toml b/Cargo.toml
@@ -83,7 +83,7 @@ version = "0.11.5"
 optional = true
 
 [dependencies.tokio]
-version = "0.2.18"
+version = "~0.2.18"
 features = ["io-util", "sync", "macros"]
 
 [dependencies.tokio-rustls]
diff --git a/benchmarks/README.md b/benchmarks/README.md
@@ -2,7 +2,8 @@
 
 This suite implements the benchmarks described in this (spec)[https://github.com/mongodb/specifications/blob/master/source/benchmarking/benchmarking.rst].
 
-In order to run the microbenchmarks, first run `../etc/microbenchmark-test-data.sh` to download the data.
+In order to run the microbenchmarks, first run `./download-data.sh` to download the data. (NOTE: the data for the deeply nested BSON
+encoding and decoding benchmarks is currently broken, so those benchmarks will not be runnable until that's fixed.)
 
 Note: make sure you run the download script and the microbenchmarks binary from the benchmark root (the directory containing this README).
 
@@ -11,7 +12,7 @@ connection string by setting the `MONGODB_URI` environment variable). You can sp
 collection by setting the `DATABASE_NAME` or `COLL_NAME` environment variables respectively.
 
 Additionally, you can specify custom time frames for the benchmarks by setting the `MAX_EXECUTION_TIME`, `MIN_EXECUTION_TIME`
-and `MAX_ITERATIONS` environment variables.
+and `TARGET_ITERATION_COUNT` environment variables.
 
 Run `cargo run --release -- --help` to see a full list of testing options.
 
@@ -30,6 +31,12 @@ the single-doc benchmarks. By default, all benchmarks are executed. The table be
 | Large doc bulk insert          | 7  |
 | LDJSON multi-file import       | 8  |
 | LDJSON multi-file export       | 9  |
+| BSON flat decode               | 10 |
+| BSON flat encode               | 11 |
+| BSON deeply nested decode      | 12 |
+| BSON deeply nested encode      | 13 |
+| BSON full document decode      | 14 |
+| BSON full document encode      | 15 |
 | All benchmarks                 | all|
 
 Note that in order to compare against the other drivers, an inMemory mongod instance should be used.
diff --git a/benchmarks/download-data.sh b/benchmarks/download-data.sh
@@ -0,0 +1,23 @@
+#!/bin/sh
+
+# Downloads the benchmark data archives from the MongoDB specifications
+# repository and unpacks each one under ./data.
+
+# Abort on the first failed command so a bad download is never unpacked.
+set -e
+
+# fetch_data_file NAME: download NAME.tgz, extract it into data/, remove the archive.
+fetch_data_file() {
+	echo "Fetching $1.tgz from github..."
+
+	# --fail makes curl exit non-zero on an HTTP error instead of saving the error page.
+	curl --fail --retry 5 "https://raw.githubusercontent.com/mongodb/specifications/master/source/benchmarking/data/$1.tgz" --max-time 120 --remote-name --silent
+	mkdir -p data
+	# Extract straight into data/ so re-running the script works when data/NAME already exists.
+	tar xf "$1.tgz" -C data
+	rm "$1.tgz"
+}
+
+fetch_data_file extended_bson
+fetch_data_file parallel
+fetch_data_file single_and_multi_document
diff --git a/benchmarks/src/bench/bson_decode.rs b/benchmarks/src/bench/bson_decode.rs
@@ -0,0 +1,61 @@
+use std::{convert::TryInto, path::PathBuf};
+
+use anyhow::{bail, Result};
+use mongodb::bson::{Bson, Document};
+use serde_json::Value;
+
+use crate::{bench::Benchmark, fs::read_to_string};
+
+/// Benchmark that repeatedly decodes one BSON-encoded document from an in-memory buffer.
+pub struct BsonDecodeBenchmark {
+    /// Number of decode passes performed by a single `do_task` call.
+    num_iter: usize,
+    /// BSON bytes of the test document, produced once in `setup`.
+    bytes: Vec<u8>,
+}
+
+/// Configuration for [`BsonDecodeBenchmark`].
+pub struct Options {
+    /// Number of decode passes per task.
+    pub num_iter: usize,
+    /// Path of the JSON file holding the test document.
+    pub path: PathBuf,
+}
+
+#[async_trait::async_trait]
+impl Benchmark for BsonDecodeBenchmark {
+    type Options = Options;
+
+    /// Reads the JSON file at `options.path`, converts it to a BSON document, and serializes
+    /// that document to bytes up front so that `do_task` only has to decode.
+    ///
+    /// Propagates read, parse and conversion errors; bails if the JSON is not a document.
+    async fn setup(options: Self::Options) -> Result<Self> {
+        let file = read_to_string(&options.path).await?;
+
+        let json: Value = serde_json::from_str(&file)?;
+        let doc = match json.try_into()? {
+            Bson::Document(doc) => doc,
+            _ => bail!("invalid json test file"),
+        };
+
+        let mut bytes: Vec<u8> = Vec::new();
+        doc.to_writer(&mut bytes)?;
+
+        Ok(BsonDecodeBenchmark {
+            num_iter: options.num_iter,
+            bytes,
+        })
+    }
+
+    /// Decodes the stored bytes into a `Document` `num_iter` times, stopping at the first error.
+    async fn do_task(&self) -> Result<()> {
+        for _ in 0..self.num_iter {
+            // `&[u8]` implements `Read`, and `from_reader` needs a `&mut R: Read`, so we need a
+            // `&mut &[u8]`.
+            let _doc = Document::from_reader(&mut &self.bytes[..])?;
+        }
+
+        Ok(())
+    }
+}
diff --git a/benchmarks/src/bench/bson_encode.rs b/benchmarks/src/bench/bson_encode.rs
@@ -0,0 +1,58 @@
+use std::{convert::TryInto, path::PathBuf};
+
+use anyhow::{bail, Result};
+use mongodb::bson::{Bson, Document};
+use serde_json::Value;
+
+use crate::{bench::Benchmark, fs::read_to_string};
+
+/// Benchmark that repeatedly encodes one in-memory document to BSON bytes.
+pub struct BsonEncodeBenchmark {
+    /// Number of encode passes performed by a single `do_task` call.
+    num_iter: usize,
+    /// Test document, parsed once in `setup`.
+    doc: Document,
+}
+
+/// Configuration for [`BsonEncodeBenchmark`].
+pub struct Options {
+    /// Number of encode passes per task.
+    pub num_iter: usize,
+    /// Path of the JSON file holding the test document.
+    pub path: PathBuf,
+}
+
+#[async_trait::async_trait]
+impl Benchmark for BsonEncodeBenchmark {
+    type Options = Options;
+
+    /// Reads the JSON file at `options.path` and converts it to the BSON document that
+    /// `do_task` will encode.
+    ///
+    /// Propagates read, parse and conversion errors; bails if the JSON is not a document.
+    async fn setup(options: Self::Options) -> Result<Self> {
+        let file = read_to_string(&options.path).await?;
+
+        let json: Value = serde_json::from_str(&file)?;
+        let doc = match json.try_into()? {
+            Bson::Document(doc) => doc,
+            _ => bail!("invalid json test file"),
+        };
+
+        Ok(BsonEncodeBenchmark {
+            num_iter: options.num_iter,
+            doc,
+        })
+    }
+
+    /// Writes the stored document into a fresh byte buffer `num_iter` times, stopping at the
+    /// first error.
+    async fn do_task(&self) -> Result<()> {
+        for _ in 0..self.num_iter {
+            let mut bytes: Vec<u8> = Vec::new();
+            self.doc.to_writer(&mut bytes)?;
+        }
+
+        Ok(())
+    }
+}
diff --git a/benchmarks/src/bench/mod.rs b/benchmarks/src/bench/mod.rs
@@ -1,3 +1,5 @@
+pub mod bson_decode;
+pub mod bson_encode;
 pub mod find_many;
 pub mod find_one;
 pub mod insert_many;
@@ -33,10 +35,10 @@ lazy_static! {
         .unwrap_or("60")
         .parse::<u64>()
         .expect("invalid MIN_EXECUTION_TIME");
-    pub static ref MAX_ITERATIONS: usize = option_env!("MAX_ITERATIONS")
+    pub static ref TARGET_ITERATION_COUNT: usize = option_env!("TARGET_ITERATION_COUNT")
         .unwrap_or("100")
         .parse::<usize>()
-        .expect("invalid MAX_ITERATIONS");
+        .expect("invalid TARGET_ITERATION_COUNT");
 }
 
 #[async_trait::async_trait]
@@ -59,7 +61,9 @@ pub trait Benchmark: Sized {
     }
 
     // execute once after benchmarking
-    async fn teardown(&self) -> Result<()>;
+    async fn teardown(&self) -> Result<()> {
+        Ok(())
+    }
 }
 
 pub(crate) async fn parse_json_file_to_documents(file: File) -> Result<Vec<Document>> {
@@ -81,7 +85,11 @@ pub(crate) async fn parse_json_file_to_documents(file: File) -> Result<Vec<Docum
 
+/// Returns `true` once benchmarking should stop: either `MAX_EXECUTION_TIME` seconds have
+/// elapsed, or `iter` has reached `TARGET_ITERATION_COUNT` and more than `MIN_EXECUTION_TIME`
+/// seconds have elapsed.
 fn finished(duration: Duration, iter: usize) -> bool {
     let elapsed = duration.as_secs();
-    elapsed >= *MAX_EXECUTION_TIME || (iter >= *MAX_ITERATIONS && elapsed > *MIN_EXECUTION_TIME)
+    elapsed >= *MAX_EXECUTION_TIME
+        || (iter >= *TARGET_ITERATION_COUNT && elapsed > *MIN_EXECUTION_TIME)
 }
 
 pub async fn run_benchmark<B: Benchmark + Send + Sync>(
@@ -91,7 +99,7 @@ pub async fn run_benchmark<B: Benchmark + Send + Sync>(
 
     let mut test_durations = Vec::new();
 
-    let progress_bar = ProgressBar::new(*MAX_ITERATIONS as u64);
+    let progress_bar = ProgressBar::new(*TARGET_ITERATION_COUNT as u64);
     progress_bar.set_style(
         ProgressStyle::default_bar()
             .template(
diff --git a/benchmarks/src/main.rs b/benchmarks/src/main.rs