use quanta to measure time

PSeitz · PSeitz · commit 50bc6887a2f2 · 2025-12-16T09:33:42.000+08:00
diff --git a/Cargo.toml b/Cargo.toml
@@ -36,6 +36,7 @@ yansi = { version = "1.0.1", features = ["detect-env", "detect-tty"] }
 rustop = "=1.1.4"
 rustc-hash = "2.0.0"
 bpu_trasher = { version = "0.2.0", optional = true }
+quanta = "0.12"
 
 [target.'cfg(target_os = "linux")'.dependencies]
 perf-event = { version = "0.4.8" }
@@ -62,4 +63,3 @@ harness = false
 [[bench]]
 name = "test_bench"
 harness = false
-
diff --git a/benches/test_bench.rs b/benches/test_bench.rs
@@ -1,12 +1,10 @@
-use std::{
-    collections::HashMap,
-    time::{Duration, Instant},
-};
+use std::{collections::HashMap, time::Duration};
 
 use binggan::{
     plugins::{CacheTrasher, PeakMemAllocPlugin, PerfCounterPlugin},
     BenchRunner, PeakMemAlloc, INSTRUMENTED_SYSTEM,
 };
+use quanta::Instant;
 
 #[global_allocator]
 pub static GLOBAL: &PeakMemAlloc<std::alloc::System> = &INSTRUMENTED_SYSTEM;
diff --git a/src/bench.rs b/src/bench.rs
@@ -5,6 +5,7 @@ use crate::{
     plugins::{alloc::*, *},
     stats::*,
 };
+use quanta::Instant;
 
 /// The trait which typically wraps a InputWithBenchmark and allows to hide the generics.
 pub trait Bench<'a> {
@@ -183,7 +184,7 @@ impl<'a, I, O: OutputValue> NamedBench<'a, I, O> {
         const TARGET_NS_PER_BENCH: u128 = TARGET_MS_PER_BENCH as u128 * 1_000_000;
         {
             // Preliminary test if function is very slow
-            let start = std::time::Instant::now();
+            let start = Instant::now();
             #[allow(clippy::unit_arg)]
             black_box((self.fun)(input));
             let elapsed_ms = start.elapsed().as_millis() as u64;
@@ -192,7 +193,7 @@ impl<'a, I, O: OutputValue> NamedBench<'a, I, O> {
             }
         }
 
-        let start = std::time::Instant::now();
+        let start = Instant::now();
         for _ in 0..64 {
             #[allow(clippy::unit_arg)]
             black_box((self.fun)(input));
@@ -217,18 +218,24 @@ impl<'a, I, O: OutputValue> NamedBench<'a, I, O> {
             bench_id: &self.bench_id,
         });
         debug_assert!(num_iter > 0);
-        let start = std::time::Instant::now();
 
         // Defer dropping outputs so destructor cost is not part of the measured time.
         let run_result = if O::defer_drop() {
-            let mut outputs: Vec<O> = Vec::with_capacity(num_iter);
+            let mut sum_ns = 0u64;
+            let mut res: Option<O> = None;
+            // In this mode, we measure each iteration separately to avoid destructor cost.
+            // There may be some timing overhead, but it should be outweighed by the gain for benchmarks that allocate.
             for _ in 0..num_iter {
-                outputs.push(black_box((self.fun)(input)));
+                res.take();
+                let start = Instant::now();
+                let val = black_box((self.fun)(input));
+                sum_ns += start.elapsed().as_nanos() as u64;
+                res = Some(val);
             }
-            let duration_ns = start.elapsed().as_nanos() as u64 / num_iter as u64;
-            let last_output = outputs.pop().expect("num_iter > 0");
-            RunResult::new(duration_ns, last_output)
+            let duration_ns = sum_ns / num_iter as u64;
+            RunResult::new(duration_ns, res.unwrap())
         } else {
+            let start = Instant::now();
             let mut res: Option<O> = None;
             for _ in 0..num_iter {
                 res = Some(black_box((self.fun)(input)));
diff --git a/src/bench_runner.rs b/src/bench_runner.rs
@@ -272,6 +272,7 @@ impl BenchRunner {
 
         // In order to make the benchmarks in a group comparable, it is imperative to call them
         // the same numer of times
+        // Note: This doesn't work well for very large groups with very different benchmarks.
         let (min_num_iter, max_num_iter) =
             minmax(benches.iter_mut().map(|b| b.sample_num_iter())).unwrap();
 
diff --git a/src/config.rs b/src/config.rs
@@ -21,10 +21,12 @@ pub struct Config {
 
 impl Default for Config {
     fn default() -> Self {
+        // Check ENV for verbose
+        let verbose = std::env::var("BINGGAN_VERBOSE").is_ok();
         Config {
             interleave: true,
             filter: None,
-            verbose: false,
+            verbose,
             num_iter_bench: None,
             num_iter_group: None,
         }