diff --git a/Cargo.lock b/Cargo.lock index 027e2ece..81c26743 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -458,7 +458,7 @@ dependencies = [ "assert_cmd", "cargo_metadata", "clap", - "codspeed 2.8.0-alpha.0", + "codspeed", "fs_extra", "glob", "itertools 0.13.0", @@ -579,18 +579,6 @@ version = "0.7.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1462739cb27611015575c0c11df5df7601141071f07518d56fcc1be504cbec97" -[[package]] -name = "codspeed" -version = "2.7.2" -source = "git+https://github.com/CodSpeedHQ/codspeed-rust?branch=cod-526-build-and-find-walltime-entrypoint-with-divan#209374e1bc7e49221879f3348a364365992ae065" -dependencies = [ - "colored", - "libc", - "serde", - "serde_json", - "uuid", -] - [[package]] name = "codspeed" version = "2.8.0-alpha.0" @@ -608,7 +596,7 @@ name = "codspeed-bencher-compat" version = "2.8.0-alpha.0" dependencies = [ "bencher", - "codspeed 2.8.0-alpha.0", + "codspeed", ] [[package]] @@ -616,7 +604,7 @@ name = "codspeed-criterion-compat" version = "2.8.0-alpha.0" dependencies = [ "async-std", - "codspeed 2.8.0-alpha.0", + "codspeed", "colored", "criterion", "futures", @@ -628,21 +616,35 @@ dependencies = [ name = "codspeed-divan-compat" version = "2.8.0-alpha.0" dependencies = [ - "codspeed 2.8.0-alpha.0", + "codspeed", "codspeed-divan-compat-macros", - "divan", + "codspeed-divan-compat-walltime", ] [[package]] name = "codspeed-divan-compat-macros" version = "2.8.0-alpha.0" dependencies = [ - "divan-macros 0.1.17 (registry+https://github.com/rust-lang/crates.io-index)", + "divan-macros", "proc-macro2", "quote", "syn", ] +[[package]] +name = "codspeed-divan-compat-walltime" +version = "0.1.17" +dependencies = [ + "cfg-if", + "clap", + "codspeed", + "condtype", + "divan-macros", + "libc", + "mimalloc", + "regex-lite", +] + [[package]] name = "colorchoice" version = "1.0.2" @@ -773,20 +775,6 @@ version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"6184e33543162437515c2e2b48714794e37845ec9851711914eec9d308f6ebe8" -[[package]] -name = "divan" -version = "0.1.17" -source = "git+https://github.com/CodSpeedHQ/divan#e605bf0c971aeb08bc55867abecc56bafbbdc3a0" -dependencies = [ - "cfg-if", - "clap", - "codspeed 2.7.2", - "condtype", - "divan-macros 0.1.17 (git+https://github.com/CodSpeedHQ/divan)", - "libc", - "regex-lite", -] - [[package]] name = "divan-macros" version = "0.1.17" @@ -798,16 +786,6 @@ dependencies = [ "syn", ] -[[package]] -name = "divan-macros" -version = "0.1.17" -source = "git+https://github.com/CodSpeedHQ/divan#e605bf0c971aeb08bc55867abecc56bafbbdc3a0" -dependencies = [ - "proc-macro2", - "quote", - "syn", -] - [[package]] name = "doc-comment" version = "0.3.3" @@ -1163,6 +1141,16 @@ version = "0.2.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8355be11b20d696c8f18f6cc018c4e372165b1fa8126cef092399c9951984ffa" +[[package]] +name = "libmimalloc-sys" +version = "0.1.39" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "23aa6811d3bd4deb8a84dde645f943476d13b248d818edcf8ce0b2f37f036b44" +dependencies = [ + "cc", + "libc", +] + [[package]] name = "linux-raw-sys" version = "0.3.8" @@ -1200,6 +1188,15 @@ version = "2.7.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3" +[[package]] +name = "mimalloc" +version = "0.1.43" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "68914350ae34959d83f732418d51e2427a794055d0b9529f48259ac07af65633" +dependencies = [ + "libmimalloc-sys", +] + [[package]] name = "miniz_oxide" version = "0.7.4" diff --git a/Cargo.toml b/Cargo.toml index a7ca6d1e..4633e963 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -6,6 +6,7 @@ members = [ "crates/cargo-codspeed", "crates/divan_compat", "crates/divan_compat/macros", + "crates/divan_compat/divan_fork", ] resolver = "2" diff --git 
a/crates/codspeed/src/walltime.rs b/crates/codspeed/src/walltime.rs index a3b0e4d5..32517001 100644 --- a/crates/codspeed/src/walltime.rs +++ b/crates/codspeed/src/walltime.rs @@ -48,6 +48,8 @@ impl RawWallTimeData { } /// Entry point called in patched integration to harvest raw walltime data +/// +/// `CODSPEED_CARGO_WORKSPACE_ROOT` is expected to be set for this to work pub fn collect_raw_walltime_results( scope: &str, name: String, diff --git a/crates/divan_compat/Cargo.toml b/crates/divan_compat/Cargo.toml index 9c39f67b..226b2bff 100644 --- a/crates/divan_compat/Cargo.toml +++ b/crates/divan_compat/Cargo.toml @@ -19,7 +19,7 @@ keywords = ["codspeed", "benchmark", "divan"] [dependencies] codspeed = { path = "../codspeed", version = "=2.8.0-alpha.0" } -divan = { git = "https://github.com/CodSpeedHQ/divan" } +divan = { package = "codspeed-divan-compat-walltime", path = "./divan_fork", version = "=0.1.17" } codspeed-divan-compat-macros = { version = "=2.8.0-alpha.0", path = './macros' } [[bench]] diff --git a/crates/divan_compat/divan_fork/.github/FUNDING.yml b/crates/divan_compat/divan_fork/.github/FUNDING.yml new file mode 100644 index 00000000..662ce5d1 --- /dev/null +++ b/crates/divan_compat/divan_fork/.github/FUNDING.yml @@ -0,0 +1,2 @@ +github: ['nvzqz'] +custom: ['https://paypal.me/nvzqz'] diff --git a/crates/divan_compat/divan_fork/.github/workflows/ci.yml b/crates/divan_compat/divan_fork/.github/workflows/ci.yml new file mode 100644 index 00000000..5fecd89c --- /dev/null +++ b/crates/divan_compat/divan_fork/.github/workflows/ci.yml @@ -0,0 +1,191 @@ +on: [push, pull_request] + +name: CI + +env: + CARGO_HOME: ${{ github.workspace }}/.cargo + CARGO_TERM_COLOR: always + RUSTFLAGS: -D warnings -A unused-imports + RUSTDOCFLAGS: -D warnings + RUST_BACKTRACE: full + +jobs: + # Check formatting. 
+ rustfmt: + name: Rustfmt + if: github.event_name == 'push' || github.event.pull_request.head.repo.full_name != github.repository + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - run: rustup update stable --no-self-update + - run: rustc -Vv + - run: cargo fmt --all -- --check + + # Build documentation. + rustdoc: + name: Rustdoc + if: github.event_name == 'push' || github.event.pull_request.head.repo.full_name != github.repository + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: actions/cache@v3.3.2 + with: + path: | + ${{ env.CARGO_HOME }} + target + key: rustdoc-${{ runner.os }} + - run: rustup update stable --no-self-update + - run: rustc -Vv + - run: cargo rustdoc --all-features -- --document-private-items + + # Run linter. + clippy: + name: Clippy + if: github.event_name == 'push' || github.event.pull_request.head.repo.full_name != github.repository + runs-on: ${{ matrix.os }} + strategy: + matrix: + os: + - ubuntu-latest + - macos-latest + - windows-latest + steps: + - uses: actions/checkout@v4 + - uses: actions/cache@v3.3.2 + with: + path: | + ${{ env.CARGO_HOME }} + target + key: clippy-${{ runner.os }} + - run: rustup update stable --no-self-update + - run: rustc -Vv + - run: cargo clippy --all --all-targets --all-features + + # Run tests in `src/` and `tests/`. 
+ unit-test: + name: Unit Test + if: github.event_name == 'push' || github.event.pull_request.head.repo.full_name != github.repository + runs-on: ${{ matrix.os }} + strategy: + matrix: + os: + - ubuntu-latest + - macos-latest + - windows-latest + rust: + - stable + - nightly + steps: + - uses: actions/checkout@v4 + - uses: actions/cache@v3.3.2 + with: + path: | + ${{ env.CARGO_HOME }} + target + key: unit-test-${{ runner.os }}-${{ matrix.rust }} + - run: rustup default ${{ matrix.rust }} + - run: rustup update ${{ matrix.rust }} --no-self-update + - run: rustc -Vv + - run: cargo test -p divan -p divan-macros + + # Run tests in `src/` and `tests/` using Miri. + unit-test-miri: + name: Unit Test (Miri) + if: github.event_name == 'push' || github.event.pull_request.head.repo.full_name != github.repository + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: actions/cache@v3.3.2 + with: + path: | + ${{ env.CARGO_HOME }} + target + key: miri-${{ runner.os }} + - run: rustup default nightly + - run: rustup update nightly --no-self-update + - run: rustup component add miri + - run: rustc -Vv + - run: cargo miri test -p divan -p divan-macros + + # Run `examples/` directory as tests. + examples-test: + name: Examples Test + if: github.event_name == 'push' || github.event.pull_request.head.repo.full_name != github.repository + runs-on: ${{ matrix.os }} + strategy: + matrix: + os: + - ubuntu-latest + - macos-latest + - windows-latest + rust: + - stable + - nightly + env: + DIVAN_ITEMS_COUNT: 0 + DIVAN_BYTES_COUNT: 1 + DIVAN_CHARS_COUNT: 2 + steps: + - uses: actions/checkout@v4 + - uses: actions/cache@v3.3.2 + with: + path: | + ${{ env.CARGO_HOME }} + target + key: examples-test-${{ runner.os }}-${{ matrix.rust }} + - run: rustup default ${{ matrix.rust }} + - run: rustup update ${{ matrix.rust }} --no-self-update + - run: rustc -Vv + - run: cargo test -p examples --all-features --benches + + # Run `examples/` directory as benchmarks. 
+ examples-bench: + name: Examples Bench + if: github.event_name == 'push' || github.event.pull_request.head.repo.full_name != github.repository + runs-on: ${{ matrix.os }} + env: + # Run each benchmark within 2 seconds. + DIVAN_MAX_TIME: 2 + strategy: + matrix: + os: + - ubuntu-latest + - macos-latest + - windows-latest + steps: + - uses: actions/checkout@v4 + - uses: actions/cache@v3.3.2 + with: + path: | + ${{ env.CARGO_HOME }} + target + key: examples-bench-${{ runner.os }} + - run: rustup update stable --no-self-update + - run: rustc -Vv + - run: cargo bench -p examples --all-features + + # Run `internal_benches/` directory as benchmarks. + internals-bench: + name: Internals Bench + if: github.event_name == 'push' || github.event.pull_request.head.repo.full_name != github.repository + runs-on: ${{ matrix.os }} + env: + # Run each benchmark within 2 seconds. + DIVAN_MAX_TIME: 2 + strategy: + matrix: + os: + - ubuntu-latest + - macos-latest + - windows-latest + steps: + - uses: actions/checkout@v4 + - uses: actions/cache@v3.3.2 + with: + path: | + ${{ env.CARGO_HOME }} + target + key: internals-bench-${{ runner.os }} + - run: rustup update stable --no-self-update + - run: rustc -Vv + - run: cargo bench -p internal_benches --all-features diff --git a/crates/divan_compat/divan_fork/.gitignore b/crates/divan_compat/divan_fork/.gitignore new file mode 100644 index 00000000..8b3cb274 --- /dev/null +++ b/crates/divan_compat/divan_fork/.gitignore @@ -0,0 +1,88 @@ +### Linux ### +*~ + +# temporary files which can be created if a process still has a handle open of a deleted file +.fuse_hidden* + +# KDE directory preferences +.directory + +# Linux trash folder which might appear on any partition or disk +.Trash-* + +# .nfs files are created when an open file is removed but is still being accessed +.nfs* + +### macOS ### +# General +.DS_Store +.AppleDouble +.LSOverride + +# Icon must end with two \r +Icon + +# Thumbnails +._* + +# Files that might appear in the root of a 
volume +.DocumentRevisions-V100 +.fseventsd +.Spotlight-V100 +.TemporaryItems +.Trashes +.VolumeIcon.icns +.com.apple.timemachine.donotpresent + +# Directories potentially created on remote AFP share +.AppleDB +.AppleDesktop +Network Trash Folder +Temporary Items +.apdisk + +### macOS Patch ### +# iCloud generated files +*.icloud + +### Rust ### +# Generated by Cargo +# will have compiled files and executables +debug/ +target/ + +# Remove Cargo.lock from gitignore if creating an executable, leave it for libraries +# More information here https://doc.rust-lang.org/cargo/guide/cargo-toml-vs-cargo-lock.html +Cargo.lock + +# These are backup files generated by rustfmt +**/*.rs.bk + +# MSVC Windows builds of rustc generate these, which store debugging information +*.pdb + +### Windows ### +# Windows thumbnail cache files +Thumbs.db +Thumbs.db:encryptable +ehthumbs.db +ehthumbs_vista.db + +# Dump file +*.stackdump + +# Folder config file +[Dd]esktop.ini + +# Recycle Bin used on file shares +$RECYCLE.BIN/ + +# Windows Installer files +*.cab +*.msi +*.msix +*.msm +*.msp + +# Windows shortcuts +*.lnk diff --git a/crates/divan_compat/divan_fork/CHANGELOG.md b/crates/divan_compat/divan_fork/CHANGELOG.md new file mode 100644 index 00000000..d3bd2348 --- /dev/null +++ b/crates/divan_compat/divan_fork/CHANGELOG.md @@ -0,0 +1,391 @@ +# Changelog [![crates.io][crate-badge]][crate] + +All notable changes to this project will be documented in this file. + +The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/) +and this project adheres to [Semantic +Versioning](http://semver.org/spec/v2.0.0.html). + +## [Unreleased] + +## [0.1.17] - 2024-12-04 + +### Changed + +- Set [MSRV] to 1.80 for [`LazyLock`] and new `size_of` prelude import. + +- Reduced thread pool memory usage by many kilobytes by using rendezvous + channels instead of array-based channels. + +## [0.1.16] - 2024-11-25 + +### Added + +- Thread pool for reusing threads across multi-threaded benchmarks. 
The result + is that when running Divan benchmarks under a sampling profiler, the + profiler's output will be cleaner and easier to understand. ([#37]) + +- Track the maximum number of allocations during a benchmark. + +### Changed + +- Make private `Arg::get` trait method not take `self`, so that text editors + don't recommend using it. ([#59]) + +- Cache `BenchOptions` using `LazyLock` instead of `OnceLock`, saving space and + simplifying the implementation. + +## [0.1.15] - 2024-10-31 + +### Added + +- [`CyclesCount`] counter to display cycle throughput as Hertz. + +- Track the maximum number of bytes allocated during a benchmark. + +### Removed + +- Remove `has_cpuid` polyfill due to it no longer being planned for Rust, since + CPUID is assumed to be available on all old x86 Rust targets. + +### Fixed + +- List generic benchmark type parameter `A<4>` before `A<32>`. ([#64]) + +- Improve precision by using `f64` when calculating allocation count and sizes + for the median samples. + +- Multi-thread allocation counting in `sum_alloc_tallies` on macOS was loading a + null pointer instead of the pointer initialized by `sync_threads`. + +### Changes + +- Sort all output benchmark names + [naturally](https://en.wikipedia.org/wiki/Natural_sort_order) instead of + [lexicographically](https://en.wikipedia.org/wiki/Lexicographic_order). + +- Internally reuse [`&[&str]` slice][slice] for [`args`] names. + +- Subtract overhead of [`AllocProfiler`] from timings. Now that Divan also + tracks the maximum bytes allocated, the overhead was apparent in timings. + +- Simplify `ThreadAllocInfo::clear`. + +- Move measured loop overhead from `SharedContext` to global `OnceLock`. + +- Macros no longer rely on `std` being re-exported by Divan. Instead they use + `::std` or `::core` to greatly simplify code. Although this is technically a + breaking change, it is extremely unlikely to do `extern crate std as x`. 
+ +## [0.1.14] - 2024-02-17 + +### Fixed + +- Set correct field in [`Divan::max_time`]. ([#45](https://github.com/nvzqz/divan/pull/45)) + +### Changes + +- Improve [`args`] documentation by relating it to using [`Bencher`]. + +- Define [`BytesCount::of_iter`] in terms of [`BytesCount::of_many`]. + +## [0.1.13] - 2024-02-09 + +### Fixed + +- Missing update to `divan-macros` dependency. + +## [0.1.12] - 2024-02-09 + +### Added + +- Display [`args`] option values with [`Debug`] instead if [`ToString`] is not + implemented. + + This makes it simple to use enums with derived [`Debug`]: + + ```rs + #[derive(Debug)] + enum Arg { A, B } + + #[divan::bench(args = [Arg::A, Arg::B])] + fn bench_args(arg: &Arg) { + ... + } + ``` + +- Documentation of when to use [`black_box`] in benchmarks. + +## [0.1.11] - 2024-01-20 + +### Fixed + +- Sorting negative [`args`] numbers. + +## [0.1.10] - 2024-01-20 + +### Fixed + +- Sort [`args`] numbers like [`consts`]. + +## [0.1.9] - 2024-01-20 + +### Added + +- [`args`] option for providing runtime arguments to benchmarks: + + ```rs + #[divan::bench(args = [1, 2, 3])] + fn args_list(arg: usize) { ... } + + #[divan::bench(args = 1..=3)] + fn args_range(arg: usize) { ... } + + const ARGS: &[usize] = &[1, 2, 3]; + + #[divan::bench(args = ARGS)] + fn args_const(arg: usize) { ... } + ``` + + This option may be preferred over the similar [`consts`] option because: + - It is compatible with more types, only requiring that the argument type + implements [`Any`], [`Copy`], [`Send`], [`Sync`], and [`ToString`]. [`Copy`] + is not needed if the argument is used through a reference. + - It does not increase compile times, unlike [`consts`] which needs to + generate new code for each constant used. + +## [0.1.8] - 2023-12-19 + +### Changes + +- Reduce [`AllocProfiler`] footprint from 6-10ns to 1-2ns: + + - Thread-local values are now exclusively owned by their threads and are no + longer kept in a global list.
This enables some optimizations: + + - Performing faster unsynchronized arithmetic. + + - Removing one level of pointer indirection by storing the thread-local + value entirely inline in [`thread_local!`], rather than storing a pointer + to a globally-shared instance. + + - Compiler emits SIMD arithmetic for x86_64 using `paddq`. + + - Improved thread-local lookup on x86_64 macOS by using a static lookup key + instead of a dynamic key from [`pthread_key_create`]. Key 11 is used because + it is reserved for Windows. + + The `dyn_thread_local` crate feature disables this optimization. This is + recommended if your code or another dependency uses the same static key. + +### Fixed + +- Remove unused allocations if [`AllocProfiler`] is not active as the global + allocator. + +## [0.1.7] - 2023-12-13 + +### Changes + +- Improve [`AllocProfiler`] implementation documentation. + +- Limit [`AllocProfiler`] mean count outputs to 4 significant digits to not be + very wide and for consistency with other outputs. + +## [0.1.6] - 2023-12-13 + +### Added + +- [`AllocProfiler`] allocator that tracks allocation counts and sizes during + benchmarks. + +## [0.1.5] - 2023-12-05 + +### Added + +- [`black_box_drop`](https://docs.rs/divan/0.1.5/divan/fn.black_box_drop.html) + convenience function for [`black_box`] + [`drop`]. This is useful when + benchmarking a lazy [`Iterator`] to completion with `for_each`: + + ```rust + #[divan::bench] + fn parse_iter() { + let input: &str = // ... + + Parser::new(input) + .for_each(divan::black_box_drop); + } + ``` + +## [0.1.4] - 2023-12-02 + +### Added + +- `From` implementations for counters on references to `u8`–`u64` and `usize`, + such as `From<&u64>` and `From<&&u64>`. This allows for doing: + + ```rust + bencher + .with_inputs(|| { ... }) + .input_counter(ItemsCount::from) + .bench_values(|n| { ... 
}); + ``` + +- [`Bencher::count_inputs_as`](https://docs.rs/divan/0.1.4/divan/struct.Bencher.html#method.count_inputs_as) + method to convert inputs to a `Counter`: + + ```rust + bencher + .with_inputs(|| -> usize { + // ... + }) + .count_inputs_as::<ItemsCount>() + .bench_values(|n| -> Vec<usize> { + (0..n).collect() + }); + ``` + +## [0.1.3] - 2023-11-21 + +### Added + +- Convenience shorthand options for `#[divan::bench]` and + `#[divan::bench_group]` counters: + - [`bytes_count`](https://docs.rs/divan/0.1.3/divan/attr.bench.html#bytes_count) + for `counter = BytesCount::from(n)` + - [`chars_count`](https://docs.rs/divan/0.1.3/divan/attr.bench.html#chars_count) + for `counter = CharsCount::from(n)` + - [`items_count`](https://docs.rs/divan/0.1.3/divan/attr.bench.html#items_count) + for `counter = ItemsCount::from(n)` + +- Support for NetBSD, DragonFly BSD, and Haiku OS by using pre-`main`. + +- Set global thread counts using: + - [`Divan::threads`](https://docs.rs/divan/0.1.3/divan/struct.Divan.html#method.threads) + - `--threads A B C...` CLI arg + - `DIVAN_THREADS=A,B,C` env var + + The following example will benchmark across 2, 4, and [available parallelism] + thread counts: + + ```sh + DIVAN_THREADS=0,2,4 cargo bench -q -p examples --bench atomic + ``` + +- Set global + [`Counter`s](https://docs.rs/divan/0.1.3/divan/counter/trait.Counter.html) at + runtime using: + - [`Divan::counter`](https://docs.rs/divan/0.1.3/divan/struct.Divan.html#method.counter) + - [`Divan::items_count`](https://docs.rs/divan/0.1.3/divan/struct.Divan.html#method.items_count) + - [`Divan::bytes_count`](https://docs.rs/divan/0.1.3/divan/struct.Divan.html#method.bytes_count) + - [`Divan::chars_count`](https://docs.rs/divan/0.1.3/divan/struct.Divan.html#method.chars_count) + - `--items-count N` CLI arg + - `--bytes-count N` CLI arg + - `--chars-count N` CLI arg + - `DIVAN_ITEMS_COUNT=N` env var + - `DIVAN_BYTES_COUNT=N` env var + - `DIVAN_CHARS_COUNT=N` env var + +- `From<C>` for +
[`ItemsCount`](https://docs.rs/divan/0.1.3/divan/counter/struct.ItemsCount.html), + [`BytesCount`](https://docs.rs/divan/0.1.3/divan/counter/struct.BytesCount.html), + and + [`CharsCount`](https://docs.rs/divan/0.1.3/divan/counter/struct.CharsCount.html) + where `C` is `u8`–`u64` or `usize` (via `CountUInt` internally). This provides + an alternative to the `new` constructor. + +- [`BytesCount::of_many`](https://docs.rs/divan/0.1.3/divan/counter/struct.BytesCount.html#method.of_many) + method similar to [`BytesCount::of`](https://docs.rs/divan/0.1/divan/counter/struct.BytesCount.html#method.of), + but with a parameter by which to multiply the size of the type. + +- [`BytesCount::u64`](https://docs.rs/divan/0.1.3/divan/counter/struct.BytesCount.html#method.u64), + [`BytesCount::f64`](https://docs.rs/divan/0.1.3/divan/counter/struct.BytesCount.html#method.f64), + and similar methods based on [`BytesCount::of_many`](https://docs.rs/divan/0.1.3/divan/counter/struct.BytesCount.html#method.of_many). + +### Removed + +- [`black_box`] inside benchmark loop when deferring [`Drop`] of outputs. This + is now done after the loop. + +- [`linkme`](https://docs.rs/linkme) dependency in favor of pre-`main` to + register benchmarks and benchmark groups. This is generally more portable + and reliable. + +### Changed + +- Now calling [`black_box`] at the end of the benchmark loop when deferring use + of inputs or [`Drop`] of outputs. + +## [0.1.2] - 2023-10-28 + +### Fixed + +- Multi-threaded benchmarks being spread across CPUs, instead of pinning the + main thread to CPU 0 and having all threads inherit the main thread's + affinity. + +## [0.1.1] - 2023-10-25 + +### Fixed + +- Fix using LLD as linker for Linux by using the same pre-`main` approach as + Windows. + +## 0.1.0 - 2023-10-04 + +Initial release. See [blog post](https://nikolaivazquez.com/blog/divan/).
+ +[crate]: https://crates.io/crates/divan +[crate-badge]: https://img.shields.io/crates/v/divan.svg + +[Unreleased]: https://github.com/nvzqz/divan/compare/v0.1.17...HEAD +[0.1.17]: https://github.com/nvzqz/divan/compare/v0.1.16...v0.1.17 +[0.1.16]: https://github.com/nvzqz/divan/compare/v0.1.15...v0.1.16 +[0.1.15]: https://github.com/nvzqz/divan/compare/v0.1.14...v0.1.15 +[0.1.14]: https://github.com/nvzqz/divan/compare/v0.1.13...v0.1.14 +[0.1.13]: https://github.com/nvzqz/divan/compare/v0.1.12...v0.1.13 +[0.1.12]: https://github.com/nvzqz/divan/compare/v0.1.11...v0.1.12 +[0.1.11]: https://github.com/nvzqz/divan/compare/v0.1.10...v0.1.11 +[0.1.10]: https://github.com/nvzqz/divan/compare/v0.1.9...v0.1.10 +[0.1.9]: https://github.com/nvzqz/divan/compare/v0.1.8...v0.1.9 +[0.1.8]: https://github.com/nvzqz/divan/compare/v0.1.7...v0.1.8 +[0.1.7]: https://github.com/nvzqz/divan/compare/v0.1.6...v0.1.7 +[0.1.6]: https://github.com/nvzqz/divan/compare/v0.1.5...v0.1.6 +[0.1.5]: https://github.com/nvzqz/divan/compare/v0.1.4...v0.1.5 +[0.1.4]: https://github.com/nvzqz/divan/compare/v0.1.3...v0.1.4 +[0.1.3]: https://github.com/nvzqz/divan/compare/v0.1.2...v0.1.3 +[0.1.2]: https://github.com/nvzqz/divan/compare/v0.1.1...v0.1.2 +[0.1.1]: https://github.com/nvzqz/divan/compare/v0.1.0...v0.1.1 + +[#37]: https://github.com/nvzqz/divan/issues/37 +[#59]: https://github.com/nvzqz/divan/issues/59 +[#64]: https://github.com/nvzqz/divan/issues/64 + +[`AllocProfiler`]: https://docs.rs/divan/0.1/divan/struct.AllocProfiler.html +[`args`]: https://docs.rs/divan/latest/divan/attr.bench.html#args +[`Bencher`]: https://docs.rs/divan/0.1/divan/struct.Bencher.html +[`black_box`]: https://docs.rs/divan/latest/divan/fn.black_box.html +[`BytesCount::of_iter`]: https://docs.rs/divan/0.1/divan/counter/struct.BytesCount.html#method.of_iter +[`BytesCount::of_many`]: https://docs.rs/divan/0.1/divan/counter/struct.BytesCount.html#method.of_many +[`consts`]: 
https://docs.rs/divan/latest/divan/attr.bench.html#consts +[`CyclesCount`]: https://docs.rs/divan/0.1/divan/counter/struct.CyclesCount.html +[`Divan::max_time`]: https://docs.rs/divan/0.1/divan/struct.Divan.html#method.max_time + +[`Any`]: https://doc.rust-lang.org/std/any/trait.Any.html +[`Copy`]: https://doc.rust-lang.org/std/marker/trait.Copy.html +[`Debug`]: https://doc.rust-lang.org/std/fmt/trait.Debug.html +[`drop`]: https://doc.rust-lang.org/std/mem/fn.drop.html +[`Drop`]: https://doc.rust-lang.org/std/ops/trait.Drop.html +[`Iterator`]: https://doc.rust-lang.org/std/iter/trait.Iterator.html +[`LazyLock`]: https://doc.rust-lang.org/std/sync/struct.LazyLock.html +[`Send`]: https://doc.rust-lang.org/std/marker/trait.Send.html +[`size_of`]: https://doc.rust-lang.org/std/mem/fn.size_of.html +[`Sync`]: https://doc.rust-lang.org/std/marker/trait.Sync.html +[`thread_local!`]: https://doc.rust-lang.org/std/macro.thread_local.html +[`ToString`]: https://doc.rust-lang.org/std/string/trait.ToString.html +[available parallelism]: https://doc.rust-lang.org/std/thread/fn.available_parallelism.html +[slice]: https://doc.rust-lang.org/std/primitive.slice.html + +[MSRV]: https://doc.rust-lang.org/cargo/reference/rust-version.html + +[`pthread_key_create`]: https://pubs.opengroup.org/onlinepubs/9699919799/functions/pthread_key_create.html diff --git a/crates/divan_compat/divan_fork/Cargo.toml b/crates/divan_compat/divan_fork/Cargo.toml new file mode 100644 index 00000000..1133d559 --- /dev/null +++ b/crates/divan_compat/divan_fork/Cargo.toml @@ -0,0 +1,48 @@ +[package] +name = "codspeed-divan-compat-walltime" +version = "0.1.17" +rust-version = "1.80.0" +edition = "2021" +authors = ["Nikolai Vazquez"] +license = "MIT OR Apache-2.0" +description = "A temporary compatibility layer for CodSpeed to use Divan's walltime entrypoint." 
+repository = "https://github.com/nvzqz/divan" +homepage = "https://github.com/nvzqz/divan" +documentation = "https://docs.rs/divan" +categories = ["development-tools::profiling"] +keywords = ["benchmark", "criterion", "instrument", "measure", "performance"] +readme = "README.md" + +[dependencies] +divan-macros = { version = "=0.1.17" } + +cfg-if = "1" +clap = { version = "4", default-features = false, features = ["std", "env"] } +condtype = "1.3" +regex = { package = "regex-lite", version = "0.1", default-features = false, features = ["std", "string"] } +codspeed = { path = "../../codspeed", version = "=2.8.0-alpha.0" } + +[target.'cfg(unix)'.dependencies] +libc = "0.2.148" + + +[dev-dependencies] +mimalloc = "0.1" + +[features] +default = ["wrap_help"] +help = ["clap/help"] +wrap_help = ["help", "clap/wrap_help"] + +# Opt out of faster static thread-local access and instead always dynamically +# allocate thread-local storage. +# +# On x86_64 macOS we use TLS key 11 (reserved for Windows ABI compatibility): +# https://github.com/apple-oss-distributions/libpthread/blob/libpthread-519/private/pthread/tsd_private.h#L99 +dyn_thread_local = [] + +# Benchmark internals. Not meant for public use. +internal_benches = [] + +[lib] +doctest = false # Disable doctests for the fork diff --git a/crates/divan_compat/divan_fork/LICENSE-APACHE b/crates/divan_compat/divan_fork/LICENSE-APACHE new file mode 100644 index 00000000..d6456956 --- /dev/null +++ b/crates/divan_compat/divan_fork/LICENSE-APACHE @@ -0,0 +1,202 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License.
+ + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. 
+ + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of 
the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
diff --git a/crates/divan_compat/divan_fork/LICENSE-MIT b/crates/divan_compat/divan_fork/LICENSE-MIT new file mode 100644 index 00000000..8faad18f --- /dev/null +++ b/crates/divan_compat/divan_fork/LICENSE-MIT @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2023 Nikolai Vazquez + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/crates/divan_compat/divan_fork/README.md b/crates/divan_compat/divan_fork/README.md new file mode 100644 index 00000000..2675f8c2 --- /dev/null +++ b/crates/divan_compat/divan_fork/README.md @@ -0,0 +1,106 @@ +
+

Divan

+ + docs.rs badge + + + Downloads badge + + + GitHub stars badge + + + CI build status badge + +

+ Comfy benchmarking for Rust projects, brought to you by + Nikolai Vazquez. +

+
+ +## Sponsor + +If you or your company find Divan valuable, consider [sponsoring on +GitHub](https://github.com/sponsors/nvzqz) or [donating via +PayPal](https://paypal.me/nvzqz). Sponsorships help me progress on what's +possible with benchmarking in Rust. + +## Guide + +A guide is being worked on. In the meantime, see: +- [Announcement post](https://nikolaivazquez.com/blog/divan/) +- ["Proving Performance" FOSDEM talk](https://youtu.be/P87C4jNakGs) + +## Getting Started + +Divan `0.1.17` requires Rust `1.80.0` or later. + +1. Add the following to your project's [`Cargo.toml`](https://doc.rust-lang.org/cargo/reference/manifest.html): + + ```toml + [dev-dependencies] + divan = "0.1.17" + + [[bench]] + name = "example" + harness = false + ``` + +2. Create a benchmarks file at `benches/example.rs`[^1] with your benchmarking code: + + ```rust + fn main() { + // Run registered benchmarks. + divan::main(); + } + + // Register a `fibonacci` function and benchmark it over multiple cases. + #[divan::bench(args = [1, 2, 4, 8, 16, 32])] + fn fibonacci(n: u64) -> u64 { + if n <= 1 { + 1 + } else { + fibonacci(n - 2) + fibonacci(n - 1) + } + } + ``` + +3. Run your benchmarks with [`cargo bench`](https://doc.rust-lang.org/cargo/commands/cargo-bench.html): + + ```txt + example fastest │ slowest │ median │ mean │ samples │ iters + ╰─ fibonacci │ │ │ │ │ + ├─ 1 0.626 ns │ 1.735 ns │ 0.657 ns │ 0.672 ns │ 100 │ 819200 + ├─ 2 2.767 ns │ 3.154 ns │ 2.788 ns │ 2.851 ns │ 100 │ 204800 + ├─ 4 6.816 ns │ 7.671 ns │ 7.061 ns │ 7.167 ns │ 100 │ 102400 + ├─ 8 57.31 ns │ 62.51 ns │ 57.96 ns │ 58.55 ns │ 100 │ 12800 + ├─ 16 2.874 µs │ 3.812 µs │ 2.916 µs │ 3.006 µs │ 100 │ 200 + ╰─ 32 6.267 ms │ 6.954 ms │ 6.283 ms │ 6.344 ms │ 100 │ 100 + ``` + +See [`#[divan::bench]`][bench_attr] for info on benchmark function registration. + +## Examples + +Practical example benchmarks can be found in the [`examples/benches`](https://github.com/nvzqz/divan/tree/main/examples/benches) +directory. 
These can be benchmarked locally by running: + +```sh +git clone https://github.com/nvzqz/divan.git +cd divan + +cargo bench -q -p examples --all-features +``` + +More thorough usage examples can be found in the [`#[divan::bench]` documentation][bench_attr_examples]. + +## License + +Like the Rust project, this library may be used under either the +[MIT License](https://github.com/nvzqz/divan/blob/main/LICENSE-MIT) or +[Apache License (Version 2.0)](https://github.com/nvzqz/divan/blob/main/LICENSE-APACHE). + +[^1]: Within your crate directory, i.e. [`$CARGO_MANIFEST_DIR`](https://doc.rust-lang.org/cargo/reference/environment-variables.html#environment-variables-cargo-sets-for-crates) + +[bench_attr]: https://docs.rs/divan/latest/divan/attr.bench.html +[bench_attr_examples]: https://docs.rs/divan/latest/divan/attr.bench.html#examples diff --git a/crates/divan_compat/divan_fork/WANTED.md b/crates/divan_compat/divan_fork/WANTED.md new file mode 100644 index 00000000..eef56682 --- /dev/null +++ b/crates/divan_compat/divan_fork/WANTED.md @@ -0,0 +1,47 @@ +# Wanted + +It would be great to have the following features added to Divan. If you have +ideas to expand this list, please [find](https://github.com/nvzqz/divan/discussions) +or [create](https://github.com/nvzqz/divan/discussions/new?category=ideas) a +discussion first. + +- Async benchmarks + +- Baseline benchmark + - Should match baselines across equal generic types and constants + - Idea: + ```rs + #[divan::bench] + fn old() { ... } + + #[divan::bench(baseline = old)] + fn new() { ... 
} + ``` + +- Cross-device: run benchmarks on other devices and report the data on the local +device + +- HTML output + +- CSV output + +- Custom counters + +- Time complexity of counters + - Also space complexity when measuring heap allocation + +- Measure heap allocations + - Custom [`GlobalAlloc`](https://doc.rust-lang.org/std/alloc/trait.GlobalAlloc.html) + that wraps another `GlobalAlloc`, defaulting to [`System`](https://doc.rust-lang.org/std/alloc/struct.System.html) + +- Custom timers + +- Timer for kernel/user mode + - Unix: + - [`getrusage(2)`](https://pubs.opengroup.org/onlinepubs/9699919799/functions/getrusage.html) + - Per-thread: + - Linux/FreeBSD/OpenBSD: [`RUSAGE_THREAD`](https://man7.org/linux/man-pages/man2/getrusage.2.html) + - macOS/iOS: [`thread_info(mach_thread_self(), ...)`](https://www.gnu.org/software/hurd/gnumach-doc/Thread-Information.html) + - Windows: + - [`GetProcessTimes`](https://learn.microsoft.com/en-us/windows/win32/api/processthreadsapi/nf-processthreadsapi-getprocesstimes) + - [`GetThreadTimes`](https://learn.microsoft.com/en-us/windows/win32/api/processthreadsapi/nf-processthreadsapi-getthreadtimes) diff --git a/crates/divan_compat/divan_fork/examples/Cargo.toml b/crates/divan_compat/divan_fork/examples/Cargo.toml new file mode 100644 index 00000000..028895a3 --- /dev/null +++ b/crates/divan_compat/divan_fork/examples/Cargo.toml @@ -0,0 +1,109 @@ +[package] +name = "examples" +version = "0.0.0" +edition = "2021" +authors = ["Nikolai Vazquez"] +license = "MIT OR Apache-2.0" +description = "Examples for Divan, a comfy benchmarking framework." 
+readme = "../README.md" +publish = false + +[dependencies] +divan = { workspace = true } +fastrand = "2" +image = { version = "0.24", optional = true } +libc = "0.2.147" +rayon = "1" + +# Search +ordsearch = "0.2.5" +wyhash = "0.5" + +# Hash +blake3 = { version = "1.4", optional = true, features = ["rayon"] } +digest = { version = "*", optional = true } +fnv = { version = "1", optional = true } +highway = { version = "1.1", optional = true } +metrohash = { version = "1", optional = true } +seahash = { version = "4.1", optional = true } +sha1 = { version = "0.10", optional = true } +sha2 = { version = "0.10", optional = true } +sha3 = { version = "0.10", optional = true } +twox-hash = { version = "1.6", optional = true } + +[target.'cfg(unix)'.dependencies] +libc = { workspace = true } + +[target.'cfg(target_os = "macos")'.dependencies] +mach2 = "0.4" + +[target.'cfg(any(windows, target_os = "linux", target_os = "android"))'.dependencies] +winapi = { version = "0.3.9", features = ["processthreadsapi"] } + +[features] +hash = [ + "blake3", + "digest", + "fnv", + "highway", + "metrohash", + "seahash", + "sha1", + "sha2", + "sha3", + "twox-hash", +] + +[[bench]] +name = "atomic" +harness = false + +[[bench]] +name = "collections" +harness = false + +[[bench]] +name = "hash" +harness = false +required-features = ["hash"] + +[[bench]] +name = "image" +harness = false +required-features = ["image"] + +[[bench]] +name = "math" +harness = false + +[[bench]] +name = "memcpy" +harness = false + +[[bench]] +name = "panic" +harness = false + +[[bench]] +name = "scratch" +harness = false + +[[bench]] +name = "search" +harness = false + +[[bench]] +name = "sort" +harness = false + +[[bench]] +name = "string" +harness = false + +[[bench]] +name = "threads" +harness = false + +[[bench]] +name = "time" +harness = false diff --git a/crates/divan_compat/divan_fork/examples/README.md b/crates/divan_compat/divan_fork/examples/README.md new file mode 100644 index 00000000..0508fe95 --- 
/dev/null +++ b/crates/divan_compat/divan_fork/examples/README.md @@ -0,0 +1,13 @@ +# Divan Examples + +Practical example benchmarks can be found in the [`examples/benches`](https://github.com/nvzqz/divan/tree/main/examples/benches) +directory. These can be benchmarked locally by running: + +```sh +git clone https://github.com/nvzqz/divan.git +cd divan + +cargo bench -q -p examples --all-features +``` + +More thorough usage examples can be found in the [`#[divan::bench]` documentation](https://docs.rs/divan/latest/divan/attr.bench.html#examples). diff --git a/crates/divan_compat/divan_fork/examples/benches/README.md b/crates/divan_compat/divan_fork/examples/benches/README.md new file mode 100644 index 00000000..0508fe95 --- /dev/null +++ b/crates/divan_compat/divan_fork/examples/benches/README.md @@ -0,0 +1,13 @@ +# Divan Examples + +Practical example benchmarks can be found in the [`examples/benches`](https://github.com/nvzqz/divan/tree/main/examples/benches) +directory. These can be benchmarked locally by running: + +```sh +git clone https://github.com/nvzqz/divan.git +cd divan + +cargo bench -q -p examples --all-features +``` + +More thorough usage examples can be found in the [`#[divan::bench]` documentation](https://docs.rs/divan/latest/divan/attr.bench.html#examples). diff --git a/crates/divan_compat/divan_fork/examples/benches/atomic.rs b/crates/divan_compat/divan_fork/examples/benches/atomic.rs new file mode 100644 index 00000000..973ec1bb --- /dev/null +++ b/crates/divan_compat/divan_fork/examples/benches/atomic.rs @@ -0,0 +1,135 @@ +use std::sync::atomic::*; + +use divan::black_box; + +fn main() { + divan::main(); +} + +// Available parallelism (0), baseline (1), and common CPU core counts. 
+const THREADS: &[usize] = &[0, 1, 4, 16]; + +#[divan::bench_group(threads = THREADS)] +mod basic { + use super::*; + + #[divan::bench] + fn load() -> usize { + static N: AtomicUsize = AtomicUsize::new(1); + + black_box(&N).load(Ordering::Relaxed) + } + + #[divan::bench] + fn store() { + static N: AtomicUsize = AtomicUsize::new(1); + + black_box(&N).store(black_box(2), Ordering::Relaxed); + } +} + +#[divan::bench_group(threads = THREADS)] +mod update { + use super::*; + + #[divan::bench] + fn fetch_or() -> usize { + static N: AtomicUsize = AtomicUsize::new(1); + + black_box(&N).fetch_or(black_box(1), Ordering::Relaxed) + } + + #[divan::bench] + fn fetch_and() -> usize { + static N: AtomicUsize = AtomicUsize::new(1); + + black_box(&N).fetch_and(black_box(1), Ordering::Relaxed) + } + + #[divan::bench] + fn fetch_xor() -> usize { + static N: AtomicUsize = AtomicUsize::new(1); + + black_box(&N).fetch_xor(black_box(1), Ordering::Relaxed) + } + + #[divan::bench] + fn fetch_nand() -> usize { + static N: AtomicUsize = AtomicUsize::new(1); + + black_box(&N).fetch_nand(black_box(1), Ordering::Relaxed) + } + + #[divan::bench] + fn fetch_add() -> usize { + static N: AtomicUsize = AtomicUsize::new(1); + + black_box(&N).fetch_add(black_box(1), Ordering::Relaxed) + } + + #[divan::bench] + fn fetch_sub() -> usize { + static N: AtomicUsize = AtomicUsize::new(1); + + black_box(&N).fetch_sub(black_box(1), Ordering::Relaxed) + } +} + +#[divan::bench_group(threads = THREADS)] +mod compare_exchange { + use super::*; + + #[divan::bench] + fn fetch_mul() -> usize { + static N: AtomicUsize = AtomicUsize::new(1); + + let mut current = black_box(&N).load(Ordering::Relaxed); + loop { + match black_box(&N).compare_exchange( + current, + current.wrapping_mul(black_box(2)), + Ordering::Relaxed, + Ordering::Relaxed, + ) { + Ok(_) => return current, + Err(n) => current = n, + } + } + } + + #[divan::bench] + fn fetch_div() -> usize { + static N: AtomicUsize = AtomicUsize::new(1); + + let mut 
current = black_box(&N).load(Ordering::Relaxed); + loop { + match black_box(&N).compare_exchange( + current, + current.wrapping_div(black_box(2)), + Ordering::Relaxed, + Ordering::Relaxed, + ) { + Ok(_) => return current, + Err(n) => current = n, + } + } + } + + #[divan::bench] + fn fetch_mod() -> usize { + static N: AtomicUsize = AtomicUsize::new(1); + + let mut current = black_box(&N).load(Ordering::Relaxed); + loop { + match black_box(&N).compare_exchange( + current, + current % black_box(2), + Ordering::Relaxed, + Ordering::Relaxed, + ) { + Ok(_) => return current, + Err(n) => current = n, + } + } + } +} diff --git a/crates/divan_compat/divan_fork/examples/benches/collections.rs b/crates/divan_compat/divan_fork/examples/benches/collections.rs new file mode 100644 index 00000000..87797146 --- /dev/null +++ b/crates/divan_compat/divan_fork/examples/benches/collections.rs @@ -0,0 +1,161 @@ +//! Run with: +//! +//! ```sh +//! cargo bench -q -p examples --bench collections +//! ``` + +use divan::{black_box, AllocProfiler, Bencher}; +use std::collections::{BTreeSet, BinaryHeap, HashSet, LinkedList, VecDeque}; + +pub fn collect_nums>(n: usize) -> T { + black_box(0..(n as i32)).collect() +} + +pub trait WithCapacity { + fn with_capacity(c: usize) -> Self; +} + +pub trait Clear { + fn clear(&mut self); +} + +pub trait PopFront { + fn pop_front(&mut self) -> Option; +} + +impl PopFront for Vec { + fn pop_front(&mut self) -> Option { + if self.is_empty() { + None + } else { + Some(self.remove(0)) + } + } +} + +impl PopFront for VecDeque { + fn pop_front(&mut self) -> Option { + self.pop_front() + } +} + +impl PopFront for LinkedList { + fn pop_front(&mut self) -> Option { + self.pop_front() + } +} + +macro_rules! impl_with_capacity { + ($($t:ident),+) => { + $(impl WithCapacity for $t { + fn with_capacity(c: usize) -> Self { + $t::with_capacity(c) + } + })+ + }; +} + +macro_rules! 
impl_clear { + ($($t:ident),+) => { + $(impl Clear for $t { + fn clear(&mut self) { + $t::clear(self); + } + })+ + }; +} + +impl_with_capacity!(Vec, VecDeque, BinaryHeap, HashSet); +impl_clear!(Vec, VecDeque, BinaryHeap, HashSet, LinkedList, BTreeSet); + +#[global_allocator] +static ALLOC: AllocProfiler = AllocProfiler::system(); + +fn main() { + divan::main(); +} + +const LENS: &[usize] = &[0, 8, 64, 1024]; + +#[divan::bench(types = [ + Vec, + VecDeque, + LinkedList, + BinaryHeap, + HashSet, + BTreeSet, +])] +fn default() -> T { + T::default() +} + +#[divan::bench( + types = [ + Vec, + VecDeque, + BinaryHeap, + HashSet, + ], + args = LENS, +)] +fn with_capacity(bencher: Bencher, len: usize) { + bencher.counter(len).bench(|| T::with_capacity(len)) +} + +#[divan::bench( + types = [ + Vec, + VecDeque, + LinkedList, + BinaryHeap, + HashSet, + BTreeSet, + ], + args = LENS, +)] +fn from_iter>(bencher: Bencher, len: usize) { + bencher.counter(len).bench(|| collect_nums::(len)) +} + +#[divan::bench( + types = [ + Vec, + VecDeque, + LinkedList, + BinaryHeap, + HashSet, + BTreeSet, + ], + args = LENS, +)] +fn drop>(bencher: Bencher, len: usize) { + bencher.counter(len).with_inputs(|| collect_nums::(len)).bench_values(std::mem::drop); +} + +#[divan::bench( + types = [ + Vec, + VecDeque, + LinkedList, + BinaryHeap, + HashSet, + BTreeSet, + ], + args = LENS, + max_time = 1, +)] +fn clear + Clear>(bencher: Bencher, len: usize) { + bencher.counter(len).with_inputs(|| collect_nums::(len)).bench_refs(T::clear); +} + +#[divan::bench( + types = [ + Vec, + VecDeque, + LinkedList, + ], + args = LENS, +)] +fn pop_front + PopFront>(bencher: Bencher, len: usize) { + bencher.counter(len).with_inputs(|| collect_nums::(len)).bench_refs(T::pop_front); +} diff --git a/crates/divan_compat/divan_fork/examples/benches/hash.rs b/crates/divan_compat/divan_fork/examples/benches/hash.rs new file mode 100644 index 00000000..e595c625 --- /dev/null +++ 
b/crates/divan_compat/divan_fork/examples/benches/hash.rs @@ -0,0 +1,135 @@ +//! Run with: +//! +//! ```sh +//! cargo bench -q -p examples --bench hash --features hash +//! ``` + +use digest::Digest; +use divan::AllocProfiler; + +#[global_allocator] +static ALLOC: AllocProfiler = AllocProfiler::system(); + +fn main() { + divan::main(); +} + +struct Blake3; +struct Blake3Par; +struct Sha1; +struct Sha2_256; +struct Sha2_512; +struct Sha3_256; +struct Sha3_512; + +/// [`Hasher::write`] + [`Hasher::finish`]. +#[divan::bench( + types = [ + Blake3, + Blake3Par, + fnv::FnvHasher, + highway::HighwayHasher, + metrohash::MetroHash128, + metrohash::MetroHash64, + seahash::SeaHasher, + Sha1, + Sha2_256, + Sha2_512, + Sha3_256, + Sha3_512, + std::collections::hash_map::DefaultHasher, + twox_hash::XxHash32, + twox_hash::XxHash64, + wyhash::WyHash, + ], + args = [0, 8, 64, 1024, 1024 * 1024], + max_time = 1, +)] +fn hash(bencher: divan::Bencher, len: usize) +where + H: Hasher, +{ + let bytes: Vec = { + let mut rng = fastrand::Rng::new(); + (0..len).map(|_| rng.u8(..)).collect() + }; + + bencher + .counter(divan::counter::BytesCount::new(len)) + .with_inputs(|| bytes.clone()) + .bench_refs(|bytes| H::hash(bytes)); +} + +trait Hasher { + type Hash; + + fn hash(bytes: &[u8]) -> Self::Hash; +} + +impl Hasher for H { + type Hash = u64; + + fn hash(bytes: &[u8]) -> Self::Hash { + let mut hasher = H::default(); + hasher.write(bytes); + hasher.finish() + } +} + +impl Hasher for Blake3 { + type Hash = [u8; 32]; + + fn hash(bytes: &[u8]) -> Self::Hash { + *blake3::hash(bytes).as_bytes() + } +} + +impl Hasher for Blake3Par { + type Hash = [u8; 32]; + + fn hash(bytes: &[u8]) -> Self::Hash { + let mut hasher = blake3::Hasher::new(); + hasher.update_rayon(bytes); + *hasher.finalize().as_bytes() + } +} + +impl Hasher for Sha1 { + type Hash = [u8; 20]; + + fn hash(bytes: &[u8]) -> Self::Hash { + sha1::Sha1::new_with_prefix(bytes).finalize().into() + } +} + +impl Hasher for Sha2_256 { + type 
Hash = [u8; 32]; + + fn hash(bytes: &[u8]) -> Self::Hash { + sha2::Sha256::new_with_prefix(bytes).finalize().into() + } +} + +impl Hasher for Sha2_512 { + type Hash = [u8; 64]; + + fn hash(bytes: &[u8]) -> Self::Hash { + sha2::Sha512::new_with_prefix(bytes).finalize().into() + } +} + +impl Hasher for Sha3_256 { + type Hash = [u8; 32]; + + fn hash(bytes: &[u8]) -> Self::Hash { + sha3::Sha3_256::new_with_prefix(bytes).finalize().into() + } +} + +impl Hasher for Sha3_512 { + type Hash = [u8; 64]; + + fn hash(bytes: &[u8]) -> Self::Hash { + sha3::Sha3_512::new_with_prefix(bytes).finalize().into() + } +} diff --git a/crates/divan_compat/divan_fork/examples/benches/image.rs b/crates/divan_compat/divan_fork/examples/benches/image.rs new file mode 100644 index 00000000..4fca23b0 --- /dev/null +++ b/crates/divan_compat/divan_fork/examples/benches/image.rs @@ -0,0 +1,43 @@ +//! Benchmarks the [`image`](https://docs.rs/image) crate. +//! +//! Run with: +//! +//! ```sh +//! cargo bench -q -p examples --bench image --features image +//! ``` + +use divan::{black_box, counter::BytesCount, AllocProfiler, Bencher}; +use image::{GenericImage, ImageBuffer, Rgba}; + +#[global_allocator] +static ALLOC: AllocProfiler = AllocProfiler::system(); + +fn main() { + divan::main(); +} + +fn make_image(pixel: Rgba) -> ImageBuffer, Vec> { + ImageBuffer::from_pixel(2048, 2048, pixel) +} + +// https://github.com/image-rs/image/blob/v0.24.6/benches/copy_from.rs +#[divan::bench(max_time = 1)] +fn copy_from(bencher: Bencher) { + let src = make_image(Rgba([255u8, 0, 0, 255])); + let mut dst = make_image(Rgba([0u8, 0, 0, 255])); + + bencher + .counter(BytesCount::of_slice(&*src)) + .bench_local(|| black_box(&mut dst).copy_from(black_box(&src), 0, 0)); +} + +/// Baseline for `copy_from`. 
+#[divan::bench(max_time = 1)] +fn memcpy(bencher: Bencher) { + let src = make_image(Rgba([255u8, 0, 0, 255])); + let mut dst = vec![0; src.len()]; + + bencher + .counter(BytesCount::of_slice(&*src)) + .bench_local(|| black_box(&mut dst).copy_from_slice(black_box(&src))); +} diff --git a/crates/divan_compat/divan_fork/examples/benches/math.rs b/crates/divan_compat/divan_fork/examples/benches/math.rs new file mode 100644 index 00000000..ef3f2897 --- /dev/null +++ b/crates/divan_compat/divan_fork/examples/benches/math.rs @@ -0,0 +1,118 @@ +//! Run with: +//! +//! ```sh +//! cargo bench -q -p examples --bench math +//! ``` + +use divan::black_box; +use std::collections::{BTreeMap, HashMap}; + +fn main() { + divan::main(); +} + +#[divan::bench] +fn add() -> i32 { + black_box(2) + black_box(1) +} + +#[divan::bench] +#[ignore] +fn sub() -> i32 { + black_box(2) - black_box(1) +} + +#[divan::bench] +fn mul() -> i32 { + black_box(2) * black_box(1) +} + +#[divan::bench] +fn div() -> i32 { + black_box(2) / black_box(1) +} + +#[divan::bench] +fn rem() -> i32 { + black_box(2) % black_box(1) +} + +// 1, 1, 2, 3, 5, ... 
+mod fibonacci { + use super::*; + + const VALUES: &[u64] = &[0, 5, 10, 20, 30, 40]; + + // O(n) + #[divan::bench(args = VALUES)] + fn iterative(n: u64) -> u64 { + let mut previous = 1; + let mut current = 1; + + for _ in 2..=n { + let next = previous + current; + previous = current; + current = next; + } + + current + } + + // O(2^n) + #[divan::bench(args = VALUES, max_time = 1)] + fn recursive(n: u64) -> u64 { + if n <= 1 { + 1 + } else { + recursive(n - 2) + recursive(n - 1) + } + } + + trait Map: Default { + fn get(&self, key: u64) -> Option; + fn set(&mut self, key: u64, value: u64); + } + + impl Map for HashMap { + fn get(&self, key: u64) -> Option { + self.get(&key).copied() + } + + fn set(&mut self, key: u64, value: u64) { + self.insert(key, value); + } + } + + impl Map for BTreeMap { + fn get(&self, key: u64) -> Option { + self.get(&key).copied() + } + + fn set(&mut self, key: u64, value: u64) { + self.insert(key, value); + } + } + + // O(n) + #[divan::bench( + types = [BTreeMap, HashMap], + args = VALUES, + )] + fn recursive_memoized(n: u64) -> u64 { + fn fibonacci(n: u64, cache: &mut M) -> u64 { + if let Some(result) = cache.get(n) { + return result; + } + + if n <= 1 { + return 1; + } + + let result = fibonacci(n - 2, cache) + fibonacci(n - 1, cache); + cache.set(n, result); + result + } + + fibonacci(n, &mut M::default()) + } +} diff --git a/crates/divan_compat/divan_fork/examples/benches/memcpy.rs b/crates/divan_compat/divan_fork/examples/benches/memcpy.rs new file mode 100644 index 00000000..d665d792 --- /dev/null +++ b/crates/divan_compat/divan_fork/examples/benches/memcpy.rs @@ -0,0 +1,107 @@ +use divan::{counter::BytesCount, Bencher}; +use fastrand::Rng; + +fn main() { + divan::main(); +} + +const LENS: &[usize] = &[ + 1, + 2, + 8, + 16, + 64, + 512, + 1024 * 4, + 1024 * 16, + 1024 * 64, + 1024 * 256, + 1024 * 1024, + 1024 * 1024 * 4, +]; + +#[divan::bench(args = LENS)] +fn memcpy(bencher: Bencher, len: usize) { + 
bencher.counter(BytesCount::new(len)).with_inputs(Input::gen(len)).bench_local_refs( + |input| unsafe { + let src_ptr = input.src_ptr(); + let dst_ptr = input.dst_ptr(); + libc::memcpy(dst_ptr.cast(), src_ptr.cast(), len); + }, + ) +} + +#[divan::bench(args = LENS)] +#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +fn movsb(bencher: Bencher, len: usize) { + use std::arch::asm; + + bencher.counter(BytesCount::new(len)).with_inputs(Input::gen(len)).bench_local_refs( + |input| unsafe { + #[cfg(target_arch = "x86")] + asm!( + "rep movsb", + inout("ecx") len => _, + inout("esi") input.src_ptr() => _, + inout("edi") input.dst_ptr() => _, + options(nostack, preserves_flags), + ); + + #[cfg(target_arch = "x86_64")] + asm!( + "rep movsb", + inout("rcx") len => _, + inout("rsi") input.src_ptr() => _, + inout("rdi") input.dst_ptr() => _, + options(nostack, preserves_flags), + ); + }, + ) +} + +/// Self-referential input. +/// +/// It stores random offsets into the buffers, which are allowed to reference up +/// to the provided length. This enables us to benchmark unaligned writes. We +/// generate these as part of the input to not add benchmark time. +struct Input { + src_buf: Box<[u8]>, + dst_buf: Box<[u8]>, + src_offset: usize, + dst_offset: usize, +} + +impl Input { + fn gen(len: usize) -> impl FnMut() -> Self { + let mut rng = Rng::default(); + move || { + // Vary buffers by length rather than adhere to nice numbers. + let max_len = len + (len / 8); + + let src_len = rng.usize(len..=max_len); + let dst_len = rng.usize(len..=max_len); + + let src_buf: Box<[u8]> = (0..src_len).map(|_| rng.u8(..)).collect(); + let dst_buf: Box<[u8]> = (0..dst_len).map(|_| rng.u8(..)).collect(); + + // 50% chance of the copy being aligned. Aligned writes are + // potentially much faster.
+ let is_aligned = rng.bool(); + let (src_offset, dst_offset) = if is_aligned { + (0, 0) + } else { + (rng.usize(..=src_len - len), rng.usize(..=dst_len - len)) + }; + + Input { src_buf, dst_buf, src_offset, dst_offset } + } + } + + fn src_ptr(&self) -> *const u8 { + self.src_buf.as_ptr().wrapping_add(self.src_offset) + } + + fn dst_ptr(&mut self) -> *mut u8 { + self.dst_buf.as_mut_ptr().wrapping_add(self.dst_offset) + } +} diff --git a/crates/divan_compat/divan_fork/examples/benches/panic.rs b/crates/divan_compat/divan_fork/examples/benches/panic.rs new file mode 100644 index 00000000..078fc47f --- /dev/null +++ b/crates/divan_compat/divan_fork/examples/benches/panic.rs @@ -0,0 +1,64 @@ +//! Run with: +//! +//! ```sh +//! cargo bench -q -p examples --bench panic +//! ``` + +use std::panic; + +use divan::{black_box, black_box_drop, AllocProfiler}; + +#[global_allocator] +static ALLOC: AllocProfiler = AllocProfiler::system(); + +fn main() { + // Silence panics. + panic::set_hook(Box::new(|_| {})); + + divan::main(); +} + +// Available parallelism (0), baseline (1), and common CPU core counts. 
+const THREADS: &[usize] = &[0, 1, 4, 16]; + +#[divan::bench] +#[track_caller] +fn caller_location() -> &'static panic::Location<'static> { + panic::Location::caller() +} + +#[divan::bench_group(threads = THREADS)] +mod hook { + use super::*; + + #[divan::bench] + fn set() { + panic::set_hook(Box::new(|_| {})); + } + + #[divan::bench] + fn take() -> impl Drop { + panic::take_hook() + } + + #[divan::bench] + fn take_and_drop() { + black_box_drop(panic::take_hook()); + } +} + +mod catch_unwind { + use super::*; + + #[divan::bench] + fn panic() -> std::thread::Result<()> { + let panic: fn() = || panic!(); + panic::catch_unwind(black_box(panic)) + } + + #[divan::bench] + fn success() -> std::thread::Result<()> { + let success: fn() = || {}; + panic::catch_unwind(black_box(success)) + } +} diff --git a/crates/divan_compat/divan_fork/examples/benches/scratch.rs b/crates/divan_compat/divan_fork/examples/benches/scratch.rs new file mode 100644 index 00000000..f8e1aa3c --- /dev/null +++ b/crates/divan_compat/divan_fork/examples/benches/scratch.rs @@ -0,0 +1,15 @@ +//! Scratch space for benchmarks. +//! +//! Run with: +//! +//! ```sh +//! cargo bench -q -p examples --bench scratch +//! ``` + +// Uncomment the code below to measure heap allocations. 
+// #[global_allocator] +// static ALLOC: divan::AllocProfiler = divan::AllocProfiler::system(); + +fn main() { + divan::main(); +} diff --git a/crates/divan_compat/divan_fork/examples/benches/search.rs b/crates/divan_compat/divan_fork/examples/benches/search.rs new file mode 100644 index 00000000..14e3c106 --- /dev/null +++ b/crates/divan_compat/divan_fork/examples/benches/search.rs @@ -0,0 +1,125 @@ +use std::{ + collections::{hash_map::RandomState, BTreeSet, HashSet}, + hash::BuildHasher, +}; + +use divan::{black_box_drop, AllocProfiler, Bencher}; +use fastrand::Rng; +use ordsearch::OrderedCollection; + +#[global_allocator] +static ALLOC: AllocProfiler = AllocProfiler::system(); + +fn main() { + divan::Divan::from_args() + .items_count( + // Every benchmark iteration searches for a single element. + 1u32, + ) + .main(); +} + +const SIZES: &[usize] = + &[1, 2, 8, 16, 64, 512, 4 * 1024, 16 * 1024, 64 * 1024, 256 * 1024, 1024 * 1024]; + +fn gen_inputs(len: usize) -> impl FnMut() -> (Vec, u64) { + let mut rng = Rng::with_seed(len as u64); + + move || { + let haystack: Vec = { + // Use `BTreeSet` to ensure result is sorted and has `len` items. 
+ let mut haystack = BTreeSet::new(); + + for _ in 0..len { + while !haystack.insert(rng.u64(..)) {} + } + + haystack.into_iter().collect() + }; + + let has_needle = rng.bool(); + let needle = if has_needle { + *rng.choice(&haystack).unwrap() + } else { + loop { + let n = rng.u64(..); + if !haystack.contains(&n) { + break n; + } + } + }; + + assert_eq!(haystack.len(), len); + (haystack, needle) + } +} + +#[divan::bench(args = SIZES, max_time = 1)] +fn linear(bencher: Bencher, len: usize) { + bencher + .with_inputs(gen_inputs(len)) + .bench_local_refs(|(haystack, needle)| haystack.iter().find(|v| **v == *needle).copied()) +} + +#[divan::bench(args = SIZES, max_time = 1)] +fn binary(bencher: Bencher, len: usize) { + bencher + .with_inputs(gen_inputs(len)) + .bench_local_refs(|(haystack, needle)| haystack.binary_search_by(|v| v.cmp(needle))) +} + +#[divan::bench(args = SIZES, max_time = 1)] +fn btree_set(bencher: Bencher, len: usize) { + let mut gen_inputs = gen_inputs(len); + + bencher + .with_inputs(|| -> (BTreeSet, u64) { + let (haystack, needle) = gen_inputs(); + (haystack.into_iter().collect(), needle) + }) + .bench_local_refs(|(haystack, needle)| haystack.get(needle).copied()) +} + +/// Local implementation instead of `BuildHasherDefault` to get shorter name in +/// output. 
+#[derive(Default)] +struct WyHash; + +impl BuildHasher for WyHash { + type Hasher = wyhash::WyHash; + + fn build_hasher(&self) -> Self::Hasher { + wyhash::WyHash::default() + } +} + +#[divan::bench( + args = SIZES, + max_time = 1, + types = [RandomState, WyHash], +)] +fn hash_set(bencher: Bencher, len: usize) +where + H: BuildHasher + Default, +{ + let mut gen_inputs = gen_inputs(len); + + bencher + .with_inputs(|| -> (HashSet, u64) { + let (haystack, needle) = gen_inputs(); + (haystack.into_iter().collect(), needle) + }) + .bench_local_refs(|(haystack, needle)| haystack.get(needle).copied()) +} + +#[divan::bench(args = SIZES, max_time = 1)] +fn ordsearch(bencher: Bencher, len: usize) { + let mut gen_inputs = gen_inputs(len); + + bencher + .with_inputs(|| { + let (haystack, needle) = gen_inputs(); + (OrderedCollection::from_sorted_iter(haystack), needle) + }) + .bench_local_refs(|(haystack, needle)| black_box_drop(haystack.find_gte(*needle))) +} diff --git a/crates/divan_compat/divan_fork/examples/benches/sort.rs b/crates/divan_compat/divan_fork/examples/benches/sort.rs new file mode 100644 index 00000000..e1d799d4 --- /dev/null +++ b/crates/divan_compat/divan_fork/examples/benches/sort.rs @@ -0,0 +1,88 @@ +//! Run with: +//! +//! ```sh +//! cargo bench -q -p examples --bench sort +//! ``` + +use divan::{AllocProfiler, Bencher}; +use rayon::slice::ParallelSliceMut; + +#[global_allocator] +static ALLOC: AllocProfiler = AllocProfiler::system(); + +fn main() { + divan::main(); +} + +/// Functions that generate deterministic values. +mod gen { + pub const LEN: usize = 100_000; + + pub fn rand_int_generator() -> impl FnMut() -> i32 { + let mut rng = fastrand::Rng::with_seed(42); + move || rng.i32(..) 
+ } + + pub fn rand_int_vec_generator() -> impl FnMut() -> Vec { + let mut rand_int_generator = rand_int_generator(); + move || (0..LEN).map(|_| rand_int_generator()).collect() + } + + pub fn sorted_int_vec_generator() -> impl FnMut() -> Vec { + move || (0..LEN).map(|i| i as i32).collect() + } +} + +mod random { + use super::*; + + #[divan::bench] + fn sort(bencher: Bencher) { + bencher.with_inputs(gen::rand_int_vec_generator()).bench_local_refs(|v| v.sort()); + } + + #[divan::bench] + fn sort_unstable(bencher: Bencher) { + bencher.with_inputs(gen::rand_int_vec_generator()).bench_local_refs(|v| v.sort_unstable()); + } + + #[divan::bench] + fn par_sort(bencher: Bencher) { + bencher.with_inputs(gen::rand_int_vec_generator()).bench_local_refs(|v| v.par_sort()); + } + + #[divan::bench] + fn par_sort_unstable(bencher: Bencher) { + bencher + .with_inputs(gen::rand_int_vec_generator()) + .bench_local_refs(|v| v.par_sort_unstable()); + } +} + +mod sorted { + use super::*; + + #[divan::bench] + fn sort(bencher: Bencher) { + bencher.with_inputs(gen::sorted_int_vec_generator()).bench_local_refs(|v| v.sort()); + } + + #[divan::bench] + fn sort_unstable(bencher: Bencher) { + bencher + .with_inputs(gen::sorted_int_vec_generator()) + .bench_local_refs(|v| v.sort_unstable()); + } + + #[divan::bench] + fn par_sort(bencher: Bencher) { + bencher.with_inputs(gen::sorted_int_vec_generator()).bench_local_refs(|v| v.par_sort()); + } + + #[divan::bench] + fn par_sort_unstable(bencher: Bencher) { + bencher + .with_inputs(gen::sorted_int_vec_generator()) + .bench_local_refs(|v| v.par_sort_unstable()); + } +} diff --git a/crates/divan_compat/divan_fork/examples/benches/string.rs b/crates/divan_compat/divan_fork/examples/benches/string.rs new file mode 100644 index 00000000..19956d2b --- /dev/null +++ b/crates/divan_compat/divan_fork/examples/benches/string.rs @@ -0,0 +1,180 @@ +//! Run with: +//! +//! ```sh +//! cargo bench -q -p examples --bench string +//! 
``` + +use divan::{ + black_box, black_box_drop, + counter::{BytesCount, CharsCount}, + AllocProfiler, Bencher, +}; + +#[global_allocator] +static ALLOC: AllocProfiler = AllocProfiler::system(); + +fn main() { + divan::main(); +} + +const LENS: &[usize] = &[0, 8, 64, 1024]; + +#[derive(Default)] +struct Ascii { + rng: fastrand::Rng, +} + +#[derive(Default)] +struct Unicode { + rng: fastrand::Rng, +} + +trait GenString: Default { + fn gen_string(&mut self, char_count: usize) -> String; +} + +impl GenString for Ascii { + fn gen_string(&mut self, char_count: usize) -> String { + (0..char_count).map(|_| self.rng.alphanumeric()).collect() + } +} + +impl GenString for Unicode { + fn gen_string(&mut self, char_count: usize) -> String { + (0..char_count).map(|_| self.rng.char(..)).collect() + } +} + +#[divan::bench( + types = [Ascii, Unicode], + args = LENS, + max_time = 1, +)] +fn clear(bencher: Bencher, len: usize) { + let mut gen = G::default(); + bencher + .counter(CharsCount::new(len)) + .with_inputs(|| gen.gen_string(len)) + .input_counter(BytesCount::of_str) + .bench_local_refs(String::clear); +} + +#[divan::bench( + types = [Ascii, Unicode], + args = LENS, +)] +fn drop(bencher: Bencher, len: usize) { + let mut gen = G::default(); + bencher + .counter(CharsCount::new(len)) + .with_inputs(|| gen.gen_string(len)) + .input_counter(BytesCount::of_str) + .bench_local_values(std::mem::drop); +} + +#[divan::bench( + types = [Ascii, Unicode], + args = LENS, +)] +fn validate_utf8(bencher: Bencher, len: usize) { + let mut gen = G::default(); + bencher + .counter(CharsCount::new(len)) + .with_inputs(|| gen.gen_string(len)) + .input_counter(BytesCount::of_str) + .bench_local_refs(|s| { + let bytes = black_box(s.as_bytes()); + black_box_drop(std::str::from_utf8(bytes)); + }); +} + +#[divan::bench( + types = [Ascii, Unicode], + args = LENS, +)] +fn char_count(bencher: Bencher, len: usize) { + let mut gen = G::default(); + bencher + .counter(CharsCount::new(len)) + .with_inputs(|| 
gen.gen_string(len)) + .input_counter(BytesCount::of_str) + .bench_local_refs(|s| s.chars().count()); +} + +#[divan::bench( + types = [Ascii, Unicode], + args = LENS, +)] +fn make_ascii_lowercase(bencher: Bencher, len: usize) { + let mut gen = G::default(); + bencher + .counter(CharsCount::new(len)) + .with_inputs(|| gen.gen_string(len)) + .input_counter(BytesCount::of_str) + .bench_local_refs(|s| s.make_ascii_lowercase()); +} + +#[divan::bench( + types = [Ascii, Unicode], + args = LENS, +)] +fn make_ascii_uppercase(bencher: Bencher, len: usize) { + let mut gen = G::default(); + bencher + .counter(CharsCount::new(len)) + .with_inputs(|| gen.gen_string(len)) + .input_counter(BytesCount::of_str) + .bench_local_refs(|s| s.make_ascii_uppercase()); +} + +#[divan::bench( + types = [Ascii, Unicode], + args = LENS, +)] +fn to_ascii_lowercase(bencher: Bencher, len: usize) { + let mut gen = G::default(); + bencher + .counter(CharsCount::new(len)) + .with_inputs(|| gen.gen_string(len)) + .input_counter(BytesCount::of_str) + .bench_local_refs(|s| s.to_ascii_lowercase()); +} + +#[divan::bench( + types = [Ascii, Unicode], + args = LENS, +)] +fn to_ascii_uppercase(bencher: Bencher, len: usize) { + let mut gen = G::default(); + bencher + .counter(CharsCount::new(len)) + .with_inputs(|| gen.gen_string(len)) + .input_counter(BytesCount::of_str) + .bench_local_refs(|s| s.to_ascii_uppercase()); +} + +#[divan::bench( + types = [Ascii, Unicode], + args = LENS, +)] +fn to_lowercase(bencher: Bencher, len: usize) { + let mut gen = G::default(); + bencher + .counter(CharsCount::new(len)) + .with_inputs(|| gen.gen_string(len)) + .input_counter(BytesCount::of_str) + .bench_local_refs(|s| s.to_lowercase()); +} + +#[divan::bench( + types = [Ascii, Unicode], + args = LENS, +)] +fn to_uppercase(bencher: Bencher, len: usize) { + let mut gen = G::default(); + bencher + .counter(CharsCount::new(len)) + .with_inputs(|| gen.gen_string(len)) + .input_counter(BytesCount::of_str) + .bench_local_refs(|s| 
s.to_uppercase()); +} diff --git a/crates/divan_compat/divan_fork/examples/benches/threads.rs b/crates/divan_compat/divan_fork/examples/benches/threads.rs new file mode 100644 index 00000000..cf41a0ba --- /dev/null +++ b/crates/divan_compat/divan_fork/examples/benches/threads.rs @@ -0,0 +1,390 @@ +//! Run with: +//! +//! ```sh +//! cargo bench -q -p examples --bench threads +//! ``` + +use std::{ + cell::UnsafeCell, + sync::{ + atomic::{AtomicUsize, Ordering::Relaxed}, + Arc, Mutex, RwLock, + }, + thread::{Thread, ThreadId}, +}; + +use divan::{black_box, black_box_drop, AllocProfiler, Bencher}; + +#[global_allocator] +static ALLOC: AllocProfiler = AllocProfiler::system(); + +fn main() { + divan::main(); +} + +// Available parallelism (0), baseline (1), and common CPU core counts. +const THREADS: &[usize] = &[0, 1, 4, 16]; + +#[divan::bench_group(threads = THREADS)] +mod arc { + use super::*; + + #[divan::bench] + fn clone(bencher: Bencher) { + let arc = Arc::new(42); + bencher.bench(|| arc.clone()); + } + + #[divan::bench] + fn drop(bencher: Bencher) { + let arc = Arc::new(42); + bencher.with_inputs(|| arc.clone()).bench_values(std::mem::drop); + } + + #[divan::bench] + fn get_mut(bencher: Bencher) { + let arc = Arc::new(42); + + bencher.with_inputs(|| arc.clone()).bench_refs(|arc| { + // Black box the branched value to ensure a branch gets emitted. + // This more closely simulates `Arc::get_mut` usage in practice. 
+ if let Some(val) = Arc::get_mut(arc) { + black_box_drop(val); + } + }); + } +} + +#[divan::bench_group(threads = THREADS)] +mod mutex { + use super::*; + + mod lock { + use super::*; + + #[divan::bench] + fn block() { + static M: Mutex = Mutex::new(0); + black_box_drop(M.lock()); + } + + #[divan::bench] + fn r#try() { + static M: Mutex = Mutex::new(0); + black_box_drop(M.try_lock()); + } + } + + mod set { + use super::*; + + #[divan::bench] + fn block() { + static M: Mutex = Mutex::new(0); + *black_box(M.lock().unwrap()) = black_box(42); + } + + #[divan::bench] + fn r#try() { + static M: Mutex = Mutex::new(0); + + if let Ok(lock) = M.try_lock() { + *black_box(lock) = black_box(42); + } + } + } +} + +#[divan::bench_group(threads = THREADS)] +mod rw_lock { + use super::*; + + mod read { + use super::*; + + #[divan::bench] + fn block() { + static L: RwLock = RwLock::new(0); + black_box_drop(L.read()); + } + + #[divan::bench] + fn r#try() { + static L: RwLock = RwLock::new(0); + black_box_drop(L.try_read()); + } + } + + mod write { + use super::*; + + #[divan::bench] + fn block() { + static L: RwLock = RwLock::new(0); + black_box_drop(L.write()); + } + + #[divan::bench] + fn r#try() { + static L: RwLock = RwLock::new(0); + black_box_drop(L.try_write()); + } + } + + mod set { + use super::*; + + #[divan::bench] + fn block() { + static L: RwLock = RwLock::new(0); + *black_box(L.write().unwrap()) = black_box(42); + } + + #[divan::bench] + fn r#try() { + static L: RwLock = RwLock::new(0); + + if let Ok(lock) = L.try_write() { + *black_box(lock) = black_box(42); + } + } + } +} + +/// Benchmark getting an integer or pointer uniquely identifying the current +/// thread or core. +#[divan::bench_group(threads = THREADS)] +mod thread_id { + use super::*; + + #[divan::bench_group(name = "std")] + mod stdlib { + use super::*; + + mod thread_local { + use super::*; + + #[divan::bench] + fn count() -> usize { + static SHARED: AtomicUsize = AtomicUsize::new(0); + + thread_local! 
{ + static LOCAL: usize = SHARED.fetch_add(1, Relaxed); + } + + LOCAL.with(|count| *count) + } + + #[divan::bench] + fn id() -> ThreadId { + thread_local! { + static LOCAL: ThreadId = std::thread::current().id(); + } + + LOCAL.with(|id| *id) + } + + #[divan::bench] + fn ptr() -> *mut u8 { + thread_local! { + static LOCAL: UnsafeCell = const { UnsafeCell::new(0) }; + } + + LOCAL.with(|addr| addr.get()) + } + } + + mod thread { + use super::*; + + #[divan::bench] + fn current() -> Thread { + std::thread::current() + } + + #[divan::bench] + fn current_id() -> ThreadId { + std::thread::current().id() + } + } + } + + #[cfg(unix)] + mod pthread { + use super::*; + + // https://pubs.opengroup.org/onlinepubs/9699919799/functions/pthread_self.html + #[divan::bench(name = "self")] + fn this() -> libc::pthread_t { + unsafe { libc::pthread_self() } + } + + #[divan::bench] + fn getspecific(bencher: Bencher) { + unsafe { + let mut key: libc::pthread_key_t = 0; + loop { + match libc::pthread_key_create(&mut key, None) { + 0 => break, + libc::EAGAIN => continue, + error => panic!("{}", std::io::Error::from_raw_os_error(error)), + } + } + + bencher.bench(|| libc::pthread_getspecific(key)); + + libc::pthread_key_delete(key); + }; + } + + #[cfg(target_os = "macos")] + #[divan::bench] + fn get_stackaddr_np() -> *mut libc::c_void { + unsafe { libc::pthread_get_stackaddr_np(libc::pthread_self()) } + } + + #[cfg(target_os = "macos")] + #[divan::bench] + fn threadid_np() -> u64 { + unsafe { + let mut tid = 0; + libc::pthread_threadid_np(libc::pthread_self(), &mut tid); + tid + } + } + + #[cfg(target_os = "macos")] + #[divan::bench] + fn cpu_number_np() -> usize { + unsafe { + let mut cpu = 0; + libc::pthread_cpu_number_np(&mut cpu); + cpu + } + } + } + + // https://www.gnu.org/software/hurd/gnumach-doc/Thread-Information.html + #[cfg(target_os = "macos")] + #[divan::bench] + fn mach_thread_self() -> impl Drop { + struct Thread(mach2::mach_types::thread_port_t); + + impl Drop for Thread { 
+ fn drop(&mut self) { + unsafe { + mach2::mach_port::mach_port_deallocate(mach2::traps::mach_task_self(), self.0); + } + } + } + + Thread(unsafe { mach2::mach_init::mach_thread_self() }) + } + + // https://man7.org/linux/man-pages/man2/gettid.2.html + #[cfg(target_os = "linux")] + #[divan::bench] + fn gettid() -> libc::pid_t { + unsafe { libc::gettid() } + } + + // https://man7.org/linux/man-pages/man3/sched_getcpu.3.html + #[cfg(target_os = "linux")] + #[divan::bench] + fn sched_getcpu() -> libc::c_int { + unsafe { libc::sched_getcpu() } + } + + #[cfg(windows)] + #[divan::bench] + #[allow(non_snake_case)] + fn GetCurrentProcessorNumber() -> u32 { + unsafe { winapi::um::processthreadsapi::GetCurrentProcessorNumber() } + } + + #[cfg(windows)] + #[divan::bench] + #[allow(non_snake_case)] + fn GetCurrentProcessorNumberEx() -> (u16, u8) { + unsafe { + let mut result = std::mem::zeroed(); + winapi::um::processthreadsapi::GetCurrentProcessorNumberEx(&mut result); + (result.Group, result.Number) + } + } + + #[cfg(windows)] + #[divan::bench] + #[allow(non_snake_case)] + fn GetCurrentThread() -> std::os::windows::io::RawHandle { + unsafe { winapi::um::processthreadsapi::GetCurrentThread().cast() } + } + + #[cfg(windows)] + #[divan::bench] + #[allow(non_snake_case)] + fn GetCurrentThreadId() -> u32 { + unsafe { winapi::um::processthreadsapi::GetCurrentThreadId() } + } + + #[cfg(windows)] + #[divan::bench] + #[allow(non_snake_case)] + fn TlsGetValue(bencher: Bencher) { + unsafe { + use winapi::um::processthreadsapi::*; + + let tls_index = TlsAlloc(); + if tls_index == TLS_OUT_OF_INDEXES { + panic!("{}", std::io::Error::last_os_error()); + } + + bencher.bench(|| TlsGetValue(tls_index)); + + TlsFree(tls_index); + } + } + + #[cfg(all( + any(target_arch = "x86_64", target_arch = "aarch64"), + any(target_os = "linux", target_os = "macos", target_os = "windows"), + ))] + #[divan::bench] + fn asm() -> usize { + unsafe { + let result: usize; + + #[cfg(all(target_arch = "x86_64", 
any(target_os = "macos", target_os = "windows")))] + std::arch::asm!( + "mov {}, gs", + out(reg) result, + options(nostack, nomem, preserves_flags) + ); + + #[cfg(all(target_arch = "x86_64", target_os = "linux"))] + std::arch::asm!( + "mov {}, fs", + out(reg) result, + options(nostack, nomem, preserves_flags) + ); + + // https://developer.arm.com/documentation/ddi0595/2021-12/AArch64-Registers/TPIDRRO-EL0--EL0-Read-Only-Software-Thread-ID-Register?lang=en + #[cfg(all(target_arch = "aarch64", any(target_os = "macos", target_os = "windows")))] + std::arch::asm!( + "mrs {}, tpidrro_el0", + out(reg) result, + options(nostack, nomem, preserves_flags) + ); + + // https://developer.arm.com/documentation/ddi0595/2021-12/AArch64-Registers/TPIDR-EL0--EL0-Read-Write-Software-Thread-ID-Register?lang=en + #[cfg(all(target_arch = "aarch64", target_os = "linux"))] + std::arch::asm!( + "mrs {}, tpidr_el0", + out(reg) result, + options(nostack, nomem, preserves_flags) + ); + + result + } + } +} diff --git a/crates/divan_compat/divan_fork/examples/benches/time.rs b/crates/divan_compat/divan_fork/examples/benches/time.rs new file mode 100644 index 00000000..dc595974 --- /dev/null +++ b/crates/divan_compat/divan_fork/examples/benches/time.rs @@ -0,0 +1,103 @@ +//! Run with: +//! +//! ```sh +//! cargo bench -q -p examples --bench time +//! 
``` + +use std::time::{Instant, SystemTime}; + +use divan::{AllocProfiler, Bencher}; + +#[global_allocator] +static ALLOC: AllocProfiler = AllocProfiler::system(); + +fn main() { + divan::main(); +} + +mod now { + use super::*; + + #[divan::bench] + fn instant() -> Instant { + Instant::now() + } + + #[divan::bench] + fn system_time() -> SystemTime { + SystemTime::now() + } + + #[divan::bench(name = if cfg!(target_arch = "aarch64") { + "tsc (aarch64)" + } else { + "tsc (x86)" + })] + #[cfg(all( + not(miri), + any(target_arch = "aarch64", target_arch = "x86", target_arch = "x86_64"), + ))] + pub fn tsc() -> u64 { + #[cfg(target_arch = "aarch64")] + unsafe { + let timestamp: u64; + std::arch::asm!( + "mrs {}, cntvct_el0", + out(reg) timestamp, + // Leave off `nomem` because this should be a compiler fence. + options(nostack, preserves_flags), + ); + timestamp + } + + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + unsafe { + #[cfg(target_arch = "x86")] + use std::arch::x86; + #[cfg(target_arch = "x86_64")] + use std::arch::x86_64 as x86; + + x86::_rdtsc() + } + } +} + +mod duration_since { + use super::*; + + #[divan::bench] + fn instant(bencher: Bencher) { + bencher + .with_inputs(|| [Instant::now(), Instant::now()]) + .bench_values(|[start, end]| end.duration_since(start)); + } + + #[divan::bench] + fn system_time(bencher: Bencher) { + bencher + .with_inputs(|| [SystemTime::now(), SystemTime::now()]) + .bench_values(|[start, end]| end.duration_since(start)); + } + + #[divan::bench(name = if cfg!(target_arch = "aarch64") { + "tsc (aarch64)" + } else { + "tsc (x86)" + })] + #[cfg(all( + not(miri), + any(target_arch = "aarch64", target_arch = "x86", target_arch = "x86_64"), + ))] + fn tsc(bencher: Bencher) { + bencher.with_inputs(|| [crate::now::tsc(), crate::now::tsc()]).bench_values( + |[start, end]| { + // Simply subtract because an optimized timing implementation + // would want to keep the value as TSC units for as long as + // possible before dividing 
by the TSC frequency. + // + // Saturating arithmetic to ensures monotonicity. + end.saturating_sub(start) + }, + ) + } +} diff --git a/crates/divan_compat/divan_fork/internal_benches/Cargo.toml b/crates/divan_compat/divan_fork/internal_benches/Cargo.toml new file mode 100644 index 00000000..a1463ceb --- /dev/null +++ b/crates/divan_compat/divan_fork/internal_benches/Cargo.toml @@ -0,0 +1,16 @@ +[package] +name = "internal_benches" +version = "0.0.0" +edition = "2021" +authors = ["Nikolai Vazquez"] +license = "MIT OR Apache-2.0" +description = "Internal benchmarks for Divan, a comfy benchmarking framework." +readme = "../README.md" +publish = false + +[dependencies] +divan = { workspace = true, features = ["internal_benches"] } + +[[bench]] +name = "internals" +harness = false diff --git a/crates/divan_compat/divan_fork/internal_benches/README.md b/crates/divan_compat/divan_fork/internal_benches/README.md new file mode 100644 index 00000000..b4e4e9f7 --- /dev/null +++ b/crates/divan_compat/divan_fork/internal_benches/README.md @@ -0,0 +1,27 @@ +# Divan Internal Benchmarks + +This crate demonstrates how to use [Divan] to benchmark internals of a crate by +benchmarking the internals of Divan. 
+ +These can be benchmarked locally by running: + +```sh +git clone https://github.com/nvzqz/divan.git +cd divan + +cargo bench -q -p internal_benches +``` + +As of this writing, the output on my machine is: + +```txt +divan fastest │ slowest │ median │ mean │ samples │ iters +╰─ time │ │ │ │ │ + ╰─ timer │ │ │ │ │ + ├─ get_tsc 0.158 ns │ 0.202 ns │ 0.161 ns │ 0.162 ns │ 100 │ 1638400 + ╰─ measure │ │ │ │ │ + ├─ precision 89.58 µs │ 221.5 µs │ 201.9 µs │ 184.5 µs │ 100 │ 100 + ╰─ sample_loop_overhead 314.2 µs │ 342.5 µs │ 314.5 µs │ 317.1 µs │ 100 │ 100 +``` + +[divan]: https://github.com/nvzqz/divan diff --git a/crates/divan_compat/divan_fork/internal_benches/benches/internals.rs b/crates/divan_compat/divan_fork/internal_benches/benches/internals.rs new file mode 100644 index 00000000..37d0a1a7 --- /dev/null +++ b/crates/divan_compat/divan_fork/internal_benches/benches/internals.rs @@ -0,0 +1,8 @@ +use divan::AllocProfiler; + +#[global_allocator] +static GLOBAL_ALLOC: AllocProfiler = AllocProfiler::system(); + +fn main() { + divan::main(); +} diff --git a/crates/divan_compat/divan_fork/macros/Cargo.toml b/crates/divan_compat/divan_fork/macros/Cargo.toml new file mode 100644 index 00000000..87087a0e --- /dev/null +++ b/crates/divan_compat/divan_fork/macros/Cargo.toml @@ -0,0 +1,25 @@ +[package] +name = "divan-macros" +version = "0.1.17" +edition = "2021" +authors = ["Nikolai Vazquez"] +license = "MIT OR Apache-2.0" +description = "Macros for Divan, a statistically-comfy benchmarking library." +repository = "https://github.com/nvzqz/divan" +homepage = "https://github.com/nvzqz/divan" +documentation = "https://docs.rs/divan-macros" +categories = ["development-tools::profiling"] +keywords = ["benchmark", "criterion", "instrument", "measure", "performance"] +readme = "../README.md" + +[lib] +proc-macro = true + +[dependencies] +proc-macro2 = "1" +quote = { version = "1", default-features = false } +# Versions prior to *.18 fail to parse empty attribute metadata. 
+syn = { version = "^2.0.18", default-features = false, features = ["full", "clone-impls", "parsing", "printing", "proc-macro"] } + +[dev-dependencies] +divan = { workspace = true } diff --git a/crates/divan_compat/divan_fork/macros/LICENSE-APACHE b/crates/divan_compat/divan_fork/macros/LICENSE-APACHE new file mode 100644 index 00000000..d6456956 --- /dev/null +++ b/crates/divan_compat/divan_fork/macros/LICENSE-APACHE @@ -0,0 +1,202 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. 
+ + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. 
+ + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. 
+ + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/crates/divan_compat/divan_fork/macros/LICENSE-MIT b/crates/divan_compat/divan_fork/macros/LICENSE-MIT new file mode 100644 index 00000000..8faad18f --- /dev/null +++ b/crates/divan_compat/divan_fork/macros/LICENSE-MIT @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2023 Nikolai Vazquez + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
diff --git a/crates/divan_compat/divan_fork/macros/src/attr_options.rs b/crates/divan_compat/divan_fork/macros/src/attr_options.rs new file mode 100644 index 00000000..af2c6184 --- /dev/null +++ b/crates/divan_compat/divan_fork/macros/src/attr_options.rs @@ -0,0 +1,378 @@ +use proc_macro::TokenStream; +use quote::{quote, ToTokens}; +use syn::{ + parse::{Parse, Parser}, + spanned::Spanned, + Expr, ExprArray, Ident, Token, Type, +}; + +use crate::{tokens, Macro}; + +/// Values from parsed options shared between `#[divan::bench]` and +/// `#[divan::bench_group]`. +/// +/// The `crate` option is not included because it is only needed to get proper +/// access to `__private`. +pub(crate) struct AttrOptions { + /// `divan::__private`. + pub private_mod: proc_macro2::TokenStream, + + /// Custom name for the benchmark or group. + pub name_expr: Option, + + /// `IntoIterator` from which to provide runtime arguments. + pub args_expr: Option, + + /// Options for generic functions. + pub generic: GenericOptions, + + /// The `BenchOptions.counters` field and its value, followed by a comma. + pub counters: proc_macro2::TokenStream, + + /// Options used directly as `BenchOptions` fields. + /// + /// Option reuse is handled by the compiler ensuring `BenchOptions` fields + /// are not repeated. + pub bench_options: Vec<(Ident, Expr)>, +} + +impl AttrOptions { + pub fn parse(tokens: TokenStream, target_macro: Macro) -> Result { + let macro_name = target_macro.name(); + + let mut divan_crate = None::; + let mut name_expr = None::; + let mut args_expr = None::; + let mut bench_options = Vec::new(); + + let mut counters = Vec::<(proc_macro2::TokenStream, Option<&str>)>::new(); + let mut counters_ident = None::; + + let mut seen_bytes_count = false; + let mut seen_chars_count = false; + let mut seen_cycles_count = false; + let mut seen_items_count = false; + + let mut generic = GenericOptions::default(); + + let attr_parser = syn::meta::parser(|meta| { + macro_rules! 
error { + ($($t:tt)+) => { + return Err(meta.error(format_args!($($t)+))) + }; + } + + let Some(ident) = meta.path.get_ident() else { + error!("unsupported '{macro_name}' option"); + }; + + let ident_name = ident.to_string(); + let ident_name = ident_name.strip_prefix("r#").unwrap_or(&ident_name); + + let repeat_error = || error!("repeated '{macro_name}' option '{ident_name}'"); + let unsupported_error = || error!("unsupported '{macro_name}' option '{ident_name}'"); + + macro_rules! parse { + ($storage:expr) => { + if $storage.is_none() { + $storage = Some(meta.value()?.parse()?); + } else { + return repeat_error(); + } + }; + } + + match ident_name { + "crate" => parse!(divan_crate), + "name" => parse!(name_expr), + "types" => { + match target_macro { + Macro::Bench { fn_sig } => { + if fn_sig.generics.type_params().next().is_none() { + error!("generic type required for '{macro_name}' option '{ident_name}'"); + } + } + _ => return unsupported_error(), + } + + parse!(generic.types); + } + "consts" => { + match target_macro { + Macro::Bench { fn_sig } => { + if fn_sig.generics.const_params().next().is_none() { + error!("generic const required for '{macro_name}' option '{ident_name}'"); + } + } + _ => return unsupported_error(), + } + + parse!(generic.consts); + } + "args" => { + match target_macro { + Macro::Bench { fn_sig } => { + if !matches!(fn_sig.inputs.len(), 1 | 2) { + return Err(meta.error(format_args!("function argument required for '{macro_name}' option '{ident_name}'"))); + } + } + _ => return unsupported_error(), + } + + parse!(args_expr); + } + "counter" => { + if counters_ident.is_some() { + return repeat_error(); + } + let value: Expr = meta.value()?.parse()?; + counters.push((value.into_token_stream(), None)); + counters_ident = Some(Ident::new("counters", ident.span())); + } + "counters" => { + if counters_ident.is_some() { + return repeat_error(); + } + let values: ExprArray = meta.value()?.parse()?; + counters.extend( + 
values.elems.into_iter().map(|elem| (elem.into_token_stream(), None)), + ); + counters_ident = Some(ident.clone()); + } + + "bytes_count" if seen_bytes_count => return repeat_error(), + "chars_count" if seen_chars_count => return repeat_error(), + "cycles_count" if seen_cycles_count => return repeat_error(), + "items_count" if seen_items_count => return repeat_error(), + + "bytes_count" | "chars_count" | "cycles_count" | "items_count" => { + let name = match ident_name { + "bytes_count" => { + seen_bytes_count = true; + "BytesCount" + } + "chars_count" => { + seen_chars_count = true; + "CharsCount" + } + "cycles_count" => { + seen_cycles_count = true; + "CyclesCount" + } + "items_count" => { + seen_items_count = true; + "ItemsCount" + } + _ => unreachable!(), + }; + + let value: Expr = meta.value()?.parse()?; + counters.push((value.into_token_stream(), Some(name))); + counters_ident = Some(Ident::new("counters", proc_macro2::Span::call_site())); + } + + _ => { + let value: Expr = match meta.value() { + Ok(value) => value.parse()?, + + // If the option is missing `=`, use a `true` literal. + Err(_) => Expr::Lit(syn::ExprLit { + lit: syn::LitBool::new(true, meta.path.span()).into(), + attrs: Vec::new(), + }), + }; + + bench_options.push((ident.clone(), value)); + } + } + + Ok(()) + }); + + match attr_parser.parse(tokens) { + Ok(()) => {} + Err(error) => return Err(error.into_compile_error().into()), + } + + let divan_crate = divan_crate.unwrap_or_else(|| syn::parse_quote!(::divan)); + let private_mod = quote! { #divan_crate::__private }; + + let counters = counters.iter().map(|(expr, type_name)| match type_name { + Some(type_name) => { + let type_name = Ident::new(type_name, proc_macro2::Span::call_site()); + quote! { + // We do a scoped import for the expression to override any + // local `From` trait. 
+ { + use ::std::convert::From as _; + + #divan_crate::counter::#type_name::from(#expr) + } + } + } + None => expr.to_token_stream(), + }); + + let counters = counters_ident + .map(|ident| { + quote! { + #ident: #private_mod::new_counter_set() #(.with(#counters))* , + } + }) + .unwrap_or_default(); + + Ok(Self { private_mod, name_expr, args_expr, generic, counters, bench_options }) + } + + /// Produces a function expression for creating `LazyLock`. + /// + /// If the `#[ignore]` attribute is specified, this be provided its + /// identifier to set `BenchOptions` using its span. Doing this instead of + /// creating the `ignore` identifier ourselves improves compiler error + /// diagnostics. + pub fn bench_options_fn( + &self, + ignore_attr_ident: Option<&syn::Path>, + ) -> proc_macro2::TokenStream { + fn is_lit_array(expr: &Expr) -> bool { + let Expr::Array(expr) = expr else { + return false; + }; + expr.elems.iter().all(|elem| matches!(elem, Expr::Lit { .. })) + } + + let private_mod = &self.private_mod; + let option_some = tokens::option_some(); + + // Directly set fields on `BenchOptions`. This simplifies things by: + // - Having a single source of truth + // - Making unknown options a compile error + // + // We use `..` (struct update syntax) to ensure that no option is set + // twice, even if raw identifiers are used. This also has the accidental + // benefit of Rust Analyzer recognizing fields and emitting suggestions + // with docs and type info. + if self.bench_options.is_empty() && self.counters.is_empty() && ignore_attr_ident.is_none() + { + tokens::option_none() + } else { + let options_iter = self.bench_options.iter().map(|(option, value)| { + let option_name = option.to_string(); + let option_name = option_name.strip_prefix("r#").unwrap_or(&option_name); + + let wrapped_value: proc_macro2::TokenStream; + let value: &dyn ToTokens = match option_name { + "threads" => { + wrapped_value = if is_lit_array(value) { + // If array of literals, just use `&[...]`. 
+ quote! { ::std::borrow::Cow::Borrowed(&#value) } + } else { + quote! { #private_mod::IntoThreads::into_threads(#value) } + }; + + &wrapped_value + } + + // If the option is a `Duration`, use `IntoDuration` to be + // polymorphic over `Duration` or `u64`/`f64` seconds. + "min_time" | "max_time" => { + wrapped_value = + quote! { #private_mod::IntoDuration::into_duration(#value) }; + &wrapped_value + } + + _ => value, + }; + + quote! { #option: #option_some(#value), } + }); + + let ignore = match ignore_attr_ident { + Some(ignore_attr_ident) => quote! { #ignore_attr_ident: #option_some(true), }, + None => Default::default(), + }; + + let counters = &self.counters; + + quote! { + #option_some(::std::sync::LazyLock::new(|| { + #[allow(clippy::needless_update)] + #private_mod::BenchOptions { + #(#options_iter)* + + // Ignore comes after options so that options take + // priority in compiler error diagnostics. + #ignore + + #counters + + ..::std::default::Default::default() + } + })) + } + } + } +} + +/// Options for generic functions. +#[derive(Default)] +pub struct GenericOptions { + /// Generic types over which to instantiate benchmark functions. + pub types: Option, + + /// `const` array/slice over which to instantiate benchmark functions. + pub consts: Option, +} + +impl GenericOptions { + /// Returns `true` if set exclusively to either: + /// - `types = []` + /// - `consts = []` + pub fn is_empty(&self) -> bool { + match (&self.types, &self.consts) { + (Some(types), None) => types.is_empty(), + (None, Some(Expr::Array(consts))) => consts.elems.is_empty(), + _ => false, + } + } + + /// Returns an iterator of multiple `Some` for types, or a single `None` if + /// there are no types. + pub fn types_iter(&self) -> Box> + '_> { + match &self.types { + None => Box::new(std::iter::once(None)), + Some(GenericTypes::List(types)) => { + Box::new(types.iter().map(|t| Some(t as &dyn ToTokens))) + } + } + } +} + +/// Generic types over which to instantiate benchmark functions. 
+pub enum GenericTypes { + /// List of types, e.g. `[i32, String, ()]`. + List(Vec), +} + +impl Parse for GenericTypes { + fn parse(input: syn::parse::ParseStream) -> syn::Result { + let content; + syn::bracketed!(content in input); + + Ok(Self::List( + content + .parse_terminated(Type::parse, Token![,])? + .into_iter() + .map(|ty| ty.into_token_stream()) + .collect(), + )) + } +} + +impl GenericTypes { + pub fn is_empty(&self) -> bool { + match self { + Self::List(list) => list.is_empty(), + } + } +} diff --git a/crates/divan_compat/divan_fork/macros/src/lib.rs b/crates/divan_compat/divan_fork/macros/src/lib.rs new file mode 100644 index 00000000..304775c4 --- /dev/null +++ b/crates/divan_compat/divan_fork/macros/src/lib.rs @@ -0,0 +1,610 @@ +//! Macros for [Divan](https://github.com/nvzqz/divan), a statistically-comfy +//! benchmarking library brought to you by [Nikolai Vazquez](https://hachyderm.io/@nikolai). +//! +//! See [`divan`](https://docs.rs/divan) crate for documentation. + +use proc_macro::TokenStream; +use quote::{quote, ToTokens}; + +mod attr_options; +mod tokens; + +use attr_options::*; +use syn::{Expr, FnArg}; + +#[derive(Clone, Copy)] +enum Macro<'a> { + Bench { fn_sig: &'a syn::Signature }, + BenchGroup, +} + +impl Macro<'_> { + fn name(&self) -> &'static str { + match self { + Self::Bench { .. } => "bench", + Self::BenchGroup => "bench_group", + } + } +} + +/// Lists of comma-separated `#[cfg]` parameters. +mod systems { + use super::*; + + pub fn elf() -> proc_macro2::TokenStream { + quote! { + target_os = "android", + target_os = "dragonfly", + target_os = "freebsd", + target_os = "fuchsia", + target_os = "haiku", + target_os = "illumos", + target_os = "linux", + target_os = "netbsd", + target_os = "openbsd" + } + } + + pub fn mach_o() -> proc_macro2::TokenStream { + quote! 
{ + target_os = "ios", + target_os = "macos", + target_os = "tvos", + target_os = "watchos" + } + } +} + +/// Attributes applied to a `static` containing a pointer to a function to run +/// before `main`. +fn pre_main_attrs() -> proc_macro2::TokenStream { + let elf = systems::elf(); + let mach_o = systems::mach_o(); + + quote! { + #[used] + #[cfg_attr(windows, link_section = ".CRT$XCU")] + #[cfg_attr(any(#elf), link_section = ".init_array")] + #[cfg_attr(any(#mach_o), link_section = "__DATA,__mod_init_func,mod_init_funcs")] + } +} + +fn unsupported_error(attr_name: &str) -> proc_macro2::TokenStream { + let elf = systems::elf(); + let mach_o = systems::mach_o(); + + let error = format!("Unsupported target OS for `#[divan::{attr_name}]`"); + + quote! { + #[cfg(not(any(windows, #elf, #mach_o)))] + ::std::compile_error!(#error); + } +} + +#[proc_macro_attribute] +pub fn bench(options: TokenStream, item: TokenStream) -> TokenStream { + let option_none = tokens::option_none(); + let option_some = tokens::option_some(); + + let fn_item = item.clone(); + let fn_item = syn::parse_macro_input!(fn_item as syn::ItemFn); + let fn_sig = &fn_item.sig; + + let attr = Macro::Bench { fn_sig }; + let attr_name = attr.name(); + + let options = match AttrOptions::parse(options, attr) { + Ok(options) => options, + Err(compile_error) => return compile_error, + }; + + // Items needed by generated code. + let AttrOptions { private_mod, .. } = &options; + + let fn_ident = &fn_sig.ident; + let fn_name = fn_ident.to_string(); + let fn_name_pretty = fn_name.strip_prefix("r#").unwrap_or(&fn_name); + + // Find any `#[ignore]` attribute so that we can use its span to help + // compiler diagnostics. + let ignore_attr_ident = + fn_item.attrs.iter().map(|attr| attr.meta.path()).find(|path| path.is_ident("ignore")); + + // If the function is `extern "ABI"`, it is wrapped in a Rust-ABI function. 
+ let is_extern_abi = fn_sig.abi.is_some(); + + let fn_args = &fn_sig.inputs; + + let type_param: Option<(usize, &syn::TypeParam)> = fn_sig + .generics + .params + .iter() + .enumerate() + .filter_map(|(i, param)| match param { + syn::GenericParam::Type(param) => Some((i, param)), + _ => None, + }) + .next(); + + let const_param: Option<(usize, &syn::ConstParam)> = fn_sig + .generics + .params + .iter() + .enumerate() + .filter_map(|(i, param)| match param { + syn::GenericParam::Const(param) => Some((i, param)), + _ => None, + }) + .next(); + + let is_type_before_const = match (type_param, const_param) { + (Some((t, _)), Some((c, _))) => t < c, + _ => false, + }; + + // Prefixed with "__" to prevent IDEs from recommending using this symbol. + // + // The static is local to intentionally cause a compile error if this + // attribute is used multiple times on the same function. + let static_ident = syn::Ident::new( + &format!("__DIVAN_BENCH_{}", fn_name_pretty.to_uppercase()), + fn_ident.span(), + ); + + let meta = entry_meta_expr(&fn_name, &options, ignore_attr_ident); + + let bench_entry_runner = quote! { #private_mod::BenchEntryRunner }; + + // Creates a `__DIVAN_ARGS` global variable to be used in the entry. + let bench_args_global = if options.args_expr.is_some() { + quote! { + static __DIVAN_ARGS: #private_mod::BenchArgs = #private_mod::BenchArgs::new(); + } + } else { + Default::default() + }; + + // The last argument type is used as the only `args` item type because we + // currently only support one runtime argument. + let last_arg_type = if options.args_expr.is_some() { + fn_args.last().map(|arg| match arg { + FnArg::Receiver(arg) => &*arg.ty, + FnArg::Typed(arg) => &*arg.ty, + }) + } else { + None + }; + + let last_arg_type_tokens = last_arg_type + .map(|ty| match ty { + // Remove lifetime from references to not use the lifetime outside + // of its declaration. This allows benchmarks to take arguments with + // lifetimes. 
+ syn::Type::Reference(ty) if ty.lifetime.is_some() => { + let mut ty = ty.clone(); + ty.lifetime = None; + ty.to_token_stream() + } + + _ => ty.to_token_stream(), + }) + .unwrap_or_default(); + + // Some argument literals need an explicit type. + let arg_return_tokens = options + .args_expr + .as_ref() + .map(|args| match args { + // Empty array. + Expr::Array(args) if args.elems.is_empty() => quote! { + -> [#last_arg_type_tokens; 0] + }, + + _ => Default::default(), + }) + .unwrap_or_default(); + + // Creates a function expr for the benchmarking function, optionally + // monomorphized with generic parameters. + let make_bench_fn = |generics: &[&dyn ToTokens]| { + let mut fn_expr = if generics.is_empty() { + // Use identifier as-is. + fn_ident.to_token_stream() + } else { + // Apply generic arguments. + quote! { #fn_ident::< #(#generics),* > } + }; + + // Handle function arguments. + match (fn_args.len(), &options.args_expr) { + // Simple benchmark with no arguments provided. + (0, None) => { + // Wrap in Rust ABI. + if is_extern_abi { + fn_expr = quote! { || #fn_expr() }; + } + + quote! { + #bench_entry_runner::Plain(|divan /* Bencher */| divan.bench(#fn_expr)) + } + } + + // `args` option used without function arguments; handled earlier in + // `AttrOptions::parse`. + (0, Some(_)) => unreachable!(), + + // `Bencher` function argument. + (1, None) => { + // Wrap in Rust ABI. + if is_extern_abi { + fn_expr = quote! { |divan /* Bencher */| #fn_expr(divan) }; + } + + quote! { #bench_entry_runner::Plain(#fn_expr) } + } + + // Function argument comes from `args` option. + (1, Some(args)) => quote! { + #bench_entry_runner::Args(|| __DIVAN_ARGS.runner( + || #arg_return_tokens { #args }, + + |arg| #private_mod::ToStringHelper(arg).to_string(), + + |divan, __divan_arg| divan.bench(|| #fn_expr( + #private_mod::Arg::<#last_arg_type_tokens>::get(__divan_arg) + )), + )) + }, + + // `Bencher` and `args` option function arguments. + (2, Some(args)) => quote! 
{ + #bench_entry_runner::Args(|| __DIVAN_ARGS.runner( + || #arg_return_tokens { #args }, + + |arg| #private_mod::ToStringHelper(arg).to_string(), + + |divan, __divan_arg| #fn_expr( + divan, + #private_mod::Arg::<#last_arg_type_tokens>::get(__divan_arg), + ), + )) + }, + + // Ensure `args` is set if arguments are provided after `Bencher`. + (_, None) => quote! { + ::std::compile_error!(::std::concat!( + "expected 'args' option containing '", + ::std::stringify!(#last_arg_type_tokens), + "'", + )) + }, + + // `args` option used with unsupported number of arguments; handled + // earlier in `AttrOptions::parse`. + (_, Some(_)) => unreachable!(), + } + }; + + let pre_main_attrs = pre_main_attrs(); + let unsupported_error = unsupported_error(attr_name); + + // Creates a `GroupEntry` static for generic benchmarks. + let make_generic_group = |generic_benches: proc_macro2::TokenStream| { + let entry = quote! { + #private_mod::GroupEntry { + meta: #meta, + generic_benches: #option_some({ #generic_benches }), + } + }; + + quote! { + #unsupported_error + + // Push this static into `GROUP_ENTRIES` before `main` is called. + static #static_ident: #private_mod::GroupEntry = { + { + // Add `push` to the initializer section. + #pre_main_attrs + static PUSH: extern "C" fn() = push; + + extern "C" fn push() { + static NODE: #private_mod::EntryList<#private_mod::GroupEntry> + = #private_mod::EntryList::new(&#static_ident); + + #private_mod::GROUP_ENTRIES.push(&NODE); + } + } + + // All generic entries share the same `BenchArgs` instance for + // efficiency and to ensure all entries use the same values, or + // at least the same names in the case of interior mutability. + #bench_args_global + + #entry + }; + } + }; + + // Creates a `GenericBenchEntry` expr for a generic benchmark instance. 
+ let make_generic_bench_entry = + |ty: Option<&dyn ToTokens>, const_value: Option<&dyn ToTokens>| { + let generic_const_value = const_value.map(|const_value| quote!({ #const_value })); + + let generics: Vec<&dyn ToTokens> = { + let mut generics = Vec::new(); + + generics.extend(generic_const_value.as_ref().map(|t| t as &dyn ToTokens)); + generics.extend(ty); + + if is_type_before_const { + generics.reverse(); + } + + generics + }; + + let bench_fn = make_bench_fn(&generics); + + let type_value = match ty { + Some(ty) => quote! { + #option_some(#private_mod::EntryType::new::<#ty>()) + }, + None => option_none.clone(), + }; + + let const_value = match const_value { + Some(const_value) => quote! { + #option_some(#private_mod::EntryConst::new(&#const_value)) + }, + None => option_none.clone(), + }; + + quote! { + #private_mod::GenericBenchEntry { + group: &#static_ident, + bench: #bench_fn, + ty: #type_value, + const_value: #const_value, + } + } + }; + + let generated_items: proc_macro2::TokenStream = match &options.generic.consts { + // Only specified `types = []` or `consts = []`; generate nothing. + _ if options.generic.is_empty() => Default::default(), + + None => match &options.generic.types { + // No generics; generate a simple benchmark entry. + None => { + let bench_fn = make_bench_fn(&[]); + + let entry = quote! { + #private_mod::BenchEntry { + meta: #meta, + bench: #bench_fn, + } + }; + + quote! { + // Push this static into `BENCH_ENTRIES` before `main` is + // called. + static #static_ident: #private_mod::BenchEntry = { + { + // Add `push` to the initializer section. + #pre_main_attrs + static PUSH: extern "C" fn() = push; + + extern "C" fn push() { + static NODE: #private_mod::EntryList<#private_mod::BenchEntry> + = #private_mod::EntryList::new(&#static_ident); + + #private_mod::BENCH_ENTRIES.push(&NODE); + } + } + + #bench_args_global + + #entry + }; + } + } + + // Generate a benchmark group entry with generic benchmark entries. 
+ Some(GenericTypes::List(generic_types)) => { + let generic_benches = + generic_types.iter().map(|ty| make_generic_bench_entry(Some(&ty), None)); + + make_generic_group(quote! { + &[&[#(#generic_benches),*]] + }) + } + }, + + // Generate a benchmark group entry with generic benchmark entries. + Some(Expr::Array(generic_consts)) => { + let consts_count = generic_consts.elems.len(); + let const_type = &const_param.unwrap().1.ty; + + let generic_benches = options.generic.types_iter().map(|ty| { + let generic_benches = (0..consts_count).map(move |i| { + let const_value = quote! { __DIVAN_CONSTS[#i] }; + make_generic_bench_entry(ty, Some(&const_value)) + }); + + // `static` is necessary because `EntryConst` uses interior + // mutability to cache the `ToString` result. + quote! { + static __DIVAN_GENERIC_BENCHES: [#private_mod::GenericBenchEntry; #consts_count] = [#(#generic_benches),*]; + &__DIVAN_GENERIC_BENCHES + } + }); + + make_generic_group(quote! { + // We refer to our own slice because it: + // - Type-checks values, even if `generic_benches` is empty + // because the user set `types = []` + // - Prevents re-computing constants, which can slightly improve + // compile time given that Miri is slow + const __DIVAN_CONSTS: &[#const_type] = &#generic_consts; + + &[#({ #generic_benches }),*] + }) + } + + // Generate a benchmark group entry with generic benchmark entries over + // an expression of constants. + // + // This is limited to a maximum of 20 because we need some constant to + // instantiate each function instance. + Some(generic_consts) => { + // The maximum number of elements for non-array expressions. + const MAX_EXTERN_COUNT: usize = 20; + + let const_type = &const_param.unwrap().1.ty; + + let generic_benches = options.generic.types_iter().map(|ty| { + let generic_benches = (0..MAX_EXTERN_COUNT).map(move |i| { + let const_value = quote! { + // Fallback to the first constant if out of bounds. 
+ __DIVAN_CONSTS[if #i < __DIVAN_CONST_COUNT { #i } else { 0 }] + }; + make_generic_bench_entry(ty, Some(&const_value)) + }); + + // `static` is necessary because `EntryConst` uses interior + // mutability to cache the `ToString` result. + quote! { + static __DIVAN_GENERIC_BENCHES: [#private_mod::GenericBenchEntry; __DIVAN_CONST_COUNT] + = match #private_mod::shrink_array([#(#generic_benches),*]) { + Some(array) => array, + _ => panic!("external 'consts' cannot contain more than 20 values"), + }; + + &__DIVAN_GENERIC_BENCHES + } + }); + + make_generic_group(quote! { + const __DIVAN_CONST_COUNT: usize = __DIVAN_CONSTS.len(); + const __DIVAN_CONSTS: &[#const_type] = &#generic_consts; + + &[#({ #generic_benches }),*] + }) + } + }; + + // Append our generated code to the existing token stream. + let mut result = item; + result.extend(TokenStream::from(generated_items)); + result +} + +#[proc_macro_attribute] +pub fn bench_group(options: TokenStream, item: TokenStream) -> TokenStream { + let attr = Macro::BenchGroup; + let attr_name = attr.name(); + + let options = match AttrOptions::parse(options, attr) { + Ok(options) => options, + Err(compile_error) => return compile_error, + }; + + // Items needed by generated code. + let AttrOptions { private_mod, .. } = &options; + + let option_none = tokens::option_none(); + + // TODO: Make module parsing cheaper by parsing only the necessary parts. + let mod_item = item.clone(); + let mod_item = syn::parse_macro_input!(mod_item as syn::ItemMod); + + let mod_ident = &mod_item.ident; + let mod_name = mod_ident.to_string(); + let mod_name_pretty = mod_name.strip_prefix("r#").unwrap_or(&mod_name); + + // Find any `#[ignore]` attribute so that we can use its span to help + // compiler diagnostics. + // + // TODO: Fix `unused_attributes` warning when using `#[ignore]` on a module. 
+ let ignore_attr_ident = + mod_item.attrs.iter().map(|attr| attr.meta.path()).find(|path| path.is_ident("ignore")); + + // Prefixed with "__" to prevent IDEs from recommending using this symbol. + // + // By having the static be local, we cause a compile error if this attribute + // is used multiple times on the same function. + let static_ident = syn::Ident::new( + &format!("__DIVAN_GROUP_{}", mod_name_pretty.to_uppercase()), + mod_ident.span(), + ); + + let meta = entry_meta_expr(&mod_name, &options, ignore_attr_ident); + + let pre_main_attrs = pre_main_attrs(); + let unsupported_error = unsupported_error(attr_name); + + let generated_items = quote! { + #unsupported_error + + // Push this static into `GROUP_ENTRIES` before `main` is called. + static #static_ident: #private_mod::EntryList<#private_mod::GroupEntry> = { + { + // Add `push` to the initializer section. + #pre_main_attrs + static PUSH: extern "C" fn() = push; + + extern "C" fn push() { + #private_mod::GROUP_ENTRIES.push(&#static_ident); + } + } + + #private_mod::EntryList::new({ + static #static_ident: #private_mod::GroupEntry = #private_mod::GroupEntry { + meta: #meta, + generic_benches: #option_none, + }; + + &#static_ident + }) + }; + }; + + // Append our generated code to the existing token stream. + let mut result = item; + result.extend(TokenStream::from(generated_items)); + result +} + +/// Constructs an `EntryMeta` expression. +fn entry_meta_expr( + raw_name: &str, + options: &AttrOptions, + ignore_attr_ident: Option<&syn::Path>, +) -> proc_macro2::TokenStream { + let AttrOptions { private_mod, .. } = &options; + + let raw_name_pretty = raw_name.strip_prefix("r#").unwrap_or(raw_name); + + let display_name: &dyn ToTokens = match &options.name_expr { + Some(name) => name, + None => &raw_name_pretty, + }; + + let bench_options = options.bench_options_fn(ignore_attr_ident); + + quote! 
{ + #private_mod::EntryMeta { + raw_name: #raw_name, + display_name: #display_name, + bench_options: #bench_options, + module_path: ::std::module_path!(), + + // `Span` location info is nightly-only, so use macros. + location: #private_mod::EntryLocation { + file: ::std::file!(), + line: ::std::line!(), + col: ::std::column!(), + }, + } + } +} diff --git a/crates/divan_compat/divan_fork/macros/src/tokens.rs b/crates/divan_compat/divan_fork/macros/src/tokens.rs new file mode 100644 index 00000000..71a52b53 --- /dev/null +++ b/crates/divan_compat/divan_fork/macros/src/tokens.rs @@ -0,0 +1,15 @@ +//! Token generation utilities. +//! +//! These use items from the standard library as `::std`. This works unless +//! users do `extern crate x as std`, which is extremely unlikely. + +use proc_macro2::TokenStream; +use quote::quote; + +pub fn option_some() -> TokenStream { + quote!(::std::option::Option::Some) +} + +pub fn option_none() -> TokenStream { + quote!(::std::option::Option::None) +} diff --git a/crates/divan_compat/divan_fork/rustfmt.toml b/crates/divan_compat/divan_fork/rustfmt.toml new file mode 100644 index 00000000..706917c3 --- /dev/null +++ b/crates/divan_compat/divan_fork/rustfmt.toml @@ -0,0 +1,5 @@ +# Rust code formatting; see https://rust-lang.github.io/rustfmt +edition = "2021" +newline_style = "Unix" +use_field_init_shorthand = true +use_small_heuristics = "Max" diff --git a/crates/divan_compat/divan_fork/src/alloc.rs b/crates/divan_compat/divan_fork/src/alloc.rs new file mode 100644 index 00000000..a00cf4e0 --- /dev/null +++ b/crates/divan_compat/divan_fork/src/alloc.rs @@ -0,0 +1,644 @@ +use std::{alloc::*, fmt, ptr::NonNull}; + +use cfg_if::cfg_if; + +use crate::{stats::StatsSet, util::sync::AtomicFlag}; + +#[cfg(target_os = "macos")] +use crate::util::{sync::CachePadded, thread::PThreadKey}; + +#[cfg(not(target_os = "macos"))] +use std::cell::UnsafeCell; + +/// The `AllocProfiler` when running crate-internal tests. 
+/// +/// This enables us to test it for: +/// - Undefined behavior with Miri +/// - Correctness when tallying +#[cfg(test)] +#[global_allocator] +static ALLOC: AllocProfiler = AllocProfiler::system(); + +/// Whether to ignore allocation info set during the benchmark. +pub(crate) static IGNORE_ALLOC: AtomicFlag = AtomicFlag::new(false); + +/// Measures [`GlobalAlloc`] memory usage. +/// +/// # Examples +/// +/// The default usage is to create a +/// [`#[global_allocator]`](macro@global_allocator) that wraps the [`System`] +/// allocator with [`AllocProfiler::system()`]: +/// +/// ``` +/// use std::collections::*; +/// use divan::AllocProfiler; +/// +/// #[global_allocator] +/// static ALLOC: AllocProfiler = AllocProfiler::system(); +/// +/// fn main() { +/// divan::main(); +/// } +/// +/// #[divan::bench(types = [ +/// Vec, +/// LinkedList, +/// HashSet, +/// ])] +/// fn from_iter() -> T +/// where +/// T: FromIterator, +/// { +/// (0..100).collect() +/// } +/// +/// #[divan::bench(types = [ +/// Vec, +/// LinkedList, +/// HashSet, +/// ])] +/// fn drop(bencher: divan::Bencher) +/// where +/// T: FromIterator, +/// { +/// bencher +/// .with_inputs(|| (0..100).collect::()) +/// .bench_values(std::mem::drop); +/// } +/// ``` +/// +/// Wrap other [`GlobalAlloc`] implementations like +/// [`mimalloc`](https://docs.rs/mimalloc) with [`AllocProfiler::new()`]: +/// +/// ``` +/// use divan::AllocProfiler; +/// use mimalloc::MiMalloc; +/// +/// # #[cfg(not(miri))] +/// #[global_allocator] +/// static ALLOC: AllocProfiler = AllocProfiler::new(MiMalloc); +/// ``` +/// +/// See [`string`](https://github.com/nvzqz/divan/blob/main/examples/benches/string.rs) +/// and [`collections`](https://github.com/nvzqz/divan/blob/main/examples/benches/collections.rs) +/// benchmarks for more examples. +/// +/// # Implementation +/// +/// Collecting allocation information happens at any point during which Divan is +/// also measuring the time. 
As a result, counting allocations affects timing. +/// +/// To reduce Divan's footprint during benchmarking: +/// - Allocation information is recorded in thread-local storage to prevent +/// contention when benchmarks involve multiple threads, either through +/// options like [`threads`](macro@crate::bench#threads) or internally +/// spawning their own threads. +/// - It does not check for overflow and assumes it will not happen. This is +/// subject to change in the future. +/// - Fast thread-local storage access is assembly-optimized on macOS. +/// +/// Allocation information is the only data Divan records outside of timing, and +/// thus it also has the only code that affects timing. Steps for recording +/// alloc info: +/// 1. Load the thread-local slot for allocation information. +/// +/// On macOS, this is via the +/// [`gs`](https://github.com/nvzqz/divan/blob/v0.1.6/src/util/sync.rs#L34)/[`tpidrro_el0`](https://github.com/nvzqz/divan/blob/v0.1.6/src/util/sync.rs#L47) +/// registers for +/// [`pthread_getspecific`](https://pubs.opengroup.org/onlinepubs/9699919799/functions/pthread_getspecific.html). +/// Although this is not guaranteed as stable ABI, in practice many programs +/// assume these registers store thread-local data. [`thread_local!`] is used +/// on all other platforms. +/// +/// 2. Increment allocation operation invocation count and bytes count +/// (a.k.a. size). +/// +/// Allocation information is recorded in thread-local storage to prevent +/// slowdowns from synchronized sharing when using multiple threads, through +/// options like [`threads`](macro@crate::bench#threads). +/// +/// Note that allocations in threads not controlled by Divan are not currently +/// counted. +#[derive(Debug, Default)] +pub struct AllocProfiler { + alloc: Alloc, +} + +unsafe impl GlobalAlloc for AllocProfiler { + unsafe fn alloc(&self, layout: Layout) -> *mut u8 { + // Tally allocation count. 
+ if let Some(mut info) = ThreadAllocInfo::try_current() { + // SAFETY: We have exclusive access. + let info = unsafe { info.as_mut() }; + + info.tally_alloc(layout.size()); + }; + + self.alloc.alloc(layout) + } + + unsafe fn alloc_zeroed(&self, layout: Layout) -> *mut u8 { + // Tally allocation count. + if let Some(mut info) = ThreadAllocInfo::try_current() { + // SAFETY: We have exclusive access. + let info = unsafe { info.as_mut() }; + + info.tally_alloc(layout.size()); + }; + + self.alloc.alloc_zeroed(layout) + } + + unsafe fn realloc(&self, ptr: *mut u8, layout: Layout, new_size: usize) -> *mut u8 { + // Tally reallocation count. + if let Some(mut info) = ThreadAllocInfo::try_current() { + // SAFETY: We have exclusive access. + let info = unsafe { info.as_mut() }; + + info.tally_realloc(layout.size(), new_size); + }; + + self.alloc.realloc(ptr, layout, new_size) + } + + unsafe fn dealloc(&self, ptr: *mut u8, layout: Layout) { + // Tally deallocation count. + if let Some(mut info) = ThreadAllocInfo::try_current() { + // SAFETY: We have exclusive access. + let info = unsafe { info.as_mut() }; + + info.tally_dealloc(layout.size()); + }; + + self.alloc.dealloc(ptr, layout) + } +} + +impl AllocProfiler { + /// Profiles the [`System`] allocator. + #[inline] + pub const fn system() -> Self { + Self::new(System) + } +} + +impl AllocProfiler { + /// Profiles a [`GlobalAlloc`]. + #[inline] + pub const fn new(alloc: A) -> Self { + Self { alloc } + } +} + +/// Thread-local allocation information. +#[derive(Clone, Default)] +#[repr(C)] +pub(crate) struct ThreadAllocInfo { + // NOTE: `tallies` should be ordered first so that `tally_realloc` can + // directly index `&self` without an offset. + pub tallies: ThreadAllocTallyMap, + + // NOTE: Max size and count are signed for convenience but can never be + // negative due to it being initialized to 0. + // + // PERF: Grouping current/max fields together by count and size makes + // `tally_alloc` take the least time on M1 Mac. 
+ pub current_count: ThreadAllocCountSigned, + pub max_count: ThreadAllocCountSigned, + pub current_size: ThreadAllocCountSigned, + pub max_size: ThreadAllocCountSigned, +} + +#[cfg(not(target_os = "macos"))] +thread_local! { + /// Instance specific to the current thread. + /// + /// On macOS, we use `ALLOC_PTHREAD_KEY` instead. + static CURRENT_THREAD_INFO: UnsafeCell = const { + UnsafeCell::new(ThreadAllocInfo::new()) + }; +} + +#[cfg(target_os = "macos")] +static ALLOC_PTHREAD_KEY: CachePadded> = CachePadded(PThreadKey::new()); + +impl ThreadAllocInfo { + #[inline] + pub const fn new() -> Self { + Self { + tallies: ThreadAllocTallyMap::new(), + max_count: 0, + current_count: 0, + max_size: 0, + current_size: 0, + } + } + + /// Returns the current thread's allocation information, initializing it on + /// first access. + /// + /// Returns `None` if the thread is terminating and has thus deallocated its + /// local instance. + #[inline] + pub fn current() -> Option> { + cfg_if! { + if #[cfg(target_os = "macos")] { + return Self::try_current().or_else(slow_impl); + } else { + Self::try_current() + } + } + + #[cfg(target_os = "macos")] + #[cold] + #[inline(never)] + fn slow_impl() -> Option> { + unsafe { + let layout = Layout::new::(); + + let Some(info_alloc) = NonNull::new(unsafe { System.alloc_zeroed(layout) }) else { + handle_alloc_error(layout); + }; + + let success = ALLOC_PTHREAD_KEY.0.set(info_alloc.as_ptr().cast(), |this| { + System.dealloc(this.as_ptr().cast(), Layout::new::()); + }); + + if !success { + System.dealloc(info_alloc.as_ptr(), layout); + return None; + } + + // When using static thread local key, write directly because it + // is undefined behavior to call `pthread_setspecific` with a + // key that didn't originate from `pthread_key_create`. 
+ #[cfg(all(not(miri), not(feature = "dyn_thread_local"), target_arch = "x86_64"))] + unsafe { + crate::util::thread::fast::set_static_thread_local(info_alloc.as_ptr()); + }; + + Some(info_alloc.cast()) + } + } + } + + /// Returns the current thread's allocation information if initialized. + /// + /// Returns `None` if the instance has not yet been allocated or the thread + /// is terminating and has thus deallocated its local instance. + #[inline] + pub fn try_current() -> Option> { + cfg_if! { + if #[cfg(target_os = "macos")] { + // Fast path: static thread local. + #[cfg(all( + not(miri), + not(feature = "dyn_thread_local"), + target_arch = "x86_64", + ))] + return NonNull::new(unsafe { + crate::util::thread::fast::get_static_thread_local::().cast_mut() + }); + + #[allow(unreachable_code)] + ALLOC_PTHREAD_KEY.0.get() + } else { + CURRENT_THREAD_INFO.try_with(|info| unsafe { + NonNull::new_unchecked(info.get()) + }).ok() + } + } + } + + /// Sets 0 to all values. + pub fn clear(&mut self) { + *self = Self::new(); + } + + /// Tallies the total count and size of the allocation operation. + #[inline] + pub fn tally_alloc(&mut self, size: usize) { + self.tally_op(AllocOp::Alloc, size); + + self.current_count += 1; + self.max_count = self.max_count.max(self.current_count); + + self.current_size += size as ThreadAllocCountSigned; + self.max_size = self.max_size.max(self.current_size); + } + + /// Tallies the total count and size of the deallocation operation. + #[inline] + pub fn tally_dealloc(&mut self, size: usize) { + self.tally_op(AllocOp::Dealloc, size); + + self.current_count -= 1; + self.current_size -= size as ThreadAllocCountSigned; + } + + /// Tallies the total count and size of the reallocation operation. 
+ #[inline] + pub fn tally_realloc(&mut self, old_size: usize, new_size: usize) { + let (diff, is_shrink) = new_size.overflowing_sub(old_size); + let diff = diff as isize; + let abs_diff = diff.wrapping_abs() as usize; + + self.tally_op(AllocOp::realloc(is_shrink), abs_diff); + + // NOTE: Realloc does not change allocation count. + self.current_size += diff as ThreadAllocCountSigned; + self.max_size = self.max_size.max(self.current_size); + } + + /// Tallies the total count and size of the allocation operation. + #[inline] + fn tally_op(&mut self, op: AllocOp, size: usize) { + let tally = self.tallies.get_mut(op); + tally.count += 1; + tally.size += size as ThreadAllocCount; + } +} + +/// Allocation numbers being accumulated. +/// +/// # Memory Layout +/// +/// Aligning to 16 nudges the compiler to emit aligned SIMD operations. +/// +/// Placing `count` first generates less code on AArch64. +#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)] +#[repr(C, align(16))] +pub(crate) struct AllocTally { + /// The number of times this operation was performed. + pub count: Count, + + /// The amount of memory this operation changed. + pub size: Count, +} + +pub(crate) type ThreadAllocCount = condtype::num::Usize64; +pub(crate) type ThreadAllocCountSigned = condtype::num::Isize64; + +pub(crate) type ThreadAllocTally = AllocTally; + +pub(crate) type TotalAllocTally = AllocTally; + +impl AllocTally> { + pub fn is_zero(&self) -> bool { + self.count.is_zero() && self.size.is_zero() + } +} + +impl AllocTally { + #[inline] + pub fn as_array(&self) -> &[C; 2] { + // SAFETY: This is `#[repr(C)]`, so we can treat it as a contiguous + // sequence of items. + unsafe { &*(self as *const _ as *const _) } + } +} + +/// Allocation number categories. +/// +/// Note that grow/shrink are first to improve code generation for `realloc`. 
+#[derive(Clone, Copy, PartialEq, Eq)] +pub(crate) enum AllocOp { + Grow, + Shrink, + Alloc, + Dealloc, +} + +impl AllocOp { + pub const ALL: [Self; 4] = { + use AllocOp::*; + + // Use same order as declared so that it can be indexed as-is. + [Grow, Shrink, Alloc, Dealloc] + }; + + #[inline] + pub fn realloc(shrink: bool) -> Self { + // This generates the same code as `std::mem::transmute`. + if shrink { + Self::Shrink + } else { + Self::Grow + } + } + + #[inline] + pub fn name(self) -> &'static str { + match self { + Self::Grow => "grow", + Self::Shrink => "shrink", + Self::Alloc => "alloc", + Self::Dealloc => "dealloc", + } + } + + #[inline] + pub fn prefix(self) -> &'static str { + match self { + Self::Grow => "grow:", + Self::Shrink => "shrink:", + Self::Alloc => "alloc:", + Self::Dealloc => "dealloc:", + } + } +} + +/// Values keyed by `AllocOp`. +#[derive(Clone, Copy, Default, PartialEq, Eq)] +pub(crate) struct AllocOpMap { + pub values: [T; 4], +} + +pub(crate) type ThreadAllocTallyMap = AllocOpMap; + +pub(crate) type TotalAllocTallyMap = AllocOpMap; + +impl fmt::Debug for AllocOpMap { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + f.debug_map().entries(AllocOp::ALL.iter().map(|&op| (op.name(), self.get(op)))).finish() + } +} + +impl ThreadAllocTallyMap { + #[inline] + pub const fn new() -> Self { + unsafe { std::mem::transmute([0u8; size_of::()]) } + } + + /// Returns `true` if all tallies are 0. 
+ #[inline] + pub fn is_empty(&self) -> bool { + self.values.iter().all(|tally| tally.count == 0 && tally.size == 0) + } + + pub fn add_to_total(&self, total: &mut TotalAllocTallyMap) { + for (i, value) in self.values.iter().enumerate() { + total.values[i].count += value.count as u128; + total.values[i].size += value.size as u128; + } + } +} + +impl AllocOpMap { + #[cfg(test)] + pub fn from_fn(f: F) -> Self + where + F: FnMut(AllocOp) -> T, + { + Self { values: AllocOp::ALL.map(f) } + } + + #[inline] + pub const fn get(&self, op: AllocOp) -> &T { + &self.values[op as usize] + } + + #[inline] + pub fn get_mut(&mut self, op: AllocOp) -> &mut T { + &mut self.values[op as usize] + } +} + +#[cfg(feature = "internal_benches")] +mod benches { + use super::*; + + // We want the approach to scale well with thread count. + const THREADS: &[usize] = &[0, 1, 2, 4, 16]; + + #[crate::bench(crate = crate, threads = THREADS)] + fn tally_alloc(bencher: crate::Bencher) { + IGNORE_ALLOC.set(true); + + // Using 0 simulates tallying without affecting benchmark reporting. + let size = crate::black_box(0); + + bencher.bench(|| { + if let Some(mut info) = ThreadAllocInfo::try_current() { + // SAFETY: We have exclusive access. + let info = unsafe { info.as_mut() }; + + info.tally_alloc(size); + } + }) + } + + #[crate::bench(crate = crate, threads = THREADS)] + fn tally_dealloc(bencher: crate::Bencher) { + IGNORE_ALLOC.set(true); + + // Using 0 simulates tallying without affecting benchmark reporting. + let size = crate::black_box(0); + + bencher.bench(|| { + if let Some(mut info) = ThreadAllocInfo::try_current() { + // SAFETY: We have exclusive access. + let info = unsafe { info.as_mut() }; + + info.tally_dealloc(size); + } + }) + } + + #[crate::bench(crate = crate, threads = THREADS)] + fn tally_realloc(bencher: crate::Bencher) { + IGNORE_ALLOC.set(true); + + // Using 0 simulates tallying without affecting benchmark reporting. 
+ let new_size = crate::black_box(0); + let old_size = crate::black_box(0); + + bencher.bench(|| { + if let Some(mut info) = ThreadAllocInfo::try_current() { + // SAFETY: We have exclusive access. + let info = unsafe { info.as_mut() }; + + info.tally_realloc(old_size, new_size); + } + }) + } + + #[crate::bench_group(crate = crate, threads = THREADS)] + mod current { + use super::*; + + #[crate::bench(crate = crate)] + fn init() -> Option> { + ThreadAllocInfo::current() + } + + #[crate::bench(crate = crate)] + fn r#try() -> Option> { + ThreadAllocInfo::try_current() + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + /// Tests that `AllocProfiler` is counting correctly. + #[test] + fn tally() { + // Initialize the thread's alloc info. + // + // SAFETY: This cannot be kept as a reference and is instead a raw + // pointer because a reference would cause undefined behavior when + // `AllocProfiler` attempts to update tallies. + let mut alloc_info = ThreadAllocInfo::current().unwrap(); + + // Resets the allocation tallies and returns the previous tallies. + let mut take_alloc_tallies = || std::mem::take(unsafe { &mut alloc_info.as_mut().tallies }); + + // Start fresh. + _ = take_alloc_tallies(); + + // Helper to create `ThreadAllocTallyMap` since each operation only + // changes `buf` by 1 `i32`. + let item_tally = ThreadAllocTally { count: 1, size: size_of::() as _ }; + let make_tally_map = |op: AllocOp| { + ThreadAllocTallyMap::from_fn(|other_op| { + if other_op == op { + item_tally + } else { + Default::default() + } + }) + }; + + // Test zero. + let mut buf: Vec = Vec::new(); + assert_eq!(take_alloc_tallies(), Default::default()); + + // Test allocation. + buf.reserve_exact(1); + assert_eq!(take_alloc_tallies(), make_tally_map(AllocOp::Alloc)); + + // Test grow. + buf.reserve_exact(2); + assert_eq!(take_alloc_tallies(), make_tally_map(AllocOp::Grow)); + + // Test shrink. 
+ buf.shrink_to(1); + assert_eq!(take_alloc_tallies(), make_tally_map(AllocOp::Shrink)); + + // Test dealloc. + drop(buf); + assert_eq!(take_alloc_tallies(), make_tally_map(AllocOp::Dealloc)); + + // Test all of the above together. + let mut buf: Vec = Vec::new(); + buf.reserve_exact(1); // alloc + buf.reserve_exact(2); // grow + buf.shrink_to(1); // shrink + drop(buf); // dealloc + assert_eq!(take_alloc_tallies(), ThreadAllocTallyMap { values: [item_tally; 4] }); + } +} diff --git a/crates/divan_compat/divan_fork/src/bench/args.rs b/crates/divan_compat/divan_fork/src/bench/args.rs new file mode 100644 index 00000000..62beb207 --- /dev/null +++ b/crates/divan_compat/divan_fork/src/bench/args.rs @@ -0,0 +1,338 @@ +//! Types used to implement runtime argument support. + +use std::{ + any::{Any, TypeId}, + borrow::Cow, + mem, slice, + sync::OnceLock, +}; + +use crate::{util::ty::TypeCast, Bencher}; + +/// Holds lazily-initialized runtime arguments to be passed into a benchmark. +/// +/// `#[divan::bench]` stores this as a `__DIVAN_ARGS` global for each entry, and +/// then at runtime it is initialized once by a closure that creates the usable +/// `BenchArgsRunner`. +pub struct BenchArgs { + args: OnceLock, +} + +/// The result of making `BenchArgs` runnable from instantiating the arguments +/// list and providing a typed benchmarking implementation. +#[derive(Clone, Copy)] +pub struct BenchArgsRunner { + args: &'static ErasedArgsSlice, + bench: fn(Bencher, &ErasedArgsSlice, arg_index: usize), +} + +/// Type-erased `&'static [T]` that also stores names of the arguments. +struct ErasedArgsSlice { + /// The start of `&[T]`. + args: *const (), + + /// The start of `&[&'static str]`. + names: *const &'static str, + + /// The number of arguments. + len: usize, + + /// The ID of `T` to ensure correctness. 
+ arg_type: TypeId, +} + +// SAFETY: Raw pointers in `ErasedArgsSlice` are used in a thread-safe way, and +// the argument type is required to be `Send + Sync` when initialized from the +// iterator in `BenchArgs::runner`. +unsafe impl Send for ErasedArgsSlice {} +unsafe impl Sync for ErasedArgsSlice {} + +impl BenchArgs { + /// Creates an uninitialized instance. + pub const fn new() -> Self { + Self { args: OnceLock::new() } + } + + /// Initializes `self` with the results of `make_args` and returns a + /// `BenchArgsRunner` that will execute the benchmarking closure. + pub fn runner( + &'static self, + make_args: impl FnOnce() -> I, + arg_to_string: impl Fn(&I::Item) -> String, + _bench_impl: B, + ) -> BenchArgsRunner + where + I: IntoIterator, + I::Item: Any + Send + Sync, + B: FnOnce(Bencher, &I::Item) + Copy, + { + let args = self.args.get_or_init(|| { + let args_iter = make_args().into_iter(); + + // Reuse arguments for names if already a slice of strings. + // + // NOTE: We do this over `I::IntoIter` instead of `I` since it works + // for both slices and `slice::Iter`. + let args_strings: Option<&'static [&str]> = + args_iter.cast_ref::>().map(|iter| iter.as_slice()); + + // Collect arguments into leaked slice. + // + // Leaking the collected `args` simplifies memory management, such + // as when reusing for `names`. We're leaking anyways since this is + // accessed via a global `OnceLock`. + // + // PERF: We could optimize this to reuse arguments when users + // provide slices. However, for slices its `Item` is a reference, so + // `slice::Iter` would never match here. To make this + // optimization, we would need to be able to get the referee type. + let args: &'static [I::Item] = Box::leak(args_iter.collect()); + + // Collect printable representations of arguments. 
+ // + // PERF: We take multiple opportunities to reuse the provided + // arguments buffer or individual strings' buffers: + // - `&[&str]` + // - `IntoIterator` + // - `IntoIterator` + // - `IntoIterator>` + // - `IntoIterator>` + let names: &'static [&str] = 'names: { + // PERF: Reuse arguments strings slice. + if let Some(args) = args_strings { + break 'names args; + } + + // PERF: Reuse our args slice allocation. + if let Some(args) = args.cast_ref::<&[&str]>() { + break 'names args; + } + + Box::leak( + args.iter() + .map(|arg| -> &str { + // PERF: Reuse strings as-is. + if let Some(arg) = arg.cast_ref::() { + return arg; + } + if let Some(arg) = arg.cast_ref::>() { + return arg; + } + if let Some(arg) = arg.cast_ref::>() { + return arg; + } + + // Default to `arg_to_string`, which will format via + // either `ToString` or `Debug`. + Box::leak(arg_to_string(arg).into_boxed_str()) + }) + .collect(), + ) + }; + + ErasedArgsSlice { + // We `black_box` arguments to prevent the compiler from + // optimizing the benchmark for the provided values. + args: crate::black_box(args.as_ptr().cast()), + names: names.as_ptr(), + len: args.len(), + arg_type: TypeId::of::(), + } + }); + + BenchArgsRunner { args, bench: bench:: } + } +} + +impl Default for BenchArgs { + fn default() -> Self { + Self::new() + } +} + +impl BenchArgsRunner { + #[inline] + pub(crate) fn bench(&self, bencher: Bencher, index: usize) { + (self.bench)(bencher, self.args, index) + } + + #[inline] + pub(crate) fn arg_names(&self) -> &'static [&'static str] { + self.args.names() + } +} + +impl ErasedArgsSlice { + /// Retrieves a slice of arguments if the type is `T`. + #[inline] + fn typed_args(&self) -> Option<&[T]> { + if self.arg_type == TypeId::of::() { + // SAFETY: `BenchArgs::runner` guarantees storing `len` instances. + Some(unsafe { slice::from_raw_parts(self.args.cast(), self.len) }) + } else { + None + } + } + + /// Returns the arguments' names. 
+ /// + /// Names are in the same order as args and thus their indices can be used + /// to reference arguments. + #[inline] + fn names(&self) -> &'static [&str] { + // SAFETY: `BenchArgs::runner` guarantees storing `len` names. + unsafe { slice::from_raw_parts(self.names, self.len) } + } +} + +/// The `BenchArgsRunner.bench` implementation. +fn bench(bencher: Bencher, erased_args: &ErasedArgsSlice, arg_index: usize) +where + T: Any, + B: FnOnce(Bencher, &T) + Copy, +{ + // We defer type checking until the benchmark is run to make safety of this + // function easier to audit. Checking here instead of in `BenchArgs::runner` + // is late but fine since this check will only fail due to a bug in Divan's + // macro code generation. + + let Some(typed_args) = erased_args.typed_args::() else { + type_mismatch::(); + + // Reduce code size by using a separate function for each `T` instead of + // each benchmark closure. + #[cold] + #[inline(never)] + fn type_mismatch() -> ! { + unreachable!("incorrect type '{}'", std::any::type_name::()) + } + }; + + // SAFETY: The closure is a ZST, so we can construct one out of thin air. + // This can be done multiple times without invoking a `Drop` destructor + // because it implements `Copy`. + let bench_impl: B = unsafe { + assert_eq!(size_of::(), 0, "benchmark closure expected to be zero-sized"); + mem::zeroed() + }; + + bench_impl(bencher, &typed_args[arg_index]); +} + +#[cfg(test)] +mod tests { + use super::*; + + /// Test that optimizations for string items are applied. + mod optimizations { + use std::borrow::Borrow; + + use super::*; + + /// Tests that two slices contain the same exact strings. + fn test_eq_ptr, B: Borrow>(a: &[A], b: &[B]) { + assert_eq!(a.len(), b.len()); + + for (a, b) in a.iter().zip(b) { + let a = a.borrow(); + let b = b.borrow(); + assert_eq!(a, b); + assert_eq!(a.as_ptr(), b.as_ptr()); + } + } + + /// Tests that `&[&str]` reuses the original slice for names. 
+ #[test] + fn str_slice() { + static ARGS: BenchArgs = BenchArgs::new(); + static ORIG_ARGS: &[&str] = &["a", "b"]; + + let runner = ARGS.runner(|| ORIG_ARGS, ToString::to_string, |_, _| {}); + + let typed_args: Vec<&str> = + runner.args.typed_args::<&&str>().unwrap().iter().copied().copied().collect(); + let names = runner.arg_names(); + + // Test values. + assert_eq!(names, ORIG_ARGS); + assert_eq!(names, typed_args); + + // Test addresses. + assert_eq!(names.as_ptr(), ORIG_ARGS.as_ptr()); + assert_ne!(names.as_ptr(), typed_args.as_ptr()); + } + + /// Tests optimizing `IntoIterator` to reuse the same + /// allocation for also storing argument names. + #[test] + fn str_array() { + static ARGS: BenchArgs = BenchArgs::new(); + + let runner = ARGS.runner(|| ["a", "b"], ToString::to_string, |_, _| {}); + + let typed_args = runner.args.typed_args::<&str>().unwrap(); + let names = runner.arg_names(); + + // Test values. + assert_eq!(names, ["a", "b"]); + assert_eq!(names, typed_args); + + // Test addresses. + assert_eq!(names.as_ptr(), typed_args.as_ptr()); + } + + /// Tests optimizing `IntoIterator` to reuse the same + /// allocation for also storing argument names. + #[test] + fn string_array() { + static ARGS: BenchArgs = BenchArgs::new(); + + let runner = + ARGS.runner(|| ["a".to_owned(), "b".to_owned()], ToString::to_string, |_, _| {}); + + let typed_args = runner.args.typed_args::().unwrap(); + let names = runner.arg_names(); + + assert_eq!(names, ["a", "b"]); + test_eq_ptr(names, typed_args); + } + + /// Tests optimizing `IntoIterator>` to reuse the same + /// allocation for also storing argument names. 
+ #[test] + fn box_str_array() { + static ARGS: BenchArgs = BenchArgs::new(); + + let runner = ARGS.runner( + || ["a".to_owned().into_boxed_str(), "b".to_owned().into_boxed_str()], + ToString::to_string, + |_, _| {}, + ); + + let typed_args = runner.args.typed_args::>().unwrap(); + let names = runner.arg_names(); + + assert_eq!(names, ["a", "b"]); + test_eq_ptr(names, typed_args); + } + + /// Tests optimizing `IntoIterator>` to reuse the same + /// allocation for also storing argument names. + #[test] + fn cow_str_array() { + static ARGS: BenchArgs = BenchArgs::new(); + + let runner = ARGS.runner( + || [Cow::Owned("a".to_owned()), Cow::Borrowed("b")], + ToString::to_string, + |_, _| {}, + ); + + let typed_args = runner.args.typed_args::>().unwrap(); + let names = runner.arg_names(); + + assert_eq!(names, ["a", "b"]); + test_eq_ptr(names, typed_args); + } + } +} diff --git a/crates/divan_compat/divan_fork/src/bench/defer.rs b/crates/divan_compat/divan_fork/src/bench/defer.rs new file mode 100644 index 00000000..67d12f67 --- /dev/null +++ b/crates/divan_compat/divan_fork/src/bench/defer.rs @@ -0,0 +1,188 @@ +use std::{ + cell::UnsafeCell, + mem::{ManuallyDrop, MaybeUninit}, +}; + +/// Defers input usage and output drop during benchmarking. +/// +/// To reduce memory usage, this only allocates storage for inputs if outputs do +/// not need deferred drop. +pub(crate) union DeferStore { + /// The variant used if outputs need to be dropped. + /// + /// Inputs are stored contiguously with outputs in memory. This + /// improves performance by: + /// - Removing the overhead of `zip` between two separate buffers. + /// - Improving cache locality and cache prefetching. Input is strategically + /// placed before output because iteration is from low to high addresses, + /// so doing this makes memory access patterns very predictable. + slots: ManuallyDrop>>, + + /// The variant used if `Self::ONLY_INPUTS`, i.e. outputs do not need to be + /// dropped. 
+ inputs: ManuallyDrop>>, +} + +impl Drop for DeferStore { + #[inline] + fn drop(&mut self) { + // SAFETY: The correct variant is used based on `ONLY_INPUTS`. + unsafe { + if Self::ONLY_INPUTS { + ManuallyDrop::drop(&mut self.inputs) + } else { + ManuallyDrop::drop(&mut self.slots) + } + } + } +} + +impl Default for DeferStore { + #[inline] + fn default() -> Self { + // SAFETY: The correct variant is used based on `ONLY_INPUTS`. + unsafe { + if Self::ONLY_INPUTS { + Self { inputs: ManuallyDrop::new(Vec::new()) } + } else { + Self { slots: ManuallyDrop::new(Vec::new()) } + } + } + } +} + +impl DeferStore { + /// Whether only inputs need to be deferred. + /// + /// If `true`, outputs do not get inserted into `DeferStore`. + const ONLY_INPUTS: bool = !std::mem::needs_drop::(); + + /// Prepares storage for iterating over `DeferSlot`s for a sample. + #[inline] + pub fn prepare(&mut self, sample_size: usize) { + // Common implementation regardless of `Vec` item type. + macro_rules! imp { + ($vec:expr) => {{ + $vec.clear(); + $vec.reserve_exact(sample_size); + + // SAFETY: `Vec` only contains `MaybeUninit` fields, so values + // may be safely created from uninitialized memory. + unsafe { $vec.set_len(sample_size) } + }}; + } + + // SAFETY: The correct variant is used based on `ONLY_INPUTS`. + unsafe { + if Self::ONLY_INPUTS { + imp!(self.inputs) + } else { + imp!(self.slots) + } + } + } + + /// Returns the sample's slots for iteration. + /// + /// The caller is expected to use the returned slice to initialize inputs + /// for the sample loop. + /// + /// This returns `Err` containing only input slots if `O` does not need + /// deferred drop. Ideally this would be implemented directly on `DeferSlot` + /// but there's no way to change its size based on `needs_drop::()`. 
+ #[inline(always)] + pub fn slots(&self) -> Result<&[DeferSlot], &[DeferSlotItem]> { + unsafe { + if Self::ONLY_INPUTS { + Err(&self.inputs) + } else { + Ok(&self.slots) + } + } + } +} + +/// Storage for a single iteration within a sample. +/// +/// Input is stored before output to improve cache prefetching since iteration +/// progresses from low to high addresses. +/// +/// # UnsafeCell +/// +/// `UnsafeCell` is used to allow `output` to safely refer to `input`. Although +/// `output` itself is never aliased, it is also stored as `UnsafeCell` in order +/// to get mutable access through a shared `&DeferSlot`. +/// +/// # Safety +/// +/// All fields **must** be `MaybeUninit`. This allows us to safely set the +/// length of `Vec` within the allocated capacity. +#[repr(C)] +pub(crate) struct DeferSlot { + pub input: DeferSlotItem, + pub output: DeferSlotItem, +} + +type DeferSlotItem = UnsafeCell>; + +#[cfg(test)] +mod tests { + use super::*; + + /// Tests that accessing an uninitialized `DeferSlot` is safe due to all of + /// its fields being `MaybeUninit`. + #[test] + fn access_uninit_slot() { + let mut slot: MaybeUninit> = MaybeUninit::uninit(); + + let slot_ref = unsafe { slot.assume_init_mut() }; + slot_ref.input = UnsafeCell::new(MaybeUninit::new(String::new())); + slot_ref.output = UnsafeCell::new(MaybeUninit::new(String::new())); + + unsafe { + let slot = slot.assume_init(); + assert_eq!(slot.input.into_inner().assume_init(), ""); + assert_eq!(slot.output.into_inner().assume_init(), ""); + } + } + + /// Tests that accessing `DeferSlot.input` through an aliased reference in + /// `DeferSlot.output` is safe due to `input` being an `UnsafeCell`. 
+ #[test] + fn access_aliased_input() { + struct Output<'i> { + input: &'i mut String, + } + + impl Drop for Output<'_> { + fn drop(&mut self) { + assert_eq!(self.input, "hello"); + self.input.push_str(" world"); + } + } + + let slot: MaybeUninit> = MaybeUninit::uninit(); + let slot_ref = unsafe { slot.assume_init_ref() }; + + // Loop to ensure previous iterations don't affect later uses of the + // same entry slot. + for _ in 0..5 { + unsafe { + let input_ptr = slot_ref.input.get().cast::(); + let output_ptr = slot_ref.output.get().cast::(); + + // Initialize input and output. + input_ptr.write("hello".to_owned()); + output_ptr.write(Output { input: &mut *input_ptr }); + + // Use and discard output. + assert_eq!((*output_ptr).input, "hello"); + output_ptr.drop_in_place(); + assert_eq!(&*input_ptr, "hello world"); + + // Discard input. + input_ptr.drop_in_place(); + } + } + } +} diff --git a/crates/divan_compat/divan_fork/src/bench/mod.rs b/crates/divan_compat/divan_fork/src/bench/mod.rs new file mode 100644 index 00000000..a8e730b8 --- /dev/null +++ b/crates/divan_compat/divan_fork/src/bench/mod.rs @@ -0,0 +1,1299 @@ +use std::{ + cell::UnsafeCell, + fmt, + mem::{self, MaybeUninit}, + num::NonZeroUsize, + sync::Barrier, +}; + +use crate::{ + alloc::{ + AllocOp, AllocOpMap, AllocTally, ThreadAllocInfo, ThreadAllocTally, TotalAllocTallyMap, + }, + black_box, black_box_drop, + counter::{ + AnyCounter, AsCountUInt, BytesCount, CharsCount, Counter, CounterCollection, CyclesCount, + IntoCounter, ItemsCount, KnownCounterKind, MaxCountUInt, + }, + divan::SharedContext, + stats::{RawSample, SampleCollection, Stats, StatsSet, TimeSample}, + thread_pool::BENCH_POOL, + time::{FineDuration, Timestamp, UntaggedTimestamp}, + util::{self, sync::SyncWrap, Unit}, +}; + +#[cfg(test)] +mod tests; + +mod args; +mod defer; +mod options; + +use defer::{DeferSlot, DeferStore}; + +pub use self::{ + args::{BenchArgs, BenchArgsRunner}, + options::BenchOptions, +}; + +pub(crate) const 
DEFAULT_SAMPLE_COUNT: u32 = 100; + +/// Enables contextual benchmarking in [`#[divan::bench]`](attr.bench.html). +/// +/// # Examples +/// +/// ``` +/// use divan::{Bencher, black_box}; +/// +/// #[divan::bench] +/// fn copy_from_slice(bencher: Bencher) { +/// // Input and output buffers get used in the closure. +/// let src = (0..100).collect::>(); +/// let mut dst = vec![0; src.len()]; +/// +/// bencher.bench_local(|| { +/// black_box(&mut dst).copy_from_slice(black_box(&src)); +/// }); +/// } +/// ``` +#[must_use = "a benchmark function must be registered"] +pub struct Bencher<'a, 'b, C = BencherConfig> { + pub(crate) context: &'a mut BenchContext<'b>, + pub(crate) config: C, +} + +/// Public-in-private type for statically-typed `Bencher` configuration. +/// +/// This enables configuring `Bencher` using the builder pattern with zero +/// runtime cost. +pub struct BencherConfig { + gen_input: GenI, +} + +impl fmt::Debug for Bencher<'_, '_, C> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("Bencher").finish_non_exhaustive() + } +} + +impl<'a, 'b> Bencher<'a, 'b> { + #[inline] + pub(crate) fn new(context: &'a mut BenchContext<'b>) -> Self { + Self { context, config: BencherConfig { gen_input: Unit } } + } +} + +impl<'a, 'b> Bencher<'a, 'b> { + /// Benchmarks a function. + /// + /// The function can be benchmarked in parallel using the [`threads` + /// option](macro@crate::bench#threads). If the function is strictly + /// single-threaded, use [`Bencher::bench_local`] instead. + /// + /// # Examples + /// + /// ``` + /// #[divan::bench] + /// fn bench(bencher: divan::Bencher) { + /// bencher.bench(|| { + /// // Benchmarked code... + /// }); + /// } + /// ``` + pub fn bench(self, benched: B) + where + B: Fn() -> O + Sync, + { + // Reusing `bench_values` for a zero-sized non-drop input type should + // have no overhead. + self.with_inputs(|| ()).bench_values(|_: ()| benched()); + } + + /// Benchmarks a function on the current thread. 
+ /// + /// # Examples + /// + /// ``` + /// #[divan::bench] + /// fn bench(bencher: divan::Bencher) { + /// bencher.bench_local(|| { + /// // Benchmarked code... + /// }); + /// } + /// ``` + pub fn bench_local(self, mut benched: B) + where + B: FnMut() -> O, + { + // Reusing `bench_local_values` for a zero-sized non-drop input type + // should have no overhead. + self.with_inputs(|| ()).bench_local_values(|_: ()| benched()); + } + + /// Generate inputs for the [benchmarked function](#input-bench). + /// + /// Time spent generating inputs does not affect benchmark timing. + /// + /// When [benchmarking in parallel](macro@crate::bench#threads), the input + /// generator is called on the same thread as the sample loop that uses that + /// input. + /// + /// # Examples + /// + /// ``` + /// #[divan::bench] + /// fn bench(bencher: divan::Bencher) { + /// bencher + /// .with_inputs(|| { + /// // Generate input: + /// String::from("...") + /// }) + /// .bench_values(|s| { + /// // Use input by-value: + /// s + "123" + /// }); + /// } + /// ``` + pub fn with_inputs(self, gen_input: G) -> Bencher<'a, 'b, BencherConfig> { + Bencher { context: self.context, config: BencherConfig { gen_input } } + } +} + +impl<'a, 'b, GenI> Bencher<'a, 'b, BencherConfig> { + /// Assign a [`Counter`] for all iterations of the benchmarked function. + /// + /// This will either: + /// - Assign a new counter + /// - Override an existing counter of the same type + /// + /// If the counter depends on [generated inputs](Self::with_inputs), use + /// [`Bencher::input_counter`] instead. + /// + /// If context is not needed, the counter can instead be set via + /// [`#[divan::bench(counters = ...)]`](macro@crate::bench#counters). + /// + /// # Examples + /// + /// ``` + /// use divan::{Bencher, counter::BytesCount}; + /// + /// #[divan::bench] + /// fn char_count(bencher: Bencher) { + /// let s: String = // ... 
+ /// # String::new(); + /// + /// bencher + /// .counter(BytesCount::of_str(&s)) + /// .bench(|| { + /// divan::black_box(&s).chars().count() + /// }); + /// } + /// ``` + #[doc(alias = "throughput")] + pub fn counter(self, counter: C) -> Self + where + C: IntoCounter, + { + let counter = AnyCounter::new(counter); + self.context.counters.set_counter(counter); + self + } +} + +/// Benchmark over [generated inputs](Self::with_inputs). +impl<'a, 'b, I, GenI> Bencher<'a, 'b, BencherConfig> +where + GenI: FnMut() -> I, +{ + /// Calls a closure to create a [`Counter`] for each input of the + /// benchmarked function. + /// + /// This will either: + /// - Assign a new counter + /// - Override an existing counter of the same type + /// + /// If the counter is constant, use [`Bencher::counter`] instead. + /// + /// When [benchmarking in parallel](macro@crate::bench#threads), the input + /// counter is called on the same thread as the sample loop that generates + /// and uses that input. + /// + /// # Examples + /// + /// The following example emits info for the number of bytes processed when + /// benchmarking [`char`-counting](std::str::Chars::count). The byte count + /// is gotten by calling [`BytesCount::of_str`] on each iteration's input + /// [`String`]. + /// + /// ``` + /// use divan::{Bencher, counter::BytesCount}; + /// + /// #[divan::bench] + /// fn char_count(bencher: Bencher) { + /// bencher + /// .with_inputs(|| -> String { + /// // ... + /// # String::new() + /// }) + /// .input_counter(BytesCount::of_str) + /// .bench_refs(|s| { + /// s.chars().count() + /// }); + /// } + /// ``` + pub fn input_counter(self, make_counter: F) -> Self + where + F: Fn(&I) -> C + Sync + 'static, + C: IntoCounter, + { + self.context.counters.set_input_counter(make_counter); + self + } + + /// Creates a [`Counter`] from each input of the benchmarked function. 
+ /// + /// This may be used if the input returns [`u8`]–[`u64`], [`usize`], or any + /// nesting of references to those types. + /// + /// # Examples + /// + /// The following example emits info for the number of items processed when + /// benchmarking [`FromIterator`] from + /// [Range](std::ops::Range)<[usize]> to [`Vec`]. + /// + /// ``` + /// use divan::{Bencher, counter::ItemsCount}; + /// + /// #[divan::bench] + /// fn range_to_vec(bencher: Bencher) { + /// bencher + /// .with_inputs(|| -> usize { + /// // ... + /// # 0 + /// }) + /// .count_inputs_as::() + /// .bench_values(|n| -> Vec { + /// (0..n).collect() + /// }); + /// } + /// ``` + #[inline] + pub fn count_inputs_as(self) -> Self + where + C: Counter, + I: AsCountUInt, + { + match KnownCounterKind::of::() { + KnownCounterKind::Bytes => self.input_counter(|c| BytesCount::from(c)), + KnownCounterKind::Chars => self.input_counter(|c| CharsCount::from(c)), + KnownCounterKind::Cycles => self.input_counter(|c| CyclesCount::from(c)), + KnownCounterKind::Items => self.input_counter(|c| ItemsCount::from(c)), + } + } + + /// Benchmarks a function over per-iteration [generated inputs](Self::with_inputs), + /// provided by-value. + /// + /// Per-iteration means the benchmarked function is called exactly once for + /// each generated input. + /// + /// The function can be benchmarked in parallel using the [`threads` + /// option](macro@crate::bench#threads). If the function is strictly + /// single-threaded, use [`Bencher::bench_local_values`] instead. 
+ /// + /// # Examples + /// + /// ``` + /// #[divan::bench] + /// fn bench(bencher: divan::Bencher) { + /// bencher + /// .with_inputs(|| { + /// // Generate input: + /// String::from("...") + /// }) + /// .bench_values(|s| { + /// // Use input by-value: + /// s + "123" + /// }); + /// } + /// ``` + pub fn bench_values(self, benched: B) + where + B: Fn(I) -> O + Sync, + GenI: Fn() -> I + Sync, + { + self.context.bench_loop_threaded( + self.config.gen_input, + |input| { + // SAFETY: Input is guaranteed to be initialized and not + // currently referenced by anything else. + let input = unsafe { input.get().read().assume_init() }; + + benched(input) + }, + // Input ownership is transferred to `benched`. + |_input| {}, + ); + } + + /// Benchmarks a function over per-iteration [generated inputs](Self::with_inputs), + /// provided by-value. + /// + /// Per-iteration means the benchmarked function is called exactly once for + /// each generated input. + /// + /// # Examples + /// + /// ``` + /// #[divan::bench] + /// fn bench(bencher: divan::Bencher) { + /// let mut values = Vec::new(); + /// bencher + /// .with_inputs(|| { + /// // Generate input: + /// String::from("...") + /// }) + /// .bench_local_values(|s| { + /// // Use input by-value: + /// values.push(s); + /// }); + /// } + /// ``` + pub fn bench_local_values(self, mut benched: B) + where + B: FnMut(I) -> O, + { + self.context.bench_loop_local( + self.config.gen_input, + |input| { + // SAFETY: Input is guaranteed to be initialized and not + // currently referenced by anything else. + let input = unsafe { input.get().read().assume_init() }; + + benched(input) + }, + // Input ownership is transferred to `benched`. + |_input| {}, + ); + } + + /// Benchmarks a function over per-iteration [generated inputs](Self::with_inputs), + /// provided by-reference. + /// + /// Per-iteration means the benchmarked function is called exactly once for + /// each generated input. 
+ /// + /// # Examples + /// + /// ``` + /// #[divan::bench] + /// fn bench(bencher: divan::Bencher) { + /// bencher + /// .with_inputs(|| { + /// // Generate input: + /// String::from("...") + /// }) + /// .bench_refs(|s| { + /// // Use input by-reference: + /// *s += "123"; + /// }); + /// } + /// ``` + pub fn bench_refs(self, benched: B) + where + B: Fn(&mut I) -> O + Sync, + GenI: Fn() -> I + Sync, + { + // TODO: Allow `O` to reference `&mut I` as long as `I` outlives `O`. + self.context.bench_loop_threaded( + self.config.gen_input, + |input| { + // SAFETY: Input is guaranteed to be initialized and not + // currently referenced by anything else. + let input = unsafe { (*input.get()).assume_init_mut() }; + + benched(input) + }, + // Input ownership was not transferred to `benched`. + |input| { + // SAFETY: This function is called after `benched` outputs are + // dropped, so we have exclusive access. + unsafe { (*input.get()).assume_init_drop() } + }, + ); + } + + /// Benchmarks a function over per-iteration [generated inputs](Self::with_inputs), + /// provided by-reference. + /// + /// Per-iteration means the benchmarked function is called exactly once for + /// each generated input. + /// + /// # Examples + /// + /// ``` + /// #[divan::bench] + /// fn bench(bencher: divan::Bencher) { + /// bencher + /// .with_inputs(|| { + /// // Generate input: + /// String::from("...") + /// }) + /// .bench_local_refs(|s| { + /// // Use input by-reference: + /// *s += "123"; + /// }); + /// } + /// ``` + pub fn bench_local_refs(self, mut benched: B) + where + B: FnMut(&mut I) -> O, + { + // TODO: Allow `O` to reference `&mut I` as long as `I` outlives `O`. + self.context.bench_loop_local( + self.config.gen_input, + |input| { + // SAFETY: Input is guaranteed to be initialized and not + // currently referenced by anything else. + let input = unsafe { (*input.get()).assume_init_mut() }; + + benched(input) + }, + // Input ownership was not transferred to `benched`. 
+ |input| { + // SAFETY: This function is called after `benched` outputs are + // dropped, so we have exclusive access. + unsafe { (*input.get()).assume_init_drop() } + }, + ); + } +} + +/// State machine for how the benchmark is being run. +#[derive(Clone, Copy)] +pub(crate) enum BenchMode { + /// The benchmark is being run as `--test`. + /// + /// Don't collect samples and run exactly once. + Test, + + /// Scale `sample_size` to determine the right size for collecting. + Tune { sample_size: u32 }, + + /// Simply collect samples. + Collect { sample_size: u32 }, +} + +impl BenchMode { + #[inline] + pub fn is_test(self) -> bool { + matches!(self, Self::Test) + } + + #[inline] + pub fn is_tune(self) -> bool { + matches!(self, Self::Tune { .. }) + } + + #[inline] + pub fn is_collect(self) -> bool { + matches!(self, Self::Collect { .. }) + } + + #[inline] + pub fn sample_size(self) -> u32 { + match self { + Self::Test => 1, + Self::Tune { sample_size, .. } | Self::Collect { sample_size, .. } => sample_size, + } + } +} + +/// `#[divan::bench]` loop context. +/// +/// Functions called within the benchmark loop should be `#[inline(always)]` to +/// ensure instruction cache locality. +pub(crate) struct BenchContext<'a> { + shared_context: &'a SharedContext, + + /// User-configured options. + pub options: &'a BenchOptions<'a>, + + /// Whether the benchmark loop was started. + pub did_run: bool, + + /// The number of threads to run the benchmark. The default is 1. + /// + /// When set to 1, the benchmark loop is guaranteed to stay on the current + /// thread and not spawn any threads. + pub thread_count: NonZeroUsize, + + /// Recorded samples. + pub samples: SampleCollection, + + /// Per-iteration counters grouped by sample. + counters: CounterCollection, +} + +impl<'a> BenchContext<'a> { + /// Creates a new benchmarking context. 
+ pub fn new( + shared_context: &'a SharedContext, + options: &'a BenchOptions, + thread_count: NonZeroUsize, + ) -> Self { + Self { + shared_context, + options, + thread_count, + did_run: false, + samples: SampleCollection::default(), + counters: options.counters.to_collection(), + } + } + + /// Runs the single-threaded loop for benchmarking `benched`. + /// + /// # Safety + /// + /// See `bench_loop_threaded`. + pub fn bench_loop_local( + &mut self, + gen_input: impl FnMut() -> I, + benched: impl FnMut(&UnsafeCell>) -> O, + drop_input: impl Fn(&UnsafeCell>), + ) { + // SAFETY: Closures are guaranteed to run on the current thread, so they + // can safely be mutable and non-`Sync`. + unsafe { + let gen_input = SyncWrap::new(UnsafeCell::new(gen_input)); + let benched = SyncWrap::new(UnsafeCell::new(benched)); + let drop_input = SyncWrap::new(drop_input); + + self.thread_count = NonZeroUsize::MIN; + self.bench_loop_threaded::( + || (*gen_input.get())(), + |input| (*benched.get())(input), + |input| drop_input(input), + ) + } + } + + /// Runs the multi-threaded loop for benchmarking `benched`. + /// + /// # Safety + /// + /// If `self.threads` is 1, the incoming closures will not escape the + /// current thread. This guarantee ensures `bench_loop_local` can soundly + /// reuse this method with mutable non-`Sync` closures. + /// + /// When `benched` is called: + /// - `I` is guaranteed to be initialized. + /// - No external `&I` or `&mut I` exists. + /// + /// When `drop_input` is called: + /// - All instances of `O` returned from `benched` have been dropped. + /// - The same guarantees for `I` apply as in `benched`, unless `benched` + /// escaped references to `I`. 
+ fn bench_loop_threaded( + &mut self, + gen_input: impl Fn() -> I + Sync, + benched: impl Fn(&UnsafeCell>) -> O + Sync, + drop_input: impl Fn(&UnsafeCell>) + Sync, + ) { + self.did_run = true; + + let mut current_mode = self.initial_mode(); + let is_test = current_mode.is_test(); + + let record_sample = self.sample_recorder(gen_input, benched, drop_input); + + let thread_count = self.thread_count.get(); + let aux_thread_count = thread_count - 1; + + let is_single_thread = aux_thread_count == 0; + + // Per-thread sample info returned by `record_sample`. These are + // processed locally to emit user-facing sample info. As a result, this + // only contains `thread_count` many elements at a time. + let mut raw_samples = Vec::>::new(); + + // The time spent benchmarking, in picoseconds. + // + // Unless `skip_ext_time` is set, this includes time external to + // `benched`, such as time spent generating inputs and running drop. + let mut elapsed_picos: u128 = 0; + + // The minimum time for benchmarking, in picoseconds. + let min_picos = self.options.min_time().picos; + + // The remaining time left for benchmarking, in picoseconds. + let max_picos = self.options.max_time().picos; + + // Don't bother running if user specifies 0 max time or 0 samples. + if max_picos == 0 || !self.options.has_samples() { + return; + } + + let timer = self.shared_context.timer; + let timer_kind = timer.kind(); + + let mut rem_samples = if current_mode.is_collect() { + Some(self.options.sample_count.unwrap_or(DEFAULT_SAMPLE_COUNT)) + } else { + None + }; + + // Only measure precision if we need to tune sample size. 
+ let timer_precision = + if current_mode.is_tune() { timer.precision() } else { FineDuration::default() }; + + if !is_test { + self.samples.time_samples.reserve(self.options.sample_count.unwrap_or(1) as usize); + } + + let skip_ext_time = self.options.skip_ext_time.unwrap_or_default(); + let initial_start = if skip_ext_time { None } else { Some(Timestamp::start(timer_kind)) }; + + let bench_overheads = timer.bench_overheads(); + + while { + // Conditions for when sampling is over: + if elapsed_picos >= max_picos { + // Depleted the benchmarking time budget. This is a strict + // condition regardless of sample count and minimum time. + false + } else if rem_samples.unwrap_or(1) > 0 { + // More samples expected. + true + } else { + // Continue if we haven't reached the time floor. + elapsed_picos < min_picos + } + } { + let sample_size = current_mode.sample_size(); + self.samples.sample_size = sample_size; + + let barrier = if is_single_thread { None } else { Some(Barrier::new(thread_count)) }; + + // Sample loop helper: + let record_sample = || -> RawSample { + let mut counter_totals: [u128; KnownCounterKind::COUNT] = + [0; KnownCounterKind::COUNT]; + + // Updates per-input counter info for this sample. + let mut count_input = |input: &I| { + for counter_kind in KnownCounterKind::ALL { + // SAFETY: The `I` type cannot change since `with_inputs` + // cannot be called more than once on the same `Bencher`. 
+ if let Some(count) = + unsafe { self.counters.get_input_count(counter_kind, input) } + { + let total = &mut counter_totals[counter_kind as usize]; + *total = (*total).saturating_add(count as u128); + } + } + }; + + // Sample loop: + let ([start, end], alloc_info) = + record_sample(sample_size as usize, barrier.as_ref(), &mut count_input); + + RawSample { start, end, timer, alloc_info, counter_totals } + }; + + // Sample loop: + raw_samples.clear(); + BENCH_POOL.par_extend(&mut raw_samples, aux_thread_count, |_| record_sample()); + + // Convert `&[Option]` to `&[Sample]`. + let raw_samples: &[RawSample] = { + if let Some(thread) = raw_samples + .iter() + .enumerate() + .find_map(|(thread, sample)| sample.is_none().then_some(thread)) + { + panic!("Divan benchmarking thread {thread} panicked"); + } + + unsafe { + assert_eq!(mem::size_of::(), mem::size_of::>()); + std::slice::from_raw_parts(raw_samples.as_ptr().cast(), raw_samples.len()) + } + }; + + // If testing, exit the benchmarking loop immediately after timing a + // single run. + if is_test { + break; + } + + let slowest_sample = raw_samples.iter().max_by_key(|s| s.duration()).unwrap(); + let slowest_time = slowest_sample.duration(); + + // TODO: Make tuning be less influenced by early runs. Currently if + // early runs are very quick but later runs are slow, benchmarking + // will take a very long time. + // + // TODO: Make `sample_size` consider time generating inputs and + // dropping inputs/outputs. Currently benchmarks like + // `Bencher::bench_refs(String::clear)` take a very long time. + if current_mode.is_tune() { + // Clear previous smaller samples. + self.samples.clear(); + self.counters.clear_input_counts(); + + // If within 100x timer precision, continue tuning. 
+ let precision_multiple = slowest_time.picos / timer_precision.picos; + if precision_multiple <= 100 { + current_mode = BenchMode::Tune { sample_size: sample_size * 2 }; + } else { + current_mode = BenchMode::Collect { sample_size }; + rem_samples = Some(self.options.sample_count.unwrap_or(DEFAULT_SAMPLE_COUNT)); + } + } + + // Returns the sample's duration adjusted for overhead. + let sample_duration_sub_overhead = |raw_sample: &RawSample| { + let overhead = bench_overheads.total_overhead(sample_size, &raw_sample.alloc_info); + + FineDuration { + picos: raw_sample + .duration() + .clamp_to(timer_precision) + .picos + .saturating_sub(overhead.picos), + } + .clamp_to(timer_precision) + }; + + for raw_sample in raw_samples { + let sample_index = self.samples.time_samples.len(); + + self.samples + .time_samples + .push(TimeSample { duration: sample_duration_sub_overhead(raw_sample) }); + + if !raw_sample.alloc_info.tallies.is_empty() { + self.samples + .alloc_info_by_sample + .insert(sample_index as u32, raw_sample.alloc_info.clone()); + } + + // Insert per-input counter information. + for counter_kind in KnownCounterKind::ALL { + if !self.counters.uses_input_counts(counter_kind) { + continue; + } + + let total_count = raw_sample.counter_totals[counter_kind as usize]; + + // Cannot overflow `MaxCountUInt` because `total_count` + // cannot exceed `MaxCountUInt::MAX * sample_size`. + let per_iter_count = (total_count / sample_size as u128) as MaxCountUInt; + + self.counters.push_counter(AnyCounter::known(counter_kind, per_iter_count)); + } + + if let Some(rem_samples) = &mut rem_samples { + *rem_samples = rem_samples.saturating_sub(1); + } + } + + if let Some(initial_start) = initial_start { + let last_end = raw_samples.iter().map(|s| s.end).max().unwrap(); + elapsed_picos = last_end.duration_since(initial_start, timer).picos; + } else { + // Progress by at least 1ns to prevent extremely fast + // functions from taking forever when `min_time` is set. 
+ let progress_picos = slowest_time.picos.max(1_000); + elapsed_picos = elapsed_picos.saturating_add(progress_picos); + } + } + + // Reset flag for ignoring allocations. + crate::alloc::IGNORE_ALLOC.set(false); + } + + /// Returns a closure that takes the sample size and input counter, and then + /// returns a newly recorded sample. + fn sample_recorder( + &self, + gen_input: impl Fn() -> I, + benched: impl Fn(&UnsafeCell>) -> O, + drop_input: impl Fn(&UnsafeCell>), + ) -> impl Fn(usize, Option<&Barrier>, &mut dyn FnMut(&I)) -> ([Timestamp; 2], ThreadAllocInfo) + { + // We defer: + // - Usage of `gen_input` values. + // - Drop destructor for `O`, preventing it from affecting sample + // measurements. Outputs are stored into a pre-allocated buffer during + // the sample loop. The allocation is reused between samples to reduce + // time spent between samples. + + let timer_kind = self.shared_context.timer.kind(); + + move |sample_size: usize, barrier: Option<&Barrier>, count_input: &mut dyn FnMut(&I)| { + let mut defer_store = DeferStore::::default(); + + let mut saved_alloc_info = ThreadAllocInfo::new(); + let mut save_alloc_info = || { + if crate::alloc::IGNORE_ALLOC.get() { + return; + } + + if let Some(alloc_info) = ThreadAllocInfo::try_current() { + // SAFETY: We have exclusive access. + saved_alloc_info = unsafe { alloc_info.as_ptr().read() }; + } + }; + + // Synchronize all threads to start timed section simultaneously and + // clear every thread's memory profiling info. + // + // This ensures work external to the timed section does not affect + // the timing of other threads. + let sync_threads = |is_start: bool| { + sync_impl(barrier, is_start); + + // Monomorphize implementation to reduce code size. + #[inline(never)] + fn sync_impl(barrier: Option<&Barrier>, is_start: bool) { + // Ensure benchmarked section has a `ThreadAllocInfo` + // allocated for the current thread and clear previous info. 
+ let alloc_info = if is_start { ThreadAllocInfo::current() } else { None }; + + // Synchronize all threads. + // + // This is the final synchronization point for the end. + if let Some(barrier) = barrier { + barrier.wait(); + } + + if let Some(mut alloc_info) = alloc_info { + // SAFETY: We have exclusive access. + let alloc_info = unsafe { alloc_info.as_mut() }; + + alloc_info.clear(); + + // Synchronize all threads. + if let Some(barrier) = barrier { + barrier.wait(); + } + } + } + }; + + // The following logic chooses how to efficiently sample the + // benchmark function once and assigns `sample_start`/`sample_end` + // before/after the sample loop. + // + // NOTE: Testing and benchmarking should behave exactly the same + // when getting the sample time span. We don't want to introduce + // extra work that may worsen measurement quality for real + // benchmarking. + let sample_start: UntaggedTimestamp; + let sample_end: UntaggedTimestamp; + + if size_of::() == 0 && (size_of::() == 0 || !mem::needs_drop::()) { + // Use a range instead of `defer_store` to make the benchmarking + // loop cheaper. + + // Run `gen_input` the expected number of times in case it + // updates external state used by `benched`. + for _ in 0..sample_size { + let input = gen_input(); + count_input(&input); + + // Inputs are consumed/dropped later. + mem::forget(input); + } + + sync_threads(true); + sample_start = UntaggedTimestamp::start(timer_kind); + + // Sample loop: + for _ in 0..sample_size { + // SAFETY: Input is a ZST, so we can construct one out of + // thin air. + let input = unsafe { UnsafeCell::new(MaybeUninit::::zeroed()) }; + + mem::forget(black_box(benched(&input))); + } + + sample_end = UntaggedTimestamp::end(timer_kind); + sync_threads(false); + save_alloc_info(); + + // Drop outputs and inputs. + for _ in 0..sample_size { + // Output only needs drop if ZST. + if size_of::() == 0 { + // SAFETY: Output is a ZST, so we can construct one out + // of thin air. 
+ unsafe { _ = mem::zeroed::() } + } + + if mem::needs_drop::() { + // SAFETY: Input is a ZST, so we can construct one out + // of thin air and not worry about aliasing. + unsafe { drop_input(&UnsafeCell::new(MaybeUninit::::zeroed())) } + } + } + } else { + defer_store.prepare(sample_size); + + match defer_store.slots() { + // Output needs to be dropped. We defer drop in the sample + // loop by inserting it into `defer_store`. + Ok(defer_slots_slice) => { + // Initialize and store inputs. + for DeferSlot { input, .. } in defer_slots_slice { + // SAFETY: We have exclusive access to `input`. + let input = unsafe { &mut *input.get() }; + let input = input.write(gen_input()); + count_input(input); + + // Make input opaque to benchmarked function. + black_box(input); + } + + // Create iterator before the sample timing section to + // reduce benchmarking overhead. + let defer_slots_iter = defer_slots_slice.iter(); + + sync_threads(true); + sample_start = UntaggedTimestamp::start(timer_kind); + + // Sample loop: + for defer_slot in defer_slots_iter { + // SAFETY: All inputs in `defer_store` were + // initialized and we have exclusive access to the + // output slot. + unsafe { + let output = benched(&defer_slot.input); + *defer_slot.output.get() = MaybeUninit::new(output); + } + } + + sample_end = UntaggedTimestamp::end(timer_kind); + sync_threads(false); + save_alloc_info(); + + // Prevent the optimizer from removing writes to inputs + // and outputs in the sample loop. + black_box(defer_slots_slice); + + // Drop outputs and inputs. + for DeferSlot { input, output } in defer_slots_slice { + // SAFETY: All outputs were initialized in the + // sample loop and we have exclusive access. + unsafe { (*output.get()).assume_init_drop() } + + if mem::needs_drop::() { + // SAFETY: The output was dropped and thus we + // have exclusive access to inputs. + unsafe { drop_input(input) } + } + } + } + + // Output does not need to be dropped. 
+ Err(defer_inputs_slice) => { + // Initialize and store inputs. + for input in defer_inputs_slice { + // SAFETY: We have exclusive access to `input`. + let input = unsafe { &mut *input.get() }; + let input = input.write(gen_input()); + count_input(input); + + // Make input opaque to benchmarked function. + black_box(input); + } + + // Create iterator before the sample timing section to + // reduce benchmarking overhead. + let defer_inputs_iter = defer_inputs_slice.iter(); + + sync_threads(true); + sample_start = UntaggedTimestamp::start(timer_kind); + + // Sample loop: + for input in defer_inputs_iter { + // SAFETY: All inputs in `defer_store` were + // initialized. + black_box_drop(unsafe { benched(input) }); + } + + sample_end = UntaggedTimestamp::end(timer_kind); + sync_threads(false); + save_alloc_info(); + + // Prevent the optimizer from removing writes to inputs + // in the sample loop. + black_box(defer_inputs_slice); + + // Drop inputs. + if mem::needs_drop::() { + for input in defer_inputs_slice { + // SAFETY: We have exclusive access to inputs. + unsafe { drop_input(input) } + } + } + } + } + } + + // SAFETY: These values are guaranteed to be the correct variant + // because they were created from the same `timer_kind`. 
+ let interval = unsafe { + [sample_start.into_timestamp(timer_kind), sample_end.into_timestamp(timer_kind)] + }; + + (interval, saved_alloc_info) + } + } + + #[inline] + fn initial_mode(&self) -> BenchMode { + if self.shared_context.action.is_test() { + BenchMode::Test + } else if let Some(sample_size) = self.options.sample_size { + BenchMode::Collect { sample_size } + } else { + BenchMode::Tune { sample_size: 1 } + } + } + + pub fn compute_stats(&self) -> Stats { + let time_samples = &self.samples.time_samples; + let alloc_info_by_sample = &self.samples.alloc_info_by_sample; + + let sample_count = time_samples.len(); + let sample_size = self.samples.sample_size; + + let total_count = self.samples.iter_count(); + + let total_duration = self.samples.total_duration(); + let mean_duration = FineDuration { + picos: total_duration.picos.checked_div(total_count as u128).unwrap_or_default(), + }; + + // Samples sorted by duration. + let sorted_samples = self.samples.sorted_samples(); + let median_samples = util::slice_middle(&sorted_samples); + + let index_of_sample = |sample: &TimeSample| -> usize { + util::slice_ptr_index(&self.samples.time_samples, sample) + }; + + let counter_count_for_sample = + |sample: &TimeSample, counter_kind: KnownCounterKind| -> Option { + let counts = self.counters.counts(counter_kind); + + let index = if self.counters.uses_input_counts(counter_kind) { + index_of_sample(sample) + } else { + 0 + }; + + counts.get(index).copied() + }; + + let min_duration = + sorted_samples.first().map(|s| s.duration / sample_size).unwrap_or_default(); + let max_duration = + sorted_samples.last().map(|s| s.duration / sample_size).unwrap_or_default(); + + let median_duration = if median_samples.is_empty() { + FineDuration::default() + } else { + let sum: u128 = median_samples.iter().map(|s| s.duration.picos).sum(); + FineDuration { picos: sum / median_samples.len() as u128 } / sample_size + }; + + let counts = KnownCounterKind::ALL.map(|counter_kind| { + let 
median: MaxCountUInt = { + let mut sum: u128 = 0; + + for sample in median_samples { + let sample_count = counter_count_for_sample(sample, counter_kind)? as u128; + + // Saturating add in case `MaxUIntCount > u64`. + sum = sum.saturating_add(sample_count); + } + + (sum / median_samples.len() as u128) as MaxCountUInt + }; + + Some(StatsSet { + fastest: sorted_samples + .first() + .and_then(|s| counter_count_for_sample(s, counter_kind))?, + slowest: sorted_samples + .last() + .and_then(|s| counter_count_for_sample(s, counter_kind))?, + median, + mean: self.counters.mean_count(counter_kind), + }) + }); + + let sample_alloc_info = |sample: Option<&TimeSample>| -> Option<&ThreadAllocInfo> { + sample + .and_then(|sample| u32::try_from(index_of_sample(sample)).ok()) + .and_then(|index| self.samples.alloc_info_by_sample.get(&index)) + }; + + let sample_alloc_tally = |sample: Option<&TimeSample>, op: AllocOp| -> ThreadAllocTally { + sample_alloc_info(sample) + .map(|alloc_info| alloc_info.tallies.get(op)) + .copied() + .unwrap_or_default() + }; + + let mut alloc_total_max_count = 0u128; + let mut alloc_total_max_size = 0u128; + let mut alloc_total_tallies = TotalAllocTallyMap::default(); + + for alloc_info in alloc_info_by_sample.values() { + alloc_total_max_count += alloc_info.max_count as u128; + alloc_total_max_size += alloc_info.max_size as u128; + alloc_info.tallies.add_to_total(&mut alloc_total_tallies); + } + + let sample_size = f64::from(sample_size); + Stats { + sample_count: sample_count as u32, + iter_count: total_count, + time: StatsSet { + fastest: min_duration, + slowest: max_duration, + median: median_duration, + mean: mean_duration, + }, + max_alloc: StatsSet { + fastest: { + let alloc_info = sample_alloc_info(sorted_samples.first().copied()); + + AllocTally { + count: alloc_info.map(|info| info.max_count as f64).unwrap_or_default() + / sample_size, + size: alloc_info.map(|info| info.max_size as f64).unwrap_or_default() + / sample_size, + } + }, + slowest: { 
+ let alloc_info = sample_alloc_info(sorted_samples.last().copied()); + + AllocTally { + count: alloc_info.map(|info| info.max_count as f64).unwrap_or_default() + / sample_size, + size: alloc_info.map(|info| info.max_size as f64).unwrap_or_default() + / sample_size, + } + }, + // TODO: Switch to median of alloc info itself, rather than + // basing off of median times. + median: { + let alloc_info_for_median = + |index| sample_alloc_info(median_samples.get(index).copied()); + + let max_count_for_median = |index: usize| -> f64 { + alloc_info_for_median(index) + .map(|info| info.max_count as f64) + .unwrap_or_default() + }; + + let max_size_for_median = |index: usize| -> f64 { + alloc_info_for_median(index) + .map(|info| info.max_size as f64) + .unwrap_or_default() + }; + + let median_count = median_samples.len().max(1) as f64; + + let median_max_count = max_count_for_median(0) + max_count_for_median(1); + let median_max_size = max_size_for_median(0) + max_size_for_median(1); + + AllocTally { + count: median_max_count / median_count / sample_size, + size: median_max_size / median_count / sample_size, + } + }, + mean: AllocTally { + count: alloc_total_max_count as f64 / total_count as f64, + size: alloc_total_max_size as f64 / total_count as f64, + }, + } + .transpose(), + alloc_tallies: AllocOpMap { + values: AllocOp::ALL + .map(|op| StatsSet { + fastest: { + let fastest = sample_alloc_tally(sorted_samples.first().copied(), op); + + AllocTally { + count: fastest.count as f64 / sample_size, + size: fastest.size as f64 / sample_size, + } + }, + slowest: { + let slowest = sample_alloc_tally(sorted_samples.last().copied(), op); + + AllocTally { + count: slowest.count as f64 / sample_size, + size: slowest.size as f64 / sample_size, + } + }, + median: { + let tally_for_median = |index: usize| -> ThreadAllocTally { + sample_alloc_tally(median_samples.get(index).copied(), op) + }; + + let a = tally_for_median(0); + let b = tally_for_median(1); + + let median_count = 
median_samples.len().max(1) as f64; + + let avg_count = (a.count as f64 + b.count as f64) / median_count; + let avg_size = (a.size as f64 + b.size as f64) / median_count; + + AllocTally { + count: avg_count / sample_size, + size: avg_size / sample_size, + } + }, + mean: { + let tally = alloc_total_tallies.get(op); + AllocTally { + count: tally.count as f64 / total_count as f64, + size: tally.size as f64 / total_count as f64, + } + }, + }) + .map(StatsSet::transpose), + }, + counts, + } + } +} + +impl StatsSet> { + #[inline] + pub fn transpose(self) -> AllocTally> { + AllocTally { + count: StatsSet { + fastest: self.fastest.count, + slowest: self.slowest.count, + median: self.median.count, + mean: self.mean.count, + }, + size: StatsSet { + fastest: self.fastest.size, + slowest: self.slowest.size, + median: self.median.size, + mean: self.mean.size, + }, + } + } +} diff --git a/crates/divan_compat/divan_fork/src/bench/options.rs b/crates/divan_compat/divan_fork/src/bench/options.rs new file mode 100644 index 00000000..e4f7f96b --- /dev/null +++ b/crates/divan_compat/divan_fork/src/bench/options.rs @@ -0,0 +1,85 @@ +use std::{borrow::Cow, time::Duration}; + +use crate::{counter::CounterSet, time::FineDuration}; + +/// Benchmarking options set directly by the user in `#[divan::bench]` and +/// `#[divan::bench_group]`. +/// +/// Changes to fields must be reflected in the "Options" sections of the docs +/// for `#[divan::bench]` and `#[divan::bench_group]`. +#[derive(Clone, Default)] +pub struct BenchOptions<'a> { + /// The number of sample recordings. + pub sample_count: Option, + + /// The number of iterations inside a single sample. + pub sample_size: Option, + + /// The number of threads to benchmark the sample. This is 1 by default. + /// + /// If set to 0, this will use [`std::thread::available_parallelism`]. + /// + /// We use `&'static [usize]` by leaking the input because `BenchOptions` is + /// cached on first retrieval. 
+ pub threads: Option>, + + /// Counts the number of values processed each iteration of a benchmarked + /// function. + pub counters: CounterSet, + + /// The time floor for benchmarking a function. + pub min_time: Option, + + /// The time ceiling for benchmarking a function. + pub max_time: Option, + + /// When accounting for `min_time` or `max_time`, skip time external to + /// benchmarked functions, such as time spent generating inputs and running + /// [`Drop`]. + pub skip_ext_time: Option, + + /// Whether the benchmark should be ignored. + /// + /// This may be set within the attribute or with a separate + /// [`#[ignore]`](https://doc.rust-lang.org/reference/attributes/testing.html#the-ignore-attribute). + pub ignore: Option, +} + +impl<'a> BenchOptions<'a> { + /// Overwrites `other` with values set in `self`. + #[must_use] + pub(crate) fn overwrite<'b>(&'b self, other: &'b Self) -> Self + where + 'b: 'a, + { + Self { + // `Copy` values: + sample_count: self.sample_count.or(other.sample_count), + sample_size: self.sample_size.or(other.sample_size), + threads: self.threads.as_deref().or(other.threads.as_deref()).map(Cow::Borrowed), + min_time: self.min_time.or(other.min_time), + max_time: self.max_time.or(other.max_time), + skip_ext_time: self.skip_ext_time.or(other.skip_ext_time), + ignore: self.ignore.or(other.ignore), + + // `Clone` values: + counters: self.counters.overwrite(&other.counters), + } + } + + /// Returns `true` if non-zero samples are specified. 
+ #[inline] + pub(crate) fn has_samples(&self) -> bool { + self.sample_count != Some(0) && self.sample_size != Some(0) + } + + #[inline] + pub(crate) fn min_time(&self) -> FineDuration { + self.min_time.map(FineDuration::from).unwrap_or_default() + } + + #[inline] + pub(crate) fn max_time(&self) -> FineDuration { + self.max_time.map(FineDuration::from).unwrap_or(FineDuration::MAX) + } +} diff --git a/crates/divan_compat/divan_fork/src/bench/tests.rs b/crates/divan_compat/divan_fork/src/bench/tests.rs new file mode 100644 index 00000000..22f006f1 --- /dev/null +++ b/crates/divan_compat/divan_fork/src/bench/tests.rs @@ -0,0 +1,544 @@ +//! Tests every benchmarking loop combination in `Bencher`. When run under Miri, +//! this catches memory leaks and UB in `unsafe` code. + +use std::{ + collections::HashSet, + sync::atomic::{AtomicUsize, Ordering::SeqCst}, +}; + +use util::defer; + +use super::*; +use crate::{ + config::Action, + time::{Timer, TimerKind}, +}; + +// We use a small number of runs because Miri is very slow. +const SAMPLE_COUNT: u32 = 3; + +const SAMPLE_SIZE: u32 = 2; + +// Tests `SAMPLE_COUNT` by including it in the middle and having higher numbers +// where `SAMPLE_COUNT % n != 0`. +const THREAD_COUNTS: &[usize] = if cfg!(miri) { + // Speed up Miri tests while still catching UB/memory issues. + &[1, 2] +} else { + // Exhaustively test expectations. + // + // Tests `SAMPLE_COUNT` by: + // - Including it in the middle + // - Having numbers where `SAMPLE_COUNT % n` varies + &[1, 2, 3, 4, 5, 6, 9] +}; + +#[track_caller] +fn test_bencher(test: &mut dyn FnMut(Bencher)) { + // Silence Miri about leaking threads. 
+ let _drop_threads = defer(|| BENCH_POOL.drop_threads()); + + let bench_options = BenchOptions { + sample_count: Some(SAMPLE_COUNT), + sample_size: Some(SAMPLE_SIZE), + ..BenchOptions::default() + }; + + for timer in Timer::available() { + for action in [Action::Bench, Action::Test] { + let shared_context = SharedContext { action, timer }; + + for &thread_count in THREAD_COUNTS { + let mut bench_context = BenchContext::new( + &shared_context, + &bench_options, + NonZeroUsize::new(thread_count).unwrap(), + ); + + test(Bencher::new(&mut bench_context)); + + assert!(bench_context.did_run); + + let samples = &bench_context.samples; + + // '--test' should run the expected number of times but not + // allocate any samples. + if action.is_test() { + assert_eq!(samples.time_samples.capacity(), 0); + } + } + } + } +} + +fn make_string() -> String { + ('a'..='z').collect() +} + +/// Tests that the benchmarked function runs the expected number of times when +/// running either in benchmark or test mode. +/// +/// Tests operate over all input/output combinations of: +/// - `()` +/// - `i32` +/// - `String` +/// - Zero sized type (ZST) that implements `Drop` +/// +/// This ensures that any special handling of `size_of` or `needs_drop` does not +/// affect the number of runs. 
+#[allow(clippy::unused_unit)] +mod run_count { + use super::*; + + fn test(run_bench: fn(Bencher, &(dyn Fn() + Sync))) { + test_with_drop_counter(&AtomicUsize::new(usize::MAX), run_bench); + } + + fn test_with_drop_counter( + drop_count: &AtomicUsize, + run_bench: fn(Bencher, &(dyn Fn() + Sync)), + ) { + let test_drop_count = drop_count.load(SeqCst) != usize::MAX; + + let bench_count = AtomicUsize::new(0); + let test_count = AtomicUsize::new(0); + + let mut thread_counts = HashSet::::new(); + let mut timer_os = false; + let mut timer_tsc = false; + + test_bencher(&mut |bencher| { + let context = &bencher.context; + + let thread_count = context.thread_count.get(); + thread_counts.insert(thread_count as u32); + + match context.shared_context.timer.kind() { + TimerKind::Os => timer_os = true, + TimerKind::Tsc => timer_tsc = true, + } + + let is_test = context.shared_context.action.is_test(); + + let shared_run_count = if is_test { &test_count } else { &bench_count }; + let start_run_count = shared_run_count.load(SeqCst); + + run_bench(bencher, &|| { + shared_run_count.fetch_add(1, SeqCst); + }); + + let end_run_count = shared_run_count.load(SeqCst); + let run_count = end_run_count - start_run_count; + + if is_test { + assert_eq!(run_count, thread_count); + } else { + let expected_samples = match SAMPLE_COUNT as usize % thread_count { + 0 => SAMPLE_COUNT, + rem => SAMPLE_COUNT + (thread_count - rem) as u32, + }; + + let expected_iters = (expected_samples * SAMPLE_SIZE) as usize; + assert_eq!(run_count, expected_iters); + } + }); + + let thread_count = thread_counts.into_iter().sum::(); + + let timer_count = timer_os as u32 + timer_tsc as u32; + let bench_count = bench_count.into_inner() as u32; + let test_count = test_count.into_inner() as u32; + + let total_count = bench_count + test_count; + assert_ne!(total_count, 0); + + // The drop count should equal the total run count. 
+ if test_drop_count { + assert_eq!(drop_count.load(SeqCst), total_count as usize); + } + + assert_eq!(test_count, timer_count * thread_count); + } + + #[test] + fn bench() { + struct DroppedZst; + + static ZST_DROP_COUNT: AtomicUsize = AtomicUsize::new(0); + + impl Drop for DroppedZst { + fn drop(&mut self) { + ZST_DROP_COUNT.fetch_add(1, SeqCst); + } + } + + // `()` out. + test(|b, f| b.bench(f)); + + // `i32` out. + test(|b, f| { + b.bench(|| -> i32 { + f(); + 100i32 + }) + }); + + // `String` out. + test(|b, f| { + b.bench(|| -> String { + f(); + make_string() + }) + }); + + // `DroppedZst` out. + test_with_drop_counter(&ZST_DROP_COUNT, |b, f| { + b.bench(|| -> DroppedZst { + f(); + DroppedZst + }) + }); + } + + #[test] + fn bench_values() { + struct DroppedZst; + + static ZST_DROP_COUNT: AtomicUsize = AtomicUsize::new(0); + + impl Drop for DroppedZst { + fn drop(&mut self) { + ZST_DROP_COUNT.fetch_add(1, SeqCst); + } + } + + let test_zst_drop = |run_bench| { + ZST_DROP_COUNT.store(0, SeqCst); + test_with_drop_counter(&ZST_DROP_COUNT, run_bench); + }; + + // `()` in, `()` out. + test(|b, f| b.with_inputs(|| ()).bench_values(|_: ()| -> () { f() })); + + // `()` in, `i32` out. + test(|b, f| { + b.with_inputs(|| ()).bench_values(|_: ()| -> i32 { + f(); + 100i32 + }) + }); + + // `()` in, `String` out. + test(|b, f| { + b.with_inputs(|| ()).bench_values(|_: ()| -> String { + f(); + make_string() + }) + }); + + // `()` in, `DroppedZst` out. + test_zst_drop(|b, f| { + b.with_inputs(|| ()).bench_values(|_: ()| -> DroppedZst { + f(); + DroppedZst + }) + }); + + // `i32` in, `()` out. + test(|b, f| b.with_inputs(|| 100i32).bench_values(|_: i32| -> () { f() })); + + // `i32` in, `i32` out. + test(|b, f| { + b.with_inputs(|| 100i32).bench_values(|value: i32| -> i32 { + f(); + value + }) + }); + + // `i32` in, `String` out. + test(|b, f| { + b.with_inputs(|| 100i32).bench_values(|_: i32| -> String { + f(); + make_string() + }) + }); + + // `i32` in, `DroppedZst` out. 
+ test_zst_drop(|b, f| { + b.with_inputs(|| 100i32).bench_values(|_: i32| -> DroppedZst { + f(); + DroppedZst + }) + }); + + // `String` in, `()` out. + test(|b, f| b.with_inputs(make_string).bench_values(|_: String| -> () { f() })); + + // `String` in, `i32` out. + test(|b, f| { + b.with_inputs(make_string).bench_values(|_: String| -> i32 { + f(); + 100i32 + }) + }); + + // `String` in, `String` out. + test(|b, f| { + b.with_inputs(make_string).bench_values(|value: String| -> String { + f(); + value + }) + }); + + // `String` in, `DroppedZst` out. + test_zst_drop(|b, f| { + b.with_inputs(make_string).bench_values(|_: String| -> DroppedZst { + f(); + DroppedZst + }) + }); + + // `DroppedZst` in, `()` out. + test_zst_drop(|b, f| { + b.with_inputs(|| DroppedZst).bench_values(|_: DroppedZst| -> () { f() }) + }); + + // `DroppedZst` in, `i32` out. + test_zst_drop(|b, f| { + b.with_inputs(|| DroppedZst).bench_values(|_: DroppedZst| -> i32 { + f(); + 100i32 + }) + }); + + // `DroppedZst` in, `String` out. + test_zst_drop(|b, f| { + b.with_inputs(|| DroppedZst).bench_values(|_: DroppedZst| -> String { + f(); + make_string() + }) + }); + + // `DroppedZst` in, `DroppedZst` out. + test_zst_drop(|b, f| { + b.with_inputs(|| DroppedZst).bench_values(|value: DroppedZst| -> DroppedZst { + f(); + value + }) + }); + } + + #[test] + fn bench_refs() { + struct DroppedZst; + + static ZST_DROP_COUNT: AtomicUsize = AtomicUsize::new(0); + + impl Drop for DroppedZst { + fn drop(&mut self) { + ZST_DROP_COUNT.fetch_add(1, SeqCst); + } + } + + let test_zst_drop = |run_bench| { + ZST_DROP_COUNT.store(0, SeqCst); + test_with_drop_counter(&ZST_DROP_COUNT, run_bench); + }; + + // `&mut ()` in, `()` out. + test(|b, f| b.with_inputs(|| ()).bench_refs(|_: &mut ()| -> () { f() })); + + // `&mut ()` in, `i32` out. + test(|b, f| { + b.with_inputs(|| ()).bench_refs(|_: &mut ()| -> i32 { + f(); + 100i32 + }) + }); + + // `&mut ()` in, `String` out. 
+ test(|b, f| { + b.with_inputs(|| ()).bench_refs(|_: &mut ()| -> String { + f(); + make_string() + }) + }); + + // `&mut ()` in, `DroppedZst` out. + test_zst_drop(|b, f| { + b.with_inputs(|| ()).bench_refs(|_: &mut ()| -> DroppedZst { + f(); + DroppedZst + }) + }); + + // `&mut i32` in, `()` out. + test(|b, f| b.with_inputs(|| 100i32).bench_refs(|_: &mut i32| -> () { f() })); + + // `&mut i32` in, `i32` out. + test(|b, f| { + b.with_inputs(|| 100i32).bench_refs(|value: &mut i32| -> i32 { + f(); + *value + }) + }); + + // `&mut i32` in, `String` out. + test(|b, f| { + b.with_inputs(|| 100i32).bench_refs(|_: &mut i32| -> String { + f(); + make_string() + }) + }); + + // `&mut i32` in, `DroppedZst` out. + test_zst_drop(|b, f| { + b.with_inputs(|| 100i32).bench_refs(|_: &mut i32| -> DroppedZst { + f(); + DroppedZst + }) + }); + + // `&mut String` in, `()` out. + test(|b, f| b.with_inputs(make_string).bench_refs(|_: &mut String| -> () { f() })); + + // `&mut String` in, `i32` out. + test(|b, f| { + b.with_inputs(make_string).bench_refs(|_: &mut String| -> i32 { + f(); + 100i32 + }) + }); + + // `&mut String` in, `String` out. + test(|b, f| { + b.with_inputs(make_string).bench_refs(|value: &mut String| -> String { + f(); + value.clone() + }) + }); + + // `&mut String` in, `DroppedZst` out. + test_zst_drop(|b, f| { + b.with_inputs(make_string).bench_refs(|_: &mut String| -> DroppedZst { + f(); + DroppedZst + }) + }); + + // `&mut DroppedZst` in, `()` out. + test_zst_drop(|b, f| { + b.with_inputs(|| DroppedZst).bench_refs(|_: &mut DroppedZst| -> () { f() }) + }); + + // `&mut DroppedZst` in, `i32` out. + test_zst_drop(|b, f| { + b.with_inputs(|| DroppedZst).bench_refs(|_: &mut DroppedZst| -> i32 { + f(); + 100i32 + }) + }); + + // `&mut DroppedZst` in, `String` out. + test_zst_drop(|b, f| { + b.with_inputs(|| DroppedZst).bench_refs(|_: &mut DroppedZst| -> String { + f(); + make_string() + }) + }); + + // `&mut DroppedZst` in, `DroppedZst` out. 
+ test_zst_drop(|b, f| { + b.with_inputs(|| { + // Adjust counter for input ZST. + ZST_DROP_COUNT.fetch_sub(1, SeqCst); + + DroppedZst + }) + .bench_refs(|_: &mut DroppedZst| -> DroppedZst { + f(); + DroppedZst + }) + }); + } +} + +mod no_input { + use super::*; + + #[test] + fn string_output() { + test_bencher(&mut |b| b.bench(make_string)); + } + + #[test] + fn no_output() { + test_bencher(&mut |b| b.bench(|| black_box_drop(make_string()))); + } +} + +mod string_input { + use super::*; + + #[test] + fn string_output() { + test_bencher(&mut |b| b.with_inputs(make_string).bench_values(|s| s.to_ascii_uppercase())); + } + + #[test] + fn no_output() { + test_bencher(&mut |b| b.with_inputs(make_string).bench_refs(|s| s.make_ascii_uppercase())); + } +} + +mod zst_input { + use super::*; + + #[test] + fn zst_output() { + struct DroppedZst; + + // Each test has its own `ZST_COUNT` global because tests are run + // independently in parallel. + static ZST_COUNT: AtomicUsize = AtomicUsize::new(0); + + impl Drop for DroppedZst { + fn drop(&mut self) { + ZST_COUNT.fetch_sub(1, SeqCst); + } + } + + test_bencher(&mut |b| { + b.with_inputs(|| { + ZST_COUNT.fetch_add(1, SeqCst); + DroppedZst + }) + .bench_values(black_box); + }); + + assert_eq!(ZST_COUNT.load(SeqCst), 0); + } + + #[test] + fn no_output() { + struct DroppedZst; + + static ZST_COUNT: AtomicUsize = AtomicUsize::new(0); + + impl Drop for DroppedZst { + fn drop(&mut self) { + ZST_COUNT.fetch_sub(1, SeqCst); + } + } + + test_bencher(&mut |b| { + b.with_inputs(|| { + ZST_COUNT.fetch_add(1, SeqCst); + DroppedZst + }) + .bench_values(drop); + }); + + assert_eq!(ZST_COUNT.load(SeqCst), 0); + } +} diff --git a/crates/divan_compat/divan_fork/src/cli.rs b/crates/divan_compat/divan_fork/src/cli.rs new file mode 100644 index 00000000..263e6eaf --- /dev/null +++ b/crates/divan_compat/divan_fork/src/cli.rs @@ -0,0 +1,197 @@ +use clap::{builder::PossibleValue, value_parser, Arg, ArgAction, ColorChoice, Command, ValueEnum}; + +use 
crate::{ + config::{ParsedSeconds, SortingAttr}, + counter::MaxCountUInt, + time::TimerKind, +}; + +pub(crate) fn command() -> Command { + fn option(name: &'static str) -> Arg { + Arg::new(name).long(name) + } + + fn flag(name: &'static str) -> Arg { + option(name).action(ArgAction::SetTrue) + } + + fn ignored_flag(name: &'static str) -> Arg { + flag(name).hide(true) + } + + // Custom arguments not supported by libtest: + // - bytes-format + // - sample-count + // - sample-size + // - timer + // - sort + // - sortr + + // TODO: `--format ` + + Command::new("divan") + .arg( + Arg::new("filter") + .value_name("FILTER") + .help("Only run benchmarks whose names match this pattern") + .action(ArgAction::Append), + ) + .arg( + flag("test") + .help("Run benchmarks once to ensure they run successfully") + .conflicts_with("list"), + ) + .arg(flag("list").help("Lists benchmarks").conflicts_with("test")) + .arg( + option("color") + .value_name("WHEN") + .help("Controls when to use colors") + .value_parser(value_parser!(ColorChoice)) + ) + .arg( + option("skip") + .value_name("FILTER") + .help("Skip benchmarks whose names match this pattern") + .action(ArgAction::Append), + ) + .arg(flag("exact").help("Filter benchmarks by exact name rather than by pattern")) + .arg(flag("ignored").help("Run only ignored benchmarks").conflicts_with("include-ignored")) + .arg( + flag("include-ignored") + .help("Run ignored and not-ignored benchmarks") + .conflicts_with("ignored"), + ) + .arg( + option("sort") + .env("DIVAN_SORT") + .value_name("ATTRIBUTE") + .help("Sort benchmarks in ascending order") + .value_parser(value_parser!(SortingAttr)) + ) + .arg( + option("sortr") + .env("DIVAN_SORTR") + .value_name("ATTRIBUTE") + .help("Sort benchmarks in descending order") + .value_parser(value_parser!(SortingAttr)) + .overrides_with("sort"), + ) + .arg( + option("timer") + .env("DIVAN_TIMER") + .value_name("os|tsc") + .help("Set the timer used for measuring samples") + 
.value_parser(value_parser!(TimerKind)), + ) + .arg( + option("sample-count") + .env("DIVAN_SAMPLE_COUNT") + .value_name("N") + .help("Set the number of sampling iterations") + .value_parser(value_parser!(u32)), + ) + .arg( + option("sample-size") + .env("DIVAN_SAMPLE_SIZE") + .value_name("N") + .help("Set the number of iterations inside a single sample") + .value_parser(value_parser!(u32)), + ) + .arg( + option("threads") + .env("DIVAN_THREADS") + .value_name("N") + .value_delimiter(',') + .action(ArgAction::Append) + .help("Run across multiple threads to measure contention on atomics and locks") + .value_parser(value_parser!(usize)), + ) + .arg( + option("min-time") + .env("DIVAN_MIN_TIME") + .value_name("SECS") + .help("Set the minimum seconds spent benchmarking a single function") + .value_parser(value_parser!(ParsedSeconds)), + ) + .arg( + option("max-time") + .env("DIVAN_MAX_TIME") + .value_name("SECS") + .help("Set the maximum seconds spent benchmarking a single function, with priority over '--min-time'") + .value_parser(value_parser!(ParsedSeconds)), + ) + .arg( + option("skip-ext-time") + .env("DIVAN_SKIP_EXT_TIME") + .value_name("true|false") + .help("When '--min-time' or '--max-time' is set, skip time external to benchmarked functions") + .value_parser(value_parser!(bool)) + .num_args(0..=1), + ) + .arg( + option("items-count") + .env("DIVAN_ITEMS_COUNT") + .value_name("N") + .help("Set every benchmark to have a throughput of N items") + .value_parser(value_parser!(MaxCountUInt)), + ) + .arg( + option("bytes-count") + .env("DIVAN_BYTES_COUNT") + .value_name("N") + .help("Set every benchmark to have a throughput of N bytes") + .value_parser(value_parser!(MaxCountUInt)), + ) + .arg( + option("bytes-format") + .env("DIVAN_BYTES_FORMAT") + .help("Set the numerical base for bytes in output") + .value_name("decimal|binary") + .value_parser(value_parser!(crate::counter::PrivBytesFormat)) + ) + .arg( + option("chars-count") + .env("DIVAN_CHARS_COUNT") + 
.value_name("N") + .help("Set every benchmark to have a throughput of N string scalars") + .value_parser(value_parser!(MaxCountUInt)), + ) + .arg( + option("cycles-count") + .env("DIVAN_CYCLES_COUNT") + .value_name("N") + .help("Set every benchmark to have a throughput of N cycles, displayed as Hertz") + .value_parser(value_parser!(MaxCountUInt)), + ) + // ignored: + .args([ignored_flag("bench"), ignored_flag("nocapture"), ignored_flag("show-output")]) +} + +impl ValueEnum for TimerKind { + fn value_variants<'a>() -> &'a [Self] { + &[Self::Os, Self::Tsc] + } + + fn to_possible_value(&self) -> Option { + let name = match self { + Self::Os => "os", + Self::Tsc => "tsc", + }; + Some(PossibleValue::new(name)) + } +} + +impl ValueEnum for SortingAttr { + fn value_variants<'a>() -> &'a [Self] { + &[Self::Kind, Self::Name, Self::Location] + } + + fn to_possible_value(&self) -> Option { + let name = match self { + Self::Kind => "kind", + Self::Name => "name", + Self::Location => "location", + }; + Some(PossibleValue::new(name)) + } +} diff --git a/crates/divan_compat/divan_fork/src/compile_fail.rs b/crates/divan_compat/divan_fork/src/compile_fail.rs new file mode 100644 index 00000000..8a86b7fe --- /dev/null +++ b/crates/divan_compat/divan_fork/src/compile_fail.rs @@ -0,0 +1,36 @@ +//! Private compile failure tests. +//! +//! # Repeated Options +//! +//! Options repeated in `#[divan::bench]` should cause a compile error, even if +//! they use raw identifiers. The initial implementation allowed raw identifiers +//! to slip through because `syn::Ident` does not consider them to be equal to +//! the normal form without the `r#` prefix. +//! +//! We don't include `r#crate` here because it's not a valid identifier. +//! +//! ```compile_fail +//! #[divan::bench(name = "x", r#name = "x")] +//! fn bench() {} +//! ``` +//! +//! ```compile_fail +//! #[divan::bench(sample_count = 1, r#sample_count = 1)] +//! fn bench() {} +//! ``` +//! +//! ```compile_fail +//! 
#[divan::bench(sample_size = 1, r#sample_size = 1)] +//! fn bench() {} +//! ``` +//! +//! # Type Checking +//! +//! The following won't produce any benchmarks because `types = []`. However, we +//! still want to ensure that values in `consts = [...]` match the generic +//! const's type of `i32`. +//! +//! ```compile_fail +//! #[divan::bench(types = [], consts = ['a', 'b', 'c'])] +//! fn bench() {} +//! ``` diff --git a/crates/divan_compat/divan_fork/src/config.rs b/crates/divan_compat/divan_fork/src/config.rs new file mode 100644 index 00000000..1c0daae0 --- /dev/null +++ b/crates/divan_compat/divan_fork/src/config.rs @@ -0,0 +1,186 @@ +use std::{cmp::Ordering, error::Error, str::FromStr, time::Duration}; + +use regex::Regex; + +use crate::util::sort::natural_cmp; + +/// `Duration` wrapper for parsing seconds from the CLI. +#[derive(Clone, Copy)] +pub(crate) struct ParsedSeconds(pub Duration); + +impl FromStr for ParsedSeconds { + type Err = Box; + + fn from_str(s: &str) -> Result { + Ok(Self(Duration::try_from_secs_f64(f64::from_str(s)?)?)) + } +} + +/// The primary action to perform. +#[derive(Clone, Copy, Default)] +pub(crate) enum Action { + /// Run benchmark loops. + #[default] + Bench, + + /// Run benchmarked functions once to ensure they run successfully. + Test, + + /// List benchmarks. + List, +} + +#[allow(dead_code)] +impl Action { + #[inline] + pub fn is_bench(&self) -> bool { + matches!(self, Self::Bench) + } + + #[inline] + pub fn is_test(&self) -> bool { + matches!(self, Self::Test) + } + + #[inline] + pub fn is_list(&self) -> bool { + matches!(self, Self::List) + } +} + +/// Filters which benchmark to run based on name. +pub(crate) enum Filter { + Regex(Regex), + Exact(String), +} + +impl Filter { + /// Returns `true` if a string matches this filter. 
+ pub fn is_match(&self, s: &str) -> bool { + match self { + Self::Regex(r) => r.is_match(s), + Self::Exact(e) => e == s, + } + } +} + +/// How to treat benchmarks based on whether they're marked as `#[ignore]`. +#[derive(Copy, Clone, Default)] +pub(crate) enum RunIgnored { + /// Skip ignored. + #[default] + No, + + /// `--include-ignored`. + Yes, + + /// `--ignored`. + Only, +} + +impl RunIgnored { + pub fn run_ignored(self) -> bool { + matches!(self, Self::Yes | Self::Only) + } + + pub fn run_non_ignored(self) -> bool { + matches!(self, Self::Yes | Self::No) + } + + pub fn should_run(self, ignored: bool) -> bool { + if ignored { + self.run_ignored() + } else { + self.run_non_ignored() + } + } +} + +/// The attribute to sort benchmarks by. +#[derive(Clone, Copy, Default)] +pub(crate) enum SortingAttr { + /// Sort by kind, then by name and location. + #[default] + Kind, + + /// Sort by name, then by location and kind. + Name, + + /// Sort by location, then by kind and name. + Location, +} + +impl SortingAttr { + /// Returns an array containing `self` along with other attributes that + /// should break ties if attributes are equal. + pub fn with_tie_breakers(self) -> [Self; 3] { + use SortingAttr::*; + + match self { + Kind => [self, Name, Location], + Name => [self, Location, Kind], + Location => [self, Kind, Name], + } + } + + /// Compares benchmark runtime argument names. + /// + /// This takes `&&str` to handle `SortingAttr::Location` since the strings + /// are considered to be within the same `&[&str]`. + pub fn cmp_bench_arg_names(self, a: &&str, b: &&str) -> Ordering { + for attr in self.with_tie_breakers() { + let ordering = match attr { + SortingAttr::Kind => Ordering::Equal, + + SortingAttr::Name => 'ordering: { + // Compare as integers. + match (a.parse::(), b.parse::()) { + (Ok(a_u128), Ok(b_u128)) => break 'ordering a_u128.cmp(&b_u128), + + (Ok(_), Err(_)) => { + if b.parse::().is_ok() { + // a > b, because b is negative.
+ break 'ordering Ordering::Greater; + } + } + + (Err(_), Ok(_)) => { + if a.parse::().is_ok() { + // a < b, because a is negative. + break 'ordering Ordering::Less; + } + } + + (Err(_), Err(_)) => { + if let (Ok(a_i128), Ok(b_i128)) = (a.parse::(), b.parse::()) + { + break 'ordering a_i128.cmp(&b_i128); + } + } + } + + // Compare as floats. + if let (Ok(a), Ok(b)) = (a.parse::(), b.parse::()) { + if let Some(ordering) = a.partial_cmp(&b) { + break 'ordering ordering; + } + } + + natural_cmp(a, b) + } + + SortingAttr::Location => { + let a: *const &str = a; + let b: *const &str = b; + a.cmp(&b) + } + }; + + if ordering != Ordering::Equal { + return ordering; + } + } + + Ordering::Equal + } +} diff --git a/crates/divan_compat/divan_fork/src/counter/any_counter.rs b/crates/divan_compat/divan_fork/src/counter/any_counter.rs new file mode 100644 index 00000000..57f60dce --- /dev/null +++ b/crates/divan_compat/divan_fork/src/counter/any_counter.rs @@ -0,0 +1,233 @@ +use std::any::TypeId; + +use crate::{ + counter::{ + BytesCount, BytesFormat, CharsCount, CyclesCount, IntoCounter, ItemsCount, MaxCountUInt, + }, + time::FineDuration, + util::{fmt::DisplayThroughput, ty::TypeCast}, +}; + +/// Type-erased `Counter`. +/// +/// This does not implement `Copy` because in the future it will contain +/// user-defined counters.
+#[derive(Clone)] +pub(crate) struct AnyCounter { + pub kind: KnownCounterKind, + count: MaxCountUInt, +} + +impl AnyCounter { + #[inline] + pub(crate) fn new(counter: C) -> Self { + let counter = counter.into_counter(); + + if let Some(bytes) = counter.cast_ref::() { + Self::bytes(bytes.count) + } else if let Some(chars) = counter.cast_ref::() { + Self::chars(chars.count) + } else if let Some(cycles) = counter.cast_ref::() { + Self::cycles(cycles.count) + } else if let Some(items) = counter.cast_ref::() { + Self::items(items.count) + } else { + unreachable!() + } + } + + #[inline] + pub(crate) fn known(kind: KnownCounterKind, count: MaxCountUInt) -> Self { + Self { kind, count } + } + + #[inline] + pub(crate) fn bytes(count: MaxCountUInt) -> Self { + Self::known(KnownCounterKind::Bytes, count) + } + + #[inline] + pub(crate) fn chars(count: MaxCountUInt) -> Self { + Self::known(KnownCounterKind::Chars, count) + } + + #[inline] + pub(crate) fn cycles(count: MaxCountUInt) -> Self { + Self::known(KnownCounterKind::Cycles, count) + } + + #[inline] + pub(crate) fn items(count: MaxCountUInt) -> Self { + Self::known(KnownCounterKind::Items, count) + } + + pub(crate) fn display_throughput( + &self, + duration: FineDuration, + bytes_format: BytesFormat, + ) -> DisplayThroughput { + DisplayThroughput { counter: self, picos: duration.picos as f64, bytes_format } + } + + #[inline] + pub(crate) fn count(&self) -> MaxCountUInt { + self.count + } + + #[inline] + pub(crate) fn known_kind(&self) -> KnownCounterKind { + self.kind + } +} + +/// Kind of `Counter` defined by this crate. +#[derive(Clone, Copy, PartialEq, Eq, Debug)] +pub(crate) enum KnownCounterKind { + Bytes, + Chars, + Cycles, + Items, +} + +impl KnownCounterKind { + pub const COUNT: usize = 4; + + pub const ALL: [Self; Self::COUNT] = [Self::Bytes, Self::Chars, Self::Cycles, Self::Items]; + + /// The maximum width for columns displaying counters. 
+ pub const MAX_COMMON_COLUMN_WIDTH: usize = "1.111 Kitem/s".len(); + + #[inline] + pub fn of() -> Self { + let id = TypeId::of::(); + if id == TypeId::of::() { + Self::Bytes + } else if id == TypeId::of::() { + Self::Chars + } else if id == TypeId::of::() { + Self::Cycles + } else if id == TypeId::of::() { + Self::Items + } else { + unreachable!() + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn known_counter_kind() { + macro_rules! test { + ($t:ident, $k:ident) => { + assert_eq!(KnownCounterKind::of::<$t>(), KnownCounterKind::$k); + }; + } + + test!(BytesCount, Bytes); + test!(CharsCount, Chars); + test!(CyclesCount, Cycles); + test!(ItemsCount, Items); + } + + mod display_throughput { + use super::*; + + #[test] + fn bytes() { + #[track_caller] + fn test( + bytes: MaxCountUInt, + picos: u128, + expected_binary: &str, + expected_decimal: &str, + ) { + for (bytes_format, expected) in [ + (BytesFormat::Binary, expected_binary), + (BytesFormat::Decimal, expected_decimal), + ] { + assert_eq!( + AnyCounter::bytes(bytes) + .display_throughput(FineDuration { picos }, bytes_format) + .to_string(), + expected + ); + } + } + + #[track_caller] + fn test_all(bytes: MaxCountUInt, picos: u128, expected: &str) { + test(bytes, picos, expected, expected); + } + + test_all(1, 0, "inf B/s"); + test_all(MaxCountUInt::MAX, 0, "inf B/s"); + + test_all(0, 0, "0 B/s"); + test_all(0, 1, "0 B/s"); + test_all(0, u128::MAX, "0 B/s"); + } + + #[test] + fn chars() { + #[track_caller] + fn test(chars: MaxCountUInt, picos: u128, expected: &str) { + assert_eq!( + AnyCounter::chars(chars) + .display_throughput(FineDuration { picos }, BytesFormat::default()) + .to_string(), + expected + ); + } + + test(1, 0, "inf char/s"); + test(MaxCountUInt::MAX, 0, "inf char/s"); + + test(0, 0, "0 char/s"); + test(0, 1, "0 char/s"); + test(0, u128::MAX, "0 char/s"); + } + + #[test] + fn cycles() { + #[track_caller] + fn test(cycles: MaxCountUInt, picos: u128, expected: &str) { + 
assert_eq!( + AnyCounter::cycles(cycles) + .display_throughput(FineDuration { picos }, BytesFormat::default()) + .to_string(), + expected + ); + } + + test(1, 0, "inf Hz"); + test(MaxCountUInt::MAX, 0, "inf Hz"); + + test(0, 0, "0 Hz"); + test(0, 1, "0 Hz"); + test(0, u128::MAX, "0 Hz"); + } + + #[test] + fn items() { + #[track_caller] + fn test(items: MaxCountUInt, picos: u128, expected: &str) { + assert_eq!( + AnyCounter::items(items) + .display_throughput(FineDuration { picos }, BytesFormat::default()) + .to_string(), + expected + ); + } + + test(1, 0, "inf item/s"); + test(MaxCountUInt::MAX, 0, "inf item/s"); + + test(0, 0, "0 item/s"); + test(0, 1, "0 item/s"); + test(0, u128::MAX, "0 item/s"); + } + } +} diff --git a/crates/divan_compat/divan_fork/src/counter/collection.rs b/crates/divan_compat/divan_fork/src/counter/collection.rs new file mode 100644 index 00000000..73dcd4bc --- /dev/null +++ b/crates/divan_compat/divan_fork/src/counter/collection.rs @@ -0,0 +1,146 @@ +use crate::counter::{AnyCounter, IntoCounter, KnownCounterKind, MaxCountUInt}; + +/// Multi-map from counters to their counts and input-based initializer. +#[derive(Default)] +pub(crate) struct CounterCollection { + info: [KnownCounterInfo; KnownCounterKind::COUNT], +} + +#[derive(Default)] +struct KnownCounterInfo { + // TODO: Inlinable vector. + counts: Vec, + + /// `BencherConfig::with_inputs` can only be called once, so the input type + /// cannot change. 
+ count_input: Option MaxCountUInt + Sync>>, +} + +impl CounterCollection { + #[inline] + fn info(&self, counter_kind: KnownCounterKind) -> &KnownCounterInfo { + &self.info[counter_kind as usize] + } + + #[inline] + fn info_mut(&mut self, counter_kind: KnownCounterKind) -> &mut KnownCounterInfo { + &mut self.info[counter_kind as usize] + } + + #[inline] + pub(crate) fn counts(&self, counter_kind: KnownCounterKind) -> &[MaxCountUInt] { + &self.info(counter_kind).counts + } + + /// Returns the mean (rounded down) of the counts recorded for + /// `counter_kind`, or 0 when no counts have been recorded. + pub(crate) fn mean_count(&self, counter_kind: KnownCounterKind) -> MaxCountUInt { + let counts = self.counts(counter_kind); + + let sum: u128 = counts.iter().map(|&c| c as u128).sum(); + + // An empty set has `sum == 0`; clamping the divisor to 1 yields 0 + // instead of panicking on division by zero. + (sum / (counts.len() as u128).max(1)) as MaxCountUInt + } + + #[inline] + pub(crate) fn uses_input_counts(&self, counter_kind: KnownCounterKind) -> bool { + self.info(counter_kind).count_input.is_some() + } + + pub(crate) fn set_counter(&mut self, counter: AnyCounter) { + let new_count = counter.count(); + let info = self.info_mut(counter.known_kind()); + + if let Some(old_count) = info.counts.first_mut() { + *old_count = new_count; + } else { + info.counts.push(new_count); + } + } + + pub(crate) fn push_counter(&mut self, counter: AnyCounter) { + self.info_mut(counter.known_kind()).counts.push(counter.count()); + } + + /// Set the input-based count generator function for a counter. + pub(crate) fn set_input_counter(&mut self, make_counter: F) + where + F: Fn(&I) -> C + Sync + 'static, + C: IntoCounter, + { + let info = self.info_mut(KnownCounterKind::of::()); + + // Ignore previously-set counts. + info.counts.clear(); + + info.count_input = Some(Box::new(move |input: *const ()| { + // SAFETY: Callers to `get_input_count` guarantee that the same `&I` + // is passed. + let counter = unsafe { make_counter(&*input.cast::()) }; + + AnyCounter::new(counter).count() + })); + } + + /// Calls the user-provided closure to get the counter count for a given + /// input.
+ /// + /// # Safety + /// + /// The `I` type must be the same as that used by `set_input_counter`. + pub(crate) unsafe fn get_input_count( + &self, + counter_kind: KnownCounterKind, + input: &I, + ) -> Option { + let from_input = self.info(counter_kind).count_input.as_ref()?; + + // SAFETY: The caller ensures that this is called on the same input type + // used for calling `set_input_counter`. + Some(unsafe { from_input(input as *const I as *const ()) }) + } + + /// Removes counts that came from input. + pub(crate) fn clear_input_counts(&mut self) { + for info in &mut self.info { + if info.count_input.is_some() { + info.counts.clear(); + } + } + } +} + +/// A set of known and (future) custom counters. +#[derive(Clone, Debug, Default)] +pub struct CounterSet { + counts: [Option; KnownCounterKind::COUNT], +} + +impl CounterSet { + pub fn with(mut self, counter: impl IntoCounter) -> Self { + self.insert(counter); + self + } + + pub fn insert(&mut self, counter: impl IntoCounter) -> &mut Self { + let counter = AnyCounter::new(counter); + self.counts[counter.known_kind() as usize] = Some(counter.count()); + self + } + + pub(crate) fn get(&self, counter_kind: KnownCounterKind) -> Option { + self.counts[counter_kind as usize] + } + + /// Overwrites `other` with values set in `self`. 
+ pub(crate) fn overwrite(&self, other: &Self) -> Self { + Self { counts: KnownCounterKind::ALL.map(|kind| self.get(kind).or(other.get(kind))) } + } + + pub(crate) fn to_collection(&self) -> CounterCollection { + CounterCollection { + info: KnownCounterKind::ALL.map(|kind| KnownCounterInfo { + counts: self.get(kind).into_iter().collect(), + count_input: None, + }), + } + } +} diff --git a/crates/divan_compat/divan_fork/src/counter/into_counter.rs b/crates/divan_compat/divan_fork/src/counter/into_counter.rs new file mode 100644 index 00000000..45a09da9 --- /dev/null +++ b/crates/divan_compat/divan_fork/src/counter/into_counter.rs @@ -0,0 +1,38 @@ +use crate::counter::Counter; + +/// Conversion into a [`Counter`]. +/// +/// # Examples +/// +/// This trait is implemented for unsigned integers over +/// [`ItemsCount`](crate::counter::ItemsCount): +/// +/// ``` +/// #[divan::bench] +/// fn sort_values(bencher: divan::Bencher) { +/// # type T = String; +/// let mut values: Vec = // ... +/// # Vec::new(); +/// bencher +/// .counter(values.len()) +/// .bench_local(|| { +/// divan::black_box(&mut values).sort(); +/// }); +/// } +/// ``` +pub trait IntoCounter { + /// Which kind of counter are we turning this into? + type Counter: Counter; + + /// Converts into a [`Counter`]. + fn into_counter(self) -> Self::Counter; +} + +impl IntoCounter for C { + type Counter = C; + + #[inline] + fn into_counter(self) -> Self::Counter { + self + } +} diff --git a/crates/divan_compat/divan_fork/src/counter/mod.rs b/crates/divan_compat/divan_fork/src/counter/mod.rs new file mode 100644 index 00000000..900c9e27 --- /dev/null +++ b/crates/divan_compat/divan_fork/src/counter/mod.rs @@ -0,0 +1,303 @@ +//! Count values processed in each iteration to measure throughput. +//! +//! # Examples +//! +//! The following example measures throughput of converting +//! [`&[i32]`](prim@slice) into [`Vec`](Vec) by providing [`BytesCount`] +//! via [`Bencher::counter`](crate::Bencher::counter): +//! +//! 
``` +//! use divan::counter::BytesCount; +//! +//! #[divan::bench] +//! fn slice_into_vec(bencher: divan::Bencher) { +//! let ints: &[i32] = &[ +//! // ... +//! ]; +//! +//! let bytes = BytesCount::of_slice(ints); +//! +//! bencher +//! .counter(bytes) +//! .bench(|| -> Vec { +//! divan::black_box(ints).into() +//! }); +//! } +//! ``` + +use std::any::Any; + +mod any_counter; +mod collection; +mod into_counter; +mod sealed; +mod uint; + +pub(crate) use self::{ + any_counter::{AnyCounter, KnownCounterKind}, + collection::{CounterCollection, CounterSet}, + sealed::Sealed, + uint::{AsCountUInt, CountUInt, MaxCountUInt}, +}; +pub use into_counter::IntoCounter; + +/// Counts the number of values processed in each iteration of a benchmarked +/// function. +/// +/// This is used via: +/// - [`#[divan::bench(counters = ...)]`](macro@crate::bench#counters) +/// - [`#[divan::bench_group(counters = ...)]`](macro@crate::bench_group#counters) +/// - [`Bencher::counter`](crate::Bencher::counter) +/// - [`Bencher::input_counter`](crate::Bencher::input_counter) +#[doc(alias = "throughput")] +pub trait Counter: Sized + Any + Sealed {} + +/// Process N bytes. +#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)] +pub struct BytesCount { + count: MaxCountUInt, +} + +/// Process N [`char`s](char). +/// +/// This is beneficial when comparing benchmarks between ASCII and Unicode +/// implementations, since the number of code points is a common baseline +/// reference. +#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)] +pub struct CharsCount { + count: MaxCountUInt, +} + +/// Process N cycles, displayed as Hertz. +/// +/// This value is user-provided and does not necessarily correspond to the CPU's +/// cycle frequency, so it may represent cycles of anything appropriate for the +/// benchmarking context. +#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)] +pub struct CyclesCount { + count: MaxCountUInt, +} + +/// Process N items. 
+#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)] +pub struct ItemsCount { + count: MaxCountUInt, +} + +impl Sealed for BytesCount {} +impl Sealed for CharsCount {} +impl Sealed for CyclesCount {} +impl Sealed for ItemsCount {} + +impl Counter for BytesCount {} +impl Counter for CharsCount {} +impl Counter for CyclesCount {} +impl Counter for ItemsCount {} + +impl From for BytesCount { + #[inline] + fn from(count: C) -> Self { + Self::new(count.as_max_uint()) + } +} + +impl From for CharsCount { + #[inline] + fn from(count: C) -> Self { + Self::new(count.as_max_uint()) + } +} + +impl From for CyclesCount { + #[inline] + fn from(count: C) -> Self { + Self::new(count.as_max_uint()) + } +} + +impl From for ItemsCount { + #[inline] + fn from(count: C) -> Self { + Self::new(count.as_max_uint()) + } +} + +impl BytesCount { + /// Count N bytes. + #[inline] + pub fn new(count: N) -> Self { + Self { count: count.into_max_uint() } + } + + /// Counts the size of a type with [`size_of`]. + #[inline] + #[doc(alias = "size_of")] + pub const fn of() -> Self { + Self { count: size_of::() as MaxCountUInt } + } + + /// Counts the size of multiple instances of a type with [`size_of`]. + #[inline] + #[doc(alias = "size_of")] + pub const fn of_many(n: usize) -> Self { + match (size_of::() as MaxCountUInt).checked_mul(n as MaxCountUInt) { + Some(count) => Self { count }, + None => panic!("overflow"), + } + } + + /// Counts the size of a value with [`size_of_val`]. + #[inline] + #[doc(alias = "size_of_val")] + pub fn of_val(val: &T) -> Self { + // TODO: Make const, https://github.com/rust-lang/rust/issues/46571 + Self { count: size_of_val(val) as MaxCountUInt } + } + + /// Counts the bytes of [`Iterator::Item`s](Iterator::Item). + #[inline] + pub fn of_iter(iter: I) -> Self + where + I: IntoIterator, + { + Self::of_many::(iter.into_iter().count()) + } + + /// Counts the bytes of a [`&str`]. 
+ /// + /// This is like [`BytesCount::of_val`] with the convenience of behaving as + /// expected for [`&String`](String) and other types that convert to + /// [`&str`]. + /// + /// [`&str`]: prim@str + #[inline] + pub fn of_str>(s: &S) -> Self { + Self::of_val(s.as_ref()) + } + + /// Counts the bytes of a [slice](prim@slice). + /// + /// This is like [`BytesCount::of_val`] with the convenience of behaving as + /// expected for [`&Vec`](Vec) and other types that convert to + /// [`&[T]`](prim@slice). + #[inline] + pub fn of_slice>(s: &S) -> Self { + Self::of_val(s.as_ref()) + } +} + +macro_rules! type_bytes { + ($ty:ident) => { + /// Counts the bytes of multiple + #[doc = concat!("[`", stringify!($ty), "`s](", stringify!($ty), ").")] + #[inline] + pub const fn $ty(n: usize) -> Self { + Self::of_many::<$ty>(n) + } + }; +} + +/// Count bytes of multiple values. +impl BytesCount { + type_bytes!(f32); + type_bytes!(f64); + + type_bytes!(i8); + type_bytes!(u8); + type_bytes!(i16); + type_bytes!(u16); + type_bytes!(i32); + type_bytes!(u32); + type_bytes!(i64); + type_bytes!(u64); + type_bytes!(i128); + type_bytes!(u128); + type_bytes!(isize); + type_bytes!(usize); +} + +impl CharsCount { + /// Count N [`char`s](char). + #[inline] + pub fn new(count: N) -> Self { + Self { count: count.into_max_uint() } + } + + /// Counts the [`char`s](prim@char) of a [`&str`](prim@str). + #[inline] + pub fn of_str>(s: &S) -> Self { + Self::new(s.as_ref().chars().count()) + } +} + +impl CyclesCount { + /// Count N cycles. + #[inline] + pub fn new(count: N) -> Self { + Self { count: count.into_max_uint() } + } +} + +impl ItemsCount { + /// Count N items. + #[inline] + pub fn new(count: N) -> Self { + Self { count: count.into_max_uint() } + } + + /// Counts [`Iterator::Item`s](Iterator::Item). + #[inline] + pub fn of_iter(iter: I) -> Self + where + I: IntoIterator, + { + Self::new(iter.into_iter().count()) + } +} + +/// The numerical base for [`BytesCount`] in benchmark outputs. 
+/// +/// See [`Divan::bytes_format`](crate::Divan::bytes_format) for more info. +#[derive(Clone, Copy, Default, PartialEq, Eq, PartialOrd, Ord)] +#[non_exhaustive] +pub enum BytesFormat { + /// Powers of 1000, starting with KB (kilobyte). This is the default. + #[default] + Decimal, + + /// Powers of 1024, starting with KiB (kibibyte). + Binary, +} + +/// Private `BytesFormat` that prevents leaking trait implementations we don't +/// want to publicly commit to. +#[derive(Clone, Copy)] +pub(crate) struct PrivBytesFormat(pub BytesFormat); + +impl clap::ValueEnum for PrivBytesFormat { + fn value_variants<'a>() -> &'a [Self] { + &[Self(BytesFormat::Decimal), Self(BytesFormat::Binary)] + } + + fn to_possible_value(&self) -> Option { + let name = match self.0 { + BytesFormat::Decimal => "decimal", + BytesFormat::Binary => "binary", + }; + Some(clap::builder::PossibleValue::new(name)) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + mod bytes_count { + use super::*; + + #[test] + fn of_iter() { + assert_eq!(BytesCount::of_iter::([1, 2, 3]), BytesCount::of_slice(&[1, 2, 3])); + } + } +} diff --git a/crates/divan_compat/divan_fork/src/counter/sealed.rs b/crates/divan_compat/divan_fork/src/counter/sealed.rs new file mode 100644 index 00000000..fe4cc31f --- /dev/null +++ b/crates/divan_compat/divan_fork/src/counter/sealed.rs @@ -0,0 +1,5 @@ +/// Prevents `Counter` from being implemented externally. +/// +/// Items exist on this trait rather than `Counter` so that they are impossible +/// to access externally. +pub trait Sealed {} diff --git a/crates/divan_compat/divan_fork/src/counter/uint.rs b/crates/divan_compat/divan_fork/src/counter/uint.rs new file mode 100644 index 00000000..2c5770d2 --- /dev/null +++ b/crates/divan_compat/divan_fork/src/counter/uint.rs @@ -0,0 +1,62 @@ +use std::any::Any; + +use crate::counter::{IntoCounter, ItemsCount}; + +/// The largest unsigned integer usable by counters provided by this crate. 
+/// +/// If `usize > u64`, this is a type alias to `usize`. Otherwise, it is a type +/// alias to `u64`. +pub type MaxCountUInt = condtype::num::Usize64; + +/// `u8`-`u64` and `usize`. +/// +/// We deliberately do not implement this trait for `u128` to make it +/// impossible† to overflow `u128` when summing counts for averaging. +/// +/// †When `usize` is larger than `u64`, it becomes possible to overflow `u128`. +/// In this case, Divan assumes the sum of counts still fits in `u128`. +// NOTE(review): the sentence above was left unfinished in the original text; +// the completion is inferred and should be confirmed. +pub trait CountUInt: Copy + Any { + /// Converts `self` into a [`MaxCountUInt`]. + fn into_max_uint(self) -> MaxCountUInt; +} + +/// A type like `CountUInt` but with more options. +pub trait AsCountUInt { + /// Returns the value of `self` as a [`MaxCountUInt`]. + fn as_max_uint(&self) -> MaxCountUInt; +} + +impl AsCountUInt for &T { + #[inline] + fn as_max_uint(&self) -> MaxCountUInt { + T::as_max_uint(self) + } +} + +// Implements `CountUInt`, `AsCountUInt`, and `IntoCounter` (producing an +// `ItemsCount`) for each listed integer type. +macro_rules! impl_uint { + ($($i:ty),+) => { + $(impl CountUInt for $i { + #[inline] + fn into_max_uint(self) -> MaxCountUInt { + self as _ + } + })+ + + $(impl AsCountUInt for $i { + #[inline] + fn as_max_uint(&self) -> MaxCountUInt { + *self as _ + } + })+ + + $(impl IntoCounter for $i { + type Counter = ItemsCount; + + #[inline] + fn into_counter(self) -> ItemsCount { + ItemsCount::new(self) + } + })+ + }; +} + +// These types must be losslessly convertible to `MaxCountUInt`.
+impl_uint!(u8, u16, u32, u64, usize); diff --git a/crates/divan_compat/divan_fork/src/divan.rs b/crates/divan_compat/divan_fork/src/divan.rs new file mode 100644 index 00000000..c6007b20 --- /dev/null +++ b/crates/divan_compat/divan_fork/src/divan.rs @@ -0,0 +1,768 @@ +#![allow(clippy::too_many_arguments)] + +use std::{borrow::Cow, cell::RefCell, fmt, num::NonZeroUsize, time::Duration}; + +use clap::ColorChoice; +use regex::Regex; + +use crate::{ + bench::BenchOptions, + config::{Action, Filter, ParsedSeconds, RunIgnored, SortingAttr}, + counter::{ + BytesCount, BytesFormat, CharsCount, CyclesCount, IntoCounter, ItemsCount, MaxCountUInt, + PrivBytesFormat, + }, + entry::{AnyBenchEntry, BenchEntryRunner, EntryTree}, + thread_pool::BENCH_POOL, + time::{Timer, TimerKind}, + tree_painter::{TreeColumn, TreePainter}, + util::{self, defer}, + Bencher, +}; + +/// The benchmark runner. +#[derive(Default)] +pub struct Divan { + action: Action, + timer: TimerKind, + reverse_sort: bool, + sorting_attr: SortingAttr, + color: ColorChoice, + bytes_format: BytesFormat, + filters: Vec, + skip_filters: Vec, + run_ignored: RunIgnored, + bench_options: BenchOptions<'static>, +} + +/// Immutable context shared between entry runs. +pub(crate) struct SharedContext { + /// The specific action being performed. + pub action: Action, + + /// The timer used to measure samples. + pub timer: Timer, +} + +impl fmt::Debug for Divan { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("Divan").finish_non_exhaustive() + } +} + +impl Divan { + /// Perform the configured action. + /// + /// By default, this will be [`Divan::run_benches`]. + pub fn main(&self) { + self.run_action(self.action); + } + + /// Benchmark registered functions. + pub fn run_benches(&self) { + self.run_action(Action::Bench); + } + + /// Test registered functions as if the `--test` flag was used. + /// + /// Unlike [`Divan::run_benches`], this runs each benchmarked function only + /// once. 
+ pub fn test_benches(&self) { + self.run_action(Action::Test); + } + + /// Print registered functions as if the `--list` flag was used. + pub fn list_benches(&self) { + // Must be `Action::List`: `Action::Test` would run every benchmarked + // function once instead of only listing it. + self.run_action(Action::List); + } + + /// Returns `true` if an entry at the given path should be considered for + /// running. + /// + /// This does not take into account `entry.ignored` because that is handled + /// separately. + fn filter(&self, entry_path: &str) -> bool { + if !self.filters.is_empty() + && !self.filters.iter().any(|filter| filter.is_match(entry_path)) + { + return false; + } + + !self.skip_filters.iter().any(|filter| filter.is_match(entry_path)) + } + + /// Returns `true` if an entry whose ignore flag is `ignored` must be skipped + /// under the configured `run_ignored` policy. + pub(crate) fn should_ignore(&self, ignored: bool) -> bool { + !self.run_ignored.should_run(ignored) + } + + /// Collects the registered entries into a tree, filters and sorts it, then + /// performs `action` on what remains. + pub(crate) fn run_action(&self, action: Action) { + let _drop_threads = defer(|| BENCH_POOL.drop_threads()); + + let mut tree: Vec = if cfg!(miri) { + // Miri does not work with our linker tricks. + Vec::new() + } else { + let group_entries = &crate::entry::GROUP_ENTRIES; + + let generic_bench_entries = group_entries + .iter() + .flat_map(|group| group.generic_benches_iter().map(AnyBenchEntry::GenericBench)); + + let bench_entries = crate::entry::BENCH_ENTRIES + .iter() + .map(AnyBenchEntry::Bench) + .chain(generic_bench_entries); + + let mut tree = EntryTree::from_benches(bench_entries); + + for group in group_entries.iter() { + EntryTree::insert_group(&mut tree, group); + } + + tree + }; + + // Filter after inserting groups so that we can properly use groups' + // display names. + EntryTree::retain(&mut tree, |entry_path| self.filter(entry_path)); + + // Quick exit without doing unnecessary work. + if tree.is_empty() { + return; + } + + // Sorting is after filtering to compare fewer elements.
+ EntryTree::sort_by_attr(&mut tree, self.sorting_attr, self.reverse_sort); + + let timer = match self.timer { + TimerKind::Os => Timer::Os, + + TimerKind::Tsc => { + match Timer::get_tsc() { + Ok(tsc) => tsc, + Err(error) => { + eprintln!("warning: CPU timestamp counter is unavailable ({error}), defaulting to OS"); + Timer::Os + } + } + } + }; + + if action.is_bench() { + eprintln!("Timer precision: {}", timer.precision()); + } + + let shared_context = SharedContext { action, timer }; + + let column_widths = if action.is_bench() { + TreeColumn::ALL.map(|column| { + if column.is_last() { + // The last column doesn't use padding. + 0 + } else { + EntryTree::common_column_width(&tree, column) + } + }) + } else { + [0; TreeColumn::COUNT] + }; + + let tree_painter = + RefCell::new(TreePainter::new(EntryTree::max_name_span(&tree, 0), column_widths)); + + self.run_tree(action, &tree, &shared_context, None, &tree_painter); + } + + fn run_tree( + &self, + action: Action, + tree: &[EntryTree], + shared_context: &SharedContext, + parent_options: Option<&BenchOptions>, + tree_painter: &RefCell, + ) { + for (i, child) in tree.iter().enumerate() { + let is_last = i == tree.len() - 1; + + let name = child.display_name(); + + let child_options = child.bench_options(); + + // Overwrite `parent_options` with `child_options` if applicable. + let options: BenchOptions; + let options: Option<&BenchOptions> = match (parent_options, child_options) { + (None, None) => None, + (Some(options), None) | (None, Some(options)) => Some(options), + (Some(parent_options), Some(child_options)) => { + options = child_options.overwrite(parent_options); + Some(&options) + } + }; + + match child { + EntryTree::Leaf { entry, args } => self.run_bench_entry( + action, + *entry, + args.as_deref(), + shared_context, + options, + tree_painter, + is_last, + ), + EntryTree::Parent { children, .. 
} => { + tree_painter.borrow_mut().start_parent(name, is_last); + + self.run_tree(action, children, shared_context, options, tree_painter); + + tree_painter.borrow_mut().finish_parent(); + } + } + } + } + + fn run_bench_entry( + &self, + action: Action, + bench_entry: AnyBenchEntry, + bench_arg_names: Option<&[&&str]>, + shared_context: &SharedContext, + entry_options: Option<&BenchOptions>, + tree_painter: &RefCell, + is_last_entry: bool, + ) { + use crate::bench::BenchContext; + + let entry_display_name = bench_entry.display_name(); + + // User runtime options override all other options. + let options: BenchOptions; + let options: &BenchOptions = match entry_options { + None => &self.bench_options, + Some(entry_options) => { + options = self.bench_options.overwrite(entry_options); + &options + } + }; + + if self.should_ignore(options.ignore.unwrap_or_default()) { + tree_painter.borrow_mut().ignore_leaf(entry_display_name, is_last_entry); + return; + } + + // Paint empty leaf when simply listing. + if action.is_list() { + let mut tree_painter = tree_painter.borrow_mut(); + tree_painter.start_leaf(entry_display_name, is_last_entry); + tree_painter.finish_empty_leaf(); + return; + } + + let mut thread_counts: Vec = options + .threads + .as_deref() + .unwrap_or_default() + .iter() + .map(|&n| match NonZeroUsize::new(n) { + Some(n) => n, + None => crate::util::known_parallelism(), + }) + .collect(); + + thread_counts.sort_unstable(); + thread_counts.dedup(); + + let thread_counts: &[NonZeroUsize] = + if thread_counts.is_empty() { &[NonZeroUsize::MIN] } else { &thread_counts }; + + // Whether we should emit child branches for thread counts. 
+ let has_thread_branches = thread_counts.len() > 1; + + let run_bench = |bench_display_name: &str, + is_last_bench: bool, + with_bencher: &dyn Fn(Bencher)| { + if has_thread_branches { + tree_painter.borrow_mut().start_parent(bench_display_name, is_last_bench); + } else { + tree_painter.borrow_mut().start_leaf(bench_display_name, is_last_bench); + } + + for (i, &thread_count) in thread_counts.iter().enumerate() { + let is_last_thread_count = + if has_thread_branches { i == thread_counts.len() - 1 } else { is_last_bench }; + + if has_thread_branches { + tree_painter + .borrow_mut() + .start_leaf(&format!("t={thread_count}"), is_last_thread_count); + } + + let mut bench_context = BenchContext::new(shared_context, options, thread_count); + with_bencher(Bencher::new(&mut bench_context)); + + if !bench_context.did_run { + eprintln!( + "warning: No benchmark function registered for '{bench_display_name}'" + ); + } + + let should_compute_stats = + bench_context.did_run && shared_context.action.is_bench(); + + if should_compute_stats { + let stats = bench_context.compute_stats(); + { + let name = bench_entry.display_name().to_string(); + let file = bench_entry.meta().location.file; + let mut module_path = bench_entry + .meta() + .module_path_components() + .skip(1) + .collect::>() + .join("::"); + if !module_path.is_empty() { + module_path.push_str("::"); + } + let uri = format!("{file}::{module_path}{name}"); + let iter_per_round = bench_context.samples.sample_size; + let times_ns: Vec<_> = bench_context + .samples + .time_samples + .iter() + .map(|s| s.duration.picos / 1_000) + .collect(); + let max_time_ns = options.max_time.map(|t| t.as_nanos()); + ::codspeed::walltime::collect_raw_walltime_results( + "divan", + name, + uri, + iter_per_round, + max_time_ns, + times_ns, + ); + }; + tree_painter.borrow_mut().finish_leaf( + is_last_thread_count, + &stats, + self.bytes_format, + ); + } else { + tree_painter.borrow_mut().finish_empty_leaf(); + } + } + + if 
has_thread_branches { + tree_painter.borrow_mut().finish_parent(); + } + }; + + match bench_entry.bench_runner() { + BenchEntryRunner::Plain(bench) => run_bench(entry_display_name, is_last_entry, bench), + + BenchEntryRunner::Args(bench_runner) => { + tree_painter.borrow_mut().start_parent(entry_display_name, is_last_entry); + + let bench_runner = bench_runner(); + let orig_arg_names = bench_runner.arg_names(); + let bench_arg_names = bench_arg_names.unwrap_or_default(); + + for (i, &arg_name) in bench_arg_names.iter().enumerate() { + let is_last_arg = i == bench_arg_names.len() - 1; + let arg_index = util::slice_ptr_index(orig_arg_names, arg_name); + + run_bench(arg_name, is_last_arg, &|bencher| { + bench_runner.bench(bencher, arg_index); + }); + } + + tree_painter.borrow_mut().finish_parent(); + } + } + } +} + +/// Makes `Divan::skip_regex` input polymorphic. +pub trait SkipRegex { + fn skip_regex(self, divan: &mut Divan); +} + +impl SkipRegex for Regex { + fn skip_regex(self, divan: &mut Divan) { + divan.skip_filters.push(Filter::Regex(self)); + } +} + +impl SkipRegex for &str { + #[track_caller] + fn skip_regex(self, divan: &mut Divan) { + Regex::new(self).unwrap().skip_regex(divan); + } +} + +impl SkipRegex for String { + #[track_caller] + fn skip_regex(self, divan: &mut Divan) { + self.as_str().skip_regex(divan) + } +} + +/// Configuration options. +impl Divan { + /// Creates an instance with options set by parsing CLI arguments. + pub fn from_args() -> Self { + Self::default().config_with_args() + } + + /// Sets options by parsing CLI arguments. + /// + /// This may override any previously-set options. 
+ #[must_use] + pub fn config_with_args(mut self) -> Self { + let mut command = crate::cli::command(); + + let matches = command.get_matches_mut(); + let is_exact = matches.get_flag("exact"); + + let mut parse_filter = |filter: &String| { + if is_exact { + Filter::Exact(filter.to_owned()) + } else { + match Regex::new(filter) { + Ok(r) => Filter::Regex(r), + Err(error) => { + let kind = clap::error::ErrorKind::ValueValidation; + command.error(kind, error).exit(); + } + } + } + }; + + if let Some(filters) = matches.get_many::("filter") { + self.filters.extend(filters.map(&mut parse_filter)); + } + + if let Some(skip_filters) = matches.get_many::("skip") { + self.skip_filters.extend(skip_filters.map(&mut parse_filter)); + } + + self.action = if matches.get_flag("list") { + Action::List + } else if matches.get_flag("test") || !matches.get_flag("bench") { + // Either of: + // `cargo bench -- --test` + // `cargo test --benches` + Action::Test + } else { + Action::Bench + }; + + if let Some(&color) = matches.get_one("color") { + self.color = color; + } + + if matches.get_flag("ignored") { + self.run_ignored = RunIgnored::Only; + } else if matches.get_flag("include-ignored") { + self.run_ignored = RunIgnored::Yes; + } + + if let Some(&timer) = matches.get_one("timer") { + self.timer = timer; + } + + if let Some(&sorting_attr) = matches.get_one("sortr") { + self.reverse_sort = true; + self.sorting_attr = sorting_attr; + } else if let Some(&sorting_attr) = matches.get_one("sort") { + self.reverse_sort = false; + self.sorting_attr = sorting_attr; + } + + if let Some(&sample_count) = matches.get_one("sample-count") { + self.bench_options.sample_count = Some(sample_count); + } + + if let Some(&sample_size) = matches.get_one("sample-size") { + self.bench_options.sample_size = Some(sample_size); + } + + if let Some(thread_counts) = matches.get_many::("threads") { + let mut threads: Vec = thread_counts.copied().collect(); + threads.sort_unstable(); + threads.dedup(); + 
self.bench_options.threads = Some(Cow::Owned(threads)); + } + + if let Some(&ParsedSeconds(min_time)) = matches.get_one("min-time") { + self.bench_options.min_time = Some(min_time); + } + + if let Some(&ParsedSeconds(max_time)) = matches.get_one("max-time") { + self.bench_options.max_time = Some(max_time); + } + + if let Some(mut skip_ext_time) = matches.get_many::("skip-ext-time") { + // If the option is present without a value, then it's `true`. + self.bench_options.skip_ext_time = + Some(matches!(skip_ext_time.next(), Some(true) | None)); + } + + if let Some(&count) = matches.get_one::("items-count") { + self.counter_mut(ItemsCount::new(count)); + } + + if let Some(&count) = matches.get_one::("bytes-count") { + self.counter_mut(BytesCount::new(count)); + } + + if let Some(&PrivBytesFormat(bytes_format)) = matches.get_one("bytes-format") { + self.bytes_format = bytes_format; + } + + if let Some(&count) = matches.get_one::("chars-count") { + self.counter_mut(CharsCount::new(count)); + } + + if let Some(&count) = matches.get_one::("cycles-count") { + self.counter_mut(CyclesCount::new(count)); + } + + self + } + + /// Sets whether output should be colored. + /// + /// This option is equivalent to the `--color` CLI argument, where [`None`] + /// here means "auto". + #[must_use] + pub fn color(mut self, yes: impl Into>) -> Self { + self.color = match yes.into() { + None => ColorChoice::Auto, + Some(true) => ColorChoice::Always, + Some(false) => ColorChoice::Never, + }; + self + } + + /// Also run benchmarks marked [`#[ignore]`](https://doc.rust-lang.org/reference/attributes/testing.html#the-ignore-attribute). + /// + /// This option is equivalent to the `--include-ignored` CLI argument. + #[must_use] + pub fn run_ignored(mut self) -> Self { + self.run_ignored = RunIgnored::Yes; + self + } + + /// Only run benchmarks marked [`#[ignore]`](https://doc.rust-lang.org/reference/attributes/testing.html#the-ignore-attribute). 
+ /// + /// This option is equivalent to the `--ignored` CLI argument. + #[must_use] + pub fn run_only_ignored(mut self) -> Self { + self.run_ignored = RunIgnored::Only; + self + } + + /// Skips benchmarks that match `filter` as a regular expression pattern. + /// + /// This option is equivalent to the `--skip filter` CLI argument, without + /// `--exact`. + /// + /// # Examples + /// + /// This method is commonly used with a [`&str`](prim@str) or [`String`]: + /// + /// ``` + /// # use divan::Divan; + /// let filter = "(add|sub)"; + /// let divan = Divan::default().skip_regex(filter); + /// ``` + /// + /// A pre-built [`Regex`] can also be provided: + /// + /// ``` + /// # use divan::Divan; + /// let filter = regex::Regex::new("(add|sub)").unwrap(); + /// let divan = Divan::default().skip_regex(filter); + /// ``` + /// + /// Calling this repeatedly will add multiple skip filters: + /// + /// ``` + /// # use divan::Divan; + /// let divan = Divan::default() + /// .skip_regex("(add|sub)") + /// .skip_regex("collections.*default"); + /// ``` + /// + /// # Panics + /// + /// Panics if `filter` is a string and [`Regex::new`] fails. + #[must_use] + pub fn skip_regex(mut self, filter: impl SkipRegex) -> Self { + filter.skip_regex(&mut self); + self + } + + /// Skips benchmarks that exactly match `filter`. + /// + /// This option is equivalent to the `--skip filter --exact` CLI arguments. 
+ /// + /// # Examples + /// + /// This method is commonly used with a [`&str`](prim@str) or [`String`]: + /// + /// ``` + /// # use divan::Divan; + /// let filter = "arithmetic::add"; + /// let divan = Divan::default().skip_exact(filter); + /// ``` + /// + /// Calling this repeatedly will add multiple skip filters: + /// + /// ``` + /// # use divan::Divan; + /// let divan = Divan::default() + /// .skip_exact("arithmetic::add") + /// .skip_exact("collections::vec::default"); + /// ``` + #[must_use] + pub fn skip_exact(mut self, filter: impl Into) -> Self { + self.skip_filters.push(Filter::Exact(filter.into())); + self + } + + /// Sets the number of sampling iterations. + /// + /// This option is equivalent to the `--sample-count` CLI argument. + /// + /// If a benchmark enables [`threads`](macro@crate::bench#threads), sample + /// count becomes a multiple of the number of threads. This is because each + /// thread operates over the same sample size to ensure there are always N + /// competing threads doing the same amount of work. + #[inline] + pub fn sample_count(mut self, count: u32) -> Self { + self.bench_options.sample_count = Some(count); + self + } + + /// Sets the number of iterations inside a single sample. + /// + /// This option is equivalent to the `--sample-size` CLI argument. + #[inline] + pub fn sample_size(mut self, count: u32) -> Self { + self.bench_options.sample_size = Some(count); + self + } + + /// Run across multiple threads. + /// + /// This enables you to measure contention on [atomics and + /// locks](std::sync). A value of 0 indicates [available + /// parallelism](std::thread::available_parallelism). + /// + /// This option is equivalent to the `--threads` CLI argument or + /// `DIVAN_THREADS` environment variable. 
+ #[inline] + pub fn threads(mut self, threads: T) -> Self + where + T: IntoIterator, + { + self.bench_options.threads = { + let mut threads: Vec = threads.into_iter().collect(); + threads.sort_unstable(); + threads.dedup(); + Some(Cow::Owned(threads)) + }; + self + } + + /// Sets the time floor for benchmarking a function. + /// + /// This option is equivalent to the `--min-time` CLI argument. + #[inline] + pub fn min_time(mut self, time: Duration) -> Self { + self.bench_options.min_time = Some(time); + self + } + + /// Sets the time ceiling for benchmarking a function. + /// + /// This option is equivalent to the `--max-time` CLI argument. + #[inline] + pub fn max_time(mut self, time: Duration) -> Self { + self.bench_options.max_time = Some(time); + self + } + + /// When accounting for `min_time` or `max_time`, skip time external to + /// benchmarked functions. + /// + /// This option is equivalent to the `--skip-ext-time` CLI argument. + #[inline] + pub fn skip_ext_time(mut self, skip: bool) -> Self { + self.bench_options.skip_ext_time = Some(skip); + self + } +} + +/// Use [`Counter`s](crate::counter::Counter) to get throughput across all +/// benchmarks. +impl Divan { + #[inline] + fn counter_mut(&mut self, counter: C) -> &mut Self { + self.bench_options.counters.insert(counter); + self + } + + /// Counts the number of values processed. + #[inline] + pub fn counter(mut self, counter: C) -> Self { + self.counter_mut(counter); + self + } + + /// Sets the number of items processed. + /// + /// This option is equivalent to the `--items-count` CLI argument or + /// `DIVAN_ITEMS_COUNT` environment variable. + #[inline] + pub fn items_count>(self, count: C) -> Self { + self.counter(count.into()) + } + + /// Sets the number of bytes processed. + /// + /// This option is equivalent to the `--bytes-count` CLI argument or + /// `DIVAN_BYTES_COUNT` environment variable. 
+ #[inline] + pub fn bytes_count>(self, count: C) -> Self { + self.counter(count.into()) + } + + /// Determines how [`BytesCount`] is scaled in benchmark outputs. + /// + /// This option is equivalent to the `--bytes-format` CLI argument or + /// `DIVAN_BYTES_FORMAT` environment variable. + #[inline] + pub fn bytes_format(mut self, format: BytesFormat) -> Self { + self.bytes_format = format; + self + } + + /// Sets the number of bytes processed. + /// + /// This option is equivalent to the `--chars-count` CLI argument or + /// `DIVAN_CHARS_COUNT` environment variable. + #[inline] + pub fn chars_count>(self, count: C) -> Self { + self.counter(count.into()) + } + + /// Sets the number of cycles processed, displayed as Hertz. + /// + /// This option is equivalent to the `--cycles-count` CLI argument or + /// `DIVAN_CYCLES_COUNT` environment variable. + #[inline] + pub fn cycles_count>(self, count: C) -> Self { + self.counter(count.into()) + } +} diff --git a/crates/divan_compat/divan_fork/src/entry/generic.rs b/crates/divan_compat/divan_fork/src/entry/generic.rs new file mode 100644 index 00000000..75cc5a2a --- /dev/null +++ b/crates/divan_compat/divan_fork/src/entry/generic.rs @@ -0,0 +1,180 @@ +use std::{ + any::{Any, TypeId}, + cmp::Ordering, + mem::ManuallyDrop, + sync::OnceLock, +}; + +use crate::{ + entry::{BenchEntryRunner, GroupEntry}, + util::sort::natural_cmp, +}; + +/// Compile-time entry for a generic benchmark function, generated by +/// `#[divan::bench]`. +/// +/// Unlike `BenchEntry`, this is for a specific generic type or `const`. +/// +/// Although this type contains trivially-`Copy` data, it *should not* implement +/// `Clone` because the memory address of each instance is used to determine the +/// relative order in `GroupEntry.generic_benches` when sorting benchmarks by +/// location. +pub struct GenericBenchEntry { + /// The associated group, for entry metadata. + pub group: &'static GroupEntry, + + /// The benchmarking function. 
+ pub bench: BenchEntryRunner, + + /// A generic type. + pub ty: Option, + + /// A `const` value and associated data. + pub const_value: Option, +} + +impl GenericBenchEntry { + pub(crate) fn raw_name(&self) -> &str { + match (&self.ty, &self.const_value) { + (_, Some(const_value)) => const_value.name(), + (Some(ty), None) => ty.raw_name(), + (None, None) => unreachable!(), + } + } + + pub(crate) fn display_name(&self) -> &str { + match (&self.ty, &self.const_value) { + (_, Some(const_value)) => const_value.name(), + (Some(ty), None) => ty.display_name(), + (None, None) => unreachable!(), + } + } + + pub(crate) fn path_components(&self) -> impl Iterator { + let module_path = self.group.meta.module_path_components(); + + // Generic benchmarks consider their group's raw name to be the path + // component after the module path. + let group_component = self.group.meta.raw_name; + + // If this is a generic const benchmark with generic types, the generic + // types are considered to be the parent of the const values. + let type_component = if self.const_value.is_some() { + // FIXME: Switch back to `raw_name` once we have a way to insert + // this `display_name` into `EntryTree::Parent`. The current + // approach allows different types with the same name to become the + // same `EntryTree::Parent`. + self.ty.as_ref().map(|ty| ty.display_name()) + } else { + None + }; + + module_path.chain(Some(group_component)).chain(type_component) + } +} + +/// Generic type instantiation. +pub struct EntryType { + /// [`std::any::type_name`]. + get_type_name: fn() -> &'static str, + + /// [`std::any::TypeId::of`]. + #[allow(dead_code)] + get_type_id: fn() -> TypeId, +} + +impl EntryType { + /// Creates an instance for the given type. 
+ pub const fn new() -> Self { + Self { get_type_name: std::any::type_name::, get_type_id: TypeId::of:: } + } + + pub(crate) fn raw_name(&self) -> &'static str { + (self.get_type_name)() + } + + pub(crate) fn display_name(&self) -> &'static str { + let mut type_name = self.raw_name(); + + // Remove module components in type name. + while let Some((prev, next)) = type_name.split_once("::") { + // Do not go past generic type boundary. + if prev.contains('<') { + break; + } + type_name = next; + } + + type_name + } +} + +/// A reference to a `const` as a `&'static T`. +pub struct EntryConst { + /// `&'static T`. + value: *const (), + + /// [`PartialOrd::partial_cmp`]. + partial_cmp: unsafe fn(*const (), *const ()) -> Option, + + /// [`ToString::to_string`]. + to_string: unsafe fn(*const ()) -> String, + + /// Cached `to_string` result. + cached_string: ManuallyDrop>, +} + +// SAFETY: `T: Send + Sync`. +unsafe impl Send for EntryConst {} +unsafe impl Sync for EntryConst {} + +impl EntryConst { + /// Creates entry data for a `const` values. + pub const fn new(value: &'static T) -> Self + where + T: PartialOrd + ToString + Send + Sync, + { + unsafe fn partial_cmp(a: *const (), b: *const ()) -> Option { + T::partial_cmp(&*a.cast(), &*b.cast()) + } + + unsafe fn to_string(value: *const ()) -> String { + T::to_string(&*value.cast()) + } + + Self { + value: value as *const T as *const (), + partial_cmp: partial_cmp::, + to_string: to_string::, + cached_string: ManuallyDrop::new(OnceLock::new()), + } + } + + /// Returns [`PartialOrd::partial_cmp`] ordering if `<` or `>, falling back + /// to comparing [`ToString::to_string`] otherwise. + pub(crate) fn cmp_name(&self, other: &Self) -> Ordering { + if self.partial_cmp == other.partial_cmp { + // SAFETY: Both constants have the same comparison function, so they + // must be the same type. 
+ if let Some(ordering) = unsafe { (self.partial_cmp)(self.value, other.value) } { + if !ordering.is_eq() { + return ordering; + } + } + } + + // Fallback to name comparison. + natural_cmp(self.name(), other.name()) + } + + /// [`ToString::to_string`]. + #[inline] + pub(crate) fn name(&self) -> &str { + self.cached_string.get_or_init(|| { + // SAFETY: The function is guaranteed to call `T::to_string`. + let string = unsafe { (self.to_string)(self.value) }; + + Box::leak(string.into_boxed_str()) + }) + } +} diff --git a/crates/divan_compat/divan_fork/src/entry/list.rs b/crates/divan_compat/divan_fork/src/entry/list.rs new file mode 100644 index 00000000..5ad06bd8 --- /dev/null +++ b/crates/divan_compat/divan_fork/src/entry/list.rs @@ -0,0 +1,79 @@ +use std::{ + ptr, + sync::atomic::{AtomicPtr, Ordering as AtomicOrdering}, +}; + +/// Linked list of entries. +/// +/// This is implemented in a thread-safe way despite the fact that constructors +/// are run single-threaded. +pub struct EntryList { + entry: Option<&'static T>, + next: AtomicPtr, +} + +impl EntryList { + pub(crate) const fn root() -> Self { + Self { entry: None, next: AtomicPtr::new(ptr::null_mut()) } + } + + /// Dereferences the `next` pointer. + #[inline] + fn next(&self) -> Option<&Self> { + // SAFETY: `next` is only assigned by `push`, which always receives a + // 'static lifetime. + unsafe { self.next.load(AtomicOrdering::Relaxed).as_ref() } + } +} + +// Externally used by macros or tests. +#[allow(missing_docs)] +impl EntryList { + #[inline] + pub const fn new(entry: &'static T) -> Self { + Self { entry: Some(entry), next: AtomicPtr::new(ptr::null_mut()) } + } + + /// Creates an iterator over entries in `self`. + #[inline] + pub fn iter(&self) -> impl Iterator { + let mut list = Some(self); + std::iter::from_fn(move || -> Option> { + let current = list?; + list = current.next(); + Some(current.entry.as_ref().copied()) + }) + .flatten() + } + + /// Inserts `other` to the front of the list. 
+ /// + /// # Safety + /// + /// This function must be safe to call before `main`. + #[inline] + pub fn push(&'static self, other: &'static Self) { + let mut old_next = self.next.load(AtomicOrdering::Relaxed); + loop { + // Each publicly-created instance has `list.next` be null, so we can + // simply store `self.next` there. + other.next.store(old_next, AtomicOrdering::Release); + + // SAFETY: The content of `other` can already be seen, so we don't + // need to strongly order reads into it. + let other = other as *const Self as *mut Self; + match self.next.compare_exchange_weak( + old_next, + other, + AtomicOrdering::AcqRel, + AtomicOrdering::Acquire, + ) { + // Successfully wrote our thread's value to the list. + Ok(_) => return, + + // Lost the race, store winner's value in `other.next`. + Err(new) => old_next = new, + } + } + } +} diff --git a/crates/divan_compat/divan_fork/src/entry/meta.rs b/crates/divan_compat/divan_fork/src/entry/meta.rs new file mode 100644 index 00000000..be75c855 --- /dev/null +++ b/crates/divan_compat/divan_fork/src/entry/meta.rs @@ -0,0 +1,44 @@ +use std::sync::LazyLock; + +use crate::bench::BenchOptions; + +/// Metadata common to `#[divan::bench]` and `#[divan::bench_group]`. +pub struct EntryMeta { + /// The entry's display name. + pub display_name: &'static str, + + /// The entry's original name. + /// + /// This is used to find a `GroupEntry` for a `BenchEntry`. + pub raw_name: &'static str, + + /// The entry's raw `module_path!()`. + pub module_path: &'static str, + + /// Where the entry was defined. + pub location: EntryLocation, + + /// Configures the benchmarker via attribute options. + pub bench_options: Option>>, +} + +/// Where an entry is located. 
+#[derive(Clone, Copy, Default, PartialEq, Eq, PartialOrd, Ord)] +#[allow(missing_docs)] +pub struct EntryLocation { + pub file: &'static str, + pub line: u32, + pub col: u32, +} + +impl EntryMeta { + #[inline] + pub(crate) fn bench_options(&self) -> Option<&BenchOptions> { + self.bench_options.as_deref() + } + + #[inline] + pub(crate) fn module_path_components<'a>(&self) -> impl Iterator { + self.module_path.split("::") + } +} diff --git a/crates/divan_compat/divan_fork/src/entry/mod.rs b/crates/divan_compat/divan_fork/src/entry/mod.rs new file mode 100644 index 00000000..2070f63f --- /dev/null +++ b/crates/divan_compat/divan_fork/src/entry/mod.rs @@ -0,0 +1,126 @@ +use std::ptr::NonNull; + +use crate::{bench::BenchArgsRunner, Bencher}; + +mod generic; +mod list; +mod meta; +mod tree; + +pub use self::{ + generic::{EntryConst, EntryType, GenericBenchEntry}, + list::EntryList, + meta::{EntryLocation, EntryMeta}, +}; +pub(crate) use tree::EntryTree; + +/// Benchmark entries generated by `#[divan::bench]`. +/// +/// Note: generic-type benchmark entries are instead stored in `GROUP_ENTRIES` +/// in `generic_benches`. +pub static BENCH_ENTRIES: EntryList = EntryList::root(); + +/// Group entries generated by `#[divan::bench_group]`. +pub static GROUP_ENTRIES: EntryList = EntryList::root(); + +/// Determines how the benchmark entry is run. +#[derive(Clone, Copy)] +pub enum BenchEntryRunner { + /// Benchmark without arguments. + Plain(fn(Bencher)), + + /// Benchmark with runtime arguments. + Args(fn() -> BenchArgsRunner), +} + +/// Compile-time entry for a benchmark, generated by `#[divan::bench]`. +pub struct BenchEntry { + /// Entry metadata. + pub meta: EntryMeta, + + /// The benchmarking function. + pub bench: BenchEntryRunner, +} + +/// Compile-time entry for a benchmark group, generated by +/// `#[divan::bench_group]` or a generic-type `#[divan::bench]`. +pub struct GroupEntry { + /// Entry metadata. + pub meta: EntryMeta, + + /// Generic `#[divan::bench]` entries. 
+ /// + /// This is two-dimensional to make code generation simpler. The outer + /// dimension corresponds to types and the inner dimension corresponds to + /// constants. + pub generic_benches: Option<&'static [&'static [GenericBenchEntry]]>, +} + +impl GroupEntry { + pub(crate) fn generic_benches_iter(&self) -> impl Iterator { + self.generic_benches.unwrap_or_default().iter().flat_map(|benches| benches.iter()) + } +} + +/// `BenchEntry` or `GenericBenchEntry`. +#[derive(Clone, Copy)] +pub(crate) enum AnyBenchEntry<'a> { + Bench(&'a BenchEntry), + GenericBench(&'a GenericBenchEntry), +} + +impl<'a> AnyBenchEntry<'a> { + /// Returns a pointer to use as the identity of the entry. + #[inline] + pub fn entry_addr(self) -> NonNull<()> { + match self { + Self::Bench(entry) => NonNull::from(entry).cast(), + Self::GenericBench(entry) => NonNull::from(entry).cast(), + } + } + + /// Returns this entry's benchmark runner. + #[inline] + pub fn bench_runner(self) -> &'a BenchEntryRunner { + match self { + Self::Bench(BenchEntry { bench, .. }) + | Self::GenericBench(GenericBenchEntry { bench, .. }) => bench, + } + } + + /// Returns this entry's argument names. 
+ #[inline] + pub fn arg_names(self) -> Option<&'static [&'static str]> { + match self.bench_runner() { + BenchEntryRunner::Args(bench_runner) => { + let bench_runner = bench_runner(); + Some(bench_runner.arg_names()) + } + _ => None, + } + } + + #[inline] + pub fn meta(self) -> &'a EntryMeta { + match self { + Self::Bench(entry) => &entry.meta, + Self::GenericBench(entry) => &entry.group.meta, + } + } + + #[inline] + pub fn raw_name(self) -> &'a str { + match self { + Self::Bench(entry) => entry.meta.raw_name, + Self::GenericBench(entry) => entry.raw_name(), + } + } + + #[inline] + pub fn display_name(self) -> &'a str { + match self { + Self::Bench(entry) => entry.meta.display_name, + Self::GenericBench(entry) => entry.display_name(), + } + } +} diff --git a/crates/divan_compat/divan_fork/src/entry/tree.rs b/crates/divan_compat/divan_fork/src/entry/tree.rs new file mode 100644 index 00000000..1cd31ee8 --- /dev/null +++ b/crates/divan_compat/divan_fork/src/entry/tree.rs @@ -0,0 +1,412 @@ +use std::{cmp::Ordering, ptr::NonNull}; + +use crate::{ + bench::{BenchOptions, DEFAULT_SAMPLE_COUNT}, + config::SortingAttr, + counter::KnownCounterKind, + entry::{AnyBenchEntry, EntryLocation, EntryMeta, GenericBenchEntry, GroupEntry}, + tree_painter::TreeColumn, + util::sort::natural_cmp, +}; + +/// `BenchEntry` tree organized by path components. +pub(crate) enum EntryTree<'a> { + /// Benchmark group; parent to leaves and other parents. + Parent { raw_name: &'a str, group: Option<&'a GroupEntry>, children: Vec }, + + /// Benchmark entry leaf. + Leaf { + /// The benchmark entry being run. + entry: AnyBenchEntry<'a>, + + /// The names of arguments to run. + args: Option>, + }, +} + +impl<'a> EntryTree<'a> { + /// Constructs a tree from an iterator of benchmark entries in the order + /// they're produced. 
+ pub fn from_benches(benches: I) -> Vec + where + I: IntoIterator>, + { + let mut result = Vec::::new(); + + for bench in benches { + let mut insert_entry = |path_iter| { + Self::insert_entry(&mut result, bench, path_iter); + }; + + match bench { + AnyBenchEntry::Bench(bench) => { + insert_entry(&mut bench.meta.module_path_components()); + } + AnyBenchEntry::GenericBench(bench) => { + insert_entry(&mut bench.path_components()); + } + } + } + + result + } + + /// Returns the maximum span for a name in `tree`. + /// + /// This is the number of terminal columns used for labeling benchmark names + /// prior to emitting stats columns. + pub fn max_name_span(tree: &[Self], depth: usize) -> usize { + // The number of terminal columns used per-depth for box drawing + // characters. For example, "│ ╰─ " is 6 for depth 2. + const DEPTH_COLS: usize = 3; + + tree.iter() + .map(|node| { + let node_name_span = { + let prefix_len = depth * DEPTH_COLS; + let name_len = node.display_name().chars().count(); + prefix_len + name_len + }; + + // The maximum span of any descendent. + let children_max_span = Self::max_name_span(node.children(), depth + 1); + + // The maximum span of any runtime argument. + let args_max_span = node + .arg_names() + .unwrap_or_default() + .iter() + .map(|arg| { + let prefix_len = (depth + 1) * DEPTH_COLS; + let name_len = arg.chars().count(); + prefix_len + name_len + }) + .max() + .unwrap_or_default(); + + node_name_span.max(children_max_span).max(args_max_span) + }) + .max() + .unwrap_or_default() + } + + /// Returns the likely span for a given column. + pub fn common_column_width(tree: &[Self], column: TreeColumn) -> usize { + // Time and throughput info. 
+ if column.is_time_stat() { + return KnownCounterKind::MAX_COMMON_COLUMN_WIDTH; + } + + tree.iter() + .map(|tree| { + let Some(options) = tree.bench_options() else { + return 0; + }; + + let width = match column { + TreeColumn::Samples => { + let sample_count = options.sample_count.unwrap_or(DEFAULT_SAMPLE_COUNT); + 1 + sample_count.checked_ilog10().unwrap_or_default() as usize + } + + // Iters is the last column, so it does not need pad width. + // All other columns are time stats handled previously. + _ => 0, + }; + + width.max(Self::common_column_width(tree.children(), column)) + }) + .max() + .unwrap_or_default() + } + + /// Inserts the benchmark group into a tree. + /// + /// Groups are inserted after tree construction because it prevents having + /// parents without terminating leaves. Groups that do not match an existing + /// parent are not inserted. + pub fn insert_group(mut tree: &mut [Self], group: &'a GroupEntry) { + // Update `tree` to be the innermost set of subtrees whose parents match + // `group.module_path`. + 'component: for component in group.meta.module_path_components() { + for subtree in tree { + match subtree { + EntryTree::Parent { raw_name, children, .. } if component == *raw_name => { + tree = children; + continue 'component; + } + _ => {} + } + } + + // No matches for this component in any subtrees. + return; + } + + // Find the matching tree to insert the group into. + for subtree in tree { + match subtree { + EntryTree::Parent { raw_name, group: slot, .. } + if group.meta.raw_name == *raw_name => + { + *slot = Some(group); + return; + } + _ => {} + } + } + } + + /// Removes entries from the tree whose paths do not match the filter. 
+ pub fn retain(tree: &mut Vec, mut filter: impl FnMut(&str) -> bool) { + fn retain( + tree: &mut Vec, + parent_path: &str, + filter: &mut impl FnMut(&str) -> bool, + ) { + tree.retain_mut(|subtree| { + let subtree_path: String; + let subtree_path: &str = if parent_path.is_empty() { + subtree.display_name() + } else { + subtree_path = format!("{parent_path}::{}", subtree.display_name()); + &subtree_path + }; + + match subtree { + EntryTree::Parent { children, .. } => { + retain(children, subtree_path, filter); + + // If no children exist, filter out this parent. + !children.is_empty() + } + + EntryTree::Leaf { args: None, .. } => filter(subtree_path), + + EntryTree::Leaf { args: Some(args), .. } => { + args.retain(|arg| filter(&format!("{subtree_path}::{arg}"))); + + // If no arguments exist, filter out this leaf. + !args.is_empty() + } + } + }); + } + retain(tree, "", &mut filter); + } + + /// Sorts the tree by the given ordering. + pub fn sort_by_attr(tree: &mut [Self], attr: SortingAttr, reverse: bool) { + let apply_reverse = + |ordering: Ordering| if reverse { ordering.reverse() } else { ordering }; + + tree.sort_unstable_by(|a, b| apply_reverse(a.cmp_by_attr(b, attr))); + + tree.iter_mut().for_each(|tree| { + match tree { + // Sort benchmark arguments. + EntryTree::Leaf { args, .. } => { + if let Some(args) = args { + args.sort_by(|&a, &b| apply_reverse(attr.cmp_bench_arg_names(a, b))); + } + } + + // Sort children. + EntryTree::Parent { children, .. } => { + Self::sort_by_attr(children, attr, reverse); + } + } + }); + } + + fn cmp_by_attr(&self, other: &Self, attr: SortingAttr) -> Ordering { + // We take advantage of the fact that entries have stable addresses, + // unlike `EntryTree`. + let entry_addr_ordering = match (self.entry_addr(), other.entry_addr()) { + (Some(a), Some(b)) => Some(a.cmp(&b)), + _ => None, + }; + + // If entries have the same address, then all attributes will be equal. 
+ if matches!(entry_addr_ordering, Some(Ordering::Equal)) { + return Ordering::Equal; + } + + for attr in attr.with_tie_breakers() { + let ordering = match attr { + SortingAttr::Kind => self.kind().cmp(&other.kind()), + SortingAttr::Name => self.cmp_display_name(other), + SortingAttr::Location => { + let location_ordering = self.location().cmp(&other.location()); + + // Use the entry's address to break location ties. + // + // This makes generic benchmarks use the same order as their + // types and constants. + if location_ordering.is_eq() { + entry_addr_ordering.unwrap_or(Ordering::Equal) + } else { + location_ordering + } + } + }; + + if ordering.is_ne() { + return ordering; + } + } + + Ordering::Equal + } + + /// Helper for constructing a tree. + /// + /// This uses recursion because the iterative approach runs into limitations + /// with mutable borrows. + fn insert_entry( + tree: &mut Vec, + entry: AnyBenchEntry<'a>, + rem_modules: &mut dyn Iterator, + ) { + let Some(current_module) = rem_modules.next() else { + tree.push(Self::Leaf { + entry, + args: entry.arg_names().map(|args| args.iter().collect()), + }); + return; + }; + + let Some(children) = Self::get_children(tree, current_module) else { + tree.push(Self::from_path(entry, current_module, rem_modules)); + return; + }; + + Self::insert_entry(children, entry, rem_modules); + } + + /// Constructs a sequence of branches from a module path. + fn from_path( + entry: AnyBenchEntry<'a>, + current_module: &'a str, + rem_modules: &mut dyn Iterator, + ) -> Self { + let child = if let Some(next_module) = rem_modules.next() { + Self::from_path(entry, next_module, rem_modules) + } else { + Self::Leaf { entry, args: entry.arg_names().map(|args| args.iter().collect()) } + }; + Self::Parent { raw_name: current_module, group: None, children: vec![child] } + } + + /// Finds the `Parent.children` for the corresponding module in `tree`. 
+ fn get_children<'t>(tree: &'t mut [Self], module: &str) -> Option<&'t mut Vec> { + tree.iter_mut().find_map(|tree| match tree { + Self::Parent { raw_name, children, group: _ } if *raw_name == module => Some(children), + _ => None, + }) + } + + /// Returns an integer denoting the enum variant. + /// + /// This is used instead of `std::mem::Discriminant` because it does not + /// implement `Ord`. + pub fn kind(&self) -> i32 { + // Leaves should appear before parents. + match self { + Self::Leaf { .. } => 0, + Self::Parent { .. } => 1, + } + } + + /// Returns a pointer to use as the identity of the entry. + pub fn entry_addr(&self) -> Option> { + match self { + Self::Leaf { entry, .. } => Some(entry.entry_addr()), + Self::Parent { group, .. } => { + group.map(|entry: &GroupEntry| NonNull::from(entry).cast()) + } + } + } + + pub fn meta(&self) -> Option<&'a EntryMeta> { + match self { + Self::Parent { group, .. } => Some(&(*group)?.meta), + Self::Leaf { entry, .. } => Some(entry.meta()), + } + } + + pub fn bench_options(&self) -> Option<&'a BenchOptions> { + self.meta()?.bench_options() + } + + pub fn raw_name(&self) -> &'a str { + match self { + Self::Parent { group: Some(group), .. } => group.meta.raw_name, + Self::Parent { raw_name, .. } => raw_name, + Self::Leaf { entry, .. } => entry.raw_name(), + } + } + + pub fn display_name(&self) -> &'a str { + if let Self::Leaf { entry, .. } = self { + entry.display_name() + } else if let Some(common) = self.meta() { + common.display_name + } else { + let raw_name = self.raw_name(); + raw_name.strip_prefix("r#").unwrap_or(raw_name) + } + } + + /// Returns the location of this entry, group, or the children's earliest + /// location. + fn location(&self) -> Option<&'a EntryLocation> { + if let Some(common) = self.meta() { + Some(&common.location) + } else { + self.children().iter().flat_map(Self::location).min() + } + } + + /// Compares display names naturally, taking into account integers. 
+ /// + /// There is special consideration for the `PartialOrd` implementation of + /// constants, so that `EntryConst` can sort integers and floats by value + /// instead of lexicographically. + fn cmp_display_name(&self, other: &Self) -> Ordering { + match (self, other) { + ( + Self::Leaf { + entry: + AnyBenchEntry::GenericBench(GenericBenchEntry { + const_value: Some(this), .. + }), + .. + }, + Self::Leaf { + entry: + AnyBenchEntry::GenericBench(GenericBenchEntry { + const_value: Some(other), .. + }), + .. + }, + ) => this.cmp_name(other), + + _ => natural_cmp(self.display_name(), other.display_name()), + } + } + + fn children(&self) -> &[Self] { + match self { + Self::Leaf { .. } => &[], + Self::Parent { children, .. } => children, + } + } + + fn arg_names(&self) -> Option<&[&'static &'static str]> { + match self { + Self::Leaf { args, .. } => args.as_deref(), + Self::Parent { .. } => None, + } + } +} diff --git a/crates/divan_compat/divan_fork/src/lib.rs b/crates/divan_compat/divan_fork/src/lib.rs new file mode 100644 index 00000000..7eaa96dd --- /dev/null +++ b/crates/divan_compat/divan_fork/src/lib.rs @@ -0,0 +1,1321 @@ +//! [bench_attr]: macro@bench +//! [bench_attr_examples]: macro@bench#examples +//! [bench_attr_threads]: macro@bench#threads +#![doc = include_str!("../README.md")] +#![warn(missing_docs)] +#![allow( + unknown_lints, + unused_unsafe, + clippy::needless_doctest_main, + clippy::needless_lifetimes, + clippy::new_without_default, + clippy::type_complexity, + clippy::missing_transmute_annotations +)] + +// Used by generated code. Not public API and thus not subject to SemVer. +#[doc(hidden)] +#[path = "private.rs"] +pub mod __private; + +mod alloc; +mod bench; +mod cli; +mod compile_fail; +mod config; +mod divan; +mod entry; +mod stats; +mod thread_pool; +mod time; +mod tree_painter; +mod util; + +pub mod counter; + +/// Prevents compiler optimizations on a value. 
+/// +/// `black_box` should only be used on [inputs](#benchmark-inputs) and +/// [outputs](#benchmark-outputs) of benchmarks. Newcomers to benchmarking may +/// be tempted to also use `black_box` within the implementation, but doing so +/// will overly pessimize the measured code without any benefit. +/// +/// ## Benchmark Inputs +/// +/// When benchmarking, it's good practice to ensure measurements are accurate by +/// preventing the compiler from optimizing based on assumptions about benchmark +/// inputs. +/// +/// The compiler can optimize code for indices it knows about, such as by +/// removing bounds checks or unrolling loops. If real-world use of your code +/// would not know indices up front, consider preventing optimizations on them +/// in benchmarks: +/// +/// ``` +/// use divan::black_box; +/// +/// const INDEX: usize = // ... +/// # 0; +/// const SLICE: &[u8] = // ... +/// # &[]; +/// +/// #[divan::bench] +/// fn bench() { +/// # fn work(_: T) {} +/// work(&SLICE[black_box(INDEX)..]); +/// } +/// ``` +/// +/// The compiler may also optimize for the data itself, which can also be +/// avoided with `black_box`: +/// +/// ``` +/// # use divan::black_box; +/// # const INDEX: usize = 0; +/// # const SLICE: &[u8] = &[]; +/// #[divan::bench] +/// fn bench() { +/// # fn work(_: T) {} +/// work(black_box(&SLICE[black_box(INDEX)..])); +/// } +/// ``` +/// +/// ## Benchmark Outputs +/// +/// When benchmarking, it's best to ensure that all of the code is actually +/// being run. If the compiler knows an output is unused, it may remove the code +/// that generated the output. This optimization can make benchmarks appear much +/// faster than they really are. 
+/// +/// At the end of a benchmark, we can force the compiler to treat outputs as if +/// they were actually used: +/// +/// ``` +/// # use divan::black_box; +/// #[divan::bench] +/// fn bench() { +/// # let value = 1; +/// black_box(value.to_string()); +/// } +/// ``` +/// +/// To make the code clearer to readers that the output is discarded, this code +/// could instead call [`black_box_drop`]. +/// +/// Alternatively, the output can be returned from the benchmark: +/// +/// ``` +/// #[divan::bench] +/// fn bench() -> String { +/// # let value = 1; +/// value.to_string() +/// } +/// ``` +/// +/// Returning the output will `black_box` it and also avoid measuring the time +/// to [drop](Drop) the output, which in this case is the time to deallocate a +/// [`String`]. Read more about this in the [`#[divan::bench]` +/// docs](macro@bench#drop). +/// +/// --- +/// +///

Standard Library Documentation

+/// +#[doc(inline)] +pub use std::hint::black_box; + +#[doc(inline)] +pub use crate::{alloc::AllocProfiler, bench::Bencher, divan::Divan}; + +/// Runs all registered benchmarks. +/// +/// # Examples +/// +/// ``` +/// #[divan::bench] +/// fn add() -> i32 { +/// // ... +/// # 0 +/// } +/// +/// fn main() { +/// // Run `add` benchmark: +/// divan::main(); +/// } +/// ``` +/// +/// See [`#[divan::bench]`](macro@bench) for more examples. +pub fn main() { + Divan::from_args().main(); +} + +/// [`black_box`] + [`drop`] convenience function. +/// +/// # Examples +/// +/// This is useful when benchmarking a lazy [`Iterator`] to completion with +/// [`for_each`](Iterator::for_each): +/// +/// ``` +/// #[divan::bench] +/// fn parse_iter() { +/// let input: &str = // ... +/// # ""; +/// +/// # struct Parser; +/// # impl Parser { +/// # fn new(_: &str) -> Parser { Parser } +/// # fn for_each(self, _: fn(&'static str)) {} +/// # } +/// Parser::new(input) +/// .for_each(divan::black_box_drop); +/// } +/// ``` +#[inline] +pub fn black_box_drop(dummy: T) { + _ = black_box(dummy); +} + +/// Registers a benchmarking function. 
+/// +/// # Examples +/// +/// The quickest way to get started is to benchmark the function as-is: +/// +/// ``` +/// use divan::black_box; +/// +/// #[divan::bench] +/// fn add() -> i32 { +/// black_box(1) + black_box(42) +/// } +/// +/// fn main() { +/// // Run `add` benchmark: +/// divan::main(); +/// } +/// ``` +/// +/// If benchmarks need to set up context before running, they can take a +/// [`Bencher`] and use [`Bencher::bench`]: +/// +/// ``` +/// use divan::{Bencher, black_box}; +/// +/// #[divan::bench] +/// fn copy_from_slice(bencher: Bencher) { +/// let src = (0..100).collect::<Vec<i32>>(); +/// let mut dst = vec![0; src.len()]; +/// +/// bencher.bench_local(move || { +/// black_box(&mut dst).copy_from_slice(black_box(&src)); +/// }); +/// } +/// ``` +/// +/// Applying this attribute multiple times to the same item will cause a compile +/// error: +/// +/// ```compile_fail +/// #[divan::bench] +/// #[divan::bench] +/// fn bench() { +/// // ... +/// } +/// ``` +/// +/// # Drop +/// +/// When a benchmarked function returns a value, it will not be [dropped][Drop] +/// until after the current sample loop is finished. This allows for more +/// precise timing measurements. +/// +/// Note that there is an inherent memory cost to defer drop, including +/// allocations inside not-yet-dropped values. Also, if the benchmark +/// [panics](macro@std::panic), the values will never be dropped. +/// +/// The following example benchmarks will only measure [`String`] construction +/// time, but not deallocation time: +/// +/// ``` +/// use divan::{Bencher, black_box}; +/// +/// #[divan::bench] +/// fn freestanding() -> String { +/// black_box("hello").to_uppercase() +/// } +/// +/// #[divan::bench] +/// fn contextual(bencher: Bencher) { +/// // Setup: +/// let s: String = // ... 
+/// # String::new(); +/// +/// bencher.bench(|| -> String { +/// black_box(&s).to_lowercase() +/// }); +/// } +/// ``` +/// +/// If the returned value *does not* need to be dropped, there is no memory +/// cost. Because of this, the following example benchmarks are equivalent: +/// +/// ``` +/// #[divan::bench] +/// fn with_return() -> i32 { +/// let n: i32 = // ... +/// # 0; +/// n +/// } +/// +/// #[divan::bench] +/// fn without_return() { +/// let n: i32 = // ... +/// # 0; +/// divan::black_box(n); +/// } +/// ``` +/// +/// # Options +/// +/// - [`name`] +/// - [`crate`] +/// - [`args`] +/// - [`consts`] +/// - [`types`] +/// - [`sample_count`] +/// - [`sample_size`] +/// - [`threads`] +/// - [`counters`] +/// - [`bytes_count`] +/// - [`chars_count`] +/// - [`items_count`] +/// - [`min_time`] +/// - [`max_time`] +/// - [`skip_ext_time`] +/// - [`ignore`] +/// +/// ## `name` +/// [`name`]: #name +/// +/// By default, the benchmark uses the function's name. It can be overridden via +/// the [`name`] option: +/// +/// ``` +/// #[divan::bench(name = "my_add")] +/// fn add() -> i32 { +/// // Will appear as "crate_name::my_add". +/// # 0 +/// } +/// ``` +/// +/// ## `crate` +/// [`crate`]: #crate +/// +/// The path to the specific `divan` crate instance used by this macro's +/// generated code can be specified via the [`crate`] option. This is applicable +/// when using `divan` via a macro from your own crate. +/// +/// ``` +/// extern crate divan as sofa; +/// +/// #[::sofa::bench(crate = ::sofa)] +/// fn add() -> i32 { +/// // ... +/// # 0 +/// } +/// ``` +/// +/// ## `args` +/// [`args`]: #args +/// +/// Function arguments can be provided to benchmark the function over multiple +/// cases. This is used for comparing across parameters like collection lengths +/// and [`enum`](https://doc.rust-lang.org/std/keyword.enum.html) variants. 
If +/// you are not comparing cases and just need to pass a value into the +/// benchmark, instead consider passing local values into the [`Bencher::bench`] +/// closure or use [`Bencher::with_inputs`] for many distinct values. +/// +/// The following example benchmarks converting a [`Range`](std::ops::Range) to +/// [`Vec`] over different lengths: +/// +/// ``` +/// #[divan::bench(args = [1000, LEN, len()])] +/// fn init_vec(len: usize) -> Vec<usize> { +/// (0..len).collect() +/// } +/// +/// const LEN: usize = // ... +/// # 0; +/// +/// fn len() -> usize { +/// // ... +/// # 0 +/// } +/// ``` +/// +/// The list of arguments can be shared across multiple benchmarks through an +/// external [`Iterator`]: +/// +/// ``` +/// const LENS: &[usize] = // ... +/// # &[]; +/// +/// #[divan::bench(args = LENS)] +/// fn bench_vec1(len: usize) -> Vec<usize> { +/// // ... +/// # vec![] +/// } +/// +/// #[divan::bench(args = LENS)] +/// fn bench_vec2(len: usize) -> Vec<usize> { +/// // ... +/// # vec![] +/// } +/// ``` +/// +/// Unlike the [`consts`] option, any argument type is supported if it +/// implements [`Any`], [`Copy`], [`Send`], [`Sync`], and [`ToString`] (or +/// [`Debug`](std::fmt::Debug)): +/// +/// ``` +/// #[derive(Clone, Copy, Debug)] +/// enum Arg { +/// A, B +/// } +/// +/// #[divan::bench(args = [Arg::A, Arg::B])] +/// fn bench_args(arg: Arg) { +/// // ... +/// } +/// ``` +/// +/// The argument type does not need to implement [`Copy`] if it is used through +/// a reference: +/// +/// ``` +/// #[derive(Debug)] +/// enum Arg { +/// A, B +/// } +/// +/// #[divan::bench(args = [Arg::A, Arg::B])] +/// fn bench_args(arg: &Arg) { +/// // ... +/// } +/// ``` +/// +/// For convenience, common string types are coerced to [`&str`](primitive@str): +/// +/// ``` +/// fn strings() -> impl Iterator<Item = String> { +/// // ... +/// # [].into_iter() +/// } +/// +/// #[divan::bench(args = strings())] +/// fn bench_strings(s: &str) { +/// // ... 
+/// } +/// ``` +/// +/// Arguments can also be used with [`Bencher`]. This allows for generating +/// inputs based on [`args`] values or providing throughput information via +/// [`Counter`s](crate::counter::Counter): +/// +/// ``` +/// # fn new_value<T>(v: T) -> T { v } +/// # fn do_work<T>(_: T) {} +/// use divan::Bencher; +/// +/// #[divan::bench(args = [1, 2, 3])] +/// fn bench(bencher: Bencher, len: usize) { +/// let value = new_value(len); +/// +/// bencher +/// .counter(len) +/// .bench(|| { +/// do_work(value); +/// }); +/// } +/// ``` +/// +/// ## `consts` +/// [`consts`]: #consts +/// +/// Divan supports benchmarking functions with [`const` +/// generics](https://doc.rust-lang.org/reference/items/generics.html#const-generics) +/// via the [`consts`] option. +/// +/// The following example benchmarks initialization of [`[i32; N]`](prim@array) +/// for values of `N` provided by a [literal](https://doc.rust-lang.org/reference/expressions/literal-expr.html), +/// [`const` item](https://doc.rust-lang.org/reference/items/constant-items.html), +/// and [`const fn`](https://doc.rust-lang.org/reference/const_eval.html#const-functions): +/// +/// ``` +/// #[divan::bench(consts = [1000, LEN, len()])] +/// fn init_array<const N: usize>() -> [i32; N] { +/// let mut result = [0; N]; +/// +/// for i in 0..N { +/// result[i] = divan::black_box(i as i32); +/// } +/// +/// result +/// } +/// +/// const LEN: usize = // ... +/// # 0; +/// +/// const fn len() -> usize { +/// // ... +/// # 0 +/// } +/// ``` +/// +/// The list of constants can be shared across multiple benchmarks through an +/// external [array](prim@array) or [slice](prim@slice): +/// +/// ``` +/// const SIZES: &[usize] = &[1, 2, 5, 10]; +/// +/// #[divan::bench(consts = SIZES)] +/// fn bench_array1<const N: usize>() -> [i32; N] { +/// // ... +/// # [0; N] +/// } +/// +/// #[divan::bench(consts = SIZES)] +/// fn bench_array2<const N: usize>() -> [i32; N] { +/// // ... 
+/// # [0; N] +/// } +/// ``` +/// +/// External constants are limited to lengths 1 through 20, because of +/// implementation details. This limit does not apply if the list is provided +/// directly like in the first example. +/// +/// ```compile_fail +/// const SIZES: [usize; 21] = [ +/// // ... +/// # 0; 21 +/// ]; +/// +/// #[divan::bench(consts = SIZES)] +/// fn bench_array<const N: usize>() -> [i32; N] { +/// // ... +/// # [0; N] +/// } +/// ``` +/// +/// ## `types` +/// [`types`]: #types +/// +/// Divan supports benchmarking generic functions over a list of types via the +/// [`types`] option. +/// +/// The following example benchmarks the [`From<&str>`](From) implementations +/// for [`&str`](prim@str) and [`String`]: +/// +/// ``` +/// #[divan::bench(types = [&str, String])] +/// fn from_str<'a, T>() -> T +/// where +/// T: From<&'a str>, +/// { +/// divan::black_box("hello world").into() +/// } +/// ``` +/// +/// The [`types`] and [`args`] options can be combined to benchmark _T_ × _A_ +/// scenarios. The following example benchmarks the [`FromIterator`] +/// implementations for [`Vec`], [`BTreeSet`], and [`HashSet`]: +/// +/// ``` +/// use std::collections::{BTreeSet, HashSet}; +/// +/// #[divan::bench( +/// types = [Vec<i32>, BTreeSet<i32>, HashSet<i32>], +/// args = [0, 2, 4, 16, 256, 4096], +/// )] +/// fn from_range<T>(n: i32) -> T +/// where +/// T: FromIterator<i32>, +/// { +/// (0..n).collect() +/// } +/// ``` +/// +/// [`BTreeSet`]: std::collections::BTreeSet +/// [`HashSet`]: std::collections::HashSet +/// +/// ## `sample_count` +/// [`sample_count`]: #sample_count +/// +/// The number of statistical sample recordings can be set to a predetermined +/// [`u32`] value via the [`sample_count`] option. This may be overridden at +/// runtime using either the `DIVAN_SAMPLE_COUNT` environment variable or +/// `--sample-count` CLI argument. +/// +/// ``` +/// #[divan::bench(sample_count = 1000)] +/// fn add() -> i32 { +/// // ... 
+/// # 0 +/// } +/// ``` +/// +/// If the [`threads`] option is enabled, sample count becomes a multiple of the +/// number of threads. This is because each thread operates over the same sample +/// size to ensure there are always N competing threads doing the same amount of +/// work. +/// +/// ## `sample_size` +/// [`sample_size`]: #sample_size +/// +/// The number of iterations within each statistical sample can be set to a +/// predetermined [`u32`] value via the [`sample_size`] option. This may be +/// overridden at runtime using either the `DIVAN_SAMPLE_SIZE` environment +/// variable or `--sample-size` CLI argument. +/// +/// ``` +/// #[divan::bench(sample_size = 1000)] +/// fn add() -> i32 { +/// // ... +/// # 0 +/// } +/// ``` +/// +/// ## `threads` +/// [`threads`]: #threads +/// +/// Benchmarked functions can be run across multiple threads via the [`threads`] +/// option. This enables you to measure contention on [atomics and +/// locks][std::sync]. The default thread count is the [available parallelism]. +/// +/// ``` +/// use std::sync::Arc; +/// +/// #[divan::bench(threads)] +/// fn arc_clone(bencher: divan::Bencher) { +/// let arc = Arc::new(42); +/// +/// bencher.bench(|| arc.clone()); +/// } +/// ``` +/// +/// The [`threads`] option can be set to any of: +/// - [`bool`] for [available parallelism] (true) or no parallelism. +/// - [`usize`] for a specific number of threads. 0 means use [available +/// parallelism] and 1 means no parallelism. +/// - [`IntoIterator`] over [`usize`] for multiple thread counts, such as: +/// - [`Range`](std::ops::Range) +/// - [`[usize; N]`](prim@array) +/// - [`&[usize]`](prim@slice) +/// +/// ``` +/// #[divan::bench(threads = false)] +/// fn single() { +/// // ... +/// } +/// +/// #[divan::bench(threads = 10)] +/// fn specific() { +/// // ... +/// } +/// +/// #[divan::bench(threads = 0..=8)] +/// fn range() { +/// // Note: Includes 0 for available parallelism. 
+/// } +/// +/// #[divan::bench(threads = [0, 1, 4, 8, 16])] +/// fn selection() { +/// // ... +/// } +/// ``` +/// +/// ## `counters` +/// [`counters`]: #counters +/// +/// The [`Counter`s](crate::counter::Counter) of each iteration can be set via +/// the [`counters`] option. The following example emits info for the number of +/// bytes and number of ints processed when benchmarking [slice sorting](slice::sort): +/// +/// ``` +/// use divan::{Bencher, counter::{BytesCount, ItemsCount}}; +/// +/// const INTS: &[i32] = &[ +/// // ... +/// ]; +/// +/// #[divan::bench(counters = [ +/// BytesCount::of_slice(INTS), +/// ItemsCount::new(INTS.len()), +/// ])] +/// fn sort(bencher: Bencher) { +/// bencher +/// .with_inputs(|| INTS.to_vec()) +/// .bench_refs(|ints| ints.sort()); +/// } +/// ``` +/// +/// For convenience, singular `counter` allows a single +/// [`Counter`](crate::counter::Counter) to be set. The following example emits +/// info for the number of bytes processed when benchmarking +/// [`char`-counting](std::str::Chars::count): +/// +/// ``` +/// use divan::counter::BytesCount; +/// +/// const STR: &str = "..."; +/// +/// #[divan::bench(counter = BytesCount::of_str(STR))] +/// fn char_count() -> usize { +/// divan::black_box(STR).chars().count() +/// } +/// ``` +/// +/// See: +/// - [`#[divan::bench_group(counters = ...)]`](macro@bench_group#counters) +/// - [`Bencher::counter`] +/// - [`Bencher::input_counter`] +/// +/// ### `bytes_count` +/// [`bytes_count`]: #bytes_count +/// +/// Convenience shorthand for +/// [counter](#counters) = [BytesCount](counter::BytesCount)::from(n). +/// +/// ### `chars_count` +/// [`chars_count`]: #chars_count +/// +/// Convenience shorthand for +/// [counter](#counters) = [CharsCount](counter::CharsCount)::from(n). +/// +/// ### `items_count` +/// [`items_count`]: #items_count +/// +/// Convenience shorthand for +/// [counter](#counters) = [ItemsCount](counter::ItemsCount)::from(n). 
+/// +/// ## `min_time` +/// [`min_time`]: #min_time +/// +/// The minimum time spent benchmarking each function can be set to a +/// predetermined [`Duration`] via the [`min_time`] option. This may be +/// overridden at runtime using either the `DIVAN_MIN_TIME` environment variable +/// or `--min-time` CLI argument. +/// +/// Unless [`skip_ext_time`] is set, this includes time external to the +/// benchmarked function, such as time spent generating inputs and running +/// [`Drop`]. +/// +/// ``` +/// use std::time::Duration; +/// +/// #[divan::bench(min_time = Duration::from_secs(3))] +/// fn add() -> i32 { +/// // ... +/// # 0 +/// } +/// ``` +/// +/// For convenience, [`min_time`] can also be set with seconds as [`u64`] or +/// [`f64`]. Invalid values will cause a panic at runtime. +/// +/// ``` +/// #[divan::bench(min_time = 2)] +/// fn int_secs() -> i32 { +/// // ... +/// # 0 +/// } +/// +/// #[divan::bench(min_time = 1.5)] +/// fn float_secs() -> i32 { +/// // ... +/// # 0 +/// } +/// ``` +/// +/// ## `max_time` +/// [`max_time`]: #max_time +/// +/// The maximum time spent benchmarking each function can be set to a +/// predetermined [`Duration`] via the [`max_time`] option. This may be +/// overridden at runtime using either the `DIVAN_MAX_TIME` environment variable +/// or `--max-time` CLI argument. +/// +/// Unless [`skip_ext_time`] is set, this includes time external to the +/// benchmarked function, such as time spent generating inputs and running +/// [`Drop`]. +/// +/// If `min_time > max_time`, then [`max_time`] has priority and [`min_time`] +/// will not be reached. +/// +/// ``` +/// use std::time::Duration; +/// +/// #[divan::bench(max_time = Duration::from_secs(5))] +/// fn add() -> i32 { +/// // ... +/// # 0 +/// } +/// ``` +/// +/// For convenience, like [`min_time`], [`max_time`] can also be set with +/// seconds as [`u64`] or [`f64`]. Invalid values will cause a panic at runtime. 
+/// +/// ``` +/// #[divan::bench(max_time = 8)] +/// fn int_secs() -> i32 { +/// // ... +/// # 0 +/// } +/// +/// #[divan::bench(max_time = 9.5)] +/// fn float_secs() -> i32 { +/// // ... +/// # 0 +/// } +/// ``` +/// +/// ## `skip_ext_time` +/// [`skip_ext_time`]: #skip_ext_time +/// +/// By default, [`min_time`] and [`max_time`] include time external to the +/// benchmarked function, such as time spent generating inputs and running +/// [`Drop`]. Enabling the [`skip_ext_time`] option will instead make those +/// options only consider time spent within the benchmarked function. This may +/// be overridden at runtime using either the `DIVAN_SKIP_EXT_TIME` environment +/// variable or `--skip-ext-time` CLI argument. +/// +/// In the following example, [`max_time`] only considers time spent running +/// `measured_function`: +/// +/// ``` +/// # fn generate_input() {} +/// # fn measured_function(_: ()) {} +/// #[divan::bench(max_time = 5, skip_ext_time)] +/// fn bench(bencher: divan::Bencher) { +/// bencher +/// .with_inputs(|| generate_input()) +/// .bench_values(|input| measured_function(input)); +/// } +/// ``` +/// +/// This option can be set to an explicit [`bool`] value to override parent +/// values: +/// +/// ``` +/// #[divan::bench(max_time = 5, skip_ext_time = false)] +/// fn bench(bencher: divan::Bencher) { +/// // ... 
+/// } +/// ``` +/// +/// ## `ignore` +/// [`ignore`]: #ignore +/// +/// Like [`#[test]`](https://doc.rust-lang.org/reference/attributes/testing.html#the-test-attribute), +/// `#[divan::bench]` functions can use [`#[ignore]`](https://doc.rust-lang.org/reference/attributes/testing.html#the-ignore-attribute): +/// +/// ``` +/// #[divan::bench] +/// #[ignore] +/// fn todo() { +/// unimplemented!(); +/// } +/// # divan::main(); +/// ``` +/// +/// This option can also instead be set within the `#[divan::bench]` attribute: +/// +/// ``` +/// #[divan::bench(ignore)] +/// fn todo() { +/// unimplemented!(); +/// } +/// # divan::main(); +/// ``` +/// +/// Like [`skip_ext_time`], this option can be set to an explicit [`bool`] value +/// to override parent values: +/// +/// ``` +/// #[divan::bench(ignore = false)] +/// fn bench() { +/// // ... +/// } +/// ``` +/// +/// This can be used to ignore benchmarks based on a runtime condition. The +/// following example benchmark will be ignored if an [environment +/// variable](std::env::var) is not set to "true": +/// +/// ``` +/// #[divan::bench( +/// ignore = std::env::var("BENCH_EXPENSIVE").as_deref() != Ok("true") +/// )] +/// fn expensive_bench() { +/// // ... +/// } +/// ``` +/// +/// [`Any`]: std::any::Any +/// [`Duration`]: std::time::Duration +/// [available parallelism]: std::thread::available_parallelism +pub use divan_macros::bench; + +/// Registers a benchmarking group. 
+/// +/// # Examples +/// +/// This is used for setting [options] shared across +/// [`#[divan::bench]`](macro@bench) functions in the same module: +/// +/// ``` +/// #[divan::bench_group( +/// sample_count = 100, +/// sample_size = 500, +/// )] +/// mod math { +/// use divan::black_box; +/// +/// #[divan::bench] +/// fn add() -> i32 { +/// black_box(1) + black_box(42) +/// } +/// +/// #[divan::bench] +/// fn div() -> i32 { +/// black_box(1) / black_box(42) +/// } +/// } +/// +/// fn main() { +/// // Run `math::add` and `math::div` benchmarks: +/// divan::main(); +/// } +/// ``` +/// +/// Benchmarking [options] set on parent groups cascade into child groups and +/// their benchmarks: +/// +/// ``` +/// #[divan::bench_group( +/// sample_count = 100, +/// sample_size = 500, +/// )] +/// mod parent { +/// #[divan::bench_group(sample_size = 1)] +/// mod child1 { +/// #[divan::bench] +/// fn bench() { +/// // Will be sampled 100 times with 1 iteration per sample. +/// } +/// } +/// +/// #[divan::bench_group(sample_count = 42)] +/// mod child2 { +/// #[divan::bench] +/// fn bench() { +/// // Will be sampled 42 times with 500 iterations per sample. +/// } +/// } +/// +/// mod child3 { +/// #[divan::bench(sample_count = 1)] +/// fn bench() { +/// // Will be sampled 1 time with 500 iterations per sample. +/// } +/// } +/// } +/// ``` +/// +/// Applying this attribute multiple times to the same item will cause a compile +/// error: +/// +/// ```compile_fail +/// #[divan::bench_group] +/// #[divan::bench_group] +/// mod math { +/// // ... +/// } +/// ``` +/// +/// # Options +/// [options]: #options +/// +/// - [`name`] +/// - [`crate`] +/// - [`sample_count`] +/// - [`sample_size`] +/// - [`threads`] +/// - [`counters`] +/// - [`bytes_count`] +/// - [`chars_count`] +/// - [`items_count`] +/// - [`min_time`] +/// - [`max_time`] +/// - [`skip_ext_time`] +/// - [`ignore`] +/// +/// ## `name` +/// [`name`]: #name +/// +/// By default, the benchmark group uses the module's name. 
It can be overridden +/// via the `name` option: +/// +/// ``` +/// #[divan::bench_group(name = "my_math")] +/// mod math { +/// #[divan::bench(name = "my_add")] +/// fn add() -> i32 { +/// // Will appear as "crate_name::my_math::my_add". +/// # 0 +/// } +/// } +/// ``` +/// +/// ## `crate` +/// [`crate`]: #crate +/// +/// The path to the specific `divan` crate instance used by this macro's +/// generated code can be specified via the [`crate`] option. This is applicable +/// when using `divan` via a macro from your own crate. +/// +/// ``` +/// extern crate divan as sofa; +/// +/// #[::sofa::bench_group(crate = ::sofa)] +/// mod math { +/// #[::sofa::bench(crate = ::sofa)] +/// fn add() -> i32 { +/// // ... +/// # 0 +/// } +/// } +/// ``` +/// +/// ## `sample_count` +/// [`sample_count`]: #sample_count +/// +/// The number of statistical sample recordings can be set to a predetermined +/// [`u32`] value via the [`sample_count`] option. This may be overridden at +/// runtime using either the `DIVAN_SAMPLE_COUNT` environment variable or +/// `--sample-count` CLI argument. +/// +/// ``` +/// #[divan::bench_group(sample_count = 1000)] +/// mod math { +/// #[divan::bench] +/// fn add() -> i32 { +/// // ... +/// # 0 +/// } +/// } +/// ``` +/// +/// If the [`threads`] option is enabled, sample count becomes a multiple of the +/// number of threads. This is because each thread operates over the same sample +/// size to ensure there are always N competing threads doing the same amount of +/// work. +/// +/// ## `sample_size` +/// [`sample_size`]: #sample_size +/// +/// The number of iterations within each statistical sample can be set to a +/// predetermined [`u32`] value via the [`sample_size`] option. This may be +/// overridden at runtime using either the `DIVAN_SAMPLE_SIZE` environment +/// variable or `--sample-size` CLI argument. +/// +/// ``` +/// #[divan::bench_group(sample_size = 1000)] +/// mod math { +/// #[divan::bench] +/// fn add() -> i32 { +/// // ... 
+/// # 0 +/// } +/// } +/// ``` +/// +/// ## `threads` +/// [`threads`]: #threads +/// +/// See [`#[divan::bench(threads = ...)]`](macro@bench#threads). +/// +/// ## `counters` +/// [`counters`]: #counters +/// +/// The [`Counter`s](crate::counter::Counter) of each iteration of benchmarked +/// functions in a group can be set via the [`counters`] option. The following +/// example emits info for the number of bytes and number of ints processed when +/// benchmarking [slice sorting](slice::sort): +/// +/// ``` +/// use divan::{Bencher, counter::{BytesCount, ItemsCount}}; +/// +/// const INTS: &[i32] = &[ +/// // ... +/// ]; +/// +/// #[divan::bench_group(counters = [ +/// BytesCount::of_slice(INTS), +/// ItemsCount::new(INTS.len()), +/// ])] +/// mod sort { +/// use super::*; +/// +/// #[divan::bench] +/// fn default(bencher: Bencher) { +/// bencher +/// .with_inputs(|| INTS.to_vec()) +/// .bench_refs(|ints| ints.sort()); +/// } +/// +/// #[divan::bench] +/// fn unstable(bencher: Bencher) { +/// bencher +/// .with_inputs(|| INTS.to_vec()) +/// .bench_refs(|ints| ints.sort_unstable()); +/// } +/// } +/// # fn main() {} +/// ``` +/// +/// For convenience, singular `counter` allows a single +/// [`Counter`](crate::counter::Counter) to be set. 
The following example emits +/// info for the number of bytes processed when benchmarking +/// [`char`-counting](std::str::Chars::count) and +/// [`char`-collecting](std::str::Chars::collect): +/// +/// ``` +/// use divan::counter::BytesCount; +/// +/// const STR: &str = "..."; +/// +/// #[divan::bench_group(counter = BytesCount::of_str(STR))] +/// mod chars { +/// use super::STR; +/// +/// #[divan::bench] +/// fn count() -> usize { +/// divan::black_box(STR).chars().count() +/// } +/// +/// #[divan::bench] +/// fn collect() -> String { +/// divan::black_box(STR).chars().collect() +/// } +/// } +/// # fn main() {} +/// ``` +/// +/// See: +/// - [`#[divan::bench(counters = ...)]`](macro@bench#counters) +/// - [`Bencher::counter`] +/// - [`Bencher::input_counter`] +/// +/// ### `bytes_count` +/// [`bytes_count`]: #bytes_count +/// +/// Convenience shorthand for +/// [counter](#counters) = [BytesCount](counter::BytesCount)::from(n). +/// +/// ### `chars_count` +/// [`chars_count`]: #chars_count +/// +/// Convenience shorthand for +/// [counter](#counters) = [CharsCount](counter::CharsCount)::from(n). +/// +/// ### `cycles_count` +/// [`cycles_count`]: #cycles_count +/// +/// Convenience shorthand for +/// [counter](#counters) = [CyclesCount](counter::CyclesCount)::from(n). +/// +/// ### `items_count` +/// [`items_count`]: #items_count +/// +/// Convenience shorthand for +/// [counter](#counters) = [ItemsCount](counter::ItemsCount)::from(n). +/// +/// ## `min_time` +/// [`min_time`]: #min_time +/// +/// The minimum time spent benchmarking each function can be set to a +/// predetermined [`Duration`] via the [`min_time`] option. This may be +/// overridden at runtime using either the `DIVAN_MIN_TIME` environment variable +/// or `--min-time` CLI argument. +/// +/// Unless [`skip_ext_time`] is set, this includes time external to benchmarked +/// functions, such as time spent generating inputs and running [`Drop`]. 
+/// +/// ``` +/// use std::time::Duration; +/// +/// #[divan::bench_group(min_time = Duration::from_secs(3))] +/// mod math { +/// #[divan::bench] +/// fn add() -> i32 { +/// // ... +/// # 0 +/// } +/// } +/// ``` +/// +/// For convenience, [`min_time`] can also be set with seconds as [`u64`] or +/// [`f64`]. Invalid values will cause a panic at runtime. +/// +/// ``` +/// #[divan::bench_group(min_time = 2)] +/// mod int_secs { +/// // ... +/// } +/// +/// #[divan::bench_group(min_time = 1.5)] +/// mod float_secs { +/// // ... +/// } +/// ``` +/// +/// ## `max_time` +/// [`max_time`]: #max_time +/// +/// The maximum time spent benchmarking each function can be set to a +/// predetermined [`Duration`] via the [`max_time`] option. This may be +/// overridden at runtime using either the `DIVAN_MAX_TIME` environment variable +/// or `--max-time` CLI argument. +/// +/// Unless [`skip_ext_time`] is set, this includes time external to benchmarked +/// functions, such as time spent generating inputs and running [`Drop`]. +/// +/// If `min_time > max_time`, then [`max_time`] has priority and [`min_time`] +/// will not be reached. +/// +/// ``` +/// use std::time::Duration; +/// +/// #[divan::bench_group(max_time = Duration::from_secs(5))] +/// mod math { +/// #[divan::bench] +/// fn add() -> i32 { +/// // ... +/// # 0 +/// } +/// } +/// ``` +/// +/// For convenience, like [`min_time`], [`max_time`] can also be set with +/// seconds as [`u64`] or [`f64`]. Invalid values will cause a panic at runtime. +/// +/// ``` +/// #[divan::bench_group(max_time = 8)] +/// mod int_secs { +/// // ... +/// } +/// +/// #[divan::bench_group(max_time = 9.5)] +/// mod float_secs { +/// // ... +/// } +/// ``` +/// +/// ## `skip_ext_time` +/// [`skip_ext_time`]: #skip_ext_time +/// +/// By default, [`min_time`] and [`max_time`] include time external to +/// benchmarked functions, such as time spent generating inputs and running +/// [`Drop`]. 
Enabling the [`skip_ext_time`] option will instead make those +/// options only consider time spent within benchmarked functions. This may be +/// overridden at runtime using either the `DIVAN_SKIP_EXT_TIME` environment +/// variable or `--skip-ext-time` CLI argument. +/// +/// In the following example, [`max_time`] only considers time spent running +/// `measured_function`: +/// +/// ``` +/// #[divan::bench_group(skip_ext_time)] +/// mod group { +/// # fn generate_input() {} +/// # fn measured_function(_: ()) {} +/// #[divan::bench(max_time = 5)] +/// fn bench(bencher: divan::Bencher) { +/// bencher +/// .with_inputs(|| generate_input()) +/// .bench_values(|input| measured_function(input)); +/// } +/// } +/// ``` +/// +/// This option can be set to an explicit [`bool`] value to override parent +/// values: +/// +/// ``` +/// #[divan::bench_group(skip_ext_time = false)] +/// mod group { +/// // ... +/// } +/// ``` +/// +/// ## `ignore` +/// [`ignore`]: #ignore +/// +/// Like [`#[test]`](https://doc.rust-lang.org/reference/attributes/testing.html#the-test-attribute) +/// and [`#[divan::bench]`](macro@bench), `#[divan::bench_group]` modules can +/// use [`#[ignore]`](https://doc.rust-lang.org/reference/attributes/testing.html#the-ignore-attribute): +/// +/// ``` +/// #[divan::bench_group] +/// #[ignore] +/// mod math { +/// #[divan::bench] +/// fn todo() { +/// unimplemented!(); +/// } +/// } +/// # divan::main(); +/// ``` +/// +/// This option can also instead be set within the `#[divan::bench_group]` +/// attribute: +/// +/// ``` +/// #[divan::bench_group(ignore)] +/// mod math { +/// #[divan::bench] +/// fn todo() { +/// unimplemented!(); +/// } +/// } +/// # divan::main(); +/// ``` +/// +/// Like [`skip_ext_time`], this option can be set to an explicit [`bool`] value +/// to override parent values: +/// +/// ``` +/// #[divan::bench_group(ignore = false)] +/// mod group { +/// // ... 
+/// } +/// ``` +/// +/// This can be used to ignore benchmarks based on a runtime condition. The +/// following example benchmark group will be ignored if an [environment +/// variable](std::env::var) is not set to "true": +/// +/// ``` +/// #[divan::bench_group( +/// ignore = std::env::var("BENCH_EXPENSIVE").as_deref() != Ok("true") +/// )] +/// mod expensive_benches { +/// // ... +/// } +/// ``` +/// +/// [`Duration`]: std::time::Duration +pub use divan_macros::bench_group; diff --git a/crates/divan_compat/divan_fork/src/private.rs b/crates/divan_compat/divan_fork/src/private.rs new file mode 100644 index 00000000..08cbd17e --- /dev/null +++ b/crates/divan_compat/divan_fork/src/private.rs @@ -0,0 +1,229 @@ +use std::{ + borrow::{Borrow, Cow}, + fmt::Debug, +}; + +pub use crate::{ + bench::{BenchArgs, BenchOptions}, + entry::{ + BenchEntry, BenchEntryRunner, EntryConst, EntryList, EntryLocation, EntryMeta, EntryType, + GenericBenchEntry, GroupEntry, BENCH_ENTRIES, GROUP_ENTRIES, + }, + time::IntoDuration, +}; + +/// Helper to convert values to strings via `ToString` or fallback to `Debug`. +/// +/// This works by having a `Debug`-based `ToString::to_string` method that will +/// be chosen if the wrapped type implements `Debug` *but not* `ToString`. If +/// the wrapped type implements `ToString`, then the inherent +/// `ToStringHelper::to_string` method will be chosen instead. +pub struct ToStringHelper<'a, T: 'static>(pub &'a T); + +#[allow(clippy::to_string_trait_impl)] +impl ToString for ToStringHelper<'_, T> { + #[inline] + fn to_string(&self) -> String { + format!("{:?}", self.0) + } +} + +impl ToStringHelper<'_, T> { + #[allow(clippy::inherent_to_string)] + #[inline] + pub fn to_string(&self) -> String { + self.0.to_string() + } +} + +/// Used by `#[divan::bench(args = ...)]` to enable polymorphism. 
+pub trait Arg { + fn get(this: Self) -> T; +} + +impl Arg for T { + #[inline] + fn get(this: Self) -> T { + this + } +} + +impl<'a, T: ?Sized> Arg<&'a T> for &'a Cow<'a, T> +where + T: ToOwned, +{ + #[inline] + fn get(this: Self) -> &'a T { + this + } +} + +impl<'a> Arg<&'a str> for &'a String { + #[inline] + fn get(this: Self) -> &'a str { + this + } +} + +impl Arg for &T { + #[inline] + fn get(this: Self) -> T { + *this + } +} + +impl Arg for &&T { + #[inline] + fn get(this: Self) -> T { + **this + } +} + +impl Arg for &&&T { + #[inline] + fn get(this: Self) -> T { + ***this + } +} + +/// Used by `#[divan::bench(threads = ...)]` to leak thread counts for easy +/// global usage in [`BenchOptions::threads`]. +/// +/// This enables the `threads` option to be polymorphic over: +/// - `usize` +/// - `bool` +/// - `true` is 0 +/// - `false` is 1 +/// - Iterators: +/// - `[usize; N]` +/// - `&[usize; N]` +/// - `&[usize]` +/// +/// # Orphan Rules Hack +/// +/// Normally we can't implement a trait over both `usize` and `I: IntoIterator` +/// because the compiler has no guarantee that `usize` will never implement +/// `IntoIterator`. Ideally we would handle this with specialization, but that's +/// not stable. +/// +/// The solution here is to make `IntoThreads` generic to implement technically +/// different traits for `usize` and `IntoIterator` because of different `IMP` +/// values. We then call verbatim `IntoThreads::into_threads(val)` and have the +/// compiler infer the generic parameter for the single `IntoThreads` +/// implementation. +/// +/// It's fair to assume that scalar primitives will never implement +/// `IntoIterator`, so this hack shouldn't break in the future 🤠. 
+pub trait IntoThreads { + fn into_threads(self) -> Cow<'static, [usize]>; +} + +impl IntoThreads<0> for usize { + #[inline] + fn into_threads(self) -> Cow<'static, [usize]> { + let counts = match self { + 0 => &[0], + 1 => &[1], + 2 => &[2], + _ => return Cow::Owned(vec![self]), + }; + Cow::Borrowed(counts) + } +} + +impl IntoThreads<0> for bool { + #[inline] + fn into_threads(self) -> Cow<'static, [usize]> { + let counts = if self { + // Available parallelism. + &[0] + } else { + // No parallelism. + &[1] + }; + Cow::Borrowed(counts) + } +} + +impl IntoThreads<1> for I +where + I: IntoIterator, + I::Item: Borrow, +{ + #[inline] + fn into_threads(self) -> Cow<'static, [usize]> { + let mut options: Vec = self.into_iter().map(|i| *i.borrow()).collect(); + options.sort_unstable(); + options.dedup(); + Cow::Owned(options) + } +} + +/// Used by `#[divan::bench(counters = [...])]`. +#[inline] +pub fn new_counter_set() -> crate::counter::CounterSet { + Default::default() +} + +/// Used by `#[divan::bench]` to truncate arrays for generic `const` benchmarks. +pub const fn shrink_array( + array: [T; IN], +) -> Option<[T; OUT]> { + use std::mem::ManuallyDrop; + + #[repr(C)] + union Transmute { + from: ManuallyDrop, + into: ManuallyDrop, + } + + let from = ManuallyDrop::new(array); + + if OUT <= IN { + Some(unsafe { ManuallyDrop::into_inner(Transmute { from }.into) }) + } else { + None + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn into_threads() { + macro_rules! 
test { + ($value:expr, $expected:expr) => { + assert_eq!(IntoThreads::into_threads($value).as_ref(), $expected); + }; + } + + test!(true, &[0]); + test!(false, &[1]); + + test!(0, &[0]); + test!(1, &[1]); + test!(42, &[42]); + + // test!([0; 0], &[]); + test!([0], &[0]); + test!([0, 0], &[0]); + + test!([0, 2, 3, 1], &[0, 1, 2, 3]); + test!([0, 0, 2, 3, 2, 1, 3], &[0, 1, 2, 3]); + } + + #[test] + fn shrink_array() { + let values = [1, 2, 3, 4, 5]; + + let equal: Option<[i32; 5]> = super::shrink_array(values); + assert_eq!(equal, Some(values)); + + let smaller: Option<[i32; 3]> = super::shrink_array(values); + assert_eq!(smaller, Some([1, 2, 3])); + + let larger: Option<[i32; 100]> = super::shrink_array(values); + assert_eq!(larger, None); + } +} diff --git a/crates/divan_compat/divan_fork/src/stats/mod.rs b/crates/divan_compat/divan_fork/src/stats/mod.rs new file mode 100644 index 00000000..39d0d759 --- /dev/null +++ b/crates/divan_compat/divan_fork/src/stats/mod.rs @@ -0,0 +1,61 @@ +//! Measurement statistics. + +use crate::{ + alloc::{AllocOpMap, AllocTally}, + counter::{KnownCounterKind, MaxCountUInt}, + time::FineDuration, +}; + +mod sample; + +pub(crate) use sample::*; + +/// Statistics from samples. +pub(crate) struct Stats { + /// Total number of samples taken. + pub sample_count: u32, + + /// Total number of iterations (currently `sample_count * sample_size`). + pub iter_count: u64, + + /// Timing statistics. + pub time: StatsSet, + + /// Maximum allocated bytes and maximum number of allocations associated + /// with the corresponding samples for `time`. + pub max_alloc: AllocTally>, + + /// Allocation statistics associated with the corresponding samples for + /// `time`. + pub alloc_tallies: AllocOpMap>>, + + /// `Counter` counts associated with the corresponding samples for `time`. 
+ pub counts: [Option>; KnownCounterKind::COUNT], +} + +impl Stats { + pub fn get_counts(&self, counter_kind: KnownCounterKind) -> Option<&StatsSet> { + self.counts[counter_kind as usize].as_ref() + } +} + +#[derive(Debug)] +pub(crate) struct StatsSet { + /// Associated with minimum amount of time taken by an iteration. + pub fastest: T, + + /// Associated with maximum amount of time taken by an iteration. + pub slowest: T, + + /// Associated with midpoint time taken by an iteration. + pub median: T, + + /// Associated with average time taken by all iterations. + pub mean: T, +} + +impl StatsSet { + pub fn is_zero(&self) -> bool { + self.fastest == 0.0 && self.slowest == 0.0 && self.median == 0.0 && self.mean == 0.0 + } +} diff --git a/crates/divan_compat/divan_fork/src/stats/sample.rs b/crates/divan_compat/divan_fork/src/stats/sample.rs new file mode 100644 index 00000000..b1e1727d --- /dev/null +++ b/crates/divan_compat/divan_fork/src/stats/sample.rs @@ -0,0 +1,80 @@ +use std::collections::HashMap; + +use crate::{ + alloc::ThreadAllocInfo, + counter::KnownCounterKind, + time::{FineDuration, Timer, Timestamp}, +}; + +/// Timing measurement. +pub(crate) struct TimeSample { + /// The time this sample took to run. + /// + /// This is gotten from [`RawSample`] with: + /// `end.duration_since(start, timer).clamp_to(timer.precision())`. + pub duration: FineDuration, +} + +/// Unprocessed measurement. +/// +/// This cannot be serialized because [`Timestamp`] is an implementation detail +/// for both the `Instant` and TSC timers. +pub(crate) struct RawSample { + pub start: Timestamp, + pub end: Timestamp, + pub timer: Timer, + pub alloc_info: ThreadAllocInfo, + pub counter_totals: [u128; KnownCounterKind::COUNT], +} + +impl RawSample { + /// Simply computes `end - start` without clamping to precision. + #[inline] + pub fn duration(&self) -> FineDuration { + self.end.duration_since(self.start, self.timer) + } +} + +/// Sample collection. 
+#[derive(Default)] +pub(crate) struct SampleCollection { + /// The number of iterations within each sample. + pub sample_size: u32, + + /// Collected timings. + pub time_samples: Vec, + + /// Allocation information associated with `time_samples` by index. + pub alloc_info_by_sample: HashMap, +} + +impl SampleCollection { + /// Discards all recorded data. + #[inline] + pub fn clear(&mut self) { + self.time_samples.clear(); + self.alloc_info_by_sample.clear(); + } + + /// Computes the total number of iterations across all samples. + /// + /// We use `u64` in case sample count and sizes are huge. + #[inline] + pub fn iter_count(&self) -> u64 { + self.sample_size as u64 * self.time_samples.len() as u64 + } + + /// Computes the total time across all samples. + #[inline] + pub fn total_duration(&self) -> FineDuration { + FineDuration { picos: self.time_samples.iter().map(|s| s.duration.picos).sum() } + } + + /// Returns all samples sorted by duration. + #[inline] + pub fn sorted_samples(&self) -> Vec<&TimeSample> { + let mut result: Vec<&TimeSample> = self.time_samples.iter().collect(); + result.sort_unstable_by_key(|s| s.duration); + result + } +} diff --git a/crates/divan_compat/divan_fork/src/thread_pool.rs b/crates/divan_compat/divan_fork/src/thread_pool.rs new file mode 100644 index 00000000..c607a936 --- /dev/null +++ b/crates/divan_compat/divan_fork/src/thread_pool.rs @@ -0,0 +1,389 @@ +use std::{ + num::NonZeroUsize, + panic::AssertUnwindSafe, + ptr::NonNull, + sync::{ + atomic::{AtomicUsize, Ordering}, + mpsc, Mutex, PoisonError, + }, + thread::Thread, +}; + +use crate::util::{defer, sync::SyncWrap}; + +/// Single shared thread pool for running benchmarks on. +pub(crate) static BENCH_POOL: ThreadPool = ThreadPool::new(); + +/// Reusable threads for broadcasting tasks. +/// +/// This thread pool runs only a single task at a time, since only one benchmark +/// should run at a time. 
Invoking `broadcast` from two threads will cause one +/// thread to wait for the other to finish. +/// +/// # How It Works +/// +/// Upon calling `broadcast`: +/// +/// 1. The main thread creates a `Task`, which is a pointer to a `TaskShared` +/// pinned on the stack. `TaskShared` stores the function to run, along with +/// other fields for coordinating threads. +/// +/// 2. New threads are spawned if the requested amount is not available. Each +/// receives tasks over an associated channel. +/// +/// 3. The main thread sends the `Task` over the channels to the requested +/// amount of threads. Upon receiving the task, each auxiliary thread will +/// execute it and then decrement the task's reference count. +/// +/// 4. The main thread executes the `Task` like auxiliary threads. It then waits +/// until the reference count is 0 before returning. +pub(crate) struct ThreadPool { + threads: Mutex>>, +} + +impl ThreadPool { + const fn new() -> Self { + Self { threads: Mutex::new(Vec::new()) } + } + + /// Performs the given task and pushes the results into a `vec`. + #[inline] + pub fn par_extend(&self, vec: &mut Vec>, aux_threads: usize, task: F) + where + F: Sync + Fn(usize) -> T, + T: Sync + Send, + { + unsafe { + let old_len = vec.len(); + let additional = aux_threads + 1; + + vec.reserve_exact(additional); + vec.spare_capacity_mut().iter_mut().for_each(|val| { + val.write(None); + }); + vec.set_len(old_len + additional); + + let ptr = SyncWrap::new(vec.as_mut_ptr().add(old_len)); + + self.broadcast(aux_threads, move |index| { + ptr.add(index).write(Some(task(index))); + }); + } + } + + /// Performs the given task across the current thread and auxiliary worker + /// threads. + /// + /// This function returns once all threads complete the task. 
+ #[inline] + pub fn broadcast(&self, aux_threads: usize, task: F) + where + F: Sync + Fn(usize), + { + // SAFETY: The `TaskShared` instance is guaranteed to be accessible to + // all threads until this function returns, because this thread waits + // until `TaskShared.ref_count` is 0 before continuing. + unsafe { + let task = TaskShared::new(aux_threads, task); + let task = Task { shared: NonNull::from(&task).cast() }; + + self.broadcast_task(aux_threads, task); + } + } + + /// Type-erased monomorphized implementation for `broadcast`. + unsafe fn broadcast_task(&self, aux_threads: usize, task: Task) { + // Send task to auxiliary threads. + if aux_threads > 0 { + let threads = &mut *self.threads.lock().unwrap_or_else(PoisonError::into_inner); + + // Spawn more threads if necessary. + if let Some(additional) = NonZeroUsize::new(aux_threads.saturating_sub(threads.len())) { + spawn(additional, threads); + } + + for thread in &threads[..aux_threads] { + thread.send(task).unwrap(); + } + } + + // Run the task on the main thread. + let main_result = std::panic::catch_unwind(AssertUnwindSafe(|| task.run(0))); + + // Wait for other threads to finish writing their results. + // + // SAFETY: The acquire memory ordering ensures that all writes performed + // by the task on other threads will become visible to this thread after + // returning from `broadcast`. + while task.shared.as_ref().ref_count.load(Ordering::Acquire) > 0 { + std::thread::park(); + } + + // Don't drop our result until other threads finish, in case the panic + // error's drop handler itself also panics. + drop(main_result); + } + + pub fn drop_threads(&self) { + *self.threads.lock().unwrap_or_else(PoisonError::into_inner) = Default::default(); + } + + #[cfg(test)] + fn aux_thread_count(&self) -> usize { + self.threads.lock().unwrap_or_else(PoisonError::into_inner).len() + } +} + +/// Type-erased function and metadata. 
+#[derive(Clone, Copy)] +struct Task { + shared: NonNull>, +} + +unsafe impl Send for Task {} +unsafe impl Sync for Task {} + +impl Task { + /// Runs this task on behalf of `thread_id`. + /// + /// # Safety + /// + /// The caller must ensure: + /// + /// - This task has not outlived the `TaskShared` it came from, or else + /// there will be a use-after-free. + /// + /// - `thread_id` is within the number of `broadcast` threads requested, so + /// that it can be used to index input or output buffers. + #[inline] + unsafe fn run(&self, thread_id: usize) { + let shared_ptr = self.shared.as_ptr(); + let shared = &*shared_ptr; + + (shared.task_fn_ptr)(shared_ptr.cast(), thread_id); + } +} + +/// Data stored on the main thread that gets shared with auxiliary threads. +/// +/// # Memory Layout +/// +/// Since the benchmark may have thrashed the cache, this type's fields are +/// ordered by usage order. This type is also placed on its own cache line. +#[repr(C)] +struct TaskShared { + /// Once an auxiliary thread sets `ref_count` to 0, it should notify the + /// main thread to wake up. + main_thread: Thread, + + /// The number of auxiliary threads executing the task. + /// + /// Once this is 0, the main thread can read any results the task produced. + ref_count: AtomicUsize, + + /// Performs `*result = Some(task_fn(thread))`. + task_fn_ptr: unsafe fn(task: *const TaskShared<()>, thread: usize), + + /// Stores the closure state of the provided task. + /// + /// This must be stored as the last field so that all other fields are in + /// the same place regardless of this field's type. 
+ task_fn: F, +} + +impl TaskShared { + #[inline] + fn new(aux_threads: usize, task_fn: F) -> Self + where + F: Sync + Fn(usize), + { + unsafe fn call(task: *const TaskShared<()>, thread: usize) + where + F: Fn(usize), + { + let task_fn = &(*task.cast::>()).task_fn; + + task_fn(thread); + } + + Self { + main_thread: std::thread::current(), + ref_count: AtomicUsize::new(aux_threads), + task_fn_ptr: call::, + task_fn, + } + } +} + +/// Spawns N additional threads and appends their channels to the list. +/// +/// Threads are given names in the form of `divan-$INDEX`. +#[cold] +fn spawn(additional: NonZeroUsize, threads: &mut Vec>) { + let next_thread_id = threads.len() + 1; + + threads.extend((next_thread_id..(next_thread_id + additional.get())).map(|thread_id| { + // Create single-task channel. Unless another benchmark is running, the + // current thread will be immediately unblocked after the auxiliary + // thread accepts the task. + // + // This uses a rendezvous channel (capacity 0) instead of other standard + // library channels because it reduces memory usage by many kilobytes. + let (sender, receiver) = mpsc::sync_channel::(0); + + let work = move || { + // Abort the process if the caught panic error itself panics when + // dropped. + let panic_guard = defer(|| std::process::abort()); + + while let Ok(task) = receiver.recv() { + // Run the task on this auxiliary thread. + // + // SAFETY: The task is valid until `ref_count == 0`. + let result = + std::panic::catch_unwind(AssertUnwindSafe(|| unsafe { task.run(thread_id) })); + + // Decrement the `ref_count` count to notify the main thread + // that we finished our work. + // + // SAFETY: This release operation makes writes within the task + // become visible to the main thread. + unsafe { + // Clone the main thread's handle for unparking because the + // `TaskShared` will be invalidated when `ref_count` is 0. 
+ let main_thread = task.shared.as_ref().main_thread.clone(); + + if task.shared.as_ref().ref_count.fetch_sub(1, Ordering::Release) == 1 { + main_thread.unpark(); + } + } + + // Don't drop our result until after notifying the main thread, + // in case the panic error's drop handler itself also panics. + drop(result); + } + + std::mem::forget(panic_guard); + }; + + std::thread::Builder::new() + .name(format!("divan-{thread_id}")) + .spawn(work) + .expect("failed to spawn thread"); + + sender + })); +} + +#[cfg(test)] +mod tests { + use super::*; + + /// Make every thread write its ID to a buffer and then check that the + /// buffer contains all IDs. + #[test] + fn extend() { + static TEST_POOL: ThreadPool = ThreadPool::new(); + + fn test(aux_threads: usize, final_aux_threads: usize) { + let total_threads = aux_threads + 1; + + let mut results = Vec::new(); + let expected = (0..total_threads).map(Some).collect::>(); + + TEST_POOL.par_extend(&mut results, aux_threads, |index| index); + + assert_eq!(results, expected); + assert_eq!(TEST_POOL.aux_thread_count(), final_aux_threads); + } + + test(0, 0); + test(1, 1); + test(2, 2); + test(3, 3); + test(4, 4); + test(8, 8); + + // Decreasing auxiliary threads on later calls should still leave + // previously spawned threads running. + test(4, 8); + test(0, 8); + + // Silence Miri about leaking threads. + TEST_POOL.drop_threads(); + } + + /// Execute a task that takes longer on all other threads than the main + /// thread. + #[test] + fn broadcast_sleep() { + use std::time::Duration; + + static TEST_POOL: ThreadPool = ThreadPool::new(); + + TEST_POOL.broadcast(10, |thread_id| { + if thread_id > 0 { + std::thread::sleep(Duration::from_millis(10)); + } + }); + + // Silence Miri about leaking threads. + TEST_POOL.drop_threads(); + } + + /// Checks that thread ID 0 refers to the main thread. 
+ #[test] + fn broadcast_thread_id() { + static TEST_POOL: ThreadPool = ThreadPool::new(); + + let main_thread = std::thread::current().id(); + + TEST_POOL.broadcast(10, |thread_id| { + let is_main = main_thread == std::thread::current().id(); + assert_eq!(is_main, thread_id == 0); + }); + + // Silence Miri about leaking threads. + TEST_POOL.drop_threads(); + } +} + +#[cfg(feature = "internal_benches")] +mod benches { + use super::*; + + fn aux_thread_counts() -> impl Iterator { + let mut available_parallelism = std::thread::available_parallelism().ok().map(|n| n.get()); + + let range = 0..=16; + + if let Some(n) = available_parallelism { + if range.contains(&n) { + available_parallelism = None; + } + } + + range.chain(available_parallelism) + } + + /// Benchmarks repeatedly using `ThreadPool` for the same number of threads + /// on every run. + #[crate::bench(crate = crate, args = aux_thread_counts())] + fn broadcast(bencher: crate::Bencher, aux_threads: usize) { + let pool = ThreadPool::new(); + let benched = move || pool.broadcast(aux_threads, crate::black_box_drop); + + // Warmup to spawn threads. + benched(); + + bencher.bench(benched); + } + + /// Benchmarks using `ThreadPool` once. + #[crate::bench(crate = crate, args = aux_thread_counts(), sample_size = 1)] + fn broadcast_once(bencher: crate::Bencher, aux_threads: usize) { + bencher + .with_inputs(ThreadPool::new) + .bench_refs(|pool| pool.broadcast(aux_threads, crate::black_box_drop)); + } +} diff --git a/crates/divan_compat/divan_fork/src/time/fence.rs b/crates/divan_compat/divan_fork/src/time/fence.rs new file mode 100644 index 00000000..7e123225 --- /dev/null +++ b/crates/divan_compat/divan_fork/src/time/fence.rs @@ -0,0 +1,42 @@ +use std::sync::atomic; + +/// Prevents other operations from affecting timing measurements. +#[inline(always)] +pub fn full_fence() { + asm_fence(); + atomic::fence(atomic::Ordering::SeqCst); +} + +/// Prevents the compiler from reordering operations. 
+#[inline(always)] +pub fn compiler_fence() { + asm_fence(); + atomic::compiler_fence(atomic::Ordering::SeqCst); +} + +/// Stronger compiler fence on [platforms with stable `asm!`](https://doc.rust-lang.org/nightly/reference/inline-assembly.html). +/// +/// This prevents LLVM from removing loops or hoisting logic out of the +/// benchmark loop. +#[inline(always)] +fn asm_fence() { + // Miri does not support inline assembly. + if cfg!(miri) { + return; + } + + #[cfg(any( + target_arch = "x86", + target_arch = "x86_64", + target_arch = "arm", + target_arch = "aarch64", + target_arch = "riscv32", + target_arch = "riscv64", + target_arch = "loongarch64", + ))] + // SAFETY: The inline assembly is a no-op. + unsafe { + // Preserve flags because we don't want to pessimize user logic. + std::arch::asm!("", options(nostack, preserves_flags)); + } +} diff --git a/crates/divan_compat/divan_fork/src/time/fine_duration.rs b/crates/divan_compat/divan_fork/src/time/fine_duration.rs new file mode 100644 index 00000000..566483ed --- /dev/null +++ b/crates/divan_compat/divan_fork/src/time/fine_duration.rs @@ -0,0 +1,467 @@ +use std::{fmt, ops, time::Duration}; + +use crate::util; + +/// [Picosecond](https://en.wikipedia.org/wiki/Picosecond)-precise [`Duration`]. +#[derive(Clone, Copy, Default, PartialEq, Eq, PartialOrd, Ord)] +#[repr(transparent)] +pub(crate) struct FineDuration { + pub picos: u128, +} + +impl From for FineDuration { + #[inline] + fn from(duration: Duration) -> Self { + Self { + picos: duration + .as_nanos() + .checked_mul(1_000) + .unwrap_or_else(|| panic!("{duration:?} is too large to fit in `FineDuration`")), + } + } +} + +impl fmt::Display for FineDuration { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + let sig_figs = f.precision().unwrap_or(4); + + let picos = self.picos; + let mut scale = TimeScale::from_picos(picos); + + // Prefer formatting picoseconds as nanoseconds if we can. 
This makes + // picoseconds easier to read because they are almost always alongside + // nanosecond-scale values. + if scale == TimeScale::PicoSec && sig_figs > 3 { + scale = TimeScale::NanoSec; + } + + let multiple: u128 = { + let sig_figs = u32::try_from(sig_figs).unwrap_or(u32::MAX); + 10_u128.saturating_pow(sig_figs) + }; + + // TODO: Format without heap allocation. + let mut str: String = match picos::DAY.checked_mul(multiple) { + Some(int_day) if picos >= int_day => { + // Format using integer representation to not lose precision. + (picos / picos::DAY).to_string() + } + _ => { + // Format using floating point representation. + + // Multiply to allow `sig_figs` digits of fractional precision. + let val = (((picos * multiple) / scale.picos()) as f64) / multiple as f64; + + util::fmt::format_f64(val, sig_figs) + } + }; + + str.push(' '); + str.push_str(scale.suffix()); + + // Fill up to specified width. + if let Some(fill_len) = f.width().and_then(|width| width.checked_sub(str.len())) { + match f.align() { + None | Some(fmt::Alignment::Left) => { + str.extend(std::iter::repeat(f.fill()).take(fill_len)); + } + _ => return Err(fmt::Error), + } + } + + f.write_str(&str) + } +} + +impl fmt::Debug for FineDuration { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + fmt::Display::fmt(self, f) + } +} + +impl ops::Add for FineDuration { + type Output = Self; + + #[inline] + fn add(self, other: Self) -> Self { + Self { picos: self.picos + other.picos } + } +} + +impl ops::AddAssign for FineDuration { + #[inline] + fn add_assign(&mut self, other: Self) { + self.picos += other.picos + } +} + +impl> ops::Div for FineDuration { + type Output = Self; + + #[inline] + fn div(self, count: I) -> Self { + Self { picos: self.picos / count.into() } + } +} + +impl FineDuration { + pub const ZERO: Self = Self { picos: 0 }; + + pub const MAX: Self = Self { picos: u128::MAX }; + + #[inline] + pub fn is_zero(&self) -> bool { + self.picos == 0 + } + + /// Round up to `other` if 
`self` is zero. + #[inline] + pub fn clamp_to(self, other: Self) -> Self { + if self.is_zero() { + other + } else { + self + } + } + + /// Returns the smaller non-zero value. + #[inline] + pub fn clamp_to_min(self, other: Self) -> Self { + if self.is_zero() { + other + } else if other.is_zero() { + self + } else { + self.min(other) + } + } +} + +mod picos { + pub const NANOS: u128 = 1_000; + pub const MICROS: u128 = 1_000 * NANOS; + pub const MILLIS: u128 = 1_000 * MICROS; + pub const SEC: u128 = 1_000 * MILLIS; + pub const MIN: u128 = 60 * SEC; + pub const HOUR: u128 = 60 * MIN; + pub const DAY: u128 = 24 * HOUR; +} + +#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)] +enum TimeScale { + PicoSec, + NanoSec, + MicroSec, + MilliSec, + Sec, + Min, + Hour, + Day, +} + +impl TimeScale { + #[cfg(test)] + const ALL: &'static [Self] = &[ + Self::PicoSec, + Self::NanoSec, + Self::MicroSec, + Self::MilliSec, + Self::Sec, + Self::Min, + Self::Hour, + Self::Day, + ]; + + /// Determines the scale of time for representing a number of picoseconds. + fn from_picos(picos: u128) -> Self { + use picos::*; + + if picos < NANOS { + Self::PicoSec + } else if picos < MICROS { + Self::NanoSec + } else if picos < MILLIS { + Self::MicroSec + } else if picos < SEC { + Self::MilliSec + } else if picos < MIN { + Self::Sec + } else if picos < HOUR { + Self::Min + } else if picos < DAY { + Self::Hour + } else { + Self::Day + } + } + + /// Returns the number of picoseconds needed to reach this scale. + fn picos(self) -> u128 { + use picos::*; + + match self { + Self::PicoSec => 1, + Self::NanoSec => NANOS, + Self::MicroSec => MICROS, + Self::MilliSec => MILLIS, + Self::Sec => SEC, + Self::Min => MIN, + Self::Hour => HOUR, + Self::Day => DAY, + } + } + + /// Returns the unit suffix. 
+ fn suffix(self) -> &'static str { + match self { + Self::PicoSec => "ps", + Self::NanoSec => "ns", + Self::MicroSec => "µs", + Self::MilliSec => "ms", + Self::Sec => "s", + Self::Min => "m", + Self::Hour => "h", + Self::Day => "d", + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn clamp_to() { + #[track_caller] + fn test(a: u128, b: u128, expected: u128) { + assert_eq!( + FineDuration { picos: a }.clamp_to(FineDuration { picos: b }), + FineDuration { picos: expected } + ); + } + + test(0, 0, 0); + test(0, 1, 1); + test(0, 2, 2); + test(0, 3, 3); + + test(1, 0, 1); + test(1, 1, 1); + test(1, 2, 1); + test(1, 3, 1); + + test(2, 0, 2); + test(2, 1, 2); + test(2, 2, 2); + test(2, 3, 2); + + test(3, 0, 3); + test(3, 1, 3); + test(3, 2, 3); + test(3, 3, 3); + } + + #[test] + fn clamp_to_min() { + #[track_caller] + fn test(a: u128, b: u128, expected: u128) { + assert_eq!( + FineDuration { picos: a }.clamp_to_min(FineDuration { picos: b }), + FineDuration { picos: expected } + ); + } + + test(0, 0, 0); + test(0, 1, 1); + test(0, 2, 2); + test(0, 3, 3); + + test(1, 0, 1); + test(1, 1, 1); + test(1, 2, 1); + test(1, 3, 1); + + test(2, 0, 2); + test(2, 1, 1); + test(2, 2, 2); + test(2, 3, 2); + + test(3, 0, 3); + test(3, 1, 1); + test(3, 2, 2); + test(3, 3, 3); + } + + #[allow(clippy::zero_prefixed_literal)] + mod fmt { + use super::*; + + #[track_caller] + fn test(picos: u128, expected: &str) { + let duration = FineDuration { picos }; + assert_eq!(duration.to_string(), expected); + assert_eq!(format!("{duration:.4}"), expected); + assert_eq!(format!("{duration:<0}"), expected); + } + + macro_rules! 
assert_fmt_eq { + ($input:literal, $expected:literal) => { + assert_eq!(format!($input), format!($expected)); + }; + } + + #[test] + fn precision() { + for &scale in TimeScale::ALL { + let base_duration = FineDuration { picos: scale.picos() }; + let incr_duration = FineDuration { picos: scale.picos() + 1 }; + + if scale == TimeScale::PicoSec { + assert_eq!(format!("{base_duration:.0}"), "1 ps"); + assert_eq!(format!("{incr_duration:.0}"), "2 ps"); + } else { + let base_string = base_duration.to_string(); + assert_eq!(format!("{base_duration:.0}"), base_string); + assert_eq!(format!("{incr_duration:.0}"), base_string); + } + } + } + + #[test] + fn fill() { + for &scale in TimeScale::ALL { + // Picoseconds are formatted as nanoseconds by default. + if scale == TimeScale::PicoSec { + continue; + } + + let duration = FineDuration { picos: scale.picos() }; + let suffix = scale.suffix(); + let pad = " ".repeat(8 - suffix.len()); + + assert_fmt_eq!("{duration:<2}", "1 {suffix}"); + assert_fmt_eq!("{duration:<10}", "1 {suffix}{pad}"); + } + } + + #[test] + fn pico_sec() { + test(000, "0 ns"); + + test(001, "0.001 ns"); + test(010, "0.01 ns"); + test(100, "0.1 ns"); + + test(102, "0.102 ns"); + test(120, "0.12 ns"); + test(123, "0.123 ns"); + test(012, "0.012 ns"); + } + + #[test] + fn nano_sec() { + test(001_000, "1 ns"); + test(010_000, "10 ns"); + test(100_000, "100 ns"); + + test(100_002, "100 ns"); + test(100_020, "100 ns"); + test(100_200, "100.2 ns"); + test(102_000, "102 ns"); + test(120_000, "120 ns"); + + test(001_002, "1.002 ns"); + test(001_023, "1.023 ns"); + test(001_234, "1.234 ns"); + test(001_230, "1.23 ns"); + test(001_200, "1.2 ns"); + } + + #[test] + fn micro_sec() { + test(001_000_000, "1 µs"); + test(010_000_000, "10 µs"); + test(100_000_000, "100 µs"); + + test(100_000_002, "100 µs"); + test(100_000_020, "100 µs"); + test(100_000_200, "100 µs"); + test(100_002_000, "100 µs"); + test(100_020_000, "100 µs"); + test(100_200_000, "100.2 µs"); + 
test(102_000_000, "102 µs"); + + test(120_000_000, "120 µs"); + test(012_000_000, "12 µs"); + test(001_200_000, "1.2 µs"); + + test(001_020_000, "1.02 µs"); + test(001_002_000, "1.002 µs"); + test(001_000_200, "1 µs"); + test(001_000_020, "1 µs"); + test(001_000_002, "1 µs"); + + test(001_230_000, "1.23 µs"); + test(001_234_000, "1.234 µs"); + test(001_234_500, "1.234 µs"); + test(001_234_560, "1.234 µs"); + test(001_234_567, "1.234 µs"); + } + + #[test] + fn milli_sec() { + test(001_000_000_000, "1 ms"); + test(010_000_000_000, "10 ms"); + test(100_000_000_000, "100 ms"); + } + + #[test] + fn sec() { + test(picos::SEC, "1 s"); + test(picos::SEC * 10, "10 s"); + test(picos::SEC * 59, "59 s"); + + test(picos::MILLIS * 59_999, "59.99 s"); + } + + #[test] + fn min() { + test(picos::MIN, "1 m"); + test(picos::MIN * 10, "10 m"); + test(picos::MIN * 59, "59 m"); + + test(picos::MILLIS * 3_599_000, "59.98 m"); + test(picos::MILLIS * 3_599_999, "59.99 m"); + test(picos::HOUR - 1, "59.99 m"); + } + + #[test] + fn hour() { + test(picos::HOUR, "1 h"); + test(picos::HOUR * 10, "10 h"); + test(picos::HOUR * 23, "23 h"); + + test(picos::MILLIS * 86_300_000, "23.97 h"); + test(picos::MILLIS * 86_399_999, "23.99 h"); + test(picos::DAY - 1, "23.99 h"); + } + + #[test] + fn day() { + test(picos::DAY, "1 d"); + + test(picos::DAY + picos::DAY / 10, "1.1 d"); + test(picos::DAY + picos::DAY / 100, "1.01 d"); + test(picos::DAY + picos::DAY / 1000, "1.001 d"); + + test(picos::DAY * 000010, "10 d"); + test(picos::DAY * 000100, "100 d"); + test(picos::DAY * 001000, "1000 d"); + test(picos::DAY * 010000, "10000 d"); + test(picos::DAY * 100000, "100000 d"); + + test(u128::MAX / 1000, "3938453320844195178 d"); + test(u128::MAX, "3938453320844195178974 d"); + } + } +} diff --git a/crates/divan_compat/divan_fork/src/time/mod.rs b/crates/divan_compat/divan_fork/src/time/mod.rs new file mode 100644 index 00000000..4fbae76b --- /dev/null +++ b/crates/divan_compat/divan_fork/src/time/mod.rs @@ -0,0 
+1,38 @@ +use std::time::Duration; + +pub mod fence; + +mod fine_duration; +mod timer; +mod timestamp; + +pub(crate) use fine_duration::*; +pub(crate) use timer::*; +pub(crate) use timestamp::*; + +/// Private-public trait for being polymorphic over `Duration`. +pub trait IntoDuration { + /// Converts into a `Duration`. + fn into_duration(self) -> Duration; +} + +impl IntoDuration for Duration { + #[inline] + fn into_duration(self) -> Duration { + self + } +} + +impl IntoDuration for u64 { + #[inline] + fn into_duration(self) -> Duration { + Duration::from_secs(self) + } +} + +impl IntoDuration for f64 { + #[inline] + fn into_duration(self) -> Duration { + Duration::from_secs_f64(self) + } +} diff --git a/crates/divan_compat/divan_fork/src/time/timer.rs b/crates/divan_compat/divan_fork/src/time/timer.rs new file mode 100644 index 00000000..9e6beb28 --- /dev/null +++ b/crates/divan_compat/divan_fork/src/time/timer.rs @@ -0,0 +1,376 @@ +use std::{cmp::Ordering, num::NonZeroU64, sync::OnceLock}; + +use crate::{ + alloc::{AllocOp, ThreadAllocInfo}, + black_box, + time::{FineDuration, TscTimestamp, TscUnavailable, UntaggedTimestamp}, +}; + +/// Measures time. +#[derive(Clone, Copy, Default)] +pub(crate) enum Timer { + /// Operating system timer. + #[default] + Os, + + /// CPU timestamp counter. + Tsc { + /// [`TscTimestamp::frequency`]. + frequency: NonZeroU64, + }, +} + +impl Timer { + const COUNT: usize = 2; + + /// Returns all available timers. + #[cfg(test)] + pub fn available() -> Vec { + let mut timers = vec![Self::Os]; + + if let Ok(tsc) = Self::get_tsc() { + timers.push(tsc); + } + + timers + } + + /// Attempts to get the CPU timestamp counter. + #[inline] + pub fn get_tsc() -> Result { + Ok(Self::Tsc { frequency: TscTimestamp::frequency()? }) + } + + #[inline] + pub fn kind(self) -> TimerKind { + match self { + Self::Os => TimerKind::Os, + Self::Tsc { .. } => TimerKind::Tsc, + } + } + + /// Returns the smallest non-zero duration that this timer can measure. 
+ /// + /// The result is cached. + pub fn precision(self) -> FineDuration { + static CACHED: [OnceLock; Timer::COUNT] = [OnceLock::new(), OnceLock::new()]; + + let cached = &CACHED[self.kind() as usize]; + + *cached.get_or_init(|| self.measure_precision()) + } + + fn measure_precision(self) -> FineDuration { + let timer_kind = self.kind(); + + // Start with the worst possible minimum. + let mut min_sample = FineDuration::MAX; + let mut seen_count = 0; + + // If timing in immediate succession fails to produce a non-zero sample, + // an artificial delay is added by looping. `usize` is intentionally + // used to make looping cheap. + let mut delay_len: usize = 0; + + loop { + for _ in 0..100 { + // Use `UntaggedTimestamp` to minimize overhead. + let sample_start: UntaggedTimestamp; + let sample_end: UntaggedTimestamp; + + if delay_len == 0 { + // Immediate succession. + sample_start = UntaggedTimestamp::start(timer_kind); + sample_end = UntaggedTimestamp::end(timer_kind); + } else { + // Add delay. + sample_start = UntaggedTimestamp::start(timer_kind); + for n in 0..delay_len { + crate::black_box(n); + } + sample_end = UntaggedTimestamp::end(timer_kind); + } + + // SAFETY: These values are guaranteed to be the correct variant + // because they were created from the same `timer_kind`. + let [sample_start, sample_end] = unsafe { + [sample_start.into_timestamp(timer_kind), sample_end.into_timestamp(timer_kind)] + }; + + let sample = sample_end.duration_since(sample_start, self); + + // Discard sample if irrelevant. + if sample.is_zero() { + continue; + } + + match sample.cmp(&min_sample) { + Ordering::Greater => { + // If we have already delayed a lot and still have not hit the + // seen count threshold, then use the current minimum. + if delay_len > 100 { + return min_sample; + } + } + Ordering::Equal => { + seen_count += 1; + + // If we've seen this min 100 times, we have high + // confidence this is the smallest duration. 
+ if seen_count >= 100 { + return min_sample; + } + } + Ordering::Less => { + min_sample = sample; + seen_count = 0; + } + } + } + + delay_len = delay_len.saturating_add(1); + } + } + + /// Returns the overheads added by the benchmarker. + /// + /// `min_time` and `max_time` do not consider this as benchmarking time. + pub fn bench_overheads(self) -> &'static TimedOverhead { + // Miri is slow, so don't waste time on this. + if cfg!(miri) { + return &TimedOverhead::ZERO; + } + + static CACHED: [OnceLock; Timer::COUNT] = [OnceLock::new(), OnceLock::new()]; + + let cached = &CACHED[self.kind() as usize]; + + cached.get_or_init(|| TimedOverhead { + sample_loop: self.sample_loop_overhead(), + tally_alloc: self.measure_tally_alloc_overhead(), + tally_dealloc: self.measure_tally_dealloc_overhead(), + tally_realloc: self.measure_tally_realloc_overhead(), + }) + } + + /// Returns the per-iteration overhead of the benchmarking sample loop. + fn sample_loop_overhead(self) -> FineDuration { + // Miri is slow, so don't waste time on this. + if cfg!(miri) { + return FineDuration::default(); + } + + static CACHED: [OnceLock; Timer::COUNT] = [OnceLock::new(), OnceLock::new()]; + + let cached = &CACHED[self.kind() as usize]; + + *cached.get_or_init(|| self.measure_sample_loop_overhead()) + } + + /// Calculates the per-iteration overhead of the benchmarking sample loop. + fn measure_sample_loop_overhead(self) -> FineDuration { + let timer_kind = self.kind(); + + let sample_count: usize = 100; + let sample_size: usize = 10_000; + + // The minimum non-zero sample. + let mut min_sample = FineDuration::default(); + + for _ in 0..sample_count { + let start = UntaggedTimestamp::start(timer_kind); + + for i in 0..sample_size { + _ = crate::black_box(i); + } + + let end = UntaggedTimestamp::end(timer_kind); + + // SAFETY: These values are guaranteed to be the correct variant because + // they were created from the same `timer_kind`. 
/// Measures the per-call cost of `operation` when applied to the current
/// thread's allocation info.
///
/// The timing itself is delegated to `measure_min_time` with 100 samples of
/// 50,000 calls each. The thread's allocation info is cleared afterwards so
/// the tallies recorded by `operation` during measurement are discarded.
// SAFETY: This function is not reentrant. Calling it within `operation`
// would cause aliasing of `ThreadAllocInfo::current`.
fn measure_alloc_info_overhead(self, operation: impl Fn(&mut ThreadAllocInfo)) -> FineDuration {
    // Initialize the current thread's alloc info.
    let alloc_info = ThreadAllocInfo::current();

    let sample_count = 100;
    let sample_size = 50_000;

    let result = self.measure_min_time(sample_count, sample_size, || {
        // The info is looked up again on every call, inside the timed region;
        // `operation` is skipped when `try_current` yields `None`.
        if let Some(mut alloc_info) = ThreadAllocInfo::try_current() {
            // SAFETY: We have exclusive access.
            operation(unsafe { alloc_info.as_mut() });
        }
    });

    // Clear alloc info.
    if let Some(mut alloc_info) = alloc_info {
        // SAFETY: We have exclusive access.
        let alloc_info = unsafe { alloc_info.as_mut() };

        alloc_info.clear();
    }

    result
}
/// Times `sample_count` samples, each consisting of `sample_size` consecutive
/// calls to `operation`, and reduces them to a single per-call duration.
///
/// Each sample's elapsed time is divided by `sample_size` and then has the
/// per-iteration sample-loop overhead subtracted (saturating at zero).
///
/// NOTE(review): `sample_size` must be non-zero, otherwise the division below
/// panics; the callers visible in this file pass constants.
fn measure_min_time(
    self,
    sample_count: usize,
    sample_size: usize,
    operation: impl Fn(),
) -> FineDuration {
    let timer_kind = self.kind();

    // Fetched before the timed region starts.
    let loop_overhead = self.sample_loop_overhead();
    let mut min_sample = FineDuration::default();

    for _ in 0..sample_count {
        let start = UntaggedTimestamp::start(timer_kind);

        for _ in 0..sample_size {
            operation();
        }

        let end = UntaggedTimestamp::end(timer_kind);

        // SAFETY: These values are guaranteed to be the correct variant
        // because they were created from the same `timer_kind`.
        let [start, end] =
            unsafe { [start.into_timestamp(timer_kind), end.into_timestamp(timer_kind)] };

        // Convert the whole-sample time into a per-call time.
        let mut sample = end.duration_since(start, self);
        sample.picos /= sample_size as u128;

        // Remove benchmarking loop overhead.
        sample.picos = sample.picos.saturating_sub(loop_overhead.picos);

        // `clamp_to_min` is defined elsewhere; presumably it keeps the
        // smaller non-zero value -- TODO confirm.
        min_sample = min_sample.clamp_to_min(sample);
    }

    min_sample
}
+pub(crate) struct TimedOverhead { + pub sample_loop: FineDuration, + pub tally_alloc: FineDuration, + pub tally_dealloc: FineDuration, + pub tally_realloc: FineDuration, +} + +impl TimedOverhead { + pub const ZERO: Self = Self { + sample_loop: FineDuration::ZERO, + tally_alloc: FineDuration::ZERO, + tally_dealloc: FineDuration::ZERO, + tally_realloc: FineDuration::ZERO, + }; + + pub fn total_overhead(&self, sample_size: u32, alloc_info: &ThreadAllocInfo) -> FineDuration { + let sample_loop_overhead = self.sample_loop.picos.saturating_mul(sample_size as u128); + + let tally_alloc_overhead = self + .tally_alloc + .picos + .saturating_mul(alloc_info.tallies.get(AllocOp::Alloc).count as u128); + + let tally_dealloc_overhead = self + .tally_dealloc + .picos + .saturating_mul(alloc_info.tallies.get(AllocOp::Dealloc).count as u128); + + let tally_realloc_overhead = self.tally_realloc.picos.saturating_mul( + alloc_info.tallies.get(AllocOp::Grow).count as u128 + + alloc_info.tallies.get(AllocOp::Shrink).count as u128, + ); + + FineDuration { + picos: sample_loop_overhead + .saturating_add(tally_alloc_overhead) + .saturating_add(tally_dealloc_overhead) + .saturating_add(tally_realloc_overhead), + } + } +} + +#[cfg(feature = "internal_benches")] +mod benches { + use super::*; + + #[crate::bench(crate = crate)] + fn get_tsc() -> Result { + Timer::get_tsc() + } + + mod measure { + use super::*; + + #[crate::bench(crate = crate)] + fn precision() -> FineDuration { + Timer::Os.measure_precision() + } + + #[crate::bench(crate = crate)] + fn sample_loop_overhead() -> FineDuration { + Timer::Os.measure_sample_loop_overhead() + } + + #[crate::bench(crate = crate)] + fn tally_alloc_overhead() -> FineDuration { + Timer::Os.measure_tally_alloc_overhead() + } + + #[crate::bench(crate = crate)] + fn tally_dealloc_overhead() -> FineDuration { + Timer::Os.measure_tally_dealloc_overhead() + } + + #[crate::bench(crate = crate)] + fn tally_realloc_overhead() -> FineDuration { + 
/// A [`Timestamp`] where the variant is determined by an external source of
/// truth.
///
/// By making the variant tag external to this type, we produce more optimized
/// code by:
/// - Reusing the same condition variable
/// - Reducing the size of the timestamp variables
///
/// Because the tag is not stored, reading a field is only sound when the
/// caller supplies the same `TimerKind` that was used to create the value.
#[derive(Clone, Copy)]
pub(crate) union UntaggedTimestamp {
    /// [`Timestamp::Os`].
    pub os: Instant,

    /// [`Timestamp::Tsc`].
    pub tsc: TscTimestamp,
}

impl UntaggedTimestamp {
    /// Takes the timestamp that opens a timed region.
    ///
    /// A full fence is issued before the read and a compiler fence after it.
    #[inline(always)]
    pub fn start(timer_kind: TimerKind) -> Self {
        fence::full_fence();
        let value = match timer_kind {
            TimerKind::Os => Self { os: Instant::now() },
            TimerKind::Tsc => Self { tsc: TscTimestamp::start() },
        };
        fence::compiler_fence();
        value
    }

    /// Takes the timestamp that closes a timed region.
    ///
    /// Mirror image of [`UntaggedTimestamp::start`]: a compiler fence is
    /// issued before the read and a full fence after it.
    #[inline(always)]
    pub fn end(timer_kind: TimerKind) -> Self {
        fence::compiler_fence();
        let value = match timer_kind {
            TimerKind::Os => Self { os: Instant::now() },
            TimerKind::Tsc => Self { tsc: TscTimestamp::end() },
        };
        fence::full_fence();
        value
    }

    /// Re-attaches the variant tag, producing a [`Timestamp`].
    ///
    /// # Safety
    ///
    /// `timer_kind` must be the same kind that was passed to `start`/`end`
    /// when this value was created; otherwise the union field that is read
    /// was never written.
    #[inline(always)]
    pub unsafe fn into_timestamp(self, timer_kind: TimerKind) -> Timestamp {
        match timer_kind {
            TimerKind::Os => Timestamp::Os(self.os),
            TimerKind::Tsc => Timestamp::Tsc(self.tsc),
        }
    }
}
/// Returns the raw value of the `cntvct_el0` counter as read by a single
/// `mrs` instruction.
#[inline(always)]
pub(crate) fn timestamp() -> u64 {
    // SAFETY: The asm block only writes its single output register; it
    // declares no stack use and preserves flags.
    // NOTE(review): assumes `cntvct_el0` is readable at the current exception
    // level -- confirm for the supported targets.
    unsafe {
        let timestamp: u64;
        asm!(
            "mrs {}, cntvct_el0",
            out(reg) timestamp,
            // Leave off `nomem` because this should be a compiler fence.
            options(nostack, preserves_flags),
        );
        timestamp
    }
}
/// Why the CPU timestamp counter cannot be used as a timer.
#[derive(Clone, Copy)]
pub(crate) enum TscUnavailable {
    /// Support for this platform has not been written yet.
    Unimplemented,

    /// The reported counter frequency was 0.
    ZeroFrequency,

    /// The CPU lacks the instructions needed to read the counter.
    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
    MissingInstructions,

    /// The counter is not guaranteed to tick at a constant rate.
    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
    VariableFrequency,
}

impl TscUnavailable {
    /// Short, fixed description of this reason.
    fn reason(&self) -> &'static str {
        match self {
            Self::Unimplemented => "unimplemented",
            Self::ZeroFrequency => "zero TSC frequency",

            #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
            Self::MissingInstructions => "missing instructions",

            #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
            Self::VariableFrequency => "variable TSC frequency",
        }
    }
}

impl fmt::Display for TscUnavailable {
    /// Writes the reason text verbatim; width and fill flags are not applied.
    fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
        formatter.write_str(self.reason())
    }
}
+ // + // The nominal frequency is used for getting an exact value if the measured + // frequency is slightly off. It is not blindly trusted because it may not + // match the TSC frequency. + if let Some(nominal) = nominal { + if measured * 0.999 < nominal && nominal < measured * 1.001 { + return Ok(nominal.round() as u64); + } + } + + Ok(measured.round() as u64) +} + +/// Parses the CPU frequency in the brand name, e.g. "2.50GHz". +fn nominal_frequency() -> Option { + let name = util::cpu_name()?; + let name = { + let len = name.iter().position(|&ch| ch == 0).unwrap_or(name.len()); + std::str::from_utf8(&name[..len]).ok()? + }; + + #[rustfmt::skip] + let frequencies = [ + ("MHz", 1e6), + ("GHz", 1e9), + ("THz", 1e12), + ]; + + for (unit, scale) in frequencies { + let Some(unit_start) = name.find(unit) else { + continue; + }; + + let pre_unit = &name[..unit_start]; + let num = match pre_unit.rsplit_once(' ') { + Some((_, num)) => num, + None => pre_unit, + }; + + if let Ok(num) = num.parse::() { + return Some(num * scale); + }; + } + + None +} + +mod util { + use super::*; + + #[inline(always)] + pub fn rdtsc() -> u64 { + fence::compiler_fence(); + + // SAFETY: Reading the TSC is memory safe. + let tsc = unsafe { x86::_rdtsc() }; + + fence::compiler_fence(); + tsc + } + + #[inline(always)] + pub fn rdtscp() -> u64 { + fence::compiler_fence(); + + // SAFETY: Reading the TSC is memory safe. + let tsc = unsafe { x86::__rdtscp(&mut 0) }; + + fence::compiler_fence(); + tsc + } + + #[inline(always)] + pub fn lfence() { + // SAFETY: A load fence is memory safe. + unsafe { x86::_mm_lfence() } + } + + #[inline] + fn cpuid(leaf: u32) -> x86::CpuidResult { + // SAFETY: `cpuid` is never unsafe to call. + unsafe { x86::__cpuid(leaf) } + } + + /// Invokes CPUID and converts its output registers to an ordered array. 
+ #[inline] + fn cpuid_array(leaf: u32) -> [u32; 4] { + let cpuid = cpuid(leaf); + [cpuid.eax, cpuid.ebx, cpuid.ecx, cpuid.edx] + } + + /// Returns `true` if the given CPUID leaf is available. + #[inline] + fn cpuid_has_leaf(leaf: u32) -> bool { + cpuid(0x8000_0000).eax >= leaf + } + + /// Returns `true` if CPUID indicates that the `rdtsc` and `rdtscp` + /// instructions are available. + #[inline] + pub fn tsc_is_available() -> bool { + let bits = cpuid(0x8000_0001).edx; + + let rdtsc = 1 << 4; + let rdtscp = 1 << 27; + + bits & (rdtsc | rdtscp) != 0 + } + + /// Returns `true` if CPUID indicates that the timestamp counter has a + /// constant frequency. + #[inline] + pub fn tsc_is_invariant() -> bool { + let leaf = 0x8000_0007; + + if !cpuid_has_leaf(leaf) { + return false; + } + + cpuid(leaf).edx & (1 << 8) != 0 + } + + /// Returns the processor model name as a null-terminated ASCII string. + pub fn cpu_name() -> Option<[u8; 48]> { + if !cpuid_has_leaf(0x8000_0004) { + return None; + } + + #[rustfmt::skip] + let result = [ + cpuid_array(0x8000_0002), + cpuid_array(0x8000_0003), + cpuid_array(0x8000_0004), + ]; + + // SAFETY: Converting from `u32` to bytes. + Some(unsafe { std::mem::transmute(result) }) + } +} + +mod measure { + use super::*; + + /// Returns the TSC frequency by measuring it. + pub fn measure_frequency() -> f64 { + const TRIES: usize = 8; + + // Start with delay of 1ms up to 256ms (2^TRIES). + let mut delay_ms = 1; + + let mut prev_measure = f64::NEG_INFINITY; + let mut measures = [0.0; TRIES]; + + for slot in &mut measures { + let measure = measure_frequency_once(Duration::from_millis(delay_ms)); + + // This measurement is sufficiently accurate if within 0.1% of the + // previous. + if measure * 0.999 < prev_measure && prev_measure < measure * 1.001 { + return measure; + } + + *slot = measure; + prev_measure = measure; + + delay_ms *= 2; + } + + // If no frequencies were within 0.1% of each other, find the frequency + // with the smallest delta. 
/// Returns a timestamp/instant pair that has a small latency between
/// getting the two values.
///
/// Up to 100 pairs are taken. The latency of a pair is the time elapsed on
/// the `Instant` by the point the TSC has been read; the pair with the lowest
/// latency wins, and a pair with zero measured latency is returned at once.
fn tsc_instant_pair() -> (u64, Instant) {
    let mut best_latency = Duration::MAX;
    // Placeholder that is replaced by the first attempt, since any measured
    // latency is below `Duration::MAX`.
    let mut best_pair = (0, Instant::now());

    // Make up to 100 attempts to get a low latency pair.
    for _ in 0..100 {
        // Order matters: the instant is taken first so that `elapsed()`
        // brackets the TSC read.
        let instant = Instant::now();
        let tsc = util::rdtsc();
        let latency = instant.elapsed();

        let pair = (tsc, instant);

        // Cannot do better than no measurable gap.
        if latency.is_zero() {
            return pair;
        }

        if latency < best_latency {
            best_latency = latency;
            best_pair = pair;
        }
    }

    best_pair
}
+ max_name_span: usize, + + column_widths: [usize; TreeColumn::COUNT], + + depth: usize, + + /// The current prefix to the name and content, e.g. + /// │ │ for three levels of nesting with the second level + /// being on the last node. + current_prefix: String, + + /// Buffer for writing to before printing to stdout. + write_buf: String, +} + +impl TreePainter { + pub fn new(max_name_span: usize, column_widths: [usize; TreeColumn::COUNT]) -> Self { + Self { + max_name_span, + column_widths, + depth: 0, + current_prefix: String::new(), + write_buf: String::new(), + } + } +} + +impl TreePainter { + /// Enter a parent node. + pub fn start_parent(&mut self, name: &str, is_last: bool) { + let is_top_level = self.depth == 0; + let has_columns = self.has_columns(); + + let buf = &mut self.write_buf; + buf.clear(); + + let branch = if is_top_level { + "" + } else if !is_last { + "├─ " + } else { + "╰─ " + }; + buf.extend([self.current_prefix.as_str(), branch, name]); + + // Right-pad name if `has_columns` + if has_columns { + let max_span = self.max_name_span; + let buf_len = buf.chars().count(); + let pad_len = TREE_COL_BUF + max_span.saturating_sub(buf_len); + buf.extend(repeat(' ').take(pad_len)); + + if buf_len > max_span { + self.max_name_span = buf_len; + } + } + + // Write column headings. + if has_columns && is_top_level { + let names = TreeColumnData::from_fn(TreeColumn::name); + names.write(buf, &mut self.column_widths); + } + + // Write column spacers. + if has_columns && !is_top_level { + TreeColumnData([""; TreeColumn::COUNT]).write(buf, &mut self.column_widths); + } + + println!("{buf}"); + + self.depth += 1; + + if !is_top_level { + self.current_prefix.push_str(if !is_last { "│ " } else { " " }); + } + } + + /// Exit the current parent node. + pub fn finish_parent(&mut self) { + self.depth -= 1; + + // Improve legibility for multiple top-level parents. + if self.depth == 0 { + println!(); + } + + // The prefix is extended by 3 `char`s at a time. 
+ let new_prefix_len = { + let mut iter = self.current_prefix.chars(); + _ = iter.by_ref().rev().nth(2); + iter.as_str().len() + }; + self.current_prefix.truncate(new_prefix_len); + } + + /// Indicate that the next child node was ignored. + /// + /// This semantically combines start/finish operations. + pub fn ignore_leaf(&mut self, name: &str, is_last: bool) { + let has_columns = self.has_columns(); + + let buf = &mut self.write_buf; + buf.clear(); + + let branch = if !is_last { "├─ " } else { "╰─ " }; + buf.extend([self.current_prefix.as_str(), branch, name]); + + right_pad_buffer(buf, &mut self.max_name_span); + + if has_columns { + TreeColumnData::from_first("(ignored)").write(buf, &mut self.column_widths); + } else { + buf.push_str("(ignored)"); + } + + println!("{buf}"); + } + + /// Enter a leaf node. + pub fn start_leaf(&mut self, name: &str, is_last: bool) { + let has_columns = self.has_columns(); + + let buf = &mut self.write_buf; + buf.clear(); + + let branch = if !is_last { "├─ " } else { "╰─ " }; + buf.extend([self.current_prefix.as_str(), branch, name]); + + // Right-pad buffer if this leaf will have info displayed. + if has_columns { + let max_span = self.max_name_span; + let buf_len = buf.chars().count(); + let pad_len = TREE_COL_BUF + max_span.saturating_sub(buf_len); + buf.extend(repeat(' ').take(pad_len)); + + if buf_len > max_span { + self.max_name_span = buf_len; + } + } + + print!("{buf}"); + _ = std::io::stdout().flush(); + } + + /// Exit the current leaf node. + pub fn finish_empty_leaf(&mut self) { + println!(); + } + + /// Exit the current leaf node, emitting statistics. 
+ pub fn finish_leaf(&mut self, is_last: bool, stats: &Stats, bytes_format: BytesFormat) { + let prep_buffer = |buf: &mut String, max_span: &mut usize| { + buf.clear(); + buf.push_str(&self.current_prefix); + + if !is_last { + buf.push('│'); + } + + right_pad_buffer(buf, max_span); + }; + + let buf = &mut self.write_buf; + buf.clear(); + + // Serialize max alloc counts and sizes early so we can resize columns + // early. + let serialized_max_alloc_counts = if stats.max_alloc.size.is_zero() { + None + } else { + Some(TreeColumn::ALL.map(|column| { + let Some(&max_alloc_count) = column.get_stat(&stats.max_alloc.count) else { + return String::new(); + }; + + let prefix = if column.is_first() { " " } else { "" }; + format!("{prefix}{}", util::fmt::format_f64(max_alloc_count, 4)) + })) + }; + + let serialized_max_alloc_sizes = if stats.max_alloc.size.is_zero() { + None + } else { + Some(TreeColumn::ALL.map(|column| { + let Some(&max_alloc_size) = column.get_stat(&stats.max_alloc.size) else { + return String::new(); + }; + + let prefix = if column.is_first() { " " } else { "" }; + format!("{prefix}{}", util::fmt::format_bytes(max_alloc_size, 4, bytes_format)) + })) + }; + + // Serialize alloc tallies early so we can resize columns early. 
+ let serialized_alloc_tallies = AllocOp::ALL.map(|op| { + let tally = stats.alloc_tallies.get(op); + + if tally.is_zero() { + return None; + } + + let column_tallies = TreeColumn::ALL.map(|column| { + let prefix = if column.is_first() { " " } else { "" }; + + let tally = AllocTally { + count: column.get_stat(&tally.count).copied()?, + size: column.get_stat(&tally.size).copied()?, + }; + + Some((prefix, tally)) + }); + + Some(AllocTally { + count: column_tallies.map(|tally| { + if let Some((prefix, tally)) = tally { + format!("{prefix}{}", util::fmt::format_f64(tally.count, 4)) + } else { + String::new() + } + }), + size: column_tallies.map(|tally| { + if let Some((prefix, tally)) = tally { + format!("{prefix}{}", util::fmt::format_bytes(tally.size, 4, bytes_format)) + } else { + String::new() + } + }), + }) + }); + + // Serialize counter stats early so we can resize columns early. + let serialized_counters = KnownCounterKind::ALL.map(|counter_kind| { + let counter_stats = stats.get_counts(counter_kind); + + TreeColumn::ALL + .map(|column| -> Option { + let count = *column.get_stat(counter_stats?)?; + let time = *column.get_stat(&stats.time)?; + + Some( + AnyCounter::known(counter_kind, count) + .display_throughput(time, bytes_format) + .to_string(), + ) + }) + .map(Option::unwrap_or_default) + }); + + // Set column widths based on serialized strings. 
+ for column in TreeColumn::time_stats() { + let width = &mut self.column_widths[column as usize]; + + let mut update_width = |s: &str| { + *width = (*width).max(s.chars().count()); + }; + + for counter in &serialized_counters { + update_width(&counter[column as usize]); + } + + let serialized_max_alloc_counts = serialized_max_alloc_counts.iter().flatten(); + let serialized_max_alloc_sizes = serialized_max_alloc_sizes.iter().flatten(); + for s in serialized_max_alloc_counts.chain(serialized_max_alloc_sizes) { + update_width(s); + } + + for s in serialized_alloc_tallies + .iter() + .flatten() + .flat_map(AllocTally::as_array) + .map(|values| &values[column as usize]) + { + update_width(s); + } + } + + // Write time stats with iter and sample counts. + TreeColumnData::from_fn(|column| -> String { + let stat: &dyn ToString = match column { + TreeColumn::Fastest => &stats.time.fastest, + TreeColumn::Slowest => &stats.time.slowest, + TreeColumn::Median => &stats.time.median, + TreeColumn::Mean => &stats.time.mean, + TreeColumn::Samples => &stats.sample_count, + TreeColumn::Iters => &stats.iter_count, + }; + stat.to_string() + }) + .as_ref::() + .write(buf, &mut self.column_widths); + + println!("{buf}"); + + // Write counter stats. + let counter_stats = serialized_counters.map(TreeColumnData); + for counter_kind in KnownCounterKind::ALL { + let counter_stats = counter_stats[counter_kind as usize].as_ref::(); + + // Skip empty rows. + if counter_stats.0.iter().all(|s| s.is_empty()) { + continue; + } + + prep_buffer(buf, &mut self.max_name_span); + + counter_stats.write(buf, &mut self.column_widths); + println!("{buf}"); + } + + // Write max allocated bytes. 
+ if serialized_max_alloc_counts.is_some() || serialized_max_alloc_sizes.is_some() { + prep_buffer(buf, &mut self.max_name_span); + + TreeColumnData::from_first("max alloc:").write(buf, &mut self.column_widths); + println!("{buf}"); + + for serialized in + [serialized_max_alloc_counts.as_ref(), serialized_max_alloc_sizes.as_ref()] + .into_iter() + .flatten() + { + prep_buffer(buf, &mut self.max_name_span); + + TreeColumnData::from_fn(|column| serialized[column as usize].as_str()) + .write(buf, &mut self.column_widths); + + println!("{buf}"); + } + } + + // Write allocation tallies. + for op in [AllocOp::Alloc, AllocOp::Dealloc, AllocOp::Grow, AllocOp::Shrink] { + let Some(tallies) = &serialized_alloc_tallies[op as usize] else { + continue; + }; + + prep_buffer(buf, &mut self.max_name_span); + + TreeColumnData::from_first(op.prefix()).write(buf, &mut self.column_widths); + println!("{buf}"); + + for value in tallies.as_array() { + prep_buffer(buf, &mut self.max_name_span); + + TreeColumnData::from_fn(|column| value[column as usize].as_str()) + .write(buf, &mut self.column_widths); + + println!("{buf}"); + } + } + } + + fn has_columns(&self) -> bool { + !self.column_widths.iter().all(|&w| w == 0) + } +} + +/// Columns of the table next to the tree. +#[derive(Clone, Copy, PartialEq, Eq)] +pub(crate) enum TreeColumn { + Fastest, + Slowest, + Median, + Mean, + Samples, + Iters, +} + +impl TreeColumn { + pub const COUNT: usize = 6; + + pub const ALL: [Self; Self::COUNT] = { + use TreeColumn::*; + [Fastest, Slowest, Median, Mean, Samples, Iters] + }; + + #[inline] + pub fn time_stats() -> impl Iterator { + use TreeColumn::*; + [Fastest, Slowest, Median, Mean].into_iter() + } + + #[inline] + pub fn is_first(self) -> bool { + let [first, ..] 
= Self::ALL; + self == first + } + + #[inline] + pub fn is_last(self) -> bool { + let [.., last] = Self::ALL; + self == last + } + + fn name(self) -> &'static str { + match self { + Self::Fastest => "fastest", + Self::Slowest => "slowest", + Self::Median => "median", + Self::Mean => "mean", + Self::Samples => "samples", + Self::Iters => "iters", + } + } + + #[inline] + pub fn is_time_stat(self) -> bool { + use TreeColumn::*; + matches!(self, Fastest | Slowest | Median | Mean) + } + + #[inline] + fn get_stat(self, stats: &StatsSet) -> Option<&T> { + match self { + Self::Fastest => Some(&stats.fastest), + Self::Slowest => Some(&stats.slowest), + Self::Median => Some(&stats.median), + Self::Mean => Some(&stats.mean), + Self::Samples | Self::Iters => None, + } + } +} + +#[derive(Default)] +struct TreeColumnData([T; TreeColumn::COUNT]); + +impl TreeColumnData { + #[inline] + fn from_first(value: T) -> Self + where + Self: Default, + { + let mut data = Self::default(); + data.0[0] = value; + data + } + + #[inline] + fn from_fn(f: F) -> Self + where + F: FnMut(TreeColumn) -> T, + { + Self(TreeColumn::ALL.map(f)) + } +} + +impl TreeColumnData<&str> { + /// Writes the column data into the buffer. + fn write(&self, buf: &mut String, column_widths: &mut [usize; TreeColumn::COUNT]) { + for (column, value) in self.0.iter().enumerate() { + let is_first = column == 0; + let is_last = column == TreeColumn::COUNT - 1; + + let value_width = value.chars().count(); + + // Write separator. + if !is_first { + let mut sep = " │ "; + + // Prevent trailing spaces. + if is_last && value_width == 0 { + sep = &sep[..sep.len() - 1]; + }; + + buf.push_str(sep); + } + + buf.push_str(value); + + // Right-pad remaining width or update column width to new maximum. 
+ if !is_last { + if let Some(rem_width) = column_widths[column].checked_sub(value_width) { + buf.extend(repeat(' ').take(rem_width)); + } else { + column_widths[column] = value_width; + } + } + } + } +} + +impl TreeColumnData { + #[inline] + fn as_ref(&self) -> TreeColumnData<&U> + where + T: AsRef, + { + TreeColumnData::from_fn(|column| self.0[column as usize].as_ref()) + } +} + +fn right_pad_buffer(buf: &mut String, max_span: &mut usize) { + let buf_len = buf.chars().count(); + let pad_len = TREE_COL_BUF + max_span.saturating_sub(buf_len); + buf.extend(repeat(' ').take(pad_len)); + + if buf_len > *max_span { + *max_span = buf_len; + } +} diff --git a/crates/divan_compat/divan_fork/src/util/fmt.rs b/crates/divan_compat/divan_fork/src/util/fmt.rs new file mode 100644 index 00000000..4b8a4bec --- /dev/null +++ b/crates/divan_compat/divan_fork/src/util/fmt.rs @@ -0,0 +1,229 @@ +use std::fmt; + +use crate::counter::{AnyCounter, BytesFormat, KnownCounterKind}; + +/// Formats an `f64` to the given number of significant figures. +pub(crate) fn format_f64(val: f64, sig_figs: usize) -> String { + let mut str = val.to_string(); + + if let Some(dot_index) = str.find('.') { + let fract_digits = sig_figs.saturating_sub(dot_index); + + if fract_digits == 0 { + str.truncate(dot_index); + } else { + let fract_start = dot_index + 1; + let fract_end = fract_start + fract_digits; + let fract_range = fract_start..fract_end; + + if let Some(fract_str) = str.get(fract_range) { + // Get the offset from the end before all 0s. 
+ let pre_zero = fract_str.bytes().rev().enumerate().find_map(|(i, b)| { + if b != b'0' { + Some(i) + } else { + None + } + }); + + if let Some(pre_zero) = pre_zero { + str.truncate(fract_end - pre_zero); + } else { + str.truncate(dot_index); + } + } + } + } + + str +} + +pub(crate) fn format_bytes(val: f64, sig_figs: usize, bytes_format: BytesFormat) -> String { + let (val, scale) = scale_value(val, bytes_format); + + let mut result = format_f64(val, sig_figs); + result.push(' '); + result.push_str(scale.suffix(ScaleFormat::Bytes(bytes_format))); + result +} + +pub(crate) struct DisplayThroughput<'a> { + pub counter: &'a AnyCounter, + pub picos: f64, + pub bytes_format: BytesFormat, +} + +impl fmt::Debug for DisplayThroughput<'_> { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + fmt::Display::fmt(self, f) + } +} + +impl fmt::Display for DisplayThroughput<'_> { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + let picos = self.picos; + let count = self.counter.count(); + let count_per_sec = if count == 0 { 0. } else { count as f64 * (1e12 / picos) }; + + let format = match self.counter.kind { + KnownCounterKind::Bytes => ScaleFormat::BytesThroughput(self.bytes_format), + KnownCounterKind::Chars => ScaleFormat::CharsThroughput, + KnownCounterKind::Cycles => ScaleFormat::CyclesThroughput, + KnownCounterKind::Items => ScaleFormat::ItemsThroughput, + }; + + let (val, scale) = scale_value(count_per_sec, format.bytes_format()); + + let sig_figs = f.precision().unwrap_or(4); + + let mut str = format_f64(val, sig_figs); + str.push(' '); + str.push_str(scale.suffix(format)); + + // Fill up to specified width. + if let Some(fill_len) = f.width().and_then(|width| width.checked_sub(str.len())) { + match f.align() { + None | Some(fmt::Alignment::Left) => { + str.extend(std::iter::repeat(f.fill()).take(fill_len)); + } + _ => return Err(fmt::Error), + } + } + + f.write_str(&str) + } +} + +/// Converts a value to the appropriate scale. 
+fn scale_value(value: f64, bytes_format: BytesFormat) -> (f64, Scale) { + let starts = scale_starts(bytes_format); + + let scale = if value.is_infinite() || value < starts[1] { + Scale::One + } else if value < starts[2] { + Scale::Kilo + } else if value < starts[3] { + Scale::Mega + } else if value < starts[4] { + Scale::Giga + } else if value < starts[5] { + Scale::Tera + } else { + Scale::Peta + }; + + (value / starts[scale as usize], scale) +} + +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub(crate) enum Scale { + One, + Kilo, + Mega, + Giga, + Tera, + Peta, +} + +#[derive(Clone, Copy)] +pub(crate) enum ScaleFormat { + Bytes(BytesFormat), + BytesThroughput(BytesFormat), + CharsThroughput, + CyclesThroughput, + ItemsThroughput, +} + +impl ScaleFormat { + pub fn bytes_format(self) -> BytesFormat { + match self { + Self::Bytes(format) | Self::BytesThroughput(format) => format, + Self::CharsThroughput | Self::CyclesThroughput | Self::ItemsThroughput => { + BytesFormat::Decimal + } + } + } +} + +fn scale_starts(bytes_format: BytesFormat) -> &'static [f64; Scale::COUNT] { + const STARTS: &[[f64; Scale::COUNT]; 2] = &[ + [1., 1e3, 1e6, 1e9, 1e12, 1e15], + [ + 1., + 1024., + 1024u64.pow(2) as f64, + 1024u64.pow(3) as f64, + 1024u64.pow(4) as f64, + 1024u64.pow(5) as f64, + ], + ]; + + &STARTS[bytes_format as usize] +} + +impl Scale { + const COUNT: usize = 6; + + pub fn suffix(self, format: ScaleFormat) -> &'static str { + match format { + ScaleFormat::Bytes(format) => { + const SUFFIXES: &[[&str; Scale::COUNT]; 2] = &[ + ["B", "KB", "MB", "GB", "TB", "PB"], + ["B", "KiB", "MiB", "GiB", "TiB", "PiB"], + ]; + + SUFFIXES[format as usize][self as usize] + } + ScaleFormat::BytesThroughput(format) => { + const SUFFIXES: &[[&str; Scale::COUNT]; 2] = &[ + ["B/s", "KB/s", "MB/s", "GB/s", "TB/s", "PB/s"], + ["B/s", "KiB/s", "MiB/s", "GiB/s", "TiB/s", "PiB/s"], + ]; + + SUFFIXES[format as usize][self as usize] + } + ScaleFormat::CharsThroughput => { + const SUFFIXES: &[&str; 
Scale::COUNT] = + &["char/s", "Kchar/s", "Mchar/s", "Gchar/s", "Tchar/s", "Pchar/s"]; + + SUFFIXES[self as usize] + } + ScaleFormat::CyclesThroughput => { + const SUFFIXES: &[&str; Scale::COUNT] = &["Hz", "KHz", "MHz", "GHz", "THz", "PHz"]; + + SUFFIXES[self as usize] + } + ScaleFormat::ItemsThroughput => { + const SUFFIXES: &[&str; Scale::COUNT] = + &["item/s", "Kitem/s", "Mitem/s", "Gitem/s", "Titem/s", "Pitem/s"]; + + SUFFIXES[self as usize] + } + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn scale_value() { + #[track_caller] + fn test(n: f64, format: BytesFormat, expected_value: f64, expected_scale: Scale) { + assert_eq!(super::scale_value(n, format), (expected_value, expected_scale)); + } + + #[track_caller] + fn test_decimal(n: f64, expected_value: f64, expected_scale: Scale) { + test(n, BytesFormat::Decimal, expected_value, expected_scale); + } + + test_decimal(1., 1., Scale::One); + test_decimal(1_000., 1., Scale::Kilo); + test_decimal(1_000_000., 1., Scale::Mega); + test_decimal(1_000_000_000., 1., Scale::Giga); + test_decimal(1_000_000_000_000., 1., Scale::Tera); + test_decimal(1_000_000_000_000_000., 1., Scale::Peta); + } +} diff --git a/crates/divan_compat/divan_fork/src/util/mod.rs b/crates/divan_compat/divan_fork/src/util/mod.rs new file mode 100644 index 00000000..6ac8cbfb --- /dev/null +++ b/crates/divan_compat/divan_fork/src/util/mod.rs @@ -0,0 +1,106 @@ +use std::{ + mem::ManuallyDrop, + num::NonZeroUsize, + sync::atomic::{AtomicUsize, Ordering::Relaxed}, +}; + +pub mod fmt; +pub mod sort; +pub mod sync; +pub mod thread; +pub mod ty; + +/// Public-in-private type like `()` but meant to be externally-unreachable. +/// +/// Using this in place of `()` for `GenI` prevents `Bencher::with_inputs` from +/// working with `()` unintentionally. 
+#[non_exhaustive] +pub struct Unit; + +#[inline] +pub(crate) fn defer(f: F) -> impl Drop { + struct Defer(ManuallyDrop); + + impl Drop for Defer { + #[inline] + fn drop(&mut self) { + let f = unsafe { ManuallyDrop::take(&mut self.0) }; + + f(); + } + } + + Defer(ManuallyDrop::new(f)) +} + +/// Returns the index of `ptr` in the slice, assuming it is in the slice. +#[inline] +pub(crate) fn slice_ptr_index(slice: &[T], ptr: *const T) -> usize { + // Safe pointer `offset_from`. + (ptr as usize - slice.as_ptr() as usize) / size_of::() +} + +/// Returns the values in the middle of `slice`. +/// +/// If the slice has an even length, two middle values exist. +#[inline] +pub(crate) fn slice_middle(slice: &[T]) -> &[T] { + let len = slice.len(); + + if len == 0 { + slice + } else if len % 2 == 0 { + &slice[(len / 2) - 1..][..2] + } else { + &slice[len / 2..][..1] + } +} + +/// Cached [`std::thread::available_parallelism`]. +#[inline] +pub(crate) fn known_parallelism() -> NonZeroUsize { + static CACHED: AtomicUsize = AtomicUsize::new(0); + + #[cold] + fn slow() -> NonZeroUsize { + let n = std::thread::available_parallelism().unwrap_or(NonZeroUsize::MIN); + + match CACHED.compare_exchange(0, n.get(), Relaxed, Relaxed) { + Ok(_) => n, + + // SAFETY: Zero is checked by us and competing threads. 
+ Err(n) => unsafe { NonZeroUsize::new_unchecked(n) }, + } + } + + match NonZeroUsize::new(CACHED.load(Relaxed)) { + Some(n) => n, + None => slow(), + } +} + +#[cfg(test)] +mod tests { + use crate::black_box; + + use super::*; + + #[test] + fn known_parallelism() { + let f: fn() -> NonZeroUsize = super::known_parallelism; + assert_eq!(black_box(f)(), black_box(f)()); + } + + #[test] + fn slice_middle() { + use super::slice_middle; + + // assert_eq!(slice_middle::(&[]), &[]); + + assert_eq!(slice_middle(&[1]), &[1]); + assert_eq!(slice_middle(&[1, 2]), &[1, 2]); + assert_eq!(slice_middle(&[1, 2, 3]), &[2]); + assert_eq!(slice_middle(&[1, 2, 3, 4]), &[2, 3]); + assert_eq!(slice_middle(&[1, 2, 3, 4, 5]), &[3]); + } +} diff --git a/crates/divan_compat/divan_fork/src/util/sort.rs b/crates/divan_compat/divan_fork/src/util/sort.rs new file mode 100644 index 00000000..d86d416a --- /dev/null +++ b/crates/divan_compat/divan_fork/src/util/sort.rs @@ -0,0 +1,139 @@ +use std::cmp::Ordering; + +/// Compares strings by treating internal integers as atomic units. +pub fn natural_cmp(a: &str, b: &str) -> Ordering { + Iterator::cmp(Tokenizer { input: a }, Tokenizer { input: b }) +} + +#[inline] +fn cmp_int(mut a: &str, mut b: &str) -> Ordering { + a = a.trim_start_matches('0'); + b = b.trim_start_matches('0'); + + // Compare to 0. + match (a.is_empty(), b.is_empty()) { + (true, true) => return Ordering::Equal, + (true, false) => return Ordering::Less, + (false, true) => return Ordering::Greater, + _ => {} + } + + // Compare length. + match a.len().cmp(&b.len()) { + Ordering::Equal => {} + ord => return ord, + } + + // Compare digits. 
+ a.cmp(b) +} + +#[derive(PartialEq, Eq)] +#[cfg_attr(test, derive(Debug))] +struct Token<'a> { + is_int: bool, + text: &'a str, +} + +impl PartialOrd for Token<'_> { + #[inline] + fn partial_cmp(&self, other: &Self) -> Option { + Some(self.cmp(other)) + } +} + +impl Ord for Token<'_> { + #[inline] + fn cmp(&self, other: &Self) -> Ordering { + if self.is_int && other.is_int { + cmp_int(self.text, other.text) + } else { + self.text.cmp(other.text) + } + } +} + +/// Lexes a string into "tokens". +struct Tokenizer<'a> { + /// The remaining characters to process. + input: &'a str, +} + +impl<'a> Iterator for Tokenizer<'a> { + type Item = Token<'a>; + + #[inline] + fn next(&mut self) -> Option { + let mut bytes = self.input.bytes(); + let is_int = bytes.next()?.is_ascii_digit(); + + let mut kind_len = 1; + for ch in bytes { + // Stop on character kind change. + if ch.is_ascii_digit() != is_int { + break; + } + + kind_len += 1; + } + + unsafe { + let text = self.input.get_unchecked(..kind_len); + self.input = self.input.get_unchecked(kind_len..); + + Some(Token { is_int, text }) + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[track_caller] + fn test_sort(list: &[&str], cmp: fn(&str, &str) -> Ordering) { + let mut copy = list.to_vec(); + copy.sort_by(|a, b| cmp(a, b)); + assert_eq!(list, copy); + } + + #[test] + fn natural_cmp() { + #[track_caller] + fn test(list: &[&str]) { + test_sort(list, super::natural_cmp); + } + + test(&["A<4>", "A<8>", "A<16>", "A<32>", "A<64>"]); + } + + #[test] + fn cmp_int() { + #[track_caller] + fn test(list: &[&str]) { + test_sort(list, super::cmp_int); + } + + test(&["4", "8", "16", "32", "64"]); + test(&["4", "08"]); + test(&["0", "00"]); + } + + #[test] + fn tokenize() { + #[track_caller] + fn test(s: &str, expected: &[Token]) { + let tokens: Vec = Tokenizer { input: s }.collect(); + assert_eq!(tokens, expected); + } + + test( + "A<4>", + &[ + Token { text: "A<", is_int: false }, + Token { text: "4", is_int: true }, + Token { 
text: ">", is_int: false }, + ], + ); + } +} diff --git a/crates/divan_compat/divan_fork/src/util/sync.rs b/crates/divan_compat/divan_fork/src/util/sync.rs new file mode 100644 index 00000000..d84f07be --- /dev/null +++ b/crates/divan_compat/divan_fork/src/util/sync.rs @@ -0,0 +1,121 @@ +//! Synchronization utilities. + +#![cfg_attr(not(target_os = "macos"), allow(unused))] + +use std::{ + ops::{Deref, DerefMut}, + sync::atomic::*, +}; + +/// Makes the wrapped value [`Send`] + [`Sync`] even though it isn't. +pub struct SyncWrap { + pub value: T, +} + +unsafe impl Sync for SyncWrap {} + +impl Deref for SyncWrap { + type Target = T; + + #[inline] + fn deref(&self) -> &Self::Target { + &self.value + } +} + +impl DerefMut for SyncWrap { + #[inline] + fn deref_mut(&mut self) -> &mut Self::Target { + &mut self.value + } +} + +impl SyncWrap { + #[inline] + pub const unsafe fn new(value: T) -> Self { + Self { value } + } +} + +/// A convenience wrapper around `AtomicBool`. +pub(crate) struct AtomicFlag(AtomicBool); + +impl AtomicFlag { + #[inline] + pub const fn new(value: bool) -> Self { + Self(AtomicBool::new(value)) + } + + #[inline] + pub fn get(&self) -> bool { + self.0.load(Ordering::Relaxed) + } + + #[inline] + pub fn set(&self, value: bool) { + self.0.store(value, Ordering::Relaxed); + } +} + +/// Prevents false sharing by aligning to the cache line. +#[derive(Clone, Copy)] +#[repr(align(64))] +pub(crate) struct CachePadded(pub T); + +/// Alias to the atomic equivalent of `T`. +pub(crate) type Atomic = ::Atomic; + +/// A type with an associated atomic type. 
+pub(crate) trait WithAtomic { + type Atomic; +} + +#[cfg(target_has_atomic = "ptr")] +impl WithAtomic for usize { + type Atomic = AtomicUsize; +} + +#[cfg(target_has_atomic = "ptr")] +impl WithAtomic for isize { + type Atomic = AtomicIsize; +} + +#[cfg(target_has_atomic = "8")] +impl WithAtomic for u8 { + type Atomic = AtomicU8; +} + +#[cfg(target_has_atomic = "8")] +impl WithAtomic for i8 { + type Atomic = AtomicI8; +} + +#[cfg(target_has_atomic = "16")] +impl WithAtomic for u16 { + type Atomic = AtomicU16; +} + +#[cfg(target_has_atomic = "16")] +impl WithAtomic for i16 { + type Atomic = AtomicI16; +} + +#[cfg(target_has_atomic = "32")] +impl WithAtomic for u32 { + type Atomic = AtomicU32; +} + +#[cfg(target_has_atomic = "32")] +impl WithAtomic for i32 { + type Atomic = AtomicI32; +} + +#[cfg(target_has_atomic = "64")] +impl WithAtomic for u64 { + type Atomic = AtomicU64; +} + +#[cfg(target_has_atomic = "64")] +impl WithAtomic for i64 { + type Atomic = AtomicI64; +} diff --git a/crates/divan_compat/divan_fork/src/util/thread.rs b/crates/divan_compat/divan_fork/src/util/thread.rs new file mode 100644 index 00000000..6262889e --- /dev/null +++ b/crates/divan_compat/divan_fork/src/util/thread.rs @@ -0,0 +1,193 @@ +//! Threading utilities. + +#![cfg(target_os = "macos")] + +use std::{marker::PhantomData, ptr::NonNull, sync::atomic::Ordering::*}; + +use libc::pthread_key_t; + +use crate::util::sync::Atomic; + +const KEY_UNINIT: pthread_key_t = 0; + +/// Thread-local key accessed via +/// [`pthread_getspecific`](https://pubs.opengroup.org/onlinepubs/9699919799/functions/pthread_getspecific.html). +pub(crate) struct PThreadKey { + value: AtomicPThreadKey, + marker: PhantomData<&'static T>, +} + +impl PThreadKey { + #[inline] + pub const fn new() -> Self { + Self { value: AtomicPThreadKey::new(KEY_UNINIT), marker: PhantomData } + } + + #[inline] + pub fn get(&self) -> Option> { + match self.value.load(Relaxed) { + KEY_UNINIT => None, + + key => unsafe { + cfg_if::cfg_if! 
{ + if #[cfg(all( + not(miri), + any(target_arch = "x86_64", target_arch = "aarch64"), + ))] { + let thread_local = fast::get_thread_local(key as usize); + + #[cfg(test)] + assert_eq!(thread_local, libc::pthread_getspecific(key)); + } else { + let thread_local = libc::pthread_getspecific(key); + } + } + + NonNull::new(thread_local.cast()) + }, + } + } + + /// Assigns the value with its destructor. + #[inline] + pub fn set(&self, ptr: *const T, _: D) -> bool + where + D: FnOnce(NonNull) + Copy, + { + assert_eq!(size_of::(), 0); + + unsafe extern "C" fn dtor(ptr: *mut libc::c_void) + where + T: 'static, + D: FnOnce(NonNull) + Copy, + { + // SAFETY: The dtor is zero-sized, so we can make one from thin air. + let dtor: D = unsafe { std::mem::zeroed() }; + + // Although we're guaranteed `ptr` is not null, check in case. + if let Some(ptr) = NonNull::new(ptr) { + dtor(ptr.cast()); + } + } + + let shared_key = &self.value; + let mut local_key = shared_key.load(Relaxed); + + // Race against other threads to initialize `shared_key`. + if local_key == KEY_UNINIT { + if unsafe { libc::pthread_key_create(&mut local_key, Some(dtor::)) } == 0 { + // Race to store our key into the global instance. + // + // On failure, delete our key and use the winner's key. + if let Err(their_key) = + shared_key.compare_exchange(KEY_UNINIT, local_key, Relaxed, Relaxed) + { + // SAFETY: No other thread is accessing this key. + unsafe { libc::pthread_key_delete(local_key) }; + + local_key = their_key; + } + } else { + // On create failure, check if another thread succeeded. + local_key = shared_key.load(Relaxed); + if local_key == KEY_UNINIT { + return false; + } + } + } + + // This is the slow path, so don't bother with writing via + // `gs`/`tpidrro_el0` register. + // + // SAFETY: The key has been created by us or another thread. + unsafe { libc::pthread_setspecific(local_key, ptr.cast()) == 0 } + } +} + +/// Alias to the atomic equivalent of `pthread_key_t`. 
+pub(crate) type AtomicPThreadKey = Atomic; + +/// Optimized alternatives to `pthread_getspecific`. +pub(crate) mod fast { + // Apple reserves key 11 (`__PTK_LIBC_RESERVED_WIN64`) for Windows: + // https://github.com/apple-oss-distributions/libpthread/blob/libpthread-519/private/pthread/tsd_private.h#L99 + // + // Key 6 is also reserved for Windows and Go, but we don't use it because + // it's more well known and likely to be used by more libraries. + + /// Returns a pointer to a static thread-local variable. + #[inline] + #[cfg(all(not(miri), not(feature = "dyn_thread_local"), target_arch = "x86_64"))] + pub fn get_static_thread_local() -> *const T { + unsafe { + let result; + std::arch::asm!( + "mov {}, gs:[88]", + out(reg) result, + options(pure, readonly, nostack, preserves_flags), + ); + result + } + } + + /// Sets the static thread-local variable. + /// + /// # Safety + /// + /// If the slot is in use, we will corrupt the other user's memory. + #[inline] + #[cfg(all(not(miri), not(feature = "dyn_thread_local"), target_arch = "x86_64"))] + pub unsafe fn set_static_thread_local(ptr: *const T) { + unsafe { + std::arch::asm!( + "mov gs:[88], {}", + in(reg) ptr, + options(nostack, preserves_flags), + ); + } + } + + /// Returns a pointer to the corresponding thread-local variable. + /// + /// The first element is reserved for `pthread_self`. This is widely known + /// and also mentioned in page 251 of "*OS Internals Volume 1" by Jonathan + /// Levin. + /// + /// It appears that `pthread_key_create` allocates a slot into the buffer + /// referenced by: + /// - [`gs` on x86_64](https://github.com/apple-oss-distributions/xnu/blob/xnu-10002.41.9/libsyscall/os/tsd.h#L126) + /// - [`tpidrro_el0` on AArch64](https://github.com/apple-oss-distributions/xnu/blob/xnu-10002.41.9/libsyscall/os/tsd.h#L163) + /// + /// # Safety + /// + /// `key` must not cause an out-of-bounds lookup. 
+ #[inline] + #[cfg(all(not(miri), any(target_arch = "x86_64", target_arch = "aarch64")))] + pub unsafe fn get_thread_local(key: usize) -> *mut libc::c_void { + #[cfg(target_arch = "x86_64")] + { + let result; + std::arch::asm!( + "mov {}, gs:[8 * {1}]", + out(reg) result, + in(reg) key, + options(pure, readonly, nostack, preserves_flags), + ); + result + } + + #[cfg(target_arch = "aarch64")] + { + let result: *const *mut libc::c_void; + std::arch::asm!( + "mrs {0}, tpidrro_el0", + // Clear bottom 3 bits just in case. This was historically the CPU + // core ID but that changed at some point. + "and {0}, {0}, #-8", + out(reg) result, + options(pure, nomem, nostack, preserves_flags), + ); + *result.add(key) + } + } +} diff --git a/crates/divan_compat/divan_fork/src/util/ty.rs b/crates/divan_compat/divan_fork/src/util/ty.rs new file mode 100644 index 00000000..0fb7f536 --- /dev/null +++ b/crates/divan_compat/divan_fork/src/util/ty.rs @@ -0,0 +1,38 @@ +use std::{ + any::{Any, TypeId}, + marker::PhantomData, +}; + +/// Returns a [`TypeId`] for any type regardless of whether it is `'static`. +/// +/// Note that **this is not the same** as [`TypeId::of`]. +#[inline] +pub(crate) fn proxy_type_id() -> TypeId { + // Return the type ID of a generic closure. + Any::type_id(&|| PhantomData::) +} + +/// Returns `true` if the given types are equal. +#[inline] +pub(crate) fn is_type_eq() -> bool { + proxy_type_id::
() == proxy_type_id::() +} + +/// Convenience trait for type conversions. +pub(crate) trait TypeCast { + /// Converts a reference if `self` is an instance of `T`. + /// + /// We require `T: 'static` since we want to ensure when providing a type + /// that any lifetimes are static, such as `Cow`. + #[inline] + fn cast_ref(&self) -> Option<&T> { + if is_type_eq::() { + // SAFETY: `self` is `&T`. + Some(unsafe { &*(self as *const Self as *const T) }) + } else { + None + } + } +} + +impl TypeCast for A {} diff --git a/crates/divan_compat/divan_fork/tests/attr_options.rs b/crates/divan_compat/divan_fork/tests/attr_options.rs new file mode 100644 index 00000000..126c94e6 --- /dev/null +++ b/crates/divan_compat/divan_fork/tests/attr_options.rs @@ -0,0 +1,59 @@ +// Tests that attribute options produce the correct results. + +// Miri cannot discover benchmarks. +#![cfg(not(miri))] + +use std::sync::atomic::{AtomicUsize, Ordering::SeqCst}; + +extern crate codspeed_divan_compat_walltime as divan; +use divan::Divan; + +static CHILD1_ITERS: AtomicUsize = AtomicUsize::new(0); +static CHILD2_ITERS: AtomicUsize = AtomicUsize::new(0); +static CHILD3_ITERS: AtomicUsize = AtomicUsize::new(0); + +#[divan::bench_group(sample_count = 10, sample_size = 50)] +mod parent { + use super::*; + + // 10 × 1 = 10 + #[divan::bench_group(sample_size = 1)] + mod child1 { + use super::*; + + #[divan::bench] + fn bench() { + CHILD1_ITERS.fetch_add(1, SeqCst); + } + } + + // 42 × 50 = 2100 + #[divan::bench_group(sample_count = 42)] + mod child2 { + use super::*; + + #[divan::bench] + fn bench() { + CHILD2_ITERS.fetch_add(1, SeqCst); + } + } + + mod child3 { + use super::*; + + // 1 × 50 = 50 + #[divan::bench(sample_count = 1)] + fn bench() { + CHILD3_ITERS.fetch_add(1, SeqCst); + } + } +} + +#[test] +fn iter_count() { + Divan::default().run_benches(); + + assert_eq!(CHILD1_ITERS.load(SeqCst), 10); + assert_eq!(CHILD2_ITERS.load(SeqCst), 2100); + assert_eq!(CHILD3_ITERS.load(SeqCst), 50); +} diff --git 
a/crates/divan_compat/divan_fork/tests/entry_properties.rs b/crates/divan_compat/divan_fork/tests/entry_properties.rs new file mode 100644 index 00000000..0e5b7ea0 --- /dev/null +++ b/crates/divan_compat/divan_fork/tests/entry_properties.rs @@ -0,0 +1,122 @@ +// Tests that entry benchmarks/groups have correct generated properties. + +// Miri cannot discover benchmarks. +#![cfg(not(miri))] + +extern crate codspeed_divan_compat_walltime as divan; +use divan::__private::{EntryMeta, BENCH_ENTRIES, GROUP_ENTRIES}; + +#[divan::bench] +fn outer() {} + +#[divan::bench_group] +mod outer_group { + #[divan::bench] + fn inner() {} + + #[divan::bench_group] + mod inner_group {} +} + +#[divan::bench] +#[ignore] +fn ignored_1() {} + +#[divan::bench(ignore)] +fn ignored_2() {} + +#[divan::bench_group] +#[allow(unused_attributes)] +#[ignore] +mod ignored_group { + #[divan::bench] + fn not_yet_ignored() {} +} + +/// Finds `EntryMeta` based on the entry's raw name. +macro_rules! find_meta { + ($entries:expr, $raw_name:literal) => { + $entries + .iter() + .map(|entry| &entry.meta) + .find(|common| common.raw_name == $raw_name) + .expect(concat!($raw_name, " not found")) + }; +} + +fn find_outer() -> &'static EntryMeta { + find_meta!(BENCH_ENTRIES, "outer") +} + +fn find_inner() -> &'static EntryMeta { + find_meta!(BENCH_ENTRIES, "inner") +} + +fn find_outer_group() -> &'static EntryMeta { + find_meta!(GROUP_ENTRIES, "outer_group") +} + +fn find_inner_group() -> &'static EntryMeta { + find_meta!(GROUP_ENTRIES, "inner_group") +} + +#[test] +fn file() { + let file = file!(); + + assert_eq!(find_outer().location.file, file); + assert_eq!(find_outer_group().location.file, file); + + assert_eq!(find_inner().location.file, file); + assert_eq!(find_inner_group().location.file, file); +} + +#[test] +fn module_path() { + let outer_path = module_path!(); + assert_eq!(find_outer().module_path, outer_path); + assert_eq!(find_outer_group().module_path, outer_path); + + let inner_path = 
format!("{outer_path}::outer_group"); + assert_eq!(find_inner().module_path, inner_path); + assert_eq!(find_inner_group().module_path, inner_path); +} + +#[ignore = "changed within the fork"] +#[test] +fn line() { + assert_eq!(find_outer().location.line, 8); + assert_eq!(find_outer_group().location.line, 11); + + assert_eq!(find_inner().location.line, 13); + assert_eq!(find_inner_group().location.line, 16); +} + +#[test] +fn column() { + assert_eq!(find_outer().location.col, 1); + assert_eq!(find_outer_group().location.col, 1); + + assert_eq!(find_inner().location.col, 5); + assert_eq!(find_inner_group().location.col, 5); +} + +#[test] +fn ignore() { + fn get_ignore(meta: &EntryMeta) -> bool { + meta.bench_options.as_ref().and_then(|options| options.ignore).unwrap_or_default() + } + + assert!(get_ignore(find_meta!(BENCH_ENTRIES, "ignored_1"))); + assert!(get_ignore(find_meta!(BENCH_ENTRIES, "ignored_2"))); + assert!(get_ignore(find_meta!(GROUP_ENTRIES, "ignored_group"))); + + // Although its parent is marked as `#[ignore]`, it itself is not yet known + // to be ignored. + assert!(!get_ignore(find_meta!(BENCH_ENTRIES, "not_yet_ignored"))); + + assert!(!get_ignore(find_inner())); + assert!(!get_ignore(find_inner_group())); + assert!(!get_ignore(find_outer())); + assert!(!get_ignore(find_outer_group())); +} diff --git a/crates/divan_compat/divan_fork/tests/forbid_unsafe.rs b/crates/divan_compat/divan_fork/tests/forbid_unsafe.rs new file mode 100644 index 00000000..e7bba2db --- /dev/null +++ b/crates/divan_compat/divan_fork/tests/forbid_unsafe.rs @@ -0,0 +1,85 @@ +// Exhaustively tests that macros work when linting against `unsafe`. 
+ +#![forbid(unsafe_code)] + +extern crate codspeed_divan_compat_walltime as divan; +use divan::Bencher; + +const CONST_VALUES: [usize; 3] = [1, 5, 10]; + +#[divan::bench] +fn freestanding() {} + +#[divan::bench(types = [i32, &str])] +fn freestanding_generic_type() {} + +#[divan::bench(consts = [1, 5, 10])] +fn freestanding_generic_const1() {} + +#[divan::bench(consts = CONST_VALUES)] +fn freestanding_generic_const2() {} + +#[divan::bench(types = [i32, &str], consts = [1, 5, 10])] +fn freestanding_generic_type_const1() {} + +#[divan::bench(types = [i32, &str], consts = CONST_VALUES)] +fn freestanding_generic_type_const2() {} + +#[divan::bench] +fn contextual(_: Bencher) {} + +#[divan::bench(types = [i32, &str])] +fn contextual_generic_type(_: Bencher) {} + +#[divan::bench(consts = [1, 5, 10])] +fn contextual_generic_const_1(_: Bencher) {} + +#[divan::bench(consts = CONST_VALUES)] +fn contextual_generic_const_2(_: Bencher) {} + +#[divan::bench(types = [i32, &str], consts = [1, 5, 10])] +fn contextual_generic_type_const_1(_: Bencher) {} + +#[divan::bench(types = [i32, &str], consts = CONST_VALUES)] +fn contextual_generic_type_const_2(_: Bencher) {} + +#[divan::bench_group] +mod group { + use super::*; + + #[divan::bench] + fn freestanding() {} + + #[divan::bench(types = [i32, &str])] + fn freestanding_generic_type() {} + + #[divan::bench(consts = [1, 5, 10])] + fn freestanding_generic_const1() {} + + #[divan::bench(consts = CONST_VALUES)] + fn freestanding_generic_const2() {} + + #[divan::bench(types = [i32, &str], consts = [1, 5, 10])] + fn freestanding_generic_type_const1() {} + + #[divan::bench(types = [i32, &str], consts = CONST_VALUES)] + fn freestanding_generic_type_const2() {} + + #[divan::bench] + fn contextual(_: Bencher) {} + + #[divan::bench(types = [i32, &str])] + fn contextual_generic_type(_: Bencher) {} + + #[divan::bench(consts = [1, 5, 10])] + fn contextual_generic_const1(_: Bencher) {} + + #[divan::bench(consts = CONST_VALUES)] + fn 
contextual_generic_const2(_: Bencher) {} + + #[divan::bench(types = [i32, &str], consts = [1, 5, 10])] + fn contextual_generic_type_const1(_: Bencher) {} + + #[divan::bench(types = [i32, &str], consts = CONST_VALUES)] + fn contextual_generic_type_const2(_: Bencher) {} +} diff --git a/crates/divan_compat/divan_fork/tests/weird_usage.rs b/crates/divan_compat/divan_fork/tests/weird_usage.rs new file mode 100644 index 00000000..28eb4af5 --- /dev/null +++ b/crates/divan_compat/divan_fork/tests/weird_usage.rs @@ -0,0 +1,136 @@ +// Tests that ensure weird (but valid) usage behave as expected. + +// Miri cannot discover benchmarks. +#![cfg(not(miri))] + +use std::time::Duration; + +extern crate codspeed_divan_compat_walltime as divan; +use divan::{Divan, __private::BENCH_ENTRIES}; + +#[divan::bench(bytes_count = 0u8, chars_count = 0u16, cycles_count = 0u32, items_count = 0u64)] +fn zero_throughput() {} + +#[divan::bench(min_time = Duration::ZERO)] +fn min_min() {} + +#[divan::bench(max_time = Duration::MAX)] +fn max_max() {} + +#[divan::bench] +fn lifetime<'a>() -> &'a str { + "hello" +} + +#[divan::bench] +fn embedded() { + #[divan::bench] + fn inner() { + #[divan::bench] + fn inner() {} + } +} + +#[divan::bench] +fn r#raw_ident() {} + +#[divan::bench(r#name = "raw name ident")] +fn raw_name_ident() {} + +#[divan::bench] +extern "system" fn extern_abi_1() {} + +#[divan::bench] +#[allow(improper_ctypes_definitions)] +extern "C" fn extern_abi_2(_: divan::Bencher) {} + +#[divan::bench(types = [i32, u8])] +extern "system" fn extern_abi_3() {} + +#[divan::bench(r#types = [i32, u8])] +#[allow(improper_ctypes_definitions)] +extern "C" fn extern_abi_4(_: divan::Bencher) {} + +#[divan::bench(consts = [0, -1, isize::MAX])] +extern "system" fn extern_abi_5() {} + +#[divan::bench(consts = [0, -1, isize::MAX])] +#[allow(improper_ctypes_definitions)] +extern "C" fn extern_abi_6(_: divan::Bencher) {} + +macro_rules! 
consts { + () => { + [0, -1, isize::MAX] + }; +} + +#[divan::bench(consts = consts!())] +fn bench_consts() {} + +#[divan::bench(args = [])] +fn empty_args(_: usize) {} + +#[divan::bench(types = [])] +#[allow(dead_code)] +fn empty_types() {} + +#[divan::bench(consts = [])] +#[allow(dead_code)] +fn empty_consts() {} + +#[divan::bench(args = [], consts = [])] +#[allow(dead_code)] +fn empty_args_consts(_: usize) {} + +#[divan::bench(types = [], consts = [])] +#[allow(dead_code)] +fn empty_types_consts_1() {} + +#[divan::bench(consts = [], types = [])] +#[allow(dead_code)] +fn empty_types_consts_2() {} + +#[divan::bench(types = [], consts = [])] +#[allow(dead_code)] +fn empty_types_consts_3() {} + +#[divan::bench(consts = [], types = [])] +#[allow(dead_code)] +fn empty_types_consts_4() {} + +#[test] +fn test_fn() { + Divan::default().test_benches(); +} + +// Test that each function appears the expected number of times. +#[test] +fn count() { + let mut inner_count = 0; + + for entry in BENCH_ENTRIES.iter() { + if entry.meta.raw_name == "inner" { + inner_count += 1; + } + } + + assert_eq!(inner_count, 2); +} + +// Test expected `BenchEntry.path` values. +#[test] +fn path() { + for entry in BENCH_ENTRIES.iter() { + // Embedded functions do not contain their parent function's name in + // their `module_path!()`. + if entry.meta.raw_name == "inner" { + assert_eq!(entry.meta.module_path, "weird_usage"); + } + + // "r#" is removed from raw identifiers. + if entry.meta.raw_name.contains("raw_ident") { + assert_eq!(entry.meta.raw_name, "r#raw_ident"); + assert_eq!(entry.meta.display_name, "raw_ident"); + } + } +}