From 913c5b36a8a78db959c3a8da590d838a9ecc0bf7 Mon Sep 17 00:00:00 2001
From: Sylvestre Ledru
Date: Sat, 20 Sep 2025 17:38:30 +0200
Subject: [PATCH] evaluate codspeed for perfs

---
 .github/workflows/codspeed.yml |  63 ++++++++
 Cargo.lock                     |  49 ++++++-
 Cargo.toml                     |   1 +
 src/uu/wc/Cargo.toml           |   8 ++
 src/uu/wc/benches/wc_bench.rs  | 246 +++++++++++++++++++++++++++++++++
 5 files changed, 362 insertions(+), 5 deletions(-)
 create mode 100644 .github/workflows/codspeed.yml
 create mode 100644 src/uu/wc/benches/wc_bench.rs

diff --git a/.github/workflows/codspeed.yml b/.github/workflows/codspeed.yml
new file mode 100644
index 00000000000..1637eed4d91
--- /dev/null
+++ b/.github/workflows/codspeed.yml
@@ -0,0 +1,63 @@
+name: CodSpeed Benchmarks
+
+# spell-checker: disable
+
+# Run the per-utility benchmarks under CodSpeed on every push to main and on
+# every pull request.
+on:
+  push:
+    branches:
+      - "main"
+  pull_request:
+
+permissions:
+  contents: read
+
+jobs:
+  benchmarks:
+    name: Run benchmarks
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v5
+        with:
+          persist-credentials: false
+
+      - name: Install system dependencies
+        shell: bash
+        run: |
+          sudo apt-get -y update
+          sudo apt-get -y install libselinux1-dev
+
+      - uses: dtolnay/rust-toolchain@stable
+
+      - uses: Swatinem/rust-cache@v2
+
+      - name: Run sccache-cache
+        uses: mozilla-actions/sccache-action@v0.0.9
+
+      - name: Install cargo-codspeed
+        shell: bash
+        run: cargo install cargo-codspeed --locked
+
+      # The wc benchmarks spawn target/release/coreutils; build it explicitly
+      # so the binary exists before they run.
+      - name: Build coreutils release binary
+        shell: bash
+        run: cargo build --release
+
+      - name: Run benchmarks
+        uses: CodSpeedHQ/action@v4
+        with:
+          mode: instrumentation
+          run: |
+            # Build and run the benchmarks of every utility crate that has a
+            # benches/ directory; `sort -u` visits each crate only once even
+            # when it ships several benchmark files.
+            find src/uu/*/benches/ -name "*.rs" -exec dirname {} \; 2>/dev/null | sort -u | while read -r bench_dir; do
+              crate_dir=$(dirname "$bench_dir")
+              echo "Building benchmarks in $crate_dir"
+              (cd "$crate_dir" && cargo codspeed build)
+              echo "Running benchmarks in $crate_dir"
+              (cd "$crate_dir" && cargo codspeed run)
+            done
+          token: ${{ secrets.CODSPEED_TOKEN }}
diff --git a/Cargo.lock b/Cargo.lock
index 8a7d82ed111..85245bd5cc4 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -432,6 +432,12 @@
version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "120133d4db2ec47efe2e26502ee984747630c67f51974fca0b6c1340cf2368d3" +[[package]] +name = "condtype" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "baf0a07a401f374238ab8e2f11a104d2851bf9ce711ec69804834de8af45c7af" + [[package]] name = "console" version = "0.16.0" @@ -896,6 +902,31 @@ dependencies = [ "syn", ] +[[package]] +name = "divan" +version = "0.1.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a405457ec78b8fe08b0e32b4a3570ab5dff6dd16eb9e76a5ee0a9d9cbd898933" +dependencies = [ + "cfg-if", + "clap", + "condtype", + "divan-macros", + "libc", + "regex-lite", +] + +[[package]] +name = "divan-macros" +version = "0.1.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9556bc800956545d6420a640173e5ba7dfa82f38d3ea5a167eb555bc69ac3323" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "dlv-list" version = "0.5.2" @@ -972,7 +1003,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cea14ef9355e3beab063703aa9dab15afd25f0667c341310c1e5274bb1d0da18" dependencies = [ "libc", - "windows-sys 0.52.0", + "windows-sys 0.59.0", ] [[package]] @@ -1564,7 +1595,7 @@ dependencies = [ "portable-atomic", "portable-atomic-util", "serde", - "windows-sys 0.52.0", + "windows-sys 0.59.0", ] [[package]] @@ -2335,6 +2366,12 @@ dependencies = [ "regex-syntax", ] +[[package]] +name = "regex-lite" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "943f41321c63ef1c92fd763bfe054d2668f7f225a5c29f0105903dc2fc04ba30" + [[package]] name = "regex-syntax" version = "0.8.5" @@ -2426,7 +2463,7 @@ dependencies = [ "errno", "libc", "linux-raw-sys", - "windows-sys 0.52.0", + "windows-sys 0.61.0", ] [[package]] @@ -2716,7 +2753,7 @@ dependencies = [ "getrandom 0.3.3", "once_cell", "rustix", - 
"windows-sys 0.52.0", + "windows-sys 0.61.0", ] [[package]] @@ -4071,9 +4108,11 @@ version = "0.2.2" dependencies = [ "bytecount", "clap", + "divan", "fluent", "libc", "nix", + "tempfile", "thiserror 2.0.16", "unicode-width 0.2.1", "uucore", @@ -4359,7 +4398,7 @@ version = "0.1.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c2a7b1c03c876122aa43f3020e6c3c3ee5c05081c9a00739faf7503aeba10d22" dependencies = [ - "windows-sys 0.52.0", + "windows-sys 0.61.0", ] [[package]] diff --git a/Cargo.toml b/Cargo.toml index 0fbe7ee1b5b..271c57630f2 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -316,6 +316,7 @@ compare = "0.1.0" crossterm = "0.29.0" ctor = "0.5.0" ctrlc = { version = "3.4.7", features = ["termination"] } +divan = "0.1" dns-lookup = { version = "3.0.0" } exacl = "0.12.0" file_diff = "1.0.0" diff --git a/src/uu/wc/Cargo.toml b/src/uu/wc/Cargo.toml index ec12222f68d..aff31fac3d0 100644 --- a/src/uu/wc/Cargo.toml +++ b/src/uu/wc/Cargo.toml @@ -29,6 +29,14 @@ fluent = { workspace = true } nix = { workspace = true } libc = { workspace = true } +[dev-dependencies] +divan = { workspace = true } +tempfile = { workspace = true } + [[bin]] name = "wc" path = "src/main.rs" + +[[bench]] +name = "wc_bench" +harness = false diff --git a/src/uu/wc/benches/wc_bench.rs b/src/uu/wc/benches/wc_bench.rs new file mode 100644 index 00000000000..e4eabf3c664 --- /dev/null +++ b/src/uu/wc/benches/wc_bench.rs @@ -0,0 +1,246 @@ +// This file is part of the uutils coreutils package. +// +// For the full copyright and license information, please view the LICENSE +// file that was distributed with this source code. 
+
+//! Benchmarks for `wc`, driven by `divan`.
+//!
+//! Each benchmark writes synthetic input to a temporary file once and then
+//! measures complete `coreutils wc` process runs on that file, so process
+//! start-up is part of every sample.
+//!
+//! The multicall binary must be built beforehand with `cargo build --release`.
+
+use divan::{Bencher, black_box};
+use std::fs::File;
+use std::io::{BufWriter, Write};
+use tempfile::TempDir;
+
+/// Multicall `coreutils` binary exercised by every benchmark.
+///
+/// Anchored at this crate's manifest directory instead of the process working
+/// directory, so the benchmarks find the same binary no matter where they are
+/// launched from.
+const COREUTILS_BIN: &str = concat!(
+    env!("CARGO_MANIFEST_DIR"),
+    "/../../../target/release/coreutils"
+);
+
+/// Generate `size_mb` MiB of lowercase ASCII text.
+///
+/// A newline is emitted after every `avg_line_length` letters, so all complete
+/// lines have the same length. The returned buffer always ends with a newline.
+fn generate_test_data(size_mb: usize, avg_line_length: usize) -> Vec<u8> {
+    let total_size = size_mb * 1024 * 1024;
+    // One spare byte: the trailing newline below may push past `total_size`.
+    let mut data = Vec::with_capacity(total_size + 1);
+
+    let mut line_chars = 0;
+    for pos in 0..total_size {
+        if line_chars >= avg_line_length {
+            data.push(b'\n');
+            line_chars = 0;
+        } else {
+            // Cycle through the alphabet so the content is not constant.
+            data.push(b'a' + (pos % 26) as u8);
+            line_chars += 1;
+        }
+    }
+
+    // Ensure we end with a newline
+    if data.last() != Some(&b'\n') {
+        data.push(b'\n');
+    }
+
+    data
+}
+
+/// Generate `num_lines` newline-terminated lines of space-separated words.
+///
+/// Line lengths cycle through `avg_line_length - 20 ..= avg_line_length + 19`
+/// (clamped at zero), so they vary on both sides of `avg_line_length`.
+fn generate_test_data_by_lines(num_lines: usize, avg_line_length: usize) -> Vec<u8> {
+    // Capacity is only a hint: individual lines are shorter or longer.
+    let mut data = Vec::with_capacity(num_lines * (avg_line_length + 1));
+
+    for line_num in 0..num_lines {
+        // Add before subtracting: `line_num % 40 - 20` alone is negative for
+        // half of the lines and would saturate to zero in unsigned arithmetic.
+        let line_length = (avg_line_length + line_num % 40).saturating_sub(20);
+
+        for char_pos in 0..line_length {
+            if char_pos > 0 && char_pos % 8 == 0 {
+                data.push(b' '); // Add spaces every 8 characters
+            } else {
+                // Cycle through letters with some variation
+                let char_offset = (line_num + char_pos) % 26;
+                data.push(b'a' + char_offset as u8);
+            }
+        }
+        data.push(b'\n');
+    }
+
+    data
+}
+
+/// Write `data` to `test_data.txt` inside `temp_dir` and return the file path.
+///
+/// Panics if the file cannot be created or written.
+fn create_test_file(data: &[u8], temp_dir: &TempDir) -> std::path::PathBuf {
+    let file_path = temp_dir.path().join("test_data.txt");
+    let file = File::create(&file_path).unwrap();
+    let mut writer = BufWriter::new(file);
+    writer.write_all(data).unwrap();
+    // Flush explicitly so a write error surfaces here instead of being
+    // swallowed when the writer is dropped.
+    writer.flush().unwrap();
+    file_path
+}
+
+/// Run `coreutils wc` with `args`, discarding its stdout and stderr.
+///
+/// Returns `0` when the process exits successfully and `1` otherwise. Panics
+/// if the binary cannot be spawned, for example when it has not been built.
+fn run_uutils_wc(args: &[&str]) -> i32 {
+    use std::process::{Command, Stdio};
+
+    // Use the binary instead of calling uumain directly to avoid stdout issues
+    let status = Command::new(COREUTILS_BIN)
+        .arg("wc")
+        .args(args)
+        .stdout(Stdio::null())
+        .stderr(Stdio::null())
+        .status()
+        .expect("Failed to execute wc command");
+
+    i32::from(!status.success())
+}
+
+/// Write `data` to a fresh temporary file and benchmark `wc <flags> <file>`.
+///
+/// The file is created once, outside the measured closure; only spawning `wc`
+/// and waiting for it to exit is timed. The temporary directory lives until
+/// the benchmark has finished.
+fn bench_wc(bencher: Bencher, data: &[u8], flags: &[&str]) {
+    let temp_dir = tempfile::tempdir().unwrap();
+    let file_path = create_test_file(data, &temp_dir);
+
+    let mut args = flags.to_vec();
+    args.push(file_path.to_str().unwrap());
+
+    bencher.bench(|| {
+        black_box(run_uutils_wc(&args));
+    });
+}
+
+/// Benchmark different file sizes for line counting
+#[divan::bench(args = [1, 5, 10, 25, 50])]
+fn wc_lines_synthetic(bencher: Bencher, size_mb: usize) {
+    let data = generate_test_data(size_mb, 80);
+    bench_wc(bencher, &data, &["-l"]);
+}
+
+/// Benchmark different file sizes for character counting
+#[divan::bench(args = [1, 5, 10, 25])]
+fn wc_chars_synthetic(bencher: Bencher, size_mb: usize) {
+    let data = generate_test_data(size_mb, 80);
+    bench_wc(bencher, &data, &["-m"]);
+}
+
+/// Benchmark different file sizes for byte counting
+#[divan::bench(args = [1, 5, 10, 50, 100])]
+fn wc_bytes_synthetic(bencher: Bencher, size_mb: usize) {
+    let data = generate_test_data(size_mb, 80);
+    bench_wc(bencher, &data, &["-c"]);
+}
+
+/// Benchmark word counting (should use traditional read path)
+#[divan::bench(args = [1, 5, 10, 25])]
+fn wc_words_synthetic(bencher: Bencher, size_mb: usize) {
+    let data = generate_test_data(size_mb, 80);
+    bench_wc(bencher, &data, &["-w"]);
+}
+
+/// Benchmark combined byte+line counting
+#[divan::bench(args = [1, 5, 10, 50])]
+fn wc_bytes_lines_synthetic(bencher: Bencher, size_mb: usize) {
+    let data = generate_test_data(size_mb, 80);
+    bench_wc(bencher, &data, &["-cl"]);
+}
+
+/// Benchmark default wc behavior (bytes, lines, words)
+#[divan::bench(args = [1, 5, 10])]
+fn wc_default_synthetic(bencher: Bencher, size_mb: usize) {
+    let data = generate_test_data(size_mb, 80);
+    bench_wc(bencher, &data, &[]);
+}
+
+/// Test different line lengths impact on performance
+#[divan::bench(args = [(5, 50), (5, 100), (5, 200), (5, 500)])]
+fn wc_lines_variable_length(bencher: Bencher, (size_mb, avg_line_len): (usize, usize)) {
+    let data = generate_test_data(size_mb, avg_line_len);
+    bench_wc(bencher, &data, &["-l"]);
+}
+
+/// Benchmark large files by line count - up to 500K lines!
+#[divan::bench(args = [10_000, 50_000, 100_000, 500_000])]
+fn wc_lines_large_line_count(bencher: Bencher, num_lines: usize) {
+    let data = generate_test_data_by_lines(num_lines, 80);
+    bench_wc(bencher, &data, &["-l"]);
+}
+
+/// Benchmark character counting on large line counts
+#[divan::bench(args = [10_000, 50_000, 100_000])]
+fn wc_chars_large_line_count(bencher: Bencher, num_lines: usize) {
+    let data = generate_test_data_by_lines(num_lines, 80);
+    bench_wc(bencher, &data, &["-m"]);
+}
+
+/// Benchmark word counting on large line counts
+#[divan::bench(args = [10_000, 50_000, 100_000])]
+fn wc_words_large_line_count(bencher: Bencher, num_lines: usize) {
+    let data = generate_test_data_by_lines(num_lines, 80);
+    bench_wc(bencher, &data, &["-w"]);
+}
+
+/// Benchmark default wc (lines, words, bytes) on large line counts
+#[divan::bench(args = [10_000, 50_000, 100_000])]
+fn wc_default_large_line_count(bencher: Bencher, num_lines: usize) {
+    let data = generate_test_data_by_lines(num_lines, 80);
+    bench_wc(bencher, &data, &[]);
+}
+
+/// Benchmark very short vs very long lines with 100K lines
+#[divan::bench(args = [(100_000, 10), (100_000, 200), (100_000, 1000)])]
+fn wc_lines_extreme_line_lengths(bencher: Bencher, (num_lines, line_len): (usize, usize)) {
+    let data = generate_test_data_by_lines(num_lines, line_len);
+    bench_wc(bencher, &data, &["-l"]);
+}
+
+/// Entry point: hand control to divan, which runs the registered benchmarks.
+fn main() {
+    divan::main();
+}