diff --git a/src/uu/sort/Cargo.toml b/src/uu/sort/Cargo.toml index 184f6776be7..8a9570eaa30 100644 --- a/src/uu/sort/Cargo.toml +++ b/src/uu/sort/Cargo.toml @@ -60,5 +60,13 @@ name = "sort_bench" harness = false [[bench]] -name = "sort_locale_bench" +name = "sort_locale_c_bench" +harness = false + +[[bench]] +name = "sort_locale_utf8_bench" +harness = false + +[[bench]] +name = "sort_locale_de_bench" harness = false diff --git a/src/uu/sort/benches/sort_locale_bench.rs b/src/uu/sort/benches/sort_locale_bench.rs deleted file mode 100644 index d00ec9f4ac8..00000000000 --- a/src/uu/sort/benches/sort_locale_bench.rs +++ /dev/null @@ -1,189 +0,0 @@ -// This file is part of the uutils coreutils package. -// -// For the full copyright and license information, please view the LICENSE -// file that was distributed with this source code. - -use divan::{Bencher, black_box}; -use std::env; -use tempfile::NamedTempFile; -use uu_sort::uumain; -use uucore::benchmark::{run_util_function, setup_test_file, text_data}; - -/// Benchmark ASCII-only data sorting with C locale (byte comparison) -#[divan::bench] -fn sort_ascii_c_locale(bencher: Bencher) { - let data = text_data::generate_ascii_data_simple(100_000); - let file_path = setup_test_file(&data); - // Reuse the same output file across iterations to reduce filesystem variance - let output_file = NamedTempFile::new().unwrap(); - let output_path = output_file.path().to_str().unwrap().to_string(); - - bencher.bench(|| { - unsafe { - env::set_var("LC_ALL", "C"); - } - black_box(run_util_function( - uumain, - &["-o", &output_path, file_path.to_str().unwrap()], - )); - }); -} - -/// Benchmark ASCII-only data sorting with UTF-8 locale -#[divan::bench] -fn sort_ascii_utf8_locale(bencher: Bencher) { - let data = text_data::generate_ascii_data_simple(200_000); - let file_path = setup_test_file(&data); - // Reuse the same output file across iterations to reduce filesystem variance - let output_file = NamedTempFile::new().unwrap(); - let output_path = output_file.path().to_str().unwrap().to_string(); - - bencher.bench(|| { - unsafe { - env::set_var("LC_ALL", "en_US.UTF-8"); - } - black_box(run_util_function( - uumain, - &["-o", &output_path, file_path.to_str().unwrap()], - )); - }); -} - -/// Benchmark mixed ASCII/Unicode data with C locale -#[divan::bench] -fn sort_mixed_c_locale(bencher: Bencher) { - let data = text_data::generate_mixed_locale_data(50_000); - let file_path = setup_test_file(&data); - // Reuse the same output file across iterations to reduce filesystem variance - let output_file = NamedTempFile::new().unwrap(); - let output_path = output_file.path().to_str().unwrap().to_string(); - - bencher.bench(|| { - unsafe { - env::set_var("LC_ALL", "C"); - } - black_box(run_util_function( - uumain, - &["-o", &output_path, file_path.to_str().unwrap()], - )); - }); -} - -/// Benchmark mixed ASCII/Unicode data with UTF-8 locale -#[divan::bench] -fn sort_mixed_utf8_locale(bencher: Bencher) { - let data = text_data::generate_mixed_locale_data(50_000); - let file_path = setup_test_file(&data); - // Reuse the same output file across iterations to reduce filesystem variance - let output_file = NamedTempFile::new().unwrap(); - let output_path = output_file.path().to_str().unwrap().to_string(); - - bencher.bench(|| { - unsafe { - env::set_var("LC_ALL", "en_US.UTF-8"); - } - black_box(run_util_function( - uumain, - &["-o", &output_path, file_path.to_str().unwrap()], - )); - }); -} - -/// Benchmark German locale-specific data with C locale -#[divan::bench] -fn sort_german_c_locale(bencher: Bencher) { - let data = text_data::generate_german_locale_data(50_000); - let file_path = setup_test_file(&data); - // Reuse the same output file across iterations to reduce filesystem variance - let output_file = NamedTempFile::new().unwrap(); - let output_path = output_file.path().to_str().unwrap().to_string(); - - bencher.bench(|| { - unsafe { - env::set_var("LC_ALL", "C"); - } - black_box(run_util_function( - uumain, - &["-o", &output_path, file_path.to_str().unwrap()], - )); - }); -} - -/// Benchmark German locale-specific data with German locale -#[divan::bench] -fn sort_german_locale(bencher: Bencher) { - let data = text_data::generate_german_locale_data(50_000); - let file_path = setup_test_file(&data); - // Reuse the same output file across iterations to reduce filesystem variance - let output_file = NamedTempFile::new().unwrap(); - let output_path = output_file.path().to_str().unwrap().to_string(); - - bencher.bench(|| { - unsafe { - env::set_var("LC_ALL", "de_DE.UTF-8"); - } - black_box(run_util_function( - uumain, - &["-o", &output_path, file_path.to_str().unwrap()], - )); - }); -} - -/// Benchmark numeric sorting performance -#[divan::bench] -fn sort_numeric(bencher: Bencher) { - let mut data = Vec::new(); - for i in 0..50_000 { - let line = format!("{}\n", 50_000 - i); - data.extend_from_slice(line.as_bytes()); - } - let file_path = setup_test_file(&data); - - bencher.bench(|| { - unsafe { - env::set_var("LC_ALL", "en_US.UTF-8"); - } - black_box(run_util_function( - uumain, - &["-n", file_path.to_str().unwrap()], - )); - }); -} - -/// Benchmark reverse sorting -#[divan::bench] -fn sort_reverse_mixed(bencher: Bencher) { - let data = text_data::generate_mixed_locale_data(50_000); - let file_path = setup_test_file(&data); - - bencher.bench(|| { - unsafe { - env::set_var("LC_ALL", "en_US.UTF-8"); - } - black_box(run_util_function( - uumain, - &["-r", file_path.to_str().unwrap()], - )); - }); -} - -/// Benchmark unique sorting -#[divan::bench] -fn sort_unique_mixed(bencher: Bencher) { - let data = text_data::generate_mixed_locale_data(50_000); - let file_path = setup_test_file(&data); - - bencher.bench(|| { - unsafe { - env::set_var("LC_ALL", "en_US.UTF-8"); - } - black_box(run_util_function( - uumain, - &["-u", file_path.to_str().unwrap()], - )); - }); -} - -fn main() { - divan::main(); -} diff --git a/src/uu/sort/benches/sort_locale_c_bench.rs b/src/uu/sort/benches/sort_locale_c_bench.rs new file mode 100644 index 00000000000..378a2abb9ac --- /dev/null +++ b/src/uu/sort/benches/sort_locale_c_bench.rs @@ -0,0 +1,72 @@ +// This file is part of the uutils coreutils package. +// +// For the full copyright and license information, please view the LICENSE +// file that was distributed with this source code. + +//! Benchmarks for sort with C locale (fast byte-wise comparison). +//! +//! Note: The locale is set in main() BEFORE any benchmark runs because +//! the locale is cached on first access via OnceLock and cannot be changed afterwards. + +use divan::{Bencher, black_box}; +use tempfile::NamedTempFile; +use uu_sort::uumain; +use uucore::benchmark::{run_util_function, setup_test_file, text_data}; + +/// Benchmark ASCII-only data sorting with C locale (byte comparison) +#[divan::bench] +fn sort_ascii_c_locale(bencher: Bencher) { + let data = text_data::generate_ascii_data_simple(100_000); + let file_path = setup_test_file(&data); + let output_file = NamedTempFile::new().unwrap(); + let output_path = output_file.path().to_str().unwrap().to_string(); + + bencher.bench(|| { + black_box(run_util_function( + uumain, + &["-o", &output_path, file_path.to_str().unwrap()], + )); + }); +} + +/// Benchmark mixed ASCII/Unicode data with C locale (byte comparison) +#[divan::bench] +fn sort_mixed_c_locale(bencher: Bencher) { + let data = text_data::generate_mixed_locale_data(50_000); + let file_path = setup_test_file(&data); + let output_file = NamedTempFile::new().unwrap(); + let output_path = output_file.path().to_str().unwrap().to_string(); + + bencher.bench(|| { + black_box(run_util_function( + uumain, + &["-o", &output_path, file_path.to_str().unwrap()], + )); + }); +} + +/// Benchmark German locale-specific data with C locale (byte comparison) +#[divan::bench] +fn sort_german_c_locale(bencher: Bencher) { + let data = text_data::generate_german_locale_data(50_000); + let file_path = setup_test_file(&data); + let output_file = NamedTempFile::new().unwrap(); + let output_path = output_file.path().to_str().unwrap().to_string(); + + bencher.bench(|| { + black_box(run_util_function( + uumain, + &["-o", &output_path, file_path.to_str().unwrap()], + )); + }); +} + +fn main() { + // Set C locale BEFORE any benchmarks run. + // This must happen before divan::main() because the locale is cached + // on first access via OnceLock and cannot be changed afterwards. + unsafe { + std::env::set_var("LC_ALL", "C"); + } + divan::main(); +} diff --git a/src/uu/sort/benches/sort_locale_de_bench.rs b/src/uu/sort/benches/sort_locale_de_bench.rs new file mode 100644 index 00000000000..5c760a694e8 --- /dev/null +++ b/src/uu/sort/benches/sort_locale_de_bench.rs @@ -0,0 +1,40 @@ +// This file is part of the uutils coreutils package. +// +// For the full copyright and license information, please view the LICENSE +// file that was distributed with this source code. + +//! Benchmarks for sort with German locale (de_DE.UTF-8 collation). +//! +//! Note: The locale is set in main() BEFORE any benchmark runs because +//! the locale is cached on first access via OnceLock and cannot be changed afterwards. + +use divan::{Bencher, black_box}; +use tempfile::NamedTempFile; +use uu_sort::uumain; +use uucore::benchmark::{run_util_function, setup_test_file, text_data}; + +/// Benchmark German locale-specific data with German locale +#[divan::bench] +fn sort_german_de_locale(bencher: Bencher) { + let data = text_data::generate_german_locale_data(50_000); + let file_path = setup_test_file(&data); + let output_file = NamedTempFile::new().unwrap(); + let output_path = output_file.path().to_str().unwrap().to_string(); + + bencher.bench(|| { + black_box(run_util_function( + uumain, + &["-o", &output_path, file_path.to_str().unwrap()], + )); + }); +} + +fn main() { + // Set German locale BEFORE any benchmarks run. + // This must happen before divan::main() because the locale is cached + // on first access via OnceLock and cannot be changed afterwards. + unsafe { + std::env::set_var("LC_ALL", "de_DE.UTF-8"); + } + divan::main(); +} diff --git a/src/uu/sort/benches/sort_locale_utf8_bench.rs b/src/uu/sort/benches/sort_locale_utf8_bench.rs new file mode 100644 index 00000000000..b0ebb340d99 --- /dev/null +++ b/src/uu/sort/benches/sort_locale_utf8_bench.rs @@ -0,0 +1,102 @@ +// This file is part of the uutils coreutils package. +// +// For the full copyright and license information, please view the LICENSE +// file that was distributed with this source code. + +//! Benchmarks for sort with UTF-8 locale (locale-aware collation). +//! +//! Note: The locale is set in main() BEFORE any benchmark runs because +//! the locale is cached on first access via OnceLock and cannot be changed afterwards. + +use divan::{Bencher, black_box}; +use tempfile::NamedTempFile; +use uu_sort::uumain; +use uucore::benchmark::{run_util_function, setup_test_file, text_data}; + +/// Benchmark ASCII-only data sorting with UTF-8 locale +#[divan::bench] +fn sort_ascii_utf8_locale(bencher: Bencher) { + let data = text_data::generate_ascii_data_simple(100_000); + let file_path = setup_test_file(&data); + let output_file = NamedTempFile::new().unwrap(); + let output_path = output_file.path().to_str().unwrap().to_string(); + + bencher.bench(|| { + black_box(run_util_function( + uumain, + &["-o", &output_path, file_path.to_str().unwrap()], + )); + }); +} + +/// Benchmark mixed ASCII/Unicode data with UTF-8 locale +#[divan::bench] +fn sort_mixed_utf8_locale(bencher: Bencher) { + let data = text_data::generate_mixed_locale_data(50_000); + let file_path = setup_test_file(&data); + let output_file = NamedTempFile::new().unwrap(); + let output_path = output_file.path().to_str().unwrap().to_string(); + + bencher.bench(|| { + black_box(run_util_function( + uumain, + &["-o", &output_path, file_path.to_str().unwrap()], + )); + }); +} + +/// Benchmark numeric sorting with UTF-8 locale +#[divan::bench] +fn sort_numeric_utf8_locale(bencher: Bencher) { + let mut data = Vec::new(); + for i in 0..50_000 { + let line = format!("{}\n", 50_000 - i); + data.extend_from_slice(line.as_bytes()); + } + let file_path = setup_test_file(&data); + + bencher.bench(|| { + black_box(run_util_function( + uumain, + &["-n", file_path.to_str().unwrap()], + )); + }); +} + +/// Benchmark reverse sorting with UTF-8 locale +#[divan::bench] +fn sort_reverse_utf8_locale(bencher: Bencher) { + let data = text_data::generate_mixed_locale_data(50_000); + let file_path = setup_test_file(&data); + + bencher.bench(|| { + black_box(run_util_function( + uumain, + &["-r", file_path.to_str().unwrap()], + )); + }); +} + +/// Benchmark unique sorting with UTF-8 locale +#[divan::bench] +fn sort_unique_utf8_locale(bencher: Bencher) { + let data = text_data::generate_mixed_locale_data(50_000); + let file_path = setup_test_file(&data); + + bencher.bench(|| { + black_box(run_util_function( + uumain, + &["-u", file_path.to_str().unwrap()], + )); + }); +} + +fn main() { + // Set UTF-8 locale BEFORE any benchmarks run. + // This must happen before divan::main() because the locale is cached + // on first access via OnceLock and cannot be changed afterwards. + unsafe { + std::env::set_var("LC_ALL", "en_US.UTF-8"); + } + divan::main(); +}