|
| 1 | +// This file is part of the uutils coreutils package. |
| 2 | +// |
| 3 | +// For the full copyright and license information, please view the LICENSE |
| 4 | +// file that was distributed with this source code. |
| 5 | + |
| 6 | +use divan::{Bencher, black_box}; |
| 7 | +use std::{fs::File, io::Write}; |
| 8 | +use tempfile::TempDir; |
| 9 | +use uu_join::uumain; |
| 10 | +use uucore::benchmark::run_util_function; |
| 11 | + |
| 12 | +/// Create two sorted files with matching keys for join benchmarking |
| 13 | +fn create_join_files(temp_dir: &TempDir, num_lines: usize) -> (String, String) { |
| 14 | + let file1_path = temp_dir.path().join("file1.txt"); |
| 15 | + let file2_path = temp_dir.path().join("file2.txt"); |
| 16 | + |
| 17 | + let mut file1 = File::create(&file1_path).unwrap(); |
| 18 | + let mut file2 = File::create(&file2_path).unwrap(); |
| 19 | + |
| 20 | + for i in 0..num_lines { |
| 21 | + writeln!(file1, "{i:08} field1_{i} field2_{i}").unwrap(); |
| 22 | + writeln!(file2, "{i:08} data1_{i} data2_{i}").unwrap(); |
| 23 | + } |
| 24 | + |
| 25 | + ( |
| 26 | + file1_path.to_str().unwrap().to_string(), |
| 27 | + file2_path.to_str().unwrap().to_string(), |
| 28 | + ) |
| 29 | +} |
| 30 | + |
| 31 | +/// Create two files with partial overlap for join benchmarking |
| 32 | +fn create_partial_overlap_files( |
| 33 | + temp_dir: &TempDir, |
| 34 | + num_lines: usize, |
| 35 | + overlap_ratio: f64, |
| 36 | +) -> (String, String) { |
| 37 | + let file1_path = temp_dir.path().join("file1.txt"); |
| 38 | + let file2_path = temp_dir.path().join("file2.txt"); |
| 39 | + |
| 40 | + let mut file1 = File::create(&file1_path).unwrap(); |
| 41 | + let mut file2 = File::create(&file2_path).unwrap(); |
| 42 | + |
| 43 | + let overlap_count = (num_lines as f64 * overlap_ratio) as usize; |
| 44 | + |
| 45 | + // File 1: keys 0 to num_lines-1 |
| 46 | + for i in 0..num_lines { |
| 47 | + writeln!(file1, "{i:08} f1_data_{i}").unwrap(); |
| 48 | + } |
| 49 | + |
| 50 | + // File 2: keys (num_lines - overlap_count) to (2*num_lines - overlap_count - 1) |
| 51 | + let start = num_lines - overlap_count; |
| 52 | + for i in 0..num_lines { |
| 53 | + writeln!(file2, "{:08} f2_data_{}", start + i, i).unwrap(); |
| 54 | + } |
| 55 | + |
| 56 | + ( |
| 57 | + file1_path.to_str().unwrap().to_string(), |
| 58 | + file2_path.to_str().unwrap().to_string(), |
| 59 | + ) |
| 60 | +} |
| 61 | + |
| 62 | +/// Benchmark basic join with fully matching keys |
| 63 | +#[divan::bench] |
| 64 | +fn join_full_match(bencher: Bencher) { |
| 65 | + let num_lines = 10000; |
| 66 | + let temp_dir = TempDir::new().unwrap(); |
| 67 | + let (file1, file2) = create_join_files(&temp_dir, num_lines); |
| 68 | + |
| 69 | + bencher.bench(|| { |
| 70 | + black_box(run_util_function(uumain, &[&file1, &file2])); |
| 71 | + }); |
| 72 | +} |
| 73 | + |
| 74 | +/// Benchmark join with partial overlap (50%) |
| 75 | +#[divan::bench] |
| 76 | +fn join_partial_overlap(bencher: Bencher) { |
| 77 | + let num_lines = 10000; |
| 78 | + let temp_dir = TempDir::new().unwrap(); |
| 79 | + let (file1, file2) = create_partial_overlap_files(&temp_dir, num_lines, 0.5); |
| 80 | + |
| 81 | + bencher.bench(|| { |
| 82 | + black_box(run_util_function(uumain, &[&file1, &file2])); |
| 83 | + }); |
| 84 | +} |
| 85 | + |
| 86 | +/// Benchmark join with custom field separator |
| 87 | +#[divan::bench] |
| 88 | +fn join_custom_separator(bencher: Bencher) { |
| 89 | + let num_lines = 10000; |
| 90 | + let temp_dir = TempDir::new().unwrap(); |
| 91 | + let file1_path = temp_dir.path().join("file1.txt"); |
| 92 | + let file2_path = temp_dir.path().join("file2.txt"); |
| 93 | + |
| 94 | + let mut file1 = File::create(&file1_path).unwrap(); |
| 95 | + let mut file2 = File::create(&file2_path).unwrap(); |
| 96 | + |
| 97 | + for i in 0..num_lines { |
| 98 | + writeln!(file1, "{i:08}\tfield1_{i}\tfield2_{i}").unwrap(); |
| 99 | + writeln!(file2, "{i:08}\tdata1_{i}\tdata2_{i}").unwrap(); |
| 100 | + } |
| 101 | + |
| 102 | + let file1_str = file1_path.to_str().unwrap(); |
| 103 | + let file2_str = file2_path.to_str().unwrap(); |
| 104 | + |
| 105 | + bencher.bench(|| { |
| 106 | + black_box(run_util_function( |
| 107 | + uumain, |
| 108 | + &["-t", "\t", file1_str, file2_str], |
| 109 | + )); |
| 110 | + }); |
| 111 | +} |
| 112 | + |
| 113 | +fn main() { |
| 114 | + divan::main(); |
| 115 | +} |
0 commit comments