Skip to content

Commit e4d35b0

Browse files
join: add benchmarks (#10005)
1 parent ea64612 commit e4d35b0

File tree

4 files changed

+127
-0
lines changed

4 files changed

+127
-0
lines changed

.github/workflows/benchmarks.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ jobs:
3131
- { package: uu_du }
3232
- { package: uu_expand }
3333
- { package: uu_fold }
34+
- { package: uu_join }
3435
- { package: uu_ls }
3536
- { package: uu_mv }
3637
- { package: uu_nl }

Cargo.lock

Lines changed: 2 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

src/uu/join/Cargo.toml

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,3 +27,12 @@ fluent = { workspace = true }
2727
[[bin]]
2828
name = "join"
2929
path = "src/main.rs"
30+
31+
[dev-dependencies]
32+
divan = { workspace = true }
33+
tempfile = { workspace = true }
34+
uucore = { workspace = true, features = ["benchmark"] }
35+
36+
[[bench]]
37+
name = "join_bench"
38+
harness = false

src/uu/join/benches/join_bench.rs

Lines changed: 115 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,115 @@
1+
// This file is part of the uutils coreutils package.
2+
//
3+
// For the full copyright and license information, please view the LICENSE
4+
// file that was distributed with this source code.
5+
6+
use divan::{Bencher, black_box};
7+
use std::{fs::File, io::Write};
8+
use tempfile::TempDir;
9+
use uu_join::uumain;
10+
use uucore::benchmark::run_util_function;
11+
12+
/// Create two sorted files with matching keys for join benchmarking
13+
fn create_join_files(temp_dir: &TempDir, num_lines: usize) -> (String, String) {
14+
let file1_path = temp_dir.path().join("file1.txt");
15+
let file2_path = temp_dir.path().join("file2.txt");
16+
17+
let mut file1 = File::create(&file1_path).unwrap();
18+
let mut file2 = File::create(&file2_path).unwrap();
19+
20+
for i in 0..num_lines {
21+
writeln!(file1, "{i:08} field1_{i} field2_{i}").unwrap();
22+
writeln!(file2, "{i:08} data1_{i} data2_{i}").unwrap();
23+
}
24+
25+
(
26+
file1_path.to_str().unwrap().to_string(),
27+
file2_path.to_str().unwrap().to_string(),
28+
)
29+
}
30+
31+
/// Create two files with partial overlap for join benchmarking
32+
fn create_partial_overlap_files(
33+
temp_dir: &TempDir,
34+
num_lines: usize,
35+
overlap_ratio: f64,
36+
) -> (String, String) {
37+
let file1_path = temp_dir.path().join("file1.txt");
38+
let file2_path = temp_dir.path().join("file2.txt");
39+
40+
let mut file1 = File::create(&file1_path).unwrap();
41+
let mut file2 = File::create(&file2_path).unwrap();
42+
43+
let overlap_count = (num_lines as f64 * overlap_ratio) as usize;
44+
45+
// File 1: keys 0 to num_lines-1
46+
for i in 0..num_lines {
47+
writeln!(file1, "{i:08} f1_data_{i}").unwrap();
48+
}
49+
50+
// File 2: keys (num_lines - overlap_count) to (2*num_lines - overlap_count - 1)
51+
let start = num_lines - overlap_count;
52+
for i in 0..num_lines {
53+
writeln!(file2, "{:08} f2_data_{}", start + i, i).unwrap();
54+
}
55+
56+
(
57+
file1_path.to_str().unwrap().to_string(),
58+
file2_path.to_str().unwrap().to_string(),
59+
)
60+
}
61+
62+
/// Benchmark basic join with fully matching keys
63+
#[divan::bench]
64+
fn join_full_match(bencher: Bencher) {
65+
let num_lines = 10000;
66+
let temp_dir = TempDir::new().unwrap();
67+
let (file1, file2) = create_join_files(&temp_dir, num_lines);
68+
69+
bencher.bench(|| {
70+
black_box(run_util_function(uumain, &[&file1, &file2]));
71+
});
72+
}
73+
74+
/// Benchmark join with partial overlap (50%)
75+
#[divan::bench]
76+
fn join_partial_overlap(bencher: Bencher) {
77+
let num_lines = 10000;
78+
let temp_dir = TempDir::new().unwrap();
79+
let (file1, file2) = create_partial_overlap_files(&temp_dir, num_lines, 0.5);
80+
81+
bencher.bench(|| {
82+
black_box(run_util_function(uumain, &[&file1, &file2]));
83+
});
84+
}
85+
86+
/// Benchmark join with custom field separator
87+
#[divan::bench]
88+
fn join_custom_separator(bencher: Bencher) {
89+
let num_lines = 10000;
90+
let temp_dir = TempDir::new().unwrap();
91+
let file1_path = temp_dir.path().join("file1.txt");
92+
let file2_path = temp_dir.path().join("file2.txt");
93+
94+
let mut file1 = File::create(&file1_path).unwrap();
95+
let mut file2 = File::create(&file2_path).unwrap();
96+
97+
for i in 0..num_lines {
98+
writeln!(file1, "{i:08}\tfield1_{i}\tfield2_{i}").unwrap();
99+
writeln!(file2, "{i:08}\tdata1_{i}\tdata2_{i}").unwrap();
100+
}
101+
102+
let file1_str = file1_path.to_str().unwrap();
103+
let file2_str = file2_path.to_str().unwrap();
104+
105+
bencher.bench(|| {
106+
black_box(run_util_function(
107+
uumain,
108+
&["-t", "\t", file1_str, file2_str],
109+
));
110+
});
111+
}
112+
113+
fn main() {
114+
divan::main();
115+
}

0 commit comments

Comments
 (0)