Skip to content

Commit b7565f5

Browse files
committed
tsort: write a benchmark
1 parent 0e166f5 commit b7565f5

File tree

3 files changed

+236
-0
lines changed

3 files changed

+236
-0
lines changed

Cargo.lock

Lines changed: 2 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

src/uu/tsort/Cargo.toml

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,3 +26,11 @@ fluent = { workspace = true }
2626
[[bin]]
2727
name = "tsort"
2828
path = "src/main.rs"
29+
30+
[dev-dependencies]
31+
divan = { workspace = true }
32+
tempfile = { workspace = true }
33+
34+
[[bench]]
35+
name = "tsort_bench"
36+
harness = false
Lines changed: 226 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,226 @@
1+
// This file is part of the uutils coreutils package.
2+
//
3+
// For the full copyright and license information, please view the LICENSE
4+
// file that was distributed with this source code.
5+
6+
use divan::{Bencher, black_box};
7+
use std::fs::File;
8+
use std::io::{BufWriter, Write};
9+
use tempfile::TempDir;
10+
11+
/// Generate tsort input describing a single linear chain.
///
/// Emits one `nodeI nodeI+1` edge per line for `I` in `0..num_nodes - 1`, so the
/// result describes `node0 -> node1 -> ... -> node{num_nodes - 1}`. For
/// `num_nodes` of 0 or 1 there are no edges and the returned buffer is empty.
fn generate_linear_chain(num_nodes: usize) -> Vec<u8> {
    let mut data = Vec::new();

    for i in 0..num_nodes.saturating_sub(1) {
        // Format directly into the buffer instead of building a temporary
        // `String` for every edge.
        writeln!(data, "node{i} node{}", i + 1).expect("writing to a Vec<u8> cannot fail");
    }

    data
}
21+
22+
/// Generate tsort input describing a complete tree with `depth` levels.
///
/// Nodes are numbered level by level starting at `node0` (the root); every
/// node above the deepest level gets `branching_factor` children. One
/// `nodeP nodeC` edge is emitted per line, parents in increasing id order and
/// each parent's children in increasing id order.
///
/// A `depth` of 0 or 1, or a `branching_factor` of 0, yields an empty buffer.
fn generate_tree_dag(depth: usize, branching_factor: usize) -> Vec<u8> {
    let mut data = Vec::new();

    // Id of the first node on the current level and the number of nodes on it.
    // The width is tracked incrementally rather than recomputed with `pow`,
    // which avoids a lossy `usize as u32` cast of the level index.
    let mut level_start = 0usize;
    let mut level_width = 1usize;

    // The deepest level has no children, so only `depth - 1` levels emit edges.
    for _ in 1..depth {
        let next_level_start = level_start + level_width;

        for parent in 0..level_width {
            let parent_id = level_start + parent;
            let first_child = next_level_start + parent * branching_factor;

            for child_id in first_child..first_child + branching_factor {
                writeln!(data, "node{parent_id} node{child_id}")
                    .expect("writing to a Vec<u8> cannot fail");
            }
        }

        level_start = next_level_start;
        level_width *= branching_factor;
    }

    data
}
45+
46+
/// Generate tsort input for a layered DAG with cross-dependencies.
///
/// The nodes are split into `max(floor(sqrt(num_nodes)), 4)` levels of
/// `num_nodes / levels` nodes each (any remainder nodes are unused). Every node
/// `i` on a level gets `(i % 3) + 1` edges (capped at the level width) to
/// consecutive nodes of the next level, wrapping around within that level.
/// Edges are written as `nodeI nodeT`, one per line.
///
/// When `num_nodes < 4` each level is empty and the returned buffer is empty.
fn generate_complex_dag(num_nodes: usize) -> Vec<u8> {
    let mut data = Vec::new();

    // Create a diamond-like pattern with multiple levels
    let levels = ((num_nodes as f64).sqrt() as usize).max(4);
    let nodes_per_level = num_nodes / levels;

    // `levels * nodes_per_level <= num_nodes` by construction, so every level
    // (including the target level of the last iteration) is exactly
    // `nodes_per_level` wide and no clamping against `num_nodes` is needed.
    for level in 0..levels - 1 {
        let start_current = level * nodes_per_level;
        let start_next = start_current + nodes_per_level;

        // This range is empty when `nodes_per_level == 0`, which also keeps the
        // modulo below from ever dividing by zero.
        for i in start_current..start_next {
            // Each node connects to 1-3 nodes in the next level
            let connections = ((i % 3) + 1).min(nodes_per_level);
            for j in 0..connections {
                let target = start_next + (i + j) % nodes_per_level;
                writeln!(data, "node{i} node{target}")
                    .expect("writing to a Vec<u8> cannot fail");
            }
        }
    }

    data
}
72+
73+
/// Generate tsort input for a wide DAG made of many parallel chains.
///
/// The nodes are split into `max(num_nodes / 50, 5)` chains of
/// `num_nodes / num_chains` nodes each. Chain `c` contributes the edges
/// `chain{c}_{k} chain{c}_{k+1}` for consecutive positions `k`. In addition,
/// every chain whose index is a non-zero multiple of 3 gets one bridging edge
/// from the middle (`chain_length / 2`) of the previous chain to the quarter
/// point (`chain_length / 4`) of the current chain.
///
/// Bridging edges are emitted even when the chains themselves are empty
/// (`num_nodes < 5`), in which case both endpoints are position 0. Unlike a
/// clamp against `num_nodes - 1`, this never underflows for `num_nodes == 0`.
fn generate_wide_dag(num_nodes: usize) -> Vec<u8> {
    let mut data = Vec::new();

    // Create many parallel chains that occasionally merge
    let num_chains = (num_nodes / 50).max(5);
    let chain_length = num_nodes / num_chains;

    for chain in 0..num_chains {
        // Build the chain, addressing nodes by their position inside it.
        for pos in 0..chain_length.saturating_sub(1) {
            writeln!(data, "chain{chain}_{pos} chain{chain}_{}", pos + 1)
                .expect("writing to a Vec<u8> cannot fail");
        }

        // Occasionally connect chains
        if chain > 0 && chain % 3 == 0 {
            let prev_chain = chain - 1;
            // `num_chains * chain_length <= num_nodes`, so the midpoint of the
            // previous chain is always a valid position and needs no clamping.
            let prev_mid = chain_length / 2;
            let curr_quarter = chain_length / 4;
            writeln!(
                data,
                "chain{prev_chain}_{prev_mid} chain{chain}_{curr_quarter}"
            )
            .expect("writing to a Vec<u8> cannot fail");
        }
    }

    data
}
119+
120+
/// Create a temporary file with test data
121+
fn create_test_file(data: &[u8], temp_dir: &TempDir) -> std::path::PathBuf {
122+
let file_path = temp_dir.path().join("test_data.txt");
123+
let file = File::create(&file_path).unwrap();
124+
let mut writer = BufWriter::new(file);
125+
writer.write_all(data).unwrap();
126+
writer.flush().unwrap();
127+
file_path
128+
}
129+
130+
/// Run the `tsort` applet of the multicall `coreutils` binary with `args`.
///
/// The binary is looked up at `../../../target/release/coreutils`, relative to
/// the current working directory. Its stdout and stderr are discarded.
///
/// Returns `0` if the process exited successfully and `1` otherwise. Panics if
/// the process could not be started.
fn run_uutils_tsort(args: &[&str]) -> i32 {
    use std::process::{Command, Stdio};

    // Use the binary instead of calling uumain directly to avoid stdout issues
    let status = Command::new("../../../target/release/coreutils")
        .arg("tsort")
        .args(args)
        .stdout(Stdio::null())
        .stderr(Stdio::null())
        .status()
        .expect("Failed to execute tsort command");

    if status.success() { 0 } else { 1 }
}
144+
145+
/// Benchmark linear chain graphs of different sizes
146+
/// This tests the performance improvements mentioned in PR #8694
147+
#[divan::bench(args = [1_000, 10_000, 100_000, 1_000_000])]
148+
fn tsort_linear_chain(bencher: Bencher, num_nodes: usize) {
149+
let temp_dir = tempfile::tempdir().unwrap();
150+
let data = generate_linear_chain(num_nodes);
151+
let file_path = create_test_file(&data, &temp_dir);
152+
let file_path_str = file_path.to_str().unwrap();
153+
154+
bencher.bench(|| {
155+
black_box(run_uutils_tsort(&[file_path_str]));
156+
});
157+
}
158+
159+
/// Benchmark tree-like DAG structures
160+
#[divan::bench(args = [(4, 3), (5, 3), (6, 2), (7, 2)])]
161+
fn tsort_tree_dag(bencher: Bencher, (depth, branching): (usize, usize)) {
162+
let temp_dir = tempfile::tempdir().unwrap();
163+
let data = generate_tree_dag(depth, branching);
164+
let file_path = create_test_file(&data, &temp_dir);
165+
let file_path_str = file_path.to_str().unwrap();
166+
167+
bencher.bench(|| {
168+
black_box(run_uutils_tsort(&[file_path_str]));
169+
});
170+
}
171+
172+
/// Benchmark complex DAG with cross-dependencies
173+
#[divan::bench(args = [1_000, 5_000, 10_000, 50_000])]
174+
fn tsort_complex_dag(bencher: Bencher, num_nodes: usize) {
175+
let temp_dir = tempfile::tempdir().unwrap();
176+
let data = generate_complex_dag(num_nodes);
177+
let file_path = create_test_file(&data, &temp_dir);
178+
let file_path_str = file_path.to_str().unwrap();
179+
180+
bencher.bench(|| {
181+
black_box(run_uutils_tsort(&[file_path_str]));
182+
});
183+
}
184+
185+
/// Benchmark wide DAG with many parallel chains
186+
/// This should stress the hashmap optimizations from PR #8694
187+
#[divan::bench(args = [10_000, 50_000, 100_000])]
188+
fn tsort_wide_dag(bencher: Bencher, num_nodes: usize) {
189+
let temp_dir = tempfile::tempdir().unwrap();
190+
let data = generate_wide_dag(num_nodes);
191+
let file_path = create_test_file(&data, &temp_dir);
192+
let file_path_str = file_path.to_str().unwrap();
193+
194+
bencher.bench(|| {
195+
black_box(run_uutils_tsort(&[file_path_str]));
196+
});
197+
}
198+
199+
/// Benchmark input parsing vs computation by using files with different edge densities
200+
#[divan::bench(args = [10_000, 50_000])]
201+
fn tsort_input_parsing_heavy(bencher: Bencher, num_edges: usize) {
202+
let temp_dir = tempfile::tempdir().unwrap();
203+
// Create a scenario with many edges but relatively few unique nodes
204+
// This stresses the input parsing and graph construction optimizations
205+
let num_unique_nodes = (num_edges as f64).sqrt() as usize;
206+
let mut data = Vec::new();
207+
208+
for i in 0..num_edges {
209+
let from = i % num_unique_nodes;
210+
let to = (i / num_unique_nodes) % num_unique_nodes;
211+
if from != to {
212+
data.extend_from_slice(format!("n{from} n{to}\n").as_bytes());
213+
}
214+
}
215+
216+
let file_path = create_test_file(&data, &temp_dir);
217+
let file_path_str = file_path.to_str().unwrap();
218+
219+
bencher.bench(|| {
220+
black_box(run_uutils_tsort(&[file_path_str]));
221+
});
222+
}
223+
224+
fn main() {
225+
divan::main();
226+
}

0 commit comments

Comments
 (0)