|
// This file is part of the uutils coreutils package.
//
// For the full copyright and license information, please view the LICENSE
// file that was distributed with this source code.

use divan::{Bencher, black_box};
use std::fs::File;
use std::io::{BufWriter, Write};
use tempfile::TempDir;

/// Generate topological sort test data forming a single linear chain:
/// `node0 -> node1 -> ... -> node{n-1}`, one `"src dst\n"` edge per line.
///
/// Returns the edge list as raw bytes ready to be written to a file.
/// For `num_nodes` of 0 or 1 the result is empty (a chain has no edges).
fn generate_linear_chain(num_nodes: usize) -> Vec<u8> {
    let num_edges = num_nodes.saturating_sub(1);
    // Rough size hint (~16 bytes per "nodeX nodeY\n" line) avoids
    // repeated grow-and-copy for the million-node benchmark input.
    let mut data = Vec::with_capacity(num_edges * 16);

    for i in 0..num_edges {
        // writeln! into a Vec<u8> is infallible and avoids allocating an
        // intermediate String per edge, unlike format! + extend_from_slice.
        writeln!(data, "node{} node{}", i, i + 1).unwrap();
    }

    data
}
| 21 | + |
/// Generate a tree-shaped DAG as `"parent child\n"` edge lines.
///
/// Level `l` holds `branching_factor^l` nodes and every node at a
/// non-leaf level gets `branching_factor` children at the next level.
/// Node ids are assigned breadth-first (level by level), so the first id
/// of each level is the running sum of the previous level sizes.
///
/// `depth` of 0 or 1 yields an empty edge list (no parent/child pairs).
fn generate_tree_dag(depth: usize, branching_factor: usize) -> Vec<u8> {
    let mut data = Vec::new();
    // First node id of the current level.
    let mut level_start = 0;

    // Only non-leaf levels emit edges, so iterate 0..depth-1 instead of
    // re-testing the loop-invariant `level + 1 < depth` once per child.
    for level in 0..depth.saturating_sub(1) {
        let nodes_at_level = branching_factor.pow(level as u32);

        for parent in 0..nodes_at_level {
            let parent_id = level_start + parent;
            for child in 0..branching_factor {
                // Children of `parent` sit in the next level's id range,
                // packed contiguously per parent.
                let child_id =
                    level_start + nodes_at_level + parent * branching_factor + child;
                writeln!(data, "node{parent_id} node{child_id}").unwrap();
            }
        }
        level_start += nodes_at_level;
    }

    data
}
| 45 | + |
/// Generate a layered, diamond-like DAG: nodes are partitioned into
/// roughly `sqrt(num_nodes)` levels and each node points at one to three
/// nodes of the following level, producing cross-dependencies.
fn generate_complex_dag(num_nodes: usize) -> Vec<u8> {
    let mut out = Vec::new();

    // At least 4 levels; ~sqrt(n) so level count and level width scale together.
    let levels = ((num_nodes as f64).sqrt() as usize).max(4);
    let per_level = num_nodes / levels;

    for level in 0..levels - 1 {
        let cur_lo = level * per_level;
        let next_lo = (level + 1) * per_level;
        let cur_hi = ((level + 1) * per_level).min(num_nodes);
        let next_hi = ((level + 2) * per_level).min(num_nodes);
        let next_width = next_hi - next_lo;

        for src in cur_lo..cur_hi {
            // Fan-out of 1..=3 chosen from the source id, capped by how
            // many nodes the next level actually has (may be 0 at the tail).
            let fan_out = (src % 3 + 1).min(next_width);
            for k in 0..fan_out {
                let dst = next_lo + (src + k) % next_width;
                writeln!(out, "node{src} node{dst}").unwrap();
            }
        }
    }

    out
}
| 72 | + |
/// Generate many parallel chains (`chainK_0 -> chainK_1 -> ...`) where
/// every third chain additionally receives an edge from the middle of the
/// previous chain, occasionally merging otherwise independent paths.
fn generate_wide_dag(num_nodes: usize) -> Vec<u8> {
    let mut out = Vec::new();

    // Aim for ~50 nodes per chain, but never fewer than 5 chains.
    let num_chains = (num_nodes / 50).max(5);
    let chain_len = num_nodes / num_chains;

    for chain in 0..num_chains {
        let lo = chain * chain_len;
        let hi = ((chain + 1) * chain_len).min(num_nodes);

        // Sequential edges within the chain; node labels use indices
        // local to the chain (0-based), hence the `- lo` adjustment.
        for i in lo..hi.saturating_sub(1) {
            let a = i - lo;
            writeln!(out, "chain{chain}_{a} chain{chain}_{}", a + 1).unwrap();
        }

        // Every third chain gets fed from the midpoint of its predecessor.
        if chain > 0 && chain % 3 == 0 {
            let prev = chain - 1;
            let prev_mid = (prev * chain_len + chain_len / 2).min(num_nodes - 1);
            let target = lo + chain_len / 4;
            writeln!(
                out,
                "chain{prev}_{} chain{chain}_{}",
                prev_mid - prev * chain_len,
                target - lo
            )
            .unwrap();
        }
    }

    out
}
| 119 | + |
| 120 | +/// Create a temporary file with test data |
| 121 | +fn create_test_file(data: &[u8], temp_dir: &TempDir) -> std::path::PathBuf { |
| 122 | + let file_path = temp_dir.path().join("test_data.txt"); |
| 123 | + let file = File::create(&file_path).unwrap(); |
| 124 | + let mut writer = BufWriter::new(file); |
| 125 | + writer.write_all(data).unwrap(); |
| 126 | + writer.flush().unwrap(); |
| 127 | + file_path |
| 128 | +} |
| 129 | + |
/// Run the release-build `coreutils tsort` binary with the given extra
/// arguments, discarding stdout/stderr. Returns 0 on success, 1 otherwise.
fn run_uutils_tsort(args: &[&str]) -> i32 {
    use std::process::{Command, Stdio};

    // Spawn the real binary instead of calling uumain in-process, which
    // keeps stdout handling out of the measured code path.
    let mut cmd = Command::new("../../../target/release/coreutils");
    cmd.arg("tsort")
        .args(args)
        .stdout(Stdio::null())
        .stderr(Stdio::null());

    let status = cmd.status().expect("Failed to execute tsort command");
    if status.success() { 0 } else { 1 }
}
| 144 | + |
| 145 | +/// Benchmark linear chain graphs of different sizes |
| 146 | +/// This tests the performance improvements mentioned in PR #8694 |
| 147 | +#[divan::bench(args = [1_000, 10_000, 100_000, 1_000_000])] |
| 148 | +fn tsort_linear_chain(bencher: Bencher, num_nodes: usize) { |
| 149 | + let temp_dir = tempfile::tempdir().unwrap(); |
| 150 | + let data = generate_linear_chain(num_nodes); |
| 151 | + let file_path = create_test_file(&data, &temp_dir); |
| 152 | + let file_path_str = file_path.to_str().unwrap(); |
| 153 | + |
| 154 | + bencher.bench(|| { |
| 155 | + black_box(run_uutils_tsort(&[file_path_str])); |
| 156 | + }); |
| 157 | +} |
| 158 | + |
| 159 | +/// Benchmark tree-like DAG structures |
| 160 | +#[divan::bench(args = [(4, 3), (5, 3), (6, 2), (7, 2)])] |
| 161 | +fn tsort_tree_dag(bencher: Bencher, (depth, branching): (usize, usize)) { |
| 162 | + let temp_dir = tempfile::tempdir().unwrap(); |
| 163 | + let data = generate_tree_dag(depth, branching); |
| 164 | + let file_path = create_test_file(&data, &temp_dir); |
| 165 | + let file_path_str = file_path.to_str().unwrap(); |
| 166 | + |
| 167 | + bencher.bench(|| { |
| 168 | + black_box(run_uutils_tsort(&[file_path_str])); |
| 169 | + }); |
| 170 | +} |
| 171 | + |
| 172 | +/// Benchmark complex DAG with cross-dependencies |
| 173 | +#[divan::bench(args = [1_000, 5_000, 10_000, 50_000])] |
| 174 | +fn tsort_complex_dag(bencher: Bencher, num_nodes: usize) { |
| 175 | + let temp_dir = tempfile::tempdir().unwrap(); |
| 176 | + let data = generate_complex_dag(num_nodes); |
| 177 | + let file_path = create_test_file(&data, &temp_dir); |
| 178 | + let file_path_str = file_path.to_str().unwrap(); |
| 179 | + |
| 180 | + bencher.bench(|| { |
| 181 | + black_box(run_uutils_tsort(&[file_path_str])); |
| 182 | + }); |
| 183 | +} |
| 184 | + |
| 185 | +/// Benchmark wide DAG with many parallel chains |
| 186 | +/// This should stress the hashmap optimizations from PR #8694 |
| 187 | +#[divan::bench(args = [10_000, 50_000, 100_000])] |
| 188 | +fn tsort_wide_dag(bencher: Bencher, num_nodes: usize) { |
| 189 | + let temp_dir = tempfile::tempdir().unwrap(); |
| 190 | + let data = generate_wide_dag(num_nodes); |
| 191 | + let file_path = create_test_file(&data, &temp_dir); |
| 192 | + let file_path_str = file_path.to_str().unwrap(); |
| 193 | + |
| 194 | + bencher.bench(|| { |
| 195 | + black_box(run_uutils_tsort(&[file_path_str])); |
| 196 | + }); |
| 197 | +} |
| 198 | + |
| 199 | +/// Benchmark input parsing vs computation by using files with different edge densities |
| 200 | +#[divan::bench(args = [10_000, 50_000])] |
| 201 | +fn tsort_input_parsing_heavy(bencher: Bencher, num_edges: usize) { |
| 202 | + let temp_dir = tempfile::tempdir().unwrap(); |
| 203 | + // Create a scenario with many edges but relatively few unique nodes |
| 204 | + // This stresses the input parsing and graph construction optimizations |
| 205 | + let num_unique_nodes = (num_edges as f64).sqrt() as usize; |
| 206 | + let mut data = Vec::new(); |
| 207 | + |
| 208 | + for i in 0..num_edges { |
| 209 | + let from = i % num_unique_nodes; |
| 210 | + let to = (i / num_unique_nodes) % num_unique_nodes; |
| 211 | + if from != to { |
| 212 | + data.extend_from_slice(format!("n{from} n{to}\n").as_bytes()); |
| 213 | + } |
| 214 | + } |
| 215 | + |
| 216 | + let file_path = create_test_file(&data, &temp_dir); |
| 217 | + let file_path_str = file_path.to_str().unwrap(); |
| 218 | + |
| 219 | + bencher.bench(|| { |
| 220 | + black_box(run_uutils_tsort(&[file_path_str])); |
| 221 | + }); |
| 222 | +} |
| 223 | + |
fn main() {
    // Hand control to divan: it discovers and runs every #[divan::bench]
    // function defined above, honoring CLI filters and options.
    divan::main();
}
0 commit comments