|
//! Parallel query benchmark to measure concurrent access performance
//!
//! This benchmark uses the SAME queries as profile_bench.rs so results are directly comparable.
//! It demonstrates that the HilbertRTree is safe to share across threads without interior
//! mutability, since queries only require &self (immutable borrow).
|
| 7 | +use aabb::HilbertRTree; |
| 8 | +use rand::Rng; |
| 9 | +use rand::SeedableRng; |
| 10 | +use std::sync::Arc; |
| 11 | +use std::thread; |
| 12 | +use std::time::Instant; |
| 13 | + |
| 14 | +fn main() { |
| 15 | + println!("AABB Parallel Query Benchmark (vs profile_bench.rs)"); |
| 16 | + println!("===================================================\n"); |
| 17 | + |
| 18 | + let num_items = 1_000_000; |
| 19 | + let num_tests = 1_000; |
| 20 | + let num_threads = 10; |
| 21 | + |
| 22 | + // Create MT19937 RNG with fixed seed for reproducibility (SAME as profile_bench.rs) |
| 23 | + let seed = 95756739_u64; |
| 24 | + let mut rng = rand::rngs::StdRng::seed_from_u64(seed); |
| 25 | + |
| 26 | + // Generate random boxes for indexing (coordinate space: 100x100) |
| 27 | + let mut coords = Vec::new(); |
| 28 | + println!("Generating {} random boxes...", num_items); |
| 29 | + let gen_start = Instant::now(); |
| 30 | + for _ in 0..num_items { |
| 31 | + let min_x = rng.random_range(0.0..100.0); |
| 32 | + let min_y = rng.random_range(0.0..100.0); |
| 33 | + let max_x = (min_x + rng.random_range(0.0..1.0_f64)).min(100.0); |
| 34 | + let max_y = (min_y + rng.random_range(0.0..1.0_f64)).min(100.0); |
| 35 | + |
| 36 | + coords.push(min_x); |
| 37 | + coords.push(min_y); |
| 38 | + coords.push(max_x); |
| 39 | + coords.push(max_y); |
| 40 | + } |
| 41 | + let gen_time = gen_start.elapsed(); |
| 42 | + println!(" Generated in {:.2}ms\n", gen_time.as_secs_f64() * 1000.0); |
| 43 | + |
| 44 | + // Build index (SAME as profile_bench.rs) |
| 45 | + println!("Building index..."); |
| 46 | + let build_start = Instant::now(); |
| 47 | + let mut tree = HilbertRTree::with_capacity(num_items); |
| 48 | + |
| 49 | + for chunk in coords.chunks(4) { |
| 50 | + if chunk.len() == 4 { |
| 51 | + tree.add(chunk[0], chunk[1], chunk[2], chunk[3]); |
| 52 | + } |
| 53 | + } |
| 54 | + tree.build(); |
| 55 | + let build_time = build_start.elapsed(); |
| 56 | + println!(" Index built in {:.2}ms\n", build_time.as_secs_f64() * 1000.0); |
| 57 | + |
| 58 | + // Wrap tree in Arc for safe sharing across threads |
| 59 | + let tree = Arc::new(tree); |
| 60 | + |
| 61 | + // Generate test queries (SAME as profile_bench.rs) |
| 62 | + let mut test_queries_small = Vec::new(); |
| 63 | + let mut test_queries_large = Vec::new(); |
| 64 | + |
| 65 | + for _ in 0..num_tests { |
| 66 | + // Small query (0.01% coverage) |
| 67 | + let min_x = rng.random_range(0.0..99.0); |
| 68 | + let min_y = rng.random_range(0.0..99.0); |
| 69 | + test_queries_small.push((min_x, min_y, min_x + 1.0, min_y + 1.0)); |
| 70 | + |
| 71 | + // Large query (10% coverage) |
| 72 | + let min_x = rng.random_range(0.0..69.0); |
| 73 | + let min_y = rng.random_range(0.0..69.0); |
| 74 | + test_queries_large.push((min_x, min_y, min_x + 31.62, min_y + 31.62)); |
| 75 | + } |
| 76 | + |
| 77 | + // Parallel benchmark: Small queries |
| 78 | + println!("Profiling query_intersecting (parallel):"); |
| 79 | + println!("{}", "-".repeat(40)); |
| 80 | + |
| 81 | + let queries_small = Arc::new(test_queries_small); |
| 82 | + let parallel_start = Instant::now(); |
| 83 | + |
| 84 | + let handles: Vec<_> = (0..num_threads) |
| 85 | + .map(|_| { |
| 86 | + let tree_clone = Arc::clone(&tree); |
| 87 | + let queries_clone = Arc::clone(&queries_small); |
| 88 | + |
| 89 | + thread::spawn(move || { |
| 90 | + let mut results = Vec::new(); |
| 91 | + for (min_x, min_y, max_x, max_y) in queries_clone.iter() { |
| 92 | + tree_clone.query_intersecting(*min_x, *min_y, *max_x, *max_y, &mut results); |
| 93 | + } |
| 94 | + }) |
| 95 | + }) |
| 96 | + .collect(); |
| 97 | + |
| 98 | + for handle in handles { |
| 99 | + handle.join().unwrap(); |
| 100 | + } |
| 101 | + |
| 102 | + let parallel_elapsed = parallel_start.elapsed(); |
| 103 | + let total_queries = num_threads * num_tests; |
| 104 | + println!( |
| 105 | + " {} small queries (parallel {}×{}): {:.2}ms ({:.3}µs/query)", |
| 106 | + total_queries, |
| 107 | + num_threads, |
| 108 | + num_tests, |
| 109 | + parallel_elapsed.as_secs_f64() * 1000.0, |
| 110 | + parallel_elapsed.as_secs_f64() * 1_000_000.0 / total_queries as f64 |
| 111 | + ); |
| 112 | + |
| 113 | + // Parallel benchmark: Large queries |
| 114 | + let queries_large = Arc::new(test_queries_large); |
| 115 | + let parallel_start = Instant::now(); |
| 116 | + |
| 117 | + let handles: Vec<_> = (0..num_threads) |
| 118 | + .map(|_| { |
| 119 | + let tree_clone = Arc::clone(&tree); |
| 120 | + let queries_clone = Arc::clone(&queries_large); |
| 121 | + |
| 122 | + thread::spawn(move || { |
| 123 | + let mut results = Vec::new(); |
| 124 | + for (min_x, min_y, max_x, max_y) in queries_clone.iter() { |
| 125 | + tree_clone.query_intersecting(*min_x, *min_y, *max_x, *max_y, &mut results); |
| 126 | + } |
| 127 | + }) |
| 128 | + }) |
| 129 | + .collect(); |
| 130 | + |
| 131 | + for handle in handles { |
| 132 | + handle.join().unwrap(); |
| 133 | + } |
| 134 | + |
| 135 | + let parallel_elapsed = parallel_start.elapsed(); |
| 136 | + println!( |
| 137 | + " {} large queries (parallel {}×{}): {:.2}ms ({:.3}µs/query)", |
| 138 | + total_queries, |
| 139 | + num_threads, |
| 140 | + num_tests, |
| 141 | + parallel_elapsed.as_secs_f64() * 1000.0, |
| 142 | + parallel_elapsed.as_secs_f64() * 1_000_000.0 / total_queries as f64 |
| 143 | + ); |
| 144 | + |
| 145 | + // Parallel benchmark: query_nearest_k |
| 146 | + println!("\nProfiling query_nearest_k (parallel):"); |
| 147 | + println!("{}", "-".repeat(40)); |
| 148 | + |
| 149 | + let k_values = vec![1, 10, 100, 1000]; |
| 150 | + let coords = Arc::new(coords); |
| 151 | + |
| 152 | + for k in k_values { |
| 153 | + let num_queries = if k == 1000 { 100 } else { num_tests }; |
| 154 | + let total_parallel_queries = num_threads * num_queries; |
| 155 | + |
| 156 | + let parallel_start = Instant::now(); |
| 157 | + |
| 158 | + let handles: Vec<_> = (0..num_threads) |
| 159 | + .map(|thread_id| { |
| 160 | + let tree_clone = Arc::clone(&tree); |
| 161 | + let coords_clone = Arc::clone(&coords); |
| 162 | + |
| 163 | + thread::spawn(move || { |
| 164 | + let mut results = Vec::new(); |
| 165 | + for i in 0..num_queries { |
| 166 | + let idx = (thread_id * num_queries + i) % (coords_clone.len() / 4); |
| 167 | + let x = coords_clone[4 * idx]; |
| 168 | + let y = coords_clone[4 * idx + 1]; |
| 169 | + tree_clone.query_nearest_k(x, y, k, &mut results); |
| 170 | + } |
| 171 | + }) |
| 172 | + }) |
| 173 | + .collect(); |
| 174 | + |
| 175 | + for handle in handles { |
| 176 | + handle.join().unwrap(); |
| 177 | + } |
| 178 | + |
| 179 | + let parallel_elapsed = parallel_start.elapsed(); |
| 180 | + println!( |
| 181 | + " {} queries k={} (parallel {}×{}): {:.2}ms ({:.3}µs/query)", |
| 182 | + total_parallel_queries, |
| 183 | + k, |
| 184 | + num_threads, |
| 185 | + num_queries, |
| 186 | + parallel_elapsed.as_secs_f64() * 1000.0, |
| 187 | + parallel_elapsed.as_secs_f64() * 1_000_000.0 / total_parallel_queries as f64 |
| 188 | + ); |
| 189 | + } |
| 190 | + |
| 191 | + println!("\n{}", "=".repeat(40)); |
| 192 | + println!("Conclusion:"); |
| 193 | + println!("The HilbertRTree is safe to share across threads using Arc!"); |
| 194 | + println!("All queries use &self → lock-free parallel access."); |
| 195 | +} |
| 196 | + |
| 197 | + |
/*
cargo bench --bench profile_parallel

Generating 1000000 random boxes...
 Generated in 26.95ms

Building index...
 Index built in 133.13ms

Profiling query_intersecting (parallel):
----------------------------------------
 10000 small queries (parallel 10×1000): 6.82ms (0.682µs/query)
 10000 large queries (parallel 10×1000): 4880.27ms (488.027µs/query)

Profiling query_nearest_k (parallel):
----------------------------------------
 10000 queries k=1 (parallel 10×1000): 11.07ms (1.107µs/query)
 10000 queries k=10 (parallel 10×1000): 11.86ms (1.186µs/query)
 10000 queries k=100 (parallel 10×1000): 24.00ms (2.400µs/query)
 1000 queries k=1000 (parallel 10×100): 15.13ms (15.128µs/query)
*/