Skip to content

Commit e2267f5

Browse files
committed
Reverted optimization and added perf example for i32
1 parent b489dee commit e2267f5

File tree

3 files changed

+93
-12
lines changed

3 files changed

+93
-12
lines changed

examples/perf_i32.rs

Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,85 @@
1+
//! Performance profiling example for i32 variant query_intersecting
2+
//!
3+
//! This example performs intensive query_intersecting operations on a large spatial index
4+
//! using the i32 coordinate variant for comparison with the f64 version.
5+
//! Designed to be used with low-level profilers like `samply`:
6+
//!
7+
//! ```bash
8+
//! samply record cargo run --release --example perf_i32
9+
//! ```
10+
11+
use aabb::HilbertRTreeI32;
12+
use std::time::Instant;
13+
14+
fn main() {
15+
println!("Building large spatial index (i32 variant)...");
16+
let mut tree = HilbertRTreeI32::with_capacity(1_000_000);
17+
18+
// Generate 1 million random bounding boxes with i32 coordinates
19+
let mut rng = 12345u64; // Simple LCG random number generator
20+
for _ in 0..1_000_000 {
21+
rng = rng.wrapping_mul(6364136223846793005).wrapping_add(1442695040888963407);
22+
let x1 = ((rng >> 32) as i32).abs() % 1000;
23+
24+
rng = rng.wrapping_mul(6364136223846793005).wrapping_add(1442695040888963407);
25+
let y1 = ((rng >> 32) as i32).abs() % 1000;
26+
27+
rng = rng.wrapping_mul(6364136223846793005).wrapping_add(1442695040888963407);
28+
let size_x = ((rng >> 32) as i32).abs() % 50 + 1;
29+
30+
rng = rng.wrapping_mul(6364136223846793005).wrapping_add(1442695040888963407);
31+
let size_y = ((rng >> 32) as i32).abs() % 50 + 1;
32+
33+
tree.add(x1, y1, x1 + size_x, y1 + size_y);
34+
}
35+
36+
let build_start = Instant::now();
37+
tree.build();
38+
let build_duration = build_start.elapsed();
39+
40+
let mut results = Vec::new();
41+
let query_start = Instant::now();
42+
43+
// Perform 100,000 intensive queries for profiling
44+
// Each query window spans about 10% of each axis (roughly 1% of the total area)
45+
for _ in 0..100_000 {
46+
rng = rng.wrapping_mul(6364136223846793005).wrapping_add(1442695040888963407);
47+
let center_x = ((rng >> 32) as i32).abs() % 1000;
48+
49+
rng = rng.wrapping_mul(6364136223846793005).wrapping_add(1442695040888963407);
50+
let center_y = ((rng >> 32) as i32).abs() % 1000;
51+
52+
let query_size = 100; // 10% of each axis
53+
let min_x = (center_x - query_size / 2).max(0);
54+
let min_y = (center_y - query_size / 2).max(0);
55+
let max_x = (center_x + query_size / 2).min(1000);
56+
let max_y = (center_y + query_size / 2).min(1000);
57+
58+
tree.query_intersecting(min_x, min_y, max_x, max_y, &mut results);
59+
}
60+
61+
let query_duration = query_start.elapsed();
62+
63+
println!(
64+
"\nCompleted 100,000 queries in {:.2}ms ({:.2}µs per query)",
65+
query_duration.as_secs_f64() * 1000.0,
66+
query_duration.as_secs_f64() * 1_000_000.0 / 100_000.0
67+
);
68+
69+
println!("\nProfile Summary (i32 variant):");
70+
println!(" Building: {:.2}ms", build_duration.as_secs_f64() * 1000.0);
71+
println!(" Querying: {:.2}ms", query_duration.as_secs_f64() * 1000.0);
72+
println!(" Total: {:.2}ms", (build_duration + query_duration).as_secs_f64() * 1000.0);
73+
}
74+
75+
76+
/*
77+
Base
78+
Completed 100,000 queries in 11622.38ms (116.22µs per query)
79+
80+
Profile Summary (i32 variant):
81+
Building: 96.11ms
82+
Querying: 11622.38ms
83+
Total: 11718.49ms
84+
85+
*/

src/hilbert_rtree.rs

Lines changed: 4 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1174,25 +1174,21 @@ impl HilbertRTree {
11741174
}
11751175
}
11761176

1177-
// --- Private helpers ---
1178-
1179-
/// Get box at position (true zero-copy: direct pointer dereference)
1177+
/// Get box at position using read_unaligned
11801178
#[inline(always)]
11811179
pub(crate) fn get_box(&self, pos: usize) -> Box {
11821180
let idx = HEADER_SIZE + pos * size_of::<Box>();
11831181
unsafe {
1184-
// Direct dereference of the pointer - compiler generates a single memcpy at most
1185-
// LLVM will optimize this to a load if alignment allows, memcpy only if needed
1186-
*((&self.data[idx]) as *const u8 as *const Box)
1182+
std::ptr::read_unaligned(&self.data[idx] as *const u8 as *const Box)
11871183
}
11881184
}
11891185

1190-
/// Get index at position (true zero-copy: direct pointer dereference)
1186+
/// Get index at position using read_unaligned
11911187
#[inline(always)]
11921188
pub(crate) fn get_index(&self, pos: usize) -> u32 {
11931189
let indices_start = HEADER_SIZE + self.total_nodes * size_of::<Box>();
11941190
unsafe {
1195-
*((&self.data[indices_start + pos * size_of::<u32>()] ) as *const u8 as *const u32)
1191+
std::ptr::read_unaligned(&self.data[indices_start + pos * size_of::<u32>()] as *const u8 as *const u32)
11961192
}
11971193
}
11981194

src/hilbert_rtree_i32.rs

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -724,21 +724,21 @@ impl HilbertRTreeI32 {
724724

725725
// --- Private helpers ---
726726

727-
/// Get box at position (true zero-copy: direct pointer dereference)
727+
/// Get box at position using read_unaligned
728728
#[inline]
729729
pub(crate) fn get_box(&self, pos: usize) -> BoxI32 {
730730
let idx = HEADER_SIZE + pos * size_of::<BoxI32>();
731731
unsafe {
732-
*((&self.data[idx]) as *const u8 as *const BoxI32)
732+
std::ptr::read_unaligned(&self.data[idx] as *const u8 as *const BoxI32)
733733
}
734734
}
735735

736-
/// Get index at position (true zero-copy: direct pointer dereference)
736+
/// Get index at position using read_unaligned
737737
#[inline(always)]
738738
pub(crate) fn get_index(&self, pos: usize) -> u32 {
739739
let indices_start = HEADER_SIZE + self.total_nodes * size_of::<BoxI32>();
740740
unsafe {
741-
*((&self.data[indices_start + pos * size_of::<u32>()] ) as *const u8 as *const u32)
741+
std::ptr::read_unaligned(&self.data[indices_start + pos * size_of::<u32>()] as *const u8 as *const u32)
742742
}
743743
}
744744

0 commit comments

Comments
 (0)