Skip to content

Commit 0b1119c

Browse files
committed
OPT 9 - Calculate exact total nodes needed
1 parent d07a287 commit 0b1119c

File tree

5 files changed

+94
-25
lines changed

5 files changed

+94
-25
lines changed

Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ codegen-units = 1
2222
panic = "abort"
2323
opt-level = 3
2424
# for samply
25-
debug = 1
25+
#debug = 1
2626

2727
[dev-dependencies]
2828
rand = "0.9"

examples/perf_build.rs

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -115,5 +115,11 @@ OPT 8 - removed unnecessary vector initializations (2.3%)
115115
Average Build: 75.44ms
116116
Overall time: 8124.56ms
117117
___________________________________________________
118+
OPT 9 - Calculate exact total nodes needed
119+
Total Add time: 397.41ms
120+
Average Add: 3.97ms
121+
Total Build time: 7442.48ms
122+
Average Build: 74.42ms
123+
Overall time: 7965.43ms
118124
119125
*/

examples/perf_build_i32.rs

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -97,6 +97,14 @@ OPT 8 - removed unnecessary vector initializations (2.3%)
9797
Total Build time: 6417.60ms
9898
Average Build: 64.18ms
9999
Overall time: 6895.65ms
100+
________________________________________________
101+
OPT 9 - Calculate exact total nodes needed
102+
Build Summary (100 runs):
103+
Total Add time: 271.15ms
104+
Average Add: 2.71ms
105+
Total Build time: 6414.16ms
106+
Average Build: 64.14ms
107+
Overall time: 6755.35ms
100108
101109
102110
*/

src/hilbert_rtree.rs

Lines changed: 41 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,24 @@ fn estimate_total_nodes(num_items: usize, node_size: usize) -> usize {
6868
(num_items * node_size) / (node_size - 1) + 1
6969
}
7070

71+
/// Helper: Calculate the EXACT total number of nodes (items plus every parent level).
///
/// Starts from `num_items` leaf entries and repeatedly packs the current level into
/// groups of `node_size`, adding the size of each resulting parent level, until a
/// level with at most one node (the root) has been added. The loop runs once per
/// parent level.
///
/// * `num_items` - number of leaf entries; `0` yields `0`.
/// * `node_size` - maximum children per node. Values below 2 are treated as 2:
///   a fan-out of 1 never shrinks a level (the loop would not terminate) and a
///   fan-out of 0 would divide by zero.
///
/// Returns the sum of the sizes of all levels, leaves included.
#[inline]
fn calculate_exact_total_nodes(num_items: usize, node_size: usize) -> usize {
    if num_items == 0 {
        return 0;
    }

    // Defensive clamp: guarantees every iteration strictly reduces `level_len`
    // (while it is > 1), so the loop always reaches the root.
    let fan_out = node_size.max(2);

    let mut total_nodes = num_items;
    let mut level_len = num_items;
    loop {
        // Number of parent nodes needed to hold the current level.
        level_len = level_len.div_ceil(fan_out);
        total_nodes += level_len;
        if level_len <= 1 {
            // The level just added is the root.
            return total_nodes;
        }
    }
}
88+
7189
/// Helper: Calculate required buffer size for estimated nodes
7290
#[inline]
7391
fn estimate_buffer_size(num_items: usize, node_size: usize) -> usize {
@@ -147,36 +165,40 @@ impl HilbertRTree {
147165
let num_items = self.num_items;
148166
let node_size = self.node_size;
149167

150-
// Calculate total nodes and level bounds
168+
// Calculate exact total nodes needed (O(log n) - only tree depth iterations)
169+
let total_nodes = calculate_exact_total_nodes(num_items, node_size);
170+
let data_size = HEADER_SIZE + total_nodes * (size_of::<Box>() + size_of::<u32>());
171+
172+
// Reserve all needed space at once (avoids reallocation during build)
173+
if data_size > self.data.capacity() {
174+
self.data.reserve(data_size - self.data.capacity());
175+
self.allocated_capacity = self.data.capacity();
176+
}
177+
178+
// CRITICAL: Must zero-fill parent node memory. The build process writes parent nodes
179+
// incrementally in the loop below, and during tree traversal we may read indices/boxes
180+
// from parent positions before they're written. Zero values act as sentinels.
181+
// This is unavoidable without additional bookkeeping to track which positions are initialized.
182+
if self.data.len() < data_size {
183+
self.data.resize(data_size, 0);
184+
}
185+
186+
// Calculate level bounds
151187
let mut level_bounds = Vec::with_capacity(16); // Max tree depth ~16 for 1M items
152188
let mut count = num_items;
153-
let mut total_nodes = num_items;
154-
level_bounds.push(total_nodes);
189+
let mut level_total_nodes = num_items;
190+
level_bounds.push(level_total_nodes);
155191

156192
// Create parent levels until we have a single root
157193
loop {
158194
count = count.div_ceil(node_size);
159-
total_nodes += count;
160-
level_bounds.push(total_nodes);
195+
level_total_nodes += count;
196+
level_bounds.push(level_total_nodes);
161197
if count <= 1 {
162198
break;
163199
}
164200
}
165201

166-
// Ensure data buffer has sufficient capacity and length for all writes
167-
let data_size = HEADER_SIZE + total_nodes * (size_of::<Box>() + size_of::<u32>());
168-
if data_size > self.data.capacity() {
169-
self.data.reserve(data_size - self.data.capacity());
170-
self.allocated_capacity = self.data.capacity();
171-
}
172-
// Set length to required size (uninitialized, but we'll overwrite everything)
173-
// This avoids the zero-fill overhead of resize()
174-
if self.data.len() < data_size {
175-
unsafe {
176-
self.data.set_len(data_size);
177-
}
178-
}
179-
180202
// Write header
181203
self.data[0] = 0xfb; // magic
182204
self.data[1] = 0x01; // version 1 + double type (8)

src/hilbert_rtree_i32.rs

Lines changed: 38 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,24 @@ fn estimate_total_nodes(num_items: usize, node_size: usize) -> usize {
7878
(num_items * node_size) / (node_size - 1) + 1
7979
}
8080

81+
/// Helper: Calculate the EXACT total number of nodes (items plus every parent level).
///
/// Starts from `num_items` leaf entries and repeatedly packs the current level into
/// groups of `node_size`, adding the size of each resulting parent level, until a
/// level with at most one node (the root) has been added. The loop runs once per
/// parent level.
///
/// * `num_items` - number of leaf entries; `0` yields `0`.
/// * `node_size` - maximum children per node. Values below 2 are treated as 2:
///   a fan-out of 1 never shrinks a level (the loop would not terminate) and a
///   fan-out of 0 would divide by zero.
///
/// Returns the sum of the sizes of all levels, leaves included.
#[inline]
fn calculate_exact_total_nodes(num_items: usize, node_size: usize) -> usize {
    if num_items == 0 {
        return 0;
    }

    // Defensive clamp: guarantees every iteration strictly reduces `level_len`
    // (while it is > 1), so the loop always reaches the root.
    let fan_out = node_size.max(2);

    let mut total_nodes = num_items;
    let mut level_len = num_items;
    loop {
        // Number of parent nodes needed to hold the current level.
        level_len = level_len.div_ceil(fan_out);
        total_nodes += level_len;
        if level_len <= 1 {
            // The level just added is the root.
            return total_nodes;
        }
    }
}
98+
8199
/// Helper: Calculate required buffer size for estimated nodes
82100
#[inline]
83101
fn estimate_buffer_size(num_items: usize, node_size: usize) -> usize {
@@ -216,17 +234,32 @@ impl HilbertRTreeI32 {
216234
let num_items = self.num_items;
217235
let node_size = self.node_size;
218236

219-
// Calculate total nodes and level bounds
237+
// Calculate exact total nodes needed (O(log n) - only tree depth iterations)
238+
let total_nodes = calculate_exact_total_nodes(num_items, node_size);
239+
let data_size = HEADER_SIZE + total_nodes * (size_of::<BoxI32>() + size_of::<u32>());
240+
241+
// Reserve all needed space at once (avoids reallocation during build)
242+
if data_size > self.data.capacity() {
243+
self.data.reserve(data_size - self.data.capacity());
244+
self.allocated_capacity = self.data.capacity();
245+
}
246+
247+
// Resize to final size (zero-fill is necessary - build reads uninitialized parent positions)
248+
if self.data.len() < data_size {
249+
self.data.resize(data_size, 0);
250+
}
251+
252+
// Calculate level bounds
220253
let mut level_bounds = Vec::with_capacity(16); // Max tree depth ~16 for 1M items
221254
let mut count = num_items;
222-
let mut total_nodes = num_items;
223-
level_bounds.push(total_nodes);
255+
let mut level_total_nodes = num_items;
256+
level_bounds.push(level_total_nodes);
224257

225258
// Create parent levels until we have a single root
226259
loop {
227260
count = count.div_ceil(node_size);
228-
total_nodes += count;
229-
level_bounds.push(total_nodes);
261+
level_total_nodes += count;
262+
level_bounds.push(level_total_nodes);
230263
if count <= 1 {
231264
break;
232265
}

0 commit comments

Comments
 (0)