@@ -76,7 +76,9 @@ pub struct Sort32Buffers {
7676 /// output indices
7777 pub ordering : Vec < u32 > ,
7878 /// bucket counts / offsets (length == RADIX_BASE)
79- pub buckets16 : Vec < u32 > ,
79+ pub buckets16lo : Vec < u32 > ,
80+ /// bucket counts / offsets keyed by the high 16 bits of the inverted key (length == RADIX_BASE)
81+ pub buckets16hi : Vec < u32 > ,
8082 /// scratch space for indices
8183 pub scratch : Vec < u32 > ,
8284}
@@ -93,8 +95,11 @@ impl Sort32Buffers {
9395 if self . scratch . len ( ) < max_splats {
9496 self . scratch . resize ( max_splats, 0 ) ;
9597 }
96- if self . buckets16 . len ( ) < RADIX_BASE {
97- self . buckets16 . resize ( RADIX_BASE , 0 ) ;
98+ if self . buckets16lo . len ( ) < RADIX_BASE {
99+ self . buckets16lo . resize ( RADIX_BASE , 0 ) ;
100+ }
101+ if self . buckets16hi . len ( ) < RADIX_BASE {
102+ self . buckets16hi . resize ( RADIX_BASE , 0 ) ;
98103 }
99104 }
100105}
@@ -109,20 +114,24 @@ pub fn sort32_internal(
109114 // make sure our buffers can hold `max_splats`
110115 buffers. ensure_size ( max_splats) ;
111116
112- let Sort32Buffers { readback, ordering, buckets16 , scratch } = buffers;
117+ let Sort32Buffers { readback, ordering, buckets16lo , buckets16hi , scratch } = buffers;
113118 let keys = & readback[ ..num_splats] ;
114119
115- // ——— Pass #1: bucket by inv(low 16 bits) ———
116- buckets16. fill ( 0 ) ;
120+ // tally the low-16-bit and high-16-bit bucket counts in a single pass over the keys
121+ buckets16lo. fill ( 0 ) ;
122+ buckets16hi. fill ( 0 ) ;
117123 for & key in keys. iter ( ) {
118124 if key < DEPTH_INFINITY_F32 {
119125 let inv = !key;
120- buckets16[ ( inv & 0xFFFF ) as usize ] += 1 ;
126+ buckets16lo[ ( inv & 0xFFFF ) as usize ] += 1 ;
127+ buckets16hi[ ( inv >> 16 ) as usize ] += 1 ;
121128 }
122129 }
130+
131+ // ——— Pass #1: bucket by inv(low 16 bits) ———
123132 // exclusive prefix‑sum → starting offsets
124133 let mut total: u32 = 0 ;
125- for slot in buckets16 . iter_mut ( ) {
134+ for slot in buckets16lo . iter_mut ( ) {
126135 let cnt = * slot;
127136 * slot = total;
128137 total = total. wrapping_add ( cnt) ;
@@ -134,21 +143,15 @@ pub fn sort32_internal(
134143 if key < DEPTH_INFINITY_F32 {
135144 let inv = !key;
136145 let lo = ( inv & 0xFFFF ) as usize ;
137- scratch[ buckets16 [ lo] as usize ] = i as u32 ;
138- buckets16 [ lo] += 1 ;
146+ scratch[ buckets16lo [ lo] as usize ] = i as u32 ;
147+ buckets16lo [ lo] += 1 ;
139148 }
140149 }
141150
142151 // ——— Pass #2: bucket by inv(high 16 bits) ———
143- buckets16. fill ( 0 ) ;
144- for & idx in scratch. iter ( ) . take ( active_splats as usize ) {
145- let key = keys[ idx as usize ] ;
146- let inv = !key;
147- buckets16[ ( inv >> 16 ) as usize ] += 1 ;
148- }
149152 // exclusive prefix‑sum again
150153 let mut sum: u32 = 0 ;
151- for slot in buckets16 . iter_mut ( ) {
154+ for slot in buckets16hi . iter_mut ( ) {
152155 let cnt = * slot;
153156 * slot = sum;
154157 sum = sum. wrapping_add ( cnt) ;
@@ -158,16 +161,16 @@ pub fn sort32_internal(
158161 let key = keys[ idx as usize ] ;
159162 let inv = !key;
160163 let hi = ( inv >> 16 ) as usize ;
161- ordering[ buckets16 [ hi] as usize ] = idx;
162- buckets16 [ hi] += 1 ;
164+ ordering[ buckets16hi [ hi] as usize ] = idx;
165+ buckets16hi [ hi] += 1 ;
163166 }
164167
165168 // sanity‑check: last bucket should have consumed all entries
166- if buckets16 [ RADIX_BASE - 1 ] != active_splats {
169+ if buckets16hi [ RADIX_BASE - 1 ] != active_splats {
167170 return Err ( anyhow ! (
168171 "Expected {} active splats but got {}" ,
169172 active_splats,
170- buckets16 [ RADIX_BASE - 1 ]
173+ buckets16hi [ RADIX_BASE - 1 ]
171174 ) ) ;
172175 }
173176
0 commit comments