Skip to content

Commit 72339a5

Browse files
authored
Correctly handle searching bitpacked arrays with only valid patches (#2236)
1 parent 4433fcf commit 72339a5

File tree

1 file changed

+40
-16
lines changed

1 file changed

+40
-16
lines changed

encodings/fastlanes/src/bitpacking/compute/search_sorted.rs

Lines changed: 40 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ use vortex_array::compute::{
1111
use vortex_array::validity::Validity;
1212
use vortex_array::variants::PrimitiveArrayTrait;
1313
use vortex_dtype::{match_each_unsigned_integer_ptype, DType, NativePType};
14-
use vortex_error::{VortexError, VortexExpect as _, VortexResult};
14+
use vortex_error::{vortex_err, VortexError, VortexResult};
1515
use vortex_scalar::Scalar;
1616

1717
use crate::{unpack_single_primitive, BitPackedArray, BitPackedEncoding};
@@ -37,7 +37,7 @@ impl SearchSortedFn<BitPackedArray> for BitPackedEncoding {
3737
side: SearchSortedSide,
3838
) -> VortexResult<Vec<SearchResult>> {
3939
match_each_unsigned_integer_ptype!(array.ptype(), |$P| {
40-
let searcher = BitPackedSearch::<'_, $P>::new(array);
40+
let searcher = BitPackedSearch::<'_, $P>::try_new(array)?;
4141

4242
values
4343
.iter()
@@ -77,7 +77,7 @@ impl SearchSortedUsizeFn<BitPackedArray> for BitPackedEncoding {
7777
side: SearchSortedSide,
7878
) -> VortexResult<Vec<SearchResult>> {
7979
match_each_unsigned_integer_ptype!(array.ptype().to_unsigned(), |$P| {
80-
let searcher = BitPackedSearch::<'_, $P>::new(array);
80+
let searcher = BitPackedSearch::<'_, $P>::try_new(array)?;
8181

8282
values
8383
.iter()
@@ -131,10 +131,10 @@ where
131131
if usize_value > array.max_packed_value() {
132132
patches.search_sorted(usize_value, side)
133133
} else {
134-
Ok(BitPackedSearch::<'_, T>::new(array).search_sorted(&value, side))
134+
Ok(BitPackedSearch::<'_, T>::try_new(array)?.search_sorted(&value, side))
135135
}
136136
} else {
137-
Ok(BitPackedSearch::<'_, T>::new(array).search_sorted(&value, side))
137+
Ok(BitPackedSearch::<'_, T>::try_new(array)?.search_sorted(&value, side))
138138
}
139139
}
140140

@@ -152,33 +152,32 @@ struct BitPackedSearch<'a, T> {
152152
}
153153

154154
impl<'a, T: BitPacking + NativePType> BitPackedSearch<'a, T> {
155-
pub fn new(array: &'a BitPackedArray) -> Self {
155+
pub fn try_new(array: &'a BitPackedArray) -> VortexResult<Self> {
156156
let first_patch_index = array
157157
.patches()
158158
.map(|p| p.min_index())
159-
.transpose()
160-
.vortex_expect("Failed to get min patch index")
159+
.transpose()?
161160
.unwrap_or_else(|| array.len());
162161
let first_non_null_idx = match array.validity() {
163162
Validity::NonNullable | Validity::AllValid => 0,
164163
Validity::AllInvalid => array.len(),
165164
Validity::Array(varray) => {
166-
// In sorted order, nulls come before all the non-null values.
167-
varray
168-
.statistics()
169-
.compute_true_count()
170-
.vortex_expect("Failed to compute true count")
165+
// In sorted order, nulls come before all the non-null values, i.e. we have true count worth of non null values at the end
166+
array.len()
167+
- varray.statistics().compute_true_count().ok_or_else(|| {
168+
vortex_err!("Couldn't compute true count for validity of bitpacked array")
169+
})?
171170
}
172171
};
173172

174-
Self {
173+
Ok(Self {
175174
packed_as_slice: array.packed_slice::<T>(),
176175
offset: array.offset(),
177176
length: array.len(),
178177
bit_width: array.bit_width(),
179178
first_non_null_idx,
180179
first_patch_index,
181-
}
180+
})
182181
}
183182
}
184183

@@ -211,10 +210,12 @@ impl<T> Len for BitPackedSearch<'_, T> {
211210

212211
#[cfg(test)]
213212
mod test {
214-
use vortex_array::array::PrimitiveArray;
213+
use arrow_buffer::BooleanBuffer;
214+
use vortex_array::array::{BoolArray, PrimitiveArray};
215215
use vortex_array::compute::{
216216
search_sorted, search_sorted_many, slice, SearchResult, SearchSortedSide,
217217
};
218+
use vortex_array::validity::Validity;
218219
use vortex_array::IntoArray;
219220
use vortex_buffer::buffer;
220221
use vortex_dtype::Nullability;
@@ -343,4 +344,27 @@ mod test {
343344
SearchResult::NotFound(0)
344345
);
345346
}
347+
348+
#[test]
349+
fn test_non_null_patches() {
350+
let bitpacked = BitPackedArray::encode(
351+
&PrimitiveArray::new(
352+
buffer![0u64, 0, 0, 0, 0, 2815694643789679, 8029759183936649593],
353+
Validity::Array(
354+
BoolArray::from(BooleanBuffer::from(vec![
355+
false, false, false, false, false, true, true,
356+
]))
357+
.into_array(),
358+
),
359+
)
360+
.into_array(),
361+
0,
362+
)
363+
.unwrap()
364+
.into_array();
365+
assert_eq!(
366+
search_sorted(&bitpacked, 0, SearchSortedSide::Right).unwrap(),
367+
SearchResult::NotFound(5)
368+
);
369+
}
346370
}

0 commit comments

Comments
 (0)