Skip to content

Commit 417e06d

Browse files
authored
Remove search sorted usize (#3240)
This PR impacts the performance of random access reads and should be fixed up in the future. For now, it's blocking an incremental migration of compute functions so I will merge and follow up.
1 parent 526fb1f commit 417e06d

File tree

18 files changed

+137
-269
lines changed

18 files changed

+137
-269
lines changed

encodings/fastlanes/src/bitpacking/compute/search_sorted.rs

Lines changed: 3 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ use itertools::Itertools;
66
use num_traits::AsPrimitive;
77
use vortex_array::Array;
88
use vortex_array::compute::{
9-
IndexOrd, SearchResult, SearchSorted, SearchSortedFn, SearchSortedSide, SearchSortedUsizeFn,
9+
IndexOrd, SearchResult, SearchSorted, SearchSortedFn, SearchSortedSide,
1010
};
1111
use vortex_array::variants::PrimitiveArrayTrait;
1212
use vortex_dtype::{DType, NativePType, match_each_unsigned_integer_ptype};
@@ -50,46 +50,6 @@ impl SearchSortedFn<&BitPackedArray> for BitPackedEncoding {
5050
}
5151
}
5252

53-
impl SearchSortedUsizeFn<&BitPackedArray> for BitPackedEncoding {
54-
fn search_sorted_usize(
55-
&self,
56-
array: &BitPackedArray,
57-
value: usize,
58-
side: SearchSortedSide,
59-
) -> VortexResult<SearchResult> {
60-
match_each_unsigned_integer_ptype!(array.ptype().to_unsigned(), |$P| {
61-
// NOTE: conversion may truncate silently.
62-
if let Some(pvalue) = num_traits::cast::<usize, $P>(value) {
63-
search_sorted_native(array, pvalue, side)
64-
} else {
65-
// provided u64 is too large to fit in the provided PType, value must be off
66-
// the right end of the array.
67-
Ok(SearchResult::NotFound(array.len()))
68-
}
69-
})
70-
}
71-
72-
fn search_sorted_usize_many(
73-
&self,
74-
array: &BitPackedArray,
75-
values: &[usize],
76-
side: SearchSortedSide,
77-
) -> VortexResult<Vec<SearchResult>> {
78-
match_each_unsigned_integer_ptype!(array.ptype().to_unsigned(), |$P| {
79-
let searcher = BitPackedSearch::<'_, $P>::try_new(array)?;
80-
81-
values
82-
.iter()
83-
.map(|&value| {
84-
// NOTE: truncating cast
85-
let cast_value: $P = value as $P;
86-
Ok(searcher.search_sorted(&cast_value, side))
87-
})
88-
.try_collect()
89-
})
90-
}
91-
}
92-
9353
fn search_sorted_typed<T>(
9454
array: &BitPackedArray,
9555
value: &Scalar,
@@ -191,7 +151,7 @@ impl<T: BitPacking + NativePType> IndexOrd<T> for BitPackedSearch<'_, T> {
191151
Some(val.total_compare(*elem))
192152
}
193153

194-
fn len(&self) -> usize {
154+
fn index_len(&self) -> usize {
195155
self.length
196156
}
197157
}
@@ -206,6 +166,7 @@ mod test {
206166
SearchResult, SearchSortedSide, search_sorted, search_sorted_many,
207167
};
208168
use vortex_array::validity::Validity;
169+
use vortex_array::variants::PrimitiveArrayTrait;
209170
use vortex_array::{Array, ArrayRef, IntoArray, ToCanonical};
210171
use vortex_buffer::buffer;
211172
use vortex_error::VortexUnwrap;
@@ -219,8 +180,6 @@ mod test {
219180
#[case] side: SearchSortedSide,
220181
#[case] expected: SearchResult,
221182
) {
222-
use vortex_array::variants::PrimitiveArrayTrait;
223-
224183
let primitive_array = array.to_primitive().vortex_unwrap();
225184
// force patches
226185
let histogram = bit_width_histogram(&primitive_array).vortex_unwrap();

encodings/runend/src/array.rs

Lines changed: 24 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,19 +1,19 @@
11
use std::fmt::Debug;
22

33
use vortex_array::arrays::PrimitiveArray;
4-
use vortex_array::compute::{SearchSortedSide, search_sorted_usize, search_sorted_usize_many};
4+
use vortex_array::compute::{SearchSorted, SearchSortedSide};
55
use vortex_array::stats::{ArrayStats, StatsSetRef};
66
use vortex_array::variants::{BoolArrayTrait, DecimalArrayTrait, PrimitiveArrayTrait};
77
use vortex_array::vtable::VTableRef;
88
use vortex_array::{
99
Array, ArrayCanonicalImpl, ArrayImpl, ArrayRef, ArrayStatisticsImpl, ArrayValidityImpl,
10-
ArrayVariantsImpl, Canonical, Encoding, IntoArray, ProstMetadata, ToCanonical,
10+
ArrayVariants, ArrayVariantsImpl, Canonical, Encoding, IntoArray, ProstMetadata, ToCanonical,
1111
try_from_array_ref,
1212
};
13-
use vortex_buffer::Buffer;
1413
use vortex_dtype::DType;
1514
use vortex_error::{VortexExpect as _, VortexResult, vortex_bail};
1615
use vortex_mask::Mask;
16+
use vortex_scalar::PValue;
1717

1818
use crate::compress::{runend_decode_bools, runend_decode_primitive, runend_encode};
1919
use crate::serde::RunEndMetadata;
@@ -84,21 +84,33 @@ impl RunEndArray {
8484

8585
/// Convert the given logical index to an index into the `values` array
8686
pub fn find_physical_index(&self, index: usize) -> VortexResult<usize> {
87-
search_sorted_usize(self.ends(), index + self.offset(), SearchSortedSide::Right)
88-
.map(|s| s.to_ends_index(self.ends().len()))
87+
Ok(self
88+
.ends()
89+
.as_primitive_typed()
90+
.vortex_expect("ends array must be primitive")
91+
.search_sorted(
92+
&Some(PValue::from(index + self.offset())),
93+
SearchSortedSide::Right,
94+
)
95+
.to_ends_index(self.ends().len()))
8996
}
9097

9198
/// Convert a batch of logical indices into an index for the values. Expects indices to be adjusted by offset unlike
9299
/// [Self::find_physical_index]
93100
///
94101
/// See: [find_physical_index][Self::find_physical_index].
95-
pub fn find_physical_indices(&self, indices: &[usize]) -> VortexResult<Buffer<u64>> {
96-
search_sorted_usize_many(self.ends(), indices, SearchSortedSide::Right).map(|results| {
97-
results
98-
.into_iter()
99-
.map(|result| result.to_ends_index(self.ends().len()) as u64)
100-
.collect()
101-
})
102+
pub fn find_physical_indices<I: IntoIterator<Item = usize>>(
103+
&self,
104+
indices: I,
105+
) -> impl Iterator<Item = usize> {
106+
self.ends()
107+
.as_primitive_typed()
108+
.vortex_expect("ends array must be primitive")
109+
.search_sorted_many(
110+
indices.into_iter().map(|i| Some(PValue::from(i))),
111+
SearchSortedSide::Right,
112+
)
113+
.map(|result| result.to_ends_index(self.ends().len()))
102114
}
103115

104116
/// Run the array through run-end encoding.

encodings/runend/src/compute/take.rs

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ use num_traits::AsPrimitive;
22
use vortex_array::compute::{TakeKernel, TakeKernelAdapter, take};
33
use vortex_array::variants::PrimitiveArrayTrait;
44
use vortex_array::{Array, ArrayRef, IntoArray, ToCanonical, register_kernel};
5+
use vortex_buffer::Buffer;
56
use vortex_dtype::match_each_integer_ptype;
67
use vortex_error::{VortexResult, vortex_bail};
78

@@ -37,11 +38,11 @@ pub fn take_indices_unchecked<T: AsPrimitive<usize>>(
3738
array: &RunEndArray,
3839
indices: &[T],
3940
) -> VortexResult<ArrayRef> {
40-
let adjusted_indices = indices
41-
.iter()
42-
.map(|idx| idx.as_() + array.offset())
43-
.collect::<Vec<_>>();
44-
let physical_indices = array.find_physical_indices(&adjusted_indices)?.into_array();
41+
let physical_indices = array
42+
.find_physical_indices(indices.iter().map(|idx| idx.as_() + array.offset()))
43+
.map(|idx| idx as u64)
44+
.collect::<Buffer<u64>>()
45+
.into_array();
4546
take(array.values(), &physical_indices)
4647
}
4748

encodings/sparse/src/compute/mod.rs

Lines changed: 1 addition & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,5 @@
11
use vortex_array::arrays::ConstantArray;
2-
use vortex_array::compute::{
3-
FilterKernel, FilterKernelAdapter, SearchSortedFn, SearchSortedUsizeFn,
4-
};
2+
use vortex_array::compute::{FilterKernel, FilterKernelAdapter, SearchSortedFn};
53
use vortex_array::vtable::ComputeVTable;
64
use vortex_array::{Array, ArrayRef, register_kernel};
75
use vortex_error::VortexResult;
@@ -18,10 +16,6 @@ impl ComputeVTable for SparseEncoding {
1816
fn search_sorted_fn(&self) -> Option<&dyn SearchSortedFn<&dyn Array>> {
1917
Some(self)
2018
}
21-
22-
fn search_sorted_usize_fn(&self) -> Option<&dyn SearchSortedUsizeFn<&dyn Array>> {
23-
Some(self)
24-
}
2519
}
2620

2721
impl FilterKernel for SparseEncoding {

encodings/sparse/src/compute/search_sorted.rs

Lines changed: 1 addition & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
use std::cmp::Ordering;
22

33
use vortex_array::Array;
4-
use vortex_array::compute::{SearchResult, SearchSortedFn, SearchSortedSide, SearchSortedUsizeFn};
4+
use vortex_array::compute::{SearchResult, SearchSortedFn, SearchSortedSide};
55
use vortex_error::{VortexExpect, VortexResult};
66
use vortex_scalar::Scalar;
77

@@ -99,21 +99,6 @@ fn fill_position(array: &SparseArray, side: SearchSortedSide) -> VortexResult<us
9999
})
100100
}
101101

102-
impl SearchSortedUsizeFn<&SparseArray> for SparseEncoding {
103-
fn search_sorted_usize(
104-
&self,
105-
array: &SparseArray,
106-
value: usize,
107-
side: SearchSortedSide,
108-
) -> VortexResult<SearchResult> {
109-
let Ok(target) = Scalar::from(value).cast(array.dtype()) else {
110-
// If the downcast fails, then the target is too large for the dtype.
111-
return Ok(SearchResult::NotFound(array.len()));
112-
};
113-
SearchSortedFn::search_sorted(self, array, &target, side)
114-
}
115-
}
116-
117102
#[cfg(test)]
118103
mod tests {
119104
use vortex_array::compute::conformance::search_sorted::rstest_reuse::apply;

fuzz/src/search_sorted.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ impl<T: PartialOrd + Debug> IndexOrd<Option<T>> for SearchNullableSlice<T> {
1717
unsafe { self.0.get_unchecked(idx) }.partial_cmp(elem)
1818
}
1919

20-
fn len(&self) -> usize {
20+
fn index_len(&self) -> usize {
2121
self.0.len()
2222
}
2323
}
@@ -38,7 +38,7 @@ impl<T: NativePType> IndexOrd<Option<T>> for SearchPrimitiveSlice<T> {
3838
}
3939
}
4040

41-
fn len(&self) -> usize {
41+
fn index_len(&self) -> usize {
4242
self.0.len()
4343
}
4444
}

vortex-array/src/arrays/chunked/compute/take.rs

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ use vortex_scalar::Scalar;
77
use crate::arrays::ChunkedEncoding;
88
use crate::arrays::chunked::ChunkedArray;
99
use crate::compute::{
10-
SearchSortedSide, TakeKernel, TakeKernelAdapter, cast, search_sorted_usize, sub_scalar, take,
10+
SearchSortedSide, TakeKernel, TakeKernelAdapter, cast, search_sorted, sub_scalar, take,
1111
};
1212
use crate::{Array, ArrayRef, IntoArray, ToCanonical, register_kernel};
1313

@@ -74,8 +74,7 @@ fn take_strict_sorted(chunked: &ChunkedArray, indices: &dyn Array) -> VortexResu
7474
// Find the end of this chunk, and locate that position in the indices array.
7575
let chunk_begin = usize::try_from(chunked.chunk_offsets()[chunk_idx])?;
7676
let chunk_end = usize::try_from(chunked.chunk_offsets()[chunk_idx + 1])?;
77-
let chunk_end_pos =
78-
search_sorted_usize(indices, chunk_end, SearchSortedSide::Left)?.to_index();
77+
let chunk_end_pos = search_sorted(indices, chunk_end, SearchSortedSide::Left)?.to_index();
7978

8079
// Now we can say the slice of indices belonging to this chunk is [pos, chunk_end_pos)
8180
let chunk_indices = indices.slice(pos, chunk_end_pos)?;

vortex-array/src/arrays/chunked/variants.rs

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,13 +2,14 @@ use std::sync::Arc;
22

33
use vortex_dtype::{DType, FieldName};
44
use vortex_error::{VortexExpect, VortexResult, vortex_err, vortex_panic};
5+
use vortex_scalar::PValue;
56

67
use crate::arrays::chunked::ChunkedArray;
78
use crate::variants::{
89
BinaryArrayTrait, BoolArrayTrait, DecimalArrayTrait, ExtensionArrayTrait, ListArrayTrait,
910
NullArrayTrait, PrimitiveArrayTrait, StructArrayTrait, Utf8ArrayTrait,
1011
};
11-
use crate::{Array, ArrayRef, ArrayVariantsImpl};
12+
use crate::{Array, ArrayRef, ArrayVariants, ArrayVariantsImpl};
1213

1314
/// Chunked arrays support all DTypes
1415
impl ArrayVariantsImpl for ChunkedArray {
@@ -53,7 +54,19 @@ impl NullArrayTrait for ChunkedArray {}
5354

5455
impl BoolArrayTrait for ChunkedArray {}
5556

56-
impl PrimitiveArrayTrait for ChunkedArray {}
57+
impl PrimitiveArrayTrait for ChunkedArray {
58+
fn value(&self, idx: usize) -> Option<PValue> {
59+
let (chunk, offset_in_chunk) = self.find_chunk_idx(idx);
60+
let chunk = self
61+
.chunks()
62+
.get(chunk)
63+
.vortex_expect("Chunk index out of bounds");
64+
chunk
65+
.as_primitive_typed()
66+
.vortex_expect("Chunk was not a PrimitiveArray")
67+
.value(offset_in_chunk)
68+
}
69+
}
5770

5871
impl DecimalArrayTrait for ChunkedArray {}
5972

vortex-array/src/arrays/constant/variants.rs

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
use vortex_dtype::FieldName;
22
use vortex_error::VortexResult;
3+
use vortex_scalar::PValue;
34

45
use crate::arrays::constant::ConstantArray;
56
use crate::variants::{
@@ -51,7 +52,11 @@ impl NullArrayTrait for ConstantArray {}
5152

5253
impl BoolArrayTrait for ConstantArray {}
5354

54-
impl PrimitiveArrayTrait for ConstantArray {}
55+
impl PrimitiveArrayTrait for ConstantArray {
56+
fn value(&self, _idx: usize) -> Option<PValue> {
57+
self.scalar().as_primitive().pvalue()
58+
}
59+
}
5560

5661
impl Utf8ArrayTrait for ConstantArray {}
5762

vortex-array/src/arrays/primitive/compute/mod.rs

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
use crate::Array;
22
use crate::arrays::PrimitiveEncoding;
3-
use crate::compute::{SearchSortedFn, SearchSortedUsizeFn};
3+
use crate::compute::SearchSortedFn;
44
use crate::vtable::ComputeVTable;
55

66
mod between;
@@ -22,8 +22,4 @@ impl ComputeVTable for PrimitiveEncoding {
2222
fn search_sorted_fn(&self) -> Option<&dyn SearchSortedFn<&dyn Array>> {
2323
Some(self)
2424
}
25-
26-
fn search_sorted_usize_fn(&self) -> Option<&dyn SearchSortedUsizeFn<&dyn Array>> {
27-
Some(self)
28-
}
2925
}

0 commit comments

Comments
 (0)