@@ -1,31 +1,14 @@
1 | | -use itertools::Itertools; |
2 | 1 | use vortex_buffer::BufferMut; |
3 | 2 | use vortex_dtype::PType; |
4 | 3 | use vortex_error::VortexResult; |
5 | | -use vortex_scalar::Scalar; |
6 | 4 |
7 | 5 | use crate::arrays::ChunkedEncoding; |
8 | 6 | use crate::arrays::chunked::ChunkedArray; |
9 | | -use crate::compute::{ |
10 | | - SearchSortedSide, TakeKernel, TakeKernelAdapter, cast, search_sorted, sub_scalar, take, |
11 | | -}; |
| 7 | +use crate::compute::{TakeKernel, TakeKernelAdapter, cast, take}; |
12 | 8 | use crate::{Array, ArrayRef, IntoArray, ToCanonical, register_kernel}; |
13 | 9 |
14 | 10 | impl TakeKernel for ChunkedEncoding { |
15 | 11 | fn take(&self, array: &ChunkedArray, indices: &dyn Array) -> VortexResult<ArrayRef> { |
16 | | -        // Fast path for strictly sorted indices.
17 | | - if indices |
18 | | - .statistics() |
19 | | - .compute_is_strict_sorted() |
20 | | - .unwrap_or(false) |
21 | | - { |
22 | | - if array.len() == indices.len() { |
23 | | - return Ok(array.to_array().into_array()); |
24 | | - } |
25 | | - |
26 | | - return take_strict_sorted(array, indices); |
27 | | - } |
28 | | - |
29 | 12 | let indices = cast(indices, PType::U64.into())?.to_primitive()?; |
30 | 13 |
31 | 14 | // While the chunk idx remains the same, accumulate a list of chunk indices. |
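
The loop body that implements this comment sits in the lines elided between the two hunks. As a rough, self-contained sketch of the accumulate-while-the-chunk-index-stays-the-same idea (not the Vortex implementation: plain `u64` slices stand in for the cast indices, `chunk_offsets` is assumed to be the cumulative row-count prefix sum, and the helper name is invented for illustration):

```rust
/// Group already-cast u64 indices into per-chunk batches of chunk-relative
/// positions, flushing a batch every time the resolved chunk index changes.
fn group_indices_by_chunk(chunk_offsets: &[u64], indices: &[u64]) -> Vec<(usize, Vec<u64>)> {
    let mut groups: Vec<(usize, Vec<u64>)> = Vec::new();
    let mut prev_chunk: Option<usize> = None;
    let mut batch: Vec<u64> = Vec::new();

    for &idx in indices {
        // The containing chunk is the one with the last start offset <= idx.
        let chunk = chunk_offsets.partition_point(|&off| off <= idx) - 1;
        let in_chunk = idx - chunk_offsets[chunk];

        if let Some(prev) = prev_chunk {
            if prev != chunk {
                // Chunk index changed: flush the batch accumulated so far.
                groups.push((prev, std::mem::take(&mut batch)));
            }
        }
        batch.push(in_chunk);
        prev_chunk = Some(chunk);
    }
    if let Some(chunk) = prev_chunk {
        groups.push((chunk, batch));
    }
    groups
}
```

For `chunk_offsets = [0, 3, 7]` and `indices = [5, 1, 2, 6]` this yields `[(1, [2]), (0, [1, 2]), (1, [3])]`: a chunk may appear more than once when the indices jump backwards, and each flushed batch would become its own per-chunk `take`, keeping the output in the caller's index order.
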
@@ -60,60 +43,6 @@ impl TakeKernel for ChunkedEncoding { |
60 | 43 |
61 | 44 | register_kernel!(TakeKernelAdapter(ChunkedEncoding).lift()); |
62 | 45 |
63 | | -/// When the indices are non-null and strictly sorted, we can do better: partition them into one contiguous run per chunk and issue a single take per chunk.
64 | | -fn take_strict_sorted(chunked: &ChunkedArray, indices: &dyn Array) -> VortexResult<ArrayRef> { |
65 | | - let mut indices_by_chunk = vec![None; chunked.nchunks()]; |
66 | | - |
67 | | - // Track our position in the indices array |
68 | | - let mut pos = 0; |
69 | | - while pos < indices.len() { |
70 | | - // Locate the chunk index for the current index |
71 | | - let idx = usize::try_from(&indices.scalar_at(pos)?)?; |
72 | | - let (chunk_idx, _idx_in_chunk) = chunked.find_chunk_idx(idx); |
73 | | - |
74 | | - // Find the end of this chunk, and locate that position in the indices array. |
75 | | - let chunk_begin = usize::try_from(chunked.chunk_offsets()[chunk_idx])?; |
76 | | - let chunk_end = usize::try_from(chunked.chunk_offsets()[chunk_idx + 1])?; |
77 | | - let chunk_end_pos = search_sorted(indices, chunk_end, SearchSortedSide::Left)?.to_index(); |
78 | | - |
79 | | - // Now we can say the slice of indices belonging to this chunk is [pos, chunk_end_pos) |
80 | | - let chunk_indices = indices.slice(pos, chunk_end_pos)?; |
81 | | - |
82 | | - // Adjust the indices so they're relative to the chunk |
83 | | -        // Note: the indices' dtype might not be wide enough to hold chunk_begin after the cast.
84 | | -        // If it is wide enough, cast the scalar down to that dtype; otherwise upcast the indices to u64.
85 | | - let chunk_indices = if chunk_begin |
86 | | - < PType::try_from(chunk_indices.dtype())? |
87 | | - .max_value_as_u64() |
88 | | - .try_into()? |
89 | | - { |
90 | | - sub_scalar( |
91 | | - &chunk_indices, |
92 | | - Scalar::from(chunk_begin).cast(chunk_indices.dtype())?, |
93 | | - )? |
94 | | - } else { |
95 | | -            // Note: this cast (a memory copy) is unnecessary; the subtraction could
96 | | -            // upcast internally instead and avoid the extra copy.
97 | | - let u64_chunk_indices = cast(&chunk_indices, PType::U64.into())?; |
98 | | - sub_scalar(&u64_chunk_indices, chunk_begin.into())? |
99 | | - }; |
100 | | - |
101 | | - indices_by_chunk[chunk_idx] = Some(chunk_indices); |
102 | | - |
103 | | - pos = chunk_end_pos; |
104 | | - } |
105 | | - |
106 | | - // Now we can take the chunks |
107 | | - let chunks = indices_by_chunk |
108 | | - .into_iter() |
109 | | - .enumerate() |
110 | | - .filter_map(|(chunk_idx, indices)| indices.map(|i| (chunk_idx, i))) |
111 | | - .map(|(chunk_idx, chunk_indices)| take(chunked.chunk(chunk_idx)?, &chunk_indices)) |
112 | | - .try_collect()?; |
113 | | - |
114 | | - Ok(ChunkedArray::try_new(chunks, chunked.dtype().clone())?.into_array()) |
115 | | -} |
116 | | - |
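
Since the strict-sorted fast path now exists only as the removed lines above, a compact sketch of the technique may help future readers: with non-null, strictly sorted indices, each chunk's hits form one contiguous run, so a binary search against the chunk's end offset plays the role of `search_sorted`, and a plain subtraction plays the role of `sub_scalar`. This is an illustration only, assuming plain slices instead of Vortex's `Array`/`Scalar` types and a cumulative `chunk_offsets` prefix sum:

```rust
/// Split strictly sorted u64 indices into one (chunk, chunk-relative indices)
/// run per touched chunk, so the caller can issue a single take per chunk.
fn split_sorted_indices(chunk_offsets: &[u64], indices: &[u64]) -> Vec<(usize, Vec<u64>)> {
    let mut runs = Vec::new();
    let mut pos = 0usize;

    while pos < indices.len() {
        // Locate the chunk containing the current index.
        let idx = indices[pos];
        let chunk = chunk_offsets.partition_point(|&off| off <= idx) - 1;
        let chunk_begin = chunk_offsets[chunk];
        let chunk_end = chunk_offsets[chunk + 1];

        // Sortedness means every index in [pos, run_end) belongs to this chunk;
        // a binary search for chunk_end finds where the run stops.
        let run_end = pos + indices[pos..].partition_point(|&i| i < chunk_end);

        // Rebase the run to chunk-relative positions. (The removed code also had
        // to choose between narrowing chunk_begin to the indices' dtype and
        // widening the indices to u64; with plain u64 values no choice is needed.)
        let rebased = indices[pos..run_end].iter().map(|&i| i - chunk_begin).collect();
        runs.push((chunk, rebased));

        pos = run_end;
    }
    runs
}
```

For `chunk_offsets = [0, 3, 7]` and `indices = [1, 2, 4, 6]` this yields `[(0, [1, 2]), (1, [1, 3])]`, i.e. exactly one run, and hence one `take`, per touched chunk, which is the win the removed doc comment describes.
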
117 | 46 | #[cfg(test)] |
118 | 47 | mod test { |
119 | 48 | use vortex_buffer::buffer; |