|
1 | 1 | // SPDX-License-Identifier: Apache-2.0 |
2 | 2 | // SPDX-FileCopyrightText: Copyright the Vortex contributors |
3 | 3 |
|
4 | | -use std::mem::transmute; |
5 | | - |
6 | 4 | use itertools::Itertools; |
7 | | -use num_traits::AsPrimitive; |
8 | 5 | use vortex_array::ArrayRef; |
9 | 6 | use vortex_array::IntoArray; |
10 | | -use vortex_array::ToCanonical; |
11 | 7 | use vortex_array::arrays::PrimitiveArray; |
12 | | -use vortex_array::arrays::patch_chunk; |
13 | 8 | use vortex_array::patches::Patches; |
14 | 9 | use vortex_array::validity::Validity; |
15 | 10 | use vortex_array::vtable::ValidityHelper; |
16 | 11 | use vortex_buffer::Buffer; |
17 | 12 | use vortex_buffer::BufferMut; |
18 | 13 | use vortex_dtype::PType; |
19 | | -use vortex_dtype::match_each_unsigned_integer_ptype; |
20 | 14 | use vortex_error::VortexResult; |
21 | 15 | use vortex_error::vortex_bail; |
22 | 16 | use vortex_mask::Mask; |
@@ -128,143 +122,21 @@ where |
128 | 122 | Ok((exponents, encoded_array, patches)) |
129 | 123 | } |
130 | 124 |
|
131 | | -/// Decompresses an ALP-encoded array. |
132 | | -/// Takes the chunked path when the patches carry chunk offsets, and the unchunked path otherwise. |
133 | | -/// # Arguments |
134 | | -/// |
135 | | -/// * `array` - The ALP-encoded array to decompress (taken by value) |
136 | | -/// |
137 | | -/// # Returns |
138 | | -/// |
139 | | -/// A `PrimitiveArray` containing the decompressed floating-point values with all patches applied. |
140 | | -pub fn decompress(array: ALPArray) -> PrimitiveArray { |
141 | | - let patches = array.patches().cloned(); |
142 | | - if let Some(patches) = patches |
143 | | - && let Some(chunk_offsets) = patches.chunk_offsets() |
144 | | - { |
145 | | - return decompress_chunked(array, &patches, &chunk_offsets.as_ref().to_primitive()); |
146 | | - } |
147 | | - |
148 | | - decompress_unchunked(array) |
149 | | -} |
150 | | - |
151 | | -/// Decompresses an ALP-encoded array in 1024-element chunks. |
152 | | -/// |
153 | | -/// Each chunk of up to 1024 elements is decoded in place and then patched, for better L1 cache locality. |
154 | | -/// |
155 | | -/// # Arguments |
156 | | -/// |
157 | | -/// * `array` - The ALP-encoded array to decompress (taken by value and dropped before decoding) |
158 | | -/// * `patches` - The patches containing exceptional values and their positions |
159 | | -/// * `patches_chunk_offsets` - Offsets into the patches array for each chunk |
160 | | -/// |
161 | | -/// # Returns |
162 | | -/// |
163 | | -/// A `PrimitiveArray` containing the decompressed values with all patches applied. |
164 | | -#[expect( |
165 | | - clippy::cognitive_complexity, |
166 | | - reason = "complexity is from nested match_each_* macros" |
167 | | -)] |
168 | | -pub fn decompress_chunked( |
169 | | - array: ALPArray, |
170 | | - patches: &Patches, |
171 | | - patches_chunk_offsets: &PrimitiveArray, |
172 | | -) -> PrimitiveArray { |
173 | | - let encoded = array.encoded().to_primitive(); |
174 | | - let validity = encoded.validity().clone(); |
175 | | - |
176 | | - let patches_indices = patches.indices().as_ref().to_primitive(); |
177 | | - let patches_values = patches.values().as_ref().to_primitive(); |
178 | | - let ptype = array.dtype().as_ptype(); |
179 | | - let array_len = array.len(); |
180 | | - let exponents = array.exponents(); |
181 | | - let patches_offset = patches.offset(); |
182 | | - |
183 | | - // Drop the ALPArray here in case converting the encoded buffer into a |
184 | | - // primitive didn't create a copy. In that case both `encoded` and `array` |
185 | | - // would hold a reference to the buffer we want to mutate. |
186 | | - drop(array); |
187 | | - |
188 | | - match_each_alp_float_ptype!(ptype, |T| { |
189 | | - let patches_values = patches_values.as_slice::<T>(); |
190 | | - let mut alp_buffer = encoded.into_buffer_mut(); |
191 | | - match_each_unsigned_integer_ptype!(patches_chunk_offsets.ptype(), |C| { |
192 | | - let patches_chunk_offsets = patches_chunk_offsets.as_slice::<C>(); |
193 | | - // There is always at least one chunk offset, so the first entry is read directly. |
194 | | - let base_offset = patches_chunk_offsets[0]; |
195 | | - let offset_within_chunk = patches.offset_within_chunk().unwrap_or(0); |
196 | | - |
197 | | - match_each_unsigned_integer_ptype!(patches_indices.ptype(), |I| { |
198 | | - let patches_indices = patches_indices.as_slice::<I>(); |
199 | | - |
200 | | - for (chunk_idx, chunk_start) in (0..array_len).step_by(1024).enumerate() { |
201 | | - let chunk_end = (chunk_start + 1024).min(array_len); |
202 | | - let chunk_slice = &mut alp_buffer.as_mut_slice()[chunk_start..chunk_end]; |
203 | | - |
204 | | - <T>::decode_slice_inplace(chunk_slice, exponents); |
205 | | - |
206 | | - let decoded_chunk: &mut [T] = unsafe { transmute(chunk_slice) }; |
207 | | - patch_chunk( |
208 | | - decoded_chunk, |
209 | | - patches_indices, |
210 | | - patches_values, |
211 | | - patches_offset, |
212 | | - patches_chunk_offsets, |
213 | | - chunk_idx, |
214 | | - base_offset.as_(), |
215 | | - offset_within_chunk, |
216 | | - ); |
217 | | - } |
218 | | - |
219 | | - let decoded_buffer: BufferMut<T> = unsafe { transmute(alp_buffer) }; |
220 | | - PrimitiveArray::new::<T>(decoded_buffer.freeze(), validity) |
221 | | - }) |
222 | | - }) |
223 | | - }) |
224 | | -} |
225 | | - |
226 | | -/// Decompresses an ALP-encoded array without chunk offsets. |
227 | | -/// |
228 | | -/// This function decodes the complete buffer in one call and then applies the patches, if any, to the result. |
229 | | -fn decompress_unchunked(array: ALPArray) -> PrimitiveArray { |
230 | | - let patches = array.patches().cloned(); |
231 | | - let encoded = array.encoded().to_primitive(); |
232 | | - let validity = encoded.validity().clone(); |
233 | | - let exponents = array.exponents(); |
234 | | - let ptype = array.dtype().as_ptype(); |
235 | | - |
236 | | - // Drop the ALPArray here in case converting the encoded buffer into a |
237 | | - // primitive didn't create a copy. In that case both `encoded` and `array` |
238 | | - // would hold a reference to the buffer we want to mutate. |
239 | | - drop(array); |
240 | | - |
241 | | - let decoded = match_each_alp_float_ptype!(ptype, |T| { |
242 | | - PrimitiveArray::new::<T>( |
243 | | - <T>::decode_buffer(encoded.into_buffer_mut(), exponents), |
244 | | - validity, |
245 | | - ) |
246 | | - }); |
247 | | - |
248 | | - if let Some(patches) = patches { |
249 | | - decoded.patch(&patches) |
250 | | - } else { |
251 | | - decoded |
252 | | - } |
253 | | -} |
254 | | - |
255 | 125 | #[cfg(test)] |
256 | 126 | mod tests { |
257 | 127 | use core::f64; |
258 | 128 |
|
259 | 129 | use f64::consts::E; |
260 | 130 | use f64::consts::PI; |
| 131 | + use vortex_array::ToCanonical; |
261 | 132 | use vortex_array::assert_arrays_eq; |
262 | 133 | use vortex_array::validity::Validity; |
263 | 134 | use vortex_buffer::Buffer; |
264 | 135 | use vortex_buffer::buffer; |
265 | 136 | use vortex_dtype::NativePType; |
266 | 137 |
|
267 | 138 | use super::*; |
| 139 | + use crate::decompress; |
268 | 140 |
|
269 | 141 | #[test] |
270 | 142 | fn test_compress() { |
|
0 commit comments