Skip to content

Commit 0b21351

Browse files
committed
feat: alp decompress to vector
Signed-off-by: Alexander Droste <[email protected]>
1 parent fe4c81b commit 0b21351

File tree

6 files changed

+343
-132
lines changed

6 files changed

+343
-132
lines changed

Cargo.lock

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

encodings/alp/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ vortex-fastlanes = { workspace = true }
2929
vortex-mask = { workspace = true }
3030
vortex-scalar = { workspace = true }
3131
vortex-utils = { workspace = true }
32+
vortex-vector = { workspace = true }
3233

3334
[dev-dependencies]
3435
divan = { workspace = true }

encodings/alp/src/alp/array.rs

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ use vortex_array::DeserializeMetadata;
1515
use vortex_array::Precision;
1616
use vortex_array::ProstMetadata;
1717
use vortex_array::SerializeMetadata;
18+
use vortex_array::execution::ExecutionCtx;
1819
use vortex_array::patches::Patches;
1920
use vortex_array::patches::PatchesMetadata;
2021
use vortex_array::serde::ArrayChildren;
@@ -40,11 +41,15 @@ use vortex_error::VortexExpect;
4041
use vortex_error::VortexResult;
4142
use vortex_error::vortex_bail;
4243
use vortex_error::vortex_ensure;
44+
use vortex_vector::Vector;
45+
use vortex_vector::VectorMutOps;
4346

4447
use crate::ALPFloat;
4548
use crate::alp::Exponents;
4649
use crate::alp::alp_encode;
47-
use crate::alp::decompress;
50+
use crate::alp::decompress::decompress;
51+
use crate::alp::decompress::decompress_to_pvector;
52+
use crate::match_each_alp_float_ptype;
4853

4954
vtable!(ALP);
5055

@@ -136,6 +141,12 @@ impl VTable for ALPVTable {
136141
patches,
137142
)
138143
}
144+
145+
fn execute(array: &ALPArray, _ctx: &mut dyn ExecutionCtx) -> VortexResult<Vector> {
146+
match_each_alp_float_ptype!(array.dtype().as_ptype(), |T| {
147+
Ok(decompress_to_pvector::<T>(array.clone()).freeze().into())
148+
})
149+
}
139150
}
140151

141152
#[derive(Clone, Debug)]
@@ -349,6 +360,12 @@ impl ALPArray {
349360
pub fn patches(&self) -> Option<&Patches> {
350361
self.patches.as_ref()
351362
}
363+
364+
/// Consumes the array and returns its parts: the encoded array, the exponents, the optional patches, and the dtype.
365+
#[inline]
366+
pub fn into_parts(self) -> (ArrayRef, Exponents, Option<Patches>, DType) {
367+
(self.encoded, self.exponents, self.patches, self.dtype)
368+
}
352369
}
353370

354371
impl ValidityChild<ALPVTable> for ALPVTable {

encodings/alp/src/alp/compress.rs

Lines changed: 2 additions & 130 deletions
Original file line numberDiff line numberDiff line change
@@ -1,22 +1,16 @@
11
// SPDX-License-Identifier: Apache-2.0
22
// SPDX-FileCopyrightText: Copyright the Vortex contributors
33

4-
use std::mem::transmute;
5-
64
use itertools::Itertools;
7-
use num_traits::AsPrimitive;
85
use vortex_array::ArrayRef;
96
use vortex_array::IntoArray;
10-
use vortex_array::ToCanonical;
117
use vortex_array::arrays::PrimitiveArray;
12-
use vortex_array::arrays::patch_chunk;
138
use vortex_array::patches::Patches;
149
use vortex_array::validity::Validity;
1510
use vortex_array::vtable::ValidityHelper;
1611
use vortex_buffer::Buffer;
1712
use vortex_buffer::BufferMut;
1813
use vortex_dtype::PType;
19-
use vortex_dtype::match_each_unsigned_integer_ptype;
2014
use vortex_error::VortexResult;
2115
use vortex_error::vortex_bail;
2216
use vortex_mask::Mask;
@@ -128,143 +122,21 @@ where
128122
Ok((exponents, encoded_array, patches))
129123
}
130124

131-
/// Decompresses an ALP-encoded array.
132-
///
133-
/// # Arguments
134-
///
135-
/// * `array` - The ALP-encoded array to decompress
136-
///
137-
/// # Returns
138-
///
139-
/// A `PrimitiveArray` containing the decompressed floating-point values with all patches applied.
140-
pub fn decompress(array: ALPArray) -> PrimitiveArray {
141-
let patches = array.patches().cloned();
142-
if let Some(patches) = patches
143-
&& let Some(chunk_offsets) = patches.chunk_offsets()
144-
{
145-
return decompress_chunked(array, &patches, &chunk_offsets.as_ref().to_primitive());
146-
}
147-
148-
decompress_unchunked(array)
149-
}
150-
151-
/// Decompresses an ALP-encoded array in 1024-element chunks.
152-
///
153-
/// Decoding and applying patches is done in chunks of 1024 elements for better L1 cache locality.
154-
///
155-
/// # Arguments
156-
///
157-
/// * `array` - The ALP-encoded array to decompress
158-
/// * `patches` - The patches containing exceptional values and their positions
159-
/// * `patches_chunk_offsets` - Offsets into the patches array for each chunk
160-
///
161-
/// # Returns
162-
///
163-
/// A `PrimitiveArray` containing the decompressed values with all patches applied.
164-
#[expect(
165-
clippy::cognitive_complexity,
166-
reason = "complexity is from nested match_each_* macros"
167-
)]
168-
pub fn decompress_chunked(
169-
array: ALPArray,
170-
patches: &Patches,
171-
patches_chunk_offsets: &PrimitiveArray,
172-
) -> PrimitiveArray {
173-
let encoded = array.encoded().to_primitive();
174-
let validity = encoded.validity().clone();
175-
176-
let patches_indices = patches.indices().as_ref().to_primitive();
177-
let patches_values = patches.values().as_ref().to_primitive();
178-
let ptype = array.dtype().as_ptype();
179-
let array_len = array.len();
180-
let exponents = array.exponents();
181-
let patches_offset = patches.offset();
182-
183-
// We need to drop the ALPArray here in case converting the encoded buffer into a
184-
// primitive array didn't create a copy. In that case both `encoded` and `array`
185-
// would hold a reference to the buffer we want to mutate.
186-
drop(array);
187-
188-
match_each_alp_float_ptype!(ptype, |T| {
189-
let patches_values = patches_values.as_slice::<T>();
190-
let mut alp_buffer = encoded.into_buffer_mut();
191-
match_each_unsigned_integer_ptype!(patches_chunk_offsets.ptype(), |C| {
192-
let patches_chunk_offsets = patches_chunk_offsets.as_slice::<C>();
193-
// There always is at least one chunk offset.
194-
let base_offset = patches_chunk_offsets[0];
195-
let offset_within_chunk = patches.offset_within_chunk().unwrap_or(0);
196-
197-
match_each_unsigned_integer_ptype!(patches_indices.ptype(), |I| {
198-
let patches_indices = patches_indices.as_slice::<I>();
199-
200-
for (chunk_idx, chunk_start) in (0..array_len).step_by(1024).enumerate() {
201-
let chunk_end = (chunk_start + 1024).min(array_len);
202-
let chunk_slice = &mut alp_buffer.as_mut_slice()[chunk_start..chunk_end];
203-
204-
<T>::decode_slice_inplace(chunk_slice, exponents);
205-
206-
let decoded_chunk: &mut [T] = unsafe { transmute(chunk_slice) };
207-
patch_chunk(
208-
decoded_chunk,
209-
patches_indices,
210-
patches_values,
211-
patches_offset,
212-
patches_chunk_offsets,
213-
chunk_idx,
214-
base_offset.as_(),
215-
offset_within_chunk,
216-
);
217-
}
218-
219-
let decoded_buffer: BufferMut<T> = unsafe { transmute(alp_buffer) };
220-
PrimitiveArray::new::<T>(decoded_buffer.freeze(), validity)
221-
})
222-
})
223-
})
224-
}
225-
226-
/// Decompresses an ALP-encoded array without chunk offsets.
227-
///
228-
/// This function decodes the complete array at once and then applies any patches after.
229-
fn decompress_unchunked(array: ALPArray) -> PrimitiveArray {
230-
let patches = array.patches().cloned();
231-
let encoded = array.encoded().to_primitive();
232-
let validity = encoded.validity().clone();
233-
let exponents = array.exponents();
234-
let ptype = array.dtype().as_ptype();
235-
236-
// We need to drop the ALPArray here in case converting the encoded buffer into a
237-
// primitive array didn't create a copy. In that case both `encoded` and `array`
238-
// would hold a reference to the buffer we want to mutate.
240-
241-
let decoded = match_each_alp_float_ptype!(ptype, |T| {
242-
PrimitiveArray::new::<T>(
243-
<T>::decode_buffer(encoded.into_buffer_mut(), exponents),
244-
validity,
245-
)
246-
});
247-
248-
if let Some(patches) = patches {
249-
decoded.patch(&patches)
250-
} else {
251-
decoded
252-
}
253-
}
254-
255125
#[cfg(test)]
256126
mod tests {
257127
use core::f64;
258128

259129
use f64::consts::E;
260130
use f64::consts::PI;
131+
use vortex_array::ToCanonical;
261132
use vortex_array::assert_arrays_eq;
262133
use vortex_array::validity::Validity;
263134
use vortex_buffer::Buffer;
264135
use vortex_buffer::buffer;
265136
use vortex_dtype::NativePType;
266137

267138
use super::*;
139+
use crate::decompress;
268140

269141
#[test]
270142
fn test_compress() {

0 commit comments

Comments
 (0)