Skip to content

Commit 4054cf8

Browse files
committed
feat: alp decompress to vector
Signed-off-by: Alexander Droste <[email protected]>
1 parent 265c3b6 commit 4054cf8

File tree

6 files changed

+342
-132
lines changed

6 files changed

+342
-132
lines changed

Cargo.lock

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

encodings/alp/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -29,6 +29,7 @@ vortex-fastlanes = { workspace = true }
2929
vortex-mask = { workspace = true }
3030
vortex-scalar = { workspace = true }
3131
vortex-utils = { workspace = true }
32+
vortex-vector = { workspace = true }
3233

3334
[dev-dependencies]
3435
divan = { workspace = true }

encodings/alp/src/alp/array.rs

Lines changed: 18 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -15,6 +15,7 @@ use vortex_array::DeserializeMetadata;
1515
use vortex_array::Precision;
1616
use vortex_array::ProstMetadata;
1717
use vortex_array::SerializeMetadata;
18+
use vortex_array::execution::ExecutionCtx;
1819
use vortex_array::patches::Patches;
1920
use vortex_array::patches::PatchesMetadata;
2021
use vortex_array::serde::ArrayChildren;
@@ -40,11 +41,15 @@ use vortex_error::VortexExpect;
4041
use vortex_error::VortexResult;
4142
use vortex_error::vortex_bail;
4243
use vortex_error::vortex_ensure;
44+
use vortex_vector::Vector;
45+
use vortex_vector::VectorMutOps;
4346

4447
use crate::ALPFloat;
4548
use crate::alp::Exponents;
4649
use crate::alp::alp_encode;
47-
use crate::alp::decompress;
50+
use crate::alp::decompress::decompress;
51+
use crate::alp::decompress::decompress_to_pvector;
52+
use crate::match_each_alp_float_ptype;
4853

4954
vtable!(ALP);
5055

@@ -136,6 +141,12 @@ impl VTable for ALPVTable {
136141
patches,
137142
)
138143
}
144+
145+
fn execute(array: &ALPArray, _ctx: &mut dyn ExecutionCtx) -> VortexResult<Vector> {
146+
match_each_alp_float_ptype!(array.dtype().as_ptype(), |T| {
147+
Ok(decompress_to_pvector::<T>(array.clone()).freeze().into())
148+
})
149+
}
139150
}
140151

141152
#[derive(Clone, Debug)]
@@ -349,6 +360,12 @@ impl ALPArray {
349360
pub fn patches(&self) -> Option<&Patches> {
350361
self.patches.as_ref()
351362
}
363+
364+
/// Consumes the array and returns its parts.
365+
#[inline]
366+
pub fn into_parts(self) -> (ArrayRef, Exponents, Option<Patches>, DType) {
367+
(self.encoded, self.exponents, self.patches, self.dtype)
368+
}
352369
}
353370

354371
impl ValidityChild<ALPVTable> for ALPVTable {

encodings/alp/src/alp/compress.rs

Lines changed: 2 additions & 130 deletions
Original file line number | Diff line number | Diff line change
@@ -1,22 +1,16 @@
11
// SPDX-License-Identifier: Apache-2.0
22
// SPDX-FileCopyrightText: Copyright the Vortex contributors
33

4-
use std::mem::transmute;
5-
64
use itertools::Itertools;
7-
use num_traits::AsPrimitive;
85
use vortex_array::ArrayRef;
96
use vortex_array::IntoArray;
10-
use vortex_array::ToCanonical;
117
use vortex_array::arrays::PrimitiveArray;
12-
use vortex_array::arrays::patch_chunk;
138
use vortex_array::patches::Patches;
149
use vortex_array::validity::Validity;
1510
use vortex_array::vtable::ValidityHelper;
1611
use vortex_buffer::Buffer;
1712
use vortex_buffer::BufferMut;
1813
use vortex_dtype::PType;
19-
use vortex_dtype::match_each_unsigned_integer_ptype;
2014
use vortex_error::VortexResult;
2115
use vortex_error::vortex_bail;
2216
use vortex_mask::Mask;
@@ -131,143 +125,21 @@ where
131125
Ok((exponents, encoded_array, patches))
132126
}
133127

134-
/// Decompresses an ALP-encoded array.
135-
///
136-
/// # Arguments
137-
///
138-
/// * `array` - The ALP-encoded array to decompress
139-
///
140-
/// # Returns
141-
///
142-
/// A `PrimitiveArray` containing the decompressed floating-point values with all patches applied.
143-
pub fn decompress(array: ALPArray) -> PrimitiveArray {
144-
let patches = array.patches().cloned();
145-
if let Some(patches) = patches
146-
&& let Some(chunk_offsets) = patches.chunk_offsets()
147-
{
148-
return decompress_chunked(array, &patches, &chunk_offsets.as_ref().to_primitive());
149-
}
150-
151-
decompress_unchunked(array)
152-
}
153-
154-
/// Decompresses an ALP-encoded array in 1024-element chunks.
155-
///
156-
/// Decoding and applying patches is done in chunks of 1024 elements for better L1 cache locality.
157-
///
158-
/// # Arguments
159-
///
160-
/// * `array` - The ALP-encoded array to decompress
161-
/// * `patches` - The patches containing exceptional values and their positions
162-
/// * `patches_chunk_offsets` - Offsets into the patches array for each chunk
163-
///
164-
/// # Returns
165-
///
166-
/// A `PrimitiveArray` containing the decompressed values with all patches applied.
167-
#[expect(
168-
clippy::cognitive_complexity,
169-
reason = "complexity is from nested match_each_* macros"
170-
)]
171-
pub fn decompress_chunked(
172-
array: ALPArray,
173-
patches: &Patches,
174-
patches_chunk_offsets: &PrimitiveArray,
175-
) -> PrimitiveArray {
176-
let encoded = array.encoded().to_primitive();
177-
let validity = encoded.validity().clone();
178-
179-
let patches_indices = patches.indices().as_ref().to_primitive();
180-
let patches_values = patches.values().as_ref().to_primitive();
181-
let ptype = array.dtype().as_ptype();
182-
let array_len = array.len();
183-
let exponents = array.exponents();
184-
let patches_offset = patches.offset();
185-
186-
// We need to drop ALPArray here in case converting encoded buffer into
187-
// primitive didn't create a copy. In that case both alp_encoded and array
188-
// will hold a reference to the buffer we want to mutate.
189-
drop(array);
190-
191-
match_each_alp_float_ptype!(ptype, |T| {
192-
let patches_values = patches_values.as_slice::<T>();
193-
let mut alp_buffer = encoded.into_buffer_mut();
194-
match_each_unsigned_integer_ptype!(patches_chunk_offsets.ptype(), |C| {
195-
let patches_chunk_offsets = patches_chunk_offsets.as_slice::<C>();
196-
// There always is at least one chunk offset.
197-
let base_offset = patches_chunk_offsets[0];
198-
let offset_within_chunk = patches.offset_within_chunk().unwrap_or(0);
199-
200-
match_each_unsigned_integer_ptype!(patches_indices.ptype(), |I| {
201-
let patches_indices = patches_indices.as_slice::<I>();
202-
203-
for (chunk_idx, chunk_start) in (0..array_len).step_by(1024).enumerate() {
204-
let chunk_end = (chunk_start + 1024).min(array_len);
205-
let chunk_slice = &mut alp_buffer.as_mut_slice()[chunk_start..chunk_end];
206-
207-
<T>::decode_slice_inplace(chunk_slice, exponents);
208-
209-
let decoded_chunk: &mut [T] = unsafe { transmute(chunk_slice) };
210-
patch_chunk(
211-
decoded_chunk,
212-
patches_indices,
213-
patches_values,
214-
patches_offset,
215-
patches_chunk_offsets,
216-
chunk_idx,
217-
base_offset.as_(),
218-
offset_within_chunk,
219-
);
220-
}
221-
222-
let decoded_buffer: BufferMut<T> = unsafe { transmute(alp_buffer) };
223-
PrimitiveArray::new::<T>(decoded_buffer.freeze(), validity)
224-
})
225-
})
226-
})
227-
}
228-
229-
/// Decompresses an ALP-encoded array without chunk offsets.
230-
///
231-
/// This function decodes the complete array at once and then applies any patches after.
232-
fn decompress_unchunked(array: ALPArray) -> PrimitiveArray {
233-
let patches = array.patches().cloned();
234-
let encoded = array.encoded().to_primitive();
235-
let validity = encoded.validity().clone();
236-
let exponents = array.exponents();
237-
let ptype = array.dtype().as_ptype();
238-
239-
// We need to drop ALPArray here in case converting encoded buffer into
240-
// primitive didn't create a copy. In that case both alp_encoded and array
241-
// will hold a reference to the buffer we want to mutate.
242-
drop(array);
243-
244-
let decoded = match_each_alp_float_ptype!(ptype, |T| {
245-
PrimitiveArray::new::<T>(
246-
<T>::decode_buffer(encoded.into_buffer_mut(), exponents),
247-
validity,
248-
)
249-
});
250-
251-
if let Some(patches) = patches {
252-
decoded.patch(&patches)
253-
} else {
254-
decoded
255-
}
256-
}
257-
258128
#[cfg(test)]
259129
mod tests {
260130
use core::f64;
261131

262132
use f64::consts::E;
263133
use f64::consts::PI;
134+
use vortex_array::ToCanonical;
264135
use vortex_array::assert_arrays_eq;
265136
use vortex_array::validity::Validity;
266137
use vortex_buffer::Buffer;
267138
use vortex_buffer::buffer;
268139
use vortex_dtype::NativePType;
269140

270141
use super::*;
142+
use crate::decompress;
271143

272144
#[test]
273145
fn test_compress() {

0 commit comments

Comments
 (0)