Commit 7017027

Chore: bitpacking move stuff around again (#5224)
This is another cosmetic change, just to make things easier as I get ready to migrate bitpacking to decompress into buffers instead of the `PrimitiveBuilder`. It didn't make sense that the `unpack` decompression functions lived in `bitpack_compress`, so I added a `bitpack_decompress` module.

Signed-off-by: Connor Tsui <[email protected]>
1 parent 701bd6c commit 7017027
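
For orientation, the decompression entry points keep their signatures; only their home module changes. A minimal round-trip sketch adapted from the tests in this diff (crate-internal module paths as used there; external callers may import differently):

```rust
use vortex_array::arrays::PrimitiveArray;

use crate::bitpack_compress::bitpack_encode;
// These previously lived in `bitpack_compress`; after this commit they come
// from the new `bitpack_decompress` module instead.
use crate::bitpacking::bitpack_decompress::{unpack, unpack_single};

// Sketch adapted from the tests added in this commit.
fn roundtrip() {
    // 0..1024 fits in 10 bits, so no patches are required.
    let values = PrimitiveArray::from_iter(0u16..1024);
    let bitpacked = bitpack_encode(&values, 10, None).unwrap();

    // Bulk decompression back into a canonical `PrimitiveArray`.
    let decoded = unpack(&bitpacked);
    assert_eq!(decoded.len(), values.len());

    // Random access to a single element without decompressing everything.
    let one: u16 = unpack_single(&bitpacked, 42).try_into().unwrap();
    assert_eq!(one, 42);
}
```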

File tree

9 files changed: +421 −395 lines


encodings/fastlanes/src/bitpacking/array/bitpack_compress.rs

Lines changed: 34 additions & 386 deletions
Large diffs are not rendered by default.

encodings/fastlanes/src/bitpacking/array/bitpack_decompress.rs

Lines changed: 377 additions & 0 deletions
@@ -0,0 +1,377 @@
// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright the Vortex contributors

use fastlanes::BitPacking;
use itertools::Itertools;
use vortex_array::ToCanonical;
use vortex_array::arrays::PrimitiveArray;
use vortex_array::builders::{ArrayBuilder, PrimitiveBuilder, UninitRange};
use vortex_array::patches::Patches;
use vortex_dtype::{
    IntegerPType, NativePType, PhysicalPType, match_each_integer_ptype,
    match_each_unsigned_integer_ptype,
};
use vortex_error::VortexExpect;
use vortex_mask::Mask;
use vortex_scalar::Scalar;

use crate::BitPackedArray;
use crate::unpack_iter::{BitPacked, UnpackStrategy};

/// BitPacking strategy - uses plain bitpacking without reference value
pub struct BitPackingStrategy;

impl<T: PhysicalPType<Physical: BitPacking>> UnpackStrategy<T> for BitPackingStrategy {
    #[inline(always)]
    unsafe fn unpack_chunk(
        &self,
        bit_width: usize,
        chunk: &[T::Physical],
        dst: &mut [T::Physical],
    ) {
        // SAFETY: Caller must ensure [`BitPacking::unchecked_unpack`] safety requirements hold.
        unsafe {
            BitPacking::unchecked_unpack(bit_width, chunk, dst);
        }
    }
}

pub fn unpack(array: &BitPackedArray) -> PrimitiveArray {
    match_each_integer_ptype!(array.ptype(), |P| { unpack_primitive::<P>(array) })
}

pub fn unpack_primitive<T: BitPacked>(array: &BitPackedArray) -> PrimitiveArray {
    let mut builder = PrimitiveBuilder::with_capacity(array.dtype().nullability(), array.len());
    unpack_into::<T>(array, &mut builder);
    assert_eq!(builder.len(), array.len());
    builder.finish_into_primitive()
}

pub(crate) fn unpack_into<T: BitPacked>(
    array: &BitPackedArray,
    // TODO(ngates): do we want to use fastlanes alignment for this buffer?
    builder: &mut PrimitiveBuilder<T>,
) {
    // If the array is empty, then we don't need to add anything to the builder.
    if array.is_empty() {
        return;
    }

    let mut uninit_range = builder.uninit_range(array.len());

    // SAFETY: We later initialize the uninitialized range of values with `copy_from_slice`.
    unsafe {
        // Append a dense null Mask.
        uninit_range.append_mask(array.validity_mask());
    }

    let mut bit_packed_iter = array.unpacked_chunks();
    bit_packed_iter.decode_into(&mut uninit_range);

    if let Some(patches) = array.patches() {
        apply_patches(&mut uninit_range, patches);
    };

    // SAFETY: We have set a correct validity mask via `append_mask` with `array.len()` values and
    // initialized the same number of values needed via calls to `copy_from_slice`.
    unsafe {
        uninit_range.finish();
    }
}

pub fn apply_patches<T: NativePType>(dst: &mut UninitRange<T>, patches: &Patches) {
    apply_patches_fn(dst, patches, |x| x)
}

pub fn apply_patches_fn<T: NativePType, F: Fn(T) -> T>(
    dst: &mut UninitRange<T>,
    patches: &Patches,
    f: F,
) {
    assert_eq!(patches.array_len(), dst.len());

    let indices = patches.indices().to_primitive();
    let values = patches.values().to_primitive();
    let validity = values.validity_mask();
    let values = values.as_slice::<T>();

    match_each_unsigned_integer_ptype!(indices.ptype(), |P| {
        insert_values_and_validity_at_indices(
            dst,
            indices.as_slice::<P>(),
            values,
            validity,
            patches.offset(),
            f,
        )
    });
}

fn insert_values_and_validity_at_indices<T: NativePType, IndexT: IntegerPType, F: Fn(T) -> T>(
    dst: &mut UninitRange<T>,
    indices: &[IndexT],
    values: &[T],
    values_validity: Mask,
    indices_offset: usize,
    f: F,
) {
    match values_validity {
        Mask::AllTrue(_) => {
            for (index, &value) in indices.iter().zip_eq(values) {
                dst.set_value(index.as_() - indices_offset, f(value));
            }
        }
        Mask::AllFalse(_) => {
            for decompressed_index in indices {
                dst.set_validity_bit(decompressed_index.as_() - indices_offset, false);
            }
        }
        Mask::Values(vb) => {
            for (index, &value) in indices.iter().zip_eq(values) {
                let out_index = index.as_() - indices_offset;
                dst.set_value(out_index, f(value));
                dst.set_validity_bit(out_index, vb.value(out_index));
            }
        }
    }
}

pub fn unpack_single(array: &BitPackedArray, index: usize) -> Scalar {
    let bit_width = array.bit_width() as usize;
    let ptype = array.ptype();
    // let packed = array.packed().into_primitive()?;
    let index_in_encoded = index + array.offset() as usize;
    let scalar: Scalar = match_each_unsigned_integer_ptype!(ptype.to_unsigned(), |P| {
        unsafe {
            unpack_single_primitive::<P>(array.packed_slice::<P>(), bit_width, index_in_encoded)
                .into()
        }
    });
    // Cast to fix signedness and nullability
    scalar.cast(array.dtype()).vortex_expect("cast failure")
}

/// # Safety
///
/// The caller must ensure the following invariants hold:
/// * `packed.len() == (length + 1023) / 1024 * 128 * bit_width`
/// * `index_to_decode < length`
///
/// Where `length` is the length of the array/slice backed by `packed`
/// (but is not provided to this function).
pub unsafe fn unpack_single_primitive<T: NativePType + BitPacking>(
    packed: &[T],
    bit_width: usize,
    index_to_decode: usize,
) -> T {
    let chunk_index = index_to_decode / 1024;
    let index_in_chunk = index_to_decode % 1024;
    let elems_per_chunk: usize = 128 * bit_width / size_of::<T>();

    let packed_chunk = &packed[chunk_index * elems_per_chunk..][0..elems_per_chunk];
    unsafe { BitPacking::unchecked_unpack_single(bit_width, packed_chunk, index_in_chunk) }
}

pub fn count_exceptions(bit_width: u8, bit_width_freq: &[usize]) -> usize {
    if bit_width_freq.len() <= bit_width as usize {
        return 0;
    }
    bit_width_freq[bit_width as usize + 1..].iter().sum()
}

#[cfg(test)]
mod tests {
    use vortex_array::validity::Validity;
    use vortex_array::{IntoArray, assert_arrays_eq};
    use vortex_buffer::{Buffer, BufferMut, buffer};
    use vortex_dtype::Nullability;

    use super::*;
    use crate::BitPackedVTable;
    use crate::bitpack_compress::bitpack_encode;

    fn compression_roundtrip(n: usize) {
        let values = PrimitiveArray::from_iter((0..n).map(|i| (i % 2047) as u16));
        let compressed = BitPackedArray::encode(values.as_ref(), 11).unwrap();
        let decompressed = compressed.to_primitive();
        assert_arrays_eq!(decompressed, values);

        values
            .as_slice::<u16>()
            .iter()
            .enumerate()
            .for_each(|(i, v)| {
                let scalar: u16 = unpack_single(&compressed, i).try_into().unwrap();
                assert_eq!(scalar, *v);
            });
    }

    #[test]
    fn test_compression_roundtrip_fast() {
        compression_roundtrip(125);
    }

    #[test]
    #[cfg_attr(miri, ignore)] // This test is too slow on miri
    fn test_compression_roundtrip() {
        compression_roundtrip(1024);
        compression_roundtrip(10_000);
        compression_roundtrip(10_240);
    }

    #[test]
    fn test_all_zeros() {
        let zeros = buffer![0u16, 0, 0, 0].into_array().to_primitive();
        let bitpacked = bitpack_encode(&zeros, 0, None).unwrap();
        let actual = unpack(&bitpacked);
        assert_arrays_eq!(actual, PrimitiveArray::from_iter([0u16, 0, 0, 0]));
    }

    #[test]
    fn test_simple_patches() {
        let zeros = buffer![0u16, 1, 0, 1].into_array().to_primitive();
        let bitpacked = bitpack_encode(&zeros, 0, None).unwrap();
        let actual = unpack(&bitpacked);
        assert_arrays_eq!(actual, PrimitiveArray::from_iter([0u16, 1, 0, 1]));
    }

    #[test]
    fn test_one_full_chunk() {
        let zeros = BufferMut::from_iter(0u16..1024).into_array().to_primitive();
        let bitpacked = bitpack_encode(&zeros, 10, None).unwrap();
        let actual = unpack(&bitpacked);
        assert_arrays_eq!(actual, PrimitiveArray::from_iter(0u16..1024));
    }

    #[test]
    fn test_three_full_chunks_with_patches() {
        let zeros = BufferMut::from_iter((5u16..1029).chain(5u16..1029).chain(5u16..1029))
            .into_array()
            .to_primitive();
        let bitpacked = bitpack_encode(&zeros, 10, None).unwrap();
        assert!(bitpacked.patches().is_some());
        let actual = unpack(&bitpacked);
        assert_arrays_eq!(
            actual,
            PrimitiveArray::from_iter((5u16..1029).chain(5u16..1029).chain(5u16..1029))
        );
    }

    #[test]
    fn test_one_full_chunk_and_one_short_chunk_no_patch() {
        let zeros = BufferMut::from_iter(0u16..1025).into_array().to_primitive();
        let bitpacked = bitpack_encode(&zeros, 11, None).unwrap();
        assert!(bitpacked.patches().is_none());
        let actual = unpack(&bitpacked);
        assert_arrays_eq!(actual, PrimitiveArray::from_iter(0u16..1025));
    }

    #[test]
    fn test_one_full_chunk_and_one_short_chunk_with_patches() {
        let zeros = BufferMut::from_iter(512u16..1537)
            .into_array()
            .to_primitive();
        let bitpacked = bitpack_encode(&zeros, 10, None).unwrap();
        assert_eq!(bitpacked.len(), 1025);
        assert!(bitpacked.patches().is_some());
        let actual = unpack(&bitpacked);
        assert_arrays_eq!(actual, PrimitiveArray::from_iter(512u16..1537));
    }

    #[test]
    fn test_offset_and_short_chunk_and_patches() {
        let zeros = BufferMut::from_iter(512u16..1537)
            .into_array()
            .to_primitive();
        let bitpacked = bitpack_encode(&zeros, 10, None).unwrap();
        assert_eq!(bitpacked.len(), 1025);
        assert!(bitpacked.patches().is_some());
        let bitpacked = bitpacked.slice(1023..1025);
        let actual = unpack(bitpacked.as_::<BitPackedVTable>());
        assert_arrays_eq!(actual, PrimitiveArray::from_iter([1535u16, 1536]));
    }

    #[test]
    fn test_offset_and_short_chunk_with_chunks_between_and_patches() {
        let zeros = BufferMut::from_iter(512u16..2741)
            .into_array()
            .to_primitive();
        let bitpacked = bitpack_encode(&zeros, 10, None).unwrap();
        assert_eq!(bitpacked.len(), 2229);
        assert!(bitpacked.patches().is_some());
        let bitpacked = bitpacked.slice(1023..2049);
        let actual = unpack(bitpacked.as_::<BitPackedVTable>());
        assert_arrays_eq!(
            actual,
            PrimitiveArray::from_iter((1023u16..2049).map(|x| x + 512))
        );
    }

    #[test]
    fn test_unpack_into_empty_array() {
        let empty: PrimitiveArray = PrimitiveArray::from_iter(Vec::<u32>::new());
        let bitpacked = bitpack_encode(&empty, 0, None).unwrap();

        let mut builder = PrimitiveBuilder::<u32>::new(Nullability::NonNullable);
        unpack_into(&bitpacked, &mut builder);

        let result = builder.finish_into_primitive();
        assert_eq!(
            result.len(),
            0,
            "Empty array should result in empty builder"
        );
    }

    /// This test ensures that the mask is properly appended to the range, not the builder.
    #[test]
    fn test_unpack_into_with_validity_mask() {
        // Create an array with some null values.
        let values = Buffer::from_iter([1u32, 0, 3, 4, 0]);
        let validity = Validity::from_iter([true, false, true, true, false]);
        let array = PrimitiveArray::new(values, validity);

        // Bitpack the array.
        let bitpacked = bitpack_encode(&array, 3, None).unwrap();

        // Unpack into a new builder.
        let mut builder = PrimitiveBuilder::<u32>::with_capacity(Nullability::Nullable, 5);
        unpack_into(&bitpacked, &mut builder);

        let result = builder.finish_into_primitive();

        // Verify the validity mask was correctly applied.
        assert_eq!(result.len(), 5);
        assert!(!result.scalar_at(0).is_null());
        assert!(result.scalar_at(1).is_null());
        assert!(!result.scalar_at(2).is_null());
        assert!(!result.scalar_at(3).is_null());
        assert!(result.scalar_at(4).is_null());
    }

    /// Test that `unpack_into` correctly handles arrays with patches.
    #[test]
    fn test_unpack_into_with_patches() {
        // Create an array where most values fit in 4 bits but some need patches.
        let values: Vec<u32> = (0..100)
            .map(|i| if i % 20 == 0 { 1000 + i } else { i % 16 })
            .collect();
        let array = PrimitiveArray::from_iter(values.clone());

        // Bitpack with a bit width that will require patches.
        let bitpacked = bitpack_encode(&array, 4, None).unwrap();
        assert!(
            bitpacked.patches().is_some(),
            "Should have patches for values > 15"
        );

        // Unpack into a new builder.
        let mut builder = PrimitiveBuilder::<u32>::with_capacity(Nullability::NonNullable, 100);
        unpack_into(&bitpacked, &mut builder);

        let result = builder.finish_into_primitive();

        // Verify all values were correctly unpacked including patches.
        assert_arrays_eq!(result, PrimitiveArray::from_iter(values));
    }
}
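
To make the chunk arithmetic in `unpack_single_primitive` above concrete, here is a small worked example (plain std Rust, not library API); the numbers assume `u16` values packed at `bit_width = 10`:

```rust
/// Mirrors the index math in `unpack_single_primitive` for u16 at bit_width = 10.
/// Returns (chunk_index, index_in_chunk, start offset of that chunk in `packed`).
fn locate(index_to_decode: usize) -> (usize, usize, usize) {
    let bit_width = 10usize;
    // 1024 values * 10 bits = 1280 bytes = 640 u16 words per FastLanes chunk.
    let elems_per_chunk = 128 * bit_width / std::mem::size_of::<u16>();
    let chunk_index = index_to_decode / 1024;
    let index_in_chunk = index_to_decode % 1024;
    (chunk_index, index_in_chunk, chunk_index * elems_per_chunk)
}

fn main() {
    // Element 2500 sits in the third 1024-value chunk, at position 452 within it,
    // and its packed words live in packed[1280..1920].
    assert_eq!(locate(2500), (2, 452, 1280));
}
```

The `unchecked_unpack_single` call then reads only that 640-word slice, which is what makes single-element access possible without decompressing the whole array.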

encodings/fastlanes/src/bitpacking/array/mod.rs

Lines changed: 1 addition & 0 deletions
@@ -12,6 +12,7 @@ use vortex_dtype::{DType, NativePType, PType};
 use vortex_error::{VortexResult, vortex_bail, vortex_ensure};
 
 pub mod bitpack_compress;
+pub mod bitpack_decompress;
 pub mod unpack_iter;
 
 use crate::bitpack_compress::bitpack_encode;

encodings/fastlanes/src/bitpacking/array/unpack_iter.rs

Lines changed: 1 addition & 1 deletion
@@ -14,7 +14,7 @@ use vortex_buffer::ByteBuffer;
 use vortex_dtype::PhysicalPType;
 
 use crate::BitPackedArray;
-use crate::bitpack_compress::BitPackingStrategy;
+use crate::bitpacking::bitpack_decompress::BitPackingStrategy;
 
 const CHUNK_SIZE: usize = 1024;
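
The one-line import fix above is the only change `unpack_iter.rs` needs; decompression itself still lands in a `PrimitiveBuilder` via `unpack_into`, which the commit message flags as the next thing to migrate toward raw buffers. A sketch of that current builder-based path, adapted from `test_unpack_into_with_validity_mask` in the new module (crate-internal code: `unpack_into` is `pub(crate)`, and the module paths are the ones used inside the crate):

```rust
use vortex_array::arrays::PrimitiveArray;
use vortex_array::builders::{ArrayBuilder, PrimitiveBuilder};
use vortex_array::validity::Validity;
use vortex_buffer::Buffer;
use vortex_dtype::Nullability;

use crate::bitpack_compress::bitpack_encode;
use crate::bitpacking::bitpack_decompress::unpack_into;

// Sketch adapted from `test_unpack_into_with_validity_mask` in this commit.
fn decode_via_builder() {
    // A nullable input: 5 values, two of them null.
    let values = Buffer::from_iter([1u32, 0, 3, 4, 0]);
    let validity = Validity::from_iter([true, false, true, true, false]);
    let array = PrimitiveArray::new(values, validity);

    let bitpacked = bitpack_encode(&array, 3, None).unwrap();

    // Today: decompress through a `PrimitiveBuilder`...
    let mut builder = PrimitiveBuilder::<u32>::with_capacity(Nullability::Nullable, 5);
    unpack_into(&bitpacked, &mut builder);
    let decoded = builder.finish_into_primitive();
    assert_eq!(decoded.len(), 5);
    // ...which is the step the commit message plans to replace with
    // decompression directly into buffers.
}
```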

0 commit comments