Skip to content

Commit 2adb2dc

Browse files
authored
Feature: implement bitpacking batch execute (#5446)
Adds unpacking directly into buffers instead of a builder. --------- Signed-off-by: Connor Tsui <[email protected]>
1 parent 878f9b1 commit 2adb2dc

File tree

3 files changed

+376
-17
lines changed

3 files changed

+376
-17
lines changed

encodings/fastlanes/src/bitpacking/array/bitpack_decompress.rs

Lines changed: 368 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,16 +7,117 @@ use vortex_array::ToCanonical;
77
use vortex_array::arrays::PrimitiveArray;
88
use vortex_array::builders::{ArrayBuilder, PrimitiveBuilder, UninitRange};
99
use vortex_array::patches::Patches;
10+
use vortex_array::validity::Validity;
11+
use vortex_array::vtable::ValidityHelper;
12+
use vortex_buffer::BufferMut;
1013
use vortex_dtype::{
11-
IntegerPType, NativePType, match_each_integer_ptype, match_each_unsigned_integer_ptype,
14+
IntegerPType, NativePType, UnsignedPType, match_each_integer_ptype,
15+
match_each_unsigned_integer_ptype,
1216
};
1317
use vortex_error::{VortexExpect, vortex_panic};
14-
use vortex_mask::Mask;
18+
use vortex_mask::{Mask, MaskMut};
1519
use vortex_scalar::Scalar;
20+
use vortex_vector::primitive::{PVectorMut, PrimitiveVectorMut};
1621

1722
use crate::BitPackedArray;
1823
use crate::unpack_iter::BitPacked;
1924

25+
/// Unpacks a bit-packed array into a primitive vector.
26+
pub fn unpack_to_primitive_vector(array: &BitPackedArray) -> PrimitiveVectorMut {
27+
match_each_integer_ptype!(array.ptype(), |P| { unpack_to_pvector::<P>(array).into() })
28+
}
29+
30+
/// Unpacks a bit-packed array into a generic [`PVectorMut`].
31+
pub fn unpack_to_pvector<P: BitPacked>(array: &BitPackedArray) -> PVectorMut<P> {
32+
if array.is_empty() {
33+
return PVectorMut::with_capacity(0);
34+
}
35+
36+
let len = array.len();
37+
let mut elements = BufferMut::<P>::with_capacity(len);
38+
let uninit_slice = &mut elements.spare_capacity_mut()[..len];
39+
40+
// Decode into an uninitialized slice.
41+
let mut bit_packed_iter = array.unpacked_chunks();
42+
bit_packed_iter.decode_into(uninit_slice);
43+
// SAFETY: `decode_into` initialized exactly `len` elements into the spare (existing) capacity.
44+
unsafe { elements.set_len(len) };
45+
46+
let mut validity = array.validity_mask().into_mut();
47+
debug_assert_eq!(validity.len(), len);
48+
49+
// TODO(connor): Implement a fused version of patching instead.
50+
if let Some(patches) = array.patches() {
51+
let patch_indices = patches.indices().to_primitive();
52+
let patch_values = patches.values().to_primitive();
53+
let patches_validity = patch_values.validity();
54+
let patch_offset = patches.offset();
55+
56+
let patch_values_slice = patch_values.as_slice::<P>();
57+
match_each_unsigned_integer_ptype!(patch_indices.ptype(), |I| {
58+
let patch_indices_slice = patch_indices.as_slice::<I>();
59+
60+
// SAFETY:
61+
// - `Patches` invariant guarantees indices are sorted and within array bounds.
62+
// - `patch_indices` and `patch_values` have equal length (from `Patches` invariant).
63+
// - `elements` and `validity` have equal length (both are `len` from the array).
64+
// - All patch indices are valid after offset adjustment (guaranteed by `Patches`).
65+
unsafe {
66+
apply_patches_inner(
67+
&mut elements,
68+
&mut validity,
69+
patch_indices_slice,
70+
patch_offset,
71+
patch_values_slice,
72+
patches_validity,
73+
)
74+
};
75+
});
76+
}
77+
78+
// SAFETY: `elements` and `validity` have the same length.
79+
unsafe { PVectorMut::new_unchecked(elements, validity) }
80+
}
81+
82+
/// # Safety
83+
///
84+
/// - All indices in `patch_indices` after subtracting `patch_offset` must be valid indices
85+
/// into both `buffer` and `validity`.
86+
/// - `patch_indices` must be sorted in ascending order.
87+
/// - `patch_indices` and `patch_values` must have the same length.
88+
/// - `buffer` and `validity` must have the same length.
89+
unsafe fn apply_patches_inner<P, I>(
90+
buffer: &mut [P],
91+
validity: &mut MaskMut,
92+
patch_indices: &[I],
93+
patch_offset: usize,
94+
patch_values: &[P],
95+
patches_validity: &Validity,
96+
) where
97+
P: NativePType,
98+
I: UnsignedPType,
99+
{
100+
debug_assert!(!patch_indices.is_empty());
101+
debug_assert_eq!(patch_indices.len(), patch_values.len());
102+
debug_assert_eq!(buffer.len(), validity.len());
103+
debug_assert!(patch_indices.is_sorted());
104+
debug_assert!(patch_indices.last().vortex_expect("can't be empty").as_() <= validity.len());
105+
106+
match patches_validity {
107+
Validity::NonNullable | Validity::AllValid => {
108+
for (&i, &value) in patch_indices.iter().zip_eq(patch_values) {
109+
let index = i.as_() - patch_offset;
110+
111+
// SAFETY: `index` is valid because caller guarantees all patch indices are within
112+
// bounds after offset adjustment.
113+
unsafe { validity.set_unchecked(index) };
114+
buffer[index] = value;
115+
}
116+
}
117+
_ => vortex_panic!("BitPackedArray somehow had nullable patch values"),
118+
}
119+
}
120+
20121
pub fn unpack_array(array: &BitPackedArray) -> PrimitiveArray {
21122
match_each_integer_ptype!(array.ptype(), |P| { unpack_primitive_array::<P>(array) })
22123
}
@@ -161,6 +262,7 @@ mod tests {
161262
use vortex_array::{IntoArray, assert_arrays_eq};
162263
use vortex_buffer::{Buffer, BufferMut, buffer};
163264
use vortex_dtype::Nullability;
265+
use vortex_vector::{VectorMutOps, VectorOps};
164266

165267
use super::*;
166268
use crate::BitPackedVTable;
@@ -350,4 +452,268 @@ mod tests {
350452
// Verify all values were correctly unpacked including patches.
351453
assert_arrays_eq!(result, PrimitiveArray::from_iter(values));
352454
}
455+
456+
/// Smoke-tests `unpack_to_primitive_vector` over several element types and lengths.
///
/// Only lengths are asserted; element values are not compared.
#[test]
fn test_unpack_to_primitive_vector_basic() {
    // u8: five small values packed at 5 bits.
    let small = PrimitiveArray::from_iter([5u8, 10, 15, 20, 25]);
    let small_packed = bitpack_encode(&small, 5, None).unwrap();
    let small_vector = unpack_to_primitive_vector(&small_packed);
    // The pre-existing unpack path serves as the length reference.
    let small_reference = unpack_array(&small_packed);
    assert_eq!(small_vector.len(), small_reference.len());
    // The mutable vector is frozen; the frozen result is not inspected further.
    let _small_frozen = small_vector.freeze();

    // u32: empty input.
    let no_values: PrimitiveArray = PrimitiveArray::from_iter(Vec::<u32>::new());
    let no_values_packed = bitpack_encode(&no_values, 0, None).unwrap();
    let no_values_vector = unpack_to_primitive_vector(&no_values_packed);
    assert_eq!(no_values_vector.len(), 0);

    // u16: 1024 elements.
    let full_chunk = PrimitiveArray::from_iter(0u16..1024);
    let full_chunk_packed = bitpack_encode(&full_chunk, 10, None).unwrap();
    let full_chunk_vector = unpack_to_primitive_vector(&full_chunk_packed);
    assert_eq!(full_chunk_vector.len(), 1024);

    // i32: 1025 elements.
    let overflowing = PrimitiveArray::from_iter((0i32..1025).map(|x| x % 512));
    let overflowing_packed = bitpack_encode(&overflowing, 9, None).unwrap();
    let overflowing_vector = unpack_to_primitive_vector(&overflowing_packed);
    assert_eq!(overflowing_vector.len(), 1025);

    // Both unpack paths must agree on the length.
    let overflowing_reference = unpack_array(&overflowing_packed);
    assert_eq!(overflowing_vector.len(), overflowing_reference.len());
}
492+
493+
/// Exercises `unpack_to_primitive_vector` on arrays whose encoding carries patches.
///
/// Only the presence of patches and the resulting lengths are asserted.
#[test]
fn test_unpack_to_primitive_vector_with_patches() {
    // Oversized values sit at the start, in the middle, and at the end.
    let values: Vec<u32> = vec![
        2000, // Oversized value at the start.
        5, 10, 15, 20, 25, 30, 3000, // Oversized value in the middle.
        35, 40, 45, 50, 55, 4000, // Oversized value at the end.
    ];
    let expected_len = values.len();
    let array = PrimitiveArray::from_iter(values);

    // A 6-bit width cannot hold the oversized values, so they must become patches.
    let bitpacked = bitpack_encode(&array, 6, None).unwrap();
    assert!(bitpacked.patches().is_some(), "Should have patches");

    let vector = unpack_to_primitive_vector(&bitpacked);
    assert_eq!(vector.len(), expected_len);

    // The pre-existing unpack path must report the same length.
    let expected = unpack_array(&bitpacked);
    assert_eq!(vector.len(), expected.len());

    // A longer array with an oversized value every 500 elements.
    let large_values: Vec<u16> = (0..3072)
        .map(|i| {
            if i % 500 == 0 {
                2000 + i as u16 // Too large for 8 bits.
            } else {
                (i % 256) as u16 // Fits in 8 bits.
            }
        })
        .collect();
    let large_array = PrimitiveArray::from_iter(large_values);
    let large_bitpacked = bitpack_encode(&large_array, 8, None).unwrap();
    assert!(large_bitpacked.patches().is_some());

    let large_vector = unpack_to_primitive_vector(&large_bitpacked);
    assert_eq!(large_vector.len(), 3072);
}
534+
535+
/// Exercises `unpack_to_primitive_vector` on nullable inputs.
///
/// Only lengths (and, where relevant, the presence of patches) are asserted; the resulting
/// validity is not inspected.
#[test]
fn test_unpack_to_primitive_vector_nullability() {
    // Alternating valid / null entries.
    let array = PrimitiveArray::new(
        Buffer::from_iter([100u32, 0, 200, 0, 300, 0, 400]),
        Validity::from_iter([true, false, true, false, true, false, true]),
    );
    let bitpacked = bitpack_encode(&array, 9, None).unwrap();
    let vector = unpack_to_primitive_vector(&bitpacked);
    assert_eq!(vector.len(), 7);

    // Nulls combined with values that are too large for the 5-bit width.
    let patch_array = PrimitiveArray::new(
        Buffer::from_iter([10u16, 0, 2000, 0, 30, 3000, 0]),
        Validity::from_iter([true, false, true, false, true, true, false]),
    );
    let patch_bitpacked = bitpack_encode(&patch_array, 5, None).unwrap();
    assert!(patch_bitpacked.patches().is_some());

    let patch_vector = unpack_to_primitive_vector(&patch_bitpacked);
    assert_eq!(patch_vector.len(), 7);

    // Every entry is null.
    let all_nulls = PrimitiveArray::new(
        Buffer::from_iter([0u32, 0, 0, 0]),
        Validity::from_iter([false, false, false, false]),
    );
    let all_nulls_bp = bitpack_encode(&all_nulls, 0, None).unwrap();
    let all_nulls_vec = unpack_to_primitive_vector(&all_nulls_bp);
    assert_eq!(all_nulls_vec.len(), 4);
}
570+
571+
/// Checks that `execute()`, `unpack_to_primitive_vector()` and `unpack_array()` agree on the
/// length of their output, for both whole and sliced bit-packed arrays.
#[test]
fn test_execute_method_consistency() {
    use vortex_vector::Vector;

    // Encodes `array` at `bit_width` and compares the lengths produced by all three paths.
    let check_consistency = |array: &PrimitiveArray, bit_width: u8| {
        let expected_len = array.len();
        let bitpacked = bitpack_encode(array, bit_width, None).unwrap();

        // Path 1: the new direct-to-vector unpacking.
        let vector_result = unpack_to_primitive_vector(&bitpacked);
        // Path 2: the pre-existing array unpacking.
        let unpacked_array = unpack_array(&bitpacked);
        // Path 3: `execute()` on the encoded array.
        let executed = bitpacked.into_array().execute().unwrap();

        assert_eq!(vector_result.len(), expected_len, "vector length mismatch");
        assert_eq!(
            unpacked_array.len(),
            expected_len,
            "unpacked array length mismatch"
        );

        let Vector::Primitive(pv) = &executed else {
            panic!("Expected primitive vector from execute")
        };
        assert_eq!(pv.len(), expected_len, "executed vector length mismatch");

        // Executing the already-unpacked array must give a primitive vector of the same length.
        let unpacked_executed = unpacked_array.into_array().execute().unwrap();
        let (Vector::Primitive(exec_pv), Vector::Primitive(unpack_pv)) =
            (&executed, &unpacked_executed)
        else {
            panic!("Expected both to be primitive vectors")
        };
        assert_eq!(
            exec_pv.len(),
            unpack_pv.len(),
            "execute() and unpack_array().execute() produced different lengths"
        );
    };

    // Inputs whose values fit in the chosen bit width.
    check_consistency(&PrimitiveArray::from_iter(0u16..100), 7);
    check_consistency(&PrimitiveArray::from_iter(0u32..1024), 10);

    // i16 values reduced modulo 128, packed at 7 bits.
    check_consistency(&PrimitiveArray::from_iter((0i16..2048).map(|x| x % 128)), 7);

    // Every 20th value is far too large for 4 bits.
    let patch_values: Vec<u32> = (0..100)
        .map(|i| if i % 20 == 0 { 1000 + i } else { i % 16 })
        .collect();
    check_consistency(&PrimitiveArray::from_iter(patch_values), 4);

    // Sliced array: take elements 500..1500 of a 2048-element encoding.
    let values = PrimitiveArray::from_iter(0u32..2048);
    let bitpacked = bitpack_encode(&values, 11, None).unwrap();
    let sliced = bitpacked.slice(500..1500);

    // Run all three paths on the slice.
    let sliced_bp = sliced.as_::<BitPackedVTable>();
    let vector_result = unpack_to_primitive_vector(sliced_bp);
    let unpacked_array = unpack_array(sliced_bp);
    let executed = sliced.execute().unwrap();

    assert_eq!(
        vector_result.len(),
        1000,
        "sliced vector length should be 1000"
    );
    assert_eq!(
        unpacked_array.len(),
        1000,
        "sliced unpacked array length should be 1000"
    );

    let Vector::Primitive(pv) = executed else {
        panic!("Expected primitive vector from execute on sliced array")
    };
    assert_eq!(
        pv.len(),
        1000,
        "sliced executed vector length should be 1000"
    );
}
668+
669+
/// Exercises `unpack_to_primitive_vector` on boundary inputs: empty, all-zero, a 15-bit width
/// for `u16`, oversized values at multiples of 1024, and a single element.
///
/// Only lengths (and the presence of patches) are asserted.
#[test]
fn test_unpack_edge_cases() {
    // No elements at all.
    let empty: PrimitiveArray = PrimitiveArray::from_iter(Vec::<u64>::new());
    let empty_bp = bitpack_encode(&empty, 0, None).unwrap();
    assert_eq!(unpack_to_primitive_vector(&empty_bp).len(), 0);

    // One hundred zeros, packed at bit width 0.
    let zeros = PrimitiveArray::from_iter([0u32; 100]);
    let zeros_bp = bitpack_encode(&zeros, 0, None).unwrap();
    let zeros_vec = unpack_to_primitive_vector(&zeros_bp);
    assert_eq!(zeros_vec.len(), 100);
    // The pre-existing unpack path must report the same length.
    let zeros_array = unpack_array(&zeros_bp);
    assert_eq!(zeros_vec.len(), zeros_array.len());

    // u16 values of 2^15 - 1, packed at 15 bits.
    let max_values = PrimitiveArray::from_iter([32767u16; 50]);
    let max_bp = bitpack_encode(&max_values, 15, None).unwrap();
    let max_vec = unpack_to_primitive_vector(&max_bp);
    assert_eq!(max_vec.len(), 50);

    // 3072 elements with oversized values on either side of indices 1024 and 2048.
    let boundary_values: Vec<u32> = (0..3072)
        .map(|i| match i {
            // Too large for 7 bits, so these must be patched.
            1023 | 1024 | 2047 | 2048 => 50000,
            _ => (i % 128) as u32,
        })
        .collect();
    let boundary_array = PrimitiveArray::from_iter(boundary_values);
    let boundary_bp = bitpack_encode(&boundary_array, 7, None).unwrap();
    assert!(boundary_bp.patches().is_some());

    let boundary_vec = unpack_to_primitive_vector(&boundary_bp);
    assert_eq!(boundary_vec.len(), 3072);
    // Both unpack paths must agree on the length.
    let boundary_unpacked = unpack_array(&boundary_bp);
    assert_eq!(boundary_vec.len(), boundary_unpacked.len());

    // Exactly one element.
    let single = PrimitiveArray::from_iter([42u8]);
    let single_bp = bitpack_encode(&single, 6, None).unwrap();
    let single_vec = unpack_to_primitive_vector(&single_bp);
    assert_eq!(single_vec.len(), 1);
}
353719
}

0 commit comments

Comments
 (0)