Skip to content

Commit d96a3f3

Browse files
authored
Chore: refactor for module (#5451)
This is a purely cosmetic change. Moves all vtable implementations to their own files and separates decompression functions out into a `for_decompress` file.

Signed-off-by: Connor Tsui <[email protected]>
1 parent c83ec8a commit d96a3f3

File tree

14 files changed

+460
-647
lines changed

14 files changed

+460
-647
lines changed

encodings/fastlanes/src/bitpacking/mod.rs

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -4,7 +4,7 @@
44
mod array;
55
pub use array::{BitPackedArray, bitpack_compress, bitpack_decompress, unpack_iter};
66

7+
mod compute;
8+
79
mod vtable;
810
pub use vtable::{BitPackedEncoding, BitPackedVTable};
9-
10-
mod compute;

encodings/fastlanes/src/for/compress.rs renamed to encodings/fastlanes/src/for/array/for_compress.rs

Lines changed: 22 additions & 134 deletions
Original file line number | Diff line number | Diff line change
@@ -1,22 +1,14 @@
11
// SPDX-License-Identifier: Apache-2.0
22
// SPDX-FileCopyrightText: Copyright the Vortex contributors
33

4-
use fastlanes::FoR;
5-
use num_traits::{PrimInt, WrappingAdd, WrappingSub};
4+
use num_traits::{PrimInt, WrappingSub};
5+
use vortex_array::IntoArray;
66
use vortex_array::arrays::PrimitiveArray;
7-
use vortex_array::builders::PrimitiveBuilder;
87
use vortex_array::stats::Stat;
9-
use vortex_array::vtable::ValidityHelper;
10-
use vortex_array::{IntoArray, ToCanonical};
11-
use vortex_buffer::{Buffer, BufferMut};
12-
use vortex_dtype::{
13-
NativePType, PhysicalPType, UnsignedPType, match_each_integer_ptype,
14-
match_each_unsigned_integer_ptype,
15-
};
16-
use vortex_error::{VortexExpect, VortexResult, vortex_err};
8+
use vortex_dtype::{NativePType, match_each_integer_ptype};
9+
use vortex_error::{VortexResult, vortex_err};
1710

18-
use crate::unpack_iter::{UnpackStrategy, UnpackedChunks};
19-
use crate::{BitPackedArray, BitPackedVTable, FoRArray, bitpack_decompress};
11+
use crate::FoRArray;
2012

2113
impl FoRArray {
2214
pub fn encode(array: PrimitiveArray) -> VortexResult<FoRArray> {
@@ -38,7 +30,7 @@ fn compress_primitive<T: NativePType + WrappingSub + PrimInt>(
3830
min: T,
3931
) -> VortexResult<PrimitiveArray> {
4032
// Set null values to the min value, ensuring that decompress into a value in the primitive
41-
// range (and stop them wrapping around)
33+
// range (and stop them wrapping around).
4234
parray.map_each_with_validity::<T, _, _>(|(v, bool)| {
4335
if bool {
4436
v.wrapping_sub(&min)
@@ -48,117 +40,6 @@ fn compress_primitive<T: NativePType + WrappingSub + PrimInt>(
4840
})
4941
}
5042

51-
/// FoR unpacking strategy that applies a reference value during unpacking
52-
struct FoRStrategy<T> {
53-
reference: T,
54-
}
55-
56-
impl<T: PhysicalPType<Physical = T> + FoR> UnpackStrategy<T> for FoRStrategy<T> {
57-
#[inline(always)]
58-
unsafe fn unpack_chunk(
59-
&self,
60-
bit_width: usize,
61-
chunk: &[T::Physical],
62-
dst: &mut [T::Physical],
63-
) {
64-
// SAFETY: Caller ensures chunk and dst have correct sizes
65-
unsafe {
66-
FoR::unchecked_unfor_pack(bit_width, chunk, self.reference, dst);
67-
}
68-
}
69-
}
70-
71-
pub fn decompress(array: &FoRArray) -> PrimitiveArray {
72-
let ptype = array.ptype();
73-
74-
// try to do fused unpack
75-
if array.dtype().is_unsigned_int()
76-
&& let Some(bp) = array.encoded().as_opt::<BitPackedVTable>()
77-
{
78-
return match_each_unsigned_integer_ptype!(array.ptype(), |T| {
79-
fused_decompress::<T>(array, bp)
80-
});
81-
}
82-
83-
// TODO(ngates): do we need this to be into_encoded() somehow?
84-
let encoded = array.encoded().to_primitive();
85-
let validity = encoded.validity().clone();
86-
87-
match_each_integer_ptype!(ptype, |T| {
88-
let min = array
89-
.reference_scalar()
90-
.as_primitive()
91-
.typed_value::<T>()
92-
.vortex_expect("reference must be non-null");
93-
if min == 0 {
94-
encoded
95-
} else {
96-
PrimitiveArray::new(
97-
decompress_primitive(encoded.into_buffer_mut::<T>(), min),
98-
validity,
99-
)
100-
}
101-
})
102-
}
103-
104-
fn fused_decompress<T: PhysicalPType<Physical = T> + UnsignedPType + FoR + WrappingAdd>(
105-
for_: &FoRArray,
106-
bp: &BitPackedArray,
107-
) -> PrimitiveArray {
108-
let ref_ = for_
109-
.reference
110-
.as_primitive()
111-
.as_::<T>()
112-
.vortex_expect("cannot be null");
113-
114-
let strategy = FoRStrategy { reference: ref_ };
115-
116-
// Create UnpackedChunks with FoR strategy
117-
let mut unpacked = UnpackedChunks::new_with_strategy(
118-
strategy,
119-
bp.packed().clone(),
120-
bp.bit_width() as usize,
121-
bp.offset() as usize,
122-
bp.len(),
123-
);
124-
125-
let mut builder = PrimitiveBuilder::<T>::with_capacity(for_.dtype().nullability(), bp.len());
126-
let mut uninit_range = builder.uninit_range(bp.len());
127-
unsafe {
128-
// Append a dense null Mask.
129-
uninit_range.append_mask(bp.validity_mask());
130-
}
131-
132-
// SAFETY: `decode_into` will initialize all values in this range.
133-
let uninit_slice = unsafe { uninit_range.slice_uninit_mut(0, bp.len()) };
134-
135-
// Decode all chunks (initial, full, and trailer) in one call
136-
unpacked.decode_into(uninit_slice);
137-
138-
if let Some(patches) = bp.patches() {
139-
bitpack_decompress::apply_patches_to_uninit_range_fn(&mut uninit_range, patches, |v| {
140-
v.wrapping_add(&ref_)
141-
});
142-
};
143-
144-
// SAFETY: We have set a correct validity mask via `append_mask` with `array.len()` values and
145-
// initialized the same number of values needed via `decode_into`.
146-
unsafe {
147-
uninit_range.finish();
148-
}
149-
150-
builder.finish_into_primitive()
151-
}
152-
153-
fn decompress_primitive<T: NativePType + WrappingAdd + PrimInt>(
154-
values: BufferMut<T>,
155-
min: T,
156-
) -> Buffer<T> {
157-
values
158-
.map_each_in_place(move |v| v.wrapping_add(&min))
159-
.freeze()
160-
}
161-
16243
#[cfg(test)]
16344
mod test {
16445
use itertools::Itertools;
@@ -170,10 +51,15 @@ mod test {
17051
use vortex_scalar::Scalar;
17152

17253
use super::*;
54+
use crate::BitPackedArray;
55+
use crate::r#for::array::for_decompress::{decompress, fused_decompress};
17356

17457
#[test]
17558
fn test_compress_round_trip_small() {
176-
let array = PrimitiveArray::new((1i32..10).collect::<Buffer<_>>(), Validity::NonNullable);
59+
let array = PrimitiveArray::new(
60+
(1i32..10).collect::<vortex_buffer::Buffer<_>>(),
61+
Validity::NonNullable,
62+
);
17763
let compressed = FoRArray::encode(array.clone()).unwrap();
17864
assert_eq!(i32::try_from(compressed.reference_scalar()).unwrap(), 1);
17965

@@ -183,9 +69,11 @@ mod test {
18369

18470
#[test]
18571
fn test_compress() {
186-
// Create a range offset by a million
72+
// Create a range offset by a million.
18773
let array = PrimitiveArray::new(
188-
(0u32..10_000).map(|v| v + 1_000_000).collect::<Buffer<_>>(),
74+
(0u32..10_000)
75+
.map(|v| v + 1_000_000)
76+
.collect::<vortex_buffer::Buffer<_>>(),
18977
Validity::NonNullable,
19078
);
19179
let compressed = FoRArray::encode(array).unwrap();
@@ -202,8 +90,8 @@ mod test {
20290

20391
let dtype = array.dtype().clone();
20492
let compressed = FoRArray::encode(array).unwrap();
205-
assert_eq!(compressed.dtype(), &dtype);
206-
assert!(compressed.dtype().is_signed_int());
93+
assert_eq!(compressed.reference_scalar().dtype(), &dtype);
94+
assert!(compressed.reference_scalar().dtype().is_signed_int());
20795
assert!(compressed.encoded().dtype().is_signed_int());
20896

20997
let constant = compressed.encoded().as_constant().unwrap();
@@ -212,7 +100,7 @@ mod test {
212100

213101
#[test]
214102
fn test_decompress() {
215-
// Create a range offset by a million
103+
// Create a range offset by a million.
216104
let array = PrimitiveArray::from_iter((0u32..100_000).step_by(1024).map(|v| v + 1_000_000));
217105
let compressed = FoRArray::encode(array.clone()).unwrap();
218106
let decompressed = compressed.to_primitive();
@@ -221,7 +109,7 @@ mod test {
221109

222110
#[test]
223111
fn test_decompress_fused() {
224-
// Create a range offset by a million
112+
// Create a range offset by a million.
225113
let expect = PrimitiveArray::from_iter((0u32..1024).map(|x| x % 7 + 10));
226114
let array = PrimitiveArray::from_iter((0u32..1024).map(|x| x % 7));
227115
let bp = BitPackedArray::encode(array.as_ref(), 3).unwrap();
@@ -232,7 +120,7 @@ mod test {
232120

233121
#[test]
234122
fn test_decompress_fused_patches() {
235-
// Create a range offset by a million
123+
// Create a range offset by a million.
236124
let expect = PrimitiveArray::from_iter((0u32..1024).map(|x| x % 7 + 10));
237125
let array = PrimitiveArray::from_iter((0u32..1024).map(|x| x % 7));
238126
let bp = BitPackedArray::encode(array.as_ref(), 2).unwrap();
@@ -262,7 +150,7 @@ mod test {
262150
let expected_unsigned = PrimitiveArray::from_iter(unsigned);
263151
assert_arrays_eq!(encoded, expected_unsigned);
264152

265-
let decompressed = compressed.to_primitive();
153+
let decompressed = decompress(&compressed);
266154
array
267155
.as_slice::<i8>()
268156
.iter()

encodings/fastlanes/src/for/array/for_decompress.rs

Lines changed: 134 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,134 @@
1+
// SPDX-License-Identifier: Apache-2.0
2+
// SPDX-FileCopyrightText: Copyright the Vortex contributors
3+
4+
use fastlanes::FoR;
5+
use num_traits::{PrimInt, WrappingAdd};
6+
use vortex_array::ToCanonical;
7+
use vortex_array::arrays::PrimitiveArray;
8+
use vortex_array::builders::PrimitiveBuilder;
9+
use vortex_array::vtable::ValidityHelper;
10+
use vortex_buffer::{Buffer, BufferMut};
11+
use vortex_dtype::{
12+
NativePType, PhysicalPType, UnsignedPType, match_each_integer_ptype,
13+
match_each_unsigned_integer_ptype,
14+
};
15+
use vortex_error::VortexExpect;
16+
17+
use crate::unpack_iter::{UnpackStrategy, UnpackedChunks};
18+
use crate::{BitPackedArray, BitPackedVTable, FoRArray, bitpack_decompress};
19+
20+
/// FoR unpacking strategy that applies a reference value during unpacking.
21+
struct FoRStrategy<T> {
22+
reference: T,
23+
}
24+
25+
impl<T: PhysicalPType<Physical = T> + FoR> UnpackStrategy<T> for FoRStrategy<T> {
26+
#[inline(always)]
27+
unsafe fn unpack_chunk(
28+
&self,
29+
bit_width: usize,
30+
chunk: &[T::Physical],
31+
dst: &mut [T::Physical],
32+
) {
33+
// SAFETY: Caller ensures chunk and dst have correct sizes.
34+
unsafe {
35+
FoR::unchecked_unfor_pack(bit_width, chunk, self.reference, dst);
36+
}
37+
}
38+
}
39+
40+
pub fn decompress(array: &FoRArray) -> PrimitiveArray {
41+
let ptype = array.ptype();
42+
43+
// Try to do fused unpack.
44+
if array.reference_scalar().dtype().is_unsigned_int()
45+
&& let Some(bp) = array.encoded().as_opt::<BitPackedVTable>()
46+
{
47+
return match_each_unsigned_integer_ptype!(array.ptype(), |T| {
48+
fused_decompress::<T>(array, bp)
49+
});
50+
}
51+
52+
// TODO(ngates): Do we need this to be into_encoded() somehow?
53+
let encoded = array.encoded().to_primitive();
54+
let validity = encoded.validity().clone();
55+
56+
match_each_integer_ptype!(ptype, |T| {
57+
let min = array
58+
.reference_scalar()
59+
.as_primitive()
60+
.typed_value::<T>()
61+
.vortex_expect("reference must be non-null");
62+
if min == 0 {
63+
encoded
64+
} else {
65+
PrimitiveArray::new(
66+
decompress_primitive(encoded.into_buffer_mut::<T>(), min),
67+
validity,
68+
)
69+
}
70+
})
71+
}
72+
73+
pub(crate) fn fused_decompress<
74+
T: PhysicalPType<Physical = T> + UnsignedPType + FoR + WrappingAdd,
75+
>(
76+
for_: &FoRArray,
77+
bp: &BitPackedArray,
78+
) -> PrimitiveArray {
79+
let ref_ = for_
80+
.reference_scalar()
81+
.as_primitive()
82+
.as_::<T>()
83+
.vortex_expect("cannot be null");
84+
85+
let strategy = FoRStrategy { reference: ref_ };
86+
87+
// Create [`UnpackedChunks`] with FoR strategy.
88+
let mut unpacked = UnpackedChunks::new_with_strategy(
89+
strategy,
90+
bp.packed().clone(),
91+
bp.bit_width() as usize,
92+
bp.offset() as usize,
93+
bp.len(),
94+
);
95+
96+
let mut builder = PrimitiveBuilder::<T>::with_capacity(
97+
for_.reference_scalar().dtype().nullability(),
98+
bp.len(),
99+
);
100+
let mut uninit_range = builder.uninit_range(bp.len());
101+
unsafe {
102+
// Append a dense null Mask.
103+
uninit_range.append_mask(bp.validity_mask());
104+
}
105+
106+
// SAFETY: `decode_into` will initialize all values in this range.
107+
let uninit_slice = unsafe { uninit_range.slice_uninit_mut(0, bp.len()) };
108+
109+
// Decode all chunks (initial, full, and trailer) in one call.
110+
unpacked.decode_into(uninit_slice);
111+
112+
if let Some(patches) = bp.patches() {
113+
bitpack_decompress::apply_patches_to_uninit_range_fn(&mut uninit_range, patches, |v| {
114+
v.wrapping_add(&ref_)
115+
});
116+
};
117+
118+
// SAFETY: We have set a correct validity mask via `append_mask` with `array.len()` values and
119+
// initialized the same number of values needed via `decode_into`.
120+
unsafe {
121+
uninit_range.finish();
122+
}
123+
124+
builder.finish_into_primitive()
125+
}
126+
127+
fn decompress_primitive<T: NativePType + WrappingAdd + PrimInt>(
128+
values: BufferMut<T>,
129+
min: T,
130+
) -> Buffer<T> {
131+
values
132+
.map_each_in_place(move |v| v.wrapping_add(&min))
133+
.freeze()
134+
}

0 commit comments

Comments
 (0)