Skip to content

Commit 3438bca

Browse files
authored
Chore: Use MaybeUninit instead of UninitRange for bit unpacking (#5427)
This will make porting over to vectors easier. --------- Signed-off-by: Connor Tsui <[email protected]>
1 parent 5cf5ff5 commit 3438bca

File tree

3 files changed

+47
-35
lines changed

3 files changed

+47
-35
lines changed

encodings/fastlanes/src/bitpacking/array/bitpack_decompress.rs

Lines changed: 7 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -8,33 +8,14 @@ use vortex_array::arrays::PrimitiveArray;
88
use vortex_array::builders::{ArrayBuilder, PrimitiveBuilder, UninitRange};
99
use vortex_array::patches::Patches;
1010
use vortex_dtype::{
11-
IntegerPType, NativePType, PhysicalPType, match_each_integer_ptype,
12-
match_each_unsigned_integer_ptype,
11+
IntegerPType, NativePType, match_each_integer_ptype, match_each_unsigned_integer_ptype,
1312
};
1413
use vortex_error::VortexExpect;
1514
use vortex_mask::Mask;
1615
use vortex_scalar::Scalar;
1716

1817
use crate::BitPackedArray;
19-
use crate::unpack_iter::{BitPacked, UnpackStrategy};
20-
21-
/// BitPacking strategy - uses plain bitpacking without reference value
22-
pub struct BitPackingStrategy;
23-
24-
impl<T: PhysicalPType<Physical: BitPacking>> UnpackStrategy<T> for BitPackingStrategy {
25-
#[inline(always)]
26-
unsafe fn unpack_chunk(
27-
&self,
28-
bit_width: usize,
29-
chunk: &[T::Physical],
30-
dst: &mut [T::Physical],
31-
) {
32-
// SAFETY: Caller must ensure [`BitPacking::unchecked_unpack`] safety requirements hold.
33-
unsafe {
34-
BitPacking::unchecked_unpack(bit_width, chunk, dst);
35-
}
36-
}
37-
}
18+
use crate::unpack_iter::BitPacked;
3819

3920
pub fn unpack(array: &BitPackedArray) -> PrimitiveArray {
4021
match_each_integer_ptype!(array.ptype(), |P| { unpack_primitive::<P>(array) })
@@ -65,15 +46,18 @@ pub(crate) fn unpack_into<T: BitPacked>(
6546
uninit_range.append_mask(array.validity_mask());
6647
}
6748

49+
// SAFETY: `decode_into` will initialize all values in this range.
50+
let uninit_slice = unsafe { uninit_range.slice_uninit_mut(0, array.len()) };
51+
6852
let mut bit_packed_iter = array.unpacked_chunks();
69-
bit_packed_iter.decode_into(&mut uninit_range);
53+
bit_packed_iter.decode_into(uninit_slice);
7054

7155
if let Some(patches) = array.patches() {
7256
apply_patches(&mut uninit_range, patches);
7357
};
7458

7559
// SAFETY: We have set a correct validity mask via `append_mask` with `array.len()` values and
76-
// initialized the same number of values needed via calls to `copy_from_slice`.
60+
// initialized the same number of values needed via `decode_into`.
7761
unsafe {
7862
uninit_range.finish();
7963
}

encodings/fastlanes/src/bitpacking/array/unpack_iter.rs

Lines changed: 32 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -9,12 +9,10 @@ use lending_iterator::gat;
99
use lending_iterator::prelude::Item;
1010
#[gat(Item)]
1111
use lending_iterator::prelude::LendingIterator;
12-
use vortex_array::builders::UninitRange;
1312
use vortex_buffer::ByteBuffer;
1413
use vortex_dtype::PhysicalPType;
1514

1615
use crate::BitPackedArray;
17-
use crate::bitpacking::bitpack_decompress::BitPackingStrategy;
1816

1917
const CHUNK_SIZE: usize = 1024;
2018

@@ -28,6 +26,24 @@ pub trait UnpackStrategy<T: PhysicalPType> {
2826
unsafe fn unpack_chunk(&self, bit_width: usize, chunk: &[T::Physical], dst: &mut [T::Physical]);
2927
}
3028

29+
/// BitPacking strategy - uses plain bitpacking without reference value
30+
pub struct BitPackingStrategy;
31+
32+
impl<T: PhysicalPType<Physical: BitPacking>> UnpackStrategy<T> for BitPackingStrategy {
33+
#[inline(always)]
34+
unsafe fn unpack_chunk(
35+
&self,
36+
bit_width: usize,
37+
chunk: &[T::Physical],
38+
dst: &mut [T::Physical],
39+
) {
40+
// SAFETY: Caller must ensure [`BitPacking::unchecked_unpack`] safety requirements hold.
41+
unsafe {
42+
BitPacking::unchecked_unpack(bit_width, chunk, dst);
43+
}
44+
}
45+
}
46+
3147
/// Accessor to unpacked chunks of bitpacked arrays
3248
///
3349
/// The usual pattern of usage should follow
@@ -159,29 +175,38 @@ impl<T: PhysicalPType, S: UnpackStrategy<T>> UnpackedChunks<T, S> {
159175

160176
/// Decode all chunks (initial, full, and trailer) into the output range.
161177
/// This consolidates the logic for handling all three chunk types in one place.
162-
pub fn decode_into(&mut self, output: &mut UninitRange<T>) {
178+
pub fn decode_into(&mut self, output: &mut [MaybeUninit<T>]) {
163179
let mut local_idx = 0;
164180

165181
// Handle initial partial chunk if present
166182
if let Some(initial) = self.initial() {
167-
output.copy_from_slice(0, initial);
168183
local_idx = initial.len();
184+
185+
// TODO(connor): use `maybe_uninit_write_slice` feature when it gets stabilized.
186+
// https://github.com/rust-lang/rust/issues/79995
187+
// SAFETY: &[T] and &[MaybeUninit<T>] have the same layout.
188+
let init_initial: &[MaybeUninit<T>] = unsafe { mem::transmute(initial) };
189+
output[..local_idx].copy_from_slice(init_initial);
169190
}
170191

171192
// Handle full chunks
172193
local_idx = self.decode_full_chunks_into_at(output, local_idx);
173194

174195
// Handle trailing partial chunk if present
175196
if let Some(trailer) = self.trailer() {
176-
output.copy_from_slice(local_idx, trailer);
197+
// TODO(connor): use `maybe_uninit_write_slice` feature when it gets stabilized.
198+
// https://github.com/rust-lang/rust/issues/79995
199+
// SAFETY: &[T] and &[MaybeUninit<T>] have the same layout.
200+
let init_trailer: &[MaybeUninit<T>] = unsafe { mem::transmute(trailer) };
201+
output[local_idx..][..init_trailer.len()].copy_from_slice(init_trailer);
177202
}
178203
}
179204

180205
/// Unpack full chunks into output range starting at the given index.
181206
/// Returns the next local index to write to.
182207
fn decode_full_chunks_into_at(
183208
&mut self,
184-
output: &mut UninitRange<T>,
209+
output: &mut [MaybeUninit<T>],
185210
start_idx: usize,
186211
) -> usize {
187212
// If there's only one chunk it has been handled already by `initial` method
@@ -204,8 +229,7 @@ impl<T: PhysicalPType, S: UnpackStrategy<T>> UnpackedChunks<T, S> {
204229
let chunk = &packed_slice[i * elems_per_chunk..][..elems_per_chunk];
205230

206231
unsafe {
207-
// SAFETY: We're about to initialize CHUNK_SIZE elements at local_idx.
208-
let uninit_dst = output.slice_uninit_mut(local_idx, CHUNK_SIZE);
232+
let uninit_dst = &mut output[local_idx..local_idx + CHUNK_SIZE];
209233
// SAFETY: &[T] and &[MaybeUninit<T>] have the same layout
210234
let dst: &mut [T::Physical] = mem::transmute(uninit_dst);
211235
self.strategy.unpack_chunk(self.bit_width, chunk, dst);

encodings/fastlanes/src/for/compress.rs

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -124,19 +124,23 @@ fn fused_decompress<T: PhysicalPType<Physical = T> + UnsignedPType + FoR + Wrapp
124124

125125
let mut builder = PrimitiveBuilder::<T>::with_capacity(for_.dtype().nullability(), bp.len());
126126
let mut uninit_range = builder.uninit_range(bp.len());
127-
128-
// Decode all chunks (initial, full, and trailer) in one call
129-
unpacked.decode_into(&mut uninit_range);
130-
131127
unsafe {
132128
// Append a dense null Mask.
133129
uninit_range.append_mask(bp.validity_mask());
134130
}
135131

132+
// SAFETY: `decode_into` will initialize all values in this range.
133+
let uninit_slice = unsafe { uninit_range.slice_uninit_mut(0, bp.len()) };
134+
135+
// Decode all chunks (initial, full, and trailer) in one call
136+
unpacked.decode_into(uninit_slice);
137+
136138
if let Some(patches) = bp.patches() {
137139
bitpack_decompress::apply_patches_fn(&mut uninit_range, patches, |v| v.wrapping_add(&ref_));
138140
};
139141

142+
// SAFETY: We have set a correct validity mask via `append_mask` with `array.len()` values and
143+
// initialized the same number of values needed via `decode_into`.
140144
unsafe {
141145
uninit_range.finish();
142146
}

0 commit comments

Comments
 (0)