Skip to content

Commit 1e8af1d

Browse files
committed
fix: patch_chunk offset calc
Signed-off-by: Alexander Droste <[email protected]>
1 parent ee4b921 commit 1e8af1d

File tree

3 files changed

+48
-6
lines changed

3 files changed

+48
-6
lines changed

encodings/alp/src/alp/compress.rs

Lines changed: 34 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -169,21 +169,27 @@ pub fn decompress_chunked(
169169
let exponents = array.exponents();
170170
let patches_offset = patches.offset();
171171

172-
// We need to drop ALPArray here in case converting encoded buffer into primitive didn't create a copy. In that case
173-
// both alp_encoded and array will hold a reference to the buffer we want to mutate.
172+
// We need to drop ALPArray here in case converting encoded buffer into
173+
// primitive didn't create a copy. In that case both alp_encoded and array
174+
// will hold a reference to the buffer we want to mutate.
174175
drop(array);
175176

176177
match_each_alp_float_ptype!(ptype, |T| {
177178
let patches_values = patches_values.as_slice::<T>();
178179
let mut alp_buffer = encoded.into_buffer_mut();
179180
match_each_unsigned_integer_ptype!(patches_chunk_offsets.ptype(), |C| {
180181
let patches_chunk_offsets = patches_chunk_offsets.as_slice::<C>();
182+
// There always is at least one chunk offset.
183+
let base_offset = patches_chunk_offsets[0];
184+
let offset_within_chunk = patches.offset_within_chunk().unwrap_or(0);
185+
181186
match_each_unsigned_integer_ptype!(patches_indices.ptype(), |I| {
182187
let patches_indices = patches_indices.as_slice::<I>();
183188

184189
for (chunk_idx, chunk_start) in (0..array_len).step_by(1024).enumerate() {
185190
let chunk_end = (chunk_start + 1024).min(array_len);
186191
let chunk_slice = &mut alp_buffer.as_mut_slice()[chunk_start..chunk_end];
192+
187193
<T>::decode_slice_inplace(chunk_slice, exponents);
188194

189195
let decoded_chunk: &mut [T] = unsafe { transmute(chunk_slice) };
@@ -194,6 +200,8 @@ pub fn decompress_chunked(
194200
patches_offset,
195201
patches_chunk_offsets,
196202
chunk_idx,
203+
base_offset as usize,
204+
offset_within_chunk,
197205
);
198206
}
199207

@@ -214,8 +222,9 @@ fn decompress_unchunked(array: ALPArray) -> PrimitiveArray {
214222
let exponents = array.exponents();
215223
let ptype = array.dtype().as_ptype();
216224

217-
// We need to drop ALPArray here in case converting encoded buffer into primitive didn't create a copy. In that case
218-
// both alp_encoded and array will hold a reference to the buffer we want to mutate.
225+
// We need to drop ALPArray here in case converting encoded buffer into
226+
// primitive didn't create a copy. In that case both alp_encoded and array
227+
// will hold a reference to the buffer we want to mutate.
219228
drop(array);
220229

221230
let decoded = match_each_alp_float_ptype!(ptype, |T| {
@@ -503,6 +512,27 @@ mod tests {
503512
assert!(encoded.patches().is_some());
504513
}
505514

515+
#[test]
516+
fn test_slice_across_chunks_with_patches_roundtrip() {
517+
let mut values = vec![1.0f64; 2048];
518+
values[100] = PI;
519+
values[200] = E;
520+
values[600] = 42.42;
521+
values[800] = 42.42;
522+
values[1000] = 42.42;
523+
values[1023] = 42.42;
524+
525+
let original = PrimitiveArray::new(Buffer::from(values), Validity::NonNullable);
526+
let encoded = alp_encode(&original, None).unwrap();
527+
528+
let sliced_alp = encoded.slice(1023..1025);
529+
let decoded = sliced_alp.to_primitive();
530+
531+
let expected_slice = original.slice(1023..1025).to_primitive();
532+
assert_eq!(expected_slice.as_slice::<f64>(), decoded.as_slice::<f64>());
533+
assert!(encoded.patches().is_some());
534+
}
535+
506536
#[test]
507537
fn test_slice_half_chunk_nullable_roundtrip() {
508538
let values = (0..1024)

vortex-array/src/arrays/primitive/array/patch.rs

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -67,22 +67,29 @@ impl PrimitiveArray {
6767
/// * `patches_offset` - Offset to subtract from patch indices
6868
/// * `chunk_offsets_slice` - Slice containing offsets for each chunk
6969
/// * `chunk_idx` - Index of the chunk to patch
70+
/// * `base_offset` - Base offset from the first chunk
71+
/// * `offset_within_chunk` - Offset within chunk for sliced patches
7072
#[inline]
73+
#[allow(clippy::too_many_arguments)]
7174
pub fn patch_chunk<T, I, C>(
7275
decoded_values: &mut [T],
7376
patches_indices: &[I],
7477
patches_values: &[T],
7578
patches_offset: usize,
7679
chunk_offsets_slice: &[C],
7780
chunk_idx: usize,
81+
base_offset: usize,
82+
offset_within_chunk: usize,
7883
) where
7984
T: NativePType,
8085
I: UnsignedPType,
8186
C: UnsignedPType,
8287
{
83-
let patches_start_idx = chunk_offsets_slice[chunk_idx].as_();
88+
// Use the same logic as patches slice implementation for calculating patch ranges.
89+
let patches_start_idx =
90+
(chunk_offsets_slice[chunk_idx].as_() - base_offset).saturating_sub(offset_within_chunk);
8491
let patches_end_idx = if chunk_idx + 1 < chunk_offsets_slice.len() {
85-
chunk_offsets_slice[chunk_idx + 1].as_()
92+
chunk_offsets_slice[chunk_idx + 1].as_() - base_offset - offset_within_chunk
8693
} else {
8794
patches_indices.len()
8895
};

vortex-array/src/patches.rs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -263,6 +263,11 @@ impl Patches {
263263
.vortex_expect("chunk offset must be usize")
264264
}
265265

266+
#[inline]
267+
pub fn offset_within_chunk(&self) -> Option<usize> {
268+
self.offset_within_chunk
269+
}
270+
266271
#[inline]
267272
pub fn indices_ptype(&self) -> PType {
268273
PType::try_from(self.indices.dtype()).vortex_expect("primitive indices")

0 commit comments

Comments
 (0)