Skip to content

Commit 99101b1

Browse files
authored
fix: patch_chunk offset calc (#5222)
Signed-off-by: Alexander Droste <[email protected]>
1 parent b07fb3b commit 99101b1

File tree

3 files changed

+49
-6
lines changed

3 files changed

+49
-6
lines changed

encodings/alp/src/alp/compress.rs

Lines changed: 35 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
use std::mem::transmute;
55

66
use itertools::Itertools;
7+
use num_traits::AsPrimitive;
78
use vortex_array::arrays::{PrimitiveArray, patch_chunk};
89
use vortex_array::patches::Patches;
910
use vortex_array::validity::Validity;
@@ -169,21 +170,27 @@ pub fn decompress_chunked(
169170
let exponents = array.exponents();
170171
let patches_offset = patches.offset();
171172

172-
// We need to drop ALPArray here in case converting encoded buffer into primitive didn't create a copy. In that case
173-
// both alp_encoded and array will hold a reference to the buffer we want to mutate.
173+
// We need to drop ALPArray here in case converting encoded buffer into
174+
// primitive didn't create a copy. In that case both alp_encoded and array
175+
// will hold a reference to the buffer we want to mutate.
174176
drop(array);
175177

176178
match_each_alp_float_ptype!(ptype, |T| {
177179
let patches_values = patches_values.as_slice::<T>();
178180
let mut alp_buffer = encoded.into_buffer_mut();
179181
match_each_unsigned_integer_ptype!(patches_chunk_offsets.ptype(), |C| {
180182
let patches_chunk_offsets = patches_chunk_offsets.as_slice::<C>();
183+
// There always is at least one chunk offset.
184+
let base_offset = patches_chunk_offsets[0];
185+
let offset_within_chunk = patches.offset_within_chunk().unwrap_or(0);
186+
181187
match_each_unsigned_integer_ptype!(patches_indices.ptype(), |I| {
182188
let patches_indices = patches_indices.as_slice::<I>();
183189

184190
for (chunk_idx, chunk_start) in (0..array_len).step_by(1024).enumerate() {
185191
let chunk_end = (chunk_start + 1024).min(array_len);
186192
let chunk_slice = &mut alp_buffer.as_mut_slice()[chunk_start..chunk_end];
193+
187194
<T>::decode_slice_inplace(chunk_slice, exponents);
188195

189196
let decoded_chunk: &mut [T] = unsafe { transmute(chunk_slice) };
@@ -194,6 +201,8 @@ pub fn decompress_chunked(
194201
patches_offset,
195202
patches_chunk_offsets,
196203
chunk_idx,
204+
base_offset.as_(),
205+
offset_within_chunk,
197206
);
198207
}
199208

@@ -214,8 +223,9 @@ fn decompress_unchunked(array: ALPArray) -> PrimitiveArray {
214223
let exponents = array.exponents();
215224
let ptype = array.dtype().as_ptype();
216225

217-
// We need to drop ALPArray here in case converting encoded buffer into primitive didn't create a copy. In that case
218-
// both alp_encoded and array will hold a reference to the buffer we want to mutate.
226+
// We need to drop ALPArray here in case converting encoded buffer into
227+
// primitive didn't create a copy. In that case both alp_encoded and array
228+
// will hold a reference to the buffer we want to mutate.
219229
drop(array);
220230

221231
let decoded = match_each_alp_float_ptype!(ptype, |T| {
@@ -503,6 +513,27 @@ mod tests {
503513
assert!(encoded.patches().is_some());
504514
}
505515

516+
#[test]
517+
fn test_slice_across_chunks_with_patches_roundtrip() {
518+
let mut values = vec![1.0f64; 2048];
519+
values[100] = PI;
520+
values[200] = E;
521+
values[600] = 42.42;
522+
values[800] = 42.42;
523+
values[1000] = 42.42;
524+
values[1023] = 42.42;
525+
526+
let original = PrimitiveArray::new(Buffer::from(values), Validity::NonNullable);
527+
let encoded = alp_encode(&original, None).unwrap();
528+
529+
let sliced_alp = encoded.slice(1023..1025);
530+
let decoded = sliced_alp.to_primitive();
531+
532+
let expected_slice = original.slice(1023..1025).to_primitive();
533+
assert_eq!(expected_slice.as_slice::<f64>(), decoded.as_slice::<f64>());
534+
assert!(encoded.patches().is_some());
535+
}
536+
506537
#[test]
507538
fn test_slice_half_chunk_nullable_roundtrip() {
508539
let values = (0..1024)

vortex-array/src/arrays/primitive/array/patch.rs

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -67,22 +67,29 @@ impl PrimitiveArray {
6767
/// * `patches_offset` - Offset to subtract from patch indices
6868
/// * `chunk_offsets_slice` - Slice containing offsets for each chunk
6969
/// * `chunk_idx` - Index of the chunk to patch
70+
/// * `base_offset` - Base offset from the first chunk
71+
/// * `offset_within_chunk` - Offset within chunk for sliced patches
7072
#[inline]
73+
#[allow(clippy::too_many_arguments)]
7174
pub fn patch_chunk<T, I, C>(
7275
decoded_values: &mut [T],
7376
patches_indices: &[I],
7477
patches_values: &[T],
7578
patches_offset: usize,
7679
chunk_offsets_slice: &[C],
7780
chunk_idx: usize,
81+
base_offset: usize,
82+
offset_within_chunk: usize,
7883
) where
7984
T: NativePType,
8085
I: UnsignedPType,
8186
C: UnsignedPType,
8287
{
83-
let patches_start_idx = chunk_offsets_slice[chunk_idx].as_();
88+
// Use the same logic as patches slice implementation for calculating patch ranges.
89+
let patches_start_idx =
90+
(chunk_offsets_slice[chunk_idx].as_() - base_offset).saturating_sub(offset_within_chunk);
8491
let patches_end_idx = if chunk_idx + 1 < chunk_offsets_slice.len() {
85-
chunk_offsets_slice[chunk_idx + 1].as_()
92+
chunk_offsets_slice[chunk_idx + 1].as_() - base_offset - offset_within_chunk
8693
} else {
8794
patches_indices.len()
8895
};

vortex-array/src/patches.rs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -263,6 +263,11 @@ impl Patches {
263263
.vortex_expect("chunk offset must be usize")
264264
}
265265

266+
#[inline]
267+
pub fn offset_within_chunk(&self) -> Option<usize> {
268+
self.offset_within_chunk
269+
}
270+
266271
#[inline]
267272
pub fn indices_ptype(&self) -> PType {
268273
PType::try_from(self.indices.dtype()).vortex_expect("primitive indices")

0 commit comments

Comments
 (0)