Skip to content

Commit a3a3ed7

Browse files
authored
feat: alp decompress to vector (#5548)
Signed-off-by: Alexander Droste <[email protected]>
1 parent d8cab4c commit a3a3ed7

File tree

7 files changed

+511
-146
lines changed

7 files changed

+511
-146
lines changed

Cargo.lock

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

encodings/alp/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ vortex-fastlanes = { workspace = true }
2929
vortex-mask = { workspace = true }
3030
vortex-scalar = { workspace = true }
3131
vortex-utils = { workspace = true }
32+
vortex-vector = { workspace = true }
3233

3334
[dev-dependencies]
3435
divan = { workspace = true }

encodings/alp/benches/alp_compress.rs

Lines changed: 32 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ use vortex_alp::ALPFloat;
1111
use vortex_alp::ALPRDFloat;
1212
use vortex_alp::RDEncoder;
1313
use vortex_alp::alp_encode;
14-
use vortex_alp::decompress;
14+
use vortex_alp::decompress_into_array;
1515
use vortex_array::arrays::PrimitiveArray;
1616
use vortex_array::compute::warm_up_vtables;
1717
use vortex_array::validity::Validity;
@@ -97,7 +97,37 @@ fn decompress_alp<T: ALPFloat + NativePType>(bencher: Bencher, args: (usize, f64
9797
)
9898
.unwrap()
9999
})
100-
.bench_values(decompress);
100+
.bench_values(decompress_into_array);
101+
}
102+
103+
#[divan::bench(types = [f32, f64], args = BENCH_ARGS)]
104+
fn decompress_alp_vector<T: ALPFloat + NativePType>(bencher: Bencher, args: (usize, f64, f64)) {
105+
let (n, fraction_patch, fraction_valid) = args;
106+
let mut rng = StdRng::seed_from_u64(0);
107+
let mut values = buffer![T::from(1.234).unwrap(); n].into_mut();
108+
if fraction_patch > 0.0 {
109+
for index in 0..values.len() {
110+
if rng.random_bool(fraction_patch) {
111+
values[index] = T::from(1000.0).unwrap()
112+
}
113+
}
114+
}
115+
let validity = if fraction_valid < 1.0 {
116+
Validity::from_iter((0..values.len()).map(|_| rng.random_bool(fraction_valid)))
117+
} else {
118+
Validity::NonNullable
119+
};
120+
let values = values.freeze();
121+
bencher
122+
.with_inputs(|| {
123+
let alp_array = alp_encode(
124+
&PrimitiveArray::new(Buffer::copy_from(&values), validity.clone()),
125+
None,
126+
)
127+
.unwrap();
128+
alp_array.to_array()
129+
})
130+
.bench_refs(|array| array.execute().unwrap());
101131
}
102132

103133
#[divan::bench(types = [f32, f64], args = [10_000, 100_000])]

encodings/alp/src/alp/array.rs

Lines changed: 247 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ use vortex_array::DeserializeMetadata;
1515
use vortex_array::Precision;
1616
use vortex_array::ProstMetadata;
1717
use vortex_array::SerializeMetadata;
18+
use vortex_array::execution::ExecutionCtx;
1819
use vortex_array::patches::Patches;
1920
use vortex_array::patches::PatchesMetadata;
2021
use vortex_array::serde::ArrayChildren;
@@ -40,11 +41,14 @@ use vortex_error::VortexExpect;
4041
use vortex_error::VortexResult;
4142
use vortex_error::vortex_bail;
4243
use vortex_error::vortex_ensure;
44+
use vortex_vector::Vector;
4345

4446
use crate::ALPFloat;
4547
use crate::alp::Exponents;
4648
use crate::alp::alp_encode;
47-
use crate::alp::decompress;
49+
use crate::alp::decompress::decompress_into_array;
50+
use crate::alp::decompress::decompress_into_vector;
51+
use crate::match_each_alp_float_ptype;
4852

4953
vtable!(ALP);
5054

@@ -136,6 +140,31 @@ impl VTable for ALPVTable {
136140
patches,
137141
)
138142
}
143+
144+
fn execute(array: &ALPArray, ctx: &mut dyn ExecutionCtx) -> VortexResult<Vector> {
145+
let encoded_vector = array.encoded().execute_batch(ctx)?;
146+
147+
let patches_vectors = if let Some(patches) = array.patches() {
148+
Some((
149+
patches.indices().execute_batch(ctx)?,
150+
patches.values().execute_batch(ctx)?,
151+
patches
152+
.chunk_offsets()
153+
.as_ref()
154+
.map(|co| co.execute_batch(ctx))
155+
.transpose()?,
156+
))
157+
} else {
158+
None
159+
};
160+
161+
let patches_offset = array.patches().map(|p| p.offset()).unwrap_or(0);
162+
let exponents = array.exponents();
163+
164+
match_each_alp_float_ptype!(array.dtype().as_ptype(), |T| {
165+
decompress_into_vector::<T>(encoded_vector, exponents, patches_vectors, patches_offset)
166+
})
167+
}
139168
}
140169

141170
#[derive(Clone, Debug)]
@@ -349,6 +378,12 @@ impl ALPArray {
349378
pub fn patches(&self) -> Option<&Patches> {
350379
self.patches.as_ref()
351380
}
381+
382+
/// Consumes the array and returns its parts.
383+
#[inline]
384+
pub fn into_parts(self) -> (ArrayRef, Exponents, Option<Patches>, DType) {
385+
(self.encoded, self.exponents, self.patches, self.dtype)
386+
}
352387
}
353388

354389
impl ValidityChild<ALPVTable> for ALPVTable {
@@ -387,7 +422,7 @@ impl BaseArrayVTable<ALPVTable> for ALPVTable {
387422

388423
impl CanonicalVTable<ALPVTable> for ALPVTable {
389424
fn canonicalize(array: &ALPArray) -> Canonical {
390-
Canonical::Primitive(decompress(array.clone()))
425+
Canonical::Primitive(decompress_into_array(array.clone()))
391426
}
392427
}
393428

@@ -415,3 +450,213 @@ impl VisitorVTable<ALPVTable> for ALPVTable {
415450
}
416451
}
417452
}
453+
454+
#[cfg(test)]
455+
mod tests {
456+
use std::f64::consts::PI;
457+
458+
use rstest::rstest;
459+
use vortex_array::arrays::PrimitiveArray;
460+
use vortex_array::vtable::ValidityHelper;
461+
use vortex_dtype::PTypeDowncast;
462+
use vortex_vector::VectorOps;
463+
464+
use super::*;
465+
466+
#[rstest]
467+
#[case(0)]
468+
#[case(1)]
469+
#[case(100)]
470+
#[case(1023)]
471+
#[case(1024)]
472+
#[case(1025)]
473+
#[case(2047)]
474+
#[case(2048)]
475+
#[case(2049)]
476+
fn test_execute_f32(#[case] size: usize) {
477+
let values = PrimitiveArray::from_iter((0..size).map(|i| i as f32));
478+
let encoded = alp_encode(&values, None).unwrap();
479+
480+
let result_vector = encoded.to_array().execute().unwrap();
481+
// Compare against the traditional array-based decompress path
482+
let expected = decompress_into_array(encoded);
483+
484+
assert_eq!(result_vector.len(), size);
485+
486+
let result_primitive = result_vector.into_primitive().into_f32();
487+
assert_eq!(result_primitive.as_ref(), expected.as_slice::<f32>());
488+
}
489+
490+
#[rstest]
491+
#[case(0)]
492+
#[case(1)]
493+
#[case(100)]
494+
#[case(1023)]
495+
#[case(1024)]
496+
#[case(1025)]
497+
#[case(2047)]
498+
#[case(2048)]
499+
#[case(2049)]
500+
fn test_execute_f64(#[case] size: usize) {
501+
let values = PrimitiveArray::from_iter((0..size).map(|i| i as f64));
502+
let encoded = alp_encode(&values, None).unwrap();
503+
504+
let result_vector = encoded.to_array().execute().unwrap();
505+
// Compare against the traditional array-based decompress path
506+
let expected = decompress_into_array(encoded);
507+
508+
assert_eq!(result_vector.len(), size);
509+
510+
let result_primitive = result_vector.into_primitive().into_f64();
511+
assert_eq!(result_primitive.as_ref(), expected.as_slice::<f64>());
512+
}
513+
514+
#[rstest]
515+
#[case(100)]
516+
#[case(1023)]
517+
#[case(1024)]
518+
#[case(1025)]
519+
#[case(2047)]
520+
#[case(2048)]
521+
#[case(2049)]
522+
fn test_execute_with_patches(#[case] size: usize) {
523+
let values: Vec<f64> = (0..size)
524+
.map(|i| match i % 4 {
525+
0..=2 => 1.0,
526+
_ => PI,
527+
})
528+
.collect();
529+
530+
let array = PrimitiveArray::from_iter(values);
531+
let encoded = alp_encode(&array, None).unwrap();
532+
assert!(encoded.patches().unwrap().array_len() > 0);
533+
534+
let result_vector = encoded.to_array().execute().unwrap();
535+
// Compare against the traditional array-based decompress path
536+
let expected = decompress_into_array(encoded);
537+
538+
assert_eq!(result_vector.len(), size);
539+
540+
let result_primitive = result_vector.into_primitive().into_f64();
541+
assert_eq!(result_primitive.as_ref(), expected.as_slice::<f64>());
542+
}
543+
544+
#[rstest]
545+
#[case(0)]
546+
#[case(1)]
547+
#[case(100)]
548+
#[case(1023)]
549+
#[case(1024)]
550+
#[case(1025)]
551+
#[case(2047)]
552+
#[case(2048)]
553+
#[case(2049)]
554+
fn test_execute_with_validity(#[case] size: usize) {
555+
let values: Vec<Option<f32>> = (0..size)
556+
.map(|i| if i % 2 == 1 { None } else { Some(1.0) })
557+
.collect();
558+
559+
let array = PrimitiveArray::from_option_iter(values);
560+
let encoded = alp_encode(&array, None).unwrap();
561+
562+
let result_vector = encoded.to_array().execute().unwrap();
563+
// Compare against the traditional array-based decompress path
564+
let expected = decompress_into_array(encoded);
565+
566+
assert_eq!(result_vector.len(), size);
567+
568+
let result_primitive = result_vector.into_primitive().into_f32();
569+
assert_eq!(result_primitive.as_ref(), expected.as_slice::<f32>());
570+
571+
// Test validity masks match
572+
for idx in 0..size {
573+
assert_eq!(
574+
result_primitive.validity().value(idx),
575+
expected.validity().is_valid(idx)
576+
);
577+
}
578+
}
579+
580+
#[rstest]
581+
#[case(100)]
582+
#[case(1023)]
583+
#[case(1024)]
584+
#[case(1025)]
585+
#[case(2047)]
586+
#[case(2048)]
587+
#[case(2049)]
588+
fn test_execute_with_patches_and_validity(#[case] size: usize) {
589+
let values: Vec<Option<f64>> = (0..size)
590+
.map(|idx| match idx % 3 {
591+
0 => Some(1.0),
592+
1 => None,
593+
_ => Some(PI),
594+
})
595+
.collect();
596+
597+
let array = PrimitiveArray::from_option_iter(values);
598+
let encoded = alp_encode(&array, None).unwrap();
599+
assert!(encoded.patches().unwrap().array_len() > 0);
600+
601+
let result_vector = encoded.to_array().execute().unwrap();
602+
// Compare against the traditional array-based decompress path
603+
let expected = decompress_into_array(encoded);
604+
605+
assert_eq!(result_vector.len(), size);
606+
607+
let result_primitive = result_vector.into_primitive().into_f64();
608+
assert_eq!(result_primitive.as_ref(), expected.as_slice::<f64>());
609+
610+
// Test validity masks match
611+
for idx in 0..size {
612+
assert_eq!(
613+
result_primitive.validity().value(idx),
614+
expected.validity().is_valid(idx)
615+
);
616+
}
617+
}
618+
619+
#[rstest]
620+
#[case(500, 100)]
621+
#[case(1000, 200)]
622+
#[case(2048, 512)]
623+
fn test_execute_sliced_vector(#[case] size: usize, #[case] slice_start: usize) {
624+
let values: Vec<Option<f64>> = (0..size)
625+
.map(|i| {
626+
if i % 5 == 0 {
627+
None
628+
} else if i % 4 == 3 {
629+
Some(PI)
630+
} else {
631+
Some(1.0)
632+
}
633+
})
634+
.collect();
635+
636+
let array = PrimitiveArray::from_option_iter(values.clone());
637+
let encoded = alp_encode(&array, None).unwrap();
638+
639+
let slice_end = size - slice_start;
640+
let slice_len = slice_end - slice_start;
641+
let sliced_encoded = encoded.slice(slice_start..slice_end);
642+
643+
let result_vector = sliced_encoded.execute().unwrap();
644+
let result_primitive = result_vector.into_primitive().into_f64();
645+
646+
for idx in 0..slice_len {
647+
let expected_value = values[slice_start + idx];
648+
649+
let result_valid = result_primitive.validity().value(idx);
650+
assert_eq!(
651+
result_valid,
652+
expected_value.is_some(),
653+
"Validity mismatch at idx={idx}",
654+
);
655+
656+
if let Some(expected_val) = expected_value {
657+
let result_val = result_primitive.as_ref()[idx];
658+
assert_eq!(result_val, expected_val, "Value mismatch at idx={idx}",);
659+
}
660+
}
661+
}
662+
}

0 commit comments

Comments
 (0)