diff --git a/Cargo.lock b/Cargo.lock index 7782d8d5635..2fde7dc508d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4870,9 +4870,11 @@ dependencies = [ name = "vortex-alp" version = "0.21.1" dependencies = [ + "arrow-array", "divan", "itertools 0.14.0", "num-traits", + "rand", "rstest", "serde", "vortex-array", diff --git a/encodings/alp/Cargo.toml b/encodings/alp/Cargo.toml index fd7993c50aa..a4a86b79c5a 100644 --- a/encodings/alp/Cargo.toml +++ b/encodings/alp/Cargo.toml @@ -17,6 +17,7 @@ readme = { workspace = true } workspace = true [dependencies] +arrow-array = { workspace = true } itertools = { workspace = true } num-traits = { workspace = true } serde = { workspace = true, features = ["derive"] } @@ -29,6 +30,7 @@ vortex-scalar = { workspace = true } [dev-dependencies] divan = { workspace = true } +rand = { workspace = true } rstest = { workspace = true } vortex-array = { workspace = true, features = ["test-harness"] } diff --git a/encodings/alp/benches/alp_compress.rs b/encodings/alp/benches/alp_compress.rs index d51d34038de..a2505efeb38 100644 --- a/encodings/alp/benches/alp_compress.rs +++ b/encodings/alp/benches/alp_compress.rs @@ -1,27 +1,60 @@ #![allow(clippy::unwrap_used)] use divan::Bencher; -use vortex_alp::{ALPFloat, ALPRDFloat, Exponents, RDEncoder}; +use rand::rngs::StdRng; +use rand::{Rng, SeedableRng as _}; +use vortex_alp::{alp_encode, ALPFloat, ALPRDFloat, RDEncoder}; use vortex_array::array::PrimitiveArray; use vortex_array::validity::Validity; use vortex_array::IntoCanonical; -use vortex_buffer::{buffer, Buffer}; +use vortex_buffer::buffer; +use vortex_dtype::NativePType; fn main() { divan::main(); } -#[divan::bench(types = [f32, f64], args = [100_000, 10_000_000])] -fn compress_alp(n: usize) -> (Exponents, Buffer, Buffer, Buffer) { - let values: Vec = vec![T::from(1.234).unwrap(); n]; - T::encode(values.as_slice(), None) +#[divan::bench(types = [f32, f64], args = [ + (100_000, 1.0), + (10_000_000, 1.0), + (100_000, 0.25), + (10_000_000, 0.25), + (100_000, 0.95), + (10_000_000, 0.95), +])] +fn compress_alp(bencher: Bencher, args: (usize, f64)) { + let (n, fraction_valid) = args; + let mut rng = StdRng::seed_from_u64(0); + let values = buffer![T::from(1.234).unwrap(); n]; + let validity = if fraction_valid < 1.0 { + Validity::from_iter((0..values.len()).map(|_| rng.gen_bool(fraction_valid))) + } else { + Validity::NonNullable + }; + bencher.bench_local(move || { + alp_encode(&PrimitiveArray::new(values.clone(), validity.clone())).unwrap() + }) } -#[divan::bench(types = [f32, f64], args = [100_000, 10_000_000])] -fn decompress_alp(bencher: Bencher, n: usize) { - let values: Vec = vec![T::from(1.234).unwrap(); n]; - let (exponents, encoded, ..) = T::encode(values.as_slice(), None); - bencher.bench_local(move || T::decode(&encoded, exponents)); +#[divan::bench(types = [f32, f64], args = [ + (100_000, 1.0), + (10_000_000, 1.0), + (100_000, 0.25), + (10_000_000, 0.25), + (100_000, 0.95), + (10_000_000, 0.95), +])] +fn decompress_alp(bencher: Bencher, args: (usize, f64)) { + let (n, fraction_valid) = args; + let mut rng = StdRng::seed_from_u64(0); + let values = buffer![T::from(1.234).unwrap(); n]; + let validity = if fraction_valid < 1.0 { + Validity::from_iter((0..values.len()).map(|_| rng.gen_bool(fraction_valid))) + } else { + Validity::NonNullable + }; + let array = alp_encode(&PrimitiveArray::new(values, validity)).unwrap(); + bencher.bench_local(move || array.clone().into_canonical().unwrap()); } #[divan::bench(types = [f32, f64], args = [100_000, 10_000_000])] diff --git a/encodings/alp/src/alp/array.rs b/encodings/alp/src/alp/array.rs index 2148a3dad83..391ee29ca79 100644 --- a/encodings/alp/src/alp/array.rs +++ b/encodings/alp/src/alp/array.rs @@ -48,6 +48,14 @@ impl ALPArray { let mut children = Vec::with_capacity(2); children.push(encoded); if let Some(patches) = &patches { + if patches.dtype() != &dtype { + vortex_bail!(MismatchedTypes: dtype, patches.dtype()); + } + + if patches.values().logical_validity().null_count()? != 0 { + vortex_bail!("ALPArray: patches must not contain invalid entries"); + } + children.push(patches.indices().clone()); children.push(patches.values().clone()); } diff --git a/encodings/alp/src/alp/compress.rs b/encodings/alp/src/alp/compress.rs index a86dc589618..87c909abe38 100644 --- a/encodings/alp/src/alp/compress.rs +++ b/encodings/alp/src/alp/compress.rs @@ -1,9 +1,11 @@ use vortex_array::array::PrimitiveArray; use vortex_array::patches::Patches; +use vortex_array::validity::{ArrayValidity as _, LogicalValidity, Validity}; use vortex_array::variants::PrimitiveArrayTrait; use vortex_array::{ArrayDType, ArrayData, IntoArrayData, IntoArrayVariant}; +use vortex_buffer::Buffer; use vortex_dtype::{NativePType, PType}; -use vortex_error::{vortex_bail, VortexResult, VortexUnwrap}; +use vortex_error::{vortex_bail, VortexResult}; use vortex_scalar::ScalarType; use crate::alp::{ALPArray, ALPFloat}; @@ -24,39 +26,69 @@ macro_rules! match_each_alp_float_ptype { }) } -pub fn alp_encode_components( +pub fn alp_encode(parray: &PrimitiveArray) -> VortexResult { + let (exponents, encoded, patches) = alp_encode_components(parray)?; + ALPArray::try_new(encoded, exponents, patches) +} + +pub fn alp_encode_components( + parray: &PrimitiveArray, +) -> VortexResult<(Exponents, ArrayData, Option)> { + match parray.ptype() { + PType::F32 => alp_encode_components_typed::(parray), + PType::F64 => alp_encode_components_typed::(parray), + _ => vortex_bail!("ALP can only encode f32 and f64"), + } +} + +#[allow(clippy::cast_possible_truncation)] +fn alp_encode_components_typed( values: &PrimitiveArray, - exponents: Option, -) -> (Exponents, ArrayData, Option) +) -> VortexResult<(Exponents, ArrayData, Option)> where T: ALPFloat + NativePType, T::ALPInt: NativePType, T: ScalarType, { - let (exponents, encoded, exc_pos, exc) = T::encode(values.as_slice::(), exponents); - let len = encoded.len(); - ( - exponents, - PrimitiveArray::new(encoded, values.validity()).into_array(), - (!exc.is_empty()).then(|| { - let position_arr = exc_pos.into_array(); - let patch_validity = values.validity().take(&position_arr).vortex_unwrap(); - Patches::new( - len, - position_arr, - PrimitiveArray::new(exc, patch_validity).into_array(), - ) - }), - ) -} + let values_slice = values.as_slice::(); -pub fn alp_encode(parray: &PrimitiveArray) -> VortexResult { - let (exponents, encoded, patches) = match parray.ptype() { - PType::F32 => alp_encode_components::(parray, None), - PType::F64 => alp_encode_components::(parray, None), - _ => vortex_bail!("ALP can only encode f32 and f64"), + let exponents = T::find_best_exponents(values_slice); + let (encoded, exceptional_positions) = T::chunked_encode(values.as_slice::(), exponents); + + let encoded_array = PrimitiveArray::new(encoded, values.validity()).into_array(); + let exceptional_positions = match values.logical_validity() { + LogicalValidity::AllValid(_) => exceptional_positions, + LogicalValidity::AllInvalid(_) => Buffer::empty(), + LogicalValidity::Array(is_valid) => { + let is_valid_buf = is_valid.into_bool()?.boolean_buffer(); + exceptional_positions + .into_iter() + // index is a valid usize because it is an index into values.as_slice::() + .filter(|index| is_valid_buf.value(*index as usize)) + .collect() + } }; - ALPArray::try_new(encoded, exponents, patches) + let patches = if exceptional_positions.is_empty() { + None + } else { + let patches_validity = if values.dtype().is_nullable() { + Validity::AllValid + } else { + Validity::NonNullable + }; + let exceptional_values: Buffer = exceptional_positions + .iter() + .map(|index| values_slice[*index as usize]) + .collect(); + let exceptional_values = + PrimitiveArray::new(exceptional_values, patches_validity).into_array(); + Some(Patches::new( + values_slice.len(), + exceptional_positions.into_array(), + exceptional_values, + )) + }; + Ok((exponents, encoded_array, patches)) } pub fn decompress(array: ALPArray) -> VortexResult { @@ -83,7 +115,7 @@ mod tests { use core::f64; use vortex_array::compute::scalar_at; - use vortex_array::validity::Validity; + use vortex_array::validity::{ArrayValidity as _, Validity}; use vortex_buffer::{buffer, Buffer}; use super::*; @@ -148,6 +180,39 @@ mod tests { assert_eq!(values.as_slice(), decoded.as_slice::()); } + #[test] + #[allow(clippy::approx_constant)] // ALP doesn't like E + fn test_compress_ignores_invalid_exceptional_values() { + let values = buffer![1.234f64, 2.718, f64::consts::PI, 4.0]; + let array = PrimitiveArray::new(values, Validity::from_iter([true, true, false, true])); + let encoded = alp_encode(&array).unwrap(); + assert!(encoded.patches().is_none()); + assert_eq!( + encoded + .encoded() + .into_primitive() + .unwrap() + .as_slice::(), + vec![1234i64, 2718, 1234, 4000] // fill forward + ); + assert_eq!(encoded.exponents(), Exponents { e: 16, f: 13 }); + + let decoded = decompress(encoded).unwrap(); + assert_eq!( + scalar_at(&decoded, 0).unwrap(), + scalar_at(&array, 0).unwrap() + ); + assert_eq!( + scalar_at(&decoded, 1).unwrap(), + scalar_at(&array, 1).unwrap() + ); + assert!(!decoded.is_valid(2)); + assert_eq!( + scalar_at(&decoded, 3).unwrap(), + scalar_at(&array, 3).unwrap() + ); + } + #[test] #[allow(clippy::approx_constant)] // ALP doesn't like E fn test_nullable_patched_scalar_at() { @@ -168,6 +233,7 @@ mod tests { assert!(s.is_valid()); } + assert!(!encoded.is_valid(4)); let s = scalar_at(encoded.as_ref(), 4).unwrap(); assert!(s.is_null()); @@ -190,7 +256,6 @@ mod tests { ); let alp_arr = alp_encode(&original).unwrap(); let decompressed = alp_arr.into_primitive().unwrap(); - assert_eq!(original.as_slice::(), decompressed.as_slice::()); assert_eq!(original.validity(), decompressed.validity()); } } diff --git a/encodings/alp/src/alp/compute/mod.rs b/encodings/alp/src/alp/compute/mod.rs index 1a8b147c095..b947844cc01 100644 --- a/encodings/alp/src/alp/compute/mod.rs +++ b/encodings/alp/src/alp/compute/mod.rs @@ -2,6 +2,7 @@ use vortex_array::compute::{ filter, scalar_at, slice, take, ComputeVTable, FilterFn, FilterMask, ScalarAtFn, SliceFn, TakeFn, }; +use vortex_array::validity::ArrayValidity as _; use vortex_array::variants::PrimitiveArrayTrait; use vortex_array::{ArrayDType, ArrayData, IntoArrayData}; use vortex_error::VortexResult; @@ -29,9 +30,13 @@ impl ComputeVTable for ALPEncoding { impl ScalarAtFn for ALPEncoding { fn scalar_at(&self, array: &ALPArray, index: usize) -> VortexResult { + if !array.encoded().is_valid(index) { + return Ok(Scalar::null(array.dtype().clone())); + } + if let Some(patches) = array.patches() { if let Some(patch) = patches.get_patched(index)? { - return Ok(patch); + return patch.cast(array.dtype()); } } diff --git a/encodings/alp/src/alp/mod.rs b/encodings/alp/src/alp/mod.rs index 06ba4876cb2..a879331f80d 100644 --- a/encodings/alp/src/alp/mod.rs +++ b/encodings/alp/src/alp/mod.rs @@ -63,18 +63,17 @@ pub trait ALPFloat: private::Sealed + Float + Display + 'static { values .iter() .step_by(values.len() / SAMPLE_SIZE) + .take(SAMPLE_SIZE) .cloned() .collect_vec() }); for e in (0..Self::MAX_EXPONENT).rev() { for f in 0..e { - let (_, encoded, _, exc_patches) = Self::encode( - sample.as_deref().unwrap_or(values), - Some(Exponents { e, f }), - ); + let (encoded, exceptional_positions) = + Self::encode(sample.as_deref().unwrap_or(values), Exponents { e, f }); - let size = Self::estimate_encoded_size(&encoded, &exc_patches); + let size = Self::estimate_encoded_size(&encoded, exceptional_positions.len()); if size < best_nbytes { best_nbytes = size; best_exp = Exponents { e, f }; @@ -88,7 +87,7 @@ pub trait ALPFloat: private::Sealed + Float + Display + 'static { } #[inline] - fn estimate_encoded_size(encoded: &[Self::ALPInt], patches: &[Self]) -> usize { + fn estimate_encoded_size(encoded: &[Self::ALPInt], n_exceptions: usize) -> usize { let bits_per_encoded = encoded .iter() .minmax() @@ -107,42 +106,57 @@ pub trait ALPFloat: private::Sealed + Float + Display + 'static { let encoded_bytes = (encoded.len() * bits_per_encoded + 7) / 8; // each patch is a value + a position // in practice, patch positions are in [0, u16::MAX] because of how we chunk - let patch_bytes = patches.len() * (size_of::() + size_of::()); + let patch_bytes = n_exceptions * (size_of::() + size_of::()); encoded_bytes + patch_bytes } - fn encode( + /// A quantity of [Self] expected to fit into L1 cache. + const ENCODE_CHUNK_SIZE: usize = (32 << 10) / size_of::(); + + /// ALP encode chunk-by-chunk. + /// + /// Unlike [Self::encode], this operation processes no more than [Self::ENCODE_CHUNK_SIZE] + /// elements at once which can make better use of the L1 cache because [Self::encode] makes two + /// passes over `values`: first to encode and second to extract the exceptional values. + fn chunked_encode( values: &[Self], - exponents: Option, - ) -> (Exponents, Buffer, Buffer, Buffer) { - let exp = exponents.unwrap_or_else(|| Self::find_best_exponents(values)); - - let mut encoded_output = BufferMut::::with_capacity(values.len()); - let mut patch_indices = BufferMut::::with_capacity(values.len()); - let mut patch_values = BufferMut::::with_capacity(values.len()); - let mut fill_value: Option = None; - - // this is intentionally branchless - // we batch this into 32KB of values at a time to make it more L1 cache friendly - let encode_chunk_size: usize = (32 << 10) / size_of::(); - for chunk in values.chunks(encode_chunk_size) { - encode_chunk_unchecked( - chunk, - exp, - &mut encoded_output, - &mut patch_indices, - &mut patch_values, - &mut fill_value, - ); + exponents: Exponents, + ) -> (Buffer, Buffer) { + let mut encoded = BufferMut::::with_capacity(values.len()); + let mut patch_indices = BufferMut::::empty(); + for chunk in values.chunks(Self::ENCODE_CHUNK_SIZE) { + let (encoded_chunk, patches_indices_chunk) = Self::encode(chunk, exponents); + encoded.extend(encoded_chunk); + patch_indices.extend(patches_indices_chunk); } + (encoded.freeze(), patch_indices.freeze()) + } + + /// ALP encode the given values using the given exponents. + /// + /// The index of each value for which encode-decode is not the identity function is returned. + /// + /// See also: [Self::chunked_encode]. + fn encode(values: &[Self], exponents: Exponents) -> (Vec, Vec) { + let (encoded, needs_patch): (Vec, Vec) = values + .iter() + .map(|value| { + let encoded = unsafe { Self::encode_single_unchecked(*value, exponents) }; + let maybe_decoded = Self::decode_single(encoded, exponents); + let needs_patch = maybe_decoded != *value; + (encoded, needs_patch) + }) + .unzip(); + + let patch_indices: Vec = needs_patch + .into_iter() + .enumerate() + .filter(|(_, needs_patch)| *needs_patch) + .map(|(index, _)| index as u64) + .collect(); - ( - exp, - encoded_output.freeze(), - patch_indices.freeze(), - patch_values.freeze(), - ) + (encoded, patch_indices) } #[inline] @@ -184,7 +198,7 @@ pub trait ALPFloat: private::Sealed + Float + Display + 'static { } #[allow(clippy::cast_possible_truncation)] -fn encode_chunk_unchecked( +fn _encode_chunk_unchecked( chunk: &[T], exp: Exponents, encoded_output: &mut BufferMut, diff --git a/encodings/runend/src/array.rs b/encodings/runend/src/array.rs index 8a6c8a25080..caa06b8946a 100644 --- a/encodings/runend/src/array.rs +++ b/encodings/runend/src/array.rs @@ -16,7 +16,7 @@ use vortex_array::{ IntoCanonical, }; use vortex_buffer::Buffer; -use vortex_dtype::{DType, PType}; +use vortex_dtype::{match_each_unsigned_integer_ptype, DType, PType}; use vortex_error::{vortex_bail, VortexExpect as _, VortexResult}; use vortex_scalar::Scalar; @@ -227,31 +227,113 @@ impl VisitorVTable for RunEndEncoding { impl StatisticsVTable for RunEndEncoding { fn compute_statistics(&self, array: &RunEndArray, stat: Stat) -> VortexResult { - let maybe_stat = match stat { - Stat::Min | Stat::Max => array.values().statistics().compute(stat), - Stat::IsSorted => Some(Scalar::from( - array - .values() - .statistics() - .compute_is_sorted() - .unwrap_or(false) - && array.logical_validity().all_valid(), - )), - _ => None, + let mut stats = StatsSet::default(); + + match stat { + Stat::Min | Stat::Max => { + if let Some(extrema) = array.values().statistics().compute(stat) { + stats.set(stat, extrema); + } + } + Stat::IsSorted => { + let is_sorted = Scalar::from( + array + .values() + .statistics() + .compute_is_sorted() + .unwrap_or(false) + && array.logical_validity().all_valid(), + ); + stats.set(stat, is_sorted); + } + Stat::TrueCount => match array.dtype() { + DType::Bool(_) => { + let ends = array.ends().into_primitive()?; + let bools = array.values().into_bool()?.boolean_buffer(); + let mut true_count: u64 = 0; + let mut null_count: u64 = 0; + + match array.values().logical_validity() { + LogicalValidity::AllValid(_) => { + null_count = 0; + true_count = match_each_unsigned_integer_ptype!(ends.ptype(), |$P| { + let mut begin = array.offset() as $P; + ends + .as_slice::<$P>() + .iter() + .enumerate() + .map(|(index, end)| { + let len = *end - begin; + begin = *end; + (len as u64) * (bools.value(index as usize) as u64) + }) + .sum() + }); + } + LogicalValidity::AllInvalid(_) => { + null_count = array.len() as u64; + true_count = 0; + } + LogicalValidity::Array(is_valid) => { + let is_valid = is_valid.into_bool()?.boolean_buffer(); + + match_each_unsigned_integer_ptype!(ends.ptype(), |$P| { + let mut begin = array.offset() as $P; + for (index, end) in ends.as_slice::<$P>().iter().enumerate() { + let len = *end - begin; + begin = *end; + true_count += (len as u64) * (bools.value(index as usize) as u64) * (is_valid.value(index as usize) as u64); + null_count += (len as u64) * (is_valid.value(index as usize) as u64); + } + }); + } + }; + + stats.set(Stat::TrueCount, true_count); + stats.set(Stat::NullCount, null_count); + } + DType::Primitive(..) => {} + dtype => vortex_bail!("invalid dtype: {}", dtype), + }, + Stat::NullCount => { + let ends = array.ends().into_primitive()?; + let null_count: u64 = match array.values().logical_validity() { + LogicalValidity::AllValid(_) => 0_u64, + LogicalValidity::AllInvalid(_) => array.len() as u64, + LogicalValidity::Array(is_valid) => { + let is_valid = is_valid.into_bool()?.boolean_buffer(); + match_each_unsigned_integer_ptype!(ends.ptype(), |$P| { + let mut begin = array.offset() as $P; + ends + .as_slice::<$P>() + .iter() + .enumerate() + .map(|(index, end)| { + let len = *end - begin; + begin = *end; + (len as u64) * ((!is_valid.value(index as usize)) as u64) + }) + .sum() + }) + } + }; + stats.set(stat, null_count); + } + _ => {} }; - let mut stats = StatsSet::default(); - if let Some(stat_value) = maybe_stat { - stats.set(stat, stat_value); - } Ok(stats) } } #[cfg(test)] mod tests { - use vortex_array::compute::scalar_at; + use arrow_buffer::BooleanBuffer; + use vortex_array::array::BoolArray; + use vortex_array::compute::{scalar_at, slice}; + use vortex_array::stats::{ArrayStatistics as _, Stat}; use vortex_array::test_harness::check_metadata; + use vortex_array::validity::Validity; use vortex_array::{ArrayDType, ArrayLen, IntoArrayData}; use vortex_buffer::buffer; use vortex_dtype::{DType, Nullability, PType}; @@ -292,4 +374,89 @@ mod tests { assert_eq!(scalar_at(arr.as_ref(), 5).unwrap(), 3.into()); assert_eq!(scalar_at(arr.as_ref(), 9).unwrap(), 3.into()); } + + #[test] + fn test_runend_int_stats() { + let arr = RunEndArray::try_new( + buffer![2u32, 5, 10].into_array(), + buffer![1i32, 2, 3].into_array(), + ) + .unwrap(); + + assert_eq!(arr.statistics().compute_as::(Stat::Min).unwrap(), 1); + assert_eq!(arr.statistics().compute_as::(Stat::Max).unwrap(), 3); + assert_eq!( + arr.statistics().compute_as::(Stat::NullCount).unwrap(), + 0 + ); + assert!(arr.statistics().compute_as::(Stat::IsSorted).unwrap()); + } + + #[test] + fn test_runend_bool_stats() { + let arr = RunEndArray::try_new( + buffer![2u32, 5, 10].into_array(), + BoolArray::try_new( + BooleanBuffer::from_iter([true, true, false]), + Validity::Array(BoolArray::from_iter([true, false, true]).into_array()), + ) + .unwrap() + .into_array(), + ) + .unwrap(); + + assert!(!arr.statistics().compute_as::(Stat::Min).unwrap()); + assert!(arr.statistics().compute_as::(Stat::Max).unwrap()); + assert_eq!( + arr.statistics().compute_as::(Stat::NullCount).unwrap(), + 3 + ); + assert!(!arr.statistics().compute_as::(Stat::IsSorted).unwrap()); + assert_eq!( + arr.statistics().compute_as::(Stat::TrueCount).unwrap(), + 2 + ); + + let sliced = slice(arr, 4, 7).unwrap(); + + assert!(!sliced.statistics().compute_as::(Stat::Min).unwrap()); + assert!(!sliced.statistics().compute_as::(Stat::Max).unwrap()); + assert_eq!( + sliced + .statistics() + .compute_as::(Stat::NullCount) + .unwrap(), + 1 + ); + // Not sorted because null must come last + assert!(!sliced + .statistics() + .compute_as::(Stat::IsSorted) + .unwrap()); + assert_eq!( + sliced + .statistics() + .compute_as::(Stat::TrueCount) + .unwrap(), + 0 + ); + } + + #[test] + fn test_all_invalid_true_count() { + let arr = RunEndArray::try_new( + buffer![2u32, 5, 10].into_array(), + BoolArray::from_iter([None, None, None]).into_array(), + ) + .unwrap() + .into_array(); + assert_eq!( + arr.statistics().compute_as::(Stat::TrueCount).unwrap(), + 0 + ); + assert_eq!( + arr.statistics().compute_as::(Stat::NullCount).unwrap(), + 10 + ); + } } diff --git a/vortex-array/src/validity.rs b/vortex-array/src/validity.rs index 32dc4441033..0ce7cce2add 100644 --- a/vortex-array/src/validity.rs +++ b/vortex-array/src/validity.rs @@ -516,7 +516,7 @@ impl LogicalValidity { Self::AllInvalid(len) => Ok(*len), Self::Array(a) => { let true_count = a.statistics().compute_true_count().ok_or_else(|| { - vortex_err!("Failed to compute true count from validity array") + vortex_err!("Failed to compute true count from validity array {:?}", a) })?; Ok(a.len() - true_count) } diff --git a/vortex-sampling-compressor/src/compressors/alp.rs b/vortex-sampling-compressor/src/compressors/alp.rs index dbcfe4fe464..878642c7c0a 100644 --- a/vortex-sampling-compressor/src/compressors/alp.rs +++ b/vortex-sampling-compressor/src/compressors/alp.rs @@ -1,5 +1,6 @@ use vortex_alp::{ - alp_encode_components, match_each_alp_float_ptype, ALPArray, ALPEncoding, ALPRDEncoding, + alp_encode_components, alp_encode_components_typed, match_each_alp_float_ptype, ALPArray, + ALPEncoding, ALPRDEncoding, }; use vortex_array::aliases::hash_set::HashSet; use vortex_array::array::PrimitiveArray; @@ -44,12 +45,8 @@ impl EncodingCompressor for ALPCompressor { like: Option>, ctx: SamplingCompressor<'a>, ) -> VortexResult> { - let parray = array.clone().into_primitive()?; - - let (exponents, encoded, patches) = match_each_alp_float_ptype!( - parray.ptype(), |$T| { - alp_encode_components::<$T>(&parray, None) - }); + let (exponents, encoded, patches) = + alp_encode_components(&array.clone().into_primitive()?)?; let compressed_encoded = ctx .named("packed")