Skip to content
Closed
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion docs/quickstart.rst
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ Use :func:`~vortex.encoding.compress` to compress the Vortex array and check the

>>> cvtx = vortex.compress(vtx)
>>> cvtx.nbytes
16604
16596
>>> cvtx.nbytes / vtx.nbytes
0.11...

Expand Down
1 change: 1 addition & 0 deletions encodings/alp/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ vortex-scalar = { workspace = true }

[dev-dependencies]
divan = { workspace = true }
rand = { workspace = true }
rstest = { workspace = true }
vortex-array = { workspace = true, features = ["test-harness"] }

Expand Down
55 changes: 44 additions & 11 deletions encodings/alp/benches/alp_compress.rs
Original file line number Diff line number Diff line change
@@ -1,27 +1,60 @@
#![allow(clippy::unwrap_used)]

use divan::Bencher;
use vortex_alp::{ALPFloat, ALPRDFloat, Exponents, RDEncoder};
use rand::rngs::StdRng;
use rand::{Rng, SeedableRng as _};
use vortex_alp::{alp_encode, ALPFloat, ALPRDFloat, RDEncoder};
use vortex_array::array::PrimitiveArray;
use vortex_array::validity::Validity;
use vortex_array::IntoCanonical;
use vortex_buffer::{buffer, Buffer};
use vortex_buffer::buffer;
use vortex_dtype::NativePType;

/// Benchmark binary entry point; delegates to `divan::main()`.
fn main() {
divan::main();
}

#[divan::bench(types = [f32, f64], args = [100_000, 10_000_000])]
fn compress_alp<T: ALPFloat>(n: usize) -> (Exponents, Buffer<T::ALPInt>, Buffer<u64>, Buffer<T>) {
let values: Vec<T> = vec![T::from(1.234).unwrap(); n];
T::encode(values.as_slice(), None)
/// Benchmarks `alp_encode` over a primitive array holding `n` copies of `1.234`.
///
/// `args` is `(n, fraction_valid)`. When `fraction_valid < 1.0` the validity is built from
/// per-element `rng.gen_bool(fraction_valid)` draws using an RNG seeded with `0`; otherwise the
/// array is `Validity::NonNullable`.
#[divan::bench(types = [f32, f64], args = [
(100_000, 1.0),
(10_000_000, 1.0),
(100_000, 0.25),
(10_000_000, 0.25),
(100_000, 0.95),
(10_000_000, 0.95),
])]
fn compress_alp<T: ALPFloat + NativePType>(bencher: Bencher, args: (usize, f64)) {
let (n, fraction_valid) = args;
// Fixed seed so the generated validity pattern is the same on every run.
let mut rng = StdRng::seed_from_u64(0);
let values = buffer![T::from(1.234).unwrap(); n];
let validity = if fraction_valid < 1.0 {
Validity::from_iter((0..values.len()).map(|_| rng.gen_bool(fraction_valid)))
} else {
Validity::NonNullable
};
// The benched closure clones `values` and `validity` and rebuilds the PrimitiveArray itself,
// so those steps sit inside the measured code alongside `alp_encode`.
bencher.bench_local(move || {
alp_encode(&PrimitiveArray::new(values.clone(), validity.clone())).unwrap()
})
}

#[divan::bench(types = [f32, f64], args = [100_000, 10_000_000])]
fn decompress_alp<T: ALPFloat>(bencher: Bencher, n: usize) {
let values: Vec<T> = vec![T::from(1.234).unwrap(); n];
let (exponents, encoded, ..) = T::encode(values.as_slice(), None);
bencher.bench_local(move || T::decode(&encoded, exponents));
/// Benchmarks canonicalizing (`into_canonical`) an ALP-encoded array of `n` copies of `1.234`.
///
/// `args` is `(n, fraction_valid)`. When `fraction_valid < 1.0` the validity is built from
/// per-element `rng.gen_bool(fraction_valid)` draws using an RNG seeded with `0`; otherwise the
/// array is `Validity::NonNullable`.
#[divan::bench(types = [f32, f64], args = [
(100_000, 1.0),
(10_000_000, 1.0),
(100_000, 0.25),
(10_000_000, 0.25),
(100_000, 0.95),
(10_000_000, 0.95),
])]
fn decompress_alp<T: ALPFloat + NativePType>(bencher: Bencher, args: (usize, f64)) {
let (n, fraction_valid) = args;
// Fixed seed so the generated validity pattern is the same on every run.
let mut rng = StdRng::seed_from_u64(0);
let values = buffer![T::from(1.234).unwrap(); n];
let validity = if fraction_valid < 1.0 {
Validity::from_iter((0..values.len()).map(|_| rng.gen_bool(fraction_valid)))
} else {
Validity::NonNullable
};
// Encoding is done once, outside the benched closure.
let array = alp_encode(&PrimitiveArray::new(values, validity)).unwrap();
// The benched closure clones the encoded array and then canonicalizes it.
bencher.bench_local(move || array.clone().into_canonical().unwrap());
}

#[divan::bench(types = [f32, f64], args = [100_000, 10_000_000])]
Expand Down
7 changes: 5 additions & 2 deletions encodings/alp/src/alp/array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -48,13 +48,16 @@ impl ALPArray {
let mut children = Vec::with_capacity(2);
children.push(encoded);
if let Some(patches) = &patches {
if patches.dtype().is_nullable() {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think this is the right way round. It's nice to have the patches dtype match the array dtype, and then assert that patches validity is AllValid.

vortex_bail!(MismatchedTypes: "patches should be non-nullable", patches.dtype());
}
children.push(patches.indices().clone());
children.push(patches.values().clone());
}

let patches = patches
.as_ref()
.map(|p| p.to_metadata(length, &dtype))
.map(|p| p.to_metadata(length, &dtype.as_nonnullable()))
.transpose()?;

Self::try_from_parts(
Expand Down Expand Up @@ -93,7 +96,7 @@ impl ALPArray {
.child(1, &p.indices_dtype(), p.len())
.vortex_expect("ALPArray: patch indices"),
self.as_ref()
.child(2, self.dtype(), p.len())
.child(2, &self.dtype().as_nonnullable(), p.len())
.vortex_expect("ALPArray: patch values"),
)
})
Expand Down
59 changes: 44 additions & 15 deletions encodings/alp/src/alp/compress.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ use vortex_array::patches::Patches;
use vortex_array::variants::PrimitiveArrayTrait;
use vortex_array::{ArrayDType, ArrayData, IntoArrayData, IntoArrayVariant};
use vortex_dtype::{NativePType, PType};
use vortex_error::{vortex_bail, VortexResult, VortexUnwrap};
use vortex_error::{vortex_bail, VortexResult};
use vortex_scalar::ScalarType;

use crate::alp::{ALPArray, ALPFloat};
Expand All @@ -27,33 +27,29 @@ macro_rules! match_each_alp_float_ptype {
/// Runs `T::encode` over the values of `values` and packages the outputs as
/// `(exponents, encoded array, optional patches)`.
///
/// The encoded integers are wrapped in a `PrimitiveArray` that carries the input's validity.
/// `Patches` are only produced when the exception buffer returned by `T::encode` is non-empty.
///
/// NOTE(review): this span is a rendered diff, so pre-change and post-change lines appear side
/// by side (e.g. two return-type lines, two `T::encode` calls, two `Patches::new` calls). It is
/// not compilable as shown; confirm against the actual file before relying on any single line.
pub fn alp_encode_components<T>(
values: &PrimitiveArray,
exponents: Option<Exponents>,
) -> (Exponents, ArrayData, Option<Patches>)
) -> VortexResult<(Exponents, ArrayData, Option<Patches>)>
where
T: ALPFloat + NativePType,
T::ALPInt: NativePType,
T: ScalarType,
{
let (exponents, encoded, exc_pos, exc) = T::encode(values.as_slice::<T>(), exponents);
let (exponents, encoded, exc_pos, exc) =
T::encode(values.as_slice::<T>(), &values.validity(), exponents)?;
let len = encoded.len();
(
Ok((
exponents,
PrimitiveArray::new(encoded, values.validity()).into_array(),
(!exc.is_empty()).then(|| {
let position_arr = exc_pos.into_array();
let patch_validity = values.validity().take(&position_arr).vortex_unwrap();
Patches::new(
len,
position_arr,
PrimitiveArray::new(exc, patch_validity).into_array(),
)
Patches::new(len, position_arr, exc.into_array())
}),
)
))
}

/// ALP-encodes a primitive array, dispatching on its `PType`.
///
/// `F32` and `F64` are routed to `alp_encode_components`; any other ptype bails with
/// "ALP can only encode f32 and f64". The resulting components are assembled with
/// `ALPArray::try_new`.
///
/// NOTE(review): this span is a rendered diff; each match arm appears twice (without and with
/// a trailing `?`). Confirm against the actual file which form is current.
pub fn alp_encode(parray: &PrimitiveArray) -> VortexResult<ALPArray> {
let (exponents, encoded, patches) = match parray.ptype() {
PType::F32 => alp_encode_components::<f32>(parray, None),
PType::F64 => alp_encode_components::<f64>(parray, None),
PType::F32 => alp_encode_components::<f32>(parray, None)?,
PType::F64 => alp_encode_components::<f64>(parray, None)?,
_ => vortex_bail!("ALP can only encode f32 and f64"),
};
ALPArray::try_new(encoded, exponents, patches)
use core::f64;

use vortex_array::compute::scalar_at;
use vortex_array::validity::Validity;
use vortex_array::validity::{ArrayValidity as _, Validity};
use vortex_buffer::{buffer, Buffer};

use super::*;
Expand Down Expand Up @@ -148,6 +144,39 @@ mod tests {
assert_eq!(values.as_slice(), decoded.as_slice::<f64>());
}

// Encodes four f64 values where index 2 (PI) is marked invalid, then checks that:
//   * no patches are produced,
//   * the encoded integers and exponents match the expected values below,
//   * after `decompress`, indices 0, 1 and 3 equal the input scalars and index 2 is not valid.
// Per the test name, the invalid slot is presumably the one that would otherwise be an ALP
// exception -- TODO confirm against `ALPFloat::encode`.
#[test]
#[allow(clippy::approx_constant)] // ALP doesn't like E
fn test_compress_ignores_invalid_exceptional_values() {
let values = buffer![1.234f64, 2.718, f64::consts::PI, 4.0];
// Validity mask: only index 2 is null.
let array = PrimitiveArray::new(values, Validity::from_iter([true, true, false, true]));
let encoded = alp_encode(&array).unwrap();
assert!(encoded.patches().is_none());
assert_eq!(
encoded
.encoded()
.into_primitive()
.unwrap()
.as_slice::<i64>(),
vec![1234i64, 2718, 3142, 4000] // fill forward
);
assert_eq!(encoded.exponents(), Exponents { e: 16, f: 13 });

let decoded = decompress(encoded).unwrap();
assert_eq!(
scalar_at(&decoded, 0).unwrap(),
scalar_at(&array, 0).unwrap()
);
assert_eq!(
scalar_at(&decoded, 1).unwrap(),
scalar_at(&array, 1).unwrap()
);
// The null slot must stay null after the round trip; its value is not compared.
assert!(!decoded.is_valid(2));
assert_eq!(
scalar_at(&decoded, 3).unwrap(),
scalar_at(&array, 3).unwrap()
);
}

#[test]
#[allow(clippy::approx_constant)] // ALP doesn't like E
fn test_nullable_patched_scalar_at() {
Expand All @@ -168,6 +197,7 @@ mod tests {
assert!(s.is_valid());
}

assert!(!encoded.is_valid(4));
let s = scalar_at(encoded.as_ref(), 4).unwrap();
assert!(s.is_null());

Expand All @@ -190,7 +220,6 @@ mod tests {
);
let alp_arr = alp_encode(&original).unwrap();
let decompressed = alp_arr.into_primitive().unwrap();
assert_eq!(original.as_slice::<f64>(), decompressed.as_slice::<f64>());
assert_eq!(original.validity(), decompressed.validity());
}
}
7 changes: 6 additions & 1 deletion encodings/alp/src/alp/compute/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ use vortex_array::compute::{
filter, scalar_at, slice, take, ComputeVTable, FilterFn, FilterMask, ScalarAtFn, SliceFn,
TakeFn,
};
use vortex_array::validity::ArrayValidity as _;
use vortex_array::variants::PrimitiveArrayTrait;
use vortex_array::{ArrayDType, ArrayData, IntoArrayData};
use vortex_error::VortexResult;
Expand Down Expand Up @@ -29,9 +30,13 @@ impl ComputeVTable for ALPEncoding {

impl ScalarAtFn<ALPArray> for ALPEncoding {
fn scalar_at(&self, array: &ALPArray, index: usize) -> VortexResult<Scalar> {
if !array.encoded().is_valid(index) {
return Ok(Scalar::null(array.dtype().clone()));
}

if let Some(patches) = array.patches() {
if let Some(patch) = patches.get_patched(index)? {
return Ok(patch);
return patch.cast(array.dtype());
}
}

Expand Down
Loading
Loading