Skip to content

Commit 1f0d317

Browse files
committed
fix[pco]: fill_null cast sliced validity
Signed-off-by: Joe Isaacs <[email protected]>
1 parent b2bfc64 commit 1f0d317

File tree

2 files changed

+81
-22
lines changed

2 files changed

+81
-22
lines changed

encodings/pco/src/array.rs

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -427,3 +427,43 @@ impl OperationsVTable<PcoVTable> for PcoVTable {
427427
array._slice(index, index + 1).decompress().scalar_at(0)
428428
}
429429
}
430+
431+
#[cfg(test)]
432+
mod tests {
433+
use vortex_array::arrays::PrimitiveArray;
434+
use vortex_array::validity::Validity;
435+
use vortex_array::{IntoArray, ToCanonical, assert_arrays_eq};
436+
use vortex_buffer::Buffer;
437+
438+
use crate::PcoArray;
439+
440+
#[test]
441+
fn test_slice_nullable() {
442+
// Create a nullable array with some nulls
443+
let values = PrimitiveArray::new(
444+
Buffer::copy_from(vec![10u32, 20, 30, 40, 50, 60]),
445+
Validity::from_iter([false, true, true, true, true, false]),
446+
);
447+
let pco = PcoArray::from_primitive(&values, 0, 128).unwrap();
448+
let decoded = pco.to_primitive();
449+
assert_arrays_eq!(
450+
decoded,
451+
PrimitiveArray::from_option_iter([
452+
None,
453+
Some(20u32),
454+
Some(30),
455+
Some(40),
456+
Some(50),
457+
None
458+
])
459+
);
460+
461+
// Slice to get only the non-null values in the middle
462+
let sliced = pco.slice(1..5);
463+
let expected =
464+
PrimitiveArray::from_option_iter([Some(20u32), Some(30), Some(40), Some(50)])
465+
.into_array();
466+
assert_arrays_eq!(sliced, expected);
467+
assert_arrays_eq!(sliced.to_canonical().into_array(), expected);
468+
}
469+
}

encodings/pco/src/compute/cast.rs

Lines changed: 41 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@
22
// SPDX-FileCopyrightText: Copyright the Vortex contributors
33

44
use vortex_array::compute::{CastKernel, CastKernelAdapter};
5-
use vortex_array::vtable::ValiditySliceHelper;
65
use vortex_array::{ArrayRef, IntoArray, register_kernel};
76
use vortex_dtype::DType;
87
use vortex_error::VortexResult;
@@ -11,26 +10,22 @@ use crate::{PcoArray, PcoVTable};
1110

1211
impl CastKernel for PcoVTable {
1312
fn cast(&self, array: &PcoArray, dtype: &DType) -> VortexResult<Option<ArrayRef>> {
13+
if !dtype.is_nullable() && !array.all_valid() {
14+
// TODO(joe): fixme
15+
// We cannot cast to non-nullable since the validity containing nulls is used to decode
16+
// the PCO array, this would require rewriting tables.
17+
return Ok(None);
18+
}
1419
// PCO (Pcodec) is a compression encoding that stores data in a compressed format.
1520
// It can efficiently handle nullability changes without decompression, but type changes
1621
// require decompression since the compression algorithm is type-specific.
1722
// PCO supports: F16, F32, F64, I16, I32, I64, U16, U32, U64
1823
if array.dtype().eq_ignore_nullability(dtype) {
1924
// Create a new validity with the target nullability
20-
let new_validity = if !dtype.is_nullable() {
21-
// If we are casting to non-nullable we only need to check the sliced validity
22-
// for validity, since the unsliced validity might contain unused nulls.
23-
// We don't do this if the target is nullable since it requires expand the validity
24-
// array.
25-
array
26-
.sliced_validity()
27-
.cast_nullability(dtype.nullability(), array.len())?
28-
} else {
29-
array
30-
.unsliced_validity
31-
.clone()
32-
.cast_nullability(dtype.nullability(), array.len())?
33-
};
25+
let new_validity = array
26+
.unsliced_validity
27+
.clone()
28+
.cast_nullability(dtype.nullability(), array.len())?;
3429

3530
return Ok(Some(
3631
PcoArray::new(
@@ -60,6 +55,7 @@ mod tests {
6055
use vortex_array::arrays::PrimitiveArray;
6156
use vortex_array::compute::cast;
6257
use vortex_array::compute::conformance::cast::test_cast_conformance;
58+
use vortex_array::validity::Validity;
6359
use vortex_buffer::Buffer;
6460
use vortex_dtype::{DType, Nullability, PType};
6561

@@ -69,7 +65,7 @@ mod tests {
6965
fn test_cast_pco_f32_to_f64() {
7066
let values = PrimitiveArray::new(
7167
Buffer::copy_from(vec![1.0f32, 2.0, 3.0, 4.0, 5.0]),
72-
vortex_array::validity::Validity::NonNullable,
68+
Validity::NonNullable,
7369
);
7470
let pco = PcoArray::from_primitive(&values, 0, 128).unwrap();
7571

@@ -94,7 +90,7 @@ mod tests {
9490
// Test casting from NonNullable to Nullable
9591
let values = PrimitiveArray::new(
9692
Buffer::copy_from(vec![10u32, 20, 30, 40]),
97-
vortex_array::validity::Validity::NonNullable,
93+
Validity::NonNullable,
9894
);
9995
let pco = PcoArray::from_primitive(&values, 0, 128).unwrap();
10096

@@ -109,26 +105,49 @@ mod tests {
109105
);
110106
}
111107

108+
#[test]
109+
fn test_cast_sliced_pco_nullable_to_nonnullable() {
110+
let values = PrimitiveArray::new(
111+
Buffer::copy_from(vec![10u32, 20, 30, 40, 50, 60]),
112+
Validity::from_iter([true, true, true, true, true, true]),
113+
);
114+
let pco = PcoArray::from_primitive(&values, 0, 128).unwrap();
115+
let sliced = pco.slice(1..5);
116+
let casted = cast(
117+
sliced.as_ref(),
118+
&DType::Primitive(PType::U32, Nullability::NonNullable),
119+
)
120+
.unwrap();
121+
assert_eq!(
122+
casted.dtype(),
123+
&DType::Primitive(PType::U32, Nullability::NonNullable)
124+
);
125+
// Verify the values are correct
126+
let decoded = casted.to_primitive();
127+
let u32_values = decoded.as_slice::<u32>();
128+
assert_eq!(u32_values, &[20, 30, 40, 50]);
129+
}
130+
112131
#[rstest]
113132
#[case::f32(PrimitiveArray::new(
114133
Buffer::copy_from(vec![1.23f32, 4.56, 7.89, 10.11, 12.13]),
115-
vortex_array::validity::Validity::NonNullable,
134+
Validity::NonNullable,
116135
))]
117136
#[case::f64(PrimitiveArray::new(
118137
Buffer::copy_from(vec![100.1f64, 200.2, 300.3, 400.4, 500.5]),
119-
vortex_array::validity::Validity::NonNullable,
138+
Validity::NonNullable,
120139
))]
121140
#[case::i32(PrimitiveArray::new(
122141
Buffer::copy_from(vec![100i32, 200, 300, 400, 500]),
123-
vortex_array::validity::Validity::NonNullable,
142+
Validity::NonNullable,
124143
))]
125144
#[case::u64(PrimitiveArray::new(
126145
Buffer::copy_from(vec![1000u64, 2000, 3000, 4000]),
127-
vortex_array::validity::Validity::NonNullable,
146+
Validity::NonNullable,
128147
))]
129148
#[case::single(PrimitiveArray::new(
130149
Buffer::copy_from(vec![42.42f64]),
131-
vortex_array::validity::Validity::NonNullable,
150+
Validity::NonNullable,
132151
))]
133152
fn test_cast_pco_conformance(#[case] values: PrimitiveArray) {
134153
let pco = PcoArray::from_primitive(&values, 0, 128).unwrap();

0 commit comments

Comments
 (0)