Skip to content
12 changes: 10 additions & 2 deletions fuzz/src/array/compare.rs
Original file line number Diff line number Diff line change
Expand Up @@ -123,8 +123,16 @@ pub fn compare_canonical_array(array: &dyn Array, value: &Scalar, operator: Oper
)
.into_array()
}
d @ (DType::Null | DType::Extension(_)) => {
unreachable!("DType {d} not supported for fuzzing")
DType::Null => {
unreachable!("DType null not supported for fuzzing")
}
DType::Extension(..) => {
// Extension arrays delegate comparison to their storage type.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nit: periods at the end of sentences (this is true everywhere)

compare_canonical_array(
array.to_extension().storage(),
&value.as_extension().storage(),
operator,
)
}
}
}
Expand Down
33 changes: 28 additions & 5 deletions fuzz/src/array/fill_null.rs
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright the Vortex contributors

use vortex_array::ArrayRef;
use vortex_array::Canonical;
use vortex_array::IntoArray;
use vortex_array::ToCanonical;
use vortex_array::arrays::BoolArray;
use vortex_array::arrays::ConstantArray;
use vortex_array::arrays::DecimalArray;
use vortex_array::arrays::ExtensionArray;
use vortex_array::arrays::PrimitiveArray;
use vortex_array::arrays::VarBinViewArray;
use vortex_array::compute::fill_null;
Expand All @@ -23,6 +23,7 @@ use vortex_error::VortexExpect;
use vortex_error::VortexResult;
use vortex_scalar::Scalar;

use crate::array::clone_ext_dtype;
/// Apply fill_null on the canonical form of the array to get a consistent baseline.
/// This implementation manually fills null values for each canonical type
/// without using the fill_null method, to serve as an independent baseline for testing.
Expand All @@ -40,13 +41,35 @@ pub fn fill_null_canonical_array(
Canonical::VarBinView(array) => {
fill_varbinview_array(&array, fill_value, result_nullability)
}
Canonical::Struct(_)
| Canonical::List(_)
| Canonical::FixedSizeList(_)
| Canonical::Extension(_) => fill_null(canonical.as_ref(), fill_value)?,
Canonical::Struct(_) | Canonical::List(_) | Canonical::FixedSizeList(_) => {
fill_null(canonical.as_ref(), fill_value)?
}
Canonical::Extension(array) => fill_extension_array(&array, fill_value),
})
}

/// Fills nulls in an extension array by filling its storage array.
///
/// The storage array is canonicalized and passed to `fill_null_canonical_array` together with
/// the storage scalar of `fill_value`; the filled storage is then wrapped back into an
/// `ExtensionArray`. When the filled storage's nullability differs from the nullability of the
/// original storage dtype, a new extension dtype is built around the filled storage's dtype,
/// reusing the original id and metadata.
///
/// Expects the storage fill to succeed (`vortex_expect`).
fn fill_extension_array(array: &ExtensionArray, fill_value: &Scalar) -> ArrayRef {
    let filled_storage = fill_null_canonical_array(
        array.storage().to_canonical(),
        &fill_value.as_extension().storage(),
    )
    .vortex_expect("fill_null should succeed in canonical form");

    let source_dtype = array.ext_dtype();
    let nullability_preserved =
        filled_storage.dtype().nullability() == source_dtype.storage_dtype().nullability();

    let result_dtype = if nullability_preserved {
        // Storage nullability is unchanged, so the existing extension dtype still applies.
        source_dtype.clone()
    } else {
        // Storage nullability changed: rebuild the extension dtype over the new storage dtype.
        clone_ext_dtype(
            source_dtype.id().clone(),
            filled_storage.dtype().clone(),
            source_dtype.metadata().cloned(),
        )
    };

    ExtensionArray::new(result_dtype, filled_storage).into_array()
}
Comment on lines +51 to +71
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For now, you should only implement fill_null here for extension arrays that are also temporal arrays.


fn fill_bool_array(
array: &BoolArray,
fill_value: &Scalar,
Expand Down
25 changes: 23 additions & 2 deletions fuzz/src/array/filter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ use vortex_array::ToCanonical;
use vortex_array::accessor::ArrayAccessor;
use vortex_array::arrays::BoolArray;
use vortex_array::arrays::DecimalArray;
use vortex_array::arrays::ExtensionArray;
use vortex_array::arrays::PrimitiveArray;
use vortex_array::arrays::StructArray;
use vortex_array::arrays::VarBinViewArray;
Expand All @@ -19,6 +20,7 @@ use vortex_dtype::match_each_decimal_value_type;
use vortex_dtype::match_each_native_ptype;
use vortex_error::VortexResult;

use crate::array::clone_ext_dtype;
use crate::array::take_canonical_array_non_nullable_indices;

pub fn filter_canonical_array(array: &dyn Array, filter: &[bool]) -> VortexResult<ArrayRef> {
Expand Down Expand Up @@ -115,8 +117,27 @@ pub fn filter_canonical_array(array: &dyn Array, filter: &[bool]) -> VortexResul
}
take_canonical_array_non_nullable_indices(array, indices.as_slice())
}
d @ (DType::Null | DType::Extension(_)) => {
unreachable!("DType {d} not supported for fuzzing")
DType::Extension(ext_dtype) => {
// Extension arrays delegate filter to their storage type.
let filtered_storage = filter_canonical_array(array.to_extension().storage(), filter)?;

if filtered_storage.dtype().nullability() == ext_dtype.storage_dtype().nullability() {
Ok(ExtensionArray::new(ext_dtype.clone(), filtered_storage).into_array())
} else {
// The storage dtype changed (i.e., became nullable due to filtering).
Ok(ExtensionArray::new(
Comment on lines +122 to +128
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

same for filter

clone_ext_dtype(
ext_dtype.id().clone(),
filtered_storage.dtype().clone(),
ext_dtype.metadata().cloned(),
),
filtered_storage,
)
.into_array())
}
}
DType::Null => {
unreachable!("Cannot search sorted on Null array")
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is not true

}
}
}
20 changes: 11 additions & 9 deletions fuzz/src/array/mask.rs
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright the Vortex contributors

use std::sync::Arc;

use vortex_array::ArrayRef;
use vortex_array::Canonical;
use vortex_array::IntoArray;
Expand All @@ -15,12 +13,13 @@ use vortex_array::arrays::PrimitiveArray;
use vortex_array::arrays::StructArray;
use vortex_array::arrays::VarBinViewArray;
use vortex_array::vtable::ValidityHelper;
use vortex_dtype::ExtDType;
use vortex_dtype::match_each_decimal_value_type;
use vortex_error::VortexExpect;
use vortex_error::VortexResult;
use vortex_mask::Mask;

use crate::array::clone_ext_dtype;

/// Apply mask on the canonical form of the array to get a consistent baseline.
/// This implementation manually applies the mask to each canonical type
/// without using the mask_fn method, to serve as an independent baseline for testing.
Expand Down Expand Up @@ -108,12 +107,15 @@ pub fn mask_canonical_array(canonical: Canonical, mask: &Mask) -> VortexResult<A
ExtensionArray::new(array.ext_dtype().clone(), masked_storage).into_array()
} else {
// The storage dtype changed (i.e., became nullable due to masking)
let ext_dtype = Arc::new(ExtDType::new(
array.ext_dtype().id().clone(),
Arc::new(masked_storage.dtype().clone()),
array.ext_dtype().metadata().cloned(),
));
ExtensionArray::new(ext_dtype, masked_storage).into_array()
ExtensionArray::new(
clone_ext_dtype(
array.ext_dtype().id().clone(),
masked_storage.dtype().clone(),
array.ext_dtype().metadata().cloned(),
),
masked_storage,
)
.into_array()
}
}
})
Expand Down
29 changes: 29 additions & 0 deletions fuzz/src/array/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ mod take;

use std::iter;
use std::ops::Range;
use std::sync::Arc;

use arbitrary::Arbitrary;
use arbitrary::Error::EmptyChoose;
Expand All @@ -40,14 +41,19 @@ pub(crate) use take::*;
use vortex_array::Array;
use vortex_array::ArrayRef;
use vortex_array::IntoArray;
use vortex_array::ToCanonical;
use vortex_array::arrays::PrimitiveArray;
use vortex_array::arrays::arbitrary::ArbitraryArray;
use vortex_array::arrays::validator_for_ext_type;
use vortex_array::compute::MinMaxResult;
use vortex_array::compute::Operator;
use vortex_array::search_sorted::SearchResult;
use vortex_array::search_sorted::SearchSortedSide;
use vortex_btrblocks::BtrBlocksCompressor;
use vortex_dtype::DType;
use vortex_dtype::ExtDType;
use vortex_dtype::ExtID;
use vortex_dtype::ExtMetadata;
use vortex_dtype::Nullability;
use vortex_error::VortexExpect;
use vortex_error::vortex_panic;
Expand Down Expand Up @@ -491,6 +497,14 @@ fn random_action_from_list(
u.choose_iter(actions).copied()
}

/// Builds a new reference-counted [`ExtDType`] from an extension id, a storage dtype, and
/// optional metadata.
///
/// The storage dtype is wrapped in its own [`Arc`] before being handed to [`ExtDType::new`].
fn clone_ext_dtype(
    ext_id: ExtID,
    storage_dtype: DType,
    metadata: Option<ExtMetadata>,
) -> Arc<ExtDType> {
    let shared_storage = Arc::new(storage_dtype);
    let rebuilt = ExtDType::new(ext_id, shared_storage, metadata);
    Arc::new(rebuilt)
}

/// Compress an array using the given strategy.
#[cfg(feature = "zstd")]
pub fn compress_array(array: &dyn Array, strategy: CompressorStrategy) -> ArrayRef {
Expand Down Expand Up @@ -699,7 +713,22 @@ pub fn assert_array_eq(
Backtrace::capture(),
));
}

// Also validate the expected array's domain constraints for extension types
if matches!(lhs.dtype(), DType::Extension(..)) {
let validator = validator_for_ext_type(lhs.to_extension().ext_dtype());
if !validator(&l) {
Comment on lines +717 to +720
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think I missed this before, but is there a reason why the DomainValidator takes a Scalar and not an ArrayRef? I feel like it makes more sense for the validator to take the entire array and check the values directly instead of requiring the caller to do that for every scalar (note that our current Scalars are super inefficient and slow to get because they usually make one or even several allocations).

return Err(VortexFuzzError::DomainValidationFailed(
l,
idx,
lhs.clone(),
step,
Backtrace::capture(),
));
}
}
}

Ok(())
}

Expand Down
12 changes: 10 additions & 2 deletions fuzz/src/array/search_sorted.rs
Original file line number Diff line number Diff line change
Expand Up @@ -129,8 +129,16 @@ pub fn search_sorted_canonical_array(
let scalar_vals = (0..array.len()).map(|i| array.scalar_at(i)).collect_vec();
Ok(scalar_vals.search_sorted(&scalar.cast(array.dtype())?, side))
}
d @ (DType::Null | DType::Extension(_)) => {
unreachable!("DType {d} not supported for fuzzing")
DType::Extension(..) => {
// Extension arrays delegate search to their storage type.
search_sorted_canonical_array(
array.to_extension().storage(),
&scalar.as_extension().storage(),
side,
)
}
DType::Null => {
unreachable!("Cannot search sorted on Null array")
}
}
}
28 changes: 25 additions & 3 deletions fuzz/src/array/slice.rs
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright the Vortex contributors

use vortex_array::Array;
use vortex_array::ArrayRef;
use vortex_array::IntoArray;
use vortex_array::ToCanonical;
use vortex_array::accessor::ArrayAccessor;
use vortex_array::arrays::BoolArray;
use vortex_array::arrays::DecimalArray;
use vortex_array::arrays::ExtensionArray;
use vortex_array::arrays::FixedSizeListArray;
use vortex_array::arrays::ListViewArray;
use vortex_array::arrays::PrimitiveArray;
Expand All @@ -19,6 +19,8 @@ use vortex_dtype::match_each_decimal_value_type;
use vortex_dtype::match_each_native_ptype;
use vortex_error::VortexResult;

use crate::array::clone_ext_dtype;

#[allow(clippy::unnecessary_fallible_conversions)]
pub fn slice_canonical_array(
array: &dyn Array,
Expand Down Expand Up @@ -113,8 +115,28 @@ pub fn slice_canonical_array(
.to_array(),
)
}
d @ (DType::Null | DType::Extension(_)) => {
unreachable!("DType {d} not supported for fuzzing")
DType::Extension(ext_dtype) => {
// Extension arrays delegate slicing to their storage type.
let sliced_storage =
slice_canonical_array(array.to_extension().storage(), start, stop)?;

if sliced_storage.dtype().nullability() == ext_dtype.storage_dtype().nullability() {
Ok(ExtensionArray::new(ext_dtype.clone(), sliced_storage).into_array())
} else {
// The storage dtype changed (i.e., became nullable due to slicing).
Ok(ExtensionArray::new(
clone_ext_dtype(
ext_dtype.id().clone(),
sliced_storage.dtype().clone(),
ext_dtype.metadata().cloned(),
),
sliced_storage,
)
.into_array())
}
}
DType::Null => {
unreachable!("Cannot search sorted on Null array")
}
}
}
26 changes: 24 additions & 2 deletions fuzz/src/array/sort.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ use vortex_array::ToCanonical;
use vortex_array::accessor::ArrayAccessor;
use vortex_array::arrays::BoolArray;
use vortex_array::arrays::DecimalArray;
use vortex_array::arrays::ExtensionArray;
use vortex_array::arrays::PrimitiveArray;
use vortex_array::arrays::VarBinViewArray;
use vortex_dtype::DType;
Expand All @@ -19,6 +20,7 @@ use vortex_dtype::match_each_native_ptype;
use vortex_error::VortexExpect;
use vortex_error::VortexResult;

use crate::array::clone_ext_dtype;
use crate::array::take_canonical_array_non_nullable_indices;

pub fn sort_canonical_array(array: &dyn Array) -> VortexResult<ArrayRef> {
Expand Down Expand Up @@ -80,8 +82,28 @@ pub fn sort_canonical_array(array: &dyn Array) -> VortexResult<ArrayRef> {
});
take_canonical_array_non_nullable_indices(array, &sort_indices)
}
d @ (DType::Null | DType::Extension(_)) => {
unreachable!("DType {d} not supported for fuzzing")
DType::Null => {
// Null arrays don't need sorting - all elements are null
Ok(array.to_array())
}
DType::Extension(ext_dtype) => {
// Extension arrays delegate sorting to their storage type.
let sorted_storage = sort_canonical_array(array.to_extension().storage())?;

if sorted_storage.dtype().nullability() == ext_dtype.storage_dtype().nullability() {
Ok(ExtensionArray::new(ext_dtype.clone(), sorted_storage).into_array())
} else {
// The storage dtype changed (i.e., became nullable due to sorting)
Ok(ExtensionArray::new(
clone_ext_dtype(
ext_dtype.id().clone(),
sorted_storage.dtype().clone(),
ext_dtype.metadata().cloned(),
),
sorted_storage,
)
.into_array())
}
}
}
}
Expand Down
Loading