-
Notifications
You must be signed in to change notification settings - Fork 113
Feature: introduces fuzzing with extension arrays #5819
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
0e7ab69
5b99fa6
fc25bd6
0bf82e0
ad63b19
49d26e5
981daba
bcf7135
8ef26ef
e06dc9c
b325a56
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,13 +1,13 @@ | ||
| // SPDX-License-Identifier: Apache-2.0 | ||
| // SPDX-FileCopyrightText: Copyright the Vortex contributors | ||
|
|
||
| use vortex_array::ArrayRef; | ||
| use vortex_array::Canonical; | ||
| use vortex_array::IntoArray; | ||
| use vortex_array::ToCanonical; | ||
| use vortex_array::arrays::BoolArray; | ||
| use vortex_array::arrays::ConstantArray; | ||
| use vortex_array::arrays::DecimalArray; | ||
| use vortex_array::arrays::ExtensionArray; | ||
| use vortex_array::arrays::PrimitiveArray; | ||
| use vortex_array::arrays::VarBinViewArray; | ||
| use vortex_array::compute::fill_null; | ||
|
|
@@ -23,6 +23,7 @@ use vortex_error::VortexExpect; | |
| use vortex_error::VortexResult; | ||
| use vortex_scalar::Scalar; | ||
|
|
||
| use crate::array::clone_ext_dtype; | ||
| /// Apply fill_null on the canonical form of the array to get a consistent baseline. | ||
| /// This implementation manually fills null values for each canonical type | ||
| /// without using the fill_null method, to serve as an independent baseline for testing. | ||
|
|
@@ -40,13 +41,35 @@ pub fn fill_null_canonical_array( | |
| Canonical::VarBinView(array) => { | ||
| fill_varbinview_array(&array, fill_value, result_nullability) | ||
| } | ||
| Canonical::Struct(_) | ||
| | Canonical::List(_) | ||
| | Canonical::FixedSizeList(_) | ||
| | Canonical::Extension(_) => fill_null(canonical.as_ref(), fill_value)?, | ||
| Canonical::Struct(_) | Canonical::List(_) | Canonical::FixedSizeList(_) => { | ||
| fill_null(canonical.as_ref(), fill_value)? | ||
| } | ||
| Canonical::Extension(array) => fill_extension_array(&array, fill_value), | ||
| }) | ||
| } | ||
|
|
||
| fn fill_extension_array(array: &ExtensionArray, fill_value: &Scalar) -> ArrayRef { | ||
| let filled_storage = fill_null_canonical_array( | ||
| array.storage().to_canonical(), | ||
| &fill_value.as_extension().storage(), | ||
| ) | ||
| .vortex_expect("fill_null should succeed in canonical form"); | ||
|
|
||
| if filled_storage.dtype().nullability() == array.ext_dtype().storage_dtype().nullability() { | ||
| ExtensionArray::new(array.ext_dtype().clone(), filled_storage).into_array() | ||
| } else { | ||
| ExtensionArray::new( | ||
| clone_ext_dtype( | ||
| array.ext_dtype().id().clone(), | ||
| filled_storage.dtype().clone(), | ||
| array.ext_dtype().metadata().cloned(), | ||
| ), | ||
| filled_storage, | ||
| ) | ||
| .into_array() | ||
| } | ||
| } | ||
|
Comment on lines
+51
to
+71
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Here you should for now only implemented the fill_null for ext array that are also temporal arrays |
||
|
|
||
| fn fill_bool_array( | ||
| array: &BoolArray, | ||
| fill_value: &Scalar, | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -8,6 +8,7 @@ use vortex_array::ToCanonical; | |
| use vortex_array::accessor::ArrayAccessor; | ||
| use vortex_array::arrays::BoolArray; | ||
| use vortex_array::arrays::DecimalArray; | ||
| use vortex_array::arrays::ExtensionArray; | ||
| use vortex_array::arrays::PrimitiveArray; | ||
| use vortex_array::arrays::StructArray; | ||
| use vortex_array::arrays::VarBinViewArray; | ||
|
|
@@ -19,6 +20,7 @@ use vortex_dtype::match_each_decimal_value_type; | |
| use vortex_dtype::match_each_native_ptype; | ||
| use vortex_error::VortexResult; | ||
|
|
||
| use crate::array::clone_ext_dtype; | ||
| use crate::array::take_canonical_array_non_nullable_indices; | ||
|
|
||
| pub fn filter_canonical_array(array: &dyn Array, filter: &[bool]) -> VortexResult<ArrayRef> { | ||
|
|
@@ -115,8 +117,27 @@ pub fn filter_canonical_array(array: &dyn Array, filter: &[bool]) -> VortexResul | |
| } | ||
| take_canonical_array_non_nullable_indices(array, indices.as_slice()) | ||
| } | ||
| d @ (DType::Null | DType::Extension(_)) => { | ||
| unreachable!("DType {d} not supported for fuzzing") | ||
| DType::Extension(ext_dtype) => { | ||
| // Extension arrays delegate filter to their storage type. | ||
| let filtered_storage = filter_canonical_array(array.to_extension().storage(), filter)?; | ||
|
|
||
| if filtered_storage.dtype().nullability() == ext_dtype.storage_dtype().nullability() { | ||
| Ok(ExtensionArray::new(ext_dtype.clone(), filtered_storage).into_array()) | ||
| } else { | ||
| // The storage dtype changed (i.e., became nullable due to filtering). | ||
| Ok(ExtensionArray::new( | ||
|
Comment on lines
+122
to
+128
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. same for filter |
||
| clone_ext_dtype( | ||
| ext_dtype.id().clone(), | ||
| filtered_storage.dtype().clone(), | ||
| ext_dtype.metadata().cloned(), | ||
| ), | ||
| filtered_storage, | ||
| ) | ||
| .into_array()) | ||
| } | ||
| } | ||
| DType::Null => { | ||
| unreachable!("Cannot search sorted on Null array") | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is not true |
||
| } | ||
| } | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -16,6 +16,7 @@ mod take; | |
|
|
||
| use std::iter; | ||
| use std::ops::Range; | ||
| use std::sync::Arc; | ||
|
|
||
| use arbitrary::Arbitrary; | ||
| use arbitrary::Error::EmptyChoose; | ||
|
|
@@ -40,14 +41,19 @@ pub(crate) use take::*; | |
| use vortex_array::Array; | ||
| use vortex_array::ArrayRef; | ||
| use vortex_array::IntoArray; | ||
| use vortex_array::ToCanonical; | ||
| use vortex_array::arrays::PrimitiveArray; | ||
| use vortex_array::arrays::arbitrary::ArbitraryArray; | ||
| use vortex_array::arrays::validator_for_ext_type; | ||
| use vortex_array::compute::MinMaxResult; | ||
| use vortex_array::compute::Operator; | ||
| use vortex_array::search_sorted::SearchResult; | ||
| use vortex_array::search_sorted::SearchSortedSide; | ||
| use vortex_btrblocks::BtrBlocksCompressor; | ||
| use vortex_dtype::DType; | ||
| use vortex_dtype::ExtDType; | ||
| use vortex_dtype::ExtID; | ||
| use vortex_dtype::ExtMetadata; | ||
| use vortex_dtype::Nullability; | ||
| use vortex_error::VortexExpect; | ||
| use vortex_error::vortex_panic; | ||
|
|
@@ -491,6 +497,14 @@ fn random_action_from_list( | |
| u.choose_iter(actions).copied() | ||
| } | ||
|
|
||
| fn clone_ext_dtype( | ||
| ext_id: ExtID, | ||
| storage_dtype: DType, | ||
| metadata: Option<ExtMetadata>, | ||
| ) -> Arc<ExtDType> { | ||
| Arc::new(ExtDType::new(ext_id, Arc::new(storage_dtype), metadata)) | ||
| } | ||
|
|
||
| /// Compress an array using the given strategy. | ||
| #[cfg(feature = "zstd")] | ||
| pub fn compress_array(array: &dyn Array, strategy: CompressorStrategy) -> ArrayRef { | ||
|
|
@@ -699,7 +713,22 @@ pub fn assert_array_eq( | |
| Backtrace::capture(), | ||
| )); | ||
| } | ||
|
|
||
| // Also validate the expected array's domain constraints for extension types | ||
| if matches!(lhs.dtype(), DType::Extension(..)) { | ||
| let validator = validator_for_ext_type(lhs.to_extension().ext_dtype()); | ||
| if !validator(&l) { | ||
|
Comment on lines
+717
to
+720
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think I missed this before, but is there a reason why the |
||
| return Err(VortexFuzzError::DomainValidationFailed( | ||
| l, | ||
| idx, | ||
| lhs.clone(), | ||
| step, | ||
| Backtrace::capture(), | ||
| )); | ||
| } | ||
| } | ||
| } | ||
|
|
||
| Ok(()) | ||
| } | ||
|
|
||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Nit: periods at the end of sentences (this is true everywhere)