Skip to content

Commit 0e7ab69

Browse files
committed
feat: introduces fuzzing with extension arrays
Signed-off-by: Pratham Agarwal <[email protected]>
1 parent c7b0ef9 commit 0e7ab69

File tree

11 files changed

+233
-26
lines changed

11 files changed

+233
-26
lines changed

fuzz/src/array/compare.rs

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -123,8 +123,16 @@ pub fn compare_canonical_array(array: &dyn Array, value: &Scalar, operator: Oper
123123
)
124124
.into_array()
125125
}
126-
d @ (DType::Null | DType::Extension(_)) => {
127-
unreachable!("DType {d} not supported for fuzzing")
126+
DType::Null => {
127+
unreachable!("DType null not supported for fuzzing")
128+
}
129+
DType::Extension(..) => {
130+
// Extension arrays delegate comparison to their storage type
131+
compare_canonical_array(
132+
array.to_extension().storage(),
133+
&value.as_extension().storage(),
134+
operator,
135+
)
128136
}
129137
}
130138
}

fuzz/src/array/fill_null.rs

Lines changed: 28 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,16 @@
11
// SPDX-License-Identifier: Apache-2.0
22
// SPDX-FileCopyrightText: Copyright the Vortex contributors
33

4+
use std::sync::Arc;
5+
46
use vortex_array::ArrayRef;
57
use vortex_array::Canonical;
68
use vortex_array::IntoArray;
79
use vortex_array::ToCanonical;
810
use vortex_array::arrays::BoolArray;
911
use vortex_array::arrays::ConstantArray;
1012
use vortex_array::arrays::DecimalArray;
13+
use vortex_array::arrays::ExtensionArray;
1114
use vortex_array::arrays::PrimitiveArray;
1215
use vortex_array::arrays::VarBinViewArray;
1316
use vortex_array::compute::fill_null;
@@ -16,13 +19,13 @@ use vortex_array::vtable::ValidityHelper;
1619
use vortex_buffer::Buffer;
1720
use vortex_buffer::BufferMut;
1821
use vortex_dtype::DType;
22+
use vortex_dtype::ExtDType;
1923
use vortex_dtype::Nullability;
2024
use vortex_dtype::match_each_decimal_value_type;
2125
use vortex_dtype::match_each_native_ptype;
2226
use vortex_error::VortexExpect;
2327
use vortex_error::VortexResult;
2428
use vortex_scalar::Scalar;
25-
2629
/// Apply fill_null on the canonical form of the array to get a consistent baseline.
2730
/// This implementation manually fills null values for each canonical type
2831
/// without using the fill_null method, to serve as an independent baseline for testing.
@@ -40,13 +43,33 @@ pub fn fill_null_canonical_array(
4043
Canonical::VarBinView(array) => {
4144
fill_varbinview_array(&array, fill_value, result_nullability)
4245
}
43-
Canonical::Struct(_)
44-
| Canonical::List(_)
45-
| Canonical::FixedSizeList(_)
46-
| Canonical::Extension(_) => fill_null(canonical.as_ref(), fill_value)?,
46+
Canonical::Extension(array) => fill_extension_array(&array, fill_value),
47+
Canonical::Struct(_) | Canonical::List(_) | Canonical::FixedSizeList(_) => {
48+
fill_null(canonical.as_ref(), fill_value)?
49+
}
4750
})
4851
}
4952

53+
fn fill_extension_array(array: &ExtensionArray, fill_value: &Scalar) -> ArrayRef {
54+
let filled_storage = fill_null_canonical_array(
55+
array.storage().to_canonical(),
56+
&fill_value.as_extension().storage(),
57+
)
58+
.vortex_expect("fill_null should succeed in canonical form");
59+
60+
if filled_storage.dtype().nullability() == array.ext_dtype().storage_dtype().nullability() {
61+
ExtensionArray::new(array.ext_dtype().clone(), filled_storage).into_array()
62+
} else {
63+
let new_ext_dtype = Arc::new(ExtDType::new(
64+
array.ext_dtype().id().clone(),
65+
Arc::new(filled_storage.dtype().clone()),
66+
array.ext_dtype().metadata().cloned(),
67+
));
68+
69+
ExtensionArray::new(new_ext_dtype, filled_storage).into_array()
70+
}
71+
}
72+
5073
fn fill_bool_array(
5174
array: &BoolArray,
5275
fill_value: &Scalar,

fuzz/src/array/filter.rs

Lines changed: 21 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,20 +1,24 @@
11
// SPDX-License-Identifier: Apache-2.0
22
// SPDX-FileCopyrightText: Copyright the Vortex contributors
33

4+
use std::sync::Arc;
5+
46
use vortex_array::Array;
57
use vortex_array::ArrayRef;
68
use vortex_array::IntoArray;
79
use vortex_array::ToCanonical;
810
use vortex_array::accessor::ArrayAccessor;
911
use vortex_array::arrays::BoolArray;
1012
use vortex_array::arrays::DecimalArray;
13+
use vortex_array::arrays::ExtensionArray;
1114
use vortex_array::arrays::PrimitiveArray;
1215
use vortex_array::arrays::StructArray;
1316
use vortex_array::arrays::VarBinViewArray;
1417
use vortex_array::validity::Validity;
1518
use vortex_buffer::BitBuffer;
1619
use vortex_buffer::Buffer;
1720
use vortex_dtype::DType;
21+
use vortex_dtype::ExtDType;
1822
use vortex_dtype::match_each_decimal_value_type;
1923
use vortex_dtype::match_each_native_ptype;
2024
use vortex_error::VortexResult;
@@ -115,8 +119,23 @@ pub fn filter_canonical_array(array: &dyn Array, filter: &[bool]) -> VortexResul
115119
}
116120
take_canonical_array_non_nullable_indices(array, indices.as_slice())
117121
}
118-
d @ (DType::Null | DType::Extension(_)) => {
119-
unreachable!("DType {d} not supported for fuzzing")
122+
DType::Extension(ext_dtype) => {
123+
// Extension arrays delegate filter to their storage type
124+
let filtered_storage = filter_canonical_array(array.to_extension().storage(), filter)?;
125+
126+
if filtered_storage.dtype().nullability() == ext_dtype.storage_dtype().nullability() {
127+
Ok(ExtensionArray::new(ext_dtype.clone(), filtered_storage).into_array())
128+
} else {
129+
let new_ext_dtype = Arc::new(ExtDType::new(
130+
ext_dtype.id().clone(),
131+
Arc::new(filtered_storage.dtype().clone()),
132+
ext_dtype.metadata().cloned(),
133+
));
134+
Ok(ExtensionArray::new(new_ext_dtype, filtered_storage).into_array())
135+
}
136+
}
137+
DType::Null => {
138+
unreachable!("Cannot filter a Null array")
120139
}
121140
}
122141
}

fuzz/src/array/search_sorted.rs

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -129,8 +129,16 @@ pub fn search_sorted_canonical_array(
129129
let scalar_vals = (0..array.len()).map(|i| array.scalar_at(i)).collect_vec();
130130
Ok(scalar_vals.search_sorted(&scalar.cast(array.dtype())?, side))
131131
}
132-
d @ (DType::Null | DType::Extension(_)) => {
133-
unreachable!("DType {d} not supported for fuzzing")
132+
DType::Extension(..) => {
133+
// Extension arrays delegate search to their storage type
134+
search_sorted_canonical_array(
135+
array.to_extension().storage(),
136+
&scalar.as_extension().storage(),
137+
side,
138+
)
139+
}
140+
DType::Null => {
141+
unreachable!("Cannot search sorted on Null array")
134142
}
135143
}
136144
}

fuzz/src/array/slice.rs

Lines changed: 21 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
// SPDX-License-Identifier: Apache-2.0
22
// SPDX-FileCopyrightText: Copyright the Vortex contributors
3+
use std::sync::Arc;
34

45
use vortex_array::Array;
56
use vortex_array::ArrayRef;
@@ -8,13 +9,15 @@ use vortex_array::ToCanonical;
89
use vortex_array::accessor::ArrayAccessor;
910
use vortex_array::arrays::BoolArray;
1011
use vortex_array::arrays::DecimalArray;
12+
use vortex_array::arrays::ExtensionArray;
1113
use vortex_array::arrays::FixedSizeListArray;
1214
use vortex_array::arrays::ListViewArray;
1315
use vortex_array::arrays::PrimitiveArray;
1416
use vortex_array::arrays::StructArray;
1517
use vortex_array::arrays::VarBinViewArray;
1618
use vortex_array::validity::Validity;
1719
use vortex_dtype::DType;
20+
use vortex_dtype::ExtDType;
1821
use vortex_dtype::match_each_decimal_value_type;
1922
use vortex_dtype::match_each_native_ptype;
2023
use vortex_error::VortexResult;
@@ -113,8 +116,24 @@ pub fn slice_canonical_array(
113116
.to_array(),
114117
)
115118
}
116-
d @ (DType::Null | DType::Extension(_)) => {
117-
unreachable!("DType {d} not supported for fuzzing")
119+
DType::Extension(ext_dtype) => {
120+
// Extension arrays delegate slicing to their storage type
121+
let sliced_storage =
122+
slice_canonical_array(array.to_extension().storage(), start, stop)?;
123+
124+
if sliced_storage.dtype().nullability() == ext_dtype.storage_dtype().nullability() {
125+
Ok(ExtensionArray::new(ext_dtype.clone(), sliced_storage).into_array())
126+
} else {
127+
let new_ext_dtype = Arc::new(ExtDType::new(
128+
ext_dtype.id().clone(),
129+
Arc::new(sliced_storage.dtype().clone()),
130+
ext_dtype.metadata().cloned(),
131+
));
132+
Ok(ExtensionArray::new(new_ext_dtype, sliced_storage).into_array())
133+
}
134+
}
135+
DType::Null => {
136+
unreachable!("Cannot slice a Null array")
118137
}
119138
}
120139
}

fuzz/src/array/sort.rs

Lines changed: 21 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
// SPDX-FileCopyrightText: Copyright the Vortex contributors
33

44
use std::cmp::Ordering;
5+
use std::sync::Arc;
56

67
use vortex_array::Array;
78
use vortex_array::ArrayRef;
@@ -10,9 +11,11 @@ use vortex_array::ToCanonical;
1011
use vortex_array::accessor::ArrayAccessor;
1112
use vortex_array::arrays::BoolArray;
1213
use vortex_array::arrays::DecimalArray;
14+
use vortex_array::arrays::ExtensionArray;
1315
use vortex_array::arrays::PrimitiveArray;
1416
use vortex_array::arrays::VarBinViewArray;
1517
use vortex_dtype::DType;
18+
use vortex_dtype::ExtDType;
1619
use vortex_dtype::NativePType;
1720
use vortex_dtype::match_each_decimal_value_type;
1821
use vortex_dtype::match_each_native_ptype;
@@ -80,8 +83,24 @@ pub fn sort_canonical_array(array: &dyn Array) -> VortexResult<ArrayRef> {
8083
});
8184
take_canonical_array_non_nullable_indices(array, &sort_indices)
8285
}
83-
d @ (DType::Null | DType::Extension(_)) => {
84-
unreachable!("DType {d} not supported for fuzzing")
86+
DType::Null => {
87+
// Null arrays don't need sorting - all elements are null
88+
Ok(array.to_array())
89+
}
90+
DType::Extension(ext_dtype) => {
91+
// Extension arrays delegate sorting to their storage type
92+
let sorted_storage = sort_canonical_array(array.to_extension().storage())?;
93+
94+
if sorted_storage.dtype().nullability() == ext_dtype.storage_dtype().nullability() {
95+
Ok(ExtensionArray::new(ext_dtype.clone(), sorted_storage).into_array())
96+
} else {
97+
let new_ext_dtype = Arc::new(ExtDType::new(
98+
ext_dtype.id().clone(),
99+
Arc::new(sorted_storage.dtype().clone()),
100+
ext_dtype.metadata().cloned(),
101+
));
102+
Ok(ExtensionArray::new(new_ext_dtype, sorted_storage).into_array())
103+
}
85104
}
86105
}
87106
}

fuzz/src/array/take.rs

Lines changed: 21 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,16 @@
11
// SPDX-License-Identifier: Apache-2.0
22
// SPDX-FileCopyrightText: Copyright the Vortex contributors
33

4+
use std::sync::Arc;
5+
46
use vortex_array::Array;
57
use vortex_array::ArrayRef;
68
use vortex_array::IntoArray;
79
use vortex_array::ToCanonical;
810
use vortex_array::accessor::ArrayAccessor;
911
use vortex_array::arrays::BoolArray;
1012
use vortex_array::arrays::DecimalArray;
13+
use vortex_array::arrays::ExtensionArray;
1114
use vortex_array::arrays::PrimitiveArray;
1215
use vortex_array::arrays::StructArray;
1316
use vortex_array::arrays::VarBinViewArray;
@@ -16,14 +19,14 @@ use vortex_array::validity::Validity;
1619
use vortex_buffer::Buffer;
1720
use vortex_dtype::DType;
1821
use vortex_dtype::DecimalDType;
22+
use vortex_dtype::ExtDType;
1923
use vortex_dtype::NativeDecimalType;
2024
use vortex_dtype::NativePType;
2125
use vortex_dtype::Nullability;
2226
use vortex_dtype::match_each_decimal_value_type;
2327
use vortex_dtype::match_each_native_ptype;
2428
use vortex_error::VortexExpect;
2529
use vortex_error::VortexResult;
26-
2730
pub fn take_canonical_array_non_nullable_indices(
2831
array: &dyn Array,
2932
indices: &[usize],
@@ -141,8 +144,23 @@ pub fn take_canonical_array(
141144
}
142145
Ok(builder.finish())
143146
}
144-
d @ (DType::Null | DType::Extension(_)) => {
145-
unreachable!("DType {d} not supported for fuzzing")
147+
DType::Extension(ext_dtype) => {
148+
// Extension arrays delegate take to their storage type
149+
let taken_storage = take_canonical_array(array.to_extension().storage(), indices)?;
150+
151+
if taken_storage.dtype().nullability() == ext_dtype.storage_dtype().nullability() {
152+
Ok(ExtensionArray::new(ext_dtype.clone(), taken_storage).into_array())
153+
} else {
154+
let new_ext_dtype = Arc::new(ExtDType::new(
155+
ext_dtype.id().clone(),
156+
Arc::new(taken_storage.dtype().clone()),
157+
ext_dtype.metadata().cloned(),
158+
));
159+
Ok(ExtensionArray::new(new_ext_dtype, taken_storage).into_array())
160+
}
161+
}
162+
DType::Null => {
163+
unreachable!("Null type not supported for fuzzing")
146164
}
147165
}
148166
}

vortex-array/src/arrays/arbitrary.rs

Lines changed: 34 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ use arbitrary::Unstructured;
1010
use vortex_buffer::BitBuffer;
1111
use vortex_buffer::Buffer;
1212
use vortex_dtype::DType;
13+
use vortex_dtype::ExtDType;
1314
use vortex_dtype::IntegerPType;
1415
use vortex_dtype::NativePType;
1516
use vortex_dtype::Nullability;
@@ -164,12 +165,42 @@ fn random_array_chunk(
164165
DType::FixedSizeList(elem_dtype, list_size, null) => {
165166
random_fixed_size_list(u, elem_dtype, *list_size, *null, chunk_len)
166167
}
167-
DType::Extension(..) => {
168-
todo!("Extension arrays are not implemented")
169-
}
168+
DType::Extension(ext_dtype) => random_extension(u, ext_dtype, chunk_len),
170169
}
171170
}
172171

172+
/// Creates a random extension array.
173+
///
174+
/// If the `chunk_len` is specified, the length of the array will be equal to the chunk length.
175+
fn random_extension(
176+
u: &mut Unstructured,
177+
ext_dtype: &Arc<ExtDType>,
178+
chunk_len: Option<usize>,
179+
) -> Result<ArrayRef> {
180+
use crate::builders::ExtensionBuilder;
181+
182+
// Determine array length
183+
let array_length = chunk_len.unwrap_or(u.int_in_range(0..=20)?);
184+
185+
// Create builder for the extension array
186+
let mut builder = ExtensionBuilder::with_capacity(ext_dtype.clone(), array_length);
187+
188+
// Generate random values
189+
for _ in 0..array_length {
190+
// Wrap in extension scalar using Scalar::extension()
191+
let ext_scalar = Scalar::extension(
192+
ext_dtype.clone(),
193+
random_scalar(u, ext_dtype.storage_dtype())?,
194+
);
195+
196+
// Append to builder
197+
builder
198+
.append_scalar(&ext_scalar)
199+
.vortex_expect("can append extension scalar");
200+
}
201+
202+
Ok(builder.finish())
203+
}
173204
/// Creates a random fixed-size list array.
174205
///
175206
/// If the `chunk_len` is specified, the length of the array will be equal to the chunk length.

0 commit comments

Comments
 (0)