Skip to content

Commit e67f256

Browse files
authored
Fuzzer cleanup (#2107)
1 parent eb208c3 commit e67f256

File tree

8 files changed

+134
-149
lines changed

8 files changed

+134
-149
lines changed

fuzz/fuzz_targets/array_ops.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ fuzz_target!(|fuzz_action: FuzzArrayAction| -> Corpus {
4242
}
4343
Action::SearchSorted(s, side) => {
4444
// TODO(robert): Ideally we'd preserve the encoding perfectly but this is close enough
45-
let mut sorted = sort_canonical_array(&current_array);
45+
let mut sorted = sort_canonical_array(&current_array).unwrap();
4646
if !HashSet::from([
4747
&PrimitiveEncoding as EncodingRef,
4848
&VarBinEncoding,
@@ -85,8 +85,8 @@ fn assert_search_sorted(
8585
) {
8686
let search_result = search_sorted(&array, s.clone(), side).unwrap();
8787
assert_eq!(
88-
search_result,
8988
expected,
89+
search_result,
9090
"Expected to find {s}({}) at {expected} in {} from {side} but instead found it at {search_result} in step {step}",
9191
s.dtype(),
9292
array.tree_display()

fuzz/fuzz_targets/file_io.rs

Lines changed: 33 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ use libfuzzer_sys::{fuzz_target, Corpus};
99
use vortex_array::array::ChunkedArray;
1010
use vortex_array::compute::{compare, Operator};
1111
use vortex_array::{ArrayDType, ArrayData, IntoArrayData, IntoArrayVariant, IntoCanonical};
12-
use vortex_dtype::Nullability;
12+
use vortex_dtype::DType;
1313
use vortex_file::{Scan, VortexOpenOptions, VortexWriteOptions};
1414
use vortex_sampling_compressor::ALL_ENCODINGS_CONTEXT;
1515

@@ -18,7 +18,7 @@ fuzz_target!(|array_data: ArrayData| -> Corpus {
1818
return Corpus::Reject;
1919
}
2020

21-
if array_data.dtype().nullability() != Nullability::NonNullable {
21+
if has_nullable_struct(array_data.dtype()) {
2222
return Corpus::Reject;
2323
}
2424

@@ -68,25 +68,31 @@ fuzz_target!(|array_data: ArrayData| -> Corpus {
6868
Corpus::Keep
6969
});
7070

71-
fn compare_struct(lhs: ArrayData, rhs: ArrayData) {
72-
assert!(lhs.dtype().eq_ignore_nullability(rhs.dtype()));
73-
assert_eq!(lhs.len(), rhs.len(), "Arrays length isn't preserved");
71+
fn compare_struct(expected: ArrayData, actual: ArrayData) {
72+
assert_eq!(
73+
expected.dtype(),
74+
actual.dtype(),
75+
"DTypes aren't preserved expected {}, actual {}",
76+
expected.dtype(),
77+
actual.dtype()
78+
);
79+
assert_eq!(
80+
expected.len(),
81+
actual.len(),
82+
"Arrays length isn't preserved expected: {}, actual: {}",
83+
expected.len(),
84+
actual.len()
85+
);
7486

75-
if lhs.dtype().as_struct().unwrap().names().len() == 0
76-
&& lhs.dtype().as_struct().unwrap().names().len()
77-
== rhs.dtype().as_struct().unwrap().names().len()
87+
if expected.dtype().as_struct().unwrap().names().len() == 0
88+
&& expected.dtype().as_struct().unwrap().names().len()
89+
== actual.dtype().as_struct().unwrap().names().len()
7890
{
7991
return;
8092
}
8193

82-
if let Some(st) = lhs.dtype().as_struct() {
83-
if st.names().len() == 0 {
84-
return;
85-
}
86-
}
87-
88-
let arrow_lhs = lhs.clone().into_arrow().unwrap();
89-
let arrow_rhs = rhs.clone().into_arrow().unwrap();
94+
let arrow_lhs = expected.clone().into_arrow().unwrap();
95+
let arrow_rhs = actual.clone().into_arrow().unwrap();
9096

9197
let cmp_fn = make_comparator(&arrow_lhs, &arrow_rhs, SortOptions::default()).unwrap();
9298

@@ -99,8 +105,16 @@ fn compare_struct(lhs: ArrayData, rhs: ArrayData) {
99105
assert_eq!(
100106
bool_builder.finish().count_set_bits(),
101107
arrow_lhs.len(),
102-
"\nLHS: {}RHS: {}",
103-
lhs.tree_display(),
104-
rhs.tree_display()
108+
"\nEXPECTED: {}ACTUAL: {}",
109+
expected.tree_display(),
110+
actual.tree_display()
105111
);
106112
}
113+
114+
fn has_nullable_struct(dtype: &DType) -> bool {
115+
dtype.is_nullable()
116+
|| dtype
117+
.as_struct()
118+
.map(|sdt| sdt.dtypes().any(|dtype| has_nullable_struct(&dtype)))
119+
.unwrap_or(false)
120+
}

fuzz/src/filter.rs

Lines changed: 20 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -5,18 +5,16 @@ use vortex_array::variants::StructArrayTrait;
55
use vortex_array::{ArrayDType, ArrayData, IntoArrayData, IntoArrayVariant};
66
use vortex_buffer::Buffer;
77
use vortex_dtype::{match_each_native_ptype, DType};
8-
use vortex_error::VortexExpect;
8+
use vortex_error::VortexResult;
99

1010
use crate::take::take_canonical_array;
1111

12-
pub fn filter_canonical_array(array: &ArrayData, filter: &[bool]) -> ArrayData {
12+
pub fn filter_canonical_array(array: &ArrayData, filter: &[bool]) -> VortexResult<ArrayData> {
1313
let validity = if array.dtype().is_nullable() {
1414
let validity_buff = array
15-
.logical_validity()
16-
.vortex_expect("Failed to get logical validity")
15+
.logical_validity()?
1716
.into_array()
18-
.into_bool()
19-
.unwrap()
17+
.into_bool()?
2018
.boolean_buffer();
2119
Validity::from_iter(
2220
filter
@@ -31,7 +29,7 @@ pub fn filter_canonical_array(array: &ArrayData, filter: &[bool]) -> ArrayData {
3129

3230
match array.dtype() {
3331
DType::Bool(_) => {
34-
let bool_array = array.clone().into_bool().unwrap();
32+
let bool_array = array.clone().into_bool()?;
3533
BoolArray::try_new(
3634
BooleanBuffer::from_iter(
3735
filter
@@ -42,12 +40,11 @@ pub fn filter_canonical_array(array: &ArrayData, filter: &[bool]) -> ArrayData {
4240
),
4341
validity,
4442
)
45-
.vortex_expect("Validity length cannot mismatch")
46-
.into_array()
43+
.map(|a| a.into_array())
4744
}
4845
DType::Primitive(p, _) => match_each_native_ptype!(p, |$P| {
49-
let primitive_array = array.clone().into_primitive().unwrap();
50-
PrimitiveArray::new(
46+
let primitive_array = array.clone().into_primitive()?;
47+
Ok(PrimitiveArray::new(
5148
filter
5249
.iter()
5350
.zip(primitive_array.as_slice::<$P>().iter().copied())
@@ -56,35 +53,32 @@ pub fn filter_canonical_array(array: &ArrayData, filter: &[bool]) -> ArrayData {
5653
.collect::<Buffer<_>>(),
5754
validity,
5855
)
59-
.into_array()
56+
.into_array())
6057
}),
6158
DType::Utf8(_) | DType::Binary(_) => {
62-
let utf8 = array.clone().into_varbinview().unwrap();
63-
let values = utf8
64-
.with_iterator(|iter| {
65-
iter.zip(filter.iter())
66-
.filter(|(_, f)| **f)
67-
.map(|(v, _)| v.map(|u| u.to_vec()))
68-
.collect::<Vec<_>>()
69-
})
70-
.unwrap();
71-
VarBinViewArray::from_iter(values, array.dtype().clone()).into_array()
59+
let utf8 = array.clone().into_varbinview()?;
60+
let values = utf8.with_iterator(|iter| {
61+
iter.zip(filter.iter())
62+
.filter(|(_, f)| **f)
63+
.map(|(v, _)| v.map(|u| u.to_vec()))
64+
.collect::<Vec<_>>()
65+
})?;
66+
Ok(VarBinViewArray::from_iter(values, array.dtype().clone()).into_array())
7267
}
7368
DType::Struct(..) => {
74-
let struct_array = array.clone().into_struct().unwrap();
69+
let struct_array = array.clone().into_struct()?;
7570
let filtered_children = struct_array
7671
.children()
7772
.map(|c| filter_canonical_array(&c, filter))
78-
.collect::<Vec<_>>();
73+
.collect::<VortexResult<Vec<_>>>()?;
7974

8075
StructArray::try_new(
8176
struct_array.names().clone(),
8277
filtered_children,
8378
filter.iter().filter(|b| **b).map(|b| *b as usize).sum(),
8479
validity,
8580
)
86-
.unwrap()
87-
.into_array()
81+
.map(|a| a.into_array())
8882
}
8983
DType::List(..) => {
9084
let mut indices = Vec::new();

fuzz/src/lib.rs

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -91,7 +91,7 @@ impl<'a> Arbitrary<'a> for FuzzArrayAction {
9191
1 => {
9292
let start = u.choose_index(current_array.len())?;
9393
let stop = u.int_in_range(start..=current_array.len())?;
94-
current_array = slice_canonical_array(&current_array, start, stop);
94+
current_array = slice_canonical_array(&current_array, start, stop).unwrap();
9595

9696
(
9797
Action::Slice(start..stop),
@@ -104,7 +104,7 @@ impl<'a> Arbitrary<'a> for FuzzArrayAction {
104104
}
105105

106106
let indices = random_vec_in_range(u, 0, current_array.len() - 1)?;
107-
current_array = take_canonical_array(&current_array, &indices);
107+
current_array = take_canonical_array(&current_array, &indices).unwrap();
108108
let indices_array = indices
109109
.iter()
110110
.map(|i| *i as u64)
@@ -129,7 +129,7 @@ impl<'a> Arbitrary<'a> for FuzzArrayAction {
129129
return Err(EmptyChoose);
130130
}
131131

132-
let sorted = sort_canonical_array(&current_array);
132+
let sorted = sort_canonical_array(&current_array).unwrap();
133133

134134
let side = if u.arbitrary()? {
135135
SearchSortedSide::Left
@@ -147,7 +147,7 @@ impl<'a> Arbitrary<'a> for FuzzArrayAction {
147147
let mask = (0..current_array.len())
148148
.map(|_| bool::arbitrary(u))
149149
.collect::<Result<Vec<_>>>()?;
150-
current_array = filter_canonical_array(&current_array, &mask);
150+
current_array = filter_canonical_array(&current_array, &mask).unwrap();
151151
(
152152
Action::Filter(Mask::from_iter(mask)),
153153
ExpectedValue::Array(current_array.clone()),

fuzz/src/search_sorted.rs

Lines changed: 20 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
use std::cmp::Ordering;
2+
use std::fmt::Debug;
23

34
use vortex_array::accessor::ArrayAccessor;
45
use vortex_array::compute::{
@@ -13,18 +14,10 @@ use vortex_scalar::Scalar;
1314

1415
struct SearchNullableSlice<T>(Vec<Option<T>>);
1516

16-
impl<T: PartialOrd> IndexOrd<Option<T>> for SearchNullableSlice<T> {
17+
impl<T: PartialOrd + Debug> IndexOrd<Option<T>> for SearchNullableSlice<T> {
1718
fn index_cmp(&self, idx: usize, elem: &Option<T>) -> Option<Ordering> {
18-
match elem {
19-
None => unreachable!("Can't search for None"),
20-
Some(v) => {
21-
// SAFETY: Used in search_sorted_by same as the standard library. The search_sorted ensures idx is in bounds
22-
match unsafe { self.0.get_unchecked(idx) } {
23-
None => Some(Ordering::Greater),
24-
Some(i) => i.partial_cmp(v),
25-
}
26-
}
27-
}
19+
// SAFETY: Used in search_sorted_by same as the standard library. The search_sorted ensures idx is in bounds
20+
unsafe { self.0.get_unchecked(idx) }.partial_cmp(elem)
2821
}
2922
}
3023

@@ -43,7 +36,7 @@ impl<T: NativePType> IndexOrd<Option<T>> for SearchPrimitiveSlice<T> {
4336
Some(v) => {
4437
// SAFETY: Used in search_sorted_by same as the standard library. The search_sorted ensures idx is in bounds
4538
match unsafe { self.0.get_unchecked(idx) } {
46-
None => Some(Ordering::Greater),
39+
None => Some(Ordering::Less),
4740
Some(i) => Some(i.total_compare(*v)),
4841
}
4942
}
@@ -64,19 +57,19 @@ pub fn search_sorted_canonical_array(
6457
) -> VortexResult<SearchResult> {
6558
match array.dtype() {
6659
DType::Bool(_) => {
67-
let bool_array = array.clone().into_bool().unwrap();
60+
let bool_array = array.clone().into_bool()?;
6861
let validity = bool_array.logical_validity()?.to_boolean_buffer();
6962
let opt_values = bool_array
7063
.boolean_buffer()
7164
.iter()
7265
.zip(validity.iter())
7366
.map(|(b, v)| v.then_some(b))
7467
.collect::<Vec<_>>();
75-
let to_find = scalar.try_into().unwrap();
68+
let to_find = scalar.try_into()?;
7669
Ok(SearchNullableSlice(opt_values).search_sorted(&Some(to_find), side))
7770
}
7871
DType::Primitive(p, _) => {
79-
let primitive_array = array.clone().into_primitive().unwrap();
72+
let primitive_array = array.clone().into_primitive()?;
8073
let validity = primitive_array.logical_validity()?.to_boolean_buffer();
8174
match_each_native_ptype!(p, |$P| {
8275
let opt_values = primitive_array
@@ -86,37 +79,32 @@ pub fn search_sorted_canonical_array(
8679
.zip(validity.iter())
8780
.map(|(b, v)| v.then_some(b))
8881
.collect::<Vec<_>>();
89-
let to_find: $P = scalar.try_into().unwrap();
82+
let to_find: $P = scalar.try_into()?;
9083
Ok(SearchPrimitiveSlice(opt_values).search_sorted(&Some(to_find), side))
9184
})
9285
}
9386
DType::Utf8(_) | DType::Binary(_) => {
94-
let utf8 = array.clone().into_varbinview().unwrap();
95-
let opt_values = utf8
96-
.with_iterator(|iter| iter.map(|v| v.map(|u| u.to_vec())).collect::<Vec<_>>())
97-
.unwrap();
87+
let utf8 = array.clone().into_varbinview()?;
88+
let opt_values =
89+
utf8.with_iterator(|iter| iter.map(|v| v.map(|u| u.to_vec())).collect::<Vec<_>>())?;
9890
let to_find = if matches!(array.dtype(), DType::Utf8(_)) {
99-
BufferString::try_from(scalar)
100-
.unwrap()
101-
.as_str()
102-
.as_bytes()
103-
.to_vec()
91+
BufferString::try_from(scalar)?.as_str().as_bytes().to_vec()
10492
} else {
105-
ByteBuffer::try_from(scalar).unwrap().to_vec()
93+
ByteBuffer::try_from(scalar)?.to_vec()
10694
};
10795
Ok(SearchNullableSlice(opt_values).search_sorted(&Some(to_find), side))
10896
}
10997
DType::Struct(..) => {
11098
let scalar_vals = (0..array.len())
111-
.map(|i| scalar_at(array, i).unwrap())
112-
.collect::<Vec<_>>();
113-
Ok(scalar_vals.search_sorted(&scalar.cast(array.dtype()).unwrap(), side))
99+
.map(|i| scalar_at(array, i))
100+
.collect::<VortexResult<Vec<_>>>()?;
101+
Ok(scalar_vals.search_sorted(&scalar.cast(array.dtype())?, side))
114102
}
115103
DType::List(..) => {
116104
let scalar_vals = (0..array.len())
117-
.map(|i| scalar_at(array, i).unwrap())
118-
.collect::<Vec<_>>();
119-
Ok(scalar_vals.search_sorted(&scalar.cast(array.dtype()).unwrap(), side))
105+
.map(|i| scalar_at(array, i))
106+
.collect::<VortexResult<Vec<_>>>()?;
107+
Ok(scalar_vals.search_sorted(&scalar.cast(array.dtype())?, side))
120108
}
121109
_ => unreachable!("Not a canonical array"),
122110
}

0 commit comments

Comments
 (0)