Skip to content

Commit 4c88db1

Browse files
chore[fuzzer]: move array and file ops into their own modules (#3595)
1 parent f9c55df commit 4c88db1

File tree

10 files changed

+284
-274
lines changed

10 files changed

+284
-274
lines changed
File renamed without changes.
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ use vortex_dtype::{DType, match_each_native_ptype};
99
use vortex_error::VortexResult;
1010
use vortex_scalar::match_each_decimal_value_type;
1111

12-
use crate::take::take_canonical_array_non_nullable_indices;
12+
use crate::array::take_canonical_array_non_nullable_indices;
1313

1414
pub fn filter_canonical_array(array: &dyn Array, filter: &[bool]) -> VortexResult<ArrayRef> {
1515
let validity = if array.dtype().is_nullable() {

fuzz/src/array/mod.rs

Lines changed: 254 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,254 @@
1+
mod cast;
2+
mod compare;
3+
mod filter;
4+
mod search_sorted;
5+
mod slice;
6+
mod sort;
7+
mod take;
8+
9+
use std::iter;
10+
use std::ops::Range;
11+
12+
pub(crate) use cast::*;
13+
pub(crate) use compare::*;
14+
pub(crate) use filter::*;
15+
use libfuzzer_sys::arbitrary::Error::EmptyChoose;
16+
use libfuzzer_sys::arbitrary::{Arbitrary, Unstructured};
17+
pub(crate) use search_sorted::*;
18+
pub(crate) use slice::*;
19+
pub use sort::sort_canonical_array;
20+
use strum::EnumCount;
21+
pub(crate) use take::*;
22+
use vortex_array::arrays::PrimitiveArray;
23+
use vortex_array::arrays::arbitrary::ArbitraryArray;
24+
use vortex_array::compute::{CastOutcome, Operator, allowed_casting};
25+
use vortex_array::search_sorted::{SearchResult, SearchSortedSide};
26+
use vortex_array::{ArrayRef, IntoArray};
27+
use vortex_btrblocks::BtrBlocksCompressor;
28+
use vortex_dtype::{DType, Nullability};
29+
use vortex_error::{VortexExpect, VortexUnwrap, vortex_panic};
30+
use vortex_mask::Mask;
31+
use vortex_scalar::Scalar;
32+
use vortex_scalar::arbitrary::random_scalar;
33+
use vortex_utils::aliases::hash_set::HashSet;
34+
35+
use crate::array::Action::Cast;
36+
37+
#[derive(Debug)]
38+
pub struct FuzzArrayAction {
39+
pub array: ArrayRef,
40+
pub actions: Vec<(Action, ExpectedValue)>,
41+
}
42+
43+
#[derive(Debug, EnumCount)]
44+
pub enum Action {
45+
Compress,
46+
Slice(Range<usize>),
47+
Take(ArrayRef),
48+
SearchSorted(Scalar, SearchSortedSide),
49+
Filter(Mask),
50+
Compare(Scalar, Operator),
51+
Cast(DType),
52+
}
53+
54+
#[derive(Debug)]
55+
pub enum ExpectedValue {
56+
Array(ArrayRef),
57+
Search(SearchResult),
58+
}
59+
60+
impl ExpectedValue {
61+
pub fn array(self) -> ArrayRef {
62+
match self {
63+
ExpectedValue::Array(array) => array,
64+
_ => vortex_panic!("expected array"),
65+
}
66+
}
67+
68+
pub fn search(self) -> SearchResult {
69+
match self {
70+
ExpectedValue::Search(s) => s,
71+
_ => vortex_panic!("expected search"),
72+
}
73+
}
74+
}
75+
76+
// Index range with one entry per `Action` variant (`Action::COUNT` comes from the `EnumCount` derive).
const ALL_ACTIONS: Range<usize> = 0..Action::COUNT;
77+
78+
impl<'a> Arbitrary<'a> for FuzzArrayAction {
79+
fn arbitrary(u: &mut Unstructured<'a>) -> libfuzzer_sys::arbitrary::Result<Self> {
80+
let array = ArbitraryArray::arbitrary(u)?.0;
81+
let mut current_array = array.to_array();
82+
83+
let mut valid_actions = actions_for_dtype(current_array.dtype())
84+
.into_iter()
85+
.collect::<Vec<_>>();
86+
valid_actions.sort_unstable();
87+
88+
let mut actions = Vec::new();
89+
let action_count = u.int_in_range(1..=4)?;
90+
for _ in 0..action_count {
91+
actions.push(match random_value_from_list(u, valid_actions.as_slice())? {
92+
0 => {
93+
if actions
94+
.last()
95+
.map(|(l, _)| matches!(l, Action::Compress))
96+
.unwrap_or(false)
97+
{
98+
return Err(EmptyChoose);
99+
}
100+
(
101+
Action::Compress,
102+
ExpectedValue::Array(current_array.to_array()),
103+
)
104+
}
105+
1 => {
106+
let start = u.choose_index(current_array.len())?;
107+
let stop = u.int_in_range(start..=current_array.len())?;
108+
current_array =
109+
slice_canonical_array(&current_array, start, stop).vortex_unwrap();
110+
111+
(
112+
Action::Slice(start..stop),
113+
ExpectedValue::Array(current_array.to_array()),
114+
)
115+
}
116+
2 => {
117+
if current_array.is_empty() {
118+
return Err(EmptyChoose);
119+
}
120+
121+
let indices = random_vec_in_range(u, 0, current_array.len() - 1)?;
122+
current_array = take_canonical_array(&current_array, &indices).vortex_unwrap();
123+
let indices_array = PrimitiveArray::from_option_iter(
124+
indices.iter().map(|i| i.map(|i| i as u64)),
125+
)
126+
.into_array();
127+
128+
let compressed = BtrBlocksCompressor.compress(&indices_array).vortex_unwrap();
129+
(
130+
Action::Take(compressed),
131+
ExpectedValue::Array(current_array.to_array()),
132+
)
133+
}
134+
3 => {
135+
if current_array.dtype().is_struct() {
136+
return Err(EmptyChoose);
137+
}
138+
139+
let scalar = if u.arbitrary()? {
140+
current_array
141+
.scalar_at(u.choose_index(current_array.len())?)
142+
.vortex_unwrap()
143+
} else {
144+
random_scalar(u, current_array.dtype())?
145+
};
146+
147+
if scalar.is_null() {
148+
return Err(EmptyChoose);
149+
}
150+
151+
let sorted = sort_canonical_array(&current_array).vortex_unwrap();
152+
153+
let side = if u.arbitrary()? {
154+
SearchSortedSide::Left
155+
} else {
156+
SearchSortedSide::Right
157+
};
158+
(
159+
Action::SearchSorted(scalar.clone(), side),
160+
ExpectedValue::Search(
161+
search_sorted_canonical_array(&sorted, &scalar, side).vortex_unwrap(),
162+
),
163+
)
164+
}
165+
4 => {
166+
let mask = (0..current_array.len())
167+
.map(|_| bool::arbitrary(u))
168+
.collect::<libfuzzer_sys::arbitrary::Result<Vec<_>>>()?;
169+
current_array = filter_canonical_array(&current_array, &mask).vortex_unwrap();
170+
(
171+
Action::Filter(Mask::from_iter(mask)),
172+
ExpectedValue::Array(current_array.to_array()),
173+
)
174+
}
175+
5 => {
176+
let scalar = if u.arbitrary()? {
177+
current_array
178+
.scalar_at(u.choose_index(current_array.len())?)
179+
.vortex_unwrap()
180+
} else {
181+
// We can compare arrays with different nullability
182+
let null: Nullability = u.arbitrary()?;
183+
random_scalar(u, &current_array.dtype().union_nullability(null))?
184+
};
185+
186+
let op = u.arbitrary()?;
187+
current_array =
188+
compare_canonical_array(&current_array, &scalar, op).vortex_unwrap();
189+
(
190+
Action::Compare(scalar, op),
191+
ExpectedValue::Array(current_array.to_array()),
192+
)
193+
}
194+
6 => {
195+
let to: DType = u.arbitrary()?;
196+
if Some(CastOutcome::Infallible) == allowed_casting(current_array.dtype(), &to)
197+
{
198+
return Err(EmptyChoose);
199+
}
200+
let Some(result) = cast_canonical_array(&current_array, &to)
201+
.vortex_expect("should fail to create array")
202+
else {
203+
return Err(EmptyChoose);
204+
};
205+
206+
(Cast(to), ExpectedValue::Array(result))
207+
}
208+
7.. => unreachable!(),
209+
})
210+
}
211+
212+
Ok(Self { array, actions })
213+
}
214+
}
215+
216+
fn actions_for_dtype(dtype: &DType) -> HashSet<usize> {
217+
match dtype {
218+
DType::Struct(sdt, _) => sdt
219+
.fields()
220+
.map(|child| actions_for_dtype(&child))
221+
// exclude compare
222+
.fold((0..=4).chain(iter::once(6)).collect(), |acc, actions| {
223+
acc.intersection(&actions).copied().collect()
224+
}),
225+
// Once we support more list operations also recurse here on child dtype
226+
// compress, slice
227+
DType::List(..) => [0, 1].into_iter().collect(),
228+
_ => ALL_ACTIONS.collect(),
229+
}
230+
}
231+
232+
/// Draws a random-length vector whose entries are either `None` or an index in `min..=max`.
///
/// Before each entry a boolean is drawn from `u`; generation stops as soon as that draw is
/// `false` or fails. A second boolean then selects between `None` and a value in range.
///
/// # Errors
///
/// Returns the first error raised while drawing an entry.
fn random_vec_in_range(
    u: &mut Unstructured<'_>,
    min: usize,
    max: usize,
) -> libfuzzer_sys::arbitrary::Result<Vec<Option<usize>>> {
    let mut values = Vec::new();
    // A failed "continue?" draw is treated the same as `false`: stop generating.
    while u.arbitrary().unwrap_or(false) {
        let entry = if u.arbitrary()? {
            None
        } else {
            Some(u.int_in_range(min..=max)?)
        };
        values.push(entry);
    }
    Ok(values)
}
248+
249+
/// Picks one element of `vec` using `u` and returns it by value.
///
/// # Errors
///
/// Propagates the error returned by `Unstructured::choose_iter`.
fn random_value_from_list(
    u: &mut Unstructured<'_>,
    vec: &[usize],
) -> libfuzzer_sys::arbitrary::Result<usize> {
    let picked = u.choose_iter(vec)?;
    Ok(*picked)
}
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ use vortex_dtype::{DType, NativePType, match_each_native_ptype};
77
use vortex_error::{VortexExpect, VortexResult, VortexUnwrap};
88
use vortex_scalar::match_each_decimal_value_type;
99

10-
use crate::take::take_canonical_array_non_nullable_indices;
10+
use crate::array::take_canonical_array_non_nullable_indices;
1111

1212
pub fn sort_canonical_array(array: &dyn Array) -> VortexResult<ArrayRef> {
1313
match array.dtype() {
File renamed without changes.

fuzz/src/file/mod.rs

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
use libfuzzer_sys::arbitrary::{Arbitrary, Unstructured};
2+
use vortex_array::ArrayRef;
3+
use vortex_array::arrays::arbitrary::ArbitraryArray;
4+
use vortex_expr::ExprRef;
5+
use vortex_expr::arbitrary::{filter_expr, projection_expr};
6+
7+
#[derive(Debug)]
8+
pub struct FuzzFileAction {
9+
pub array: ArrayRef,
10+
pub projection: Option<ExprRef>,
11+
pub filter: Option<ExprRef>,
12+
}
13+
14+
impl<'a> Arbitrary<'a> for FuzzFileAction {
15+
fn arbitrary(u: &mut Unstructured<'a>) -> libfuzzer_sys::arbitrary::Result<Self> {
16+
let array = ArbitraryArray::arbitrary(u)?.0;
17+
let dtype = array.dtype().clone();
18+
Ok(FuzzFileAction {
19+
array,
20+
projection: projection_expr(u, &dtype)?,
21+
filter: filter_expr(u, &dtype)?,
22+
})
23+
}
24+
}

0 commit comments

Comments
 (0)