Skip to content

Commit f9c55df

Browse files
chore[fuzzer]: add projection and filter expressions to the fuzzer (#3586)
I will refactor the fuzzer into two separate targets (file and array) in a future PR. Signed-off-by: Joe Isaacs <[email protected]>
1 parent 61dfc79 commit f9c55df

File tree

7 files changed

+106
-3
lines changed

7 files changed

+106
-3
lines changed

Cargo.lock

Lines changed: 2 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

fuzz/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ vortex-btrblocks = { workspace = true }
3030
vortex-buffer = { workspace = true }
3131
vortex-dtype = { workspace = true, features = ["arbitrary"] }
3232
vortex-error = { workspace = true }
33+
vortex-expr = { workspace = true, features = ["arbitrary"] }
3334
vortex-file = { workspace = true }
3435
vortex-mask = { workspace = true }
3536
vortex-scalar = { workspace = true, features = ["arbitrary"] }

fuzz/fuzz_targets/file_io.rs

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,19 +6,25 @@ use arrow_ord::sort::SortOptions;
66
use futures_util::TryStreamExt;
77
use libfuzzer_sys::{Corpus, fuzz_target};
88
use vortex_array::arrays::ChunkedArray;
9-
use vortex_array::arrays::arbitrary::ArbitraryArray;
109
use vortex_array::arrow::IntoArrowArray;
1110
use vortex_array::compute::{Operator, compare};
1211
use vortex_array::{Array, ArrayRef, Canonical, IntoArray, ToCanonical};
1312
use vortex_buffer::ByteBufferMut;
1413
use vortex_dtype::{DType, StructFields};
1514
use vortex_error::{VortexExpect, VortexUnwrap, vortex_panic};
15+
use vortex_expr::root;
1616
use vortex_file::{VortexOpenOptions, VortexWriteOptions};
17+
use vortex_fuzz::FuzzFileAction;
1718
use vortex_utils::aliases::DefaultHashBuilder;
1819
use vortex_utils::aliases::hash_set::HashSet;
1920

20-
fuzz_target!(|array_data: ArbitraryArray| -> Corpus {
21-
let array_data = array_data.0;
21+
fuzz_target!(|fuzz: FuzzFileAction| -> Corpus {
22+
let FuzzFileAction {
23+
array,
24+
projection,
25+
filter,
26+
} = fuzz;
27+
let array_data = array;
2228

2329
if has_nullable_struct(array_data.dtype()) || has_duplicate_field_names(array_data.dtype()) {
2430
return Corpus::Reject;
@@ -41,6 +47,8 @@ fuzz_target!(|array_data: ArbitraryArray| -> Corpus {
4147
.vortex_unwrap()
4248
.scan()
4349
.vortex_unwrap()
50+
.with_projection(projection.unwrap_or_else(|| root()))
51+
.with_some_filter(filter)
4452
.into_array_stream()
4553
.vortex_unwrap()
4654
.try_collect::<Vec<_>>()

fuzz/src/lib.rs

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,8 @@ use vortex_array::{Array, ArrayRef, IntoArray};
2525
use vortex_btrblocks::BtrBlocksCompressor;
2626
use vortex_dtype::{DType, Nullability};
2727
use vortex_error::{VortexExpect, VortexUnwrap, vortex_panic};
28+
use vortex_expr::ExprRef;
29+
use vortex_expr::arbitrary::{filter_expr, projection_expr};
2830
use vortex_mask::Mask;
2931
use vortex_scalar::Scalar;
3032
use vortex_scalar::arbitrary::random_scalar;
@@ -253,3 +255,22 @@ fn actions_for_dtype(dtype: &DType) -> HashSet<usize> {
253255
_ => ALL_ACTIONS.collect(),
254256
}
255257
}
258+
259+
#[derive(Debug)]
260+
pub struct FuzzFileAction {
261+
pub array: ArrayRef,
262+
pub projection: Option<ExprRef>,
263+
pub filter: Option<ExprRef>,
264+
}
265+
266+
impl<'a> Arbitrary<'a> for FuzzFileAction {
267+
fn arbitrary(u: &mut Unstructured<'a>) -> Result<Self> {
268+
let array = ArbitraryArray::arbitrary(u)?.0;
269+
let dtype = array.dtype().clone();
270+
Ok(FuzzFileAction {
271+
array,
272+
projection: projection_expr(u, &dtype)?,
273+
filter: filter_expr(u, &dtype)?,
274+
})
275+
}
276+
}

vortex-expr/Cargo.toml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ version = { workspace = true }
1717
workspace = true
1818

1919
[dependencies]
20+
arbitrary = { workspace = true, optional = true }
2021
dyn-hash = { workspace = true }
2122
itertools = { workspace = true }
2223
prost = { workspace = true, optional = true }
@@ -34,6 +35,11 @@ vortex-utils = { workspace = true }
3435
vortex-expr = { path = ".", features = ["test-harness"] }
3536

3637
[features]
38+
arbitrary = [
39+
"dep:arbitrary",
40+
"vortex-scalar/arbitrary",
41+
"vortex-dtype/arbitrary",
42+
]
3743
serde = ["dep:serde", "vortex-dtype/serde", "vortex-error/serde"]
3844
proto = ["vortex-proto/expr", "vortex-error/prost", "dep:prost", "serde"]
3945
test-harness = []

vortex-expr/src/arbitrary.rs

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
use std::cmp::max;
2+
3+
use arbitrary::{Result as AResult, Unstructured};
4+
use vortex_dtype::{DType, FieldName};
5+
use vortex_scalar::arbitrary::random_scalar;
6+
7+
use crate::{BinaryExpr, ExprRef, Operator, and_collect, get_item_scope, lit, pack};
8+
9+
pub fn projection_expr(u: &mut Unstructured<'_>, dtype: &DType) -> AResult<Option<ExprRef>> {
10+
let Some(struct_dtype) = dtype.as_struct() else {
11+
return Ok(None);
12+
};
13+
14+
let column_count = u.int_in_range::<usize>(0..=max(struct_dtype.nfields(), 10))?;
15+
16+
let cols = (0..column_count)
17+
.map(|_| {
18+
let get_item = u.choose(struct_dtype.names().iter().as_slice())?;
19+
Ok((get_item.clone(), get_item_scope(get_item.clone())))
20+
})
21+
.collect::<AResult<Vec<_>>>()?;
22+
23+
Ok(Some(pack(cols, u.arbitrary()?)))
24+
}
25+
26+
pub fn filter_expr(u: &mut Unstructured<'_>, dtype: &DType) -> AResult<Option<ExprRef>> {
27+
let Some(struct_dtype) = dtype.as_struct() else {
28+
return Ok(None);
29+
};
30+
31+
let filter_count = u.int_in_range::<usize>(0..=max(struct_dtype.nfields(), 10))?;
32+
33+
let filters = (0..filter_count)
34+
.map(|_| {
35+
let (col, dtype) =
36+
u.choose_iter(struct_dtype.names().iter().zip(struct_dtype.fields()))?;
37+
random_comparison(u, col, &dtype)
38+
})
39+
.collect::<AResult<Vec<_>>>()?;
40+
41+
Ok(and_collect(filters))
42+
}
43+
44+
fn random_comparison(u: &mut Unstructured<'_>, col: &FieldName, dtype: &DType) -> AResult<ExprRef> {
45+
let scalar = random_scalar(u, dtype)?;
46+
Ok(BinaryExpr::new_expr(
47+
get_item_scope(col.clone()),
48+
arbitrary_comparison_operator(u)?,
49+
lit(scalar),
50+
))
51+
}
52+
53+
fn arbitrary_comparison_operator(u: &mut Unstructured<'_>) -> AResult<Operator> {
54+
Ok(match u.int_in_range(0..=5)? {
55+
0 => Operator::Eq,
56+
1 => Operator::NotEq,
57+
2 => Operator::Gt,
58+
3 => Operator::Gte,
59+
4 => Operator::Lt,
60+
5 => Operator::Lte,
61+
_ => unreachable!("range 0..=5"),
62+
})
63+
}

vortex-expr/src/lib.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,8 @@ use dyn_hash::DynHash;
88
mod binary;
99

1010
mod analysis;
11+
#[cfg(feature = "arbitrary")]
12+
pub mod arbitrary;
1113
mod between;
1214
mod cast;
1315
mod field;

0 commit comments

Comments
 (0)