Skip to content

Commit 8d2fcd9

Browse files
Support list in the fuzzer (only for implemented actions) (#1735)
This allows the fuzzer to support arrays for which only some of the compute functions are implemented.
1 parent 29da540 commit 8d2fcd9

File tree

13 files changed

+195
-41
lines changed

13 files changed

+195
-41
lines changed

Cargo.lock

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

fuzz/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ cargo-fuzz = true
1919

2020
[dependencies]
2121
libfuzzer-sys = { workspace = true }
22+
arrow-buffer = { workspace = true }
2223
vortex-array = { workspace = true, features = ["arbitrary"] }
2324
vortex-buffer = { workspace = true }
2425
vortex-dtype = { workspace = true, features = ["arbitrary"] }

fuzz/fuzz_targets/array_ops.rs

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,8 @@
33
use libfuzzer_sys::{fuzz_target, Corpus};
44
use vortex_array::aliases::hash_set::HashSet;
55
use vortex_array::array::{
6-
BoolEncoding, PrimitiveEncoding, StructEncoding, VarBinEncoding, VarBinViewEncoding,
6+
BoolEncoding, ListEncoding, PrimitiveEncoding, StructEncoding, VarBinEncoding,
7+
VarBinViewEncoding,
78
};
89
use vortex_array::compute::{
910
filter, scalar_at, search_sorted, slice, take, SearchResult, SearchSortedSide,
@@ -48,6 +49,7 @@ fuzz_target!(|fuzz_action: FuzzArrayAction| -> Corpus {
4849
&VarBinViewEncoding,
4950
&BoolEncoding,
5051
&StructEncoding,
52+
&ListEncoding,
5153
])
5254
.contains(&current_array.encoding())
5355
{

fuzz/src/filter.rs

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,8 @@ use vortex_buffer::Buffer;
77
use vortex_dtype::{match_each_native_ptype, DType};
88
use vortex_error::VortexExpect;
99

10+
use crate::take::take_canonical_array;
11+
1012
pub fn filter_canonical_array(array: &ArrayData, filter: &[bool]) -> ArrayData {
1113
let validity = if array.dtype().is_nullable() {
1214
let validity_buff = array
@@ -83,6 +85,15 @@ pub fn filter_canonical_array(array: &ArrayData, filter: &[bool]) -> ArrayData {
8385
.unwrap()
8486
.into_array()
8587
}
88+
DType::List(..) => {
89+
let mut indices = Vec::new();
90+
for (idx, bool) in filter.iter().enumerate() {
91+
if *bool {
92+
indices.push(idx);
93+
}
94+
}
95+
take_canonical_array(array, &indices)
96+
}
8697
_ => unreachable!("Not a canonical array"),
8798
}
8899
}

fuzz/src/lib.rs

Lines changed: 33 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,12 +6,15 @@ mod take;
66

77
use std::fmt::Debug;
88
use std::iter;
9-
use std::ops::Range;
9+
use std::ops::{Range, RangeInclusive};
1010

1111
use libfuzzer_sys::arbitrary::Error::EmptyChoose;
1212
use libfuzzer_sys::arbitrary::{Arbitrary, Result, Unstructured};
1313
pub use sort::sort_canonical_array;
14+
use vortex_array::aliases::hash_set::HashSet;
15+
use vortex_array::array::ListEncoding;
1416
use vortex_array::compute::{scalar_at, FilterMask, SearchResult, SearchSortedSide};
17+
use vortex_array::encoding::{Encoding, EncodingRef};
1518
use vortex_array::{ArrayDType, ArrayData, IntoArrayData};
1619
use vortex_buffer::Buffer;
1720
use vortex_sampling_compressor::SamplingCompressor;
@@ -64,10 +67,13 @@ impl<'a> Arbitrary<'a> for FuzzArrayAction {
6467
fn arbitrary(u: &mut Unstructured<'a>) -> Result<Self> {
6568
let array = ArrayData::arbitrary(u)?;
6669
let mut current_array = array.clone();
70+
71+
let valid_actions = actions_for_array(&current_array);
72+
6773
let mut actions = Vec::new();
6874
let action_count = u.int_in_range(1..=4)?;
6975
for _ in 0..action_count {
70-
actions.push(match u.int_in_range(0..=4)? {
76+
actions.push(match random_value_from_list(u, valid_actions.as_slice())? {
7177
0 => {
7278
if actions
7379
.last()
@@ -164,3 +170,28 @@ fn random_vec_in_range(u: &mut Unstructured<'_>, min: usize, max: usize) -> Resu
164170
})
165171
.collect::<Result<Vec<_>>>()
166172
}
173+
174+
/// Picks one element of `vec` using the fuzzer-provided `Unstructured` input.
///
/// Delegates to `Unstructured::choose_iter` and clones the chosen `usize`, so the
/// `Result` returned by `choose_iter` is passed straight back to the caller.
/// NOTE(review): presumably an empty `vec` yields an error rather than a panic —
/// confirm against the `arbitrary` crate documentation.
fn random_value_from_list(u: &mut Unstructured<'_>, vec: &[usize]) -> Result<usize> {
175+
u.choose_iter(vec).cloned()
176+
}
177+
178+
/// Indices of every fuzz action, `0` through `4` inclusive; used as the starting
/// set before encodings narrow it down.
const ALL_ACTIONS: RangeInclusive<usize> = 0..=4;
179+
180+
/// Returns the set of action indices that may be fuzzed against an array with the
/// given `encoding`.
///
/// An encoding whose id equals `ListEncoding::ID` is restricted to actions `0`, `1`
/// and `4`; every other encoding receives all of `ALL_ACTIONS`.
fn actions_for_encoding(encoding: EncodingRef) -> HashSet<usize> {
181+
if ListEncoding::ID == encoding.id() {
182+
// compress, slice and filter
183+
vec![0, 1, 4].into_iter().collect()
184+
} else {
185+
ALL_ACTIONS.collect()
186+
}
187+
}
188+
189+
/// Computes the action indices that are valid for `array` as a whole.
///
/// Starts from every index in `ALL_ACTIONS` and, for each array yielded by
/// `depth_first_traversal`, keeps only the indices that `actions_for_encoding`
/// allows for that array's encoding. The result is therefore the intersection of
/// the per-encoding sets, in ascending order because `retain` preserves the
/// original ordering of the range.
fn actions_for_array(array: &ArrayData) -> Vec<usize> {
190+
array
191+
.depth_first_traversal()
192+
.map(|child| actions_for_encoding(child.encoding()))
193+
.fold(ALL_ACTIONS.collect::<Vec<_>>(), |mut acc, actions| {
194+
acc.retain(|a| actions.contains(a));
195+
acc
196+
})
197+
}

fuzz/src/search_sorted.rs

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -121,6 +121,12 @@ pub fn search_sorted_canonical_array(
121121
.collect::<Vec<_>>();
122122
scalar_vals.search_sorted(&scalar.cast(array.dtype()).unwrap(), side)
123123
}
124+
DType::List(..) => {
125+
let scalar_vals = (0..array.len())
126+
.map(|i| scalar_at(array, i).unwrap())
127+
.collect::<Vec<_>>();
128+
scalar_vals.search_sorted(&scalar.cast(array.dtype()).unwrap(), side)
129+
}
124130
_ => unreachable!("Not a canonical array"),
125131
}
126132
}

fuzz/src/slice.rs

Lines changed: 38 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,10 @@
1+
use arrow_buffer::ArrowNativeType;
12
use vortex_array::accessor::ArrayAccessor;
2-
use vortex_array::array::{BoolArray, PrimitiveArray, StructArray, VarBinViewArray};
3+
use vortex_array::array::{BoolArray, ListArray, PrimitiveArray, StructArray, VarBinViewArray};
34
use vortex_array::validity::{ArrayValidity, Validity};
4-
use vortex_array::variants::StructArrayTrait;
5-
use vortex_array::{ArrayDType, ArrayData, IntoArrayData, IntoArrayVariant};
6-
use vortex_dtype::{match_each_native_ptype, DType};
5+
use vortex_array::variants::{PrimitiveArrayTrait, StructArrayTrait};
6+
use vortex_array::{ArrayDType, ArrayData, ArrayLen, IntoArrayData, IntoArrayVariant};
7+
use vortex_dtype::{match_each_native_ptype, DType, NativePType};
78
use vortex_error::VortexExpect;
89

910
pub fn slice_canonical_array(array: &ArrayData, start: usize, stop: usize) -> ArrayData {
@@ -28,10 +29,12 @@ pub fn slice_canonical_array(array: &ArrayData, start: usize, stop: usize) -> Ar
2829
.vortex_expect("Validity length cannot mismatch")
2930
.into_array()
3031
}
31-
DType::Primitive(p, _) => match_each_native_ptype!(p, |$P| {
32+
DType::Primitive(p, _) => {
3233
let primitive_array = array.clone().into_primitive().unwrap();
33-
PrimitiveArray::new(primitive_array.buffer::<$P>().slice(start..stop), validity).into_array()
34-
}),
34+
match_each_native_ptype!(p, |$P| {
35+
PrimitiveArray::new(primitive_array.buffer::<$P>().slice(start..stop), validity).into_array()
36+
})
37+
}
3538
DType::Utf8(_) | DType::Binary(_) => {
3639
let utf8 = array.clone().into_varbinview().unwrap();
3740
let values = utf8
@@ -55,6 +58,34 @@ pub fn slice_canonical_array(array: &ArrayData, start: usize, stop: usize) -> Ar
5558
.unwrap()
5659
.into_array()
5760
}
61+
DType::List(..) => {
62+
let list_array = array.clone().into_list().unwrap();
63+
let offsets = slice_canonical_array(&list_array.offsets(), start, stop + 1)
64+
.into_primitive()
65+
.unwrap();
66+
67+
let elements = slice_canonical_array(
68+
&list_array.elements(),
69+
offsets.get_as_cast::<u64>(0) as usize,
70+
offsets.get_as_cast::<u64>(offsets.len() - 1) as usize,
71+
);
72+
let offsets = match_each_native_ptype!(offsets.ptype(), |$P| {
73+
shift_offsets::<$P>(offsets)
74+
})
75+
.into_array();
76+
ListArray::try_new(elements, offsets, validity)
77+
.unwrap()
78+
.into_array()
79+
}
5880
_ => unreachable!("Not a canonical array"),
5981
}
6082
}
83+
84+
/// Rebases `offsets` so that the first entry becomes zero.
///
/// An empty array is returned unchanged. Otherwise the values are copied out as
/// `O`, the first value is subtracted from every entry, and a new `PrimitiveArray`
/// is built from the shifted values.
/// NOTE(review): the result is rebuilt from the raw values only, so this assumes
/// the offsets carry no nulls — confirm against the callers.
fn shift_offsets<O: NativePType + ArrowNativeType>(offsets: PrimitiveArray) -> PrimitiveArray {
85+
if offsets.is_empty() {
86+
return offsets;
87+
}
88+
let offsets: Vec<O> = offsets.as_slice().to_vec();
89+
let start = offsets[0];
90+
PrimitiveArray::from_iter(offsets.into_iter().map(|o| o - start).collect::<Vec<_>>())
91+
}

fuzz/src/sort.rs

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,17 @@ pub fn sort_canonical_array(array: &ArrayData) -> ArrayData {
7070
});
7171
take_canonical_array(array, &sort_indices)
7272
}
73-
_ => unreachable!("Not a canonical array"),
73+
DType::List(..) => {
74+
let mut sort_indices = (0..array.len()).collect::<Vec<_>>();
75+
sort_indices.sort_by(|a, b| {
76+
scalar_at(array, *a)
77+
.unwrap()
78+
.partial_cmp(&scalar_at(array, *b).unwrap())
79+
.unwrap()
80+
});
81+
take_canonical_array(array, &sort_indices)
82+
}
83+
a => unreachable!("Not a canonical array {:?}", a),
7484
}
7585
}
7686

fuzz/src/take.rs

Lines changed: 34 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,13 @@
1+
use arrow_buffer::ArrowNativeType;
12
use vortex_array::accessor::ArrayAccessor;
23
use vortex_array::array::{BoolArray, PrimitiveArray, StructArray, VarBinViewArray};
4+
use vortex_array::builders::{builder_with_capacity, ArrayBuilderExt};
5+
use vortex_array::compute::scalar_at;
36
use vortex_array::validity::{ArrayValidity, Validity};
47
use vortex_array::variants::StructArrayTrait;
58
use vortex_array::{ArrayDType, ArrayData, IntoArrayData, IntoArrayVariant};
69
use vortex_buffer::Buffer;
7-
use vortex_dtype::{match_each_native_ptype, DType};
10+
use vortex_dtype::{match_each_native_ptype, DType, NativePType};
811
use vortex_error::VortexExpect;
912

1013
pub fn take_canonical_array(array: &ArrayData, indices: &[usize]) -> ArrayData {
@@ -31,16 +34,12 @@ pub fn take_canonical_array(array: &ArrayData, indices: &[usize]) -> ArrayData {
3134
.vortex_expect("Validity length cannot mismatch")
3235
.into_array()
3336
}
34-
DType::Primitive(p, _) => match_each_native_ptype!(p, |$P| {
37+
DType::Primitive(p, _) => {
3538
let primitive_array = array.clone().into_primitive().unwrap();
36-
let vec_values = primitive_array
37-
.as_slice::<$P>()
38-
.iter()
39-
.copied()
40-
.collect::<Vec<_>>();
41-
PrimitiveArray::new(indices.iter().map(|i| vec_values[*i]).collect::<Buffer<$P>>(), validity)
42-
.into_array()
43-
}),
39+
match_each_native_ptype!(p, |$P| {
40+
take_primitive::<$P>(primitive_array, validity, indices)
41+
})
42+
}
4443
DType::Utf8(_) | DType::Binary(_) => {
4544
let utf8 = array.clone().into_varbinview().unwrap();
4645
let values = utf8
@@ -68,6 +67,31 @@ pub fn take_canonical_array(array: &ArrayData, indices: &[usize]) -> ArrayData {
6867
.unwrap()
6968
.into_array()
7069
}
70+
DType::List(..) => {
71+
let mut builder = builder_with_capacity(array.dtype(), indices.len());
72+
for idx in indices {
73+
builder
74+
.append_scalar(&scalar_at(array, *idx).unwrap())
75+
.unwrap();
76+
}
77+
builder.finish().unwrap()
78+
}
7179
_ => unreachable!("Not a canonical array"),
7280
}
7381
}
82+
83+
/// Gathers the values of `primitive_array` at the positions listed in `indices`
/// into a new primitive array.
///
/// The source values are copied into a `Vec<T>` and then indexed once per entry of
/// `indices`, so an index outside the source length panics. `validity` is attached
/// to the result as given.
/// NOTE(review): assumes the caller has already computed `validity` for the taken
/// positions rather than for the source array — confirm at the call site.
fn take_primitive<T: NativePType + ArrowNativeType>(
84+
primitive_array: PrimitiveArray,
85+
validity: Validity,
86+
indices: &[usize],
87+
) -> ArrayData {
88+
let vec_values = primitive_array.as_slice::<T>().to_vec();
89+
PrimitiveArray::new(
90+
indices
91+
.iter()
92+
.map(|i| vec_values[*i])
93+
.collect::<Buffer<T>>(),
94+
validity,
95+
)
96+
.into_array()
97+
}

vortex-array/src/array/arbitrary.rs

Lines changed: 16 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -88,7 +88,7 @@ fn random_array(u: &mut Unstructured, dtype: &DType, len: Option<usize>) -> Resu
8888
.vortex_unwrap()
8989
.into_array())
9090
}
91-
DType::List(ldt, n) => random_list(u, ldt, n),
91+
DType::List(ldt, n) => random_list(u, ldt, n, chunk_len),
9292
DType::Extension(..) => {
9393
todo!("Extension arrays are not implemented")
9494
}
@@ -106,14 +106,19 @@ fn random_array(u: &mut Unstructured, dtype: &DType, len: Option<usize>) -> Resu
106106
}
107107
}
108108

109-
fn random_list(u: &mut Unstructured, ldt: &Arc<DType>, n: &Nullability) -> Result<ArrayData> {
109+
fn random_list(
110+
u: &mut Unstructured,
111+
ldt: &Arc<DType>,
112+
n: &Nullability,
113+
chunk_len: Option<usize>,
114+
) -> Result<ArrayData> {
110115
match u.int_in_range(0..=5)? {
111-
0 => random_list_offset::<i16>(u, ldt, n),
112-
1 => random_list_offset::<i32>(u, ldt, n),
113-
2 => random_list_offset::<i64>(u, ldt, n),
114-
3 => random_list_offset::<u16>(u, ldt, n),
115-
4 => random_list_offset::<u32>(u, ldt, n),
116-
5 => random_list_offset::<u64>(u, ldt, n),
116+
0 => random_list_offset::<i16>(u, ldt, n, chunk_len),
117+
1 => random_list_offset::<i32>(u, ldt, n, chunk_len),
118+
2 => random_list_offset::<i64>(u, ldt, n, chunk_len),
119+
3 => random_list_offset::<u16>(u, ldt, n, chunk_len),
120+
4 => random_list_offset::<u32>(u, ldt, n, chunk_len),
121+
5 => random_list_offset::<u64>(u, ldt, n, chunk_len),
117122
_ => unreachable!("int_in_range returns a value in the above range"),
118123
}
119124
}
@@ -122,14 +127,15 @@ fn random_list_offset<O>(
122127
u: &mut Unstructured,
123128
ldt: &Arc<DType>,
124129
n: &Nullability,
130+
chunk_len: Option<usize>,
125131
) -> Result<ArrayData>
126132
where
127133
O: PrimInt + NativePType,
128134
Scalar: From<O>,
129135
usize: AsPrimitive<O>,
130136
{
131-
let list_len = u.int_in_range(0..=20)?;
132-
let mut builder = ListBuilder::<O>::with_capacity(ldt.clone(), *n, 1);
137+
let list_len = chunk_len.unwrap_or(u.int_in_range(0..=20)?);
138+
let mut builder = ListBuilder::<O>::with_capacity(ldt.clone(), *n, 10);
133139
for _ in 0..list_len {
134140
if matches!(n, Nullability::Nullable) || u.arbitrary::<bool>()? {
135141
let elem_len = u.int_in_range(0..=20)?;

0 commit comments

Comments
 (0)