Skip to content

Commit dc18375

Browse files
chore[fuzzer]: add array casting op (#3583)
Signed-off-by: Joe Isaacs <[email protected]>
1 parent 2f658ec commit dc18375

File tree

7 files changed

+139
-10
lines changed

7 files changed

+139
-10
lines changed

Cargo.lock

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -159,6 +159,7 @@ similar = "2.7.0"
159159
simplelog = "0.12"
160160
sketches-ddsketch = "0.3.0"
161161
static_assertions = "1.1"
162+
strum = "0.25"
162163
tabled = { version = "0.19.0", default-features = false }
163164
taffy = "0.8.0"
164165
tar = "0.4"

fuzz/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ arrow-buffer = { workspace = true }
2222
arrow-ord = { workspace = true }
2323
futures-util = { workspace = true }
2424
libfuzzer-sys = { workspace = true }
25+
strum = { workspace = true, features = ["derive"] }
2526
thiserror = { workspace = true }
2627
tokio = { workspace = true, features = ["full"] }
2728
vortex-array = { workspace = true, features = ["arbitrary"] }

fuzz/fuzz_targets/array_ops.rs

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ use vortex_array::arrays::{
66
BoolEncoding, ConstantArray, ListEncoding, PrimitiveEncoding, StructEncoding, VarBinEncoding,
77
VarBinViewEncoding,
88
};
9-
use vortex_array::compute::{compare, filter, take};
9+
use vortex_array::compute::{cast, compare, filter, take};
1010
use vortex_array::search_sorted::{SearchResult, SearchSorted, SearchSortedSide};
1111
use vortex_array::{Array, ArrayRef, IntoArray};
1212
use vortex_btrblocks::BtrBlocksCompressor;
@@ -74,6 +74,16 @@ fuzz_target!(|fuzz_action: FuzzArrayAction| -> Corpus {
7474
}
7575
current_array = compare_result;
7676
}
77+
Action::Cast(to) => {
78+
let cast_result = cast(&current_array, &to).vortex_unwrap();
79+
if let Err(e) = assert_array_eq(&expected.array(), &cast_result, i) {
80+
vortex_panic!(
81+
"Failed to cast {} to dtype {to}\nError: {e}",
82+
current_array.tree_display()
83+
)
84+
}
85+
current_array = cast_result;
86+
}
7787
}
7888
}
7989
Corpus::Keep

fuzz/src/cast.rs

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
use vortex_array::arrays::PrimitiveArray;
2+
use vortex_array::validity::Validity;
3+
use vortex_array::{Array, ArrayRef, ToCanonical};
4+
use vortex_buffer::Buffer;
5+
use vortex_dtype::{DType, match_each_integer_ptype};
6+
use vortex_error::VortexResult;
7+
8+
pub fn cast_canonical_array(array: &ArrayRef, target: &DType) -> VortexResult<Option<ArrayRef>> {
9+
// TODO(joe): support more casting options
10+
if !target.is_int() || !array.dtype().is_int() {
11+
return Ok(None);
12+
}
13+
Ok(Some(match_each_integer_ptype!(
14+
array.dtype().as_ptype(),
15+
|In| {
16+
match_each_integer_ptype!(target.as_ptype(), |Out| {
17+
// Since the cast itself would truncate.
18+
#[allow(clippy::cast_possible_truncation)]
19+
PrimitiveArray::new(
20+
array
21+
.to_primitive()?
22+
.as_slice::<In>()
23+
.iter()
24+
.map(|v| *v as Out)
25+
.collect::<Buffer<Out>>(),
26+
Validity::from_mask(array.validity_mask()?, target.nullability()),
27+
)
28+
.to_array()
29+
})
30+
}
31+
)))
32+
}

fuzz/src/lib.rs

Lines changed: 27 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
#![feature(error_generic_member_access)]
22

3+
mod cast;
34
mod compare;
45
pub mod error;
56
mod filter;
@@ -10,24 +11,27 @@ mod take;
1011

1112
use std::fmt::Debug;
1213
use std::iter;
13-
use std::ops::{Range, RangeInclusive};
14+
use std::ops::Range;
1415

1516
use libfuzzer_sys::arbitrary::Error::EmptyChoose;
1617
use libfuzzer_sys::arbitrary::{Arbitrary, Result, Unstructured};
1718
pub use sort::sort_canonical_array;
19+
use strum::EnumCount;
1820
use vortex_array::arrays::PrimitiveArray;
1921
use vortex_array::arrays::arbitrary::ArbitraryArray;
20-
use vortex_array::compute::Operator;
22+
use vortex_array::compute::{CastOutcome, Operator, allowed_casting};
2123
use vortex_array::search_sorted::{SearchResult, SearchSortedSide};
2224
use vortex_array::{Array, ArrayRef, IntoArray};
2325
use vortex_btrblocks::BtrBlocksCompressor;
2426
use vortex_dtype::{DType, Nullability};
25-
use vortex_error::{VortexUnwrap, vortex_panic};
27+
use vortex_error::{VortexExpect, VortexUnwrap, vortex_panic};
2628
use vortex_mask::Mask;
2729
use vortex_scalar::Scalar;
2830
use vortex_scalar::arbitrary::random_scalar;
2931
use vortex_utils::aliases::hash_set::HashSet;
3032

33+
use crate::Action::Cast;
34+
use crate::cast::cast_canonical_array;
3135
use crate::compare::compare_canonical_array;
3236
use crate::filter::filter_canonical_array;
3337
use crate::search_sorted::search_sorted_canonical_array;
@@ -62,14 +66,15 @@ pub struct FuzzArrayAction {
6266
pub actions: Vec<(Action, ExpectedValue)>,
6367
}
6468

65-
#[derive(Debug)]
69+
#[derive(Debug, EnumCount)]
6670
pub enum Action {
6771
Compress,
6872
Slice(Range<usize>),
6973
Take(ArrayRef),
7074
SearchSorted(Scalar, SearchSortedSide),
7175
Filter(Mask),
7276
Compare(Scalar, Operator),
77+
Cast(DType),
7378
}
7479

7580
impl<'a> Arbitrary<'a> for FuzzArrayAction {
@@ -188,7 +193,21 @@ impl<'a> Arbitrary<'a> for FuzzArrayAction {
188193
ExpectedValue::Array(current_array.to_array()),
189194
)
190195
}
191-
_ => unreachable!(),
196+
6 => {
197+
let to: DType = u.arbitrary()?;
198+
if Some(CastOutcome::Infallible) == allowed_casting(current_array.dtype(), &to)
199+
{
200+
return Err(EmptyChoose);
201+
}
202+
let Some(result) = cast_canonical_array(&current_array, &to)
203+
.vortex_expect("should fail to create array")
204+
else {
205+
return Err(EmptyChoose);
206+
};
207+
208+
(Cast(to), ExpectedValue::Array(result))
209+
}
210+
7.. => unreachable!(),
192211
})
193212
}
194213

@@ -217,15 +236,15 @@ fn random_value_from_list(u: &mut Unstructured<'_>, vec: &[usize]) -> Result<usi
217236
u.choose_iter(vec).cloned()
218237
}
219238

220-
const ALL_ACTIONS: RangeInclusive<usize> = 0..=5;
239+
const ALL_ACTIONS: Range<usize> = 0..Action::COUNT;
221240

222241
fn actions_for_dtype(dtype: &DType) -> HashSet<usize> {
223242
match dtype {
224-
// All but compare
225243
DType::Struct(sdt, _) => sdt
226244
.fields()
227245
.map(|child| actions_for_dtype(&child))
228-
.fold((0..=4).collect(), |acc, actions| {
246+
// exclude compare
247+
.fold((0..=4).chain(iter::once(6)).collect(), |acc, actions| {
229248
acc.intersection(&actions).copied().collect()
230249
}),
231250
// Once we support more list operations also recurse here on child dtype

vortex-array/src/compute/cast.rs

Lines changed: 66 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,8 @@
11
use std::sync::LazyLock;
22

33
use arcref::ArcRef;
4-
use vortex_dtype::DType;
4+
use vortex_dtype::Nullability::Nullable;
5+
use vortex_dtype::{DType, PType};
56
use vortex_error::{VortexError, VortexResult, vortex_bail, vortex_err};
67

78
use crate::compute::{ComputeFn, ComputeFnVTable, InvocationArgs, Kernel, Output};
@@ -137,3 +138,67 @@ impl<V: VTable + CastKernel> Kernel for CastKernelAdapter<V> {
137138
Ok(Some(V::cast(&self.0, array, dtype)?.into()))
138139
}
139140
}
141+
142+
/// How a supported cast between two types can turn out.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum CastOutcome {
    /// The cast is supported but may fail depending on the values being cast
    /// (for example an integer narrowing).
    Fallible,
    /// The cast is supported and succeeds for every value of the source type
    /// (for example an identity cast or an integer widening).
    Infallible,
}
147+
148+
pub fn allowed_casting(from: &DType, to: &DType) -> Option<CastOutcome> {
149+
// Can cast to include nullability
150+
if &from.with_nullability(Nullable) == to {
151+
return Some(CastOutcome::Infallible);
152+
}
153+
match (from, to) {
154+
(DType::Primitive(from_ptype, _), DType::Primitive(to_ptype, _)) => {
155+
allowed_casting_ptype(*from_ptype, *to_ptype)
156+
}
157+
_ => None,
158+
}
159+
}
160+
161+
pub fn allowed_casting_ptype(from: PType, to: PType) -> Option<CastOutcome> {
162+
use CastOutcome::*;
163+
use PType::*;
164+
165+
match (from, to) {
166+
// Identity casts
167+
(a, b) if a == b => Some(Infallible),
168+
169+
// Integer widening (always infallible)
170+
(U8, U16 | U32 | U64)
171+
| (U16, U32 | U64)
172+
| (U32, U64)
173+
| (I8, I16 | I32 | I64)
174+
| (I16, I32 | I64)
175+
| (I32, I64) => Some(Infallible),
176+
177+
// Integer narrowing (may truncate)
178+
(U16 | U32 | U64, U8)
179+
| (U32 | U64, U16)
180+
| (U64, U32)
181+
| (I16 | I32 | I64, I8)
182+
| (I32 | I64, I16)
183+
| (I64, I32) => Some(Fallible),
184+
185+
// Between signed and unsigned (fallible if negative or too big)
186+
(I8 | I16 | I32 | I64, U8 | U16 | U32 | U64)
187+
| (U8 | U16 | U32 | U64, I8 | I16 | I32 | I64) => Some(Fallible),
188+
189+
// TODO(joe): shall we allow float/int casting?
190+
// Integer -> Float
191+
// (U8 | U16 | U32 | U64 | I8 | I16 | I32 | I64, F16 | F32 | F64) => Some(Fallible),
192+
193+
// Float -> Integer (truncates, overflows possible)
194+
// (F16 | F32 | F64, U8 | U16 | U32 | U64 | I8 | I16 | I32 | I64) => Some(Fallible),
195+
196+
// Float widening (safe)
197+
(F16, F32 | F64) | (F32, F64) => Some(Infallible),
198+
199+
// Float narrowing (lossy)
200+
(F64, F32 | F16) | (F32, F16) => Some(Fallible),
201+
202+
_ => None,
203+
}
204+
}

0 commit comments

Comments
 (0)