fix: Fuzzer generates valid DecimalArrays (#3424)

robert3005 · web-flow · commit 0a45e8a1b77e · 2025-05-30T19:50:35.000Z
Signed-off-by: Robert Kruszewski <github@robertk.io>
diff --git a/vortex-array/src/arrays/arbitrary.rs b/vortex-array/src/arrays/arbitrary.rs
@@ -7,14 +7,15 @@ use builders::ListBuilder;
 use vortex_buffer::Buffer;
 use vortex_dtype::{DType, NativePType, Nullability, PType};
 use vortex_error::{VortexExpect, VortexUnwrap};
-use vortex_scalar::Scalar;
-use vortex_scalar::arbitrary::random_scalar;
+use vortex_scalar::arbitrary::{random_decimal, random_scalar};
+use vortex_scalar::{Scalar, match_each_decimal_value_type};
 
 use super::{
-    BoolArray, ChunkedArray, DecimalArray, NullArray, OffsetPType, PrimitiveArray, StructArray,
+    BoolArray, ChunkedArray, NullArray, OffsetPType, PrimitiveArray, StructArray,
+    smallest_storage_type,
 };
 use crate::arrays::{VarBinArray, VarBinViewArray};
-use crate::builders::ArrayBuilder;
+use crate::builders::{ArrayBuilder, ArrayBuilderExt, DecimalBuilder};
 use crate::validity::Validity;
 use crate::{Array, ArrayRef, IntoArray, ToCanonical, builders};
 
@@ -69,13 +70,17 @@ fn random_array(u: &mut Unstructured, dtype: &DType, len: Option<usize>) -> Resu
                     PType::F64 => random_primitive::<f64>(u, *n, chunk_len),
                 },
                 DType::Decimal(decimal, n) => {
-                    // TODO(aduffy): also do i256.
-                    let chunk: Vec<i128> = arbitrary_vec_of_len(u, chunk_len)?;
-                    let validity = random_validity(u, *n, chunk.len())?;
-                    Ok(
-                        DecimalArray::new(Buffer::from_iter(chunk), *decimal, validity)
-                            .into_array(),
-                    )
+                    let elem_len = u.int_in_range(0..=20)?;
+                    match_each_decimal_value_type!(smallest_storage_type(decimal), |DVT| {
+                        let mut builder =
+                            DecimalBuilder::new::<DVT>(decimal.precision(), decimal.scale(), *n);
+                        for _i in 0..elem_len {
+                            builder
+                                .append_scalar_value(random_decimal(u, decimal)?)
+                                .vortex_unwrap();
+                        }
+                        Ok(builder.finish())
+                    })
                 }
                 DType::Utf8(n) => random_string(u, *n, chunk_len),
                 DType::Binary(n) => random_bytes(u, *n, chunk_len),
diff --git a/vortex-scalar/src/arbitrary/decimal.rs b/vortex-scalar/src/arbitrary/decimal.rs
@@ -1,29 +1,62 @@
+use arbitrary::unstructured::Int;
 use arbitrary::{Result, Unstructured};
+use num_traits::{CheckedAdd, WrappingAdd, WrappingSub};
 use vortex_dtype::{DECIMAL128_MAX_PRECISION, DecimalDType};
-use vortex_error::VortexUnwrap;
 
 use crate::{DecimalValue, InnerScalarValue, ScalarValue, i256};
 
-/// Generate an arbitrary decimal scalar that is confined to the bounds of
+#[allow(clippy::same_name_method)]
+impl Int for i256 {
+    type Unsigned = i256;
+    const ZERO: Self = i256::ZERO;
+    const ONE: Self = i256::ONE;
+    const MAX: Self = i256::MAX;
+
+    fn from_u8(b: u8) -> Self {
+        Self::from_i128(b as i128)
+    }
+
+    fn from_usize(u: usize) -> Self {
+        Self::from_i128(u as i128)
+    }
+
+    fn checked_add(self, rhs: Self) -> Option<Self> {
+        <Self as CheckedAdd>::checked_add(&self, &rhs)
+    }
+
+    fn wrapping_add(self, rhs: Self) -> Self {
+        <Self as WrappingAdd>::wrapping_add(&self, &rhs)
+    }
+
+    fn wrapping_sub(self, rhs: Self) -> Self {
+        <Self as WrappingSub>::wrapping_sub(&self, &rhs)
+    }
+
+    fn to_unsigned(self) -> Self::Unsigned {
+        self
+    }
+
+    fn from_unsigned(unsigned: Self::Unsigned) -> Self {
+        unsigned
+    }
+}
+
+/// Generate an arbitrary decimal scalar confined to the bounds of `decimal_type`'s precision.
 pub fn random_decimal(u: &mut Unstructured, decimal_type: &DecimalDType) -> Result<ScalarValue> {
     let precision = decimal_type.precision();
-    if decimal_type.precision() <= DECIMAL128_MAX_PRECISION {
+    if precision <= DECIMAL128_MAX_PRECISION {
         Ok(ScalarValue(InnerScalarValue::Decimal(DecimalValue::I128(
             u.int_in_range(
                 MIN_DECIMAL128_FOR_EACH_PRECISION[precision as usize]
                     ..=MAX_DECIMAL128_FOR_EACH_PRECISION[precision as usize],
             )?,
         ))))
     } else {
-        // Generate a random i256 value in between the min/max range, inclusive
-        let min = MIN_DECIMAL256_FOR_EACH_PRECISION[precision as usize];
-        let max = MAX_DECIMAL256_FOR_EACH_PRECISION[precision as usize];
-        let delta = (max - min) + i256::ONE;
-
-        let rand_bytes = i256::from_le_bytes(u.bytes(32)?.try_into().vortex_unwrap());
-        let value = (rand_bytes % delta) + min;
         Ok(ScalarValue(InnerScalarValue::Decimal(DecimalValue::I256(
-            value,
+            u.int_in_range(
+                MIN_DECIMAL256_FOR_EACH_PRECISION[precision as usize]
+                    ..=MAX_DECIMAL256_FOR_EACH_PRECISION[precision as usize],
+            )?,
         ))))
     }
 }
diff --git a/vortex-scalar/src/arbitrary/mod.rs b/vortex-scalar/src/arbitrary/mod.rs
@@ -4,7 +4,7 @@ use std::iter;
 use std::sync::Arc;
 
 use arbitrary::{Result, Unstructured};
-use decimal::random_decimal;
+pub use decimal::random_decimal;
 use vortex_buffer::{BufferString, ByteBuffer};
 use vortex_dtype::half::f16;
 use vortex_dtype::{DType, PType};
diff --git a/vortex-scalar/src/bigint/mod.rs b/vortex-scalar/src/bigint/mod.rs
@@ -1,10 +1,11 @@
 mod bigcast;
 
 use std::fmt::Display;
-use std::ops::{Add, Div, Mul, Rem, Sub};
+use std::ops::{Add, BitOr, Div, Mul, Rem, Shl, Shr, Sub};
 
 pub use bigcast::*;
-use num_traits::{CheckedAdd, CheckedSub, ConstZero, One, Zero};
+use num_traits::{CheckedAdd, CheckedSub, ConstZero, One, WrappingAdd, WrappingSub, Zero};
+use vortex_error::VortexExpect;
 
 /// Signed 256-bit integer type.
 ///
@@ -15,6 +16,7 @@ use num_traits::{CheckedAdd, CheckedSub, ConstZero, One, Zero};
 #[derive(Debug, Copy, Clone, Default, Eq, PartialEq, Hash, PartialOrd, Ord)]
 pub struct i256(arrow_buffer::i256);
 
+#[allow(clippy::same_name_method)]
 impl i256 {
     pub const ZERO: Self = Self(arrow_buffer::i256::ZERO);
     pub const ONE: Self = Self(arrow_buffer::i256::ONE);
@@ -157,12 +159,60 @@ impl CheckedAdd for i256 {
     }
 }
 
+impl WrappingAdd for i256 {
+    fn wrapping_add(&self, v: &Self) -> Self {
+        Self(self.0.wrapping_add(v.0))
+    }
+}
+
 impl CheckedSub for i256 {
     fn checked_sub(&self, v: &Self) -> Option<Self> {
         self.0.checked_sub(v.0).map(Self)
     }
 }
 
+impl WrappingSub for i256 {
+    fn wrapping_sub(&self, v: &Self) -> Self {
+        Self(self.0.wrapping_sub(v.0))
+    }
+}
+
+impl Shr<Self> for i256 {
+    type Output = Self;
+
+    fn shr(self, rhs: Self) -> Self::Output {
+        use num_traits::ToPrimitive;
+
+        Self(
+            self.0.shr(
+                rhs.0
+                    .to_u8()
+                    .vortex_expect("Can't shift more than 256 bits"),
+            ),
+        )
+    }
+}
+
+impl Shl<usize> for i256 {
+    type Output = Self;
+
+    fn shl(self, rhs: usize) -> Self::Output {
+        use num_traits::ToPrimitive;
+        Self(
+            self.0
+                .shl(rhs.to_u8().vortex_expect("Can't shift more than 256 bits")),
+        )
+    }
+}
+
+impl BitOr<Self> for i256 {
+    type Output = Self;
+
+    fn bitor(self, rhs: Self) -> Self::Output {
+        Self(self.0.bitor(rhs.0))
+    }
+}
+
 impl num_traits::ToPrimitive for i256 {
     fn to_i64(&self) -> Option<i64> {
         self.maybe_i128().and_then(|v| v.to_i64())
diff --git a/vortex-scalar/src/decimal.rs b/vortex-scalar/src/decimal.rs
@@ -1,6 +1,6 @@
 use std::cmp::Ordering;
 use std::fmt;
-use std::fmt::{Display, Formatter};
+use std::fmt::{Debug, Display, Formatter};
 use std::hash::Hash;
 
 use vortex_dtype::{DType, DecimalDType, Nullability};
@@ -148,20 +148,7 @@ impl Hash for DecimalValue {
 
 /// Type of decimal scalar values.
 pub trait NativeDecimalType:
-    Copy
-    + Eq
-    + Ord
-    + Default
-    + Send
-    + Sync
-    + BigCast
-    // + AsPrimitive<i8>
-    // + AsPrimitive<i16>
-    // + AsPrimitive<i32>
-    // + AsPrimitive<i64>
-    // + AsPrimitive<i128>
-    // + AsPrimitive<i256>
-    + 'static
+    Copy + Eq + Ord + Default + Send + Sync + BigCast + Debug + Display + 'static
 {
     const VALUES_TYPE: DecimalValueType;