fix[array]: invalid arbitrary decimal scalar value (#5143)

joseph-isaacs · web-flow · commit a008ea6785bd · 2025-11-03T13:45:04.000Z
Signed-off-by: Joe Isaacs <joe.isaacs@live.co.uk>
diff --git a/encodings/sparse/src/canonical.rs b/encodings/sparse/src/canonical.rs
@@ -8,7 +8,7 @@ use num_traits::NumCast;
 use vortex_array::arrays::binary_view::BinaryView;
 use vortex_array::arrays::{
     BoolArray, ConstantArray, FixedSizeListArray, ListViewArray, NullArray, PrimitiveArray,
-    StructArray, VarBinViewArray, smallest_decimal_value_type,
+    StructArray, VarBinViewArray,
 };
 use vortex_array::builders::{
     ArrayBuilder, DecimalBuilder, ListViewBuilder, builder_with_capacity,
@@ -19,8 +19,8 @@ use vortex_array::vtable::{CanonicalVTable, ValidityHelper};
 use vortex_array::{Array, Canonical, ToCanonical};
 use vortex_buffer::{BitBuffer, Buffer, BufferString, ByteBuffer, buffer, buffer_mut};
 use vortex_dtype::{
-    DType, DecimalDType, IntegerPType, NativeDecimalType, NativePType, Nullability, StructFields,
-    match_each_integer_ptype, match_each_native_ptype,
+    DType, DecimalDType, DecimalType, IntegerPType, NativeDecimalType, NativePType, Nullability,
+    StructFields, match_each_integer_ptype, match_each_native_ptype,
 };
 use vortex_error::{VortexError, VortexExpect, vortex_panic};
 use vortex_scalar::{
@@ -58,7 +58,8 @@ impl CanonicalVTable<SparseVTable> for SparseVTable {
                 array.len(),
             ),
             DType::Decimal(decimal_dtype, nullability) => {
-                let canonical_decimal_value_type = smallest_decimal_value_type(decimal_dtype);
+                let canonical_decimal_value_type =
+                    DecimalType::smallest_decimal_value_type(decimal_dtype);
                 let fill_value = array.fill_scalar().as_decimal();
                 match_each_decimal_value_type!(canonical_decimal_value_type, |D| {
                     canonicalize_sparse_decimal::<D>(
diff --git a/vortex-array/src/arrays/arbitrary.rs b/vortex-array/src/arrays/arbitrary.rs
@@ -12,7 +12,7 @@ use vortex_scalar::arbitrary::random_scalar;
 use vortex_scalar::{Scalar, match_each_decimal_value_type};
 
 use super::{BoolArray, ChunkedArray, NullArray, PrimitiveArray, StructArray};
-use crate::arrays::{VarBinArray, VarBinViewArray, smallest_decimal_value_type};
+use crate::arrays::{VarBinArray, VarBinViewArray};
 use crate::builders::{ArrayBuilder, DecimalBuilder, FixedSizeListBuilder, ListViewBuilder};
 use crate::validity::Validity;
 use crate::{Array, ArrayRef, IntoArray, ToCanonical};
@@ -95,17 +95,20 @@ fn random_array_chunk(
         },
         DType::Decimal(decimal, n) => {
             let elem_len = chunk_len.unwrap_or(u.int_in_range(0..=20)?);
-            match_each_decimal_value_type!(smallest_decimal_value_type(decimal), |DVT| {
-                let mut builder =
-                    DecimalBuilder::new::<DVT>(decimal.precision(), decimal.scale(), *n);
-                for _i in 0..elem_len {
-                    let random_decimal = random_scalar(u, &DType::Decimal(*decimal, *n))?;
-                    builder.append_scalar(&random_decimal).vortex_expect(
-                        "was somehow unable to append a decimal to a decimal builder",
-                    );
+            match_each_decimal_value_type!(
+                DecimalType::smallest_decimal_value_type(decimal),
+                |DVT| {
+                    let mut builder =
+                        DecimalBuilder::new::<DVT>(decimal.precision(), decimal.scale(), *n);
+                    for _i in 0..elem_len {
+                        let random_decimal = random_scalar(u, &DType::Decimal(*decimal, *n))?;
+                        builder.append_scalar(&random_decimal).vortex_expect(
+                            "was somehow unable to append a decimal to a decimal builder",
+                        );
+                    }
+                    Ok(builder.finish())
                 }
-                Ok(builder.finish())
-            })
+            )
         }
         DType::Utf8(n) => random_string(u, *n, chunk_len),
         DType::Binary(n) => random_bytes(u, *n, chunk_len),
diff --git a/vortex-array/src/arrays/constant/vtable/canonical.rs b/vortex-array/src/arrays/constant/vtable/canonical.rs
@@ -4,7 +4,7 @@
 use std::sync::Arc;
 
 use vortex_buffer::{BitBuffer, Buffer, buffer};
-use vortex_dtype::{DType, Nullability, match_each_native_ptype};
+use vortex_dtype::{DType, DecimalType, Nullability, match_each_native_ptype};
 use vortex_error::VortexExpect;
 use vortex_scalar::{
     BinaryScalar, BoolScalar, DecimalValue, ExtScalar, ListScalar, Scalar, StructScalar,
@@ -16,7 +16,7 @@ use crate::arrays::constant::ConstantArray;
 use crate::arrays::primitive::PrimitiveArray;
 use crate::arrays::{
     BoolArray, ConstantVTable, DecimalArray, ExtensionArray, FixedSizeListArray, ListViewArray,
-    NullArray, StructArray, VarBinViewArray, smallest_decimal_value_type,
+    NullArray, StructArray, VarBinViewArray,
 };
 use crate::builders::builder_with_capacity;
 use crate::validity::Validity;
@@ -66,7 +66,7 @@ impl CanonicalVTable<ConstantVTable> for ConstantVTable {
                 })
             }
             DType::Decimal(decimal_type, ..) => {
-                let size = smallest_decimal_value_type(decimal_type);
+                let size = DecimalType::smallest_decimal_value_type(decimal_type);
                 let decimal = scalar.as_decimal();
                 let Some(value) = decimal.decimal_value() else {
                     let all_null = match_each_decimal_value_type!(size, |D| {
diff --git a/vortex-array/src/arrays/decimal/array.rs b/vortex-array/src/arrays/decimal/array.rs
@@ -11,7 +11,6 @@ use vortex_error::{VortexExpect, VortexResult, vortex_ensure, vortex_panic};
 use vortex_scalar::match_each_decimal_value_type;
 
 use crate::ToCanonical;
-use crate::arrays::is_compatible_decimal_value_type;
 use crate::patches::Patches;
 use crate::stats::ArrayStats;
 use crate::validity::Validity;
@@ -279,7 +278,7 @@ where
     PatchDVT: NativeDecimalType,
     ValuesDVT: NativeDecimalType,
 {
-    if !is_compatible_decimal_value_type(ValuesDVT::DECIMAL_TYPE, decimal_dtype) {
+    if !ValuesDVT::DECIMAL_TYPE.is_compatible_decimal_value_type(decimal_dtype) {
         vortex_panic!(
             "patch_typed: {:?} cannot represent every value in {}.",
             ValuesDVT::DECIMAL_TYPE,
diff --git a/vortex-array/src/arrays/decimal/compute/sum.rs b/vortex-array/src/arrays/decimal/compute/sum.rs
@@ -3,13 +3,13 @@
 
 use arrow_schema::DECIMAL256_MAX_PRECISION;
 use num_traits::AsPrimitive;
-use vortex_dtype::DecimalDType;
 use vortex_dtype::Nullability::Nullable;
+use vortex_dtype::{DecimalDType, DecimalType};
 use vortex_error::{VortexResult, vortex_bail};
 use vortex_mask::Mask;
 use vortex_scalar::{DecimalValue, Scalar, match_each_decimal_value_type};
 
-use crate::arrays::{DecimalArray, DecimalVTable, smallest_decimal_value_type};
+use crate::arrays::{DecimalArray, DecimalVTable};
 use crate::compute::{SumKernel, SumKernelAdapter};
 use crate::register_kernel;
 
@@ -54,7 +54,7 @@ impl SumKernel for DecimalVTable {
                 vortex_bail!("invalid state, all-null array should be checked by top-level sum fn")
             }
             Mask::AllTrue(_) => {
-                let values_type = smallest_decimal_value_type(&return_dtype);
+                let values_type = DecimalType::smallest_decimal_value_type(&return_dtype);
                 match_each_decimal_value_type!(array.values_type(), |I| {
                     match_each_decimal_value_type!(values_type, |O| {
                         Ok(Scalar::decimal(
@@ -66,7 +66,7 @@ impl SumKernel for DecimalVTable {
                 })
             }
             Mask::Values(mask_values) => {
-                let values_type = smallest_decimal_value_type(&return_dtype);
+                let values_type = DecimalType::smallest_decimal_value_type(&return_dtype);
                 match_each_decimal_value_type!(array.values_type(), |I| {
                     match_each_decimal_value_type!(values_type, |O| {
                         Ok(Scalar::decimal(
diff --git a/vortex-array/src/arrays/decimal/utils.rs b/vortex-array/src/arrays/decimal/utils.rs
@@ -2,32 +2,12 @@
 // SPDX-FileCopyrightText: Copyright the Vortex contributors
 
 use itertools::{Itertools, MinMaxResult};
-use vortex_dtype::DecimalDType;
 use vortex_error::VortexExpect;
 use vortex_scalar::{DecimalType, i256};
 
 use crate::arrays::DecimalArray;
 use crate::vtable::ValidityHelper;
 
-/// Maps a decimal precision into the smallest type that can represent it.
-pub fn smallest_decimal_value_type(decimal_dtype: &DecimalDType) -> DecimalType {
-    match decimal_dtype.precision() {
-        1..=2 => DecimalType::I8,
-        3..=4 => DecimalType::I16,
-        5..=9 => DecimalType::I32,
-        10..=18 => DecimalType::I64,
-        19..=38 => DecimalType::I128,
-        39..=76 => DecimalType::I256,
-        0 => unreachable!("precision must be greater than 0"),
-        p => unreachable!("precision larger than 76 is invalid found precision {p}"),
-    }
-}
-
-/// True if `value_type` can represent every value of the type `dtype`.
-pub fn is_compatible_decimal_value_type(value_type: DecimalType, dtype: DecimalDType) -> bool {
-    value_type >= smallest_decimal_value_type(&dtype)
-}
-
 macro_rules! try_downcast {
     ($array:expr, from: $src:ty, to: $($dst:ty),*) => {{
         use vortex_dtype::BigCast;
diff --git a/vortex-array/src/builders/decimal.rs b/vortex-array/src/builders/decimal.rs
@@ -5,7 +5,9 @@ use std::any::Any;
 
 use vortex_buffer::BufferMut;
 use vortex_dtype::{BigCast, DType, DecimalDType, NativeDecimalType, Nullability};
-use vortex_error::{VortexExpect, VortexResult, vortex_ensure, vortex_panic};
+use vortex_error::{
+    VortexExpect, VortexResult, VortexUnwrap, vortex_ensure, vortex_err, vortex_panic,
+};
 use vortex_mask::Mask;
 use vortex_scalar::{
     DecimalValue, Scalar, i256, match_each_decimal_value, match_each_decimal_value_type,
@@ -212,7 +214,18 @@ impl ArrayBuilder for DecimalBuilder {
 impl DecimalBuffer {
     fn push<V: NativeDecimalType>(&mut self, value: V) {
         delegate_fn!(self, |T, buffer| {
-            buffer.push(<T as BigCast>::from(value).vortex_expect("decimal conversion failure"))
+            buffer.push(
+                <T as BigCast>::from(value)
+                    .ok_or_else(|| {
+                        vortex_err!(
+                            "decimal conversion failure {:?}, type: {:?} to {:?}",
+                            value,
+                            V::DECIMAL_TYPE,
+                            T::DECIMAL_TYPE,
+                        )
+                    })
+                    .vortex_unwrap(),
+            )
         });
     }
 
diff --git a/vortex-array/src/builders/mod.rs b/vortex-array/src/builders/mod.rs
@@ -35,7 +35,6 @@ use vortex_error::{VortexResult, vortex_panic};
 use vortex_mask::Mask;
 use vortex_scalar::{Scalar, match_each_decimal_value_type};
 
-use crate::arrays::smallest_decimal_value_type;
 use crate::canonical::Canonical;
 use crate::{Array, ArrayRef};
 
@@ -246,13 +245,16 @@ pub fn builder_with_capacity(dtype: &DType, capacity: usize) -> Box<dyn ArrayBui
             })
         }
         DType::Decimal(decimal_type, n) => {
-            match_each_decimal_value_type!(smallest_decimal_value_type(decimal_type), |D| {
-                Box::new(DecimalBuilder::with_capacity::<D>(
-                    capacity,
-                    *decimal_type,
-                    *n,
-                ))
-            })
+            match_each_decimal_value_type!(
+                DecimalType::smallest_decimal_value_type(decimal_type),
+                |D| {
+                    Box::new(DecimalBuilder::with_capacity::<D>(
+                        capacity,
+                        *decimal_type,
+                        *n,
+                    ))
+                }
+            )
         }
         DType::Utf8(n) => Box::new(VarBinViewBuilder::with_capacity(DType::Utf8(*n), capacity)),
         DType::Binary(n) => Box::new(VarBinViewBuilder::with_capacity(
diff --git a/vortex-dtype/src/decimal/precision.rs b/vortex-dtype/src/decimal/precision.rs
@@ -122,3 +122,18 @@ impl<D: NativeDecimalType> TryFrom<&DecimalDType> for PrecisionScale<D> {
         PrecisionScale::try_new(value.precision, value.scale)
     }
 }
+
+#[cfg(test)]
+mod tests {
+    use crate::PrecisionScale;
+
+    #[test]
+    fn max_precision() {
+        let prec = PrecisionScale::<i8>::new(2, 1);
+        assert!(prec.is_valid(8));
+        assert!(prec.is_valid(99));
+        assert!(prec.is_valid(-9));
+        assert!(prec.is_valid(0));
+        assert!(prec.is_valid(-99))
+    }
+}
diff --git a/vortex-dtype/src/decimal/types.rs b/vortex-dtype/src/decimal/types.rs
@@ -10,7 +10,7 @@ use paste::paste;
 use crate::decimal::max_precision::{
     MAX_DECIMAL256_FOR_EACH_PRECISION, MIN_DECIMAL256_FOR_EACH_PRECISION,
 };
-use crate::{BigCast, i256};
+use crate::{BigCast, DecimalDType, i256};
 
 /// Type of the decimal values.
 ///
@@ -35,6 +35,27 @@ pub enum DecimalType {
     I256 = 5,
 }
 
+impl DecimalType {
+    /// Maps a `DecimalDType` (precision) into the smallest `DecimalType` that can represent it.
+    pub fn smallest_decimal_value_type(decimal_dtype: &DecimalDType) -> DecimalType {
+        match decimal_dtype.precision() {
+            1..=2 => DecimalType::I8,
+            3..=4 => DecimalType::I16,
+            5..=9 => DecimalType::I32,
+            10..=18 => DecimalType::I64,
+            19..=38 => DecimalType::I128,
+            39..=76 => DecimalType::I256,
+            0 => unreachable!("precision must be greater than 0"),
+            p => unreachable!("precision larger than 76 is invalid found precision {p}"),
+        }
+    }
+
+    /// Returns `true` if `self` can represent every value of the given decimal `dtype`.
+    pub fn is_compatible_decimal_value_type(self, dtype: DecimalDType) -> bool {
+        self >= Self::smallest_decimal_value_type(&dtype)
+    }
+}
+
 /// Type of decimal scalar values.
 ///
 /// This trait is implemented by native integer types that can be used to store decimal values.
@@ -61,8 +82,11 @@ pub trait NativeDecimalType:
     const MAX_SCALE: i8;
 
     /// The minimum value for each precision supported by this decimal type.
+    /// This is an array of length `MAX_PRECISION + 1` where the `i`th element is the minimum value
+    /// for a precision of `i` (including precision 0).
     const MIN_BY_PRECISION: &'static [Self];
     /// The maximum value for each precision supported by this decimal type.
+    /// Indexed like `MIN_BY_PRECISION`: element `i` is the maximum value for precision `i`.
     const MAX_BY_PRECISION: &'static [Self];
 
     /// Downcast the provided object to a type-specific instance.
@@ -127,24 +151,24 @@ macro_rules! impl_decimal {
                 const MAX_SCALE: i8 = Self::MAX_PRECISION as i8;
 
                 const MIN_BY_PRECISION: &'static [Self] = &{
-                    let mut mins = [$T::ZERO; Self::MAX_PRECISION as usize];
+                    let mut mins = [$T::ZERO; Self::MAX_PRECISION as usize + 1];
                     let mut p = $T::ONE;
                     let mut i = 0;
                     while i < Self::MAX_PRECISION as usize {
                         p = p * 10;
-                        mins[i] = -(p - 1);
+                        mins[i + 1] = -(p - 1);
                         i += 1;
                     }
                     mins
                 };
 
                 const MAX_BY_PRECISION: &'static [Self] = &{
-                    let mut maxs = [$T::ZERO; Self::MAX_PRECISION as usize];
+                    let mut maxs = [$T::ZERO; Self::MAX_PRECISION as usize + 1];
                     let mut p = $T::ONE;
                     let mut i = 0;
                     while i < Self::MAX_PRECISION as usize {
                         p = p * 10;
-                        maxs[i] = p - 1;
+                        maxs[i + 1] = p - 1;
                         i += 1;
                     }
                     maxs
diff --git a/vortex-scalar/src/arbitrary.rs b/vortex-scalar/src/arbitrary.rs
@@ -12,9 +12,11 @@ use std::sync::Arc;
 use arbitrary::{Result, Unstructured};
 use vortex_buffer::{BufferString, ByteBuffer};
 use vortex_dtype::half::f16;
-use vortex_dtype::{DType, DecimalDType, NativeDecimalType, PType, i256};
+use vortex_dtype::{DType, DecimalDType, NativeDecimalType, PType};
 
-use crate::{DecimalValue, InnerScalarValue, PValue, Scalar, ScalarValue};
+use crate::{
+    DecimalValue, InnerScalarValue, PValue, Scalar, ScalarValue, match_each_decimal_value_type,
+};
 
 /// Generate an arbitrary scalar value of the given data type.
 pub fn random_scalar(u: &mut Unstructured, dtype: &DType) -> Result<Scalar> {
@@ -82,19 +84,14 @@ fn random_pvalue(u: &mut Unstructured, ptype: &PType) -> Result<PValue> {
 /// Generate an arbitrary decimal scalar confined to the given bounds of precision and scale.
 pub fn random_decimal(u: &mut Unstructured, decimal_type: &DecimalDType) -> Result<ScalarValue> {
     let precision = decimal_type.precision();
-    if precision <= i128::MAX_PRECISION {
-        Ok(ScalarValue(InnerScalarValue::Decimal(DecimalValue::I128(
-            u.int_in_range(
-                i128::MIN_BY_PRECISION[precision as usize]
-                    ..=i128::MAX_BY_PRECISION[precision as usize],
-            )?,
-        ))))
-    } else {
-        Ok(ScalarValue(InnerScalarValue::Decimal(DecimalValue::I256(
-            u.int_in_range(
-                i256::MIN_BY_PRECISION[precision as usize]
-                    ..=i256::MAX_BY_PRECISION[precision as usize],
-            )?,
-        ))))
-    }
+    let value = match_each_decimal_value_type!(
+        DecimalType::smallest_decimal_value_type(decimal_type),
+        |D| {
+            DecimalValue::from(u.int_in_range(
+                D::MIN_BY_PRECISION[precision as usize]..=D::MAX_BY_PRECISION[precision as usize],
+            )?)
+        }
+    );
+
+    Ok(ScalarValue(InnerScalarValue::Decimal(value)))
 }
diff --git a/vortex-scalar/src/decimal/macros.rs b/vortex-scalar/src/decimal/macros.rs
@@ -105,7 +105,6 @@ macro_rules! match_each_decimal_value_type {
                 type $enc = $crate::i256;
                 $body
             }
-            ty => unreachable!("unknown decimal value type {:?}", ty),
         }
     }};
 }