fix: Decimal sum returns null on overflow instead of panicking

robert3005 · robert3005 · commit efdf70e9fc77 · 2025-11-27T16:25:37.000Z
Signed-off-by: Robert Kruszewski <github@robertk.io>
diff --git a/vortex-array/src/arrays/decimal/compute/sum.rs b/vortex-array/src/arrays/decimal/compute/sum.rs
@@ -1,16 +1,20 @@
 // SPDX-License-Identifier: Apache-2.0
 // SPDX-FileCopyrightText: Copyright the Vortex contributors
 
-use arrow_schema::DECIMAL256_MAX_PRECISION;
+use itertools::Itertools;
 use num_traits::AsPrimitive;
+use num_traits::CheckedAdd;
+use vortex_buffer::BitBuffer;
+use vortex_buffer::Buffer;
+use vortex_dtype::DType;
 use vortex_dtype::DecimalDType;
 use vortex_dtype::DecimalType;
+use vortex_dtype::MAX_PRECISION;
 use vortex_dtype::Nullability::Nullable;
 use vortex_dtype::match_each_decimal_value_type;
 use vortex_error::VortexExpect;
 use vortex_error::VortexResult;
 use vortex_error::vortex_bail;
-use vortex_error::vortex_err;
 use vortex_mask::Mask;
 use vortex_scalar::DecimalScalar;
 use vortex_scalar::DecimalValue;
@@ -22,32 +26,6 @@ use crate::compute::SumKernel;
 use crate::compute::SumKernelAdapter;
 use crate::register_kernel;
 
-// Its safe to use `AsPrimitive` here because we always cast up.
-macro_rules! sum_decimal {
-    ($ty:ty, $values:expr, $initial:expr) => {{
-        let mut sum: $ty = $initial;
-        for v in $values.iter() {
-            let v: $ty = (*v).as_();
-            sum = num_traits::CheckedAdd::checked_add(&sum, &v)
-                .ok_or_else(|| vortex_err!("Overflow when summing decimal {sum:?} + {v:?}"))?
-        }
-        sum
-    }};
-    ($ty:ty, $values:expr, $validity:expr, $initial:expr) => {{
-        use itertools::Itertools;
-
-        let mut sum: $ty = $initial;
-        for (v, valid) in $values.iter().zip_eq($validity) {
-            if valid {
-                let v: $ty = (*v).as_();
-                sum = num_traits::CheckedAdd::checked_add(&sum, &v)
-                    .ok_or_else(|| vortex_err!("Overflow when summing decimal {sum:?} + {v:?}"))?
-            }
-        }
-        sum
-    }};
-}
-
 impl SumKernel for DecimalVTable {
     #[expect(
         clippy::cognitive_complexity,
@@ -59,7 +37,7 @@ impl SumKernel for DecimalVTable {
         // Both Spark and DataFusion use this heuristic.
         // - https://github.com/apache/spark/blob/fcf636d9eb8d645c24be3db2d599aba2d7e2955a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Sum.scala#L66
         // - https://github.com/apache/datafusion/blob/4153adf2c0f6e317ef476febfdc834208bd46622/datafusion/functions-aggregate/src/sum.rs#L188
-        let new_precision = u8::min(DECIMAL256_MAX_PRECISION, decimal_dtype.precision() + 10);
+        let new_precision = u8::min(MAX_PRECISION, decimal_dtype.precision() + 10);
         let new_scale = decimal_dtype.scale();
         let return_dtype = DecimalDType::new(new_precision, new_scale);
 
@@ -80,11 +58,15 @@ impl SumKernel for DecimalVTable {
                         let initial_val: O = initial_decimal
                             .cast()
                             .vortex_expect("cannot fail to cast initial value");
-                        Ok(Scalar::decimal(
-                            DecimalValue::from(sum_decimal!(O, array.buffer::<I>(), initial_val)),
-                            return_dtype,
-                            Nullable,
-                        ))
+                        if let Some(sum) = sum_decimal(array.buffer::<I>(), initial_val) {
+                            Ok(Scalar::decimal(
+                                DecimalValue::from(sum),
+                                return_dtype,
+                                Nullable,
+                            ))
+                        } else {
+                            Ok(Scalar::null(DType::Decimal(return_dtype, Nullable)))
+                        }
                     })
                 })
             }
@@ -95,23 +77,54 @@ impl SumKernel for DecimalVTable {
                         let initial_val: O = initial_decimal
                             .cast()
                             .vortex_expect("cannot fail to cast initial value");
-                        Ok(Scalar::decimal(
-                            DecimalValue::from(sum_decimal!(
-                                O,
-                                array.buffer::<I>(),
-                                mask_values.bit_buffer(),
-                                initial_val
-                            )),
-                            return_dtype,
-                            Nullable,
-                        ))
+
+                        if let Some(sum) = sum_decimal_with_validity(
+                            array.buffer::<I>(),
+                            mask_values.bit_buffer(),
+                            initial_val,
+                        ) {
+                            Ok(Scalar::decimal(
+                                DecimalValue::from(sum),
+                                return_dtype,
+                                Nullable,
+                            ))
+                        } else {
+                            Ok(Scalar::null(DType::Decimal(return_dtype, Nullable)))
+                        }
                     })
                 })
             }
         }
     }
 }
 
+fn sum_decimal<T: AsPrimitive<I>, I: Copy + CheckedAdd + 'static>(
+    values: Buffer<T>,
+    initial: I,
+) -> Option<I> {
+    let mut sum = initial;
+    for v in values.iter() {
+        let v: I = v.as_();
+        sum = CheckedAdd::checked_add(&sum, &v)?;
+    }
+    Some(sum)
+}
+
+fn sum_decimal_with_validity<T: AsPrimitive<I>, I: Copy + CheckedAdd + 'static>(
+    values: Buffer<T>,
+    validity: &BitBuffer,
+    initial: I,
+) -> Option<I> {
+    let mut sum = initial;
+    for (v, valid) in values.iter().zip_eq(validity) {
+        if valid {
+            let v: I = v.as_();
+            sum = CheckedAdd::checked_add(&sum, &v)?;
+        }
+    }
+    Some(sum)
+}
+
 register_kernel!(SumKernelAdapter(DecimalVTable).lift());
 
 #[cfg(test)]
@@ -120,9 +133,11 @@ mod tests {
     use vortex_dtype::DType;
     use vortex_dtype::DecimalDType;
     use vortex_dtype::Nullability;
+    use vortex_error::VortexUnwrap;
     use vortex_scalar::DecimalValue;
     use vortex_scalar::Scalar;
     use vortex_scalar::ScalarValue;
+    use vortex_scalar::i256;
 
     use crate::arrays::DecimalArray;
     use crate::compute::sum;
@@ -327,8 +342,6 @@ mod tests {
 
     #[test]
     fn test_sum_i128_to_i256_boundary() {
-        use vortex_scalar::i256;
-
         // Test the boundary between i128 and i256 accumulation
         let large_i128 = i128::MAX / 10;
         let decimal = DecimalArray::new(
@@ -351,4 +364,19 @@ mod tests {
 
         assert_eq!(result, expected);
     }
+
+    #[test]
+    fn test_i256_overflow() {
+        let decimal_dtype = DecimalDType::new(76, 0);
+        let decimal = DecimalArray::new(
+            buffer![i256::MAX, i256::MAX, i256::MAX],
+            decimal_dtype,
+            Validity::AllValid,
+        );
+
+        assert_eq!(
+            sum(decimal.as_ref()).vortex_unwrap(),
+            Scalar::null(DType::Decimal(decimal_dtype, Nullability::Nullable))
+        );
+    }
 }
diff --git a/vortex-dtype/src/decimal/mod.rs b/vortex-dtype/src/decimal/mod.rs
@@ -23,8 +23,10 @@ use vortex_error::vortex_panic;
 use crate::DType;
 use crate::i256;
 
-const MAX_PRECISION: u8 = <i256 as NativeDecimalType>::MAX_PRECISION;
-const MAX_SCALE: i8 = <i256 as NativeDecimalType>::MAX_SCALE;
+/// The maximum precision allowed for a decimal type.
+pub const MAX_PRECISION: u8 = <i256 as NativeDecimalType>::MAX_PRECISION;
+/// The maximum scale allowed for a decimal type.
+pub const MAX_SCALE: i8 = <i256 as NativeDecimalType>::MAX_SCALE;
 
 /// Parameters that define the precision and scale of a decimal type.
 ///