vortex-data
diff --git a/fuzz/fuzz_targets/array_ops.rs b/fuzz/fuzz_targets/array_ops.rs
Lines changed: 2 additions & 0 deletions
diff --git a/vortex-array/src/arrays/bool/compute/sum.rs b/vortex-array/src/arrays/bool/compute/sum.rs
Lines changed: 11 additions & 3 deletions
diff --git a/vortex-array/src/arrays/chunked/compute/sum.rs b/vortex-array/src/arrays/chunked/compute/sum.rs
Lines changed: 33 additions & 14 deletions
diff --git a/vortex-array/src/arrays/constant/compute/sum.rs b/vortex-array/src/arrays/constant/compute/sum.rs
Lines changed: 70 additions & 25 deletions
diff --git a/vortex-array/src/arrays/decimal/compute/sum.rs b/vortex-array/src/arrays/decimal/compute/sum.rs
Lines changed: 22 additions & 11 deletions
@@ -89,6 +89,8 @@ fuzz_target!(|fuzz_action: FuzzArrayAction| -> Corpus {
                 current_array = cast_result;
             }
             Action::Sum => {
+                println!("sum {}", current_array.display_tree());
+                println!("sum {}", current_array.display_values());
                 let sum_result = sum(&current_array).vortex_unwrap();
                 assert_scalar_eq(&expected.scalar(), &sum_result, i).unwrap();
             }
 
@@ -3,7 +3,7 @@
 
 use std::ops::BitAnd;
 
-use vortex_error::VortexResult;
+use vortex_error::{VortexExpect, VortexResult};
 use vortex_mask::AllOr;
 use vortex_scalar::Scalar;
 
@@ -12,7 +12,7 @@ use crate::compute::{SumKernel, SumKernelAdapter};
 use crate::register_kernel;
 
 impl SumKernel for BoolVTable {
-    fn sum(&self, array: &BoolArray) -> VortexResult<Scalar> {
+    fn sum(&self, array: &BoolArray, initial_value: &Scalar) -> VortexResult<Scalar> {
         let true_count: Option<u64> = match array.validity_mask().bit_buffer() {
             AllOr::All => {
                 // All-valid
@@ -26,7 +26,15 @@ impl SumKernel for BoolVTable {
                 Some(array.bit_buffer().bitand(validity_mask).true_count() as u64)
             }
         };
-        Ok(Scalar::from(true_count))
+
+        // Add initial_value to true_count
+        let initial_u64 = initial_value
+            .as_primitive()
+            .as_::<u64>()
+            .vortex_expect("cannot be null");
+        Ok(Scalar::from(
+            true_count.and_then(|tc| tc.checked_add(initial_u64)),
+        ))
     }
 }
 
 
@@ -1,10 +1,12 @@
 // SPDX-License-Identifier: Apache-2.0
 // SPDX-FileCopyrightText: Copyright the Vortex contributors
 
+use std::ops::AddAssign;
+
 use num_traits::PrimInt;
 use vortex_dtype::Nullability::Nullable;
-use vortex_dtype::{DType, DecimalDType, NativePType, i256, match_each_native_ptype};
-use vortex_error::{VortexResult, vortex_bail, vortex_err};
+use vortex_dtype::{DType, DecimalDType, NativePType, match_each_native_ptype};
+use vortex_error::{VortexExpect, VortexResult, vortex_bail, vortex_err};
 use vortex_scalar::{DecimalScalar, DecimalValue, Scalar};
 
 use crate::arrays::{ChunkedArray, ChunkedVTable};
@@ -13,19 +15,26 @@ use crate::stats::Stat;
 use crate::{ArrayRef, register_kernel};
 
 impl SumKernel for ChunkedVTable {
-    fn sum(&self, array: &ChunkedArray) -> VortexResult<Scalar> {
+    fn sum(&self, array: &ChunkedArray, initial_value: &Scalar) -> VortexResult<Scalar> {
         let sum_dtype = Stat::Sum
             .dtype(array.dtype())
             .ok_or_else(|| vortex_err!("Sum not supported for dtype {}", array.dtype()))?;
 
         match sum_dtype {
-            DType::Decimal(decimal_dtype, _) => sum_decimal(array.chunks(), decimal_dtype),
+            DType::Decimal(decimal_dtype, _) => sum_decimal(
+                array.chunks(),
+                decimal_dtype,
+                initial_value
+                    .as_decimal()
+                    .decimal_value()
+                    .vortex_expect("cannot be null"),
+            ),
             DType::Primitive(sum_ptype, _) => {
                 let scalar_value = match_each_native_ptype!(
                     sum_ptype,
-                    unsigned: |T| { sum_int::<u64>(array.chunks())?.into() },
-                    signed: |T| { sum_int::<i64>(array.chunks())?.into() },
-                    floating: |T| { sum_float(array.chunks())?.into() }
+                    unsigned: |T| { sum_int::<u64>(array.chunks(), initial_value.as_primitive().as_::<u64>().vortex_expect("cannot be null"))?.into() },
+                    signed: |T| { sum_int::<i64>(array.chunks(), initial_value.as_primitive().as_::<i64>().vortex_expect("cannot be null"))?.into() },
+                    floating: |T| { sum_float::<f64>(array.chunks(), initial_value.as_primitive().as_::<f64>().vortex_expect("cannot be null"))?.into() }
                 );
 
                 Ok(Scalar::new(sum_dtype, scalar_value))
@@ -39,8 +48,11 @@ impl SumKernel for ChunkedVTable {
 
 register_kernel!(SumKernelAdapter(ChunkedVTable).lift());
 
-fn sum_int<T: NativePType + PrimInt>(chunks: &[ArrayRef]) -> VortexResult<Option<T>> {
-    let mut result: T = T::zero();
+fn sum_int<T: NativePType + PrimInt>(
+    chunks: &[ArrayRef],
+    initial_value: T,
+) -> VortexResult<Option<T>> {
+    let mut result: T = initial_value;
     for chunk in chunks {
         let chunk_sum = sum(chunk)?;
         let Some(chunk_sum) = chunk_sum
@@ -56,19 +68,26 @@ fn sum_int<T: NativePType + PrimInt>(chunks: &[ArrayRef]) -> VortexResult<Option
     Ok(Some(result))
 }
 
-fn sum_float(chunks: &[ArrayRef]) -> VortexResult<Option<f64>> {
-    let mut result = 0f64;
+fn sum_float<T: NativePType + AddAssign>(
+    chunks: &[ArrayRef],
+    initial_value: T,
+) -> VortexResult<Option<T>> {
+    let mut result = initial_value;
     for chunk in chunks {
-        let Some(chunk_sum) = sum(chunk)?.as_primitive().as_::<f64>() else {
+        let Some(chunk_sum) = sum(chunk)?.as_primitive().as_::<T>() else {
             return Ok(None);
         };
         result += chunk_sum;
     }
     Ok(Some(result))
 }
 
-fn sum_decimal(chunks: &[ArrayRef], result_decimal_type: DecimalDType) -> VortexResult<Scalar> {
-    let mut result = DecimalValue::I256(i256::ZERO);
+fn sum_decimal(
+    chunks: &[ArrayRef],
+    result_decimal_type: DecimalDType,
+    initial_value: DecimalValue,
+) -> VortexResult<Scalar> {
+    let mut result = initial_value;
 
     let null = || Scalar::null(DType::Decimal(result_decimal_type, Nullable));
 
 
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: Apache-2.0
 // SPDX-FileCopyrightText: Copyright the Vortex contributors
 
-use num_traits::{CheckedMul, ToPrimitive};
+use num_traits::{CheckedAdd, CheckedMul, ToPrimitive};
 use vortex_dtype::{DType, DecimalDType, NativePType, Nullability, i256, match_each_native_ptype};
 use vortex_error::{VortexExpect, VortexResult, vortex_bail, vortex_err};
 use vortex_scalar::{DecimalScalar, DecimalValue, PrimitiveScalar, Scalar, ScalarValue};
@@ -12,32 +12,44 @@ use crate::register_kernel;
 use crate::stats::Stat;
 
 impl SumKernel for ConstantVTable {
-    fn sum(&self, array: &ConstantArray) -> VortexResult<Scalar> {
+    fn sum(&self, array: &ConstantArray, initial_value: &Scalar) -> VortexResult<Scalar> {
         // Compute the expected dtype of the sum.
         let sum_dtype = Stat::Sum
             .dtype(array.dtype())
             .ok_or_else(|| vortex_err!("Sum not supported for dtype {}", array.dtype()))?;
 
-        let sum_value = sum_scalar(array.scalar(), array.len())?;
+        let sum_value = sum_scalar(array.scalar(), array.len(), initial_value)?;
         Ok(Scalar::new(sum_dtype, sum_value))
     }
 }
 
-fn sum_scalar(scalar: &Scalar, len: usize) -> VortexResult<ScalarValue> {
+fn sum_scalar(scalar: &Scalar, len: usize, acc: &Scalar) -> VortexResult<ScalarValue> {
     match scalar.dtype() {
-        DType::Bool(_) => Ok(ScalarValue::from(match scalar.as_bool().value() {
-            None => unreachable!("Handled before reaching this point"),
-            Some(false) => 0u64,
-            Some(true) => len as u64,
-        })),
-        DType::Primitive(ptype, _) => Ok(match_each_native_ptype!(
-            ptype,
-            unsigned: |T| { sum_integral::<u64>(scalar.as_primitive(), len)?.into() },
-            signed: |T| { sum_integral::<i64>(scalar.as_primitive(), len)?.into() },
-            floating: |T| { sum_float(scalar.as_primitive(), len)?.into() }
-        )),
-        DType::Decimal(decimal_dtype, _) => sum_decimal(scalar.as_decimal(), len, *decimal_dtype),
-        DType::Extension(_) => sum_scalar(&scalar.as_extension().storage(), len),
+        DType::Bool(_) => {
+            let count = match scalar.as_bool().value() {
+                None => unreachable!("Handled before reaching this point"),
+                Some(false) => 0u64,
+                Some(true) => len as u64,
+            };
+            let initial_u64 = acc
+                .as_primitive()
+                .as_::<u64>()
+                .vortex_expect("cannot be null");
+            Ok(ScalarValue::from(initial_u64.checked_add(count)))
+        }
+        DType::Primitive(ptype, _) => {
+            let result = match_each_native_ptype!(
+                ptype,
+                unsigned: |T| { sum_integral::<u64>(scalar.as_primitive(), len, acc)?.into() },
+                signed: |T| { sum_integral::<i64>(scalar.as_primitive(), len, acc)?.into() },
+                floating: |T| { sum_float(scalar.as_primitive(), len, acc)?.into() }
+            );
+            Ok(result)
+        }
+        DType::Decimal(decimal_dtype, _) => {
+            sum_decimal(scalar.as_decimal(), len, *decimal_dtype, acc)
+        }
+        DType::Extension(_) => sum_scalar(&scalar.as_extension().storage(), len, acc),
         dtype => vortex_bail!("Unsupported dtype for sum: {}", dtype),
     }
 }
@@ -46,6 +58,7 @@ fn sum_decimal(
     decimal_scalar: DecimalScalar,
     array_len: usize,
     decimal_dtype: DecimalDType,
+    initial_value: &Scalar,
 ) -> VortexResult<ScalarValue> {
     let result_dtype = Stat::Sum
         .dtype(&DType::Decimal(decimal_dtype, Nullability::Nullable))
@@ -63,43 +76,75 @@ fn sum_decimal(
     let len_value = DecimalValue::I256(i256::from_i128(array_len as i128));
 
     // Multiply value * len
-    let sum = value.checked_mul(&len_value).and_then(|result| {
+    let array_sum = value.checked_mul(&len_value).and_then(|result| {
         // Check if result fits in the precision
         result
             .fits_in_precision(*result_decimal_type)
             .unwrap_or(false)
             .then_some(result)
     });
 
-    match sum {
-        Some(result_value) => Ok(ScalarValue::from(result_value)),
+    // Add initial_value to array_sum
+    let initial_decimal = DecimalScalar::try_from(initial_value)?;
+    let initial_dec_value = initial_decimal
+        .decimal_value()
+        .unwrap_or(DecimalValue::I256(i256::ZERO));
+
+    match array_sum {
+        Some(array_sum_value) => {
+            let total = array_sum_value
+                .checked_add(&initial_dec_value)
+                .and_then(|result| {
+                    result
+                        .fits_in_precision(*result_decimal_type)
+                        .unwrap_or(false)
+                        .then_some(result)
+                });
+            match total {
+                Some(result_value) => Ok(ScalarValue::from(result_value)),
+                None => Ok(ScalarValue::null()), // Overflow
+            }
+        }
         None => Ok(ScalarValue::null()), // Overflow
     }
 }
 
 fn sum_integral<T>(
     primitive_scalar: PrimitiveScalar<'_>,
     array_len: usize,
+    initial_value: &Scalar,
 ) -> VortexResult<Option<T>>
 where
-    T: NativePType + CheckedMul,
+    T: NativePType + CheckedMul + CheckedAdd,
     Scalar: From<Option<T>>,
 {
     let v = primitive_scalar.as_::<T>();
     let array_len =
         T::from(array_len).ok_or_else(|| vortex_err!("array_len must fit the sum type"))?;
-    let sum = v.and_then(|v| v.checked_mul(&array_len));
+    let Some(array_sum) = v.and_then(|v| v.checked_mul(&array_len)) else {
+        return Ok(None);
+    };
 
-    Ok(sum)
+    let initial = initial_value
+        .as_primitive()
+        .as_::<T>()
+        .unwrap_or_else(T::zero);
+    Ok(initial.checked_add(&array_sum))
 }
 
-fn sum_float(primitive_scalar: PrimitiveScalar<'_>, array_len: usize) -> VortexResult<Option<f64>> {
+fn sum_float(
+    primitive_scalar: PrimitiveScalar<'_>,
+    array_len: usize,
+    initial_value: &Scalar,
+) -> VortexResult<Option<f64>> {
     let v = primitive_scalar.as_::<f64>();
     let array_len = array_len
         .to_f64()
         .ok_or_else(|| vortex_err!("array_len must fit the sum type"))?;
 
-    Ok(v.map(|v| v * array_len))
+    let array_sum = v.map(|v| v * array_len).unwrap_or(0.0);
+    let initial = initial_value.as_primitive().as_::<f64>().unwrap_or(0.0);
+    Ok(Some(initial + array_sum))
 }
 
 register_kernel!(SumKernelAdapter(ConstantVTable).lift());
 
@@ -5,32 +5,34 @@ use arrow_schema::DECIMAL256_MAX_PRECISION;
 use num_traits::AsPrimitive;
 use vortex_dtype::Nullability::Nullable;
 use vortex_dtype::{DecimalDType, DecimalType, match_each_decimal_value_type};
-use vortex_error::{VortexResult, vortex_bail};
+use vortex_error::{VortexExpect, VortexResult, vortex_bail, vortex_err};
 use vortex_mask::Mask;
-use vortex_scalar::{DecimalValue, Scalar};
+use vortex_scalar::{DecimalScalar, DecimalValue, Scalar};
 
 use crate::arrays::{DecimalArray, DecimalVTable};
 use crate::compute::{SumKernel, SumKernelAdapter};
 use crate::register_kernel;
 
 // Its safe to use `AsPrimitive` here because we always cast up.
 macro_rules! sum_decimal {
-    ($ty:ty, $values:expr) => {{
-        let mut sum: $ty = <$ty>::default();
+    ($ty:ty, $values:expr, $initial:expr) => {{
+        let mut sum: $ty = $initial;
         for v in $values.iter() {
             let v: $ty = (*v).as_();
-            sum += v;
+            sum = num_traits::CheckedAdd::checked_add(&sum, &v)
+                .ok_or_else(|| vortex_err!("Overflow when summing decimal {sum:?} + {v:?}"))?
         }
         sum
     }};
-    ($ty:ty, $values:expr, $validity:expr) => {{
+    ($ty:ty, $values:expr, $validity:expr, $initial:expr) => {{
         use itertools::Itertools;
 
-        let mut sum: $ty = <$ty>::default();
+        let mut sum: $ty = $initial;
         for (v, valid) in $values.iter().zip_eq($validity) {
             if valid {
                 let v: $ty = (*v).as_();
-                sum += v;
+                sum = num_traits::CheckedAdd::checked_add(&sum, &v)
+                    .ok_or_else(|| vortex_err!("Overflow when summing decimal {sum:?} + {v:?}"))?
             }
         }
         sum
@@ -39,7 +41,7 @@ macro_rules! sum_decimal {
 
 impl SumKernel for DecimalVTable {
     #[allow(clippy::cognitive_complexity)]
-    fn sum(&self, array: &DecimalArray) -> VortexResult<Scalar> {
+    fn sum(&self, array: &DecimalArray, initial_value: &Scalar) -> VortexResult<Scalar> {
         let decimal_dtype = array.decimal_dtype();
 
         // Both Spark and DataFusion use this heuristic.
@@ -49,6 +51,12 @@ impl SumKernel for DecimalVTable {
         let new_scale = decimal_dtype.scale();
         let return_dtype = DecimalDType::new(new_precision, new_scale);
 
+        // Extract the initial value as a DecimalValue
+        let initial_decimal = DecimalScalar::try_from(initial_value)
+            .vortex_expect("must be a decimal")
+            .decimal_value()
+            .vortex_expect("cannot be null");
+
         match array.validity_mask() {
             Mask::AllFalse(_) => {
                 vortex_bail!("invalid state, all-null array should be checked by top-level sum fn")
@@ -57,8 +65,9 @@ impl SumKernel for DecimalVTable {
                 let values_type = DecimalType::smallest_decimal_value_type(&return_dtype);
                 match_each_decimal_value_type!(array.values_type(), |I| {
                     match_each_decimal_value_type!(values_type, |O| {
+                        let initial_val: O = initial_decimal.cast().unwrap_or_else(O::default);
                         Ok(Scalar::decimal(
-                            DecimalValue::from(sum_decimal!(O, array.buffer::<I>())),
+                            DecimalValue::from(sum_decimal!(O, array.buffer::<I>(), initial_val)),
                             return_dtype,
                             Nullable,
                         ))
@@ -69,11 +78,13 @@ impl SumKernel for DecimalVTable {
                 let values_type = DecimalType::smallest_decimal_value_type(&return_dtype);
                 match_each_decimal_value_type!(array.values_type(), |I| {
                     match_each_decimal_value_type!(values_type, |O| {
+                        let initial_val: O = initial_decimal.cast().unwrap_or_else(O::default);
                         Ok(Scalar::decimal(
                             DecimalValue::from(sum_decimal!(
                                 O,
                                 array.buffer::<I>(),
-                                mask_values.bit_buffer()
+                                mask_values.bit_buffer(),
+                                initial_val
                             )),
                             return_dtype,
                             Nullable,
| Original file line number | Diff line number | Diff line change |
| --- | --- | --- |
| | | `@@ -89,6 +89,8 @@ fuzz_target!(\|fuzz_action: FuzzArrayAction\| -> Corpus {` |
| `89` | `89` | `current_array = cast_result;` |
| `90` | `90` | `}` |
| `91` | `91` | `Action::Sum => {` |
| | `92` | `+ println!("sum {}", current_array.display_tree());` |
| | `93` | `+ println!("sum {}", current_array.display_values());` |
| `92` | `94` | `let sum_result = sum(&current_array).vortex_unwrap();` |
| `93` | `95` | `assert_scalar_eq(&expected.scalar(), &sum_result, i).unwrap();` |
| `94` | `96` | `}` |