feat: Add checked_add and checked_subtract for Spark decimal arithmetic

n0r0shi · n0r0shi · commit 8805bc22e630 · 2026-02-14T05:12:21.000Z
Add checked decimal add and subtract functions that throw on overflow
instead of returning null. These are needed for Spark's ANSI mode where
arithmetic overflow should raise an error rather than silently produce null.
diff --git a/velox/docs/functions/spark/decimal.rst b/velox/docs/functions/spark/decimal.rst
@@ -143,6 +143,40 @@ Arithmetic Functions
     Division by zero or overflow results in an error.
     Corresponds to Spark's operator ``div`` with ``spark.sql.ansi.enabled`` set to true.
 
+.. spark:function:: add(x: decimal(p1, s1), y: decimal(p2, s2)) -> r: decimal(p3, s3)
+
+    Returns the result of adding ``x`` and ``y``. The result type is determined
+    by the precision and scale computation rules described above.
+    Returns NULL when the result overflows.
+    Corresponds to Spark's operator ``+`` with ``spark.sql.ansi.enabled`` set to false.  ::
+
+        SELECT CAST(1.1 as DECIMAL(3, 1)) + CAST(2.2 as DECIMAL(3, 1)); -- 3.3
+        SELECT CAST('99999999999999999999999999999999999999' as DECIMAL(38, 0)) + CAST(1 as DECIMAL(38, 0)); -- NULL
+
+.. spark:function:: checked_add(x: decimal(p1, s1), y: decimal(p2, s2)) -> r: decimal(p3, s3)
+
+    Returns the result of adding ``x`` and ``y``. The result type is determined
+    by the precision and scale computation rules described above.
+    Throws an error when the result overflows.
+    Corresponds to Spark's operator ``+`` with ``spark.sql.ansi.enabled`` set to true.
+
+.. spark:function:: subtract(x: decimal(p1, s1), y: decimal(p2, s2)) -> r: decimal(p3, s3)
+
+    Returns the result of subtracting ``y`` from ``x``. The result type is determined
+    by the precision and scale computation rules described above.
+    Returns NULL when the result overflows.
+    Corresponds to Spark's operator ``-`` with ``spark.sql.ansi.enabled`` set to false.  ::
+
+        SELECT CAST(1.1 as DECIMAL(3, 1)) - CAST(2.2 as DECIMAL(3, 1)); -- -1.1
+        SELECT CAST('-99999999999999999999999999999999999999' as DECIMAL(38, 0)) - CAST(1 as DECIMAL(38, 0)); -- NULL
+
+.. spark:function:: checked_subtract(x: decimal(p1, s1), y: decimal(p2, s2)) -> r: decimal(p3, s3)
+
+    Returns the result of subtracting ``y`` from ``x``. The result type is determined
+    by the precision and scale computation rules described above.
+    Throws an error when the result overflows.
+    Corresponds to Spark's operator ``-`` with ``spark.sql.ansi.enabled`` set to true.
+
 Decimal Functions
 -----------------
 .. spark:function:: ceil(x: decimal(p, s)) -> r: decimal(pr, 0)
diff --git a/velox/functions/sparksql/DecimalArithmetic.cpp b/velox/functions/sparksql/DecimalArithmetic.cpp
@@ -328,6 +328,50 @@ struct DecimalSubtractFunction : DecimalAddSubtractBase {
   }
 };
 
+// Decimal add; overflow raises a user error instead of returning null.
+template <typename TExec, bool allowPrecisionLoss>
+struct CheckedDecimalAddFunction : DecimalAddSubtractBase {
+  VELOX_DEFINE_FUNCTION_TYPES(TExec);
+
+  template <typename A, typename B>
+  void initialize(
+      const std::vector<TypePtr>& inputTypes,
+      const core::QueryConfig& /*config*/,
+      A* /*a*/,
+      B* /*b*/) {
+    initializeBase<allowPrecisionLoss>(inputTypes);
+  }
+
+  template <typename R, typename A, typename B>
+  Status call(R& out, const A& a, const B& b) {
+    bool valid = applyAdd<R, A, B>(out, a, b); // false => overflow
+    VELOX_USER_RETURN(!valid, "Decimal overflow in add");
+    return Status::OK();
+  }
+};
+
+// Decimal subtract; overflow raises a user error instead of returning null.
+template <typename TExec, bool allowPrecisionLoss>
+struct CheckedDecimalSubtractFunction : DecimalAddSubtractBase {
+  VELOX_DEFINE_FUNCTION_TYPES(TExec);
+
+  template <typename A, typename B>
+  void initialize(
+      const std::vector<TypePtr>& inputTypes,
+      const core::QueryConfig& /*config*/,
+      A* /*a*/,
+      B* /*b*/) {
+    initializeBase<allowPrecisionLoss>(inputTypes);
+  }
+
+  template <typename R, typename A, typename B>
+  Status call(R& out, const A& a, const B& b) {
+    bool valid = applyAdd<R, A, B>(out, a, B(-b)); // a - b as a + (-b)
+    VELOX_USER_RETURN(!valid, "Decimal overflow in subtract");
+    return Status::OK();
+  }
+};
+
 template <typename TExec, bool allowPrecisionLoss>
 struct DecimalMultiplyFunction {
   VELOX_DEFINE_FUNCTION_TYPES(TExec);
@@ -686,6 +730,22 @@ using DivideFunctionAllowPrecisionLoss = DecimalDivideFunction<TExec, true>;
 template <typename TExec>
 using DivideFunctionDenyPrecisionLoss = DecimalDivideFunction<TExec, false>;
 
+template <typename TExec>
+using CheckedAddFunctionAllowPrecisionLoss =
+    CheckedDecimalAddFunction<TExec, true>;
+
+template <typename TExec>
+using CheckedAddFunctionDenyPrecisionLoss =
+    CheckedDecimalAddFunction<TExec, false>;
+
+template <typename TExec>
+using CheckedSubtractFunctionAllowPrecisionLoss =
+    CheckedDecimalSubtractFunction<TExec, true>;
+
+template <typename TExec>
+using CheckedSubtractFunctionDenyPrecisionLoss =
+    CheckedDecimalSubtractFunction<TExec, false>;
+
 std::vector<exec::SignatureVariable> getDivideConstraintsDenyPrecisionLoss() {
   std::string wholeDigits = fmt::format(
       "min(38, {a_precision} - {a_scale} + {b_scale})",
@@ -781,6 +841,11 @@ void registerDecimalAdd(const std::string& prefix) {
   registerDecimalBinary<AddFunctionDenyPrecisionLoss>(
       prefix + "add" + kDenyPrecisionLoss,
       makeConstraints(rPrecision, rScale, false));
+  registerDecimalBinary<CheckedAddFunctionAllowPrecisionLoss>(
+      prefix + "checked_add", makeConstraints(rPrecision, rScale, true));
+  registerDecimalBinary<CheckedAddFunctionDenyPrecisionLoss>(
+      prefix + "checked_add" + kDenyPrecisionLoss,
+      makeConstraints(rPrecision, rScale, false));
 }
 
 void registerDecimalSubtract(const std::string& prefix) {
@@ -790,6 +855,11 @@ void registerDecimalSubtract(const std::string& prefix) {
   registerDecimalBinary<SubtractFunctionDenyPrecisionLoss>(
       prefix + "subtract" + kDenyPrecisionLoss,
       makeConstraints(rPrecision, rScale, false));
+  registerDecimalBinary<CheckedSubtractFunctionAllowPrecisionLoss>(
+      prefix + "checked_subtract", makeConstraints(rPrecision, rScale, true));
+  registerDecimalBinary<CheckedSubtractFunctionDenyPrecisionLoss>(
+      prefix + "checked_subtract" + kDenyPrecisionLoss,
+      makeConstraints(rPrecision, rScale, false));
 }
 
 void registerDecimalMultiply(const std::string& prefix) {
diff --git a/velox/functions/sparksql/tests/DecimalArithmeticTest.cpp b/velox/functions/sparksql/tests/DecimalArithmeticTest.cpp
@@ -88,6 +88,25 @@ class DecimalArithmeticTest : public SparkFunctionBaseTest {
       std::optional<U> u) {
     return evaluateOnce<int64_t>("checked_div(c0, c1)", {tType, uType}, t, u);
   }
+
+  template <typename T, typename U>
+  std::optional<int128_t> checked_add(
+      const TypePtr& tType,
+      const TypePtr& uType,
+      std::optional<T> t,
+      std::optional<U> u) {
+    return evaluateOnce<int128_t>("checked_add(c0, c1)", {tType, uType}, t, u);
+  }
+
+  template <typename T, typename U>
+  std::optional<int128_t> checked_subtract(
+      const TypePtr& tType,
+      const TypePtr& uType,
+      std::optional<T> t,
+      std::optional<U> u) {
+    return evaluateOnce<int128_t>(
+        "checked_subtract(c0, c1)", {tType, uType}, t, u);
+  }
 };
 
 TEST_F(DecimalArithmeticTest, add) {
@@ -833,5 +852,170 @@ TEST_F(DecimalArithmeticTest, checkedDiv) {
           1)),
       "Overflow in integral divide");
 }
+
+TEST_F(DecimalArithmeticTest, checkedAdd) {
+  // Non-overflowing sums for every short/long decimal input pairing.
+  // DECIMAL(18, 2) inputs yield result precision 19 (> 18), a long decimal.
+  EXPECT_EQ(
+      (checked_add<int64_t, int64_t>(DECIMAL(18, 2), DECIMAL(18, 2), 100, 200)),
+      300);
+  EXPECT_EQ(
+      (checked_add<int64_t, int128_t>(
+          DECIMAL(18, 2), DECIMAL(20, 2), 100, 200)),
+      300);
+  EXPECT_EQ(
+      (checked_add<int128_t, int64_t>(
+          DECIMAL(20, 2), DECIMAL(18, 2), 100, 200)),
+      300);
+  EXPECT_EQ(
+      (checked_add<int128_t, int128_t>(
+          DECIMAL(20, 2), DECIMAL(20, 2), 100, 200)),
+      300);
+
+  // Adding zero leaves the other operand unchanged.
+  EXPECT_EQ(
+      (checked_add<int64_t, int64_t>(DECIMAL(18, 2), DECIMAL(18, 2), 0, 100)),
+      100);
+
+  // Mixed signs: a negative plus a smaller positive stays negative.
+  EXPECT_EQ(
+      (checked_add<int64_t, int64_t>(DECIMAL(18, 2), DECIMAL(18, 2), -100, 50)),
+      -50);
+
+  // Result precision is capped at 38; small values still fit.
+  EXPECT_EQ(
+      (checked_add<int128_t, int128_t>(
+          DECIMAL(38, 0), DECIMAL(38, 0), 100, 200)),
+      300);
+
+  // Near-boundary success via the addLarge path: the sum is 10^38 - 2.
+  EXPECT_EQ(
+      (checked_add<int128_t, int128_t>(
+          DECIMAL(38, 0),
+          DECIMAL(38, 0),
+          HugeInt::parse("49999999999999999999999999999999999999"),
+          HugeInt::parse("49999999999999999999999999999999999999"))),
+      HugeInt::parse("99999999999999999999999999999999999998"));
+
+  // Positive overflow (max + max) should throw.
+  VELOX_ASSERT_USER_THROW(
+      (checked_add<int128_t, int128_t>(
+          DECIMAL(38, 0),
+          DECIMAL(38, 0),
+          HugeInt::parse("99999999999999999999999999999999999999"),
+          HugeInt::parse("99999999999999999999999999999999999999"))),
+      "Decimal overflow in add");
+
+  // Minimal positive overflow: the DECIMAL(38, 0) maximum plus 1.
+  VELOX_ASSERT_USER_THROW(
+      (checked_add<int128_t, int128_t>(
+          DECIMAL(38, 0),
+          DECIMAL(38, 0),
+          HugeInt::parse("99999999999999999999999999999999999999"),
+          1)),
+      "Decimal overflow in add");
+
+  // Negative overflow (min + min) should throw.
+  VELOX_ASSERT_USER_THROW(
+      (checked_add<int128_t, int128_t>(
+          DECIMAL(38, 0),
+          DECIMAL(38, 0),
+          HugeInt::parse("-99999999999999999999999999999999999999"),
+          HugeInt::parse("-99999999999999999999999999999999999999"))),
+      "Decimal overflow in add");
+
+  // Minimal negative overflow: the DECIMAL(38, 0) minimum plus -1.
+  VELOX_ASSERT_USER_THROW(
+      (checked_add<int128_t, int128_t>(
+          DECIMAL(38, 0),
+          DECIMAL(38, 0),
+          HugeInt::parse("-99999999999999999999999999999999999999"),
+          -1)),
+      "Decimal overflow in add");
+}
+
+TEST_F(DecimalArithmeticTest, checkedSubtract) {
+  // Non-overflowing differences for every short/long decimal input pairing.
+  // DECIMAL(18, 2) inputs yield result precision 19 (> 18), a long decimal.
+  EXPECT_EQ(
+      (checked_subtract<int64_t, int64_t>(
+          DECIMAL(18, 2), DECIMAL(18, 2), 300, 200)),
+      100);
+  EXPECT_EQ(
+      (checked_subtract<int64_t, int128_t>(
+          DECIMAL(18, 2), DECIMAL(20, 2), 300, 200)),
+      100);
+  EXPECT_EQ(
+      (checked_subtract<int128_t, int64_t>(
+          DECIMAL(20, 2), DECIMAL(18, 2), 300, 200)),
+      100);
+  EXPECT_EQ(
+      (checked_subtract<int128_t, int128_t>(
+          DECIMAL(20, 2), DECIMAL(20, 2), 300, 200)),
+      100);
+
+  // Subtracting zero leaves the minuend unchanged.
+  EXPECT_EQ(
+      (checked_subtract<int64_t, int64_t>(
+          DECIMAL(18, 2), DECIMAL(18, 2), 100, 0)),
+      100);
+
+  // Subtracting a negative value is effectively an addition.
+  EXPECT_EQ(
+      (checked_subtract<int64_t, int64_t>(
+          DECIMAL(18, 2), DECIMAL(18, 2), 100, -50)),
+      150);
+
+  // Result precision is capped at 38; small values still fit.
+  EXPECT_EQ(
+      (checked_subtract<int128_t, int128_t>(
+          DECIMAL(38, 0), DECIMAL(38, 0), 300, 200)),
+      100);
+
+  // Near-boundary success via the addLarge path: the result is 10^38 - 2.
+  EXPECT_EQ(
+      (checked_subtract<int128_t, int128_t>(
+          DECIMAL(38, 0),
+          DECIMAL(38, 0),
+          HugeInt::parse("49999999999999999999999999999999999999"),
+          HugeInt::parse("-49999999999999999999999999999999999999"))),
+      HugeInt::parse("99999999999999999999999999999999999998"));
+
+  // Negative overflow (min - max) should throw.
+  VELOX_ASSERT_USER_THROW(
+      (checked_subtract<int128_t, int128_t>(
+          DECIMAL(38, 0),
+          DECIMAL(38, 0),
+          HugeInt::parse("-99999999999999999999999999999999999999"),
+          HugeInt::parse("99999999999999999999999999999999999999"))),
+      "Decimal overflow in subtract");
+
+  // Minimal negative overflow: the DECIMAL(38, 0) minimum minus 1.
+  VELOX_ASSERT_USER_THROW(
+      (checked_subtract<int128_t, int128_t>(
+          DECIMAL(38, 0),
+          DECIMAL(38, 0),
+          HugeInt::parse("-99999999999999999999999999999999999999"),
+          1)),
+      "Decimal overflow in subtract");
+
+  // Positive overflow (max - min) should throw.
+  VELOX_ASSERT_USER_THROW(
+      (checked_subtract<int128_t, int128_t>(
+          DECIMAL(38, 0),
+          DECIMAL(38, 0),
+          HugeInt::parse("99999999999999999999999999999999999999"),
+          HugeInt::parse("-99999999999999999999999999999999999999"))),
+      "Decimal overflow in subtract");
+
+  // Minimal positive overflow: the DECIMAL(38, 0) maximum minus -1.
+  VELOX_ASSERT_USER_THROW(
+      (checked_subtract<int128_t, int128_t>(
+          DECIMAL(38, 0),
+          DECIMAL(38, 0),
+          HugeInt::parse("99999999999999999999999999999999999999"),
+          -1)),
+      "Decimal overflow in subtract");
+}
 } // namespace
 } // namespace facebook::velox::functions::sparksql::test