feat: Add checked_multiply for Spark decimal arithmetic

n0r0shi · n0r0shi · commit 0118b9e7c3ef · 2026-02-14T05:23:03.000Z
Add a checked decimal multiply function, checked_multiply, that throws on
overflow instead of returning null. This is needed for Spark's ANSI mode, where
arithmetic overflow should raise an error rather than silently produce null.
diff --git a/velox/docs/functions/spark/decimal.rst b/velox/docs/functions/spark/decimal.rst
@@ -143,6 +143,23 @@ Arithmetic Functions
     Division by zero or overflow results in an error.
     Corresponds to Spark's operator ``div`` with ``spark.sql.ansi.enabled`` set to true.
 
+.. spark:function:: multiply(x: decimal(p1, s1), y: decimal(p2, s2)) -> r: decimal(p3, s3)
+
+    Returns the result of multiplying ``x`` and ``y``. The result type is determined
+    by the precision and scale computation rules described above.
+    Returns NULL when the result overflows.
+    Corresponds to Spark's operator ``*`` with ``spark.sql.ansi.enabled`` set to false.  ::
+
+        SELECT CAST(1.1 as DECIMAL(3, 1)) * CAST(2.0 as DECIMAL(3, 1)); -- 2.20
+        SELECT CAST('99999999999999999999999999999999999999' as DECIMAL(38, 0)) * CAST(10 as DECIMAL(38, 0)); -- NULL
+
+.. spark:function:: checked_multiply(x: decimal(p1, s1), y: decimal(p2, s2)) -> r: decimal(p3, s3)
+
+    Returns the result of multiplying ``x`` and ``y``. The result type is determined
+    by the precision and scale computation rules described above.
+    Throws an error when the result overflows.
+    Corresponds to Spark's operator ``*`` with ``spark.sql.ansi.enabled`` set to true.
+
 Decimal Functions
 -----------------
 .. spark:function:: ceil(x: decimal(p, s)) -> r: decimal(pr, 0)
diff --git a/velox/functions/sparksql/DecimalArithmetic.cpp b/velox/functions/sparksql/DecimalArithmetic.cpp
@@ -442,6 +442,21 @@ struct DecimalMultiplyFunction {
   int32_t deltaScale_;
 };
 
+// Multiplies decimals; overflow yields a user error rather than a null result.
+template <typename TExec, bool allowPrecisionLoss>
+struct CheckedDecimalMultiplyFunction
+    : DecimalMultiplyFunction<TExec, allowPrecisionLoss> {
+  VELOX_DEFINE_FUNCTION_TYPES(TExec);
+
+  template <typename R, typename A, typename B>
+  Status call(R& out, const A& a, const B& b) {
+    bool valid = DecimalMultiplyFunction<TExec, allowPrecisionLoss>::
+        template call<R, A, B>(out, a, b);
+    VELOX_USER_RETURN(!valid, "Decimal overflow in multiply");
+    return Status::OK();
+  }
+};
+
 template <typename TExec, bool allowPrecisionLoss>
 struct DecimalDivideFunction {
   VELOX_DEFINE_FUNCTION_TYPES(TExec);
@@ -686,6 +701,14 @@ using DivideFunctionAllowPrecisionLoss = DecimalDivideFunction<TExec, true>;
 template <typename TExec>
 using DivideFunctionDenyPrecisionLoss = DecimalDivideFunction<TExec, false>;
 
+template <typename TExec>
+using CheckedMultiplyFunctionAllowPrecisionLoss =
+    CheckedDecimalMultiplyFunction<TExec, true>; // allowPrecisionLoss = true
+
+template <typename TExec>
+using CheckedMultiplyFunctionDenyPrecisionLoss =
+    CheckedDecimalMultiplyFunction<TExec, false>; // allowPrecisionLoss = false
+
 std::vector<exec::SignatureVariable> getDivideConstraintsDenyPrecisionLoss() {
   std::string wholeDigits = fmt::format(
       "min(38, {a_precision} - {a_scale} + {b_scale})",
@@ -806,6 +829,11 @@ void registerDecimalMultiply(const std::string& prefix) {
   registerDecimalBinary<MultiplyFunctionDenyPrecisionLoss>(
       prefix + "multiply" + kDenyPrecisionLoss,
       makeConstraints(rPrecision, rScale, false));
+  registerDecimalBinary<CheckedMultiplyFunctionAllowPrecisionLoss>(
+      prefix + "checked_multiply", makeConstraints(rPrecision, rScale, true));
+  registerDecimalBinary<CheckedMultiplyFunctionDenyPrecisionLoss>(
+      prefix + "checked_multiply" + kDenyPrecisionLoss,
+      makeConstraints(rPrecision, rScale, false));
 }
 
 void registerDecimalDivide(const std::string& prefix) {
diff --git a/velox/functions/sparksql/tests/DecimalArithmeticTest.cpp b/velox/functions/sparksql/tests/DecimalArithmeticTest.cpp
@@ -88,6 +88,16 @@ class DecimalArithmeticTest : public SparkFunctionBaseTest {
       std::optional<U> u) {
     return evaluateOnce<int64_t>("checked_div(c0, c1)", {tType, uType}, t, u);
   }
+
+  template <typename T, typename U> // T, U: C++ types of the values t and u.
+  std::optional<int128_t> checked_multiply(
+      const TypePtr& tType,
+      const TypePtr& uType,
+      std::optional<T> t,
+      std::optional<U> u) {
+    return evaluateOnce<int128_t>(
+        "checked_multiply(c0, c1)", {tType, uType}, t, u);
+  }
 };
 
 TEST_F(DecimalArithmeticTest, add) {
@@ -833,5 +843,83 @@ TEST_F(DecimalArithmeticTest, checkedDiv) {
           1)),
       "Overflow in integral divide");
 }
+
+TEST_F(DecimalArithmeticTest, checkedMultiply) {
+  // Normal cases: DECIMAL(17,3) * DECIMAL(17,3) -> result precision 35 (long).
+  // 1.000 * 2.000 = 2.000000 (unscaled: 1000 * 2000 = 2000000).
+  EXPECT_EQ(
+      (checked_multiply<int64_t, int64_t>(
+          DECIMAL(17, 3), DECIMAL(17, 3), 1000, 2000)),
+      2000000);
+  EXPECT_EQ(
+      (checked_multiply<int64_t, int128_t>(
+          DECIMAL(17, 3), DECIMAL(20, 3), 1000, 2000)),
+      2000000);
+  EXPECT_EQ(
+      (checked_multiply<int128_t, int64_t>(
+          DECIMAL(20, 3), DECIMAL(17, 3), 1000, 2000)),
+      2000000);
+  EXPECT_EQ(
+      (checked_multiply<int128_t, int128_t>(
+          DECIMAL(20, 3), DECIMAL(20, 3), 1000, 2000)),
+      2000000);
+
+  // Multiplying by zero.
+  EXPECT_EQ(
+      (checked_multiply<int64_t, int64_t>(
+          DECIMAL(17, 3), DECIMAL(17, 3), 0, 2000)),
+      0);
+
+  // Multiplying negative numbers: (-1.000) * 2.000 = -2.000000.
+  EXPECT_EQ(
+      (checked_multiply<int64_t, int64_t>(
+          DECIMAL(17, 3), DECIMAL(17, 3), -1000, 2000)),
+      -2000000);
+
+  // Small values in the widest type must not be reported as overflow.
+  // DECIMAL(38,0) * DECIMAL(38,0) -> result precision capped at 38, scale 0.
+  EXPECT_EQ(
+      (checked_multiply<int128_t, int128_t>(
+          DECIMAL(38, 0), DECIMAL(38, 0), 100, 200)),
+      20000);
+
+  // Near-boundary success: large values that just fit.
+  // 1e18 * 1e19 = 1e37, which fits in DECIMAL(38,0).
+  EXPECT_EQ(
+      (checked_multiply<int128_t, int128_t>(
+          DECIMAL(38, 0),
+          DECIMAL(38, 0),
+          HugeInt::parse("1000000000000000000"),
+          HugeInt::parse("10000000000000000000"))),
+      HugeInt::parse("10000000000000000000000000000000000000"));
+
+  // Positive overflow should throw.
+  // 1e19 * 1e19 = 1e38, which exceeds max DECIMAL(38,0).
+  VELOX_ASSERT_USER_THROW(
+      (checked_multiply<int128_t, int128_t>(
+          DECIMAL(38, 0),
+          DECIMAL(38, 0),
+          HugeInt::parse("10000000000000000000"),
+          HugeInt::parse("10000000000000000000"))),
+      "Decimal overflow in multiply");
+
+  // Negative overflow (positive * negative) should throw: -1e38 is too small.
+  VELOX_ASSERT_USER_THROW(
+      (checked_multiply<int128_t, int128_t>(
+          DECIMAL(38, 0),
+          DECIMAL(38, 0),
+          HugeInt::parse("10000000000000000000"),
+          HugeInt::parse("-10000000000000000000"))),
+      "Decimal overflow in multiply");
+
+  // Negative * negative overflow should throw: the result 1e38 is too large.
+  VELOX_ASSERT_USER_THROW(
+      (checked_multiply<int128_t, int128_t>(
+          DECIMAL(38, 0),
+          DECIMAL(38, 0),
+          HugeInt::parse("-10000000000000000000"),
+          HugeInt::parse("-10000000000000000000"))),
+      "Decimal overflow in multiply");
+}
 } // namespace
 } // namespace facebook::velox::functions::sparksql::test