Merge pull request #733 from cppalliance/faster_dec128_mul

mborland · web-flow · commit 91811d0f6051 · 2024-09-24T18:24:00.000-04:00
Faster dec128 mul
diff --git a/include/boost/decimal/decimal128.hpp b/include/boost/decimal/decimal128.hpp
@@ -1682,15 +1682,9 @@ constexpr auto operator*(decimal128 lhs, decimal128 rhs) noexcept -> decimal128
 
     auto lhs_sig {lhs.full_significand()};
     auto lhs_exp {lhs.biased_exponent()};
-    const auto lhs_zeros {detail::remove_trailing_zeros(lhs_sig)};
-    lhs_sig = lhs_zeros.trimmed_number;
-    lhs_exp += static_cast<std::int32_t>(lhs_zeros.number_of_removed_zeros);
 
     auto rhs_sig {rhs.full_significand()};
     auto rhs_exp {rhs.biased_exponent()};
-    const auto rhs_zeros {detail::remove_trailing_zeros(rhs_sig)};
-    rhs_sig = rhs_zeros.trimmed_number;
-    rhs_exp += static_cast<std::int32_t>(rhs_zeros.number_of_removed_zeros);
 
     return detail::d128_mul_impl<decimal128>(
             lhs_sig, lhs_exp, lhs.isneg(),
diff --git a/include/boost/decimal/decimal128_fast.hpp b/include/boost/decimal/decimal128_fast.hpp
@@ -909,9 +909,21 @@ constexpr auto operator*(decimal128_fast lhs, decimal128_fast rhs) noexcept -> d
     }
     #endif
 
-    return detail::d128_fast_mul_impl<decimal128_fast>(
-            lhs.significand_, lhs.biased_exponent(), lhs.sign_,
-            rhs.significand_, rhs.biased_exponent(), rhs.sign_);
+    auto lhs_sig {lhs.full_significand()};
+    auto lhs_exp {lhs.biased_exponent()};
+    const auto lhs_zeros {detail::remove_trailing_zeros(lhs_sig)};
+    lhs_sig = lhs_zeros.trimmed_number;
+    lhs_exp += static_cast<std::int32_t>(lhs_zeros.number_of_removed_zeros);
+
+    auto rhs_sig {rhs.full_significand()};
+    auto rhs_exp {rhs.biased_exponent()};
+    const auto rhs_zeros {detail::remove_trailing_zeros(rhs_sig)};
+    rhs_sig = rhs_zeros.trimmed_number;
+    rhs_exp += static_cast<std::int32_t>(rhs_zeros.number_of_removed_zeros);
+
+    return detail::d128_mul_impl<decimal128_fast>(
+            lhs_sig, lhs_exp, lhs.sign_,
+            rhs_sig, rhs_exp, rhs.sign_);
 }
 
 template <typename Integer>
diff --git a/include/boost/decimal/detail/mul_impl.hpp b/include/boost/decimal/detail/mul_impl.hpp
@@ -165,28 +165,50 @@ BOOST_DECIMAL_FORCE_INLINE constexpr auto d64_mul_impl(T lhs_sig, U lhs_exp, boo
     return {res_sig_64, res_exp, sign};
 }
 
-template <typename ReturnType, typename T1, typename T2>
-constexpr auto d128_mul_impl(T1 lhs_sig, std::int32_t lhs_exp, bool lhs_sign,
-                             T2 rhs_sig, std::int32_t rhs_exp, bool rhs_sign) noexcept -> ReturnType
+// Multiplies two operands given as (significand, exponent, sign) triples.
+// The result sign is set exactly when the operand signs differ, and the result
+// exponent is the sum of the operand exponents (plus any digits trimmed below).
+// The resulting triple is used to brace-initialize the ReturnType result.
+template <typename ReturnType, BOOST_DECIMAL_INTEGRAL T1, BOOST_DECIMAL_INTEGRAL U1,
+                               BOOST_DECIMAL_INTEGRAL T2, BOOST_DECIMAL_INTEGRAL U2>
+constexpr auto d128_mul_impl(T1 lhs_sig, U1 lhs_exp, bool lhs_sign,
+                             T2 rhs_sig, U2 rhs_exp, bool rhs_sign) noexcept -> ReturnType
 {
     bool sign {lhs_sign != rhs_sign};
 
-    // Once we have the normalized significands and exponents all we have to do is
-    // multiply the significands and add the exponents
-    auto res_sig {detail::umul256(lhs_sig, rhs_sig)};
-    auto res_exp {lhs_exp + rhs_exp};
-
-    const auto sig_dig {detail::num_digits(res_sig)};
+    const auto lhs_dig {detail::num_digits(lhs_sig)};
+    const auto rhs_dig {detail::num_digits(rhs_sig)};
 
-    if (sig_dig > std::numeric_limits<detail::uint128>::digits10)
+    // If we can avoid it don't do 256 bit multiplication because it is slow.
+    // An m-digit number times an n-digit number has at most m + n digits, so when
+    // m + n <= digits10 of uint128 the product is guaranteed to fit in 128 bits
+    if (lhs_dig + rhs_dig <= std::numeric_limits<uint128>::digits10)
     {
-        const auto digit_delta {sig_dig - std::numeric_limits<detail::uint128>::digits10};
-        res_sig /= detail::uint256_t(pow10(detail::uint128(digit_delta)));
-        res_exp += digit_delta;
+        auto res_sig {lhs_sig * rhs_sig};
+        auto res_exp {lhs_exp + rhs_exp};
+        return {res_sig, res_exp, sign};
+    }
+    else
+    {
+        // Once we have the normalized significands and exponents all we have to do is
+        // multiply the significands and add the exponents
+        auto res_sig {detail::umul256(lhs_sig, rhs_sig)};
+        auto res_exp {lhs_exp + rhs_exp};
+
+        const auto sig_dig {detail::num_digits(res_sig)};
+
+        // Divide away the excess low-order digits so that at most digits10 of uint128
+        // remain, and account for the dropped digits in the exponent
+        if (sig_dig > std::numeric_limits<detail::uint128>::digits10)
+        {
+            const auto digit_delta {sig_dig - std::numeric_limits<detail::uint128>::digits10};
+            res_sig /= detail::uint256_t(pow10(detail::uint128(digit_delta)));
+            res_exp += digit_delta;
+        }
+
+        BOOST_DECIMAL_ASSERT(res_sig.high == uint128(0, 0));
+        return {res_sig.low, res_exp, sign};
     }
-
-    BOOST_DECIMAL_ASSERT(res_sig.high == uint128(0,0));
-    return {res_sig.low, res_exp, sign};
 }
 
 template <typename ReturnType, BOOST_DECIMAL_INTEGRAL T1, BOOST_DECIMAL_INTEGRAL U1,