Merge pull request #770 from cppalliance/32fma

mborland · web-flow · commit ca9fb0ae88e1 · 2025-01-06T13:32:58.000-05:00
Add 32-bit FMA implementation
diff --git a/include/boost/decimal/decimal32.hpp b/include/boost/decimal/decimal32.hpp
@@ -37,6 +37,7 @@
 #include <boost/decimal/detail/mul_impl.hpp>
 #include <boost/decimal/detail/div_impl.hpp>
 #include <boost/decimal/detail/promote_significand.hpp>
+#include <boost/decimal/detail/components.hpp>
 
 #ifndef BOOST_DECIMAL_BUILD_MODULE
 
@@ -121,16 +122,6 @@ BOOST_DECIMAL_CONSTEXPR_VARIABLE std::uint32_t d32_big_combination_field_mask =
 //BOOST_DECIMAL_CONSTEXPR_VARIABLE std::uint32_t d32_construct_exp_mask = UINT32_C(0b0'00000'111111'0000000000'0000000000);
 //BOOST_DECIMAL_CONSTEXPR_VARIABLE std::uint32_t d32_construct_significand_mask = d32_no_combination;
 
-struct decimal32_components
-{
-    using significand_type = std::uint32_t;
-    using biased_exponent_type = std::int32_t;
-
-    significand_type sig;
-    biased_exponent_type exp;
-    bool sign;
-};
-
 } // namespace detail
 
 #if defined(__GNUC__) && __GNUC__ >= 8
diff --git a/include/boost/decimal/decimal32_fast.hpp b/include/boost/decimal/decimal32_fast.hpp
@@ -31,16 +31,6 @@ BOOST_DECIMAL_CONSTEXPR_VARIABLE auto d32_fast_inf = std::numeric_limits<std::ui
 BOOST_DECIMAL_CONSTEXPR_VARIABLE auto d32_fast_qnan = std::numeric_limits<std::uint_fast32_t>::max() - 1;
 BOOST_DECIMAL_CONSTEXPR_VARIABLE auto d32_fast_snan = std::numeric_limits<std::uint_fast32_t>::max() - 2;
 
-struct decimal32_fast_components
-{
-    using significand_type = std::uint_fast32_t;
-    using biased_exponent_type = std::int_fast32_t;
-
-    significand_type sig;
-    biased_exponent_type exp;
-    bool sign;
-};
-
 }
 
 BOOST_DECIMAL_EXPORT class decimal32_fast final
diff --git a/include/boost/decimal/detail/cmath/fma.hpp b/include/boost/decimal/detail/cmath/fma.hpp
@@ -9,14 +9,98 @@
 #include <boost/decimal/decimal32_fast.hpp>
 #include <boost/decimal/decimal64.hpp>
 #include <boost/decimal/decimal128.hpp>
+#include <boost/decimal/decimal128_fast.hpp>
 #include <boost/decimal/detail/config.hpp>
 
 namespace boost {
 namespace decimal {
 
+namespace detail {
+
+#ifdef _MSC_VER
+#pragma warning(push)
+#pragma warning(disable : 4127)
+#endif
+
+template <BOOST_DECIMAL_DECIMAL_FLOATING_TYPE Dec>
+using components_type = std::conditional_t<std::is_same<Dec, decimal32>::value, decimal32_components,
+                        std::conditional_t<std::is_same<Dec, decimal32_fast>::value, decimal32_fast_components,
+                        std::conditional_t<std::is_same<Dec, decimal64>::value, decimal64_components,
+                        std::conditional_t<std::is_same<Dec, decimal64_fast>::value, decimal64_fast_components,
+                        std::conditional_t<std::is_same<Dec, decimal128>::value, decimal128_components, decimal128_fast_components
+                        >>>>>;
+
+template <bool checked, BOOST_DECIMAL_DECIMAL_FLOATING_TYPE T>
+constexpr auto d32_fma_impl(T x, T y, T z) noexcept -> T
+{
+    using T_components_type = components_type<T>;
+    using exp_type = typename T::biased_exponent_type;
+
+    // Apply the mul (x * y); when checked, a non-finite x or y is handed to check_non_finite first
+    #ifndef BOOST_DECIMAL_FAST_MATH
+    BOOST_DECIMAL_IF_CONSTEXPR (checked)
+    {
+        if (!isfinite(x) || !isfinite(y))
+        {
+            return detail::check_non_finite(x, y);
+        }
+    }
+    #endif
+
+    int exp_lhs {};
+    auto sig_lhs = frexp10(x, &exp_lhs);
+
+    int exp_rhs {};
+    auto sig_rhs = frexp10(y, &exp_rhs);
+
+    auto first_res = detail::mul_impl<T_components_type>(sig_lhs, static_cast<exp_type>(exp_lhs), x < 0,
+                                                         sig_rhs, static_cast<exp_type>(exp_rhs), y < 0);
+
+    // Apply the add (+ z) on the carried components of the product held in first_res
+    // We still create the product as a decimal type to check for non-finite values and comparisons,
+    // but the sum below is computed from first_res's components, not from complete_lhs
+    const T complete_lhs {first_res.sig, first_res.exp, first_res.sign};
+
+    #ifndef BOOST_DECIMAL_FAST_MATH
+    BOOST_DECIMAL_IF_CONSTEXPR (checked)
+    {
+        if (!isfinite(complete_lhs) || !isfinite(z))
+        {
+            return detail::check_non_finite(complete_lhs, z);
+        }
+    }
+    #endif
+
+    const bool abs_lhs_bigger {abs(complete_lhs) > abs(z)};
+
+    int exp_z {};
+    auto sig_z = frexp10(z, &exp_z);
+    detail::normalize<T>(first_res.sig, first_res.exp);
+
+    return detail::d32_add_impl<T>(first_res.sig, first_res.exp, first_res.sign,
+                                   sig_z, static_cast<exp_type>(exp_z), z < 0,
+                                   abs_lhs_bigger);
+}
+
+#ifdef _MSC_VER
+#pragma warning(pop)
+#endif
+
+constexpr auto unchecked_fma(decimal32 x, decimal32 y, decimal32 z) noexcept -> decimal32
+{
+    return detail::d32_fma_impl<false>(x, y, z);
+}
+
+constexpr auto unchecked_fma(decimal32_fast x, decimal32_fast y, decimal32_fast z) noexcept -> decimal32_fast
+{
+    return detail::d32_fma_impl<false>(x, y, z);
+}
+
+} // Namespace detail
+
 BOOST_DECIMAL_EXPORT constexpr auto fma(decimal32 x, decimal32 y, decimal32 z) noexcept -> decimal32
 {
-    return x * y + z;
+    return detail::d32_fma_impl<true>(x, y, z);
 }
 
 BOOST_DECIMAL_EXPORT constexpr auto fma(decimal64 x, decimal64 y, decimal64 z) noexcept -> decimal64
@@ -31,7 +115,7 @@ BOOST_DECIMAL_EXPORT constexpr auto fma(decimal128 x, decimal128 y, decimal128 z
 
 BOOST_DECIMAL_EXPORT constexpr auto fma(decimal32_fast x, decimal32_fast y, decimal32_fast z) noexcept -> decimal32_fast
 {
-    return x * y + z;
+    return detail::d32_fma_impl<true>(x, y, z);
 }
 
 BOOST_DECIMAL_EXPORT constexpr auto fma(decimal64_fast x, decimal64_fast y, decimal64_fast z) noexcept -> decimal64_fast
diff --git a/include/boost/decimal/detail/components.hpp b/include/boost/decimal/detail/components.hpp
@@ -0,0 +1,42 @@
+// Copyright 2025 Matt Borland
+// Distributed under the Boost Software License, Version 1.0.
+// https://www.boost.org/LICENSE_1_0.txt
+
+#ifndef BOOST_DECIMAL_DETAIL_COMPONENTS_HPP
+#define BOOST_DECIMAL_DETAIL_COMPONENTS_HPP
+
+#include <boost/decimal/detail/config.hpp>
+
+#ifndef BOOST_DECIMAL_BUILD_MODULE
+#include <cstdint>
+#endif
+
+namespace boost {
+namespace decimal {
+namespace detail {
+
+struct decimal32_components
+{
+    using significand_type = std::uint32_t;
+    using biased_exponent_type = std::int32_t;
+
+    significand_type sig;
+    biased_exponent_type exp;
+    bool sign;
+};
+
+struct decimal32_fast_components
+{
+    using significand_type = std::uint_fast32_t;
+    using biased_exponent_type = std::int_fast32_t;
+
+    significand_type sig;
+    biased_exponent_type exp;
+    bool sign;
+};
+
+} // namespace detail
+} // namespace decimal
+} // namespace boost
+
+#endif // BOOST_DECIMAL_DETAIL_COMPONENTS_HPP
diff --git a/include/boost/decimal/detail/mul_impl.hpp b/include/boost/decimal/detail/mul_impl.hpp
@@ -11,6 +11,7 @@
 #include <boost/decimal/detail/emulated128.hpp>
 #include <boost/decimal/detail/emulated256.hpp>
 #include <boost/decimal/detail/power_tables.hpp>
+#include <boost/decimal/detail/components.hpp>
 
 #ifndef BOOST_DECIMAL_BUILD_MODULE
 #include <cstdint>
@@ -24,21 +25,10 @@ namespace detail {
 // 1) Returns a decimal type and lets the constructor handle with shrinking the significand
 // 2) Returns a struct of the constituent components (used with FMAs)
 
-template <typename ReturnType, typename T, typename U>
-BOOST_DECIMAL_FORCE_INLINE constexpr auto mul_impl(T lhs_sig, U lhs_exp, bool lhs_sign,
-                                                   T rhs_sig, U rhs_exp, bool rhs_sign) noexcept -> std::enable_if_t<std::is_same<ReturnType, decimal32_fast>::value, ReturnType>
-{
-    using mul_type = std::uint_fast64_t;
-
-    const auto res_sig {static_cast<mul_type>(lhs_sig) * static_cast<mul_type>(rhs_sig)};
-    const auto res_exp {lhs_exp + rhs_exp};
-
-    return {res_sig, res_exp, lhs_sign != rhs_sign};
-}
 
 template <typename ReturnType, typename T, typename U>
 BOOST_DECIMAL_FORCE_INLINE constexpr auto mul_impl(T lhs_sig, U lhs_exp, bool lhs_sign,
-                                                   T rhs_sig, U rhs_exp, bool rhs_sign) noexcept -> std::enable_if_t<std::is_same<ReturnType, decimal32>::value, ReturnType>
+                                                   T rhs_sig, U rhs_exp, bool rhs_sign) noexcept -> ReturnType
 {
     using mul_type = std::uint_fast64_t;
 
@@ -54,47 +44,6 @@ BOOST_DECIMAL_FORCE_INLINE constexpr auto mul_impl(T lhs_sig, U lhs_exp, bool lh
     return {static_cast<std::uint32_t>(res_sig), res_exp, lhs_sign != rhs_sign};
 }
 
-template <typename ReturnType, typename T, typename U>
-BOOST_DECIMAL_FORCE_INLINE constexpr auto mul_impl(T lhs_sig, U lhs_exp, bool lhs_sign,
-                                                   T rhs_sig, U rhs_exp, bool rhs_sign) noexcept -> std::enable_if_t<!detail::is_decimal_floating_point_v<ReturnType>, ReturnType>
-{
-    using mul_type = std::uint_fast64_t;
-
-    #ifdef BOOST_DECIMAL_DEBUG
-    std::cerr << "sig lhs: " << sig_lhs
-              << "\nexp lhs: " << exp_lhs
-              << "\nsig rhs: " << sig_rhs
-              << "\nexp rhs: " << exp_rhs;
-    #endif
-
-    bool sign {lhs_sign != rhs_sign};
-
-    // Once we have the normalized significands and exponents all we have to do is
-    // multiply the significands and add the exponents
-    //
-    // We use a 64 bit resultant significand because the two 23-bit unsigned significands will always fit
-
-    auto res_sig {static_cast<mul_type>(lhs_sig) * static_cast<mul_type>(rhs_sig)};
-    auto res_exp {lhs_exp + rhs_exp};
-
-    // We don't need to use the regular binary search tree detail::num_digits(res_sig)
-    // because we know that res_sig must be [1'000'000^2, 9'999'999^2] which only differ by one order
-    // of magnitude in their number of digits
-    const auto sig_dig {res_sig >= UINT64_C(10000000000000) ? 14 : 13};
-    constexpr auto max_dig {std::numeric_limits<typename ReturnType::significand_type>::digits10};
-    res_sig /= detail::pow10(static_cast<mul_type>(sig_dig - max_dig));
-    res_exp += sig_dig - max_dig;
-
-    const auto res_sig_32 {static_cast<typename ReturnType::significand_type>(res_sig)};
-
-    #ifdef BOOST_DECIMAL_DEBUG
-    std::cerr << "\nres sig: " << res_sig_32
-              << "\nres exp: " << res_exp << std::endl;
-    #endif
-
-    return {res_sig_32, res_exp, sign};
-}
-
 template <typename ReturnType, BOOST_DECIMAL_INTEGRAL T, BOOST_DECIMAL_INTEGRAL U>
 BOOST_DECIMAL_FORCE_INLINE constexpr auto d64_mul_impl(T lhs_sig, U lhs_exp, bool lhs_sign,
                                                        T rhs_sig, U rhs_exp, bool rhs_sign) noexcept
diff --git a/test/test_big_uints.cpp b/test/test_big_uints.cpp
@@ -30,6 +30,11 @@ int main()
 #  pragma clang diagnostic ignored "-Wsign-conversion"
 #  pragma clang diagnostic ignored "-Wfloat-equal"
 #  pragma clang diagnostic ignored "-Wdeprecated-declarations"
+
+#  if __clang_major__ >= 20
+#    pragma clang diagnostic ignored "-Wdeprecated-literal-operator"
+#  endif
+
 #elif defined(__GNUC__)
 #  pragma GCC diagnostic push
 #  pragma GCC diagnostic ignored "-Wold-style-cast"