Merge pull request #584 from cppalliance/d64FMA

mborland · web-flow · commit d901dd4b79f1 · 2024-05-21T19:13:29.000+02:00
diff --git a/include/boost/decimal/decimal128.hpp b/include/boost/decimal/decimal128.hpp
@@ -2414,75 +2414,6 @@ constexpr auto scalbnd128(decimal128 num, int expval) noexcept -> decimal128
     return scalblnd128(num, static_cast<long>(expval));
 }
 
-constexpr auto fmad128(decimal128 x, decimal128 y, decimal128 z) noexcept -> decimal128
-{
-    // First calculate x * y without rounding
-    constexpr decimal128 zero {0, 0};
-
-    const auto res {detail::check_non_finite(x, y)};
-    if (res != zero)
-    {
-        return res;
-    }
-
-    auto sig_lhs {x.full_significand()};
-    auto exp_lhs {x.biased_exponent()};
-    detail::normalize<decimal128>(sig_lhs, exp_lhs);
-
-    auto sig_rhs {y.full_significand()};
-    auto exp_rhs {y.biased_exponent()};
-    detail::normalize<decimal128>(sig_rhs, exp_rhs);
-
-    auto mul_result {d128_mul_impl(sig_lhs, exp_lhs, x.isneg(), sig_rhs, exp_rhs, y.isneg())};
-    const decimal128 dec_result {mul_result.sig, mul_result.exp, mul_result.sign};
-
-    return dec_result + z;
-
-    /*
-    const auto res_add {detail::check_non_finite(dec_result, z)};
-    if (res_add != zero)
-    {
-        return res_add;
-    }
-
-    bool lhs_bigger {dec_result > z};
-    if (dec_result.isneg() && z.isneg())
-    {
-        lhs_bigger = !lhs_bigger;
-    }
-    bool abs_lhs_bigger {abs(dec_result) > abs(z)};
-
-    detail::normalize<decimal128>(mul_result.sig, mul_result.exp);
-
-    auto sig_z {z.full_significand()};
-    auto exp_z {z.biased_exponent()};
-    detail::normalize<decimal128>(sig_z, exp_z);
-    detail::decimal128_components z_components {sig_z, exp_z, z.isneg()};
-
-    if (!lhs_bigger)
-    {
-        detail::swap(mul_result, z_components);
-        abs_lhs_bigger = !abs_lhs_bigger;
-    }
-
-    detail::decimal128_components result {};
-
-    if (!mul_result.sign && z_components.sign)
-    {
-        result = d128_sub_impl(mul_result.sig, mul_result.exp, mul_result.sign,
-                               z_components.sig, z_components.exp, z_components.sign,
-                               abs_lhs_bigger);
-    }
-    else
-    {
-        result = d128_add_impl(mul_result.sig, mul_result.exp, mul_result.sign,
-                               z_components.sig, z_components.exp, z_components.sign);
-    }
-
-    return {result.sig, result.exp, result.sign};
-    */
-}
-
 } //namespace decimal
 } //namespace boost
 
diff --git a/include/boost/decimal/decimal64.hpp b/include/boost/decimal/decimal64.hpp
@@ -2347,71 +2347,6 @@ constexpr auto copysignd64(decimal64 mag, decimal64 sgn) noexcept -> decimal64
     return mag;
 }
 
-constexpr auto fmad64(decimal64 x, decimal64 y, decimal64 z) noexcept -> decimal64
-{
-    // First calculate x * y without rounding
-    constexpr decimal64 zero {0, 0};
-
-    const auto res {detail::check_non_finite(x, y)};
-    if (res != zero)
-    {
-        return res;
-    }
-
-    auto sig_lhs {x.full_significand()};
-    auto exp_lhs {x.biased_exponent()};
-    detail::normalize<decimal64>(sig_lhs, exp_lhs);
-
-    auto sig_rhs {y.full_significand()};
-    auto exp_rhs {y.biased_exponent()};
-    detail::normalize<decimal64>(sig_rhs, exp_rhs);
-
-    auto mul_result {d64_mul_impl(sig_lhs, exp_lhs, x.isneg(), sig_rhs, exp_rhs, y.isneg())};
-    const decimal64 dec_result {mul_result.sig, mul_result.exp, mul_result.sign};
-
-    const auto res_add {detail::check_non_finite(dec_result, z)};
-    if (res_add != zero)
-    {
-        return res_add;
-    }
-
-    bool lhs_bigger {dec_result > z};
-    if (dec_result.isneg() && z.isneg())
-    {
-        lhs_bigger = !lhs_bigger;
-    }
-    bool abs_lhs_bigger {abs(dec_result) > abs(z)};
-
-    detail::normalize<decimal64>(mul_result.sig, mul_result.exp);
-
-    auto sig_z {z.full_significand()};
-    auto exp_z {z.biased_exponent()};
-    detail::normalize<decimal64>(sig_z, exp_z);
-    detail::decimal64_components z_components {sig_z, exp_z, z.isneg()};
-
-    if (!lhs_bigger)
-    {
-        detail::swap(mul_result, z_components);
-        abs_lhs_bigger = !abs_lhs_bigger;
-    }
-
-    detail::decimal64_components result {};
-
-    if (!mul_result.sign && z_components.sign)
-    {
-        result = d64_sub_impl(mul_result.sig, mul_result.exp, mul_result.sign,
-                              z_components.sig, z_components.exp, z_components.sign,
-                              abs_lhs_bigger);
-    }
-    else
-    {
-        result = d64_add_impl(mul_result.sig, mul_result.exp, mul_result.sign,
-                              z_components.sig, z_components.exp, z_components.sign);
-    }
-
-    return {result.sig, result.exp, result.sign};
-}
-
 } //namespace decimal
 } //namespace boost
 
diff --git a/include/boost/decimal/detail/cmath/fma.hpp b/include/boost/decimal/detail/cmath/fma.hpp
@@ -83,6 +83,85 @@ constexpr auto fmad32(decimal32 x, decimal32 y, decimal32 z) noexcept -> decimal
     return {result.sig, result.exp, result.sign};
 }
 
+constexpr auto fmad64(decimal64 x, decimal64 y, decimal64 z) noexcept -> decimal64
+{
+    // Computes x * y + z: the product comes from d64_mul_impl, the sum from the d128 add/sub helpers.
+    constexpr decimal64 zero {0, 0};
+    // Any result other than zero from check_non_finite is handed straight back to the caller.
+    const auto res {detail::check_non_finite(x, y)}; // presumably a NaN/inf operand to propagate -- TODO confirm
+    if (res != zero)
+    {
+        return res;
+    }
+
+    auto sig_lhs {x.full_significand()};
+    auto exp_lhs {x.biased_exponent()};
+    detail::normalize<decimal64>(sig_lhs, exp_lhs);
+
+    auto sig_rhs {y.full_significand()};
+    auto exp_rhs {y.biased_exponent()};
+    detail::normalize<decimal64>(sig_rhs, exp_rhs);
+
+    auto mul_result {d64_mul_impl(sig_lhs, exp_lhs, x.isneg(), sig_rhs, exp_rhs, y.isneg())};
+    const decimal64 dec_result {mul_result.sig, mul_result.exp, mul_result.sign}; // decimal64 form of the product, used only for the checks and comparisons below
+    // Same non-finite screening as above, this time for the product against z.
+    const auto res_add {detail::check_non_finite(dec_result, z)};
+    if (res_add != zero)
+    {
+        return res_add;
+    }
+
+    bool lhs_bigger {dec_result > z};
+    if (dec_result.isneg() && z.isneg())
+    {
+        lhs_bigger = !lhs_bigger; // both operands negative: the signed comparison is inverted
+    }
+    bool abs_lhs_bigger {abs(dec_result) > abs(z)};
+    // NOTE(review): mul_result has already passed through d64_mul_impl; whether digits were rounded away there is not visible here -- confirm.
+    // To avoid the rounding step we promote the constituent pieces to the next higher type
+    detail::decimal128_components promoted_mul_result {static_cast<detail::uint128>(mul_result.sig),
+                                                       mul_result.exp, mul_result.sign};
+
+    detail::normalize<decimal128>(promoted_mul_result.sig, promoted_mul_result.exp);
+
+    auto sig_z {static_cast<detail::uint128>(z.full_significand())};
+    auto exp_z {z.biased_exponent()};
+    detail::normalize<decimal128>(sig_z, exp_z);
+    detail::decimal128_components z_components {sig_z, exp_z, z.isneg()};
+
+    if (!lhs_bigger) // after this swap promoted_mul_result holds z's components and z_components holds the product's
+    {
+        detail::swap(promoted_mul_result, z_components);
+        abs_lhs_bigger = !abs_lhs_bigger;
+    }
+
+    detail::decimal128_components result {};
+
+    if (!promoted_mul_result.sign && z_components.sign) // first operand non-negative, second negative: use the subtraction helper
+    {
+        result = d128_sub_impl(promoted_mul_result.sig, promoted_mul_result.exp, promoted_mul_result.sign,
+                               z_components.sig, z_components.exp, z_components.sign,
+                               abs_lhs_bigger);
+    }
+    else
+    {
+        result = d128_add_impl(promoted_mul_result.sig, promoted_mul_result.exp, promoted_mul_result.sign,
+                               z_components.sig, z_components.exp, z_components.sign);
+    }
+
+    return {result.sig, result.exp, result.sign}; // brace-initializes the decimal64 return value from the decimal128_components result
+}
+
+constexpr auto fmad128(decimal128 x, decimal128 y, decimal128 z) noexcept -> decimal128
+{
+    return x * y + z; // NOTE(review): separate multiply then add; the product is presumably rounded to decimal128 before z is added, i.e. not a single-rounding FMA -- confirm
+}
+
+constexpr auto fmad32f(decimal32_fast x, decimal32_fast y, decimal32_fast z) noexcept -> decimal32_fast
+{
+    return x * y + z; // NOTE(review): separate multiply then add; the product is presumably rounded to decimal32_fast before z is added, i.e. not a single-rounding FMA -- confirm
+}
+
 BOOST_DECIMAL_EXPORT constexpr auto fma(decimal32 x, decimal32 y, decimal32 z) noexcept -> decimal32
 {
     return fmad32(x, y, z);
@@ -98,6 +177,11 @@ BOOST_DECIMAL_EXPORT constexpr auto fma(decimal128 x, decimal128 y, decimal128 z
     return fmad128(x, y, z);
 }
 
+BOOST_DECIMAL_EXPORT constexpr auto fma(decimal32_fast x, decimal32_fast y, decimal32_fast z) noexcept -> decimal32_fast
+{
+    return fmad32f(x, y, z); // decimal32_fast overload of fma: forwards unchanged to fmad32f
+}
+
 } //namespace decimal
 } //namespace boost