Merge pull request #527 from cppalliance/cosh128

ckormanyos · web-flow · commit de795d489c60 · 2024-05-04T15:14:48.000+02:00
Fix #522 via impl of cosh/exp at 128 bits and a few tests
diff --git a/include/boost/decimal/detail/cmath/cosh.hpp b/include/boost/decimal/detail/cmath/cosh.hpp
@@ -7,9 +7,10 @@
 #define BOOST_DECIMAL_DETAIL_CMATH_COSH_HPP
 
 #include <boost/decimal/fwd.hpp> // NOLINT(llvm-include-order)
-#include <boost/decimal/detail/type_traits.hpp>
+#include <boost/decimal/detail/cmath/impl/cosh_impl.hpp>
 #include <boost/decimal/detail/concepts.hpp>
 #include <boost/decimal/detail/config.hpp>
+#include <boost/decimal/detail/type_traits.hpp>
 #include <boost/decimal/numbers.hpp>
 
 #ifndef BOOST_DECIMAL_BUILD_MODULE
@@ -58,48 +59,9 @@ constexpr auto cosh_impl(T x) noexcept
         {
             if (x < one)
             {
-                using coefficient_array_type = std::array<T, static_cast<std::size_t>(UINT8_C(9))>;
-
-                #if (defined(__clang__) && (__clang__ < 6))
-                #  pragma clang diagnostic push
-                #  pragma clang diagnostic ignored "-Wmissing-braces"
-                #endif
-
-                constexpr auto coefficient_table =
-                    coefficient_array_type
-                    {
-                        // Series[Cosh[x], {x, 0, 18}]
-                        //            (1),                             // * 1
-                        T { 5, -1 },                                   // * x^2
-                        T { UINT64_C(416666666666666667), - 18 -  1 }, // * x^4
-                        T { UINT64_C(138888888888888889), - 18 -  2 }, // * x^6
-                        T { UINT64_C(248015873015873016), - 18 -  4 }, // * x^8
-                        T { UINT64_C(275573192239858907), - 18 -  6 }, // * x^10
-                        T { UINT64_C(208767569878680990), - 18 -  8 }, // * x^12
-                        T { UINT64_C(114707455977297247), - 18 - 10 }, // * x^14
-                        T { UINT64_C(477947733238738530), - 18 - 13 }, // * x^16
-                        T { UINT64_C(156192069685862265), - 18 - 15 }  // * x^18
-                    };
-
-                #if (defined(__clang__) && (__clang__ < 6))
-                #  pragma clang diagnostic pop
-                #endif
-
-                auto rit =
-                    coefficient_table.crbegin()
-                  + static_cast<std::size_t>
-                    (
-                      (sizeof(T) == static_cast<std::size_t>(UINT8_C(4))) ? 4U : 0U
-                    );
-
-                result = *rit;
-
                 const auto xsq = x * x;
 
-                while(rit != coefficient_table.crend())
-                {
-                    result = fma(result, xsq, *rit++);
-                }
+                result = detail::cosh_series_expansion(xsq);
 
                 result = fma(result, xsq, one);
             }
diff --git a/include/boost/decimal/detail/cmath/exp.hpp b/include/boost/decimal/detail/cmath/exp.hpp
@@ -7,6 +7,7 @@
 #define BOOST_DECIMAL_DETAIL_CMATH_EXP_HPP
 
 #include <boost/decimal/fwd.hpp> // NOLINT(llvm-include-order)
+#include <boost/decimal/detail/cmath/impl/exp_impl.hpp>
 #include <boost/decimal/detail/cmath/impl/pow_impl.hpp>
 #include <boost/decimal/detail/type_traits.hpp>
 #include <boost/decimal/detail/concepts.hpp>
@@ -75,18 +76,7 @@ constexpr auto exp_impl(T x) noexcept
                 x -= numbers::ln2_v<T> * nf2;
             }
 
-            // PadeApproximant[Exp[x] - 1, {x, 0, {6, 6}}]
-            // FullSimplify[%]
-            //   (84 x (7920 + 240 x^2 + x^4))
-            // / (665280 + x (-332640 + x (75600 + x (-10080 + x (840 + (-42 + x) x)))))
-
-            const auto x2 = x * x;
-
-            // Use the small-argument Pade approximation having coefficients shown above.
-            const T top = T { UINT8_C(84), 0 } * x * ( T { UINT16_C(7920), 0 } + ( T { UINT8_C(240), 0 } + x2) * x2);
-            const T bot = T { UINT32_C(665280), 0 } + x * (T { INT32_C(-332640), 0 } + x * (T { UINT32_C(75600), 0 } + x * (T { INT16_C(-10080), 0 } + x * (T { UINT16_C(840), 0 } + (T { INT8_C(-42), 0 } + x) * x))));
-
-            result = one + (top / bot);
+            result = detail::exp_pade_appxroximant(x);
 
             if (nf2 > 0)
             {
diff --git a/include/boost/decimal/detail/cmath/impl/cosh_impl.hpp b/include/boost/decimal/detail/cmath/impl/cosh_impl.hpp
@@ -0,0 +1,126 @@
+// Copyright 2024 Matt Borland
+// Copyright 2024 Christopher Kormanyos
+// Distributed under the Boost Software License, Version 1.0.
+// https://www.boost.org/LICENSE_1_0.txt
+
+#ifndef BOOST_DECIMAL_DETAIL_CMATH_IMPL_COSH_IMPL_HPP
+#define BOOST_DECIMAL_DETAIL_CMATH_IMPL_COSH_IMPL_HPP
+
+#include <boost/decimal/detail/concepts.hpp>
+#include <boost/decimal/detail/cmath/impl/taylor_series_result.hpp>
+
+#ifndef BOOST_DECIMAL_BUILD_MODULE
+#include <array>
+#include <cstddef>
+#include <cstdint>
+#endif
+
+namespace boost {
+namespace decimal {
+namespace detail {
+
+namespace cosh_detail {
+
+template <bool b>
+struct cosh_table_imp // Taylor coefficients of cosh(x) per decimal width; the constant term 1 is not stored
+{
+private:
+    using d32_coeffs_t  = std::array<decimal32,   6>; // x^2 .. x^12
+    using d64_coeffs_t  = std::array<decimal64,   9>; // x^2 .. x^18
+    using d128_coeffs_t = std::array<decimal128, 17>; // x^2 .. x^34
+
+public:
+    static constexpr d32_coeffs_t d32_coeffs =
+    {{
+         // Series[Cosh[x], {x, 0, 12}]
+         //            (1),                                                        // * 1
+         ::boost::decimal::decimal32 { 5, -1 },                                    // * x^2
+         ::boost::decimal::decimal32 { UINT64_C(4166666666666666667), - 19 -  1 }, // * x^4
+         ::boost::decimal::decimal32 { UINT64_C(1388888888888888889), - 19 -  2 }, // * x^6
+         ::boost::decimal::decimal32 { UINT64_C(2480158730158730159), - 19 -  4 }, // * x^8
+         ::boost::decimal::decimal32 { UINT64_C(2755731922398589065), - 19 -  6 }, // * x^10
+         ::boost::decimal::decimal32 { UINT64_C(2087675698786809898), - 19 -  8 }, // * x^12
+    }};
+
+    static constexpr d64_coeffs_t d64_coeffs =
+    {{
+         // Series[Cosh[x], {x, 0, 18}]
+         //            (1),                                                        // * 1
+         ::boost::decimal::decimal64 { 5, -1 },                                    // * x^2
+         ::boost::decimal::decimal64 { UINT64_C(4166666666666666667), - 19 -  1 }, // * x^4
+         ::boost::decimal::decimal64 { UINT64_C(1388888888888888889), - 19 -  2 }, // * x^6
+         ::boost::decimal::decimal64 { UINT64_C(2480158730158730159), - 19 -  4 }, // * x^8
+         ::boost::decimal::decimal64 { UINT64_C(2755731922398589065), - 19 -  6 }, // * x^10
+         ::boost::decimal::decimal64 { UINT64_C(2087675698786809898), - 19 -  8 }, // * x^12
+         ::boost::decimal::decimal64 { UINT64_C(1147074559772972471), - 19 - 10 }, // * x^14
+         ::boost::decimal::decimal64 { UINT64_C(4779477332387385297), - 19 - 13 }, // * x^16
+         ::boost::decimal::decimal64 { UINT64_C(1561920696858622646), - 19 - 15 }  // * x^18
+     }};
+
+    static constexpr d128_coeffs_t d128_coeffs =
+    {{
+         // Series[Cosh[x], {x, 0, 34}]
+         //            (1),                                                                                                                   // * 1
+         ::boost::decimal::decimal128 { 5, -1 },                                                                                              // * x^2
+         ::boost::decimal::decimal128 { boost::decimal::detail::uint128 { UINT64_C(225875452601146), UINT64_C(13965751134118914724) }, -35 }, // * x^4
+         ::boost::decimal::decimal128 { boost::decimal::detail::uint128 { UINT64_C(75291817533715),  UINT64_C(10804165069276155440) }, -36 }, // * x^6
+         ::boost::decimal::decimal128 { boost::decimal::detail::uint128 { UINT64_C(134449674167349), UINT64_C(4799281565792772746)  }, -38 }, // * x^8
+         ::boost::decimal::decimal128 { boost::decimal::detail::uint128 { UINT64_C(149388526852610), UINT64_C(5332535073103080820)  }, -40 }, // * x^10
+         ::boost::decimal::decimal128 { boost::decimal::detail::uint128 { UINT64_C(113173126403492), UINT64_C(11865690723015477068) }, -42 }, // * x^12
+         ::boost::decimal::decimal128 { boost::decimal::detail::uint128 { UINT64_C(62183036485435),  UINT64_C(9560282387433155251)  }, -44 }, // * x^14
+         ::boost::decimal::decimal128 { boost::decimal::detail::uint128 { UINT64_C(259095985355981), UINT64_C(6015479145837302244)  }, -47 }, // * x^16
+         ::boost::decimal::decimal128 { boost::decimal::detail::uint128 { UINT64_C(84671890639209),  UINT64_C(10767230553416093986) }, -49 }, // * x^18
+         ::boost::decimal::decimal128 { boost::decimal::detail::uint128 { UINT64_C(222820764840025), UINT64_C(4062785569898205740)  }, -52 }, // * x^20
+         ::boost::decimal::decimal128 { boost::decimal::detail::uint128 { UINT64_C(482296027792262), UINT64_C(7037075391028107068)  }, -55 }, // * x^22
+         ::boost::decimal::decimal128 { boost::decimal::detail::uint128 { UINT64_C(87372468802946),  UINT64_C(1542176615384940434)  }, -57 }, // * x^24
+         ::boost::decimal::decimal128 { boost::decimal::detail::uint128 { UINT64_C(134419182773763), UINT64_C(3791559721646796942)  }, -60 }, // * x^26
+         ::boost::decimal::decimal128 { boost::decimal::detail::uint128 { UINT64_C(177803151817147), UINT64_C(1794430560736952558)  }, -63 }, // * x^28
+         ::boost::decimal::decimal128 { boost::decimal::detail::uint128 { UINT64_C(204371438870284), UINT64_C(366311534299067156)   }, -66 }, // * x^30
+         ::boost::decimal::decimal128 { boost::decimal::detail::uint128 { UINT64_C(206019595635366), UINT64_C(17625897212400736954) }, -69 }, // * x^32
+         ::boost::decimal::decimal128 { boost::decimal::detail::uint128 { UINT64_C(183618177928134), UINT64_C(9987905770721758456)  }, -72 }, // * x^34
+    }};
+};
+
+#if !(defined(__cpp_inline_variables) && __cpp_inline_variables >= 201606L) && (!defined(_MSC_VER) || _MSC_VER != 1900) // only without C++17 inline variables, and never for _MSC_VER == 1900
+
+template <bool b>
+constexpr typename cosh_table_imp<b>::d32_coeffs_t cosh_table_imp<b>::d32_coeffs; // out-of-class definition of the decimal32 table
+
+template <bool b>
+constexpr typename cosh_table_imp<b>::d64_coeffs_t cosh_table_imp<b>::d64_coeffs; // out-of-class definition of the decimal64 table
+
+template <bool b>
+constexpr typename cosh_table_imp<b>::d128_coeffs_t cosh_table_imp<b>::d128_coeffs; // out-of-class definition of the decimal128 table
+
+#endif // out-of-class definitions of the static constexpr coefficient tables
+
+} //namespace cosh_detail
+
+using cosh_table = cosh_detail::cosh_table_imp<true>; // the one instantiation used by the specializations below
+
+template <BOOST_DECIMAL_DECIMAL_FLOATING_TYPE T>
+constexpr auto cosh_series_expansion(T z) noexcept; // primary template: declared only; defined per decimal type below
+
+template <>
+constexpr auto cosh_series_expansion<decimal32>(decimal32 z2) noexcept // z2: the caller in cosh.hpp passes x * x
+{
+    return taylor_series_result(z2, cosh_table::d32_coeffs); // 6 coefficients; presumably a polynomial evaluation in z2 - helper not visible here, confirm
+}
+
+template <>
+constexpr auto cosh_series_expansion<decimal64>(decimal64 z2) noexcept // z2: the caller in cosh.hpp passes x * x
+{
+    return taylor_series_result(z2, cosh_table::d64_coeffs); // 9 coefficients; the caller then applies fma(result, xsq, one)
+}
+
+template <>
+constexpr auto cosh_series_expansion<decimal128>(decimal128 z2) noexcept // z2: the caller in cosh.hpp passes x * x
+{
+    return taylor_series_result(z2, cosh_table::d128_coeffs); // 17 coefficients; the caller then applies fma(result, xsq, one)
+}
+
+} //namespace detail
+} //namespace decimal
+} //namespace boost
+
+#endif //BOOST_DECIMAL_DETAIL_CMATH_IMPL_COSH_IMPL_HPP
diff --git a/include/boost/decimal/detail/cmath/impl/exp_impl.hpp b/include/boost/decimal/detail/cmath/impl/exp_impl.hpp
@@ -0,0 +1,115 @@
+// Copyright 2024 Matt Borland
+// Copyright 2024 Christopher Kormanyos
+// Distributed under the Boost Software License, Version 1.0.
+// https://www.boost.org/LICENSE_1_0.txt
+
+#ifndef BOOST_DECIMAL_DETAIL_CMATH_IMPL_EXP_IMPL_HPP
+#define BOOST_DECIMAL_DETAIL_CMATH_IMPL_EXP_IMPL_HPP
+
+#include <boost/decimal/detail/concepts.hpp>
+
+#ifndef BOOST_DECIMAL_BUILD_MODULE
+#include <array>
+#include <cstddef>
+#include <cstdint>
+#endif
+
+namespace boost {
+namespace decimal {
+namespace detail {
+
+template <BOOST_DECIMAL_DECIMAL_FLOATING_TYPE T>
+constexpr auto exp_pade_appxroximant(T x) noexcept; // NOTE(review): name is misspelled ("appxroximant"), but exp.hpp calls it by this exact spelling
+
+template <>
+constexpr auto exp_pade_appxroximant<decimal32>(decimal32 x) noexcept // returns 1 + Pade(exp(x) - 1)
+{
+    // TODO: Chris: At 32-bit, reduce the number of coefficients in the Pade approximant of the exp() function.
+
+    using local_float_t = decimal32;
+
+    // PadeApproximant[Exp[x] - 1, {x, 0, {6, 6}}]
+    // FullSimplify[%]
+    //   (84 x (7920 + 240 x^2 + x^4))
+    // / (665280 + x (-332640 + x (75600 + x (-10080 + x (840 + (-42 + x) x)))))
+
+    const auto x2 = x * x;
+
+    // Use the small-argument Pade approximation having the coefficients shown above.
+    const local_float_t top = local_float_t { UINT8_C(84), 0 } * x * ( local_float_t { UINT16_C(7920), 0 } + ( local_float_t { UINT8_C(240), 0 } + x2) * x2);
+    const local_float_t bot = local_float_t { UINT32_C(665280), 0 } + x * (local_float_t { INT32_C(-332640), 0 } + x * (local_float_t { UINT32_C(75600), 0 } + x * (local_float_t { INT16_C(-10080), 0 } + x * (local_float_t { UINT16_C(840), 0 } + (local_float_t { INT8_C(-42), 0 } + x) * x))));
+
+    return local_float_t { 1 } + (top / bot); // top / bot approximates exp(x) - 1, so add the 1 back
+}
+
+template <>
+constexpr auto exp_pade_appxroximant<decimal64>(decimal64 x) noexcept // returns 1 + Pade(exp(x) - 1)
+{
+    using local_float_t = decimal64;
+
+    // PadeApproximant[Exp[x] - 1, {x, 0, {6, 6}}]
+    // FullSimplify[%]
+    //   (84 x (7920 + 240 x^2 + x^4))
+    // / (665280 + x (-332640 + x (75600 + x (-10080 + x (840 + (-42 + x) x)))))
+
+    const auto x2 = x * x;
+
+    // Use the small-argument Pade approximation having the coefficients shown above.
+    const local_float_t top = local_float_t { UINT8_C(84), 0 } * x * ( local_float_t { UINT16_C(7920), 0 } + ( local_float_t { UINT8_C(240), 0 } + x2) * x2);
+    const local_float_t bot = local_float_t { UINT32_C(665280), 0 } + x * (local_float_t { INT32_C(-332640), 0 } + x * (local_float_t { UINT32_C(75600), 0 } + x * (local_float_t { INT16_C(-10080), 0 } + x * (local_float_t { UINT16_C(840), 0 } + (local_float_t { INT8_C(-42), 0 } + x) * x))));
+
+    return local_float_t { 1 } + (top / bot); // top / bot approximates exp(x) - 1, so add the 1 back
+}
+
+template <>
+constexpr auto exp_pade_appxroximant<decimal128>(decimal128 x) noexcept
+{
+    // Compute exp(x) for small x, as 1 + Pade(exp(x / 8) - 1) followed by three squarings.
+
+    // Use an order-12 Pade approximation of the exponential function.
+    // PadeApproximant[Exp[x] - 1, {x, 0, {12, 12}}].
+
+    using local_float_t = decimal128;
+
+    // Rescale the argument even further (and note the three squarings below).
+    x /= 8;
+
+    const local_float_t x2 = (x * x);
+
+    const local_float_t top = (((((  local_float_t { boost::decimal::detail::uint128 { UINT64_C(130576843339991), UINT64_C(2348781707059460614)  }, -46 }   * x2
+                                   + local_float_t { boost::decimal::detail::uint128 { UINT64_C(502720846858965), UINT64_C(15499169997977266440) }, -43 } ) * x2
+                                   + local_float_t { boost::decimal::detail::uint128 { UINT64_C(492264253244299), UINT64_C(6469924059228430936)  }, -40 } ) * x2
+                                   + local_float_t { boost::decimal::detail::uint128 { UINT64_C(168354374609550), UINT64_C(6971973999273187690)  }, -37 } ) * x2
+                                   + local_float_t { boost::decimal::detail::uint128 { UINT64_C(196413437044475), UINT64_C(8133969665818718980)  }, -35 } ) * x2
+                                   + local_float_t { boost::decimal::detail::uint128 { UINT64_C(54210108624275),  UINT64_C(4089650035136921600)  }, -33 } )
+                                   ; // polynomial in x2 only; the remaining odd factor x is applied where result is formed
+
+    const local_float_t bot = ((((((((((((  local_float_t( +boost::decimal::decimal128 { boost::decimal::detail::uint128 { UINT64_C(418515523525612), UINT64_C(10839100561497421498) }, -49 } )  * x
+                                          + local_float_t( -boost::decimal::decimal128 { boost::decimal::detail::uint128 { UINT64_C(65288421669995),  UINT64_C(10397762890384506116) }, -46 } )) * x
+                                          + local_float_t( +boost::decimal::decimal128 { boost::decimal::detail::uint128 { UINT64_C(502720846858965), UINT64_C(15499169997977266440) }, -45 } )) * x
+                                          + local_float_t( -boost::decimal::decimal128 { boost::decimal::detail::uint128 { UINT64_C(251360423429482), UINT64_C(16972957035843409028) }, -43 } )) * x
+                                          + local_float_t( +boost::decimal::decimal128 { boost::decimal::detail::uint128 { UINT64_C(90489752434613),  UINT64_C(15702571451232594082) }, -41 } )) * x
+                                          + local_float_t( -boost::decimal::decimal128 { boost::decimal::detail::uint128 { UINT64_C(246132126622149), UINT64_C(12458334066468991276) }, -40 } )) * x
+                                          + local_float_t( +boost::decimal::decimal128 { boost::decimal::detail::uint128 { UINT64_C(516877465906514), UINT64_C(5871083058504374896)  }, -39 } )) * x
+                                          + local_float_t( -boost::decimal::decimal128 { boost::decimal::detail::uint128 { UINT64_C(84177187304775),  UINT64_C(3485986999636593840)  }, -37 } )) * x
+                                          + local_float_t( +boost::decimal::decimal128 { boost::decimal::detail::uint128 { UINT64_C(105221484130968), UINT64_C(18192541804827906022) }, -36 } )) * x
+                                          + local_float_t( -boost::decimal::decimal128 { boost::decimal::detail::uint128 { UINT64_C(98206718522237),  UINT64_C(13290356869764135298) }, -35 } )) * x
+                                          + local_float_t( +boost::decimal::decimal128 { boost::decimal::detail::uint128 { UINT64_C(64816434224676),  UINT64_C(16519268045002340975) }, -34 } )) * x
+                                          + local_float_t( -boost::decimal::decimal128 { boost::decimal::detail::uint128 { UINT64_C(271050543121376), UINT64_C(2001506101975056384)  }, -34 } )) * x
+                                          + local_float_t( +boost::decimal::decimal128 { boost::decimal::detail::uint128 { UINT64_C(54210108624275),  UINT64_C(4089650035136921600)  }, -33 } ))
+                                          ; // degree-12 polynomial in x with alternating-sign coefficients, in Horner form
+
+    local_float_t result { local_float_t { 1 } + ((x * top) / bot) }; // x here is the original argument / 8
+
+    result *= result; // squaring 1 of 3: undoes the first factor of 2 in the division by 8
+    result *= result; // squaring 2 of 3
+    result *= result; // squaring 3 of 3: result now corresponds to the unscaled argument
+
+    return result;
+}
+
+} //namespace detail
+} //namespace decimal
+} //namespace boost
+
+#endif //BOOST_DECIMAL_DETAIL_CMATH_IMPL_EXP_IMPL_HPP
diff --git a/include/boost/decimal/detail/cmath/tgamma.hpp b/include/boost/decimal/detail/cmath/tgamma.hpp
@@ -9,9 +9,9 @@
 #include <boost/decimal/fwd.hpp> // NOLINT(llvm-include-order)
 #include <boost/decimal/detail/cmath/impl/tgamma_impl.hpp>
 #include <boost/decimal/detail/cmath/sin.hpp>
+#include <boost/decimal/detail/config.hpp>
 #include <boost/decimal/detail/type_traits.hpp>
 #include <boost/decimal/numbers.hpp>
-#include <boost/decimal/detail/config.hpp>
 
 #ifndef BOOST_DECIMAL_BUILD_MODULE
 #include <iterator>
diff --git a/test/test_cosh.cpp b/test/test_cosh.cpp
diff --git a/test/test_sqrt.cpp b/test/test_sqrt.cpp