Skip to content

Commit 6102696

Browse files
committed
Improve exp() efficiency
1 parent 81ce2e4 commit 6102696

File tree

3 files changed

+191
-55
lines changed

3 files changed

+191
-55
lines changed

include/boost/decimal/detail/cmath/exp.hpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -76,7 +76,7 @@ constexpr auto exp_impl(T x) noexcept
7676
x -= numbers::ln2_v<T> * nf2;
7777
}
7878

79-
result = detail::exp_pade_appxroximant(x);
79+
result = detail::exp_pade_appxroximant_or_series(x);
8080

8181
if (nf2 > 0)
8282
{

include/boost/decimal/detail/cmath/impl/exp_impl.hpp

Lines changed: 73 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
#define BOOST_DECIMAL_DETAIL_CMATH_IMPL_EXP_IMPL_HPP
88

99
#include <boost/decimal/detail/concepts.hpp>
10+
#include <boost/decimal/detail/cmath/impl/taylor_series_result.hpp>
1011

1112
#ifndef BOOST_DECIMAL_BUILD_MODULE
1213
#include <array>
@@ -18,32 +19,76 @@ namespace boost {
1819
namespace decimal {
1920
namespace detail {
2021

22+
namespace exp_detail {
23+
24+
template <bool b>
25+
struct exp_table_imp
26+
{
27+
private:
28+
using d128_coeffs_t = std::array<decimal128, 17>;
29+
30+
public:
31+
static constexpr d128_coeffs_t d128_coeffs =
32+
{{
33+
// Series[Exp[x] - 1, {x, 0, 18}]
34+
// (1), // * x
35+
::boost::decimal::decimal128 { 5, -1 }, // * x^2
36+
::boost::decimal::decimal128 { boost::decimal::detail::uint128 { UINT64_C(90350181040458), UINT64_C(12964998083131386532) }, -34 },
37+
::boost::decimal::decimal128 { boost::decimal::detail::uint128 { UINT64_C(225875452601146), UINT64_C(13965751134118914724) }, -35 },
38+
::boost::decimal::decimal128 { boost::decimal::detail::uint128 { UINT64_C(451750905202293), UINT64_C(9484758194528277842) }, -36 },
39+
::boost::decimal::decimal128 { boost::decimal::detail::uint128 { UINT64_C(75291817533715), UINT64_C(10804165069276155440) }, -36 },
40+
::boost::decimal::decimal128 { boost::decimal::detail::uint128 { UINT64_C(107559739333879), UINT64_C(7528774067376128516) }, -37 },
41+
::boost::decimal::decimal128 { boost::decimal::detail::uint128 { UINT64_C(134449674167349), UINT64_C(4799281565792772746) }, -38 },
42+
::boost::decimal::decimal128 { boost::decimal::detail::uint128 { UINT64_C(149388526852610), UINT64_C(5332535073103080820) }, -39 },
43+
::boost::decimal::decimal128 { boost::decimal::detail::uint128 { UINT64_C(149388526852610), UINT64_C(5332535073103080820) }, -40 },
44+
::boost::decimal::decimal128 { boost::decimal::detail::uint128 { UINT64_C(135807751684191), UINT64_C(3170782423392841514) }, -41 },
45+
::boost::decimal::decimal128 { boost::decimal::detail::uint128 { UINT64_C(113173126403492), UINT64_C(11865690723015477068) }, -42 },
46+
::boost::decimal::decimal128 { boost::decimal::detail::uint128 { UINT64_C(87056251079609), UINT64_C(13384395342406417346) }, -43 },
47+
::boost::decimal::decimal128 { boost::decimal::detail::uint128 { UINT64_C(62183036485435), UINT64_C(9560282387433155251) }, -44 },
48+
::boost::decimal::decimal128 { boost::decimal::detail::uint128 { UINT64_C(414553576569570), UINT64_C(2246069003855862950) }, -46 },
49+
::boost::decimal::decimal128 { boost::decimal::detail::uint128 { UINT64_C(259095985355981), UINT64_C(6015479145837302244) }, -47 },
50+
::boost::decimal::decimal128 { boost::decimal::detail::uint128 { UINT64_C(152409403150577), UINT64_C(4623619737181327888) }, -48 },
51+
::boost::decimal::decimal128 { boost::decimal::detail::uint128 { UINT64_C(84671890639209), UINT64_C(10767230553416093986) }, -49 },
52+
}};
53+
};
54+
55+
#if !(defined(__cpp_inline_variables) && __cpp_inline_variables >= 201606L) && (!defined(_MSC_VER) || _MSC_VER != 1900)
56+
57+
template <bool b>
58+
constexpr typename exp_table_imp<b>::d128_coeffs_t exp_table_imp<b>::d128_coeffs;
59+
60+
#endif
61+
62+
} //namespace exp_detail
63+
64+
using exp_table = exp_detail::exp_table_imp<true>;
65+
2166
template <BOOST_DECIMAL_DECIMAL_FLOATING_TYPE T>
22-
constexpr auto exp_pade_appxroximant(T x) noexcept;
67+
constexpr auto exp_pade_appxroximant_or_series(T x) noexcept;
2368

2469
template <>
25-
constexpr auto exp_pade_appxroximant<decimal32>(decimal32 x) noexcept
70+
constexpr auto exp_pade_appxroximant_or_series<decimal32>(decimal32 x) noexcept
2671
{
27-
// TODO: Chris: At 32-bit, reduce the number of coefficients in the Pade appxorimant of the exp() function.
28-
2972
using local_float_t = decimal32;
3073

31-
// PadeApproximant[Exp[x] - 1, {x, 0, {6, 6}}]
74+
// PadeApproximant[Exp[x] - 1, {x, 0, {3, 4}}]
3275
// FullSimplify[%]
33-
// (84 x (7920 + 240 x^2 + x^4))
34-
// / (665280 + x (-332640 + x (75600 + x (-10080 + x (840 + (-42 + x) x)))))
76+
// (40 x (42 + x^2))
77+
// / (1680 + x (-840 + x (180 + (-20 + x) x)))
3578

3679
const auto x2 = x * x;
3780

3881
// Use the small-argument Pade approximation having coefficients shown above.
39-
const local_float_t top = local_float_t { UINT8_C(84), 0 } * x * ( local_float_t { UINT16_C(7920), 0 } + ( local_float_t { UINT8_C(240), 0 } + x2) * x2);
40-
const local_float_t bot = local_float_t { UINT32_C(665280), 0 } + x * (local_float_t { INT32_C(-332640), 0 } + x * (local_float_t { UINT32_C(75600), 0 } + x * (local_float_t { INT16_C(-10080), 0 } + x * (local_float_t { UINT16_C(840), 0 } + (local_float_t { INT8_C(-42), 0 } + x) * x))));
82+
const local_float_t top { local_float_t { UINT8_C(40), 0 } * x * (local_float_t { UINT8_C(42), 0 } + x2) };
83+
const local_float_t bot { local_float_t { UINT16_C(1680), 0 } + x * (local_float_t { INT16_C(-840), 0 } + x * (local_float_t { UINT8_C(180), 0 } + (local_float_t { INT8_C(-20), 0 } + x) * x)) };
4184

42-
return local_float_t { 1 } + (top / bot);
85+
constexpr local_float_t one { 1 };
86+
87+
return one + (top / bot);
4388
}
4489

4590
template <>
46-
constexpr auto exp_pade_appxroximant<decimal64>(decimal64 x) noexcept
91+
constexpr auto exp_pade_appxroximant_or_series<decimal64>(decimal64 x) noexcept
4792
{
4893
using local_float_t = decimal64;
4994

@@ -58,57 +103,38 @@ constexpr auto exp_pade_appxroximant<decimal64>(decimal64 x) noexcept
58103
const local_float_t top = local_float_t { UINT8_C(84), 0 } * x * ( local_float_t { UINT16_C(7920), 0 } + ( local_float_t { UINT8_C(240), 0 } + x2) * x2);
59104
const local_float_t bot = local_float_t { UINT32_C(665280), 0 } + x * (local_float_t { INT32_C(-332640), 0 } + x * (local_float_t { UINT32_C(75600), 0 } + x * (local_float_t { INT16_C(-10080), 0 } + x * (local_float_t { UINT16_C(840), 0 } + (local_float_t { INT8_C(-42), 0 } + x) * x))));
60105

61-
return local_float_t { 1 } + (top / bot);
106+
constexpr local_float_t one { 1 };
107+
108+
return one + (top / bot);
62109
}
63110

64111
template <>
65-
constexpr auto exp_pade_appxroximant<decimal128>(decimal128 x) noexcept
112+
constexpr auto exp_pade_appxroximant_or_series<decimal128>(decimal128 x) noexcept
66113
{
67114
// Compute exp(x) - 1 for x small.
68-
69-
// TODO: Does it make sense to try and improve accuracy/precision with more Pade terms?
70-
// Or would a simple Tylor expansion here simply be better?
71-
72-
// Use an order-12 Pade approximation of the exponential function.
73-
// PadeApproximant[Exp[x] - 1, {x, 0, 12, 12}].
115+
// Use argument scaling in combination with a Taylor series expansion to order-18.
74116

75117
using local_float_t = decimal128;
76118

77119
// Rescale the argument even further (and note the three squarings below).
78120
x /= 8;
79121

80-
const local_float_t x2 = (x * x);
81-
82-
const local_float_t top = ((((( local_float_t { boost::decimal::detail::uint128 { UINT64_C(130576843339991), UINT64_C(2348781707059460614) }, -46 } * x2
83-
+ local_float_t { boost::decimal::detail::uint128 { UINT64_C(502720846858965), UINT64_C(15499169997977266440) }, -43 } ) * x2
84-
+ local_float_t { boost::decimal::detail::uint128 { UINT64_C(492264253244299), UINT64_C(6469924059228430936) }, -40 } ) * x2
85-
+ local_float_t { boost::decimal::detail::uint128 { UINT64_C(168354374609550), UINT64_C(6971973999273187690) }, -37 } ) * x2
86-
+ local_float_t { boost::decimal::detail::uint128 { UINT64_C(196413437044475), UINT64_C(8133969665818718980) }, -35 } ) * x2
87-
+ local_float_t { boost::decimal::detail::uint128 { UINT64_C(54210108624275), UINT64_C(4089650035136921600) }, -33 } )
88-
;
89-
90-
const local_float_t bot = (((((((((((( local_float_t( +boost::decimal::decimal128 { boost::decimal::detail::uint128 { UINT64_C(418515523525612), UINT64_C(10839100561497421498) }, -49 } ) * x
91-
+ local_float_t( -boost::decimal::decimal128 { boost::decimal::detail::uint128 { UINT64_C(65288421669995), UINT64_C(10397762890384506116) }, -46 } )) * x
92-
+ local_float_t( +boost::decimal::decimal128 { boost::decimal::detail::uint128 { UINT64_C(502720846858965), UINT64_C(15499169997977266440) }, -45 } )) * x
93-
+ local_float_t( -boost::decimal::decimal128 { boost::decimal::detail::uint128 { UINT64_C(251360423429482), UINT64_C(16972957035843409028) }, -43 } )) * x
94-
+ local_float_t( +boost::decimal::decimal128 { boost::decimal::detail::uint128 { UINT64_C(90489752434613), UINT64_C(15702571451232594082) }, -41 } )) * x
95-
+ local_float_t( -boost::decimal::decimal128 { boost::decimal::detail::uint128 { UINT64_C(246132126622149), UINT64_C(12458334066468991276) }, -40 } )) * x
96-
+ local_float_t( +boost::decimal::decimal128 { boost::decimal::detail::uint128 { UINT64_C(516877465906514), UINT64_C(5871083058504374896) }, -39 } )) * x
97-
+ local_float_t( -boost::decimal::decimal128 { boost::decimal::detail::uint128 { UINT64_C(84177187304775), UINT64_C(3485986999636593840) }, -37 } )) * x
98-
+ local_float_t( +boost::decimal::decimal128 { boost::decimal::detail::uint128 { UINT64_C(105221484130968), UINT64_C(18192541804827906022) }, -36 } )) * x
99-
+ local_float_t( -boost::decimal::decimal128 { boost::decimal::detail::uint128 { UINT64_C(98206718522237), UINT64_C(13290356869764135298) }, -35 } )) * x
100-
+ local_float_t( +boost::decimal::decimal128 { boost::decimal::detail::uint128 { UINT64_C(64816434224676), UINT64_C(16519268045002340975) }, -34 } )) * x
101-
+ local_float_t( -boost::decimal::decimal128 { boost::decimal::detail::uint128 { UINT64_C(271050543121376), UINT64_C(2001506101975056384) }, -34 } )) * x
102-
+ local_float_t( +boost::decimal::decimal128 { boost::decimal::detail::uint128 { UINT64_C(54210108624275), UINT64_C(4089650035136921600) }, -33 } ))
103-
;
104-
105-
local_float_t result { local_float_t { 1 } + ((x * top) / bot) };
122+
constexpr local_float_t one { 1, 0 };
106123

107-
result *= result;
124+
// Note: The Taylor series expansion begins with coefficients of order-2.
125+
// So we need to multiply by x^2 and add the two skipped terms (1 + x).
126+
127+
local_float_t
128+
result
129+
{
130+
one + (x * (one + (x * taylor_series_result(x, exp_table::d128_coeffs))))
131+
};
132+
133+
// Scale up with three squarings in order to obtain the result.
108134
result *= result;
109135
result *= result;
110136

111-
return result;
137+
return result *= result;
112138
}
113139

114140
} //namespace detail

test/test_exp.cpp

Lines changed: 117 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
1-
// Copyright 2023 Matt Borland
2-
// Copyright 2023 Christopher Kormanyos
1+
// Copyright 2023 - 2024 Matt Borland
2+
// Copyright 2023 - 2024 Christopher Kormanyos
33
// Distributed under the Boost Software License, Version 1.0.
44
// https://www.boost.org/LICENSE_1_0.txt
55

@@ -54,17 +54,31 @@ namespace local
5454

5555
auto result_is_ok = bool { };
5656

57+
NumericType delta { };
58+
5759
if(b == static_cast<NumericType>(0))
5860
{
59-
result_is_ok = (fabs(a - b) < tol);
61+
delta = fabs(a - b); // LCOV_EXCL_LINE
62+
63+
result_is_ok = (delta < tol); // LCOV_EXCL_LINE
6064
}
6165
else
6266
{
63-
const auto delta = fabs(1 - (a / b));
67+
delta = fabs(1 - (a / b));
6468

6569
result_is_ok = (delta < tol);
6670
}
6771

72+
// LCOV_EXCL_START
73+
if (!result_is_ok)
74+
{
75+
std::cerr << std::setprecision(std::numeric_limits<NumericType>::digits10) << "a: " << a
76+
<< "\nb: " << b
77+
<< "\ndelta: " << delta
78+
<< "\ntol: " << tol << std::endl;
79+
}
80+
// LCOV_EXCL_STOP
81+
6882
return result_is_ok;
6983
}
7084

@@ -115,9 +129,9 @@ namespace local
115129
if(!result_val_is_ok)
116130
{
117131
// LCOV_EXCL_START
118-
std::cout << "x_flt : " << std::scientific << std::setprecision(std::numeric_limits<float_type>::digits10) << x_flt << std::endl;
119-
std::cout << "val_flt: " << std::scientific << std::setprecision(std::numeric_limits<float_type>::digits10) << val_flt << std::endl;
120-
std::cout << "val_dec: " << std::scientific << std::setprecision(std::numeric_limits<float_type>::digits10) << val_dec << std::endl;
132+
std::cerr << "x_flt : " << std::scientific << std::setprecision(std::numeric_limits<float_type>::digits10) << x_flt << std::endl;
133+
std::cerr << "val_flt: " << std::scientific << std::setprecision(std::numeric_limits<float_type>::digits10) << val_flt << std::endl;
134+
std::cerr << "val_dec: " << std::scientific << std::setprecision(std::numeric_limits<float_type>::digits10) << val_dec << std::endl;
121135

122136
break;
123137
// LCOV_EXCL_STOP
@@ -213,6 +227,92 @@ namespace local
213227

214228
return result_is_ok;
215229
}
230+
231+
auto test_exp_128(const int tol_factor) -> bool
232+
{
233+
using decimal_type = boost::decimal::decimal128;
234+
235+
using str_ctrl_array_type = std::array<const char*, 39U>;
236+
237+
const str_ctrl_array_type ctrl_strings =
238+
{{
239+
// Table[N[Exp[n/10 + n/100], 36], {n, 1, 39, 1}]
240+
"1.11627807045887129150073776905298390",
241+
"1.24607673058738081952026478299269624",
242+
"1.39096812846378026624274780495311882",
243+
"1.55270721851133604205007964619169497",
244+
"1.73325301786739523682191676713732884",
245+
"1.93479233440203152169312515101969168",
246+
"2.15976625378491500838755239034002685",
247+
"2.41089970641720985089088491613290280",
248+
"2.69123447234926228909987940407101397",
249+
"3.00416602394643311205840795358867239",
250+
"3.35348465254902368100358942737571204",
251+
"3.74342137726086256855805582982587323",
252+
"4.17869919192324615658039176435293801",
253+
"4.66459027098812590279338676624377783",
254+
"5.20697982717984873765730709271233513",
255+
"5.81243739440258864988034062444969445",
256+
"6.48829639928671111502903132434912956",
257+
"7.24274298516101220851243475314474762",
258+
"8.08491516430506017497344071644188155",
259+
"9.02501349943412092647177716688866403",
260+
"10.0744246550135862002454552896844711",
261+
"11.2458593148818460799615892055305690",
262+
"12.5535061366682314080320232000754142",
263+
"14.0132036077336131602667577975340025",
264+
"15.6426318841881716102126980461566588",
265+
"17.4615269365799904170450682499698346",
266+
"19.4919195960311175203209452590133521",
267+
"21.7584023961970778443863882601062266",
268+
"24.2884274430945556043070982961719396",
269+
"27.1126389206578874268183721102312223",
270+
"30.2652442594000813446015323588968824",
271+
"33.7844284638495538820910085630299049",
272+
"37.7128166171817490996824895604598120",
273+
"42.0979901649969005914744807079465071",
274+
"46.9930632315792808648304762411623248",
275+
"52.4573259490990503124315131185087067",
276+
"58.5569625918923670285321923410850419",
277+
"65.3658532140099181652435900015868107",
278+
"72.9664684996328018947164376727604433",
279+
}};
280+
281+
std::array<decimal_type, std::tuple_size<str_ctrl_array_type>::value> exp_values { };
282+
std::array<decimal_type, std::tuple_size<str_ctrl_array_type>::value> ctrl_values { };
283+
284+
int nx { 1 };
285+
286+
bool result_is_ok { true };
287+
288+
const decimal_type my_tol { std::numeric_limits<decimal_type>::epsilon() * static_cast<decimal_type>(tol_factor) };
289+
290+
for(auto i = static_cast<std::size_t>(UINT8_C(0)); i < std::tuple_size<str_ctrl_array_type>::value; ++i)
291+
{
292+
const decimal_type
293+
x_arg
294+
{
295+
decimal_type { nx, -1 }
296+
+ decimal_type { nx, -2 }
297+
};
298+
299+
++nx;
300+
301+
exp_values[i] = exp(x_arg);
302+
303+
static_cast<void>
304+
(
305+
from_chars(ctrl_strings[i], ctrl_strings[i] + std::strlen(ctrl_strings[i]), ctrl_values[i])
306+
);
307+
308+
const auto result_exp_is_ok = is_close_fraction(exp_values[i], ctrl_values[i], my_tol);
309+
310+
result_is_ok = (result_exp_is_ok && result_is_ok);
311+
}
312+
313+
return result_is_ok;
314+
}
315+
216316
} // namespace local
217317

218318
auto main() -> int
@@ -277,6 +377,16 @@ auto main() -> int
277377
result_is_ok = (result_edge_is_ok && result_is_ok);
278378
}
279379

380+
{
381+
using decimal_type = boost::decimal::decimal128;
382+
383+
const auto result_pos128_is_ok = local::test_exp_128(400000);
384+
385+
BOOST_TEST(result_pos128_is_ok);
386+
387+
result_is_ok = (result_pos128_is_ok && result_is_ok);
388+
}
389+
280390
result_is_ok = ((boost::report_errors() == 0) && result_is_ok);
281391

282392
return (result_is_ok ? 0 : -1);

0 commit comments

Comments
 (0)