1- // Copyright 2023 Matt Borland
2- // Copyright 2023 Christopher Kormanyos
1+ // Copyright 2023 - 2024 Matt Borland
2+ // Copyright 2023 - 2024 Christopher Kormanyos
33// Distributed under the Boost Software License, Version 1.0.
44// https://www.boost.org/LICENSE_1_0.txt
55
@@ -24,65 +24,149 @@ namespace decimal {
2424namespace detail {
2525
2626template <typename T>
27- constexpr auto cbrt_impl (T val ) noexcept
27+ constexpr auto cbrt_impl (T x ) noexcept
2828 BOOST_DECIMAL_REQUIRES(detail::is_decimal_floating_point_v, T)
2929{
30- constexpr T zero {0 , 0 };
31- constexpr T one {1 , 0 };
30+ const auto fpc = fpclassify (x);
3231
3332 T result { };
3433
35- if (isnan (val ) || abs (val) == zero )
34+ if ((fpc == FP_NAN ) || (fpc == FP_ZERO) )
3635 {
37- result = val;
38- }
39- else if (isinf (val))
40- {
41- if (signbit (val))
42- {
43- result = std::numeric_limits<T>::quiet_NaN ();
44- }
45- else
46- {
47- result = val;
48- }
36+ result = x;
4937 }
50- else if (val < zero )
38+ else if (signbit (x) )
5139 {
52- result = std::numeric_limits<T>:: quiet_NaN ( );
40+ result = - cbrt (-x );
5341 }
54- else if (val == one )
42+ else if (fpc == FP_INFINITE )
5543 {
56- result = one ;
44+ result = std::numeric_limits<T>:: infinity () ;
5745 }
5846 else
5947 {
60- constexpr T epsilon = std::numeric_limits<T>::epsilon () * 100 ;
61- T error = one / epsilon;
48+ int exp10val { };
49+
50+ const auto gn { frexp10 (x, &exp10val) };
6251
63- T x {};
64- if (val > one)
52+ const auto
53+ zeros_removal
54+ {
55+ remove_trailing_zeros (gn)
56+ };
57+
58+ const bool is_pure { static_cast <unsigned >(zeros_removal.trimmed_number ) == 1U };
59+
60+ if (is_pure)
6561 {
66- // Scale down if val is large by dividing the exp by 3
67- int exp {};
68- auto sig = frexp10 (val, &exp);
69- x = T{sig, exp / 3 };
62+ // Here, a pure power-of-10 argument gets a straightforward result.
63+ // For argument 10^n where n is a multiple of 3, the result is exact.
64+
65+ const int p10 { exp10val + static_cast <int >(zeros_removal.number_of_removed_zeros ) };
66+
67+ if (p10 == 0 )
68+ {
69+ result = T { 1 };
70+ }
71+ else
72+ {
73+ const int p10_mod3 = (p10 % 3 );
74+ const int p10_div3 = (p10 / 3 );
75+
76+ result = T { 1 , p10_div3 };
77+
78+ switch (p10_mod3)
79+ {
80+ case 2 :
81+ result *= numbers::cbrt10_v<T>;
82+ // fallthrough
83+
84+ case 1 :
85+ result *= numbers::cbrt10_v<T>;
86+ break ;
87+
88+ case -2 :
89+ result /= numbers::cbrt10_v<T>;
90+ // fallthrough
91+
92+ case -1 :
93+ result /= numbers::cbrt10_v<T>;
94+ break ;
95+ }
96+ }
7097 }
7198 else
7299 {
73- // Trivial heuristic
74- x = val * 2 ;
75- }
76-
77- while (error > epsilon)
78- {
79- const T new_x {(2 * x + val / (x * x)) / 3 };
80-
81- error = fabs (new_x - x);
82- x = new_x;
100+ // Scale the argument to the interval 1/10 <= x < 1.
101+ T gx { gn, -std::numeric_limits<T>::digits10 };
102+
103+ exp10val += std::numeric_limits<T>::digits10;
104+
105+ // For this work we perform an order-2 Pade approximation of the cube-root
106+ // at argument x = 1/2. This results in slightly more than 2 decimal digits
107+ // of accuracy over the interval 1/10 <= x < 1.
108+
109+ // PadeApproximant[x^(1/3), {x, 1/2, {2, 2}}]
110+ // FullSimplify[%]
111+
112+ // HornerForm[Numerator[Out[2]]]
113+ // Results in:
114+ // 5 + x (70 + 56 x)
115+
116+ // HornerForm[Denominator[Out[2]]]
117+ // Results in:
118+ // 2^(1/3) (14 + x (70 + 20 x))
119+
120+ constexpr T five { 5 };
121+ constexpr T fourteen { 14 };
122+ constexpr T seventy { 7 , 1 };
123+
124+ result =
125+ (five + gx * (seventy + gx * 56 ))
126+ / (numbers::cbrt2_v<T> * (fourteen + gx * (seventy + gx * 20 )));
127+
128+ // Perform 2, 3 or 4 Newton-Raphson iterations depending on precision.
129+ // Note from above, we start with slightly more than 2 decimal digits
130+ // of accuracy.
131+
132+ constexpr int iter_loops
133+ {
134+ std::numeric_limits<T>::digits10 < 10 ? 2
135+ : std::numeric_limits<T>::digits10 < 20 ? 3 : 4
136+ };
137+
138+ for (int idx = 0 ; idx < iter_loops; ++idx)
139+ {
140+ result = ((result + result) + gx / (result * result)) / 3 ;
141+ }
142+
143+ if (exp10val != 0 )
144+ {
145+ const int exp10val_mod3 = (exp10val % 3 );
146+ const int exp10val_div3 = (exp10val / 3 );
147+
148+ result *= T { 1 , exp10val_div3 };
149+
150+ switch (exp10val_mod3)
151+ {
152+ case 2 :
153+ result *= numbers::cbrt10_v<T>;
154+ // fallthrough
155+
156+ case 1 :
157+ result *= numbers::cbrt10_v<T>;
158+ break ;
159+
160+ case -2 :
161+ result /= numbers::cbrt10_v<T>;
162+ // fallthrough
163+
164+ case -1 :
165+ result /= numbers::cbrt10_v<T>;
166+ break ;
167+ }
168+ }
83169 }
84-
85- result = x;
86170 }
87171
88172 return result;
0 commit comments