Skip to content

Commit 10089df

Browse files
authored
Merge pull request #574 from cppalliance/study_mul256
fix #559 and fix #575 via repair mul128/256
2 parents 04f5974 + fe4392e commit 10089df

22 files changed

+1636
-403
lines changed

doc/decimal/numbers.adoc

Lines changed: 32 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
11
////
2-
Copyright 2023 Matt Borland
2+
Copyright 2023 - 2024 Matt Borland
33
Distributed under the Boost Software License, Version 1.0.
44
https://www.boost.org/LICENSE_1_0.txt
55
////
@@ -10,7 +10,7 @@ https://www.boost.org/LICENSE_1_0.txt
1010

1111
== Overview
1212

13-
Contains all constants provided by C+\+20's `<numbers>` specialized for the decimal types. These does not require C++20.
13+
Contains all constants provided by C+\+20's `<numbers>` specialized for the decimal types. These do not require C++20.
1414

1515
- e_v - https://en.wikipedia.org/wiki/E_(mathematical_constant)[Euler's Number]
1616
- log2e_v - log2(e)
@@ -22,11 +22,14 @@ Contains all constants provided by C+\+20's `<numbers>` specialized for the deci
2222
- ln10_v - ln(10)
2323
- sqrt2_v - sqrt(2)
2424
- sqrt3_v - sqrt(3)
25+
- sqrt10_v - sqrt(10)
2526
- inv_sqrt2_v - 1 / sqrt(2)
- inv_sqrt3_v - 1 / sqrt(3)
27+
- cbrt2_v - cbrt(2)
28+
- cbrt10_v - cbrt(10)
2629
- egamma_v - https://en.wikipedia.org/wiki/Euler%27s_constant[Euler–Mascheroni constant]
2730
- phi_v - https://en.wikipedia.org/wiki/Golden_ratio[The Golden Ratio]
2831

29-
There are also non-template variables that provide the constant as a decimal32 type.
32+
There are also non-template variables that provide the constant as a decimal64 type.
3033

3134
== Reference
3235

@@ -68,32 +71,44 @@ static constexpr Decimal sqrt2_v;
6871
template <typename Decimal>
6972
static constexpr Decimal sqrt3_v;
7073
74+
template <typename Decimal>
75+
static constexpr Decimal sqrt10_v;
76+
7177
template <typename Decimal>
7278
static constexpr Decimal inv_sqrt2_v;
7379
7480
template <typename Decimal>
7581
static constexpr Decimal inv_sqrt3_v;
7682
83+
template <typename Decimal>
84+
static constexpr Decimal cbrt2_v;
85+
86+
template <typename Decimal>
87+
static constexpr Decimal cbrt10_v;
88+
7789
template <typename Decimal>
7890
static constexpr Decimal egamma_v;
7991
8092
template <typename Decimal>
8193
static constexpr Decimal phi_v;
8294
83-
static constexpr auto e {e_v<decimal32>};
84-
static constexpr auto log2e {log2e_v<decimal32>};
85-
static constexpr auto log10e {log10e_v<decimal32>};
86-
static constexpr auto pi {pi_v<decimal32>};
87-
static constexpr auto inv_pi {inv_pi_v<decimal32>};
88-
static constexpr auto inv_sqrtpi {inv_sqrtpi_v<decimal32>};
89-
static constexpr auto ln2 {ln2_v<decimal32>};
90-
static constexpr auto ln10 {ln10_v<decimal32>};
91-
static constexpr auto sqrt2 {sqrt2_v<decimal32>};
92-
static constexpr auto sqrt3 {sqrt3_v<decimal32>};
93-
static constexpr auto inv_sqrt2 {inv_sqrt2_v<decimal32>};
94-
static constexpr auto inv_sqrt3 {inv_sqrt3_v<decimal32>};
95-
static constexpr auto egamma {egamma_v<decimal32>};
96-
static constexpr auto phi {phi_v<decimal32>};
95+
static constexpr auto e {e_v<decimal64>};
96+
static constexpr auto log2e {log2e_v<decimal64>};
97+
static constexpr auto log10e {log10e_v<decimal64>};
98+
static constexpr auto pi {pi_v<decimal64>};
99+
static constexpr auto inv_pi {inv_pi_v<decimal64>};
100+
static constexpr auto inv_sqrtpi {inv_sqrtpi_v<decimal64>};
101+
static constexpr auto ln2 {ln2_v<decimal64>};
102+
static constexpr auto ln10 {ln10_v<decimal64>};
103+
static constexpr auto sqrt2 {sqrt2_v<decimal64>};
104+
static constexpr auto sqrt3 {sqrt3_v<decimal64>};
105+
static constexpr auto sqrt10 {sqrt10_v<decimal64>};
106+
static constexpr auto inv_sqrt2 {inv_sqrt2_v<decimal64>};
107+
static constexpr auto inv_sqrt3 {inv_sqrt3_v<decimal64>};
108+
static constexpr auto cbrt2 {cbrt2_v<decimal64>};
109+
static constexpr auto cbrt10 {cbrt10_v<decimal64>};
110+
static constexpr auto egamma {egamma_v<decimal64>};
111+
static constexpr auto phi {phi_v<decimal64>};
97112
98113
} //namespace decimal
99114
} //namespace boost

include/boost/decimal/detail/cmath/cbrt.hpp

Lines changed: 126 additions & 42 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
1-
// Copyright 2023 Matt Borland
2-
// Copyright 2023 Christopher Kormanyos
1+
// Copyright 2023 - 2024 Matt Borland
2+
// Copyright 2023 - 2024 Christopher Kormanyos
33
// Distributed under the Boost Software License, Version 1.0.
44
// https://www.boost.org/LICENSE_1_0.txt
55

@@ -24,65 +24,149 @@ namespace decimal {
2424
namespace detail {
2525

2626
template <typename T>
27-
constexpr auto cbrt_impl(T val) noexcept
27+
constexpr auto cbrt_impl(T x) noexcept
2828
BOOST_DECIMAL_REQUIRES(detail::is_decimal_floating_point_v, T)
2929
{
30-
constexpr T zero {0, 0};
31-
constexpr T one {1, 0};
30+
const auto fpc = fpclassify(x);
3231

3332
T result { };
3433

35-
if (isnan(val) || abs(val) == zero)
34+
if ((fpc == FP_NAN) || (fpc == FP_ZERO))
3635
{
37-
result = val;
38-
}
39-
else if (isinf(val))
40-
{
41-
if (signbit(val))
42-
{
43-
result = std::numeric_limits<T>::quiet_NaN();
44-
}
45-
else
46-
{
47-
result = val;
48-
}
36+
result = x;
4937
}
50-
else if (val < zero)
38+
else if (signbit(x))
5139
{
52-
result = std::numeric_limits<T>::quiet_NaN();
40+
result = -cbrt(-x);
5341
}
54-
else if (val == one)
42+
else if (fpc == FP_INFINITE)
5543
{
56-
result = one;
44+
result = std::numeric_limits<T>::infinity();
5745
}
5846
else
5947
{
60-
constexpr T epsilon = std::numeric_limits<T>::epsilon() * 100;
61-
T error = one / epsilon;
48+
int exp10val { };
49+
50+
const auto gn { frexp10(x, &exp10val) };
6251

63-
T x {};
64-
if (val > one)
52+
const auto
53+
zeros_removal
54+
{
55+
remove_trailing_zeros(gn)
56+
};
57+
58+
const bool is_pure { static_cast<unsigned>(zeros_removal.trimmed_number) == 1U };
59+
60+
if(is_pure)
6561
{
66-
// Scale down if val is large by dividing the exp by 3
67-
int exp {};
68-
auto sig = frexp10(val, &exp);
69-
x = T{sig, exp / 3};
62+
// Here, a pure power-of-10 argument gets a straightforward result.
63+
// For argument 10^n where n is a multiple of 3, the result is exact.
64+
65+
const int p10 { exp10val + static_cast<int>(zeros_removal.number_of_removed_zeros) };
66+
67+
if (p10 == 0)
68+
{
69+
result = T { 1 };
70+
}
71+
else
72+
{
73+
const int p10_mod3 = (p10 % 3);
74+
const int p10_div3 = (p10 / 3);
75+
76+
result = T { 1, p10_div3 };
77+
78+
switch (p10_mod3)
79+
{
80+
case 2:
81+
result *= numbers::cbrt10_v<T>;
82+
// fallthrough
83+
84+
case 1:
85+
result *= numbers::cbrt10_v<T>;
86+
break;
87+
88+
case -2:
89+
result /= numbers::cbrt10_v<T>;
90+
// fallthrough
91+
92+
case -1:
93+
result /= numbers::cbrt10_v<T>;
94+
break;
95+
}
96+
}
7097
}
7198
else
7299
{
73-
// Trivial heuristic
74-
x = val * 2;
75-
}
76-
77-
while (error > epsilon)
78-
{
79-
const T new_x {(2 * x + val / (x * x)) / 3};
80-
81-
error = fabs(new_x - x);
82-
x = new_x;
100+
// Scale the argument to the interval 1/10 <= x < 1.
101+
T gx { gn, -std::numeric_limits<T>::digits10 };
102+
103+
exp10val += std::numeric_limits<T>::digits10;
104+
105+
// For this work we perform an order-2 Pade approximation of the cube-root
106+
// at argument x = 1/2. This results in slightly more than 2 decimal digits
107+
// of accuracy over the interval 1/10 <= x < 1.
108+
109+
// PadeApproximant[x^(1/3), {x, 1/2, {2, 2}}]
110+
// FullSimplify[%]
111+
112+
// HornerForm[Numerator[Out[2]]]
113+
// Results in:
114+
// 5 + x (70 + 56 x)
115+
116+
// HornerForm[Denominator[Out[2]]]
117+
// Results in:
118+
// 2^(1/3) (14 + x (70 + 20 x))
119+
120+
constexpr T five { 5 };
121+
constexpr T fourteen { 14 };
122+
constexpr T seventy { 7, 1 };
123+
124+
result =
125+
(five + gx * (seventy + gx * 56))
126+
/ (numbers::cbrt2_v<T> * (fourteen + gx * (seventy + gx * 20)));
127+
128+
// Perform 2, 3 or 4 Newton-Raphson iterations depending on precision.
129+
// Note from above, we start with slightly more than 2 decimal digits
130+
// of accuracy.
131+
132+
constexpr int iter_loops
133+
{
134+
std::numeric_limits<T>::digits10 < 10 ? 2
135+
: std::numeric_limits<T>::digits10 < 20 ? 3 : 4
136+
};
137+
138+
for (int idx = 0; idx < iter_loops; ++idx)
139+
{
140+
result = ((result + result) + gx / (result * result)) / 3;
141+
}
142+
143+
if (exp10val != 0)
144+
{
145+
const int exp10val_mod3 = (exp10val % 3);
146+
const int exp10val_div3 = (exp10val / 3);
147+
148+
result *= T { 1, exp10val_div3 };
149+
150+
switch (exp10val_mod3)
151+
{
152+
case 2:
153+
result *= numbers::cbrt10_v<T>;
154+
// fallthrough
155+
156+
case 1:
157+
result *= numbers::cbrt10_v<T>;
158+
break;
159+
160+
case -2:
161+
result /= numbers::cbrt10_v<T>;
162+
// fallthrough
163+
164+
case -1:
165+
result /= numbers::cbrt10_v<T>;
166+
break;
167+
}
168+
}
83169
}
84-
85-
result = x;
86170
}
87171

88172
return result;

0 commit comments

Comments
 (0)