Skip to content

Commit 672b569

Browse files
authored
Merge pull request #549 from cppalliance/atan_further
Fix #538 simplify and extend atan()
2 parents a6fab8e + 49a5605 commit 672b569

File tree

3 files changed

+342
-195
lines changed

3 files changed

+342
-195
lines changed

include/boost/decimal/detail/cmath/atan.hpp

Lines changed: 55 additions & 194 deletions
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,17 @@
1-
// Copyright 2023 Matt Borland
2-
// Copyright 2023 Christopher Kormanyos
1+
// Copyright 2023 - 2024 Matt Borland
2+
// Copyright 2023 - 2024 Christopher Kormanyos
33
// Distributed under the Boost Software License, Version 1.0.
44
// https://www.boost.org/LICENSE_1_0.txt
55

66
#ifndef BOOST_DECIMAL_DETAIL_CMATH_ATAN_HPP
77
#define BOOST_DECIMAL_DETAIL_CMATH_ATAN_HPP
88

9-
#include <boost/decimal/fwd.hpp>
10-
#include <boost/decimal/numbers.hpp>
11-
#include <boost/decimal/detail/type_traits.hpp>
9+
#include <boost/decimal/fwd.hpp> // NOLINT(llvm-include-order)
10+
#include <boost/decimal/detail/cmath/impl/atan_impl.hpp>
1211
#include <boost/decimal/detail/concepts.hpp>
1312
#include <boost/decimal/detail/config.hpp>
14-
#include <boost/decimal/detail/cmath/fpclassify.hpp>
15-
#include <boost/decimal/detail/cmath/fabs.hpp>
13+
#include <boost/decimal/detail/type_traits.hpp>
14+
#include <boost/decimal/numbers.hpp>
1615

1716
#ifndef BOOST_DECIMAL_BUILD_MODULE
1817
#include <type_traits>
@@ -24,208 +23,70 @@ namespace decimal {
2423

2524
namespace detail {
2625

27-
template <BOOST_DECIMAL_DECIMAL_FLOATING_TYPE T>
28-
constexpr auto atan_small_impl(T x) noexcept
29-
{
30-
T result {};
31-
32-
// 10th degree remez polynomial calculated from 0, 0.4375
33-
// Estimated max error: 2.3032664387910605e-12
34-
constexpr T a0 {UINT64_C(61037779951304161), -18, true};
35-
constexpr T a1 {UINT64_C(10723099589331457), -17};
36-
constexpr T a2 {UINT64_C(22515613909953665), -18};
37-
constexpr T a3 {UINT64_C(15540713402718176), -17, true};
38-
constexpr T a4 {UINT64_C(35999727706986597), -19};
39-
constexpr T a5 {UINT64_C(19938867353282852), -17};
40-
constexpr T a6 {UINT64_C(62252075283915644), -22};
41-
constexpr T a7 {UINT64_C(33333695504913247), -17, true};
42-
constexpr T a8 {UINT64_C(10680927642397763), -24};
43-
constexpr T a9 {UINT64_C(99999999877886492), -17};
44-
constexpr T a10 {UINT64_C(23032664387910606), -29};
45-
46-
result = a0;
47-
result = fma(result, x, a1);
48-
result = fma(result, x, a2);
49-
result = fma(result, x, a3);
50-
result = fma(result, x, a4);
51-
result = fma(result, x, a5);
52-
result = fma(result, x, a6);
53-
result = fma(result, x, a7);
54-
result = fma(result, x, a8);
55-
result = fma(result, x, a9);
56-
result = fma(result, x, a10);
57-
58-
return result;
59-
}
60-
61-
template <BOOST_DECIMAL_DECIMAL_FLOATING_TYPE T>
62-
constexpr auto atan_med_impl(T x) noexcept
63-
{
64-
T result {};
65-
66-
// 10th degree remez polynomial from 2.4375, 6
67-
// Estimated max error: 2.6239664084435361e-9
68-
constexpr T a0 {UINT64_C(35895331641408534), -24, true};
69-
constexpr T a1 {UINT64_C(1734850544519432), -21};
70-
constexpr T a2 {UINT64_C(38064221484608425), -21, true};
71-
constexpr T a3 {UINT64_C(50135011697517902), -20};
72-
constexpr T a4 {UINT64_C(44154446962804779), -19, true};
73-
constexpr T a5 {UINT64_C(27400572833763747), -18};
74-
constexpr T a6 {UINT64_C(12289830364128736), -17, true};
75-
constexpr T a7 {UINT64_C(40157034119189716), -17};
76-
constexpr T a8 {UINT64_C(94842703533437844), -17, true};
77-
constexpr T a9 {UINT64_C(15738722141839421), -16};
78-
constexpr T a10 {UINT64_C(1425850153011925), -16, true};
79-
80-
result = a0;
81-
result = fma(result, x, a1);
82-
result = fma(result, x, a2);
83-
result = fma(result, x, a3);
84-
result = fma(result, x, a4);
85-
result = fma(result, x, a5);
86-
result = fma(result, x, a6);
87-
result = fma(result, x, a7);
88-
result = fma(result, x, a8);
89-
result = fma(result, x, a9);
90-
result = fma(result, x, a10);
91-
92-
return result;
93-
}
94-
95-
template <BOOST_DECIMAL_DECIMAL_FLOATING_TYPE T>
96-
constexpr auto atan_large_impl(T x) noexcept
97-
{
98-
T result {};
99-
100-
// 10th degree remez polynomial from 6, 12
101-
// Estimated max error: 6.2743529396040705e-10
102-
constexpr T a0 {UINT64_C(33711825733904967), -27, true};
103-
constexpr T a1 {UINT64_C(33373728162443893), -25};
104-
constexpr T a2 {UINT64_C(14947421096171872), -23, true};
105-
constexpr T a3 {UINT64_C(39987662607208282), -22};
106-
constexpr T a4 {UINT64_C(7102051362837618), -20, true};
107-
constexpr T a5 {UINT64_C(87975481286276403), -20};
108-
constexpr T a6 {UINT64_C(77627729565466572), -19, true};
109-
constexpr T a7 {UINT64_C(48865382040050205), -18};
110-
constexpr T a8 {UINT64_C(21563130284459425), -17, true};
111-
constexpr T a9 {UINT64_C(63862756812924015), -17};
112-
constexpr T a10 {UINT64_C(41486607456081259), -17};
113-
114-
result = a0;
115-
result = fma(result, x, a1);
116-
result = fma(result, x, a2);
117-
result = fma(result, x, a3);
118-
result = fma(result, x, a4);
119-
result = fma(result, x, a5);
120-
result = fma(result, x, a6);
121-
result = fma(result, x, a7);
122-
result = fma(result, x, a8);
123-
result = fma(result, x, a9);
124-
result = fma(result, x, a10);
125-
126-
return result;
127-
}
128-
129-
template <BOOST_DECIMAL_DECIMAL_FLOATING_TYPE T>
130-
constexpr auto atan_huge_impl(T x) noexcept
131-
{
132-
T result {};
133-
134-
// 10th degree remez polynomial from 12, 24
135-
// Estimated max error: 4.1947468509456082e-10
136-
constexpr T a0 {UINT64_C(21000582873393547), -30, true};
137-
constexpr T a1 {UINT64_C(41414963913943078), -28};
138-
constexpr T a2 {UINT64_C(36923801494687764), -26, true};
139-
constexpr T a3 {UINT64_C(19644631420406287), -24};
140-
constexpr T a4 {UINT64_C(69300227486259428), -23, true};
141-
constexpr T a5 {UINT64_C(17021586749644597), -21};
142-
constexpr T a6 {UINT64_C(29708833721842521), -20, true};
143-
constexpr T a7 {UINT64_C(36858037393285633), -19};
144-
constexpr T a8 {UINT64_C(31874262996720311), -18, true};
145-
constexpr T a9 {UINT64_C(18322547884211479), -17};
146-
constexpr T a10 {UINT64_C(9387703660037886), -16};
147-
148-
result = a0;
149-
result = fma(result, x, a1);
150-
result = fma(result, x, a2);
151-
result = fma(result, x, a3);
152-
result = fma(result, x, a4);
153-
result = fma(result, x, a5);
154-
result = fma(result, x, a6);
155-
result = fma(result, x, a7);
156-
result = fma(result, x, a8);
157-
result = fma(result, x, a9);
158-
result = fma(result, x, a10);
159-
160-
return result;
161-
}
162-
16326
template <typename T>
16427
constexpr auto atan_impl(T x) noexcept
16528
BOOST_DECIMAL_REQUIRES(detail::is_decimal_floating_point_v, T)
16629
{
167-
const auto fpc {fpclassify(x)};
168-
const auto isneg {signbit(x)};
30+
const auto fpc { fpclassify(x) };
16931

170-
if (fpc == FP_ZERO || fpc == FP_NAN)
171-
{
172-
return x;
173-
}
174-
else if (fpc == FP_INFINITE)
175-
{
176-
constexpr auto half_pi {numbers::pi_v<T> / 2};
177-
return isneg ? -half_pi : half_pi;
178-
}
32+
T result { };
17933

180-
// Small angles
181-
const auto absx {fabs(x)};
182-
T result {};
34+
constexpr T my_pi_half { numbers::pi_v<T> / 2 };
18335

184-
if (absx <= std::numeric_limits<T>::epsilon())
185-
{
186-
return x;
187-
}
188-
else if (absx <= T{4375, -4})
189-
{
190-
result = detail::atan_small_impl(absx);
191-
}
192-
else if (absx <= T{6875, -4})
193-
{
194-
constexpr T atan_half {UINT64_C(4636476090008061162), -19};
195-
result = atan_half + detail::atan_small_impl((x - T{5, -1}) / (1 + x / 2));
196-
}
197-
else if (absx <= T{11875, -4})
198-
{
199-
constexpr T atan_one {UINT64_C(7853981633974483096), -19};
200-
result = atan_one + detail::atan_small_impl((x - 1) / (x + 1));
201-
}
202-
else if (absx <= T{24375, -4})
203-
{
204-
constexpr T atan_three_halves {UINT64_C(9827937232473290679), -19};
205-
result = atan_three_halves + detail::atan_small_impl((x - T{15, -1}) / (1 + T{15, -1} * x));
206-
}
207-
else if (absx <= T{6})
36+
if (fpc == FP_ZERO || fpc == FP_NAN)
20837
{
209-
result = detail::atan_med_impl(x);
38+
result = x;
21039
}
211-
else if (absx <= T{12})
40+
else if (signbit(x))
21241
{
213-
result = detail::atan_large_impl(x);
42+
result = -atan_impl(-x);
21443
}
215-
else if (absx <= T{24})
44+
else if (fpc == FP_INFINITE)
21645
{
217-
result = detail::atan_huge_impl(x);
46+
result = my_pi_half;
21847
}
21948
else
22049
{
221-
constexpr T atan_inf {numbers::pi_v<T> / 2};
222-
result = atan_inf - detail::atan_small_impl(1 / x);
223-
}
224-
225-
// arctan(-x) == -arctan(x)
226-
if (isneg)
227-
{
228-
result = -result;
50+
constexpr T one { 1 };
51+
52+
if (x <= T { 48 })
53+
{
54+
// Define small-ish argiments to be less than 39/16.
55+
const bool is_smallish { x <= T { 24375, -4 } };
56+
57+
// The portion of the algorithm for arc-tangent regarding scaling large-valued
58+
// argument is based on Chapter 11, page 194 of Cody and Waite, "Software Manual
59+
// for the Elementary Functions", Prentice Hall, 1980.
60+
61+
const T
62+
fx_arg
63+
{
64+
(!is_smallish)
65+
? ((x * numbers::sqrt3_v<T>) - one) / (numbers::sqrt3_v<T> + x)
66+
: x
67+
};
68+
69+
constexpr T half { 5, -1 };
70+
constexpr T three_halves { 15, -1 };
71+
72+
result = (fx_arg <= std::numeric_limits<T>::epsilon()) ? fx_arg
73+
: (fx_arg <= T { 4375, -4 }) ? detail::atan_series (fx_arg)
74+
: (fx_arg <= T { 6875, -4 }) ? detail::atan_values<T>(0U) + detail::atan_series((fx_arg - half) / (one + fx_arg / 2))
75+
: (fx_arg <= T { 11875, -4 }) ? detail::atan_values<T>(1U) + detail::atan_series((fx_arg - one) / (fx_arg + one))
76+
: detail::atan_values<T>(2U) + detail::atan_series((fx_arg - three_halves) / (one + three_halves * fx_arg))
77+
;
78+
79+
if(!is_smallish)
80+
{
81+
constexpr T my_pi_over_six { numbers::pi_v<T> / static_cast<int>(INT8_C(6)) };
82+
83+
result += my_pi_over_six;
84+
}
85+
}
86+
else
87+
{
88+
result = my_pi_half - detail::atan_series(one / x);
89+
}
22990
}
23091

23192
return result;

0 commit comments

Comments
 (0)