|
| 1 | +//===-- Half-precision atanpi function ------------------------------------===// |
| 2 | +// |
| 3 | +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | +// See https://llvm.org/LICENSE.txt for license information. |
| 5 | +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | +// |
| 7 | +//===----------------------------------------------------------------------===// |
| 8 | + |
| 9 | +#include "src/math/atanpif16.h" |
| 10 | +#include "hdr/errno_macros.h" |
| 11 | +#include "hdr/fenv_macros.h" |
| 12 | +#include "src/__support/FPUtil/FEnvImpl.h" |
| 13 | +#include "src/__support/FPUtil/FPBits.h" |
| 14 | +#include "src/__support/FPUtil/PolyEval.h" |
| 15 | +#include "src/__support/FPUtil/cast.h" |
| 16 | +#include "src/__support/FPUtil/multiply_add.h" |
| 17 | +#include "src/__support/FPUtil/sqrt.h" |
| 18 | +#include "src/__support/macros/optimization.h" |
| 19 | + |
| 20 | +namespace LIBC_NAMESPACE_DECL { |
| 21 | + |
| 22 | +// Using Python's SymPy library, we can obtain the polynomial approximation of |
| 23 | +// arctan(x)/pi. The steps are as follows: |
| 24 | +// >>> from sympy import * |
| 25 | +// >>> import math |
| 26 | +// >>> x = symbols('x') |
| 27 | +// >>> print(series(atan(x)/math.pi, x, 0, 17)) |
| 28 | +// |
| 29 | +// Output: |
| 30 | +// 0.318309886183791*x - 0.106103295394597*x**3 + 0.0636619772367581*x**5 - |
| 31 | +// 0.0454728408833987*x**7 + 0.0353677651315323*x**9 - 0.0289372623803446*x**11 |
| 32 | +// + 0.0244853758602916*x**13 - 0.0212206590789194*x**15 + O(x**17) |
| 33 | +// |
| 34 | +// We will assign this degree-15 Taylor polynomial as g(x). This polynomial |
| 35 | +// approximation is accurate for arctan(x)/pi when |x| is in the range [0, 0.5]. |
| 36 | +// |
| 37 | +// |
| 38 | +// To compute arctan(x) for all real x, we divide the domain into the following |
| 39 | +// cases: |
| 40 | +// |
| 41 | +// * Case 1: |x| <= 0.5 |
| 42 | +// In this range, the direct polynomial approximation is used: |
| 43 | +// arctan(x)/pi = sign(x) * g(|x|) |
| 44 | +// or equivalently, arctan(x) = sign(x) * pi * g(|x|). |
| 45 | +// |
| 46 | +// * Case 2: 0.5 < |x| <= 1 |
| 47 | +// We use the double-angle identity for the tangent function, specifically: |
| 48 | +// arctan(x) = 2 * arctan(x / (1 + sqrt(1 + x^2))). |
| 49 | +// Applying this, we have: |
| 50 | +// arctan(x)/pi = sign(x) * 2 * arctan(x')/pi, |
| 51 | +// where x' = |x| / (1 + sqrt(1 + x^2)). |
| 52 | +// Thus, arctan(x)/pi = sign(x) * 2 * g(x') |
| 53 | +// |
| 54 | +// When |x| is in (0.5, 1], the value of x' will always fall within the |
| 55 | +// interval [0.207, 0.414], which is within the accurate range of g(x). |
| 56 | +// |
| 57 | +// * Case 3: |x| > 1 |
| 58 | +// For values of |x| greater than 1, we use the reciprocal transformation |
| 59 | +// identity: |
| 60 | +// arctan(x) = pi/2 - arctan(1/x) for x > 0. |
| 61 | +// For any x (real number), this generalizes to: |
| 62 | +// arctan(x)/pi = sign(x) * (1/2 - arctan(1/|x|)/pi). |
| 63 | +// Then, using g(x) for arctan(1/|x|)/pi: |
| 64 | +// arctan(x)/pi = sign(x) * (1/2 - g(1/|x|)). |
| 65 | +// |
| 66 | +// Note that if 1/|x| still falls outside the |
| 67 | +// g(x)'s primary range of accuracy (i.e., if 0.5 < 1/|x| <= 1), the rule |
| 68 | +// from Case 2 must be applied recursively to 1/|x|. |
| 69 | + |
| 70 | +LLVM_LIBC_FUNCTION(float16, atanpif16, (float16 x)) { |
| 71 | + using FPBits = fputil::FPBits<float16>; |
| 72 | + |
| 73 | + FPBits xbits(x); |
| 74 | + bool is_neg = xbits.is_neg(); |
| 75 | + |
| 76 | + auto signed_result = [is_neg](double r) -> float16 { |
| 77 | + return fputil::cast<float16>(is_neg ? -r : r); |
| 78 | + }; |
| 79 | + |
| 80 | + if (LIBC_UNLIKELY(xbits.is_inf_or_nan())) { |
| 81 | + if (xbits.is_nan()) { |
| 82 | + if (xbits.is_signaling_nan()) { |
| 83 | + fputil::raise_except_if_required(FE_INVALID); |
| 84 | + return FPBits::quiet_nan().get_val(); |
| 85 | + } |
| 86 | + return x; |
| 87 | + } |
| 88 | + // atanpi(±∞) = ±0.5 |
| 89 | + return signed_result(0.5); |
| 90 | + } |
| 91 | + |
| 92 | + if (LIBC_UNLIKELY(xbits.is_zero())) |
| 93 | + return x; |
| 94 | + |
| 95 | + double x_abs = fputil::cast<double>(xbits.abs().get_val()); |
| 96 | + |
| 97 | + if (LIBC_UNLIKELY(x_abs == 1.0)) |
| 98 | + return signed_result(0.25); |
| 99 | + |
| 100 | + // evaluate atan(x)/pi using polynomial approximation, valid for |x| <= 0.5 |
| 101 | + constexpr auto atanpi_eval = [](double x) -> double { |
| 102 | + // polynomial coefficients for atan(x)/pi taylor series |
| 103 | + // generated using sympy: series(atan(x)/pi, x, 0, 17) |
| 104 | + constexpr static double POLY_COEFFS[] = { |
| 105 | + 0x1.45f306dc9c889p-2, // x^1: 1/pi |
| 106 | + -0x1.b2995e7b7b60bp-4, // x^3: -1/(3*pi) |
| 107 | + 0x1.04c26be3b06ccp-4, // x^5: 1/(5*pi) |
| 108 | + -0x1.7483758e69c08p-5, // x^7: -1/(7*pi) |
| 109 | + 0x1.21bb945252403p-5, // x^9: 1/(9*pi) |
| 110 | + -0x1.da1bace3cc68ep-6, // x^11: -1/(11*pi) |
| 111 | + 0x1.912b1c2336cf2p-6, // x^13: 1/(13*pi) |
| 112 | + -0x1.5bade52f95e7p-6, // x^15: -1/(15*pi) |
| 113 | + }; |
| 114 | + double x_sq = x * x; |
| 115 | + return x * fputil::polyeval(x_sq, POLY_COEFFS[0], POLY_COEFFS[1], |
| 116 | + POLY_COEFFS[2], POLY_COEFFS[3], POLY_COEFFS[4], |
| 117 | + POLY_COEFFS[5], POLY_COEFFS[6], POLY_COEFFS[7]); |
| 118 | + }; |
| 119 | + |
| 120 | + // Case 1: |x| <= 0.5 - Direct polynomial evaluation |
| 121 | + if (LIBC_LIKELY(x_abs <= 0.5)) { |
| 122 | + double result = atanpi_eval(x_abs); |
| 123 | + return signed_result(result); |
| 124 | + } |
| 125 | + |
| 126 | + // case 2: 0.5 < |x| <= 1 - use double-angle reduction |
| 127 | + // atan(x) = 2 * atan(x / (1 + sqrt(1 + x^2))) |
| 128 | + // so atanpi(x) = 2 * atanpi(x') where x' = x / (1 + sqrt(1 + x^2)) |
| 129 | + if (x_abs <= 1.0) { |
| 130 | + double x_abs_sq = x_abs * x_abs; |
| 131 | + double sqrt_term = fputil::sqrt<double>(1.0 + x_abs_sq); |
| 132 | + double x_prime = x_abs / (1.0 + sqrt_term); |
| 133 | + double result = 2.0 * atanpi_eval(x_prime); |
| 134 | + return signed_result(result); |
| 135 | + } |
| 136 | + |
| 137 | + // case 3: |x| > 1 - use reciprocal transformation |
| 138 | + // atan(x) = pi/2 - atan(1/x) for x > 0 |
| 139 | + // so atanpi(x) = 1/2 - atanpi(1/x) |
| 140 | + double x_recip = 1.0 / x_abs; |
| 141 | + double result; |
| 142 | + |
| 143 | + // if 1/|x| > 0.5, we need to apply Case 2 transformation to 1/|x| |
| 144 | + if (x_recip > 0.5) { |
| 145 | + double x_recip_sq = x_recip * x_recip; |
| 146 | + double sqrt_term = fputil::sqrt<double>(1.0 + x_recip_sq); |
| 147 | + double x_prime = x_recip / (1.0 + sqrt_term); |
| 148 | + result = fputil::multiply_add(-2.0, atanpi_eval(x_prime), 0.5); |
| 149 | + } else { |
| 150 | + // direct evaluation since 1/|x| <= 0.5 |
| 151 | + result = 0.5 - atanpi_eval(x_recip); |
| 152 | + } |
| 153 | + |
| 154 | + return signed_result(result); |
| 155 | +} |
| 156 | + |
| 157 | +} // namespace LIBC_NAMESPACE_DECL |
0 commit comments