|
7 | 7 | //===----------------------------------------------------------------------===// |
8 | 8 |
|
9 | 9 | #include "src/math/acoshf16.h" |
10 | | -#include "hdr/errno_macros.h" |
11 | | -#include "hdr/fenv_macros.h" |
12 | 10 | #include "src/__support/FPUtil/FEnvImpl.h" |
13 | 11 | #include "src/__support/FPUtil/FPBits.h" |
14 | 12 | #include "src/__support/FPUtil/cast.h" |
| 13 | +#include "src/__support/FPUtil/except_value_utils.h" |
| 14 | +#include "src/__support/FPUtil/generic/sqrt.h" |
15 | 15 | #include "src/__support/FPUtil/multiply_add.h" |
16 | | -#include "src/__support/FPUtil/sqrt.h" |
17 | 16 | #include "src/__support/macros/optimization.h" |
18 | | -#include "src/math/generic/common_constants.h" |
19 | 17 | #include "src/math/generic/explogxf.h" |
20 | 18 |
|
21 | 19 | namespace LIBC_NAMESPACE_DECL { |
22 | 20 |
|
// Number of entries in the exceptional-values table defined just below.
| 21 | +static constexpr size_t N_EXCEPTS = 1; |
// Table of hard-coded results for specific float16 inputs. acoshf16 looks the
// input's raw bit pattern up in this table (via lookup(xbits.uintval())) and
// returns the stored result when there is a match, instead of running the
// general computation.
// NOTE(review): RZ/RU/RD/RN presumably stand for round-toward-zero, upward,
// downward and to-nearest; confirm against fputil::ExceptValues.
// Assuming float16 is IEEE binary16, input 0x41B7 is ~2.8574 and the RZ
// output 0x3ED8 is 1.7109375, i.e. acosh(2.8574) ~= 1.71094.
| 22 | +static constexpr fputil::ExceptValues<float16, N_EXCEPTS> ACOSHF16_EXCEPTS{ |
| 23 | + {// (input, RZ output, RU offset, RD offset, RN offset) |
| 24 | + {0x41B7, 0x3ED8, 0, 1, 0}}}; |
| 25 | + |
23 | 26 | LLVM_LIBC_FUNCTION(float16, acoshf16, (float16 x)) { |
24 | 27 | using FPBits = fputil::FPBits<float16>; |
25 | 28 | FPBits xbits(x); |
26 | 29 | uint16_t x_u = xbits.uintval(); |
27 | 30 | uint16_t x_abs = x_u & 0x7fff; |
28 | 31 |
|
29 | | - // if (LIBC_UNLIKELY(x <= 1.0f)) { |
30 | | - // if (x == 1.0f) |
31 | | - // return 0.0f; |
32 | | - // // x < 1. |
33 | | - // fputil::set_errno_if_required(EDOM); |
34 | | - // fputil::raise_except_if_required(FE_INVALID); |
35 | | - // return FPBits::quiet_nan().get_val(); |
36 | | - // } |
37 | | - |
38 | 32 | // Check for NaN input first. |
39 | 33 | if (LIBC_UNLIKELY(xbits.is_nan())) { |
40 | 34 | if (xbits.is_signaling_nan()) { |
@@ -68,25 +62,20 @@ LLVM_LIBC_FUNCTION(float16, acoshf16, (float16 x)) { |
68 | 62 | float xf32 = x; |
69 | 63 |
|
70 | 64 | // High precision for inputs very close to 1.0 |
71 | | - // if (LIBC_UNLIKELY(xf32 < 1.25f)) { |
72 | | - // float delta = xf32 - 1.0f; |
73 | | - // float sqrt_2 = fputil::sqrt<float>(2.0f * delta); |
74 | | - // float sqrt_2d = fputil::sqrt<float>(2.0f * delta); |
75 | | - // float d32 = delta * fputil::sqrt<float>(delta); |
76 | | - // float term2 = d32 / (6.0f * fputil::sqrt<float>(2.0f)); |
77 | | - // float d52 = d32 * delta; |
78 | | - // float term3 = 3.0f * d52 / (80.0f * sqrt_2); |
79 | | - // float d72 = d52 * delta; |
80 | | - // float term4 = 5.0f * d72 / (1792.0f * sqrt_2); |
81 | | - // float result = sqrt_2d - term2 + term3 - term4; |
82 | | - // return fputil::cast<float16>(result); |
83 | | - // } |
| 65 | + if (LIBC_UNLIKELY(xf32 < 1.25f)) { |
| 66 | + float delta = xf32 - 1.0f; |
| 67 | + float sqrt_2_delta = fputil::sqrt<float>(2.0 * delta); |
| 68 | + float x2 = delta; |
| 69 | + float pe = fputil::polyeval(x2, 0x1.0000000000000p+0f, |
| 70 | + -0x1.55551a83a9472p-4f, 0x1.331601c4b8ecfp-6f, |
| 71 | + -0x1.6890f49eb0acbp-8f, 0x1.8f3a617040a6ap-10f); |
| 72 | + float approx = sqrt_2_delta * pe; |
| 73 | + return fputil::cast<float16>(approx); |
| 74 | + } |
84 | 75 |
|
85 | | - // Special optimization for large input values. |
86 | | - // if (LIBC_UNLIKELY(xf32 >= 32.0f)) { |
87 | | - // float result = static_cast<float>(log_eval(2.0f * xf32)); |
88 | | - // return fputil::cast<float16>(result); |
89 | | - // } |
| 76 | + if (auto r = ACOSHF16_EXCEPTS.lookup(xbits.uintval()); |
| 77 | + LIBC_UNLIKELY(r.has_value())) |
| 78 | + return r.value(); |
90 | 79 |
|
91 | 80 | // Standard computation for general case. |
92 | 81 | float sqrt_term = |
|
0 commit comments