|
7 | 7 | //===----------------------------------------------------------------------===// |
8 | 8 |
|
9 | 9 | #include "src/math/acoshf16.h" |
| 10 | +#include "hdr/errno_macros.h" |
| 11 | +#include "hdr/fenv_macros.h" |
10 | 12 | #include "src/__support/FPUtil/FEnvImpl.h" |
11 | 13 | #include "src/__support/FPUtil/FPBits.h" |
12 | 14 | #include "src/__support/FPUtil/cast.h" |
13 | 15 | #include "src/__support/FPUtil/except_value_utils.h" |
14 | | -#include "src/__support/FPUtil/generic/sqrt.h" |
15 | 16 | #include "src/__support/FPUtil/multiply_add.h" |
| 17 | +#include "src/__support/FPUtil/sqrt.h" |
16 | 18 | #include "src/__support/macros/optimization.h" |
17 | 19 | #include "src/math/generic/explogxf.h" |
18 | 20 |
|
19 | 21 | namespace LIBC_NAMESPACE_DECL { |
20 | 22 |
|
21 | 23 | static constexpr size_t N_EXCEPTS = 2; |
22 | | -static constexpr fputil::ExceptValues<float16, N_EXCEPTS> ACOSHF16_EXCEPTS{ |
23 | | - {// (input, RZ output, RU offset, RD offset, RN offset) |
24 | | - {0x41B7, 0x3ED8, 1, 0, 0}, |
25 | | - {0x3CE4, 0x393E, 1, 0, 1}}}; |
| 24 | +static constexpr fputil::ExceptValues<float16, N_EXCEPTS> ACOSHF16_EXCEPTS{{ |
| 25 | + // (input, RZ output, RU offset, RD offset, RN offset) |
| 26 | + // x = 0x1.6ep+1, acoshf16(x) = 0x1.dbp+0 (RZ) |
| 27 | + {0x41B7, 0x3ED8, 1, 0, 0}, |
| 28 | + // x = 0x1.c8p+0, acoshf16(x) = 0x1.27cp-1 (RZ) |
| 29 | + {0x3CE4, 0x393E, 1, 0, 1}, |
| 30 | +}}; |
26 | 31 |
|
27 | 32 | LLVM_LIBC_FUNCTION(float16, acoshf16, (float16 x)) { |
28 | 33 | using FPBits = fputil::FPBits<float16>; |
@@ -58,29 +63,48 @@ LLVM_LIBC_FUNCTION(float16, acoshf16, (float16 x)) { |
58 | 63 |
|
59 | 64 | // acosh(1.0) exactly equals 0.0 |
60 | 65 | if (LIBC_UNLIKELY(x_u == 0x3c00U)) |
61 | | - return float16(0.0f); |
| 66 | + return FPBits::zero().get_val(); |
62 | 67 |
|
63 | 68 | if (auto r = ACOSHF16_EXCEPTS.lookup(xbits.uintval()); |
64 | 69 | LIBC_UNLIKELY(r.has_value())) |
65 | 70 | return r.value(); |
66 | 71 |
|
67 | | - float xf32 = x; |
| 72 | + float xf = x; |
68 | 73 | // High precision for inputs very close to 1.0 |
69 | | - if (LIBC_UNLIKELY(xf32 < 1.25f)) { |
70 | | - float delta = xf32 - 1.0f; |
| 74 | + // For inputs close to 1 (1 <= x < 1.25), use polynomial approximation: |
| 75 | + // |
| 76 | + // Step-by-step derivation: |
| 77 | + // 1. Let y = acosh(x), thus x = cosh(y). |
| 78 | + // |
| 79 | + // 2. Rewrite cosh identity using exponential form: |
| 80 | + // cosh(y) = (e^y + e^{-y}) / 2 |
| 81 | + // For y close to 0, let y = sqrt(2 * delta), thus: |
| 82 | + // x = cosh(y) ≈ 1 + delta (since cosh(0) = 1, and delta is small) |
| 83 | + // thus delta = x - 1. |
| 84 | + // |
| 85 | + // 3: Express y in terms of delta (for small delta): |
| 86 | + // y ≈ sqrt(2 * delta) |
| 87 | + // |
| 88 | + // 4: Expand acosh(1 + delta) using a Taylor expansion around delta = 0: |
| 89 | + // acosh(1 + delta) ≈ sqrt(2 * delta) * P(delta), where P(delta) |
| 90 | + // is a polynomial approximation obtained by fitting the function |
| 91 | + // precisely in the interval [0, 0.25]. |
| 92 | + // |
| 93 | + // Because delta = x - 1 and 0 <= delta < 0.25, the polynomial approximation |
| 94 | + // remains numerically stable and accurate in this domain, ensuring high |
| 95 | + // precision. |
| 96 | + if (LIBC_UNLIKELY(xf < 1.25f)) { |
| 97 | + float delta = xf - 1.0f; |
71 | 98 | float sqrt_2_delta = fputil::sqrt<float>(2.0 * delta); |
72 | | - float x2 = delta; |
73 | | - float pe = |
74 | | - fputil::polyeval(x2, 0x1.000000p+0f, -0x1.55551ap-4f, 0x1.33160cp-6f, |
75 | | - -0x1.6890f4p-8f, 0x1.8f3a62p-10f); |
| 99 | + float pe = fputil::polyeval(delta, 0x1p+0f, -0x1.55551ap-4f, 0x1.33160cp-6f, |
| 100 | + -0x1.6890f4p-8f, 0x1.8f3a62p-10f); |
76 | 101 | float approx = sqrt_2_delta * pe; |
77 | 102 | return fputil::cast<float16>(approx); |
78 | 103 | } |
79 | 104 |
|
80 | | - // Standard computation for general case. |
81 | | - float sqrt_term = |
82 | | - fputil::sqrt<float>(fputil::multiply_add(xf32, xf32, -1.0f)); |
83 | | - float result = static_cast<float>(log_eval(xf32 + sqrt_term)); |
| 105 | + // acosh(x) = log(x + sqrt(x^2 - 1)) |
| 106 | + float sqrt_term = fputil::sqrt<float>(fputil::multiply_add(xf, xf, -1.0f)); |
| 107 | + float result = static_cast<float>(log_eval(xf + sqrt_term)); |
84 | 108 |
|
85 | 109 | return fputil::cast<float16>(result); |
86 | 110 | } |
|
0 commit comments