|
18 | 18 |
|
19 | 19 | namespace __llvm_libc {
|
20 | 20 |
|
21 |
| -// Lookup table for 2^(m * 2^(-7)) with m = 0, ..., 127. |
| 21 | +// Lookup table for 2^(m * 2^(-6)) with m = 0, ..., 63. |
22 | 22 | // Table is generated with Sollya as follow:
|
23 | 23 | // > display = hexadecimal;
|
24 |
| -// > for i from 0 to 127 do { D(2^(i / 128)); }; |
25 |
| -static constexpr double EXP_M[128] = { |
26 |
| - 0x1.0000000000000p0, 0x1.0163da9fb3335p0, 0x1.02c9a3e778061p0, |
27 |
| - 0x1.04315e86e7f85p0, 0x1.059b0d3158574p0, 0x1.0706b29ddf6dep0, |
28 |
| - 0x1.0874518759bc8p0, 0x1.09e3ecac6f383p0, 0x1.0b5586cf9890fp0, |
29 |
| - 0x1.0cc922b7247f7p0, 0x1.0e3ec32d3d1a2p0, 0x1.0fb66affed31bp0, |
30 |
| - 0x1.11301d0125b51p0, 0x1.12abdc06c31ccp0, 0x1.1429aaea92de0p0, |
31 |
| - 0x1.15a98c8a58e51p0, 0x1.172b83c7d517bp0, 0x1.18af9388c8deap0, |
32 |
| - 0x1.1a35beb6fcb75p0, 0x1.1bbe084045cd4p0, 0x1.1d4873168b9aap0, |
33 |
| - 0x1.1ed5022fcd91dp0, 0x1.2063b88628cd6p0, 0x1.21f49917ddc96p0, |
34 |
| - 0x1.2387a6e756238p0, 0x1.251ce4fb2a63fp0, 0x1.26b4565e27cddp0, |
35 |
| - 0x1.284dfe1f56381p0, 0x1.29e9df51fdee1p0, 0x1.2b87fd0dad990p0, |
36 |
| - 0x1.2d285a6e4030bp0, 0x1.2ecafa93e2f56p0, 0x1.306fe0a31b715p0, |
37 |
| - 0x1.32170fc4cd831p0, 0x1.33c08b26416ffp0, 0x1.356c55f929ff1p0, |
38 |
| - 0x1.371a7373aa9cbp0, 0x1.38cae6d05d866p0, 0x1.3a7db34e59ff7p0, |
39 |
| - 0x1.3c32dc313a8e5p0, 0x1.3dea64c123422p0, 0x1.3fa4504ac801cp0, |
40 |
| - 0x1.4160a21f72e2ap0, 0x1.431f5d950a897p0, 0x1.44e086061892dp0, |
41 |
| - 0x1.46a41ed1d0057p0, 0x1.486a2b5c13cd0p0, 0x1.4a32af0d7d3dep0, |
42 |
| - 0x1.4bfdad5362a27p0, 0x1.4dcb299fddd0dp0, 0x1.4f9b2769d2ca7p0, |
43 |
| - 0x1.516daa2cf6642p0, 0x1.5342b569d4f82p0, 0x1.551a4ca5d920fp0, |
44 |
| - 0x1.56f4736b527dap0, 0x1.58d12d497c7fdp0, 0x1.5ab07dd485429p0, |
45 |
| - 0x1.5c9268a5946b7p0, 0x1.5e76f15ad2148p0, 0x1.605e1b976dc09p0, |
46 |
| - 0x1.6247eb03a5585p0, 0x1.6434634ccc320p0, 0x1.6623882552225p0, |
47 |
| - 0x1.68155d44ca973p0, 0x1.6a09e667f3bcdp0, 0x1.6c012750bdabfp0, |
48 |
| - 0x1.6dfb23c651a2fp0, 0x1.6ff7df9519484p0, 0x1.71f75e8ec5f74p0, |
49 |
| - 0x1.73f9a48a58174p0, 0x1.75feb564267c9p0, 0x1.780694fde5d3fp0, |
50 |
| - 0x1.7a11473eb0187p0, 0x1.7c1ed0130c132p0, 0x1.7e2f336cf4e62p0, |
51 |
| - 0x1.80427543e1a12p0, 0x1.82589994cce13p0, 0x1.8471a4623c7adp0, |
52 |
| - 0x1.868d99b4492edp0, 0x1.88ac7d98a6699p0, 0x1.8ace5422aa0dbp0, |
53 |
| - 0x1.8cf3216b5448cp0, 0x1.8f1ae99157736p0, 0x1.9145b0b91ffc6p0, |
54 |
| - 0x1.93737b0cdc5e5p0, 0x1.95a44cbc8520fp0, 0x1.97d829fde4e50p0, |
55 |
| - 0x1.9a0f170ca07bap0, 0x1.9c49182a3f090p0, 0x1.9e86319e32323p0, |
56 |
| - 0x1.a0c667b5de565p0, 0x1.a309bec4a2d33p0, 0x1.a5503b23e255dp0, |
57 |
| - 0x1.a799e1330b358p0, 0x1.a9e6b5579fdbfp0, 0x1.ac36bbfd3f37ap0, |
58 |
| - 0x1.ae89f995ad3adp0, 0x1.b0e07298db666p0, 0x1.b33a2b84f15fbp0, |
59 |
| - 0x1.b59728de5593ap0, 0x1.b7f76f2fb5e47p0, 0x1.ba5b030a1064ap0, |
60 |
| - 0x1.bcc1e904bc1d2p0, 0x1.bf2c25bd71e09p0, 0x1.c199bdd85529cp0, |
61 |
| - 0x1.c40ab5fffd07ap0, 0x1.c67f12e57d14bp0, 0x1.c8f6d9406e7b5p0, |
62 |
| - 0x1.cb720dcef9069p0, 0x1.cdf0b555dc3fap0, 0x1.d072d4a07897cp0, |
63 |
| - 0x1.d2f87080d89f2p0, 0x1.d5818dcfba487p0, 0x1.d80e316c98398p0, |
64 |
| - 0x1.da9e603db3285p0, 0x1.dd321f301b460p0, 0x1.dfc97337b9b5fp0, |
65 |
| - 0x1.e264614f5a129p0, 0x1.e502ee78b3ff6p0, 0x1.e7a51fbc74c83p0, |
66 |
| - 0x1.ea4afa2a490dap0, 0x1.ecf482d8e67f1p0, 0x1.efa1bee615a27p0, |
67 |
| - 0x1.f252b376bba97p0, 0x1.f50765b6e4540p0, 0x1.f7bfdad9cbe14p0, |
68 |
| - 0x1.fa7c1819e90d8p0, 0x1.fd3c22b8f71f1p0, |
| 24 | +// > for i from 0 to 63 do { D(2^(i / 64)); }; |
| 25 | +static constexpr double EXP_M[64] = { |
| 26 | + 0x1.0000000000000p0, 0x1.02c9a3e778061p0, 0x1.059b0d3158574p0, |
| 27 | + 0x1.0874518759bc8p0, 0x1.0b5586cf9890fp0, 0x1.0e3ec32d3d1a2p0, |
| 28 | + 0x1.11301d0125b51p0, 0x1.1429aaea92de0p0, 0x1.172b83c7d517bp0, |
| 29 | + 0x1.1a35beb6fcb75p0, 0x1.1d4873168b9aap0, 0x1.2063b88628cd6p0, |
| 30 | + 0x1.2387a6e756238p0, 0x1.26b4565e27cddp0, 0x1.29e9df51fdee1p0, |
| 31 | + 0x1.2d285a6e4030bp0, 0x1.306fe0a31b715p0, 0x1.33c08b26416ffp0, |
| 32 | + 0x1.371a7373aa9cbp0, 0x1.3a7db34e59ff7p0, 0x1.3dea64c123422p0, |
| 33 | + 0x1.4160a21f72e2ap0, 0x1.44e086061892dp0, 0x1.486a2b5c13cd0p0, |
| 34 | + 0x1.4bfdad5362a27p0, 0x1.4f9b2769d2ca7p0, 0x1.5342b569d4f82p0, |
| 35 | + 0x1.56f4736b527dap0, 0x1.5ab07dd485429p0, 0x1.5e76f15ad2148p0, |
| 36 | + 0x1.6247eb03a5585p0, 0x1.6623882552225p0, 0x1.6a09e667f3bcdp0, |
| 37 | + 0x1.6dfb23c651a2fp0, 0x1.71f75e8ec5f74p0, 0x1.75feb564267c9p0, |
| 38 | + 0x1.7a11473eb0187p0, 0x1.7e2f336cf4e62p0, 0x1.82589994cce13p0, |
| 39 | + 0x1.868d99b4492edp0, 0x1.8ace5422aa0dbp0, 0x1.8f1ae99157736p0, |
| 40 | + 0x1.93737b0cdc5e5p0, 0x1.97d829fde4e50p0, 0x1.9c49182a3f090p0, |
| 41 | + 0x1.a0c667b5de565p0, 0x1.a5503b23e255dp0, 0x1.a9e6b5579fdbfp0, |
| 42 | + 0x1.ae89f995ad3adp0, 0x1.b33a2b84f15fbp0, 0x1.b7f76f2fb5e47p0, |
| 43 | + 0x1.bcc1e904bc1d2p0, 0x1.c199bdd85529cp0, 0x1.c67f12e57d14bp0, |
| 44 | + 0x1.cb720dcef9069p0, 0x1.d072d4a07897cp0, 0x1.d5818dcfba487p0, |
| 45 | + 0x1.da9e603db3285p0, 0x1.dfc97337b9b5fp0, 0x1.e502ee78b3ff6p0, |
| 46 | + 0x1.ea4afa2a490dap0, 0x1.efa1bee615a27p0, 0x1.f50765b6e4540p0, |
| 47 | + 0x1.fa7c1819e90d8p0, |
69 | 48 | };
|
70 | 49 |
|
71 | 50 | INLINE_FMA
|
72 | 51 | LLVM_LIBC_FUNCTION(float, exp2f, (float x)) {
|
73 | 52 | using FPBits = typename fputil::FPBits<float>;
|
74 | 53 | FPBits xbits(x);
|
75 | 54 |
|
76 |
| - // When x =< -150 or nan |
77 |
| - if (unlikely(xbits.uintval() >= 0xc316'0000U)) { |
78 |
| - // exp(-Inf) = 0 |
79 |
| - if (xbits.is_inf()) |
80 |
| - return 0.0f; |
81 |
| - // exp(nan) = nan |
82 |
| - if (xbits.is_nan()) |
83 |
| - return x; |
84 |
| - if (fputil::get_round() == FE_UPWARD) |
85 |
| - return static_cast<float>(FPBits(FPBits::MIN_SUBNORMAL)); |
86 |
| - if (x != 0.0f) |
87 |
| - errno = ERANGE; |
88 |
| - return 0.0f; |
89 |
| - } |
90 |
| - // x >= 128 or nan |
91 |
| - if (unlikely(!xbits.get_sign() && (xbits.uintval() >= 0x4300'0000U))) { |
92 |
| - if (xbits.uintval() < 0x7f80'0000U) { |
93 |
| - int rounding = fputil::get_round(); |
94 |
| - if (rounding == FE_DOWNWARD || rounding == FE_TOWARDZERO) |
95 |
| - return static_cast<float>(FPBits(FPBits::MAX_NORMAL)); |
| 55 | + uint32_t x_u = xbits.uintval(); |
| 56 | + uint32_t x_abs = x_u & 0x7fff'ffffU; |
96 | 57 |
|
97 |
| - errno = ERANGE; |
98 |
| - } |
99 |
| - return x + static_cast<float>(FPBits::inf()); |
100 |
| - } |
101 |
| - // |x| < 2^-25 |
102 |
| - if (unlikely(xbits.get_unbiased_exponent() <= 101)) { |
103 |
| - return 1.0f + x; |
104 |
| - } |
105 | 58 | // Exceptional values.
|
106 |
| - switch (xbits.uintval()) { |
| 59 | + switch (x_u) { |
107 | 60 | case 0x3b42'9d37U: // x = 0x1.853a6ep-9f
|
108 | 61 | if (fputil::get_round() == FE_TONEAREST)
|
109 | 62 | return 0x1.00870ap+0f;
|
110 | 63 | break;
|
| 64 | + case 0x3c02'a9adU: // x = 0x1.05535ap-7f |
| 65 | + if (fputil::get_round() == FE_TONEAREST) |
| 66 | + return 0x1.016b46p+0f; |
| 67 | + break; |
| 68 | + case 0x3ca6'6e26U: { // x = 0x1.4cdc4cp-6f |
| 69 | + int round_mode = fputil::get_round(); |
| 70 | + if (round_mode == FE_TONEAREST || round_mode == FE_UPWARD) |
| 71 | + return 0x1.03a16ap+0f; |
| 72 | + return 0x1.03a168p+0f; |
| 73 | + } |
| 74 | + case 0x3d92'a282U: // x = 0x1.254504p-4f |
| 75 | + if (fputil::get_round() == FE_UPWARD) |
| 76 | + return 0x1.0d0688p+0f; |
| 77 | + return 0x1.0d0686p+0f; |
111 | 78 | case 0xbcf3'a937U: // x = -0x1.e7526ep-6f
|
112 | 79 | if (fputil::get_round() == FE_TONEAREST)
|
113 | 80 | return 0x1.f58d62p-1f;
|
114 | 81 | break;
|
| 82 | + case 0xb8d3'd026U: // x = -0x1.a7a04cp-14f |
| 83 | + if (fputil::get_round() == FE_TONEAREST) |
| 84 | + return 0x1.fff6d2p-1f; |
| 85 | + break; |
115 | 86 | }
|
116 | 87 |
|
| 88 | + // // When |x| >= 128, |x| < 2^-25, or x is nan |
| 89 | + if (unlikely(x_abs >= 0x4300'0000U || x_abs <= 0x3280'0000U)) { |
| 90 | + // |x| < 2^-25 |
| 91 | + if (x_abs <= 0x3280'0000U) { |
| 92 | + return 1.0f + x; |
| 93 | + } |
| 94 | + // x >= 128 |
| 95 | + if (!xbits.get_sign()) { |
| 96 | + // x is finite |
| 97 | + if (x_u < 0x7f80'0000U) { |
| 98 | + int rounding = fputil::get_round(); |
| 99 | + if (rounding == FE_DOWNWARD || rounding == FE_TOWARDZERO) |
| 100 | + return static_cast<float>(FPBits(FPBits::MAX_NORMAL)); |
| 101 | + |
| 102 | + errno = ERANGE; |
| 103 | + } |
| 104 | + // x is +inf or nan |
| 105 | + return x + static_cast<float>(FPBits::inf()); |
| 106 | + } |
| 107 | + // x < -150 |
| 108 | + if (x_u >= 0xc316'0000U) { |
| 109 | + // exp(-Inf) = 0 |
| 110 | + if (xbits.is_inf()) |
| 111 | + return 0.0f; |
| 112 | + // exp(nan) = nan |
| 113 | + if (xbits.is_nan()) |
| 114 | + return x; |
| 115 | + if (fputil::get_round() == FE_UPWARD) |
| 116 | + return static_cast<float>(FPBits(FPBits::MIN_SUBNORMAL)); |
| 117 | + if (x != 0.0f) |
| 118 | + errno = ERANGE; |
| 119 | + return 0.0f; |
| 120 | + } |
| 121 | + } |
117 | 122 | // For -150 <= x < 128, to compute 2^x, we perform the following range
|
118 | 123 | // reduction: find hi, mid, lo such that:
|
119 | 124 | // x = hi + mid + lo, in which
|
120 | 125 | // hi is an integer,
|
121 |
| - // mid * 2^7 is an integer |
122 |
| - // -2^(-8) <= lo < 2^-8. |
| 126 | + // mid * 2^6 is an integer |
| 127 | + // -2^(-7) <= lo < 2^-7. |
123 | 128 | // In particular,
|
124 |
| - // hi + mid = round(x * 2^7) * 2^(-7). |
| 129 | + // hi + mid = round(x * 2^6) * 2^(-6). |
125 | 130 | // Then,
|
126 | 131 | // 2^(x) = 2^(hi + mid + lo) = 2^hi * 2^mid * 2^lo.
|
127 | 132 | // Multiply by 2^hi is simply adding hi to the exponent field. We store
|
128 |
| - // exp(mid) in the lookup tables EXP_M. exp(lo) is computed using a degree-7 |
| 133 | + // exp(mid) in the lookup tables EXP_M. exp(lo) is computed using a degree-4 |
129 | 134 | // minimax polynomial generated by Sollya.
|
130 | 135 |
|
131 |
| - // x_hi = hi + mid. |
132 |
| - int x_hi = static_cast<int>(x * 0x1.0p7f); |
133 |
| - // Subtract (hi + mid) from x to get lo. |
134 |
| - x -= static_cast<float>(x_hi) * 0x1.0p-7f; |
135 |
| - double xd = static_cast<double>(x); |
136 |
| - // Make sure that -2^(-8) <= lo < 2^-8. |
137 |
| - if (x >= 0x1.0p-8f) { |
138 |
| - ++x_hi; |
139 |
| - xd -= 0x1.0p-7; |
140 |
| - } |
141 |
| - if (x < -0x1.0p-8f) { |
142 |
| - --x_hi; |
143 |
| - xd += 0x1.0p-7; |
144 |
| - } |
| 136 | + // x_hi = round(hi + mid). |
| 137 | + // The default rounding mode for float-to-int conversion in C++ is |
| 138 | + // round-toward-zero. To make it round-to-nearest, we add (-1)^sign(x) * 0.5 |
| 139 | + // before conversion. |
| 140 | + int x_hi = |
| 141 | + static_cast<int>(x * 0x1.0p+6f + (xbits.get_sign() ? -0.5f : 0.5f)); |
145 | 142 | // For 2-complement integers, arithmetic right shift is the same as dividing
|
146 | 143 | // by a power of 2 and then round down (toward negative infinity).
|
147 |
| - int hi = x_hi >> 7; |
148 |
| - // mid = x_hi & 0x0000'007fU; |
149 |
| - double exp_mid = EXP_M[x_hi & 0x7f]; |
150 |
| - // Degree-6 minimax polynomial generated by Sollya with the following |
| 144 | + int e_hi = (x_hi >> 6) + |
| 145 | + static_cast<int>(fputil::FloatProperties<double>::EXPONENT_BIAS); |
| 146 | + fputil::FPBits<double> exp_hi( |
| 147 | + static_cast<uint64_t>(e_hi) |
| 148 | + << fputil::FloatProperties<double>::MANTISSA_WIDTH); |
| 149 | + // mid = x_hi & 0x0000'003fU; |
| 150 | + double exp_hi_mid = static_cast<double>(exp_hi) * EXP_M[x_hi & 0x3f]; |
| 151 | + // Subtract (hi + mid) from x to get lo. |
| 152 | + x -= static_cast<float>(x_hi) * 0x1.0p-6f; |
| 153 | + double xd = static_cast<double>(x); |
| 154 | + // Degree-4 minimax polynomial generated by Sollya with the following |
151 | 155 | // commands:
|
152 | 156 | // > display = hexadecimal;
|
153 |
| - // > Q = fpminimax((2^x - 1)/x, 5, [|D...|], [-2^-8, 2^-8]); |
| 157 | + // > Q = fpminimax((2^x - 1)/x, 3, [|D...|], [-2^-7, 2^-7]); |
154 | 158 | // > Q;
|
155 | 159 | double exp_lo =
|
156 |
| - fputil::polyeval(xd, 0x1p0, 0x1.62e42fefa39efp-1, 0x1.ebfbdff82c58ep-3, |
157 |
| - 0x1.c6b08d711fe2fp-5, 0x1.3b2ab6fe3deb5p-7, |
158 |
| - 0x1.5d72a05f45c04p-10, 0x1.4284d40c33326p-13); |
159 |
| - fputil::FPBits<double> result(exp_mid * exp_lo); |
160 |
| - result.set_unbiased_exponent(static_cast<uint16_t>( |
161 |
| - static_cast<int>(result.get_unbiased_exponent()) + hi)); |
162 |
| - return static_cast<float>(static_cast<double>(result)); |
| 160 | + fputil::polyeval(xd, 0x1p0, 0x1.62e42fefa2417p-1, 0x1.ebfbdff82f809p-3, |
| 161 | + 0x1.c6b0b92131c47p-5, 0x1.3b2ab6fb568a3p-7); |
| 162 | + double result = exp_hi_mid * exp_lo; |
| 163 | + return static_cast<float>(result); |
163 | 164 | }
|
164 | 165 |
|
165 | 166 | } // namespace __llvm_libc
|
0 commit comments