@@ -174,6 +174,119 @@ LIBC_INLINE ExpRangeReduction exp10_range_reduction(float16 x) {
174174 return {exp2_hi_mid, exp10_lo};
175175}
176176
177+ // Generated by Sollya with the following commands:
178+ // > display = hexadecimal;
179+ // > round(log2(exp(1)), SG, RN);
180+ static constexpr float LOG2F_E = 0x1 .715476p+0f ;
181+
182+ // Generated by Sollya with the following commands:
183+ // > display = hexadecimal;
184+ // > round(log(2), SG, RN);
185+ static constexpr float LOGF_2 = 0x1 .62e43p-1f ;
186+
187+ // Generated by Sollya with the following commands:
188+ // > display = hexadecimal;
189+ // > for i from 0 to 31 do printsingle(round(2^(i * 2^-5), SG, RN));
190+ static constexpr cpp::array<uint32_t , 32 > EXP2_MID_5_BITS = {
191+ 0x3f80'0000U , 0x3f82'cd87U , 0x3f85'aac3U , 0x3f88'980fU , 0x3f8b'95c2U ,
192+ 0x3f8e'a43aU , 0x3f91'c3d3U , 0x3f94'f4f0U , 0x3f98'37f0U , 0x3f9b'8d3aU ,
193+ 0x3f9e'f532U , 0x3fa2'7043U , 0x3fa5'fed7U , 0x3fa9'a15bU , 0x3fad'583fU ,
194+ 0x3fb1'23f6U , 0x3fb5'04f3U , 0x3fb8'fbafU , 0x3fbd'08a4U , 0x3fc1'2c4dU ,
195+ 0x3fc5'672aU , 0x3fc9'b9beU , 0x3fce'248cU , 0x3fd2'a81eU , 0x3fd7'44fdU ,
196+ 0x3fdb'fbb8U , 0x3fe0'ccdfU , 0x3fe5'b907U , 0x3fea'c0c7U , 0x3fef'e4baU ,
197+ 0x3ff5'257dU , 0x3ffa'83b3U ,
198+ };
199+
200+ // This function correctly calculates sinh(x) and cosh(x) by calculating exp(x)
201+ // and exp(-x) simultaneously.
202+ // To compute e^x, we perform the following range reduction:
203+ // find hi, mid, lo such that:
204+ // x = (hi + mid) * log(2) + lo, in which
205+ // hi is an integer,
206+ // 0 <= mid * 2^5 < 32 is an integer
207+ // -2^(-5) <= lo * log2(e) <= 2^-5.
208+ // In particular,
209+ // hi + mid = round(x * log2(e) * 2^5) * 2^(-5).
210+ // Then,
211+ // e^x = 2^(hi + mid) * e^lo = 2^hi * 2^mid * e^lo.
212+ // We store 2^mid in the lookup table EXP2_MID_5_BITS, and compute 2^hi * 2^mid
213+ // by adding hi to the exponent field of 2^mid.
214+ // e^lo is computed using a degree-3 minimax polynomial generated by Sollya:
215+ // e^lo ~ P(lo)
216+ // = 1 + lo + c2 * lo^2 + ... + c5 * lo^5
217+ // = (1 + c2*lo^2 + c4*lo^4) + lo * (1 + c3*lo^2 + c5*lo^4)
218+ // = P_even + lo * P_odd
219+ // To compute e^(-x), notice that:
220+ // e^(-x) = 2^(-(hi + mid)) * e^(-lo)
221+ // ~ 2^(-(hi + mid)) * P(-lo)
222+ // = 2^(-(hi + mid)) * (P_even - lo * P_odd)
223+ // So:
224+ // sinh(x) = (e^x - e^(-x)) / 2
225+ // ~ 0.5 * (2^(hi + mid) * (P_even + lo * P_odd) -
226+ // 2^(-(hi + mid)) * (P_even - lo * P_odd))
227+ // = 0.5 * (P_even * (2^(hi + mid) - 2^(-(hi + mid))) +
228+ // lo * P_odd * (2^(hi + mid) + 2^(-(hi + mid))))
229+ // And similarly:
230+ // cosh(x) = (e^x + e^(-x)) / 2
231+ // ~ 0.5 * (P_even * (2^(hi + mid) + 2^(-(hi + mid))) +
232+ // lo * P_odd * (2^(hi + mid) - 2^(-(hi + mid))))
233+ // The main point of these formulas is that the expensive part of calculating
234+ // the polynomials approximating lower parts of e^x and e^(-x) is shared and
235+ // only done once.
236+ template <bool IsSinh> LIBC_INLINE float16 eval_sinh_or_cosh (float16 x) {
237+ float xf = x;
238+ float kf = fputil::nearest_integer (xf * (LOG2F_E * 0x1 .0p+5f ));
239+ int x_hi_mid_p = static_cast <int >(kf);
240+ int x_hi_mid_m = -x_hi_mid_p;
241+
242+ int x_hi_p = x_hi_mid_p >> 5 ;
243+ int x_hi_m = x_hi_mid_m >> 5 ;
244+ int x_mid_p = x_hi_mid_p & 0x1f ;
245+ int x_mid_m = x_hi_mid_m & 0x1f ;
246+
247+ uint32_t exp2_hi_mid_bits_p =
248+ EXP2_MID_5_BITS[x_mid_p] +
249+ static_cast <uint32_t >(x_hi_p << fputil::FPBits<float >::FRACTION_LEN);
250+ uint32_t exp2_hi_mid_bits_m =
251+ EXP2_MID_5_BITS[x_mid_m] +
252+ static_cast <uint32_t >(x_hi_m << fputil::FPBits<float >::FRACTION_LEN);
253+ // exp2_hi_mid_p = 2^(hi + mid)
254+ float exp2_hi_mid_p = fputil::FPBits<float >(exp2_hi_mid_bits_p).get_val ();
255+ // exp2_hi_mid_m = 2^(-(hi + mid))
256+ float exp2_hi_mid_m = fputil::FPBits<float >(exp2_hi_mid_bits_m).get_val ();
257+
258+ // exp2_hi_mid_sum = 2^(hi + mid) + 2^(-(hi + mid))
259+ float exp2_hi_mid_sum = exp2_hi_mid_p + exp2_hi_mid_m;
260+ // exp2_hi_mid_diff = 2^(hi + mid) - 2^(-(hi + mid))
261+ float exp2_hi_mid_diff = exp2_hi_mid_p - exp2_hi_mid_m;
262+
263+ // lo = x - (hi + mid) = round(x * log2(e) * 2^5) * log(2) * (-2^(-5)) + x
264+ float lo = fputil::multiply_add (kf, LOGF_2 * -0x1 .0p-5f , xf);
265+ float lo_sq = lo * lo;
266+
267+ // Degree-3 minimax polynomial generated by Sollya with the following
268+ // commands:
269+ // > display = hexadecimal;
270+ // > P = fpminimax(expm1(x)/x, 2, [|SG...|], [-2^-5, 2^-5]);
271+ // > 1 + x * P;
272+ constexpr cpp::array<float , 4 > COEFFS = {0x1p+0f , 0x1p+0f , 0x1 .0004p-1f ,
273+ 0x1 .555778p-3f };
274+ float half_p_odd =
275+ fputil::polyeval (lo_sq, COEFFS[1 ] * 0 .5f , COEFFS[3 ] * 0 .5f );
276+ float half_p_even =
277+ fputil::polyeval (lo_sq, COEFFS[0 ] * 0 .5f , COEFFS[2 ] * 0 .5f );
278+
279+ // sinh(x) = lo * (0.5 * P_odd * (2^(hi + mid) + 2^(-(hi + mid)))) +
280+ // (0.5 * P_even * (2^(hi + mid) - 2^(-(hi + mid))))
281+ if constexpr (IsSinh)
282+ return static_cast <float16>(fputil::multiply_add (
283+ lo, half_p_odd * exp2_hi_mid_sum, half_p_even * exp2_hi_mid_diff));
284+ // cosh(x) = lo * (0.5 * P_odd * (2^(hi + mid) - 2^(-(hi + mid)))) +
285+ // (0.5 * P_even * (2^(hi + mid) + 2^(-(hi + mid))))
286+ return static_cast <float16>(fputil::multiply_add (
287+ lo, half_p_odd * exp2_hi_mid_diff, half_p_even * exp2_hi_mid_sum));
288+ }
289+
177290} // namespace LIBC_NAMESPACE_DECL
178291
179292#endif // LLVM_LIBC_SRC_MATH_GENERIC_EXPXF16_H
0 commit comments