|
| 1 | +//===-- BFloat16 e^x function ---------------------------------------------===// |
| 2 | +// |
| 3 | +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | +// See https://llvm.org/LICENSE.txt for license information. |
| 5 | +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | +// |
| 7 | +//===----------------------------------------------------------------------===// |
| 8 | + |
| 9 | +#include "src/math/expbf16.h" |
| 10 | + |
| 11 | +#include "hdr/errno_macros.h" |
| 12 | +#include "hdr/fenv_macros.h" |
| 13 | +#include "src/__support/FPUtil/FEnvImpl.h" |
| 14 | +#include "src/__support/FPUtil/FPBits.h" |
| 15 | +#include "src/__support/FPUtil/PolyEval.h" |
| 16 | +#include "src/__support/FPUtil/bfloat16.h" |
| 17 | +#include "src/__support/FPUtil/cast.h" |
| 18 | +#include "src/__support/FPUtil/except_value_utils.h" |
| 19 | +#include "src/__support/FPUtil/multiply_add.h" |
| 20 | +#include "src/__support/FPUtil/nearest_integer.h" |
| 21 | +#include "src/__support/FPUtil/rounding_mode.h" |
| 22 | +#include "src/__support/common.h" |
| 23 | +#include "src/__support/macros/config.h" |
| 24 | +#include "src/__support/macros/optimization.h" |
| 25 | +#include "src/__support/macros/properties/types.h" |
| 26 | + |
| 27 | +namespace LIBC_NAMESPACE_DECL { |
| 28 | + |
| 29 | +// Generated by Sollya with the following commands: |
| 30 | +// > display = hexadecimal; |
| 31 | +// > for i from -96 to 88 by 8 do print(i, round(exp(i), SG, RN) @ "f,"); |
| 32 | +static constexpr float EXP_HI[24] = { |
| 33 | + 0x1.6a4p-139f, 0x1.07b71p-127f, 0x1.7fd974p-116f, 0x1.175afp-104f, |
| 34 | + 0x1.969d48p-93f, 0x1.27ec46p-81f, 0x1.aebabap-70f, 0x1.397924p-58f, |
| 35 | + 0x1.c8465p-47f, 0x1.4c1078p-35f, 0x1.e355bcp-24f, 0x1.5fc21p-12f, |
| 36 | + 0x1p0f, 0x1.749ea8p11f, 0x1.0f2ebep23f, 0x1.8ab7fcp34f, |
| 37 | + 0x1.1f43fcp46f, 0x1.a220d4p57f, 0x1.304d6ap69f, 0x1.baed16p80f, |
| 38 | + 0x1.425982p92f, 0x1.d531d8p103f, 0x1.55779cp115f, 0x1.f1056ep126f, |
| 39 | +}; |
| 40 | + |
| 41 | +// Generated by Sollya with the following commands: |
| 42 | +// > display = hexadecimal; |
| 43 | +// > for i from 0 to 7.75 by 0.25 do print(round(exp(i), SG, RN) @ "f,"); |
| 44 | +static constexpr float EXP_MID[32] = { |
| 45 | + 0x1p0f, 0x1.48b5e4p0f, 0x1.a61298p0f, 0x1.0ef9dcp1f, |
| 46 | + 0x1.5bf0a8p1f, 0x1.bec38ep1f, 0x1.1ed3fep2f, 0x1.704b6ap2f, |
| 47 | + 0x1.d8e64cp2f, 0x1.2f9b88p3f, 0x1.85d6fep3f, 0x1.f4907p3f, |
| 48 | + 0x1.415e5cp4f, 0x1.9ca53cp4f, 0x1.08ec72p5f, 0x1.542b2ep5f, |
| 49 | + 0x1.b4c902p5f, 0x1.186bf2p6f, 0x1.68118ap6f, 0x1.ce564ep6f, |
| 50 | + 0x1.28d38ap7f, 0x1.7d21eep7f, 0x1.e96244p7f, 0x1.3a30dp8f, |
| 51 | + 0x1.936dc6p8f, 0x1.0301a4p9f, 0x1.4c9222p9f, 0x1.ab0786p9f, |
| 52 | + 0x1.122886p10f, 0x1.6006b6p10f, 0x1.c402b6p10f, 0x1.223252p11f, |
| 53 | +}; |
| 54 | + |
| 55 | +constexpr fputil::ExceptValues<bfloat16, 4> EXPBF16_EXCEPTS = {{ |
| 56 | + // (input, RZ output, RU offset, RD offset, RN offset) |
| 57 | + // x = 0x40DB (6.84375) |
| 58 | + // MPFR: RU=0x446B, RD=0x446A, RZ=0x446A, RN=0x446B |
| 59 | + {0x40DBU, 0x446AU, 1U, 0U, 1U}, |
| 60 | + // x = 0x419D, keep original |
| 61 | + {0x419DU, 0x4D9FU, 1U, 0U, 0U}, |
| 62 | + // x = 0x41F9 (31.125) |
| 63 | + // MPFR: RU=0x55F0, RD=0x55EF, RZ=0x55EF, RN=0x55F0 |
| 64 | + {0x41F9U, 0x55EFU, 1U, 0U, 1U}, |
| 65 | + // x = 0xC19F (-19.875) |
| 66 | + // MPFR: RU=0x3121, RD=0x3120, RZ=0x3120, RN=0x3121 |
| 67 | + {0xC19FU, 0x3120U, 1U, 0U, 1U}, |
| 68 | +}}; |
| 69 | + |
| 70 | +LLVM_LIBC_FUNCTION(bfloat16, expbf16, (bfloat16 x)) { |
| 71 | + using FPBits = fputil::FPBits<bfloat16>; |
| 72 | + FPBits x_bits(x); |
| 73 | + |
| 74 | + uint16_t x_u = x_bits.uintval(); |
| 75 | + uint16_t x_abs = x_u & 0x7fffU; |
| 76 | + |
| 77 | + // 0 <= |x| <= 2^(-3), or |x| >= 89, or x is NaN |
| 78 | + if (LIBC_UNLIKELY(x_abs <= 0x3e00U || x_abs >= 0x42b2U)) { |
| 79 | + |
| 80 | + // exp(NaN) = NaN |
| 81 | + if (x_bits.is_nan()) { |
| 82 | + if (x_bits.is_signaling_nan()) { |
| 83 | + fputil::raise_except_if_required(FE_INVALID); |
| 84 | + return FPBits::quiet_nan().get_val(); |
| 85 | + } |
| 86 | + |
| 87 | + return x; |
| 88 | + } |
| 89 | + |
| 90 | + // if x >= 89 |
| 91 | + if (x_bits.is_pos() && x_abs >= 0x42b2U) { |
| 92 | + // exp(inf) = inf |
| 93 | + if (x_bits.is_inf()) |
| 94 | + return FPBits::inf().get_val(); |
| 95 | + |
| 96 | + switch (fputil::quick_get_round()) { |
| 97 | + case FE_TONEAREST: |
| 98 | + case FE_UPWARD: |
| 99 | + fputil::set_errno_if_required(ERANGE); |
| 100 | + fputil::raise_except_if_required(FE_OVERFLOW); |
| 101 | + return FPBits::inf().get_val(); |
| 102 | + default: |
| 103 | + return FPBits::max_normal().get_val(); |
| 104 | + } |
| 105 | + } |
| 106 | + |
| 107 | + // x <= -93 |
| 108 | + if (x_u >= 0xc2baU) { |
| 109 | + // exp(-inf) = +0 |
| 110 | + if (x_bits.is_inf()) |
| 111 | + return FPBits::zero().get_val(); |
| 112 | + |
| 113 | + fputil::set_errno_if_required(ERANGE); |
| 114 | + fputil::raise_except_if_required(FE_UNDERFLOW | FE_INEXACT); |
| 115 | + |
| 116 | + switch (fputil::quick_get_round()) { |
| 117 | + case FE_UPWARD: |
| 118 | + return FPBits::min_subnormal().get_val(); |
| 119 | + default: |
| 120 | + return FPBits::zero().get_val(); |
| 121 | + } |
| 122 | + } |
| 123 | + |
| 124 | + // 0 < |x| <= 2^(-3) |
| 125 | + if (x_abs <= 0x3e00U && !x_bits.is_zero()) { |
| 126 | + float xf = static_cast<float>(x); |
| 127 | + // Degree-3 minimax polynomial generated by Sollya with the following |
| 128 | + // commands: |
| 129 | + // > display = hexadecimal; |
| 130 | + // > P = fpminimax(expm1(x)/x, 2, [|SG...|], [-2^-7, 2^-7]); |
| 131 | + // > 1 + x * P; |
| 132 | + // 0x1p0 + x * (0x1p0 + x * (0x1.00004p-1 + x * 0x1.555578p-3)) |
| 133 | + return fputil::cast<bfloat16>( |
| 134 | + fputil::polyeval(xf, 0x1p+0f, 0x1p+0f, 0x1.0004p-1f, 0x1.555778p-3f)); |
| 135 | + } |
| 136 | + |
| 137 | + // exp(0) = 1 |
| 138 | + if (x_bits.is_zero()) |
| 139 | + return bfloat16(1.0f); |
| 140 | + } |
| 141 | + |
| 142 | + if (auto r = EXPBF16_EXCEPTS.lookup(x_u); LIBC_UNLIKELY(r.has_value())) |
| 143 | + return r.value(); |
| 144 | + |
| 145 | + // For -93 < x < 89, we do the following range reduction: |
| 146 | + // x = hi + mid + lo |
| 147 | + // where, |
| 148 | + // hi / 2^3 is an integer |
| 149 | + // mid * 2^2 is an integer |
| 150 | + // -2^3 <= lo <= 2^3 |
| 151 | + // also, hi + mid = round(4 * x) / x |
| 152 | + // then, |
| 153 | + // exp(x) = exp(hi + mid + lo) |
| 154 | + // = exp(hi) * exp(mid) * exp(lo) |
| 155 | + // we store 184/8 + 1 = 24 values for looking up exp(hi) |
| 156 | + // from -96 to 88 |
| 157 | + // we store 8*4 = 32 values for looking up exp(mid) since |
| 158 | + // mid will always have the bit pattern |bbb.bb| where |
| 159 | + // b can be either 0 or 1 |
| 160 | + |
| 161 | + float xf = static_cast<float>(x); |
| 162 | + float kf = fputil::nearest_integer(xf * 4.0f); |
| 163 | + int x_hi_mid = static_cast<int>(kf); |
| 164 | + int x_hi = x_hi_mid >> 5; |
| 165 | + int x_mid = x_hi_mid & 0b11111; |
| 166 | + // lo = x - (hi + mid) = round(x * 4) / (-4) + x |
| 167 | + float lo = fputil::multiply_add(kf, -0.25f, xf); |
| 168 | + |
| 169 | + float exp_hi = EXP_HI[x_hi + 12]; |
| 170 | + float exp_mid = EXP_MID[x_mid]; |
| 171 | + |
| 172 | + // Degree-3 minimax polynomial generated by Sollya with the following |
| 173 | + // commands: |
| 174 | + // > display = hexadecimal; |
| 175 | + // > P = fpminimax(expm1(x)/x, 2, [|SG...|], [-2^-7, 2^-7]); |
| 176 | + // > 1 + x * P; |
| 177 | + // 0x1p0 + x * (0x1p0 + x * (0x1.00004p-1 + x * 0x1.555578p-3)) |
| 178 | + float exp_lo = |
| 179 | + fputil::polyeval(lo, 0x1p+0f, 0x1p+0f, 0x1.0004p-1f, 0x1.555778p-3f); |
| 180 | + |
| 181 | + return fputil::cast<bfloat16>(exp_hi * exp_mid * exp_lo); |
| 182 | +} |
| 183 | + |
| 184 | +} // namespace LIBC_NAMESPACE_DECL |
0 commit comments