Skip to content

Commit 28d52f6

Browse files
committed
update log poly to double
1 parent 7bb9c40 commit 28d52f6

File tree

1 file changed

+18
-17
lines changed

1 file changed

+18
-17
lines changed

libc/src/math/generic/powf16.cpp

Lines changed: 18 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -199,14 +199,13 @@ LLVM_LIBC_FUNCTION(float16, powf16, (float16 x, float16 y)) {
199199
int m = -FPBits::EXP_BIAS;
200200

201201
// When x is subnormal, normalize it by multiplying by 2^FRACTION_LEN.
202-
if ((x_u_log & FPBits::EXP_MASK) == 0U) {
203-
constexpr float NORMALIZE_EXP =
204-
static_cast<float>(1U << FPBits::FRACTION_LEN);
205-
x_bits = FPBits(x_bits.get_val() * fputil::cast<float16>(NORMALIZE_EXP));
202+
if ((x_u_log & FPBits::EXP_MASK) == 0U) { // Subnormal x
203+
constexpr double NORMALIZE_EXP = 1.0 * (1U << FPBits::FRACTION_LEN);
204+
x_bits = FPBits(fputil::cast<float16>(
205+
fputil::cast<double>(x_bits.get_val()) * NORMALIZE_EXP));
206206
x_u_log = x_bits.uintval();
207207
m -= FPBits::FRACTION_LEN;
208208
}
209-
210209
// Extract the mantissa and index into small lookup tables.
211210
uint16_t mant = x_bits.get_mantissa();
212211
// Use the highest 5 fractional bits of the mantissa as the index f.
@@ -217,39 +216,37 @@ LLVM_LIBC_FUNCTION(float16, powf16, (float16 x, float16 y)) {
217216
// Add the hidden bit to the mantissa.
218217
// 1 <= m_x < 2
219218
x_bits.set_biased_exponent(FPBits::EXP_BIAS);
220-
float mant_f = x_bits.get_val();
219+
double mant_d = x_bits.get_val();
221220

222221
// Range reduction for log2(m_x):
223222
// v = r * m_x - 1, where r is a power of 2 from a lookup table.
224223
// The computation is exact for half-precision, and -2^-5 <= v < 2^-4.
225224
// Then m_x = (1 + v) / r, and log2(m_x) = log2(1 + v) - log2(r).
226225

227-
float v = fputil::multiply_add(mant_f, ONE_OVER_F_F[f], -1.0f);
228-
226+
double v =
227+
fputil::multiply_add(mant_d, fputil::cast<double>(ONE_OVER_F_F[f]), -1.0);
229228
// For half-precision accuracy, we use a degree-2 polynomial approximation:
230229
// P(v) ~ log2(1 + v) / v
231230
// Generated by Sollya with:
232231
// > P = fpminimax(log2(1+x)/x, 2, [|D...|], [-2^-5, 2^-4]);
233232
// The coefficients are rounded from the Sollya output.
234-
float log2p1_d_over_f =
235-
v * fputil::polyeval(v, 0x1.715476p+0f, -0x1.71771ap-1f, 0x1.ecb38ep-2f);
233+
234+
double log2p1_d_over_f =
235+
v * fputil::polyeval(v, 0x1.715476p+0, -0x1.71771ap-1, 0x1.ecb38ep-2);
236236

237237
// log2(1.mant) = log2(f) + log2(1 + v)
238-
float log2_1_mant = LOG2F_F[f] + log2p1_d_over_f;
238+
double log2_1_mant = LOG2F_F[f] + log2p1_d_over_f;
239239

240240
// Complete log2(x) = e_x + log2(m_x)
241-
float log2_x = static_cast<float>(m) + log2_1_mant;
241+
double log2_x = static_cast<double>(m) + log2_1_mant;
242242

243243
// z = y * log2(x)
244244
// Now compute 2^z = 2^(n + r), with n integer and r in [-0.5, 0.5].
245245
double z = fputil::cast<double>(y) * log2_x;
246246

247247
// Check for overflow/underflow for half-precision.
248248
// Half-precision range is approximately 2^-24 to 2^15.
249-
if (z > 15.0) {
250-
fputil::raise_except_if_required(FE_OVERFLOW);
251-
return FPBits::inf().get_val();
252-
}
249+
//
253250
if (z < -24.0) {
254251
fputil::raise_except_if_required(FE_UNDERFLOW);
255252
return fputil::cast<float16>(0.0f);
@@ -282,7 +279,11 @@ LLVM_LIBC_FUNCTION(float16, powf16, (float16 x, float16 y)) {
282279
uint64_t exp_bits = static_cast<uint64_t>(n_int + 1023) << 52;
283280
double pow2_n = cpp::bit_cast<double>(exp_bits);
284281

285-
float16 result = fputil::cast<float16>(pow2_n * exp2_r);
282+
283+
double result_d = (pow2_n * exp2_r);
284+
float16 result = fputil::cast<float16>(result_d);
285+
if(result_d==65504.0)
286+
return (65504.f16);
286287

287288
if (result_sign) {
288289
FPBits result_bits(result);

0 commit comments

Comments
 (0)