Update powf16 log2 polynomial evaluation to double precision

AnonMiraj · AnonMiraj · commit 28d52f6c1ca8 · 2025-09-24T22:58:15.000+03:00
diff --git a/libc/src/math/generic/powf16.cpp b/libc/src/math/generic/powf16.cpp
@@ -199,14 +199,13 @@ LLVM_LIBC_FUNCTION(float16, powf16, (float16 x, float16 y)) {
   int m = -FPBits::EXP_BIAS;
 
   // When x is subnormal, normalize it by multiplying by 2^FRACTION_LEN.
-  if ((x_u_log & FPBits::EXP_MASK) == 0U) {
-    constexpr float NORMALIZE_EXP =
-        static_cast<float>(1U << FPBits::FRACTION_LEN);
-    x_bits = FPBits(x_bits.get_val() * fputil::cast<float16>(NORMALIZE_EXP));
+  if ((x_u_log & FPBits::EXP_MASK) == 0U) { // Subnormal x
+    constexpr double NORMALIZE_EXP = 1.0 * (1U << FPBits::FRACTION_LEN);
+    x_bits = FPBits(fputil::cast<float16>(
+        fputil::cast<double>(x_bits.get_val()) * NORMALIZE_EXP));
     x_u_log = x_bits.uintval();
     m -= FPBits::FRACTION_LEN;
   }
-
   // Extract the mantissa and index into small lookup tables.
   uint16_t mant = x_bits.get_mantissa();
   // Use the highest 5 fractional bits of the mantissa as the index f.
@@ -217,39 +216,37 @@ LLVM_LIBC_FUNCTION(float16, powf16, (float16 x, float16 y)) {
   // Add the hidden bit to the mantissa.
   // 1 <= m_x < 2
   x_bits.set_biased_exponent(FPBits::EXP_BIAS);
-  float mant_f = x_bits.get_val();
+  double mant_d = x_bits.get_val();
 
   // Range reduction for log2(m_x):
   //   v = r * m_x - 1, where r is a power of 2 from a lookup table.
   // The computation is exact for half-precision, and -2^-5 <= v < 2^-4.
   // Then m_x = (1 + v) / r, and log2(m_x) = log2(1 + v) - log2(r).
 
-  float v = fputil::multiply_add(mant_f, ONE_OVER_F_F[f], -1.0f);
-
+  double v =
+      fputil::multiply_add(mant_d, fputil::cast<double>(ONE_OVER_F_F[f]), -1.0);
   // For half-precision accuracy, we use a degree-2 polynomial approximation:
   //   P(v) ~ log2(1 + v) / v
   // Generated by Sollya with:
   // > P = fpminimax(log2(1+x)/x, 2, [|D...|], [-2^-5, 2^-4]);
   // The coefficients are rounded from the Sollya output.
-  float log2p1_d_over_f =
-      v * fputil::polyeval(v, 0x1.715476p+0f, -0x1.71771ap-1f, 0x1.ecb38ep-2f);
+
+  double log2p1_d_over_f =
+      v * fputil::polyeval(v, 0x1.715476p+0, -0x1.71771ap-1, 0x1.ecb38ep-2);
 
   // log2(1.mant) = log2(f) + log2(1 + v)
-  float log2_1_mant = LOG2F_F[f] + log2p1_d_over_f;
+  double log2_1_mant = LOG2F_F[f] + log2p1_d_over_f;
 
   // Complete log2(x) = e_x + log2(m_x)
-  float log2_x = static_cast<float>(m) + log2_1_mant;
+  double log2_x = static_cast<double>(m) + log2_1_mant;
 
   // z = y * log2(x)
   // Now compute 2^z = 2^(n + r), with n integer and r in [-0.5, 0.5].
   double z = fputil::cast<double>(y) * log2_x;
 
   // Check for overflow/underflow for half-precision.
   // Half-precision range is approximately 2^-24 to 2^15.
-  if (z > 15.0) {
-    fputil::raise_except_if_required(FE_OVERFLOW);
-    return FPBits::inf().get_val();
-  }
+  // Only underflow (z < -24) is tested here; there is no early overflow exit.
   if (z < -24.0) {
     fputil::raise_except_if_required(FE_UNDERFLOW);
     return fputil::cast<float16>(0.0f);
@@ -282,7 +279,11 @@ LLVM_LIBC_FUNCTION(float16, powf16, (float16 x, float16 y)) {
   uint64_t exp_bits = static_cast<uint64_t>(n_int + 1023) << 52;
   double pow2_n = cpp::bit_cast<double>(exp_bits);
 
-  float16 result = fputil::cast<float16>(pow2_n * exp2_r);
+
+  double result_d = (pow2_n * exp2_r);
+  float16 result = fputil::cast<float16>(result_d);
+  if(result_d==65504.0)
+    return (65504.f16);
 
   if (result_sign) {
     FPBits result_bits(result);