From f433dfd345fddec4bdadac0104d4edf5978b4238 Mon Sep 17 00:00:00 2001 From: anonmiraj Date: Sat, 20 Sep 2025 07:58:40 +0300 Subject: [PATCH 01/12] [libc][math] Implement C23 half precision pow function --- libc/config/gpu/amdgpu/entrypoints.txt | 1 + libc/config/gpu/nvptx/entrypoints.txt | 1 + libc/config/linux/aarch64/entrypoints.txt | 1 + libc/config/linux/arm/entrypoints.txt | 1 + libc/config/linux/riscv/entrypoints.txt | 1 + libc/config/linux/x86_64/entrypoints.txt | 1 + libc/include/math.yaml | 6 + libc/src/math/CMakeLists.txt | 1 + libc/src/math/generic/CMakeLists.txt | 24 ++ libc/src/math/generic/powf16.cpp | 296 ++++++++++++++++++++++ libc/src/math/powf16.h | 21 ++ libc/test/src/math/CMakeLists.txt | 11 + libc/test/src/math/powf16_test.cpp | 122 +++++++++ libc/test/src/math/smoke/CMakeLists.txt | 10 + libc/test/src/math/smoke/powf16_test.cpp | 232 +++++++++++++++++ 15 files changed, 729 insertions(+) create mode 100644 libc/src/math/generic/powf16.cpp create mode 100644 libc/src/math/powf16.h create mode 100644 libc/test/src/math/powf16_test.cpp create mode 100644 libc/test/src/math/smoke/powf16_test.cpp diff --git a/libc/config/gpu/amdgpu/entrypoints.txt b/libc/config/gpu/amdgpu/entrypoints.txt index 4b6f3337036aa..7dd5118665456 100644 --- a/libc/config/gpu/amdgpu/entrypoints.txt +++ b/libc/config/gpu/amdgpu/entrypoints.txt @@ -451,6 +451,7 @@ set(TARGET_LIBM_ENTRYPOINTS libc.src.math.nextupl libc.src.math.pow libc.src.math.powf + libc.src.math.powf16 libc.src.math.remainder libc.src.math.remainderf libc.src.math.remainderl diff --git a/libc/config/gpu/nvptx/entrypoints.txt b/libc/config/gpu/nvptx/entrypoints.txt index d24cc740d4234..9aae829cd4004 100644 --- a/libc/config/gpu/nvptx/entrypoints.txt +++ b/libc/config/gpu/nvptx/entrypoints.txt @@ -451,6 +451,7 @@ set(TARGET_LIBM_ENTRYPOINTS libc.src.math.nextupl libc.src.math.pow libc.src.math.powf + libc.src.math.powf16 libc.src.math.remainder libc.src.math.remainderf libc.src.math.remainderl diff --git a/libc/config/linux/aarch64/entrypoints.txt b/libc/config/linux/aarch64/entrypoints.txt index 1bc5df9d45a93..584323d086079 100644 --- a/libc/config/linux/aarch64/entrypoints.txt +++ b/libc/config/linux/aarch64/entrypoints.txt @@ -589,6 +589,7 @@ set(TARGET_LIBM_ENTRYPOINTS libc.src.math.nextupl libc.src.math.pow libc.src.math.powf + libc.src.math.powf16 libc.src.math.remainder libc.src.math.remainderf libc.src.math.remainderl diff --git a/libc/config/linux/arm/entrypoints.txt b/libc/config/linux/arm/entrypoints.txt index ec01030c77d4f..a5759d1901cb8 100644 --- a/libc/config/linux/arm/entrypoints.txt +++ b/libc/config/linux/arm/entrypoints.txt @@ -405,6 +405,7 @@ set(TARGET_LIBM_ENTRYPOINTS libc.src.math.nextupl libc.src.math.pow libc.src.math.powf + libc.src.math.powf16 libc.src.math.remainder libc.src.math.remainderf libc.src.math.remainderl diff --git a/libc/config/linux/riscv/entrypoints.txt b/libc/config/linux/riscv/entrypoints.txt index 54ea983d64839..b19d1033f2b75 100644 --- a/libc/config/linux/riscv/entrypoints.txt +++ b/libc/config/linux/riscv/entrypoints.txt @@ -599,6 +599,7 @@ set(TARGET_LIBM_ENTRYPOINTS libc.src.math.nextupl libc.src.math.pow libc.src.math.powf + libc.src.math.powf16 libc.src.math.remainder libc.src.math.remainderf libc.src.math.remainderl diff --git a/libc/config/linux/x86_64/entrypoints.txt b/libc/config/linux/x86_64/entrypoints.txt index 1fc9a2b901c14..03785ce2f8779 100644 --- a/libc/config/linux/x86_64/entrypoints.txt +++ b/libc/config/linux/x86_64/entrypoints.txt @@ -629,6 +629,7 @@ set(TARGET_LIBM_ENTRYPOINTS libc.src.math.nextupl libc.src.math.pow libc.src.math.powf + libc.src.math.powf16 libc.src.math.remainder libc.src.math.remainderf libc.src.math.remainderl diff --git a/libc/include/math.yaml b/libc/include/math.yaml index 4e398676bf91e..ff1608e3ec3dd 100644 --- a/libc/include/math.yaml +++ b/libc/include/math.yaml @@ -2083,6 +2083,12 @@ functions: arguments: - type: float - type: float + - name: powf16 + standards: + - stdc + return_type: _Float16 + arguments: + - type: _Float16 - name: powi standards: llvm_libc_ext return_type: double diff --git a/libc/src/math/CMakeLists.txt b/libc/src/math/CMakeLists.txt index 187bc92e5c2c6..3a1ab17341265 100644 --- a/libc/src/math/CMakeLists.txt +++ b/libc/src/math/CMakeLists.txt @@ -451,6 +451,7 @@ add_math_entrypoint_object(nextupf128) add_math_entrypoint_object(pow) add_math_entrypoint_object(powf) +add_math_entrypoint_object(powf16) add_math_entrypoint_object(powi) add_math_entrypoint_object(powif) diff --git a/libc/src/math/generic/CMakeLists.txt b/libc/src/math/generic/CMakeLists.txt index 419f562713b5f..fa5e0ed5b8451 100644 --- a/libc/src/math/generic/CMakeLists.txt +++ b/libc/src/math/generic/CMakeLists.txt @@ -1581,6 +1581,30 @@ add_entrypoint_object( libc.src.__support.math.expxf16_utils ) +add_entrypoint_object( + powf16 + SRCS + powf16.cpp + HDRS + ../pow.h + DEPENDS + .common_constants + libc.hdr.errno_macros + libc.hdr.fenv_macros + libc.src.__support.CPP.bit + libc.src.__support.FPUtil.fenv_impl + libc.src.__support.FPUtil.fp_bits + libc.src.__support.FPUtil.polyeval + libc.src.__support.FPUtil.cast + libc.src.__support.FPUtil.multiply_add + libc.src.__support.FPUtil.nearest_integer + libc.src.__support.FPUtil.sqrt + libc.src.__support.macros.optimization + libc.src.__support.math.expxf16_utils + COMPILE_OPTIONS + -O3 +) + add_entrypoint_object( powf SRCS diff --git a/libc/src/math/generic/powf16.cpp b/libc/src/math/generic/powf16.cpp new file mode 100644 index 0000000000000..8e2e6b552632f --- /dev/null +++ b/libc/src/math/generic/powf16.cpp @@ -0,0 +1,296 @@ +//===-- Half-precision x^y function ---------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/math/powf16.h" +#include "hdr/errno_macros.h" +#include "hdr/fenv_macros.h" +#include "src/__support/CPP/bit.h" +#include "src/__support/FPUtil/FEnvImpl.h" +#include "src/__support/FPUtil/FPBits.h" +#include "src/__support/FPUtil/PolyEval.h" +#include "src/__support/FPUtil/cast.h" +#include "src/__support/FPUtil/multiply_add.h" +#include "src/__support/FPUtil/nearest_integer.h" +#include "src/__support/FPUtil/sqrt.h" +#include "src/__support/common.h" +#include "src/__support/macros/config.h" +#include "src/__support/macros/optimization.h" +#include "src/__support/macros/properties/types.h" +#include "src/__support/math/expxf16_utils.h" + +namespace LIBC_NAMESPACE_DECL { + +namespace { + +bool is_odd_integer(float16 x) { + using FPBits = fputil::FPBits; + FPBits xbits(x); + uint16_t x_u = xbits.uintval(); + unsigned x_e = static_cast(xbits.get_biased_exponent()); + unsigned lsb = static_cast( + cpp::countr_zero(static_cast(x_u | FPBits::EXP_MASK))); + constexpr unsigned UNIT_EXPONENT = + static_cast(FPBits::EXP_BIAS + FPBits::FRACTION_LEN); + return (x_e + lsb == UNIT_EXPONENT); +} + +bool is_integer(float16 x) { + using FPBits = fputil::FPBits; + FPBits xbits(x); + uint16_t x_u = xbits.uintval(); + unsigned x_e = static_cast(xbits.get_biased_exponent()); + unsigned lsb = static_cast( + cpp::countr_zero(static_cast(x_u | FPBits::EXP_MASK))); + constexpr unsigned UNIT_EXPONENT = + static_cast(FPBits::EXP_BIAS + FPBits::FRACTION_LEN); + return (x_e + lsb >= UNIT_EXPONENT); +} + +} // namespace + +LLVM_LIBC_FUNCTION(float16, powf16, (float16 x, float16 y)) { + using FPBits = fputil::FPBits; + + FPBits xbits(x), ybits(y); + bool x_sign = xbits.is_neg(); + bool y_sign = ybits.is_neg(); + + FPBits x_abs = xbits.abs(); + FPBits y_abs = ybits.abs(); + + uint16_t x_u = xbits.uintval(); + uint16_t x_a = x_abs.uintval(); + uint16_t y_a = y_abs.uintval(); + bool result_sign = false; + + ///////// BEGIN - Check exceptional cases //////////////////////////////////// + // If x or y is signaling NaN + if (xbits.is_signaling_nan() || ybits.is_signaling_nan()) { + fputil::raise_except_if_required(FE_INVALID); + return FPBits::quiet_nan().get_val(); + } + + // + if (LIBC_UNLIKELY(ybits.is_zero() || x_u == FPBits::one().uintval() || + x_u >= FPBits::inf().uintval() || + x_u < FPBits::min_normal().uintval())) { + // pow(x, 0) = 1 + if (ybits.is_zero()) { + return fputil::cast(1.0f); + } + + // pow(1, Y) = 1 + if (x_u == FPBits::one().uintval()) { + return fputil::cast(1.0f); + } + + switch (y_a) { + + case 0x3800U: { // y = +-0.5 + if (LIBC_UNLIKELY( + (x == 0.0 || x_u == FPBits::inf(Sign::NEG).uintval()))) { + // pow(-0, 1/2) = +0 + // pow(-inf, 1/2) = +inf + // Make sure it works correctly for FTZ/DAZ. + return fputil::cast(y_sign ? (1.0 / (x * x)) : (x * x)); + } + return fputil::cast(y_sign ? (1.0 / fputil::sqrt(x)) + : fputil::sqrt(x)); + } + case 0x3c00U: // y = +-1.0 + return fputil::cast(y_sign ? (1.0 / x) : x); + + case 0x4000U: // y = +-2.0 + return fputil::cast(y_sign ? (1.0 / (x * x)) : (x * x)); + } + // TODO: Speed things up with pow(2, y) = exp2(y) and pow(10, y) = exp10(y). + + // Propagate remaining quiet NaNs. + if (xbits.is_quiet_nan()) { + return x; + } + if (ybits.is_quiet_nan()) { + return y; + } + + // x = -1: special case for integer exponents + if (x_u == FPBits::one(Sign::NEG).uintval()) { + if (is_integer(y)) { + if (is_odd_integer(y)) { + return fputil::cast(-1.0f); + } else { + return fputil::cast(1.0f); + } + } + // pow(-1, non-integer) = NaN + fputil::set_errno_if_required(EDOM); + fputil::raise_except_if_required(FE_INVALID); + return FPBits::quiet_nan().get_val(); + } + + // x = 0 cases + if (xbits.is_zero()) { + if (y_sign) { + // pow(+-0, negative) = +-inf and raise FE_DIVBYZERO + fputil::raise_except_if_required(FE_DIVBYZERO); + bool result_neg = x_sign && ybits.is_finite() && is_odd_integer(y); + return FPBits::inf(result_neg ? Sign::NEG : Sign::POS).get_val(); + } else { + // pow(+-0, positive) = +-0 + bool out_is_neg = x_sign && is_odd_integer(y); + return out_is_neg ? FPBits::zero(Sign::NEG).get_val() + : FPBits::zero(Sign::POS).get_val(); + } + } + + if (xbits.is_inf()) { + bool out_is_neg = x_sign && ybits.is_finite() && is_odd_integer(y); + if (y_sign) // pow(+-inf, negative) = +-0 + return out_is_neg ? FPBits::zero(Sign::NEG).get_val() + : FPBits::zero(Sign::POS).get_val(); + // pow(+-inf, positive) = +-inf + return FPBits::inf(out_is_neg ? Sign::NEG : Sign::POS).get_val(); + } + + // y = +-inf cases + if (ybits.is_inf()) { + // pow(1, inf) handled above. + bool x_abs_less_than_one = x_a < FPBits::one().uintval(); + if ((x_abs_less_than_one && !y_sign) || + (!x_abs_less_than_one && y_sign)) { + return fputil::cast(0.0f); + } else { + return FPBits::inf().get_val(); + } + } + + // pow( negative, non-integer ) = NaN + if (x_sign && !is_integer(y)) { + fputil::set_errno_if_required(EDOM); + fputil::raise_except_if_required(FE_INVALID); + return FPBits::quiet_nan().get_val(); + } + + // For negative x with integer y, compute pow(|x|, y) and adjust sign + if (x_sign) { + x = -x; + if (is_odd_integer(y)) { + result_sign = true; + } + } + } + ///////// END - Check exceptional cases ////////////////////////////////////// + + // x^y = 2^( y * log2(x) ) + // = 2^( y * ( e_x + log2(m_x) ) ) + // First we compute log2(x) = e_x + log2(m_x) + + using namespace math::expxf16_internal; + FPBits x_bits(x); + + uint16_t x_u_log = x_bits.uintval(); + + // Extract exponent field of x. + int m = -FPBits::EXP_BIAS; + + // When x is subnormal, normalize it by multiplying by 2^FRACTION_LEN. + if ((x_u_log & FPBits::EXP_MASK) == 0U) { + constexpr float NORMALIZE_EXP = + static_cast(1U << FPBits::FRACTION_LEN); + x_bits = FPBits(x_bits.get_val() * fputil::cast(NORMALIZE_EXP)); + x_u_log = x_bits.uintval(); + m -= FPBits::FRACTION_LEN; + } + + // Extract the mantissa and index into small lookup tables. + uint16_t mant = x_bits.get_mantissa(); + // Use the highest 5 fractional bits of the mantissa as the index f. + int f = mant >> 5; + + m += (x_u_log >> FPBits::FRACTION_LEN); + + // Add the hidden bit to the mantissa. + // 1 <= m_x < 2 + x_bits.set_biased_exponent(FPBits::EXP_BIAS); + float mant_f = x_bits.get_val(); + + // Range reduction for log2(m_x): + // v = r * m_x - 1, where r is a power of 2 from a lookup table. + // The computation is exact for half-precision, and -2^-5 <= v < 2^-4. + // Then m_x = (1 + v) / r, and log2(m_x) = log2(1 + v) - log2(r). + + float v = fputil::multiply_add(mant_f, ONE_OVER_F_F[f], -1.0f); + + // For half-precision accuracy, we use a degree-2 polynomial approximation: + // P(v) ~ log2(1 + v) / v + // Generated by Sollya with: + // > P = fpminimax(log2(1+x)/x, 2, [|D...|], [-2^-5, 2^-4]); + // The coefficients are rounded from the Sollya output. + float log2p1_d_over_f = + v * fputil::polyeval(v, 0x1.715476p+0f, -0x1.71771ap-1f, 0x1.ecb38ep-2f); + + // log2(1.mant) = log2(f) + log2(1 + v) + float log2_1_mant = LOG2F_F[f] + log2p1_d_over_f; + + // Complete log2(x) = e_x + log2(m_x) + float log2_x = static_cast(m) + log2_1_mant; + + // z = y * log2(x) + // Now compute 2^z = 2^(n + r), with n integer and r in [-0.5, 0.5]. + double z = fputil::cast(y) * log2_x; + + // Check for overflow/underflow for half-precision. + // Half-precision range is approximately 2^-24 to 2^15. + if (z > 15.0) { + fputil::raise_except_if_required(FE_OVERFLOW); + return FPBits::inf().get_val(); + } + if (z < -24.0) { + fputil::raise_except_if_required(FE_UNDERFLOW); + return fputil::cast(0.0f); + } + + double n = fputil::nearest_integer(z); + double r = z - n; + + // Compute 2^r using a degree-7 polynomial for r in [-0.5, 0.5]. + // Generated by Sollya with: + // > P = fpminimax(2^x, 7, [|D...|], [-0.5, 0.5]); + // The polynomial coefficients are rounded from the Sollya output. + constexpr double EXP2_COEFFS[] = { + 0x1p+0, // 1.0 + 0x1.62e42fefa39efp-1, // ln(2) + 0x1.ebfbdff82c58fp-3, // ln(2)^2 / 2 + 0x1.c6b08d704a0c0p-5, // ln(2)^3 / 6 + 0x1.3b2ab6fba4e77p-7, // ln(2)^4 / 24 + 0x1.5d87fe78a6737p-10, // ln(2)^5 / 120 + 0x1.430912f86a805p-13, // ln(2)^6 / 720 + 0x1.10e4104ac8015p-17 // ln(2)^7 / 5040 + }; + + double exp2_r = fputil::polyeval( + r, EXP2_COEFFS[0], EXP2_COEFFS[1], EXP2_COEFFS[2], EXP2_COEFFS[3], + EXP2_COEFFS[4], EXP2_COEFFS[5], EXP2_COEFFS[6], EXP2_COEFFS[7]); + + // Compute 2^n by direct bit manipulation. + int n_int = static_cast(n); + uint64_t exp_bits = static_cast(n_int + 1023) << 52; + double pow2_n = cpp::bit_cast(exp_bits); + + float16 result = fputil::cast(pow2_n * exp2_r); + + if (result_sign) { + FPBits result_bits(result); + result_bits.set_sign(Sign::NEG); + result = result_bits.get_val(); + } + + return result; +} + +} // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/math/powf16.h b/libc/src/math/powf16.h new file mode 100644 index 0000000000000..52f9848f4a2be --- /dev/null +++ b/libc/src/math/powf16.h @@ -0,0 +1,21 @@ +//===-- Implementation header for powf16 --------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_MATH_POWF16_H +#define LLVM_LIBC_SRC_MATH_POWF16_H + +#include "src/__support/macros/config.h" +#include "src/__support/macros/properties/types.h" + +namespace LIBC_NAMESPACE_DECL { + +float16 powf16(float16 x, float16 y); + +} // namespace LIBC_NAMESPACE_DECL + +#endif // LLVM_LIBC_SRC_MATH_POWF16_H diff --git a/libc/test/src/math/CMakeLists.txt b/libc/test/src/math/CMakeLists.txt index e15df147c3c35..eec048e094bd8 100644 --- a/libc/test/src/math/CMakeLists.txt +++ b/libc/test/src/math/CMakeLists.txt @@ -2470,6 +2470,17 @@ add_fp_unittest( libc.src.__support.FPUtil.fp_bits ) +add_fp_unittest( + powf16_test + NEED_MPFR + SUITE + libc-math-unittests + SRCS + powf16_test.cpp + DEPENDS + libc.src.math.powf16 + libc.src.__support.FPUtil.fp_bits +) add_fp_unittest( atan2f_test NEED_MPFR diff --git a/libc/test/src/math/powf16_test.cpp b/libc/test/src/math/powf16_test.cpp new file mode 100644 index 0000000000000..171cb098114d4 --- /dev/null +++ b/libc/test/src/math/powf16_test.cpp @@ -0,0 +1,122 @@ +//===-- Unittests for powf16 ----------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + + +#include "src/math/powf16.h" +#include "test/UnitTest/FPMatcher.h" +#include "test/UnitTest/Test.h" +#include "utils/MPFRWrapper/MPFRUtils.h" + +using LlvmLibcPowF16Test = LIBC_NAMESPACE::testing::FPTest; +using LIBC_NAMESPACE::testing::tlog; + +namespace mpfr = LIBC_NAMESPACE::testing::mpfr; + +TEST_F(LlvmLibcPowF16Test, TrickyInputs) { + // These values are in half precision. + constexpr mpfr::BinaryInput INPUTS[] = { + {static_cast(0x1.08p-2f), static_cast(0x1.0cp-1f)}, + {static_cast(0x1.66p-1f), static_cast(0x1.f1p+1f)}, + {static_cast(0x1.c04p-1f), static_cast(0x1.2p+12f)}, + {static_cast(0x1.aep-1f), static_cast(0x1.f9p-1f)}, + {static_cast(0x1.ffcp-1f), static_cast(0x1.fffp-2f)}, + {static_cast(0x1.f55p-1f), static_cast(0x1.88p+12f)}, + {static_cast(0x1.e84p-1f), static_cast(0x1.2cp+13f)}, + }; + + for (auto input : INPUTS) { + float16 x = input.x; + float16 y = input.y; + EXPECT_MPFR_MATCH(mpfr::Operation::Pow, input, LIBC_NAMESPACE::powf16(x, y), + 1.0); // 1 ULP tolerance is enough for f16 + } +} + +TEST_F(LlvmLibcPowF16Test, InFloat16Range) { + constexpr uint16_t X_COUNT = 63; + constexpr uint16_t X_START = FPBits(static_cast(0.25)).uintval(); + constexpr uint16_t X_STOP = FPBits(static_cast(4.0)).uintval(); + constexpr uint16_t X_STEP = (X_STOP - X_START) / X_COUNT; + + constexpr uint16_t Y_COUNT = 59; + constexpr uint16_t Y_START = FPBits(static_cast(0.25)).uintval(); + constexpr uint16_t Y_STOP = FPBits(static_cast(4.0)).uintval(); + constexpr uint16_t Y_STEP = (Y_STOP - Y_START) / Y_COUNT; + + auto test = [&](mpfr::RoundingMode rounding_mode) { + mpfr::ForceRoundingMode __r(rounding_mode); + if (!__r.success) + return; + + uint64_t fails = 0; + uint64_t count = 0; + uint64_t cc = 0; + float16 mx = 0.0, my = 0.0, mr = 0.0; + double tol = 1.0; // start with 1 ULP for half precision + + for (uint16_t i = 0, v = X_START; i <= X_COUNT; ++i, v += X_STEP) { + float16 x = FPBits(v).get_val(); + if (FPBits(x).is_inf_or_nan() || x < static_cast(0.0)) + continue; + + for (uint16_t j = 0, w = Y_START; j <= Y_COUNT; ++j, w += Y_STEP) { + float16 y = FPBits(w).get_val(); + if (FPBits(y).is_inf_or_nan()) + continue; + + float16 result = LIBC_NAMESPACE::powf16(x, y); + ++cc; + if (FPBits(result).is_inf_or_nan()) + continue; + + ++count; + mpfr::BinaryInput inputs{x, y}; + + if (!TEST_MPFR_MATCH_ROUNDING_SILENTLY(mpfr::Operation::Pow, inputs, + result, 1.0, rounding_mode)) { + ++fails; + while (!TEST_MPFR_MATCH_ROUNDING_SILENTLY( + mpfr::Operation::Pow, inputs, result, tol, rounding_mode)) { + mx = x; + my = y; + mr = result; + + if (tol > 128.0) // half precision is only ~11 bits + break; + + tol *= 2.0; + } + } + } + } + if (fails || (count < cc)) { + tlog << " powf16 failed: " << fails << "/" << count << "/" << cc + << " tests.\n" + << " Max ULPs is at most: " << static_cast(tol) + << ".\n"; + } + if (fails) { + mpfr::BinaryInput inputs{mx, my}; + EXPECT_MPFR_MATCH(mpfr::Operation::Pow, inputs, mr, 1.0, rounding_mode); + } + }; + + tlog << " Test Rounding To Nearest...\n"; + test(mpfr::RoundingMode::Nearest); + + tlog << " Test Rounding Downward...\n"; + test(mpfr::RoundingMode::Downward); + + tlog << " Test Rounding Upward...\n"; + test(mpfr::RoundingMode::Upward); + + tlog << " Test Rounding Toward Zero...\n"; + test(mpfr::RoundingMode::TowardZero); +} + + diff --git a/libc/test/src/math/smoke/CMakeLists.txt b/libc/test/src/math/smoke/CMakeLists.txt index b800f7aba98d1..fa44c72e95618 100644 --- a/libc/test/src/math/smoke/CMakeLists.txt +++ b/libc/test/src/math/smoke/CMakeLists.txt @@ -4580,6 +4580,16 @@ add_fp_unittest( libc.src.__support.FPUtil.fp_bits ) +add_fp_unittest( + powf16_test + SUITE + libc-math-smoke-tests + SRCS + powf16_test.cpp + DEPENDS + libc.src.math.powf16 + libc.src.__support.FPUtil.fp_bits +) add_fp_unittest( totalorder_test SUITE diff --git a/libc/test/src/math/smoke/powf16_test.cpp b/libc/test/src/math/smoke/powf16_test.cpp new file mode 100644 index 0000000000000..b3611e141dc01 --- /dev/null +++ b/libc/test/src/math/smoke/powf16_test.cpp @@ -0,0 +1,232 @@ +//===-- Unittests for powf16 ----------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "hdr/fenv_macros.h" +#include "src/math/powf16.h" +#include "test/UnitTest/FPMatcher.h" +#include "test/UnitTest/Test.h" + +using LlvmLibcPowF16Test = LIBC_NAMESPACE::testing::FPTest; +using LIBC_NAMESPACE::fputil::testing::ForceRoundingMode; +using LIBC_NAMESPACE::fputil::testing::RoundingMode; + +TEST_F(LlvmLibcPowF16Test, SpecialNumbers) { + constexpr float16 NEG_ODD_INTEGER = -3.0f16; + constexpr float16 NEG_EVEN_INTEGER = -6.0f16; + constexpr float16 NEG_NON_INTEGER = -1.5f16; + constexpr float16 POS_ODD_INTEGER = 5.0f16; + constexpr float16 POS_EVEN_INTEGER = 8.0f16; + constexpr float16 POS_NON_INTEGER = 1.5f16; + constexpr float16 ONE_HALF = 0.5f16; + + for (int i = 0; i < N_ROUNDING_MODES; ++i) { + + ForceRoundingMode __r(ROUNDING_MODES[i]); + if (!__r.success) + continue; + + // powf16( sNaN, exponent ) + EXPECT_FP_EQ_WITH_EXCEPTION(aNaN, LIBC_NAMESPACE::powf16(sNaN, sNaN), + FE_INVALID); + EXPECT_FP_EQ_WITH_EXCEPTION( + aNaN, LIBC_NAMESPACE::powf16(sNaN, NEG_ODD_INTEGER), FE_INVALID); + EXPECT_FP_EQ_WITH_EXCEPTION( + aNaN, LIBC_NAMESPACE::powf16(sNaN, NEG_EVEN_INTEGER), FE_INVALID); + EXPECT_FP_EQ_WITH_EXCEPTION( + aNaN, LIBC_NAMESPACE::powf16(sNaN, POS_ODD_INTEGER), FE_INVALID); + EXPECT_FP_EQ_WITH_EXCEPTION( + aNaN, LIBC_NAMESPACE::powf16(sNaN, POS_EVEN_INTEGER), FE_INVALID); + EXPECT_FP_EQ_WITH_EXCEPTION(aNaN, LIBC_NAMESPACE::powf16(sNaN, ONE_HALF), + FE_INVALID); + EXPECT_FP_EQ_WITH_EXCEPTION(aNaN, LIBC_NAMESPACE::powf16(sNaN, zero), + FE_INVALID); + EXPECT_FP_EQ_WITH_EXCEPTION(aNaN, LIBC_NAMESPACE::powf16(sNaN, neg_zero), + FE_INVALID); + EXPECT_FP_EQ_WITH_EXCEPTION(aNaN, LIBC_NAMESPACE::powf16(sNaN, inf), + FE_INVALID); + EXPECT_FP_EQ_WITH_EXCEPTION(aNaN, LIBC_NAMESPACE::powf16(sNaN, neg_inf), + FE_INVALID); + EXPECT_FP_EQ_WITH_EXCEPTION(aNaN, LIBC_NAMESPACE::powf16(sNaN, aNaN), + FE_INVALID); + + // powf16( 0.0, exponent ) + EXPECT_FP_EQ_WITH_EXCEPTION(aNaN, LIBC_NAMESPACE::powf16(zero, sNaN), + FE_INVALID); + EXPECT_FP_EQ_WITH_EXCEPTION( + inf, LIBC_NAMESPACE::powf16(zero, NEG_ODD_INTEGER), FE_DIVBYZERO); + EXPECT_FP_EQ_WITH_EXCEPTION( + inf, LIBC_NAMESPACE::powf16(zero, NEG_EVEN_INTEGER), FE_DIVBYZERO); + EXPECT_FP_EQ_WITH_EXCEPTION( + inf, LIBC_NAMESPACE::powf16(zero, NEG_NON_INTEGER), FE_DIVBYZERO); + EXPECT_FP_EQ(zero, LIBC_NAMESPACE::powf16(zero, POS_ODD_INTEGER)); + EXPECT_FP_EQ(zero, LIBC_NAMESPACE::powf16(zero, POS_EVEN_INTEGER)); + EXPECT_FP_EQ(zero, LIBC_NAMESPACE::powf16(zero, POS_NON_INTEGER)); + EXPECT_FP_EQ(zero, LIBC_NAMESPACE::powf16(zero, ONE_HALF)); + EXPECT_FP_EQ(1.0, LIBC_NAMESPACE::powf16(zero, zero)); + EXPECT_FP_EQ(1.0, LIBC_NAMESPACE::powf16(zero, neg_zero)); + EXPECT_FP_EQ(0.0, LIBC_NAMESPACE::powf16(zero, inf)); + EXPECT_FP_EQ_WITH_EXCEPTION(inf, LIBC_NAMESPACE::powf16(zero, neg_inf), + FE_DIVBYZERO); + EXPECT_FP_IS_NAN(LIBC_NAMESPACE::powf16(zero, aNaN)); + + // powf16( -0.0, exponent ) + EXPECT_FP_EQ_WITH_EXCEPTION(aNaN, LIBC_NAMESPACE::powf16(neg_zero, sNaN), + FE_INVALID); + EXPECT_FP_EQ_WITH_EXCEPTION( + neg_inf, LIBC_NAMESPACE::powf16(neg_zero, NEG_ODD_INTEGER), + FE_DIVBYZERO); + EXPECT_FP_EQ_WITH_EXCEPTION( + inf, LIBC_NAMESPACE::powf16(neg_zero, NEG_EVEN_INTEGER), FE_DIVBYZERO); + EXPECT_FP_EQ_WITH_EXCEPTION( + inf, LIBC_NAMESPACE::powf16(neg_zero, NEG_NON_INTEGER), FE_DIVBYZERO); + EXPECT_FP_EQ(neg_zero, LIBC_NAMESPACE::powf16(neg_zero, POS_ODD_INTEGER)); + EXPECT_FP_EQ(zero, LIBC_NAMESPACE::powf16(neg_zero, POS_EVEN_INTEGER)); + EXPECT_FP_EQ(zero, LIBC_NAMESPACE::powf16(neg_zero, POS_NON_INTEGER)); + EXPECT_FP_EQ(zero, LIBC_NAMESPACE::powf16(neg_zero, ONE_HALF)); + EXPECT_FP_EQ(1.0, LIBC_NAMESPACE::powf16(neg_zero, zero)); + EXPECT_FP_EQ(1.0, LIBC_NAMESPACE::powf16(neg_zero, neg_zero)); + EXPECT_FP_EQ(0.0, LIBC_NAMESPACE::powf16(neg_zero, inf)); + EXPECT_FP_EQ_WITH_EXCEPTION(inf, LIBC_NAMESPACE::powf16(neg_zero, neg_inf), + FE_DIVBYZERO); + EXPECT_FP_IS_NAN(LIBC_NAMESPACE::powf16(neg_zero, aNaN)); + + // powf16( 1.0, exponent ) + EXPECT_FP_EQ_WITH_EXCEPTION(aNaN, LIBC_NAMESPACE::powf16(1.0, sNaN), + FE_INVALID); + EXPECT_FP_EQ(1.0, LIBC_NAMESPACE::powf16(1.0, zero)); + EXPECT_FP_EQ(1.0, LIBC_NAMESPACE::powf16(1.0, neg_zero)); + EXPECT_FP_EQ(1.0, LIBC_NAMESPACE::powf16(1.0, 1.0)); + EXPECT_FP_EQ(1.0, LIBC_NAMESPACE::powf16(1.0, -1.0)); + EXPECT_FP_EQ(1.0, LIBC_NAMESPACE::powf16(1.0, NEG_ODD_INTEGER)); + EXPECT_FP_EQ(1.0, LIBC_NAMESPACE::powf16(1.0, NEG_EVEN_INTEGER)); + EXPECT_FP_EQ(1.0, LIBC_NAMESPACE::powf16(1.0, NEG_NON_INTEGER)); + EXPECT_FP_EQ(1.0, LIBC_NAMESPACE::powf16(1.0, POS_ODD_INTEGER)); + EXPECT_FP_EQ(1.0, LIBC_NAMESPACE::powf16(1.0, POS_EVEN_INTEGER)); + EXPECT_FP_EQ(1.0, LIBC_NAMESPACE::powf16(1.0, POS_NON_INTEGER)); + EXPECT_FP_EQ(1.0, LIBC_NAMESPACE::powf16(1.0, inf)); + EXPECT_FP_EQ(1.0, LIBC_NAMESPACE::powf16(1.0, neg_inf)); + EXPECT_FP_EQ(1.0, LIBC_NAMESPACE::powf16(1.0, aNaN)); + + // powf16( -1.0, exponent ) + EXPECT_FP_EQ_WITH_EXCEPTION(aNaN, LIBC_NAMESPACE::powf16(-1.0, sNaN), + FE_INVALID); + EXPECT_FP_EQ(1.0, LIBC_NAMESPACE::powf16(-1.0, zero)); + EXPECT_FP_EQ(1.0, LIBC_NAMESPACE::powf16(-1.0, neg_zero)); + EXPECT_FP_EQ(-1.0, LIBC_NAMESPACE::powf16(-1.0, 1.0)); + EXPECT_FP_EQ(-1.0, LIBC_NAMESPACE::powf16(-1.0, -1.0)); + EXPECT_FP_EQ(-1.0, LIBC_NAMESPACE::powf16(-1.0, NEG_ODD_INTEGER)); + EXPECT_FP_EQ(1.0, LIBC_NAMESPACE::powf16(-1.0, NEG_EVEN_INTEGER)); + EXPECT_FP_IS_NAN_WITH_EXCEPTION( + LIBC_NAMESPACE::powf16(-1.0, NEG_NON_INTEGER), FE_INVALID); + EXPECT_FP_EQ(-1.0, LIBC_NAMESPACE::powf16(-1.0, POS_ODD_INTEGER)); + EXPECT_FP_EQ(1.0, LIBC_NAMESPACE::powf16(-1.0, POS_EVEN_INTEGER)); + EXPECT_FP_IS_NAN_WITH_EXCEPTION( + LIBC_NAMESPACE::powf16(-1.0, POS_NON_INTEGER), FE_INVALID); + EXPECT_FP_EQ(1.0, LIBC_NAMESPACE::powf16(-1.0, inf)); + EXPECT_FP_EQ(1.0, LIBC_NAMESPACE::powf16(-1.0, neg_inf)); + EXPECT_FP_IS_NAN(LIBC_NAMESPACE::powf16(-1.0, aNaN)); + + // powf16( inf, exponent ) + EXPECT_FP_EQ_WITH_EXCEPTION(aNaN, LIBC_NAMESPACE::powf16(inf, sNaN), + FE_INVALID); + EXPECT_FP_EQ(1.0, LIBC_NAMESPACE::powf16(inf, zero)); + EXPECT_FP_EQ(1.0, LIBC_NAMESPACE::powf16(inf, neg_zero)); + EXPECT_FP_EQ(inf, LIBC_NAMESPACE::powf16(inf, 1.0)); + EXPECT_FP_EQ(zero, LIBC_NAMESPACE::powf16(inf, -1.0)); + EXPECT_FP_EQ(zero, LIBC_NAMESPACE::powf16(inf, NEG_ODD_INTEGER)); + EXPECT_FP_EQ(zero, LIBC_NAMESPACE::powf16(inf, NEG_EVEN_INTEGER)); + EXPECT_FP_EQ(zero, LIBC_NAMESPACE::powf16(inf, NEG_NON_INTEGER)); + EXPECT_FP_EQ(inf, LIBC_NAMESPACE::powf16(inf, POS_ODD_INTEGER)); + EXPECT_FP_EQ(inf, LIBC_NAMESPACE::powf16(inf, POS_EVEN_INTEGER)); + EXPECT_FP_EQ(inf, LIBC_NAMESPACE::powf16(inf, POS_NON_INTEGER)); + EXPECT_FP_EQ(inf, LIBC_NAMESPACE::powf16(inf, ONE_HALF)); + EXPECT_FP_EQ(inf, LIBC_NAMESPACE::powf16(inf, inf)); + EXPECT_FP_EQ(zero, LIBC_NAMESPACE::powf16(inf, neg_inf)); + EXPECT_FP_IS_NAN(LIBC_NAMESPACE::powf16(inf, aNaN)); + + // powf16( -inf, exponent ) + EXPECT_FP_EQ_WITH_EXCEPTION(aNaN, LIBC_NAMESPACE::powf16(neg_inf, sNaN), + FE_INVALID); + EXPECT_FP_EQ(1.0, LIBC_NAMESPACE::powf16(neg_inf, zero)); + EXPECT_FP_EQ(1.0, LIBC_NAMESPACE::powf16(neg_inf, neg_zero)); + EXPECT_FP_EQ(neg_inf, LIBC_NAMESPACE::powf16(neg_inf, 1.0)); + EXPECT_FP_EQ(neg_zero, LIBC_NAMESPACE::powf16(neg_inf, -1.0)); + EXPECT_FP_EQ(neg_zero, LIBC_NAMESPACE::powf16(neg_inf, NEG_ODD_INTEGER)); + EXPECT_FP_EQ(zero, LIBC_NAMESPACE::powf16(neg_inf, NEG_EVEN_INTEGER)); + EXPECT_FP_EQ(zero, LIBC_NAMESPACE::powf16(neg_inf, NEG_NON_INTEGER)); + EXPECT_FP_EQ(neg_inf, LIBC_NAMESPACE::powf16(neg_inf, POS_ODD_INTEGER)); + EXPECT_FP_EQ(inf, LIBC_NAMESPACE::powf16(neg_inf, POS_EVEN_INTEGER)); + EXPECT_FP_EQ(inf, LIBC_NAMESPACE::powf16(neg_inf, POS_NON_INTEGER)); + EXPECT_FP_EQ(inf, LIBC_NAMESPACE::powf16(neg_inf, ONE_HALF)); + EXPECT_FP_EQ(inf, LIBC_NAMESPACE::powf16(neg_inf, inf)); + EXPECT_FP_EQ(zero, LIBC_NAMESPACE::powf16(neg_inf, neg_inf)); + EXPECT_FP_IS_NAN(LIBC_NAMESPACE::powf16(neg_inf, aNaN)); + + // powf16 ( aNaN, exponent ) + EXPECT_FP_EQ_WITH_EXCEPTION(aNaN, LIBC_NAMESPACE::powf16(aNaN, sNaN), + FE_INVALID); + EXPECT_FP_EQ(1.0, LIBC_NAMESPACE::powf16(aNaN, zero)); + EXPECT_FP_EQ(1.0, LIBC_NAMESPACE::powf16(aNaN, neg_zero)); + EXPECT_FP_IS_NAN(LIBC_NAMESPACE::powf16(aNaN, 1.0)); + EXPECT_FP_IS_NAN(LIBC_NAMESPACE::powf16(aNaN, -1.0)); + EXPECT_FP_IS_NAN(LIBC_NAMESPACE::powf16(aNaN, NEG_ODD_INTEGER)); + EXPECT_FP_IS_NAN(LIBC_NAMESPACE::powf16(aNaN, NEG_EVEN_INTEGER)); + EXPECT_FP_IS_NAN(LIBC_NAMESPACE::powf16(aNaN, NEG_NON_INTEGER)); + EXPECT_FP_IS_NAN(LIBC_NAMESPACE::powf16(aNaN, POS_ODD_INTEGER)); + EXPECT_FP_IS_NAN(LIBC_NAMESPACE::powf16(aNaN, POS_EVEN_INTEGER)); + EXPECT_FP_IS_NAN(LIBC_NAMESPACE::powf16(aNaN, POS_NON_INTEGER)); + EXPECT_FP_IS_NAN(LIBC_NAMESPACE::powf16(aNaN, inf)); + EXPECT_FP_IS_NAN(LIBC_NAMESPACE::powf16(aNaN, neg_inf)); + EXPECT_FP_IS_NAN(LIBC_NAMESPACE::powf16(aNaN, aNaN)); + + // powf16 ( base, inf ) + EXPECT_FP_EQ(zero, LIBC_NAMESPACE::powf16(0.1f16, inf)); + EXPECT_FP_EQ(zero, LIBC_NAMESPACE::powf16(-0.1f16, inf)); + EXPECT_FP_EQ(inf, LIBC_NAMESPACE::powf16(1.1f16, inf)); + EXPECT_FP_EQ(inf, LIBC_NAMESPACE::powf16(-1.1f16, inf)); + + // powf16 ( base, -inf ) + EXPECT_FP_EQ(inf, LIBC_NAMESPACE::powf16(0.1f16, neg_inf)); + EXPECT_FP_EQ(inf, LIBC_NAMESPACE::powf16(-0.1f16, neg_inf)); + EXPECT_FP_EQ(zero, LIBC_NAMESPACE::powf16(1.1f16, neg_inf)); + EXPECT_FP_EQ(zero, LIBC_NAMESPACE::powf16(-1.1f16, neg_inf)); + + // Exact powers of 2: + // TODO: Enable these tests when we use exp2. + // EXPECT_FP_EQ(0x1.0p15, LIBC_NAMESPACE::powf16(2.0, 15.0)); + // EXPECT_FP_EQ(0x1.0p126, LIBC_NAMESPACE::powf16(2.0, 126.0)); + // EXPECT_FP_EQ(0x1.0p-45, LIBC_NAMESPACE::powf16(2.0, -45.0)); + // EXPECT_FP_EQ(0x1.0p-126, LIBC_NAMESPACE::powf16(2.0, -126.0)); + // EXPECT_FP_EQ(0x1.0p-149, LIBC_NAMESPACE::powf16(2.0, -149.0)); + + // Exact powers of 10: + // TODO: Enable these tests when we use exp10 + // EXPECT_FP_EQ(1.0, LIBC_NAMESPACE::powf16(10.0, 0.0)); + // EXPECT_FP_EQ(10.0, LIBC_NAMESPACE::powf16(10.0, 1.0)); + // EXPECT_FP_EQ(100.0, LIBC_NAMESPACE::powf16(10.0, 2.0)); + // EXPECT_FP_EQ(1000.0, LIBC_NAMESPACE::powf16(10.0, 3.0)); + // EXPECT_FP_EQ(10000.0, LIBC_NAMESPACE::powf16(10.0, 4.0)); + // EXPECT_FP_EQ(100000.0, LIBC_NAMESPACE::powf16(10.0, 5.0)); + // EXPECT_FP_EQ(1000000.0, LIBC_NAMESPACE::powf16(10.0, 6.0)); + // EXPECT_FP_EQ(10000000.0, LIBC_NAMESPACE::powf16(10.0, 7.0)); + // EXPECT_FP_EQ(100000000.0, LIBC_NAMESPACE::powf16(10.0, 8.0)); + // EXPECT_FP_EQ(1000000000.0, LIBC_NAMESPACE::powf16(10.0, 9.0)); + // EXPECT_FP_EQ(10000000000.0, LIBC_NAMESPACE::powf16(10.0, 10.0)); + + // Overflow / Underflow: + if (ROUNDING_MODES[i] != RoundingMode::Downward && + ROUNDING_MODES[i] != RoundingMode::TowardZero) { + EXPECT_FP_EQ_WITH_EXCEPTION(inf, LIBC_NAMESPACE::powf16(3.1f16, 21.0), + FE_OVERFLOW); + } + if (ROUNDING_MODES[i] != RoundingMode::Upward) { + EXPECT_FP_EQ_WITH_EXCEPTION(0.0, LIBC_NAMESPACE::powf16(3.1f16, -21.0), + FE_UNDERFLOW); + } + } +} From 21275a3d2e754ae2e08779f70d3e35d0375530a0 Mon Sep 17 00:00:00 2001 From: anonmiraj Date: Sat, 20 Sep 2025 08:09:23 +0300 Subject: [PATCH 02/12] fix formatting --- libc/src/math/powf16.h | 2 +- libc/test/src/math/powf16_test.cpp | 6 +----- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/libc/src/math/powf16.h b/libc/src/math/powf16.h index 52f9848f4a2be..bd50f16edeede 100644 --- a/libc/src/math/powf16.h +++ b/libc/src/math/powf16.h @@ -1,4 +1,4 @@ -//===-- Implementation header for powf16 --------------------------*- C++ -*-===// +//===-- Implementation header for powf16 ------------------------*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/libc/test/src/math/powf16_test.cpp b/libc/test/src/math/powf16_test.cpp index 171cb098114d4..030f0e9cf8be3 100644 --- a/libc/test/src/math/powf16_test.cpp +++ b/libc/test/src/math/powf16_test.cpp @@ -6,7 +6,6 @@ // //===----------------------------------------------------------------------===// - #include "src/math/powf16.h" #include "test/UnitTest/FPMatcher.h" #include "test/UnitTest/Test.h" @@ -97,8 +96,7 @@ TEST_F(LlvmLibcPowF16Test, InFloat16Range) { if (fails || (count < cc)) { tlog << " powf16 failed: " << fails << "/" << count << "/" << cc << " tests.\n" - << " Max ULPs is at most: " << static_cast(tol) - << ".\n"; + << " Max ULPs is at most: " << static_cast(tol) << ".\n"; } if (fails) { mpfr::BinaryInput inputs{mx, my}; @@ -118,5 +116,3 @@ TEST_F(LlvmLibcPowF16Test, InFloat16Range) { tlog << " Test Rounding Toward Zero...\n"; test(mpfr::RoundingMode::TowardZero); } - - From 7db870385ea3d2aec374b96749504eb8b01c73fb Mon Sep 17 00:00:00 2001 From: anonmiraj Date: Tue, 23 Sep 2025 09:31:48 +0300 Subject: [PATCH 03/12] add missing argument in math.yml --- libc/include/math.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/libc/include/math.yaml b/libc/include/math.yaml index ff1608e3ec3dd..09e9bfed6dad4 100644 --- a/libc/include/math.yaml +++ b/libc/include/math.yaml @@ -2089,6 +2089,7 @@ functions: return_type: _Float16 arguments: - type: _Float16 + - type: _Float16 - name: powi standards: llvm_libc_ext return_type: double From 7a8db3f659e67bed70a8e301079d6f888b0d14c2 Mon Sep 17 00:00:00 2001 From: anonmiraj Date: Tue, 23 Sep 2025 09:32:00 +0300 Subject: [PATCH 04/12] remove optimzation option --- libc/src/math/generic/CMakeLists.txt | 2 -- 1 file changed, 2 deletions(-) diff --git a/libc/src/math/generic/CMakeLists.txt b/libc/src/math/generic/CMakeLists.txt index fa5e0ed5b8451..b1bc47d64fa82 100644 --- a/libc/src/math/generic/CMakeLists.txt +++ b/libc/src/math/generic/CMakeLists.txt @@ -1601,8 +1601,6 @@ add_entrypoint_object( libc.src.__support.FPUtil.sqrt libc.src.__support.macros.optimization libc.src.__support.math.expxf16_utils - COMPILE_OPTIONS - -O3 ) add_entrypoint_object( From 7bb9c40553f60f5f24b5dbc65289fc8ae480c74b Mon Sep 17 00:00:00 2001 From: anonmiraj Date: Wed, 24 Sep 2025 22:58:01 +0300 Subject: [PATCH 05/12] make tests exaustive --- libc/test/src/math/powf16_test.cpp | 145 ++++++++++------------------- 1 file changed, 51 insertions(+), 94 deletions(-) diff --git a/libc/test/src/math/powf16_test.cpp b/libc/test/src/math/powf16_test.cpp index 030f0e9cf8be3..4f70291041bfc 100644 --- a/libc/test/src/math/powf16_test.cpp +++ b/libc/test/src/math/powf16_test.cpp @@ -12,107 +12,64 @@ #include "utils/MPFRWrapper/MPFRUtils.h" using LlvmLibcPowF16Test = LIBC_NAMESPACE::testing::FPTest; -using LIBC_NAMESPACE::testing::tlog; namespace mpfr = LIBC_NAMESPACE::testing::mpfr; -TEST_F(LlvmLibcPowF16Test, TrickyInputs) { - // These values are in half precision. - constexpr mpfr::BinaryInput INPUTS[] = { - {static_cast(0x1.08p-2f), static_cast(0x1.0cp-1f)}, - {static_cast(0x1.66p-1f), static_cast(0x1.f1p+1f)}, - {static_cast(0x1.c04p-1f), static_cast(0x1.2p+12f)}, - {static_cast(0x1.aep-1f), static_cast(0x1.f9p-1f)}, - {static_cast(0x1.ffcp-1f), static_cast(0x1.fffp-2f)}, - {static_cast(0x1.f55p-1f), static_cast(0x1.88p+12f)}, - {static_cast(0x1.e84p-1f), static_cast(0x1.2cp+13f)}, - }; - - for (auto input : INPUTS) { - float16 x = input.x; - float16 y = input.y; - EXPECT_MPFR_MATCH(mpfr::Operation::Pow, input, LIBC_NAMESPACE::powf16(x, y), - 1.0); // 1 ULP tolerance is enough for f16 +static constexpr float16 SELECTED_VALS[] = { + 0.5f16, 0.83984375f16, 1.0f16, 2.0f16, 3.0f16, 3.140625f16, 15.5f16, +}; + +// Test selected x values against all possible y values. +TEST_F(LlvmLibcPowF16Test, SelectedX_AllY) { + for (size_t i = 0; i < sizeof(SELECTED_VALS) / sizeof(SELECTED_VALS[0]); + ++i) { + float16 x = SELECTED_VALS[i]; + for (uint16_t y_u = 0; y_u <= 0x7c00U; ++y_u) { + float16 y = FPBits(y_u).get_val(); + + mpfr::BinaryInput input{x, y}; + float16 result = LIBC_NAMESPACE::powf16(x, y); + EXPECT_MPFR_MATCH_ALL_ROUNDING(mpfr::Operation::Pow, input, result, 1.0); + + // If the result is infinity and we expect it to continue growing, we can + // terminate the loop early. + if (FPBits(result).is_inf() && FPBits(result).is_pos()) { + // For x > 1, as y increases in the positive range, pow remains inf. + if (x > static_cast(1.0f) && y > static_cast(0.0f)) { + // The y_u loop covers the positive range up to 0x7BFF. + break; + } + // For 0 < x < 1, as y becomes more negative, pow becomes inf. + if (x > static_cast(0.0f) && x < static_cast(1.0f) && + y < static_cast(0.0f)) { + // The y_u loop covers the negative range from 0x8000. + break; + } + } + } } } -TEST_F(LlvmLibcPowF16Test, InFloat16Range) { - constexpr uint16_t X_COUNT = 63; - constexpr uint16_t X_START = FPBits(static_cast(0.25)).uintval(); - constexpr uint16_t X_STOP = FPBits(static_cast(4.0)).uintval(); - constexpr uint16_t X_STEP = (X_STOP - X_START) / X_COUNT; - - constexpr uint16_t Y_COUNT = 59; - constexpr uint16_t Y_START = FPBits(static_cast(0.25)).uintval(); - constexpr uint16_t Y_STOP = FPBits(static_cast(4.0)).uintval(); - constexpr uint16_t Y_STEP = (Y_STOP - Y_START) / Y_COUNT; - - auto test = [&](mpfr::RoundingMode rounding_mode) { - mpfr::ForceRoundingMode __r(rounding_mode); - if (!__r.success) - return; - - uint64_t fails = 0; - uint64_t count = 0; - uint64_t cc = 0; - float16 mx = 0.0, my = 0.0, mr = 0.0; - double tol = 1.0; // start with 1 ULP for half precision - - for (uint16_t i = 0, v = X_START; i <= X_COUNT; ++i, v += X_STEP) { - float16 x = FPBits(v).get_val(); - if (FPBits(x).is_inf_or_nan() || x < static_cast(0.0)) - continue; - - for (uint16_t j = 0, w = Y_START; j <= Y_COUNT; ++j, w += Y_STEP) { - float16 y = FPBits(w).get_val(); - if (FPBits(y).is_inf_or_nan()) - continue; - - float16 result = LIBC_NAMESPACE::powf16(x, y); - ++cc; - if (FPBits(result).is_inf_or_nan()) - continue; - - ++count; - mpfr::BinaryInput inputs{x, y}; - - if (!TEST_MPFR_MATCH_ROUNDING_SILENTLY(mpfr::Operation::Pow, inputs, - result, 1.0, rounding_mode)) { - ++fails; - while (!TEST_MPFR_MATCH_ROUNDING_SILENTLY( - mpfr::Operation::Pow, inputs, result, tol, rounding_mode)) { - mx = x; - my = y; - mr = result; - - if (tol > 128.0) // half precision is only ~11 bits - break; - - tol *= 2.0; - } +// Test selected y values against all possible x values. +TEST_F(LlvmLibcPowF16Test, SelectedY_AllX) { + for (size_t i = 0; i < sizeof(SELECTED_VALS) / sizeof(SELECTED_VALS[0]); + ++i) { + float16 y = SELECTED_VALS[i]; + for (uint16_t x_u = 0; x_u <= 0x7c00U; ++x_u) { + float16 x = FPBits(x_u).get_val(); + mpfr::BinaryInput input{x, y}; + float16 result = LIBC_NAMESPACE::powf16(x, y); + EXPECT_MPFR_MATCH_ALL_ROUNDING(mpfr::Operation::Pow, input, result, 1.0); + + // If the result is infinity and we expect it to continue growing, we can + // terminate the loop early. + if (FPBits(result).is_inf() && FPBits(result).is_pos()) { + // For y > 0, as x increases in the positive range, pow remains inf. + if (y > 0.0f16 && x > 0.0f16) { + // The x_u loop covers the positive range up to 0x7BFF. + break; } } } - if (fails || (count < cc)) { - tlog << " powf16 failed: " << fails << "/" << count << "/" << cc - << " tests.\n" - << " Max ULPs is at most: " << static_cast(tol) << ".\n"; - } - if (fails) { - mpfr::BinaryInput inputs{mx, my}; - EXPECT_MPFR_MATCH(mpfr::Operation::Pow, inputs, mr, 1.0, rounding_mode); - } - }; - - tlog << " Test Rounding To Nearest...\n"; - test(mpfr::RoundingMode::Nearest); - - tlog << " Test Rounding Downward...\n"; - test(mpfr::RoundingMode::Downward); - - tlog << " Test Rounding Upward...\n"; - test(mpfr::RoundingMode::Upward); - - tlog << " Test Rounding Toward Zero...\n"; - test(mpfr::RoundingMode::TowardZero); + } } From 28d52f6c1ca8b4094f3c7c5f12837a7e623f09ff Mon Sep 17 00:00:00 2001 From: anonmiraj Date: Wed, 24 Sep 2025 22:58:15 +0300 Subject: [PATCH 06/12] update log poly to double --- libc/src/math/generic/powf16.cpp | 35 ++++++++++++++++---------------- 1 file changed, 18 insertions(+), 17 deletions(-) diff --git a/libc/src/math/generic/powf16.cpp b/libc/src/math/generic/powf16.cpp index 8e2e6b552632f..1c998ca02e7e8 100644 --- a/libc/src/math/generic/powf16.cpp +++ b/libc/src/math/generic/powf16.cpp @@ -199,14 +199,13 @@ LLVM_LIBC_FUNCTION(float16, powf16, (float16 x, float16 y)) { int m = -FPBits::EXP_BIAS; // When x is subnormal, normalize it by multiplying by 2^FRACTION_LEN. - if ((x_u_log & FPBits::EXP_MASK) == 0U) { - constexpr float NORMALIZE_EXP = - static_cast(1U << FPBits::FRACTION_LEN); - x_bits = FPBits(x_bits.get_val() * fputil::cast(NORMALIZE_EXP)); + if ((x_u_log & FPBits::EXP_MASK) == 0U) { // Subnormal x + constexpr double NORMALIZE_EXP = 1.0 * (1U << FPBits::FRACTION_LEN); + x_bits = FPBits(fputil::cast( + fputil::cast(x_bits.get_val()) * NORMALIZE_EXP)); x_u_log = x_bits.uintval(); m -= FPBits::FRACTION_LEN; } - // Extract the mantissa and index into small lookup tables. uint16_t mant = x_bits.get_mantissa(); // Use the highest 5 fractional bits of the mantissa as the index f. @@ -217,28 +216,29 @@ LLVM_LIBC_FUNCTION(float16, powf16, (float16 x, float16 y)) { // Add the hidden bit to the mantissa. // 1 <= m_x < 2 x_bits.set_biased_exponent(FPBits::EXP_BIAS); - float mant_f = x_bits.get_val(); + double mant_d = x_bits.get_val(); // Range reduction for log2(m_x): // v = r * m_x - 1, where r is a power of 2 from a lookup table. // The computation is exact for half-precision, and -2^-5 <= v < 2^-4. // Then m_x = (1 + v) / r, and log2(m_x) = log2(1 + v) - log2(r). - float v = fputil::multiply_add(mant_f, ONE_OVER_F_F[f], -1.0f); - + double v = + fputil::multiply_add(mant_d, fputil::cast(ONE_OVER_F_F[f]), -1.0); // For half-precision accuracy, we use a degree-2 polynomial approximation: // P(v) ~ log2(1 + v) / v // Generated by Sollya with: // > P = fpminimax(log2(1+x)/x, 2, [|D...|], [-2^-5, 2^-4]); // The coefficients are rounded from the Sollya output. - float log2p1_d_over_f = - v * fputil::polyeval(v, 0x1.715476p+0f, -0x1.71771ap-1f, 0x1.ecb38ep-2f); + + double log2p1_d_over_f = + v * fputil::polyeval(v, 0x1.715476p+0, -0x1.71771ap-1, 0x1.ecb38ep-2); // log2(1.mant) = log2(f) + log2(1 + v) - float log2_1_mant = LOG2F_F[f] + log2p1_d_over_f; + double log2_1_mant = LOG2F_F[f] + log2p1_d_over_f; // Complete log2(x) = e_x + log2(m_x) - float log2_x = static_cast(m) + log2_1_mant; + double log2_x = static_cast(m) + log2_1_mant; // z = y * log2(x) // Now compute 2^z = 2^(n + r), with n integer and r in [-0.5, 0.5]. @@ -246,10 +246,7 @@ LLVM_LIBC_FUNCTION(float16, powf16, (float16 x, float16 y)) { // Check for overflow/underflow for half-precision. // Half-precision range is approximately 2^-24 to 2^15. - if (z > 15.0) { - fputil::raise_except_if_required(FE_OVERFLOW); - return FPBits::inf().get_val(); - } + // if (z < -24.0) { fputil::raise_except_if_required(FE_UNDERFLOW); return fputil::cast(0.0f); @@ -282,7 +279,11 @@ LLVM_LIBC_FUNCTION(float16, powf16, (float16 x, float16 y)) { uint64_t exp_bits = static_cast(n_int + 1023) << 52; double pow2_n = cpp::bit_cast(exp_bits); - float16 result = fputil::cast(pow2_n * exp2_r); + + double result_d = (pow2_n * exp2_r); + float16 result = fputil::cast(result_d); + if(result_d==65504.0) + return (65504.f16); if (result_sign) { FPBits result_bits(result); From 13dd3fff36f1bedb1764278655c0ea1bbaa89877 Mon Sep 17 00:00:00 2001 From: anonmiraj Date: Sat, 11 Oct 2025 13:05:30 +0300 Subject: [PATCH 07/12] fix powf16 tests --- libc/test/src/math/powf16_test.cpp | 34 ++++-------------------------- 1 file changed, 4 insertions(+), 30 deletions(-) diff --git a/libc/test/src/math/powf16_test.cpp b/libc/test/src/math/powf16_test.cpp index 4f70291041bfc..090289ba1983d 100644 --- a/libc/test/src/math/powf16_test.cpp +++ b/libc/test/src/math/powf16_test.cpp @@ -28,24 +28,8 @@ TEST_F(LlvmLibcPowF16Test, SelectedX_AllY) { float16 y = FPBits(y_u).get_val(); mpfr::BinaryInput input{x, y}; - float16 result = LIBC_NAMESPACE::powf16(x, y); - EXPECT_MPFR_MATCH_ALL_ROUNDING(mpfr::Operation::Pow, input, result, 1.0); - - // If the result is infinity and we expect it to continue growing, we can - // terminate the loop early. - if (FPBits(result).is_inf() && FPBits(result).is_pos()) { - // For x > 1, as y increases in the positive range, pow remains inf. - if (x > static_cast(1.0f) && y > static_cast(0.0f)) { - // The y_u loop covers the positive range up to 0x7BFF. - break; - } - // For 0 < x < 1, as y becomes more negative, pow becomes inf. - if (x > static_cast(0.0f) && x < static_cast(1.0f) && - y < static_cast(0.0f)) { - // The y_u loop covers the negative range from 0x8000. - break; - } - } + EXPECT_MPFR_MATCH_ALL_ROUNDING(mpfr::Operation::Pow, input, + LIBC_NAMESPACE::powf16(x, y), 0.5); } } } @@ -58,18 +42,8 @@ TEST_F(LlvmLibcPowF16Test, SelectedY_AllX) { for (uint16_t x_u = 0; x_u <= 0x7c00U; ++x_u) { float16 x = FPBits(x_u).get_val(); mpfr::BinaryInput input{x, y}; - float16 result = LIBC_NAMESPACE::powf16(x, y); - EXPECT_MPFR_MATCH_ALL_ROUNDING(mpfr::Operation::Pow, input, result, 1.0); - - // If the result is infinity and we expect it to continue growing, we can - // terminate the loop early. - if (FPBits(result).is_inf() && FPBits(result).is_pos()) { - // For y > 0, as x increases in the positive range, pow remains inf. - if (y > 0.0f16 && x > 0.0f16) { - // The x_u loop covers the positive range up to 0x7BFF. - break; - } - } + EXPECT_MPFR_MATCH_ALL_ROUNDING(mpfr::Operation::Pow, input, + LIBC_NAMESPACE::powf16(x, y), 0.5); } } } From ba92b59f302d4d8635eba01a954cf10400ba74d4 Mon Sep 17 00:00:00 2001 From: anonmiraj Date: Sun, 2 Nov 2025 03:18:26 +0200 Subject: [PATCH 08/12] update powf16 approach --- libc/src/math/generic/CMakeLists.txt | 2 +- libc/src/math/generic/powf16.cpp | 333 +++++++++++++++++---------- 2 files changed, 214 insertions(+), 121 deletions(-) diff --git a/libc/src/math/generic/CMakeLists.txt b/libc/src/math/generic/CMakeLists.txt index b1bc47d64fa82..2c47626bed4a5 100644 --- a/libc/src/math/generic/CMakeLists.txt +++ b/libc/src/math/generic/CMakeLists.txt @@ -1600,7 +1600,7 @@ add_entrypoint_object( libc.src.__support.FPUtil.nearest_integer libc.src.__support.FPUtil.sqrt libc.src.__support.macros.optimization - libc.src.__support.math.expxf16_utils + libc.src.__support.math.exp10f_utils ) add_entrypoint_object( diff --git a/libc/src/math/generic/powf16.cpp b/libc/src/math/generic/powf16.cpp index 1c998ca02e7e8..22010e954183c 100644 --- a/libc/src/math/generic/powf16.cpp +++ b/libc/src/math/generic/powf16.cpp @@ -21,12 +21,53 @@ #include "src/__support/macros/config.h" #include "src/__support/macros/optimization.h" #include "src/__support/macros/properties/types.h" -#include "src/__support/math/expxf16_utils.h" +#include "src/__support/math/exp10f_utils.h" +#include "src/math/generic/common_constants.h" namespace LIBC_NAMESPACE_DECL { namespace { +static inline double exp2_range_reduced(double x) { + // k = round(x * 32) => (hi + mid) * 2^5 + double kf = fputil::nearest_integer(x * 32.0); + int k = static_cast(kf); + // dx = lo = x - (hi + mid) = x - k * 2^(-5) + double dx = fputil::multiply_add(-0x1.0p-5, kf, x); // -2^-5 * k + x + + // hi = k >> MID_BITS + // exp_hi = hi shifted into double exponent field + int64_t hi = static_cast(k >> ExpBase::MID_BITS); + int64_t exp_hi = static_cast( + static_cast(hi) << fputil::FPBits::FRACTION_LEN); + + // mh_bits = bits for 2^hi * 2^mid (lookup contains base bits for 2^mid) + int tab_index = k & ExpBase::MID_MASK; // mid index in [0, 31] + int64_t mh_bits = ExpBase::EXP_2_MID[tab_index] + exp_hi; + + // mh = 2^(hi + mid) + double mh = fputil::FPBits(static_cast(mh_bits)).get_val(); + + // Degree-5 polynomial approximating (2^x - 1)/x generating by Sollya with: + // > P = fpminimax((2^x - 1)/x, 5, [|D...|], [-1/32. 1/32]); + constexpr double COEFFS[5] = {0x1.62e42fefa39efp-1, 0x1.ebfbdff8131c4p-3, + 0x1.c6b08d7061695p-5, 0x1.3b2b1bee74b2ap-7, + 0x1.5d88091198529p-10}; + + double dx_sq = dx * dx; + double c1 = fputil::multiply_add(dx, COEFFS[0], 1.0); // 1 + ln2*dx + double c2 = + fputil::multiply_add(dx, COEFFS[2], COEFFS[1]); // COEFF1 + COEFF2*dx + double c3 = + fputil::multiply_add(dx, COEFFS[4], COEFFS[3]); // COEFF3 + COEFF4*dx + double p = fputil::multiply_add(dx_sq, c3, c2); // c2 + c3*dx^2 + + // 2^x = 2^(hi+mid) * 2^dx + // ≈ mh * (1 + dx * P(dx)) + // = mh + (mh * dx) * P(dx) + double result = fputil::multiply_add(p, dx_sq * mh, c1 * mh); + return result; +} bool is_odd_integer(float16 x) { using FPBits = fputil::FPBits; FPBits xbits(x); @@ -66,6 +107,7 @@ LLVM_LIBC_FUNCTION(float16, powf16, (float16 x, float16 y)) { uint16_t x_u = xbits.uintval(); uint16_t x_a = x_abs.uintval(); uint16_t y_a = y_abs.uintval(); + uint16_t y_u = ybits.uintval(); bool result_sign = false; ///////// BEGIN - Check exceptional cases //////////////////////////////////// @@ -74,57 +116,106 @@ LLVM_LIBC_FUNCTION(float16, powf16, (float16 x, float16 y)) { fputil::raise_except_if_required(FE_INVALID); return FPBits::quiet_nan().get_val(); } - - // - if (LIBC_UNLIKELY(ybits.is_zero() || x_u == FPBits::one().uintval() || - x_u >= FPBits::inf().uintval() || - x_u < FPBits::min_normal().uintval())) { + if (LIBC_UNLIKELY( + ybits.is_zero() || x_u == FPBits::one().uintval() || xbits.is_nan() || + ybits.is_nan() || x_u == FPBits::one().uintval() || + x_u == FPBits::zero().uintval() || x_u >= FPBits::inf().uintval() || + y_u >= FPBits::inf().uintval() || + x_u < FPBits::min_normal().uintval() || y_a == 0x3400U || + y_a == 0x3800U || y_a == 0x3A00U || y_a == 0x3800U || + y_a == 0x3800U || y_a == 0x3D00U || y_a == 0x3E00U || + y_a == 0x4100U || y_a == 0x4300U || y_a == 0x3c00U || + y_a == 0x4000U || is_integer(y))) { // pow(x, 0) = 1 if (ybits.is_zero()) { - return fputil::cast(1.0f); + return 1.0f16; } // pow(1, Y) = 1 if (x_u == FPBits::one().uintval()) { - return fputil::cast(1.0f); + return 1.0f16; + } + // 4. Handle remaining NaNs + // pow(NaN, y) = NaN (for y != 0) + if (xbits.is_nan()) { + return x; + } + // pow(x, NaN) = NaN (for x != 1) + if (ybits.is_nan()) { + return y; } - switch (y_a) { + case 0x3400U: // y = ±0.25 (1/4) + case 0x3800U: // y = ±0.5 (1/2) + case 0x3A00U: // y = ±0.75 (3/4) + case 0x3D00U: // y = ±1.25 (5/4) + case 0x3E00U: // y = ±1.5 (3/2) + case 0x4100U: // y = ±2.5 (5/2) + case 0x4300U: // y = ±3.5 (7/2) + { + if (xbits.is_zero()) { + if (y_sign) { + // pow(±0, negative) handled below + break; + } else { + // pow(±0, positive_fractional) = +0 + return FPBits::zero(Sign::POS).get_val(); + } + } - case 0x3800U: { // y = +-0.5 - if (LIBC_UNLIKELY( - (x == 0.0 || x_u == FPBits::inf(Sign::NEG).uintval()))) { - // pow(-0, 1/2) = +0 - // pow(-inf, 1/2) = +inf - // Make sure it works correctly for FTZ/DAZ. - return fputil::cast(y_sign ? (1.0 / (x * x)) : (x * x)); + if (x_sign && !xbits.is_zero()) { + break; // pow(negative, non-integer) = NaN } - return fputil::cast(y_sign ? (1.0 / fputil::sqrt(x)) - : fputil::sqrt(x)); + + double x_d = static_cast(x); + double sqrt_x = fputil::sqrt(x_d); + double fourth_root = fputil::sqrt(sqrt_x); + double result_d; + + // Compute based on exponent value + switch (y_a) { + case 0x3400U: // 0.25 = x^(1/4) + result_d = fourth_root; + break; + case 0x3800U: // 0.5 = x^(1/2) + result_d = sqrt_x; + break; + case 0x3A00U: // 0.75 = x^(1/2) * x^(1/4) + result_d = sqrt_x * fourth_root; + break; + case 0x3D00U: // 1.25 = x * x^(1/4) + result_d = x_d * fourth_root; + break; + case 0x3E00U: // 1.5 = x * x^(1/2) + result_d = x_d * sqrt_x; + break; + case 0x4100U: // 2.5 = x^2 * x^(1/2) + result_d = x_d * x_d * sqrt_x; + break; + case 0x4300U: // 3.5 = x^3 * x^(1/2) + result_d = x_d * x_d * x_d * sqrt_x; + break; + } + + result_d = y_sign ? (1.0 / result_d) : result_d; + return fputil::cast(result_d); } case 0x3c00U: // y = +-1.0 return fputil::cast(y_sign ? (1.0 / x) : x); case 0x4000U: // y = +-2.0 - return fputil::cast(y_sign ? (1.0 / (x * x)) : (x * x)); + double result_d = static_cast(x) * static_cast(x); + return fputil::cast(y_sign ? (1.0 / (result_d)) : (result_d)); } // TODO: Speed things up with pow(2, y) = exp2(y) and pow(10, y) = exp10(y). - - // Propagate remaining quiet NaNs. - if (xbits.is_quiet_nan()) { - return x; - } - if (ybits.is_quiet_nan()) { - return y; - } - - // x = -1: special case for integer exponents + // + // pow(-1, y) for integer y if (x_u == FPBits::one(Sign::NEG).uintval()) { if (is_integer(y)) { if (is_odd_integer(y)) { - return fputil::cast(-1.0f); + return -1.0f16; } else { - return fputil::cast(1.0f); + return 1.0f16; } } // pow(-1, non-integer) = NaN @@ -133,7 +224,7 @@ LLVM_LIBC_FUNCTION(float16, powf16, (float16 x, float16 y)) { return FPBits::quiet_nan().get_val(); } - // x = 0 cases + // pow(±0, y) cases if (xbits.is_zero()) { if (y_sign) { // pow(+-0, negative) = +-inf and raise FE_DIVBYZERO @@ -163,9 +254,13 @@ LLVM_LIBC_FUNCTION(float16, powf16, (float16 x, float16 y)) { bool x_abs_less_than_one = x_a < FPBits::one().uintval(); if ((x_abs_less_than_one && !y_sign) || (!x_abs_less_than_one && y_sign)) { - return fputil::cast(0.0f); + // |x| < 1 and y = +inf => 0.0 + // |x| > 1 and y = -inf => 0.0 + return 0.0f; } else { - return FPBits::inf().get_val(); + // |x| > 1 and y = +inf => +inf + // |x| < 1 and y = -inf => +inf + return FPBits::inf(Sign::POS).get_val(); } } @@ -176,121 +271,119 @@ LLVM_LIBC_FUNCTION(float16, powf16, (float16 x, float16 y)) { return FPBits::quiet_nan().get_val(); } - // For negative x with integer y, compute pow(|x|, y) and adjust sign - if (x_sign) { - x = -x; - if (is_odd_integer(y)) { - result_sign = true; + bool result_sign = false; + if (x_sign && is_integer(y)) { + result_sign = is_odd_integer(y); + } + + if (is_integer(y)) { + double base = x_abs.get_val(); + double res = 1.0; + int yi = static_cast(y_abs.get_val()); + + // Fast exponentiation by squaring + while (yi > 0) { + if (yi & 1) + res *= base; + base *= base; + yi = yi >> 1; + } + + if (y_sign) { + res = 1.0 / res; + } + + if (result_sign) { + res = -res; + } + + if (FPBits(fputil::cast(res)).is_inf()) { + fputil::raise_except_if_required(FE_OVERFLOW); + res = result_sign ? -0x1.0p20 : 0x1.0p20; } + + float16 final_res = fputil::cast(res); + return final_res; } } + ///////// END - Check exceptional cases ////////////////////////////////////// - // x^y = 2^( y * log2(x) ) - // = 2^( y * ( e_x + log2(m_x) ) ) - // First we compute log2(x) = e_x + log2(m_x) + // Core computation: x^y = 2^( y * log2(x) ) + // We compute log2(x) = log(x) / log(2) using a polynomial approximation. - using namespace math::expxf16_internal; + // The exponent part (m) is added later to get the final log(x). FPBits x_bits(x); - uint16_t x_u_log = x_bits.uintval(); // Extract exponent field of x. - int m = -FPBits::EXP_BIAS; - - // When x is subnormal, normalize it by multiplying by 2^FRACTION_LEN. - if ((x_u_log & FPBits::EXP_MASK) == 0U) { // Subnormal x - constexpr double NORMALIZE_EXP = 1.0 * (1U << FPBits::FRACTION_LEN); - x_bits = FPBits(fputil::cast( - fputil::cast(x_bits.get_val()) * NORMALIZE_EXP)); - x_u_log = x_bits.uintval(); - m -= FPBits::FRACTION_LEN; + int m = x_bits.get_exponent(); + + // When x is subnormal, normalize it by adjusting m. + if ((x_u_log & FPBits::EXP_MASK) == 0U) { + unsigned leading_zeros = + cpp::countl_zero(static_cast(x_u_log)) - (32 - 16); + + constexpr unsigned SUBNORMAL_SHIFT_CORRECTION = 5; + unsigned shift = leading_zeros - SUBNORMAL_SHIFT_CORRECTION; + + x_bits.set_mantissa(static_cast(x_u_log << shift)); + + m = 1 - FPBits::EXP_BIAS - static_cast(shift); } + // Extract the mantissa and index into small lookup tables. uint16_t mant = x_bits.get_mantissa(); - // Use the highest 5 fractional bits of the mantissa as the index f. - int f = mant >> 5; - - m += (x_u_log >> FPBits::FRACTION_LEN); + // Use the highest 7 fractional bits of the mantissa as the index f. + int f = mant >> (FPBits::FRACTION_LEN - 7); - // Add the hidden bit to the mantissa. - // 1 <= m_x < 2 + // Reconstruct the mantissa value m_x so it's in the range [1.0, 2.0). x_bits.set_biased_exponent(FPBits::EXP_BIAS); double mant_d = x_bits.get_val(); - // Range reduction for log2(m_x): - // v = r * m_x - 1, where r is a power of 2 from a lookup table. - // The computation is exact for half-precision, and -2^-5 <= v < 2^-4. - // Then m_x = (1 + v) / r, and log2(m_x) = log2(1 + v) - log2(r). - - double v = - fputil::multiply_add(mant_d, fputil::cast(ONE_OVER_F_F[f]), -1.0); - // For half-precision accuracy, we use a degree-2 polynomial approximation: - // P(v) ~ log2(1 + v) / v - // Generated by Sollya with: - // > P = fpminimax(log2(1+x)/x, 2, [|D...|], [-2^-5, 2^-4]); - // The coefficients are rounded from the Sollya output. + double v = fputil::multiply_add(mant_d, RD[f], -1.0); + double extra_factor = static_cast(m) + LOG2_R[f]; - double log2p1_d_over_f = - v * fputil::polyeval(v, 0x1.715476p+0, -0x1.71771ap-1, 0x1.ecb38ep-2); + // Degree-5 polynomial approximation of log2 generated by Sollya with: + // > P = fpminimax(log2(1 + x)/x, 4, [|1, D...|], [-2^-8, 2^-7]); + constexpr double COEFFS[5] = {0x1.71547652b8133p0, -0x1.71547652d1e33p-1, + 0x1.ec70a098473dep-2, -0x1.7154c5ccdf121p-2, + 0x1.2514fd90a130ap-2}; - // log2(1.mant) = log2(f) + log2(1 + v) - double log2_1_mant = LOG2F_F[f] + log2p1_d_over_f; + double vsq = v * v; + double c0 = fputil::multiply_add(v, COEFFS[0], 0); + double c1 = fputil::multiply_add(v, COEFFS[2], COEFFS[1]); + double c2 = fputil::multiply_add(v, COEFFS[4], COEFFS[3]); - // Complete log2(x) = e_x + log2(m_x) - double log2_x = static_cast(m) + log2_1_mant; + double log2_x = fputil::polyeval(vsq, c0, c1, c2); - // z = y * log2(x) - // Now compute 2^z = 2^(n + r), with n integer and r in [-0.5, 0.5]. - double z = fputil::cast(y) * log2_x; + double y_d = fputil::cast(y); + double z = fputil::multiply_add(y_d, log2_x, y_d * extra_factor); - // Check for overflow/underflow for half-precision. - // Half-precision range is approximately 2^-24 to 2^15. - // - if (z < -24.0) { + // Check for underflow + // Float16 min normal is 2^-14, smallest subnormal is 2^-24 + if (LIBC_UNLIKELY(z < -25.0)) { fputil::raise_except_if_required(FE_UNDERFLOW); - return fputil::cast(0.0f); + return result_sign ? FPBits::zero(Sign::NEG).get_val() + : FPBits::zero(Sign::POS).get_val(); } - double n = fputil::nearest_integer(z); - double r = z - n; - - // Compute 2^r using a degree-7 polynomial for r in [-0.5, 0.5]. - // Generated by Sollya with: - // > P = fpminimax(2^x, 7, [|D...|], [-0.5, 0.5]); - // The polynomial coefficients are rounded from the Sollya output. - constexpr double EXP2_COEFFS[] = { - 0x1p+0, // 1.0 - 0x1.62e42fefa39efp-1, // ln(2) - 0x1.ebfbdff82c58fp-3, // ln(2)^2 / 2 - 0x1.c6b08d704a0c0p-5, // ln(2)^3 / 6 - 0x1.3b2ab6fba4e77p-7, // ln(2)^4 / 24 - 0x1.5d87fe78a6737p-10, // ln(2)^5 / 120 - 0x1.430912f86a805p-13, // ln(2)^6 / 720 - 0x1.10e4104ac8015p-17 // ln(2)^7 / 5040 - }; - - double exp2_r = fputil::polyeval( - r, EXP2_COEFFS[0], EXP2_COEFFS[1], EXP2_COEFFS[2], EXP2_COEFFS[3], - EXP2_COEFFS[4], EXP2_COEFFS[5], EXP2_COEFFS[6], EXP2_COEFFS[7]); - - // Compute 2^n by direct bit manipulation. - int n_int = static_cast(n); - uint64_t exp_bits = static_cast(n_int + 1023) << 52; - double pow2_n = cpp::bit_cast(exp_bits); - - - double result_d = (pow2_n * exp2_r); - float16 result = fputil::cast(result_d); - if(result_d==65504.0) - return (65504.f16); + // Check for overflow + // Float16 max is ~2^16 + double result_d; + if (LIBC_UNLIKELY(z > 16.0)) { + fputil::raise_except_if_required(FE_OVERFLOW); + result_d = result_sign ? -0x1.0p20 : 0x1.0p20; + } else { + result_d = exp2_range_reduced(z); + } if (result_sign) { - FPBits result_bits(result); - result_bits.set_sign(Sign::NEG); - result = result_bits.get_val(); + + result_d = -result_d; } + float16 result = fputil::cast((result_d)); return result; } From 561dd53368a24e5105cedb4ef4916ae4dfa85109 Mon Sep 17 00:00:00 2001 From: anonmiraj Date: Sun, 2 Nov 2025 03:18:55 +0200 Subject: [PATCH 09/12] update tests --- libc/test/src/math/exhaustive/CMakeLists.txt | 15 +++ libc/test/src/math/exhaustive/powf16_test.cpp | 72 ++++++++++++ libc/test/src/math/powf16_test.cpp | 110 ++++++++++++++---- 3 files changed, 174 insertions(+), 23 deletions(-) create mode 100644 libc/test/src/math/exhaustive/powf16_test.cpp diff --git a/libc/test/src/math/exhaustive/CMakeLists.txt b/libc/test/src/math/exhaustive/CMakeLists.txt index e40972ea9a879..331a4a556ed52 100644 --- a/libc/test/src/math/exhaustive/CMakeLists.txt +++ b/libc/test/src/math/exhaustive/CMakeLists.txt @@ -312,6 +312,21 @@ add_fp_unittest( -lpthread ) +add_fp_unittest( + powf16_test + NO_RUN_POSTBUILD + NEED_MPFR + SUITE + libc_math_exhaustive_tests + SRCS + powf16_test.cpp + DEPENDS + .exhaustive_test + libc.src.math.powf16 + libc.src.__support.FPUtil.fp_bits + LINK_LIBRARIES + -lpthread +) add_fp_unittest( hypotf_test NO_RUN_POSTBUILD diff --git a/libc/test/src/math/exhaustive/powf16_test.cpp b/libc/test/src/math/exhaustive/powf16_test.cpp new file mode 100644 index 0000000000000..61c99baa77508 --- /dev/null +++ b/libc/test/src/math/exhaustive/powf16_test.cpp @@ -0,0 +1,72 @@ +//===-- Exhaustive test for powf16 ----------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "exhaustive_test.h" +#include "src/__support/FPUtil/FPBits.h" +#include "src/math/powf16.h" +#include "test/UnitTest/FPMatcher.h" +#include "utils/MPFRWrapper/MPFRUtils.h" + +namespace mpfr = LIBC_NAMESPACE::testing::mpfr; + +struct Powf16Checker : public virtual LIBC_NAMESPACE::testing::Test { + using FloatType = float16; + using FPBits = LIBC_NAMESPACE::fputil::FPBits; + using StorageType = typename FPBits::StorageType; + + uint64_t check(uint16_t x_start, uint16_t x_stop, uint16_t y_start, + uint16_t y_stop, mpfr::RoundingMode rounding) { + mpfr::ForceRoundingMode r(rounding); + if (!r.success) + return true; + uint16_t xbits = x_start; + uint64_t failed = 0; + do { + float16 x = FPBits(xbits).get_val(); + uint16_t ybits = y_start; + do { + float16 y = FPBits(ybits).get_val(); + mpfr::BinaryInput input{x, y}; + bool correct = TEST_MPFR_MATCH_ROUNDING_SILENTLY( + mpfr::Operation::Pow, input, LIBC_NAMESPACE::powf16(x, y), 0.5, + rounding); + failed += (!correct); + } while (ybits++ < y_stop); + } while (xbits++ < x_stop); + return failed; + } +}; + +using LlvmLibcPowf16ExhaustiveTest = + LlvmLibcExhaustiveMathTest; + +// Range: x in [0, inf], y in [0, inf] +static constexpr uint16_t POS_START = 0x0000U; +static constexpr uint16_t POS_STOP = 0x7C00U; + +TEST_F(LlvmLibcPowf16ExhaustiveTest, PositiveRange) { + test_full_range_all_roundings(POS_START, POS_STOP, POS_START, POS_STOP); +} + +// Range: x in [-0, -inf], y in [0, inf] +static constexpr uint16_t NEG_START = 0x8000U; +static constexpr uint16_t NEG_STOP = 0xFC00U; + +TEST_F(LlvmLibcPowf16ExhaustiveTest, NegativeBasePositiveExponent) { + test_full_range_all_roundings(NEG_START, NEG_STOP, POS_START, POS_STOP); +} + +// Range: x in [0, inf], y in [-0, -inf] +TEST_F(LlvmLibcPowf16ExhaustiveTest, PositiveBaseNegativeExponent) { + test_full_range_all_roundings(POS_START, POS_STOP, NEG_START, NEG_STOP); +} + +// Range: x in [-0, -inf], y in [-0, -inf] +TEST_F(LlvmLibcPowf16ExhaustiveTest, NegativeRange) { + test_full_range_all_roundings(NEG_START, NEG_STOP, NEG_START, NEG_STOP); +} diff --git a/libc/test/src/math/powf16_test.cpp b/libc/test/src/math/powf16_test.cpp index 090289ba1983d..1610e66184026 100644 --- a/libc/test/src/math/powf16_test.cpp +++ b/libc/test/src/math/powf16_test.cpp @@ -5,45 +5,109 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// - +#include "src/__support/CPP/bit.h" #include "src/math/powf16.h" #include "test/UnitTest/FPMatcher.h" #include "test/UnitTest/Test.h" #include "utils/MPFRWrapper/MPFRUtils.h" using LlvmLibcPowF16Test = LIBC_NAMESPACE::testing::FPTest; +using FPBits = LIBC_NAMESPACE::fputil::FPBits; namespace mpfr = LIBC_NAMESPACE::testing::mpfr; static constexpr float16 SELECTED_VALS[] = { - 0.5f16, 0.83984375f16, 1.0f16, 2.0f16, 3.0f16, 3.140625f16, 15.5f16, -}; - -// Test selected x values against all possible y values. -TEST_F(LlvmLibcPowF16Test, SelectedX_AllY) { - for (size_t i = 0; i < sizeof(SELECTED_VALS) / sizeof(SELECTED_VALS[0]); - ++i) { - float16 x = SELECTED_VALS[i]; - for (uint16_t y_u = 0; y_u <= 0x7c00U; ++y_u) { - float16 y = FPBits(y_u).get_val(); - - mpfr::BinaryInput input{x, y}; + 0.83984375f16, 1.414f16, 0.0625f16, 2.5f16, + 3.140625f16, 15.5f16, 2.f16, 3.25f16}; + +// Test tricky inputs for selected x values against all possible y values. +TEST_F(LlvmLibcPowF16Test, TrickyInput_SelectedX_AllY) { + for (float16 x_base : SELECTED_VALS) { + // Only test non-negative x_base + if (FPBits(x_base).is_neg()) + continue; + + // Loop through normal and subnormal values only (0x0001 to 0x7BFF) + for (uint16_t y_u = 1; y_u <= 0x7BFFU; ++y_u) { + float16 y_base = FPBits(y_u).get_val(); + + // Case 1: (+x, +y) - Standard positive case + mpfr::BinaryInput input1{x_base, y_base}; + EXPECT_MPFR_MATCH_ALL_ROUNDING(mpfr::Operation::Pow, input1, + LIBC_NAMESPACE::powf16(x_base, y_base), + 0.5); + + // Case 2: (+x, -y) - Always valid for positive x + float16 y_neg = -y_base; + mpfr::BinaryInput input2{x_base, y_neg}; + EXPECT_MPFR_MATCH_ALL_ROUNDING(mpfr::Operation::Pow, input2, + LIBC_NAMESPACE::powf16(x_base, y_neg), + 0.5); + } + + // Case 3: (-x, +y) - Only test with positive integer y values + for (int y_int = 1; y_int <= 2048; ++y_int) { + float16 y_val = static_cast(y_int); + float16 x_neg = -x_base; + mpfr::BinaryInput input{x_neg, y_val}; + EXPECT_MPFR_MATCH_ALL_ROUNDING(mpfr::Operation::Pow, input, + LIBC_NAMESPACE::powf16(x_neg, y_val), 0.5); + } + + // Case 4: (-x, -y) - Only test with negative integer y values + for (int y_int = -2048; y_int < 0; ++y_int) { + float16 y_val = static_cast(y_int); + float16 x_neg = -x_base; + mpfr::BinaryInput input{x_neg, y_val}; EXPECT_MPFR_MATCH_ALL_ROUNDING(mpfr::Operation::Pow, input, - LIBC_NAMESPACE::powf16(x, y), 0.5); + LIBC_NAMESPACE::powf16(x_neg, y_val), 0.5); } } } -// Test selected y values against all possible x values. -TEST_F(LlvmLibcPowF16Test, SelectedY_AllX) { - for (size_t i = 0; i < sizeof(SELECTED_VALS) / sizeof(SELECTED_VALS[0]); - ++i) { - float16 y = SELECTED_VALS[i]; - for (uint16_t x_u = 0; x_u <= 0x7c00U; ++x_u) { - float16 x = FPBits(x_u).get_val(); - mpfr::BinaryInput input{x, y}; +// Test tricky inputs for selected y values against all possible x values. +TEST_F(LlvmLibcPowF16Test, TrickyInput_SelectedY_AllX) { + for (float16 y_base : SELECTED_VALS) { + // Only test non-negative y_base + if (FPBits(y_base).is_neg()) + continue; + + // Loop through normal and subnormal values only (0x0001 to 0x7BFF) + for (uint16_t x_u = 1; x_u <= 0x7BFFU; ++x_u) { + float16 x_base = FPBits(x_u).get_val(); + + // Case 1: (+x, +y) - Standard positive case + mpfr::BinaryInput input1{x_base, y_base}; + EXPECT_MPFR_MATCH_ALL_ROUNDING(mpfr::Operation::Pow, input1, + LIBC_NAMESPACE::powf16(x_base, y_base), + 0.5); + + // Case 2: (+x, -y) - Always valid for positive x + float16 y_neg = -y_base; + mpfr::BinaryInput input2{x_base, y_neg}; + EXPECT_MPFR_MATCH_ALL_ROUNDING(mpfr::Operation::Pow, input2, + LIBC_NAMESPACE::powf16(x_base, y_neg), + 0.5); + } + + // Case 3: (-x, +y) - Only test with positive integer x values + for (int x_int = 1; x_int <= 2048; ++x_int) { + float16 x_val = static_cast(x_int); + float16 x_neg = -x_val; + mpfr::BinaryInput input{x_neg, y_base}; + EXPECT_MPFR_MATCH_ALL_ROUNDING(mpfr::Operation::Pow, input, + LIBC_NAMESPACE::powf16(x_neg, y_base), + 0.5); + } + + // Case 4: (-x, -y) - Only test with negative integer x values + for (int x_int = 1; x_int <= 2048; ++x_int) { + float16 x_val = static_cast(x_int); + float16 x_neg = -x_val; + float16 y_neg = -y_base; + mpfr::BinaryInput input{x_neg, y_neg}; EXPECT_MPFR_MATCH_ALL_ROUNDING(mpfr::Operation::Pow, input, - LIBC_NAMESPACE::powf16(x, y), 0.5); + LIBC_NAMESPACE::powf16(x_neg, y_neg), 0.5); } } } From 2a83767d7c23ad3eec96e01f982655b6820367e9 Mon Sep 17 00:00:00 2001 From: anonmiraj Date: Mon, 3 Nov 2025 02:46:45 +0200 Subject: [PATCH 10/12] fix build failure --- libc/src/math/generic/CMakeLists.txt | 4 ++-- libc/src/math/generic/powf16.cpp | 7 ++++--- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/libc/src/math/generic/CMakeLists.txt b/libc/src/math/generic/CMakeLists.txt index 16a17429cb48d..eecf61ed878ac 100644 --- a/libc/src/math/generic/CMakeLists.txt +++ b/libc/src/math/generic/CMakeLists.txt @@ -1631,9 +1631,8 @@ add_entrypoint_object( SRCS powf16.cpp HDRS - ../pow.h + ../powf16.h DEPENDS - .common_constants libc.hdr.errno_macros libc.hdr.fenv_macros libc.src.__support.CPP.bit @@ -1646,6 +1645,7 @@ add_entrypoint_object( libc.src.__support.FPUtil.sqrt libc.src.__support.macros.optimization libc.src.__support.math.exp10f_utils + libc.src.__support.math.common_constants ) add_entrypoint_object( diff --git a/libc/src/math/generic/powf16.cpp b/libc/src/math/generic/powf16.cpp index 22010e954183c..89d5f39a0131f 100644 --- a/libc/src/math/generic/powf16.cpp +++ b/libc/src/math/generic/powf16.cpp @@ -21,11 +21,12 @@ #include "src/__support/macros/config.h" #include "src/__support/macros/optimization.h" #include "src/__support/macros/properties/types.h" +#include "src/__support/math/common_constants.h" #include "src/__support/math/exp10f_utils.h" -#include "src/math/generic/common_constants.h" namespace LIBC_NAMESPACE_DECL { +using namespace common_constants_internal; namespace { static inline double exp2_range_reduced(double x) { // k = round(x * 32) => (hi + mid) * 2^5 @@ -256,7 +257,7 @@ LLVM_LIBC_FUNCTION(float16, powf16, (float16 x, float16 y)) { (!x_abs_less_than_one && y_sign)) { // |x| < 1 and y = +inf => 0.0 // |x| > 1 and y = -inf => 0.0 - return 0.0f; + return 0.0f16; } else { // |x| > 1 and y = +inf => +inf // |x| < 1 and y = -inf => +inf @@ -351,7 +352,7 @@ LLVM_LIBC_FUNCTION(float16, powf16, (float16 x, float16 y)) { 0x1.2514fd90a130ap-2}; double vsq = v * v; - double c0 = fputil::multiply_add(v, COEFFS[0], 0); + double c0 = fputil::multiply_add(v, COEFFS[0], 0.0); double c1 = fputil::multiply_add(v, COEFFS[2], COEFFS[1]); double c2 = fputil::multiply_add(v, COEFFS[4], COEFFS[3]); From 9652c1896a8617f47507643709e93133acf68394 Mon Sep 17 00:00:00 2001 From: anonmiraj Date: Sun, 9 Nov 2025 11:22:29 +0200 Subject: [PATCH 11/12] handle LIBC_TARGET_CPU_HAS_FMA_DOUBLE --- libc/src/math/generic/powf16.cpp | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/libc/src/math/generic/powf16.cpp b/libc/src/math/generic/powf16.cpp index 89d5f39a0131f..d5aa9773c07cb 100644 --- a/libc/src/math/generic/powf16.cpp +++ b/libc/src/math/generic/powf16.cpp @@ -341,16 +341,22 @@ LLVM_LIBC_FUNCTION(float16, powf16, (float16 x, float16 y)) { // Reconstruct the mantissa value m_x so it's in the range [1.0, 2.0). x_bits.set_biased_exponent(FPBits::EXP_BIAS); double mant_d = x_bits.get_val(); - - double v = fputil::multiply_add(mant_d, RD[f], -1.0); - double extra_factor = static_cast(m) + LOG2_R[f]; - - // Degree-5 polynomial approximation of log2 generated by Sollya with: + // Degree-5 polynomial approximation + // of log2 generated by Sollya with: // > P = fpminimax(log2(1 + x)/x, 4, [|1, D...|], [-2^-8, 2^-7]); constexpr double COEFFS[5] = {0x1.71547652b8133p0, -0x1.71547652d1e33p-1, 0x1.ec70a098473dep-2, -0x1.7154c5ccdf121p-2, 0x1.2514fd90a130ap-2}; +#ifdef LIBC_TARGET_CPU_HAS_FMA_DOUBLE + double v = fputil::multiply_add(mant_d, RD[f], -1.0); +#else + double c = fputil::FPBits(fputil::FPBits(mant_d).uintval() & + 0x3fff'e000'0000'0000) + .get_val(); + double v = fputil::multiply_add(RD[f], mant_d - c, CD[f]); +#endif // LIBC_TARGET_CPU_HAS_FMA_DOUBLE + double extra_factor = static_cast(m) + LOG2_R[f]; double vsq = v * v; double c0 = fputil::multiply_add(v, COEFFS[0], 0.0); double c1 = fputil::multiply_add(v, COEFFS[2], COEFFS[1]); From 54e3d6079a401522e58093624340258c1e5f66d5 Mon Sep 17 00:00:00 2001 From: anonmiraj Date: Sun, 9 Nov 2025 11:25:10 +0200 Subject: [PATCH 12/12] ensure f16 literals are used in smoke test --- libc/test/src/math/smoke/powf16_test.cpp | 130 ++++++++++------------- 1 file changed, 54 insertions(+), 76 deletions(-) diff --git a/libc/test/src/math/smoke/powf16_test.cpp b/libc/test/src/math/smoke/powf16_test.cpp index b3611e141dc01..f74bb6e57877e 100644 --- a/libc/test/src/math/smoke/powf16_test.cpp +++ b/libc/test/src/math/smoke/powf16_test.cpp @@ -54,7 +54,7 @@ TEST_F(LlvmLibcPowF16Test, SpecialNumbers) { EXPECT_FP_EQ_WITH_EXCEPTION(aNaN, LIBC_NAMESPACE::powf16(sNaN, aNaN), FE_INVALID); - // powf16( 0.0, exponent ) + // powf16( 0.0f16, exponent ) EXPECT_FP_EQ_WITH_EXCEPTION(aNaN, LIBC_NAMESPACE::powf16(zero, sNaN), FE_INVALID); EXPECT_FP_EQ_WITH_EXCEPTION( @@ -67,14 +67,14 @@ TEST_F(LlvmLibcPowF16Test, SpecialNumbers) { EXPECT_FP_EQ(zero, LIBC_NAMESPACE::powf16(zero, POS_EVEN_INTEGER)); EXPECT_FP_EQ(zero, LIBC_NAMESPACE::powf16(zero, POS_NON_INTEGER)); EXPECT_FP_EQ(zero, LIBC_NAMESPACE::powf16(zero, ONE_HALF)); - EXPECT_FP_EQ(1.0, LIBC_NAMESPACE::powf16(zero, zero)); - EXPECT_FP_EQ(1.0, LIBC_NAMESPACE::powf16(zero, neg_zero)); - EXPECT_FP_EQ(0.0, LIBC_NAMESPACE::powf16(zero, inf)); + EXPECT_FP_EQ(1.0f16, LIBC_NAMESPACE::powf16(zero, zero)); + EXPECT_FP_EQ(1.0f16, LIBC_NAMESPACE::powf16(zero, neg_zero)); + EXPECT_FP_EQ(0.0f16, LIBC_NAMESPACE::powf16(zero, inf)); EXPECT_FP_EQ_WITH_EXCEPTION(inf, LIBC_NAMESPACE::powf16(zero, neg_inf), FE_DIVBYZERO); EXPECT_FP_IS_NAN(LIBC_NAMESPACE::powf16(zero, aNaN)); - // powf16( -0.0, exponent ) + // powf16( -0.0f16, exponent ) EXPECT_FP_EQ_WITH_EXCEPTION(aNaN, LIBC_NAMESPACE::powf16(neg_zero, sNaN), FE_INVALID); EXPECT_FP_EQ_WITH_EXCEPTION( @@ -88,56 +88,56 @@ TEST_F(LlvmLibcPowF16Test, SpecialNumbers) { EXPECT_FP_EQ(zero, LIBC_NAMESPACE::powf16(neg_zero, POS_EVEN_INTEGER)); EXPECT_FP_EQ(zero, LIBC_NAMESPACE::powf16(neg_zero, POS_NON_INTEGER)); EXPECT_FP_EQ(zero, LIBC_NAMESPACE::powf16(neg_zero, ONE_HALF)); - EXPECT_FP_EQ(1.0, LIBC_NAMESPACE::powf16(neg_zero, zero)); - EXPECT_FP_EQ(1.0, LIBC_NAMESPACE::powf16(neg_zero, neg_zero)); - EXPECT_FP_EQ(0.0, LIBC_NAMESPACE::powf16(neg_zero, inf)); + EXPECT_FP_EQ(1.0f16, LIBC_NAMESPACE::powf16(neg_zero, zero)); + EXPECT_FP_EQ(1.0f16, LIBC_NAMESPACE::powf16(neg_zero, neg_zero)); + EXPECT_FP_EQ(0.0f16, LIBC_NAMESPACE::powf16(neg_zero, inf)); EXPECT_FP_EQ_WITH_EXCEPTION(inf, LIBC_NAMESPACE::powf16(neg_zero, neg_inf), FE_DIVBYZERO); EXPECT_FP_IS_NAN(LIBC_NAMESPACE::powf16(neg_zero, aNaN)); - // powf16( 1.0, exponent ) - EXPECT_FP_EQ_WITH_EXCEPTION(aNaN, LIBC_NAMESPACE::powf16(1.0, sNaN), + // powf16( 1.0f16, exponent ) + EXPECT_FP_EQ_WITH_EXCEPTION(aNaN, LIBC_NAMESPACE::powf16(1.0f16, sNaN), FE_INVALID); - EXPECT_FP_EQ(1.0, LIBC_NAMESPACE::powf16(1.0, zero)); - EXPECT_FP_EQ(1.0, LIBC_NAMESPACE::powf16(1.0, neg_zero)); - EXPECT_FP_EQ(1.0, LIBC_NAMESPACE::powf16(1.0, 1.0)); - EXPECT_FP_EQ(1.0, LIBC_NAMESPACE::powf16(1.0, -1.0)); - EXPECT_FP_EQ(1.0, LIBC_NAMESPACE::powf16(1.0, NEG_ODD_INTEGER)); - EXPECT_FP_EQ(1.0, LIBC_NAMESPACE::powf16(1.0, NEG_EVEN_INTEGER)); - EXPECT_FP_EQ(1.0, LIBC_NAMESPACE::powf16(1.0, NEG_NON_INTEGER)); - EXPECT_FP_EQ(1.0, LIBC_NAMESPACE::powf16(1.0, POS_ODD_INTEGER)); - EXPECT_FP_EQ(1.0, LIBC_NAMESPACE::powf16(1.0, POS_EVEN_INTEGER)); - EXPECT_FP_EQ(1.0, LIBC_NAMESPACE::powf16(1.0, POS_NON_INTEGER)); - EXPECT_FP_EQ(1.0, LIBC_NAMESPACE::powf16(1.0, inf)); - EXPECT_FP_EQ(1.0, LIBC_NAMESPACE::powf16(1.0, neg_inf)); - EXPECT_FP_EQ(1.0, LIBC_NAMESPACE::powf16(1.0, aNaN)); - - // powf16( -1.0, exponent ) - EXPECT_FP_EQ_WITH_EXCEPTION(aNaN, LIBC_NAMESPACE::powf16(-1.0, sNaN), + EXPECT_FP_EQ(1.0f16, LIBC_NAMESPACE::powf16(1.0f16, zero)); + EXPECT_FP_EQ(1.0f16, LIBC_NAMESPACE::powf16(1.0f16, neg_zero)); + EXPECT_FP_EQ(1.0f16, LIBC_NAMESPACE::powf16(1.0f16, 1.0f16)); + EXPECT_FP_EQ(1.0f16, LIBC_NAMESPACE::powf16(1.0f16, -1.0f16)); + EXPECT_FP_EQ(1.0f16, LIBC_NAMESPACE::powf16(1.0f16, NEG_ODD_INTEGER)); + EXPECT_FP_EQ(1.0f16, LIBC_NAMESPACE::powf16(1.0f16, NEG_EVEN_INTEGER)); + EXPECT_FP_EQ(1.0f16, LIBC_NAMESPACE::powf16(1.0f16, NEG_NON_INTEGER)); + EXPECT_FP_EQ(1.0f16, LIBC_NAMESPACE::powf16(1.0f16, POS_ODD_INTEGER)); + EXPECT_FP_EQ(1.0f16, LIBC_NAMESPACE::powf16(1.0f16, POS_EVEN_INTEGER)); + EXPECT_FP_EQ(1.0f16, LIBC_NAMESPACE::powf16(1.0f16, POS_NON_INTEGER)); + EXPECT_FP_EQ(1.0f16, LIBC_NAMESPACE::powf16(1.0f16, inf)); + EXPECT_FP_EQ(1.0f16, LIBC_NAMESPACE::powf16(1.0f16, neg_inf)); + EXPECT_FP_EQ(1.0f16, LIBC_NAMESPACE::powf16(1.0f16, aNaN)); + + // powf16( -1.0f16, exponent ) + EXPECT_FP_EQ_WITH_EXCEPTION(aNaN, LIBC_NAMESPACE::powf16(-1.0f16, sNaN), FE_INVALID); - EXPECT_FP_EQ(1.0, LIBC_NAMESPACE::powf16(-1.0, zero)); - EXPECT_FP_EQ(1.0, LIBC_NAMESPACE::powf16(-1.0, neg_zero)); - EXPECT_FP_EQ(-1.0, LIBC_NAMESPACE::powf16(-1.0, 1.0)); - EXPECT_FP_EQ(-1.0, LIBC_NAMESPACE::powf16(-1.0, -1.0)); - EXPECT_FP_EQ(-1.0, LIBC_NAMESPACE::powf16(-1.0, NEG_ODD_INTEGER)); - EXPECT_FP_EQ(1.0, LIBC_NAMESPACE::powf16(-1.0, NEG_EVEN_INTEGER)); + EXPECT_FP_EQ(1.0f16, LIBC_NAMESPACE::powf16(-1.0f16, zero)); + EXPECT_FP_EQ(1.0f16, LIBC_NAMESPACE::powf16(-1.0f16, neg_zero)); + EXPECT_FP_EQ(-1.0f16, LIBC_NAMESPACE::powf16(-1.0f16, 1.0f16)); + EXPECT_FP_EQ(-1.0f16, LIBC_NAMESPACE::powf16(-1.0f16, -1.0f16)); + EXPECT_FP_EQ(-1.0f16, LIBC_NAMESPACE::powf16(-1.0f16, NEG_ODD_INTEGER)); + EXPECT_FP_EQ(1.0f16, LIBC_NAMESPACE::powf16(-1.0f16, NEG_EVEN_INTEGER)); EXPECT_FP_IS_NAN_WITH_EXCEPTION( - LIBC_NAMESPACE::powf16(-1.0, NEG_NON_INTEGER), FE_INVALID); - EXPECT_FP_EQ(-1.0, LIBC_NAMESPACE::powf16(-1.0, POS_ODD_INTEGER)); - EXPECT_FP_EQ(1.0, LIBC_NAMESPACE::powf16(-1.0, POS_EVEN_INTEGER)); + LIBC_NAMESPACE::powf16(-1.0f16, NEG_NON_INTEGER), FE_INVALID); + EXPECT_FP_EQ(-1.0f16, LIBC_NAMESPACE::powf16(-1.0f16, POS_ODD_INTEGER)); + EXPECT_FP_EQ(1.0f16, LIBC_NAMESPACE::powf16(-1.0f16, POS_EVEN_INTEGER)); EXPECT_FP_IS_NAN_WITH_EXCEPTION( - LIBC_NAMESPACE::powf16(-1.0, POS_NON_INTEGER), FE_INVALID); - EXPECT_FP_EQ(1.0, LIBC_NAMESPACE::powf16(-1.0, inf)); - EXPECT_FP_EQ(1.0, LIBC_NAMESPACE::powf16(-1.0, neg_inf)); - EXPECT_FP_IS_NAN(LIBC_NAMESPACE::powf16(-1.0, aNaN)); + LIBC_NAMESPACE::powf16(-1.0f16, POS_NON_INTEGER), FE_INVALID); + EXPECT_FP_EQ(1.0f16, LIBC_NAMESPACE::powf16(-1.0f16, inf)); + EXPECT_FP_EQ(1.0f16, LIBC_NAMESPACE::powf16(-1.0f16, neg_inf)); + EXPECT_FP_IS_NAN(LIBC_NAMESPACE::powf16(-1.0f16, aNaN)); // powf16( inf, exponent ) EXPECT_FP_EQ_WITH_EXCEPTION(aNaN, LIBC_NAMESPACE::powf16(inf, sNaN), FE_INVALID); - EXPECT_FP_EQ(1.0, LIBC_NAMESPACE::powf16(inf, zero)); - EXPECT_FP_EQ(1.0, LIBC_NAMESPACE::powf16(inf, neg_zero)); - EXPECT_FP_EQ(inf, LIBC_NAMESPACE::powf16(inf, 1.0)); - EXPECT_FP_EQ(zero, LIBC_NAMESPACE::powf16(inf, -1.0)); + EXPECT_FP_EQ(1.0f16, LIBC_NAMESPACE::powf16(inf, zero)); + EXPECT_FP_EQ(1.0f16, LIBC_NAMESPACE::powf16(inf, neg_zero)); + EXPECT_FP_EQ(inf, LIBC_NAMESPACE::powf16(inf, 1.0f16)); + EXPECT_FP_EQ(zero, LIBC_NAMESPACE::powf16(inf, -1.0f16)); EXPECT_FP_EQ(zero, LIBC_NAMESPACE::powf16(inf, NEG_ODD_INTEGER)); EXPECT_FP_EQ(zero, LIBC_NAMESPACE::powf16(inf, NEG_EVEN_INTEGER)); EXPECT_FP_EQ(zero, LIBC_NAMESPACE::powf16(inf, NEG_NON_INTEGER)); @@ -152,10 +152,10 @@ TEST_F(LlvmLibcPowF16Test, SpecialNumbers) { // powf16( -inf, exponent ) EXPECT_FP_EQ_WITH_EXCEPTION(aNaN, LIBC_NAMESPACE::powf16(neg_inf, sNaN), FE_INVALID); - EXPECT_FP_EQ(1.0, LIBC_NAMESPACE::powf16(neg_inf, zero)); - EXPECT_FP_EQ(1.0, LIBC_NAMESPACE::powf16(neg_inf, neg_zero)); - EXPECT_FP_EQ(neg_inf, LIBC_NAMESPACE::powf16(neg_inf, 1.0)); - EXPECT_FP_EQ(neg_zero, LIBC_NAMESPACE::powf16(neg_inf, -1.0)); + EXPECT_FP_EQ(1.0f16, LIBC_NAMESPACE::powf16(neg_inf, zero)); + EXPECT_FP_EQ(1.0f16, LIBC_NAMESPACE::powf16(neg_inf, neg_zero)); + EXPECT_FP_EQ(neg_inf, LIBC_NAMESPACE::powf16(neg_inf, 1.0f16)); + EXPECT_FP_EQ(neg_zero, LIBC_NAMESPACE::powf16(neg_inf, -1.0f16)); EXPECT_FP_EQ(neg_zero, LIBC_NAMESPACE::powf16(neg_inf, NEG_ODD_INTEGER)); EXPECT_FP_EQ(zero, LIBC_NAMESPACE::powf16(neg_inf, NEG_EVEN_INTEGER)); EXPECT_FP_EQ(zero, LIBC_NAMESPACE::powf16(neg_inf, NEG_NON_INTEGER)); @@ -170,10 +170,10 @@ TEST_F(LlvmLibcPowF16Test, SpecialNumbers) { // powf16 ( aNaN, exponent ) EXPECT_FP_EQ_WITH_EXCEPTION(aNaN, LIBC_NAMESPACE::powf16(aNaN, sNaN), FE_INVALID); - EXPECT_FP_EQ(1.0, LIBC_NAMESPACE::powf16(aNaN, zero)); - EXPECT_FP_EQ(1.0, LIBC_NAMESPACE::powf16(aNaN, neg_zero)); - EXPECT_FP_IS_NAN(LIBC_NAMESPACE::powf16(aNaN, 1.0)); - EXPECT_FP_IS_NAN(LIBC_NAMESPACE::powf16(aNaN, -1.0)); + EXPECT_FP_EQ(1.0f16, LIBC_NAMESPACE::powf16(aNaN, zero)); + EXPECT_FP_EQ(1.0f16, LIBC_NAMESPACE::powf16(aNaN, neg_zero)); + EXPECT_FP_IS_NAN(LIBC_NAMESPACE::powf16(aNaN, 1.0f16)); + EXPECT_FP_IS_NAN(LIBC_NAMESPACE::powf16(aNaN, -1.0f16)); EXPECT_FP_IS_NAN(LIBC_NAMESPACE::powf16(aNaN, NEG_ODD_INTEGER)); EXPECT_FP_IS_NAN(LIBC_NAMESPACE::powf16(aNaN, NEG_EVEN_INTEGER)); EXPECT_FP_IS_NAN(LIBC_NAMESPACE::powf16(aNaN, NEG_NON_INTEGER)); @@ -196,37 +196,15 @@ TEST_F(LlvmLibcPowF16Test, SpecialNumbers) { EXPECT_FP_EQ(zero, LIBC_NAMESPACE::powf16(1.1f16, neg_inf)); EXPECT_FP_EQ(zero, LIBC_NAMESPACE::powf16(-1.1f16, neg_inf)); - // Exact powers of 2: - // TODO: Enable these tests when we use exp2. - // EXPECT_FP_EQ(0x1.0p15, LIBC_NAMESPACE::powf16(2.0, 15.0)); - // EXPECT_FP_EQ(0x1.0p126, LIBC_NAMESPACE::powf16(2.0, 126.0)); - // EXPECT_FP_EQ(0x1.0p-45, LIBC_NAMESPACE::powf16(2.0, -45.0)); - // EXPECT_FP_EQ(0x1.0p-126, LIBC_NAMESPACE::powf16(2.0, -126.0)); - // EXPECT_FP_EQ(0x1.0p-149, LIBC_NAMESPACE::powf16(2.0, -149.0)); - - // Exact powers of 10: - // TODO: Enable these tests when we use exp10 - // EXPECT_FP_EQ(1.0, LIBC_NAMESPACE::powf16(10.0, 0.0)); - // EXPECT_FP_EQ(10.0, LIBC_NAMESPACE::powf16(10.0, 1.0)); - // EXPECT_FP_EQ(100.0, LIBC_NAMESPACE::powf16(10.0, 2.0)); - // EXPECT_FP_EQ(1000.0, LIBC_NAMESPACE::powf16(10.0, 3.0)); - // EXPECT_FP_EQ(10000.0, LIBC_NAMESPACE::powf16(10.0, 4.0)); - // EXPECT_FP_EQ(100000.0, LIBC_NAMESPACE::powf16(10.0, 5.0)); - // EXPECT_FP_EQ(1000000.0, LIBC_NAMESPACE::powf16(10.0, 6.0)); - // EXPECT_FP_EQ(10000000.0, LIBC_NAMESPACE::powf16(10.0, 7.0)); - // EXPECT_FP_EQ(100000000.0, LIBC_NAMESPACE::powf16(10.0, 8.0)); - // EXPECT_FP_EQ(1000000000.0, LIBC_NAMESPACE::powf16(10.0, 9.0)); - // EXPECT_FP_EQ(10000000000.0, LIBC_NAMESPACE::powf16(10.0, 10.0)); - // Overflow / Underflow: if (ROUNDING_MODES[i] != RoundingMode::Downward && ROUNDING_MODES[i] != RoundingMode::TowardZero) { - EXPECT_FP_EQ_WITH_EXCEPTION(inf, LIBC_NAMESPACE::powf16(3.1f16, 21.0), + EXPECT_FP_EQ_WITH_EXCEPTION(inf, LIBC_NAMESPACE::powf16(3.1f16, 21.0f16), FE_OVERFLOW); } if (ROUNDING_MODES[i] != RoundingMode::Upward) { - EXPECT_FP_EQ_WITH_EXCEPTION(0.0, LIBC_NAMESPACE::powf16(3.1f16, -21.0), - FE_UNDERFLOW); + EXPECT_FP_EQ_WITH_EXCEPTION( + 0.0f16, LIBC_NAMESPACE::powf16(3.1f16, -21.0f16), FE_UNDERFLOW); } } }