Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions libc/shared/math.h
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@
#include "math/exp2.h"
#include "math/exp2f.h"
#include "math/exp2f16.h"
#include "math/exp2m1f.h"
#include "math/expf.h"
#include "math/expf16.h"
#include "math/frexpf.h"
Expand Down
23 changes: 23 additions & 0 deletions libc/shared/math/exp2m1f.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
//===-- Shared exp2m1f function ---------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIBC_SHARED_MATH_EXP2M1F_H
#define LLVM_LIBC_SHARED_MATH_EXP2M1F_H

#include "shared/libc_common.h"
#include "src/__support/math/exp2m1f.h"

namespace LIBC_NAMESPACE_DECL {
namespace shared {

using math::exp2m1f;

} // namespace shared
} // namespace LIBC_NAMESPACE_DECL

#endif // LLVM_LIBC_SHARED_MATH_EXP2M1F_H
18 changes: 18 additions & 0 deletions libc/src/__support/math/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -767,6 +767,24 @@ add_header_library(
libc.src.__support.macros.optimization
)

add_header_library(
exp2m1f
HDRS
exp2m1f.h
DEPENDS
.exp10f_utils
libc.src.errno.errno
libc.src.__support.common
libc.src.__support.FPUtil.except_value_utils
libc.src.__support.FPUtil.fenv_impl
libc.src.__support.FPUtil.fp_bits
libc.src.__support.FPUtil.multiply_add
libc.src.__support.FPUtil.polyeval
libc.src.__support.FPUtil.rounding_mode
libc.src.__support.macros.optimization
libc.src.__support.macros.properties.cpu_features
)

add_header_library(
exp10
HDRS
Expand Down
195 changes: 195 additions & 0 deletions libc/src/__support/math/exp2m1f.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,195 @@
//===-- Implementation header for exp2m1f ------------------------*- C++-*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIBC_SRC___SUPPORT_MATH_EXP2M1F_H
#define LLVM_LIBC_SRC___SUPPORT_MATH_EXP2M1F_H

#include "exp10f_utils.h"
#include "src/__support/FPUtil/FEnvImpl.h"
#include "src/__support/FPUtil/FPBits.h"
#include "src/__support/FPUtil/PolyEval.h"
#include "src/__support/FPUtil/except_value_utils.h"
#include "src/__support/FPUtil/multiply_add.h"
#include "src/__support/FPUtil/rounding_mode.h"
#include "src/__support/common.h"
#include "src/__support/libc_errno.h"
#include "src/__support/macros/config.h"
#include "src/__support/macros/optimization.h"
#include "src/__support/macros/properties/cpu_features.h"

namespace LIBC_NAMESPACE_DECL {

namespace math {

LIBC_INLINE static constexpr float exp2m1f(float x) {
#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
constexpr size_t N_EXCEPTS_LO = 8;

constexpr fputil::ExceptValues<float, N_EXCEPTS_LO> EXP2M1F_EXCEPTS_LO = {{
// (input, RZ output, RU offset, RD offset, RN offset)
// x = 0x1.36dc8ep-36, exp2m1f(x) = 0x1.aef212p-37 (RZ)
{0x2d9b'6e47U, 0x2d57'7909U, 1U, 0U, 0U},
// x = 0x1.224936p-19, exp2m1f(x) = 0x1.926c0ep-20 (RZ)
{0x3611'249bU, 0x35c9'3607U, 1U, 0U, 1U},
// x = 0x1.d16d2p-20, exp2m1f(x) = 0x1.429becp-20 (RZ)
{0x35e8'b690U, 0x35a1'4df6U, 1U, 0U, 1U},
// x = 0x1.17949ep-14, exp2m1f(x) = 0x1.8397p-15 (RZ)
{0x388b'ca4fU, 0x3841'cb80U, 1U, 0U, 1U},
// x = -0x1.9c3e1ep-38, exp2m1f(x) = -0x1.1dbeacp-38 (RZ)
{0xacce'1f0fU, 0xac8e'df56U, 0U, 1U, 0U},
// x = -0x1.4d89b4p-32, exp2m1f(x) = -0x1.ce61b6p-33 (RZ)
{0xafa6'c4daU, 0xaf67'30dbU, 0U, 1U, 1U},
// x = -0x1.a6eac4p-10, exp2m1f(x) = -0x1.24fadap-10 (RZ)
{0xbad3'7562U, 0xba92'7d6dU, 0U, 1U, 1U},
// x = -0x1.e7526ep-6, exp2m1f(x) = -0x1.4e53dep-6 (RZ)
{0xbcf3'a937U, 0xbca7'29efU, 0U, 1U, 1U},
}};

constexpr size_t N_EXCEPTS_HI = 3;

constexpr fputil::ExceptValues<float, N_EXCEPTS_HI> EXP2M1F_EXCEPTS_HI = {{
// (input, RZ output, RU offset, RD offset, RN offset)
// x = 0x1.16a972p-1, exp2m1f(x) = 0x1.d545b2p-2 (RZ)
{0x3f0b'54b9U, 0x3eea'a2d9U, 1U, 0U, 0U},
// x = -0x1.9f12acp-5, exp2m1f(x) = -0x1.1ab68cp-5 (RZ)
{0xbd4f'8956U, 0xbd0d'5b46U, 0U, 1U, 0U},
// x = -0x1.de7b9cp-5, exp2m1f(x) = -0x1.4508f4p-5 (RZ)
{0xbd6f'3dceU, 0xbd22'847aU, 0U, 1U, 1U},
}};
#endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS

using FPBits = fputil::FPBits<float>;
FPBits xbits(x);

uint32_t x_u = xbits.uintval();
uint32_t x_abs = x_u & 0x7fff'ffffU;

// When |x| >= 128, or x is nan, or |x| <= 2^-5
if (LIBC_UNLIKELY(x_abs >= 0x4300'0000U || x_abs <= 0x3d00'0000U)) {
// |x| <= 2^-5
if (x_abs <= 0x3d00'0000U) {
#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
if (auto r = EXP2M1F_EXCEPTS_LO.lookup(x_u); LIBC_UNLIKELY(r.has_value()))
return r.value();
#endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS

// Minimax polynomial generated by Sollya with:
// > display = hexadecimal;
// > fpminimax((2^x - 1)/x, 5, [|D...|], [-2^-5, 2^-5]);
constexpr double COEFFS[] = {
0x1.62e42fefa39f3p-1, 0x1.ebfbdff82c57bp-3, 0x1.c6b08d6f2d7aap-5,
0x1.3b2ab6fc92f5dp-7, 0x1.5d897cfe27125p-10, 0x1.43090e61e6af1p-13};
double xd = x;
double xsq = xd * xd;
double c0 = fputil::multiply_add(xd, COEFFS[1], COEFFS[0]);
double c1 = fputil::multiply_add(xd, COEFFS[3], COEFFS[2]);
double c2 = fputil::multiply_add(xd, COEFFS[5], COEFFS[4]);
double p = fputil::polyeval(xsq, c0, c1, c2);
return static_cast<float>(p * xd);
}

// x >= 128, or x is nan
if (xbits.is_pos()) {
if (xbits.is_finite()) {
int rounding = fputil::quick_get_round();
if (rounding == FE_DOWNWARD || rounding == FE_TOWARDZERO)
return FPBits::max_normal().get_val();

fputil::set_errno_if_required(ERANGE);
fputil::raise_except_if_required(FE_OVERFLOW);
}

// x >= 128 and 2^x - 1 rounds to +inf, or x is +inf or nan
return x + FPBits::inf().get_val();
}
}

if (LIBC_UNLIKELY(x <= -25.0f)) {
// 2^(-inf) - 1 = -1
if (xbits.is_inf())
return -1.0f;
// 2^nan - 1 = nan
if (xbits.is_nan())
return x;

int rounding = fputil::quick_get_round();
if (rounding == FE_UPWARD || rounding == FE_TOWARDZERO)
return -0x1.ffff'fep-1f; // -1.0f + 0x1.0p-24f

fputil::set_errno_if_required(ERANGE);
fputil::raise_except_if_required(FE_UNDERFLOW);
return -1.0f;
}

#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
if (auto r = EXP2M1F_EXCEPTS_HI.lookup(x_u); LIBC_UNLIKELY(r.has_value()))
return r.value();
#endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS

// For -25 < x < 128, to compute 2^x, we perform the following range
// reduction: find hi, mid, lo such that:
// x = hi + mid + lo, in which:
// hi is an integer,
// 0 <= mid * 2^5 < 32 is an integer,
// -2^(-6) <= lo <= 2^(-6).
// In particular,
// hi + mid = round(x * 2^5) * 2^(-5).
// Then,
// 2^x = 2^(hi + mid + lo) = 2^hi * 2^mid * 2^lo.
// 2^mid is stored in the lookup table of 32 elements.
// 2^lo is computed using a degree-4 minimax polynomial generated by Sollya.
// We perform 2^hi * 2^mid by simply add hi to the exponent field of 2^mid.

// kf = (hi + mid) * 2^5 = round(x * 2^5)
float kf = 0;
int k = 0;
#ifdef LIBC_TARGET_CPU_HAS_NEAREST_INT
kf = fputil::nearest_integer(x * 32.0f);
k = static_cast<int>(kf);
#else
constexpr float HALF[2] = {0.5f, -0.5f};
k = static_cast<int>(fputil::multiply_add(x, 32.0f, HALF[x < 0.0f]));
kf = static_cast<float>(k);
#endif // LIBC_TARGET_CPU_HAS_NEAREST_INT

// lo = x - (hi + mid) = x - kf * 2^(-5)
double lo = fputil::multiply_add(-0x1.0p-5f, kf, x);

// hi = floor(kf * 2^(-4))
// exp2_hi = shift hi to the exponent field of double precision.
int64_t exp2_hi =
static_cast<int64_t>(static_cast<uint64_t>(k >> ExpBase::MID_BITS)
<< fputil::FPBits<double>::FRACTION_LEN);
// mh = 2^hi * 2^mid
// mh_bits = bit field of mh
int64_t mh_bits = ExpBase::EXP_2_MID[k & ExpBase::MID_MASK] + exp2_hi;
double mh = fputil::FPBits<double>(static_cast<uint64_t>(mh_bits)).get_val();

// Degree-4 polynomial approximating (2^x - 1)/x generated by Sollya with:
// > display = hexadecimal;
// > fpminimax((2^x - 1)/x, 4, [|D...|], [-2^-6, 2^-6]);
constexpr double COEFFS[5] = {0x1.62e42fefa39efp-1, 0x1.ebfbdff8131c4p-3,
0x1.c6b08d7061695p-5, 0x1.3b2b1bee74b2ap-7,
0x1.5d88091198529p-10};
double lo_sq = lo * lo;
double c1 = fputil::multiply_add(lo, COEFFS[0], 1.0);
double c2 = fputil::multiply_add(lo, COEFFS[2], COEFFS[1]);
double c3 = fputil::multiply_add(lo, COEFFS[4], COEFFS[3]);
double exp2_lo = fputil::polyeval(lo_sq, c1, c2, c3);
// 2^x - 1 = 2^(hi + mid + lo) - 1
// = 2^(hi + mid) * 2^lo - 1
// ~ mh * (1 + lo * P(lo)) - 1
// = mh * exp2_lo - 1
return static_cast<float>(fputil::multiply_add(exp2_lo, mh, -1.0));
}

} // namespace math

} // namespace LIBC_NAMESPACE_DECL

#endif // LLVM_LIBC_SRC___SUPPORT_MATH_EXP2M1F_H
12 changes: 1 addition & 11 deletions libc/src/math/generic/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -1478,17 +1478,7 @@ add_entrypoint_object(
HDRS
../exp2m1f.h
DEPENDS
libc.src.errno.errno
libc.src.__support.common
libc.src.__support.FPUtil.except_value_utils
libc.src.__support.FPUtil.fenv_impl
libc.src.__support.FPUtil.fp_bits
libc.src.__support.FPUtil.multiply_add
libc.src.__support.FPUtil.polyeval
libc.src.__support.FPUtil.rounding_mode
libc.src.__support.macros.optimization
libc.src.__support.macros.properties.cpu_features
libc.src.__support.math.exp10f_utils
libc.src.__support.math.exp2m1f
)

add_entrypoint_object(
Expand Down
Loading
Loading