Skip to content

Commit df2bd8a

Browse files
committed
feat: implement expbf16 higher math function
Signed-off-by: Krishna Pandey <[email protected]>
1 parent 5e4eb33 commit df2bd8a

File tree

4 files changed

+228
-0
lines changed

4 files changed

+228
-0
lines changed

libc/src/math/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -139,6 +139,7 @@ add_math_entrypoint_object(erff)
139139
add_math_entrypoint_object(exp)
140140
add_math_entrypoint_object(expf)
141141
add_math_entrypoint_object(expf16)
142+
add_math_entrypoint_object(expbf16)
142143

143144
add_math_entrypoint_object(exp2)
144145
add_math_entrypoint_object(exp2f)

libc/src/math/expbf16.h

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
//===-- Implementation header for expbf16 -----------------------*- C++ -*-===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#ifndef LLVM_LIBC_SRC_MATH_EXPBF16_H
10+
#define LLVM_LIBC_SRC_MATH_EXPBF16_H
11+
12+
#include "src/__support/macros/config.h"
13+
#include "src/__support/macros/properties/types.h"
14+
15+
namespace LIBC_NAMESPACE_DECL {
16+
17+
bfloat16 expbf16(bfloat16 x);
18+
19+
} // namespace LIBC_NAMESPACE_DECL
20+
21+
#endif // LLVM_LIBC_SRC_MATH_EXPBF16_H

libc/src/math/generic/CMakeLists.txt

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1441,6 +1441,28 @@ add_entrypoint_object(
14411441
libc.src.errno.errno
14421442
)
14431443

1444+
add_entrypoint_object(
1445+
expbf16
1446+
SRCS
1447+
expbf16.cpp
1448+
HDRS
1449+
../expbf16.h
1450+
DEPENDS
1451+
libc.hdr.errno_macros
1452+
libc.hdr.fenv_macros
1453+
libc.src.__support.CPP.array
1454+
libc.src.__support.FPUtil.cast
1455+
libc.src.__support.FPUtil.except_value_utils
1456+
libc.src.__support.FPUtil.fenv_impl
1457+
libc.src.__support.FPUtil.fp_bits
1458+
libc.src.__support.FPUtil.multiply_add
1459+
libc.src.__support.FPUtil.nearest_integer
1460+
libc.src.__support.FPUtil.polyeval
1461+
libc.src.__support.FPUtil.rounding_mode
1462+
libc.src.__support.FPUtil.bfloat16
1463+
libc.src.__support.macros.optimization
1464+
)
1465+
14441466
add_entrypoint_object(
14451467
exp2
14461468
SRCS

libc/src/math/generic/expbf16.cpp

Lines changed: 184 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,184 @@
1+
//===-- BFloat16 e^x function ---------------------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#include "src/math/expbf16.h"
10+
11+
#include "hdr/errno_macros.h"
12+
#include "hdr/fenv_macros.h"
13+
#include "src/__support/FPUtil/FEnvImpl.h"
14+
#include "src/__support/FPUtil/FPBits.h"
15+
#include "src/__support/FPUtil/PolyEval.h"
16+
#include "src/__support/FPUtil/bfloat16.h"
17+
#include "src/__support/FPUtil/cast.h"
18+
#include "src/__support/FPUtil/except_value_utils.h"
19+
#include "src/__support/FPUtil/multiply_add.h"
20+
#include "src/__support/FPUtil/nearest_integer.h"
21+
#include "src/__support/FPUtil/rounding_mode.h"
22+
#include "src/__support/common.h"
23+
#include "src/__support/macros/config.h"
24+
#include "src/__support/macros/optimization.h"
25+
#include "src/__support/macros/properties/types.h"
26+
27+
namespace LIBC_NAMESPACE_DECL {
28+
29+
// Generated by Sollya with the following commands:
30+
// > display = hexadecimal;
31+
// > for i from -96 to 88 by 8 do print(i, round(exp(i), SG, RN) @ "f,");
32+
static constexpr float EXP_HI[24] = {
33+
0x1.6a4p-139f, 0x1.07b71p-127f, 0x1.7fd974p-116f, 0x1.175afp-104f,
34+
0x1.969d48p-93f, 0x1.27ec46p-81f, 0x1.aebabap-70f, 0x1.397924p-58f,
35+
0x1.c8465p-47f, 0x1.4c1078p-35f, 0x1.e355bcp-24f, 0x1.5fc21p-12f,
36+
0x1p0f, 0x1.749ea8p11f, 0x1.0f2ebep23f, 0x1.8ab7fcp34f,
37+
0x1.1f43fcp46f, 0x1.a220d4p57f, 0x1.304d6ap69f, 0x1.baed16p80f,
38+
0x1.425982p92f, 0x1.d531d8p103f, 0x1.55779cp115f, 0x1.f1056ep126f,
39+
};
40+
41+
// Generated by Sollya with the following commands:
42+
// > display = hexadecimal;
43+
// > for i from 0 to 7.75 by 0.25 do print(round(exp(i), SG, RN) @ "f,");
44+
static constexpr float EXP_MID[32] = {
45+
0x1p0f, 0x1.48b5e4p0f, 0x1.a61298p0f, 0x1.0ef9dcp1f,
46+
0x1.5bf0a8p1f, 0x1.bec38ep1f, 0x1.1ed3fep2f, 0x1.704b6ap2f,
47+
0x1.d8e64cp2f, 0x1.2f9b88p3f, 0x1.85d6fep3f, 0x1.f4907p3f,
48+
0x1.415e5cp4f, 0x1.9ca53cp4f, 0x1.08ec72p5f, 0x1.542b2ep5f,
49+
0x1.b4c902p5f, 0x1.186bf2p6f, 0x1.68118ap6f, 0x1.ce564ep6f,
50+
0x1.28d38ap7f, 0x1.7d21eep7f, 0x1.e96244p7f, 0x1.3a30dp8f,
51+
0x1.936dc6p8f, 0x1.0301a4p9f, 0x1.4c9222p9f, 0x1.ab0786p9f,
52+
0x1.122886p10f, 0x1.6006b6p10f, 0x1.c402b6p10f, 0x1.223252p11f,
53+
};
54+
55+
constexpr fputil::ExceptValues<bfloat16, 4> EXPBF16_EXCEPTS = {{
56+
// (input, RZ output, RU offset, RD offset, RN offset)
57+
// x = 0x40DB (6.84375)
58+
// MPFR: RU=0x446B, RD=0x446A, RZ=0x446A, RN=0x446B
59+
{0x40DBU, 0x446AU, 1U, 0U, 1U},
60+
// x = 0x419D, keep original
61+
{0x419DU, 0x4D9FU, 1U, 0U, 0U},
62+
// x = 0x41F9 (31.125)
63+
// MPFR: RU=0x55F0, RD=0x55EF, RZ=0x55EF, RN=0x55F0
64+
{0x41F9U, 0x55EFU, 1U, 0U, 1U},
65+
// x = 0xC19F (-19.875)
66+
// MPFR: RU=0x3121, RD=0x3120, RZ=0x3120, RN=0x3121
67+
{0xC19FU, 0x3120U, 1U, 0U, 1U},
68+
}};
69+
70+
LLVM_LIBC_FUNCTION(bfloat16, expbf16, (bfloat16 x)) {
71+
using FPBits = fputil::FPBits<bfloat16>;
72+
FPBits x_bits(x);
73+
74+
uint16_t x_u = x_bits.uintval();
75+
uint16_t x_abs = x_u & 0x7fffU;
76+
77+
// 0 <= |x| <= 2^(-3), or |x| >= 89, or x is NaN
78+
if (LIBC_UNLIKELY(x_abs <= 0x3e00U || x_abs >= 0x42b2U)) {
79+
80+
// exp(NaN) = NaN
81+
if (x_bits.is_nan()) {
82+
if (x_bits.is_signaling_nan()) {
83+
fputil::raise_except_if_required(FE_INVALID);
84+
return FPBits::quiet_nan().get_val();
85+
}
86+
87+
return x;
88+
}
89+
90+
// if x >= 89
91+
if (x_bits.is_pos() && x_abs >= 0x42b2U) {
92+
// exp(inf) = inf
93+
if (x_bits.is_inf())
94+
return FPBits::inf().get_val();
95+
96+
switch (fputil::quick_get_round()) {
97+
case FE_TONEAREST:
98+
case FE_UPWARD:
99+
fputil::set_errno_if_required(ERANGE);
100+
fputil::raise_except_if_required(FE_OVERFLOW);
101+
return FPBits::inf().get_val();
102+
default:
103+
return FPBits::max_normal().get_val();
104+
}
105+
}
106+
107+
// x <= -93
108+
if (x_u >= 0xc2baU) {
109+
// exp(-inf) = +0
110+
if (x_bits.is_inf())
111+
return FPBits::zero().get_val();
112+
113+
fputil::set_errno_if_required(ERANGE);
114+
fputil::raise_except_if_required(FE_UNDERFLOW | FE_INEXACT);
115+
116+
switch (fputil::quick_get_round()) {
117+
case FE_UPWARD:
118+
return FPBits::min_subnormal().get_val();
119+
default:
120+
return FPBits::zero().get_val();
121+
}
122+
}
123+
124+
// 0 < |x| <= 2^(-3)
125+
if (x_abs <= 0x3e00U && !x_bits.is_zero()) {
126+
float xf = static_cast<float>(x);
127+
// Degree-3 minimax polynomial generated by Sollya with the following
128+
// commands:
129+
// > display = hexadecimal;
130+
// > P = fpminimax(expm1(x)/x, 2, [|SG...|], [-2^-7, 2^-7]);
131+
// > 1 + x * P;
132+
// 0x1p0 + x * (0x1p0 + x * (0x1.00004p-1 + x * 0x1.555578p-3))
133+
return fputil::cast<bfloat16>(
134+
fputil::polyeval(xf, 0x1p+0f, 0x1p+0f, 0x1.0004p-1f, 0x1.555778p-3f));
135+
}
136+
137+
// exp(0) = 1
138+
if (x_bits.is_zero())
139+
return bfloat16(1.0f);
140+
}
141+
142+
if (auto r = EXPBF16_EXCEPTS.lookup(x_u); LIBC_UNLIKELY(r.has_value()))
143+
return r.value();
144+
145+
// For -93 < x < 89, we do the following range reduction:
146+
// x = hi + mid + lo
147+
// where,
148+
// hi / 2^3 is an integer
149+
// mid * 2^2 is an integer
150+
// -2^3 <= lo <= 2^3
151+
// also, hi + mid = round(4 * x) / x
152+
// then,
153+
// exp(x) = exp(hi + mid + lo)
154+
// = exp(hi) * exp(mid) * exp(lo)
155+
// we store 184/8 + 1 = 24 values for looking up exp(hi)
156+
// from -96 to 88
157+
// we store 8*4 = 32 values for looking up exp(mid) since
158+
// mid will always have the bit pattern |bbb.bb| where
159+
// b can be either 0 or 1
160+
161+
float xf = static_cast<float>(x);
162+
float kf = fputil::nearest_integer(xf * 4.0f);
163+
int x_hi_mid = static_cast<int>(kf);
164+
int x_hi = x_hi_mid >> 5;
165+
int x_mid = x_hi_mid & 0b11111;
166+
// lo = x - (hi + mid) = round(x * 4) / (-4) + x
167+
float lo = fputil::multiply_add(kf, -0.25f, xf);
168+
169+
float exp_hi = EXP_HI[x_hi + 12];
170+
float exp_mid = EXP_MID[x_mid];
171+
172+
// Degree-3 minimax polynomial generated by Sollya with the following
173+
// commands:
174+
// > display = hexadecimal;
175+
// > P = fpminimax(expm1(x)/x, 2, [|SG...|], [-2^-7, 2^-7]);
176+
// > 1 + x * P;
177+
// 0x1p0 + x * (0x1p0 + x * (0x1.00004p-1 + x * 0x1.555578p-3))
178+
float exp_lo =
179+
fputil::polyeval(lo, 0x1p+0f, 0x1p+0f, 0x1.0004p-1f, 0x1.555778p-3f);
180+
181+
return fputil::cast<bfloat16>(exp_hi * exp_mid * exp_lo);
182+
}
183+
184+
} // namespace LIBC_NAMESPACE_DECL

0 commit comments

Comments
 (0)