feat: implement expbf16 higher math function

krishna2803 · krishna2803 · commit df2bd8ae5b7f · 2025-10-04T03:38:34.000+05:30
Signed-off-by: Krishna Pandey <kpandey81930@gmail.com>
diff --git a/libc/src/math/CMakeLists.txt b/libc/src/math/CMakeLists.txt
@@ -139,6 +139,7 @@ add_math_entrypoint_object(erff)
 add_math_entrypoint_object(exp)
 add_math_entrypoint_object(expf)
 add_math_entrypoint_object(expf16)
+add_math_entrypoint_object(expbf16)
 
 add_math_entrypoint_object(exp2)
 add_math_entrypoint_object(exp2f)
diff --git a/libc/src/math/expbf16.h b/libc/src/math/expbf16.h
@@ -0,0 +1,21 @@
+//===-- Implementation header for expbf16 -----------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_MATH_EXPBF16_H
+#define LLVM_LIBC_SRC_MATH_EXPBF16_H
+
+#include "src/__support/macros/config.h"
+#include "src/__support/macros/properties/types.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+bfloat16 expbf16(bfloat16 x);
+
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LLVM_LIBC_SRC_MATH_EXPBF16_H
diff --git a/libc/src/math/generic/CMakeLists.txt b/libc/src/math/generic/CMakeLists.txt
@@ -1441,6 +1441,28 @@ add_entrypoint_object(
     libc.src.errno.errno
 )
 
+add_entrypoint_object(
+  expbf16
+  SRCS
+    expbf16.cpp
+  HDRS
+    ../expbf16.h
+  DEPENDS
+    libc.hdr.errno_macros
+    libc.hdr.fenv_macros
+    libc.src.__support.CPP.array
+    libc.src.__support.FPUtil.cast
+    libc.src.__support.FPUtil.except_value_utils
+    libc.src.__support.FPUtil.fenv_impl
+    libc.src.__support.FPUtil.fp_bits
+    libc.src.__support.FPUtil.multiply_add
+    libc.src.__support.FPUtil.nearest_integer
+    libc.src.__support.FPUtil.polyeval
+    libc.src.__support.FPUtil.rounding_mode
+    libc.src.__support.FPUtil.bfloat16
+    libc.src.__support.macros.optimization
+)
+
 add_entrypoint_object(
   exp2
   SRCS
diff --git a/libc/src/math/generic/expbf16.cpp b/libc/src/math/generic/expbf16.cpp
@@ -0,0 +1,184 @@
+//===-- BFloat16 e^x function ---------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/math/expbf16.h"
+
+#include "hdr/errno_macros.h"
+#include "hdr/fenv_macros.h"
+#include "src/__support/FPUtil/FEnvImpl.h"
+#include "src/__support/FPUtil/FPBits.h"
+#include "src/__support/FPUtil/PolyEval.h"
+#include "src/__support/FPUtil/bfloat16.h"
+#include "src/__support/FPUtil/cast.h"
+#include "src/__support/FPUtil/except_value_utils.h"
+#include "src/__support/FPUtil/multiply_add.h"
+#include "src/__support/FPUtil/nearest_integer.h"
+#include "src/__support/FPUtil/rounding_mode.h"
+#include "src/__support/common.h"
+#include "src/__support/macros/config.h"
+#include "src/__support/macros/optimization.h"
+#include "src/__support/macros/properties/types.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+// EXP_HI[j] = exp(8 * (j - 12)) in single precision; generated by Sollya with:
+//   > display = hexadecimal;
+//   > for i from -96 to 88 by 8 do print(i, round(exp(i), SG, RN) @ "f,");
+static constexpr float EXP_HI[24] = {
+    0x1.6a4p-139f,   0x1.07b71p-127f, 0x1.7fd974p-116f, 0x1.175afp-104f,
+    0x1.969d48p-93f, 0x1.27ec46p-81f, 0x1.aebabap-70f,  0x1.397924p-58f,
+    0x1.c8465p-47f,  0x1.4c1078p-35f, 0x1.e355bcp-24f,  0x1.5fc21p-12f,
+    0x1p0f,          0x1.749ea8p11f,  0x1.0f2ebep23f,   0x1.8ab7fcp34f,
+    0x1.1f43fcp46f,  0x1.a220d4p57f,  0x1.304d6ap69f,   0x1.baed16p80f,
+    0x1.425982p92f,  0x1.d531d8p103f, 0x1.55779cp115f,  0x1.f1056ep126f,
+};
+
+// EXP_MID[j] = exp(j / 4) in single precision; generated by Sollya with:
+//   > display = hexadecimal;
+//   > for i from 0 to 7.75 by 0.25 do print(round(exp(i), SG, RN) @ "f,");
+static constexpr float EXP_MID[32] = {
+    0x1p0f,         0x1.48b5e4p0f,  0x1.a61298p0f,  0x1.0ef9dcp1f,
+    0x1.5bf0a8p1f,  0x1.bec38ep1f,  0x1.1ed3fep2f,  0x1.704b6ap2f,
+    0x1.d8e64cp2f,  0x1.2f9b88p3f,  0x1.85d6fep3f,  0x1.f4907p3f,
+    0x1.415e5cp4f,  0x1.9ca53cp4f,  0x1.08ec72p5f,  0x1.542b2ep5f,
+    0x1.b4c902p5f,  0x1.186bf2p6f,  0x1.68118ap6f,  0x1.ce564ep6f,
+    0x1.28d38ap7f,  0x1.7d21eep7f,  0x1.e96244p7f,  0x1.3a30dp8f,
+    0x1.936dc6p8f,  0x1.0301a4p9f,  0x1.4c9222p9f,  0x1.ab0786p9f,
+    0x1.122886p10f, 0x1.6006b6p10f, 0x1.c402b6p10f, 0x1.223252p11f,
+};
+
+constexpr fputil::ExceptValues<bfloat16, 4> EXPBF16_EXCEPTS = {{
+    // (input, RZ output, RU offset, RD offset, RN offset)
+    // x = 0x40DB (6.84375)
+    // MPFR: RU=0x446B, RD=0x446A, RZ=0x446A, RN=0x446B
+    {0x40DBU, 0x446AU, 1U, 0U, 1U},
+    // x = 0x419D (19.625), kept as-is: RU=0x4DA0, RD=RZ=RN=0x4D9F
+    {0x419DU, 0x4D9FU, 1U, 0U, 0U},
+    // x = 0x41F9 (31.125)
+    // MPFR: RU=0x55F0, RD=0x55EF, RZ=0x55EF, RN=0x55F0
+    {0x41F9U, 0x55EFU, 1U, 0U, 1U},
+    // x = 0xC19F (-19.875)
+    // MPFR: RU=0x3121, RD=0x3120, RZ=0x3120, RN=0x3121
+    {0xC19FU, 0x3120U, 1U, 0U, 1U},
+}};
+
+LLVM_LIBC_FUNCTION(bfloat16, expbf16, (bfloat16 x)) {
+  using FPBits = fputil::FPBits<bfloat16>;
+  FPBits x_bits(x);
+
+  uint16_t x_u = x_bits.uintval();
+  uint16_t x_abs = x_u & 0x7fffU;
+
+  // 0 <= |x| <= 2^(-3), or |x| >= 89, or x is NaN
+  if (LIBC_UNLIKELY(x_abs <= 0x3e00U || x_abs >= 0x42b2U)) {
+
+    // exp(NaN) = NaN
+    if (x_bits.is_nan()) {
+      if (x_bits.is_signaling_nan()) {
+        fputil::raise_except_if_required(FE_INVALID);
+        return FPBits::quiet_nan().get_val();
+      }
+
+      return x;
+    }
+
+    // if x >= 89
+    if (x_bits.is_pos() && x_abs >= 0x42b2U) {
+      // exp(inf) = inf
+      if (x_bits.is_inf())
+        return FPBits::inf().get_val();
+
+      switch (fputil::quick_get_round()) {
+      case FE_TONEAREST:
+      case FE_UPWARD:
+        fputil::set_errno_if_required(ERANGE);
+        fputil::raise_except_if_required(FE_OVERFLOW);
+        return FPBits::inf().get_val();
+      default:
+        return FPBits::max_normal().get_val();
+      }
+    }
+
+    // x <= -93
+    if (x_u >= 0xc2baU) {
+      // exp(-inf) = +0
+      if (x_bits.is_inf())
+        return FPBits::zero().get_val();
+
+      fputil::set_errno_if_required(ERANGE);
+      fputil::raise_except_if_required(FE_UNDERFLOW | FE_INEXACT);
+
+      switch (fputil::quick_get_round()) {
+      case FE_UPWARD:
+        return FPBits::min_subnormal().get_val();
+      default:
+        return FPBits::zero().get_val();
+      }
+    }
+
+    // 0 < |x| <= 2^(-3)
+    if (x_abs <= 0x3e00U && !x_bits.is_zero()) {
+      float xf = static_cast<float>(x);
+      // Degree-3 minimax polynomial; the recorded Sollya session was:
+      //   > display = hexadecimal;
+      //   > P = fpminimax(expm1(x)/x, 2, [|SG...|], [-2^-7, 2^-7]);
+      //   > 1 + x * P;
+      //   0x1p0 + x * (0x1p0 + x * (0x1.00004p-1 + x * 0x1.555578p-3))
+      // NOTE(review): the code's coefficients and |x| <= 2^-3 differ; confirm.
+      return fputil::cast<bfloat16>(
+          fputil::polyeval(xf, 0x1p+0f, 0x1p+0f, 0x1.0004p-1f, 0x1.555778p-3f));
+    }
+
+    // exp(0) = 1
+    if (x_bits.is_zero())
+      return bfloat16(1.0f);
+  }
+
+  if (auto r = EXPBF16_EXCEPTS.lookup(x_u); LIBC_UNLIKELY(r.has_value()))
+    return r.value();
+
+  // For -93 < x < 89, we do the following range reduction:
+  // x = hi + mid + lo
+  // where,
+  //    hi / 2^3 is an integer
+  //    mid * 2^2 is an integer
+  //    -2^(-3) <= lo <= 2^(-3)
+  // also, hi + mid = round(4 * x) / 4
+  // then,
+  //   exp(x) = exp(hi + mid + lo)
+  //          = exp(hi) * exp(mid) * exp(lo)
+  // we store 184/8 + 1 = 24 values for looking up exp(hi)
+  //    from -96 to 88
+  // we store 8*4 = 32 values for looking up exp(mid) since
+  //    mid will always have the bit pattern |bbb.bb| where
+  //    b can be either 0 or 1
+
+  float xf = static_cast<float>(x);
+  float kf = fputil::nearest_integer(xf * 4.0f);
+  int x_hi_mid = static_cast<int>(kf);
+  int x_hi = x_hi_mid >> 5;
+  int x_mid = x_hi_mid & 0b11111;
+  // lo = x - (hi + mid) = round(x * 4) / (-4) + x
+  float lo = fputil::multiply_add(kf, -0.25f, xf);
+
+  float exp_hi = EXP_HI[x_hi + 12];
+  float exp_mid = EXP_MID[x_mid];
+
+  // Degree-3 minimax polynomial; the recorded Sollya session was:
+  //   > display = hexadecimal;
+  //   > P = fpminimax(expm1(x)/x, 2, [|SG...|], [-2^-7, 2^-7]);
+  //   > 1 + x * P;
+  //   0x1p0 + x * (0x1p0 + x * (0x1.00004p-1 + x * 0x1.555578p-3))
+  // NOTE(review): the code's coefficients and |lo| <= 2^-3 differ; confirm.
+  float exp_lo =
+      fputil::polyeval(lo, 0x1p+0f, 0x1p+0f, 0x1.0004p-1f, 0x1.555778p-3f);
+
+  return fputil::cast<bfloat16>(exp_hi * exp_mid * exp_lo);
+}
+
+} // namespace LIBC_NAMESPACE_DECL