improved erf and erfc, and formatted lgammaf and tgammaf

ZERICO2005 · adriweb · commit fd4936debfe7 · 2025-03-14T22:36:21.000+01:00
diff --git a/src/libc/__float32_constants.h b/src/libc/__float32_constants.h
@@ -18,12 +18,13 @@
 #define F32_LOG2E  1.442695040888963407359924681001892f /* log2(e) */
 #define F32_LOG10E 0.434294481903251827651128918916605f /* log10(e) */
 
-#define F32_PI         3.141592653589793238462643383279502f /* pi */
-#define F32_PI2        1.5707963267948966192313216916398f   /* 1/2 * pi */
-#define F32_PI4        0.78539816339744830961566084581988f  /* 1/4 * pi */
-#define F32_3PI4       2.3561944901923449288469825374596f   /* 3/4 * pi */
-#define F32_INV_PI     0.318309886183790671537767526745028f /* 1 / pi */
-#define F32_INV_SQRTPI 0.564189583547756286948079451560772f /* 1 / sqrt(pi) */
+#define F32_PI           3.141592653589793238462643383279502f /* pi */
+#define F32_PI2          1.5707963267948966192313216916398f   /* 1/2 * pi */
+#define F32_PI4          0.78539816339744830961566084581988f  /* 1/4 * pi */
+#define F32_3PI4         2.3561944901923449288469825374596f   /* 3/4 * pi */
+#define F32_INV_PI       0.318309886183790671537767526745028f /* 1 / pi */
+#define F32_INV_SQRTPI   0.564189583547756286948079451560772f /* 1 / sqrt(pi) */
+#define F32_2_DIV_SQRTPI 1.1283791670955125738961589031215f   /* 2 / sqrt(pi) */
 
 #define F32_EGAMMA 0.577215664901532860606512090082402f /* Euler-Mascheroni constant */
 #define F32_PHI    1.618033988749894848204586834365638f /* Golden Ratio */
diff --git a/src/libc/__float64_constants.h b/src/libc/__float64_constants.h
@@ -18,12 +18,13 @@
 #define F64_LOG2E  1.442695040888963407359924681001892L /* log2(e) */
 #define F64_LOG10E 0.434294481903251827651128918916605L /* log10(e) */
 
-#define F64_PI         3.141592653589793238462643383279502L /* pi */
-#define F64_PI2        1.5707963267948966192313216916398L   /* 1/2 * pi */
-#define F64_PI4        0.78539816339744830961566084581988L  /* 1/4 * pi */
-#define F64_3PI4       2.3561944901923449288469825374596L   /* 3/4 * pi */
-#define F64_INV_PI     0.318309886183790671537767526745028L /* 1 / pi */
-#define F64_INV_SQRTPI 0.564189583547756286948079451560772L /* 1 / sqrt(pi) */
+#define F64_PI           3.141592653589793238462643383279502L /* pi */
+#define F64_PI2          1.5707963267948966192313216916398L   /* 1/2 * pi */
+#define F64_PI4          0.78539816339744830961566084581988L  /* 1/4 * pi */
+#define F64_3PI4         2.3561944901923449288469825374596L   /* 3/4 * pi */
+#define F64_INV_PI       0.318309886183790671537767526745028L /* 1 / pi */
+#define F64_INV_SQRTPI   0.564189583547756286948079451560772L /* 1 / sqrt(pi) */
+#define F64_2_DIV_SQRTPI 1.1283791670955125738961589031215L   /* 2 / sqrt(pi) */
 
 #define F64_EGAMMA 0.577215664901532860606512090082402L /* Euler-Mascheroni constant */
 #define F64_PHI    1.618033988749894848204586834365638L /* Golden Ratio */
diff --git a/src/libc/erfcf.c b/src/libc/erfcf.c
@@ -3,20 +3,27 @@
 /**
  * Algorithm from:
  * https://en.wikipedia.org/wiki/Error_function#Approximation_with_elementary_functions
+ *
+ * @remarks Worst-case error (minimum precision) in ulp:
+ * ulp of +16     at +0x1.a13834p-1 with ideal expf (x < 1.0f)
+ * ulp of +594482 at +0x1.251634p+3 with ideal expf (x >= 1.0f)
  */
 float erfcf(float x)
 {
     static const float
-        p = 0.47047f,
-        a1 = 0.3480242f,
-        a2 = -0.0958798f,
-        a3 = 0.7478556f;
+        p = 0.3275911f,
+        a1 = 0.254829592f,
+        a2 = -0.284496736f,
+        a3 = 1.421413741f,
+        a4 = -1.453152027f,
+        a5 = 1.061405429f;
     const float t = 1.0f / (1.0f + p * fabsf(x));
-    float ret = t * (a1 + t * (a2 + t * a3)) * expf(-x * x);
+    float ret = t * (a1 + t * (a2 + t * (a3 + t * (a4 + t * a5)))) * expf(-x * x);
     if (signbit(x)) {
         ret = 2.0f - ret;
     }
     return ret;
 }
+ 
 
 double erfc(double) __attribute__((alias("erfcf")));
diff --git a/src/libc/erfcl.c b/src/libc/erfcl.c
@@ -3,19 +3,36 @@
 /**
  * Algorithm from:
  * https://en.wikipedia.org/wiki/Error_function#Approximation_with_elementary_functions
+ *
+ * @remarks Worst-case error (minimum precision) in ulp:
+ * ulp of -516 at +2.373300261e+01 with ideal expl
  */
-long double erfcl(long double x)
-{
-    static const long double
-        p = 0.3275911L,
-        a1 = 0.254829592L,
-        a2 = -0.284496736L,
-        a3 = 1.421413741L,
-        a4 = -1.453152027L,
-        a5 = 1.061405429L;
-    const long double t = 1.0L / (1.0L + p * fabsl(x));
-    long double ret = t * (a1 + t * (a2 + t * (a3 + t * (a4 + t * a5)))) * expl(-x * x);
-    if (signbit(x)) {
+long double _erfcl_c(long double arg) {
+    long double x, x_sqr;
+    long double t0, t1, t2, t3, t4, t5;
+    long double ret;
+    
+    x = fabsl(arg);
+    x_sqr = x * x;
+    t0 = 0.56418958354775629L / (x + 2.06955023132914151L);
+    t1 =
+        (x_sqr + 2.71078540045147805L * x +  5.80755613130301624L) /
+        (x_sqr + 3.47954057099518960L * x + 12.06166887286239555L);
+    t2 =
+        (x_sqr + 3.47469513777439592L * x + 12.07402036406381411L) /
+        (x_sqr + 3.72068443960225092L * x +  8.44319781003968454L);
+    t3 =
+        (x_sqr + 4.00561509202259545L * x +  9.30596659485887898L) /
+        (x_sqr + 3.90225704029924078L * x +  6.36161630953880464L);
+    t4 =
+        (x_sqr + 5.16722705817812584L * x +  9.12661617673673262L) /
+        (x_sqr + 4.03296893109262491L * x +  5.13578530585681539L);
+    t5 =
+        (x_sqr + 5.95908795446633271L * x +  9.19435612886969243L) /
+        (x_sqr + 4.11240942957450885L * x +  4.48640329523408675L);
+    ret = ((((t0 * t1) * t2) * t3) * t4) * t5;
+    ret *= expl(-x_sqr);
+    if (signbit(arg)) {
         ret = 2.0L - ret;
     }
     return ret;
diff --git a/src/libc/erff.c b/src/libc/erff.c
@@ -1,8 +1,18 @@
 #include <math.h>
+#include "__float32_constants.h"
 
-float erff(float x)
-{
-    return 1 - erfcf(x);
+/**
+ * @remarks Worst-case error (minimum precision) in ulp:
+ * ulp of -9    at +0x1.00030ap-5 with ideal erfcf
+ * ulp of +5549 at +0x1.c46b04p-5 with current erfcf
+ */
+float erff(float arg) {
+    float x = fabsf(arg);
+    if (x < 0x1.0p-5f) {
+        return F32_2_DIV_SQRTPI * (arg - arg * arg * arg * F32_1_DIV_3);
+    }
+    x = 1.0f - erfcf(x);
+    return copysignf(x, arg);
 }
 
 double erf(double) __attribute__((alias("erff")));
diff --git a/src/libc/erfl.c b/src/libc/erfl.c
@@ -1,6 +1,16 @@
 #include <math.h>
+#include "__float64_constants.h"
 
-long double erfl(long double x)
-{
-    return 1 - erfcl(x);
+/**
+ * @remarks Worst-case error (minimum precision) in ulp:
+ * ulp of -513  at -0x1.35260d8034ac3p-10 with ideal erfcl
+ * ulp of +6702 at -0x1.4ef5ac6f23690p-10 with current erfcl
+ */
+long double erfl(long double arg) {
+    long double x = fabsl(arg);
+    if (x < 0x1.0p-10L) {
+        return F64_2_DIV_SQRTPI * (arg - arg * arg * arg * F64_1_DIV_3);
+    }
+    x = 1.0L - erfcl(x);
+    return copysignl(x, arg);
 }
diff --git a/src/libc/include/math.h b/src/libc/include/math.h
@@ -27,7 +27,7 @@ extern "C" {
 #define M_2_SQRTPI    1.12837916709551257390     /* 2/sqrt(pi)     */
 #define M_SQRT2       1.41421356237309504880     /* sqrt(2)        */
 #define M_SQRT1_2     0.707106781186547524401    /* 1/sqrt(2)      */
-#define M_LOG_2M_PI   1.83787706640934548        /* log(2*M_PI)    */
+/* deprecated, no longer defined: M_LOG_2M_PI = 1.83787706640934548 = log(2*M_PI) */
 
 #define FP_ILOGB0     (~__INT_MAX__)
 #define FP_ILOGBNAN     __INT_MAX__
diff --git a/src/libc/lgammaf.c b/src/libc/lgammaf.c
@@ -10,20 +10,30 @@
 
 #define N       8
 
-#define B0  1.0               /* Bernoulli numbers */
-#define B1  (-1.0 / 2.0)
-#define B2  ( 1.0 / 6.0)
-#define B4  (-1.0 / 30.0)
-#define B6  ( 1.0 / 42.0)
-#define B8  (-1.0 / 30.0)
-#define B10 ( 5.0 / 66.0)
-#define B12 (-691.0 / 2730.0)
-#define B14 ( 7.0 / 6.0)
-#define B16 (-3617.0 / 510.0)
+/* Bernoulli numbers */
+#define B0  1.0f
+#define B1  (   -1.0f /    2.0f)
+#define B2  (    1.0f /    6.0f)
+#define B4  (   -1.0f /   30.0f)
+#define B6  (    1.0f /   42.0f)
+#define B8  (   -1.0f /   30.0f)
+#define B10 (    5.0f /   66.0f)
+#define B12 ( -691.0f / 2730.0f)
+#define B14 (    7.0f /    6.0f)
+#define B16 (-3617.0f /  510.0f)
 
+#define ln_pi_div_2 0.91893853320467274178032973640562f /* 0.5 * ln(2 * pi), i.e. ln(sqrt(2 * pi)) */
+
+/**
+ * @remarks Minimum relative precision of:
+ * 2^-16.97 at +3.205794811e+00 with ideal logf (x > 3.0f)
+ * 2^-16.71 at +2.940585591e-39 with ideal logf (x > 0.0f && x < 0.5f)
+ * 2^-13.68 at +1.591955781e+00 with ideal logf (x > 1.25f && x < 1.75f)
+ * @note input values 0.5f - 3.0f have very low precision
+ */
 float lgammaf(float x) { /* the natural logarithm of the Gamma function. */
     float v, w;
-    v = 1.0;
+    v = 1.0f;
 
     /**
      * This loop will take forever to terminate if `x < -100.0f`, so we have a
@@ -38,12 +48,12 @@ float lgammaf(float x) { /* the natural logarithm of the Gamma function. */
         v *= x;
         x++;
     }
-    w = 1.0 / (x * x);
-    return ((((((((B16 / (16.0 * 15.0))  * w + (B14 / (14.0 * 13.0))) * w
-                + (B12 / (12.0 * 11.0))) * w + (B10 / (10.0 *  9.0))) * w
-                + (B8  / ( 8.0 *  7.0))) * w + (B6  / ( 6.0 *  5.0))) * w
-                + (B4  / ( 4.0 *  3.0))) * w + (B2  / ( 2.0 *  1.0))) / x
-                + 0.5 * (float)M_LOG_2M_PI - logf(v) - x + (x - 0.5) * logf(x);
+    w = 1.0f / (x * x);
+    return ((((((((B16 / (16.0f * 15.0f))  * w + (B14 / (14.0f * 13.0f))) * w
+                + (B12 / (12.0f * 11.0f))) * w + (B10 / (10.0f *  9.0f))) * w
+                + (B8  / ( 8.0f *  7.0f))) * w + (B6  / ( 6.0f *  5.0f))) * w
+                + (B4  / ( 4.0f *  3.0f))) * w + (B2  / ( 2.0f *  1.0f))) / x
+                + ln_pi_div_2 - logf(v) - x + (x - 0.5f) * logf(x);
 }
 
 double lgamma(double) __attribute__((alias("lgammaf")));
diff --git a/src/libc/tgammaf.c b/src/libc/tgammaf.c
@@ -5,25 +5,34 @@
  *  http://oku.edu.mie-u.ac.jp/~okumura/algo/
 */
 
-#include <math.h>
 #include <errno.h>
+#include <math.h>
+#include <stdbool.h>
 
+/**
+ * @remarks Minimum relative precision of:
+ * 2^-15.34 at +3.226818848e+01 with ideal sinf, expf, and lgammaf (x > 0.0f)
+ * 2^-17    at +2.940585591e-39 with ideal sinf, expf, and lgammaf (x > 0.0f && x < 10.0f)
+ * 2^-17.91 at +9.224035263e+00 with ideal sinf, expf, and lgammaf (x > 0.1f && x < 10.0f)
+ */
 float tgammaf(float x) { /* Gamma function */
-    if (x == 0.0) { /* Pole Error */
+    if (x == 0.0f) { /* Pole Error */
         errno = ERANGE;
         return signbit(x) ? -HUGE_VALF : HUGE_VALF;
     }
-    if (x < 0) {
-        int sign;
-    static float zero = 0.0;
-        float i, f;
+    if (x < 0.0f) {
+    static float zero = 0.0f;
+        float i, f, ret;
         f = modff(-x, &i);
-        if (f == 0.0) { /* Domain Error */
+        if (f == 0.0f) { /* Domain Error */
             errno = EDOM;
             return zero/zero; /* probably better to return NAN here */
         }
-        sign = (fmodf(i, 2.0) != 0.0) ? 1 : -1;
-        return (sign * M_PI) / (sinf(M_PI * f) * expf(lgammaf(1 - x)));
+        ret = (float)M_PI / (sinf((float)M_PI * f) * expf(lgammaf(1.0f - x)));
+        if (((unsigned int)i & 0x1) == 0) {
+            ret = -ret;
+        }
+        return ret;
     }
     return expf(lgammaf(x));
 }