Skip to content

Commit fd4936d

Browse files
ZERICO2005adriweb
authored andcommitted
improved erf and erfc, and formatted lgammaf and tgammaf
1 parent 8fc5716 commit fd4936d

File tree

9 files changed

+127
-62
lines changed

9 files changed

+127
-62
lines changed

src/libc/__float32_constants.h

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -18,12 +18,13 @@
1818
#define F32_LOG2E 1.442695040888963407359924681001892f /* log2(e) */
1919
#define F32_LOG10E 0.434294481903251827651128918916605f /* log10(e) */
2020

21-
#define F32_PI 3.141592653589793238462643383279502f /* pi */
22-
#define F32_PI2 1.5707963267948966192313216916398f /* 1/2 * pi */
23-
#define F32_PI4 0.78539816339744830961566084581988f /* 1/4 * pi */
24-
#define F32_3PI4 2.3561944901923449288469825374596f /* 3/4 * pi */
25-
#define F32_INV_PI 0.318309886183790671537767526745028f /* 1 / pi */
26-
#define F32_INV_SQRTPI 0.564189583547756286948079451560772f /* 1 / sqrt(pi) */
21+
#define F32_PI 3.141592653589793238462643383279502f /* pi */
22+
#define F32_PI2 1.5707963267948966192313216916398f /* 1/2 * pi */
23+
#define F32_PI4 0.78539816339744830961566084581988f /* 1/4 * pi */
24+
#define F32_3PI4 2.3561944901923449288469825374596f /* 3/4 * pi */
25+
#define F32_INV_PI 0.318309886183790671537767526745028f /* 1 / pi */
26+
#define F32_INV_SQRTPI 0.564189583547756286948079451560772f /* 1 / sqrt(pi) */
27+
#define F32_2_DIV_SQRTPI 1.1283791670955125738961589031215f /* 2 / sqrt(pi) */
2728

2829
#define F32_EGAMMA 0.577215664901532860606512090082402f /* Euler-Mascheroni constant */
2930
#define F32_PHI 1.618033988749894848204586834365638f /* Golden Ratio */

src/libc/__float64_constants.h

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -18,12 +18,13 @@
1818
#define F64_LOG2E 1.442695040888963407359924681001892L /* log2(e) */
1919
#define F64_LOG10E 0.434294481903251827651128918916605L /* log10(e) */
2020

21-
#define F64_PI 3.141592653589793238462643383279502L /* pi */
22-
#define F64_PI2 1.5707963267948966192313216916398L /* 1/2 * pi */
23-
#define F64_PI4 0.78539816339744830961566084581988L /* 1/4 * pi */
24-
#define F64_3PI4 2.3561944901923449288469825374596L /* 3/4 * pi */
25-
#define F64_INV_PI 0.318309886183790671537767526745028L /* 1 / pi */
26-
#define F64_INV_SQRTPI 0.564189583547756286948079451560772L /* 1 / sqrt(pi) */
21+
#define F64_PI 3.141592653589793238462643383279502L /* pi */
22+
#define F64_PI2 1.5707963267948966192313216916398L /* 1/2 * pi */
23+
#define F64_PI4 0.78539816339744830961566084581988L /* 1/4 * pi */
24+
#define F64_3PI4 2.3561944901923449288469825374596L /* 3/4 * pi */
25+
#define F64_INV_PI 0.318309886183790671537767526745028L /* 1 / pi */
26+
#define F64_INV_SQRTPI 0.564189583547756286948079451560772L /* 1 / sqrt(pi) */
27+
#define F64_2_DIV_SQRTPI 1.1283791670955125738961589031215L /* 2 / sqrt(pi) */
2728

2829
#define F64_EGAMMA 0.577215664901532860606512090082402L /* Euler-Mascheroni constant */
2930
#define F64_PHI 1.618033988749894848204586834365638L /* Golden Ratio */

src/libc/erfcf.c

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -3,20 +3,27 @@
33
/**
44
* Algorithm from:
55
* https://en.wikipedia.org/wiki/Error_function#Approximation_with_elementary_functions
6+
*
7+
* @remarks Minimum ulp:
8+
* ulp of +16 at +0x1.a13834p-1 with ideal expf (x < 1.0f)
9+
* ulp of +594482 at +0x1.251634p+3 with ideal expf (x >= 1.0f)
610
*/
711
float erfcf(float x)
812
{
913
static const float
10-
p = 0.47047f,
11-
a1 = 0.3480242f,
12-
a2 = -0.0958798f,
13-
a3 = 0.7478556f;
14+
p = 0.3275911f,
15+
a1 = 0.254829592f,
16+
a2 = -0.284496736f,
17+
a3 = 1.421413741f,
18+
a4 = -1.453152027f,
19+
a5 = 1.061405429f;
1420
const float t = 1.0f / (1.0f + p * fabsf(x));
15-
float ret = t * (a1 + t * (a2 + t * a3)) * expf(-x * x);
21+
float ret = t * (a1 + t * (a2 + t * (a3 + t * (a4 + t * a5)))) * expf(-x * x);
1622
if (signbit(x)) {
1723
ret = 2.0f - ret;
1824
}
1925
return ret;
2026
}
27+
2128

2229
double erfc(double) __attribute__((alias("erfcf")));

src/libc/erfcl.c

Lines changed: 29 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -3,19 +3,36 @@
33
/**
44
* Algorithm from:
55
* https://en.wikipedia.org/wiki/Error_function#Approximation_with_elementary_functions
6+
*
7+
* @remarks Minimum ulp:
8+
* ulp of -516 at +2.373300261e+01 with ideal expl
69
*/
7-
long double erfcl(long double x)
8-
{
9-
static const long double
10-
p = 0.3275911L,
11-
a1 = 0.254829592L,
12-
a2 = -0.284496736L,
13-
a3 = 1.421413741L,
14-
a4 = -1.453152027L,
15-
a5 = 1.061405429L;
16-
const long double t = 1.0L / (1.0L + p * fabsl(x));
17-
long double ret = t * (a1 + t * (a2 + t * (a3 + t * (a4 + t * a5)))) * expl(-x * x);
18-
if (signbit(x)) {
10+
long double _erfcl_c(long double arg) {
11+
long double x, x_sqr;
12+
long double t0, t1, t2, t3, t4, t5;
13+
long double ret;
14+
15+
x = fabsl(arg);
16+
x_sqr = x * x;
17+
t0 = 0.56418958354775629L / (x + 2.06955023132914151L);
18+
t1 =
19+
(x_sqr + 2.71078540045147805L * x + 5.80755613130301624L) /
20+
(x_sqr + 3.47954057099518960L * x + 12.06166887286239555L);
21+
t2 =
22+
(x_sqr + 3.47469513777439592L * x + 12.07402036406381411L) /
23+
(x_sqr + 3.72068443960225092L * x + 8.44319781003968454L);
24+
t3 =
25+
(x_sqr + 4.00561509202259545L * x + 9.30596659485887898L) /
26+
(x_sqr + 3.90225704029924078L * x + 6.36161630953880464L);
27+
t4 =
28+
(x_sqr + 5.16722705817812584L * x + 9.12661617673673262L) /
29+
(x_sqr + 4.03296893109262491L * x + 5.13578530585681539L);
30+
t5 =
31+
(x_sqr + 5.95908795446633271L * x + 9.19435612886969243L) /
32+
(x_sqr + 4.11240942957450885L * x + 4.48640329523408675L);
33+
ret = ((((t0 * t1) * t2) * t3) * t4) * t5;
34+
ret *= expl(-x_sqr);
35+
if (signbit(arg)) {
1936
ret = 2.0L - ret;
2037
}
2138
return ret;

src/libc/erff.c

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,18 @@
11
#include <math.h>
2+
#include "__float32_constants.h"
23

3-
float erff(float x)
4-
{
5-
return 1 - erfcf(x);
4+
/**
5+
* @remarks Minimum ulp:
6+
* ulp of -9 at +0x1.00030ap-5 with ideal erfcf
7+
* ulp of +5549 at +0x1.c46b04p-5 with current erfcf
8+
*/
9+
float erff(float arg) {
10+
float x = fabsf(arg);
11+
if (x < 0x1.0p-5f) {
12+
return F32_2_DIV_SQRTPI * (arg - arg * arg * arg * F32_1_DIV_3);
13+
}
14+
x = 1.0f - erfcf(x);
15+
return copysignf(x, arg);
616
}
717

818
double erf(double) __attribute__((alias("erff")));

src/libc/erfl.c

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,16 @@
11
#include <math.h>
2+
#include "__float64_constants.h"
23

3-
long double erfl(long double x)
4-
{
5-
return 1 - erfcl(x);
4+
/**
5+
* @remarks Minimum ulp:
6+
* ulp of -513 at -0x1.35260d8034ac3p-10 with ideal erfcl
7+
* ulp of +6702 at -0x1.4ef5ac6f23690p-10 with current erfcl
8+
*/
9+
long double erfl(long double arg) {
10+
long double x = fabsl(arg);
11+
if (x < 0x1.0p-10L) {
12+
return F64_2_DIV_SQRTPI * (arg - arg * arg * arg * F64_1_DIV_3);
13+
}
14+
x = 1.0L - erfcl(x);
15+
return copysignl(x, arg);
616
}

src/libc/include/math.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ extern "C" {
2727
#define M_2_SQRTPI 1.12837916709551257390 /* 2/sqrt(pi) */
2828
#define M_SQRT2 1.41421356237309504880 /* sqrt(2) */
2929
#define M_SQRT1_2 0.707106781186547524401 /* 1/sqrt(2) */
30-
#define M_LOG_2M_PI 1.83787706640934548 /* log(2*M_PI) */
30+
// /* deprecated */ #define M_LOG_2M_PI 1.83787706640934548 /* log(2*M_PI) */
3131

3232
#define FP_ILOGB0 (~__INT_MAX__)
3333
#define FP_ILOGBNAN __INT_MAX__

src/libc/lgammaf.c

Lines changed: 27 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -10,20 +10,30 @@
1010

1111
#define N 8
1212

13-
#define B0 1.0 /* Bernoulli numbers */
14-
#define B1 (-1.0 / 2.0)
15-
#define B2 ( 1.0 / 6.0)
16-
#define B4 (-1.0 / 30.0)
17-
#define B6 ( 1.0 / 42.0)
18-
#define B8 (-1.0 / 30.0)
19-
#define B10 ( 5.0 / 66.0)
20-
#define B12 (-691.0 / 2730.0)
21-
#define B14 ( 7.0 / 6.0)
22-
#define B16 (-3617.0 / 510.0)
13+
/* Bernoulli numbers */
14+
#define B0 1.0f
15+
#define B1 ( -1.0f / 2.0f)
16+
#define B2 ( 1.0f / 6.0f)
17+
#define B4 ( -1.0f / 30.0f)
18+
#define B6 ( 1.0f / 42.0f)
19+
#define B8 ( -1.0f / 30.0f)
20+
#define B10 ( 5.0f / 66.0f)
21+
#define B12 ( -691.0f / 2730.0f)
22+
#define B14 ( 7.0f / 6.0f)
23+
#define B16 (-3617.0f / 510.0f)
2324

25+
#define ln_pi_div_2 0.91893853320467274178032973640562f
26+
27+
/**
28+
* @remarks Minimum relative precision of:
29+
* 2^-16.97 at +3.205794811e+00 with ideal logf (x > 3.0f)
30+
* 2^-16.71 at +2.940585591e-39 with ideal logf (x > 0.0f && x < 0.5f)
31+
* 2^-13.68 at +1.591955781e+00 with ideal logf (x > 1.25f && x < 1.75f)
32+
* @note input values 0.5f - 3.0f have very low precision
33+
*/
2434
float lgammaf(float x) { /* the natural logarithm of the Gamma function. */
2535
float v, w;
26-
v = 1.0;
36+
v = 1.0f;
2737

2838
/**
2939
* This loop will take forever to terminate if `x < -100.0f`, so we have a
@@ -38,12 +48,12 @@ float lgammaf(float x) { /* the natural logarithm of the Gamma function. */
3848
v *= x;
3949
x++;
4050
}
41-
w = 1.0 / (x * x);
42-
return ((((((((B16 / (16.0 * 15.0)) * w + (B14 / (14.0 * 13.0))) * w
43-
+ (B12 / (12.0 * 11.0))) * w + (B10 / (10.0 * 9.0))) * w
44-
+ (B8 / ( 8.0 * 7.0))) * w + (B6 / ( 6.0 * 5.0))) * w
45-
+ (B4 / ( 4.0 * 3.0))) * w + (B2 / ( 2.0 * 1.0))) / x
46-
+ 0.5 * (float)M_LOG_2M_PI - logf(v) - x + (x - 0.5) * logf(x);
51+
w = 1.0f / (x * x);
52+
return ((((((((B16 / (16.0f * 15.0f)) * w + (B14 / (14.0f * 13.0f))) * w
53+
+ (B12 / (12.0f * 11.0f))) * w + (B10 / (10.0f * 9.0f))) * w
54+
+ (B8 / ( 8.0f * 7.0f))) * w + (B6 / ( 6.0f * 5.0f))) * w
55+
+ (B4 / ( 4.0f * 3.0f))) * w + (B2 / ( 2.0f * 1.0f))) / x
56+
+ ln_pi_div_2 - logf(v) - x + (x - 0.5f) * logf(x);
4757
}
4858

4959
double lgamma(double) __attribute__((alias("lgammaf")));

src/libc/tgammaf.c

Lines changed: 18 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -5,25 +5,34 @@
55
* http://oku.edu.mie-u.ac.jp/~okumura/algo/
66
*/
77

8-
#include <math.h>
98
#include <errno.h>
9+
#include <math.h>
10+
#include <stdbool.h>
1011

12+
/**
13+
* @remarks Minimum relative precision of:
14+
* 2^-15.34 at +3.226818848e+01 with ideal sinf expf and lgammaf (x > 0.0f)
15+
* 2^-17 at +2.940585591e-39 with ideal sinf expf and lgammaf (x > 0.0f && x < 10.0f)
16+
* 2^-17.91 at +9.224035263e+00 with ideal sinf expf and lgammaf (x > 0.1f && x < 10.0f)
17+
*/
1118
float tgammaf(float x) { /* Gamma function */
12-
if (x == 0.0) { /* Pole Error */
19+
if (x == 0.0f) { /* Pole Error */
1320
errno = ERANGE;
1421
return signbit(x) ? -HUGE_VALF : HUGE_VALF;
1522
}
16-
if (x < 0) {
17-
int sign;
18-
static float zero = 0.0;
19-
float i, f;
23+
if (x < 0.0f) {
24+
static float zero = 0.0f;
25+
float i, f, ret;
2026
f = modff(-x, &i);
21-
if (f == 0.0) { /* Domain Error */
27+
if (f == 0.0f) { /* Domain Error */
2228
errno = EDOM;
2329
return zero/zero; /* probably better to return NAN here */
2430
}
25-
sign = (fmodf(i, 2.0) != 0.0) ? 1 : -1;
26-
return (sign * M_PI) / (sinf(M_PI * f) * expf(lgammaf(1 - x)));
31+
ret = (float)M_PI / (sinf((float)M_PI * f) * expf(lgammaf(1.0f - x)));
32+
if (((unsigned int)i & 0x1) == 0) {
33+
ret = -ret;
34+
}
35+
return ret;
2736
}
2837
return expf(lgammaf(x));
2938
}

0 commit comments

Comments
 (0)