Skip to content

Commit 184b2a9

Browse files
committed
optimized sinf and cosf, ensured result is [-1,+1]. Optimized atan and atan2
math opt 3, sin, cos, atan, atan2 part 2
1 parent 60c3994 commit 184b2a9

File tree

9 files changed

+108
-138
lines changed

9 files changed

+108
-138
lines changed

src/libc/atan2f.c

Lines changed: 11 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -2,26 +2,18 @@
22
#include <math.h>
33
#include "__float32_constants.h"
44

5-
float _atan2f_c(float arg1, float arg2) {
6-
float satan(float);
7-
8-
if ((arg1+arg2)==arg1) {
9-
if (arg1 >= 0.0f) {
10-
return (F32_PI2);
11-
} else {
12-
return (-F32_PI2);
13-
}
14-
} else if (arg2 < 0.0f) {
15-
if(arg1 >= 0.0f) {
16-
return (F32_PI - satan(-arg1/arg2));
17-
} else {
18-
return (-F32_PI + satan(arg1/arg2));
19-
}
20-
} else if (arg1 > 0.0f) {
21-
return (satan(arg1/arg2));
22-
} else {
23-
return (-satan(-arg1/arg2));
5+
static float _positive_atan2f(float y, float x) {
6+
float _f32_satan(float);
7+
if ((y+x)==y) {
8+
return F32_PI2;
9+
} else if (signbit(x)) {
10+
return F32_PI - _f32_satan(-y / x);
2411
}
12+
return _f32_satan(y / x);
13+
}
14+
15+
float _atan2f_c(float y, float x) {
16+
return copysignf(_positive_atan2f(fabsf(y), x), y);
2517
}
2618

2719
double _atan2_c(double, double) __attribute__((alias("_atan2f_c")));

src/libc/atan2l.c

Lines changed: 11 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -2,24 +2,16 @@
22
#include <math.h>
33
#include "__float64_constants.h"
44

5-
long double atan2l(long double arg1, long double arg2) {
6-
long double f64_satan(long double);
7-
8-
if ((arg1+arg2)==arg1) {
9-
if (arg1 >= 0.0L) {
10-
return (F64_PI2);
11-
} else {
12-
return (-F64_PI2);
13-
}
14-
} else if (arg2 < 0.0L) {
15-
if(arg1 >= 0.0L) {
16-
return (F64_PI - f64_satan(-arg1/arg2));
17-
} else {
18-
return (-F64_PI + f64_satan(arg1/arg2));
19-
}
20-
} else if (arg1 > 0.0L) {
21-
return (f64_satan(arg1/arg2));
22-
} else {
23-
return (-f64_satan(-arg1/arg2));
5+
static long double _positive_atan2l(long double y, long double x) {
6+
long double _f64_satan(long double);
7+
if ((y+x)==y) {
8+
return F64_PI2;
9+
} else if (signbit(x)) {
10+
return F64_PI - _f64_satan(-y / x);
2411
}
12+
return _f64_satan(y / x);
13+
}
14+
15+
long double atan2l(long double y, long double x) {
16+
return copysignl(_positive_atan2l(fabsl(y), x), y);
2517
}

src/libc/atanf.c

Lines changed: 7 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -38,13 +38,8 @@
3838
* ulp of +4 at +0x1.a85846p-2
3939
*/
4040
float _atanf_c(float arg) {
41-
float satan(float);
42-
43-
if (signbit(arg)) {
44-
return (-satan(-arg));
45-
} else {
46-
return (satan(arg));
47-
}
41+
float _f32_satan(float);
42+
return copysignf(_f32_satan(fabsf(arg)), arg);
4843
}
4944

5045
double _atan_c(double) __attribute__((alias("_atanf_c")));
@@ -58,8 +53,7 @@ double _atan_c(double) __attribute__((alias("_atanf_c")));
5853
* xatan evaluates a series valid in the
5954
* range [-0.414...,+0.414...].
6055
*/
61-
62-
static float xatan(float arg) {
56+
static float _f32_xatan(float arg) {
6357
float argsq;
6458
float value;
6559

@@ -73,17 +67,16 @@ static float xatan(float arg) {
7367
* satan reduces its argument (known to be positive)
7468
* to the range [0,0.414...] and calls xatan.
7569
*/
76-
77-
float satan(float arg) {
70+
float _f32_satan(float arg) {
7871
if (arg < F32_SQRT2_MINUS_1) {
79-
return (xatan(arg));
72+
return (_f32_xatan(arg));
8073
} else if (arg > F32_SQRT2_PLUS_1) {
8174
if (arg > 0x1.0p+25f) {
8275
/* rounds to pi/2 */
8376
return F32_PI2;
8477
}
85-
return (F32_PI2 - xatan(1.0f/arg));
78+
return (F32_PI2 - _f32_xatan(1.0f/arg));
8679
} else {
87-
return (F32_PI4 + xatan((arg-1.0f)/(arg+1.0f)));
80+
return (F32_PI4 + _f32_xatan((arg-1.0f)/(arg+1.0f)));
8881
}
8982
}

src/libc/atanl.c

Lines changed: 7 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -35,13 +35,8 @@
3535
* 2^-46.95 at +2.438776493e+00
3636
*/
3737
long double atanl(long double arg) {
38-
long double f64_satan(long double);
39-
40-
if (signbit(arg)) {
41-
return (-f64_satan(-arg));
42-
} else {
43-
return (f64_satan(arg));
44-
}
38+
long double _f64_satan(long double);
39+
return copysignl(_f64_satan(fabsl(arg)), arg);
4540
}
4641

4742
/**
@@ -54,7 +49,7 @@ long double atanl(long double arg) {
5449
* range [-0.414...,+0.414...].
5550
*/
5651

57-
static long double f64_xatan(long double arg) {
52+
static long double _f64_xatan(long double arg) {
5853
long double argsq;
5954
long double value;
6055

@@ -69,16 +64,16 @@ static long double f64_xatan(long double arg) {
6964
* to the range [0,0.414...] and calls xatan.
7065
*/
7166

72-
long double f64_satan(long double arg) {
67+
long double _f64_satan(long double arg) {
7368
if (arg < F64_SQRT2_MINUS_1) {
74-
return f64_xatan(arg);
69+
return _f64_xatan(arg);
7570
} else if (arg > F64_SQRT2_PLUS_1) {
7671
if (arg > 0x1.0p+54L) {
7772
/* rounds to pi/2 */
7873
return F64_PI2;
7974
}
80-
return (F64_PI2 - f64_xatan(1.0L / arg));
75+
return (F64_PI2 - _f64_xatan(1.0L / arg));
8176
} else {
82-
return (F64_PI4 + f64_xatan((arg - 1.0L) / (arg + 1.0L)));
77+
return (F64_PI4 + _f64_xatan((arg - 1.0L) / (arg + 1.0L)));
8378
}
8479
}

src/libc/cosf.src

Lines changed: 8 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -10,35 +10,20 @@ _cos := _cosf
1010

1111
else
1212

13+
; float _f32_sinus(int quad, float arg)
1314
_cos:
1415
_cosf:
1516
call __frameset0
16-
ld hl,(ix+6)
17-
ld e,(ix+9)
18-
ld bc,0
19-
xor a,a
17+
ld e, (ix + 9) ; exponent
18+
ld hl, (ix + 6) ; mantissa
19+
res 7, e ; fabsf(x)
2020
push de
2121
push hl
22-
call __fcmp
23-
pop bc
24-
pop de
25-
ld a,e
26-
jp p,l_1
27-
call __fneg
22+
scf ; quad 1, N reset, C set
23+
push af
24+
jp _sinf.hijack
2825

29-
l_1: ld hl,1
30-
push hl
31-
ld l,a
32-
push hl
33-
push bc
34-
call _sinus
35-
ld sp,ix
36-
pop ix
37-
ret
38-
39-
extern _sinus
4026
extern __frameset0
41-
extern __fcmp
42-
extern __fneg
27+
extern _sinf.hijack
4328

4429
end if

src/libc/floorf.c

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -10,17 +10,16 @@
1010
float _floorf_c(float d) {
1111
float fraction;
1212

13-
if (d<0.0) {
13+
if (d < 0.0f) {
1414
d = -d;
15-
fraction = modff(d, &d);
16-
if (fraction != 0.0) {
17-
d += 1;
15+
fraction = modff(d, &d);
16+
if (fraction != 0.0f) {
17+
d += 1.0f;
1818
}
1919
d = -d;
20-
} else {
21-
fraction = modff(d, &d);
20+
return d;
2221
}
23-
return(d);
22+
return truncf(d);
2423
}
2524

2625
double _floor_c(double) __attribute__((alias("_floorf_c")));

src/libc/include/__math_def.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -230,9 +230,9 @@ long lround(double);
230230
long lroundf(float);
231231
long lroundl(long double);
232232

233-
double modf(double, double *);
234-
float modff(float, float *);
235-
long double modfl(long double, long double *);
233+
double modf(double, double *) __attribute__((nonnull(2)));
234+
float modff(float, float *) __attribute__((nonnull(2)));
235+
long double modfl(long double, long double *) __attribute__((nonnull(2)));
236236

237237
double nan(const char *);
238238
float nanf(const char *);

src/libc/sinf.c

Lines changed: 15 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -22,53 +22,31 @@
2222
#define q2 0.946309610153821e4f
2323
#define q3 0.132653490878614e3f
2424

25-
float sinus(float arg, int quad)
26-
{
27-
float e, f;
28-
int k;
25+
/**
26+
* @remarks Minimum ulp:
27+
* ulp of -5 at +0x1.fe2dd0p-9 (2^-10 < |x| < pi/2)
28+
*
29+
* @note positive arguments only
30+
* @warning undefined behaviour if |x| > LONG_MAX
31+
*/
32+
float _f32_sinus(unsigned char quad, float x) {
33+
float x_trunc;
2934
float ysq;
30-
float x,y;
35+
float y;
3136
float temp1, temp2;
3237

33-
x = arg;
34-
if (x<0.0f) {
35-
x = -x;
36-
quad = quad + 2;
37-
}
38-
x = x * two_over_pi; /* underflow? */
39-
if (x > 32764.0f) {
40-
y = modff(x,&e);
41-
e = e + quad;
42-
modff(0.25f * e,&f);
43-
quad = e - 4.0f * f;
44-
} else {
45-
k = x;
46-
y = x - k;
47-
quad = (quad + k) & 0x3;
48-
}
38+
x = x * two_over_pi;
39+
y = modff(x, &x_trunc);
40+
quad = (quad + (unsigned char)x_trunc) & 0x3;
4941
if (quad & 0x1) {
5042
y = 1.0f - y;
5143
}
52-
if (quad > 1) {
44+
if (quad & 0x2) {
5345
y = -y;
5446
}
5547

56-
ysq = y*y;
48+
ysq = y * y;
5749
temp1 = ((((p4*ysq+p3)*ysq+p2)*ysq+p1)*ysq+p0)*y;
5850
temp2 = ((((ysq+q3)*ysq+q2)*ysq+q1)*ysq+q0);
5951
return(temp1/temp2);
6052
}
61-
62-
/**
63-
* @remarks Minimum ulp:
64-
* ulp of -5 at +0x1.fe2dd0p-9 (|x| < pi/2)
65-
*/
66-
float _sinf_c(float arg) {
67-
if (fabsf(arg) < 0x1.0p-11f) {
68-
return arg;
69-
}
70-
return sinus(arg, 0);
71-
}
72-
73-
74-
double _sin_c(double) __attribute__((alias("_sinf_c")));

src/libc/sinf.src

Lines changed: 40 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
assume adl=1
22

33
section .text
4+
45
public _sinf
56
public _sin
67

@@ -11,10 +12,45 @@ _sin := _sinf
1112

1213
else
1314

14-
_sinf := __sinf_c
15-
_sin := __sin_c
15+
public _sinf.hijack
16+
17+
; float _f32_sinus(int quad, float arg)
18+
_sin:
19+
_sinf:
20+
call __frameset0
21+
ld e, (ix + 9) ; exponent
22+
ld hl, (ix + 6) ; mantissa
23+
ld a, e
24+
add a, a ; clear signbit
25+
sub a, 117 ; |x| < 2^-10 or 0x3affffff
26+
jr c, .small_arg
27+
ld a, e
28+
res 7, e ; x = fabsf(x)
29+
push de ; exponent
30+
push hl ; mantissa
31+
rlca
32+
add a, a
33+
ld e, a
34+
push de
35+
.hijack:
36+
call __f32_sinus
37+
.small_arg:
38+
ld sp, ix
39+
pop ix
40+
; you can ret here if clamping is not needed
41+
; clamp the result to [-1.0, +1.0]
42+
ld a, e
43+
add a, a
44+
sub a, 126
45+
ret nz ; |y| < 0.5f
46+
push hl
47+
add hl, hl
48+
pop hl
49+
ret nc ; |y| < 1.0f
50+
ld l, h ; zero out the lower 8 bits of the mantissa
51+
ret
1652

17-
extern __sinf_c
18-
extern __sin_c
53+
extern __frameset0
54+
extern __f32_sinus
1955

2056
end if

0 commit comments

Comments
 (0)