Skip to content

Commit 39bd879

Browse files
committed
Fix issue with atan on x86
Fixed an issue with the new atan function where passing -0.0 as the argument failed to return -0.0. Both the scalar and vector versions are fixed. Also changed which scalar function is called for double-precision atan when using the relaxed version: the previous scalar double-precision atan function turned out to be slightly faster than our new one, so we added a way for it to still be called.
1 parent c22671b commit 39bd879

File tree

5 files changed

+13
-5
lines changed

5 files changed

+13
-5
lines changed

runtime/libpgmath/lib/common/atan/atan_d_vec.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -105,6 +105,7 @@ vdouble __attribute__((noinline)) atan_d_vec(vdouble const x) {
105105

106106
vdouble result = vsel_vd_vo_vd_vd(f_big, result_f_big, result_not_f_big);
107107

108+
result = vreinterpret_vd_vm(vreinterpret_vm_vd(result) | vreinterpret_vm_vd(ans_sgn));
109+
108110
return result;
109111
}
110-

runtime/libpgmath/lib/common/atan/fd_atan_scalar.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,7 @@ double __attribute__((noinline)) atan_d_scalar(double x) {
9292

9393
double result_d = FMA(x2 * xReduced, poly, xReduced);
9494

95+
result_d = copysign(result_d, x);
96+
9597
return result_d;
9698
}
97-

runtime/libpgmath/lib/common/atanf/atan_vec.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,5 +72,8 @@ vfloat __attribute__((noinline)) atan_vec(vfloat const x) {
7272

7373
vfloat result = vsel_vf_vo_vf_vf(x_big, result_x_big, result_not_x_big);
7474

75+
//Make sure atanf(-0.0f) = -0.0f:
76+
result = vreinterpret_vf_vm(vor_vm_vm_vm(vreinterpret_vm_vf(result), vreinterpret_vm_vf(ans_sgn)));
77+
7578
return result;
7679
}

runtime/libpgmath/lib/common/atanf/fs_atan_scalar.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,5 +76,8 @@ float __attribute__((noinline)) atan_scalar(const float x) {
7676

7777
float result_d = FMAF(x2 * xReduced, poly, xReduced);
7878

79+
//This fixes atanf(-0.0) = -0.0, but doesn't slow down the code seemingly
80+
result_d = copysignf(result_d, x);
81+
7982
return result_d;
8083
}

runtime/libpgmath/lib/x86_64/math_tables/mth_atandefs.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,7 @@ MTHINTRIN(atan , sv8m , avxfma4 , __fs_atan_8_mn , __rs_atan_8_mn
5252
MTHINTRIN(atan , dv4m , avxfma4 , __fd_atan_4_mn , __rd_atan_4_mn , __pd_atan_4_mn ,__math_dispatch_error)
5353

5454
MTHINTRIN(atan , ss , avx2 , __fs_atan_1_avx2 , __fs_atan_1_avx2 , __mth_i_atan_avx2 ,__math_dispatch_error)
55-
MTHINTRIN(atan , ds , avx2 , __fd_atan_1_avx2 , __fd_atan_1_avx2 , __mth_i_datan_avx2 ,__math_dispatch_error)
55+
MTHINTRIN(atan , ds , avx2 , __fd_atan_1_avx2 , __mth_i_datan_avx2 , __mth_i_datan_avx2 ,__math_dispatch_error)
5656
MTHINTRIN(atan , sv4 , avx2 , __fs_atan_4_avx2 , __fs_atan_4_avx2 , __gs_atan_4_p ,__math_dispatch_error)
5757
MTHINTRIN(atan , dv2 , avx2 , __fd_atan_2_avx2 , __fd_atan_2_avx2 , __gd_atan_2_p ,__math_dispatch_error)
5858
MTHINTRIN(atan , sv8 , avx2 , __fs_atan_8_avx2 , __fs_atan_8_avx2 , __gs_atan_8_p ,__math_dispatch_error)
@@ -63,7 +63,7 @@ MTHINTRIN(atan , sv8m , avx2 , __fs_atan_8_mn , __rs_atan_8_mn
6363
MTHINTRIN(atan , dv4m , avx2 , __fd_atan_4_mn , __rd_atan_4_mn , __pd_atan_4_mn ,__math_dispatch_error)
6464

6565
MTHINTRIN(atan , ss , avx512knl , __fs_atan_1_avx2 , __fs_atan_1_avx2 , __mth_i_atan_avx2 ,__math_dispatch_error)
66-
MTHINTRIN(atan , ds , avx512knl , __fd_atan_1_avx2 , __fd_atan_1_avx2 , __mth_i_datan_avx2 ,__math_dispatch_error)
66+
MTHINTRIN(atan , ds , avx512knl , __fd_atan_1_avx2 , __mth_i_datan_avx2 , __mth_i_datan_avx2 ,__math_dispatch_error)
6767
MTHINTRIN(atan , sv4 , avx512knl , __fs_atan_4_avx2 , __fs_atan_4_avx2 , __gs_atan_4_p ,__math_dispatch_error)
6868
MTHINTRIN(atan , dv2 , avx512knl , __fd_atan_2_avx2 , __fd_atan_2_avx2 , __gd_atan_2_p ,__math_dispatch_error)
6969
MTHINTRIN(atan , sv8 , avx512knl , __fs_atan_8_avx2 , __fs_atan_8_avx2 , __gs_atan_8_p ,__math_dispatch_error)
@@ -78,7 +78,7 @@ MTHINTRIN(atan , sv16m, avx512knl , __fs_atan_16_mn , __rs_atan_16_mn
7878
MTHINTRIN(atan , dv8m , avx512knl , __fd_atan_8_mn , __rd_atan_8_mn , __pd_atan_8_mn ,__math_dispatch_error)
7979

8080
MTHINTRIN(atan , ss , avx512 , __fs_atan_1_avx2 , __fs_atan_1_avx2 , __mth_i_atan_avx2 ,__math_dispatch_error)
81-
MTHINTRIN(atan , ds , avx512 , __fd_atan_1_avx2 , __fd_atan_1_avx2 , __mth_i_datan_avx2 ,__math_dispatch_error)
81+
MTHINTRIN(atan , ds , avx512 , __fd_atan_1_avx2 , __mth_i_datan_avx2 , __mth_i_datan_avx2 ,__math_dispatch_error)
8282
MTHINTRIN(atan , sv4 , avx512 , __fs_atan_4_avx2 , __fs_atan_4_avx2 , __gs_atan_4_p ,__math_dispatch_error)
8383
MTHINTRIN(atan , dv2 , avx512 , __fd_atan_2_avx2 , __fd_atan_2_avx2 , __gd_atan_2_p ,__math_dispatch_error)
8484
MTHINTRIN(atan , sv8 , avx512 , __fs_atan_8_avx2 , __fs_atan_8_avx2 , __gs_atan_8_p ,__math_dispatch_error)

0 commit comments

Comments
 (0)