Fix issue with atan on x86

gklimowicz · gklimowicz · commit 39bd879da96c · 2019-08-29T16:16:36.000-07:00
Fixed an issue with the new atan function where
passing -0.0 as the argument failed to
return -0.0. Fixed both the scalar and vector
versions.

Also changed which scalar function we call for
double-precision atan when using the relaxed
version. It turns out that the previous
scalar double-precision atan function was
slightly faster than our new one, so we kept
a way for it to still be called: the relaxed
scalar entry now dispatches to __mth_i_datan_avx2.
diff --git a/runtime/libpgmath/lib/common/atan/atan_d_vec.h b/runtime/libpgmath/lib/common/atan/atan_d_vec.h
@@ -105,6 +105,7 @@ vdouble __attribute__((noinline)) atan_d_vec(vdouble const x) {
     
     vdouble result = vsel_vd_vo_vd_vd(f_big, result_f_big, result_not_f_big);
 
+    result = vreinterpret_vd_vm(vreinterpret_vm_vd(result) | vreinterpret_vm_vd(ans_sgn));
+
     return result;
 }
-
diff --git a/runtime/libpgmath/lib/common/atan/fd_atan_scalar.cpp b/runtime/libpgmath/lib/common/atan/fd_atan_scalar.cpp
@@ -92,6 +92,7 @@ double __attribute__((noinline)) atan_d_scalar(double x) {
 
     double result_d = FMA(x2 * xReduced, poly, xReduced);
 
+    result_d = copysign(result_d, x);
+
     return result_d;
 }
-
diff --git a/runtime/libpgmath/lib/common/atanf/atan_vec.h b/runtime/libpgmath/lib/common/atanf/atan_vec.h
@@ -72,5 +72,8 @@ vfloat __attribute__((noinline)) atan_vec(vfloat const x) {
 
     vfloat result = vsel_vf_vo_vf_vf(x_big, result_x_big, result_not_x_big);
 
+    //Make sure atanf(-0.0f) = -0.0f:
+    result = vreinterpret_vf_vm(vor_vm_vm_vm(vreinterpret_vm_vf(result), vreinterpret_vm_vf(ans_sgn)));
+
     return result;
 }
diff --git a/runtime/libpgmath/lib/common/atanf/fs_atan_scalar.cpp b/runtime/libpgmath/lib/common/atanf/fs_atan_scalar.cpp
@@ -76,5 +76,8 @@ float __attribute__((noinline)) atan_scalar(const float x) {
 
     float result_d = FMAF(x2 * xReduced, poly, xReduced);
 
+    //This fixes atanf(-0.0) = -0.0, but doesn't slow down the code seemingly
+    result_d = copysignf(result_d, x);
+
     return result_d;
 }
diff --git a/runtime/libpgmath/lib/x86_64/math_tables/mth_atandefs.h b/runtime/libpgmath/lib/x86_64/math_tables/mth_atandefs.h
@@ -52,7 +52,7 @@ MTHINTRIN(atan , sv8m , avxfma4    , __fs_atan_8_mn        , __rs_atan_8_mn
 MTHINTRIN(atan , dv4m , avxfma4    , __fd_atan_4_mn        , __rd_atan_4_mn        , __pd_atan_4_mn        ,__math_dispatch_error)
 
 MTHINTRIN(atan , ss   , avx2       , __fs_atan_1_avx2      , __fs_atan_1_avx2      , __mth_i_atan_avx2     ,__math_dispatch_error)
-MTHINTRIN(atan , ds   , avx2       , __fd_atan_1_avx2      , __fd_atan_1_avx2      , __mth_i_datan_avx2    ,__math_dispatch_error)
+MTHINTRIN(atan , ds   , avx2       , __fd_atan_1_avx2      , __mth_i_datan_avx2    , __mth_i_datan_avx2    ,__math_dispatch_error)
 MTHINTRIN(atan , sv4  , avx2       , __fs_atan_4_avx2      , __fs_atan_4_avx2      , __gs_atan_4_p         ,__math_dispatch_error)
 MTHINTRIN(atan , dv2  , avx2       , __fd_atan_2_avx2      , __fd_atan_2_avx2      , __gd_atan_2_p         ,__math_dispatch_error)
 MTHINTRIN(atan , sv8  , avx2       , __fs_atan_8_avx2      , __fs_atan_8_avx2      , __gs_atan_8_p         ,__math_dispatch_error)
@@ -63,7 +63,7 @@ MTHINTRIN(atan , sv8m , avx2       , __fs_atan_8_mn        , __rs_atan_8_mn
 MTHINTRIN(atan , dv4m , avx2       , __fd_atan_4_mn        , __rd_atan_4_mn        , __pd_atan_4_mn        ,__math_dispatch_error)
 
 MTHINTRIN(atan , ss   , avx512knl  , __fs_atan_1_avx2      , __fs_atan_1_avx2      , __mth_i_atan_avx2     ,__math_dispatch_error)
-MTHINTRIN(atan , ds   , avx512knl  , __fd_atan_1_avx2      , __fd_atan_1_avx2      , __mth_i_datan_avx2    ,__math_dispatch_error)
+MTHINTRIN(atan , ds   , avx512knl  , __fd_atan_1_avx2      , __mth_i_datan_avx2    , __mth_i_datan_avx2    ,__math_dispatch_error)
 MTHINTRIN(atan , sv4  , avx512knl  , __fs_atan_4_avx2      , __fs_atan_4_avx2      , __gs_atan_4_p         ,__math_dispatch_error)
 MTHINTRIN(atan , dv2  , avx512knl  , __fd_atan_2_avx2      , __fd_atan_2_avx2      , __gd_atan_2_p         ,__math_dispatch_error)
 MTHINTRIN(atan , sv8  , avx512knl  , __fs_atan_8_avx2      , __fs_atan_8_avx2      , __gs_atan_8_p         ,__math_dispatch_error)
@@ -78,7 +78,7 @@ MTHINTRIN(atan , sv16m, avx512knl  , __fs_atan_16_mn       , __rs_atan_16_mn
 MTHINTRIN(atan , dv8m , avx512knl  , __fd_atan_8_mn        , __rd_atan_8_mn        , __pd_atan_8_mn        ,__math_dispatch_error)
 
 MTHINTRIN(atan , ss   , avx512     , __fs_atan_1_avx2      , __fs_atan_1_avx2      , __mth_i_atan_avx2     ,__math_dispatch_error)
-MTHINTRIN(atan , ds   , avx512     , __fd_atan_1_avx2      , __fd_atan_1_avx2      , __mth_i_datan_avx2    ,__math_dispatch_error)
+MTHINTRIN(atan , ds   , avx512     , __fd_atan_1_avx2      , __mth_i_datan_avx2    , __mth_i_datan_avx2    ,__math_dispatch_error)
 MTHINTRIN(atan , sv4  , avx512     , __fs_atan_4_avx2      , __fs_atan_4_avx2      , __gs_atan_4_p         ,__math_dispatch_error)
 MTHINTRIN(atan , dv2  , avx512     , __fd_atan_2_avx2      , __fd_atan_2_avx2      , __gd_atan_2_p         ,__math_dispatch_error)
 MTHINTRIN(atan , sv8  , avx512     , __fs_atan_8_avx2      , __fs_atan_8_avx2      , __gs_atan_8_p         ,__math_dispatch_error)

Original file line number	Diff line number	Diff line change
`@@ -105,6 +105,7 @@ vdouble __attribute__((noinline)) atan_d_vec(vdouble const x) {`
`105`	`105`
`106`	`106`	`vdouble result = vsel_vd_vo_vd_vd(f_big, result_f_big, result_not_f_big);`
`107`	`107`
	`108`	`+ result = vreinterpret_vd_vm(vreinterpret_vm_vd(result) \| vreinterpret_vm_vd(ans_sgn));`
	`109`	`+`
`108`	`110`	`return result;`
`109`	`111`	`}`
`110`		`-`
Original file line number	Diff line number	Diff line change
`@@ -92,6 +92,7 @@ double __attribute__((noinline)) atan_d_scalar(double x) {`
`92`	`92`
`93`	`93`	`double result_d = FMA(x2 * xReduced, poly, xReduced);`
`94`	`94`
	`95`	`+ result_d = copysign(result_d, x);`
	`96`	`+`
`95`	`97`	`return result_d;`
`96`	`98`	`}`
`97`		`-`
Original file line number	Diff line number	Diff line change
`@@ -72,5 +72,8 @@ vfloat __attribute__((noinline)) atan_vec(vfloat const x) {`
`72`	`72`
`73`	`73`	`vfloat result = vsel_vf_vo_vf_vf(x_big, result_x_big, result_not_x_big);`
`74`	`74`
	`75`	`+ //Make sure atanf(-0.0f) = -0.0f:`
	`76`	`+ result = vreinterpret_vf_vm(vor_vm_vm_vm(vreinterpret_vm_vf(result), vreinterpret_vm_vf(ans_sgn)));`
	`77`	`+`
`75`	`78`	`return result;`
`76`	`79`	`}`
Original file line number	Diff line number	Diff line change
`@@ -76,5 +76,8 @@ float __attribute__((noinline)) atan_scalar(const float x) {`
`76`	`76`
`77`	`77`	`float result_d = FMAF(x2 * xReduced, poly, xReduced);`
`78`	`78`
	`79`	`+ //This fixes atanf(-0.0) = -0.0, but doesn't slow down the code seemingly`
	`80`	`+ result_d = copysignf(result_d, x);`
	`81`	`+`
`79`	`82`	`return result_d;`
`80`	`83`	`}`