1111#include " hdr/fenv_macros.h"
1212#include " src/__support/FPUtil/FEnvImpl.h"
1313#include " src/__support/FPUtil/FPBits.h"
14+ #include " src/__support/FPUtil/ManipulationFunctions.h"
1415#include " src/__support/FPUtil/PolyEval.h"
1516#include " src/__support/FPUtil/cast.h"
16- #include " src/__support/FPUtil/multiply_add.h"
17- #include " src/__support/FPUtil/sqrt.h"
17+ #include " src/__support/FPUtil/multiply_add.h" // to remove
1818#include " src/__support/macros/optimization.h"
1919
2020namespace LIBC_NAMESPACE_DECL {
@@ -40,7 +40,7 @@ LLVM_LIBC_FUNCTION(float16, rsqrtf16, (float16 x)) {
4040 if (LIBC_UNLIKELY (x_abs == 0x0 )) {
4141 fputil::raise_except_if_required (FE_DIVBYZERO);
4242 fputil::set_errno_if_required (ERANGE);
43- return FPBits::quiet_nan ( ).get_val ();
43+ return FPBits::inf (Sign::POS ).get_val ();
4444 }
4545
4646 // -inf <= x < 0
@@ -61,11 +61,36 @@ LLVM_LIBC_FUNCTION(float16, rsqrtf16, (float16 x)) {
6161 }
6262
6363 // x is valid, estimate the result
64- // 3-degree polynomial generated using Sollya
65- // P = fpminimax(1/sqrt(x), [|1, 2, 3|], [|SG...|], [0.5, 1]);
64+ // Range reduction:
65+ // x can be expressed as m*2^e, where e - int exponent and m - mantissa
66+ // rsqrtf16(x) = rsqrtf16(m*2^e)
67+ // rsqrtf16(m*2^e) = 1/sqrt(m) * 1/sqrt(2^e) = 1/sqrt(m) * 1/2^(e/2)
68+ // 1/sqrt(m) * 1/2^(e/2) = 1/sqrt(m) * 2^(-e/2)
69+
6670 float xf = x;
67- float result =
68- fputil::polyeval (xf, 0x1 .d42408p2f , -0x1 .7cc4fep3f, 0x1 .66cb6ap2f);
71+ int exponent;
72+ float mantissa = fputil::frexp (xf, exponent);
73+
74+ // 6-degree polynomial generated using Sollya
75+ // P = fpminimax(1/sqrt(x), [|0,1,2,3,4,5|], [|SG...|], [0.5, 1]);
76+ float interm =
77+ fputil::polyeval (mantissa, 0x1 .9c81c4p1f, -0x1 .e2c57cp2f , 0x1 .91e8bp3f,
78+ -0x1 .899954p3f, 0x1 .9edcp2f, -0x1 .6bd93cp0f);
79+
80+ // Round (-e/2)
81+ int exp_floored = -(exponent >> 1 );
82+
83+ // rsqrt(x) = 1/sqrt(mantissa) * 2^(-e/2)
84+ // rsqrt(x) = P(mantissa) * 2*(exp_floored)
85+ float result = fputil::ldexp (interm, exp_floored);
86+
87+ // Handle the case where exponent is odd
88+ if (exponent & 1 ) {
89+ const float ONE_OVER_SQRT2 =
90+ 0x1 .6a09e667f3bcc908b2fb1366ea957d3e3adec1751p-1f ;
91+ result *= ONE_OVER_SQRT2;
92+ }
93+
6994 return fputil::cast<float16>(result);
7095}
7196} // namespace LIBC_NAMESPACE_DECL
0 commit comments