@@ -189,163 +189,18 @@ struct erf_helper<FloatingPoint NBL_PARTIAL_REQ_BOT(concepts::FloatingPointScala
189
189
{
190
190
static FloatingPoint __call (NBL_CONST_REF_ARG (FloatingPoint) _x)
191
191
{
192
- // glibc implementation
193
- const float64_t tiny = NBL_FP64_LITERAL (1e-300 ),
194
- one = NBL_FP64_LITERAL (1. 00000000000000000000e+00 ), /* 0x3FF00000, 0x00000000 */
195
- erx = NBL_FP64_LITERAL (8. 45062911510467529297e-01 ); /* 0x3FEB0AC1, 0x60000000 */
196
-
197
- // Coefficients for approximation to erf in [0,0.84375]
198
- const float64_t efx = NBL_FP64_LITERAL (1. 28379167095512586316e-01 ); /* 0x3FC06EBA, 0x8214DB69 */
199
- const float64_t pp0 = NBL_FP64_LITERAL (1. 28379167095512558561e-01 ); /* 0x3FC06EBA, 0x8214DB68 */
200
- const float64_t pp1 = NBL_FP64_LITERAL (-3. 25042107247001499370e-01 ); /* 0xBFD4CD7D, 0x691CB913 */
201
- const float64_t pp2 = NBL_FP64_LITERAL (-2. 84817495755985104766e-02 ); /* 0xBF9D2A51, 0xDBD7194F */
202
- const float64_t pp3 = NBL_FP64_LITERAL (-5. 77027029648944159157e-03 ); /* 0xBF77A291, 0x236668E4 */
203
- const float64_t pp4 = NBL_FP64_LITERAL (-2. 37630166566501626084e-05 ); /* 0xBEF8EAD6, 0x120016AC */
204
- const float64_t qq1 = NBL_FP64_LITERAL (3. 97917223959155352819e-01 ); /* 0x3FD97779, 0xCDDADC09 */
205
- const float64_t qq2 = NBL_FP64_LITERAL (6. 50222499887672944485e-02 ); /* 0x3FB0A54C, 0x5536CEBA */
206
- const float64_t qq3 = NBL_FP64_LITERAL (5. 08130628187576562776e-03 ); /* 0x3F74D022, 0xC4D36B0F */
207
- const float64_t qq4 = NBL_FP64_LITERAL (1. 32494738004321644526e-04 ); /* 0x3F215DC9, 0x221C1A10 */
208
- const float64_t qq5 = NBL_FP64_LITERAL (-3. 96022827877536812320e-06 ); /* 0xBED09C43, 0x42A26120 */
209
-
210
- //Coefficients for approximation to erf in [0.84375,1.25]
211
- const float64_t pa0 = NBL_FP64_LITERAL (-2. 36211856075265944077e-03 ); /* 0xBF6359B8, 0xBEF77538 */
212
- const float64_t pa1 = NBL_FP64_LITERAL (4. 14856118683748331666e-01 ); /* 0x3FDA8D00, 0xAD92B34D */
213
- const float64_t pa2 = NBL_FP64_LITERAL (-3. 72207876035701323847e-01 ); /* 0xBFD7D240, 0xFBB8C3F1 */
214
- const float64_t pa3 = NBL_FP64_LITERAL (3. 18346619901161753674e-01 ); /* 0x3FD45FCA, 0x805120E4 */
215
- const float64_t pa4 = NBL_FP64_LITERAL (-1. 10894694282396677476e-01 ); /* 0xBFBC6398, 0x3D3E28EC */
216
- const float64_t pa5 = NBL_FP64_LITERAL (3. 54783043256182359371e-02 ); /* 0x3FA22A36, 0x599795EB */
217
- const float64_t pa6 = NBL_FP64_LITERAL (-2. 16637559486879084300e-03 ); /* 0xBF61BF38, 0x0A96073F */
218
- const float64_t qa1 = NBL_FP64_LITERAL (1. 06420880400844228286e-01 ); /* 0x3FBB3E66, 0x18EEE323 */
219
- const float64_t qa2 = NBL_FP64_LITERAL (5. 40397917702171048937e-01 ); /* 0x3FE14AF0, 0x92EB6F33 */
220
- const float64_t qa3 = NBL_FP64_LITERAL (7. 18286544141962662868e-02 ); /* 0x3FB2635C, 0xD99FE9A7 */
221
- const float64_t qa4 = NBL_FP64_LITERAL (1. 26171219808761642112e-01 ); /* 0x3FC02660, 0xE763351F */
222
- const float64_t qa5 = NBL_FP64_LITERAL (1. 36370839120290507362e-02 ); /* 0x3F8BEDC2, 0x6B51DD1C */
223
- const float64_t qa6 = NBL_FP64_LITERAL (1. 19844998467991074170e-02 ); /* 0x3F888B54, 0x5735151D */
224
-
225
- // Coefficients for approximation to erfc in [1.25,1/0.35]
226
- const float64_t ra0 = NBL_FP64_LITERAL (-9. 86494403484714822705e-03 ); /* 0xBF843412, 0x600D6435 */
227
- const float64_t ra1 = NBL_FP64_LITERAL (-6. 93858572707181764372e-01 ); /* 0xBFE63416, 0xE4BA7360 */
228
- const float64_t ra2 = NBL_FP64_LITERAL (-1. 05586262253232909814e+01 ); /* 0xC0251E04, 0x41B0E726 */
229
- const float64_t ra3 = NBL_FP64_LITERAL (-6. 23753324503260060396e+01 ); /* 0xC04F300A, 0xE4CBA38D */
230
- const float64_t ra4 = NBL_FP64_LITERAL (-1. 62396669462573470355e+02 ); /* 0xC0644CB1, 0x84282266 */
231
- const float64_t ra5 = NBL_FP64_LITERAL (-1. 84605092906711035994e+02 ); /* 0xC067135C, 0xEBCCABB2 */
232
- const float64_t ra6 = NBL_FP64_LITERAL (-8. 12874355063065934246e+01 ); /* 0xC0545265, 0x57E4D2F2 */
233
- const float64_t ra7 = NBL_FP64_LITERAL (-9. 81432934416914548592e+00 ); /* 0xC023A0EF, 0xC69AC25C */
234
- const float64_t sa1 = NBL_FP64_LITERAL (1. 96512716674392571292e+01 ); /* 0x4033A6B9, 0xBD707687 */
235
- const float64_t sa2 = NBL_FP64_LITERAL (1. 37657754143519042600e+02 ); /* 0x4061350C, 0x526AE721 */
236
- const float64_t sa3 = NBL_FP64_LITERAL (4. 34565877475229228821e+02 ); /* 0x407B290D, 0xD58A1A71 */
237
- const float64_t sa4 = NBL_FP64_LITERAL (6. 45387271733267880336e+02 ); /* 0x40842B19, 0x21EC2868 */
238
- const float64_t sa5 = NBL_FP64_LITERAL (4. 29008140027567833386e+02 ); /* 0x407AD021, 0x57700314 */
239
- const float64_t sa6 = NBL_FP64_LITERAL (1. 08635005541779435134e+02 ); /* 0x405B28A3, 0xEE48AE2C */
240
- const float64_t sa7 = NBL_FP64_LITERAL (6. 57024977031928170135e+00 ); /* 0x401A47EF, 0x8E484A93 */
241
- const float64_t sa8 = NBL_FP64_LITERAL (-6. 04244152148580987438e-02 ); /* 0xBFAEEFF2, 0xEE749A62 */
242
-
243
- // Coefficients for approximation to erfc in [1/.35,28]
244
- const float64_t rb0 = NBL_FP64_LITERAL (-9. 86494292470009928597e-03 ); /* 0xBF843412, 0x39E86F4A */
245
- const float64_t rb1 = NBL_FP64_LITERAL (-7. 99283237680523006574e-01 ); /* 0xBFE993BA, 0x70C285DE */
246
- const float64_t rb2 = NBL_FP64_LITERAL (-1. 77579549177547519889e+01 ); /* 0xC031C209, 0x555F995A */
247
- const float64_t rb3 = NBL_FP64_LITERAL (-1. 60636384855821916062e+02 ); /* 0xC064145D, 0x43C5ED98 */
248
- const float64_t rb4 = NBL_FP64_LITERAL (-6. 37566443368389627722e+02 ); /* 0xC083EC88, 0x1375F228 */
249
- const float64_t rb5 = NBL_FP64_LITERAL (-1. 02509513161107724954e+03 ); /* 0xC0900461, 0x6A2E5992 */
250
- const float64_t rb6 = NBL_FP64_LITERAL (-4. 83519191608651397019e+02 ); /* 0xC07E384E, 0x9BDC383F */
251
- const float64_t sb1 = NBL_FP64_LITERAL (3. 03380607434824582924e+01 ); /* 0x403E568B, 0x261D5190 */
252
- const float64_t sb2 = NBL_FP64_LITERAL (3. 25792512996573918826e+02 ); /* 0x40745CAE, 0x221B9F0A */
253
- const float64_t sb3 = NBL_FP64_LITERAL (1. 53672958608443695994e+03 ); /* 0x409802EB, 0x189D5118 */
254
- const float64_t sb4 = NBL_FP64_LITERAL (3. 19985821950859553908e+03 ); /* 0x40A8FFB7, 0x688C246A */
255
- const float64_t sb5 = NBL_FP64_LITERAL (2. 55305040643316442583e+03 ); /* 0x40A3F219, 0xCEDF3BE6 */
256
- const float64_t sb6 = NBL_FP64_LITERAL (4. 74528541206955367215e+02 ); /* 0x407DA874, 0xE79FE763 */
257
- const float64_t sb7 = NBL_FP64_LITERAL (-2. 24409524465858183362e+01 ); /* 0xC03670E2, 0x42712D62 */
258
-
259
- float64_t x = float64_t (_x);
260
- int32_t hx, ix;
261
- float64_t s, y, z, r;
262
- hx = int32_t (bit_cast<uint64_t, float64_t>(x) >> 32 );
263
- ix = hx & 0x7fffffff ;
264
- if (ix >= 0x7ff00000 ) // erf(nan)=nan, erf(+-inf)=+-1
265
- {
266
- int32_t i = ((uint32_t)hx >> 31 ) << 1 ;
267
- return (float64_t)(1.0 - i) + one / x;
268
- }
269
-
270
- float64_t P, Q;
271
- if (ix < 0x3feb0000 ) // |x| < 0.84375
272
- {
273
- if (ix < 0x3e300000 ) // |x| < 2**-28
274
- {
275
- if (ix < 0x00800000 )
276
- {
277
- // avoid underflow
278
- return FloatingPoint (0.0625 * (16.0 * x + (16.0 * efx) * x));
279
- }
280
- return FloatingPoint (x + efx * x);
281
- }
282
- z = x * x;
283
- r = pp0 + z * (pp1 + z * (pp2 + z * (pp3 + z * pp4)));
284
- s = one + z * (qq1 + z * (qq2 + z * (qq3 + z * (qq4 + z * qq5))));
285
- y = r / s;
286
- return FloatingPoint (x + x * y);
287
- }
288
- if (ix < 0x3ff40000 ) // 0.84375 <= |x| < 1.25
289
- {
290
- s = abs_helper<float64_t>::__call (x) - one ;
291
- P = pa0 + s * (pa1 + s * (pa2 + s * (pa3 + s * (pa4 + s * (pa5 + s * pa6)))));
292
- Q = one + s * (qa1 + s * (qa2 + s * (qa3 + s * (qa4 + s * (qa5 + s * (qa5 + s * qa6))))));
293
- if (hx >= 0 )
294
- return FloatingPoint (erx + P / Q);
295
- else
296
- return FloatingPoint (-erx - P / Q);
297
- }
298
- if (ix >= 0x40180000 ) // inf > |x| >= 6
299
- {
300
- if (hx >= 0 )
301
- return FloatingPoint (one - tiny);
302
- else
303
- return FloatingPoint (tiny - one );
304
- }
305
-
306
- x = abs_helper<float64_t>::__call (x);
307
- s = one / (x * x);
308
- float64_t R, S;
309
- if (ix < 0x4006DB6E ) // |x| < 1/0.35 ~2.85714
310
- {
311
- R = ra0 + s * (ra1 + s * (ra2 + s * (ra3 + s * (ra4 + s * (ra5 + s * (ra6 + s * ra7))))));
312
- S = one + s * (sa1 + s * (sa2 + s * (sa3 + s * (sa4 + s * (sa5 + s * (sa6 + s * sa7))))));
313
- }
314
- else // |x| >= 1/0.35
315
- {
316
- R = rb0 + s * (rb1 + s * (rb2 + s * (rb3 + s * (rb4 + s * rb5))));
317
- S = one + s * (sb1 + s * (sb2 + s * (sb3 + s * (sb4 + s * (sb5 + s * (sb6 + s * sb7))))));
318
- }
319
- z = x;
320
- uint64_t z1 = bit_cast<uint64_t, float64_t>(x);
321
- z1 &= 0xffffffff00000000 ;
322
- z = bit_cast<float64_t, uint64_t>(z1);
323
- r = exp_helper<float64_t>::__call (-z * z - 0.5625 ) * exp_helper<float64_t>::__call ((z - x) * (z + x) + R / S);
324
- if (hx >= 0 )
325
- return FloatingPoint (one - r / x);
326
- else
327
- return FloatingPoint (r / x - one );
328
- }
329
- };
330
-
331
- template<>
332
- struct erf_helper<float32_t>
333
- {
334
- static float32_t __call (NBL_CONST_REF_ARG (float32_t) _x)
335
- {
336
- // A&S approximation to 1.5x10-7
337
- const float32_t a1 = float32_t (NBL_FP64_LITERAL (0.254829592 ));
338
- const float32_t a2 = float32_t (NBL_FP64_LITERAL (-0.284496736 ));
339
- const float32_t a3 = float32_t (NBL_FP64_LITERAL (1.421413741 ));
340
- const float32_t a4 = float32_t (NBL_FP64_LITERAL (-1.453152027 ));
341
- const float32_t a5 = float32_t (NBL_FP64_LITERAL (1.061405429 ));
342
- const float32_t p = float32_t (NBL_FP64_LITERAL (0.3275911 ));
343
-
344
- float32_t _sign = float32_t (sign (_x));
345
- float32_t x = abs (_x);
346
-
347
- float32_t t = float32_t (NBL_FP64_LITERAL (1.0 )) / (float32_t (NBL_FP64_LITERAL (1.0 )) + p * x);
348
- float32_t y = float32_t (NBL_FP64_LITERAL (1.0 )) - (((((a5 * t + a4) * t) + a3) * t + a2) * t + a1) * t * exp (-x * x);
192
+ const FloatingPoint a1 = FloatingPoint (NBL_FP64_LITERAL (0.254829592 ));
193
+ const FloatingPoint a2 = FloatingPoint (NBL_FP64_LITERAL (-0.284496736 ));
194
+ const FloatingPoint a3 = FloatingPoint (NBL_FP64_LITERAL (1.421413741 ));
195
+ const FloatingPoint a4 = FloatingPoint (NBL_FP64_LITERAL (-1.453152027 ));
196
+ const FloatingPoint a5 = FloatingPoint (NBL_FP64_LITERAL (1.061405429 ));
197
+ const FloatingPoint p = FloatingPoint (NBL_FP64_LITERAL (0.3275911 ));
198
+
199
+ FloatingPoint _sign = FloatingPoint (sign (_x));
200
+ FloatingPoint x = abs (_x);
201
+
202
+ FloatingPoint t = FloatingPoint (NBL_FP64_LITERAL (1.0 )) / (FloatingPoint (NBL_FP64_LITERAL (1.0 )) + p * x);
203
+ FloatingPoint y = FloatingPoint (NBL_FP64_LITERAL (1.0 )) - (((((a5 * t + a4) * t) + a3) * t + a2) * t + a1) * t * exp (-x * x);
349
204
350
205
return _sign * y;
351
206
}
0 commit comments