@@ -97,36 +97,14 @@ See also:
9797[`erfinv(x)`](@ref erfinv), [`erfcinv(x)`](@ref erfcinv).
9898
9999# Implementation by
100- - `Float32`/`Float64`: Julia implementation of https://github.com/ARM-software/optimized-routines/blob/master/math/erf.c
100+ - `Float32`/`Float64`: Julia implementation of https://github.com/ARM-software/optimized-routines/blob/master/math/erf.c with modification
101101- `BigFloat`: C library for multiple-precision floating-point [MPFR](https://www.mpfr.org/)
102102"""
103103
104104# Fast erf implementation using a mix of
105105# rational and polynomial approximations.
106106# Highest measured error is 1.01 ULPs at 0x1.39956ac43382fp+0.
107107function _erf (x:: Float64 )
108- # Minimax approximation of erf
109- PA= (0x1 .06 eba8214db68p- 3 , - 0x1 .812746 b037948p- 2 , 0x1 . ce2f21a03872p- 4 ,- 0x1 . b82ce30e6548p- 6 , 0x1 .565 bcc360a2f2p- 8 , - 0x1 . c02d812bc979ap- 11 ,0x1 . f99bddfc1ebe9p- 14 , - 0x1 . f42c457cee912p- 17 , 0x1 . b0e414ec20ee9p- 20 ,- 0x1 .18 c47fd143c5ep- 23 )
110- # Rational approximation on [0x1p-28, 0.84375]
111- NA= (0x1 .06 eba8214db68p- 3 , - 0x1 .4 cd7d691cb913p- 2 , - 0x1 . d2a51dbd7194fp- 6 ,- 0x1 .7 a291236668e4p- 8 , - 0x1 .8 ead6120016acp- 16 )
112- DA= (0x1 .97779 cddadc09p- 2 , 0x1 .0 a54c5536cebap- 4 , 0x1 .4 d022c4d36b0fp- 8 ,0x1 .15 dc9221c1a1p- 13 , - 0x1 .09 c4342a2612p- 18 )
113- # Rational approximation on [0.84375, 1.25]
114- NB= ( - 0x1 .359 b8bef77538p- 9 , 0x1 . a8d00ad92b34dp- 2 , - 0x1 .7 d240fbb8c3f1p- 2 , 0x1 .45 fca805120e4p- 2 , - 0x1 . c63983d3e28ecp- 4 , 0x1 .22 a36599795ebp- 5 , - 0x1 .1 bf380a96073fp- 9 )
115- DB= ( 0x1 . b3e6618eee323p- 4 , 0x1 .14 af092eb6f33p- 1 , 0x1 .2635 cd99fe9a7p- 4 , 0x1 .02660e763351 fp- 3 , 0x1 . bedc26b51dd1cp- 7 , 0x1 .88 b545735151dp- 7 )
116-
117- # Generated using Sollya::remez(f(c*x+d), deg, [(a-d)/c;(b-d)/c], 1, 1e-16), [|D ...|] with deg=15 a=1.25 b=2 c=1 d=1.25
118- PC= ( 0x1 .3 bcd133aa0ffcp- 4 , - 0x1 . e4652fadcb702p- 3 , 0x1 .2 ebf3dcca0446p- 2 , - 0x1 .571 d01c62d66p- 3 , 0x1 .93 a9a8f5b3413p- 8 , 0x1 .8281 cbcc2cd52p- 5 , - 0x1 .5 cffd86b4de16p- 6 , - 0x1 . db4ccf595053ep- 9 , 0x1 .757 cbf8684edap- 8 , - 0x1 . ce7dfd2a9e56ap- 11 , - 0x1 .99 ee3bc5a3263p- 11 , 0x1 .3 c57cf9213f5fp- 12 , 0x1 .60692996 bf254p- 14 , - 0x1 .6e44 cb7c1fa2ap- 14 , 0x1 .9 d4484ac482b2p- 16 , - 0x1 .578 c9e375d37p- 19 )
119- # Generated using Sollya::remez(f(c*x+d), deg, [(a-d)/c;(b-d)/c], 1, 1e-16), [|D ...|] with deg=17 a=2 b=3.25 c=2 d=2
120- PD= ( 0x1 .328f5 ec350e5p- 8 , - 0x1 .529 b9e8cf8e99p- 5 , 0x1 .529 b9e8cd9e71p- 3 , - 0x1 .8 b0ae3a023bf2p- 2 , 0x1 .1 a2c592599d82p- 1 , - 0x1 . ace732477e494p- 2 , - 0x1 . e1a06a27920ffp- 6 , 0x1 . bae92a6d27af6p- 2 , - 0x1 . a15470fcf5ce7p- 2 , 0x1 . bafe45d18e213p- 6 , 0x1 .0 d950680d199ap- 2 , - 0x1 .8 c9481e8f22e3p- 3 , - 0x1 .158450 ed5c899p- 4 , 0x1 . c01f2973b44p- 3 , - 0x1 .73 ed2827546a7p- 3 , 0x1 .47733687 d1ff7p- 4 , - 0x1 .2 dec70d00b8e1p- 6 , 0x1 . a947ab83cd4fp- 10 )
121- # Generated using Sollya::remez(f(c*x+d), deg, [(a-d)/c;(b-d)/c], 1, 1e-16), [|D ...|] with deg=13 a=3.25 b=4 c=1 d=3.25
122- PE= ( 0x1 .20 c13035539e4p- 18 , - 0x1 . e9b5e8d16df7ep- 16 , 0x1 .8 de3cd4733bf9p- 14 , - 0x1 .9 aa48beb8382fp- 13 , 0x1 .2 c7d713370a9fp- 12 , - 0x1 .490 b12110b9e2p- 12 , 0x1 .1459 c5d989d23p- 12 , - 0x1 .64 b28e9f1269p- 13 , 0x1 .57 c76d9d05cf8p- 14 , - 0x1 . bf271d9951cf8p- 16 , 0x1 . db7ea4d4535c9p- 19 , 0x1 .91 c2e102d5e49p- 20 , - 0x1 . e9f0826c2149ep- 21 , 0x1 .60 eebaea236e1p- 23 )
123- # Generated using Sollya::remez(f(c*x+d), deg, [(a-d)/c;(b-d)/c], 1, 1e-16), [|D ...|] with deg=16 a=4 b=5.90625 c=2 d=4
124- PF= ( 0x1 .08 ddd130d1fa6p- 26 , - 0x1 .10 b146f59ff06p- 22 , 0x1 .10 b135328b7b2p- 19 , - 0x1 .6039988e7575 fp- 17 , 0x1 .497 d365e19367p- 15 , - 0x1 . da48d9afac83ep- 14 , 0x1 .1024 c9b1fbb48p- 12 , - 0x1 . fc962e7066272p- 12 , 0x1 .87297282 d4651p- 11 , - 0x1 . f057b255f8c59p- 11 , 0x1 .0228 d0eee063p- 10 , - 0x1 . b1b21b84ec41cp- 11 , 0x1 .1 ead8ae9e1253p- 11 , - 0x1 .1e708 fba37fccp- 12 , 0x1 .9559363991 edap- 14 , - 0x1 .68 c827b783d9cp- 16 , 0x1 .2 ec4adeccf4a2p- 19 )
125-
126- C = 0x1 . b0ac16p- 1
127-
128- TwoOverSqrtPiMinusOne= 0x1 .06 eba8214db69p- 3
129-
130108
131109 # # top 32 bits
132110 ix:: UInt32 = reinterpret (UInt64,x)>> 32
@@ -146,52 +124,40 @@ function _erf(x::Float64)
146124
147125 if (ia < 0x3fe00000 )
148126 # # a < 0.5 - Use polynomial approximation.
149- r1 = fma (x2, PA[2 ], PA[1 ])
150- r2 = fma (x2, PA[4 ], PA[3 ])
151- r3 = fma (x2, PA[6 ], PA[5 ])
152- r4 = fma (x2, PA[8 ], PA[7 ])
153- r5 = fma (x2, PA[10 ], PA[9 ])
154-
155- x4 = x2 * x2
156- r = r5
157- r = fma (x4, r, r4)
158- r = fma (x4, r, r3)
159- r = fma (x4, r, r2)
160- r = fma (x4, r, r1)
161- return fma (r, x, x) # # This fma is crucial for accuracy.
127+
128+ # Minimax approximation of erf of the form x*P(x^2) approximately on the interval [0;0.5]
129+ PA= (0.12837916709551256 , - 0.3761263890318287 , 0.11283791670896592 , - 0.026866170630075903 , 0.005223977428649761 , - 0.0008548312229989974 , 0.00012054654502151287 , - 1.4906315067498891e-5 , 1.6126444349070824e-6 , - 1.3074259056679966e-7 )
130+
131+ r= fma (x,evalpoly (x2,PA),x) # # This fma is crucial for accuracy.
132+
133+ return r
162134 else
163135 # # 0.5 <= a < 0.84375 - Use rational approximation.
164136
165- r1n = fma (x2, NA[2 ], NA[1 ])
166- x4 = x2 * x2
167- r2n = fma (x2, NA[4 ], NA[3 ])
168- x8 = x4 * x4
169- r1d = fma (x2, DA[1 ], 1.0 )
170- r2d = fma (x2, DA[3 ], DA[2 ])
171- r3d = fma (x2, DA[5 ], DA[4 ])
172- P = r1n + x4 * r2n + x8 * NA[5 ]
137+ # Rational approximation on [0x1p-28, 0.84375]
138+ NA= (0x1 .06 eba8214db68p- 3 , - 0x1 .4 cd7d691cb913p- 2 , - 0x1 . d2a51dbd7194fp- 6 ,- 0x1 .7 a291236668e4p- 8 , - 0x1 .8 ead6120016acp- 16 )
139+ DA= (1 ,0x1 .97779 cddadc09p- 2 , 0x1 .0 a54c5536cebap- 4 , 0x1 .4 d022c4d36b0fp- 8 ,0x1 .15 dc9221c1a1p- 13 , - 0x1 .09 c4342a2612p- 18 )
140+
141+ P= evalpoly (x2,NA)
142+ Q= evalpoly (x2,DA)
173143
174- Q = r1d + x4 * r2d + x8 * r3d
175144 return fma (P / Q, x, x)
176145 end
177146 elseif (ia < 0x3ff40000 )
178147 # # 0.84375 <= |x| < 1.25.
179148
149+ # Rational approximation on [0.84375, 1.25]
150+ NB= ( - 0x1 .359 b8bef77538p- 9 , 0x1 . a8d00ad92b34dp- 2 , - 0x1 .7 d240fbb8c3f1p- 2 , 0x1 .45 fca805120e4p- 2 , - 0x1 . c63983d3e28ecp- 4 , 0x1 .22 a36599795ebp- 5 , - 0x1 .1 bf380a96073fp- 9 )
151+ DB= ( 1 , 0x1 . b3e6618eee323p- 4 , 0x1 .14 af092eb6f33p- 1 , 0x1 .2635 cd99fe9a7p- 4 , 0x1 .02660e763351 fp- 3 , 0x1 . bedc26b51dd1cp- 7 , 0x1 .88 b545735151dp- 7 )
152+
153+ C = 0x1 . b0ac16p- 1
154+
180155 a = abs (x) - 1.0
181- r1n = fma (a, NB[2 ], NB[1 ])
182- a2 = a * a
183- r1d = fma (a, DB[1 ], 1.0 )
184- a4 = a2 * a2
185- r2n = fma (a, NB[4 ], NB[3 ])
186- a6 = a4 * a2
187- r2d = fma (a, DB[3 ], DB[2 ])
188- r3n = fma (a, NB[6 ], NB[5 ])
189- r3d = fma (a, DB[5 ], DB[4 ])
190- r4n = NB[7 ]
191- r4d = DB[6 ]
192-
193- P = r1n + a2 * r2n + a4 * r3n + a6 * r4n
194- Q = r1d + a2 * r2d + a4 * r3d + a6 * r4d
156+
157+ P= evalpoly (a,NB)
158+ Q= evalpoly (a,DB)
159+
160+
195161 if (sign)
196162 return - C - P / Q
197163 else
@@ -201,27 +167,13 @@ function _erf(x::Float64)
201167 # # 1.25 <= |x| < 2.0.
202168 a = abs (x)
203169 a = a - 1.25
170+
171+ # erfc polynomial approximation
172+ # Generated using Sollya::remez(f(c*x+d), deg, [(a-d)/c;(b-d)/c], 1, 1e-16), [|D ...|] with deg=15 a=1.25 b=2 c=1 d=1.25
173+ PC= ( 0x1 .3 bcd133aa0ffcp- 4 , - 0x1 . e4652fadcb702p- 3 , 0x1 .2 ebf3dcca0446p- 2 , - 0x1 .571 d01c62d66p- 3 , 0x1 .93 a9a8f5b3413p- 8 , 0x1 .8281 cbcc2cd52p- 5 , - 0x1 .5 cffd86b4de16p- 6 , - 0x1 . db4ccf595053ep- 9 , 0x1 .757 cbf8684edap- 8 , - 0x1 . ce7dfd2a9e56ap- 11 , - 0x1 .99 ee3bc5a3263p- 11 , 0x1 .3 c57cf9213f5fp- 12 , 0x1 .60692996 bf254p- 14 , - 0x1 .6e44 cb7c1fa2ap- 14 , 0x1 .9 d4484ac482b2p- 16 , - 0x1 .578 c9e375d37p- 19 )
204174
205- r1 = fma (a, PC[2 ], PC[1 ])
206- r2 = fma (a, PC[4 ], PC[3 ])
207- r3 = fma (a, PC[6 ], PC[5 ])
208- r4 = fma (a, PC[8 ], PC[7 ])
209- r5 = fma (a, PC[10 ], PC[9 ])
210- r6 = fma (a, PC[12 ], PC[11 ])
211- r7 = fma (a, PC[14 ], PC[13 ])
212- r8 = fma (a, PC[16 ], PC[15 ])
213-
214-
215- a2 = a * a
216-
217- r = r8
218- r = fma (a2, r, r7)
219- r = fma (a2, r, r6)
220- r = fma (a2, r, r5)
221- r = fma (a2, r, r4)
222- r = fma (a2, r, r3)
223- r = fma (a2, r, r2)
224- r = fma (a2, r, r1)
175+ # Obtains erfc of |x|
176+ r= evalpoly (a,PC)
225177
226178 if (sign)
227179 return - 1.0 + r
@@ -233,27 +185,13 @@ function _erf(x::Float64)
233185 a = abs (x)
234186 a = fma (0.5 , a, - 1.0 )
235187
236- r1 = fma (a, PD[2 ], PD[1 ])
237- r2 = fma (a, PD[4 ], PD[3 ])
238- r3 = fma (a, PD[6 ], PD[5 ])
239- r4 = fma (a, PD[8 ], PD[7 ])
240- r5 = fma (a, PD[10 ], PD[9 ])
241- r6 = fma (a, PD[12 ], PD[11 ])
242- r7 = fma (a, PD[14 ], PD[13 ])
243- r8 = fma (a, PD[16 ], PD[15 ])
244- r9 = fma (a, PD[18 ], PD[17 ])
245-
246- a2 = a * a
247-
248- r = r9
249- r = fma (a2, r, r8)
250- r = fma (a2, r, r7)
251- r = fma (a2, r, r6)
252- r = fma (a2, r, r5)
253- r = fma (a2, r, r4)
254- r = fma (a2, r, r3)
255- r = fma (a2, r, r2)
256- r = fma (a2, r, r1)
188+ # Generated using Sollya::remez(f(c*x+d), deg, [(a-d)/c;(b-d)/c], 1, 1e-16), [|D ...|] with deg=17 a=2 b=3.25 c=2 d=2
189+ PD= ( 0x1 .328f5 ec350e5p- 8 , - 0x1 .529 b9e8cf8e99p- 5 , 0x1 .529 b9e8cd9e71p- 3 , - 0x1 .8 b0ae3a023bf2p- 2 , 0x1 .1 a2c592599d82p- 1 , - 0x1 . ace732477e494p- 2 , - 0x1 . e1a06a27920ffp- 6 , 0x1 . bae92a6d27af6p- 2 , - 0x1 . a15470fcf5ce7p- 2 , 0x1 . bafe45d18e213p- 6 , 0x1 .0 d950680d199ap- 2 , - 0x1 .8 c9481e8f22e3p- 3 , - 0x1 .158450 ed5c899p- 4 , 0x1 . c01f2973b44p- 3 , - 0x1 .73 ed2827546a7p- 3 , 0x1 .47733687 d1ff7p- 4 , - 0x1 .2 dec70d00b8e1p- 6 , 0x1 . a947ab83cd4fp- 10 )
190+
191+
192+ # Obtains erfc of |x|
193+ r= evalpoly (a,PD)
194+
257195
258196 if (sign)
259197 return - 1.0 + r
@@ -265,24 +203,11 @@ function _erf(x::Float64)
265203 a = abs (x)
266204 a = a - 3.25
267205
268- r1 = fma (a, PE[2 ], PE[1 ])
269- r2 = fma (a, PE[4 ], PE[3 ])
270- r3 = fma (a, PE[6 ], PE[5 ])
271- r4 = fma (a, PE[8 ], PE[7 ])
272- r5 = fma (a, PE[10 ], PE[9 ])
273- r6 = fma (a, PE[12 ], PE[11 ])
274- r7 = fma (a, PE[14 ], PE[13 ])
275-
206+ # Generated using Sollya::remez(f(c*x+d), deg, [(a-d)/c;(b-d)/c], 1, 1e-16), [|D ...|] with deg=13 a=3.25 b=4 c=1 d=3.25
207+ PE= ( 0x1 .20 c13035539e4p- 18 , - 0x1 . e9b5e8d16df7ep- 16 , 0x1 .8 de3cd4733bf9p- 14 , - 0x1 .9 aa48beb8382fp- 13 , 0x1 .2 c7d713370a9fp- 12 , - 0x1 .490 b12110b9e2p- 12 , 0x1 .1459 c5d989d23p- 12 , - 0x1 .64 b28e9f1269p- 13 , 0x1 .57 c76d9d05cf8p- 14 , - 0x1 . bf271d9951cf8p- 16 , 0x1 . db7ea4d4535c9p- 19 , 0x1 .91 c2e102d5e49p- 20 , - 0x1 . e9f0826c2149ep- 21 , 0x1 .60 eebaea236e1p- 23 )
276208
277- a2 = a * a
209+ r = evalpoly (a,PE)
278210
279- r = r7
280- r = fma (a2, r, r6)
281- r = fma (a2, r, r5)
282- r = fma (a2, r, r4)
283- r = fma (a2, r, r3)
284- r = fma (a2, r, r2)
285- r = fma (a2, r, r1)
286211
287212 if (sign)
288213 return - 1.0 + r
@@ -294,28 +219,11 @@ function _erf(x::Float64)
294219 a = abs (x)
295220 a = fma (0.5 , a, - 2.0 )
296221
297- r1 = fma (a, PF[2 ], PF[1 ])
298- r2 = fma (a, PF[4 ], PF[3 ])
299- r3 = fma (a, PF[6 ], PF[5 ])
300- r4 = fma (a, PF[8 ], PF[7 ])
301- r5 = fma (a, PF[10 ], PF[9 ])
302- r6 = fma (a, PF[12 ], PF[11 ])
303- r7 = fma (a, PF[14 ], PF[13 ])
304- r8 = fma (a, PF[16 ], PF[15 ])
305-
306- r9 = PF[17 ]
307-
308- a2 = a * a
309-
310- r = r9
311- r = fma (a2, r, r8)
312- r = fma (a2, r, r7)
313- r = fma (a2, r, r6)
314- r = fma (a2, r, r5)
315- r = fma (a2, r, r4)
316- r = fma (a2, r, r3)
317- r = fma (a2, r, r2)
318- r = fma (a2, r, r1)
222+ # Generated using Sollya::remez(f(c*x+d), deg, [(a-d)/c;(b-d)/c], 1, 1e-16), [|D ...|] with deg=16 a=4 b=5.90625 c=2 d=4
223+ PF= ( 0x1 .08 ddd130d1fa6p- 26 , - 0x1 .10 b146f59ff06p- 22 , 0x1 .10 b135328b7b2p- 19 , - 0x1 .6039988e7575 fp- 17 , 0x1 .497 d365e19367p- 15 , - 0x1 . da48d9afac83ep- 14 , 0x1 .1024 c9b1fbb48p- 12 , - 0x1 . fc962e7066272p- 12 , 0x1 .87297282 d4651p- 11 , - 0x1 . f057b255f8c59p- 11 , 0x1 .0228 d0eee063p- 10 , - 0x1 . b1b21b84ec41cp- 11 , 0x1 .1 ead8ae9e1253p- 11 , - 0x1 .1e708 fba37fccp- 12 , 0x1 .9559363991 edap- 14 , - 0x1 .68 c827b783d9cp- 16 , 0x1 .2 ec4adeccf4a2p- 19 )
224+
225+ r= evalpoly (a,PF)
226+
319227
320228 if (sign)
321229 return - 1.0 + r
0 commit comments