Skip to content

Commit b819c58

Browse files
committed
added erf(x::Float32) implementation
1 parent 3cee8ce commit b819c58

File tree

1 file changed

+50
-3
lines changed

1 file changed

+50
-3
lines changed

src/erf.jl

Lines changed: 50 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -97,7 +97,8 @@ See also:
9797
[`erfinv(x)`](@ref erfinv), [`erfcinv(x)`](@ref erfcinv).
9898
9999
# Implementation by
100-
- `Float32`/`Float64`: Julia implementation of https://github.com/ARM-software/optimized-routines/blob/master/math/erf.c with modification
100+
- `Float32`: polynomial approximations of erf
101+
- `Float64`: Julia implementation of https://github.com/ARM-software/optimized-routines/blob/master/math/erf.c
101102
- `BigFloat`: C library for multiple-precision floating-point [MPFR](https://www.mpfr.org/)
102103
"""
103104

@@ -247,9 +248,55 @@ function _erf(x::Float64)
247248

248249
end
249250

250-
_erf(x::Float32)=Float32(_erf(Float64(x)))
251+
function _erf(x::Float32)
252+
xabs=abs(x)
251253

252-
_erf(x::Float16)=Float16(_erf(Float64(x)))
254+
if (xabs< 0.5)
255+
# range [0;0.5]
256+
# # erf approximation using erf(x)=x+x*P(x^2) with degree 6
257+
# Sollya::remez(erf(sqrt(x))/sqrt(x)-1,6,[1e-32;0.5],1,1e-32);
258+
259+
p=(0.128379167084072442015971708878153077794812875442854468252262977759538466466026108f0 ,-0.37612638677173360887708232154133809982218085424044576090554860522057182310851056f0 ,0.112837843774249243616280397250157777250507394914770984188915122197580134574091983f0 ,-2.68652865375831097248410803484628824158061934615600565461807890879374835870381413f-2 ,5.2188560068141289500478726517878958032487316358404909126782459380536157982980317f-3 ,-8.3948481565340715162646401327604259604058983841328238470236141987304564416187075f-4 )
260+
return copysign(fma(evalpoly(x^2,p),x,x),x)
261+
262+
263+
elseif(xabs<1.25)
264+
# range [0.5;1.25]
265+
# # erfc approximation with degree 11
266+
# Sollya::remez(erfc(x+0.5),11,[0;1.25-0.5],1,1e-32);
267+
268+
p=(0.47950012218627633266759004480928346323282597139306026470914603497045110845486174f0 ,-0.87878257867905935820797978014037919344903083907274027447236660730320123742574287f0 ,0.43939127340837007019212165648359529028950189241566682841945406944095273511955846f0 ,0.14646415648788187125149469711213532491861248937046075996222718522704324115126361f0 ,-0.18308466657525286729680773848609379638263457954975049708494183866284386220432776f0 ,-7.2864220919698610613876940798326935208926368515022370715399462640511442447273458f-3 ,4.9870470155967931820488830916894059744198728056328087545059900897748278238141407f-2 ,-4.8868244174386309272067176761136942655656143898910274020685204636212063119576779f-3 ,-1.1067663329874328692394302924580932358903686528252386541750628802256971128485086f-2 ,3.422346846104011114882331840937679141622349918363223030054135870224315763897819f-3 ,7.3027064271433070734900533407935697302293380332736344236451943042328241926267078f-4 ,-3.75817085020390291674591289563810393435170791862443379088867595819930602228183736f-4)
269+
return copysign(1f0-evalpoly(xabs-0.5f0,p),x)
270+
271+
272+
elseif(xabs<2.5)
273+
# range [1.25;2.5]
274+
# erfc approximation with degree 13
275+
# Sollya::remez(erfc(x+1.25),13,[1.25-1.25;2.5-1.25],1,1e-32);
276+
277+
p=(7.7099871743416328126356260566547494286908999516428835479787235777958146662395595f-2 ,-0.236521122401763914017434484798130978411662107330375092318490618856395191655478216f0,0.29565140031735793699573224918347147906505950384598003349376150036910634397925794f0 ,-0.1675357298956664512006671881841788827839151996664942041368823179421003600967294f0 ,6.1585929514334837247453472606059635765901344910897544510201614098797139034234648f-3 ,4.7187118101037459805673209483303989110029907488612006887134009408435903419517292f-2 ,-2.13310233800625824496628789189780598311678632855392393834221656949343403265787497f-2 ,-3.5262544206616132931628291419365561301271536625777609043061691511329815920675659f-3 ,5.4618311186313877025711961981462734764321122794521259327727086934478142942850845f-3 ,-4.785867288141399400801105017099155449896231371492841611827465311139149075249888f-4 ,-1.27638529129849191843323454760991411930129610353375308047589690520960413915824394f-3 ,7.3386942792237055752542546076918383420248583637581325208367761430501498054660812f-4 ,-1.78316578653402766600700240536421560298308014794597944402973993319799701405750455f-4 ,1.7451623823305231302742688272669315889569143556707803172195238203096188492888552f-5)
278+
return copysign(1f0-evalpoly(xabs-1.25f0,p),x)
279+
280+
281+
elseif (xabs<4.0)
282+
# range [2.5;4.0]
283+
# erfc approximation with degree 13
284+
# Sollya::remez(erfc(x+2.5),13,[0;4-2.5],1,1e-32);
285+
286+
p=(4.0695201730499156824979069048504659592517281936379802348048709133497117253173134f-4 ,-2.17828418992563158453260234674605666557610132217183615223667886784864793391972216f-3 ,5.4457086379161451966966115384340866958878328106709954104596733367011474611248579f-3 ,-8.3500528577413591092464067696190605106058226371648352228707616455735161644530442f-3 ,8.6220108431254600006776817786647116978577973677436276371791579294890933467761521f-3 ,-6.1151670580428489239821330412642128179703517796977571695036324539244245861906337f-3 ,2.7899458310817905157828387844634029030573127219171061924068857965376725014125688f-3 ,-5.1939500417227631511066424033635849427151215499427143347485235667556121428433924f-4 ,-3.0461048275173680879818513198875622675191891190039217228437020935156706025853604f-4 ,3.1068457426894474803882479013975662189036326925351567029013595934579911165234626f-4 ,-1.38668990773606991790025193904557899401436779771288081224199889420050208965460715f-4 ,3.6909690646602578301139502069782980693656088359691610474313757700491321631191208f-5 ,-5.6828887569336940854242289035551153938157998442490411471025066283558049904037765f-6 ,3.9297630357752347428080345871297027499036518175420666529979940028638158683656671f-7)
287+
return copysign(1f0-evalpoly(xabs-2.5f0,p),x)
288+
289+
290+
else
291+
# |x| >= 4.0.
292+
r=copysign(1f0,x)
293+
end
294+
return r
295+
296+
297+
end
298+
299+
_erf(x::Float16)=Float16(_erf(Float32(x)))
253300

254301

255302
function erf end

0 commit comments

Comments
 (0)