-
Notifications
You must be signed in to change notification settings - Fork 104
Added erf(x) Float64 and Float32 Julia implementations #491
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from 1 commit
6f554ef
7f4fd2d
e784c9f
da16cb1
0a755b6
6efcec8
0fc6d4d
3cee8ce
b819c58
57bbaf2
5ad8278
26b3b1f
693f788
642c1de
2911312
bba5f9b
c2a37a1
1d45644
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -111,12 +111,6 @@ function _erf(x::Float64) | |
ix::UInt32=reinterpret(UInt64,x)>>32 | ||
# # top 32, without sign bit | ||
ia::UInt32=ix & 0x7fffffff | ||
# # sign | ||
# sign::UInt32=ix>>31 | ||
|
||
sign::Bool=x<0 | ||
|
||
|
||
|
||
if (ia < 0x3feb0000) | ||
# a = |x| < 0.84375. | ||
|
@@ -130,7 +124,6 @@ function _erf(x::Float64) | |
PA=(0.12837916709551256, -0.3761263890318287, 0.11283791670896592, -0.026866170630075903, 0.005223977428649761, -0.0008548312229989974, 0.00012054654502151287, -1.4906315067498891e-5, 1.6126444349070824e-6, -1.3074259056679966e-7) | ||
|
||
r= fma(x,evalpoly(x2,PA),x) ## This fma is crucial for accuracy. | ||
|
||
return r | ||
else | ||
## 0.5 <= a < 0.84375 - Use rational approximation. | ||
|
@@ -158,12 +151,9 @@ function _erf(x::Float64) | |
P=evalpoly(a,NB) | ||
Q=evalpoly(a,DB) | ||
|
||
r= C + P / Q | ||
return copysign(r,x) | ||
|
||
if (sign) | ||
return -C - P / Q | ||
else | ||
return C + P / Q | ||
end | ||
elseif (ia < 0x40000000) | ||
## 1.25 <= |x| < 2.0. | ||
a = abs(x) | ||
|
@@ -174,13 +164,9 @@ function _erf(x::Float64) | |
PC=( 0x1.3bcd133aa0ffcp-4, -0x1.e4652fadcb702p-3, 0x1.2ebf3dcca0446p-2, -0x1.571d01c62d66p-3, 0x1.93a9a8f5b3413p-8, 0x1.8281cbcc2cd52p-5, -0x1.5cffd86b4de16p-6, -0x1.db4ccf595053ep-9, 0x1.757cbf8684edap-8, -0x1.ce7dfd2a9e56ap-11, -0x1.99ee3bc5a3263p-11, 0x1.3c57cf9213f5fp-12, 0x1.60692996bf254p-14, -0x1.6e44cb7c1fa2ap-14, 0x1.9d4484ac482b2p-16, -0x1.578c9e375d37p-19) | ||
|
||
# Obtains erfc of |x| | ||
r=evalpoly(a,PC) | ||
r=1.0-evalpoly(a,PC) | ||
return copysign(r,x) | ||
|
||
if (sign) | ||
return -1.0 + r | ||
else | ||
return 1.0 - r | ||
end | ||
elseif (ia < 0x400a0000) | ||
## 2 <= |x| < 3.25. | ||
a = abs(x) | ||
|
@@ -189,16 +175,10 @@ function _erf(x::Float64) | |
# Generated using Sollya::remez(f(c*x+d), deg, [(a-d)/c;(b-d)/c], 1, 1e-16), [|D ...|] with deg=17 a=2 b=3.25 c=2 d=2 | ||
PD=( 0x1.328f5ec350e5p-8, -0x1.529b9e8cf8e99p-5, 0x1.529b9e8cd9e71p-3, -0x1.8b0ae3a023bf2p-2, 0x1.1a2c592599d82p-1, -0x1.ace732477e494p-2, -0x1.e1a06a27920ffp-6, 0x1.bae92a6d27af6p-2, -0x1.a15470fcf5ce7p-2, 0x1.bafe45d18e213p-6, 0x1.0d950680d199ap-2, -0x1.8c9481e8f22e3p-3, -0x1.158450ed5c899p-4, 0x1.c01f2973b44p-3, -0x1.73ed2827546a7p-3, 0x1.47733687d1ff7p-4, -0x1.2dec70d00b8e1p-6, 0x1.a947ab83cd4fp-10 ) | ||
|
||
|
||
# Obtains erfc of |x| | ||
r=evalpoly(a,PD) | ||
r=1.0-evalpoly(a,PD) | ||
return copysign(r,x) | ||
|
||
|
||
if (sign) | ||
return -1.0 + r | ||
else | ||
return 1.0 - r | ||
end | ||
elseif (ia < 0x40100000) | ||
## 3.25 <= |x| < 4.0. | ||
a = abs(x) | ||
|
@@ -207,14 +187,9 @@ function _erf(x::Float64) | |
# Generated using Sollya::remez(f(c*x+d), deg, [(a-d)/c;(b-d)/c], 1, 1e-16), [|D ...|] with deg=13 a=3.25 b=4 c=1 d=3.25 | ||
PE=( 0x1.20c13035539e4p-18, -0x1.e9b5e8d16df7ep-16, 0x1.8de3cd4733bf9p-14, -0x1.9aa48beb8382fp-13, 0x1.2c7d713370a9fp-12, -0x1.490b12110b9e2p-12, 0x1.1459c5d989d23p-12, -0x1.64b28e9f1269p-13, 0x1.57c76d9d05cf8p-14, -0x1.bf271d9951cf8p-16, 0x1.db7ea4d4535c9p-19, 0x1.91c2e102d5e49p-20, -0x1.e9f0826c2149ep-21, 0x1.60eebaea236e1p-23 ) | ||
|
||
r=evalpoly(a,PE) | ||
|
||
r=1.0-evalpoly(a,PE) | ||
return copysign(r,x) | ||
|
||
if (sign) | ||
return -1.0 + r | ||
else | ||
return 1.0 - r | ||
end | ||
elseif (ia < 0x4017a000) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. any chance you can regenerate the polies to make this be There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. would that have a meaningful impact? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. probably not much. It might just save a cycle or 2. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. if you wanted to test the speed, you could try it without regenerating the polys and it should give you a good idea. |
||
## 4 <= |x| < 5.90625. | ||
a = abs(x) | ||
|
@@ -223,31 +198,19 @@ function _erf(x::Float64) | |
# Generated using Sollya::remez(f(c*x+d), deg, [(a-d)/c;(b-d)/c], 1, 1e-16), [|D ...|] with deg=16 a=4 b=5.90625 c=2 d=4 | ||
PF=( 0x1.08ddd130d1fa6p-26, -0x1.10b146f59ff06p-22, 0x1.10b135328b7b2p-19, -0x1.6039988e7575fp-17, 0x1.497d365e19367p-15, -0x1.da48d9afac83ep-14, 0x1.1024c9b1fbb48p-12, -0x1.fc962e7066272p-12, 0x1.87297282d4651p-11, -0x1.f057b255f8c59p-11, 0x1.0228d0eee063p-10, -0x1.b1b21b84ec41cp-11, 0x1.1ead8ae9e1253p-11, -0x1.1e708fba37fccp-12, 0x1.9559363991edap-14, -0x1.68c827b783d9cp-16, 0x1.2ec4adeccf4a2p-19 ) | ||
|
||
r=evalpoly(a,PF) | ||
|
||
|
||
if (sign) | ||
return -1.0 + r | ||
else | ||
return 1.0 - r | ||
end | ||
r=1.0-evalpoly(a,PF) | ||
return copysign(r,x) | ||
else | ||
|
||
if(isnan(x)) | ||
return NaN | ||
end | ||
|
||
if (sign) | ||
return -1.0 | ||
else | ||
return 1.0 | ||
end | ||
|
||
copysign(1.0,x) | ||
end | ||
|
||
|
||
end | ||
|
||
# Fast erf implementation using | ||
# polynomial approximations of erf and erfc. | ||
# Highest measured error is 1.12 ULPs at x = 1.232469 | ||
function _erf(x::Float32) | ||
AhmedYKadah marked this conversation as resolved.
Show resolved
Hide resolved
|
||
xabs=abs(x) | ||
|
||
|
@@ -256,47 +219,38 @@ function _erf(x::Float32) | |
# # erf approximation using erf(x)=x+x*P(x^2) with degree 6 | ||
# Sollya::remez(erf(sqrt(x))/sqrt(x)-1,6,[1e-32;0.5],1,1e-32); | ||
|
||
p=(0.128379167084072442015971708878153077794812875442854468252262977759538466466026108f0 ,-0.37612638677173360887708232154133809982218085424044576090554860522057182310851056f0 ,0.112837843774249243616280397250157777250507394914770984188915122197580134574091983f0 ,-2.68652865375831097248410803484628824158061934615600565461807890879374835870381413f-2 ,5.2188560068141289500478726517878958032487316358404909126782459380536157982980317f-3 ,-8.3948481565340715162646401327604259604058983841328238470236141987304564416187075f-4 ) | ||
p=(0.12837917f0, -0.37612638f0, 0.11283784f0, -0.026865287f0, 0.005218856f0, -0.0008394848f0) | ||
return copysign(fma(evalpoly(x^2,p),x,x),x) | ||
|
||
|
||
elseif(xabs<1.25) | ||
# range [0.5;1.25] | ||
# # erfc approximation with degree 11 | ||
# Sollya::remez(erfc(x+0.5),11,[0;1.25-0.5],1,1e-32); | ||
|
||
p=(0.47950012218627633266759004480928346323282597139306026470914603497045110845486174f0 ,-0.87878257867905935820797978014037919344903083907274027447236660730320123742574287f0 ,0.43939127340837007019212165648359529028950189241566682841945406944095273511955846f0 ,0.14646415648788187125149469711213532491861248937046075996222718522704324115126361f0 ,-0.18308466657525286729680773848609379638263457954975049708494183866284386220432776f0 ,-7.2864220919698610613876940798326935208926368515022370715399462640511442447273458f-3 ,4.9870470155967931820488830916894059744198728056328087545059900897748278238141407f-2 ,-4.8868244174386309272067176761136942655656143898910274020685204636212063119576779f-3 ,-1.1067663329874328692394302924580932358903686528252386541750628802256971128485086f-2 ,3.422346846104011114882331840937679141622349918363223030054135870224315763897819f-3 ,7.3027064271433070734900533407935697302293380332736344236451943042328241926267078f-4 ,-3.75817085020390291674591289563810393435170791862443379088867595819930602228183736f-4) | ||
p=(0.47950011f0, -0.8787826f0, 0.4393913f0, 0.14646415f0, -0.18308467f0, -0.007286422f0, 0.04987047f0, -0.0048868246f0, -0.011067663f0, 0.003422347f0, 0.00073027064f0, -0.0003758171f0) | ||
return copysign(1f0-evalpoly(xabs-0.5f0,p),x) | ||
|
||
|
||
elseif(xabs<2.5) | ||
# range [1.25;2.5] | ||
# erfc approximation with degree 13 | ||
# Sollya::remez(erfc(x+1.25),13,[1.25-1.25;2.5-1.25],1,1e-32); | ||
|
||
p=(7.7099871743416328126356260566547494286908999516428835479787235777958146662395595f-2 ,-0.236521122401763914017434484798130978411662107330375092318490618856395191655478216f0,0.29565140031735793699573224918347147906505950384598003349376150036910634397925794f0 ,-0.1675357298956664512006671881841788827839151996664942041368823179421003600967294f0 ,6.1585929514334837247453472606059635765901344910897544510201614098797139034234648f-3 ,4.7187118101037459805673209483303989110029907488612006887134009408435903419517292f-2 ,-2.13310233800625824496628789189780598311678632855392393834221656949343403265787497f-2 ,-3.5262544206616132931628291419365561301271536625777609043061691511329815920675659f-3 ,5.4618311186313877025711961981462734764321122794521259327727086934478142942850845f-3 ,-4.785867288141399400801105017099155449896231371492841611827465311139149075249888f-4 ,-1.27638529129849191843323454760991411930129610353375308047589690520960413915824394f-3 ,7.3386942792237055752542546076918383420248583637581325208367761430501498054660812f-4 ,-1.78316578653402766600700240536421560298308014794597944402973993319799701405750455f-4 ,1.7451623823305231302742688272669315889569143556707803172195238203096188492888552f-5) | ||
p=(0.077099875f0, -0.23652112f0, 0.2956514f0, -0.16753574f0, 0.006158593f0, 0.04718712f0, -0.021331023f0, -0.0035262543f0, 0.005461831f0, -0.00047858673f0, -0.0012763853f0, 0.00073386944f0, -0.00017831658f0, 1.7451624f-5) | ||
return copysign(1f0-evalpoly(xabs-1.25f0,p),x) | ||
|
||
|
||
elseif (xabs<4.0) | ||
# range [2.5;4.0] | ||
# erfc approximation with degree 13 | ||
# Sollya::remez(erfc(x+2.5),13,[0;4-2.5],1,1e-32); | ||
|
||
p=(4.0695201730499156824979069048504659592517281936379802348048709133497117253173134f-4 ,-2.17828418992563158453260234674605666557610132217183615223667886784864793391972216f-3 ,5.4457086379161451966966115384340866958878328106709954104596733367011474611248579f-3 ,-8.3500528577413591092464067696190605106058226371648352228707616455735161644530442f-3 ,8.6220108431254600006776817786647116978577973677436276371791579294890933467761521f-3 ,-6.1151670580428489239821330412642128179703517796977571695036324539244245861906337f-3 ,2.7899458310817905157828387844634029030573127219171061924068857965376725014125688f-3 ,-5.1939500417227631511066424033635849427151215499427143347485235667556121428433924f-4 ,-3.0461048275173680879818513198875622675191891190039217228437020935156706025853604f-4 ,3.1068457426894474803882479013975662189036326925351567029013595934579911165234626f-4 ,-1.38668990773606991790025193904557899401436779771288081224199889420050208965460715f-4 ,3.6909690646602578301139502069782980693656088359691610474313757700491321631191208f-5 ,-5.6828887569336940854242289035551153938157998442490411471025066283558049904037765f-6 ,3.9297630357752347428080345871297027499036518175420666529979940028638158683656671f-7) | ||
p=(0.00040695202f0, -0.002178284f0, 0.0054457085f0, -0.008350053f0, 0.008622011f0, -0.006115167f0, 0.0027899458f0, -0.000519395f0, -0.00030461047f0, 0.00031068458f0, -0.00013866898f0, 3.6909692f-5, -5.682889f-6, 3.929763f-7) | ||
return copysign(1f0-evalpoly(xabs-2.5f0,p),x) | ||
|
||
|
||
else | ||
# |x| >= 4.0. | ||
|
||
|
||
r=copysign(1f0,x) | ||
|
||
return r | ||
|
||
# range [4.0,inf) | ||
r=copysign(1f0,x) | ||
return r | ||
end | ||
|
||
end | ||
|
||
_erf(x::Float16)=Float16(_erf(Float32(x))) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. if you wanted to do a There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 100% could wait for a followup PR. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'm thinking that too to be honest. |
||
|
Uh oh!
There was an error while loading. Please reload this page.