JuliaMath · AhmedYKadah · Mar 31, 2025 · Mar 31, 2025 · Mar 31, 2025 · Mar 31, 2025
diff --git a/src/erf.jl b/src/erf.jl
@@ -111,12 +111,6 @@ function _erf(x::Float64)
     ix::UInt32=reinterpret(UInt64,x)>>32
     # # top 32, without sign bit 
     ia::UInt32=ix & 0x7fffffff
-    # # sign
-    # sign::UInt32=ix>>31
-
-    sign::Bool=x<0
-
-
 
     if (ia < 0x3feb0000)
     #  a = |x| < 0.84375.  
@@ -130,7 +124,6 @@ function _erf(x::Float64)
             PA=(0.12837916709551256, -0.3761263890318287, 0.11283791670896592, -0.026866170630075903, 0.005223977428649761, -0.0008548312229989974, 0.00012054654502151287, -1.4906315067498891e-5, 1.6126444349070824e-6, -1.3074259056679966e-7)
 
             r= fma(x,evalpoly(x2,PA),x) ## This fma is crucial for accuracy.  
-
             return r
         else
         ## 0.5 <= a < 0.84375 - Use rational approximation.  
@@ -158,12 +151,9 @@ function _erf(x::Float64)
         P=evalpoly(a,NB)
         Q=evalpoly(a,DB)
 
+        r= C + P / Q
+        return copysign(r,x)
 
-        if (sign)
-            return -C - P / Q
-        else
-            return C + P / Q
-        end
     elseif (ia < 0x40000000)
     ## 1.25 <= |x| < 2.0.  
         a = abs(x)
@@ -174,13 +164,9 @@ function _erf(x::Float64)
         PC=( 0x1.3bcd133aa0ffcp-4, -0x1.e4652fadcb702p-3, 0x1.2ebf3dcca0446p-2, -0x1.571d01c62d66p-3, 0x1.93a9a8f5b3413p-8, 0x1.8281cbcc2cd52p-5, -0x1.5cffd86b4de16p-6, -0x1.db4ccf595053ep-9, 0x1.757cbf8684edap-8, -0x1.ce7dfd2a9e56ap-11, -0x1.99ee3bc5a3263p-11, 0x1.3c57cf9213f5fp-12, 0x1.60692996bf254p-14, -0x1.6e44cb7c1fa2ap-14, 0x1.9d4484ac482b2p-16, -0x1.578c9e375d37p-19)
 
         # Obtains erfc of |x|
-        r=evalpoly(a,PC)
+        r=1.0-evalpoly(a,PC)
+        return copysign(r,x)
 
-        if (sign)
-            return -1.0 + r
-        else
-            return 1.0 - r
-        end
     elseif (ia < 0x400a0000)
     ## 2 <= |x| < 3.25.  
         a = abs(x)
@@ -189,16 +175,10 @@ function _erf(x::Float64)
         # Generated using Sollya::remez(f(c*x+d), deg, [(a-d)/c;(b-d)/c], 1, 1e-16), [|D ...|] with deg=17 a=2 b=3.25 c=2 d=2 
         PD=( 0x1.328f5ec350e5p-8, -0x1.529b9e8cf8e99p-5, 0x1.529b9e8cd9e71p-3, -0x1.8b0ae3a023bf2p-2, 0x1.1a2c592599d82p-1, -0x1.ace732477e494p-2, -0x1.e1a06a27920ffp-6, 0x1.bae92a6d27af6p-2, -0x1.a15470fcf5ce7p-2, 0x1.bafe45d18e213p-6, 0x1.0d950680d199ap-2, -0x1.8c9481e8f22e3p-3, -0x1.158450ed5c899p-4, 0x1.c01f2973b44p-3, -0x1.73ed2827546a7p-3, 0x1.47733687d1ff7p-4, -0x1.2dec70d00b8e1p-6, 0x1.a947ab83cd4fp-10 )
 
-
         # Obtains erfc of |x|
-        r=evalpoly(a,PD)
+        r=1.0-evalpoly(a,PD)
+        return copysign(r,x)
 
-
-        if (sign)
-            return -1.0 + r
-        else
-            return 1.0 - r
-        end
     elseif (ia < 0x40100000)
     ## 3.25 <= |x| < 4.0.  
         a = abs(x)
@@ -207,14 +187,9 @@ function _erf(x::Float64)
         # Generated using Sollya::remez(f(c*x+d), deg, [(a-d)/c;(b-d)/c], 1, 1e-16), [|D ...|] with deg=13 a=3.25 b=4 c=1 d=3.25 
         PE=( 0x1.20c13035539e4p-18, -0x1.e9b5e8d16df7ep-16, 0x1.8de3cd4733bf9p-14, -0x1.9aa48beb8382fp-13, 0x1.2c7d713370a9fp-12, -0x1.490b12110b9e2p-12, 0x1.1459c5d989d23p-12, -0x1.64b28e9f1269p-13, 0x1.57c76d9d05cf8p-14, -0x1.bf271d9951cf8p-16, 0x1.db7ea4d4535c9p-19, 0x1.91c2e102d5e49p-20, -0x1.e9f0826c2149ep-21, 0x1.60eebaea236e1p-23 )
 
-        r=evalpoly(a,PE)
-
+        r=1.0-evalpoly(a,PE)
+        return copysign(r,x)
 
-        if (sign)
-            return -1.0 + r
-        else
-            return 1.0 - r
-        end
     elseif (ia < 0x4017a000)
     ## 4 <= |x| < 5.90625.  
         a = abs(x)
@@ -223,31 +198,19 @@ function _erf(x::Float64)
         # Generated using Sollya::remez(f(c*x+d), deg, [(a-d)/c;(b-d)/c], 1, 1e-16), [|D ...|] with deg=16 a=4 b=5.90625 c=2 d=4 
         PF=( 0x1.08ddd130d1fa6p-26, -0x1.10b146f59ff06p-22, 0x1.10b135328b7b2p-19, -0x1.6039988e7575fp-17, 0x1.497d365e19367p-15, -0x1.da48d9afac83ep-14, 0x1.1024c9b1fbb48p-12, -0x1.fc962e7066272p-12, 0x1.87297282d4651p-11, -0x1.f057b255f8c59p-11, 0x1.0228d0eee063p-10, -0x1.b1b21b84ec41cp-11, 0x1.1ead8ae9e1253p-11, -0x1.1e708fba37fccp-12, 0x1.9559363991edap-14, -0x1.68c827b783d9cp-16, 0x1.2ec4adeccf4a2p-19 )
 
-        r=evalpoly(a,PF)
-
-
-        if (sign)
-            return -1.0 + r
-        else
-            return 1.0 - r
-        end
+        r=1.0-evalpoly(a,PF)
+        return copysign(r,x)
     else
-
         if(isnan(x))
              return NaN
         end
-
-        if (sign)
-            return -1.0
-        else
-            return 1.0
-        end
-
+        copysign(1.0,x)
     end
-
-
 end
 
+#    Fast erf implementation using 
+#    polynomial approximations of erf and erfc.
+#    Highest measured error is 1.12 ULPs at x = 1.232469  
 function _erf(x::Float32)
     xabs=abs(x)
 
@@ -256,47 +219,38 @@ function _erf(x::Float32)
     # # erf approximation using erf(x)=x+x*P(x^2) with degree 6
     # Sollya::remez(erf(sqrt(x))/sqrt(x)-1,6,[1e-32;0.5],1,1e-32);
 
-    p=(0.128379167084072442015971708878153077794812875442854468252262977759538466466026108f0 ,-0.37612638677173360887708232154133809982218085424044576090554860522057182310851056f0 ,0.112837843774249243616280397250157777250507394914770984188915122197580134574091983f0 ,-2.68652865375831097248410803484628824158061934615600565461807890879374835870381413f-2 ,5.2188560068141289500478726517878958032487316358404909126782459380536157982980317f-3 ,-8.3948481565340715162646401327604259604058983841328238470236141987304564416187075f-4 )
+    p=(0.12837917f0, -0.37612638f0, 0.11283784f0, -0.026865287f0, 0.005218856f0, -0.0008394848f0)
     return copysign(fma(evalpoly(x^2,p),x,x),x)
 
-
   elseif(xabs<1.25)
     # range [0.5;1.25]
     # # erfc approximation with degree 11
     # Sollya::remez(erfc(x+0.5),11,[0;1.25-0.5],1,1e-32);
 
-    p=(0.47950012218627633266759004480928346323282597139306026470914603497045110845486174f0 ,-0.87878257867905935820797978014037919344903083907274027447236660730320123742574287f0 ,0.43939127340837007019212165648359529028950189241566682841945406944095273511955846f0 ,0.14646415648788187125149469711213532491861248937046075996222718522704324115126361f0 ,-0.18308466657525286729680773848609379638263457954975049708494183866284386220432776f0 ,-7.2864220919698610613876940798326935208926368515022370715399462640511442447273458f-3 ,4.9870470155967931820488830916894059744198728056328087545059900897748278238141407f-2 ,-4.8868244174386309272067176761136942655656143898910274020685204636212063119576779f-3 ,-1.1067663329874328692394302924580932358903686528252386541750628802256971128485086f-2 ,3.422346846104011114882331840937679141622349918363223030054135870224315763897819f-3 ,7.3027064271433070734900533407935697302293380332736344236451943042328241926267078f-4 ,-3.75817085020390291674591289563810393435170791862443379088867595819930602228183736f-4)
+    p=(0.47950011f0, -0.8787826f0, 0.4393913f0, 0.14646415f0, -0.18308467f0, -0.007286422f0, 0.04987047f0, -0.0048868246f0, -0.011067663f0, 0.003422347f0, 0.00073027064f0, -0.0003758171f0)
     return copysign(1f0-evalpoly(xabs-0.5f0,p),x)
 
-
   elseif(xabs<2.5)
     # range [1.25;2.5]
     # erfc approximation with degree 13
     # Sollya::remez(erfc(x+1.25),13,[1.25-1.25;2.5-1.25],1,1e-32);
 
-    p=(7.7099871743416328126356260566547494286908999516428835479787235777958146662395595f-2 ,-0.236521122401763914017434484798130978411662107330375092318490618856395191655478216f0,0.29565140031735793699573224918347147906505950384598003349376150036910634397925794f0 ,-0.1675357298956664512006671881841788827839151996664942041368823179421003600967294f0 ,6.1585929514334837247453472606059635765901344910897544510201614098797139034234648f-3 ,4.7187118101037459805673209483303989110029907488612006887134009408435903419517292f-2 ,-2.13310233800625824496628789189780598311678632855392393834221656949343403265787497f-2 ,-3.5262544206616132931628291419365561301271536625777609043061691511329815920675659f-3 ,5.4618311186313877025711961981462734764321122794521259327727086934478142942850845f-3 ,-4.785867288141399400801105017099155449896231371492841611827465311139149075249888f-4 ,-1.27638529129849191843323454760991411930129610353375308047589690520960413915824394f-3 ,7.3386942792237055752542546076918383420248583637581325208367761430501498054660812f-4 ,-1.78316578653402766600700240536421560298308014794597944402973993319799701405750455f-4 ,1.7451623823305231302742688272669315889569143556707803172195238203096188492888552f-5)
+    p=(0.077099875f0, -0.23652112f0, 0.2956514f0, -0.16753574f0, 0.006158593f0, 0.04718712f0, -0.021331023f0, -0.0035262543f0, 0.005461831f0, -0.00047858673f0, -0.0012763853f0, 0.00073386944f0, -0.00017831658f0, 1.7451624f-5)
     return copysign(1f0-evalpoly(xabs-1.25f0,p),x)
 
-
   elseif (xabs<4.0)
     # range [2.5;4.0]
     # erfc approximation with degree 13
     # Sollya::remez(erfc(x+2.5),13,[0;4-2.5],1,1e-32);
 
-    p=(4.0695201730499156824979069048504659592517281936379802348048709133497117253173134f-4 ,-2.17828418992563158453260234674605666557610132217183615223667886784864793391972216f-3 ,5.4457086379161451966966115384340866958878328106709954104596733367011474611248579f-3 ,-8.3500528577413591092464067696190605106058226371648352228707616455735161644530442f-3 ,8.6220108431254600006776817786647116978577973677436276371791579294890933467761521f-3 ,-6.1151670580428489239821330412642128179703517796977571695036324539244245861906337f-3 ,2.7899458310817905157828387844634029030573127219171061924068857965376725014125688f-3 ,-5.1939500417227631511066424033635849427151215499427143347485235667556121428433924f-4 ,-3.0461048275173680879818513198875622675191891190039217228437020935156706025853604f-4 ,3.1068457426894474803882479013975662189036326925351567029013595934579911165234626f-4 ,-1.38668990773606991790025193904557899401436779771288081224199889420050208965460715f-4 ,3.6909690646602578301139502069782980693656088359691610474313757700491321631191208f-5 ,-5.6828887569336940854242289035551153938157998442490411471025066283558049904037765f-6 ,3.9297630357752347428080345871297027499036518175420666529979940028638158683656671f-7)
+    p=(0.00040695202f0, -0.002178284f0, 0.0054457085f0, -0.008350053f0, 0.008622011f0, -0.006115167f0, 0.0027899458f0, -0.000519395f0, -0.00030461047f0, 0.00031068458f0, -0.00013866898f0, 3.6909692f-5, -5.682889f-6, 3.929763f-7)
     return copysign(1f0-evalpoly(xabs-2.5f0,p),x)
 
-
   else
-#  |x| >= 4.0.  
-
-
-  r=copysign(1f0,x)
-
-  return r
-
+    # range [4.0,inf)
+    r=copysign(1f0,x)
+    return r
   end
-
 end
 
 _erf(x::Float16)=Float16(_erf(Float32(x)))