JuliaMath
diff --git a/‎NEWS.md
Lines changed: 6 additions & 0 deletions b/‎NEWS.md
Lines changed: 6 additions & 0 deletions
diff --git a/‎src/Float128/besselj.jl
Lines changed: 1 addition & 1 deletion b/‎src/Float128/besselj.jl
Lines changed: 1 addition & 1 deletion
diff --git a/‎src/U_polynomials.jl
Lines changed: 95 additions & 51 deletions b/‎src/U_polynomials.jl
Lines changed: 95 additions & 51 deletions
diff --git a/‎src/asymptotics.jl
Lines changed: 42 additions & 19 deletions b/‎src/asymptotics.jl
Lines changed: 42 additions & 19 deletions
@@ -12,6 +12,12 @@ For bug fixes, performance enhancements, or fixes to unexported functions we wil
 
 # Version 0.2.0
 
+### Added
+ - Add more optimized methods for Float32 calculations that are faster([PR #43](https://github.com/JuliaMath/Bessels.jl/pull/43))
+
+### Fixed
+ - Reduce compile time and time to first call of besselj and bessely ([PR #42](https://github.com/JuliaMath/Bessels.jl/pull/42))
+
 ### Added
  - add an unexport method (`Bessels.besseljy(nu, x)`) for faster computation of `besselj` and `bessely` (#33)
  - add exported methods for Hankel functions `besselh(nu, k, x)`, `hankelh1(nu, x)`, `hankelh2(nu, x)` (#33)
 
@@ -1,4 +1,4 @@
-function besselj0(x::BigFloat)
+function _besselj0(x::BigFloat)
     x = abs(x)
     T = eltype(x)
     if iszero(x)
 
@@ -14,16 +14,19 @@
 # [2] http://dlmf.nist.gov/10.41.E9
 # [3] https://dlmf.nist.gov/10.41
 
+#####
+##### Large order expansion for J_{nu}(x) and Y_{nu}(x) and v > x
+#####
+
 """
     besseljy_debye(nu, x::T)
 
 Debey's asymptotic expansion for large order valid when v-> ∞ and x < v.
 Returns both besselj and bessely
 """
-function besseljy_debye(v, x)
-    T = eltype(x)
+function besseljy_debye(v, x::T) where T
     S = promote_type(T, Float64)
-    x = S(x)
+    v, x = S(v), S(x)
 
     vmx = (v + x) * (v - x)
     vs = sqrt(vmx)
@@ -36,12 +39,47 @@ function besseljy_debye(v, x)
     p = v / vs
     p2  = v^2 / vmx
 
-    Uk_Jn, Uk_Yn = Uk_poly_Jn(p, v, p2, x)
-
+    Uk_Jn, Uk_Yn = Uk_poly_Jn(p, v, p2, T(x))
     return coef_Jn * Uk_Jn, coef_Yn * Uk_Yn
 end
 
-besseljy_debye_cutoff(nu, x) = nu > 2.0 + 1.00035*x + Base.Math._approx_cbrt(Float64(302.681)*x) && nu > 15
+# Cutoffs for besseljy_debye expansions
+# regions where the debye expansions for large orders v > x are valid
+# determined by fitting a curve a + bx + (cx)^(1/3) to where debye expansions provide desired precision
+
+# Float32
+besseljy_debye_fit(x::Float32) =  2.5f0 + 1.00035f0*x + 7.114f0*Base.Math._approx_cbrt(x)
+besseljy_debye_cutoff(nu, x::Float32) = nu > besseljy_debye_fit(x) && nu > 6
+
+# Float64
+besseljy_debye_fit(x::Float64) = 2.0 + 1.00035*x + 6.714*Base.Math._approx_cbrt(x)
+besseljy_debye_cutoff(nu, x::Float64) = nu > besseljy_debye_fit(x) && nu > 15
+
+# Float128 - provide roughly ~1e-35 precision
+#besseljy_debye_fit(x) = 16.0 + 1.0012*x + Base.Math._approx_cbrt(Float64(27.91*x))
+#besseljy_debye_cutoff(nu, x) = nu > besseljy_debye_fit(x) && nu > 40
+
+#####
+##### Debye large order expansion coefficients
+#####
+
+function Uk_poly_Jn(p, v, p2, x::Float64)
+    if v > 5.0 + 1.00033*x + 11.26*Base.Math._approx_cbrt(x)
+        return Uk_poly10(p, v, p2)
+    else
+        return Uk_poly20(p, v, p2)
+    end
+end
+Uk_poly_Jn(p, v, p2, x::Float32) = Uk_poly5(p, v, p2)
+
+Uk_poly_In(p, v, p2, ::Type{Float32}) = Uk_poly5(p, v, p2)[1]
+Uk_poly_In(p, v, p2, ::Type{Float64}) = Uk_poly10(p, v, p2)[1]
+Uk_poly_Kn(p, v, p2, ::Type{Float32}) = Uk_poly5(p, v, p2)[2]
+Uk_poly_Kn(p, v, p2, ::Type{Float64}) = Uk_poly10(p, v, p2)[2]
+
+#####
+##### Large order expansion for Hankel functions and x > v
+#####
 
 """
     hankel_debye(nu, x::T)
@@ -51,7 +89,7 @@ Return the Hankel function H(nu, x) = J(nu, x) + Y(nu, x)*im
 """
 function hankel_debye(v, x::T) where T
     S = promote_type(T, Float64)
-    x = S(x)
+    v, x = S(v), S(x)
 
     vmx = abs((v + x) * (x - v))
     vs = sqrt(vmx)
@@ -63,55 +101,39 @@ function hankel_debye(v, x::T) where T
     p = v / vs
     p2  = v^2 / vmx
 
-    _, Uk_Yn = Uk_poly_Hankel(p*im, v, -p2, x)
-
+    _, Uk_Yn = Uk_poly_Hankel(p*im, v, -p2, T(x))
     return coef_Yn * Uk_Yn
 end
 
-hankel_debye_cutoff(nu, x) = nu < 0.2 + x + Base.Math._approx_cbrt(-411*x)
+# Cutoffs for hankel_debye expansions
+# regions where the debye expansions for large orders x > v are valid
+# determined by fitting a curve a + x + (bx)^(1/3) to where debye expansions provide desired precision
+
+# Float32
+hankel_debye_fit(x::Float32) = -3.5f0 + x + 7.435f0*Base.Math._approx_cbrt(-x)
+hankel_debye_cutoff(nu, x::Float32) = nu < hankel_debye_fit(x)
+
+# Float64
+hankel_debye_fit(x::Float64) = 0.2 + x + 7.435*Base.Math._approx_cbrt(-x)
+hankel_debye_cutoff(nu, x::Float64) = nu < hankel_debye_fit(x)
+
+# Float128
+#hankel_debye_cutoff(nu, x) = nu < -2 + 0.9987*x + Base.Math._approx_cbrt(-21570.3*Float64(x))
 
-function Uk_poly_Jn(p, v, p2, x::T) where T <: Float64
-    if v > 5.0 + 1.00033*x + Base.Math._approx_cbrt(1427.61*x)
-        return Uk_poly10(p, v, p2)
-    else
-        return Uk_poly20(p, v, p2)
-    end
-end
 function Uk_poly_Hankel(p, v, p2, x::T) where T <: Float64
-    if v < 5.0 + 0.998*x + Base.Math._approx_cbrt(-1171.34*x)
+    if v < 5.0 + 0.998*x + 10.541*Base.Math._approx_cbrt(-x)
         return Uk_poly10(p, v, p2)
     else
         return Uk_poly20(p, v, p2)
     end
 end
-Uk_poly_Hankel(p, v, p2, x) = Uk_poly_Jn(p, v, p2, x::BigFloat)
-
-Uk_poly_In(p, v, p2, ::Type{Float32}) = Uk_poly5(p, v, p2)[1]
-Uk_poly_In(p, v, p2, ::Type{Float64}) = Uk_poly10(p, v, p2)[1]
-Uk_poly_Kn(p, v, p2, ::Type{Float32}) = Uk_poly5(p, v, p2)[2]
-Uk_poly_Kn(p, v, p2, ::Type{Float64}) = Uk_poly10(p, v, p2)[2]
 
-@inline function split_evalpoly(x, P)
-    # polynomial P must have an even number of terms
-    N = length(P)
-    xx = x*x
+Uk_poly_Hankel(p, v, p2, x::Float32) = Uk_poly5(p, v, p2)
+Uk_poly_Hankel(p, v, p2, x) = Uk_poly_Jn(p, v, p2, x)
 
-    out = P[end]
-    out2 = P[end-1]
-
-    for i in N-2:-2:2
-        out = muladd(xx, out, P[i])
-        out2 = muladd(xx, out2, P[i-1])
-    end
-    if iszero(rem(N, 2))
-        out *= x
-        return out2 - out, out2 + out
-    else 
-        out = muladd(xx, out, P[1])
-        out2 *= x
-        return out - out2, out2 + out
-    end
-end
+#####
+##### U - polynomials
+#####
 
 function Uk_poly5(p, v, p2)
     u0 = 1.0
@@ -169,7 +191,8 @@ function Uk_poly20(p, v, p2)
     return split_evalpoly(-p/v, Poly)
 end
 
-function Uk_poly_Jn(p, v, p2, x::T) where T <: BigFloat
+# implementation for arbitrary precision
+function Uk_poly_Jn(p, v, p2, x::T) where T
     u0 = one(T)
     u1 = evalpoly(p2, (3, -5)) / 24
     u2 = evalpoly(p2, (81, -462, 385)) / 1152
@@ -192,14 +215,35 @@ function Uk_poly_Jn(p, v, p2, x::T) where T <: BigFloat
     u19 = evalpoly(p2, (3761719809509744584195141470215239968860161850335693359375, -1694136104847790923543013061207680485991618397125797873046875, 131518243789528012257323287684549590712219590887856743595703125, -4179129511260217209133623104486559148268490002722848244991240625, 72088266652871136165181276891337618088740053757385007292240377500, -776773977214820033621339638022401758862209813648991378355261599500, 5677394779818071523358076045292335690018332546439714641415150037636, -29667822591847140970922062603154078948366431649274897672067206014580, 114800233558787974267088388638654159779991991716667850063043161177890, -336788945225975997668966486515468748933754942064781256509404101275850, 760599837839891632010677514872412397427959022966733699428711208643750, -1333776105497652608917805362422659621440699261908512326717363285968750, 1821026790520428617034899967974569000414933301802005578083458082187500, -1929493390007550512825649545883182667978851358936168551128507440937500, 1570570304135828069768211937927131121377451398878663909061910457812500, -963355744456233323886326422779275308259891167457840027230510476562500, 430744376085805585076333464506126217301082462279781595185335615234375, -132496442776420617057548516091451843431549350710841121872822607421875, 25067118709566753991500713640672585065184296412786618544560205078125, -2198870062242697718552694179006367110981078632700580574084228515625)) / 980570799589976236952265721984567958568960000000
     u20 = evalpoly(p2, (24030618487110150352755402740028995969072485932314476318359375, -11984379509393886990049682627416290126645799829432886859195312500, 1028816773596376675865176501621547688509187479681533744365175781250, -36136687211104276266628386141898079997731413843884113675698994212500, 689368394712060288513879526213143279387995331221181709158682715671875, -8226054591925395046897310265711439061232478740113589342810359827971600, 66729727980314206345594148553023187689855688854650386958448499394886808, -388235990422199036481157075090292917209702371811112714609151030385239376, 1679621555358289731717827244171539003686077073065087747585589973854567950, -5539024239213671573375139503069183935283937579486175606155069574939719000, 14158993985687610069291256086138044030538819617545349322200892501730615500, -28351273454978996507857547213496220444080209057761228571819950144231575000, 44700502703654649774362294361156754154712937287563245669685024068473468750, -55504285315413734114196925709059066734450069228526494661323483726671250000, 53996017865021964397812742861916826936440170527545458081631955240159375000, -40677946447845849750014263157052100427921007881029648608310681741346250000, 23250401373415767366970579801426233116241475047289980901600427561701171875, -9744288621182737996606001891415854305391392962559389056448157541132812500, 2823768330714179467082676027577350697536971031741905897356560592675781250, -505537818270094147077012813306995849751437826286925078626556809570312500, 42128151522507845589751067775582987479286485523910423218879734130859375)) / 658943577324464031231922565173629668158341120000000
     u21 = evalpoly(p2, (1990919576929585163761247434580873723897677550573731281207275390625, -1094071724893410272201314846735800345180930752601602224440504638671875, 103359845691236916270005420886293061281368263471845448279855946394531250, -3993558675585831919973605001813276532326292885934464373884268722794718750, 83832389176247515253189196480455786065824463879054932596114704927571390625, -1101977288759423115916484463959767795703317649005495798822550381533967842875, 9865413224996821913093061266347613112205666995211786544733850490740903131000, -63509799534443193918054738326913581450607131662586232327271654588312820660616, 305080179205137176095342856930260393560550505786421683990077008259370104309410, -1122099959965657526344757894103766710826629020929459670948834078441128579791750, 3217185258543282976637373169047731813093578126603884759856059695249999306047500, -7276835259402589085458442196686990645669654847254510877738804991391058411412500, 13076651508858985557227319801010895818335803028865299751194038965212274849406250, -18719023753854332443081892087572754119280558462994056787647908433841920235468750, 21307327225910629020600045595173860611314592374884901403059587980149276271875000, -19156728115567236234371593788102013666866274144599851347429312225076676478125000, 13430107219321888006291381503763318985372222835071804983658005207838642173828125, -7186150593017474773099947110903785965879266917528290917207712073663895771484375, 2834031566467842832851805860262261718160136985649155528263587796394390332031250, -776308737033643856044315139790308583914612827783322139063211433790569824218750, 131897976398031751060811874321878385924211075364673046295410087596954345703125, -10468093364923154846096180501736379835254847251164527483762705364837646484375)) / 5456052820246562178600318839637653652351064473600000000
-    u22 = evalpoly(p2, (3.8335346613939443e12, -2.3109159761323565e15, 2.3920280120269997e17, -1.0121818379942089e19, 2.3275346258089414e20, -3.3544689122226785e21, 3.297557757461478e22, -2.336107524486965e23, 1.238524103792452e24, -5.0463598652544e24, 1.6103128541137314e25, -4.077501349206541e25, 8.26258535798955e25, -1.3459193994556415e26, 1.7635713272326644e26, -1.8526731041549917e26, 1.548092083577385e26, -1.0148048982766395e26, 5.103920268388802e25, -1.9006807535664433e25, 4.936185283790662e24, -7.980021228256559e23, 6.04547062746709e22))
-    u23 = -evalpoly(p2, (4.218971570284097e13, -2.778481101311081e16, 3.1385283211499996e18, -1.4486387749510863e20, 3.6341499869780876e21, -5.7179919065432055e22, 6.144339925144987e23, -4.766924608251481e24, 2.774466490672939e25, -1.2449342046124282e26, 4.392130563430048e26, -1.2355529146787609e27, 2.7982068996977173e27, -5.131998439010333e27, 7.641216535678268e27, -9.228395023257356e27, 8.999255845917453e27, -7.02322235515725e27, 4.322773732100187e27, -2.050902994929233e27, 7.234243234844319e26, -1.7860680966743495e26, 2.753863007576946e25, -1.9955529040412654e24))
-    u24 = evalpoly(p2, (4.8540146868529006e14, -3.4792991439250445e17, 4.273207395701127e19, -2.1435653415108537e21, 5.844687629283339e22, -1.0000750138961727e24, 1.1699189691874474e25, -9.896648661695488e25, 6.29370256208713e26, -3.0939194683063286e27, 1.1998211967644424e28, -3.7252346341093444e28, 9.358117764887965e28, -1.9153963148099324e29, 3.206650343980748e29, -4.395132918078325e29, 4.9215508698387624e29, -4.4775348387950634e29, 3.277658265637452e29, -1.9012207767547338e29, 8.536184882279286e28, -2.8599776383548e28, 6.728957650918171e27, -9.916401268407057e26, 6.886389769727123e25))
-    u25 = -evalpoly(p2, (5.827244631566907e15, -4.5305357275125955e18, 6.029638127487473e20, -3.2761234100445222e22, 9.675654883193622e23, -1.7941040647617987e25, 2.2764310713849358e26, -2.0914533474677945e27, 1.4471195817119858e28, -7.757785573404132e28, 3.2900927159291354e29, -1.1210232552135908e30, 3.1034661143911036e30, -7.036055338636485e30, 1.3128796688902614e31, -2.0208792587851872e31, 2.5653099826522344e31, -2.6771355605594045e31, 2.2823085118856488e31, -1.5730388076301427e31, 8.627355824571355e30, -3.676221426681414e30, 1.1728484268744769e30, -2.6355294419807464e29, 3.7195112743738626e28, -2.479674182915908e27))
-    
-    Poly = (u0, u1, u2, u3, u4, u5, u6, u7, u8, u9, u10, u11, u12, u13, u14, u15, u16,  u17, u18, u19, u20)
+    Poly = (u0, u1, u2, u3, u4, u5, u6, u7, u8, u9, u10, u11, u12, u13, u14, u15, u16,  u17, u18, u19, u20, u21)
     return split_evalpoly(-p/v, Poly)
 end
+
+# performs a second order horner scheme for polynomial evaluation
+# computes the even and odd coefficients of the polynomial independently within a loop to reduce latency
+# splits the polynomial to compute both 1 + ax + bx^2 + cx^3 and 1 - ax + bx^2 - cx^3 ....
+@inline function split_evalpoly(x, P)
+    # polynomial P must have an even number of terms
+    N = length(P)
+    xx = x*x
+
+    out = P[end]
+    out2 = P[end-1]
+
+    for i in N-2:-2:2
+        out = muladd(xx, out, P[i])
+        out2 = muladd(xx, out2, P[i-1])
+    end
+    if iszero(rem(N, 2))
+        out *= x
+        return out2 - out, out2 + out
+    else 
+        out = muladd(xx, out, P[1])
+        out2 *= x
+        return out - out2, out2 + out
+    end
+end
+
 #=
     u0 = one(x)
     u1 = p / 24 * (3 - 5*p^2) * -1 / v
 
@@ -17,13 +17,13 @@
 # Cutoffs were determined manually for each input type. 
 # AbstractFloat cutoff gives relative error roughly for quadruple precision accuracy (1e-35)
 
-#besseljy_large_argument_min(::Type{Float32}) = 15.0f0
-besseljy_large_argument_min(::Type{Float64}) = 20.0
-besseljy_large_argument_min(::Type{T}) where T <: AbstractFloat = 40.0
+besseljy_large_argument_min(x::Float32) = 15.0f0
+besseljy_large_argument_min(x::Float64) = 20.0
+#besseljy_large_argument_min(x) = 40.0
 
-#besseljy_large_argument_cutoff(v, x::Float32) = (x > 1.2f0*v && x > besseljy_large_argument_min(Float32))
-besseljy_large_argument_cutoff(v, x::Float64) = (x > 1.65*v && x > besseljy_large_argument_min(Float64))
-besseljy_large_argument_cutoff(v, x::T) where T = (x > 4*v && x > besseljy_large_argument_min(T))
+besseljy_large_argument_cutoff(v, x::Float32) = (x > 1.2f0*v && x > besseljy_large_argument_min(x))
+besseljy_large_argument_cutoff(v, x::Float64) = (x > 1.65*v && x > besseljy_large_argument_min(x))
+#besseljy_large_argument_cutoff(v, x) = (x > 4*v && x > besseljy_large_argument_min(x))
 
 """
     besseljy_large_argument(nu, x::T)
@@ -32,26 +32,26 @@ Asymptotic expansions for large arguments valid when x > 1.6*nu and x > 20.0.
 Returns both (besselj(nu, x), bessely(nu, x)).
 """
 function besseljy_large_argument(v, x::T) where T
-    # gives both (besselj, bessely) for x > 1.6*v
     α, αp = _α_αp_asymptotic(v, x)
-    b = SQ2OPI(T) / sqrt(αp * x)
+    S = promote_type(T, Float64)
+    v, x = S(v), S(x)
+    b = SQ2OPI(S) / sqrt(αp * x)
 
     # we need to calculate sin(x - v*pi/2 - pi/4) and cos(x - v*pi/2 - pi/4)
     # For improved accuracy this is expanded using the formula for sin(x+y+z)
+    s, c = sincos(PIO2(S) * v)
+    sα, cα = sincos(α)
 
-    S, C = sincos(PIO2(T) * v)
-    Sα, Cα = sincos(α)
+    CMS = c - s
+    CPS = c + s
 
-    CMS = C - S
-    CPS = C + S
+    s1 = CMS * cα
+    s2 = CPS * sα
 
-    s1 = CMS * Cα
-    s2 = CPS * Sα
+    s3 = CMS * sα
+    s4 = CPS * cα
 
-    s3 = CMS * Sα
-    s4 = CPS * Cα
-
-    return SQ2O2(T) * (s1 + s2) * b, SQ2O2(T) * (s3 - s4) * b
+    return SQ2O2(S) * (s1 + s2) * b, SQ2O2(S) * (s3 - s4) * b
 end
 
 # Float64
@@ -98,7 +98,30 @@ function _α_αp_asymptotic(v, x::Float64)
         return _α_αp_poly_30(v, x)
     end
 end
-
+function _α_αp_asymptotic(v, x::Float32)
+    v, x = Float64(v), Float64(x)
+    if x > 4*v
+        return _α_αp_poly_5(v, x)
+    elseif x > 1.8*v
+        return _α_αp_poly_10(v, x)
+    else
+        return _α_αp_poly_30(v, x)
+    end
+end
+function _α_αp_poly_5(v, x::T) where T
+    xinv = inv(x)^2
+    μ = 4 * v^2
+    s0 = one(T)
+    s1 = (1 - μ) / 8
+    s2 = evalpoly(μ, (-0.1953125, 0.203125, -0.0078125))
+    s3 = evalpoly(μ, (1.0478515625, -1.1591796875, 0.1123046875, -0.0009765625))
+    s4 = evalpoly(μ, (-11.466461181640625, 13.1358642578125, -1.71624755859375, 0.0469970703125, -0.000152587890625))
+    s5 = evalpoly(μ, (211.27614974975586, -246.8455924987793, 37.067405700683594, -1.5151596069335938, 0.017223358154296875, -2.6702880859375e-5))
+    
+    αp = evalpoly(xinv, (s0, s1, s2, s3, s4, s5))
+    α = x * evalpoly(xinv, (s0, -s1, -s2/3, -s3/5, -s4/7, -s5/9))
+    return α, αp
+end
 function _α_αp_poly_10(v, x::T) where T
     xinv = inv(x)^2
     μ = 4 * v^2
Original file line number	Diff line number	Diff line change
`@@ -1,4 +1,4 @@`
`1`		`-function besselj0(x::BigFloat)`
	`1`	`+function _besselj0(x::BigFloat)`
`2`	`2`	`x = abs(x)`
`3`	`3`	`T = eltype(x)`
`4`	`4`	`if iszero(x)`