Merge pull request #20 from heltonmc/besselj_y_large_arguments

heltonmc · web-flow · commit 1f35f5c48710 · 2022-07-19T17:30:01.000-04:00
Implementation for besselj of any (positive) real order and argument
diff --git a/Project.toml b/Project.toml
@@ -3,6 +3,9 @@ uuid = "0e736298-9ec6-45e8-9647-e4fc86a2fe38"
 authors = ["Michael Helton <heltonmc@protonmail.com> and contributors"]
 version = "0.1.0"
 
+[deps]
+SpecialFunctions = "276daf66-3868-5448-9aa4-cd146d93841b"
+
 [compat]
 julia = "1.5"
 
diff --git a/src/Bessels.jl b/src/Bessels.jl
@@ -1,5 +1,7 @@
 module Bessels
 
+using SpecialFunctions: loggamma
+
 export besselj0
 export besselj1
 
@@ -37,6 +39,7 @@ include("U_polynomials.jl")
 include("recurrence.jl")
 include("misc.jl")
 include("Polynomials/besselj_polys.jl")
+include("asymptotics.jl")
 #include("hankel.jl")
 
 end
diff --git a/src/U_polynomials.jl b/src/U_polynomials.jl
diff --git a/src/asymptotics.jl b/src/asymptotics.jl
diff --git a/src/besselj.jl b/src/besselj.jl
@@ -150,3 +150,186 @@ function besselj1(x::Float32)
         return p * s
     end
 end
+"""
+    _besselj(nu, x)
+
+Bessel function of the first kind of order `nu`, ``J_{nu}(x)``.
+`nu` and `x` must be real; the implementation targets positive order and argument.
+"""
+function _besselj(nu, x)
+    # Selects an evaluation method from the relative size of `nu` and `x` and
+    # returns its result (or the result of a recurrence seeded by it).
+    # The checks are order-dependent: each one assumes the earlier ones failed.
+    nu == 0 && return besselj0(x)
+    nu == 1 && return besselj1(x)
+
+    # small argument: power series
+    x < 4.0 && return besselj_small_arguments_orders(nu, x)
+
+    # argument large relative to the order: large-argument expansion directly
+    large_arg_cutoff = 1.65*nu
+    (x > large_arg_cutoff && x > 20.0) && return besselj_large_argument(nu, x)
+
+    # order sufficiently above the argument: Debye expansion directly
+    debye_cutoff = 2.0 + 1.00035*x + (302.681*x)^(1/3)
+    nu > debye_cutoff && return besselj_debye(nu, x)
+
+    if nu >= x
+        # shift the order up to where the Debye expansion is used, then recur down
+        nu_shift = ceil(Int, 5.2 + 1.00033*x + (1427.61*x)^(1/3) - nu)
+        return _besselj_debye_down(nu, x, nu_shift)
+    end
+
+    # at this point x > nu, and either x <= 1.65*nu or x <= 20
+    # in this region forward recurrence is stable
+    # we must decide if we should do backward recurrence if we are closer to debye accuracy
+    # or if we should do forward recurrence if we are closer to large argument expansion
+    debye_cutoff = 5.0 + 1.00033*x + (1427.61*x)^(1/3)
+
+    debye_diff = debye_cutoff - nu
+    large_arg_diff = nu - x / 2.0
+
+    if (debye_diff > large_arg_diff && x > 20.0)
+        # shift the order down to roughly x/2, use the large-argument expansion
+        # there and at the order below it, then recur upward to nu
+        nu_shift = ceil(large_arg_diff)
+        v2 = nu - nu_shift
+        jnu = besselj_large_argument(v2, x)
+        jnum1 = besselj_large_argument(v2 - 1, x)
+        return besselj_up_recurrence(x, jnu, jnum1, v2, nu)[2]
+    else
+        return _besselj_debye_down(nu, x, ceil(Int, debye_diff))
+    end
+end
+
+# Evaluate J_nu(x) by calling besselj_debye at the orders nu + nu_shift and
+# nu + nu_shift + 1 and applying downward recurrence back to nu.
+# `nu_shift` is the integer number of unit steps between the start order and nu.
+function _besselj_debye_down(nu, x, nu_shift)
+    v = nu + nu_shift
+    arr = range(v, stop = nu, length = nu_shift + 1)
+    jnu = besselj_debye(v, x)
+    jnup1 = besselj_debye(v+1, x)
+    # the second element of the returned pair is the value at order nu
+    return besselj_down_recurrence(x, jnu, jnup1, arr)[2]
+end
+
+# Large-argument approximation of J_v(x), built from the pair returned by
+# `_α_αp_asymptotic(v, x)`.
+# Reported relative error: ~9e-15 for moderate x and v, ~1e-13 for large arguments.
+function besselj_large_argument(v, x::T) where T
+    alpha, alpha_p = _α_αp_asymptotic(v, x)
+    scale = SQ2OPI(T) / sqrt(alpha_p * x)
+
+    sin_v, cos_v = sincos(PIO2(T)*v)
+    sin_a, cos_a = sincos(alpha)
+    cos_part = (cos_v - sin_v) * cos_a
+    sin_part = (cos_v + sin_v) * sin_a
+
+    return SQ2O2(T) * (cos_part + sin_part) * scale
+end
+
+# Power series for J_v(x); generally only usable for x < 4.0.
+# Known limitation: the terms are summed naively, which can produce large errors.
+# NOTE(review): the original comment restricts validity to the non-oscillatory
+# regime (v>1/2, 0<t<sqrt(v^2 - 0.25)) — confirm this applies to the power series.
+# The leading factor underflows prematurely for large orders, so v > 60 is
+# delegated to the log-scaled variant.
+function besselj_small_arguments_orders(v, x::T) where T
+    if v > 60
+        return log_besselj_small_arguments_orders(v, x)
+    end
+
+    max_terms = 2000
+    total = zero(T)
+    # first term of the series: (x/2)^v / gamma(v + 1)
+    term = (x/2)^v / gamma(v + one(T))
+    quarter_x2 = (x/2)^2
+    k = 0
+    while k <= max_terms
+        total += term
+        # stop once the latest term no longer changes the sum at working precision
+        if abs(term) < eps(T) * abs(total)
+            break
+        end
+        term *= -inv((v + k + one(T)) * (k + one(T))) * quarter_x2
+        k += 1
+    end
+    return total
+end
+
+# Log-scaled power series for J_v(x): use when v is large and x is small.
+# The series is summed without its leading factor; that factor is then applied
+# in log space via loggamma so it cannot underflow on its own.
+# Known limitation: the terms are summed naively, which can produce large errors.
+# Original note: needed for bessely.
+function log_besselj_small_arguments_orders(v, x::T) where T
+    max_terms = 2000
+    series = zero(T)
+    term = one(T)
+    quarter_x2 = (x/2)^2
+    for k in 0:max_terms
+        series += term
+        term *= -quarter_x2 * inv((k + one(T)) * (v + k + one(T)))
+        # the convergence check uses the *next* term, after it has been updated
+        if abs(term) < eps(T) * abs(series)
+            break
+        end
+    end
+    # v*log(x/2) + log(series), then subtract loggamma(v + 1)
+    log_scaled = fma(v, log(x/2), log(series))
+    return exp(-loggamma(v + 1) + log_scaled)
+end
+
+# Uniform asymptotic (Debye) expansion of J_v(x); valid when x < v.
+# The argument is promoted to at least Float64 for the computation, while the
+# original element type is still passed on to Uk_poly_Jn.
+function besselj_debye(v, x)
+    T = eltype(x)
+    S = promote_type(T, Float64)
+    xs = S(x)
+
+    # v^2 - x^2, written as a product of sum and difference
+    v2mx2 = (v + xs) * (v - xs)
+    root = sqrt(v2mx2)
+    expo = muladd(v, -log(xs / (v + root)), -root)
+
+    prefactor = SQ1O2PI(S) * exp(-expo) / sqrt(root)
+    return prefactor * Uk_poly_Jn(v / root, v, v^2 / v2mx2, xs, T)
+end
+
+# Gamma function for non-negative real x.
+# For 0.0 <= x < 171.5
+# Mean ULP = 0.55
+# Max ULP = 2.4
+# Adapted from Cephes Mathematical Library (MIT license https://en.smath.com/view/CephesMathLibrary/license) by Stephen L. Moshier
+#
+# Dispatches to `large_gamma` for x > 11.5 and to `small_gamma` otherwise.
+# Throws a DomainError for negative x; NaN is returned unchanged.
+# Written as guard clauses with an unconditional final return so the function
+# has no implicit fall-through path returning `nothing`.
+function gamma(x)
+    x > 11.5 && return large_gamma(x)
+    # negative arguments would need the reflection formula
+    x < 0.0 && throw(DomainError(x, "Negative numbers are currently not implemented"))
+    # NaN fails both comparisons above; propagate it
+    isnan(x) && return x
+    return small_gamma(x)
+end
+# Gamma function for large x (called by `gamma` when x > 11.5).
+# Evaluates sqrt(2π) * x^(x - 1/2) * exp(-x) times a polynomial correction in 1/x.
+# Infinite input is returned unchanged.
+function large_gamma(x)
+    isinf(x) && return x
+    T = Float64
+    stirling = (
+        8.333333333333331800504e-2, 3.472222222230075327854e-3, -2.681327161876304418288e-3, -2.294719747873185405699e-4,
+        7.840334842744753003862e-4, 6.989332260623193171870e-5, -5.950237554056330156018e-4, -2.363848809501759061727e-5,
+        7.147391378143610789273e-4
+    )
+    xinv = inv(x)
+    correction = xinv * evalpoly(xinv, stirling) + one(T)
+    # precision is lost in the exp / power evaluation below
+    expx = exp(x)
+    # avoid overflow: x^(x - 1/2) is formed as half_pow * half_pow, with the
+    # division by exp(x) applied between the two factors
+    half_pow = x^(0.5 * x - 0.25)
+    scaled = half_pow * (half_pow / expx)
+
+    return SQ2PI(T) * scaled * correction
+end
+# Gamma function for small x (called by `gamma` when 0 <= x <= 11.5).
+# The argument is moved into [2, 3) one unit at a time while accumulating the
+# corresponding product/quotient, then a rational approximation in (x - 2) is applied.
+function small_gamma(x)
+    T = Float64
+    numer = (
+        1.000000000000000000009e0, 8.378004301573126728826e-1, 3.629515436640239168939e-1, 1.113062816019361559013e-1,
+        2.385363243461108252554e-2, 4.092666828394035500949e-3, 4.542931960608009155600e-4, 4.212760487471622013093e-5
+    )
+    denom = (
+        9.999999999999999999908e-1, 4.150160950588455434583e-1, -2.243510905670329164562e-1, -4.633887671244534213831e-2,
+        2.773706565840072979165e-2, -7.955933682494738320586e-4, -1.237799246653152231188e-3, 2.346584059160635244282e-4,
+        -1.397148517476170440917e-5
+    )
+
+    scale = one(T)
+    # x >= 3: step down, multiplying by each reduced argument
+    while x >= 3.0
+        x -= one(T)
+        scale *= x
+    end
+    # x < 2: step up, dividing by each argument before it is raised
+    while x < 2.0
+        scale /= x
+        x += one(T)
+    end
+
+    t = x - T(2)
+    return scale * evalpoly(t, numer) / evalpoly(t, denom)
+end
diff --git a/src/math_constants.jl b/src/math_constants.jl
@@ -1,18 +1,23 @@
 const ONEOSQPI(::Type{BigFloat}) = big"5.6418958354775628694807945156077258584405E-1"
 const TWOOPI(::Type{BigFloat}) = big"6.3661977236758134307553505349005744813784E-1"
-#const SQPIO2(::Type{BigFloat}) = big"1.253314137315500251207882642405522626503493370304969158314961788171146827303924"
-#const SQ1O2PI(::Type{BigFloat}) = big"0.3989422804014326779399460599343818684758586311649346576659258296706579258993008"
-#const SQ2OPI(::Type{BigFloat}) = big"0.7978845608028653558798921198687637369517172623298693153318516593413158517986017"
-#const PIO4(::Type{BigFloat}) = big"0.7853981633974483096156608458198757210492923498437764552437361480769541015715495"
+const PIO2(::Type{BigFloat}) = big"1.570796326794896619231321691639751442098584699687552910487472296153908203143099" # π/2
+const SQPIO2(::Type{BigFloat}) = big"1.253314137315500251207882642405522626503493370304969158314961788171146827303924" # sqrt(π/2)
+const SQ1O2PI(::Type{BigFloat}) = big"0.3989422804014326779399460599343818684758586311649346576659258296706579258993008" # 1/sqrt(2π)
+const SQ2OPI(::Type{BigFloat}) = big"0.7978845608028653558798921198687637369517172623298693153318516593413158517986017" # sqrt(2/π)
+const PIO4(::Type{BigFloat}) = big"0.7853981633974483096156608458198757210492923498437764552437361480769541015715495" # π/4
+const SQ2O2(::Type{BigFloat}) = big"0.707106781186547524400844362104849039284835937688474036588339868995366239231051" # sqrt(2)/2
 
 const PIO4(::Type{Float64}) = .78539816339744830962
+const PIO2(::Type{Float64}) = 1.5707963267948966 # π/2
 const THPIO4(::Type{Float64}) = 2.35619449019234492885
 const SQ2OPI(::Type{Float64}) = .79788456080286535588
 const TWOOPI(::Type{Float64}) = 0.6366197723675814
 const SQPIO2(::Type{Float64}) = 1.25331413731550025
 const SQ1O2PI(::Type{Float64}) = 0.3989422804014327
-
+const SQ2PI(::Type{Float64}) = 2.5066282746310007 # sqrt(2π)
+const SQ2O2(::Type{Float64}) = 0.7071067811865476 # sqrt(2)/2
 
 const PIO4(::Type{Float32}) = 0.78539816339744830962f0
 const TWOOPI(::Type{Float32}) = 0.636619772367581343075535f0
 const THPIO4(::Type{Float32}) = 2.35619449019234492885f0
+const SQ2O2(::Type{Float32}) = 0.7071067811865476f0
+# besselj_large_argument(v, x::T) calls PIO2(T) and SQ2OPI(T) with T = typeof(x);
+# without these two methods a Float32 argument fails with a MethodError.
+const PIO2(::Type{Float32}) = 1.5707963267948966f0    # π/2
+const SQ2OPI(::Type{Float32}) = 0.7978845608028654f0  # sqrt(2/π)
diff --git a/src/recurrence.jl b/src/recurrence.jl
@@ -32,4 +32,31 @@ end
         k1 = k2
     end
     return k2, k0
-end
+end
+
+# Upward three-term recurrence  J_{n+1}(x) = (2n/x) * J_n(x) - J_{n-1}(x).
+# `jnu` and `jnum1` are the values at orders nu_start and nu_start - 1.
+# One step is taken for every n in nu_start:nu_end; the returned pair holds the
+# values at orders (nu_end + 1, nu_end).
+# For an empty range no step is taken and both elements are the input `jnum1`.
+@inline function besselj_up_recurrence(x, jnu, jnum1, nu_start, nu_end)
+    two_over_x = 2 / x
+    lower, current = jnum1, jnu
+    upper = jnum1
+    for order in nu_start:nu_end
+        upper = muladd(two_over_x * order, current, -lower)
+        lower, current = current, upper
+    end
+    return upper, lower
+end
+# Downward three-term recurrence  J_{n-1}(x) = (2n/x) * J_n(x) - J_{n+1}(x).
+# `jnu` and `jnup1` are the values at orders first(arr) and first(arr) + 1.
+# arr lists the Bessel orders to step through, i.e. arr = nu_start:-1:nu_end,
+# but a non-integer nu needs special care: build it as
+#     v = nu + nu_shift
+#     arr = range(v, stop = nu, length = nu_shift + 1)
+# The returned pair holds the values at orders (last(arr) - 1, last(arr)).
+# For an empty arr no step is taken and both elements are the input `jnup1`.
+@inline function besselj_down_recurrence(x, jnu, jnup1, arr)
+    two_over_x = 2 / x
+    upper, current = jnup1, jnu
+    lower = jnup1
+    for order in arr
+        lower = muladd(two_over_x * order, current, -upper)
+        upper, current = current, lower
+    end
+    return lower, upper
+end
diff --git a/test/besselj_test.jl b/test/besselj_test.jl
@@ -71,8 +71,43 @@ j1_32 = besselj1.(Float32.(x))
 @test besselj1(-80.0) ≈ SpecialFunctions.besselj1(-80.0)
 
 ## Tests for besselj 
-# note this is not complete just a simple test
-# this needs work and removing for now
 
-#@test besselj(3, 1.0) ≈ SpecialFunctions.besselj(3, 1.0)
-#@test besselj(-5, 6.1) ≈ SpecialFunctions.besselj(-5, 6.1)
+#=
+Notes
+    - the power series shows larger error when nu is large (e.g. 146) and x is small (e.g. 1.46)
+    - the asymptotic expansion shows larger error when nu is large or x is large
+=#
+
+## test integer orders 2 <= nu <= 100 with x = nu * (0.05 ... 1.05) against BigFloat reference values
+x = [0.05, 0.1, 0.2, 0.4, 0.5, 0.6, 0.7, 0.75, 0.8, 0.85, 0.9, 0.92, 0.95, 0.97, 0.99, 1.0, 1.01, 1.05]
+nu = [2, 4, 6, 10, 15, 20, 25, 30, 40, 50, 60, 70, 80, 90, 100]
+for v in nu, xx in x
+    xx *= v
+    sf = SpecialFunctions.besselj(BigFloat(v), BigFloat(xx))
+    @test isapprox(Bessels._besselj(v, xx), Float64(sf), rtol=5e-14)
+end
+
+# test non-integer orders with x = nu * (0.05 ... 3.0); per the original note, SpecialFunctions does not give big float precision here
+# The SpecialFunctions implementation is only accurate to about 1e-11 - 1e-13, hence the looser rtol
+
+x = [0.05, 0.1, 0.2, 0.4, 0.5, 0.6, 0.7, 0.75, 0.8, 0.85, 0.9, 0.92, 0.95, 0.97, 0.99, 1.0, 1.01, 1.05, 1.08, 1.1, 1.2, 1.4, 1.5, 1.6, 1.8, 2.0, 2.5, 3.0]
+nu = [0.1, 0.4567, 0.8123, 1.5, 2.5, 4.1234, 6.8, 12.3, 18.9, 28.2345, 38.1235, 51.23, 72.23435, 80.5, 98.5, 104.2]
+for v in nu, xx in x
+    xx *= v
+    @test isapprox(Bessels._besselj(v, xx), SpecialFunctions.besselj(v, xx), rtol=1e-12)
+end
+
+## test large orders (150 <= nu <= 50000) with x = nu * (0.2 ... 1.2)
+nu = [150, 165.2, 200.0, 300.0, 500.0, 1000.0, 5000.2, 10000.0, 50000.0]
+x = [0.2, 0.4, 0.5, 0.6, 0.7, 0.75, 0.8, 0.85, 0.9, 0.92,0.93, 0.94, 0.95, 0.96, 0.97, 0.98, 0.99,0.995, 0.999, 1.0, 1.01, 1.05, 1.08, 1.1, 1.2]
+for v in nu, xx in x
+    xx *= v
+    @test isapprox(Bessels._besselj(v, xx), SpecialFunctions.besselj(v, xx), rtol=5e-11)
+end
+
+## test a large argument (x much larger than nu)
+@test isapprox(Bessels._besselj(10.0, 150.0), SpecialFunctions.besselj(10.0, 150.0), rtol=1e-12)
+
+# test BigFloat inputs at two points (order above and order below the argument)
+@test isapprox(Bessels._besselj(big"2000", big"1500.0"), SpecialFunctions.besselj(big"2000", big"1500"), rtol=5e-20)
+@test isapprox(Bessels._besselj(big"20", big"1500.0"), SpecialFunctions.besselj(big"20", big"1500"), rtol=5e-20)