Merge pull request #14 from heltonmc/J0_medium_args

oscardssmith · web-flow · commit f772ccf7e1f3 · 2022-02-26T13:10:17.000-05:00
besselj0 and friends for medium arguments
diff --git a/README.md b/README.md
@@ -45,10 +45,10 @@ We compare the relative [speed](https://github.com/heltonmc/Bessels.jl/blob/mast
 
 | function | `Float32` | `Float64`
 | ------------- | ------------- | ------------- |
-| besselj0  | 1.7x  | 1.8x
-| besselj1  | 1.7x | 1.9x 
-| bessely0  | 1.9x  | 1.8x
-| bessely1  | 1.7x  | 1.7x
+| besselj0  | 1.7x  | 3.1x
+| besselj1  | 1.7x | 3.0x 
+| bessely0  | 1.9x  | 2.7x
+| bessely1  | 1.7x  | 2.7x
 | besseli0  | 26x  | 13.2x
 | besseli1  | 22x  | 13.9x
 | besseli(20, x)  |   5.4x   | 2.1x  |
diff --git a/src/Bessels.jl b/src/Bessels.jl
@@ -2,7 +2,6 @@ module Bessels
 
 export besselj0
 export besselj1
-export besselj
 
 export bessely0
 export bessely1
diff --git a/src/besselj.jl b/src/besselj.jl
@@ -10,13 +10,13 @@
 #    Polynomial coefficients are from [1] which is based on [2]
 #    For tiny arguments the power series expansion is used.
 #
-#    Branch 2: 5.0 < x < 75.0
+#    Branch 2: 5.0 < x < 25.0
 #              besselj0 = sqrt(2/(pi*x))*(cos(x - pi/4)*R7(x) - sin(x - pi/4)*R8(x))
 #    Hankel's asymptotic expansion is used
 #    where R7 and R8 are rational functions (Pn(x)/Qn(x)) of degree 7 and 8 respectively
 #    See section 4 of [3] for more details and [1] for coefficients of polynomials
 # 
-#   Branch 3: x >= 75.0
+#   Branch 3: x >= 25.0
 #              besselj0 = sqrt(2/(pi*x))*beta(x)*(cos(x - pi/4 - alpha(x)))
 #   See modified expansions given in [3]. Exact coefficients are used
 #
@@ -28,7 +28,14 @@
 # [3] Harrison, John. "Fast and accurate Bessel function computation." 
 #     2009 19th IEEE Symposium on Computer Arithmetic. IEEE, 2009.
 #
-function besselj0(x::T) where T
+
+"""
+    besselj0(x::T) where T <: Union{Float32, Float64}
+
+Bessel function of the first kind of order zero, ``J_0(x)``.
+"""
+function besselj0(x::Float64)
+    T = Float64
     x = abs(x)
     isinf(x) && return zero(x)
 
@@ -42,7 +49,7 @@ function besselj0(x::T) where T
         p = (z - DR1) * (z - DR2)
         p = p * evalpoly(z, RP_j0(T)) / evalpoly(z, RQ_j0(T))
         return p
-    elseif x < 75.0
+    elseif x < 25.0
         w = 5.0 / x
         q = 25.0 / (x * x)
 
@@ -53,14 +60,18 @@ function besselj0(x::T) where T
         p = p * sc[2] - w * q * sc[1]
         return p * SQ2OPI(T) / sqrt(x)
     else
+        if x < 120.0
+            p = (one(T), -1/16, 53/512, -4447/8192, 3066403/524288, -896631415/8388608, 796754802993/268435456, -500528959023471/4294967296)
+            q = (-1/8, 25/384, -1073/5120, 375733/229376, -55384775/2359296, 24713030909/46137344, -7780757249041/436207616)
+        else
+            p = (one(T), -1/16, 53/512, -4447/8192)
+            q = (-1/8, 25/384, -1073/5120, 375733/229376)
+        end
         xinv = inv(x)
         x2 = xinv*xinv
 
-        p = (one(T), -1/16, 53/512, -4447/8192, 5066403/524288)
         p = evalpoly(x2, p)
         a = SQ2OPI(T) * sqrt(xinv) * p
-
-        q = (-1/8, 25/384, -1073/5120, 375733/229376, -55384775/2359296)
         xn = muladd(xinv, evalpoly(x2, q), - PIO4(T))
 
         # the following computes b = cos(x + xn) more accurately
@@ -93,6 +104,11 @@ function besselj0(x::Float32)
     end
 end
 
+"""
+    besselj1(x::T) where T <: Union{Float32, Float64}
+
+Bessel function of the first kind of order one, ``J_1(x)``.
+"""
 function besselj1(x::Float64)
     T = Float64
     x = abs(x)
@@ -103,7 +119,7 @@ function besselj1(x::Float64)
         w = evalpoly(z, RP_j1(T)) / evalpoly(z, RQ_j1(T))
         w = w * x * (z - 1.46819706421238932572e1) * (z - 4.92184563216946036703e1)
         return w
-    elseif x < 75.0
+    elseif x < 25.0
         w = 5.0 / x
         z = w * w
         p = evalpoly(z, PP_j1(T)) / evalpoly(z, PQ_j1(T))
@@ -113,14 +129,18 @@ function besselj1(x::Float64)
         p = p * sc[2] - w * q * sc[1]
         return p * SQ2OPI(T) / sqrt(x)
     else
+        if x < 120.0
+            p = (one(T), 3/16, -99/512, 6597/8192, -4057965/524288, 1113686901/8388608, -951148335159/268435456, 581513783771781/4294967296) 
+            q = (3/8, -21/128, 1899/5120, -543483/229376, 8027901/262144, -30413055339/46137344, 9228545313147/436207616)
+        else
+            p = (one(T), 3/16, -99/512, 6597/8192)
+            q = (3/8, -21/128, 1899/5120, -543483/229376)
+        end
         xinv = inv(x)
         x2 = xinv*xinv
 
-        p = (one(T), 3/16, -99/512, 6597/8192, -4057965/524288)
         p = evalpoly(x2, p)
         a = SQ2OPI(T) * sqrt(xinv) * p
-
-        q = (3/8, -21/128, 1899/5120, -543483/229376, 8027901/262144)
         xn = muladd(xinv, evalpoly(x2, q), - 3 * PIO4(T))
 
         # the following computes b = cos(x + xn) more accurately
@@ -129,7 +149,6 @@ function besselj1(x::Float64)
         return a * b
     end
 end
-
 function besselj1(x::Float32)
     x = abs(x)
     isinf(x) && return zero(x)
diff --git a/src/bessely.jl b/src/bessely.jl
@@ -10,13 +10,13 @@
 #    Polynomial coefficients are from [1] which is based on [2].
 #    For tiny arguments the power series expansion is used.
 #
-#    Branch 2: 5.0 < x < 75.0
+#    Branch 2: 5.0 < x < 25.0
 #              bessely0 = sqrt(2/(pi*x))*(sin(x - pi/4)*R7(x) - cos(x - pi/4)*R8(x))
 #    Hankel's asymptotic expansion is used
 #    where R7 and R8 are rational functions (Pn(x)/Qn(x)) of degree 7 and 8 respectively
 #    See section 4 of [3] for more details and [1] for coefficients of polynomials
 # 
-#   Branch 3: x >= 75.0
+#   Branch 3: x >= 25.0
 #              bessely0 = sqrt(2/(pi*x))*beta(x)*(sin(x - pi/4 - alpha(x)))
 #   See modified expansions given in [3]. Exact coefficients are used.
 #
@@ -28,6 +28,12 @@
 # [3] Harrison, John. "Fast and accurate Bessel function computation." 
 #     2009 19th IEEE Symposium on Computer Arithmetic. IEEE, 2009.
 #
+
+"""
+    bessely0(x::T) where T <: Union{Float32, Float64}
+
+Bessel function of the second kind of order zero, ``Y_0(x)``.
+"""
 function bessely0(x::T) where T <: Union{Float32, Float64}
     if x <= zero(x)
         if iszero(x)
@@ -42,29 +48,33 @@ function bessely0(x::T) where T <: Union{Float32, Float64}
 end
 function _bessely0_compute(x::Float64)
     T = Float64
-    if x <= 5
+    if x <= 5.0
         z = x * x
         w = evalpoly(z, YP_y0(T)) / evalpoly(z, YQ_y0(T))
         w += TWOOPI(T) * log(x) * besselj0(x)
         return w
-    elseif x < 75.0
-        w = T(5) / x
-        z = w*w
+    elseif x < 25.0
+        w = 5.0 / x
+        z = w * w
         p = evalpoly(z, PP_y0(T)) / evalpoly(z, PQ_y0(T))
         q = evalpoly(z, QP_y0(T)) / evalpoly(z, QQ_y0(T))
         xn = x - PIO4(T)
         sc = sincos(xn)
         p = p * sc[1] + w * q * sc[2]
         return p * SQ2OPI(T) / sqrt(x)
     else
+        if x < 120.0
+            p = (one(T), -1/16, 53/512, -4447/8192, 3066403/524288, -896631415/8388608, 796754802993/268435456, -500528959023471/4294967296)
+            q = (-1/8, 25/384, -1073/5120, 375733/229376, -55384775/2359296, 24713030909/46137344, -7780757249041/436207616)
+        else
+            p = (one(T), -1/16, 53/512, -4447/8192)
+            q = (-1/8, 25/384, -1073/5120, 375733/229376)
+        end
         xinv = inv(x)
         x2 = xinv*xinv
 
-        p = (one(T), -1/16, 53/512, -4447/8192, 5066403/524288)
         p = evalpoly(x2, p)
         a = SQ2OPI(T) * sqrt(xinv) * p
-
-        q = (-1/8, 25/384, -1073/5120, 375733/229376, -55384775/2359296)
         xn = muladd(xinv, evalpoly(x2, q), - PIO4(T))
 
         # the following computes b = sin(x + xn) more accurately
@@ -91,6 +101,12 @@ function _bessely0_compute(x::Float32)
         return p
     end
 end
+
+"""
+    bessely1(x::T) where T <: Union{Float32, Float64}
+
+Bessel function of the second kind of order one, ``Y_1(x)``.
+"""
 function bessely1(x::T) where T <: Union{Float32, Float64}
     if x <= zero(x)
         if iszero(x)
@@ -103,16 +119,15 @@ function bessely1(x::T) where T <: Union{Float32, Float64}
     end
     return _bessely1_compute(x)
 end
-
 function _bessely1_compute(x::Float64)
     T = Float64
     if x <= 5
         z = x * x
         w = x * (evalpoly(z, YP_y1(T)) / evalpoly(z, YQ_y1(T)))
         w += TWOOPI(T) * (besselj1(x) * log(x) - inv(x))
         return w
-    elseif x < 75.0
-        w = T(5) / x
+    elseif x < 25.0
+        w = 5.0 / x
         z = w * w
         p = evalpoly(z, PP_j1(T)) / evalpoly(z, PQ_j1(T))
         q = evalpoly(z, QP_j1(T)) / evalpoly(z, QQ_j1(T))
@@ -121,14 +136,18 @@ function _bessely1_compute(x::Float64)
         p = p * sc[1] + w * q * sc[2]
         return p * SQ2OPI(T) / sqrt(x)
     else
+        if x < 120.0
+            p = (one(T), 3/16, -99/512, 6597/8192, -4057965/524288, 1113686901/8388608, -951148335159/268435456, 581513783771781/4294967296) 
+            q = (3/8, -21/128, 1899/5120, -543483/229376, 8027901/262144, -30413055339/46137344, 9228545313147/436207616)
+        else
+            p = (one(T), 3/16, -99/512, 6597/8192)
+            q = (3/8, -21/128, 1899/5120, -543483/229376)
+        end
         xinv = inv(x)
         x2 = xinv*xinv
 
-        p = (one(T), 3/16, -99/512, 6597/8192, -4057965/524288)
         p = evalpoly(x2, p)
         a = SQ2OPI(T) * sqrt(xinv) * p
-
-        q = (3/8, -21/128, 1899/5120, -543483/229376, 8027901/262144)
         xn = muladd(xinv, evalpoly(x2, q), - 3 * PIO4(T))
 
         # the following computes b = sin(x + xn) more accurately
@@ -137,7 +156,6 @@ function _bessely1_compute(x::Float64)
         return a * b
     end
 end
-
 function _bessely1_compute(x::Float32)
     T = Float32
     if x <= 2.0f0
diff --git a/src/misc.jl b/src/misc.jl
@@ -3,7 +3,7 @@
 # written by @oscardssmith
 function cos_sum(x, xn)
     s = x + xn
-    n, r = Base.Math.rem_pio2_kernel(s)
+    n, r = reduce_pi02_med(s)
     lo = r.lo - ((s - x) - xn)
     hi = r.hi + lo
     y = Base.Math.DoubleFloat64(hi, r.hi-hi+lo)
@@ -21,7 +21,7 @@ end
 # function to more accurately compute sin(x + xn)
 function sin_sum(x, xn)
     s = x + xn
-    n, r = Base.Math.rem_pio2_kernel(s)
+    n, r = reduce_pi02_med(s)
     lo = r.lo - ((s - x) - xn)
     hi = r.hi + lo
     y = Base.Math.DoubleFloat64(hi, r.hi-hi+lo)
@@ -36,3 +36,12 @@ function sin_sum(x, xn)
         return -Base.Math.cos_kernel(y)
     end
 end
+@inline function reduce_pi02_med(x::Float64)  # reduce x by a multiple of pi/2; returns (n, r): n the multiple as an Int, r the remainder as a (hi, lo) DoubleFloat64
+    pio2_1 = 1.57079632673412561417e+00  # leading part of pi/2 (slightly smaller than the full value)
+
+    fn = round(x*(2/pi))  # nearest integer to x/(pi/2), still stored as a Float64
+    r  = muladd(-fn, pio2_1, x)  # x - fn*pio2_1, i.e. x with the leading part of fn*pi/2 removed
+    w  = fn * 6.07710050650619224932e-11  # remaining part of fn*pi/2: pio2_1 + 6.0771...e-11 is approximately pi/2
+    y = r-w  # high word of the reduced argument
+    return unsafe_trunc(Int, fn), Base.Math.DoubleFloat64(y, (r-y)-w)  # low word (r-y)-w compensates the rounding of r-w; NOTE(review): unsafe_trunc assumes fn fits in Int - confirm callers bound x
+end