Skip to content

Commit 7a614a8

Browse files
committed
add F32 for besselj
1 parent e9a6ab7 commit 7a614a8

File tree

5 files changed

+162
-107
lines changed

5 files changed

+162
-107
lines changed

src/Float128/besselj.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
function besselj0(x::BigFloat)
1+
function _besselj0(x::BigFloat)
22
x = abs(x)
33
T = eltype(x)
44
if iszero(x)

src/U_polynomials.jl

Lines changed: 84 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,10 @@
1414
# [2] http://dlmf.nist.gov/10.41.E9
1515
# [3] https://dlmf.nist.gov/10.41
1616

17+
#####
18+
##### Large order expansion for J_{nu}(x) and Y_{nu}(x) and v > x
19+
#####
20+
1721
"""
1822
besseljy_debye(nu, x::T)
1923
@@ -22,8 +26,7 @@ Returns both besselj and bessely
2226
"""
2327
function besseljy_debye(v, x::T) where T
2428
S = promote_type(T, Float64)
25-
x = S(x)
26-
v = S(v)
29+
v, x = S(v), S(x)
2730

2831
vmx = (v + x) * (v - x)
2932
vs = sqrt(vmx)
@@ -37,13 +40,46 @@ function besseljy_debye(v, x::T) where T
3740
p2 = v^2 / vmx
3841

3942
Uk_Jn, Uk_Yn = Uk_poly_Jn(p, v, p2, T(x))
40-
4143
return coef_Jn * Uk_Jn, coef_Yn * Uk_Yn
4244
end
4345

44-
besseljy_debye_cutoff(nu, x::Float64) = nu > 2.0 + 1.00035*x + Base.Math._approx_cbrt(Float64(302.681)*x) && nu > 15
45-
besseljy_debye_cutoff(nu, x::Float32) = nu > 10.0 + 1.006*x + Base.Math._approx_cbrt(135.0*x) && nu > 10
46-
besseljy_debye_cutoff(nu, x) = nu > 16.0 + 1.0012*x + Base.Math._approx_cbrt(Float64(27.91)*x) && nu > 40
46+
# Cutoffs for besseljy_debye expansions
47+
# regions where the debye expansions for large orders v > x are valid
48+
# determined by fitting a curve a + bx + (cx)^(1/3) to where debye expansions provide desired precision
49+
50+
# Float32
51+
besseljy_debye_fit(x::Float32) = 2.5f0 + 1.00035f0*x + Base.Math._approx_cbrt(360.0f0*x)
52+
besseljy_debye_cutoff(nu, x::Float32) = nu > besseljy_debye_fit(x) && nu > 6
53+
54+
# Float64
55+
besseljy_debye_fit(x::Float64) = 2.0 + 1.00035*x + Base.Math._approx_cbrt(302.681*x)
56+
besseljy_debye_cutoff(nu, x::Float64) = nu > besseljy_debye_fit(x) && nu > 15
57+
58+
# Float128 - provide roughly ~1e-35 precision
59+
besseljy_debye_fit(x) = 16.0 + 1.0012*x + Base.Math._approx_cbrt(Float64(27.91*x))
60+
besseljy_debye_cutoff(nu, x) = nu > besseljy_debye_fit(x) && nu > 40
61+
62+
#####
63+
##### Debye large order expansion coefficients
64+
#####
65+
66+
function Uk_poly_Jn(p, v, p2, x::Float64)
67+
if v > 5.0 + 1.00033*x + Base.Math._approx_cbrt(1427.61*x)
68+
return Uk_poly10(p, v, p2)
69+
else
70+
return Uk_poly20(p, v, p2)
71+
end
72+
end
73+
Uk_poly_Jn(p, v, p2, x::Float32) = Uk_poly5(p, v, p2)
74+
75+
Uk_poly_In(p, v, p2, ::Type{Float32}) = Uk_poly5(p, v, p2)[1]
76+
Uk_poly_In(p, v, p2, ::Type{Float64}) = Uk_poly10(p, v, p2)[1]
77+
Uk_poly_Kn(p, v, p2, ::Type{Float32}) = Uk_poly5(p, v, p2)[2]
78+
Uk_poly_Kn(p, v, p2, ::Type{Float64}) = Uk_poly10(p, v, p2)[2]
79+
80+
#####
81+
##### Large order expansion for Hankel functions and x > v
82+
#####
4783

4884
"""
4985
hankel_debye(nu, x::T)
@@ -53,8 +89,7 @@ Return the Hankel function H(nu, x) = J(nu, x) + Y(nu, x)*im
5389
"""
5490
function hankel_debye(v, x::T) where T
5591
S = promote_type(T, Float64)
56-
x = S(x)
57-
v = S(v)
92+
v, x = S(v), S(x)
5893

5994
vmx = abs((v + x) * (x - v))
6095
vs = sqrt(vmx)
@@ -67,22 +102,23 @@ function hankel_debye(v, x::T) where T
67102
p2 = v^2 / vmx
68103

69104
_, Uk_Yn = Uk_poly_Hankel(p*im, v, -p2, T(x))
70-
71105
return coef_Yn * Uk_Yn
72106
end
73107

74-
hankel_debye_cutoff(nu, x::Union{Float32, Float64}) = nu < 0.2 + x + Base.Math._approx_cbrt(-411*x)
75-
hankel_debye_cutoff(nu, x) = nu < -2 + 0.9987*x + Base.Math._approx_cbrt(-21570.3*Float64(x))
108+
# Cutoffs for hankel_debye expansions
109+
# regions where the debye expansions for large orders x > v are valid
110+
# determined by fitting a curve a + x + (bx)^(1/3) to where debye expansions provide desired precision
76111

77-
function Uk_poly_Jn(p, v, p2, x::T) where T <: Float64
78-
if v > 5.0 + 1.00033*x + Base.Math._approx_cbrt(1427.61*x)
79-
return Uk_poly10(p, v, p2)
80-
else
81-
return Uk_poly20(p, v, p2)
82-
end
83-
end
112+
# Float32
113+
hankel_debye_fit(x::Float32) = -3.5f0 + x + Base.Math._approx_cbrt(-411.0f0*x)
114+
hankel_debye_cutoff(nu, x::Float32) = nu < hankel_debye_fit(x)
115+
116+
# Float64
117+
hankel_debye_fit(x::Float64) = 0.2 + x + Base.Math._approx_cbrt(-411.0*x)
118+
hankel_debye_cutoff(nu, x::Float64) = nu < hankel_debye_fit(x)
84119

85-
Uk_poly_Jn(p, v, p2, x::Float32) = Uk_poly10(p, v, p2)
120+
# Float128
121+
#hankel_debye_cutoff(nu, x) = nu < -2 + 0.9987*x + Base.Math._approx_cbrt(-21570.3*Float64(x))
86122

87123
function Uk_poly_Hankel(p, v, p2, x::T) where T <: Float64
88124
if v < 5.0 + 0.998*x + Base.Math._approx_cbrt(-1171.34*x)
@@ -93,35 +129,11 @@ function Uk_poly_Hankel(p, v, p2, x::T) where T <: Float64
93129
end
94130

95131
Uk_poly_Hankel(p, v, p2, x::Float32) = Uk_poly5(p, v, p2)
96-
97132
Uk_poly_Hankel(p, v, p2, x) = Uk_poly_Jn(p, v, p2, x)
98133

99-
Uk_poly_In(p, v, p2, ::Type{Float32}) = Uk_poly5(p, v, p2)[1]
100-
Uk_poly_In(p, v, p2, ::Type{Float64}) = Uk_poly10(p, v, p2)[1]
101-
Uk_poly_Kn(p, v, p2, ::Type{Float32}) = Uk_poly5(p, v, p2)[2]
102-
Uk_poly_Kn(p, v, p2, ::Type{Float64}) = Uk_poly10(p, v, p2)[2]
103-
104-
@inline function split_evalpoly(x, P)
105-
# polynomial P must have an even number of terms
106-
N = length(P)
107-
xx = x*x
108-
109-
out = P[end]
110-
out2 = P[end-1]
111-
112-
for i in N-2:-2:2
113-
out = muladd(xx, out, P[i])
114-
out2 = muladd(xx, out2, P[i-1])
115-
end
116-
if iszero(rem(N, 2))
117-
out *= x
118-
return out2 - out, out2 + out
119-
else
120-
out = muladd(xx, out, P[1])
121-
out2 *= x
122-
return out - out2, out2 + out
123-
end
124-
end
134+
#####
135+
##### U - polynomials
136+
#####
125137

126138
function Uk_poly5(p, v, p2)
127139
u0 = 1.0
@@ -206,6 +218,32 @@ function Uk_poly_Jn(p, v, p2, x::T) where T
206218
Poly = (u0, u1, u2, u3, u4, u5, u6, u7, u8, u9, u10, u11, u12, u13, u14, u15, u16, u17, u18, u19, u20)
207219
return split_evalpoly(-p/v, Poly)
208220
end
221+
222+
# performs a second order horner scheme for polynomial evaluation
223+
# computes the even and odd coefficients of the polynomial independently within a loop to reduce latency
224+
# splits the polynomial to compute both 1 + ax + bx^2 + cx^3 and 1 - ax + bx^2 - cx^3 ....
225+
@inline function split_evalpoly(x, P)
226+
# polynomial P must have an even number of terms
227+
N = length(P)
228+
xx = x*x
229+
230+
out = P[end]
231+
out2 = P[end-1]
232+
233+
for i in N-2:-2:2
234+
out = muladd(xx, out, P[i])
235+
out2 = muladd(xx, out2, P[i-1])
236+
end
237+
if iszero(rem(N, 2))
238+
out *= x
239+
return out2 - out, out2 + out
240+
else
241+
out = muladd(xx, out, P[1])
242+
out2 *= x
243+
return out - out2, out2 + out
244+
end
245+
end
246+
209247
#=
210248
u0 = one(x)
211249
u1 = p / 24 * (3 - 5*p^2) * -1 / v

src/asymptotics.jl

Lines changed: 14 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -17,13 +17,13 @@
1717
# Cutoffs were determined manually for each input type.
1818
# AbstractFloat cutoff gives relative error roughly for quadruple precision accuracy (1e-35)
1919

20-
#besseljy_large_argument_min(::Type{Float32}) = 15.0f0
21-
besseljy_large_argument_min(::Type{Float64}) = 20.0
22-
besseljy_large_argument_min(::Type{T}) where T <: AbstractFloat = 40.0
20+
besseljy_large_argument_min(x::Float32) = 15.0f0
21+
besseljy_large_argument_min(x::Float64) = 20.0
22+
besseljy_large_argument_min(x) = 40.0
2323

24-
besseljy_large_argument_cutoff(v, x::Float32) = (x > 1.2f0*v && x > besseljy_large_argument_min(Float32))
25-
besseljy_large_argument_cutoff(v, x::Float64) = (x > 1.65*v && x > besseljy_large_argument_min(Float64))
26-
besseljy_large_argument_cutoff(v, x::T) where T = (x > 4*v && x > besseljy_large_argument_min(T))
24+
besseljy_large_argument_cutoff(v, x::Float32) = (x > 1.2f0*v && x > besseljy_large_argument_min(x))
25+
besseljy_large_argument_cutoff(v, x::Float64) = (x > 1.65*v && x > besseljy_large_argument_min(x))
26+
besseljy_large_argument_cutoff(v, x) = (x > 4*v && x > besseljy_large_argument_min(x))
2727

2828
"""
2929
besseljy_large_argument(nu, x::T)
@@ -32,27 +32,24 @@ Asymptotic expansions for large arguments valid when x > 1.6*nu and x > 20.0.
3232
Returns both (besselj(nu, x), bessely(nu, x)).
3333
"""
3434
function besseljy_large_argument(v, x::T) where T
35-
S = promote_type(T, Float64)
36-
x = S(x)
37-
v = S(v)
38-
# gives both (besselj, bessely) for x > 1.6*v
3935
α, αp = _α_αp_asymptotic(v, x)
36+
S = promote_type(T, Float64)
37+
v, x = S(v), S(x)
4038
b = SQ2OPI(S) / sqrt(αp * x)
4139

4240
# we need to calculate sin(x - v*pi/2 - pi/4) and cos(x - v*pi/2 - pi/4)
4341
# For improved accuracy this is expanded using the formula for sin(x+y+z)
44-
4542
s, c = sincos(PIO2(S) * v)
46-
Sα, Cα = sincos(α)
43+
sα, cα = sincos(α)
4744

4845
CMS = c - s
4946
CPS = c + s
5047

51-
s1 = CMS *
52-
s2 = CPS *
48+
s1 = CMS *
49+
s2 = CPS *
5350

54-
s3 = CMS *
55-
s4 = CPS *
51+
s3 = CMS *
52+
s4 = CPS *
5653

5754
return SQ2O2(S) * (s1 + s2) * b, SQ2O2(S) * (s3 - s4) * b
5855
end
@@ -102,6 +99,7 @@ function _α_αp_asymptotic(v, x::Float64)
10299
end
103100
end
104101
function _α_αp_asymptotic(v, x::Float32)
102+
v, x = Float64(v), Float64(x)
105103
if x > 4*v
106104
return _α_αp_poly_5(v, x)
107105
elseif x > 1.8*v

0 commit comments

Comments
 (0)