14
14
# [2] http://dlmf.nist.gov/10.41.E9
15
15
# [3] https://dlmf.nist.gov/10.41
16
16
17
+ # ####
18
+ # #### Large order expansion for J_{nu}(x) and Y_{nu}(x) and v > x
19
+ # ####
20
+
17
21
"""
18
22
besseljy_debye(nu, x::T)
19
23
@@ -22,8 +26,7 @@ Returns both besselj and bessely
22
26
"""
23
27
function besseljy_debye (v, x:: T ) where T
24
28
S = promote_type (T, Float64)
25
- x = S (x)
26
- v = S (v)
29
+ v, x = S (v), S (x)
27
30
28
31
vmx = (v + x) * (v - x)
29
32
vs = sqrt (vmx)
@@ -37,13 +40,46 @@ function besseljy_debye(v, x::T) where T
37
40
p2 = v^ 2 / vmx
38
41
39
42
Uk_Jn, Uk_Yn = Uk_poly_Jn (p, v, p2, T (x))
40
-
41
43
return coef_Jn * Uk_Jn, coef_Yn * Uk_Yn
42
44
end
43
45
44
- besseljy_debye_cutoff (nu, x:: Float64 ) = nu > 2.0 + 1.00035 * x + Base. Math. _approx_cbrt (Float64 (302.681 )* x) && nu > 15
45
- besseljy_debye_cutoff (nu, x:: Float32 ) = nu > 10.0 + 1.006 * x + Base. Math. _approx_cbrt (135.0 * x) && nu > 10
46
- besseljy_debye_cutoff (nu, x) = nu > 16.0 + 1.0012 * x + Base. Math. _approx_cbrt (Float64 (27.91 )* x) && nu > 40
46
# Cutoffs for the besseljy_debye expansions.
# They mark the region where the Debye expansions for large orders (v > x) are valid.
# Each cutoff was determined by fitting a curve a + b*x + (c*x)^(1/3) to the boundary
# where the Debye expansions provide the desired precision.
49
+
50
# Float32
# `besseljy_debye_fit` evaluates the fitted boundary curve a + b*x + cbrt(c*x), using the
# non-exported helper `Base.Math._approx_cbrt` for the cube-root term.
# `besseljy_debye_cutoff` is true when `nu` lies above that curve and also above 6.
besseljy_debye_fit(x::Float32) = 2.5f0 + 1.00035f0 * x + Base.Math._approx_cbrt(360.0f0 * x)
besseljy_debye_cutoff(nu, x::Float32) = nu > besseljy_debye_fit(x) && nu > 6
53
+
54
# Float64
# `besseljy_debye_fit` evaluates the fitted boundary curve a + b*x + cbrt(c*x), using the
# non-exported helper `Base.Math._approx_cbrt` for the cube-root term.
# `besseljy_debye_cutoff` is true when `nu` lies above that curve and also above 15.
besseljy_debye_fit(x::Float64) = 2.0 + 1.00035 * x + Base.Math._approx_cbrt(302.681 * x)
besseljy_debye_cutoff(nu, x::Float64) = nu > besseljy_debye_fit(x) && nu > 15
57
+
58
# Float128 - provide roughly ~1e-35 precision
# Generic fallback for any `x` that is not matched by a more specific method.
# The cube-root argument is converted to Float64 before calling the non-exported helper
# `Base.Math._approx_cbrt`; the remaining terms keep the type of `x`.
# `besseljy_debye_cutoff` is true when `nu` lies above the fitted curve and also above 40.
besseljy_debye_fit(x) = 16.0 + 1.0012 * x + Base.Math._approx_cbrt(Float64(27.91 * x))
besseljy_debye_cutoff(nu, x) = nu > besseljy_debye_fit(x) && nu > 40
61
+
62
+ # ####
63
+ # #### Debye large order expansion coefficients
64
+ # ####
65
+
66
# Select which U-polynomial routine the Debye expansion uses for a given precision.
# Float64: orders `v` above the fitted curve 5.0 + 1.00033*x + cbrt(1427.61*x) use
# `Uk_poly10`; all other orders fall back to `Uk_poly20`.
# NOTE(review): presumably the suffix is the number of expansion terms - confirm against
# the `Uk_poly*` definitions.
function Uk_poly_Jn(p, v, p2, x::Float64)
    if v > 5.0 + 1.00033 * x + Base.Math._approx_cbrt(1427.61 * x)
        return Uk_poly10(p, v, p2)
    else
        return Uk_poly20(p, v, p2)
    end
end

# Float32: always uses `Uk_poly5`; `x` only takes part in dispatch.
Uk_poly_Jn(p, v, p2, x::Float32) = Uk_poly5(p, v, p2)
74
+
75
# `Uk_poly5` / `Uk_poly10` return a pair of values; the `In` variants take the first
# element and the `Kn` variants the second. The trailing `Type` argument is used only for
# dispatch on the working precision.
Uk_poly_In(p, v, p2, ::Type{Float32}) = Uk_poly5(p, v, p2)[1]
Uk_poly_In(p, v, p2, ::Type{Float64}) = Uk_poly10(p, v, p2)[1]
Uk_poly_Kn(p, v, p2, ::Type{Float32}) = Uk_poly5(p, v, p2)[2]
Uk_poly_Kn(p, v, p2, ::Type{Float64}) = Uk_poly10(p, v, p2)[2]
79
+
80
+ # ####
81
+ # #### Large order expansion for Hankel functions and x > v
82
+ # ####
47
83
48
84
"""
49
85
hankel_debye(nu, x::T)
@@ -53,8 +89,7 @@ Return the Hankel function H(nu, x) = J(nu, x) + Y(nu, x)*im
53
89
"""
54
90
function hankel_debye (v, x:: T ) where T
55
91
S = promote_type (T, Float64)
56
- x = S (x)
57
- v = S (v)
92
+ v, x = S (v), S (x)
58
93
59
94
vmx = abs ((v + x) * (x - v))
60
95
vs = sqrt (vmx)
@@ -67,22 +102,23 @@ function hankel_debye(v, x::T) where T
67
102
p2 = v^ 2 / vmx
68
103
69
104
_, Uk_Yn = Uk_poly_Hankel (p* im, v, - p2, T (x))
70
-
71
105
return coef_Yn * Uk_Yn
72
106
end
73
107
74
- hankel_debye_cutoff (nu, x:: Union{Float32, Float64} ) = nu < 0.2 + x + Base. Math. _approx_cbrt (- 411 * x)
75
- hankel_debye_cutoff (nu, x) = nu < - 2 + 0.9987 * x + Base. Math. _approx_cbrt (- 21570.3 * Float64 (x))
108
# Cutoffs for the hankel_debye expansions.
# They mark the region where the Debye expansions for large arguments (x > v) are valid.
# Each cutoff was determined by fitting a curve a + x + (b*x)^(1/3) to the boundary
# where the Debye expansions provide the desired precision.
76
111
77
- function Uk_poly_Jn (p, v, p2, x :: T ) where T <: Float64
78
- if v > 5.0 + 1.00033 * x + Base. Math. _approx_cbrt (1427.61 * x)
79
- return Uk_poly10 (p, v, p2 )
80
- else
81
- return Uk_poly20 (p, v, p2)
82
- end
83
- end
112
# Float32
# `hankel_debye_fit` evaluates the fitted boundary curve a + x + cbrt(b*x) with negative
# `b`, using the non-exported helper `Base.Math._approx_cbrt` for the cube-root term.
# `hankel_debye_cutoff` is true when `nu` lies below that curve.
hankel_debye_fit(x::Float32) = -3.5f0 + x + Base.Math._approx_cbrt(-411.0f0 * x)
hankel_debye_cutoff(nu, x::Float32) = nu < hankel_debye_fit(x)
115
+
116
# Float64
# `hankel_debye_fit` evaluates the fitted boundary curve a + x + cbrt(b*x) with negative
# `b`, using the non-exported helper `Base.Math._approx_cbrt` for the cube-root term.
# `hankel_debye_cutoff` is true when `nu` lies below that curve.
hankel_debye_fit(x::Float64) = 0.2 + x + Base.Math._approx_cbrt(-411.0 * x)
hankel_debye_cutoff(nu, x::Float64) = nu < hankel_debye_fit(x)
84
119
85
- Uk_poly_Jn (p, v, p2, x:: Float32 ) = Uk_poly10 (p, v, p2)
120
+ # Float128
121
+ # hankel_debye_cutoff(nu, x) = nu < -2 + 0.9987*x + Base.Math._approx_cbrt(-21570.3*Float64(x))
86
122
87
123
function Uk_poly_Hankel (p, v, p2, x:: T ) where T <: Float64
88
124
if v < 5.0 + 0.998 * x + Base. Math. _approx_cbrt (- 1171.34 * x)
@@ -93,35 +129,11 @@ function Uk_poly_Hankel(p, v, p2, x::T) where T <: Float64
93
129
end
94
130
95
131
# Float32: always uses `Uk_poly5`; `x` only takes part in dispatch.
Uk_poly_Hankel(p, v, p2, x::Float32) = Uk_poly5(p, v, p2)
# Any other argument type not covered by a more specific method forwards to `Uk_poly_Jn`.
Uk_poly_Hankel(p, v, p2, x) = Uk_poly_Jn(p, v, p2, x)
98
133
99
- Uk_poly_In (p, v, p2, :: Type{Float32} ) = Uk_poly5 (p, v, p2)[1 ]
100
- Uk_poly_In (p, v, p2, :: Type{Float64} ) = Uk_poly10 (p, v, p2)[1 ]
101
- Uk_poly_Kn (p, v, p2, :: Type{Float32} ) = Uk_poly5 (p, v, p2)[2 ]
102
- Uk_poly_Kn (p, v, p2, :: Type{Float64} ) = Uk_poly10 (p, v, p2)[2 ]
103
-
104
- @inline function split_evalpoly (x, P)
105
- # polynomial P must have an even number of terms
106
- N = length (P)
107
- xx = x* x
108
-
109
- out = P[end ]
110
- out2 = P[end - 1 ]
111
-
112
- for i in N- 2 : - 2 : 2
113
- out = muladd (xx, out, P[i])
114
- out2 = muladd (xx, out2, P[i- 1 ])
115
- end
116
- if iszero (rem (N, 2 ))
117
- out *= x
118
- return out2 - out, out2 + out
119
- else
120
- out = muladd (xx, out, P[1 ])
121
- out2 *= x
122
- return out - out2, out2 + out
123
- end
124
- end
134
+ # ####
135
+ # #### U - polynomials
136
+ # ####
125
137
126
138
function Uk_poly5 (p, v, p2)
127
139
u0 = 1.0
@@ -206,6 +218,32 @@ function Uk_poly_Jn(p, v, p2, x::T) where T
206
218
Poly = (u0, u1, u2, u3, u4, u5, u6, u7, u8, u9, u10, u11, u12, u13, u14, u15, u16, u17, u18, u19, u20)
207
219
return split_evalpoly (- p/ v, Poly)
208
220
end
221
+
222
# Second-order Horner scheme for polynomial evaluation.
# The even- and odd-power coefficients of `P` are accumulated in two independent chains
# (both in powers of x^2) inside a single loop to reduce latency.
# With P = (c0, c1, c2, c3, ...) the result is the pair
#     (c0 - c1*x + c2*x^2 - c3*x^3 + ...,  c0 + c1*x + c2*x^2 + c3*x^3 + ...)
# i.e. the polynomial evaluated at -x and at x.
# `P` may hold an even or an odd number of coefficients; it must not be empty.
@inline function split_evalpoly(x, P)
    N = length(P)
    # a single coefficient is a constant polynomial; `P[end-1]` below would be out of bounds
    if N == 1
        c = P[1] * one(x)
        return c, c
    end
    xx = x * x

    out = P[end]
    out2 = P[end-1]

    for i in N-2:-2:2
        out = muladd(xx, out, P[i])
        out2 = muladd(xx, out2, P[i-1])
    end
    if iszero(rem(N, 2))
        # even N: `out` holds the odd-power chain, `out2` the even-power chain
        out *= x
        return out2 - out, out2 + out
    else
        # odd N: the roles are swapped and `out` still needs the constant term P[1]
        out = muladd(xx, out, P[1])
        out2 *= x
        return out - out2, out2 + out
    end
end
246
+
209
247
#=
210
248
u0 = one(x)
211
249
u1 = p / 24 * (3 - 5*p^2) * -1 / v
0 commit comments