14
14
# [2] http://dlmf.nist.gov/10.41.E9
15
15
# [3] https://dlmf.nist.gov/10.41
16
16
17
+ # ####
18
+ # #### Large order expansion for J_{nu}(x) and Y_{nu}(x) and v > x
19
+ # ####
20
+
17
21
"""
18
22
besseljy_debye(nu, x::T)
19
23
20
24
Debey's asymptotic expansion for large order valid when v-> ∞ and x < v.
21
25
Returns both besselj and bessely
22
26
"""
23
- function besseljy_debye (v, x)
24
- T = eltype (x)
27
+ function besseljy_debye (v, x:: T ) where T
25
28
S = promote_type (T, Float64)
26
- x = S (x)
29
+ v, x = S (v), S (x)
27
30
28
31
vmx = (v + x) * (v - x)
29
32
vs = sqrt (vmx)
@@ -36,12 +39,47 @@ function besseljy_debye(v, x)
36
39
p = v / vs
37
40
p2 = v^ 2 / vmx
38
41
39
- Uk_Jn, Uk_Yn = Uk_poly_Jn (p, v, p2, x)
40
-
42
+ Uk_Jn, Uk_Yn = Uk_poly_Jn (p, v, p2, T (x))
41
43
return coef_Jn * Uk_Jn, coef_Yn * Uk_Yn
42
44
end
43
45
44
- besseljy_debye_cutoff (nu, x) = nu > 2.0 + 1.00035 * x + Base. Math. _approx_cbrt (Float64 (302.681 )* x) && nu > 15
46
+ # Cutoffs for besseljy_debye expansions
47
+ # regions where the debye expansions for large orders v > x are valid
48
+ # determined by fitting a curve a + bx + (cx)^(1/3) to where debye expansions provide desired precision
49
+
50
+ # Float32
51
+ besseljy_debye_fit (x:: Float32 ) = 2.5f0 + 1.00035f0 * x + 7.114f0 * Base. Math. _approx_cbrt (x)
52
+ besseljy_debye_cutoff (nu, x:: Float32 ) = nu > besseljy_debye_fit (x) && nu > 6
53
+
54
+ # Float64
55
+ besseljy_debye_fit (x:: Float64 ) = 2.0 + 1.00035 * x + 6.714 * Base. Math. _approx_cbrt (x)
56
+ besseljy_debye_cutoff (nu, x:: Float64 ) = nu > besseljy_debye_fit (x) && nu > 15
57
+
58
+ # Float128 - provide roughly ~1e-35 precision
59
+ # besseljy_debye_fit(x) = 16.0 + 1.0012*x + Base.Math._approx_cbrt(Float64(27.91*x))
60
+ # besseljy_debye_cutoff(nu, x) = nu > besseljy_debye_fit(x) && nu > 40
61
+
62
+ # ####
63
+ # #### Debye large order expansion coefficients
64
+ # ####
65
+
66
+ function Uk_poly_Jn (p, v, p2, x:: Float64 )
67
+ if v > 5.0 + 1.00033 * x + 11.26 * Base. Math. _approx_cbrt (x)
68
+ return Uk_poly10 (p, v, p2)
69
+ else
70
+ return Uk_poly20 (p, v, p2)
71
+ end
72
+ end
73
+ Uk_poly_Jn (p, v, p2, x:: Float32 ) = Uk_poly5 (p, v, p2)
74
+
75
+ Uk_poly_In (p, v, p2, :: Type{Float32} ) = Uk_poly5 (p, v, p2)[1 ]
76
+ Uk_poly_In (p, v, p2, :: Type{Float64} ) = Uk_poly10 (p, v, p2)[1 ]
77
+ Uk_poly_Kn (p, v, p2, :: Type{Float32} ) = Uk_poly5 (p, v, p2)[2 ]
78
+ Uk_poly_Kn (p, v, p2, :: Type{Float64} ) = Uk_poly10 (p, v, p2)[2 ]
79
+
80
+ # ####
81
+ # #### Large order expansion for Hankel functions and x > v
82
+ # ####
45
83
46
84
"""
47
85
hankel_debye(nu, x::T)
@@ -51,7 +89,7 @@ Return the Hankel function H(nu, x) = J(nu, x) + Y(nu, x)*im
51
89
"""
52
90
function hankel_debye (v, x:: T ) where T
53
91
S = promote_type (T, Float64)
54
- x = S (x)
92
+ v, x = S (v), S (x)
55
93
56
94
vmx = abs ((v + x) * (x - v))
57
95
vs = sqrt (vmx)
@@ -63,55 +101,39 @@ function hankel_debye(v, x::T) where T
63
101
p = v / vs
64
102
p2 = v^ 2 / vmx
65
103
66
- _, Uk_Yn = Uk_poly_Hankel (p* im, v, - p2, x)
67
-
104
+ _, Uk_Yn = Uk_poly_Hankel (p* im, v, - p2, T (x))
68
105
return coef_Yn * Uk_Yn
69
106
end
70
107
71
- hankel_debye_cutoff (nu, x) = nu < 0.2 + x + Base. Math. _approx_cbrt (- 411 * x)
108
+ # Cutoffs for hankel_debye expansions
109
+ # regions where the debye expansions for large orders x > v are valid
110
+ # determined by fitting a curve a + x + (bx)^(1/3) to where debye expansions provide desired precision
111
+
112
+ # Float32
113
+ hankel_debye_fit (x:: Float32 ) = - 3.5f0 + x + 7.435f0 * Base. Math. _approx_cbrt (- x)
114
+ hankel_debye_cutoff (nu, x:: Float32 ) = nu < hankel_debye_fit (x)
115
+
116
+ # Float64
117
+ hankel_debye_fit (x:: Float64 ) = 0.2 + x + 7.435 * Base. Math. _approx_cbrt (- x)
118
+ hankel_debye_cutoff (nu, x:: Float64 ) = nu < hankel_debye_fit (x)
119
+
120
+ # Float128
121
+ # hankel_debye_cutoff(nu, x) = nu < -2 + 0.9987*x + Base.Math._approx_cbrt(-21570.3*Float64(x))
72
122
73
- function Uk_poly_Jn (p, v, p2, x:: T ) where T <: Float64
74
- if v > 5.0 + 1.00033 * x + Base. Math. _approx_cbrt (1427.61 * x)
75
- return Uk_poly10 (p, v, p2)
76
- else
77
- return Uk_poly20 (p, v, p2)
78
- end
79
- end
80
123
function Uk_poly_Hankel (p, v, p2, x:: T ) where T <: Float64
81
- if v < 5.0 + 0.998 * x + Base. Math. _approx_cbrt (- 1171.34 * x)
124
+ if v < 5.0 + 0.998 * x + 10.541 * Base. Math. _approx_cbrt (- x)
82
125
return Uk_poly10 (p, v, p2)
83
126
else
84
127
return Uk_poly20 (p, v, p2)
85
128
end
86
129
end
87
- Uk_poly_Hankel (p, v, p2, x) = Uk_poly_Jn (p, v, p2, x:: BigFloat )
88
-
89
- Uk_poly_In (p, v, p2, :: Type{Float32} ) = Uk_poly5 (p, v, p2)[1 ]
90
- Uk_poly_In (p, v, p2, :: Type{Float64} ) = Uk_poly10 (p, v, p2)[1 ]
91
- Uk_poly_Kn (p, v, p2, :: Type{Float32} ) = Uk_poly5 (p, v, p2)[2 ]
92
- Uk_poly_Kn (p, v, p2, :: Type{Float64} ) = Uk_poly10 (p, v, p2)[2 ]
93
130
94
- @inline function split_evalpoly (x, P)
95
- # polynomial P must have an even number of terms
96
- N = length (P)
97
- xx = x* x
131
+ Uk_poly_Hankel (p, v, p2, x:: Float32 ) = Uk_poly5 (p, v, p2)
132
+ Uk_poly_Hankel (p, v, p2, x) = Uk_poly_Jn (p, v, p2, x)
98
133
99
- out = P[end ]
100
- out2 = P[end - 1 ]
101
-
102
- for i in N- 2 : - 2 : 2
103
- out = muladd (xx, out, P[i])
104
- out2 = muladd (xx, out2, P[i- 1 ])
105
- end
106
- if iszero (rem (N, 2 ))
107
- out *= x
108
- return out2 - out, out2 + out
109
- else
110
- out = muladd (xx, out, P[1 ])
111
- out2 *= x
112
- return out - out2, out2 + out
113
- end
114
- end
134
+ # ####
135
+ # #### U - polynomials
136
+ # ####
115
137
116
138
function Uk_poly5 (p, v, p2)
117
139
u0 = 1.0
@@ -169,7 +191,8 @@ function Uk_poly20(p, v, p2)
169
191
return split_evalpoly (- p/ v, Poly)
170
192
end
171
193
172
- function Uk_poly_Jn (p, v, p2, x:: T ) where T <: BigFloat
194
+ # implementation for arbitrary precision
195
+ function Uk_poly_Jn (p, v, p2, x:: T ) where T
173
196
u0 = one (T)
174
197
u1 = evalpoly (p2, (3 , - 5 )) / 24
175
198
u2 = evalpoly (p2, (81 , - 462 , 385 )) / 1152
@@ -192,14 +215,35 @@ function Uk_poly_Jn(p, v, p2, x::T) where T <: BigFloat
192
215
u19 = evalpoly(p2, (3761719809509744584195141470215239968860161850335693359375, -1694136104847790923543013061207680485991618397125797873046875, 131518243789528012257323287684549590712219590887856743595703125, -4179129511260217209133623104486559148268490002722848244991240625, 72088266652871136165181276891337618088740053757385007292240377500, -776773977214820033621339638022401758862209813648991378355261599500, 5677394779818071523358076045292335690018332546439714641415150037636, -29667822591847140970922062603154078948366431649274897672067206014580, 114800233558787974267088388638654159779991991716667850063043161177890, -336788945225975997668966486515468748933754942064781256509404101275850, 760599837839891632010677514872412397427959022966733699428711208643750, -1333776105497652608917805362422659621440699261908512326717363285968750, 1821026790520428617034899967974569000414933301802005578083458082187500, -1929493390007550512825649545883182667978851358936168551128507440937500, 1570570304135828069768211937927131121377451398878663909061910457812500, -963355744456233323886326422779275308259891167457840027230510476562500, 430744376085805585076333464506126217301082462279781595185335615234375, -132496442776420617057548516091451843431549350710841121872822607421875, 25067118709566753991500713640672585065184296412786618544560205078125, -2198870062242697718552694179006367110981078632700580574084228515625)) / 980570799589976236952265721984567958568960000000
193
216
u20 = evalpoly(p2, (24030618487110150352755402740028995969072485932314476318359375, -11984379509393886990049682627416290126645799829432886859195312500, 1028816773596376675865176501621547688509187479681533744365175781250, -36136687211104276266628386141898079997731413843884113675698994212500, 689368394712060288513879526213143279387995331221181709158682715671875, -8226054591925395046897310265711439061232478740113589342810359827971600, 66729727980314206345594148553023187689855688854650386958448499394886808, -388235990422199036481157075090292917209702371811112714609151030385239376, 1679621555358289731717827244171539003686077073065087747585589973854567950, -5539024239213671573375139503069183935283937579486175606155069574939719000, 14158993985687610069291256086138044030538819617545349322200892501730615500, -28351273454978996507857547213496220444080209057761228571819950144231575000, 44700502703654649774362294361156754154712937287563245669685024068473468750, -55504285315413734114196925709059066734450069228526494661323483726671250000, 53996017865021964397812742861916826936440170527545458081631955240159375000, -40677946447845849750014263157052100427921007881029648608310681741346250000, 23250401373415767366970579801426233116241475047289980901600427561701171875, -9744288621182737996606001891415854305391392962559389056448157541132812500, 2823768330714179467082676027577350697536971031741905897356560592675781250, -505537818270094147077012813306995849751437826286925078626556809570312500, 42128151522507845589751067775582987479286485523910423218879734130859375)) / 658943577324464031231922565173629668158341120000000
194
217
u21 = evalpoly(p2, (1990919576929585163761247434580873723897677550573731281207275390625, -1094071724893410272201314846735800345180930752601602224440504638671875, 103359845691236916270005420886293061281368263471845448279855946394531250, -3993558675585831919973605001813276532326292885934464373884268722794718750, 83832389176247515253189196480455786065824463879054932596114704927571390625, -1101977288759423115916484463959767795703317649005495798822550381533967842875, 9865413224996821913093061266347613112205666995211786544733850490740903131000, -63509799534443193918054738326913581450607131662586232327271654588312820660616, 305080179205137176095342856930260393560550505786421683990077008259370104309410, -1122099959965657526344757894103766710826629020929459670948834078441128579791750, 3217185258543282976637373169047731813093578126603884759856059695249999306047500, -7276835259402589085458442196686990645669654847254510877738804991391058411412500, 13076651508858985557227319801010895818335803028865299751194038965212274849406250, -18719023753854332443081892087572754119280558462994056787647908433841920235468750, 21307327225910629020600045595173860611314592374884901403059587980149276271875000, -19156728115567236234371593788102013666866274144599851347429312225076676478125000, 13430107219321888006291381503763318985372222835071804983658005207838642173828125, -7186150593017474773099947110903785965879266917528290917207712073663895771484375, 2834031566467842832851805860262261718160136985649155528263587796394390332031250, -776308737033643856044315139790308583914612827783322139063211433790569824218750, 131897976398031751060811874321878385924211075364673046295410087596954345703125, -10468093364923154846096180501736379835254847251164527483762705364837646484375)) / 5456052820246562178600318839637653652351064473600000000
195
- u22 = evalpoly (p2, (3.8335346613939443e12 , - 2.3109159761323565e15 , 2.3920280120269997e17 , - 1.0121818379942089e19 , 2.3275346258089414e20 , - 3.3544689122226785e21 , 3.297557757461478e22 , - 2.336107524486965e23 , 1.238524103792452e24 , - 5.0463598652544e24 , 1.6103128541137314e25 , - 4.077501349206541e25 , 8.26258535798955e25 , - 1.3459193994556415e26 , 1.7635713272326644e26 , - 1.8526731041549917e26 , 1.548092083577385e26 , - 1.0148048982766395e26 , 5.103920268388802e25 , - 1.9006807535664433e25 , 4.936185283790662e24 , - 7.980021228256559e23 , 6.04547062746709e22 ))
196
- u23 = - evalpoly (p2, (4.218971570284097e13 , - 2.778481101311081e16 , 3.1385283211499996e18 , - 1.4486387749510863e20 , 3.6341499869780876e21 , - 5.7179919065432055e22 , 6.144339925144987e23 , - 4.766924608251481e24 , 2.774466490672939e25 , - 1.2449342046124282e26 , 4.392130563430048e26 , - 1.2355529146787609e27 , 2.7982068996977173e27 , - 5.131998439010333e27 , 7.641216535678268e27 , - 9.228395023257356e27 , 8.999255845917453e27 , - 7.02322235515725e27 , 4.322773732100187e27 , - 2.050902994929233e27 , 7.234243234844319e26 , - 1.7860680966743495e26 , 2.753863007576946e25 , - 1.9955529040412654e24 ))
197
- u24 = evalpoly (p2, (4.8540146868529006e14 , - 3.4792991439250445e17 , 4.273207395701127e19 , - 2.1435653415108537e21 , 5.844687629283339e22 , - 1.0000750138961727e24 , 1.1699189691874474e25 , - 9.896648661695488e25 , 6.29370256208713e26 , - 3.0939194683063286e27 , 1.1998211967644424e28 , - 3.7252346341093444e28 , 9.358117764887965e28 , - 1.9153963148099324e29 , 3.206650343980748e29 , - 4.395132918078325e29 , 4.9215508698387624e29 , - 4.4775348387950634e29 , 3.277658265637452e29 , - 1.9012207767547338e29 , 8.536184882279286e28 , - 2.8599776383548e28 , 6.728957650918171e27 , - 9.916401268407057e26 , 6.886389769727123e25 ))
198
- u25 = - evalpoly (p2, (5.827244631566907e15 , - 4.5305357275125955e18 , 6.029638127487473e20 , - 3.2761234100445222e22 , 9.675654883193622e23 , - 1.7941040647617987e25 , 2.2764310713849358e26 , - 2.0914533474677945e27 , 1.4471195817119858e28 , - 7.757785573404132e28 , 3.2900927159291354e29 , - 1.1210232552135908e30 , 3.1034661143911036e30 , - 7.036055338636485e30 , 1.3128796688902614e31 , - 2.0208792587851872e31 , 2.5653099826522344e31 , - 2.6771355605594045e31 , 2.2823085118856488e31 , - 1.5730388076301427e31 , 8.627355824571355e30 , - 3.676221426681414e30 , 1.1728484268744769e30 , - 2.6355294419807464e29 , 3.7195112743738626e28 , - 2.479674182915908e27 ))
199
-
200
- Poly = (u0, u1, u2, u3, u4, u5, u6, u7, u8, u9, u10, u11, u12, u13, u14, u15, u16, u17, u18, u19, u20)
218
+ Poly = (u0, u1, u2, u3, u4, u5, u6, u7, u8, u9, u10, u11, u12, u13, u14, u15, u16, u17, u18, u19, u20, u21)
201
219
return split_evalpoly (- p/ v, Poly)
202
220
end
221
+
222
+ # performs a second order horner scheme for polynomial evaluation
223
+ # computes the even and odd coefficients of the polynomial independently within a loop to reduce latency
224
+ # splits the polynomial to compute both 1 + ax + bx^2 + cx^3 and 1 - ax + bx^2 - cx^3 ....
225
+ @inline function split_evalpoly (x, P)
226
+ # polynomial P must have an even number of terms
227
+ N = length (P)
228
+ xx = x* x
229
+
230
+ out = P[end ]
231
+ out2 = P[end - 1 ]
232
+
233
+ for i in N- 2 : - 2 : 2
234
+ out = muladd (xx, out, P[i])
235
+ out2 = muladd (xx, out2, P[i- 1 ])
236
+ end
237
+ if iszero (rem (N, 2 ))
238
+ out *= x
239
+ return out2 - out, out2 + out
240
+ else
241
+ out = muladd (xx, out, P[1 ])
242
+ out2 *= x
243
+ return out - out2, out2 + out
244
+ end
245
+ end
246
+
203
247
#=
204
248
u0 = one(x)
205
249
u1 = p / 24 * (3 - 5*p^2) * -1 / v
0 commit comments