Skip to content

Commit ab48a96

Browse files
authored
Merge pull request #87 from heltonmc/cairy_power
Unroll power series in complex airy functions
2 parents 4afbc6c + 2051f0c commit ab48a96

File tree

2 files changed

+71
-124
lines changed

2 files changed

+71
-124
lines changed

Project.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ SIMDMath = "5443be0b-e40a-4f70-a07e-dcd652efc383"
77

88
[compat]
99
julia = "1.8"
10-
SIMDMath = "0.2.1"
10+
SIMDMath = "0.2.3"
1111

1212
[extras]
1313
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"

src/Airy/cairy.jl

Lines changed: 70 additions & 123 deletions
Original file line numberDiff line numberDiff line change
@@ -48,8 +48,6 @@ See also: [`airyaiprime`](@ref), [`airybi`](@ref)
4848
"""
4949
airyai(z::Number) = _airyai(float(z))
5050

51-
_airyai(z::ComplexF16) = ComplexF16(_airyai(ComplexF32(z)))
52-
5351
function _airyai(z::Complex{T}) where T <: Union{Float32, Float64}
5452
if ~isfinite(z)
5553
if abs(angle(z)) < 2*T(π)/3
@@ -60,7 +58,7 @@ function _airyai(z::Complex{T}) where T <: Union{Float32, Float64}
6058
end
6159
x, y = real(z), imag(z)
6260
airy_large_argument_cutoff(z) && return airyai_large_args(z)[1]
63-
airyai_power_series_cutoff(x, y) && return airyai_power_series(z)
61+
airyai_power_series_cutoff(x, y) && return airyai_power_series(z)[1]
6462

6563
if x > zero(T)
6664
# use relation to besselk (http://dlmf.nist.gov/9.6.E1)
@@ -105,9 +103,7 @@ See also: [`airyai`](@ref), [`airybi`](@ref)
105103
"""
106104
airyaiprime(z::Number) = _airyaiprime(float(z))
107105

108-
_airyaiprime(z::ComplexF16) = ComplexF16(_airyaiprime(ComplexF32(z)))
109-
110-
function _airyaiprime(z::ComplexOrReal{T}) where T <: Union{Float32, Float64}
106+
function _airyaiprime(z::Complex{T}) where T <: Union{Float32, Float64}
111107
if ~isfinite(z)
112108
if abs(angle(z)) < 2*T(π)/3
113109
return -exp(-z)
@@ -117,7 +113,7 @@ function _airyaiprime(z::ComplexOrReal{T}) where T <: Union{Float32, Float64}
117113
end
118114
x, y = real(z), imag(z)
119115
airy_large_argument_cutoff(z) && return airyai_large_args(z)[2]
120-
airyai_power_series_cutoff(x, y) && return airyaiprime_power_series(z)
116+
airyai_power_series_cutoff(x, y) && return airyai_power_series(z)[2]
121117

122118
if x > zero(T)
123119
# use relation to besselk (http://dlmf.nist.gov/9.6.E2)
@@ -162,9 +158,7 @@ See also: [`airybiprime`](@ref), [`airyai`](@ref)
162158
"""
163159
airybi(z::Number) = _airybi(float(z))
164160

165-
_airybi(z::ComplexF16) = ComplexF16(_airybi(ComplexF32(z)))
166-
167-
function _airybi(z::ComplexOrReal{T}) where T <: Union{Float32, Float64}
161+
function _airybi(z::Complex{T}) where T <: Union{Float32, Float64}
168162
if ~isfinite(z)
169163
if abs(angle(z)) < 2π/3
170164
return exp(z)
@@ -174,7 +168,7 @@ function _airybi(z::ComplexOrReal{T}) where T <: Union{Float32, Float64}
174168
end
175169
x, y = real(z), imag(z)
176170
airy_large_argument_cutoff(z) && return airybi_large_args(z)[1]
177-
airybi_power_series_cutoff(x, y) && return airybi_power_series(z)
171+
airybi_power_series_cutoff(x, y) && return airybi_power_series(z)[1]
178172

179173
if x > zero(T)
180174
zz = 2 * z * sqrt(z) / 3
@@ -221,9 +215,7 @@ See also: [`airybi`](@ref), [`airyai`](@ref)
221215
"""
222216
airybiprime(z::Number) = _airybiprime(float(z))
223217

224-
_airybiprime(z::ComplexF16) = ComplexF16(_airybiprime(ComplexF32(z)))
225-
226-
function _airybiprime(z::ComplexOrReal{T}) where T <: Union{Float32, Float64}
218+
function _airybiprime(z::Complex{T}) where T <: Union{Float32, Float64}
227219
if ~isfinite(z)
228220
if abs(angle(z)) < 2*T(π)/3
229221
return exp(z)
@@ -233,7 +225,7 @@ function _airybiprime(z::ComplexOrReal{T}) where T <: Union{Float32, Float64}
233225
end
234226
x, y = real(z), imag(z)
235227
airy_large_argument_cutoff(z) && return airybi_large_args(z)[2]
236-
airybi_power_series_cutoff(x, y) && return airybiprime_power_series(z)
228+
airybi_power_series_cutoff(x, y) && return airybi_power_series(z)[2]
237229

238230
if x > zero(T)
239231
zz = 2 * z * sqrt(z) / 3
@@ -259,66 +251,31 @@ function _airybiprime(z::ComplexOrReal{T}) where T <: Union{Float32, Float64}
259251
end
260252

261253
#####
262-
##### Power series for airyai(x)
254+
##### Power series for airyai(z) and airybi(z)
263255
#####
264256

265-
# cutoffs for power series valid for both airyai and airyaiprime
266-
airyai_power_series_cutoff(x::T, y::T) where T <: Float64 = x < 2 && abs(y) > -1.4*(x + 5.5)
267-
airyai_power_series_cutoff(x::T, y::T) where T <: Float32 = x < 4.5f0 && abs(y) > -1.4f0*(x + 9.5f0)
268-
269-
function airyai_power_series(x::ComplexOrReal{T}; tol=eps(T)) where T
270-
S = eltype(x)
271-
iszero(x) && return S(0.3550280538878172)
272-
MaxIter = 3000
273-
ai1 = zero(S)
274-
ai2 = zero(S)
275-
x2 = x*x
276-
x3 = x2*x
277-
t = one(S) / GAMMA_TWO_THIRDS(T)
278-
t2 = 3*x / GAMMA_ONE_THIRD(T)
279-
280-
for i in 0:MaxIter
281-
ai1 += t
282-
ai2 += t2
283-
abs(t) < tol * abs(ai1) && break
284-
t *= x3 * inv(9*(i + one(T))*(i + T(2)/3))
285-
t2 *= x3 * inv(9*(i + one(T))*(i + T(4)/3))
286-
end
287-
return (ai1*3^(-T(2)/3) - ai2*3^(-T(4)/3))
257+
# power series returns value and derivative
258+
function airyai_power_series(z::Complex{T}) where T
259+
z2 = z * z
260+
z3 = z2 * z
261+
p = SIMDMath.horner_simd(z3, pack_AIRYAI_POW_COEF)
262+
ai = muladd(-p[2], z, p[1])
263+
aip = muladd(p[4], z2, -p[3])
264+
return ai, aip
288265
end
289-
airyai_power_series(x::Float32) = Float32(airyai_power_series(Float64(x), tol=eps(Float32)))
290-
airyai_power_series(x::ComplexF32) = ComplexF32(airyai_power_series(ComplexF64(x), tol=eps(Float32)))
291266

292-
#####
293-
##### Power series for airyaiprime(x)
294-
#####
295-
296-
function airyaiprime_power_series(x::ComplexOrReal{T}; tol=eps(T)) where T
297-
S = eltype(x)
298-
iszero(x) && return S(-0.2588194037928068)
299-
MaxIter = 3000
300-
ai1 = zero(S)
301-
ai2 = zero(S)
302-
x2 = x*x
303-
x3 = x2*x
304-
t = one(S) / GAMMA_ONE_THIRD(T)
305-
t2 = 3*x2 / (2*GAMMA_TWO_THIRDS(T))
306-
307-
for i in 0:MaxIter
308-
ai1 += t
309-
ai2 += t2
310-
abs(t) < tol * abs(ai1) && break
311-
t *= x3 * inv(9*(i + one(T))*(i + T(1)/3))
312-
t2 *= x3 * inv(9*(i + one(T))*(i + T(5)/3))
313-
end
314-
return -ai1*3^(-T(1)/3) + ai2*3^(-T(5)/3)
267+
function airybi_power_series(z::Complex{T}) where T
268+
z2 = z * z
269+
z3 = z2 * z
270+
p = SIMDMath.horner_simd(z3, pack_AIRYBI_POW_COEF)
271+
bi = muladd(p[2], z, p[1])
272+
bip = muladd(p[4], z2, p[3])
273+
return bi, bip
315274
end
316-
airyaiprime_power_series(x::Float32) = Float32(airyaiprime_power_series(Float64(x), tol=eps(Float32)))
317-
airyaiprime_power_series(x::ComplexF32) = ComplexF32(airyaiprime_power_series(ComplexF64(x), tol=eps(Float32)))
318275

319-
#####
320-
##### Power series for airybi(x)
321-
#####
276+
# cutoffs for power series valid for both airyai and airyaiprime
277+
airyai_power_series_cutoff(x::T, y::T) where T <: Float64 = x < 2 && abs(y) > -1.4*(x + 5.5)
278+
airyai_power_series_cutoff(x::T, y::T) where T <: Float32 = x < 4.5f0 && abs(y) > -1.4f0*(x + 9.5f0)
322279

323280
# cutoffs for power series valid for both airybi and airybiprime
324281
# has a more complicated validity as it works well close to positive real line and for small negative arguments also works for angle(z) ~ 2pi/3
@@ -327,56 +284,6 @@ airyaiprime_power_series(x::ComplexF32) = ComplexF32(airyaiprime_power_series(Co
327284
airybi_power_series_cutoff(x::T, y::T) where T <: Float64 = (abs(y) > -1.4*(x + 5.5) && abs(y) < -2.2*(x - 4)) || (x > zero(T) && abs(y) < 3)
328285
airybi_power_series_cutoff(x::T, y::T) where T <: Float32 = abs(complex(x, y)) < 9
329286

330-
function airybi_power_series(x::ComplexOrReal{T}; tol=eps(T)) where T
331-
S = eltype(x)
332-
iszero(x) && return S(0.6149266274460007)
333-
MaxIter = 3000
334-
ai1 = zero(S)
335-
ai2 = zero(S)
336-
x2 = x*x
337-
x3 = x2*x
338-
t = one(S) / GAMMA_TWO_THIRDS(T)
339-
t2 = 3*x / GAMMA_ONE_THIRD(T)
340-
341-
for i in 0:MaxIter
342-
ai1 += t
343-
ai2 += t2
344-
abs(t) < tol * abs(ai1) && break
345-
t *= x3 * inv(9*(i + one(T))*(i + T(2)/3))
346-
t2 *= x3 * inv(9*(i + one(T))*(i + T(4)/3))
347-
end
348-
return (ai1*3^(-T(1)/6) + ai2*3^(-T(5)/6))
349-
end
350-
airybi_power_series(x::Float32) = Float32(airybi_power_series(Float64(x), tol=eps(Float32)))
351-
airybi_power_series(x::ComplexF32) = ComplexF32(airybi_power_series(ComplexF64(x), tol=eps(Float32)))
352-
353-
#####
354-
##### Power series for airybiprime(x)
355-
#####
356-
357-
function airybiprime_power_series(x::ComplexOrReal{T}; tol=eps(T)) where T
358-
S = eltype(x)
359-
iszero(x) && return S(0.4482883573538264)
360-
MaxIter = 3000
361-
ai1 = zero(S)
362-
ai2 = zero(S)
363-
x2 = x*x
364-
x3 = x2*x
365-
t = one(S) / GAMMA_ONE_THIRD(T)
366-
t2 = 3*x2 / (2*GAMMA_TWO_THIRDS(T))
367-
368-
for i in 0:MaxIter
369-
ai1 += t
370-
ai2 += t2
371-
abs(t) < tol * abs(ai1) && break
372-
t *= x3 * inv(9*(i + one(T))*(i + T(1)/3))
373-
t2 *= x3 * inv(9*(i + one(T))*(i + T(5)/3))
374-
end
375-
return (ai1*3^(T(1)/6) + ai2*3^(-T(7)/6))
376-
end
377-
airybiprime_power_series(x::Float32) = Float32(airybiprime_power_series(Float64(x), tol=eps(Float32)))
378-
airybiprime_power_series(x::ComplexF32) = ComplexF32(airybiprime_power_series(ComplexF64(x), tol=eps(Float32)))
379-
380287
# calculates besselk from the power series of besseli using the continued fraction and wronskian
381288
# this shift the order higher first to avoid cancellation in the power series of besseli along the imaginary axis
382289
# for real arguments this is not needed because besseli can be computed stably over the entire real axis
@@ -461,13 +368,12 @@ end
461368
a = SIMDMath.fadd(pvec1, zvec)
462369
b = SIMDMath.fsub(pvec1, zvec)
463370

464-
A = complex(b.re[1].value, b.im[1].value)
465-
B = complex(a.re[1].value, a.im[1].value)
466-
C = complex(b.re[2].value, b.im[2].value)
467-
D = complex(a.re[2].value, a.im[2].value)
371+
A, B, C, D = b[1], a[1], b[2], a[2]
468372
return A, B, C, D
469373
end
470374

375+
# Asymptotic Expansion coefficients
376+
471377
# to generate asymptotic expansions then split into even and odd coefficients
472378
# tuple(Float64.([3^k * gamma(k + 1//6) * gamma(k + 5//6) / (2^(2k+2) * gamma(k+1)) for k in big"0":big"24"])...)
473379
# tuple(Float64.([(3)^k * gamma(k - 1//6) * gamma(k + 7//6) / (2^(2k+2) * gamma(k+1)) for k in big"0":big"24"])...)
@@ -480,3 +386,44 @@ const AIRY_ASYM_COEF = (
480386
)
481387

482388
const pack_AIRY_ASYM_COEF = SIMDMath.pack_poly(AIRY_ASYM_COEF)
389+
390+
# Power series coefficients
391+
392+
# airyai
393+
# tuple(Float64.([3^(-2k - 2//3) / (gamma(k + 2//3) * gamma(k+1)) for k in big"0":big"31"])...)
394+
# tuple(Float64.([3^(-2k - 4//3) / (gamma(k + 4//3) * gamma(k+1)) for k in big"0":big"31"])...)
395+
# airyaiprime
396+
# tuple(Float64.([3^(-2k - 1//3) / (gamma(k + 1//3) * gamma(k+1)) for k in big"0":big"31"])...)
397+
# tuple(Float64.([3^(-2k - 5//3) / (gamma(k + 5//3) * gamma(k+1)) for k in big"0":big"31"])...)
398+
# airybi
399+
# tuple(Float64.([3^(-2k - 1//6) / (gamma(k + 2//3) * gamma(k+1)) for k in big"0":big"31"])...)
400+
# tuple(Float64.([3^(-2k - 5//6) / (gamma(k + 4//3) * gamma(k+1)) for k in big"0":big"31"])...)
401+
# airybiprime
402+
# tuple(Float64.([3^(-2k + 1//6) / (gamma(k + 1//3) * gamma(k+1)) for k in big"0":big"31"])...)
403+
# tuple(Float64.([3^(-2k - 7//6) / (gamma(k + 5//3) * gamma(k+1)) for k in big"0":big"31"])...)
404+
405+
const AIRYAI_POW_COEF = (
406+
(0.3550280538878172, 0.05917134231463621, 0.00197237807715454, 2.7394139960479725e-5, 2.0753136333696763e-7, 9.882445873188935e-10, 3.2295574748983444e-12, 7.689422559281772e-15, 1.3930113332032197e-17, 1.984346628494615e-20, 2.280858193671971e-23, 2.159903592492397e-26, 1.7142092003907912e-29, 1.156686370034272e-32, 6.717110162800651e-36, 3.392479880202349e-39, 1.5037588121464314e-42, 5.897093380966397e-46, 2.060479867563381e-49, 6.455137429709841e-53, 1.8234851496355484e-56, 4.6684207619957715e-60, 1.0882099678311822e-63, 2.3192880814816327e-67, 4.536948516200377e-71, 8.174682011171851e-75, 1.3610859159460292e-78, 2.1004412283117732e-82, 3.0126810503611208e-86, 4.026571839563112e-90, 5.026931135534472e-94, 5.875328582906115e-98),
407+
(0.2588194037928068, 0.021568283649400565, 0.0005135305630809659, 5.705895145344065e-6, 3.657625093169273e-8, 1.5240104554871968e-10, 4.456170922477184e-13, 9.645391607093471e-16, 1.6075652678489119e-18, 2.1264090844562326e-21, 2.286461381135734e-24, 2.0378443682136668e-27, 1.5299131893495997e-30, 9.8071358291641e-34, 5.430307768086435e-37, 2.623337086032094e-40, 1.1153644073265705e-43, 4.2057481422570536e-47, 1.4160768155747653e-50, 4.2833539491069734e-54, 1.1703152866412495e-57, 2.902567675201512e-61, 6.56392509091251e-65, 1.3589907020522795e-68, 2.585598748196879e-72, 4.536138154731366e-76, 7.361470552955803e-80, 1.108321372019844e-83, 1.5522708291594454e-87, 2.0275219816607177e-91, 2.4756068152145513e-95, 2.8318540553815505e-99),
408+
(0.2588194037928068, 0.08627313459760226, 0.003594713941566761, 5.705895145344065e-5, 4.7549126211200545e-7, 2.438416728779515e-9, 8.46672475270665e-12, 2.1219861535605637e-14, 4.0189131696222797e-17, 5.953945436477451e-20, 7.088030281520775e-23, 6.928670851926468e-26, 5.660678800593519e-29, 3.9228543316656404e-32, 2.335032340277167e-35, 1.206735059574763e-38, 5.4652855959001957e-42, 2.1869890339736677e-45, 7.78842248566121e-49, 2.4843452904820447e-52, 7.138923248511622e-56, 1.8576433121289676e-59, 4.397829810911382e-63, 9.512934914365956e-67, 1.8874870861837215e-70, 3.4474649975958385e-74, 5.815561736835084e-78, 9.08823525056272e-82, 1.3194302047855285e-85, 1.7842193438614314e-89, 2.2528022018452416e-93, 2.6619428120586576e-97),
409+
(0.1775140269439086, 0.011834268462927242, 0.0002465472596443175, 2.4903763600436113e-6, 1.48236688097834e-8, 5.81320345481702e-11, 1.6147787374491722e-13, 3.3432271996877272e-16, 5.35773589693546e-19, 6.842574581015913e-22, 7.12768185522491e-25, 6.171153121406848e-28, 4.511076843133661e-31, 2.8211862683762735e-34, 1.526615946091057e-37, 7.21804229830287e-41, 3.0075176242928623e-44, 1.1126591284842257e-47, 3.6794283349346094e-51, 1.094091089781329e-54, 2.941105080057336e-58, 7.182185787685802e-62, 1.6003087762223267e-65, 3.2666029316642714e-69, 6.131011508378888e-73, 1.0616470144379027e-76, 1.7013573949325364e-80, 2.5306520823033415e-84, 3.5031175004199077e-88, 4.5242380219810255e-92, 5.4640555821026875e-96, 6.184556403059069e-100)
410+
)
411+
const AIRYBI_POW_COEF = (
412+
(0.6149266274460007, 0.10248777124100013, 0.0034162590413666706, 4.7448042241203764e-5, 3.5945486546366485e-7, 1.7116898355412612e-9, 5.5937576324877815e-12, 1.3318470553542337e-14, 2.412766404627235e-17, 3.4369891803806764e-20, 3.9505622762996283e-23, 3.7410627616473754e-26, 2.9690974298788695e-29, 2.0034395613217741e-32, 1.163437608200798e-35, 5.875947516165647e-39, 2.604586664967042e-42, 1.0214065352811929e-45, 3.568855818592568e-49, 1.1180625998097016e-52, 3.1583689260161066e-56, 8.08594195088609e-60, 1.884834953586501e-63, 4.017124794515134e-67, 7.858225341383282e-71, 1.415896457906898e-74, 2.3574699598849448e-78, 3.6380709257483713e-82, 5.2181166462254325e-86, 6.9742270064493885e-90, 8.706900132895616e-94, 1.0176367616755045e-97),
413+
(0.4482883573538264, 0.03735736311281886, 0.0008894610264956872, 9.882900294396524e-6, 6.335192496408029e-8, 2.6396635401700117e-10, 7.718314444941556e-13, 1.6706308322384318e-15, 2.7843847203973864e-18, 3.683048571954215e-21, 3.960267281671199e-24, 3.52964998366417e-27, 2.6498873751232507e-30, 1.698645753284135e-33, 9.405568955061657e-37, 4.5437531183872734e-40, 1.9318678224435686e-43, 7.284569466227635e-47, 2.4527169919958367e-50, 7.418986666654073e-54, 2.0270455373371784e-57, 5.0273946858560976e-61, 1.136905175453663e-64, 2.353840942968246e-68, 4.478388399863482e-72, 7.856821754146459e-76, 1.275044101614161e-79, 1.919668927452817e-83, 2.688611943211228e-87, 3.511771085699096e-91, 4.28787678351538e-95, 4.904915103540814e-99),
414+
(0.4482883573538264, 0.14942945245127545, 0.0062262271854698105, 9.882900294396525e-5, 8.235750245330437e-7, 4.223461664272019e-9, 1.4664797445388956e-11, 3.67538783092455e-14, 6.960961800993466e-17, 1.0312536001471802e-19, 1.2276828573180715e-22, 1.2000809944458178e-25, 9.804583287956028e-29, 6.79458301313654e-32, 4.0443946506765124e-35, 2.0901264344581458e-38, 9.466152329973487e-42, 3.78797612243837e-45, 1.3489943455977101e-48, 4.3030122666593625e-52, 1.236497777775679e-55, 3.2175325989479025e-59, 7.617264675539542e-63, 1.6476886600777724e-66, 3.269223531900342e-70, 5.97118453315131e-74, 1.007284840275187e-77, 1.5741285205113097e-81, 2.2853201517295438e-85, 3.0903585554152047e-89, 3.901967872998996e-93, 4.6106201973283655e-97),
415+
(0.30746331372300034, 0.020497554248200024, 0.0004270323801708338, 4.313458385563978e-6, 2.567534753311892e-8, 1.0068763738478007e-10, 2.796878816243891e-13, 5.790639371105364e-16, 9.279870787027826e-19, 1.1851686828898885e-21, 1.2345507113436338e-24, 1.068875074756393e-27, 7.813414289154919e-31, 4.8864379544433515e-34, 2.6441763822745408e-37, 1.2502015991841802e-40, 5.209173329934083e-44, 1.9271821420399865e-47, 6.372956818915299e-51, 1.895021355609664e-54, 5.0941434290582364e-58, 1.2439910693670906e-61, 2.771816108215443e-65, 5.657922245795964e-69, 1.0619223434301734e-72, 1.838826568710257e-76, 2.946837449856181e-80, 4.383217982829363e-84, 6.067577495610968e-88, 7.836210119606054e-92, 9.464021883582192e-96, 1.071196591237373e-99)
416+
)
417+
418+
const pack_AIRYAI_POW_COEF = SIMDMath.pack_poly(AIRYAI_POW_COEF)
419+
const pack_AIRYBI_POW_COEF = SIMDMath.pack_poly(AIRYBI_POW_COEF)
420+
421+
# promote Float16 values
422+
for internalf in (:airyai, :airyaiprime, :airybi, :airybiprime), T in (:ComplexF16,)
423+
@eval $internalf(x::$T) = $T($internalf(ComplexF32(x)))
424+
end
425+
426+
# promote power series and asymptotic expansion to Float64 precision
427+
for internalf in (:airyai_power_series, :airybi_power_series, :airyai_large_args, :airybi_large_args), T in (:ComplexF32,)
428+
@eval $internalf(x::$T) = $T.($internalf(ComplexF64(x)))
429+
end

0 commit comments

Comments
 (0)