Skip to content

Commit 8dea409

Browse files
authored
Merge pull request #6 from heltonmc/besseli
Change besseli0 and besseli1 routines to rational approximation
2 parents 547bfad + a5da213 commit 8dea409

File tree

3 files changed

+184
-114
lines changed

3 files changed

+184
-114
lines changed

src/besseli.jl

Lines changed: 91 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1,46 +1,115 @@
1-
#=
2-
Cephes Math Library Release 2.8: June, 2000
3-
Copyright 1984, 1987, 2000 by Stephen L. Moshier
4-
https://github.com/jeremybarnes/cephes/blob/master/bessel/i0.c
5-
https://github.com/jeremybarnes/cephes/blob/master/bessel/i1.c
6-
=#
1+
# Modified Bessel functions of the first kind of order zero and one
2+
# besseli0, besseli1
3+
#
4+
# Scaled modified Bessel functions of the first kind of order zero and one
5+
# besseli0x, besselix
6+
#
7+
# Calculation of besseli0 is done in two branches using polynomial approximations [1]
8+
#
9+
# Branch 1: x < 7.75
10+
# besseli0 = [x/2]^2 P16([x/2]^2)
11+
# Branch 2: x >=
12+
# sqrt(x) * exp(-x) * besseli0(x) = P22(1/x)
13+
# where P16 and P22 are a 16 and 22 degree polynomial respectively.
14+
#
15+
# Remez.jl is then used to approximate the polynomial coefficients of
16+
# P22(y) = sqrt(1/y) * exp(-inv(y)) * besseli0(inv(y))
17+
# N,D,E,X = ratfn_minimax(g, (1/1e6, 1/7.75), 21, 0)
18+
#
19+
# A third branch is used for scaled functions for large values
20+
#
21+
#
22+
# Calculation of besseli1 is done in two branches using polynomial approximations [2]
23+
#
24+
# Branch 1: x < 7.75
25+
# besseli1 = x / 2 * (1 + 1/2 * (x/2)^2 + (x/2)^4 * P13([x/2]^2)
26+
# Branch 2: x >=
27+
# sqrt(x) * exp(-x) * besseli0(x) = P22(1/x)
28+
# where P13 and P22 are a 16 and 22 degree polynomial respectively.
29+
#
30+
# Remez.jl is then used to approximate the polynomial coefficients of
31+
# P13(y) = (besseli1(2 * sqrt(y)) / sqrt(y) - 1 - y/2) / y^2
32+
# N,D,E,X = ratfn_minimax(g, (1/1e6, 1/7.75), 21, 0)
33+
#
34+
# A third branch is used for scaled functions for large values
35+
#
36+
# Horner's scheme is then used to evaluate all polynomials.
37+
# ArbNumerics.jl is used as the reference bessel implementations with 75 digits.
38+
#
39+
# [1] "Rational Approximations for the Modified Bessel Function of the First Kind
40+
# - I0(x) for Computations with Double Precision" by Pavel Holoborodko
41+
# [2] "Rational Approximations for the Modified Bessel Function of the First Kind
42+
# - I1(x) for Computations with Double Precision" by Pavel Holoborodko
43+
"""
44+
besseli0(x::T) where T <: Union{Float32, Float64}
45+
46+
Modified Bessel function of the first kind of order zero, ``I_0(x)``.
47+
"""
748
function besseli0(x::T) where T <: Union{Float32, Float64}
8-
if x <= 8
9-
y = muladd(x, T(.5), T(-2))
10-
return exp(x) * chbevl(y, A_i0(T))
49+
x = abs(x)
50+
if x < 7.75
51+
a = x * x / 4
52+
return muladd(a, evalpoly(a, besseli0_small_coefs(T)), 1)
1153
else
12-
return exp(x) * chbevl(T(32) / x - T(2), B_i0(T)) / sqrt(x)
54+
a = exp(x / 2)
55+
s = a * evalpoly(inv(x), besseli0_med_coefs(T)) / sqrt(x)
56+
return a * s
1357
end
1458
end
59+
"""
60+
besseli0x(x::T) where T <: Union{Float32, Float64}
61+
62+
Scaled modified Bessel function of the first kind of order zero, ``I_0(x)*e^{-x}``.
63+
"""
1564
function besseli0x(x::T) where T <: Union{Float32, Float64}
65+
T == Float32 ? branch = 50 : branch = 500
1666
x = abs(x)
17-
if x <= 8
18-
y = muladd(x, T(.5), T(-2))
19-
return chbevl(y, A_i0(T))
67+
if x < 7.75
68+
a = x * x / 4
69+
return muladd(a, evalpoly(a, besseli0_small_coefs(T)), 1) * exp(-x)
70+
elseif x < branch
71+
return evalpoly(inv(x), besseli0_med_coefs(T)) / sqrt(x)
2072
else
21-
return chbevl(T(32) / x - T(2), B_i0(T)) / sqrt(x)
73+
return evalpoly(inv(x), besseli0_large_coefs(T)) / sqrt(x)
2274
end
2375
end
76+
"""
77+
besseli1(x::T) where T <: Union{Float32, Float64}
78+
79+
Modified Bessel function of the first kind of order one, ``I_1(x)``.
80+
"""
2481
function besseli1(x::T) where T <: Union{Float32, Float64}
2582
z = abs(x)
26-
if x <= 8
27-
y = muladd(z, T(.5), T(-2))
28-
z = chbevl(y, A_i1(T)) * z * exp(z)
83+
if z < 7.75
84+
a = z * z / 4
85+
inner = (one(T), T(0.5), evalpoly(a, besseli1_small_coefs(T)))
86+
z = z * evalpoly(a, inner) / 2
2987
else
30-
z = exp(z) * chbevl(T(32) / z - T(2), B_i1(T)) / sqrt(z)
88+
a = exp(z / 2)
89+
s = a * evalpoly(inv(z), besseli1_med_coefs(T)) / sqrt(z)
90+
z = a * s
3191
end
3292
if x < zero(x)
3393
z = -z
3494
end
3595
return z
3696
end
97+
"""
98+
besseli1x(x::T) where T <: Union{Float32, Float64}
99+
100+
Scaled modified Bessel function of the first kind of order one, ``I_1(x)*e^{-x}``.
101+
"""
37102
function besseli1x(x::T) where T <: Union{Float32, Float64}
103+
T == Float32 ? branch = 50 : branch = 500
38104
z = abs(x)
39-
if z <= 8
40-
y = muladd(z, T(.5), T(-2))
41-
z = chbevl(y, A_i1(T)) * z
105+
if z < 7.75
106+
a = z * z / 4
107+
inner = (one(T), T(0.5), evalpoly(a, besseli1_small_coefs(T)))
108+
z = z * evalpoly(a, inner) / 2 * exp(-z)
109+
elseif z < branch
110+
z = evalpoly(inv(z), besseli1_med_coefs(T)) / sqrt(z)
42111
else
43-
z = chbevl(T(32) / z - T(2), B_i1(T)) / sqrt(z)
112+
z = evalpoly(inv(z), besseli1_large_coefs(T)) / sqrt(z)
44113
end
45114
if x < zero(x)
46115
z = -z

src/constants.jl

Lines changed: 65 additions & 71 deletions
Original file line numberDiff line numberDiff line change
@@ -104,51 +104,71 @@ const QQ_j0(::Type{Float64}) = (
104104
7.24046774195652478189E3, 3.88240183605401609683E3, 8.56430025976980587198E2,
105105
6.43178256118178023184E1, 1.00000000000000000000E0
106106
)
107-
const A_i0(::Type{Float64}) = (
108-
-4.41534164647933937950E-18, 3.33079451882223809783E-17, -2.43127984654795469359E-16,
109-
1.71539128555513303061E-15, -1.16853328779934516808E-14, 7.67618549860493561688E-14,
110-
-4.85644678311192946090E-13, 2.95505266312963983461E-12, -1.72682629144155570723E-11,
111-
9.67580903537323691224E-11, -5.18979560163526290666E-10, 2.65982372468238665035E-9,
112-
-1.30002500998624804212E-8, 6.04699502254191894932E-8, -2.67079385394061173391E-7,
113-
1.11738753912010371815E-6, -4.41673835845875056359E-6, 1.64484480707288970893E-5,
114-
-5.75419501008210370398E-5, 1.88502885095841655729E-4, -5.76375574538582365885E-4,
115-
1.63947561694133579842E-3, -4.32430999505057594430E-3, 1.05464603945949983183E-2,
116-
-2.37374148058994688156E-2, 4.93052842396707084878E-2, -9.49010970480476444210E-2,
117-
1.71620901522208775349E-1, -3.04682672343198398683E-1, 6.76795274409476084995E-1
118-
)
119-
const B_i0(::Type{Float64}) = (
120-
-7.23318048787475395456E-18, -4.83050448594418207126E-18, 4.46562142029675999901E-17,
121-
3.46122286769746109310E-17, -2.82762398051658348494E-16, -3.42548561967721913462E-16,
122-
1.77256013305652638360E-15, 3.81168066935262242075E-15, -9.55484669882830764870E-15,
123-
-4.15056934728722208663E-14, 1.54008621752140982691E-14, 3.85277838274214270114E-13,
124-
7.18012445138366623367E-13, -1.79417853150680611778E-12, -1.32158118404477131188E-11,
125-
-3.14991652796324136454E-11, 1.18891471078464383424E-11, 4.94060238822496958910E-10,
126-
3.39623202570838634515E-9, 2.26666899049817806459E-8, 2.04891858946906374183E-7,
127-
2.89137052083475648297E-6, 6.88975834691682398426E-5, 3.36911647825569408990E-3,
128-
8.04490411014108831608E-1
129-
)
130-
const A_i1(::Type{Float64}) = (
131-
2.77791411276104639959E-18, -2.11142121435816608115E-17, 1.55363195773620046921E-16,
132-
-1.10559694773538630805E-15, 7.60068429473540693410E-15, -5.04218550472791168711E-14,
133-
3.22379336594557470981E-13, -1.98397439776494371520E-12, 1.17361862988909016308E-11,
134-
-6.66348972350202774223E-11, 3.62559028155211703701E-10, -1.88724975172282928790E-9,
135-
9.38153738649577178388E-9, -4.44505912879632808065E-8, 2.00329475355213526229E-7,
136-
-8.56872026469545474066E-7, 3.47025130813767847674E-6, -1.32731636560394358279E-5,
137-
4.78156510755005422638E-5, -1.61760815825896745588E-4, 5.12285956168575772895E-4,
138-
-1.51357245063125314899E-3, 4.15642294431288815669E-3, -1.05640848946261981558E-2,
139-
2.47264490306265168283E-2, -5.29459812080949914269E-2, 1.02643658689847095384E-1,
140-
-1.76416518357834055153E-1, 2.52587186443633654823E-1
141-
)
142-
const B_i1(::Type{Float64}) = (
143-
7.51729631084210481353E-18, 4.41434832307170791151E-18, -4.65030536848935832153E-17,
144-
-3.20952592199342395980E-17, 2.96262899764595013876E-16, 3.30820231092092828324E-16,
145-
-1.88035477551078244854E-15, -3.81440307243700780478E-15, 1.04202769841288027642E-14,
146-
4.27244001671195135429E-14, -2.10154184277266431302E-14, -4.08355111109219731823E-13,
147-
-7.19855177624590851209E-13, 2.03562854414708950722E-12, 1.41258074366137813316E-11,
148-
3.25260358301548823856E-11, -1.89749581235054123450E-11, -5.58974346219658380687E-10,
149-
-3.83538038596423702205E-9, -2.63146884688951950684E-8, -2.51223623787020892529E-7,
150-
-3.88256480887769039346E-6, -1.10588938762623716291E-4, -9.76109749136146840777E-3,
151-
7.78576235018280120474E-1
107+
const besseli0_small_coefs(::Type{Float32}) = (
108+
1.0000000416172086f0, 0.2499995567758189f0, 0.027778554423089f0,
109+
0.0017355879591086688f0, 6.96204574858338f-5, 1.8959194799355527f-6,
110+
4.298760429567388f-8, 3.8884953805853935f-10, 1.483799986601676f-11
111+
)
112+
const besseli0_small_coefs(::Type{Float64}) = (
113+
0.9999999999999998, 0.2500000000000052, 0.027777777777755364,
114+
0.001736111111149161, 6.94444444107536e-5, 1.9290123635366806e-6,
115+
3.9367592765038015e-8, 6.151201574092085e-10, 7.593827956729909e-12,
116+
7.596677643342155e-14, 6.255282299620455e-16, 4.470993793303175e-18,
117+
2.1859737023077178e-20, 2.0941557335286373e-22
118+
)
119+
const besseli0_med_coefs(::Type{Float32}) = (
120+
0.3989423094522082f0, 0.049857687814784155f0, 0.028606362029333827f0,
121+
0.018926572151825607f0, 0.11423960556773063f0
122+
)
123+
const besseli0_med_coefs(::Type{Float64}) = (
124+
0.3989422804014326, 0.04986778505064754, 0.028050628512954097,
125+
0.02921968830978531, 0.04466889626137549, 0.10220642174207666,
126+
-0.9937439085650689, 91.25330271974727, -4901.408890977662,
127+
199209.2752981982, -6.181516298413396e6, 1.4830278710991925e8,
128+
-2.7695254643719645e9, 4.0351394830842026e10, -4.5768930327229974e11,
129+
4.0134844243070063e12, -2.6862476523182016e13, 1.3437999451218112e14,
130+
-4.856333741437621e14, 1.1962791200680235e15, -1.796269414464399e15,
131+
1.239942074380968e15
132+
)
133+
const besseli0_large_coefs(::Type{Float32}) = (
134+
0.398942288181429f0, 0.04986085645388595f0, 0.028961207830173957f0
135+
)
136+
const besseli0_large_coefs(::Type{Float64}) = (
137+
0.3989422804014327, 0.04986778505003549, 0.028050629664438713,
138+
0.029218603406797997, 0.045199090067282434
139+
)
140+
const besseli1_small_coefs(::Type{Float32}) = (
141+
0.08333336088599108f0, 0.0069442679189687774f0, 0.00034740666242346436f0,
142+
1.1502079282185252f-5, 2.8884784673820464f-7, 3.686308746029452f-9,
143+
1.2271327751884605f-10
144+
)
145+
const besseli1_med_coefs(::Type{Float32}) = (
146+
3.98942115977513013f-1, -1.49581264836620262f-1, -4.76475741878486795f-2,
147+
-2.65157315524784407f-2, -1.47148600683672014f-1
148+
)
149+
const besseli1_small_coefs(::Type{Float64}) = (
150+
0.08333333333333334, 0.006944444444444374, 0.00034722222222248526,
151+
1.1574074073690356e-5, 2.7557319253050506e-7, 4.920949730519126e-9,
152+
6.834656365321179e-11, 7.593985414952446e-13, 6.904652315442046e-15,
153+
5.2213850252454655e-17, 3.405120412140281e-19, 1.6398527256182257e-21,
154+
1.3161876924566675e-23
155+
)
156+
const besseli1_med_coefs(::Type{Float64}) = (
157+
0.39894228040143276, -0.149603355151029, -0.04675104787903509,
158+
-0.04090746353279043, -0.05744911840910781,-0.12283724006390319,
159+
1.0023632527650936, -94.90954045770921, 5084.06899084327,
160+
-206253.5613716743, 6.387439538535799e6, -1.529244018923123e8,
161+
2.849523551208316e9, -4.141884344471782e10, 4.6860149658304974e11,
162+
-4.097852944580042e12, 2.7345051110005453e13, -1.3634833112030364e14,
163+
4.909983186948892e14, -1.2048200837913132e15, 1.8014682382937435e15,
164+
-1.2377987428989558e15
165+
)
166+
const besseli1_large_coefs(::Type{Float64}) = (
167+
0.39894228040143265, -0.14960335515036183, -0.0467510491854453,
168+
-0.04090618769936314, -0.05808395099511361
169+
)
170+
const besseli1_large_coefs(::Type{Float32}) = (
171+
0.3989422695655856f0, -0.1495936968314777f0, -0.04802187089304704f0
152172
)
153173
const A_k0(::Type{Float64}) = (
154174
1.37446543561352307156E-16, 4.25981614279661018399E-14, 1.03496952576338420167E-11,
@@ -184,32 +204,6 @@ const B_k1(::Type{Float64}) = (
184204
1.95215518471351631108E-4, -2.85781685962277938680E-3, 1.03923736576817238437E-1,
185205
2.72062619048444266945E0
186206
)
187-
const A_i0(::Type{Float32}) = (
188-
-1.30002500998624804212f-8, 6.04699502254191894932f-8, -2.67079385394061173391f-7,
189-
1.11738753912010371815f-6, -4.41673835845875056359f-6, 1.64484480707288970893f-5,
190-
-5.75419501008210370398f-5, 1.88502885095841655729f-4, -5.76375574538582365885f-4,
191-
1.63947561694133579842f-3, -4.32430999505057594430f-3, 1.05464603945949983183f-2,
192-
-2.37374148058994688156f-2, 4.93052842396707084878f-2, -9.49010970480476444210f-2,
193-
1.71620901522208775349f-1, -3.04682672343198398683f-1, 6.76795274409476084995f-1
194-
)
195-
const B_i0(::Type{Float32}) = (
196-
3.39623202570838634515f-9, 2.26666899049817806459f-8, 2.04891858946906374183f-7,
197-
2.89137052083475648297f-6, 6.88975834691682398426f-5, 3.36911647825569408990f-3,
198-
8.04490411014108831608f-1
199-
)
200-
const A_i1(::Type{Float32}) = (
201-
9.38153738649577178388f-9, -4.44505912879632808065f-8, 2.00329475355213526229f-7,
202-
-8.56872026469545474066f-7, 3.47025130813767847674f-6, -1.32731636560394358279f-5,
203-
4.78156510755005422638f-5, -1.61760815825896745588f-4, 5.12285956168575772895f-4,
204-
-1.51357245063125314899f-3, 4.15642294431288815669f-3, -1.05640848946261981558f-2,
205-
2.47264490306265168283f-2, -5.29459812080949914269f-2, 1.02643658689847095384f-1,
206-
-1.76416518357834055153f-1, 2.52587186443633654823f-1
207-
)
208-
const B_i1(::Type{Float32}) = (
209-
-3.83538038596423702205f-9, -2.63146884688951950684f-8, -2.51223623787020892529f-7,
210-
-3.88256480887769039346f-6, -1.10588938762623716291f-4, -9.76109749136146840777f-3,
211-
7.78576235018280120474f-1
212-
)
213207
const JP_j0(::Type{Float32}) = (
214208
-1.729150680240724f-1, 1.332913422519003f-2, -3.969646342510940f-4,
215209
6.388945720783375f-6, -6.068350350393235f-8

test/besseli_test.jl

Lines changed: 28 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -1,59 +1,66 @@
11
# general array for testing input to SpecialFunctions.jl
2-
x = 1e-6:0.01:50.0
2+
x32 = [-1.0; 0.0; 1e-6; 0.01:0.01:60.0]
3+
x64 = [-1.0; 0.0; 1e-6; 0.01:0.01:600.0]
34

45
### Tests for besseli0
5-
i0_SpecialFunctions = SpecialFunctions.besseli.(0, x)
6+
i032_SpecialFunctions = SpecialFunctions.besseli.(0, x32)
7+
i064_SpecialFunctions = SpecialFunctions.besseli.(0, x64)
68

7-
i0_64 = besseli0.(Float64.(x))
8-
i0_32 = besseli0.(Float32.(x))
9+
10+
i0_64 = besseli0.(Float64.(x64))
11+
i0_32 = besseli0.(Float32.(x32))
912

1013
# make sure output types match input types
1114
@test i0_64[1] isa Float64
1215
@test i0_32[1] isa Float32
1316

1417
# test against SpecialFunctions.jl
15-
@test i0_64 i0_SpecialFunctions
16-
@test i0_32 i0_SpecialFunctions
18+
@test i0_64 i064_SpecialFunctions
19+
@test i0_32 i032_SpecialFunctions
1720

1821
### Tests for besseli0x
19-
i0x_SpecialFunctions = SpecialFunctions.besselix.(0, x)
22+
i0x32_SpecialFunctions = SpecialFunctions.besselix.(0, x32)
23+
i0x64_SpecialFunctions = SpecialFunctions.besselix.(0, x64)
2024

21-
i0x_64 = besseli0x.(Float64.(x))
22-
i0x_32 = besseli0x.(Float32.(x))
25+
i0x_64 = besseli0x.(Float64.(x64))
26+
i0x_32 = besseli0x.(Float32.(x32))
2327

2428
# make sure output types match input types
2529
@test i0x_64[1] isa Float64
2630
@test i0x_32[1] isa Float32
2731

2832
# test against SpecialFunctions.jl
29-
@test i0x_64 i0x_SpecialFunctions
30-
@test i0x_32 i0x_SpecialFunctions
33+
@test i0x_64 i0x64_SpecialFunctions
34+
@test i0x_32 i0x32_SpecialFunctions
3135

3236

3337
### Tests for besseli1
34-
i1_SpecialFunctions = SpecialFunctions.besseli.(1, x)
38+
i132_SpecialFunctions = SpecialFunctions.besseli.(1, x32)
39+
i164_SpecialFunctions = SpecialFunctions.besseli.(1, x64)
40+
3541

36-
i1_64 = besseli1.(Float64.(x))
37-
i1_32 = besseli1.(Float32.(x))
42+
i1_64 = besseli1.(Float64.(x64))
43+
i1_32 = besseli1.(Float32.(x32))
3844

3945
# make sure output types match input types
4046
@test i1_64[1] isa Float64
4147
@test i1_32[1] isa Float32
4248

4349
# test against SpecialFunctions.jl
44-
@test i1_64 i1_SpecialFunctions
45-
@test i1_32 i1_SpecialFunctions
50+
@test i1_64 i164_SpecialFunctions
51+
@test i1_32 i132_SpecialFunctions
4652

4753
### Tests for besseli1x
48-
i1x_SpecialFunctions = SpecialFunctions.besselix.(1, x)
54+
i1x32_SpecialFunctions = SpecialFunctions.besselix.(1, x32)
55+
i1x64_SpecialFunctions = SpecialFunctions.besselix.(1, x64)
4956

50-
i1x_64 = besseli1x.(Float64.(x))
51-
i1x_32 = besseli1x.(Float32.(x))
57+
i1x_64 = besseli1x.(Float64.(x64))
58+
i1x_32 = besseli1x.(Float32.(x32))
5259

5360
# make sure output types match input types
5461
@test i1x_64[1] isa Float64
5562
@test i1x_32[1] isa Float32
5663

5764
# test against SpecialFunctions.jl
58-
@test i1x_64 i1x_SpecialFunctions
59-
@test i1x_32 i1x_SpecialFunctions
65+
@test i1x_64 i1x64_SpecialFunctions
66+
@test i1x_32 i1x32_SpecialFunctions

0 commit comments

Comments
 (0)