Skip to content

Commit 3040289

Browse files
authored
implement Sidi methods (#472)
* implement Sidi methods * version bump * avoid unpack of named tuple for 1.6
1 parent 2e1a255 commit 3040289

File tree

8 files changed

+180
-3
lines changed

8 files changed

+180
-3
lines changed

Project.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
name = "Roots"
22
uuid = "f2b01f46-fcfa-551c-844a-d8ac1e96c665"
3-
version = "2.2.7"
3+
version = "2.2.8"
44

55
[deps]
66
Accessors = "7d9f7c33-5ae7-4f3b-8dc6-eff91059b697"

docs/src/index.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,8 @@ specification of a method. These include:
3535
a bracketing method when a bracket is encountered, The higher order
3636
methods promise higher order (faster) convergence, though don't
3737
always yield results with fewer function calls than `Order1` or
38-
`Order2`. The methods `Roots.Order1B` and `Roots.Order2B` are
38+
`Order2`. `Roots.Sidi` is a family of methods.
39+
The methods `Roots.Order1B` and `Roots.Order2B` are
3940
superlinear and quadratically converging methods independent of the
4041
multiplicity of the zero.
4142

docs/src/reference.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -105,7 +105,7 @@ The [secant](https://en.wikipedia.org/wiki/Secant_method) method replaces the d
105105
``x_{n+1} = x_n - (\frac{f(x_n)-f(x_{n-1})}{x_n - x_{n-1}})^{-1}\cdot f(x_n).``
106106

107107
Though the secant method has convergence rate of order ``\approx 1.618`` -- i.e., is not quadratic -- it
108-
only requires one new function call per step so can be very effective. Often function evaluations are the slowest part of the computation and, as well, no derivative is needed. Because it can be very efficient, the secant method is used in the default method of `find_zero` when called with a single initial starting point.
108+
only requires one new function call per step so can be very effective. Often function evaluations are the slowest part of the computation and, as well, no derivative is needed. Because it can be very efficient, the secant method is used in the default method of `find_zero` when called with a single initial starting point. The `Roots.Sidi` methods generalize the secant method.
109109

110110
[Steffensen's](https://en.wikipedia.org/wiki/Steffensen%27s_method) method is a quadratically converging. derivative-free method which uses a secant line based on ``x_n`` and ``x_n + f(x_n)``. Though of higher order, it requires additional function calls per step and depends on a good initial starting value. Other derivative free methods are available, trading off increased function calls for higher-order convergence. They may be of interest when arbitrary precision is needed. A measure of efficiency is ``q^{1/r}`` where ``q`` is the order of convergence and ``r`` the number of function calls per step. With this measure, the secant method would be ``\approx (1.618)^{1/1}`` and Steffensen's would be less (``2^{1/2}``).
111111

@@ -119,6 +119,7 @@ Order2
119119
Order5
120120
Order8
121121
Order16
122+
Roots.Sidi
122123
```
123124

124125

src/DerivativeFree/sidi.jl

Lines changed: 157 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,157 @@
1+
"""
2+
Sidi(k)
3+
4+
Implements family of methods in "Generalization Of The Secant Method For Nonlinear Equations" by Avram Sidi in Applied Mathematics E-Notes, 8(2008), 115-123.
5+
6+
These methods generalize the secant method by using an interpolating polynomial based on the last ``k+1`` points to estimate ``f'(xₙ)`` in its use with Newton's method, the secant method being the ``k=1`` case.
7+
8+
9+
## Convergence rates:
10+
11+
* `Sidi(1) = 1.618⋯` (secant method)
12+
* `Sidi(2) = 1.839⋯`
13+
* `Sidi(3) = 1.928⋯`
14+
* `Sidi(4) = 1.966⋯`
15+
16+
Like the secant method, each update step requires one function evaluation.
17+
18+
## Example
19+
20+
```
21+
find_zero(sin, 3, Roots.Sidi(2))
22+
```
23+
24+
"""
25+
struct Sidi{k} <: AbstractSecantMethod end
26+
Sidi(k::Int) = Sidi{k}()
27+
28+
struct SidiState{T,S} <: AbstractUnivariateZeroState{T,S}
29+
xn1::T
30+
xn0::T
31+
fxn1::S
32+
fxn0::S
33+
xs::Vector{T}
34+
fs::Vector{T}
35+
end
36+
37+
function init_state(M::Sidi{k}, F::Callable_Function, x) where {k}
38+
x₀, x₁ = x₀x₁(x)
39+
fx₀, xs, fs = _init_sidi(F, (x₀, x₁), k)
40+
state = SidiState(xs[k], xs[k+1], fx₀, fs[1], xs, fs)
41+
end
42+
43+
function update_state(
44+
L::Sidi{k},
45+
F,
46+
o::SidiState{T,S},
47+
options,
48+
l=NullTracks(),
49+
) where {k, T, S}
50+
51+
xs, fs = o.xs, o.fs
52+
fxn1 = o.fxn1
53+
_update_sidi!(F, xs, fs)
54+
55+
incfn(l)
56+
@reset o.xn0 = xs[k]
57+
@reset o.xn1 = xs[k+1]
58+
@reset o.fxn0 = o.fxn1
59+
@reset o.fxn1 = fs[1]
60+
@reset o.xs = xs
61+
@reset o.fs = fs
62+
63+
return (o, false)
64+
end
65+
66+
# create xs and fs; fs records lower diagonal in newton tableau
67+
# x1 . . . f1234
68+
# x2 . . f234
69+
# x3 . f34
70+
# x4 f4
71+
# xs = [x1,x2,x3,x4]; fs = [f4, f34, f234, f1234]
72+
# build up diagonal by diagonal
73+
# where pk' uses xs, fs to be computed
74+
# no good way to pre-compute fs to speed up, so here
75+
# we expect x to have 2 or more elements
76+
# but we compute each f(x)
77+
# This allocates, as it uses a vector to store xs, fs
78+
function _init_sidi(f, x, k)
79+
x₀ = first(x)
80+
fx₀ = f(x₀)
81+
82+
xs = Vector{typeof( x₀)}(undef, k+1)
83+
fs = Vector{typeof(fx₀)}(undef, k+1)
84+
85+
n = length(x)
86+
xs[1:n] .= x
87+
88+
xs[1] = x[1]
89+
xs[2] = x[2]
90+
91+
# diagonal for x2 above (two entries)
92+
fs[1] = f(xs[2])
93+
fs[2] = (fx₀ - fs[1]) / (xs[1] - xs[2])
94+
95+
# build up diagonal by diagonal
96+
for j 3:(k+1)
97+
if j n # xⱼ was specified
98+
xⱼ = xs[j]
99+
else
100+
xⱼ₋₁ = xs[j-1]
101+
pk′ = evaluate_pk′(view(xs, 1:j-1), view(fs, 1:j-1))
102+
xⱼ = xs[j] = xⱼ₋₁ - fs[1] / pk′
103+
end
104+
Δ = f(xⱼ)
105+
for i 2:j
106+
Δ₀ = fs[i-1]
107+
fs[i-1] = Δ
108+
Δ = (Δ₀ - Δ) / (xs[j-i+1] - xs[j])
109+
end
110+
fs[j] = Δ
111+
end
112+
113+
# return fx₀ for bookkeeping purposes
114+
fx₀, xs, fs
115+
116+
end
117+
118+
# update step: compute xn, fxn, update the xs,fs tableau
119+
function _update_sidi!(f, xs, fs)
120+
xₙ₋₁, fxₙ₋₁ = xs[end], fs[1]
121+
fn′ = evaluate_pk′(xs, fs)
122+
xn = xₙ₋₁ - fxₙ₋₁ / fn′
123+
fxn = f(xn)
124+
update_tableau!(xn, fxn, xs, fs)
125+
end
126+
127+
# formula (10) in paper to evaluate derivative of interpolating polynomial
128+
function evaluate_pk′(xs1, fs1)
129+
δ = xs1[end] - xs1[end-1]
130+
Σ = fs1[2]
131+
k = length(xs1)
132+
for i 3:k
133+
Σ = Σ + fs1[i] * δ
134+
δ = δ * (xs1[end] - xs1[end-i+1])
135+
end
136+
Σ
137+
end
138+
139+
140+
# update tableau's lower part
141+
# leaves [xn-k, xn-k+1, xn-k+2, ..., xn]
142+
# [fn, f(n-1,n), f(n-2, n-1, n), ..., f(n-k,n-k+1, ..., n)]
143+
function update_tableau!(xn, fxn, xs0, fs0)
144+
k = length(xs0)
145+
for i in 1:k-1
146+
xs0[i] = xs0[i+1]
147+
end
148+
xs0[end] = xn
149+
Δ = fxn
150+
for i in 2:k
151+
Δ₀ = fs0[i-1]
152+
fs0[i-1] = Δ
153+
Δ = (Δ₀ - Δ) / (xs0[end-i+1] - xn)
154+
end
155+
fs0[end] = Δ
156+
xs0, fs0
157+
end

src/Roots.jl

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,7 @@ include("DerivativeFree/order8.jl")
7070
include("DerivativeFree/order16.jl")
7171
include("DerivativeFree/king.jl")
7272
include("DerivativeFree/esser.jl")
73+
include("DerivativeFree/sidi.jl")
7374
include("DerivativeFree/order0.jl")
7475

7576
include("Derivative/newton.jl")

src/find_zero.jl

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -318,6 +318,12 @@ function init(
318318
ZeroProblemIterator(M, Nothing, Callable_Function(M, F), state, options, l)
319319
end
320320

321+
# helper for development use only
322+
function __init(f,x,M,p=nothing; kwargs...)
323+
s = init(ZeroProblem(f,x), M, p;kwargs...)
324+
(M=s.M, F=s.F, state=s.state, options=s.options,logger=s.logger)
325+
end
326+
321327
"""
322328
solve!(P::ZeroProblemIterator)
323329
solve(fx::ZeroProblem, [M], [N]; p=nothing, kwargs...)

test/test_derivative_free.jl

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -306,6 +306,8 @@ if !isinteractive()
306306
Roots.Order5(),
307307
Roots.Order8(),
308308
Roots.Order16(),
309+
Roots.Sidi(2),
310+
Roots.Sidi(3)
309311
]
310312
results = [run_df_tests((f, b) -> find_zero(f, b, M), name="$M") for M in Ms]
311313

@@ -356,6 +358,7 @@ if !isinteractive()
356358
Roots.Order5(),
357359
Roots.Order8(),
358360
Roots.Order16(),
361+
Roots.Sidi(2)
359362
]
360363
Ts = [Float16, Float32, BigFloat]
361364

test/test_find_zero.jl

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ struct Order3_Test <: Roots.AbstractSecantMethod end
2828
Roots.Thukral16(),
2929
Roots.LithBoonkkampIJzerman(3, 0),
3030
Roots.LithBoonkkampIJzerman(4, 0),
31+
Roots.Sidi(2)
3132
]
3233

3334
## different types of initial values
@@ -594,3 +595,10 @@ end
594595
@test find_zero(f, (0, 8), atol=1) 1.99609375
595596
@test find_zero(f, (0, 8), atol=1e-3) 2.0000152587890625
596597
end
598+
599+
@testset "similar methods" begin
600+
Lsidi,Lsec = Roots.Tracks(),Roots.Tracks()
601+
find_zero(sin, 3.0, Roots.Sidi(1); tracks=Lsidi)
602+
find_zero(sin, 3.0, Roots.Secant(); tracks=Lsec)
603+
@test Lsidi.xfₛ[3:end] == Lsec.xfₛ[3:end] # drop x₀x₁ ordering
604+
end

0 commit comments

Comments
 (0)