implement Sidi methods (#472)

jverzani · web-flow · commit 3040289c438f · 2025-06-19T21:13:35.000-04:00
* implement Sidi methods

* version bump

* avoid unpack of named tuple for 1.6
diff --git a/Project.toml b/Project.toml
@@ -1,6 +1,6 @@
 name = "Roots"
 uuid = "f2b01f46-fcfa-551c-844a-d8ac1e96c665"
-version = "2.2.7"
+version = "2.2.8"
 
 [deps]
 Accessors = "7d9f7c33-5ae7-4f3b-8dc6-eff91059b697"
diff --git a/docs/src/index.md b/docs/src/index.md
@@ -35,7 +35,8 @@ specification of a method. These include:
   a bracketing method when a bracket is encountered, The higher order
   methods promise higher order (faster) convergence, though don't
   always yield results with fewer function calls than `Order1` or
-  `Order2`. The methods `Roots.Order1B` and `Roots.Order2B` are
+  `Order2`. `Roots.Sidi(k)` is a family of derivative-free methods generalizing the secant method.
+  The methods `Roots.Order1B` and `Roots.Order2B` are
   superlinear and quadratically converging methods independent of the
   multiplicity of the zero.
 
diff --git a/docs/src/reference.md b/docs/src/reference.md
@@ -105,7 +105,7 @@ The [secant](https://en.wikipedia.org/wiki/Secant_method) method replaces the  d
 ``x_{n+1} = x_n - (\frac{f(x_n)-f(x_{n-1})}{x_n - x_{n-1}})^{-1}\cdot  f(x_n).``
 
 Though the secant  method   has  convergence  rate of  order ``\approx 1.618`` -- i.e., is not quadratic --  it
-only requires one new  function call per  step  so  can be very effective. Often  function evaluations are the  slowest part of  the computation and, as  well, no derivative is  needed. Because  it  can be  very efficient, the secant  method  is used in  the default method  of `find_zero` when  called with a single initial starting point.
+only requires one new  function call per  step  so  can be very effective. Often  function evaluations are the  slowest part of  the computation and, as  well, no derivative is  needed. Because  it  can be  very efficient, the secant  method  is used in  the default method  of `find_zero` when  called with a single initial starting point. The `Roots.Sidi` methods generalize the secant method.
 
 [Steffensen's](https://en.wikipedia.org/wiki/Steffensen%27s_method) method is a quadratically converging, derivative-free method  which uses a secant  line  based on ``x_n`` and ``x_n + f(x_n)``.  Though of  higher  order, it requires  additional function calls per step and depends on a  good initial starting value. Other  derivative free methods are available, trading off  increased function calls for higher-order convergence. They may be  of interest when arbitrary  precision is needed. A  measure of efficiency is ``q^{1/r}`` where ``q`` is the order of convergence and ``r`` the number of function calls per step.   With this measure, the secant method  would be ``\approx (1.618)^{1/1}`` and Steffensen's  would be less (``2^{1/2}``).
 
@@ -119,6 +119,7 @@ Order2
 Order5
 Order8
 Order16
+Roots.Sidi
 ```
 
 
diff --git a/src/DerivativeFree/sidi.jl b/src/DerivativeFree/sidi.jl
@@ -0,0 +1,157 @@
+"""
+    Sidi(k)
+
+Implements the family of methods from "Generalization Of The Secant Method For Nonlinear Equations" by Avram Sidi, Applied Mathematics E-Notes, 8(2008), 115-123.
+
+These methods generalize the secant method by using an interpolating polynomial based on the last ``k+1`` points to estimate ``f'(xₙ)`` in its use with Newton's method, the secant method being the ``k=1`` case.
+
+`k` must be a positive integer; `Sidi(k)` throws an `ArgumentError` otherwise.
+
+## Convergence rates:
+
+* `Sidi(1) = 1.618⋯` (secant method)
+* `Sidi(2) = 1.839⋯`
+* `Sidi(3) = 1.928⋯`
+* `Sidi(4) = 1.966⋯`
+
+Like the secant method, each update step requires one function evaluation.
+
+## Example
+
+```
+find_zero(sin, 3, Roots.Sidi(2))
+```
+
+"""
+struct Sidi{k} <: AbstractSecantMethod end
+function Sidi(k::Int)
+    # The tableau holds k+1 points and its start-up always writes the first
+    # two, so k < 1 cannot work; fail here rather than with a BoundsError later.
+    k ≥ 1 || throw(ArgumentError("Sidi(k) requires k ≥ 1, got k = $k"))
+    return Sidi{k}()
+end
+
+# State carried between steps of a `Sidi` method.
+# `xn1`, `xn0`, `fxn1`, `fxn0` are the scalar bookkeeping fields; `xs` and `fs`
+# are the tableau vectors, which the update step mutates in place.
+struct SidiState{T,S} <: AbstractUnivariateZeroState{T,S}
+    xn1::T
+    xn0::T
+    fxn1::S
+    fxn0::S
+    xs::Vector{T}   # the retained points, oldest first
+    fs::Vector{S}   # built with the element type of f(x), hence S rather than T
+end
+
+# Build the initial state: `_init_sidi` fills the tableau (evaluating `F` at
+# each starting point), then the scalar fields are seeded from it.
+function init_state(M::Sidi{k}, F::Callable_Function, x) where {k}
+    x₀, x₁ = x₀x₁(x)
+    fx₀, xs, fs = _init_sidi(F, (x₀, x₁), k)
+    # Field order is (xn1, xn0, fxn1, fxn0). The newest point xs[k+1] and its
+    # value fs[1] go in the "1" slots, which is also what `update_state`
+    # writes on every step. fx₀ is f(x₀); it is f(xs[k]) only when k == 1.
+    return SidiState(xs[k + 1], xs[k], fs[1], fx₀, xs, fs)
+end
+
+# One Sidi step: advance the tableau in place (one new evaluation of `F`),
+# then refresh the scalar fields of the state from it.
+# Returns `(state, false)`.
+function update_state(
+    L::Sidi{k},
+    F,
+    o::SidiState{T,S},
+    options,
+    l=NullTracks(),
+) where {k,T,S}
+    xs, fs = o.xs, o.fs
+    _update_sidi!(F, xs, fs)    # mutates xs and fs
+    incfn(l)
+
+    # xs and fs are the very arrays the state already holds, so only the
+    # scalar fields need resetting.
+    @reset o.xn0 = xs[k]
+    @reset o.xn1 = xs[k + 1]
+    @reset o.fxn0 = o.fxn1      # must precede the reset of fxn1
+    @reset o.fxn1 = fs[1]
+
+    return (o, false)
+end
+
+# Create xs and fs; fs records the lower diagonal of the Newton
+# divided-difference tableau:
+# x1  .  .    .  f1234
+# x2  .  .  f234
+# x3  . f34
+# x4 f4
+# xs = [x1,x2,x3,x4]; fs = [f4, f34, f234, f1234]
+# The tableau is built up diagonal by diagonal. A starting point that was not
+# supplied is generated by a lower-order step, using `evaluate_pk′` on the
+# part of xs, fs filled so far.
+# There is no good way to pre-compute fs to speed this up, so x is expected
+# to hold between 2 and k+1 points and f is evaluated at each point here.
+# This allocates, as it uses vectors to store xs and fs.
+# Returns (f(x[1]), xs, fs); throws an ArgumentError for a bad number of points.
+function _init_sidi(f, x, k)
+    n = length(x)
+    2 ≤ n ≤ k + 1 ||
+        throw(ArgumentError("expected between 2 and k+1 starting points, got $n"))
+
+    x₀ = first(x)
+    fx₀ = f(x₀)
+
+    xs = Vector{typeof(x₀)}(undef, k + 1)
+    fs = Vector{typeof(fx₀)}(undef, k + 1)
+
+    # copy the supplied points; the remaining slots are filled in the loop
+    xs[1:n] .= x
+
+    # diagonal for x2 above (two entries)
+    fs[1] = f(xs[2])
+    fs[2] = (fx₀ - fs[1]) / (xs[1] - xs[2])
+
+    # build up diagonal by diagonal
+    for j ∈ 3:(k + 1)
+        if j ≤ n # xⱼ was specified
+            xⱼ = xs[j]
+        else
+            # generate xⱼ from the j-1 points already in place
+            xⱼ₋₁ = xs[j - 1]
+            pk′ = evaluate_pk′(view(xs, 1:(j - 1)), view(fs, 1:(j - 1)))
+            xⱼ = xs[j] = xⱼ₋₁ - fs[1] / pk′
+        end
+        # push f(xⱼ) through the diagonal; each old entry is read before
+        # it is overwritten
+        Δ = f(xⱼ)
+        for i ∈ 2:j
+            Δ₀ = fs[i - 1]
+            fs[i - 1] = Δ
+            Δ = (Δ₀ - Δ) / (xs[j - i + 1] - xs[j])
+        end
+        fs[j] = Δ
+    end
+
+    # return fx₀ for bookkeeping purposes
+    return fx₀, xs, fs
+end
+
+# One Newton-like step: the slope comes from `evaluate_pk′`, the new point
+# and its function value are then pushed into the tableau.
+# Returns whatever `update_tableau!` returns.
+function _update_sidi!(f, xs, fs)
+    slope = evaluate_pk′(xs, fs)
+    x_new = xs[end] - fs[1] / slope
+    return update_tableau!(x_new, f(x_new), xs, fs)
+end
+
+# formula (10) in paper to evaluate derivative of interpolating polynomial
+# Evaluates the sum
+#   fs1[2] + fs1[3]*(x - x₋₁) + fs1[4]*(x - x₋₁)*(x - x₋₂) + ...
+# where x = xs1[end] and x₋ⱼ = xs1[end-j].
+# Note: the local `k` is the number of points in xs1, not the method's k.
+function evaluate_pk′(xs1, fs1)
+    δ = xs1[end] - xs1[end-1]
+    Σ = fs1[2]
+    k = length(xs1)
+    for i ∈ 3:k
+        Σ = Σ + fs1[i] * δ
+        # extend the running product by one more factor for the next term
+        δ = δ * (xs1[end] - xs1[end-i+1])
+    end
+    Σ
+end
+
+
+# update tableau's lower part
+# leaves [xn-k, xn-k+1, xn-k+2, ..., xn]
+#        [fn, f(n-1,n), f(n-2, n-1, n), ..., f(n-k,n-k+1, ..., n)]
+# Both vectors are modified in place and returned as the tuple (xs0, fs0).
+# Note: the local `k` below is the length of xs0.
+function update_tableau!(xn, fxn, xs0, fs0)
+    k = length(xs0)
+    # shift the points left by one, dropping xs0[1], and append xn
+    for i in 1:k-1
+        xs0[i] = xs0[i+1]
+    end
+    xs0[end] = xn
+    # rebuild the diagonal: each old entry is read (Δ₀) before being
+    # overwritten, then combined with the new one into the next difference
+    Δ = fxn
+    for i in 2:k
+        Δ₀ = fs0[i-1]
+        fs0[i-1] = Δ
+        Δ = (Δ₀ - Δ) / (xs0[end-i+1] - xn)
+    end
+    fs0[end] = Δ
+    xs0, fs0
+end
diff --git a/src/Roots.jl b/src/Roots.jl
@@ -70,6 +70,7 @@ include("DerivativeFree/order8.jl")
 include("DerivativeFree/order16.jl")
 include("DerivativeFree/king.jl")
 include("DerivativeFree/esser.jl")
+include("DerivativeFree/sidi.jl")
 include("DerivativeFree/order0.jl")
 
 include("Derivative/newton.jl")
diff --git a/src/find_zero.jl b/src/find_zero.jl
@@ -318,6 +318,12 @@ function init(
     ZeroProblemIterator(M, Nothing, Callable_Function(M, F), state, options, l)
 end
 
+# Development-only helper: runs `init` on a fresh `ZeroProblem` and hands
+# back the pieces of the result as a named tuple.
+function __init(f, x, M, p=nothing; kwargs...)
+    problem = ZeroProblem(f, x)
+    iter = init(problem, M, p; kwargs...)
+    return (
+        M=iter.M,
+        F=iter.F,
+        state=iter.state,
+        options=iter.options,
+        logger=iter.logger,
+    )
+end
+
 """
     solve!(P::ZeroProblemIterator)
     solve(fx::ZeroProblem, [M], [N]; p=nothing, kwargs...)
diff --git a/test/test_derivative_free.jl b/test/test_derivative_free.jl
@@ -306,6 +306,8 @@ if !isinteractive()
             Roots.Order5(),
             Roots.Order8(),
             Roots.Order16(),
+            Roots.Sidi(2),
+            Roots.Sidi(3)
         ]
         results = [run_df_tests((f, b) -> find_zero(f, b, M), name="$M") for M in Ms]
 
@@ -356,6 +358,7 @@ if !isinteractive()
             Roots.Order5(),
             Roots.Order8(),
             Roots.Order16(),
+            Roots.Sidi(2)
         ]
         Ts = [Float16, Float32, BigFloat]
 
diff --git a/test/test_find_zero.jl b/test/test_find_zero.jl
@@ -28,6 +28,7 @@ struct Order3_Test <: Roots.AbstractSecantMethod end
         Roots.Thukral16(),
         Roots.LithBoonkkampIJzerman(3, 0),
         Roots.LithBoonkkampIJzerman(4, 0),
+        Roots.Sidi(2)
     ]
 
     ## different types of initial values
@@ -594,3 +595,10 @@ end
     @test find_zero(f, (0, 8), atol=1) ≈ 1.99609375
     @test find_zero(f, (0, 8), atol=1e-3) ≈ 2.0000152587890625
 end
+
+# `Sidi(1)` is documented as the secant case, so from the same starting value
+# its tracked steps are expected to agree exactly with those of `Secant`.
+@testset "similar methods" begin
+    Lsidi,Lsec = Roots.Tracks(),Roots.Tracks()
+    find_zero(sin, 3.0, Roots.Sidi(1); tracks=Lsidi)
+    find_zero(sin, 3.0, Roots.Secant(); tracks=Lsec)
+    @test Lsidi.xfₛ[3:end] ==  Lsec.xfₛ[3:end] # entries 1:2 skipped; presumably the starting pair x₀, x₁ is recorded in a different order
+end

Original file line number	Diff line number	Diff line change
`@@ -306,6 +306,8 @@ if !isinteractive()`
`306`	`306`	`Roots.Order5(),`
`307`	`307`	`Roots.Order8(),`
`308`	`308`	`Roots.Order16(),`
	`309`	`+ Roots.Sidi(2),`
	`310`	`+ Roots.Sidi(3)`
`309`	`311`	`]`
`310`	`312`	`results = [run_df_tests((f, b) -> find_zero(f, b, M), name="$M") for M in Ms]`
`311`	`313`
`@@ -356,6 +358,7 @@ if !isinteractive()`
`356`	`358`	`Roots.Order5(),`
`357`	`359`	`Roots.Order8(),`
`358`	`360`	`Roots.Order16(),`
	`361`	`+ Roots.Sidi(2)`
`359`	`362`	`]`
`360`	`363`	`Ts = [Float16, Float32, BigFloat]`
`361`	`364`