Add abstol and reltol keyword arguments for CG

ranocha · ranocha · commit 152f89f39db9 · 2020-12-07T10:27:42.000+01:00
diff --git a/src/cg.jl b/src/cg.jl
@@ -8,7 +8,7 @@ mutable struct CGIterable{matT, solT, vecT, numT <: Real}
     r::vecT
     c::vecT
     u::vecT
-    reltol::numT
+    tol::numT
     residual::numT
     prev_residual::numT
     maxiter::Int
@@ -22,14 +22,14 @@ mutable struct PCGIterable{precT, matT, solT, vecT, numT <: Real, paramT <: Numb
     r::vecT
     c::vecT
     u::vecT
-    reltol::numT
+    tol::numT
     residual::numT
     ρ::paramT
     maxiter::Int
     mv_products::Int
 end
 
-@inline converged(it::Union{CGIterable, PCGIterable}) = it.residual ≤ it.reltol
+@inline converged(it::Union{CGIterable, PCGIterable}) = it.residual ≤ it.tol
 
 @inline start(it::Union{CGIterable, PCGIterable}) = 0
 
@@ -41,7 +41,10 @@ end
 ###############
 
 function iterate(it::CGIterable, iteration::Int=start(it))
-    if done(it, iteration) return nothing end
+    # Check for termination first
+    if done(it, iteration)
+        return nothing
+    end
 
     # u := r + βu (almost an axpy)
     β = it.residual^2 / it.prev_residual^2
@@ -72,6 +75,7 @@ function iterate(it::PCGIterable, iteration::Int=start(it))
         return nothing
     end
 
+    # Apply left preconditioner
     ldiv!(it.c, it.Pl, it.r)
 
     ρ_prev = it.ρ
@@ -114,40 +118,48 @@ struct CGStateVariables{T,Tx<:AbstractArray{T}}
 end
 
 function cg_iterator!(x, A, b, Pl = Identity();
-    tol = sqrt(eps(real(eltype(b)))),
-    maxiter::Int = size(A, 2),
-    statevars::CGStateVariables = CGStateVariables(zero(x), similar(x), similar(x)),
-    initially_zero::Bool = false
-)
+                      abstol::Real = zero(real(eltype(b))),
+                      reltol::Real = sqrt(eps(real(eltype(b)))),
+                      tol = nothing, # TODO: Deprecations introduced in v0.8
+                      maxiter::Int = size(A, 2),
+                      statevars::CGStateVariables = CGStateVariables(zero(x), similar(x), similar(x)),
+                      initially_zero::Bool = false)
     u = statevars.u
     r = statevars.r
     c = statevars.c
     u .= zero(eltype(x))
     copyto!(r, b)
 
+    # TODO: Deprecations introduced in v0.8
+    if tol !== nothing
+        Base.depwarn("The keyword argument `tol` is deprecated, use `reltol` instead.", :cg_iterator!)
+        reltol = tol
+    end
+
     # Compute r with an MV-product or not.
     if initially_zero
         mv_products = 0
-        c = similar(x)
-        residual = norm(b)
-        reltol = residual * tol # Save one dot product
     else
         mv_products = 1
         mul!(c, A, x)
         r .-= c
-        residual = norm(r)
-        reltol = norm(b) * tol
     end
+    residual = norm(r)
+    # TODO: According to the docs, the code below should use the initial residual
+    #       instead of the norm of the RHS `b` to set the relative tolerance.
+    # See also https://github.com/JuliaMath/IterativeSolvers.jl/pull/244
+    # tolerance = max(reltol * residual, abstol)
+    tolerance = max(reltol * norm(b), abstol)
 
     # Return the iterable
     if isa(Pl, Identity)
         return CGIterable(A, x, r, c, u,
-            reltol, residual, one(residual),
+            tolerance, residual, one(residual),
             maxiter, mv_products
         )
     else
         return PCGIterable(Pl, A, x, r, c, u,
-            reltol, residual, one(eltype(x)),
+            tolerance, residual, one(eltype(x)),
             maxiter, mv_products
         )
     end
@@ -199,20 +211,28 @@ cg(A, b; kwargs...) = cg!(zerox(A, b), A, b; initially_zero = true, kwargs...)
 - `:resnorm` => `::Vector`: residual norm at each iteration.
 """
 function cg!(x, A, b;
-    tol = sqrt(eps(real(eltype(b)))),
-    maxiter::Int = size(A, 2),
-    log::Bool = false,
-    statevars::CGStateVariables = CGStateVariables(zero(x), similar(x), similar(x)),
-    verbose::Bool = false,
-    Pl = Identity(),
-    kwargs...
-)
+             abstol::Real = zero(real(eltype(b))),
+             reltol::Real = sqrt(eps(real(eltype(b)))),
+             tol = nothing, # TODO: Deprecations introduced in v0.8
+             maxiter::Int = size(A, 2),
+             log::Bool = false,
+             statevars::CGStateVariables = CGStateVariables(zero(x), similar(x), similar(x)),
+             verbose::Bool = false,
+             Pl = Identity(),
+             kwargs...)
     history = ConvergenceHistory(partial = !log)
     history[:tol] = tol
     log && reserve!(history, :resnorm, maxiter + 1)
 
+    # TODO: Deprecations introduced in v0.8
+    if tol !== nothing
+        Base.depwarn("The keyword argument `tol` is deprecated, use `reltol` instead.", :cg!)
+        reltol = tol
+    end
+
     # Actually perform CG
-    iterable = cg_iterator!(x, A, b, Pl; tol = tol, maxiter = maxiter, statevars = statevars, kwargs...)
+    iterable = cg_iterator!(x, A, b, Pl; abstol = abstol, reltol = reltol, maxiter = maxiter,
+                            statevars = statevars, kwargs...)
     if log
         history.mvps = iterable.mv_products
     end
diff --git a/test/cg.jl b/test/cg.jl
@@ -26,15 +26,15 @@ Random.seed!(1234321)
         A = rand(T, n, n)
         A = A' * A + I
         b = rand(T, n)
-        tol = √eps(real(T))
+        reltol = √eps(real(T))
 
-        x,ch = cg(A, b; tol=tol, maxiter=2n, log=true)
+        x,ch = cg(A, b; reltol=reltol, maxiter=2n, log=true)
         @test isa(ch, ConvergenceHistory)
-        @test norm(A*x - b) / norm(b) ≤ tol
+        @test norm(A*x - b) / norm(b) ≤ reltol
         @test ch.isconverged
 
         # If you start from the exact solution, you should converge immediately
-        x,ch = cg!(A \ b, A, b; tol=10tol, log=true)
+        x,ch = cg!(A \ b, A, b; reltol=10*reltol, log=true)
         @test niters(ch) ≤ 1
         @test nprods(ch) ≤ 2
 
@@ -56,29 +56,29 @@ end
 
     rhs = randn(size(A, 2))
     rmul!(rhs, inv(norm(rhs)))
-    tol = 1e-5
+    reltol = 1e-5
 
     @testset "SparseMatrixCSC{$T, $Ti}" for T in (Float64, Float32), Ti in (Int64, Int32)
-        xCG = cg(A, rhs; tol=tol, maxiter=100)
-        xJAC = cg(A, rhs; Pl=P, tol=tol, maxiter=100)
-        @test norm(A * xCG - rhs) ≤ tol
-        @test norm(A * xJAC - rhs) ≤ tol
+        xCG = cg(A, rhs; reltol=reltol, maxiter=100)
+        xJAC = cg(A, rhs; Pl=P, reltol=reltol, maxiter=100)
+        @test norm(A * xCG - rhs) ≤ reltol
+        @test norm(A * xJAC - rhs) ≤ reltol
     end
 
     Af = LinearMap(A)
     @testset "Function" begin
-        xCG = cg(Af, rhs; tol=tol, maxiter=100)
-        xJAC = cg(Af, rhs; Pl=P, tol=tol, maxiter=100)
-        @test norm(A * xCG - rhs) ≤ tol
-        @test norm(A * xJAC - rhs) ≤ tol
+        xCG = cg(Af, rhs; reltol=reltol, maxiter=100)
+        xJAC = cg(Af, rhs; Pl=P, reltol=reltol, maxiter=100)
+        @test norm(A * xCG - rhs) ≤ reltol
+        @test norm(A * xJAC - rhs) ≤ reltol
     end
 
     @testset "Function with specified starting guess" begin
         x0 = randn(size(rhs))
-        xCG, hCG = cg!(copy(x0), Af, rhs; tol=tol, maxiter=100, log=true)
-        xJAC, hJAC = cg!(copy(x0), Af, rhs; Pl=P, tol=tol, maxiter=100, log=true)
-        @test norm(A * xCG - rhs) ≤ tol
-        @test norm(A * xJAC - rhs) ≤ tol
+        xCG, hCG = cg!(copy(x0), Af, rhs; reltol=reltol, maxiter=100, log=true)
+        xJAC, hJAC = cg!(copy(x0), Af, rhs; Pl=P, reltol=reltol, maxiter=100, log=true)
+        @test norm(A * xCG - rhs) ≤ reltol
+        @test norm(A * xJAC - rhs) ≤ reltol
         @test niters(hJAC) == niters(hCG)
     end
 end
@@ -92,4 +92,32 @@ end
     @test hist.isconverged
 end
 
+@testset "Termination criterion" begin
+    for T in (Float32, Float64, ComplexF32, ComplexF64)
+        A = T[ 2 -1  0
+              -1  2 -1
+               0 -1  2]
+        n = size(A, 2)
+        b = ones(T, n)
+        x0 = A \ b
+        perturbation = T[(-1)^i for i in 1:n]
+
+        # If the initial residual is small and a small relative tolerance is used,
+        # many iterations are necessary
+        x = x0 + sqrt(eps(real(T))) * perturbation
+        initial_residual = norm(A * x - b)
+        x, ch = cg!(x, A, b, log=true)
+        @test_broken 2 ≤ niters(ch) ≤ n
+        # This test is currently broken since `norm(b)` is used in `cg_iterator!`
+        # instead of the initial `residual` as described in the documentation.
+
+        # If the initial residual is small and a large absolute tolerance is used,
+        # no iterations are necessary
+        x = x0 + 10*sqrt(eps(real(T))) * perturbation
+        initial_residual = norm(A * x - b)
+        x, ch = cg!(x, A, b, abstol=2*initial_residual, reltol=zero(real(T)), log=true)
+        @test niters(ch) == 0
+    end
+end
+
 end