diff --git a/src/LM_alg.jl b/src/LM_alg.jl index 9f179a5e..abe99a1e 100644 --- a/src/LM_alg.jl +++ b/src/LM_alg.jl @@ -24,12 +24,13 @@ mutable struct LMSolver{ u_bound::V l_bound_m_x::V u_bound_m_x::V + m_fh_hist::V subsolver::ST subpb::PB substats::GenericExecutionStats{T, V, V, T} end -function LMSolver(reg_nls::AbstractRegularizedNLPModel{T, V}; subsolver = R2Solver) where {T, V} +function LMSolver(reg_nls::AbstractRegularizedNLPModel{T, V}; subsolver = R2Solver, m_monotone::Int = 1) where {T, V} x0 = reg_nls.model.meta.x0 l_bound = reg_nls.model.meta.lvar u_bound = reg_nls.model.meta.uvar @@ -54,6 +55,8 @@ function LMSolver(reg_nls::AbstractRegularizedNLPModel{T, V}; subsolver = R2Solv u_bound_m_x = similar(xk, 0) end + m_fh_hist = fill(T(-Inf), m_monotone - 1) + ψ = has_bnds ? shifted(reg_nls.h, xk, l_bound_m_x, u_bound_m_x, reg_nls.selected) : shifted(reg_nls.h, xk) @@ -80,6 +83,7 @@ function LMSolver(reg_nls::AbstractRegularizedNLPModel{T, V}; subsolver = R2Solv u_bound, l_bound_m_x, u_bound_m_x, + m_fh_hist, subsolver, subpb, substats, @@ -105,7 +109,7 @@ where F(x) and J(x) are the residual and its Jacobian at x, respectively, ψ(s; For advanced usage, first define a solver "LMSolver" to preallocate the memory used in the algorithm, and then call `solve!`: - solver = LMSolver(reg_nls; subsolver = R2Solver) + solver = LMSolver(reg_nls; subsolver = R2Solver, m_monotone = 1) solve!(solver, reg_nls) stats = RegularizedExecutionStats(reg_nls) @@ -130,6 +134,7 @@ For advanced usage, first define a solver "LMSolver" to preallocate the memory u - `η2::T = T(0.9)`: very successful iteration threshold; - `γ::T = T(3)`: regularization parameter multiplier, σ := σ/γ when the iteration is very successful and σ := σγ when the iteration is unsuccessful; - `θ::T = 1/(1 + eps(T)^(1 / 5))`: is the model decrease fraction with respect to the decrease of the Cauchy model; +- `m_monotone::Int = 1`: monotonicity parameter. By default, LM is monotone but the non-monotone variant will be used if `m_monotone > 1`; - `subsolver = R2Solver`: the solver used to solve the subproblems. The algorithm stops either when `√(ξₖ/νₖ) < atol + rtol*√(ξ₀/ν₀) ` or `ξₖ < 0` and `√(-ξₖ/νₖ) < neg_tol` where ξₖ := f(xₖ) + h(xₖ) - φ(sₖ; xₖ) - ψ(sₖ; xₖ), and √(ξₖ/νₖ) is a stationarity measure. @@ -166,7 +171,8 @@ end function LM(reg_nls::AbstractRegularizedNLPModel; kwargs...) kwargs_dict = Dict(kwargs...) subsolver = pop!(kwargs_dict, :subsolver, R2Solver) - solver = LMSolver(reg_nls, subsolver = subsolver) + m_monotone = pop!(kwargs_dict, :m_monotone, 1) + solver = LMSolver(reg_nls, subsolver = subsolver, m_monotone = m_monotone) stats = RegularizedExecutionStats(reg_nls) solve!(solver, reg_nls, stats; kwargs_dict...) return stats @@ -214,7 +220,11 @@ function SolverCore.solve!( ψ = solver.ψ xkn = solver.xkn s = solver.s + m_fh_hist = solver.m_fh_hist .= T(-Inf) has_bnds = solver.has_bnds + + m_monotone = length(m_fh_hist) + 1 + if has_bnds l_bound = solver.l_bound u_bound = solver.u_bound @@ -275,6 +285,7 @@ function SolverCore.solve!( set_solver_specific!(stats, :smooth_obj, fk) set_solver_specific!(stats, :nonsmooth_obj, hk) set_solver_specific!(stats, :prox_evals, prox_evals + 1) + m_monotone > 1 && (m_fh_hist[stats.iter % (m_monotone - 1) + 1] = fk + hk) φ1 = let Fk = Fk, ∇fk = ∇fk d -> dot(Fk, Fk) / 2 + dot(∇fk, d) # ∇fk = Jk^T Fk @@ -345,7 +356,8 @@ function SolverCore.solve!( error("LM: failed to compute a step: ξ = $ξ") end - Δobj = fk + hk - (fkn + hkn) + max(1, abs(fk + hk)) * 10 * eps() + fhmax = m_monotone > 1 ? maximum(m_fh_hist) : fk + hk + Δobj = fhmax - (fkn + hkn) + max(1, abs(fk + hk)) * 10 * eps() ρk = Δobj / ξ verbose > 0 && @@ -400,6 +412,8 @@ function SolverCore.solve!( σk = σk * γ end + m_monotone > 1 && (m_fh_hist[stats.iter % (m_monotone - 1) + 1] = fk + hk) + set_objective!(stats, fk + hk) set_solver_specific!(stats, :smooth_obj, fk) set_solver_specific!(stats, :nonsmooth_obj, hk) diff --git a/src/TR_alg.jl b/src/TR_alg.jl index 90142212..6e8bb42b 100644 --- a/src/TR_alg.jl +++ b/src/TR_alg.jl @@ -23,6 +23,7 @@ mutable struct TRSolver{ u_bound::V l_bound_m_x::V u_bound_m_x::V + m_fh_hist::V subsolver::ST subpb::PB substats::GenericExecutionStats{T, V, V, T} @@ -32,6 +33,7 @@ function TRSolver( reg_nlp::AbstractRegularizedNLPModel{T, V}; χ::X = NormLinf(one(T)), subsolver = R2Solver, + m_monotone::Int = 1, ) where {T, V, X} x0 = reg_nlp.model.meta.x0 l_bound = reg_nlp.model.meta.lvar @@ -54,6 +56,8 @@ function TRSolver( u_bound_m_x = similar(xk, 0) end + m_fh_hist = fill(T(-Inf), m_monotone - 1) + ψ = has_bnds || subsolver == TRDHSolver ? shifted(reg_nlp.h, xk, l_bound_m_x, u_bound_m_x, reg_nlp.selected) : @@ -81,6 +85,7 @@ function TRSolver( u_bound, l_bound_m_x, u_bound_m_x, + m_fh_hist, subsolver, subpb, substats, @@ -107,7 +112,7 @@ where φ(s ; xₖ) = f(xₖ) + ∇f(xₖ)ᵀs + ½ sᵀ Bₖ s is a quadratic a For advanced usage, first define a solver "TRSolver" to preallocate the memory used in the algorithm, and then call `solve!`: - solver = TR(reg_nlp; χ = NormLinf(1), subsolver = R2Solver) + solver = TRSolver(reg_nlp; χ = NormLinf(1), subsolver = R2Solver, m_monotone = 1) solve!(solver, reg_nlp) stats = RegularizedExecutionStats(reg_nlp) @@ -129,6 +134,7 @@ For advanced usage, first define a solver "TRSolver" to preallocate the memory u - `η1::T = √√eps(T)`: successful iteration threshold; - `η2::T = T(0.9)`: very successful iteration threshold; - `γ::T = T(3)`: trust-region radius parameter multiplier. Must satisfy `γ > 1`. The trust-region radius is updated as Δ := Δ*γ when the iteration is very successful and Δ := Δ/γ when the iteration is unsuccessful; +- `m_monotone::Int = 1`: monotonicity parameter. By default, TR is monotone but the non-monotone variant will be used if `m_monotone > 1`; - `χ::F = NormLinf(1)`: norm used to define the trust-region;` - `subsolver::S = R2Solver`: subsolver used to solve the subproblem that appears at each iteration. - `sub_kwargs::NamedTuple = NamedTuple()`: a named tuple containing the keyword arguments to be sent to the subsolver. The solver will fail if invalid keyword arguments are provided to the subsolver. For example, if the subsolver is `R2Solver`, you can pass `sub_kwargs = (max_iter = 100, σmin = 1e-6,)`. @@ -175,7 +181,8 @@ function TR(reg_nlp::AbstractRegularizedNLPModel{T, V}; kwargs...) where {T, V} kwargs_dict = Dict(kwargs...) subsolver = pop!(kwargs_dict, :subsolver, R2Solver) χ = pop!(kwargs_dict, :χ, NormLinf(one(T))) - solver = TRSolver(reg_nlp, subsolver = subsolver, χ = χ) + m_monotone = pop!(kwargs_dict, :m_monotone, 1) + solver = TRSolver(reg_nlp, subsolver = subsolver, χ = χ, m_monotone = m_monotone) stats = RegularizedExecutionStats(reg_nlp) solve!(solver, reg_nlp, stats; kwargs_dict...) return stats @@ -221,8 +228,11 @@ function SolverCore.solve!( xkn = solver.xkn s = solver.s χ = solver.χ + m_fh_hist = solver.m_fh_hist .= T(-Inf) has_bnds = solver.has_bnds + m_monotone = length(m_fh_hist) + 1 + if has_bnds || isa(solver.subsolver, TRDHSolver) #TODO elsewhere ? l_bound_m_x, u_bound_m_x = solver.l_bound_m_x, solver.u_bound_m_x l_bound, u_bound = solver.l_bound, solver.u_bound @@ -293,6 +303,7 @@ function SolverCore.solve!( set_solver_specific!(stats, :smooth_obj, fk) set_solver_specific!(stats, :nonsmooth_obj, hk) set_solver_specific!(stats, :prox_evals, prox_evals + 1) + m_monotone > 1 && (m_fh_hist[stats.iter % (m_monotone - 1) + 1] = fk + hk) # models φ1 = let ∇fk = ∇fk @@ -380,7 +391,8 @@ function SolverCore.solve!( hkn = @views h(xkn[selected]) sNorm = χ(s) - Δobj = fk + hk - (fkn + hkn) + max(1, abs(fk + hk)) * 10 * eps() + fhmax = m_monotone > 1 ? maximum(m_fh_hist) : fk + hk + Δobj = fhmax - (fkn + hkn) + max(1, abs(fk + hk)) * 10 * eps() ξ = hk - mk(s) + max(1, abs(hk)) * 10 * eps() if (ξ ≤ 0 || isnan(ξ)) @@ -452,6 +464,8 @@ function SolverCore.solve!( end end + m_monotone > 1 && (m_fh_hist[stats.iter % (m_monotone - 1) + 1] = fk + hk) + set_objective!(stats, fk + hk) set_solver_specific!(stats, :smooth_obj, fk) set_solver_specific!(stats, :nonsmooth_obj, hk)