diff --git a/examples/demo-cutest.jl b/examples/demo-cutest.jl index db281b99..c3b41d7b 100644 --- a/examples/demo-cutest.jl +++ b/examples/demo-cutest.jl @@ -43,4 +43,4 @@ callback = stats = AL(nlp, h, atol = 1e-6, verbose = 1, callback = callback) print(stats) -finalize(nlp) \ No newline at end of file +finalize(nlp) diff --git a/src/AL_alg.jl b/src/AL_alg.jl index a27d970f..3745d685 100644 --- a/src/AL_alg.jl +++ b/src/AL_alg.jl @@ -383,4 +383,4 @@ project_y!(nlp::AugLagModel) = project_y!(nlp::AugLagModel, -1e20, 1e20) function project_y!(nlp::AugLagModel, ymin::V, ymax::V) where {V} nlp.y .= max.(ymin, min.(nlp.y, ymax)) nlp.μc_y .= nlp.μ .* nlp.cx .- nlp.y -end \ No newline at end of file +end diff --git a/src/R2DH_alg.jl b/src/R2DH_alg.jl new file mode 100644 index 00000000..6107cbc3 --- /dev/null +++ b/src/R2DH_alg.jl @@ -0,0 +1,472 @@ +export R2DH, R2DHSolver, solve! + +import SolverCore.solve! + +mutable struct R2DHSolver{ + T <: Real, + G <: ShiftedProximableFunction, + V <: AbstractVector{T}, + QN <: AbstractDiagonalQuasiNewtonOperator{T} +} <: AbstractOptimizationSolver + xk::V + ∇fk::V + ∇fk⁻::V + mν∇fk::V + D::QN + ψ::G + xkn::V + s::V + dkσk::V + has_bnds::Bool + l_bound::V + u_bound::V + l_bound_m_x::V + u_bound_m_x::V + m_fh_hist::V +end + +function R2DHSolver(reg_nlp::AbstractRegularizedNLPModel{T, V}; m_monotone::Int = 1) where{T, V} + x0 = reg_nlp.model.meta.x0 + l_bound = reg_nlp.model.meta.lvar + u_bound = reg_nlp.model.meta.uvar + + xk = similar(x0) + ∇fk = similar(x0) + ∇fk⁻ = similar(x0) + mν∇fk = similar(x0) + xkn = similar(x0) + s = similar(x0) + dkσk = similar(x0) + has_bnds = any(l_bound .!= T(-Inf)) || any(u_bound .!= T(Inf)) + if has_bnds + l_bound_m_x = similar(xk) + u_bound_m_x = similar(xk) + @. l_bound_m_x = l_bound - x0 + @. u_bound_m_x = u_bound - x0 + else + l_bound_m_x = similar(xk, 0) + u_bound_m_x = similar(xk, 0) + end + m_fh_hist = fill(T(-Inf), m_monotone - 1) + + ψ = has_bnds ? shifted(reg_nlp.h, xk, l_bound_m_x, u_bound_m_x, reg_nlp.selected) : shifted(reg_nlp.h, xk) + D = isa(reg_nlp.model, AbstractDiagonalQNModel) ? hess_op(reg_nlp.model, x0) : SpectralGradient(T(1), reg_nlp.model.meta.nvar) + + return R2DHSolver( + xk, + ∇fk, + ∇fk⁻, + mν∇fk, + D, + ψ, + xkn, + s, + dkσk, + has_bnds, + l_bound, + u_bound, + l_bound_m_x, + u_bound_m_x, + m_fh_hist + ) +end + + +""" + R2DH(reg_nlp; kwargs…) + +A second-order quadratic regularization method for the problem + + min f(x) + h(x) + +where f: ℝⁿ → ℝ has a Lipschitz-continuous gradient, and h: ℝⁿ → ℝ is +lower semi-continuous, proper and prox-bounded. + +About each iterate xₖ, a step sₖ is computed as a solution of + + min φ(s; xₖ) + ½ σₖ ‖s‖² + ψ(s; xₖ) + +where φ(s ; xₖ) = f(xₖ) + ∇f(xₖ)ᵀs + ½ sᵀDₖs is a quadratic approximation of f about xₖ, +ψ(s; xₖ) is either h(xₖ + s) or an approximation of h(xₖ + s), ‖⋅‖ is the ℓ₂ norm and σₖ > 0 is the regularization parameter. + +For advanced usage, first define a solver "R2DHSolver" to preallocate the memory used in the algorithm, and then call `solve!`: + + solver = R2DHSolver(reg_nlp; m_monotone = 1) + solve!(solver, reg_nlp) + + stats = RegularizedExecutionStats(reg_nlp) + solver = R2DHSolver(reg_nlp) + solve!(solver, reg_nlp, stats) + +# Arguments +* `reg_nlp::AbstractRegularizedNLPModel{T, V}`: the problem to solve, see `RegularizedProblems.jl`, `NLPModels.jl`. 
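+
+For example, assuming `nlp` is a smooth model (e.g., a BPDN instance from `RegularizedProblems.jl`) and
+`h = NormL1(1.0)` from `ProximalOperators.jl` (illustrative choices, not defined by this package), one may call:
+
+    reg_nlp = RegularizedNLPModel(nlp, h)
+    stats = R2DH(reg_nlp)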
+
+# Keyword arguments
+- `x::V = nlp.meta.x0`: the initial guess;
+- `atol::T = √eps(T)`: absolute tolerance;
+- `rtol::T = √eps(T)`: relative tolerance;
+- `neg_tol::T = eps(T)^(1 / 4)`: tolerance allowed on a negative model decrease ξₖ (see the stopping criterion below);
+- `max_eval::Int = -1`: maximum number of evaluations of the objective function (a negative number means unlimited);
+- `max_time::Float64 = 30.0`: maximum time limit in seconds;
+- `max_iter::Int = 10000`: maximum number of iterations;
+- `verbose::Int = 0`: if > 0, display iteration details every `verbose` iteration;
+- `σmin::T = eps(T)`: minimum value of the regularization parameter;
+- `η1::T = √√eps(T)`: successful iteration threshold;
+- `η2::T = T(0.9)`: very successful iteration threshold;
+- `ν::T = eps(T)^(1 / 5)`: multiplicative inverse of the initial regularization parameter: ν = 1/σ;
+- `γ::T = T(3)`: regularization parameter multiplier: σ := σ/γ when the iteration is very successful and σ := σγ when the iteration is unsuccessful;
+- `θ::T = eps(T)^(1/5)`: model decrease fraction with respect to the decrease of the Cauchy model;
+- `m_monotone::Int = 1`: memory length of the non-monotone variant. By default, R2DH is monotone; the non-monotone variant is obtained with `m_monotone > 1`.
+
+The algorithm stops either when `√(ξₖ/νₖ) < atol + rtol*√(ξ₀/ν₀)` or when `ξₖ < 0` and `√(-ξₖ/νₖ) < neg_tol`, where ξₖ := f(xₖ) + h(xₖ) - φ(sₖ; xₖ) - ψ(sₖ; xₖ), and √(ξₖ/νₖ) is a stationarity measure.
+
+# Output
+The value returned is a `GenericExecutionStats`, see `SolverCore.jl`.
+
+# Callback
+The callback is called at each iteration.
+The expected signature of the callback is `callback(nlp, solver, stats)`, and its output is ignored.
+Changing any of the input arguments will affect subsequent iterations.
+In particular, setting `stats.status = :user` will stop the algorithm.
+All relevant information should be available in `nlp` and `solver`.
+Notably, you can access, and modify, the following:
+- `solver.xk`: current iterate;
+- `solver.∇fk`: current gradient;
+- `stats`: structure holding the output of the algorithm (`GenericExecutionStats`), which contains, among other things:
+  - `stats.iter`: current iteration counter;
+  - `stats.objective`: current objective function value;
+  - `stats.solver_specific[:smooth_obj]`: current value of the smooth part of the objective function;
+  - `stats.solver_specific[:nonsmooth_obj]`: current value of the nonsmooth part of the objective function;
+  - `stats.status`: current status of the algorithm. Should be `:unknown` unless the algorithm has attained a stopping criterion. Changing this to anything other than `:unknown` will stop the algorithm, but you should use `:user` to properly indicate the intention;
+  - `stats.elapsed_time`: elapsed time in seconds.
+"""
+function R2DH(
+  nlp::AbstractDiagonalQNModel{T, V},
+  h,
+  options::ROSolverOptions{T};
+  kwargs...,
+) where {T, V}
+  kwargs_dict = Dict(kwargs...)
+  selected = pop!(kwargs_dict, :selected, 1:(nlp.meta.nvar))
+  x0 = pop!(kwargs_dict, :x0, nlp.meta.x0)
+  reg_nlp = RegularizedNLPModel(nlp, h, selected)
+  return R2DH(
+    reg_nlp,
+    x = x0,
+    atol = options.ϵa,
+    rtol = options.ϵr,
+    neg_tol = options.neg_tol,
+    verbose = options.verbose,
+    max_iter = options.maxIter,
+    max_time = options.maxTime,
+    σmin = options.σmin,
+    η1 = options.η1,
+    η2 = options.η2,
+    ν = options.ν,
+    γ = options.γ,
+    θ = options.θ,
+    kwargs_dict...,
+  )
+end
+
+function R2DH(
+  reg_nlp::AbstractRegularizedNLPModel{T, V};
+  kwargs...
+) where {T, V}
+  kwargs_dict = Dict(kwargs...)
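+  # `m_monotone` is consumed here because it sizes the non-monotone history buffer that
+  # `R2DHSolver` preallocates; all remaining keyword arguments are forwarded to `solve!`.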
+ m_monotone = pop!(kwargs_dict, :m_monotone, 1) + solver = R2DHSolver(reg_nlp, m_monotone = m_monotone) + stats = RegularizedExecutionStats(reg_nlp) + solve!(solver, reg_nlp, stats; kwargs_dict...) + return stats +end + +function SolverCore.solve!( + solver::R2DHSolver{T, G, V}, + reg_nlp::AbstractRegularizedNLPModel{T, V}, + stats::GenericExecutionStats{T, V}; + callback = (args...) -> nothing, + x::V = reg_nlp.model.meta.x0, + atol::T = √eps(T), + rtol::T = √eps(T), + neg_tol::T = eps(T)^(1 / 4), + verbose::Int = 0, + max_iter::Int = 10000, + max_time::Float64 = 30.0, + max_eval::Int = -1, + σmin::T = eps(T), + η1::T = √√eps(T), + η2::T = T(0.9), + ν::T = eps(T)^(1 / 5), + γ::T = T(3), + θ::T = eps(T)^(1 / 5), +) where{T, V, G} + + reset!(stats) + + # Retrieve workspace + selected = reg_nlp.selected + h = reg_nlp.h + nlp = reg_nlp.model + + xk = solver.xk .= x + + # Make sure ψ has the correct shift + shift!(solver.ψ, xk) + + ∇fk = solver.∇fk + ∇fk⁻ = solver.∇fk⁻ + mν∇fk = solver.mν∇fk + D = solver.D + dkσk = solver.dkσk + ψ = solver.ψ + xkn = solver.xkn + s = solver.s + m_fh_hist = solver.m_fh_hist + has_bnds = solver.has_bnds + + if has_bnds + l_bound_m_x = solver.l_bound_m_x + u_bound_m_x = solver.u_bound_m_x + l_bound = solver.l_bound + u_bound = solver.u_bound + end + m_monotone = length(m_fh_hist) + 1 + + + # initialize parameters + improper = false + hk = @views h(xk[selected]) + if hk == Inf + verbose > 0 && @info "R2DH: finding initial guess where nonsmooth term is finite" + prox!(xk, h, xk, T(1)) + hk = @views h(xk[selected]) + hk < Inf || error("prox computation must be erroneous") + verbose > 0 && @debug "R2DH: found point where h has value" hk + end + improper = (hk == -Inf) + + if verbose > 0 + @info log_header( + [:iter, :fx, :hx, :xi, :ρ, :σ, :normx, :norms, :arrow], + [Int, T, T, T, T, T, T, T, Char], + hdr_override = Dict{Symbol, String}( # TODO: Add this as constant dict elsewhere + :fx => "f(x)", + :hx => "h(x)", + :xi => "√(ξ/ν)", + :normx => "‖x‖", + :norms => "‖s‖", + :arrow => "R2DH", + ), + colsep = 1, + ) + end + + local ξ::T + local ρk::T = zero(T) + + σk = max(1 / ν, σmin) + + fk = obj(nlp, xk) + grad!(nlp, xk, ∇fk) + ∇fk⁻ .= ∇fk + spectral_test = isa(D, SpectralGradient) + + @. dkσk = D.d .+ σk + DNorm = norm(D.d, Inf) + + ν₁ = 1 / ((DNorm + σk) * (1 + θ)) + sqrt_ξ_νInv = one(T) + + @. mν∇fk = -ν₁ * ∇fk + m_monotone > 1 && (m_fh_hist[mod(stats.iter+1, m_monotone - 1) + 1] = fk + hk) + + set_iter!(stats, 0) + start_time = time() + set_time!(stats, 0.0) + set_objective!(stats, fk + hk) + set_solver_specific!(stats, :smooth_obj, fk) + set_solver_specific!(stats, :nonsmooth_obj, hk) + + φ = let ∇fk = ∇fk, dkσk = dkσk + d ->begin + result = zero(T) + n = length(d) + @inbounds for i = 1:n + result += d[i]^2*dkσk[i]/2 + ∇fk[i]*d[i] + end + return result + end + end + + mk = let ψ = ψ + d -> φ(d) + ψ(d)::T + end + + spectral_test ? prox!(s, ψ, mν∇fk::V, ν₁) : iprox!(s, ψ, ∇fk, dkσk) + + mks = mk(s) + while mks == -Inf #TODO add test coverage for this + σk = σk * γ + dkσk .= D.d .+ σk + DNorm = norm(D.d, Inf) + ν₁ = 1 / ((DNorm + σk) * (1 + θ)) + @. mν∇fk = -ν₁ * ∇fk + spectral_test ? prox!(s, ψ, mν∇fk, ν₁) : iprox!(s, ψ, ∇fk, dkσk) + mks = mk(s) + end + + ξ = hk - mks + max(1, abs(hk)) * 10 * eps() + sqrt_ξ_νInv = ξ ≥ 0 ? 
sqrt(ξ / ν₁) : sqrt(-ξ / ν₁)
+  solved = (ξ < 0 && sqrt_ξ_νInv ≤ neg_tol) || (ξ ≥ 0 && sqrt_ξ_νInv ≤ atol)
+  (ξ < 0 && sqrt_ξ_νInv > neg_tol) &&
+    error("R2DH: prox-gradient step should produce a decrease but ξ = $(ξ)")
+  atol += rtol * sqrt_ξ_νInv # make stopping test absolute and relative
+
+  set_status!(
+    stats,
+    get_status(
+      reg_nlp,
+      elapsed_time = stats.elapsed_time,
+      iter = stats.iter,
+      optimal = solved,
+      improper = improper,
+      max_eval = max_eval,
+      max_time = max_time,
+      max_iter = max_iter,
+    ),
+  )
+
+  callback(nlp, solver, stats)
+
+  done = stats.status != :unknown
+
+  while !done
+    # Update xk, sigma_k
+    xkn .= xk .+ s
+    fkn = obj(nlp, xkn)
+    hkn = @views h(xkn[selected])
+    improper = (hkn == -Inf)
+
+    fhmax = m_monotone > 1 ? maximum(m_fh_hist) : fk + hk
+    Δobj = fhmax - (fkn + hkn) + max(1, abs(fhmax)) * 10 * eps()
+    Δmod = fhmax - (fk + mks) + max(1, abs(hk)) * 10 * eps()
+
+    ρk = Δobj / Δmod
+
+    verbose > 0 &&
+      stats.iter % verbose == 0 &&
+      @info log_row(
+        Any[
+          stats.iter,
+          fk,
+          hk,
+          sqrt_ξ_νInv,
+          ρk,
+          σk,
+          norm(xk),
+          norm(s),
+          (η2 ≤ ρk < Inf) ? "↘" : (ρk < η1 ? "↗" : "="),
+        ],
+        colsep = 1,
+      )
+
+    if η1 ≤ ρk < Inf
+      xk .= xkn
+      if has_bnds
+        @. l_bound_m_x = l_bound - xk
+        @. u_bound_m_x = u_bound - xk
+        set_bounds!(ψ, l_bound_m_x, u_bound_m_x)
+      end
+      fk = fkn
+      hk = hkn
+      shift!(ψ, xk)
+      grad!(nlp, xk, ∇fk)
+      @. ∇fk⁻ = ∇fk - ∇fk⁻
+      push!(D, s, ∇fk⁻) # update QN operator
+      ∇fk⁻ .= ∇fk
+    end
+
+    if η2 ≤ ρk < Inf
+      σk = max(σk / γ, σmin)
+    end
+
+    if ρk < η1 || ρk == Inf
+      σk = σk * γ
+    end
+
+    set_objective!(stats, fk + hk)
+    set_solver_specific!(stats, :smooth_obj, fk)
+    set_solver_specific!(stats, :nonsmooth_obj, hk)
+    set_iter!(stats, stats.iter + 1)
+    set_time!(stats, time() - start_time)
+
+    @. dkσk = D.d .+ σk
+    DNorm = norm(D.d, Inf)
+
+    ν₁ = 1 / ((DNorm + σk) * (1 + θ))
+
+    @. mν∇fk = -ν₁ * ∇fk
+    m_monotone > 1 && (m_fh_hist[mod(stats.iter+1, m_monotone - 1) + 1] = fk + hk)
+
+    spectral_test ? prox!(s, ψ, mν∇fk, ν₁) : iprox!(s, ψ, ∇fk, dkσk)
+    mks = mk(s)
+
+    while mks == -Inf #TODO add test coverage for this
+      σk = σk * γ
+      dkσk .= D.d .+ σk
+      DNorm = norm(D.d, Inf)
+      ν₁ = 1 / ((DNorm + σk) * (1 + θ))
+      @. mν∇fk = -ν₁ * ∇fk
+      spectral_test ? prox!(s, ψ, mν∇fk, ν₁) : iprox!(s, ψ, ∇fk, dkσk)
+      mks = mk(s)
+    end
+
+    ξ = hk - mks + max(1, abs(hk)) * 10 * eps()
+    sqrt_ξ_νInv = ξ ≥ 0 ? sqrt(ξ / ν₁) : sqrt(-ξ / ν₁)
+    solved = (ξ < 0 && sqrt_ξ_νInv ≤ neg_tol) || (ξ ≥ 0 && sqrt_ξ_νInv ≤ atol)
+    (ξ < 0 && sqrt_ξ_νInv > neg_tol) &&
+      error("R2DH: prox-gradient step should produce a decrease but ξ = $(ξ)")
+
+    set_status!(
+      stats,
+      get_status(
+        reg_nlp,
+        elapsed_time = stats.elapsed_time,
+        iter = stats.iter,
+        optimal = solved,
+        improper = improper,
+        max_eval = max_eval,
+        max_time = max_time,
+        max_iter = max_iter,
+      ),
+    )
+
+    callback(nlp, solver, stats)
+
+    done = stats.status != :unknown
+  end
+
+  if verbose > 0 && stats.status == :first_order
+    @info log_row(
+      Any[
+        stats.iter,
+        fk,
+        hk,
+        sqrt_ξ_νInv,
+        ρk,
+        σk,
+        norm(xk),
+        norm(s),
+        (η2 ≤ ρk < Inf) ? "↘" : (ρk < η1 ? 
"↗" : "="), + ], + colsep = 1, + ) + @info "R2DH: terminating with √(ξ/ν) = $(sqrt_ξ_νInv)" + end + + set_solution!(stats,xk) + set_residuals!(stats, zero(eltype(xk)), sqrt_ξ_νInv) + return stats +end diff --git a/src/R2NModel.jl b/src/R2NModel.jl new file mode 100644 index 00000000..f5f5eaf0 --- /dev/null +++ b/src/R2NModel.jl @@ -0,0 +1,66 @@ +export R2NModel + +@doc raw""" + R2NModel(B, ∇f, v, σ, x0) + +Given the unconstrained optimization problem: +```math +\min f(x), +``` +this model represents the smooth R2N subproblem: +```math +\min_s \ \tfrac{1}{2} s^T B s + ∇f^T s + \tfrac{1}{2} σ\|s\|^2 +``` +where `B` represents either an approximation of the Hessian of `f` or the Hessian itself and `∇f` represents the gradient of `f` on `x0`. +`σ > 0` is a regularization parameter and `v` is a vector of the same size as `x0` used for computations +""" +mutable struct R2NModel{T <: Real, V <: AbstractVector{T}, G <: AbstractLinearOperator{T}} <: AbstractNLPModel{T, V} + B :: G + ∇f :: V + v :: V + σ :: T + meta::NLPModelMeta{T, V} + counters::Counters +end + + function R2NModel( + B :: G, + ∇f :: V, + v :: V, + σ :: T, + x0 :: V +) where{T, V, G} + meta = NLPModelMeta( + length(∇f), + x0 = x0, # Perhaps we should add lvar and uvar as well here. + ) + return R2NModel( + B :: G, + ∇f :: V, + v :: V, + σ :: T, + meta, + Counters() + ) +end + +function NLPModels.obj(nlp::R2NModel, x::AbstractVector) + @lencheck nlp.meta.nvar x + increment!(nlp, :neval_obj) + mul!(nlp.v, nlp.B, x) + return dot(nlp.v, x)/2 + dot(nlp.∇f, x) + nlp.σ * dot(x, x) / 2 +end + +function NLPModels.grad!(nlp::R2NModel, x::AbstractVector, g::AbstractVector) + @lencheck nlp.meta.nvar x + @lencheck nlp.meta.nvar g + increment!(nlp, :neval_grad) + mul!(g, nlp.B, x) + g .+= nlp.∇f + g .+= nlp.σ .* x + return g +end + +function NLPModels.push!(nlp::R2NModel, s::AbstractVector, y::AbstractVector) + push!(nlp.B, s, y) +end \ No newline at end of file diff --git a/src/R2N_alg.jl b/src/R2N_alg.jl new file mode 100644 index 00000000..e97220ba --- /dev/null +++ b/src/R2N_alg.jl @@ -0,0 +1,508 @@ +export R2N, R2NSolver, solve! + +import SolverCore.solve! + +mutable struct R2NSolver{ + T <: Real, + G <: ShiftedProximableFunction, + V <: AbstractVector{T}, + ST <: AbstractOptimizationSolver, + PB <: AbstractRegularizedNLPModel +} <: AbstractOptimizationSolver + xk::V + ∇fk::V + ∇fk⁻::V + mν∇fk::V + ψ::G + xkn::V + s::V + s1::V + has_bnds::Bool + l_bound::V + u_bound::V + l_bound_m_x::V + u_bound_m_x::V + m_fh_hist::V + subsolver::ST + subpb::PB + substats::GenericExecutionStats{T, V, V, T} +end + +function R2NSolver(reg_nlp::AbstractRegularizedNLPModel{T, V}; subsolver = R2Solver, m_monotone::Int = 1) where {T, V} + x0 = reg_nlp.model.meta.x0 + l_bound = reg_nlp.model.meta.lvar + u_bound = reg_nlp.model.meta.uvar + + xk = similar(x0) + ∇fk = similar(x0) + ∇fk⁻ = similar(x0) + mν∇fk = similar(x0) + xkn = similar(x0) + s = similar(x0) + s1 = similar(x0) + v = similar(x0) + has_bnds = any(l_bound .!= T(-Inf)) || any(u_bound .!= T(Inf)) + if has_bnds + l_bound_m_x = similar(xk) + u_bound_m_x = similar(xk) + @. l_bound_m_x = l_bound - x0 + @. u_bound_m_x = u_bound - x0 + else + l_bound_m_x = similar(xk, 0) + u_bound_m_x = similar(xk, 0) + end + m_fh_hist = fill(T(-Inf), m_monotone - 1) + + ψ = has_bnds ? 
shifted(reg_nlp.h, xk, l_bound_m_x, u_bound_m_x, reg_nlp.selected) : shifted(reg_nlp.h, xk)
+
+  Bk = hess_op(reg_nlp.model, x0)
+  sub_nlp = R2NModel(
+    Bk,
+    ∇fk,
+    v,
+    T(1),
+    x0
+  )
+  subpb = RegularizedNLPModel(sub_nlp, ψ)
+  substats = RegularizedExecutionStats(subpb)
+  subsolver = subsolver(subpb)
+
+  return R2NSolver{T, typeof(ψ), V, typeof(subsolver), typeof(subpb)}(
+    xk,
+    ∇fk,
+    ∇fk⁻,
+    mν∇fk,
+    ψ,
+    xkn,
+    s,
+    s1,
+    has_bnds,
+    l_bound,
+    u_bound,
+    l_bound_m_x,
+    u_bound_m_x,
+    m_fh_hist,
+    subsolver,
+    subpb,
+    substats
+  )
+end
+
+
+"""
+    R2N(reg_nlp; kwargs…)
+
+A second-order quadratic regularization method for the problem
+
+    min f(x) + h(x)
+
+where f: ℝⁿ → ℝ has a Lipschitz-continuous gradient, and h: ℝⁿ → ℝ is
+lower semi-continuous, proper and prox-bounded.
+
+About each iterate xₖ, a step sₖ is computed as a solution of
+
+    min φ(s; xₖ) + ½ σₖ ‖s‖² + ψ(s; xₖ)
+
+where φ(s; xₖ) = f(xₖ) + ∇f(xₖ)ᵀs + ½ sᵀBₖs is a quadratic approximation of f about xₖ,
+ψ(s; xₖ) is either h(xₖ + s) or an approximation of h(xₖ + s), ‖⋅‖ is the ℓ₂ norm and σₖ > 0 is the regularization parameter.
+
+For advanced usage, first define a solver `R2NSolver` to preallocate the memory used in the algorithm, and then call `solve!`:
+
+    solver = R2NSolver(reg_nlp; m_monotone = 1)
+    solve!(solver, reg_nlp)
+
+    stats = RegularizedExecutionStats(reg_nlp)
+    solver = R2NSolver(reg_nlp)
+    solve!(solver, reg_nlp, stats)
+
+# Arguments
+* `reg_nlp::AbstractRegularizedNLPModel{T, V}`: the problem to solve, see `RegularizedProblems.jl`, `NLPModels.jl`.
+
+# Keyword arguments
+- `x::V = nlp.meta.x0`: the initial guess;
+- `atol::T = √eps(T)`: absolute tolerance;
+- `rtol::T = √eps(T)`: relative tolerance;
+- `neg_tol::T = eps(T)^(1 / 4)`: tolerance allowed on a negative model decrease ξₖ¹ (see the stopping criterion below);
+- `max_eval::Int = -1`: maximum number of evaluations of the objective function (a negative number means unlimited);
+- `max_time::Float64 = 30.0`: maximum time limit in seconds;
+- `max_iter::Int = 10000`: maximum number of iterations;
+- `verbose::Int = 0`: if > 0, display iteration details every `verbose` iteration;
+- `σmin::T = eps(T)`: minimum value of the regularization parameter;
+- `η1::T = √√eps(T)`: successful iteration threshold;
+- `η2::T = T(0.9)`: very successful iteration threshold;
+- `ν::T = eps(T)^(1 / 5)`: multiplicative inverse of the initial regularization parameter: ν = 1/σ;
+- `γ::T = T(3)`: regularization parameter multiplier: σ := σ/γ when the iteration is very successful and σ := σγ when the iteration is unsuccessful;
+- `θ::T = eps(T)^(1/5)`: model decrease fraction with respect to the decrease of the Cauchy model;
+- `β::T = 1/eps(T)`: step-length safeguard: the subsolver step `s` is replaced by the prox-gradient step `s₁` whenever `‖s‖ > β‖s₁‖`;
+- `subsolver = R2Solver`: solver used for the step computation on the subproblem (e.g., `R2DHSolver`);
+- `m_monotone::Int = 1`: memory length of the non-monotone variant. By default, R2N is monotone; the non-monotone variant is obtained with `m_monotone > 1`.
+
+The algorithm stops either when `√(ξₖ¹/νₖ) < atol + rtol*√(ξ₀¹/ν₀)` or when `ξₖ¹ < 0` and `√(-ξₖ¹/νₖ) < neg_tol`, where ξₖ¹ := f(xₖ) + h(xₖ) - φ₁(sₖ¹; xₖ) - ψ(sₖ¹; xₖ) with φ₁(s; xₖ) = f(xₖ) + ∇f(xₖ)ᵀs, sₖ¹ is the prox-gradient step, and √(ξₖ¹/νₖ) is a stationarity measure.
+
+# Output
+The value returned is a `GenericExecutionStats`, see `SolverCore.jl`.
+
+# Callback
+The callback is called at each iteration.
+The expected signature of the callback is `callback(nlp, solver, stats)`, and its output is ignored.
+Changing any of the input arguments will affect subsequent iterations.
+In particular, setting `stats.status = :user` will stop the algorithm.
+All relevant information should be available in `nlp` and `solver`.
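+For instance, this illustrative callback stops R2N after ten iterations:
+
+    my_callback(nlp, solver, stats) = stats.iter ≥ 10 && (stats.status = :user)
+
+and is passed as `R2N(reg_nlp, callback = my_callback)`.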
+Notably, you can access, and modify, the following: +- `solver.xk`: current iterate; +- `solver.∇fk`: current gradient; +- `stats`: structure holding the output of the algorithm (`GenericExecutionStats`), which contains, among other things: + - `stats.iter`: current iteration counter; + - `stats.objective`: current objective function value; + - `stats.solver_specific[:smooth_obj]`: current value of the smooth part of the objective function + - `stats.solver_specific[:nonsmooth_obj]`: current value of the nonsmooth part of the objective function + - `stats.status`: current status of the algorithm. Should be `:unknown` unless the algorithm has attained a stopping criterion. Changing this to anything will stop the algorithm, but you should use `:user` to properly indicate the intention. + - `stats.elapsed_time`: elapsed time in seconds. +""" +function R2N( + nlp::AbstractNLPModel{T, V}, + h, + options::ROSolverOptions{T}; + kwargs..., +) where {T <: Real, V} + kwargs_dict = Dict(kwargs...) + selected = pop!(kwargs_dict, :selected, 1:(nlp.meta.nvar)) + x0 = pop!(kwargs_dict, :x0, nlp.meta.x0) + reg_nlp = RegularizedNLPModel(nlp, h, selected) + return R2N( + reg_nlp, + x = x0, + atol = options.ϵa, + rtol = options.ϵr, + neg_tol = options.neg_tol, + verbose = options.verbose, + max_iter = options.maxIter, + max_time = options.maxTime, + σmin = options.σmin, + η1 = options.η1, + η2 = options.η2, + ν = options.ν, + γ = options.γ; + kwargs_dict..., + ) +end + +function R2N(reg_nlp::AbstractRegularizedNLPModel; kwargs...) + kwargs_dict = Dict(kwargs...) + m_monotone = pop!(kwargs_dict, :m_monotone, 1) + subsolver = pop!(kwargs_dict, :subsolver, R2Solver) + solver = R2NSolver(reg_nlp, subsolver = subsolver, m_monotone = m_monotone) + stats = RegularizedExecutionStats(reg_nlp) + solve!(solver, reg_nlp, stats; kwargs_dict...) + return stats +end + +function SolverCore.solve!( + solver::R2NSolver{T, G, V}, + reg_nlp::AbstractRegularizedNLPModel{T, V}, + stats::GenericExecutionStats{T, V}; + callback = (args...) -> nothing, + x::V = reg_nlp.model.meta.x0, + atol::T = √eps(T), + rtol::T = √eps(T), + neg_tol::T = eps(T)^(1 / 4), + verbose::Int = 0, + max_iter::Int = 10000, + max_time::Float64 = 30.0, + max_eval::Int = -1, + σmin::T = eps(T), + η1::T = √√eps(T), + η2::T = T(0.9), + ν::T = eps(T)^(1 / 5), + γ::T = T(3), + β::T = 1 / eps(T), + θ::T = eps(T)^(1 / 5), + kwargs... 
+) where{T, V, G} + reset!(stats) + + # Retrieve workspace + selected = reg_nlp.selected + h = reg_nlp.h + nlp = reg_nlp.model + + xk = solver.xk .= x + + # Make sure ψ has the correct shift + shift!(solver.ψ, xk) + + ∇fk = solver.∇fk + ∇fk⁻ = solver.∇fk⁻ + mν∇fk = solver.mν∇fk + ψ = solver.ψ + xkn = solver.xkn + s = solver.s + s1 = solver.s1 + m_fh_hist = solver.m_fh_hist + has_bnds = solver.has_bnds + + if has_bnds + l_bound_m_x = solver.l_bound_m_x + u_bound_m_x = solver.u_bound_m_x + l_bound = solver.l_bound + u_bound = solver.u_bound + end + m_monotone = length(m_fh_hist) + 1 + + # initialize parameters + improper = false + hk = @views h(xk[selected]) + if hk == Inf + verbose > 0 && @info "R2N: finding initial guess where nonsmooth term is finite" + prox!(xk, h, xk, T(1)) + hk = @views h(xk[selected]) + hk < Inf || error("prox computation must be erroneous") + verbose > 0 && @debug "R2N: found point where h has value" hk + end + improper = (hk == -Inf) + + if verbose > 0 + @info log_header( + [:outer, :inner, :fx, :hx, :xi, :ρ, :σ, :normx, :norms, :normB, :arrow], + [Int, Int, T, T, T, T, T, T, T, T, Char], + hdr_override = Dict{Symbol, String}( + :fx => "f(x)", + :hx => "h(x)", + :xi => "√(ξ1/ν)", + :normx => "‖x‖", + :norms => "‖s‖", + :normB => "‖B‖", + :arrow => "R2N", + ), + colsep = 1, + ) + end + + local ξ1::T + local ρk::T = zero(T) + + σk = max(1 / ν, σmin) + + fk = obj(nlp, xk) + grad!(nlp, xk, ∇fk) + ∇fk⁻ .= ∇fk + + quasiNewtTest = isa(nlp, QuasiNewtonModel) + λmax::T = T(1) + solver.subpb.model.B = hess_op(nlp, xk) + + try + λmax = opnorm(solver.subpb.model.B) # TODO: This allocates; see utils.jl + catch LAPACKException # This should be removed ASAP; see PR #159. + λmax = opnorm(Matrix(solver.subpb.model.B)) + end + + ν₁ = 1 / ((λmax + σk) * (1 + θ)) + sqrt_ξ1_νInv = one(T) + + @. mν∇fk = -ν₁ * ∇fk + m_monotone > 1 && (m_fh_hist[mod(stats.iter+1, m_monotone - 1) + 1] = fk + hk) + + set_iter!(stats, 0) + start_time = time() + set_time!(stats, 0.0) + set_objective!(stats, fk + hk) + set_solver_specific!(stats, :smooth_obj, fk) + set_solver_specific!(stats, :nonsmooth_obj, hk) + + φ1 = let ∇fk = ∇fk + d -> dot(∇fk, d) + end + + mk1 = let ψ = ψ + d -> φ1(d) + ψ(d)::T + end + + mk = let ψ = ψ, solver = solver + d -> obj(solver.subpb.model, d) + ψ(d)::T + end + + prox!(s1, ψ, mν∇fk, ν₁) + mks = mk1(s1) + + ξ1 = hk - mks + max(1, abs(hk)) * 10 * eps() + sqrt_ξ1_νInv = ξ1 ≥ 0 ? sqrt(ξ1 / ν₁) : sqrt(-ξ1 / ν₁) + solved = (ξ1 < 0 && sqrt_ξ1_νInv ≤ neg_tol) || (ξ1 ≥ 0 && sqrt_ξ1_νInv ≤ atol) + (ξ1 < 0 && sqrt_ξ1_νInv > neg_tol) && + error("R2N: prox-gradient step should produce a decrease but ξ1 = $(ξ1)") + atol += rtol * sqrt_ξ1_νInv # make stopping test absolute and relative + + set_status!( + stats, + get_status( + reg_nlp, + elapsed_time = stats.elapsed_time, + iter = stats.iter, + optimal = solved, + improper = improper, + max_eval = max_eval, + max_time = max_time, + max_iter = max_iter, + ), + ) + + callback(nlp, solver, stats) + + done = stats.status != :unknown + + while !done + + sub_atol = stats.iter == 0 ? 1.0e-3 : min(sqrt_ξ1_νInv ^ (1.5) , sqrt_ξ1_νInv * 1e-3) + + solver.subpb.model.σ = σk + isa(solver.subsolver, R2DHSolver) && (solver.subsolver.D.d[1] = 1/ν₁) + solve!( + solver.subsolver, + solver.subpb, + solver.substats; + x = s1, + atol = sub_atol, + ν = ν₁, + kwargs... 
+    )
+
+    s .= solver.substats.solution
+
+    if norm(s) > β * norm(s1)
+      s .= s1
+    end
+
+    xkn .= xk .+ s
+    fkn = obj(nlp, xkn)
+    hkn = @views h(xkn[selected])
+    hkn == -Inf && error("nonsmooth term is not proper")
+    mks = mk(s)
+
+    fhmax = m_monotone > 1 ? maximum(m_fh_hist) : fk + hk
+    Δobj = fhmax - (fkn + hkn) + max(1, abs(fk + hk)) * 10 * eps()
+    Δmod = fhmax - (fk + mks) + max(1, abs(fhmax)) * 10 * eps()
+    ξ = hk - mks + max(1, abs(hk)) * 10 * eps()
+
+    if (ξ < 0 || isnan(ξ))
+      error("R2N: failed to compute a step: ξ = $ξ")
+    end
+
+    ρk = Δobj / Δmod
+
+    verbose > 0 &&
+      stats.iter % verbose == 0 &&
+      @info log_row(
+        Any[
+          stats.iter,
+          solver.substats.iter,
+          fk,
+          hk,
+          sqrt_ξ1_νInv,
+          ρk,
+          σk,
+          norm(xk),
+          norm(s),
+          λmax,
+          (η2 ≤ ρk < Inf) ? "↘" : (ρk < η1 ? "↗" : "="),
+        ],
+        colsep = 1,
+      )
+
+    if η1 ≤ ρk < Inf
+      xk .= xkn
+      if has_bnds
+        @. l_bound_m_x = l_bound - xk
+        @. u_bound_m_x = u_bound - xk
+        set_bounds!(ψ, l_bound_m_x, u_bound_m_x)
+      end
+      # update functions
+      fk = fkn
+      hk = hkn
+
+      shift!(ψ, xk)
+      ∇fk = grad!(nlp, xk, ∇fk)
+
+      if quasiNewtTest
+        @. ∇fk⁻ = ∇fk - ∇fk⁻
+        push!(nlp, s, ∇fk⁻)
+      end
+      solver.subpb.model.B = hess_op(nlp, xk)
+
+      try
+        λmax = opnorm(solver.subpb.model.B)
+      catch LAPACKException
+        λmax = opnorm(Matrix(solver.subpb.model.B))
+      end
+
+      ∇fk⁻ .= ∇fk
+    end
+
+    if η2 ≤ ρk < Inf
+      σk = max(σk / γ, σmin)
+    end
+
+    if ρk < η1 || ρk == Inf
+      σk = σk * γ
+    end
+
+    ν₁ = 1 / ((λmax + σk) * (1 + θ))
+    m_monotone > 1 && (m_fh_hist[mod(stats.iter+1, m_monotone - 1) + 1] = fk + hk)
+
+    set_objective!(stats, fk + hk)
+    set_solver_specific!(stats, :smooth_obj, fk)
+    set_solver_specific!(stats, :nonsmooth_obj, hk)
+    set_iter!(stats, stats.iter + 1)
+    set_time!(stats, time() - start_time)
+
+    @. mν∇fk = -ν₁ * ∇fk
+    prox!(s1, ψ, mν∇fk, ν₁)
+    mks = mk1(s1)
+
+    ξ1 = hk - mks + max(1, abs(hk)) * 10 * eps()
+
+    sqrt_ξ1_νInv = ξ1 ≥ 0 ? sqrt(ξ1 / ν₁) : sqrt(-ξ1 / ν₁)
+    solved = (ξ1 < 0 && sqrt_ξ1_νInv ≤ neg_tol) || (ξ1 ≥ 0 && sqrt_ξ1_νInv ≤ atol)
+
+    (ξ1 < 0 && sqrt_ξ1_νInv > neg_tol) &&
+      error("R2N: prox-gradient step should produce a decrease but ξ1 = $(ξ1)")
+    set_status!(
+      stats,
+      get_status(
+        reg_nlp,
+        elapsed_time = stats.elapsed_time,
+        iter = stats.iter,
+        optimal = solved,
+        improper = improper,
+        max_eval = max_eval,
+        max_time = max_time,
+        max_iter = max_iter,
+      ),
+    )
+
+    callback(nlp, solver, stats)
+
+    done = stats.status != :unknown
+  end
+
+  if verbose > 0 && stats.status == :first_order
+    @info log_row(
+      Any[
+        stats.iter,
+        0,
+        fk,
+        hk,
+        sqrt_ξ1_νInv,
+        ρk,
+        σk,
+        norm(xk),
+        norm(s),
+        λmax,
+        (η2 ≤ ρk < Inf) ? "↘" : (ρk < η1 ? 
"↗" : "="), + ], + colsep = 1, + ) + @info "R2N: terminating with √(ξ1/ν) = $(sqrt_ξ1_νInv)" + end + + set_solution!(stats, xk) + set_residuals!(stats, zero(eltype(xk)), sqrt_ξ1_νInv) + return stats +end \ No newline at end of file diff --git a/src/RegularizedOptimization.jl b/src/RegularizedOptimization.jl index 908eb928..5181133b 100644 --- a/src/RegularizedOptimization.jl +++ b/src/RegularizedOptimization.jl @@ -19,9 +19,11 @@ include("splitting.jl") include("TR_alg.jl") include("TRDH_alg.jl") include("R2_alg.jl") +include("R2DH_alg.jl") +include("R2NModel.jl") +include("R2N_alg.jl") include("LM_alg.jl") include("LMTR_alg.jl") - include("AL_alg.jl") end # module RegularizedOptimization diff --git a/src/input_struct.jl b/src/input_struct.jl index 2e9937ce..7242b90a 100644 --- a/src/input_struct.jl +++ b/src/input_struct.jl @@ -9,6 +9,7 @@ mutable struct ROSolverOptions{R} maxIter::Int # maximum amount of inner iterations maxTime::Float64 #maximum time allotted to the algorithm in s σmin::R # minimum σk allowed for LM/R2 method + σk::R # initial σk η1::R # step acceptance threshold η2::R # trust-region increase threshold α::R # νk Δ^{-1} parameter @@ -27,6 +28,7 @@ mutable struct ROSolverOptions{R} maxIter::Int = 10000, maxTime::Float64 = 3600.0, σmin::R = eps(R), + σk::R = eps(R)^(1 / 5), η1::R = √√eps(R), η2::R = R(0.9), α::R = 1 / eps(R), @@ -44,6 +46,7 @@ mutable struct ROSolverOptions{R} @assert maxIter ≥ 0 @assert maxTime ≥ 0 @assert σmin ≥ 0 + @assert σk ≥ 0 @assert 0 < η1 < η2 < 1 @assert α > 0 @assert ν > 0 @@ -59,6 +62,7 @@ mutable struct ROSolverOptions{R} maxIter, maxTime, σmin, + σk, η1, η2, α, diff --git a/src/utils.jl b/src/utils.jl index d1b2c028..f0853b7d 100644 --- a/src/utils.jl +++ b/src/utils.jl @@ -33,9 +33,9 @@ More specifically, construct a GenericExecutionStats on the NLPModel of reg_nlp This is useful for reducing the number of allocations when calling solve!(..., reg_nlp, stats) and should be used by default. Warning: This should *not* be used when adding other solver_specific entries that do not have the current scalar type. """ -function RegularizedExecutionStats(reg_nlp :: AbstractRegularizedNLPModel{T, V}) where{T, V} +function RegularizedExecutionStats(reg_nlp::AbstractRegularizedNLPModel{T, V}) where {T, V} stats = GenericExecutionStats(reg_nlp.model, solver_specific = Dict{Symbol, T}()) set_solver_specific!(stats, :smooth_obj, T(Inf)) set_solver_specific!(stats, :nonsmooth_obj, T(Inf)) return stats -end \ No newline at end of file +end diff --git a/test/runtests.jl b/test/runtests.jl index 39f3a9a0..16f0ab74 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -134,5 +134,26 @@ for (h, h_name) ∈ ((NormL1(λ), "l1"),) end end +R2N_R2DH(args...; kwargs...) = R2N(args...; subsolver = R2DHSolver, kwargs...) 
+for (mod, mod_name) ∈ ((SpectralGradientModel, "spg"), (DiagonalPSBModel, "psb"), (LSR1Model, "lsr1"), (LBFGSModel, "lbfgs"))
+  for (h, h_name) ∈ ((NormL0(λ), "l0"), (NormL1(λ), "l1"))
+    for solver_sym ∈ (:R2DH, :R2N, :R2N_R2DH)
+      solver_sym ∈ (:R2N, :R2N_R2DH) && mod_name ∈ ("spg", "psb") && continue
+      solver_sym == :R2DH && mod_name != "spg" && continue
+      solver_sym == :R2N_R2DH && h_name == "l1" && continue # skipped: currently fails, apparently because s remains zero within the subsolver
+      solver_name = string(solver_sym)
+      solver = eval(solver_sym)
+      @testset "bpdn-$(mod_name)-$(solver_name)-$(h_name)" begin
+        x0 = zeros(bpdn.meta.nvar)
+        out = solver(mod(bpdn), h, options, x0 = x0)
+        @test typeof(out.solution) == typeof(bpdn.meta.x0)
+        @test length(out.solution) == bpdn.meta.nvar
+        @test typeof(out.dual_feas) == eltype(out.solution)
+        @test out.status == :first_order
+      end
+    end
+  end
+end
+
 include("test_bounds.jl")
 include("test_allocs.jl")
diff --git a/test/test_allocs.jl b/test/test_allocs.jl
index 3eb218eb..7a0ad508 100644
--- a/test/test_allocs.jl
+++ b/test/test_allocs.jl
@@ -26,23 +26,30 @@ allocate:
 ```
 """
 macro wrappedallocs(expr)
-  argnames = [gensym() for a in expr.args]
+  # keyword arguments are spliced into the generated call with their literal values
+  kwargs = [a for a in expr.args if isa(a, Expr) && a.head == :kw]
+  args = [a for a in expr.args if isa(a, Symbol)]
+
+  argnames = [gensym() for a in args]
   quote
     function g($(argnames...))
-      @allocated $(Expr(expr.head, argnames...))
+      @allocated $(Expr(expr.head, argnames..., kwargs...))
    end
-    $(Expr(:call, :g, [esc(a) for a in expr.args]...))
+    $(Expr(:call, :g, [esc(a) for a in args]...))
  end
end

# Test non allocating solve!
@testset "allocs" begin
-  for (h, h_name) ∈ ((NormL0(λ), "l0"), (NormL1(λ), "l1"))
-    for solver ∈ (:R2Solver,)
-      reg_nlp = RegularizedNLPModel(bpdn, h)
-      solver = eval(solver)(reg_nlp)
-      stats = RegularizedExecutionStats(reg_nlp)
-      @test @wrappedallocs(solve!(solver, reg_nlp, stats)) == 0
+  for (h, h_name) ∈ ((NormL0(λ), "l0"),)
+    for (solver_constructor, solver_name) ∈ ((R2Solver, "R2"), (R2NSolver, "R2N"), (R2DHSolver, "R2DH"))
+      @testset "$(solver_name) - allocations" begin
+        reg_nlp = RegularizedNLPModel(LBFGSModel(bpdn), h)
+        solver = solver_constructor(reg_nlp)
+        stats = RegularizedExecutionStats(reg_nlp)
+        @test @wrappedallocs(solve!(solver, reg_nlp, stats, ν = 1.0, atol = 1e-6, rtol = 1e-6)) == 0
+        @test stats.status == :first_order
+      end
     end
   end
 end