diff --git a/Project.toml b/Project.toml
index d5ba1185..b1104def 100644
--- a/Project.toml
+++ b/Project.toml
@@ -4,6 +4,8 @@ author = ["Robert Baraldi and Dominique Orban
diff --git a/src/L2Penalty_alg.jl b/src/L2Penalty_alg.jl
new file mode 100644
--- /dev/null
+++ b/src/L2Penalty_alg.jl
+  ψ = CompositeNormL2(1.0,
+    (c, x) -> cons!(nlp, x, c),
+    (j, x) -> jac_coord!(nlp, x, j.vals),
+    A,
+    b
+  )
+
+  # Allocate sub_ψ = ||c(x)|| to solve min f(x) + τ||c(x)||
+  sub_ψ = CompositeNormL2(1.0,
+    (c, x) -> cons!(nlp, x, c),
+    (j, x) -> jac_coord!(nlp, x, j.vals),
+    A,
+    b
+  )
+  sub_nlp = RegularizedNLPModel(nlp, sub_ψ)
+  sub_stats = GenericExecutionStats(nlp)
+  if sub_solver == R2NSolver
+    Solver = sub_solver(sub_nlp, sub_solver = L2_R2N_subsolver)
+  else
+    Solver = sub_solver(sub_nlp)
+  end
+
+  return L2PenaltySolver(
+    x,
+    s,
+    s0,
+    ψ,
+    sub_ψ,
+    Solver,
+    sub_stats
+  )
+end
+
+"""
+    L2Penalty(nlp; kwargs...)
+
+An exact ℓ₂-penalty method for the problem
+
+    min f(x)  s.t.  c(x) = 0
+
+where f: ℝⁿ → ℝ has a Lipschitz-continuous gradient and c: ℝⁿ → ℝᵐ has a Lipschitz-continuous Jacobian.
+
+At each iteration k, an iterate is computed as
+
+    xₖ ∈ argmin f(x) + τₖ‖c(x)‖₂
+
+where τₖ is a penalty parameter.
+This nonsmooth problem is solved using `R2` (see `R2` for more information) with the first-order model ψ(s; x) = τₖ‖c(x) + J(x)s‖₂.
+
+For advanced usage, first define a solver `L2PenaltySolver` to preallocate the memory used in the algorithm, and then call `solve!`:
+
+    solver = L2PenaltySolver(nlp)
+    solve!(solver, nlp)
+
+    stats = GenericExecutionStats(nlp)
+    solver = L2PenaltySolver(nlp)
+    solve!(solver, nlp, stats)
+
+# Arguments
+* `nlp::AbstractNLPModel{T, V}`: the problem to solve, see `RegularizedProblems.jl`, `NLPModels.jl`.
+
+# Keyword arguments
+- `x::V = nlp.meta.x0`: the initial guess;
+- `atol::T = √eps(T)`: absolute tolerance;
+- `rtol::T = √eps(T)`: relative tolerance;
+- `neg_tol::T = eps(T)^(1 / 4)`: tolerance used when the stationarity measure θₖ is slightly negative;
+- `ktol::T = eps(T)^(1 / 4)`: initial tolerance sent to the subsolver;
+- `max_eval::Int = -1`: maximum number of evaluations of the objective function (a negative number means unlimited);
+- `sub_max_eval::Int = -1`: maximum number of evaluations for the subsolver (a negative number means unlimited);
+- `max_time::Float64 = 30.0`: maximum time limit in seconds;
+- `max_iter::Int = 10000`: maximum number of iterations;
+- `sub_max_iter::Int = 10000`: maximum number of iterations for the subsolver;
+- `max_decrease_iter::Int = 10`: maximum number of successive iterations where ‖c(xₖ)‖₂ does not decrease before the problem is declared locally infeasible;
+- `verbose::Int = 0`: if > 0, display iteration details every `verbose` iteration;
+- `sub_verbose::Int = 0`: if > 0, display subsolver iteration details every `sub_verbose` iteration;
+- `τ::T = T(100)`: initial penalty parameter;
+- `β1::T = τ`: penalty update parameter: τₖ ← τₖ + β1;
+- `β2::T = T(0.1)`: tolerance decrease factor: at each iteration, ktol ← β2*ktol;
+- `β3::T = 1/τ`: initial regularization parameter σ₀ = β3*τₖ at each iteration;
+- `β4::T = eps(T)`: minimal regularization parameter σ for `R2`;
+other `kwargs` are passed to `R2` (see `R2` for more information).
+
+The algorithm stops either when `√θₖ < atol + rtol*√θ₀`, or when `θₖ < 0` and `√(-θₖ) < neg_tol`, where θₖ := ‖c(xₖ)‖₂ - ‖c(xₖ) + J(xₖ)sₖ‖₂ and √θₖ is a stationarity measure.
+
+# Output
+The value returned is a `GenericExecutionStats`, see `SolverCore.jl`.
+
+# Callback
+The callback is called at each iteration.
+The expected signature of the callback is `callback(nlp, solver, stats)`, and its output is ignored.
+Changing any of the input arguments will affect the subsequent iterations.
+In particular, setting `stats.status = :user` will stop the algorithm.
+All relevant information should be available in `nlp` and `solver`.
+Notably, you can access, and modify, the following:
+- `solver.x`: current iterate;
+- `solver.sub_solver`: a `R2Solver` structure holding relevant information on the subsolver state, see `R2` for more information;
+- `stats`: structure holding the output of the algorithm (`GenericExecutionStats`), which contains, among other things:
+  - `stats.iter`: current iteration counter;
+  - `stats.objective`: current objective function value;
+  - `stats.status`: current status of the algorithm. Should be `:unknown` unless the algorithm has attained a stopping criterion. Changing this to anything will stop the algorithm, but you should use `:user` to properly indicate the intention.
+  - `stats.elapsed_time`: elapsed time in seconds.
+You can also use the `sub_callback` keyword argument, which has exactly the same structure and is sent to `R2`.
+"""
+function L2Penalty(
+  nlp::AbstractNLPModel{T, V};
+  sub_solver = R2Solver,
+  kwargs...,
+) where {T <: Real, V}
+  if !equality_constrained(nlp)
+    error("L2Penalty: This algorithm only works for equality constrained problems.")
+  end
+  solver = L2PenaltySolver(nlp, sub_solver = sub_solver)
+  stats = GenericExecutionStats(nlp)
+  solve!(
+    solver,
+    nlp,
+    stats;
+    kwargs...
+  )
+  return stats
+end
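A minimal usage sketch (illustrative, not part of the patch): it builds a small equality-constrained `ADNLPModel` — the same model type the new tests rely on — and runs `L2Penalty` with the default `R2` subsolver. The toy objective and constraint are of my choosing.

```julia
using ADNLPModels, RegularizedOptimization

# Toy problem: min (x₁ - 1)² + 4(x₂ + 2)²  s.t.  x₁ + x₂ = 1
nlp = ADNLPModel(
  x -> (x[1] - 1)^2 + 4 * (x[2] + 2)^2, # f
  [0.0, 0.0],                           # x0
  x -> [x[1] + x[2] - 1],               # c
  [0.0],                                # lcon
  [0.0],                                # ucon (lcon = ucon ⇒ equality constraint)
)

stats = L2Penalty(nlp, atol = 1e-4, rtol = 1e-4, verbose = 1)
println(stats.status, " at x = ", stats.solution)
```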
+function SolverCore.solve!(
+  solver::L2PenaltySolver{T, V},
+  nlp::AbstractNLPModel{T, V},
+  stats::GenericExecutionStats{T, V};
+  callback = (args...) -> nothing,
+  sub_callback = (args...) -> nothing,
+  x::V = nlp.meta.x0,
+  atol::T = √eps(T),
+  rtol::T = √eps(T),
+  neg_tol = eps(T)^(1 / 4),
+  ktol::T = eps(T)^(1 / 4),
+  max_iter::Int = 10000,
+  sub_max_iter::Int = 10000,
+  max_time::T = T(30.0),
+  max_eval::Int = -1,
+  sub_max_eval::Int = -1,
+  max_decrease_iter = 10,
+  verbose = 0,
+  sub_verbose = 0,
+  τ::T = T(100),
+  β1::T = τ,
+  β2::T = T(0.1),
+  β3::T = 1 / τ,
+  β4::T = eps(T),
+  kwargs...,
+) where {T, V}
+
+  reset!(stats)
+
+  # Retrieve workspace
+  h = NormL2(1.0)
+  ψ = solver.ψ
+  sub_ψ = solver.sub_ψ
+  sub_ψ.h = NormL2(τ)
+  solver.sub_solver.ψ.h = NormL2(τ)
+
+  x = solver.x .= x
+  s = solver.s
+  s0 = solver.s0
+  shift!(ψ, x)
+  fx = obj(nlp, x)
+  hx = h(ψ.b)
+
+  if verbose > 0
+    @info log_header(
+      [:iter, :sub_iter, :fx, :hx, :theta, :xi, :epsk, :tau, :normx],
+      [Int, Int, Float64, Float64, Float64, Float64, Float64, Float64, Float64],
+      hdr_override = Dict{Symbol, String}( # TODO: Add this as constant dict elsewhere
+        :iter => "outer",
+        :sub_iter => "inner",
+        :fx => "f(x)",
+        :hx => "‖c(x)‖₂",
+        :theta => "√θ",
+        :xi => "√(ξ/ν)",
+        :epsk => "ϵₖ",
+        :tau => "τ",
+        :normx => "‖x‖",
+      ),
+      colsep = 1,
+    )
+  end
+
+  set_iter!(stats, 0)
+  rem_eval = max_eval
+  start_time = time()
+  set_time!(stats, 0.0)
+  set_objective!(stats, fx + hx)
+  set_solver_specific!(stats, :smooth_obj, fx)
+  set_solver_specific!(stats, :nonsmooth_obj, hx)
+
+  local θ::T
+  prox!(s, ψ, s0, T(1))
+  θ = hx - ψ(s)
+
+  sqrt_θ = θ ≥ 0 ? sqrt(θ) : sqrt(-θ)
+  θ < 0 && sqrt_θ ≥ neg_tol && error("L2Penalty: prox-gradient step should produce a decrease but θ = $(θ)")
+
+  atol += rtol * sqrt_θ # make stopping test absolute and relative
+  ktol = max(ktol, atol) # keep ϵ₀ ≥ ϵ
+  tol_init = ktol # store the value of ϵ₀
+
+  done = false
+
+  n_iter_since_decrease = 0
+
+  while !done
+    model = RegularizedNLPModel(nlp, sub_ψ)
+    solve!(
+      solver.sub_solver,
+      model,
+      solver.sub_stats;
+      callback = sub_callback,
+      x = x,
+      atol = ktol,
+      rtol = T(0),
+      neg_tol = neg_tol,
+      verbose = sub_verbose,
+      max_iter = sub_max_iter,
+      max_time = max_time - stats.elapsed_time,
+      max_eval = min(rem_eval, sub_max_eval),
+      σmin = β4,
+      ν = 1 / max(β4, β3 * τ),
+      kwargs...,
+    )
+
+    x .= solver.sub_stats.solution
+    fx = solver.sub_stats.solver_specific[:smooth_obj]
+    hx_prev = hx
+    hx = solver.sub_stats.solver_specific[:nonsmooth_obj] / τ
+    sqrt_ξ_νInv = solver.sub_stats.solver_specific[:xi]
+
+    shift!(ψ, x)
+    prox!(s, ψ, s0, T(1))
+
+    θ = hx - ψ(s)
+    sqrt_θ = θ ≥ 0 ? sqrt(θ) : sqrt(-θ)
+    θ < 0 && sqrt_θ ≥ neg_tol && error("L2Penalty: prox-gradient step should produce a decrease but θ = $(θ)")
+
+    if sqrt_θ > ktol
+      τ = τ + β1
+      sub_ψ.h = NormL2(τ)
+      solver.sub_solver.ψ.h = NormL2(τ)
+    else
+      n_iter_since_decrease = 0
+      ktol = max(β2^(ceil(log(β2, sqrt_ξ_νInv / tol_init))) * ktol, atol) # the β2^(...) factor jumps directly to a sufficiently small ϵₖ
+    end
+    if sqrt_θ > ktol && hx_prev ≥ hx
+      n_iter_since_decrease += 1
+    else
+      n_iter_since_decrease = 0
+    end
+
+    solved = (sqrt_θ ≤ atol && solver.sub_stats.status == :first_order) || (θ < 0 && sqrt_θ ≤ neg_tol && solver.sub_stats.status == :first_order)
+    (θ < 0 && sqrt_θ > neg_tol) && error("L2Penalty: prox-gradient step should produce a decrease but θ = $(θ)")
+
+    verbose > 0 &&
+      stats.iter % verbose == 0 &&
+      @info log_row(Any[stats.iter, solver.sub_stats.iter, fx, hx, sqrt_θ, sqrt_ξ_νInv, ktol, τ, norm(x)], colsep = 1)
+
+    set_iter!(stats, stats.iter + 1)
+    rem_eval = max_eval - neval_obj(nlp)
+    set_time!(stats, time() - start_time)
+    set_objective!(stats, fx + hx)
+    set_solver_specific!(stats, :smooth_obj, fx)
+    set_solver_specific!(stats, :nonsmooth_obj, hx)
+    set_solver_specific!(stats, :theta, sqrt_θ)
+
+    set_status!(
+      stats,
+      get_status(
+        nlp,
+        elapsed_time = stats.elapsed_time,
+        n_iter_since_decrease = n_iter_since_decrease,
+        iter = stats.iter,
+        optimal = solved,
+        max_eval = max_eval,
+        max_time = max_time,
+        max_iter = max_iter,
+        max_decrease_iter = max_decrease_iter,
+      ),
+    )
+
+    callback(nlp, solver, stats)
+
+    done = stats.status != :unknown
+  end
+
+  set_solution!(stats, x)
+  return stats
+end
+
+function get_status(
+  nlp::M;
+  elapsed_time = 0.0,
+  iter = 0,
+  optimal = false,
+  n_iter_since_decrease = 0,
+  max_eval = Inf,
+  max_time = Inf,
+  max_iter = Inf,
+  max_decrease_iter = Inf,
+) where {M <: AbstractNLPModel}
+  if optimal
+    :first_order
+  elseif iter > max_iter
+    :max_iter
+  elseif elapsed_time > max_time
+    :max_time
+  elseif neval_obj(nlp) > max_eval && max_eval > -1
+    :max_eval
+  elseif n_iter_since_decrease ≥ max_decrease_iter
+    :infeasible
+  else
+    :unknown
+  end
+end
+
+mutable struct L2_R2N_subsolver{T <: Real, V <: AbstractVector{T}} <: AbstractOptimizationSolver
+  u1::V
+  u2::V
+end
+
+function L2_R2N_subsolver(reg_nlp::AbstractRegularizedNLPModel{T, V}) where {T, V}
+  x0 = reg_nlp.model.meta.x0
+  n = reg_nlp.model.meta.nvar
+  m = length(reg_nlp.h.b)
+  u1 = similar(x0, n + m)
+  u2 = zeros(eltype(x0), n + m)
+
+  return L2_R2N_subsolver(
+    u1,
+    u2,
+  )
+end
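The `solve!` below computes the ℓ₂-norm trust-region step by repeatedly solving a symmetric saddle-point system with MINRES-QLP. A standalone sketch of one such solve, with toy data and names of my choosing, mirroring the block-operator syntax used below:

```julia
using LinearAlgebra, LinearOperators, Krylov

n, m = 4, 2
Q = LinearOperator(Symmetric(2.0 * Matrix(1.0I, n, n))) # model Hessian block
A = LinearOperator(ones(m, n) + Matrix(1.0I, m, n))     # constraint Jacobian block
α = 1e-3                                                # diagonal shift

# Same block structure as below: H = [-Q Aᵀ; A αI] is symmetric, so MINRES-QLP applies.
H = [[-Q A']; [A α * opEye(m, m)]]
rhs = ones(n + m)
y, st = minres_qlp(H, rhs)
@show st.solved norm(H * y - rhs)
```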
+function solve!(
+  solver::L2_R2N_subsolver{T, V},
+  reg_nlp::AbstractRegularizedNLPModel{T, V},
+  stats::GenericExecutionStats{T, V, V, Any},
+  ∇fk::V,
+  Q::L,
+  σk::T;
+  x = reg_nlp.model.meta.x0,
+  atol = eps(T)^(0.5),
+  max_time = T(30),
+  max_iter = 10000,
+) where {T <: Real, V <: AbstractVector{T}, L <: AbstractLinearOperator}
+
+  start_time = time()
+  set_time!(stats, 0.0)
+  set_iter!(stats, 0)
+
+  n = reg_nlp.model.meta.nvar
+  m = length(reg_nlp.h.b)
+  Δ = reg_nlp.h.h.lambda / σk
+
+  u1 = solver.u1
+  u2 = solver.u2
+
+  # Create problem
+  @. u1[1:n] = ∇fk / σk
+  @. u1[n+1:n+m] = -reg_nlp.h.b
+
+  αₖ = zero(T)
+  αmin = sqrt(eps(T))
+  θ = T(0.8)
+
+  H = [[-Q reg_nlp.h.A']; [reg_nlp.h.A αₖ * opEye(m, m)]]
+  x1, stats_minres = minres_qlp(H, u1)
+
+  if norm(x1[n+1:n+m]) <= Δ && reg_nlp.h.full_row_rank
+    set_solution!(stats, x1[1:n])
+    return
+  end
+  if stats_minres.inconsistent
+    αₖ = αmin
+    H = [[-Q reg_nlp.h.A']; [reg_nlp.h.A αₖ * opEye(m, m)]]
+    x1, _ = minres_qlp(H, u1)
+  end
+  u2[n+1:n+m] .= x1[n+1:n+m]
+  x2, _ = minres_qlp(H, u2)
+
+  α₊ = αₖ + norm(x1[n+1:n+m])^2 / (x1[n+1:n+m]'x2[n+1:n+m]) * (norm(x1[n+1:n+m]) - Δ) / Δ
+  αₖ = α₊ ≤ 0 ? θ * αₖ : α₊ # backtrack when the update goes nonpositive, otherwise accept it
+  αₖ = αₖ ≤ αmin ? αmin : αₖ
+
+  while abs(norm(x1[n+1:n+m]) - Δ) > eps(T)^(0.75) && stats.iter < max_iter && stats.elapsed_time < max_time
+
+    H = [[-Q reg_nlp.h.A']; [reg_nlp.h.A αₖ * opEye(m, m)]]
+    x1, _ = minres_qlp(H, u1)
+
+    αₖ == αmin && break
+
+    u2[n+1:n+m] .= x1[n+1:n+m]
+    x2, _ = minres_qlp(H, u2)
+
+    α₊ = αₖ + norm(x1[n+1:n+m])^2 / (x1[n+1:n+m]'x2[n+1:n+m]) * (norm(x1[n+1:n+m]) - Δ) / Δ
+    αₖ = α₊ ≤ 0 ? θ * αₖ : α₊
+    αₖ = αₖ ≤ αmin ? αmin : αₖ
+
+    set_iter!(stats, stats.iter + 1)
+    set_time!(stats, time() - start_time)
+  end
+  set_solution!(stats, x1[1:n])
+end
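The α-loop above is a Newton-type iteration on the secular equation for the ℓ₂ trust-region constraint. In the notation of the code, with y(α) = x1[n+1:n+m] and z(α) = x2[n+1:n+m], it reads (my reconstruction from the updates above):

```latex
\phi(\alpha) = \|y(\alpha)\|_2 - \Delta = 0, \qquad
\alpha_{+} = \alpha_k
  + \frac{\|y(\alpha_k)\|_2^2}{y(\alpha_k)^\top z(\alpha_k)}
    \cdot \frac{\|y(\alpha_k)\|_2 - \Delta}{\Delta},
```

with the safeguard αₖ ← θαₖ whenever α₊ ≤ 0 and the floor αₖ ≥ αmin.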
diff --git a/src/LM_alg.jl b/src/LM_alg.jl
index 56bc5356..a23973a4 100644
--- a/src/LM_alg.jl
+++ b/src/LM_alg.jl
@@ -104,13 +104,15 @@ function LM(
   k = 0
   Fobj_hist = zeros(maxIter)
   Hobj_hist = zeros(maxIter)
+  R = eltype(xk)
+  sqrt_ξ1_νInv = one(R)
   Complex_hist = zeros(Int, maxIter)
   Grad_hist = zeros(Int, maxIter)
   Resid_hist = zeros(Int, maxIter)

   if verbose > 0
     #! format: off
-    @info @sprintf "%6s %8s %8s %8s %7s %7s %8s %7s %7s %7s %7s %1s" "outer" "inner" "f(x)" "h(x)" "√ξ1" "√ξ" "ρ" "σ" "‖x‖" "‖s‖" "‖Jₖ‖²" "reg"
+    @info @sprintf "%6s %8s %8s %8s %7s %7s %8s %7s %7s %7s %7s %1s" "outer" "inner" "f(x)" "h(x)" "√(ξ1/ν)" "√ξ" "ρ" "σ" "‖x‖" "‖s‖" "‖Jₖ‖²" "reg"
     #! format: on
   end
@@ -176,14 +178,15 @@ function LM(
     prox!(s, ψ, ∇fk, ν)
     ξ1 = fk + hk - mk1(s) + max(1, abs(fk + hk)) * 10 * eps() # TODO: isn't mk(s) returned by subsolver?
     ξ1 > 0 || error("LM: first prox-gradient step should produce a decrease but ξ1 = $(ξ1)")
+    sqrt_ξ1_νInv = sqrt(ξ1 * νInv)

     if ξ1 ≥ 0 && k == 1
-      ϵ_increment = ϵr * sqrt(ξ1)
+      ϵ_increment = ϵr * sqrt_ξ1_νInv
       ϵ += ϵ_increment # make stopping test absolute and relative
       ϵ_subsolver += ϵ_increment
     end

-    if sqrt(ξ1) < ϵ
+    if sqrt_ξ1_νInv < ϵ
       # the current xk is approximately first-order stationary
       optimal = true
       continue
@@ -191,7 +194,8 @@ function LM(
     subsolver_options.ϵa = k == 1 ? 1.0e-1 : max(ϵ_subsolver, min(1.0e-2, ξ1 / 10))
     subsolver_options.ν = ν
-    subsolver_args = subsolver == TRDH ? (SpectralGradient(1 / ν, nls.meta.nvar),) : ()
+    #subsolver_args = subsolver == TRDH ? (SpectralGradient(1 / ν, nls.meta.nvar),) : ()
+    subsolver_args = subsolver == R2DH ? (SpectralGradient(1., nls.meta.nvar),) : ()
     @debug "setting inner stopping tolerance to" subsolver_options.optTol
     s, iter, _ = with_logger(subsolver_logger) do
       subsolver(φ, ∇φ!, ψ, subsolver_args..., subsolver_options, s)
@@ -221,7 +225,7 @@ function LM(
     if (verbose > 0) && (k % ptf == 0)
       #! format: off
-      @info @sprintf "%6d %8d %8.1e %8.1e %7.1e %7.1e %8.1e %7.1e %7.1e %7.1e %7.1e %1s" k iter fk hk sqrt(ξ1) sqrt(ξ) ρk σk norm(xk) norm(s) νInv σ_stat
+      @info @sprintf "%6d %8d %8.1e %8.1e %7.1e %7.1e %8.1e %7.1e %7.1e %7.1e %7.1e %1s" k iter fk hk sqrt_ξ1_νInv sqrt(ξ) ρk σk norm(xk) norm(s) νInv σ_stat
       #! format: off
     end
@@ -244,6 +248,7 @@ function LM(
       jtprod_residual!(nls, xk, Fk, ∇fk)
       σmax = opnorm(Jk)
+      Complex_hist[k] += 1
     end
@@ -252,6 +257,7 @@ function LM(
       σk = σk * γ
     end
     νInv = (1 + θ) * (σmax^2 + σk) # ‖J'J + σₖ I‖ = ‖J‖² + σₖ
+
     tired = k ≥ maxIter || elapsed_time > maxTime
   end
@@ -260,9 +266,9 @@ function LM(
       @info @sprintf "%6d %8s %8.1e %8.1e" k "" fk hk
     elseif optimal
       #! format: off
-      @info @sprintf "%6d %8d %8.1e %8.1e %7.1e %7.1e %8s %7.1e %7.1e %7.1e %7.1e" k 1 fk hk sqrt(ξ1) sqrt(ξ1) "" σk norm(xk) norm(s) νInv
+      @info @sprintf "%6d %8d %8.1e %8.1e %7.1e %7.1e %8s %7.1e %7.1e %7.1e %7.1e" k 1 fk hk sqrt_ξ1_νInv sqrt(ξ1) "" σk norm(xk) norm(s) νInv
       #! format: on
-      @info "LM: terminating with √ξ1 = $(sqrt(ξ1))"
+      @info "LM: terminating with √(ξ1/ν) = $(sqrt_ξ1_νInv)"
     end
   end
   status = if optimal
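Why √(ξ1/ν) rather than √ξ1: when h ≡ 0 the prox-gradient step is s = -ν∇f(x), so ξ1 = ν‖∇f(x)‖² and √(ξ1/ν) = ‖∇f(x)‖ — a ν-independent, gradient-norm-like stationarity measure. A quick sanity check on a toy function of my choosing:

```julia
using LinearAlgebra

f(x) = 0.5 * dot(x, x) - x[1]
∇f(x) = x - [1.0; zeros(length(x) - 1)]

x = [2.0, -1.0, 0.5]
for ν in (1e-2, 1e-4)
  s = -ν * ∇f(x)                      # prox-gradient step when h ≡ 0
  ξ1 = f(x) - (f(x) + dot(∇f(x), s))  # fk + hk - mk1(s) with h ≡ 0
  @show ν sqrt(ξ1) sqrt(ξ1 / ν)       # √ξ1 shrinks with ν; √(ξ1/ν) stays at ‖∇f(x)‖
end
@show norm(∇f(x))
```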
+""" +function R2DH( + nlp::AbstractDiagonalQNModel{R, S}, + h, + options::ROSolverOptions{R}; + kwargs..., + ) where {R <: Real, S} + kwargs_dict = Dict(kwargs...) + x0 = pop!(kwargs_dict, :x0, nlp.meta.x0) + xk, k, outdict = R2DH( + x -> obj(nlp, x), + (g, x) -> grad!(nlp, x, g), + h, + hess_op(nlp, x0), + options, + x0; + kwargs..., + ) + ξ = outdict[:ξ] + stats = GenericExecutionStats(nlp) + set_status!(stats, outdict[:status]) + set_solution!(stats, xk) + set_objective!(stats, outdict[:fk] + outdict[:hk]) + set_residuals!(stats, zero(eltype(xk)), ξ) + set_iter!(stats, k) + set_time!(stats, outdict[:elapsed_time]) + set_solver_specific!(stats, :Fhist, outdict[:Fhist]) + set_solver_specific!(stats, :Hhist, outdict[:Hhist]) + set_solver_specific!(stats, :NonSmooth, outdict[:NonSmooth]) + set_solver_specific!(stats, :SubsolverCounter, outdict[:Chist]) + return stats + end + +function R2DH( + f::F, + ∇f!::G, + h::H, + D::DQN, + options::ROSolverOptions{R}, + x0::AbstractVector{R}; + Mmonotone::Int = 5, + selected::AbstractVector{<:Integer} = 1:length(x0), + summation::Bool = true, + kwargs..., +) where {F <: Function, G <: Function, H, R <: Real, DQN <: AbstractDiagonalQuasiNewtonOperator} + start_time = time() + elapsed_time = 0.0 + ϵ = options.ϵa + ϵr = options.ϵr + neg_tol = options.neg_tol + verbose = options.verbose + maxIter = options.maxIter + maxTime = options.maxTime + σmin = options.σmin + η1 = options.η1 + η2 = options.η2 + ν = options.ν + γ = options.γ + + local l_bound, u_bound + has_bnds = false + for (key, val) in kwargs + if key == :l_bound + l_bound = val + has_bnds = has_bnds || any(l_bound .!= R(-Inf)) + elseif key == :u_bound + u_bound = val + has_bnds = has_bnds || any(u_bound .!= R(Inf)) + end + end + + if verbose == 0 + ptf = Inf + elseif verbose == 1 + ptf = round(maxIter / 10) + elseif verbose == 2 + ptf = round(maxIter / 100) + else + ptf = 1 + end + + # initialize parameters + xk = copy(x0) + hk = h(xk[selected]) + if hk == Inf + verbose > 0 && @info "R2DH: finding initial guess where nonsmooth term is finite" + prox!(xk, h, x0, one(eltype(x0))) + hk = h(xk[selected]) + hk < Inf || error("prox computation must be erroneous") + verbose > 0 && @debug "R2DH: found point where h has value" hk + end + hk == -Inf && error("nonsmooth term is not proper") + + xkn = similar(xk) + s = zero(xk) + ψ = has_bnds ? shifted(h, xk, l_bound - xk, u_bound - xk, selected) : shifted(h, xk) + + Fobj_hist = zeros(maxIter) + Hobj_hist = zeros(maxIter) + FHobj_hist = fill!(Vector{R}(undef, Mmonotone), R(-Inf)) + Complex_hist = zeros(Int, maxIter) + if verbose > 0 + #! format: off + @info @sprintf "%6s %8s %8s %7s %8s %7s %7s %7s %1s" "iter" "f(x)" "h(x)" "√ξ" "ρ" "σ" "‖x‖" "‖s‖" "" + #! format: off + end + + local ξ + k = 0 + σk = summation ? σmin : max(1 / ν, σmin) + + fk = f(xk) + ∇fk = similar(xk) + ∇f!(∇fk, xk) + ∇fk⁻ = copy(∇fk) + spectral_test = isa(D, SpectralGradient) + D.d .= summation ? 
+  DNorm = norm(D.d, Inf)
+
+  ν = 1 / DNorm
+  mν∇fk = -ν * ∇fk
+  sqrt_ξ_νInv = one(R)
+
+  optimal = false
+  tired = maxIter > 0 && k ≥ maxIter || elapsed_time > maxTime
+
+  while !(optimal || tired)
+    k = k + 1
+    elapsed_time = time() - start_time
+    Fobj_hist[k] = fk
+    Hobj_hist[k] = hk
+    Mmonotone > 0 && (FHobj_hist[mod(k - 1, Mmonotone) + 1] = fk + hk)
+
+    D.d .= max.(D.d, eps(R))
+
+    # model with diagonal Hessian
+    φ(d) = ∇fk' * d + (d' * (D.d .* d)) / 2
+    mk(d) = φ(d) + ψ(d)
+
+    if spectral_test
+      prox!(s, ψ, mν∇fk, ν)
+    else
+      iprox!(s, ψ, ∇fk, D)
+    end
+
+    Complex_hist[k] += 1
+    xkn .= xk .+ s
+    fkn = f(xkn)
+    hkn = h(xkn[selected])
+    hkn == -Inf && error("nonsmooth term is not proper")
+
+    fhmax = Mmonotone > 0 ? maximum(FHobj_hist) : fk + hk
+    Δobj = fhmax - (fkn + hkn) + max(1, abs(fhmax)) * 10 * eps()
+    Δmod = fhmax - (fk + mk(s)) + max(1, abs(hk)) * 10 * eps()
+    ξ = hk - mk(s) + max(1, abs(hk)) * 10 * eps()
+    sqrt_ξ_νInv = ξ ≥ 0 ? sqrt(ξ / ν) : sqrt(-ξ / ν)
+
+    if ξ ≥ 0 && k == 1
+      ϵ += ϵr * sqrt_ξ_νInv # make stopping test absolute and relative
+    end
+
+    if (ξ < 0 && sqrt_ξ_νInv ≤ neg_tol) || (ξ ≥ 0 && sqrt_ξ_νInv < ϵ)
+      # the current xk is approximately first-order stationary
+      optimal = true
+      continue
+    end
+
+    if (ξ ≤ 0 || isnan(ξ))
+      error("R2DH: failed to compute a step: ξ = $ξ")
+    end
+
+    ρk = Δobj / Δmod
+
+    σ_stat = (η2 ≤ ρk < Inf) ? "↘" : (ρk < η1 ? "↗" : "=")
+
+    if (verbose > 0) && (k % ptf == 0)
+      #! format: off
+      @info @sprintf "%6d %8.1e %8.1e %7.1e %8.1e %7.1e %7.1e %7.1e %1s" k fk hk sqrt_ξ_νInv ρk σk norm(xk) norm(s) σ_stat
+      #! format: on
+    end
+
+    if η2 ≤ ρk < Inf
+      σk = max(σk / γ, σmin)
+    end
+
+    if η1 ≤ ρk < Inf
+      xk .= xkn
+      has_bnds && set_bounds!(ψ, l_bound - xk, u_bound - xk)
+      fk = fkn
+      hk = hkn
+      shift!(ψ, xk)
+      ∇f!(∇fk, xk)
+      push!(D, s, ∇fk - ∇fk⁻) # update the quasi-Newton operator
+      DNorm = norm(D.d, Inf)
+      ∇fk⁻ .= ∇fk
+    end
+
+    if ρk < η1 || ρk == Inf
+      σk = σk * γ
+    end
+
+    D.d .= summation ? D.d .+ σk : D.d .* σk
+    DNorm = norm(D.d, Inf)
+    ν = 1 / DNorm
+
+    tired = maxIter > 0 && k ≥ maxIter || elapsed_time > maxTime
+    if !tired
+      @. mν∇fk = -ν * ∇fk
+    end
+  end
+
+  if verbose > 0
+    if k == 1
+      @info @sprintf "%6d %8.1e %8.1e" k fk hk
+    elseif optimal
+      #! format: off
+      @info @sprintf "%6d %8.1e %8.1e %7.1e %8s %7.1e %7.1e %7.1e" k fk hk sqrt_ξ_νInv "" σk norm(xk) norm(s)
+      #! format: on
+      @info "R2DH: terminating with √(ξ/ν) = $(sqrt_ξ_νInv)"
+    end
+  end
+
+  status = if optimal
+    :first_order
+  elseif elapsed_time > maxTime
+    :max_time
+  elseif tired
+    :max_iter
+  else
+    :exception
+  end
+
+  outdict = Dict(
+    :Fhist => Fobj_hist[1:k],
+    :Hhist => Hobj_hist[1:k],
+    :Chist => Complex_hist[1:k],
+    :NonSmooth => h,
+    :status => status,
+    :fk => fk,
+    :hk => hk,
+    :ξ => ξ,
+    :elapsed_time => elapsed_time,
+  )
+
+  return xk, k, outdict
+end
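A usage sketch for the second calling form (my example, not from the patch; `SpectralGradient` is the diagonal quasi-Newton operator from LinearOperators.jl that the LM diff above also uses, and the regularizer weight 0.1 is illustrative):

```julia
using LinearAlgebra, LinearOperators, ProximalOperators, RegularizedOptimization

n = 10
f(x) = 0.5 * norm(x .- 1)^2
∇f!(g, x) = (g .= x .- 1)
h = NormL1(0.1)
D = SpectralGradient(1.0, n)  # diagonal Hessian approximation, D₀ = I
opts = ROSolverOptions(ν = 1.0, ϵa = 1e-6, ϵr = 1e-6, verbose = 10)

xk, k, outdict = R2DH(f, ∇f!, h, D, opts, zeros(n))
@show k outdict[:status] f(xk) + h(xk)
```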
diff --git a/src/R2N.jl b/src/R2N.jl
new file mode 100644
index 00000000..856e5750
--- /dev/null
+++ b/src/R2N.jl
@@ -0,0 +1,389 @@
+export R2N, R2NSolver, solve!
+
+import SolverCore.solve!
+
+mutable struct R2NSolver{
+  T <: Real,
+  G <: ShiftedProximableFunction,
+  V <: AbstractVector{T},
+  S <: AbstractOptimizationSolver,
+} <: AbstractOptimizationSolver
+  xk::V
+  ∇fk::V
+  ∇fk⁻::V
+  mν∇fk::V
+  ψ::G
+  sub_ψ::G
+  xkn::V
+  s::V
+  s1::V
+  has_bnds::Bool
+  l_bound::V
+  u_bound::V
+  sub_solver::S
+  sub_stats::GenericExecutionStats{T, V, V, Any}
+end
+
+function R2NSolver(reg_nlp::AbstractRegularizedNLPModel{T, V}; sub_solver = R2Solver) where {T, V}
+  x0 = reg_nlp.model.meta.x0
+  l_bound = reg_nlp.model.meta.lvar
+  u_bound = reg_nlp.model.meta.uvar
+
+  xk = similar(x0)
+  ∇fk = similar(x0)
+  ∇fk⁻ = similar(x0)
+  mν∇fk = similar(x0)
+  xkn = similar(x0)
+  s = zero(x0)
+  s1 = similar(x0)
+  has_bnds = any(l_bound .!= T(-Inf)) || any(u_bound .!= T(Inf))
+
+  # bound shifts allocated from x0 here; they are refreshed via set_bounds! during the solve
+  ψ = has_bnds ? shifted(reg_nlp.h, xk, l_bound .- x0, u_bound .- x0, reg_nlp.selected) : shifted(reg_nlp.h, xk)
+  sub_ψ = has_bnds ? shifted(reg_nlp.h, xk, l_bound .- x0, u_bound .- x0, reg_nlp.selected) : shifted(reg_nlp.h, xk)
+
+  sub_nlp = RegularizedNLPModel(reg_nlp.model, sub_ψ)
+  sub_stats = GenericExecutionStats(reg_nlp.model)
+
+  return R2NSolver(
+    xk,
+    ∇fk,
+    ∇fk⁻,
+    mν∇fk,
+    ψ,
+    sub_ψ,
+    xkn,
+    s,
+    s1,
+    has_bnds,
+    l_bound,
+    u_bound,
+    sub_solver(sub_nlp),
+    sub_stats,
+  )
+end
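In-place usage of the solver defined above, together with the `solve!` method that follows (my sketch; the BPDN test problem and the choice of λ mirror the test suite):

```julia
using LinearAlgebra, NLPModels, NLPModelsModifiers, ProximalOperators,
      RegularizedProblems, RegularizedOptimization, SolverCore

bpdn, _, _ = bpdn_model(1)
λ = norm(grad(bpdn, zeros(bpdn.meta.nvar)), Inf) / 10
reg_nlp = RegularizedNLPModel(LBFGSModel(bpdn), NormL1(λ))

solver = R2NSolver(reg_nlp)  # R2 subsolver by default
stats = GenericExecutionStats(reg_nlp.model)
solve!(solver, reg_nlp, stats, atol = 1e-5, verbose = 10)
@show stats.status stats.objective
```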
+function SolverCore.solve!(
+  solver::R2NSolver{T},
+  reg_nlp::AbstractRegularizedNLPModel{T, V},
+  stats::GenericExecutionStats{T, V};
+  callback = (args...) -> nothing,
+  x::V = reg_nlp.model.meta.x0,
+  atol::T = √eps(T),
+  sub_atol::T = atol,
+  rtol::T = √eps(T),
+  neg_tol::T = eps(T)^(1 / 4),
+  verbose::Int = 0,
+  max_iter::Int = 10000,
+  max_time::Float64 = 30.0,
+  max_eval::Int = -1,
+  σmin::T = eps(T),
+  η1::T = √√eps(T),
+  η2::T = T(0.9),
+  ν = eps(T)^(1 / 5),
+  γ::T = T(3),
+  β = 1 / eps(T),
+  θ = eps(T)^(1 / 5),
+  kwargs...,
+) where {T, V}
+
+  reset!(stats)
+
+  # Retrieve workspace
+  selected = reg_nlp.selected
+  h = reg_nlp.h
+  nlp = reg_nlp.model
+
+  xk = solver.xk .= x
+
+  # Make sure ψ has the correct shift
+  shift!(solver.ψ, xk)
+
+  σk = 1 / ν
+  ∇fk = solver.∇fk
+  ∇fk⁻ = solver.∇fk⁻
+  mν∇fk = solver.mν∇fk
+  ψ = solver.ψ
+  xkn = solver.xkn
+  s = solver.s
+  s1 = solver.s1
+  has_bnds = solver.has_bnds
+  if has_bnds
+    l_bound = solver.l_bound
+    u_bound = solver.u_bound
+  end
+  sub_atol_init = sub_atol
+  sub_solver = solver.sub_solver
+  sub_stats = solver.sub_stats
+  sub_ψ = solver.sub_ψ
+
+  # initialize parameters
+  improper = false
+  hk = @views h(xk[selected])
+  if hk == Inf
+    verbose > 0 && @info "R2N: finding initial guess where nonsmooth term is finite"
+    prox!(xk, h, xk, one(T))
+    hk = @views h(xk[selected])
+    hk < Inf || error("prox computation must be erroneous")
+    verbose > 0 && @debug "R2N: found point where h has value" hk
+  end
+  improper = (hk == -Inf)
+
+  if verbose > 0
+    @info log_header(
+      [:outer, :inner, :fx, :hx, :xi, :ρ, :σ, :normx, :norms, :normB, :arrow],
+      [Int, Int, Float64, Float64, Float64, Float64, Float64, Float64, Float64, Float64, Char],
+      hdr_override = Dict{Symbol, String}( # TODO: Add this as constant dict elsewhere
+        :outer => "outer",
+        :inner => "inner",
+        :fx => "f(x)",
+        :hx => "h(x)",
+        :xi => "√(ξ1/ν)",
+        :ρ => "ρ",
+        :σ => "σ",
+        :normx => "‖x‖",
+        :norms => "‖s‖",
+        :normB => "‖B‖",
+        :arrow => "R2N",
+      ),
+      colsep = 1,
+    )
+  end
+
+  local ξ1::T
+  local ρk::T
+
+  fk = obj(nlp, xk)
+  grad!(nlp, xk, ∇fk)
+  ∇fk⁻ .= ∇fk
+
+  quasiNewtTest = isa(nlp, QuasiNewtonModel)
+  Bk = hess_op(nlp, xk)
+  local λmax::T
+  try
+    λmax = opnorm(Bk)
+  catch
+    λmax = opnorm(Matrix(Bk))
+  end
+
+  νInv = (1 + θ) * (σk + λmax)
+  sqrt_ξ1_νInv = one(T)
+  ν_subsolver = 1 / νInv
+
+  @. mν∇fk = -ν_subsolver * ∇fk
+
+  set_iter!(stats, 0)
+  start_time = time()
+  set_time!(stats, 0.0)
+  set_objective!(stats, fk + hk)
+  set_solver_specific!(stats, :smooth_obj, fk)
+  set_solver_specific!(stats, :nonsmooth_obj, hk)
+
+  # model for the first prox-gradient step and ξ1
+  φ1(d) = ∇fk' * d
+  mk1(d) = φ1(d) + ψ(d)
+
+  # model for subsequent prox-gradient steps and ξ
+  φ(d) = (d' * (Bk * d)) / 2 + ∇fk' * d + σk * dot(d, d) / 2
+
+  ∇φ!(g, d) = begin
+    mul!(g, Bk, d)
+    g .+= ∇fk
+    g .+= σk * d
+    g
+  end
+
+  mk(d) = φ(d) + ψ(d)
+
+  prox!(s, ψ, mν∇fk, ν_subsolver)
+  mks = mk1(s)
+
+  ξ1 = hk - mks + max(1, abs(hk)) * 10 * eps()
+
+  sqrt_ξ1_νInv = ξ1 ≥ 0 ? sqrt(ξ1 / ν_subsolver) : sqrt(-ξ1 / ν_subsolver)
+  solved = (ξ1 < 0 && sqrt_ξ1_νInv ≤ neg_tol) || (ξ1 ≥ 0 && sqrt_ξ1_νInv ≤ atol)
+  (ξ1 < 0 && sqrt_ξ1_νInv > neg_tol) &&
+    error("R2N: prox-gradient step should produce a decrease but ξ1 = $(ξ1)")
+  atol += rtol * sqrt_ξ1_νInv # make stopping test absolute and relative
+  sub_atol += rtol * sqrt_ξ1_νInv
+
+  set_solver_specific!(stats, :xi, sqrt_ξ1_νInv)
+  set_status!(
+    stats,
+    get_status(
+      reg_nlp,
+      elapsed_time = stats.elapsed_time,
+      iter = stats.iter,
+      optimal = solved,
+      improper = improper,
+      max_eval = max_eval,
+      max_time = max_time,
+      max_iter = max_iter,
+    ),
+  )
+
+  callback(nlp, solver, stats)
+
+  done = stats.status != :unknown
+
+  while !done
+
+    s1 .= s
+
+    sub_atol = stats.iter == 0 ? 1.0e-3 : max(sub_atol, min(1e-3, sqrt_ξ1_νInv))
+    nlp_model = FirstOrderModel(φ, ∇φ!, s)
+    model = RegularizedNLPModel(nlp_model, ψ)
+    if sub_solver == R2Solver
+      solve!(
+        sub_solver,
+        model,
+        sub_stats,
+        x = s,
+        atol = sub_atol,
+        ν = ν_subsolver,
+        kwargs...,
+      )
+    else
+      solve!(
+        sub_solver,
+        model,
+        sub_stats,
+        ∇fk,
+        Bk / σk + opEye(nlp.meta.nvar, nlp.meta.nvar),
+        σk,
+        atol = sub_atol,
+        max_time = max_time - stats.elapsed_time,
+        kwargs...,
+      )
+    end
+    s .= sub_stats.solution
+
+    if norm(s) > β * norm(s1)
+      s .= s1
+    end
+    if mk(s) > mk(s1)
+      s .= s1
+    end
+
+    sub_atol = sub_atol_init
+
+    xkn .= xk .+ s
+    fkn = obj(nlp, xkn)
+    hkn = h(xkn[selected])
+    hkn == -Inf && error("nonsmooth term is not proper")
+
+    mks = mk(s)
+    Δobj = fk + hk - (fkn + hkn) + max(1, abs(fk + hk)) * 10 * eps()
+    ξ = hk - mks + max(1, abs(hk)) * 10 * eps()
+
+    ρk = Δobj / ξ
+
+    verbose > 0 &&
+      stats.iter % verbose == 0 &&
+      @info log_row(
+        Any[
+          stats.iter,
+          sub_stats.iter,
+          fk,
+          hk,
+          sqrt_ξ1_νInv,
+          ρk,
+          σk,
+          norm(xk),
+          norm(s),
+          λmax,
+          (η2 ≤ ρk < Inf) ? "↘" : (ρk < η1 ? "↗" : "="), # σ decreases when the step is very successful
+        ],
+        colsep = 1,
+      )
+
+    if η2 ≤ ρk < Inf
+      σk = max(σk / γ, σmin)
+    end
+
+    if η1 ≤ ρk < Inf
+      xk .= xkn
+      has_bnds && set_bounds!(ψ, l_bound - xk, u_bound - xk)
+
+      # update functions
+      fk = fkn
+      hk = hkn
+
+      shift!(ψ, xk)
+      grad!(nlp, xk, ∇fk)
+      if quasiNewtTest
+        push!(nlp, s, ∇fk - ∇fk⁻)
+      end
+      Bk = hess_op(nlp, xk)
+      try
+        λmax = opnorm(Bk)
+      catch
+        λmax = opnorm(Matrix(Bk))
+      end
+      ∇fk⁻ .= ∇fk
+    end
+
+    if ρk < η1 || ρk == Inf
+      σk = σk * γ
+    end
+    νInv = (1 + θ) * (σk + λmax)
+    ν_subsolver = 1 / νInv # refresh the prox step length after σk and λmax change
+    @. mν∇fk = -ν_subsolver * ∇fk
+
+    set_objective!(stats, fk + hk)
+    set_solver_specific!(stats, :smooth_obj, fk)
+    set_solver_specific!(stats, :nonsmooth_obj, hk)
+    set_iter!(stats, stats.iter + 1)
+    set_time!(stats, time() - start_time)
+
+    prox!(s, ψ, mν∇fk, ν_subsolver)
+    mks = mk1(s)
+
+    ξ1 = hk - mks + max(1, abs(hk)) * 10 * eps()
+    sqrt_ξ1_νInv = ξ1 ≥ 0 ? sqrt(ξ1 / ν_subsolver) : sqrt(-ξ1 / ν_subsolver)
+    solved = (ξ1 < 0 && sqrt_ξ1_νInv ≤ neg_tol) || (ξ1 ≥ 0 && sqrt_ξ1_νInv ≤ atol)
+
+    (ξ1 < 0 && sqrt_ξ1_νInv > neg_tol) &&
+      error("R2N: prox-gradient step should produce a decrease but ξ1 = $(ξ1)")
+    set_solver_specific!(stats, :xi, sqrt_ξ1_νInv)
+    set_status!(
+      stats,
+      get_status(
+        reg_nlp,
+        elapsed_time = stats.elapsed_time,
+        iter = stats.iter,
+        optimal = solved,
+        improper = improper,
+        max_eval = max_eval,
+        max_time = max_time,
+        max_iter = max_iter,
+      ),
+    )
+
+    callback(nlp, solver, stats)
+
+    done = stats.status != :unknown
+  end
"↗" : "="), + ], + colsep = 1, + ) + @info "R2N: terminating with √(ξ1/ν) = $(sqrt_ξ1_νInv)" + end + + set_solution!(stats,xk) + return stats +end diff --git a/src/RegularizedOptimization.jl b/src/RegularizedOptimization.jl index 22c1c814..0b60faef 100644 --- a/src/RegularizedOptimization.jl +++ b/src/RegularizedOptimization.jl @@ -8,7 +8,7 @@ using ProximalOperators, TSVD # dependencies from us using LinearOperators, - NLPModels, NLPModelsModifiers, RegularizedProblems, ShiftedProximalOperators, SolverCore + NLPModels, NLPModelsModifiers, RegularizedProblems, ShiftedProximalOperators, SolverCore, SparseMatricesCOO, Krylov include("utils.jl") include("input_struct.jl") @@ -20,5 +20,8 @@ include("TRDH_alg.jl") include("R2_alg.jl") include("LM_alg.jl") include("LMTR_alg.jl") +include("R2DH.jl") +include("R2N.jl") +include("L2Penalty_alg.jl") -end # module RegularizedOptimization +end # module RegularizedOptimization \ No newline at end of file diff --git a/test/runtests.jl b/test/runtests.jl index 39f3a9a0..1479f434 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -1,15 +1,55 @@ using LinearAlgebra: length using LinearAlgebra, Random, Test using ProximalOperators -using NLPModels, NLPModelsModifiers, RegularizedProblems, RegularizedOptimization, SolverCore - +using ShiftedProximalOperators +using NLPModels, NLPModelsModifiers, RegularizedProblems, RegularizedOptimization, SolverCore, RegularizedOptimization, OptimizationProblems, ADNLPModels, OptimizationProblems.ADNLPProblems +using Random +Random.seed!(1234) const global compound = 1 const global nz = 10 * compound -const global options = ROSolverOptions(ν = 1.0, β = 1e16, ϵa = 1e-6, ϵr = 1e-6, verbose = 10) +const global options = ROSolverOptions(ν = 1.0, β = 1e16, ϵa = 1e-6, ϵr = 1e-6, verbose = 0) const global bpdn, bpdn_nls, sol = bpdn_model(compound) const global bpdn2, bpdn_nls2, sol2 = bpdn_model(compound, bounds = true) const global λ = norm(grad(bpdn, zeros(bpdn.meta.nvar)), Inf) / 10 +meta = OptimizationProblems.meta +problem_list = meta[(meta.has_equalities_only .== 1) .& (meta.has_bounds.==0) .& (meta.has_fixed_variables.==0) .& (meta.variable_nvar .== 0), :] + +for problem ∈ eachrow(problem_list) + for (nlp,subsolver_name) ∈ ((eval(Meta.parse(problem.name))(),"R2"),(LSR1Model(eval(Meta.parse(problem.name))()),"R2N-LSR1"),(LBFGSModel(eval(Meta.parse(problem.name))()),"R2N-LBFGS")) + @testset "Optimization Problems - $(problem.name) - L2Penalty - $(subsolver_name)" begin + if subsolver_name == "R2" + out = L2Penalty( + nlp, + atol = 1e-3, + rtol = 1e-3, + ktol = 1e-1, + max_iter = 100, + max_time = 10.0 + ) + else + try + out = L2Penalty( + nlp, + sub_solver = R2NSolver, + atol = 1e-3, + rtol = 1e-3, + ktol = 1e-1, + max_iter = 100, + max_time = 10.0 + ) + catch LAPACKException + continue + end + end + @test typeof(out.solution) == typeof(nlp.meta.x0) + @test length(out.solution) == nlp.meta.nvar + @test typeof(out.dual_feas) == eltype(out.solution) + @test (out.status == :first_order) || (out.status == :infeasible) || (out.status == :max_iter) || (out.status == :max_time) + end + end +end + for (mod, mod_name) ∈ ((x -> x, "exact"), (LSR1Model, "lsr1"), (LBFGSModel, "lbfgs")) for (h, h_name) ∈ ((NormL0(λ), "l0"), (NormL1(λ), "l1"), (IndBallL0(10 * compound), "B0")) for solver_sym ∈ (:R2, :TR)