Skip to content

Commit 339a6cf

Browse files
Add R2N, R2DH and update LM
1 parent b84a2b1 commit 339a6cf

File tree

4 files changed

+604
-7
lines changed

4 files changed

+604
-7
lines changed

src/LM_alg.jl

Lines changed: 13 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -104,13 +104,15 @@ function LM(
104104
k = 0
105105
Fobj_hist = zeros(maxIter)
106106
Hobj_hist = zeros(maxIter)
107+
R = eltype(xk)
108+
sqrt_ξ1_νInv = one(R)
107109
Complex_hist = zeros(Int, maxIter)
108110
Grad_hist = zeros(Int, maxIter)
109111
Resid_hist = zeros(Int, maxIter)
110112

111113
if verbose > 0
112114
#! format: off
113-
@info @sprintf "%6s %8s %8s %8s %7s %7s %8s %7s %7s %7s %7s %1s" "outer" "inner" "f(x)" "h(x)" "ξ1" "√ξ" "ρ" "σ" "‖x‖" "‖s‖" "‖Jₖ‖²" "reg"
115+
@info @sprintf "%6s %8s %8s %8s %7s %7s %8s %7s %7s %7s %7s %1s" "outer" "inner" "f(x)" "h(x)" "(ξ1/ν)" "√ξ" "ρ" "σ" "‖x‖" "‖s‖" "‖Jₖ‖²" "reg"
114116
#! format: on
115117
end
116118

@@ -176,22 +178,24 @@ function LM(
176178
prox!(s, ψ, ∇fk, ν)
177179
ξ1 = fk + hk - mk1(s) + max(1, abs(fk + hk)) * 10 * eps() # TODO: isn't mk(s) returned by subsolver?
178180
ξ1 > 0 || error("LM: first prox-gradient step should produce a decrease but ξ1 = $(ξ1)")
181+
sqrt_ξ1_νInv = sqrt(ξ1 * νInv)
179182

180183
if ξ1 0 && k == 1
181-
ϵ_increment = ϵr * sqrt(ξ1)
184+
ϵ_increment = ϵr * sqrt_ξ1_νInv
182185
ϵ += ϵ_increment # make stopping test absolute and relative
183186
ϵ_subsolver += ϵ_increment
184187
end
185188

186-
if sqrt(ξ1) < ϵ
189+
if sqrt_ξ1_νInv < ϵ
187190
# the current xk is approximately first-order stationary
188191
optimal = true
189192
continue
190193
end
191194

192195
subsolver_options.ϵa = k == 1 ? 1.0e-1 : max(ϵ_subsolver, min(1.0e-2, ξ1 / 10))
193196
subsolver_options.ν = ν
194-
subsolver_args = subsolver == TRDH ? (SpectralGradient(1 / ν, nls.meta.nvar),) : ()
197+
#subsolver_args = subsolver == TRDH ? (SpectralGradient(1 / ν, nls.meta.nvar),) : ()
198+
subsolver_args = subsolver == R2DH ? (SpectralGradient(1., nls.meta.nvar),) : ()
195199
@debug "setting inner stopping tolerance to" subsolver_options.optTol
196200
s, iter, _ = with_logger(subsolver_logger) do
197201
subsolver(φ, ∇φ!, ψ, subsolver_args..., subsolver_options, s)
@@ -221,7 +225,7 @@ function LM(
221225

222226
if (verbose > 0) && (k % ptf == 0)
223227
#! format: off
224-
@info @sprintf "%6d %8d %8.1e %8.1e %7.1e %7.1e %8.1e %7.1e %7.1e %7.1e %7.1e %1s" k iter fk hk sqrt(ξ1) sqrt(ξ) ρk σk norm(xk) norm(s) νInv σ_stat
228+
@info @sprintf "%6d %8d %8.1e %8.1e %7.1e %7.1e %8.1e %7.1e %7.1e %7.1e %7.1e %1s" k iter fk hk sqrt_ξ1_νInv sqrt(ξ) ρk σk norm(xk) norm(s) νInv σ_stat
225229
#! format: off
226230
end
227231

@@ -244,6 +248,7 @@ function LM(
244248
jtprod_residual!(nls, xk, Fk, ∇fk)
245249

246250
σmax = opnorm(Jk)
251+
247252

248253
Complex_hist[k] += 1
249254
end
@@ -252,6 +257,7 @@ function LM(
252257
σk = σk * γ
253258
end
254259
νInv = (1 + θ) * (σmax^2 + σk) # ‖J'J + σₖ I‖ = ‖J‖² + σₖ
260+
255261
tired = k maxIter || elapsed_time > maxTime
256262
end
257263

@@ -260,9 +266,9 @@ function LM(
260266
@info @sprintf "%6d %8s %8.1e %8.1e" k "" fk hk
261267
elseif optimal
262268
#! format: off
263-
@info @sprintf "%6d %8d %8.1e %8.1e %7.1e %7.1e %8s %7.1e %7.1e %7.1e %7.1e" k 1 fk hk sqrt(ξ1) sqrt(ξ1) "" σk norm(xk) norm(s) νInv
269+
@info @sprintf "%6d %8d %8.1e %8.1e %7.1e %7.1e %8s %7.1e %7.1e %7.1e %7.1e" k 1 fk hk sqrt_ξ1_νInv sqrt(ξ1) "" σk norm(xk) norm(s) νInv
264270
#! format: on
265-
@info "LM: terminating with √ξ1 = $(sqrt(ξ1))"
271+
@info "LM: terminating with √(ξ1/ν) = $(sqrt_ξ1_νInv)"
266272
end
267273
end
268274
status = if optimal

src/R2DH.jl

Lines changed: 297 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,297 @@
1+
export R2DH
2+
3+
"""
4+
R2DH(nlp, h, options)
5+
R2DH(f, ∇f!, h, options, x0)
6+
7+
A first-order quadratic regularization method for the problem
8+
9+
min f(x) + h(x)
10+
11+
where f: ℝⁿ → ℝ has a Lipschitz-continuous gradient, and h: ℝⁿ → ℝ is
12+
lower semi-continuous, proper and prox-bounded.
13+
14+
About each iterate xₖ, a step sₖ is computed as a solution of
15+
16+
min φ(s; xₖ) + ψ(s; xₖ)
17+
18+
where φ(s ; xₖ) = f(xₖ) + ∇f(xₖ)ᵀs + ½ sᵀ (σₖ+Dₖ) s (if `summation = true`) and φ(s ; xₖ) = f(xₖ) + ∇f(xₖ)ᵀs + ½ sᵀ (σₖ+Dₖ) s (if `summation = false`) is a quadratic approximation of f about xₖ,
19+
ψ(s; xₖ) = h(xₖ + s), ‖⋅‖ is a user-defined norm, Dₖ is a diagonal Hessian approximation
20+
and σₖ > 0 is the regularization parameter.
21+
22+
### Arguments
23+
24+
* `nlp::AbstractDiagonalQNModel`: a smooth optimization problem
25+
* `h`: a regularizer such as those defined in ProximalOperators
26+
* `options::ROSolverOptions`: a structure containing algorithmic parameters
27+
* `x0::AbstractVector`: an initial guess (in the second calling form)
28+
29+
### Keyword Arguments
30+
31+
* `x0::AbstractVector`: an initial guess (in the first calling form: default = `nlp.meta.x0`)
32+
* `selected::AbstractVector{<:Integer}`: (default `1:length(x0)`).
33+
* `Bk`: initial diagonal Hessian approximation (default: `(one(R) / options.ν) * I`).
34+
* `summation`: boolean used to choose between the two versions of R2DH (see above, default : `true`).
35+
36+
The objective and gradient of `nlp` will be accessed.
37+
38+
In the second form, instead of `nlp`, the user may pass in
39+
40+
* `f` a function such that `f(x)` returns the value of f at x
41+
* `∇f!` a function to evaluate the gradient in place, i.e., such that `∇f!(g, x)` store ∇f(x) in `g`
42+
43+
### Return values
44+
45+
* `xk`: the final iterate
46+
* `Fobj_hist`: an array with the history of values of the smooth objective
47+
* `Hobj_hist`: an array with the history of values of the nonsmooth objective
48+
* `Complex_hist`: an array with the history of number of inner iterations.
49+
"""
50+
function R2DH(
51+
nlp::AbstractDiagonalQNModel{R, S},
52+
h,
53+
options::ROSolverOptions{R};
54+
kwargs...,
55+
) where {R <: Real, S}
56+
kwargs_dict = Dict(kwargs...)
57+
x0 = pop!(kwargs_dict, :x0, nlp.meta.x0)
58+
xk, k, outdict = R2DH(
59+
x -> obj(nlp, x),
60+
(g, x) -> grad!(nlp, x, g),
61+
h,
62+
hess_op(nlp, x0),
63+
options,
64+
x0;
65+
kwargs...,
66+
)
67+
ξ = outdict[]
68+
stats = GenericExecutionStats(nlp)
69+
set_status!(stats, outdict[:status])
70+
set_solution!(stats, xk)
71+
set_objective!(stats, outdict[:fk] + outdict[:hk])
72+
set_residuals!(stats, zero(eltype(xk)), ξ)
73+
set_iter!(stats, k)
74+
set_time!(stats, outdict[:elapsed_time])
75+
set_solver_specific!(stats, :Fhist, outdict[:Fhist])
76+
set_solver_specific!(stats, :Hhist, outdict[:Hhist])
77+
set_solver_specific!(stats, :NonSmooth, outdict[:NonSmooth])
78+
set_solver_specific!(stats, :SubsolverCounter, outdict[:Chist])
79+
return stats
80+
end
81+
82+
function R2DH(
83+
f::F,
84+
∇f!::G,
85+
h::H,
86+
D::DQN,
87+
options::ROSolverOptions{R},
88+
x0::AbstractVector{R};
89+
Mmonotone::Int = 5,
90+
selected::AbstractVector{<:Integer} = 1:length(x0),
91+
summation::Bool = true,
92+
kwargs...,
93+
) where {F <: Function, G <: Function, H, R <: Real, DQN <: AbstractDiagonalQuasiNewtonOperator}
94+
start_time = time()
95+
elapsed_time = 0.0
96+
ϵ = options.ϵa
97+
ϵr = options.ϵr
98+
neg_tol = options.neg_tol
99+
verbose = options.verbose
100+
maxIter = options.maxIter
101+
maxTime = options.maxTime
102+
σmin = options.σmin
103+
η1 = options.η1
104+
η2 = options.η2
105+
ν = options.ν
106+
γ = options.γ
107+
108+
local l_bound, u_bound
109+
has_bnds = false
110+
for (key, val) in kwargs
111+
if key == :l_bound
112+
l_bound = val
113+
has_bnds = has_bnds || any(l_bound .!= R(-Inf))
114+
elseif key == :u_bound
115+
u_bound = val
116+
has_bnds = has_bnds || any(u_bound .!= R(Inf))
117+
end
118+
end
119+
120+
if verbose == 0
121+
ptf = Inf
122+
elseif verbose == 1
123+
ptf = round(maxIter / 10)
124+
elseif verbose == 2
125+
ptf = round(maxIter / 100)
126+
else
127+
ptf = 1
128+
end
129+
130+
# initialize parameters
131+
xk = copy(x0)
132+
hk = h(xk[selected])
133+
if hk == Inf
134+
verbose > 0 && @info "R2DH: finding initial guess where nonsmooth term is finite"
135+
prox!(xk, h, x0, one(eltype(x0)))
136+
hk = h(xk[selected])
137+
hk < Inf || error("prox computation must be erroneous")
138+
verbose > 0 && @debug "R2DH: found point where h has value" hk
139+
end
140+
hk == -Inf && error("nonsmooth term is not proper")
141+
142+
xkn = similar(xk)
143+
s = zero(xk)
144+
ψ = has_bnds ? shifted(h, xk, l_bound - xk, u_bound - xk, selected) : shifted(h, xk)
145+
146+
Fobj_hist = zeros(maxIter)
147+
Hobj_hist = zeros(maxIter)
148+
FHobj_hist = fill!(Vector{R}(undef, Mmonotone), R(-Inf))
149+
Complex_hist = zeros(Int, maxIter)
150+
if verbose > 0
151+
#! format: off
152+
@info @sprintf "%6s %8s %8s %7s %8s %7s %7s %7s %1s" "iter" "f(x)" "h(x)" "√ξ" "ρ" "σ" "‖x‖" "‖s‖" ""
153+
#! format: off
154+
end
155+
156+
local ξ
157+
k = 0
158+
σk = summation ? σmin : max(1 / ν, σmin)
159+
160+
fk = f(xk)
161+
∇fk = similar(xk)
162+
∇f!(∇fk, xk)
163+
∇fk⁻ = copy(∇fk)
164+
spectral_test = isa(D, SpectralGradient)
165+
D.d .= summation ? D.d .+ σk : D.d .* σk
166+
DNorm = norm(D.d, Inf)
167+
168+
169+
ν = 1 / DNorm
170+
mν∇fk = -ν * ∇fk
171+
sqrt_ξ_νInv = one(R)
172+
173+
optimal = false
174+
tired = maxIter > 0 && k maxIter || elapsed_time > maxTime
175+
176+
while !(optimal || tired)
177+
k = k + 1
178+
elapsed_time = time() - start_time
179+
Fobj_hist[k] = fk
180+
Hobj_hist[k] = hk
181+
Mmonotone > 0 && (FHobj_hist[mod(k-1, Mmonotone) + 1] = fk + hk)
182+
183+
D.d .= max.(D.d, eps(R))
184+
185+
186+
# model with diagonal hessian
187+
φ(d) = ∇fk' * d + (d' * (D.d .* d)) / 2
188+
mk(d) = φ(d) + ψ(d)
189+
190+
if spectral_test
191+
prox!(s, ψ, mν∇fk, ν)
192+
else
193+
iprox!(s, ψ, ∇fk, D)
194+
end
195+
196+
# iprox!(s, ψ, ∇fk, D)
197+
198+
Complex_hist[k] += 1
199+
xkn .= xk .+ s
200+
fkn = f(xkn)
201+
hkn = h(xkn[selected])
202+
hkn == -Inf && error("nonsmooth term is not proper")
203+
204+
fhmax = Mmonotone > 0 ? maximum(FHobj_hist) : fk + hk
205+
Δobj = fhmax - (fkn + hkn) + max(1, abs(fhmax)) * 10 * eps()
206+
Δmod = fhmax - (fk + mk(s)) + max(1, abs(hk)) * 10 * eps()
207+
ξ = hk - mk(s) + max(1, abs(hk)) * 10 * eps()
208+
sqrt_ξ_νInv = ξ 0 ? sqrt/ ν) : sqrt(-ξ / ν)
209+
210+
if ξ 0 && k == 1
211+
ϵ += ϵr * sqrt_ξ_νInv # make stopping test absolute and relative
212+
end
213+
214+
if< 0 && sqrt_ξ_νInv neg_tol) || 0 && sqrt_ξ_νInv < ϵ)
215+
# the current xk is approximately first-order stationary
216+
optimal = true
217+
continue
218+
end
219+
220+
if 0 || isnan(ξ))
221+
error("R2DH: failed to compute a step: ξ = ")
222+
end
223+
224+
ρk = Δobj / Δmod
225+
226+
σ_stat = (η2 ρk < Inf) ? "" : (ρk < η1 ? "" : "=")
227+
228+
if (verbose > 0) && (k % ptf == 0)
229+
#! format: off
230+
@info @sprintf "%6d %8.1e %8.1e %7.1e %8.1e %7.1e %7.1e %7.1e %1s" k fk hk sqrt_ξ_νInv ρk σk norm(xk) norm(s) σ_stat
231+
#! format: on
232+
end
233+
234+
if η2 ρk < Inf
235+
σk = max(σk / γ, σmin)
236+
end
237+
238+
if η1 ρk < Inf
239+
xk .= xkn
240+
has_bnds && set_bounds!(ψ, l_bound - xk, u_bound - xk)
241+
fk = fkn
242+
hk = hkn
243+
shift!(ψ, xk)
244+
∇f!(∇fk, xk)
245+
push!(D, s, ∇fk - ∇fk⁻) # update QN operator
246+
DNorm = norm(D.d, Inf)
247+
∇fk⁻ .= ∇fk
248+
end
249+
250+
if ρk < η1 || ρk == Inf
251+
σk = σk * γ
252+
end
253+
254+
D.d .= summation ? D.d .+ σk : D.d .* σk
255+
DNorm = norm(D.d, Inf)
256+
ν = 1 / DNorm
257+
258+
tired = maxIter > 0 && k maxIter
259+
if !tired
260+
@. mν∇fk = -ν * ∇fk
261+
end
262+
end
263+
264+
if verbose > 0
265+
if k == 1
266+
@info @sprintf "%6d %8.1e %8.1e" k fk hk
267+
elseif optimal
268+
#! format: off
269+
@info @sprintf "%6d %8.1e %8.1e %7.1e %8s %7.1e %7.1e %7.1e" k fk hk sqrt_ξ_νInv "" σk norm(xk) norm(s)
270+
#! format: on
271+
@info "R2DH: terminating with √(ξ/ν) = $(sqrt_ξ_νInv))"
272+
end
273+
end
274+
275+
status = if optimal
276+
:first_order
277+
elseif elapsed_time > maxTime
278+
:max_time
279+
elseif tired
280+
:max_iter
281+
else
282+
:exception
283+
end
284+
outdict = Dict(
285+
:Fhist => Fobj_hist[1:k],
286+
:Hhist => Hobj_hist[1:k],
287+
:Chist => Complex_hist[1:k],
288+
:NonSmooth => h,
289+
:status => status,
290+
:fk => fk,
291+
:hk => hk,
292+
=> ξ,
293+
:elapsed_time => elapsed_time,
294+
)
295+
296+
return xk, k, outdict
297+
end

0 commit comments

Comments
 (0)