Skip to content

Commit 3a8a0f2

Browse files
committed
Fixed typo in max loop iterations and added note about source
1 parent b6c435f commit 3a8a0f2

File tree

1 file changed

+11
-14
lines changed

1 file changed

+11
-14
lines changed

dfols/trust_region.py

Lines changed: 11 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -87,7 +87,6 @@
8787

8888
def ctrsbox_sfista(xopt, g, H, projections, delta, h, L_h, prox_uh, argsh=(), argsprox=(), func_tol=1e-3, max_iters=500, d_max_iters=100, d_tol=1e-10, use_fortran=USE_FORTRAN, scaling_changes=None):
8989
n = xopt.size
90-
# NOTE: L_h, prox_uh unable to check, add instruction to prox_uh
9190
assert xopt.shape == (n,), "xopt has wrong shape (should be vector)"
9291
assert g.shape == (n,), "g and xopt have incompatible sizes"
9392
assert len(H.shape) == 2, "H must be a matrix"
@@ -99,13 +98,17 @@ def ctrsbox_sfista(xopt, g, H, projections, delta, h, L_h, prox_uh, argsh=(), ar
9998
d = np.zeros(n) # start with zero vector
10099
y = np.zeros(n)
101100
t = 1
102-
k_H = np.linalg.norm(H, 2) # SOLVED: use ||H||_2 <= ||H||_F, might be better than maxhessian
103-
# QUESTION: different expression for u from MAX_LOOP_ITERS
104-
#u = 2 * func_tol / (L_h * (L_h + sqrt(L_h * L_h + 2 * k_H * func_tol))) # smoothing parameter
101+
k_H = np.linalg.norm(H, 2)
105102
crvmin = -1.0
106-
# NOTE: iteration depends on delta/func_tol
103+
104+
# Number of iterations & smoothing parameter, from Theorem 10.57 in
105+
# [A. Beck. First-order methods in optimization, SIAM, 2017]
106+
# We do not use the values of k and mu given in the theorem statement, but rather the intermediate
107+
# results on p313 (K1 for number of iterations, and the immediate next line for mu)
108+
# Note: in the book's notation, Gamma=delta^2, alpha=1, beta=L_h^2/2, Lf=k_H [alpha and beta from Thm 10.51]
107109
try:
108-
MAX_LOOP_ITERS = ceil(min(delta*(2 * L_h+sqrt(L_h*L_h+2*k_H*func_tol)) / func_tol, max_iters)) # maximum number of iterations
110+
MAX_LOOP_ITERS = ceil(delta*(L_h+sqrt(L_h*L_h+2*k_H*func_tol)) / func_tol)
111+
MAX_LOOP_ITERS = min(MAX_LOOP_ITERS, max_iters)
109112
except ValueError:
110113
MAX_LOOP_ITERS = max_iters
111114
u = 2 * delta / (MAX_LOOP_ITERS * L_h) # smoothing parameter
@@ -114,9 +117,7 @@ def gradient_Fu(xopt, g, H, u, prox_uh, d):
114117
# Calculate gradient_Fu,
115118
# where Fu(d) := g(d) + h_u(d) and h_u(d) is a 1/u-smooth approximation of h.
116119
# We assume that h is globally Lipschitz continuous with constant L_h,
117-
# then we can let h_u(d) be the Moreau Envelope M_h_u(d) of h.
118-
# TODO: Add instruction to prox_uh
119-
# SOLVED: bug here, previous: g + H @ d + (d - prox_uh(xopt, u, d, *argsprox)) / u
120+
# then we can let h_u(d) be the Moreau Envelope M_h_u(d) of h.
120121
return g + H @ d + (xopt + d - prox_uh(remove_scaling(xopt + d, scaling_changes), u, *argsprox)) / u
121122

122123
# Lipschitz constant of gradient_Fu
@@ -145,20 +146,16 @@ def proj(d0):
145146

146147
# main update step
147148
d = proj(y - g_Fu / l)
148-
# SOLVED: (previously) make sfista decrease in each iteration (might have d = 0, criticality measure=0)
149-
# if model_value(g, H, d, xopt, h, *argsh) > model_value(g, H, prev_d, xopt, h, *argsh):
150-
# d = prev_d
151149
new_model_value = model_value(g, H, d, xopt, h, argsh, scaling_changes)
152150
if new_model_value < model_value_best:
153151
d_best = d.copy()
154152
model_value_best = new_model_value
155153

156154
# update true gradient
157-
# FIXME: gnew is the gradient of the smoothed version
155+
# gnew is the gradient of the smoothed function
158156
gnew = gradient_Fu(xopt, g, H, u, prox_uh, d, *argsprox)
159157

160158
# update CRVMIN
161-
# NOTE: crvmin is the Hessian term
162159
crv = d.dot(H).dot(d)/sumsq(d) if sumsq(d) >= ZERO_THRESH else crvmin
163160
crvmin = min(crvmin, crv) if crvmin != -1.0 else crv
164161

0 commit comments

Comments
 (0)