8787
8888def ctrsbox_sfista (xopt , g , H , projections , delta , h , L_h , prox_uh , argsh = (), argsprox = (), func_tol = 1e-3 , max_iters = 500 , d_max_iters = 100 , d_tol = 1e-10 , use_fortran = USE_FORTRAN , scaling_changes = None ):
8989 n = xopt .size
90- # NOTE: L_h, prox_uh unable to check, add instruction to prox_uh
9190 assert xopt .shape == (n ,), "xopt has wrong shape (should be vector)"
9291 assert g .shape == (n ,), "g and xopt have incompatible sizes"
9392 assert len (H .shape ) == 2 , "H must be a matrix"
@@ -99,13 +98,17 @@ def ctrsbox_sfista(xopt, g, H, projections, delta, h, L_h, prox_uh, argsh=(), ar
9998 d = np .zeros (n ) # start with zero vector
10099 y = np .zeros (n )
101100 t = 1
102- k_H = np .linalg .norm (H , 2 ) # SOLVED: use ||H||_2 <= ||H||_F, might be better than maxhessian
103- # QUESTION: different expression for u from MAX_LOOP_ITERS
104- #u = 2 * func_tol / (L_h * (L_h + sqrt(L_h * L_h + 2 * k_H * func_tol))) # smoothing parameter
101+ k_H = np .linalg .norm (H , 2 )
105102 crvmin = - 1.0
106- # NOTE: iterataion depends on delta/func_tol
103+
104+ # Number of iterations & smoothing parameter, from Theorem 10.57 in
105+ # [A. Beck. First-order methods in optimization, SIAM, 2017]
106+ # We do not use the values of k and mu given in the theorem statement, but rather the intermediate
107+ # results on p313 (K1 for number of iterations, and the immediate next line for mu)
108+ # Note: in the book's notation, Gamma=delta^2, alpha=1, beta=L_h^2/2, Lf=k_H [alpha and beta from Thm 10.51]
107109 try :
108- MAX_LOOP_ITERS = ceil (min (delta * (2 * L_h + sqrt (L_h * L_h + 2 * k_H * func_tol )) / func_tol , max_iters )) # maximum number of iterations
110+ MAX_LOOP_ITERS = ceil (delta * (L_h + sqrt (L_h * L_h + 2 * k_H * func_tol )) / func_tol )
111+ MAX_LOOP_ITERS = min (MAX_LOOP_ITERS , max_iters )
109112 except ValueError :
110113 MAX_LOOP_ITERS = max_iters
111114 u = 2 * delta / (MAX_LOOP_ITERS * L_h ) # smoothing parameter
@@ -114,9 +117,7 @@ def gradient_Fu(xopt, g, H, u, prox_uh, d):
114117 # Calculate gradient_Fu,
115118 # where Fu(d) := g(d) + h_u(d) and h_u(d) is a 1/u-smooth approximation of h.
116119 # We assume that h is globally Lipschitz continous with constant L_h,
117- # then we can let h_u(d) be the Moreau Envelope M_h_u(d) of h.
118- # TODO: Add instruction to prox_uh
119- # SOLVED: bug here, previous: g + H @ d + (d - prox_uh(xopt, u, d, *argsprox)) / u
120+ # then we can let h_u(d) be the Moreau Envelope M_h_u(d) of h.
120121 return g + H @ d + (xopt + d - prox_uh (remove_scaling (xopt + d , scaling_changes ), u , * argsprox )) / u
121122
122123 # Lipschitz constant of gradient_Fu
@@ -145,20 +146,16 @@ def proj(d0):
145146
146147 # main update step
147148 d = proj (y - g_Fu / l )
148- # SOLVED: (previously) make sfista decrease in each iteration (might have d = 0, criticality measure=0)
149- # if model_value(g, H, d, xopt, h, *argsh) > model_value(g, H, prev_d, xopt, h, *argsh):
150- # d = prev_d
151149 new_model_value = model_value (g , H , d , xopt , h , argsh , scaling_changes )
152150 if new_model_value < model_value_best :
153151 d_best = d .copy ()
154152 model_value_best = new_model_value
155153
156154 # update true gradient
157- # FIXME: gnew is the gradient of the smoothed version
155+ # gnew is the gradient of the smoothed function
158156 gnew = gradient_Fu (xopt , g , H , u , prox_uh , d , * argsprox )
159157
160158 # update CRVMIN
161- # NOTE: crvmin is the Hessian term
162159 crv = d .dot (H ).dot (d )/ sumsq (d ) if sumsq (d ) >= ZERO_THRESH else crvmin
163160 crvmin = min (crvmin , crv ) if crvmin != - 1.0 else crv
164161
0 commit comments