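Final implementation done: add the `sfista.max_iters_scaling` parameter (default 2.0) and pass it to `ctrsbox_sfista` as `sfista_iters_scale` to scale the S-FISTA iteration bound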

lindonroberts · lindonroberts · commit dc4f04d96f6c · 2024-09-11T12:26:52.000+10:00
diff --git a/dfols/controller.py b/dfols/controller.py
@@ -460,13 +460,13 @@ def evaluate_criticality_measure(self, params):
             d, gnew, crvmin = ctrsbox_sfista(self.model.xopt(abs_coordinates=True), gopt, np.zeros(H.shape), self.model.projections, 1,
                                 self.h, self.lh, self.prox_uh, argsh = self.argsh, argsprox=self.argsprox, func_tol=func_tol, 
                                 max_iters=params("func_tol.max_iters"), d_max_iters=params("dykstra.max_iters"), d_tol=params("dykstra.d_tol"),
-                                scaling_changes=self.scaling_changes)
+                                scaling_changes=self.scaling_changes, sfista_iters_scale=params("sfista.max_iters_scaling"))
         else:
             proj = lambda x: pbox(x, self.model.sl, self.model.su)
             d, gnew, crvmin = ctrsbox_sfista(self.model.xopt(abs_coordinates=True), gopt, np.zeros(H.shape), [proj], 1,
                                 self.h, self.lh, self.prox_uh, argsh = self.argsh, argsprox=self.argsprox, func_tol=func_tol, 
                                 max_iters=params("func_tol.max_iters"), d_max_iters=params("dykstra.max_iters"), d_tol=params("dykstra.d_tol"),
-                                scaling_changes=self.scaling_changes)
+                                scaling_changes=self.scaling_changes, sfista_iters_scale=params("sfista.max_iters_scaling"))
         
         # Calculate criticality measure
         criticality_measure = self.h(remove_scaling(self.model.xopt(abs_coordinates=True), self.scaling_changes), *self.argsh) - model_value(gopt, np.zeros(H.shape), d, self.model.xopt(abs_coordinates=True), self.h, self.argsh, self.scaling_changes)
@@ -505,15 +505,15 @@ def trust_region_step(self, params, criticality_measure=1e-2):
                 d, gnew, crvmin = ctrsbox_sfista(self.model.xopt(abs_coordinates=True), gopt, H, self.model.projections, self.delta,
                                      self.h, self.lh, self.prox_uh, argsh = self.argsh, argsprox=self.argsprox, func_tol=func_tol, 
                                      max_iters=params("func_tol.max_iters"), d_max_iters=params("dykstra.max_iters"), d_tol=params("dykstra.d_tol"),
-                                     scaling_changes=self.scaling_changes)
+                                     scaling_changes=self.scaling_changes, sfista_iters_scale=params("sfista.max_iters_scaling"))
             else:
                 # NOTE: alternative way if using trsbox
                 # d, gnew, crvmin = trsbox(self.model.xopt(), gopt, H, self.model.sl, self.model.su, self.delta)
                 proj = lambda x: pbox(x, self.model.sl, self.model.su)
                 d, gnew, crvmin = ctrsbox_sfista(self.model.xopt(abs_coordinates=True), gopt, H, [proj], self.delta,
                                       self.h, self.lh, self.prox_uh, argsh = self.argsh, argsprox=self.argsprox, func_tol=func_tol,
                                       max_iters=params("func_tol.max_iters"), d_max_iters=params("dykstra.max_iters"), d_tol=params("dykstra.d_tol"),
-                                      scaling_changes=self.scaling_changes)
+                                      scaling_changes=self.scaling_changes, sfista_iters_scale=params("sfista.max_iters_scaling"))
             
             # NOTE: check sufficient decrease. If increase in the model, set zero step
             pred_reduction = self.h(remove_scaling(self.model.xopt(abs_coordinates=True), self.scaling_changes), *self.argsh) - model_value(gopt, H, d, self.model.xopt(abs_coordinates=True), self.h, self.argsh, self.scaling_changes)
diff --git a/dfols/params.py b/dfols/params.py
@@ -109,15 +109,20 @@ def __init__(self, n, npt, maxfun, objfun_has_noise=False):
         self.params["growing.full_rank.min_sing_val"] = 1e-6  # absolute floor on singular values
         self.params["growing.full_rank.svd_max_jac_cond"] = 1e8  # maximum condition number of Jacobian
         self.params["growing.perturb_trust_region_step"] = False  # add random direction onto TRS solution?
+        
         # Dykstra's algorithm
         self.params["dykstra.d_tol"] = 1e-10
         self.params["dykstra.max_iters"] = 100
+        
         # Matrix rank algorithm
         self.params["matrix_rank.r_tol"] = 1e-18
+        
         # Function tolerance when applying S-FISTA method
         self.params["func_tol.criticality_measure"] = 1e-3
         self.params["func_tol.tr_step"] = 1-1e-1
         self.params["func_tol.max_iters"] = 500
+        self.params["sfista.max_iters_scaling"] = 2.0
+        
         self.params_changed = {}
         for p in self.params:
             self.params_changed[p] = False
@@ -277,6 +282,8 @@ def param_type(self, key, npt):
             type_str, nonetype_ok, lower, upper = 'float', False, 0.0, 1.0
         elif key == "func_tol.max_iters":
             type_str, nonetype_ok, lower, upper = 'int', False, 0, None
+        elif key == "sfista.max_iters_scaling":
+            type_str, nonetype_ok, lower, upper = 'float', False, 1.0, None
         else:
             assert False, "ParameterList.param_type() has unknown key: %s" % key
         return type_str, nonetype_ok, lower, upper
diff --git a/dfols/trust_region.py b/dfols/trust_region.py
@@ -85,7 +85,7 @@
 
 ZERO_THRESH = 1e-14
 
-def ctrsbox_sfista(xopt, g, H, projections, delta, h, L_h, prox_uh, argsh=(), argsprox=(), func_tol=1e-3, max_iters=500, d_max_iters=100, d_tol=1e-10, use_fortran=USE_FORTRAN, scaling_changes=None):
+def ctrsbox_sfista(xopt, g, H, projections, delta, h, L_h, prox_uh, argsh=(), argsprox=(), func_tol=1e-3, max_iters=500, d_max_iters=100, d_tol=1e-10, use_fortran=USE_FORTRAN, scaling_changes=None, sfista_iters_scale=1.0):
     n = xopt.size
     assert xopt.shape == (n,), "xopt has wrong shape (should be vector)"
     assert g.shape == (n,), "g and xopt have incompatible sizes"
@@ -107,11 +107,12 @@ def ctrsbox_sfista(xopt, g, H, projections, delta, h, L_h, prox_uh, argsh=(), ar
     # results on p313 (K1 for number of iterations, and the immediate next line for mu)
     # Note: in the book's notation, Gamma=delta^2, alpha=1, beta=L_h^2/2, Lf=k_H [alpha and beta from Thm 10.51]
     try:
-        MAX_LOOP_ITERS = ceil(delta*(L_h+sqrt(L_h*L_h+2*k_H*func_tol)) / func_tol)
+        MAX_LOOP_ITERS = ceil(sfista_iters_scale * delta * (L_h+sqrt(L_h*L_h+2*k_H*func_tol)) / func_tol)
         MAX_LOOP_ITERS = min(MAX_LOOP_ITERS, max_iters)
     except ValueError:
         MAX_LOOP_ITERS = max_iters
     u =  2 * delta / (MAX_LOOP_ITERS * L_h) # smoothing parameter
+    # Alternative choice (not used): u = 2 * func_tol / (L_h ** 2 + L_h * sqrt(L_h ** 2 + 2 * k_H * func_tol)); the iteration-based choice of u above works better in practice
 
     def gradient_Fu(xopt, g, H, u, prox_uh, d):
     # Calculate gradient_Fu,