merge ProxGrad and its accel, add Generalization

Olivier Leblanc · Olivier Leblanc · commit 7810d5be0c46 · 2022-06-02T16:09:53.000+02:00
diff --git a/pyproximal/optimization/primal.py b/pyproximal/optimization/primal.py
@@ -100,12 +100,13 @@ def ProximalPoint(prox, x0, tau, niter=10, callback=None, show=False):
 
 
 def ProximalGradient(proxf, proxg, x0, tau=None, beta=0.5,
-                     epsg=1., niter=10, niterback=100,
-                     callback=None, show=False):
-    r"""Proximal gradient
+                                epsg=1., niter=10, niterback=100,
+                                acceleration='vandenberghe',
+                                callback=None, show=False):
+    r"""Proximal gradient (optionally accelerated)
 
-    Solves the following minimization problem using Proximal gradient
-    algorithm:
+    Solves the following minimization problem using (Accelerated) Proximal
+    gradient algorithm:
 
     .. math::
 
@@ -138,6 +139,8 @@ def ProximalGradient(proxf, proxg, x0, tau=None, beta=0.5,
         Number of iterations of iterative scheme
     niterback : :obj:`int`, optional
         Max number of iterations of backtracking
+    acceleration : :obj:`str`, optional
+        Acceleration (``None``, ``vandenberghe`` or ``fista``)
     callback : :obj:`callable`, optional
         Function with signature (``callback(x)``) to call after each iteration
         where ``x`` is the current model vector
@@ -151,12 +154,15 @@ def ProximalGradient(proxf, proxg, x0, tau=None, beta=0.5,
 
     Notes
     -----
-    The Proximal point algorithm can be expressed by the following recursion:
+    The (Accelerated) Proximal gradient algorithm can be expressed by the
+    following recursion:
 
     .. math::
 
-        \mathbf{x}^{k+1} = \prox_{\tau^k \epsilon g}(\mathbf{x}^k -
-        \tau^k \nabla f(\mathbf{x}^k))
+        \mathbf{y}^{k+1} = \mathbf{x}^k + \omega^k
+        (\mathbf{x}^k - \mathbf{x}^{k-1}) \\
+        \mathbf{x}^{k+1} = \prox_{\tau^k \epsilon g}(\mathbf{y}^{k+1}  -
+        \tau^k \nabla f(\mathbf{y}^{k+1}))
 
     where at each iteration :math:`\tau^k` can be estimated by back-tracking
     as follows:
@@ -173,7 +179,17 @@ def ProximalGradient(proxf, proxg, x0, tau=None, beta=0.5,
 
     where :math:`\tilde{f}_\tau(\mathbf{x}, \mathbf{y}) = f(\mathbf{y}) +
     \nabla f(\mathbf{y})^T (\mathbf{x} - \mathbf{y}) +
-    1/(2\tau)||\mathbf{x} - \mathbf{y}||_2^2`.
+    1/(2\tau)||\mathbf{x} - \mathbf{y}||_2^2`,
+    and
+    :math:`\omega^k = 0` for ``acceleration=None``,
+    :math:`\omega^k = k / (k + 3)` for ``acceleration=vandenberghe`` [1]_
+    or :math:`\omega^k = (t_{k-1}-1)/t_k` for ``acceleration=fista`` where
+    :math:`t_k = (1 + \sqrt{1+4t_{k-1}^{2}}) / 2` [2]_
+
+    .. [1] Vandenberghe, L., "Fast proximal gradient methods", 2010.
+    .. [2] Beck, A., and Teboulle, M. "A Fast Iterative Shrinkage-Thresholding
+       Algorithm for Linear Inverse Problems", SIAM Journal on
+       Imaging Sciences, vol. 2, pp. 183-202. 2009.
 
     """
     # check if epgs is a ve
@@ -182,9 +198,12 @@ def ProximalGradient(proxf, proxg, x0, tau=None, beta=0.5,
     else:
         epsg_print = 'Multi'
 
+    if acceleration not in [None, 'None', 'vandenberghe', 'fista']:
+        raise NotImplementedError('Acceleration should be None, vandenberghe '
+                                  'or fista')
     if show:
         tstart = time.time()
-        print('Proximal Gradient\n'
+        print('Accelerated Proximal Gradient\n'
               '---------------------------------------------------------\n'
               'Proximal operator (f): %s\n'
               'Proximal operator (g): %s\n'
@@ -201,13 +220,32 @@ def ProximalGradient(proxf, proxg, x0, tau=None, beta=0.5,
         backtracking = True
         tau = 1.
 
+    # initialize model
+    t = 1.
     x = x0.copy()
+    y = x.copy()
+
+    # iterate
     for iiter in range(niter):
+        xold = x.copy()
+
+        # proximal step
         if not backtracking:
-            x = proxg.prox(x - tau * proxf.grad(x), epsg * tau)
+            x = proxg.prox(y - tau * proxf.grad(y), epsg * tau)
         else:
-            x, tau = _backtracking(x, tau, proxf, proxg, epsg,
+            x, tau = _backtracking(y, tau, proxf, proxg, epsg,
                                    beta=beta, niterback=niterback)
+        
+        # update y
+        if acceleration == 'vandenberghe':
+            omega = iiter / (iiter + 3)
+        elif acceleration== 'fista':
+            told = t
+            t = (1. + np.sqrt(1. + 4. * t ** 2)) / 2.
+            omega = ((told - 1.) / t)
+        else:
+            omega = 0
+        y = x + omega * (x - xold)
 
         # run callback
         if callback is not None:
@@ -226,47 +264,44 @@ def ProximalGradient(proxf, proxg, x0, tau=None, beta=0.5,
         print('---------------------------------------------------------\n')
     return x
 
-
-def AcceleratedProximalGradient(proxf, proxg, x0, tau=None, beta=0.5,
-                                epsg=1., niter=10, niterback=100,
-                                acceleration='vandenberghe',
+def GeneralizedProximalGradient(proxfs, proxgs, x0, tau=None, beta=0.5,
+                                epsg=1., niter=10,
+                                acceleration='None',
                                 callback=None, show=False):
-    r"""Accelerated Proximal gradient
+    r"""Generalized Proximal gradient
 
-    Solves the following minimization problem using Accelerated Proximal
+    Solves the following minimization problem using Generalized Proximal
     gradient algorithm:
 
     .. math::
 
-        \mathbf{x} = \argmin_\mathbf{x} f(\mathbf{x}) + \epsilon g(\mathbf{x})
+        \mathbf{x} = \argmin_\mathbf{x} \sum_{i=1}^n f_i(\mathbf{x}) 
+        + \sum_{j=1}^m \tau_j g_j(\mathbf{x}),~~n,m \in \mathbb{N}^+
 
-    where :math:`f(\mathbf{x})` is a smooth convex function with a uniquely
-    defined gradient and :math:`g(\mathbf{x})` is any convex function that
-    has a known proximal operator.
+    where the :math:`f_i(\mathbf{x})` are smooth convex functions with a uniquely
+    defined gradient and the :math:`g_j(\mathbf{x})` are any convex functions that
+    have a known proximal operator.
 
     Parameters
     ----------
-    proxf : :obj:`pyproximal.ProxOperator`
-        Proximal operator of f function (must have ``grad`` implemented)
-    proxg : :obj:`pyproximal.ProxOperator`
-        Proximal operator of g function
+    proxfs : :obj:`list` of :obj:`pyproximal.ProxOperator`
+        Proximal operators of the :math:`f_i` functions (must have ``grad`` implemented)
+    proxgs : :obj:`list` of :obj:`pyproximal.ProxOperator`
+        Proximal operators of the :math:`g_j` functions
     x0 : :obj:`numpy.ndarray`
         Initial vector
     tau : :obj:`float` or :obj:`numpy.ndarray`, optional
         Positive scalar weight, which should satisfy the following condition
         to guarantees convergence: :math:`\tau  \in (0, 1/L]` where ``L`` is
-        the Lipschitz constant of :math:`\nabla f`. When ``tau=None``,
+        the Lipschitz constant of :math:`\sum_{i=1}^n \nabla f_i`. When ``tau=None``,
         backtracking is used to adaptively estimate the best tau at each
-        iteration. Finally note that :math:`\tau` can be chosen to be a vector
-        when dealing with problems with multiple right-hand-sides
+        iteration.
     beta : obj:`float`, optional
         Backtracking parameter (must be between 0 and 1)
     epsg : :obj:`float` or :obj:`np.ndarray`, optional
         Scaling factor of g function
     niter : :obj:`int`, optional
         Number of iterations of iterative scheme
-    niterback : :obj:`int`, optional
-        Max number of iterations of backtracking
     acceleration:  :obj:`str`, optional
         Acceleration (``vandenberghe`` or ``fista``)
     callback : :obj:`callable`, optional
@@ -282,76 +317,75 @@ def AcceleratedProximalGradient(proxf, proxg, x0, tau=None, beta=0.5,
 
     Notes
     -----
-    The Accelerated Proximal point algorithm can be expressed by the
+    The Generalized Proximal gradient algorithm can be expressed by the
     following recursion:
 
     .. math::
-
-        \mathbf{x}^{k+1} = \prox_{\tau^k \epsilon g}(\mathbf{y}^{k+1} -
-        \tau^k \nabla f(\mathbf{y}^{k+1})) \\
-        \mathbf{y}^{k+1} = \mathbf{x}^k + \omega^k
-        (\mathbf{x}^k - \mathbf{x}^{k-1})
-
-    where :math:`\omega^k = k / (k + 3)` for ``acceleration=vandenberghe`` [1]_
-    or :math:`\omega^k = (t_{k-1}-1)/t_k` for ``acceleration=fista`` where
-    :math:`t_k = (1 + \sqrt{1+4t_{k-1}^{2}}) / 2` [2]_
-
-    .. [1] Vandenberghe, L., "Fast proximal gradient methods", 2010.
-    .. [2] Beck, A., and Teboulle, M. "A Fast Iterative Shrinkage-Thresholding
-       Algorithm for Linear Inverse Problems", SIAM Journal on
-       Imaging Sciences, vol. 2, pp. 183-202. 2009.
-
+        \text{for } j=1,\cdots,m, \\
+        ~~~~\mathbf{z}_j^{k+1} = \mathbf{z}_j^{k} + \eta_k \left(\prox_{\frac{\tau^k}{\omega_j} g_j}\left(2 \mathbf{x}^{k} - \mathbf{z}_j^{k}
+        - \tau^k \sum_{i=1}^n \nabla f_i(\mathbf{x}^{k})\right) - \mathbf{x}^{k}\right) \\
+        \mathbf{x}^{k+1} = \sum_{j=1}^m \omega_j \mathbf{z}_j^{k+1}
+
+    where :math:`\sum_{j=1}^m \omega_j=1`.
     """
-    # check if epgs is a ve
+    # check if epgs is a vector
     if np.asarray(epsg).size == 1.:
         epsg_print = str(epsg)
     else:
         epsg_print = 'Multi'
 
-    if acceleration not in ['vandenberghe', 'fista']:
-        raise NotImplementedError('Acceleration should be vandenberghe '
+    if acceleration not in [None, 'None', 'vandenberghe', 'fista']:
+        raise NotImplementedError('Acceleration should be None, vandenberghe '
                                   'or fista')
     if show:
         tstart = time.time()
-        print('Accelerated Proximal Gradient\n'
+        print('Generalized Proximal Gradient\n'
               '---------------------------------------------------------\n'
-              'Proximal operator (f): %s\n'
-              'Proximal operator (g): %s\n'
-              'tau = %10e\tepsg = %s\tniter = %d\n' % (type(proxf),
-                                                       type(proxg),
-                                                       0 if tau is None else tau,
-                                                         epsg_print, niter))
+              'Proximal operators (f): %s\n'
+              'Proximal operators (g): %s\n'
+              'tau = %10e\tbeta=%10e\nepsg = %s\tniter = %d\n' % ([type(proxf) for proxf in proxfs],
+                                                         [type(proxg) for proxg in proxgs],
+                                                         0 if tau is None else tau,
+                                                         beta, epsg_print, niter))
         head = '   Itn       x[0]          f           g       J=f+eps*g'
         print(head)
 
-    backtracking = False
     if tau is None:
-        backtracking = True
         tau = 1.
 
     # initialize model
     t = 1.
     x = x0.copy()
     y = x.copy()
+    zs = [x.copy() for _ in range(len(proxgs))]
 
     # iterate
     for iiter in range(niter):
         xold = x.copy()
 
         # proximal step
-        if not backtracking:
-            x = proxg.prox(y - tau * proxf.grad(y), epsg * tau)
-        else:
-            x, tau = _backtracking(y, tau, proxf, proxg, epsg,
-                                   beta=beta, niterback=niterback)
+        grad = np.zeros_like(x)
+        for i, proxf in enumerate(proxfs):
+            grad += proxf.grad(x)
+
+        sol = np.zeros_like(x)
+        for i, proxg in enumerate(proxgs):
+            tmp = 2 * y - zs[i] - tau * grad
+            tmp[:] = proxg.prox(tmp, tau *len(proxgs) )
+            zs[i] += epsg * (tmp - y)
+            sol += zs[i]/len(proxgs)
+        x[:] = sol.copy()
 
         # update y
         if acceleration == 'vandenberghe':
             omega = iiter / (iiter + 3)
-        else:
+        elif acceleration== 'fista':
             told = t
             t = (1. + np.sqrt(1. + 4. * t ** 2)) / 2.
             omega = ((told - 1.) / t)
+        else:
+            omega = 0
+        
         y = x + omega * (x - xold)
 
         # run callback
@@ -360,7 +394,7 @@ def AcceleratedProximalGradient(proxf, proxg, x0, tau=None, beta=0.5,
 
         if show:
             if iiter < 10 or niter - iiter < 10 or iiter % (niter // 10) == 0:
-                pf, pg = proxf(x), proxg(x)
+                pf, pg = np.sum([proxf(x) for proxf in proxfs]), np.sum([proxg(x) for proxg in proxgs])
                 msg = '%6g  %12.5e  %10.3e  %10.3e  %10.3e' % \
                       (iiter + 1, x[0] if x.ndim == 1 else x[0, 0],
                        pf, pg[0] if epsg_print == 'Multi' else pg,
diff --git a/pyproximal/proximal/__init__.py b/pyproximal/proximal/__init__.py
@@ -20,6 +20,7 @@
     L21	                            L2,1 Norm
     L21_plus_L1	                    L2,1 + L1 mixed-norm
     Huber	                        Huber Norm
+    TV                              Total Variation Norm                                   
     Nuclear                         Nuclear Norm
     NuclearBall                     Nuclear Ball
     Orthogonal	                    Product between orthogonal operator and vector
@@ -46,6 +47,7 @@
 from .L21 import *
 from .L21_plus_L1 import *
 from .Huber import *
+from .TV import *
 from .Nuclear import *
 from .Orthogonal import *
 from .VStack import *
@@ -58,6 +60,6 @@
 
 __all__ = ['Box', 'Simplex', 'Intersection', 'AffineSet', 'Quadratic',
            'Euclidean', 'EuclideanBall', 'L0Ball', 'L1', 'L1Ball', 'L2',
-           'L2Convolve', 'L21', 'L21_plus_L1', 'Huber', 'Nuclear',
+           'L2Convolve', 'L21', 'L21_plus_L1', 'Huber', 'TV', 'Nuclear',
            'NuclearBall', 'Orthogonal', 'VStack', 'Nonlinear', 'SCAD',
            'Log', 'ETP', 'Geman', 'QuadraticEnvelopeCard', 'SingularValuePenalty']