@@ -156,7 +156,7 @@ def ProximalGradient(proxf, proxg, x0, epsg=1.,
156156 Proximal operator of g function
157157 x0 : :obj:`numpy.ndarray`
158158 Initial vector
159- epsg : :obj:`float` or :obj:`np.ndarray`, optional
159+ epsg : :obj:`float` or :obj:`numpy.ndarray`, optional
160160 Scaling factor of g function
161161 tau : :obj:`float` or :obj:`numpy.ndarray`, optional
162162 Positive scalar weight, which should satisfy the following condition
@@ -195,7 +195,7 @@ def ProximalGradient(proxf, proxg, x0, epsg=1.,
195195
196196 Notes
197197 -----
198- The Proximal point algorithm can be expressed by the following recursion:
198+ The Proximal gradient algorithm can be expressed by the following recursion:
199199
200200 .. math::
201201
@@ -359,6 +359,202 @@ def AcceleratedProximalGradient(proxf, proxg, x0, tau=None, beta=0.5,
359359 callback=callback, show=show)
360360
361361
362+ def AndersonProximalGradient(proxf, proxg, x0, epsg=1.,
363+                              tau=None, niter=10,
364+                              nhistory=10, epsr=1e-10,
365+                              safeguard=False, tol=None,
366+                              callback=None, show=False):
367+ r"""Proximal gradient with Anderson acceleration
368+
369+ Solves the following minimization problem using the Proximal
370+ gradient algorithm with Anderson acceleration:
371+
372+ .. math::
373+
374+ \mathbf{x} = \argmin_\mathbf{x} f(\mathbf{x}) + \epsilon g(\mathbf{x})
375+
376+ where :math:`f(\mathbf{x})` is a smooth convex function with a uniquely
377+ defined gradient and :math:`g(\mathbf{x})` is any convex function that
378+ has a known proximal operator.
379+
380+ Parameters
381+ ----------
382+ proxf : :obj:`pyproximal.ProxOperator`
383+ Proximal operator of f function (must have ``grad`` implemented)
384+ proxg : :obj:`pyproximal.ProxOperator`
385+ Proximal operator of g function
386+ x0 : :obj:`numpy.ndarray`
387+ Initial vector
388+ epsg : :obj:`float` or :obj:`numpy.ndarray`, optional
389+ Scaling factor of g function
390+ tau : :obj:`float` or :obj:`numpy.ndarray`, optional
391+ Positive scalar weight, which should satisfy the following condition
392+ to guarantee convergence: :math:`\tau \in (0, 1/L]` where ``L`` is
393+ the Lipschitz constant of :math:`\nabla f`. When ``tau=None``,
394+ backtracking is used to adaptively estimate the best tau at each
395+ iteration. Finally, note that :math:`\tau` can be chosen to be a vector
396+ when dealing with problems with multiple right-hand-sides
397+ niter : :obj:`int`, optional
398+ Number of iterations of iterative scheme
399+ nhistory : :obj:`int`, optional
400+ Number of previous iterates to be kept in memory (to compute the scaling factors)
401+ epsr : :obj:`float`, optional
402+ Scaling factor for the regularization added to :math:`\mathbf{R}^T \mathbf{R}` before inversion
403+ safeguard : :obj:`bool`, optional
404+ Apply safeguarding strategy to the update (``True``) or not (``False``)
405+ tol : :obj:`float`, optional
406+ Tolerance on change of objective function (used as stopping criterion). If
407+ ``tol=None``, run until ``niter`` is reached
408+ callback : :obj:`callable`, optional
409+ Function with signature (``callback(x)``) to call after each iteration
410+ where ``x`` is the current model vector
411+ show : :obj:`bool`, optional
412+ Display iterations log
413+
414+ Returns
415+ -------
416+ x : :obj:`numpy.ndarray`
417+ Inverted model
418+
419+ Notes
420+ -----
421+ The Proximal gradient algorithm with Anderson acceleration can be expressed by the
422+ following recursion [1]_:
423+
424+ .. math::
425+ m_k = \min(m, k)\\
426+ \mathbf{g}^{k} = \mathbf{x}^{k} - \tau^k \nabla f(\mathbf{x}^k)\\
427+ \mathbf{r}^{k} = \mathbf{g}^{k} - \mathbf{y}^{k}\\
428+ \mathbf{G}^{k} = [\mathbf{g}^{k},..., \mathbf{g}^{k-m_k}]\\
429+ \mathbf{R}^{k} = [\mathbf{r}^{k},..., \mathbf{r}^{k-m_k}]\\
430+ \alpha_k = (\mathbf{R}^{kT} \mathbf{R}^{k})^{-1} \mathbf{1} / \mathbf{1}^T
431+ (\mathbf{R}^{kT} \mathbf{R}^{k})^{-1} \mathbf{1}\\
432+ \mathbf{y}^{k+1} = \mathbf{G}^{k} \alpha_k\\
433+ \mathbf{x}^{k+1} = \prox_{\tau^{k+1} g}(\mathbf{y}^{k+1})
434+
435+ where :math:`m` equals ``nhistory``, :math:`k=1,2,...,n_{iter}`, :math:`\mathbf{y}^{0}=\mathbf{x}^{0}`,
436+ :math:`\mathbf{y}^{1}=\mathbf{x}^{0} - \tau^0 \nabla f(\mathbf{x}^0)`,
437+ :math:`\mathbf{x}^{1}=\prox_{\tau^0 g}(\mathbf{y}^{1})`, and
438+ :math:`\mathbf{g}^{0}=\mathbf{y}^{1}`.
439+
440+ Refer to [1]_ for the guarded version of the algorithm (when ``safeguard=True``).
441+
442+ .. [1] Mai, V., and Johansson, M. "Anderson Acceleration of Proximal Gradient
443+ Methods", 2020.
444+
445+ """
446+     # check if epsg is a vector
447+     if np.asarray(epsg).size == 1.:
448+         epsg = epsg * np.ones(niter)
449+         epsg_print = str(epsg[0])
450+     else:
451+         epsg_print = 'Multi'
452+
453+     if show:
454+         tstart = time.time()
455+         print('Proximal Gradient with Anderson Acceleration\n'
456+               '---------------------------------------------------------\n'
457+               'Proximal operator (f): %s\n'
458+               'Proximal operator (g): %s\n'
459+               'tau = %s\t\tepsg = %s\tniter = %d\n'
460+               'nhist = %d\tepsr = %s\n'
461+               'guard = %s\ttol = %s\n' % (type(proxf), type(proxg),
462+                                           str(tau), epsg_print, niter,
463+                                           nhistory, str(epsr),
464+                                           str(safeguard), str(tol)))
465+         head = '   Itn       x[0]          f           g        J=f+eps*g       tau'
466+         print(head)
467+
468+     # initialize model
469+     y = x0 - tau * proxf.grad(x0)
470+     x = proxg.prox(y, epsg[0] * tau)
471+     g = y.copy()
472+     r = g - x0
473+     R, G = [r, ], [g, ]
474+     pf = proxf(x)
475+     pfg = np.inf
476+     tolbreak = False
477+
478+     # iterate
479+     for iiter in range(niter):
480+
481+         # update fixed point
482+         g = x - tau * proxf.grad(x)
483+         r = g - y
484+
485+         # update history vectors
486+         R.insert(0, r)
487+         G.insert(0, g)
488+         if iiter >= nhistory - 1:
489+             R.pop(-1)
490+             G.pop(-1)
491+
492+         # solve for alpha coefficients
493+         Rstack = np.vstack(R)
494+         Rinv = np.linalg.pinv(Rstack @ Rstack.T + epsr * np.linalg.norm(Rstack) ** 2)
495+         ones = np.ones(min(nhistory, iiter + 2))
496+         Rinvones = Rinv @ ones
497+         alpha = Rinvones / (ones[None] @ Rinvones)
498+
499+         if not safeguard:
500+             # update auxiliary variable
501+             y = np.vstack(G).T @ alpha
502+
503+             # update main variable
504+             x = proxg.prox(y, epsg[iiter] * tau)
505+
506+         else:
507+             # update auxiliary variable
508+             ytest = np.vstack(G).T @ alpha
509+
510+             # update main variable
511+             xtest = proxg.prox(ytest, epsg[iiter] * tau)
512+
513+             # check if function is decreased, otherwise do basic PG step
514+             pfold, pf = pf, proxf(xtest)
515+             if pf <= pfold - tau * np.linalg.norm(proxf.grad(x)) ** 2 / 2:
516+                 y = ytest
517+                 x = xtest
518+             else:
519+                 x = proxg.prox(g, epsg[iiter] * tau)
520+                 y = g
521+
522+         # run callback
523+         if callback is not None:
524+             callback(x)
525+
526+         # tolerance check: break iterations if overall
527+         # objective does not decrease below tolerance
528+         if tol is not None:
529+             pfgold = pfg
530+             pf, pg = proxf(x), proxg(x)
531+             pfg = pf + np.sum(epsg[iiter] * pg)
532+             if np.abs(1.0 - pfg / pfgold) < tol:
533+                 tolbreak = True
534+
535+         # show iteration logger
536+         if show:
537+             if iiter < 10 or niter - iiter < 10 or iiter % (niter // 10) == 0:
538+                 if tol is None:
539+                     pf, pg = proxf(x), proxg(x)
540+                     pfg = pf + np.sum(epsg[iiter] * pg)
541+                 msg = '%6g %12.5e %10.3e %10.3e %10.3e %10.3e' % \
542+                       (iiter + 1, np.real(to_numpy(x[0])) if x.ndim == 1 else np.real(to_numpy(x[0, 0])),
543+                        pf, pg,
544+                        pfg,
545+                        tau)
546+                 print(msg)
547+
548+         # break if tolerance condition is met
549+         if tolbreak:
550+             break
551+
552+     if show:
553+         print('\nTotal time (s) = %.2f' % (time.time() - tstart))
554+         print('---------------------------------------------------------\n')
555+     return x
556+
557+
362558 def GeneralizedProximalGradient(proxfs, proxgs, x0, tau,
363559                                 epsg=1., weights=None,
364560                                 eta=1., niter=10,
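The `# solve for alpha coefficients` block added above implements the Anderson extrapolation described in the Notes of the new function. A standalone sketch of that step, assuming the history lists hold 1-D NumPy arrays with the newest entry first (as the loop maintains them), could read:

```python
import numpy as np

def anderson_extrapolate(R, G, epsr=1e-10):
    """Sketch of the Anderson extrapolation step.

    R: list of residuals r^k, ..., r^{k-m_k} (newest first)
    G: list of fixed-point iterates g^k, ..., g^{k-m_k} (newest first)
    Returns y^{k+1} = G @ alpha, where alpha minimizes the norm of the
    combined residual sum_i alpha_i * R[i] subject to sum(alpha) = 1.
    """
    Rstack = np.vstack(R)                            # (m_k + 1, n)
    # regularized Gram matrix of the residuals, then its pseudo-inverse
    Rinv = np.linalg.pinv(Rstack @ Rstack.T +
                          epsr * np.linalg.norm(Rstack) ** 2)
    ones = np.ones(len(R))
    alpha = (Rinv @ ones) / (ones @ Rinv @ ones)     # normalized weights
    return np.vstack(G).T @ alpha                    # extrapolated point
```

The `epsr` term plays the same role as in the new solver: it regularizes the Gram matrix of the residuals, which becomes nearly singular once the stored residuals are almost collinear.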
@@ -390,7 +586,7 @@ def GeneralizedProximalGradient(proxfs, proxgs, x0, tau,
390586 Positive scalar weight, which should satisfy the following condition
391587 to guarantee convergence: :math:`\tau \in (0, 1/L]` where ``L`` is
392588 the Lipschitz constant of :math:`\sum_{i=1}^n \nabla f_i`.
393- epsg : :obj:`float` or :obj:`np.ndarray`, optional
589+ epsg : :obj:`float` or :obj:`numpy.ndarray`, optional
394590 Scaling factor(s) of ``g`` function(s)
395591 weights : :obj:`float`, optional
396592 Weighting factors of ``g`` functions. Must sum to 1.
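A possible end-to-end call of the new `AndersonProximalGradient` solver is sketched below. The import path and the two small operator classes are assumptions made for illustration; they expose only the interface the solver relies on (`__call__`, `grad`, `prox`) rather than the actual `pyproximal` operators.

```python
import numpy as np
from pyproximal.optimization.primal import AndersonProximalGradient  # assumed import path

class LeastSquares:
    """f(x) = 0.5 ||Ax - b||^2 (smooth term with gradient)."""
    def __init__(self, A, b):
        self.A, self.b = A, b
    def __call__(self, x):
        return 0.5 * np.linalg.norm(self.A @ x - self.b) ** 2
    def grad(self, x):
        return self.A.T @ (self.A @ x - self.b)

class L1Norm:
    """g(x) = ||x||_1 (non-smooth term with soft-thresholding prox)."""
    def __call__(self, x):
        return np.sum(np.abs(x))
    def prox(self, x, tau):
        return np.sign(x) * np.maximum(np.abs(x) - tau, 0.)

# small sparse-recovery problem
rng = np.random.default_rng(0)
A = rng.standard_normal((30, 15))
xtrue = np.zeros(15)
xtrue[[2, 7]] = 1.
b = A @ xtrue

proxf, proxg = LeastSquares(A, b), L1Norm()
tau = 1. / np.linalg.norm(A.T @ A, 2)   # 1/L, L = Lipschitz constant of grad f
xest = AndersonProximalGradient(proxf, proxg, x0=np.zeros(15), epsg=0.1,
                                tau=tau, niter=200, nhistory=5, show=False)
```

Note that `tau` is passed explicitly here: the initialization shown in the diff multiplies `tau` with the gradient right away, so this sketch does not rely on the `tau=None` backtracking mentioned in the docstring.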