
Commit 6fb31e3

Merge pull request #143 from shnaqvi/gen_pgm_fix
Generalized Proximal Gradient Fix to match Proximal Gradient
2 parents ae5dc7c + 91fdfef commit 6fb31e3

File tree: 2 files changed, +128 -32 lines changed

pyproximal/optimization/primal.py

Lines changed: 54 additions & 32 deletions
@@ -102,8 +102,9 @@ def ProximalPoint(prox, x0, tau, niter=10, callback=None, show=False):
     return x


-def ProximalGradient(proxf, proxg, x0, tau=None, beta=0.5,
-                     epsg=1., niter=10, niterback=100,
+def ProximalGradient(proxf, proxg, x0, epsg=1.,
+                     tau=None, beta=0.5, eta=1.,
+                     niter=10, niterback=100,
                      acceleration=None,
                      callback=None, show=False):
     r"""Proximal gradient (optionally accelerated)
@@ -127,17 +128,19 @@ def ProximalGradient(proxf, proxg, x0, tau=None, beta=0.5,
         Proximal operator of g function
     x0 : :obj:`numpy.ndarray`
         Initial vector
+    epsg : :obj:`float` or :obj:`np.ndarray`, optional
+        Scaling factor of g function
     tau : :obj:`float` or :obj:`numpy.ndarray`, optional
         Positive scalar weight, which should satisfy the following condition
         to guarantees convergence: :math:`\tau \in (0, 1/L]` where ``L`` is
         the Lipschitz constant of :math:`\nabla f`. When ``tau=None``,
         backtracking is used to adaptively estimate the best tau at each
-        iteration. Finally note that :math:`\tau` can be chosen to be a vector
+        iteration. Finally, note that :math:`\tau` can be chosen to be a vector
         when dealing with problems with multiple right-hand-sides
     beta : :obj:`float`, optional
         Backtracking parameter (must be between 0 and 1)
-    epsg : :obj:`float` or :obj:`np.ndarray`, optional
-        Scaling factor of g function
+    eta : :obj:`float`, optional
+        Relaxation parameter (must be between 0 and 1, 0 excluded).
     niter : :obj:`int`, optional
         Number of iterations of iterative scheme
     niterback : :obj:`int`, optional
@@ -161,9 +164,8 @@ def ProximalGradient(proxf, proxg, x0, tau=None, beta=0.5,

    .. math::

-
-        \mathbf{x}^{k+1} = \prox_{\tau^k \epsilon g}(\mathbf{y}^{k+1} -
-        \tau^k \nabla f(\mathbf{y}^{k+1})) \\
+        \mathbf{x}^{k+1} = \mathbf{y}^k + \eta (\prox_{\tau^k \epsilon g}(\mathbf{y}^k -
+        \tau^k \nabla f(\mathbf{y}^k)) - \mathbf{y}^k) \\
         \mathbf{y}^{k+1} = \mathbf{x}^k + \omega^k
         (\mathbf{x}^k - \mathbf{x}^{k-1})

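Editor's note on the updated formula (not part of the commit): with :math:`\eta = 1`, the default, the relaxed step collapses back to the classical proximal-gradient update, so existing behaviour is preserved:

    \mathbf{x}^{k+1} = \mathbf{y}^k + 1 \cdot \left(\prox_{\tau^k \epsilon g}\left(\mathbf{y}^k - \tau^k \nabla f(\mathbf{y}^k)\right) - \mathbf{y}^k\right)
                     = \prox_{\tau^k \epsilon g}\left(\mathbf{y}^k - \tau^k \nabla f(\mathbf{y}^k)\right)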
@@ -187,7 +189,7 @@ def ProximalGradient(proxf, proxg, x0, tau=None, beta=0.5,
     Different accelerations are provided:

     - ``acceleration=None``: :math:`\omega^k = 0`;
-    - `acceleration=vandenberghe`` [1]_: :math:`\omega^k = k / (k + 3)` for `
+    - ``acceleration=vandenberghe`` [1]_: :math:`\omega^k = k / (k + 3)` for `
     - ``acceleration=fista``: :math:`\omega^k = (t_{k-1}-1)/t_k` for where
       :math:`t_k = (1 + \sqrt{1+4t_{k-1}^{2}}) / 2` [2]_

@@ -197,7 +199,7 @@ def ProximalGradient(proxf, proxg, x0, tau=None, beta=0.5,
        Imaging Sciences, vol. 2, pp. 183-202. 2009.

    """
-    # check if epgs is a ve
+    # check if epgs is a vector
    if np.asarray(epsg).size == 1.:
        epsg_print = str(epsg)
    else:
@@ -218,7 +220,7 @@ def ProximalGradient(proxf, proxg, x0, tau=None, beta=0.5,
               'niterback = %d\tacceleration = %s\n' % (type(proxf), type(proxg),
               'Adaptive' if tau is None else str(tau), beta,
               epsg_print, niter, niterback, acceleration))
-        head = '   Itn       x[0]          f           g       J=f+eps*g'
+        head = '   Itn       x[0]          f           g       J=f+eps*g       tau'
        print(head)

    backtracking = False
@@ -237,10 +239,15 @@ def ProximalGradient(proxf, proxg, x0, tau=None, beta=0.5,

        # proximal step
        if not backtracking:
-            x = proxg.prox(y - tau * proxf.grad(y), epsg * tau)
+            if eta == 1.:
+                x = proxg.prox(y - tau * proxf.grad(y), epsg * tau)
+            else:
+                x = x + eta * (proxg.prox(x - tau * proxf.grad(x), epsg * tau) - x)
        else:
            x, tau = _backtracking(y, tau, proxf, proxg, epsg,
                                   beta=beta, niterback=niterback)
+            if eta != 1.:
+                x = x + eta * (proxg.prox(x - tau * proxf.grad(x), epsg * tau) - x)

        # update internal parameters for bilinear operator
        if isinstance(proxf, BilinearOperator):
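To make the new relaxed step concrete, here is a minimal standalone numpy sketch of the ``eta != 1`` branch (editor's addition, not from the commit; ``A``, ``b``, ``soft_threshold`` and all values are illustrative, assuming f(x) = 0.5*||Ax - b||^2 and soft-thresholding as the prox of the scaled L1 norm):

import numpy as np

def soft_threshold(v, thresh):
    # prox of thresh * ||.||_1
    return np.sign(v) * np.maximum(np.abs(v) - thresh, 0.)

rng = np.random.default_rng(0)
A = rng.normal(size=(8, 10))
b = A @ np.eye(10)[2]                            # data from a 1-sparse model
grad_f = lambda x: A.T @ (A @ x - b)             # gradient of 0.5 * ||Ax - b||^2

tau = 0.99 / np.linalg.eigvalsh(A.T @ A).max()   # step size in (0, 1/L]
eta, epsg = 0.5, 0.1                             # relaxation and scaling of g
x = np.zeros(10)
for _ in range(200):
    # relaxed step: move a fraction eta towards the proximal point
    x = x + eta * (soft_threshold(x - tau * grad_f(x), epsg * tau) - x)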
@@ -264,10 +271,11 @@ def ProximalGradient(proxf, proxg, x0, tau=None, beta=0.5,
        if show:
            if iiter < 10 or niter - iiter < 10 or iiter % (niter // 10) == 0:
                pf, pg = proxf(x), proxg(x)
-                msg = '%6g  %12.5e  %10.3e  %10.3e  %10.3e' % \
+                msg = '%6g  %12.5e  %10.3e  %10.3e  %10.3e  %10.3e' % \
                      (iiter + 1, np.real(to_numpy(x[0])) if x.ndim == 1 else np.real(to_numpy(x[0, 0])),
                       pf, pg[0] if epsg_print == 'Multi' else pg,
-                       pf + np.sum(epsg * pg))
+                       pf + np.sum(epsg * pg),
+                       tau)
                print(msg)
    if show:
        print('\nTotal time (s) = %.2f' % (time.time() - tstart))
@@ -296,8 +304,9 @@ def AcceleratedProximalGradient(proxf, proxg, x0, tau=None, beta=0.5,
                            callback=callback, show=show)


-def GeneralizedProximalGradient(proxfs, proxgs, x0, tau=None,
-                                epsg=1., niter=10,
+def GeneralizedProximalGradient(proxfs, proxgs, x0, tau,
+                                epsg=1., weights=None,
+                                eta=1., niter=10,
                                 acceleration=None,
                                 callback=None, show=False):
    r"""Generalized Proximal gradient
@@ -316,24 +325,27 @@ def GeneralizedProximalGradient(proxfs, proxgs, x0, tau=None,

    Parameters
    ----------
-    proxfs : :obj:`List of pyproximal.ProxOperator`
+    proxfs : :obj:`list of pyproximal.ProxOperator`
        Proximal operators of the :math:`f_i` functions (must have ``grad`` implemented)
-    proxgs : :obj:`List of pyproximal.ProxOperator`
+    proxgs : :obj:`list of pyproximal.ProxOperator`
        Proximal operators of the :math:`g_j` functions
    x0 : :obj:`numpy.ndarray`
        Initial vector
-    tau : :obj:`float` or :obj:`numpy.ndarray`, optional
+    tau : :obj:`float`
        Positive scalar weight, which should satisfy the following condition
        to guarantees convergence: :math:`\tau \in (0, 1/L]` where ``L`` is
-        the Lipschitz constant of :math:`\sum_{i=1}^n \nabla f_i`. When ``tau=None``,
-        backtracking is used to adaptively estimate the best tau at each
-        iteration.
+        the Lipschitz constant of :math:`\sum_{i=1}^n \nabla f_i`.
    epsg : :obj:`float` or :obj:`np.ndarray`, optional
        Scaling factor(s) of ``g`` function(s)
+    weights : :obj:`float`, optional
+        Weighting factors of ``g`` functions. Must sum to 1.
+    eta : :obj:`float`, optional
+        Relaxation parameter (must be between 0 and 1, 0 excluded). Note that
+        this will be only used when ``acceleration=None``.
    niter : :obj:`int`, optional
        Number of iterations of iterative scheme
    acceleration: :obj:`str`, optional
-        Acceleration (``vandenberghe`` or ``fista``)
+        Acceleration (``None``, ``vandenberghe`` or ``fista``)
    callback : :obj:`callable`, optional
        Function with signature (``callback(x)``) to call after each iteration
        where ``x`` is the current model vector
@@ -352,16 +364,27 @@ def GeneralizedProximalGradient(proxfs, proxgs, x0, tau=None,

    .. math::
        \text{for } j=1,\cdots,n, \\
-        ~~~~\mathbf z_j^{k+1} = \mathbf z_j^{k} + \epsilon_j
-        \left[prox_{\frac{\tau^k}{\omega_j} g_j}\left(2 \mathbf{x}^{k} - \mathbf{z}_j^{k}
+        ~~~~\mathbf z_j^{k+1} = \mathbf z_j^{k} + \eta
+        \left[prox_{\frac{\tau^k \epsilon_j}{w_j} g_j}\left(2 \mathbf{x}^{k} - \mathbf{z}_j^{k}
        - \tau^k \sum_{i=1}^n \nabla f_i(\mathbf{x}^{k})\right) - \mathbf{x}^{k} \right] \\
-        \mathbf{x}^{k+1} = \sum_{j=1}^n \omega_j f_j \\
-
-    where :math:`\sum_{j=1}^n \omega_j=1`. In the current implementation :math:`\omega_j=1/n`.
+        \mathbf{x}^{k+1} = \sum_{j=1}^n w_j \mathbf z_j^{k+1} \\
+
+    where :math:`\sum_{j=1}^n w_j=1`. In the current implementation, :math:`w_j=1/n` when
+    not provided.
+
    """
+    # check if weights sum to 1
+    if weights is None:
+        weights = np.ones(len(proxgs)) / len(proxgs)
+    if len(weights) != len(proxgs) or np.sum(weights) != 1.:
+        raise ValueError(f'omega={weights} must be an array of size {len(proxgs)} '
+                         f'summing to 1')
+    print(weights)
+
    # check if epgs is a vector
    if np.asarray(epsg).size == 1.:
        epsg_print = str(epsg)
+        epsg = epsg * np.ones(len(proxgs))
    else:
        epsg_print = 'Multi'

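Editor's aside (not a change in this commit): the new check compares ``np.sum(weights) != 1.`` exactly, so weights that sum to 1 only up to floating-point rounding (e.g. several equal weights) could be rejected, and the ``print(weights)`` line looks like leftover debugging output. A tolerance-based variant is the usual defensive pattern; a hypothetical sketch:

import numpy as np

def check_weights(weights, nprox):
    # Hypothetical variant of the commit's validation: same semantics,
    # but tolerant to floating-point rounding in the sum.
    weights = np.asarray(weights)
    if weights.size != nprox or not np.isclose(weights.sum(), 1.):
        raise ValueError(f'weights={weights} must be an array of size '
                         f'{nprox} summing to 1')
    return weights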

@@ -403,9 +426,9 @@ def GeneralizedProximalGradient(proxfs, proxgs, x0, tau=None,
        x = np.zeros_like(x)
        for i, proxg in enumerate(proxgs):
            ztmp = 2 * y - zs[i] - tau * grad
-            ztmp = proxg.prox(ztmp, tau * len(proxgs))
-            zs[i] += epsg * (ztmp - y)
-            x += zs[i] / len(proxgs)
+            ztmp = proxg.prox(ztmp, tau * epsg[i] / weights[i])
+            zs[i] += eta * (ztmp - y)
+            x += weights[i] * zs[i]

        # update y
        if acceleration == 'vandenberghe':
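For intuition, the updated inner loop amounts to the following standalone sketch of one generalized proximal-gradient iteration (editor's addition, not from the commit; ``gpg_step``, ``grad_f`` and ``proxs`` are illustrative names, with ``proxs`` a list of callables ``prox(v, thresh)``):

import numpy as np

def gpg_step(y, zs, grad_f, proxs, tau, epsg, weights, eta=1.):
    # One iteration mirroring the updated loop above.
    grad = grad_f(y)
    x = np.zeros_like(y)
    for i, prox in enumerate(proxs):
        ztmp = 2 * y - zs[i] - tau * grad            # reflected point
        ztmp = prox(ztmp, tau * epsg[i] / weights[i])
        zs[i] = zs[i] + eta * (ztmp - y)             # relaxed z-update
        x = x + weights[i] * zs[i]                   # weighted average
    return x, zs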
@@ -416,7 +439,6 @@ def GeneralizedProximalGradient(proxfs, proxgs, x0, tau=None,
            omega = ((told - 1.) / t)
        else:
            omega = 0
-
        y = x + omega * (x - xold)

        # run callback
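That closes the primal.py changes. As a usage sketch (editor's addition, not from the commit; the operator and prox constructors mirror the new tests below), the relaxed solver is driven exactly as before, with ``eta`` as the only new knob:

import numpy as np
from pylops.basicoperators import MatrixMult
from pyproximal.proximal import L1, L2
from pyproximal.optimization.primal import ProximalGradient

np.random.seed(0)
R = np.random.normal(0., 1., (8, 10))
Rop = MatrixMult(R)
x = np.zeros(10)
x[2], x[4] = 1, 0.5
y = Rop @ x

l2 = L2(Op=Rop, b=y, niter=10, warm=True)
l1 = L1(sigma=5e-1)
tau = 0.99 / np.abs((Rop.H * Rop).eigs(1)[0])
# under-relaxed run (eta < 1); eta=1. reproduces the previous behaviour
xinv = ProximalGradient(l2, l1, x0=np.zeros(10),
                        tau=tau, eta=0.8, niter=200)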

pytests/test_solver.py

Lines changed: 74 additions & 0 deletions
@@ -0,0 +1,74 @@
+import pytest
+
+import numpy as np
+from numpy.testing import assert_array_almost_equal
+from pylops.basicoperators import MatrixMult
+from pyproximal.proximal import L1, L2
+from pyproximal.optimization.primal import ProximalGradient, GeneralizedProximalGradient
+
+par1 = {'n': 8, 'm': 10, 'dtype': 'float32'}  # float32
+par2 = {'n': 8, 'm': 10, 'dtype': 'float64'}  # float64
+
+
+@pytest.mark.parametrize("par", [(par1), (par2)])
+def test_GPG_weights(par):
+    """Check GPG raises error if weights do not sum to 1
+    """
+    with pytest.raises(ValueError):
+        np.random.seed(0)
+        n, m = par['n'], par['m']
+
+        # Random mixing matrix
+        R = np.random.normal(0., 1., (n, m))
+        Rop = MatrixMult(R)
+
+        # Model and data
+        x = np.zeros(m)
+        y = Rop @ x
+
+        # Operators
+        l2 = L2(Op=Rop, b=y, niter=10, warm=True)
+        l1 = L1(sigma=5e-1)
+        _ = GeneralizedProximalGradient([l2, ], [l1, ],
+                                        x0=np.zeros(m),
+                                        tau=1.,
+                                        weights=[1., 1.])
+
+
+@pytest.mark.parametrize("par", [(par1), (par2)])
+def test_PG_GPG(par):
+    """Check equivalency of ProximalGradient and GeneralizedProximalGradient when using
+    a single regularization term
+    """
+    np.random.seed(0)
+    n, m = par['n'], par['m']
+
+    # Define sparse model
+    x = np.zeros(m)
+    x[2], x[4] = 1, 0.5
+
+    # Random mixing matrix
+    R = np.random.normal(0., 1., (n, m))
+    Rop = MatrixMult(R)
+
+    y = Rop @ x
+
+    # Step size
+    L = (Rop.H * Rop).eigs(1).real
+    tau = 0.99 / L
+
+    # PG
+    l2 = L2(Op=Rop, b=y, niter=10, warm=True)
+    l1 = L1(sigma=5e-1)
+    xpg = ProximalGradient(l2, l1, x0=np.zeros(m),
+                           tau=tau, niter=100,
+                           acceleration='fista')
+
+    # GPG
+    l2 = L2(Op=Rop, b=y, niter=10, warm=True)
+    l1 = L1(sigma=5e-1)
+    xgpg = GeneralizedProximalGradient([l2, ], [l1, ], x0=np.zeros(m),
+                                       tau=tau, niter=100,
+                                       acceleration='fista')
+
+    assert_array_almost_equal(xpg, xgpg, decimal=2)
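The tests exercise the single-regularizer case. As a complementary sketch (editor's addition, not from the commit), the new ``weights`` and ``eta`` arguments matter once several regularizers are combined; the values below are illustrative:

import numpy as np
from pylops.basicoperators import MatrixMult
from pyproximal.proximal import L1, L2
from pyproximal.optimization.primal import GeneralizedProximalGradient

np.random.seed(0)
R = np.random.normal(0., 1., (8, 10))
Rop = MatrixMult(R)
x = np.zeros(10)
x[2], x[4] = 1, 0.5
y = Rop @ x

tau = 0.99 / np.abs((Rop.H * Rop).eigs(1)[0])
l2 = L2(Op=Rop, b=y, niter=10, warm=True)
l1a, l1b = L1(sigma=5e-1), L1(sigma=1e-1)
# two g terms, explicit weights summing to 1, mild relaxation
xinv = GeneralizedProximalGradient([l2, ], [l1a, l1b], x0=np.zeros(10),
                                   tau=float(tau), niter=100,
                                   weights=[0.5, 0.5], eta=0.9)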
