Skip to content

Commit 53116b9

Browse files
Merge pull request #126 from scikit-learn-contrib/em_sampler_mle_debug
Em sampler mle debug
2 parents 8708ed7 + 07d98a9 commit 53116b9

File tree

24 files changed

+1010
-486
lines changed

24 files changed

+1010
-486
lines changed

.bumpversion.cfg

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
[bumpversion]
2-
current_version = 0.1.1
2+
current_version = 0.1.2
33
commit = True
44
tag = True
55

HISTORY.rst

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,18 @@
22
History
33
=======
44

5+
0.1.3 (2024-03-07)
6+
------------------
7+
8+
* RPCA algorithms now start with a normalizing scaler
9+
* The EM algorithms now include a gradient projection step to be more robust to colinearity
10+
* The EM algorithm based on the Gaussian model is now initialized using a robust estimation of the covariance matrix
11+
* A bug in the EM algorithm has been patched: the normalizing matrix gamma was creating a sampling bias
12+
* Speed up of the EM algorithm likelihood maximization, using the conjugate gradient method
13+
* The ImputeRegressor class now handles the nans by `row` by default
14+
* The metric `frechet` was not correctly called and has been patched
15+
* The EM algorithm with VAR(p) now fills initial holes in order to avoid exponential explosions
16+
517
0.1.2 (2024-02-28)
618
------------------
719

docs/conf.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@
2727
author = "Quantmetry"
2828

2929
# The full version, including alpha/beta/rc tags
30-
version = "0.1.1"
30+
version = "0.1.2"
3131
release = version
3232

3333
# -- General configuration ---------------------------------------------------

docs/imputers.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@ See the :class:`~qolmat.imputations.imputers.ImputerRpcaPcp` class for implement
4141
The class :class:`RPCANoisy` implements a recommended improved version, which relies on a decomposition :math:`\mathbf{D} = \mathbf{M} + \mathbf{A} + \mathbf{E}`. The additional term :math:`\mathbf{E}` encodes a Gaussian noise and makes the numerical convergence more reliable. This class also implements a time-consistency penalization for time series, parametrized by the :math:`\eta_k` and :math:`H_k`. Writing :math:`\Vert \mathbf{MH_k} \Vert_p` for either :math:`\Vert \mathbf{MH_k} \Vert_1` or :math:`\Vert \mathbf{MH_k} \Vert_F^2`, the optimisation problem is the following
4242

4343
.. math::
44-
\text{min}_{\mathbf{M, A} \in \mathbb{R}^{m \times n}} \quad \Vert P_{\Omega} (\mathbf{D}-\mathbf{M}-\mathbf{A}) \Vert_F^2 + \tau \Vert \mathbf{M} \Vert_* + \lambda \Vert \mathbf{A} \Vert_1 + \sum_{k=1}^K \eta_k \Vert \mathbf{M H_k} \Vert_p
44+
\text{min}_{\mathbf{M, A} \in \mathbb{R}^{m \times n}} \quad \frac 1 2 \Vert P_{\Omega} (\mathbf{D}-\mathbf{M}-\mathbf{A}) \Vert_F^2 + \tau \Vert \mathbf{M} \Vert_* + \lambda \Vert \mathbf{A} \Vert_1 + \sum_{k=1}^K \eta_k \Vert \mathbf{M H_k} \Vert_p
4545
4646
with :math:`\mathbf{E} = \mathbf{D} - \mathbf{M} - \mathbf{A}`.
4747
See the :class:`~qolmat.imputations.imputers.ImputerRpcaNoisy` class for implementation details.

examples/RPCA.md

Lines changed: 163 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -8,12 +8,12 @@ jupyter:
88
format_version: '1.3'
99
jupytext_version: 1.14.4
1010
kernelspec:
11-
display_name: Python 3 (ipykernel)
11+
display_name: env_qolmat_dev
1212
language: python
13-
name: python3
13+
name: env_qolmat_dev
1414
---
1515

16-
```python
16+
```python tags=[]
1717
%reload_ext autoreload
1818
%autoreload 2
1919

@@ -26,17 +26,18 @@ import sys
2626

2727
from math import pi
2828

29-
from qolmat.utils import plot, data
30-
from qolmat.imputations.rpca.rpca_pcp import RPCAPCP
31-
from qolmat.imputations.rpca.rpca_noisy import RPCANoisy
29+
from qolmat.utils import utils, plot, data
30+
from qolmat.imputations.rpca.rpca_pcp import RpcaPcp
31+
from qolmat.imputations.rpca.rpca_noisy import RpcaNoisy
32+
from qolmat.imputations.softimpute import SoftImpute
3233
from qolmat.imputations.rpca import rpca_utils
3334
from qolmat.utils.data import generate_artificial_ts
3435
```
3536

3637
**Generate synthetic data**
3738

38-
```python
39-
n_samples = 1000
39+
```python tags=[]
40+
n_samples = 10000
4041
periods = [100, 20]
4142
amp_anomalies = 0.5
4243
ratio_anomalies = 0.05
@@ -47,13 +48,15 @@ X_true, A_true, E_true = generate_artificial_ts(n_samples, periods, amp_anomalie
4748
signal = X_true + A_true + E_true
4849

4950
# Adding missing data
50-
#signal[5:20] = np.nan
51-
mask = np.random.choice(len(signal), round(len(signal) / 20))
52-
signal[mask] = np.nan
51+
signal[120:180] = np.nan
52+
signal[:20] = np.nan
53+
# signal[80:220] = np.nan
54+
# mask = np.random.choice(len(signal), round(len(signal) / 20))
55+
# signal[mask] = np.nan
5356

5457
```
5558

56-
```python
59+
```python tags=[]
5760
fig = plt.figure(figsize=(15, 8))
5861
ax = fig.add_subplot(4, 1, 1)
5962
ax.title.set_text("Low-rank signal")
@@ -74,40 +77,172 @@ plt.plot(signal)
7477
plt.show()
7578
```
7679

80+
<!-- #region tags=[] -->
81+
# Fit RPCA Noisy
82+
<!-- #endregion -->
83+
84+
```python tags=[]
85+
rpca_noisy = RpcaNoisy(tau=1, lam=.4, rank=1, norm="L2")
86+
```
87+
88+
```python tags=[]
89+
period = 100
90+
D = utils.prepare_data(signal, period)
91+
Omega = ~np.isnan(D)
92+
D = utils.linear_interpolation(D)
93+
```
94+
95+
```python tags=[]
96+
M, A, L, Q = rpca_noisy.decompose_with_basis(D, Omega)
97+
M2, A2 = rpca_noisy.decompose_on_basis(D, Omega, Q)
98+
```
99+
100+
```python tags=[]
101+
M_final = utils.get_shape_original(M, signal.shape)
102+
A_final = utils.get_shape_original(A, signal.shape)
103+
D_final = utils.get_shape_original(D, signal.shape)
104+
signal_imputed = M_final + A_final
105+
```
106+
107+
```python tags=[]
108+
fig = plt.figure(figsize=(12, 4))
109+
110+
plt.plot(signal_imputed, label="Imputed signal with anomalies")
111+
plt.plot(M_final, label="Imputed signal without anomalies")
112+
plt.plot(A_final, label="Anomalies")
113+
# plt.plot(D_final, label="D")
114+
plt.plot(signal, color="black", label="Original signal")
115+
plt.xlim(0, 400)
116+
plt.legend()
117+
plt.show()
118+
```
119+
77120
## PCP RPCA
78121

122+
```python tags=[]
123+
rpca_pcp = RpcaPcp(max_iterations=1000, lam=.1)
124+
```
125+
126+
```python tags=[]
127+
period = 100
128+
D = utils.prepare_data(signal, period)
129+
Omega = ~np.isnan(D)
130+
D = utils.linear_interpolation(D)
131+
```
132+
133+
```python tags=[]
134+
M, A = rpca_pcp.decompose(D, Omega)
135+
```
136+
137+
```python tags=[]
138+
M_final = utils.get_shape_original(M, signal.shape)
139+
A_final = utils.get_shape_original(A, signal.shape)
140+
D_final = utils.get_shape_original(D, signal.shape)
141+
# Y_final = utils.get_shape_original(Y, signal.shape)
142+
signal_imputed = M_final + A_final
143+
```
144+
145+
```python tags=[]
146+
fig = plt.figure(figsize=(12, 4))
147+
148+
plt.plot(signal_imputed, label="Imputed signal with anomalies")
149+
plt.plot(M_final, label="Imputed signal without anomalies")
150+
plt.plot(A_final, label="Anomalies")
151+
152+
plt.plot(signal, color="black", label="Original signal")
153+
plt.xlim(0, 400)
154+
# plt.gca().twinx()
155+
# plt.plot(Y_final, label="Y")
156+
plt.legend()
157+
plt.show()
158+
```
159+
160+
## Soft Impute
161+
162+
```python tags=[]
163+
imputer = SoftImpute(max_iterations=1000, tau=.1)
164+
```
165+
166+
```python tags=[]
167+
period = 100
168+
D = utils.prepare_data(signal, period)
169+
Omega = ~np.isnan(D)
170+
D = utils.linear_interpolation(D)
171+
```
172+
173+
```python tags=[]
174+
M, A = imputer.decompose(D, Omega)
175+
```
176+
177+
```python tags=[]
178+
M_final = utils.get_shape_original(M, signal.shape)
179+
A_final = utils.get_shape_original(A, signal.shape)
180+
D_final = utils.get_shape_original(D, signal.shape)
181+
# Y_final = utils.get_shape_original(Y, signal.shape)
182+
signal_imputed = M_final + A_final
183+
```
184+
185+
```python tags=[]
186+
fig = plt.figure(figsize=(12, 4))
187+
188+
plt.plot(signal_imputed, label="Imputed signal with anomalies")
189+
plt.plot(M_final, label="Imputed signal without anomalies")
190+
plt.plot(A_final, label="Anomalies")
191+
192+
plt.plot(signal, color="black", label="Original signal")
193+
plt.xlim(0, 400)
194+
plt.legend()
195+
plt.show()
196+
```
197+
198+
## Temporal RPCA
199+
79200
```python
80201
%%time
81-
rpca_pcp = RPCAPCP(period=100, max_iterations=100, mu=.5, lam=0.1)
82-
X, A = rpca_pcp.decompose_rpca_signal(signal)
83-
imputed = signal - A
202+
# rpca_noisy = RPCANoisy(period=10, tau=1, lam=0.4, rank=2, list_periods=[10], list_etas=[0.01], norm="L2")
203+
rpca_noisy = RpcaNoisy(tau=1, lam=0.4, rank=2, norm="L2")
204+
M, A = rpca_noisy.decompose(D, Omega)
205+
# imputed = X
84206
```
85207

86-
```python
208+
```python tags=[]
87209
fig = plt.figure(figsize=(12, 4))
88-
plt.plot(X, color="black")
89-
plt.plot(imputed)
210+
211+
plt.plot(signal_imputed, label="Imputed signal with anomalies")
212+
plt.plot(M_final, label="Imputed signal without anomalies")
213+
plt.plot(A_final, label="Anomalies")
214+
215+
plt.plot(signal, color="black", label="Original signal")
216+
plt.xlim(0, 400)
217+
# plt.gca().twinx()
218+
# plt.plot(Y_final, label="Y")
219+
plt.legend()
220+
plt.show()
90221
```
91222

92-
## Temporal RPCA
223+
# EM VAR(p)
93224

94225
```python
95-
signal.shape
226+
from qolmat.imputations import em_sampler
96227
```
97228

98229
```python
99-
%%time
100-
# rpca_noisy = RPCANoisy(period=10, tau=1, lam=0.4, rank=2, list_periods=[10], list_etas=[0.01], norm="L2")
101-
rpca_noisy = RPCANoisy(period=10, tau=1, lam=0.4, rank=2, norm="L2")
102-
X, A = rpca_noisy.decompose_rpca_signal(signal)
103-
imputed =
230+
p = 1
231+
model = em_sampler.VARpEM(method="mle", max_iter_em=10, n_iter_ou=512, dt=1e-1, p=p)
232+
```
233+
234+
```python
235+
D = signal.reshape(-1, 1)
236+
M_final = model.fit_transform(D)
104237
```
105238

106239
```python
107240
fig = plt.figure(figsize=(12, 4))
108-
plt.plot(signal, color="black")
109-
plt.plot(X_true)
110-
plt.plot(X)
241+
plt.plot(signal_imputed, label="Imputed signal with anomalies")
242+
plt.plot(M_final, label="Imputed signal without anomalies")
243+
plt.xlim(0, 400)
244+
plt.legend()
245+
plt.show()
111246
```
112247

113248
```python

0 commit comments

Comments
 (0)