Skip to content

Commit 47bb770

Browse files
author
Hông-Lan Botterman
committed
ENH: test imputers and fixture
1 parent 746a570 commit 47bb770

File tree

5 files changed

+127
-123
lines changed

5 files changed

+127
-123
lines changed

qolmat/imputations/rpca/rpca_noisy.py

Lines changed: 49 additions & 55 deletions
Original file line number | Diff line number | Diff line change
@@ -12,54 +12,6 @@
1212
from qolmat.utils.exceptions import CostFunctionRPCANotMinimized
1313

1414

15-
def _check_cost_function_minimized(
16-
observations: NDArray,
17-
low_rank: NDArray,
18-
anomalies: NDArray,
19-
tau: float,
20-
lam: float,
21-
norm: str,
22-
):
23-
"""Check that the functional minimized by the RPCA
24-
is smaller at the end than at the beginning
25-
26-
Parameters
27-
----------
28-
observations : NDArray
29-
observations matrix with first linear interpolation
30-
low_rank : NDArray
31-
low_rank matrix resulting from RPCA
32-
anomalies : NDArray
33-
sparse matrix resulting from RPCA
34-
tau : float
35-
parameter penalizing the nuclear norm of the low rank part
36-
lam : float
37-
parameter penalizing the L1-norm of the anomaly/sparse part
38-
norm : str
39-
norm of the temporal penalisation. Has to be `L1` or `L2`
40-
41-
Raises
42-
------
43-
CostFunctionRPCANotMinimized
44-
The RPCA does not minimized the cost function:
45-
the starting cost is at least equal to the final one.
46-
"""
47-
value_start = tau * np.linalg.norm(observations, "nuc")
48-
if norm == "L1":
49-
anomalies_norm = np.sum(np.abs(anomalies))
50-
function_str = "||D-M-A||_2 + tau ||D||_* + lam ||A||_1"
51-
elif norm == "L2":
52-
anomalies_norm = np.sum(anomalies**2)
53-
function_str = "||D-M-A||_2 + tau ||D||_* + lam ||A||_2"
54-
value_end = (
55-
np.sum((observations - low_rank - anomalies) ** 2)
56-
+ tau * np.linalg.norm(low_rank, "nuc")
57-
+ lam * anomalies_norm
58-
)
59-
if value_start + 1e-4 < value_end:
60-
raise CostFunctionRPCANotMinimized(function_str, value_start, value_end)
61-
62-
6315
class RPCANoisy(RPCA):
6416
"""
6517
This class implements a noisy version of the so-called 'improved RPCA'
@@ -423,12 +375,54 @@ def decompose_rpca(self, D: NDArray, Omega: NDArray) -> Tuple[NDArray, NDArray]:
423375
elif self.norm == "L2":
424376
M, A, U, V = self.decompose_rpca_L2(D, Omega, lam, tau, rank)
425377

426-
print("D")
427-
print(D)
428-
print("M")
429-
print(M)
430-
print("A")
431-
print(A)
432-
_check_cost_function_minimized(D, M, A, tau, lam, self.norm)
378+
self._check_cost_function_minimized(D, M, A, tau, lam, self.norm)
433379

434380
return M, A
381+
382+
@staticmethod
def _check_cost_function_minimized(
    observations: NDArray,
    low_rank: NDArray,
    anomalies: NDArray,
    tau: float,
    lam: float,
    norm: str,
    tolerance: float = 1e-4,
):
    """Check that the RPCA cost did not increase between start and end.

    The starting cost is ``tau * ||observations||_*``, i.e. the final-cost
    formula evaluated at ``low_rank = observations`` and ``anomalies = 0``.
    The final cost is::

        sum((observations - low_rank - anomalies) ** 2)
        + tau * ||low_rank||_*
        + lam * anomalies_norm

    where ``anomalies_norm`` is ``sum(|anomalies|)`` for ``norm="L1"`` and
    ``sum(anomalies ** 2)`` for ``norm="L2"``.

    Parameters
    ----------
    observations : NDArray
        observations matrix with first linear interpolation
    low_rank : NDArray
        low_rank matrix resulting from RPCA
    anomalies : NDArray
        sparse matrix resulting from RPCA
    tau : float
        parameter penalizing the nuclear norm of the low rank part
    lam : float
        parameter penalizing the norm of the anomaly/sparse part
    norm : str
        selects the anomaly penalty used in this check. Has to be `L1` or `L2`
    tolerance : float, optional
        slack added to the starting cost before the comparison,
        by default 1e-4

    Raises
    ------
    ValueError
        If `norm` is neither `L1` nor `L2`.
    CostFunctionRPCANotMinimized
        The RPCA did not minimize the cost function: the final cost is
        greater than or equal to the starting cost plus `tolerance`.
    """
    # NOTE(review): no temporal-penalisation term appears in either cost
    # below; confirm this is intended for the noisy RPCA objective.
    if norm == "L1":
        anomalies_norm = np.sum(np.abs(anomalies))
        function_str = "||D-M-A||_2 + tau ||D||_* + lam ||A||_1"
    elif norm == "L2":
        anomalies_norm = np.sum(anomalies**2)
        function_str = "||D-M-A||_2 + tau ||D||_* + lam ||A||_2"
    else:
        # Without this branch the names above stay unbound and the function
        # fails later with an UnboundLocalError unrelated to the real cause.
        raise ValueError(f"norm has to be 'L1' or 'L2', got {norm!r}.")

    value_start = tau * np.linalg.norm(observations, "nuc")
    value_end = (
        np.sum((observations - low_rank - anomalies) ** 2)
        + tau * np.linalg.norm(low_rank, "nuc")
        + lam * anomalies_norm
    )
    if value_start + tolerance <= value_end:
        raise CostFunctionRPCANotMinimized(function_str, value_start, value_end)

qolmat/imputations/rpca/rpca_pcp.py

Lines changed: 34 additions & 34 deletions
Original file line number | Diff line number | Diff line change
@@ -11,39 +11,6 @@
1111
from qolmat.utils.exceptions import CostFunctionRPCANotMinimized
1212

1313

14-
def _check_cost_function_minimized(
15-
observations: NDArray,
16-
low_rank: NDArray,
17-
anomalies: NDArray,
18-
lam: float,
19-
):
20-
"""Check that the functional minimized by the RPCA
21-
is smaller at the end than at the beginning
22-
23-
Parameters
24-
----------
25-
observations : NDArray
26-
observations matrix with first linear interpolation
27-
low_rank : NDArray
28-
low_rank matrix resulting from RPCA
29-
anomalies : NDArray
30-
sparse matrix resulting from RPCA
31-
lam : float
32-
parameter penalizing the L1-norm of the anomaly/sparse part
33-
34-
Raises
35-
------
36-
CostFunctionRPCANotMinimized
37-
The RPCA does not minimized the cost function:
38-
the starting cost is at least equal to the final one.
39-
"""
40-
value_start = np.linalg.norm(observations, "nuc")
41-
value_end = np.linalg.norm(low_rank, "nuc") + lam * np.sum(np.abs(anomalies))
42-
if value_start + 1e-9 < value_end:
43-
function_str = "||D||_* + lam ||A||_1"
44-
raise CostFunctionRPCANotMinimized(function_str, value_start, value_end)
45-
46-
4714
class RPCAPCP(RPCA):
4815
"""
4916
This class implements the basic RPCA decomposition using Alternating Lagrangian Multipliers.
@@ -112,6 +79,39 @@ def decompose_rpca(self, D: NDArray, Omega: NDArray) -> Tuple[NDArray, NDArray]:
11279
if error < self.tol:
11380
break
11481

115-
_check_cost_function_minimized(D, M, A, lam)
82+
self._check_cost_function_minimized(D, M, A, lam)
11683

11784
return M, A
85+
86+
@staticmethod
def _check_cost_function_minimized(
    observations: NDArray,
    low_rank: NDArray,
    anomalies: NDArray,
    lam: float,
    tolerance: float = 1e-4,
):
    """Check that the RPCA cost did not increase between start and end.

    The starting cost is ``||observations||_*``, i.e. the final-cost formula
    evaluated at ``low_rank = observations`` and ``anomalies = 0``.
    The final cost is ``||low_rank||_* + lam * sum(|anomalies|)``.

    Parameters
    ----------
    observations : NDArray
        observations matrix with first linear interpolation
    low_rank : NDArray
        low_rank matrix resulting from RPCA
    anomalies : NDArray
        sparse matrix resulting from RPCA
    lam : float
        parameter penalizing the L1-norm of the anomaly/sparse part
    tolerance : float, optional
        slack added to the starting cost before the comparison,
        by default 1e-4

    Raises
    ------
    CostFunctionRPCANotMinimized
        The RPCA did not minimize the cost function: the final cost is
        greater than or equal to the starting cost plus `tolerance`.
    """
    value_start = np.linalg.norm(observations, "nuc")
    value_end = np.linalg.norm(low_rank, "nuc") + lam * np.sum(np.abs(anomalies))
    if value_start + tolerance <= value_end:
        function_str = "||D||_* + lam ||A||_1"
        raise CostFunctionRPCANotMinimized(function_str, value_start, value_end)

tests/imputations/rpca/test_rpca_noisy.py

Lines changed: 21 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
import pytest
33
from numpy.typing import NDArray
44

5-
from qolmat.imputations.rpca.rpca_noisy import RPCANoisy, _check_cost_function_minimized
5+
from qolmat.imputations.rpca.rpca_noisy import RPCANoisy
66
from qolmat.utils import utils
77
from qolmat.utils.data import generate_artificial_ts
88
from qolmat.utils.exceptions import CostFunctionRPCANotMinimized
@@ -12,18 +12,22 @@
1212
X_interpolated = np.array([[1, 2], [3, 3]], dtype=float)
1313
omega = np.array([[True, True], [True, False]])
1414
max_iterations = 100
15-
# synthetic temporal data
16-
n_samples = 1000
17-
periods = [100, 20]
18-
amp_anomalies = 0.5
19-
ratio_anomalies = 0.05
20-
amp_noise = 0.1
21-
X_true, A_true, E_true = generate_artificial_ts(
22-
n_samples, periods, amp_anomalies, ratio_anomalies, amp_noise
23-
)
24-
signal = X_true + A_true + E_true
25-
mask = np.random.choice(len(signal), round(len(signal) / 20))
26-
signal[mask] = np.nan
15+
16+
17+
@pytest.fixture
def synthetic_temporal_data():
    """Build a synthetic signal with missing values for the RPCA tests.

    The signal is the sum of the three arrays returned by
    ``generate_artificial_ts``; ``round(len(signal) / 20)`` distinct
    positions are then set to NaN.

    Returns
    -------
    NDArray
        the synthetic signal with NaN at the masked positions
    """
    n_samples = 1000
    periods = [100, 20]
    amp_anomalies = 0.5
    ratio_anomalies = 0.05
    amp_noise = 0.1
    X_true, A_true, E_true = generate_artificial_ts(
        n_samples, periods, amp_anomalies, ratio_anomalies, amp_noise
    )
    signal = X_true + A_true + E_true

    # A seeded local generator keeps the mask reproducible without touching
    # the global NumPy RNG state; replace=False guarantees the requested
    # number of distinct positions is actually masked.
    # NOTE(review): generate_artificial_ts may draw random values itself;
    # if so the signal values still vary between runs - confirm.
    rng = np.random.default_rng(42)
    n_missing = round(len(signal) / 20)
    mask = rng.choice(len(signal), size=n_missing, replace=False)
    signal[mask] = np.nan
    return signal
2731

2832

2933
@pytest.mark.parametrize(
@@ -42,8 +46,9 @@
4246
def test_check_cost_function_minimized_raise_expection(
    obs: NDArray, lr: NDArray, ano: NDArray, lam: float, tau: float, norm: str
):
    """Check that the cost check raises when the final cost is too large."""
    rpca = RPCANoisy()
    with pytest.raises(CostFunctionRPCANotMinimized):
        # Passed by keyword: the checker takes `tau` before `lam`, whereas
        # this test lists `lam` first, so positional passing swaps them.
        rpca._check_cost_function_minimized(obs, lr, ano, tau=tau, lam=lam, norm=norm)
4752

4853

4954
@pytest.mark.parametrize("X", [X_complete])
@@ -85,8 +90,8 @@ def test_rpca_pcp_zero_lambda(X: NDArray, tau: float, X_interpolated: NDArray):
8590
np.testing.assert_allclose(A_result, X_interpolated, atol=1e-4)
8691

8792

88-
@pytest.mark.parametrize("signal", [signal])
89-
def test_rpca_temporal_signal(signal: NDArray):
93+
def test_rpca_temporal_signal(synthetic_temporal_data):
94+
signal = synthetic_temporal_data
9095
period = 100
9196
tau = 1
9297
lam = 0.1

tests/imputations/rpca/test_rpca_pcp.py

Lines changed: 21 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
import pytest
33
from numpy.typing import NDArray
44

5-
from qolmat.imputations.rpca.rpca_pcp import RPCAPCP, _check_cost_function_minimized
5+
from qolmat.imputations.rpca.rpca_pcp import RPCAPCP
66
from qolmat.utils import utils
77
from qolmat.utils.data import generate_artificial_ts
88
from qolmat.utils.exceptions import CostFunctionRPCANotMinimized
@@ -12,18 +12,22 @@
1212
max_iterations = 50
1313
small_mu = 1e-5
1414
large_mu = 1e5
15-
# synthetic temporal data
16-
n_samples = 1000
17-
periods = [100, 20]
18-
amp_anomalies = 0.5
19-
ratio_anomalies = 0.05
20-
amp_noise = 0.1
21-
X_true, A_true, E_true = generate_artificial_ts(
22-
n_samples, periods, amp_anomalies, ratio_anomalies, amp_noise
23-
)
24-
signal = X_true + A_true + E_true
25-
mask = np.random.choice(len(signal), round(len(signal) / 20))
26-
signal[mask] = np.nan
15+
16+
17+
@pytest.fixture
def synthetic_temporal_data():
    """Build a synthetic signal with missing values for the RPCA tests.

    The signal is the sum of the three arrays returned by
    ``generate_artificial_ts``; ``round(len(signal) / 20)`` distinct
    positions are then set to NaN.

    Returns
    -------
    NDArray
        the synthetic signal with NaN at the masked positions
    """
    n_samples = 1000
    periods = [100, 20]
    amp_anomalies = 0.5
    ratio_anomalies = 0.05
    amp_noise = 0.1
    X_true, A_true, E_true = generate_artificial_ts(
        n_samples, periods, amp_anomalies, ratio_anomalies, amp_noise
    )
    signal = X_true + A_true + E_true

    # A seeded local generator keeps the mask reproducible without touching
    # the global NumPy RNG state; replace=False guarantees the requested
    # number of distinct positions is actually masked.
    # NOTE(review): generate_artificial_ts may draw random values itself;
    # if so the signal values still vary between runs - confirm.
    rng = np.random.default_rng(42)
    n_missing = round(len(signal) / 20)
    mask = rng.choice(len(signal), size=n_missing, replace=False)
    signal[mask] = np.nan
    return signal
2731

2832

2933
@pytest.mark.parametrize(
@@ -41,13 +45,14 @@ def test_check_cost_function_minimized_raise_expection(
4145
obs: NDArray, lr: NDArray, ano: NDArray, lam: float
4246
):
4347
function_str = "||D||_* + lam ||A||_1"
48+
rpca = RPCAPCP()
4449
with pytest.raises(
4550
CostFunctionRPCANotMinimized,
4651
match="PCA algorithm may provide bad results. "
4752
f"{function_str} is larger at the end "
4853
"of the algorithm than at the start.",
4954
):
50-
_check_cost_function_minimized(obs, lr, ano, lam)
55+
rpca._check_cost_function_minimized(obs, lr, ano, lam)
5156

5257

5358
@pytest.mark.parametrize("X", [X_complete])
@@ -85,8 +90,8 @@ def test_rpca_rpca_pcp_large_lambda_small_mu(X: NDArray, mu: float):
8590
np.testing.assert_allclose(A_result, np.full_like(X, 0), atol=1e-4)
8691

8792

88-
@pytest.mark.parametrize("signal", [signal])
89-
def test_rpca_temporal_signal(signal: NDArray):
93+
def test_rpca_temporal_signal(synthetic_temporal_data):
94+
signal = synthetic_temporal_data
9095
period = 100
9196
lam = 0.1
9297
rpca = RPCAPCP(period=period, lam=lam, mu=0.01)

tests/imputations/test_imputers.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
import pandas as pd
55
import pytest
66
from sklearn.ensemble import ExtraTreesRegressor
7-
from sklearn.utils.estimator_checks import parametrize_with_checks
7+
from sklearn.utils.estimator_checks import check_estimator, parametrize_with_checks
88
from qolmat.benchmark.hyperparameters import HyperValue
99

1010
from qolmat.imputations import imputers
@@ -303,7 +303,7 @@ def test_ImputerEM_fit_transform(df: pd.DataFrame) -> None:
303303
imputers.KNNImputer(),
304304
imputers.ImputerMICE(),
305305
imputers.ImputerRegressor(),
306-
imputers.ImputerRPCA(),
306+
imputers.ImputerRPCA(tau=0, lam=0),
307307
imputers.ImputerEM(),
308308
]
309309
)

0 commit comments

Comments
 (0)