Merge pull request #112 from Quantmetry/add_test_varpem_nonregression

JulienRoussel77 · web-flow · commit 57e9f90cfe8c · 2023-10-30T14:38:05.000+01:00
Add varpem non-regression test
diff --git a/qolmat/benchmark/metrics.py b/qolmat/benchmark/metrics.py
@@ -863,7 +863,6 @@ def kl_divergence_gaussian_exact(
     norm_M = (M**2).sum().sum()
     norm_y = (y**2).sum()
     term_diag_L = 2 * np.sum(np.log(np.diagonal(L2) / np.diagonal(L1)))
-    print(norm_M, "-", n_variables, "+", norm_y, "+", term_diag_L)
     div_kl = 0.5 * (norm_M - n_variables + norm_y + term_diag_L)
     return div_kl
 
diff --git a/qolmat/imputations/em_sampler.py b/qolmat/imputations/em_sampler.py
@@ -269,7 +269,7 @@ def _sample_ou(
         X_init = X.copy()
         gamma = self.get_gamma()
         sqrt_gamma = np.real(spl.sqrtm(gamma))
-        for _ in range(self.n_iter_ou):
+        for i in range(self.n_iter_ou):
             noise = self.ampli * self.rng.normal(0, 1, size=(n_variables, n_samples))
             grad_X = self.gradient_X_loglik(X_copy)
             X_copy += self.dt * grad_X @ gamma + np.sqrt(2 * self.dt) * noise @ sqrt_gamma
@@ -489,8 +489,8 @@ def get_gamma(self) -> NDArray:
         NDArray
             Gamma matrix
         """
-        gamma = np.diag(np.diagonal(self.cov))
-        # gamma = self.cov
+        # gamma = np.diag(np.diagonal(self.cov))
+        gamma = self.cov
         # gamma = np.eye(len(self.cov))
         return gamma
 
@@ -571,9 +571,9 @@ def _maximize_likelihood(self, X: NDArray, mask_na: NDArray) -> NDArray:
         NDArray
             DataFrame with imputed values.
         """
-        X_center = X - self.means[:, None]
+        X_center = X - self.means
         X_imputed = _conjugate_gradient(self.cov_inv, X_center, mask_na)
-        X_imputed = self.means[:, None] + X_imputed
+        X_imputed = self.means + X_imputed
         return X_imputed
 
     def _check_convergence(self) -> bool:
diff --git a/tests/imputations/test_em_sampler.py b/tests/imputations/test_em_sampler.py
@@ -1,5 +1,4 @@
-from typing import List
-
+from typing import List, Literal
 import numpy as np
 import pytest
 from numpy.typing import NDArray
@@ -279,6 +278,31 @@ def test_mean_covariance_multinormalem():
     np.testing.assert_allclose(covariance_imputed, covariance, rtol=1e-1, atol=1e-1)
 
 
+def test_multinormal_em_minimize_llik():
+    X, X_missing, mean, covariance = generate_multinormal_predefined_mean_cov(d=2, n=1000)
+    imputer = em_sampler.MultiNormalEM(method="mle", random_state=11)
+    X_imputed = imputer.fit_transform(X_missing)
+    llikelihood_imputed = imputer.get_loglikelihood(X_imputed)
+    for _ in range(10):
+        Delta = imputer.rng.uniform(0, 1, size=X.shape)
+        X_perturbated = X_imputed + Delta
+        llikelihood_perturbated = imputer.get_loglikelihood(X_perturbated)
+        assert llikelihood_perturbated < llikelihood_imputed
+    X_perturbated = X
+    X_perturbated[np.isnan(X)] = 0
+    llikelihood_perturbated = imputer.get_loglikelihood(X_perturbated)
+    assert llikelihood_perturbated < llikelihood_imputed
+
+
+@pytest.mark.parametrize("method", ["sample", "mle"])
+def test_multinormal_em_fit_transform(method: Literal["mle", "sample"]):
+    imputer = em_sampler.MultiNormalEM(method=method, random_state=11)
+    X = np.array([[1, 1, 1, 1], [np.nan, np.nan, 3, 2], [1, 2, 2, 1], [2, 2, 2, 2]])
+    result = imputer.fit_transform(X)
+    assert result.shape == X.shape
+    np.testing.assert_allclose(result[~np.isnan(X)], X[~np.isnan(X)])
+
+
 @pytest.mark.parametrize(
     "p",
     [1],
@@ -319,7 +343,6 @@ def test_varpem_fit_transform():
         ]
     )
     np.testing.assert_allclose(result, expected, atol=1e-12)
-    # assert False
 
 
 @pytest.mark.parametrize(
diff --git a/tests/imputations/test_imputers.py b/tests/imputations/test_imputers.py
@@ -174,7 +174,6 @@ def test_ImputerShuffle_fit_transform1(df: pd.DataFrame) -> None:
 def test_ImputerShuffle_fit_transform2(df: pd.DataFrame) -> None:
     imputer = imputers.ImputerShuffle(random_state=42)
     result = imputer.fit_transform(df)
-    print(result)
     expected = pd.DataFrame({"col1": [0, 3, 2, 3, 0], "col2": [-1, 1.5, 0.5, 1.5, 1.5]})
     np.testing.assert_allclose(result, expected)
 
@@ -290,20 +289,6 @@ def test_ImputerSoftImpute_fit_transform(df: pd.DataFrame) -> None:
     np.testing.assert_allclose(result, expected, atol=1e-2)
 
 
-@pytest.mark.parametrize("df", [df_timeseries])
-def test_ImputerEM_fit_transform(df: pd.DataFrame) -> None:
-    imputer = imputers.ImputerEM(method="sample", dt=1e-3, random_state=42)
-    result = imputer.fit_transform(df)
-    expected = pd.DataFrame(
-        {
-            "col1": [i for i in range(20)],
-            "col2": [0, 0.638, 2, 2.714, 2] + [i for i in range(5, 20)],
-        }
-    )
-    print(result)
-    np.testing.assert_allclose(result, expected, atol=1e-2)
-
-
 index_grouped = pd.MultiIndex.from_product([["a", "b"], range(4)], names=["group", "date"])
 dict_values = {"col1": [0, np.nan, 0, np.nan, 1, 1, 1, 1], "col2": [1, 1, 1, 1, 2, 2, 2, 2]}
 df_grouped = pd.DataFrame(dict_values, index=index_grouped)
diff --git a/tests/imputations/test_imputers_pytorch.py b/tests/imputations/test_imputers_pytorch.py
@@ -54,7 +54,6 @@ def test_ImputerRegressorPyTorch_fit_transform(df: pd.DataFrame) -> None:
             "col5": [93, 75, 2.132, 12, 2.345],
         }
     )
-    print(result["col5"])
     np.testing.assert_allclose(result, expected, atol=1e-3)
 
 
diff --git a/tests/utils/test_data.py b/tests/utils/test_data.py
@@ -186,11 +186,9 @@ def test_utils_data_get_data(name_data: str, df: pd.DataFrame, mocker: MockerFix
         assert df_result.columns.tolist() == expected_columns
     elif name_data == "Monach_weather":
         assert mock_download.call_count == 1
-        print(df_result)
         pd.testing.assert_frame_equal(df_result, df_monach_weather_preprocess)
     elif name_data == "Monach_electricity_australia":
         assert mock_download.call_count == 1
-        print(df_result)
         pd.testing.assert_frame_equal(df_result, df_monach_elec_preprocess)
     else:
         assert False

Original file line number	Diff line number	Diff line change
`@@ -54,7 +54,6 @@ def test_ImputerRegressorPyTorch_fit_transform(df: pd.DataFrame) -> None:`
`54`	`54`	`"col5": [93, 75, 2.132, 12, 2.345],`
`55`	`55`	`}`
`56`	`56`	`)`
`57`		`- print(result["col5"])`
`58`	`57`	`np.testing.assert_allclose(result, expected, atol=1e-3)`
`59`	`58`
`60`	`59`