Use the full covariance matrix of X as gamma (instead of only its diagonal), and add tests

Julien Roussel · Julien Roussel · commit 94064d23d47f · 2023-10-30T11:54:57.000+01:00
diff --git a/qolmat/imputations/em_sampler.py b/qolmat/imputations/em_sampler.py
@@ -268,14 +268,26 @@ def _sample_ou(
             self.reset_learned_parameters()
         X_init = X.copy()
         gamma = self.get_gamma()
+        print("gamma:")
+        print(gamma)
         sqrt_gamma = np.real(spl.sqrtm(gamma))
-        for _ in range(self.n_iter_ou):
+        for i in range(self.n_iter_ou):
+            print(f"Iteration #{i}")
             noise = self.ampli * self.rng.normal(0, 1, size=(n_variables, n_samples))
             grad_X = self.gradient_X_loglik(X_copy)
+            print("grad")
+            print(self.dt * grad_X @ gamma)
+            print("noise")
+            print(np.sqrt(2 * self.dt) * noise @ sqrt_gamma)
             X_copy += self.dt * grad_X @ gamma + np.sqrt(2 * self.dt) * noise @ sqrt_gamma
             X_copy[~mask_na] = X_init[~mask_na]
             if estimate_params:
                 self.update_parameters(X_copy)
+        print("X_copy")
+        print(X_copy)
+        if np.sum(np.abs(X_copy)) > 1e9:
+            raise AssertionError
+        print()
 
         return X_copy
 
@@ -489,8 +501,10 @@ def get_gamma(self) -> NDArray:
         NDArray
             Gamma matrix
         """
-        gamma = np.diag(np.diagonal(self.cov))
-        # gamma = self.cov
+        print("get_gamma")
+        print(self.cov)
+        # gamma = np.diag(np.diagonal(self.cov))
+        gamma = self.cov
         # gamma = np.eye(len(self.cov))
         return gamma
 
@@ -571,9 +585,9 @@ def _maximize_likelihood(self, X: NDArray, mask_na: NDArray) -> NDArray:
         NDArray
             DataFrame with imputed values.
         """
-        X_center = X - self.means[:, None]
+        X_center = X - self.means
         X_imputed = _conjugate_gradient(self.cov_inv, X_center, mask_na)
-        X_imputed = self.means[:, None] + X_imputed
+        X_imputed = self.means + X_imputed
         return X_imputed
 
     def _check_convergence(self) -> bool:
diff --git a/tests/imputations/test_em_sampler.py b/tests/imputations/test_em_sampler.py
@@ -1,4 +1,4 @@
-from typing import List
+from typing import List, Literal
 
 import numpy as np
 import pytest
@@ -279,6 +279,31 @@ def test_mean_covariance_multinormalem():
     np.testing.assert_allclose(covariance_imputed, covariance, rtol=1e-1, atol=1e-1)
 
 
+def test_multinormal_em_minimize_llik():
+    X, X_missing, mean, covariance = generate_multinormal_predefined_mean_cov(d=2, n=1000)
+    imputer = em_sampler.MultiNormalEM(method="mle", random_state=11)
+    X_imputed = imputer.fit_transform(X_missing)
+    llikelihood_imputed = imputer.get_loglikelihood(X_imputed)
+    for _ in range(10):
+        Delta = imputer.rng.uniform(0, 1, size=X.shape)
+        X_perturbated = X_imputed + Delta
+        llikelihood_perturbated = imputer.get_loglikelihood(X_perturbated)
+        assert llikelihood_perturbated < llikelihood_imputed
+    X_perturbated = X
+    X_perturbated[np.isnan(X)] = 0
+    llikelihood_perturbated = imputer.get_loglikelihood(X_perturbated)
+    assert llikelihood_perturbated < llikelihood_imputed
+
+
+@pytest.mark.parametrize("method", ["sample", "mle"])
+def test_multinormal_em_fit_transform(method: Literal["mle", "sample"]):
+    imputer = em_sampler.MultiNormalEM(method=method, random_state=11)
+    X = np.array([[1, 1, 1, 1], [np.nan, np.nan, 3, 2], [1, 2, 2, 1], [2, 2, 2, 2]])
+    result = imputer.fit_transform(X)
+    assert result.shape == X.shape
+    np.testing.assert_allclose(result[~np.isnan(X)], X[~np.isnan(X)])
+
+
 @pytest.mark.parametrize(
     "p",
     [1],
@@ -319,7 +344,6 @@ def test_varpem_fit_transform():
         ]
     )
     np.testing.assert_allclose(result, expected, atol=1e-12)
-    # assert False
 
 
 @pytest.mark.parametrize(
diff --git a/tests/imputations/test_imputers.py b/tests/imputations/test_imputers.py
@@ -290,20 +290,6 @@ def test_ImputerSoftImpute_fit_transform(df: pd.DataFrame) -> None:
     np.testing.assert_allclose(result, expected, atol=1e-2)
 
 
-@pytest.mark.parametrize("df", [df_timeseries])
-def test_ImputerEM_fit_transform(df: pd.DataFrame) -> None:
-    imputer = imputers.ImputerEM(method="sample", dt=1e-3, random_state=42)
-    result = imputer.fit_transform(df)
-    expected = pd.DataFrame(
-        {
-            "col1": [i for i in range(20)],
-            "col2": [0, 0.638, 2, 2.714, 2] + [i for i in range(5, 20)],
-        }
-    )
-    print(result)
-    np.testing.assert_allclose(result, expected, atol=1e-2)
-
-
 index_grouped = pd.MultiIndex.from_product([["a", "b"], range(4)], names=["group", "date"])
 dict_values = {"col1": [0, np.nan, 0, np.nan, 1, 1, 1, 1], "col2": [1, 1, 1, 1, 2, 2, 2, 2]}
 df_grouped = pd.DataFrame(dict_values, index=index_grouped)