Merge remote-tracking branch 'origin/dev' into chp_add_rand_state_ddpm

baruch11 · baruch11 · commit 4156b879d924 · 2023-10-30T14:33:43.000+01:00
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
@@ -27,16 +27,14 @@ jobs:
         with:
           python-version: ${{matrix.python-version}}
           environment-file: environment.ci.yml
-          channels: default, conda-forge
       - name: Lint with flake8
         run: |
           conda install flake8
           flake8
       - name: Test with pytest
         run: |
           conda install pytest
-          pytest
-          echo you should uncomment pytest and delete this line
+          make coverage
       - name: typing with mypy
         run: |
           mypy qolmat
diff --git a/.readthedocs.yml b/.readthedocs.yml
@@ -1,10 +1,11 @@
 version: 2
 
 build:
-  image: latest
+  os: "ubuntu-22.04"
+  tools:
+    python: "mambaforge-22.9"
 
 python:
-  version: 3.8
   install:
     - method: pip
       path: .
diff --git a/Makefile b/Makefile
@@ -0,0 +1,13 @@
+coverage:
+	pytest --cov-branch --cov=qolmat --cov-report=xml
+
+doctest:
+	pytest --doctest-modules --pyargs qolmat
+
+doc:
+	make html -C docs
+
+clean:
+	rm -rf .mypy_cache .pytest_cache .coverage*
+	rm -rf **__pycache__
+	make clean -C docs
diff --git a/environment.ci.yml b/environment.ci.yml
@@ -1,18 +1,18 @@
 name: env_qolmat_ci
 channels:
-    - conda-forge
     - defaults
+    - conda-forge
 dependencies:
-    - pip=23.0.1
+    - codecov
+    - flake8
+    - matplotlib
+    - mypy
+    - numpy
+    - numpydoc
+    - pytest
+    - pytest-cov
+    - pytest-mock
+    - pip
     - pip:
-          - codecov
-          - flake8
-          - matplotlib
-          - mypy
-          - numpy
-          - numpydoc
-          - pytest
-          - pytest-cov
-          - pytest-mock
-          - torch==2.0.1
-          - -e .
+        - torch
+        - -e .
diff --git a/qolmat/imputations/em_sampler.py b/qolmat/imputations/em_sampler.py
@@ -326,10 +326,16 @@ def fit(self, X: NDArray) -> Self:
                 self.p = p
                 self.fit_X(X)
                 n1, n2 = self.X.shape
-                aic = np.log(np.linalg.det(self.S)) + 2 * p * (n2**2) / n1
+                det = np.linalg.det(self.S)
+                if abs(det) < 1e-12:
+                    aic = -np.inf
+                else:
+                    aic = np.log(det) + 2 * p * (n2**2) / n1
                 if len(aics) > 0 and aic > aics[-1]:
                     break
                 aics.append(aic)
+                if aic == -np.inf:
+                    break
             self.p = int(np.argmin(aics))
             self.fit_X(X)
 
@@ -352,15 +358,15 @@ def transform(self, X: NDArray) -> NDArray:
         NDArray
             Final array after EM sampling.
         """
+        mask_na = np.isnan(X)
+
         # shape_original = X.shape
         if hash(X.tobytes()) == self.hash_fit:
             X = self.X
         else:
             X = utils.prepare_data(X, self.period)
             X = utils.linear_interpolation(X)
 
-        mask_na = np.isnan(X)
-
         if self.method == "mle":
             X_transformed = self._maximize_likelihood(X, mask_na)
         elif self.method == "sample":
@@ -664,14 +670,19 @@ class VARpEM(EM):
     Examples
     --------
     >>> import numpy as np
-    >>> import pandas as pd
     >>> from qolmat.imputations.em_sampler import VARpEM
-    >>> imputer = VARpEM(method="sample")
-    >>> X = pd.DataFrame(data=[[1, 1, 1, 1],
-    >>>                        [np.nan, np.nan, 3, 2],
-    >>>                        [1, 2, 2, 1], [2, 2, 2, 2]],
-    >>>                        columns=["var1", "var2", "var3", "var4"])
+    >>> imputer = VARpEM(method="sample", random_state=11)
+    >>> X = np.array([[1, 1, 1, 1],
+    ...               [np.nan, np.nan, 3, 2],
+    ...               [1, 2, 2, 1], [2, 2, 2, 2]])
     >>> imputer.fit_transform(X)
+    EM converged after 9 iterations.
+    EM converged after 20 iterations.
+    EM converged after 13 iterations.
+    array([[1.        , 1.        , 1.        , 1.        ],
+           [1.17054054, 1.49986137, 3.        , 2.        ],
+           [1.        , 2.        , 2.        , 1.        ],
+           [2.        , 2.        , 2.        , 2.        ]])
     """
 
     def __init__(
@@ -837,6 +848,7 @@ def combine_parameters(self) -> None:
         stack_YY = np.stack(list_YY)
         self.YY = np.mean(stack_YY, axis=0)
         self.S = self.YY - self.ZY.T @ self.B - self.B.T @ self.ZY + self.B.T @ self.ZZ @ self.B
+        self.S[self.S < 1e-12] = 0
         self.S_inv = np.linalg.pinv(self.S, rcond=1e-10)
 
     def _check_convergence(self) -> bool:
diff --git a/qolmat/imputations/imputers.py b/qolmat/imputations/imputers.py
@@ -1114,10 +1114,10 @@ class ImputerResiduals(_Imputer):
 
     Examples
     --------
-    TODO review/remake this exemple
     >>> import numpy as np
     >>> import pandas as pd
-    >>> from qolmat.imputations.models import ImputeOnResiduals
+    >>> from qolmat.imputations.imputers import ImputerResiduals
+    >>> np.random.seed(100)
     >>> df = pd.DataFrame(index=pd.date_range('2015-01-01','2020-01-01'))
     >>> mean = 5
     >>> offset = 10
@@ -1127,11 +1127,24 @@ class ImputerResiduals(_Imputer):
     >>> noise_mean = 0
     >>> noise_var = 2
     >>> df['y'] = df['y'] + np.random.normal(noise_mean, noise_var, df.shape[0])
-    >>> np.random.seed(100)
     >>> mask = np.random.choice([True, False], size=df.shape)
     >>> df = df.mask(mask)
-    >>> imputor = ImputeOnResiduals(period=365, model="additive")
+    >>> imputor = ImputerResiduals(period=365, model_tsa="additive")
     >>> imputor.fit_transform(df)
+                        y
+    2015-01-01   1.501210
+    2015-01-02   5.691061
+    2015-01-03   4.404106
+    2015-01-04   3.531540
+    2015-01-05   3.129532
+    ...               ...
+    2019-12-28  10.288054
+    2019-12-29  10.632659
+    2019-12-30  14.900671
+    2019-12-31  12.957837
+    2020-01-01  12.780517
+    <BLANKLINE>
+    [1827 rows x 1 columns]
     """
 
     def __init__(
@@ -1353,10 +1366,10 @@ class ImputerMICE(_Imputer):
     ...                        columns=["var1", "var2", "var3", "var4"])
     >>> imputer.fit_transform(df)
        var1  var2  var3  var4
-    0   1.0   1.0   1.0   1.0
-    1   1.0   2.0   2.0   5.0
-    2   1.0   2.0   2.0   5.0
-    3   2.0   2.0   2.0   2.0
+    0  1.00  1.00  1.00  1.00
+    1  1.51  1.99  1.99  3.55
+    2  1.00  2.00  2.00  5.00
+    3  2.00  2.00  2.00  2.00
     """
 
     def __init__(
@@ -1470,18 +1483,18 @@ class ImputerRegressor(_Imputer):
     >>> import pandas as pd
     >>> from qolmat.imputations import imputers
     >>> from sklearn.ensemble import ExtraTreesRegressor
-    >>> imputer = imputers.ImputerRegressor(model=ExtraTreesRegressor())
+    >>> imputer = imputers.ImputerRegressor(estimator=ExtraTreesRegressor())
     >>> df = pd.DataFrame(data=[[1, 1, 1, 1],
     ...                        [np.nan, np.nan, np.nan, np.nan],
     ...                        [1, 2, 2, 5],
     ...                        [2, 2, 2, 2]],
     ...                        columns=["var1", "var2", "var3", "var4"])
     >>> imputer.fit_transform(df)
-           var1      var2      var3      var4
-    0  1.000000  1.000000  1.000000  1.000000
-    1  1.333333  1.666667  1.666667  2.666667
-    2  1.000000  2.000000  2.000000  5.000000
-    3  2.000000  2.000000  2.000000  2.000000
+       var1  var2  var3  var4
+    0   1.0   1.0   1.0   1.0
+    1   1.0   2.0   2.0   2.0
+    2   1.0   2.0   2.0   5.0
+    3   2.0   2.0   2.0   2.0
     """
 
     def __init__(
diff --git a/qolmat/imputations/imputers_pytorch.py b/qolmat/imputations/imputers_pytorch.py
@@ -471,12 +471,10 @@ def build_autoencoder(
 
     Examples
     --------
-    >>> encoder, decoder = build_autoencoder(
-                                                        input_dim=10,
-                                                        latent_dim=4,
-                                                        list_num_neurons=[32, 64, 128],
-                                                        output_dim=252
-                                                    )
+    >>> encoder, decoder = build_autoencoder(input_dim=10,
+    ...                                      latent_dim=4,
+    ...                                      list_num_neurons=[32, 64, 128],
+    ...                                      output_dim=252)
     >>> print(encoder)
     Sequential(
       (0): Linear(in_features=10, out_features=128, bias=True)
diff --git a/qolmat/imputations/softimpute.py b/qolmat/imputations/softimpute.py
@@ -46,8 +46,12 @@ class SoftImpute(BaseEstimator, TransformerMixin):
     >>> import numpy as np
     >>> from qolmat.imputations.softimpute import SoftImpute
     >>> X = np.array([[1, 2, np.nan, 4], [1, 5, 3, np.nan], [4, 2, 3, 2], [1, 1, 5, 4]])
-    >>> X_imputed = SoftImpute().fit_transform(X)
+    >>> X_imputed = SoftImpute(random_state=11).fit_transform(X)
     >>> print(X_imputed)
+    [[1.         2.         3.7242757  4.        ]
+     [1.         5.         3.         1.97846028]
+     [4.         2.         3.         2.        ]
+     [1.         1.         5.         4.        ]]
     """
 
     def __init__(
diff --git a/tests/imputations/test_em_sampler.py b/tests/imputations/test_em_sampler.py
@@ -306,6 +306,22 @@ def test_parameters_after_imputation_varpem(p: int):
     np.testing.assert_allclose(em.S, S, rtol=1e-1, atol=1e-1)
 
 
+def test_varpem_fit_transform():
+    imputer = em_sampler.VARpEM(method="sample", random_state=11)
+    X = np.array([[1, 1, 1, 1], [np.nan, np.nan, 3, 2], [1, 2, 2, 1], [2, 2, 2, 2]])
+    result = imputer.fit_transform(X)
+    expected = np.array(
+        [
+            [1.0, 1.0, 1.0, 1.0],
+            [1.0, 1.5, 3.0, 2.0],
+            [1.0, 2.0, 2.0, 1.0],
+            [2.0, 2.0, 2.0, 2.0],
+        ]
+    )
+    np.testing.assert_allclose(result, expected, atol=1e-12)
+    # assert False
+
+
 @pytest.mark.parametrize(
     "X, em, p",
     [(X_first_guess, em_sampler.MultiNormalEM(), 0), (X_first_guess, em_sampler.VARpEM(p=2), 2)],
diff --git a/tests/imputations/test_imputers.py b/tests/imputations/test_imputers.py
@@ -297,9 +297,10 @@ def test_ImputerEM_fit_transform(df: pd.DataFrame) -> None:
     expected = pd.DataFrame(
         {
             "col1": [i for i in range(20)],
-            "col2": [0, 0.773, 2, 2.621, 2] + [i for i in range(5, 20)],
+            "col2": [0, 0.638, 2, 2.714, 2] + [i for i in range(5, 20)],
         }
     )
+    print(result)
     np.testing.assert_allclose(result, expected, atol=1e-2)
 
 

Original file line number	Diff line number	Diff line change
`@@ -297,9 +297,10 @@ def test_ImputerEM_fit_transform(df: pd.DataFrame) -> None:`
`297`	`297`	`expected = pd.DataFrame(`
`298`	`298`	`{`
`299`	`299`	`"col1": [i for i in range(20)],`
`300`		`- "col2": [0, 0.773, 2, 2.621, 2] + [i for i in range(5, 20)],`
	`300`	`+ "col2": [0, 0.638, 2, 2.714, 2] + [i for i in range(5, 20)],`
`301`	`301`	`}`
`302`	`302`	`)`
	`303`	`+ print(result)`
`303`	`304`	`np.testing.assert_allclose(result, expected, atol=1e-2)`
`304`	`305`
`305`	`306`