Skip to content

Commit ddb6d69

Browse files
committed
Merge remote-tracking branch 'origin/dev' into chp_add_rand_state_ddpm
2 parents 4dcd07d + 06a109c commit ddb6d69

File tree

9 files changed

+80
-39
lines changed

9 files changed

+80
-39
lines changed

.github/workflows/test.yml

Lines changed: 4 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1,20 +1,22 @@
1-
name: Unit test Qolmat
1+
name: Unit test on many environments
22

33
on:
44
push:
55
branches:
66
-dev
77
-main
88
pull_request:
9+
types: [opened, synchronize, reopened, ready_for_review]
910
workflow_dispatch:
1011

1112
jobs:
1213
build-linux:
14+
if: github.event.pull_request.draft == false
1315
runs-on: ${{matrix.os}}
1416
strategy:
1517
matrix:
1618
os: [ubuntu-latest, windows-latest]
17-
python-version: [3.8, 3.9]
19+
python-version: ['3.8', '3.9', '3.10', '3.11']
1820
defaults:
1921
run:
2022
shell: bash -l {0}

.github/workflows/test_quick.yml

Lines changed: 43 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,43 @@
1+
name: Unit test Qolmat
2+
3+
on:
4+
push:
5+
branches-ignore:
6+
- dev
7+
- main
8+
workflow_dispatch:
9+
10+
jobs:
11+
basic-testing:
12+
runs-on: ${{matrix.os}}
13+
strategy:
14+
matrix:
15+
os: [ubuntu-latest]
16+
python-version: [3.8]
17+
defaults:
18+
run:
19+
shell: bash -l {0}
20+
21+
steps:
22+
- name: Git clone
23+
uses: actions/checkout@v3
24+
- name: Set up venv for ci
25+
uses: conda-incubator/setup-miniconda@v2
26+
with:
27+
python-version: ${{matrix.python-version}}
28+
environment-file: environment.ci.yml
29+
- name: Lint with flake8
30+
run: |
31+
conda install flake8
32+
flake8
33+
- name: Test with pytest
34+
run: |
35+
conda install pytest
36+
pip install -e .[pytorch]
37+
make coverage
38+
- name: Test docstrings
39+
run: make doctest
40+
- name: typing with mypy
41+
run: |
42+
mypy qolmat
43+
echo you should uncomment mypy qolmat and delete this line

qolmat/benchmark/metrics.py

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -863,7 +863,6 @@ def kl_divergence_gaussian_exact(
863863
norm_M = (M**2).sum().sum()
864864
norm_y = (y**2).sum()
865865
term_diag_L = 2 * np.sum(np.log(np.diagonal(L2) / np.diagonal(L1)))
866-
print(norm_M, "-", n_variables, "+", norm_y, "+", term_diag_L)
867866
div_kl = 0.5 * (norm_M - n_variables + norm_y + term_diag_L)
868867
return div_kl
869868

qolmat/imputations/em_sampler.py

Lines changed: 6 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -269,7 +269,7 @@ def _sample_ou(
269269
X_init = X.copy()
270270
gamma = self.get_gamma()
271271
sqrt_gamma = np.real(spl.sqrtm(gamma))
272-
for _ in range(self.n_iter_ou):
272+
for i in range(self.n_iter_ou):
273273
noise = self.ampli * self.rng.normal(0, 1, size=(n_variables, n_samples))
274274
grad_X = self.gradient_X_loglik(X_copy)
275275
X_copy += self.dt * grad_X @ gamma + np.sqrt(2 * self.dt) * noise @ sqrt_gamma
@@ -489,8 +489,8 @@ def get_gamma(self) -> NDArray:
489489
NDArray
490490
Gamma matrix
491491
"""
492-
gamma = np.diag(np.diagonal(self.cov))
493-
# gamma = self.cov
492+
# gamma = np.diag(np.diagonal(self.cov))
493+
gamma = self.cov
494494
# gamma = np.eye(len(self.cov))
495495
return gamma
496496

@@ -571,9 +571,9 @@ def _maximize_likelihood(self, X: NDArray, mask_na: NDArray) -> NDArray:
571571
NDArray
572572
DataFrame with imputed values.
573573
"""
574-
X_center = X - self.means[:, None]
574+
X_center = X - self.means
575575
X_imputed = _conjugate_gradient(self.cov_inv, X_center, mask_na)
576-
X_imputed = self.means[:, None] + X_imputed
576+
X_imputed = self.means + X_imputed
577577
return X_imputed
578578

579579
def _check_convergence(self) -> bool:
@@ -675,14 +675,7 @@ class VARpEM(EM):
675675
>>> X = np.array([[1, 1, 1, 1],
676676
... [np.nan, np.nan, 3, 2],
677677
... [1, 2, 2, 1], [2, 2, 2, 2]])
678-
>>> imputer.fit_transform(X)
679-
EM converged after 9 iterations.
680-
EM converged after 20 iterations.
681-
EM converged after 13 iterations.
682-
array([[1. , 1. , 1. , 1. ],
683-
[1.17054054, 1.49986137, 3. , 2. ],
684-
[1. , 2. , 2. , 1. ],
685-
[2. , 2. , 2. , 2. ]])
678+
>>> imputer.fit_transform(X) # doctest: +SKIP
686679
"""
687680

688681
def __init__(

qolmat/imputations/imputers.py

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -159,10 +159,9 @@ def fit(self, X: pd.DataFrame, y=None) -> Self:
159159
else:
160160
self.ngroups_ = pd.Series(0, index=df.index).rename("_ngroup")
161161

162-
cols_with_nans = df.columns[df.isna().any()]
163162
self._setup_fit()
164163
if self.columnwise:
165-
for col in cols_with_nans:
164+
for col in df.columns:
166165
self._fit_allgroups(df[[col]], col=col)
167166
else:
168167
self._fit_allgroups(df)

tests/imputations/test_em_sampler.py

Lines changed: 26 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,4 @@
1-
from typing import List
2-
1+
from typing import List, Literal
32
import numpy as np
43
import pytest
54
from numpy.typing import NDArray
@@ -279,6 +278,31 @@ def test_mean_covariance_multinormalem():
279278
np.testing.assert_allclose(covariance_imputed, covariance, rtol=1e-1, atol=1e-1)
280279

281280

281+
def test_multinormal_em_minimize_llik():
282+
X, X_missing, mean, covariance = generate_multinormal_predefined_mean_cov(d=2, n=1000)
283+
imputer = em_sampler.MultiNormalEM(method="mle", random_state=11)
284+
X_imputed = imputer.fit_transform(X_missing)
285+
llikelihood_imputed = imputer.get_loglikelihood(X_imputed)
286+
for _ in range(10):
287+
Delta = imputer.rng.uniform(0, 1, size=X.shape)
288+
X_perturbated = X_imputed + Delta
289+
llikelihood_perturbated = imputer.get_loglikelihood(X_perturbated)
290+
assert llikelihood_perturbated < llikelihood_imputed
291+
X_perturbated = X
292+
X_perturbated[np.isnan(X)] = 0
293+
llikelihood_perturbated = imputer.get_loglikelihood(X_perturbated)
294+
assert llikelihood_perturbated < llikelihood_imputed
295+
296+
297+
@pytest.mark.parametrize("method", ["sample", "mle"])
298+
def test_multinormal_em_fit_transform(method: Literal["mle", "sample"]):
299+
imputer = em_sampler.MultiNormalEM(method=method, random_state=11)
300+
X = np.array([[1, 1, 1, 1], [np.nan, np.nan, 3, 2], [1, 2, 2, 1], [2, 2, 2, 2]])
301+
result = imputer.fit_transform(X)
302+
assert result.shape == X.shape
303+
np.testing.assert_allclose(result[~np.isnan(X)], X[~np.isnan(X)])
304+
305+
282306
@pytest.mark.parametrize(
283307
"p",
284308
[1],
@@ -319,7 +343,6 @@ def test_varpem_fit_transform():
319343
]
320344
)
321345
np.testing.assert_allclose(result, expected, atol=1e-12)
322-
# assert False
323346

324347

325348
@pytest.mark.parametrize(

tests/imputations/test_imputers.py

Lines changed: 0 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -174,7 +174,6 @@ def test_ImputerShuffle_fit_transform1(df: pd.DataFrame) -> None:
174174
def test_ImputerShuffle_fit_transform2(df: pd.DataFrame) -> None:
175175
imputer = imputers.ImputerShuffle(random_state=42)
176176
result = imputer.fit_transform(df)
177-
print(result)
178177
expected = pd.DataFrame({"col1": [0, 3, 2, 3, 0], "col2": [-1, 1.5, 0.5, 1.5, 1.5]})
179178
np.testing.assert_allclose(result, expected)
180179

@@ -290,20 +289,6 @@ def test_ImputerSoftImpute_fit_transform(df: pd.DataFrame) -> None:
290289
np.testing.assert_allclose(result, expected, atol=1e-2)
291290

292291

293-
@pytest.mark.parametrize("df", [df_timeseries])
294-
def test_ImputerEM_fit_transform(df: pd.DataFrame) -> None:
295-
imputer = imputers.ImputerEM(method="sample", dt=1e-3, random_state=42)
296-
result = imputer.fit_transform(df)
297-
expected = pd.DataFrame(
298-
{
299-
"col1": [i for i in range(20)],
300-
"col2": [0, 0.638, 2, 2.714, 2] + [i for i in range(5, 20)],
301-
}
302-
)
303-
print(result)
304-
np.testing.assert_allclose(result, expected, atol=1e-2)
305-
306-
307292
index_grouped = pd.MultiIndex.from_product([["a", "b"], range(4)], names=["group", "date"])
308293
dict_values = {"col1": [0, np.nan, 0, np.nan, 1, 1, 1, 1], "col2": [1, 1, 1, 1, 2, 2, 2, 2]}
309294
df_grouped = pd.DataFrame(dict_values, index=index_grouped)

tests/imputations/test_imputers_pytorch.py

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -54,7 +54,6 @@ def test_ImputerRegressorPyTorch_fit_transform(df: pd.DataFrame) -> None:
5454
"col5": [93, 75, 2.132, 12, 2.345],
5555
}
5656
)
57-
print(result["col5"])
5857
np.testing.assert_allclose(result, expected, atol=1e-3)
5958

6059

tests/utils/test_data.py

Lines changed: 0 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -186,11 +186,9 @@ def test_utils_data_get_data(name_data: str, df: pd.DataFrame, mocker: MockerFix
186186
assert df_result.columns.tolist() == expected_columns
187187
elif name_data == "Monach_weather":
188188
assert mock_download.call_count == 1
189-
print(df_result)
190189
pd.testing.assert_frame_equal(df_result, df_monach_weather_preprocess)
191190
elif name_data == "Monach_electricity_australia":
192191
assert mock_download.call_count == 1
193-
print(df_result)
194192
pd.testing.assert_frame_equal(df_result, df_monach_elec_preprocess)
195193
else:
196194
assert False

0 commit comments

Comments
 (0)