Skip to content

Commit a2de269

Browse files
author
Hông-Lan Botterman
committed
ENH: softimpute
1 parent 5b51305 commit a2de269

File tree

4 files changed

+70
-37
lines changed

4 files changed

+70
-37
lines changed

qolmat/imputations/imputers.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1791,7 +1791,7 @@ def __init__(
17911791
tau: float = 0,
17921792
max_iterations: int = 100,
17931793
verbose: bool = False,
1794-
projected: bool = False,
1794+
projected: bool = True,
17951795
):
17961796
super().__init__(
17971797
imputer_params=(
@@ -1844,7 +1844,7 @@ def _fit_element(
18441844
self._check_dataframe(df)
18451845
assert col == "__all__"
18461846
hyperparams = self.get_hyperparams()
1847-
model = softimpute.SoftImpute(**hyperparams)
1847+
model = softimpute.SoftImpute(random_state=self._rng, **hyperparams)
18481848
model = model.fit(df.values)
18491849
return model
18501850

qolmat/utils/plot.py

Lines changed: 52 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
import matplotlib.pyplot as plt
1010
import matplotlib.ticker as plticker
1111
import numpy as np
12+
from numpy.typing import NDArray
1213
import pandas as pd
1314
import scipy
1415
from mpl_toolkits.axes_grid1 import make_axes_locatable
@@ -155,53 +156,72 @@ def plot_images(
155156

156157

157158
def make_ellipses(
158-
X: np.ndarray,
159+
x: NDArray,
160+
y: NDArray,
159161
ax: mpl.axes.Axes,
160-
color: Union[str, Tuple[float, float, float]],
162+
n_std: float = 2,
163+
color: Union[str, Any, Tuple[float, float, float]] = "None",
161164
):
162-
"""Draw ellipses on a figure
165+
"""
166+
Create a plot of the covariance confidence ellipse of *x* and *y*.
163167
164168
Parameters
165169
----------
166-
X : np.ndarray
167-
array for the ellipse
168-
ax : matplotlib.axes._subplots.AxesSubplot
169-
matplotlib ax handles
170-
color : Union[str, Tuple[float, float, float]]
171-
ellipse's color
170+
x, y : array-like, shape (n, )
171+
Input data.
172+
173+
ax : matplotlib.axes.Axes
174+
The axes object to draw the ellipse into.
175+
176+
n_std : float
177+
The number of standard deviations used to determine the ellipse's radii.
178+
179+
color : Optional[str]
180+
Face color of the ellipse patch (passed to ``Ellipse`` as ``facecolor``).
181+
182+
Returns
183+
-------
184+
None. The ellipse is added to ``ax`` in place; nothing is returned.
172185
"""
173-
covariances = np.cov(X) # gmm.covariances_[0] # [n][:2, :2]
174-
v, w = np.linalg.eigh(covariances)
175-
u = w[0] / np.linalg.norm(w[0])
176-
angle = np.arctan2(u[1], u[0])
177-
angle = 180 * angle / np.pi # convert to degrees
178-
center = X.mean(axis=0) # .means_[0]
179-
v[v < 0] = np.nan
180-
v = 2.0 * np.sqrt(2.0) * np.sqrt(v)
181-
ell = mpl.patches.Ellipse(center, v[0], v[1], angle=180 + angle, color=color)
186+
if x.size != y.size:
187+
raise ValueError("x and y must be the same size")
188+
189+
cov = np.cov(x, y)
190+
pearson = cov[0, 1] / np.sqrt(cov[0, 0] * cov[1, 1])
191+
ell_radius_x = np.sqrt(1 + pearson) * 2.5
192+
ell_radius_y = np.sqrt(1 - pearson) * 2.5
193+
ell = mpl.patches.Ellipse((0, 0), width=ell_radius_x, height=ell_radius_y, facecolor=color)
194+
scale_x = np.sqrt(cov[0, 0]) * n_std
195+
mean_x = np.mean(x)
196+
scale_y = np.sqrt(cov[1, 1]) * n_std
197+
mean_y = np.mean(y)
198+
transf = (
199+
mpl.transforms.Affine2D().rotate_deg(45).scale(scale_x, scale_y).translate(mean_x, mean_y)
200+
)
201+
ell.set_transform(transf + ax.transData)
202+
ax.add_patch(ell)
182203
ell.set_clip_box(ax.bbox)
183-
ell.set_alpha(0.5)
184-
ax.add_artist(ell)
204+
ell.set_alpha(0.4)
185205
ax.set_aspect("equal", "datalim")
186206

187207

188208
def compare_covariances(
189-
df1: pd.DataFrame,
190-
df2: pd.DataFrame,
209+
df_1: pd.DataFrame,
210+
df_2: pd.DataFrame,
191211
col_x: str,
192212
col_y: str,
193-
ax: mpl.axes._subplots.AxesSubplot,
213+
ax: mpl.axes.Axes,
194214
label: str = "",
195-
color: Optional[Union[str, Tuple[float, float, float]]] = None,
215+
color: Union[None, str, Tuple[float, float, float], Tuple[float, float, float, float]] = None,
196216
):
197217
"""
198218
Covariance plot: scatter plot with ellipses
199219
200220
Parameters
201221
----------
202-
df1 : pd.DataFrame
222+
df_1 : pd.DataFrame
203223
dataframe with raw data
204-
df2 : pd.DataFrame
224+
df_2 : pd.DataFrame
205225
dataframe with imputations
206226
col_x : str
207227
variable x, name of the column (in both df_1 and df_2) plotted on the x-axis
@@ -210,12 +230,14 @@ def compare_covariances(
210230
ax : matplotlib.axes.Axes
211231
matplotlib ax handles
212232
"""
233+
df1 = df_1.dropna()
234+
df2 = df_2.dropna()
213235
if color is None:
214236
color = tab10(0)
215-
ax.scatter(df2[col_x], df2[col_y], marker=".", color=color, s=0.2, label=label)
216-
ax.scatter(df1[col_x], df1[col_y], marker=".", color="black", s=0.2)
217-
make_ellipses(df1[[col_x, col_y]], ax, "black")
218-
make_ellipses(df2[[col_x, col_y]], ax, color)
237+
ax.scatter(df2[col_x], df2[col_y], marker=".", color=color, s=2, alpha=0.7, label="imputed")
238+
ax.scatter(df1[col_x], df1[col_y], marker=".", color="black", s=2, alpha=0.7, label="original")
239+
make_ellipses(df1[col_x], df1[col_y], ax, color="black")
240+
make_ellipses(df2[col_x], df2[col_y], ax, color=color)
219241
ax.set_xlabel(col_x)
220242
ax.set_ylabel(col_y)
221243

tests/imputations/test_imputers.py

Lines changed: 15 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -275,18 +275,31 @@ def test_ImputerRPCA_fit_transform(df: pd.DataFrame) -> None:
275275
np.testing.assert_allclose(result, expected, atol=1e-2)
276276

277277

278+
@pytest.mark.parametrize("df", [df_incomplete])
279+
def test_ImputerSoftImpute_fit_transform(df: pd.DataFrame) -> None:
280+
imputer = imputers.ImputerSoftImpute(
281+
columnwise=False, max_iterations=100, tau=0.3, random_state=4
282+
)
283+
result = imputer.fit_transform(df)
284+
expected = pd.DataFrame(
285+
{
286+
"col1": [0, 1.327, 2, 3, 0.137],
287+
"col2": [-1, 0.099, 0.5, 0.122, 1.5],
288+
}
289+
)
290+
np.testing.assert_allclose(result, expected, atol=1e-2)
291+
292+
278293
@pytest.mark.parametrize("df", [df_timeseries])
279294
def test_ImputerEM_fit_transform(df: pd.DataFrame) -> None:
280295
imputer = imputers.ImputerEM(method="sample", dt=1e-3, random_state=42)
281296
result = imputer.fit_transform(df)
282-
print(result)
283297
expected = pd.DataFrame(
284298
{
285299
"col1": [i for i in range(20)],
286300
"col2": [0, 0.773, 2, 2.621, 2] + [i for i in range(5, 20)],
287301
}
288302
)
289-
print(result)
290303
np.testing.assert_allclose(result, expected, atol=1e-2)
291304

292305

@@ -307,7 +320,6 @@ def test_ImputerEM_fit_transform(df: pd.DataFrame) -> None:
307320
imputers.ImputerMICE(groups=("group",)),
308321
imputers.ImputerRegressor(groups=("group",), estimator=LinearRegression()),
309322
imputers.ImputerRPCA(groups=("group",)),
310-
imputers.ImputerSoftImpute(groups=("group",)),
311323
imputers.ImputerEM(groups=("group",)),
312324
]
313325

@@ -335,7 +347,6 @@ def test_models_fit_transform_grouped(imputer):
335347
imputers.ImputerMICE(),
336348
imputers.ImputerRegressor(),
337349
imputers.ImputerRPCA(tau=0, lam=0),
338-
imputers.ImputerSoftImpute(),
339350
imputers.ImputerEM(),
340351
]
341352
)

tests/utils/test_plot.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -75,7 +75,7 @@ def test__utils_plot_plot_images(
7575
def test_utils_plot_make_ellipses(X: np.ndarray, mocker: MockerFixture):
7676
mocker.patch("matplotlib.pyplot.show")
7777
ax = plt.gca()
78-
plot.make_ellipses(X, ax, color="blue")
78+
plot.make_ellipses(X[1], X[2], ax, color="blue")
7979
assert len(plt.gcf().get_axes()) > 0
8080
plt.close("all")
8181

0 commit comments

Comments
 (0)