Skip to content

Commit 31706f3

Browse files
Julien Roussel authored and Julien Roussel committed
Merge branch 'fix-bugs-imputers' into 'dev'
Fix bugs imputers. See merge request quantmetry/retd/qolmat!7
2 parents 10ad0be + d1c3306 commit 31706f3

File tree

1 file changed

+27
-14
lines changed

1 file changed

+27
-14
lines changed

qolmat/imputations/models.py

Lines changed: 27 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -72,7 +72,8 @@ def fit_transform(self, df: pd.DataFrame) -> pd.DataFrame:
7272
df_imputed[col] = df_imputed[col].fillna(self.apply_imputation(df_imputed[col]))
7373

7474
if df_imputed.isna().any().any():
75-
print(df_imputed)
75+
print("Number of null by col")
76+
print(df_imputed.isna().sum())
7677
warnings.warn("Problem: there are still nan in the columns to be imputed")
7778
return df_imputed
7879

@@ -395,6 +396,8 @@ def get_resid(x, model, period, extrapolate_trend, method_interpolation):
395396
"""
396397
Fit/transform missing values on residuals.
397398
"""
399+
if x.isna().all():
400+
return np.nan
398401
result = seasonal_decompose(
399402
x.interpolate().bfill().ffill(),
400403
model=model,
@@ -404,8 +407,7 @@ def get_resid(x, model, period, extrapolate_trend, method_interpolation):
404407

405408
residuals = result.resid
406409
residuals[x.isnull()] = np.nan
407-
residuals = residuals.interpolate(method=method_interpolation)
408-
410+
residuals = residuals.interpolate(method=method_interpolation).ffill().bfill()
409411
return result.seasonal + result.trend + residuals
410412

411413
self.apply_imputation = partial(
@@ -439,7 +441,14 @@ class ImputeKNN(_BaseImputer):
439441
>>> imputor.fit_transform(X)
440442
"""
441443

442-
def __init__(self, **kwargs) -> None:
444+
def __init__(
445+
self,
446+
n_neighbors: int = 5,
447+
weights: str = "distance",
448+
**kwargs,
449+
) -> None:
450+
self.n_neighbors = n_neighbors
451+
self.weights = weights
443452
for name, value in kwargs.items():
444453
setattr(self, name, value)
445454

@@ -460,7 +469,9 @@ def fit_transform(self, df: pd.DataFrame) -> pd.DataFrame:
460469
if not isinstance(df, pd.DataFrame):
461470
raise ValueError("Input has to be a pandas.DataFrame.")
462471

463-
imputer = KNNImputer(n_neighbors=self.k, weights="distance", metric="nan_euclidean")
472+
imputer = KNNImputer(
473+
n_neighbors=self.n_neighbors, weights=self.weights, metric="nan_euclidean"
474+
)
464475
results = imputer.fit_transform(df)
465476
return pd.DataFrame(data=results, columns=df.columns, index=df.index)
466477

@@ -493,15 +504,15 @@ class ImputeRPCA(_BaseImputer):
493504
TO DO
494505
"""
495506

496-
def __init__(self, method, multivariate=False, **kwargs) -> None:
507+
def __init__(self, method: str = "temporal", multivariate: bool = False, **kwargs) -> None:
497508
self.multivariate = multivariate
498509
self.method = method
499510

500511
if method == "PCP":
501512
self.rpca = RPCA()
502513
elif method == "temporal":
503514
self.rpca = TemporalRPCA()
504-
elif method == "online":
515+
elif method == "onlinetemporal":
505516
self.rpca = OnlineTemporalRPCA()
506517
for name, value in kwargs.items():
507518
setattr(self.rpca, name, value)
@@ -526,7 +537,9 @@ def fit_transform(self, df: pd.DataFrame) -> pd.DataFrame:
526537
imputed = pd.DataFrame(imputed, columns=df.columns)
527538
else:
528539
imputed = pd.DataFrame()
529-
for col in df.columns:
540+
cols_with_nans = df.columns[df.isna().any()]
541+
for col in cols_with_nans:
542+
print(col)
530543
imputed_signal, _, _ = self.rpca.fit_transform(signal=df[col].values)
531544
imputed[col] = imputed_signal
532545
imputed.index = df.index
@@ -570,9 +583,9 @@ class ImputeMICE(_BaseImputer):
570583
--------
571584
>>> import numpy as np
572585
>>> import pandas as pd
573-
>>> from qolmat.imputations.models import ImputeIterative
586+
>>> from qolmat.imputations.models import ImputeMICE
574587
>>> from sklearn.ensemble import ExtraTreesRegressor
575-
>>> imputor = ImputeIterative(estimator=ExtraTreesRegressor(),
588+
>>> imputor = ImputeMICE(estimator=ExtraTreesRegressor(),
576589
>>> sample_posterior=False,
577590
>>> max_iter=100, missing_values=np.nan)
578591
>>> X = pd.DataFrame(data=[[1, 1, 1, 1],
@@ -717,8 +730,8 @@ class ImputeStochasticRegressor(_BaseImputer):
717730
>>> imputor.fit_transform(X)
718731
"""
719732

720-
def __init__(self, model, **kwargs) -> None:
721-
self.model = model
733+
def __init__(self, estimator, **kwargs) -> None:
734+
self.estimator = estimator
722735

723736
for name, value in kwargs.items():
724737
setattr(self, name, value)
@@ -748,8 +761,8 @@ def fit_transform(self, df: pd.DataFrame) -> pd.Series:
748761
X = df[cols_without_nans]
749762
y = df[col]
750763
is_na = y.isna()
751-
self.model.fit(X[~is_na], y[~is_na])
752-
y_pred = self.model.predict(X)
764+
self.estimator.fit(X[~is_na], y[~is_na])
765+
y_pred = self.estimator.predict(X)
753766
std_error = (y_pred[~is_na] - y[~is_na]).std()
754767
random_pred = np.random.normal(size=len(y), loc=y_pred, scale=std_error)
755768
df_imp.loc[is_na, col] = random_pred[is_na]

0 commit comments

Comments
 (0)