@@ -72,7 +72,8 @@ def fit_transform(self, df: pd.DataFrame) -> pd.DataFrame:
7272 df_imputed [col ] = df_imputed [col ].fillna (self .apply_imputation (df_imputed [col ]))
7373
7474 if df_imputed .isna ().any ().any ():
75- print (df_imputed )
75+ print ("Number of null by col" )
76+ print (df_imputed .isna ().sum ())
7677 warnings .warn ("Problem: there are still nan in the columns to be imputed" )
7778 return df_imputed
7879
@@ -395,6 +396,8 @@ def get_resid(x, model, period, extrapolate_trend, method_interpolation):
395396 """
396397 Fit/transform missing values on residuals.
397398 """
399+ if x .isna ().all ():
400+ return np .nan
398401 result = seasonal_decompose (
399402 x .interpolate ().bfill ().ffill (),
400403 model = model ,
@@ -404,8 +407,7 @@ def get_resid(x, model, period, extrapolate_trend, method_interpolation):
404407
405408 residuals = result .resid
406409 residuals [x .isnull ()] = np .nan
407- residuals = residuals .interpolate (method = method_interpolation )
408-
410+ residuals = residuals .interpolate (method = method_interpolation ).ffill ().bfill ()
409411 return result .seasonal + result .trend + residuals
410412
411413 self .apply_imputation = partial (
@@ -439,7 +441,14 @@ class ImputeKNN(_BaseImputer):
439441 >>> imputor.fit_transform(X)
440442 """
441443
442- def __init__ (self , ** kwargs ) -> None :
444+ def __init__ (
445+ self ,
446+ n_neighbors : int = 5 ,
447+ weights : str = "distance" ,
448+ ** kwargs ,
449+ ) -> None :
450+ self .n_neighbors = n_neighbors
451+ self .weights = weights
443452 for name , value in kwargs .items ():
444453 setattr (self , name , value )
445454
@@ -460,7 +469,9 @@ def fit_transform(self, df: pd.DataFrame) -> pd.DataFrame:
460469 if not isinstance (df , pd .DataFrame ):
461470 raise ValueError ("Input has to be a pandas.DataFrame." )
462471
463- imputer = KNNImputer (n_neighbors = self .k , weights = "distance" , metric = "nan_euclidean" )
472+ imputer = KNNImputer (
473+ n_neighbors = self .n_neighbors , weights = self .weights , metric = "nan_euclidean"
474+ )
464475 results = imputer .fit_transform (df )
465476 return pd .DataFrame (data = results , columns = df .columns , index = df .index )
466477
@@ -493,15 +504,15 @@ class ImputeRPCA(_BaseImputer):
493504 TO DO
494505 """
495506
496- def __init__ (self , method , multivariate = False , ** kwargs ) -> None :
507+ def __init__ (self , method : str = "temporal" , multivariate : bool = False , ** kwargs ) -> None :
497508 self .multivariate = multivariate
498509 self .method = method
499510
500511 if method == "PCP" :
501512 self .rpca = RPCA ()
502513 elif method == "temporal" :
503514 self .rpca = TemporalRPCA ()
504- elif method == "online " :
515+ elif method == "onlinetemporal " :
505516 self .rpca = OnlineTemporalRPCA ()
506517 for name , value in kwargs .items ():
507518 setattr (self .rpca , name , value )
@@ -526,7 +537,9 @@ def fit_transform(self, df: pd.DataFrame) -> pd.DataFrame:
526537 imputed = pd .DataFrame (imputed , columns = df .columns )
527538 else :
528539 imputed = pd .DataFrame ()
529- for col in df .columns :
540+ cols_with_nans = df .columns [df .isna ().any ()]
541+ for col in cols_with_nans :
542+ print (col )
530543 imputed_signal , _ , _ = self .rpca .fit_transform (signal = df [col ].values )
531544 imputed [col ] = imputed_signal
532545 imputed .index = df .index
@@ -570,9 +583,9 @@ class ImputeMICE(_BaseImputer):
570583 --------
571584 >>> import numpy as np
572585 >>> import pandas as pd
573- >>> from qolmat.imputations.models import ImputeIterative
586+ >>> from qolmat.imputations.models import ImputeMICE
574587 >>> from sklearn.ensemble import ExtraTreesRegressor
575- >>> imputor = ImputeIterative (estimator=ExtraTreesRegressor(),
588+ >>> imputor = ImputeMICE (estimator=ExtraTreesRegressor(),
576589 >>> sample_posterior=False,
577590 >>> max_iter=100, missing_values=np.nan)
578591 >>> X = pd.DataFrame(data=[[1, 1, 1, 1],
@@ -717,8 +730,8 @@ class ImputeStochasticRegressor(_BaseImputer):
717730 >>> imputor.fit_transform(X)
718731 """
719732
720- def __init__ (self , model , ** kwargs ) -> None :
721- self .model = model
733+ def __init__ (self , estimator , ** kwargs ) -> None :
734+ self .estimator = estimator
722735
723736 for name , value in kwargs .items ():
724737 setattr (self , name , value )
@@ -748,8 +761,8 @@ def fit_transform(self, df: pd.DataFrame) -> pd.Series:
748761 X = df [cols_without_nans ]
749762 y = df [col ]
750763 is_na = y .isna ()
751- self .model .fit (X [~ is_na ], y [~ is_na ])
752- y_pred = self .model .predict (X )
764+ self .estimator .fit (X [~ is_na ], y [~ is_na ])
765+ y_pred = self .estimator .predict (X )
753766 std_error = (y_pred [~ is_na ] - y [~ is_na ]).std ()
754767 random_pred = np .random .normal (size = len (y ), loc = y_pred , scale = std_error )
755768 df_imp .loc [is_na , col ] = random_pred [is_na ]
0 commit comments