3535from statsmodels .tools .tools import add_constant
3636from statsmodels .api import RLM
3737import statsmodels
38+ from joblib import Parallel , delayed
3839
3940
4041def _weighted_check_cv (cv = 5 , y = None , classifier = False ):
@@ -539,6 +540,34 @@ def fit(self, X, y, sample_weight=None):
539540 return self
540541
541542
def _get_theta_coefs_and_tau_sq(i, X, sample_weight, alpha_cov, n_alphas_cov, max_iter, tol, random_state):
    """Regress feature ``i`` of ``X`` on the remaining features with a
    (weighted) lasso and return the fitted coefficients and the weighted
    noise level tau^2 of that regression.

    Used to build one row of the approximate precision matrix for the
    debiased lasso. When ``alpha_cov == 'auto'`` the penalty is chosen by
    3-fold CV over ``n_alphas_cov`` candidates; otherwise ``alpha_cov`` is
    used as a fixed penalty.
    """
    n_samples, n_features = X.shape
    target = X[:, i]
    # Design matrix with column i removed.
    design = np.delete(X, i, axis=1)
    # Pick the regularized regressor for this column.
    if alpha_cov == 'auto':
        column_model = WeightedLassoCV(cv=3, n_alphas=n_alphas_cov,
                                       fit_intercept=False,
                                       max_iter=max_iter,
                                       tol=tol, n_jobs=1,
                                       random_state=random_state)
    else:
        column_model = WeightedLasso(alpha=alpha_cov,
                                     fit_intercept=False,
                                     max_iter=max_iter,
                                     tol=tol,
                                     random_state=random_state)
    column_model.fit(design, target, sample_weight=sample_weight)
    # Weighted (or uniform 1/n) normalization of the target for tau^2.
    if sample_weight is None:
        target_scaled = target / n_samples
    else:
        target_scaled = target * sample_weight / np.sum(sample_weight)
    residual = target - column_model.predict(design)
    tausq = np.dot(residual, target_scaled)
    return column_model.coef_, tausq
569+
570+
542571class DebiasedLasso (WeightedLasso ):
543572 """Debiased Lasso model.
544573
@@ -555,6 +584,18 @@ class DebiasedLasso(WeightedLasso):
555584 reasons, using ``alpha = 0`` with the ``Lasso`` object is not advised.
556585 Given this, you should use the :class:`.LinearRegression` object.
557586
587+ n_alphas : int, optional, default 100
588+ How many alphas to try if alpha='auto'
589+
590+ alpha_cov : string | float, optional, default 'auto'
591+ The regularization alpha that is used when constructing the pseudo inverse of
592+ the covariance matrix Theta used for correcting the lasso coefficient. Each
593+ such regression corresponds to the regression of one feature on the remainder
594+ of the features.
595+
596+ n_alphas_cov : int, optional, default 10
597+ How many alpha_cov to try if alpha_cov='auto'.
598+
558599 fit_intercept : boolean, optional, default True
559600 Whether to calculate the intercept for this model. If set
560601 to False, no intercept will be used in calculations
@@ -597,6 +638,9 @@ class DebiasedLasso(WeightedLasso):
597638 (setting to 'random') often leads to significantly faster convergence
598639 especially when tol is higher than 1e-4.
599640
641+ n_jobs : int or None, default None
642+ How many jobs to use whenever parallelism is invoked
643+
600644 Attributes
601645 ----------
602646 coef_ : array, shape (n_features,)
@@ -620,10 +664,14 @@ class DebiasedLasso(WeightedLasso):
620664
621665 """
622666
623- def __init__ (self , alpha = 'auto' , fit_intercept = True ,
624- precompute = False , copy_X = True , max_iter = 1000 ,
667+ def __init__ (self , alpha = 'auto' , n_alphas = 100 , alpha_cov = 'auto' , n_alphas_cov = 10 ,
668+ fit_intercept = True , precompute = False , copy_X = True , max_iter = 1000 ,
625669 tol = 1e-4 , warm_start = False ,
626- random_state = None , selection = 'cyclic' ):
670+ random_state = None , selection = 'cyclic' , n_jobs = None ):
671+ self .n_jobs = n_jobs
672+ self .n_alphas = n_alphas
673+ self .alpha_cov = alpha_cov
674+ self .n_alphas_cov = n_alphas_cov
627675 super ().__init__ (
628676 alpha = alpha , fit_intercept = fit_intercept ,
629677 precompute = precompute , copy_X = copy_X ,
@@ -747,18 +795,8 @@ def predict_interval(self, X, alpha=0.1):
747795 lower = alpha / 2
748796 upper = 1 - alpha / 2
749797 y_pred = self .predict (X )
750- y_lower = np .empty (y_pred .shape )
751- y_upper = np .empty (y_pred .shape )
752- # Note that in the case of no intercept, X_offset is 0
753- if self .fit_intercept :
754- X = X - self ._X_offset
755- # Calculate the variance of the predictions
756- var_pred = np .sum (np .matmul (X , self ._coef_variance ) * X , axis = 1 )
757- if self .fit_intercept :
758- var_pred += self ._mean_error_variance
759-
760798 # Calculate prediction confidence intervals
761- sd_pred = np . sqrt ( var_pred )
799+ sd_pred = self . prediction_stderr ( X )
762800 y_lower = y_pred + \
763801 np .apply_along_axis (lambda s : norm .ppf (
764802 lower , scale = s ), 0 , sd_pred )
@@ -810,20 +848,25 @@ def intercept__interval(self, alpha=0.1):
810848
811849 def _get_coef_correction (self , X , y , y_pred , sample_weight , theta_hat ):
812850 # Assumes flattened y
813- n_samples , n_features = X .shape
851+ n_samples , _ = X .shape
814852 y_res = np .ndarray .flatten (y ) - y_pred
815853 # Compute weighted residuals
816854 if sample_weight is not None :
817855 y_res_scaled = y_res * sample_weight / np .sum (sample_weight )
818856 else :
819857 y_res_scaled = y_res / n_samples
820858 delta_coef = np .matmul (
821- np .matmul (theta_hat , X .T ) , y_res_scaled )
859+ theta_hat , np .matmul (X .T , y_res_scaled ) )
822860 return delta_coef
823861
824862 def _get_optimal_alpha (self , X , y , sample_weight ):
825863 # To be done once per target. Assumes y can be flattened.
826- cv_estimator = WeightedLassoCV (cv = 5 , fit_intercept = self .fit_intercept )
864+ cv_estimator = WeightedLassoCV (cv = 5 , n_alphas = self .n_alphas , fit_intercept = self .fit_intercept ,
865+ precompute = self .precompute , copy_X = True ,
866+ max_iter = self .max_iter , tol = self .tol ,
867+ random_state = self .random_state ,
868+ selection = self .selection ,
869+ n_jobs = self .n_jobs )
827870 cv_estimator .fit (X , y .flatten (), sample_weight = sample_weight )
828871 return cv_estimator .alpha_
829872
@@ -835,27 +878,15 @@ def _get_theta_hat(self, X, sample_weight):
835878 C_hat = np .ones ((1 , 1 ))
836879 tausq = (X .T @ X / n_samples ).flatten ()
837880 return np .diag (1 / tausq ) @ C_hat
838- coefs = np .empty ((n_features , n_features - 1 ))
839- tausq = np .empty (n_features )
840881 # Compute Lasso coefficients for the columns of the design matrix
841- for i in range (n_features ):
842- y = X [:, i ]
843- X_reduced = X [:, list (range (i )) + list (range (i + 1 , n_features ))]
844- # Call weighted lasso on reduced design matrix
845- # Inherit some parameters from the parent
846- local_wlasso = WeightedLasso (
847- alpha = self .alpha ,
848- fit_intercept = False ,
849- max_iter = self .max_iter ,
850- tol = self .tol
851- ).fit (X_reduced , y , sample_weight = sample_weight )
852- coefs [i ] = local_wlasso .coef_
853- # Weighted tau
854- if sample_weight is not None :
855- y_weighted = y * sample_weight / np .sum (sample_weight )
856- else :
857- y_weighted = y / n_samples
858- tausq [i ] = np .dot (y - local_wlasso .predict (X_reduced ), y_weighted )
882+ results = Parallel (n_jobs = self .n_jobs )(
883+ delayed (_get_theta_coefs_and_tau_sq )(i , X , sample_weight ,
884+ self .alpha_cov , self .n_alphas_cov ,
885+ self .max_iter , self .tol , self .random_state )
886+ for i in range (n_features ))
887+ coefs , tausq = zip (* results )
888+ coefs = np .array (coefs )
889+ tausq = np .array (tausq )
859890 # Compute C_hat
860891 C_hat = np .diag (np .ones (n_features ))
861892 C_hat [0 ][1 :] = - coefs [0 ]
@@ -893,6 +924,18 @@ class MultiOutputDebiasedLasso(MultiOutputRegressor):
893924 reasons, using ``alpha = 0`` with the ``Lasso`` object is not advised.
894925 Given this, you should use the :class:`LinearRegression` object.
895926
927+ n_alphas : int, optional, default 100
928+ How many alphas to try if alpha='auto'
929+
930+ alpha_cov : string | float, optional, default 'auto'
931+ The regularization alpha that is used when constructing the pseudo inverse of
932+ the covariance matrix Theta used for correcting the lasso coefficient. Each
933+ such regression corresponds to the regression of one feature on the remainder
934+ of the features.
935+
936+ n_alphas_cov : int, optional, default 10
937+ How many alpha_cov to try if alpha_cov='auto'.
938+
896939 fit_intercept : boolean, optional, default True
897940 Whether to calculate the intercept for this model. If set
898941 to False, no intercept will be used in calculations
@@ -935,6 +978,9 @@ class MultiOutputDebiasedLasso(MultiOutputRegressor):
935978 (setting to 'random') often leads to significantly faster convergence
936979 especially when tol is higher than 1e-4.
937980
981+ n_jobs : int or None, default None
982+ How many jobs to use whenever parallelism is invoked
983+
938984 Attributes
939985 ----------
940986 coef_ : array, shape (n_targets, n_features) or (n_features,)
@@ -954,14 +1000,17 @@ class MultiOutputDebiasedLasso(MultiOutputRegressor):
9541000
9551001 """
9561002
957- def __init__ (self , alpha = 'auto' , fit_intercept = True ,
1003+ def __init__ (self , alpha = 'auto' , n_alphas = 100 , alpha_cov = 'auto' , n_alphas_cov = 10 ,
1004+ fit_intercept = True ,
9581005 precompute = False , copy_X = True , max_iter = 1000 ,
9591006 tol = 1e-4 , warm_start = False ,
9601007 random_state = None , selection = 'cyclic' , n_jobs = None ):
961- self .estimator = DebiasedLasso (alpha = alpha , fit_intercept = fit_intercept ,
1008+ self .estimator = DebiasedLasso (alpha = alpha , n_alphas = n_alphas , alpha_cov = alpha_cov , n_alphas_cov = n_alphas_cov ,
1009+ fit_intercept = fit_intercept ,
9621010 precompute = precompute , copy_X = copy_X , max_iter = max_iter ,
9631011 tol = tol , warm_start = warm_start ,
964- random_state = random_state , selection = selection )
1012+ random_state = random_state , selection = selection ,
1013+ n_jobs = n_jobs )
9651014 super ().__init__ (estimator = self .estimator , n_jobs = n_jobs )
9661015
9671016 def fit (self , X , y , sample_weight = None ):
0 commit comments