2121from sklearn .base import RegressorMixin
2222from sklearn .base import TransformerMixin
2323from sklearn .base import clone
24+ from sklearn .model_selection import cross_val_predict
2425from sklearn .model_selection ._split import check_cv
2526
2627import numpy as np
@@ -82,6 +83,24 @@ class StackingCVRegressor(BaseEstimator, RegressorMixin, TransformerMixin):
8283 recommended if you are working with estimators that are supporting
8384 the scikit-learn fit/predict API interface but are not compatible
8485 to scikit-learn's `clone` function.
 86+ n_jobs : int or None, optional (default=1)
87+ The number of CPUs to use to do the computation.
88+ ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.
89+ ``-1`` means using all processors. See :term:`Glossary <n_jobs>`
90+ for more details.
91+ pre_dispatch : int, or string, optional
92+ Controls the number of jobs that get dispatched during parallel
93+ execution. Reducing this number can be useful to avoid an
94+ explosion of memory consumption when more jobs get dispatched
95+ than CPUs can process. This parameter can be:
96+ - None, in which case all the jobs are immediately
97+ created and spawned. Use this for lightweight and
98+ fast-running jobs, to avoid delays due to on-demand
99+ spawning of the jobs
100+ - An int, giving the exact number of total jobs that are
101+ spawned
102+ - A string, giving an expression as a function of n_jobs,
103+ as in '2*n_jobs'
85104
86105 Attributes
87106 ----------
@@ -97,10 +116,10 @@ class StackingCVRegressor(BaseEstimator, RegressorMixin, TransformerMixin):
97116
98117 """
99118 def __init__ (self , regressors , meta_regressor , cv = 5 ,
100- shuffle = True ,
119+ shuffle = True , n_jobs = 1 ,
101120 use_features_in_secondary = False ,
102121 store_train_meta_features = False ,
103- refit = True ):
122+ refit = True , pre_dispatch = None ):
104123
105124 self .regressors = regressors
106125 self .meta_regressor = meta_regressor
@@ -112,9 +131,11 @@ def __init__(self, regressors, meta_regressor, cv=5,
112131 _name_estimators ([meta_regressor ])}
113132 self .cv = cv
114133 self .shuffle = shuffle
134+ self .n_jobs = n_jobs
115135 self .use_features_in_secondary = use_features_in_secondary
116136 self .store_train_meta_features = store_train_meta_features
117137 self .refit = refit
138+ self .pre_dispatch = pre_dispatch
118139
119140 def fit (self , X , y , groups = None , sample_weight = None ):
120141 """ Fit ensemble regressors and the meta-regressor.
@@ -155,34 +176,23 @@ def fit(self, X, y, groups=None, sample_weight=None):
155176 # Override shuffle parameter in case of self generated
156177 # cross-validation strategy
157178 kfold .shuffle = self .shuffle
158-
159- meta_features = np .zeros ((X .shape [0 ], len (self .regressors )))
160-
161- #
162- # The outer loop iterates over the base-regressors. Each regressor
163- # is trained cv times and makes predictions, after which we train
164- # the meta-regressor on their combined results.
165179 #
166- for i , regr in enumerate (self .regressors ):
167- #
168- # In the inner loop, each model is trained cv times on the
169- # training-part of this fold of data; and the holdout-part of data
170- # is used for predictions. This is repeated cv times, so in
171- # the end we have predictions for each data point.
172- #
173- # Advantage of this complex approach is that data points we're
174- # predicting have not been trained on by the algorithm, so it's
175- # less susceptible to overfitting.
176- #
177- for train_idx , holdout_idx in kfold .split (X , y , groups ):
178- instance = clone (regr )
179- if sample_weight is None :
180- instance .fit (X [train_idx ], y [train_idx ])
181- else :
182- instance .fit (X [train_idx ], y [train_idx ],
183- sample_weight = sample_weight [train_idx ])
184- y_pred = instance .predict (X [holdout_idx ])
185- meta_features [holdout_idx , i ] = y_pred
 180+ # The meta_features are a collection of the prediction data,
 181+ # with shape [n_samples, len(self.regressors)]. Each column
 182+ # corresponds to the result of `cross_val_predict` using one
 183+ # of the base regressors.
184+ # Advantage of this complex approach is that data points we're
185+ # predicting have not been trained on by the algorithm, so it's
186+ # less susceptible to overfitting.
187+ if sample_weight is None :
188+ fit_params = None
189+ else :
190+ fit_params = dict (sample_weight = sample_weight )
191+ meta_features = np .column_stack ([cross_val_predict (
192+ regr , X , y , groups = groups , cv = kfold ,
193+ n_jobs = self .n_jobs , fit_params = fit_params ,
194+ pre_dispatch = self .pre_dispatch )
195+ for regr in self .regr_ ])
186196
187197 # save meta-features for training data
188198 if self .store_train_meta_features :
0 commit comments