1- """
1+ """Python package for stacking (machine learning technique)
2+
3+ Find out how to use:
4+ >>>from vecstack import stacking
5+ >>>help(stacking)
6+
27MIT License
38
4- Vecstack. Python package for stacking (machine learning technique)
59Copyright (c) 2016 vecxoz
610Email: vecxoz@gmail.com
711
3741#-------------------------------------------------------------------------------
3842#-------------------------------------------------------------------------------
3943
40- def transformer (y , func = None ):
41- """
42- Used to transform target variable and prediction
43- """
44+ def transformer (y , func = None ):
45+ """Transforms target variable and prediction"""
4446 if func is None :
4547 return y
4648 else :
@@ -49,20 +51,20 @@ def transformer(y, func = None):
4951#-------------------------------------------------------------------------------
5052#-------------------------------------------------------------------------------
5153
52- def stacking (models , X_train , y_train , X_test , regression = True ,
53- transform_target = None , transform_pred = None ,
54- metric = None , n_folds = 4 , stratified = False ,
55- shuffle = False , random_state = 0 , verbose = 0 ):
54+ def stacking (models , X_train , y_train , X_test , regression = True ,
55+ transform_target = None , transform_pred = None ,
56+ metric = None , n_folds = 4 , stratified = False ,
57+ shuffle = False , random_state = 0 , verbose = 0 ):
5658 """Function 'stacking' takes train data, test data and a list of 1-st level
57- models, and return stacking features, which can be used with 2-nd level model.
59+ models, and returns stacking features, which can be used with 2-nd level model.
5860
5961 Complete examples and stacking concept - see below.
6062
6163 Parameters
6264 ----------
6365 models : list
6466 List of 1-st level models. You can use any models that follow sklearn
65- convention i.e. have methods 'fit' and 'predict'.
67+ convention, i.e. accept numpy arrays and have methods 'fit' and 'predict'.
6668
6769 X_train : numpy array or sparse matrix of shape [n_train_samples, n_features]
6870 Training data
@@ -81,33 +83,37 @@ def stacking(models, X_train, y_train, X_test, regression = True,
8183 Function to transform target variable.
8284 If None - transformation is not used.
8385 For example, for regression task (if target variable is skewed)
84- you can use transformation like numpy.log
86+ you can use transformation like numpy.log.
87+ Set transform_target = numpy.log
8588 Usually you want to use respective backward transformation
86- for prediction like numpy.exp. To do so set
87- transform_pred = numpy.exp
89+ for prediction like numpy.exp.
90+ Set transform_pred = numpy.exp
8891 Caution! Some transformations may give inapplicable results.
89- For example, if target variable contains zeros numpy.log
92+ For example, if target variable contains zeros, numpy.log
9093 gives you -inf. In such case you can use appropriate
9194 transformation like numpy.log1p and respective
9295 backward transformation like numpy.expm1.
9396
9497 transform_pred : callable, default None
9598 Function to transform prediction.
9699 If None - transformation is not used.
97- If you use transformation for target variable
98- like numpy.log, then using transform_pred you can specify
99- respective backward transformation, like numpy.exp
100+ If you use transformation for target variable (transform_target)
101+ like numpy.log, then using transform_pred you can specify
102+ respective backward transformation like numpy.exp.
103+ See the description of the parameter transform_target
100104
101105 metric : callable, default None
102106 Evaluation metric (score function) which is used to calculate
103107 results of cross-validation.
104108 If None, then by default:
105- for regression - mean_absolute_error,
106- for classification - accuracy_score
107- You can use any sklearn metric or define your own metric like shown below:
109+ sklearn.metrics.mean_absolute_error - for regression
110+ sklearn.metrics.accuracy_score - for classification
111+ You can use any appropriate sklearn metric or
112+ define your own metric like shown below:
108113
109- def root_mean_square_error(y_true, y_pred):
110- return numpy.sqrt(numpy.mean(numpy.power(y_true - y_pred, 2)))
114+ def your_metric(y_true, y_pred):
115+ # calculate
116+ return result
111117
112118 n_folds : int, default 4
113119 Number of folds in cross-validation
@@ -123,9 +129,9 @@ def root_mean_square_error(y_true, y_pred):
123129
124130 verbose : int, default 0
125131 Level of verbosity.
126- 0 - show no messages,
127- 1 - show single score for each 1-st level model,
128- 2 - show score for each fold of each 1-st level model
132+ 0 - show no messages
133+ 1 - for each model show single mean score
134+ 2 - for each model show score for each fold and mean score
129135
130136 Caution. To calculate MEAN score across all folds
131137 full train set prediction and full true target are used.
@@ -140,16 +146,19 @@ def root_mean_square_error(y_true, y_pred):
140146 S_test : numpy array of shape [n_test_samples, n_models]
141147 Stacking features for test set
142148
143- Usage
144- -----
145- # For regression
146- S_train, S_test = stacking(models, X_train, y_train, X_test, verbose = 2)
147-
148- # For classification
149+ Brief example (complete examples - see below)
150+ ---------------------------------------------
151+ from vecstack import stacking
152+
153+ # Get your data
154+
155+ # Initialize 1-st level models
156+
157+ # Get your stacking features in a single line
149158 S_train, S_test = stacking(models, X_train, y_train, X_test,
150- regression = False , verbose = 2)
151-
152- Complete examples - see below.
159+ regression = True , verbose = 2)
160+
161+ # Use 2-nd level model with stacking features
153162
154163 Stacking concept
155164 ----------------
@@ -165,8 +174,8 @@ def root_mean_square_error(y_true, y_pred):
165174 You can find further stacking explanation with pictures at
166175 https://github.com/vecxoz/vecstack
167176
168- Examples
169- --------
177+ Complete examples
178+ -----------------
170179
171180 Regression
172181 ----------
@@ -292,16 +301,16 @@ def root_mean_square_error(y_true, y_pred):
292301 kf = KFold (len (y_train ), n_folds , shuffle = shuffle , random_state = random_state )
293302
294303 # Create empty numpy arrays for stacking features
295- S_train = np .zeros ((len ( X_train ) , len (models )))
296- S_test = np .zeros ((len ( X_test ) , len (models )))
304+ S_train = np .zeros ((X_train . shape [ 0 ] , len (models )))
305+ S_test = np .zeros ((X_test . shape [ 0 ] , len (models )))
297306
298307 # Loop across models
299308 for model_counter , model in enumerate (models ):
300309 if verbose > 0 :
301310 print ('model %d: [%s]' % (model_counter , model .__class__ .__name__ ))
302311
303312 # Create empty numpy array, which will contain temporary predictions for test set made in each fold
304- S_test_temp = np .zeros ((len ( X_test ) , len (kf )))
313+ S_test_temp = np .zeros ((X_test . shape [ 0 ] , len (kf )))
305314
306315 # Loop across folds
307316 for fold_counter , (tr_index , te_index ) in enumerate (kf ):
0 commit comments