Skip to content

Commit d74200d

Browse files
authored
Update core.py
1 parent 4990af5 commit d74200d

File tree

1 file changed

+50
-41
lines changed

1 file changed

+50
-41
lines changed

vecstack/core.py

Lines changed: 50 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,11 @@
1-
"""
1+
"""Python package for stacking (machine learning technique)
2+
3+
Find out how to use:
4+
>>> from vecstack import stacking
5+
>>> help(stacking)
6+
27
MIT License
38
4-
Vecstack. Python package for stacking (machine learning technique)
59
Copyright (c) 2016 vecxoz
610
Email: vecxoz@gmail.com
711
@@ -37,10 +41,8 @@
3741
#-------------------------------------------------------------------------------
3842
#-------------------------------------------------------------------------------
3943

40-
def transformer(y, func = None):
41-
"""
42-
Used to transform target variable and prediction
43-
"""
44+
def transformer(y, func=None):
45+
"""Transforms target variable and prediction"""
4446
if func is None:
4547
return y
4648
else:
@@ -49,20 +51,20 @@ def transformer(y, func = None):
4951
#-------------------------------------------------------------------------------
5052
#-------------------------------------------------------------------------------
5153

52-
def stacking(models, X_train, y_train, X_test, regression = True,
53-
transform_target = None, transform_pred = None,
54-
metric = None, n_folds = 4, stratified = False,
55-
shuffle = False, random_state = 0, verbose = 0):
54+
def stacking(models, X_train, y_train, X_test, regression=True,
55+
transform_target=None, transform_pred=None,
56+
metric=None, n_folds=4, stratified=False,
57+
shuffle=False, random_state=0, verbose=0):
5658
"""Function 'stacking' takes train data, test data and list of 1-st level
57-
models, and return stacking features, which can be used with 2-nd level model.
59+
models, and returns stacking features, which can be used with 2-nd level model.
5860
5961
Complete examples and stacking concept - see below.
6062
6163
Parameters
6264
----------
6365
models : list
6466
List of 1-st level models. You can use any models that follow sklearn
65-
convention i.e. have methods 'fit' and 'predict'.
67+
convention, i.e. accept numpy arrays and have methods 'fit' and 'predict'.
6668
6769
X_train : numpy array or sparse matrix of shape [n_train_samples, n_features]
6870
Training data
@@ -81,33 +83,37 @@ def stacking(models, X_train, y_train, X_test, regression = True,
8183
Function to transform target variable.
8284
If None - transformation is not used.
8385
For example, for regression task (if target variable is skewed)
84-
you can use transformation like numpy.log
86+
you can use transformation like numpy.log.
87+
Set transform_target = numpy.log
8588
Usually you want to use respective backward transformation
86-
for prediction like numpy.exp. To do so set
87-
transform_pred = numpy.exp
89+
for prediction like numpy.exp.
90+
Set transform_pred = numpy.exp
8891
Caution! Some transformations may give inapplicable results.
89-
For example, if target variable contains zeros numpy.log
92+
For example, if target variable contains zeros, numpy.log
9093
gives you -inf. In such a case you can use an appropriate
9194
transformation like numpy.log1p and respective
9295
backward transformation like numpy.expm1
9396
9497
transform_pred : callable, default None
9598
Function to transform prediction.
9699
If None - transformation is not used.
97-
If you use transformation for target variable
98-
like numpy.log, then using transform_pred you can specify
99-
respective backward transformation, like numpy.exp
100+
If you use transformation for target variable (transform_target)
101+
like numpy.log, then using transform_pred you can specify
102+
respective backward transformation like numpy.exp.
103+
Look at description of parameter transform_target
100104
101105
metric : callable, default None
102106
Evaluation metric (score function) which is used to calculate
103107
results of cross-validation.
104108
If None, then by default:
105-
for regression - mean_absolute_error,
106-
for classification - accuracy_score
107-
You can use any sklearn metric or define your own metric like shown below:
109+
sklearn.metrics.mean_absolute_error - for regression
110+
sklearn.metrics.accuracy_score - for classification
111+
You can use any appropriate sklearn metric or
112+
define your own metric like shown below:
108113
109-
def root_mean_square_error(y_true, y_pred):
110-
return numpy.sqrt(numpy.mean(numpy.power(y_true - y_pred, 2)))
114+
def your_metric(y_true, y_pred):
115+
# calculate
116+
return result
111117
112118
n_folds : int, default 4
113119
Number of folds in cross-validation
@@ -123,9 +129,9 @@ def root_mean_square_error(y_true, y_pred):
123129
124130
verbose : int, default 0
125131
Level of verbosity.
126-
0 - show no messages,
127-
1 - show single score for each 1-st level model,
128-
2 - show score for each fold of each 1-st level model
132+
0 - show no messages
133+
1 - for each model show single mean score
134+
2 - for each model show score for each fold and mean score
129135
130136
Caution. To calculate MEAN score across all folds
131137
full train set prediction and full true target are used.
@@ -140,16 +146,19 @@ def root_mean_square_error(y_true, y_pred):
140146
S_test : numpy array of shape [n_test_samples, n_models]
141147
Stacking features for test set
142148
143-
Usage
144-
-----
145-
# For regression
146-
S_train, S_test = stacking(models, X_train, y_train, X_test, verbose = 2)
147-
148-
# For classification
149+
Brief example (complete examples - see below)
150+
---------------------------------------------
151+
from vecstack import stacking
152+
153+
# Get your data
154+
155+
# Initialize 1-st level models
156+
157+
# Get your stacking features in a single line
149158
S_train, S_test = stacking(models, X_train, y_train, X_test,
150-
regression = False, verbose = 2)
151-
152-
Complete examples - see below.
159+
regression = True, verbose = 2)
160+
161+
# Use 2-nd level model with stacking features
153162
154163
Stacking concept
155164
----------------
@@ -165,8 +174,8 @@ def root_mean_square_error(y_true, y_pred):
165174
You can find further stacking explanation with pictures at
166175
https://github.com/vecxoz/vecstack
167176
168-
Examples
169-
--------
177+
Complete examples
178+
-----------------
170179
171180
Regression
172181
----------
@@ -292,16 +301,16 @@ def root_mean_square_error(y_true, y_pred):
292301
kf = KFold(len(y_train), n_folds, shuffle = shuffle, random_state = random_state)
293302

294303
# Create empty numpy arrays for stacking features
295-
S_train = np.zeros((len(X_train), len(models)))
296-
S_test = np.zeros((len(X_test), len(models)))
304+
S_train = np.zeros((X_train.shape[0], len(models)))
305+
S_test = np.zeros((X_test.shape[0], len(models)))
297306

298307
# Loop across models
299308
for model_counter, model in enumerate(models):
300309
if verbose > 0:
301310
print('model %d: [%s]' % (model_counter, model.__class__.__name__))
302311

303312
# Create empty numpy array, which will contain temporary predictions for test set made in each fold
304-
S_test_temp = np.zeros((len(X_test), len(kf)))
313+
S_test_temp = np.zeros((X_test.shape[0], len(kf)))
305314

306315
# Loop across folds
307316
for fold_counter, (tr_index, te_index) in enumerate(kf):

0 commit comments

Comments
 (0)