Skip to content

Commit 72a16a7

Browse files
glemaitre authored and chkoar committed
[WIP] Adding testing for pipeline (#1)
Add testing for pipeline
1 parent d2b6102 commit 72a16a7

File tree

7 files changed

+481
-82
lines changed

7 files changed

+481
-82
lines changed

unbalanced_dataset/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -34,4 +34,4 @@
3434
'ensemble',
3535
'over_sampling',
3636
'under_sampling',
37-
'utils']
37+
'pipeline']

unbalanced_dataset/base.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -13,6 +13,7 @@
1313

1414
from sklearn.base import BaseEstimator
1515
from sklearn.utils import check_X_y
16+
from sklearn.utils import check_array
1617
from sklearn.externals import six
1718

1819
from six import string_types

unbalanced_dataset/pipeline.py

Lines changed: 35 additions & 74 deletions
Original file line number | Diff line number | Diff line change
@@ -1,11 +1,11 @@
1-
# Adapted from
2-
# https://github.com/scikit-learn/scikit-learn/blob/master/sklearn/pipeline.py
3-
4-
"""
5-
The :mod:`unblanced_dataset.pipeline` module implements utilities to build a composite
6-
estimator, as a chain of transforms, samples and estimators.
1+
"""
2+
The :mod:`unbalanced_dataset.pipeline` module implements utilities to build
3+
a composite estimator, as a chain of transforms, samples and estimators.
74
"""
85

6+
# Adapted from
7+
# https://github.com/scikit-learn/scikit-learn/blob/master/sklearn/pipeline.py
8+
99
# Author: Edouard Duchesnay
1010
# Gael Varoquaux
1111
# Virgile Fritsch
@@ -14,6 +14,9 @@
1414
# chkoar
1515
# License: BSD
1616

17+
from __future__ import print_function
18+
from __future__ import division
19+
1720
from warnings import warn
1821

1922
from sklearn.externals import six
@@ -29,7 +32,7 @@ class Pipeline(pipeline.Pipeline):
2932
"""Pipeline of transforms and resamples with a final estimator.
3033
3134
Sequentially apply a list of transforms, samples and a final estimator.
32-
Intermediate steps of the pipeline must be transformers or resamplers,
35+
Intermediate steps of the pipeline must be transformers or resamplers,
3336
that is, they must implement fit, transform and sample methods.
3437
The final estimator only needs to implement fit.
3538
@@ -38,50 +41,19 @@ class Pipeline(pipeline.Pipeline):
3841
For this, it enables setting parameters of the various steps using their
3942
names and the parameter name separated by a '__', as in the example below.
4043
41-
Read more in the :ref:`User Guide <pipeline>`.
42-
4344
Parameters
4445
----------
4546
steps : list
46-
List of (name, transform) tuples (implementing fit/transform/fit_sample) that are
47-
chained, in the order in which they are chained, with the last object
48-
an estimator.
47+
List of (name, transform) tuples (implementing
48+
fit/transform/fit_sample) that are chained, in the order in which they
49+
are chained, with the last object an estimator.
4950
5051
Attributes
5152
----------
5253
named_steps : dict
5354
Read-only attribute to access any step parameter by user given name.
5455
Keys are step names and values are steps parameters.
5556
56-
Examples
57-
--------
58-
>>> from sklearn import svm
59-
>>> from sklearn.datasets import samples_generator
60-
>>> from sklearn.feature_selection import SelectKBest
61-
>>> from sklearn.feature_selection import f_regression
62-
>>> from sklearn.pipeline import Pipeline
63-
>>> # generate some data to play with
64-
>>> X, y = samples_generator.make_classification(
65-
... n_informative=5, n_redundant=0, random_state=42)
66-
>>> # ANOVA SVM-C
67-
>>> anova_filter = SelectKBest(f_regression, k=5)
68-
>>> clf = svm.SVC(kernel='linear')
69-
>>> anova_svm = Pipeline([('anova', anova_filter), ('svc', clf)])
70-
>>> # You can set the parameters using the names issued
71-
>>> # For instance, fit using a k of 10 in the SelectKBest
72-
>>> # and a parameter 'C' of the svm
73-
>>> anova_svm.set_params(anova__k=10, svc__C=.1).fit(X, y)
74-
... # doctest: +ELLIPSIS
75-
Pipeline(steps=[...])
76-
>>> prediction = anova_svm.predict(X)
77-
>>> anova_svm.score(X, y) # doctest: +ELLIPSIS
78-
0.77...
79-
>>> # getting the selected features chosen by anova_filter
80-
>>> anova_svm.named_steps['anova'].get_support()
81-
... # doctest: +NORMALIZE_WHITESPACE
82-
array([ True, True, True, False, False, True, False, True, True, True,
83-
False, False, True, False, True, False, False, False, False,
84-
True], dtype=bool)
8557
"""
8658

8759
# BaseEstimator interface
@@ -99,8 +71,8 @@ def __init__(self, steps):
9971

10072
for t in transforms:
10173
if (not (hasattr(t, "fit") or hasattr(t, "fit_transform") or
102-
hasattr(t, "fit_sample")) or not (hasattr(t, "transform")
103-
or hasattr(t, "sample"))):
74+
hasattr(t, "fit_sample")) or
75+
not (hasattr(t, "transform") or hasattr(t, "sample"))):
10476
raise TypeError("All intermediate steps of the chain should "
10577
"be transforms and implement fit and transform"
10678
" '%s' (type %s) doesn't)" % (t, type(t)))
@@ -130,7 +102,7 @@ def _pre_transform(self, X, y=None, **fit_params):
130102
return Xt, yt, fit_params_steps[self.steps[-1][0]]
131103

132104
def fit(self, X, y=None, **fit_params):
133-
"""Fit all the transforms and samples one after the other and transform
105+
"""Fit all the transforms and samples one after the other and transform
134106
the data, then fit the transformed data using the final estimator.
135107
136108
Parameters
@@ -147,8 +119,8 @@ def fit(self, X, y=None, **fit_params):
147119
return self
148120

149121
def fit_transform(self, X, y=None, **fit_params):
150-
"""Fit all the transforms and samples one after the other and
151-
transform or sample the data, then use fit_transform on
122+
"""Fit all the transforms and samples one after the other and
123+
transform or sample the data, then use fit_transform on
152124
transformed data using the final estimator.
153125
154126
Parameters
@@ -169,8 +141,8 @@ def fit_transform(self, X, y=None, **fit_params):
169141

170142
@if_delegate_has_method(delegate='_final_estimator')
171143
def fit_sample(self, X, y=None, **fit_params):
172-
"""Fit all the transforms and samples one after the other and
173-
transform or sample the data, then use fit_sample on
144+
"""Fit all the transforms and samples one after the other and
145+
transform or sample the data, then use fit_sample on
174146
transformed data using the final estimator.
175147
176148
Parameters
@@ -188,8 +160,8 @@ def fit_sample(self, X, y=None, **fit_params):
188160

189161
@if_delegate_has_method(delegate='_final_estimator')
190162
def sample(self, X, y):
191-
"""Applies transforms to the data, and the sample method of
192-
the final estimator. Valid only if the final estimator
163+
"""Applies transforms to the data, and the sample method of
164+
the final estimator. Valid only if the final estimator
193165
implements predict.
194166
195167
Parameters
@@ -199,7 +171,7 @@ def sample(self, X, y):
199171
of the pipeline.
200172
"""
201173
Xt = X
202-
for name, transform in self.steps[:-1]:
174+
for _, transform in self.steps[:-1]:
203175
if hasattr(transform, "fit_sample"):
204176
pass
205177
else:
@@ -208,8 +180,8 @@ def sample(self, X, y):
208180

209181
@if_delegate_has_method(delegate='_final_estimator')
210182
def predict(self, X):
211-
"""Applies transforms to the data, and the predict method of
212-
the final estimator. Valid only if the final estimator
183+
"""Applies transforms to the data, and the predict method of
184+
the final estimator. Valid only if the final estimator
213185
implements predict.
214186
215187
Parameters
@@ -219,7 +191,7 @@ def predict(self, X):
219191
of the pipeline.
220192
"""
221193
Xt = X
222-
for name, transform in self.steps[:-1]:
194+
for _, transform in self.steps[:-1]:
223195
if hasattr(transform, "fit_sample"):
224196
pass
225197
else:
@@ -231,8 +203,8 @@ def fit_predict(self, X, y=None, **fit_params):
231203
"""Applies fit_predict of last step in pipeline after transforms
232204
and samples.
233205
234-
Applies fit_transforms or fit_samples of a pipeline to the data,
235-
followed by the fit_predict method of the final estimator in the
206+
Applies fit_transforms or fit_samples of a pipeline to the data,
207+
followed by the fit_predict method of the final estimator in the
236208
pipeline. Valid only if the final estimator implements fit_predict.
237209
238210
Parameters
@@ -260,7 +232,7 @@ def predict_proba(self, X):
260232
of the pipeline.
261233
"""
262234
Xt = X
263-
for name, transform in self.steps[:-1]:
235+
for _, transform in self.steps[:-1]:
264236
if hasattr(transform, "fit_sample"):
265237
pass
266238
else:
@@ -280,7 +252,7 @@ def decision_function(self, X):
280252
of the pipeline.
281253
"""
282254
Xt = X
283-
for name, transform in self.steps[:-1]:
255+
for _, transform in self.steps[:-1]:
284256
if hasattr(transform, "fit_sample"):
285257
pass
286258
else:
@@ -300,7 +272,7 @@ def predict_log_proba(self, X):
300272
of the pipeline.
301273
"""
302274
Xt = X
303-
for name, transform in self.steps[:-1]:
275+
for _, transform in self.steps[:-1]:
304276
if hasattr(transform, "fit_sample"):
305277
pass
306278
else:
@@ -320,7 +292,7 @@ def transform(self, X):
320292
of the pipeline.
321293
"""
322294
Xt = X
323-
for name, transform in self.steps:
295+
for _, transform in self.steps:
324296
if hasattr(transform, "fit_sample"):
325297
pass
326298
else:
@@ -345,8 +317,8 @@ def inverse_transform(self, X):
345317
" pipeline.inverse_transform any more.", FutureWarning)
346318
X = X[None, :]
347319
Xt = X
348-
for name, step in self.steps[::-1]:
349-
if hasattr(transform, "fit_sample"):
320+
for _, step in self.steps[::-1]:
321+
if hasattr(step, "fit_sample"):
350322
pass
351323
else:
352324
Xt = step.inverse_transform(Xt)
@@ -369,13 +341,11 @@ def score(self, X, y=None):
369341
steps of the pipeline.
370342
"""
371343
Xt = X
372-
for name, transform in self.steps[:-1]:
344+
for _, transform in self.steps[:-1]:
373345
if hasattr(transform, "fit_sample"):
374346
pass
375347
else:
376-
print Xt.shape
377348
Xt = transform.transform(Xt)
378-
print Xt.shape
379349
return self.steps[-1][-1].score(Xt, y)
380350

381351

@@ -386,15 +356,6 @@ def make_pipeline(*steps):
386356
does not permit, naming the estimators. Instead, their names will be set
387357
to the lowercase of their types automatically.
388358
389-
Examples
390-
--------
391-
>>> from sklearn.naive_bayes import GaussianNB
392-
>>> from sklearn.preprocessing import StandardScaler
393-
>>> make_pipeline(StandardScaler(), GaussianNB(priors=None)) # doctest: +NORMALIZE_WHITESPACE
394-
Pipeline(steps=[('standardscaler',
395-
StandardScaler(copy=True, with_mean=True, with_std=True)),
396-
('gaussiannb', GaussianNB(priors=None))])
397-
398359
Returns
399360
-------
400361
p : Pipeline

unbalanced_dataset/setup.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -11,8 +11,8 @@ def configuration(parent_package='', top_path=None):
1111
config.add_subpackage('over_sampling/tests')
1212
config.add_subpackage('under_sampling')
1313
config.add_subpackage('under_sampling/tests')
14-
config.add_subpackage('utils')
15-
config.add_subpackage('utils/tests')
14+
15+
config.add_subpackage('tests')
1616

1717
return config
1818

unbalanced_dataset/tests/__init__.py

Whitespace-only changes.

0 commit comments

Comments
 (0)