Skip to content

Commit 47e3507

Browse files
authored
Merge pull request #80 from chkoar/pipeline
[WIP] Modify Pipeline object to conform the current API of samplers
2 parents 49753dd + 72a16a7 commit 47e3507

File tree

11 files changed

+921
-74
lines changed

11 files changed

+921
-74
lines changed
Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
"""
2+
=========================
3+
Pipeline Object
4+
=========================
5+
An example of the Pipeline object working with transformers and resamplers.
6+
"""
7+
8+
print(__doc__)
9+
10+
from sklearn.cross_validation import train_test_split as tts
11+
from sklearn.datasets import make_classification
12+
from sklearn.decomposition import PCA
13+
from sklearn.neighbors import KNeighborsClassifier as KNN
14+
from sklearn.metrics import classification_report
15+
16+
17+
from unbalanced_dataset.pipeline import make_pipeline
18+
from unbalanced_dataset.under_sampling import EditedNearestNeighbours, \
19+
RepeatedEditedNearestNeighbours
20+
21+
# Generate the dataset
22+
X, y = make_classification(n_classes=2, class_sep=1.25, weights=[0.3, 0.7],
23+
n_informative=3, n_redundant=1, flip_y=0,
24+
n_features=5, n_clusters_per_class=1,
25+
n_samples=5000, random_state=10)
26+
27+
# Instantiate a PCA object for the sake of easy visualisation
28+
pca = PCA(n_components=2)
29+
30+
# Create the samplers
31+
enn = EditedNearestNeighbours()
32+
renn = RepeatedEditedNearestNeighbours()
33+
34+
# Create the classifier
35+
knn = KNN(1)
36+
37+
38+
# Make the splits
39+
X_train, X_test, y_train, y_test = tts(X, y, random_state=42)
40+
41+
# Add one transformer and two samplers to the pipeline object
42+
pipeline = make_pipeline(pca, enn, renn, knn)
43+
44+
pipeline.fit(X_train, y_train)
45+
y_hat = pipeline.predict(X_test)
46+
47+
print(classification_report(y_test, y_hat))
Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
"""
2+
=========================
3+
Repeated Edited nearest-neighbours
4+
=========================
5+
6+
An illustration of the edited nearest-neighbours and repeated
7+
edited nearest-neighbours method combined in a pipeline object.
8+
9+
"""
10+
11+
print(__doc__)
12+
13+
import matplotlib.pyplot as plt
14+
import seaborn as sns
15+
sns.set()
16+
17+
# Define some colors for the plotting
18+
almost_black = '#262626'
19+
palette = sns.color_palette()
20+
21+
from sklearn.datasets import make_classification
22+
from sklearn.decomposition import PCA
23+
24+
from unbalanced_dataset.under_sampling import EditedNearestNeighbours, \
25+
RepeatedEditedNearestNeighbours
26+
from unbalanced_dataset.pipeline import make_pipeline
27+
28+
# Generate the dataset
29+
X, y = make_classification(n_classes=2, class_sep=1.25, weights=[0.3, 0.7],
30+
n_informative=3, n_redundant=1, flip_y=0,
31+
n_features=5, n_clusters_per_class=1,
32+
n_samples=5000, random_state=10)
33+
34+
35+
# Fit and transform X to visualise it inside a 2D feature space
36+
pca = PCA(n_components=2)
37+
X_vis = pca.fit_transform(X)
38+
39+
# Two subplots, unpack the axes array immediately
40+
f, (ax1, ax3) = plt.subplots(1, 2)
41+
42+
ax1.scatter(X_vis[y == 0, 0], X_vis[y == 0, 1], label="Class #0", alpha=.5,
43+
edgecolor=almost_black, facecolor=palette[0], linewidth=0.15)
44+
ax1.scatter(X_vis[y == 1, 0], X_vis[y == 1, 1], label="Class #1", alpha=.5,
45+
edgecolor=almost_black, facecolor=palette[2], linewidth=0.15)
46+
ax1.set_title('Original set')
47+
48+
# Create the samplers
49+
enn = EditedNearestNeighbours()
50+
renn = RepeatedEditedNearestNeighbours()
51+
52+
# Add the samplers to the pipeline
53+
pipeline = make_pipeline(enn, renn)
54+
X_resampled, y_resampled = pipeline.fit_sample(X, y)
55+
X_res_vis = pca.transform(X_resampled)
56+
57+
ax3.scatter(X_res_vis[y_resampled == 0, 0], X_res_vis[y_resampled == 0, 1],
58+
label="Class #0", alpha=.5, edgecolor=almost_black,
59+
facecolor=palette[0], linewidth=0.15)
60+
ax3.scatter(X_res_vis[y_resampled == 1, 0], X_res_vis[y_resampled == 1, 1],
61+
label="Class #1", alpha=.5, edgecolor=almost_black,
62+
facecolor=palette[2], linewidth=0.15)
63+
ax3.set_title('RENN + ENN ')
64+
65+
plt.show()

unbalanced_dataset/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,4 +34,4 @@
3434
'ensemble',
3535
'over_sampling',
3636
'under_sampling',
37-
'utils']
37+
'pipeline']

unbalanced_dataset/base.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313

1414
from sklearn.base import BaseEstimator
1515
from sklearn.utils import check_X_y
16+
from sklearn.utils import check_array
1617
from sklearn.externals import six
1718

1819
from six import string_types

0 commit comments

Comments
 (0)