Skip to content

Commit 6f3c6fa

Browse files
committed
example RENN added
1 parent 49cf84e commit 6f3c6fa

File tree

3 files changed

+92
-14
lines changed

3 files changed

+92
-14
lines changed
Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
"""
==================================
Repeated Edited nearest-neighbours
==================================

An illustration of the repeated edited nearest-neighbours method.

"""

print(__doc__)

import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.datasets import make_classification
from sklearn.decomposition import PCA

from unbalanced_dataset.under_sampling import (
    EditedNearestNeighbours,
    RepeatedEditedNearestNeighbours,
)

sns.set()

# Define some color for the plotting
almost_black = '#262626'
palette = sns.color_palette()


def plot_classes(ax, X_2d, labels, title):
    """Scatter the two classes of a 2D projection on ``ax`` and title it.

    Parameters
    ----------
    ax : matplotlib axes
        Axes on which the points are drawn.
    X_2d : ndarray
        Projected samples; columns 0 and 1 are used as the x and y
        coordinates.
    labels : ndarray
        Class label of each row of ``X_2d``; rows labelled 0 and 1 are drawn.
    title : str
        Title given to the subplot.
    """
    # Class #0 uses the first palette color and class #1 the third one.
    for class_id, color in ((0, palette[0]), (1, palette[2])):
        mask = labels == class_id
        ax.scatter(X_2d[mask, 0], X_2d[mask, 1],
                   label="Class #{}".format(class_id), alpha=.5,
                   edgecolor=almost_black, facecolor=color, linewidth=0.15)
    ax.set_title(title)


# Generate the dataset
X, y = make_classification(n_classes=2, class_sep=1.25, weights=[0.3, 0.7],
                           n_informative=3, n_redundant=1, flip_y=0,
                           n_features=5, n_clusters_per_class=1,
                           n_samples=5000, random_state=10)

# Instantiate a PCA object for the sake of easy visualisation
pca = PCA(n_components=2)
# Fit and transform X to visualise inside a 2D feature space
X_vis = pca.fit_transform(X)

# Three subplots, unpack the axes array immediately; the figure handle is
# not needed afterwards.
_, (ax1, ax2, ax3) = plt.subplots(1, 3)

plot_classes(ax1, X_vis, y, 'Original set')

# Apply the ENN
print('ENN')
enn = EditedNearestNeighbours()
X_resampled, y_resampled = enn.fit_transform(X, y)
# Reuse the PCA fitted on the original data so all panels share one projection
X_res_vis = pca.transform(X_resampled)
plot_classes(ax2, X_res_vis, y_resampled, 'Edited nearest neighbours')

# Apply the RENN
print('RENN')
renn = RepeatedEditedNearestNeighbours()
X_resampled, y_resampled = renn.fit_transform(X, y)
X_res_vis = pca.transform(X_resampled)
plot_classes(ax3, X_res_vis, y_resampled, 'Repeated Edited nearest neighbours')

plt.show()

unbalanced_dataset/under_sampling/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
from .one_sided_selection import OneSidedSelection
1313
from .neighbourhood_cleaning_rule import NeighbourhoodCleaningRule
1414
from .edited_nearest_neighbours import EditedNearestNeighbours
15+
from .edited_nearest_neighbours import RepeatedEditedNearestNeighbours
1516
from .instance_hardness_threshold import InstanceHardnessThreshold
1617

1718
__all__ = ['UnderSampler',
@@ -23,4 +24,5 @@
2324
'OneSidedSelection',
2425
'NeighbourhoodCleaningRule',
2526
'EditedNearestNeighbours',
27+
'RepeatedEditedNearestNeighbours',
2628
'InstanceHardnessThreshold']

unbalanced_dataset/under_sampling/edited_nearest_neighbours.py

Lines changed: 16 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -324,7 +324,7 @@ class RepeatedEditedNearestNeighbours(UnderSampler):
324324
"""
325325

326326
def __init__(self, return_indices=False, random_state=None, verbose=True,
327-
size_ngh=3, max_iter=None, kind_sel='all', n_jobs=-1):
327+
size_ngh=3, max_iter=100, kind_sel='all', n_jobs=-1):
328328
"""Initialisation of RENN object.
329329
330330
Parameters
@@ -366,7 +366,7 @@ def __init__(self, return_indices=False, random_state=None, verbose=True,
366366
super(RepeatedEditedNearestNeighbours, self).__init__(
367367
return_indices=return_indices,
368368
random_state=random_state,
369-
verbose=False)
369+
verbose=verbose)
370370

371371
self.size_ngh = size_ngh
372372
possible_kind_sel = ('all', 'mode')
@@ -435,27 +435,29 @@ def transform(self, X, y):
435435
# Check the consistency of X and y
436436
X, y = check_X_y(X, y)
437437

438-
X_resampled, y_resampled = X.copy(), y.copy()
439-
len_ = y.shape[0]
440-
current_len_ = len_ - 1
438+
X_, y_ = X, y
441439

442440
if self.return_indices:
443-
idx_under = np.arange(len_, dtype=int)
441+
idx_under = np.arange(len(X.shape[0]), dtype=int)
442+
443+
prev_len = y.shape[0]
444444

445-
n_iter = 0
446-
while current_len_ < len_ and
447-
(self.max_iter is None or n_iter < self.max_iter):
445+
for n_iter in range(self.max_iter):
446+
prev_len = y_.shape[0]
448447
if self.return_indices:
449-
X_resampled, y_resampled, idx_ = self.enn_.transform(X_resampled, y_resampled)
448+
X_, y_, idx_ = self.enn_.transform(X_, y_)
450449
idx_under = idx_under[idx_]
451450
else:
452-
X_resampled, y_resampled = self.enn_.transform(X_resampled, y_resampled)
451+
X_, y_ = self.enn_.transform(X_, y_)
453452

454-
n_iter += 1
455-
len_, current_len_ = current_len_, y_resampled.shape[0]
453+
if prev_len == y_.shape[0]:
454+
break
456455

457456
if self.verbose:
458-
print("Under-sampling performed: {}".format(Counter(y_resampled)))
457+
print("Under-sampling performed: {}".format(Counter(y_)))
458+
459+
#X_resampled, y_resampled = X_.copy(), y_.copy()
460+
X_resampled, y_resampled = X_, y_
459461

460462
# Check if the indices of the samples selected should be returned too
461463
if self.return_indices:

0 commit comments

Comments
 (0)