Skip to content

Commit 7ac94b8

Browse files
author
Guillaume Lemaitre
committed
Update the RENN with test and doc
1 parent e9b5a81 commit 7ac94b8

File tree

9 files changed

+126
-6
lines changed

9 files changed

+126
-6
lines changed

doc/api.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ Classes
2222
unbalanced_dataset.under_sampling.ClusterCentroids
2323
unbalanced_dataset.under_sampling.CondensedNearestNeighbour
2424
unbalanced_dataset.under_sampling.EditedNearestNeighbours
25+
unbalanced_dataset.under_sampling.RepeatedEditedNearestNeighbours
2526
unbalanced_dataset.under_sampling.InstanceHardnessThreshold
2627
unbalanced_dataset.under_sampling.NearMiss
2728
unbalanced_dataset.under_sampling.NeighbourhoodCleaningRule

unbalanced_dataset/under_sampling/edited_nearest_neighbours.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -254,7 +254,7 @@ def transform(self, X, y):
254254

255255

256256
class RepeatedEditedNearestNeighbours(UnderSampler):
257-
"""Class to perform under-sampling based on the repeated edited nearest
257+
"""Class to perform under-sampling based on the repeated edited nearest
258258
neighbour method.
259259
260260
Parameters
@@ -317,8 +317,8 @@ class RepeatedEditedNearestNeighbours(UnderSampler):
317317
318318
References
319319
----------
320-
.. [1] I. Tomek, An Experiment with the Edited Nearest-Neighbor
321-
Rule, IEEE Trans. Systems, Man, and Cybernetics, vol. 6, no. 6,
320+
.. [1] I. Tomek, "An Experiment with the Edited Nearest-Neighbor
321+
Rule," IEEE Transactions on Systems, Man, and Cybernetics, vol. 6(6),
322322
pp. 448-452, June 1976.
323323
324324
"""
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.

unbalanced_dataset/under_sampling/tests/test_edited_nearest_neighbours.py

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
"""Test the module condensed nearest neighbour."""
1+
"""Test the module edited nearest neighbour."""
22
from __future__ import print_function
33

44
import os
@@ -110,8 +110,6 @@ def test_enn_fit_transform_mode():
110110
X_resampled, y_resampled = enn.fit_transform(X, Y)
111111

112112
currdir = os.path.dirname(os.path.abspath(__file__))
113-
np.save(os.path.join(currdir, 'data', 'enn_x_mode.npy'), X_resampled)
114-
np.save(os.path.join(currdir, 'data', 'enn_y_mode.npy'), y_resampled)
115113
X_gt = np.load(os.path.join(currdir, 'data', 'enn_x_mode.npy'))
116114
y_gt = np.load(os.path.join(currdir, 'data', 'enn_y_mode.npy'))
117115
assert_array_equal(X_resampled, X_gt)
Lines changed: 121 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,121 @@
1+
"""Test the module repeated edited nearest neighbour."""
2+
from __future__ import print_function
3+
4+
import os
5+
6+
import numpy as np
7+
from numpy.testing import assert_raises
8+
from numpy.testing import assert_equal
9+
from numpy.testing import assert_array_equal
10+
from numpy.testing import assert_warns
11+
12+
from sklearn.datasets import make_classification
13+
from sklearn.utils.estimator_checks import check_estimator
14+
15+
from unbalanced_dataset.under_sampling import RepeatedEditedNearestNeighbours
16+
17+
# Generate a global dataset to use
18+
RND_SEED = 0
19+
X, Y = make_classification(n_classes=2, class_sep=2, weights=[0.1, 0.9],
20+
n_informative=3, n_redundant=1, flip_y=0,
21+
n_features=20, n_clusters_per_class=1,
22+
n_samples=5000, random_state=RND_SEED)
23+
24+
25+
def test_renn_init():
26+
"""Test the initialisation of the object"""
27+
28+
# Define the verbosity flag
29+
verbose = True
30+
renn = RepeatedEditedNearestNeighbours(random_state=RND_SEED,
31+
verbose=verbose)
32+
33+
assert_equal(renn.size_ngh, 3)
34+
assert_equal(renn.kind_sel, 'all')
35+
assert_equal(renn.n_jobs, -1)
36+
assert_equal(renn.rs_, RND_SEED)
37+
assert_equal(renn.verbose, verbose)
38+
assert_equal(renn.min_c_, None)
39+
assert_equal(renn.maj_c_, None)
40+
assert_equal(renn.stats_c_, {})
41+
42+
43+
def test_renn_fit_single_class():
44+
"""Test that an error is raised when there is a single class"""
45+
46+
# Create the object
47+
renn = RepeatedEditedNearestNeighbours(random_state=RND_SEED)
48+
# Resample the data
49+
# Create an invalid y containing a single class
50+
y_single_class = np.zeros((X.shape[0], ))
51+
assert_raises(RuntimeError, renn.fit, X, y_single_class)
52+
53+
54+
def test_renn_fit():
55+
"""Test the fitting method"""
56+
57+
# Create the object
58+
renn = RepeatedEditedNearestNeighbours(random_state=RND_SEED)
59+
# Fit the data
60+
renn.fit(X, Y)
61+
62+
# Check that the class statistics have been computed
63+
assert_equal(renn.min_c_, 0)
64+
assert_equal(renn.maj_c_, 1)
65+
assert_equal(renn.stats_c_[0], 500)
66+
assert_equal(renn.stats_c_[1], 4500)
67+
68+
69+
def test_renn_transform_wt_fit():
70+
"""Test that an error is raised when transform is called before
71+
fitting"""
72+
73+
# Create the object
74+
renn = RepeatedEditedNearestNeighbours(random_state=RND_SEED)
75+
assert_raises(RuntimeError, renn.transform, X, Y)
76+
77+
78+
def test_renn_fit_transform():
79+
"""Test the fit transform routine"""
80+
81+
# Resample the data
82+
renn = RepeatedEditedNearestNeighbours(random_state=RND_SEED)
83+
X_resampled, y_resampled = renn.fit_transform(X, Y)
84+
85+
currdir = os.path.dirname(os.path.abspath(__file__))
86+
X_gt = np.load(os.path.join(currdir, 'data', 'renn_x.npy'))
87+
y_gt = np.load(os.path.join(currdir, 'data', 'renn_y.npy'))
88+
assert_array_equal(X_resampled, X_gt)
89+
assert_array_equal(y_resampled, y_gt)
90+
91+
92+
def test_renn_fit_transform_with_indices():
93+
"""Test the fit transform routine with indices support"""
94+
95+
# Resample the data
96+
renn = RepeatedEditedNearestNeighbours(return_indices=True,
97+
random_state=RND_SEED)
98+
X_resampled, y_resampled, idx_under = renn.fit_transform(X, Y)
99+
100+
currdir = os.path.dirname(os.path.abspath(__file__))
101+
X_gt = np.load(os.path.join(currdir, 'data', 'renn_x.npy'))
102+
y_gt = np.load(os.path.join(currdir, 'data', 'renn_y.npy'))
103+
idx_gt = np.load(os.path.join(currdir, 'data', 'renn_idx.npy'))
104+
assert_array_equal(X_resampled, X_gt)
105+
assert_array_equal(y_resampled, y_gt)
106+
assert_array_equal(idx_under, idx_gt)
107+
108+
109+
def test_renn_fit_transform_mode():
110+
"""Test the fit transform routine using the mode as selection"""
111+
112+
# Resample the data
113+
renn = RepeatedEditedNearestNeighbours(random_state=RND_SEED,
114+
kind_sel='mode')
115+
X_resampled, y_resampled = renn.fit_transform(X, Y)
116+
117+
currdir = os.path.dirname(os.path.abspath(__file__))
118+
X_gt = np.load(os.path.join(currdir, 'data', 'renn_x_mode.npy'))
119+
y_gt = np.load(os.path.join(currdir, 'data', 'renn_y_mode.npy'))
120+
assert_array_equal(X_resampled, X_gt)
121+
assert_array_equal(y_resampled, y_gt)

0 commit comments

Comments
 (0)