Skip to content

Commit 75e886e

Browse files
author
Guillaume Lemaitre
committed
Add stopping criteria
1 parent f905274 commit 75e886e

File tree

3 files changed

+39
-6
lines changed

3 files changed

+39
-6
lines changed

doc/whats_new.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ Bug fixes
1717

1818
- Fixed a bug in :class:`under_sampling.NearMiss` which was not picking the right samples during under sampling for the method 3. By `Guillaume Lemaitre`_.
1919
- Fixed a bug in :class:`ensemble.EasyEnsemble`, correction of the `random_state` generation. By `Guillaume Lemaitre`_ and `Christos Aridas`_.
20+
- Fixed a bug in :class:`under_sampling.AllKNN`: added stopping criteria to prevent the minority class from becoming a majority class or a class from disappearing. By `Guillaume Lemaitre`_.
2021

2122
New features
2223
~~~~~~~~~~~~

imblearn/under_sampling/edited_nearest_neighbours.py

Lines changed: 35 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -537,10 +537,42 @@ def _sample(self, X, y):
537537
# updating ENN size_ngh
538538
self.enn_.size_ngh = curr_size_ngh
539539
if self.return_indices:
540-
X_, y_, idx_ = self.enn_.fit_sample(X_, y_)
541-
idx_under = idx_under[idx_]
540+
X_enn, y_enn, idx_enn = self.enn_.fit_sample(X_, y_)
542541
else:
543-
X_, y_ = self.enn_.fit_sample(X_, y_)
542+
X_enn, y_enn = self.enn_.fit_sample(X_, y_)
543+
544+
# Check the stopping criterion
545+
# 1. If the number of samples in one of the other classes becomes lower than
546+
# the number of samples in the minority class
547+
# 2. If one of the classes disappears
548+
# Case 1
549+
stats_enn = Counter(y_enn)
550+
self.logger.debug('Current ENN stats: %s', stats_enn)
551+
# Get the number of samples in the non-minority classes
552+
count_non_min = np.array([val for val, key
553+
in zip(stats_enn.itervalues(),
554+
stats_enn.iterkeys())
555+
if key != self.min_c_])
556+
self.logger.debug('Number of samples in the non-majority'
557+
' classes: %s', count_non_min)
558+
# Check whether the minority class stops being the minority
559+
b_min_bec_maj = np.any(count_non_min < self.stats_c_[self.min_c_])
560+
561+
# Case 2
562+
b_remove_maj_class = (len(stats_enn) < len(self.stats_c_))
563+
564+
if b_min_bec_maj or b_remove_maj_class:
565+
# Log the variables to explain the stop of the algorithm
566+
self.logger.debug('AllKNN minority become majority: %s',
567+
b_min_bec_maj)
568+
self.logger.debug('AllKNN remove one class: %s',
569+
b_remove_maj_class)
570+
break
571+
572+
# Update the data for the next iteration
573+
X_, y_, = X_enn, y_enn
574+
if self.return_indices:
575+
idx_under = idx_under[idx_enn]
544576

545577
self.logger.info('Under-sampling performed: %s', Counter(y_))
546578

imblearn/under_sampling/tests/test_allknn.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -155,6 +155,6 @@ def test_multiclass_fit_sample():
155155

156156
# Check the size of y
157157
count_y_res = Counter(y_resampled)
158-
assert_equal(count_y_res[0], 341)
159-
assert_equal(count_y_res[1], 2485)
160-
assert_equal(count_y_res[2], 212)
158+
assert_equal(count_y_res[0], 400)
159+
assert_equal(count_y_res[1], 3600)
160+
assert_equal(count_y_res[2], 1000)

0 commit comments

Comments
 (0)