Skip to content

Commit a5379ab

Browse files
author
Guillaume Lemaitre
committed
Merge branch 'issue_130'
Conflicts: doc/whats_new.rst
2 parents 7721f07 + 75e886e commit a5379ab

File tree

3 files changed

+39
-6
lines changed

3 files changed

+39
-6
lines changed

doc/whats_new.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ Bug fixes
1818
- Fixed a bug in :class:`under_sampling.NearMiss` which was not picking the right samples during under sampling for the method 3. By `Guillaume Lemaitre`_.
1919
- Fixed a bug in :class:`ensemble.EasyEnsemble`, correction of the `random_state` generation. By `Guillaume Lemaitre`_ and `Christos Aridas`_.
2020
- Fixed a bug in :class:`under_sampling.RepeatedEditedNearestNeighbours`: added an additional stopping criterion to prevent the minority class from becoming a majority class and to prevent a class from disappearing. By `Guillaume Lemaitre`_.
21+
- Fixed a bug in :class:`under_sampling.AllKNN`: added stopping criteria to prevent the minority class from becoming a majority class and to prevent a class from disappearing. By `Guillaume Lemaitre`_.
2122

2223
New features
2324
~~~~~~~~~~~~

imblearn/under_sampling/edited_nearest_neighbours.py

Lines changed: 35 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -579,10 +579,42 @@ def _sample(self, X, y):
579579
# updating ENN size_ngh
580580
self.enn_.size_ngh = curr_size_ngh
581581
if self.return_indices:
582-
X_, y_, idx_ = self.enn_.fit_sample(X_, y_)
583-
idx_under = idx_under[idx_]
582+
X_enn, y_enn, idx_enn = self.enn_.fit_sample(X_, y_)
584583
else:
585-
X_, y_ = self.enn_.fit_sample(X_, y_)
584+
X_enn, y_enn = self.enn_.fit_sample(X_, y_)
585+
586+
# Check the stopping criterion
587+
# 1. If the number of samples in another class becomes lower than
588+
# the number of samples in the minority class
589+
# 2. If one of the classes disappears
590+
# Case 1
591+
stats_enn = Counter(y_enn)
592+
self.logger.debug('Current ENN stats: %s', stats_enn)
593+
# Get the number of samples in the non-minority classes
594+
count_non_min = np.array([val for val, key
595+
in zip(stats_enn.itervalues(),
596+
stats_enn.iterkeys())
597+
if key != self.min_c_])
598+
self.logger.debug('Number of samples in the non-majority'
599+
' classes: %s', count_non_min)
600+
# Check whether the minority class stops being the minority
601+
b_min_bec_maj = np.any(count_non_min < self.stats_c_[self.min_c_])
602+
603+
# Case 2
604+
b_remove_maj_class = (len(stats_enn) < len(self.stats_c_))
605+
606+
if b_min_bec_maj or b_remove_maj_class:
607+
# Log the variables to explain the stop of the algorithm
608+
self.logger.debug('AllKNN minority become majority: %s',
609+
b_min_bec_maj)
610+
self.logger.debug('AllKNN remove one class: %s',
611+
b_remove_maj_class)
612+
break
613+
614+
# Update the data for the next iteration
615+
X_, y_, = X_enn, y_enn
616+
if self.return_indices:
617+
idx_under = idx_under[idx_enn]
586618

587619
self.logger.info('Under-sampling performed: %s', Counter(y_))
588620

imblearn/under_sampling/tests/test_allknn.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -155,6 +155,6 @@ def test_multiclass_fit_sample():
155155

156156
# Check the size of y
157157
count_y_res = Counter(y_resampled)
158-
assert_equal(count_y_res[0], 341)
159-
assert_equal(count_y_res[1], 2485)
160-
assert_equal(count_y_res[2], 212)
158+
assert_equal(count_y_res[0], 400)
159+
assert_equal(count_y_res[1], 3600)
160+
assert_equal(count_y_res[2], 1000)

0 commit comments

Comments
 (0)