FIX consider only the current class when generating samples in ADASYN (#355)

glemaitre · web-flow · commit 2f7e4e572f19 · 2017-10-14T11:31:05.000+02:00
diff --git a/doc/whats_new/v0.0.4.rst b/doc/whats_new/v0.0.4.rst
@@ -6,6 +6,13 @@ Version 0.4 (under development)
 Changelog
 ---------
 
+Bug fixes
+.........
+
+- Fix a bug in ADASYN so that only samples from the current class are
+  considered when generating new samples. :issue:`354` by :user:`Guillaume
+  Lemaitre <glemaitre>`.
+
 Maintenance
 ...........
 
diff --git a/imblearn/over_sampling/adasyn.py b/imblearn/over_sampling/adasyn.py
@@ -184,7 +184,8 @@ def _sample(self, X, y):
                     steps = random_state.uniform(size=len(nn_zs))
                     if x_i.nnz:
                         for step, nn_z in zip(steps, nn_zs):
-                            sample = x_i + step * (X[x_i_nn[nn_z], :] - x_i)
+                            sample = (x_i +
+                                      step * (X_class[x_i_nn[nn_z], :] - x_i))
                             row_indices += ([n_samples_generated] *
                                             len(sample.indices))
                             col_indices += sample.indices.tolist()
@@ -204,7 +205,8 @@ def _sample(self, X, y):
                     nn_zs = random_state.randint(
                         1, high=self.nn_.n_neighbors, size=num_sample_i)
                     steps = random_state.uniform(size=len(nn_zs))
-                    x_class_gen.append([x_i + step * (X[x_i_nn[nn_z], :] - x_i)
+                    x_class_gen.append([x_i +
+                                        step * (X_class[x_i_nn[nn_z], :] - x_i)
                                         for step, nn_z in zip(steps, nn_zs)])
 
                 X_new = np.concatenate(x_class_gen)
diff --git a/imblearn/over_sampling/tests/test_adasyn.py b/imblearn/over_sampling/tests/test_adasyn.py
@@ -64,10 +64,10 @@ def test_ada_fit_sample():
                      [-0.41635887, -0.38299653],
                      [0.08711622, 0.93259929],
                      [1.70580611, -0.11219234],
-                     [0.36370445, -0.19262406],
+                     [0.94899098, -0.30508981],
                      [0.28204936, -0.13953426],
-                     [0.39635544, 0.33629036],
-                     [0.35301481, 0.25795516]])
+                     [1.58028868, -0.04089947],
+                     [0.66117333, -0.28009063]])
     y_gt = np.array([
         0, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0
     ])
@@ -106,10 +106,10 @@ def test_ada_fit_sample_nn_obj():
                      [-0.41635887, -0.38299653],
                      [0.08711622, 0.93259929],
                      [1.70580611, -0.11219234],
-                     [0.36370445, -0.19262406],
+                     [0.94899098, -0.30508981],
                      [0.28204936, -0.13953426],
-                     [0.39635544, 0.33629036],
-                     [0.35301481, 0.25795516]])
+                     [1.58028868, -0.04089947],
+                     [0.66117333, -0.28009063]])
     y_gt = np.array([
         0, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0
     ])