1616
1717
1818class DROP3 :
19-
2019 """
2120 Wilson, D. R., & Martinez, T. R. (2000). Reduction techniques for
2221 instance-based learning algorithms. Machine learning, 38(3), 257-286.
@@ -63,11 +62,17 @@ def filter(self, samples, y):
6362 :param y: DataFrame.
6463 :return: the input dataset with the remaining samples.
6564 """
66- initial_distances , initial_samples , initial_targets , knn , \
67- samples_info = self ._create_variables (samples , y )
68-
69- self ._find_associates (initial_distances , initial_samples ,
70- initial_targets , knn , samples_info )
65+ (
66+ initial_distances ,
67+ initial_samples ,
68+ initial_targets ,
69+ knn ,
70+ samples_info ,
71+ ) = self ._create_variables (samples , y )
72+
73+ self ._find_associates (
74+ initial_distances , initial_samples , initial_targets , knn , samples_info
75+ )
7176
7277 initial_distances .sort (key = lambda x : x [2 ], reverse = True )
7378
@@ -79,32 +84,37 @@ def filter(self, samples, y):
7984 with_ , without = self ._with_without (tuple (x_sample ), samples_info )
8085
8186 if without >= with_ :
82- initial_distances = initial_distances [:index_x - removed ] + \
83- initial_distances [index_x - removed + 1 :]
87+ initial_distances = (
88+ initial_distances [: index_x - removed ]
89+ + initial_distances [index_x - removed + 1 :]
90+ )
8491 removed += 1
8592
8693 for a_associate_of_x in samples_info [(tuple (x_sample ))][1 ]:
8794 a_neighs , remaining_samples = self ._remove_from_neighs (
88- a_associate_of_x , initial_distances ,
89- samples_info , x_sample )
95+ a_associate_of_x , initial_distances , samples_info , x_sample
96+ )
9097
9198 knn = NearestNeighbors (
9299 n_neighbors = self .nearest_neighbors + 2 ,
93- n_jobs = 1 , p = self .power_parameter )
100+ n_jobs = 1 ,
101+ p = self .power_parameter ,
102+ )
94103 knn .fit (remaining_samples )
95104 _ , neigh_ind = knn .kneighbors ([a_associate_of_x ])
96- possible_neighs = [initial_distances [x ][0 ] for x in
97- neigh_ind [0 ]]
105+ possible_neighs = [initial_distances [x ][0 ]
106+ for x in neigh_ind [0 ]]
98107
99- self ._find_new_neighs (a_associate_of_x , a_neighs ,
100- possible_neighs , samples_info )
108+ self ._find_new_neighs (
109+ a_associate_of_x , a_neighs , possible_neighs , samples_info
110+ )
101111
102112 new_neigh = a_neighs [- 1 ]
103- samples_info [tuple (new_neigh )][1 ].append (
104- a_associate_of_x )
113+ samples_info [tuple (new_neigh )][1 ].append (a_associate_of_x )
105114
106- samples = pd .DataFrame ([x for x , _ , _ in initial_distances ],
107- columns = self .x_attr )
115+ samples = pd .DataFrame (
116+ [x for x , _ , _ in initial_distances ], columns = self .x_attr
117+ )
108118 y = pd .DataFrame ([x for _ , x , _ in initial_distances ])
109119
110120 return samples , y
@@ -122,23 +132,24 @@ def _create_variables(self, samples, y):
122132 self .x_attr = samples .keys ()
123133 samples = transform (samples , y )
124134 s = copy .deepcopy (samples )
125- initial_samples = s ['data' ]
126- initial_targets = s ['target' ]
127- initial_samples , samples_index = np .unique (ar = initial_samples ,
128- return_index = True , axis = 0 )
135+ initial_samples = s ["data" ]
136+ initial_targets = s ["target" ]
137+ initial_samples , samples_index = np .unique (
138+ ar = initial_samples , return_index = True , axis = 0
139+ )
129140 initial_targets = initial_targets [samples_index ]
130- knn = NearestNeighbors (n_neighbors = self .nearest_neighbors + 2 , n_jobs = 1 ,
131- p = self .power_parameter )
141+ knn = NearestNeighbors (
142+ n_neighbors = self .nearest_neighbors + 2 , n_jobs = 1 , p = self .power_parameter
143+ )
132144 knn .fit (initial_samples )
133- samples_info = {tuple (x ): [[], [], y ] for x , y in zip (initial_samples ,
134- initial_targets )}
145+ samples_info = {
146+ tuple (x ): [[], [], y ] for x , y in zip (initial_samples , initial_targets )
147+ }
135148 initial_distances = []
136- return initial_distances , initial_samples , initial_targets , knn , \
137- samples_info
149+ return initial_distances , initial_samples , initial_targets , knn , samples_info
138150
139151 @staticmethod
140- def _find_new_neighs (a_associate_of_x , a_neighs , possible_neighs ,
141- samples_info ):
152+ def _find_new_neighs (a_associate_of_x , a_neighs , possible_neighs , samples_info ):
142153 """
143154 > The function takes a sample, finds its neighbors, and then checks if
144155 any of the neighbors are not already in the list of neighbors. If
@@ -162,8 +173,9 @@ def _find_new_neighs(a_associate_of_x, a_neighs, possible_neighs,
162173 samples_info [tuple (a_associate_of_x )][0 ] = a_neighs
163174
164175 @staticmethod
165- def _remove_from_neighs (a_associate_of_x , initial_distances ,
166- samples_info , x_sample ):
176+ def _remove_from_neighs (
177+ a_associate_of_x , initial_distances , samples_info , x_sample
178+ ):
167179 """
168180 > It removes the sample `x_sample` from the list of neighbors of
169181 `a_associate_of_x` and returns the updated list of neighbors of
@@ -191,8 +203,9 @@ def _remove_from_neighs(a_associate_of_x, initial_distances,
191203 return a_neighs , remaining_samples
192204
193205 @staticmethod
194- def _find_associates (initial_distances , initial_samples , initial_targets ,
195- knn , samples_info ):
206+ def _find_associates (
207+ initial_distances , initial_samples , initial_targets , knn , samples_info
208+ ):
196209 """
197210 For each sample in the initial set, find the closest sample from the
198211 other class and store it in the initial_distances list
@@ -245,12 +258,13 @@ def _with_without(x_sample, samples_info):
245258 associates_targets = [samples_info [tuple (x )][2 ] for x in x_associates ]
246259 associates_neighs = [samples_info [tuple (x )][0 ] for x in x_associates ]
247260
248- for _ , a_target , a_neighs in zip (x_associates ,
249- associates_targets ,
250- associates_neighs ):
261+ for _ , a_target , a_neighs in zip (
262+ x_associates , associates_targets , associates_neighs
263+ ):
251264
252- neighs_targets = np .ravel (np .array ([samples_info [tuple (x )][2 ] for x
253- in a_neighs ])).astype (int )
265+ neighs_targets = np .ravel (
266+ np .array ([samples_info [tuple (x )][2 ] for x in a_neighs ])
267+ ).astype (int )
254268 neighs_targets = neighs_targets .tolist ()
255269
256270 count = np .bincount (neighs_targets [:- 1 ])
@@ -261,8 +275,9 @@ def _with_without(x_sample, samples_info):
261275 for index_a , neigh in enumerate (a_neighs ):
262276 if np .array_equal (neigh , x_sample ):
263277 break
264- count = np .bincount (neighs_targets [:index_a ] + neighs_targets [
265- index_a + 1 :])
278+ count = np .bincount (
279+ neighs_targets [:index_a ] + neighs_targets [index_a + 1 :]
280+ )
266281 max_class = np .where (count == np .amax (count ))[0 ][0 ]
267282 if max_class == a_target :
268283 without += 1
0 commit comments