Skip to content

Commit 348ead4

Browse files
Merge pull request #202 from dpr1005/deepsource-transform-8eabe43c
Format code with yapf, black, autopep8 and isort
2 parents: 37331e3 + 385f836; commit: 348ead4

25 files changed

+668 additions, -472 deletions (lines changed)

instance_selection/_CNN.py

Lines changed: 5 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -8,11 +8,10 @@
88
import numpy as np
99
import pandas as pd
1010

11-
from .utils import transform, delete_multiple_element
11+
from .utils import delete_multiple_element, transform
1212

1313

1414
class CNN:
15-
1615
"""
1716
Hart, P. (1968). The condensed nearest neighbor rule (corresp.). IEEE
1817
transactions on information theory, 14(3), 515-516.
@@ -54,7 +53,7 @@ def filter(self, samples, y):
5453
samples = transform(samples, y)
5554
store_classes, indexes = np.unique(samples.target, return_index=True)
5655
store_classes = store_classes.tolist()
57-
store = [samples['data'][x] for x in indexes]
56+
store = [samples["data"][x] for x in indexes]
5857

5958
handbag = []
6059

@@ -82,8 +81,8 @@ def filter(self, samples, y):
8281
delete_multiple_element(handbag, indexes)
8382
del handbag
8483
samples = pd.DataFrame(store, columns=self.x_attr)
85-
y = pd.DataFrame(np.array(store_classes, dtype=object).flatten().astype(
86-
int))
84+
y = pd.DataFrame(
85+
np.array(store_classes, dtype=object).flatten().astype(int))
8786

8887
return samples, y
8988

@@ -106,4 +105,4 @@ class of the sample in the store that is closest to the sample
106105
euc = np.array(euc)
107106
euc_nn = np.amin(euc)
108107
index_nn = np.ravel(np.where(euc == euc_nn))
109-
return store_classes[index_nn[0]]
108+
return store_classes[index_nn[0]]

instance_selection/_DROP3.py

Lines changed: 57 additions & 42 deletions
Original file line number | Diff line number | Diff line change
@@ -16,7 +16,6 @@
1616

1717

1818
class DROP3:
19-
2019
"""
2120
Wilson, D. R., & Martinez, T. R. (2000). Reduction techniques for
2221
instance-based learning algorithms. Machine learning, 38(3), 257-286.
@@ -63,11 +62,17 @@ def filter(self, samples, y):
6362
:param y: DataFrame.
6463
:return: the input dataset with the remaining samples.
6564
"""
66-
initial_distances, initial_samples, initial_targets, knn, \
67-
samples_info = self._create_variables(samples, y)
68-
69-
self._find_associates(initial_distances, initial_samples,
70-
initial_targets, knn, samples_info)
65+
(
66+
initial_distances,
67+
initial_samples,
68+
initial_targets,
69+
knn,
70+
samples_info,
71+
) = self._create_variables(samples, y)
72+
73+
self._find_associates(
74+
initial_distances, initial_samples, initial_targets, knn, samples_info
75+
)
7176

7277
initial_distances.sort(key=lambda x: x[2], reverse=True)
7378

@@ -79,32 +84,37 @@ def filter(self, samples, y):
7984
with_, without = self._with_without(tuple(x_sample), samples_info)
8085

8186
if without >= with_:
82-
initial_distances = initial_distances[:index_x - removed] + \
83-
initial_distances[index_x - removed + 1:]
87+
initial_distances = (
88+
initial_distances[: index_x - removed]
89+
+ initial_distances[index_x - removed + 1:]
90+
)
8491
removed += 1
8592

8693
for a_associate_of_x in samples_info[(tuple(x_sample))][1]:
8794
a_neighs, remaining_samples = self._remove_from_neighs(
88-
a_associate_of_x, initial_distances,
89-
samples_info, x_sample)
95+
a_associate_of_x, initial_distances, samples_info, x_sample
96+
)
9097

9198
knn = NearestNeighbors(
9299
n_neighbors=self.nearest_neighbors + 2,
93-
n_jobs=1, p=self.power_parameter)
100+
n_jobs=1,
101+
p=self.power_parameter,
102+
)
94103
knn.fit(remaining_samples)
95104
_, neigh_ind = knn.kneighbors([a_associate_of_x])
96-
possible_neighs = [initial_distances[x][0] for x in
97-
neigh_ind[0]]
105+
possible_neighs = [initial_distances[x][0]
106+
for x in neigh_ind[0]]
98107

99-
self._find_new_neighs(a_associate_of_x, a_neighs,
100-
possible_neighs, samples_info)
108+
self._find_new_neighs(
109+
a_associate_of_x, a_neighs, possible_neighs, samples_info
110+
)
101111

102112
new_neigh = a_neighs[-1]
103-
samples_info[tuple(new_neigh)][1].append(
104-
a_associate_of_x)
113+
samples_info[tuple(new_neigh)][1].append(a_associate_of_x)
105114

106-
samples = pd.DataFrame([x for x, _, _ in initial_distances],
107-
columns=self.x_attr)
115+
samples = pd.DataFrame(
116+
[x for x, _, _ in initial_distances], columns=self.x_attr
117+
)
108118
y = pd.DataFrame([x for _, x, _ in initial_distances])
109119

110120
return samples, y
@@ -122,23 +132,24 @@ def _create_variables(self, samples, y):
122132
self.x_attr = samples.keys()
123133
samples = transform(samples, y)
124134
s = copy.deepcopy(samples)
125-
initial_samples = s['data']
126-
initial_targets = s['target']
127-
initial_samples, samples_index = np.unique(ar=initial_samples,
128-
return_index=True, axis=0)
135+
initial_samples = s["data"]
136+
initial_targets = s["target"]
137+
initial_samples, samples_index = np.unique(
138+
ar=initial_samples, return_index=True, axis=0
139+
)
129140
initial_targets = initial_targets[samples_index]
130-
knn = NearestNeighbors(n_neighbors=self.nearest_neighbors + 2, n_jobs=1,
131-
p=self.power_parameter)
141+
knn = NearestNeighbors(
142+
n_neighbors=self.nearest_neighbors + 2, n_jobs=1, p=self.power_parameter
143+
)
132144
knn.fit(initial_samples)
133-
samples_info = {tuple(x): [[], [], y] for x, y in zip(initial_samples,
134-
initial_targets)}
145+
samples_info = {
146+
tuple(x): [[], [], y] for x, y in zip(initial_samples, initial_targets)
147+
}
135148
initial_distances = []
136-
return initial_distances, initial_samples, initial_targets, knn, \
137-
samples_info
149+
return initial_distances, initial_samples, initial_targets, knn, samples_info
138150

139151
@staticmethod
140-
def _find_new_neighs(a_associate_of_x, a_neighs, possible_neighs,
141-
samples_info):
152+
def _find_new_neighs(a_associate_of_x, a_neighs, possible_neighs, samples_info):
142153
"""
143154
> The function takes a sample, finds its neighbors, and then checks if
144155
any of the neighbors are not already in the list of neighbors. If
@@ -162,8 +173,9 @@ def _find_new_neighs(a_associate_of_x, a_neighs, possible_neighs,
162173
samples_info[tuple(a_associate_of_x)][0] = a_neighs
163174

164175
@staticmethod
165-
def _remove_from_neighs(a_associate_of_x, initial_distances,
166-
samples_info, x_sample):
176+
def _remove_from_neighs(
177+
a_associate_of_x, initial_distances, samples_info, x_sample
178+
):
167179
"""
168180
> It removes the sample `x_sample` from the list of neighbors of
169181
`a_associate_of_x` and returns the updated list of neighbors of
@@ -191,8 +203,9 @@ def _remove_from_neighs(a_associate_of_x, initial_distances,
191203
return a_neighs, remaining_samples
192204

193205
@staticmethod
194-
def _find_associates(initial_distances, initial_samples, initial_targets,
195-
knn, samples_info):
206+
def _find_associates(
207+
initial_distances, initial_samples, initial_targets, knn, samples_info
208+
):
196209
"""
197210
For each sample in the initial set, find the closest sample from the
198211
other class and store it in the initial_distances list
@@ -245,12 +258,13 @@ def _with_without(x_sample, samples_info):
245258
associates_targets = [samples_info[tuple(x)][2] for x in x_associates]
246259
associates_neighs = [samples_info[tuple(x)][0] for x in x_associates]
247260

248-
for _, a_target, a_neighs in zip(x_associates,
249-
associates_targets,
250-
associates_neighs):
261+
for _, a_target, a_neighs in zip(
262+
x_associates, associates_targets, associates_neighs
263+
):
251264

252-
neighs_targets = np.ravel(np.array([samples_info[tuple(x)][2] for x
253-
in a_neighs])).astype(int)
265+
neighs_targets = np.ravel(
266+
np.array([samples_info[tuple(x)][2] for x in a_neighs])
267+
).astype(int)
254268
neighs_targets = neighs_targets.tolist()
255269

256270
count = np.bincount(neighs_targets[:-1])
@@ -261,8 +275,9 @@ def _with_without(x_sample, samples_info):
261275
for index_a, neigh in enumerate(a_neighs):
262276
if np.array_equal(neigh, x_sample):
263277
break
264-
count = np.bincount(neighs_targets[:index_a] + neighs_targets[
265-
index_a + 1:])
278+
count = np.bincount(
279+
neighs_targets[:index_a] + neighs_targets[index_a + 1:]
280+
)
266281
max_class = np.where(count == np.amax(count))[0][0]
267282
if max_class == a_target:
268283
without += 1

instance_selection/_ENN.py

Lines changed: 25 additions & 23 deletions
Original file line number | Diff line number | Diff line change
@@ -13,7 +13,6 @@
1313

1414

1515
class ENN:
16-
1716
"""
1817
Wilson, D. L. (1972). Asymptotic properties of nearest neighbor rules
1918
using edited data. IEEE Transactions on Systems, Man, and
@@ -62,12 +61,12 @@ def _neighs(self, s_samples, s_targets, index, removed):
6261
"""
6362
x_sample = s_samples[index - removed]
6463
x_target = s_targets[index - removed]
65-
knn = NearestNeighbors(n_jobs=-1,
66-
n_neighbors=self.nearest_neighbors, p=2)
67-
samples_not_x = s_samples[:index - removed] + s_samples[
68-
index - removed + 1:]
69-
targets_not_x = s_targets[:index - removed] + s_targets[
70-
index - removed + 1:]
64+
knn = NearestNeighbors(
65+
n_jobs=-1, n_neighbors=self.nearest_neighbors, p=2)
66+
samples_not_x = s_samples[: index - removed] + \
67+
s_samples[index - removed + 1:]
68+
targets_not_x = s_targets[: index - removed] + \
69+
s_targets[index - removed + 1:]
7170
knn.fit(samples_not_x)
7271
_, neigh_ind = knn.kneighbors([x_sample])
7372

@@ -88,16 +87,18 @@ def filter(self, samples, y):
8887
"""
8988
self.x_attr = samples.keys()
9089
samples = transform(samples, y)
91-
size = len(samples['data'])
92-
s_samples = list(samples['data'])
93-
s_targets = list(samples['target'])
90+
size = len(samples["data"])
91+
s_samples = list(samples["data"])
92+
s_targets = list(samples["target"])
9493
removed = 0
9594

9695
for index in range(size):
97-
_, x_target, targets_not_x, samples_not_x, neigh_ind = \
98-
self._neighs(s_samples, s_targets, index, removed)
96+
_, x_target, targets_not_x, samples_not_x, neigh_ind = self._neighs(
97+
s_samples, s_targets, index, removed
98+
)
9999
y_targets = np.ravel(
100-
np.array([targets_not_x[x] for x in neigh_ind[0]])).astype(int)
100+
np.array([targets_not_x[x] for x in neigh_ind[0]])
101+
).astype(int)
101102
count = np.bincount(y_targets)
102103
max_class = np.where(count == np.amax(count))[0][0]
103104
if max_class != x_target:
@@ -110,8 +111,7 @@ def filter(self, samples, y):
110111

111112
return samples, y
112113

113-
def filter_original_complete(self, original, original_y, complete,
114-
complete_y):
114+
def filter_original_complete(self, original, original_y, complete, complete_y):
115115
"""
116116
Modification of the Wilson Editing algorithm.
117117
@@ -129,17 +129,19 @@ def filter_original_complete(self, original, original_y, complete,
129129
:return: the input dataset with the remaining samples.
130130
"""
131131
self.x_attr = original.keys()
132-
original, complete = transform_original_complete(original, original_y,
133-
complete, complete_y)
134-
size = len(complete['data'])
135-
s_samples = list(complete['data'])
136-
s_targets = list(complete['target'])
137-
o_samples = list(original['data'])
132+
original, complete = transform_original_complete(
133+
original, original_y, complete, complete_y
134+
)
135+
size = len(complete["data"])
136+
s_samples = list(complete["data"])
137+
s_targets = list(complete["target"])
138+
o_samples = list(original["data"])
138139
removed = 0
139140

140141
for index in range(size):
141-
x_sample, x_target, targets_not_x, samples_not_x, neigh_ind = \
142-
self._neighs(s_samples, s_targets, index, removed)
142+
x_sample, x_target, targets_not_x, samples_not_x, neigh_ind = self._neighs(
143+
s_samples, s_targets, index, removed
144+
)
143145
y_targets = [targets_not_x[x] for x in neigh_ind[0]]
144146
count = np.bincount(np.ravel(y_targets))
145147
max_class = np.where(count == np.amax(count))[0][0]

0 commit comments

Comments
 (0)