Skip to content

Commit 1ea512a

Browse files
Format code with yapf, black, autopep8 and isort
This commit fixes the style issues introduced in 45c5eb6 according to the output from yapf, black, autopep8 and isort. Details: https://deepsource.io/gh/dpr1005/Semisupervised-learning-and-instance-selection-methods/transform/3f6f56c5-c550-4f14-967e-eff0799dbff0/
1 parent 45c5eb6 commit 1ea512a

28 files changed

Lines changed: 751 additions & 511 deletions

instance_selection/_CNN.py

Lines changed: 5 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -8,11 +8,10 @@
88
import numpy as np
99
import pandas as pd
1010

11-
from .utils import transform, delete_multiple_element
11+
from .utils import delete_multiple_element, transform
1212

1313

1414
class CNN:
15-
1615
"""
1716
Hart, P. (1968). The condensed nearest neighbor rule (corresp.). IEEE
1817
transactions on information theory, 14(3), 515-516.
@@ -54,7 +53,7 @@ def filter(self, samples, y):
5453
samples = transform(samples, y)
5554
store_classes, indexes = np.unique(samples.target, return_index=True)
5655
store_classes = store_classes.tolist()
57-
store = [samples['data'][x] for x in indexes]
56+
store = [samples["data"][x] for x in indexes]
5857

5958
handbag = []
6059

@@ -82,8 +81,8 @@ def filter(self, samples, y):
8281
delete_multiple_element(handbag, indexes)
8382
del handbag
8483
samples = pd.DataFrame(store, columns=self.x_attr)
85-
y = pd.DataFrame(np.array(store_classes, dtype=object).flatten().astype(
86-
int))
84+
y = pd.DataFrame(
85+
np.array(store_classes, dtype=object).flatten().astype(int))
8786

8887
return samples, y
8988

@@ -106,4 +105,4 @@ class of the sample in the store that is closest to the sample
106105
euc = np.array(euc)
107106
euc_nn = np.amin(euc)
108107
index_nn = np.ravel(np.where(euc == euc_nn))
109-
return store_classes[index_nn[0]]
108+
return store_classes[index_nn[0]]

instance_selection/_DROP3.py

Lines changed: 57 additions & 42 deletions
Original file line number | Diff line number | Diff line change
@@ -16,7 +16,6 @@
1616

1717

1818
class DROP3:
19-
2019
"""
2120
Wilson, D. R., & Martinez, T. R. (2000). Reduction techniques for
2221
instance-based learning algorithms. Machine learning, 38(3), 257-286.
@@ -63,11 +62,17 @@ def filter(self, samples, y):
6362
:param y: DataFrame.
6463
:return: the input dataset with the remaining samples.
6564
"""
66-
initial_distances, initial_samples, initial_targets, knn, \
67-
samples_info = self._create_variables(samples, y)
68-
69-
self._find_associates(initial_distances, initial_samples,
70-
initial_targets, knn, samples_info)
65+
(
66+
initial_distances,
67+
initial_samples,
68+
initial_targets,
69+
knn,
70+
samples_info,
71+
) = self._create_variables(samples, y)
72+
73+
self._find_associates(
74+
initial_distances, initial_samples, initial_targets, knn, samples_info
75+
)
7176

7277
initial_distances.sort(key=lambda x: x[2], reverse=True)
7378

@@ -79,32 +84,37 @@ def filter(self, samples, y):
7984
with_, without = self._with_without(tuple(x_sample), samples_info)
8085

8186
if without >= with_:
82-
initial_distances = initial_distances[:index_x - removed] + \
83-
initial_distances[index_x - removed + 1:]
87+
initial_distances = (
88+
initial_distances[: index_x - removed]
89+
+ initial_distances[index_x - removed + 1:]
90+
)
8491
removed += 1
8592

8693
for a_associate_of_x in samples_info[(tuple(x_sample))][1]:
8794
a_neighs, remaining_samples = self._remove_from_neighs(
88-
a_associate_of_x, initial_distances,
89-
samples_info, x_sample)
95+
a_associate_of_x, initial_distances, samples_info, x_sample
96+
)
9097

9198
knn = NearestNeighbors(
9299
n_neighbors=self.nearest_neighbors + 2,
93-
n_jobs=1, p=self.power_parameter)
100+
n_jobs=1,
101+
p=self.power_parameter,
102+
)
94103
knn.fit(remaining_samples)
95104
_, neigh_ind = knn.kneighbors([a_associate_of_x])
96-
possible_neighs = [initial_distances[x][0] for x in
97-
neigh_ind[0]]
105+
possible_neighs = [initial_distances[x][0]
106+
for x in neigh_ind[0]]
98107

99-
self._find_new_neighs(a_associate_of_x, a_neighs,
100-
possible_neighs, samples_info)
108+
self._find_new_neighs(
109+
a_associate_of_x, a_neighs, possible_neighs, samples_info
110+
)
101111

102112
new_neigh = a_neighs[-1]
103-
samples_info[tuple(new_neigh)][1].append(
104-
a_associate_of_x)
113+
samples_info[tuple(new_neigh)][1].append(a_associate_of_x)
105114

106-
samples = pd.DataFrame([x for x, _, _ in initial_distances],
107-
columns=self.x_attr)
115+
samples = pd.DataFrame(
116+
[x for x, _, _ in initial_distances], columns=self.x_attr
117+
)
108118
y = pd.DataFrame([x for _, x, _ in initial_distances])
109119

110120
return samples, y
@@ -122,23 +132,24 @@ def _create_variables(self, samples, y):
122132
self.x_attr = samples.keys()
123133
samples = transform(samples, y)
124134
s = copy.deepcopy(samples)
125-
initial_samples = s['data']
126-
initial_targets = s['target']
127-
initial_samples, samples_index = np.unique(ar=initial_samples,
128-
return_index=True, axis=0)
135+
initial_samples = s["data"]
136+
initial_targets = s["target"]
137+
initial_samples, samples_index = np.unique(
138+
ar=initial_samples, return_index=True, axis=0
139+
)
129140
initial_targets = initial_targets[samples_index]
130-
knn = NearestNeighbors(n_neighbors=self.nearest_neighbors + 2, n_jobs=1,
131-
p=self.power_parameter)
141+
knn = NearestNeighbors(
142+
n_neighbors=self.nearest_neighbors + 2, n_jobs=1, p=self.power_parameter
143+
)
132144
knn.fit(initial_samples)
133-
samples_info = {tuple(x): [[], [], y] for x, y in zip(initial_samples,
134-
initial_targets)}
145+
samples_info = {
146+
tuple(x): [[], [], y] for x, y in zip(initial_samples, initial_targets)
147+
}
135148
initial_distances = []
136-
return initial_distances, initial_samples, initial_targets, knn, \
137-
samples_info
149+
return initial_distances, initial_samples, initial_targets, knn, samples_info
138150

139151
@staticmethod
140-
def _find_new_neighs(a_associate_of_x, a_neighs, possible_neighs,
141-
samples_info):
152+
def _find_new_neighs(a_associate_of_x, a_neighs, possible_neighs, samples_info):
142153
"""
143154
> The function takes a sample, finds its neighbors, and then checks if
144155
any of the neighbors are not already in the list of neighbors. If
@@ -162,8 +173,9 @@ def _find_new_neighs(a_associate_of_x, a_neighs, possible_neighs,
162173
samples_info[tuple(a_associate_of_x)][0] = a_neighs
163174

164175
@staticmethod
165-
def _remove_from_neighs(a_associate_of_x, initial_distances,
166-
samples_info, x_sample):
176+
def _remove_from_neighs(
177+
a_associate_of_x, initial_distances, samples_info, x_sample
178+
):
167179
"""
168180
> It removes the sample `x_sample` from the list of neighbors of
169181
`a_associate_of_x` and returns the updated list of neighbors of
@@ -191,8 +203,9 @@ def _remove_from_neighs(a_associate_of_x, initial_distances,
191203
return a_neighs, remaining_samples
192204

193205
@staticmethod
194-
def _find_associates(initial_distances, initial_samples, initial_targets,
195-
knn, samples_info):
206+
def _find_associates(
207+
initial_distances, initial_samples, initial_targets, knn, samples_info
208+
):
196209
"""
197210
For each sample in the initial set, find the closest sample from the
198211
other class and store it in the initial_distances list
@@ -245,12 +258,13 @@ def _with_without(x_sample, samples_info):
245258
associates_targets = [samples_info[tuple(x)][2] for x in x_associates]
246259
associates_neighs = [samples_info[tuple(x)][0] for x in x_associates]
247260

248-
for _, a_target, a_neighs in zip(x_associates,
249-
associates_targets,
250-
associates_neighs):
261+
for _, a_target, a_neighs in zip(
262+
x_associates, associates_targets, associates_neighs
263+
):
251264

252-
neighs_targets = np.ravel(np.array([samples_info[tuple(x)][2] for x
253-
in a_neighs])).astype(int)
265+
neighs_targets = np.ravel(
266+
np.array([samples_info[tuple(x)][2] for x in a_neighs])
267+
).astype(int)
254268
neighs_targets = neighs_targets.tolist()
255269

256270
count = np.bincount(neighs_targets[:-1])
@@ -261,8 +275,9 @@ def _with_without(x_sample, samples_info):
261275
for index_a, neigh in enumerate(a_neighs):
262276
if np.array_equal(neigh, x_sample):
263277
break
264-
count = np.bincount(neighs_targets[:index_a] + neighs_targets[
265-
index_a + 1:])
278+
count = np.bincount(
279+
neighs_targets[:index_a] + neighs_targets[index_a + 1:]
280+
)
266281
max_class = np.where(count == np.amax(count))[0][0]
267282
if max_class == a_target:
268283
without += 1

instance_selection/_ENN.py

Lines changed: 25 additions & 23 deletions
Original file line number | Diff line number | Diff line change
@@ -13,7 +13,6 @@
1313

1414

1515
class ENN:
16-
1716
"""
1817
Wilson, D. L. (1972). Asymptotic properties of nearest neighbor rules
1918
using edited data. IEEE Transactions on Systems, Man, and
@@ -62,12 +61,12 @@ def _neighs(self, s_samples, s_targets, index, removed):
6261
"""
6362
x_sample = s_samples[index - removed]
6463
x_target = s_targets[index - removed]
65-
knn = NearestNeighbors(n_jobs=-1,
66-
n_neighbors=self.nearest_neighbors, p=2)
67-
samples_not_x = s_samples[:index - removed] + s_samples[
68-
index - removed + 1:]
69-
targets_not_x = s_targets[:index - removed] + s_targets[
70-
index - removed + 1:]
64+
knn = NearestNeighbors(
65+
n_jobs=-1, n_neighbors=self.nearest_neighbors, p=2)
66+
samples_not_x = s_samples[: index - removed] + \
67+
s_samples[index - removed + 1:]
68+
targets_not_x = s_targets[: index - removed] + \
69+
s_targets[index - removed + 1:]
7170
knn.fit(samples_not_x)
7271
_, neigh_ind = knn.kneighbors([x_sample])
7372

@@ -88,16 +87,18 @@ def filter(self, samples, y):
8887
"""
8988
self.x_attr = samples.keys()
9089
samples = transform(samples, y)
91-
size = len(samples['data'])
92-
s_samples = list(samples['data'])
93-
s_targets = list(samples['target'])
90+
size = len(samples["data"])
91+
s_samples = list(samples["data"])
92+
s_targets = list(samples["target"])
9493
removed = 0
9594

9695
for index in range(size):
97-
_, x_target, targets_not_x, samples_not_x, neigh_ind = \
98-
self._neighs(s_samples, s_targets, index, removed)
96+
_, x_target, targets_not_x, samples_not_x, neigh_ind = self._neighs(
97+
s_samples, s_targets, index, removed
98+
)
9999
y_targets = np.ravel(
100-
np.array([targets_not_x[x] for x in neigh_ind[0]])).astype(int)
100+
np.array([targets_not_x[x] for x in neigh_ind[0]])
101+
).astype(int)
101102
count = np.bincount(y_targets)
102103
max_class = np.where(count == np.amax(count))[0][0]
103104
if max_class != x_target:
@@ -110,8 +111,7 @@ def filter(self, samples, y):
110111

111112
return samples, y
112113

113-
def filter_original_complete(self, original, original_y, complete,
114-
complete_y):
114+
def filter_original_complete(self, original, original_y, complete, complete_y):
115115
"""
116116
Modification of the Wilson Editing algorithm.
117117
@@ -129,17 +129,19 @@ def filter_original_complete(self, original, original_y, complete,
129129
:return: the input dataset with the remaining samples.
130130
"""
131131
self.x_attr = original.keys()
132-
original, complete = transform_original_complete(original, original_y,
133-
complete, complete_y)
134-
size = len(complete['data'])
135-
s_samples = list(complete['data'])
136-
s_targets = list(complete['target'])
137-
o_samples = list(original['data'])
132+
original, complete = transform_original_complete(
133+
original, original_y, complete, complete_y
134+
)
135+
size = len(complete["data"])
136+
s_samples = list(complete["data"])
137+
s_targets = list(complete["target"])
138+
o_samples = list(original["data"])
138139
removed = 0
139140

140141
for index in range(size):
141-
x_sample, x_target, targets_not_x, samples_not_x, neigh_ind = \
142-
self._neighs(s_samples, s_targets, index, removed)
142+
x_sample, x_target, targets_not_x, samples_not_x, neigh_ind = self._neighs(
143+
s_samples, s_targets, index, removed
144+
)
143145
y_targets = [targets_not_x[x] for x in neigh_ind[0]]
144146
count = np.bincount(np.ravel(y_targets))
145147
max_class = np.where(count == np.amax(count))[0][0]

0 commit comments

Comments
 (0)