Skip to content

Commit 76f9526

Browse files
Merge pull request #210 from dpr1005/development
Improved code quality
2 parents 348ead4 + e7cb91a commit 76f9526

File tree

12 files changed

+79
-41
lines changed

12 files changed

+79
-41
lines changed

.deepsource.toml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
version = 1
22

33
test_patterns = [
4-
"test/**",
5-
"test_*"
4+
"\"tests/**\",",
5+
"\"test_*.py\","
66
]
77

88
exclude_patterns = [

instance_selection/_CNN.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -79,7 +79,7 @@ def filter(self, samples, y):
7979
indexes.append(index)
8080
store_not_modified = True
8181
delete_multiple_element(handbag, indexes)
82-
del handbag
82+
8383
samples = pd.DataFrame(store, columns=self.x_attr)
8484
y = pd.DataFrame(
8585
np.array(store_classes, dtype=object).flatten().astype(int))

instance_selection/_ENN.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -47,11 +47,11 @@ def __init__(self, nearest_neighbors=3, power_parameter=2):
4747

4848
def _neighs(self, s_samples, s_targets, index, removed):
4949
"""
50-
_neighs() takes in the samples and targets, the index of the sample to
51-
be removed, and the number of samples already removed. It returns the
52-
sample to be removed, its target, the targets of the samples not yet
53-
removed, the samples not yet removed, and the indices of the nearest
54-
neighbors of the sample to be removed.
50+
The function takes in the samples and targets, the index of the
51+
sample to be removed, and the number of samples already removed. It
52+
returns the sample to be removed, its target, the targets of the
53+
samples not yet removed, the samples not yet removed, and the
54+
indices of the nearest neighbors of the sample to be removed.
5555
5656
:param s_samples: the samples that are being used to train the model
5757
:param s_targets: the targets of the samples

instance_selection/_LocalSets.py

Lines changed: 13 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,19 @@ def _find_enemy_distance(
8888
label,
8989
labels,
9090
):
91+
"""
92+
It finds the closest enemy sample to the current sample
93+
94+
:param closest_enemy_distance: the distance to the closest enemy sample
95+
:param closest_enemy_sample: the index of the closest enemy sample
96+
:param distances: the distance matrix
97+
:param index: the index of the current sample
98+
:param instances: the data
99+
:param label: the label of the current sample
100+
:param labels: the labels of the samples
101+
:return: The closest enemy distance and the index of the closest enemy
102+
sample.
103+
"""
91104
for index2, (_, label2) in enumerate(zip(instances, labels)):
92105
if index == index2 or label == label2:
93106
continue
@@ -146,10 +159,6 @@ class LSSm(LocalSets):
146159
147160
"""
148161

149-
def __init__(self):
150-
"""A constructor for the class."""
151-
super().__init__()
152-
153162
def filter(self, instances, labels):
154163
"""
155164
The function takes in a dataframe of instances and a dataframe of
@@ -200,10 +209,6 @@ class LSBo(LocalSets):
200209
201210
"""
202211

203-
def __init__(self):
204-
"""A constructor for the class."""
205-
super(LSBo, self).__init__()
206-
207212
def filter(self, instances, labels):
208213
"""
209214
> The function takes in a dataframe of instances and a dataframe of

instance_selection/_MSS.py

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -83,17 +83,15 @@ def _enemy_distance(dat, tar):
8383
:return: A list of lists, where each list contains a sample, its class,
8484
and its distance to its nearest enemy.
8585
"""
86-
8786
solution = []
8887
for sample, x_class in zip(dat, tar):
8988
distance = sys.maxsize
9089
for sample_1, x1_class in zip(dat, tar):
9190
if x1_class == x_class:
9291
continue
93-
else:
94-
euc = np.linalg.norm(sample - sample_1)
95-
if euc < distance:
96-
distance = euc
92+
euc = np.linalg.norm(sample - sample_1)
93+
if euc < distance:
94+
distance = euc
9795
solution.append([sample, x_class, distance])
9896

9997
solution.sort(key=lambda x: x[2])

semisupervised/DemocraticCoLearning.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -395,7 +395,7 @@ def predict(self, samples):
395395
confidence = [0 for _ in range(self.n_labels)]
396396
for index, j in enumerate(gj):
397397
izq = (j + 0.5) / (j + 1)
398-
div = True if j != 0 else False
398+
div = j != 0
399399
if div:
400400
der = [
401401
(gj_h[0][index] * self.w1) / gj[index],

semisupervised/DensityPeaks.py

Lines changed: 27 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,22 @@ def __init__(
7070
else:
7171
self.filter = None
7272

73+
self.y = None
74+
self.low = None
75+
self.u = None
76+
self.classifier_stdpnf = None
77+
self.order = None
78+
self.structure = None
79+
self.structure_stdnpf = None
80+
self.n_id = None
81+
self.distances = None
82+
self.max_dis = None
83+
self.min_dis = None
84+
self.rho = None
85+
self.delta = None
86+
self.nneigh = None
87+
self.data = None
88+
7389
def __build_distance(self):
7490
"""
7591
Calculate distance dict.
@@ -182,9 +198,13 @@ def __min_neighbor_and_distance(self):
182198
183199
:return: distance vector, nearest neighbor vector
184200
"""
201+
if self.rho is None:
202+
raise ValueError("Encountered rho as None.")
203+
185204
sort_rho_idx = np.argsort(-self.rho)
186205
delta, nneigh = [float(self.max_dis)] * self.n_id, [0] * self.n_id
187206
delta[sort_rho_idx[0]] = -1.0
207+
188208
for i in range(self.n_id):
189209
for j in range(0, i):
190210
old_i, old_j = sort_rho_idx[i], sort_rho_idx[j]
@@ -240,6 +260,7 @@ def __step_a(self):
240260
return samples_labeled
241261

242262
def __discover_structure(self):
263+
"""Discovers the underlying structure."""
243264
self._fit_without()
244265

245266
def __nan_search(self):
@@ -343,22 +364,22 @@ def __enane(self, fx, nan, r):
343364

344365
return es, es_pred
345366

346-
def __init_values(self, l, u, y):
367+
def __init_values(self, low, u, y):
347368
"""
348369
It takes in the lower and upper bounds of the data, and the data itself,
349370
and then calculates the distances between the data points,
350371
the maximum distance, the minimum distance, the dc value, the rho
351372
value, the delta value, the number of neighbors, and the structure
352373
of the data
353374
354-
:param l: lower bound of the data
375+
:param low: lower bound of the data
355376
:param u: upper bound of the data
356377
:param y: the labels of the data
357378
"""
358379
self.y = y
359-
self.l = l
380+
self.low = low
360381
self.u = u
361-
self.data = np.concatenate((l, u), axis=0)
382+
self.data = np.concatenate((low, u), axis=0)
362383
self.n_id = self.data.shape[0]
363384
self.distances, self.max_dis, self.min_dis = self.__build_distance()
364385
self.dc = self.__select_dc()
@@ -447,14 +468,13 @@ def _fit_stdpnf(self):
447468
Self Training based on Density Peaks and a parameter-free noise
448469
filter.
449470
"""
450-
451471
self.__discover_structure()
452472

453473
nan, lambda_param = self.__nan_search()
454474
self.classifier_stdpnf = KNeighborsClassifier(
455475
n_neighbors=self.k, metric=self.distance_metric
456476
)
457-
self.classifier_stdpnf.fit(self.l, self.y)
477+
self.classifier_stdpnf.fit(self.low, self.y)
458478
count = 1
459479

460480
while count <= max(self.order.values()):
@@ -530,7 +550,7 @@ def _if_filter(self, complete, complete_y):
530550
:return: The result is a dataframe with the filtered data.
531551
"""
532552
if isinstance(self.filter, ENN):
533-
original = pd.DataFrame(self.l)
553+
original = pd.DataFrame(self.low)
534554
original_y = pd.DataFrame(self.y)
535555
result, _ = self.filter.filter_original_complete(
536556
original, original_y, complete, complete_y

semisupervised/TriTraining.py

Lines changed: 18 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -18,13 +18,6 @@
1818
from .utils import split
1919

2020

21-
def measure_error(classifier_j, classifier_k, labeled_data):
22-
pred_j = classifier_j.predict(labeled_data)
23-
pred_k = classifier_k.predict(labeled_data)
24-
same = len([0 for x, y in zip(pred_j, pred_k) if x == y])
25-
return (len(pred_j) - same) / same
26-
27-
2821
class TriTraining:
2922
"""
3023
Zhou, Z. H., & Li, M. (2005). Tri-training: Exploiting unlabeled data
@@ -244,7 +237,7 @@ def _train_classifier(self, ep_k, h_i, h_j, h_k, labeled, lp_k, u):
244237
"""
245238
update_k = False
246239
l_k = Bunch(data=np.array([]), target=np.array([]))
247-
e_k = measure_error(h_j, h_k, labeled)
240+
e_k = self.measure_error(h_j, h_k, labeled)
248241
if e_k < ep_k:
249242
for sample in u:
250243
sample_s = sample.reshape(1, -1)
@@ -286,3 +279,20 @@ def predict(self, samples):
286279
labels.append(np.where(count == np.amax(count))[0][0])
287280

288281
return np.array(labels)
282+
283+
@staticmethod
284+
def measure_error(classifier_j, classifier_k, labeled_data):
285+
"""
286+
It returns the number of labeled samples on which classifiers j and k
287+
disagree, divided by the number of samples on which they agree
288+
289+
:param classifier_j: the classifier you want to compare to
290+
:param classifier_k: the classifier that we want to measure the error of
291+
:param labeled_data: the labeled data that we're using to train the
292+
classifiers
293+
:return: The error rate of the two classifiers.
294+
"""
295+
pred_j = classifier_j.predict(labeled_data)
296+
pred_k = classifier_k.predict(labeled_data)
297+
same = len([0 for x, y in zip(pred_j, pred_k) if x == y])
298+
return (len(pred_j) - same) / same

semisupervised/ensemble/_RESSEL.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -80,7 +80,6 @@ def fit(self, labeled, unlabeled, base_estimator, estimator_params=None):
8080
:param estimator_params: dict of params to pass to the estimator.
8181
:return: the ensemble in case is needed.
8282
"""
83-
8483
self._validate_params(base_estimator, labeled, unlabeled)
8584

8685
self._init_ensemble(base_estimator, estimator_params)
@@ -218,7 +217,6 @@ def _robust_self_training(self, iteration, l_i, u_i, oob_i, d_class_i):
218217
:param d_class_i: the proportion of samples to be selected from each
219218
class
220219
"""
221-
222220
y_pred = self.ensemble[iteration].predict(oob_i.iloc[:, :-1])
223221
best_error_i = f1_score(
224222
y_true=np.ravel(oob_i.iloc[:, -1:]),

tests/test_InstanceSelection.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ def to_dataframe(y):
2323
"""
2424
if not isinstance(y, pd.DataFrame):
2525
return pd.DataFrame(y)
26+
return y
2627

2728

2829
@pytest.fixture

0 commit comments

Comments
 (0)