Commit f2a7b15

Minor refactors #207
1 parent cc2e1df commit f2a7b15

2 files changed: +25 -14 lines


instance_selection/_ENN.py

Lines changed: 5 additions & 5 deletions
@@ -47,11 +47,11 @@ def __init__(self, nearest_neighbors=3, power_parameter=2):
 
     def _neighs(self, s_samples, s_targets, index, removed):
         """
-        _neighs() takes in the samples and targets, the index of the sample to
-        be removed, and the number of samples already removed. It returns the
-        sample to be removed, its target, the targets of the samples not yet
-        removed, the samples not yet removed, and the indices of the nearest
-        neighbors of the sample to be removed.
+        The function takes in the samples and targets, the index of the
+        sample to be removed, and the number of samples already removed. It
+        returns the sample to be removed, its target, the targets of the
+        samples not yet removed, the samples not yet removed, and the
+        indices of the nearest neighbors of the sample to be removed.
 
         :param s_samples: the samples that are being used to train the model
         :param s_targets: the targets of the samples
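
For context on the reworded docstring, here is a minimal sketch of a helper with the contract it describes, assuming numpy arrays and scikit-learn's NearestNeighbors; _neighs_sketch is a hypothetical stand-in, and k and p mirror the nearest_neighbors=3 and power_parameter=2 defaults visible in the hunk header. This illustrates the documented behaviour, not the actual _ENN.py implementation.

import numpy as np
from sklearn.neighbors import NearestNeighbors

def _neighs_sketch(s_samples, s_targets, index, removed, k=3, p=2):
    # Shift the requested index by the number of samples already removed.
    pos = index - removed
    # The sample to be removed and its target.
    x_sample = s_samples[pos]
    x_target = s_targets[pos]
    # The samples and targets not yet removed.
    samples_not_x = np.delete(s_samples, pos, axis=0)
    targets_not_x = np.delete(s_targets, pos, axis=0)
    # Indices of the nearest neighbours of the sample among the remaining
    # ones, using k neighbours and Minkowski power parameter p.
    knn = NearestNeighbors(n_neighbors=k, p=p)
    knn.fit(samples_not_x)
    _, neigh_ind = knn.kneighbors(x_sample.reshape(1, -1))
    return x_sample, x_target, targets_not_x, samples_not_x, neigh_ind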

semisupervised/TriTraining.py

Lines changed: 20 additions & 9 deletions
@@ -18,13 +18,6 @@
 from .utils import split
 
 
-def measure_error(classifier_j, classifier_k, labeled_data):
-    pred_j = classifier_j.predict(labeled_data)
-    pred_k = classifier_k.predict(labeled_data)
-    same = len([0 for x, y in zip(pred_j, pred_k) if x == y])
-    return (len(pred_j) - same) / same
-
-
 class TriTraining:
     """
     Zhou, Z. H., & Li, M. (2005). Tri-training: Exploiting unlabeled data
@@ -203,7 +196,8 @@ def fit(self, samples, y):
            ):
                break
 
-    def _check_for_update(self, e_j, ep_j, h_j, l_j, labeled, lp_j, update_j, y):
+    def _check_for_update(self, e_j, ep_j, h_j, l_j,
+                          labeled, lp_j, update_j, y):
         """
         If the update_j flag is True, then we concatenate the labeled data with
         the new data, and fit the model to the new data
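
The docstring above describes a concatenate-and-refit step. A minimal sketch of that pattern, assuming the Bunch(data=..., target=...) layout used elsewhere in this diff; _apply_update is a hypothetical helper name, and the real method also maintains the e_j, ep_j, and lp_j bookkeeping omitted here.

import numpy as np

def _apply_update(update_j, h_j, labeled, l_j):
    # When the update flag is set, merge the labeled pool with the
    # newly labeled batch and refit classifier j on the result.
    if update_j:
        data = np.concatenate((labeled.data, l_j.data))
        target = np.concatenate((labeled.target, l_j.target))
        h_j.fit(data, target)
    return h_j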
@@ -244,7 +238,7 @@ def _train_classifier(self, ep_k, h_i, h_j, h_k, labeled, lp_k, u):
         """
         update_k = False
         l_k = Bunch(data=np.array([]), target=np.array([]))
-        e_k = measure_error(h_j, h_k, labeled)
+        e_k = self.measure_error(h_j, h_k, labeled)
         if e_k < ep_k:
             for sample in u:
                 sample_s = sample.reshape(1, -1)
@@ -286,3 +280,20 @@ def predict(self, samples):
             labels.append(np.where(count == np.amax(count))[0][0])
 
         return np.array(labels)
+
+    @staticmethod
+    def measure_error(classifier_j, classifier_k, labeled_data):
+        """
+        It returns the fraction of the time that classifiers j and k disagree on
+        the labels of the labeled data
+
+        :param classifier_j: the classifier you want to compare to
+        :param classifier_k: the classifier that we want to measure the error of
+        :param labeled_data: the labeled data that we're using to train the
+            classifiers
+        :return: The error rate of the two classifiers.
+        """
+        pred_j = classifier_j.predict(labeled_data)
+        pred_k = classifier_k.predict(labeled_data)
+        same = len([0 for x, y in zip(pred_j, pred_k) if x == y])
+        return (len(pred_j) - same) / same
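
A quick check of what the relocated measure_error computes: same counts the predictions on which the two classifiers agree, so the return value is the number of disagreements divided by the number of agreements (a ratio rather than a fraction of all predictions, and undefined when the classifiers never agree). A self-contained usage sketch, assuming the package is importable as semisupervised; _Stub is an illustrative stand-in exposing only the predict() interface the method relies on.

import numpy as np
from semisupervised.TriTraining import TriTraining

class _Stub:
    # Minimal stand-in with the predict() interface the method expects.
    def __init__(self, labels):
        self.labels = np.asarray(labels)

    def predict(self, data):
        return self.labels

j = _Stub([0, 0, 1, 1])  # predictions from classifier j
k = _Stub([0, 1, 1, 1])  # classifier k disagrees on one of four samples
data = np.zeros((4, 2))  # placeholder features; the stubs ignore them

# One disagreement over three agreements: prints 0.3333...
print(TriTraining.measure_error(j, k, data))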
