Commit a6e6eb6

Docs #194
1 parent f101b76 commit a6e6eb6

3 files changed: +34 −37 lines

semisupervised/DemocraticCoLearning.py

Lines changed: 26 additions & 25 deletions

@@ -6,6 +6,7 @@
 # @Version: 5.0

 import copy
+import warnings
 from math import sqrt

 import numpy as np
@@ -17,21 +18,6 @@
 from .utils import split


-def check_bounds(wi):
-    """
-    It checks that the lower bound is not less than 0 and the upper bound is not
-    greater than 1
-
-    :param wi: lower and upper mean confidence
-    :return: the fixed wi.
-    """
-    if wi[0] < 0:
-        wi[0] = 0
-    if wi[1] > 1:
-        wi[1] = 1
-    return wi
-
-
 class DemocraticCoLearning:
     """
     Democratic Co-Learning Implementation. Based on:
@@ -168,7 +154,7 @@ def fit(self, samples, y):
                                               len(labeled)),
                   error + self.const * sqrt((error * (1 - error)) /
                                             len(labeled))]
-            w1 = sum(check_bounds(w1)) / 2
+            w1 = sum(self.check_bounds(w1)) / 2

             for index, proba in enumerate(probas):
                 c_k = new_labels[index][0]
@@ -193,7 +179,7 @@ def fit(self, samples, y):
                                               len(labeled)),
                   error + self.const * sqrt((error * (1 - error)) /
                                             len(labeled))]
-            w2 = sum(check_bounds(w2)) / 2
+            w2 = sum(self.check_bounds(w2)) / 2

             for index, proba in enumerate(probas):
                 c_k = new_labels[index][0]
@@ -218,7 +204,7 @@ def fit(self, samples, y):
                                               len(labeled)),
                   error + self.const * sqrt((error * (1 - error)) /
                                             len(labeled))]
-            w3 = sum(check_bounds(w3)) / 2
+            w3 = sum(self.check_bounds(w3)) / 2

             for index, proba in enumerate(probas):
                 c_k = new_labels[index][0]
@@ -249,7 +235,7 @@ def fit(self, samples, y):
             ci_1 = [
                 error - self.const * sqrt((error * (1 - error)) / len(pred)),
                 error + self.const * sqrt((error * (1 - error)) / len(pred))]
-            ci_1 = check_bounds(ci_1)
+            ci_1 = self.check_bounds(ci_1)
             q_1 = len(pred) * pow((1 - 2 * (e_1 / len(pred))), 2)
             e_prime_1 = (1 - (ci_1[0] * len(pred)) / len(pred)) * len(pred)
             q_prime_1 = (len(l1_data) + len(pred)) * pow(
@@ -273,7 +259,7 @@ def fit(self, samples, y):
             ci_2 = [
                 error - self.const * sqrt((error * (1 - error)) / len(pred)),
                 error + self.const * sqrt((error * (1 - error)) / len(pred))]
-            ci_2 = check_bounds(ci_2)
+            ci_2 = self.check_bounds(ci_2)
             q_2 = len(pred) * pow((1 - 2 * (e_2 / len(pred))), 2)
             e_prime_2 = (1 - (ci_2[0] * len(pred)) / len(pred)) * len(pred)
             q_prime_2 = (len(l2_data) + len(pred)) * pow(
@@ -297,7 +283,7 @@ def fit(self, samples, y):
             ci_3 = [
                 error - self.const * sqrt((error * (1 - error)) / len(pred)),
                 error + self.const * sqrt((error * (1 - error)) / len(pred))]
-            ci_3 = check_bounds(ci_3)
+            ci_3 = self.check_bounds(ci_3)
             q_3 = len(pred) * pow((1 - 2 * (e_3 / len(pred))), 2)
             e_prime_3 = (1 - (ci_3[0] * len(pred)) / len(pred)) * len(pred)
             q_prime_3 = (len(l3_data) + len(pred)) * pow(
@@ -316,17 +302,17 @@ def fit(self, samples, y):
         error = len([0 for p, tar in zip(pred, y) if p != tar]) / len(pred)
         w1 = [error - self.const * sqrt((error * (1 - error)) / len(labeled)),
               error + self.const * sqrt((error * (1 - error)) / len(labeled))]
-        self.w1 = sum(check_bounds(w1)) / 2
+        self.w1 = sum(self.check_bounds(w1)) / 2
         pred = self.h2.predict(labeled)
         error = len([0 for p, tar in zip(pred, y) if p != tar]) / len(pred)
         w2 = [error - self.const * sqrt((error * (1 - error)) / len(labeled)),
               error + self.const * sqrt((error * (1 - error)) / len(labeled))]
-        self.w2 = sum(check_bounds(w2)) / 2
+        self.w2 = sum(self.check_bounds(w2)) / 2
         pred = self.h3.predict(labeled)
         error = len([0 for p, tar in zip(pred, y) if p != tar]) / len(pred)
         w3 = [error - self.const * sqrt((error * (1 - error)) / len(labeled)),
               error + self.const * sqrt((error * (1 - error)) / len(labeled))]
-        self.w3 = sum(check_bounds(w3)) / 2
+        self.w3 = sum(self.check_bounds(w3)) / 2

     def predict(self, samples):
         """
@@ -361,7 +347,7 @@ def predict(self, samples):
                     gj[p] += 1
                     gj_h[2][p] += 1
                 except IndexError:
-                    breakpoint()
+                    warnings.warn("Retraining the model is advised.")

         confidence = [0 for _ in range(self.n_labels)]
         for index, j in enumerate(gj):
@@ -386,3 +372,18 @@ def predict(self, samples):
             labels.append(np.where(count == np.amax(count))[0][0])

         return np.array(labels)
+
+    @staticmethod
+    def check_bounds(wi):
+        """
+        It checks that the lower bound is not less than 0 and the upper bound
+        is not greater than 1
+
+        :param wi: lower and upper mean confidence
+        :return: the fixed wi.
+        """
+        if wi[0] < 0:
+            wi[0] = 0
+        if wi[1] > 1:
+            wi[1] = 1
+        return wi
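
Note on the relocated helper: check_bounds only clips a two-element confidence interval into [0, 1] before it is averaged into a classifier weight, and as a @staticmethod it can be called without fitting anything. A minimal sketch of the new call pattern, assuming the package is importable from the repository root; the interval values are made up for illustration:

    from semisupervised.DemocraticCoLearning import DemocraticCoLearning

    # Illustrative interval that overshoots [0, 1] on both ends.
    ci = DemocraticCoLearning.check_bounds([-0.05, 1.10])  # -> [0, 1]
    w = sum(ci) / 2  # mean confidence, mirroring how fit() uses it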

semisupervised/TriTraining.py

Lines changed: 7 additions & 7 deletions

@@ -80,7 +80,7 @@ def __init__(self, random_state=None,
         self.random_state = random_state if random_state is not None else \
             np.random.randint(low=0, high=10e5, size=1)[0]

-    def subsample(self, l_t, s):
+    def _subsample(self, l_t, s):
         np.random.seed(self.random_state)
         rng = np.random.default_rng()
         data = np.array(l_t['data'])
@@ -144,8 +144,8 @@ def fit(self, samples, y):
                 if e_j * len(l_j['data']) < ep_j * lp_j:
                     update_j = True
                 elif lp_j > e_j / (ep_j - e_j):
-                    l_j = self.subsample(l_j, ceil(((ep_j * lp_j) / e_j)
-                                                   - 1))
+                    l_j = self._subsample(l_j, ceil(((ep_j * lp_j) / e_j)
+                                                    - 1))
                     update_j = True

             update_k = False
@@ -171,8 +171,8 @@ def fit(self, samples, y):
                 if e_k * len(l_k['data']) < ep_k * lp_k:
                     update_k = True
                 elif lp_k > e_k / (ep_k - e_k):
-                    l_k = self.subsample(l_k, ceil(((ep_k * lp_k) / e_k)
-                                                   - 1))
+                    l_k = self._subsample(l_k, ceil(((ep_k * lp_k) / e_k)
+                                                    - 1))
                     update_k = True

             update_i = False
@@ -198,8 +198,8 @@ def fit(self, samples, y):
                 if e_i * len(l_i['data']) < ep_i * lp_i:
                     update_i = True
                 elif lp_i > e_i / (ep_i - e_i):
-                    l_i = self.subsample(l_i, ceil(((ep_i * lp_i) / e_i)
-                                                   - 1))
+                    l_i = self._subsample(l_i, ceil(((ep_i * lp_i) / e_i)
+                                                    - 1))
                     update_i = True

             if update_j:
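
Note on the rename: the leading underscore only marks the helper as internal to TriTraining, so the in-class call sites above are the only ones the diff has to touch. A hypothetical external caller would migrate as sketched below, assuming the remaining constructor arguments have defaults; l_t is the labelled-pool dict and s the reduced size, as in the method above:

    tt = TriTraining(random_state=42)
    # before this commit: tt.subsample(l_t, s)
    # after this commit:  tt._subsample(l_t, s)  # private by convention; external code should not rely on it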

utils/__init__.py

Lines changed: 1 addition & 5 deletions

@@ -1,8 +1,4 @@
-#!/usr/bin/env python
-# -*- coding:utf-8 -*-
-# @Filename: __init__.py.py
-# @Author: Daniel Puente Ramírez
-# @Time: 22/12/21 18:05
+"""Utils ARFF"""

 from .arff2dataset import arff_data

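
Note on utils/__init__.py: only the module header changes; the re-export of arff_data stays in place, so a downstream import keeps resolving. Shown as a sketch only, since arff_data's signature is not part of this diff:

    from utils import arff_data  # still re-exported by utils/__init__.py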