Commit 4f036b3
Improved SSL algorithms' documentation #194
1 parent 068dcac commit 4f036b3

File tree

8 files changed: +423, -116 lines changed

semisupervised/CoTraining.py

Lines changed: 99 additions & 18 deletions
@@ -3,7 +3,7 @@
 # @Filename: CoTraining.py
 # @Author: Daniel Puente Ramírez
 # @Time: 22/12/21 09:27
-# @Version: 4.0
+# @Version: 5.0
 
 from math import ceil, floor

@@ -15,15 +15,66 @@
 
 
 class CoTraining:
-    """Blum, A., & Mitchell, T. (1998, July). Combining labeled and unlabeled
-    data with co-training. In Proceedings of the eleventh annual conference
-    on Computational learning theory (pp. 92-100).
+    """
+    Blum, A., & Mitchell, T. (1998, July). Combining labeled and unlabeled
+    data with co-training. In Proceedings of the eleventh annual conference
+    on Computational learning theory (pp. 92-100).
+
+    Parameters
+    ----------
+    p : int, default=1
+        The number of positive samples.
+
+    n : int, default=3
+        The number of negative samples.
+
+    k : int, default=30
+        The number of iterations to train the classifiers.
+
+    u : int, default=75
+        The number of unlabeled samples to use in the training set.
+
+    random_state : int, default=None
+        The random seed used to generate the initial population.
+
+    c1 : base_estimator, default=GaussianNB
+        The first classifier to be used.
+
+    c1_params : dict, default=None
+        Parameters for the first classifier.
+
+    c2 : base_estimator, default=GaussianNB
+        The second classifier to be used.
+
+    c2_params : dict, default=None
+        Parameters for the second classifier.
     """
 
     def __init__(self, p=1, n=3, k=30, u=75, random_state=None,
                  c1=None, c1_params=None,
-                 c2=None, c2_params=None,
-                 ):
+                 c2=None, c2_params=None):
+        """
+        Build the two base classifiers from the given parameters. If a
+        classifier is None, the default GaussianNB is used.
+
+        :param p: The number of positive samples, defaults to 1 (optional)
+        :param n: The number of negative samples, defaults to 3 (optional)
+        :param k: The number of iterations to train the classifiers,
+            defaults to 30 (optional)
+        :param u: The number of unlabeled samples to use in the training
+            set, defaults to 75 (optional)
+        :param random_state: The random seed used to generate the initial
+            population
+        :param c1: The first classifier to be used
+        :param c1_params: Parameters for the first classifier
+        :param c2: The second classifier to be used
+        :param c2_params: Parameters for the second classifier
+        """
         self.p = p
         self.n = n
         self.k = k
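
For reference, a minimal usage sketch of the constructor this hunk documents. The import path and the estimator-class-plus-params convention for c1/c2 are assumptions for illustration, not something this commit states:

from sklearn.naive_bayes import GaussianNB
from sklearn.tree import DecisionTreeClassifier

from semisupervised.CoTraining import CoTraining  # assumed import path

# Defaults: both views fall back to GaussianNB.
ct_default = CoTraining()

# Custom base estimators; c1/c2 are assumed to take an estimator class
# plus a keyword dict, mirroring the c1_params/c2_params arguments.
ct_custom = CoTraining(p=1, n=3, k=30, u=75, random_state=42,
                       c1=DecisionTreeClassifier,
                       c1_params={'max_depth': 3},
                       c2=GaussianNB)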
@@ -46,10 +97,17 @@ def __init__(self, p=1, n=3, k=30, u=75, random_state=None,
         self.h1, self.h2 = configs
 
     def fit(self, samples, y):
-        try:
-            labeled, u, y = split(samples, y)
-        except IndexError:
-            raise ValueError('Dimensions do not match.')
+        """
+        Split the input into labeled and unlabeled samples, train the two
+        classifiers on the labeled ones, and use them to predict the
+        unlabeled ones. The samples predicted with the highest confidence
+        are added to the labeled set, and the process is repeated k times.
+
+        :param samples: the training data, labeled and unlabeled
+        :param y: the labels of the samples
+        """
+        labeled, rng, u, u_random_index, y = self._check_parameters(samples, y)
 
         le = LabelEncoder()
         le.fit(y)
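
The loop this docstring summarizes can be sketched roughly as follows. It is a simplified, self-contained illustration of one co-training round, assuming a binary problem (as the p/n naming suggests); cotraining_round is a hypothetical helper, not the actual body of fit:

import numpy as np

def cotraining_round(h1, h2, x1, x2, y, u_prime, p, n):
    # Fit one classifier per feature view (each view is half the columns).
    h1.fit(x1, y)
    h2.fit(x2, y)
    u1, u2 = np.array_split(u_prime, 2, axis=1)
    promoted = set()
    for h, u_view in ((h1, u1), (h2, u2)):
        proba = h.predict_proba(u_view)
        # p most confidently positive and n most confidently negative rows.
        promoted.update(np.argsort(proba[:, 1])[-p:].tolist())
        promoted.update(np.argsort(proba[:, 0])[-n:].tolist())
    # Indices into u_prime that the caller would move, with their
    # predicted labels, into the labeled pool before the next round.
    return sorted(promoted)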
@@ -58,14 +116,6 @@ def fit(self, samples, y):
 
         self.size_x1 = ceil(len(labeled[0]) / 2)
 
-        rng = np.random.default_rng()
-        try:
-            u_random_index = rng.choice(len(u), size=floor(self.u),
-                                        replace=False, shuffle=False)
-        except ValueError:
-            raise ValueError('The model was incorrectly parametrized, '
-                             'total between _p_ and _u_ is too big.')
-
         u_prime = u[u_random_index]
         u1, u2 = np.array_split(u_prime, 2, axis=1)

@@ -118,7 +168,38 @@ def fit(self, samples, y):
 
             u_prime = np.concatenate((u_prime, u[u_random_index]))
 
+    def _check_parameters(self, samples, y):
+        """
+        Check the parameters of the model and return the labeled samples,
+        the random number generator, the unlabeled samples, the random
+        index into the unlabeled samples, and the labels.
+
+        :param samples: the training data, labeled and unlabeled
+        :param y: the labels of the samples
+        :return: labeled, rng, u, u_random_index, y
+        """
+        try:
+            labeled, u, y = split(samples, y)
+        except IndexError:
+            raise ValueError('Dimensions do not match.')
+        rng = np.random.default_rng()
+        try:
+            u_random_index = rng.choice(len(u), size=floor(self.u),
+                                        replace=False, shuffle=False)
+        except ValueError:
+            raise ValueError('The model was incorrectly parametrized, '
+                             'total between _p_ and _u_ is too big.')
+        return labeled, rng, u, u_random_index, y
+
     def predict(self, samples):
+        """
+        If the predictions of the two classifiers agree, return that
+        prediction; if they disagree, return the prediction of the
+        classifier with the higher probability.
+
+        :param samples: the data to be predicted
+        :return: The labels of the samples.
+        """
         x1, x2 = np.array_split(samples, 2, axis=1)
         pred1, pred_proba1 = self.h1.predict(x1), self.h1.predict_proba(x1)
         pred2, pred_proba2 = self.h2.predict(x2), self.h2.predict_proba(x2)
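
A sketch of the disagreement rule the new predict docstring describes, assuming each proba row is aligned with the estimator's class ordering; resolve is a hypothetical helper, not part of the module:

import numpy as np

def resolve(pred1, proba1, pred2, proba2):
    # Keep the shared label when both views agree; otherwise take the
    # label of the view that is more confident about its own prediction.
    labels = np.empty_like(pred1)
    for i, (a, b) in enumerate(zip(pred1, pred2)):
        if a == b:
            labels[i] = a
        else:
            labels[i] = a if proba1[i].max() >= proba2[i].max() else b
    return labels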

semisupervised/DemocraticCoLearning.py

Lines changed: 66 additions & 8 deletions
@@ -3,7 +3,7 @@
 # @Filename: DemocraticCoLearning.py
 # @Author: Daniel Puente Ramírez
 # @Time: 29/12/21 15:39
-# @Version: 4.0
+# @Version: 5.0
 
 import copy
 from math import sqrt
@@ -18,11 +18,12 @@
 
 
 def check_bounds(wi):
-    """Check upper and lower bounds. The left minimum value can be 0, and the
-    right minimum value can be 1.
+    """
+    Clamp wi so that the lower bound is not less than 0 and the upper
+    bound is not greater than 1.
 
     :param wi: lower and upper mean confidence
-    :return: wi fixed
+    :return: the fixed wi.
     """
     if wi[0] < 0:
         wi[0] = 0
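
For intuition, a worked sketch of where these bounds come from and why clamping is needed. The module only shows const = 1.96 (95%) and the sqrt import; the normal-approximation interval below is an assumption about the formula, not taken from this diff:

from math import sqrt

def mean_confidence_interval(acc, n_samples, const=1.96):
    # Normal-approximation 95% interval around an observed accuracy.
    margin = const * sqrt(acc * (1 - acc) / n_samples)
    return [acc - margin, acc + margin]

wi = mean_confidence_interval(0.97, 50)   # [0.9227..., 1.0172...]
# The upper bound exceeds 1, so check_bounds(wi) clamps it back to
# [0.9227..., 1.0]; fit() then uses the mid-point sum(wi) / 2 as the
# classifier's confidence weight.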
@@ -32,16 +33,54 @@ def check_bounds(wi):
 
 
 class DemocraticCoLearning:
-    """Democratic Co-Learning Implementation. Based on:
-    Zhou, Y., & Goldman, S. (2004, November). Democratic co-learning.
-    In 16th IEEE International Conference on Tools with Artificial
-    Intelligence (pp. 594-602). IEEE.
+    """
+    Democratic Co-Learning implementation. Based on:
+    Zhou, Y., & Goldman, S. (2004, November). Democratic co-learning.
+    In 16th IEEE International Conference on Tools with Artificial
+    Intelligence (pp. 594-602). IEEE.
+
+    Parameters
+    ----------
+    random_state : int, default=None
+        The random seed used to initialize the classifiers.
+
+    c1 : base_estimator, default=MultinomialNB
+        The first classifier to be used.
+
+    c1_params : dict, default=None
+        Parameters for the first classifier.
+
+    c2 : base_estimator, default=KNeighborsClassifier
+        The second classifier to be used.
+
+    c2_params : dict, default=None
+        Parameters for the second classifier.
+
+    c3 : base_estimator, default=DecisionTreeClassifier
+        The third classifier to be used.
+
+    c3_params : dict, default=None
+        Parameters for the third classifier.
     """
 
     def __init__(self, random_state=None,
                  c1=None, c1_params=None,
                  c2=None, c2_params=None,
                  c3=None, c3_params=None):
+        """
+        Build the three classifiers from the given parameters. If a
+        classifier is not provided, its default is used: MultinomialNB,
+        KNeighborsClassifier, and DecisionTreeClassifier, respectively.
+
+        :param random_state: The random seed used to initialize the
+            classifiers
+        :param c1: The first classifier to be used
+        :param c1_params: Parameters for the first classifier
+        :param c2: The second classifier to be used
+        :param c2_params: Parameters for the second classifier
+        :param c3: The third classifier to be used
+        :param c3_params: Parameters for the third classifier
+        """
         self.const = 1.96  # 95%
         self.random_state = random_state if random_state is not None else \
             np.random.randint(low=0, high=10e5, size=1)[0]
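
As with CoTraining, a minimal construction sketch; the import path and the estimator-class-plus-params convention are assumptions for illustration:

from sklearn.svm import SVC

from semisupervised.DemocraticCoLearning import DemocraticCoLearning

# Defaults: MultinomialNB, KNeighborsClassifier, DecisionTreeClassifier.
dcl_default = DemocraticCoLearning(random_state=42)

# Swapping in a custom third learner (estimator class plus keyword
# dict, mirroring the c3_params argument).
dcl_custom = DemocraticCoLearning(random_state=42,
                                  c3=SVC, c3_params={'kernel': 'linear'})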
@@ -69,6 +108,17 @@ def __init__(self, random_state=None,
         self.h1, self.h2, self.h3 = configs
 
     def fit(self, samples, y):
+        """
+        Train the three classifiers on the labeled data, then use them to
+        predict the labels of the unlabeled data. Correctly predicted
+        samples are not added to the training set; incorrectly predicted
+        ones are. The process is repeated until the training set stops
+        changing.
+
+        :param samples: the training data
+        :param y: the labels of the samples
+        """
         try:
             labeled, u, y = split(samples, y)
         except IndexError:
@@ -291,6 +341,14 @@ def fit(self, samples, y):
         self.w3 = sum(check_bounds(w3)) / 2
 
     def predict(self, samples):
+        """
+        For each sample, collect the predictions of the three classifiers
+        and count how many times each label appears; the label that
+        appears the most is returned.
+
+        :param samples: the samples to be classified
+        :return: The labels of the samples.
+        """
         all_instances = samples
 
         gj = [0 for _ in range(self.n_labels)]
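
The counting this docstring describes can be sketched as below; majority_vote is a hypothetical helper showing plain unweighted voting, which is what the docstring states (the library's gj counter may additionally involve the confidence weights computed in fit):

import numpy as np

def majority_vote(preds, n_labels):
    # preds: one row of encoded label predictions per classifier,
    # shape (3, n_samples). For each sample, count the votes per label
    # (the gj counter in predict) and return the most voted label.
    preds = np.asarray(preds)
    labels = np.empty(preds.shape[1], dtype=preds.dtype)
    for i in range(preds.shape[1]):
        gj = np.bincount(preds[:, i], minlength=n_labels)
        labels[i] = gj.argmax()
    return labels

majority_vote([[0, 1, 2], [0, 1, 1], [1, 1, 2]], n_labels=3)
# -> array([0, 1, 2])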
