dpuenteramirez
diff --git a/semisupervised/CoTraining.py b/semisupervised/CoTraining.py
Lines changed: 19 additions & 16 deletions
@@ -3,18 +3,19 @@
 # @Filename:    CoTraining.py
 # @Author:      Daniel Puente Ramírez
 # @Time:        22/12/21 09:27
-# @Version:     2.0
+# @Version:     3.0
 
 from math import ceil, floor
 
 import numpy as np
 from sklearn.naive_bayes import GaussianNB
 from sklearn.preprocessing import LabelEncoder
+from .utils import split
 
 
 class CoTraining:
 
-    def __init__(self, p=1, n=3, k=30, u=75, random_state=42):
+    def __init__(self, p=1, n=3, k=30, u=75, random_state=None):
         self.p = p
         self.n = n
         self.k = k
@@ -24,29 +25,31 @@ def __init__(self, p=1, n=3, k=30, u=75, random_state=42):
         self.h1 = GaussianNB()
         self.h2 = GaussianNB()
 
-    def fit(self, L, U, y):
-        if len(L) != len(y):
+    def fit(self, samples, y):
+        labeled, u, y = split(samples, y)
+
+        if len(labeled) != len(y):
             raise ValueError(
                 f'The dimension of the labeled data must be the same as the '
-                f'number of labels given. {len(L)} != {len(y)}'
+                f'number of labels given. {len(labeled)} != {len(y)}'
             )
 
         le = LabelEncoder()
         le.fit(y)
         y = le.transform(y)
         tot = self.n + self.p
 
-        self.size_x1 = ceil(len(L[0]) / 2)
+        self.size_x1 = ceil(len(labeled[0]) / 2)
 
         rng = np.random.default_rng()
-        u_random_index = rng.choice(len(U), size=floor(self.u),
+        u_random_index = rng.choice(len(u), size=floor(self.u),
                                     replace=False, shuffle=False)
 
-        u_prime = U[u_random_index]
+        u_prime = u[u_random_index]
         u1, u2 = np.array_split(u_prime, 2, axis=1)
 
         for _ in range(self.k):
-            x1, x2 = np.array_split(L, 2, axis=1)
+            x1, x2 = np.array_split(labeled, 2, axis=1)
 
             self.h1.fit(x1, y)
             self.h2.fit(x2, y)
@@ -74,29 +77,29 @@ def fit(self, L, U, y):
             u1_new_samples = np.concatenate((u1_samples, u2_x1_samples), axis=1)
             u2_new_samples = np.concatenate((u2_samples, u1_x2_samples), axis=1)
             u_new = np.concatenate((u1_new_samples, u2_new_samples))
-            L = np.concatenate((L, u_new))
+            labeled = np.concatenate((labeled, u_new))
             y_new = np.array([x[0] for x in top_h1] + [x[0] for x in top_h2])
             y = np.concatenate((y, y_new))
 
-            old_indexes = np.array([x[2] for x in top_h1] + [x[2] for x in \
+            old_indexes = np.array([x[2] for x in top_h1] + [x[2] for x in
                                                              top_h2], int)
             u_prime = np.delete(u_prime, old_indexes, axis=0)
 
-            U = np.delete(U, u_random_index, axis=0)
+            u = np.delete(u, u_random_index, axis=0)
             try:
-                u_random_index = rng.choice(len(U),
+                u_random_index = rng.choice(len(u),
                                             size=2 * self.p + 2 * self.n,
                                             replace=False, shuffle=False)
             except ValueError:
                 print(f'The model was incorrectly parametrized, k is to big.')
             try:
-                u_prime = np.concatenate((u_prime, U[u_random_index]))
+                u_prime = np.concatenate((u_prime, u[u_random_index]))
             except IndexError:
                 print('The model was incorrectly parametrized, there are not '
                       'enough unlabeled samples.')
 
-    def predict(self, X):
-        x1, x2 = np.array_split(X, 2, axis=1)
+    def predict(self, samples):
+        x1, x2 = np.array_split(samples, 2, axis=1)
         pred1, pred_proba1 = self.h1.predict(x1), self.h1.predict_proba(x1)
         pred2, pred_proba2 = self.h2.predict(x2), self.h2.predict_proba(x2)
         labels = []