
Commit 33276cc

Written IS-SSL test docs #161
1 parent 892fef1 commit 33276cc

File tree

9 files changed: +187 additions, −0 deletions

docs/anexos.pdf

260 KB — binary file not shown.

Three image files (185 KB, 192 KB, 79.5 KB) — binary files not shown.

docs/tex/D_Manual_programador.tex

Lines changed: 44 additions & 0 deletions
@@ -481,3 +481,47 @@ \subsubsection{Sonar Cloud}
 \imagenRuta{../img/anexos/manual-programador/SonarCloud-IS-SSL}{SonarCloud.}{SonarCloud-IS-SSL}
 
 \subsection{System tests}
+
+This section describes the tests that ship with the libraries.
+
+The tests are deliberately simple, since the algorithms themselves contain few branches. The test suite therefore consists of unit tests for every implemented algorithm, checking both that the inputs are valid and that the outputs are correct. For the instance-selection algorithms, the tests assert that the retained instances belong to the input dataset and that no more instances come back than were supplied in the first place, so that filtering keeps working if the algorithms are later modified. The base test can be seen in Figure~\ref{fig:base-test-is}.
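A minimal sketch of such a check, assuming the model.filter(x, y) API that the test files in this commit exercise (the helper name is illustrative):

import pandas as pd
from sklearn.datasets import load_iris

def check_filter_output(model):
    # Sketch of the base IS test: filtering must return instances that
    # belong to the input dataset, and never more than were supplied.
    x, y = load_iris(return_X_y=True, as_frame=True)
    x_filtered, y_filtered = model.filter(x, pd.DataFrame(y))

    assert len(x_filtered) <= len(x)

    original_rows = {tuple(row) for row in x.to_numpy()}
    assert all(tuple(row) in original_rows
               for row in pd.DataFrame(x_filtered).to_numpy())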
488+
489+
Para la biblioteca de algoritmos de aprendizaje semi-supervisado se poseen también pruebas unitarias para todos y cada uno de ellos, la prueba base se puede visualizar en~\ref{fig:base-test-ssl}. En ellas se comprueban:
490+
\begin{itemize}
491+
\tightlist
492+
\item Tipo de objeto de entrada.
493+
\item Instanciación con mediante diferentes parámetros, tanto por defecto como mediante uso de diccionarios.
494+
\item Entrenamiento del propio algoritmo (\texttt{fit}).
495+
\item Predicción usando el modelo, \texttt{predict}.
496+
\item Las etiquetas devueltas como resultado de la predicción existen en el conjunto de entrenamiento, no se ha inventado ninguna.
497+
\end{itemize}
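A minimal sketch of such a base test, assuming the fit/predict contract described above (the concrete test code appears further down in this commit):

import numpy as np
import pandas as pd

def check_ssl_algorithm(algorithm, x_train, y_train, x_test, params=None):
    # Input type: the tests expect pandas DataFrames.
    assert isinstance(x_train, pd.DataFrame)
    assert isinstance(y_train, pd.DataFrame)

    # Instantiation: defaults or a dictionary of parameters.
    model = algorithm(**params) if params is not None else algorithm()

    model.fit(x_train, y_train)      # training (fit)
    y_pred = model.predict(x_test)   # prediction (predict)

    # Every returned label must already exist among the training labels.
    known_labels = set(np.unique(y_train.to_numpy()))
    assert set(np.unique(y_pred)) <= known_labels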
+
+
+\imagenFlotante{../img/anexos/manual-programador/base-tests-is}{Base test for instance-selection algorithms.}{base-test-is}
+\imagenFlotante{../img/anexos/manual-programador/base-tests-ssl}{Base test for semi-supervised learning algorithms.}{base-test-ssl}
+\begin{figure}
+\centering
+\includegraphics[width=0.5\textwidth]{../img/anexos/manual-programador/tests-superados-is-ssl}
+\caption{Tests passed for both libraries.}\label{fig:tests-superados-is-ssl}
+
+\end{figure}
+
+In addition, a test has been written to ensure that \texttt{ARFF} files are still read correctly.
+
+The result of the test runs can be seen in Figure~\ref{fig:tests-superados-is-ssl}.

semisupervised/utils/_split.py

Lines changed: 2 additions & 0 deletions
@@ -22,6 +22,8 @@ def split(samples, y):
     y = y.to_numpy()
 
     labeled_indexes = y != (-1 or np.NaN or None)
+    labeled_indexes = np.ravel(labeled_indexes)
 
     L = samples.iloc[labeled_indexes].to_numpy()
     U = samples.iloc[~labeled_indexes].to_numpy()
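The added np.ravel matters because DataFrame.to_numpy() returns a 2-D array of shape (n, 1), so the comparison yields a 2-D boolean mask that iloc cannot index with. Note also that the pre-existing expression (-1 or np.NaN or None) short-circuits to -1 in Python, so only -1 is actually treated as the unlabeled marker. A minimal sketch reproducing the behaviour (variable names are illustrative):

import numpy as np
import pandas as pd

samples = pd.DataFrame({'a': [1.0, 2.0, 3.0], 'b': [4.0, 5.0, 6.0]})
y = pd.DataFrame([0, -1, 1])        # -1 marks an unlabeled instance

y_arr = y.to_numpy()                # shape (3, 1), not (3,)
# `(-1 or np.NaN or None)` evaluates to -1, so this only tests y != -1
mask = y_arr != -1                  # 2-D boolean mask of shape (3, 1)
mask = np.ravel(mask)               # flatten to (3,) so iloc accepts it

L = samples.iloc[mask].to_numpy()   # labeled rows -> shape (2, 2)
U = samples.iloc[~mask].to_numpy()  # unlabeled rows -> shape (1, 2)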
Lines changed: 79 additions & 0 deletions
@@ -15,19 +15,36 @@
 
 
 def to_dataframe(y):
+    """
+    If the input is not a dataframe, convert it to a dataframe.
+
+    :param y: The target variable
+    :return: A dataframe
+    """
     if not isinstance(y, pd.DataFrame):
         return pd.DataFrame(y)
 
 
 @pytest.fixture
 def iris_dataset():
+    """
+    It loads the iris dataset, converts the target variable to a dataframe,
+    and returns the data and target.
+
+    :return: The dataframe of the features and the dataframe of the labels
+    """
     x, y = load_iris(return_X_y=True, as_frame=True)
     y = to_dataframe(y)
     return x, y
 
 
 @pytest.fixture
 def iris_dataset_ss():
+    """
+    It loads the iris dataset, randomly selects 30% of the data points to be
+    unlabeled, and returns the labeled and unlabeled data.
+
+    :return: The original dataframe, the original labels, the complete
+        dataframe, and the complete labels.
+    """
     x, y = load_iris(return_X_y=True, as_frame=True)
     y = to_dataframe(y)
     li = list(set(range(x.shape[0])))
@@ -45,6 +62,16 @@ def iris_dataset_ss():
 
 
 def base(x, y, algorithm, params=None):
+    """
+    It takes in a dataframe of features, a dataframe of labels, an algorithm,
+    and a dictionary of parameters. It then creates a model using the
+    algorithm and parameters, and filters the data using the model.
+
+    :param x: The input dataframe
+    :param y: The target variable
+    :param algorithm: The algorithm to use
+    :param params: A dictionary of parameters to pass to the algorithm
+    """
     assert isinstance(x, pd.DataFrame) and isinstance(y, pd.DataFrame)
     model = algorithm(**params) if params is not None else algorithm()
     x_filtered, y_filtered = model.filter(x, y)
@@ -57,46 +84,91 @@
 
 
 def test_enn_original(iris_dataset):
+    """
+    It tests the ENN algorithm on the iris dataset.
+
+    :param iris_dataset: This is the dataset to use
+    """
     x, y = iris_dataset
     base(x, y, ENN, {'nearest_neighbors': 3, 'power_parameter': 2})
 
 
 def test_cnn(iris_dataset):
+    """
+    It tests the CNN algorithm on the iris dataset.
+
+    :param iris_dataset: This is the dataset to use
+    """
     x, y = iris_dataset
     base(x, y, CNN)
 
 
 def test_rnn(iris_dataset):
+    """
+    It tests the RNN algorithm on the iris dataset.
+
+    :param iris_dataset: This is the dataset to use
+    """
     x, y = iris_dataset
     base(x, y, RNN)
 
 
 def test_icf(iris_dataset):
+    """
+    It tests the ICF algorithm on the iris dataset.
+
+    :param iris_dataset: This is the dataset to use
+    """
     x, y = iris_dataset
     base(x, y, ICF, {'nearest_neighbors': 3, 'power_parameter': 2})
 
 
 def test_mss(iris_dataset):
+    """
+    It tests the MSS algorithm on the iris dataset.
+
+    :param iris_dataset: This is the dataset to use
+    """
     x, y = iris_dataset
     base(x, y, MSS)
 
 
 def test_drop3(iris_dataset):
+    """
+    It tests the DROP3 algorithm on the iris dataset.
+
+    :param iris_dataset: This is the dataset to use
+    """
     x, y = iris_dataset
     base(x, y, DROP3, {'nearest_neighbors': 3, 'power_parameter': 2})
 
 
 def test_local_sets_lssm(iris_dataset):
+    """
+    It tests the LSSm algorithm on the iris dataset.
+
+    :param iris_dataset: This is the dataset to use
+    """
     x, y = iris_dataset
     base(x, y, LSSm)
 
 
 def test_local_sets_lsbo(iris_dataset):
+    """
+    It tests the LSBo algorithm on the iris dataset.
+
+    :param iris_dataset: This is the dataset to use
+    """
     x, y = iris_dataset
     base(x, y, LSBo)
 
 
 def test_enn_ss(iris_dataset_ss):
+    """
+    It tests the ENN algorithm on the semi-supervised iris dataset.
+
+    :param iris_dataset_ss: This is the dataset to use
+    """
     original, original_labels, complete, complete_labels = iris_dataset_ss
 
     model = ENN()
@@ -118,6 +190,13 @@ def test_enn_ss(iris_dataset_ss):
 
 
 def test_different_len(iris_dataset):
+    """
+    It tests that the filter method raises a ValueError if the lengths of
+    the feature and label dataframes are different.
+
+    :param iris_dataset: a fixture that returns a tuple of two pandas
+        DataFrames, one for the features and one for the target
+    """
     x, y = iris_dataset
     y = y.loc[:-1]
     model1 = LSSm()
Lines changed: 52 additions & 0 deletions
@@ -20,6 +20,11 @@
 
 @pytest.fixture
 def digits_dataset_ss():
+    """
+    It loads the digits dataset, splits it into train and test sets, and then
+    randomly assigns 55% of the training set as unlabeled.
+
+    :return: x_train, x_test, y_train, y_test, opt_labels
+    """
     x, y = load_digits(return_X_y=True, as_frame=True)
     x = x.to_numpy()
     y = y.to_numpy()
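A hedged reconstruction of what the fixture's docstring describes (the train/test ratio and random seed are assumptions, as the hunk cuts off before them): hold out a test set, then mark 55% of the training labels as unlabeled with -1, the marker _split.py expects.

import numpy as np
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split

x, y = load_digits(return_X_y=True)
# assumption: an 80/20 train/test split; the fixture's actual ratio is not shown
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, random_state=0)

rng = np.random.default_rng(0)
unlabeled = rng.choice(len(y_train), size=int(0.55 * len(y_train)),
                       replace=False)
y_train = y_train.copy()
y_train[unlabeled] = -1   # -1 is the unlabeled marker used by _split.py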
@@ -39,6 +44,18 @@ def digits_dataset_ss():
 
 
 def base(x_train, x_test, y_train, y_test, opt_labels, algorithm, params=None):
+    """
+    It takes in a training and testing set, a list of possible labels, and a
+    model, and checks the predictions of the model on the testing set.
+
+    :param x_train: The training data
+    :param x_test: The test data
+    :param y_train: The training labels
+    :param y_test: The actual labels of the test set
+    :param opt_labels: The set of labels that the model can predict
+    :param algorithm: The algorithm to use
+    :param params: A dictionary of parameters to pass to the algorithm
+    """
     assert isinstance(x_train, pd.DataFrame) and isinstance(y_train,
                                                             pd.DataFrame)
     assert isinstance(x_test, pd.DataFrame) and isinstance(y_test,
@@ -52,6 +69,11 @@ def base(x_train, x_test, y_train, y_test, opt_labels, algorithm, params=None):
 
 
 def test_co_training(digits_dataset_ss):
+    """
+    It tests the Co-Training algorithm on the digits dataset.
+
+    :param digits_dataset_ss: The dataset we're using
+    """
     x_train, x_test, y_train, y_test, opt_labels = digits_dataset_ss
     base(x_train, x_test, y_train, y_test, opt_labels, CoTraining,
          {'p': 1, 'n': 3, 'k': 1, 'u': 7})
@@ -73,13 +95,23 @@ def test_co_training(digits_dataset_ss):
 
 
 def test_tri_training(digits_dataset_ss):
+    """
+    It tests the Tri-Training algorithm on the digits dataset.
+
+    :param digits_dataset_ss: The dataset we're using
+    """
     x_train, x_test, y_train, y_test, opt_labels = digits_dataset_ss
     base(x_train, x_test, y_train, y_test, opt_labels, TriTraining,
          {'c1': KNeighborsClassifier, 'c1_params': {'n_neighbors': 3},
          'c2': KNeighborsClassifier})
 
 
 def test_demo_co_learning(digits_dataset_ss):
+    """
+    It tests the Democratic Co-Learning algorithm on the digits dataset.
+
+    :param digits_dataset_ss: The dataset we're using
+    """
     x_train, x_test, y_train, y_test, opt_labels = digits_dataset_ss
     base(x_train, x_test, y_train, y_test, opt_labels, DemocraticCoLearning)
     base(x_train, x_test, y_train, y_test, opt_labels, DemocraticCoLearning,
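The Tri-Training test shows the convention for injecting base classifiers: the algorithm receives classifier classes (c1, c2) plus optional per-classifier parameter dictionaries (c1_params). A sketch of that instantiation pattern in isolation (the helper name is illustrative, not the repo's code):

from sklearn.neighbors import KNeighborsClassifier

def build(classifier, params=None):
    # Instantiate a classifier class with an optional parameter dictionary,
    # mirroring how the test passes c1/c1_params and c2 to TriTraining.
    return classifier(**params) if params is not None else classifier()

c1 = build(KNeighborsClassifier, {'n_neighbors': 3})
c2 = build(KNeighborsClassifier)   # falls back to the defaults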
@@ -88,11 +120,25 @@ def test_demo_co_learning(digits_dataset_ss):
 
 
 def test_density_peaks(digits_dataset_ss):
+    """
+    It takes the training and testing data, and the optimal labels, and runs
+    the STDPNF algorithm on it.
+
+    :param digits_dataset_ss: a tuple of (x_train, x_test, y_train, y_test,
+        opt_labels)
+    """
     x_train, x_test, y_train, y_test, opt_labels = digits_dataset_ss
     base(x_train, x_test, y_train, y_test, opt_labels, STDPNF)
 
 
 def test_density_peaks_filtering(digits_dataset_ss):
+    """
+    It tests that the `filtering` option works as expected in the STDPNF
+    algorithm.
+
+    :param digits_dataset_ss: a fixture that returns a tuple of (x_train,
+        x_test, y_train, y_test, opt_labels)
+    """
     x_train, x_test, y_train, y_test, opt_labels = digits_dataset_ss
     with pytest.raises(AttributeError):
         base(x_train, x_test, y_train, y_test, opt_labels, STDPNF,
@@ -106,6 +152,12 @@ def test_density_peaks_filtering(digits_dataset_ss):
 
 
 def test_different_len(digits_dataset_ss):
+    """
+    It tests that if the lengths of the input and output are different, then
+    the model will raise a ValueError.
+
+    :param digits_dataset_ss: a tuple of (x_train, x_test, y_train, y_test,
+        opt_labels)
+    """
     x, _, y, _, _ = digits_dataset_ss
     co = CoTraining()
     tri = TriTraining()

tests/utils.py renamed to tests/test_utils.py

Lines changed: 10 additions & 0 deletions
@@ -14,10 +14,20 @@
 
 
 @pytest.fixture
 def arff_path_file():
+    """
+    It returns the path to the iris dataset in the datasets folder.
+
+    :return: The path to the iris.arff file
+    """
     return join('datasets', 'iris.arff')
 
 
 def test_arff_data(arff_path_file):
+    """
+    `arff_data` loads an arff file into a `Bunch` object, which is a
+    dictionary-like object.
+
+    :param arff_path_file: The path to the arff file
+    """
     dataset = arff_data(arff_path_file)
     assert isinstance(dataset, Bunch)
     dataset1 = arff_data(arff_path_file, ['a', 'b', 'c', 'd'])
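arff_data is the project's own helper; the test only relies on it returning a scikit-learn Bunch and accepting an optional list of attribute names. A hypothetical reconstruction with scipy.io.arff, not the repo's implementation:

import pandas as pd
from scipy.io import arff
from sklearn.utils import Bunch

def arff_data_sketch(path, attribute_names=None):
    # Load the ARFF file into a structured array, then into a DataFrame.
    data, meta = arff.loadarff(path)
    frame = pd.DataFrame(data)
    if attribute_names is not None:
        # assumption: the optional argument renames the feature columns
        frame.columns = (list(attribute_names)
                         + list(frame.columns[len(attribute_names):]))
    # In iris.arff the last column is the class attribute.
    return Bunch(data=frame.iloc[:, :-1], target=frame.iloc[:, -1])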
