dpuenteramirez
diff --git a/‎docs/anexos.pdf‎
262 KB b/‎docs/anexos.pdf‎
262 KB
diff --git a/‎docs/img/anexos/manual-programador/base-tests-is.png‎
185 KB b/‎docs/img/anexos/manual-programador/base-tests-is.png‎
185 KB
diff --git a/‎docs/img/anexos/manual-programador/base-tests-ssl.png‎
192 KB b/‎docs/img/anexos/manual-programador/base-tests-ssl.png‎
192 KB
diff --git a/‎docs/img/anexos/manual-programador/tests-superados-is-ssl.png‎
79.5 KB b/‎docs/img/anexos/manual-programador/tests-superados-is-ssl.png‎
79.5 KB
diff --git a/‎docs/tex/D_Manual_programador.tex‎
Lines changed: 55 additions & 2 deletions b/‎docs/tex/D_Manual_programador.tex‎
Lines changed: 55 additions & 2 deletions
diff --git a/‎tests/test_InstanceSelection.py‎
Lines changed: 79 additions & 0 deletions b/‎tests/test_InstanceSelection.py‎
Lines changed: 79 additions & 0 deletions
diff --git a/‎tests/test_SemiSupervised.py‎
Lines changed: 52 additions & 0 deletions b/‎tests/test_SemiSupervised.py‎
Lines changed: 52 additions & 0 deletions
diff --git a/‎tests/test_utils.py‎
Lines changed: 10 additions & 0 deletions b/‎tests/test_utils.py‎
Lines changed: 10 additions & 0 deletions
@@ -285,8 +285,17 @@ \subsubsection{Travis-CI}
 \imagenRuta{../img/anexos/manual-programador/Travis-CI-UBUMLaaS}{Travis-CI.}{Travis-CI-UBUMLaaS}
 
 \subsection{Pruebas del sistema}
-Intro \\ (En desarrollo)
-Tal y como se ha descrito, \texttt{UBUMLaaS} es un \textit{software} que ha sufrido un cambio de diseño de grandes dimensiones, por lo que las pruebas de integración continua previas que existían han dejado de ser funcionales, debido a que únicamente comprobaban la interacción del usuario con la plataforma a través de un navegador.
+Las pruebas del sistema se encuentran dentro de los trabajos futuros que debe seguir la aplicación.
+
+Tal y como se ha descrito, \texttt{UBUMLaaS} es un \textit{software} que ha sufrido un cambio de diseño de grandes dimensiones, por lo que las pruebas de integración continua previas que existían han dejado de ser funcionales. La problemática que surge con las existentes es que no eran pruebas sobre el \textit{backend} de la aplicación, sino sobre la interacción del usuario (mediante el uso de \textit{Selenium}) con el \textit{frontend}.
+
+Es por ello que, al haberse hecho <<de cero>> toda la interfaz de la aplicación web, literalmente, no existe ninguna referencia de las antiguas que las pruebas existentes puedan reutilizar. 
+
+Debido a esta problemática, y con el fin de obtener un sistema estable, se ha seleccionado a un grupo de personas de edades comprendidas entre 18 y 55 años, profesionales de mecánica y electrónica, medicina, informática y estudiantes del grado de otras universidades españolas; con el fin de que dado un \href{https://github.com/dpr1005/Semisupervised-learning-and-instance-selection-methods/blob/917d49bcc9d7f1a4825225414b246023bfa2ea7b/docs/misc/survey_steps.pdf}{documento} relativamente sencillo, puedan hacer uso de todo el sistema, y sean estos los que reporten en un \href{https://forms.gle/eZUWmT5GNahqYVVa6}{formulario de \texttt{Google}} sus impresiones y resultados. Está preparado para que se vaya leyendo el documento, probando la aplicación y posteriormente rellenando el formulario, de forma que todo sea un proceso auto-guiado.
+
+Gracias a estas respuestas, se han podido encontrar \textit{bugs} existentes en la aplicación que no permitían trabajar correctamente con ella. El más importante de todos, el cual posee su propia \href{https://github.com/dpr1005/UBUMLaaS/issues/101}{\textit{issue}}, es el desfase existente entre los algoritmos de \texttt{Scikit-Learn} de la versión 0.21 (la utilizada previamente) y la actual, la 0.24. Numerosos algoritmos han sido modificados en su parametrización y, por ende, no se podían utilizar en \texttt{UBUMLaaS}.
+
+Durante algunos meses de desarrollo del proyecto, \texttt{Travis-CI} funcionó correctamente, pero la propia herramienta sufrió una actualización a finales de marzo y dejó de ser funcional, resultando en numerosos \textit{commits} marcados como erróneos. Sin embargo, tal y como se aprecia en los propios \textit{logs} de \texttt{Travis-CI}, nunca se llegó a testar el código, sino que la ejecución <<rompía>> durante la configuración de la máquina remota.
 
 \clearpage
 \section{IS-SSL}
@@ -481,3 +490,47 @@ \subsubsection{Sonar Cloud}
 \imagenRuta{../img/anexos/manual-programador/SonarCloud-IS-SSL}{SonarCloud.}{SonarCloud-IS-SSL}
 
 \subsection{Pruebas del sistema}
+
+En esta sección se van a describir las pruebas que poseen las bibliotecas.
+
+Las pruebas son realmente sencillas, ya que los propios algoritmos no poseen muchas disyunciones. Es por ello que el conjunto de pruebas codificado está compuesto de pruebas unitarias para cada algoritmo programado, permitiendo comprobar tanto que las entradas son correctas, como sus salidas. En el caso de los algoritmos de selección de instancias se asegura que las instancias recuperadas pertenecen al conjunto de datos de entrada, y que no son más que las que se introdujeron en primera instancia, de tal manera que, en caso de que los algoritmos se modifiquen, sigan realizando el filtrado. La prueba base se puede visualizar en la Figura~\ref{fig:base-test-is}.
+
+Para la biblioteca de algoritmos de aprendizaje semi-supervisado se poseen también pruebas unitarias para todos y cada uno de ellos; la prueba base se puede visualizar en la Figura~\ref{fig:base-test-ssl}. En ellas se comprueban:
+\begin{itemize}
+\tightlist
+\item Tipo de objeto de entrada.
+\item Instanciación mediante diferentes parámetros, tanto por defecto como mediante uso de diccionarios.
+\item Entrenamiento del propio algoritmo (\texttt{fit}).
+\item Predicción usando el modelo, \texttt{predict}.
+\item Las etiquetas devueltas como resultado de la predicción existen en el conjunto de entrenamiento, no se ha inventado ninguna.
+\end{itemize}
+
+
+\imagenFlotante{../img/anexos/manual-programador/base-tests-is}{Prueba base para algoritmos de selección de instancias.}{base-test-is}
+\imagenFlotante{../img/anexos/manual-programador/base-tests-ssl}{Prueba base para algoritmos de aprendizaje semi-supervisado.}{base-test-ssl}
+\begin{figure}
+\centering
+\includegraphics[width=0.5\textwidth]{../img/anexos/manual-programador/tests-superados-is-ssl}
+\caption{Resultado de la ejecución de las pruebas.}\label{fig:tests-superados-is-ssl}
+
+\end{figure}
+
+Además, se ha codificado una prueba para asegurar que se siguen leyendo los ficheros \texttt{ARFF} correctamente.
+
+El resultado de las ejecuciones se puede ver en la Figura~\ref{fig:tests-superados-is-ssl}.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
@@ -15,19 +15,36 @@
 
 
 def to_dataframe(y):
+    """
+    Wrap ``y`` in a ``pandas.DataFrame`` when it is not one already.
+
+    NOTE(review): there is no ``return`` for the case where ``y`` already is
+    a DataFrame, so such input yields ``None`` instead of being passed
+    through. The fixtures in this module call it on the iris target; confirm
+    whether a trailing ``return y`` is wanted.
+
+    :param y: The target variable
+    :return: ``pd.DataFrame(y)`` if ``y`` is not a DataFrame, ``None``
+    otherwise
+    """
     if not isinstance(y, pd.DataFrame):
         return pd.DataFrame(y)
 
 
 @pytest.fixture
 def iris_dataset():
+    """
+    It loads the iris dataset, converts the target variable to a dataframe, and
+    returns the data and target
+    :return: The dataframe of the features and the dataframe of the labels
+    """
     x, y = load_iris(return_X_y=True, as_frame=True)
     y = to_dataframe(y)
     return x, y
 
 
 @pytest.fixture
 def iris_dataset_ss():
+    """
+    It loads the iris dataset, randomly selects 30% of the data points to be
+    unlabeled, and returns the labeled and unlabeled data
+    :return: The original dataframe, the original labels, the complete
+    dataframe, and the complete labels.
+    """
     x, y = load_iris(return_X_y=True, as_frame=True)
     y = to_dataframe(y)
     li = list(set(range(x.shape[0])))
@@ -45,6 +62,16 @@ def iris_dataset_ss():
 
 
 def base(x, y, algorithm, params=None):
+    """
+    It takes in a dataframe of features, a dataframe of labels, an algorithm,
+    and a dictionary of parameters. It then creates a model using the
+    algorithm and parameters, and filters the data using the model
+
+    :param x: The input dataframe
+    :param y: The target variable
+    :param algorithm: The algorithm to use
+    :param params: a dictionary of parameters to pass to the algorithm
+    """
     assert isinstance(x, pd.DataFrame) and isinstance(y, pd.DataFrame)
     model = algorithm(**params) if params is not None else algorithm()
     x_filtered, y_filtered = model.filter(x, y)
@@ -57,46 +84,91 @@ def base(x, y, algorithm, params=None):
 
 
 def test_enn_original(iris_dataset):
+    """
+    It tests the ENN algorithm on the iris dataset
+
+    :param iris_dataset: This is the dataset to use
+    """
     x, y = iris_dataset
     base(x, y, ENN, {"nearest_neighbors": 3, "power_parameter": 2})
 
 
 def test_cnn(iris_dataset):
+    """
+    It tests the CNN algorithm on the iris dataset
+
+    :param iris_dataset: This is the dataset to use
+    """
     x, y = iris_dataset
     base(x, y, CNN)
 
 
 def test_rnn(iris_dataset):
+    """
+    It tests the RNN algorithm on the iris dataset
+
+    :param iris_dataset: This is the dataset to use
+    """
     x, y = iris_dataset
     base(x, y, RNN)
 
 
 def test_icf(iris_dataset):
+    """
+    It tests the ICF algorithm on the iris dataset
+
+    :param iris_dataset: This is the dataset to use
+    """
     x, y = iris_dataset
     base(x, y, ICF, {"nearest_neighbors": 3, "power_parameter": 2})
 
 
 def test_mss(iris_dataset):
+    """
+    It tests the MSS algorithm on the iris dataset
+
+    :param iris_dataset: This is the dataset to use
+    """
     x, y = iris_dataset
     base(x, y, MSS)
 
 
 def test_drop3(iris_dataset):
+    """
+    It tests the DROP3 algorithm on the iris dataset
+
+    :param iris_dataset: This is the dataset to use
+    """
     x, y = iris_dataset
     base(x, y, DROP3, {"nearest_neighbors": 3, "power_parameter": 2})
 
 
 def test_local_sets_lssm(iris_dataset):
+    """
+    It tests the LSSm algorithm on the iris dataset
+
+    :param iris_dataset: This is the dataset to use
+    """
     x, y = iris_dataset
     base(x, y, LSSm)
 
 
 def test_local_sets_lsbo(iris_dataset):
+    """
+    It tests the LSBo algorithm on the iris dataset
+
+    :param iris_dataset: This is the dataset to use
+    """
     x, y = iris_dataset
     base(x, y, LSBo)
 
 
 def test_enn_ss(iris_dataset_ss):
+    """
+    It tests the safe ENN algorithm on the iris dataset
+
+    :param iris_dataset_ss: This is the dataset to use
+    """
     (
         original,
         original_labels,
@@ -124,6 +196,13 @@ def test_enn_ss(iris_dataset_ss):
 
 
 def test_different_len(iris_dataset):
+    """
+    It tests that the filter method raises a ValueError if the length of the
+    input and output dataframes are different
+
+    :param iris_dataset: a fixture that returns a tuple of two pandas
+    DataFrames, one for the features and one for the target
+    """
     x, y = iris_dataset
     y = y.loc[:-1]
     model1 = LSSm()
 
@@ -19,6 +19,11 @@
 
 @pytest.fixture
 def digits_dataset_ss():
+    """
+    It loads the digits dataset, splits it into train and test sets, and then
+    randomly assigns 55% of the training set as unlabeled
+    :return: x_train, x_test, y_train, y_test, opt_labels
+    """
     x, y = load_digits(return_X_y=True, as_frame=True)
     x = x.to_numpy()
     y = y.to_numpy()
@@ -38,6 +43,18 @@ def digits_dataset_ss():
 
 
 def base(x_train, x_test, y_train, y_test, opt_labels, algorithm, params=None):
+    """
+    It takes in a training and testing set, a list of possible labels, and a
+    model, and checks the predictions of the model on the testing set
+
+    :param x_train: The training data
+    :param x_test: The test data
+    :param y_train: The training labels
+    :param y_test: the actual labels of the test set
+    :param opt_labels: the set of labels that the model can predict
+    :param algorithm: the algorithm to use
+    :param params: a dictionary of parameters to pass to the algorithm
+    """
     assert isinstance(x_train, pd.DataFrame) and isinstance(
         y_train, pd.DataFrame)
     assert isinstance(x_test, pd.DataFrame) and isinstance(
@@ -51,6 +68,11 @@ def base(x_train, x_test, y_train, y_test, opt_labels, algorithm, params=None):
 
 
 def test_co_training(digits_dataset_ss):
+    """
+    It tests the Co-Training algorithm on the digits dataset
+
+    :param digits_dataset_ss: The dataset we're using
+    """
     x_train, x_test, y_train, y_test, opt_labels = digits_dataset_ss
     base(
         x_train,
@@ -106,6 +128,11 @@ def test_co_training(digits_dataset_ss):
 
 
 def test_tri_training(digits_dataset_ss):
+    """
+    It tests the Tri-Training algorithm on the digits dataset
+
+    :param digits_dataset_ss: the dataset we're using
+    """
     x_train, x_test, y_train, y_test, opt_labels = digits_dataset_ss
     base(
         x_train,
@@ -123,6 +150,11 @@ def test_tri_training(digits_dataset_ss):
 
 
 def test_demo_co_learning(digits_dataset_ss):
+    """
+    It tests the Democratic Co-Learning algorithm on the digits dataset
+
+    :param digits_dataset_ss: The dataset we're using
+    """
     x_train, x_test, y_train, y_test, opt_labels = digits_dataset_ss
     base(x_train, x_test, y_train, y_test, opt_labels, DemocraticCoLearning)
     base(
@@ -141,11 +173,25 @@ def test_demo_co_learning(digits_dataset_ss):
 
 
 def test_density_peaks(digits_dataset_ss):
+    """
+    It takes the training and testing data, and the optimal labels, and runs the
+    STDPNF algorithm on it
+
+    :param digits_dataset_ss: a tuple of (x_train, x_test, y_train, y_test,
+    opt_labels)
+    """
     x_train, x_test, y_train, y_test, opt_labels = digits_dataset_ss
     base(x_train, x_test, y_train, y_test, opt_labels, STDPNF)
 
 
 def test_density_peaks_filtering(digits_dataset_ss):
+    """
+    It tests that the `filtering` option works as expected in the STDPNF
+    algorithm
+
+    :param digits_dataset_ss: a fixture that returns a tuple of (x_train,
+    x_test, y_train, y_test, opt_labels)
+    """
     x_train, x_test, y_train, y_test, opt_labels = digits_dataset_ss
     with pytest.raises(AttributeError):
         base(x_train, x_test, y_train, y_test,
@@ -177,6 +223,12 @@ def test_density_peaks_filtering(digits_dataset_ss):
 
 
 def test_different_len(digits_dataset_ss):
+    """
+    It tests that if the length of the input and output are different, then the
+    model will raise a ValueError
+
+    :param digits_dataset_ss: a tuple of (x, x_test, y, y_test, opt_labels)
+    """
     x, _, y, _, _ = digits_dataset_ss
     co = CoTraining()
     tri = TriTraining()
 
@@ -14,10 +14,20 @@
 
 @pytest.fixture
 def arff_path_file():
+    """
+    It returns the path to the iris dataset in the datasets folder
+    :return: The path to the iris.arff file
+    """
     return join("datasets", "iris.arff")
 
 
 def test_arff_data(arff_path_file):
+    """
+    `arff_data` loads an arff file into a `Bunch` object, which is a
+    dictionary-like object.
+
+    :param arff_path_file: The path to the arff file
+    """
     dataset = arff_data(arff_path_file)
     assert isinstance(dataset, Bunch)
     dataset1 = arff_data(arff_path_file, ["a", "b", "c", "d"])