diff --git a/.gitignore b/.gitignore
index b6e4761..923fd8d 100644
--- a/.gitignore
+++ b/.gitignore
@@ -127,3 +127,8 @@ dmypy.json
 
 # Pyre type checker
 .pyre/
+Project02/.idea/inspectionProfiles/profiles_settings.xml
+Project02/.idea/inspectionProfiles/Project_Default.xml
+Project02/.idea/misc.xml
+*.iml
+*.xml
diff --git a/Project02/.idea/.gitignore b/Project02/.idea/.gitignore
new file mode 100644
index 0000000..26d3352
--- /dev/null
+++ b/Project02/.idea/.gitignore
@@ -0,0 +1,3 @@
+# Default ignored files
+/shelf/
+/workspace.xml
diff --git a/Project02/source/Evaluation/CrossValidation.py b/Project02/source/Evaluation/CrossValidation.py
index 9e7ceff..5fece48 100644
--- a/Project02/source/Evaluation/CrossValidation.py
+++ b/Project02/source/Evaluation/CrossValidation.py
@@ -178,6 +178,7 @@ def validate(
         stratify: bool = False,
         model_params=[],
         predict_params=[],
+        train_params=None,
     ) -> float:
         """Perform cross-validation using k=num_folds folds
 
@@ -209,6 +210,9 @@
 
             # Instantiate the model
            self.algorithm = model(training_data, self.target_feature, *model_params)
+            # Train the model, forwarding any train_params supplied by the caller
+            if train_params is not None:
+                self.algorithm.train(*train_params)
             fold_results = self.calculate_results_for_fold(self.algorithm, test_data, predict_params=predict_params)
             overall_results.append(fold_results)
 
diff --git a/Project02/source/Utilities/Preprocess.py b/Project02/source/Utilities/Preprocess.py
index b73579b..f2eaa6f 100644
--- a/Project02/source/Utilities/Preprocess.py
+++ b/Project02/source/Utilities/Preprocess.py
@@ -1,11 +1,12 @@
 import math
-from typing import List, Callable
+
+from typing import List, Callable, Dict
 import warnings
 warnings.simplefilter(action='ignore', category=FutureWarning)
 import pandas as pd
 
-Converter = dict[str, Callable]
-Bins = dict[str, int]
+Converter = Dict[str, Callable]
+Bins = Dict[str, int]
 
 
 class Preprocessor:
diff --git a/Project02/video.py b/Project02/video.py
index 115b737..7f256a5 100644
--- a/Project02/video.py
+++ b/Project02/video.py
@@ -270,7 +270,7 @@ def demonstrate_clustering(k):
 
     # Perform one iteration of clustering
     kmeans._initialize_clusters()
-    kmeans._update_samples(False)
+    kmeans._update_samples()
 
     # Print clusters
     for index, cluster in enumerate(kmeans.clusters):
@@ -313,13 +313,14 @@
 
     # # Perform 10-fold CV for E-NN
     print("\nRunning on E-NN")
-    # Define hyper-parameters
+    # # Define hyper-parameters
    num_neighbors = 5
 
-    eknn_cv = CrossValidation(training_data, target_feature="class")
-    eknn_results = eknn_cv.validate(EditedKNN, 10, True, predict_params=[num_neighbors, False, 0.0])
-    # Convert the results to performance metrics
+    eknn_cv = CrossValidation(training_data.copy(), target_feature="class")
+    eknn_results = eknn_cv.validate(EditedKNN, 10, True, predict_params=[num_neighbors, False, 0.0], train_params=[num_neighbors])
+
+    # # Convert the results to performance metrics
     eknn_measures = ExperimentHelper.convert_classification_results_to_measures(
         eknn_results, classification_levels
     )
@@ -335,7 +336,7 @@
 
     num_neighbors = 5
     num_clusters = 5
-    kmeans_cv = CrossValidation(training_data, target_feature="class")
+    kmeans_cv = CrossValidation(training_data.copy(), target_feature="class")
     kmeans_results = kmeans_cv.validate(
         KNN, 10, True, model_params=[False, None, num_clusters], predict_params=[num_neighbors]
     )
@@ -457,6 +458,7 @@ def project_demonstration():
     demonstrate_average_classification_performance()
 
     input("")
+    # Display average performance across ten folds for k-NN, k-Means, and ENN on a regression data set
     demonstrate_average_regression_performance()
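
For context, here is a minimal, self-contained sketch of the train_params flow that validate() gains in the CrossValidation.py hunk above: the model is instantiated once per fold, and if train_params is supplied it is unpacked into the model's train() call before predictions are made. This is illustration only; DummyModel and run_fold are hypothetical stand-ins invented for this sketch, not classes or functions from the repository.

from typing import List, Optional


class DummyModel:
    """Hypothetical stand-in for a model such as EditedKNN."""

    def __init__(self, training_data: List[int], target_feature: str):
        self.training_data = training_data
        self.target_feature = target_feature
        self.k = None

    def train(self, k: int) -> None:
        # Stand-in for the training/editing step driven by train_params
        self.k = k

    def predict(self, sample: int) -> int:
        # Trivial placeholder prediction
        return sample


def run_fold(model_cls, training_data, target_feature, model_params=(), train_params: Optional[list] = None):
    # Instantiate the model for this fold
    model = model_cls(training_data, target_feature, *model_params)
    # Train the model only when train_params is provided,
    # mirroring: if train_params is not None: self.algorithm.train(*train_params)
    if train_params is not None:
        model.train(*train_params)
    return model


if __name__ == "__main__":
    fitted = run_fold(DummyModel, [1, 2, 3], "class", train_params=[5])
    print(fitted.k)  # prints 5, showing train_params reached train()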