Skip to content

Commit 27d0282

Browse files
REFACTOR: Updating the main function in main.py
1 parent 7a9ac58 commit 27d0282

File tree

1 file changed

+31
-1
lines changed

1 file changed

+31
-1
lines changed

main.py

Lines changed: 31 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -715,7 +715,37 @@ def main():
715715
:return: None
716716
"""
717717

718-
pass
718+
print(f"{BackgroundColors.CLEAR_TERMINAL}{BackgroundColors.BOLD}{BackgroundColors.GREEN}Starting Machine Learning Pipeline...{Style.RESET_ALL}\n") # Print the start message and clear the terminal
719+
720+
sorted_datasets = sorted(DATASETS.items()) # Sort datasets alphabetically by keys
721+
722+
all_model_scores = [] # List to store all models' performance metrics across all datasets
723+
724+
for index, (dataset_key, (training_file_path, testing_file_path)) in enumerate(sorted_datasets, start=1): # Enumerate through sorted datasets with index starting from 1
725+
dataset_name = os.path.basename(dataset_key) # Get the dataset name from the directory path
726+
727+
print(f"{BackgroundColors.BOLD}{BackgroundColors.GREEN}Processing dataset {BackgroundColors.CYAN}{index}/{len(sorted_datasets)}{BackgroundColors.GREEN}: {BackgroundColors.CYAN}{dataset_name}{BackgroundColors.GREEN}{Style.RESET_ALL}")
728+
729+
if not verify_filepath_exists(training_file_path) or not verify_filepath_exists(testing_file_path): # If either training or testing file does not exist
730+
print(f"{BackgroundColors.RED}Missing input files for {dataset_name}. Skipping.{Style.RESET_ALL}")
731+
continue # Skip to the next dataset if files are missing
732+
733+
train_df, test_df, split_required = load_and_prepare_data(training_file_path, testing_file_path) # Load and prepare the training and testing data
734+
X_train, X_test, y_train, y_test, feature_names = split_data(train_df, test_df, split_required) # Split the data into training and testing sets, and preprocess features
735+
736+
models, dataset_model_scores = train_and_evaluate_models(X_train, X_test, y_train, y_test, dataset_key, dataset_name) # Train and evaluate models on the dataset, returning trained models and their performance metrics
737+
738+
# for model_name, model in models.items(): # Iterate through each trained model
739+
# print(f"\n{BackgroundColors.BOLD}{BackgroundColors.GREEN}Explaining predictions for {model_name} on {dataset_name}...{Style.RESET_ALL}")
740+
# explain_with_multiple_methods(model, X_train, X_test, feature_names, model_name=model_name) # Explain model predictions using multiple methods
741+
742+
all_model_scores.extend(dataset_model_scores) if dataset_model_scores else None # Extend the list of all model scores with the current dataset's scores if available
743+
744+
print(f"{BackgroundColors.BOLD}{BackgroundColors.GREEN}Pipeline for {BackgroundColors.CYAN}{dataset_name}{BackgroundColors.GREEN} finished successfully.{Style.RESET_ALL}\n")
745+
746+
generate_overall_performance_summary(all_model_scores) if all_model_scores else None # Generate overall performance summary if there are any model scores
747+
748+
print(f"{BackgroundColors.BOLD}{BackgroundColors.GREEN}All datasets processed. Overall analysis finished.{Style.RESET_ALL}")
719749

720750
if __name__ == "__main__":
721751
"""

0 commit comments

Comments
 (0)