You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
Copy file name to clipboardExpand all lines: main.py
+31-1Lines changed: 31 additions & 1 deletion
Original file line number
Diff line number
Diff line change
def main():
    """
    Run the machine learning pipeline over every configured dataset.

    Iterates the DATASETS mapping in sorted order; for each dataset it loads
    and prepares the data, splits/preprocesses features, trains and evaluates
    the models, and accumulates per-model scores. After all datasets are
    processed, an overall performance summary is generated (if any scores
    were collected).

    :return: None
    """
    print(f"{BackgroundColors.CLEAR_TERMINAL}{BackgroundColors.BOLD}{BackgroundColors.GREEN}Starting Machine Learning Pipeline...{Style.RESET_ALL}\n")  # Print the start message and clear the terminal

    sorted_datasets = sorted(DATASETS.items())  # Sort datasets alphabetically by keys for a deterministic run order

    all_model_scores = []  # Accumulates every model's performance metrics across all datasets

    # `_index` is currently unused in the loop body; kept (underscored) because
    # enumerate(..., start=1) documents the 1-based dataset position.
    for _index, (dataset_key, (training_file_path, testing_file_path)) in enumerate(sorted_datasets, start=1):
        dataset_name = os.path.basename(dataset_key)  # Dataset name is the last component of the directory path

        # Guard clause: skip datasets whose input files are missing instead of crashing mid-pipeline.
        if not verify_filepath_exists(training_file_path) or not verify_filepath_exists(testing_file_path):
            print(f"{BackgroundColors.RED}Missing input files for {dataset_name}. Skipping.{Style.RESET_ALL}")
            continue  # Skip to the next dataset if files are missing

        train_df, test_df, split_required = load_and_prepare_data(training_file_path, testing_file_path)  # Load and prepare the training and testing data
        X_train, X_test, y_train, y_test, feature_names = split_data(train_df, test_df, split_required)  # Split the data and preprocess features

        # Train and evaluate models on the dataset, returning trained models and their performance metrics.
        models, dataset_model_scores = train_and_evaluate_models(X_train, X_test, y_train, y_test, dataset_key, dataset_name)

        # TODO(review): model explanation step is currently disabled — re-enable when ready.
        # for model_name, model in models.items():  # Iterate through each trained model
        #     print(f"\n{BackgroundColors.BOLD}{BackgroundColors.GREEN}Explaining predictions for {model_name} on {dataset_name}...{Style.RESET_ALL}")
        #     explain_with_multiple_methods(model, X_train, X_test, feature_names, model_name=model_name)

        # Idiomatic `if` instead of the original side-effect-only conditional
        # expression (`... if dataset_model_scores else None`); behavior is identical.
        if dataset_model_scores:
            all_model_scores.extend(dataset_model_scores)

        print(f"{BackgroundColors.BOLD}{BackgroundColors.GREEN}Pipeline for {BackgroundColors.CYAN}{dataset_name}{BackgroundColors.GREEN} finished successfully.{Style.RESET_ALL}\n")

    # Only produce the cross-dataset summary when at least one dataset yielded scores.
    if all_model_scores:
        generate_overall_performance_summary(all_model_scores)
0 commit comments