Skip to content

Commit fb5c05e

Browse files
FEAT: Adding the save_pca_results function to the pca.py code
1 parent 69fe8cd commit fb5c05e

File tree

1 file changed

+72
-0
lines changed

1 file changed

+72
-0
lines changed

pca.py

Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -282,6 +282,78 @@ def print_pca_results(results):
282282
print(f" {BackgroundColors.GREEN}Test FNR: {BackgroundColors.CYAN}{results['test_fnr']:.4f}{Style.RESET_ALL}")
283283
print(f" {BackgroundColors.GREEN}Elapsed Time: {BackgroundColors.CYAN}{results['elapsed_time']:.2f}s{Style.RESET_ALL}")
284284

285+
def save_pca_results(csv_path, all_results):
    """
    Saves PCA results to a structured text file and CSV comparison.

    Both files are written to a "Feature_Analysis" directory located next to
    the input CSV. When csv_path has no directory component, the current
    working directory is used as the parent.

    :param csv_path: Original CSV file path
    :param all_results: List of result dictionaries from different PCA configurations.
                        Each dictionary must provide the keys read below
                        (n_components, explained_variance, cv_*, test_*, elapsed_time).
    :return: None
    """

    # os.path.dirname returns "" for a bare filename; fall back to "." so the
    # output never ends up rooted at "/Feature_Analysis".
    base_dir = os.path.dirname(csv_path) or "."
    output_dir = os.path.join(base_dir, "Feature_Analysis")  # Output directory
    os.makedirs(output_dir, exist_ok=True)  # Create output directory if it doesn't exist

    # (label, key) pairs in the order they appear in the text report
    cv_metrics = [
        ("Accuracy", "cv_accuracy"),
        ("Precision", "cv_precision"),
        ("Recall", "cv_recall"),
        ("F1-Score", "cv_f1_score"),
    ]
    test_metrics = [
        ("Accuracy", "test_accuracy"),
        ("Precision", "test_precision"),
        ("Recall", "test_recall"),
        ("F1-Score", "test_f1_score"),
        ("FPR", "test_fpr"),
        ("FNR", "test_fnr"),
    ]

    separator = "=" * 80
    output_file = os.path.join(output_dir, "PCA_Results.txt")  # Output text file path
    with open(output_file, "w", encoding="utf-8") as f:  # Open the output file for writing
        f.write(separator + "\n")
        f.write("Principal Component Analysis (PCA) Feature Extraction Results\n")
        f.write(separator + "\n\n")
        f.write("Configuration:\n")
        f.write(" - Evaluation Method: 10-Fold Stratified Cross-Validation\n")
        f.write(" - Model: Random Forest Classifier (100 estimators)\n")
        f.write(" - Train/Test Split: 80/20\n")
        f.write(" - Scaling: StandardScaler (z-score normalization)\n\n")

        for i, results in enumerate(all_results, 1):  # Loop over each configuration's results
            variance = results["explained_variance"]
            f.write(f"\n{separator}\n")
            f.write(f"Configuration {i}: PCA with {results['n_components']} Components\n")
            f.write(f"{separator}\n\n")
            f.write(f"Explained Variance Ratio: {variance:.4f} ({variance*100:.2f}%)\n\n")

            f.write("10-Fold Cross-Validation Metrics (Training Set):\n")
            for label, key in cv_metrics:
                f.write(f" {label}: {results[key]:.4f}\n")
            f.write("\n")  # Blank line between the CV and test sections

            f.write("Test Set Metrics:\n")
            for label, key in test_metrics:
                f.write(f" {label}: {results[key]:.4f}\n")
            f.write(f" Training Time: {results['elapsed_time']:.2f}s\n")

    print(f"\n{BackgroundColors.GREEN}Detailed results saved to {BackgroundColors.CYAN}{output_file}{Style.RESET_ALL}")

    # Metrics rounded to 4 decimals in the comparison table, in column order
    rounded_keys = ["explained_variance"] + [key for _, key in cv_metrics + test_metrics]
    columns = ["n_components", *rounded_keys, "training_time_s"]
    comparison_data = [  # One row per PCA configuration
        {
            "n_components": results["n_components"],
            **{key: round(results[key], 4) for key in rounded_keys},
            "training_time_s": round(results["elapsed_time"], 2),
        }
        for results in all_results
    ]

    # Passing the columns explicitly keeps the CSV header even when all_results is empty
    comparison_df = pd.DataFrame(comparison_data, columns=columns)
    csv_output = os.path.join(output_dir, "PCA_Comparison.csv")  # Output CSV file path
    comparison_df.to_csv(csv_output, index=False)  # Save comparison DataFrame to CSV
    print(f"{BackgroundColors.GREEN}Comparison CSV saved to {BackgroundColors.CYAN}{csv_output}{Style.RESET_ALL}")

    print(f"\n{BackgroundColors.BOLD}{BackgroundColors.GREEN}PCA Configuration Comparison:{Style.RESET_ALL}")
    if VERBOSE:  # The full table is only printed in verbose mode
        print(comparison_df.to_string(index=False))
356+
285357
def run_pca_analysis(csv_path, n_components_list=[8, 16, 24, 32]):
286358
"""
287359
Runs PCA analysis with different numbers of components and evaluates performance.

0 commit comments

Comments
 (0)