diff --git a/Dockerfile b/Dockerfile
index 5d2c48e..6eb9e3f 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,69 +1,35 @@
-# BioAnalyzer Backend Dockerfile
 FROM python:3.11-slim
 
 WORKDIR /app
-
-# FIX: Make Python recognize the application as a package
 ENV PYTHONPATH="/app:/app/app"
 
-# Install system dependencies
 RUN apt-get update && apt-get install -y \
-    gcc \
-    g++ \
-    curl \
-    git \
+    gcc g++ curl git \
     && rm -rf /var/lib/apt/lists/*
 
-# Copy pyproject.toml and README.md first for better caching
 COPY pyproject.toml README.md ./
 
-# Upgrade pip and setuptools first
 RUN pip install --upgrade pip setuptools wheel build
 
-# ------------------------------------------------------------
-# Step 1: Install PyTorch CPU versions (fixed +cpu issue)
-# Note: PyTorch CPU versions require special index URL, so we install them separately
-# before installing the package from pyproject.toml
-# ------------------------------------------------------------
-RUN pip install --no-cache-dir --default-timeout=600 --retries=10 \
-    --extra-index-url https://download.pytorch.org/whl/cpu \
-    torch==2.1.0+cpu \
-    torchvision==0.16.0+cpu \
-    torchaudio==2.1.0+cpu
+# Install PyTorch CPU wheels first; the +cpu builds live on PyTorch's own index
+RUN pip install --no-cache-dir --extra-index-url https://download.pytorch.org/whl/cpu \
+    torch==2.1.0+cpu torchvision==0.16.0+cpu torchaudio==2.1.0+cpu
 
-# ------------------------------------------------------------
-# Step 2: Copy application code
-# ------------------------------------------------------------
 COPY . .
 
-# ------------------------------------------------------------
-# Step 3: Install the package from pyproject.toml
-# This installs the package and all its dependencies from pyproject.toml
-# PyTorch is already installed above, so pip will skip it
-# Installing in editable mode (-e) ensures entry points are properly installed
-# ------------------------------------------------------------
-RUN pip install --no-cache-dir --default-timeout=300 --retries=5 -e .
+# Install the package and its dependencies from pyproject.toml (PyTorch is already satisfied)
+RUN pip install --no-cache-dir -e .
 
-# ------------------------------------------------------------
-# Step 4: Install testing dependencies (optional, for development)
-# ------------------------------------------------------------
-RUN pip install --no-cache-dir pytest>=7.4.0 pytest-cov>=4.1.0
+# Defensively install analysis dependencies (normally already pulled in above)
+RUN pip install --no-cache-dir pandas scikit-learn matplotlib seaborn
 
-# Create necessary directories
 RUN mkdir -p cache logs results
 
-# Make CLI executable
-RUN chmod +x cli.py
+RUN chmod +x cli.py || true
+RUN chmod +x scripts/*.py || true
 
-# Expose port
 EXPOSE 8000
 
-# Health check
-HEALTHCHECK --interval=30s --timeout=30s --start-period=5s --retries=3 \
-    CMD curl -f http://localhost:8000/health || exit 1
-
-# Set PYTHONPATH for app module imports (fixed nested /app/app issue)
-# ENV PYTHONPATH=/app:/app/app
+HEALTHCHECK CMD curl -f http://localhost:8000/health || exit 1
 
-# Default command (can be overridden)
 CMD ["python", "main.py", "--host", "0.0.0.0", "--port", "8000"]
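A quick way to confirm the +cpu wheels actually won out over any CUDA builds is a check inside the built image. A minimal sketch, assuming the image builds successfully; the helper filename and image tag are illustrative, not part of this diff:

    # verify_torch_cpu.py -- hypothetical post-build sanity check
    import torch

    assert torch.__version__.endswith("+cpu"), torch.__version__  # +cpu wheel installed
    assert not torch.cuda.is_available()  # slim CPU image: no CUDA runtime expected
    print(f"CPU-only PyTorch {torch.__version__} OK")

For example: docker run --rm <image> python verify_torch_cpu.py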
" + "Install with: pip install google-generativeai" + ) possible_env_paths = [ Path(__file__).parents[1] / ".env", # Original location diff --git a/config/requirements.txt b/config/requirements.txt index 7ab6b0a..cbbc783 100644 --- a/config/requirements.txt +++ b/config/requirements.txt @@ -10,8 +10,8 @@ torchaudio>=2.1.0+cpu numpy>=1.26.0 pandas>=2.1.1 scikit-learn>=1.3.0 -matplotlib>=3.7 -seaborn>=0.12 +matplotlib>=3.7.0 +seaborn>=0.12.0 biopython>=1.81 pytz>=2023.3 @@ -29,7 +29,7 @@ paper-qa>=5.0.0 # --- Vector Database --- qdrant-client>=1.7.0 -# --- Web Framework & API (FastAPI/Uvicorn) --- +# --- Web Framework & API --- fastapi>=0.104.0 uvicorn[standard]>=0.23.2 starlette>=0.31.1 @@ -47,19 +47,15 @@ wsproto>=1.0.0 h11>=0.12.0 httptools>=0.3.0 -# --- File Processing (Excel/Env) --- +# --- File Processing --- openpyxl>=3.1.0 xlrd>=2.0.1 python-dotenv>=1.0.0 PyYAML>=5.4.1 aiofiles>=0.7.0 -# --- Utilities & System --- -tqdm>=4.65.0` +# --- Utilities --- +tqdm>=4.65.0 psutil>=5.9.0 click>=8.0.1 watchfiles[watchdog]>=1.0.0 - -# --- Development (Uncomment to install) --- -# pytest>=7.4.0 -# pytest-cov>=4.1.0 \ No newline at end of file diff --git a/confusion_matrix_analysis.py b/confusion_matrix_analysis.py index 0c55795..1e80cb4 100644 --- a/confusion_matrix_analysis.py +++ b/confusion_matrix_analysis.py @@ -1,288 +1,103 @@ #!/usr/bin/env python3 """ -Confusion Matrix and Matthew's Correlation Coefficient Analysis - -This script compares BioAnalyzer predictions (analysis_results.csv) with -curator feedback (feedback.csv) to generate confusion matrices and calculate -MCC for each variable. +Formal validation of BioAnalyzer predictions using +confusion matrices and Matthews Correlation Coefficient (MCC). """ -import pandas as pd +from pathlib import Path +import json import numpy as np +import pandas as pd import matplotlib.pyplot as plt import seaborn as sns from sklearn.metrics import confusion_matrix, matthews_corrcoef -from pathlib import Path -import json -from typing import Dict, Tuple, List -# Set style for better-looking plots +CLASSES = ["ABSENT", "PARTIALLY_PRESENT", "PRESENT"] + sns.set_style("whitegrid") -plt.rcParams['figure.figsize'] = (12, 8) +plt.rcParams["figure.figsize"] = (10, 8) -def load_data(analysis_file: str, feedback_file: str) -> Tuple[pd.DataFrame, pd.DataFrame]: - """Load and prepare the analysis results and feedback data.""" - print("Loading data files...") - analysis_df = pd.read_csv(analysis_file) - feedback_df = pd.read_csv(feedback_file) - - print(f"Analysis results: {len(analysis_df)} records") - print(f"Feedback data: {len(feedback_df)} records") - - return analysis_df, feedback_df +def load_data(predictions: Path, feedback: Path): + return pd.read_csv(predictions), pd.read_csv(feedback) -def merge_data(analysis_df: pd.DataFrame, feedback_df: pd.DataFrame) -> pd.DataFrame: - """Merge analysis results with feedback data on PMID.""" - print("\nMerging data on PMID...") - - # Merge on PMID, keeping only records that exist in both - merged = analysis_df.merge( - feedback_df, - on='PMID', - suffixes=('_predicted', '_actual'), - how='inner' - ) - - print(f"Matched records: {len(merged)}") - - if len(merged) == 0: - raise ValueError("No matching PMIDs found between the two files!") - +def merge_on_pmid(pred_df, fb_df): + merged = pred_df.merge(fb_df, on="PMID", suffixes=("_predicted", "_actual")) + if merged.empty: + raise ValueError("No overlapping PMIDs found.") return merged -def calculate_binary_mcc(y_true: np.ndarray, y_pred: np.ndarray) -> float: - """ - 
diff --git a/confusion_matrix_analysis.py b/confusion_matrix_analysis.py
index 0c55795..1e80cb4 100644
--- a/confusion_matrix_analysis.py
+++ b/confusion_matrix_analysis.py
@@ -1,288 +1,103 @@
 #!/usr/bin/env python3
 """
-Confusion Matrix and Matthew's Correlation Coefficient Analysis
-
-This script compares BioAnalyzer predictions (analysis_results.csv) with
-curator feedback (feedback.csv) to generate confusion matrices and calculate
-MCC for each variable.
+Formal validation of BioAnalyzer predictions using
+confusion matrices and the Matthews correlation coefficient (MCC).
 """
 
-import pandas as pd
+from pathlib import Path
+import json
+
 import numpy as np
+import pandas as pd
 import matplotlib.pyplot as plt
 import seaborn as sns
 from sklearn.metrics import confusion_matrix, matthews_corrcoef
-from pathlib import Path
-import json
-from typing import Dict, Tuple, List
 
-# Set style for better-looking plots
+CLASSES = ["ABSENT", "PARTIALLY_PRESENT", "PRESENT"]
+
 sns.set_style("whitegrid")
-plt.rcParams['figure.figsize'] = (12, 8)
+plt.rcParams["figure.figsize"] = (10, 8)
 
 
-def load_data(analysis_file: str, feedback_file: str) -> Tuple[pd.DataFrame, pd.DataFrame]:
-    """Load and prepare the analysis results and feedback data."""
-    print("Loading data files...")
-    analysis_df = pd.read_csv(analysis_file)
-    feedback_df = pd.read_csv(feedback_file)
-    
-    print(f"Analysis results: {len(analysis_df)} records")
-    print(f"Feedback data: {len(feedback_df)} records")
-    
-    return analysis_df, feedback_df
+def load_data(predictions: Path, feedback: Path):
+    return pd.read_csv(predictions), pd.read_csv(feedback)
 
 
-def merge_data(analysis_df: pd.DataFrame, feedback_df: pd.DataFrame) -> pd.DataFrame:
-    """Merge analysis results with feedback data on PMID."""
-    print("\nMerging data on PMID...")
-    
-    # Merge on PMID, keeping only records that exist in both
-    merged = analysis_df.merge(
-        feedback_df,
-        on='PMID',
-        suffixes=('_predicted', '_actual'),
-        how='inner'
-    )
-    
-    print(f"Matched records: {len(merged)}")
-    
-    if len(merged) == 0:
-        raise ValueError("No matching PMIDs found between the two files!")
-    
+def merge_on_pmid(pred_df, fb_df):
+    merged = pred_df.merge(fb_df, on="PMID", suffixes=("_predicted", "_actual"))
+    if merged.empty:
+        raise ValueError("No overlapping PMIDs found.")
     return merged
 
 
-def calculate_binary_mcc(y_true: np.ndarray, y_pred: np.ndarray) -> float:
-    """
-    Calculate MCC for binary classification (PRESENT vs not PRESENT).
-    Treats PARTIALLY_PRESENT as a separate category initially.
-    """
-    # Convert to binary: PRESENT = 1, everything else = 0
-    y_true_binary = (y_true == 'PRESENT').astype(int)
-    y_pred_binary = (y_pred == 'PRESENT').astype(int)
-    
-    return matthews_corrcoef(y_true_binary, y_pred_binary)
+def binary_mcc(y_true, y_pred):
+    y_true_bin = (y_true == "PRESENT").astype(int)  # PRESENT vs. everything else
+    y_pred_bin = (y_pred == "PRESENT").astype(int)
+    return matthews_corrcoef(y_true_bin, y_pred_bin)
 
 
-def calculate_multiclass_mcc(y_true: np.ndarray, y_pred: np.ndarray) -> float:
-    """
-    Calculate MCC for multi-class classification.
-    Maps PRESENT=2, PARTIALLY_PRESENT=1, ABSENT=0
-    """
-    status_map = {'ABSENT': 0, 'PARTIALLY_PRESENT': 1, 'PRESENT': 2}
-    
-    y_true_mapped = np.array([status_map.get(s, 0) for s in y_true])
-    y_pred_mapped = np.array([status_map.get(s, 0) for s in y_pred])
-    
-    return matthews_corrcoef(y_true_mapped, y_pred_mapped)
+def multiclass_mcc(y_true, y_pred):
+    return matthews_corrcoef(y_true, y_pred)  # sklearn accepts string labels directly
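The refactor drops the old integer status_map because sklearn's matthews_corrcoef accepts string labels directly. A small illustration with invented labels:

    import pandas as pd
    from sklearn.metrics import matthews_corrcoef

    y_true = pd.Series(["PRESENT", "ABSENT", "PARTIALLY_PRESENT", "PRESENT"])
    y_pred = pd.Series(["PRESENT", "ABSENT", "PRESENT", "ABSENT"])

    # Multi-class MCC straight from the string labels
    print(matthews_corrcoef(y_true, y_pred))

    # Binary MCC: PRESENT vs. everything else, as in binary_mcc()
    print(matthews_corrcoef((y_true == "PRESENT").astype(int),
                            (y_pred == "PRESENT").astype(int)))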
-def create_confusion_matrix_data(y_true: np.ndarray, y_pred: np.ndarray) -> Tuple[np.ndarray, List[str]]:
-    """Create confusion matrix with all three classes."""
-    classes = ['ABSENT', 'PARTIALLY_PRESENT', 'PRESENT']
-    
-    # Filter out any NaN values
-    mask = ~(pd.isna(y_true) | pd.isna(y_pred))
-    y_true_clean = y_true[mask]
-    y_pred_clean = y_pred[mask]
-    
-    cm = confusion_matrix(
-        y_true_clean,
-        y_pred_clean,
-        labels=classes
-    )
-    
-    return cm, classes
-
+def analyze_variable(df, variable):
+    yt = df[f"{variable}_actual"].dropna()
+    yp = df[f"{variable}_predicted"].dropna()
 
-def plot_confusion_matrix(cm: np.ndarray, classes: List[str], variable_name: str,
-                          save_path: str = None) -> None:
-    """Plot a confusion matrix with annotations."""
-    plt.figure(figsize=(10, 8))
-    
-    # Normalize confusion matrix to show percentages
-    cm_normalized = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
-    cm_normalized = np.nan_to_num(cm_normalized)
-    
-    # Create heatmap
-    sns.heatmap(
-        cm_normalized,
-        annot=True,
-        fmt='.2%',
-        cmap='Blues',
-        xticklabels=classes,
-        yticklabels=classes,
-        cbar_kws={'label': 'Normalized Frequency'},
-        annot_kws={'size': 12}
-    )
-    
-    plt.title(f'Confusion Matrix: {variable_name}\n(Normalized)', fontsize=16, fontweight='bold')
-    plt.ylabel('Actual (Curator)', fontsize=12)
-    plt.xlabel('Predicted (BioAnalyzer)', fontsize=12)
-    plt.tight_layout()
-    
-    if save_path:
-        plt.savefig(save_path, dpi=300, bbox_inches='tight')
-        print(f"  Saved confusion matrix plot: {save_path}")
-    
-    plt.close()
+    # Keep only rows labeled by both curator and model
+    mask = yt.index.intersection(yp.index)
+    yt, yp = yt.loc[mask], yp.loc[mask]
 
+    cm = confusion_matrix(yt, yp, labels=CLASSES)
 
-def analyze_variable(merged_df: pd.DataFrame, variable: str) -> Dict:
-    """Analyze a single variable and return metrics."""
-    print(f"\nAnalyzing variable: {variable}")
-    
-    pred_col = f"{variable}_predicted"
-    actual_col = f"{variable}_actual"
-    
-    if pred_col not in merged_df.columns or actual_col not in merged_df.columns:
-        print(f"  Warning: Columns not found for {variable}")
-        return None
-    
-    y_pred = merged_df[pred_col].values
-    y_actual = merged_df[actual_col].values
-    
-    # Remove NaN values
-    mask = ~(pd.isna(y_actual) | pd.isna(y_pred))
-    y_pred_clean = y_pred[mask]
-    y_actual_clean = y_actual[mask]
-    
-    if len(y_pred_clean) == 0:
-        print(f"  Warning: No valid data for {variable}")
-        return None
-    
-    # Create confusion matrix
-    cm, classes = create_confusion_matrix_data(y_actual_clean, y_pred_clean)
-    
-    # Calculate MCCs
-    mcc_binary = calculate_binary_mcc(y_actual_clean, y_pred_clean)
-    mcc_multiclass = calculate_multiclass_mcc(y_actual_clean, y_pred_clean)
-    
-    # Calculate accuracy
-    accuracy = np.sum(y_actual_clean == y_pred_clean) / len(y_actual_clean)
-    
-    # Calculate per-class metrics
-    results = {
-        'variable': variable,
-        'confusion_matrix': cm.tolist(),
-        'classes': classes,
-        'mcc_binary': float(mcc_binary),
-        'mcc_multiclass': float(mcc_multiclass),
-        'accuracy': float(accuracy),
-        'n_samples': int(len(y_pred_clean)),
-        'class_distribution_actual': {
-            cls: int(np.sum(y_actual_clean == cls)) for cls in classes
-        },
-        'class_distribution_predicted': {
-            cls: int(np.sum(y_pred_clean == cls)) for cls in classes
-        }
+    return {
+        "variable": variable,
+        "n": len(yt),
+        "accuracy": float((yt == yp).mean()),
+        "mcc_binary": float(binary_mcc(yt, yp)),
+        "mcc_multiclass": float(multiclass_mcc(yt, yp)),
+        "confusion_matrix": cm.tolist(),
     }
-    
-    print(f"  Samples: {results['n_samples']}")
-    print(f"  Accuracy: {results['accuracy']:.3f}")
-    print(f"  MCC (Binary): {results['mcc_binary']:.3f}")
-    print(f"  MCC (Multi-class): {results['mcc_multiclass']:.3f}")
-    
-    return results
 
 
-def generate_summary_report(all_results: List[Dict], output_file: str) -> None:
-    """Generate a summary report with all metrics."""
-    print("\n" + "="*80)
-    print("SUMMARY REPORT")
-    print("="*80)
-    
-    # Create summary DataFrame
-    summary_data = []
-    for result in all_results:
-        if result is None:
-            continue
-        summary_data.append({
-            'Variable': result['variable'],
-            'N Samples': result['n_samples'],
-            'Accuracy': f"{result['accuracy']:.3f}",
-            'MCC (Binary)': f"{result['mcc_binary']:.3f}",
-            'MCC (Multi-class)': f"{result['mcc_multiclass']:.3f}"
-        })
-    
-    summary_df = pd.DataFrame(summary_data)
-    print("\n" + summary_df.to_string(index=False))
-    
-    # Save to CSV
-    summary_df.to_csv(output_file, index=False)
-    print(f"\nSummary saved to: {output_file}")
+def plot_cm(cm, variable, out):
+    cmn = np.nan_to_num(cm / cm.sum(axis=1, keepdims=True))  # row-normalize; guard all-zero rows
+    sns.heatmap(cmn, annot=True, fmt=".2%", xticklabels=CLASSES, yticklabels=CLASSES)
+    plt.title(variable)
+    plt.xlabel("Predicted")
+    plt.ylabel("Actual")
+    plt.tight_layout()
+    plt.savefig(out, dpi=300)
+    plt.close()
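For reference, analyze_variable() expects the merged frame to carry paired "<variable>_actual" / "<variable>_predicted" columns. A toy run, assuming the script's functions are importable in a REPL; the values are invented:

    import pandas as pd
    from confusion_matrix_analysis import analyze_variable

    toy = pd.DataFrame({
        "PMID": [1, 2, 3],
        "Body Site Status_predicted": ["PRESENT", "ABSENT", "PRESENT"],
        "Body Site Status_actual": ["PRESENT", "ABSENT", "PARTIALLY_PRESENT"],
    })
    r = analyze_variable(toy, "Body Site Status")
    print(r["n"], r["accuracy"], r["mcc_multiclass"])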
 
 def main():
-    """Main analysis function."""
-    # File paths
-    analysis_file = Path("analysis_results.csv")
-    feedback_file = Path("feedback.csv")
-    output_dir = Path("confusion_matrix_results")
-    output_dir.mkdir(exist_ok=True)
-    
-    # Variables to analyze
+    preds, fb = load_data(
+        Path("analysis_results.csv"),
+        Path("feedback.csv"),
+    )
+    merged = merge_on_pmid(preds, fb)
+
+    outdir = Path("confusion_matrix_results")
+    outdir.mkdir(exist_ok=True)
+
     variables = [
-        'Host Species Status',
-        'Body Site Status',
-        'Condition Status',
-        'Sequencing Type Status',
-        'Taxa Level Status',
-        'Sample Size Status'
+        "Host Species Status",
+        "Body Site Status",
+        "Condition Status",
+        "Sequencing Type Status",
+        "Taxa Level Status",
+        "Sample Size Status",
     ]
-    
-    # Load and merge data
-    analysis_df, feedback_df = load_data(analysis_file, feedback_file)
-    merged_df = merge_data(analysis_df, feedback_df)
-    
-    # Analyze each variable
-    all_results = []
-    
-    for variable in variables:
-        result = analyze_variable(merged_df, variable)
-        if result:
-            all_results.append(result)
-            
-            # Plot confusion matrix
-            cm = np.array(result['confusion_matrix'])
-            plot_path = output_dir / f"confusion_matrix_{variable.replace(' ', '_')}.png"
-            plot_confusion_matrix(cm, result['classes'], variable, str(plot_path))
-    
-    # Generate summary report
-    summary_file = output_dir / "summary_metrics.csv"
-    generate_summary_report(all_results, str(summary_file))
-    
-    # Save detailed results as JSON
-    json_file = output_dir / "detailed_results.json"
-    with open(json_file, 'w') as f:
-        json.dump(all_results, f, indent=2)
-    print(f"\nDetailed results saved to: {json_file}")
-    
-    # Create a comprehensive comparison table
-    comparison_file = output_dir / "comparison_table.csv"
-    comparison_data = []
-    for result in all_results:
-        if result is None:
-            continue
-        for i, actual_class in enumerate(result['classes']):
-            for j, pred_class in enumerate(result['classes']):
-                count = result['confusion_matrix'][i][j]
-                comparison_data.append({
-                    'Variable': result['variable'],
-                    'Actual': actual_class,
-                    'Predicted': pred_class,
-                    'Count': count
-                })
-    
-    comparison_df = pd.DataFrame(comparison_data)
-    comparison_df.to_csv(comparison_file, index=False)
-    print(f"Comparison table saved to: {comparison_file}")
-    
-    print("\n" + "="*80)
-    print("Analysis complete!")
-    print(f"Results saved in: {output_dir}")
-    print("="*80)
+
+    results = []
+    for v in variables:
+        r = analyze_variable(merged, v)
+        results.append(r)
+        plot_cm(np.array(r["confusion_matrix"]), v, outdir / f"{v.replace(' ', '_')}.png")
+
+    # Summary CSV keeps scalar metrics; full matrices go to the JSON file
+    pd.DataFrame(results).drop(columns=["confusion_matrix"]).to_csv(
+        outdir / "summary_metrics.csv", index=False
+    )
+    with open(outdir / "detailed_results.json", "w") as fh:
+        json.dump(results, fh, indent=2)
+
+    print("Validation complete. Results in:", outdir)
 
 
 if __name__ == "__main__":
     main()
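A minimal smoke test for the whole pipeline; the rows below are fabricated purely to exercise the plumbing (metrics computed on two samples are meaningless), and the file names match the script's defaults:

    import pandas as pd

    cols = ["Host Species Status", "Body Site Status", "Condition Status",
            "Sequencing Type Status", "Taxa Level Status", "Sample Size Status"]

    pd.DataFrame({"PMID": [101, 102], **{c: ["PRESENT", "ABSENT"] for c in cols}}) \
        .to_csv("analysis_results.csv", index=False)
    pd.DataFrame({"PMID": [101, 102], **{c: ["PRESENT", "PARTIALLY_PRESENT"] for c in cols}}) \
        .to_csv("feedback.csv", index=False)
    # then: python confusion_matrix_analysis.py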