
Commit 9bb2937

Author: marwan37
Commit message: improve README clarity, and small update to approve_deployment step
Parent: b5c8c02

File tree: 3 files changed (+106, -103 lines)


credit-scorer/README.md
Lines changed: 4 additions & 4 deletions

@@ -1,6 +1,6 @@
 # Credit Scoring EU AI Act Demo
 
-Automatically generate complete EU AI Act compliance documentation with minimal manual effort for credit scoring models.
+An end‑to‑end credit‑scoring workflow that automatically generates the technical evidence required by the [EU AI Act](https://www.zenml.io/blog/understanding-the-ai-act-february-2025-updates-and-implications).
 
 <div align="center"> <img src="assets/compliance-dashboard.png" alt="Compliance Dashboard" width="800" /> </div>
 
@@ -15,7 +15,7 @@ Financial institutions must comply with the EU AI Act for any high‑risk AI sys
 
 ## 🔍 Data Overview
 
-This project uses a credit scoring dataset based on the Home Credit Default Risk data. The raw dataset contains potentially sensitive attributes such as `CODE_GENDER`, `DAYS_BIRTH`, `NAME_EDUCATION_TYPE`, `NAME_FAMILY_STATUS`, and `NAME_HOUSING_TYPE`, which can be filtered using the pipeline's `sensitive_attributes` parameter to comply with fairness requirements.
+This project leverages the [Home Credit Default Risk dataset provided by the Home Credit Group](https://www.kaggle.com/c/home-credit-default-risk/overview). The raw dataset contains potentially sensitive attributes such as `CODE_GENDER`, `DAYS_BIRTH`, `NAME_EDUCATION_TYPE`, `NAME_FAMILY_STATUS`, and `NAME_HOUSING_TYPE`, which can be filtered using the pipeline's `sensitive_attributes` parameter to comply with fairness requirements.
 
 Key fields used for modeling:
 
@@ -46,7 +46,7 @@ The system implements three main pipelines that map directly to EU AI Act requir
 | **[Training](src/pipelines/training.py)** | **Train** → LightGBM w/ class‑imbalance handling 🎯<br>**Evaluate** → Accuracy, AUC, fairness analysis ⚖️<br>**Assess** → Risk scoring & model registry 📋 | Arts 9, 11, 15 |
 | **[Deployment](src/pipelines/deployment.py)** | **Approve** → Human oversight gate 🙋‍♂️<br>**Deploy** → Modal API deployment 🚀<br>**Monitor** → SBOM + post‑market tracking 📈 | Arts 14, 17, 18 |
 
-Each pipeline run automatically versions all inputs/outputs, generates profiling reports, creates risk assessments, produces SBOM, and compiles complete Annex IV technical documentation.
+Each pipeline run automatically versions all inputs/outputs, generates profiling reports, creates risk assessments, produces a [Software Bill of Materials (SBOM)](https://www.cisa.gov/sbom), and compiles complete Annex IV technical documentation.
 
 ## 🛠️ Project Structure
 
@@ -134,7 +134,7 @@ To run the dashboard:
 python run_dashboard.py
 ```
 
-> **Note:** All compliance artifacts are also directly accessible through the ZenML dashboard. The Streamlit dashboard is provided as a convenient additional interface for browsing compliance information interactively.
+> **Note:** All compliance artifacts are also directly accessible through the ZenML dashboard. The Streamlit dashboard is provided as a convenient additional interface for browsing compliance information locally and offline.
 
 ### 🔧 Configuration
 
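Note: the Data Overview change above points at the pipeline's `sensitive_attributes` parameter as the hook for fairness filtering. A minimal, hypothetical sketch of that kind of filtering; the pandas approach and the helper name `drop_sensitive_columns` are illustrative assumptions, not the project's actual step code.

```python
# Illustrative only - not the project's implementation. Drops whichever of the
# configured sensitive attributes are actually present in the raw frame.
import pandas as pd

SENSITIVE_ATTRIBUTES = [
    "CODE_GENDER", "DAYS_BIRTH", "NAME_EDUCATION_TYPE",
    "NAME_FAMILY_STATUS", "NAME_HOUSING_TYPE",
]

def drop_sensitive_columns(df: pd.DataFrame, sensitive_attributes: list[str]) -> pd.DataFrame:
    """Return a copy of df without the listed sensitive columns."""
    present = [col for col in sensitive_attributes if col in df.columns]
    return df.drop(columns=present)

# Toy usage:
df = pd.DataFrame({"CODE_GENDER": ["M", "F"], "AMT_CREDIT": [40000, 65000]})
print(drop_sensitive_columns(df, SENSITIVE_ATTRIBUTES).columns.tolist())  # ['AMT_CREDIT']
```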
credit-scorer/run.py
Lines changed: 1 addition & 1 deletion

@@ -83,7 +83,7 @@
 @click.option(
     "--auto-approve",
     is_flag=True,
-    default=True,
+    default=False,
     help="Auto-approve deployment (for CI/CD pipelines).",
 )
 @click.option(
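This flips `--auto-approve` from opt-out to opt-in: interactive human approval is now the default, and CI/CD jobs must pass the flag explicitly (presumably something like `python run.py --auto-approve`). A standalone sketch, not the project's run.py, showing how a click flag declared with `is_flag=True, default=False` behaves:

```python
# Standalone sketch of an opt-in boolean click flag.
import click

@click.command()
@click.option(
    "--auto-approve",
    is_flag=True,
    default=False,
    help="Auto-approve deployment (for CI/CD pipelines).",
)
def main(auto_approve: bool) -> None:
    # Without the flag the value is False, so a human oversight gate would run.
    click.echo(f"auto_approve={auto_approve}")

if __name__ == "__main__":
    main()
```

Run with no arguments it prints `auto_approve=False`; with `--auto-approve` it prints `auto_approve=True`.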

credit-scorer/src/steps/deployment/approve.py
Lines changed: 101 additions & 98 deletions

@@ -48,129 +48,132 @@ def approve_deployment(
     """
     # Timestamp for record-keeping
     timestamp = datetime.now().isoformat()
-
-    # Create human-readable summary for the reviewer
-    print("\n" + "=" * 50)
+
+    print("\n" + "=" * 60)
     print(" HUMAN OVERSIGHT REQUIRED (EU AI Act Article 14) ")
-    print("=" * 50)
-
+    print("=" * 60)
+
+    # Extract metrics for display
+    metrics = evaluation_results.get("metrics", {})
+    fairness_data = evaluation_results.get("fairness", {})
+    fairness_metrics = fairness_data.get("fairness_metrics", {})
+    bias_flag = fairness_data.get("bias_flag", False)
+
     # Performance metrics summary
     print("\n📊 PERFORMANCE METRICS:")
-
-    # Get accuracy with safe formatting
-    accuracy = evaluation_results["metrics"].get("accuracy", "N/A")
-    if accuracy != "N/A":
-        print(f" • Accuracy: {accuracy:.4f}")
+    print(f" • Accuracy: {metrics.get('accuracy', 'N/A'):.4f}")
+    print(f" • Precision: {metrics.get('precision', 'N/A'):.4f}")
+    print(f" • Recall: {metrics.get('recall', 'N/A'):.4f}")
+    print(f" • F1 Score: {metrics.get('f1_score', 'N/A'):.4f}")
+    print(f" • AUC-ROC: {metrics.get('auc_roc', 'N/A'):.4f}")
+    print(f" • Average Precision: {metrics.get('average_precision', 'N/A'):.4f}")
+    print(f" • Balanced Accuracy: {metrics.get('balanced_accuracy', 'N/A'):.4f}")
+
+    # Financial impact metrics
+    print("\n💰 FINANCIAL IMPACT:")
+    print(f" • Optimal Threshold: {metrics.get('optimal_threshold', 'N/A'):.4f}")
+    print(f" • Normalized Cost: {metrics.get('normalized_cost', 'N/A'):.4f}")
+
+    # Fairness summary (aggregated, not per-group)
+    print(f"\n⚖️ FAIRNESS ASSESSMENT:")
+    if bias_flag:
+        print(" 🚨 BIAS DETECTED - Requires careful review")
+
+        # Show worst disparity without listing all groups
+        max_disparity = 0
+        worst_attribute = None
+
+        for attribute, attr_metrics in fairness_metrics.items():
+            disparity = abs(attr_metrics.get("selection_rate_disparity", 0))
+            if disparity > max_disparity:
+                max_disparity = disparity
+                worst_attribute = attribute
+
+        if worst_attribute:
+            print(f" • Highest disparity: {max_disparity:.3f} ({worst_attribute})")
+
+        print(f" • Protected attributes analyzed: {len(fairness_metrics)}")
     else:
-        print(f" • Accuracy: {accuracy}")
-
-    # Get AUC with safe formatting (note the key change from 'auc' to 'auc_roc')
-    auc = evaluation_results["metrics"].get("auc_roc", "N/A")
-    if auc != "N/A":
-        print(f" • AUC: {auc:.4f}")
-    else:
-        print(f" • AUC: {auc}")
-
-    # Fairness summary
-    if "fairness_metrics" in evaluation_results:
-        print("\n⚖️ FAIRNESS ASSESSMENT:")
-        for attribute, metrics in evaluation_results[
-            "fairness_metrics"
-        ].items():
-            print(
-                f" • {attribute.capitalize()} disparate impact: {metrics.get('disparate_impact', 'N/A'):.2f}"
-            )
-            print(
-                f" • {attribute.capitalize()} demographic parity: {metrics.get('demographic_parity', 'N/A'):.4f}"
-            )
-
-    # Risk assessment summary
-    print("\n⚠️ RISK ASSESSMENT:")
-    print(f" • Risk level: {risk_scores.get('risk_level', 'N/A')}")
-
-    if "high_risk_factors" in risk_scores and risk_scores["high_risk_factors"]:
-        print(" • High risk factors detected:")
-        for factor in risk_scores["high_risk_factors"]:
-            print(f"   - {factor}")
-
-    if (
-        "mitigation_measures" in risk_scores
-        and risk_scores["mitigation_measures"]
-    ):
-        print(" • Mitigation measures:")
-        for measure in risk_scores["mitigation_measures"]:
-            print(f"   - {measure}")
-
-    # Create threshold checks
+        print(" ✅ No significant bias detected across protected groups")
+        print(f" • Protected attributes analyzed: {len(fairness_metrics)}")
+
+    # Risk assessment
+    print(f"\n⚠️ RISK ASSESSMENT:")
+    print(f" • Overall Risk Score: {risk_scores.get('overall', 0):.3f}")
+    print(f" • Risk Level: {risk_scores.get('risk_level', 'Unknown')}")
+
+    high_risk_count = len(risk_scores.get("high_risk_factors", []))
+    if high_risk_count > 0:
+        print(f" • High-risk factors identified: {high_risk_count}")
+
+    # Approval criteria check
     threshold_checks = {
-        "Accuracy": evaluation_results["metrics"].get("accuracy", 0)
-        >= approval_thresholds["accuracy"],
-        "Bias disparity": all(
-            metrics.get("selection_rate_disparity", 1)
-            <= approval_thresholds["bias_disparity"]
-            for attr, metrics in evaluation_results.get(
-                "fairness_metrics", {}
-            ).items()
-        ),
-        "Risk score": risk_scores.get("overall", 1)
-        <= approval_thresholds["risk_score"],
+        "Performance": metrics.get("accuracy", 0) >= approval_thresholds.get("accuracy", 0.7),
+        "Fairness": not bias_flag,
+        "Risk": risk_scores.get("overall", 1) <= approval_thresholds.get("risk_score", 0.8),
     }
-
-    # Display threshold check results
-    print("\n🔍 THRESHOLD CHECKS:")
+
+    print(f"\n🔍 APPROVAL CRITERIA:")
+    all_passed = True
     for check_name, passed in threshold_checks.items():
-        status = "✅ PASS" if passed else "⚠️ FAIL"
+        status = "✅ PASS" if passed else "❌ FAIL"
         print(f" • {check_name}: {status}")
-
-    # Decision prompt
-    print("\n📝 APPROVAL DECISION:")
-
-    # Check for automated decision via environment variable (e.g., in CI pipeline)
+        if not passed:
+            all_passed = False
+
+    print(f"\n📝 RECOMMENDATION: {'✅ APPROVE' if all_passed else '⚠️ REVIEW REQUIRED'}")
+
+    # Get decision
     decision = os.getenv("DEPLOY_APPROVAL")
-
+
     if decision is None:
-        # Interactive mode - request human input
-        decision = input("\nApprove deployment? (y/N): ").strip().lower()
+        if bias_flag:
+            print("\n⚠️ WARNING: Review fairness implications before approval")
+
+        decision = input(f"\nApprove deployment? ({'Y/n' if all_passed else 'y/N'}): ").strip().lower()
         approver = os.getenv("USER", input("Approver name: ").strip())
         rationale = input("Decision rationale: ").strip()
         decision_mode = "interactive"
     else:
-        # Automated mode
        approver = os.getenv("APPROVER", "automated")
-        rationale = os.getenv(
-            "APPROVAL_RATIONALE", "Automated approval via environment variable"
-        )
+        rationale = os.getenv("APPROVAL_RATIONALE", "Automated approval")
         decision_mode = "automated"
-
-    approved = decision == "y"
-
-    # Create documented record for compliance
+
+    # Handle default approval logic
+    if decision == "":
+        approved = all_passed  # Default to approve only if all criteria pass
+    else:
+        approved = decision in ["y", "yes"]
+
+    # Create approval record
     approval_record = {
         "approval_id": f"approval_{timestamp.replace(':', '-')}",
         "timestamp": timestamp,
         "approved": approved,
         "approver": approver,
         "rationale": rationale,
         "decision_mode": decision_mode,
-        "threshold_checks": {
-            check: passed for check, passed in threshold_checks.items()
-        },
-        "evaluation_summary": {
-            "accuracy": evaluation_results["metrics"].get("accuracy", None),
-            "auc": evaluation_results["metrics"].get("auc", None),
-            "fairness_flags": evaluation_results.get("fairness_flags", []),
-        },
-        "risk_summary": {
-            "risk_level": risk_scores.get("risk_level", "unknown"),
-            "high_risk_factors": risk_scores.get("high_risk_factors", []),
+        "criteria_met": all_passed,
+        "bias_detected": bias_flag,
+        "key_metrics": {
+            "accuracy": metrics.get("accuracy"),
+            "f1_score": metrics.get("f1_score"),
+            "auc_roc": metrics.get("auc_roc"),
+            "cost_per_application": metrics.get("normalized_cost"),
+            "risk_score": risk_scores.get("overall"),
         },
+        "protected_attributes_count": len(fairness_metrics),
+        "max_bias_disparity": max([
+            abs(attr_metrics.get("selection_rate_disparity", 0))
+            for attr_metrics in fairness_metrics.values()
+        ]) if fairness_metrics else 0,
     }
-
-    # Final decision message
+
+    # Final status
    if approved:
-        print("\n✅ DEPLOYMENT APPROVED")
+        print(f"\n✅ DEPLOYMENT APPROVED by {approver}")
     else:
-        print("\n❌ DEPLOYMENT REJECTED")
+        print(f"\n❌ DEPLOYMENT REJECTED by {approver}")
         raise RuntimeError(f"Deployment rejected by {approver}: {rationale}")
-
-    return approved, approval_record
+
+    return approved, approval_record
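The reworked step collapses approval into three aggregate criteria (Performance, Fairness, Risk) plus a default-decision rule, and it keeps a fully automated path when `DEPLOY_APPROVAL` is set in the environment (with `APPROVER` and `APPROVAL_RATIONALE` recorded alongside). A self-contained walkthrough with made-up numbers; only the check expressions mirror the step, and the 0.7 / 0.8 thresholds are the same fallbacks the step uses via `.get()`:

```python
# Toy walkthrough of the new approval criteria. Sample values are invented.
metrics = {"accuracy": 0.91, "f1_score": 0.55, "auc_roc": 0.78}
bias_flag = False                      # comes from the evaluation results' fairness analysis
risk_scores = {"overall": 0.42, "risk_level": "medium"}
approval_thresholds = {"accuracy": 0.7, "risk_score": 0.8}

threshold_checks = {
    "Performance": metrics.get("accuracy", 0) >= approval_thresholds.get("accuracy", 0.7),
    "Fairness": not bias_flag,
    "Risk": risk_scores.get("overall", 1) <= approval_thresholds.get("risk_score", 0.8),
}
all_passed = all(threshold_checks.values())  # equivalent to the step's per-check loop

# Default-decision rule from the diff: an empty answer at the prompt approves
# only when every criterion passed; otherwise the answer must be "y" or "yes".
decision = ""                                # simulate the operator pressing Enter
approved = all_passed if decision == "" else decision in ["y", "yes"]
print(threshold_checks)                      # every check is True here
print(approved)                              # True, so deployment would proceed
```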
