
Commit 9bb2937

Author: marwan37
Commit message: improve README clarity, and small update to approve_deployment step
Parent: b5c8c02

File tree: 3 files changed (+106, -103 lines)


credit-scorer/README.md
Lines changed: 4 additions & 4 deletions

@@ -1,6 +1,6 @@
 # Credit Scoring EU AI Act Demo
 
-Automatically generate complete EU AI Act compliance documentation with minimal manual effort for credit scoring models.
+An end‑to‑end credit‑scoring workflow that automatically generates the technical evidence required by the [EU AI Act](https://www.zenml.io/blog/understanding-the-ai-act-february-2025-updates-and-implications).
 
 <div align="center"> <img src="assets/compliance-dashboard.png" alt="Compliance Dashboard" width="800" /> </div>
 
@@ -15,7 +15,7 @@ Financial institutions must comply with the EU AI Act for any high‑risk AI sys
 
 ## 🔍 Data Overview
 
-This project uses a credit scoring dataset based on the Home Credit Default Risk data. The raw dataset contains potentially sensitive attributes such as `CODE_GENDER`, `DAYS_BIRTH`, `NAME_EDUCATION_TYPE`, `NAME_FAMILY_STATUS`, and `NAME_HOUSING_TYPE`, which can be filtered using the pipeline's `sensitive_attributes` parameter to comply with fairness requirements.
+This project leverages the [Home Credit Default Risk dataset provided by the Home Credit Group](https://www.kaggle.com/c/home-credit-default-risk/overview). The raw dataset contains potentially sensitive attributes such as `CODE_GENDER`, `DAYS_BIRTH`, `NAME_EDUCATION_TYPE`, `NAME_FAMILY_STATUS`, and `NAME_HOUSING_TYPE`, which can be filtered using the pipeline's `sensitive_attributes` parameter to comply with fairness requirements.
 
 Key fields used for modeling:
 
@@ -46,7 +46,7 @@ The system implements three main pipelines that map directly to EU AI Act requir
 | **[Training](src/pipelines/training.py)** | **Train** → LightGBM w/ class‑imbalance handling 🎯<br>**Evaluate** → Accuracy, AUC, fairness analysis ⚖️<br>**Assess** → Risk scoring & model registry 📋 | Arts 9, 11, 15 |
 | **[Deployment](src/pipelines/deployment.py)** | **Approve** → Human oversight gate 🙋‍♂️<br>**Deploy** → Modal API deployment 🚀<br>**Monitor** → SBOM + post‑market tracking 📈 | Arts 14, 17, 18 |
 
-Each pipeline run automatically versions all inputs/outputs, generates profiling reports, creates risk assessments, produces SBOM, and compiles complete Annex IV technical documentation.
+Each pipeline run automatically versions all inputs/outputs, generates profiling reports, creates risk assessments, produces a [Software Bill of Materials (SBOM)](https://www.cisa.gov/sbom), and compiles complete Annex IV technical documentation.
 
 ## 🛠️ Project Structure
 
@@ -134,7 +134,7 @@ To run the dashboard:
 python run_dashboard.py
 ```
 
-> **Note:** All compliance artifacts are also directly accessible through the ZenML dashboard. The Streamlit dashboard is provided as a convenient additional interface for browsing compliance information interactively.
+> **Note:** All compliance artifacts are also directly accessible through the ZenML dashboard. The Streamlit dashboard is provided as a convenient additional interface for browsing compliance information locally and offline.
 
 ### 🔧 Configuration
 
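Note: the Data Overview change above points at the pipeline's `sensitive_attributes` parameter as the hook for fairness filtering. A minimal, hypothetical sketch of that kind of filtering; the pandas approach and the helper name `drop_sensitive_columns` are illustrative assumptions, not the project's actual step code.

```python
# Illustrative only - not the project's implementation. Drops whichever of the
# configured sensitive attributes are actually present in the raw frame.
import pandas as pd

SENSITIVE_ATTRIBUTES = [
    "CODE_GENDER", "DAYS_BIRTH", "NAME_EDUCATION_TYPE",
    "NAME_FAMILY_STATUS", "NAME_HOUSING_TYPE",
]

def drop_sensitive_columns(df: pd.DataFrame, sensitive_attributes: list[str]) -> pd.DataFrame:
    """Return a copy of df without the listed sensitive columns."""
    present = [col for col in sensitive_attributes if col in df.columns]
    return df.drop(columns=present)

# Toy usage:
df = pd.DataFrame({"CODE_GENDER": ["M", "F"], "AMT_CREDIT": [40000, 65000]})
print(drop_sensitive_columns(df, SENSITIVE_ATTRIBUTES).columns.tolist())  # ['AMT_CREDIT']
```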
credit-scorer/run.py
Lines changed: 1 addition & 1 deletion

@@ -83,7 +83,7 @@
 @click.option(
     "--auto-approve",
     is_flag=True,
-    default=True,
+    default=False,
     help="Auto-approve deployment (for CI/CD pipelines).",
 )
 @click.option(
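This flips `--auto-approve` from opt-out to opt-in: interactive human approval is now the default, and CI/CD jobs must pass the flag explicitly (presumably something like `python run.py --auto-approve`). A standalone sketch, not the project's run.py, showing how a click flag declared with `is_flag=True, default=False` behaves:

```python
# Standalone sketch of an opt-in boolean click flag.
import click

@click.command()
@click.option(
    "--auto-approve",
    is_flag=True,
    default=False,
    help="Auto-approve deployment (for CI/CD pipelines).",
)
def main(auto_approve: bool) -> None:
    # Without the flag the value is False, so a human oversight gate would run.
    click.echo(f"auto_approve={auto_approve}")

if __name__ == "__main__":
    main()
```

Run with no arguments it prints `auto_approve=False`; with `--auto-approve` it prints `auto_approve=True`.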

credit-scorer/src/steps/deployment/approve.py
Lines changed: 101 additions & 98 deletions

@@ -48,129 +48,132 @@ def approve_deployment(
     """
     # Timestamp for record-keeping
     timestamp = datetime.now().isoformat()
-
-    # Create human-readable summary for the reviewer
-    print("\n" + "=" * 50)
+
+    print("\n" + "=" * 60)
     print(" HUMAN OVERSIGHT REQUIRED (EU AI Act Article 14) ")
-    print("=" * 50)
-
+    print("=" * 60)
+
+    # Extract metrics for display
+    metrics = evaluation_results.get("metrics", {})
+    fairness_data = evaluation_results.get("fairness", {})
+    fairness_metrics = fairness_data.get("fairness_metrics", {})
+    bias_flag = fairness_data.get("bias_flag", False)
+
     # Performance metrics summary
     print("\n📊 PERFORMANCE METRICS:")
-
-    # Get accuracy with safe formatting
-    accuracy = evaluation_results["metrics"].get("accuracy", "N/A")
-    if accuracy != "N/A":
-        print(f" • Accuracy: {accuracy:.4f}")
+    print(f" • Accuracy: {metrics.get('accuracy', 'N/A'):.4f}")
+    print(f" • Precision: {metrics.get('precision', 'N/A'):.4f}")
+    print(f" • Recall: {metrics.get('recall', 'N/A'):.4f}")
+    print(f" • F1 Score: {metrics.get('f1_score', 'N/A'):.4f}")
+    print(f" • AUC-ROC: {metrics.get('auc_roc', 'N/A'):.4f}")
+    print(f" • Average Precision: {metrics.get('average_precision', 'N/A'):.4f}")
+    print(f" • Balanced Accuracy: {metrics.get('balanced_accuracy', 'N/A'):.4f}")
+
+    # Financial impact metrics
+    print("\n💰 FINANCIAL IMPACT:")
+    print(f" • Optimal Threshold: {metrics.get('optimal_threshold', 'N/A'):.4f}")
+    print(f" • Normalized Cost: {metrics.get('normalized_cost', 'N/A'):.4f}")
+
+    # Fairness summary (aggregated, not per-group)
+    print(f"\n⚖️ FAIRNESS ASSESSMENT:")
+    if bias_flag:
+        print(" 🚨 BIAS DETECTED - Requires careful review")
+
+        # Show worst disparity without listing all groups
+        max_disparity = 0
+        worst_attribute = None
+
+        for attribute, attr_metrics in fairness_metrics.items():
+            disparity = abs(attr_metrics.get("selection_rate_disparity", 0))
+            if disparity > max_disparity:
+                max_disparity = disparity
+                worst_attribute = attribute
+
+        if worst_attribute:
+            print(f" • Highest disparity: {max_disparity:.3f} ({worst_attribute})")
+
+        print(f" • Protected attributes analyzed: {len(fairness_metrics)}")
     else:
-        print(f" • Accuracy: {accuracy}")
-
-    # Get AUC with safe formatting (note the key change from 'auc' to 'auc_roc')
-    auc = evaluation_results["metrics"].get("auc_roc", "N/A")
-    if auc != "N/A":
-        print(f" • AUC: {auc:.4f}")
-    else:
-        print(f" • AUC: {auc}")
-
-    # Fairness summary
-    if "fairness_metrics" in evaluation_results:
-        print("\n⚖️ FAIRNESS ASSESSMENT:")
-        for attribute, metrics in evaluation_results[
-            "fairness_metrics"
-        ].items():
-            print(
-                f" • {attribute.capitalize()} disparate impact: {metrics.get('disparate_impact', 'N/A'):.2f}"
-            )
-            print(
-                f" • {attribute.capitalize()} demographic parity: {metrics.get('demographic_parity', 'N/A'):.4f}"
-            )
-
-    # Risk assessment summary
-    print("\n⚠️ RISK ASSESSMENT:")
-    print(f" • Risk level: {risk_scores.get('risk_level', 'N/A')}")
-
-    if "high_risk_factors" in risk_scores and risk_scores["high_risk_factors"]:
-        print(" • High risk factors detected:")
-        for factor in risk_scores["high_risk_factors"]:
-            print(f"   - {factor}")
-
-    if (
-        "mitigation_measures" in risk_scores
-        and risk_scores["mitigation_measures"]
-    ):
-        print(" • Mitigation measures:")
-        for measure in risk_scores["mitigation_measures"]:
-            print(f"   - {measure}")
-
-    # Create threshold checks
+        print(" ✅ No significant bias detected across protected groups")
+        print(f" • Protected attributes analyzed: {len(fairness_metrics)}")
+
+    # Risk assessment
+    print(f"\n⚠️ RISK ASSESSMENT:")
+    print(f" • Overall Risk Score: {risk_scores.get('overall', 0):.3f}")
+    print(f" • Risk Level: {risk_scores.get('risk_level', 'Unknown')}")
+
+    high_risk_count = len(risk_scores.get("high_risk_factors", []))
+    if high_risk_count > 0:
+        print(f" • High-risk factors identified: {high_risk_count}")
+
+    # Approval criteria check
     threshold_checks = {
-        "Accuracy": evaluation_results["metrics"].get("accuracy", 0)
-        >= approval_thresholds["accuracy"],
-        "Bias disparity": all(
-            metrics.get("selection_rate_disparity", 1)
-            <= approval_thresholds["bias_disparity"]
-            for attr, metrics in evaluation_results.get(
-                "fairness_metrics", {}
-            ).items()
-        ),
-        "Risk score": risk_scores.get("overall", 1)
-        <= approval_thresholds["risk_score"],
+        "Performance": metrics.get("accuracy", 0) >= approval_thresholds.get("accuracy", 0.7),
+        "Fairness": not bias_flag,
+        "Risk": risk_scores.get("overall", 1) <= approval_thresholds.get("risk_score", 0.8),
     }
-
-    # Display threshold check results
-    print("\n🔍 THRESHOLD CHECKS:")
+
+    print(f"\n🔍 APPROVAL CRITERIA:")
+    all_passed = True
     for check_name, passed in threshold_checks.items():
-        status = "✅ PASS" if passed else "⚠️ FAIL"
+        status = "✅ PASS" if passed else "❌ FAIL"
         print(f" • {check_name}: {status}")
-
-    # Decision prompt
-    print("\n📝 APPROVAL DECISION:")
-
-    # Check for automated decision via environment variable (e.g., in CI pipeline)
+        if not passed:
+            all_passed = False
+
+    print(f"\n📝 RECOMMENDATION: {'✅ APPROVE' if all_passed else '⚠️ REVIEW REQUIRED'}")
+
+    # Get decision
     decision = os.getenv("DEPLOY_APPROVAL")
-
+
     if decision is None:
-        # Interactive mode - request human input
-        decision = input("\nApprove deployment? (y/N): ").strip().lower()
+        if bias_flag:
+            print("\n⚠️ WARNING: Review fairness implications before approval")
+
+        decision = input(f"\nApprove deployment? ({'Y/n' if all_passed else 'y/N'}): ").strip().lower()
         approver = os.getenv("USER", input("Approver name: ").strip())
         rationale = input("Decision rationale: ").strip()
         decision_mode = "interactive"
     else:
-        # Automated mode
        approver = os.getenv("APPROVER", "automated")
-        rationale = os.getenv(
-            "APPROVAL_RATIONALE", "Automated approval via environment variable"
-        )
+        rationale = os.getenv("APPROVAL_RATIONALE", "Automated approval")
         decision_mode = "automated"
-
-    approved = decision == "y"
-
-    # Create documented record for compliance
+
+    # Handle default approval logic
+    if decision == "":
+        approved = all_passed  # Default to approve only if all criteria pass
+    else:
+        approved = decision in ["y", "yes"]
+
+    # Create approval record
     approval_record = {
         "approval_id": f"approval_{timestamp.replace(':', '-')}",
         "timestamp": timestamp,
         "approved": approved,
         "approver": approver,
         "rationale": rationale,
         "decision_mode": decision_mode,
-        "threshold_checks": {
-            check: passed for check, passed in threshold_checks.items()
-        },
-        "evaluation_summary": {
-            "accuracy": evaluation_results["metrics"].get("accuracy", None),
-            "auc": evaluation_results["metrics"].get("auc", None),
-            "fairness_flags": evaluation_results.get("fairness_flags", []),
-        },
-        "risk_summary": {
-            "risk_level": risk_scores.get("risk_level", "unknown"),
-            "high_risk_factors": risk_scores.get("high_risk_factors", []),
+        "criteria_met": all_passed,
+        "bias_detected": bias_flag,
+        "key_metrics": {
+            "accuracy": metrics.get("accuracy"),
+            "f1_score": metrics.get("f1_score"),
+            "auc_roc": metrics.get("auc_roc"),
+            "cost_per_application": metrics.get("normalized_cost"),
+            "risk_score": risk_scores.get("overall"),
         },
+        "protected_attributes_count": len(fairness_metrics),
+        "max_bias_disparity": max([
+            abs(attr_metrics.get("selection_rate_disparity", 0))
+            for attr_metrics in fairness_metrics.values()
+        ]) if fairness_metrics else 0,
     }
-
-    # Final decision message
+
+    # Final status
    if approved:
-        print("\n✅ DEPLOYMENT APPROVED")
+        print(f"\n✅ DEPLOYMENT APPROVED by {approver}")
     else:
-        print("\n❌ DEPLOYMENT REJECTED")
+        print(f"\n❌ DEPLOYMENT REJECTED by {approver}")
         raise RuntimeError(f"Deployment rejected by {approver}: {rationale}")
-
-    return approved, approval_record
+
+    return approved, approval_record
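The reworked step collapses approval into three aggregate criteria (Performance, Fairness, Risk) plus a default-decision rule, and it keeps a fully automated path when `DEPLOY_APPROVAL` is set in the environment (with `APPROVER` and `APPROVAL_RATIONALE` recorded alongside). A self-contained walkthrough with made-up numbers; only the check expressions mirror the step, and the 0.7 / 0.8 thresholds are the same fallbacks the step uses via `.get()`:

```python
# Toy walkthrough of the new approval criteria. Sample values are invented.
metrics = {"accuracy": 0.91, "f1_score": 0.55, "auc_roc": 0.78}
bias_flag = False                      # comes from the evaluation results' fairness analysis
risk_scores = {"overall": 0.42, "risk_level": "medium"}
approval_thresholds = {"accuracy": 0.7, "risk_score": 0.8}

threshold_checks = {
    "Performance": metrics.get("accuracy", 0) >= approval_thresholds.get("accuracy", 0.7),
    "Fairness": not bias_flag,
    "Risk": risk_scores.get("overall", 1) <= approval_thresholds.get("risk_score", 0.8),
}
all_passed = all(threshold_checks.values())  # equivalent to the step's per-check loop

# Default-decision rule from the diff: an empty answer at the prompt approves
# only when every criterion passed; otherwise the answer must be "y" or "yes".
decision = ""                                # simulate the operator pressing Enter
approved = all_passed if decision == "" else decision in ["y", "yes"]
print(threshold_checks)                      # every check is True here
print(approved)                              # True, so deployment would proceed
```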
