-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathfunctions.py
More file actions
80 lines (49 loc) · 2.18 KB
/
functions.py
File metadata and controls
80 lines (49 loc) · 2.18 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
import joblib
import numpy as np
import shap
import pandas as pd
# Location of the serialized classifier; a relative path, so it is resolved
# against the working directory of the process that imports this module.
_MODEL_PATH = "models/model.pkl"

# NOTE(review): joblib.load unpickles the file, and unpickling can execute
# arbitrary code -- only deploy model files that come from a trusted source.
model = joblib.load(_MODEL_PATH)

# SHAP tree explainer, built once at import time from the loaded model so
# that it is not rebuilt on every call.
explainer = shap.TreeExplainer(model)
def predict_diabetes_risk(age: float, bmi: float, diabetes_pedigree_function: float) -> dict:
    """Predict diabetes risk from three patient health metrics.

    Args:
        age: Patient's age in years.
        bmi: Body Mass Index (weight in kg / height in m²).
        diabetes_pedigree_function: Diabetes pedigree function score.

    Returns:
        A dictionary with two keys:
        ``"prediction"`` -- the predicted class as an int
        (0 = no diabetes, 1 = diabetes), and
        ``"probability"`` -- the model's probability for class 1,
        rounded to 4 decimal places.
    """
    # One row, three columns, in the order: age, BMI, pedigree score.
    features = np.array([[age, bmi, diabetes_pedigree_function]])

    predicted_class = model.predict(features)[0]

    # predict_proba yields one row per sample; index 1 of that row is the
    # probability of class 1 (diabetes).
    class_probabilities = model.predict_proba(features)[0]
    diabetes_probability = class_probabilities[1]

    # Cast to built-in int/float so the result holds no NumPy scalars.
    return {
        "prediction": int(predicted_class),
        "probability": round(float(diabetes_probability), 4),
    }
def explain_diabetes_risk(age: float, bmi: float, diabetes_pedigree_function: float) -> dict:
    """Explain a diabetes-risk prediction with per-feature SHAP values.

    Computes SHAP (SHapley Additive exPlanations) values for a single
    patient and reports how much each input feature contributes to the
    model output for class 1 (diabetes).

    Args:
        age: Age of the individual.
        bmi: Body Mass Index.
        diabetes_pedigree_function: A measure of hereditary diabetes risk.

    Returns:
        ``{"explanation": {feature_name: shap_value}}`` where every
        ``shap_value`` is a built-in float: that feature's contribution
        to class 1 (diabetes). Positive values increase the predicted
        diabetes risk, negative values decrease it.
    """
    # One-row DataFrame: its column names become the keys of the result.
    input_df = pd.DataFrame([{
        "age": age,
        "bmi": bmi,
        "diabetes_pedigree_function": diabetes_pedigree_function
    }])

    shap_values = explainer.shap_values(input_df)
    contributions = _positive_class_contributions(shap_values)

    return {
        "explanation": dict(zip(input_df.columns, contributions))
    }


def _positive_class_contributions(shap_values) -> list:
    """Return the first sample's per-feature class-1 SHAP values as floats.

    The layout returned by ``shap_values()`` depends on the SHAP release and
    on the model, so all three known layouts are normalised here:

    * list with one ``(n_samples, n_features)`` array per class (older SHAP);
    * one ``(n_samples, n_features, n_outputs)`` array (newer SHAP);
    * one ``(n_samples, n_features)`` array (single-output models).
    """
    if isinstance(shap_values, list):
        # Legacy layout: pick the class-1 array when it exists.
        per_class = shap_values[1] if len(shap_values) > 1 else shap_values[0]
        sample = np.asarray(per_class)[0]
    else:
        sample = np.asarray(shap_values)[0]
        if sample.ndim == 2:
            # Rows are features, columns are outputs: keep the class-1 column
            # (or the only column if the model exposes a single output).
            sample = sample[:, 1] if sample.shape[1] > 1 else sample[:, 0]
        # A 1-D sample already holds one value per feature and is used as is
        # (presumably the positive-class contribution -- confirm for the
        # deployed model type).

    # Built-in floats keep the result JSON-serializable.
    return [float(value) for value in sample]