-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathexplainability.py
More file actions
61 lines (43 loc) · 1.72 KB
/
explainability.py
File metadata and controls
61 lines (43 loc) · 1.72 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
import joblib
import pandas as pd
import lime
import lime.lime_text
import shap
from sklearn.pipeline import make_pipeline
import matplotlib.pyplot as plt
def load_model_and_pipeline():
    """Deserialize the trained sentiment classifier and its TF-IDF vectorizer.

    Returns:
        tuple: ``(model, vectorizer)`` loaded via joblib from the ``model/``
        directory.
    """
    artifact_paths = ("model/sentiment_model.pkl", "model/tfidf_pipeline.pkl")
    classifier, tfidf = (joblib.load(path) for path in artifact_paths)
    return classifier, tfidf
def run_lime_example(df, model, vectorizer, sample_idx=0):
    """Explain a single review's prediction with LIME and save the plot.

    Args:
        df: DataFrame with a ``"comments"`` column of raw review text.
        model: Fitted classifier; ``predict_proba`` is reached through the
            vectorizer+model pipeline.
        vectorizer: Fitted text vectorizer compatible with ``model``.
        sample_idx: Positional row index of the review to explain (default 0).

    Side effects:
        Writes ``explainability/lime.png`` (the directory must already exist).
    """
    pipeline = make_pipeline(vectorizer, model)
    # NOTE(review): class names mix languages ("negativo" vs "positive") —
    # confirm they match the label order the model was trained with.
    explainer = lime.lime_text.LimeTextExplainer(class_names=["negativo", "positive"])
    sample_text = df["comments"].iloc[sample_idx]
    exp = explainer.explain_instance(
        sample_text, pipeline.predict_proba, num_features=10
    )
    # as_pyplot_figure() draws onto the current pyplot figure, which
    # savefig then captures; the returned Figure itself is not needed.
    exp.as_pyplot_figure()
    plt.savefig("explainability/lime.png", dpi=300, bbox_inches="tight")
    plt.close()
def run_shap_example(df, model, vectorizer, sample_size=100):
    """Explain predictions with SHAP over a random sample and save the plot.

    Args:
        df: DataFrame with a ``"comments"`` column of raw review text.
        model: Fitted classifier for ``shap.Explainer`` to wrap.
        vectorizer: Fitted text vectorizer exposing ``transform`` and
            ``get_feature_names_out``.
        sample_size: Number of reviews to sample (capped at ``len(df)``).

    Side effects:
        Writes ``explainability/shap.png`` (the directory must already exist).
    """
    # Cap at the available rows: DataFrame.sample raises ValueError when
    # asked for more rows than exist (sampling without replacement).
    n = min(sample_size, len(df))
    X_sample = df["comments"].sample(n, random_state=42).tolist()
    X_transformed = vectorizer.transform(X_sample)
    explainer = shap.Explainer(model, X_transformed)
    shap_values = explainer(X_transformed)
    shap.summary_plot(
        shap_values,
        X_transformed,
        feature_names=vectorizer.get_feature_names_out(),
        show=False,
    )
    plt.savefig("explainability/shap.png", dpi=300, bbox_inches="tight")
    plt.close()
if __name__ == "__main__":
    # Load the preprocessed review set and the fitted model/vectorizer pair.
    reviews = pd.read_csv("processed_csvs/customer_reviews_preprocessed.csv")
    sentiment_model, tfidf = load_model_and_pipeline()
    # LIME: explain the very first review in the dataset.
    run_lime_example(reviews, sentiment_model, tfidf, sample_idx=0)
    # SHAP: summarize feature importance over a 100-review random sample.
    run_shap_example(reviews, sentiment_model, tfidf, sample_size=100)