import sys
import pytest
import time
+import numpy as np
+
+from trustyai.explainers import LimeExplainer, SHAPExplainer
+from trustyai.model import feature, PredictionInput
+from trustyai.utils import TestModels
+from trustyai.metrics.saliency import mean_impact_score, classification_fidelity, local_saliency_f1
+
+from org.kie.trustyai.explainability.model import (
+    PredictionInputsDataDistribution,
+)

myPath = os.path.dirname(os.path.abspath(__file__))
sys.path.insert(0, myPath + "/../general/")

import test_counterfactualexplainer as tcf
-import test_limeexplainer as tlime
-

@pytest.mark.benchmark(
    group="counterfactuals", min_rounds=10, timer=time.time, disable_gc=True, warmup=True
)
@@ -35,9 +43,147 @@ def test_counterfactual_match_python_model(benchmark):
    """Counterfactual match (Python model)"""
    benchmark(tcf.test_counterfactual_match_python_model)

-# @pytest.mark.benchmark(
-#     group="lime", min_rounds=10, timer=time.time, disable_gc=True, warmup=True
-# )
-# def test_non_empty_input(benchmark):
-#     """Counterfactual match (Python model)"""
-#     benchmark(tlime.test_non_empty_input)
+
+@pytest.mark.benchmark(
+    group="lime", min_rounds=10, timer=time.time, disable_gc=True, warmup=True
+)
+def test_sumskip_lime_impact_score_at_2(benchmark):
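+    """LIME mean impact score on the sum-skip model"""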
+    no_of_features = 10
+    np.random.seed(0)
+    explainer = LimeExplainer()
+    model = TestModels.getSumSkipModel(0)
+    data = []
+    for i in range(100):
+        data.append([feature(name=f"f-num{i}", value=np.random.randint(-10, 10), dtype="number") for i in range(no_of_features)])
+    benchmark.extra_info['metric'] = mean_impact_score(explainer, model, data)
+    benchmark(mean_impact_score, explainer, model, data)
+
+
+@pytest.mark.benchmark(
+    group="shap", min_rounds=10, timer=time.time, disable_gc=True, warmup=True
+)
+def test_sumskip_shap_impact_score_at_2(benchmark):
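+    """SHAP mean impact score on the sum-skip model"""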
+    no_of_features = 10
+    np.random.seed(0)
+    background = []
+    for i in range(10):
+        background.append(PredictionInput([feature(name=f"f-num{i}", value=np.random.randint(-10, 10), dtype="number") for i in range(no_of_features)]))
+    explainer = SHAPExplainer(background, samples=10000)
+    model = TestModels.getSumSkipModel(0)
+    data = []
+    for i in range(100):
+        data.append([feature(name=f"f-num{i}", value=np.random.randint(-10, 10), dtype="number") for i in range(no_of_features)])
+    benchmark.extra_info['metric'] = mean_impact_score(explainer, model, data)
+    benchmark(mean_impact_score, explainer, model, data)
+
+
+@pytest.mark.benchmark(
+    group="lime", min_rounds=10, timer=time.time, disable_gc=True, warmup=True
+)
+def test_sumthreshold_lime_impact_score_at_2(benchmark):
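+    """LIME mean impact score on the sum-threshold model"""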
+    no_of_features = 10
+    np.random.seed(0)
+    explainer = LimeExplainer()
+    center = 100.0
+    epsilon = 10.0
+    model = TestModels.getSumThresholdModel(center, epsilon)
+    data = []
+    for i in range(100):
+        data.append([feature(name=f"f-num{i}", value=np.random.randint(-100, 100), dtype="number") for i in range(no_of_features)])
+    benchmark.extra_info['metric'] = mean_impact_score(explainer, model, data)
+    benchmark(mean_impact_score, explainer, model, data)
+
+
+@pytest.mark.benchmark(
+    group="shap", min_rounds=10, timer=time.time, disable_gc=True, warmup=True
+)
+def test_sumthreshold_shap_impact_score_at_2(benchmark):
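+    """SHAP mean impact score on the sum-threshold model"""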
+    no_of_features = 10
+    np.random.seed(0)
+    background = []
+    for i in range(100):
+        background.append(PredictionInput([feature(name=f"f-num{i}", value=np.random.randint(-100, 100), dtype="number") for i in range(no_of_features)]))
+    explainer = SHAPExplainer(background, samples=10000)
+    center = 100.0
+    epsilon = 10.0
+    model = TestModels.getSumThresholdModel(center, epsilon)
+    data = []
+    for i in range(100):
+        data.append([feature(name=f"f-num{i}", value=np.random.randint(-100, 100), dtype="number") for i in range(no_of_features)])
+    benchmark.extra_info['metric'] = mean_impact_score(explainer, model, data)
+    benchmark(mean_impact_score, explainer, model, data)
+
+
+@pytest.mark.benchmark(
+    group="lime", min_rounds=10, timer=time.time, disable_gc=True, warmup=True
+)
+def test_lime_fidelity(benchmark):
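+    """LIME classification fidelity on the even-sum model"""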
+    no_of_features = 10
+    np.random.seed(0)
+    explainer = LimeExplainer()
+    model = TestModels.getEvenSumModel(0)
+    data = []
+    for i in range(100):
+        data.append([feature(name=f"f-num{i}", value=np.random.randint(-100, 100), dtype="number") for i in range(no_of_features)])
+    benchmark.extra_info['metric'] = classification_fidelity(explainer, model, data)
+    benchmark(classification_fidelity, explainer, model, data)
+
+
+@pytest.mark.benchmark(
+    group="shap", min_rounds=10, timer=time.time, disable_gc=True, warmup=True
+)
+def test_shap_fidelity(benchmark):
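+    """SHAP classification fidelity on the even-sum model"""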
+    no_of_features = 10
+    np.random.seed(0)
+    background = []
+    for i in range(10):
+        background.append(PredictionInput(
+            [feature(name=f"f-num{i}", value=np.random.randint(-10, 10), dtype="number") for i in
+             range(no_of_features)]))
+    explainer = SHAPExplainer(background, samples=10000)
+    model = TestModels.getEvenSumModel(0)
+    data = []
+    for i in range(100):
+        data.append([feature(name=f"f-num{i}", value=np.random.randint(-100, 100), dtype="number") for i in
+                     range(no_of_features)])
+    benchmark.extra_info['metric'] = classification_fidelity(explainer, model, data)
+    benchmark(classification_fidelity, explainer, model, data)
+
+
+@pytest.mark.benchmark(
+    group="lime", min_rounds=10, timer=time.time, disable_gc=True, warmup=True
+)
+def test_lime_local_saliency_f1(benchmark):
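+    """LIME local saliency F1 on the even-sum model"""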
+    no_of_features = 10
+    np.random.seed(0)
+    explainer = LimeExplainer()
+    model = TestModels.getEvenSumModel(0)
+    output_name = "sum-even-but0"
+    data = []
+    for i in range(100):
+        data.append(PredictionInput([feature(name=f"f-num{i}", value=np.random.randint(-100, 100), dtype="number") for i in range(no_of_features)]))
+    distribution = PredictionInputsDataDistribution(data)
+    benchmark.extra_info['metric'] = local_saliency_f1(output_name, model, explainer, distribution, 2, 10)
+    benchmark(local_saliency_f1, output_name, model, explainer, distribution, 2, 10)
+
+
+@pytest.mark.benchmark(
+    group="shap", min_rounds=10, timer=time.time, disable_gc=True, warmup=True
+)
+def test_shap_local_saliency_f1(benchmark):
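+    """SHAP local saliency F1 on the even-sum model"""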
+    no_of_features = 10
+    np.random.seed(0)
+    background = []
+    for i in range(10):
+        background.append(PredictionInput(
+            [feature(name=f"f-num{i}", value=np.random.randint(-10, 10), dtype="number") for i in
+             range(no_of_features)]))
+    explainer = SHAPExplainer(background, samples=10000)
+    model = TestModels.getEvenSumModel(0)
+    output_name = "sum-even-but0"
+    data = []
+    for i in range(100):
+        data.append(PredictionInput([feature(name=f"f-num{i}", value=np.random.randint(-100, 100), dtype="number") for i in range(no_of_features)]))
+    distribution = PredictionInputsDataDistribution(data)
+    benchmark.extra_info['metric'] = local_saliency_f1(output_name, model, explainer, distribution, 2, 10)
+    benchmark(local_saliency_f1, output_name, model, explainer, distribution, 2, 10)
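
For reference, a minimal standalone sketch of the LIME impact-score measurement added above, runnable without pytest-benchmark. It is an illustration, not part of the commit, and assumes the same trustyai test utilities imported in the diff are available.

# Standalone sketch (assumption: the trustyai test utilities used in the diff are importable).
import numpy as np

from trustyai.explainers import LimeExplainer
from trustyai.model import feature
from trustyai.utils import TestModels
from trustyai.metrics.saliency import mean_impact_score

np.random.seed(0)
model = TestModels.getSumSkipModel(0)   # test model that sums all features except index 0
explainer = LimeExplainer()
# 100 random inputs of 10 numeric features, mirroring the benchmark setup above
data = [
    [feature(name=f"f-num{j}", value=np.random.randint(-10, 10), dtype="number") for j in range(10)]
    for _ in range(100)
]
print(mean_impact_score(explainer, model, data))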