99
1010"""
1111
12+
1213class calculate_shap ():
def __init__(self):
    """Delegate to the parent initialiser; no instance attributes are set here."""
    super(calculate_shap, self).__init__()
@@ -64,16 +65,14 @@ def catboost_shap(self, model, df, y_variable=None):
6465
6566 # final_df = df.join(shap_values)
6667
67- shap_columns = shap_values .columns
68+ shap_columns = shap_values .columns
6869
6970 Y = df .copy ()
7071 for c in shap_columns :
7172 Y [c ] = list (shap_values [c ])
7273
73-
7474 return Y
7575
76-
7776 def kernel_shap (self , model , X_train ):
7877 # use Kernel SHAP to explain test set predictions
7978 explainer = shap .KernelExplainer (model .predict_proba , X_train )
@@ -88,15 +87,13 @@ def kernel_shap(self, model, X_train):
8887 shap_columns .append (i + "_impact" )
8988 pd_shap .columns = shap_columns
9089
91-
92-
9390 Y = X_train .copy ()
9491 for c in shap_columns :
9592 Y [c ] = list (pd_shap [c ])
9693
9794 return Y , explainer
9895
99- def kernel_shap_classification (self , model , X_train ,prediction_col ):
96+ def kernel_shap_classification (self , model , X_train , prediction_col ):
10097 # use Kernel SHAP to explain test set predictions
10198 explainer = shap .KernelExplainer (model .predict_proba , X_train )
10299 shap_values = explainer .shap_values (X_train , nsamples = 100 )
@@ -110,41 +107,50 @@ def kernel_shap_classification(self, model, X_train,prediction_col):
110107 shap_columns .append (i + "_impact" )
111108 pd_shap .columns = shap_columns
112109
113-
114-
115110 Y = X_train .copy ()
116111 for c in shap_columns :
117112 Y [c ] = list (pd_shap [c ])
118113
119114 return Y , explainer
120115
121- def select_row_shap_values (self , shap_values ,prediction_col ):
def h2o_shap_results(self, model, h2o_df, X_train, prediction_col):
    """Append per-feature contribution columns from an H2O model to ``X_train``.

    Args:
        model: Object exposing ``predict_contributions(frame)``; the result
            must offer ``as_data_frame()`` returning a pandas DataFrame.
        h2o_df: Frame handed unchanged to ``model.predict_contributions``.
        X_train: pandas DataFrame of features. It is copied, never modified.
        prediction_col: Unused here; kept so the signature matches the
            other SHAP helpers on this class.

    Returns:
        tuple: ``(y, contributions)`` where ``y`` is a copy of ``X_train``
        with one extra ``"<column>_impact"`` column per contribution column,
        and ``contributions`` is the renamed contribution DataFrame itself
        (note: this is a DataFrame, not an explainer object).
    """
    import logging

    contributions = model.predict_contributions(h2o_df).as_data_frame()
    # The bias column is not a feature contribution; skip silently if absent.
    contributions = contributions.drop(columns=["BiasTerm"], errors="ignore")
    contributions.columns = [f"{name}_impact" for name in contributions.columns]

    # Diagnostic only: emitted through logging instead of printing to stdout.
    logging.getLogger(__name__).debug(
        "H2O contribution columns: %s", list(contributions.columns)
    )

    y = X_train.copy()
    for column in contributions.columns:
        # Assign as a plain list so values are placed positionally rather
        # than aligned on index labels, which may differ between the frames.
        y[column] = contributions[column].tolist()
    return y, contributions
129+
def select_row_shap_values(self, shap_values, prediction_col):
    """Build a DataFrame holding one SHAP row per entry of ``prediction_col``.

    When ``len(shap_values)`` equals ``len(prediction_col)`` the input is
    wrapped in a DataFrame unchanged. Otherwise, for the i-th entry ``p`` of
    ``prediction_col``, the row ``shap_values[p][i]`` is selected.

    NOTE(review): presumably ``shap_values`` is indexed by class first and by
    row second in the second case -- confirm against the explainer output.

    Args:
        shap_values: Sized, indexable collection of SHAP values.
        prediction_col: Sized iterable of keys used to index ``shap_values``.

    Returns:
        pandas.DataFrame: the selected rows, in ``prediction_col`` order.
    """
    if len(shap_values) != len(prediction_col):
        chosen_rows = [
            shap_values[label][row_idx]
            for row_idx, label in enumerate(prediction_col)
        ]
        return pd.DataFrame(chosen_rows)

    # Lengths match: the values are used exactly as given.
    return pd.DataFrame(shap_values)
137146
138-
139- def randomforest_shap_classification (self , model , X ,prediction_col ):
147+ def randomforest_shap_classification (self , model , X , prediction_col ):
140148 explainer = shap .TreeExplainer (model )
141- shap_values = explainer .shap_values (X ,approximate = True )
142-
149+ shap_values = explainer .shap_values (X , approximate = True )
143150
144- pd_shap = self .select_row_shap_values (shap_values ,prediction_col )
151+ pd_shap = self .select_row_shap_values (shap_values , prediction_col )
145152 all_columns = list (X .columns )
146153
147-
148154 pd_shap .columns = [f"{ y } _impact" for y in all_columns ]
149155
150156 shap_columns = pd_shap .columns
@@ -153,19 +159,15 @@ def randomforest_shap_classification(self, model, X,prediction_col):
153159 for c in shap_columns :
154160 Y [c ] = list (pd_shap [c ])
155161
156-
157162 return Y , explainer
158163
159-
160164 def randomforest_shap (self , model , X ):
161165 explainer = shap .TreeExplainer (model )
162- shap_values = explainer .shap_values (X ,approximate = True )
163-
166+ shap_values = explainer .shap_values (X , approximate = True )
164167
165168 pd_shap = pd .DataFrame (shap_values )
166169 all_columns = list (X .columns )
167170
168-
169171 pd_shap .columns = [f"{ y } _impact" for y in all_columns ]
170172
171173 shap_columns = pd_shap .columns
@@ -174,10 +176,8 @@ def randomforest_shap(self, model, X):
174176 for c in shap_columns :
175177 Y [c ] = list (pd_shap [c ])
176178
177-
178179 return Y , explainer
179180
180-
181181 def get_shap_values (self , x_array , model , x_variable , cat_index ):
182182 """
183183 SHAP VALUES CALCULATED
@@ -191,78 +191,77 @@ def get_shap_values(self, x_array, model, x_variable, cat_index):
191191 shap_values = pd .DataFrame (data = shap_values , columns = total_columns )
192192 return shap_values
193193
194- def find (self , model , df ,prediction_col ,is_classification , model_name = "xgboost" ):
def find(self, model, df, prediction_col, is_classification, model_name="xgboost"):
    """Route to the SHAP helper that matches ``model_name``.

    Routing, in the order it is evaluated:
      * ``"xgboost"`` / ``"lightgbm"`` -> ``self.xgboost_shap(model, df)``
      * ``"catboost"`` -> ``self.catboost_shap(model, df)``, explainer ``None``
      * ``"h2o"`` -> ``self.h2o_shap_results(model, df, df.as_data_frame(),
        prediction_col)``
      * ``"randomforest"`` -> ``self.randomforest_shap_classification`` when
        ``is_classification`` is truthy, else ``self.randomforest_shap``
      * any name containing ``"gradientboosting"`` -> ``self.xgboost_shap``
      * everything else (including ``"svm"``, ``"knn"``,
        ``"logisticregression"``, ``"decisiontree"``, ``"neuralnetwork"``)
        -> ``self.kernel_shap_classification`` when ``is_classification`` is
        truthy, else ``self.kernel_shap``

    Args:
        model: Fitted model forwarded to the chosen helper.
        df: Data forwarded to the chosen helper.
        prediction_col: Forwarded only to the classification and H2O helpers.
        is_classification: Picks the classification variant where one exists.
        model_name: Key selecting the helper; defaults to ``"xgboost"``.

    Returns:
        tuple: the two values produced by the chosen helper, always as a
        2-tuple ``(df2, explainer)``.
    """
    if model_name in ("xgboost", "lightgbm"):
        outcome = self.xgboost_shap(model, df)
    elif model_name == "catboost":
        # This helper yields only the frame; there is no explainer to return.
        outcome = (self.catboost_shap(model, df), None)
    elif model_name == "h2o":
        outcome = self.h2o_shap_results(
            model, df, df.as_data_frame(), prediction_col
        )
    elif model_name == "randomforest":
        if is_classification:
            outcome = self.randomforest_shap_classification(
                model, df, prediction_col
            )
        else:
            outcome = self.randomforest_shap(model, df)
    elif "gradientboosting" in model_name:
        # Covers "gradientboostingregressor" and any other matching name.
        outcome = self.xgboost_shap(model, df)
    elif is_classification:
        outcome = self.kernel_shap_classification(model, df, prediction_col)
    else:
        outcome = self.kernel_shap(model, df)

    # Unpack and repack so the result is a 2-tuple whatever the helper returned.
    df2, explainer = outcome
    return df2, explainer
265-
266-
267-
268-
0 commit comments