11import os
22import sys
3-
43import re
5-
64from pathlib import Path
75from sys import platform
86import subprocess
1917from calculate_shap import *
2018from analytics import Analytics
2119
22- """
23- This class calculates feature importance
24-
25- Input:
26-
27-
28- """
29-
30-
3120class explain ():
3221 def __init__ (self ):
3322 super (explain , self ).__init__ ()
3423 self .param = {}
3524
3625 # is classification function?
3726
def is_classification_given_y_array(self, y_test):
    """Guess whether an array of targets/predictions holds class labels.

    The decision is a heuristic on the number of distinct values:

    * fewer than 30 samples: classification if there are fewer than 10
      distinct values;
    * 30 samples or more: classification if there are fewer than 20
      distinct values.

    Args:
        y_test: Sized iterable of hashable values.

    Returns:
        True if the values look like class labels, False otherwise.
    """
    sample_count = len(y_test)
    distinct_count = len(set(y_test))
    # Small samples get the stricter cap on distinct values.
    distinct_cap = 10 if sample_count < 30 else 20
    return distinct_count < distinct_cap
27+ # def is_classification_given_y_array(self, y_test):
28+ # is_classification = False
29+ # total = len(y_test)
30+ # total_unique = len(set(y_test))
31+ # if total < 30:
32+ # if total_unique < 10:
33+ # is_classification = True
34+ # else:
35+ # if total_unique < 20:
36+ # is_classification = True
37+ # return is_classification
38+
4939
5040 def random_string_generator (self ):
5141 random_str = '' .join (random .choice (string .ascii_uppercase + string .digits ) for _ in range (10 ))
@@ -54,6 +44,8 @@ def random_string_generator(self):
5444 def ai (self , df , y , model , model_name = "xgboost" , mode = None ):
5545 y_variable = "y_actual"
5646 y_variable_predict = "y_prediction"
47+
48+ #Code for Analytics
5749 instance_id = self .random_string_generator ()
5850 analytics = Analytics ()
5951 analytics ['ip' ] = analytics .finding_ip ()
@@ -69,11 +61,6 @@ def ai(self, df, y, model, model_name="xgboost", mode=None):
6961 analytics ['finish_time' ] = ''
7062 analytics .insert_data ()
7163
72- # If yes, then different shap functions are required.
73- # get the shap value based on prediction and make a new dataframe.
74-
75- # find predictions first as shap values need that.
76-
7764 prediction_col = []
7865
7966 if model_name == "xgboost" :
@@ -88,40 +75,48 @@ def ai(self, df, y, model, model_name="xgboost", mode=None):
8875 prediction_col = model .predict (df .to_numpy ())
8976
9077 else :
91- prediction_col = model .predict (df . to_numpy () )
78+ prediction_col = model .predict (df )
9279
9380 # is classification?
94- is_classification = self .is_classification_given_y_array (prediction_col )
81+ #is_classification = self.is_classification_given_y_array(prediction_col)
82+ ModelType = lambda model : True if is_classifier (model ) else False
83+ is_classification = ModelType (model )
9584
9685 # shap
9786 c = calculate_shap ()
9887 self .df_final , self .explainer = c .find (model , df , prediction_col , is_classification , model_name = model_name )
9988
100- # prediction col
89+ #Append Model Decision & True Labels Columns into the dataset.
10190 self .df_final [y_variable_predict ] = prediction_col
102-
10391 self .df_final [y_variable ] = y
10492
10593 # additional inputs.
10694 if is_classification == True :
10795 # find and add probabilities in the dataset.
108- prediction_col_prob = model .predict_proba (df . to_numpy () )
109- pd_prediction_col_prob = pd .DataFrame (prediction_col_prob )
96+ # prediction_col_prob = model.predict_proba(df)
97+ # pd_prediction_col_prob = pd.DataFrame(prediction_col_prob)
11098
111- for c in pd_prediction_col_prob .columns :
112- self .df_final ["probability_of_predicting_class_" + str (c )] = list (pd_prediction_col_prob [c ])
99+ probabilities = model .predict_proba (df )
113100
114- classes = []
115- for c in pd_prediction_col_prob .columns :
116- classes .append (str (c ))
117- self .param ["classes" ] = classes
101+ for i in range (len (np .unique (prediction_col ))):
102+ self .df_final ['Probability: {}' .format (np .unique (prediction_col )[i ])] = probabilities [:,i ]
103+
104+ self .param ['classes' ] = np .unique (prediction_col )
105+
106+ #for c in pd_prediction_col_prob.columns:
107+ # self.df_final["probability_of_predicting_class_" + str(c)] = list(pd_prediction_col_prob[c])
108+
109+ #classes = []
110+ #for c in pd_prediction_col_prob.columns:
111+ # classes.append(str(c))
112+ #self.param["classes"] = classes
118113
119114 try :
120115 expected_values_by_class = self .explainer .expected_value
121116 except :
122117 expected_values_by_class = []
123- for c in range (len (classes )):
124- expected_values_by_class .append (1 / len (classes ))
118+ for c in range (len (np . unique ( prediction_col ) )):
119+ expected_values_by_class .append (1 / len (np . unique ( prediction_col ) ))
125120
126121 self .param ["expected_values" ] = expected_values_by_class
127122 else :
0 commit comments