1717from  calculate_shap  import  * 
1818from  analytics  import  Analytics 
1919
20+ 
2021class  explain ():
2122    def  __init__ (self ):
2223        super (explain , self ).__init__ ()
@@ -35,17 +36,108 @@ def __init__(self):
3536    #         if total_unique < 20: 
3637    #             is_classification = True 
3738    #     return is_classification 
38-     
3939
4040    def  random_string_generator (self ):
4141        random_str  =  '' .join (random .choice (string .ascii_uppercase  +  string .digits ) for  _  in  range (10 ))
4242        return  random_str 
4343
44+     def  ai_h2o_automl (self , df , y_column_name , model , model_name = "h2o" , mode = None ):
45+         y_variable  =  "y_actual" 
46+         y_variable_predict  =  "y_prediction" 
47+         y_variable  =  "y_actual" 
48+         y_variable_predict  =  "y_prediction" 
49+         instance_id  =  self .random_string_generator ()
50+         analytics  =  Analytics ()
51+         analytics ['ip' ] =  analytics .finding_ip ()
52+         analytics ['mac' ] =  analytics .finding_address ()
53+         analytics ['instance_id' ] =  instance_id 
54+         analytics ['time' ] =  str (datetime .datetime .now ())
55+         analytics ['total_columns' ] =  len (df .columns )
56+         analytics ['total_rows' ] =  len (df )
57+         analytics ['os' ] =  analytics .finding_system ()
58+         analytics ['model_name' ] =  model_name 
59+         analytics ["function" ] =  'before_dashboard' 
60+         analytics ["query" ] =  "before_dashboard" 
61+         analytics ['finish_time' ] =  '' 
62+         analytics .insert_data ()
63+ 
64+         # If yes, then different shap functuions are required. 
65+         # get the shap value based on predcton and make a new dataframe. 
66+ 
67+         # find predictions first as shap values need that. 
68+ 
69+         prediction_col  =  []
70+ 
71+         if  model_name  ==  'h2o' :
72+             if  isinstance (df , pd .DataFrame ):
73+                 df  =  h2o .H2OFrame (df )
74+             prediction_col  =  model .predict (df [y_column_name ])
75+         # is classification? 
76+ 
77+         is_classification  =  True  if  model .type  ==  'classifier'  else  False 
78+         # shap 
79+         c  =  calculate_shap ()
80+         self .df_final , self .explainer  =  c .find (model , df , prediction_col , is_classification ,
81+                                                model_name = model_name )
82+ 
83+         # prediction col 
84+         self .df_final [y_variable_predict ] =  prediction_col .as_data_frame ()[y_column_name ].tolist ()
85+ 
86+         self .df_final [y_variable ] =  df .as_data_frame ()[y_column_name ].tolist ()
87+ 
88+         # additional inputs. 
89+         if  is_classification  is  True :
90+             # find and add probabilities in the dataset. 
91+             try :
92+                 prediction_col_prob  =  model .predict_proba (df )
93+             except :
94+                 prediction_col_prob  =  model .predict (df )
95+             prediction_col_prob  =  prediction_col_prob .as_data_frame ()
96+ 
97+             pd_prediction_col_prob  =  pd .DataFrame (prediction_col_prob )
98+ 
99+             for  c  in  pd_prediction_col_prob .columns :
100+                 self .df_final ["probability_of_predicting_class_"  +  str (c )] =  list (pd_prediction_col_prob [c ])
101+ 
102+             classes  =  []
103+             for  c  in  pd_prediction_col_prob .columns :
104+                 classes .append (str (c ))
105+             self .param ["classes" ] =  classes 
106+ 
107+             try :
108+                 expected_values_by_class  =  self .explainer .expected_value 
109+             except :
110+                 expected_values_by_class  =  []
111+                 for  c  in  range (len (classes )):
112+                     expected_values_by_class .append (1  /  len (classes ))
113+ 
114+             self .param ["expected_values" ] =  expected_values_by_class 
115+         else :
116+             try :
117+                 expected_values  =  self .explainer .expected_value 
118+                 self .param ["expected_values" ] =  [expected_values ]
119+             except :
120+                 expected_value  =  [round (np .array (y ).mean (), 2 )]
121+                 self .param ["expected_values" ] =  expected_value 
122+ 
123+         self .param ["is_classification" ] =  is_classification 
124+         self .param ["model_name" ] =  model_name 
125+         self .param ["model" ] =  model 
126+         self .param ["columns" ] =  df .columns 
127+         self .param ["y_variable" ] =  y_variable 
128+         self .param ["y_variable_predict" ] =  y_variable_predict 
129+         self .param ['instance_id' ] =  instance_id 
130+ 
131+         d  =  dashboard ()
132+         d .find (self .df_final , mode , self .param )
133+ 
134+         return  True 
135+ 
44136    def  ai (self , df , y , model , model_name = "xgboost" , mode = None ):
45137        y_variable  =  "y_actual" 
46138        y_variable_predict  =  "y_prediction" 
47-          
48-         #Code for Analytics 
139+ 
140+         #  Code for Analytics 
49141        instance_id  =  self .random_string_generator ()
50142        analytics  =  Analytics ()
51143        analytics ['ip' ] =  analytics .finding_ip ()
@@ -78,38 +170,38 @@ def ai(self, df, y, model, model_name="xgboost", mode=None):
78170            prediction_col  =  model .predict (df )
79171
80172        # is classification? 
81-         #is_classification = self.is_classification_given_y_array(prediction_col) 
173+         #  is_classification = self.is_classification_given_y_array(prediction_col) 
82174        ModelType  =  lambda  model : True  if  is_classifier (model ) else  False 
83175        is_classification  =  ModelType (model )
84176
85177        # shap 
86178        c  =  calculate_shap ()
87179        self .df_final , self .explainer  =  c .find (model , df , prediction_col , is_classification , model_name = model_name )
88180
89-         #Append Model Decision & True Labels Columns into the dataset. 
181+         #  Append Model Decision & True Labels Columns into the dataset. 
90182        self .df_final [y_variable_predict ] =  prediction_col 
91183        self .df_final [y_variable ] =  y 
92184
93185        # additional inputs. 
94186        if  is_classification  ==  True :
95187            # find and add probabilities in the dataset. 
96-             #prediction_col_prob = model.predict_proba(df) 
97-             #pd_prediction_col_prob = pd.DataFrame(prediction_col_prob) 
188+             #  prediction_col_prob = model.predict_proba(df) 
189+             #  pd_prediction_col_prob = pd.DataFrame(prediction_col_prob) 
98190
99191            probabilities  =  model .predict_proba (df )
100192
101193            for  i  in  range (len (np .unique (prediction_col ))):
102-                 self .df_final ['Probability: {}' .format (np .unique (prediction_col )[i ])] =  probabilities [:,i ]
103-              
194+                 self .df_final ['Probability: {}' .format (np .unique (prediction_col )[i ])] =  probabilities [:,  i ]
195+ 
104196            self .param ['classes' ] =  np .unique (prediction_col )
105197
106-             #for c in pd_prediction_col_prob.columns: 
107-               #   self.df_final["probability_of_predicting_class_" + str(c)] = list(pd_prediction_col_prob[c]) 
198+             #  for c in pd_prediction_col_prob.columns: 
199+             #   self.df_final["probability_of_predicting_class_" + str(c)] = list(pd_prediction_col_prob[c]) 
108200
109-             #classes = [] 
110-             #for c in pd_prediction_col_prob.columns: 
111-               #   classes.append(str(c)) 
112-             #self.param["classes"] = classes 
201+             #  classes = [] 
202+             #  for c in pd_prediction_col_prob.columns: 
203+             #   classes.append(str(c)) 
204+             #  self.param["classes"] = classes 
113205
114206            try :
115207                expected_values_by_class  =  self .explainer .expected_value 
0 commit comments