@@ -41,6 +41,100 @@ def random_string_generator(self):
4141 random_str = '' .join (random .choice (string .ascii_uppercase + string .digits ) for _ in range (10 ))
4242 return random_str
4343
def ai_h2o_automl(self, df, y_column_name, model, model_name="h2o", mode=None):
    """Build the explanation dataset for an H2O model and start the dashboard.

    The method records a usage-analytics entry, predicts on ``df``, computes
    SHAP values through ``calculate_shap``, stores the resulting frame in
    ``self.df_final`` (with actual/predicted columns and, for classification,
    one probability column per prediction-frame column), fills ``self.param``
    and hands everything to ``dashboard().find``.

    Args:
        df: Data to explain, including the target column. A pandas DataFrame
            is converted to an ``h2o.H2OFrame``; anything else is assumed to
            already be an H2OFrame.
        y_column_name: Name of the target column inside ``df``.
        model: Trained model exposing ``predict`` (and optionally
            ``predict_proba``).
        model_name: Backend identifier; only ``"h2o"`` is supported here.
        mode: Passed through unchanged to ``dashboard().find``.

    Returns:
        True once the dashboard call has returned.

    Raises:
        ValueError: If ``model_name`` is not ``"h2o"``.
    """
    y_variable = "y_actual"
    y_variable_predict = "y_prediction"
    instance_id = self.random_string_generator()

    # Record a usage-analytics entry for this run.
    analytics = Analytics()
    analytics['ip'] = analytics.finding_ip()
    analytics['mac'] = analytics.finding_address()
    analytics['instance_id'] = instance_id
    analytics['time'] = str(datetime.datetime.now())
    analytics['total_columns'] = len(df.columns)
    analytics['total_rows'] = len(df)
    analytics['os'] = analytics.finding_system()
    analytics['model_name'] = model_name
    analytics["function"] = 'before_dashboard'
    analytics["query"] = "before_dashboard"
    analytics['finish_time'] = ''
    analytics.insert_data()

    # Without predictions nothing below can run, so fail with a clear message
    # instead of an AttributeError on an empty list further down.
    if model_name != 'h2o':
        raise ValueError(
            "ai_h2o_automl only supports model_name='h2o', got %r" % (model_name,)
        )

    if isinstance(df, pd.DataFrame):
        df = h2o.H2OFrame(df)

    # Predictions are needed first because the SHAP computation consumes them.
    # Predict on the full frame: the model needs the feature columns, not just
    # the target column.
    prediction_col = model.predict(df)
    predictions_pdf = prediction_col.as_data_frame()

    # H2O prediction frames normally carry the predicted value in their first
    # column ("predict"); a column named after the target is honoured if present.
    if y_column_name in predictions_pdf.columns:
        label_column = y_column_name
    else:
        label_column = predictions_pdf.columns[0]
    predicted_values = predictions_pdf[label_column].tolist()

    # Classification vs. regression is decided from the predicted values.
    is_classification = self.is_classification_given_y_array(predicted_values)

    # SHAP values and explainer.
    shap_calculator = calculate_shap()
    self.df_final, self.explainer = shap_calculator.find(
        model, df, prediction_col, is_classification, model_name=model_name
    )

    actual_values = df.as_data_frame()[y_column_name].tolist()
    self.df_final[y_variable_predict] = predicted_values
    self.df_final[y_variable] = actual_values

    if is_classification is True:
        # Find the class probabilities and add them to the dataset.
        try:
            prediction_col_prob = model.predict_proba(df)
        except Exception:
            # No usable predict_proba: reuse the prediction frame computed above.
            prediction_col_prob = predictions_pdf

        pd_prediction_col_prob = pd.DataFrame(prediction_col_prob)

        # NOTE(review): when the prediction frame is reused, its label column is
        # also exported as a "class" here, as in the original code - confirm
        # whether the dashboard expects that.
        classes = []
        for column in pd_prediction_col_prob.columns:
            self.df_final["probability_of_predicting_class_" + str(column)] = list(
                pd_prediction_col_prob[column]
            )
            classes.append(str(column))
        self.param["classes"] = classes

        try:
            expected_values_by_class = self.explainer.expected_value
        except Exception:
            # Explainer exposes no expected value: fall back to a uniform prior.
            expected_values_by_class = [1 / len(classes) for _ in classes]

        self.param["expected_values"] = expected_values_by_class
    else:
        try:
            self.param["expected_values"] = [self.explainer.expected_value]
        except Exception:
            # Explainer exposes no expected value: fall back to the mean of the
            # actual target values.
            self.param["expected_values"] = [
                round(np.array(actual_values).mean(), 2)
            ]

    self.param["is_classification"] = is_classification
    self.param["model_name"] = model_name
    self.param["model"] = model
    self.param["columns"] = df.columns
    self.param["y_variable"] = y_variable
    self.param["y_variable_predict"] = y_variable_predict
    self.param['instance_id'] = instance_id

    d = dashboard()
    d.find(self.df_final, mode, self.param)

    return True
136+
137+
44138 def ai (self , df , y , model , model_name = "xgboost" , mode = None ):
45139 y_variable = "y_actual"
46140 y_variable_predict = "y_prediction"
0 commit comments