formatting h2o_frame

tawabshakeel · tawabshakeel · commit e92c35d4d92c · 2020-12-09T01:09:03.000+05:00
diff --git a/lib/dashboard.py b/lib/dashboard.py
@@ -593,6 +593,8 @@ def update_table(*values):
                 df_selected = df[list(self.param["columns"]) + [self.param["y_variable_predict"]]]
 
                 row_number = len(df_selected)
+                if not isinstance(df_row, pd.DataFrame):
+                    df_row = df_row.as_data_frame()
                 df_selected.loc[row_number] = df_row.values[0]
 
                 p = protodash()
@@ -717,7 +719,7 @@ def update_graph2(xaxis_column_name, plot_type, sql_query):
 
         # Port Finder
         port = 8080
-        debug_value = False
+        debug_value = True
 
         if mode == "inline":
             try:
@@ -799,6 +801,10 @@ def calculate_prediction_shap(self, df):
         elif self.param["model_name"] == "catboost":
             prediction_col = self.param["model"].predict(df.to_numpy())
 
+        elif self.param['model_name'] == 'h2o':
+            df = h2o.H2OFrame(df)
+            prediction_col = self.param["model"].predict(df)
+
         else:
             prediction_col = self.param["model"].predict(df.to_numpy())
 
@@ -811,19 +817,58 @@ def calculate_prediction_shap(self, df):
                                      model_name=self.param["model_name"])
 
         # prediction col
-        df_final["y_prediction"] = prediction_col
+        # df_final["y_prediction"] = prediction_col
 
-        if is_classification == True:
+        if is_classification is True:
 
-            # find and add probabilities in the dataset.
-            prediction_col_prob = self.param["model"].predict_proba(df.to_numpy())
+            try:
+                df_final = self.formatting_y_pred_for_h2o_classification(df_final, prediction_col)
+                # find and add probabilities in the dataset.
+                prediction_col_prob = self.param["model"].predict_proba(df.to_numpy())
+            except:
+                prediction_col_prob = self.param["model"].predict(df)
+                prediction_col_prob = prediction_col_prob.as_data_frame()
             pd_prediction_col_prob = pd.DataFrame(prediction_col_prob)
 
             for c in pd_prediction_col_prob.columns:
                 df_final["Probability_" + str(c)] = list(pd_prediction_col_prob[c])
 
+            # for c in pd_prediction_col_prob.columns:
+            #     df_final["Probability_" + str(c)] = list(pd_prediction_col_prob[c])
+            #     if c != 'predict':
+            #         if "p" in c:
+            #             res = c.split("p")[-1]
+            #             df_final["Probability_" + str(res)] = list(pd_prediction_col_prob[c])
+            #         else:
+            #             df_final["Probability_" + str(c)] = list(pd_prediction_col_prob[c])
+            #     else:
+            #         df_final["Probability_" + str(c)] = list(pd_prediction_col_prob[c])
+            df_final = self.formatting_h2o_prediction_prob(df_final, pd_prediction_col_prob)
         return df_final
 
+    def formatting_y_pred_for_h2o_classification(self, final_df, pred_col):
+        """Store the model predictions in ``final_df["y_prediction"]``.
+
+        ``pred_col`` is first assigned directly. If pandas rejects that
+        assignment, ``pred_col`` is converted with ``as_data_frame()`` and its
+        ``"predict"`` column is used instead (presumably the H2OFrame case --
+        confirm against the caller).
+
+        Args:
+            final_df: DataFrame that receives the ``y_prediction`` column; it
+                is modified in place.
+            pred_col: Predictions, either something pandas can assign as a
+                column or an object exposing ``as_data_frame()``.
+
+        Returns:
+            The same ``final_df`` object, with ``y_prediction`` set.
+        """
+        try:
+            final_df["y_prediction"] = pred_col
+        except Exception:
+            # Not a bare ``except`` so KeyboardInterrupt/SystemExit propagate.
+            predicted = pred_col.as_data_frame()["predict"]
+            if len(predicted) == 1:
+                # A single prediction is broadcast to every row.
+                final_df["y_prediction"] = predicted.iloc[0]
+            else:
+                # Assign row by row; ``to_numpy()`` avoids index alignment
+                # between the prediction frame and ``final_df``.
+                final_df["y_prediction"] = predicted.to_numpy()
+        return final_df
+
+    def formatting_h2o_prediction_prob(self, final_df, h2o_pred):
+        """Copy every column of ``h2o_pred`` into ``final_df`` as ``Probability_<label>``.
+
+        Each column is stored under ``"Probability_" + str(label)``. A label
+        other than ``"predict"`` that starts with ``"p"`` (presumably H2O-style
+        names such as ``p0``/``p1`` -- confirm) is also stored under the label
+        with that leading ``"p"`` removed, e.g. ``Probability_0``.
+
+        Column labels are converted with ``str`` before any text test, so
+        integer labels (as produced by ``pd.DataFrame`` over a plain array)
+        are accepted as well.
+
+        Args:
+            final_df: DataFrame that receives the probability columns; it is
+                modified in place.
+            h2o_pred: DataFrame holding the prediction/probability columns.
+
+        Returns:
+            The same ``final_df`` object, with the new columns added.
+        """
+        for c in h2o_pred.columns:
+            values = list(h2o_pred[c])
+            label = str(c)
+            final_df["Probability_" + label] = values
+            # Strip only a leading "p"; splitting on every "p" would mangle
+            # labels that contain the letter elsewhere.
+            if label != "predict" and label.startswith("p") and len(label) > 1:
+                final_df["Probability_" + label[1:]] = values
+        return final_df
+
     def calculate_prediction(self, df):
         if self.param["model_name"] == "xgboost":
             import xgboost
@@ -836,6 +881,10 @@ def calculate_prediction(self, df):
         elif self.param["model_name"] == "catboost":
             prediction_col = self.param["model"].predict(df.to_numpy())
 
+        elif self.param['model_name'] == 'h2o':
+            df = h2o.H2OFrame(df)
+            prediction_col = self.param["model"].predict(df)
+
         else:
             prediction_col = self.param["model"].predict(df.to_numpy())