Skip to content

Commit e92c35d

Browse files
committed
formatting h2o_frame
1 parent d2352f7 commit e92c35d

File tree

1 file changed

+54
-5
lines changed

1 file changed

+54
-5
lines changed

lib/dashboard.py

Lines changed: 54 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -593,6 +593,8 @@ def update_table(*values):
593593
df_selected = df[list(self.param["columns"]) + [self.param["y_variable_predict"]]]
594594

595595
row_number = len(df_selected)
596+
if not isinstance(df_row, pd.DataFrame):
597+
df_row = df_row.as_data_frame()
596598
df_selected.loc[row_number] = df_row.values[0]
597599

598600
p = protodash()
@@ -717,7 +719,7 @@ def update_graph2(xaxis_column_name, plot_type, sql_query):
717719

718720
# Port Finder
719721
port = 8080
720-
debug_value = False
722+
debug_value = True
721723

722724
if mode == "inline":
723725
try:
@@ -799,6 +801,10 @@ def calculate_prediction_shap(self, df):
799801
elif self.param["model_name"] == "catboost":
800802
prediction_col = self.param["model"].predict(df.to_numpy())
801803

804+
elif self.param['model_name'] == 'h2o':
805+
df = h2o.H2OFrame(df)
806+
prediction_col = self.param["model"].predict(df)
807+
802808
else:
803809
prediction_col = self.param["model"].predict(df.to_numpy())
804810

@@ -811,19 +817,58 @@ def calculate_prediction_shap(self, df):
811817
model_name=self.param["model_name"])
812818

813819
# prediction col
814-
df_final["y_prediction"] = prediction_col
820+
# df_final["y_prediction"] = prediction_col
815821

816-
if is_classification == True:
822+
if is_classification is True:
817823

818-
# find and add probabilities in the dataset.
819-
prediction_col_prob = self.param["model"].predict_proba(df.to_numpy())
824+
try:
825+
df_final = self.formatting_y_pred_for_h2o_classification(df_final, prediction_col)
826+
# find and add probabilities in the dataset.
827+
prediction_col_prob = self.param["model"].predict_proba(df.to_numpy())
828+
except:
829+
prediction_col_prob = self.param["model"].predict(df)
830+
prediction_col_prob = prediction_col_prob.as_data_frame()
820831
pd_prediction_col_prob = pd.DataFrame(prediction_col_prob)
821832

822833
for c in pd_prediction_col_prob.columns:
823834
df_final["Probability_" + str(c)] = list(pd_prediction_col_prob[c])
824835

836+
# for c in pd_prediction_col_prob.columns:
837+
# df_final["Probability_" + str(c)] = list(pd_prediction_col_prob[c])
838+
# if c != 'predict':
839+
# if "p" in c:
840+
# res = c.split("p")[-1]
841+
# df_final["Probability_" + str(res)] = list(pd_prediction_col_prob[c])
842+
# else:
843+
# df_final["Probability_" + str(c)] = list(pd_prediction_col_prob[c])
844+
# else:
845+
# df_final["Probability_" + str(c)] = list(pd_prediction_col_prob[c])
846+
df_final = self.formatting_h2o_prediction_prob(df_final, pd_prediction_col_prob)
825847
return df_final
826848

849+
def formatting_y_pred_for_h2o_classification(self, final_df, pred_col):
    """Store the model predictions in ``final_df["y_prediction"]``.

    Args:
        final_df: pandas DataFrame that receives the ``y_prediction`` column.
            It is modified in place and also returned.
        pred_col: Either something pandas can assign directly as a column
            (array, list, Series, scalar) or a frame-like object exposing
            ``as_data_frame()`` whose result has a ``predict`` column
            (presumably an H2OFrame returned by an H2O model -- confirm
            against the caller).

    Returns:
        ``final_df`` with the ``y_prediction`` column set.
    """
    # Dispatch on the capability we need instead of relying on a bare
    # ``except`` around the assignment, which also hid unrelated errors.
    if hasattr(pred_col, "as_data_frame"):
        predicted = pred_col.as_data_frame()["predict"].to_numpy()
        # A single prediction is broadcast to every row (the previous
        # behaviour). Several predictions are assigned row by row rather
        # than repeating only the first one.
        final_df["y_prediction"] = predicted[0] if len(predicted) == 1 else predicted
    else:
        final_df["y_prediction"] = pred_col
    return final_df
858+
859+
def formatting_h2o_prediction_prob(self, final_df, h2o_pred):
    """Copy every column of ``h2o_pred`` into ``final_df`` as ``Probability_*``.

    Each column ``c`` is written to ``final_df["Probability_<c>"]``. A column
    other than ``predict`` whose name starts with ``"p"`` (for example ``p0``
    or ``p1``) is also written under the name without that prefix
    (``Probability_0``, ``Probability_1``).

    Args:
        final_df: pandas DataFrame to extend. It is modified in place and
            also returned.
        h2o_pred: pandas DataFrame of prediction/probability columns. Column
            labels may be strings or non-strings (for example the integer
            labels produced by ``pd.DataFrame(ndarray)``).

    Returns:
        ``final_df`` with the added ``Probability_*`` columns.
    """
    for column in h2o_pred.columns:
        values = list(h2o_pred[column])
        # Non-string labels used to crash on ``"p" in c``; normalise to str.
        label = str(column)
        final_df["Probability_" + label] = values
        # Strip only a leading "p". Splitting on every "p" mangled labels
        # that contain the letter elsewhere (e.g. "apple" -> "le").
        if label != "predict" and len(label) > 1 and label.startswith("p"):
            final_df["Probability_" + label[1:]] = values
    return final_df
871+
827872
def calculate_prediction(self, df):
828873
if self.param["model_name"] == "xgboost":
829874
import xgboost
@@ -836,6 +881,10 @@ def calculate_prediction(self, df):
836881
elif self.param["model_name"] == "catboost":
837882
prediction_col = self.param["model"].predict(df.to_numpy())
838883

884+
elif self.param['model_name'] == 'h2o':
885+
df = h2o.H2OFrame(df)
886+
prediction_col = self.param["model"].predict(df)
887+
839888
else:
840889
prediction_col = self.param["model"].predict(df.to_numpy())
841890

0 commit comments

Comments
 (0)