Skip to content

Commit 1f3f000

Browse files
committed
parent h2o_model ai function
1 parent 6f872bf commit 1f3f000

File tree

1 file changed

+94
-0
lines changed

1 file changed

+94
-0
lines changed

explain.py

Lines changed: 94 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,100 @@ def random_string_generator(self):
4141
random_str = ''.join(random.choice(string.ascii_uppercase + string.digits) for _ in range(10))
4242
return random_str
4343

44+
def ai_h2o_automl(self, df, y_column_name, model, model_name="h2o", mode=None):
    """Explain an H2O model and hand the results to the dashboard.

    Records a usage event, scores ``df`` with ``model``, computes SHAP
    values, stores the dashboard parameters in ``self.param`` and finally
    calls ``dashboard().find``.

    Args:
        df: Data to explain, as a pandas ``DataFrame`` or an ``H2OFrame``.
            It must contain the target column ``y_column_name``.
        y_column_name: Name of the target (actual value) column in ``df``.
        model: Trained H2O model exposing ``predict``.
        model_name: Model family identifier; only ``"h2o"`` is supported.
        mode: Passed through unchanged to ``dashboard().find``.

    Returns:
        ``True`` once the dashboard call has returned.

    Raises:
        ValueError: If ``model_name`` is not ``"h2o"``.
    """
    y_variable = "y_actual"
    y_variable_predict = "y_prediction"
    instance_id = self.random_string_generator()

    # Record a usage event before the dashboard is started.
    analytics = Analytics()
    analytics['ip'] = analytics.finding_ip()
    analytics['mac'] = analytics.finding_address()
    analytics['instance_id'] = instance_id
    analytics['time'] = str(datetime.datetime.now())
    analytics['total_columns'] = len(df.columns)
    analytics['total_rows'] = len(df)
    analytics['os'] = analytics.finding_system()
    analytics['model_name'] = model_name
    analytics["function"] = 'before_dashboard'
    analytics["query"] = "before_dashboard"
    analytics['finish_time'] = ''
    analytics.insert_data()

    # Previously any other model_name fell through with an empty prediction
    # list and failed later with an opaque AttributeError.
    if model_name != 'h2o':
        raise ValueError(
            "ai_h2o_automl only supports model_name='h2o', got %r" % (model_name,)
        )

    if isinstance(df, pd.DataFrame):
        df = h2o.H2OFrame(df)

    # Predictions come first because the SHAP computation needs them.
    # Score the whole frame (the features), not just the target column.
    prediction_col = model.predict(df)
    prediction_frame = prediction_col.as_data_frame()

    # H2O names the predicted label/value column "predict", regardless of
    # what the target column is called.
    predicted_values = prediction_frame["predict"].tolist()
    actual_values = df.as_data_frame()[y_column_name].tolist()

    is_classification = self.is_classification_given_y_array(predicted_values)

    # SHAP values and explainer.
    shap_calculator = calculate_shap()
    self.df_final, self.explainer = shap_calculator.find(
        model, df, prediction_col, is_classification, model_name=model_name
    )

    self.df_final[y_variable_predict] = predicted_values
    self.df_final[y_variable] = actual_values

    if is_classification is True:
        # Add the per-class probabilities to the dataset.
        try:
            probabilities = pd.DataFrame(model.predict_proba(df))
        except Exception:
            # H2O models have no predict_proba; their prediction frame
            # already carries one probability column per class next to the
            # "predict" label column, which is not a class itself.
            probabilities = prediction_frame.drop(columns=["predict"], errors="ignore")

        for class_label in probabilities.columns:
            self.df_final["probability_of_predicting_class_" + str(class_label)] = list(
                probabilities[class_label]
            )

        classes = [str(class_label) for class_label in probabilities.columns]
        self.param["classes"] = classes

        try:
            expected_values_by_class = self.explainer.expected_value
        except Exception:
            # Best-effort fallback: assume a uniform prior over the classes.
            expected_values_by_class = [1 / len(classes) for _ in classes]

        self.param["expected_values"] = expected_values_by_class
    else:
        try:
            self.param["expected_values"] = [self.explainer.expected_value]
        except Exception:
            # Best-effort fallback: use the mean of the actual target values.
            self.param["expected_values"] = [round(float(np.mean(actual_values)), 2)]

    self.param["is_classification"] = is_classification
    self.param["model_name"] = model_name
    self.param["model"] = model
    self.param["columns"] = df.columns
    self.param["y_variable"] = y_variable
    self.param["y_variable_predict"] = y_variable_predict
    self.param['instance_id'] = instance_id

    d = dashboard()
    d.find(self.df_final, mode, self.param)

    return True
136+
137+
44138
def ai(self, df, y, model, model_name="xgboost", mode=None):
45139
y_variable = "y_actual"
46140
y_variable_predict = "y_prediction"

0 commit comments

Comments
 (0)