Skip to content

Commit 2273cf9

Browse files
committed
separating all features
1 parent 248676f commit 2273cf9

17 files changed

+55
-133
lines changed

__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1 +1,3 @@
11
from explainx.explain import *
2+
3+
from explainx.main import *

demo-explainx-with-sound.gif

-8.32 MB
Loading

explain.py

Lines changed: 36 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,6 @@
11
import os
22
import sys
3-
43
import re
5-
64
from pathlib import Path
75
from sys import platform
86
import subprocess
@@ -19,33 +17,25 @@
1917
from calculate_shap import *
2018
from analytics import Analytics
2119

22-
"""
23-
This class calculates feature importance
24-
25-
Input:
26-
27-
28-
"""
29-
30-
3120
class explain():
3221
def __init__(self):
3322
super(explain, self).__init__()
3423
self.param = {}
3524

3625
# is classification function?
3726

38-
def is_classification_given_y_array(self, y_test):
39-
is_classification = False
40-
total = len(y_test)
41-
total_unique = len(set(y_test))
42-
if total < 30:
43-
if total_unique < 10:
44-
is_classification = True
45-
else:
46-
if total_unique < 20:
47-
is_classification = True
48-
return is_classification
27+
# def is_classification_given_y_array(self, y_test):
28+
# is_classification = False
29+
# total = len(y_test)
30+
# total_unique = len(set(y_test))
31+
# if total < 30:
32+
# if total_unique < 10:
33+
# is_classification = True
34+
# else:
35+
# if total_unique < 20:
36+
# is_classification = True
37+
# return is_classification
38+
4939

5040
def random_string_generator(self):
5141
random_str = ''.join(random.choice(string.ascii_uppercase + string.digits) for _ in range(10))
@@ -54,6 +44,8 @@ def random_string_generator(self):
5444
def ai(self, df, y, model, model_name="xgboost", mode=None):
5545
y_variable = "y_actual"
5646
y_variable_predict = "y_prediction"
47+
48+
#Code for Analytics
5749
instance_id = self.random_string_generator()
5850
analytics = Analytics()
5951
analytics['ip'] = analytics.finding_ip()
@@ -69,11 +61,6 @@ def ai(self, df, y, model, model_name="xgboost", mode=None):
6961
analytics['finish_time'] = ''
7062
analytics.insert_data()
7163

72-
# If yes, then different shap functions are required.
73-
# get the shap value based on prediction and make a new dataframe.
74-
75-
# find predictions first as shap values need that.
76-
7764
prediction_col = []
7865

7966
if model_name == "xgboost":
@@ -88,40 +75,48 @@ def ai(self, df, y, model, model_name="xgboost", mode=None):
8875
prediction_col = model.predict(df.to_numpy())
8976

9077
else:
91-
prediction_col = model.predict(df.to_numpy())
78+
prediction_col = model.predict(df)
9279

9380
# is classification?
94-
is_classification = self.is_classification_given_y_array(prediction_col)
81+
#is_classification = self.is_classification_given_y_array(prediction_col)
82+
ModelType = lambda model: True if is_classifier(model) else False
83+
is_classification = ModelType(model)
9584

9685
# shap
9786
c = calculate_shap()
9887
self.df_final, self.explainer = c.find(model, df, prediction_col, is_classification, model_name=model_name)
9988

100-
# prediction col
89+
#Append Model Decision & True Labels Columns into the dataset.
10190
self.df_final[y_variable_predict] = prediction_col
102-
10391
self.df_final[y_variable] = y
10492

10593
# additional inputs.
10694
if is_classification == True:
10795
# find and add probabilities in the dataset.
108-
prediction_col_prob = model.predict_proba(df.to_numpy())
109-
pd_prediction_col_prob = pd.DataFrame(prediction_col_prob)
96+
#prediction_col_prob = model.predict_proba(df)
97+
#pd_prediction_col_prob = pd.DataFrame(prediction_col_prob)
11098

111-
for c in pd_prediction_col_prob.columns:
112-
self.df_final["probability_of_predicting_class_" + str(c)] = list(pd_prediction_col_prob[c])
99+
probabilities = model.predict_proba(df)
113100

114-
classes = []
115-
for c in pd_prediction_col_prob.columns:
116-
classes.append(str(c))
117-
self.param["classes"] = classes
101+
for i in range(len(np.unique(prediction_col))):
102+
self.df_final['Probability: {}'.format(np.unique(prediction_col)[i])] = probabilities[:,i]
103+
104+
self.param['classes'] = np.unique(prediction_col)
105+
106+
#for c in pd_prediction_col_prob.columns:
107+
# self.df_final["probability_of_predicting_class_" + str(c)] = list(pd_prediction_col_prob[c])
108+
109+
#classes = []
110+
#for c in pd_prediction_col_prob.columns:
111+
# classes.append(str(c))
112+
#self.param["classes"] = classes
118113

119114
try:
120115
expected_values_by_class = self.explainer.expected_value
121116
except:
122117
expected_values_by_class = []
123-
for c in range(len(classes)):
124-
expected_values_by_class.append(1 / len(classes))
118+
for c in range(len(np.unique(prediction_col))):
119+
expected_values_by_class.append(1 / len(np.unique(prediction_col)))
125120

126121
self.param["expected_values"] = expected_values_by_class
127122
else:

lib/analytics.py

Lines changed: 6 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -9,27 +9,18 @@ def __init__(self):
99

1010
@staticmethod
1111
def finding_address():
12-
try:
13-
val = get_mac()
14-
return val
15-
16-
except Exception as e :
17-
return None
12+
val = get_mac()
13+
return val
1814

1915
@staticmethod
2016
def finding_ip():
21-
try:
22-
val = socket.gethostbyname(socket.gethostname())
23-
return val
24-
except Exception as e:
25-
return None
17+
val = socket.gethostbyname(socket.gethostname())
18+
return val
2619

2720
@staticmethod
2821
def finding_system():
29-
try:
30-
return platform.system()
31-
except Exception as e:
32-
return None
22+
return platform.system()
23+
3324
def __setitem__(self, key, val):
3425
self.dict[key] = val
3526

lib/calculate_shap.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,6 @@
99
1010
"""
1111

12-
1312
class calculate_shap():
1413
def __init__(self):
1514
super(calculate_shap, self).__init__()

lib/dashboard.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -464,7 +464,6 @@ def toggle_collapse(n, is_open):
464464
return is_open
465465

466466
#Cohort Analysis - Callbacks
467-
468467
@app.callback(
469468
Output("modal", "is_open"),
470469
[Input("open", "n_clicks"), Input("close", "n_clicks")],
@@ -675,6 +674,7 @@ def update_graph(xaxis_column_name, third_axis_name, sql_query):
675674
g = plotly_graphs()
676675
graph_type = 'pdp'
677676
df3 = self.caching_data_manager(df, sql_query, graph_type, g.partial_dependence_plot)
677+
print(df3)
678678
fig = g.pdp_plot(df3, df3[xaxis_column_name], df3[xaxis_column_name+"_impact"], df3[third_axis_name])
679679
return fig
680680

lib/encode_decode_cat_col.py

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,7 @@
11
from imports import *
22
from sklearn.preprocessing import OneHotEncoder
33
import numpy as np
4-
"""
5-
This class calculates feature importance
64

7-
Input:
8-
9-
10-
"""
115

126

137
class encode_decode_cat_col():

lib/feature_impact.py

Lines changed: 2 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,22 +1,14 @@
11
from imports import *
22

3-
"""
4-
This class calculates feature impact
5-
6-
Input:
7-
8-
9-
"""
10-
11-
123
class feature_impact():
134
def __init__(self):
145
super(feature_impact, self).__init__()
156
self.param= None
167

178

189
def find(self, df):
19-
10+
df = pd.DataFrame(df)
11+
print(df)
2012
variables = [col for col in df.columns if '_impact' in col]
2113
y = []
2214
for i in range(len(variables)):

lib/feature_impact_classification.py

Lines changed: 0 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,5 @@
11
from imports import *
22

3-
"""
4-
This class calculates feature impact
5-
6-
Input:
7-
8-
9-
"""
10-
11-
123
class feature_impact_classification():
134
def __init__(self):
145
super(feature_impact_classification, self).__init__()

lib/feature_importance.py

Lines changed: 0 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,5 @@
11
from imports import *
22

3-
"""
4-
This class calculates feature importance
5-
6-
Input:
7-
8-
9-
"""
10-
11-
123
class feature_importance():
134
def __init__(self):
145
super(feature_importance, self).__init__()

0 commit comments

Comments
 (0)