|
1 | 1 | import os |
2 | 2 | import sys |
| 3 | + |
| 4 | +import re |
| 5 | + |
| 6 | + |
3 | 7 | from pathlib import Path |
4 | 8 | from sys import platform |
5 | 9 | import subprocess |
| 10 | +import time |
6 | 11 |
|
# Anchor all data lookups to the directory containing this file, regardless
# of the process's current working directory.
path = Path(__file__).parent.absolute()
# Bundled example datasets live in ./datasets next to this module.
path_dataset = str(path / "datasets")
@@ -131,6 +136,97 @@ def ai(self, df, y, model, model_name="xgboost", mode=None): |
131 | 136 |
|
132 | 137 | return True |
133 | 138 |
|
| 139 | + |
| 140 | + def ai_test(self, df, y, model, model_name="xgboost", mode=None): |
| 141 | + y_variable= "y_actual" |
| 142 | + y_variable_predict= "y_prediction" |
| 143 | + |
| 144 | + |
| 145 | + |
| 146 | + prediction_col=[] |
| 147 | + |
| 148 | + if model_name == "xgboost": |
| 149 | + import xgboost |
| 150 | + if xgboost.__version__ in ['1.1.0', '1.1.1', '1.1.0rc2', '1.1.0rc1']: |
| 151 | + print("Current Xgboost version is not supported. Please install Xgboost using 'pip install xgboost==1.0.2'") |
| 152 | + return False |
| 153 | + prediction_col = model.predict(xgboost.DMatrix(df)) |
| 154 | + |
| 155 | + elif model_name == "catboost": |
| 156 | + prediction_col = model.predict(df.to_numpy()) |
| 157 | + |
| 158 | + else: |
| 159 | + prediction_col = model.predict(df.to_numpy()) |
| 160 | + |
| 161 | + # is classification? |
| 162 | + is_classification = self.is_classification_given_y_array(prediction_col) |
| 163 | + |
| 164 | + |
| 165 | + |
| 166 | + #shap |
| 167 | + c = calculate_shap() |
| 168 | + self.df_final, self.explainer = c.find(model, df, prediction_col, is_classification, model_name=model_name) |
| 169 | + |
| 170 | + #prediction col |
| 171 | + self.df_final[y_variable_predict] = prediction_col |
| 172 | + |
| 173 | + |
| 174 | + |
| 175 | + self.df_final[y_variable] = y |
| 176 | + |
| 177 | + |
| 178 | + #additional inputs. |
| 179 | + if is_classification==True: |
| 180 | + # find and add probabilities in the dataset. |
| 181 | + prediction_col_prob = model.predict_proba(df.to_numpy()) |
| 182 | + pd_prediction_col_prob = pd.DataFrame(prediction_col_prob) |
| 183 | + |
| 184 | + for c in pd_prediction_col_prob.columns: |
| 185 | + self.df_final["probability_of_predicting_class_" + str(c)] = list(pd_prediction_col_prob[c]) |
| 186 | + |
| 187 | + classes = [] |
| 188 | + for c in pd_prediction_col_prob.columns: |
| 189 | + classes.append(str(c)) |
| 190 | + self.param["classes"]=classes |
| 191 | + |
| 192 | + try: |
| 193 | + expected_values_by_class = self.explainer.expected_value |
| 194 | + except: |
| 195 | + expected_values_by_class=[] |
| 196 | + for c in range(len(classes)): |
| 197 | + expected_values_by_class.append(1/len(classes)) |
| 198 | + |
| 199 | + |
| 200 | + self.param["expected_values"]= expected_values_by_class |
| 201 | + else: |
| 202 | + try: |
| 203 | + expected_values = self.explainer.expected_value |
| 204 | + self.param["expected_values"] = [expected_values] |
| 205 | + except: |
| 206 | + expected_value = [round(np.array(y).mean(),2)] |
| 207 | + self.param["expected_values"] = expected_value |
| 208 | + |
| 209 | + |
| 210 | + self.param["is_classification"]= is_classification |
| 211 | + self.param["model_name"]= model_name |
| 212 | + self.param["model"]= model |
| 213 | + self.param["columns"]= df.columns |
| 214 | + self.param["y_variable"]= y_variable |
| 215 | + self.param["y_variable_predict"]= y_variable_predict |
| 216 | + |
| 217 | + |
| 218 | + |
| 219 | + # manually test all the graphs to see if all work |
| 220 | + |
| 221 | + g = plotly_graphs() |
| 222 | + |
| 223 | + __, df2 = g.feature_importance(self.df_final) |
| 224 | + fim, df2 = g.feature_impact(self.df_final) |
| 225 | + sp = g.summary_plot(self.df_final) |
| 226 | + |
| 227 | + |
| 228 | + return True |
| 229 | + |
134 | 230 | def dataset_boston(self): |
135 | 231 | # load JS visualization code to notebook |
136 | 232 | shap.initjs() |
@@ -195,3 +291,6 @@ def run_command(command): |
195 | 291 |
|
196 | 292 |
|
197 | 293 |
|
| 294 | + |
| 295 | + |
| 296 | + |
0 commit comments