Skip to content

Commit 7b5137d

Browse files
jarverha
authored and committed
added documentation
1 parent 90ed570 commit 7b5137d

File tree

3 files changed

+36
-33
lines changed

3 files changed

+36
-33
lines changed

powershap/powershap.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -534,9 +534,8 @@ def transform(self, X):
534534
)
535535
return super().transform(X)
536536

537-
# def _more_tags(self):
538-
# return self._explainer._get_more_tags()
539-
537+
# Since sklearn 1.6, the tag system changed so this function is necessary to make it compatible
538+
# https://scikit-learn.org/stable/auto_examples/release_highlights/plot_release_highlights_1_6_0.html#improvements-to-the-developer-api-for-third-party-libraries
540539
def __sklearn_tags__(self):
541540
return self._explainer._get_more_tags()
542541

powershap/shap_wrappers/shap_explainer.py

Lines changed: 32 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -46,13 +46,10 @@ def __init__(self, model: Any):
4646
def _fit_get_shap(self, X_train, Y_train, X_val, Y_val, random_seed, **kwargs) -> np.array:
4747
raise NotImplementedError
4848

49-
# Should be implemented by explainers themselves
49+
# If the explainer supports nan values, infinite values, or others, the explainer must override this function
5050
def validate_data(self, _estimator, X, y, **kwargs):
5151
return validate_data(_estimator, X, y, **kwargs)
5252

53-
# def _validate_data(self, validate_data: Callable, X, y, **kwargs):
54-
# return validate_data(X, y, **kwargs)
55-
5653
# Should be implemented by subclass
5754
@staticmethod
5855
def supports_model(model) -> bool:
@@ -241,6 +238,7 @@ def _fit_get_shap(self, X_train, Y_train, X_val, Y_val, random_seed, **kwargs) -
241238
C_explainer = shap.TreeExplainer(PowerShap_model)
242239
return C_explainer.shap_values(X_val)
243240

241+
# Function to define the tags which will be used in sklearn pipelines
244242
def _get_more_tags(self):
245243
return Tags(
246244
estimator_type=None,
@@ -273,6 +271,7 @@ def _fit_get_shap(self, X_train, Y_train, X_val, Y_val, random_seed, **kwargs) -
273271
C_explainer = shap.TreeExplainer(PowerShap_model)
274272
return C_explainer.shap_values(X_val)
275273

274+
# Function to define the tags which will be used in sklearn pipelines
276275
def _get_more_tags(self):
277276
return Tags(
278277
estimator_type=None,
@@ -305,6 +304,7 @@ def _fit_get_shap(self, X_train, Y_train, X_val, Y_val, random_seed, **kwargs) -
305304
C_explainer = shap.TreeExplainer(PowerShap_model)
306305
return C_explainer.shap_values(X_val)
307306

307+
# Function to define the tags which will be used in sklearn pipelines
308308
def _get_more_tags(self):
309309
return Tags(
310310
estimator_type=None,
@@ -370,7 +370,8 @@ def _fit_get_shap(self, X_train, Y_train, X_val, Y_val, random_seed, **kwargs) -
370370

371371
### DEEP LEARNING
372372

373-
373+
# Tensorflow has been phased out and current version does not support deepLearning approach
374+
# TODO add support for Pytorch instead
374375
class DeepLearningExplainer(ShapExplainer):
375376
@staticmethod
376377
def supports_model(model) -> bool:
@@ -379,30 +380,31 @@ def supports_model(model) -> bool:
379380
# import torch ## TODO: do we support pytorch??
380381

381382
# supported_models = [tf.keras.Model] # , torch.nn.Module]
382-
return None #isinstance(model, tuple(supported_models))
383+
# return isinstance(model, tuple(supported_models))
384+
return False
383385

384386

385-
def _fit_get_shap(self, X_train, Y_train, X_val, Y_val, random_seed, **kwargs) -> np.array:
386-
# import tensorflow as tf
387-
388-
# tf.compat.v1.disable_v2_behavior() # https://github.com/slundberg/shap/issues/2189
389-
# Fit the model
390-
# PowerShap_model = tf.keras.models.clone_model(self.model)
391-
# metrics = kwargs.get("nn_metric")
392-
# PowerShap_model.compile(
393-
# loss=kwargs["loss"],
394-
# optimizer=kwargs["optimizer"],
395-
# metrics=metrics if metrics is None else [metrics],
396-
# # run_eagerly=True,
397-
# )
398-
# _ = PowerShap_model.fit(
399-
# X_train,
400-
# Y_train,
401-
# batch_size=kwargs["batch_size"],
402-
# epochs=kwargs["epochs"],
403-
# validation_data=(X_val, Y_val),
404-
# verbose=False,
405-
# )
406-
# # Calculate the shap values
407-
# C_explainer = shap.DeepExplainer(PowerShap_model, X_train)
408-
return None# C_explainer.shap_values(X_val)
387+
# def _fit_get_shap(self, X_train, Y_train, X_val, Y_val, random_seed, **kwargs) -> np.array:
388+
# # import tensorflow as tf
389+
390+
# # tf.compat.v1.disable_v2_behavior() # https://github.com/slundberg/shap/issues/2189
391+
# # Fit the model
392+
# # PowerShap_model = tf.keras.models.clone_model(self.model)
393+
# # metrics = kwargs.get("nn_metric")
394+
# # PowerShap_model.compile(
395+
# # loss=kwargs["loss"],
396+
# # optimizer=kwargs["optimizer"],
397+
# # metrics=metrics if metrics is None else [metrics],
398+
# # # run_eagerly=True,
399+
# # )
400+
# # _ = PowerShap_model.fit(
401+
# # X_train,
402+
# # Y_train,
403+
# # batch_size=kwargs["batch_size"],
404+
# # epochs=kwargs["epochs"],
405+
# # validation_data=(X_val, Y_val),
406+
# # verbose=False,
407+
# # )
408+
# # # Calculate the shap values
409+
# # C_explainer = shap.DeepExplainer(PowerShap_model, X_train)
410+
# return C_explainer.shap_values(X_val)

powershap/utils.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,8 @@ def powerSHAP_statistical_analysis(
6262
effect_size.append(0)
6363
power_list.append(0)
6464

65+
# The solve power of statsmodels might not always converge. If that happens it outputs a list of length 1 instead of a float
66+
# Numpy typing requires nonambiguous typing so this line is to ensure the cast to np.array later on will not result in a ValueError due to a failed converge
6567
required_iterations = [x[0] if not (isinstance(x, float) or isinstance(x, int)) else x for x in required_iterations]
6668

6769
processed_shaps_df = pd.DataFrame(

0 commit comments

Comments
 (0)