Commit 6676610

add gputree support (#291)
* add gputree support
* formatting
* Improve gputree support and docs
1 parent e688c4a commit 6676610

File tree

4 files changed: +41 additions, -18 deletions


README.md

Lines changed: 14 additions & 12 deletions
```diff
@@ -213,18 +213,20 @@ Some of the calculations for the dashboard such as calculating SHAP (interaction
 and permutation importances can be slow for large datasets and complicated models.
 There are a few tricks to make this less painful:
 
-1. Switching off the interactions tab (`shap_interaction=False`) and disabling
-permutation importances (`no_permutations=True`). Especially SHAP interaction
-values can be very slow to calculate, and often are not needed for analysis.
-For permutation importances you can set the `n_jobs` parameter to speed up
-the calculation in parallel.
-2. Calculate approximate shap values. You can pass approximate=True as a shap parameter by
-passing `shap_kwargs=dict(approximate=True)` to the explainer initialization.
-4. Storing the explainer. The calculated properties are only calculated once
-for each instance, however each time when you instantiate a new explainer
-instance they will have to be recalculated. You can store them with
-`explainer.dump("explainer.joblib")` and load with e.g.
-`ClassifierExplainer.from_file("explainer.joblib")`. All calculated properties
+1. Switching off the interactions tab (`shap_interaction=False`) and disabling
+permutation importances (`no_permutations=True`). Especially SHAP interaction
+values can be very slow to calculate, and often are not needed for analysis.
+For permutation importances you can set the `n_jobs` parameter to speed up
+the calculation in parallel.
+2. Calculate approximate shap values. You can pass approximate=True as a shap parameter by
+passing `shap_kwargs=dict(approximate=True)` to the explainer initialization.
+3. Use GPU Tree SHAP by passing `shap='gputree'` when your model supports it.
+This requires an NVIDIA GPU and a CUDA-enabled SHAP build (see the SHAP docs).
+4. Storing the explainer. The calculated properties are only calculated once
+for each instance, however each time when you instantiate a new explainer
+instance they will have to be recalculated. You can store them with
+`explainer.dump("explainer.joblib")` and load with e.g.
+`ClassifierExplainer.from_file("explainer.joblib")`. All calculated properties
 are stored along with the explainer.
 5. Using a smaller (test) dataset, or using smaller decision trees.
 TreeShap computational complexity is `O(TLD^2)`, where `T` is the
```
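The `O(TLD^2)` complexity mentioned in the README text above can be made concrete with a quick back-of-the-envelope sketch (a hypothetical cost model for comparison only, not a benchmark): in this model, halving tree depth `D` cuts the estimated cost by 4x, while halving the number of trees `T` only cuts it by 2x.

```python
def treeshap_cost(n_trees: int, n_leaves: int, max_depth: int) -> int:
    """Relative TreeShap cost per explained row: O(T * L * D^2)."""
    return n_trees * n_leaves * max_depth**2

# Hypothetical ensemble: 100 trees, 64 leaves each, depth 8
base = treeshap_cost(100, 64, 8)

# Halving depth is quadratic in the cost model...
assert treeshap_cost(100, 64, 4) * 4 == base
# ...while halving the number of trees is only linear.
assert treeshap_cost(50, 64, 8) * 2 == base
```

In practice the leaf count `L` itself shrinks with `D` (at most `2^D` leaves per tree), so shallower trees tend to help even more than this fixed-`L` comparison suggests.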

RELEASE_NOTES.md

Lines changed: 3 additions & 0 deletions
```diff
@@ -9,6 +9,9 @@
 - Preserve categorical dtypes during permutation importance shuffles and PDP grid generation to prevent dtype-related model errors (e.g., LightGBM).
 - Align categorical/boolean dtypes for user-provided `X_row` inputs and add dtype alignment tests.
 
+### Improvements
+- Add support for GPU Tree SHAP explainers via `shap='gputree'` (requires CUDA-enabled SHAP).
+
 ## Version 0.5.4:
 
 ### Breaking Changes
```
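The dtype-preservation note above concerns shuffles that silently upcast categorical columns to `object`, which breaks models such as LightGBM. A minimal sketch of a dtype-preserving shuffle (an illustrative helper, not the library's actual implementation):

```python
import numpy as np
import pandas as pd

def shuffle_preserving_dtype(X: pd.DataFrame, col: str, seed: int = 0) -> pd.DataFrame:
    """Shuffle one column (as for permutation importance), keeping its dtype intact."""
    out = X.copy()
    rng = np.random.default_rng(seed)
    shuffled = out[col].to_numpy()[rng.permutation(len(out))]
    # Re-cast to the original dtype so categorical columns stay categorical
    out[col] = pd.Series(shuffled, index=out.index).astype(X[col].dtype)
    return out

df = pd.DataFrame({"fruit": pd.Categorical(["apple", "pear", "plum", "pear"])})
shuffled = shuffle_preserving_dtype(df, "fruit")
assert str(shuffled["fruit"].dtype) == "category"
```

Without the final `astype`, assigning the shuffled numpy array back would leave an `object` column, which is exactly the class of error the release note describes.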

explainerdashboard/dashboard_components/overview_components.py

Lines changed: 3 additions & 3 deletions
```diff
@@ -231,8 +231,8 @@ def __init__(
         self.importance_type = "shap"
         if self.description is None:
             self.description = """
-        Shows the features sorted from most important to least important. Can
-        be either sorted by absolute SHAP value (average absolute impact of
+        Shows the features sorted from most important to least important. Can
+        be either sorted by absolute SHAP value (average absolute impact of
         the feature on final prediction) or by permutation importance (how much
         does the model get worse when you shuffle this feature, rendering it
         useless?).
@@ -647,7 +647,7 @@ def __init__(
         of observations and how these observations would change with this
         feature (gridlines). The average effect is shown in grey. The effect
         of changing the feature for a single {self.explainer.index_name} is
-        shown in blue. You can adjust how many observations to sample for the
+        shown in blue. You can adjust how many observations to sample for the
         average, how many gridlines to show, and how many points along the
         x-axis to calculate model predictions for (gridpoints).
         """
```

explainerdashboard/explainers.py

Lines changed: 21 additions & 3 deletions
```diff
@@ -330,9 +330,9 @@ def __init__(
                 "sklearn-compatible NeuralNet wrapper are supported for now! "
                 "See https://github.com/skorch-dev/skorch"
             )
-        assert shap in ["tree", "linear", "deep", "kernel", "skorch"], (
-            "ERROR! Only shap='guess', 'tree', 'linear', ' kernel' or 'skorch' are "
-            " supported for now!"
+        assert shap in ["tree", "linear", "deep", "kernel", "skorch", "gputree"], (
+            "ERROR! Only shap='guess', 'tree', 'linear', ' kernel', 'skorch' "
+            "or 'gputree' are supported for now!"
         )
         self.shap = shap
         if self.shap in {"kernel", "skorch", "linear"}:
@@ -1276,6 +1276,24 @@ def model_predict(data_asarray):
                 if self.X_background is not None
                 else shap.sample(self.X, 50),
             )
+        elif self.shap == "gputree":
+            print(
+                "Generating self.shap_explainer = shap.GPUTreeExplainer(model, X). "
+                "Make sure you have a CUDA-enabled GPU and a CUDA-built SHAP "
+                "installed. See https://shap.readthedocs.io/en/latest/example_notebooks/api_examples/explainers/GPUTree.html#"  # noqa: E501
+            )
+            X_data = self.X_background if self.X_background is not None else self.X
+            if hasattr(shap, "explainers") and hasattr(shap.explainers, "GPUTree"):
+                explainer_cls = shap.explainers.GPUTree
+            elif hasattr(shap, "GPUTreeExplainer"):
+                explainer_cls = shap.GPUTreeExplainer
+            else:
+                raise ValueError(
+                    "shap does not expose GPUTreeExplainer. "
+                    "Please install a CUDA-enabled SHAP build that includes "
+                    "GPUTree support."
+                )
+            self._shap_explainer = explainer_cls(self.model, X_data)
         return self._shap_explainer
 
     @insert_pos_label
```
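The `hasattr` chain added in explainers.py above is a version-compatibility shim: newer SHAP releases expose `shap.explainers.GPUTree`, while some older CUDA builds expose `shap.GPUTreeExplainer` at the top level. The lookup order can be exercised without a GPU by substituting stub modules (the stubs below are hypothetical stand-ins for real shap builds):

```python
from types import SimpleNamespace

def resolve_gputree(shap_module):
    """Return the GPUTree explainer class from whichever API the build exposes."""
    sub = getattr(shap_module, "explainers", None)
    if sub is not None and hasattr(sub, "GPUTree"):
        return sub.GPUTree                   # modern API: shap.explainers.GPUTree
    if hasattr(shap_module, "GPUTreeExplainer"):
        return shap_module.GPUTreeExplainer  # older top-level API
    raise ValueError(
        "shap does not expose GPUTreeExplainer; install a CUDA-enabled build."
    )

# Stub "shap" modules standing in for the two API shapes (no CUDA needed):
new_build = SimpleNamespace(explainers=SimpleNamespace(GPUTree="new-api-class"))
old_build = SimpleNamespace(GPUTreeExplainer="old-api-class")

assert resolve_gputree(new_build) == "new-api-class"
assert resolve_gputree(old_build) == "old-api-class"
```

Resolving the class through `hasattr` rather than importing it directly lets the non-GPU code paths keep working on SHAP builds compiled without GPUTree support, with a clear error only when `shap='gputree'` is actually requested.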
