From b0a4612ea66599268a06ff301f8ed8a97011b069 Mon Sep 17 00:00:00 2001
From: cai2-huaiguang <caihg3@mail2.sysu.edu.cn>
Date: Sat, 18 Jan 2025 16:00:44 +0800
Subject: [PATCH 01/12] ShapleyCAM

Add ShapleyCAM, which weights the activation maps using the gradient and the Hessian-vector product.
---
 README.md                                     |   1 +
 cam.py                                        |   7 +-
 pytorch_grad_cam/__init__.py                  |   1 +
 .../activations_and_gradients_no_detach.py    |  53 ++++++
 pytorch_grad_cam/shapley_cam.py               | 159 ++++++++++++++++++
 pytorch_grad_cam/utils/model_targets.py       |  16 ++
 6 files changed, 234 insertions(+), 3 deletions(-)
 create mode 100644 pytorch_grad_cam/activations_and_gradients_no_detach.py
 create mode 100644 pytorch_grad_cam/shapley_cam.py

diff --git a/README.md b/README.md
index 4908a83b..5f0a3e1c 100644
--- a/README.md
+++ b/README.md
@@ -47,6 +47,7 @@ The aim is also to serve as a benchmark of algorithms and metrics for research o
 | Deep Feature Factorizations           | Non Negative Matrix Factorization on the 2D activations                                                   |
 | KPCA-CAM            | Like EigenCAM but with Kernel PCA instead of PCA                                                                            |            
 | FEM                 | A gradient free method that binarizes activations by an activation > mean + k * std rule.                                   |
+| ShapleyCAM          | Weighting the activation maps using Gradient and Hessian-Vector Product.|
 ## Visual Examples
 
 | What makes the network think the image label is 'pug, pug-dog' | What makes the network think the image label is 'tabby, tabby cat' | Combining Grad-CAM with Guided Backpropagation for the 'pug, pug-dog' class |
diff --git a/cam.py b/cam.py
index b14b7723..a1c11eb8 100644
--- a/cam.py
+++ b/cam.py
@@ -7,7 +7,7 @@
 from pytorch_grad_cam import (
     GradCAM, FEM, HiResCAM, ScoreCAM, GradCAMPlusPlus,
     AblationCAM, XGradCAM, EigenCAM, EigenGradCAM,
-    LayerCAM, FullGrad, GradCAMElementWise, KPCA_CAM
+    LayerCAM, FullGrad, GradCAMElementWise, KPCA_CAM, ShapleyCAM
 )
 from pytorch_grad_cam import GuidedBackpropReLUModel
 from pytorch_grad_cam.utils.image import (
@@ -37,7 +37,7 @@ def get_args():
                             'gradcam', 'fem', 'hirescam', 'gradcam++',
                             'scorecam', 'xgradcam', 'ablationcam',
                             'eigencam', 'eigengradcam', 'layercam',
-                            'fullgrad', 'gradcamelementwise', 'kpcacam'
+                            'fullgrad', 'gradcamelementwise', 'kpcacam', 'shapleycam'
                         ],
                         help='CAM method')
 
@@ -75,7 +75,8 @@ def get_args():
         "fullgrad": FullGrad,
         "fem": FEM,
         "gradcamelementwise": GradCAMElementWise,
-        'kpcacam': KPCA_CAM
+        'kpcacam': KPCA_CAM,
+        'shapleycam': ShapleyCAM
     }
 
     if args.device=='hpu':
diff --git a/pytorch_grad_cam/__init__.py b/pytorch_grad_cam/__init__.py
index 7ac376a8..3b0d2f75 100644
--- a/pytorch_grad_cam/__init__.py
+++ b/pytorch_grad_cam/__init__.py
@@ -1,4 +1,5 @@
 from pytorch_grad_cam.grad_cam import GradCAM
+from pytorch_grad_cam.shapley_cam import ShapleyCAM
 from pytorch_grad_cam.fem import FEM
 from pytorch_grad_cam.hirescam import HiResCAM
 from pytorch_grad_cam.grad_cam_elementwise import GradCAMElementWise
diff --git a/pytorch_grad_cam/activations_and_gradients_no_detach.py b/pytorch_grad_cam/activations_and_gradients_no_detach.py
new file mode 100644
index 00000000..d89bbfbf
--- /dev/null
+++ b/pytorch_grad_cam/activations_and_gradients_no_detach.py
@@ -0,0 +1,53 @@
+class ActivationsAndGradients_no_detach:
+    """ Class for extracting activations and
+    registering gradients from targetted intermediate layers """
+
+    def __init__(self, model, target_layers, reshape_transform):
+        self.model = model
+        # self.gradients = []
+        # self.activations = []
+        self.original_gradients = []
+        self.original_activations = []
+        self.reshape_transform = reshape_transform
+        self.handles = []
+        for target_layer in target_layers:
+            self.handles.append(
+                target_layer.register_forward_hook(self.save_activation))
+            # Because of https://github.com/pytorch/pytorch/issues/61519,
+            # we don't use backward hook to record gradients.
+            self.handles.append(
+                target_layer.register_forward_hook(self.save_gradient))
+
+    def save_activation(self, module, input, output):
+        activation = output
+
+        self.original_activations.append(activation)
+        # if self.reshape_transform is not None:
+        #     activation = self.reshape_transform(activation)
+        # # self.activations.append(activation.cpu().detach())
+        # self.activations.append(activation)
+
+    def save_gradient(self, module, input, output):
+        if not hasattr(output, "requires_grad") or not output.requires_grad:
+            # You can only register hooks on tensor requires grad.
+            return
+
+        # Gradients are computed in reverse order
+        def _store_grad(grad):
+            self.original_gradients = [grad] + self.original_gradients
+            # if self.reshape_transform is not None:
+            #     grad = self.reshape_transform(grad)
+            # self.gradients = [grad] + self.gradients
+
+        output.register_hook(_store_grad)
+
+    def __call__(self, x):
+        # self.gradients = []
+        # self.activations = []
+        self.original_gradients = []
+        self.original_activations = []
+        return self.model(x)
+
+    def release(self):
+        for handle in self.handles:
+            handle.remove()
diff --git a/pytorch_grad_cam/shapley_cam.py b/pytorch_grad_cam/shapley_cam.py
new file mode 100644
index 00000000..e6e617b0
--- /dev/null
+++ b/pytorch_grad_cam/shapley_cam.py
@@ -0,0 +1,159 @@
+from typing import Callable, List, Optional, Tuple
+
+import numpy as np
+import torch
+from pytorch_grad_cam.base_cam import BaseCAM
+from scipy.signal import convolve2d
+from scipy.ndimage import gaussian_filter
+import cv2
+
+from pytorch_grad_cam.activations_and_gradients_no_detach import ActivationsAndGradients_no_detach
+from pytorch_grad_cam.utils.image import scale_cam_image
+from pytorch_grad_cam.utils.model_targets import ClassifierOutputTarget
+from pytorch_grad_cam.utils.svd_on_activations import get_2d_projection
+
+"""
+Weighting the activation maps using Gradient and Hessian-Vector Product.
+This method (https://arxiv.org/abs/2501.06261) reinterpret CAM methods from a Shapley value perspective.
+"""
+class ShapleyCAM(BaseCAM):
+    def __init__(self, model, target_layers,
+                 reshape_transform=None):
+        super(
+            ShapleyCAM,
+            self).__init__(
+            model,
+            target_layers,
+            reshape_transform)
+
+        self.activations_and_grads = ActivationsAndGradients_no_detach(self.model, target_layers, reshape_transform)
+
+    def forward(
+        self, input_tensor: torch.Tensor, targets: List[torch.nn.Module], eigen_smooth: bool = False
+    ) -> np.ndarray:
+        input_tensor = input_tensor.to(self.device)
+
+        input_tensor = torch.autograd.Variable(input_tensor, requires_grad=True)
+
+        self.outputs = outputs = self.activations_and_grads(input_tensor)
+
+        if targets is None:
+            target_categories = np.argmax(outputs.cpu().data.numpy(), axis=-1)
+            targets = [ClassifierOutputTarget(category) for category in target_categories]
+
+        if self.uses_gradients:
+            self.model.zero_grad()
+            loss = sum([target(output) for target, output in zip(targets, outputs)])
+            torch.autograd.grad(loss, input_tensor,  retain_graph = True, create_graph = True)
+
+        # In most of the saliency attribution papers, the saliency is
+        # computed with a single target layer.
+        # Commonly it is the last convolutional layer.
+        # Here we support passing a list with multiple target layers.
+        # It will compute the saliency image for every image,
+        # and then aggregate them (with a default mean aggregation).
+        # This gives you more flexibility in case you just want to
+        # use all conv layers for example, all Batchnorm layers,
+        # or something else.
+        cam_per_layer = self.compute_cam_per_layer(input_tensor, targets, eigen_smooth)
+        return self.aggregate_multi_layers(cam_per_layer)
+
+
+    def get_cam_weights(self,
+                        input_tensor,
+                        target_layer,
+                        target_category,
+                        activations,
+                        grads):
+        activations: List[Tensor]  # type: ignore[assignment]
+        grads: List[Tensor]  # type: ignore[assignment]
+
+        hvp = torch.autograd.grad(
+            outputs=grads,
+            inputs=activations,
+            grad_outputs=activations,
+            retain_graph=False,
+            allow_unused=True
+        )[0]
+        if hvp is None:
+            hvp = torch.tensor(0).to(self.device)
+        elif self.activations_and_grads.reshape_transform is not None:
+            hvp = self.activations_and_grads.reshape_transform(hvp)
+
+        if self.activations_and_grads.reshape_transform is not None:
+            activations = self.activations_and_grads.reshape_transform(activations)
+            grads = self.activations_and_grads.reshape_transform(grads)
+        weight = (grads  - 0.5*hvp).cpu().detach().numpy()
+        activations = activations.cpu().detach().numpy()
+        grads = grads.cpu().detach().numpy()
+
+
+        # 2D image
+        if len(activations.shape) == 4:
+            weight = np.mean(weight, axis=(2, 3))
+            return weight, activations
+        
+        # 3D image
+        elif len(activations.shape) == 5:
+            weight = np.mean(weight, axis=(2, 3, 4))
+            return weight, activations
+        
+        else:
+            raise ValueError("Invalid grads shape."
+                             "Shape of grads should be 4 (2D image) or 5 (3D image).")
+
+
+
+    def get_cam_image(
+        self,
+        input_tensor: torch.Tensor,
+        target_layer: torch.nn.Module,
+        targets: List[torch.nn.Module],
+        activations: torch.Tensor,
+        grads: torch.Tensor,
+        eigen_smooth: bool = False,
+    ) -> np.ndarray:
+        weights, activations = self.get_cam_weights(input_tensor, target_layer, targets, activations, grads)
+
+        # 2D conv
+        if len(activations.shape) == 4:
+            weighted_activations = weights[:, :, None, None] * activations
+
+        # 3D conv
+        elif len(activations.shape) == 5:
+            weighted_activations = weights[:, :, None, None, None] * activations
+        else:
+            raise ValueError(f"Invalid activation shape. Get {len(activations.shape)}.")
+        
+        # weighted_activations = np.maximum(weighted_activations, 0)
+        # weighted_activations = np.abs(weighted_activations)
+        if eigen_smooth:
+            cam = get_2d_projection(weighted_activations)
+        else:
+            cam = weighted_activations.sum(axis=1)
+        return cam
+
+    def compute_cam_per_layer(
+        self, input_tensor: torch.Tensor, targets: List[torch.nn.Module], eigen_smooth: bool
+    ) -> np.ndarray:
+        activations_list = [a for a in self.activations_and_grads.original_activations]
+        grads_list = [g for g in self.activations_and_grads.original_gradients]
+        target_size = self.get_target_width_height(input_tensor)
+
+        cam_per_target_layer = []
+        # Loop over the saliency image from every layer
+        for i in range(len(self.target_layers)):
+            target_layer = self.target_layers[i]
+            layer_activations = None
+            layer_grads = None
+            if i < len(activations_list):
+                layer_activations = activations_list[i]
+            if i < len(grads_list):
+                layer_grads = grads_list[i]
+
+            cam = self.get_cam_image(input_tensor, target_layer, targets, layer_activations, layer_grads, eigen_smooth)
+            cam = np.maximum(cam, 0)
+            scaled = scale_cam_image(cam, target_size)
+            cam_per_target_layer.append(scaled[:, None, :])
+
+        return cam_per_target_layer
diff --git a/pytorch_grad_cam/utils/model_targets.py b/pytorch_grad_cam/utils/model_targets.py
index d0a48189..88533f69 100644
--- a/pytorch_grad_cam/utils/model_targets.py
+++ b/pytorch_grad_cam/utils/model_targets.py
@@ -23,6 +23,22 @@ def __call__(self, model_output):
         return torch.softmax(model_output, dim=-1)[:, self.category]
 
 
+class ClassifierOutputReST:
+    """
+    Using both pre-softmax and post-softmax, propoesed in https://arxiv.org/abs/2501.06261
+    """
+    def __init__(self, category):
+        self.category = category
+    def __call__(self, model_output): 
+        if len(model_output.shape) == 1:
+            target = torch.tensor([self.category], device=model_output.device)
+            model_output = model_output.unsqueeze(0)
+            return model_output[0][self.category] - torch.nn.functional.cross_entropy(model_output, target)
+        else:
+            target = torch.tensor([self.category] * model_output.shape[0], device=model_output.device)
+            return model_output[:,self.category]- torch.nn.functional.cross_entropy(model_output, target)
+
+
 class BinaryClassifierOutputTarget:
     def __init__(self, category):
         self.category = category

From 7bde09ca22dcc7e86e245339311c8c21ac99a766 Mon Sep 17 00:00:00 2001
From: cai2-huaiguang <caihg3@mail2.sysu.edu.cn>
Date: Sat, 18 Jan 2025 20:23:20 +0800
Subject: [PATCH 02/12] Add ShapleyCAM paper citation to README

---
 README.md | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 5f0a3e1c..f24fa5a7 100644
--- a/README.md
+++ b/README.md
@@ -363,4 +363,8 @@ Sachin Karmani, Thanushon Sivakaran, Gaurav Prasad, Mehmet Ali, Wenbo Yang, Shey
 https://hal.science/hal-02963298/document <br>
 `Features Understanding in 3D CNNs for Actions Recognition in Video
 Kazi Ahmed Asif Fuad, Pierre-Etienne Martin, Romain Giot, Romain
-Bourqui, Jenny Benois-Pineau, Akka Zemmar`
\ No newline at end of file
+Bourqui, Jenny Benois-Pineau, Akka Zemmar`
+
+https://arxiv.org/abs/2501.06261 <br>
+`CAMs as Shapley Value-based Explainers
+Huaiguang Cai`
\ No newline at end of file

From 0b85cbabae91fe3d8304cffb821611a65e5e7028 Mon Sep 17 00:00:00 2001
From: cai2-huaiguang <caihg3@mail2.sysu.edu.cn>
Date: Sat, 18 Jan 2025 21:07:44 +0800
Subject: [PATCH 03/12] ReST example

---
 cam.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/cam.py b/cam.py
index a1c11eb8..459b8ae2 100644
--- a/cam.py
+++ b/cam.py
@@ -13,7 +13,7 @@
 from pytorch_grad_cam.utils.image import (
     show_cam_on_image, deprocess_image, preprocess_image
 )
-from pytorch_grad_cam.utils.model_targets import ClassifierOutputTarget
+from pytorch_grad_cam.utils.model_targets import ClassifierOutputTarget, ClassifierOutputReST
 
 
 def get_args():
@@ -110,7 +110,7 @@ def get_args():
     # If targets is None, the highest scoring category (for every member in the batch) will be used.
     # You can target specific categories by
     # targets = [ClassifierOutputTarget(281)]
-    # targets = [ClassifierOutputTarget(281)]
+    # targets = [ClassifierOutputReST(281)]
     targets = None
 
     # Using the with statement ensures the context is freed, and you can

From 445d3234ef83dba5edec6107d2242da02421bf8b Mon Sep 17 00:00:00 2001
From: cai2-huaiguang <caihg3@mail2.sysu.edu.cn>
Date: Sat, 18 Jan 2025 21:11:36 +0800
Subject: [PATCH 04/12] Add HVP sanity-check comment and fix operator spacing

---
 pytorch_grad_cam/shapley_cam.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/pytorch_grad_cam/shapley_cam.py b/pytorch_grad_cam/shapley_cam.py
index e6e617b0..00332993 100644
--- a/pytorch_grad_cam/shapley_cam.py
+++ b/pytorch_grad_cam/shapley_cam.py
@@ -75,6 +75,7 @@ def get_cam_weights(self,
             retain_graph=False,
             allow_unused=True
         )[0]
+        # print(torch.max(hvp[0]).item())  # verify that hvp is not all zeros
         if hvp is None:
             hvp = torch.tensor(0).to(self.device)
         elif self.activations_and_grads.reshape_transform is not None:
@@ -83,7 +84,7 @@ def get_cam_weights(self,
         if self.activations_and_grads.reshape_transform is not None:
             activations = self.activations_and_grads.reshape_transform(activations)
             grads = self.activations_and_grads.reshape_transform(grads)
-        weight = (grads  - 0.5*hvp).cpu().detach().numpy()
+        weight = (grads  - 0.5 * hvp).cpu().detach().numpy()
         activations = activations.cpu().detach().numpy()
         grads = grads.cpu().detach().numpy()
 

From 4344b4199144d1787ce9b3e6e7d61e50a9d549f1 Mon Sep 17 00:00:00 2001
From: cai2-huaiguang <caihg3@mail2.sysu.edu.cn>
Date: Sun, 19 Jan 2025 00:01:56 +0800
Subject: [PATCH 05/12] Update README.md

---
 README.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/README.md b/README.md
index f24fa5a7..c3ee00db 100644
--- a/README.md
+++ b/README.md
@@ -47,7 +47,7 @@ The aim is also to serve as a benchmark of algorithms and metrics for research o
 | Deep Feature Factorizations           | Non Negative Matrix Factorization on the 2D activations                                                   |
 | KPCA-CAM            | Like EigenCAM but with Kernel PCA instead of PCA                                                                            |            
 | FEM                 | A gradient free method that binarizes activations by an activation > mean + k * std rule.                                   |
-| ShapleyCAM          | Weighting the activation maps using Gradient and Hessian-Vector Product.|
+| ShapleyCAM          | Weights the activations using Gradient and Hessian-Vector Product.|
 ## Visual Examples
 
 | What makes the network think the image label is 'pug, pug-dog' | What makes the network think the image label is 'tabby, tabby cat' | Combining Grad-CAM with Guided Backpropagation for the 'pug, pug-dog' class |
@@ -367,4 +367,4 @@ Bourqui, Jenny Benois-Pineau, Akka Zemmar`
 
 https://arxiv.org/abs/2501.06261 <br>
 `CAMs as Shapley Value-based Explainers
-Huaiguang Cai`
\ No newline at end of file
+Huaiguang Cai`

From e44daf488c396597be849e21f32d3375e60ddf26 Mon Sep 17 00:00:00 2001
From: cai2-huaiguang <caihg3@mail2.sysu.edu.cn>
Date: Sun, 19 Jan 2025 00:04:12 +0800
Subject: [PATCH 06/12] Update README.md

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index c3ee00db..d91c2284 100644
--- a/README.md
+++ b/README.md
@@ -47,7 +47,7 @@ The aim is also to serve as a benchmark of algorithms and metrics for research o
 | Deep Feature Factorizations           | Non Negative Matrix Factorization on the 2D activations                                                   |
 | KPCA-CAM            | Like EigenCAM but with Kernel PCA instead of PCA                                                                            |            
 | FEM                 | A gradient free method that binarizes activations by an activation > mean + k * std rule.                                   |
-| ShapleyCAM          | Weights the activations using Gradient and Hessian-Vector Product.|
+| ShapleyCAM          | Weights the activations using the gradient and Hessian-vector product.|
 ## Visual Examples
 
 | What makes the network think the image label is 'pug, pug-dog' | What makes the network think the image label is 'tabby, tabby cat' | Combining Grad-CAM with Guided Backpropagation for the 'pug, pug-dog' class |

From ddf1618670b5c55feca21e8db609c329c18a2318 Mon Sep 17 00:00:00 2001
From: cai2-huaiguang <caihg3@mail2.sysu.edu.cn>
Date: Sun, 19 Jan 2025 00:04:39 +0800
Subject: [PATCH 07/12] Update README.md

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index d91c2284..eb28dcb8 100644
--- a/README.md
+++ b/README.md
@@ -47,7 +47,7 @@ The aim is also to serve as a benchmark of algorithms and metrics for research o
 | Deep Feature Factorizations           | Non Negative Matrix Factorization on the 2D activations                                                   |
 | KPCA-CAM            | Like EigenCAM but with Kernel PCA instead of PCA                                                                            |            
 | FEM                 | A gradient free method that binarizes activations by an activation > mean + k * std rule.                                   |
-| ShapleyCAM          | Weights the activations using the gradient and Hessian-vector product.|
+| ShapleyCAM          | Weight the activations using the gradient and Hessian-vector product.|
 ## Visual Examples
 
 | What makes the network think the image label is 'pug, pug-dog' | What makes the network think the image label is 'tabby, tabby cat' | Combining Grad-CAM with Guided Backpropagation for the 'pug, pug-dog' class |

From 4a8d358404c13890371617cc079e2c3a7eebd775 Mon Sep 17 00:00:00 2001
From: cai2-huaiguang <caihg3@mail2.sysu.edu.cn>
Date: Sun, 19 Jan 2025 11:44:00 +0800
Subject: [PATCH 08/12] Simplify ShapleyCAM by replacing the no-detach hook class with a detach flag

---
 pytorch_grad_cam/activations_and_gradients.py | 22 +++--
 .../activations_and_gradients_no_detach.py    | 53 ----------
 pytorch_grad_cam/base_cam.py                  | 14 ++-
 pytorch_grad_cam/shapley_cam.py               | 99 +++----------------
 pytorch_grad_cam/utils/model_targets.py       |  4 +-
 5 files changed, 42 insertions(+), 150 deletions(-)
 delete mode 100644 pytorch_grad_cam/activations_and_gradients_no_detach.py

diff --git a/pytorch_grad_cam/activations_and_gradients.py b/pytorch_grad_cam/activations_and_gradients.py
index 0c2071e5..8765567d 100644
--- a/pytorch_grad_cam/activations_and_gradients.py
+++ b/pytorch_grad_cam/activations_and_gradients.py
@@ -2,11 +2,12 @@ class ActivationsAndGradients:
     """ Class for extracting activations and
     registering gradients from targetted intermediate layers """
 
-    def __init__(self, model, target_layers, reshape_transform):
+    def __init__(self, model, target_layers, reshape_transform, detach=True):
         self.model = model
         self.gradients = []
         self.activations = []
         self.reshape_transform = reshape_transform
+        self.detach = detach
         self.handles = []
         for target_layer in target_layers:
             self.handles.append(
@@ -18,10 +19,12 @@ def __init__(self, model, target_layers, reshape_transform):
 
     def save_activation(self, module, input, output):
         activation = output
-
-        if self.reshape_transform is not None:
-            activation = self.reshape_transform(activation)
-        self.activations.append(activation.cpu().detach())
+        if self.detach:
+            if self.reshape_transform is not None:
+                activation = self.reshape_transform(activation)
+            self.activations.append(activation.cpu().detach())
+        else:
+            self.activations.append(activation)
 
     def save_gradient(self, module, input, output):
         if not hasattr(output, "requires_grad") or not output.requires_grad:
@@ -30,9 +33,12 @@ def save_gradient(self, module, input, output):
 
         # Gradients are computed in reverse order
         def _store_grad(grad):
-            if self.reshape_transform is not None:
-                grad = self.reshape_transform(grad)
-            self.gradients = [grad.cpu().detach()] + self.gradients
+            if self.detach:
+                if self.reshape_transform is not None:
+                    grad = self.reshape_transform(grad)
+                self.gradients = [grad.cpu().detach()] + self.gradients
+            else:
+                self.gradients = [grad] + self.gradients
 
         output.register_hook(_store_grad)
 
diff --git a/pytorch_grad_cam/activations_and_gradients_no_detach.py b/pytorch_grad_cam/activations_and_gradients_no_detach.py
deleted file mode 100644
index d89bbfbf..00000000
--- a/pytorch_grad_cam/activations_and_gradients_no_detach.py
+++ /dev/null
@@ -1,53 +0,0 @@
-class ActivationsAndGradients_no_detach:
-    """ Class for extracting activations and
-    registering gradients from targetted intermediate layers """
-
-    def __init__(self, model, target_layers, reshape_transform):
-        self.model = model
-        # self.gradients = []
-        # self.activations = []
-        self.original_gradients = []
-        self.original_activations = []
-        self.reshape_transform = reshape_transform
-        self.handles = []
-        for target_layer in target_layers:
-            self.handles.append(
-                target_layer.register_forward_hook(self.save_activation))
-            # Because of https://github.com/pytorch/pytorch/issues/61519,
-            # we don't use backward hook to record gradients.
-            self.handles.append(
-                target_layer.register_forward_hook(self.save_gradient))
-
-    def save_activation(self, module, input, output):
-        activation = output
-
-        self.original_activations.append(activation)
-        # if self.reshape_transform is not None:
-        #     activation = self.reshape_transform(activation)
-        # # self.activations.append(activation.cpu().detach())
-        # self.activations.append(activation)
-
-    def save_gradient(self, module, input, output):
-        if not hasattr(output, "requires_grad") or not output.requires_grad:
-            # You can only register hooks on tensor requires grad.
-            return
-
-        # Gradients are computed in reverse order
-        def _store_grad(grad):
-            self.original_gradients = [grad] + self.original_gradients
-            # if self.reshape_transform is not None:
-            #     grad = self.reshape_transform(grad)
-            # self.gradients = [grad] + self.gradients
-
-        output.register_hook(_store_grad)
-
-    def __call__(self, x):
-        # self.gradients = []
-        # self.activations = []
-        self.original_gradients = []
-        self.original_activations = []
-        return self.model(x)
-
-    def release(self):
-        for handle in self.handles:
-            handle.remove()
diff --git a/pytorch_grad_cam/base_cam.py b/pytorch_grad_cam/base_cam.py
index 44ae5b90..b0f7a059 100644
--- a/pytorch_grad_cam/base_cam.py
+++ b/pytorch_grad_cam/base_cam.py
@@ -19,6 +19,7 @@ def __init__(
         compute_input_gradient: bool = False,
         uses_gradients: bool = True,
         tta_transforms: Optional[tta.Compose] = None,
+        detach: bool = True,
     ) -> None:
         self.model = model.eval()
         self.target_layers = target_layers
@@ -45,7 +46,8 @@ def __init__(
         else:
             self.tta_transforms = tta_transforms
 
-        self.activations_and_grads = ActivationsAndGradients(self.model, target_layers, reshape_transform)
+        self.detach = detach
+        self.activations_and_grads = ActivationsAndGradients(self.model, target_layers, reshape_transform, self.detach)
 
     """ Get a vector of weights for every channel in the target layer.
         Methods that return weights channels,
@@ -71,6 +73,8 @@ def get_cam_image(
         eigen_smooth: bool = False,
     ) -> np.ndarray:
         weights = self.get_cam_weights(input_tensor, target_layer, targets, activations, grads)
+        if isinstance(activations, torch.Tensor):
+            activations = activations.cpu().detach().numpy()
         # 2D conv
         if len(activations.shape) == 4:
             weighted_activations = weights[:, :, None, None] * activations
@@ -132,8 +136,12 @@ def get_target_width_height(self, input_tensor: torch.Tensor) -> Tuple[int, int]
     def compute_cam_per_layer(
         self, input_tensor: torch.Tensor, targets: List[torch.nn.Module], eigen_smooth: bool
     ) -> np.ndarray:
-        activations_list = [a.cpu().data.numpy() for a in self.activations_and_grads.activations]
-        grads_list = [g.cpu().data.numpy() for g in self.activations_and_grads.gradients]
+        if self.detach:
+            activations_list = [a.cpu().data.numpy() for a in self.activations_and_grads.activations]
+            grads_list = [g.cpu().data.numpy() for g in self.activations_and_grads.gradients]
+        else:
+            activations_list = [a for a in self.activations_and_grads.activations]
+            grads_list = [g for g in self.activations_and_grads.gradients]
         target_size = self.get_target_width_height(input_tensor)
 
         cam_per_target_layer = []
diff --git a/pytorch_grad_cam/shapley_cam.py b/pytorch_grad_cam/shapley_cam.py
index 00332993..4bb41e7e 100644
--- a/pytorch_grad_cam/shapley_cam.py
+++ b/pytorch_grad_cam/shapley_cam.py
@@ -1,32 +1,22 @@
 from typing import Callable, List, Optional, Tuple
-
-import numpy as np
-import torch
 from pytorch_grad_cam.base_cam import BaseCAM
-from scipy.signal import convolve2d
-from scipy.ndimage import gaussian_filter
-import cv2
-
-from pytorch_grad_cam.activations_and_gradients_no_detach import ActivationsAndGradients_no_detach
-from pytorch_grad_cam.utils.image import scale_cam_image
-from pytorch_grad_cam.utils.model_targets import ClassifierOutputTarget
-from pytorch_grad_cam.utils.svd_on_activations import get_2d_projection
+import torch
+import numpy as np
 
 """
 Weighting the activation maps using Gradient and Hessian-Vector Product.
-This method (https://arxiv.org/abs/2501.06261) reinterpret CAM methods from a Shapley value perspective.
+This method (https://arxiv.org/abs/2501.06261) reinterpret CAM methods (include GradCAM, HiResCAM and the original CAM) from a Shapley value perspective.
 """
 class ShapleyCAM(BaseCAM):
     def __init__(self, model, target_layers,
-                 reshape_transform=None):
+                 reshape_transform=None, detach=False):
         super(
             ShapleyCAM,
             self).__init__(
             model,
             target_layers,
-            reshape_transform)
-
-        self.activations_and_grads = ActivationsAndGradients_no_detach(self.model, target_layers, reshape_transform)
+            reshape_transform,
+            detach = detach)
 
     def forward(
         self, input_tensor: torch.Tensor, targets: List[torch.nn.Module], eigen_smooth: bool = False
@@ -44,6 +34,7 @@ def forward(
         if self.uses_gradients:
             self.model.zero_grad()
             loss = sum([target(output) for target, output in zip(targets, outputs)])
+            # keep the graph
             torch.autograd.grad(loss, input_tensor,  retain_graph = True, create_graph = True)
 
         # In most of the saliency attribution papers, the saliency is
@@ -65,9 +56,7 @@ def get_cam_weights(self,
                         target_category,
                         activations,
                         grads):
-        activations: List[Tensor]  # type: ignore[assignment]
-        grads: List[Tensor]  # type: ignore[assignment]
-
+        
         hvp = torch.autograd.grad(
             outputs=grads,
             inputs=activations,
@@ -75,86 +64,28 @@ def get_cam_weights(self,
             retain_graph=False,
             allow_unused=True
         )[0]
-        # print(torch.max(hvp[0]).item())  # verify that hvp is not all zeros
+        # print(torch.max(hvp[0]).item())  # Use .item() to get the scalar value
         if hvp is None:
             hvp = torch.tensor(0).to(self.device)
-        elif self.activations_and_grads.reshape_transform is not None:
-            hvp = self.activations_and_grads.reshape_transform(hvp)
+        else:
+            if self.activations_and_grads.reshape_transform is not None:
+                hvp = self.activations_and_grads.reshape_transform(hvp)
 
         if self.activations_and_grads.reshape_transform is not None:
             activations = self.activations_and_grads.reshape_transform(activations)
             grads = self.activations_and_grads.reshape_transform(grads)
-        weight = (grads  - 0.5 * hvp).cpu().detach().numpy()
-        activations = activations.cpu().detach().numpy()
-        grads = grads.cpu().detach().numpy()
-
 
+        weight = (grads  - 0.5 * hvp).detach().cpu().numpy()
         # 2D image
         if len(activations.shape) == 4:
             weight = np.mean(weight, axis=(2, 3))
-            return weight, activations
+            return weight
         
         # 3D image
         elif len(activations.shape) == 5:
             weight = np.mean(weight, axis=(2, 3, 4))
-            return weight, activations
+            return weight
         
         else:
             raise ValueError("Invalid grads shape."
                              "Shape of grads should be 4 (2D image) or 5 (3D image).")
-
-
-
-    def get_cam_image(
-        self,
-        input_tensor: torch.Tensor,
-        target_layer: torch.nn.Module,
-        targets: List[torch.nn.Module],
-        activations: torch.Tensor,
-        grads: torch.Tensor,
-        eigen_smooth: bool = False,
-    ) -> np.ndarray:
-        weights, activations = self.get_cam_weights(input_tensor, target_layer, targets, activations, grads)
-
-        # 2D conv
-        if len(activations.shape) == 4:
-            weighted_activations = weights[:, :, None, None] * activations
-
-        # 3D conv
-        elif len(activations.shape) == 5:
-            weighted_activations = weights[:, :, None, None, None] * activations
-        else:
-            raise ValueError(f"Invalid activation shape. Get {len(activations.shape)}.")
-        
-        # weighted_activations = np.maximum(weighted_activations, 0)
-        # weighted_activations = np.abs(weighted_activations)
-        if eigen_smooth:
-            cam = get_2d_projection(weighted_activations)
-        else:
-            cam = weighted_activations.sum(axis=1)
-        return cam
-
-    def compute_cam_per_layer(
-        self, input_tensor: torch.Tensor, targets: List[torch.nn.Module], eigen_smooth: bool
-    ) -> np.ndarray:
-        activations_list = [a for a in self.activations_and_grads.original_activations]
-        grads_list = [g for g in self.activations_and_grads.original_gradients]
-        target_size = self.get_target_width_height(input_tensor)
-
-        cam_per_target_layer = []
-        # Loop over the saliency image from every layer
-        for i in range(len(self.target_layers)):
-            target_layer = self.target_layers[i]
-            layer_activations = None
-            layer_grads = None
-            if i < len(activations_list):
-                layer_activations = activations_list[i]
-            if i < len(grads_list):
-                layer_grads = grads_list[i]
-
-            cam = self.get_cam_image(input_tensor, target_layer, targets, layer_activations, layer_grads, eigen_smooth)
-            cam = np.maximum(cam, 0)
-            scaled = scale_cam_image(cam, target_size)
-            cam_per_target_layer.append(scaled[:, None, :])
-
-        return cam_per_target_layer
diff --git a/pytorch_grad_cam/utils/model_targets.py b/pytorch_grad_cam/utils/model_targets.py
index 88533f69..4861ab9c 100644
--- a/pytorch_grad_cam/utils/model_targets.py
+++ b/pytorch_grad_cam/utils/model_targets.py
@@ -25,7 +25,7 @@ def __call__(self, model_output):
 
 class ClassifierOutputReST:
     """
-    Using both pre-softmax and post-softmax, propoesed in https://arxiv.org/abs/2501.06261
+    Using both pre-softmax and post-softmax, proposed in https://arxiv.org/abs/2501.06261
     """
     def __init__(self, category):
         self.category = category
@@ -36,7 +36,7 @@ def __call__(self, model_output):
             return model_output[0][self.category] - torch.nn.functional.cross_entropy(model_output, target)
         else:
             target = torch.tensor([self.category] * model_output.shape[0], device=model_output.device)
-            return model_output[:,self.category]- torch.nn.functional.cross_entropy(model_output, target)
+            return model_output[:,self.category] - torch.nn.functional.cross_entropy(model_output, target)
 
 
 class BinaryClassifierOutputTarget:

From b6be00bd8bf8417e43ac6fc098fae8c9556622ca Mon Sep 17 00:00:00 2001
From: cai2-huaiguang <caihg3@mail2.sysu.edu.cn>
Date: Sun, 19 Jan 2025 11:48:38 +0800
Subject: [PATCH 09/12] comments

---
 pytorch_grad_cam/shapley_cam.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pytorch_grad_cam/shapley_cam.py b/pytorch_grad_cam/shapley_cam.py
index 4bb41e7e..508180ba 100644
--- a/pytorch_grad_cam/shapley_cam.py
+++ b/pytorch_grad_cam/shapley_cam.py
@@ -64,7 +64,7 @@ def get_cam_weights(self,
             retain_graph=False,
             allow_unused=True
         )[0]
-        # print(torch.max(hvp[0]).item())  # Use .item() to get the scalar value
+        # print(torch.max(hvp[0]).item())  # check if hvp is not all zeros
         if hvp is None:
             hvp = torch.tensor(0).to(self.device)
         else:

From 326300d3b925a48d6e8b1fa50bf64f870fb63e89 Mon Sep 17 00:00:00 2001
From: cai2-huaiguang <caihg3@mail2.sysu.edu.cn>
Date: Sun, 19 Jan 2025 14:46:59 +0800
Subject: [PATCH 10/12] forward function in shapley_cam.py still needed

This is because computing the Hessian-vector product (HVP) requires the computation graph to be retained; see the comment around lines 37-38 of shapley_cam.py.
---
 pytorch_grad_cam/shapley_cam.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/pytorch_grad_cam/shapley_cam.py b/pytorch_grad_cam/shapley_cam.py
index 508180ba..22b58bd7 100644
--- a/pytorch_grad_cam/shapley_cam.py
+++ b/pytorch_grad_cam/shapley_cam.py
@@ -1,5 +1,6 @@
 from typing import Callable, List, Optional, Tuple
 from pytorch_grad_cam.base_cam import BaseCAM
+from pytorch_grad_cam.utils.model_targets import ClassifierOutputTarget
 import torch
 import numpy as np
 
@@ -34,7 +35,7 @@ def forward(
         if self.uses_gradients:
             self.model.zero_grad()
             loss = sum([target(output) for target, output in zip(targets, outputs)])
-            # keep the graph
+            # keep the graph, create_graph = True is needed for hvp
             torch.autograd.grad(loss, input_tensor,  retain_graph = True, create_graph = True)
 
         # In most of the saliency attribution papers, the saliency is

From c25ca788a5451dfdbe70e1ad8d6da2055edaa7af Mon Sep 17 00:00:00 2001
From: cai2-huaiguang <caihg3@mail2.sysu.edu.cn>
Date: Sun, 19 Jan 2025 15:12:10 +0800
Subject: [PATCH 11/12] delete forward function in shapley_cam.py

---
 pytorch_grad_cam/base_cam.py    |  8 +++++-
 pytorch_grad_cam/shapley_cam.py | 44 ++++++---------------------------
 2 files changed, 14 insertions(+), 38 deletions(-)

diff --git a/pytorch_grad_cam/base_cam.py b/pytorch_grad_cam/base_cam.py
index b0f7a059..c54b2a29 100644
--- a/pytorch_grad_cam/base_cam.py
+++ b/pytorch_grad_cam/base_cam.py
@@ -107,7 +107,13 @@ def forward(
         if self.uses_gradients:
             self.model.zero_grad()
             loss = sum([target(output) for target, output in zip(targets, outputs)])
-            loss.backward(retain_graph=True)
+            if self.detach:
+                loss.backward(retain_graph=True)
+            else:
+                # keep the computational graph, create_graph = True is needed for hvp
+                torch.autograd.grad(loss, input_tensor,  retain_graph = True, create_graph = True)
+                # When using the following loss.backward() method, a warning is raised: "UserWarning: Using backward() with create_graph=True will create a reference cycle"
+                # loss.backward(retain_graph=True, create_graph=True)
             if 'hpu' in str(self.device):
                 self.__htcore.mark_step()
 
diff --git a/pytorch_grad_cam/shapley_cam.py b/pytorch_grad_cam/shapley_cam.py
index 22b58bd7..9398e8fc 100644
--- a/pytorch_grad_cam/shapley_cam.py
+++ b/pytorch_grad_cam/shapley_cam.py
@@ -10,46 +10,16 @@
 """
 class ShapleyCAM(BaseCAM):
     def __init__(self, model, target_layers,
-                 reshape_transform=None, detach=False):
+                 reshape_transform=None):
         super(
             ShapleyCAM,
             self).__init__(
-            model,
-            target_layers,
-            reshape_transform,
-            detach = detach)
-
-    def forward(
-        self, input_tensor: torch.Tensor, targets: List[torch.nn.Module], eigen_smooth: bool = False
-    ) -> np.ndarray:
-        input_tensor = input_tensor.to(self.device)
-
-        input_tensor = torch.autograd.Variable(input_tensor, requires_grad=True)
-
-        self.outputs = outputs = self.activations_and_grads(input_tensor)
-
-        if targets is None:
-            target_categories = np.argmax(outputs.cpu().data.numpy(), axis=-1)
-            targets = [ClassifierOutputTarget(category) for category in target_categories]
-
-        if self.uses_gradients:
-            self.model.zero_grad()
-            loss = sum([target(output) for target, output in zip(targets, outputs)])
-            # keep the graph, create_graph = True is needed for hvp
-            torch.autograd.grad(loss, input_tensor,  retain_graph = True, create_graph = True)
-
-        # In most of the saliency attribution papers, the saliency is
-        # computed with a single target layer.
-        # Commonly it is the last convolutional layer.
-        # Here we support passing a list with multiple target layers.
-        # It will compute the saliency image for every image,
-        # and then aggregate them (with a default mean aggregation).
-        # This gives you more flexibility in case you just want to
-        # use all conv layers for example, all Batchnorm layers,
-        # or something else.
-        cam_per_layer = self.compute_cam_per_layer(input_tensor, targets, eigen_smooth)
-        return self.aggregate_multi_layers(cam_per_layer)
-
+            model = model,
+            target_layers = target_layers,
+            reshape_transform = reshape_transform,
+            compute_input_gradient = True,
+            uses_gradients = True,
+            detach = False)
 
     def get_cam_weights(self,
                         input_tensor,

From 9f2d539034c3323398477b2ee182f6c93d6e1b63 Mon Sep 17 00:00:00 2001
From: cai2-huaiguang <caihg3@mail2.sysu.edu.cn>
Date: Sun, 19 Jan 2025 15:23:50 +0800
Subject: [PATCH 12/12] comments

---
 pytorch_grad_cam/base_cam.py    | 2 +-
 pytorch_grad_cam/shapley_cam.py | 4 +---
 2 files changed, 2 insertions(+), 4 deletions(-)

diff --git a/pytorch_grad_cam/base_cam.py b/pytorch_grad_cam/base_cam.py
index c54b2a29..484e8865 100644
--- a/pytorch_grad_cam/base_cam.py
+++ b/pytorch_grad_cam/base_cam.py
@@ -111,7 +111,7 @@ def forward(
                 loss.backward(retain_graph=True)
             else:
                 # keep the computational graph, create_graph = True is needed for hvp
-                torch.autograd.grad(loss, input_tensor,  retain_graph = True, create_graph = True)
+                torch.autograd.grad(loss, input_tensor, retain_graph = True, create_graph = True)
                 # When using the following loss.backward() method, a warning is raised: "UserWarning: Using backward() with create_graph=True will create a reference cycle"
                 # loss.backward(retain_graph=True, create_graph=True)
             if 'hpu' in str(self.device):
diff --git a/pytorch_grad_cam/shapley_cam.py b/pytorch_grad_cam/shapley_cam.py
index 9398e8fc..e8331528 100644
--- a/pytorch_grad_cam/shapley_cam.py
+++ b/pytorch_grad_cam/shapley_cam.py
@@ -5,7 +5,7 @@
 import numpy as np
 
 """
-Weighting the activation maps using Gradient and Hessian-Vector Product.
+Weights the activation maps using the gradient and Hessian-Vector product.
 This method (https://arxiv.org/abs/2501.06261) reinterpret CAM methods (include GradCAM, HiResCAM and the original CAM) from a Shapley value perspective.
 """
 class ShapleyCAM(BaseCAM):
@@ -51,12 +51,10 @@ def get_cam_weights(self,
         if len(activations.shape) == 4:
             weight = np.mean(weight, axis=(2, 3))
             return weight
-        
         # 3D image
         elif len(activations.shape) == 5:
             weight = np.mean(weight, axis=(2, 3, 4))
             return weight
-        
         else:
             raise ValueError("Invalid grads shape."
                              "Shape of grads should be 4 (2D image) or 5 (3D image).")