 from torch import Tensor, nn, vmap
 from torch.nn.functional import mse_loss
 from torch.utils._pytree import PyTree, tree_flatten, tree_map
+from torch.utils.hooks import RemovableHandle
 from utils.architectures import get_in_out_shapes
 from utils.contexts import fork_rng
 
@@ -138,9 +139,109 @@ def get_vjp(grad_outputs: Tensor) -> list[Tensor]:
     return gramian
 
 
+def compute_gramian_with_autograd_no_cross_terms(
+    model: nn.Module,
+    inputs: PyTree,
+    loss_fn: Callable[[PyTree], list[Tensor]],
+):
+    with CloneParams(model) as usage_clones:
+        output = model(inputs)
+
+    _, expected_output_shapes = get_in_out_shapes(model)
+    assert tree_map(lambda t: t.shape[1:], output) == expected_output_shapes
+
+    loss_tensors = loss_fn(output)
+    loss_vector = reduce_to_vector(loss_tensors)
+
+    def get_vjp(grad_outputs: Tensor) -> list[Tensor]:
+        grads = torch.autograd.grad(
+            loss_vector,
+            [cloned_param for orig_param_id, cloned_param in usage_clones],
+            grad_outputs=grad_outputs,
+            retain_graph=False,
+            allow_unused=True,
+        )
+        return [grad for grad in grads if grad is not None]
+
+    jacobians = vmap(get_vjp)(torch.diag(torch.ones_like(loss_vector)))
+    jacobian_matrices = [jacobian.reshape([jacobian.shape[0], -1]) for jacobian in jacobians]
+
+    gramian = sum([jacobian @ jacobian.T for jacobian in jacobian_matrices])
+
+    return gramian
+
+
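A note on the name: "no cross terms" refers to parameter reuse. When the same parameter is used in several places, its true gradient is the sum of its per-usage gradients, so the full Gramian of the loss vector contains cross terms between usages; the function above keeps only the per-usage blocks. The sketch below illustrates that difference. It is not part of the commit; the tensors and the helper are made up, and only `torch` is assumed.

```python
import torch

x1, x2 = torch.randn(3), torch.randn(3)

# One logical parameter, cloned once per usage (CloneParams automates this at the
# module level during the forward pass).
w = torch.randn(3)
w_a = w.clone().requires_grad_()  # first usage
w_b = w.clone().requires_grad_()  # second usage

losses = [(w_a @ x1) * (w_b @ x2), (w_a @ x1) ** 2 + (w_b @ x2) ** 2]

def jacobian_wrt(param):
    # One row per loss: d(loss_i) / d(param).
    rows = [torch.autograd.grad(loss, param, retain_graph=True)[0] for loss in losses]
    return torch.stack(rows)

J_a, J_b = jacobian_wrt(w_a), jacobian_wrt(w_b)

# The true gradient of each loss w.r.t. w sums over usages, so the full Gramian
# contains cross terms between the two usages:
full_gramian = (J_a + J_b) @ (J_a + J_b).T
# The "no cross terms" variant keeps only the per-usage blocks:
gramian_no_cross_terms = J_a @ J_a.T + J_b @ J_b.T
```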
 def compute_gramian(matrix: Tensor) -> Tensor:
     """Contracts the last dimension of matrix to make it into a Gramian."""
 
     indices = list(range(matrix.ndim))
     transposed_matrix = matrix.movedim(indices, indices[::-1])
     return torch.tensordot(matrix, transposed_matrix, dims=([-1], [0]))
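As a quick sanity check on the docstring above (illustration only, not part of the commit; it assumes `compute_gramian` is importable from this file): for a 2-D input the function reduces to `J @ J.T`, and for higher-dimensional inputs it contracts the last axis and mirrors the remaining ones.

```python
import torch

J = torch.randn(5, 7)
assert torch.allclose(compute_gramian(J), J @ J.T)

T = torch.randn(2, 3, 7)
G = compute_gramian(T)  # shape: (2, 3, 3, 2)
assert torch.allclose(G, torch.einsum("abk,ABk->abBA", T, T))
```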
+
+
+class CloneParams:
+    """
+    ContextManager enabling the computation of per-usage gradients.
+
+    For each submodule with direct trainable parameters, registers:
+    - A pre-hook that clones the params before using them, so that gradients will be computed with
+      respect to the cloned params.
+    - A post-hook that restores the original params.
+
+    The list of clones is returned so that we know where to find the .grad values corresponding to
+    each individual usage of a parameter.
+
+    Exiting this context manager takes care of removing hooks and restoring the original params (in
+    case an exception occurred before the post-hook could do it).
+
+    Note that this does not work for intra-module parameter reuse, which would require a node-based
+    algorithm rather than a module-based algorithm.
+    """
+
+    def __init__(self, model: nn.Module):
+        self.model = model
+        self.usage_clones: list[tuple[int, nn.Parameter]] = []
+        self._orig_params_storage: dict[int, dict[str, nn.Parameter]] = {}
+        self._handles: list[RemovableHandle] = []
+
+    def __enter__(self) -> list[tuple[int, nn.Parameter]]:
+        """Register hooks and return list of (orig_param_id, clone_param)."""
+
+        def pre_hook(module: nn.Module, _) -> None:
+            saved: dict[str, nn.Parameter] = {}
+            for name, orig_param in module.named_parameters(recurse=False):  # direct params only
+                if orig_param is None or not orig_param.requires_grad:
+                    continue
+                clone_tensor = orig_param.detach().clone().requires_grad_()
+                clone_param = nn.Parameter(clone_tensor)
+                saved[name] = orig_param
+                setattr(module, name, clone_param)
+                self.usage_clones.append((id(orig_param), clone_param))
+            self._orig_params_storage[id(module)] = saved
+
+        def post_hook(module: nn.Module, _, __) -> None:
+            self._restore_original_params(module)
+
+        # Register hooks on all modules with direct trainable params
+        for mod in self.model.modules():
+            if any(p.requires_grad for p in mod.parameters(recurse=False)):
+                self._handles.append(mod.register_forward_pre_hook(pre_hook))
+                self._handles.append(mod.register_forward_hook(post_hook))
+
+        return self.usage_clones
+
+    def __exit__(self, exc_type, exc_val, exc_tb):
+        """Remove hooks and restore parameters."""
+        for handle in self._handles:
+            handle.remove()
+        for module in self.model.modules():
+            self._restore_original_params(module)
+
+        return False  # don't suppress exceptions
+
+    def _restore_original_params(self, module: nn.Module):
+        saved = self._orig_params_storage.get(id(module), {})
+        for name, orig_param in saved.items():
+            setattr(module, name, orig_param)
+        if id(module) in self._orig_params_storage:
+            del self._orig_params_storage[id(module)]
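To see what the returned `usage_clones` buys you, here is a hedged usage sketch (illustration only, not part of the commit). The toy model and the assumption that `CloneParams` is importable from this file are mine; the point is that a module used twice in one forward pass yields two clones of the same weight, and hence two per-usage gradients whose sum is the ordinary gradient.

```python
import torch
from torch import nn

# Hypothetical toy model: the same Linear module is used twice in the forward pass.
shared = nn.Linear(4, 4, bias=False)
model = nn.Sequential(shared, nn.ReLU(), shared)

x = torch.randn(2, 4)
with CloneParams(model) as usage_clones:
    loss = model(x).sum()

# Two entries for shared.weight: one clone per forward usage.
clones = [clone for _, clone in usage_clones]
per_usage_grads = torch.autograd.grad(loss, clones)

# Summing over usages recovers the gradient that loss.backward() would have
# accumulated into shared.weight.grad on the original (uncloned) model.
total_grad = per_usage_grads[0] + per_usage_grads[1]
```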