import torch
from torch import Tensor
from torch.utils._pytree import tree_map


class DiagonalSparseTensor(torch.Tensor):

    @staticmethod
    def __new__(cls, data: Tensor, v_to_p: list[int]):
        # At the moment, this class is not compositional, so we assert
        # that the tensor we're wrapping is exactly a Tensor.
        assert type(data) is Tensor

        # Note [Passing requires_grad=true tensors to subclasses]
        # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
        # Calling _make_subclass directly in an autograd context is
        # never the right thing to do, as this will detach you from
        # the autograd graph. You must create an autograd function
        # representing the "constructor" (DiagonalSparseTensor, in this case)
        # and call that instead. This assert helps prevent direct usage
        # (which is bad!)
        assert not data.requires_grad or not torch.is_grad_enabled()

        # There is something very subtle going on here. In particular,
        # suppose that data is a view. Does all of the view metadata
        # (sizes, strides, storages) get propagated correctly? Yes!
        # Internally, the way _make_subclass works is it creates an
        # alias (using Tensor.alias) of the original tensor, which
        # means we replicate storage/strides, but with the Python object
        # as an instance of your subclass. In other words,
        # _make_subclass is the "easy" case of metadata propagation,
        # because anything that alias() propagates, you will get in
        # your subclass. It is _make_wrapper_subclass which is
        # problematic...
        #
        # TODO: We need to think about how we want to turn this into
        # official API. I am thinking that something that does the
        # assert above and this call could be made into a utility function
        # that is in the public API.
        return Tensor._make_wrapper_subclass(
            cls, [data.shape[i] for i in v_to_p], dtype=data.dtype, device=data.device
        )

    def __init__(self, data: Tensor, v_to_p: list[int]):
        """
        Represent a diagonal sparse tensor.

        :param data: The physical contiguous data.
        :param v_to_p: Maps virtual dimensions to physical dimensions.

        For example, `data` of shape `[d_1, d_2, d_3]` with `v_to_p` equal to `[0, 1, 0, 2, 1]`
        gives the virtual shape `[d_1, d_2, d_1, d_3, d_2]`; the represented Tensor, indexed at
        `[i, j, k, l, m]`, equals `data[i, j, l]` when `i == k` and `j == m`, and `0.` otherwise.
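
        A small usage sketch (sizes chosen purely for illustration)::

            data = torch.randn(2, 3)
            t = DiagonalSparseTensor(data, [0, 1, 0])
            assert t.shape == (2, 3, 2)
            dense = t.to_dense()
            assert torch.equal(dense[0, :, 0], data[0])
            assert torch.equal(dense[0, :, 1], torch.zeros(3))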
| 54 | + """ |
| 55 | + # Deliberate omission of `super().__init__()` as we have an unfaithful data. |
| 56 | + self._data = data |
| 57 | + self._v_to_p = v_to_p |
| 58 | + self._v_shape = tuple(data.shape[i] for i in v_to_p) |
| 59 | + |
    def to_dense(self) -> Tensor:
        # Build an einsum that scatters `data` onto the generalized diagonal. Each
        # physical dim of `data` is labelled with the virtual position where it first
        # appears; every repeated virtual position is tied to that first occurrence
        # through an identity matrix, so that off-diagonal entries are zero.
        # (This assumes every physical dim of `data` appears in `v_to_p`.)
        first_indices = dict[int, int]()
        for i, j in enumerate(self._v_to_p):
            first_indices.setdefault(j, i)

        einsum_args: list[Tensor | list[int]] = [
            self._data,
            [first_indices[j] for j in range(self._data.ndim)],
        ]
        output_indices = list(range(len(self._v_to_p)))
        identity_matrices = dict[int, Tensor]()
        for i, j in enumerate(self._v_to_p):
            if i != first_indices[j]:
                if j not in identity_matrices:
                    device = self._data.device
                    dtype = self._data.dtype
                    identity_matrices[j] = torch.eye(self._v_shape[i], device=device, dtype=dtype)
                # Tie this repeated virtual dim to the first occurrence of the same physical dim.
                einsum_args += [identity_matrices[j], [first_indices[j], i]]

        # For the docstring example (v_to_p = [0, 1, 0, 2, 1]) this is equivalent to
        # einsum("abd,ac,be->abcde", data, eye(d_1), eye(d_2)).
        output = torch.einsum(*einsum_args, output_indices)
        return output

    @classmethod
    def __torch_dispatch__(cls, func, types, args=(), kwargs=None):
        kwargs = kwargs if kwargs else {}

        # TODO: Handle batched operations (apply to self._data and wrap).
        # TODO: Handle all operations that can be represented with an einsum by translating them
        #       to operations on self._data and wrapping accordingly.

        # --- Fallback: Fold to Dense Tensor ---
        def unwrap_to_dense(t):
            if isinstance(t, cls):
                return t.to_dense()
            else:
                return t

        print(f"Falling back to dense for {func.__name__}...")
        return func(*tree_map(unwrap_to_dense, args), **tree_map(unwrap_to_dense, kwargs))

    def __repr__(self):
        return (
            f"DiagonalSparseTensor(\n"
            f"  data={self._data},\n"
            f"  v_to_p_map={self._v_to_p},\n"
            f"  shape={self._v_shape}\n"
            f")"
        )
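

if __name__ == "__main__":
    # A rough, self-contained smoke-test / usage sketch (shapes are arbitrary, chosen
    # for illustration): it builds the docstring example, materializes it with
    # to_dense(), and exercises the dense fallback in __torch_dispatch__ via an
    # ordinary add. This is a sketch of how the class is meant to be used, not a test suite.
    data = torch.randn(2, 3, 4)
    t = DiagonalSparseTensor(data, [0, 1, 0, 2, 1])
    print(t)
    assert t.shape == (2, 3, 2, 4, 3)

    dense = t.to_dense()
    assert dense.shape == (2, 3, 2, 4, 3)
    # On the generalized diagonal (i == k and j == m) the dense tensor matches `data`;
    # everywhere else it is zero.
    assert torch.equal(dense[1, 2, 1, :, 2], data[1, 2])
    assert torch.equal(dense[1, 2, 0, :, 2], torch.zeros(4))

    # Any op not handled above goes through __torch_dispatch__ and falls back to dense.
    out = t + torch.ones(t.shape)
    assert torch.equal(out, dense + torch.ones(t.shape))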