import torch
from torch import Tensor
from torch.utils._pytree import tree_map


class DiagonalSparseTensor(torch.Tensor):

    @staticmethod
    def __new__(cls, data: Tensor, v_to_p: list[int]):
        # At the moment, this class is not compositional, so we assert
        # that the tensor we're wrapping is exactly a Tensor.
        assert type(data) is Tensor

        # Note [Passing requires_grad=true tensors to subclasses]
        # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
        # Calling _make_subclass directly in an autograd context is
        # never the right thing to do, as this will detach you from
        # the autograd graph. You must create an autograd function
        # representing the "constructor" (DiagonalSparseTensor, in this case)
        # and call that instead. This assert helps prevent direct usage
        # (which is bad!)
        assert not data.requires_grad or not torch.is_grad_enabled()

        # There is something very subtle going on here. In particular,
        # suppose that data is a view. Does all of the view metadata
        # (sizes, strides, storages) get propagated correctly? Yes!
        # Internally, the way _make_subclass works is it creates an
        # alias (using Tensor.alias) of the original tensor, which
        # means we replicate storage/strides, but with the Python object
        # as an instance of your subclass. In other words,
        # _make_subclass is the "easy" case of metadata propagation,
        # because anything that alias() propagates, you will get in
        # your subclass. It is _make_wrapper_subclass which is
        # problematic...
        #
        # TODO: We need to think about how we want to turn this into
        # official API. I am thinking that something that does the
        # assert above and this call could be made into a utility function
        # that is in the public API.
        return Tensor._make_wrapper_subclass(
            cls, [data.shape[i] for i in v_to_p], dtype=data.dtype, device=data.device
        )

    def __init__(self, data: Tensor, v_to_p: list[int]):
        """
        Represent a diagonal sparse tensor.

        :param data: The physical contiguous data.
        :param v_to_p: Maps virtual dimensions to physical dimensions.

        For example, `data` of shape `[d_1, d_2, d_3]` with `v_to_p` equal to `[0, 1, 0, 2, 1]`
        gives the virtual shape `[d_1, d_2, d_1, d_3, d_2]`; the represented Tensor, indexed at
        `[i, j, k, l, m]`, equals `data[i, j, l]` when `i == k` and `j == m`, and `0.` otherwise.
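
        A small usage sketch (sizes chosen purely for illustration)::

            data = torch.randn(2, 3)
            t = DiagonalSparseTensor(data, [0, 1, 0])
            assert t.shape == (2, 3, 2)
            dense = t.to_dense()
            assert torch.equal(dense[0, :, 0], data[0])
            assert torch.equal(dense[0, :, 1], torch.zeros(3))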
| 54 | + """ |
| 55 | + # Deliberate omission of `super().__init__()` as we have an unfaithful data. |
| 56 | + self._data = data |
| 57 | + self._v_to_p = v_to_p |
| 58 | + self._v_shape = tuple(data.shape[i] for i in v_to_p) |
| 59 | + |
    def to_dense(self) -> Tensor:
        # Build an einsum that scatters `data` onto the generalized diagonal. Each
        # physical dim of `data` is labelled with the virtual position where it first
        # appears; every repeated virtual position is tied to that first occurrence
        # through an identity matrix, so that off-diagonal entries are zero.
        # (This assumes every physical dim of `data` appears in `v_to_p`.)
        first_indices = dict[int, int]()
        for i, j in enumerate(self._v_to_p):
            first_indices.setdefault(j, i)

        einsum_args: list[Tensor | list[int]] = [
            self._data,
            [first_indices[j] for j in range(self._data.ndim)],
        ]
        output_indices = list(range(len(self._v_to_p)))
        identity_matrices = dict[int, Tensor]()
        for i, j in enumerate(self._v_to_p):
            if i != first_indices[j]:
                if j not in identity_matrices:
                    device = self._data.device
                    dtype = self._data.dtype
                    identity_matrices[j] = torch.eye(self._v_shape[i], device=device, dtype=dtype)
                # Tie this repeated virtual dim to the first occurrence of the same physical dim.
                einsum_args += [identity_matrices[j], [first_indices[j], i]]

        # For the docstring example (v_to_p = [0, 1, 0, 2, 1]) this is equivalent to
        # einsum("abd,ac,be->abcde", data, eye(d_1), eye(d_2)).
        output = torch.einsum(*einsum_args, output_indices)
        return output

    @classmethod
    def __torch_dispatch__(cls, func, types, args=(), kwargs=None):
        kwargs = kwargs if kwargs else {}

        # TODO: Handle batched operations (apply to self._data and wrap).
        # TODO: Handle all operations that can be represented with an einsum by translating them
        #       to operations on self._data and wrapping accordingly.

        # --- Fallback: Fold to Dense Tensor ---
        def unwrap_to_dense(t):
            if isinstance(t, cls):
                return t.to_dense()
            else:
                return t

        print(f"Falling back to dense for {func.__name__}...")
        return func(*tree_map(unwrap_to_dense, args), **tree_map(unwrap_to_dense, kwargs))

    def __repr__(self):
        return (
            f"DiagonalSparseTensor(\n"
            f"  data={self._data},\n"
            f"  v_to_p_map={self._v_to_p},\n"
            f"  shape={self._v_shape}\n"
            f")"
        )
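

if __name__ == "__main__":
    # A rough, self-contained smoke-test / usage sketch (shapes are arbitrary, chosen
    # for illustration): it builds the docstring example, materializes it with
    # to_dense(), and exercises the dense fallback in __torch_dispatch__ via an
    # ordinary add. This is a sketch of how the class is meant to be used, not a test suite.
    data = torch.randn(2, 3, 4)
    t = DiagonalSparseTensor(data, [0, 1, 0, 2, 1])
    print(t)
    assert t.shape == (2, 3, 2, 4, 3)

    dense = t.to_dense()
    assert dense.shape == (2, 3, 2, 4, 3)
    # On the generalized diagonal (i == k and j == m) the dense tensor matches `data`;
    # everywhere else it is zero.
    assert torch.equal(dense[1, 2, 1, :, 2], data[1, 2])
    assert torch.equal(dense[1, 2, 0, :, 2], torch.zeros(4))

    # Any op not handled above goes through __torch_dispatch__ and falls back to dense.
    out = t + torch.ones(t.shape)
    assert torch.equal(out, dense + torch.ones(t.shape))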