
Commit 849d7e7

Remove TensorDict classes (#382)
* Remove TensorDicts and update Transforms and their usages accordingly
* Add changelog entry
1 parent 75ef97a commit 849d7e7

27 files changed: +185 −542 lines changed

CHANGELOG.md

Lines changed: 2 additions & 0 deletions
@@ -25,6 +25,8 @@ changes that do not affect the user.
   `inf` or `-inf` values. This check was costly in memory and in time for large matrices so this
   should improve performance. However, if the optimization diverges for some reason (for instance
   due to a too large learning rate), the resulting exceptions may come from other sources.
+- Removed some runtime checks on the shapes of the internal tensors used by the `autojac` engine.
+  This should lead to a small performance improvement.
 
 ### Fixed
 
src/torchjd/_autojac/_backward.py

Lines changed: 3 additions & 12 deletions
@@ -4,16 +4,7 @@
 
 from torchjd.aggregation import Aggregator
 
-from ._transform import (
-    Accumulate,
-    Aggregate,
-    Diagonalize,
-    EmptyTensorDict,
-    Init,
-    Jac,
-    OrderedSet,
-    Transform,
-)
+from ._transform import Accumulate, Aggregate, Diagonalize, Init, Jac, OrderedSet, Transform
 from ._utils import as_checked_ordered_set, check_optional_positive_chunk_size, get_leaf_tensors
 
 
@@ -95,7 +86,7 @@ def backward(
         parallel_chunk_size=parallel_chunk_size,
     )
 
-    backward_transform(EmptyTensorDict())
+    backward_transform({})
 
 
 def _create_transform(
@@ -104,7 +95,7 @@ def _create_transform(
     inputs: OrderedSet[Tensor],
     retain_graph: bool,
     parallel_chunk_size: int | None,
-) -> Transform[EmptyTensorDict, EmptyTensorDict]:
+) -> Transform:
     """Creates the Jacobian descent backward transform."""
 
     # Transform that creates gradient outputs containing only ones.
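The only behavioral difference in this file is the seed: `backward_transform` is now called with a plain empty `dict` rather than an `EmptyTensorDict` instance. A minimal sketch of that calling convention, using a hypothetical `Ones` transform (not part of this diff) in place of torchjd's `Init`:

```python
import torch
from torch import Tensor

TensorDict = dict[Tensor, Tensor]  # the alias introduced in _base.py below


class Ones:
    """Hypothetical seeding transform: maps {} to an all-ones dict."""

    def __init__(self, keys: list[Tensor]):
        self.keys = keys

    def __call__(self, input: TensorDict) -> TensorDict:
        # Ignores the (empty) input and emits one all-ones value per key.
        return {key: torch.ones_like(key) for key in self.keys}


t = torch.zeros(2, 3)
out = Ones([t])({})  # seeded with a plain dict, as backward_transform({}) now is
assert out[t].shape == t.shape  # a "Gradients"-shaped TensorDict
```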

src/torchjd/_autojac/_mtl_backward.py

Lines changed: 6 additions & 18 deletions
@@ -4,19 +4,7 @@
 
 from torchjd.aggregation import Aggregator
 
-from ._transform import (
-    Accumulate,
-    Aggregate,
-    EmptyTensorDict,
-    Grad,
-    Gradients,
-    Init,
-    Jac,
-    OrderedSet,
-    Select,
-    Stack,
-    Transform,
-)
+from ._transform import Accumulate, Aggregate, Grad, Init, Jac, OrderedSet, Select, Stack, Transform
 from ._utils import as_checked_ordered_set, check_optional_positive_chunk_size, get_leaf_tensors
 
 
@@ -114,7 +102,7 @@ def mtl_backward(
         parallel_chunk_size=parallel_chunk_size,
     )
 
-    backward_transform(EmptyTensorDict())
+    backward_transform({})
 
 
 def _create_transform(
@@ -125,7 +113,7 @@ def _create_transform(
     shared_params: OrderedSet[Tensor],
     retain_graph: bool,
     parallel_chunk_size: int | None,
-) -> Transform[EmptyTensorDict, EmptyTensorDict]:
+) -> Transform:
     """
     Creates the backward transform for a multi-task learning problem. It is a hybrid between
     Jacobian descent (for shared parameters) and multiple gradient descent branches (for
@@ -166,7 +154,7 @@ def _create_task_transform(
     task_params: OrderedSet[Tensor],
     loss: OrderedSet[Tensor],  # contains a single scalar loss
     retain_graph: bool,
-) -> Transform[EmptyTensorDict, Gradients]:
+) -> Transform:
     # Tensors with respect to which we compute the gradients.
     to_differentiate = task_params + features
 
@@ -179,10 +167,10 @@ def _create_task_transform(
 
     # Transform that accumulates the gradients w.r.t. the task-specific parameters into their
     # .grad fields.
-    accumulate = Accumulate() << Select[Gradients](task_params)
+    accumulate = Accumulate() << Select(task_params)
 
     # Transform that backpropagates the gradients of the losses w.r.t. the features.
-    backpropagate = Select[Gradients](features)
+    backpropagate = Select(features)
 
     # Transform that accumulates the gradient of the losses w.r.t. the task-specific parameters into
     # their .grad fields and backpropagates the gradient of the losses w.r.t. to the features.
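`Select` also loses its subscript here (`Select[Gradients](...)` becomes `Select(...)`) because transforms are no longer generic. A self-contained sketch of what `Accumulate() << Select(task_params)` composes to; the `Transform`, `Select`, and `Accumulate` classes below are simplified stand-ins for torchjd's, not their actual implementations:

```python
import torch
from torch import Tensor

TensorDict = dict[Tensor, Tensor]


class Transform:
    """Minimal stand-in base: `<<` composes right to left, like torchjd's."""

    def __call__(self, d: TensorDict) -> TensorDict:
        raise NotImplementedError

    def __lshift__(self, inner: "Transform") -> "Transform":
        outer = self

        class Composed(Transform):
            def __call__(self, d: TensorDict) -> TensorDict:
                return outer(inner(d))  # apply inner first, then outer

        return Composed()


class Select(Transform):
    """Keeps only the entries whose keys belong to `keys`."""

    def __init__(self, keys: list[Tensor]):
        self.keys = set(keys)  # tensors hash by identity, so this is safe

    def __call__(self, d: TensorDict) -> TensorDict:
        return {k: v for k, v in d.items() if k in self.keys}


class Accumulate(Transform):
    """Adds each value into its key's .grad field and returns {}."""

    def __call__(self, d: TensorDict) -> TensorDict:
        for key, grad in d.items():
            key.grad = grad if key.grad is None else key.grad + grad
        return {}


p = torch.zeros(2, requires_grad=True)
q = torch.zeros(3, requires_grad=True)

# Mirrors `accumulate = Accumulate() << Select(task_params)` from the diff.
accumulate = Accumulate() << Select([p])
accumulate({p: torch.ones(2), q: torch.ones(3)})  # q's entry is filtered out
print(p.grad, q.grad)  # tensor([1., 1.]) None
```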

src/torchjd/_autojac/_transform/__init__.py

Lines changed: 0 additions & 8 deletions
@@ -8,11 +8,3 @@
 from ._ordered_set import OrderedSet
 from ._select import Select
 from ._stack import Stack
-from ._tensor_dict import (
-    EmptyTensorDict,
-    Gradients,
-    GradientVectors,
-    JacobianMatrices,
-    Jacobians,
-    TensorDict,
-)

src/torchjd/_autojac/_transform/_accumulate.py

Lines changed: 8 additions & 6 deletions
@@ -1,13 +1,15 @@
 from torch import Tensor
 
-from ._base import Transform
-from ._tensor_dict import EmptyTensorDict, Gradients
+from ._base import TensorDict, Transform
 
 
-class Accumulate(Transform[Gradients, EmptyTensorDict]):
-    """Transform that accumulates gradients with respect to keys into their ``grad`` field."""
+class Accumulate(Transform):
+    """
+    Transform from Gradients to {} that accumulates gradients with respect to keys into their
+    ``grad`` field.
+    """
 
-    def __call__(self, gradients: Gradients) -> EmptyTensorDict:
+    def __call__(self, gradients: TensorDict) -> TensorDict:
         for key in gradients.keys():
             _check_expects_grad(key)
             if hasattr(key, "grad") and key.grad is not None:
@@ -19,7 +21,7 @@ def __call__(self, gradients: Gradients) -> EmptyTensorDict:
             # (in case it was obtained via create_graph=True and a differentiable aggregator).
             key.grad = gradients[key].clone()
 
-        return EmptyTensorDict()
+        return {}
 
     def check_keys(self, input_keys: set[Tensor]) -> set[Tensor]:
         return set()
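The accumulation logic itself is untouched; only the annotations and the returned empty dict change. Its semantics, sketched in plain torch. Note that the hunk above elides the branch taken when `.grad` already exists, so the `key.grad + ...` line is an assumption about that elided code:

```python
import torch

p = torch.zeros(3, requires_grad=True)
p.grad = torch.tensor([1.0, 0.0, 0.0])  # pre-existing gradient
q = torch.zeros(2, requires_grad=True)  # no gradient yet

gradients = {p: torch.tensor([0.0, 2.0, 0.0]), q: torch.ones(2)}

for key in gradients.keys():
    if key.grad is not None:
        key.grad = key.grad + gradients[key]  # assumed elided branch: accumulate
    else:
        # Clone so the stored gradient does not alias the dict value
        # (relevant when it was obtained via create_graph=True).
        key.grad = gradients[key].clone()

print(p.grad)  # tensor([1., 2., 0.])
print(q.grad)  # tensor([1., 1.])
```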

src/torchjd/_autojac/_transform/_aggregate.py

Lines changed: 15 additions & 16 deletions
@@ -7,15 +7,14 @@
 
 from torchjd.aggregation import Aggregator
 
-from ._base import RequirementError, Transform
+from ._base import RequirementError, TensorDict, Transform
 from ._ordered_set import OrderedSet
-from ._tensor_dict import EmptyTensorDict, Gradients, GradientVectors, JacobianMatrices, Jacobians
 
 _KeyType = TypeVar("_KeyType", bound=Hashable)
 _ValueType = TypeVar("_ValueType")
 
 
-class Aggregate(Transform[Jacobians, Gradients]):
+class Aggregate(Transform):
     """
     Transform aggregating Jacobians into Gradients.
 
@@ -35,14 +34,14 @@ def __init__(self, aggregator: Aggregator, key_order: OrderedSet[Tensor]):
         self._aggregator_str = str(aggregator)
         self.transform = reshape << aggregate_matrices << matrixify
 
-    def __call__(self, input: Jacobians) -> Gradients:
+    def __call__(self, input: TensorDict) -> TensorDict:
         return self.transform(input)
 
     def check_keys(self, input_keys: set[Tensor]) -> set[Tensor]:
         return self.transform.check_keys(input_keys)
 
 
-class _AggregateMatrices(Transform[JacobianMatrices, GradientVectors]):
+class _AggregateMatrices(Transform):
     """
     Transform aggregating JacobiansMatrices into GradientsVectors.
 
@@ -57,7 +56,7 @@ def __init__(self, aggregator: Aggregator, key_order: OrderedSet[Tensor]):
         self.key_order = key_order
         self.aggregator = aggregator
 
-    def __call__(self, jacobian_matrices: JacobianMatrices) -> GradientVectors:
+    def __call__(self, jacobian_matrices: TensorDict) -> TensorDict:
         """
         Concatenates the provided ``jacobian_matrices`` into a single matrix and aggregates it using
         the ``aggregator``. Returns the dictionary mapping each key from ``jacobian_matrices`` to
@@ -92,15 +91,15 @@ def _select_ordered_subdict(
     @staticmethod
     def _aggregate_group(
         jacobian_matrices: OrderedDict[Tensor, Tensor], aggregator: Aggregator
-    ) -> GradientVectors:
+    ) -> TensorDict:
         """
         Unites the jacobian matrices and aggregates them using an
         :class:`~torchjd.aggregation._aggregator_bases.Aggregator`. Returns the obtained gradient
         vectors.
         """
 
         if len(jacobian_matrices) == 0:
-            return EmptyTensorDict()
+            return {}
 
         united_jacobian_matrix = _AggregateMatrices._unite(jacobian_matrices)
         united_gradient_vector = aggregator(united_jacobian_matrix)
@@ -114,39 +113,39 @@ def _unite(jacobian_matrices: OrderedDict[Tensor, Tensor]) -> Tensor:
     @staticmethod
     def _disunite(
         united_gradient_vector: Tensor, jacobian_matrices: OrderedDict[Tensor, Tensor]
-    ) -> GradientVectors:
+    ) -> TensorDict:
         gradient_vectors = {}
         start = 0
         for key, jacobian_matrix in jacobian_matrices.items():
             end = start + jacobian_matrix.shape[1]
             current_gradient_vector = united_gradient_vector[start:end]
             gradient_vectors[key] = current_gradient_vector
             start = end
-        return GradientVectors(gradient_vectors)
+        return gradient_vectors
 
 
-class _Matrixify(Transform[Jacobians, JacobianMatrices]):
+class _Matrixify(Transform):
     """Transform reshaping Jacobians into JacobianMatrices."""
 
-    def __call__(self, jacobians: Jacobians) -> JacobianMatrices:
+    def __call__(self, jacobians: TensorDict) -> TensorDict:
         jacobian_matrices = {
             key: jacobian.view(jacobian.shape[0], -1) for key, jacobian in jacobians.items()
         }
-        return JacobianMatrices(jacobian_matrices)
+        return jacobian_matrices
 
     def check_keys(self, input_keys: set[Tensor]) -> set[Tensor]:
         return input_keys
 
 
-class _Reshape(Transform[GradientVectors, Gradients]):
+class _Reshape(Transform):
     """Transform reshaping GradientVectors into Gradients."""
 
-    def __call__(self, gradient_vectors: GradientVectors) -> Gradients:
+    def __call__(self, gradient_vectors: TensorDict) -> TensorDict:
         gradients = {
             key: gradient_vector.view(key.shape)
             for key, gradient_vector in gradient_vectors.items()
         }
-        return Gradients(gradients)
+        return gradients
 
     def check_keys(self, input_keys: set[Tensor]) -> set[Tensor]:
         return input_keys
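The pipeline built in `Aggregate.__init__` (`reshape << aggregate_matrices << matrixify`) is easiest to follow on concrete shapes. A sketch of the same steps in plain torch, with a row-mean standing in for a real `Aggregator`:

```python
import torch
from torch import Tensor

TensorDict = dict[Tensor, Tensor]

a = torch.zeros(2, 3)  # a parameter-like key
b = torch.zeros(4)     # another key

# Jacobians: 5 losses, one Jacobian per key (shared first dimension).
jacobians: TensorDict = {a: torch.ones(5, 2, 3), b: torch.ones(5, 4)}

# _Matrixify: each Jacobian becomes a (num_losses, num_elements) matrix.
matrices = {k: j.view(j.shape[0], -1) for k, j in jacobians.items()}

# _unite + aggregation: concatenate into one matrix, reduce to one vector.
united = torch.cat(list(matrices.values()), dim=1)  # shape (5, 10)
vector = united.mean(dim=0)                         # stand-in for an Aggregator

# _disunite: split the vector back per key, in insertion order.
gradient_vectors: TensorDict = {}
start = 0
for k, m in matrices.items():
    end = start + m.shape[1]
    gradient_vectors[k] = vector[start:end]
    start = end

# _Reshape: each flattened gradient back to its key's shape.
gradients = {k: v.view(k.shape) for k, v in gradient_vectors.items()}
assert gradients[a].shape == a.shape and gradients[b].shape == b.shape
```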

src/torchjd/_autojac/_transform/_base.py

Lines changed: 26 additions & 19 deletions
@@ -2,11 +2,21 @@
 
 from abc import ABC, abstractmethod
 from collections.abc import Sequence
-from typing import Generic
+from typing import TypeAlias
 
 from torch import Tensor
 
-from ._tensor_dict import _A, _B, _C, EmptyTensorDict, _least_common_ancestor
+TensorDict: TypeAlias = dict[Tensor, Tensor]
+# Some interesting cases of TensorDict that are worth defining informally (for performance reasons):
+# Gradients: A TensorDict in which the shape of each value must be the same as the shape of its
+# corresponding key.
+# Jacobians: A TensorDict in which the values must all have the same first dimension and the rest of
+# the shape of each value must be the same as the shape of its corresponding key.
+# GradientVectors: A TensorDict containing flattened gradients: the values must be vectors with the
+# same number of elements as their corresponding key.
+# JacobianMatrices: A TensorDict containing matrixified (flattened into matrix shape) jacobians: the
+# values must be matrices with a unique first dimension and with a second dimension equal to the
+# number of elements of their corresponding key.
 
 
 class RequirementError(ValueError):
@@ -15,23 +25,23 @@ class RequirementError(ValueError):
     pass
 
 
-class Transform(Generic[_B, _C], ABC):
+class Transform(ABC):
     """
     Abstract base class for all transforms. Transforms are elementary building blocks of a jacobian
     descent backward phase. A transform maps a TensorDict to another.
     """
 
-    def compose(self, other: Transform[_A, _B]) -> Transform[_A, _C]:
+    def compose(self, other: Transform) -> Transform:
         return Composition(self, other)
 
-    def conjunct(self, other: Transform[_B, _C]) -> Transform[_B, _C]:
+    def conjunct(self, other: Transform) -> Transform:
         return Conjunction([self, other])
 
     def __str__(self) -> str:
         return type(self).__name__
 
     @abstractmethod
-    def __call__(self, input: _B) -> _C:
+    def __call__(self, input: TensorDict) -> TensorDict:
         """Applies the transform to the input."""
 
     @abstractmethod
@@ -51,22 +61,22 @@ def check_keys(self, input_keys: set[Tensor]) -> set[Tensor]:
     __or__ = conjunct
 
 
-class Composition(Transform[_B, _C]):
+class Composition(Transform):
     """
     Transform corresponding to the composition of two transforms inner and outer.
 
     :param inner: The transform to apply first, to the input.
     :param outer: The transform to apply second, to the result of ``inner``.
     """
 
-    def __init__(self, outer: Transform[_A, _C], inner: Transform[_B, _A]):
+    def __init__(self, outer: Transform, inner: Transform):
         self.outer = outer
         self.inner = inner
 
     def __str__(self) -> str:
         return str(self.outer) + " ∘ " + str(self.inner)
 
-    def __call__(self, input: _B) -> _C:
+    def __call__(self, input: TensorDict) -> TensorDict:
         intermediate = self.inner(input)
         return self.outer(intermediate)
 
@@ -76,15 +86,15 @@ def check_keys(self, input_keys: set[Tensor]) -> set[Tensor]:
         return output_keys
 
 
-class Conjunction(Transform[_B, _C]):
+class Conjunction(Transform):
     """
     Transform applying several transforms to the same input, and combining the results (by union)
     into a single TensorDict.
 
     :param transforms: The transforms to apply. Their outputs should have disjoint sets of keys.
     """
 
-    def __init__(self, transforms: Sequence[Transform[_B, _C]]):
+    def __init__(self, transforms: Sequence[Transform]):
         self.transforms = transforms
 
     def __str__(self) -> str:
@@ -97,14 +107,11 @@ def __str__(self) -> str:
             strings.append(s)
         return "(" + " | ".join(strings) + ")"
 
-    def __call__(self, tensor_dict: _B) -> _C:
-        tensor_dicts = [transform(tensor_dict) for transform in self.transforms]
-        output_type: type[_B] = EmptyTensorDict
-        output: _B = EmptyTensorDict()
-        for tensor_dict in tensor_dicts:
-            output_type = _least_common_ancestor(output_type, type(tensor_dict))
-            output |= tensor_dict
-        return output_type(output)
+    def __call__(self, tensor_dict: TensorDict) -> TensorDict:
+        union: TensorDict = {}
+        for transform in self.transforms:
+            union |= transform(tensor_dict)
+        return union
 
     def check_keys(self, input_keys: set[Tensor]) -> set[Tensor]:
         output_keys_list = [key for t in self.transforms for key in t.check_keys(input_keys)]
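The new `Conjunction.__call__` replaces the `_least_common_ancestor` bookkeeping with a plain dict union. A minimal sketch of that merge, using two lambdas with disjoint output keys (as the docstring requires) in place of real transforms:

```python
import torch
from torch import Tensor

TensorDict = dict[Tensor, Tensor]

x, y = torch.zeros(2), torch.zeros(3)

# Two stand-in transforms whose outputs have disjoint key sets.
transforms = [
    lambda d: {x: torch.ones(2)},
    lambda d: {y: torch.ones(3)},
]

tensor_dict: TensorDict = {}  # the shared input
union: TensorDict = {}
for transform in transforms:
    union |= transform(tensor_dict)  # plain dict union, no subclass tracking

assert set(union.keys()) == {x, y}
```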
