keras-team
diff --git a/keras/src/trainers/compile_utils.py b/keras/src/trainers/compile_utils.py
Lines changed: 24 additions & 12 deletions
diff --git a/keras/src/trainers/trainer_test.py b/keras/src/trainers/trainer_test.py
Lines changed: 10 additions & 0 deletions
diff --git a/keras/src/tree/torchtree_impl.py b/keras/src/tree/torchtree_impl.py
Lines changed: 215 additions & 0 deletions
diff --git a/keras/src/tree/tree_api.py b/keras/src/tree/tree_api.py
Lines changed: 6 additions & 1 deletion
@@ -690,17 +690,34 @@ def __call__(self, y_true, y_pred, sample_weight=None):
             return self.call(y_true, y_pred, sample_weight)
 
     def call(self, y_true, y_pred, sample_weight=None):
+        def resolve_path(path, object):
+            for _path in path:
+                object = object[_path]
+            return object
+
         if not tree.is_nested(y_true) and not tree.is_nested(y_pred):
             # Fast path: single output case / no loss-tracking metric.
             if not self.built:
                 self.build(y_true, y_pred)
-            _, loss_fn, loss_weight, _ = self._flat_losses[0]
-            loss_value = ops.cast(
-                loss_fn(y_true, y_pred, sample_weight), dtype=self.dtype
-            )
-            if loss_weight is not None:
-                loss_value = ops.multiply(loss_value, loss_weight)
-            return loss_value
+            # Although we are in the fast path, we still need to iterate
+            # through the losses to prevent the torch compiler from failing.
+            loss_values = []
+            for path, loss_fn, loss_weight, _ in self._flat_losses:
+                y_t, y_p = (
+                    resolve_path(path, y_true),
+                    resolve_path(path, y_pred),
+                )
+                if sample_weight is not None and tree.is_nested(sample_weight):
+                    _sample_weight = resolve_path(path, sample_weight)
+                else:
+                    _sample_weight = sample_weight
+                value = ops.cast(
+                    loss_fn(y_t, y_p, _sample_weight), dtype=self.dtype
+                )
+                if loss_weight is not None:
+                    value = ops.multiply(value, loss_weight)
+                loss_values.append(value)
+            return loss_values[0]
 
         try:
             tree.assert_same_structure(y_pred, y_true)
@@ -779,11 +796,6 @@ def call(self, y_true, y_pred, sample_weight=None):
         # Iterate all losses in flat form.
         loss_values = []
 
-        def resolve_path(path, object):
-            for _path in path:
-                object = object[_path]
-            return object
-
         for (path, loss_fn, loss_weight, _), metric in zip(
             self._flat_losses, metrics
         ):
 
@@ -1869,6 +1869,11 @@ def test_training_arg(self):
     )
     @pytest.mark.requires_trainable_backend
     def test_on_batch_methods(self, run_eagerly, jit_compile):
+        if backend.backend() == "torch" and jit_compile:
+            self.skipTest(
+                "test_on_batch with jit_compile=True not supported in torch "
+                "backend yet."
+            )
         model = ExampleModel(units=3)
         x = np.ones((100, 4))
         y = np.zeros((100, 3))
@@ -1925,6 +1930,11 @@ def test_on_batch_methods(self, run_eagerly, jit_compile):
         ]
     )
     def test_on_batch_methods_without_training(self, run_eagerly, jit_compile):
+        if backend.backend() == "torch" and jit_compile:
+            self.skipTest(
+                "test_on_batch with jit_compile=True not supported in torch "
+                "backend yet."
+            )
         model = ExampleModel(units=3)
         x = np.ones((100, 4))
         y = np.zeros((100, 3))
 
@@ -0,0 +1,215 @@
+from collections import defaultdict
+
+from torch.utils import _pytree as torch_tree
+
+
+def register_tree_node_class(cls):
+    """Register `cls` as a torch pytree node and return it unchanged.
+
+    The hooks handed to `torch_tree.register_pytree_node` are the
+    `torchtree_flatten`, `torchtree_flatten_with_keys` and
+    `torchtree_unflatten` attributes of `cls`. Since `cls` is returned,
+    this function can be applied as a class decorator.
+    """
+
+    def _flatten(instance):
+        return instance.torchtree_flatten()
+
+    def _flatten_with_keys(instance):
+        return instance.torchtree_flatten_with_keys()
+
+    torch_tree.register_pytree_node(
+        cls,
+        flatten_fn=_flatten,
+        unflatten_fn=cls.torchtree_unflatten,
+        serialized_type_name=f"{cls.__name__}",
+        flatten_with_keys_fn=_flatten_with_keys,
+    )
+    return cls
+
+
+def _tree_is_leaf(tree, is_leaf=None):
+    """Return whether `tree` should be treated as a leaf.
+
+    Args:
+        tree: The object to inspect.
+        is_leaf: Optional predicate; when it returns a truthy value for
+            `tree`, the object is a leaf regardless of its type.
+
+    Returns:
+        `True` if `is_leaf` accepts `tree` or if the node type of `tree`
+        is not among `torch_tree.SUPPORTED_NODES`, `False` otherwise.
+    """
+    if is_leaf is None or not is_leaf(tree):
+        node_type = torch_tree._get_node_type(tree)
+        return node_type not in torch_tree.SUPPORTED_NODES
+    return True
+
+
+def _dict_to_ordered_dict(structure):
+    """Return `structure` rebuilt with `dict` nodes sorted by key.
+
+    `dict` and `defaultdict` nodes are rebuilt with their keys in sorted
+    order at every nesting level, which gives a deterministic order that
+    is consistent with other tree implementations. Other containers are
+    rebuilt from their recursively processed children, and leaves are
+    returned as is.
+
+    Args:
+        structure: An arbitrarily nested structure.
+
+    Returns:
+        The structure with every `dict` / `defaultdict` node key-sorted.
+    """
+    structure_type = type(structure)
+    # Exact type checks: subclasses (e.g. `OrderedDict`) are not
+    # re-sorted and fall through to the generic path below.
+    if structure_type is dict:
+        # Recurse into the values as well; sorting only this level would
+        # leave dicts nested inside dicts in insertion order.
+        return {
+            k: _dict_to_ordered_dict(structure[k])
+            for k in sorted(structure)
+        }
+    if structure_type is defaultdict:
+        return defaultdict(
+            structure.default_factory,
+            {
+                k: _dict_to_ordered_dict(structure[k])
+                for k in sorted(structure)
+            },
+        )
+
+    # Split off the direct children only: every node other than
+    # `structure` itself is treated as a leaf.
+    children, treedef = torch_tree.tree_flatten(
+        structure,
+        is_leaf=lambda x: x is not structure,
+    )
+    if treedef.num_nodes == 1 and treedef.num_leaves == 1:
+        # `structure` is itself a leaf.
+        return structure
+    return torch_tree.tree_unflatten(
+        [_dict_to_ordered_dict(c) for c in children],
+        treedef,
+    )
+
+
+def is_nested(structure):
+    """Return whether `structure` is a container node rather than a leaf."""
+    is_leaf = _tree_is_leaf(structure)
+    return not is_leaf
+
+
+def traverse(func, structure, top_down=True):
+    """Visit every node of `structure` with `func` and rebuild the result.
+
+    Args:
+        func: Callable invoked on each visited node. Returning `None`
+            keeps the node (and, when `top_down=True`, continues into
+            its children). Returning a class named `MAP_TO_NONE`
+            replaces the node with `None`. Any other return value
+            replaces the node.
+        structure: An arbitrarily nested structure.
+        top_down: If `True`, `func` is called on a node before its
+            children and a non-`None` result stops the descent into
+            that node. If `False`, the children are traversed first and
+            `func` is called on the rebuilt node.
+
+    Returns:
+        The traversed structure, with nodes replaced as described above.
+    """
+
+    def traverse_children():
+        # Split off the direct children only: every node other than
+        # `structure` itself is treated as a leaf.
+        children, treedef = torch_tree.tree_flatten(
+            structure,
+            is_leaf=lambda x: x is not structure,
+        )
+        if treedef.num_nodes == 1 and treedef.num_leaves == 1:
+            # `structure` is itself a leaf; there is nothing to descend into.
+            return structure
+        else:
+            return torch_tree.tree_unflatten(
+                [traverse(func, c, top_down=top_down) for c in children],
+                treedef,
+            )
+
+    # Sort dict keys before visiting so the traversal order is deterministic.
+    structure = _dict_to_ordered_dict(structure)
+    if top_down:
+        ret = func(structure)
+        if ret is None:
+            return traverse_children()
+    else:
+        traversed_structure = traverse_children()
+        ret = func(traversed_structure)
+        if ret is None:
+            return traversed_structure
+    # Detect MAP_TO_NONE without tree_api import to avoid circular import.
+    if isinstance(ret, type) and ret.__name__ == "MAP_TO_NONE":
+        return None
+    return ret
+
+
+def flatten(structure):
+    """Return the leaves of `structure` as a flat list."""
+    # Dicts are sorted first so that the leaf order is deterministic and
+    # consistent with other tree implementations.
+    ordered = _dict_to_ordered_dict(structure)
+    return torch_tree.tree_flatten(ordered)[0]
+
+
+def flatten_with_path(structure):
+    """Flatten `structure` into a list of `(path, leaf)` pairs.
+
+    Each path is a tuple. Sequence entries contribute their index,
+    mapping entries their key, and attribute entries the rank of the
+    attribute name among all attribute names found in the tree, taken
+    in sorted order.
+    """
+    # Dicts are sorted first so that the order is deterministic and
+    # consistent with other tree implementations.
+    ordered = _dict_to_ordered_dict(structure)
+    keyed_leaves, _ = torch_tree.tree_flatten_with_path(ordered)
+
+    # Number the distinct attribute names by their rank in sorted order.
+    attr_names = sorted(
+        {
+            entry.name
+            for key_path, _ in keyed_leaves
+            for entry in key_path
+            if isinstance(entry, torch_tree.GetAttrKey)
+        }
+    )
+    attr_rank = {name: rank for rank, name in enumerate(attr_names)}
+
+    def to_path(key_path):
+        # Convert torch key entries to a plain tuple of keys.
+        steps = []
+        for entry in key_path:
+            if isinstance(entry, torch_tree.SequenceKey):
+                steps.append(entry.idx)
+            elif isinstance(entry, torch_tree.MappingKey):
+                steps.append(entry.key)
+            elif isinstance(entry, torch_tree.GetAttrKey):
+                steps.append(attr_rank[entry.name])
+        return tuple(steps)
+
+    return [(to_path(key_path), leaf) for key_path, leaf in keyed_leaves]
+
+
+def map_structure(func, *structures, none_is_leaf=True):
+    """Apply `func` leaf-wise across one or more structures.
+
+    Args:
+        func: Callable receiving one leaf from each structure.
+        *structures: The structures to map over; at least one is needed.
+        none_is_leaf: If `True`, `func` is called on `None` entries like
+            on any other leaf. If `False`, `None` entries are kept as
+            `None` without calling `func`, provided the corresponding
+            entry is `None` in every structure.
+
+    Returns:
+        The structure produced by `torch_tree.tree_map`.
+
+    Raises:
+        ValueError: If no structure is given, or if `none_is_leaf=False`
+            and the first structure has `None` where another has a value.
+    """
+    if len(structures) == 0:
+        raise ValueError("Must provide at least one structure")
+
+    if none_is_leaf:
+        return torch_tree.tree_map(func, *structures)
+
+    def skip_none(*args):
+        # The first structure is the reference entry.
+        if args[0] is not None:
+            return func(*args)
+        if any(s is not None for s in args):
+            raise ValueError(
+                "Structure mismatch: some arguments are None, others "
+                f"are not. Received arguments: {args}."
+            )
+        return None
+
+    return torch_tree.tree_map(skip_none, *structures)
+
+
+def map_structure_up_to(shallow_structure, func, *structures):
+    """Map `func` over `structures`, guided by `shallow_structure`.
+
+    `shallow_structure` is passed to `torch_tree.tree_map` as the first
+    tree, but its entries are only checked, never forwarded: `func` is
+    called with the entries of `structures` alone.
+
+    Raises:
+        ValueError: If no structure is given, or if an entry coming from
+            `shallow_structure` is not a leaf.
+    """
+    if len(structures) == 0:
+        raise ValueError("Must provide at least one structure")
+
+    def apply_without_shallow(shallow_node, *nodes):
+        # Check that `shallow_structure` really is the shallowest, and
+        # call `func` on `structures` only.
+        if _tree_is_leaf(shallow_node):
+            return func(*nodes)
+        raise ValueError("Structures don't have the same nested structure.")
+
+    return torch_tree.tree_map(
+        apply_without_shallow,
+        shallow_structure,
+        *structures,
+    )
+
+
+def assert_same_structure(a, b):
+    """Check that `a` and `b` share the same nested structure.
+
+    The two structures are mapped together with `torch_tree.tree_map`;
+    a `ValueError` is raised here if a pair of mapped entries contains a
+    non-leaf. Mismatches detected by `tree_map` itself surface as
+    whatever that call raises.
+    """
+
+    def ensure_leaves(a_node, b_node):
+        if _tree_is_leaf(a_node) and _tree_is_leaf(b_node):
+            return None
+        raise ValueError("Structures don't have the same nested structure.")
+
+    torch_tree.tree_map(ensure_leaves, a, b)
+
+
+def assert_same_paths(a, b):
+    """Check that `a` and `b` flatten to the same set of paths.
+
+    Raises:
+        ValueError: If the path sets differ; the message lists the paths
+            present on one side only.
+    """
+    a_paths = {path for path, _ in flatten_with_path(a)}
+    b_paths = {path for path, _ in flatten_with_path(b)}
+    if a_paths == b_paths:
+        return
+
+    a_diff = a_paths - b_paths
+    b_diff = b_paths - a_paths
+    msg = "`a` and `b` don't have the same paths."
+    if a_diff:
+        msg += f"\nPaths in `a` missing in `b`:\n{a_diff}"
+    if b_diff:
+        msg += f"\nPaths in `b` missing in `a`:\n{b_diff}"
+    raise ValueError(msg)
+
+
+def pack_sequence_as(structure, flat_sequence):
+    """Rebuild `flat_sequence` into the nested layout of `structure`."""
+    # Dicts are sorted first so that the order is deterministic and
+    # consistent with other tree implementations.
+    ordered = _dict_to_ordered_dict(structure)
+    treespec = torch_tree.tree_flatten(ordered)[1]
+    return torch_tree.tree_unflatten(flat_sequence, treespec)
+
+
+def lists_to_tuples(structure):
+    """Return `structure` with every `list` node converted to a `tuple`."""
+
+    def convert(node):
+        # Returning `None` leaves non-list nodes as they are.
+        if isinstance(node, list):
+            return tuple(node)
+        return None
+
+    return traverse(convert, structure, top_down=False)
+
+
+def map_shape_structure(func, structure):
+    """Apply `func` to `structure`, treating shape-like sequences as leaves.
+
+    A list or tuple whose elements are all `int` or `None` is handed to
+    `func` whole instead of being traversed.
+    """
+
+    def is_shape_tuple(x):
+        if not isinstance(x, (list, tuple)):
+            return False
+        return all(isinstance(e, (int, type(None))) for e in x)
+
+    # Dicts are sorted first so that the order is deterministic and
+    # consistent with other tree implementations.
+    ordered = _dict_to_ordered_dict(structure)
+    return torch_tree.tree_map(func, ordered, is_leaf=is_shape_tuple)
@@ -1,10 +1,15 @@
 import warnings
 
 from keras.src.api_export import keras_export
+from keras.src.backend.config import backend
 from keras.src.utils.module_utils import dmtree
 from keras.src.utils.module_utils import optree
 
-if optree.available:
+if backend() == "torch":
+    # torchtree_impl is especially used for Torch backend, as it works better
+    # with torch.compile.
+    from keras.src.tree import torchtree_impl as tree_impl
+elif optree.available:
     from keras.src.tree import optree_impl as tree_impl
 elif dmtree.available:
     from keras.src.tree import dmtree_impl as tree_impl