Skip to content

Commit c9ebf51

Browse files
anijain2305 authored and pytorchmergebot committed
[dynamo][invoke_subgraph] Input aliasing and mutation check in Dynamo (pytorch#148953)
Pull Request resolved: pytorch#148953 Approved by: https://github.com/zou3519 ghstack dependencies: pytorch#149087, pytorch#149667, pytorch#150036
1 parent c18e2ce commit c9ebf51

File tree

4 files changed

+292
-27
lines changed

4 files changed

+292
-27
lines changed

test/dynamo/test_base_hop.py

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -159,10 +159,15 @@ def inner2(x, y):
159159
def f(inner, x, y):
160160
return invoke_quant_test(inner, x, y, scheme="nf4")
161161

162-
with self.assertRaisesRegex(RuntimeError, "aliases of the inputs"):
162+
with self.assertRaisesRegex(
163+
RuntimeError, "Encountered aliasing during higher order op tracing for HOP"
164+
):
163165
f(inner, x, y)
164166

165-
with self.assertRaisesRegex(RuntimeError, "inputs are mutated"):
167+
with self.assertRaisesRegex(
168+
RuntimeError,
169+
"Encountered input mutation during higher order op tracing for HOP",
170+
):
166171
f(inner2, x, y)
167172

168173
def test_eager_call(self):

test/higher_order_ops/test_invoke_subgraph.py

Lines changed: 143 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -115,7 +115,58 @@ def fn(x, y):
115115

116116
x = torch.randn(8, requires_grad=True)
117117
y = torch.randn(8, requires_grad=True)
118-
ref = gn(x, y)
118+
ref = fn(x, y)
119+
120+
x_clone = x.detach().clone().requires_grad_(True)
121+
y_clone = y.detach().clone().requires_grad_(True)
122+
res = torch.compile(fn, backend="inductor", fullgraph=True)(x_clone, y_clone)
123+
124+
# Run backward
125+
ref.sum().backward()
126+
res.sum().backward()
127+
128+
self.assertEqual(ref, res)
129+
self.assertEqual(x.grad, x_clone.grad)
130+
self.assertEqual(y.grad, y_clone.grad)
131+
132+
def test_list(self):
133+
@mark_compile_region
134+
def gn(x, y):
135+
return [torch.mul(x, y), torch.add(x, y)]
136+
137+
def fn(x, y):
138+
lst = gn(x, y)
139+
lst.append(torch.sin(x))
140+
return lst[0] + lst[1] + lst[2]
141+
142+
x = torch.randn(8, requires_grad=True)
143+
y = torch.randn(8, requires_grad=True)
144+
ref = fn(x, y)
145+
146+
x_clone = x.detach().clone().requires_grad_(True)
147+
y_clone = y.detach().clone().requires_grad_(True)
148+
res = torch.compile(fn, backend="inductor", fullgraph=True)(x_clone, y_clone)
149+
150+
# Run backward
151+
ref.sum().backward()
152+
res.sum().backward()
153+
154+
self.assertEqual(ref, res)
155+
self.assertEqual(x.grad, x_clone.grad)
156+
self.assertEqual(y.grad, y_clone.grad)
157+
158+
def test_tuple_of_tuple(self):
159+
@mark_compile_region
160+
def gn(x, y):
161+
return ((torch.mul(x, y),), torch.add(x, y))
162+
163+
def fn(x, y):
164+
tup = gn(x, y)
165+
return tup[0][0] + tup[1]
166+
167+
x = torch.randn(8, requires_grad=True)
168+
y = torch.randn(8, requires_grad=True)
169+
ref = fn(x, y)
119170

120171
x_clone = x.detach().clone().requires_grad_(True)
121172
y_clone = y.detach().clone().requires_grad_(True)
@@ -477,7 +528,29 @@ def fn(x, y):
477528

478529
opt_fn = torch.compile(fn, backend="inductor", fullgraph=True)
479530
with self.assertRaisesRegex(
480-
torch._dynamo.exc.Unsupported, "NYI: invoke_subgraph with aliasing"
531+
torch._dynamo.exc.Unsupported,
532+
"Encountered input mutation during higher order op tracing for HOP - invoke_subgraph",
533+
):
534+
opt_fn(x, y)
535+
536+
def test_input_mutation_inference_mode(self):
537+
@mark_compile_region
538+
def gn(x, y):
539+
x.add_(1)
540+
return torch.mul(x, y)
541+
542+
def fn(x, y):
543+
z = torch.cos(x)
544+
with torch.inference_mode():
545+
return gn(torch.cos(z), y)
546+
547+
opt_fn = torch.compile(fn, backend="inductor", fullgraph=True)
548+
x = torch.randn(8, requires_grad=False)
549+
y = torch.randn(8, requires_grad=False)
550+
551+
with self.assertRaisesRegex(
552+
torch._dynamo.exc.Unsupported,
553+
"Encountered input mutation during higher order op tracing",
481554
):
482555
opt_fn(x, y)
483556

@@ -520,7 +593,7 @@ def fn(x):
520593
):
521594
opt_fn(x)
522595

523-
def test_input_aliasing(self):
596+
def test_input_output_aliasing(self):
524597
@mark_compile_region
525598
def gn(x, y):
526599
return (x, torch.mul(x, y))
@@ -534,7 +607,73 @@ def fn(x, y):
534607

535608
opt_fn = torch.compile(fn, backend="inductor", fullgraph=True)
536609
with self.assertRaisesRegex(
537-
torch._dynamo.exc.Unsupported, "NYI: invoke_subgraph with aliasing"
610+
torch._dynamo.exc.Unsupported,
611+
"Encountered aliasing during higher order op tracing",
612+
):
613+
opt_fn(x, y)
614+
615+
def test_input_input_aliasing(self):
616+
@mark_compile_region
617+
def gn(x, y):
618+
return torch.mul(x, y)
619+
620+
def fn(x):
621+
return gn(x, x.view(1, 8))
622+
623+
x = torch.randn(8, requires_grad=False)
624+
625+
opt_fn = torch.compile(fn, backend="inductor", fullgraph=True)
626+
with self.assertRaisesRegex(
627+
torch._dynamo.exc.Unsupported,
628+
"Encountered aliasing during higher order op tracing",
629+
):
630+
opt_fn(x)
631+
632+
def test_output_output_aliasing(self):
633+
@mark_compile_region
634+
def gn(x):
635+
z = torch.cos(x)
636+
return z, z.view(1, 8)
637+
638+
def fn(x):
639+
return gn(x)
640+
641+
x = torch.randn(8, requires_grad=False)
642+
643+
opt_fn = torch.compile(fn, backend="inductor", fullgraph=True)
644+
with self.assertRaisesRegex(
645+
torch._dynamo.exc.Unsupported,
646+
"Encountered aliasing during higher order op tracing",
647+
):
648+
opt_fn(x)
649+
650+
def test_mod_attr_aliasing(self):
651+
class MutateParam(torch.nn.Module):
652+
def __init__(self):
653+
super().__init__()
654+
self.a = torch.ones(8)
655+
656+
def forward(self, x):
657+
self.a.add_(1)
658+
return torch.mul(x, self.a)
659+
660+
@mark_compile_region
661+
def gn(x):
662+
return mod(x)
663+
664+
def fn(x, y):
665+
return gn(x) * y
666+
667+
mod = MutateParam()
668+
x = torch.randn(8, requires_grad=False)
669+
y = torch.randn(8, requires_grad=False)
670+
671+
fn(x, y)
672+
673+
opt_fn = torch.compile(fn, backend="inductor", fullgraph=True)
674+
with self.assertRaisesRegex(
675+
torch._dynamo.exc.Unsupported,
676+
"Encountered input mutation during higher order op tracing",
538677
):
539678
opt_fn(x, y)
540679

torch/_dynamo/output_graph.py

Lines changed: 93 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,7 @@
6363
ShapeEnv,
6464
)
6565
from torch.fx.passes.runtime_assert import insert_deferred_runtime_asserts
66+
from torch.multiprocessing.reductions import StorageWeakRef
6667
from torch.utils._ordered_set import OrderedSet
6768
from torch.utils._python_dispatch import is_traceable_wrapper_subclass
6869

@@ -165,6 +166,18 @@ class VariableTrackerCacheKey:
165166
source: Source
166167

167168

169+
@dataclass(frozen=True)
170+
class AliasingInfo:
171+
has_aliasing: bool
172+
msg: str
173+
174+
175+
@dataclass(frozen=True)
176+
class MutationInfo:
177+
has_mutation: bool
178+
msg: str
179+
180+
168181
class VariableTrackerCache:
169182
def __init__(self):
170183
self.cache = {}
@@ -2023,6 +2036,13 @@ def __init__(self, output_graph, parent=None, is_export=False, source_target=Non
20232036

20242037
# This is used to create a unique name for the placeholder
20252038
self._used_names: OrderedSet[str] = OrderedSet()
2039+
# Stores the versions of the input tensors at the time they are inserted
2040+
# as placeholders in the graph. This is used to track input mutation.
2041+
self._input_versions_at_beginning: list[int] = []
2042+
if torch.is_inference_mode_enabled():
2043+
raise RuntimeError(
2044+
"Inference mode is supposed to be disabled during compilation. Please open an issue."
2045+
)
20262046

20272047
# preserve original meta if it is available
20282048
def _maybe_preserve_original_meta(self, tx, node):
@@ -2273,6 +2293,8 @@ def remove_node(self, node):
22732293
def create_graph_input(
22742294
self, name, type_expr, example_value, before=False, source=None
22752295
):
2296+
if isinstance(example_value, torch.Tensor):
2297+
self._input_versions_at_beginning.append(example_value._version)
22762298
log.debug(
22772299
"create_graph_input %s %s %s at debug_level %s before=%s",
22782300
name,
@@ -2690,6 +2712,77 @@ def lookup_unbound_symbols(self, s: torch.SymInt) -> list[sympy.Symbol]:
26902712
# Sort the symbols so that we can have a deterministic lifting order
26912713
return sorted(to_be_bound, key=lambda s: s.name)
26922714

2715+
def has_input_mutation(self):
2716+
input_versions_at_beginning = self._input_versions_at_beginning
2717+
input_nodes = []
2718+
2719+
input_versions_at_end = []
2720+
for node in self.graph.nodes:
2721+
if node.op == "placeholder":
2722+
example_value = node.meta["example_value"]
2723+
if isinstance(example_value, torch.Tensor):
2724+
input_versions_at_end.append(example_value._version)
2725+
input_nodes.append(node)
2726+
else:
2727+
break
2728+
2729+
mutated_inputs = [
2730+
i
2731+
for i, (v1, v2) in enumerate(
2732+
zip(input_versions_at_beginning, input_versions_at_end)
2733+
)
2734+
if v1 != v2
2735+
]
2736+
2737+
if len(mutated_inputs):
2738+
mutated_nodes = [input_nodes[i] for i in mutated_inputs]
2739+
msg = f"Input mutation detected at {mutated_nodes}"
2740+
return MutationInfo(True, msg)
2741+
2742+
return MutationInfo(False, "")
2743+
2744+
def has_aliasing(self):
2745+
input_storages: dict[StorageWeakRef, torch.fx.Node] = dict()
2746+
2747+
for node in self.graph.nodes:
2748+
if node.op == "placeholder":
2749+
example_value = node.meta["example_value"]
2750+
if isinstance(example_value, torch.Tensor):
2751+
storage = StorageWeakRef(example_value._typed_storage())
2752+
if storage in input_storages:
2753+
# input-input aliasing
2754+
msg = f"Input-to-input aliasing detected at nodes {input_storages[storage]} and {node}"
2755+
return AliasingInfo(True, msg)
2756+
input_storages[storage] = node
2757+
else:
2758+
break
2759+
2760+
output_storages: dict[StorageWeakRef, torch.fx.Node] = dict()
2761+
out_nodes = self.graph.find_nodes(op="output")[0]
2762+
for out_node in out_nodes.args[0]:
2763+
if out_node:
2764+
example_value = out_node.meta["example_value"]
2765+
assert not isinstance(example_value, list)
2766+
if isinstance(example_value, torch.Tensor):
2767+
storage = StorageWeakRef(example_value._typed_storage())
2768+
if storage in output_storages:
2769+
# output-output aliasing
2770+
msg = f"Output-to-output aliasing detected at nodes {output_storages[storage]} and {out_node}"
2771+
return AliasingInfo(True, msg)
2772+
output_storages[storage] = out_node
2773+
2774+
intersected_storages = input_storages.keys() & output_storages.keys()
2775+
if len(intersected_storages) > 0:
2776+
# input-output aliasing
2777+
aliased = [
2778+
(input_storages[s], output_storages[s]) for s in intersected_storages
2779+
]
2780+
aliased = ", ".join([f"{i} and {o}" for i, o in aliased])
2781+
msg = f"Input-to-output aliasing detected at nodes {aliased}"
2782+
return AliasingInfo(True, msg)
2783+
2784+
return AliasingInfo(False, "")
2785+
26932786

26942787
# NOTE: [HigherOrderOperator tracing design]
26952788
# Ignoring HigherOrderOperators for a moment,

0 commit comments

Comments
 (0)