Commit 065f3ca

JacobSzwejbka authored and facebook-github-bot committed
Properly set mutable buffer lifespans (pytorch#12182)
Summary:
Earlier iterations of mutable buffer memory planning relied on the insert copy_ pass to inject the placeholder node as the output. That is pretty hacky and doesn't compose well with the reinplacing pass. Since lifetimes are already computed in the memory planning pass, we can manually set a mutable buffer's lifespan to be infinite there instead.

Reviewed By: nitish2112, larryliu0820

Differential Revision: D77618047
1 parent 59e0476 commit 065f3ca
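
In effect, the commit pins the end of a mutable buffer's lifetime to the last node index in the graph instead of the buffer's last observed use, so the planner never frees or reuses its storage. A minimal standalone sketch of that rule (the function and argument names below are illustrative, not the ExecuTorch API):

# Illustrative sketch only, not ExecuTorch code: the lifetime rule this commit
# adds to update_tensor_lifetime, expressed over plain node indices.
from typing import List


def compute_lifetime(
    use_indices: List[int], is_mutable_buffer_placeholder: bool, max_node_idx: int
) -> List[int]:
    """Return [start, end] covering every use of a tensor.

    A mutable-buffer placeholder is pinned to the last node index so its
    planned storage is never freed or reused before the program finishes.
    """
    start = min(use_indices)
    if is_mutable_buffer_placeholder:
        end = max_node_idx  # mutable buffers are never freed
    else:
        end = max(use_indices)
    return [start, end]


# A buffer read at node 1 and mutated at node 3 of a 6-node graph stays live
# through node 5; an ordinary activation with the same uses does not.
assert compute_lifetime([1, 3], True, max_node_idx=5) == [1, 5]
assert compute_lifetime([1, 3], False, max_node_idx=5) == [1, 3]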

File tree

2 files changed (+45, -5 lines)

exir/memory_planning.py

Lines changed: 18 additions & 5 deletions
@@ -299,14 +299,22 @@ def _is_inplace_node(node: torch.fx.Node) -> bool:
         )
     )
 
+def _is_mutable_buffer(node: torch.fx.Node, gs: Optional[ExportGraphSignature]) -> bool:
+    if gs is None:
+        return False
+    if node.target not in gs.inputs_to_buffers:
+        return False
+    buf = gs.inputs_to_buffers[node.target]
+    return buf in gs.buffers_to_mutate.values()
+
 
 def update_tensor_lifetime(
-    node: torch.fx.Node, spec: TensorSpec, node_idx: int
+    node: torch.fx.Node, spec: TensorSpec, node_idx: int, max_node_idx: int, gs: Optional[ExportGraphSignature] = None
 ) -> None:
     r"""
     Update the lifetime of the tensor to cover node_idx. A tensor's lifetime
     are represented by the index of the first and last node referring
-    that tensor in its inputs/outputs.
+    that tensor in its inputs/outputs.
 
     Arguments:
         spec: the TensorSpec for the tensor
@@ -317,7 +325,12 @@ def update_tensor_lifetime(
         start = 0
     else:
         start = node_idx if start is None or start > node_idx else start
-    end = node_idx if end is None or end < node_idx else end
+
+    if node.op == "placeholder" and _is_mutable_buffer(node, gs):
+        # mutable buffers are never freed
+        end = max_node_idx
+    else:
+        end = node_idx if end is None or end < node_idx else end
     spec.lifetime = [start, end]
 
 
@@ -497,7 +510,7 @@ def update_all_tensors_lifetime(
     Set the lifetime for all the tensors encountered in the Fx graph.
     """
     specs = set()
-
+    max_node_idx = len(graph_module.graph.nodes) - 1
     for node_idx, node in enumerate(graph_module.graph.nodes):
         for spec in collect_specs_from_nodes(
             filter_nodes(itertools.chain([node], node.args, node.kwargs.values())),
@@ -509,7 +522,7 @@ def update_all_tensors_lifetime(
             do_assertion=False,
             ignore_dynamic_unbound_tensor=False,
         ):
-            update_tensor_lifetime(node, spec, node_idx)
+            update_tensor_lifetime(node, spec, node_idx, max_node_idx, graph_signature)
             specs.add(spec)
     return specs
 
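
For context on the new helper: _is_mutable_buffer resolves a placeholder's target through the export graph signature in two steps, placeholder name -> buffer fully-qualified name, then membership in the set of mutated buffers. A standalone sketch of that lookup, using hypothetical stand-ins (FakeSignature, FakeNode) for ExportGraphSignature and torch.fx.Node, with illustrative node names:

# Standalone illustration of the two lookups _is_mutable_buffer performs.
# FakeSignature/FakeNode are hypothetical stand-ins, modeling only the two
# mappings the helper reads from ExportGraphSignature.
from dataclasses import dataclass, field
from typing import Dict, Optional


@dataclass
class FakeSignature:
    # placeholder node name -> buffer fully-qualified name
    inputs_to_buffers: Dict[str, str] = field(default_factory=dict)
    # output node name -> fully-qualified name of the buffer it mutates
    buffers_to_mutate: Dict[str, str] = field(default_factory=dict)


@dataclass
class FakeNode:
    target: str
    op: str = "placeholder"


def is_mutable_buffer(node: FakeNode, gs: Optional[FakeSignature]) -> bool:
    if gs is None:
        return False
    if node.target not in gs.inputs_to_buffers:
        return False
    buf = gs.inputs_to_buffers[node.target]
    return buf in gs.buffers_to_mutate.values()


# Illustrative names: the "state" buffer appears both as an input placeholder
# and as a mutated output, so its placeholder is treated as a mutable buffer.
gs = FakeSignature(
    inputs_to_buffers={"b_state": "state"},
    buffers_to_mutate={"index_put_out": "state"},
)
assert is_mutable_buffer(FakeNode("b_state"), gs)
assert not is_mutable_buffer(FakeNode("x"), gs)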

exir/tests/test_memory_planning.py

Lines changed: 27 additions & 0 deletions
@@ -664,6 +664,33 @@ def forward(self, x: torch.Tensor) -> torch.Tensor:
             .val.allocation_info.memory_offset_high,
         )
 
+    def test_mutable_buffers_infinite_lifespan(self) -> None:
+        class Simple(torch.nn.Module):
+            def __init__(self) -> None:
+                super().__init__()
+                self.register_buffer("state", torch.zeros(1))
+
+            def forward(self, x: torch.Tensor) -> torch.Tensor:
+                self.state.index_put_([torch.tensor([0]),], x)
+                y = x + self.state
+                z = x * y
+                return z
+
+        model = Simple()
+        inputs = (torch.ones(1),)
+
+        et = to_edge(export(model, inputs, strict=True)).to_executorch(ExecutorchBackendConfig(emit_mutable_buffer_names=True, run_reinplace_pass=True))
+
+        serialized_state = et.executorch_program.execution_plan[0].values[0].val
+        self.assertEqual(serialized_state.extra_tensor_info.fully_qualified_name, "state")
+        memory_base = serialized_state.allocation_info.memory_offset_low
+        memory_size = memory_base + 4  # 4 bytes for a single float
+        for value in et.executorch_program.execution_plan[0].values[1:]:
+            val = value.val
+            if hasattr(val, "allocation_info") and val.allocation_info is not None:
+                not_overlapping = val.allocation_info.memory_offset_low < memory_base or val.allocation_info.memory_offset_low >= memory_size
+                self.assertTrue(not_overlapping)
+
     def test_constants_not_memory_planned(self) -> None:
         class Simple(torch.nn.Module):
             def __init__(self) -> None:
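
Outside of the test harness, the behaviour is exercised the same way the new test does: export a module that mutates a registered buffer and lower it with the reinplacing pass enabled. A minimal sketch; the import paths are the usual ExecuTorch ones and are assumed here rather than taken from this diff:

# Minimal end-to-end sketch mirroring the new test. Import paths are assumed.
import torch
from executorch.exir import ExecutorchBackendConfig, to_edge
from torch.export import export


class Stateful(torch.nn.Module):
    def __init__(self) -> None:
        super().__init__()
        self.register_buffer("state", torch.zeros(1))

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        self.state.index_put_([torch.tensor([0])], x)  # mutate the buffer
        return x * (x + self.state)


et = to_edge(export(Stateful(), (torch.ones(1),), strict=True)).to_executorch(
    ExecutorchBackendConfig(emit_mutable_buffer_names=True, run_reinplace_pass=True)
)
# With this commit, the planned "state" buffer keeps its memory for the whole
# program, so no other planned tensor is allowed to overlap its allocation.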
