Skip to content

Commit c8c892b

Browse files
ydwu4 authored and pytorchmergebot committed
[scan] disable functionalization key in backward tracing (pytorch#154343)
Previously, we didn't disable the functionalization key when materializing the backward graph. This caused the torch.zeros_like call, in the case where grad is None, to return a functional tensor that is not tracked by the proxy tensor mode. This PR fixes it by putting the tracing code under the disable-functionalization context manager. Fixes pytorch#153437. Pull Request resolved: pytorch#154343 Approved by: https://github.com/zou3519
1 parent 5e93abe commit c8c892b

File tree

2 files changed

+37
-7
lines changed

2 files changed

+37
-7
lines changed

test/inductor/test_control_flow.py

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1642,6 +1642,18 @@ def accumulate_chunk(input_chunk, target_chunk):
16421642
torch.cat(grad_inputs, dim=0) / chunks,
16431643
)
16441644

1645+
class ScanWithClamp(torch.nn.Module):
1646+
def __init__(self):
1647+
super().__init__()
1648+
1649+
def forward(self, scan_op, initial, xs):
1650+
def step(h_prev, x_t):
1651+
h_next = (h_prev + x_t).clamp(min=0.1)
1652+
return h_next, h_next.clone()
1653+
1654+
final, ys = scan_op(step, initial, xs)
1655+
return final, ys
1656+
16451657

16461658
class ScanTests(TestCase):
16471659
def _run_test(
@@ -1824,6 +1836,24 @@ def test_scan_compare_chunked_ce_with_no_scan(self, device, dynamic):
18241836
device=device,
18251837
)
18261838

1839+
@requires_gpu
1840+
@parametrize("device", ["cpu", GPU_TYPE])
1841+
@parametrize("dynamic", [True, False])
1842+
@torch._dynamo.config.patch("capture_scalar_outputs", True)
1843+
def test_scan_with_clamp(self, device, dynamic):
1844+
B = 4
1845+
T = 8
1846+
H = 16
1847+
self._run_test(
1848+
model=ScanModels.ScanWithClamp(),
1849+
inputs=(
1850+
torch.randn((B, H)),
1851+
torch.randn((T, B, H), requires_grad=True),
1852+
),
1853+
device=device,
1854+
dynamic=dynamic,
1855+
)
1856+
18271857

18281858
class MapModels:
18291859
class Simple(torch.nn.Module):

torch/_higher_order_ops/utils.py

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1007,13 +1007,13 @@ def _materialize_as_graph_inner():
10071007
with suspend_functionalization(), disable_functional_mode():
10081008
with disable_proxy_modes_tracing():
10091009
unfunc_t = [_from_fun(arg) for arg in args]
1010-
with contextlib.ExitStack() as stack:
1011-
stack.enter_context(
1012-
torch._C._ForceDispatchKeyGuard(include_key_set, exclude_key_set),
1013-
)
1014-
if force_enable_grad:
1015-
stack.enter_context(torch.enable_grad())
1016-
return _maybe_reenter_make_fx(fn)(*unfunc_t)
1010+
with contextlib.ExitStack() as stack:
1011+
stack.enter_context(
1012+
torch._C._ForceDispatchKeyGuard(include_key_set, exclude_key_set),
1013+
)
1014+
if force_enable_grad:
1015+
stack.enter_context(torch.enable_grad())
1016+
return _maybe_reenter_make_fx(fn)(*unfunc_t)
10171017

10181018
gm = _materialize_as_graph_inner()
10191019
assert gm is not None

0 commit comments

Comments
 (0)