
Commit 0786faa

[aoti-cuda] Directly pass user input placeholders to torch._inductor.aot_compile (#14707)
Summary: As titled, pass the user input placeholders directly to `torch._inductor.aot_compile` instead of materializing fake inputs with `torch.randn`; this avoids issues like SymInt handling.
1 parent 6a238e3 commit 0786faa

File tree

1 file changed (+1, −13 lines)

backends/cuda/cuda_backend.py

Lines changed: 1 addition & 13 deletions
```diff
@@ -117,18 +117,6 @@ def preprocess(
         if node.op == "placeholder" and node.name in user_input_names:
             user_input_placeholders.append(node.meta["val"])

-        # Create pseudo user inputs using torch.randn and metadata from input placeholders
-        faked_user_inputs = []
-        for placeholder in user_input_placeholders:
-            if isinstance(placeholder, torch.Tensor):
-                # Generate fake input with same shape and dtype, on CUDA
-                fake_input = torch.randn(
-                    placeholder.shape, dtype=placeholder.dtype, device="cuda"
-                )
-                faked_user_inputs.append(fake_input)
-
-        faked_user_inputs = tuple(faked_user_inputs)
-
         options: dict[str, typing.Any] = {
             # Embed CUDA kernel binaries directly into the compiled shared object
             "aot_inductor.embed_kernel_binary": True,
@@ -145,7 +133,7 @@ def preprocess(
         }

         with collect_unsupported_fallback_kernels():
-            so_path = torch._inductor.aot_compile(edge_program_module, faked_user_inputs, options=options)  # type: ignore[arg-type]
+            so_path = torch._inductor.aot_compile(edge_program_module, tuple(user_input_placeholders), options=options)  # type: ignore[arg-type]
         if len(missing_fallback_kernels) > 0:
             formatted_kernels = "\n - ".join(sorted(missing_fallback_kernels))
             raise RuntimeError(
```
