[AOTInductor] Generate kernels separately for const graph and main graph (pytorch#153040)

muchulee8 · pytorchmergebot · commit b3524080dccd · 2025-05-08T18:45:45.000Z
Summary: We should generate the kernel for const graph and main graph separately. The reason is that when we run autotuning, we would create separate kernel calls and we should make sure that main graph also contains the runner. Test Plan: python test/inductor/test_aot_inductor.py -k test_autotune_with_constant_folding Reviewers: Subscribers: Tasks: Tags: Differential Revision: [D74347765](https://our.internmc.facebook.com/intern/diff/D74347765) Pull Request resolved: pytorch#153040 Approved by: https://github.com/angelayi
diff --git a/test/inductor/test_aot_inductor.py b/test/inductor/test_aot_inductor.py
@@ -320,6 +320,31 @@ def forward(self, x):
         with config.patch({"aot_inductor.use_runtime_constant_folding": True}):
             self.check_model(Model(self.device), example_inputs)
 
+    def test_autotune_with_constant_folding(self):
+        class Model(torch.nn.Module):
+            def __init__(self, device) -> None:
+                super().__init__()
+                self.x = torch.randn(2048, 2048, dtype=torch.float16, device=device)
+
+            def _quantize(self, input):
+                return torch.abs(input)
+
+            def forward(self, y):
+                abs_weight = self._quantize(self.x)
+                abs_y = self._quantize(y)
+
+                return abs_weight, abs_y
+
+        input1 = (torch.rand(2048, 2048, dtype=torch.float16, device=self.device),)
+        model = Model(self.device).to(self.device)
+
+        _ = model(*input1)
+
+        ep = torch.export.export(model, input1, dynamic_shapes=None, strict=False)
+        torch._inductor.aoti_compile_and_package(
+            ep, inductor_configs={"aot_inductor.use_runtime_constant_folding": True}
+        )
+
     @requires_gpu
     def test_multi_device(self):
         if self.device == "cpu" and GPU_TYPE == "xpu":
diff --git a/torch/_inductor/graph.py b/torch/_inductor/graph.py
@@ -1947,12 +1947,7 @@ def init_wrapper_code(
         )
 
         if self.const_module:
-            # If we have const module, we could reuse the kernels
-            # This could avoid duplication and save time on doing recompilation (if Triton.)
             self.wrapper_code._names_iter = self.const_module.wrapper_code._names_iter
-            self.wrapper_code.src_to_kernel = (
-                self.const_module.wrapper_code.src_to_kernel
-            )
 
     def extract_autotune_inputs(
         self, example_inputs: list[Union[int, float, torch.Tensor]]