@@ -80,7 +80,7 @@ def test_chunked_prefill(self, attn_backend):
     @parametrize_with_ids("attn_backend", ["TRTLLM", "FLASHINFER"])
     def test_bfloat16(self, attn_backend, torch_compile):
         torch_compile_config = TorchCompileConfig(
-            torch_compile_fullgraph=True) if torch_compile else None
+            enable_fullgraph=True) if torch_compile else None
         pytorch_config = dict(
             torch_compile_config=torch_compile_config,
             cuda_graph_padding_enabled=torch_compile,
@@ -109,7 +109,7 @@ def test_bfloat16_4gpus(self, tp_size, pp_size, attn_backend,
109109 "Issue: Unfusing flashinfer_fused_add_rmsnorm causes outputs to be "
110110 "discarded at graph breaks." )
111111 torch_compile_config = TorchCompileConfig (
112- torch_compile_fullgraph = True ) if torch_compile else None
112+ enable_fullgraph = True ) if torch_compile else None
113113 pytorch_config = dict (
114114 torch_compile_config = torch_compile_config ,
115115 cuda_graph_padding_enabled = torch_compile ,
@@ -136,7 +136,7 @@ def test_bfloat16_4gpus(self, tp_size, pp_size, attn_backend,
     def test_fp8(self, fp8kv, attn_backend, torch_compile):
         quant_config = QuantConfig(QuantAlgo.FP8)
         torch_compile_config = TorchCompileConfig(
-            torch_compile_fullgraph=True) if torch_compile else None
+            enable_fullgraph=True) if torch_compile else None
         pytorch_config = dict(
             torch_compile_config=torch_compile_config,
             cuda_graph_padding_enabled=torch_compile,
@@ -177,7 +177,7 @@ def test_fp8_4gpus(self, tp_size, pp_size, fp8kv, attn_backend,
177177 "discarded at graph breaks." )
178178 quant_config = QuantConfig (QuantAlgo .FP8 )
179179 torch_compile_config = TorchCompileConfig (
180- torch_compile_fullgraph = True ) if torch_compile else None
180+ enable_fullgraph = True ) if torch_compile else None
181181 pytorch_config = dict (
182182 torch_compile_config = torch_compile_config ,
183183 cuda_graph_padding_enabled = torch_compile ,
@@ -505,9 +505,8 @@ def test_bfloat16(self, mtp_nextn, attention_dp, cuda_graph,
             pytest.skip("https://nvbugs/5252559")
         kv_cache_config = KvCacheConfig(free_gpu_memory_fraction=0.9)
         torch_compile_config = TorchCompileConfig(
-            torch_compile_fullgraph=True,
-            torch_compile_piecewise_cuda_graph=cuda_graph
-        ) if torch_compile else None
+            enable_fullgraph=True,
+            enable_piecewise_cuda_graph=cuda_graph) if torch_compile else None
         pytorch_config = dict(
             disable_overlap_scheduler=not overlap_scheduler,
             use_cuda_graph=cuda_graph,
@@ -552,9 +551,8 @@ def test_bfloat16_4gpus(self, tp_size, pp_size, ep_size, mtp_nextn,
             pytest.skip("PP with torch.compile is not supported yet.")
         kv_cache_config = KvCacheConfig(free_gpu_memory_fraction=0.9)
         torch_compile_config = TorchCompileConfig(
-            torch_compile_fullgraph=True,
-            torch_compile_piecewise_cuda_graph=cuda_graph
-        ) if torch_compile else None
+            enable_fullgraph=True,
+            enable_piecewise_cuda_graph=cuda_graph) if torch_compile else None
         pytorch_config = dict(
             disable_overlap_scheduler=not overlap_scheduler,
             use_cuda_graph=cuda_graph,
@@ -597,9 +595,8 @@ def test_fp8_block_scales(self, mtp_nextn, fp8kv, attention_dp, cuda_graph,
             pytest.skip("https://nvbugs/5252559")
         kv_cache_config = KvCacheConfig(free_gpu_memory_fraction=0.9)
         torch_compile_config = TorchCompileConfig(
-            torch_compile_fullgraph=True,
-            torch_compile_piecewise_cuda_graph=cuda_graph
-        ) if torch_compile else None
+            enable_fullgraph=True,
+            enable_piecewise_cuda_graph=cuda_graph) if torch_compile else None
         pytorch_config = dict(
             disable_overlap_scheduler=not overlap_scheduler,
             use_cuda_graph=cuda_graph,
@@ -719,9 +716,8 @@ def test_fp8_block_scales_4gpus(self, tp_size, pp_size, ep_size, mtp_nextn,
             pytest.skip("PP with torch.compile is not supported yet.")
         kv_cache_config = KvCacheConfig(free_gpu_memory_fraction=0.9)
         torch_compile_config = TorchCompileConfig(
-            torch_compile_fullgraph=True,
-            torch_compile_piecewise_cuda_graph=cuda_graph
-        ) if torch_compile else None
+            enable_fullgraph=True,
+            enable_piecewise_cuda_graph=cuda_graph) if torch_compile else None
         pytorch_config = dict(
             disable_overlap_scheduler=not overlap_scheduler,
             use_cuda_graph=cuda_graph,
@@ -808,9 +804,8 @@ def test_nvfp4(self, fp8kv, attention_dp, cuda_graph, overlap_scheduler,
             pytest.skip("https://nvbugs/5252559")
         kv_cache_config = KvCacheConfig(free_gpu_memory_fraction=0.9)
         torch_compile_config = TorchCompileConfig(
-            torch_compile_fullgraph=True,
-            torch_compile_piecewise_cuda_graph=cuda_graph
-        ) if torch_compile else None
+            enable_fullgraph=True,
+            enable_piecewise_cuda_graph=cuda_graph) if torch_compile else None
         pytorch_config = dict(
             disable_overlap_scheduler=not overlap_scheduler,
             use_cuda_graph=cuda_graph,
@@ -866,9 +861,8 @@ def test_nvfp4_4gpus(self, fp8kv, attention_dp, cuda_graph,
             pytest.skip("https://nvbugs/5336321")
         kv_cache_config = KvCacheConfig(free_gpu_memory_fraction=0.9)
         torch_compile_config = TorchCompileConfig(
-            torch_compile_fullgraph=True,
-            torch_compile_piecewise_cuda_graph=cuda_graph
-        ) if torch_compile else None
+            enable_fullgraph=True,
+            enable_piecewise_cuda_graph=cuda_graph) if torch_compile else None
         pytorch_config = dict(
             disable_overlap_scheduler=not overlap_scheduler,
             use_cuda_graph=cuda_graph,
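
Note: every hunk above applies the same rename on TorchCompileConfig, from torch_compile_fullgraph and torch_compile_piecewise_cuda_graph to enable_fullgraph and enable_piecewise_cuda_graph. A minimal sketch of the updated construction pattern follows; the import path is an assumption not shown in this diff, and torch_compile / cuda_graph stand in for the test parametrization flags.

# Sketch of the renamed TorchCompileConfig fields adopted by this diff.
# Assumption: TorchCompileConfig is importable from tensorrt_llm.llmapi.
from tensorrt_llm.llmapi import TorchCompileConfig

torch_compile = True  # stands in for the @parametrize_with_ids flag
cuda_graph = True     # stands in for the cuda_graph test parameter

# Old spelling (removed by this diff):
#   TorchCompileConfig(torch_compile_fullgraph=True,
#                      torch_compile_piecewise_cuda_graph=cuda_graph)
# New spelling:
torch_compile_config = TorchCompileConfig(
    enable_fullgraph=True,
    enable_piecewise_cuda_graph=cuda_graph) if torch_compile else None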