NVIDIA
diff --git a/tensorrt_llm/_torch/auto_deploy/shim/ad_executor.py b/tensorrt_llm/_torch/auto_deploy/shim/ad_executor.py
Lines changed: 1 addition & 0 deletions
diff --git a/tensorrt_llm/_torch/pyexecutor/_util.py b/tensorrt_llm/_torch/pyexecutor/_util.py
Lines changed: 12 additions & 6 deletions
@@ -436,6 +436,7 @@ def create_autodeploy_executor(ad_config: LlmArgs, tokenizer: Optional[Tokenizer
         max_total_draft_tokens=max_total_draft_tokens,
         max_num_sequences=max_num_sequences,
         max_beam_width=ad_config.max_beam_width,
+        disable_overlap_scheduler=ad_config.disable_overlap_scheduler,
     )
     sampler = TorchSampler(sampler_args)
 
 
@@ -824,11 +824,16 @@ def create_py_executor_instance(
         virtual_memory_pools=virtual_memory_pools)
 
 
-def create_torch_sampler_args(mapping: Mapping, *, max_seq_len: int,
-                              max_batch_size: int,
-                              speculative_config: SpeculativeConfig,
-                              max_beam_width: int,
-                              disable_flashinfer_sampling: bool):
+def create_torch_sampler_args(
+    mapping: Mapping,
+    *,
+    max_seq_len: int,
+    max_batch_size: int,
+    speculative_config: SpeculativeConfig,
+    max_beam_width: int,
+    disable_overlap_scheduler: bool,
+    disable_flashinfer_sampling: bool,
+):
     max_num_sequences = max_batch_size * mapping.pp_size
     max_draft_len = (0 if speculative_config is None else
                      speculative_config.max_draft_len)
@@ -842,7 +847,7 @@ def create_torch_sampler_args(mapping: Mapping, *, max_seq_len: int,
         max_num_sequences=max_num_sequences,
         max_beam_width=max_beam_width,
         disable_flashinfer_sampling=disable_flashinfer_sampling,
-    )
+        disable_overlap_scheduler=disable_overlap_scheduler)
 
 
 def instantiate_sampler(
@@ -865,6 +870,7 @@ def instantiate_sampler(
         max_batch_size=max_batch_size,
         speculative_config=speculative_config,
         max_beam_width=max_beam_width,
+        disable_overlap_scheduler=llm_args.disable_overlap_scheduler,
         disable_flashinfer_sampling=disable_flashinfer_sampling,
     )
     decoding_mode = get_decoding_mode(decoding_config=decoding_config,
Original file line number	Diff line number	Diff line change
`@@ -436,6 +436,7 @@ def create_autodeploy_executor(ad_config: LlmArgs, tokenizer: Optional[Tokenizer`
`436`	`436`	`max_total_draft_tokens=max_total_draft_tokens,`
`437`	`437`	`max_num_sequences=max_num_sequences,`
`438`	`438`	`max_beam_width=ad_config.max_beam_width,`
	`439`	`+ disable_overlap_scheduler=ad_config.disable_overlap_scheduler,`
`439`	`440`	`)`
`440`	`441`	`sampler = TorchSampler(sampler_args)`
`441`	`442`