1 file changed: +6 -4 lines changed
tensorrt_llm/_torch/pyexecutor: 1 file changed, +6 -4 lines changed
@@ -895,8 +895,6 @@ def _create_warmup_request(
895895 return None
896896
897897 num_extra_decoding_steps = self ._get_num_extra_decoding_steps ()
898- if num_extra_decoding_steps > 0 :
899- return None # Disable autotuning for fused drafting loops for now.
900898
901899 if num_gen_requests > self .batch_size :
902900 return None
@@ -909,7 +907,10 @@ def _create_warmup_request(
909907 ctx_requests = []
910908 gen_requests = []
911909
912- max_seq_len = self .max_seq_len - 1
910+ # For drafting loops, reduce max_seq_len to leave room for extra decoding steps
911+ max_seq_len = self .max_seq_len - 1 - num_extra_decoding_steps
912+ if max_seq_len < 1 :
913+ return None # Not enough sequence length for drafting loop
913914 num_full_seqs = 0
914915 num_left_over_tokens = 0
915916
@@ -954,7 +955,8 @@ def _create_warmup_request(
954955 token_nums = ctx_token_nums ,
955956 is_gen = False ,
956957 max_num_draft_tokens = self .runtime_draft_len ,
957- use_mrope = self .use_mrope )
958+ use_mrope = self .use_mrope ,
959+ num_extra_decoding_steps = num_extra_decoding_steps )
958960
959961 if spec_resource_manager is not None :
960962 spec_resource_manager .add_dummy_requests (
You can’t perform that action at this time.
0 commit comments