Skip to content

Commit f414a28

Browse files
Merge branch 'main' into preempted_prompt
2 parents 06b9b4a + 1770639 commit f414a28

File tree

1 file changed

+2
-1
lines changed

1 file changed

+2
-1
lines changed

vllm_gaudi/extension/bucketing/exponential.py

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change

@@ -86,8 +86,9 @@ def get_decode_cfgs(self, max_num_seqs, block_size, max_num_batched_tokens, max_
 86  86          decode_bs_bucket_cfg = [1, 2, max_num_seqs, decode_bs_limit]
 87  87          decode_query_bucket_cfg = [1, 1, 1, 1]
 88  88          max_decode_block_limit = math.ceil(math.log2(max_blocks)) + 1
     89 +        max_factor = int(max_blocks * max_num_seqs // 4)
 89  90          max_decode_blocks = max_blocks if use_contiguous_pa else \
 90     -            min((max_model_len // block_size * max_num_seqs), max_blocks)
     91 +            min((max_model_len // block_size * max_num_seqs), max_factor)
 91  92          decode_block_bucket_cfg = [1, max_num_seqs, max_decode_blocks, max_decode_block_limit]
 92  93
 93  94          msg = ("Decode bucket config (min, step, max_warmup, limit) "

0 commit comments

Comments (0)