1 file changed: +2 -2 lines changed
Original file line number | Diff line number | Diff line change
@@ -3060,7 +3060,7 @@ def _dummy_run(
3060
3060
assert not uniform_decode
3061
3061
# Create mixed batch:
3062
3062
# first half decode tokens, second half one prefill
3063
- num_decode_tokens = num_tokens // 2
3063
+ num_decode_tokens = min ( max_num_reqs - 1 , num_tokens // 2 )
3064
3064
num_prefill_tokens = num_tokens - num_decode_tokens
3065
3065
num_reqs = num_decode_tokens + 1
3066
3066
@@ -3072,7 +3072,7 @@ def _dummy_run(
3072
3072
max_query_len = num_prefill_tokens
3073
3073
elif uniform_decode :
3074
3074
assert not create_mixed_batch
3075
- num_reqs = cdiv (num_tokens , max_query_len )
3075
+ num_reqs = min ( max_num_reqs , cdiv (num_tokens , max_query_len ) )
3076
3076
num_scheduled_tokens_list = [max_query_len ] * num_reqs
3077
3077
if num_tokens % max_query_len != 0 :
3078
3078
num_scheduled_tokens_list [- 1 ] = num_tokens % max_query_len
You can’t perform that action at this time.
0 commit comments