Commit 61fbfe5 (parent 255e34c)

[Bugfix] fixed inconsistent finish_reason handling between V0 and V1 engines (#27555)

Signed-off-by: chaunceyjiang <[email protected]>

1 file changed

vllm/v1/core/sched/utils.py

Lines changed: 6 additions & 7 deletions
@@ -42,13 +42,6 @@ def remove_all(lst: list, items_to_remove: set) -> list:
 def check_stop(
     request: Request, max_model_len: int, pooler_output: torch.Tensor | None = None
 ) -> bool:
-    if (
-        request.num_tokens >= max_model_len
-        or request.num_output_tokens >= request.max_tokens
-    ):
-        request.status = RequestStatus.FINISHED_LENGTH_CAPPED
-        return True
-
     if request.pooling_params:
         if pooler_output is not None:
             request.status = RequestStatus.FINISHED_STOPPED
@@ -70,4 +63,10 @@ def check_stop(
         request.status = RequestStatus.FINISHED_STOPPED
         request.stop_reason = last_token_id
         return True
+    if (
+        request.num_tokens >= max_model_len
+        or request.num_output_tokens >= request.max_tokens
+    ):
+        request.status = RequestStatus.FINISHED_LENGTH_CAPPED
+        return True
     return False
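
The net effect of the reordering: the length-cap check now runs after the EOS and stop-token checks, so a request that satisfies a stop condition and exhausts its token budget in the same step finishes as FINISHED_STOPPED instead of FINISHED_LENGTH_CAPPED, which is the V0 behavior the commit title refers to. Below is a minimal, runnable sketch of that ordering; FakeRequest, STOP, and LENGTH are hypothetical stand-ins for vLLM's Request and RequestStatus, and the pooling and max_model_len branches are omitted for brevity.

from dataclasses import dataclass

STOP = "stop"      # stand-in for RequestStatus.FINISHED_STOPPED
LENGTH = "length"  # stand-in for RequestStatus.FINISHED_LENGTH_CAPPED

@dataclass
class FakeRequest:  # hypothetical stand-in for vLLM's Request
    output_token_ids: list[int]
    eos_token_id: int
    max_tokens: int
    status: str = ""

def check_stop(request: FakeRequest) -> bool:
    # New order: the stop-token check runs before the length cap, so a
    # request that emits EOS on its final allowed token reports "stop".
    if request.output_token_ids and request.output_token_ids[-1] == request.eos_token_id:
        request.status = STOP
        return True
    # The length cap fires only when no stop condition matched.
    if len(request.output_token_ids) >= request.max_tokens:
        request.status = LENGTH
        return True
    return False

# EOS arrives exactly when the max_tokens budget is exhausted: both
# conditions hold, but the stop check now wins.
req = FakeRequest(output_token_ids=[11, 42, 2], eos_token_id=2, max_tokens=3)
assert check_stop(req)
print(req.status)  # -> stop

With the pre-fix order (length check first), the same request would have been reported as length-capped even though its last token was EOS.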

0 commit comments