
Commit 922d3b4

[Bugfix] Handle the edge case in detokenizer where processed tokens contain both stop str and eos token (vllm-project#23938)
Signed-off-by: dtransposed <[email protected]>
1 parent 19332c0 commit 922d3b4

File tree

2 files changed: +106 −6 lines

Lines changed: 103 additions & 0 deletions (new test file; path not shown in this view)
@@ -0,0 +1,103 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project

import pytest

from vllm.sampling_params import SamplingParams
from vllm.v1.engine import EngineCoreRequest
from vllm.v1.engine.detokenizer import BaseIncrementalDetokenizer


@pytest.fixture(params=[True, False])
def include_stop_str_in_output(request):
    return request.param


class _DummyDetokenizer(BaseIncrementalDetokenizer):

    def __init__(self, request: EngineCoreRequest):
        super().__init__(request)

    def decode_next(self, next_token_id: int) -> str:
        # Map token id to single ASCII character for deterministic testing.
        return chr(next_token_id)


def _make_request(stop, include_stop_str_in_output: bool, min_tokens: int = 0):
    params = SamplingParams(
        stop=stop,
        include_stop_str_in_output=include_stop_str_in_output,
        min_tokens=min_tokens)
    # Keep other fields minimal for unit test purposes.
    req = EngineCoreRequest(
        request_id="test",
        prompt_token_ids=[],
        mm_features=None,
        sampling_params=params,
        pooling_params=None,
        eos_token_id=None,
        arrival_time=0.0,
        lora_request=None,
        cache_salt=None,
        data_parallel_rank=None,
    )
    return req

def test_stop_string_while_stop_token_terminates(
        include_stop_str_in_output: bool):
    """
    This test verifies that the detokenizer correctly handles the case where
    the generated token sequence contains both:
    - a stop string
    - an <eos> token

    The detokenizer should respect the stop string and truncate the output
    accordingly.

    Imagine the following sequence:
    - "abcdeZ" is generated, where "Z" is the <eos> token.
    - "cd" is the stop string.

    If include_stop_str_in_output=False, the detokenizer should truncate the
    output to "ab" because the stop string "cd" is excluded.
    If include_stop_str_in_output=True, the detokenizer should include the
    stop string "cd" in the output, resulting in "abcd".

    This verifies the behavioral change introduced in
    BaseIncrementalDetokenizer where stop-string evaluation occurs before the
    early return on stop_terminated.
    """

    # Generate the text "abcdeZ" and tokenize it.
    generated_text = "abcde"
    eos_token = "Z"
    stop_string = "cd"
    generated_text = generated_text + eos_token
    token_ids = [ord(c) for c in generated_text]

    # Create a request with the stop string and initialize the detokenizer.
    req = _make_request(stop=[stop_string],
                        include_stop_str_in_output=include_stop_str_in_output)
    detok = _DummyDetokenizer(req)

    # Simulate that the last token ('Z') is a stop token
    # (stop_terminated=True).
    result = detok.update(new_token_ids=token_ids, stop_terminated=True)

    # The update should report the stop string that triggered termination.
    assert result == stop_string

    # Output text should reflect stop-string handling:
    # - include_stop_str_in_output=False => exclude "cd" => "ab"
    # - include_stop_str_in_output=True  => include "cd" => "abcd"
    expected_text = "abcd" if include_stop_str_in_output else "ab"
    assert detok.output_text == expected_text

    # The skipped final token should still be recorded in token_ids.
    assert detok.output_token_ids == token_ids

    # get_next_output_text should return the full text when finished=True.
    # (Buffering only applies during streaming when finished=False.)
    assert detok.get_next_output_text(finished=True,
                                      delta=False) == expected_text
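
The expected values the assertions above encode can also be sanity-checked with a few lines of plain Python. This is only an illustrative sketch of the intended truncation behavior, not vLLM code; the helper name truncate_on_stop is made up for this example.

# Minimal sketch (not vLLM code): reproduce the expected truncation by hand.
def truncate_on_stop(text: str, stop: str,
                     include_stop_str_in_output: bool) -> str:
    idx = text.find(stop)
    if idx == -1:
        # No stop string present: leave the text unchanged.
        return text
    # Cut after the stop string if it should be kept, otherwise before it.
    end = idx + len(stop) if include_stop_str_in_output else idx
    return text[:end]

# "abcde" is the detokenized text before the skipped <eos> token "Z".
assert truncate_on_stop("abcde", "cd", include_stop_str_in_output=False) == "ab"
assert truncate_on_stop("abcde", "cd", include_stop_str_in_output=True) == "abcd"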

vllm/v1/engine/detokenizer.py

Lines changed: 3 additions & 6 deletions
@@ -121,12 +121,9 @@ def update(self, new_token_ids: list[int],
                 self.output_token_ids) <= self.min_tokens:
             stop_check_offset = len(self.output_text)
 
-        if stop_terminated:
-            if skipped_stop_token_id is not None:
-                # Cleanup after skipping detokenization.
-                self.token_ids.append(skipped_stop_token_id)
-            # Stop token triggered; skip stop string check.
-            return None
+        if skipped_stop_token_id is not None:
+            # Cleanup after skipping detokenization.
+            self.token_ids.append(skipped_stop_token_id)
 
         # 2) Evaluate stop strings.
         stop_string = None
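
To make the behavioral change concrete, here is a simplified, self-contained sketch of the control flow before and after this patch. The function names finalize_before and finalize_after are invented for illustration and do not exist in vLLM; the real update() method tracks far more state.

def finalize_before(text: str, stop: str, stop_terminated: bool,
                    include_stop: bool) -> str:
    # Old behavior: when a stop/eos token ended the request, update()
    # returned early and the stop-string check never ran, so the stop
    # string stayed in the output text.
    if stop_terminated:
        return text
    idx = text.find(stop)
    if idx != -1:
        text = text[:idx + len(stop)] if include_stop else text[:idx]
    return text


def finalize_after(text: str, stop: str, stop_terminated: bool,
                   include_stop: bool) -> str:
    # New behavior: stop strings are evaluated regardless of
    # stop_terminated, so the output is truncated at the stop string.
    idx = text.find(stop)
    if idx != -1:
        text = text[:idx + len(stop)] if include_stop else text[:idx]
    return text


# With generated text "abcde", stop string "cd", and an eos-triggered stop:
assert finalize_before("abcde", "cd", True, include_stop=False) == "abcde"
assert finalize_after("abcde", "cd", True, include_stop=False) == "ab"
assert finalize_after("abcde", "cd", True, include_stop=True) == "abcd"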
