Diff summary: 2 files changed, 7 insertions(+), 4 deletions(-).

File 1 — hunk @@ -152,7 +152,10 @@ in def __init__(self) -> None:
152152 self ._resolved_name : str | None = None
153153 self ._call_args = {"add_special_tokens" : False }
154154 self ._encode_args = {"add_special_tokens" : False }
155- self ._decode_args = {"skip_special_tokens" : True }
155+ # Prompt generation inserts BOS/EOS tokens as block separators
156+ # (see PromptGenerator._build_token_sequence). Skipping special tokens
157+ # during decode would silently strip those separators.
158+ self ._decode_args = {"skip_special_tokens" : False }
156159
157160 def _require_init (self ) -> None :
158161 """Raise NotInitializedError if tokenizer is not initialized."""
File 2 — hunk @@ -189,7 +189,7 @@ in def test_standard_tokenizer_keeps_defaults(self):
189189 tok = self ._make_tokenizer (StandardTokenizerBackend ())
190190 assert tok ._encode_args == {"add_special_tokens" : False }
191191 assert tok ._call_args == {"add_special_tokens" : False }
192- assert tok ._decode_args == {"skip_special_tokens" : True }
192+ assert tok ._decode_args == {"skip_special_tokens" : False }
193193
194194 def test_kimi_like_overrides_encode_and_call_args (self ):
195195 tok = self ._make_tokenizer (KimiLikeTokenizerBackend ())
@@ -210,14 +210,14 @@ def test_mismatched_call_encode_sets_args_independently(self):
210210 tok = self ._make_tokenizer (MismatchedCallEncodeBackend ())
211211 assert tok ._encode_args == {"allow_special_tokens" : False }
212212 assert tok ._call_args == {"add_special_tokens" : False }
213- assert tok ._decode_args == {"skip_special_tokens" : True }
213+ assert tok ._decode_args == {"skip_special_tokens" : False }
214214
215215 def test_none_tokenizer_is_noop (self ):
216216 tok = Tokenizer ()
217217 tok ._apply_kwarg_overrides ()
218218 assert tok ._encode_args == {"add_special_tokens" : False }
219219 assert tok ._call_args == {"add_special_tokens" : False }
220- assert tok ._decode_args == {"skip_special_tokens" : True }
220+ assert tok ._decode_args == {"skip_special_tokens" : False }
221221
222222
223223# -- End-to-end: encode/decode through Tokenizer wrapper --
(End of diff view. 0 commit comments on this change.)