# SPDX-FileCopyrightText: Copyright contributors to the vLLM project

from pathlib import Path
+ from typing import Any

from transformers import BatchEncoding

- from .deepseek_v32_encoding import encode_messages
- from .hf import HfTokenizer, TokenizerLike
- from .registry import TokenizerRegistry
+ from vllm.entrypoints.chat_utils import ChatCompletionMessageParam

+ from .deepseek_v32_encoding import encode_messages
+ from .hf import CachedHfTokenizer
+ from .protocol import TokenizerLike

- @TokenizerRegistry.register("deepseek_v32")
- class DeepseekV32Tokenizer(HfTokenizer):
-     def __init__(self, tokenizer: TokenizerLike):
-         self.tokenizer = tokenizer
-         self.name_or_path = (
-             tokenizer.name_or_path if hasattr(tokenizer, "name_or_path") else ""
-         )
-         self._added_vocab = self.tokenizer.get_added_vocab()
-         self._added_vocab_size = len(self._added_vocab)

+ class DeepseekV32Tokenizer(CachedHfTokenizer):
    @classmethod
    def from_pretrained(
        cls,
@@ -40,7 +34,21 @@ def from_pretrained(
        )
        return DeepseekV32Tokenizer(tokenizer)

-     def apply_chat_template(self, messages, tools=None, **kwargs):
+     def __init__(self, tokenizer: TokenizerLike) -> None:
+         super().__init__()
+
+         self.tokenizer = tokenizer
+         self.name_or_path = getattr(tokenizer, "name_or_path", "")
+
+         self._added_vocab = self.tokenizer.get_added_vocab()
+         self._added_vocab_size = len(self._added_vocab)
+
+     def apply_chat_template(
+         self,
+         messages: list["ChatCompletionMessageParam"],
+         tools: list[dict[str, Any]] | None = None,
+         **kwargs,
+     ) -> str | list[int]:
        thinking = kwargs.get("thinking", False)
        thinking_mode = "thinking"
        if not thinking:
@@ -49,13 +57,24 @@ def apply_chat_template(self, messages, tools=None, **kwargs):
        messages = conversation.copy()
        if tools is not None and len(tools) > 0:
            messages.insert(0, {"role": "system"})
-             messages[0]["tools"] = tools
+             messages[0]["tools"] = tools  # type: ignore[typeddict-unknown-key]

        # Historical reasoning content is dropped when a new user message is introduced
        drop_thinking = messages[-1]["role"] == "user"

        encode_config = dict(thinking_mode=thinking_mode, drop_thinking=drop_thinking)
        prompt_str = encode_messages(messages, **encode_config)  # type: ignore
+
+         if kwargs.get("tokenize", True):
+             tokenizer_kwargs = {
+                 k: kwargs[k] for k in ("truncation", "max_length") if k in kwargs
+             }
+             return self.encode(
+                 prompt_str,
+                 add_special_tokens=False,
+                 **tokenizer_kwargs,
+             )
+
        return prompt_str

    def num_special_tokens_to_add(self) -> int:
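For reference, a minimal usage sketch of the reworked apply_chat_template (not part of the diff); the import path, checkpoint id, and from_pretrained argument below are illustrative assumptions, not something this change defines.

# Usage sketch only: module path and checkpoint id are assumed, and the exact
# from_pretrained signature is elided in this diff.
from vllm.transformers_utils.tokenizers.deepseek_v32 import DeepseekV32Tokenizer

tokenizer = DeepseekV32Tokenizer.from_pretrained("deepseek-ai/DeepSeek-V3.2")
messages = [{"role": "user", "content": "What is 2 + 2?"}]

# tokenize defaults to True in this change, so token ids come back by default;
# tokenize=False returns the rendered prompt string instead.
token_ids = tokenizer.apply_chat_template(messages, thinking=True)
prompt_str = tokenizer.apply_chat_template(messages, thinking=False, tokenize=False)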