|
@@ -10,7 +10,7 @@
 import vllm.envs
 from vllm.logger import init_logger
 from vllm.sampling_params import SamplingParams
-from vllm.tokenizers import MistralTokenizer
+from vllm.tokenizers import DeepseekV32Tokenizer, MistralTokenizer
 from vllm.utils.import_utils import LazyLoader
 from vllm.v1.structured_output.backend_types import (
     StructuredOutputBackend,
@@ -56,6 +56,27 @@ def __post_init__(self): |
                 stop_token_ids=stop_token_ids,
                 add_prefix_space=True,
             )
+        elif isinstance(self.tokenizer, DeepseekV32Tokenizer):
+            # Adapted from xgr.TokenizerInfo.from_huggingface(),
+            # since we are using a custom tokenizer wrapper here.
+            vocab_dict = self.tokenizer.get_vocab()
+            tokenizer_vocab_size = max(len(vocab_dict), self.tokenizer.max_token_id + 1)
+            vocab_size = self.vocab_size or tokenizer_vocab_size
+            # Maintain the tokenizer's indexing; unused ids stay "".
+            encoded_vocab = [""] * vocab_size
+            for token, idx in vocab_dict.items():
+                if idx < vocab_size:
+                    encoded_vocab[idx] = token
+            stop_token_ids = [self.tokenizer.eos_token_id]
+            backend_str = self.tokenizer.tokenizer.backend_tokenizer.to_str()
+            metadata = xgr.TokenizerInfo._detect_metadata_from_hf(backend_str)
+            tokenizer_info = xgr.TokenizerInfo(
+                encoded_vocab=encoded_vocab,
+                vocab_type=metadata["vocab_type"],
+                vocab_size=vocab_size,
+                stop_token_ids=stop_token_ids,
+                add_prefix_space=metadata["add_prefix_space"],
+            )
         else:
             tokenizer_info = xgr.TokenizerInfo.from_huggingface(
                 self.tokenizer,
|
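For context, a minimal sketch of how a `tokenizer_info` built by the new `DeepseekV32Tokenizer` branch is typically consumed downstream via xgrammar's public API. This is not part of the diff; the JSON schema and batch size are illustrative assumptions.

```python
import xgrammar as xgr

# Compile a grammar against the vocabulary described by tokenizer_info
# (constructed as in the branch above).
compiler = xgr.GrammarCompiler(tokenizer_info)
compiled = compiler.compile_json_schema('{"type": "object"}')
matcher = xgr.GrammarMatcher(compiled)

# At each decoding step, mask out tokens that would violate the grammar.
bitmask = xgr.allocate_token_bitmask(1, tokenizer_info.vocab_size)
matcher.fill_next_token_bitmask(bitmask)
```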