23
23
log_tracing_disabled_warning ,
24
24
)
25
25
from vllm .transformers_utils .tokenizer import AnyTokenizer # noqa: TCH002
26
+ from vllm .transformers_utils .tokenizers .mistral import MistralTokenizer
26
27
from vllm .utils import iterate_with_cancellation
27
28
28
29
from vllm_tgis_adapter .logging import init_logger
@@ -855,18 +856,25 @@ async def Tokenize(
855
856
tokenizer = await self ._get_tokenizer (adapter_kwargs )
856
857
857
858
responses : list [TokenizeResponse ] = []
859
+ is_mistral_tokenizer = isinstance (tokenizer , MistralTokenizer )
858
860
859
861
# TODO: maybe parallelize, also move convert_ids_to_tokens into the
860
862
# other threads
861
863
for req in request .requests :
862
- batch_encoding = tokenizer .encode_plus (
863
- text = req .text ,
864
- return_offsets_mapping = request .return_offsets ,
865
- add_special_tokens = ADD_SPECIAL_TOKENS ,
866
- )
864
+ if is_mistral_tokenizer :
865
+ token_ids = tokenizer .encode (
866
+ prompt = req .text ,
867
+ )
868
+ else :
869
+ batch_encoding = tokenizer .encode_plus (
870
+ text = req .text ,
871
+ return_offsets_mapping = request .return_offsets ,
872
+ add_special_tokens = ADD_SPECIAL_TOKENS ,
873
+ )
874
+
875
+ # Tokenize the input text
876
+ token_ids = batch_encoding .input_ids
867
877
868
- # Tokenize the input text
869
- token_ids = batch_encoding .input_ids
870
878
token_count = len (token_ids )
871
879
872
880
if 0 < request .truncate_input_tokens < token_count :
@@ -877,13 +885,19 @@ async def Tokenize(
877
885
offsets = None
878
886
879
887
if request .return_offsets :
880
- offsets = [
881
- {"start" : start , "end" : end }
882
- for start , end in batch_encoding .offset_mapping
883
- if start is not None and end is not None
884
- ]
885
- # Truncate offset list if request.truncate_input_tokens
886
- offsets = offsets [- token_count :]
888
+ if is_mistral_tokenizer :
889
+ logger .warning (
890
+ "Mistral tokenizer doesn't support "
891
+ "return_offsets at the moment. "
892
+ )
893
+ else :
894
+ offsets = [
895
+ {"start" : start , "end" : end }
896
+ for start , end in batch_encoding .offset_mapping
897
+ if start is not None and end is not None
898
+ ]
899
+ # Truncate offset list if request.truncate_input_tokens
900
+ offsets = offsets [- token_count :]
887
901
888
902
tokens = tokens [- token_count :] if request .return_tokens else None
889
903
0 commit comments