File tree Expand file tree Collapse file tree 3 files changed +38
-1
lines changed
src/vllm_tgis_adapter/grpc Expand file tree Collapse file tree 3 files changed +38
-1
lines changed Original file line number Diff line number Diff line change @@ -855,7 +855,9 @@ async def Tokenize(
855
855
# other threads
856
856
for req in request .requests :
857
857
batch_encoding = tokenizer .encode_plus (
858
- text = req .text , return_offsets_mapping = request .return_offsets
858
+ text = req .text ,
859
+ return_offsets_mapping = request .return_offsets ,
860
+ add_special_tokens = ADD_SPECIAL_TOKENS ,
859
861
)
860
862
861
863
# Tokenize the input text
Original file line number Diff line number Diff line change @@ -25,6 +25,14 @@ def test_generation_request(grpc_client):
25
25
assert response .stop_reason is not None
26
26
27
27
28
def test_tokenize_request(grpc_client):
    """Smoke-test the Tokenize RPC: a short prompt must yield a token count."""
    prompt = "Please answer the following question.\nhow far is Paris from New York?"
    result = grpc_client.make_request_tokenize(text=prompt)

    # A non-zero token_count is the minimal evidence tokenization ran.
    assert result.token_count
28
36
def test_generation_request_stream (grpc_client ):
29
37
streaming_response = grpc_client .make_request_stream (
30
38
"The answer to life the universe and everything is " ,
Original file line number Diff line number Diff line change 11
11
12
12
from vllm_tgis_adapter .grpc .pb .generation_pb2 import (
13
13
BatchedGenerationRequest ,
14
+ BatchedTokenizeRequest ,
14
15
GenerationRequest ,
15
16
ModelInfoRequest ,
16
17
Parameters ,
17
18
SingleGenerationRequest ,
18
19
StoppingCriteria ,
20
+ TokenizeRequest ,
19
21
)
20
22
from vllm_tgis_adapter .grpc .pb .generation_pb2_grpc import GenerationServiceStub
21
23
25
27
from vllm_tgis_adapter .grpc .pb .generation_pb2 import (
26
28
GenerationResponse ,
27
29
ModelInfoResponse ,
30
+ TokenizeResponse ,
28
31
)
29
32
30
33
_T = TypeVar ("_T" )
@@ -173,6 +176,30 @@ def make_request_stream(
173
176
except grpc ._channel ._MultiThreadedRendezvous as exc : # noqa: SLF001
174
177
raise RuntimeError (exc .details ()) from exc
175
178
179
+ def make_request_tokenize (
180
+ self ,
181
+ text : str | list [str ],
182
+ model_id : str | None = None ,
183
+ adapter_id : str | None = None ,
184
+ ) -> TokenizeResponse | Sequence [TokenizeResponse ]:
185
+ if single_request := isinstance (text , str ):
186
+ text = [text ]
187
+
188
+ request = BatchedTokenizeRequest (
189
+ model_id = model_id ,
190
+ requests = [TokenizeRequest (text = piece ) for piece in text ],
191
+ adapter_id = adapter_id ,
192
+ )
193
+
194
+ response = self .generation_service_stub .Tokenize (
195
+ request = request ,
196
+ )
197
+
198
+ if single_request :
199
+ return response .responses [0 ]
200
+
201
+ return response .responses
202
+
176
203
def __enter__ (self ): # noqa: D105
177
204
return self
178
205
You can’t perform that action at this time.
0 commit comments