59
59
from vllm .entrypoints .openai .serving_tokenization import (
60
60
OpenAIServingTokenization )
61
61
from vllm .entrypoints .openai .tool_parsers import ToolParserManager
62
+ from vllm .entrypoints .utils import with_cancellation
62
63
from vllm .logger import init_logger
63
64
from vllm .usage .usage_lib import UsageContext
64
65
from vllm .utils import (FlexibleArgumentParser , get_open_zmq_ipc_path ,
@@ -311,6 +312,7 @@ async def health(raw_request: Request) -> Response:
311
312
312
313
313
314
@router .post ("/tokenize" )
315
+ @with_cancellation
314
316
async def tokenize (request : TokenizeRequest , raw_request : Request ):
315
317
handler = tokenization (raw_request )
316
318
@@ -325,6 +327,7 @@ async def tokenize(request: TokenizeRequest, raw_request: Request):
325
327
326
328
327
329
@router .post ("/detokenize" )
330
+ @with_cancellation
328
331
async def detokenize (request : DetokenizeRequest , raw_request : Request ):
329
332
handler = tokenization (raw_request )
330
333
@@ -353,6 +356,7 @@ async def show_version():
353
356
354
357
355
358
@router .post ("/v1/chat/completions" )
359
+ @with_cancellation
356
360
async def create_chat_completion (request : ChatCompletionRequest ,
357
361
raw_request : Request ):
358
362
handler = chat (raw_request )
@@ -373,6 +377,7 @@ async def create_chat_completion(request: ChatCompletionRequest,
373
377
374
378
375
379
@router .post ("/v1/completions" )
380
+ @with_cancellation
376
381
async def create_completion (request : CompletionRequest , raw_request : Request ):
377
382
handler = completion (raw_request )
378
383
if handler is None :
@@ -390,6 +395,7 @@ async def create_completion(request: CompletionRequest, raw_request: Request):
390
395
391
396
392
397
@router .post ("/v1/embeddings" )
398
+ @with_cancellation
393
399
async def create_embedding (request : EmbeddingRequest , raw_request : Request ):
394
400
handler = embedding (raw_request )
395
401
if handler is None :
@@ -407,6 +413,7 @@ async def create_embedding(request: EmbeddingRequest, raw_request: Request):
407
413
408
414
409
415
@router .post ("/score" )
416
+ @with_cancellation
410
417
async def create_score (request : ScoreRequest , raw_request : Request ):
411
418
handler = score (raw_request )
412
419
if handler is None :
@@ -424,6 +431,7 @@ async def create_score(request: ScoreRequest, raw_request: Request):
424
431
425
432
426
433
@router .post ("/v1/score" )
434
+ @with_cancellation
427
435
async def create_score_v1 (request : ScoreRequest , raw_request : Request ):
428
436
logger .warning (
429
437
"To indicate that Score API is not part of standard OpenAI API, we "
0 commit comments