 from vllm.entrypoints.openai.serving_tokenization import (
     OpenAIServingTokenization)
 from vllm.entrypoints.openai.tool_parsers import ToolParserManager
+from vllm.entrypoints.utils import with_cancellation
 from vllm.logger import init_logger
 from vllm.usage.usage_lib import UsageContext
 from vllm.utils import (FlexibleArgumentParser, get_open_zmq_ipc_path,
@@ -311,6 +312,7 @@ async def health(raw_request: Request) -> Response:


 @router.post("/tokenize")
+@with_cancellation
 async def tokenize(request: TokenizeRequest, raw_request: Request):
     handler = tokenization(raw_request)

@@ -325,6 +327,7 @@ async def tokenize(request: TokenizeRequest, raw_request: Request):


 @router.post("/detokenize")
+@with_cancellation
 async def detokenize(request: DetokenizeRequest, raw_request: Request):
     handler = tokenization(raw_request)

@@ -353,6 +356,7 @@ async def show_version():


 @router.post("/v1/chat/completions")
+@with_cancellation
 async def create_chat_completion(request: ChatCompletionRequest,
                                  raw_request: Request):
     handler = chat(raw_request)
@@ -373,6 +377,7 @@ async def create_chat_completion(request: ChatCompletionRequest,


 @router.post("/v1/completions")
+@with_cancellation
 async def create_completion(request: CompletionRequest, raw_request: Request):
     handler = completion(raw_request)
     if handler is None:
@@ -390,6 +395,7 @@ async def create_completion(request: CompletionRequest, raw_request: Request):


 @router.post("/v1/embeddings")
+@with_cancellation
 async def create_embedding(request: EmbeddingRequest, raw_request: Request):
     handler = embedding(raw_request)
     if handler is None:
@@ -407,6 +413,7 @@ async def create_embedding(request: EmbeddingRequest, raw_request: Request):


 @router.post("/score")
+@with_cancellation
 async def create_score(request: ScoreRequest, raw_request: Request):
     handler = score(raw_request)
     if handler is None:
@@ -424,6 +431,7 @@ async def create_score(request: ScoreRequest, raw_request: Request):


 @router.post("/v1/score")
+@with_cancellation
 async def create_score_v1(request: ScoreRequest, raw_request: Request):
     logger.warning(
         "To indicate that Score API is not part of standard OpenAI API, we "