44import logging
55import time
66from concurrent .futures import ThreadPoolExecutor
7- from contextlib import nullcontext
87from dataclasses import asdict
98from http import HTTPStatus
109from typing import List , Optional , Union
@@ -101,17 +100,10 @@ async def _prepare_request(request: Union[ChatCompletionRequest, CompletionReque
101100 if not is_valid :
102101 return create_error_response (HTTPStatus .BAD_REQUEST , 'API key error' )
103102
104- if _args .infer_backend == 'vllm' :
105- from .utils import vllm_context
106- model_or_engine = llm_engine
107- context = vllm_context (template )
108- elif _args .infer_backend == 'lmdeploy' :
109- from .utils import lmdeploy_context
103+ if _args .infer_backend in {'vllm' , 'lmdeploy' }:
110104 model_or_engine = llm_engine
111- context = lmdeploy_context (template )
112105 else :
113106 model_or_engine = model
114- context = nullcontext (template )
115107
116108 error_msg = await check_model (request )
117109 if error_msg is not None :
@@ -147,10 +139,9 @@ async def _prepare_request(request: Union[ChatCompletionRequest, CompletionReque
147139 example ['tools' ] = [tool ]
148140 elif request .tool_choice == 'auto' :
149141 example ['tools' ] = request .tools
150- with context :
151- executor = ThreadPoolExecutor (max_workers = 1 )
152- loop = asyncio .get_running_loop ()
153- inputs = (await loop .run_in_executor (executor , template .encode , example ))[0 ]
142+ executor = ThreadPoolExecutor (max_workers = 1 )
143+ loop = asyncio .get_running_loop ()
144+ inputs = (await loop .run_in_executor (executor , template .encode , example ))[0 ]
154145 request_id = f'chatcmpl-{ random_uuid ()} '
155146 _request ['messages' ] = messages
156147 else :
@@ -167,10 +158,9 @@ async def _prepare_request(request: Union[ChatCompletionRequest, CompletionReque
167158 example = {'query' : prompt }
168159 if len (images ) > 0 :
169160 example ['images' ] = images
170- with context :
171- executor = ThreadPoolExecutor (max_workers = 1 )
172- loop = asyncio .get_running_loop ()
173- inputs = (await loop .run_in_executor (executor , template .encode , example ))[0 ]
161+ executor = ThreadPoolExecutor (max_workers = 1 )
162+ loop = asyncio .get_running_loop ()
163+ inputs = (await loop .run_in_executor (executor , template .encode , example ))[0 ]
174164 request_id = f'cmpl-{ random_uuid ()} '
175165 _request ['prompt' ] = prompt
176166
@@ -709,9 +699,11 @@ def llm_deploy(args: DeployArguments) -> None:
709699 if args .infer_backend == 'vllm' :
710700 from .utils import prepare_vllm_engine_template
711701 llm_engine , template = prepare_vllm_engine_template (args , use_async = True )
702+ template ._is_vllm = True
712703 elif args .infer_backend == 'lmdeploy' :
713704 from .utils import prepare_lmdeploy_engine_template
714705 llm_engine , template = prepare_lmdeploy_engine_template (args )
706+ template ._is_lmdeploy = True
715707 else :
716708 model , template = prepare_model_template (args )
717709 uvicorn .run (app , host = args .host , port = args .port , ssl_keyfile = args .ssl_keyfile , ssl_certfile = args .ssl_certfile )
0 commit comments