 )
 from typing import Any, Dict, AsyncGenerator
 from lmdeploy.archs import get_task
+# from lmdeploy.serve.openai.reasoning_parser import ReasoningParserManager
 from lmdeploy.serve.async_engine import get_names_from_model
 from loguru import logger
 from gpt_server.model_backend.base import ModelBackend
@@ -87,6 +88,8 @@ def __init__(self, model_path) -> None:
         self.messages_type_select = (
             model_type[1] == "base"
         )  # if True, use prompt: str; otherwise use messages: list
+        # self.reasoning_parser = False
+        # self.tokenizer = self.async_engine.tokenizer
 
     async def stream_chat(self, params: Dict[str, Any]) -> AsyncGenerator:
         prompt = params.get("prompt", "")
@@ -131,12 +134,16 @@ async def stream_chat(self, params: Dict[str, Any]) -> AsyncGenerator:
         results_generator = self.async_engine.generate(
             messages=messages, session_id=int(request_id), gen_config=gen_config
         )
-        text_outputs = ""
+        previous_text = ""
+        current_text = ""
+        previous_token_ids = []
+        current_token_ids = []
+        delta_token_ids = []
         async for request_output in results_generator:
             if await request.is_disconnected():
                 # Abort the request if the client disconnects.
                 await self.async_engine.stop_session(session_id=request_id)
-            text_outputs += request_output.response
+            current_text = current_text + request_output.response
 
             usage = {
                 "prompt_tokens": request_output.input_token_len,
@@ -145,16 +152,39 @@ async def stream_chat(self, params: Dict[str, Any]) -> AsyncGenerator:
                 + request_output.generate_token_len,
             }
             ret = {
-                "text": text_outputs,
+                "text": current_text,
                 "error_code": 0,
                 "usage": usage,
                 "finish_reason": request_output.finish_reason,
             }
+            # if self.reasoning_parser is not None:
+            #     delta_token_ids = (
+            #         request_output.token_ids
+            #         if request_output.token_ids is not None
+            #         else []
+            #     )
+            #     current_token_ids = current_token_ids + delta_token_ids
+            #     reasoning_parser = ReasoningParserManager.get("deepseek-r1")(
+            #         self.tokenizer
+            #     )
+            #     reasoning_delta = reasoning_parser.extract_reasoning_content_streaming(
+            #         previous_text=previous_text,
+            #         current_text=current_text,
+            #         delta_text=request_output.response,
+            #         previous_token_ids=previous_token_ids,
+            #         current_token_ids=current_token_ids,
+            #         delta_token_ids=delta_token_ids,
+            #     )
+            #     if reasoning_delta is not None:
+            #         ret["text"] = reasoning_delta.content
+            #         ret["reasoning_content"] = reasoning_delta.reasoning_content
+            #     previous_text = current_text
+            #     previous_token_ids = current_token_ids
             # TODO -------------------------------------------------------------------
             output_info_list = []
             for stop_str in list(stop):
                 if stop_str:
-                    text, bool_value = is_stop(output=text_outputs, stop_str=stop_str)
+                    text, bool_value = is_stop(output=current_text, stop_str=stop_str)
                     output_info_list.append(
                         {"text": text, "bool_value": bool_value, "text_len": len(text)}
                     )
@@ -167,5 +197,5 @@ async def stream_chat(self, params: Dict[str, Any]) -> AsyncGenerator:
                 break
             # TODO -------------------------------------------------------------------
             yield ret
-        logger.info(text_outputs)
+        logger.info(current_text)
         logger.info(usage)
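
The commented-out block in this commit sketches splitting streamed output into visible content and reasoning content via lmdeploy's `ReasoningParserManager`. Below is a minimal standalone sketch of that flow, assuming the `"deepseek-r1"` parser's `extract_reasoning_content_streaming()` behaves as the commented code implies (returns a delta object with `.content` / `.reasoning_content`, or `None` when it needs more tokens); `split_reasoning_stream` and `deltas` are illustrative names, not part of the repo.

```python
# Hedged sketch, not the repo's implementation: assumes lmdeploy's
# ReasoningParserManager and its "deepseek-r1" reasoning parser, with the
# call signature taken from the commented-out block in this commit.
from lmdeploy.serve.openai.reasoning_parser import ReasoningParserManager


def split_reasoning_stream(tokenizer, deltas):
    """Yield (content, reasoning_content) for each streamed chunk.

    `deltas` is an iterable of (delta_text, delta_token_ids) pairs, e.g.
    taken from request_output.response / request_output.token_ids.
    """
    # Build the parser once per stream instead of once per chunk.
    parser = ReasoningParserManager.get("deepseek-r1")(tokenizer)
    previous_text, current_text = "", ""
    previous_token_ids, current_token_ids = [], []
    for delta_text, delta_token_ids in deltas:
        delta_token_ids = delta_token_ids or []
        current_text += delta_text
        current_token_ids = current_token_ids + delta_token_ids
        reasoning_delta = parser.extract_reasoning_content_streaming(
            previous_text=previous_text,
            current_text=current_text,
            delta_text=delta_text,
            previous_token_ids=previous_token_ids,
            current_token_ids=current_token_ids,
            delta_token_ids=delta_token_ids,
        )
        # Advance the rolling state before yielding.
        previous_text = current_text
        previous_token_ids = current_token_ids
        if reasoning_delta is not None:
            yield reasoning_delta.content, reasoning_delta.reasoning_content
```

Instantiating the parser once per stream, rather than on every chunk as in the commented-out block, avoids rebuilding it per token while keeping the same per-delta bookkeeping (`previous_*` / `current_*` state).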