@@ -8,6 +8,7 @@
 import asyncio
 import json
 import re
+import time
 import traceback

 from collections.abc import Generator
@@ -170,12 +171,18 @@ def handle_chat_complete(self, chat_req: APIChatCompleteRequest) -> dict[str, Any]:
         )

         model = chat_req.model_name_or_path or next(iter(self.chat_llms.keys()))
+
+        self.logger.info(f"[Cloud Service Chat Complete Model]: {model}")
+        start = time.time()
         response = self.chat_llms[model].generate(current_messages, model_name_or_path=model)
+        end = time.time()
+        self.logger.info(f"[Cloud Service Chat Complete Time]: {end - start} seconds")

         # Step 4: start add after chat asynchronously
         if chat_req.add_message_on_answer:
             # Resolve writable cube IDs (for add)
             writable_cube_ids = chat_req.writable_cube_ids or [chat_req.user_id]
+            start = time.time()
             self._start_add_to_memory(
                 user_id=chat_req.user_id,
                 writable_cube_ids=writable_cube_ids,
@@ -184,6 +191,8 @@ def handle_chat_complete(self, chat_req: APIChatCompleteRequest) -> dict[str, Any]:
                 full_response=response,
                 async_mode="async",
             )
+            end = time.time()
+            self.logger.info(f"[Cloud Service Chat Add Time]: {end - start} seconds")

         match = re.search(r"<think>([\s\S]*?)</think>", response)
         reasoning_text = match.group(1) if match else None
@@ -295,9 +304,14 @@ def generate_chat_response() -> Generator[str, None, None]:
                 )

                 model = chat_req.model_name_or_path or next(iter(self.chat_llms.keys()))
+                self.logger.info(f"[Cloud Service Chat Stream Model]: {model}")
+
+                start = time.time()
                 response_stream = self.chat_llms[model].generate_stream(
                     current_messages, model_name_or_path=model
                 )
+                end = time.time()
+                self.logger.info(f"[Cloud Service Chat Stream Time]: {end - start} seconds")

                 # Stream the response
                 buffer = ""
@@ -329,6 +343,7 @@ def generate_chat_response() -> Generator[str, None, None]:
                     writable_cube_ids = chat_req.writable_cube_ids or (
                         [chat_req.mem_cube_id] if chat_req.mem_cube_id else [chat_req.user_id]
                     )
+                    start = time.time()
                     self._start_add_to_memory(
                         user_id=chat_req.user_id,
                         writable_cube_ids=writable_cube_ids,
@@ -337,7 +352,10 @@ def generate_chat_response() -> Generator[str, None, None]:
                         full_response=full_response,
                         async_mode="async",
                     )
-
+                    end = time.time()
+                    self.logger.info(
+                        f"[Cloud Service Chat Stream Add Time]: {end - start} seconds"
+                    )
             except Exception as e:
                 self.logger.error(f"Error in chat stream: {e}", exc_info=True)
                 error_data = f"data: {json.dumps({'type': 'error', 'content': str(traceback.format_exc())})}\n\n"
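
The same start/end/logger.info pattern now surrounds the generate, generate_stream, and both _start_add_to_memory calls. A small context manager could factor that repetition out; the sketch below is a hypothetical refactor, not part of this commit, and the log_timing name and its arguments are assumptions:

import logging
import time
from contextlib import contextmanager


@contextmanager
def log_timing(logger: logging.Logger, label: str):
    """Log elapsed wall-clock time for the wrapped block, using the same message format as above."""
    start = time.time()
    try:
        yield
    finally:
        end = time.time()
        logger.info(f"[{label}]: {end - start} seconds")


# Hypothetical usage inside handle_chat_complete:
# with log_timing(self.logger, "Cloud Service Chat Complete Time"):
#     response = self.chat_llms[model].generate(current_messages, model_name_or_path=model)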