
Commit d4f610e

feat(log):add_request_and_response_log (#3373)
1 parent 396dba0 commit d4f610e

File tree

3 files changed (+26 -3 lines changed)

fastdeploy/entrypoints/openai/api_server.py

Lines changed: 2 additions & 0 deletions
```diff
@@ -251,6 +251,7 @@ async def create_chat_completion(request: ChatCompletionRequest):
     """
     Create a chat completion for the provided prompt and parameters.
     """
+    api_server_logger.info(f"Chat Received request: {request.model_dump_json()}")
     if app.state.dynamic_load_weight:
         status, msg = app.state.engine_client.is_workers_alive()
         if not status:
@@ -279,6 +280,7 @@ async def create_completion(request: CompletionRequest):
     """
     Create a completion for the provided prompt and parameters.
     """
+    api_server_logger.info(f"Completion Received request: {request.model_dump_json()}")
     if app.state.dynamic_load_weight:
         status, msg = app.state.engine_client.is_workers_alive()
         if not status:
```
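The api_server.py change logs the fully serialized request body as soon as each endpoint is entered, before the worker-health check. Below is a minimal sketch of the same pattern outside FastDeploy, using a plain `logging` logger and an illustrative Pydantic model in place of the project's own `api_server_logger` and `ChatCompletionRequest`:

```python
import logging

from pydantic import BaseModel

# Illustrative stand-ins: FastDeploy wires up its own api_server_logger and
# ChatCompletionRequest; these minimal substitutes only serve the sketch.
logging.basicConfig(level=logging.INFO)
api_server_logger = logging.getLogger("api_server")


class ChatCompletionRequest(BaseModel):
    model: str
    messages: list[dict]


async def create_chat_completion(request: ChatCompletionRequest):
    # Serialize and log the validated request once, on entry, so every
    # incoming call leaves a traceable log line (mirrors the diff above).
    api_server_logger.info(f"Chat Received request: {request.model_dump_json()}")
    # ... worker-health check and inference would follow here ...
```

Note that `model_dump_json()` emits the entire request, including user prompts, so logging it at INFO level can be verbose and may capture sensitive content; that trade-off is inherent to this style of request logging.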

fastdeploy/entrypoints/openai/serving_chat.py

Lines changed: 8 additions & 1 deletion
```diff
@@ -239,6 +239,7 @@ async def chat_completion_stream_generator(
                         prompt_tokens_details=PromptTokenUsageInfo(cached_tokens=num_cached_tokens),
                     )
                     yield f"data: {chunk.model_dump_json(exclude_unset=True)} \n\n"
+                    api_server_logger.info(f"Chat Streaming response send_idx 0: {chunk.model_dump_json()}")
                 first_iteration = False
 
                 output = res["outputs"]
@@ -273,6 +274,7 @@ async def chat_completion_stream_generator(
                     logprobs=logprobs_res,
                     arrival_time=arrival_time,
                 )
+
                 if res["finished"]:
                     num_choices -= 1
                     work_process_metrics.e2e_request_latency.observe(
@@ -304,6 +306,9 @@ async def chat_completion_stream_generator(
                 if len(choices) == max_streaming_response_tokens or res["finished"]:
                     chunk.choices = choices
                     yield f"data: {chunk.model_dump_json(exclude_unset=True)}\n\n"
+                    # log the final chunk of the stream
+                    if res["finished"]:
+                        api_server_logger.info(f"Chat Streaming response last send: {chunk.model_dump_json()}")
                     choices = []
 
             if choices:
@@ -456,13 +461,15 @@ async def chat_completion_full_generator(
             prompt_tokens_details=PromptTokenUsageInfo(cached_tokens=final_res.get("num_cached_tokens", 0)),
         )
         work_process_metrics.e2e_request_latency.observe(time.time() - final_res["metrics"]["request_start_time"])
-        return ChatCompletionResponse(
+        res = ChatCompletionResponse(
             id=request_id,
             created=created_time,
             model=model_name,
             choices=choices,
             usage=usage,
         )
+        api_server_logger.info(f"Chat response: {res.model_dump_json()}")
+        return res
 
     def _create_chat_logprobs(
         self,
```
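For streaming chat, the diff keeps log volume bounded by recording only two chunks per request: the first chunk (the send_idx 0 chunk carrying usage details) and the final chunk once `res["finished"]` is true. The non-streaming path now binds the response to `res` so it can be serialized before returning. A rough sketch of the first/last-chunk pattern, with a hypothetical async chunk source standing in for the real generator:

```python
import logging
from typing import AsyncIterator

api_server_logger = logging.getLogger("api_server")


async def stream_with_edge_logging(chunks) -> AsyncIterator[str]:
    # Hypothetical illustration of the pattern in the diff: emit every chunk
    # as SSE, but log only the first and the final one.
    first = True
    async for chunk, finished in chunks:  # chunk: a pydantic model, finished: bool
        yield f"data: {chunk.model_dump_json(exclude_unset=True)}\n\n"
        if first:
            api_server_logger.info(f"Chat Streaming response send_idx 0: {chunk.model_dump_json()}")
            first = False
        if finished:
            api_server_logger.info(f"Chat Streaming response last send: {chunk.model_dump_json()}")
```

This keeps the number of log lines per streamed request constant, regardless of how many tokens are generated.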

fastdeploy/entrypoints/openai/serving_completion.py

Lines changed: 16 additions & 2 deletions
```diff
@@ -221,8 +221,7 @@ async def completion_full_generator(
                     valid_results[rid] = data
                     num_choices -= 1
                     break
-
-            return self.request_output_to_completion_response(
+            res = self.request_output_to_completion_response(
                 final_res_batch=valid_results,
                 request=request,
                 request_id=request_id,
@@ -232,6 +231,8 @@ async def completion_full_generator(
                 completion_batched_token_ids=completion_batched_token_ids,
                 text_after_process_list=text_after_process_list,
             )
+            api_server_logger.info(f"Completion response: {res.model_dump_json()}")
+            return res
         except Exception as e:
             api_server_logger.error(f"Error in completion_full_generator: {e}", exc_info=True)
             raise
@@ -323,6 +324,9 @@ async def completion_stream_generator(
                             ],
                         )
                         yield f"data: {chunk.model_dump_json(exclude_unset=True)}\n\n"
+                        api_server_logger.info(
+                            f"Completion Streaming response send_idx 0: {chunk.model_dump_json()}"
+                        )
                     first_iteration[idx] = False
 
                     self.engine_client.data_processor.process_response_dict(
@@ -376,6 +380,15 @@ async def completion_stream_generator(
                         choices[-1].finish_reason = self.calc_finish_reason(
                             request.max_tokens, output_tokens[idx], output, tool_called
                         )
+                    send_idx = output.get("send_idx")
+                    # only log when send_idx is explicitly 0
+                    if send_idx == 0 and not request.return_token_ids:
+                        chunk_temp = chunk
+                        chunk_temp.choices = choices
+                        api_server_logger.info(
+                            f"Completion Streaming response send_idx 0: {chunk_temp.model_dump_json()}"
+                        )
+                        del chunk_temp
 
                     if len(choices) == max_streaming_response_tokens or res["finished"]:
                         chunk = CompletionStreamResponse(
@@ -402,6 +415,7 @@ async def completion_stream_generator(
                         ),
                     )
                     yield f"data: {usage_chunk.model_dump_json(exclude_unset=True)}\n\n"
+                    api_server_logger.info(f"Completion Streaming response last send: {chunk.model_dump_json()}")
                 if choices:
                     chunk.choices = choices
                     yield f"data: {chunk.model_dump_json(exclude_unset=True)}\n\n"
```
