
Commit fb4345f

heheda12345 authored and zhewenl committed
[Frontend] Add --log-error-stack to print stack trace for error response (vllm-project#22960)
Signed-off-by: Chen Zhang <[email protected]>
1 parent 66b2f22 commit fb4345f

13 files changed (+51, -8 lines)

vllm/entrypoints/openai/api_server.py

Lines changed: 10 additions & 0 deletions
@@ -1749,6 +1749,7 @@ async def init_app_state(
         enable_prompt_tokens_details=args.enable_prompt_tokens_details,
         enable_force_include_usage=args.enable_force_include_usage,
         enable_log_outputs=args.enable_log_outputs,
+        log_error_stack=args.log_error_stack,
     ) if "generate" in supported_tasks else None
     state.openai_serving_chat = OpenAIServingChat(
         engine_client,
@@ -1767,6 +1768,7 @@ async def init_app_state(
         enable_prompt_tokens_details=args.enable_prompt_tokens_details,
         enable_force_include_usage=args.enable_force_include_usage,
         enable_log_outputs=args.enable_log_outputs,
+        log_error_stack=args.log_error_stack,
     ) if "generate" in supported_tasks else None
     state.openai_serving_completion = OpenAIServingCompletion(
         engine_client,
@@ -1776,6 +1778,7 @@ async def init_app_state(
         return_tokens_as_token_ids=args.return_tokens_as_token_ids,
         enable_prompt_tokens_details=args.enable_prompt_tokens_details,
         enable_force_include_usage=args.enable_force_include_usage,
+        log_error_stack=args.log_error_stack,
     ) if "generate" in supported_tasks else None
     state.openai_serving_pooling = OpenAIServingPooling(
         engine_client,
@@ -1784,6 +1787,7 @@ async def init_app_state(
         request_logger=request_logger,
         chat_template=resolved_chat_template,
         chat_template_content_format=args.chat_template_content_format,
+        log_error_stack=args.log_error_stack,
     ) if "encode" in supported_tasks else None
     state.openai_serving_embedding = OpenAIServingEmbedding(
         engine_client,
@@ -1792,12 +1796,14 @@ async def init_app_state(
         request_logger=request_logger,
         chat_template=resolved_chat_template,
         chat_template_content_format=args.chat_template_content_format,
+        log_error_stack=args.log_error_stack,
     ) if "embed" in supported_tasks else None
     state.openai_serving_classification = ServingClassification(
         engine_client,
         model_config,
         state.openai_serving_models,
         request_logger=request_logger,
+        log_error_stack=args.log_error_stack,
     ) if "classify" in supported_tasks else None
 
     enable_serving_reranking = ("classify" in supported_tasks and getattr(
@@ -1807,6 +1813,7 @@ async def init_app_state(
         model_config,
         state.openai_serving_models,
         request_logger=request_logger,
+        log_error_stack=args.log_error_stack,
     ) if ("embed" in supported_tasks or enable_serving_reranking) else None
 
     state.openai_serving_tokenization = OpenAIServingTokenization(
@@ -1816,18 +1823,21 @@ async def init_app_state(
         request_logger=request_logger,
         chat_template=resolved_chat_template,
         chat_template_content_format=args.chat_template_content_format,
+        log_error_stack=args.log_error_stack,
     )
     state.openai_serving_transcription = OpenAIServingTranscription(
         engine_client,
         model_config,
         state.openai_serving_models,
         request_logger=request_logger,
+        log_error_stack=args.log_error_stack,
     ) if "transcription" in supported_tasks else None
     state.openai_serving_translation = OpenAIServingTranslation(
         engine_client,
         model_config,
         state.openai_serving_models,
         request_logger=request_logger,
+        log_error_stack=args.log_error_stack,
     ) if "transcription" in supported_tasks else None
 
     state.enable_server_load_tracking = args.enable_server_load_tracking

vllm/entrypoints/openai/cli_args.py

Lines changed: 2 additions & 0 deletions
@@ -180,6 +180,8 @@ class FrontendArgs:
     h11_max_header_count: int = H11_MAX_HEADER_COUNT_DEFAULT
     """Maximum number of HTTP headers allowed in a request for h11 parser.
     Helps mitigate header abuse. Default: 256."""
+    log_error_stack: bool = envs.VLLM_SERVER_DEV_MODE
+    """If set to True, log the stack trace of error responses"""
 
     @staticmethod
     def add_cli_args(parser: FlexibleArgumentParser) -> FlexibleArgumentParser:
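
Note: the new frontend option defaults to VLLM_SERVER_DEV_MODE, so stack-trace logging switches on automatically in dev mode and stays off otherwise; per the commit title it can also be enabled explicitly with --log-error-stack. A minimal standalone sketch of that default wiring (the dataclass and env lookup below are illustrative stand-ins, not vLLM's actual envs.VLLM_SERVER_DEV_MODE helper):

# Illustrative sketch only: how a boolean frontend arg can default to dev mode.
import os
from dataclasses import dataclass

def _server_dev_mode() -> bool:
    # VLLM_SERVER_DEV_MODE is conventionally "0"/"1"
    return bool(int(os.getenv("VLLM_SERVER_DEV_MODE", "0")))

@dataclass
class FrontendArgsSketch:
    log_error_stack: bool = _server_dev_mode()

if __name__ == "__main__":
    print(FrontendArgsSketch().log_error_stack)  # False unless VLLM_SERVER_DEV_MODE=1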

vllm/entrypoints/openai/serving_chat.py

Lines changed: 3 additions & 1 deletion
@@ -76,13 +76,15 @@ def __init__(
         enable_prompt_tokens_details: bool = False,
         enable_force_include_usage: bool = False,
         enable_log_outputs: bool = False,
+        log_error_stack: bool = False,
     ) -> None:
         super().__init__(engine_client=engine_client,
                          model_config=model_config,
                          models=models,
                          request_logger=request_logger,
                          return_tokens_as_token_ids=return_tokens_as_token_ids,
-                         enable_force_include_usage=enable_force_include_usage)
+                         enable_force_include_usage=enable_force_include_usage,
+                         log_error_stack=log_error_stack)
 
         self.response_role = response_role
         self.chat_template = chat_template

vllm/entrypoints/openai/serving_classification.py

Lines changed: 2 additions & 0 deletions
@@ -129,12 +129,14 @@ def __init__(
         models: OpenAIServingModels,
         *,
         request_logger: Optional[RequestLogger],
+        log_error_stack: bool = False,
     ) -> None:
         super().__init__(
             engine_client=engine_client,
             model_config=model_config,
             models=models,
             request_logger=request_logger,
+            log_error_stack=log_error_stack,
         )
 
     async def create_classify(

vllm/entrypoints/openai/serving_completion.py

Lines changed: 2 additions & 0 deletions
@@ -59,6 +59,7 @@ def __init__(
         return_tokens_as_token_ids: bool = False,
         enable_prompt_tokens_details: bool = False,
         enable_force_include_usage: bool = False,
+        log_error_stack: bool = False,
     ):
         super().__init__(
             engine_client=engine_client,
@@ -67,6 +68,7 @@ def __init__(
             request_logger=request_logger,
             return_tokens_as_token_ids=return_tokens_as_token_ids,
             enable_force_include_usage=enable_force_include_usage,
+            log_error_stack=log_error_stack,
         )
         self.enable_prompt_tokens_details = enable_prompt_tokens_details
         self.default_sampling_params = (

vllm/entrypoints/openai/serving_embedding.py

Lines changed: 3 additions & 1 deletion
@@ -593,11 +593,13 @@ def __init__(
         request_logger: Optional[RequestLogger],
         chat_template: Optional[str],
         chat_template_content_format: ChatTemplateContentFormatOption,
+        log_error_stack: bool = False,
     ) -> None:
         super().__init__(engine_client=engine_client,
                          model_config=model_config,
                          models=models,
-                         request_logger=request_logger)
+                         request_logger=request_logger,
+                         log_error_stack=log_error_stack)
 
         self.chat_template = chat_template
         self.chat_template_content_format: Final = chat_template_content_format

vllm/entrypoints/openai/serving_engine.py

Lines changed: 9 additions & 0 deletions
@@ -5,6 +5,7 @@
 import json
 import sys
 import time
+import traceback
 from collections.abc import AsyncGenerator, Iterable, Mapping, Sequence
 from concurrent.futures import ThreadPoolExecutor
 from http import HTTPStatus
@@ -205,6 +206,7 @@ def __init__(
         request_logger: Optional[RequestLogger],
         return_tokens_as_token_ids: bool = False,
         enable_force_include_usage: bool = False,
+        log_error_stack: bool = False,
     ):
         super().__init__()
 
@@ -222,6 +224,7 @@ def __init__(
 
         self._async_tokenizer_pool: dict[AnyTokenizer,
                                          AsyncMicrobatchTokenizer] = {}
+        self.log_error_stack = log_error_stack
 
     def _get_async_tokenizer(self, tokenizer) -> AsyncMicrobatchTokenizer:
         """
@@ -412,6 +415,12 @@ def create_error_response(
             message: str,
             err_type: str = "BadRequestError",
             status_code: HTTPStatus = HTTPStatus.BAD_REQUEST) -> ErrorResponse:
+        if self.log_error_stack:
+            exc_type, _, _ = sys.exc_info()
+            if exc_type is not None:
+                traceback.print_exc()
+            else:
+                traceback.print_stack()
         return ErrorResponse(error=ErrorInfo(
             message=message, type=err_type, code=status_code.value))
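
The behavioural change is concentrated in create_error_response above: when log_error_stack is enabled, the handler prints the traceback of the exception currently being handled, and falls back to printing the current call stack when no exception is active. A self-contained sketch of that same pattern, with illustrative names (build_error is not part of the diff):

import sys
import traceback

def build_error(message: str, log_error_stack: bool = True) -> dict:
    # Mirrors the patched create_error_response: print the active exception's
    # traceback if there is one, otherwise print where this call came from.
    if log_error_stack:
        exc_type, _, _ = sys.exc_info()
        if exc_type is not None:
            traceback.print_exc()
        else:
            traceback.print_stack()
    return {"message": message, "type": "BadRequestError", "code": 400}

try:
    raise ValueError("bad prompt")
except ValueError:
    build_error("bad prompt")     # inside except: full ValueError traceback
build_error("validation failed")  # no active exception: current call stack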

vllm/entrypoints/openai/serving_pooling.py

Lines changed: 3 additions & 1 deletion
@@ -58,11 +58,13 @@ def __init__(
         request_logger: Optional[RequestLogger],
         chat_template: Optional[str],
         chat_template_content_format: ChatTemplateContentFormatOption,
+        log_error_stack: bool = False,
     ) -> None:
         super().__init__(engine_client=engine_client,
                          model_config=model_config,
                          models=models,
-                         request_logger=request_logger)
+                         request_logger=request_logger,
+                         log_error_stack=log_error_stack)
 
         self.chat_template = chat_template
         self.chat_template_content_format: Final = chat_template_content_format

vllm/entrypoints/openai/serving_responses.py

Lines changed: 2 additions & 0 deletions
@@ -88,6 +88,7 @@ def __init__(
         enable_prompt_tokens_details: bool = False,
         enable_force_include_usage: bool = False,
         enable_log_outputs: bool = False,
+        log_error_stack: bool = False,
     ) -> None:
         super().__init__(
             engine_client=engine_client,
@@ -96,6 +97,7 @@ def __init__(
             request_logger=request_logger,
             return_tokens_as_token_ids=return_tokens_as_token_ids,
             enable_force_include_usage=enable_force_include_usage,
+            log_error_stack=log_error_stack,
         )
 
         self.chat_template = chat_template

vllm/entrypoints/openai/serving_score.py

Lines changed: 3 additions & 1 deletion
@@ -47,11 +47,13 @@ def __init__(
         models: OpenAIServingModels,
         *,
         request_logger: Optional[RequestLogger],
+        log_error_stack: bool = False,
     ) -> None:
         super().__init__(engine_client=engine_client,
                          model_config=model_config,
                          models=models,
-                         request_logger=request_logger)
+                         request_logger=request_logger,
+                         log_error_stack=log_error_stack)
 
     async def _embedding_score(
         self,
