Merged
21 commits
45aa822
Add return_token_ids_alongside parameter to OpenAI API endpoints
ultmaster Aug 10, 2025
48dd2f4
Add comment
ultmaster Aug 10, 2025
4f6ea7f
Improve comment formatting for token_ids field
ultmaster Aug 10, 2025
fe99500
Refactor conditional token_ids formatting for better readability
ultmaster Aug 10, 2025
81678bf
Works for non-streaming case
ultmaster Aug 12, 2025
4f6801e
Works for streaming case
ultmaster Aug 12, 2025
8c565a8
Merge remote-tracking branch 'origin/main' into add-token-ids-alongsi…
ultmaster Aug 12, 2025
ab6d7ef
Merge remote-tracking branch 'origin/main' into add-token-ids-alongsi…
ultmaster Aug 12, 2025
928f65b
No prompt token ids when echo=True
ultmaster Aug 14, 2025
8499774
Merge remote-tracking branch 'origin/main' into add-token-ids-alongsi…
ultmaster Aug 14, 2025
a83e288
remove debug flags
ultmaster Aug 14, 2025
561ab1c
Merge remote-tracking branch 'origin' into add-token-ids-alongside-fe…
ultmaster Aug 15, 2025
c2ac6c1
Merge branch 'main' into add-token-ids-alongside-feature
ultmaster Aug 15, 2025
7bfdf87
Merge branch 'main' into add-token-ids-alongside-feature
ultmaster Aug 15, 2025
da82529
Merge branch 'main' into add-token-ids-alongside-feature
simon-mo Aug 15, 2025
79ce026
Merge remote-tracking branch 'origin' into add-token-ids-alongside-fe…
ultmaster Aug 18, 2025
8fec3f9
incorporate as_list
ultmaster Aug 18, 2025
a187971
update false to none
ultmaster Aug 18, 2025
abb90ee
Merge remote-tracking branch 'origin' into add-token-ids-alongside-fe…
ultmaster Aug 18, 2025
517b675
filtering out invalid guided grammar
ultmaster Aug 19, 2025
507cbf1
Merge remote-tracking branch 'origin' into add-token-ids-alongside-fe…
ultmaster Aug 19, 2025
20 changes: 20 additions & 0 deletions vllm/entrypoints/openai/protocol.py
@@ -567,6 +567,13 @@ class ChatCompletionRequest(OpenAIBaseModel):
            "If specified with 'logprobs', tokens are represented "
            " as strings of the form 'token_id:{token_id}' so that tokens "
            "that are not JSON-encodable can be identified."))
    return_token_ids_alongside: Optional[bool] = Field(
        default=False,
        description=(
            "If specified, the result will include both prompt and response "
            "token ids alongside the generated text. "
            "This is useful for debugging or when you "
            "need to map generated text back to input tokens."))
    cache_salt: Optional[str] = Field(
        default=None,
        description=(
@@ -1053,6 +1060,13 @@ class CompletionRequest(OpenAIBaseModel):
            "If specified with 'logprobs', tokens are represented "
            " as strings of the form 'token_id:{token_id}' so that tokens "
            "that are not JSON-encodable can be identified."))
    return_token_ids_alongside: Optional[bool] = Field(
        default=False,
        description=(
            "If specified, the result will include both prompt and response "
            "token ids alongside the generated text. "
            "This is useful for debugging or when you "
            "need to map generated text back to input tokens."))

    cache_salt: Optional[str] = Field(
        default=None,
@@ -1471,7 +1485,9 @@ class CompletionResponseChoice(OpenAIBaseModel):
            "to stop, None if the completion finished for some other reason "
            "including encountering the EOS token"),
    )
    token_ids: Optional[list[int]] = None  # For response
    prompt_logprobs: Optional[list[Optional[dict[int, Logprob]]]] = None
    prompt_token_ids: Optional[list[int]] = None  # For prompt


class CompletionResponse(OpenAIBaseModel):
@@ -1671,6 +1687,9 @@ class ChatCompletionResponseChoice(OpenAIBaseModel):
    finish_reason: Optional[str] = "stop"
    # not part of the OpenAI spec but included in vLLM for legacy reasons
    stop_reason: Optional[Union[int, str]] = None
    # not part of the OpenAI spec but is useful for tracing the tokens
    # in agent scenarios
    token_ids: Optional[list[int]] = None


class ChatCompletionResponse(OpenAIBaseModel):
@@ -1686,6 +1705,7 @@ class ChatCompletionResponse(OpenAIBaseModel):

    # vLLM-specific fields that are not in OpenAI spec
    prompt_logprobs: Optional[list[Optional[dict[int, Logprob]]]] = None
    prompt_token_ids: Optional[list[int]] = None
    kv_transfer_params: Optional[dict[str, Any]] = Field(
        default=None, description="KVTransfer parameters.")

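For context, a minimal client-side sketch (not part of this PR) of how the new flag could be exercised against a locally running vLLM OpenAI-compatible server; the URL, model name, and prompt below are placeholders, while return_token_ids_alongside, token_ids, and prompt_token_ids are the fields introduced in this diff:

# Minimal sketch, assuming a vLLM server on localhost:8000 serving some model.
import requests

resp = requests.post(
    "http://localhost:8000/v1/chat/completions",
    json={
        "model": "meta-llama/Llama-3.1-8B-Instruct",  # placeholder model name
        "messages": [{"role": "user", "content": "Say hello."}],
        "return_token_ids_alongside": True,
    },
).json()

choice = resp["choices"][0]
print(choice["message"]["content"])  # generated text
print(choice["token_ids"])           # token ids of the generated message (new choice field)
print(resp["prompt_token_ids"])      # token ids of the prompt (new top-level field)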
7 changes: 6 additions & 1 deletion vllm/entrypoints/openai/serving_chat.py
@@ -1259,7 +1259,10 @@ async def chat_completion_full_generator(
                logprobs=logprobs,
                finish_reason="tool_calls" if auto_tools_called else
                output.finish_reason if output.finish_reason else "stop",
                stop_reason=output.stop_reason,
                token_ids=(token_ids
                           if request.return_token_ids_alongside else None),
            )

            choices.append(choice_data)

@@ -1300,6 +1303,8 @@
            choices=choices,
            usage=usage,
            prompt_logprobs=clamp_prompt_logprobs(final_res.prompt_logprobs),
            prompt_token_ids=(final_res.prompt_token_ids
                              if request.return_token_ids_alongside else None),
            kv_transfer_params=final_res.kv_transfer_params,
        )

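The field descriptions mention mapping generated text back to input tokens; a hedged sketch of that round-trip with a Hugging Face tokenizer, reusing resp and choice from the request sketch above (the tokenizer name is a placeholder and must match the model the server actually serves):

# Sketch only: decode the returned ids back to text to sanity-check the mapping.
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-3.1-8B-Instruct")

prompt_text = tokenizer.decode(resp["prompt_token_ids"])
response_text = tokenizer.decode(choice["token_ids"], skip_special_tokens=True)

# response_text should line up with choice["message"]["content"], modulo
# detokenization details such as leading whitespace and special tokens.
print(prompt_text)
print(response_text)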
5 changes: 5 additions & 0 deletions vllm/entrypoints/openai/serving_completion.py
@@ -548,6 +548,11 @@ def request_output_to_completion_response(
                finish_reason=output.finish_reason,
                stop_reason=output.stop_reason,
                prompt_logprobs=final_res.prompt_logprobs,
                prompt_token_ids=(prompt_token_ids
                                  if request.return_token_ids_alongside
                                  else None),
                token_ids=(token_ids if request.return_token_ids_alongside
                           else None),
            )
            choices.append(choice_data)

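The same flag is wired into the legacy completions endpoint; per the CompletionResponseChoice change above, both id fields sit on each choice there. A minimal sketch under the same placeholder server and model assumptions as before:

# Sketch for /v1/completions: both token id fields live on each choice here.
import requests

resp = requests.post(
    "http://localhost:8000/v1/completions",
    json={
        "model": "meta-llama/Llama-3.1-8B-Instruct",  # placeholder model name
        "prompt": "The capital of France is",
        "max_tokens": 8,
        "return_token_ids_alongside": True,
    },
).json()

choice = resp["choices"][0]
print(choice["text"])              # generated text
print(choice["prompt_token_ids"])  # prompt token ids (new field, per choice)
print(choice["token_ids"])         # response token ids (new field, per choice)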