Skip to content

Commit c5b0f9e

Browse files
authored
[https://nvbugs/5633700][fix] Cache tiktoken vocab for gpt-oss (#10219)
Signed-off-by: Pengyun Lin <[email protected]>
1 parent bfc5919 commit c5b0f9e

File tree

3 files changed

+35
-22
lines changed

3 files changed

+35
-22
lines changed

tensorrt_llm/serve/harmony_adapter.py

Lines changed: 25 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -217,8 +217,9 @@ def _create_delta_from_parser_state(self) -> dict[str, Any] | None:
217217

218218
# Check if tool is allowed
219219
if self.should_filter_tools and func_name not in self.available_tools:
220-
logger.debug("Request %s: tool %s not in available tools",
221-
self.request_id, func_name)
220+
logger.debug(
221+
f"Request {self.request_id}: tool {func_name} not in available tools"
222+
)
222223
return None
223224

224225
# Get or create tool call
@@ -273,8 +274,9 @@ def _create_delta_from_parser_state(self) -> dict[str, Any] | None:
273274
else:
274275
return {"content": self.parser.last_content_delta}
275276
else:
276-
logger.debug("Request %s: no delta generated for channel=%s",
277-
self.request_id, self.parser.current_channel)
277+
logger.debug(
278+
f"Request {self.request_id}: no delta generated for channel={self.parser.current_channel}"
279+
)
278280
return None
279281

280282
def _get_or_create_tool_call(self, func_name: str) -> str:
@@ -295,8 +297,9 @@ def _get_or_create_tool_call(self, func_name: str) -> str:
295297
"active": True
296298
}
297299
self.tool_call_index += 1
298-
logger.debug("Request %s: created new tool call %s for function %s",
299-
self.request_id, tool_id, func_name)
300+
logger.debug(
301+
f"Request {self.request_id}: created new tool call {tool_id} for function {func_name}"
302+
)
300303
return tool_id
301304

302305
def get_debug_info(self) -> dict[str, Any]:
@@ -896,8 +899,8 @@ def _parse_tool_call_from_harmony_message(
896899
}
897900
except json.JSONDecodeError:
898901
logger.warning(
899-
"Failed to parse tool call arguments as JSON: %s",
900-
function_call_args)
902+
f"Failed to parse tool call arguments as JSON: {function_call_args}"
903+
)
901904
return None
902905
elif msg_content_type and "code" in msg_content_type:
903906
function_name = str(msg_recipient)
@@ -1023,10 +1026,11 @@ def harmony_output_to_openai(
10231026
except (HarmonyError, UnicodeDecodeError,
10241027
ValueError) as parse_error:
10251028
logger.warning(
1026-
"Failed to parse harmony messages from tokens: %s",
1027-
parse_error)
1028-
logger.debug("Problematic clean tokens (%d): %s",
1029-
len(clean_tokens), clean_tokens)
1029+
f"Failed to parse harmony messages from tokens: {parse_error}"
1030+
)
1031+
logger.debug(
1032+
f"Problematic clean tokens ({len(clean_tokens)}): {clean_tokens}"
1033+
)
10301034
# Fallback to raw text parsing
10311035
raise RuntimeError(f"Harmony parsing failed: {parse_error}"
10321036
) # This will be caught by outer try-catch
@@ -1103,9 +1107,9 @@ def harmony_output_to_openai(
11031107
except Exception as e:
11041108
raw_text = self._safe_decode_utf8(harmony_output_tokens,
11051109
"HARMONY _OUTPUT: ")
1106-
logger.warning("Failed to parse harmony output: %s. Raw output: %s",
1107-
e, raw_text)
1108-
logger.debug("Detailed error: %s", traceback.format_exc())
1110+
logger.warning(
1111+
f"Failed to parse harmony output: {e}. Raw output: {raw_text}")
1112+
logger.debug(f"Detailed error: {traceback.format_exc()}")
11091113

11101114
# Check if raw_text contains a decode error (fallback content)
11111115
if "HARMONY_OUTPUT:" in raw_text:
@@ -1276,9 +1280,9 @@ def stateful_stream_harmony_tokens_to_openai_deltas(
12761280
return deltas
12771281
except (HarmonyError, UnicodeDecodeError, ValueError):
12781282
logger.error(
1279-
f"Streaming: Failed to process token batch of {len(tokens)} tokens for request {request_id}",
1283+
f"Streaming: Failed to process token batch of {len(tokens)} tokens for request {request_id}"
12801284
)
1281-
logger.debug("Problematic streaming tokens: %s", tokens)
1285+
logger.debug(f"Problematic streaming tokens: {tokens}")
12821286

12831287
# Return empty deltas to continue processing
12841288
return []
@@ -1457,8 +1461,8 @@ def create_stream_state(
14571461
"""
14581462
if request_id in self._stream_states:
14591463
logger.warning(
1460-
"Stream state already exists for request %s, replacing",
1461-
request_id)
1464+
f"Stream state already exists for request {request_id}, replacing"
1465+
)
14621466

14631467
stream_state = HarmonyStreamState(
14641468
request_id=request_id,
@@ -1494,7 +1498,7 @@ def _filter_tool_calls(
14941498

14951499
# Filter unavailable external tools
14961500
if should_filter_external_tools and func_name not in external_tools:
1497-
logger.debug("Filtered unavailable tool call: %s", func_name)
1501+
logger.debug(f"Filtered unavailable tool call: {func_name}")
14981502
continue
14991503

15001504
filtered.append(tool_call)
@@ -1644,7 +1648,7 @@ def handle_non_streaming_response(tools: List[ChatCompletionToolsParam],
16441648
output.token_ids, tools_for_parser, tool_choice)
16451649

16461650
# CONVERTED OUTPUT (after harmony to openai conversion)
1647-
logger.debug("✅ CONVERTED OUTPUT: %s", json.dumps(parsed_output, indent=2))
1651+
logger.debug(f"✅ CONVERTED OUTPUT: {json.dumps(parsed_output, indent=2)}")
16481652

16491653
# Create response message
16501654
response_message = _create_response_message(parsed_output)

tests/integration/test_lists/waives.txt

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -367,7 +367,6 @@ accuracy/test_llm_api_pytorch.py::TestQwen3NextInstruct::test_nvfp4[tp4ep4-cutla
367367
accuracy/test_llm_api_pytorch.py::TestQwen3NextInstruct::test_nvfp4[tp4ep4-trtllm] SKIP (https://nvbugs/5702795)
368368
test_e2e.py::test_ptp_quickstart_multimodal[mistral-small-3.1-24b-instruct-Mistral-Small-3.1-24B-Instruct-2503-mixture_text_image-True] SKIP (https://nvbugs/5648560)
369369
test_e2e.py::test_ptp_quickstart_multimodal_2gpu[mistral-small-3.1-24b-instruct-Mistral-Small-3.1-24B-Instruct-2503] SKIP (https://nvbugs/5648560)
370-
test_e2e.py::test_openai_chat_harmony SKIP (https://nvbugs/5633700)
371370
accuracy/test_cli_flow.py::TestGpt2::test_weight_only[int4] SKIP (https://nvbugs/5705193)
372371
accuracy/test_cli_flow.py::TestGpt2::test_int8_kv_cache SKIP (https://nvbugs/5705193)
373372
accuracy/test_cli_flow.py::TestLlama2_7B::test_fp8_2gpus[cp2] SKIP (https://nvbugs/5705194)

tests/unittest/llmapi/apps/_test_openai_chat_harmony.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,18 @@
11
import json
2+
import os
23

34
import openai
45
import pytest
6+
from utils.llm_data import llm_datasets_root
57

68
from ..test_llm import get_model_path
79
from .openai_server import RemoteOpenAIServer
810

911
pytestmark = pytest.mark.threadleak(enabled=False)
12+
os.environ['TIKTOKEN_RS_CACHE_DIR'] = os.path.join(llm_datasets_root(),
13+
'tiktoken_vocab')
14+
os.environ['TIKTOKEN_ENCODINGS_BASE'] = os.path.join(llm_datasets_root(),
15+
'tiktoken_vocab')
1016

1117

1218
@pytest.fixture(scope="module", ids=["GPT-OSS-20B"])
@@ -114,8 +120,10 @@ async def test_tool_calls(client: openai.AsyncOpenAI, model: str):
114120
model=model,
115121
messages=messages,
116122
tools=[tool_get_current_weather],
123+
extra_body={"top_k": 1},
117124
)
118125
message = response.choices[0].message
126+
print(message)
119127
assert response.choices[0].finish_reason == "tool_calls"
120128
assert message.content is None
121129
assert message.reasoning
@@ -137,6 +145,7 @@ async def test_tool_calls(client: openai.AsyncOpenAI, model: str):
137145
response = await client.chat.completions.create(
138146
model=model,
139147
messages=messages,
148+
extra_body={"top_k": 1},
140149
)
141150
message = response.choices[0].message
142151
assert message.content
@@ -205,6 +214,7 @@ async def test_streaming_tool_call(client: openai.AsyncOpenAI, model: str):
205214
messages=messages,
206215
tools=[tool_get_current_weather],
207216
stream=True,
217+
extra_body={"top_k": 1},
208218
)
209219
tool_name: str
210220
reasoning_chunks: list[str] = []

0 commit comments

Comments (0)