Commit 55bc6a5

[https://nvbugs/5753250][fix] Fix undefined local variable in responses utils (#10154)
Signed-off-by: Junyi Xu <[email protected]>
Signed-off-by: JunyiXu-nv <[email protected]>

1 parent: ee07a7c

2 files changed: +55 −48 lines

tensorrt_llm/serve/responses_utils.py (1 addition, 0 deletions)

@@ -974,6 +974,7 @@ def _create_output_content(
     available_tools = _get_chat_completion_function_tools(tools)
 
     for output in final_res.outputs:
+        calls = []
         text, reasoning_text = _apply_reasoning_parser(reasoning_parser,
                                                        output.index,
                                                        output.text, False)
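
The fix itself is the single `calls = []` line above. The failure mode it guards against is a standard Python pitfall: a local that is only bound inside a conditional branch (or carried over from a previous loop iteration) raises UnboundLocalError on any path that reads it unassigned. A minimal, self-contained sketch of the pattern and the fix; the data shapes are illustrative, not the actual responses_utils logic:

```python
def collect_calls(outputs):
    """Illustrative only: mirrors the bug class, not the real parser."""
    results = []
    for output in outputs:
        # Before the fix: `calls` was only bound on the branch that found
        # tool calls, so an output without them either raised
        # UnboundLocalError or silently reused the previous iteration's list.
        #
        # The fix: bind `calls` unconditionally at the top of each iteration.
        calls = []
        if output.get("tool_calls"):
            calls = [c["name"] for c in output["tool_calls"]]
        results.append(calls)
    return results


print(collect_calls([{"tool_calls": [{"name": "get_weather"}]}, {}]))
# -> [['get_weather'], []]
```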

tests/unittest/llmapi/apps/_test_openai_responses.py (54 additions, 48 deletions)
@@ -51,6 +51,7 @@ def client(server: RemoteOpenAIServer):
 
 
 def check_reponse(response, prefix=""):
+    print(f"response: {response}")
     reasoning_exist, message_exist = False, False
     for output in response.output:
         if output.type == "reasoning":
@@ -63,6 +64,7 @@ def check_reponse(response, prefix=""):
 
 
 def check_tool_calling(response, first_resp=True, prefix=""):
+    print(f"response: {response}")
     reasoning_exist, tool_call_exist, message_exist = False, False, False
     reasoning_content, message_content = "", ""
     function_call = None
@@ -90,18 +92,20 @@ def check_tool_calling(response, first_resp=True, prefix=""):
     assert not tool_call_exist, f"{err_msg} tool call content should not exist! ({function_call})"
 
 
-@pytest.mark.skip(reason="https://nvbugs/5753250")
+def _get_qwen3_nothink_input(model: str, input: str):
+    return f"{input} /no_think" if model.startswith("Qwen3") else input
+
+
 @pytest.mark.asyncio(loop_scope="module")
 async def test_reasoning(client: openai.AsyncOpenAI, model: str):
     response = await client.responses.create(
         model=model,
         input="Which one is larger as numeric, 9.9 or 9.11?",
-        max_output_tokens=1024)
+    )
 
     check_reponse(response, "test_reasoning: ")
 
 
-@pytest.mark.skip(reason="https://nvbugs/5753250")
 @pytest.mark.asyncio(loop_scope="module")
 async def test_reasoning_effort(client: openai.AsyncOpenAI, model: str):
     for effort in ["low", "medium", "high"]:
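
The new `_get_qwen3_nothink_input` helper uses Qwen3's prompt-level soft switch: appending `/no_think` to the input disables thinking mode for that turn on Qwen3 models, while other models get the input unchanged. A quick usage sketch of the helper as added above, with illustrative model names:

```python
# Taken from the diff above; exercised here with illustrative inputs.
def _get_qwen3_nothink_input(model: str, input: str):
    return f"{input} /no_think" if model.startswith("Qwen3") else input


assert _get_qwen3_nothink_input("Qwen3-8B", "What is 1+1?") == "What is 1+1? /no_think"
assert _get_qwen3_nothink_input("DeepSeek-R1", "What is 1+1?") == "What is 1+1?"
```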
@@ -110,56 +114,57 @@ async def test_reasoning_effort(client: openai.AsyncOpenAI, model: str):
             instructions="Use less than 1024 tokens for the whole response",
             input="Which one is larger as numeric, 9.9 or 9.11?",
             reasoning={"effort": effort},
-            max_output_tokens=1024)
+        )
         check_reponse(response, f"test_reasoning_effort_{effort}: ")
 
 
-@pytest.mark.skip(reason="https://nvbugs/5753250")
 @pytest.mark.asyncio(loop_scope="module")
 async def test_chat(client: openai.AsyncOpenAI, model: str):
-    response = await client.responses.create(model=model,
-                                             input=[{
-                                                 "role":
-                                                 "developer",
-                                                 "content":
-                                                 "Respond in Chinese."
-                                             }, {
-                                                 "role": "user",
-                                                 "content": "Hello!"
-                                             }, {
-                                                 "role":
-                                                 "assistant",
-                                                 "content":
-                                                 "Hello! How can I help you?"
-                                             }, {
-                                                 "role": "user",
-                                                 "content": "Tell me a joke."
-                                             }],
-                                             max_output_tokens=1024)
+    response = await client.responses.create(
+        model=model,
+        input=[{
+            "role": "developer",
+            "content": "Respond in Chinese."
+        }, {
+            "role": "user",
+            "content": "Hello!"
+        }, {
+            "role": "assistant",
+            "content": "Hello! How can I help you?"
+        }, {
+            "role": "user",
+            "content": "Tell me a joke."
+        }],
+    )
     check_reponse(response, "test_chat: ")
 
 
-@pytest.mark.skip(reason="https://nvbugs/5753250")
 @pytest.mark.asyncio(loop_scope="module")
-async def test_multi_turn_chat(client: openai.AsyncOpenAI, model: str):
-    response = await client.responses.create(model=model,
-                                             input="What is the answer of 1+1?",
-                                             max_output_tokens=1024)
+async def test_multi_turn_chat(client: openai.AsyncOpenAI, model: str,
+                               num_postprocess_workers: int):
+    if num_postprocess_workers > 0:
+        pytest.skip(
+            "Response store is disabled when num_postprocess_workers > 0")
+
+    response = await client.responses.create(
+        model=model,
+        input=_get_qwen3_nothink_input(model, "What is the answer of 1+1?"),
+    )
     check_reponse(response, "test_multi_turn_chat_1: ")
 
     response_2 = await client.responses.create(
         model=model,
-        input="What is the answer of previous question?",
+        input=_get_qwen3_nothink_input(
+            model, "What is the answer of previous question?"),
         previous_response_id=response.id,
-        max_output_tokens=1024)
+    )
     check_reponse(response_2, "test_multi_turn_chat_2: ")
 
 
 def get_current_weather(location: str, format: str = "celsius") -> dict:
     return {"sunny": True, "temperature": 20 if format == "celsius" else 68}
 
 
-@pytest.mark.skip(reason="https://nvbugs/5753250")
 @pytest.mark.asyncio(loop_scope="module")
 async def test_tool_calls(client: openai.AsyncOpenAI, model: str):
     if model.startswith("DeepSeek-R1"):
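
`test_multi_turn_chat` now skips when `num_postprocess_workers > 0` because, per its skip message, the response store backing `previous_response_id` is disabled in that configuration. The chaining itself is plain Responses API usage; a minimal sketch with the `openai` client, assuming a locally served endpoint (the real tests get theirs from the `RemoteOpenAIServer` fixture):

```python
import asyncio

import openai


async def main():
    # base_url and model are placeholders for whatever the test server exposes.
    client = openai.AsyncOpenAI(base_url="http://localhost:8000/v1",
                                api_key="dummy")
    first = await client.responses.create(model="Qwen3-8B",
                                          input="What is the answer of 1+1?")
    # The server resolves previous_response_id against its response store and
    # prepends that conversation, so the back-reference below can be answered.
    second = await client.responses.create(
        model="Qwen3-8B",
        input="What is the answer of previous question?",
        previous_response_id=first.id)
    print(second.output_text)


asyncio.run(main())
```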
@@ -186,10 +191,11 @@ async def test_tool_calls(client: openai.AsyncOpenAI, model: str):
         }
     }
     messages = [{"role": "user", "content": "What is the weather like in SF?"}]
-    response = await client.responses.create(model=model,
-                                             input=messages,
-                                             tools=[tool_get_current_weather],
-                                             max_output_tokens=1024)
+    response = await client.responses.create(
+        model=model,
+        input=messages,
+        tools=[tool_get_current_weather],
+    )
     messages.extend(response.output)
     function_call = check_tool_calling(response, True, "test_tool_calls: ")
 
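Between the hunk above and the one below, `test_tool_calls` completes the tool round trip: the model's `function_call` items from `response.output` are appended to `messages`, followed by a `function_call_output` item carrying the executed tool's JSON result. A sketch of that middle step; the `call_id` field name follows Responses API conventions and is an assumption, since this diff does not show it:

```python
import json


def append_tool_result(messages, response_output, answer):
    # Echo the model's output items (including its function_call) back into
    # the conversation, mirroring messages.extend(response.output) in the test.
    messages.extend(response_output)
    # Attach the executed tool's result; a function_call_output item is
    # matched to the originating call via call_id (assumed field name).
    call = next(item for item in response_output
                if getattr(item, "type", None) == "function_call")
    messages.append({
        "type": "function_call_output",
        "call_id": call.call_id,
        "output": json.dumps(answer),
    })
    return messages
```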
@@ -203,22 +209,22 @@ async def test_tool_calls(client: openai.AsyncOpenAI, model: str):
         "output": json.dumps(answer),
     })
 
-    response = await client.responses.create(model=model,
-                                             input=messages,
-                                             tools=[tool_get_current_weather],
-                                             max_output_tokens=1024)
+    response = await client.responses.create(
+        model=model,
+        input=messages,
+        tools=[tool_get_current_weather],
+    )
 
     check_tool_calling(response, False, "test_tool_calls: ")
 
 
-@pytest.mark.skip(reason="https://nvbugs/5753250")
 @pytest.mark.asyncio(loop_scope="module")
 async def test_streaming(client: openai.AsyncOpenAI, model: str):
     stream = await client.responses.create(
         model=model,
         input="Explain the theory of relativity in brief.",
         stream=True,
-        max_output_tokens=1024)
+    )
 
     reasoning_deltas, message_deltas = list(), list()
     async for event in stream:
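
`test_streaming` drains the event stream, bucketing text deltas into reasoning and message accumulators before asserting that the reasoning half is non-empty. A sketch of such a consumption loop; the event-type strings follow OpenAI Responses streaming event names and are assumptions here, since the loop body is outside this hunk:

```python
async def collect_deltas(stream):
    # Bucket streamed text deltas by event type (event names assumed).
    reasoning_deltas, message_deltas = [], []
    async for event in stream:
        if event.type == "response.output_text.delta":
            message_deltas.append(event.delta)
        elif event.type == "response.reasoning_text.delta":
            reasoning_deltas.append(event.delta)
    return "".join(reasoning_deltas), "".join(message_deltas)
```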
@@ -233,7 +239,6 @@ async def test_streaming(client: openai.AsyncOpenAI, model: str):
     assert full_reasoning_response
 
 
-@pytest.mark.skip(reason="https://nvbugs/5753250")
 @pytest.mark.asyncio(loop_scope="module")
 async def test_streaming_tool_call(client: openai.AsyncOpenAI, model: str):
     if model.startswith("DeepSeek-R1"):
@@ -260,11 +265,12 @@ async def test_streaming_tool_call(client: openai.AsyncOpenAI, model: str):
         }
     }
     messages = [{"role": "user", "content": "What is the weather like in SF?"}]
-    stream = await client.responses.create(model=model,
-                                           input=messages,
-                                           tools=[tool_get_current_weather],
-                                           stream=True,
-                                           max_output_tokens=1024)
+    stream = await client.responses.create(
+        model=model,
+        input=messages,
+        tools=[tool_get_current_weather],
+        stream=True,
+    )
 
     function_call = None
     reasoning_deltas = list()
