Commit 55bc6a5

[https://nvbugs/5753250][fix] Fix undefined local variable in responses utils (#10154)
Signed-off-by: Junyi Xu <[email protected]>
Signed-off-by: JunyiXu-nv <[email protected]>

1 parent: ee07a7c

2 files changed: +55 −48 lines

tensorrt_llm/serve/responses_utils.py (1 addition, 0 deletions)

@@ -974,6 +974,7 @@ def _create_output_content(
     available_tools = _get_chat_completion_function_tools(tools)
 
     for output in final_res.outputs:
+        calls = []
         text, reasoning_text = _apply_reasoning_parser(reasoning_parser,
                                                        output.index,
                                                        output.text, False)
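
The fix itself is the single `calls = []` line above. The failure mode it guards against is a standard Python pitfall: a local that is only bound inside a conditional branch (or carried over from a previous loop iteration) raises UnboundLocalError on any path that reads it unassigned. A minimal, self-contained sketch of the pattern and the fix; the data shapes are illustrative, not the actual responses_utils logic:

```python
def collect_calls(outputs):
    """Illustrative only: mirrors the bug class, not the real parser."""
    results = []
    for output in outputs:
        # Before the fix: `calls` was only bound on the branch that found
        # tool calls, so an output without them either raised
        # UnboundLocalError or silently reused the previous iteration's list.
        #
        # The fix: bind `calls` unconditionally at the top of each iteration.
        calls = []
        if output.get("tool_calls"):
            calls = [c["name"] for c in output["tool_calls"]]
        results.append(calls)
    return results


print(collect_calls([{"tool_calls": [{"name": "get_weather"}]}, {}]))
# -> [['get_weather'], []]
```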

tests/unittest/llmapi/apps/_test_openai_responses.py (54 additions, 48 deletions)
@@ -51,6 +51,7 @@ def client(server: RemoteOpenAIServer):
 
 
 def check_reponse(response, prefix=""):
+    print(f"response: {response}")
     reasoning_exist, message_exist = False, False
     for output in response.output:
         if output.type == "reasoning":
@@ -63,6 +64,7 @@ def check_reponse(response, prefix=""):
 
 
 def check_tool_calling(response, first_resp=True, prefix=""):
+    print(f"response: {response}")
     reasoning_exist, tool_call_exist, message_exist = False, False, False
     reasoning_content, message_content = "", ""
     function_call = None
@@ -90,18 +92,20 @@ def check_tool_calling(response, first_resp=True, prefix=""):
     assert not tool_call_exist, f"{err_msg} tool call content should not exist! ({function_call})"
 
 
-@pytest.mark.skip(reason="https://nvbugs/5753250")
+def _get_qwen3_nothink_input(model: str, input: str):
+    return f"{input} /no_think" if model.startswith("Qwen3") else input
+
+
 @pytest.mark.asyncio(loop_scope="module")
 async def test_reasoning(client: openai.AsyncOpenAI, model: str):
     response = await client.responses.create(
         model=model,
         input="Which one is larger as numeric, 9.9 or 9.11?",
-        max_output_tokens=1024)
+    )
 
     check_reponse(response, "test_reasoning: ")
 
 
-@pytest.mark.skip(reason="https://nvbugs/5753250")
 @pytest.mark.asyncio(loop_scope="module")
 async def test_reasoning_effort(client: openai.AsyncOpenAI, model: str):
     for effort in ["low", "medium", "high"]:
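
The new `_get_qwen3_nothink_input` helper uses Qwen3's prompt-level soft switch: appending `/no_think` to the input disables thinking mode for that turn on Qwen3 models, while other models get the input unchanged. A quick usage sketch of the helper as added above, with illustrative model names:

```python
# Taken from the diff above; exercised here with illustrative inputs.
def _get_qwen3_nothink_input(model: str, input: str):
    return f"{input} /no_think" if model.startswith("Qwen3") else input


assert _get_qwen3_nothink_input("Qwen3-8B", "What is 1+1?") == "What is 1+1? /no_think"
assert _get_qwen3_nothink_input("DeepSeek-R1", "What is 1+1?") == "What is 1+1?"
```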
@@ -110,56 +114,57 @@ async def test_reasoning_effort(client: openai.AsyncOpenAI, model: str):
             instructions="Use less than 1024 tokens for the whole response",
             input="Which one is larger as numeric, 9.9 or 9.11?",
             reasoning={"effort": effort},
-            max_output_tokens=1024)
+        )
         check_reponse(response, f"test_reasoning_effort_{effort}: ")
 
 
-@pytest.mark.skip(reason="https://nvbugs/5753250")
 @pytest.mark.asyncio(loop_scope="module")
 async def test_chat(client: openai.AsyncOpenAI, model: str):
-    response = await client.responses.create(model=model,
-                                             input=[{
-                                                 "role":
-                                                 "developer",
-                                                 "content":
-                                                 "Respond in Chinese."
-                                             }, {
-                                                 "role": "user",
-                                                 "content": "Hello!"
-                                             }, {
-                                                 "role":
-                                                 "assistant",
-                                                 "content":
-                                                 "Hello! How can I help you?"
-                                             }, {
-                                                 "role": "user",
-                                                 "content": "Tell me a joke."
-                                             }],
-                                             max_output_tokens=1024)
+    response = await client.responses.create(
+        model=model,
+        input=[{
+            "role": "developer",
+            "content": "Respond in Chinese."
+        }, {
+            "role": "user",
+            "content": "Hello!"
+        }, {
+            "role": "assistant",
+            "content": "Hello! How can I help you?"
+        }, {
+            "role": "user",
+            "content": "Tell me a joke."
+        }],
+    )
     check_reponse(response, "test_chat: ")
 
 
-@pytest.mark.skip(reason="https://nvbugs/5753250")
 @pytest.mark.asyncio(loop_scope="module")
-async def test_multi_turn_chat(client: openai.AsyncOpenAI, model: str):
-    response = await client.responses.create(model=model,
-                                             input="What is the answer of 1+1?",
-                                             max_output_tokens=1024)
+async def test_multi_turn_chat(client: openai.AsyncOpenAI, model: str,
+                               num_postprocess_workers: int):
+    if num_postprocess_workers > 0:
+        pytest.skip(
+            "Response store is disabled when num_postprocess_workers > 0")
+
+    response = await client.responses.create(
+        model=model,
+        input=_get_qwen3_nothink_input(model, "What is the answer of 1+1?"),
+    )
     check_reponse(response, "test_multi_turn_chat_1: ")
 
     response_2 = await client.responses.create(
         model=model,
-        input="What is the answer of previous question?",
+        input=_get_qwen3_nothink_input(
+            model, "What is the answer of previous question?"),
         previous_response_id=response.id,
-        max_output_tokens=1024)
+    )
     check_reponse(response_2, "test_multi_turn_chat_2: ")
 
 
 def get_current_weather(location: str, format: str = "celsius") -> dict:
     return {"sunny": True, "temperature": 20 if format == "celsius" else 68}
 
 
-@pytest.mark.skip(reason="https://nvbugs/5753250")
 @pytest.mark.asyncio(loop_scope="module")
 async def test_tool_calls(client: openai.AsyncOpenAI, model: str):
     if model.startswith("DeepSeek-R1"):
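
`test_multi_turn_chat` now skips when `num_postprocess_workers > 0` because, per its skip message, the response store backing `previous_response_id` is disabled in that configuration. The chaining itself is plain Responses API usage; a minimal sketch with the `openai` client, assuming a locally served endpoint (the real tests get theirs from the `RemoteOpenAIServer` fixture):

```python
import asyncio

import openai


async def main():
    # base_url and model are placeholders for whatever the test server exposes.
    client = openai.AsyncOpenAI(base_url="http://localhost:8000/v1",
                                api_key="dummy")
    first = await client.responses.create(model="Qwen3-8B",
                                          input="What is the answer of 1+1?")
    # The server resolves previous_response_id against its response store and
    # prepends that conversation, so the back-reference below can be answered.
    second = await client.responses.create(
        model="Qwen3-8B",
        input="What is the answer of previous question?",
        previous_response_id=first.id)
    print(second.output_text)


asyncio.run(main())
```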
@@ -186,10 +191,11 @@ async def test_tool_calls(client: openai.AsyncOpenAI, model: str):
         }
     }
     messages = [{"role": "user", "content": "What is the weather like in SF?"}]
-    response = await client.responses.create(model=model,
-                                             input=messages,
-                                             tools=[tool_get_current_weather],
-                                             max_output_tokens=1024)
+    response = await client.responses.create(
+        model=model,
+        input=messages,
+        tools=[tool_get_current_weather],
+    )
     messages.extend(response.output)
     function_call = check_tool_calling(response, True, "test_tool_calls: ")
 
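Between the hunk above and the one below, `test_tool_calls` completes the tool round trip: the model's `function_call` items from `response.output` are appended to `messages`, followed by a `function_call_output` item carrying the executed tool's JSON result. A sketch of that middle step; the `call_id` field name follows Responses API conventions and is an assumption, since this diff does not show it:

```python
import json


def append_tool_result(messages, response_output, answer):
    # Echo the model's output items (including its function_call) back into
    # the conversation, mirroring messages.extend(response.output) in the test.
    messages.extend(response_output)
    # Attach the executed tool's result; a function_call_output item is
    # matched to the originating call via call_id (assumed field name).
    call = next(item for item in response_output
                if getattr(item, "type", None) == "function_call")
    messages.append({
        "type": "function_call_output",
        "call_id": call.call_id,
        "output": json.dumps(answer),
    })
    return messages
```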
@@ -203,22 +209,22 @@ async def test_tool_calls(client: openai.AsyncOpenAI, model: str):
         "output": json.dumps(answer),
     })
 
-    response = await client.responses.create(model=model,
-                                             input=messages,
-                                             tools=[tool_get_current_weather],
-                                             max_output_tokens=1024)
+    response = await client.responses.create(
+        model=model,
+        input=messages,
+        tools=[tool_get_current_weather],
+    )
 
     check_tool_calling(response, False, "test_tool_calls: ")
 
 
-@pytest.mark.skip(reason="https://nvbugs/5753250")
 @pytest.mark.asyncio(loop_scope="module")
 async def test_streaming(client: openai.AsyncOpenAI, model: str):
     stream = await client.responses.create(
         model=model,
         input="Explain the theory of relativity in brief.",
         stream=True,
-        max_output_tokens=1024)
+    )
 
     reasoning_deltas, message_deltas = list(), list()
     async for event in stream:
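
`test_streaming` drains the event stream, bucketing text deltas into reasoning and message accumulators before asserting that the reasoning half is non-empty. A sketch of such a consumption loop; the event-type strings follow OpenAI Responses streaming event names and are assumptions here, since the loop body is outside this hunk:

```python
async def collect_deltas(stream):
    # Bucket streamed text deltas by event type (event names assumed).
    reasoning_deltas, message_deltas = [], []
    async for event in stream:
        if event.type == "response.output_text.delta":
            message_deltas.append(event.delta)
        elif event.type == "response.reasoning_text.delta":
            reasoning_deltas.append(event.delta)
    return "".join(reasoning_deltas), "".join(message_deltas)
```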
@@ -233,7 +239,6 @@ async def test_streaming(client: openai.AsyncOpenAI, model: str):
     assert full_reasoning_response
 
 
-@pytest.mark.skip(reason="https://nvbugs/5753250")
 @pytest.mark.asyncio(loop_scope="module")
 async def test_streaming_tool_call(client: openai.AsyncOpenAI, model: str):
     if model.startswith("DeepSeek-R1"):
@@ -260,11 +265,12 @@ async def test_streaming_tool_call(client: openai.AsyncOpenAI, model: str):
         }
     }
     messages = [{"role": "user", "content": "What is the weather like in SF?"}]
-    stream = await client.responses.create(model=model,
-                                           input=messages,
-                                           tools=[tool_get_current_weather],
-                                           stream=True,
-                                           max_output_tokens=1024)
+    stream = await client.responses.create(
+        model=model,
+        input=messages,
+        tools=[tool_get_current_weather],
+        stream=True,
+    )
 
     function_call = None
     reasoning_deltas = list()
