@@ -51,6 +51,7 @@ def client(server: RemoteOpenAIServer):


 def check_reponse(response, prefix=""):
+    print(f"response: {response}")
     reasoning_exist, message_exist = False, False
     for output in response.output:
         if output.type == "reasoning":
@@ -63,6 +64,7 @@ def check_reponse(response, prefix=""):


 def check_tool_calling(response, first_resp=True, prefix=""):
+    print(f"response: {response}")
     reasoning_exist, tool_call_exist, message_exist = False, False, False
     reasoning_content, message_content = "", ""
     function_call = None
@@ -90,18 +92,20 @@ def check_tool_calling(response, first_resp=True, prefix=""):
         assert not tool_call_exist, f"{err_msg} tool call content should not exist! ({function_call})"


-@pytest.mark.skip(reason="https://nvbugs/5753250")
+def _get_qwen3_nothink_input(model: str, input: str):
+    return f"{input} /no_think" if model.startswith("Qwen3") else input
+
+
 @pytest.mark.asyncio(loop_scope="module")
 async def test_reasoning(client: openai.AsyncOpenAI, model: str):
     response = await client.responses.create(
         model=model,
         input="Which one is larger as numeric, 9.9 or 9.11?",
-        max_output_tokens=1024)
+    )

     check_reponse(response, "test_reasoning: ")


-@pytest.mark.skip(reason="https://nvbugs/5753250")
 @pytest.mark.asyncio(loop_scope="module")
 async def test_reasoning_effort(client: openai.AsyncOpenAI, model: str):
     for effort in ["low", "medium", "high"]:
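Note on the new helper above: Qwen3 models support a "soft switch" where appending /no_think to the user turn disables the thinking phase for that request, so the multi-turn tests below do not have to carry reasoning output across turns. A minimal sketch of the helper's behavior, assuming the marker is appended with a leading space as written in the diff:

    assert _get_qwen3_nothink_input("Qwen3-8B", "hi") == "hi /no_think"
    assert _get_qwen3_nothink_input("DeepSeek-R1", "hi") == "hi"
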
@@ -110,56 +114,57 @@ async def test_reasoning_effort(client: openai.AsyncOpenAI, model: str):
             instructions="Use less than 1024 tokens for the whole response",
             input="Which one is larger as numeric, 9.9 or 9.11?",
             reasoning={"effort": effort},
-            max_output_tokens=1024)
+        )
         check_reponse(response, f"test_reasoning_effort_{effort}: ")


-@pytest.mark.skip(reason="https://nvbugs/5753250")
 @pytest.mark.asyncio(loop_scope="module")
 async def test_chat(client: openai.AsyncOpenAI, model: str):
-    response = await client.responses.create(model=model,
-                                             input=[{
-                                                 "role":
-                                                 "developer",
-                                                 "content":
-                                                 "Respond in Chinese."
-                                             }, {
-                                                 "role": "user",
-                                                 "content": "Hello!"
-                                             }, {
-                                                 "role":
-                                                 "assistant",
-                                                 "content":
-                                                 "Hello! How can I help you?"
-                                             }, {
-                                                 "role": "user",
-                                                 "content": "Tell me a joke."
-                                             }],
-                                             max_output_tokens=1024)
+    response = await client.responses.create(
+        model=model,
+        input=[{
+            "role": "developer",
+            "content": "Respond in Chinese."
+        }, {
+            "role": "user",
+            "content": "Hello!"
+        }, {
+            "role": "assistant",
+            "content": "Hello! How can I help you?"
+        }, {
+            "role": "user",
+            "content": "Tell me a joke."
+        }],
+    )
     check_reponse(response, "test_chat: ")


-@pytest.mark.skip(reason="https://nvbugs/5753250")
 @pytest.mark.asyncio(loop_scope="module")
-async def test_multi_turn_chat(client: openai.AsyncOpenAI, model: str):
-    response = await client.responses.create(model=model,
-                                             input="What is the answer of 1+1?",
-                                             max_output_tokens=1024)
+async def test_multi_turn_chat(client: openai.AsyncOpenAI, model: str,
+                               num_postprocess_workers: int):
+    if num_postprocess_workers > 0:
+        pytest.skip(
+            "Response store is disabled when num_postprocess_workers > 0")
+
+    response = await client.responses.create(
+        model=model,
+        input=_get_qwen3_nothink_input(model, "What is the answer of 1+1?"),
+    )
     check_reponse(response, "test_multi_turn_chat_1: ")

     response_2 = await client.responses.create(
         model=model,
-        input="What is the answer of previous question?",
+        input=_get_qwen3_nothink_input(
+            model, "What is the answer of previous question?"),
         previous_response_id=response.id,
-        max_output_tokens=1024)
+    )
     check_reponse(response_2, "test_multi_turn_chat_2: ")


 def get_current_weather(location: str, format: str = "celsius") -> dict:
     return {"sunny": True, "temperature": 20 if format == "celsius" else 68}


-@pytest.mark.skip(reason="https://nvbugs/5753250")
 @pytest.mark.asyncio(loop_scope="module")
 async def test_tool_calls(client: openai.AsyncOpenAI, model: str):
     if model.startswith("DeepSeek-R1"):
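test_multi_turn_chat now also takes num_postprocess_workers and skips when it is positive, because chaining a follow-up request through previous_response_id relies on the server-side response store, which is disabled when postprocessing runs in worker processes. A minimal sketch of the kind of pytest fixture assumed to back that parameter (scope and values here are illustrative, not taken from this diff):

    @pytest.fixture(scope="module", params=[0, 2])
    def num_postprocess_workers(request):
        # Illustrative: how many postprocess workers the server under
        # test was launched with; 0 keeps postprocessing in-process.
        return request.param
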
@@ -186,10 +191,11 @@ async def test_tool_calls(client: openai.AsyncOpenAI, model: str):
         }
     }
     messages = [{"role": "user", "content": "What is the weather like in SF?"}]
-    response = await client.responses.create(model=model,
-                                             input=messages,
-                                             tools=[tool_get_current_weather],
-                                             max_output_tokens=1024)
+    response = await client.responses.create(
+        model=model,
+        input=messages,
+        tools=[tool_get_current_weather],
+    )
     messages.extend(response.output)
     function_call = check_tool_calling(response, True, "test_tool_calls: ")
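Between this first request and the next hunk, the test executes the tool locally and appends its result to the conversation; the hunk below only shows the tail of that append. A sketch of the elided step, using the Responses API function-call item fields (arguments is a JSON-encoded string, call_id links the output back to the call):

    args = json.loads(function_call.arguments)
    answer = get_current_weather(**args)
    messages.append({
        "type": "function_call_output",
        "call_id": function_call.call_id,
        "output": json.dumps(answer),
    })
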
@@ -203,22 +209,22 @@ async def test_tool_calls(client: openai.AsyncOpenAI, model: str):
         "output": json.dumps(answer),
     })

-    response = await client.responses.create(model=model,
-                                             input=messages,
-                                             tools=[tool_get_current_weather],
-                                             max_output_tokens=1024)
+    response = await client.responses.create(
+        model=model,
+        input=messages,
+        tools=[tool_get_current_weather],
+    )

     check_tool_calling(response, False, "test_tool_calls: ")


-@pytest.mark.skip(reason="https://nvbugs/5753250")
 @pytest.mark.asyncio(loop_scope="module")
 async def test_streaming(client: openai.AsyncOpenAI, model: str):
     stream = await client.responses.create(
         model=model,
         input="Explain the theory of relativity in brief.",
         stream=True,
-        max_output_tokens=1024)
+    )

     reasoning_deltas, message_deltas = list(), list()
     async for event in stream:
@@ -233,7 +239,6 @@ async def test_streaming(client: openai.AsyncOpenAI, model: str):
     assert full_reasoning_response


-@pytest.mark.skip(reason="https://nvbugs/5753250")
 @pytest.mark.asyncio(loop_scope="module")
 async def test_streaming_tool_call(client: openai.AsyncOpenAI, model: str):
     if model.startswith("DeepSeek-R1"):
@@ -260,11 +265,12 @@ async def test_streaming_tool_call(client: openai.AsyncOpenAI, model: str):
         }
     }
     messages = [{"role": "user", "content": "What is the weather like in SF?"}]
-    stream = await client.responses.create(model=model,
-                                           input=messages,
-                                           tools=[tool_get_current_weather],
-                                           stream=True,
-                                           max_output_tokens=1024)
+    stream = await client.responses.create(
+        model=model,
+        input=messages,
+        tools=[tool_get_current_weather],
+        stream=True,
+    )

     function_call = None
     reasoning_deltas = list()
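The consumer loop that follows (outside this excerpt) accumulates reasoning deltas and captures the completed function-call item. A minimal sketch of what such a consumer can look like; the event type strings come from the OpenAI Responses streaming event set, and exactly which events this server emits is an assumption here:

    async for event in stream:
        if event.type == "response.reasoning_text.delta":
            reasoning_deltas.append(event.delta)
        elif event.type == "response.output_item.done":
            # A finished output item; keep it if it is the tool call.
            if event.item.type == "function_call":
                function_call = event.item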