@@ -103,31 +103,138 @@ def test_streaming(client):
     assert len(content_chunks) > 0
 
 
+def test_reasoning_tokens_in_response(client):
+    """Test that reasoning tokens are included in API responses"""
+    response = client.chat.completions.create(
+        model="gpt-4o-mini",
+        messages=[
+            {"role": "system", "content": "Think step by step and show your reasoning."},
+            {"role": "user", "content": "What is 15 × 23? Please think through this step by step."}
+        ],
+        max_tokens=100
+    )
+
+    # Check basic response structure
+    assert hasattr(response, 'choices')
+    assert len(response.choices) > 0
+    assert hasattr(response, 'usage')
+
+    # Check that completion_tokens_details exists and has reasoning_tokens
+    assert hasattr(response.usage, 'completion_tokens_details')
+    assert hasattr(response.usage.completion_tokens_details, 'reasoning_tokens')
+
+    # reasoning_tokens should be an integer >= 0
+    reasoning_tokens = response.usage.completion_tokens_details.reasoning_tokens
+    assert isinstance(reasoning_tokens, int)
+    assert reasoning_tokens >= 0
+
+
+def test_reasoning_tokens_with_thinking_prompt(client):
+    """Test reasoning tokens with a prompt designed to trigger thinking"""
+    response = client.chat.completions.create(
+        model="gpt-4o-mini",
+        messages=[
+            {"role": "system", "content": "You are a helpful assistant. Use <think> tags to show your reasoning process."},
+            {"role": "user", "content": "I have 12 apples. I eat 3, give away 4, and buy 7 more. How many apples do I have now?"}
+        ],
+        max_tokens=150
+    )
+
+    # Basic checks
+    assert hasattr(response, 'usage')
+    assert hasattr(response.usage, 'completion_tokens_details')
+    assert hasattr(response.usage.completion_tokens_details, 'reasoning_tokens')
+
+    reasoning_tokens = response.usage.completion_tokens_details.reasoning_tokens
+    assert isinstance(reasoning_tokens, int)
+    assert reasoning_tokens >= 0
+
+    # If the model used thinking tags, reasoning_tokens should be > 0
+    # (This depends on the model's response, so we just check the structure)
+
+
+def test_reasoning_tokens_with_multiple_responses(client):
+    """Test reasoning tokens with n > 1"""
+    response = client.chat.completions.create(
+        model="gpt-4o-mini",
+        messages=[
+            {"role": "user", "content": "Think about this: What's 2+2?"}
+        ],
+        n=2,
+        max_tokens=50
+    )
+
+    # Should have 2 choices
+    assert len(response.choices) == 2
+
+    # Should have reasoning token information
+    assert hasattr(response.usage, 'completion_tokens_details')
+    assert hasattr(response.usage.completion_tokens_details, 'reasoning_tokens')
+
+    reasoning_tokens = response.usage.completion_tokens_details.reasoning_tokens
+    assert isinstance(reasoning_tokens, int)
+    assert reasoning_tokens >= 0
+
+
+def test_reasoning_tokens_backward_compatibility(client):
+    """Test that responses without thinking still work normally"""
+    response = client.chat.completions.create(
+        model="gpt-4o-mini",
+        messages=[
+            {"role": "user", "content": "Say hello"}
+        ],
+        max_tokens=10
+    )
+
+    # Should still have reasoning token structure, but with 0 tokens
+    assert hasattr(response.usage, 'completion_tokens_details')
+    assert hasattr(response.usage.completion_tokens_details, 'reasoning_tokens')
+
+    reasoning_tokens = response.usage.completion_tokens_details.reasoning_tokens
+    assert isinstance(reasoning_tokens, int)
+    assert reasoning_tokens >= 0  # Usually 0 for simple responses
+
+
 if __name__ == "__main__":
     # Run basic tests if pytest not available
     client = OpenAI(
         api_key=os.environ.get("OPENAI_API_KEY", "test-key"),
         base_url="http://localhost:8000/v1"
     )
 
-    print("Running basic API compatibility tests...")
+    print("Running API compatibility tests...")
+
+    tests = [
+        ("Basic completion", test_basic_completion),
+        ("N parameter", test_n_parameter),
+        ("Approach prefix", test_approach_prefix),
+        ("Extra body approach", test_extra_body_approach),
+        ("Streaming", test_streaming),
+        ("Reasoning tokens in response", test_reasoning_tokens_in_response),
+        ("Reasoning tokens with thinking prompt", test_reasoning_tokens_with_thinking_prompt),
+        ("Reasoning tokens with multiple responses", test_reasoning_tokens_with_multiple_responses),
+        ("Reasoning tokens backward compatibility", test_reasoning_tokens_backward_compatibility),
+    ]
 
-    try:
-        test_basic_completion(client)
-        print("✅ Basic completion test passed")
-    except Exception as e:
-        print(f"❌ Basic completion test failed: {e}")
+    passed = 0
+    failed = 0
 
-    try:
-        test_n_parameter(client)
-        print("✅ N parameter test passed")
-    except Exception as e:
-        print(f"❌ N parameter test failed: {e}")
+    for test_name, test_func in tests:
+        try:
+            print(f"Running {test_name}...", end=' ')
+            test_func(client)
+            print("✅ PASSED")
+            passed += 1
+        except Exception as e:
+            print(f"❌ FAILED: {e}")
+            failed += 1
 
-    try:
-        test_approach_prefix(client)
-        print("✅ Approach prefix test passed")
-    except Exception as e:
-        print(f"❌ Approach prefix test failed: {e}")
+    print(f"\n=== Test Summary ===")
+    print(f"Passed: {passed}")
+    print(f"Failed: {failed}")
+    print(f"Total: {passed + failed}")
 
-    print("\nDone!")
+    if failed == 0:
+        print("🎉 All tests passed!")
+    else:
+        print(f"⚠️ {failed} test(s) failed.")
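
The new tests only assert on the shape of `usage.completion_tokens_details.reasoning_tokens`; they deliberately do not pin down how the server derives that count. As a rough illustration only (not the implementation in this PR), one plausible scheme is to tokenize whatever the model emits inside `<think>...</think>` tags, which is what the thinking-prompt test nudges the model toward. The helper name and the `tiktoken` encoding below are assumptions made for this sketch.

```python
# Illustrative sketch only -- not the counting logic shipped in this PR.
# Counts completion tokens that fall inside <think>...</think> spans,
# which is one way a server could populate
# usage.completion_tokens_details.reasoning_tokens.
import re
import tiktoken  # assumed tokenizer; any tokenizer with .encode() would do

_THINK_RE = re.compile(r"<think>(.*?)</think>", re.DOTALL)

def count_reasoning_tokens(completion_text: str) -> int:
    """Return the number of tokens inside <think> tags (0 if none)."""
    enc = tiktoken.get_encoding("cl100k_base")
    return sum(len(enc.encode(span)) for span in _THINK_RE.findall(completion_text))

# Example: a completion with an explicit thinking span
text = "<think>12 - 3 - 4 + 7 = 12</think>You end up with 12 apples."
print(count_reasoning_tokens(text))  # positive count here; 0 if no <think> tags
```

Under this kind of scheme, the backward-compatibility test would naturally see `reasoning_tokens == 0`, since a plain "Say hello" completion contains no thinking span.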