1+ #!/usr/bin/env python3
2+ """
3+ Integration tests for reasoning token functionality with OptILLM API
4+ """
5+
6+ import pytest
7+ import sys
8+ import os
9+ import json
10+ from unittest .mock import Mock , MagicMock , patch
11+ from typing import Dict , Any
12+
13+ # Add parent directory to path to import optillm modules
14+ sys .path .append (os .path .dirname (os .path .dirname (os .path .abspath (__file__ ))))
15+
16+ from optillm import app , count_reasoning_tokens
17+
18+
class MockOpenAIClient:
    """Enhanced mock client that can generate responses with think tags.

    Mimics the subset of the OpenAI client surface OptILLM uses:
    ``client.chat.completions.create(...)`` returning an object exposing
    ``choices``, ``usage`` and ``model_dump()``.
    """

    def __init__(self, include_thinking=True):
        # When True, every generated choice is wrapped in <think>...</think>.
        self.include_thinking = include_thinking
        self.chat = self.Chat(include_thinking)

    class Chat:
        def __init__(self, include_thinking):
            self.completions = self.Completions(include_thinking)
            self.include_thinking = include_thinking

        class Completions:
            def __init__(self, include_thinking):
                self.include_thinking = include_thinking

            def create(self, **kwargs):
                """Return a mock completion carrying ``n`` choices.

                BUGFIX: the original also computed a single message-dependent
                ``content`` string that was never used (the per-choice loop
                below unconditionally overwrote it). That dead code is removed;
                observable behavior is unchanged — whether a choice contains a
                <think> block depends only on ``include_thinking``.
                """
                n = kwargs.get('n', 1)

                class MockChoice:
                    """One completion choice with message/index/finish_reason."""
                    def __init__(self, content, index=0):
                        self.message = type('Message', (), {'content': content})()
                        self.index = index
                        self.finish_reason = 'stop'

                class MockUsage:
                    """Fixed token accounting, matching a typical usage payload."""
                    completion_tokens = 50
                    total_tokens = 75
                    prompt_tokens = 25

                class MockResponse:
                    """Container mirroring an OpenAI ChatCompletion response."""
                    def __init__(self, choices, usage):
                        self.choices = choices
                        self.usage = usage

                    def model_dump(self):
                        # Serialize like pydantic's model_dump on a real response.
                        return {
                            'choices': [
                                {
                                    'index': choice.index,
                                    'message': {'content': choice.message.content},
                                    'finish_reason': choice.finish_reason,
                                } for choice in self.choices
                            ],
                            'usage': {
                                'completion_tokens': self.usage.completion_tokens,
                                'total_tokens': self.usage.total_tokens,
                                'prompt_tokens': self.usage.prompt_tokens,
                            },
                        }

                # Create multiple (slightly varied) choices if n > 1.
                choices = []
                for i in range(n):
                    if self.include_thinking:
                        varied_content = (
                            f"<think>Thinking process {i+1}: Let me analyze this "
                            f"carefully...</think>\n\nAnswer {i+1}: The result is {42 + i}."
                        )
                    else:
                        varied_content = f"Answer {i+1}: The result is {42 + i}."
                    choices.append(MockChoice(varied_content, i))

                return MockResponse(choices, MockUsage())
89+
90+
class TestReasoningTokensAPIIntegration:
    """Test reasoning tokens in API responses."""

    HEADERS = {'Authorization': 'Bearer test-key'}

    def setup_method(self):
        """Configure the Flask app for testing and build a mock backend."""
        app.config['TESTING'] = True
        self.client = app.test_client()
        # Mock the get_config function to return our mock client
        self.mock_client = MockOpenAIClient(include_thinking=True)

    def _post(self, payload):
        """POST *payload* to the chat completions endpoint with auth headers."""
        return self.client.post('/v1/chat/completions',
                                json=payload,
                                headers=self.HEADERS)

    @patch('optillm.get_config')
    def test_api_response_includes_reasoning_tokens(self, mock_get_config):
        """API responses include reasoning_tokens in completion_tokens_details."""
        mock_get_config.return_value = (self.mock_client, "test-key")

        # 'none' approach proxies the request straight to the backend client.
        response = self._post({
            'model': 'none-gpt-4o-mini',
            'messages': [
                {'role': 'user', 'content': 'Please think about this problem step by step.'}
            ],
        })

        assert response.status_code == 200
        payload = response.get_json()

        # Response structure must expose the details sub-object.
        assert 'usage' in payload
        assert 'completion_tokens_details' in payload['usage']
        details = payload['usage']['completion_tokens_details']
        assert 'reasoning_tokens' in details

        # Mock content carries <think> tags, so the count must be positive.
        assert details['reasoning_tokens'] > 0

    @patch('optillm.get_config')
    def test_api_response_no_reasoning_tokens(self, mock_get_config):
        """Responses without think tags report 0 reasoning tokens."""
        plain_backend = MockOpenAIClient(include_thinking=False)
        mock_get_config.return_value = (plain_backend, "test-key")

        response = self._post({
            'model': 'none-gpt-4o-mini',
            'messages': [{'role': 'user', 'content': 'What is 2+2?'}],
        })

        assert response.status_code == 200
        payload = response.get_json()

        # No <think> content anywhere, so the counter stays at zero.
        assert payload['usage']['completion_tokens_details']['reasoning_tokens'] == 0

    @patch('optillm.get_config')
    def test_multiple_responses_reasoning_tokens(self, mock_get_config):
        """Reasoning tokens are summed across all choices when n > 1."""
        mock_get_config.return_value = (self.mock_client, "test-key")

        response = self._post({
            'model': 'none-gpt-4o-mini',
            'messages': [{'role': 'user', 'content': 'Think through this problem.'}],
            'n': 3,
        })

        assert response.status_code == 200
        payload = response.get_json()

        assert len(payload['choices']) == 3

        # Every choice contains thinking content, so the aggregate must be
        # positive and above a small floor (rough check — data is mocked).
        total = payload['usage']['completion_tokens_details']['reasoning_tokens']
        assert total > 0
        assert total >= 10  # Reasonable minimum

    def test_reasoning_tokens_calculation_accuracy(self):
        """count_reasoning_tokens matches the fallback char-based estimate."""
        sample = "<think>This is a test thinking block with exactly twenty words to verify the token counting accuracy works properly.</think>Result: 42"
        thinking = "This is a test thinking block with exactly twenty words to verify the token counting accuracy works properly."

        # Fallback estimation assumes ~4 characters per token.
        assert count_reasoning_tokens(sample) == len(thinking) // 4

    @patch('optillm.get_config')
    def test_error_handling_invalid_response(self, mock_get_config):
        """A backend failure surfaces as a 500 with an error payload."""
        failing_backend = Mock()
        failing_backend.chat.completions.create.side_effect = Exception("API Error")
        mock_get_config.return_value = (failing_backend, "test-key")

        response = self._post({
            'model': 'none-gpt-4o-mini',
            'messages': [{'role': 'user', 'content': 'test'}],
        })

        assert response.status_code == 500
        assert 'error' in response.get_json()
210+
211+
class TestApproachIntegration:
    """Test reasoning tokens with different OptILLM approaches."""

    def setup_method(self):
        """Setup test client."""
        app.config['TESTING'] = True
        self.client = app.test_client()

    def test_reasoning_tokens_with_mock_approach(self):
        """A mock response containing a <think> block yields a positive count."""
        # Self-contained check — no external API calls required.
        sample = """
<think>
I need to analyze this problem step by step:
1. First, understand the requirements
2. Then, consider the constraints
3. Finally, provide a solution

This seems straightforward but requires careful thought.
</think>

Based on my analysis, the answer is: 42
"""

        counted = count_reasoning_tokens(sample)
        assert counted > 0

        # The thinking content that should have been extracted:
        thinking = """
I need to analyze this problem step by step:
1. First, understand the requirements
2. Then, consider the constraints
3. Finally, provide a solution

This seems straightforward but requires careful thought.
"""

        # Fallback estimate is ~4 chars per token; allow a small variance.
        assert abs(counted - len(thinking.strip()) // 4) <= 5

    def test_complex_thinking_patterns(self):
        """Various thinking patterns that approaches might generate."""
        patterns = [
            # Single block
            "<think>Simple thinking</think>Answer: Yes",
            # Multiple blocks
            "<think>First thought</think>Intermediate result<think>Second thought</think>Final answer",
            # Nested structure (should extract outer)
            "<think>Outer<think>inner</think>more outer</think>Result",
            # With code blocks inside thinking
            "<think>Let me write some code:\n```python\nx = 1\n```\nThat should work.</think>Code solution provided",
            # With mathematical notation
            "<think>If x = 2, then x² = 4, so the equation becomes 4 + 3 = 7</think>The result is 7",
        ]

        for case_number, pattern in enumerate(patterns, start=1):
            assert count_reasoning_tokens(pattern) > 0, \
                f"Test case {case_number} should have reasoning tokens: {pattern}"

    def test_backward_compatibility(self):
        """Non-thinking responses work normally (0 reasoning tokens)."""
        plain_responses = [
            "This is a normal response without any thinking.",
            "The answer is 42.",
            "I can help you with that. Here's the solution: x = 5",
            "",  # Empty response
        ]

        for text in plain_responses:
            assert count_reasoning_tokens(text) == 0, \
                f"Normal response should have 0 reasoning tokens: {text}"
291+
292+
class TestStreamingIntegration:
    """Test reasoning tokens with streaming responses."""

    def setup_method(self):
        """Setup test client."""
        app.config['TESTING'] = True
        self.client = app.test_client()

    @patch('optillm.get_config')
    def test_streaming_response_format(self, mock_get_config):
        """Streaming requests succeed even when content carries think tags."""
        backend = MockOpenAIClient(include_thinking=True)
        mock_get_config.return_value = (backend, "test-key")

        # Streaming responses don't expose reasoning-token details the same
        # way non-streaming ones do; we only verify nothing breaks.
        response = self.client.post(
            '/v1/chat/completions',
            json={
                'model': 'none-gpt-4o-mini',
                'messages': [{'role': 'user', 'content': 'Think about this'}],
                'stream': True,
            },
            headers={'Authorization': 'Bearer test-key'},
        )

        assert response.status_code == 200
        assert response.content_type == 'text/event-stream; charset=utf-8'
322+
323+
if __name__ == "__main__":
    # Fallback runner so the suite is usable without pytest installed.
    import traceback

    suites = [
        TestReasoningTokensAPIIntegration,
        TestApproachIntegration,
        TestStreamingIntegration,
    ]

    for suite in suites:
        print(f"\n=== Running {suite.__name__} ===")
        case = suite()
        case.setup_method()

        # Discover test methods by the conventional 'test_' prefix.
        for name in dir(case):
            if not name.startswith('test_'):
                continue
            print(f"Running {name}...", end=' ')
            try:
                getattr(case, name)()
            except Exception as exc:
                print(f"❌ FAILED: {exc}")
                traceback.print_exc()
            else:
                print("✅ PASSED")

    print("\n=== Integration Tests Complete ===")