@@ -18,6 +18,7 @@
 import traceback
 import platform
 import sys
+import re
 
 from optillm.cot_decoding import cot_decode
 from optillm.entropy_decoding import entropy_decode
@@ -29,6 +30,41 @@
 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
 
+def count_reasoning_tokens(text: str, tokenizer=None) -> int:
+    """
+    Count tokens within <think>...</think> tags in the given text.
+
+    Args:
+        text: The text to analyze
+        tokenizer: Optional tokenizer instance for precise counting
+
+    Returns:
+        Number of reasoning tokens (0 if no think tags found)
+    """
+    if not text or not isinstance(text, str):
+        return 0
+
+    # Extract all content within <think>...</think> tags
+    think_pattern = r'<think>(.*?)</think>'
+    matches = re.findall(think_pattern, text, re.DOTALL)
+
+    if not matches:
+        return 0
+
+    # Combine all thinking content
+    thinking_content = ''.join(matches)
+
+    if tokenizer and hasattr(tokenizer, 'encode'):
+        # Use tokenizer for precise counting
+        try:
+            tokens = tokenizer.encode(thinking_content)
+            return len(tokens)
+        except Exception as e:
+            logger.warning(f"Failed to count tokens with tokenizer: {e}")
+
+    # Fallback: rough estimation (4 chars per token on average)
+    return max(0, len(thinking_content.strip()) // 4)
+
 # MLX Support for Apple Silicon
 try:
     import mlx.core as mx
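A quick sanity check of the helper above: without a tokenizer it estimates roughly one token per four characters of stripped `<think>` content. A minimal sketch of both paths (strings and expected values are illustrative, not from the repo's tests):

```python
# Illustrative check of count_reasoning_tokens; values assume the 4-chars-per-token fallback.
text = "<think>Let me work through this step by step.</think>The answer is 4."

# No tokenizer: 38 characters of think-content // 4 -> 9 estimated reasoning tokens
print(count_reasoning_tokens(text))           # 9

# No <think> tags anywhere -> 0
print(count_reasoning_tokens("plain reply"))  # 0
```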
@@ -1502,10 +1538,11 @@ def __init__(
         self.message.logprobs = logprobs
 
 class ChatCompletionUsage:
-    def __init__(self, prompt_tokens: int, completion_tokens: int, total_tokens: int):
+    def __init__(self, prompt_tokens: int, completion_tokens: int, total_tokens: int, reasoning_tokens: int = 0):
         self.prompt_tokens = prompt_tokens
         self.completion_tokens = completion_tokens
         self.total_tokens = total_tokens
+        self.reasoning_tokens = reasoning_tokens
 
 class ChatCompletion:
     def __init__(self, response_dict: Dict):
@@ -1547,7 +1584,10 @@ def model_dump(self) -> Dict:
             "usage": {
                 "prompt_tokens": self.usage.prompt_tokens,
                 "completion_tokens": self.usage.completion_tokens,
-                "total_tokens": self.usage.total_tokens
+                "total_tokens": self.usage.total_tokens,
+                "completion_tokens_details": {
+                    "reasoning_tokens": getattr(self.usage, 'reasoning_tokens', 0)
+                }
             }
         }
 
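After this change, a dumped response nests the reasoning count under `completion_tokens_details`, mirroring the `completion_tokens_details.reasoning_tokens` field OpenAI reports for its reasoning models. A sketch of the resulting `usage` shape (values made up for illustration):

```python
# Illustrative usage block emitted by ChatCompletion.model_dump() (values made up)
usage = {
    "prompt_tokens": 42,
    "completion_tokens": 512,
    "total_tokens": 554,
    "completion_tokens_details": {
        "reasoning_tokens": 230,  # tokens counted inside <think>...</think>
    },
}
```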
@@ -1766,15 +1806,15 @@ def create(
 
                logger.debug(f"ThinkDeeper tokens: user={user_max_tokens}, thinking={max_thinking_tokens}, adjusted={adjusted_max_tokens}")
 
-                result = thinkdeeper_decode_mlx(
+                result, reasoning_tokens = thinkdeeper_decode_mlx(
                     pipeline.model,
                     pipeline.tokenizer,
                     messages,
                     thinkdeeper_config_with_tokens
                 )
             else:
                 logger.info("Using PyTorch ThinkDeeper implementation")
-                result = thinkdeeper_decode(
+                result, reasoning_tokens = thinkdeeper_decode(
                     pipeline.current_model,
                     pipeline.tokenizer,
                     messages,
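Note the contract change: both ThinkDeeper decoders now return a `(text, reasoning_tokens)` tuple rather than a bare string, so any call site not updated in this diff would need unpacking. A hypothetical defensive shim (not part of this PR) for callers that might see either shape:

```python
# Hypothetical helper, not in this diff: tolerate both the old (str) and
# new (str, int) return shapes of thinkdeeper_decode / thinkdeeper_decode_mlx.
def unpack_decode_result(result):
    if isinstance(result, tuple):
        text, reasoning_tokens = result
        return text, reasoning_tokens
    return result, 0  # old shape: no reasoning-token count available
```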
@@ -1850,6 +1890,11 @@ def create(
         prompt_tokens = len(pipeline.tokenizer.encode(prompt))
         completion_tokens = sum(token_counts)
 
+        # Calculate reasoning tokens from all responses
+        total_reasoning_tokens = 0
+        for response in responses:
+            total_reasoning_tokens += count_reasoning_tokens(response, pipeline.tokenizer)
+
         # Create OpenAI-compatible response format
         response_dict = {
             "id": f"chatcmpl-{int(time.time()*1000)}",
@@ -1871,7 +1916,8 @@ def create(
             "usage": {
                 "prompt_tokens": prompt_tokens,
                 "completion_tokens": completion_tokens,
-                "total_tokens": completion_tokens + prompt_tokens
+                "total_tokens": completion_tokens + prompt_tokens,
+                "reasoning_tokens": total_reasoning_tokens
             }
         }
 
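Taken together with the aggregation loop above, the per-response counts roll up into a single `reasoning_tokens` figure. A worked example under the no-tokenizer fallback (all strings and arithmetic are illustrative):

```python
# Worked example (illustrative): three sampled responses, two with <think> blocks.
responses = [
    "<think>First, consider n=1...</think>It converges.",    # 22 chars of thinking
    "It diverges.",                                           # no think block -> 0
    "<think>Check the ratio test.</think>So it converges.",  # 21 chars of thinking
]
total_reasoning_tokens = sum(count_reasoning_tokens(r) for r in responses)
# Fallback estimate: 22 // 4 + 0 + 21 // 4 = 5 + 0 + 5 = 10
```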