@@ -18,16 +18,37 @@ def _process_thinking_tokens(
 ) -> bool:
     """
     Process tokens to filter out thinking content between <think> and </think> tags.
+    Handles cases where providers only send a closing tag or mix reasoning_content.
     """
-    if is_thinking:
-        return THINK_END_PATTERN not in new_token
+    # Check for end tag first, as it might appear in the same token as start tag
+    if THINK_END_PATTERN in new_token:
+        # If we were never in think mode, treat everything accumulated so far as reasoning and clear it
+        if not is_thinking:
+            token_join.clear()
+            if callback:
+                callback("")  # clear any previously streamed reasoning content
+
+        # Exit thinking mode and only keep content after </think>
+        _, _, after_end = new_token.partition(THINK_END_PATTERN)
+        is_thinking = False
+        new_token = after_end
+        # Continue processing the remaining content in this token
 
+    # Check for start tag (after processing end tag, in case both are in the same token)
     if THINK_START_PATTERN in new_token:
+        # Drop any content before <think> and switch to thinking mode
+        _, _, after_start = new_token.partition(THINK_START_PATTERN)
+        new_token = after_start
+        is_thinking = True
+
+    if is_thinking:
+        # Still inside thinking content; ignore until we exit
         return True
 
-    token_join.append(new_token)
-    if callback:
-        callback("".join(token_join))
+    if new_token:
+        token_join.append(new_token)
+        if callback:
+            callback("".join(token_join))
 
     return False
 
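For context, a minimal, self-contained sketch of the patched filter and the case it fixes: a provider that streams reasoning first and only emits the closing tag. The tag constants, the standalone `process_thinking_tokens` mirror, and the sample token stream are assumptions for illustration, not code from this commit.

```python
from typing import Callable, List, Optional

# Assumed tag constants; the real module defines THINK_START_PATTERN / THINK_END_PATTERN.
THINK_START_PATTERN = "<think>"
THINK_END_PATTERN = "</think>"


def process_thinking_tokens(
    new_token: str,
    is_thinking: bool,
    token_join: List[str],
    callback: Optional[Callable[[str], None]] = None,
) -> bool:
    """Mirror of the patched filter: returns the new is_thinking state."""
    if THINK_END_PATTERN in new_token:
        if not is_thinking:
            # Closing tag without an opening tag: everything so far was reasoning.
            token_join.clear()
            if callback:
                callback("")
        _, _, new_token = new_token.partition(THINK_END_PATTERN)
        is_thinking = False

    if THINK_START_PATTERN in new_token:
        _, _, new_token = new_token.partition(THINK_START_PATTERN)
        is_thinking = True

    if is_thinking:
        return True

    if new_token:
        token_join.append(new_token)
        if callback:
            callback("".join(token_join))
    return False


# Provider that streams reasoning first and only sends the closing tag.
tokens = ["Let me think", " about it.", "</think>", "Final ", "answer."]
joined: List[str] = []
thinking = False
for tok in tokens:
    thinking = process_thinking_tokens(tok, thinking, joined, callback=None)

print("".join(joined))  # -> "Final answer."
```

Before the patch, the same stream returned the reasoning text verbatim because the filter only ever reacted to an opening `<think>` tag.
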
@@ -46,8 +67,8 @@ def call_llm_for_system_prompt(
 
     llm = OpenAIModel(
         model_id=get_model_name_from_config(llm_model_config) if llm_model_config else "",
-        api_base=llm_model_config.get("base_url", ""),
-        api_key=llm_model_config.get("api_key", ""),
+        api_base=llm_model_config.get("base_url", "") if llm_model_config else "",
+        api_key=llm_model_config.get("api_key", "") if llm_model_config else "",
         temperature=0.3,
         top_p=0.95,
     )
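The `if llm_model_config else ""` guard that already protected `model_id` now also covers `base_url` and `api_key`, so a missing config dict degrades to empty strings instead of raising `AttributeError`. A small sketch of the pattern (the `_cfg` helper is hypothetical, not part of this change):

```python
from typing import Any, Dict, Optional

def _cfg(llm_model_config: Optional[Dict[str, Any]], key: str) -> str:
    # Guarded lookup used for base_url / api_key: tolerate a missing config dict.
    return llm_model_config.get(key, "") if llm_model_config else ""

print(_cfg(None, "base_url"))                          # -> "" (no AttributeError)
print(_cfg({"base_url": "http://host"}, "base_url"))   # -> "http://host"
```
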
@@ -65,16 +86,38 @@ def call_llm_for_system_prompt(
         current_request = llm.client.chat.completions.create(stream=True, **completion_kwargs)
         token_join: List[str] = []
         is_thinking = False
+        reasoning_content_seen = False
+        content_tokens_seen = 0
         for chunk in current_request:
-            new_token = chunk.choices[0].delta.content
+            delta = chunk.choices[0].delta
+            reasoning_content = getattr(delta, "reasoning_content", None)
+            new_token = delta.content
+
+            # Note: reasoning_content is separate metadata and doesn't affect content filtering
+            # We only filter content based on <think> tags in delta.content
+            if reasoning_content:
+                reasoning_content_seen = True
+                logger.debug("Received reasoning_content (metadata only, not filtering content)")
+
+            # Process content token if it exists
             if new_token is not None:
+                content_tokens_seen += 1
                 is_thinking = _process_thinking_tokens(
                     new_token,
                     is_thinking,
                     token_join,
                     callback,
                 )
-        return "".join(token_join)
+
+        result = "".join(token_join)
+        if not result and content_tokens_seen > 0:
+            logger.warning(
+                "Generated prompt is empty but %d content tokens were processed. "
+                "This suggests all content was filtered out.",
+                content_tokens_seen
+            )
+
+        return result
     except Exception as exc:
         logger.error("Failed to generate prompt from LLM: %s", str(exc))
         raise
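A rough sketch of how the revised streaming loop treats mixed chunks: `reasoning_content` is logged as metadata only, `<think>`-wrapped text in `delta.content` is filtered out, and the empty-result warning fires only when content tokens were seen but nothing survived. The fake stream and the simplified inline filter below are illustrative assumptions, not the project's real objects.

```python
import logging
from types import SimpleNamespace
from typing import List

logging.basicConfig(level=logging.DEBUG)
logger = logging.getLogger(__name__)

# Fake streaming chunks (shape assumed): the provider mixes reasoning_content
# metadata with regular content deltas, including <think>-wrapped text.
def fake_stream():
    deltas = [
        SimpleNamespace(content=None, reasoning_content="planning the answer"),
        SimpleNamespace(content="<think>internal notes</think>", reasoning_content=None),
        SimpleNamespace(content="You are a helpful assistant.", reasoning_content=None),
    ]
    for d in deltas:
        yield SimpleNamespace(choices=[SimpleNamespace(delta=d)])

token_join: List[str] = []
content_tokens_seen = 0
for chunk in fake_stream():
    delta = chunk.choices[0].delta
    if getattr(delta, "reasoning_content", None):
        # Reasoning metadata is only logged; it never reaches the prompt text.
        logger.debug("Received reasoning_content (metadata only, not filtering content)")
    if delta.content is not None:
        content_tokens_seen += 1
        # The real code delegates to _process_thinking_tokens(); this inline
        # version only handles a full <think>...</think> pair inside one token.
        visible = delta.content.partition("</think>")[2] if "</think>" in delta.content else delta.content
        if visible and "<think>" not in visible:
            token_join.append(visible)

result = "".join(token_join)
if not result and content_tokens_seen > 0:
    logger.warning(
        "Generated prompt is empty but %d content tokens were processed.",
        content_tokens_seen,
    )
print(result)  # -> "You are a helpful assistant."
```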