@@ -339,43 +339,47 @@ def __call__(self, messages: list[dict], n_samples: int = 1, temperature: float
339339 def _extract_thinking_content_from_response (self , response , wrap_tag = "think" ) -> tuple [str , str ]:
340340 """Extract reasoning and action content from an API response.
341341
342- Handles multiple formats:
343- 1. OpenAI/DeepSeek: reasoning in 'reasoning_content' or 'reasoning' field
344- 2. Apriel: reasoning before [BEGIN FINAL RESPONSE]...[END FINAL RESPONSE] tags
345- 3. Standard: content as-is
342+ Logic:
343+ 1. If reasoning_content exists: use it as think, use content as action
344+ (remove BEGIN/END FINAL RESPONSE tokens if present, add action tags)
345+ 2. If reasoning_content is empty: search content for last BEGIN/END FINAL RESPONSE block,
346+ use everything before as think, use content inside tags as action
346347
347348 Args:
348349 response: The API response object.
349350 wrap_tag: Tag name to wrap reasoning content (default: "think").
350351
351352 Returns:
352- tuple: (reasoning_wrapped , action_wrapped)
353+ tuple: (think_wrapped , action_wrapped)
353354 """
354355 message = response .choices [0 ].message
355356 msg_dict = message .to_dict () if hasattr (message , 'to_dict' ) else dict (message )
356357
357- reasoning = msg_dict .get ("reasoning_content" ) or msg_dict .get ("reasoning" )
358- content = msg_dict .get ("content" , "" ) or msg_dict .get ("text" , "" )
358+ reasoning = msg_dict .get ("reasoning_content" ) or msg_dict .get ("reasoning" ) or ""
359+ content = msg_dict .get ("content" , "" ) or msg_dict .get ("text" , "" ) or ""
359360
360361 # Case 1: Explicit reasoning field from API
361362 if reasoning :
362- reasoning_wrapped = f"<{ wrap_tag } >{ reasoning } </{ wrap_tag } >\n "
363- if "[BEGIN FINAL RESPONSE]" in content and "[END FINAL RESPONSE]" in content :
364- action = self ._extract_last_action_from_tags (content )
365- action_wrapped = f"<action>\n { action } \n </action>"
366- else :
367- action_wrapped = content
368- return reasoning_wrapped , action_wrapped
363+ think_wrapped = f"<{ wrap_tag } >{ reasoning } </{ wrap_tag } >"
364+ # Remove BEGIN/END FINAL RESPONSE tokens from content if present
365+ action_text = self ._remove_final_response_tokens (content )
366+ action_wrapped = f"<action>{ action_text } </action>"
367+ return think_wrapped , action_wrapped
369368
370- # Case 2: Apriel-style format in content
371- if "[BEGIN FINAL RESPONSE]" in content :
372- reasoning_text , action_text = self ._parse_apriel_format (content )
373- reasoning_wrapped = f"<{ wrap_tag } >\n { reasoning_text } \n </{ wrap_tag } >" if reasoning_text else ""
374- action_wrapped = f"<action>\n { action_text } \n </action>" if action_text else ""
375- return reasoning_wrapped , action_wrapped
369+ # Case 2: No reasoning field - parse content for BEGIN/END FINAL RESPONSE
370+ if "[BEGIN FINAL RESPONSE]" in content and "[END FINAL RESPONSE]" in content :
371+ think_text , action_text = self ._parse_apriel_format (content )
372+ think_wrapped = f"<{ wrap_tag } >{ think_text } </{ wrap_tag } >" if think_text else ""
373+ action_wrapped = f"<action>{ action_text } </action>" if action_text else ""
374+ return think_wrapped , action_wrapped
376375
377- # Case 3: No special format
378- return "" , content
376+ # Case 3: No special format - return content as action
377+ return "" , f"<action>{ content } </action>" if content else ""
378+
379+ def _remove_final_response_tokens (self , content : str ) -> str :
380+ """Remove [BEGIN FINAL RESPONSE] and [END FINAL RESPONSE] tokens from content."""
381+ result = content .replace ("[BEGIN FINAL RESPONSE]" , "" ).replace ("[END FINAL RESPONSE]" , "" )
382+ return result .strip ()
379383
380384 def _extract_last_action_from_tags (self , content : str ) -> str :
381385 """Extract content from the LAST [BEGIN FINAL RESPONSE]...[END FINAL RESPONSE] block."""
0 commit comments