1+ """
2+ Answer extraction utilities for MARS
3+ Extracts clean final answers from MARS synthesis output
4+ """
5+
6+ import re
7+ import logging
8+
9+ logger = logging .getLogger (__name__ )
10+
11+
def extract_clean_answer(text: str, mode: str = 'auto') -> str:
    """
    Reduce MARS synthesis output to its clean final answer.

    Args:
        text: Full synthesis output, including intermediate reasoning.
        mode: One of 'auto', 'code', 'math' or 'none'. 'none' returns
            the text untouched; 'auto' picks an extractor by inspecting
            the text; any other value uses the generic extractor.

    Returns:
        The final answer with the intermediate reasoning removed.
    """
    # 'none' disables extraction entirely.
    if mode == 'none':
        return text

    # Resolve 'auto' to a concrete answer type before dispatching.
    resolved_mode = detect_answer_type(text) if mode == 'auto' else mode

    if resolved_mode == 'code':
        return extract_code_answer(text)
    if resolved_mode == 'math':
        return extract_math_answer(text)
    return extract_generic_answer(text)
36+
37+
def detect_answer_type(text: str) -> str:
    """
    Classify synthesis output as a code, math, or generic answer.

    Code is detected from a Markdown code fence or from a line that
    starts with a Python-like statement (``def f(``, ``class X:``,
    ``import x``, ``from x import``, ``return``, ``for ... in ...:``,
    ``while ...:``). Keywords are only counted at the start of a line,
    because bare substrings such as ``'for '`` or ``'return '`` also
    occur in ordinary prose and would label almost any answer as code.

    Math is detected from common LaTeX markers. Code takes priority
    over math when both are present.

    Args:
        text: Full synthesis output.

    Returns:
        'code', 'math', or 'generic'.
    """
    # Heuristic: a keyword counts only when it opens a line and is
    # followed by statement-like syntax.
    code_statement = re.compile(
        r'^[ \t]*(?:'
        r'def[ \t]+\w+[ \t]*\('
        r'|class[ \t]+\w+[ \t]*[(:]'
        r'|import[ \t]+\w'
        r'|from[ \t]+[\w.]+[ \t]+import[ \t]'
        r'|return\b'
        r'|for[ \t]+.+[ \t]+in[ \t]+.+:[ \t]*$'
        r'|while[ \t]+.+:[ \t]*$'
        r')',
        re.MULTILINE,
    )
    has_code = '```' in text or code_statement.search(text) is not None

    # LaTeX markers; '$\\' is a dollar sign directly followed by a command.
    math_indicators = ('\\boxed', '\\frac', '\\sum', '\\int', '$$', '$\\')
    has_math = any(indicator in text for indicator in math_indicators)

    if has_code:
        return 'code'
    if has_math:
        return 'math'
    return 'generic'
54+
55+
def extract_code_answer(text: str) -> str:
    """
    Extract the final code solution from synthesis output.

    Strategy, in order:
      1. Return the last fenced code block, re-wrapped in a fence that
         keeps the block's own language tag ('python' when untagged).
      2. Return the text after the last "Solution"-style Markdown heading.
      3. Return the text after the last '###'.
      4. Return the text unchanged.

    Args:
        text: Full synthesis output.

    Returns:
        The extracted code (fenced when it came from a code block), or
        the original text when nothing better was found.
    """
    # Accept any language tag. If some fences were skipped for having an
    # unknown tag, their closing fence would be read as an opening one
    # and the prose between blocks would be returned as code.
    code_blocks = re.findall(
        r'```[ \t]*([\w+#.-]*)[^\n`]*\n(.*?)\n```',
        text,
        re.DOTALL,
    )

    if code_blocks:
        # The last block is the most likely final solution.
        language, body = code_blocks[-1]
        final_code = body.strip()
        logger.info(f"π EXTRACTION: Found {len(code_blocks)} code blocks, using last one ({len(final_code)} chars)")
        return f"```{language or 'python'}\n{final_code}\n```"

    # Fallback: look for code after common section headers. The heading
    # may also be the very first line of the text.
    sections = re.split(
        r'(?:^|\n)#+\s+(?:Final Solution|Solution|Implementation|Code)\s*\n',
        text,
        flags=re.IGNORECASE,
    )
    if len(sections) > 1:
        final_section = sections[-1].strip()
        logger.info(f"π EXTRACTION: Using code from final section ({len(final_section)} chars)")
        return final_section

    # Last resort: return text after the last heading marker.
    parts = text.split('###')
    if len(parts) > 1:
        final_part = parts[-1].strip()
        logger.info(f"π EXTRACTION: Using text after last heading ({len(final_part)} chars)")
        return final_part

    logger.warning("β οΈ EXTRACTION: No clear code found, returning full text")
    return text
86+
87+
def _find_boxed_contents(text: str) -> list:
    """Return the non-empty contents of every ``\\boxed{...}``, honoring nested braces."""
    opener = '\\boxed{'
    contents = []
    search_from = 0
    while True:
        start = text.find(opener, search_from)
        if start == -1:
            break
        body_start = start + len(opener)
        depth = 1
        pos = body_start
        # Walk forward until the brace opened by \boxed{ is closed.
        while pos < len(text) and depth > 0:
            if text[pos] == '{':
                depth += 1
            elif text[pos] == '}':
                depth -= 1
            pos += 1
        if depth == 0:
            body = text[body_start:pos - 1]
            if body:
                contents.append(body)
            search_from = pos
        else:
            # Unbalanced: skip this opener but keep looking for later ones.
            search_from = body_start
    return contents


def extract_math_answer(text: str) -> str:
    """
    Extract the final math answer from synthesis output.

    Strategy, in order:
      1. Return the last ``\\boxed{...}`` answer, re-wrapped as
         ``The final answer is $\\boxed{...}$``. Nested braces such as
         ``\\boxed{\\frac{1}{2}}`` are kept intact.
      2. Return the rest of the line after the last "final answer",
         "the answer is", or "therefore" phrase (tried in that order).
      3. Return the last paragraph.
      4. Return the text unchanged.

    Args:
        text: Full synthesis output.

    Returns:
        The extracted answer, or the original text when it is empty or
        whitespace only.
    """
    # A brace-balanced scan is used because a regex like [^}]+ stops at
    # the first '}' and truncates answers containing nested groups.
    boxed_answers = _find_boxed_contents(text)

    if boxed_answers:
        # The last boxed answer is the most likely final one.
        final_answer = boxed_answers[-1]
        logger.info(f"π EXTRACTION: Found {len(boxed_answers)} boxed answers, using last one: {final_answer}")
        return f"The final answer is $\\boxed{{{final_answer}}}$"

    # Fallback: look for "final answer" or similar phrases.
    final_patterns = [
        r'[Ff]inal answer[:\s]+(.+?)(?:\n|$)',
        r'[Tt]he answer is[:\s]+(.+?)(?:\n|$)',
        r'[Tt]herefore[,\s]+(.+?)(?:\n|$)',
    ]

    for pattern in final_patterns:
        matches = re.findall(pattern, text)
        if matches:
            final_answer = matches[-1].strip()
            logger.info(f"π EXTRACTION: Found answer via pattern '{pattern}': {final_answer}")
            return final_answer

    # Last resort: return the last paragraph. Paragraphs are separated
    # by one or more blank (possibly whitespace-only) lines.
    paragraphs = [p.strip() for p in re.split(r'\n\s*\n', text) if p.strip()]
    if paragraphs:
        final_para = paragraphs[-1]
        logger.info(f"π EXTRACTION: Using last paragraph ({len(final_para)} chars)")
        return final_para

    logger.warning("β οΈ EXTRACTION: No clear math answer found, returning full text")
    return text
125+
126+
def extract_generic_answer(text: str) -> str:
    """
    Extract the answer for generic (non-code, non-math) problems.

    Strategy, in order:
      1. For the first conclusion marker (in the order listed below)
         that occurs in the text, return the first paragraph after its
         last occurrence. Punctuation left over from the marker
         (',', ':', ';') is stripped, and a marker with nothing after
         it is skipped instead of producing an empty answer.
      2. Return the last paragraph.
      3. Return the text unchanged.

    Args:
        text: Full synthesis output.

    Returns:
        The extracted answer, or the original text when it is empty or
        whitespace only.
    """
    conclusion_markers = [
        'In conclusion',
        'Therefore',
        'Thus',
        'Hence',
        'Finally',
        'The answer is',
        'The final answer',
    ]

    # Paragraphs are separated by one or more blank (possibly
    # whitespace-only) lines.
    paragraph_break = re.compile(r'\n\s*\n')

    for marker in conclusion_markers:
        # rpartition splits on the last occurrence of the marker.
        _, found, tail = text.rpartition(marker)
        if not found:
            continue
        # Drop "," / ":" that followed the marker ("Therefore, ...").
        answer = tail.strip().lstrip(',:;').strip()
        first_para = paragraph_break.split(answer, maxsplit=1)[0].strip()
        if not first_para:
            # Marker sits at the end of the text; try the next strategy.
            continue
        logger.info(f"π EXTRACTION: Found answer after '{marker}' ({len(first_para)} chars)")
        return first_para

    # Fallback: return the last paragraph. Any text with non-whitespace
    # content has at least one paragraph, so no further fallback is
    # needed before returning the text as-is.
    paragraphs = [p.strip() for p in paragraph_break.split(text) if p.strip()]
    if paragraphs:
        final_para = paragraphs[-1]
        logger.info(f"π EXTRACTION: Using last paragraph ({len(final_para)} chars)")
        return final_para

    logger.warning("β οΈ EXTRACTION: No clear answer found, returning full text")
    return text
170+
171+
def wrap_with_thinking_tags(reasoning: str, final_answer: str) -> str:
    """
    Wrap reasoning in <think> tags and append the clean final answer.

    The result has the form::

        <think>
        {reasoning}
        </think>

        {final_answer}

    No padding is added around the tags or the interpolated values.

    Args:
        reasoning: All intermediate reasoning, logs, agent outputs.
        final_answer: Clean final answer extracted from synthesis.

    Returns:
        Formatted output with thinking tags.
    """
    return f"<think>\n{reasoning}\n</think>\n\n{final_answer}"
184+
185+
def strip_thinking_tags(text: str) -> str:
    """
    Drop every <think>...</think> section, tags and content alike.

    Args:
        text: Text that may contain thinking sections.

    Returns:
        The remaining text with surrounding whitespace trimmed.
    """
    # Non-greedy so each section is removed on its own; DOTALL lets a
    # section span multiple lines.
    think_section = re.compile(r'<think>.*?</think>', re.DOTALL)
    without_thinking = think_section.sub('', text)
    return without_thinking.strip()
199+
200+
def get_answer_after_thinking(text: str) -> str:
    """
    Extract only the content after the first </think> tag.

    Args:
        text: Text with thinking tags.

    Returns:
        The whitespace-trimmed content after the first </think> tag
        (an empty string when nothing follows it), or the full text
        unchanged if there is no </think> tag.
    """
    # partition distinguishes "no tag" from "tag with nothing after it";
    # the latter returns '' rather than leaking the reasoning.
    _, closing_tag, remainder = text.partition('</think>')
    if not closing_tag:
        return text
    return remainder.strip()
0 commit comments