Skip to content

Commit d9c04b5

Browse files
committed
fixes
1 parent 72561e5 commit d9c04b5

File tree

2 files changed

+244
-1
lines changed

2 files changed

+244
-1
lines changed
Lines changed: 214 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,214 @@
1+
"""
2+
Answer extraction utilities for MARS
3+
Extracts clean final answers from MARS synthesis output
4+
"""
5+
6+
import re
7+
import logging
8+
9+
logger = logging.getLogger(__name__)
10+
11+
12+
def extract_clean_answer(text: str, mode: str = 'auto') -> str:
    """
    Reduce a MARS synthesis output to its final answer.

    Args:
        text: Full synthesis output, including intermediate reasoning.
        mode: Extraction strategy. 'none' returns ``text`` untouched, 'auto'
            chooses a strategy via ``detect_answer_type``, 'code' and 'math'
            force the matching extractor, and any other value falls back to
            the generic extractor.

    Returns:
        The extracted final answer, or ``text`` unchanged for mode 'none'.
    """
    # 'none' switches extraction off entirely.
    if mode == 'none':
        return text

    # Resolve 'auto' to a concrete answer type before dispatching.
    resolved = detect_answer_type(text) if mode == 'auto' else mode

    if resolved == 'code':
        extractor = extract_code_answer
    elif resolved == 'math':
        extractor = extract_math_answer
    else:
        extractor = extract_generic_answer
    return extractor(text)
36+
37+
38+
def detect_answer_type(text: str) -> str:
    """
    Classify synthesis output as a 'code', 'math', or 'generic' answer.

    Code is recognised by a Markdown code fence or by a line that *starts*
    with a code construct (``def name(``, ``class Name:``, ``import x``,
    ``from x import``, ``return``, or a ``for``/``while`` header ending in a
    colon). Matching whole-line constructs rather than bare substrings keeps
    ordinary prose such as "solve for x" or "return to step 2" from being
    mistaken for code.

    Args:
        text: Full synthesis output to classify.

    Returns:
        'code', 'math', or 'generic'. Code takes precedence over math when
        both kinds of indicator are present.
    """
    # A fenced block is an unambiguous sign of a code answer.
    if '```' in text:
        return 'code'

    # Unfenced code: only accept constructs anchored at the start of a line.
    code_line = re.compile(
        r'^[ \t]*(?:'
        r'def\s+\w+\s*\('
        r'|class\s+\w+\s*[(:]'
        r'|import\s+\w'
        r'|from\s+[\w.]+\s+import\s'
        r'|return(?:\s|$)'
        r'|(?:for|while)\s[^\n]*:[ \t]*$'
        r')',
        re.MULTILINE,
    )
    if code_line.search(text):
        return 'code'

    # LaTeX commands and math delimiters mark a math answer.
    math_indicators = ('\\boxed', '\\frac', '\\sum', '\\int', '$$', '$\\')
    if any(indicator in text for indicator in math_indicators):
        return 'math'

    return 'generic'
54+
55+
56+
def extract_code_answer(text: str) -> str:
    """
    Extract clean code from synthesis output.

    Strategies are tried in order:
      1. The last non-empty fenced code block, re-emitted as a fenced block
         that keeps its original language tag ('python' when untagged).
      2. The text after the last "Final Solution" / "Solution" /
         "Implementation" / "Code" Markdown heading.
      3. The text after the last '###'.
      4. The full text, unchanged.

    Args:
        text: Full synthesis output, possibly containing several code blocks.

    Returns:
        The extracted code (fenced when it came from a fenced block), or
        ``text`` itself when nothing better could be found.
    """
    # Accept any language tag so opening and closing fences always pair up;
    # a fixed language list makes a closing fence look like an opening one.
    fence = re.compile(r'```[ \t]*([\w+#.-]*)[^\n]*\n(.*?)\n[ \t]*```', re.DOTALL)
    code_blocks = [
        # Trim blank lines and trailing space only: leading indentation of
        # the first line is significant for the code's relative indentation.
        (lang, body.strip('\n').rstrip())
        for lang, body in fence.findall(text)
        if body.strip()
    ]

    if code_blocks:
        # The last block is most likely the final solution.
        lang, final_code = code_blocks[-1]
        logger.info(
            "📝 EXTRACTION: Found %d code blocks, using last one (%d chars)",
            len(code_blocks), len(final_code),
        )
        return f"```{lang or 'python'}\n{final_code}\n```"

    # Fallback: look for code after common section headers.
    sections = re.split(
        r'(?:^|\n)#+\s+(?:Final Solution|Solution|Implementation|Code)\s*\n',
        text,
        flags=re.IGNORECASE,
    )
    if len(sections) > 1:
        final_section = sections[-1].strip()
        if final_section:
            logger.info(
                "📝 EXTRACTION: Using code from final section (%d chars)",
                len(final_section),
            )
            return final_section

    # Last resort: text after the last heading marker.
    parts = text.split('###')
    if len(parts) > 1:
        final_part = parts[-1].strip()
        if final_part:
            logger.info(
                "📝 EXTRACTION: Using text after last heading (%d chars)",
                len(final_part),
            )
            return final_part

    logger.warning("⚠️ EXTRACTION: No clear code found, returning full text")
    return text
86+
87+
88+
def _boxed_contents(text: str) -> list:
    """Return the brace-balanced contents of every non-empty \\boxed{...} in text."""
    marker = '\\boxed{'
    found = []
    pos = text.find(marker)
    while pos != -1:
        start = pos + len(marker)
        depth = 1
        idx = start
        while idx < len(text) and depth:
            ch = text[idx]
            if ch == '\\':
                # Skip the escaped character so \{ and \} are not counted.
                idx += 2
                continue
            if ch == '{':
                depth += 1
            elif ch == '}':
                depth -= 1
            idx += 1
        if depth == 0:
            content = text[start:idx - 1]
            if content.strip():
                found.append(content)
            resume = idx
        else:
            # Unbalanced braces: ignore this occurrence and keep scanning.
            resume = start
        pos = text.find(marker, resume)
    return found


def extract_math_answer(text: str) -> str:
    """
    Extract clean math answer from synthesis output.

    Strategies are tried in order:
      1. The last \\boxed{...} expression, matched with balanced braces so
         nested groups such as \\frac{1}{2} are kept intact.
      2. The last non-empty match of a "final answer" / "the answer is" /
         "therefore" phrase (phrases are tried in that order).
      3. The last non-empty paragraph.
      4. The full text, unchanged.

    Args:
        text: Full synthesis output with reasoning.

    Returns:
        "The final answer is $\\boxed{...}$" when a boxed answer exists,
        otherwise the best fallback described above.
    """
    boxed_answers = _boxed_contents(text)

    if boxed_answers:
        # The last boxed answer is most likely the final one.
        final_answer = boxed_answers[-1]
        logger.info(
            "📝 EXTRACTION: Found %d boxed answers, using last one: %s",
            len(boxed_answers), final_answer,
        )
        return f"The final answer is $\\boxed{{{final_answer}}}$"

    # Fallback: look for "final answer" or similar phrases.
    final_patterns = [
        r'[Ff]inal answer[:\s]+(.+?)(?:\n|$)',
        r'[Tt]he answer is[:\s]+(.+?)(?:\n|$)',
        r'[Tt]herefore[,\s]+(.+?)(?:\n|$)',
    ]

    for pattern in final_patterns:
        # Whitespace-only captures carry no answer, so drop them.
        matches = [m.strip() for m in re.findall(pattern, text) if m.strip()]
        if matches:
            final_answer = matches[-1]
            logger.info(
                "📝 EXTRACTION: Found answer via pattern '%s': %s",
                pattern, final_answer,
            )
            return final_answer

    # Last resort: return the last paragraph.
    paragraphs = [p.strip() for p in text.split('\n\n') if p.strip()]
    if paragraphs:
        final_para = paragraphs[-1]
        logger.info("📝 EXTRACTION: Using last paragraph (%d chars)", len(final_para))
        return final_para

    logger.warning("⚠️ EXTRACTION: No clear math answer found, returning full text")
    return text
125+
126+
127+
def extract_generic_answer(text: str) -> str:
    """
    Extract answer for generic (non-code, non-math) problems.

    The conclusion marker that occurs latest in the text is located, and the
    first paragraph following it is returned with leading ',', ':' or ';'
    removed. If no marker is present, or nothing follows it, the last
    non-empty paragraph is returned instead.

    Args:
        text: Full synthesis output with reasoning.

    Returns:
        The extracted answer, or ``text`` unchanged when it holds no
        non-whitespace content.
    """
    conclusion_markers = (
        'In conclusion',
        'Therefore',
        'Thus',
        'Hence',
        'Finally',
        'The answer is',
        'The final answer',
    )

    # Pick the marker nearest the end of the text: the final conclusion is
    # whichever one comes last, not whichever is listed first.
    best_pos, best_marker = -1, None
    for marker in conclusion_markers:
        pos = text.rfind(marker)
        if pos > best_pos:
            best_pos, best_marker = pos, marker

    if best_marker is not None:
        answer = text[best_pos + len(best_marker):].strip()
        # Drop the punctuation that joined the marker to the answer
        # ("Therefore, ..." / "The answer is: ...").
        answer = answer.lstrip(',:;').strip()
        first_para = answer.split('\n\n')[0].strip()
        if first_para:
            logger.info(
                "📝 EXTRACTION: Found answer after '%s' (%d chars)",
                best_marker, len(first_para),
            )
            return first_para

    # Fallback: return the last paragraph.
    paragraphs = [p.strip() for p in text.split('\n\n') if p.strip()]
    if paragraphs:
        final_para = paragraphs[-1]
        logger.info("📝 EXTRACTION: Using last paragraph (%d chars)", len(final_para))
        return final_para

    # Only reached for empty or whitespace-only input.
    logger.warning("⚠️ EXTRACTION: No clear answer found, returning full text")
    return text
170+
171+
172+
def wrap_with_thinking_tags(reasoning: str, final_answer: str) -> str:
    """
    Build the final output: reasoning inside <think> tags, then the answer.

    Args:
        reasoning: All intermediate reasoning, logs, agent outputs
        final_answer: Clean final answer extracted from synthesis

    Returns:
        ``reasoning`` enclosed in a <think>...</think> block, followed by a
        blank line and ``final_answer``.
    """
    template = "<think>\n{}\n</think>\n\n{}"
    return template.format(reasoning, final_answer)
184+
185+
186+
def strip_thinking_tags(text: str) -> str:
    """
    Delete every <think>...</think> block, tags and content included.

    Args:
        text: Text potentially containing thinking tags

    Returns:
        The remaining text with surrounding whitespace stripped.
    """
    # Non-greedy with DOTALL: each block is removed on its own, even when it
    # spans several lines.
    think_block = re.compile(r'<think>.*?</think>', re.DOTALL)
    without_thinking = think_block.sub('', text)
    return without_thinking.strip()
199+
200+
201+
def get_answer_after_thinking(text: str) -> str:
    """
    Extract only the content after the first </think> tag.

    Args:
        text: Text with thinking tags

    Returns:
        Whitespace-stripped content after the first </think> tag (an empty
        string when nothing follows it), or the full text unchanged if no
        </think> tag is present.
    """
    _, closing_tag, remainder = text.partition('</think>')
    if not closing_tag:
        # No thinking block at all: the whole text is the answer.
        return text
    # The tag is present, so never hand back the reasoning, even when the
    # answer part is empty.
    return remainder.strip()

optillm/mars/mars.py

Lines changed: 30 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,10 @@
2020
from .aggregator import MARSAggregator
2121
from .strategy_network import StrategyNetwork
2222
from .prompts import SYNTHESIS_PROMPT
23+
from .answer_extraction import (
24+
extract_clean_answer,
25+
wrap_with_thinking_tags,
26+
)
2327

2428
logger = logging.getLogger(__name__)
2529

@@ -43,6 +47,9 @@
4347
'enable_strategy_network': True, # Enable cross-agent strategy sharing
4448
'strategy_extraction_enabled': True, # Extract reasoning strategies from solutions
4549
'cross_agent_enhancement': True, # Generate enhanced solutions using peer strategies
50+
# Thinking tags for clean answer extraction
51+
'use_thinking_tags': True, # Wrap reasoning in <think></think> tags
52+
'answer_extraction_mode': 'auto', # 'auto', 'code', 'math', or 'none'
4653
}
4754

4855
# Lightweight MARS configuration for coding benchmarks (faster, simpler)
@@ -61,6 +68,9 @@
6168
'enable_strategy_network': False, # Skip strategy network
6269
'strategy_extraction_enabled': False,
6370
'cross_agent_enhancement': False,
71+
# Thinking tags for clean answer extraction
72+
'use_thinking_tags': True, # Wrap reasoning in <think></think> tags
73+
'answer_extraction_mode': 'auto', # 'auto', 'code', 'math', or 'none'
6474
}
6575

6676
def multi_agent_reasoning_system(
@@ -266,7 +276,26 @@ async def _run_mars_parallel(
266276
percentage = (duration / total_time) * 100
267277
logger.info(f"🏁 {phase}: {duration:.2f}s ({percentage:.1f}%)")
268278

269-
return final_solution, total_reasoning_tokens
279+
# Apply thinking tags if enabled
280+
if config.get('use_thinking_tags', True):
281+
logger.info(f"πŸ“ ANSWER EXTRACTION: Extracting clean answer with mode '{config.get('answer_extraction_mode', 'auto')}'")
282+
283+
# Extract clean answer from synthesis output
284+
clean_answer = extract_clean_answer(
285+
final_solution,
286+
mode=config.get('answer_extraction_mode', 'auto')
287+
)
288+
289+
logger.info(f"πŸ“ ANSWER EXTRACTION: Extracted {len(clean_answer)} char answer from {len(final_solution)} char synthesis")
290+
291+
# Wrap reasoning in thinking tags
292+
formatted_output = wrap_with_thinking_tags(final_solution, clean_answer)
293+
294+
logger.info(f"πŸ“ ANSWER EXTRACTION: Final output: {len(formatted_output)} chars (with thinking tags)")
295+
return formatted_output, total_reasoning_tokens
296+
else:
297+
logger.info(f"πŸ“ ANSWER EXTRACTION: Thinking tags disabled, returning raw synthesis")
298+
return final_solution, total_reasoning_tokens
270299

271300
except Exception as e:
272301
logger.error(f"MARS execution failed: {str(e)}")

0 commit comments

Comments
(0)