fixes

codelion · codelion · commit d9c04b54e44f · 2025-10-01T12:41:47.000+08:00
diff --git a/optillm/mars/answer_extraction.py b/optillm/mars/answer_extraction.py
@@ -0,0 +1,214 @@
+"""
+Answer extraction utilities for MARS
+Extracts clean final answers from MARS synthesis output
+"""
+
+import re
+import logging
+
+logger = logging.getLogger(__name__)
+
+
+def extract_clean_answer(text: str, mode: str = 'auto') -> str:
+    """
+    Reduce MARS synthesis output to its final answer
+
+    Args:
+        text: Full synthesis output with reasoning
+        mode: 'none' returns text untouched, 'auto' lets detect_answer_type
+            choose, 'code' and 'math' pick those extractors; any other
+            value is handled by the generic extractor
+
+    Returns:
+        The string produced by the selected extractor
+    """
+    # 'none' bypasses extraction entirely
+    if mode == 'none':
+        return text
+
+    resolved = detect_answer_type(text) if mode == 'auto' else mode
+    # Anything other than 'code' or 'math' ends up at the generic extractor
+    if resolved == 'code':
+        return extract_code_answer(text)
+    if resolved == 'math':
+        return extract_math_answer(text)
+    return extract_generic_answer(text)
+
+
+def detect_answer_type(text: str) -> str:
+    """
+    Classify text as 'code', 'math', or 'generic'.
+
+    A Markdown fence always means code. Failing that, LaTeX markers mean math, so
+    prose like "for all x" or "return to" in a proof is not mistaken for code.
+    Bare keywords count as code only at the start of a line (indentation allowed).
+    """
+    if '```' in text:
+        return 'code'
+    math_indicators = ['\\boxed', '\\frac', '\\sum', '\\int', '$$', '$\\']
+    if any(indicator in text for indicator in math_indicators):
+        return 'math'
+    if re.search(r'^[ \t]*(?:def|class|import|from|return|for|while) ', text, re.MULTILINE):
+        return 'code'
+    return 'generic'
+
+
+def extract_code_answer(text: str) -> str:
+    """
+    Extract clean code from synthesis output: the last fenced code block,
+    re-fenced with its own language tag (python when the block is untagged).
+    Without a fence, falls back to the text after a solution heading or '###'.
+    """
+    # Accept any fence tag: an unlisted one (```ts) would leave its opener unmatched
+    code_blocks = re.findall(r'```([\w+#.-]*)[ \t\r]*\n(.*?)\n```', text, re.DOTALL)
+    if code_blocks:
+        # Return last code block (most likely the final solution)
+        language, final_code = code_blocks[-1][0], code_blocks[-1][1].strip()
+        logger.info(f"📝 EXTRACTION: Found {len(code_blocks)} code blocks, using last one ({len(final_code)} chars)")
+        return f"```{language or 'python'}\n{final_code}\n```"
+
+    # Fallback: Look for code after common section headers
+    sections = re.split(r'\n#+\s+(?:Final Solution|Solution|Implementation|Code)\s*\n', text, flags=re.IGNORECASE)
+    if len(sections) > 1:
+        final_section = sections[-1].strip()
+        logger.info(f"📝 EXTRACTION: Using code from final section ({len(final_section)} chars)")
+        return final_section
+
+    # Last resort: Return text after last heading
+    parts = text.split('###')
+    if len(parts) > 1:
+        final_part = parts[-1].strip()
+        logger.info(f"📝 EXTRACTION: Using text after last heading ({len(final_part)} chars)")
+        return final_part
+
+    logger.warning("⚠️  EXTRACTION: No clear code found, returning full text")
+    return text
+
+
+def extract_math_answer(text: str) -> str:
+    """
+    Extract clean math answer from synthesis output: the last \\boxed{...} answer,
+    with braces matched by depth so nested LaTeX such as \\frac{1}{2} stays whole
+    """
+    boxed_answers = []
+    for opener in re.finditer(r'\\boxed\{(?=[^}])', text):  # lookahead skips empty \boxed{}
+        depth, start = 1, opener.end()
+        for pos in range(start, len(text)):
+            depth += {'{': 1, '}': -1}.get(text[pos], 0)
+            if depth == 0:
+                boxed_answers.append(text[start:pos])
+                break
+
+    if boxed_answers:
+        final_answer = boxed_answers[-1]  # last one is most likely the final answer
+        logger.info(f"📝 EXTRACTION: Found {len(boxed_answers)} boxed answers, using last one: {final_answer}")
+        return f"The final answer is $\\boxed{{{final_answer}}}$"
+
+    # Fallback: Look for "final answer" or similar phrases, tried in this order
+    final_patterns = [r'[Ff]inal answer[:\s]+(.+?)(?:\n|$)', r'[Tt]he answer is[:\s]+(.+?)(?:\n|$)',
+                      r'[Tt]herefore[,\s]+(.+?)(?:\n|$)']
+    for pattern in final_patterns:
+        matches = re.findall(pattern, text)
+        if matches:
+            final_answer = matches[-1].strip()
+            logger.info(f"📝 EXTRACTION: Found answer via pattern '{pattern}': {final_answer}")
+            return final_answer
+
+    # Last resort: Return last paragraph
+    paragraphs = [p.strip() for p in text.split('\n\n') if p.strip()]
+    if paragraphs:
+        final_para = paragraphs[-1]
+        logger.info(f"📝 EXTRACTION: Using last paragraph ({len(final_para)} chars)")
+        return final_para
+    logger.warning("⚠️  EXTRACTION: No clear math answer found, returning full text")
+    return text
+
+
+def extract_generic_answer(text: str) -> str:
+    """
+    Extract answer for generic (non-code, non-math) problems
+
+    Returns the first paragraph following the last occurrence of a conclusion
+    marker (markers are tried in list order), with the punctuation that joined
+    the marker to the answer removed: "Therefore, X" yields "X", not ", X".
+    A marker with nothing after it is skipped. If no marker yields an answer,
+    the last paragraph is returned; blank input is returned unchanged.
+    """
+    # Try to find conclusion markers
+    conclusion_markers = [
+        'In conclusion',
+        'Therefore',
+        'Thus',
+        'Hence',
+        'Finally',
+        'The answer is',
+        'The final answer is',
+        'The final answer',
+    ]
+
+    for marker in conclusion_markers:
+        if marker not in text:
+            continue
+        # Get text after last occurrence of marker, minus leading separators
+        answer = text.rsplit(marker, 1)[1].strip().lstrip(':,;').lstrip()
+        # Get first sentence/paragraph after marker
+        first_para = answer.split('\n\n')[0].strip()
+        if first_para:
+            logger.info(f"📝 EXTRACTION: Found answer after '{marker}' ({len(first_para)} chars)")
+            return first_para
+
+    # Fallback: Return last paragraph. (A further per-sentence fallback would be
+    # unreachable: any text with a non-blank sentence also has a paragraph.)
+    paragraphs = [p.strip() for p in text.split('\n\n') if p.strip()]
+    if paragraphs:
+        final_para = paragraphs[-1]
+        logger.info(f"📝 EXTRACTION: Using last paragraph ({len(final_para)} chars)")
+        return final_para
+
+    logger.warning("⚠️  EXTRACTION: No clear answer found, returning full text")
+    return text
+
+
+def wrap_with_thinking_tags(reasoning: str, final_answer: str) -> str:
+    """
+    Build the final output: reasoning inside <think> tags, then the answer
+    Args:
+        reasoning: Text placed between the opening and closing think tags
+        final_answer: Text placed after the closing tag, following a blank line
+
+    Returns:
+        The think block, one empty line, then final_answer, as a single string
+    """
+    thinking_block = f"<think>\n{reasoning}\n</think>"
+    return f"{thinking_block}\n\n{final_answer}"
+
+
+def strip_thinking_tags(text: str) -> str:
+    """
+    Drop every <think>...</think> block, tags and enclosed content alike
+
+    Args:
+        text: Text that may contain one or more thinking blocks
+
+    Returns:
+        Remaining text with surrounding whitespace trimmed; an opening tag
+        without a matching closing tag is left in place
+    """
+    think_block = re.compile(r'<think>.*?</think>', re.DOTALL)
+    return think_block.sub('', text).strip()
+
+
+def get_answer_after_thinking(text: str) -> str:
+    """
+    Extract only the content after the last </think> tag
+
+    Args:
+        text: Text with thinking tags
+
+    Returns:
+        Stripped content after the last </think> (may be empty), or full text if no tag
+    """
+    # Split at the last tag: reasoning may itself contain </think>, and splitting
+    # at the first one would leak the rest of the reasoning into the answer
+    _, closing_tag, answer = text.rpartition('</think>')
+    return answer.strip() if closing_tag else text
diff --git a/optillm/mars/mars.py b/optillm/mars/mars.py
@@ -20,6 +20,10 @@
 from .aggregator import MARSAggregator
 from .strategy_network import StrategyNetwork
 from .prompts import SYNTHESIS_PROMPT
+from .answer_extraction import (
+    extract_clean_answer,
+    wrap_with_thinking_tags,
+)
 
 logger = logging.getLogger(__name__)
 
@@ -43,6 +47,9 @@
     'enable_strategy_network': True,  # Enable cross-agent strategy sharing
     'strategy_extraction_enabled': True,  # Extract reasoning strategies from solutions
     'cross_agent_enhancement': True,  # Generate enhanced solutions using peer strategies
+    # Thinking tags for clean answer extraction
+    'use_thinking_tags': True,  # Wrap reasoning in <think></think> tags
+    'answer_extraction_mode': 'auto',  # 'auto', 'code', 'math', or 'none'
 }
 
 # Lightweight MARS configuration for coding benchmarks (faster, simpler)
@@ -61,6 +68,9 @@
     'enable_strategy_network': False,  # Skip strategy network
     'strategy_extraction_enabled': False,
     'cross_agent_enhancement': False,
+    # Thinking tags for clean answer extraction
+    'use_thinking_tags': True,  # Wrap reasoning in <think></think> tags
+    'answer_extraction_mode': 'auto',  # 'auto', 'code', 'math', or 'none'
 }
 
 def multi_agent_reasoning_system(
@@ -266,7 +276,26 @@ async def _run_mars_parallel(
             percentage = (duration / total_time) * 100
             logger.info(f"🏁   {phase}: {duration:.2f}s ({percentage:.1f}%)")
 
-        return final_solution, total_reasoning_tokens
+        # Apply thinking tags if enabled
+        if config.get('use_thinking_tags', True):
+            logger.info(f"📝 ANSWER EXTRACTION: Extracting clean answer with mode '{config.get('answer_extraction_mode', 'auto')}'")
+
+            # Extract clean answer from synthesis output
+            clean_answer = extract_clean_answer(
+                final_solution,
+                mode=config.get('answer_extraction_mode', 'auto')
+            )
+
+            logger.info(f"📝 ANSWER EXTRACTION: Extracted {len(clean_answer)} char answer from {len(final_solution)} char synthesis")
+
+            # Wrap reasoning in thinking tags
+            formatted_output = wrap_with_thinking_tags(final_solution, clean_answer)
+
+            logger.info(f"📝 ANSWER EXTRACTION: Final output: {len(formatted_output)} chars (with thinking tags)")
+            return formatted_output, total_reasoning_tokens
+        else:
+            logger.info(f"📝 ANSWER EXTRACTION: Thinking tags disabled, returning raw synthesis")
+            return final_solution, total_reasoning_tokens
 
     except Exception as e:
         logger.error(f"MARS execution failed: {str(e)}")