
Commit b57955c

jeremymanning and claude committed
Fix response parsing for failing tasks
- Enhanced score parsing to handle scores 0-10 and normalize to 0-1 range
- Added support for Title/Idea format in brainstorming responses (for FORK_IDEA)
- Made review parsing case-insensitive for strengths/concerns headers
- Added debug tests to validate parsing improvements
- Fixed parsing edge cases that were causing task failures

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
1 parent 0193c4d commit b57955c

File tree: 5 files changed (+354, -3 lines changed)


code/llmxive-automation/src/response_parser.py

Lines changed: 41 additions & 3 deletions
@@ -3,6 +3,7 @@
 import re
 import json
 import logging
+from datetime import datetime
 from typing import Optional, Dict, List, Any, Union
 
 logger = logging.getLogger(__name__)

@@ -69,7 +70,10 @@ def parse_review_score(self, text: str) -> Optional[float]:
         if match:
             try:
                 score = extractor(match)
-                if 0 <= score <= 1.0:
+                # Allow scores up to 10 and normalize to 0-1
+                if 0 <= score <= 10:
+                    if score > 1.0:
+                        score = score / 10.0  # Normalize to 0-1 range
                     return score
             except ValueError:
                 continue

@@ -134,6 +138,40 @@ def parse_brainstorm_response(self, text: str) -> Optional[Dict[str, str]]:
         """Parse brainstorming response"""
         result = {}
 
+        # Check for Title/Idea format (used in FORK_IDEA)
+        title_match = re.search(r'Title:\s*(.+?)(?:\n|$)', text, re.IGNORECASE)
+        if title_match:
+            # Parse Title/Idea format
+            idea_match = re.search(r'Idea:\s*(.+?)(?:\n|$)', text, re.IGNORECASE)
+            if idea_match:
+                result['idea'] = idea_match.group(1).strip()
+            else:
+                result['idea'] = title_match.group(1).strip()
+
+            # Extract other fields
+            field_match = re.search(r'Field:\s*(.+?)(?:\n|$)', text, re.IGNORECASE)
+            if field_match:
+                result['field'] = field_match.group(1).strip()
+
+            # Generate ID from field
+            if 'field' in result:
+                field_short = result['field'].lower().replace(' ', '-')[:20]
+                date_str = datetime.now().strftime('%Y%m%d')
+                result['id'] = f"{field_short}-{date_str}-001"
+
+            # Extract keywords
+            keywords_match = re.search(r'Keywords?:\s*(.+?)(?:\n|$)', text, re.IGNORECASE)
+            if keywords_match:
+                result['keywords'] = keywords_match.group(1).strip()
+            else:
+                # Generate keywords from idea
+                words = result.get('idea', '').lower().split()[:5]
+                result['keywords'] = ', '.join(words)
+
+            if 'field' in result and 'idea' in result:
+                return result
+
+        # Try standard format
         # Required fields
         fields = ['field', 'idea', 'id', 'keywords']

@@ -169,12 +207,12 @@ def parse_review_response(self, text: str) -> Optional[Dict[str, Any]]:
         result = {}
 
         # Extract strengths
-        strengths_match = re.search(r'Strengths?:?\s*\n((?:[-*•]\s*.+\n?)+)', text, re.MULTILINE)
+        strengths_match = re.search(r'Strengths?:?\s*\n((?:[-*•]\s*.+\n?)+)', text, re.MULTILINE | re.IGNORECASE)
         if strengths_match:
             result['strengths'] = strengths_match.group(1).strip()
 
         # Extract concerns
-        concerns_match = re.search(r'Concerns?:?\s*\n((?:[-*•]\s*.+\n?)+)', text, re.MULTILINE)
+        concerns_match = re.search(r'Concerns?:?\s*\n((?:[-*•]\s*.+\n?)+)', text, re.MULTILINE | re.IGNORECASE)
         if concerns_match:
             result['concerns'] = concerns_match.group(1).strip()

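As a quick sanity check of the parser changes above (a minimal sketch, not part of the commit; it assumes ResponseParser is importable as in the debug scripts below, and that the score patterns accept values like "Score: 8.5"):

from src.response_parser import ResponseParser

parser = ResponseParser()

# Scores above 1.0 are now divided by 10, so "Score: 8.5" parses to 0.85;
# scores already in the 0-1 range pass through unchanged
assert parser.parse_review_score("Score: 8.5") == 0.85

# The Title/Idea format used by FORK_IDEA is now recognized; 'id' is built
# from the field plus today's date, and keywords fall back to the first
# five words of the idea when no Keywords: line is present
idea = parser.parse_brainstorm_response(
    "Title: Variation 1\nField: Biology\nIdea: Apply the technique to biological systems"
)
assert idea['field'] == 'Biology'

# Strengths/Concerns headers now match case-insensitively
review = parser.parse_review_response(
    "Score: 8\n\nSTRENGTHS:\n- Clear methodology\n\nCONCERNS:\n- Timeline seems aggressive\n"
)
print(review)  # expect strengths/concerns populated despite the uppercase headers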
code/llmxive-automation/src/task_executor.py

Lines changed: 1 addition & 0 deletions
@@ -2,6 +2,7 @@
 
 import os
 import re
+import json
 import logging
 from datetime import datetime
 from typing import Optional, Dict, List, Any
Lines changed: 187 additions & 0 deletions
#!/usr/bin/env python3
"""Debug script for the 3 failing tasks"""

import os
import sys
from unittest.mock import Mock

sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..'))

from src.task_executor import TaskExecutor
from src.conversation_manager import ConversationManager
from src.github_handler import GitHubHandler


def test_review_technical_design():
    """Debug REVIEW_TECHNICAL_DESIGN task"""
    print("\n=== Testing REVIEW_TECHNICAL_DESIGN ===")

    # Setup
    conv_mgr = Mock(spec=ConversationManager)
    conv_mgr.model_name = "test-model"
    github = Mock(spec=GitHubHandler)

    # Mock responses
    github.get_file_content.return_value = "# Technical Design\n\nThis is a test design document."
    conv_mgr.query_model.return_value = """Score: 8.5

Strengths:
- Clear methodology
- Novel approach
- Good feasibility

Concerns:
- Needs more evaluation metrics
- Timeline seems aggressive

Recommendation: Accept
Summary: Strong proposal with minor revisions needed."""

    github.create_file.return_value = True
    github.get_issue_score.return_value = 5.0
    github.update_issue_score.return_value = True

    executor = TaskExecutor(conv_mgr, github)

    # Test with proper context
    context = {
        "design_path": "technical_design_documents/test-proj/design.md",
        "project_id": "test-proj",  # This was missing in the test!
        "issue_number": 123
    }

    result = executor.execute_task("REVIEW_TECHNICAL_DESIGN", context)
    print(f"Result: {result}")
    print(f"Success: {result.get('success', False)}")
    if not result.get('success'):
        print(f"Error: {result.get('error')}")
        if 'raw_response' in result:
            print(f"Raw response: {result['raw_response']}")


def test_update_readme_table():
    """Debug UPDATE_README_TABLE task"""
    print("\n=== Testing UPDATE_README_TABLE ===")

    # Setup
    conv_mgr = Mock(spec=ConversationManager)
    conv_mgr.model_name = "test-model"
    github = Mock(spec=GitHubHandler)

    # Mock responses
    github.get_file_content.return_value = """# README

## Projects Table

| ID | Name | Status |
|----|------|--------|
| p1 | Project 1 | Active |
"""

    conv_mgr.query_model.return_value = "| test-proj | Test Project | In Progress |"
    github.insert_table_row.return_value = True

    executor = TaskExecutor(conv_mgr, github)

    # Test with CORRECT parameter names
    context = {
        "file_path": "README.md",  # Not readme_path
        "table_identifier": "Projects Table",  # Not table_name
        "new_entry": {"id": "test-proj", "name": "Test Project", "status": "In Progress"}  # Not new_row
    }

    result = executor.execute_task("UPDATE_README_TABLE", context)
    print(f"Result: {result}")
    print(f"Success: {result.get('success', False)}")
    if not result.get('success'):
        print(f"Error: {result.get('error')}")


def test_fork_idea():
    """Debug FORK_IDEA task"""
    print("\n=== Testing FORK_IDEA ===")

    # Setup
    conv_mgr = Mock(spec=ConversationManager)
    conv_mgr.model_name = "test-model"
    github = Mock(spec=GitHubHandler)

    # Mock original issue
    original_issue = Mock()
    original_issue.number = 123
    original_issue.title = "Original Research Idea"
    original_issue.body = """**Field**: Computer Science
**Abstract**: This is the original idea
**Approach**: Novel approach"""

    github.get_issue.return_value = original_issue

    # Mock model response with proper format
    conv_mgr.query_model.return_value = """Title: Variation 1 - Applied to Biology
Field: Biology
Idea: Apply the same technique to biological systems
Abstract: Testing in biological context
Approach: Adapted approach for bio

Title: Variation 2 - Different Method
Field: Computer Science
Idea: Use alternative algorithm for same problem
Abstract: Different algorithmic approach
Approach: New method

Title: Variation 3 - Extended Complexity
Field: Computer Science
Idea: Extend original with multi-modal inputs
Abstract: Enhanced version with more features
Approach: Extended framework"""

    # Mock issue creation
    created_issue = Mock()
    created_issue.number = 124
    github.create_issue.return_value = created_issue

    executor = TaskExecutor(conv_mgr, github)

    # Test with correct parameter name
    context = {
        "issue_number": 123  # This is correct but was being called as parent_issue in test
    }

    result = executor.execute_task("FORK_IDEA", context)
    print(f"Result: {result}")
    print(f"Success: {result.get('success', False)}")
    if not result.get('success'):
        print(f"Error: {result.get('error')}")
    else:
        print(f"Created issues: {result.get('forked_issues', [])}")


def test_parse_issue_for_idea():
    """Test the _parse_issue_for_idea helper"""
    print("\n=== Testing _parse_issue_for_idea helper ===")

    # Setup
    conv_mgr = Mock(spec=ConversationManager)
    github = Mock(spec=GitHubHandler)
    executor = TaskExecutor(conv_mgr, github)

    # Test issue
    issue = Mock()
    issue.title = "[Idea] Test Research Idea"
    issue.body = """**Field**: Computer Science

**Abstract**: This is a test abstract

**Approach**: Novel approach using AI

**Keywords**: AI, machine learning, research"""

    result = executor._parse_issue_for_idea(issue)
    print(f"Parsed result: {result}")


if __name__ == "__main__":
    test_review_technical_design()
    test_update_readme_table()
    test_fork_idea()
    test_parse_issue_for_idea()
Lines changed: 73 additions & 0 deletions
#!/usr/bin/env python3
"""Debug the review parser specifically"""

import sys
import os

sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..'))

from src.response_parser import ResponseParser


def test_review_parsing():
    """Test review parsing with actual response"""

    parser = ResponseParser()

    test_response = """Score: 8.5

Strengths:
- Clear methodology
- Novel approach
- Good feasibility

Concerns:
- Needs more evaluation metrics
- Timeline seems aggressive

Recommendation: Accept
Summary: Strong proposal with minor revisions needed."""

    print("Testing review response parsing...")
    print("="*60)
    print("Input:")
    print(test_response)
    print("="*60)

    result = parser.parse_review_response(test_response)
    print("\nParsed result:")
    print(result)

    # Test individual components
    print("\n\nTesting individual components:")

    # Test score parsing
    score = parser.parse_review_score(test_response)
    print(f"Score: {score}")

    # Test strengths regex
    import re
    strengths_match = re.search(r'Strengths?:?\s*\n((?:[-*•]\s*.+\n?)+)', test_response, re.MULTILINE | re.IGNORECASE)
    if strengths_match:
        print(f"\nStrengths matched: '{strengths_match.group(1)}'")
    else:
        print("\nStrengths NOT matched")

    # Test with different patterns
    print("\n\nTrying different patterns:")

    # Pattern 1: Look for lines starting with -
    pattern1 = r'Strengths?:.*?\n((?:.*\n)*?)(?:\n(?:[A-Z]|$))'
    match1 = re.search(pattern1, test_response, re.DOTALL | re.IGNORECASE)
    if match1:
        print(f"Pattern 1 matched: '{match1.group(1).strip()}'")

    # Pattern 2: Everything between Strengths and Concerns
    pattern2 = r'Strengths?:.*?\n(.*?)(?=\n\w+:|$)'
    match2 = re.search(pattern2, test_response, re.DOTALL | re.IGNORECASE)
    if match2:
        print(f"Pattern 2 matched: '{match2.group(1).strip()}'")


if __name__ == "__main__":
    test_review_parsing()
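For context on the fallback patterns probed above: the bullet-anchored production regex only captures lines that start with a bullet marker, while Pattern 2 captures everything between the Strengths header and the next "Word:" header, so it also tolerates unbulleted lists. A minimal sketch of the difference (illustrative only, not part of the commit):

import re

text = "Strengths:\nClear methodology\nNo bullet markers here\n\nConcerns:\n- Timeline"

# The production pattern finds nothing without -, *, or • markers
bulleted = re.search(r'Strengths?:?\s*\n((?:[-*•]\s*.+\n?)+)', text, re.MULTILINE | re.IGNORECASE)
print(bulleted)  # None

# Pattern 2 stops at the next "Word:" header, so plain lines are still captured
loose = re.search(r'Strengths?:.*?\n(.*?)(?=\n\w+:|$)', text, re.DOTALL | re.IGNORECASE)
print(repr(loose.group(1).strip()))  # 'Clear methodology\nNo bullet markers here'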
