
Commit b57955c

jeremymanning and claude committed
Fix response parsing for failing tasks
- Enhanced score parsing to handle scores 0-10 and normalize to 0-1 range
- Added support for Title/Idea format in brainstorming responses (for FORK_IDEA)
- Made review parsing case-insensitive for strengths/concerns headers
- Added debug tests to validate parsing improvements
- Fixed parsing edge cases that were causing task failures

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
1 parent 0193c4d commit b57955c

File tree: 5 files changed (+354, -3 lines changed)


code/llmxive-automation/src/response_parser.py

Lines changed: 41 additions & 3 deletions
@@ -3,6 +3,7 @@
 import re
 import json
 import logging
+from datetime import datetime
 from typing import Optional, Dict, List, Any, Union
 
 logger = logging.getLogger(__name__)

@@ -69,7 +70,10 @@ def parse_review_score(self, text: str) -> Optional[float]:
         if match:
             try:
                 score = extractor(match)
-                if 0 <= score <= 1.0:
+                # Allow scores up to 10 and normalize to 0-1
+                if 0 <= score <= 10:
+                    if score > 1.0:
+                        score = score / 10.0  # Normalize to 0-1 range
                     return score
             except ValueError:
                 continue

@@ -134,6 +138,40 @@ def parse_brainstorm_response(self, text: str) -> Optional[Dict[str, str]]:
         """Parse brainstorming response"""
         result = {}
 
+        # Check for Title/Idea format (used in FORK_IDEA)
+        title_match = re.search(r'Title:\s*(.+?)(?:\n|$)', text, re.IGNORECASE)
+        if title_match:
+            # Parse Title/Idea format
+            idea_match = re.search(r'Idea:\s*(.+?)(?:\n|$)', text, re.IGNORECASE)
+            if idea_match:
+                result['idea'] = idea_match.group(1).strip()
+            else:
+                result['idea'] = title_match.group(1).strip()
+
+            # Extract other fields
+            field_match = re.search(r'Field:\s*(.+?)(?:\n|$)', text, re.IGNORECASE)
+            if field_match:
+                result['field'] = field_match.group(1).strip()
+
+            # Generate ID from field
+            if 'field' in result:
+                field_short = result['field'].lower().replace(' ', '-')[:20]
+                date_str = datetime.now().strftime('%Y%m%d')
+                result['id'] = f"{field_short}-{date_str}-001"
+
+            # Extract keywords
+            keywords_match = re.search(r'Keywords?:\s*(.+?)(?:\n|$)', text, re.IGNORECASE)
+            if keywords_match:
+                result['keywords'] = keywords_match.group(1).strip()
+            else:
+                # Generate keywords from idea
+                words = result.get('idea', '').lower().split()[:5]
+                result['keywords'] = ', '.join(words)
+
+            if 'field' in result and 'idea' in result:
+                return result
+
+        # Try standard format
         # Required fields
         fields = ['field', 'idea', 'id', 'keywords']

@@ -169,12 +207,12 @@ def parse_review_response(self, text: str) -> Optional[Dict[str, Any]]:
         result = {}
 
         # Extract strengths
-        strengths_match = re.search(r'Strengths?:?\s*\n((?:[-*•]\s*.+\n?)+)', text, re.MULTILINE)
+        strengths_match = re.search(r'Strengths?:?\s*\n((?:[-*•]\s*.+\n?)+)', text, re.MULTILINE | re.IGNORECASE)
         if strengths_match:
             result['strengths'] = strengths_match.group(1).strip()
 
         # Extract concerns
-        concerns_match = re.search(r'Concerns?:?\s*\n((?:[-*•]\s*.+\n?)+)', text, re.MULTILINE)
+        concerns_match = re.search(r'Concerns?:?\s*\n((?:[-*•]\s*.+\n?)+)', text, re.MULTILINE | re.IGNORECASE)
         if concerns_match:
             result['concerns'] = concerns_match.group(1).strip()

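As a quick sanity check of the parser changes above (a minimal sketch, not part of the commit; it assumes ResponseParser is importable as in the debug scripts below, and that the score patterns accept values like "Score: 8.5"):

from src.response_parser import ResponseParser

parser = ResponseParser()

# Scores above 1.0 are now divided by 10, so "Score: 8.5" parses to 0.85;
# scores already in the 0-1 range pass through unchanged
assert parser.parse_review_score("Score: 8.5") == 0.85

# The Title/Idea format used by FORK_IDEA is now recognized; 'id' is built
# from the field plus today's date, and keywords fall back to the first
# five words of the idea when no Keywords: line is present
idea = parser.parse_brainstorm_response(
    "Title: Variation 1\nField: Biology\nIdea: Apply the technique to biological systems"
)
assert idea['field'] == 'Biology'

# Strengths/Concerns headers now match case-insensitively
review = parser.parse_review_response(
    "Score: 8\n\nSTRENGTHS:\n- Clear methodology\n\nCONCERNS:\n- Timeline seems aggressive\n"
)
print(review)  # expect strengths/concerns populated despite the uppercase headers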
code/llmxive-automation/src/task_executor.py

Lines changed: 1 addition & 0 deletions
@@ -2,6 +2,7 @@
 
 import os
 import re
+import json
 import logging
 from datetime import datetime
 from typing import Optional, Dict, List, Any
Lines changed: 187 additions & 0 deletions
#!/usr/bin/env python3
"""Debug script for the 3 failing tasks"""

import os
import sys
from unittest.mock import Mock

sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..'))

from src.task_executor import TaskExecutor
from src.conversation_manager import ConversationManager
from src.github_handler import GitHubHandler


def test_review_technical_design():
    """Debug REVIEW_TECHNICAL_DESIGN task"""
    print("\n=== Testing REVIEW_TECHNICAL_DESIGN ===")

    # Setup
    conv_mgr = Mock(spec=ConversationManager)
    conv_mgr.model_name = "test-model"
    github = Mock(spec=GitHubHandler)

    # Mock responses
    github.get_file_content.return_value = "# Technical Design\n\nThis is a test design document."
    conv_mgr.query_model.return_value = """Score: 8.5

Strengths:
- Clear methodology
- Novel approach
- Good feasibility

Concerns:
- Needs more evaluation metrics
- Timeline seems aggressive

Recommendation: Accept
Summary: Strong proposal with minor revisions needed."""

    github.create_file.return_value = True
    github.get_issue_score.return_value = 5.0
    github.update_issue_score.return_value = True

    executor = TaskExecutor(conv_mgr, github)

    # Test with proper context
    context = {
        "design_path": "technical_design_documents/test-proj/design.md",
        "project_id": "test-proj",  # This was missing in the test!
        "issue_number": 123
    }

    result = executor.execute_task("REVIEW_TECHNICAL_DESIGN", context)
    print(f"Result: {result}")
    print(f"Success: {result.get('success', False)}")
    if not result.get('success'):
        print(f"Error: {result.get('error')}")
        if 'raw_response' in result:
            print(f"Raw response: {result['raw_response']}")


def test_update_readme_table():
    """Debug UPDATE_README_TABLE task"""
    print("\n=== Testing UPDATE_README_TABLE ===")

    # Setup
    conv_mgr = Mock(spec=ConversationManager)
    conv_mgr.model_name = "test-model"
    github = Mock(spec=GitHubHandler)

    # Mock responses
    github.get_file_content.return_value = """# README

## Projects Table

| ID | Name | Status |
|----|------|--------|
| p1 | Project 1 | Active |
"""

    conv_mgr.query_model.return_value = "| test-proj | Test Project | In Progress |"
    github.insert_table_row.return_value = True

    executor = TaskExecutor(conv_mgr, github)

    # Test with CORRECT parameter names
    context = {
        "file_path": "README.md",  # Not readme_path
        "table_identifier": "Projects Table",  # Not table_name
        "new_entry": {"id": "test-proj", "name": "Test Project", "status": "In Progress"}  # Not new_row
    }

    result = executor.execute_task("UPDATE_README_TABLE", context)
    print(f"Result: {result}")
    print(f"Success: {result.get('success', False)}")
    if not result.get('success'):
        print(f"Error: {result.get('error')}")


def test_fork_idea():
    """Debug FORK_IDEA task"""
    print("\n=== Testing FORK_IDEA ===")

    # Setup
    conv_mgr = Mock(spec=ConversationManager)
    conv_mgr.model_name = "test-model"
    github = Mock(spec=GitHubHandler)

    # Mock original issue
    original_issue = Mock()
    original_issue.number = 123
    original_issue.title = "Original Research Idea"
    original_issue.body = """**Field**: Computer Science
**Abstract**: This is the original idea
**Approach**: Novel approach"""

    github.get_issue.return_value = original_issue

    # Mock model response with proper format
    conv_mgr.query_model.return_value = """Title: Variation 1 - Applied to Biology
Field: Biology
Idea: Apply the same technique to biological systems
Abstract: Testing in biological context
Approach: Adapted approach for bio

Title: Variation 2 - Different Method
Field: Computer Science
Idea: Use alternative algorithm for same problem
Abstract: Different algorithmic approach
Approach: New method

Title: Variation 3 - Extended Complexity
Field: Computer Science
Idea: Extend original with multi-modal inputs
Abstract: Enhanced version with more features
Approach: Extended framework"""

    # Mock issue creation
    created_issue = Mock()
    created_issue.number = 124
    github.create_issue.return_value = created_issue

    executor = TaskExecutor(conv_mgr, github)

    # Test with correct parameter name
    context = {
        "issue_number": 123  # This is correct but was being called as parent_issue in test
    }

    result = executor.execute_task("FORK_IDEA", context)
    print(f"Result: {result}")
    print(f"Success: {result.get('success', False)}")
    if not result.get('success'):
        print(f"Error: {result.get('error')}")
    else:
        print(f"Created issues: {result.get('forked_issues', [])}")


def test_parse_issue_for_idea():
    """Test the _parse_issue_for_idea helper"""
    print("\n=== Testing _parse_issue_for_idea helper ===")

    # Setup
    conv_mgr = Mock(spec=ConversationManager)
    github = Mock(spec=GitHubHandler)
    executor = TaskExecutor(conv_mgr, github)

    # Test issue
    issue = Mock()
    issue.title = "[Idea] Test Research Idea"
    issue.body = """**Field**: Computer Science

**Abstract**: This is a test abstract

**Approach**: Novel approach using AI

**Keywords**: AI, machine learning, research"""

    result = executor._parse_issue_for_idea(issue)
    print(f"Parsed result: {result}")


if __name__ == "__main__":
    test_review_technical_design()
    test_update_readme_table()
    test_fork_idea()
    test_parse_issue_for_idea()
Lines changed: 73 additions & 0 deletions
#!/usr/bin/env python3
"""Debug the review parser specifically"""

import sys
import os

sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..'))

from src.response_parser import ResponseParser


def test_review_parsing():
    """Test review parsing with actual response"""

    parser = ResponseParser()

    test_response = """Score: 8.5

Strengths:
- Clear methodology
- Novel approach
- Good feasibility

Concerns:
- Needs more evaluation metrics
- Timeline seems aggressive

Recommendation: Accept
Summary: Strong proposal with minor revisions needed."""

    print("Testing review response parsing...")
    print("="*60)
    print("Input:")
    print(test_response)
    print("="*60)

    result = parser.parse_review_response(test_response)
    print("\nParsed result:")
    print(result)

    # Test individual components
    print("\n\nTesting individual components:")

    # Test score parsing
    score = parser.parse_review_score(test_response)
    print(f"Score: {score}")

    # Test strengths regex
    import re
    strengths_match = re.search(r'Strengths?:?\s*\n((?:[-*•]\s*.+\n?)+)', test_response, re.MULTILINE | re.IGNORECASE)
    if strengths_match:
        print(f"\nStrengths matched: '{strengths_match.group(1)}'")
    else:
        print("\nStrengths NOT matched")

    # Test with different patterns
    print("\n\nTrying different patterns:")

    # Pattern 1: Look for lines starting with -
    pattern1 = r'Strengths?:.*?\n((?:.*\n)*?)(?:\n(?:[A-Z]|$))'
    match1 = re.search(pattern1, test_response, re.DOTALL | re.IGNORECASE)
    if match1:
        print(f"Pattern 1 matched: '{match1.group(1).strip()}'")

    # Pattern 2: Everything between Strengths and Concerns
    pattern2 = r'Strengths?:.*?\n(.*?)(?=\n\w+:|$)'
    match2 = re.search(pattern2, test_response, re.DOTALL | re.IGNORECASE)
    if match2:
        print(f"Pattern 2 matched: '{match2.group(1).strip()}'")


if __name__ == "__main__":
    test_review_parsing()
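For context on the fallback patterns probed above: the bullet-anchored production regex only captures lines that start with a bullet marker, while Pattern 2 captures everything between the Strengths header and the next "Word:" header, so it also tolerates unbulleted lists. A minimal sketch of the difference (illustrative only, not part of the commit):

import re

text = "Strengths:\nClear methodology\nNo bullet markers here\n\nConcerns:\n- Timeline"

# The production pattern finds nothing without -, *, or • markers
bulleted = re.search(r'Strengths?:?\s*\n((?:[-*•]\s*.+\n?)+)', text, re.MULTILINE | re.IGNORECASE)
print(bulleted)  # None

# Pattern 2 stops at the next "Word:" header, so plain lines are still captured
loose = re.search(r'Strengths?:.*?\n(.*?)(?=\n\w+:|$)', text, re.DOTALL | re.IGNORECASE)
print(repr(loose.group(1).strip()))  # 'Clear methodology\nNo bullet markers here'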
