Skip to content

Commit 8b82a31

Browse files
committed
More resilient regex in utils.code_utils.extract_diffs and removed redundant implementation in apply_diff() function
1 parent 1f08698 commit 8b82a31

File tree

2 files changed

+81
-74
lines changed

2 files changed

+81
-74
lines changed

openevolve/utils/code_utils.py

Lines changed: 37 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -8,20 +8,20 @@
88
def parse_evolve_blocks(code: str) -> List[Tuple[int, int, str]]:
99
"""
1010
Parse evolve blocks from code
11-
11+
1212
Args:
1313
code: Source code with evolve blocks
14-
14+
1515
Returns:
1616
List of tuples (start_line, end_line, block_content)
1717
"""
1818
lines = code.split("\n")
1919
blocks = []
20-
20+
2121
in_block = False
2222
start_line = -1
2323
block_content = []
24-
24+
2525
for i, line in enumerate(lines):
2626
if "# EVOLVE-BLOCK-START" in line:
2727
in_block = True
@@ -32,137 +32,136 @@ def parse_evolve_blocks(code: str) -> List[Tuple[int, int, str]]:
3232
blocks.append((start_line, i, "\n".join(block_content)))
3333
elif in_block:
3434
block_content.append(line)
35-
35+
3636
return blocks
3737

3838

3939
def apply_diff(original_code: str, diff_text: str) -> str:
    """
    Apply a diff to the original code

    Every SEARCH/REPLACE block returned by ``extract_diffs(diff_text)`` is
    applied in order to the running result: the first run of whole lines
    that equals the SEARCH text is swapped for the REPLACE text. A block
    whose SEARCH text does not occur is skipped silently.

    Args:
        original_code: Original source code
        diff_text: Diff in the SEARCH/REPLACE format

    Returns:
        Modified code
    """
    # str.split already returns a fresh list, so it can be edited in place
    result_lines = original_code.split("\n")

    for search_text, replace_text in extract_diffs(diff_text):
        search_lines = search_text.split("\n")
        # "".split("\n") yields [""], which would leave a stray blank line
        # behind; an empty REPLACE section means "delete the matched lines".
        replace_lines = replace_text.split("\n") if replace_text else []
        span = len(search_lines)

        # Only the first location where the SEARCH lines occur is replaced
        for start in range(len(result_lines) - span + 1):
            if result_lines[start:start + span] == search_lines:
                result_lines[start:start + span] = replace_lines
                break

    return "\n".join(result_lines)
7170

7271

7372
def extract_diffs(diff_text: str) -> List[Tuple[str, str]]:
    """
    Extract diff blocks from the diff text

    A block has the shape::

        <<<<<<< SEARCH
        ...text to find...
        =======
        ...replacement text...
        >>>>>>> REPLACE

    The SEARCH and ``=======`` marker lines may end in LF or CRLF and may
    carry trailing spaces/tabs. Either section may be empty. Trailing
    whitespace (including the final line break) is stripped from both
    captured sections.

    Args:
        diff_text: Diff in the SEARCH/REPLACE format

    Returns:
        List of tuples (search_text, replace_text)
    """
    # The sections are captured lazily so consecutive blocks do not merge.
    # The line break before "=======" / ">>>>>>> REPLACE" is left inside the
    # capture (and removed by rstrip) so that empty sections still match.
    diff_pattern = (
        r"<<<<<<< SEARCH[ \t]*\r?\n"
        r"(.*?)"
        r"=======[ \t]*\r?\n"
        r"(.*?)"
        r">>>>>>> REPLACE"
    )
    return [
        (search.rstrip(), replace.rstrip())
        for search, replace in re.findall(diff_pattern, diff_text, re.DOTALL)
    ]
8685

8786

8887
def parse_full_rewrite(llm_response: str, language: str = "python") -> Optional[str]:
    """
    Extract a full rewrite from an LLM response

    The first fenced code block tagged with ``language`` is preferred. If
    there is none, the first fenced block of any kind is used; in that
    fallback everything between the fences is returned, so a foreign
    language tag on the opening fence stays part of the result.

    Args:
        llm_response: Response from the LLM
        language: Programming language

    Returns:
        Extracted code or None if not found
    """
    # The tag is matched literally: names such as "c++" contain regex
    # metacharacters and must be escaped before going into the pattern.
    tagged_block = re.search(
        r"```" + re.escape(language) + r"\n(.*?)```", llm_response, re.DOTALL
    )
    if tagged_block:
        return tagged_block.group(1).strip()

    # Fallback to any code block
    any_block = re.search(r"```(.*?)```", llm_response, re.DOTALL)
    if any_block:
        return any_block.group(1).strip()

    return None
113112

114113

115114
def format_diff_summary(diff_blocks: List[Tuple[str, str]]) -> str:
    """
    Create a human-readable summary of the diff

    One line is produced per block, numbered from 1. When both sides are a
    single line (after stripping surrounding whitespace) the line quotes
    them; otherwise each multi-line side is shown as its line count.

    Args:
        diff_blocks: List of (search_text, replace_text) tuples

    Returns:
        Summary string
    """
    entries = []

    for number, (search_text, replace_text) in enumerate(diff_blocks, start=1):
        old_rows = search_text.strip().split("\n")
        new_rows = replace_text.strip().split("\n")

        if len(old_rows) > 1 or len(new_rows) > 1:
            # At least one side spans several lines: show counts for those
            if len(old_rows) > 1:
                old_part = f"{len(old_rows)} lines"
            else:
                old_part = old_rows[0]
            if len(new_rows) > 1:
                new_part = f"{len(new_rows)} lines"
            else:
                new_part = new_rows[0]
            entries.append(f"Change {number}: Replace {old_part} with {new_part}")
            continue

        # Both sides fit on one line: quote them verbatim
        entries.append(f"Change {number}: '{old_rows[0]}' to '{new_rows[0]}'")

    return "\n".join(entries)
140139

141140

142141
def calculate_edit_distance(code1: str, code2: str) -> int:
143142
"""
144143
Calculate the Levenshtein edit distance between two code snippets
145-
144+
146145
Args:
147146
code1: First code snippet
148147
code2: Second code snippet
149-
148+
150149
Returns:
151150
Edit distance (number of operations needed to transform code1 into code2)
152151
"""
153152
if code1 == code2:
154153
return 0
155-
154+
156155
# Simple implementation of Levenshtein distance
157156
m, n = len(code1), len(code2)
158157
dp = [[0 for _ in range(n + 1)] for _ in range(m + 1)]
159-
158+
160159
for i in range(m + 1):
161160
dp[i][0] = i
162-
161+
163162
for j in range(n + 1):
164163
dp[0][j] = j
165-
164+
166165
for i in range(1, m + 1):
167166
for j in range(1, n + 1):
168167
cost = 0 if code1[i-1] == code2[j-1] else 1
@@ -171,17 +170,17 @@ def calculate_edit_distance(code1: str, code2: str) -> int:
171170
dp[i][j-1] + 1, # insertion
172171
dp[i-1][j-1] + cost, # substitution
173172
)
174-
173+
175174
return dp[m][n]
176175

177176

178177
def extract_code_language(code: str) -> str:
179178
"""
180179
Try to determine the language of a code snippet
181-
180+
182181
Args:
183182
code: Code snippet
184-
183+
185184
Returns:
186185
Detected language or "unknown"
187186
"""
@@ -198,5 +197,5 @@ def extract_code_language(code: str) -> str:
198197
return "rust"
199198
elif re.search(r"^(SELECT|CREATE TABLE|INSERT INTO)", code, re.MULTILINE):
200199
return "sql"
201-
200+
202201
return "unknown"

0 commit comments

Comments
 (0)