Skip to content

Commit eb6ef50

Browse files
authored
Update diff line number change calculation (#670)
* update calc num lines to include subtracted lines
* add more helpful messages
* add more unit tests and use set list
1 parent b845a03 commit eb6ef50

File tree

3 files changed

+95
-15
lines changed

3 files changed

+95
-15
lines changed

src/codemodder/codemods/test/utils.py

Lines changed: 10 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -93,7 +93,11 @@ def run_and_assert(
9393
return
9494

9595
assert len(changes) == 1
96-
assert len(changes[0].changes) == num_changes
96+
assert (
97+
actual_num := len(changes[0].changes)
98+
) == num_changes, (
99+
f"Expected {num_changes} changes but {actual_num} were created."
100+
)
97101

98102
self.assert_changes(
99103
tmpdir,
@@ -196,7 +200,11 @@ def run_and_assert(
196200
return
197201

198202
assert len(changes) == 1
199-
assert len(changes[0].changes) == num_changes
203+
assert (
204+
actual_num := len(changes[0].changes)
205+
) == num_changes, (
206+
f"Expected {num_changes} changes but {actual_num} were created."
207+
)
200208

201209
self.assert_changes(
202210
tmpdir,

src/codemodder/diff.py

Lines changed: 24 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -22,36 +22,47 @@ def create_diff_and_linenums(
2222
original_lines: list[str], new_lines: list[str]
2323
) -> tuple[str, list[int]]:
2424
diff_lines = list(difflib.unified_diff(original_lines, new_lines))
25-
return difflines_to_str(diff_lines), calc_new_line_nums(diff_lines)
25+
return difflines_to_str(diff_lines), calc_line_num_changes(diff_lines)
2626

2727

28-
def calc_new_line_nums(diff_lines: list[str]) -> list[int]:
28+
def calc_line_num_changes(diff_lines: list[str]) -> list[int]:
29+
"""
30+
Calculates the line numbers changed from a list of diff lines
31+
Returns a list with unique elements.
32+
"""
2933
if not diff_lines:
3034
return []
3135

32-
added_line_nums = []
36+
changed_line_nums: list[int] = []
3337
current_line_number = 0
38+
original_line_number = 0
3439

3540
for line in diff_lines:
3641
if line.startswith("@@"):
3742
# Extract the starting line number for the updated file from the diff metadata.
3843
# The format is @@ -x,y +a,b @@, where a is the starting line number in the updated file.
39-
start_line = line.split(" ")[2]
40-
current_line_number = (
41-
int(start_line.split(",")[0][1:]) - 1
42-
) # Subtract 1 because line numbers are 1-indexed
44+
start_line_original, start_line_updated = line.split(" ")[1:3]
45+
original_line_number = int(start_line_original.split(",")[0][1:]) - 1
46+
current_line_number = int(start_line_updated.split(",")[0][1:]) - 1
4347

4448
elif line.startswith("+"):
4549
# Increment line number for each line in the updated file
4650
current_line_number += 1
47-
if not line.startswith("++"): # Ignore the diff metadata lines
48-
added_line_nums.append(current_line_number)
49-
50-
elif not line.startswith("-"):
51-
# Increment line number for unchanged/context lines
51+
if not line.startswith("+++"): # Ignore the diff metadata lines
52+
changed_line_nums.append(current_line_number)
53+
54+
elif line.startswith("-"):
55+
# Increment line number for each line in the original file
56+
original_line_number += 1
57+
if not line.startswith("---"): # Ignore the diff metadata lines
58+
changed_line_nums.append(original_line_number)
59+
60+
else:
61+
# Increment line numbers for unchanged/context lines
62+
original_line_number += 1
5263
current_line_number += 1
5364

54-
return added_line_nums
65+
return list(set(changed_line_nums))
5566

5667

5768
def difflines_to_str(diff_lines: list[str]) -> str:

tests/test_diff.py

Lines changed: 61 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,61 @@
1+
from codemodder.diff import calc_line_num_changes
2+
3+
4+
def test_calc_line_nums_remove_line():
    """A single removed line is reported by its line number in the original file."""
    file_headers = ["--- \n", "+++ \n"]
    hunk = [
        "@@ -5,7 +5,6 @@\n",
        " port: 443,\n",
        " path: '/',\n",
        " method: 'GET',\n",
        "- checkServerIdentity: function() {},\n",
        " };\n",
        " \n",
        " const req = https.request(options, (res) => {\n",
    ]
    # The hunk starts at original line 5; the removal is its fourth line.
    expected = [8]
    assert calc_line_num_changes(file_headers + hunk) == expected
18+
19+
20+
def test_calc_line_nums_add_line():
    """A single added line is reported by its line number in the updated file."""
    file_headers = ["--- original.py", "+++ modified.py"]
    hunk = [
        "@@ -1,3 +1,4 @@",
        " def test_name(self):",
        " codemod = get_codemod()",
        "+ print(codemod)",
        ' assert codemod.name == "django", f"incorrect name"',
    ]
    # The addition is the third line of the updated file.
    expected = [3]
    assert calc_line_num_changes(file_headers + hunk) == expected
31+
32+
33+
def test_calc_line_nums_change_same_line():
    """A line replaced in place (one "-" plus one "+") yields one line number."""
    file_headers = ["--- original.py", "+++ modified.py"]
    hunk = [
        "@@ -1,4 +1,4 @@",
        " def test_name(self):",
        " codemod = get_codemod()",
        "- assert codemod.name == 'django'",
        "+ assert codemod.name == 'django', f'incorrect name'",
    ]
    # Removal and addition both land on line 3, so the result is de-duplicated.
    expected = [3]
    assert calc_line_num_changes(file_headers + hunk) == expected
44+
45+
46+
def test_calc_line_nums_multiple_hunks():
    """
    Line numbers are tracked independently for each "@@" hunk.

    The first hunk only adds lines (numbered in the updated file); the second
    hunk only removes a line (numbered in the original file).
    """
    lines = [
        "--- one.txt 2024-06-28 11:52:23",
        "+++ two.txt 2024-06-28 11:52:13",
        # First hunk: six additions, at updated-file lines 3 and 5-9.
        "@@ -1,3 +1,9 @@",
        " def test_name(self):",
        " codemod = get_codemod()",
        "+ print(codemod)",
        ' assert codemod.name == "django", f"incorrect name"',
        '+ print("test OK")',
        "+",
        "+",
        "+def test_new():",
        "+ pass",
        # Second hunk: the counters must restart from this header, so the
        # removal is reported as original-file line 21.
        "@@ -20,3 +26,2 @@",
        " def test_other(self):",
        "- unused = 1",
        " pass",
    ]
    # Compare sorted so the test does not depend on the order of the result.
    assert sorted(calc_line_num_changes(lines)) == [3, 5, 6, 7, 8, 9, 21]

0 commit comments

Comments
 (0)