Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
123 changes: 116 additions & 7 deletions patchwork/steps/ModifyCode/ModifyCode.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,63 @@
from patchwork.step import Step, StepStatus


def normalize_line_endings(content: str, target_ending: str) -> str:
    """Convert every line ending in ``content`` to ``target_ending``.

    CRLF, lone CR and lone LF are all recognised as line endings, so input
    with mixed endings comes out uniform.

    Trailing-terminator policy:
      * Content that contains no line ending at all is returned unchanged,
        so a deliberately unterminated snippet stays unterminated.
      * Content that contains at least one line ending is guaranteed to
        end with ``target_ending``; one is appended when it is missing.

    Args:
        content: Text whose line endings should be unified.
        target_ending: The line ending to emit (LF, CRLF or CR).

    Returns:
        The converted text.
    """
    # Nothing to convert and nothing to append: hand the text back untouched.
    if '\r' not in content and '\n' not in content:
        return content

    # Collapse everything to LF first so a CRLF pair is never converted twice.
    normalized = content.replace('\r\n', '\n').replace('\r', '\n')
    if target_ending != '\n':
        normalized = normalized.replace('\n', target_ending)

    # The input had line endings, so make sure the last line is terminated too.
    if not normalized.endswith(target_ending):
        normalized += target_ending
    return normalized

def save_file_contents(file_path, content):
    """Write ``content`` to ``file_path``, keeping the file's line-ending style.

    When the target can be read, its dominant line ending is detected from
    the raw bytes and ``content`` is converted to it before writing; a
    missing or unreadable target falls back to LF. The data is written in
    binary mode so the platform does not translate the line endings again.

    Args:
        file_path: Destination path.
        content: Text to write.
    """
    target_ending = '\n'
    try:
        # Sniff the style BEFORE the file is opened for writing, because
        # opening for write truncates it and the original endings are lost.
        with open(file_path, 'rb') as existing:
            target_ending = detect_line_ending(existing.read())
    except OSError:
        # Missing or unreadable target: best effort, keep the LF default.
        pass

    content = normalize_line_endings(content, target_ending)

    try:
        payload = content.encode('utf-8')
    except UnicodeEncodeError:
        # Text that UTF-8 cannot encode: fall back to the system default
        # encoding. newline='' stops text mode from translating the
        # already-normalized line endings.
        with open(file_path, "w", newline='') as file:
            file.write(content)
    else:
        with open(file_path, "wb") as file:
            file.write(payload)


def handle_indent(src: list[str], target: list[str], start: int, end: int) -> list[str]:
Expand All @@ -32,21 +85,77 @@ def handle_indent(src: list[str], target: list[str], start: int, end: int) -> li
return [indent + line for line in target]


def detect_line_ending(content: bytes) -> str:
    """Return the dominant line ending found in raw file bytes.

    Rules:
      1. CRLF wins whenever it occurs at least once and is at least as
         common as both lone LF and lone CR (ties favour CRLF).
      2. Otherwise lone CR wins only when it is strictly more common than
         lone LF.
      3. In every other case, including empty input, input without any
         line ending, and an LF/CR tie, the Unix default LF is returned.

    Args:
        content: The file's bytes.

    Returns:
        The CRLF, LF or CR line ending as a ``str``.
    """
    crlf_count = content.count(b'\r\n')
    # Every CRLF pair also contributes one b'\r' and one b'\n'; subtract the
    # pairs to get the stand-alone occurrences.
    lf_count = content.count(b'\n') - crlf_count
    cr_count = content.count(b'\r') - crlf_count

    if crlf_count and crlf_count >= max(lf_count, cr_count):
        return '\r\n'
    if cr_count > lf_count:
        return '\r'
    return '\n'

def replace_code_in_file(
file_path: str,
start_line: int | None,
end_line: int | None,
new_code: str,
) -> None:
path = Path(file_path)
content = b""
text = ""
line_ending = "\n" # default

# Read existing file and detect line endings
if path.exists():
try:
with open(file_path, 'rb') as f:
content = f.read()
line_ending = detect_line_ending(content)

# Try decoding with UTF-8 first, then fallback
try:
text = content.decode('utf-8')
except UnicodeDecodeError:
try:
text = content.decode('latin1')
except Exception:
# If all decodings fail, treat as empty
text = ""
except Exception:
# If file can't be read, use defaults
pass

# Normalize the new code to match the file's line endings
new_code = normalize_line_endings(new_code, line_ending)
new_code_lines = new_code.splitlines(keepends=True)
if len(new_code_lines) > 0 and not new_code_lines[-1].endswith("\n"):
new_code_lines[-1] += "\n"

if path.exists() and start_line is not None and end_line is not None:
"""Replaces specified lines in a file with new code."""
text = path.read_text()

lines = text.splitlines(keepends=True)

# Insert the new code at the start line after converting it into a list of lines
Expand Down
4 changes: 4 additions & 0 deletions patchwork/test_crlf.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
line1
new line
with unix ending
line3
71 changes: 71 additions & 0 deletions patchwork/test_endings.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
from steps.ModifyCode.ModifyCode import replace_code_in_file
import os
import binascii

def hex_dump(file_path):
    """Print a hex dump of a file followed by a per-line line-ending report."""
    with open(file_path, 'rb') as handle:
        raw = handle.read()

    print(f"\nHex dump of {file_path}:")
    print(binascii.hexlify(raw).decode())

    # Suffix checks run in order, so CRLF is tested before bare LF / CR.
    endings = (
        ('\r\n', "CRLF (\\r\\n)"),
        ('\n', "LF (\\n)"),
        ('\r', "CR (\\r)"),
    )

    # latin1 maps every byte to exactly one character, so decoding cannot fail.
    text = raw.decode('latin1')
    print("\nLine endings analysis:")
    for number, line in enumerate(text.splitlines(True), start=1):
        label = next(
            (name for suffix, name in endings if line.endswith(suffix)),
            "NO ENDING",
        )
        print(f"Line {number}: {label}")
    print()

# Scenarios: each one seeds a file with a given line-ending style, then
# replaces a single line with content that uses a (possibly different) style.
test_cases = [
    # No line endings at all
    dict(
        name='test_no_endings.txt',
        content='line1line2line3',
        replace_line=0,
        new_content='new\nline\nhere\n',
    ),
    # CRLF file, LF replacement
    dict(
        name='test_crlf.txt',
        content='line1\r\nline2\r\nline3\r\n',
        replace_line=1,
        new_content='new line\nwith unix ending\n',
    ),
    # LF file, CRLF replacement
    dict(
        name='test_lf.txt',
        content='line1\nline2\nline3\n',
        replace_line=1,
        new_content='new line\r\nwith crlf ending\r\n',
    ),
    # Mixed CRLF / LF file, LF replacement
    dict(
        name='test_mixed.txt',
        content='line1\r\nline2\nline3\r\nline4\n',
        replace_line=1,
        new_content='new line\nwith unix ending\n',
    ),
]

for case in test_cases:
    file_name = case['name']
    line_index = case['replace_line']
    print(f"=== Testing {file_name} ===")

    # Seed the file in binary mode so the exact line-ending bytes hit the disk
    with open(file_name, 'wb') as out:
        out.write(case['content'].encode())

    print("Original file:")
    hex_dump(file_name)

    # Replace exactly one line (the end index is exclusive)
    print(f"Replacing line {line_index+1} with new content...")
    replace_code_in_file(file_name, line_index, line_index + 1, case['new_content'])

    print("After replacement:")
    hex_dump(file_name)
27 changes: 27 additions & 0 deletions patchwork/test_line_endings.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
from pathlib import Path

def test_line_endings():
    """Show what a text-mode read/write round trip does to CRLF line endings."""
    test_file = "test.txt"
    test_content = "line1\r\nline2\r\nline3\r\n"
    path = Path(test_file)

    # Seed the file in binary mode so the CRLF bytes land on disk untouched
    with open(test_file, "wb") as seed:
        seed.write(test_content.encode())

    # Round trip the way the current implementation does: text-mode read,
    # then text-mode write
    round_tripped = path.read_text()
    with open(test_file, "w") as file:
        file.write(round_tripped)

    # Read the raw bytes back to see which endings survived
    with open(test_file, "rb") as check:
        result = check.read().decode()

    original_has_crlf = "\r\n" in test_content
    result_has_crlf = "\r\n" in result
    print("Original had CRLF:", original_has_crlf)
    print("Result has CRLF:", result_has_crlf)
    print("\nOriginal content (hex):", test_content.encode().hex())
    print("Final content (hex):", result.encode().hex())

test_line_endings()
38 changes: 38 additions & 0 deletions patchwork/verify_endings.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
from steps.ModifyCode.ModifyCode import replace_code_in_file
import os
import binascii

def hex_dump(file_path):
    """Print a hex dump of a file followed by a per-line line-ending report."""
    with open(file_path, 'rb') as handle:
        raw = handle.read()

    print(f"Hex dump of {file_path}:")
    print(binascii.hexlify(raw).decode())

    # Suffix checks run in order, so CRLF is tested before bare LF / CR.
    endings = (
        ('\r\n', "CRLF (\\r\\n)"),
        ('\n', "LF (\\n)"),
        ('\r', "CR (\\r)"),
    )

    # latin1 maps every byte to exactly one character, so decoding cannot fail.
    text = raw.decode('latin1')
    print("\nLine endings analysis:")
    for number, line in enumerate(text.splitlines(True), start=1):
        label = next(
            (name for suffix, name in endings if line.endswith(suffix)),
            "NO ENDING",
        )
        print(f"Line {number}: {label}")
    print()

# File exercised by this manual verification run
test_file = "test_crlf.txt"

# Dump the file as it is before any modification
print("Original file:")
hex_dump(test_file)

# Swap the second line (index 1, exclusive end 2) for LF-terminated text
# whose last line has no terminator
print("Replacing line 2 with new content...")
replace_code_in_file(
    file_path=test_file,
    start_line=1,
    end_line=2,
    new_code="new line\nwith unix ending",
)

# Dump again so the resulting line endings can be compared by eye
print("After replacement:")
hex_dump(test_file)
Loading