diff --git a/patchwork/steps/ModifyCode/ModifyCode.py b/patchwork/steps/ModifyCode/ModifyCode.py index f7c6d7e4f..aa7a12953 100644 --- a/patchwork/steps/ModifyCode/ModifyCode.py +++ b/patchwork/steps/ModifyCode/ModifyCode.py @@ -5,10 +5,63 @@ from patchwork.step import Step, StepStatus +def normalize_line_endings(content: str, target_ending: str) -> str: + """Normalize all line endings in content to the target ending. + + Rules: + 1. Normalize any existing line endings to the target ending + 2. Preserve intentional lack of line endings: + - If original content had no line endings, don't add them + - If original content had line endings, ensure they're present + 3. Handle mixed line endings by converting all to the target + """ + # Detect if original content had any line endings + had_line_endings = ('\r\n' in content) or ('\n' in content) or ('\r' in content) + ends_with_line_ending = content.endswith('\r\n') or content.endswith('\n') or content.endswith('\r') + + # First standardize all line endings to \n + tmp = content.replace('\r\n', '\n') # Convert CRLF to LF + tmp = tmp.replace('\r', '\n') # Convert CR to LF + + # Then convert all \n to target ending (if not already \n) + if target_ending != '\n': + tmp = tmp.replace('\n', target_ending) + + # Handle final line ending + if had_line_endings: + # If original had line endings, ensure all lines have them + if not tmp.endswith(target_ending): + tmp += target_ending + else: + # If original had no line endings, remove any we might have added + if tmp.endswith(target_ending): + tmp = tmp[:-len(target_ending)] + + return tmp + def save_file_contents(file_path, content): - """Utility function to save content to a file.""" - with open(file_path, "w") as file: - file.write(content) + """Utility function to save content to a file while preserving line endings.""" + # Detect the target line ending from existing file if it exists + target_ending = '\n' + if Path(file_path).exists(): + try: + with open(file_path, 'rb') as f: + target_ending = detect_line_ending(f.read()) + except Exception: + pass + + # Normalize line endings in content + content = normalize_line_endings(content, target_ending) + + try: + # Try UTF-8 first + content_bytes = content.encode('utf-8') + with open(file_path, "wb") as file: + file.write(content_bytes) + except UnicodeEncodeError: + # Fallback to system default encoding if UTF-8 fails + with open(file_path, "w", newline='') as file: + file.write(content) def handle_indent(src: list[str], target: list[str], start: int, end: int) -> list[str]: @@ -32,6 +85,40 @@ def handle_indent(src: list[str], target: list[str], start: int, end: int) -> li return [indent + line for line in target] +def detect_line_ending(content: bytes) -> str: + """Detect the dominant line ending in a file. + + Rules: + 1. If the file has line endings, use the most common one (with CRLF taking precedence if tied) + 2. If the file has no line endings: + - For empty files or files with no line endings, return '\n' (Unix style) + - The caller will handle whether to add line endings or not + """ + if not content: + return '\n' # default for empty files + + # Count all occurrences first + crlf_count = content.count(b'\r\n') + total_lf = content.count(b'\n') + total_cr = content.count(b'\r') + + # Calculate individual counts + lf_count = total_lf - crlf_count # Lone \n + cr_count = total_cr - crlf_count # Lone \r + + # If there are no line endings at all, default to Unix style + if crlf_count == 0 and lf_count == 0 and cr_count == 0: + return '\n' + + # Return dominant ending with slight bias towards CRLF if it exists + if crlf_count >= max(lf_count, cr_count): # Use >= to prefer CRLF when tied + return '\r\n' + elif lf_count > cr_count: + return '\n' + elif cr_count > 0: + return '\r' + return '\n' # default if no clear winner + def replace_code_in_file( file_path: str, start_line: int | None, @@ -39,14 +126,36 @@ def replace_code_in_file( new_code: str, ) -> None: path = Path(file_path) + content = b"" + text = "" + line_ending = "\n" # default + + # Read existing file and detect line endings + if path.exists(): + try: + with open(file_path, 'rb') as f: + content = f.read() + line_ending = detect_line_ending(content) + + # Try decoding with UTF-8 first, then fallback + try: + text = content.decode('utf-8') + except UnicodeDecodeError: + try: + text = content.decode('latin1') + except Exception: + # If all decodings fail, treat as empty + text = "" + except Exception: + # If file can't be read, use defaults + pass + + # Normalize the new code to match the file's line endings + new_code = normalize_line_endings(new_code, line_ending) new_code_lines = new_code.splitlines(keepends=True) - if len(new_code_lines) > 0 and not new_code_lines[-1].endswith("\n"): - new_code_lines[-1] += "\n" if path.exists() and start_line is not None and end_line is not None: """Replaces specified lines in a file with new code.""" - text = path.read_text() - lines = text.splitlines(keepends=True) # Insert the new code at the start line after converting it into a list of lines diff --git a/patchwork/test_crlf.txt b/patchwork/test_crlf.txt new file mode 100644 index 000000000..976459e09 --- /dev/null +++ b/patchwork/test_crlf.txt @@ -0,0 +1,4 @@ +line1 +new line +with unix ending +line3 diff --git a/patchwork/test_endings.py b/patchwork/test_endings.py new file mode 100644 index 000000000..d94f20298 --- /dev/null +++ b/patchwork/test_endings.py @@ -0,0 +1,71 @@ +from steps.ModifyCode.ModifyCode import replace_code_in_file +import os +import binascii + +def hex_dump(file_path): + with open(file_path, 'rb') as f: + content = f.read() + hex_content = binascii.hexlify(content).decode() + print(f"\nHex dump of {file_path}:") + print(hex_content) + + # Add visual markers for line endings + content_str = content.decode('latin1') + print("\nLine endings analysis:") + for i, line in enumerate(content_str.splitlines(True)): + ending = "" + if line.endswith('\r\n'): + ending = "CRLF (\\r\\n)" + elif line.endswith('\n'): + ending = "LF (\\n)" + elif line.endswith('\r'): + ending = "CR (\\r)" + else: + ending = "NO ENDING" + print(f"Line {i+1}: {ending}") + print() + +# Test cases +test_cases = [ + { + 'name': 'test_no_endings.txt', + 'content': 'line1line2line3', # No line endings at all + 'replace_line': 0, + 'new_content': 'new\nline\nhere\n' + }, + { + 'name': 'test_crlf.txt', + 'content': 'line1\r\nline2\r\nline3\r\n', + 'replace_line': 1, + 'new_content': 'new line\nwith unix ending\n' + }, + { + 'name': 'test_lf.txt', + 'content': 'line1\nline2\nline3\n', + 'replace_line': 1, + 'new_content': 'new line\r\nwith crlf ending\r\n' + }, + { + 'name': 'test_mixed.txt', + 'content': 'line1\r\nline2\nline3\r\nline4\n', + 'replace_line': 1, + 'new_content': 'new line\nwith unix ending\n' + } +] + +for test in test_cases: + print(f"=== Testing {test['name']} ===") + + # Create test file + with open(test['name'], 'wb') as f: + f.write(test['content'].encode()) + + print("Original file:") + hex_dump(test['name']) + + # Test replacing content + print(f"Replacing line {test['replace_line']+1} with new content...") + replace_code_in_file(test['name'], test['replace_line'], test['replace_line']+1, test['new_content']) + + print("After replacement:") + hex_dump(test['name']) \ No newline at end of file diff --git a/patchwork/test_line_endings.py b/patchwork/test_line_endings.py new file mode 100644 index 000000000..e8ecc5c50 --- /dev/null +++ b/patchwork/test_line_endings.py @@ -0,0 +1,27 @@ +from pathlib import Path + +def test_line_endings(): + # Create a test file with CRLF endings + test_content = "line1\r\nline2\r\nline3\r\n" + test_file = "test.txt" + + # Write with CRLF + with open(test_file, "wb") as f: + f.write(test_content.encode()) + + # Read and write using current implementation + path = Path(test_file) + text = path.read_text() + with open(test_file, "w") as file: + file.write(text) + + # Check if endings changed + with open(test_file, "rb") as f: + result = f.read().decode() + + print("Original had CRLF:", "\r\n" in test_content) + print("Result has CRLF:", "\r\n" in result) + print("\nOriginal content (hex):", test_content.encode().hex()) + print("Final content (hex):", result.encode().hex()) + +test_line_endings() \ No newline at end of file diff --git a/patchwork/verify_endings.py b/patchwork/verify_endings.py new file mode 100644 index 000000000..3adbe1fc9 --- /dev/null +++ b/patchwork/verify_endings.py @@ -0,0 +1,38 @@ +from steps.ModifyCode.ModifyCode import replace_code_in_file +import os +import binascii + +def hex_dump(file_path): + with open(file_path, 'rb') as f: + content = f.read() + hex_content = binascii.hexlify(content).decode() + print(f"Hex dump of {file_path}:") + print(hex_content) + # Add visual markers for line endings + content_str = content.decode('latin1') + print("\nLine endings analysis:") + for i, line in enumerate(content_str.splitlines(True)): + ending = "" + if line.endswith('\r\n'): + ending = "CRLF (\\r\\n)" + elif line.endswith('\n'): + ending = "LF (\\n)" + elif line.endswith('\r'): + ending = "CR (\\r)" + else: + ending = "NO ENDING" + print(f"Line {i+1}: {ending}") + print() + +# Test file paths +test_file = "test_crlf.txt" + +print("Original file:") +hex_dump(test_file) + +# Test replacing content +print("Replacing line 2 with new content...") +replace_code_in_file(test_file, 1, 2, "new line\nwith unix ending") + +print("After replacement:") +hex_dump(test_file) \ No newline at end of file