patched-codes · patched-codes · Jan 6, 2025 · Jan 6, 2025 · Jan 6, 2025 · Jan 6, 2025
diff --git a/patchwork/steps/ModifyCode/ModifyCode.py b/patchwork/steps/ModifyCode/ModifyCode.py
@@ -5,10 +5,63 @@
 from patchwork.step import Step, StepStatus
 
 
+def normalize_line_endings(content: str, target_ending: str) -> str:
+    """Return ``content`` with every line ending rewritten as ``target_ending``.
+
+    CRLF, lone CR and lone LF all count as line endings, so mixed input comes
+    out uniform.
+
+    A final ``target_ending`` is appended when ``content`` contains at least
+    one line ending but does not finish with one. Content without any line
+    ending (a single-line fragment, an empty string) is returned unchanged,
+    so an intentional lack of line endings is preserved.
+
+    Args:
+        content: Text whose line endings should be normalized.
+        target_ending: Ending to use, normally LF, CRLF or CR.
+
+    Returns:
+        The normalized text.
+    """
+    # No CR or LF means nothing to convert and nothing to append; returning
+    # early also leaves the text intact when target_ending is empty.
+    if '\r' not in content and '\n' not in content:
+        return content
+
+    # Collapse to LF first (CRLF before lone CR, so a pair is not counted as
+    # two breaks), then expand LF to the requested ending.
+    tmp = content.replace('\r\n', '\n').replace('\r', '\n')
+    if target_ending != '\n':
+        tmp = tmp.replace('\n', target_ending)
+
+    # The content had line endings, so the last line must carry one as well.
+    if not tmp.endswith(target_ending):
+        tmp += target_ending
+    return tmp
+
 def save_file_contents(file_path, content):
-    """Utility function to save content to a file."""
-    with open(file_path, "w") as file:
-        file.write(content)
+    """Write ``content`` to ``file_path``, keeping the file's line-ending style.
+
+    The dominant ending of an existing file is detected and ``content`` is
+    normalized to it; a missing or unreadable file falls back to LF.
+    """
+    target_ending = '\n'
+    try:
+        with open(file_path, 'rb') as f:
+            target_ending = detect_line_ending(f.read())
+    except OSError:
+        pass  # nothing readable to detect from: keep the LF default
+
+    content = normalize_line_endings(content, target_ending)
+    # Encode before opening for writing so that an encoding failure cannot
+    # leave the target file truncated.
+    try:
+        content_bytes = content.encode('utf-8')
+    except UnicodeEncodeError:
+        import locale  # only needed for the system-default fallback
+        content_bytes = content.encode(locale.getpreferredencoding(False))
+    with open(file_path, "wb") as file:
+        file.write(content_bytes)
 
 
 def handle_indent(src: list[str], target: list[str], start: int, end: int) -> list[str]:
@@ -32,21 +85,77 @@ def handle_indent(src: list[str], target: list[str], start: int, end: int) -> li
     return [indent + line for line in target]
 
 
+def detect_line_ending(content: bytes) -> str:
+    """Return the dominant line ending found in ``content``.
+
+    Lone LF, lone CR and CRLF pairs are counted separately and the most
+    frequent style wins.
+
+    Tie-breaking:
+    1. CRLF wins whenever it is at least as common as either lone style.
+    2. Otherwise LF wins over CR when the two are equally common, so the
+       result agrees with the LF default used when no ending is present.
+
+    Args:
+        content: Raw bytes to inspect; an empty value is allowed.
+
+    Returns:
+        The ending as a string: CRLF, LF or CR. LF is returned when
+        ``content`` is empty or contains no line ending at all.
+    """
+    crlf_count = content.count(b'\r\n')
+    # Each CRLF pair contributes one CR and one LF to the raw totals, so
+    # subtract the pairs to obtain the lone occurrences.
+    lf_count = content.count(b'\n') - crlf_count
+    cr_count = content.count(b'\r') - crlf_count
+
+    # Covers empty input as well: every count is zero.
+    if not (crlf_count or lf_count or cr_count):
+        return '\n'
+    if crlf_count >= max(lf_count, cr_count):
+        return '\r\n'
+    # LF is preferred on an LF/CR tie because it is the default style.
+    if lf_count >= cr_count:
+        return '\n'
+    return '\r'
+
 def replace_code_in_file(
     file_path: str,
     start_line: int | None,
     end_line: int | None,
     new_code: str,
 ) -> None:
     path = Path(file_path)
+    content = b""
+    text = ""
+    line_ending = "\n"  # default
+
+    # Read existing file and detect line endings
+    if path.exists():
+        try:
+            with open(file_path, 'rb') as f:
+                content = f.read()
+            line_ending = detect_line_ending(content)
+
+            # Try decoding with UTF-8 first, then fallback
+            try:
+                text = content.decode('utf-8')
+            except UnicodeDecodeError:
+                try:
+                    text = content.decode('latin1')
+                except Exception:
+                    # If all decodings fail, treat as empty
+                    text = ""
+        except Exception:
+            # If file can't be read, use defaults
+            pass
+
+    # Normalize the new code to match the file's line endings
+    new_code = normalize_line_endings(new_code, line_ending)
     new_code_lines = new_code.splitlines(keepends=True)
-    if len(new_code_lines) > 0 and not new_code_lines[-1].endswith("\n"):
-        new_code_lines[-1] += "\n"
 
     if path.exists() and start_line is not None and end_line is not None:
         """Replaces specified lines in a file with new code."""
-        text = path.read_text()
-
         lines = text.splitlines(keepends=True)
 
         # Insert the new code at the start line after converting it into a list of lines

diff --git a/patchwork/test_crlf.txt b/patchwork/test_crlf.txt
@@ -0,0 +1,4 @@
+line1
+new line
+with unix ending
+line3
diff --git a/patchwork/test_endings.py b/patchwork/test_endings.py
@@ -0,0 +1,71 @@
+from steps.ModifyCode.ModifyCode import replace_code_in_file
+import os
+import binascii
+
+def hex_dump(file_path):
+    """Print a hex dump of ``file_path`` followed by each line's ending type."""
+    with open(file_path, 'rb') as handle:
+        raw = handle.read()
+    print(f"\nHex dump of {file_path}:")
+    print(binascii.hexlify(raw).decode())
+
+    # latin1 maps every byte to one character, so decoding cannot fail and
+    # the CR/LF bytes survive unchanged.
+    text = raw.decode('latin1')
+    print("\nLine endings analysis:")
+    for number, line in enumerate(text.splitlines(True), start=1):
+        if line.endswith('\r\n'):
+            label = "CRLF (\\r\\n)"
+        elif line.endswith('\n'):
+            label = "LF (\\n)"
+        elif line.endswith('\r'):
+            label = "CR (\\r)"
+        else:
+            label = "NO ENDING"
+        print(f"Line {number}: {label}")
+    print()
+
+# Each case: scratch file name, initial content, 0-based line, replacement text
+test_cases = [
+    dict(
+        name='test_no_endings.txt',
+        content='line1line2line3',  # No line endings at all
+        replace_line=0,
+        new_content='new\nline\nhere\n',
+    ),
+    dict(
+        name='test_crlf.txt',
+        content='line1\r\nline2\r\nline3\r\n',
+        replace_line=1,
+        new_content='new line\nwith unix ending\n',
+    ),
+    dict(
+        name='test_lf.txt',
+        content='line1\nline2\nline3\n',
+        replace_line=1,
+        new_content='new line\r\nwith crlf ending\r\n',
+    ),
+    dict(
+        name='test_mixed.txt',
+        content='line1\r\nline2\nline3\r\nline4\n',
+        replace_line=1,
+        new_content='new line\nwith unix ending\n',
+    ),
+]
+
+for case in test_cases:
+    target, first = case['name'], case['replace_line']
+    print(f"=== Testing {target} ===")
+
+    # Write the initial content as raw bytes so no newline translation occurs
+    with open(target, 'wb') as out:
+        out.write(case['content'].encode())
+
+    print("Original file:")
+    hex_dump(target)
+
+    # Run the function under test with first and first + 1 as the line bounds
+    print(f"Replacing line {first+1} with new content...")
+    replace_code_in_file(target, first, first + 1, case['new_content'])
+    print("After replacement:")
+    hex_dump(target)
diff --git a/patchwork/test_line_endings.py b/patchwork/test_line_endings.py
@@ -0,0 +1,27 @@
+from pathlib import Path
+
+def test_line_endings():
+    """Show what a text-mode read/write round trip does to a CRLF file."""
+    # Sample content that uses CRLF endings, written as raw bytes
+    test_content = "line1\r\nline2\r\nline3\r\n"
+    test_file = "test.txt"
+    with open(test_file, "wb") as raw_out:
+        raw_out.write(test_content.encode())
+
+    # Round trip through text mode: read_text() followed by a "w" mode write
+    round_tripped = Path(test_file).read_text()
+    with open(test_file, "w") as text_out:
+        text_out.write(round_tripped)
+
+    # Read the bytes back to see which endings ended up on disk
+    with open(test_file, "rb") as raw_in:
+        result = raw_in.read().decode()
+
+    had_crlf = "\r\n" in test_content
+    has_crlf = "\r\n" in result
+    print("Original had CRLF:", had_crlf)
+    print("Result has CRLF:", has_crlf)
+    print("\nOriginal content (hex):", test_content.encode().hex())
+    print("Final content (hex):", result.encode().hex())
+
+test_line_endings()  # runs the demonstration whenever this module is executed or imported
diff --git a/patchwork/verify_endings.py b/patchwork/verify_endings.py
@@ -0,0 +1,38 @@
+from steps.ModifyCode.ModifyCode import replace_code_in_file
+import os
+import binascii
+
+def hex_dump(file_path):
+    """Print the file's bytes as hex, then classify the ending of every line."""
+    with open(file_path, 'rb') as src:
+        data = src.read()
+    print(f"Hex dump of {file_path}:")
+    print(binascii.hexlify(data).decode())
+    # Decode as latin1 (one character per byte) so the CR/LF bytes are kept
+    # exactly as they appear in the file.
+    decoded = data.decode('latin1')
+    print("\nLine endings analysis:")
+    for index, line in enumerate(decoded.splitlines(True)):
+        if line.endswith('\r\n'):
+            kind = "CRLF (\\r\\n)"
+        elif line.endswith('\n'):
+            kind = "LF (\\n)"
+        elif line.endswith('\r'):
+            kind = "CR (\\r)"
+        else:
+            kind = "NO ENDING"
+        print(f"Line {index + 1}: {kind}")
+    print()
+
+# Scratch file to inspect; it must already exist in the working directory
+test_file = "test_crlf.txt"
+replacement = "new line\nwith unix ending"  # LF inside, no final ending
+
+print("Original file:")
+hex_dump(test_file)
+
+# Swap in the replacement text through the function under test
+print("Replacing line 2 with new content...")
+replace_code_in_file(test_file, 1, 2, replacement)
+print("After replacement:")
+hex_dump(test_file)