diff --git a/patchwork/steps/ModifyCode/ModifyCode.py b/patchwork/steps/ModifyCode/ModifyCode.py index f7c6d7e4f..4bc2003cd 100644 --- a/patchwork/steps/ModifyCode/ModifyCode.py +++ b/patchwork/steps/ModifyCode/ModifyCode.py @@ -1,23 +1,48 @@ from __future__ import annotations from pathlib import Path +from typing import Union, List from patchwork.step import Step, StepStatus -def save_file_contents(file_path, content): - """Utility function to save content to a file.""" - with open(file_path, "w") as file: +def save_file_contents(file_path: str, content: Union[str, bytes]) -> None: + """Utility function to save content to a file in binary mode to preserve line endings. + + Args: + file_path: Path to the file to write + content: Content to write, either as string or bytes. If string, it will be encoded as UTF-8.""" + with open(file_path, "wb") as file: + # Convert string to bytes if needed + if isinstance(content, str): + content = content.encode('utf-8') file.write(content) -def handle_indent(src: list[str], target: list[str], start: int, end: int) -> list[str]: +def handle_indent(src: List[str], target: List[str], start: int, end: int) -> List[str]: + """Handles indentation of new code to match the original code's indentation level. + + Args: + src: Source lines from the original file + target: New lines that need to be indented + start: Start line number in the source file + end: End line number in the source file + + Returns: + List of strings with proper indentation applied + + Note: + - If target is empty, returns it as is + - If start equals end, uses start + 1 as end to ensure at least one line + - Preserves existing indentation characters (spaces or tabs) + """ if len(target) < 1: return target if start == end: end = start + 1 + # Find first non-empty line in source and target first_src_line = next((line for line in src[start:end] if line.strip() != ""), "") src_indent_count = len(first_src_line) - len(first_src_line.lstrip()) first_target_line = next((line for line in target if line.strip() != ""), "") @@ -26,36 +51,101 @@ def handle_indent(src: list[str], target: list[str], start: int, end: int) -> li indent = "" if indent_diff > 0: + # Use the same indentation character as the source (space or tab) indent_unit = first_src_line[0] indent = indent_unit * indent_diff return [indent + line for line in target] +def detect_line_ending(content: bytes) -> bytes: + """Detect the dominant line ending style in the given bytes content. + + Args: + content: File content in bytes to analyze + + Returns: + The detected line ending as bytes (b'\\r\\n', b'\\n', or b'\\r') + + Note: + - Counts occurrences of different line endings (CRLF, LF, CR) + - Returns the most common line ending + - Handles cases where \r\n is treated as one ending, not two + - Defaults to \\n if no line endings are found""" + crlf_count = content.count(b'\r\n') + lf_count = content.count(b'\n') - crlf_count # Don't count \n that are part of \r\n + cr_count = content.count(b'\r') - crlf_count # Don't count \r that are part of \r\n + + if crlf_count > max(lf_count, cr_count): + return b'\r\n' + elif lf_count > cr_count: + return b'\n' + elif cr_count > 0: + return b'\r' + return b'\n' # Default to \n if no line endings found + def replace_code_in_file( file_path: str, - start_line: int | None, - end_line: int | None, + start_line: Union[int, None], + end_line: Union[int, None], new_code: str, ) -> None: + """Replace specified lines in a file with new code while preserving line endings. + + Args: + file_path: Path to the file to modify + start_line: Starting line number for replacement (0-based). If None, writes entire file + end_line: Ending line number for replacement (0-based). If None, writes entire file + new_code: New content to insert + + Note: + - Preserves the original file's line ending style (CRLF, LF, or CR) + - Handles indentation to match the original code + - Creates new file if it doesn't exist + - Uses system default line ending for new files + - Ensures all lines end with proper line ending + - Preserves UTF-8 encoding + """ path = Path(file_path) + + # Convert new_code to use \n for initial splitting new_code_lines = new_code.splitlines(keepends=True) if len(new_code_lines) > 0 and not new_code_lines[-1].endswith("\n"): new_code_lines[-1] += "\n" if path.exists() and start_line is not None and end_line is not None: """Replaces specified lines in a file with new code.""" - text = path.read_text() - + # Read file in binary mode to preserve original line endings + with open(file_path, 'rb') as f: + content = f.read() + + # Detect original line ending + line_ending = detect_line_ending(content) + + # Decode content for line operations + text = content.decode('utf-8') lines = text.splitlines(keepends=True) - - # Insert the new code at the start line after converting it into a list of lines + + # Handle indentation for new code lines lines[start_line:end_line] = handle_indent(lines, new_code_lines, start_line, end_line) + + # Join all lines and encode ensuring all line endings match the original + result = ''.join(lines) + # Normalize to \n first + result = result.replace('\r\n', '\n').replace('\r', '\n') + # Then convert to detected line ending + if line_ending == b'\r\n': + result = result.replace('\n', '\r\n') + elif line_ending == b'\r': + result = result.replace('\n', '\r') + + content = result.encode('utf-8') else: - lines = new_code_lines + # For new files, use system default line ending + content = ''.join(new_code_lines).encode('utf-8') # Save the modified contents back to the file - save_file_contents(file_path, "".join(lines)) + save_file_contents(file_path, content) class ModifyCode(Step): diff --git a/patchwork/test_line_endings.py b/patchwork/test_line_endings.py new file mode 100644 index 000000000..1ae66321a --- /dev/null +++ b/patchwork/test_line_endings.py @@ -0,0 +1,29 @@ +import tempfile +import os + +def test_line_endings(): + # Create a test file with CRLF line endings + content = "line1\r\nline2\r\nline3\r\n" + + with tempfile.NamedTemporaryFile(delete=False, mode='wb') as f: + f.write(content.encode('utf-8')) + temp_path = f.name + + # Now try to modify it using our ModifyCode functions + from steps.ModifyCode.ModifyCode import replace_code_in_file + + # Try to replace the second line + replace_code_in_file(temp_path, 1, 2, "new line2\r\n") + + # Read the result + with open(temp_path, 'rb') as f: + result = f.read().decode('utf-8') + + print("Original line endings preserved?" + str("\r\n" in result)) + print("Result content (in hex):") + print(''.join(hex(ord(c))[2:].zfill(2) for c in result)) + + os.unlink(temp_path) + +if __name__ == "__main__": + test_line_endings() \ No newline at end of file