diff --git a/patchwork/steps/FixIssue/FixIssue.py b/patchwork/steps/FixIssue/FixIssue.py
index 7652ede25..bcb81e7a2 100644
--- a/patchwork/steps/FixIssue/FixIssue.py
+++ b/patchwork/steps/FixIssue/FixIssue.py
@@ -1,8 +1,10 @@
+import difflib
 import re
 from pathlib import Path
 from typing import Any, Optional
 
-from git import Repo
+from git import InvalidGitRepositoryError, NoSuchPathError, Repo
+from patchwork.logger import logger
 from openai.types.chat import ChatCompletionMessageParam
 
 from patchwork.common.client.llm.aio import AioLlmClient
@@ -97,11 +99,31 @@ def is_stop(self, messages: list[ChatCompletionMessageParam]) -> bool:
 
 class FixIssue(Step, input_class=FixIssueInputs, output_class=FixIssueOutputs):
     def __init__(self, inputs):
+        """Initialize the FixIssue step.
+
+        Args:
+            inputs: Dictionary containing input parameters including:
+                - base_path: Optional path to the repository root
+                - Other LLM-related parameters
+        """
         super().__init__(inputs)
-        self.base_path = inputs.get("base_path")
-        if self.base_path is None:
-            repo = Repo(Path.cwd(), search_parent_directories=True)
-            self.base_path = repo.working_tree_dir
+        cwd = str(Path.cwd())
+        original_base_path = inputs.get("base_path")
+
+        if original_base_path is not None:
+            original_base_path = str(Path(str(original_base_path)).resolve())
+
+        # Not being inside a git repository is fine: diffs are simply skipped
+        try:
+            self.repo = Repo(original_base_path or cwd, search_parent_directories=True)
+        except (InvalidGitRepositoryError, NoSuchPathError):
+            self.repo = None
+
+        repo_working_dir = None
+        if self.repo is not None:
+            repo_working_dir = self.repo.working_tree_dir
+
+        self.base_path = original_base_path or repo_working_dir or cwd
 
         llm_client = AioLlmClient.create_aio_client(inputs)
         if llm_client is None:
@@ -122,10 +144,40 @@ def __init__(self, inputs):
         )
 
     def run(self):
+        """Execute the FixIssue step.
+
+        This method:
+        1. Executes the multi-turn LLM conversation to analyze and fix the issue
+        2. Tracks file modifications made by the CodeEditTool
+        3. Collects the `git diff HEAD` output of each modified file, if in a git repo
+
+        Returns:
+            dict: Dictionary containing list of modified files with their diffs
+        """
         self.multiturn_llm_call.execute(limit=100)
+
+        modified_files = []
+        cwd = Path.cwd()
         for tool in self.multiturn_llm_call.tool_set.values():
-            if isinstance(tool, CodeEditTool):
-                cwd = Path.cwd()
-                modified_files = [file_path.relative_to(cwd) for file_path in tool.tool_records["modified_files"]]
-                return dict(modified_files=[{"path": str(file)} for file in modified_files])
-        return dict()
+            if not isinstance(tool, CodeEditTool):
+                continue
+            tool_modified_files = [
+                dict(path=str(file_path.relative_to(cwd)), diff="")
+                for file_path in tool.tool_records["modified_files"]
+            ]
+            modified_files.extend(tool_modified_files)
+
+        # Generate diffs for modified files
+        # Only try to generate git diff if we're in a git repository
+        if self.repo is not None:
+            for modified_file in modified_files:
+                file = modified_file["path"]
+                try:
+                    # "--" keeps git from reading the path as a revision
+                    diff = self.repo.git.diff("HEAD", "--", file)
+                    modified_file["diff"] = diff or ""
+                except Exception as e:
+                    # Git-specific errors (untracked files, etc) - keep empty diff
+                    logger.warning(f"Could not get git diff for {file}: {str(e)}")
+
+        return dict(modified_files=modified_files)
diff --git a/patchwork/steps/FixIssue/typed.py b/patchwork/steps/FixIssue/typed.py
index 35732a6fa..2ead5452f 100644
--- a/patchwork/steps/FixIssue/typed.py
+++ b/patchwork/steps/FixIssue/typed.py
@@ -35,5 +35,22 @@ class FixIssueInputs(__FixIssueRequiredInputs, total=False):
     ]
 
 
+class ModifiedFile(TypedDict):
+    """Represents a file that has been modified by the FixIssue step.
+
+    Attributes:
+        path: The path to the modified file, relative to the current
+              working directory at the time the step ran
+        diff: The output of `git diff HEAD` for the file, or an empty
+              string when no diff could be obtained.
+
+    Note:
+        The diff is only populated when the step runs inside a git
+        repository; otherwise it is left empty.
+    """
+    path: str
+    diff: str
+
+
 class FixIssueOutputs(TypedDict):
-    modified_files: List[Dict]
+    modified_files: List[ModifiedFile]
diff --git a/patchwork/steps/ModifyCode/ModifyCode.py b/patchwork/steps/ModifyCode/ModifyCode.py
index f7c6d7e4f..5f517c1e4 100644
--- a/patchwork/steps/ModifyCode/ModifyCode.py
+++ b/patchwork/steps/ModifyCode/ModifyCode.py
@@ -1,13 +1,21 @@
 from __future__ import annotations
 
+import difflib
 from pathlib import Path
 
+from patchwork.logger import logger
 from patchwork.step import Step, StepStatus
 
 
-def save_file_contents(file_path, content):
-    """Utility function to save content to a file."""
-    with open(file_path, "w") as file:
+def save_file_contents(file_path: str | Path, content: str) -> None:
+    """Utility function to save content to a file.
+
+    Args:
+        file_path: Path to the file to save content to (str or Path)
+        content: Content to write to the file
+    """
+    path = Path(file_path)
+    with path.open("w") as file:
         file.write(content)
 
 
@@ -33,20 +41,26 @@ def handle_indent(src: list[str], target: list[str], start: int, end: int) -> li
 
 
 def replace_code_in_file(
-    file_path: str,
+    file_path: str | Path,
     start_line: int | None,
     end_line: int | None,
     new_code: str,
 ) -> None:
+    """Replace code in a file at the specified line range.
+
+    Args:
+        file_path: Path to the file to modify (str or Path)
+        start_line: Starting line number (1-based)
+        end_line: Ending line number (1-based)
+        new_code: New code to insert
+    """
     path = Path(file_path)
     new_code_lines = new_code.splitlines(keepends=True)
     if len(new_code_lines) > 0 and not new_code_lines[-1].endswith("\n"):
         new_code_lines[-1] += "\n"
 
     if path.exists() and start_line is not None and end_line is not None:
-        """Replaces specified lines in a file with new code."""
         text = path.read_text()
-
         lines = text.splitlines(keepends=True)
 
         # Insert the new code at the start line after converting it into a list of lines
@@ -55,7 +69,7 @@ def replace_code_in_file(
         lines = new_code_lines
 
     # Save the modified contents back to the file
-    save_file_contents(file_path, "".join(lines))
+    save_file_contents(path, "".join(lines))
 
 
 class ModifyCode(Step):
@@ -81,7 +95,8 @@ def run(self) -> dict:
             return dict(modified_code_files=[])
 
         for code_snippet, extracted_response in sorted_list:
-            uri = code_snippet.get("uri")
+            # Use Path for consistent path handling
+            file_path = Path(code_snippet.get("uri", ""))
             start_line = code_snippet.get("startLine")
             end_line = code_snippet.get("endLine")
             new_code = extracted_response.get("patch")
@@ -89,8 +104,48 @@ def run(self) -> dict:
             if new_code is None:
                 continue
 
-            replace_code_in_file(uri, start_line, end_line, new_code)
-            modified_code_file = dict(path=uri, start_line=start_line, end_line=end_line, **extracted_response)
+            # Placeholder reported whenever a real unified diff cannot be produced
+            fallback_diff = f"+++ {file_path}\n{new_code}"
+
+            # Snapshot the original content in memory before editing. A failed
+            # snapshot only costs us the diff; it must not affect the edit itself.
+            original_content = None
+            try:
+                original_content = file_path.read_text() if file_path.exists() else ""
+            except OSError as e:
+                logger.warning(f"Failed to generate diff for {file_path}: {str(e)}")
+
+            # Apply the change exactly once. Retrying from an error handler could
+            # splice the patch into content that has already been modified.
+            replace_code_in_file(file_path, start_line, end_line, new_code)
+
+            diff = fallback_diff
+            if original_content is not None:
+                try:
+                    current_content = file_path.read_text() if file_path.exists() else ""
+                except OSError as e:
+                    logger.warning(f"Failed to generate diff for {file_path}: {str(e)}")
+                else:
+                    diff = "".join(
+                        difflib.unified_diff(
+                            original_content.splitlines(keepends=True),
+                            current_content.splitlines(keepends=True),
+                            fromfile=f"a/{file_path}",
+                            tofile=f"b/{file_path}",
+                        )
+                    )
+                    if not diff and new_code:
+                        # Patch left the file unchanged: report the new code instead
+                        diff = fallback_diff
+
+            # Create the modified code file dictionary
+            modified_code_file = dict(
+                path=str(file_path),
+                start_line=start_line,
+                end_line=end_line,
+                diff=diff,
+                **extracted_response,
+            )
             modified_code_files.append(modified_code_file)
 
         return dict(modified_code_files=modified_code_files)
diff --git a/patchwork/steps/ModifyCode/typed.py b/patchwork/steps/ModifyCode/typed.py
index 2cfba5e8f..6f26f79b4 100644
--- a/patchwork/steps/ModifyCode/typed.py
+++ b/patchwork/steps/ModifyCode/typed.py
@@ -11,6 +11,22 @@ class ModifyCodeOutputs(TypedDict):
 
 
 class ModifiedCodeFile(TypedDict, total=False):
+    """Represents a file that has been modified by the ModifyCode step.
+
+    Attributes:
+        path: The path to the modified file
+        start_line: The starting line number of the modification (1-based)
+        end_line: The ending line number of the modification (1-based)
+        diff: A unified diff string showing the changes made to the file.
+              Generated using Python's difflib for in-memory comparison
+              of original and modified file contents.
+
+    Note:
+        The diff field is generated using difflib.unified_diff() on the file
+        contents read before and after the edit. When no unified diff can be
+        produced it falls back to "+++ <path>" followed by the new code.
+    """
     path: str
     start_line: int
     end_line: int
+    diff: str
diff --git a/pyproject.toml b/pyproject.toml
index aa7a33390..9d5da77d1 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "patchwork-cli"
-version = "0.0.90"
+version = "0.0.91"
 description = ""
 authors = ["patched.codes"]
 license = "AGPL"