Skip to content

Commit 278fde2

Browse files
refactor: remove git dependency in FixIssue, switch to in-memory diff in FixIssue & ModifyCode
Co-Authored-By: Patched <[email protected]>
1 parent d424ceb commit 278fde2

File tree

4 files changed

+104
-101
lines changed

4 files changed

+104
-101
lines changed

patchwork/steps/FixIssue/FixIssue.py

Lines changed: 65 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,8 @@
1+
import difflib
12
import re
2-
import shlex
33
from pathlib import Path
44
from typing import Any, Optional
55

6-
from git import Repo
7-
from git.exc import GitCommandError
86
from openai.types.chat import ChatCompletionMessageParam
97

108
from patchwork.common.client.llm.aio import AioLlmClient
@@ -99,11 +97,20 @@ def is_stop(self, messages: list[ChatCompletionMessageParam]) -> bool:
9997

10098
class FixIssue(Step, input_class=FixIssueInputs, output_class=FixIssueOutputs):
10199
def __init__(self, inputs):
100+
"""Initialize the FixIssue step.
101+
102+
Args:
103+
inputs: Dictionary containing input parameters including:
104+
- base_path: Optional path to the repository root
105+
- Other LLM-related parameters
106+
"""
102107
super().__init__(inputs)
103-
self.base_path = inputs.get("base_path")
104-
if self.base_path is None:
105-
repo = Repo(Path.cwd(), search_parent_directories=True)
106-
self.base_path = repo.working_tree_dir
108+
base_path = inputs.get("base_path")
109+
# Handle base_path carefully to avoid type issues
110+
if base_path is not None:
111+
self.base_path = str(Path(str(base_path)).resolve())
112+
else:
113+
self.base_path = str(Path.cwd())
107114

108115
llm_client = AioLlmClient.create_aio_client(inputs)
109116
if llm_client is None:
@@ -124,47 +131,66 @@ def __init__(self, inputs):
124131
)
125132

126133
def run(self):
134+
"""Execute the FixIssue step.
135+
136+
This method:
137+
1. Executes the multi-turn LLM conversation to analyze and fix the issue
138+
2. Tracks file modifications made by the CodeEditTool
139+
3. Generates in-memory diffs for all modified files
140+
141+
Returns:
142+
dict: Dictionary containing list of modified files with their diffs
143+
"""
127144
self.multiturn_llm_call.execute(limit=100)
128145
for tool in self.multiturn_llm_call.tool_set.values():
129146
if isinstance(tool, CodeEditTool):
130147
cwd = Path.cwd()
131148
modified_files = [file_path.relative_to(cwd) for file_path in tool.tool_records["modified_files"]]
132-
# Get the diff for each modified file using git
149+
# Generate diffs for modified files using in-memory comparison
133150
modified_files_with_diffs = []
134-
repo = Repo(cwd, search_parent_directories=True)
151+
file_contents = {} # Store original contents before modifications
152+
153+
# First pass: store original contents
135154
for file in modified_files:
136-
# Sanitize the file path to prevent command injection
137-
safe_file = shlex.quote(str(file))
155+
file_path = Path(file)
138156
try:
139-
# Check if file is tracked by git, even if deleted
140-
is_tracked = str(file) in repo.git.ls_files('--', safe_file).splitlines()
141-
is_staged = str(file) in repo.git.diff('--cached', '--name-only', safe_file).splitlines()
142-
is_unstaged = str(file) in repo.git.diff('--name-only', safe_file).splitlines()
157+
if file_path.exists():
158+
file_contents[str(file)] = file_path.read_text()
159+
else:
160+
file_contents[str(file)] = ""
161+
except (OSError, IOError) as e:
162+
print(f"Warning: Failed to read original content for {file}: {str(e)}")
163+
file_contents[str(file)] = ""
164+
165+
# Apply modifications through CodeEditTool (happens in the background)
166+
167+
# Second pass: generate diffs
168+
for file in modified_files:
169+
file_path = Path(file)
170+
try:
171+
# Get current content after modifications
172+
current_content = file_path.read_text() if file_path.exists() else ""
173+
original_content = file_contents.get(str(file), "")
143174

144-
if is_tracked or is_staged or is_unstaged:
145-
# Get both staged and unstaged changes
146-
staged_diff = repo.git.diff('--cached', safe_file) if is_staged else ""
147-
unstaged_diff = repo.git.diff(safe_file) if is_unstaged else ""
148-
149-
# Combine both diffs
150-
combined_diff = staged_diff + ('\n' + unstaged_diff if unstaged_diff else '')
151-
152-
if combined_diff.strip():
153-
# Validate dictionary structure before adding
154-
modified_file = {
155-
"path": str(file),
156-
"diff": combined_diff
157-
}
158-
# Ensure all required fields are present with correct types
159-
if not isinstance(modified_file["path"], str):
160-
raise TypeError(f"path must be str, got {type(modified_file['path'])}")
161-
if not isinstance(modified_file["diff"], str):
162-
raise TypeError(f"diff must be str, got {type(modified_file['diff'])}")
163-
modified_files_with_diffs.append(modified_file)
164-
except GitCommandError as e:
165-
# Log the error but continue processing other files
166-
print(f"Warning: Failed to generate diff for {safe_file}: {str(e)}")
167-
continue
175+
# Generate unified diff
176+
fromfile = f"a/{file}"
177+
tofile = f"b/{file}"
178+
diff = "".join(difflib.unified_diff(
179+
original_content.splitlines(keepends=True),
180+
current_content.splitlines(keepends=True),
181+
fromfile=fromfile,
182+
tofile=tofile
183+
))
168184

185+
if diff: # Only add if there are actual changes
186+
modified_file = {
187+
"path": str(file),
188+
"diff": diff
189+
}
190+
modified_files_with_diffs.append(modified_file)
191+
except (OSError, IOError) as e:
192+
print(f"Warning: Failed to generate diff for {file}: {str(e)}")
193+
continue
194+
169195
return dict(modified_files=modified_files_with_diffs)
170196
return dict()

patchwork/steps/FixIssue/typed.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -41,8 +41,12 @@ class ModifiedFile(TypedDict):
4141
Attributes:
4242
path: The relative path to the modified file from the repository root
4343
diff: A unified diff string showing the changes made to the file.
44-
The diff includes both staged and unstaged changes, and is
45-
generated using git diff commands with proper path sanitization.
44+
Generated using Python's difflib to compare the original and
45+
modified file contents in memory.
46+
47+
Note:
48+
The diff is generated by comparing file contents before and after
49+
modifications, without relying on version control systems.
4650
"""
4751
path: str
4852
diff: str

patchwork/steps/ModifyCode/ModifyCode.py

Lines changed: 28 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,6 @@
11
from __future__ import annotations
22

33
import difflib
4-
import shutil
5-
import tempfile
64
from pathlib import Path
75

86
from patchwork.step import Step, StepStatus
@@ -107,67 +105,42 @@ def run(self) -> dict:
107105

108106
# Get the original content for diffing
109107
diff = ""
108+
try:
109+
# Store original content in memory
110+
original_content = file_path.read_text() if file_path.exists() else ""
111+
112+
# Apply the changes
113+
replace_code_in_file(file_path, start_line, end_line, new_code)
114+
115+
# Read modified content
116+
current_content = file_path.read_text() if file_path.exists() else ""
117+
118+
# Generate unified diff
119+
fromfile = f"a/{file_path}"
120+
tofile = f"b/{file_path}"
121+
diff = "".join(difflib.unified_diff(
122+
original_content.splitlines(keepends=True),
123+
current_content.splitlines(keepends=True),
124+
fromfile=fromfile,
125+
tofile=tofile
126+
))
127+
128+
if not diff and new_code: # If no diff but we have new code (new file)
129+
diff = f"+++ {file_path}\n{new_code}"
130+
except (OSError, IOError) as e:
131+
print(f"Warning: Failed to generate diff for {file_path}: {str(e)}")
132+
# Still proceed with the modification even if diff generation fails
133+
replace_code_in_file(file_path, start_line, end_line, new_code)
134+
diff = f"+++ {file_path}\n{new_code}" # Use new code as diff on error
110135

111-
if file_path.exists():
112-
try:
113-
# Create a temporary directory with restricted permissions
114-
with tempfile.TemporaryDirectory(prefix='modifycode_') as temp_dir:
115-
# Create temporary file path within the secure directory
116-
temp_path = Path(temp_dir) / 'original_file'
117-
118-
# Copy original file with same permissions
119-
shutil.copy2(file_path, temp_path)
120-
121-
# Store original content
122-
with temp_path.open('r') as f1:
123-
original_lines = f1.readlines()
124-
125-
# Apply the changes
126-
replace_code_in_file(file_path, start_line, end_line, new_code)
127-
128-
# Read modified content
129-
with file_path.open('r') as f2:
130-
modified_lines = f2.readlines()
131-
132-
# Generate a proper unified diff
133-
# Use Path for consistent path handling
134-
relative_path = str(file_path)
135-
diff = ''.join(difflib.unified_diff(
136-
original_lines,
137-
modified_lines,
138-
fromfile=str(Path('a') / relative_path),
139-
tofile=str(Path('b') / relative_path)
140-
))
141-
142-
# temp_dir and its contents are automatically cleaned up
143-
except (OSError, IOError) as e:
144-
print(f"Warning: Failed to generate diff for {file_path}: {str(e)}")
145-
# Still proceed with the modification even if diff generation fails
146-
replace_code_in_file(file_path, start_line, end_line, new_code)
147-
else:
148-
# If file doesn't exist, just store the new code as the diff
149-
# Use Path for consistent path handling
150-
relative_path = str(file_path)
151-
diff = f"+++ {Path(relative_path)}\n{new_code}"
152-
153-
# Create and validate the modified code file dictionary
136+
# Create the modified code file dictionary
154137
modified_code_file = dict(
155138
path=str(file_path),
156139
start_line=start_line,
157140
end_line=end_line,
158141
diff=diff,
159142
**extracted_response
160143
)
161-
162-
# Ensure all required fields are present with correct types
163-
if not isinstance(modified_code_file["path"], str):
164-
raise TypeError(f"path must be str, got {type(modified_code_file['path'])}")
165-
if not isinstance(modified_code_file["start_line"], (int, type(None))):
166-
raise TypeError(f"start_line must be int or None, got {type(modified_code_file['start_line'])}")
167-
if not isinstance(modified_code_file["end_line"], (int, type(None))):
168-
raise TypeError(f"end_line must be int or None, got {type(modified_code_file['end_line'])}")
169-
if not isinstance(modified_code_file["diff"], str):
170-
raise TypeError(f"diff must be str, got {type(modified_code_file['diff'])}")
171144
modified_code_files.append(modified_code_file)
172145

173146
return dict(modified_code_files=modified_code_files)

patchwork/steps/ModifyCode/typed.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -18,13 +18,13 @@ class ModifiedCodeFile(TypedDict, total=False):
1818
start_line: The starting line number of the modification (1-based)
1919
end_line: The ending line number of the modification (1-based)
2020
diff: A unified diff string showing the changes made to the file.
21-
Generated using Python's difflib with proper file handling
22-
and cleanup of temporary files.
21+
Generated using Python's difflib for in-memory comparison
22+
of original and modified file contents.
2323
2424
Note:
25-
The diff field is generated securely using temporary files that
26-
are automatically cleaned up. File paths are properly sanitized
27-
to prevent potential security issues.
25+
The diff field is generated using difflib.unified_diff() to compare
26+
the original and modified file contents in memory, ensuring efficient
27+
and secure diff generation.
2828
"""
2929
path: str
3030
start_line: int

0 commit comments

Comments
 (0)