1+ import difflib
12import re
2- import shlex
33from pathlib import Path
44from typing import Any , Optional
55
6- from git import Repo
7- from git .exc import GitCommandError
86from openai .types .chat import ChatCompletionMessageParam
97
108from patchwork .common .client .llm .aio import AioLlmClient
@@ -99,11 +97,20 @@ def is_stop(self, messages: list[ChatCompletionMessageParam]) -> bool:
9997
10098class FixIssue (Step , input_class = FixIssueInputs , output_class = FixIssueOutputs ):
10199 def __init__ (self , inputs ):
100+ """Initialize the FixIssue step.
101+
102+ Args:
103+ inputs: Dictionary containing input parameters including:
104+ - base_path: Optional path to the repository root
105+ - Other LLM-related parameters
106+ """
102107 super ().__init__ (inputs )
103- self .base_path = inputs .get ("base_path" )
104- if self .base_path is None :
105- repo = Repo (Path .cwd (), search_parent_directories = True )
106- self .base_path = repo .working_tree_dir
108+ base_path = inputs .get ("base_path" )
109+ # Handle base_path carefully to avoid type issues
110+ if base_path is not None :
111+ self .base_path = str (Path (str (base_path )).resolve ())
112+ else :
113+ self .base_path = str (Path .cwd ())
107114
108115 llm_client = AioLlmClient .create_aio_client (inputs )
109116 if llm_client is None :
@@ -124,47 +131,66 @@ def __init__(self, inputs):
124131 )
125132
def run(self):
    """Run the multi-turn LLM conversation and report the files it edited.

    Steps:
        1. Execute the conversation with ``limit=100``.
        2. Take the first ``CodeEditTool`` found in the tool set and read the
           paths it recorded under ``tool_records["modified_files"]``.
        3. For each path, build a unified diff between the version stored at
           git ``HEAD`` and the current on-disk content.

    The baseline cannot be read from the working tree: the tool records only
    exist once ``execute`` has returned, and by then the edits are already on
    disk, so reading the file twice would compare it with itself and never
    produce a diff. When no baseline can be obtained (git missing, not a
    repository, file absent from ``HEAD``) the file is treated as newly
    created. Because the baseline is ``HEAD``, uncommitted changes that
    predate the run show up in the diff as well.

    Files that cannot be read or decoded as UTF-8 are skipped with a warning
    instead of aborting the whole step.

    Returns:
        dict: ``{"modified_files": [{"path": str, "diff": str}, ...]}`` when a
        ``CodeEditTool`` is present (only files that differ from the baseline
        are listed); an empty dict when there is no such tool.
    """
    # Local import keeps this method self-contained; the module's import
    # block does not need to change.
    import subprocess

    def read_head_version(work_dir, rev_spec):
        """Return the text of ``rev_spec`` from git, or "" when unavailable."""
        try:
            completed = subprocess.run(
                ["git", "show", rev_spec],  # argument list, never a shell string
                cwd=str(work_dir),
                capture_output=True,
                check=False,
            )
        except OSError:
            # git is not installed, or work_dir no longer exists.
            return ""
        if completed.returncode != 0:
            # Not a repository, or the path is not part of HEAD (new file).
            return ""
        text = completed.stdout.decode("utf-8")
        # Mirror the newline translation read_text() applies to the working copy.
        return text.replace("\r\n", "\n").replace("\r", "\n")

    def split_lines(text):
        """Split on "\\n" only, keeping the terminator on every full line."""
        pieces = text.split("\n")
        lines = [piece + "\n" for piece in pieces[:-1]]
        if pieces[-1]:
            lines.append(pieces[-1])
        return lines

    self.multiturn_llm_call.execute(limit=100)

    for tool in self.multiturn_llm_call.tool_set.values():
        if not isinstance(tool, CodeEditTool):
            continue

        cwd = Path.cwd()
        modified_files_with_diffs = []

        for recorded_path in tool.tool_records["modified_files"]:
            absolute_path = Path(recorded_path)
            if not absolute_path.is_absolute():
                absolute_path = cwd / absolute_path

            try:
                display_path = absolute_path.relative_to(cwd)
                git_dir = cwd
                rev_spec = f"HEAD:./{display_path.as_posix()}"
            except ValueError:
                # File lives outside the working directory: report it by its
                # absolute path rather than failing the whole step.
                display_path = absolute_path
                git_dir = absolute_path.parent
                rev_spec = f"HEAD:./{absolute_path.name}"

            try:
                # A missing file means it was deleted; diff against empty content.
                current_content = (
                    absolute_path.read_text(encoding="utf-8")
                    if absolute_path.exists()
                    else ""
                )
                original_content = read_head_version(git_dir, rev_spec)
            except (OSError, UnicodeDecodeError) as e:
                print(f"Warning: Failed to generate diff for {display_path}: {str(e)}")
                continue

            diff_lines = []
            for line in difflib.unified_diff(
                split_lines(original_content),
                split_lines(current_content),
                fromfile=f"a/{display_path}",
                tofile=f"b/{display_path}",
            ):
                if not line.endswith("\n"):
                    # difflib emits the last line verbatim; without this marker
                    # it would be fused with whatever follows in the joined diff.
                    line += "\n\\ No newline at end of file\n"
                diff_lines.append(line)
            diff = "".join(diff_lines)

            if diff:  # only report files that actually changed
                modified_files_with_diffs.append(
                    {
                        "path": str(display_path),
                        "diff": diff,
                    }
                )

        return dict(modified_files=modified_files_with_diffs)
    return dict()
0 commit comments