11"""Module for handling Git operations."""
22import subprocess
33from dataclasses import dataclass
4- from typing import List , Optional
4+ from typing import List , Optional , Dict
5+ import os
56
@dataclass
class FileChange:
    """Represents changes in a single file.

    ``file_content`` is filled in by ``GitHandler.get_file_changes`` after the
    diff output has been parsed; it stays None when no content was retrieved
    for ``filename``.
    """
    # Path of the changed file; also the key used to look up file_content.
    filename: str
    # Change text for this file -- presumably the diff section produced by
    # GitHandler._parse_diff_output (not visible here); TODO confirm.
    content: str
    # Content fetched via GitHandler._batch_get_file_contents, or None.
    file_content: Optional[str] = None
1113
1214class GitHandler :
1315 @staticmethod
1416 def get_file_changes () -> List [FileChange ]:
15- """Retrieves only staged changes from Git (after git add) ."""
17+ """Retrieves staged changes from Git and their corresponding file content efficiently ."""
1618 try :
1719 # Get staged changes
1820 staged_cmd = subprocess .run (
@@ -22,12 +24,115 @@ def get_file_changes() -> List[FileChange]:
2224
2325 if not staged_cmd .stdout :
2426 return []
25-
26- return GitHandler ._parse_diff_output (staged_cmd .stdout )
27+
28+ # Parse the diff output first
29+ changes = GitHandler ._parse_diff_output (staged_cmd .stdout )
30+
31+ # Get the list of files we need content for
32+ files_to_fetch = [change .filename for change in changes ]
33+
34+ # Batch fetch file contents
35+ file_contents = GitHandler ._batch_get_file_contents (files_to_fetch )
36+
37+ # Update FileChange objects with their content
38+ for change in changes :
39+ change .file_content = file_contents .get (change .filename )
40+
41+ return changes
2742
2843 except subprocess .CalledProcessError as e :
2944 raise RuntimeError (f"Git command failed: { e .stderr } " )
3045
@staticmethod
def _batch_get_file_contents(filenames: List[str]) -> Dict[str, Optional[str]]:
    """
    Get the staged contents of several files with one ``git cat-file --batch`` call.

    Each path is requested as the index revision ``:<path>``. Git answers with
    one record per request, in request order, so records are matched to
    filenames by position rather than by object ID (two files with identical
    content share an object ID).

    Args:
        filenames: Paths whose staged content is wanted.

    Returns:
        A dictionary with one entry per requested filename. The value is the
        staged text, or None when git reports no blob for the path (for
        example a staged deletion) or the blob is not valid UTF-8. If the
        batch call cannot be used or fails, the result of
        ``GitHandler._fallback_get_file_contents`` is returned instead.
    """
    if not filenames:
        return {}

    # The batch protocol reads one request per line, so a path containing a
    # newline cannot be expressed as a single request.
    if any('\n' in filename for filename in filenames):
        return GitHandler._fallback_get_file_contents(filenames)

    try:
        # No quotes around the path: the request bypasses the shell, so quote
        # characters would be treated as part of the file name.
        request = ''.join(f':{filename}\n' for filename in filenames)

        batch_cmd = subprocess.run(
            ['git', 'cat-file', '--batch'],
            input=request.encode(),
            capture_output=True, check=True
        )
        raw = batch_cmd.stdout

        contents: Dict[str, Optional[str]] = {}
        pos = 0
        for filename in filenames:
            header_end = raw.find(b'\n', pos)
            if header_end == -1:
                raise ValueError('truncated git cat-file output')
            header = raw[pos:header_end]
            pos = header_end + 1

            # A found object is reported as "<oid> <type> <size>"; anything
            # else (e.g. "<request> missing") carries no content.
            fields = header.rsplit(b' ', 2)
            if len(fields) != 3 or not fields[2].isdigit():
                contents[filename] = None
                continue

            # Read exactly <size> bytes so file text that happens to look
            # like a header is never misinterpreted.
            size = int(fields[2])
            body = raw[pos:pos + size]
            if len(body) != size:
                raise ValueError('truncated git cat-file output')
            pos += size + 1  # skip the newline git appends after the content

            if fields[1] != b'blob':
                contents[filename] = None
                continue

            try:
                contents[filename] = body.decode()
            except UnicodeDecodeError:
                # Binary or non-UTF-8 content cannot be returned as str.
                contents[filename] = None

        return contents

    except (OSError, subprocess.SubprocessError, ValueError):
        # If batch operation fails, fall back to individual git show commands
        return GitHandler._fallback_get_file_contents(filenames)
120+
@staticmethod
def _fallback_get_file_contents(filenames: List[str]) -> Dict[str, Optional[str]]:
    """Fallback method to get file contents using git show.

    Runs ``git show :<path>`` once per file.

    Args:
        filenames: Paths whose staged content is wanted.

    Returns:
        A dictionary mapping each filename to the command's text output, or
        to None when the command exits with an error or its output cannot be
        decoded as text (for example a binary file).
    """
    contents: Dict[str, Optional[str]] = {}
    for filename in filenames:
        try:
            show_cmd = subprocess.run(
                ['git', 'show', f':{filename}'],
                capture_output=True, text=True, check=True
            )
        except (subprocess.CalledProcessError, UnicodeDecodeError):
            # text=True decodes the output strictly; one undecodable file
            # must not abort retrieval for the remaining files.
            contents[filename] = None
        else:
            contents[filename] = show_cmd.stdout
    return contents
135+
31136 @staticmethod
32137 def _parse_diff_output (diff_output : str ) -> List [FileChange ]:
33138 """Parse git diff output into FileChange objects."""
0 commit comments