1+ """
2+ Helper functions to load and process commit data from GitHub
3+ """
4+ import re
5+ import requests
6+
7+
class CommitParse:
    """Flat record describing one patch (hunk) of one file in a commit.

    Only the repository owner, repository name and commit SHA are supplied
    at construction time. Every other attribute starts as ``None`` and is
    meant to be assigned afterwards by the code that parses the commit.

    The order in which attributes are defined is kept stable because
    instances are exported through ``__dict__``, so the definition order
    becomes the key order of the exported dictionaries.
    """

    def __init__(self, repo_owner: str, repo_name: str, sha: str) -> None:
        """Create an empty record for the given repository and commit.

        Args:
            repo_owner (str): Repo owner
            repo_name (str): Repo name
            sha (str): Target commit SHA
        """
        # Commit identity
        self.repo_owner = repo_owner
        self.repo_name = repo_name
        self.sha = sha
        self.message = None

        # File-level information
        self.file_name = None
        self.file_number = None
        self.file_extension = None
        self.total_files_changed = None
        self.raw_file_patch = None

        # Patch (hunk) identity and raw text
        self.patch_number = None
        self.total_patches = None
        self.raw_patch_header = None
        self.raw_patch = None

        # Original ("-") side of the patch
        self.original_code = None
        self.original_line_start = None
        self.original_line_length = None
        self.original_line_end = None

        # Modified ("+") side of the patch
        self.modified_code = None
        self.modified_line_start = None
        self.modified_line_length = None
        self.modified_line_end = None

        # Per-patch change counts and the changed lines themselves
        self.additions = None
        self.added_code = None
        self.deletions = None
        self.deleted_code = None
        self.changes = None

        # File status and per-file change totals
        self.status = None
        self.total_file_additions = None
        self.total_file_deletions = None
        self.total_file_changes = None
50+
51+
# Unified-diff hunk header anchored at the start of a line, for example
# "@@ -12,7 +12,9 @@". The ",length" part of either range is optional.
# Anchoring at line start keeps "@@ ... @@" text that merely appears inside
# a code line (which always starts with " ", "+" or "-") from matching.
_HUNK_HEADER_RE = re.compile(
    r"^@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@", re.MULTILINE
)


def _hunk_range(start, length):
    """Turn a captured hunk range into a (start, length, end) tuple of ints."""
    line_start = int(start)
    # Unified diff omits the length when the range covers exactly one line.
    line_length = 1 if length is None else int(length)
    return line_start, line_length, line_start + line_length - 1


def _file_record(parsed_commit, row, file_number, total_files_changed):
    """Build a CommitParse pre-filled with the commit and file level fields."""
    record = CommitParse(parsed_commit.repo_owner,
                         parsed_commit.repo_name,
                         parsed_commit.sha)
    file_name = row["filename"]

    record.message = parsed_commit.message
    record.file_name = file_name
    record.file_number = file_number
    # Text after the last "."; a name without a dot yields the whole name.
    record.file_extension = file_name.split(".")[-1]
    record.total_files_changed = total_files_changed
    record.raw_file_patch = row.get("patch")
    record.status = row["status"]
    record.total_file_additions = row["additions"]
    record.total_file_deletions = row["deletions"]
    record.total_file_changes = row["changes"]
    return record


def parse_commit_info(commit_info: list, parsed_commit: CommitParse) -> list:
    """Parses the commit_info list

    Produces one dictionary per patch (hunk) of every changed file. A file
    without a patch produces a single dictionary in which all patch-level
    fields stay ``None``. A file whose patch text contains no hunk header
    produces no dictionary at all.

    Args:
        commit_info (list): commit_info list from original data; each row
            describes one changed file and must provide "filename",
            "status", "additions", "deletions" and "changes". "patch" may be
            ``None`` or missing.
        parsed_commit (CommitParse): Set CommitParse class with basic info

    Returns:
        list: List of dictionaries with desired data for project
    """
    data = []
    total_files_changed = len(commit_info)

    # A row represents one changed file in the commit.
    for file_number, row in enumerate(commit_info):
        # Patches are None in some instances (e.g., XLSX files); .get also
        # covers rows where the key is not present at all.
        raw_file_patch = row.get("patch")

        if raw_file_patch is None:
            record = _file_record(parsed_commit, row, file_number,
                                  total_files_changed)
            data.append(record.__dict__)
            continue

        hunks = list(_HUNK_HEADER_RE.finditer(raw_file_patch))
        total_patches = len(hunks)

        for patch_number, hunk in enumerate(hunks):
            original = _hunk_range(hunk.group(1), hunk.group(2))
            modified = _hunk_range(hunk.group(3), hunk.group(4))

            # The patch text runs from the end of this header up to the start
            # of the next header, or to the end of the file patch for the
            # last hunk. Using match offsets (rather than searching for the
            # header text) stays correct when two headers are identical.
            # NOTE: anything following the closing "@@" on the header line is
            # therefore part of raw_patch.
            if patch_number + 1 < total_patches:
                patch_end = hunks[patch_number + 1].start()
            else:
                patch_end = len(raw_file_patch)
            raw_patch = raw_file_patch[hunk.end():patch_end]

            patch_parse = parse_raw_patch(raw_patch)

            record = _file_record(parsed_commit, row, file_number,
                                  total_files_changed)
            record.patch_number = patch_number
            record.total_patches = total_patches
            record.raw_patch_header = hunk.group(0)
            record.raw_patch = raw_patch
            record.original_code = patch_parse["original_code"]
            (record.original_line_start,
             record.original_line_length,
             record.original_line_end) = original
            record.modified_code = patch_parse["modified_code"]
            (record.modified_line_start,
             record.modified_line_length,
             record.modified_line_end) = modified
            record.additions = patch_parse["additions"]
            record.added_code = patch_parse["added_code"]
            record.deletions = patch_parse["deletions"]
            record.deleted_code = patch_parse["deleted_code"]
            record.changes = patch_parse["changes"]

            # Append the record as a dictionary to the data list.
            data.append(record.__dict__)

    return data
186+
187+
def parse_raw_patch(temp_raw_patch: str) -> dict:
    """Parses a single raw patch into original code and modified code

    Each line is classified by its first character: "-" lines belong to the
    original code, "+" lines to the modified code, and every other line is
    treated as unchanged and belongs to both. The first character (the diff
    indicator column) is removed from every kept line. Lines starting with a
    backslash, such as "\\ No newline at end of file", are diff metadata and
    are skipped.

    Args:
        temp_raw_patch (str): Raw string of a single patch

    Returns:
        dict: Dictionary with the keys "original_code", "modified_code",
            "added_code" and "deleted_code" (newline-joined strings) and
            "additions", "deletions" and "changes" (line counts, where
            changes is additions plus deletions).
    """
    original_code = []
    modified_code = []
    added_code = []
    deleted_code = []

    for line in temp_raw_patch.splitlines():
        if line.startswith("\\"):
            # Marker line emitted by diff, not part of either file version.
            continue

        # [1:] drops the column git diff uses for the +/- indicator.
        content = line[1:]
        if line.startswith("-"):
            # "-" lines exist only in the original code.
            original_code.append(content)
            deleted_code.append(content)
        elif line.startswith("+"):
            # "+" lines exist only in the modified code.
            modified_code.append(content)
            added_code.append(content)
        else:
            # Unchanged lines are part of both versions.
            original_code.append(content)
            modified_code.append(content)

    additions = len(added_code)
    deletions = len(deleted_code)

    return {
        "original_code": "\n".join(original_code),
        "modified_code": "\n".join(modified_code),
        "additions": additions,
        "added_code": "\n".join(added_code),
        "deletions": deletions,
        "deleted_code": "\n".join(deleted_code),
        "changes": additions + deletions,
    }
246+
247+
# Seconds to wait for the GitHub API before giving up, so a stalled
# connection cannot block the caller forever.
_REQUEST_TIMEOUT_SECONDS = 30


def commit(repo_owner: str, repo_name: str, sha: str, verbose=False) -> list:
    """Pass the GitHub repo_owner, repo_name, and associated commit to parse.

    Fetches the commit from the GitHub REST API and parses every changed
    file into per-patch dictionaries.

    Args:
        repo_owner (str): Target repo owner
        repo_name (str): Target repo name
        sha (str): Target commit SHA from GitHub
        verbose (bool): Currently unused; kept so existing callers that pass
            it keep working.

    Returns:
        list: List of dictionaries structured around the class CommitParse

    Raises:
        requests.HTTPError: If the API answers with an error status (for
            example an unknown commit or an exceeded rate limit).
        requests.RequestException: If the request fails or times out.
    """
    # Commit info API URL
    url = f"https://api.github.com/repos/{repo_owner}/{repo_name}/commits/{sha}"

    # Read the body while the response is still open; the context manager
    # releases the connection afterwards.
    with requests.get(url, timeout=_REQUEST_TIMEOUT_SECONDS) as response:
        # Fail with the HTTP error itself instead of a KeyError later on
        # when the error payload lacks the expected keys.
        response.raise_for_status()
        commit_info = response.json()

    # Initialize a CommitParse to hold data, using the SHA reported by the API.
    parsed_commit = CommitParse(repo_owner=repo_owner,
                                repo_name=repo_name,
                                sha=commit_info["sha"])

    # Add commit message
    parsed_commit.message = commit_info["commit"]["message"]

    # Parse the files
    return parse_commit_info(commit_info["files"], parsed_commit)
0 commit comments