3030owners for each directory.
3131"""
3232
33+ import argparse
3334import datetime
3435import json
3536import math
@@ -69,14 +70,15 @@ def progress_indicator(future) -> None:
6970 print ('.' , end = '' , flush = True )
7071
7172
72- def get_all_commits_of_folder (path : str ) -> list [str ]:
73+ def get_all_commits_of_folder (path : str , quiet : bool = False ) -> list [str ]:
7374 """Retrieves all raw commit logs for a folder over the last two years.
7475
7576 This function parallelizes the git log calls by splitting the time period
7677 into monthly chunks.
7778
7879 Args:
7980 path: The directory to retrieve commit logs for.
81+ quiet: If True, suppresses progress indicators.
8082
8183 Returns:
8284 A list of raw commit description strings.
@@ -88,9 +90,10 @@ def get_all_commits_of_folder(path: str) -> list[str]:
8890 executor .submit (get_commits_in_folder_in_period , path , dates )
8991 for dates in get_dates_range ()
9092 ]
91- # Register the progress indicator callback.
92- for future in futures :
93- future .add_done_callback (progress_indicator )
93+ if not quiet :
94+ # Register the progress indicator callback.
95+ for future in futures :
96+ future .add_done_callback (progress_indicator )
9497 # Iterate over all submitted tasks and get results as they are available.
9598 for future in as_completed (futures ):
9699 # Get the result for the next completed task.
@@ -102,32 +105,37 @@ def get_all_commits_of_folder(path: str) -> list[str]:
102105 return commits
103106
104107
105- def extract_commits_informations (commits : list [str ]) -> dict :
108+ def extract_commits_informations (commits : list [str ],
109+ quiet : bool = False ) -> dict :
106110 """Parses raw commit logs and aggregates statistics by folder.
107111
108112 Args:
109113 commits: A list of raw commit description strings.
114+ quiet: If True, suppresses progress indicators.
110115
111116 Returns:
112117 A dictionary where keys are folder paths and values are dictionaries
113118 containing aggregated commit/review stats for that folder.
114119 """
115120 allStatsPerFolder = {}
116121 commit_to_analyse_count = len (commits )
117- print ('Getting logs done. Total number of commits to analyse: ' ,
118- str (commit_to_analyse_count ))
122+ if not quiet :
123+ print ('Getting logs done. Total number of commits to analyse: ' ,
124+ str (commit_to_analyse_count ))
119125
120126 for commit_description in commits :
121127 commit_to_analyse_count -= 1
122128 analysed_commit = Commit (commit_description )
123- (author , reviewers , changes , path , date ,
129+ (author , reviewers , changes , path ,
130+ date ,
124131 commit_hash ) = analysed_commit .all_informations ()
125132 if len (changes ) == 0 or not path :
126133 continue
127- print (('Save commit ' + commit_hash + ' from ' + author + ' in\t ' +
128- path ),
129- end = '' ,
130- flush = True )
134+ if not quiet :
135+ print (('Save commit ' + commit_hash + ' from ' + author + ' in\t ' +
136+ path ),
137+ end = '' ,
138+ flush = True )
131139 if not path in allStatsPerFolder :
132140 allStatsPerFolder [path ] = dict (total_commit = 0 ,
133141 total_review = 0 ,
@@ -150,34 +158,42 @@ def extract_commits_informations(commits: list[str]) -> dict:
150158 commit_count = 0 , review_count = 0 )
151159 allStatsPerFolder [path ]['individual_stats' ][reviewer ][
152160 'review_count' ] += 1
153- print ('\t \t \t DONE, commits left: ' ,
154- commit_to_analyse_count ,
155- flush = True )
161+ if not quiet :
162+ print ('\t \t \t DONE, commits left: ' ,
163+ commit_to_analyse_count ,
164+ flush = True )
156165 return allStatsPerFolder
157166
158167
159168def get_all_git_blame_informations_for_folder (
160- file_paths : list [str ], date_filter : datetime ) -> list [str ]:
169+ file_paths : list [str ],
170+ date_filter : datetime ,
171+ quiet : bool = False ) -> list [str ]:
161172 """Retrieves all `git blame` output for a list of files in parallel.
162173
163174 Args:
164175 file_paths: A list of file paths to run `git blame` on.
165176 date_filter: The date to use for the `--after` flag in git blame.
177+ quiet: If True, suppresses progress indicators.
166178
167179 Returns:
168180 A list of strings, where each string is one line of blame output.
169181 """
170182 lines = []
171- print ('[Git blame] ' + os .path .dirname (file_paths [0 ]), end = '' , flush = True )
183+ if not quiet :
184+ print ('[Git blame] ' + os .path .dirname (file_paths [0 ]),
185+ end = '' ,
186+ flush = True )
172187 executor = ProcessPoolExecutor (max_workers = 6 )
173188 # Dispatch tasks into the process pool and create a list of futures.
174189 futures = [
175190 executor .submit (get_blame_for_file , file , date_filter )
176191 for file in file_paths
177192 ]
178- # Register the progress indicator callback.
179- for future in futures :
180- future .add_done_callback (progress_indicator )
193+ if not quiet :
194+ # Register the progress indicator callback.
195+ for future in futures :
196+ future .add_done_callback (progress_indicator )
181197 # Iterate over all submitted tasks and get results as they are available.
182198 for future in as_completed (futures ):
183199 # Get the result for the next completed task.
@@ -248,13 +264,15 @@ def determine_owners_from_git_blame_informations(
248264
249265
250266def determine_owners_from_git_blame (root : str , files : list [str ],
251- last_update : datetime ) -> list [str ]:
267+ last_update : datetime ,
268+ quiet : bool = False ) -> list [str ]:
252269 """High-level function to determine owners using the git blame strategy.
253270
254271 Args:
255272 root: The root directory of the files.
256273 files: A list of filenames within the root directory.
257274 last_update: The last update time for the directory, used for filtering.
275+ quiet: If True, suppresses progress indicators.
258276
259277 Returns:
260278 A list of usernames identified as owners.
@@ -269,7 +287,8 @@ def determine_owners_from_git_blame(root: str, files: list[str],
269287 return []
270288
271289 date_filter = last_update - datetime .timedelta (TWO_YEARS )
272- lines = get_all_git_blame_informations_for_folder (file_paths , date_filter )
290+ lines = get_all_git_blame_informations_for_folder (file_paths , date_filter ,
291+ quiet )
273292 stats , lines_count = extract_blame_informations (lines )
274293 return determine_owners_from_git_blame_informations (stats , lines_count )
275294
@@ -322,14 +341,34 @@ def determine_owners_from_zscore(stats: dict) -> list[str]:
322341
323342
324343if __name__ == '__main__' :
325- # TODO: Use argparse for options
326- root_folder = 'ios'
327- if len (sys .argv ) > 1 :
328- root_folder = sys .argv [1 ]
344+ parser = argparse .ArgumentParser (
345+ description = 'Automatic Ownership Calculator.' )
346+ parser .add_argument (
347+ '-q' ,
348+ '--quiet' ,
349+ action = 'store_true' ,
350+ help = 'Enable quiet mode, suppresses progress indicators.' )
351+ parser .add_argument (
352+ '--root-directory' ,
353+ default = 'ios' ,
354+ help = "The root directory to start the analysis from. Default: 'ios'." )
355+ parser .add_argument (
356+ '--output-file' ,
357+ default = 'final_algo.csv' ,
358+ help = "The path to the output CSV file. Defaults to 'final_algo.csv'." )
359+ args = parser .parse_args ()
360+
361+ root_folder = args .root_directory
362+ output_file = args .output_file
363+ quiet_mode = args .quiet
329364
330365 # Phase 1: Data Collection
331- commits = get_all_commits_of_folder (root_folder )
332- stats_per_folder = extract_commits_informations (commits )
366+ commits = get_all_commits_of_folder (root_folder , quiet = quiet_mode )
367+ stats_per_folder = extract_commits_informations (commits , quiet = quiet_mode )
368+
369+ # Clear output file before starting
370+ with open (output_file , 'w' ) as f :
371+ pass
333372
334373 # Phase 2: Analysis and Ownership Calculation
335374 steps = len (stats_per_folder )
@@ -338,24 +377,33 @@ def determine_owners_from_zscore(stats: dict) -> list[str]:
338377 if avoid_directory (root ):
339378 continue
340379 if not root in stats_per_folder :
341- with open ('final_algo.csv' , 'a' ) as file :
380+ with open (output_file , 'a' ) as file :
342381 file .write (root + '\n ' )
343382 continue
344383
345384 step_count += 1
346- print (str (step_count ) + '/' + str (steps ) + '\t ' , end = '' , flush = True )
385+ if not quiet_mode :
386+ print (
387+ str (step_count ) + '/' + str (steps ) + '\t ' ,
388+ end = '' ,
389+ flush = True )
347390
348391 # Decide which algorithm to use based on commit history.
349392 if stats_per_folder [root ]['total_commit' ] > 5 :
350393 owners = determine_owners_from_zscore (stats_per_folder [root ])
351- print ('[Z-Score] ' + root + '\t RESULT: ' + str (owners ))
394+ if not quiet_mode :
395+ print ('[Z-Score] ' + root + '\t RESULT: ' + str (owners ))
352396 else :
353397 owners = determine_owners_from_git_blame (
354- root , files , stats_per_folder [root ]['last_update' ])
355- print ('[Blame] ' + root + '\t RESULT: ' + str (owners ))
398+ root ,
399+ files ,
400+ stats_per_folder [root ]['last_update' ],
401+ quiet = quiet_mode )
402+ if not quiet_mode :
403+ print ('[Blame] ' + root + '\t RESULT: ' + str (owners ))
356404
357405 # Write results to the output CSV.
358- with open ('final_algo.csv' , 'a' ) as file :
406+ with open (output_file , 'a' ) as file :
359407 file .write (root + ', ' +
360408 str (stats_per_folder [root ]['last_update' ]))
361409 for owner in owners :
0 commit comments