@@ -119,7 +119,6 @@ def main(): # pragma: no cover
119119 f"Original commit author: { original_commit_author } , \
120120 with manager: { original_commit_author_manager } "
121121 )
122-
123122 # Create a dictionary mapping users to their managers for faster lookups
124123 user_to_manager = {}
125124 manager_to_reports = {}
@@ -169,56 +168,100 @@ def main(): # pragma: no cover
169168 print (f"All contributors: { all_contributors } " )
170169 print (f"Innersource contributors: { innersource_contributors } " )
171170
172- # Fetch all PRs and issues once
173- print ("Fetching all pull requests..." )
174- all_pulls = list (repo_data .pull_requests (state = "all" ))
175- print (f"Found { len (all_pulls )} pull requests" )
176-
177- print ("Fetching all issues..." )
178- all_issues = list (repo_data .issues (state = "all" ))
179- print (f"Found { len (all_issues )} issues" )
171+ # Process data in chunks to avoid memory issues while maintaining performance
172+ chunk_size = env_vars .chunk_size
173+ print (f"Using chunk size of { chunk_size } for data processing" )
180174
181- # Pre-process all data to create mappings of user to contribution counts
182175 print ("Pre-processing contribution data..." )
183176
184177 # Create mapping of commit authors to commit counts
178+ print ("Processing commits..." )
185179 commit_author_counts = {}
186180 for commit in commit_list :
187181 if hasattr (commit .author , "login" ):
188182 author = commit .author .login
189183 commit_author_counts [author ] = commit_author_counts .get (author , 0 ) + 1
190184
191- # Create mapping of PR authors to PR counts
185+ # Process pull requests in chunks
186+ print ("Processing pull requests in chunks..." )
192187 pr_author_counts = {}
193- for pull in all_pulls :
194- author = pull .user .login
195- pr_author_counts [author ] = pr_author_counts .get (author , 0 ) + 1
196-
197- # Create mapping of issue authors to issue counts
188+ total_prs = 0
189+
190+ # GitHub API returns an iterator that internally handles pagination
191+ # We'll manually chunk it to avoid loading everything at once
192+ pulls_iterator = repo_data .pull_requests (state = "all" )
193+ while True :
194+ # Process a chunk of pull requests
195+ chunk = []
196+ for _ in range (chunk_size ):
197+ try :
198+ chunk .append (next (pulls_iterator ))
199+ except StopIteration :
200+ break
201+
202+ if not chunk :
203+ break
204+
205+ # Update counts for this chunk
206+ for pull in chunk :
207+ if hasattr (pull .user , "login" ):
208+ author = pull .user .login
209+ pr_author_counts [author ] = pr_author_counts .get (author , 0 ) + 1
210+
211+ total_prs += len (chunk )
212+ print (f" Processed { total_prs } pull requests so far..." )
213+
214+ print (f"Found and processed { total_prs } pull requests" )
215+
216+ # Process issues in chunks
217+ print ("Processing issues in chunks..." )
198218 issue_author_counts = {}
199- for issue in all_issues :
200- if hasattr (issue .user , "login" ):
201- author = issue .user .login
202- issue_author_counts [author ] = issue_author_counts .get (author , 0 ) + 1
203-
204- # Count contributions for each innersource contributor
219+ total_issues = 0
220+
221+ # GitHub API returns an iterator that internally handles pagination
222+ # We'll manually chunk it to avoid loading everything at once
223+ issues_iterator = repo_data .issues (state = "all" )
224+ while True :
225+ # Process a chunk of issues
226+ chunk = []
227+ for _ in range (chunk_size ):
228+ try :
229+ chunk .append (next (issues_iterator ))
230+ except StopIteration :
231+ break
232+
233+ if not chunk :
234+ break
235+
236+ # Update counts for this chunk
237+ for issue in chunk :
238+ if hasattr (issue .user , "login" ):
239+ author = issue .user .login
240+ issue_author_counts [author ] = issue_author_counts .get (author , 0 ) + 1
241+
242+ total_issues += len (chunk )
243+ print (f" Processed { total_issues } issues so far..." )
244+
245+ print (f"Found and processed { total_issues } issues" )
246+
247+ # Count contributions for each innersource contributor using precompiled dictionaries
205248 innersource_contribution_counts = {}
206249 print ("Counting contributions for each innersource contributor..." )
207250 for contributor in innersource_contributors :
208251 # Initialize counter for this contributor
209252 innersource_contribution_counts [contributor ] = 0
210253
211- # Add commit counts
254+ # Add commit counts from the precompiled dictionary
212255 innersource_contribution_counts [contributor ] += commit_author_counts .get (
213256 contributor , 0
214257 )
215258
216- # Add PR counts
259+ # Add PR counts from the precompiled dictionary
217260 innersource_contribution_counts [contributor ] += pr_author_counts .get (
218261 contributor , 0
219262 )
220263
221- # Add issue counts
264+ # Add issue counts from the precompiled dictionary
222265 innersource_contribution_counts [contributor ] += issue_author_counts .get (
223266 contributor , 0
224267 )
@@ -227,22 +270,22 @@ def main(): # pragma: no cover
227270 for contributor , count in innersource_contribution_counts .items ():
228271 print (f" { contributor } : { count } contributions" )
229272
230- # count contributions for each user in team_members_that_own_the_repo
273+ # Count contributions for each team member using precompiled dictionaries
231274 team_member_contribution_counts = {}
232275 print ("Counting contributions for each team member that owns the repo..." )
233276 for member in team_members_that_own_the_repo :
234277 # Initialize counter for this team member
235278 team_member_contribution_counts [member ] = 0
236279
237- # Add commit counts
280+ # Add commit counts from the precompiled dictionary
238281 team_member_contribution_counts [member ] += commit_author_counts .get (
239282 member , 0
240283 )
241284
242- # Add PR counts
285+ # Add PR counts from the precompiled dictionary
243286 team_member_contribution_counts [member ] += pr_author_counts .get (member , 0 )
244287
245- # Add issue counts
288+ # Add issue counts from the precompiled dictionary
246289 team_member_contribution_counts [member ] += issue_author_counts .get (
247290 member , 0
248291 )
0 commit comments