Skip to content

Commit 4705e3f

Browse files
committed
perf: Optimize performance in contribution counting and team member identification
- Replace nested loops with dictionary-based lookups to reduce time complexity
- Optimize contribution counting from O(n*m) to O(n+m) complexity
- Pre-process commit, PR, and issue data into author-to-count mappings
- Create user-to-manager and manager-to-reports dictionaries for faster team member identification
- Add detailed logging of PR and issue counts for better visibility

Signed-off-by: Zack Koppert <zkoppert@github.com>
1 parent 6caef60 commit 4705e3f

File tree

1 file changed

+61
-41
lines changed

1 file changed

+61
-41
lines changed

measure_innersource.py

Lines changed: 61 additions & 41 deletions
Original file line number | Diff line number | Diff line change
@@ -119,21 +119,31 @@ def main(): # pragma: no cover
119119
f"Original commit author: {original_commit_author}, \
120120
with manager: {original_commit_author_manager}"
121121
)
122+
123+
# Create a dictionary mapping users to their managers for faster lookups
124+
user_to_manager = {}
125+
manager_to_reports = {}
126+
127+
for user, data in org_data.items():
128+
manager = data["manager"]
129+
user_to_manager[user] = manager
130+
131+
# Also create reverse mapping of manager to direct reports
132+
if manager not in manager_to_reports:
133+
manager_to_reports[manager] = []
134+
manager_to_reports[manager].append(user)
135+
122136
# Find all users that report up to the same manager as the original commit author
123137
team_members_that_own_the_repo.append(original_commit_author)
124138
team_members_that_own_the_repo.append(original_commit_author_manager)
125139

126-
for user, data in org_data.items():
127-
if data["manager"] == original_commit_author_manager:
128-
team_members_that_own_the_repo.append(user)
140+
# Add all users reporting to the same manager
141+
if original_commit_author_manager in manager_to_reports:
142+
team_members_that_own_the_repo.extend(manager_to_reports[original_commit_author_manager])
129143

130-
# for each username in team_members_that_own_the_repo,
131-
# add everyone that has one of them listed as the manager
132-
for user, data in org_data.items():
133-
if (
134-
user not in team_members_that_own_the_repo
135-
and data["manager"] in team_members_that_own_the_repo
136-
):
144+
# Add everyone that has one of the team members listed as their manager
145+
for user, manager in user_to_manager.items():
146+
if manager in team_members_that_own_the_repo and user not in team_members_that_own_the_repo:
137147
team_members_that_own_the_repo.append(user)
138148

139149
# Remove duplicates from the team members list
@@ -157,34 +167,50 @@ def main(): # pragma: no cover
157167
# Fetch all PRs and issues once
158168
print("Fetching all pull requests...")
159169
all_pulls = list(repo_data.pull_requests(state="all"))
160-
170+
print(f"Found {len(all_pulls)} pull requests")
171+
161172
print("Fetching all issues...")
162173
all_issues = list(repo_data.issues(state="all"))
163-
174+
print(f"Found {len(all_issues)} issues")
175+
176+
# Pre-process all data to create mappings of user to contribution counts
177+
print("Pre-processing contribution data...")
178+
179+
# Create mapping of commit authors to commit counts
180+
commit_author_counts = {}
181+
for commit in commit_list:
182+
if hasattr(commit.author, "login"):
183+
author = commit.author.login
184+
commit_author_counts[author] = commit_author_counts.get(author, 0) + 1
185+
186+
# Create mapping of PR authors to PR counts
187+
pr_author_counts = {}
188+
for pull in all_pulls:
189+
author = pull.user.login
190+
pr_author_counts[author] = pr_author_counts.get(author, 0) + 1
191+
192+
# Create mapping of issue authors to issue counts
193+
issue_author_counts = {}
194+
for issue in all_issues:
195+
if hasattr(issue.user, "login"):
196+
author = issue.user.login
197+
issue_author_counts[author] = issue_author_counts.get(author, 0) + 1
198+
164199
# Count contributions for each innersource contributor
165200
innersource_contribution_counts = {}
166201
print("Counting contributions for each innersource contributor...")
167202
for contributor in innersource_contributors:
168203
# Initialize counter for this contributor
169204
innersource_contribution_counts[contributor] = 0
170-
171-
# Count commits by this contributor
172-
for commit in commit_list:
173-
if (
174-
hasattr(commit.author, "login")
175-
and commit.author.login == contributor
176-
):
177-
innersource_contribution_counts[contributor] += 1
178-
205+
206+
# Add commit counts
207+
innersource_contribution_counts[contributor] += commit_author_counts.get(contributor, 0)
208+
179209
# Add PR counts
180-
for pull in all_pulls:
181-
if pull.user.login == contributor:
182-
innersource_contribution_counts[contributor] += 1
183-
210+
innersource_contribution_counts[contributor] += pr_author_counts.get(contributor, 0)
211+
184212
# Add issue counts
185-
for issue in all_issues:
186-
if hasattr(issue.user, "login") and issue.user.login == contributor:
187-
innersource_contribution_counts[contributor] += 1
213+
innersource_contribution_counts[contributor] += issue_author_counts.get(contributor, 0)
188214

189215
print("Innersource contribution counts:")
190216
for contributor, count in innersource_contribution_counts.items():
@@ -196,21 +222,15 @@ def main(): # pragma: no cover
196222
for member in team_members_that_own_the_repo:
197223
# Initialize counter for this team member
198224
team_member_contribution_counts[member] = 0
199-
200-
# Count commits by this team member
201-
for commit in commit_list:
202-
if hasattr(commit.author, "login") and commit.author.login == member:
203-
team_member_contribution_counts[member] += 1
204-
225+
226+
# Add commit counts
227+
team_member_contribution_counts[member] += commit_author_counts.get(member, 0)
228+
205229
# Add PR counts
206-
for pull in all_pulls:
207-
if pull.user.login == member:
208-
team_member_contribution_counts[member] += 1
209-
230+
team_member_contribution_counts[member] += pr_author_counts.get(member, 0)
231+
210232
# Add issue counts
211-
for issue in all_issues:
212-
if hasattr(issue.user, "login") and issue.user.login == member:
213-
team_member_contribution_counts[member] += 1
233+
team_member_contribution_counts[member] += issue_author_counts.get(member, 0)
214234

215235
print("Team member contribution counts:")
216236
for member, count in team_member_contribution_counts.items():

0 commit comments

Comments
 (0)