Skip to content

Commit 1fabe84

Browse files
committed
feat: enhance co-author parsing with advanced email resolution
- Add support for extracting usernames from @github.com email addresses
- Integrate GitHub Search Users API to resolve arbitrary emails to usernames
- Add fallback behavior when API calls fail or return no results
- Thread github_connection parameter through co-author parsing functions
- Remove unused existing_usernames parameter from get_coauthor_contributors
- Update all function signatures and test cases with new parameters
- Improve co-author identification from noreply, GitHub, and external emails

This significantly improves co-author recognition by automatically resolving multiple email formats to GitHub usernames, providing more accurate contributor attribution and better coverage of collaborative contributions.

Signed-off-by: Zack Koppert <[email protected]>
1 parent 211543e commit 1fabe84

File tree

2 files changed

+75
-37
lines changed

2 files changed

+75
-37
lines changed

contributors.py

Lines changed: 44 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -143,7 +143,12 @@ def get_all_contributors(
143143
if repos:
144144
for repo in repos:
145145
repo_contributors = get_contributors(
146-
repo, start_date, end_date, ghe, acknowledge_coauthors
146+
repo,
147+
start_date,
148+
end_date,
149+
ghe,
150+
acknowledge_coauthors,
151+
github_connection,
147152
)
148153
if repo_contributors:
149154
all_contributors.append(repo_contributors)
@@ -154,18 +159,22 @@ def get_all_contributors(
154159
return all_contributors
155160

156161

157-
def get_coauthors_from_message(commit_message: str) -> List[str]:
162+
def get_coauthors_from_message(
163+
commit_message: str, github_connection: object = None
164+
) -> List[str]:
158165
"""
159166
Extract co-author identifiers from a commit message.
160167
161168
Co-authored-by trailers follow the format:
162169
Co-authored-by: Name <email>
163170
164171
For GitHub noreply emails (username@users.noreply.github.com), extracts the username.
165-
For other emails, extracts the full email address.
172+
For @github.com emails, extracts the username (part before @).
173+
For other emails, uses GitHub Search Users API to find the username, or falls back to email.
166174
167175
Args:
168176
commit_message (str): The commit message to parse
177+
github_connection (object): The authenticated GitHub connection object from PyGithub
169178
170179
Returns:
171180
List[str]: List of co-author identifiers (GitHub usernames or email addresses)
@@ -183,9 +192,28 @@ def get_coauthors_from_message(commit_message: str) -> List[str]:
183192
if noreply_match:
184193
# For GitHub noreply emails, extract just the username
185194
identifiers.append(noreply_match.group(2))
195+
elif email.endswith("@github.com"):
196+
# For @github.com emails, extract the username (part before @)
197+
username = email.split("@")[0]
198+
identifiers.append(username)
186199
else:
187-
# For other emails, use the full email address
188-
identifiers.append(email)
200+
# For other emails, try to find GitHub username using Search Users API
201+
if github_connection:
202+
try:
203+
# Search for users by email
204+
search_result = github_connection.search_users(f"email:{email}")
205+
if search_result.totalCount > 0:
206+
# Use the first matching user's login
207+
identifiers.append(search_result[0].login)
208+
else:
209+
# If no user found, fall back to email address
210+
identifiers.append(email)
211+
except Exception:
212+
# If API call fails, fall back to email address
213+
identifiers.append(email)
214+
else:
215+
# If no GitHub connection available, use the full email address
216+
identifiers.append(email)
189217
return identifiers
190218

191219

@@ -195,6 +223,7 @@ def get_contributors(
195223
end_date: str,
196224
ghe: str,
197225
acknowledge_coauthors: bool,
226+
github_connection: object,
198227
):
199228
"""
200229
Get contributors from a single repository and filter by start end dates if present.
@@ -205,6 +234,7 @@ def get_contributors(
205234
end_date (str): The end date of the date range for the contributor list.
206235
ghe (str): The GitHub Enterprise URL to use for authentication
207236
acknowledge_coauthors (bool): Whether to acknowledge co-authors from commit messages
237+
github_connection (object): The authenticated GitHub connection object from PyGithub
208238
209239
Returns:
210240
contributors (list): A list of ContributorStats objects
@@ -252,7 +282,11 @@ def get_contributors(
252282
# Get co-authors from commit messages if enabled
253283
if acknowledge_coauthors:
254284
coauthor_contributors = get_coauthor_contributors(
255-
repo, start_date, end_date, ghe, contributor_usernames
285+
repo,
286+
start_date,
287+
end_date,
288+
ghe,
289+
github_connection,
256290
)
257291
contributors.extend(coauthor_contributors)
258292

@@ -269,7 +303,7 @@ def get_coauthor_contributors(
269303
start_date: str,
270304
end_date: str,
271305
ghe: str,
272-
existing_usernames: set,
306+
github_connection: object,
273307
) -> List[contributor_stats.ContributorStats]:
274308
"""
275309
Get contributors who were co-authors on commits in the repository.
@@ -279,7 +313,7 @@ def get_coauthor_contributors(
279313
start_date (str): The start date of the date range for the contributor list.
280314
end_date (str): The end date of the date range for the contributor list.
281315
ghe (str): The GitHub Enterprise URL
282-
existing_usernames (set): Set of usernames already added as contributors
316+
github_connection (object): The authenticated GitHub connection object from PyGithub
283317
284318
Returns:
285319
List[ContributorStats]: A list of ContributorStats objects for co-authors
@@ -301,10 +335,9 @@ def get_coauthor_contributors(
301335
continue
302336

303337
# Extract co-authors from commit message
304-
coauthors = get_coauthors_from_message(commit_message)
338+
coauthors = get_coauthors_from_message(commit_message, github_connection)
305339
for username in coauthors:
306-
if username not in existing_usernames:
307-
coauthor_counts[username] = coauthor_counts.get(username, 0) + 1
340+
coauthor_counts[username] = coauthor_counts.get(username, 0) + 1
308341

309342
except Exception as e:
310343
print(f"Error getting co-authors for repository: {repo.full_name}")

test_contributors.py

Lines changed: 31 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ def test_get_contributors(self, mock_contributor_stats):
3030
mock_repo.contributors.return_value = [mock_user]
3131
mock_repo.full_name = "owner/repo"
3232

33-
get_contributors(mock_repo, "2022-01-01", "2022-12-31", "", False)
33+
get_contributors(mock_repo, "2022-01-01", "2022-12-31", "", False, None)
3434

3535
mock_contributor_stats.assert_called_once_with(
3636
"user",
@@ -81,10 +81,10 @@ def test_get_all_contributors_with_organization(self, mock_get_contributors):
8181
],
8282
)
8383
mock_get_contributors.assert_any_call(
84-
"repo1", "2022-01-01", "2022-12-31", ghe, False
84+
"repo1", "2022-01-01", "2022-12-31", ghe, False, mock_github_connection
8585
)
8686
mock_get_contributors.assert_any_call(
87-
"repo2", "2022-01-01", "2022-12-31", ghe, False
87+
"repo2", "2022-01-01", "2022-12-31", ghe, False, mock_github_connection
8888
)
8989

9090
@patch("contributors.get_contributors")
@@ -130,7 +130,7 @@ def test_get_all_contributors_with_repository(self, mock_get_contributors):
130130
],
131131
)
132132
mock_get_contributors.assert_called_once_with(
133-
"repo", "2022-01-01", "2022-12-31", ghe, False
133+
"repo", "2022-01-01", "2022-12-31", ghe, False, mock_github_connection
134134
)
135135

136136
@patch("contributors.contributor_stats.ContributorStats")
@@ -153,7 +153,7 @@ def test_get_contributors_skip_users_with_no_commits(self, mock_contributor_stat
153153
mock_repo.get_commits.side_effect = StopIteration
154154
ghe = ""
155155

156-
get_contributors(mock_repo, "2022-01-01", "2022-12-31", ghe, False)
156+
get_contributors(mock_repo, "2022-01-01", "2022-12-31", ghe, False, None)
157157

158158
# Note that only user is returned and user2 is not returned here because there were no commits in the date range
159159
mock_contributor_stats.assert_called_once_with(
@@ -181,7 +181,7 @@ def test_get_contributors_skip_bot(self, mock_contributor_stats):
181181
mock_repo.get_commits.side_effect = StopIteration
182182
ghe = ""
183183

184-
get_contributors(mock_repo, "2022-01-01", "2022-12-31", ghe, False)
184+
get_contributors(mock_repo, "2022-01-01", "2022-12-31", ghe, False, None)
185185

186186
# Note that only user is returned and user2 is not returned here because there were no commits in the date range
187187
mock_contributor_stats.isEmpty()
@@ -202,7 +202,7 @@ def test_get_contributors_no_commit_end_date(self, mock_contributor_stats):
202202
mock_repo.get_commits.side_effect = StopIteration
203203
ghe = ""
204204

205-
get_contributors(mock_repo, "2022-01-01", "", ghe, False)
205+
get_contributors(mock_repo, "2022-01-01", "", ghe, False, None)
206206

207207
# Note that only user is returned and user2 is not returned here because there were no commits in the date range
208208
mock_contributor_stats.assert_called_once_with(
@@ -228,7 +228,7 @@ def test_get_coauthors_from_message_with_noreply_email(self):
228228
229229
Co-authored-by: John Doe <[email protected]>
230230
"""
231-
result = get_coauthors_from_message(message)
231+
result = get_coauthors_from_message(message, None)
232232
self.assertEqual(result, ["johndoe"])
233233

234234
def test_get_coauthors_from_message_with_noreply_email_with_id(self):
@@ -239,7 +239,7 @@ def test_get_coauthors_from_message_with_noreply_email_with_id(self):
239239
240240
Co-authored-by: John Doe <[email protected]>
241241
"""
242-
result = get_coauthors_from_message(message)
242+
result = get_coauthors_from_message(message, None)
243243
self.assertEqual(result, ["johndoe"])
244244

245245
def test_get_coauthors_from_message_multiple_coauthors(self):
@@ -251,7 +251,7 @@ def test_get_coauthors_from_message_multiple_coauthors(self):
251251
Co-authored-by: Alice <[email protected]>
252252
Co-authored-by: Bob <[email protected]>
253253
"""
254-
result = get_coauthors_from_message(message)
254+
result = get_coauthors_from_message(message, None)
255255
self.assertEqual(result, ["alice", "bob"])
256256

257257
def test_get_coauthors_from_message_with_regular_email(self):
@@ -262,7 +262,7 @@ def test_get_coauthors_from_message_with_regular_email(self):
262262
263263
Co-authored-by: John Doe <[email protected]>
264264
"""
265-
result = get_coauthors_from_message(message)
265+
result = get_coauthors_from_message(message, None)
266266
self.assertEqual(result, ["[email protected]"])
267267

268268
def test_get_coauthors_from_message_case_insensitive(self):
@@ -274,22 +274,22 @@ def test_get_coauthors_from_message_case_insensitive(self):
274274
co-authored-by: John Doe <[email protected]>
275275
CO-AUTHORED-BY: Jane Doe <[email protected]>
276276
"""
277-
result = get_coauthors_from_message(message)
277+
result = get_coauthors_from_message(message, None)
278278
self.assertEqual(result, ["johndoe", "janedoe"])
279279

280280
def test_get_coauthors_from_message_empty_message(self):
281281
"""
282282
Test extracting co-authors from an empty commit message.
283283
"""
284-
result = get_coauthors_from_message("")
284+
result = get_coauthors_from_message("", None)
285285
self.assertEqual(result, [])
286286

287287
def test_get_coauthors_from_message_no_coauthors(self):
288288
"""
289289
Test extracting co-authors from a commit message without co-authors.
290290
"""
291291
message = "Fix bug in login system"
292-
result = get_coauthors_from_message(message)
292+
result = get_coauthors_from_message(message, None)
293293
self.assertEqual(result, [])
294294

295295
def test_get_coauthors_from_message_mixed_email_types(self):
@@ -302,7 +302,7 @@ def test_get_coauthors_from_message_mixed_email_types(self):
302302
Co-authored-by: Bob <[email protected]>
303303
Co-authored-by: Charlie <[email protected]>
304304
"""
305-
result = get_coauthors_from_message(message)
305+
result = get_coauthors_from_message(message, None)
306306
self.assertEqual(result, ["alice", "[email protected]", "charlie"])
307307

308308
def test_get_coauthor_contributors(self):
@@ -327,9 +327,8 @@ def test_get_coauthor_contributors(self):
327327

328328
mock_repo.commits.return_value = [mock_commit1, mock_commit2]
329329

330-
existing_usernames = set()
331330
result = get_coauthor_contributors(
332-
mock_repo, "2022-01-01", "2022-12-31", "", existing_usernames
331+
mock_repo, "2022-01-01", "2022-12-31", "", None
333332
)
334333

335334
# Alice should have count 2, Bob should have count 1
@@ -345,9 +344,9 @@ def test_get_coauthor_contributors(self):
345344
elif contributor.username == "bob":
346345
self.assertEqual(contributor.contribution_count, 1)
347346

348-
def test_get_coauthor_contributors_excludes_existing(self):
347+
def test_get_coauthor_contributors_includes_all(self):
349348
"""
350-
Test that get_coauthor_contributors excludes already existing contributors.
349+
Test that get_coauthor_contributors includes all co-authors, even if they are already main contributors.
351350
"""
352351
mock_repo = MagicMock()
353352
mock_repo.full_name = "owner/repo"
@@ -361,15 +360,16 @@ def test_get_coauthor_contributors_excludes_existing(self):
361360

362361
mock_repo.commits.return_value = [mock_commit]
363362

364-
# Alice is already a contributor
365-
existing_usernames = {"alice"}
363+
# Alice is already a main contributor, but should still be included in co-author results
366364
result = get_coauthor_contributors(
367-
mock_repo, "2022-01-01", "2022-12-31", "", existing_usernames
365+
mock_repo, "2022-01-01", "2022-12-31", "", None
368366
)
369367

370-
# Only Bob should be in the result
371-
self.assertEqual(len(result), 1)
372-
self.assertEqual(result[0].username, "bob")
368+
# Both Alice and Bob should be in the result
369+
self.assertEqual(len(result), 2)
370+
usernames = {c.username for c in result}
371+
self.assertIn("alice", usernames)
372+
self.assertIn("bob", usernames)
373373

374374
@patch("contributors.get_coauthor_contributors")
375375
def test_get_contributors_with_acknowledge_coauthors(
@@ -397,7 +397,12 @@ def test_get_contributors_with_acknowledge_coauthors(
397397
mock_get_coauthor_contributors.return_value = [mock_coauthor]
398398

399399
result = get_contributors(
400-
mock_repo, "2022-01-01", "2022-12-31", "", acknowledge_coauthors=True
400+
mock_repo,
401+
"2022-01-01",
402+
"2022-12-31",
403+
"",
404+
acknowledge_coauthors=True,
405+
github_connection=None,
401406
)
402407

403408
# Verify that get_coauthor_contributors was called

0 commit comments

Comments
 (0)