Skip to content

Commit ee109e5

Browse files
Copilot and zkoppert committed
Correlate co-author emails to GitHub usernames and merge with existing contributors
Co-authored-by: zkoppert <[email protected]>
1 parent 211543e commit ee109e5

File tree

3 files changed

+117
-53
lines changed

3 files changed

+117
-53
lines changed

README.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -93,13 +93,13 @@ This action can be configured to authenticate with GitHub App Installation or Pe
9393
| `END_DATE` | False | Current Date | The date at which you want to stop gathering contributor information. Must be later than the `START_DATE`. ie. Aug 2nd, 2023 would be `2023-08-02` |
9494
| `SPONSOR_INFO` | False | False | If you want to include sponsor information in the output. This will include the sponsor count and the sponsor URL. This will impact action performance. ie. SPONSOR_INFO = "False" or SPONSOR_INFO = "True" |
9595
| `LINK_TO_PROFILE` | False | True | If you want to link usernames to their GitHub profiles in the output. ie. LINK_TO_PROFILE = "True" or LINK_TO_PROFILE = "False" |
96-
| `ACKNOWLEDGE_COAUTHORS` | False | True | If you want to include co-authors from commit messages as contributors. Co-authors are identified via the `Co-authored-by:` trailer in commit messages. The action will extract GitHub usernames from GitHub noreply emails (e.g., `[email protected]`) or use the full email address for other email domains. This will impact action performance as it requires scanning all commits. ie. ACKNOWLEDGE_COAUTHORS = "True" or ACKNOWLEDGE_COAUTHORS = "False" |
96+
| `ACKNOWLEDGE_COAUTHORS` | False | True | If you want to include co-authors from commit messages as contributors. Co-authors are identified via the `Co-authored-by:` trailer in commit messages. The action will correlate co-author emails to GitHub usernames when possible and merge their contributions with existing contributor stats. This will impact action performance as it requires scanning all commits. ie. ACKNOWLEDGE_COAUTHORS = "True" or ACKNOWLEDGE_COAUTHORS = "False" |
9797

9898
**Note**: If `start_date` and `end_date` are specified then the action will determine if the contributor is new. A new contributor is one that has contributed in the date range specified but not before the start date.
9999

100100
**Performance Note:** Using start and end dates will slow the action down by approximately 63X, i.e., if the action takes 1.7 seconds without dates, it will take about 1 minute and 47 seconds with them.
101101

102-
**Co-authors Note:** When `ACKNOWLEDGE_COAUTHORS` is enabled, the action will scan commit messages for `Co-authored-by:` trailers and include those users as contributors. For GitHub noreply email addresses (e.g., `[email protected]`), the username will be extracted. For other email addresses (e.g., `[email protected]`), the full email address will be used as the contributor identifier. See [GitHub's documentation on creating commits with multiple authors](https://docs.github.com/en/pull-requests/committing-changes-to-your-project/creating-and-editing-commits/creating-a-commit-with-multiple-authors).
102+
**Co-authors Note:** When `ACKNOWLEDGE_COAUTHORS` is enabled, the action will scan commit messages for `Co-authored-by:` trailers and include those users as contributors. The action attempts to match co-author emails to GitHub usernames by cross-referencing with commit author data. When a match is found, co-author contributions are merged with the existing contributor's stats. If no match is found, the email address itself is used as the contributor identifier. See [GitHub's documentation on creating commits with multiple authors](https://docs.github.com/en/pull-requests/committing-changes-to-your-project/creating-and-editing-commits/creating-a-commit-with-multiple-authors).
103103

104104
### Example workflows
105105

contributors.py

Lines changed: 63 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -211,8 +211,6 @@ def get_contributors(
211211
"""
212212
all_repo_contributors = repo.contributors()
213213
contributors = []
214-
# Track usernames already added as contributors
215-
contributor_usernames = set()
216214

217215
try:
218216
for user in all_repo_contributors:
@@ -247,14 +245,12 @@ def get_contributors(
247245
"",
248246
)
249247
contributors.append(contributor)
250-
contributor_usernames.add(user.login)
251248

252249
# Get co-authors from commit messages if enabled
253250
if acknowledge_coauthors:
254-
coauthor_contributors = get_coauthor_contributors(
255-
repo, start_date, end_date, ghe, contributor_usernames
251+
contributors = get_coauthor_contributors(
252+
repo, start_date, end_date, ghe, contributors
256253
)
257-
contributors.extend(coauthor_contributors)
258254

259255
except Exception as e:
260256
print(f"Error getting contributors for repository: {repo.full_name}")
@@ -269,22 +265,26 @@ def get_coauthor_contributors(
269265
start_date: str,
270266
end_date: str,
271267
ghe: str,
272-
existing_usernames: set,
268+
existing_contributors: List[contributor_stats.ContributorStats],
273269
) -> List[contributor_stats.ContributorStats]:
274270
"""
275271
Get contributors who were co-authors on commits in the repository.
276272
273+
Matches co-author emails to GitHub usernames by looking them up from commit data.
274+
Merges co-author contributions with existing contributors when possible.
275+
277276
Args:
278277
repo (object): The repository object
279278
start_date (str): The start date of the date range for the contributor list.
280279
end_date (str): The end date of the date range for the contributor list.
281280
ghe (str): The GitHub Enterprise URL
282-
existing_usernames (set): Set of usernames already added as contributors
281+
existing_contributors (List[ContributorStats]): List of existing contributors to merge with
283282
284283
Returns:
285-
List[ContributorStats]: A list of ContributorStats objects for co-authors
284+
List[ContributorStats]: Updated list with co-author contributions merged
286285
"""
287-
coauthor_counts: dict = {} # username -> count
286+
coauthor_counts: dict = {} # email/username -> count
287+
email_to_username: dict = {} # email -> GitHub username mapping
288288
endpoint = ghe if ghe else "https://github.com"
289289

290290
try:
@@ -294,42 +294,74 @@ def get_coauthor_contributors(
294294
else:
295295
commits = repo.commits()
296296

297+
# First pass: build email to username mapping and count co-authors
297298
for commit in commits:
298299
# Get commit message from the commit object
299300
commit_message = commit.commit.message if commit.commit else ""
301+
302+
# Build email to username mapping from commit author info
303+
if commit.author and commit.commit and commit.commit.author:
304+
author_email = commit.commit.author.get("email", "")
305+
if author_email and commit.author.login:
306+
email_to_username[author_email] = commit.author.login
307+
300308
if not commit_message:
301309
continue
302310

303311
# Extract co-authors from commit message
304312
coauthors = get_coauthors_from_message(commit_message)
305-
for username in coauthors:
306-
if username not in existing_usernames:
307-
coauthor_counts[username] = coauthor_counts.get(username, 0) + 1
313+
for identifier in coauthors:
314+
coauthor_counts[identifier] = coauthor_counts.get(identifier, 0) + 1
308315

309316
except Exception as e:
310317
print(f"Error getting co-authors for repository: {repo.full_name}")
311318
print(e)
312-
return []
319+
return existing_contributors
313320

314-
# Create ContributorStats objects for co-authors
315-
coauthor_contributors = []
316-
for username, count in coauthor_counts.items():
317-
if start_date and end_date:
318-
commit_url = f"{endpoint}/{repo.full_name}/commits?author={username}&since={start_date}&until={end_date}"
321+
# Second pass: merge co-author contributions with existing contributors
322+
existing_usernames = {c.username for c in existing_contributors}
323+
324+
for identifier, count in coauthor_counts.items():
325+
# Try to resolve identifier to GitHub username
326+
username = None
327+
328+
# Check if it's already a username (from noreply email)
329+
if "@" not in identifier:
330+
username = identifier
331+
# Check if we can map this email to a known username
332+
elif identifier in email_to_username:
333+
username = email_to_username[identifier]
319334
else:
320-
commit_url = f"{endpoint}/{repo.full_name}/commits?author={username}"
321-
322-
contributor = contributor_stats.ContributorStats(
323-
username,
324-
False,
325-
"", # No avatar URL available for co-authors
326-
count,
327-
commit_url,
328-
"",
329-
)
330-
coauthor_contributors.append(contributor)
335+
# Use email as fallback identifier
336+
username = identifier
337+
338+
# Check if this user already exists in contributors
339+
merged = False
340+
for contributor in existing_contributors:
341+
if contributor.username == username:
342+
# Merge the co-author contributions with existing contributor
343+
contributor.contribution_count += count
344+
merged = True
345+
break
346+
347+
# If not merged, create a new contributor entry
348+
if not merged and username not in existing_usernames:
349+
if start_date and end_date:
350+
commit_url = f"{endpoint}/{repo.full_name}/commits?author={username}&since={start_date}&until={end_date}"
351+
else:
352+
commit_url = f"{endpoint}/{repo.full_name}/commits?author={username}"
353+
354+
contributor = contributor_stats.ContributorStats(
355+
username,
356+
False,
357+
"", # No avatar URL available for co-authors without user lookup
358+
count,
359+
commit_url,
360+
"",
361+
)
362+
existing_contributors.append(contributor)
331363

332-
return coauthor_contributors
364+
return existing_contributors
333365

334366

335367
if __name__ == "__main__":

test_contributors.py

Lines changed: 52 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -317,19 +317,25 @@ def test_get_coauthor_contributors(self):
317317
318318
Co-authored-by: Alice <[email protected]>
319319
"""
320+
mock_commit1.author = MagicMock()
321+
mock_commit1.author.login = "alice"
322+
mock_commit1.commit.author = {"email": "[email protected]"}
320323

321324
mock_commit2 = MagicMock()
322325
mock_commit2.commit.message = """Bug fix
323326
324327
Co-authored-by: Alice <[email protected]>
325328
Co-authored-by: Bob <[email protected]>
326329
"""
330+
mock_commit2.author = MagicMock()
331+
mock_commit2.author.login = "bob"
332+
mock_commit2.commit.author = {"email": "[email protected]"}
327333

328334
mock_repo.commits.return_value = [mock_commit1, mock_commit2]
329335

330-
existing_usernames = set()
336+
existing_contributors = []
331337
result = get_coauthor_contributors(
332-
mock_repo, "2022-01-01", "2022-12-31", "", existing_usernames
338+
mock_repo, "2022-01-01", "2022-12-31", "", existing_contributors
333339
)
334340

335341
# Alice should have count 2, Bob should have count 1
@@ -345,9 +351,9 @@ def test_get_coauthor_contributors(self):
345351
elif contributor.username == "bob":
346352
self.assertEqual(contributor.contribution_count, 1)
347353

348-
def test_get_coauthor_contributors_excludes_existing(self):
354+
def test_get_coauthor_contributors_merges_with_existing(self):
349355
"""
350-
Test that get_coauthor_contributors excludes already existing contributors.
356+
Test that get_coauthor_contributors merges with existing contributors.
351357
"""
352358
mock_repo = MagicMock()
353359
mock_repo.full_name = "owner/repo"
@@ -356,20 +362,39 @@ def test_get_coauthor_contributors_excludes_existing(self):
356362
mock_commit.commit.message = """Feature
357363
358364
Co-authored-by: Alice <[email protected]>
359-
Co-authored-by: Bob <bob@users.noreply.github.com>
365+
Co-authored-by: Bob <bob@example.com>
360366
"""
367+
mock_commit.author = MagicMock()
368+
mock_commit.author.login = "alice"
369+
mock_commit.commit.author = {"email": "[email protected]"}
361370

362371
mock_repo.commits.return_value = [mock_commit]
363372

364-
# Alice is already a contributor
365-
existing_usernames = {"alice"}
373+
# Alice is already a contributor with 5 commits
374+
existing_contributors = [
375+
ContributorStats(
376+
"alice",
377+
False,
378+
"https://avatars.githubusercontent.com/u/",
379+
5,
380+
"url1",
381+
"",
382+
)
383+
]
366384
result = get_coauthor_contributors(
367-
mock_repo, "2022-01-01", "2022-12-31", "", existing_usernames
385+
mock_repo, "2022-01-01", "2022-12-31", "", existing_contributors
368386
)
369387

370-
# Only Bob should be in the result
371-
self.assertEqual(len(result), 1)
372-
self.assertEqual(result[0].username, "bob")
388+
# Alice should be merged (5 + 1 = 6), Bob should be added
389+
self.assertEqual(len(result), 2)
390+
usernames = {c.username for c in result}
391+
self.assertIn("alice", usernames)
392+
self.assertIn("[email protected]", usernames)
393+
394+
# Check Alice's count was incremented
395+
for contributor in result:
396+
if contributor.username == "alice":
397+
self.assertEqual(contributor.contribution_count, 6)
373398

374399
@patch("contributors.get_coauthor_contributors")
375400
def test_get_contributors_with_acknowledge_coauthors(
@@ -386,15 +411,22 @@ def test_get_contributors_with_acknowledge_coauthors(
386411
mock_repo.contributors.return_value = [mock_user]
387412
mock_repo.full_name = "owner/repo"
388413

389-
mock_coauthor = ContributorStats(
390-
"coauthor",
391-
False,
392-
"",
393-
1,
394-
"https://github.com/owner/repo/commits?author=coauthor&since=2022-01-01&until=2022-12-31",
395-
"",
396-
)
397-
mock_get_coauthor_contributors.return_value = [mock_coauthor]
414+
# Mock the return value to include both existing and new contributor
415+
def side_effect(repo, start, end, ghe, existing): # pylint: disable=unused-argument
416+
# Add a coauthor to the existing list
417+
existing.append(
418+
ContributorStats(
419+
"coauthor",
420+
False,
421+
"",
422+
1,
423+
"https://github.com/owner/repo/commits?author=coauthor&since=2022-01-01&until=2022-12-31",
424+
"",
425+
)
426+
)
427+
return existing
428+
429+
mock_get_coauthor_contributors.side_effect = side_effect
398430

399431
result = get_contributors(
400432
mock_repo, "2022-01-01", "2022-12-31", "", acknowledge_coauthors=True

0 commit comments

Comments
 (0)