diff --git a/.env-example b/.env-example index 9c25853..152c042 100644 --- a/.env-example +++ b/.env-example @@ -4,6 +4,9 @@ END_DATE = "" ORGANIZATION = "organization" REPOSITORY = "organization/repository" START_DATE = "" +SPONSOR_INFO = "False" +LINK_TO_PROFILE = "True" +ACKNOWLEDGE_COAUTHORS = "True" # GITHUB APP GH_APP_ID = "" diff --git a/README.md b/README.md index 46b9cb3..8aed861 100644 --- a/README.md +++ b/README.md @@ -84,20 +84,23 @@ This action can be configured to authenticate with GitHub App Installation or Pe #### Other Configuration Options -| field | required | default | description | -| ------------------- | ----------------------------------------------- | ----------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | -| `GH_ENTERPRISE_URL` | False | "" | The `GH_ENTERPRISE_URL` is used to connect to an enterprise server instance of GitHub. github.com users should not enter anything here. | -| `ORGANIZATION` | Required to have `ORGANIZATION` or `REPOSITORY` | | The name of the GitHub organization which you want the contributor information of all repos from. ie. github.com/github would be `github` | -| `REPOSITORY` | Required to have `ORGANIZATION` or `REPOSITORY` | | The name of the repository and organization which you want the contributor information from. ie. `github/contributors` or a comma separated list of multiple repositories `github/contributor,super-linter/super-linter` | -| `START_DATE` | False | Beginning of time | The date from which you want to start gathering contributor information. ie. Aug 1st, 2023 would be `2023-08-01`. | -| `END_DATE` | False | Current Date | The date at which you want to stop gathering contributor information. Must be later than the `START_DATE`. ie. Aug 2nd, 2023 would be `2023-08-02` | -| `SPONSOR_INFO` | False | False | If you want to include sponsor information in the output. This will include the sponsor count and the sponsor URL. This will impact action performance. ie. SPONSOR_INFO = "False" or SPONSOR_INFO = "True" | -| `LINK_TO_PROFILE` | False | True | If you want to link usernames to their GitHub profiles in the output. ie. LINK_TO_PROFILE = "True" or LINK_TO_PROFILE = "False" | +| field | required | default | description | +| ----------------------- | ----------------------------------------------- | ----------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `GH_ENTERPRISE_URL` | False | "" | The `GH_ENTERPRISE_URL` is used to connect to an enterprise server instance of GitHub. github.com users should not enter anything here. | +| `ORGANIZATION` | Required to have `ORGANIZATION` or `REPOSITORY` | | The name of the GitHub organization which you want the contributor information of all repos from. ie. github.com/github would be `github` | +| `REPOSITORY` | Required to have `ORGANIZATION` or `REPOSITORY` | | The name of the repository and organization which you want the contributor information from. ie. `github/contributors` or a comma separated list of multiple repositories `github/contributor,super-linter/super-linter` | +| `START_DATE` | False | Beginning of time | The date from which you want to start gathering contributor information. ie. Aug 1st, 2023 would be `2023-08-01`. | +| `END_DATE` | False | Current Date | The date at which you want to stop gathering contributor information. Must be later than the `START_DATE`. ie. Aug 2nd, 2023 would be `2023-08-02` | +| `SPONSOR_INFO` | False | False | If you want to include sponsor information in the output. This will include the sponsor count and the sponsor URL. This will impact action performance. ie. SPONSOR_INFO = "False" or SPONSOR_INFO = "True" | +| `LINK_TO_PROFILE` | False | True | If you want to link usernames to their GitHub profiles in the output. ie. LINK_TO_PROFILE = "True" or LINK_TO_PROFILE = "False" | +| `ACKNOWLEDGE_COAUTHORS` | False | True | If you want to include co-authors from commit messages as contributors. Co-authors are identified via the `Co-authored-by:` trailer in commit messages. The action will extract GitHub usernames from GitHub noreply emails (e.g., `username@users.noreply.github.com`) or use the full email address for other email domains. This will impact action performance as it requires scanning all commits. ie. ACKNOWLEDGE_COAUTHORS = "True" or ACKNOWLEDGE_COAUTHORS = "False" | **Note**: If `start_date` and `end_date` are specified then the action will determine if the contributor is new. A new contributor is one that has contributed in the date range specified but not before the start date. **Performance Note:** Using start and end dates will reduce speed of the action by approximately 63X. ie without dates if the action takes 1.7 seconds, it will take 1 minute and 47 seconds. +**Co-authors Note:** When `ACKNOWLEDGE_COAUTHORS` is enabled, the action will scan commit messages for `Co-authored-by:` trailers and include those users as contributors. For GitHub noreply email addresses (e.g., `username@users.noreply.github.com`), the username will be extracted. For other email addresses (e.g., `john@example.com`), the full email address will be used as the contributor identifier. See [GitHub's documentation on creating commits with multiple authors](https://docs.github.com/en/pull-requests/committing-changes-to-your-project/creating-and-editing-commits/creating-a-commit-with-multiple-authors). + ### Example workflows **Be sure to change at least these values: ``, ``** diff --git a/contributors.py b/contributors.py index 96cdd86..0c700f7 100644 --- a/contributors.py +++ b/contributors.py @@ -1,6 +1,7 @@ # pylint: disable=broad-exception-caught """This file contains the main() and other functions needed to get contributor information from the organization or repository""" +import re from typing import List import auth @@ -27,6 +28,7 @@ def main(): end_date, sponsor_info, link_to_profile, + acknowledge_coauthors, ) = env.get_env_vars() # Auth to GitHub.com @@ -46,7 +48,13 @@ def main(): # Get the contributors contributors = get_all_contributors( - organization, repository_list, start_date, end_date, github_connection, ghe + organization, + repository_list, + start_date, + end_date, + github_connection, + ghe, + acknowledge_coauthors, ) # Check for new contributor if user provided start_date and end_date @@ -60,6 +68,7 @@ def main(): end_date=start_date, github_connection=github_connection, ghe=ghe, + acknowledge_coauthors=acknowledge_coauthors, ) for contributor in contributors: contributor.new_contributor = contributor_stats.is_new_contributor( @@ -103,6 +112,7 @@ def get_all_contributors( end_date: str, github_connection: object, ghe: str, + acknowledge_coauthors: bool, ): """ Get all contributors from the organization or repository @@ -113,6 +123,8 @@ def get_all_contributors( start_date (str): The start date of the date range for the contributor list. end_date (str): The end date of the date range for the contributor list. github_connection (object): The authenticated GitHub connection object from PyGithub + ghe (str): The GitHub Enterprise URL to use for authentication + acknowledge_coauthors (bool): Whether to acknowledge co-authors from commit messages Returns: all_contributors (list): A list of ContributorStats objects @@ -130,7 +142,14 @@ def get_all_contributors( all_contributors = [] if repos: for repo in repos: - repo_contributors = get_contributors(repo, start_date, end_date, ghe) + repo_contributors = get_contributors( + repo, + start_date, + end_date, + ghe, + acknowledge_coauthors, + github_connection, + ) if repo_contributors: all_contributors.append(repo_contributors) @@ -140,7 +159,72 @@ def get_all_contributors( return all_contributors -def get_contributors(repo: object, start_date: str, end_date: str, ghe: str): +def get_coauthors_from_message( + commit_message: str, github_connection: object = None +) -> List[str]: + """ + Extract co-author identifiers from a commit message. + + Co-authored-by trailers follow the format: + Co-authored-by: Name + + For GitHub noreply emails (username@users.noreply.github.com), extracts the username. + For @github.com emails, extracts the username (part before @). + For other emails, uses GitHub Search Users API to find the username, or falls back to email. + + Args: + commit_message (str): The commit message to parse + github_connection (object): The authenticated GitHub connection object from PyGithub + + Returns: + List[str]: List of co-author identifiers (GitHub usernames or email addresses) + """ + # Match Co-authored-by trailers - case insensitive + # Format: Co-authored-by: Name + pattern = r"Co-authored-by:\s*[^<]*<([^>]+)>" + matches = re.findall(pattern, commit_message, re.IGNORECASE) + + identifiers = [] + for email in matches: + # Check if it's a GitHub noreply email format: username@users.noreply.github.com + noreply_pattern = r"^(\d+\+)?([^@]+)@users\.noreply\.github\.com$" + noreply_match = re.match(noreply_pattern, email) + if noreply_match: + # For GitHub noreply emails, extract just the username + identifiers.append(noreply_match.group(2)) + elif email.endswith("@github.com"): + # For @github.com emails, extract the username (part before @) + username = email.split("@")[0] + identifiers.append(username) + else: + # For other emails, try to find GitHub username using Search Users API + if github_connection: + try: + # Search for users by email + search_result = github_connection.search_users(f"email:{email}") + if search_result.totalCount > 0: + # Use the first matching user's login + identifiers.append(search_result[0].login) + else: + # If no user found, fall back to email address + identifiers.append(email) + except Exception: + # If API call fails, fall back to email address + identifiers.append(email) + else: + # If no GitHub connection available, use the full email address + identifiers.append(email) + return identifiers + + +def get_contributors( + repo: object, + start_date: str, + end_date: str, + ghe: str, + acknowledge_coauthors: bool, + github_connection: object, +): """ Get contributors from a single repository and filter by start end dates if present. @@ -148,12 +232,18 @@ def get_contributors(repo: object, start_date: str, end_date: str, ghe: str): repo (object): The repository object from PyGithub start_date (str): The start date of the date range for the contributor list. end_date (str): The end date of the date range for the contributor list. + ghe (str): The GitHub Enterprise URL to use for authentication + acknowledge_coauthors (bool): Whether to acknowledge co-authors from commit messages + github_connection (object): The authenticated GitHub connection object from PyGithub Returns: contributors (list): A list of ContributorStats objects """ all_repo_contributors = repo.contributors() contributors = [] + # Track usernames already added as contributors + contributor_usernames = set() + try: for user in all_repo_contributors: # Ignore contributors with [bot] in their name @@ -187,6 +277,19 @@ def get_contributors(repo: object, start_date: str, end_date: str, ghe: str): "", ) contributors.append(contributor) + contributor_usernames.add(user.login) + + # Get co-authors from commit messages if enabled + if acknowledge_coauthors: + coauthor_contributors = get_coauthor_contributors( + repo, + start_date, + end_date, + ghe, + github_connection, + ) + contributors.extend(coauthor_contributors) + except Exception as e: print(f"Error getting contributors for repository: {repo.full_name}") print(e) @@ -195,5 +298,72 @@ def get_contributors(repo: object, start_date: str, end_date: str, ghe: str): return contributors +def get_coauthor_contributors( + repo: object, + start_date: str, + end_date: str, + ghe: str, + github_connection: object, +) -> List[contributor_stats.ContributorStats]: + """ + Get contributors who were co-authors on commits in the repository. + + Args: + repo (object): The repository object + start_date (str): The start date of the date range for the contributor list. + end_date (str): The end date of the date range for the contributor list. + ghe (str): The GitHub Enterprise URL + github_connection (object): The authenticated GitHub connection object from PyGithub + + Returns: + List[ContributorStats]: A list of ContributorStats objects for co-authors + """ + coauthor_counts: dict = {} # username -> count + endpoint = ghe if ghe else "https://github.com" + + try: + # Get all commits in the date range + if start_date and end_date: + commits = repo.commits(since=start_date, until=end_date) + else: + commits = repo.commits() + + for commit in commits: + # Get commit message from the commit object + commit_message = commit.commit.message if commit.commit else "" + if not commit_message: + continue + + # Extract co-authors from commit message + coauthors = get_coauthors_from_message(commit_message, github_connection) + for username in coauthors: + coauthor_counts[username] = coauthor_counts.get(username, 0) + 1 + + except Exception as e: + print(f"Error getting co-authors for repository: {repo.full_name}") + print(e) + return [] + + # Create ContributorStats objects for co-authors + coauthor_contributors = [] + for username, count in coauthor_counts.items(): + if start_date and end_date: + commit_url = f"{endpoint}/{repo.full_name}/commits?author={username}&since={start_date}&until={end_date}" + else: + commit_url = f"{endpoint}/{repo.full_name}/commits?author={username}" + + contributor = contributor_stats.ContributorStats( + username, + False, + "", # No avatar URL available for co-authors + count, + commit_url, + "", + ) + coauthor_contributors.append(contributor) + + return coauthor_contributors + + if __name__ == "__main__": main() diff --git a/env.py b/env.py index b160dc2..2dbd99f 100644 --- a/env.py +++ b/env.py @@ -85,6 +85,7 @@ def get_env_vars( str, bool, bool, + bool, ]: """ Get the environment variables for use in the action. @@ -105,6 +106,7 @@ def get_env_vars( end_date (str): The end date to get contributor information to. sponsor_info (str): Whether to get sponsor information on the contributor link_to_profile (str): Whether to link username to Github profile in markdown output + acknowledge_coauthors (bool): Whether to acknowledge co-authors from commit messages """ if not test: @@ -145,6 +147,7 @@ def get_env_vars( sponsor_info = get_bool_env_var("SPONSOR_INFO", False) link_to_profile = get_bool_env_var("LINK_TO_PROFILE", False) + acknowledge_coauthors = get_bool_env_var("ACKNOWLEDGE_COAUTHORS", True) # Separate repositories_str into a list based on the comma separator repositories_list = [] @@ -166,4 +169,5 @@ def get_env_vars( end_date, sponsor_info, link_to_profile, + acknowledge_coauthors, ) diff --git a/test_contributors.py b/test_contributors.py index 542c72b..1642288 100644 --- a/test_contributors.py +++ b/test_contributors.py @@ -4,7 +4,12 @@ from unittest.mock import MagicMock, patch from contributor_stats import ContributorStats -from contributors import get_all_contributors, get_contributors +from contributors import ( + get_all_contributors, + get_coauthor_contributors, + get_coauthors_from_message, + get_contributors, +) class TestContributors(unittest.TestCase): @@ -25,7 +30,7 @@ def test_get_contributors(self, mock_contributor_stats): mock_repo.contributors.return_value = [mock_user] mock_repo.full_name = "owner/repo" - get_contributors(mock_repo, "2022-01-01", "2022-12-31", "") + get_contributors(mock_repo, "2022-01-01", "2022-12-31", "", False, None) mock_contributor_stats.assert_called_once_with( "user", @@ -59,7 +64,7 @@ def test_get_all_contributors_with_organization(self, mock_get_contributors): ghe = "" result = get_all_contributors( - "org", "", "2022-01-01", "2022-12-31", mock_github_connection, ghe + "org", "", "2022-01-01", "2022-12-31", mock_github_connection, ghe, False ) self.assertEqual( @@ -75,8 +80,12 @@ def test_get_all_contributors_with_organization(self, mock_get_contributors): ), ], ) - mock_get_contributors.assert_any_call("repo1", "2022-01-01", "2022-12-31", ghe) - mock_get_contributors.assert_any_call("repo2", "2022-01-01", "2022-12-31", ghe) + mock_get_contributors.assert_any_call( + "repo1", "2022-01-01", "2022-12-31", ghe, False, mock_github_connection + ) + mock_get_contributors.assert_any_call( + "repo2", "2022-01-01", "2022-12-31", ghe, False, mock_github_connection + ) @patch("contributors.get_contributors") def test_get_all_contributors_with_repository(self, mock_get_contributors): @@ -98,7 +107,13 @@ def test_get_all_contributors_with_repository(self, mock_get_contributors): ghe = "" result = get_all_contributors( - "", ["owner/repo"], "2022-01-01", "2022-12-31", mock_github_connection, ghe + "", + ["owner/repo"], + "2022-01-01", + "2022-12-31", + mock_github_connection, + ghe, + False, ) self.assertEqual( @@ -115,7 +130,7 @@ def test_get_all_contributors_with_repository(self, mock_get_contributors): ], ) mock_get_contributors.assert_called_once_with( - "repo", "2022-01-01", "2022-12-31", ghe + "repo", "2022-01-01", "2022-12-31", ghe, False, mock_github_connection ) @patch("contributors.contributor_stats.ContributorStats") @@ -138,7 +153,7 @@ def test_get_contributors_skip_users_with_no_commits(self, mock_contributor_stat mock_repo.get_commits.side_effect = StopIteration ghe = "" - get_contributors(mock_repo, "2022-01-01", "2022-12-31", ghe) + get_contributors(mock_repo, "2022-01-01", "2022-12-31", ghe, False, None) # Note that only user is returned and user2 is not returned here because there were no commits in the date range mock_contributor_stats.assert_called_once_with( @@ -166,7 +181,7 @@ def test_get_contributors_skip_bot(self, mock_contributor_stats): mock_repo.get_commits.side_effect = StopIteration ghe = "" - get_contributors(mock_repo, "2022-01-01", "2022-12-31", ghe) + get_contributors(mock_repo, "2022-01-01", "2022-12-31", ghe, False, None) # Note that only user is returned and user2 is not returned here because there were no commits in the date range mock_contributor_stats.isEmpty() @@ -187,7 +202,7 @@ def test_get_contributors_no_commit_end_date(self, mock_contributor_stats): mock_repo.get_commits.side_effect = StopIteration ghe = "" - get_contributors(mock_repo, "2022-01-01", "", ghe) + get_contributors(mock_repo, "2022-01-01", "", ghe, False, None) # Note that only user is returned and user2 is not returned here because there were no commits in the date range mock_contributor_stats.assert_called_once_with( @@ -200,5 +215,205 @@ def test_get_contributors_no_commit_end_date(self, mock_contributor_stats): ) +class TestCoauthorFunctions(unittest.TestCase): + """ + Test case for the co-author related functions in the contributors module. + """ + + def test_get_coauthors_from_message_with_noreply_email(self): + """ + Test extracting co-authors from a commit message with noreply email. + """ + message = """Fix bug in login + +Co-authored-by: John Doe +""" + result = get_coauthors_from_message(message, None) + self.assertEqual(result, ["johndoe"]) + + def test_get_coauthors_from_message_with_noreply_email_with_id(self): + """ + Test extracting co-authors from a commit message with noreply email containing ID prefix. + """ + message = """Fix bug in login + +Co-authored-by: John Doe <12345678+johndoe@users.noreply.github.com> +""" + result = get_coauthors_from_message(message, None) + self.assertEqual(result, ["johndoe"]) + + def test_get_coauthors_from_message_multiple_coauthors(self): + """ + Test extracting multiple co-authors from a commit message. + """ + message = """Feature implementation + +Co-authored-by: Alice +Co-authored-by: Bob +""" + result = get_coauthors_from_message(message, None) + self.assertEqual(result, ["alice", "bob"]) + + def test_get_coauthors_from_message_with_regular_email(self): + """ + Test that regular emails are extracted as co-authors. + """ + message = """Fix bug + +Co-authored-by: John Doe +""" + result = get_coauthors_from_message(message, None) + self.assertEqual(result, ["john@example.com"]) + + def test_get_coauthors_from_message_case_insensitive(self): + """ + Test that co-authored-by is case insensitive. + """ + message = """Fix bug + +co-authored-by: John Doe +CO-AUTHORED-BY: Jane Doe +""" + result = get_coauthors_from_message(message, None) + self.assertEqual(result, ["johndoe", "janedoe"]) + + def test_get_coauthors_from_message_empty_message(self): + """ + Test extracting co-authors from an empty commit message. + """ + result = get_coauthors_from_message("", None) + self.assertEqual(result, []) + + def test_get_coauthors_from_message_no_coauthors(self): + """ + Test extracting co-authors from a commit message without co-authors. + """ + message = "Fix bug in login system" + result = get_coauthors_from_message(message, None) + self.assertEqual(result, []) + + def test_get_coauthors_from_message_mixed_email_types(self): + """ + Test extracting co-authors with both GitHub noreply and regular emails. + """ + message = """Feature implementation + +Co-authored-by: Alice +Co-authored-by: Bob +Co-authored-by: Charlie <12345+charlie@users.noreply.github.com> +""" + result = get_coauthors_from_message(message, None) + self.assertEqual(result, ["alice", "bob@example.com", "charlie"]) + + def test_get_coauthor_contributors(self): + """ + Test the get_coauthor_contributors function. + """ + mock_repo = MagicMock() + mock_repo.full_name = "owner/repo" + + mock_commit1 = MagicMock() + mock_commit1.commit.message = """Feature implementation + +Co-authored-by: Alice +""" + + mock_commit2 = MagicMock() + mock_commit2.commit.message = """Bug fix + +Co-authored-by: Alice +Co-authored-by: Bob +""" + + mock_repo.commits.return_value = [mock_commit1, mock_commit2] + + result = get_coauthor_contributors( + mock_repo, "2022-01-01", "2022-12-31", "", None + ) + + # Alice should have count 2, Bob should have count 1 + self.assertEqual(len(result), 2) + usernames = {c.username for c in result} + self.assertIn("alice", usernames) + self.assertIn("bob", usernames) + + # Check counts + for contributor in result: + if contributor.username == "alice": + self.assertEqual(contributor.contribution_count, 2) + elif contributor.username == "bob": + self.assertEqual(contributor.contribution_count, 1) + + def test_get_coauthor_contributors_includes_all(self): + """ + Test that get_coauthor_contributors includes all co-authors, even if they are already main contributors. + """ + mock_repo = MagicMock() + mock_repo.full_name = "owner/repo" + + mock_commit = MagicMock() + mock_commit.commit.message = """Feature + +Co-authored-by: Alice +Co-authored-by: Bob +""" + + mock_repo.commits.return_value = [mock_commit] + + # Alice is already a main contributor, but should still be included in co-author results + result = get_coauthor_contributors( + mock_repo, "2022-01-01", "2022-12-31", "", None + ) + + # Both Alice and Bob should be in the result + self.assertEqual(len(result), 2) + usernames = {c.username for c in result} + self.assertIn("alice", usernames) + self.assertIn("bob", usernames) + + @patch("contributors.get_coauthor_contributors") + def test_get_contributors_with_acknowledge_coauthors( + self, mock_get_coauthor_contributors + ): + """ + Test that get_contributors calls get_coauthor_contributors when acknowledge_coauthors is True. + """ + mock_repo = MagicMock() + mock_user = MagicMock() + mock_user.login = "user" + mock_user.avatar_url = "https://avatars.githubusercontent.com/u/12345678?v=4" + mock_user.contributions_count = 100 + mock_repo.contributors.return_value = [mock_user] + mock_repo.full_name = "owner/repo" + + mock_coauthor = ContributorStats( + "coauthor", + False, + "", + 1, + "https://github.com/owner/repo/commits?author=coauthor&since=2022-01-01&until=2022-12-31", + "", + ) + mock_get_coauthor_contributors.return_value = [mock_coauthor] + + result = get_contributors( + mock_repo, + "2022-01-01", + "2022-12-31", + "", + acknowledge_coauthors=True, + github_connection=None, + ) + + # Verify that get_coauthor_contributors was called + mock_get_coauthor_contributors.assert_called_once() + + # Verify that the result includes both the regular contributor and the co-author + self.assertEqual(len(result), 2) + usernames = [c.username for c in result] + self.assertIn("user", usernames) + self.assertIn("coauthor", usernames) + + if __name__ == "__main__": unittest.main() diff --git a/test_env.py b/test_env.py index 638e6e5..d4e5d1a 100644 --- a/test_env.py +++ b/test_env.py @@ -65,6 +65,7 @@ def test_get_env_vars(self): end_date, sponsor_info, link_to_profile, + acknowledge_coauthors, ) = env.get_env_vars() self.assertEqual(organization, "org") @@ -79,6 +80,7 @@ def test_get_env_vars(self): self.assertEqual(end_date, "2022-12-31") self.assertFalse(sponsor_info) self.assertTrue(link_to_profile) + self.assertTrue(acknowledge_coauthors) @patch.dict( os.environ, @@ -175,6 +177,7 @@ def test_get_env_vars_no_dates(self): end_date, sponsor_info, link_to_profile, + acknowledge_coauthors, ) = env.get_env_vars() self.assertEqual(organization, "org") @@ -189,6 +192,7 @@ def test_get_env_vars_no_dates(self): self.assertEqual(end_date, "") self.assertFalse(sponsor_info) self.assertTrue(link_to_profile) + self.assertTrue(acknowledge_coauthors) @patch.dict(os.environ, {}) def test_get_env_vars_missing_org_or_repo(self):