Skip to content

Commit 7e3b2f6

Browse files
WIP python implementation: special email cases, filling missing name, email and caching
Signed-off-by: Lukasz Gryglicki <[email protected]>
1 parent 8eb11da commit 7e3b2f6

File tree

2 files changed

+133
-71
lines changed

2 files changed

+133
-71
lines changed

cla-backend-go/github/github_repository.go

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -413,7 +413,7 @@ func GetCoAuthorCommits(
413413

414414
log.WithFields(f).Debugf("Getting co-author details: %+v", coAuthor)
415415

416-
// Check for email in "<id>+<login>@users.noreply.github.com" format:
416+
// 1. Check for email in "<id>+<login>@users.noreply.github.com" format:
417417
if matches := NoreplyIDPattern.FindStringSubmatch(email); matches != nil {
418418
idStr, loginStr := matches[1], matches[2]
419419
if githubID, err = strconv.ParseInt(idStr, 10, 64); err == nil {
@@ -426,7 +426,7 @@ func GetCoAuthorCommits(
426426
}
427427
}
428428

429-
// Check for email in "<login>@users.noreply.github.com" format:
429+
// 2. Check for email in "<login>@users.noreply.github.com" format:
430430
if user == nil {
431431
if matches := NoreplyUserPattern.FindStringSubmatch(email); matches != nil {
432432
loginStr := matches[1]
@@ -439,7 +439,7 @@ func GetCoAuthorCommits(
439439
}
440440
}
441441

442-
// Try to find user by email
442+
// 3. Try to find user by email
443443
if user == nil {
444444
user, err = SearchGithubUserByEmail(ctx, client, email)
445445
if err != nil {
@@ -448,7 +448,7 @@ func GetCoAuthorCommits(
448448
}
449449
}
450450

451-
// Last resort - try to find by name=login
451+
// 4. Last resort - try to find by name=login
452452
if user == nil {
453453
// Note that Co-authored-by: name <email> is not actually a GitHub login but rather a name - but we are trying hard to find a GitHub profile
454454
user, err = GetGithubUserByLogin(ctx, client, name)

cla-backend/cla/models/github_models.py

Lines changed: 129 additions & 67 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,8 @@
3434

3535
# some emails we want to exclude when we register the users
3636
EXCLUDE_GITHUB_EMAILS = ["noreply.github.com"]
37+
NOREPLY_ID_PATTERN = re.compile(r"^(\d+)\+([a-zA-Z0-9-]+)@users\.noreply\.github\.com$")
38+
NOREPLY_USER_PATTERN = re.compile(r"^([a-zA-Z0-9-]+)@users\.noreply\.github\.com$")
3739

3840

3941
class GitHub(repository_service_interface.RepositoryService):
@@ -724,7 +726,8 @@ def update_merge_group(self, installation_id, github_repository_id, merge_group_
724726
f"{fn} - PR: {pull_request.number}, Failed to update change request "
725727
f"of repository {github_repository_id} - returning"
726728
)
727-
return
729+
# XXX
730+
# return
728731

729732
project_id = repository.get_repository_project_id()
730733
project = get_project_instance()
@@ -865,7 +868,8 @@ def update_change_request(self, installation_id, github_repository_id, change_re
865868
f"{fn} - PR: {pull_request.number}, Failed to update change request "
866869
f"of repository {github_repository_id} - returning"
867870
)
868-
return
871+
# XXX
872+
# return
869873

870874
# Retrieve project ID from the repository.
871875
project_id = repository.get_repository_project_id()
@@ -1220,6 +1224,28 @@ def get_github_user_by_login(self, login, installation_id):
12201224
cla.log.error("Invalid GitHub credentials provided: %s", str(err))
12211225
return None
12221226

1227+
def get_github_user_by_id(self, github_id, installation_id):
1228+
"""
1229+
Helper method to get the GitHub user object from GitHub by their numeric ID.
1230+
1231+
:param github_id: The numeric GitHub user ID.
1232+
:type github_id: int
1233+
:param installation_id: The ID of the GitHub app installation for this repo.
1234+
:type installation_id: int | None
1235+
"""
1236+
cla.log.debug("Getting GitHub user by ID: %s", github_id)
1237+
if self.client is None:
1238+
self.client = get_github_integration_client(installation_id)
1239+
try:
1240+
user = self.client.get_user_by_id(github_id)
1241+
return user
1242+
except UnknownObjectException:
1243+
cla.log.error("Could not find GitHub user with ID: %s", github_id)
1244+
return None
1245+
except BadCredentialsException as err:
1246+
cla.log.error("Invalid GitHub credentials provided: %s", str(err))
1247+
return None
1248+
12231249

12241250
def get_or_create_user(self, request):
12251251
"""
@@ -1694,61 +1720,53 @@ def get_author_summary(commit, pr, installation_id) -> List[UserCommitSummary]:
16941720
"""
16951721
fn = "cla.models.github_models.get_author_summary"
16961722
commit_authors = []
1697-
if commit.author:
1723+
1724+
# get id, login, name, email from commit.author and commit.commit.author
1725+
id, login, name, email = None, None, None, None
1726+
try:
1727+
id = commit.author.id
1728+
except (AttributeError, GithubException, IncompletableObject):
1729+
pass
1730+
1731+
try:
1732+
login = commit.author.login
1733+
except (AttributeError, GithubException, IncompletableObject):
1734+
pass
1735+
1736+
try:
1737+
name = commit.author.name
1738+
if name is None or name.strip() == "":
1739+
name = commit.commit.author.name
1740+
except (AttributeError, GithubException, IncompletableObject):
16981741
try:
1699-
commit_author_summary = UserCommitSummary(
1700-
commit.sha,
1701-
commit.author.id,
1702-
commit.author.login,
1703-
commit.author.name,
1704-
commit.author.email,
1705-
False,
1706-
False, # default not authorized - will be evaluated and updated later
1707-
)
1708-
cla.log.debug(f"{fn} - PR: {pr}, {commit_author_summary}")
1709-
# check for co-author details
1710-
# issue # 3884
1711-
commit_authors.append(commit_author_summary)
1712-
expand_with_co_authors(commit, pr, installation_id, commit_authors)
1713-
return commit_authors
1714-
except (GithubException, IncompletableObject) as exc:
1715-
cla.log.warning(f"{fn} - PR: {pr}, unable to get commit author summary: {exc}")
1716-
try:
1717-
# commit.commit.author is a github.GitAuthor.GitAuthor object type - object
1718-
# only has date, name and email attributes - no ID attribute/value
1719-
# https://pygithub.readthedocs.io/en/latest/github_objects/GitAuthor.html
1720-
commit_author_summary = UserCommitSummary(
1721-
commit.sha,
1722-
None,
1723-
None,
1724-
commit.commit.author.name,
1725-
commit.commit.author.email,
1726-
False,
1727-
False, # default not authorized - will be evaluated and updated later
1728-
)
1729-
cla.log.debug(f"{fn} - github.GitAuthor.GitAuthor object: {commit.commit.author}")
1730-
cla.log.debug(
1731-
f"{fn} - PR: {pr}, "
1732-
f"GitHub NamedUser author NOT found for commit SHA {commit_author_summary} "
1733-
f"however, we did find GitAuthor info"
1734-
)
1735-
cla.log.debug(f"{fn} - PR: {pr}, {commit_author_summary}")
1736-
commit_authors.append(commit_author_summary)
1737-
expand_with_co_authors(commit, pr, installation_id, commit_authors)
1738-
return commit_authors
1739-
except (GithubException, IncompletableObject) as exc:
1740-
cla.log.warning(f"{fn} - PR: {pr}, unable to get commit author summary: {exc}")
1741-
commit_author_summary = UserCommitSummary(commit.sha, None, None, None, None, False, False)
1742-
cla.log.warning(f"{fn} - PR: {pr}, " f"could not find any commit author for SHA {commit_author_summary}")
1743-
commit_authors.append(commit_author_summary)
1744-
expand_with_co_authors(commit, pr, installation_id, commit_authors)
1745-
return commit_authors
1746-
else:
1747-
cla.log.warning(f"{fn} - PR: {pr}, " f"could not find any commit author for SHA {commit.sha}")
1748-
commit_author_summary = UserCommitSummary(commit.sha, None, None, None, None, False, False)
1749-
commit_authors.append(commit_author_summary)
1750-
expand_with_co_authors(commit, pr, installation_id, commit_authors)
1751-
return commit_authors
1742+
name = commit.commit.author.name
1743+
except (AttributeError, GithubException, IncompletableObject):
1744+
pass
1745+
1746+
try:
1747+
email = commit.author.email
1748+
if email is None or email.strip() == "":
1749+
email = commit.commit.author.email
1750+
except (AttributeError, GithubException, IncompletableObject):
1751+
try:
1752+
email = commit.commit.author.email
1753+
except (AttributeError, GithubException, IncompletableObject):
1754+
pass
1755+
1756+
cla.log.debug(f"{fn}: (id: {id}, login: {login}, name: {name}, email: {email})")
1757+
commit_author_summary = UserCommitSummary(
1758+
commit.sha,
1759+
id,
1760+
login,
1761+
name,
1762+
email,
1763+
False,
1764+
False, # default not authorized - will be evaluated and updated later
1765+
)
1766+
cla.log.debug(f"{fn} - PR: {pr}, {commit_author_summary}")
1767+
commit_authors.append(commit_author_summary)
1768+
expand_with_co_authors(commit, pr, installation_id, commit_authors)
1769+
return commit_authors
17521770

17531771

17541772
def get_pull_request_commit_authors(pull_request, installation_id) -> List[UserCommitSummary]:
@@ -1795,19 +1813,49 @@ def get_co_author_commits(co_author, commit, pr, installation_id):
17951813
# check if co-author is a github user
17961814
co_author_summary = None
17971815
login, github_id = None, None
1798-
email = co_author[1]
1799-
name = co_author[0]
1816+
email = co_author[1].strip()
1817+
name = co_author[0].strip()
18001818

18011819
# get repository service
18021820
github = cla.utils.get_repository_service("github")
1821+
user = None
18031822

18041823
cla.log.debug(f"{fn} - getting co-author details: {co_author}, email: {email}, name: {name}")
1805-
try:
1806-
user = github.get_github_user_by_email(email, installation_id)
1807-
except (GithubException, IncompletableObject, RateLimitExceededException) as ex:
1808-
# user not found
1809-
cla.log.debug(f"{fn} - co-author github user not found via email {email}: {co_author} with exception: {ex}")
1810-
user = None
1824+
1825+
# 1. Check for "<id>+<login>@users.noreply.github.com"
1826+
m = NOREPLY_ID_PATTERN.match(email)
1827+
if m:
1828+
id_str, login_str = m.groups()
1829+
try:
1830+
github_id = int(id_str)
1831+
cla.log.debug(f"{fn} - Detected noreply GitHub email with ID: {id_str}, login: {login_str}")
1832+
user = github.get_github_user_by_id(github_id, installation_id)
1833+
except Exception as ex:
1834+
cla.log.warning(f"{fn} - Error fetching user by ID {id_str}: {ex}")
1835+
user = None
1836+
1837+
# 2. Check for "<login>@users.noreply.github.com"
1838+
if user is None:
1839+
m = NOREPLY_USER_PATTERN.match(email)
1840+
if m:
1841+
login_str = m.group(1)
1842+
try:
1843+
cla.log.debug(f"{fn} - Detected noreply GitHub email with login: {login_str}")
1844+
user = github.get_github_user_by_login(login_str, installation_id)
1845+
except Exception as ex:
1846+
cla.log.warning(f"{fn} - Error fetching user by login {login_str}: {ex}")
1847+
user = None
1848+
1849+
# 3. Try to find user by email
1850+
if user is None:
1851+
try:
1852+
user = github.get_github_user_by_email(email, installation_id)
1853+
except (GithubException, IncompletableObject, RateLimitExceededException) as ex:
1854+
# user not found
1855+
cla.log.debug(f"{fn} - co-author github user not found via email {email}: {co_author} with exception: {ex}")
1856+
user = None
1857+
1858+
# 4. Last resort: try to find by name (login)
18111859
if user is None:
18121860
try:
18131861
# Note that Co-authored-by: name <email> is not actually a GitHub login but rather a name - but we are trying hard to find a GitHub profile
@@ -1822,13 +1870,27 @@ def get_co_author_commits(co_author, commit, pr, installation_id):
18221870
if user:
18231871
login = user.login
18241872
github_id = user.id
1825-
cla.log.debug(f"{fn} - co-author github user details found : {co_author}, user: {user}, login: {login}, id: {github_id}")
1873+
final_name = name
1874+
final_email = email
1875+
try:
1876+
n = user.name
1877+
if n and n.strip():
1878+
final_name = n
1879+
except (AttributeError, GithubException, IncompletableObject):
1880+
pass
1881+
try:
1882+
e = user.email
1883+
if e and e.strip():
1884+
final_email = e
1885+
except (AttributeError, GithubException, IncompletableObject):
1886+
pass
1887+
cla.log.debug(f"{fn} - co-author github user details found: {co_author}, user: {user}, login: {login}, id: {github_id}, name: {final_name}, email: {final_email}")
18261888
co_author_summary = UserCommitSummary(
18271889
commit.sha,
18281890
github_id,
18291891
login,
1830-
name,
1831-
email,
1892+
final_name,
1893+
final_email,
18321894
False,
18331895
False, # default not authorized - will be evaluated and updated later
18341896
)

0 commit comments

Comments
 (0)