Skip to content

Commit 12de08d

Browse files
Merge pull request #4768 from linuxfoundation/unicron-co-authors-algorithm-updates
Updates to co-author search algorithm
2 parents 09bd1c9 + e95ee87 commit 12de08d

File tree

16 files changed

+480
-78
lines changed

16 files changed

+480
-78
lines changed

CO_AUTHORS.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -155,7 +155,7 @@ If it does, the backend will process the co-authors as follows, assume trailer v
155155

156156
- Second we check if email is in format `[email protected]`. If it is we use username part as GitHub username/login and fetch the user from GitHub API. If the user is found, we use that user as co-author.
157157

158-
- Third we lookup for email using GitHub API. If the user is found, we use that user as co-author.
158+
- Third we lookup for email using GitHub API. If the user is found, we use that user as co-author. If not we lookup for this user in EasyCLA database by email (first by primary/LF email and then by other emails). If found, we use that user as co-author.
159159

160160
- Finally we use the name part for `name <email>`. If the name matches the GitHub username pattern (alphanumeric characters or hyphens, must be 3–39 characters long, cannot start or end with a hyphen, and cannot contain consecutive hyphens), then we lookup using the GitHub API assuming that this name is a GitHub username/login (this is the case for some bots). If the user is found, we use that user as co-author.
161161

cla-backend-go/github/github_repository.go

Lines changed: 121 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ import (
1515
"time"
1616

1717
log "github.com/linuxfoundation/easycla/cla-backend-go/logging"
18+
"github.com/linuxfoundation/easycla/cla-backend-go/users"
1819
"github.com/linuxfoundation/easycla/cla-backend-go/utils"
1920
"github.com/sirupsen/logrus"
2021

@@ -30,6 +31,31 @@ var (
3031
GithubUsernameRegex = regexp.MustCompile(`^[A-Za-z0-9-]{3,39}$`)
3132
)
3233

34+
// Note: we use | and ||| as placeholders for inline and fenced code, then swap to backticks at render time.
35+
const MissingCoAuthorsMessage = `
36+
37+
One or more co-authors of this pull request were not found. You must specify co-authors in commit message trailer via:
38+
39+
|||
40+
Co-authored-by: name <email>
41+
|||
42+
43+
Supported |Co-authored-by:| formats include:
44+
45+
1) |Anything <[email protected]>| - it will locate your GitHub user by |id| part.
46+
2) |Anything <[email protected]>| - it will locate your GitHub user by |login| part.
47+
3) |Anything <public-email>| - it will locate your GitHub user by |public-email| part. Note that this email must be made public on Github.
48+
4) |Anything <other-email>| - it will locate your GitHub user by |other-email| part but only if that email was used before for any other CLA as a main commit author.
49+
5) |login <any-valid-email>| - it will locate your GitHub user by |login| part, note that |login| part must be at least 3 characters long.
50+
51+
Please update your commit message(s) by doing |git commit --amend| and then |git push [--force]| and then request re-running CLA check via commenting on this pull request:
52+
53+
|||
54+
/easycla
55+
|||
56+
57+
`
58+
3359
const (
3460
help = "https://help.github.com/en/github/committing-changes-to-your-project/why-are-my-commits-linked-to-the-wrong-user"
3561
unknown = "Unknown"
@@ -327,13 +353,15 @@ func GetCoAuthorsFromCommit(
327353
commitMessage := commit.GetCommit().GetMessage()
328354
// log.WithFields(f).Debugf("commit message: %s", commitMessage)
329355

330-
re := regexp.MustCompile(`(?i)co-authored-by: (.*) <(.*)>`)
356+
re := regexp.MustCompile(`(?i)co-authored-by:\s*(.+?)\s*<([^<>]+)>`)
331357
matches := re.FindAllStringSubmatch(commitMessage, -1)
332358
for _, match := range matches {
333359
name := strings.TrimSpace(match[1])
334-
email := strings.TrimSpace(match[2])
335-
coAuthors = append(coAuthors, [2]string{name, email})
336-
log.WithFields(f).Debugf("found co-author: name: %s, email: %s", name, email)
360+
email := strings.ToLower(strings.TrimSpace(match[2]))
361+
if name != "" && email != "" {
362+
coAuthors = append(coAuthors, [2]string{name, email})
363+
log.WithFields(f).Debugf("found co-author: name: %s, email: %s", name, email)
364+
}
337365
}
338366
}
339367
return coAuthors
@@ -343,21 +371,27 @@ func GetCoAuthorsFromCommit(
343371
func ExpandWithCoAuthors(
344372
ctx context.Context,
345373
client *github.Client,
374+
usersService users.Service,
346375
commit *github.RepositoryCommit,
347376
pr int,
348377
installationID int64,
349378
commitAuthors *[]*UserCommitSummary,
350-
) {
379+
) bool {
351380
f := logrus.Fields{
352381
"functionName": "github.github_repository.ExpandWithCoAuthors",
353382
"pr": pr,
354383
}
355384
coAuthors := GetCoAuthorsFromCommit(ctx, commit)
356385
log.WithFields(f).Debugf("co-authors found: %s", coAuthors)
386+
missing := false
357387
for _, coAuthor := range coAuthors {
358-
summary := GetCoAuthorCommits(ctx, client, coAuthor, commit, pr, installationID)
388+
summary, found := GetCoAuthorCommits(ctx, client, usersService, coAuthor, commit, pr, installationID)
359389
*commitAuthors = append(*commitAuthors, summary)
390+
if !missing && !found {
391+
missing = true
392+
}
360393
}
394+
return missing
361395
}
362396

363397
// IsValidGitHubUsername checks if the provided username is a valid GitHub username.
@@ -374,14 +408,16 @@ func IsValidGitHubUsername(username string) bool {
374408
return true
375409
}
376410

411+
//nolint:gocyclo // complexity is acceptable for now
377412
func GetCoAuthorCommits(
378413
ctx context.Context,
379414
client *github.Client,
415+
usersService users.Service,
380416
coAuthor [2]string,
381417
commit *github.RepositoryCommit,
382418
pr int,
383419
installationID int64,
384-
) *UserCommitSummary {
420+
) (*UserCommitSummary, bool) {
385421
f := logrus.Fields{
386422
"functionName": "github.github_repository.GetCoAuthorCommits",
387423
"pr": pr,
@@ -398,9 +434,12 @@ func GetCoAuthorCommits(
398434
)
399435
name = strings.TrimSpace(coAuthor[0])
400436
email = strings.TrimSpace(coAuthor[1])
437+
lName := strings.ToLower(name)
401438

402-
if cachedUser, ok := GithubUserCache.Get([2]string{name, email}); ok {
439+
cacheKey := [2]string{lName, email}
440+
if cachedUser, ok := GithubUserCache.Get(cacheKey); ok {
403441
log.WithFields(f).Debugf("GitHub user found in cache for name/email: %s/%s: %+v", name, email, cachedUser)
442+
found := false
404443
var summary *UserCommitSummary
405444
if cachedUser != nil {
406445
summary = &UserCommitSummary{
@@ -409,6 +448,7 @@ func GetCoAuthorCommits(
409448
Affiliated: false,
410449
Authorized: false,
411450
}
451+
found = cachedUser.ID != nil
412452
} else {
413453
summary = &UserCommitSummary{
414454
SHA: utils.StringValue(commit.SHA),
@@ -423,7 +463,7 @@ func GetCoAuthorCommits(
423463
}
424464
}
425465
log.WithFields(f).Debugf("PR: %d, %+v (from cache)", pr, summary)
426-
return summary
466+
return summary, found
427467
}
428468

429469
log.WithFields(f).Debugf("Getting co-author details: %+v", coAuthor)
@@ -454,19 +494,63 @@ func GetCoAuthorCommits(
454494
}
455495
}
456496

457-
// 3. Try to find user by email
497+
// 3. Try to find user by email via GitHub APIs
458498
if user == nil {
459499
user, err = SearchGithubUserByEmail(ctx, client, email)
460500
if err != nil {
461-
log.WithFields(f).Debugf("Co-author GitHub user not found via email %s: %v (error: %v)", email, coAuthor, err)
501+
log.WithFields(f).Debugf("Co-author GitHub user not found via github email %s: %v (error: %v)", email, coAuthor, err)
462502
user = nil
463503
}
464504
}
465505

506+
// 3b. Try to find user by email in our database
507+
if user == nil {
508+
var githubID string
509+
dbUsers, err2 := usersService.GetUsersByLFEmail(email)
510+
if err2 == nil {
511+
for _, dbUser := range dbUsers {
512+
if dbUser.GithubID != "" {
513+
githubID = dbUser.GithubID
514+
// log.WithFields(f).Debugf("FOUND githubID.1 = %s", githubID)
515+
break
516+
}
517+
}
518+
} else {
519+
log.WithFields(f).Debugf("Co-author GitHub user not found via lf email %s: %v (error: %v)", email, coAuthor, err2)
520+
}
521+
if githubID == "" {
522+
dbUsers, err2 := usersService.GetUsersByEmail(email)
523+
if err2 == nil {
524+
for _, dbUser := range dbUsers {
525+
if dbUser.GithubID != "" {
526+
githubID = dbUser.GithubID
527+
// log.WithFields(f).Debugf("FOUND githubID.2 = %s", githubID)
528+
break
529+
}
530+
}
531+
} else {
532+
log.WithFields(f).Debugf("Co-author GitHub user not found via emails %s: %v (error: %v)", email, coAuthor, err2)
533+
}
534+
}
535+
if githubID != "" {
536+
githubIDInt, err2 := strconv.ParseInt(githubID, 10, 64)
537+
if err2 != nil {
538+
log.WithFields(f).Debugf("Co-author GitHub user not found via lf email %s, wrong GitHub ID: %s: %v (error: %v)", email, githubID, coAuthor, err2)
539+
} else {
540+
user, err = GetGithubUserByID(ctx, client, githubIDInt)
541+
if err != nil {
542+
log.WithFields(f).Debugf("Error fetching user by ID %d: %v", githubIDInt, err)
543+
user = nil
544+
}
545+
// log.WithFields(f).Debugf("FOUND user = (%s, %d, %s, %s)", *user.Login, *user.ID, *user.Name, *user.Email)
546+
}
547+
}
548+
}
549+
466550
// 4. Last resort - try to find by name=login
467-
if user == nil && IsValidGitHubUsername(name) {
551+
if user == nil && IsValidGitHubUsername(lName) {
468552
// Note that Co-authored-by: name <email> is not actually a GitHub login but rather a name - but we are trying hard to find a GitHub profile
469-
user, err = GetGithubUserByLogin(ctx, client, name)
553+
user, err = GetGithubUserByLogin(ctx, client, lName)
470554
if err != nil {
471555
log.WithFields(f).Debugf("Co-author GitHub user not found via name=login=%s: %v (error: %v)", name, coAuthor, err)
472556
user = nil
@@ -476,12 +560,14 @@ func GetCoAuthorCommits(
476560
log.WithFields(f).Debugf("Co-author: %v, user: %+v", coAuthor, user)
477561

478562
var summary *UserCommitSummary
563+
found := false
479564
if user != nil {
480565
if user.Login != nil {
481566
login = *user.Login
482567
}
483568
if user.ID != nil {
484569
githubID = *user.ID
570+
found = true
485571
}
486572
if user.Name == nil || (user.Name != nil && strings.TrimSpace(*user.Name) == "") {
487573
user.Name = &name
@@ -512,11 +598,11 @@ func GetCoAuthorCommits(
512598
log.WithFields(f).Debugf("Co-author GitHub user details not found: %v", coAuthor)
513599
}
514600

515-
GithubUserCache.Set([2]string{name, email}, user)
516-
return summary
601+
GithubUserCache.Set(cacheKey, user)
602+
return summary, found
517603
}
518604

519-
func GetPullRequestCommitAuthors(ctx context.Context, installationID int64, pullRequestID int, owner, repo string, withCoAuthors bool) ([]*UserCommitSummary, *string, error) {
605+
func GetPullRequestCommitAuthors(ctx context.Context, usersService users.Service, installationID int64, pullRequestID int, owner, repo string, withCoAuthors bool) ([]*UserCommitSummary, *string, bool, error) {
520606
f := logrus.Fields{
521607
"functionName": "github.github_repository.GetPullRequestCommitAuthors",
522608
"pullRequestID": pullRequestID,
@@ -527,21 +613,22 @@ func GetPullRequestCommitAuthors(ctx context.Context, installationID int64, pull
527613
client, err := NewGithubAppClient(installationID)
528614
if err != nil {
529615
log.WithFields(f).WithError(err).Warn("unable to create Github client")
530-
return nil, nil, err
616+
return nil, nil, false, err
531617
}
532618

533619
commits, resp, comErr := client.PullRequests.ListCommits(ctx, owner, repo, pullRequestID, &github.ListOptions{})
534620
if comErr != nil {
535621
log.WithFields(f).WithError(comErr).Warnf("problem listing commits for repo: %s/%s pull request: %d", owner, repo, pullRequestID)
536-
return nil, nil, comErr
622+
return nil, nil, false, comErr
537623
}
538624
if resp.StatusCode != http.StatusOK {
539625
msg := fmt.Sprintf("unexpected status code: %d - expected: %d", resp.StatusCode, http.StatusOK)
540626
log.WithFields(f).Warn(msg)
541-
return nil, nil, errors.New(msg)
627+
return nil, nil, false, errors.New(msg)
542628
}
543629

544630
log.WithFields(f).Debugf("found %d commits for pull request: %d", len(commits), pullRequestID)
631+
anyMissing := false
545632
for _, commit := range commits {
546633
log.WithFields(f).Debugf("loaded commit: %+v", commit)
547634
commitAuthor := ""
@@ -571,7 +658,10 @@ func GetPullRequestCommitAuthors(ctx context.Context, installationID int64, pull
571658
Authorized: false,
572659
})
573660
if withCoAuthors {
574-
ExpandWithCoAuthors(ctx, client, commit, pullRequestID, installationID, &userCommitSummary)
661+
missing := ExpandWithCoAuthors(ctx, client, usersService, commit, pullRequestID, installationID, &userCommitSummary)
662+
if !anyMissing && missing {
663+
anyMissing = true
664+
}
575665
}
576666
}
577667

@@ -584,10 +674,10 @@ func GetPullRequestCommitAuthors(ctx context.Context, installationID int64, pull
584674
// }
585675
// log.WithFields(f).Debugf("user commit summary: %+v", *summary)
586676
//}
587-
return userCommitSummary, latestCommitSHA, nil
677+
return userCommitSummary, latestCommitSHA, anyMissing, nil
588678
}
589679

590-
func UpdatePullRequest(ctx context.Context, installationID int64, pullRequestID int, owner, repo string, repoID *int64, latestSHA string, signed []*UserCommitSummary, missing []*UserCommitSummary, CLABaseAPIURL, CLALandingPage, CLALogoURL string) error {
680+
func UpdatePullRequest(ctx context.Context, installationID int64, pullRequestID int, owner, repo string, repoID *int64, latestSHA string, signed []*UserCommitSummary, missing []*UserCommitSummary, anyMissing bool, CLABaseAPIURL, CLALandingPage, CLALogoURL string) error {
591681
f := logrus.Fields{
592682
"functionName": "github.github_repository.UpdatePullRequest",
593683
"installationID": installationID,
@@ -618,7 +708,7 @@ func UpdatePullRequest(ctx context.Context, installationID int64, pullRequestID
618708
return failedErr
619709
}
620710

621-
body := assembleCLAComment(ctx, int(installationID), pullRequestID, repoID, signed, missing, CLABaseAPIURL, CLALogoURL, CLALandingPage)
711+
body := assembleCLAComment(ctx, int(installationID), pullRequestID, repoID, signed, missing, anyMissing, CLABaseAPIURL, CLALogoURL, CLALandingPage)
622712

623713
if len(missing) == 0 {
624714
// All contributors are passing
@@ -649,7 +739,7 @@ func UpdatePullRequest(ctx context.Context, installationID int64, pullRequestID
649739
// If we have previously succeeded, then we also need to update the comment (pass => fail)
650740
log.WithFields(f).Debugf("Found previously succeeeded checks - updating the CLA comment in the PR : %d", pullRequestID)
651741
// Generate a new comment with all the failed CLA info
652-
failedComment := assembleCLAComment(ctx, int(installationID), pullRequestID, repoID, signed, missing, CLABaseAPIURL, CLALogoURL, CLALandingPage)
742+
failedComment := assembleCLAComment(ctx, int(installationID), pullRequestID, repoID, signed, missing, anyMissing, CLABaseAPIURL, CLALogoURL, CLALandingPage)
653743
previousSucceededComment.Body = &failedComment
654744
_, _, err = client.Issues.EditComment(ctx, owner, repo, *previousSucceededComment.ID, previousSucceededComment)
655745
if err != nil {
@@ -766,7 +856,7 @@ func assembleCLAStatus(authorName string, signed bool) (string, string) {
766856
return authorName, "Missing CLA Authorization."
767857
}
768858

769-
func assembleCLAComment(ctx context.Context, installationID, pullRequestID int, repositoryID *int64, signed, missing []*UserCommitSummary, apiBaseURL, CLALogoURL, CLALandingPage string) string {
859+
func assembleCLAComment(ctx context.Context, installationID, pullRequestID int, repositoryID *int64, signed, missing []*UserCommitSummary, anyMissing bool, apiBaseURL, CLALogoURL, CLALandingPage string) string {
770860
f := logrus.Fields{
771861
"functionName": "github.github_repository.assembleCLAComment",
772862
utils.XREQUESTID: ctx.Value(utils.XREQUESTID),
@@ -786,13 +876,13 @@ func assembleCLAComment(ctx context.Context, installationID, pullRequestID int,
786876

787877
log.WithFields(f).Debug("Building CLAComment body ")
788878
signURL := getFullSignURL(repositoryType, strconv.Itoa(installationID), strconv.Itoa(int(*repositoryID)), strconv.Itoa(pullRequestID), apiBaseURL)
789-
commentBody := getCommentBody(repositoryType, signURL, signed, missing)
879+
commentBody := getCommentBody(repositoryType, signURL, signed, missing, anyMissing)
790880
allSigned := len(missing) == 0
791881
badge := getCommentBadge(allSigned, signURL, missingID, false, CLALandingPage, CLALogoURL)
792882
return fmt.Sprintf("%s<br >%s", badge, commentBody)
793883
}
794884

795-
func getCommentBody(repositoryType, signURL string, signed, missing []*UserCommitSummary) string {
885+
func getCommentBody(repositoryType, signURL string, signed, missing []*UserCommitSummary, anyMissing bool) string {
796886
f := logrus.Fields{
797887
"functionName": "github.github_repository:getCommentBody",
798888
"repositoryType": repositoryType,
@@ -864,6 +954,10 @@ func getCommentBody(repositoryType, signURL string, signed, missing []*UserCommi
864954
text = "<br>The committers listed above are authorized under a signed CLA."
865955
}
866956

957+
if anyMissing {
958+
committersComment.WriteString(strings.ReplaceAll(MissingCoAuthorsMessage, "|", "`"))
959+
log.WithFields(f).Debug("some co-authors are missing for this PR, added the missing co-author message")
960+
}
867961
return fmt.Sprintf("%s%s", committersComment.String(), text)
868962
}
869963

cla-backend-go/signatures/service.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1023,7 +1023,7 @@ func (s service) updateChangeRequest(ctx context.Context, ghOrg *models.GithubOr
10231023
// Fetch committers
10241024
withCoAuthors := github.IsCoAuthorsEnabledForRepo(ghOrg.EnableCoAuthors, gitHubRepoName)
10251025
log.WithFields(f).Debugf("fetching commit authors for PR: %d using repository owner: %s, repo: %s", pullRequestID, gitHubOrgName, gitHubRepoName)
1026-
authors, latestSHA, authorsErr := github.GetPullRequestCommitAuthors(ctx, ghOrg.OrganizationInstallationID, int(pullRequestID), gitHubOrgName, gitHubRepoName, withCoAuthors)
1026+
authors, latestSHA, anyMissing, authorsErr := github.GetPullRequestCommitAuthors(ctx, s.usersService, ghOrg.OrganizationInstallationID, int(pullRequestID), gitHubOrgName, gitHubRepoName, withCoAuthors)
10271027
if authorsErr != nil {
10281028
log.WithFields(f).WithError(authorsErr).Warnf("unable to get commit authors for %s/%s for PR: %d", gitHubOrgName, gitHubRepoName, pullRequestID)
10291029
return authorsErr
@@ -1124,7 +1124,7 @@ func (s service) updateChangeRequest(ctx context.Context, ghOrg *models.GithubOr
11241124
log.WithFields(f).Debugf("commit authors status after allowlisting bots => signed: %+v, missing: %+v, allowlisted: %+v", signed, unsigned, allowlisted)
11251125

11261126
// update pull request
1127-
updateErr := github.UpdatePullRequest(ctx, ghOrg.OrganizationInstallationID, int(pullRequestID), gitHubOrgName, gitHubRepoName, githubRepository.ID, *latestSHA, signed, unsigned, s.claBaseAPIURL, s.claLandingPage, s.claLogoURL)
1127+
updateErr := github.UpdatePullRequest(ctx, ghOrg.OrganizationInstallationID, int(pullRequestID), gitHubOrgName, gitHubRepoName, githubRepository.ID, *latestSHA, signed, unsigned, anyMissing, s.claBaseAPIURL, s.claLandingPage, s.claLogoURL)
11281128
if updateErr != nil {
11291129
log.WithFields(f).Debugf("unable to update PR: %d", pullRequestID)
11301130
return updateErr

cla-backend-go/users/mocks/mock_repo.go

Lines changed: 15 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)