Skip to content

Commit c7748a5

Browse files
Update co-authors searching algorithm - support lookup easyCLA DynamoDB database by emails
Signed-off-by: Lukasz Gryglicki <[email protected]> Assisted by [OpenAI](https://platform.openai.com/) Assisted by [GitHub Copilot](https://github.com/features/copilot)
1 parent 91bbdc6 commit c7748a5

File tree

13 files changed

+329
-36
lines changed

13 files changed

+329
-36
lines changed

CO_AUTHORS.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -155,7 +155,7 @@ If it does, the backend will process the co-authors as follows, assume trailer v
155155

156156
- Second we check if email is in format `[email protected]`. If it is we use username part as GitHub username/login and fetch the user from GitHub API. If the user is found, we use that user as co-author.
157157

158-
- Third we lookup for email using GitHub API. If the user is found, we use that user as co-author.
158+
- Third we lookup for email using GitHub API. If the user is found, we use that user as co-author. If not we lookup for this user in EasyCLA database by email (first by primary/LF email and then by other emails). If found, we use that user as co-author.
159159

160160
- Finally we use the name part for `name <email>`. If the name matches the GitHub username pattern (alphanumeric characters or hyphens, must be 3–39 characters long, cannot start or end with a hyphen, and cannot contain consecutive hyphens), then we lookup using the GitHub API assuming that this name is a GitHub username/login (this is the case for some bots). If the user is found, we use that user as co-author.
161161

cla-backend-go/github/github_repository.go

Lines changed: 65 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ import (
1515
"time"
1616

1717
log "github.com/linuxfoundation/easycla/cla-backend-go/logging"
18+
"github.com/linuxfoundation/easycla/cla-backend-go/users"
1819
"github.com/linuxfoundation/easycla/cla-backend-go/utils"
1920
"github.com/sirupsen/logrus"
2021

@@ -327,13 +328,15 @@ func GetCoAuthorsFromCommit(
327328
commitMessage := commit.GetCommit().GetMessage()
328329
// log.WithFields(f).Debugf("commit message: %s", commitMessage)
329330

330-
re := regexp.MustCompile(`(?i)co-authored-by: (.*) <(.*)>`)
331+
re := regexp.MustCompile(`(?i)co-authored-by:\s*(.+?)\s*<([^<>]+)>`)
331332
matches := re.FindAllStringSubmatch(commitMessage, -1)
332333
for _, match := range matches {
333334
name := strings.TrimSpace(match[1])
334-
email := strings.TrimSpace(match[2])
335-
coAuthors = append(coAuthors, [2]string{name, email})
336-
log.WithFields(f).Debugf("found co-author: name: %s, email: %s", name, email)
335+
email := strings.ToLower(strings.TrimSpace(match[2]))
336+
if name != "" && email != "" {
337+
coAuthors = append(coAuthors, [2]string{name, email})
338+
log.WithFields(f).Debugf("found co-author: name: %s, email: %s", name, email)
339+
}
337340
}
338341
}
339342
return coAuthors
@@ -343,6 +346,7 @@ func GetCoAuthorsFromCommit(
343346
func ExpandWithCoAuthors(
344347
ctx context.Context,
345348
client *github.Client,
349+
usersService users.Service,
346350
commit *github.RepositoryCommit,
347351
pr int,
348352
installationID int64,
@@ -355,7 +359,7 @@ func ExpandWithCoAuthors(
355359
coAuthors := GetCoAuthorsFromCommit(ctx, commit)
356360
log.WithFields(f).Debugf("co-authors found: %s", coAuthors)
357361
for _, coAuthor := range coAuthors {
358-
summary := GetCoAuthorCommits(ctx, client, coAuthor, commit, pr, installationID)
362+
summary := GetCoAuthorCommits(ctx, client, usersService, coAuthor, commit, pr, installationID)
359363
*commitAuthors = append(*commitAuthors, summary)
360364
}
361365
}
@@ -374,9 +378,11 @@ func IsValidGitHubUsername(username string) bool {
374378
return true
375379
}
376380

381+
//nolint:gocyclo // complexity is acceptable for now
377382
func GetCoAuthorCommits(
378383
ctx context.Context,
379384
client *github.Client,
385+
usersService users.Service,
380386
coAuthor [2]string,
381387
commit *github.RepositoryCommit,
382388
pr int,
@@ -398,8 +404,10 @@ func GetCoAuthorCommits(
398404
)
399405
name = strings.TrimSpace(coAuthor[0])
400406
email = strings.TrimSpace(coAuthor[1])
407+
lName := strings.ToLower(name)
401408

402-
if cachedUser, ok := GithubUserCache.Get([2]string{name, email}); ok {
409+
cacheKey := [2]string{lName, email}
410+
if cachedUser, ok := GithubUserCache.Get(cacheKey); ok {
403411
log.WithFields(f).Debugf("GitHub user found in cache for name/email: %s/%s: %+v", name, email, cachedUser)
404412
var summary *UserCommitSummary
405413
if cachedUser != nil {
@@ -454,19 +462,63 @@ func GetCoAuthorCommits(
454462
}
455463
}
456464

457-
// 3. Try to find user by email
465+
// 3. Try to find user by email via GitHub APIs
458466
if user == nil {
459467
user, err = SearchGithubUserByEmail(ctx, client, email)
460468
if err != nil {
461-
log.WithFields(f).Debugf("Co-author GitHub user not found via email %s: %v (error: %v)", email, coAuthor, err)
469+
log.WithFields(f).Debugf("Co-author GitHub user not found via github email %s: %v (error: %v)", email, coAuthor, err)
462470
user = nil
463471
}
464472
}
465473

474+
// 3b. Try to find user by email in our database
475+
if user == nil {
476+
var githubID string
477+
dbUsers, err2 := usersService.GetUsersByLFEmail(email)
478+
if err2 == nil {
479+
for _, dbUser := range dbUsers {
480+
if dbUser.GithubID != "" {
481+
githubID = dbUser.GithubID
482+
// log.WithFields(f).Debugf("FOUND githubID.1 = %s", githubID)
483+
break
484+
}
485+
}
486+
} else {
487+
log.WithFields(f).Debugf("Co-author GitHub user not found via lf email %s: %v (error: %v)", email, coAuthor, err2)
488+
}
489+
if githubID == "" {
490+
dbUsers, err2 := usersService.GetUsersByEmail(email)
491+
if err2 == nil {
492+
for _, dbUser := range dbUsers {
493+
if dbUser.GithubID != "" {
494+
githubID = dbUser.GithubID
495+
// log.WithFields(f).Debugf("FOUND githubID.2 = %s", githubID)
496+
break
497+
}
498+
}
499+
} else {
500+
log.WithFields(f).Debugf("Co-author GitHub user not found via emails %s: %v (error: %v)", email, coAuthor, err2)
501+
}
502+
}
503+
if githubID != "" {
504+
githubIDInt, err2 := strconv.ParseInt(githubID, 10, 64)
505+
if err2 != nil {
506+
log.WithFields(f).Debugf("Co-author GitHub user not found via lf email %s, wrong GitHub ID: %s: %v (error: %v)", email, githubID, coAuthor, err2)
507+
} else {
508+
user, err = GetGithubUserByID(ctx, client, githubIDInt)
509+
if err != nil {
510+
log.WithFields(f).Debugf("Error fetching user by ID %d: %v", githubIDInt, err)
511+
user = nil
512+
}
513+
// log.WithFields(f).Debugf("FOUND user = (%s, %d, %s, %s)", *user.Login, *user.ID, *user.Name, *user.Email)
514+
}
515+
}
516+
}
517+
466518
// 4. Last resort - try to find by name=login
467-
if user == nil && IsValidGitHubUsername(name) {
519+
if user == nil && IsValidGitHubUsername(lName) {
468520
// Note that Co-authored-by: name <email> is not actually a GitHub login but rather a name - but we are trying hard to find a GitHub profile
469-
user, err = GetGithubUserByLogin(ctx, client, name)
521+
user, err = GetGithubUserByLogin(ctx, client, lName)
470522
if err != nil {
471523
log.WithFields(f).Debugf("Co-author GitHub user not found via name=login=%s: %v (error: %v)", name, coAuthor, err)
472524
user = nil
@@ -512,11 +564,11 @@ func GetCoAuthorCommits(
512564
log.WithFields(f).Debugf("Co-author GitHub user details not found: %v", coAuthor)
513565
}
514566

515-
GithubUserCache.Set([2]string{name, email}, user)
567+
GithubUserCache.Set(cacheKey, user)
516568
return summary
517569
}
518570

519-
func GetPullRequestCommitAuthors(ctx context.Context, installationID int64, pullRequestID int, owner, repo string, withCoAuthors bool) ([]*UserCommitSummary, *string, error) {
571+
func GetPullRequestCommitAuthors(ctx context.Context, usersService users.Service, installationID int64, pullRequestID int, owner, repo string, withCoAuthors bool) ([]*UserCommitSummary, *string, error) {
520572
f := logrus.Fields{
521573
"functionName": "github.github_repository.GetPullRequestCommitAuthors",
522574
"pullRequestID": pullRequestID,
@@ -571,7 +623,7 @@ func GetPullRequestCommitAuthors(ctx context.Context, installationID int64, pull
571623
Authorized: false,
572624
})
573625
if withCoAuthors {
574-
ExpandWithCoAuthors(ctx, client, commit, pullRequestID, installationID, &userCommitSummary)
626+
ExpandWithCoAuthors(ctx, client, usersService, commit, pullRequestID, installationID, &userCommitSummary)
575627
}
576628
}
577629

cla-backend-go/signatures/service.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1023,7 +1023,7 @@ func (s service) updateChangeRequest(ctx context.Context, ghOrg *models.GithubOr
10231023
// Fetch committers
10241024
withCoAuthors := github.IsCoAuthorsEnabledForRepo(ghOrg.EnableCoAuthors, gitHubRepoName)
10251025
log.WithFields(f).Debugf("fetching commit authors for PR: %d using repository owner: %s, repo: %s", pullRequestID, gitHubOrgName, gitHubRepoName)
1026-
authors, latestSHA, authorsErr := github.GetPullRequestCommitAuthors(ctx, ghOrg.OrganizationInstallationID, int(pullRequestID), gitHubOrgName, gitHubRepoName, withCoAuthors)
1026+
authors, latestSHA, authorsErr := github.GetPullRequestCommitAuthors(ctx, s.usersService, ghOrg.OrganizationInstallationID, int(pullRequestID), gitHubOrgName, gitHubRepoName, withCoAuthors)
10271027
if authorsErr != nil {
10281028
log.WithFields(f).WithError(authorsErr).Warnf("unable to get commit authors for %s/%s for PR: %d", gitHubOrgName, gitHubRepoName, pullRequestID)
10291029
return authorsErr

cla-backend-go/users/mocks/mock_repo.go

Lines changed: 15 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

cla-backend-go/users/repository.go

Lines changed: 68 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@ type UserRepository interface {
4242
GetUserByExternalID(userExternalID string) (*models.User, error)
4343
GetUserByUserName(userName string, fullMatch bool) (*models.User, error)
4444
GetUserByEmail(userEmail string) (*models.User, error)
45+
GetUsersByLFEmail(userEmail string) ([]*models.User, error)
4546
GetUserByGitHubID(gitHubID string) (*models.User, error)
4647
GetUserByGitHubUsername(gitHubUsername string) (*models.User, error)
4748
GetUserByGitlabID(gitlabID int) (*models.User, error)
@@ -125,7 +126,7 @@ func (repo repository) CreateUser(user *models.User) (*models.User, error) {
125126

126127
if user.LfEmail != "" {
127128
attributes["lf_email"] = &dynamodb.AttributeValue{
128-
S: aws.String(user.LfEmail.String()),
129+
S: aws.String(strings.ToLower(user.LfEmail.String())),
129130
}
130131
}
131132

@@ -340,6 +341,7 @@ func (repo repository) Save(user *models.UserUpdate) (*models.User, error) {
340341
"tableName": repo.tableName,
341342
}
342343

344+
user.LfEmail = strings.ToLower(user.LfEmail)
343345
var oldUserModel *models.User
344346
var err error
345347
oldUserModel, err = repo.getUserByUpdateModel(user)
@@ -881,12 +883,76 @@ func (repo repository) GetUsersByEmail(userEmail string) ([]*models.User, error)
881883
return users, nil
882884
}
883885

886+
// GetUsersByLFEmail fetches the user record by email
887+
func (repo repository) GetUsersByLFEmail(userEmail string) ([]*models.User, error) {
888+
f := logrus.Fields{
889+
"functionName": "users.repository.GetUsersByLFEmail",
890+
"userEmail": userEmail,
891+
}
892+
userEmail = strings.ToLower(userEmail)
893+
// This is the key we want to match
894+
condition := expression.Key("lf_email").Equal(expression.Value(userEmail))
895+
896+
// These are the columns we want returned
897+
projection := buildUserProjection()
898+
899+
// Use the nice builder to create the expression
900+
expr, err := expression.NewBuilder().WithKeyCondition(condition).WithProjection(projection).Build()
901+
if err != nil {
902+
log.WithFields(f).WithError(err).Warnf("error building expression for lf_email : %s, error: %v", userEmail, err)
903+
return []*models.User{}, err
904+
}
905+
906+
// Assemble the query input parameters
907+
queryInput := &dynamodb.QueryInput{
908+
ExpressionAttributeNames: expr.Names(),
909+
ExpressionAttributeValues: expr.Values(),
910+
KeyConditionExpression: expr.KeyCondition(),
911+
ProjectionExpression: expr.Projection(),
912+
TableName: aws.String(repo.tableName),
913+
IndexName: aws.String("lf-email-index"),
914+
}
915+
916+
// Make the DynamoDB Query API call
917+
result, err := repo.dynamoDBClient.Query(queryInput)
918+
if err != nil {
919+
log.WithFields(f).WithError(err).Warnf("error retrieving user by lf_email: %s, error: %+v", userEmail, err)
920+
return []*models.User{}, err
921+
}
922+
923+
// The user model
924+
var dbUserModels []DBUser
925+
926+
err = dynamodbattribute.UnmarshalListOfMaps(result.Items, &dbUserModels)
927+
if err != nil {
928+
log.WithFields(f).WithError(err).Warnf("error unmarshalling user record from database for lf_email: %s, error: %+v", userEmail, err)
929+
return []*models.User{}, err
930+
}
931+
932+
if len(dbUserModels) == 0 {
933+
return []*models.User{}, &utils.UserNotFound{
934+
Message: fmt.Sprintf("user not found when searching by lf email: %s", userEmail),
935+
UserLFID: "",
936+
UserName: "",
937+
UserEmail: userEmail,
938+
Err: nil,
939+
}
940+
}
941+
942+
usrs := make([]*models.User, 0, len(dbUserModels))
943+
for _, dbUser := range dbUserModels {
944+
usrs = append(usrs, convertDBUserModel(dbUser))
945+
}
946+
return usrs, nil
947+
}
948+
884949
// GetUserByEmail fetches the user record by email
885950
func (repo repository) GetUserByEmail(userEmail string) (*models.User, error) {
886951
f := logrus.Fields{
887952
"functionName": "users.repository.GetUserByEmail",
888953
"userEmail": userEmail,
889954
}
955+
userEmail = strings.ToLower(userEmail)
890956
// This is the key we want to match
891957
condition := expression.Key("lf_email").Equal(expression.Value(userEmail))
892958

@@ -1325,7 +1391,7 @@ func convertDBUserModel(user DBUser) *models.User {
13251391
UserID: user.UserID,
13261392
UserExternalID: user.UserExternalID,
13271393
Admin: user.Admin,
1328-
LfEmail: strfmt.Email(user.LFEmail),
1394+
LfEmail: strfmt.Email(strings.ToLower(user.LFEmail)),
13291395
LfSub: user.LFSub,
13301396
LfUsername: user.LFUsername,
13311397
DateCreated: user.DateCreated,

cla-backend-go/users/service.go

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,8 @@ type Service interface {
2222
GetUserByLFUserName(lfUserName string) (*models.User, error)
2323
GetUserByUserName(userName string, fullMatch bool) (*models.User, error)
2424
GetUserByEmail(userEmail string) (*models.User, error)
25+
GetUsersByEmail(userEmail string) ([]*models.User, error)
26+
GetUsersByLFEmail(userEmail string) ([]*models.User, error)
2527
GetUserByGitHubID(gitHubID string) (*models.User, error)
2628
GetUserByGitHubUsername(gitlabUsername string) (*models.User, error)
2729
GetUserByGitlabID(gitHubID int) (*models.User, error)
@@ -156,6 +158,22 @@ func (s service) GetUserByEmail(userEmail string) (*models.User, error) {
156158
return s.repo.GetUserByEmail(userEmail)
157159
}
158160

161+
// GetUsersByEmail fetches the users by email
162+
func (s service) GetUsersByEmail(userEmail string) ([]*models.User, error) {
163+
if userEmail == "" {
164+
return nil, errors.New("userEmail is empty")
165+
}
166+
return s.repo.GetUsersByEmail(userEmail)
167+
}
168+
169+
// GetUsersByLFEmail fetches the users by email
170+
func (s service) GetUsersByLFEmail(userEmail string) ([]*models.User, error) {
171+
if userEmail == "" {
172+
return nil, errors.New("userEmail is empty")
173+
}
174+
return s.repo.GetUsersByLFEmail(userEmail)
175+
}
176+
159177
// GetUserByGitHubID fetches the user by GitHub ID
160178
func (s service) GetUserByGitHubID(gitHubID string) (*models.User, error) {
161179
if gitHubID == "" {

cla-backend-go/v2/sign/helpers.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,7 @@ func (s service) updateChangeRequest(ctx context.Context, installationID, reposi
6161
withCoAuthors = github.IsCoAuthorsEnabledForRepo(ghOrg.EnableCoAuthors, gitHubRepoName)
6262
}
6363
log.WithFields(f).Debugf("fetching commit authors for PR: %d using repository owner: %s, repo: %s", pullRequestID, gitHubOrgName, gitHubRepoName)
64-
authors, latestSHA, authorsErr := github.GetPullRequestCommitAuthors(ctx, installationID, int(pullRequestID), gitHubOrgName, gitHubRepoName, withCoAuthors)
64+
authors, latestSHA, authorsErr := github.GetPullRequestCommitAuthors(ctx, s.userService, installationID, int(pullRequestID), gitHubOrgName, gitHubRepoName, withCoAuthors)
6565
if authorsErr != nil {
6666
log.WithFields(f).WithError(authorsErr).Warnf("unable to get commit authors for %s/%s for PR: %d", gitHubOrgName, gitHubRepoName, pullRequestID)
6767
return authorsErr

0 commit comments

Comments
 (0)