Skip to content

Commit 8eb11da

Browse files
Finish golang implementation: special email cases, filling missing name, email and caching
Signed-off-by: Lukasz Gryglicki <[email protected]>
1 parent 6fec7c5 commit 8eb11da

File tree

2 files changed

+183
-8
lines changed

2 files changed

+183
-8
lines changed

cla-backend-go/github/github_repository.go

Lines changed: 181 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,8 @@ import (
1111
"regexp"
1212
"strconv"
1313
"strings"
14+
"sync"
15+
"time"
1416

1517
log "github.com/linuxfoundation/easycla/cla-backend-go/logging"
1618
"github.com/linuxfoundation/easycla/cla-backend-go/utils"
@@ -23,6 +25,8 @@ import (
2325
var (
2426
// ErrGitHubRepositoryNotFound is returned when github repository is not found
2527
ErrGitHubRepositoryNotFound = errors.New("github repository not found")
28+
// NoreplyIDPattern matches GitHub noreply addresses of the form
// "<digits>+<login>@users.noreply.github.com"; submatch 1 is the digit run
// and submatch 2 is the login part.
NoreplyIDPattern = regexp.MustCompile(`^(\d+)\+([a-zA-Z0-9-]+)@users\.noreply\.github\.com$`)
29+
// NoreplyUserPattern matches GitHub noreply addresses of the form
// "<login>@users.noreply.github.com"; submatch 1 is the login part.
NoreplyUserPattern = regexp.MustCompile(`^([a-zA-Z0-9-]+)@users\.noreply\.github\.com$`)
2630
)
2731

2832
const (
@@ -33,6 +37,68 @@ const (
3337
svgVersion = "?v=2"
3438
)
3539

40+
type cacheEntry struct {
41+
value *github.User
42+
expiresAt time.Time
43+
}
44+
45+
type Cache struct {
46+
data map[[2]string]cacheEntry
47+
mu sync.Mutex
48+
ttl time.Duration
49+
}
50+
51+
func NewCache(ttl time.Duration) *Cache {
52+
return &Cache{
53+
data: make(map[[2]string]cacheEntry),
54+
ttl: ttl,
55+
}
56+
}
57+
58+
func (c *Cache) Get(key [2]string) (*github.User, bool) {
59+
c.mu.Lock()
60+
defer c.mu.Unlock()
61+
entry, found := c.data[key]
62+
if !found || time.Now().After(entry.expiresAt) {
63+
if found {
64+
delete(c.data, key)
65+
}
66+
return nil, false
67+
}
68+
return entry.value, true
69+
}
70+
71+
func (c *Cache) Set(key [2]string, value *github.User) {
72+
c.mu.Lock()
73+
defer c.mu.Unlock()
74+
c.data[key] = cacheEntry{
75+
value: value,
76+
expiresAt: time.Now().Add(c.ttl),
77+
}
78+
}
79+
80+
func (c *Cache) Cleanup() {
81+
c.mu.Lock()
82+
defer c.mu.Unlock()
83+
now := time.Now()
84+
for k, v := range c.data {
85+
if now.After(v.expiresAt) {
86+
delete(c.data, k)
87+
}
88+
}
89+
}
90+
91+
var GithubUserCache = NewCache(24 * time.Hour)
92+
93+
func init() {
94+
go func() {
95+
for {
96+
time.Sleep(time.Hour)
97+
GithubUserCache.Cleanup()
98+
}
99+
}()
100+
}
101+
36102
func GetGitHubRepository(ctx context.Context, installationID, githubRepositoryID int64) (*github.Repository, error) {
37103
f := logrus.Fields{
38104
"functionName": "github.github_repository.GetGitHubRepository",
@@ -199,7 +265,7 @@ func SearchGithubUserByEmail(ctx context.Context, client *github.Client, email s
199265

200266
// GetGithubUserByLogin fetches a GitHub user by their login (username).
201267
// Returns (*github.User, nil) if found, (nil, nil) if not found, or (nil, error) on error.
202-
func SearchGithubUserByLogin(ctx context.Context, client *github.Client, login string) (*github.User, error) {
268+
func GetGithubUserByLogin(ctx context.Context, client *github.Client, login string) (*github.User, error) {
203269
f := logrus.Fields{
204270
"functionName": "github.github_repository.GetGitHubUserByLogin",
205271
"login": login,
@@ -222,6 +288,31 @@ func SearchGithubUserByLogin(ctx context.Context, client *github.Client, login s
222288
return user, nil
223289
}
224290

291+
// GetGitHubUserByID fetches a GitHub user by their GitHubID.
292+
// Returns (*github.User, nil) if found, (nil, nil) if not found, or (nil, error) on error.
293+
func GetGithubUserByID(ctx context.Context, client *github.Client, githubID int64) (*github.User, error) {
294+
f := logrus.Fields{
295+
"functionName": "github.github_repository.GetGitHubUserByID",
296+
"githubID": githubID,
297+
}
298+
log.WithFields(f).Debugf("Getting GitHub user by GitHub ID: %d", githubID)
299+
user, _, err := client.Users.GetByID(ctx, githubID)
300+
if err != nil {
301+
if ghErr, ok := err.(*github.ErrorResponse); ok && ghErr.Response.StatusCode == 404 {
302+
log.WithFields(f).Debugf("Could not find GitHub user with GitHub ID: %d", githubID)
303+
return nil, nil
304+
}
305+
log.WithFields(f).WithError(err).Errorf("Error getting GitHub user with GitHub ID: %d", githubID)
306+
return nil, err
307+
}
308+
if user == nil {
309+
log.WithFields(f).Debugf("No user object returned for GitHub ID: %d", githubID)
310+
return nil, nil
311+
}
312+
log.WithFields(f).Debugf("Found GitHub user by GitHub ID: %d", githubID)
313+
return user, nil
314+
}
315+
225316
// GetCoAuthorsFromCommit returns a slice of [2]string, each representing [name, email] of a co-author.
226317
func GetCoAuthorsFromCommit(
227318
ctx context.Context,
@@ -293,23 +384,80 @@ func GetCoAuthorCommits(
293384
name = strings.TrimSpace(coAuthor[0])
294385
email = strings.TrimSpace(coAuthor[1])
295386

387+
if cachedUser, ok := GithubUserCache.Get([2]string{name, email}); ok {
388+
log.WithFields(f).Debugf("GitHub user found in cache for name/email: %s/%s: %+v", name, email, cachedUser)
389+
var summary *UserCommitSummary
390+
if cachedUser != nil {
391+
summary = &UserCommitSummary{
392+
SHA: utils.StringValue(commit.SHA),
393+
CommitAuthor: cachedUser,
394+
Affiliated: false,
395+
Authorized: false,
396+
}
397+
} else {
398+
summary = &UserCommitSummary{
399+
SHA: utils.StringValue(commit.SHA),
400+
CommitAuthor: &github.User{
401+
Login: nil,
402+
ID: nil,
403+
Name: &name,
404+
Email: &email,
405+
},
406+
Affiliated: false,
407+
Authorized: false,
408+
}
409+
}
410+
log.WithFields(f).Debugf("PR: %d, %+v (from cache)", pr, summary)
411+
return summary
412+
}
413+
296414
log.WithFields(f).Debugf("Getting co-author details: %+v", coAuthor)
297415

416+
// Check for email in "<id>+<login>@users.noreply.github.com" format:
417+
if matches := NoreplyIDPattern.FindStringSubmatch(email); matches != nil {
418+
idStr, loginStr := matches[1], matches[2]
419+
if githubID, err = strconv.ParseInt(idStr, 10, 64); err == nil {
420+
log.WithFields(f).Debugf("Detected noreply GitHub email with ID: %s, login: %s", idStr, loginStr)
421+
user, err = GetGithubUserByID(ctx, client, githubID)
422+
if err != nil {
423+
log.WithFields(f).Warnf("Error fetching user by ID %d: %v", githubID, err)
424+
user = nil
425+
}
426+
}
427+
}
428+
429+
// Check for email in "<login>@users.noreply.github.com" format:
430+
if user == nil {
431+
if matches := NoreplyUserPattern.FindStringSubmatch(email); matches != nil {
432+
loginStr := matches[1]
433+
log.WithFields(f).Debugf("Detected noreply GitHub email with login: %s", loginStr)
434+
user, err = GetGithubUserByLogin(ctx, client, loginStr)
435+
if err != nil {
436+
log.WithFields(f).Warnf("Error fetching user by login %s: %v", loginStr, err)
437+
user = nil
438+
}
439+
}
440+
}
441+
298442
// Try to find user by email
299-
user, err = SearchGithubUserByEmail(ctx, client, email)
300-
if err != nil {
301-
log.WithFields(f).Debugf("Co-author GitHub user not found via email %s: %v (error: %v)", email, coAuthor, err)
302-
user = nil
443+
if user == nil {
444+
user, err = SearchGithubUserByEmail(ctx, client, email)
445+
if err != nil {
446+
log.WithFields(f).Debugf("Co-author GitHub user not found via email %s: %v (error: %v)", email, coAuthor, err)
447+
user = nil
448+
}
303449
}
304450

451+
// Last resort - try to find by name=login
305452
if user == nil {
306453
// Note that Co-authored-by: name <email> is not actually a GitHub login but rather a name - but we are trying hard to find a GitHub profile
307-
user, err = SearchGithubUserByLogin(ctx, client, name)
454+
user, err = GetGithubUserByLogin(ctx, client, name)
308455
if err != nil {
309456
log.WithFields(f).Debugf("Co-author GitHub user not found via name=login=%s: %v (error: %v)", name, coAuthor, err)
310457
user = nil
311458
}
312459
}
460+
313461
log.WithFields(f).Debugf("Co-author: %v, user: %+v", coAuthor, user)
314462

315463
var summary *UserCommitSummary
@@ -320,7 +468,13 @@ func GetCoAuthorCommits(
320468
if user.ID != nil {
321469
githubID = *user.ID
322470
}
323-
log.WithFields(f).Debugf("Co-author GitHub user details found: %v, user: %+v, login: %s, id: %d", coAuthor, user, login, githubID)
471+
if user.Name == nil || (user.Name != nil && strings.TrimSpace(*user.Name) == "") {
472+
user.Name = &name
473+
}
474+
if user.Email == nil || (user.Email != nil && strings.TrimSpace(*user.Email) == "") {
475+
user.Email = &email
476+
}
477+
log.WithFields(f).Debugf("Co-author GitHub user details found: %v, user: %+v, login: %s, id: %d for email=%s, name=%s", coAuthor, user, login, githubID, email, name)
324478
summary = &UserCommitSummary{
325479
SHA: utils.StringValue(commit.SHA),
326480
CommitAuthor: user,
@@ -343,6 +497,7 @@ func GetCoAuthorCommits(
343497
log.WithFields(f).Debugf("Co-author GitHub user details not found: %v", coAuthor)
344498
}
345499

500+
GithubUserCache.Set([2]string{name, email}, user)
346501
return summary
347502
}
348503

@@ -381,7 +536,18 @@ func GetPullRequestCommitAuthors(ctx context.Context, installationID int64, pull
381536
log.WithFields(f).Debugf("commit.Author.Login: %s", utils.StringValue(commit.Author.Login))
382537
commitAuthor = utils.StringValue(commit.Author.Login)
383538
}
384-
log.WithFields(f).Debugf("commitAuthor: %s", commitAuthor)
539+
name, email := "", ""
540+
if commit.Commit != nil && commit.Commit.Author != nil {
541+
name = utils.StringValue(commit.Commit.Author.Name)
542+
email = utils.StringValue(commit.Commit.Author.Email)
543+
if strings.TrimSpace(name) != "" && (commit.Author.Name == nil || (commit.Author.Name != nil && strings.TrimSpace(*commit.Author.Name) == "")) {
544+
commit.Author.Name = &name
545+
}
546+
if strings.TrimSpace(email) != "" && (commit.Author.Email == nil || (commit.Author.Email != nil && strings.TrimSpace(*commit.Author.Email) == "")) {
547+
commit.Author.Email = &email
548+
}
549+
}
550+
log.WithFields(f).Debugf("commitAuthor: %s, name: %s, email: %s", commitAuthor, name, email)
385551
userCommitSummary = append(userCommitSummary, &UserCommitSummary{
386552
SHA: *commit.SHA,
387553
CommitAuthor: commit.Author,
@@ -393,6 +559,13 @@ func GetPullRequestCommitAuthors(ctx context.Context, installationID int64, pull
393559

394560
// get latest commit SHA
395561
latestCommitSHA := commits[len(commits)-1].SHA
562+
// log.WithFields(f).Debugf("user commit summaries: %+v", userCommitSummary)
563+
// for _, summary := range userCommitSummary {
564+
// if summary == nil {
565+
// continue
566+
// }
567+
// log.WithFields(f).Debugf("user commit summary: %+v", *summary)
568+
//}
396569
return userCommitSummary, latestCommitSHA, nil
397570
}
398571

utils/skip_cla_entry.sh

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,8 @@
1212
# STAGE=dev DTFROM='1 hour ago' DTTO='1 second ago' ./utils/search_aws_log_group.sh 'cla-backend-dev-githubactivity' 'skip_cla'
1313
# MODE=delete-key ./utils/skip_cla_entry.sh 'sun-test-org' 're:(?i)^repo[0-9]+$'
1414
# STAGE=dev MODE=add-key DEBUG=1 ./utils/skip_cla_entry.sh 'sun-test-org' 'repo1' 'thakurveerendras;;*'
15+
# STAGE=dev MODE=add-key ./utils/skip_cla_entry.sh 'open-telemetry' '*' 'Copilot;re:^\d+\+Copilot@users\.noreply\.github\.com$;copilot-swe-agent[bot]'
16+
# STAGE=dev MODE=add-key ./utils/skip_cla_entry.sh 'openfga' 'vscode-ext' 'Copilot;re:^\d+\+Copilot@users\.noreply\.github\.com$;copilot-swe-agent[bot]'
1517
# STAGE=prod MODE=add-key DEBUG=1 ./utils/skip_cla_entry.sh 'open-telemetry' 'opentelemetry-rust' '*;re:^\d+\+Copilot@users\.noreply\.github\.com$;copilot-swe-agent[bot]'
1618

1719
if [ -z "$MODE" ]

0 commit comments

Comments
 (0)