Skip to content

Commit a52b7bc

Browse files
Merge pull request #4745 from linuxfoundation/unicron-support-co-author-github-case
Support co-authors in GitHub flows with Copilot AI as main author and actual contributor as co-author
2 parents bab92df + d1300d8 commit a52b7bc

File tree

6 files changed

+615
-72
lines changed

6 files changed

+615
-72
lines changed

cla-backend-go/github/github_repository.go

Lines changed: 351 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,11 @@ import (
88
"errors"
99
"fmt"
1010
"net/http"
11+
"regexp"
1112
"strconv"
1213
"strings"
14+
"sync"
15+
"time"
1316

1417
log "github.com/linuxfoundation/easycla/cla-backend-go/logging"
1518
"github.com/linuxfoundation/easycla/cla-backend-go/utils"
@@ -22,6 +25,8 @@ import (
2225
var (
	// ErrGitHubRepositoryNotFound is returned when github repository is not found
	ErrGitHubRepositoryNotFound = errors.New("github repository not found")

	// NoreplyIDPattern matches a whole string of the form
	// "<digits>+<login>@users.noreply.github.com". Submatch 1 is the digit
	// run and submatch 2 is the login (letters, digits and hyphens only).
	NoreplyIDPattern = regexp.MustCompile(`^(\d+)\+([a-zA-Z0-9-]+)@users\.noreply\.github\.com$`)

	// NoreplyUserPattern matches a whole string of the form
	// "<login>@users.noreply.github.com". Submatch 1 is the login (letters,
	// digits and hyphens only), so addresses carrying a "<digits>+" prefix
	// do not match this pattern.
	NoreplyUserPattern = regexp.MustCompile(`^([a-zA-Z0-9-]+)@users\.noreply\.github\.com$`)
)
2631

2732
const (
@@ -32,6 +37,68 @@ const (
3237
svgVersion = "?v=2"
3338
)
3439

40+
type cacheEntry struct {
41+
value *github.User
42+
expiresAt time.Time
43+
}
44+
45+
type Cache struct {
46+
data map[[2]string]cacheEntry
47+
mu sync.Mutex
48+
ttl time.Duration
49+
}
50+
51+
func NewCache(ttl time.Duration) *Cache {
52+
return &Cache{
53+
data: make(map[[2]string]cacheEntry),
54+
ttl: ttl,
55+
}
56+
}
57+
58+
func (c *Cache) Get(key [2]string) (*github.User, bool) {
59+
c.mu.Lock()
60+
defer c.mu.Unlock()
61+
entry, found := c.data[key]
62+
if !found || time.Now().After(entry.expiresAt) {
63+
if found {
64+
delete(c.data, key)
65+
}
66+
return nil, false
67+
}
68+
return entry.value, true
69+
}
70+
71+
func (c *Cache) Set(key [2]string, value *github.User) {
72+
c.mu.Lock()
73+
defer c.mu.Unlock()
74+
c.data[key] = cacheEntry{
75+
value: value,
76+
expiresAt: time.Now().Add(c.ttl),
77+
}
78+
}
79+
80+
func (c *Cache) Cleanup() {
81+
c.mu.Lock()
82+
defer c.mu.Unlock()
83+
now := time.Now()
84+
for k, v := range c.data {
85+
if now.After(v.expiresAt) {
86+
delete(c.data, k)
87+
}
88+
}
89+
}
90+
91+
var GithubUserCache = NewCache(24 * time.Hour)
92+
93+
func init() {
94+
go func() {
95+
for {
96+
time.Sleep(time.Hour)
97+
GithubUserCache.Cleanup()
98+
}
99+
}()
100+
}
101+
35102
func GetGitHubRepository(ctx context.Context, installationID, githubRepositoryID int64) (*github.Repository, error) {
36103
f := logrus.Fields{
37104
"functionName": "github.github_repository.GetGitHubRepository",
@@ -170,6 +237,270 @@ func (u UserCommitSummary) getUserInfo(tagUser bool) string {
170237
return strings.Replace(sb.String(), "/ $", "", -1)
171238
}
172239

240+
// SearchGithubUserByEmail searches for a GitHub user by email using the GitHub search API.
241+
// Returns the first found *github.User, or nil if not found or on error.
242+
func SearchGithubUserByEmail(ctx context.Context, client *github.Client, email string) (*github.User, error) {
243+
f := logrus.Fields{
244+
"functionName": "github.github_repository.SearchGithubUserByEmail",
245+
"email": email,
246+
}
247+
log.WithFields(f).Debugf("Searching for GitHub user by email: %s", email)
248+
249+
query := fmt.Sprintf("%s in:email", email)
250+
opts := &github.SearchOptions{
251+
ListOptions: github.ListOptions{PerPage: 1},
252+
}
253+
result, _, err := client.Search.Users(ctx, query, opts)
254+
if err != nil {
255+
log.WithFields(f).WithError(err).Errorf("Error searching for user by email: %s", email)
256+
return nil, err
257+
}
258+
if result.GetTotal() == 0 || len(result.Users) == 0 {
259+
log.WithFields(f).Debugf("No GitHub user found with email: %s", email)
260+
return nil, nil
261+
}
262+
log.WithFields(f).Debugf("Found GitHub user by email: %s", *result.Users[0].Login)
263+
return result.Users[0], nil
264+
}
265+
266+
// GetGitHubUserByLogin fetches a GitHub user by their login (username).
267+
// Returns (*github.User, nil) if found, (nil, nil) if not found, or (nil, error) on error.
268+
func GetGithubUserByLogin(ctx context.Context, client *github.Client, login string) (*github.User, error) {
269+
f := logrus.Fields{
270+
"functionName": "github.github_repository.GetGitHubUserByLogin",
271+
"login": login,
272+
}
273+
log.WithFields(f).Debugf("Getting GitHub user by login: %s", login)
274+
user, _, err := client.Users.Get(ctx, login)
275+
if err != nil {
276+
if ghErr, ok := err.(*github.ErrorResponse); ok && ghErr.Response.StatusCode == 404 {
277+
log.WithFields(f).Debugf("Could not find GitHub user with login: %s", login)
278+
return nil, nil
279+
}
280+
log.WithFields(f).WithError(err).Errorf("Error getting GitHub user with login: %s", login)
281+
return nil, err
282+
}
283+
if user == nil {
284+
log.WithFields(f).Debugf("No user object returned for login: %s", login)
285+
return nil, nil
286+
}
287+
log.WithFields(f).Debugf("Found GitHub user by login: %s", login)
288+
return user, nil
289+
}
290+
291+
// GetGitHubUserByID fetches a GitHub user by their GitHubID.
292+
// Returns (*github.User, nil) if found, (nil, nil) if not found, or (nil, error) on error.
293+
func GetGithubUserByID(ctx context.Context, client *github.Client, githubID int64) (*github.User, error) {
294+
f := logrus.Fields{
295+
"functionName": "github.github_repository.GetGitHubUserByID",
296+
"githubID": githubID,
297+
}
298+
log.WithFields(f).Debugf("Getting GitHub user by GitHub ID: %d", githubID)
299+
user, _, err := client.Users.GetByID(ctx, githubID)
300+
if err != nil {
301+
if ghErr, ok := err.(*github.ErrorResponse); ok && ghErr.Response.StatusCode == 404 {
302+
log.WithFields(f).Debugf("Could not find GitHub user with GitHub ID: %d", githubID)
303+
return nil, nil
304+
}
305+
log.WithFields(f).WithError(err).Errorf("Error getting GitHub user with GitHub ID: %d", githubID)
306+
return nil, err
307+
}
308+
if user == nil {
309+
log.WithFields(f).Debugf("No user object returned for GitHub ID: %d", githubID)
310+
return nil, nil
311+
}
312+
log.WithFields(f).Debugf("Found GitHub user by GitHub ID: %d", githubID)
313+
return user, nil
314+
}
315+
316+
// GetCoAuthorsFromCommit returns a slice of [2]string, each representing [name, email] of a co-author.
317+
func GetCoAuthorsFromCommit(
318+
ctx context.Context,
319+
commit *github.RepositoryCommit,
320+
) [][2]string {
321+
f := logrus.Fields{
322+
"functionName": "github.github_repository.GetCoAuthorsFromCommit",
323+
}
324+
var coAuthors [][2]string
325+
if commit != nil && commit.Commit != nil && commit.Commit.Message != nil {
326+
commitMessage := commit.GetCommit().GetMessage()
327+
// log.WithFields(f).Debugf("commit message: %s", commitMessage)
328+
329+
re := regexp.MustCompile(`(?i)co-authored-by: (.*) <(.*)>`)
330+
matches := re.FindAllStringSubmatch(commitMessage, -1)
331+
for _, match := range matches {
332+
name := strings.TrimSpace(match[1])
333+
email := strings.TrimSpace(match[2])
334+
coAuthors = append(coAuthors, [2]string{name, email})
335+
log.WithFields(f).Debugf("found co-author: name: %s, email: %s", name, email)
336+
}
337+
}
338+
return coAuthors
339+
}
340+
341+
// ExpandWithCoAuthors appends UserCommitSummary objects for all co-authors to commitAuthors slice.
342+
func ExpandWithCoAuthors(
343+
ctx context.Context,
344+
client *github.Client,
345+
commit *github.RepositoryCommit,
346+
pr int,
347+
installationID int64,
348+
commitAuthors *[]*UserCommitSummary,
349+
) {
350+
f := logrus.Fields{
351+
"functionName": "github.github_repository.ExpandWithCoAuthors",
352+
"pr": pr,
353+
}
354+
coAuthors := GetCoAuthorsFromCommit(ctx, commit)
355+
log.WithFields(f).Debugf("co-authors found: %s", coAuthors)
356+
for _, coAuthor := range coAuthors {
357+
summary := GetCoAuthorCommits(ctx, client, coAuthor, commit, pr, installationID)
358+
*commitAuthors = append(*commitAuthors, summary)
359+
}
360+
}
361+
362+
func GetCoAuthorCommits(
363+
ctx context.Context,
364+
client *github.Client,
365+
coAuthor [2]string,
366+
commit *github.RepositoryCommit,
367+
pr int,
368+
installationID int64,
369+
) *UserCommitSummary {
370+
f := logrus.Fields{
371+
"functionName": "github.github_repository.GetCoAuthorCommits",
372+
"pr": pr,
373+
"installation-id": installationID,
374+
"co-author-name": coAuthor[0],
375+
"co-author-email": coAuthor[1],
376+
}
377+
378+
var (
379+
user *github.User
380+
githubID int64
381+
name, email, login string
382+
err error
383+
)
384+
name = strings.TrimSpace(coAuthor[0])
385+
email = strings.TrimSpace(coAuthor[1])
386+
387+
if cachedUser, ok := GithubUserCache.Get([2]string{name, email}); ok {
388+
log.WithFields(f).Debugf("GitHub user found in cache for name/email: %s/%s: %+v", name, email, cachedUser)
389+
var summary *UserCommitSummary
390+
if cachedUser != nil {
391+
summary = &UserCommitSummary{
392+
SHA: utils.StringValue(commit.SHA),
393+
CommitAuthor: cachedUser,
394+
Affiliated: false,
395+
Authorized: false,
396+
}
397+
} else {
398+
summary = &UserCommitSummary{
399+
SHA: utils.StringValue(commit.SHA),
400+
CommitAuthor: &github.User{
401+
Login: nil,
402+
ID: nil,
403+
Name: &name,
404+
Email: &email,
405+
},
406+
Affiliated: false,
407+
Authorized: false,
408+
}
409+
}
410+
log.WithFields(f).Debugf("PR: %d, %+v (from cache)", pr, summary)
411+
return summary
412+
}
413+
414+
log.WithFields(f).Debugf("Getting co-author details: %+v", coAuthor)
415+
416+
// 1. Check for email in "[email protected]" format:
417+
if matches := NoreplyIDPattern.FindStringSubmatch(email); matches != nil {
418+
idStr, loginStr := matches[1], matches[2]
419+
if githubID, err = strconv.ParseInt(idStr, 10, 64); err == nil {
420+
log.WithFields(f).Debugf("Detected noreply GitHub email with ID: %s, login: %s", idStr, loginStr)
421+
user, err = GetGithubUserByID(ctx, client, githubID)
422+
if err != nil {
423+
log.WithFields(f).Warnf("Error fetching user by ID %d: %v", githubID, err)
424+
user = nil
425+
}
426+
}
427+
}
428+
429+
// 2. Check for email in "[email protected]" format:
430+
if user == nil {
431+
if matches := NoreplyUserPattern.FindStringSubmatch(email); matches != nil {
432+
loginStr := matches[1]
433+
log.WithFields(f).Debugf("Detected noreply GitHub email with login: %s", loginStr)
434+
user, err = GetGithubUserByLogin(ctx, client, loginStr)
435+
if err != nil {
436+
log.WithFields(f).Warnf("Error fetching user by login %s: %v", loginStr, err)
437+
user = nil
438+
}
439+
}
440+
}
441+
442+
// 3. Try to find user by email
443+
if user == nil {
444+
user, err = SearchGithubUserByEmail(ctx, client, email)
445+
if err != nil {
446+
log.WithFields(f).Debugf("Co-author GitHub user not found via email %s: %v (error: %v)", email, coAuthor, err)
447+
user = nil
448+
}
449+
}
450+
451+
// 4. Last resort - try to find by name=login
452+
if user == nil {
453+
// Note that Co-authored-by: name <email> is not actually a GitHub login but rather a name - but we are trying hard to find a GitHub profile
454+
user, err = GetGithubUserByLogin(ctx, client, name)
455+
if err != nil {
456+
log.WithFields(f).Debugf("Co-author GitHub user not found via name=login=%s: %v (error: %v)", name, coAuthor, err)
457+
user = nil
458+
}
459+
}
460+
461+
log.WithFields(f).Debugf("Co-author: %v, user: %+v", coAuthor, user)
462+
463+
var summary *UserCommitSummary
464+
if user != nil {
465+
if user.Login != nil {
466+
login = *user.Login
467+
}
468+
if user.ID != nil {
469+
githubID = *user.ID
470+
}
471+
if user.Name == nil || (user.Name != nil && strings.TrimSpace(*user.Name) == "") {
472+
user.Name = &name
473+
}
474+
if user.Email == nil || (user.Email != nil && strings.TrimSpace(*user.Email) == "") {
475+
user.Email = &email
476+
}
477+
log.WithFields(f).Debugf("Co-author GitHub user details found: %v, user: %+v, login: %s, id: %d for email=%s, name=%s", coAuthor, user, login, githubID, email, name)
478+
summary = &UserCommitSummary{
479+
SHA: utils.StringValue(commit.SHA),
480+
CommitAuthor: user,
481+
Affiliated: false,
482+
Authorized: false,
483+
}
484+
log.WithFields(f).Debugf("PR: %d, %+v", pr, summary)
485+
} else {
486+
summary = &UserCommitSummary{
487+
SHA: utils.StringValue(commit.SHA),
488+
CommitAuthor: &github.User{
489+
Login: nil,
490+
ID: nil,
491+
Name: &name,
492+
Email: &email,
493+
},
494+
Affiliated: false,
495+
Authorized: false,
496+
}
497+
log.WithFields(f).Debugf("Co-author GitHub user details not found: %v", coAuthor)
498+
}
499+
500+
GithubUserCache.Set([2]string{name, email}, user)
501+
return summary
502+
}
503+
173504
func GetPullRequestCommitAuthors(ctx context.Context, installationID int64, pullRequestID int, owner, repo string) ([]*UserCommitSummary, *string, error) {
174505
f := logrus.Fields{
175506
"functionName": "github.github_repository.GetPullRequestCommitAuthors",
@@ -205,17 +536,36 @@ func GetPullRequestCommitAuthors(ctx context.Context, installationID int64, pull
205536
log.WithFields(f).Debugf("commit.Author.Login: %s", utils.StringValue(commit.Author.Login))
206537
commitAuthor = utils.StringValue(commit.Author.Login)
207538
}
208-
log.WithFields(f).Debugf("commitAuthor: %s", commitAuthor)
539+
name, email := "", ""
540+
if commit.Commit != nil && commit.Commit.Author != nil {
541+
name = utils.StringValue(commit.Commit.Author.Name)
542+
email = utils.StringValue(commit.Commit.Author.Email)
543+
if strings.TrimSpace(name) != "" && (commit.Author.Name == nil || (commit.Author.Name != nil && strings.TrimSpace(*commit.Author.Name) == "")) {
544+
commit.Author.Name = &name
545+
}
546+
if strings.TrimSpace(email) != "" && (commit.Author.Email == nil || (commit.Author.Email != nil && strings.TrimSpace(*commit.Author.Email) == "")) {
547+
commit.Author.Email = &email
548+
}
549+
}
550+
log.WithFields(f).Debugf("commitAuthor: %s, name: %s, email: %s", commitAuthor, name, email)
209551
userCommitSummary = append(userCommitSummary, &UserCommitSummary{
210552
SHA: *commit.SHA,
211553
CommitAuthor: commit.Author,
212554
Affiliated: false,
213555
Authorized: false,
214556
})
557+
ExpandWithCoAuthors(ctx, client, commit, pullRequestID, installationID, &userCommitSummary)
215558
}
216559

217560
// get latest commit SHA
218561
latestCommitSHA := commits[len(commits)-1].SHA
562+
// log.WithFields(f).Debugf("user commit summaries: %+v", userCommitSummary)
563+
// for _, summary := range userCommitSummary {
564+
// if summary == nil {
565+
// continue
566+
// }
567+
// log.WithFields(f).Debugf("user commit summary: %+v", *summary)
568+
//}
219569
return userCommitSummary, latestCommitSHA, nil
220570
}
221571

0 commit comments

Comments
 (0)