Skip to content

Commit 4d3744f

Browse files
Merge pull request #4807 from linuxfoundation/unicron-caching-authors-co-authors-updates
Unicron caching authors co authors updates
2 parents 4df87e1 + 17e7222 commit 4d3744f

File tree

10 files changed

+438
-45
lines changed

10 files changed

+438
-45
lines changed

COMMIT_AUTHORS_CACHING.md

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
# EasyCLA: Author and Co-author Caching + Large-PR Support
22

33
- **Two-level caching** for author and co-author identity & identity plus per-project signature decisions.
4+
- **Caching** for git co-authors parsed from commit messages.
45
- **GraphQL-based commit ingestion** that comfortably handles PRs with **250+ commits (and beyond)**.
56

67
---
@@ -13,11 +14,14 @@
1314
---
1415

1516
## Caching
17+
- **Co-author cache keys** are based on normalized email and name from the commit trailers (`Co-authored-by:`).
1618
- **General cache key**: `(author_id, lower(login), lower(email)) → (user | None)`
1719
- **Per-project cache key**: `(project_id, author_id, lower(login), lower(email)) → (user | None, authorized, affiliated)`
18-
- **TTL policy**: positives **~24h**; negative/uncertain states use **Quick TTL = 5m**.
20+
- **TTL policy**: positives **~12h** (**~3h** for per-project with signature status); negative/uncertain states use **Negative TTL = 3m**.
1921
- **Flow**: per-project cache → general cache → cold DB path. Results are stored back with the appropriate TTL.
22+
- **When signature is signed**: per-project and general caches are updated to reflect the new status (general cache is updated because given user could have no DynamoDB entry yet before signing the CLA).
2023
- Thread-safe with periodic expired entries cleanup (once per hour).
24+
- There are `/v2/clear-cache` and `/v4/clear-cache` endpoints to clear caches (testing & ops).
2125

2226
---
2327

@@ -37,7 +41,7 @@
3741

3842
---
3943

40-
## Quick constants
41-
- `QUICK_CACHE_TTL = 300` seconds (negative/uncertain states).
42-
- Default positive cache TTL ≈ **24 hours**.
44+
## Constants
45+
- `NEGATIVE_CACHE_TTL = 180` seconds (negative/uncertain states).
46+
- Default positive cache TTL ≈ **12 hours** (**3 hours** for per-project with signature status).
4347
- GraphQL: `pageSize=100`, parallel workers tuned for throughput.

cla-backend-go/github/github_repository.go

Lines changed: 138 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -62,11 +62,12 @@ Please update your commit message(s) by doing |git commit --amend| and then |git
6262
`
6363

6464
const (
65-
unknown = "Unknown"
66-
failureState = "failure"
67-
successState = "success"
68-
svgVersion = "?v=2"
69-
QuickCacheTTL = 5 * time.Minute
65+
unknown = "Unknown"
66+
failureState = "failure"
67+
successState = "success"
68+
svgVersion = "?v=2"
69+
NegativeCacheTTL = 3 * time.Minute // Used for negative caching of missing/not-signed users
70+
ProjectCacheTTL = 3 * time.Hour // Used for per-project caching of signed users
7071
)
7172

7273
// GraphQL related types
@@ -181,6 +182,15 @@ func (c *Cache) Set(key [2]string, value *github.User) {
181182
}
182183
}
183184

185+
func (c *Cache) SetWithTTL(key [2]string, value *github.User, tl time.Duration) {
186+
c.mu.Lock()
187+
defer c.mu.Unlock()
188+
c.data[key] = cacheEntry{
189+
value: value,
190+
expiresAt: time.Now().Add(tl),
191+
}
192+
}
193+
184194
func (c *Cache) Cleanup() {
185195
c.mu.Lock()
186196
defer c.mu.Unlock()
@@ -192,6 +202,12 @@ func (c *Cache) Cleanup() {
192202
}
193203
}
194204

205+
func (c *Cache) Clear() {
206+
c.mu.Lock()
207+
defer c.mu.Unlock()
208+
c.data = make(map[[2]string]cacheEntry)
209+
}
210+
195211
func (c *Cache) Delete(key [2]string) { c.mu.Lock(); delete(c.data, key); c.mu.Unlock() }
196212

197213
type userCacheEntry struct {
@@ -254,6 +270,12 @@ func (c *UserCache) Cleanup() {
254270
}
255271
}
256272

273+
func (c *UserCache) Clear() {
274+
c.mu.Lock()
275+
defer c.mu.Unlock()
276+
c.data = make(map[[3]string]userCacheEntry)
277+
}
278+
257279
func (c *UserCache) Delete(key [3]string) { c.mu.Lock(); delete(c.data, key); c.mu.Unlock() }
258280

259281
type projectUserCacheEntry struct {
@@ -322,11 +344,17 @@ func (c *ProjectUserCache) Cleanup() {
322344
}
323345
}
324346

347+
func (c *ProjectUserCache) Clear() {
348+
c.mu.Lock()
349+
defer c.mu.Unlock()
350+
c.data = make(map[[4]string]projectUserCacheEntry)
351+
}
352+
325353
func (c *ProjectUserCache) Delete(key [4]string) { c.mu.Lock(); delete(c.data, key); c.mu.Unlock() }
326354

327-
var GithubUserCache = NewCache(24 * time.Hour)
328-
var ModelUserCache = NewUserCache(24 * time.Hour)
329-
var ModelProjectUserCache = NewProjectUserCache(6 * time.Hour)
355+
var GithubUserCache = NewCache(12 * time.Hour)
356+
var ModelUserCache = NewUserCache(12 * time.Hour)
357+
var ModelProjectUserCache = NewProjectUserCache(3 * time.Hour)
330358

331359
func init() {
332360
go func() {
@@ -339,6 +367,17 @@ func init() {
339367
}()
340368
}
341369

370+
// ClearCaches clears all in-memory caches maintained by the GitHub module.
371+
func ClearCaches() {
372+
f := logrus.Fields{
373+
"functionName": "github.github_repository.ClearCaches",
374+
}
375+
GithubUserCache.Clear()
376+
ModelUserCache.Clear()
377+
ModelProjectUserCache.Clear()
378+
log.WithFields(f).Info("cleared caches")
379+
}
380+
342381
func GetGitHubRepository(ctx context.Context, installationID, githubRepositoryID int64) (*github.Repository, error) {
343382
f := logrus.Fields{
344383
"functionName": "github.github_repository.GetGitHubRepository",
@@ -813,8 +852,13 @@ func GetCoAuthorCommits(
813852
}
814853
log.WithFields(f).Debugf("Co-author GitHub user details not found: %v", coAuthor)
815854
}
855+
if found {
856+
GithubUserCache.Set(cacheKey, user)
857+
} else {
858+
// negative cache for 30 minutes (this is for GitHub user not found)
859+
GithubUserCache.SetWithTTL(cacheKey, user, 30*time.Minute)
860+
}
816861

817-
GithubUserCache.Set(cacheKey, user)
818862
return summary, found
819863
}
820864

@@ -973,7 +1017,7 @@ func GetCommitAuthorSignedStatus(
9731017
*unsigned = append(*unsigned, userSummary)
9741018
mu.Unlock()
9751019
log.WithFields(f).Debugf("store per-project cache: unsigned, user is null (%+v)", projectCacheKey)
976-
ModelProjectUserCache.SetWithTTL(projectCacheKey, nil, false, false, QuickCacheTTL)
1020+
ModelProjectUserCache.SetWithTTL(projectCacheKey, nil, false, false, NegativeCacheTTL)
9771021
return
9781022
}
9791023
user := cachedUser
@@ -984,7 +1028,7 @@ func GetCommitAuthorSignedStatus(
9841028
*unsigned = append(*unsigned, userSummary)
9851029
mu.Unlock()
9861030
log.WithFields(f).Debugf("store per-project cache: unsigned, hasUserSigned error (%+v)", projectCacheKey)
987-
ModelProjectUserCache.SetWithTTL(projectCacheKey, user, false, false, QuickCacheTTL)
1031+
ModelProjectUserCache.SetWithTTL(projectCacheKey, user, false, false, NegativeCacheTTL)
9881032
return
9891033
}
9901034

@@ -1005,14 +1049,14 @@ func GetCommitAuthorSignedStatus(
10051049
*unsigned = append(*unsigned, userSummary)
10061050
mu.Unlock()
10071051
log.WithFields(f).Debugf("store per-project cache: unsigned, authorized is false (%+v)", projectCacheKey)
1008-
ModelProjectUserCache.SetWithTTL(projectCacheKey, user, false, userSummary.Affiliated, QuickCacheTTL)
1052+
ModelProjectUserCache.SetWithTTL(projectCacheKey, user, false, userSummary.Affiliated, NegativeCacheTTL)
10091053
}
10101054
} else {
10111055
mu.Lock()
10121056
*unsigned = append(*unsigned, userSummary)
10131057
mu.Unlock()
10141058
log.WithFields(f).Debugf("store per-project cache: unsigned, userSigned is null (%+v)", projectCacheKey)
1015-
ModelProjectUserCache.SetWithTTL(projectCacheKey, user, false, userSummary.Affiliated, QuickCacheTTL)
1059+
ModelProjectUserCache.SetWithTTL(projectCacheKey, user, false, userSummary.Affiliated, NegativeCacheTTL)
10161060
}
10171061
return
10181062
}
@@ -1059,8 +1103,8 @@ func GetCommitAuthorSignedStatus(
10591103
mu.Lock()
10601104
*unsigned = append(*unsigned, userSummary)
10611105
mu.Unlock()
1062-
ModelProjectUserCache.SetWithTTL(projectCacheKey, nil, false, false, QuickCacheTTL)
1063-
ModelUserCache.SetWithTTL(cacheKey, nil, QuickCacheTTL)
1106+
ModelProjectUserCache.SetWithTTL(projectCacheKey, nil, false, false, NegativeCacheTTL)
1107+
ModelUserCache.SetWithTTL(cacheKey, nil, NegativeCacheTTL)
10641108
return
10651109
}
10661110

@@ -1072,8 +1116,8 @@ func GetCommitAuthorSignedStatus(
10721116
mu.Lock()
10731117
*unsigned = append(*unsigned, userSummary)
10741118
mu.Unlock()
1075-
ModelProjectUserCache.SetWithTTL(projectCacheKey, user, false, false, QuickCacheTTL)
1076-
ModelUserCache.SetWithTTL(cacheKey, user, QuickCacheTTL)
1119+
ModelProjectUserCache.SetWithTTL(projectCacheKey, user, false, false, NegativeCacheTTL)
1120+
ModelUserCache.SetWithTTL(cacheKey, user, NegativeCacheTTL)
10771121
return
10781122
}
10791123

@@ -1095,16 +1139,16 @@ func GetCommitAuthorSignedStatus(
10951139
mu.Lock()
10961140
*unsigned = append(*unsigned, userSummary)
10971141
mu.Unlock()
1098-
ModelProjectUserCache.SetWithTTL(projectCacheKey, user, false, userSummary.Affiliated, QuickCacheTTL)
1099-
ModelUserCache.SetWithTTL(cacheKey, user, QuickCacheTTL)
1142+
ModelProjectUserCache.SetWithTTL(projectCacheKey, user, false, userSummary.Affiliated, NegativeCacheTTL)
1143+
ModelUserCache.SetWithTTL(cacheKey, user, NegativeCacheTTL)
11001144
}
11011145
} else {
11021146
log.WithFields(f).Debugf("store caches: unsigned, userSigned is null (%+v)", projectCacheKey)
11031147
mu.Lock()
11041148
*unsigned = append(*unsigned, userSummary)
11051149
mu.Unlock()
1106-
ModelProjectUserCache.SetWithTTL(projectCacheKey, user, false, userSummary.Affiliated, QuickCacheTTL)
1107-
ModelUserCache.SetWithTTL(cacheKey, user, QuickCacheTTL)
1150+
ModelProjectUserCache.SetWithTTL(projectCacheKey, user, false, userSummary.Affiliated, NegativeCacheTTL)
1151+
ModelUserCache.SetWithTTL(cacheKey, user, NegativeCacheTTL)
11081152
}
11091153
}
11101154

@@ -1542,6 +1586,79 @@ func hasCheckPreviouslyFailed(ctx context.Context, client *github.Client, owner,
15421586
return false, nil, nil
15431587
}
15441588

1589+
// UpdateCacheAfterSignature marks the user as authorized for the given project
1590+
func UpdateCacheAfterSignature(ctx context.Context, user *models.User, projectID string) error {
1591+
f := logrus.Fields{
1592+
"functionName": "github.github_repository.UpdateCacheAfterSignature",
1593+
"projectID": projectID,
1594+
}
1595+
1596+
if user == nil {
1597+
log.WithFields(f).Warn("nil user passed to UpdateCacheAfterSignature")
1598+
return fmt.Errorf("nil user")
1599+
}
1600+
projectID = strings.TrimSpace(projectID)
1601+
if projectID == "" {
1602+
log.WithFields(f).Warn("empty projectID passed to UpdateCacheAfterSignature")
1603+
return fmt.Errorf("empty projectID")
1604+
}
1605+
1606+
githubID := strings.TrimSpace(user.GithubID)
1607+
githubLogin := strings.TrimSpace(user.GithubUsername)
1608+
1609+
if githubID == "" || githubLogin == "" {
1610+
log.WithFields(f).Debugf("user lacks GitHub ID or username - skipping cache update (githubID=%q, login=%q)", githubID, githubLogin)
1611+
return nil
1612+
}
1613+
1614+
affiliated := strings.TrimSpace(user.CompanyID) != ""
1615+
1616+
emails := collectUserEmails(user)
1617+
if len(emails) == 0 {
1618+
log.WithFields(f).Debugf("no emails found for user (githubID=%s, login=%s) - nothing to cache", githubID, githubLogin)
1619+
return nil
1620+
}
1621+
1622+
loginLower := strings.ToLower(githubLogin)
1623+
1624+
for _, email := range emails {
1625+
genKey := UserKey(githubID, loginLower, email)
1626+
ModelUserCache.Set(genKey, user)
1627+
1628+
projKey := ProjectUserKey(projectID, githubID, loginLower, email)
1629+
ModelProjectUserCache.Set(projKey, user, true, affiliated)
1630+
}
1631+
1632+
log.WithFields(f).Infof("updated caches for user login=%q (GitHubID=%s), project=%s: marked as authorized for %d email(s)",
1633+
loginLower, githubID, projectID, len(emails))
1634+
1635+
return nil
1636+
}
1637+
1638+
// collectUserEmails returns a de-duplicated, lowercased list of the user's emails.
1639+
func collectUserEmails(u *models.User) []string {
1640+
uniq := make(map[string]struct{}, 4)
1641+
add := func(s string) {
1642+
s = strings.ToLower(strings.TrimSpace(s))
1643+
if s != "" {
1644+
uniq[s] = struct{}{}
1645+
}
1646+
}
1647+
1648+
add(string(u.LfEmail))
1649+
1650+
for _, em := range u.Emails {
1651+
add(em)
1652+
}
1653+
1654+
out := make([]string, 0, len(uniq))
1655+
for e := range uniq {
1656+
out = append(out, e)
1657+
}
1658+
sort.Strings(out)
1659+
return out
1660+
}
1661+
15451662
func hasCheckPreviouslySucceeded(ctx context.Context, client *github.Client, owner, repo string, pullRequestID int) (bool, *github.IssueComment, error) {
15461663
f := logrus.Fields{
15471664
"functionName": "github.github_repository.hasCheckPreviouslySucceeded",

cla-backend-go/swagger/cla.v2.yaml

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4046,6 +4046,40 @@ paths:
40464046
tags:
40474047
- sign
40484048

4049+
/clear-cache:
4050+
post:
4051+
summary: Clear cache
4052+
description: Clears the service cache
4053+
operationId: clearCaches
4054+
parameters:
4055+
- $ref: '#/parameters/authorization'
4056+
- $ref: '#/parameters/x-request-id'
4057+
- $ref: '#/parameters/x-acl'
4058+
- $ref: '#/parameters/x-username'
4059+
- $ref: '#/parameters/x-email'
4060+
responses:
4061+
'200':
4062+
description: Success
4063+
headers:
4064+
x-request-id:
4065+
type: string
4066+
description: The unique request ID value - assigned/set by the API Gateway
4067+
based on the session
4068+
schema:
4069+
$ref: '#/definitions/clear-cache-output'
4070+
'400':
4071+
$ref: '#/responses/invalid-request'
4072+
'401':
4073+
$ref: '#/responses/unauthorized'
4074+
'403':
4075+
$ref: '#/responses/forbidden'
4076+
'404':
4077+
$ref: '#/responses/not-found'
4078+
'500':
4079+
$ref: '#/responses/internal-server-error'
4080+
tags:
4081+
- sign
4082+
40494083
/github/activity:
40504084
post:
40514085
summary: GitHub Activity Callback Handler
@@ -5965,6 +5999,12 @@ definitions:
59655999
description: on signing the document, page will get redirected to this url. This is valid only when send_as_email is false
59666000
format: uri
59676001

6002+
clear-cache-output:
6003+
type: object
6004+
properties:
6005+
status:
6006+
type: string
6007+
59686008
corporate-signature-output:
59696009
type: object
59706010
properties:

cla-backend-go/v2/sign/handlers.go

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,30 @@ func CCLADocusignMiddleware(next http.Handler) http.Handler {
7979
// Configure API call
8080
func Configure(api *operations.EasyclaAPI, service Service, userService users.Service) {
8181
// Retrieve a list of available templates
82+
api.SignClearCachesHandler = sign.ClearCachesHandlerFunc(
83+
func(params sign.ClearCachesParams, user *auth.User) middleware.Responder {
84+
reqID := utils.GetRequestID(params.XREQUESTID)
85+
ctx := utils.ContextWithRequestAndUser(params.HTTPRequest.Context(), reqID, user) // nolint
86+
utils.SetAuthUserProperties(user, params.XUSERNAME, params.XEMAIL)
87+
f := logrus.Fields{
88+
"functionName": "v2.sign.handlers.ClearCachesHandler",
89+
utils.XREQUESTID: reqID,
90+
"authUserName": utils.StringValue(params.XUSERNAME),
91+
"authUserEmail": utils.StringValue(params.XEMAIL),
92+
}
93+
log.WithFields(f).Info("clearing caches")
94+
resp, err := service.ClearCaches(ctx)
95+
if err != nil {
96+
log.WithFields(f).WithError(err).Warn("failed to clear caches")
97+
if strings.Contains(err.Error(), "internal server error") {
98+
return sign.NewClearCachesInternalServerError().WithPayload(errorResponse(reqID, err))
99+
}
100+
return sign.NewClearCachesBadRequest().WithPayload(errorResponse(reqID, err))
101+
}
102+
log.WithFields(f).Info("caches cleared successfully")
103+
return sign.NewClearCachesOK().WithPayload(resp)
104+
})
105+
82106
api.SignRequestCorporateSignatureHandler = sign.RequestCorporateSignatureHandlerFunc(
83107
func(params sign.RequestCorporateSignatureParams, user *auth.User) middleware.Responder {
84108
reqID := utils.GetRequestID(params.XREQUESTID)

0 commit comments

Comments
 (0)