Skip to content

Commit f8061ac

Browse files
feat: add support for httpcaching of github requests
Signed-off-by: Jean-Damien HATZENBUHLER <jean-damien.hatzenbuhler@contentsquare.com>
1 parent f6d00b7 commit f8061ac

File tree

14 files changed

+487
-8
lines changed

14 files changed

+487
-8
lines changed

applicationset/controllers/requeue_after_test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,7 @@ func TestRequeueAfter(t *testing.T) {
5656
},
5757
}
5858
fakeDynClient := dynfake.NewSimpleDynamicClientWithCustomListKinds(runtime.NewScheme(), gvrToListKind, duckType)
59-
scmConfig := generators.NewSCMConfig("", []string{""}, true, true, nil, true)
59+
scmConfig := generators.NewSCMConfig("", []string{""}, true, true, false, 100, nil, true)
6060
terminalGenerators := map[string]generators.Generator{
6161
"List": generators.NewListGenerator(),
6262
"Clusters": generators.NewClusterGenerator(ctx, k8sClient, appClientset, "argocd"),

applicationset/generators/pull_request_test.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -398,7 +398,7 @@ func TestAllowedSCMProviderPullRequest(t *testing.T) {
398398
"gitea.myorg.com",
399399
"bitbucket.myorg.com",
400400
"azuredevops.myorg.com",
401-
}, true, true, nil, true))
401+
}, true, true, false, 100, nil, true))
402402

403403
applicationSetInfo := argoprojiov1alpha1.ApplicationSet{
404404
ObjectMeta: metav1.ObjectMeta{
@@ -421,7 +421,7 @@ func TestAllowedSCMProviderPullRequest(t *testing.T) {
421421
}
422422

423423
func TestSCMProviderDisabled_PRGenerator(t *testing.T) {
424-
generator := NewPullRequestGenerator(nil, NewSCMConfig("", []string{}, false, true, nil, true))
424+
generator := NewPullRequestGenerator(nil, NewSCMConfig("", []string{}, false, true, false, 100, nil, true))
425425

426426
applicationSetInfo := argoprojiov1alpha1.ApplicationSet{
427427
ObjectMeta: metav1.ObjectMeta{

applicationset/generators/scm_provider.go

Lines changed: 15 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -37,16 +37,20 @@ type SCMConfig struct {
3737
allowedSCMProviders []string
3838
enableSCMProviders bool
3939
enableGitHubAPIMetrics bool
40+
enableGitHubCache bool
41+
githubCacheSize int
4042
GitHubApps github_app_auth.Credentials
4143
tokenRefStrictMode bool
4244
}
4345

44-
func NewSCMConfig(scmRootCAPath string, allowedSCMProviders []string, enableSCMProviders bool, enableGitHubAPIMetrics bool, gitHubApps github_app_auth.Credentials, tokenRefStrictMode bool) SCMConfig {
46+
func NewSCMConfig(scmRootCAPath string, allowedSCMProviders []string, enableSCMProviders bool, enableGitHubAPIMetrics bool, enableGitHubCache bool, githubCacheSize int, gitHubApps github_app_auth.Credentials, tokenRefStrictMode bool) SCMConfig {
4547
return SCMConfig{
4648
scmRootCAPath: scmRootCAPath,
4749
allowedSCMProviders: allowedSCMProviders,
4850
enableSCMProviders: enableSCMProviders,
4951
enableGitHubAPIMetrics: enableGitHubAPIMetrics,
52+
enableGitHubCache: enableGitHubCache,
53+
githubCacheSize: githubCacheSize,
5054
GitHubApps: gitHubApps,
5155
tokenRefStrictMode: tokenRefStrictMode,
5256
}
@@ -289,13 +293,21 @@ func (g *SCMProviderGenerator) githubProvider(ctx context.Context, github *argop
289293
httpClient = services.NewGitHubMetricsClient(metricsCtx)
290294
}
291295

296+
if g.enableGitHubCache {
297+
if g.enableGitHubAPIMetrics {
298+
httpClient = services.NewGitHubCache(g.githubCacheSize, httpClient.Transport)
299+
} else {
300+
httpClient = services.NewGitHubCache(g.githubCacheSize, nil)
301+
}
302+
}
303+
292304
if github.AppSecretName != "" {
293305
auth, err := g.GitHubApps.GetAuthSecret(ctx, github.AppSecretName)
294306
if err != nil {
295307
return nil, fmt.Errorf("error fetching Github app secret: %w", err)
296308
}
297309

298-
if g.enableGitHubAPIMetrics {
310+
if g.enableGitHubAPIMetrics || g.enableGitHubCache {
299311
return scm_provider.NewGithubAppProviderFor(ctx, *auth, github.Organization, github.API, github.AllBranches, httpClient)
300312
}
301313
return scm_provider.NewGithubAppProviderFor(ctx, *auth, github.Organization, github.API, github.AllBranches)
@@ -306,7 +318,7 @@ func (g *SCMProviderGenerator) githubProvider(ctx context.Context, github *argop
306318
return nil, fmt.Errorf("error fetching Github token: %w", err)
307319
}
308320

309-
if g.enableGitHubAPIMetrics {
321+
if g.enableGitHubAPIMetrics || g.enableGitHubCache {
310322
return scm_provider.NewGithubProvider(github.Organization, token, github.API, github.AllBranches, httpClient)
311323
}
312324
return scm_provider.NewGithubProvider(github.Organization, token, github.API, github.AllBranches)
Lines changed: 300 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,300 @@
1+
package services
2+
3+
import (
4+
"bytes"
5+
"context"
6+
"encoding/hex"
7+
"fmt"
8+
"io"
9+
"maps"
10+
"net/http"
11+
"net/url"
12+
"strings"
13+
"sync"
14+
15+
gh_hash_token "github.com/bored-engineer/github-conditional-http-transport"
16+
log "github.com/sirupsen/logrus"
17+
"k8s.io/utils/lru"
18+
)
19+
20+
var VaryHeaders = []string{
21+
"Accept-Encoding",
22+
"Accept",
23+
"Authorization",
24+
}
25+
26+
var ExcludedCacheHeaders = []string{
27+
"Date",
28+
"Set-Cookie",
29+
"X-GitHub-Request-ID",
30+
"X-RateLimit-Limit",
31+
"X-RateLimit-Remaining",
32+
"X-RateLimit-Reset",
33+
"X-RateLimit-Resource",
34+
"X-RateLimit-Used",
35+
}
36+
37+
type cachedResponse struct {
38+
Response *http.Response
39+
Body []byte
40+
}
41+
42+
type Storage struct {
43+
lock *sync.RWMutex
44+
lruMap *lru.Cache
45+
}
46+
47+
func (s Storage) Get(_ context.Context, u *url.URL) (*http.Response, error) {
48+
s.lock.RLock()
49+
defer s.lock.RUnlock()
50+
body, ok := s.lruMap.Get(u.String())
51+
if !ok {
52+
return nil, nil
53+
}
54+
bodyCached, valid := body.(cachedResponse)
55+
if !valid {
56+
return nil, nil
57+
}
58+
resp := *bodyCached.Response
59+
resp.Body = io.NopCloser(bytes.NewReader(bodyCached.Body))
60+
return &resp, nil
61+
}
62+
63+
func (s Storage) Put(_ context.Context, u *url.URL, resp *http.Response) error {
64+
s.lock.Lock()
65+
defer s.lock.Unlock()
66+
body, err := io.ReadAll(resp.Body)
67+
if err != nil {
68+
return fmt.Errorf("(*http.Response).Body.Read failed: %w", err)
69+
}
70+
if err := resp.Body.Close(); err != nil {
71+
return fmt.Errorf("(*http.Response).Body.Close failed: %w", err)
72+
}
73+
resp.Body = nil
74+
s.lruMap.Add(u.String(), cachedResponse{
75+
Response: resp,
76+
Body: body,
77+
})
78+
return nil
79+
}
80+
81+
func NewLRUSStorage(size int) Storage {
82+
return Storage{
83+
lock: &sync.RWMutex{},
84+
lruMap: lru.New(size),
85+
}
86+
}
87+
88+
type GitHubCacheTransport struct {
89+
parent http.RoundTripper
90+
storage Storage
91+
}
92+
93+
func cacheable(req *http.Request) bool {
94+
if req.Method != http.MethodGet && req.Method != http.MethodHead {
95+
return false
96+
}
97+
if req.Header.Get("Range") != "" {
98+
return false
99+
}
100+
if req.URL.Path == "/rate_limit" || req.URL.Path == "/api/v3/rate_limit" {
101+
return false
102+
}
103+
return true
104+
}
105+
106+
func isSameCachedHeader(req *http.Request, resp *http.Response) bool {
107+
// Check if the hashed_token and Accept headers are the same
108+
for _, header := range VaryHeaders {
109+
if header == "Authorization" {
110+
if gh_hash_token.HashToken(req.Header.Get(header)) != resp.Header.Get("X-Varied-"+header) {
111+
return false
112+
}
113+
} else {
114+
if req.Header.Get(header) != resp.Header.Get("X-Varied-"+header) {
115+
return false
116+
}
117+
}
118+
}
119+
return true
120+
}
121+
122+
func (t *GitHubCacheTransport) cacheResponse(req *http.Request, resp *http.Response) (*http.Response, error) {
123+
// We can only cache successful responses
124+
if resp.StatusCode != http.StatusOK {
125+
return resp, nil
126+
}
127+
128+
// If there was no ETag, we can't cache it
129+
if resp.Header.Get("Etag") == "" {
130+
return resp, nil
131+
}
132+
133+
// Read the response body into memory
134+
body, err := io.ReadAll(resp.Body)
135+
if err != nil {
136+
return resp, fmt.Errorf("(*http.Response).Body.Read failed: %w", err)
137+
}
138+
if err := resp.Body.Close(); err != nil {
139+
return resp, fmt.Errorf("(*http.Response).Body.Close failed: %w", err)
140+
}
141+
142+
// Make a shallow copy of the *http.Response as we're going to modify the body/headers
143+
cacheResp := *resp
144+
cacheResp.Body = io.NopCloser(bytes.NewReader(body))
145+
cacheResp.ContentLength = int64(len(body))
146+
cacheResp.Header = maps.Clone(resp.Header)
147+
148+
// Remove excluded headers from the cached response
149+
for _, header := range ExcludedCacheHeaders {
150+
cacheResp.Header.Del(header)
151+
}
152+
153+
// Similar to httpcache, inject fake X-Varied-<header> "response" headers
154+
for _, header := range VaryHeaders {
155+
if vals := req.Header.Values(header); len(vals) > 0 {
156+
if header == "Authorization" {
157+
vals = []string{gh_hash_token.HashToken(vals[0])} // Don't leak/cache the raw authentication token
158+
}
159+
cacheResp.Header["X-Varied-"+header] = vals
160+
}
161+
}
162+
163+
if err := t.storage.Put(req.Context(), req.URL, &cacheResp); err != nil {
164+
return resp, fmt.Errorf("(Storage).Put failed: %w", err)
165+
}
166+
167+
// Replace the response body with the cached body
168+
resp.Body = io.NopCloser(bytes.NewReader(body))
169+
resp.ContentLength = int64(len(body))
170+
return resp, nil
171+
}
172+
173+
func (t *GitHubCacheTransport) injectEtagHeader(req *http.Request) (resp *http.Response, err error) {
174+
// Check if we have a cached response available in the storage for this URL, else bail
175+
resp, err = t.storage.Get(req.Context(), req.URL)
176+
if err != nil {
177+
return nil, fmt.Errorf("(Storage).Get failed: %w", err)
178+
} else if resp == nil {
179+
return nil, nil
180+
}
181+
defer func() {
182+
// If we're not using the cached response, ensure we close the body
183+
// But first, read it to completion to ensure the connection can be re-used
184+
if resp == nil {
185+
_, _ = io.Copy(io.Discard, resp.Body)
186+
_ = resp.Body.Close()
187+
}
188+
}()
189+
190+
// If we're using the same header, we can directly use the cached etag
191+
if isSameCachedHeader(req, resp) {
192+
req.Header.Set("If-None-Match", resp.Header.Get("Etag"))
193+
return resp, nil
194+
}
195+
196+
// We'll have to read the cached response body into memory to calculate the ETag
197+
var buf bytes.Buffer
198+
199+
// Calculate the _expected_ ETag from the _input_ headers but the cached body
200+
h := gh_hash_token.Hash(req.Header)
201+
if _, err := io.Copy(io.MultiWriter(&buf, h), resp.Body); err != nil {
202+
return nil, fmt.Errorf("(*http.Response).Body.Read failed: %w", err)
203+
}
204+
if err := resp.Body.Close(); err != nil {
205+
return nil, fmt.Errorf("(*http.Response).Body.Close failed: %w", err)
206+
}
207+
208+
// Add the If-None-Match header to the request with that calculated ETag
209+
req.Header.Set("If-None-Match", `"`+hex.EncodeToString(h.Sum(nil))+`"`)
210+
211+
// Make the next "read" from the cached body use the bytes we just read
212+
resp.Body = io.NopCloser(&buf)
213+
resp.ContentLength = int64(buf.Len())
214+
215+
return resp, nil
216+
}
217+
218+
func (t *GitHubCacheTransport) RoundTrip(req *http.Request) (*http.Response, error) {
219+
// If the request is not cacheable, just pass it through to the parent RoundTripper
220+
if !cacheable(req) {
221+
return t.parent.RoundTrip(req)
222+
}
223+
224+
// Attempt to fetch from storage and inject the cache headers to the request
225+
cachedResp, err := t.injectEtagHeader(req)
226+
if err != nil {
227+
return nil, err
228+
}
229+
230+
// Perform the upstream request
231+
resp, err := t.parent.RoundTrip(req)
232+
if err != nil {
233+
if cachedResp != nil {
234+
cachedResp.Body.Close()
235+
}
236+
return nil, err
237+
}
238+
239+
// If the upstream response is 304 Not Modified, we can use the cached response
240+
if cachedResp != nil {
241+
if resp.StatusCode == http.StatusNotModified {
242+
// Consume the rest of the response body to ensure the connection can be re-used
243+
if _, err := io.Copy(io.Discard, resp.Body); err != nil {
244+
cachedResp.Body.Close()
245+
return nil, fmt.Errorf("(*http.Response).Body.Read failed: %w", err)
246+
}
247+
if err := resp.Body.Close(); err != nil {
248+
cachedResp.Body.Close()
249+
return nil, fmt.Errorf("(*http.Response).Body.Close failed: %w", err)
250+
}
251+
252+
// Copy in any cached headers that are not already set
253+
for key, vals := range cachedResp.Header {
254+
if strings.HasPrefix(key, "X-Varied-") {
255+
continue // Skip the X-Varied-* headers, they are "internal" to the cache
256+
}
257+
if _, ok := resp.Header[key]; !ok {
258+
resp.Header[key] = vals
259+
}
260+
}
261+
262+
// Copy the body and status from the cache
263+
resp.StatusCode = cachedResp.StatusCode
264+
resp.Status = cachedResp.Status
265+
resp.Body = cachedResp.Body
266+
resp.ContentLength = cachedResp.ContentLength
267+
268+
return resp, nil
269+
} else {
270+
// Discard the cached response body, it wasn't valid/used
271+
_, _ = io.Copy(io.Discard, cachedResp.Body)
272+
_ = cachedResp.Body.Close()
273+
}
274+
}
275+
276+
// We got a valid response, try to cache it
277+
resp, err = t.cacheResponse(req, resp)
278+
if err != nil {
279+
return nil, err
280+
}
281+
return resp, nil
282+
}
283+
284+
func NewGitHubCacheTransport(storage Storage, parent http.RoundTripper) *GitHubCacheTransport {
285+
if parent == nil {
286+
parent = http.DefaultTransport
287+
}
288+
return &GitHubCacheTransport{
289+
parent: parent,
290+
storage: storage,
291+
}
292+
}
293+
294+
func NewGitHubCache(size int, parent http.RoundTripper) *http.Client {
295+
log.Debug("Creating new GitHub in memory cache")
296+
storage := NewLRUSStorage(size)
297+
return &http.Client{
298+
Transport: NewGitHubCacheTransport(storage, parent),
299+
}
300+
}

0 commit comments

Comments
 (0)