Commit bb72bc3

Implement retries on 503s
This caps total attempts of any given request to at most 3x 503 responses (in other words, the third 503 will "bubble up" as-is). The intent there is that if there *is* a serious issue with Hub, we don't want to continue retrying aggressively as that would likely contribute to the outage persisting longer.
1 parent: d1d547b
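To make the cap concrete, here's a minimal sketch (hypothetical, not part of the commit): it assumes we're inside the same package, since the type is unexported, and that the struct's only fields are the roundTripper and limiter the diff dereferences. Against a server that always answers 503, the client makes exactly three attempts and then sees the third 503 as-is:

package registry

import (
	"fmt"
	"net/http"
	"net/http/httptest"
	"time"

	"golang.org/x/time/rate"
)

func Example503Cap() {
	// hypothetical test server that always answers 503
	attempts := 0
	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		attempts++
		w.WriteHeader(http.StatusServiceUnavailable)
	}))
	defer srv.Close()

	client := &http.Client{
		Transport: &rateLimitedRetryingRoundTripper{
			roundTripper: http.DefaultTransport,
			limiter:      rate.NewLimiter(rate.Every(time.Second), 10), // hypothetical overall limit
		},
	}

	res, err := client.Get(srv.URL)
	if err != nil {
		panic(err)
	}
	res.Body.Close()

	// the first 503 triggers a retry, the second another; the third "bubbles up"
	fmt.Println(attempts, res.StatusCode) // 3 503
}

The ~one-second gaps between attempts come from requestRetryLimiter inside RoundTrip, so this sketch takes about two seconds to run.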

registry/rate-limits.go

Lines changed: 39 additions & 26 deletions
@@ -20,9 +20,15 @@ type rateLimitedRetryingRoundTripper struct {
 }
 
 func (d *rateLimitedRetryingRoundTripper) RoundTrip(req *http.Request) (*http.Response, error) {
-    requestRetryLimiter := rate.NewLimiter(rate.Every(time.Second), 1) // cap request retries at once per second
-    firstTry := true
-    ctx := req.Context()
+    var (
+        // cap request retries at once per second
+        requestRetryLimiter = rate.NewLimiter(rate.Every(time.Second), 1)
+
+        // if we see 3x 503 during retry, we should bail
+        maxTry503 = 3
+
+        ctx = req.Context()
+    )
     for {
         if err := requestRetryLimiter.Wait(ctx); err != nil {
             return nil, err
@@ -31,44 +37,51 @@ func (d *rateLimitedRetryingRoundTripper) RoundTrip(req *http.Re
             return nil, err
         }
 
-        if !firstTry {
-            // https://pkg.go.dev/net/http#RoundTripper
-            // "RoundTrip should not modify the request, except for consuming and closing the Request's Body."
-            if req.Body != nil {
-                req.Body.Close()
-            }
-            req = req.Clone(ctx)
-            if req.GetBody != nil {
-                var err error
-                req.Body, err = req.GetBody()
-                if err != nil {
-                    return nil, err
-                }
-            }
-        }
-        firstTry = false
-
         // in theory, this RoundTripper we're invoking should close req.Body (per the RoundTripper contract), so we shouldn't have to 🤞
         res, err := d.roundTripper.RoundTrip(req)
         if err != nil {
             return nil, err
         }
 
-        // TODO 503 should probably result in at least one or two auto-retries (especially with the automatic retry delay this injects)
+        doRetry := false
+
         if res.StatusCode == 429 {
+            // just eat all available tokens and starve out the rate limiter (any 429 means we need to slow down, so our whole "bucket" is shot)
+            for i := d.limiter.Tokens(); i > 0; i-- {
+                _ = d.limiter.Allow()
+            }
+            doRetry = true // TODO maximum number of retries? (perhaps a deadline instead? req.WithContext to inject a deadline? 👀)
+        }
+
+        // 503 should result in a few auto-retries (especially with the automatic retry delay this injects), but up to a limit so we don't contribute to the "thundering herd" too much in a serious outage
+        if res.StatusCode == 503 && maxTry503 > 1 {
+            maxTry503--
+            doRetry = true
+            // no need to eat up the rate limiter tokens as we do for 429 because this is not a rate limiting error (and we have the "requestRetryLimiter" that separately limits our retries of *this* request)
+        }
+
+        if doRetry {
             // satisfy the big scary warnings on https://pkg.go.dev/net/http#RoundTripper and https://pkg.go.dev/net/http#Client.Do about the downsides of failing to Close the response body
             if err := res.Body.Close(); err != nil {
                 return nil, err
             }
 
-            // just eat all available tokens and starve out the rate limiter (any 429 means we need to slow down, so our whole "bucket" is shot)
-            for i := d.limiter.Tokens(); i > 0; i-- {
-                _ = d.limiter.Allow()
+            // https://pkg.go.dev/net/http#RoundTripper
+            // "RoundTrip should not modify the request, except for consuming and closing the Request's Body."
+            if req.Body != nil {
+                req.Body.Close()
+            }
+            req = req.Clone(ctx)
+            if req.GetBody != nil {
+                var err error
+                req.Body, err = req.GetBody()
+                if err != nil {
+                    return nil, err
+                }
             }
 
             // TODO some way to notify upwards that we retried?
-            // TODO maximum number of retries? (perhaps a deadline instead? req.WithContext to inject a deadline? 👀)
-            // TODO implement more backoff logic than just one retry per second + docker hub rate limit?
+            // TODO implement more backoff logic than just one retry per second + docker hub rate limit (+ limited 503 retry)?
            continue
        }
 
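One detail worth calling out from the 429 branch: draining Tokens() via repeated Allow() empties the limiter's bucket, so the next Wait on that limiter blocks for a full refill interval. A standalone sketch of that idiom (hypothetical numbers, same golang.org/x/time/rate package):

package main

import (
	"context"
	"fmt"
	"time"

	"golang.org/x/time/rate"
)

func main() {
	// bucket refills at 1 token/second, burst of 5 (starts full)
	lim := rate.NewLimiter(rate.Every(time.Second), 5)

	// eat all available tokens, same as the 429 branch in the diff
	// (Tokens returns a float64 count of what's currently available)
	for i := lim.Tokens(); i > 0; i-- {
		_ = lim.Allow()
	}

	// with the bucket empty, Wait now blocks for ~one refill interval
	start := time.Now()
	_ = lim.Wait(context.Background())
	fmt.Println(time.Since(start).Round(time.Second)) // ~1s
}

That's also why the 503 path skips this step: per the diff's comment, a 503 isn't a rate-limiting signal, so only the per-request requestRetryLimiter applies there.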
