Skip to content

Commit 0da9473

Browse files
authored
Merge pull request #2688 from jackfrancis/retry-after-create-resource-get
detect Retry-After during async “does resource exist?” flow
2 parents 3203466 + bcee198 commit 0da9473

File tree

3 files changed

+125
-6
lines changed

3 files changed

+125
-6
lines changed

azure/services/async/async.go

Lines changed: 38 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,11 @@ package async
1818

1919
import (
2020
"context"
21+
"net/http"
22+
"strconv"
2123
"time"
2224

25+
"github.com/Azure/go-autorest/autorest"
2326
azureautorest "github.com/Azure/go-autorest/autorest/azure"
2427
"github.com/pkg/errors"
2528
infrav1 "sigs.k8s.io/cluster-api-provider-azure/api/v1beta1"
@@ -76,7 +79,7 @@ func processOngoingOperation(ctx context.Context, scope FutureScope, client Futu
7679

7780
// Operation is still in progress, update conditions and requeue.
7881
log.V(2).Info("long running operation is still ongoing", "service", serviceName, "resource", resourceName)
79-
return nil, azure.WithTransientError(azure.NewOperationNotDoneError(future), retryAfter(sdkFuture))
82+
return nil, azure.WithTransientError(azure.NewOperationNotDoneError(future), getRequeueAfterFromFuture(sdkFuture))
8083
}
8184
if err != nil {
8285
log.V(2).Error(err, "error checking long running operation status after it finished")
@@ -111,7 +114,8 @@ func (s *Service) CreateResource(ctx context.Context, spec azure.ResourceSpecGet
111114
// Get the resource if it already exists, and use it to construct the desired resource parameters.
112115
var existingResource interface{}
113116
if existing, err := s.Creator.Get(ctx, spec); err != nil && !azure.ResourceNotFound(err) {
114-
return nil, errors.Wrapf(err, "failed to get existing resource %s/%s (service: %s)", rgName, resourceName, serviceName)
117+
errWrapped := errors.Wrapf(err, "failed to get existing resource %s/%s (service: %s)", rgName, resourceName, serviceName)
118+
return nil, azure.WithTransientError(errWrapped, getRetryAfterFromError(err))
115119
} else if err == nil {
116120
existingResource = existing
117121
log.V(2).Info("successfully got existing resource", "service", serviceName, "resource", resourceName, "resourceGroup", rgName)
@@ -136,7 +140,7 @@ func (s *Service) CreateResource(ctx context.Context, spec azure.ResourceSpecGet
136140
return nil, errors.Wrapf(err, "failed to create resource %s/%s (service: %s)", rgName, resourceName, serviceName)
137141
}
138142
s.Scope.SetLongRunningOperationState(future)
139-
return nil, azure.WithTransientError(azure.NewOperationNotDoneError(future), retryAfter(sdkFuture))
143+
return nil, azure.WithTransientError(azure.NewOperationNotDoneError(future), getRequeueAfterFromFuture(sdkFuture))
140144
} else if err != nil {
141145
return nil, errors.Wrapf(err, "failed to create resource %s/%s (service: %s)", rgName, resourceName, serviceName)
142146
}
@@ -170,7 +174,7 @@ func (s *Service) DeleteResource(ctx context.Context, spec azure.ResourceSpecGet
170174
return errors.Wrapf(err, "failed to delete resource %s/%s (service: %s)", rgName, resourceName, serviceName)
171175
}
172176
s.Scope.SetLongRunningOperationState(future)
173-
return azure.WithTransientError(azure.NewOperationNotDoneError(future), retryAfter(sdkFuture))
177+
return azure.WithTransientError(azure.NewOperationNotDoneError(future), getRequeueAfterFromFuture(sdkFuture))
174178
} else if err != nil {
175179
if azure.ResourceNotFound(err) {
176180
// already deleted
@@ -183,12 +187,40 @@ func (s *Service) DeleteResource(ctx context.Context, spec azure.ResourceSpecGet
183187
return nil
184188
}
185189

186-
// retryAfter returns the max between the `RETRY-AFTER` header and the default requeue time.
190+
// getRequeueAfterFromFuture returns the max between the `RETRY-AFTER` header and the default requeue time.
187191
// This ensures we respect the retry-after header if it is set and avoid retrying too often during an API throttling event.
188-
func retryAfter(sdkFuture azureautorest.FutureAPI) time.Duration {
192+
func getRequeueAfterFromFuture(sdkFuture azureautorest.FutureAPI) time.Duration {
189193
retryAfter, _ := sdkFuture.GetPollingDelay()
190194
if retryAfter < reconciler.DefaultReconcilerRequeue {
191195
retryAfter = reconciler.DefaultReconcilerRequeue
192196
}
193197
return retryAfter
194198
}
199+
200+
// getRetryAfterFromError returns the time.Duration from the http.Response in the autorest.DetailedError.
201+
// If there is no Response object, or if there is no meaningful Retry-After header data, we return a default.
202+
func getRetryAfterFromError(err error) time.Duration {
203+
// In case we aren't able to introspect Retry-After from the error type, we'll return this default
204+
ret := reconciler.DefaultReconcilerRequeue
205+
var detailedError autorest.DetailedError
206+
// if we have a strongly typed autorest.DetailedError then we can introspect the HTTP response data
207+
if errors.As(err, &detailedError) {
208+
if detailedError.Response != nil {
209+
// If we have Retry-After HTTP header data for any reason, prefer it
210+
if retryAfter := detailedError.Response.Header.Get("Retry-After"); retryAfter != "" {
211+
// This handles the case where Retry-After data is in the form of units of seconds
212+
if rai, err := strconv.Atoi(retryAfter); err == nil {
213+
ret = time.Duration(rai) * time.Second
214+
// This handles the case where Retry-After data is in the form of absolute time
215+
} else if t, err := time.Parse(time.RFC1123, retryAfter); err == nil {
216+
ret = time.Until(t)
217+
}
218+
// If we didn't find Retry-After HTTP header data but the response type is 429,
219+
// we'll have to come up with our sane default.
220+
} else if detailedError.Response.StatusCode == http.StatusTooManyRequests {
221+
ret = reconciler.DefaultHTTP429RetryAfter
222+
}
223+
}
224+
}
225+
return ret
226+
}

azure/services/async/async_test.go

Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ import (
2121
"errors"
2222
"net/http"
2323
"testing"
24+
"time"
2425

2526
"github.com/Azure/azure-sdk-for-go/services/resources/mgmt/2019-05-01/resources"
2627
"github.com/Azure/go-autorest/autorest"
@@ -31,6 +32,7 @@ import (
3132
"sigs.k8s.io/cluster-api-provider-azure/azure/mock_azure"
3233
"sigs.k8s.io/cluster-api-provider-azure/azure/services/async/mock_async"
3334
gomockinternal "sigs.k8s.io/cluster-api-provider-azure/internal/test/matchers/gomock"
35+
"sigs.k8s.io/cluster-api-provider-azure/util/reconciler"
3436
)
3537

3638
var (
@@ -434,3 +436,86 @@ func TestDeleteResource(t *testing.T) {
434436
})
435437
}
436438
}
439+
440+
func TestGetRetryAfterFromError(t *testing.T) {
441+
cases := []struct {
442+
name string
443+
input error
444+
expected time.Duration
445+
expectedRangeTolerance time.Duration
446+
}{
447+
{
448+
name: "Retry-After header data present in the form of units of seconds",
449+
input: autorest.DetailedError{
450+
Response: &http.Response{
451+
Header: http.Header{
452+
"Retry-After": []string{"2"},
453+
},
454+
},
455+
},
456+
expected: 2 * time.Second,
457+
},
458+
{
459+
name: "Retry-After header data present in the form of units of absolute time",
460+
input: autorest.DetailedError{
461+
Response: &http.Response{
462+
Header: http.Header{
463+
"Retry-After": []string{time.Now().Add(1 * time.Hour).Format(time.RFC1123)},
464+
},
465+
},
466+
},
467+
expected: 1 * time.Hour,
468+
expectedRangeTolerance: 5 * time.Second,
469+
},
470+
{
471+
name: "Retry-After header data not present",
472+
input: autorest.DetailedError{
473+
Response: &http.Response{
474+
Header: http.Header{
475+
"foo": []string{"bar"},
476+
},
477+
},
478+
},
479+
expected: reconciler.DefaultReconcilerRequeue,
480+
},
481+
{
482+
name: "Retry-After header data not present in HTTP 429",
483+
input: autorest.DetailedError{
484+
Response: &http.Response{
485+
StatusCode: http.StatusTooManyRequests,
486+
Header: http.Header{
487+
"foo": []string{"bar"},
488+
},
489+
},
490+
},
491+
expected: reconciler.DefaultHTTP429RetryAfter,
492+
},
493+
{
494+
name: "nil http.Response",
495+
input: autorest.DetailedError{
496+
Response: nil,
497+
},
498+
expected: reconciler.DefaultReconcilerRequeue,
499+
},
500+
{
501+
name: "error type is not autorest.DetailedError",
502+
input: errors.New("error"),
503+
expected: reconciler.DefaultReconcilerRequeue,
504+
},
505+
}
506+
507+
for _, c := range cases {
508+
c := c
509+
t.Run(c.name, func(t *testing.T) {
510+
t.Parallel()
511+
g := NewWithT(t)
512+
ret := getRetryAfterFromError(c.input)
513+
if c.expectedRangeTolerance > 0 {
514+
g.Expect(ret).To(BeNumerically("<", c.expected))
515+
g.Expect(ret + c.expectedRangeTolerance).To(BeNumerically(">", c.expected))
516+
} else {
517+
g.Expect(ret).To(Equal(c.expected))
518+
}
519+
})
520+
}
521+
}

util/reconciler/defaults.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,8 @@ const (
3131
DefaultAzureCallTimeout = 2 * time.Second
3232
// DefaultReconcilerRequeue is the default value for the reconcile retry.
3333
DefaultReconcilerRequeue = 15 * time.Second
34+
// DefaultHTTP429RetryAfter is a default backoff wait time when we get a HTTP 429 response with no Retry-After data.
35+
DefaultHTTP429RetryAfter = 1 * time.Minute
3436
)
3537

3638
// DefaultedLoopTimeout will default the timeout if it is zero-valued.

0 commit comments

Comments
 (0)