Skip to content

Commit 360e72a

Browse files
authored
Add a retry mechanism for ReadOne calls in rm.createResource (#81)
Issue #, if available: Description of changes: In some rare cases, and with some specific AWS APIs, calling `ReadOne` right after `rm.Create` can return a `NotFound` error. In that case we want to retry the `rm.ReadOne` call in the hope of receiving a correct response. This patch adds a backoff/retry mechanism around the `ReadOne` call in `rm.createResource`. By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license.
1 parent 594be6a commit 360e72a

File tree

6 files changed

+149
-1
lines changed

6 files changed

+149
-1
lines changed

ATTRIBUTION.md

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@ License version 2.0, we include the full text of the package's License below.
3838
* `k8s.io/client-go`
3939
* `sigs.k8s.io/controller-runtime`
4040
* `sigs.k8s.io/controller-tools`
41+
* `github.com/cenkalti/backoff`
4142

4243
### github.com/aws/aws-sdk-go
4344

@@ -1388,3 +1389,29 @@ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
13881389
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
13891390
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
13901391
SOFTWARE.
1392+
1393+
### github.com/cenkalti/backoff
1394+
1395+
The MIT License (MIT)
1396+
1397+
Copyright (c) 2014 Cenk Altı
1398+
1399+
Permission is hereby granted, free of charge, to any person obtaining a copy of
1400+
this software and associated documentation files (the "Software"), to deal in
1401+
the Software without restriction, including without limitation the rights to
1402+
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
1403+
the Software, and to permit persons to whom the Software is furnished to do so,
1404+
subject to the following conditions:
1405+
1406+
The above copyright notice and this permission notice shall be included in all
1407+
copies or substantial portions of the Software.
1408+
1409+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
1410+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
1411+
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
1412+
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
1413+
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
1414+
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
1415+
1416+
1417+
Subdependencies: N/A

go.mod

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ go 1.17
44

55
require (
66
github.com/aws/aws-sdk-go v1.42.0
7+
github.com/cenkalti/backoff/v4 v4.1.2
78
github.com/go-logr/logr v1.2.0
89
github.com/google/go-cmp v0.5.5
910
github.com/itchyny/gojq v0.12.6

go.sum

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,8 @@ github.com/bgentry/speakeasy v0.1.0/go.mod h1:+zsyZBPWlz7T6j88CTgSN5bM796AkVf0kB
7777
github.com/bketelsen/crypt v0.0.3-0.20200106085610-5cbc8cc4026c/go.mod h1:MKsuJmJgSg28kpZDP6UIiPt0e0Oz0kqKNGyRaWEPv84=
7878
github.com/bketelsen/crypt v0.0.4/go.mod h1:aI6NrJ0pMGgvZKL1iVgXLnfIFJtfV+bKCoqOes/6LfM=
7979
github.com/blang/semver v3.5.1+incompatible/go.mod h1:kRBLl5iJ+tD4TcOOxsy/0fnwebNt5EWlYSAyrTnjyyk=
80+
github.com/cenkalti/backoff/v4 v4.1.2 h1:6Yo7N8UP2K6LWZnW94DLVSSrbobcWdVzAYOisuDPIFo=
81+
github.com/cenkalti/backoff/v4 v4.1.2/go.mod h1:scbssz8iZGpm3xbr14ovlUdkxfGXNInqkPWOWmG2CLw=
8082
github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU=
8183
github.com/certifi/gocertifi v0.0.0-20191021191039-0944d244cd40/go.mod h1:sGbDF6GwGcLpkNXPUTkMRoywsNa/ol15pxFe6ERfguA=
8284
github.com/certifi/gocertifi v0.0.0-20200922220541-2c3bb06c6054/go.mod h1:sGbDF6GwGcLpkNXPUTkMRoywsNa/ol15pxFe6ERfguA=

pkg/errors/error.go

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,9 @@ var (
5858
// SecretNotFound is returned if specified kubernetes secret is not found.
5959
SecretNotFound = fmt.Errorf(
6060
"kubernetes secret not found")
61+
// ReadOneFailedAfterCreate is returned if a ReadOne call fails right after
62+
// a create operation.
63+
ReadOneFailedAfterCreate = fmt.Errorf("ReadOne call failed after a Create operation")
6164
)
6265

6366
// AWSError returns the type conversion for the supplied error to an aws-sdk-go
@@ -74,6 +77,12 @@ func AWSRequestFailure(err error) (awserr.RequestFailure, bool) {
7477
return awsRF, ok
7578
}
7679

80+
// NewReadOneFailAfterCreate takes a number of attempts and returns a
81+
// ReadOneFailedAfterCreate error if multiple ReadOne calls fails.
82+
func NewReadOneFailAfterCreate(numAttempts int) error {
83+
return fmt.Errorf("%w: number of attempts: %d", ReadOneFailedAfterCreate, numAttempts)
84+
}
85+
7786
// HTTPStatusCode returns the HTTP status code from the supplied error by
7887
// introspecting the error to see if it's an awserr.RequestFailure interface
7988
// and if so, calling StatusCode() on that type-converted RequestFailure. If

pkg/runtime/reconciler.go

Lines changed: 58 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,8 +15,10 @@ package runtime
1515

1616
import (
1717
"context"
18+
"fmt"
1819
"time"
1920

21+
backoff "github.com/cenkalti/backoff/v4"
2022
"github.com/go-logr/logr"
2123
"github.com/pkg/errors"
2224
corev1 "k8s.io/api/core/v1"
@@ -39,6 +41,10 @@ import (
3941
acktypes "github.com/aws-controllers-k8s/runtime/pkg/types"
4042
)
4143

44+
// backoffReadOneTimeout is the maximum total time spent retrying a ReadOne
// call with backoff after a Create operation before giving up.
const backoffReadOneTimeout = 10 * time.Second
47+
4248
// reconciler describes a generic reconciler within ACK.
4349
type reconciler struct {
4450
sc acktypes.ServiceController
@@ -380,7 +386,21 @@ func (r *resourceReconciler) createResource(
380386
observed, err := rm.ReadOne(ctx, latest)
381387
rlog.Exit("rm.ReadOne", err)
382388
if err != nil {
383-
return latest, err
389+
if err == ackerr.NotFound {
390+
// Some eventually-consistent APIs return a 404 from a
391+
// ReadOne operation immediately after a successful
392+
// Create operation. In these exceptional cases
393+
// we retry the ReadOne operation with a backoff
394+
// until we get the expected 200 from the ReadOne.
395+
rlog.Enter("rm.delayedReadOneAfterCreate")
396+
observed, err = r.delayedReadOneAfterCreate(ctx, rm, latest)
397+
rlog.Exit("rm.delayedReadOneAfterCreate", err)
398+
if err != nil {
399+
return latest, err
400+
}
401+
} else {
402+
return latest, err
403+
}
384404
}
385405

386406
// Take the status from the latest ReadOne
@@ -397,6 +417,43 @@ func (r *resourceReconciler) createResource(
397417
return latest, nil
398418
}
399419

420+
// delayedReadOneAfterCreate is a helper function called when a ReadOne call
421+
// fails with a 404 error right after a Create call. It uses a backoff/retry
422+
// mechanism to retrieve the observed state right after a readone call.
423+
func (r *resourceReconciler) delayedReadOneAfterCreate(
424+
ctx context.Context,
425+
rm acktypes.AWSResourceManager,
426+
res acktypes.AWSResource,
427+
) (acktypes.AWSResource, error) {
428+
var err error
429+
rlog := ackrtlog.FromContext(ctx)
430+
exit := rlog.Trace("r.delayedReadOneAfterCreate")
431+
defer exit(err)
432+
433+
bo := backoff.NewExponentialBackOff()
434+
bo.MaxElapsedTime = backoffReadOneTimeout
435+
ticker := backoff.NewTicker(bo)
436+
attempts := 0
437+
438+
var observed acktypes.AWSResource
439+
440+
for range ticker.C {
441+
attempts++
442+
443+
rlog.Enter(fmt.Sprintf("rm.ReadOne (attempt %d)", attempts))
444+
observed, err = rm.ReadOne(ctx, res)
445+
rlog.Exit(fmt.Sprintf("rm.ReadOne (attempt %d)", attempts), err)
446+
if err == nil || err != ackerr.NotFound {
447+
ticker.Stop()
448+
break
449+
}
450+
}
451+
if err != nil {
452+
return res, ackerr.NewReadOneFailAfterCreate(attempts)
453+
}
454+
return observed, nil
455+
}
456+
400457
// updateResource calls one or more AWS APIs to modify the backend AWS resource
401458
// and patches the CR's Metadata and Spec back to the Kubernetes API.
402459
//

pkg/runtime/reconciler_test.go

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -139,6 +139,58 @@ func managerFactoryMocks(
139139
return rmf, rd
140140
}
141141

142+
func TestReconcilerCreate_BackoffRetries(t *testing.T) {
143+
require := require.New(t)
144+
145+
ctx := context.TODO()
146+
arn := ackv1alpha1.AWSResourceName("mybook-arn")
147+
148+
desired, _, _ := resourceMocks()
149+
desired.On("ReplaceConditions", []*ackv1alpha1.Condition{}).Return()
150+
151+
ids := &ackmocks.AWSResourceIdentifiers{}
152+
ids.On("ARN").Return(&arn)
153+
154+
latest, latestRTObj, _ := resourceMocks()
155+
latest.On("Identifiers").Return(ids)
156+
157+
latest.On("Conditions").Return([]*ackv1alpha1.Condition{})
158+
latest.On(
159+
"ReplaceConditions",
160+
mock.AnythingOfType("[]*v1alpha1.Condition"),
161+
).Return()
162+
163+
rm := &ackmocks.AWSResourceManager{}
164+
rm.On("ResolveReferences", ctx, nil, desired).Return(
165+
desired, nil,
166+
).Times(2)
167+
rm.On("ReadOne", ctx, desired).Return(
168+
latest, ackerr.NotFound,
169+
).Once()
170+
rm.On("ReadOne", ctx, latest).Return(
171+
latest, ackerr.NotFound,
172+
).Times(4)
173+
rm.On("ReadOne", ctx, latest).Return(
174+
latest, nil,
175+
)
176+
rm.On("Create", ctx, desired).Return(
177+
latest, nil,
178+
)
179+
rm.On("IsSynced", ctx, latest).Return(true, nil)
180+
rmf, rd := managedResourceManagerFactoryMocks(desired, latest)
181+
182+
rm.On("LateInitialize", ctx, latest).Return(latest, nil)
183+
rd.On("IsManaged", desired).Return(true)
184+
rd.On("Delta", desired, latest).Return(ackcompare.NewDelta())
185+
rd.On("Delta", latest, latest).Return(ackcompare.NewDelta())
186+
187+
r, kc := reconcilerMocks(rmf)
188+
kc.On("Patch", ctx, latestRTObj, mock.AnythingOfType("*client.mergeFromPatch")).Return(nil)
189+
_, err := r.Sync(ctx, rm, desired)
190+
require.Nil(err)
191+
rm.AssertNumberOfCalls(t, "ReadOne", 6)
192+
}
193+
142194
func TestReconcilerCreate_UnManagedResource_CheckReferencesResolveTwice(t *testing.T) {
143195
require := require.New(t)
144196

0 commit comments

Comments
 (0)