Skip to content

Commit c9de180

Browse files
authored
Merge pull request #3381 from sonasingh46/fix_intermittent_statuscode=0
mark context canceled or deadline exceeded errors as transient failures
2 parents 946fdd3 + 3e10a47 commit c9de180

File tree

3 files changed

+75
-0
lines changed

3 files changed

+75
-0
lines changed

azure/errors.go

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ limitations under the License.
1717
package azure
1818

1919
import (
20+
"context"
2021
"errors"
2122
"fmt"
2223
"time"
@@ -153,3 +154,12 @@ func IsOperationNotDoneError(target error) bool {
153154
}
154155
return errors.As(target, &OperationNotDoneError{})
155156
}
157+
158+
// IsContextDeadlineExceededOrCanceledError reports whether err is, or wraps,
// either context.DeadlineExceeded or context.Canceled. A nil err is never a
// match and yields false.
func IsContextDeadlineExceededOrCanceledError(err error) bool {
	switch {
	case errors.Is(err, context.DeadlineExceeded):
		return true
	case errors.Is(err, context.Canceled):
		return true
	default:
		// Covers nil as well: errors.Is(nil, target) is false for a non-nil target.
		return false
	}
}

azure/errors_test.go

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
package azure
2+
3+
import (
4+
"context"
5+
"testing"
6+
"time"
7+
8+
"github.com/pkg/errors"
9+
)
10+
11+
func TestIsContextDeadlineExceededOrCanceled(t *testing.T) {
12+
tests := []struct {
13+
name string
14+
want bool
15+
err error
16+
}{
17+
{
18+
name: "Context deadline exceeded error",
19+
err: func() error {
20+
ctx, cancel := context.WithDeadline(context.Background(), time.Now().Add(-7*time.Hour))
21+
defer cancel()
22+
return ctx.Err()
23+
}(),
24+
want: true,
25+
},
26+
{
27+
name: "Context canceled exceeded error",
28+
err: func() error {
29+
ctx, cancel := context.WithDeadline(context.Background(), time.Now().Add(1*time.Hour))
30+
cancel()
31+
return ctx.Err()
32+
}(),
33+
want: true,
34+
},
35+
{
36+
name: "Nil error",
37+
err: nil,
38+
want: false,
39+
},
40+
{
41+
name: "Error other than context deadline exceeded or canceled error",
42+
err: errors.New("dummy error"),
43+
want: false,
44+
},
45+
}
46+
for _, tt := range tests {
47+
t.Run(tt.name, func(t *testing.T) {
48+
if got := IsContextDeadlineExceededOrCanceledError(tt.err); got != tt.want {
49+
t.Errorf("IsContextDeadlineExceededOrCanceled() = %v, want %v", got, tt.want)
50+
}
51+
})
52+
}
53+
}

azure/services/async/async.go

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -140,6 +140,7 @@ func (s *Service) CreateOrUpdateResource(ctx context.Context, spec azure.Resourc
140140
log.V(2).Info(fmt.Sprintf("%sing resource", logMessageVerbPrefix), "service", serviceName, "resource", resourceName, "resourceGroup", rgName)
141141
result, sdkFuture, err := s.Creator.CreateOrUpdateAsync(ctx, spec, parameters)
142142
errWrapped := errors.Wrapf(err, fmt.Sprintf("failed to %se resource %s/%s (service: %s)", logMessageVerbPrefix, rgName, resourceName, serviceName))
143+
143144
if sdkFuture != nil {
144145
future, err := converters.SDKToFuture(sdkFuture, infrav1.PutFuture, serviceName, resourceName, rgName)
145146
if err != nil {
@@ -148,6 +149,11 @@ func (s *Service) CreateOrUpdateResource(ctx context.Context, spec azure.Resourc
148149
s.Scope.SetLongRunningOperationState(future)
149150
return nil, azure.WithTransientError(azure.NewOperationNotDoneError(future), getRequeueAfterFromFuture(sdkFuture))
150151
} else if err != nil {
152+
// If it is an intermittent failure with context deadline exceeded or canceled as the reconciler could not complete
153+
// in the max amount of time, mark it as a transient error and return.
154+
if azure.IsContextDeadlineExceededOrCanceledError(ctx.Err()) {
155+
return nil, azure.WithTransientError(errWrapped, getRetryAfterFromError(err))
156+
}
151157
return nil, errWrapped
152158
}
153159

@@ -174,6 +180,7 @@ func (s *Service) DeleteResource(ctx context.Context, spec azure.ResourceSpecGet
174180
// No long running operation is active, so delete the resource.
175181
log.V(2).Info("deleting resource", "service", serviceName, "resource", resourceName, "resourceGroup", rgName)
176182
sdkFuture, err := s.Deleter.DeleteAsync(ctx, spec)
183+
177184
if sdkFuture != nil {
178185
future, err := converters.SDKToFuture(sdkFuture, infrav1.DeleteFuture, serviceName, resourceName, rgName)
179186
if err != nil {
@@ -186,6 +193,11 @@ func (s *Service) DeleteResource(ctx context.Context, spec azure.ResourceSpecGet
186193
// already deleted
187194
return nil
188195
}
196+
// If it is an intermittent failure with context deadline exceeded or canceled as the reconciler could not complete
197+
// in the max amount of time, mark it as a transient error and return.
198+
if azure.IsContextDeadlineExceededOrCanceledError(ctx.Err()) {
199+
return azure.WithTransientError(err, getRetryAfterFromError(err))
200+
}
189201
return errors.Wrapf(err, "failed to delete resource %s/%s (service: %s)", rgName, resourceName, serviceName)
190202
}
191203

0 commit comments

Comments
 (0)