Skip to content

Commit 12e1d94

Browse files
modular-magician authored and ScottSuarez committed
Add retry for internal 160009 errors (#8017) (#5685)
Signed-off-by: Modular Magician <[email protected]>
1 parent 44d9044 commit 12e1d94

File tree

4 files changed

+84
-42
lines changed

4 files changed

+84
-42
lines changed

.changelog/8017.txt

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
```release-note:bug
2+
serviceusage: added retries to handle internal error: type: "googleapis.com" subject: "160009" when activating services
3+
```
4+
```release-note:bug
5+
cloudresourcemanager: added retries to handle internal error: type: "googleapis.com" subject: "160009" when activating "compute.googleapis.com" to destroy the default network when `auto_create_network` is `false`
6+
```

google-beta/resource_google_project.go

Lines changed: 64 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -629,41 +629,79 @@ func EnableServiceUsageProjectServices(services []string, project, billingProjec
629629

630630
func doEnableServicesRequest(services []string, project, billingProject, userAgent string, config *transport_tpg.Config, timeout time.Duration) error {
631631
var op *serviceusage.Operation
632-
var call ServicesCall
633-
err := transport_tpg.RetryTimeDuration(func() error {
634-
var rerr error
635-
if len(services) == 1 {
636-
// BatchEnable returns an error for a single item, so just enable
637-
// using service endpoint.
638-
name := fmt.Sprintf("projects/%s/services/%s", project, services[0])
639-
req := &serviceusage.EnableServiceRequest{}
640-
call = config.NewServiceUsageClient(userAgent).Services.Enable(name, req)
641-
} else {
642-
// Batch enable for multiple services.
643-
name := fmt.Sprintf("projects/%s", project)
644-
req := &serviceusage.BatchEnableServicesRequest{ServiceIds: services}
645-
call = config.NewServiceUsageClient(userAgent).Services.BatchEnable(name, req)
632+
633+
// errors can come up at multiple points, so there are a few levels of
634+
// retrying here.
635+
// logicalErr / waitErr: overall error on the logical operation (enabling services)
636+
// but possibly also errors when retrieving the LRO (these are rare)
637+
// err / reqErr: precondition errors received instead of an LRO when sending the request
638+
logicalErr := transport_tpg.RetryTimeDuration(func() error {
639+
err := transport_tpg.RetryTimeDuration(func() error {
640+
var reqErr error
641+
var call ServicesCall
642+
if len(services) == 1 {
643+
// BatchEnable returns an error for a single item, so enable with single endpoint
644+
name := fmt.Sprintf("projects/%s/services/%s", project, services[0])
645+
req := &serviceusage.EnableServiceRequest{}
646+
call = config.NewServiceUsageClient(userAgent).Services.Enable(name, req)
647+
} else {
648+
// Batch enable for multiple services.
649+
name := fmt.Sprintf("projects/%s", project)
650+
req := &serviceusage.BatchEnableServicesRequest{ServiceIds: services}
651+
call = config.NewServiceUsageClient(userAgent).Services.BatchEnable(name, req)
652+
}
653+
654+
if config.UserProjectOverride && billingProject != "" {
655+
call.Header().Add("X-Goog-User-Project", billingProject)
656+
}
657+
658+
op, reqErr = call.Do()
659+
return handleServiceUsageRetryablePreconditionError(reqErr)
660+
},
661+
timeout,
662+
transport_tpg.ServiceUsageServiceBeingActivated,
663+
)
664+
if err != nil {
665+
return errwrap.Wrapf("failed on request preconditions: {{err}}", err)
646666
}
647-
if config.UserProjectOverride && billingProject != "" {
648-
call.Header().Add("X-Goog-User-Project", billingProject)
667+
668+
waitErr := serviceUsageOperationWait(config, op, billingProject, fmt.Sprintf("Enable Project %q Services: %+v", project, services), userAgent, timeout)
669+
if waitErr != nil {
670+
return waitErr
649671
}
650-
op, rerr = call.Do()
651-
return handleServiceUsageRetryableError(rerr)
672+
673+
return nil
652674
},
653675
timeout,
654-
transport_tpg.ServiceUsageServiceBeingActivated,
676+
transport_tpg.ServiceUsageInternalError160009,
655677
)
656-
if err != nil {
657-
return errwrap.Wrapf("failed to send enable services request: {{err}}", err)
658-
}
659-
// Poll for the API to return
660-
waitErr := serviceUsageOperationWait(config, op, billingProject, fmt.Sprintf("Enable Project %q Services: %+v", project, services), userAgent, timeout)
661-
if waitErr != nil {
662-
return waitErr
678+
679+
if logicalErr != nil {
680+
return errwrap.Wrapf("failed to enable services: {{err}}", logicalErr)
663681
}
682+
664683
return nil
665684
}
666685

686+
// Handle errors that are retryable at call time for serviceusage
687+
// Specifically, errors in https://cloud.google.com/service-usage/docs/reference/rest/v1/services/batchEnable#response-body
688+
// Errors in operations are handled separately.
689+
// NOTE(rileykarson): This should probably be turned into a retry predicate
690+
func handleServiceUsageRetryablePreconditionError(err error) error {
691+
if err == nil {
692+
return nil
693+
}
694+
if gerr, ok := err.(*googleapi.Error); ok {
695+
if (gerr.Code == 400 || gerr.Code == 412) && gerr.Message == "Precondition check failed." {
696+
return &googleapi.Error{
697+
Code: 503,
698+
Message: "api returned \"precondition failed\" while enabling service",
699+
}
700+
}
701+
}
702+
return err
703+
}
704+
667705
// Retrieve a project's services from the API
668706
// if a service has been renamed, this function will list both the old and new
669707
// forms of the service. LIST responses are expected to return only the old or

google-beta/serviceusage_operation.go

Lines changed: 0 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,6 @@ import (
55
"time"
66

77
transport_tpg "github.com/hashicorp/terraform-provider-google-beta/google-beta/transport"
8-
"google.golang.org/api/googleapi"
98
"google.golang.org/api/serviceusage/v1"
109
)
1110

@@ -22,18 +21,3 @@ func serviceUsageOperationWait(config *transport_tpg.Config, op *serviceusage.Op
2221
}
2322
return ServiceUsageOperationWaitTime(config, m, project, activity, userAgent, timeout)
2423
}
25-
26-
func handleServiceUsageRetryableError(err error) error {
27-
if err == nil {
28-
return nil
29-
}
30-
if gerr, ok := err.(*googleapi.Error); ok {
31-
if (gerr.Code == 400 || gerr.Code == 412) && gerr.Message == "Precondition check failed." {
32-
return &googleapi.Error{
33-
Code: 503,
34-
Message: "api returned \"precondition failed\" while enabling service",
35-
}
36-
}
37-
}
38-
return err
39-
}

google-beta/transport/error_retry_predicates.go

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -234,6 +234,20 @@ func ServiceUsageServiceBeingActivated(err error) (bool, string) {
234234
return false, ""
235235
}
236236

237+
// See https://github.com/hashicorp/terraform-provider-google/issues/14691 for
238+
// details on the error message this handles
239+
// This is a post-operation error, so it arrives as a tpgresource.CommonOpError rather than a googleapi.Error
240+
func ServiceUsageInternalError160009(err error) (bool, string) {
241+
// a cyclical dependency between the transport and tpgresource packages prevents using tpgresource.CommonOpError here,
242+
// so just work off the error string. Ideally, we'd use that type instead.
243+
s := err.Error()
244+
if strings.Contains(s, "encountered internal error") && strings.Contains(s, "160009") && strings.Contains(s, "with failed services") {
245+
return true, "retrying internal error 160009."
246+
}
247+
248+
return false, ""
249+
}
250+
237251
// Retry if Bigquery operation returns a 403 with a specific message for
238252
// concurrent operations (which are implemented in terms of 'edit quota').
239253
func IsBigqueryIAMQuotaError(err error) (bool, string) {

0 commit comments

Comments
 (0)