Skip to content

Commit 9ef95ea

Browse files
Retry GCE's (new?) 403 ReadRequests errors (#5723) (#4064)
Signed-off-by: Modular Magician <[email protected]>
1 parent 51b09fb commit 9ef95ea

File tree

2 files changed

+27
-0
lines changed

2 files changed

+27
-0
lines changed

.changelog/5723.txt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
```release-note:enhancement
2+
provider: added retries for `ReadRequest` errors incorrectly coded as `403` errors, particularly in Google Compute Engine
3+
```

google-beta/error_retry_predicates.go

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,11 +32,20 @@ var defaultErrorRetryPredicates = []RetryErrorPredicateFunc{
3232
// Keeping it as a default for now.
3333
is409OperationInProgressError,
3434

35+
// GCE error codes: we don't have a way to add these to all GCE resources
36+
// easily, so add them globally.
37+
3538
// GCE Subnetworks are considered unready for a brief period when certain
3639
// operations are performed on them, and the scope is likely too broad to
3740
// apply a mutex. If we attempt an operation w/ an unready subnetwork, retry
3841
// it.
3942
isSubnetworkUnreadyError,
43+
44+
// As of February 2022 GCE seems to have added extra quota enforcement on
45+
// reads, causing significant failure for our CI and for large customers.
46+
// GCE returns the wrong error code, as this should be a 429, which we retry
47+
// already.
48+
is403ReadRequestsForMinuteError,
4049
}
4150

4251
/** END GLOBAL ERROR RETRY PREDICATES HERE **/
@@ -116,6 +125,21 @@ func isSubnetworkUnreadyError(err error) (bool, string) {
116125
return false, ""
117126
}
118127

128+
// GCE (and possibly other APIs) incorrectly return a 403 rather than a 429 on
129+
// read-request rate limits; such errors are treated as retryable.
130+
func is403ReadRequestsForMinuteError(err error) (bool, string) {
131+
gerr, ok := err.(*googleapi.Error)
132+
if !ok {
133+
return false, ""
134+
}
135+
136+
if gerr.Code == 403 && strings.Contains(gerr.Body, "Quota exceeded for quota metric") && strings.Contains(gerr.Body, "Read requests per minute") {
137+
log.Printf("[DEBUG] Dismissed an error as retryable based on error code 403 and error message 'Quota exceeded for quota metric' on metric `Read requests per minute`: %s", err)
138+
return true, "Read requests per minute"
139+
}
140+
return false, ""
141+
}
142+
119143
// Retry on common googleapi error codes for retryable errors.
120144
// TODO(#5609): This may not need to be applied globally - figure out
121145
// what retryable error codes apply to which API.

0 commit comments

Comments
 (0)