Skip to content

Commit 19c8a98

Browse files
authored
Merge pull request #56 from mrWinston/r53-only-retry-api-call-if-throttling
Only retry Route 53 (R53) API calls that fail with a throttling error
2 parents 5462265 + a0a20be commit 19c8a98

File tree

1 file changed

+30
-13
lines changed

1 file changed

+30
-13
lines changed

route53.go

Lines changed: 30 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ import (
88
"time"
99

1010
"github.com/aws/aws-sdk-go/aws"
11+
"github.com/aws/aws-sdk-go/aws/awserr"
1112
"github.com/aws/aws-sdk-go/aws/session"
1213
"github.com/aws/aws-sdk-go/service/route53"
1314
"github.com/aws/aws-sdk-go/service/servicequotas"
@@ -17,9 +18,11 @@ import (
1718
)
1819

1920
const (
20-
maxRetries = 10
21-
route53ServiceCode = "route53"
22-
hostedZonesQuotaCode = "L-4EA4796A"
21+
maxRetries = 10
22+
route53MaxConcurrency = 5
23+
route53ServiceCode = "route53"
24+
hostedZonesQuotaCode = "L-4EA4796A"
25+
errorCodeThrottling = "Throttling"
2326
)
2427

2528
type Route53Exporter struct {
@@ -66,7 +69,7 @@ func (e *Route53Exporter) getRecordsPerHostedZoneMetrics(client *route53.Route53
6669

6770
wg := &sync.WaitGroup{}
6871
wg.Add(len(hostedZones))
69-
sem := make(chan int, 10)
72+
sem := make(chan int, route53MaxConcurrency)
7073
defer close(sem)
7174
for i, hostedZone := range hostedZones {
7275

@@ -76,11 +79,10 @@ func (e *Route53Exporter) getRecordsPerHostedZoneMetrics(client *route53.Route53
7679
<-sem
7780
wg.Done()
7881
}()
79-
hostedZoneLimitOut, err := GetHostedZoneLimitWithBackoff(client, ctx, hostedZone.Id, maxRetries)
82+
hostedZoneLimitOut, err := GetHostedZoneLimitWithBackoff(client, ctx, hostedZone.Id, maxRetries, e.logger)
8083

8184
if err != nil {
8285
errChan <- fmt.Errorf("Could not get Limits for hosted zone with ID '%s' and name '%s'. Error was: %s", *hostedZone.Id, *hostedZone.Name, err.Error())
83-
level.Info(e.logger).Log("msg", fmt.Sprintf("Error while retrieving hosted zone limit for hosted zone with ID: %s", *hostedZone.Id), "err", err)
8486
exporterMetrics.IncrementErrors()
8587
return
8688
}
@@ -128,7 +130,7 @@ func (e *Route53Exporter) CollectLoop() {
128130
e.Cancel = ctxCancelFunc
129131
level.Info(e.logger).Log("msg", "Updating Route53 metrics...")
130132

131-
hostedZones, err := getAllHostedZones(route53Svc, ctx)
133+
hostedZones, err := getAllHostedZones(route53Svc, ctx, e.logger)
132134

133135
level.Info(e.logger).Log("msg", "Got all zones")
134136
if err != nil {
@@ -173,20 +175,20 @@ func (e *Route53Exporter) Describe(ch chan<- *prometheus.Desc) {
173175
ch <- e.LastUpdateTime
174176
}
175177

176-
func getAllHostedZones(client *route53.Route53, ctx context.Context) ([]*route53.HostedZone, error) {
178+
func getAllHostedZones(client *route53.Route53, ctx context.Context, logger log.Logger) ([]*route53.HostedZone, error) {
177179
result := []*route53.HostedZone{}
178180

179181
listZonesInput := route53.ListHostedZonesInput{}
180182

181-
listZonesOut, err := ListHostedZonesWithBackoff(client, ctx, &listZonesInput, maxRetries)
183+
listZonesOut, err := ListHostedZonesWithBackoff(client, ctx, &listZonesInput, maxRetries, logger)
182184
if err != nil {
183185
return nil, err
184186
}
185187
result = append(result, listZonesOut.HostedZones...)
186188

187189
for *listZonesOut.IsTruncated {
188190
listZonesInput.Marker = listZonesOut.NextMarker
189-
listZonesOut, err = client.ListHostedZonesWithContext(ctx, &listZonesInput)
191+
listZonesOut, err = ListHostedZonesWithBackoff(client, ctx, &listZonesInput, maxRetries, logger)
190192
if err != nil {
191193
return nil, err
192194
}
@@ -196,7 +198,7 @@ func getAllHostedZones(client *route53.Route53, ctx context.Context) ([]*route53
196198
return result, nil
197199
}
198200

199-
func ListHostedZonesWithBackoff(client *route53.Route53, ctx context.Context, input *route53.ListHostedZonesInput, maxTries int) (*route53.ListHostedZonesOutput, error) {
201+
func ListHostedZonesWithBackoff(client *route53.Route53, ctx context.Context, input *route53.ListHostedZonesInput, maxTries int, logger log.Logger) (*route53.ListHostedZonesOutput, error) {
200202
var listHostedZonesOut *route53.ListHostedZonesOutput
201203
var err error
202204

@@ -205,14 +207,17 @@ func ListHostedZonesWithBackoff(client *route53.Route53, ctx context.Context, in
205207
if err == nil {
206208
return listHostedZonesOut, err
207209
}
210+
if !isThrottlingError(err) {
211+
return nil, err
212+
}
213+
level.Debug(logger).Log("msg", "Retrying throttling api call", "tries", i+1, "endpoint", "ListHostedZones")
208214
backOffSeconds := math.Pow(2, float64(i-1))
209-
fmt.Printf("Backing off for %f.1 seconds\n", backOffSeconds)
210215
time.Sleep(time.Duration(backOffSeconds) * time.Second)
211216
}
212217
return nil, err
213218
}
214219

215-
func GetHostedZoneLimitWithBackoff(client *route53.Route53, ctx context.Context, hostedZoneId *string, maxTries int) (*route53.GetHostedZoneLimitOutput, error) {
220+
func GetHostedZoneLimitWithBackoff(client *route53.Route53, ctx context.Context, hostedZoneId *string, maxTries int, logger log.Logger) (*route53.GetHostedZoneLimitOutput, error) {
216221
hostedZoneLimitInput := &route53.GetHostedZoneLimitInput{
217222
HostedZoneId: hostedZoneId,
218223
Type: aws.String(route53.HostedZoneLimitTypeMaxRrsetsByZone),
@@ -225,8 +230,20 @@ func GetHostedZoneLimitWithBackoff(client *route53.Route53, ctx context.Context,
225230
if err == nil {
226231
return hostedZoneLimitOut, err
227232
}
233+
234+
if !isThrottlingError(err) {
235+
return nil, err
236+
}
237+
level.Debug(logger).Log("msg", "Retrying throttling api call", "tries", i+1, "endpoint", "GetHostedZoneLimit", "hostedZoneID", hostedZoneId)
228238
backOffSeconds := math.Pow(2, float64(i-1))
229239
time.Sleep(time.Duration(backOffSeconds) * time.Second)
240+
230241
}
231242
return nil, err
232243
}
244+
245+
// isThrottlingError reports whether err is an awserr.Error whose Code() equals the constant errorCodeThrottling.
// It returns false for a nil error and for any error that does not itself satisfy awserr.Error; the plain type assertion used here does not unwrap wrapped errors.
// The code is deliberately not compared against route53.ErrCodeThrottlingException, as that constant does not match what the API is returning.
246+
func isThrottlingError(err error) bool {
247+
awsError, isAwsError := err.(awserr.Error)
248+
return isAwsError && awsError.Code() == errorCodeThrottling
249+
}

0 commit comments

Comments
 (0)