88 "time"
99
1010 "github.com/aws/aws-sdk-go/aws"
11+ "github.com/aws/aws-sdk-go/aws/awserr"
1112 "github.com/aws/aws-sdk-go/aws/session"
1213 "github.com/aws/aws-sdk-go/service/route53"
1314 "github.com/aws/aws-sdk-go/service/servicequotas"
@@ -17,9 +18,11 @@ import (
1718)
1819
const (
	// maxRetries is the attempt budget handed to the *WithBackoff helpers as
	// their maxTries argument.
	maxRetries = 10
	// route53MaxConcurrency is the capacity of the semaphore channel that
	// bounds the per-hosted-zone goroutines in getRecordsPerHostedZoneMetrics.
	route53MaxConcurrency = 5
	// route53ServiceCode names the Route53 service.
	// NOTE(review): presumably the Service Quotas service code; confirm at the call site.
	route53ServiceCode = "route53"
	// hostedZonesQuotaCode is a quota identifier.
	// NOTE(review): presumably the Service Quotas code for the hosted-zones quota; confirm.
	hostedZonesQuotaCode = "L-4EA4796A"
	// errorCodeThrottling is the awserr.Error code that isThrottlingError
	// treats as a throttled (retryable) API call.
	errorCodeThrottling = "Throttling"
)
2427
2528type Route53Exporter struct {
@@ -66,7 +69,7 @@ func (e *Route53Exporter) getRecordsPerHostedZoneMetrics(client *route53.Route53
6669
6770 wg := & sync.WaitGroup {}
6871 wg .Add (len (hostedZones ))
69- sem := make (chan int , 10 )
72+ sem := make (chan int , route53MaxConcurrency )
7073 defer close (sem )
7174 for i , hostedZone := range hostedZones {
7275
@@ -76,11 +79,10 @@ func (e *Route53Exporter) getRecordsPerHostedZoneMetrics(client *route53.Route53
7679 <- sem
7780 wg .Done ()
7881 }()
79- hostedZoneLimitOut , err := GetHostedZoneLimitWithBackoff (client , ctx , hostedZone .Id , maxRetries )
82+ hostedZoneLimitOut , err := GetHostedZoneLimitWithBackoff (client , ctx , hostedZone .Id , maxRetries , e . logger )
8083
8184 if err != nil {
8285 errChan <- fmt .Errorf ("Could not get Limits for hosted zone with ID '%s' and name '%s'. Error was: %s" , * hostedZone .Id , * hostedZone .Name , err .Error ())
83- level .Info (e .logger ).Log ("msg" , fmt .Sprintf ("Error while retrieving hosted zone limit for hosted zone with ID: %s" , * hostedZone .Id ), "err" , err )
8486 exporterMetrics .IncrementErrors ()
8587 return
8688 }
@@ -128,7 +130,7 @@ func (e *Route53Exporter) CollectLoop() {
128130 e .Cancel = ctxCancelFunc
129131 level .Info (e .logger ).Log ("msg" , "Updating Route53 metrics..." )
130132
131- hostedZones , err := getAllHostedZones (route53Svc , ctx )
133+ hostedZones , err := getAllHostedZones (route53Svc , ctx , e . logger )
132134
133135 level .Info (e .logger ).Log ("msg" , "Got all zones" )
134136 if err != nil {
@@ -173,20 +175,20 @@ func (e *Route53Exporter) Describe(ch chan<- *prometheus.Desc) {
173175 ch <- e .LastUpdateTime
174176}
175177
176- func getAllHostedZones (client * route53.Route53 , ctx context.Context ) ([]* route53.HostedZone , error ) {
178+ func getAllHostedZones (client * route53.Route53 , ctx context.Context , logger log. Logger ) ([]* route53.HostedZone , error ) {
177179 result := []* route53.HostedZone {}
178180
179181 listZonesInput := route53.ListHostedZonesInput {}
180182
181- listZonesOut , err := ListHostedZonesWithBackoff (client , ctx , & listZonesInput , maxRetries )
183+ listZonesOut , err := ListHostedZonesWithBackoff (client , ctx , & listZonesInput , maxRetries , logger )
182184 if err != nil {
183185 return nil , err
184186 }
185187 result = append (result , listZonesOut .HostedZones ... )
186188
187189 for * listZonesOut .IsTruncated {
188190 listZonesInput .Marker = listZonesOut .NextMarker
189- listZonesOut , err = client . ListHostedZonesWithContext ( ctx , & listZonesInput )
191+ listZonesOut , err = ListHostedZonesWithBackoff ( client , ctx , & listZonesInput , maxRetries , logger )
190192 if err != nil {
191193 return nil , err
192194 }
@@ -196,7 +198,7 @@ func getAllHostedZones(client *route53.Route53, ctx context.Context) ([]*route53
196198 return result , nil
197199}
198200
199- func ListHostedZonesWithBackoff (client * route53.Route53 , ctx context.Context , input * route53.ListHostedZonesInput , maxTries int ) (* route53.ListHostedZonesOutput , error ) {
201+ func ListHostedZonesWithBackoff (client * route53.Route53 , ctx context.Context , input * route53.ListHostedZonesInput , maxTries int , logger log. Logger ) (* route53.ListHostedZonesOutput , error ) {
200202 var listHostedZonesOut * route53.ListHostedZonesOutput
201203 var err error
202204
@@ -205,14 +207,17 @@ func ListHostedZonesWithBackoff(client *route53.Route53, ctx context.Context, in
205207 if err == nil {
206208 return listHostedZonesOut , err
207209 }
210+ if ! isThrottlingError (err ) {
211+ return nil , err
212+ }
213+ level .Debug (logger ).Log ("msg" , "Retrying throttling api call" , "tries" , i + 1 , "endpoint" , "ListHostedZones" )
208214 backOffSeconds := math .Pow (2 , float64 (i - 1 ))
209- fmt .Printf ("Backing off for %f.1 seconds\n " , backOffSeconds )
210215 time .Sleep (time .Duration (backOffSeconds ) * time .Second )
211216 }
212217 return nil , err
213218}
214219
215- func GetHostedZoneLimitWithBackoff (client * route53.Route53 , ctx context.Context , hostedZoneId * string , maxTries int ) (* route53.GetHostedZoneLimitOutput , error ) {
220+ func GetHostedZoneLimitWithBackoff (client * route53.Route53 , ctx context.Context , hostedZoneId * string , maxTries int , logger log. Logger ) (* route53.GetHostedZoneLimitOutput , error ) {
216221 hostedZoneLimitInput := & route53.GetHostedZoneLimitInput {
217222 HostedZoneId : hostedZoneId ,
218223 Type : aws .String (route53 .HostedZoneLimitTypeMaxRrsetsByZone ),
@@ -225,8 +230,20 @@ func GetHostedZoneLimitWithBackoff(client *route53.Route53, ctx context.Context,
225230 if err == nil {
226231 return hostedZoneLimitOut , err
227232 }
233+
234+ if ! isThrottlingError (err ) {
235+ return nil , err
236+ }
237+ level .Debug (logger ).Log ("msg" , "Retrying throttling api call" , "tries" , i + 1 , "endpoint" , "GetHostedZoneLimit" , "hostedZoneID" , hostedZoneId )
228238 backOffSeconds := math .Pow (2 , float64 (i - 1 ))
229239 time .Sleep (time .Duration (backOffSeconds ) * time .Second )
240+
230241 }
231242 return nil , err
232243}
244+
245+ // isThrottlingError returns true if the error given is an instance of awserr.Error and the error code matches the constant errorCodeThrottling. It's not compared against route53.ErrCodeThrottlingException as this does not match what the api is returning.
246+ func isThrottlingError (err error ) bool {
247+ awsError , isAwsError := err .(awserr.Error )
248+ return isAwsError && awsError .Code () == errorCodeThrottling
249+ }
0 commit comments