@@ -258,7 +258,7 @@ func (m *MetricsCollectorAzureRmCosts) collectCostManagementMetrics(logger *zap.
 		params.TimePeriod = &timePeriod
 	}
 
-	result, err := m.sendCostQuery(m.Context(), logger, scope, params, nil)
+	result, err := m.sendCostQuery(m.Context(), logger, scope, params)
 	if err != nil {
 		logger.Panic(err)
 	}
@@ -399,10 +399,10 @@ func (m *MetricsCollectorAzureRmCosts) collectCostManagementMetrics(logger *zap.
 	time.Sleep(Config.Collectors.Costs.RequestDelay)
 }
 
-func (m *MetricsCollectorAzureRmCosts) sendCostQuery(ctx context.Context, logger *zap.SugaredLogger, scope string, parameters armcostmanagement.QueryDefinition, options *armcostmanagement.QueryClientUsageOptions) (armcostmanagement.QueryClientUsageResponse, error) {
+func (m *MetricsCollectorAzureRmCosts) sendCostQuery(ctx context.Context, logger *zap.SugaredLogger, scope string, parameters armcostmanagement.QueryDefinition) (armcostmanagement.QueryClientUsageResponse, error) {
 	clientOpts := AzureClient.NewArmClientOptions()
 
-	// cost queries should not retry soo fast, we have a strict rate limit on azure side
+	// Configure retry options; cost queries must not retry too quickly, Azure enforces a strict rate limit on this API.
 	clientOpts.Retry = policy.RetryOptions{
 		MaxRetries: 3,
 		RetryDelay: 30 * time.Second,
@@ -420,7 +420,7 @@ func (m *MetricsCollectorAzureRmCosts) sendCostQuery(ctx context.Context, logger
 		logger.Panic(err.Error())
 	}
 
-	// paging
+	// Set up the pipeline for paging.
	pl, err := armruntime.NewPipeline("azurerm-costs", gitTag, AzureClient.GetCred(), runtime.PipelineOptions{}, AzureClient.NewArmClientOptions())
 	if err != nil {
 		logger.Panic(err.Error())
@@ -445,6 +445,18 @@ func (m *MetricsCollectorAzureRmCosts) sendCostQuery(ctx context.Context, logger
 			}
 			defer resp.Body.Close()
 
+			if resp.StatusCode == http.StatusTooManyRequests {
+				retryAfterHeader := resp.Header.Get("X-Ms-Ratelimit-Microsoft.costmanagement-Entity-Retry-After")
+				retryAfter, err := strconv.Atoi(retryAfterHeader)
+				if err != nil {
+					logger.Errorf("Unable to parse retry-after header: %v", retryAfterHeader)
+					return fmt.Errorf("unable to parse retry-after header: %v", retryAfterHeader)
+				}
+				logger.Errorf("Received 429 Too Many Requests. Retrying after %d seconds. Headers: %v", retryAfter, resp.Header)
+				time.Sleep(time.Duration(retryAfter) * time.Second)
+				return fmt.Errorf("received 429 Too Many Requests, retrying after %d seconds", retryAfter)
+			}
+
 			if runtime.HasStatusCode(resp, http.StatusOK) {
 				pagerResult := armcostmanagement.QueryClientUsageResponse{}
 				if err := runtime.UnmarshalAsJSON(resp, &pagerResult); err == nil {
@@ -454,12 +466,16 @@ func (m *MetricsCollectorAzureRmCosts) sendCostQuery(ctx context.Context, logger
 					logger.Panic(err.Error())
 				}
 			} else {
-				return fmt.Errorf(`unexpected status code: %v`, resp.StatusCode)
+				return fmt.Errorf("unexpected status code: %v", resp.StatusCode)
 			}
 
 			return nil
 		}()
 		if err != nil {
+			// On a rate-limit error the closure has already slept for the server-specified delay, so simply retry the request.
+			if strings.Contains(err.Error(), "received 429 Too Many Requests") {
+				continue
+			}
 			return result, err
 		}
 
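Taken together, the two added hunks implement a sleep-and-retry loop for the Azure Cost Management rate limit: the closure sleeps for the number of seconds reported in the X-Ms-Ratelimit-Microsoft.costmanagement-Entity-Retry-After header and returns a sentinel error, and the caller matches that error and re-issues the request. Below is a minimal standalone sketch of the same pattern; fetchWithRateLimitRetry, maxAttempts, and the example URL are illustrative names, not part of the collector.

package main

import (
	"fmt"
	"net/http"
	"strconv"
	"time"
)

// fetchWithRateLimitRetry issues GET requests and, on a 429 response, sleeps for
// the number of seconds reported by the cost-management rate-limit header before
// retrying. It gives up after maxAttempts rate-limited responses.
func fetchWithRateLimitRetry(client *http.Client, url string, maxAttempts int) (*http.Response, error) {
	for attempt := 0; attempt < maxAttempts; attempt++ {
		resp, err := client.Get(url)
		if err != nil {
			return nil, err
		}

		if resp.StatusCode != http.StatusTooManyRequests {
			return resp, nil
		}
		resp.Body.Close()

		// Fall back to a fixed delay if the header is missing or unparsable.
		delay := 30 * time.Second
		if v := resp.Header.Get("X-Ms-Ratelimit-Microsoft.costmanagement-Entity-Retry-After"); v != "" {
			if secs, parseErr := strconv.Atoi(v); parseErr == nil {
				delay = time.Duration(secs) * time.Second
			}
		}
		time.Sleep(delay)
	}
	return nil, fmt.Errorf("still rate limited after %d attempts", maxAttempts)
}

func main() {
	resp, err := fetchWithRateLimitRetry(http.DefaultClient, "https://example.com", 3)
	if err != nil {
		fmt.Println("request failed:", err)
		return
	}
	defer resp.Body.Close()
	fmt.Println("status:", resp.Status)
}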