@@ -40,6 +40,8 @@ const (
4040 refreshCheckInterval = 5 * time .Second
4141 refreshPendingBackoff = time .Minute
4242 refreshFailureBackoff = 5 * time .Minute
43+ quotaBackoffBase = time .Second
44+ quotaBackoffMax = 30 * time .Minute
4345)
4446
4547// Result captures execution outcome used to adjust auth state.
@@ -532,9 +534,15 @@ func (m *Manager) MarkResult(ctx context.Context, result Result) {
532534 suspendReason = "payment_required"
533535 shouldSuspendModel = true
534536 case 429 :
535- next := now .Add (30 * time .Minute )
537+ cooldown , nextLevel := nextQuotaCooldown (state .Quota .BackoffLevel )
538+ next := now .Add (cooldown )
536539 state .NextRetryAfter = next
537- state .Quota = QuotaState {Exceeded : true , Reason : "quota" , NextRecoverAt : next }
540+ state .Quota = QuotaState {
541+ Exceeded : true ,
542+ Reason : "quota" ,
543+ NextRecoverAt : next ,
544+ BackoffLevel : nextLevel ,
545+ }
538546 suspendReason = "quota"
539547 shouldSuspendModel = true
540548 setModelQuota = true
@@ -608,6 +616,7 @@ func updateAggregatedAvailability(auth *Auth, now time.Time) {
608616 earliestRetry := time.Time {}
609617 quotaExceeded := false
610618 quotaRecover := time.Time {}
619+ maxBackoffLevel := 0
611620 for _ , state := range auth .ModelStates {
612621 if state == nil {
613622 continue
@@ -636,6 +645,9 @@ func updateAggregatedAvailability(auth *Auth, now time.Time) {
636645 if quotaRecover .IsZero () || (! state .Quota .NextRecoverAt .IsZero () && state .Quota .NextRecoverAt .Before (quotaRecover )) {
637646 quotaRecover = state .Quota .NextRecoverAt
638647 }
648+ if state .Quota .BackoffLevel > maxBackoffLevel {
649+ maxBackoffLevel = state .Quota .BackoffLevel
650+ }
639651 }
640652 }
641653 auth .Unavailable = allUnavailable
@@ -648,10 +660,12 @@ func updateAggregatedAvailability(auth *Auth, now time.Time) {
648660 auth .Quota .Exceeded = true
649661 auth .Quota .Reason = "quota"
650662 auth .Quota .NextRecoverAt = quotaRecover
663+ auth .Quota .BackoffLevel = maxBackoffLevel
651664 } else {
652665 auth .Quota .Exceeded = false
653666 auth .Quota .Reason = ""
654667 auth .Quota .NextRecoverAt = time.Time {}
668+ auth .Quota .BackoffLevel = 0
655669 }
656670}
657671
@@ -685,6 +699,7 @@ func clearAuthStateOnSuccess(auth *Auth, now time.Time) {
685699 auth .Quota .Exceeded = false
686700 auth .Quota .Reason = ""
687701 auth .Quota .NextRecoverAt = time.Time {}
702+ auth .Quota .BackoffLevel = 0
688703 auth .LastError = nil
689704 auth .NextRetryAfter = time.Time {}
690705 auth .UpdatedAt = now
@@ -734,7 +749,9 @@ func applyAuthFailureState(auth *Auth, resultErr *Error, now time.Time) {
734749 auth .StatusMessage = "quota exhausted"
735750 auth .Quota .Exceeded = true
736751 auth .Quota .Reason = "quota"
737- auth .Quota .NextRecoverAt = now .Add (30 * time .Minute )
752+ cooldown , nextLevel := nextQuotaCooldown (auth .Quota .BackoffLevel )
753+ auth .Quota .NextRecoverAt = now .Add (cooldown )
754+ auth .Quota .BackoffLevel = nextLevel
738755 auth .NextRetryAfter = auth .Quota .NextRecoverAt
739756 case 408 , 500 , 502 , 503 , 504 :
740757 auth .StatusMessage = "transient upstream error"
@@ -746,6 +763,21 @@ func applyAuthFailureState(auth *Auth, resultErr *Error, now time.Time) {
746763 }
747764}
748765
766+ // nextQuotaCooldown returns the next cooldown duration and updated backoff level for repeated quota errors.
767+ func nextQuotaCooldown (prevLevel int ) (time.Duration , int ) {
768+ if prevLevel < 0 {
769+ prevLevel = 0
770+ }
771+ cooldown := quotaBackoffBase * time .Duration (1 << prevLevel )
772+ if cooldown < quotaBackoffBase {
773+ cooldown = quotaBackoffBase
774+ }
775+ if cooldown >= quotaBackoffMax {
776+ return quotaBackoffMax , prevLevel
777+ }
778+ return cooldown , prevLevel + 1
779+ }
780+
749781// List returns all auth entries currently known by the manager.
750782func (m * Manager ) List () []* Auth {
751783 m .mu .RLock ()
0 commit comments