Skip to content

Commit 3569e57

Browse files
committed
feat: enhance quota management with backoff levels and cooldown logic
1 parent 20985d1 commit 3569e57

File tree

2 files changed

+37
-3
lines changed

2 files changed

+37
-3
lines changed

sdk/cliproxy/auth/manager.go

Lines changed: 35 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,8 @@ const (
4040
refreshCheckInterval = 5 * time.Second
4141
refreshPendingBackoff = time.Minute
4242
refreshFailureBackoff = 5 * time.Minute
43+
quotaBackoffBase = time.Second
44+
quotaBackoffMax = 30 * time.Minute
4345
)
4446

4547
// Result captures execution outcome used to adjust auth state.
@@ -532,9 +534,15 @@ func (m *Manager) MarkResult(ctx context.Context, result Result) {
532534
suspendReason = "payment_required"
533535
shouldSuspendModel = true
534536
case 429:
535-
next := now.Add(30 * time.Minute)
537+
cooldown, nextLevel := nextQuotaCooldown(state.Quota.BackoffLevel)
538+
next := now.Add(cooldown)
536539
state.NextRetryAfter = next
537-
state.Quota = QuotaState{Exceeded: true, Reason: "quota", NextRecoverAt: next}
540+
state.Quota = QuotaState{
541+
Exceeded: true,
542+
Reason: "quota",
543+
NextRecoverAt: next,
544+
BackoffLevel: nextLevel,
545+
}
538546
suspendReason = "quota"
539547
shouldSuspendModel = true
540548
setModelQuota = true
@@ -608,6 +616,7 @@ func updateAggregatedAvailability(auth *Auth, now time.Time) {
608616
earliestRetry := time.Time{}
609617
quotaExceeded := false
610618
quotaRecover := time.Time{}
619+
maxBackoffLevel := 0
611620
for _, state := range auth.ModelStates {
612621
if state == nil {
613622
continue
@@ -636,6 +645,9 @@ func updateAggregatedAvailability(auth *Auth, now time.Time) {
636645
if quotaRecover.IsZero() || (!state.Quota.NextRecoverAt.IsZero() && state.Quota.NextRecoverAt.Before(quotaRecover)) {
637646
quotaRecover = state.Quota.NextRecoverAt
638647
}
648+
if state.Quota.BackoffLevel > maxBackoffLevel {
649+
maxBackoffLevel = state.Quota.BackoffLevel
650+
}
639651
}
640652
}
641653
auth.Unavailable = allUnavailable
@@ -648,10 +660,12 @@ func updateAggregatedAvailability(auth *Auth, now time.Time) {
648660
auth.Quota.Exceeded = true
649661
auth.Quota.Reason = "quota"
650662
auth.Quota.NextRecoverAt = quotaRecover
663+
auth.Quota.BackoffLevel = maxBackoffLevel
651664
} else {
652665
auth.Quota.Exceeded = false
653666
auth.Quota.Reason = ""
654667
auth.Quota.NextRecoverAt = time.Time{}
668+
auth.Quota.BackoffLevel = 0
655669
}
656670
}
657671

@@ -685,6 +699,7 @@ func clearAuthStateOnSuccess(auth *Auth, now time.Time) {
685699
auth.Quota.Exceeded = false
686700
auth.Quota.Reason = ""
687701
auth.Quota.NextRecoverAt = time.Time{}
702+
auth.Quota.BackoffLevel = 0
688703
auth.LastError = nil
689704
auth.NextRetryAfter = time.Time{}
690705
auth.UpdatedAt = now
@@ -734,7 +749,9 @@ func applyAuthFailureState(auth *Auth, resultErr *Error, now time.Time) {
734749
auth.StatusMessage = "quota exhausted"
735750
auth.Quota.Exceeded = true
736751
auth.Quota.Reason = "quota"
737-
auth.Quota.NextRecoverAt = now.Add(30 * time.Minute)
752+
cooldown, nextLevel := nextQuotaCooldown(auth.Quota.BackoffLevel)
753+
auth.Quota.NextRecoverAt = now.Add(cooldown)
754+
auth.Quota.BackoffLevel = nextLevel
738755
auth.NextRetryAfter = auth.Quota.NextRecoverAt
739756
case 408, 500, 502, 503, 504:
740757
auth.StatusMessage = "transient upstream error"
@@ -746,6 +763,21 @@ func applyAuthFailureState(auth *Auth, resultErr *Error, now time.Time) {
746763
}
747764
}
748765

766+
// nextQuotaCooldown returns the next cooldown duration and updated backoff level for repeated quota errors.
767+
func nextQuotaCooldown(prevLevel int) (time.Duration, int) {
768+
if prevLevel < 0 {
769+
prevLevel = 0
770+
}
771+
cooldown := quotaBackoffBase * time.Duration(1<<prevLevel)
772+
if cooldown < quotaBackoffBase {
773+
cooldown = quotaBackoffBase
774+
}
775+
if cooldown >= quotaBackoffMax {
776+
return quotaBackoffMax, prevLevel
777+
}
778+
return cooldown, prevLevel + 1
779+
}
780+
749781
// List returns all auth entries currently known by the manager.
750782
func (m *Manager) List() []*Auth {
751783
m.mu.RLock()

sdk/cliproxy/auth/types.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,8 @@ type QuotaState struct {
6565
Reason string `json:"reason,omitempty"`
6666
// NextRecoverAt is when the credential may become available again.
6767
NextRecoverAt time.Time `json:"next_recover_at"`
68+
// BackoffLevel stores the progressive cooldown exponent used for rate limits.
69+
BackoffLevel int `json:"backoff_level,omitempty"`
6870
}
6971

7072
// ModelState captures the execution state for a specific model under an auth entry.

0 commit comments

Comments
 (0)