88 "strconv"
99 "strings"
1010 "sync"
11+ "sync/atomic"
1112 "time"
1213
1314 "github.com/google/uuid"
@@ -44,6 +45,13 @@ const (
4445 quotaBackoffMax = 30 * time .Minute
4546)
4647
48+ var quotaCooldownDisabled atomic.Bool // package-level switch; nextQuotaCooldown reads it with Load
49+
50+ // SetQuotaCooldownDisabled toggles quota cooldown scheduling globally.
    //
    // While the flag is true, nextQuotaCooldown returns a zero duration and the
    // unchanged backoff level, so the 429 handling paths leave their next-retry
    // timestamp at the zero time.Time instead of now+cooldown. Passing false
    // restores the normal backoff computation.
    //
    // The value is written with atomic.Bool.Store, so it may be flipped while
    // other goroutines are reading it.
51+ func SetQuotaCooldownDisabled (disable bool ) {
52+ quotaCooldownDisabled .Store (disable )
53+ }
54+
4755// Result captures execution outcome used to adjust auth state.
4856type Result struct {
4957 // AuthID references the auth that produced this result.
@@ -535,7 +543,10 @@ func (m *Manager) MarkResult(ctx context.Context, result Result) {
535543 shouldSuspendModel = true
536544 case 429 :
537545 cooldown , nextLevel := nextQuotaCooldown (state .Quota .BackoffLevel )
538- next := now .Add (cooldown )
546+ var next time.Time
547+ if cooldown > 0 {
548+ next = now .Add (cooldown )
549+ }
539550 state .NextRetryAfter = next
540551 state .Quota = QuotaState {
541552 Exceeded : true ,
@@ -750,9 +761,13 @@ func applyAuthFailureState(auth *Auth, resultErr *Error, now time.Time) {
750761 auth .Quota .Exceeded = true
751762 auth .Quota .Reason = "quota"
752763 cooldown , nextLevel := nextQuotaCooldown (auth .Quota .BackoffLevel )
753- auth .Quota .NextRecoverAt = now .Add (cooldown )
764+ var next time.Time
765+ if cooldown > 0 {
766+ next = now .Add (cooldown )
767+ }
768+ auth .Quota .NextRecoverAt = next
754769 auth .Quota .BackoffLevel = nextLevel
755- auth .NextRetryAfter = auth . Quota . NextRecoverAt
770+ auth .NextRetryAfter = next
756771 case 408 , 500 , 502 , 503 , 504 :
757772 auth .StatusMessage = "transient upstream error"
758773 auth .NextRetryAfter = now .Add (1 * time .Minute )
@@ -768,6 +783,9 @@ func nextQuotaCooldown(prevLevel int) (time.Duration, int) {
768783 if prevLevel < 0 {
769784 prevLevel = 0
770785 }
786+ if quotaCooldownDisabled .Load () {
787+ return 0 , prevLevel
788+ }
771789 cooldown := quotaBackoffBase * time .Duration (1 << prevLevel )
772790 if cooldown < quotaBackoffBase {
773791 cooldown = quotaBackoffBase
0 commit comments