@@ -2,7 +2,12 @@ package auth
22
33import (
44 "context"
5+ "encoding/json"
6+ "fmt"
7+ "math"
8+ "net/http"
59 "sort"
10+ "strconv"
611 "sync"
712 "time"
813
@@ -15,6 +20,84 @@ type RoundRobinSelector struct {
1520 cursors map [string ]int
1621}
1722
// blockReason classifies why a credential cannot currently be selected.
type blockReason int

const (
	// blockReasonNone means the credential is not blocked.
	blockReasonNone blockReason = iota
	// blockReasonCooldown means the credential is inside a retry window and
	// its quota is marked as exceeded.
	blockReasonCooldown
	// blockReasonDisabled means the credential, or its state for the
	// requested model, is disabled.
	blockReasonDisabled
	// blockReasonOther covers every remaining block: a nil credential, or a
	// retry window that is not attributed to an exceeded quota.
	blockReasonOther
)
31+
// modelCooldownError reports that no credential can serve a model until a
// cooldown elapses. Besides implementing error it exposes StatusCode and
// Headers; presumably an HTTP layer elsewhere uses them to build the
// response (verify against callers).
type modelCooldownError struct {
	model    string        // requested model; may be empty
	resetIn  time.Duration // time remaining until the earliest credential recovers
	provider string        // provider name; may be empty
}

// newModelCooldownError builds a modelCooldownError. A negative resetIn is
// clamped to zero.
func newModelCooldownError(model, provider string, resetIn time.Duration) *modelCooldownError {
	if resetIn < 0 {
		resetIn = 0
	}
	return &modelCooldownError{
		model:    model,
		provider: provider,
		resetIn:  resetIn,
	}
}

// retryAfterSeconds returns the remaining cooldown in whole seconds, rounded
// up so a client is never told to retry before the cooldown has elapsed.
// It never returns a negative value.
func (e *modelCooldownError) retryAfterSeconds() int {
	if e.resetIn <= 0 {
		return 0
	}
	return int(math.Ceil(e.resetIn.Seconds()))
}

// Error returns the error as a JSON document of the form
// {"error":{"code":"model_cooldown", ...}} rather than a plain sentence.
// The "provider" key is present only when a provider is set. "reset_time"
// and "reset_seconds" describe the same rounded-up duration.
func (e *modelCooldownError) Error() string {
	modelName := e.model
	if modelName == "" {
		modelName = "requested model"
	}
	message := fmt.Sprintf("All credentials for model %s are cooling down", modelName)
	if e.provider != "" {
		message = fmt.Sprintf("%s via provider %s", message, e.provider)
	}

	// Derive the human-readable duration from the same rounded-up value that
	// is reported as reset_seconds so the two fields can never disagree.
	resetSeconds := e.retryAfterSeconds()
	displayDuration := time.Duration(resetSeconds) * time.Second

	errorBody := map[string]any{
		"code":          "model_cooldown",
		"message":       message,
		"model":         e.model,
		"reset_time":    displayDuration.String(),
		"reset_seconds": resetSeconds,
	}
	if e.provider != "" {
		errorBody["provider"] = e.provider
	}
	payload := map[string]any{"error": errorBody}
	data, err := json.Marshal(payload)
	if err != nil {
		// Quote the message so embedded quotes or backslashes cannot break
		// the hand-built fallback document.
		return fmt.Sprintf(`{"error":{"code":"model_cooldown","message":%q}}`, message)
	}
	return string(data)
}

// StatusCode returns the HTTP status for this error: 429 Too Many Requests.
func (e *modelCooldownError) StatusCode() int {
	return http.StatusTooManyRequests
}

// Headers returns response headers for this error: a JSON Content-Type and a
// Retry-After holding the rounded-up cooldown in seconds.
func (e *modelCooldownError) Headers() http.Header {
	headers := make(http.Header)
	headers.Set("Content-Type", "application/json")
	headers.Set("Retry-After", strconv.Itoa(e.retryAfterSeconds()))
	return headers
}
100+
18101// Pick selects the next available auth for the provider in a round-robin manner.
19102func (s * RoundRobinSelector ) Pick (ctx context.Context , provider , model string , opts cliproxyexecutor.Options , auths []* Auth ) (* Auth , error ) {
20103 _ = ctx
@@ -27,14 +110,30 @@ func (s *RoundRobinSelector) Pick(ctx context.Context, provider, model string, o
27110 }
28111 available := make ([]* Auth , 0 , len (auths ))
29112 now := time .Now ()
113+ cooldownCount := 0
114+ var earliest time.Time
30115 for i := 0 ; i < len (auths ); i ++ {
31116 candidate := auths [i ]
32- if isAuthBlockedForModel (candidate , model , now ) {
117+ blocked , reason , next := isAuthBlockedForModel (candidate , model , now )
118+ if ! blocked {
119+ available = append (available , candidate )
33120 continue
34121 }
35- available = append (available , candidate )
122+ if reason == blockReasonCooldown {
123+ cooldownCount ++
124+ if ! next .IsZero () && (earliest .IsZero () || next .Before (earliest )) {
125+ earliest = next
126+ }
127+ }
36128 }
37129 if len (available ) == 0 {
130+ if cooldownCount == len (auths ) && ! earliest .IsZero () {
131+ resetIn := earliest .Sub (now )
132+ if resetIn < 0 {
133+ resetIn = 0
134+ }
135+ return nil , newModelCooldownError (model , provider , resetIn )
136+ }
38137 return nil , & Error {Code : "auth_unavailable" , Message : "no auth available" }
39138 }
40139 // Make round-robin deterministic even if caller's candidate order is unstable.
@@ -55,41 +154,54 @@ func (s *RoundRobinSelector) Pick(ctx context.Context, provider, model string, o
55154 return available [index % len (available )], nil
56155}
57156
58- func isAuthBlockedForModel (auth * Auth , model string , now time.Time ) bool {
157+ func isAuthBlockedForModel (auth * Auth , model string , now time.Time ) ( bool , blockReason , time. Time ) {
59158 if auth == nil {
60- return true
159+ return true , blockReasonOther , time. Time {}
61160 }
62161 if auth .Disabled || auth .Status == StatusDisabled {
63- return true
162+ return true , blockReasonDisabled , time. Time {}
64163 }
65- // If a specific model is requested, prefer its per-model state over any aggregated
66- // auth-level unavailable flag. This prevents a failure on one model (e.g., 429 quota)
67- // from blocking other models of the same provider that have no errors.
68164 if model != "" {
69165 if len (auth .ModelStates ) > 0 {
70166 if state , ok := auth .ModelStates [model ]; ok && state != nil {
71167 if state .Status == StatusDisabled {
72- return true
168+ return true , blockReasonDisabled , time. Time {}
73169 }
74170 if state .Unavailable {
75171 if state .NextRetryAfter .IsZero () {
76- return false
172+ return false , blockReasonNone , time. Time {}
77173 }
78174 if state .NextRetryAfter .After (now ) {
79- return true
175+ next := state .NextRetryAfter
176+ if ! state .Quota .NextRecoverAt .IsZero () && state .Quota .NextRecoverAt .After (now ) {
177+ next = state .Quota .NextRecoverAt
178+ }
179+ if next .Before (now ) {
180+ next = now
181+ }
182+ if state .Quota .Exceeded {
183+ return true , blockReasonCooldown , next
184+ }
185+ return true , blockReasonOther , next
80186 }
81187 }
82- // Explicit state exists and is not blocking.
83- return false
188+ return false , blockReasonNone , time.Time {}
84189 }
85190 }
86- // No explicit state for this model; do not block based on aggregated
87- // auth-level unavailable status. Allow trying this model.
88- return false
191+ return false , blockReasonNone , time.Time {}
89192 }
90- // No specific model context: fall back to auth-level unavailable window.
91193 if auth .Unavailable && auth .NextRetryAfter .After (now ) {
92- return true
194+ next := auth .NextRetryAfter
195+ if ! auth .Quota .NextRecoverAt .IsZero () && auth .Quota .NextRecoverAt .After (now ) {
196+ next = auth .Quota .NextRecoverAt
197+ }
198+ if next .Before (now ) {
199+ next = now
200+ }
201+ if auth .Quota .Exceeded {
202+ return true , blockReasonCooldown , next
203+ }
204+ return true , blockReasonOther , next
93205 }
94- return false
206+ return false , blockReasonNone , time. Time {}
95207}
0 commit comments