@@ -29,6 +29,7 @@ export abstract class BaseEmbeddingProvider implements EmbeddingProvider {
2929
3030 private readonly requestLimiter : Bottleneck ;
3131 private readonly tokenLimiter ?: Bottleneck ;
32+ private readonly tokenLimiterMaxConcurrent ?: number ;
3233
3334 constructor (
3435 public readonly config : EmbeddingModelConfig ,
@@ -46,6 +47,7 @@ export abstract class BaseEmbeddingProvider implements EmbeddingProvider {
4647 this . concurrencyLimit ,
4748 Math . ceil ( limits . maxTokensPerMinute )
4849 ) ;
50+ this . tokenLimiterMaxConcurrent = tokenConcurrency ;
4951 this . tokenLimiter = createRateLimiter ( tokenConcurrency , limits . maxTokensPerMinute ) ;
5052 }
5153 }
@@ -104,7 +106,10 @@ export abstract class BaseEmbeddingProvider implements EmbeddingProvider {
104106 return ;
105107 }
106108
107- const weight = Math . max ( 1 , Math . ceil ( tokens ) ) ;
109+ // Clamp the weight to the token limiter's maxConcurrent: Bottleneck rejects any
110+ // job whose weight exceeds maxConcurrent by throwing a BottleneckError.
111+ const maxConcurrent = this . tokenLimiterMaxConcurrent ?? this . concurrencyLimit ;
112+ const weight = Math . max ( 1 , Math . min ( Math . ceil ( tokens ) , maxConcurrent ) ) ;
108113 await this . tokenLimiter . schedule ( { weight } , async ( ) => undefined ) ;
109114 }
110115}
@@ -115,6 +120,7 @@ export abstract class BaseChatProvider implements ChatProvider {
115120
116121 private readonly requestLimiter : Bottleneck ;
117122 private readonly tokenLimiter ?: Bottleneck ;
123+ private readonly tokenLimiterMaxConcurrent ?: number ;
118124
119125 constructor (
120126 public readonly config : ChatModelConfig ,
@@ -131,6 +137,7 @@ export abstract class BaseChatProvider implements ChatProvider {
131137 this . concurrencyLimit ,
132138 Math . ceil ( limits . maxTokensPerMinute )
133139 ) ;
140+ this . tokenLimiterMaxConcurrent = tokenConcurrency ;
134141 this . tokenLimiter = createRateLimiter ( tokenConcurrency , limits . maxTokensPerMinute ) ;
135142 }
136143 }
@@ -265,7 +272,10 @@ export abstract class BaseChatProvider implements ChatProvider {
265272 return ;
266273 }
267274
268- const weight = Math . max ( 1 , Math . ceil ( tokens ) ) ;
275+ // Clamp the weight to the token limiter's maxConcurrent: Bottleneck rejects any
276+ // job whose weight exceeds maxConcurrent by throwing a BottleneckError.
277+ const maxConcurrent = this . tokenLimiterMaxConcurrent ?? this . concurrencyLimit ;
278+ const weight = Math . max ( 1 , Math . min ( Math . ceil ( tokens ) , maxConcurrent ) ) ;
269279 await this . tokenLimiter . schedule ( { weight } , async ( ) => undefined ) ;
270280 }
271281}
0 commit comments