@@ -264,25 +264,6 @@ const EMBED_TIMEOUT_MS = 10_000;
264264 */
// Each retry's payload must be at most 50% of the previous attempt's size,
// guaranteeing the retry loop shrinks geometrically and terminates.
const STRICT_REDUCTION_FACTOR = 0.5;
266266
267- /**
268- * Safe character limits per model for forced truncation.
269- * CJK characters typically consume ~3 tokens each, so the char limit is
270- * conservative compared to the token limit.
271- */
272- const SAFE_CHAR_LIMITS : Record < string , number > = {
273- "nomic-embed-text" : 2300 ,
274- "mxbai-embed-large" : 2300 ,
275- "all-MiniLM-L6-v2" : 1000 ,
276- "all-mpnet-base-v2" : 1500 ,
277- } ;
278-
279- const DEFAULT_SAFE_CHAR_LIMIT = 2000 ;
280-
281- /** Return a safe character count for forced truncation given a model name. */
282- function getSafeCharLimit ( model : string ) : number {
283- return SAFE_CHAR_LIMITS [ model ] ?? DEFAULT_SAFE_CHAR_LIMIT ;
284- }
285-
286267export function getVectorDimensions ( model : string , overrideDims ?: number ) : number {
287268 if ( overrideDims && overrideDims > 0 ) {
288269 return overrideDims ;
@@ -493,6 +474,10 @@ export class Embedder {
493474 return this . withTimeout ( ( signal ) => this . embedSingle ( text , this . _taskPassage , signal ) , "embedPassage" ) ;
494475 }
495476
  // Note: embedBatchQuery/embedBatchPassage are NOT wrapped with withTimeout because
  // they handle multiple texts in a single API call: a single EMBED_TIMEOUT_MS budget
  // would fire after that interval regardless of how many texts had already succeeded.
  // Per-text embedding within the batch is assumed to be covered by the SDK's own
  // timeout handling — confirm against the SDK documentation.
496481 async embedBatchQuery ( texts : string [ ] ) : Promise < number [ ] [ ] > {
497482 return this . embedMany ( texts , this . _taskQuery ) ;
498483 }
0 commit comments