@@ -352,6 +352,22 @@ export function formatEmbeddingProviderError(
352352 return `${ genericPrefix } ${ detailText } ` ;
353353}
354354
// ============================================================================
// Safety Constants
// ============================================================================

/**
 * Upper bound on the `depth` counter threaded through embedSingle. A call that
 * arrives at this depth has its input force-truncated before embedding.
 */
const MAX_EMBED_DEPTH = 3;

/** Delay, in milliseconds, before a single embedding operation is aborted. */
const EMBED_TIMEOUT_MS = 10 * 1000;

/**
 * Fraction of the current text length that survives a forced truncation.
 * The truncated length is floor(length * factor), so every forced retry works
 * on at most half of the previous input, which guarantees progress.
 */
const STRICT_REDUCTION_FACTOR = 1 / 2;
370+
355371export function getVectorDimensions ( model : string , overrideDims ?: number ) : number {
356372 if ( overrideDims && overrideDims > 0 ) {
357373 return overrideDims ;
@@ -472,16 +488,23 @@ export class Embedder {
472488 /**
473489 * Call embeddings.create with automatic key rotation on rate-limit errors.
474490 * Tries each key in the pool at most once before giving up.
491+ * Accepts an optional AbortSignal to support true request cancellation.
475492 */
476- private async embedWithRetry ( payload : any ) : Promise < any > {
493+ private async embedWithRetry ( payload : any , signal ?: AbortSignal ) : Promise < any > {
477494 const maxAttempts = this . clients . length ;
478495 let lastError : Error | undefined ;
479496
480497 for ( let attempt = 0 ; attempt < maxAttempts ; attempt ++ ) {
481498 const client = this . nextClient ( ) ;
482499 try {
483- return await client . embeddings . create ( payload ) ;
500+ // Pass signal to OpenAI SDK if provided (SDK v6+ supports this)
501+ return await client . embeddings . create ( payload , signal ? { signal } : undefined ) ;
484502 } catch ( error ) {
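503+ // If aborted, re-throw immediately. NOTE(review): the OpenAI SDK may surface cancellation as APIUserAbortError, whose `name` is not necessarily 'AbortError' — confirm, and consider also checking `signal?.aborted`.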
504+ if ( error instanceof Error && error . name === 'AbortError' ) {
505+ throw error ;
506+ }
507+
485508 lastError = error instanceof Error ? error : new Error ( String ( error ) ) ;
486509
487510 if ( this . isRateLimitError ( error ) && attempt < maxAttempts - 1 ) {
@@ -510,6 +533,13 @@ export class Embedder {
510533 return this . clients . length ;
511534 }
512535
/**
 * Run a single embedding operation under a hard deadline of EMBED_TIMEOUT_MS.
 *
 * The factory is invoked synchronously with an AbortSignal. When the deadline
 * elapses, the returned promise rejects with an error naming the operation and
 * the signal is aborted so a cooperating operation can cancel its request.
 * The deadline is enforced even if the operation ignores the signal.
 *
 * @param promiseFactory Starts the operation; receives the abort signal.
 * @param _label Name of the operation, included in the timeout error message.
 * @returns The operation's result when it settles before the deadline.
 * @throws Error when the deadline elapses first; otherwise whatever the
 *         operation itself throws or rejects with.
 */
private withTimeout<T>(promiseFactory: (signal: AbortSignal) => Promise<T>, _label: string): Promise<T> {
  const controller = new AbortController();
  let timeoutId: ReturnType<typeof setTimeout> | undefined;

  const deadline = new Promise<never>((_resolve, reject) => {
    timeoutId = setTimeout(() => {
      // Reject before aborting so the labelled timeout error wins the race
      // against whatever rejection the abort triggers in the operation.
      reject(new Error(`[memory-lancedb-pro] ${_label} timed out after ${EMBED_TIMEOUT_MS}ms`));
      controller.abort();
    }, EMBED_TIMEOUT_MS);
  });

  let operation: Promise<T>;
  try {
    operation = promiseFactory(controller.signal);
  } catch (error) {
    // Convert a synchronous throw into a rejection so the timer is still
    // cleared by the `finally` below instead of staying pending.
    operation = Promise.reject(error);
  }

  // Promise.race subscribes to both promises, so a late rejection from the
  // aborted operation is observed and never becomes an unhandled rejection.
  return Promise.race([operation, deadline]).finally(() => clearTimeout(timeoutId));
}
542+
513543 // --------------------------------------------------------------------------
514544 // Backward-compatible API
515545 // --------------------------------------------------------------------------
@@ -534,13 +564,17 @@ export class Embedder {
534564 // --------------------------------------------------------------------------
535565
/**
 * Embed one query text using the query task setting.
 * The work is delegated to embedSingle at depth 0 and runs inside withTimeout,
 * which supplies the abort signal.
 */
async embedQuery(text: string): Promise<number[]> {
  const runEmbedding = (signal: AbortSignal): Promise<number[]> =>
    this.embedSingle(text, this._taskQuery, 0, signal);
  return this.withTimeout(runEmbedding, "embedQuery");
}
539569
/**
 * Embed one passage text using the passage task setting.
 * The work is delegated to embedSingle at depth 0 and runs inside withTimeout,
 * which supplies the abort signal.
 */
async embedPassage(text: string): Promise<number[]> {
  const runEmbedding = (signal: AbortSignal): Promise<number[]> =>
    this.embedSingle(text, this._taskPassage, 0, signal);
  return this.withTimeout(runEmbedding, "embedPassage");
}
543573
// embedBatchQuery/embedBatchPassage intentionally bypass withTimeout: a batch
// pushes several texts through one embedMany call, so a single
// EMBED_TIMEOUT_MS deadline would apply to the whole batch no matter how many
// texts it contains.
// NOTE(review): per the original author's note, batches rely on the SDK's own
// timeout handling instead — confirm that it is configured.
async embedBatchQuery(texts: string[]): Promise<number[][]> {
  const vectors = await this.embedMany(texts, this._taskQuery);
  return vectors;
}
@@ -595,17 +629,32 @@ export class Embedder {
595629 return payload ;
596630 }
597631
598- private async embedSingle ( text : string , task ?: string ) : Promise < number [ ] > {
632+ private async embedSingle ( text : string , task ?: string , depth : number = 0 , signal ?: AbortSignal ) : Promise < number [ ] > {
599633 if ( ! text || text . trim ( ) . length === 0 ) {
600634 throw new Error ( "Cannot embed empty text" ) ;
601635 }
602636
637+ // FR-01: Recursion depth limit — force truncate when too deep
638+ if ( depth >= MAX_EMBED_DEPTH ) {
639+ const safeLimit = Math . floor ( text . length * STRICT_REDUCTION_FACTOR ) ;
640+ console . warn (
641+ `[memory-lancedb-pro] Recursion depth ${ depth } reached MAX_EMBED_DEPTH (${ MAX_EMBED_DEPTH } ), ` +
642+ `force-truncating ${ text . length } chars → ${ safeLimit } chars (strict ${ STRICT_REDUCTION_FACTOR * 100 } % reduction)`
643+ ) ;
644+ if ( safeLimit < 100 ) {
645+ throw new Error (
646+ `[memory-lancedb-pro] Failed to embed: input too large for model context after ${ MAX_EMBED_DEPTH } retries`
647+ ) ;
648+ }
649+ text = text . slice ( 0 , safeLimit ) ;
650+ }
651+
603652 // Check cache first
604653 const cached = this . _cache . get ( text , task ) ;
605654 if ( cached ) return cached ;
606655
607656 try {
608- const response = await this . embedWithRetry ( this . buildPayload ( text , task ) ) ;
657+ const response = await this . embedWithRetry ( this . buildPayload ( text , task ) , signal ) ;
609658 const embedding = response . data [ 0 ] ?. embedding as number [ ] | undefined ;
610659 if ( ! embedding ) {
611660 throw new Error ( "No embedding returned from provider" ) ;
@@ -628,12 +677,35 @@ export class Embedder {
628677 throw new Error ( `Failed to chunk document: ${ errorMsg } ` ) ;
629678 }
630679
680+ // FR-03: Single chunk output detection — if smartChunk produced only
681+ // one chunk that is nearly the same size as the original text, chunking
682+ // did not actually reduce the problem. Force-truncate with STRICT
683+ // reduction to guarantee progress.
684+ if (
685+ chunkResult . chunks . length === 1 &&
686+ chunkResult . chunks [ 0 ] . length > text . length * 0.9
687+ ) {
688+ // Use strict reduction factor to guarantee each retry makes progress
689+ const safeLimit = Math . floor ( text . length * STRICT_REDUCTION_FACTOR ) ;
690+ console . warn (
691+ `[memory-lancedb-pro] smartChunk produced 1 chunk (${ chunkResult . chunks [ 0 ] . length } chars) ≈ original (${ text . length } chars). ` +
692+ `Force-truncating to ${ safeLimit } chars (strict ${ STRICT_REDUCTION_FACTOR * 100 } % reduction) to avoid infinite recursion.`
693+ ) ;
694+ if ( safeLimit < 100 ) {
695+ throw new Error (
696+ `[memory-lancedb-pro] Failed to embed: chunking couldn't reduce input size enough for model context`
697+ ) ;
698+ }
699+ const truncated = text . slice ( 0 , safeLimit ) ;
700+ return this . embedSingle ( truncated , task , depth + 1 , signal ) ;
701+ }
702+
631703 // Embed all chunks in parallel
632704 console . log ( `Split document into ${ chunkResult . chunkCount } chunks for embedding` ) ;
633705 const chunkEmbeddings = await Promise . all (
634706 chunkResult . chunks . map ( async ( chunk , idx ) => {
635707 try {
636- const embedding = await this . embedSingle ( chunk , task ) ;
708+ const embedding = await this . embedSingle ( chunk , task , depth + 1 , signal ) ;
637709 return { embedding } ;
638710 } catch ( chunkError ) {
639711 console . warn ( `Failed to embed chunk ${ idx } :` , chunkError ) ;
@@ -661,14 +733,9 @@ export class Embedder {
661733
662734 return finalEmbedding ;
663735 } catch ( chunkError ) {
664- // If chunking fails, throw the original error
665- console . warn ( `Chunking failed, using original error:` , chunkError ) ;
666- const friendly = formatEmbeddingProviderError ( error , {
667- baseURL : this . _baseURL ,
668- model : this . _model ,
669- mode : "single" ,
670- } ) ;
671- throw new Error ( friendly , { cause : error } ) ;
736+ // Preserve and surface the more specific chunkError
737+ console . warn ( `Chunking failed:` , chunkError ) ;
738+ throw chunkError ;
672739 }
673740 }
674741
0 commit comments