Skip to content
14 changes: 13 additions & 1 deletion index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2030,8 +2030,20 @@ const memoryLanceDBProPlugin = {
const agentId = resolveHookAgentId(ctx?.agentId, (event as any).sessionKey);
const accessibleScopes = scopeManager.getAccessibleScopes(agentId);

// FR-04: Truncate long prompts (e.g. file attachments) before embedding.
// Auto-recall only needs the user's intent, not full attachment text.
const MAX_RECALL_QUERY_LENGTH = 1_000;
let recallQuery = event.prompt;
if (recallQuery.length > MAX_RECALL_QUERY_LENGTH) {
const originalLength = recallQuery.length;
recallQuery = recallQuery.slice(0, MAX_RECALL_QUERY_LENGTH);
api.logger.info(
`memory-lancedb-pro: auto-recall query truncated from ${originalLength} to ${MAX_RECALL_QUERY_LENGTH} chars`
);
}

const results = filterUserMdExclusiveRecallResults(await retrieveWithRetry({
query: event.prompt,
query: recallQuery,
limit: 3,
scopeFilter: accessibleScopes,
source: "auto-recall",
Expand Down
37 changes: 34 additions & 3 deletions src/chunker.ts
Original file line number Diff line number Diff line change
Expand Up @@ -162,6 +162,32 @@ function sliceTrimWithIndices(text: string, start: number, end: number): { chunk
};
}

// ============================================================================
// CJK Detection
// ============================================================================

// CJK Unicode ranges: Unified Ideographs, Extension A, Compatibility,
// Hangul Syllables, Katakana, Hiragana
const CJK_RE =
  /[\u3040-\u309F\u30A0-\u30FF\u3400-\u4DBF\u4E00-\u9FFF\uAC00-\uD7AF\uF900-\uFAFF]/;

// Hoisted so the intent ("skip whitespace") is named at the call site.
const WHITESPACE_RE = /\s/;

/**
 * Ratio of CJK characters to total non-whitespace characters.
 * Returns 0 for empty or all-whitespace input (avoids division by zero).
 */
function getCjkRatio(text: string): number {
  let cjkCount = 0;
  let nonWsCount = 0;
  // for..of iterates full code points, so surrogate pairs are not split.
  for (const ch of text) {
    if (WHITESPACE_RE.test(ch)) continue;
    nonWsCount += 1;
    if (CJK_RE.test(ch)) cjkCount += 1;
  }
  return nonWsCount === 0 ? 0 : cjkCount / nonWsCount;
}

// CJK chars are ~2-3 tokens each. When text is predominantly CJK, we divide
// char limits by this factor to stay within the model's token budget.
const CJK_CHAR_TOKEN_DIVISOR = 2.5;
const CJK_RATIO_THRESHOLD = 0.3;

// ============================================================================
// Chunking Core
// ============================================================================
Expand Down Expand Up @@ -239,10 +265,15 @@ export function smartChunk(text: string, embedderModel?: string): ChunkResult {
const limit = embedderModel ? EMBEDDING_CONTEXT_LIMITS[embedderModel] : undefined;
const base = limit ?? 8192;

// CJK characters consume ~2-3 tokens each, so a char-based limit that works
// for Latin text will vastly overshoot the token budget for CJK-heavy text.
const cjkHeavy = getCjkRatio(text) > CJK_RATIO_THRESHOLD;
const divisor = cjkHeavy ? CJK_CHAR_TOKEN_DIVISOR : 1;

const config: ChunkerConfig = {
maxChunkSize: Math.max(1000, Math.floor(base * 0.7)),
overlapSize: Math.max(0, Math.floor(base * 0.05)),
minChunkSize: Math.max(100, Math.floor(base * 0.1)),
maxChunkSize: Math.max(1000, Math.floor(base * 0.7 / divisor)),
overlapSize: Math.max(0, Math.floor(base * 0.05 / divisor)),
minChunkSize: Math.max(100, Math.floor(base * 0.1 / divisor)),
semanticSplit: true,
maxLinesPerChunk: 50,
};
Expand Down
129 changes: 119 additions & 10 deletions src/embedder.ts
Original file line number Diff line number Diff line change
Expand Up @@ -248,6 +248,41 @@ export function formatEmbeddingProviderError(
return `${genericPrefix}${detailText}`;
}

// ============================================================================
// Safety Constants
// ============================================================================

/** Maximum recursion depth for embedSingle chunking retries. */
const MAX_EMBED_DEPTH = 3;

/** Global timeout for a single embedding operation (ms). */
const EMBED_TIMEOUT_MS = 10_000;

/**
 * Strictly decreasing character limit for forced truncation.
 * Each recursion level MUST reduce input by this factor to guarantee progress.
 */
const STRICT_REDUCTION_FACTOR = 0.5; // Each retry must be at most 50% of previous

/**
 * Safe character limits per model for forced truncation.
 * CJK characters typically consume ~3 tokens each, so the char limit is
 * conservative compared to the token limit.
 */
const SAFE_CHAR_LIMITS: Record<string, number> = {
  "nomic-embed-text": 2300,
  "mxbai-embed-large": 2300,
  "all-MiniLM-L6-v2": 1000,
  "all-mpnet-base-v2": 1500,
};

const DEFAULT_SAFE_CHAR_LIMIT = 2000;

/**
 * Return a safe character count for forced truncation given a model name.
 * Unknown models fall back to the conservative default.
 */
function getSafeCharLimit(model: string): number {
  const modelSpecific = SAFE_CHAR_LIMITS[model];
  return modelSpecific !== undefined ? modelSpecific : DEFAULT_SAFE_CHAR_LIMIT;
}

export function getVectorDimensions(model: string, overrideDims?: number): number {
if (overrideDims && overrideDims > 0) {
return overrideDims;
Expand Down Expand Up @@ -353,16 +388,23 @@ export class Embedder {
/**
* Call embeddings.create with automatic key rotation on rate-limit errors.
* Tries each key in the pool at most once before giving up.
* Accepts an optional AbortSignal to support true request cancellation.
*/
private async embedWithRetry(payload: any): Promise<any> {
private async embedWithRetry(payload: any, signal?: AbortSignal): Promise<any> {
const maxAttempts = this.clients.length;
let lastError: Error | undefined;

for (let attempt = 0; attempt < maxAttempts; attempt++) {
const client = this.nextClient();
try {
return await client.embeddings.create(payload);
// Pass signal to OpenAI SDK if provided (SDK v6+ supports this)
return await client.embeddings.create(payload, signal ? { signal } : undefined);
} catch (error) {
// If aborted, re-throw immediately
if (error instanceof Error && error.name === 'AbortError') {
throw error;
}

lastError = error instanceof Error ? error : new Error(String(error));

if (this.isRateLimitError(error) && attempt < maxAttempts - 1) {
Expand Down Expand Up @@ -391,6 +433,35 @@ export class Embedder {
return this.clients.length;
}

/** FR-05: Wrap a promise with a global timeout using AbortSignal for TRUE cancellation.
 * @param promiseFactory - A function that receives an AbortSignal and returns a promise
 * @param label - Operation name used in the timeout error message
 * @returns The factory's result, or a rejection after EMBED_TIMEOUT_MS.
 */
private withTimeout<T>(promiseFactory: (signal: AbortSignal) => Promise<T>, label: string): Promise<T> {
  const controller = new AbortController();
  const timeoutId = setTimeout(() => controller.abort(), EMBED_TIMEOUT_MS);

  // Create the promise with the signal so an abort can cancel the HTTP request.
  const promise = promiseFactory(controller.signal);

  // BUG FIX: if the timeout wins the race below, `promise` may still reject
  // later (e.g. an AbortError once the cancelled request tears down). With no
  // handler attached, that late rejection surfaces as an unhandledRejection
  // in Node. A no-op catch marks it handled without affecting the race, which
  // still observes the original `promise`.
  promise.catch(() => {});

  // When the timeout fires, controller.abort() will:
  // 1. Trigger the abort listener below to reject the race
  // 2. If embedWithRetry received the signal, cancel the underlying HTTP request
  const timeoutPromise = new Promise<never>((_, reject) => {
    controller.signal.addEventListener('abort', () => {
      reject(new Error(
        `[memory-lancedb-pro] ${label} timed out after ${EMBED_TIMEOUT_MS}ms`
      ));
    }, { once: true }); // listener fires at most once; avoids a lingering reference
  });

  // Promise.race infers T from its inputs, so the `as Promise<T>` cast is not
  // needed. The finally clears the timer on success so the process can exit.
  return Promise.race([promise, timeoutPromise]).finally(() => {
    clearTimeout(timeoutId);
  });
}

// --------------------------------------------------------------------------
// Backward-compatible API
// --------------------------------------------------------------------------
Expand All @@ -415,11 +486,11 @@ export class Embedder {
// --------------------------------------------------------------------------

/**
 * Embed a query string with a global timeout (FR-05).
 * BUG FIX: embedSingle's signature is (text, task, depth, signal) — the
 * AbortSignal must be the FOURTH argument. The previous call passed it third,
 * so it became the `depth` counter: cancellation never reached the HTTP
 * request, and the FR-01 depth guard compared an AbortSignal to a number
 * (always false), defeating the recursion limit.
 */
async embedQuery(text: string): Promise<number[]> {
  return this.withTimeout((signal) => this.embedSingle(text, this._taskQuery, 0, signal), "embedQuery");
}

/**
 * Embed a passage/document string with a global timeout (FR-05).
 * BUG FIX: embedSingle's signature is (text, task, depth, signal) — the
 * AbortSignal must be the FOURTH argument. The previous call passed it third,
 * so it became the `depth` counter: cancellation never reached the HTTP
 * request, and the FR-01 depth guard compared an AbortSignal to a number
 * (always false), defeating the recursion limit.
 */
async embedPassage(text: string): Promise<number[]> {
  return this.withTimeout((signal) => this.embedSingle(text, this._taskPassage, 0, signal), "embedPassage");
}

async embedBatchQuery(texts: string[]): Promise<number[][]> {
Expand Down Expand Up @@ -466,17 +537,32 @@ export class Embedder {
return payload;
}

private async embedSingle(text: string, task?: string): Promise<number[]> {
private async embedSingle(text: string, task?: string, depth: number = 0, signal?: AbortSignal): Promise<number[]> {
if (!text || text.trim().length === 0) {
throw new Error("Cannot embed empty text");
}

// FR-01: Recursion depth limit — force truncate when too deep
if (depth >= MAX_EMBED_DEPTH) {
const safeLimit = Math.floor(text.length * STRICT_REDUCTION_FACTOR);
console.warn(
`[memory-lancedb-pro] Recursion depth ${depth} reached MAX_EMBED_DEPTH (${MAX_EMBED_DEPTH}), ` +
`force-truncating ${text.length} chars → ${safeLimit} chars (strict ${STRICT_REDUCTION_FACTOR * 100}% reduction)`
);
if (safeLimit < 100) {
throw new Error(
`[memory-lancedb-pro] Failed to embed: input too large for model context after ${MAX_EMBED_DEPTH} retries`
);
}
text = text.slice(0, safeLimit);
}

// Check cache first
const cached = this._cache.get(text, task);
if (cached) return cached;

try {
const response = await this.embedWithRetry(this.buildPayload(text, task));
const response = await this.embedWithRetry(this.buildPayload(text, task), signal);
const embedding = response.data[0]?.embedding as number[] | undefined;
if (!embedding) {
throw new Error("No embedding returned from provider");
Expand All @@ -494,17 +580,40 @@ export class Embedder {
try {
console.log(`Document exceeded context limit (${errorMsg}), attempting chunking...`);
const chunkResult = smartChunk(text, this._model);

if (chunkResult.chunks.length === 0) {
throw new Error(`Failed to chunk document: ${errorMsg}`);
}

// FR-03: Single chunk output detection — if smartChunk produced only
// one chunk that is nearly the same size as the original text, chunking
// did not actually reduce the problem. Force-truncate with STRICT
// reduction to guarantee progress.
if (
chunkResult.chunks.length === 1 &&
chunkResult.chunks[0].length > text.length * 0.9
) {
// Use strict reduction factor to guarantee each retry makes progress
const safeLimit = Math.floor(text.length * STRICT_REDUCTION_FACTOR);
console.warn(
`[memory-lancedb-pro] smartChunk produced 1 chunk (${chunkResult.chunks[0].length} chars) ≈ original (${text.length} chars). ` +
`Force-truncating to ${safeLimit} chars (strict ${STRICT_REDUCTION_FACTOR * 100}% reduction) to avoid infinite recursion.`
);
if (safeLimit < 100) {
throw new Error(
`[memory-lancedb-pro] Failed to embed: chunking couldn't reduce input size enough for model context`
);
}
const truncated = text.slice(0, safeLimit);
return this.embedSingle(truncated, task, depth + 1, signal);
}

// Embed all chunks in parallel
console.log(`Split document into ${chunkResult.chunkCount} chunks for embedding`);
const chunkEmbeddings = await Promise.all(
chunkResult.chunks.map(async (chunk, idx) => {
try {
const embedding = await this.embedSingle(chunk, task);
const embedding = await this.embedSingle(chunk, task, depth + 1, signal);
return { embedding };
} catch (chunkError) {
console.warn(`Failed to embed chunk ${idx}:`, chunkError);
Expand All @@ -525,11 +634,11 @@ export class Embedder {
);

const finalEmbedding = avgEmbedding.map(v => v / chunkEmbeddings.length);

// Cache the result for the original text (using its hash)
this._cache.set(text, task, finalEmbedding);
console.log(`Successfully embedded long document as ${chunkEmbeddings.length} averaged chunks`);

return finalEmbedding;
} catch (chunkError) {
// If chunking fails, throw the original error
Expand Down
Loading