Skip to content
14 changes: 13 additions & 1 deletion index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2030,8 +2030,20 @@ const memoryLanceDBProPlugin = {
const agentId = resolveHookAgentId(ctx?.agentId, (event as any).sessionKey);
const accessibleScopes = scopeManager.getAccessibleScopes(agentId);

// FR-04: Truncate long prompts (e.g. file attachments) before embedding.
// Auto-recall only needs the user's intent, not full attachment text.
const MAX_RECALL_QUERY_LENGTH = 1_000;
let recallQuery = event.prompt;
if (recallQuery.length > MAX_RECALL_QUERY_LENGTH) {
const originalLength = recallQuery.length;
recallQuery = recallQuery.slice(0, MAX_RECALL_QUERY_LENGTH);
api.logger.info(
`memory-lancedb-pro: auto-recall query truncated from ${originalLength} to ${MAX_RECALL_QUERY_LENGTH} chars`
);
}

const results = filterUserMdExclusiveRecallResults(await retrieveWithRetry({
query: event.prompt,
query: recallQuery,
limit: 3,
scopeFilter: accessibleScopes,
source: "auto-recall",
Expand Down
37 changes: 34 additions & 3 deletions src/chunker.ts
Original file line number Diff line number Diff line change
Expand Up @@ -162,6 +162,32 @@ function sliceTrimWithIndices(text: string, start: number, end: number): { chunk
};
}

// ============================================================================
// CJK Detection
// ============================================================================

// CJK Unicode ranges: Unified Ideographs, Extension A, Compatibility,
// Hangul Syllables, Katakana, Hiragana
const CJK_RE =
  /[\u3040-\u309F\u30A0-\u30FF\u3400-\u4DBF\u4E00-\u9FFF\uAC00-\uD7AF\uF900-\uFAFF]/;

// Hoisted so the intent ("skip whitespace") is named at the call site.
const WHITESPACE_RE = /\s/;

/**
 * Ratio of CJK characters to total non-whitespace characters.
 * Returns 0 for empty or all-whitespace input (avoids division by zero).
 */
function getCjkRatio(text: string): number {
  let cjkCount = 0;
  let nonWsCount = 0;
  // for..of iterates full code points, so surrogate pairs are not split.
  for (const ch of text) {
    if (WHITESPACE_RE.test(ch)) continue;
    nonWsCount += 1;
    if (CJK_RE.test(ch)) cjkCount += 1;
  }
  return nonWsCount === 0 ? 0 : cjkCount / nonWsCount;
}

// CJK chars are ~2-3 tokens each. When text is predominantly CJK, we divide
// char limits by this factor to stay within the model's token budget.
const CJK_CHAR_TOKEN_DIVISOR = 2.5;
const CJK_RATIO_THRESHOLD = 0.3;

// ============================================================================
// Chunking Core
// ============================================================================
Expand Down Expand Up @@ -239,10 +265,15 @@ export function smartChunk(text: string, embedderModel?: string): ChunkResult {
const limit = embedderModel ? EMBEDDING_CONTEXT_LIMITS[embedderModel] : undefined;
const base = limit ?? 8192;

// CJK characters consume ~2-3 tokens each, so a char-based limit that works
// for Latin text will vastly overshoot the token budget for CJK-heavy text.
const cjkHeavy = getCjkRatio(text) > CJK_RATIO_THRESHOLD;
const divisor = cjkHeavy ? CJK_CHAR_TOKEN_DIVISOR : 1;

const config: ChunkerConfig = {
maxChunkSize: Math.max(1000, Math.floor(base * 0.7)),
overlapSize: Math.max(0, Math.floor(base * 0.05)),
minChunkSize: Math.max(100, Math.floor(base * 0.1)),
maxChunkSize: Math.max(1000, Math.floor(base * 0.7 / divisor)),
overlapSize: Math.max(0, Math.floor(base * 0.05 / divisor)),
minChunkSize: Math.max(100, Math.floor(base * 0.1 / divisor)),
semanticSplit: true,
maxLinesPerChunk: 50,
};
Expand Down
129 changes: 119 additions & 10 deletions src/embedder.ts
Original file line number Diff line number Diff line change
Expand Up @@ -248,6 +248,41 @@ export function formatEmbeddingProviderError(
return `${genericPrefix}${detailText}`;
}

// ============================================================================
// Safety Constants
// ============================================================================

/** Maximum recursion depth for embedSingle chunking retries. */
const MAX_EMBED_DEPTH = 3;

/** Global timeout for a single embedding operation (ms). */
const EMBED_TIMEOUT_MS = 10_000;

/**
 * Strictly decreasing character limit for forced truncation.
 * Each recursion level MUST reduce input by this factor to guarantee progress.
 */
const STRICT_REDUCTION_FACTOR = 0.5; // Each retry must be at most 50% of previous

/**
 * Safe character limits per model for forced truncation.
 * CJK characters typically consume ~3 tokens each, so the char limit is
 * conservative compared to the token limit.
 */
const SAFE_CHAR_LIMITS: Record<string, number> = {
  "nomic-embed-text": 2300,
  "mxbai-embed-large": 2300,
  "all-MiniLM-L6-v2": 1000,
  "all-mpnet-base-v2": 1500,
};

const DEFAULT_SAFE_CHAR_LIMIT = 2000;

/**
 * Return a safe character count for forced truncation given a model name.
 * Unknown models fall back to the conservative default.
 */
function getSafeCharLimit(model: string): number {
  const modelSpecific = SAFE_CHAR_LIMITS[model];
  return modelSpecific !== undefined ? modelSpecific : DEFAULT_SAFE_CHAR_LIMIT;
}

export function getVectorDimensions(model: string, overrideDims?: number): number {
if (overrideDims && overrideDims > 0) {
return overrideDims;
Expand Down Expand Up @@ -353,16 +388,23 @@ export class Embedder {
/**
* Call embeddings.create with automatic key rotation on rate-limit errors.
* Tries each key in the pool at most once before giving up.
* Accepts an optional AbortSignal to support true request cancellation.
*/
private async embedWithRetry(payload: any): Promise<any> {
private async embedWithRetry(payload: any, signal?: AbortSignal): Promise<any> {
const maxAttempts = this.clients.length;
let lastError: Error | undefined;

for (let attempt = 0; attempt < maxAttempts; attempt++) {
const client = this.nextClient();
try {
return await client.embeddings.create(payload);
// Pass signal to OpenAI SDK if provided (SDK v6+ supports this)
return await client.embeddings.create(payload, signal ? { signal } : undefined);
} catch (error) {
// If aborted, re-throw immediately
if (error instanceof Error && error.name === 'AbortError') {
throw error;
}

lastError = error instanceof Error ? error : new Error(String(error));

if (this.isRateLimitError(error) && attempt < maxAttempts - 1) {
Expand Down Expand Up @@ -391,6 +433,35 @@ export class Embedder {
return this.clients.length;
}

/** FR-05: Wrap a promise with a global timeout using AbortSignal for TRUE cancellation.
 * @param promiseFactory - A function that receives an AbortSignal and returns a promise
 * @param label - Operation name used in the timeout error message
 * @returns The factory's result, or a rejection after EMBED_TIMEOUT_MS.
 */
private withTimeout<T>(promiseFactory: (signal: AbortSignal) => Promise<T>, label: string): Promise<T> {
  const controller = new AbortController();
  const timeoutId = setTimeout(() => controller.abort(), EMBED_TIMEOUT_MS);

  // Create the promise with the signal so an abort can cancel the HTTP request.
  const promise = promiseFactory(controller.signal);

  // BUG FIX: if the timeout wins the race below, `promise` may still reject
  // later (e.g. an AbortError once the cancelled request tears down). With no
  // handler attached, that late rejection surfaces as an unhandledRejection
  // in Node. A no-op catch marks it handled without affecting the race, which
  // still observes the original `promise`.
  promise.catch(() => {});

  // When the timeout fires, controller.abort() will:
  // 1. Trigger the abort listener below to reject the race
  // 2. If embedWithRetry received the signal, cancel the underlying HTTP request
  const timeoutPromise = new Promise<never>((_, reject) => {
    controller.signal.addEventListener('abort', () => {
      reject(new Error(
        `[memory-lancedb-pro] ${label} timed out after ${EMBED_TIMEOUT_MS}ms`
      ));
    }, { once: true }); // listener fires at most once; avoids a lingering reference
  });

  // Promise.race infers T from its inputs, so the `as Promise<T>` cast is not
  // needed. The finally clears the timer on success so the process can exit.
  return Promise.race([promise, timeoutPromise]).finally(() => {
    clearTimeout(timeoutId);
  });
}

// --------------------------------------------------------------------------
// Backward-compatible API
// --------------------------------------------------------------------------
Expand All @@ -415,11 +486,11 @@ export class Embedder {
// --------------------------------------------------------------------------

/**
 * Embed a query string with a global timeout (FR-05).
 * BUG FIX: embedSingle's signature is (text, task, depth, signal) — the
 * AbortSignal must be the FOURTH argument. The previous call passed it third,
 * so it became the `depth` counter: cancellation never reached the HTTP
 * request, and the FR-01 depth guard compared an AbortSignal to a number
 * (always false), defeating the recursion limit.
 */
async embedQuery(text: string): Promise<number[]> {
  return this.withTimeout((signal) => this.embedSingle(text, this._taskQuery, 0, signal), "embedQuery");
}

/**
 * Embed a passage/document string with a global timeout (FR-05).
 * BUG FIX: embedSingle's signature is (text, task, depth, signal) — the
 * AbortSignal must be the FOURTH argument. The previous call passed it third,
 * so it became the `depth` counter: cancellation never reached the HTTP
 * request, and the FR-01 depth guard compared an AbortSignal to a number
 * (always false), defeating the recursion limit.
 */
async embedPassage(text: string): Promise<number[]> {
  return this.withTimeout((signal) => this.embedSingle(text, this._taskPassage, 0, signal), "embedPassage");
}

async embedBatchQuery(texts: string[]): Promise<number[][]> {
Expand Down Expand Up @@ -466,17 +537,32 @@ export class Embedder {
return payload;
}

private async embedSingle(text: string, task?: string): Promise<number[]> {
private async embedSingle(text: string, task?: string, depth: number = 0, signal?: AbortSignal): Promise<number[]> {
if (!text || text.trim().length === 0) {
throw new Error("Cannot embed empty text");
}

// FR-01: Recursion depth limit — force truncate when too deep
if (depth >= MAX_EMBED_DEPTH) {
const safeLimit = Math.floor(text.length * STRICT_REDUCTION_FACTOR);
console.warn(
`[memory-lancedb-pro] Recursion depth ${depth} reached MAX_EMBED_DEPTH (${MAX_EMBED_DEPTH}), ` +
`force-truncating ${text.length} chars → ${safeLimit} chars (strict ${STRICT_REDUCTION_FACTOR * 100}% reduction)`
);
if (safeLimit < 100) {
throw new Error(
`[memory-lancedb-pro] Failed to embed: input too large for model context after ${MAX_EMBED_DEPTH} retries`
);
}
text = text.slice(0, safeLimit);
}

// Check cache first
const cached = this._cache.get(text, task);
if (cached) return cached;

try {
const response = await this.embedWithRetry(this.buildPayload(text, task));
const response = await this.embedWithRetry(this.buildPayload(text, task), signal);
const embedding = response.data[0]?.embedding as number[] | undefined;
if (!embedding) {
throw new Error("No embedding returned from provider");
Expand All @@ -494,17 +580,40 @@ export class Embedder {
try {
console.log(`Document exceeded context limit (${errorMsg}), attempting chunking...`);
const chunkResult = smartChunk(text, this._model);

if (chunkResult.chunks.length === 0) {
throw new Error(`Failed to chunk document: ${errorMsg}`);
}

// FR-03: Single chunk output detection — if smartChunk produced only
// one chunk that is nearly the same size as the original text, chunking
// did not actually reduce the problem. Force-truncate with STRICT
// reduction to guarantee progress.
if (
chunkResult.chunks.length === 1 &&
chunkResult.chunks[0].length > text.length * 0.9
) {
// Use strict reduction factor to guarantee each retry makes progress
const safeLimit = Math.floor(text.length * STRICT_REDUCTION_FACTOR);
console.warn(
`[memory-lancedb-pro] smartChunk produced 1 chunk (${chunkResult.chunks[0].length} chars) ≈ original (${text.length} chars). ` +
`Force-truncating to ${safeLimit} chars (strict ${STRICT_REDUCTION_FACTOR * 100}% reduction) to avoid infinite recursion.`
);
if (safeLimit < 100) {
throw new Error(
`[memory-lancedb-pro] Failed to embed: chunking couldn't reduce input size enough for model context`
);
}
const truncated = text.slice(0, safeLimit);
return this.embedSingle(truncated, task, depth + 1, signal);
}

// Embed all chunks in parallel
console.log(`Split document into ${chunkResult.chunkCount} chunks for embedding`);
const chunkEmbeddings = await Promise.all(
chunkResult.chunks.map(async (chunk, idx) => {
try {
const embedding = await this.embedSingle(chunk, task);
const embedding = await this.embedSingle(chunk, task, depth + 1, signal);
return { embedding };
} catch (chunkError) {
console.warn(`Failed to embed chunk ${idx}:`, chunkError);
Expand All @@ -525,11 +634,11 @@ export class Embedder {
);

const finalEmbedding = avgEmbedding.map(v => v / chunkEmbeddings.length);

// Cache the result for the original text (using its hash)
this._cache.set(text, task, finalEmbedding);
console.log(`Successfully embedded long document as ${chunkEmbeddings.length} averaged chunks`);

return finalEmbedding;
} catch (chunkError) {
// If chunking fails, throw the original error
Expand Down
Loading