Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 13 additions & 1 deletion index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2002,8 +2002,20 @@ const memoryLanceDBProPlugin = {
const agentId = resolveHookAgentId(ctx?.agentId, (event as any).sessionKey);
const accessibleScopes = scopeManager.getAccessibleScopes(agentId);

// FR-04: Truncate long prompts (e.g. file attachments) before embedding.
// Auto-recall only needs the user's intent, not full attachment text.
const MAX_RECALL_QUERY_LENGTH = 1_000;
let recallQuery = event.prompt;
if (recallQuery.length > MAX_RECALL_QUERY_LENGTH) {
const originalLength = recallQuery.length;
recallQuery = recallQuery.slice(0, MAX_RECALL_QUERY_LENGTH);
api.logger.info(
`memory-lancedb-pro: auto-recall query truncated from ${originalLength} to ${MAX_RECALL_QUERY_LENGTH} chars`
);
}

const results = await retrieveWithRetry({
query: event.prompt,
query: recallQuery,
limit: 3,
scopeFilter: accessibleScopes,
source: "auto-recall",
Expand Down
37 changes: 34 additions & 3 deletions src/chunker.ts
Original file line number Diff line number Diff line change
Expand Up @@ -162,6 +162,32 @@ function sliceTrimWithIndices(text: string, start: number, end: number): { chunk
};
}

// ============================================================================
// CJK Detection
// ============================================================================

// CJK Unicode ranges: Unified Ideographs, Extension A, Compatibility,
// Hangul Syllables, Katakana, Hiragana
const CJK_RE =
  /[\u3040-\u309F\u30A0-\u30FF\u3400-\u4DBF\u4E00-\u9FFF\uAC00-\uD7AF\uF900-\uFAFF]/;

// Hoisted out of the loop below: a regex literal is evaluated to a fresh
// RegExp object on every pass through a loop, which is wasted allocation on
// long documents.
const WHITESPACE_RE = /\s/;

/**
 * Ratio of CJK characters to total non-whitespace characters.
 *
 * Iterates by code point (for..of), so astral characters count once each.
 *
 * @param text - Text to inspect; may be empty.
 * @returns A value in [0, 1]; 0 when the text contains no non-whitespace
 *   characters at all (avoids a 0/0 division).
 */
function getCjkRatio(text: string): number {
  let cjk = 0;
  let total = 0;
  for (const ch of text) {
    if (WHITESPACE_RE.test(ch)) continue;
    total++;
    if (CJK_RE.test(ch)) cjk++;
  }
  return total === 0 ? 0 : cjk / total;
}

// CJK chars are ~2-3 tokens each. When text is predominantly CJK, we divide
// char limits by this factor to stay within the model's token budget.
const CJK_CHAR_TOKEN_DIVISOR = 2.5;
const CJK_RATIO_THRESHOLD = 0.3;

// ============================================================================
// Chunking Core
// ============================================================================
Expand Down Expand Up @@ -239,10 +265,15 @@ export function smartChunk(text: string, embedderModel?: string): ChunkResult {
const limit = embedderModel ? EMBEDDING_CONTEXT_LIMITS[embedderModel] : undefined;
const base = limit ?? 8192;

// CJK characters consume ~2-3 tokens each, so a char-based limit that works
// for Latin text will vastly overshoot the token budget for CJK-heavy text.
const cjkHeavy = getCjkRatio(text) > CJK_RATIO_THRESHOLD;
const divisor = cjkHeavy ? CJK_CHAR_TOKEN_DIVISOR : 1;

const config: ChunkerConfig = {
maxChunkSize: Math.max(1000, Math.floor(base * 0.7)),
overlapSize: Math.max(0, Math.floor(base * 0.05)),
minChunkSize: Math.max(100, Math.floor(base * 0.1)),
maxChunkSize: Math.max(1000, Math.floor(base * 0.7 / divisor)),
overlapSize: Math.max(0, Math.floor(base * 0.05 / divisor)),
minChunkSize: Math.max(100, Math.floor(base * 0.1 / divisor)),
semanticSplit: true,
maxLinesPerChunk: 50,
};
Expand Down
85 changes: 78 additions & 7 deletions src/embedder.ts
Original file line number Diff line number Diff line change
Expand Up @@ -248,6 +248,35 @@ export function formatEmbeddingProviderError(
return `${genericPrefix}${detailText}`;
}

// ============================================================================
// Safety Constants
// ============================================================================

/** Maximum recursion depth for embedSingle chunking retries. */
const MAX_EMBED_DEPTH = 3;

/** Global timeout for a single embedding operation (ms). */
const EMBED_TIMEOUT_MS = 10_000;

/**
 * Safe character limits per model for forced truncation.
 * CJK characters typically consume ~3 tokens each, so the char limit is
 * conservative compared to the token limit.
 */
const SAFE_CHAR_LIMITS: Record<string, number> = {
  "nomic-embed-text": 2300,
  "mxbai-embed-large": 2300,
  "all-MiniLM-L6-v2": 1000,
  "all-mpnet-base-v2": 1500,
};

const DEFAULT_SAFE_CHAR_LIMIT = 2000;

/**
 * Look up the forced-truncation character budget for a model.
 *
 * @param model - Embedding model name as configured.
 * @returns The model-specific limit, or DEFAULT_SAFE_CHAR_LIMIT when the
 *   model has no entry in SAFE_CHAR_LIMITS.
 */
function getSafeCharLimit(model: string): number {
  const known = SAFE_CHAR_LIMITS[model];
  return known === undefined ? DEFAULT_SAFE_CHAR_LIMIT : known;
}

export function getVectorDimensions(model: string, overrideDims?: number): number {
if (overrideDims && overrideDims > 0) {
return overrideDims;
Expand Down Expand Up @@ -391,6 +420,21 @@ export class Embedder {
return this.clients.length;
}

/**
 * FR-05: Wrap a promise with a global timeout to prevent indefinite hangs.
 *
 * The pending timer is cleared once the race settles. Without that, a fast
 * resolution would leave a live setTimeout that (a) keeps the Node event
 * loop alive for up to EMBED_TIMEOUT_MS and (b) later rejects a promise
 * nobody is observing, emitting an unhandledRejection warning.
 *
 * @param promise - The embedding operation to guard.
 * @param label - Operation name used in the timeout error message.
 * @returns The operation's result if it settles in time.
 * @throws Error when the operation does not settle within EMBED_TIMEOUT_MS.
 */
private withTimeout<T>(promise: Promise<T>, label: string): Promise<T> {
  let timer: ReturnType<typeof setTimeout> | undefined;
  const timeout = new Promise<never>((_, reject) => {
    timer = setTimeout(
      () => reject(new Error(
        `[memory-lancedb-pro] ${label} timed out after ${EMBED_TIMEOUT_MS}ms`
      )),
      EMBED_TIMEOUT_MS,
    );
  });
  // finally runs on both fulfillment and rejection, and preserves the
  // settled value/reason of the race.
  return Promise.race([promise, timeout]).finally(() => clearTimeout(timer));
}

// --------------------------------------------------------------------------
// Backward-compatible API
// --------------------------------------------------------------------------
Expand All @@ -415,11 +459,11 @@ export class Embedder {
// --------------------------------------------------------------------------

async embedQuery(text: string): Promise<number[]> {
return this.embedSingle(text, this._taskQuery);
return this.withTimeout(this.embedSingle(text, this._taskQuery), "embedQuery");
}

async embedPassage(text: string): Promise<number[]> {
return this.embedSingle(text, this._taskPassage);
return this.withTimeout(this.embedSingle(text, this._taskPassage), "embedPassage");
}

async embedBatchQuery(texts: string[]): Promise<number[][]> {
Expand Down Expand Up @@ -466,11 +510,21 @@ export class Embedder {
return payload;
}

private async embedSingle(text: string, task?: string): Promise<number[]> {
private async embedSingle(text: string, task?: string, depth: number = 0): Promise<number[]> {
if (!text || text.trim().length === 0) {
throw new Error("Cannot embed empty text");
}

// FR-01: Recursion depth limit — force truncate when too deep
if (depth >= MAX_EMBED_DEPTH) {
const safeLimit = getSafeCharLimit(this._model);
console.warn(
`[memory-lancedb-pro] Recursion depth ${depth} reached MAX_EMBED_DEPTH (${MAX_EMBED_DEPTH}), ` +
`force-truncating ${text.length} chars → ${safeLimit} chars`
);
text = text.slice(0, safeLimit);
}

// Check cache first
const cached = this._cache.get(text, task);
if (cached) return cached;
Expand All @@ -494,17 +548,34 @@ export class Embedder {
try {
console.log(`Document exceeded context limit (${errorMsg}), attempting chunking...`);
const chunkResult = smartChunk(text, this._model);

if (chunkResult.chunks.length === 0) {
throw new Error(`Failed to chunk document: ${errorMsg}`);
}

// FR-03: Single chunk output detection — if smartChunk produced only
// one chunk that is nearly the same size as the original text, chunking
// did not actually reduce the problem. Force-truncate instead of
// recursing (which would loop forever).
if (
chunkResult.chunks.length === 1 &&
chunkResult.chunks[0].length > text.length * 0.9
) {
const safeLimit = getSafeCharLimit(this._model);
console.warn(
`[memory-lancedb-pro] smartChunk produced 1 chunk (${chunkResult.chunks[0].length} chars) ≈ original (${text.length} chars). ` +
`Force-truncating to ${safeLimit} chars to avoid infinite recursion.`
);
const truncated = text.slice(0, safeLimit);
return this.embedSingle(truncated, task, depth + 1);
}

// Embed all chunks in parallel
console.log(`Split document into ${chunkResult.chunkCount} chunks for embedding`);
const chunkEmbeddings = await Promise.all(
chunkResult.chunks.map(async (chunk, idx) => {
try {
const embedding = await this.embedSingle(chunk, task);
const embedding = await this.embedSingle(chunk, task, depth + 1);
return { embedding };
} catch (chunkError) {
console.warn(`Failed to embed chunk ${idx}:`, chunkError);
Expand All @@ -525,11 +596,11 @@ export class Embedder {
);

const finalEmbedding = avgEmbedding.map(v => v / chunkEmbeddings.length);

// Cache the result for the original text (using its hash)
this._cache.set(text, task, finalEmbedding);
console.log(`Successfully embedded long document as ${chunkEmbeddings.length} averaged chunks`);

return finalEmbedding;
} catch (chunkError) {
// If chunking fails, throw the original error
Expand Down
Loading