Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 13 additions & 1 deletion index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2093,8 +2093,20 @@ const memoryLanceDBProPlugin = {
const agentId = resolveHookAgentId(ctx?.agentId, (event as any).sessionKey);
const accessibleScopes = resolveScopeFilter(scopeManager, agentId);

// FR-04: Truncate long prompts (e.g. file attachments) before embedding.
// Auto-recall only needs the user's intent, not full attachment text.
const MAX_RECALL_QUERY_LENGTH = 1_000;
let recallQuery = event.prompt;
if (recallQuery.length > MAX_RECALL_QUERY_LENGTH) {
const originalLength = recallQuery.length;
recallQuery = recallQuery.slice(0, MAX_RECALL_QUERY_LENGTH);
api.logger.info(
`memory-lancedb-pro: auto-recall query truncated from ${originalLength} to ${MAX_RECALL_QUERY_LENGTH} chars`
);
}

const results = filterUserMdExclusiveRecallResults(await retrieveWithRetry({
query: event.prompt,
query: recallQuery,
limit: 3,
scopeFilter: accessibleScopes,
source: "auto-recall",
Expand Down
14 changes: 0 additions & 14 deletions package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 1 addition & 2 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,6 @@
"@lancedb/lancedb": "^0.26.2",
"@sinclair/typebox": "0.34.48",
"apache-arrow": "18.1.0",
"json5": "^2.2.3",
"openai": "^6.21.0"
},
"openclaw": {
Expand All @@ -37,7 +36,7 @@
]
},
"scripts": {
"test": "node test/embedder-error-hints.test.mjs && node test/migrate-legacy-schema.test.mjs && node --test test/config-session-strategy-migration.test.mjs && node --test test/scope-access-undefined.test.mjs && node --test test/reflection-bypass-hook.test.mjs && node --test test/smart-extractor-scope-filter.test.mjs && node --test test/store-empty-scope-filter.test.mjs && node --test test/recall-text-cleanup.test.mjs && node test/update-consistency-lancedb.test.mjs && node test/cli-smoke.mjs && node test/functional-e2e.mjs && node test/retriever-rerank-regression.mjs && node test/smart-memory-lifecycle.mjs && node test/smart-extractor-branches.mjs && node test/plugin-manifest-regression.mjs && node --test test/sync-plugin-version.test.mjs && node test/smart-metadata-v2.mjs && node test/vector-search-cosine.test.mjs && node test/context-support-e2e.mjs && node test/temporal-facts.test.mjs && node test/memory-update-supersede.test.mjs && node test/memory-upgrader-diagnostics.test.mjs && node --test test/llm-api-key-client.test.mjs && node --test test/llm-oauth-client.test.mjs && node --test test/cli-oauth-login.test.mjs && node --test test/workflow-fork-guards.test.mjs",
"test": "node test/embedder-error-hints.test.mjs && node test/cjk-recursion-regression.test.mjs && node test/migrate-legacy-schema.test.mjs && node --test test/config-session-strategy-migration.test.mjs && node --test test/scope-access-undefined.test.mjs && node --test test/reflection-bypass-hook.test.mjs && node --test test/smart-extractor-scope-filter.test.mjs && node --test test/store-empty-scope-filter.test.mjs && node --test test/recall-text-cleanup.test.mjs && node test/update-consistency-lancedb.test.mjs && node test/cli-smoke.mjs && node test/functional-e2e.mjs && node test/retriever-rerank-regression.mjs && node test/smart-memory-lifecycle.mjs && node test/smart-extractor-branches.mjs && node test/plugin-manifest-regression.mjs && node --test test/sync-plugin-version.test.mjs && node test/smart-metadata-v2.mjs && node test/vector-search-cosine.test.mjs && node test/context-support-e2e.mjs && node test/temporal-facts.test.mjs && node test/memory-update-supersede.test.mjs && node test/memory-upgrader-diagnostics.test.mjs && node --test test/llm-api-key-client.test.mjs && node --test test/llm-oauth-client.test.mjs && node --test test/cli-oauth-login.test.mjs && node --test test/workflow-fork-guards.test.mjs",
"test:openclaw-host": "node test/openclaw-host-functional.mjs",
"version": "node scripts/sync-plugin-version.mjs openclaw.plugin.json package.json && git add openclaw.plugin.json"
},
Expand Down
37 changes: 34 additions & 3 deletions src/chunker.ts
Original file line number Diff line number Diff line change
Expand Up @@ -162,6 +162,32 @@ function sliceTrimWithIndices(text: string, start: number, end: number): { chunk
};
}

// ============================================================================
// CJK Detection
// ============================================================================

// CJK Unicode ranges: Hiragana, Katakana, Extension A, Unified Ideographs,
// Hangul Syllables, Compatibility Ideographs, plus the supplementary-plane
// ideograph blocks (Extension B and beyond, U+20000..U+2EBEF).
// The `u` flag is required both for the \u{...} escapes and so the class
// matches whole code points instead of individual UTF-16 surrogate halves.
const CJK_RE =
  /[\u3040-\u309F\u30A0-\u30FF\u3400-\u4DBF\u4E00-\u9FFF\uAC00-\uD7AF\uF900-\uFAFF\u{20000}-\u{2EBEF}]/u;

/**
 * Ratio of CJK characters to total non-whitespace characters.
 *
 * Iterates by code point (`for..of`), so supplementary-plane ideographs
 * (e.g. CJK Extension B) count as one character each rather than two
 * surrogate halves. Whitespace is excluded from both numerator and
 * denominator. Returns 0 for empty or whitespace-only input.
 */
function getCjkRatio(text: string): number {
  let cjk = 0;
  let total = 0;
  for (const ch of text) {
    if (/\s/u.test(ch)) continue; // whitespace is ignored entirely
    total++;
    if (CJK_RE.test(ch)) cjk++;
  }
  return total === 0 ? 0 : cjk / total;
}

// CJK chars are ~2-3 tokens each. When text is predominantly CJK, we divide
// char limits by this factor to stay within the model's token budget.
const CJK_CHAR_TOKEN_DIVISOR = 2.5;
const CJK_RATIO_THRESHOLD = 0.3;

// ============================================================================
// Chunking Core
// ============================================================================
Expand Down Expand Up @@ -239,10 +265,15 @@ export function smartChunk(text: string, embedderModel?: string): ChunkResult {
const limit = embedderModel ? EMBEDDING_CONTEXT_LIMITS[embedderModel] : undefined;
const base = limit ?? 8192;

// CJK characters consume ~2-3 tokens each, so a char-based limit that works
// for Latin text will vastly overshoot the token budget for CJK-heavy text.
const cjkHeavy = getCjkRatio(text) > CJK_RATIO_THRESHOLD;
const divisor = cjkHeavy ? CJK_CHAR_TOKEN_DIVISOR : 1;

const config: ChunkerConfig = {
maxChunkSize: Math.max(1000, Math.floor(base * 0.7)),
overlapSize: Math.max(0, Math.floor(base * 0.05)),
minChunkSize: Math.max(100, Math.floor(base * 0.1)),
maxChunkSize: Math.max(200, Math.floor(base * 0.7 / divisor)),
overlapSize: Math.max(0, Math.floor(base * 0.05 / divisor)),
minChunkSize: Math.max(100, Math.floor(base * 0.1 / divisor)),
semanticSplit: true,
maxLinesPerChunk: 50,
};
Expand Down
97 changes: 82 additions & 15 deletions src/embedder.ts
Original file line number Diff line number Diff line change
Expand Up @@ -352,6 +352,22 @@ export function formatEmbeddingProviderError(
return `${genericPrefix}${detailText}`;
}

// ============================================================================
// Safety Constants
// ============================================================================

/** Maximum recursion depth for embedSingle chunking retries. */
const MAX_EMBED_DEPTH = 3;

/** Global timeout for a single embedding operation (ms). */
const EMBED_TIMEOUT_MS = 10_000;

/**
* Strictly decreasing character limit for forced truncation.
* Each recursion level MUST reduce input by this factor to guarantee progress.
*/
const STRICT_REDUCTION_FACTOR = 0.5; // Each retry must be at most 50% of previous

export function getVectorDimensions(model: string, overrideDims?: number): number {
if (overrideDims && overrideDims > 0) {
return overrideDims;
Expand Down Expand Up @@ -472,16 +488,23 @@ export class Embedder {
/**
* Call embeddings.create with automatic key rotation on rate-limit errors.
* Tries each key in the pool at most once before giving up.
* Accepts an optional AbortSignal to support true request cancellation.
*/
private async embedWithRetry(payload: any): Promise<any> {
private async embedWithRetry(payload: any, signal?: AbortSignal): Promise<any> {
const maxAttempts = this.clients.length;
let lastError: Error | undefined;

for (let attempt = 0; attempt < maxAttempts; attempt++) {
const client = this.nextClient();
try {
return await client.embeddings.create(payload);
// Pass signal to OpenAI SDK if provided (SDK v6+ supports this)
return await client.embeddings.create(payload, signal ? { signal } : undefined);
} catch (error) {
// If aborted, re-throw immediately
if (error instanceof Error && error.name === 'AbortError') {
throw error;
}

lastError = error instanceof Error ? error : new Error(String(error));

if (this.isRateLimitError(error) && attempt < maxAttempts - 1) {
Expand Down Expand Up @@ -510,6 +533,13 @@ export class Embedder {
return this.clients.length;
}

/**
 * Run one embedding operation under the global EMBED_TIMEOUT_MS budget.
 *
 * Creates an AbortController, arms a timer that aborts its signal when the
 * budget elapses, and hands the signal to the factory. The timer is always
 * cleared once the returned promise settles, so no stray timeout survives a
 * fast success or failure. Note: cancellation is cooperative — the factory
 * must honor the signal for the abort to take effect.
 */
private withTimeout<T>(promiseFactory: (signal: AbortSignal) => Promise<T>, _label: string): Promise<T> {
  const abortCtl = new AbortController();
  const timer = setTimeout(() => abortCtl.abort(), EMBED_TIMEOUT_MS);
  const pending = promiseFactory(abortCtl.signal);
  return pending.finally(() => clearTimeout(timer));
}

// --------------------------------------------------------------------------
// Backward-compatible API
// --------------------------------------------------------------------------
Expand All @@ -534,13 +564,17 @@ export class Embedder {
// --------------------------------------------------------------------------

async embedQuery(text: string): Promise<number[]> {
return this.embedSingle(text, this._taskQuery);
return this.withTimeout((signal) => this.embedSingle(text, this._taskQuery, 0, signal), "embedQuery");
}

async embedPassage(text: string): Promise<number[]> {
return this.embedSingle(text, this._taskPassage);
return this.withTimeout((signal) => this.embedSingle(text, this._taskPassage, 0, signal), "embedPassage");
}

// Note: embedBatchQuery/embedBatchPassage are NOT wrapped with withTimeout because
// they handle multiple texts in a single API call. The timeout would fire after
// EMBED_TIMEOUT_MS regardless of how many texts succeed. Individual text embedding
// within the batch is protected by the SDK's own timeout handling.
async embedBatchQuery(texts: string[]): Promise<number[][]> {
return this.embedMany(texts, this._taskQuery);
}
Expand Down Expand Up @@ -595,17 +629,32 @@ export class Embedder {
return payload;
}

private async embedSingle(text: string, task?: string): Promise<number[]> {
private async embedSingle(text: string, task?: string, depth: number = 0, signal?: AbortSignal): Promise<number[]> {
if (!text || text.trim().length === 0) {
throw new Error("Cannot embed empty text");
}

// FR-01: Recursion depth limit — force truncate when too deep
if (depth >= MAX_EMBED_DEPTH) {
const safeLimit = Math.floor(text.length * STRICT_REDUCTION_FACTOR);
console.warn(
`[memory-lancedb-pro] Recursion depth ${depth} reached MAX_EMBED_DEPTH (${MAX_EMBED_DEPTH}), ` +
`force-truncating ${text.length} chars → ${safeLimit} chars (strict ${STRICT_REDUCTION_FACTOR * 100}% reduction)`
);
if (safeLimit < 100) {
throw new Error(
`[memory-lancedb-pro] Failed to embed: input too large for model context after ${MAX_EMBED_DEPTH} retries`
);
}
text = text.slice(0, safeLimit);
}

// Check cache first
const cached = this._cache.get(text, task);
if (cached) return cached;

try {
const response = await this.embedWithRetry(this.buildPayload(text, task));
const response = await this.embedWithRetry(this.buildPayload(text, task), signal);
const embedding = response.data[0]?.embedding as number[] | undefined;
if (!embedding) {
throw new Error("No embedding returned from provider");
Expand All @@ -628,12 +677,35 @@ export class Embedder {
throw new Error(`Failed to chunk document: ${errorMsg}`);
}

// FR-03: Single chunk output detection — if smartChunk produced only
// one chunk that is nearly the same size as the original text, chunking
// did not actually reduce the problem. Force-truncate with STRICT
// reduction to guarantee progress.
if (
chunkResult.chunks.length === 1 &&
chunkResult.chunks[0].length > text.length * 0.9
) {
// Use strict reduction factor to guarantee each retry makes progress
const safeLimit = Math.floor(text.length * STRICT_REDUCTION_FACTOR);
console.warn(
`[memory-lancedb-pro] smartChunk produced 1 chunk (${chunkResult.chunks[0].length} chars) ≈ original (${text.length} chars). ` +
`Force-truncating to ${safeLimit} chars (strict ${STRICT_REDUCTION_FACTOR * 100}% reduction) to avoid infinite recursion.`
);
if (safeLimit < 100) {
throw new Error(
`[memory-lancedb-pro] Failed to embed: chunking couldn't reduce input size enough for model context`
);
}
const truncated = text.slice(0, safeLimit);
return this.embedSingle(truncated, task, depth + 1, signal);
}

// Embed all chunks in parallel
console.log(`Split document into ${chunkResult.chunkCount} chunks for embedding`);
const chunkEmbeddings = await Promise.all(
chunkResult.chunks.map(async (chunk, idx) => {
try {
const embedding = await this.embedSingle(chunk, task);
const embedding = await this.embedSingle(chunk, task, depth + 1, signal);
return { embedding };
} catch (chunkError) {
console.warn(`Failed to embed chunk ${idx}:`, chunkError);
Expand Down Expand Up @@ -661,14 +733,9 @@ export class Embedder {

return finalEmbedding;
} catch (chunkError) {
// If chunking fails, throw the original error
console.warn(`Chunking failed, using original error:`, chunkError);
const friendly = formatEmbeddingProviderError(error, {
baseURL: this._baseURL,
model: this._model,
mode: "single",
});
throw new Error(friendly, { cause: error });
// Preserve and surface the more specific chunkError
console.warn(`Chunking failed:`, chunkError);
throw chunkError;
}
}

Expand Down
Loading
Loading