diff --git a/claude-code/bundle/pre-tool-use.js b/claude-code/bundle/pre-tool-use.js index 29bde195..1dcc18fd 100755 --- a/claude-code/bundle/pre-tool-use.js +++ b/claude-code/bundle/pre-tool-use.js @@ -425,7 +425,14 @@ async function handleGrepDirect(api, table, sessionsTable, params) { const hasRegexMeta = !fixedString && /[.*+?^${}()|[\]\\]/.test(pattern); let rows = []; if (!hasRegexMeta) { - const contentFilter = ` AND summary ${likeOp} '%${escapedLike}%'`; + const words = pattern.split(/\s+/).filter((w) => w.length > 2); + let contentFilter; + if (words.length > 1) { + const wordFilters = words.slice(0, 4).map((w) => `summary ${likeOp} '%${sqlLike(w)}%'`); + contentFilter = ` AND (${wordFilters.join(" OR ")})`; + } else { + contentFilter = ` AND summary ${likeOp} '%${escapedLike}%'`; + } try { rows = await api.query(`SELECT path, summary AS content FROM "${table}" WHERE 1=1${pathFilter}${contentFilter} LIMIT 100`); } catch { @@ -438,6 +445,29 @@ async function handleGrepDirect(api, table, sessionsTable, params) { rows = []; } } + const output = []; + if (!hasRegexMeta) { + const memoryTable = table.endsWith("_sessions") ? table.replace(/_sessions$/, "_memory") : sessionsTable !== table ? sessionsTable : null; + if (memoryTable && memoryTable !== table) { + try { + const words2 = pattern.split(/\s+/).filter((w) => w.length > 2); + const contentFilter = words2.length > 1 ? ` AND (${words2.slice(0, 4).map((w) => `summary ${likeOp} '%${sqlLike(w)}%'`).join(" OR ")})` : ` AND summary ${likeOp} '%${escapedLike}%'`; + const summaryRows = await api.query(`SELECT path, summary AS content FROM "${memoryTable}" WHERE 1=1${contentFilter} LIMIT 20`); + if (summaryRows.length > 0) { + for (const sr of summaryRows) { + const sp = sr["path"]; + const sc = sr["content"]; + if (sc) { + output.push(`=== ${sp} ===`); + output.push(sc); + output.push(""); + } + } + } + } catch { + } + } + } let reStr = fixedString ? pattern.replace(/[.*+?^${}()|[\]\\]/g, "\\$&") : pattern; if (wordMatch) reStr = `\\b${reStr}\\b`; @@ -447,13 +477,14 @@ async function handleGrepDirect(api, table, sessionsTable, params) { } catch { re = new RegExp(pattern.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"), ignoreCase ? "i" : ""); } - const output = []; const multi = rows.length > 1; for (const row of rows) { const p = row["path"]; const text = row["content"]; if (!text) continue; + const dateMatch = text.match(/"date_time"\s*:\s*"([^"]+)"/); + let sessionDate = dateMatch ? `[${dateMatch[1]}] ` : ""; const lines = text.split("\n"); const matched = []; for (let i = 0; i < lines.length; i++) { @@ -464,7 +495,9 @@ async function handleGrepDirect(api, table, sessionsTable, params) { } const prefix = multi ? `${p}:` : ""; const ln = lineNumber ? `${i + 1}:` : ""; - matched.push(`${prefix}${ln}${lines[i]}`); + matched.push(`${prefix}${sessionDate}${ln}${lines[i]}`); + if (sessionDate) + sessionDate = ""; } } if (!filesOnly) { @@ -690,6 +723,27 @@ async function main() { const toolPath = input.tool_input.file_path ?? input.tool_input.path ?? ""; if (!shellCmd && (touchesMemory(cmd) || touchesMemory(toolPath))) { const guidance = "[RETRY REQUIRED] The command you tried is not available for ~/.deeplake/memory/. This virtual filesystem only supports bash builtins: cat, ls, grep, echo, jq, head, tail, sed, awk, wc, sort, find, etc. python, python3, node, and curl are NOT available. You MUST rewrite your command using only the bash tools listed above and try again. For example, to parse JSON use: cat file.json | jq '.key'. To count keys: cat file.json | jq 'keys | length'."; + const isReadLike = /^(?:python3?|node|deno|bun|ruby|perl)\b/.test(cmd.trim()); + const hasShellMeta = /[$`;|&<>()\\]/.test(cmd); + if (isReadLike && !hasShellMeta) { + const pathMatch = cmd.match(/~\/\.deeplake\/memory\/[\w./_-]+/) || toolPath.match(/~\/\.deeplake\/memory\/[\w./_-]+/); + const memPath = pathMatch ? pathMatch[0] : ""; + const cleanPath = memPath ? rewritePaths(memPath) : ""; + if (cleanPath && !cleanPath.endsWith("/")) { + log3(`unsupported command on file, converting to read: ${cleanPath}`); + console.log(JSON.stringify({ + hookSpecificOutput: { + hookEventName: "PreToolUse", + permissionDecision: "allow", + updatedInput: { + command: `cat '${cleanPath.replace(/'/g, "'\\''")}'`, + description: "[DeepLake] converted unsupported command to file read" + } + } + })); + return; + } + } log3(`unsupported command, returning guidance: ${cmd}`); console.log(JSON.stringify({ hookSpecificOutput: { @@ -787,14 +841,21 @@ async function main() { if (rows.length > 0 && rows[0]["summary"]) { content = rows[0]["summary"]; } else if (virtualPath === "/index.md") { - const idxRows = await api.query(`SELECT path, project, description, creation_date FROM "${table}" WHERE path LIKE '/summaries/%' ORDER BY creation_date DESC`); - const lines = ["# Memory Index", "", `${idxRows.length} sessions:`, ""]; + const memTable = table.endsWith("_sessions") ? table.replace(/_sessions$/, "_memory") : table; + let idxRows = []; + try { + idxRows = await api.query(`SELECT path, description, creation_date FROM "${memTable}" ORDER BY path LIMIT 500`); + } catch { + } + if (idxRows.length === 0) { + idxRows = await api.query(`SELECT path, description, creation_date FROM "${table}" ORDER BY path LIMIT 500`); + } + const lines = ["# Memory Index", "", `${idxRows.length} entries:`, ""]; for (const r of idxRows) { const p = r["path"]; - const proj = r["project"] || ""; - const desc = (r["description"] || "").slice(0, 120); + const desc = (r["description"] || "").slice(0, 100); const date = (r["creation_date"] || "").slice(0, 10); - lines.push(`- [${p}](${p}) ${date} ${proj ? `[${proj}]` : ""} ${desc}`); + lines.push(`- [${p}](${p}) ${date} ${desc}`); } content = lines.join("\n"); } diff --git a/codex/bundle/pre-tool-use.js b/codex/bundle/pre-tool-use.js index 29b9a87c..fda41728 100755 --- a/codex/bundle/pre-tool-use.js +++ b/codex/bundle/pre-tool-use.js @@ -426,7 +426,14 @@ async function handleGrepDirect(api, table, sessionsTable, params) { const hasRegexMeta = !fixedString && /[.*+?^${}()|[\]\\]/.test(pattern); let rows = []; if (!hasRegexMeta) { - const contentFilter = ` AND summary ${likeOp} '%${escapedLike}%'`; + const words = pattern.split(/\s+/).filter((w) => w.length > 2); + let contentFilter; + if (words.length > 1) { + const wordFilters = words.slice(0, 4).map((w) => `summary ${likeOp} '%${sqlLike(w)}%'`); + contentFilter = ` AND (${wordFilters.join(" OR ")})`; + } else { + contentFilter = ` AND summary ${likeOp} '%${escapedLike}%'`; + } try { rows = await api.query(`SELECT path, summary AS content FROM "${table}" WHERE 1=1${pathFilter}${contentFilter} LIMIT 100`); } catch { @@ -439,6 +446,29 @@ async function handleGrepDirect(api, table, sessionsTable, params) { rows = []; } } + const output = []; + if (!hasRegexMeta) { + const memoryTable = table.endsWith("_sessions") ? table.replace(/_sessions$/, "_memory") : sessionsTable !== table ? sessionsTable : null; + if (memoryTable && memoryTable !== table) { + try { + const words2 = pattern.split(/\s+/).filter((w) => w.length > 2); + const contentFilter = words2.length > 1 ? ` AND (${words2.slice(0, 4).map((w) => `summary ${likeOp} '%${sqlLike(w)}%'`).join(" OR ")})` : ` AND summary ${likeOp} '%${escapedLike}%'`; + const summaryRows = await api.query(`SELECT path, summary AS content FROM "${memoryTable}" WHERE 1=1${contentFilter} LIMIT 20`); + if (summaryRows.length > 0) { + for (const sr of summaryRows) { + const sp = sr["path"]; + const sc = sr["content"]; + if (sc) { + output.push(`=== ${sp} ===`); + output.push(sc); + output.push(""); + } + } + } + } catch { + } + } + } let reStr = fixedString ? pattern.replace(/[.*+?^${}()|[\]\\]/g, "\\$&") : pattern; if (wordMatch) reStr = `\\b${reStr}\\b`; @@ -448,13 +478,14 @@ async function handleGrepDirect(api, table, sessionsTable, params) { } catch { re = new RegExp(pattern.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"), ignoreCase ? "i" : ""); } - const output = []; const multi = rows.length > 1; for (const row of rows) { const p = row["path"]; const text = row["content"]; if (!text) continue; + const dateMatch = text.match(/"date_time"\s*:\s*"([^"]+)"/); + let sessionDate = dateMatch ? `[${dateMatch[1]}] ` : ""; const lines = text.split("\n"); const matched = []; for (let i = 0; i < lines.length; i++) { @@ -465,7 +496,9 @@ async function handleGrepDirect(api, table, sessionsTable, params) { } const prefix = multi ? `${p}:` : ""; const ln = lineNumber ? `${i + 1}:` : ""; - matched.push(`${prefix}${ln}${lines[i]}`); + matched.push(`${prefix}${sessionDate}${ln}${lines[i]}`); + if (sessionDate) + sessionDate = ""; } } if (!filesOnly) { diff --git a/src/hooks/grep-direct.ts b/src/hooks/grep-direct.ts index fa20a93f..b66b5dd4 100644 --- a/src/hooks/grep-direct.ts +++ b/src/hooks/grep-direct.ts @@ -121,18 +121,25 @@ export async function handleGrepDirect( // Strategy: BM25 first (ranked, fast with index), LIKE fallback if BM25 fails. let rows: Record[] = []; + // Search primary table — for multi-word patterns, use OR to find any word if (!hasRegexMeta) { - // BM25 ranked search disabled — CREATE INDEX causes oid errors on fresh tables. - // See bm25-oid-bug.sh. Using LIKE until Deeplake fixes the oid invalidation. - // When re-enabling, uncomment the BM25 block and make LIKE the fallback. - const contentFilter = ` AND summary ${likeOp} '%${escapedLike}%'`; + const words = pattern.split(/\s+/).filter((w: string) => w.length > 2); + let contentFilter: string; + if (words.length > 1) { + // Multi-word: search for any word (OR) to cast a wider net + const wordFilters = words.slice(0, 4).map((w: string) => + `summary ${likeOp} '%${sqlLike(w)}%'` + ); + contentFilter = ` AND (${wordFilters.join(" OR ")})`; + } else { + contentFilter = ` AND summary ${likeOp} '%${escapedLike}%'`; + } try { rows = await api.query( `SELECT path, summary AS content FROM "${table}" WHERE 1=1${pathFilter}${contentFilter} LIMIT 100`, ); } catch { rows = []; } } else { - // Regex pattern — fetch all files under path, filter in-memory try { rows = await api.query( `SELECT path, summary AS content FROM "${table}" WHERE 1=1${pathFilter} LIMIT 100`, @@ -140,6 +147,39 @@ export async function handleGrepDirect( } catch { rows = []; } } + const output: string[] = []; + // Cross-table enrichment: search the companion memory/summaries table + // for structured wiki-style context. Convention: if table is X_sessions + // or X, companion is X_memory. Summaries are prepended for priority. + if (!hasRegexMeta) { + const memoryTable = table.endsWith("_sessions") + ? table.replace(/_sessions$/, "_memory") + : (sessionsTable !== table ? sessionsTable : null); + if (memoryTable && memoryTable !== table) { + try { + const words2 = pattern.split(/\s+/).filter((w: string) => w.length > 2); + const contentFilter = words2.length > 1 + ? ` AND (${words2.slice(0, 4).map((w: string) => `summary ${likeOp} '%${sqlLike(w)}%'`).join(" OR ")})` + : ` AND summary ${likeOp} '%${escapedLike}%'`; + const summaryRows = await api.query( + `SELECT path, summary AS content FROM "${memoryTable}" WHERE 1=1${contentFilter} LIMIT 20`, + ); + if (summaryRows.length > 0) { + // Output full summaries directly (compact and structured) + for (const sr of summaryRows) { + const sp = sr["path"] as string; + const sc = sr["content"] as string; + if (sc) { + output.push(`=== ${sp} ===`); + output.push(sc); + output.push(""); + } + } + } + } catch { /* best-effort — table may not exist */ } + } + } + // ── regex refinement ── let reStr = fixedString ? pattern.replace(/[.*+?^${}()|[\]\\]/g, "\\$&") @@ -149,13 +189,15 @@ export async function handleGrepDirect( try { re = new RegExp(reStr, ignoreCase ? "i" : ""); } catch { re = new RegExp(pattern.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"), ignoreCase ? "i" : ""); } - const output: string[] = []; const multi = rows.length > 1; for (const row of rows) { const p = row["path"] as string; const text = row["content"] as string; if (!text) continue; + // Extract date from session JSON for temporal context + const dateMatch = text.match(/"date_time"\s*:\s*"([^"]+)"/); + let sessionDate = dateMatch ? `[${dateMatch[1]}] ` : ""; const lines = text.split("\n"); const matched: string[] = []; @@ -165,7 +207,8 @@ export async function handleGrepDirect( if (filesOnly) { output.push(p); break; } const prefix = multi ? `${p}:` : ""; const ln = lineNumber ? `${i + 1}:` : ""; - matched.push(`${prefix}${ln}${lines[i]}`); + matched.push(`${prefix}${sessionDate}${ln}${lines[i]}`); + if (sessionDate) sessionDate = ""; } } diff --git a/src/hooks/pre-tool-use.ts b/src/hooks/pre-tool-use.ts index 15095c6d..39bf4001 100644 --- a/src/hooks/pre-tool-use.ts +++ b/src/hooks/pre-tool-use.ts @@ -187,6 +187,34 @@ async function main(): Promise { "python, python3, node, and curl are NOT available. " + "You MUST rewrite your command using only the bash tools listed above and try again. " + "For example, to parse JSON use: cat file.json | jq '.key'. To count keys: cat file.json | jq 'keys | length'."; + // Only auto-convert when the user is clearly trying to READ a memory + // file with an unsupported interpreter (python, node, ruby, perl). + // curl/wget and anything with shell metacharacters fall through to the + // RETRY guidance below — converting them would hide actual intent. + const isReadLike = /^(?:python3?|node|deno|bun|ruby|perl)\b/.test(cmd.trim()); + const hasShellMeta = /[$`;|&<>()\\]/.test(cmd); + if (isReadLike && !hasShellMeta) { + const pathMatch = cmd.match(/~\/\.deeplake\/memory\/[\w./_-]+/) + || toolPath.match(/~\/\.deeplake\/memory\/[\w./_-]+/); + const memPath = pathMatch ? pathMatch[0] : ""; + const cleanPath = memPath ? rewritePaths(memPath) : ""; + if (cleanPath && !cleanPath.endsWith("/")) { + log(`unsupported command on file, converting to read: ${cleanPath}`); + console.log(JSON.stringify({ + hookSpecificOutput: { + hookEventName: "PreToolUse", + permissionDecision: "allow", + updatedInput: { + command: `cat '${cleanPath.replace(/'/g, "'\\''")}'`, + description: "[DeepLake] converted unsupported command to file read", + }, + }, + })); + return; + } + } + + log(`unsupported command, returning guidance: ${cmd}`); console.log(JSON.stringify({ hookSpecificOutput: { @@ -289,17 +317,27 @@ async function main(): Promise { if (rows.length > 0 && rows[0]["summary"]) { content = rows[0]["summary"] as string; } else if (virtualPath === "/index.md") { - // Virtual index — generate from metadata - const idxRows = await api.query( - `SELECT path, project, description, creation_date FROM "${table}" WHERE path LIKE '/summaries/%' ORDER BY creation_date DESC` - ); - const lines = ["# Memory Index", "", `${idxRows.length} sessions:`, ""]; + // Virtual index — generate from all entries in memory table + // Try companion memory table first (has descriptions), fall back to primary + const memTable = table.endsWith("_sessions") + ? table.replace(/_sessions$/, "_memory") : table; + let idxRows: Record[] = []; + try { + idxRows = await api.query( + `SELECT path, description, creation_date FROM "${memTable}" ORDER BY path LIMIT 500` + ); + } catch { /* companion table may not exist */ } + if (idxRows.length === 0) { + idxRows = await api.query( + `SELECT path, description, creation_date FROM "${table}" ORDER BY path LIMIT 500` + ); + } + const lines = ["# Memory Index", "", `${idxRows.length} entries:`, ""]; for (const r of idxRows) { const p = r["path"] as string; - const proj = r["project"] as string || ""; - const desc = (r["description"] as string || "").slice(0, 120); + const desc = (r["description"] as string || "").slice(0, 100); const date = (r["creation_date"] as string || "").slice(0, 10); - lines.push(`- [${p}](${p}) ${date} ${proj ? `[${proj}]` : ""} ${desc}`); + lines.push(`- [${p}](${p}) ${date} ${desc}`); } content = lines.join("\n"); }