diff --git a/api/admin.py b/api/admin.py index 1ee2386..318be10 100644 --- a/api/admin.py +++ b/api/admin.py @@ -1300,6 +1300,7 @@ async def _bool(key: str, default: str) -> str: "emb_model": await _cfg("memory.embedding.model", "BAAI/bge-small-en-v1.5"), "emb_base_url": await _cfg("memory.embedding.base_url", ""), "emb_top_k": await _cfg("memory.embedding.injection_top_k", "12"), + "emb_recall_top_k": await _cfg("memory.embedding.recall_top_k", "10"), "hygiene_enabled": await _bool("memory.hygiene_enabled", "true"), "default_importance": await _cfg("memory.default_importance", "5.0"), "archive_after_days": await _cfg("memory.archive_after_days", "90"), @@ -1656,6 +1657,7 @@ async def patch_config(body: ConfigPatchIn) -> dict: # Tier 3/4 lifecycle knobs so memory config changes apply live. agent.memory.embedder = agent._build_embedder() agent.memory.injection_top_k = mem_cfg.embedding.injection_top_k + agent.memory.recall_top_k = mem_cfg.embedding.recall_top_k agent.memory.default_importance = mem_cfg.default_importance agent.memory.archive_after_days = mem_cfg.archive_after_days agent.memory.archive_max_importance = mem_cfg.archive_max_importance @@ -3322,8 +3324,10 @@ def _config_requires_restart(values: dict) -> bool: # Function-tools that a persona may scope. ``load_skill`` is intentionally # excluded — it is always available (the core mechanic personae use to read -# their allowlisted skills). Kept here (not imported from core.agent) to avoid -# pulling the agent's heavy import graph into the admin app. +# their allowlisted skills); so are the vault tools and ``recall_memory`` +# (memory is injected for every persona, scope-filtered, so its on-demand +# counterpart is always available too). Kept here (not imported from core.agent) +# to avoid pulling the agent's heavy import graph into the admin app. 
GATEABLE_TOOLS = [ "run_command", "send_email", diff --git a/api/templates/partials/memory.html b/api/templates/partials/memory.html index b46150a..d865be4 100644 --- a/api/templates/partials/memory.html +++ b/api/templates/partials/memory.html @@ -9,6 +9,7 @@ embModel: {{ emb_model|default('BAAI/bge-small-en-v1.5', true)|tojson|forceescape }}, embBaseUrl: {{ emb_base_url|default('', true)|tojson|forceescape }}, embTopK: {{ emb_top_k|default('12', true)|tojson|forceescape }}, + embRecallTopK: {{ emb_recall_top_k|default('10', true)|tojson|forceescape }}, embResult: '', embOk: false, embStatus: null, embBusy: false, embTestResult: '', // Lifecycle (Tier 3/4) @@ -151,6 +152,12 @@

Semantic memory (embeddings)

How many of the most relevant long-term memories to put in the prompt each message.

+
+ + +

Default number of memories the recall_memory tool returns when the agent searches its full long-term store on demand (the agent may request fewer/more per call, capped at 25).

+
+
Model on disk: yes ✓ @@ -165,7 +172,8 @@

Semantic memory (embeddings)

'memory.embedding.provider': embProvider, 'memory.embedding.model': embModel, 'memory.embedding.base_url': embBaseUrl, - 'memory.embedding.injection_top_k': String(embTopK) + 'memory.embedding.injection_top_k': String(embTopK), + 'memory.embedding.recall_top_k': String(embRecallTopK) }, 'Embedding settings saved', 'embOk', 'embResult').then(loadEmbStatus)"> Save embedding settings diff --git a/core/agent.py b/core/agent.py index 63075a7..ebe6854 100644 --- a/core/agent.py +++ b/core/agent.py @@ -186,6 +186,31 @@ def _shell_quote(s: str) -> str: "required": ["name"], }, }, + { + "name": "recall_memory", + "description": ( + "Search your FULL long-term memory by meaning for facts about the user that " + "aren't already shown to you. Only the few most-relevant memories are injected " + "into each turn; call this when you suspect a relevant stored fact exists beyond " + "them — it searches the whole store, including older archived memories, and ranks " + "matches by relevance. Pass a natural-language query describing the fact you're " + "after (e.g. 'dietary restrictions and food allergies'), not just keywords." + ), + "input_schema": { + "type": "object", + "properties": { + "query": { + "type": "string", + "description": "Natural-language description of the fact(s) to recall", + }, + "limit": { + "type": "integer", + "description": "Max memories to return (default 10).", + }, + }, + "required": ["query"], + }, + }, { "name": "manage_jobs", "description": ( @@ -374,7 +399,9 @@ def scoped_tools(persona: Persona | None) -> list[dict]: return TOOLS # ``load_skill`` and the vault discovery/request tools are always retained: # they are the mechanics personae rely on to read skills and obtain secrets. - _always = {"load_skill", "list_secrets", "request_secret"} + # ``recall_memory`` too — memory is injected for every persona (scope-filtered), + # so its on-demand counterpart exposes nothing extra and stays available (#47). 
+ _always = {"load_skill", "recall_memory", "list_secrets", "request_secret"} return [t for t in TOOLS if persona.allows_tool(t["name"]) or t["name"] in _always] @@ -419,6 +446,7 @@ def __init__(self, config: Config, secret_store: SecretStore | None = None): long_term_limit=mem_cfg.long_term_limit, embedder=self._build_embedder(), injection_top_k=mem_cfg.embedding.injection_top_k, + recall_top_k=mem_cfg.embedding.recall_top_k, default_importance=mem_cfg.default_importance, archive_after_days=mem_cfg.archive_after_days, archive_max_importance=mem_cfg.archive_max_importance, @@ -1206,6 +1234,9 @@ async def _execute_tool( return {"error": f"Skill not found: {skill_name}"} return {"name": skill_name, "content": content} + if name == "recall_memory": + return await self._tool_recall_memory(params, request_state) + if name == "manage_jobs": log.info("Tool call: manage_jobs — %s", params.get("action", "")) result = await self._tool_manage_jobs(params) @@ -1602,6 +1633,28 @@ async def _tool_manage_jobs(self, params: dict) -> dict: return {"error": f"Unknown action: {action!r}. Use 'create', 'list', or 'cancel'."} + async def _tool_recall_memory(self, params: dict, request_state: dict | None = None) -> dict: + """Deliberate semantic search over the full long-term memory store (#47). + + Scoped to the active persona (#42): ``persona_name`` on the per-turn + request state is the persona's private memory scope (``""`` = the default + identity's shared-only view), so recall never crosses into another + persona's private memories — same boundary the injection readers enforce. 
+ """ + query = str(params.get("query", "")).strip() + if not query: + return {"error": "Missing 'query'."} + limit = params.get("limit") + limit = limit if isinstance(limit, int) and not isinstance(limit, bool) else None + scope = (request_state or {}).get("persona_name") or "" + try: + memories = await self.memory.recall(query, limit, scope=scope) + except Exception: + log.exception("recall_memory failed for query: %s", query) + return {"error": "Memory recall failed."} + log.info("Tool call: recall_memory — %r (%d hits)", query, len(memories)) + return {"query": query, "count": len(memories), "memories": memories} + async def _tool_web_search(self, params: dict) -> dict: """Search the web via Tavily API.""" if not self.search_client: diff --git a/core/config.py b/core/config.py index fb07164..9082300 100644 --- a/core/config.py +++ b/core/config.py @@ -181,6 +181,7 @@ class EmbeddingConfig(BaseModel): base_url: str = "" # API providers only; falls back to the agent provider base URL when empty dimensions: int = 0 # 0 = provider default (API providers only) injection_top_k: int = 12 # relevance-ranked memories injected per turn + recall_top_k: int = 10 # max memories returned by the recall_memory tool (full-store lookup) class MemoryConfig(BaseModel): diff --git a/core/memory.py b/core/memory.py index 5d1eec3..677085a 100644 --- a/core/memory.py +++ b/core/memory.py @@ -492,6 +492,7 @@ def __init__( *, embedder: EmbeddingClient | None = None, injection_top_k: int = 12, + recall_top_k: int = 10, default_importance: float = 5.0, archive_after_days: int = 90, archive_max_importance: float = 4.0, @@ -503,6 +504,7 @@ def __init__( self.long_term_limit = long_term_limit self.embedder = embedder self.injection_top_k = injection_top_k + self.recall_top_k = recall_top_k self.default_importance = default_importance self.archive_after_days = archive_after_days self.archive_max_importance = archive_max_importance @@ -628,15 +630,90 @@ async def get_relevant_long_term(self, 
query: str, scope: str | None = None) -> for r in top ] - async def _reinforce(self, ids: list[int]) -> None: - """Strengthen recalled memories: bump access_count and last_accessed.""" + # Upper bound on how many memories one recall_memory call may return, and the + # minimum relevance a row needs to be worth returning at all. + _RECALL_MAX_LIMIT = 25 + # ponytail: relevance floor — raise to cut noise, lower to surface more long tail. + _RECALL_MIN_RELEVANCE = 0.1 + + async def recall( + self, query: str, limit: int | None = None, scope: str | None = None + ) -> list[dict]: + """Deliberate semantic search over the FULL long-term store (issue #47). + + This is the agent's on-demand recall tool — the complement to the + always-injected top-k (:meth:`get_relevant_long_term`). Where injection + ranks only *non-archived* rows by a recency+importance+relevance blend + and is capped to a small per-turn budget, recall searches *every* + long-term memory (archived included), ranks purely by semantic relevance + to *query*, and returns the best matches above a small relevance floor. + + Recalling reinforces the returned rows and un-archives any that had been + archived (the agent looked them up and they matched, so they are warm + again). Falls back to lexical token overlap when embeddings are + unavailable or the query can't be embedded, so recall always works. + + ``scope`` filters per #42 (see :func:`_scope_filter`) exactly like the + injection readers, so a persona only recalls shared + its own private + memories, never another persona's; ``None`` = every scope. 
+ """ + query = (query or "").strip() + if not query: + return [] + limit = self.recall_top_k if not limit or limit < 1 else limit + limit = min(limit, self._RECALL_MAX_LIMIT) + + await self._ensure_schema() + clause, params = _scope_filter(scope) + async with aiosqlite.connect(self.db_path) as db: + db.row_factory = aiosqlite.Row + cursor = await db.execute( + "SELECT id, category, subject, content, embedding, archived " # noqa: S608 + f"FROM long_term WHERE 1=1{clause}", + params, + ) + rows = [dict(r) for r in await cursor.fetchall()] + if not rows: + return [] + + # Embedding cosine for rows with a matching-dim vector; lexical for the rest. + query_vec = await self._safe_embed(query) + rel_map = _batch_relevance(query_vec, rows) + query_tokens = _tokens(query) + + scored: list[tuple[float, dict]] = [] + for i, row in enumerate(rows): + if i in rel_map: + relevance = rel_map[i] + else: + relevance = _similarity(query_tokens, _tokens(f"{row['subject']} {row['content']}")) + if relevance >= self._RECALL_MIN_RELEVANCE: + scored.append((relevance, row)) + + scored.sort(key=lambda pair: pair[0], reverse=True) + top = [row for _, row in scored[:limit]] + # Recall revives matches: reinforce + un-archive. The returned rows are + # therefore all non-archived now, so no archived flag is surfaced. + await self._reinforce([row["id"] for row in top], unarchive=True) + return [ + {"category": r["category"], "subject": r["subject"], "content": r["content"]} + for r in top + ] + + async def _reinforce(self, ids: list[int], *, unarchive: bool = False) -> None: + """Strengthen recalled memories: bump access_count and last_accessed. + + With *unarchive*, also clear the archived flag — a memory the agent + deliberately recalled and used is demonstrably warm again (issue #47). 
+ """ if not ids: return await self._ensure_schema() + archived_clause = ", archived = 0" if unarchive else "" async with aiosqlite.connect(self.db_path) as db: await db.executemany( - "UPDATE long_term SET access_count = access_count + 1, " - "last_accessed = datetime('now') WHERE id = ?", + "UPDATE long_term SET access_count = access_count + 1, " # noqa: S608 + f"last_accessed = datetime('now'){archived_clause} WHERE id = ?", [(i,) for i in ids], ) await db.commit() diff --git a/core/permissions.py b/core/permissions.py index 787255b..00d3863 100644 --- a/core/permissions.py +++ b/core/permissions.py @@ -117,6 +117,7 @@ class PermissionLevel: "run_command:git*push*": "ASK", "run_command:git*commit*": "ASK", "web_search": "ALWAYS", + "recall_memory": "ALWAYS", # Write operations — ask first "send_email": "ASK", "reply_email": "ASK", diff --git a/core/prompt_builder.py b/core/prompt_builder.py index a95aef0..6fc9f64 100644 --- a/core/prompt_builder.py +++ b/core/prompt_builder.py @@ -181,7 +181,9 @@ def build_prompt_sections( memory_instruction = ( "You can store and recall memories using the sqlite3 CLI (see the memory skill).\n" "Proactively remember important facts about the user and their contacts.\n" - "Before inserting a new long-term memory, check if it already exists to avoid duplicates." + "Before inserting a new long-term memory, check if it already exists to avoid duplicates.\n" + "Only your most relevant memories are shown each turn; when you suspect a stored fact " + "isn't among them, call the recall_memory tool to search your full memory by meaning." 
) history_handling = "" diff --git a/docs/content/docs/memory.mdx b/docs/content/docs/memory.mdx index c9001be..b9195e8 100644 --- a/docs/content/docs/memory.mdx +++ b/docs/content/docs/memory.mdx @@ -128,9 +128,21 @@ Set `memory.embedding.enabled: false` to fall back to **Tier-1 lexical** (word-o ### What embeddings power - **Relevance-ranked injection.** Instead of dumping the most recent `long_term_limit` rows into the prompt, only the `injection_top_k` (default 12) memories most relevant to the *current message* are injected. They're scored Generative-Agents style: `relevance + 0.5·importance + 0.3·recency`. Injection happens in the **per-turn preamble** (prepended to the current user message), not the static system prompt — so even in session mode (where the static prompt is snapshotted once) the current message is the query every turn, and a fact written mid-session is visible on the next turn without `/new`. +- **On-demand recall** (the `recall_memory` tool, below). - **Dedup.** `update_memory` retrieves ADD/UPDATE/DELETE/NOOP candidates by cosine similarity (with a per-row lexical fallback for any memory that has no vector yet). - **Hygiene clustering** (Tier 4, below). +### `recall_memory` — deliberate full-store lookup + +Relevance-ranked injection is a small always-on top-k: great for the obvious facts, but as the store grows it can't surface everything, and the agent *can't ask for a fact it doesn't suspect it has*. So injection is paired with a **`recall_memory` tool** the agent calls deliberately when it suspects a relevant stored fact isn't in the injected set (hybrid retrieval — always-on top-k **plus** on-demand recall). + +- Searches the agent's **entire** long-term store — **including archived** memories that injection never sees — ranked purely by semantic relevance to the agent's query (lexical-overlap fallback when embeddings are off). +- Returns up to `recall_top_k` (default 10) matches above a small relevance floor. 
+- Recalling **revives** the matched rows: they're reinforced (`access_count` / `last_accessed`) and any archived one is un-archived — a fact the agent looked up and used is warm again. +- **Scope-aware** (#42): recall is filtered to `scope IN ('', <persona>)` exactly like injection, so a persona only ever recalls shared facts plus its own private ones — never another persona's. The default identity recalls shared only. + +The tool is read-only (no approval prompt) and available to every persona (it can't surface anything injection couldn't already show that persona), alongside the sqlite CLI the memory skill teaches for exact-field queries and bulk edits. + All of this is configurable live from the **admin Memory tab** (enable toggle, backend, model, top-k, plus Download / Test buttons). ## Tier 3 — Forgetting, importance & reinforcement @@ -230,6 +242,7 @@ memory: base_url: "" # API providers only; falls back to agent provider base URL dimensions: 0 # 0 = provider default (API providers only) injection_top_k: 12 # relevance-ranked memories injected per turn + recall_top_k: 10 # max memories returned by the recall_memory tool # Tier 3 — forgetting / importance / reinforcement default_importance: 5.0 # 1-10 scale assigned to new long-term memories diff --git a/skills/memory.md b/skills/memory.md index 76549c1..76ec6da 100644 --- a/skills/memory.md +++ b/skills/memory.md @@ -37,6 +37,22 @@ sqlite3 data/memory.db "UPDATE long_term SET content = 'New value', updated_at = ## Querying memories +### Semantic recall (full store, by meaning) + +The sqlite queries below match on exact fields or `LIKE` substrings. To find a +fact by **meaning** across your *entire* long-term store — including older, +archived memories that aren't injected into the prompt — call the +`recall_memory` tool instead of writing SQL: + +- Each turn only injects your few most-relevant memories. When you suspect a + stored fact exists beyond them (e.g. 
"didn't they mention a food allergy?"), + call `recall_memory` with a natural-language `query` describing the fact. +- It ranks the whole store by semantic similarity and returns the best matches. + Recalling a memory also revives it (un-archives + reinforces it). + +Use `recall_memory` for fuzzy "do I know anything about X?" lookups; use the +sqlite queries below when you need exact filtering, counts, or bulk edits. + ### Search by subject ```bash diff --git a/tests/test_memory_tiers.py b/tests/test_memory_tiers.py index ded15f1..8895fe0 100644 --- a/tests/test_memory_tiers.py +++ b/tests/test_memory_tiers.py @@ -184,6 +184,86 @@ async def test_format_for_prompt_without_query_uses_recency(self, embed_store): assert "lives in zurich" in block +# -- recall_memory: deliberate full-store semantic lookup (issue #47) -- + + +async def _set_archived(store: MemoryStore, rid: int) -> None: + async with aiosqlite.connect(store.db_path) as db: + await db.execute("UPDATE long_term SET archived = 1 WHERE id = ?", (rid,)) + await db.commit() + + +class TestRecall: + async def test_empty_query_returns_nothing(self, embed_store): + await embed_store._insert_long_term("fact", "matteo", "lives in zurich") + assert await embed_store.recall(" ") == [] + + async def test_semantic_match_ranks_first(self, embed_store): + await embed_store._insert_long_term("health", "matteo", "allergic to shellfish") + await embed_store._insert_long_term("fact", "simge", "speaks turkish fluently") + + out = await embed_store.recall("food allergies and shellfish") + + assert out + assert out[0]["content"] == "allergic to shellfish" + + async def test_searches_and_unarchives_archived_rows(self, embed_store): + # An archived memory is invisible to injection but recall must still find + # it — and recalling it brings it back (un-archives + reinforces). 
+ emb = await _HashEmbedder().embed_one("allergic to shellfish") + rid = await _insert( + embed_store, "matteo", "allergic to shellfish", category="health", embedding=emb + ) + await _set_archived(embed_store, rid) + assert await embed_store.get_long_term() == [] # archived → not injected + + out = await embed_store.recall("shellfish allergy") + + assert any(m["content"] == "allergic to shellfish" for m in out) + row = await _row(embed_store, rid) + assert row["archived"] == 0 # un-archived on recall + assert row["access_count"] >= 1 # reinforced + + async def test_respects_limit(self, embed_store): + for i in range(6): + await embed_store._insert_long_term("fact", "matteo", f"likes hobby number {i}") + out = await embed_store.recall("matteo likes hobby", limit=3) + # All 6 rows clear the floor, so the limit slice must cap at exactly 3. + assert len(out) == 3 + + async def test_lexical_fallback_without_embedder(self, store): + # No embedder configured → recall falls back to token overlap, still works. + await store._insert_long_term("health", "matteo", "allergic to shellfish") + await store._insert_long_term("fact", "simge", "speaks turkish") + out = await store.recall("shellfish allergy") + assert any("shellfish" in m["content"] for m in out) + # The relevance floor drops the zero-overlap row (deterministic on the + # lexical path — guards _RECALL_MIN_RELEVANCE against being lowered to 0). + assert all("turkish" not in m["content"] for m in out) + + async def test_scope_isolation(self, embed_store): + # Recall must honour persona scope (#42): a persona sees shared + its own + # private memories, never another persona's private rows. 
+ await embed_store._insert_long_term("fact", "matteo", "allergic to dust", scope="") + await embed_store._insert_long_term( + "health", "matteo", "allergic to shellfish", scope="coach" + ) + await embed_store._insert_long_term( + "health", "matteo", "allergic to peanuts", scope="finance" + ) + + coach = { + m["content"] for m in await embed_store.recall("allergic allergies", scope="coach") + } + assert "allergic to shellfish" in coach # coach's own private + assert "allergic to dust" in coach # shared + assert "allergic to peanuts" not in coach # finance's private — never crosses + + # The default identity (scope="") sees shared only. + owner = {m["content"] for m in await embed_store.recall("allergic allergies", scope="")} + assert owner == {"allergic to dust"} + + # -- Tier 3: forgetting / importance / reinforcement -- diff --git a/tests/test_personae.py b/tests/test_personae.py index dadc78d..67ca6c6 100644 --- a/tests/test_personae.py +++ b/tests/test_personae.py @@ -78,11 +78,12 @@ def test_scoped_tools_filters_but_keeps_load_skill() -> None: def test_gateable_tools_in_sync_with_tools() -> None: # The admin UI lists GATEABLE_TOOLS for the scope checkboxes; it must stay # in sync with the real tool set (every tool except the always-on ones: - # load_skill plus the vault discovery/request tools — issue #19). + # load_skill, the vault discovery/request tools — issue #19 — and + # recall_memory, which mirrors always-on scoped memory injection — #47). 
from api.admin import GATEABLE_TOOLS from core.agent import TOOLS - always_on = {"load_skill", "list_secrets", "request_secret"} + always_on = {"load_skill", "recall_memory", "list_secrets", "request_secret"} assert set(GATEABLE_TOOLS) | always_on == {t["name"] for t in TOOLS} diff --git a/tests/test_tools.py b/tests/test_tools.py index 7faee9e..9ba21a2 100644 --- a/tests/test_tools.py +++ b/tests/test_tools.py @@ -471,3 +471,43 @@ async def fake_await(description, channel, user_id, tool_name=None, params=None) ) assert prompts["n"] == 0 # nothing to batch for a single write assert state["write_decisions"] == {} + + +# --------------------------------------------------------------------------- +# recall_memory tool — deliberate full-store semantic lookup (#47) +# --------------------------------------------------------------------------- + + +def _recall_call(call_id: str, **params): + from core.llm import LLMToolCall + + return LLMToolCall(id=call_id, name="recall_memory", arguments=dict(params)) + + +@pytest.mark.asyncio +async def test_recall_memory_tool_dispatch(agent, monkeypatch) -> None: + """recall_memory routes to the store and shapes the result; no approval prompt.""" + captured = {} + + async def fake_recall(query, limit=None, scope=None): + captured["query"], captured["limit"], captured["scope"] = query, limit, scope + return [{"category": "health", "subject": "matteo", "content": "allergic to shellfish"}] + + monkeypatch.setattr(agent.memory, "recall", fake_recall) + # The per-turn state carries the active persona's private memory scope, + # which recall must receive so it never crosses persona boundaries (#42). 
+ state = agent._new_request_state() + state["persona_name"] = "coach" + result = await agent._execute_tool( + _recall_call("1", query="food allergies", limit=5), "telegram", "u1", state + ) + assert captured == {"query": "food allergies", "limit": 5, "scope": "coach"} + assert result["count"] == 1 + assert result["memories"][0]["content"] == "allergic to shellfish" + + +@pytest.mark.asyncio +async def test_recall_memory_requires_query(agent) -> None: + """A blank query is rejected before hitting the store.""" + result = await agent._execute_tool(_recall_call("1", query=" "), "telegram", "u1") + assert "error" in result