From df875f25b85d2dafbf0966bfc87b86bff196f0b6 Mon Sep 17 00:00:00 2001
From: Matteo Merola <mattmezza@gmail.com>
Date: Sun, 28 Jun 2026 23:07:48 +0200
Subject: [PATCH 1/5] =?UTF-8?q?feat(memory):=20recall=5Fmemory=20tool=20?=
 =?UTF-8?q?=E2=80=94=20deliberate=20full-store=20semantic=20lookup=20(#47)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Phase 2 of #41. Always-on top-k injection covers the obvious facts but
can't surface everything as the store grows, and the agent can't ask for
a fact it doesn't suspect it has. Pair it with a recall_memory tool the
agent calls on demand:

- MemoryStore.recall(query, limit, scope): semantic search over the agent's
  ENTIRE long-term store (archived included), ranked purely by relevance
  above a small floor; lexical-overlap fallback when embeddings are off.
- Scope-aware (#42): filtered to scope IN ('', <persona>) exactly like the
  injection readers, so a persona only recalls shared + its own private
  memories, never another persona's. Scope is the per-turn persona_name.
- Recalling revives matches — reinforces them and un-archives any that
  were archived (looked up + matched = warm again).
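- recall_memory tool: no approval prompt (ALWAYS permission — its only write
  is the reinforce/un-archive bookkeeping above), always retained per persona
  like load_skill (memory injection is always-on and scope-filtered the same
  way, so recall exposes no scope the persona can't already see); nudged in
  the memory prompt.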
- recall_top_k config knob (default 10), hot-reloaded in patch_config.
---
 api/admin.py           |  7 +++-
 core/agent.py          | 55 ++++++++++++++++++++++++++-
 core/config.py         |  1 +
 core/memory.py         | 85 ++++++++++++++++++++++++++++++++++++++++--
 core/permissions.py    |  1 +
 core/prompt_builder.py |  4 +-
 6 files changed, 145 insertions(+), 8 deletions(-)
diff --git a/api/admin.py b/api/admin.py
index 1ee2386..6232914 100644
--- a/api/admin.py
+++ b/api/admin.py
@@ -1656,6 +1656,7 @@ async def patch_config(body: ConfigPatchIn) -> dict:
                 # Tier 3/4 lifecycle knobs so memory config changes apply live.
                 agent.memory.embedder = agent._build_embedder()
                 agent.memory.injection_top_k = mem_cfg.embedding.injection_top_k
+                agent.memory.recall_top_k = mem_cfg.embedding.recall_top_k
                 agent.memory.default_importance = mem_cfg.default_importance
                 agent.memory.archive_after_days = mem_cfg.archive_after_days
                 agent.memory.archive_max_importance = mem_cfg.archive_max_importance
@@ -3322,8 +3323,10 @@ def _config_requires_restart(values: dict) -> bool:
 
 # Function-tools that a persona may scope. ``load_skill`` is intentionally
 # excluded — it is always available (the core mechanic personae use to read
-# their allowlisted skills). Kept here (not imported from core.agent) to avoid
-# pulling the agent's heavy import graph into the admin app.
+# their allowlisted skills); so are the vault tools and ``recall_memory``
+# (memory is injected for every persona, scope-filtered, so its on-demand
+# counterpart is always available too). Kept here (not imported from core.agent)
+# to avoid pulling the agent's heavy import graph into the admin app.
 GATEABLE_TOOLS = [
     "run_command",
     "send_email",
diff --git a/core/agent.py b/core/agent.py
index 63075a7..ebe6854 100644
--- a/core/agent.py
+++ b/core/agent.py
@@ -186,6 +186,31 @@ def _shell_quote(s: str) -> str:
             "required": ["name"],
         },
     },
+    {
+        "name": "recall_memory",
+        "description": (
+            "Search your FULL long-term memory by meaning for facts about the user that "
+            "aren't already shown to you. Only the few most-relevant memories are injected "
+            "into each turn; call this when you suspect a relevant stored fact exists beyond "
+            "them — it searches the whole store, including older archived memories, and ranks "
+            "matches by relevance. Pass a natural-language query describing the fact you're "
+            "after (e.g. 'dietary restrictions and food allergies'), not just keywords."
+        ),
+        "input_schema": {
+            "type": "object",
+            "properties": {
+                "query": {
+                    "type": "string",
+                    "description": "Natural-language description of the fact(s) to recall",
+                },
+                "limit": {
+                    "type": "integer",
+                    "description": "Max memories to return (default 10).",
+                },
+            },
+            "required": ["query"],
+        },
+    },
     {
         "name": "manage_jobs",
         "description": (
@@ -374,7 +399,9 @@ def scoped_tools(persona: Persona | None) -> list[dict]:
         return TOOLS
     # ``load_skill`` and the vault discovery/request tools are always retained:
     # they are the mechanics personae rely on to read skills and obtain secrets.
-    _always = {"load_skill", "list_secrets", "request_secret"}
+    # ``recall_memory`` too — memory is injected for every persona (scope-filtered),
+    # so its on-demand counterpart exposes nothing extra and stays available (#47).
+    _always = {"load_skill", "recall_memory", "list_secrets", "request_secret"}
     return [t for t in TOOLS if persona.allows_tool(t["name"]) or t["name"] in _always]
 
 
@@ -419,6 +446,7 @@ def __init__(self, config: Config, secret_store: SecretStore | None = None):
             long_term_limit=mem_cfg.long_term_limit,
             embedder=self._build_embedder(),
             injection_top_k=mem_cfg.embedding.injection_top_k,
+            recall_top_k=mem_cfg.embedding.recall_top_k,
             default_importance=mem_cfg.default_importance,
             archive_after_days=mem_cfg.archive_after_days,
             archive_max_importance=mem_cfg.archive_max_importance,
@@ -1206,6 +1234,9 @@ async def _execute_tool(
                 return {"error": f"Skill not found: {skill_name}"}
             return {"name": skill_name, "content": content}
 
+        if name == "recall_memory":
+            return await self._tool_recall_memory(params, request_state)
+
         if name == "manage_jobs":
             log.info("Tool call: manage_jobs — %s", params.get("action", ""))
             result = await self._tool_manage_jobs(params)
@@ -1602,6 +1633,28 @@ async def _tool_manage_jobs(self, params: dict) -> dict:
 
         return {"error": f"Unknown action: {action!r}. Use 'create', 'list', or 'cancel'."}
 
+    async def _tool_recall_memory(self, params: dict, request_state: dict | None = None) -> dict:
+        """Deliberate semantic search over the full long-term memory store (#47).
+
+        Scoped to the active persona (#42): ``persona_name`` on the per-turn
+        request state is the persona's private memory scope (``""`` = the default
+        identity's shared-only view), so recall never crosses into another
+        persona's private memories — same boundary the injection readers enforce.
+        """
+        query = str(params.get("query", "")).strip()
+        if not query:
+            return {"error": "Missing 'query'."}
+        limit = params.get("limit")
+        limit = limit if isinstance(limit, int) and not isinstance(limit, bool) else None
+        scope = (request_state or {}).get("persona_name") or ""
+        try:
+            memories = await self.memory.recall(query, limit, scope=scope)
+        except Exception:
+            log.exception("recall_memory failed for query: %s", query)
+            return {"error": "Memory recall failed."}
+        log.info("Tool call: recall_memory — %r (%d hits)", query, len(memories))
+        return {"query": query, "count": len(memories), "memories": memories}
+
     async def _tool_web_search(self, params: dict) -> dict:
         """Search the web via Tavily API."""
         if not self.search_client:
diff --git a/core/config.py b/core/config.py
index fb07164..9082300 100644
--- a/core/config.py
+++ b/core/config.py
@@ -181,6 +181,7 @@ class EmbeddingConfig(BaseModel):
     base_url: str = ""  # API providers only; falls back to the agent provider base URL when empty
     dimensions: int = 0  # 0 = provider default (API providers only)
     injection_top_k: int = 12  # relevance-ranked memories injected per turn
+    recall_top_k: int = 10  # max memories returned by the recall_memory tool (full-store lookup)
 
 
 class MemoryConfig(BaseModel):
diff --git a/core/memory.py b/core/memory.py
index 5d1eec3..677085a 100644
--- a/core/memory.py
+++ b/core/memory.py
@@ -492,6 +492,7 @@ def __init__(
         *,
         embedder: EmbeddingClient | None = None,
         injection_top_k: int = 12,
+        recall_top_k: int = 10,
         default_importance: float = 5.0,
         archive_after_days: int = 90,
         archive_max_importance: float = 4.0,
@@ -503,6 +504,7 @@ def __init__(
         self.long_term_limit = long_term_limit
         self.embedder = embedder
         self.injection_top_k = injection_top_k
+        self.recall_top_k = recall_top_k
         self.default_importance = default_importance
         self.archive_after_days = archive_after_days
         self.archive_max_importance = archive_max_importance
@@ -628,15 +630,90 @@ async def get_relevant_long_term(self, query: str, scope: str | None = None) ->
             for r in top
         ]
 
-    async def _reinforce(self, ids: list[int]) -> None:
-        """Strengthen recalled memories: bump access_count and last_accessed."""
+    # Upper bound on how many memories one recall_memory call may return, and the
+    # minimum relevance a row needs to be worth returning at all.
+    _RECALL_MAX_LIMIT = 25
+    # ponytail: relevance floor — raise to cut noise, lower to surface more long tail.
+    _RECALL_MIN_RELEVANCE = 0.1
+
+    async def recall(
+        self, query: str, limit: int | None = None, scope: str | None = None
+    ) -> list[dict]:
+        """Deliberate semantic search over the FULL long-term store (issue #47).
+
+        The on-demand complement to the always-injected top-k
+        (:meth:`get_relevant_long_term`): injection ranks only *non-archived*
+        rows by a recency+importance+relevance blend under a small per-turn cap;
+        recall searches *every* long-term memory (archived included), ranks purely
+        by relevance to *query*, and returns matches above a small relevance floor.
+        ``limit`` < 1 or unset means ``recall_top_k``; clamped to 1.._RECALL_MAX_LIMIT.
+
+        Recalling reinforces the returned rows and un-archives any that had been
+        archived (the agent looked them up and they matched, so they are warm
+        again). Falls back to lexical token overlap when embeddings are
+        unavailable or the query can't be embedded, so recall always works.
+
+        ``scope`` filters per #42 (see :func:`_scope_filter`) exactly like the
+        injection readers, so a persona only recalls shared + its own private
+        memories, never another persona's; ``None`` = every scope.
+        """
+        query = (query or "").strip()
+        if not query:
+            return []
+        limit = self.recall_top_k if not limit or limit < 1 else limit
+        limit = max(1, min(limit, self._RECALL_MAX_LIMIT))  # guard against recall_top_k <= 0
+
+        await self._ensure_schema()
+        clause, params = _scope_filter(scope)
+        async with aiosqlite.connect(self.db_path) as db:
+            db.row_factory = aiosqlite.Row
+            cursor = await db.execute(
+                "SELECT id, category, subject, content, embedding, archived "  # noqa: S608
+                f"FROM long_term WHERE 1=1{clause}",
+                params,
+            )
+            rows = [dict(r) for r in await cursor.fetchall()]
+        if not rows:
+            return []
+
+        # Embedding cosine for rows with a matching-dim vector; lexical for the rest.
+        query_vec = await self._safe_embed(query)
+        rel_map = _batch_relevance(query_vec, rows)
+        query_tokens = _tokens(query)
+
+        scored: list[tuple[float, dict]] = []
+        for i, row in enumerate(rows):
+            if i in rel_map:
+                relevance = rel_map[i]
+            else:
+                relevance = _similarity(query_tokens, _tokens(f"{row['subject']} {row['content']}"))
+            if relevance >= self._RECALL_MIN_RELEVANCE:
+                scored.append((relevance, row))
+
+        scored.sort(key=lambda pair: pair[0], reverse=True)
+        top = [row for _, row in scored[:limit]]
+        # Recall revives matches: reinforce + un-archive. The returned rows are
+        # therefore all non-archived now, so no archived flag is surfaced.
+        await self._reinforce([row["id"] for row in top], unarchive=True)
+        return [
+            {"category": r["category"], "subject": r["subject"], "content": r["content"]}
+            for r in top
+        ]
+
+    async def _reinforce(self, ids: list[int], *, unarchive: bool = False) -> None:
+        """Strengthen recalled memories: bump access_count and last_accessed.
+
+        With *unarchive*, also clear the archived flag — a memory the agent
+        deliberately recalled and used is demonstrably warm again (issue #47).
+        """
         if not ids:
             return
         await self._ensure_schema()
+        archived_clause = ", archived = 0" if unarchive else ""
         async with aiosqlite.connect(self.db_path) as db:
             await db.executemany(
-                "UPDATE long_term SET access_count = access_count + 1, "
-                "last_accessed = datetime('now') WHERE id = ?",
+                "UPDATE long_term SET access_count = access_count + 1, "  # noqa: S608
+                f"last_accessed = datetime('now'){archived_clause} WHERE id = ?",
                 [(i,) for i in ids],
             )
             await db.commit()
diff --git a/core/permissions.py b/core/permissions.py
index 787255b..00d3863 100644
--- a/core/permissions.py
+++ b/core/permissions.py
@@ -117,6 +117,7 @@ class PermissionLevel:
     "run_command:git*push*": "ASK",
     "run_command:git*commit*": "ASK",
     "web_search": "ALWAYS",
+    "recall_memory": "ALWAYS",
     # Write operations — ask first
     "send_email": "ASK",
     "reply_email": "ASK",
diff --git a/core/prompt_builder.py b/core/prompt_builder.py
index a95aef0..6fc9f64 100644
--- a/core/prompt_builder.py
+++ b/core/prompt_builder.py
@@ -181,7 +181,9 @@ def build_prompt_sections(
     memory_instruction = (
         "You can store and recall memories using the sqlite3 CLI (see the memory skill).\n"
         "Proactively remember important facts about the user and their contacts.\n"
-        "Before inserting a new long-term memory, check if it already exists to avoid duplicates."
+        "Before inserting a new long-term memory, check if it already exists to avoid duplicates.\n"
+        "Only your most relevant memories are shown each turn; when you suspect a stored fact "
+        "isn't among them, call the recall_memory tool to search your full memory by meaning."
     )
 
     history_handling = ""

From c192cbf6e311a9018c418e26057f0687fe9290cc Mon Sep 17 00:00:00 2001
From: Matteo Merola <mattmezza@gmail.com>
Date: Sun, 28 Jun 2026 23:07:55 +0200
Subject: [PATCH 2/5] test(memory): recall full-store lookup, scope isolation +
 tool dispatch (#47)

Covers semantic ranking, relevance-floor exclusion, archived-row search +
un-archive/reinforce, limit cap, lexical fallback without an embedder,
per-persona scope isolation (a persona never recalls another's private
rows), tool dispatch with scope plumbing, and the blank-query guard.
---
 tests/test_memory_tiers.py | 79 ++++++++++++++++++++++++++++++++++++++
 tests/test_personae.py     |  5 ++-
 tests/test_tools.py        | 40 +++++++++++++++++++
 3 files changed, 122 insertions(+), 2 deletions(-)

diff --git a/tests/test_memory_tiers.py b/tests/test_memory_tiers.py
index ded15f1..3c7739c 100644
--- a/tests/test_memory_tiers.py
+++ b/tests/test_memory_tiers.py
@@ -184,6 +184,85 @@ async def test_format_for_prompt_without_query_uses_recency(self, embed_store):
         assert "lives in zurich" in block
 
 
+# -- recall_memory: deliberate full-store semantic lookup (issue #47) --
+
+
+async def _set_archived(store: MemoryStore, rid: int) -> None:
+    async with aiosqlite.connect(store.db_path) as db:
+        await db.execute("UPDATE long_term SET archived = 1 WHERE id = ?", (rid,))
+        await db.commit()
+
+
+class TestRecall:
+    async def test_empty_query_returns_nothing(self, embed_store):
+        await embed_store._insert_long_term("fact", "matteo", "lives in zurich")
+        assert await embed_store.recall("   ") == []
+
+    async def test_semantic_match_ranks_first(self, embed_store):
+        await embed_store._insert_long_term("health", "matteo", "allergic to shellfish")
+        await embed_store._insert_long_term("fact", "simge", "speaks turkish fluently")
+
+        out = await embed_store.recall("food allergies and shellfish")
+
+        assert out
+        assert out[0]["content"] == "allergic to shellfish"
+        # The unrelated row is below the relevance floor and must be excluded.
+        assert all(m["content"] != "speaks turkish fluently" for m in out)
+
+    async def test_searches_and_unarchives_archived_rows(self, embed_store):
+        # An archived memory is invisible to injection but recall must still find
+        # it — and recalling it brings it back (un-archives + reinforces).
+        emb = await _HashEmbedder().embed_one("allergic to shellfish")
+        rid = await _insert(
+            embed_store, "matteo", "allergic to shellfish", category="health", embedding=emb
+        )
+        await _set_archived(embed_store, rid)
+        assert await embed_store.get_long_term() == []  # archived → not injected
+
+        out = await embed_store.recall("shellfish allergy")
+
+        assert any(m["content"] == "allergic to shellfish" for m in out)
+        row = await _row(embed_store, rid)
+        assert row["archived"] == 0  # un-archived on recall
+        assert row["access_count"] >= 1  # reinforced
+
+    async def test_respects_limit(self, embed_store):
+        for i in range(6):
+            await embed_store._insert_long_term("fact", "matteo", f"likes hobby number {i}")
+        out = await embed_store.recall("matteo likes hobby", limit=3)
+        # All 6 rows clear the floor, so the limit slice must cap at exactly 3.
+        assert len(out) == 3
+
+    async def test_lexical_fallback_without_embedder(self, store):
+        # No embedder configured → recall falls back to token overlap, still works.
+        await store._insert_long_term("health", "matteo", "allergic to shellfish")
+        await store._insert_long_term("fact", "simge", "speaks turkish")
+        out = await store.recall("shellfish allergy")
+        assert any("shellfish" in m["content"] for m in out)
+
+    async def test_scope_isolation(self, embed_store):
+        # Recall must honour persona scope (#42): a persona sees shared + its own
+        # private memories, never another persona's private rows.
+        await embed_store._insert_long_term("fact", "matteo", "allergic to dust", scope="")
+        await embed_store._insert_long_term(
+            "health", "matteo", "allergic to shellfish", scope="coach"
+        )
+        await embed_store._insert_long_term(
+            "health", "matteo", "allergic to peanuts", scope="finance"
+        )
+
+        coach = {
+            m["content"] for m in await embed_store.recall("allergic allergies", scope="coach")
+        }
+        assert "allergic to shellfish" in coach  # coach's own private
+        assert "allergic to dust" in coach  # shared
+        assert "allergic to peanuts" not in coach  # finance's private — never crosses
+
+        # The default identity (scope="") sees shared only.
+        owner = {m["content"] for m in await embed_store.recall("allergic allergies", scope="")}
+        assert owner == {"allergic to dust"}
+
+
 # -- Tier 3: forgetting / importance / reinforcement --
 
 
diff --git a/tests/test_personae.py b/tests/test_personae.py
index dadc78d..67ca6c6 100644
--- a/tests/test_personae.py
+++ b/tests/test_personae.py
@@ -78,11 +78,12 @@ def test_scoped_tools_filters_but_keeps_load_skill() -> None:
 def test_gateable_tools_in_sync_with_tools() -> None:
     # The admin UI lists GATEABLE_TOOLS for the scope checkboxes; it must stay
     # in sync with the real tool set (every tool except the always-on ones:
-    # load_skill plus the vault discovery/request tools — issue #19).
+    # load_skill, the vault discovery/request tools — issue #19 — and
+    # recall_memory, which mirrors always-on scoped memory injection — #47).
     from api.admin import GATEABLE_TOOLS
     from core.agent import TOOLS
 
-    always_on = {"load_skill", "list_secrets", "request_secret"}
+    always_on = {"load_skill", "recall_memory", "list_secrets", "request_secret"}
     assert set(GATEABLE_TOOLS) | always_on == {t["name"] for t in TOOLS}
 
 
diff --git a/tests/test_tools.py b/tests/test_tools.py
index 7faee9e..9ba21a2 100644
--- a/tests/test_tools.py
+++ b/tests/test_tools.py
@@ -471,3 +471,43 @@ async def fake_await(description, channel, user_id, tool_name=None, params=None)
     )
     assert prompts["n"] == 0  # nothing to batch for a single write
     assert state["write_decisions"] == {}
+
+
+# ---------------------------------------------------------------------------
+# recall_memory tool — deliberate full-store semantic lookup (#47)
+# ---------------------------------------------------------------------------
+
+
+def _recall_call(call_id: str, **params):
+    from core.llm import LLMToolCall
+
+    return LLMToolCall(id=call_id, name="recall_memory", arguments=dict(params))
+
+
+@pytest.mark.asyncio
+async def test_recall_memory_tool_dispatch(agent, monkeypatch) -> None:
+    """recall_memory routes to the store and shapes the result; no approval prompt."""
+    captured = {}
+
+    async def fake_recall(query, limit=None, scope=None):
+        captured["query"], captured["limit"], captured["scope"] = query, limit, scope
+        return [{"category": "health", "subject": "matteo", "content": "allergic to shellfish"}]
+
+    monkeypatch.setattr(agent.memory, "recall", fake_recall)
+    # The per-turn state carries the active persona's private memory scope,
+    # which recall must receive so it never crosses persona boundaries (#42).
+    state = agent._new_request_state()
+    state["persona_name"] = "coach"
+    result = await agent._execute_tool(
+        _recall_call("1", query="food allergies", limit=5), "telegram", "u1", state
+    )
+    assert captured == {"query": "food allergies", "limit": 5, "scope": "coach"}
+    assert result["count"] == 1
+    assert result["memories"][0]["content"] == "allergic to shellfish"
+
+
+@pytest.mark.asyncio
+async def test_recall_memory_requires_query(agent) -> None:
+    """A blank query is rejected before hitting the store."""
+    result = await agent._execute_tool(_recall_call("1", query="   "), "telegram", "u1")
+    assert "error" in result

From aa7799209fd103fed3d3562d928cb1275ed6acfc Mon Sep 17 00:00:00 2001
From: Matteo Merola <mattmezza@gmail.com>
Date: Sun, 28 Jun 2026 23:07:55 +0200
Subject: [PATCH 3/5] docs(memory): document recall_memory tool + recall_top_k
 (#47)

---
 docs/content/docs/memory.mdx | 13 +++++++++++++
 skills/memory.md             | 16 ++++++++++++++++
 2 files changed, 29 insertions(+)

diff --git a/docs/content/docs/memory.mdx b/docs/content/docs/memory.mdx
index c9001be..b1703ae 100644
--- a/docs/content/docs/memory.mdx
+++ b/docs/content/docs/memory.mdx
@@ -128,9 +128,21 @@ Set `memory.embedding.enabled: false` to fall back to **Tier-1 lexical** (word-o
 ### What embeddings power
 
 - **Relevance-ranked injection.** Instead of dumping the most recent `long_term_limit` rows into the prompt, only the `injection_top_k` (default 12) memories most relevant to the *current message* are injected. They're scored Generative-Agents style: `relevance + 0.5·importance + 0.3·recency`. Injection happens in the **per-turn preamble** (prepended to the current user message), not the static system prompt — so even in session mode (where the static prompt is snapshotted once) the current message is the query every turn, and a fact written mid-session is visible on the next turn without `/new`.
+- **On-demand recall** (the `recall_memory` tool, below).
 - **Dedup.** `update_memory` retrieves ADD/UPDATE/DELETE/NOOP candidates by cosine similarity (with a per-row lexical fallback for any memory that has no vector yet).
 - **Hygiene clustering** (Tier 4, below).
 
+### `recall_memory` — deliberate full-store lookup
+
+Relevance-ranked injection is a small always-on top-k: great for the obvious facts, but as the store grows it can't surface everything, and the agent *can't ask for a fact it doesn't suspect it has*. So injection is paired with a **`recall_memory` tool** the agent calls deliberately when it suspects a relevant stored fact isn't in the injected set (hybrid retrieval — always-on top-k **plus** on-demand recall).
+
+- Searches the agent's **entire** long-term store — **including archived** memories that injection never sees — ranked purely by semantic relevance to the agent's query (lexical-overlap fallback when embeddings are off).
+- Returns up to `recall_top_k` (default 10) matches above a small relevance floor.
+- Recalling **revives** the matched rows: they're reinforced (`access_count` / `last_accessed`) and any archived one is un-archived — a fact the agent looked up and used is warm again.
+- **Scope-aware** (#42): recall is filtered to `scope IN ('', <active persona>)` exactly like injection, so a persona only ever recalls shared facts plus its own private ones — never another persona's. The default identity recalls shared only.
+
+The tool is read-only (no approval prompt) and available to every persona (it can't surface anything injection couldn't already show that persona), alongside the sqlite CLI the [memory skill](#) teaches for exact-field queries and bulk edits.
+
 All of this is configurable live from the **admin Memory tab** (enable toggle, backend, model, top-k, plus Download / Test buttons).
 
 ## Tier 3 — Forgetting, importance & reinforcement
@@ -230,6 +242,7 @@ memory:
     base_url: ""                       # API providers only; falls back to agent provider base URL
     dimensions: 0                      # 0 = provider default (API providers only)
     injection_top_k: 12                # relevance-ranked memories injected per turn
+    recall_top_k: 10                   # max memories returned by the recall_memory tool
 
   # Tier 3 — forgetting / importance / reinforcement
   default_importance: 5.0              # 1-10 scale assigned to new long-term memories
diff --git a/skills/memory.md b/skills/memory.md
index 76549c1..76ec6da 100644
--- a/skills/memory.md
+++ b/skills/memory.md
@@ -37,6 +37,22 @@ sqlite3 data/memory.db "UPDATE long_term SET content = 'New value', updated_at =
 
 ## Querying memories
 
+### Semantic recall (full store, by meaning)
+
+The sqlite queries below match on exact fields or `LIKE` substrings. To find a
+fact by **meaning** across your *entire* long-term store — including older,
+archived memories that aren't injected into the prompt — call the
+`recall_memory` tool instead of writing SQL:
+
+- Each turn only injects your few most-relevant memories. When you suspect a
+  stored fact exists beyond them (e.g. "didn't they mention a food allergy?"),
+  call `recall_memory` with a natural-language `query` describing the fact.
+- It ranks the whole store by semantic similarity and returns the best matches.
+  Recalling a memory also revives it (un-archives + reinforces it).
+
+Use `recall_memory` for fuzzy "do I know anything about X?" lookups; use the
+sqlite queries below when you need exact filtering, counts, or bulk edits.
+
 ### Search by subject
 
 ```bash

From 703ec80cd1feb10b16f80e3a94d6d15944e721c5 Mon Sep 17 00:00:00 2001
From: Matteo Merola <mattmezza@gmail.com>
Date: Sun, 28 Jun 2026 23:17:49 +0200
Subject: [PATCH 4/5] test(memory): de-flake recall relevance-floor check; fix
 dead doc link (#47)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The floor-exclusion assertion lived on the embedding path, where the test
_HashEmbedder buckets tokens via salted hash() into 64 dims — collisions
pushed the unrelated row above the floor for ~20% of PYTHONHASHSEEDs
(e.g. seed 42), failing intermittently. Move the exclusion check to the
lexical-fallback path, where zero token overlap is deterministic and still
guards _RECALL_MIN_RELEVANCE against being lowered to 0. Verified green
across seeds 0/1/2/7/42/123/999.

Also drop a placeholder (#) markdown link in the memory docs.
---
 docs/content/docs/memory.mdx | 2 +-
 tests/test_memory_tiers.py   | 5 +++--
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/docs/content/docs/memory.mdx b/docs/content/docs/memory.mdx
index b1703ae..b9195e8 100644
--- a/docs/content/docs/memory.mdx
+++ b/docs/content/docs/memory.mdx
@@ -141,7 +141,7 @@ Relevance-ranked injection is a small always-on top-k: great for the obvious fac
 - Recalling **revives** the matched rows: they're reinforced (`access_count` / `last_accessed`) and any archived one is un-archived — a fact the agent looked up and used is warm again.
 - **Scope-aware** (#42): recall is filtered to `scope IN ('', <active persona>)` exactly like injection, so a persona only ever recalls shared facts plus its own private ones — never another persona's. The default identity recalls shared only.
 
-The tool is read-only (no approval prompt) and available to every persona (it can't surface anything injection couldn't already show that persona), alongside the sqlite CLI the [memory skill](#) teaches for exact-field queries and bulk edits.
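+The tool needs no approval prompt (its only writes are the reinforce / un-archive bookkeeping described above) and is available to every persona (it is scope-filtered exactly like injection, so it never reaches beyond the shared and own-private memories that persona can already see), alongside the sqlite CLI the memory skill teaches for exact-field queries and bulk edits.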
 
 All of this is configurable live from the **admin Memory tab** (enable toggle, backend, model, top-k, plus Download / Test buttons).
 
diff --git a/tests/test_memory_tiers.py b/tests/test_memory_tiers.py
index 3c7739c..8895fe0 100644
--- a/tests/test_memory_tiers.py
+++ b/tests/test_memory_tiers.py
@@ -206,8 +206,6 @@ async def test_semantic_match_ranks_first(self, embed_store):
 
         assert out
         assert out[0]["content"] == "allergic to shellfish"
-        # The unrelated row is below the relevance floor and must be excluded.
-        assert all(m["content"] != "speaks turkish fluently" for m in out)
 
     async def test_searches_and_unarchives_archived_rows(self, embed_store):
         # An archived memory is invisible to injection but recall must still find
@@ -239,6 +237,9 @@ async def test_lexical_fallback_without_embedder(self, store):
         await store._insert_long_term("fact", "simge", "speaks turkish")
         out = await store.recall("shellfish allergy")
         assert any("shellfish" in m["content"] for m in out)
+        # The relevance floor drops the zero-overlap row (deterministic on the
+        # lexical path — guards _RECALL_MIN_RELEVANCE against being lowered to 0).
+        assert all("turkish" not in m["content"] for m in out)
 
     async def test_scope_isolation(self, embed_store):
         # Recall must honour persona scope (#42): a persona sees shared + its own

From ce1615707e1d98ca52656afd83def60f941124d3 Mon Sep 17 00:00:00 2001
From: Matteo Merola <mattmezza@gmail.com>
Date: Sun, 28 Jun 2026 23:22:33 +0200
Subject: [PATCH 5/5] feat(admin): expose recall_top_k in the Memory tab (#47)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Sibling of injection_top_k: surfaces memory.embedding.recall_top_k via the
config GET (emb_recall_top_k) and a number input next to the existing
top-k field, capped at 25 (the recall hard limit) so a set value is never
silently clamped. Saved through the same embedding-settings PATCH, which
already hot-applies it to the running store — no restart.
---
 api/admin.py                       |  1 +
 api/templates/partials/memory.html | 10 +++++++++-
 2 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/api/admin.py b/api/admin.py
index 6232914..318be10 100644
--- a/api/admin.py
+++ b/api/admin.py
@@ -1300,6 +1300,7 @@ async def _bool(key: str, default: str) -> str:
             "emb_model": await _cfg("memory.embedding.model", "BAAI/bge-small-en-v1.5"),
             "emb_base_url": await _cfg("memory.embedding.base_url", ""),
             "emb_top_k": await _cfg("memory.embedding.injection_top_k", "12"),
+            "emb_recall_top_k": await _cfg("memory.embedding.recall_top_k", "10"),
             "hygiene_enabled": await _bool("memory.hygiene_enabled", "true"),
             "default_importance": await _cfg("memory.default_importance", "5.0"),
             "archive_after_days": await _cfg("memory.archive_after_days", "90"),
diff --git a/api/templates/partials/memory.html b/api/templates/partials/memory.html
index b46150a..d865be4 100644
--- a/api/templates/partials/memory.html
+++ b/api/templates/partials/memory.html
@@ -9,6 +9,7 @@
     embModel: {{ emb_model|default('BAAI/bge-small-en-v1.5', true)|tojson|forceescape }},
     embBaseUrl: {{ emb_base_url|default('', true)|tojson|forceescape }},
     embTopK: {{ emb_top_k|default('12', true)|tojson|forceescape }},
+    embRecallTopK: {{ emb_recall_top_k|default('10', true)|tojson|forceescape }},
     embResult: '', embOk: false,
     embStatus: null, embBusy: false, embTestResult: '',
     // Lifecycle (Tier 3/4)
@@ -151,6 +152,12 @@ <h2 class="text-base mb-1">Semantic memory (embeddings)</h2>
         <p class="text-muted text-xs mt-1">How many of the most relevant long-term memories to put in the prompt each message.</p>
       </div>
 
+      <div>
+        <label class="label">Recall results (recall_memory tool)</label>
+        <input type="number" class="input-sm" style="max-width:120px" x-model="embRecallTopK" :disabled="!embEnabled" min="1" max="25">
+        <p class="text-muted text-xs mt-1">Default number of memories the recall_memory tool returns when the agent searches its full long-term store on demand (the agent may request fewer/more per call, capped at 25).</p>
+      </div>
+
       <div class="text-xs" x-show="isLocal && embStatus">
         Model on disk:
         <span x-show="embStatus && embStatus.model_ready" class="text-success">yes ✓</span>
@@ -165,7 +172,8 @@ <h2 class="text-base mb-1">Semantic memory (embeddings)</h2>
                 'memory.embedding.provider': embProvider,
                 'memory.embedding.model': embModel,
                 'memory.embedding.base_url': embBaseUrl,
-                'memory.embedding.injection_top_k': String(embTopK)
+                'memory.embedding.injection_top_k': String(embTopK),
+                'memory.embedding.recall_top_k': String(embRecallTopK)
               }, 'Embedding settings saved', 'embOk', 'embResult').then(loadEmbStatus)">
         Save embedding settings
       </button>