From df875f25b85d2dafbf0966bfc87b86bff196f0b6 Mon Sep 17 00:00:00 2001 From: Matteo Merola Date: Sun, 28 Jun 2026 23:07:48 +0200 Subject: [PATCH 1/5] =?UTF-8?q?feat(memory):=20recall=5Fmemory=20tool=20?= =?UTF-8?q?=E2=80=94=20deliberate=20full-store=20semantic=20lookup=20(#47)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Phase 2 of #41. Always-on top-k injection covers the obvious facts but can't surface everything as the store grows, and the agent can't ask for a fact it doesn't suspect it has. Pair it with a recall_memory tool the agent calls on demand: - MemoryStore.recall(query, limit, scope): semantic search over the agent's ENTIRE long-term store (archived included), ranked purely by relevance above a small floor; lexical-overlap fallback when embeddings are off. - Scope-aware (#42): filtered to scope IN ('', persona_name) exactly like the injection readers, so a persona only recalls shared + its own private memories, never another persona's. Scope is the per-turn persona_name. - Recalling revives matches — reinforces them and un-archives any that were archived (looked up + matched = warm again). - recall_memory tool: read-only (ALWAYS permission, no prompt), always retained per persona like load_skill (memory injection is always-on too, so recall exposes nothing extra); nudged in the memory prompt. - recall_top_k config knob (default 10), hot-reloaded in patch_config. --- api/admin.py | 7 +++- core/agent.py | 55 ++++++++++++++++++++++++++- core/config.py | 1 + core/memory.py | 85 ++++++++++++++++++++++++++++++++++++++++-- core/permissions.py | 1 + core/prompt_builder.py | 4 +- 6 files changed, 145 insertions(+), 8 deletions(-) diff --git a/api/admin.py b/api/admin.py index 1ee2386..6232914 100644 --- a/api/admin.py +++ b/api/admin.py @@ -1656,6 +1656,7 @@ async def patch_config(body: ConfigPatchIn) -> dict: # Tier 3/4 lifecycle knobs so memory config changes apply live. 
agent.memory.embedder = agent._build_embedder() agent.memory.injection_top_k = mem_cfg.embedding.injection_top_k + agent.memory.recall_top_k = mem_cfg.embedding.recall_top_k agent.memory.default_importance = mem_cfg.default_importance agent.memory.archive_after_days = mem_cfg.archive_after_days agent.memory.archive_max_importance = mem_cfg.archive_max_importance @@ -3322,8 +3323,10 @@ def _config_requires_restart(values: dict) -> bool: # Function-tools that a persona may scope. ``load_skill`` is intentionally # excluded — it is always available (the core mechanic personae use to read -# their allowlisted skills). Kept here (not imported from core.agent) to avoid -# pulling the agent's heavy import graph into the admin app. +# their allowlisted skills); so are the vault tools and ``recall_memory`` +# (memory is injected for every persona, scope-filtered, so its on-demand +# counterpart is always available too). Kept here (not imported from core.agent) +# to avoid pulling the agent's heavy import graph into the admin app. GATEABLE_TOOLS = [ "run_command", "send_email", diff --git a/core/agent.py b/core/agent.py index 63075a7..ebe6854 100644 --- a/core/agent.py +++ b/core/agent.py @@ -186,6 +186,31 @@ def _shell_quote(s: str) -> str: "required": ["name"], }, }, + { + "name": "recall_memory", + "description": ( + "Search your FULL long-term memory by meaning for facts about the user that " + "aren't already shown to you. Only the few most-relevant memories are injected " + "into each turn; call this when you suspect a relevant stored fact exists beyond " + "them — it searches the whole store, including older archived memories, and ranks " + "matches by relevance. Pass a natural-language query describing the fact you're " + "after (e.g. 'dietary restrictions and food allergies'), not just keywords." 
+ ), + "input_schema": { + "type": "object", + "properties": { + "query": { + "type": "string", + "description": "Natural-language description of the fact(s) to recall", + }, + "limit": { + "type": "integer", + "description": "Max memories to return (default 10).", + }, + }, + "required": ["query"], + }, + }, { "name": "manage_jobs", "description": ( @@ -374,7 +399,9 @@ def scoped_tools(persona: Persona | None) -> list[dict]: return TOOLS # ``load_skill`` and the vault discovery/request tools are always retained: # they are the mechanics personae rely on to read skills and obtain secrets. - _always = {"load_skill", "list_secrets", "request_secret"} + # ``recall_memory`` too — memory is injected for every persona (scope-filtered), + # so its on-demand counterpart exposes nothing extra and stays available (#47). + _always = {"load_skill", "recall_memory", "list_secrets", "request_secret"} return [t for t in TOOLS if persona.allows_tool(t["name"]) or t["name"] in _always] @@ -419,6 +446,7 @@ def __init__(self, config: Config, secret_store: SecretStore | None = None): long_term_limit=mem_cfg.long_term_limit, embedder=self._build_embedder(), injection_top_k=mem_cfg.embedding.injection_top_k, + recall_top_k=mem_cfg.embedding.recall_top_k, default_importance=mem_cfg.default_importance, archive_after_days=mem_cfg.archive_after_days, archive_max_importance=mem_cfg.archive_max_importance, @@ -1206,6 +1234,9 @@ async def _execute_tool( return {"error": f"Skill not found: {skill_name}"} return {"name": skill_name, "content": content} + if name == "recall_memory": + return await self._tool_recall_memory(params, request_state) + if name == "manage_jobs": log.info("Tool call: manage_jobs — %s", params.get("action", "")) result = await self._tool_manage_jobs(params) @@ -1602,6 +1633,28 @@ async def _tool_manage_jobs(self, params: dict) -> dict: return {"error": f"Unknown action: {action!r}. 
Use 'create', 'list', or 'cancel'."} + async def _tool_recall_memory(self, params: dict, request_state: dict | None = None) -> dict: + """Deliberate semantic search over the full long-term memory store (#47). + + Scoped to the active persona (#42): ``persona_name`` on the per-turn + request state is the persona's private memory scope (``""`` = the default + identity's shared-only view), so recall never crosses into another + persona's private memories — same boundary the injection readers enforce. + """ + query = str(params.get("query", "")).strip() + if not query: + return {"error": "Missing 'query'."} + limit = params.get("limit") + limit = limit if isinstance(limit, int) and not isinstance(limit, bool) else None + scope = (request_state or {}).get("persona_name") or "" + try: + memories = await self.memory.recall(query, limit, scope=scope) + except Exception: + log.exception("recall_memory failed for query: %s", query) + return {"error": "Memory recall failed."} + log.info("Tool call: recall_memory — %r (%d hits)", query, len(memories)) + return {"query": query, "count": len(memories), "memories": memories} + async def _tool_web_search(self, params: dict) -> dict: """Search the web via Tavily API.""" if not self.search_client: diff --git a/core/config.py b/core/config.py index fb07164..9082300 100644 --- a/core/config.py +++ b/core/config.py @@ -181,6 +181,7 @@ class EmbeddingConfig(BaseModel): base_url: str = "" # API providers only; falls back to the agent provider base URL when empty dimensions: int = 0 # 0 = provider default (API providers only) injection_top_k: int = 12 # relevance-ranked memories injected per turn + recall_top_k: int = 10 # max memories returned by the recall_memory tool (full-store lookup) class MemoryConfig(BaseModel): diff --git a/core/memory.py b/core/memory.py index 5d1eec3..677085a 100644 --- a/core/memory.py +++ b/core/memory.py @@ -492,6 +492,7 @@ def __init__( *, embedder: EmbeddingClient | None = None, injection_top_k: int = 12, 
+ recall_top_k: int = 10, default_importance: float = 5.0, archive_after_days: int = 90, archive_max_importance: float = 4.0, @@ -503,6 +504,7 @@ def __init__( self.long_term_limit = long_term_limit self.embedder = embedder self.injection_top_k = injection_top_k + self.recall_top_k = recall_top_k self.default_importance = default_importance self.archive_after_days = archive_after_days self.archive_max_importance = archive_max_importance @@ -628,15 +630,90 @@ async def get_relevant_long_term(self, query: str, scope: str | None = None) -> for r in top ] - async def _reinforce(self, ids: list[int]) -> None: - """Strengthen recalled memories: bump access_count and last_accessed.""" + # Upper bound on how many memories one recall_memory call may return, and the + # minimum relevance a row needs to be worth returning at all. + _RECALL_MAX_LIMIT = 25 + # ponytail: relevance floor — raise to cut noise, lower to surface more long tail. + _RECALL_MIN_RELEVANCE = 0.1 + + async def recall( + self, query: str, limit: int | None = None, scope: str | None = None + ) -> list[dict]: + """Deliberate semantic search over the FULL long-term store (issue #47). + + This is the agent's on-demand recall tool — the complement to the + always-injected top-k (:meth:`get_relevant_long_term`). Where injection + ranks only *non-archived* rows by a recency+importance+relevance blend + and is capped to a small per-turn budget, recall searches *every* + long-term memory (archived included), ranks purely by semantic relevance + to *query*, and returns the best matches above a small relevance floor. + + Recalling reinforces the returned rows and un-archives any that had been + archived (the agent looked them up and they matched, so they are warm + again). Falls back to lexical token overlap when embeddings are + unavailable or the query can't be embedded, so recall always works. 
+ + ``scope`` filters per #42 (see :func:`_scope_filter`) exactly like the + injection readers, so a persona only recalls shared + its own private + memories, never another persona's; ``None`` = every scope. + """ + query = (query or "").strip() + if not query: + return [] + limit = self.recall_top_k if not limit or limit < 1 else limit + limit = min(limit, self._RECALL_MAX_LIMIT) + + await self._ensure_schema() + clause, params = _scope_filter(scope) + async with aiosqlite.connect(self.db_path) as db: + db.row_factory = aiosqlite.Row + cursor = await db.execute( + "SELECT id, category, subject, content, embedding, archived " # noqa: S608 + f"FROM long_term WHERE 1=1{clause}", + params, + ) + rows = [dict(r) for r in await cursor.fetchall()] + if not rows: + return [] + + # Embedding cosine for rows with a matching-dim vector; lexical for the rest. + query_vec = await self._safe_embed(query) + rel_map = _batch_relevance(query_vec, rows) + query_tokens = _tokens(query) + + scored: list[tuple[float, dict]] = [] + for i, row in enumerate(rows): + if i in rel_map: + relevance = rel_map[i] + else: + relevance = _similarity(query_tokens, _tokens(f"{row['subject']} {row['content']}")) + if relevance >= self._RECALL_MIN_RELEVANCE: + scored.append((relevance, row)) + + scored.sort(key=lambda pair: pair[0], reverse=True) + top = [row for _, row in scored[:limit]] + # Recall revives matches: reinforce + un-archive. The returned rows are + # therefore all non-archived now, so no archived flag is surfaced. + await self._reinforce([row["id"] for row in top], unarchive=True) + return [ + {"category": r["category"], "subject": r["subject"], "content": r["content"]} + for r in top + ] + + async def _reinforce(self, ids: list[int], *, unarchive: bool = False) -> None: + """Strengthen recalled memories: bump access_count and last_accessed. + + With *unarchive*, also clear the archived flag — a memory the agent + deliberately recalled and used is demonstrably warm again (issue #47). 
+ """ if not ids: return await self._ensure_schema() + archived_clause = ", archived = 0" if unarchive else "" async with aiosqlite.connect(self.db_path) as db: await db.executemany( - "UPDATE long_term SET access_count = access_count + 1, " - "last_accessed = datetime('now') WHERE id = ?", + "UPDATE long_term SET access_count = access_count + 1, " # noqa: S608 + f"last_accessed = datetime('now'){archived_clause} WHERE id = ?", [(i,) for i in ids], ) await db.commit() diff --git a/core/permissions.py b/core/permissions.py index 787255b..00d3863 100644 --- a/core/permissions.py +++ b/core/permissions.py @@ -117,6 +117,7 @@ class PermissionLevel: "run_command:git*push*": "ASK", "run_command:git*commit*": "ASK", "web_search": "ALWAYS", + "recall_memory": "ALWAYS", # Write operations — ask first "send_email": "ASK", "reply_email": "ASK", diff --git a/core/prompt_builder.py b/core/prompt_builder.py index a95aef0..6fc9f64 100644 --- a/core/prompt_builder.py +++ b/core/prompt_builder.py @@ -181,7 +181,9 @@ def build_prompt_sections( memory_instruction = ( "You can store and recall memories using the sqlite3 CLI (see the memory skill).\n" "Proactively remember important facts about the user and their contacts.\n" - "Before inserting a new long-term memory, check if it already exists to avoid duplicates." + "Before inserting a new long-term memory, check if it already exists to avoid duplicates.\n" + "Only your most relevant memories are shown each turn; when you suspect a stored fact " + "isn't among them, call the recall_memory tool to search your full memory by meaning." 
) history_handling = "" From c192cbf6e311a9018c418e26057f0687fe9290cc Mon Sep 17 00:00:00 2001 From: Matteo Merola Date: Sun, 28 Jun 2026 23:07:55 +0200 Subject: [PATCH 2/5] test(memory): recall full-store lookup, scope isolation + tool dispatch (#47) Covers semantic ranking, relevance-floor exclusion, archived-row search + un-archive/reinforce, limit cap, lexical fallback without an embedder, per-persona scope isolation (a persona never recalls another's private rows), tool dispatch with scope plumbing, and the blank-query guard. --- tests/test_memory_tiers.py | 79 ++++++++++++++++++++++++++++++++++++++ tests/test_personae.py | 5 ++- tests/test_tools.py | 40 +++++++++++++++++++ 3 files changed, 122 insertions(+), 2 deletions(-) diff --git a/tests/test_memory_tiers.py b/tests/test_memory_tiers.py index ded15f1..3c7739c 100644 --- a/tests/test_memory_tiers.py +++ b/tests/test_memory_tiers.py @@ -184,6 +184,85 @@ async def test_format_for_prompt_without_query_uses_recency(self, embed_store): assert "lives in zurich" in block +# -- recall_memory: deliberate full-store semantic lookup (issue #47) -- + + +async def _set_archived(store: MemoryStore, rid: int) -> None: + async with aiosqlite.connect(store.db_path) as db: + await db.execute("UPDATE long_term SET archived = 1 WHERE id = ?", (rid,)) + await db.commit() + + +class TestRecall: + async def test_empty_query_returns_nothing(self, embed_store): + await embed_store._insert_long_term("fact", "matteo", "lives in zurich") + assert await embed_store.recall(" ") == [] + + async def test_semantic_match_ranks_first(self, embed_store): + await embed_store._insert_long_term("health", "matteo", "allergic to shellfish") + await embed_store._insert_long_term("fact", "simge", "speaks turkish fluently") + + out = await embed_store.recall("food allergies and shellfish") + + assert out + assert out[0]["content"] == "allergic to shellfish" + # The unrelated row is below the relevance floor and must be excluded. 
+ assert all(m["content"] != "speaks turkish fluently" for m in out) + + async def test_searches_and_unarchives_archived_rows(self, embed_store): + # An archived memory is invisible to injection but recall must still find + # it — and recalling it brings it back (un-archives + reinforces). + emb = await _HashEmbedder().embed_one("allergic to shellfish") + rid = await _insert( + embed_store, "matteo", "allergic to shellfish", category="health", embedding=emb + ) + await _set_archived(embed_store, rid) + assert await embed_store.get_long_term() == [] # archived → not injected + + out = await embed_store.recall("shellfish allergy") + + assert any(m["content"] == "allergic to shellfish" for m in out) + row = await _row(embed_store, rid) + assert row["archived"] == 0 # un-archived on recall + assert row["access_count"] >= 1 # reinforced + + async def test_respects_limit(self, embed_store): + for i in range(6): + await embed_store._insert_long_term("fact", "matteo", f"likes hobby number {i}") + out = await embed_store.recall("matteo likes hobby", limit=3) + # All 6 rows clear the floor, so the limit slice must cap at exactly 3. + assert len(out) == 3 + + async def test_lexical_fallback_without_embedder(self, store): + # No embedder configured → recall falls back to token overlap, still works. + await store._insert_long_term("health", "matteo", "allergic to shellfish") + await store._insert_long_term("fact", "simge", "speaks turkish") + out = await store.recall("shellfish allergy") + assert any("shellfish" in m["content"] for m in out) + + async def test_scope_isolation(self, embed_store): + # Recall must honour persona scope (#42): a persona sees shared + its own + # private memories, never another persona's private rows. 
+ await embed_store._insert_long_term("fact", "matteo", "allergic to dust", scope="") + await embed_store._insert_long_term( + "health", "matteo", "allergic to shellfish", scope="coach" + ) + await embed_store._insert_long_term( + "health", "matteo", "allergic to peanuts", scope="finance" + ) + + coach = { + m["content"] for m in await embed_store.recall("allergic allergies", scope="coach") + } + assert "allergic to shellfish" in coach # coach's own private + assert "allergic to dust" in coach # shared + assert "allergic to peanuts" not in coach # finance's private — never crosses + + # The default identity (scope="") sees shared only. + owner = {m["content"] for m in await embed_store.recall("allergic allergies", scope="")} + assert owner == {"allergic to dust"} + + # -- Tier 3: forgetting / importance / reinforcement -- diff --git a/tests/test_personae.py b/tests/test_personae.py index dadc78d..67ca6c6 100644 --- a/tests/test_personae.py +++ b/tests/test_personae.py @@ -78,11 +78,12 @@ def test_scoped_tools_filters_but_keeps_load_skill() -> None: def test_gateable_tools_in_sync_with_tools() -> None: # The admin UI lists GATEABLE_TOOLS for the scope checkboxes; it must stay # in sync with the real tool set (every tool except the always-on ones: - # load_skill plus the vault discovery/request tools — issue #19). + # load_skill, the vault discovery/request tools — issue #19 — and + # recall_memory, which mirrors always-on scoped memory injection — #47). 
from api.admin import GATEABLE_TOOLS from core.agent import TOOLS - always_on = {"load_skill", "list_secrets", "request_secret"} + always_on = {"load_skill", "recall_memory", "list_secrets", "request_secret"} assert set(GATEABLE_TOOLS) | always_on == {t["name"] for t in TOOLS} diff --git a/tests/test_tools.py b/tests/test_tools.py index 7faee9e..9ba21a2 100644 --- a/tests/test_tools.py +++ b/tests/test_tools.py @@ -471,3 +471,43 @@ async def fake_await(description, channel, user_id, tool_name=None, params=None) ) assert prompts["n"] == 0 # nothing to batch for a single write assert state["write_decisions"] == {} + + +# --------------------------------------------------------------------------- +# recall_memory tool — deliberate full-store semantic lookup (#47) +# --------------------------------------------------------------------------- + + +def _recall_call(call_id: str, **params): + from core.llm import LLMToolCall + + return LLMToolCall(id=call_id, name="recall_memory", arguments=dict(params)) + + +@pytest.mark.asyncio +async def test_recall_memory_tool_dispatch(agent, monkeypatch) -> None: + """recall_memory routes to the store and shapes the result; no approval prompt.""" + captured = {} + + async def fake_recall(query, limit=None, scope=None): + captured["query"], captured["limit"], captured["scope"] = query, limit, scope + return [{"category": "health", "subject": "matteo", "content": "allergic to shellfish"}] + + monkeypatch.setattr(agent.memory, "recall", fake_recall) + # The per-turn state carries the active persona's private memory scope, + # which recall must receive so it never crosses persona boundaries (#42). 
+ state = agent._new_request_state() + state["persona_name"] = "coach" + result = await agent._execute_tool( + _recall_call("1", query="food allergies", limit=5), "telegram", "u1", state + ) + assert captured == {"query": "food allergies", "limit": 5, "scope": "coach"} + assert result["count"] == 1 + assert result["memories"][0]["content"] == "allergic to shellfish" + + +@pytest.mark.asyncio +async def test_recall_memory_requires_query(agent) -> None: + """A blank query is rejected before hitting the store.""" + result = await agent._execute_tool(_recall_call("1", query=" "), "telegram", "u1") + assert "error" in result From aa7799209fd103fed3d3562d928cb1275ed6acfc Mon Sep 17 00:00:00 2001 From: Matteo Merola Date: Sun, 28 Jun 2026 23:07:55 +0200 Subject: [PATCH 3/5] docs(memory): document recall_memory tool + recall_top_k (#47) --- docs/content/docs/memory.mdx | 13 +++++++++++++ skills/memory.md | 16 ++++++++++++++++ 2 files changed, 29 insertions(+) diff --git a/docs/content/docs/memory.mdx b/docs/content/docs/memory.mdx index c9001be..b1703ae 100644 --- a/docs/content/docs/memory.mdx +++ b/docs/content/docs/memory.mdx @@ -128,9 +128,21 @@ Set `memory.embedding.enabled: false` to fall back to **Tier-1 lexical** (word-o ### What embeddings power - **Relevance-ranked injection.** Instead of dumping the most recent `long_term_limit` rows into the prompt, only the `injection_top_k` (default 12) memories most relevant to the *current message* are injected. They're scored Generative-Agents style: `relevance + 0.5·importance + 0.3·recency`. Injection happens in the **per-turn preamble** (prepended to the current user message), not the static system prompt — so even in session mode (where the static prompt is snapshotted once) the current message is the query every turn, and a fact written mid-session is visible on the next turn without `/new`. +- **On-demand recall** (the `recall_memory` tool, below). 
- **Dedup.** `update_memory` retrieves ADD/UPDATE/DELETE/NOOP candidates by cosine similarity (with a per-row lexical fallback for any memory that has no vector yet). - **Hygiene clustering** (Tier 4, below). +### `recall_memory` — deliberate full-store lookup + +Relevance-ranked injection is a small always-on top-k: great for the obvious facts, but as the store grows it can't surface everything, and the agent *can't ask for a fact it doesn't suspect it has*. So injection is paired with a **`recall_memory` tool** the agent calls deliberately when it suspects a relevant stored fact isn't in the injected set (hybrid retrieval — always-on top-k **plus** on-demand recall). + +- Searches the agent's **entire** long-term store — **including archived** memories that injection never sees — ranked purely by semantic relevance to the agent's query (lexical-overlap fallback when embeddings are off). +- Returns up to `recall_top_k` (default 10) matches above a small relevance floor. +- Recalling **revives** the matched rows: they're reinforced (`access_count` / `last_accessed`) and any archived one is un-archived — a fact the agent looked up and used is warm again. +- **Scope-aware** (#42): recall is filtered to `scope IN ('', )` exactly like injection, so a persona only ever recalls shared facts plus its own private ones — never another persona's. The default identity recalls shared only. + +The tool is read-only (no approval prompt) and available to every persona (it can't surface anything injection couldn't already show that persona), alongside the sqlite CLI the [memory skill](#) teaches for exact-field queries and bulk edits. + All of this is configurable live from the **admin Memory tab** (enable toggle, backend, model, top-k, plus Download / Test buttons). 
## Tier 3 — Forgetting, importance & reinforcement @@ -230,6 +242,7 @@ memory: base_url: "" # API providers only; falls back to agent provider base URL dimensions: 0 # 0 = provider default (API providers only) injection_top_k: 12 # relevance-ranked memories injected per turn + recall_top_k: 10 # max memories returned by the recall_memory tool # Tier 3 — forgetting / importance / reinforcement default_importance: 5.0 # 1-10 scale assigned to new long-term memories diff --git a/skills/memory.md b/skills/memory.md index 76549c1..76ec6da 100644 --- a/skills/memory.md +++ b/skills/memory.md @@ -37,6 +37,22 @@ sqlite3 data/memory.db "UPDATE long_term SET content = 'New value', updated_at = ## Querying memories +### Semantic recall (full store, by meaning) + +The sqlite queries below match on exact fields or `LIKE` substrings. To find a +fact by **meaning** across your *entire* long-term store — including older, +archived memories that aren't injected into the prompt — call the +`recall_memory` tool instead of writing SQL: + +- Each turn only injects your few most-relevant memories. When you suspect a + stored fact exists beyond them (e.g. "didn't they mention a food allergy?"), + call `recall_memory` with a natural-language `query` describing the fact. +- It ranks the whole store by semantic similarity and returns the best matches. + Recalling a memory also revives it (un-archives + reinforces it). + +Use `recall_memory` for fuzzy "do I know anything about X?" lookups; use the +sqlite queries below when you need exact filtering, counts, or bulk edits. 
+ ### Search by subject ```bash From 703ec80cd1feb10b16f80e3a94d6d15944e721c5 Mon Sep 17 00:00:00 2001 From: Matteo Merola Date: Sun, 28 Jun 2026 23:17:49 +0200 Subject: [PATCH 4/5] test(memory): de-flake recall relevance-floor check; fix dead doc link (#47) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The floor-exclusion assertion lived on the embedding path, where the test _HashEmbedder buckets tokens via salted hash() into 64 dims — collisions pushed the unrelated row above the floor for ~20% of PYTHONHASHSEEDs (e.g. seed 42), failing intermittently. Move the exclusion check to the lexical-fallback path, where zero token overlap is deterministic and still guards _RECALL_MIN_RELEVANCE against being lowered to 0. Verified green across seeds 0/1/2/7/42/123/999. Also drop a placeholder (#) markdown link in the memory docs. --- docs/content/docs/memory.mdx | 2 +- tests/test_memory_tiers.py | 5 +++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/docs/content/docs/memory.mdx b/docs/content/docs/memory.mdx index b1703ae..b9195e8 100644 --- a/docs/content/docs/memory.mdx +++ b/docs/content/docs/memory.mdx @@ -141,7 +141,7 @@ Relevance-ranked injection is a small always-on top-k: great for the obvious fac - Recalling **revives** the matched rows: they're reinforced (`access_count` / `last_accessed`) and any archived one is un-archived — a fact the agent looked up and used is warm again. - **Scope-aware** (#42): recall is filtered to `scope IN ('', )` exactly like injection, so a persona only ever recalls shared facts plus its own private ones — never another persona's. The default identity recalls shared only. -The tool is read-only (no approval prompt) and available to every persona (it can't surface anything injection couldn't already show that persona), alongside the sqlite CLI the [memory skill](#) teaches for exact-field queries and bulk edits. 
+The tool is read-only (no approval prompt) and available to every persona (it can't surface anything injection couldn't already show that persona), alongside the sqlite CLI the memory skill teaches for exact-field queries and bulk edits. All of this is configurable live from the **admin Memory tab** (enable toggle, backend, model, top-k, plus Download / Test buttons). diff --git a/tests/test_memory_tiers.py b/tests/test_memory_tiers.py index 3c7739c..8895fe0 100644 --- a/tests/test_memory_tiers.py +++ b/tests/test_memory_tiers.py @@ -206,8 +206,6 @@ async def test_semantic_match_ranks_first(self, embed_store): assert out assert out[0]["content"] == "allergic to shellfish" - # The unrelated row is below the relevance floor and must be excluded. - assert all(m["content"] != "speaks turkish fluently" for m in out) async def test_searches_and_unarchives_archived_rows(self, embed_store): # An archived memory is invisible to injection but recall must still find @@ -239,6 +237,9 @@ async def test_lexical_fallback_without_embedder(self, store): await store._insert_long_term("fact", "simge", "speaks turkish") out = await store.recall("shellfish allergy") assert any("shellfish" in m["content"] for m in out) + # The relevance floor drops the zero-overlap row (deterministic on the + # lexical path — guards _RECALL_MIN_RELEVANCE against being lowered to 0). 
+ assert all("turkish" not in m["content"] for m in out) async def test_scope_isolation(self, embed_store): # Recall must honour persona scope (#42): a persona sees shared + its own From ce1615707e1d98ca52656afd83def60f941124d3 Mon Sep 17 00:00:00 2001 From: Matteo Merola Date: Sun, 28 Jun 2026 23:22:33 +0200 Subject: [PATCH 5/5] feat(admin): expose recall_top_k in the Memory tab (#47) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Sibling of injection_top_k: surfaces memory.embedding.recall_top_k via the config GET (emb_recall_top_k) and a number input next to the existing top-k field, capped at 25 (the recall hard limit) so a set value is never silently clamped. Saved through the same embedding-settings PATCH, which already hot-applies it to the running store — no restart. --- api/admin.py | 1 + api/templates/partials/memory.html | 10 +++++++++- 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/api/admin.py b/api/admin.py index 6232914..318be10 100644 --- a/api/admin.py +++ b/api/admin.py @@ -1300,6 +1300,7 @@ async def _bool(key: str, default: str) -> str: "emb_model": await _cfg("memory.embedding.model", "BAAI/bge-small-en-v1.5"), "emb_base_url": await _cfg("memory.embedding.base_url", ""), "emb_top_k": await _cfg("memory.embedding.injection_top_k", "12"), + "emb_recall_top_k": await _cfg("memory.embedding.recall_top_k", "10"), "hygiene_enabled": await _bool("memory.hygiene_enabled", "true"), "default_importance": await _cfg("memory.default_importance", "5.0"), "archive_after_days": await _cfg("memory.archive_after_days", "90"), diff --git a/api/templates/partials/memory.html b/api/templates/partials/memory.html index b46150a..d865be4 100644 --- a/api/templates/partials/memory.html +++ b/api/templates/partials/memory.html @@ -9,6 +9,7 @@ embModel: {{ emb_model|default('BAAI/bge-small-en-v1.5', true)|tojson|forceescape }}, embBaseUrl: {{ emb_base_url|default('', true)|tojson|forceescape }}, embTopK: {{ 
emb_top_k|default('12', true)|tojson|forceescape }}, + embRecallTopK: {{ emb_recall_top_k|default('10', true)|tojson|forceescape }}, embResult: '', embOk: false, embStatus: null, embBusy: false, embTestResult: '', // Lifecycle (Tier 3/4) @@ -151,6 +152,12 @@

Semantic memory (embeddings)

How many of the most relevant long-term memories to put in the prompt each message.

+
+ + +

Default number of memories the recall_memory tool returns when the agent searches its full long-term store on demand (the agent may request fewer/more per call, capped at 25).

+
+
Model on disk: yes ✓ @@ -165,7 +172,8 @@

Semantic memory (embeddings)

'memory.embedding.provider': embProvider, 'memory.embedding.model': embModel, 'memory.embedding.base_url': embBaseUrl, - 'memory.embedding.injection_top_k': String(embTopK) + 'memory.embedding.injection_top_k': String(embTopK), + 'memory.embedding.recall_top_k': String(embRecallTopK) }, 'Embedding settings saved', 'embOk', 'embResult').then(loadEmbStatus)"> Save embedding settings