Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 6 additions & 2 deletions api/admin.py
Original file line number Diff line number Diff line change
Expand Up @@ -1300,6 +1300,7 @@ async def _bool(key: str, default: str) -> str:
"emb_model": await _cfg("memory.embedding.model", "BAAI/bge-small-en-v1.5"),
"emb_base_url": await _cfg("memory.embedding.base_url", ""),
"emb_top_k": await _cfg("memory.embedding.injection_top_k", "12"),
"emb_recall_top_k": await _cfg("memory.embedding.recall_top_k", "10"),
"hygiene_enabled": await _bool("memory.hygiene_enabled", "true"),
"default_importance": await _cfg("memory.default_importance", "5.0"),
"archive_after_days": await _cfg("memory.archive_after_days", "90"),
Expand Down Expand Up @@ -1656,6 +1657,7 @@ async def patch_config(body: ConfigPatchIn) -> dict:
# Tier 3/4 lifecycle knobs so memory config changes apply live.
agent.memory.embedder = agent._build_embedder()
agent.memory.injection_top_k = mem_cfg.embedding.injection_top_k
agent.memory.recall_top_k = mem_cfg.embedding.recall_top_k
agent.memory.default_importance = mem_cfg.default_importance
agent.memory.archive_after_days = mem_cfg.archive_after_days
agent.memory.archive_max_importance = mem_cfg.archive_max_importance
Expand Down Expand Up @@ -3322,8 +3324,10 @@ def _config_requires_restart(values: dict) -> bool:

# Function-tools that a persona may scope. ``load_skill`` is intentionally
# excluded — it is always available (the core mechanic personae use to read
# their allowlisted skills); so are the vault tools and ``recall_memory``
# (memory is injected for every persona, scope-filtered, so its on-demand
# counterpart is always available too). Kept here (not imported from core.agent)
# to avoid pulling the agent's heavy import graph into the admin app.
GATEABLE_TOOLS = [
"run_command",
"send_email",
Expand Down
10 changes: 9 additions & 1 deletion api/templates/partials/memory.html
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
embModel: {{ emb_model|default('BAAI/bge-small-en-v1.5', true)|tojson|forceescape }},
embBaseUrl: {{ emb_base_url|default('', true)|tojson|forceescape }},
embTopK: {{ emb_top_k|default('12', true)|tojson|forceescape }},
embRecallTopK: {{ emb_recall_top_k|default('10', true)|tojson|forceescape }},
embResult: '', embOk: false,
embStatus: null, embBusy: false, embTestResult: '',
// Lifecycle (Tier 3/4)
Expand Down Expand Up @@ -151,6 +152,12 @@ <h2 class="text-base mb-1">Semantic memory (embeddings)</h2>
<p class="text-muted text-xs mt-1">How many of the most relevant long-term memories to put in the prompt each message.</p>
</div>

<div>
<label class="label">Recall results (recall_memory tool)</label>
<input type="number" class="input-sm" style="max-width:120px" x-model="embRecallTopK" :disabled="!embEnabled" min="1" max="25">
<p class="text-muted text-xs mt-1">Default number of memories the recall_memory tool returns when the agent searches its full long-term store on demand (the agent may request fewer/more per call, capped at 25).</p>
</div>

<div class="text-xs" x-show="isLocal && embStatus">
Model on disk:
<span x-show="embStatus && embStatus.model_ready" class="text-success">yes ✓</span>
Expand All @@ -165,7 +172,8 @@ <h2 class="text-base mb-1">Semantic memory (embeddings)</h2>
'memory.embedding.provider': embProvider,
'memory.embedding.model': embModel,
'memory.embedding.base_url': embBaseUrl,
'memory.embedding.injection_top_k': String(embTopK)
'memory.embedding.injection_top_k': String(embTopK),
'memory.embedding.recall_top_k': String(embRecallTopK)
}, 'Embedding settings saved', 'embOk', 'embResult').then(loadEmbStatus)">
Save embedding settings
</button>
Expand Down
55 changes: 54 additions & 1 deletion core/agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -186,6 +186,31 @@ def _shell_quote(s: str) -> str:
"required": ["name"],
},
},
{
    # Tool schema for on-demand semantic recall. The ``limit`` text below is
    # what the model reads, so it must describe the real behavior: the default
    # is the configurable ``memory.embedding.recall_top_k`` and requests are
    # capped at 25 results.
    "name": "recall_memory",
    "description": (
        "Search your FULL long-term memory by meaning for facts about the user that "
        "aren't already shown to you. Only the few most-relevant memories are injected "
        "into each turn; call this when you suspect a relevant stored fact exists beyond "
        "them — it searches the whole store, including older archived memories, and ranks "
        "matches by relevance. Pass a natural-language query describing the fact you're "
        "after (e.g. 'dietary restrictions and food allergies'), not just keywords."
    ),
    "input_schema": {
        "type": "object",
        "properties": {
            "query": {
                "type": "string",
                "description": "Natural-language description of the fact(s) to recall",
            },
            "limit": {
                "type": "integer",
                "description": (
                    "Max memories to return. Omit to use the configured default "
                    "(10 unless changed); values above 25 are capped at 25."
                ),
            },
        },
        "required": ["query"],
    },
},
{
"name": "manage_jobs",
"description": (
Expand Down Expand Up @@ -374,7 +399,9 @@ def scoped_tools(persona: Persona | None) -> list[dict]:
return TOOLS
# ``load_skill`` and the vault discovery/request tools are always retained:
# they are the mechanics personae rely on to read skills and obtain secrets.
_always = {"load_skill", "list_secrets", "request_secret"}
# ``recall_memory`` too — memory is injected for every persona (scope-filtered),
# so its on-demand counterpart exposes nothing extra and stays available (#47).
_always = {"load_skill", "recall_memory", "list_secrets", "request_secret"}
return [t for t in TOOLS if persona.allows_tool(t["name"]) or t["name"] in _always]


Expand Down Expand Up @@ -419,6 +446,7 @@ def __init__(self, config: Config, secret_store: SecretStore | None = None):
long_term_limit=mem_cfg.long_term_limit,
embedder=self._build_embedder(),
injection_top_k=mem_cfg.embedding.injection_top_k,
recall_top_k=mem_cfg.embedding.recall_top_k,
default_importance=mem_cfg.default_importance,
archive_after_days=mem_cfg.archive_after_days,
archive_max_importance=mem_cfg.archive_max_importance,
Expand Down Expand Up @@ -1206,6 +1234,9 @@ async def _execute_tool(
return {"error": f"Skill not found: {skill_name}"}
return {"name": skill_name, "content": content}

if name == "recall_memory":
return await self._tool_recall_memory(params, request_state)

if name == "manage_jobs":
log.info("Tool call: manage_jobs — %s", params.get("action", ""))
result = await self._tool_manage_jobs(params)
Expand Down Expand Up @@ -1602,6 +1633,28 @@ async def _tool_manage_jobs(self, params: dict) -> dict:

return {"error": f"Unknown action: {action!r}. Use 'create', 'list', or 'cancel'."}

async def _tool_recall_memory(self, params: dict, request_state: dict | None = None) -> dict:
"""Deliberate semantic search over the full long-term memory store (#47).

Scoped to the active persona (#42): ``persona_name`` on the per-turn
request state is the persona's private memory scope (``""`` = the default
identity's shared-only view), so recall never crosses into another
persona's private memories — same boundary the injection readers enforce.
"""
query = str(params.get("query", "")).strip()
if not query:
return {"error": "Missing 'query'."}
limit = params.get("limit")
limit = limit if isinstance(limit, int) and not isinstance(limit, bool) else None
scope = (request_state or {}).get("persona_name") or ""
try:
memories = await self.memory.recall(query, limit, scope=scope)
except Exception:
log.exception("recall_memory failed for query: %s", query)
return {"error": "Memory recall failed."}
log.info("Tool call: recall_memory — %r (%d hits)", query, len(memories))
return {"query": query, "count": len(memories), "memories": memories}

async def _tool_web_search(self, params: dict) -> dict:
"""Search the web via Tavily API."""
if not self.search_client:
Expand Down
1 change: 1 addition & 0 deletions core/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -181,6 +181,7 @@ class EmbeddingConfig(BaseModel):
base_url: str = "" # API providers only; falls back to the agent provider base URL when empty
dimensions: int = 0 # 0 = provider default (API providers only)
injection_top_k: int = 12 # relevance-ranked memories injected per turn
recall_top_k: int = 10 # max memories returned by the recall_memory tool (full-store lookup)


class MemoryConfig(BaseModel):
Expand Down
85 changes: 81 additions & 4 deletions core/memory.py
Original file line number Diff line number Diff line change
Expand Up @@ -492,6 +492,7 @@ def __init__(
*,
embedder: EmbeddingClient | None = None,
injection_top_k: int = 12,
recall_top_k: int = 10,
default_importance: float = 5.0,
archive_after_days: int = 90,
archive_max_importance: float = 4.0,
Expand All @@ -503,6 +504,7 @@ def __init__(
self.long_term_limit = long_term_limit
self.embedder = embedder
self.injection_top_k = injection_top_k
self.recall_top_k = recall_top_k
self.default_importance = default_importance
self.archive_after_days = archive_after_days
self.archive_max_importance = archive_max_importance
Expand Down Expand Up @@ -628,15 +630,90 @@ async def get_relevant_long_term(self, query: str, scope: str | None = None) ->
for r in top
]

async def _reinforce(self, ids: list[int]) -> None:
"""Strengthen recalled memories: bump access_count and last_accessed."""
# Upper bound on how many memories one recall_memory call may return, and the
# minimum relevance a row needs to be worth returning at all.
_RECALL_MAX_LIMIT = 25
# ponytail: relevance floor — raise to cut noise, lower to surface more long tail.
_RECALL_MIN_RELEVANCE = 0.1

async def recall(
self, query: str, limit: int | None = None, scope: str | None = None
) -> list[dict]:
"""Deliberate semantic search over the FULL long-term store (issue #47).

This is the agent's on-demand recall tool — the complement to the
always-injected top-k (:meth:`get_relevant_long_term`). Where injection
ranks only *non-archived* rows by a recency+importance+relevance blend
and is capped to a small per-turn budget, recall searches *every*
long-term memory (archived included), ranks purely by semantic relevance
to *query*, and returns the best matches above a small relevance floor.

Recalling reinforces the returned rows and un-archives any that had been
archived (the agent looked them up and they matched, so they are warm
again). Falls back to lexical token overlap when embeddings are
unavailable or the query can't be embedded, so recall always works.

``scope`` filters per #42 (see :func:`_scope_filter`) exactly like the
injection readers, so a persona only recalls shared + its own private
memories, never another persona's; ``None`` = every scope.
"""
query = (query or "").strip()
if not query:
return []
limit = self.recall_top_k if not limit or limit < 1 else limit
limit = min(limit, self._RECALL_MAX_LIMIT)

await self._ensure_schema()
clause, params = _scope_filter(scope)
async with aiosqlite.connect(self.db_path) as db:
db.row_factory = aiosqlite.Row
cursor = await db.execute(
"SELECT id, category, subject, content, embedding, archived " # noqa: S608
f"FROM long_term WHERE 1=1{clause}",
params,
)
rows = [dict(r) for r in await cursor.fetchall()]
if not rows:
return []

# Embedding cosine for rows with a matching-dim vector; lexical for the rest.
query_vec = await self._safe_embed(query)
rel_map = _batch_relevance(query_vec, rows)
query_tokens = _tokens(query)

scored: list[tuple[float, dict]] = []
for i, row in enumerate(rows):
if i in rel_map:
relevance = rel_map[i]
else:
relevance = _similarity(query_tokens, _tokens(f"{row['subject']} {row['content']}"))
if relevance >= self._RECALL_MIN_RELEVANCE:
scored.append((relevance, row))

scored.sort(key=lambda pair: pair[0], reverse=True)
top = [row for _, row in scored[:limit]]
# Recall revives matches: reinforce + un-archive. The returned rows are
# therefore all non-archived now, so no archived flag is surfaced.
await self._reinforce([row["id"] for row in top], unarchive=True)
return [
{"category": r["category"], "subject": r["subject"], "content": r["content"]}
for r in top
]

async def _reinforce(self, ids: list[int], *, unarchive: bool = False) -> None:
    """Strengthen recalled memories: bump access_count and last_accessed.

    With *unarchive*, also clear the archived flag — a memory the agent
    deliberately recalled and used is demonstrably warm again (issue #47).

    Args:
        ids: ``long_term.id`` values to update; an empty list is a no-op and
            does not touch the database.
        unarchive: When true, the same UPDATE also sets ``archived = 0``.
    """
    if not ids:
        return
    await self._ensure_schema()
    # Only this fixed fragment is interpolated into the SQL text; the row ids
    # always go through bound parameters.
    archived_clause = ", archived = 0" if unarchive else ""
    async with aiosqlite.connect(self.db_path) as db:
        await db.executemany(
            "UPDATE long_term SET access_count = access_count + 1, "  # noqa: S608
            f"last_accessed = datetime('now'){archived_clause} WHERE id = ?",
            [(i,) for i in ids],
        )
        await db.commit()
Expand Down
1 change: 1 addition & 0 deletions core/permissions.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,7 @@ class PermissionLevel:
"run_command:git*push*": "ASK",
"run_command:git*commit*": "ASK",
"web_search": "ALWAYS",
"recall_memory": "ALWAYS",
# Write operations — ask first
"send_email": "ASK",
"reply_email": "ASK",
Expand Down
4 changes: 3 additions & 1 deletion core/prompt_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -181,7 +181,9 @@ def build_prompt_sections(
memory_instruction = (
"You can store and recall memories using the sqlite3 CLI (see the memory skill).\n"
"Proactively remember important facts about the user and their contacts.\n"
"Before inserting a new long-term memory, check if it already exists to avoid duplicates."
"Before inserting a new long-term memory, check if it already exists to avoid duplicates.\n"
"Only your most relevant memories are shown each turn; when you suspect a stored fact "
"isn't among them, call the recall_memory tool to search your full memory by meaning."
)

history_handling = ""
Expand Down
13 changes: 13 additions & 0 deletions docs/content/docs/memory.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -128,9 +128,21 @@ Set `memory.embedding.enabled: false` to fall back to **Tier-1 lexical** (word-o
### What embeddings power

- **Relevance-ranked injection.** Instead of dumping the most recent `long_term_limit` rows into the prompt, only the `injection_top_k` (default 12) memories most relevant to the *current message* are injected. They're scored Generative-Agents style: `relevance + 0.5·importance + 0.3·recency`. Injection happens in the **per-turn preamble** (prepended to the current user message), not the static system prompt — so even in session mode (where the static prompt is snapshotted once) the current message is the query every turn, and a fact written mid-session is visible on the next turn without `/new`.
- **On-demand recall** (the `recall_memory` tool, below).
- **Dedup.** `update_memory` retrieves ADD/UPDATE/DELETE/NOOP candidates by cosine similarity (with a per-row lexical fallback for any memory that has no vector yet).
- **Hygiene clustering** (Tier 4, below).

### `recall_memory` — deliberate full-store lookup

Relevance-ranked injection is a small always-on top-k: great for the obvious facts, but as the store grows it can't surface everything. On-demand lookup alone wouldn't be enough either, because the agent *can't ask for a fact it doesn't suspect it has*. So the two are paired: injection stays always-on, and a **`recall_memory` tool** lets the agent search deliberately when it suspects a relevant stored fact isn't in the injected set (hybrid retrieval — always-on top-k **plus** on-demand recall).

- Searches the agent's **entire** long-term store — **including archived** memories that injection never sees — ranked purely by semantic relevance to the agent's query (lexical-overlap fallback when embeddings are off).
- Returns up to `recall_top_k` (default 10) matches above a small relevance floor.
- Recalling **revives** the matched rows: they're reinforced (`access_count` / `last_accessed`) and any archived one is un-archived — a fact the agent looked up and used is warm again.
- **Scope-aware** (#42): recall is filtered to `scope IN ('', <active persona>)` exactly like injection, so a persona only ever recalls shared facts plus its own private ones — never another persona's. The default identity recalls shared only.

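The tool needs no approval prompt: it never changes a memory's content, and its only writes are the reinforcement / un-archive bookkeeping described above. It is available to every persona because it is scope-filtered exactly like injection, so it can't reach anything outside the shared + own-private memories that persona is already allowed to see. It sits alongside the sqlite CLI the memory skill teaches for exact-field queries and bulk edits.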

All of this is configurable live from the **admin Memory tab** (enable toggle, backend, model, top-k, plus Download / Test buttons).

## Tier 3 — Forgetting, importance & reinforcement
Expand Down Expand Up @@ -230,6 +242,7 @@ memory:
base_url: "" # API providers only; falls back to agent provider base URL
dimensions: 0 # 0 = provider default (API providers only)
injection_top_k: 12 # relevance-ranked memories injected per turn
recall_top_k: 10 # max memories returned by the recall_memory tool

# Tier 3 — forgetting / importance / reinforcement
default_importance: 5.0 # 1-10 scale assigned to new long-term memories
Expand Down
16 changes: 16 additions & 0 deletions skills/memory.md
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,22 @@ sqlite3 data/memory.db "UPDATE long_term SET content = 'New value', updated_at =

## Querying memories

### Semantic recall (full store, by meaning)

The sqlite queries below match on exact fields or `LIKE` substrings. To find a
fact by **meaning** across your *entire* long-term store — including older,
archived memories that aren't injected into the prompt — call the
`recall_memory` tool instead of writing SQL:

- Each turn only injects your few most-relevant memories. When you suspect a
stored fact exists beyond them (e.g. "didn't they mention a food allergy?"),
call `recall_memory` with a natural-language `query` describing the fact.
- It ranks the whole store by semantic similarity and returns the best matches.
Recalling a memory also revives it (un-archives + reinforces it).

Use `recall_memory` for fuzzy "do I know anything about X?" lookups; use the
sqlite queries below when you need exact filtering, counts, or bulk edits.

### Search by subject

```bash
Expand Down
Loading
Loading