From d4c537629af756d4417bf09e254d67cd2d2a667d Mon Sep 17 00:00:00 2001 From: Matteo Merola Date: Sun, 28 Jun 2026 22:45:24 +0200 Subject: [PATCH 1/6] fix(agent): inject skills index per-turn, not in frozen snapshot (#46) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Session mode snapshots the static system prompt once per session so the cacheable prefix stays stable. The skills index was baked into that snapshot, so a skill added mid-session (e.g. via skill-creator) stayed invisible to the model until /new reset the snapshot — the same staleness bug #41 fixed for memories/reflections. Move the skills index into the per-turn preamble (already an uncached seam carrying live date/time + memory + reflections + plan), gated by a new include_skills flag on build_prompt_sections. The index is now rebuilt fresh every turn from the local skills DB (cheap, like memory), scoped to the active persona's allowlist. Cost is only the block's own tokens on the new turn, not a cache miss on the static prefix. --- core/agent.py | 48 ++++++++++++++++++++++++++++-------------- core/prompt_builder.py | 3 ++- tests/test_tools.py | 26 +++++++++++++++++++++++ 3 files changed, 60 insertions(+), 17 deletions(-) diff --git a/core/agent.py b/core/agent.py index 63075a7..daa5bca 100644 --- a/core/agent.py +++ b/core/agent.py @@ -501,10 +501,11 @@ async def process( else: persona = await self._resolve_persona(channel, user_id, chat_id) - # Per-turn preamble: live date/time + fresh memory/reflections + plan. - # Memory is scoped to the active persona (#42): shared + its private. + # Per-turn preamble: live date/time + fresh memory/reflections + skills + # index + plan. Memory is scoped to the active persona (#42): shared + + # its private. Skills index is scoped to the persona's allowlist (#46). preamble = await self._turn_preamble( - decomposed_goal, query=message, scope=_persona_scope(persona) + decomposed_goal, query=message, scope=_persona_scope(persona), persona=persona ) tools = apply_feature_gates( @@ -620,26 +621,41 @@ async def _turn_preamble( decomposed_goal: DecomposedGoal | None, query: str | None = None, scope: str = "", + persona: Persona | None = None, ) -> str: """Build the per-turn preamble prepended to the current user message. Always carries the live date/time (so the agent knows 'now' every turn); - also carries fresh, query-relevant memory + reflections and the - execution plan when the request was decomposed. + also carries fresh, query-relevant memory + reflections, the live skills + index, and the execution plan when the request was decomposed. - Memory/reflections live here, not in the static system prompt: in + Memory/reflections/skills live here, not in the static system prompt: in session mode that prompt is snapshotted once and would freeze any - mid-session extraction out of view until ``/new`` (#41). The preamble is - rebuilt every turn and rides on the new (uncached) user message, so it - costs only the block's own tokens and is also relevance-ranked per turn. + mid-session change out of view until ``/new`` (#41, #46) — e.g. a skill + added via the skill-creator stayed invisible. The preamble is rebuilt + every turn and rides on the new (uncached) user message, so it costs only + the block's own tokens and is also relevance-ranked per turn. ``scope`` is the active persona's memory scope (#42): ``""`` = shared only, ``""`` = shared + that persona's private memory. + ``persona`` scopes the skills index to its allowlist. """ now = datetime.now(ZoneInfo(self.config.agent.timezone)) stamp = now.strftime("%A, %B %d, %Y %H:%M %Z") preamble = f"[Current date & time: {stamp}]" + # Skills index, rebuilt fresh per turn so a skill added mid-session (e.g. + # via skill-creator) is immediately visible without a /new (#46). Cheap: + # a local DB read, like memory. Scoped to the persona's allowlist. + try: + skills_index = await self.skills.get_index_block( + allow=persona.skills if persona else None + ) + if skills_index: + preamble += f"\n\n\n{skills_index}\n" + except Exception: + log.exception("Failed to load skills index for turn preamble") + # ponytail: in session mode this now runs a query embed + cosine scan + # reinforce-write every turn (was once per session). Intended — that is # what makes injection fresh and per-turn relevant — and cheap for a @@ -1915,16 +1931,15 @@ async def _build_system_prompt( decomposed_goal: DecomposedGoal | None = None, persona: Persona | None = None, ) -> str: - skills_index = await self.skills.get_index_block(allow=persona.skills if persona else None) - - # Memory + reflections are NOT baked into the static prompt: in session - # mode it is snapshotted once and would freeze stale (#41). They are - # injected fresh per turn in the preamble instead (see _turn_preamble), - # which also makes them query-relevant on every turn. + # Memory, reflections AND the skills index are NOT baked into the static + # prompt: in session mode it is snapshotted once and would freeze stale — + # a skill added mid-session stayed invisible until /new (#41, #46). All + # three are injected fresh per turn in the preamble instead (see + # _turn_preamble), which also makes memory query-relevant every turn. sections = build_prompt_sections( config=self.config, history_mode=self.history_mode, - skills_index=skills_index, + skills_index="", memories="", reflections="", decomposed_goal=decomposed_goal, @@ -1932,6 +1947,7 @@ async def _build_system_prompt( secrets_available=self.secret_store is not None, include_memories=False, include_reflections=False, + include_skills=False, ) return sections.full_prompt diff --git a/core/prompt_builder.py b/core/prompt_builder.py index a95aef0..9b3889f 100644 --- a/core/prompt_builder.py +++ b/core/prompt_builder.py @@ -116,6 +116,7 @@ def build_prompt_sections( secrets_available: bool = False, include_memories: bool = True, include_reflections: bool = True, + include_skills: bool = True, ) -> PromptSections: """Build all prompt sections with current config and dynamic context. @@ -193,7 +194,7 @@ def build_prompt_sections( memory_section = f"\n{memories}\n" skills_section = "" - if skills_index: + if include_skills and skills_index: skills_section = f"\n{skills_index}\n" reflections_section = "" diff --git a/tests/test_tools.py b/tests/test_tools.py index 7faee9e..efd9802 100644 --- a/tests/test_tools.py +++ b/tests/test_tools.py @@ -201,6 +201,32 @@ async def test_mid_session_memory_visible_next_turn_without_new(agent) -> None: assert await agent._session_system_prompt("telegram", "u1", "") == snapshot +@pytest.mark.asyncio +async def test_mid_session_skill_visible_next_turn_without_new(agent) -> None: + """A skill added mid-session must reach the model on the next turn (#46). + + The skills index rides the per-turn preamble, so a skill created mid-session + (e.g. via skill-creator) is advertised immediately — even though the static + session system prompt is snapshotted once and never rebuilt mid-session. + """ + # Snapshot the static prompt: it must NOT carry the skills index at all. + snapshot = await agent._session_system_prompt("telegram", "u1", "") + assert "available_skills" not in snapshot + + # A skill created after the snapshot (the staleness scenario from #46). + await agent.skills.store.upsert_skill( + "weather", "---\nname: weather\ndescription: fetch the forecast\n---\nbody" + ) + + # Next turn's preamble advertises it — no /new, no snapshot rebuild. + preamble = await agent._turn_preamble(None, query="what's the weather?") + assert "" in preamble + assert "weather" in preamble + + # Snapshot is still the frozen one (cache intact, not rebuilt). + assert await agent._session_system_prompt("telegram", "u1", "") == snapshot + + # --------------------------------------------------------------------------- # Per-action write state — one write's outcome must not block a different one # --------------------------------------------------------------------------- From deb9b5c446f84d0bbf9b399088a6acf89b9bfa1c Mon Sep 17 00:00:00 2001 From: Matteo Merola Date: Sun, 28 Jun 2026 22:46:05 +0200 Subject: [PATCH 2/6] docs: skills index injects per-turn, not in the snapshot (#46) --- docs/content/docs/architecture.mdx | 4 ++-- docs/content/docs/skills.mdx | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/content/docs/architecture.mdx b/docs/content/docs/architecture.mdx index c62f11b..7781a3f 100644 --- a/docs/content/docs/architecture.mdx +++ b/docs/content/docs/architecture.mdx @@ -70,8 +70,8 @@ The agent becomes a **thin orchestrator**: it reads skill files, passes them to The brain of MPA. Implements the LLM tool-use loop: 1. Load conversation history -2. Build the static system prompt (skills, character, personalia, active tools). In session mode this is snapshotted once per conversation (rebuilt after `/new`) and reused every turn, so the cacheable prefix stays stable; on Anthropic it is sent with a `cache_control` breakpoint so the tools + system prefix is not reprocessed each turn -3. Inject the live date/time, the fresh relevance-ranked memories + task reflections, and any per-request execution plan at the start of the current user message — so the agent always knows "now" and sees memories written mid-session, without mutating the cached prefix. (Memories live here, not in the snapshot, so a fact extracted mid-session reaches the model on the very next turn instead of waiting for `/new`.) +2. Build the static system prompt (character, personalia, active tools). In session mode this is snapshotted once per conversation (rebuilt after `/new`) and reused every turn, so the cacheable prefix stays stable; on Anthropic it is sent with a `cache_control` breakpoint so the tools + system prefix is not reprocessed each turn +3. Inject the live date/time, the fresh skills index, the fresh relevance-ranked memories + task reflections, and any per-request execution plan at the start of the current user message — so the agent always knows "now" and sees skills/memories added mid-session, without mutating the cached prefix. (The skills index and memories live here, not in the snapshot, so a skill created mid-session — e.g. via skill-creator — or a fact extracted mid-session reaches the model on the very next turn instead of waiting for `/new`.) 4. Call the LLM 5. Handle tool calls with permission checks 6. Save conversation turn and extract memories diff --git a/docs/content/docs/skills.mdx b/docs/content/docs/skills.mdx index ac3a86e..eca9135 100644 --- a/docs/content/docs/skills.mdx +++ b/docs/content/docs/skills.mdx @@ -125,7 +125,7 @@ class SkillsEngine: return "\n\n".join(sections) ``` -Skills are wrapped in `` XML tags and injected into the `` block of the system prompt. The LLM uses these to construct correct CLI commands. +Skills are advertised in the `` block, which is rebuilt fresh each turn and injected at the start of the user message (alongside the live date/time and memories), not baked into the snapshotted static prompt. So a skill created mid-session — e.g. via the skill-creator skill — is visible to the model on the very next turn without a `/new`. The LLM uses the index to pick a skill, then `load_skill` pulls its full content to construct correct CLI commands. ## System prompt integration From 81b5e52af611e7851b0bae889481c899e5c91bc7 Mon Sep 17 00:00:00 2001 From: Matteo Merola Date: Sun, 28 Jun 2026 22:52:37 +0200 Subject: [PATCH 3/6] docs: fix lingering skills-in-system-prompt claims (#46) Adversarial review caught three doc passages that still said the skills index lives in the system prompt, now contradicting the fix: the skills.mdx 'How it works' step, its 'System prompt integration' example, and the architecture.mdx flow diagram. Align all three with per-turn injection. --- docs/content/docs/architecture.mdx | 3 ++- docs/content/docs/skills.mdx | 25 ++++++++++++++++--------- 2 files changed, 18 insertions(+), 10 deletions(-) diff --git a/docs/content/docs/architecture.mdx b/docs/content/docs/architecture.mdx index 7781a3f..fb0a4bf 100644 --- a/docs/content/docs/architecture.mdx +++ b/docs/content/docs/architecture.mdx @@ -124,7 +124,8 @@ Channel handler (telegram.py / whatsapp.py) ▼ Agent Core (agent.py) ├── Load history from SQLite - ├── Build system prompt (skills + character + personalia + memories) + ├── Build static system prompt (character + personalia + tools) + ├── Build per-turn preamble (date/time + skills index + memories + reflections) ├── Call LLM with tools │ │ │ ▼ diff --git a/docs/content/docs/skills.mdx b/docs/content/docs/skills.mdx index eca9135..be25d38 100644 --- a/docs/content/docs/skills.mdx +++ b/docs/content/docs/skills.mdx @@ -10,7 +10,7 @@ Skills are the central design idea of MPA. Instead of hardcoding how each integr ## How it works 1. Skill files are markdown documents stored in SQLite (seeded from `skills/` at startup) -2. On each conversation turn, relevant skills are injected into the LLM's system prompt +2. On each conversation turn, the skills index is injected at the start of the user message (not the snapshotted system prompt), so skills added mid-session are visible immediately 3. The LLM reads the skill documentation and constructs the correct CLI commands 4. Commands are executed via the tool executor with permission checks @@ -127,9 +127,10 @@ class SkillsEngine: Skills are advertised in the `` block, which is rebuilt fresh each turn and injected at the start of the user message (alongside the live date/time and memories), not baked into the snapshotted static prompt. So a skill created mid-session — e.g. via the skill-creator skill — is visible to the model on the very next turn without a `/new`. The LLM uses the index to pick a skill, then `load_skill` pulls its full content to construct correct CLI commands. -## System prompt integration +## How skills reach the model -The system prompt combines skills with identity and memory: +The **static** system prompt carries only identity and tools (snapshotted once per +session for cache stability): ``` You are {agent_name}, a personal AI assistant for {owner_name}. @@ -141,19 +142,25 @@ You are {agent_name}, a personal AI assistant for {owner_name}. {character.md content} +``` + +The `` index — and `` — are assembled fresh **per turn** +in the preamble at the head of the current user message, not in the static prompt +(see [Architecture](/docs/architecture)). That is what lets a skill added mid-session +show up on the next turn without a `/new`: - -{formatted memory context} - +``` +[Current date & time: ...] - -{skill content} - +- himalaya-email: send and read email over IMAP/SMTP ... ``` +The model picks a skill from the index, then calls `load_skill` to pull its full +content. + ## Tips for writing good skills - **Be specific** — include exact command syntax with flags From 2afa3c179f026d588c161a630c85b55808ddfe2b Mon Sep 17 00:00:00 2001 From: Matteo Merola Date: Sun, 28 Jun 2026 23:32:26 +0200 Subject: [PATCH 4/6] perf(agent): re-send skills index only when it changed (#46) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The per-turn skills index is cheap (summaries only) but, in session mode, the preamble is persisted into the growing history — so re-sending the index every turn accumulated one identical copy per turn. Gate re-injection on a per-session hash of the rendered index: send it on the first turn and whenever it changes (a skill added/edited/removed), skip it otherwise. Between changes the model still sees the prior copy in the replayed history, so freshness (#46) is preserved while unchanged turns cost nothing extra. The hash is cleared on /new and on compaction — both can drop the old copy from view — forcing a fresh re-send. Injection mode and tests pass no session key, so the index is always included there (stateless window, nothing to gate). --- core/agent.py | 41 +++++++++++++++++++++++++----- docs/content/docs/architecture.mdx | 2 +- docs/content/docs/skills.mdx | 2 +- tests/test_history.py | 3 +++ tests/test_tools.py | 37 +++++++++++++++++++++++++++ 5 files changed, 77 insertions(+), 8 deletions(-) diff --git a/core/agent.py b/core/agent.py index daa5bca..1aa1646 100644 --- a/core/agent.py +++ b/core/agent.py @@ -437,6 +437,12 @@ def __init__(self, config: Config, secret_store: SecretStore | None = None): config_db = "data/config.db" self.permissions = PermissionEngine(db_path=config_db) self.prompt_capture: deque[dict[str, str]] = deque(maxlen=20) + # Per-session hash of the last skills index sent in the preamble (#46 + # follow-up). In session mode the index is re-sent only when it changed + # since the previous turn; between changes the model still sees the prior + # copy in replayed history. Keyed by (channel, user_id, chat_id); cleared + # on /new and on compaction (both can drop the old copy from view). + self._last_skills_hash: dict[tuple[str, str, str], str] = {} # Vision fallback caption cache (image hash -> "[Image: ...]"), LRU-bounded. self._vision_cache: OrderedDict[str, str] = OrderedDict() @@ -478,6 +484,8 @@ async def process( await self.history.clear_session(channel, user_id, chat_id) else: await self.history.clear(channel, user_id, chat_id) + # Drop the skills-index hash so a fresh session re-sends it next turn. + self._last_skills_hash.pop((channel, user_id, chat_id), None) log.info( "Conversation cleared by user (channel=%s, user=%s, chat=%s)", channel, @@ -504,8 +512,13 @@ async def process( # Per-turn preamble: live date/time + fresh memory/reflections + skills # index + plan. Memory is scoped to the active persona (#42): shared + # its private. Skills index is scoped to the persona's allowlist (#46). + session_key = (channel, user_id, chat_id) if self.history_mode == "session" else None preamble = await self._turn_preamble( - decomposed_goal, query=message, scope=_persona_scope(persona), persona=persona + decomposed_goal, + query=message, + scope=_persona_scope(persona), + persona=persona, + session_key=session_key, ) tools = apply_feature_gates( @@ -622,6 +635,7 @@ async def _turn_preamble( query: str | None = None, scope: str = "", persona: Persona | None = None, + session_key: tuple[str, str, str] | None = None, ) -> str: """Build the per-turn preamble prepended to the current user message. @@ -638,21 +652,33 @@ async def _turn_preamble( ``scope`` is the active persona's memory scope (#42): ``""`` = shared only, ``""`` = shared + that persona's private memory. - ``persona`` scopes the skills index to its allowlist. + ``persona`` scopes the skills index to its allowlist. ``session_key`` + gates skills re-injection (see below); ``None`` = always inject. """ now = datetime.now(ZoneInfo(self.config.agent.timezone)) stamp = now.strftime("%A, %B %d, %Y %H:%M %Z") preamble = f"[Current date & time: {stamp}]" - # Skills index, rebuilt fresh per turn so a skill added mid-session (e.g. - # via skill-creator) is immediately visible without a /new (#46). Cheap: - # a local DB read, like memory. Scoped to the persona's allowlist. + # Skills index, scoped to the persona's allowlist. Rebuilt fresh per turn + # so a skill added mid-session (e.g. via skill-creator) is immediately + # visible without a /new (#46). Cheap: a local DB read, like memory. + # + # In session mode (``session_key`` set) we re-send the block only when it + # changed since the previous turn; between changes the model still sees + # the prior copy in the replayed history, so re-sending every turn would + # just accumulate identical copies. The hash is cleared on /new and on + # compaction (which can drop the old copy from view). Injection mode and + # tests pass ``None`` → always include (stateless window, nothing to gate). try: skills_index = await self.skills.get_index_block( allow=persona.skills if persona else None ) if skills_index: - preamble += f"\n\n\n{skills_index}\n" + digest = hashlib.sha256(skills_index.encode("utf-8")).hexdigest() + if session_key is None or self._last_skills_hash.get(session_key) != digest: + preamble += f"\n\n\n{skills_index}\n" + if session_key is not None: + self._last_skills_hash[session_key] = digest except Exception: log.exception("Failed to load skills index for turn preamble") @@ -736,6 +762,9 @@ async def _maybe_compact( new_messages, _summary = result await self.history.replace_session(channel, user_id, new_messages, chat_id) + # Compaction can summarize away the turn that carried the skills index, so + # drop the hash to force a fresh re-send on the next turn (#46 follow-up). + self._last_skills_hash.pop((channel, user_id, chat_id), None) log.info( "Compacted session %s/%s/%s: %d → %d messages (~%d ctx tokens)", channel, diff --git a/docs/content/docs/architecture.mdx b/docs/content/docs/architecture.mdx index fb0a4bf..8c596fb 100644 --- a/docs/content/docs/architecture.mdx +++ b/docs/content/docs/architecture.mdx @@ -71,7 +71,7 @@ The brain of MPA. Implements the LLM tool-use loop: 1. Load conversation history 2. Build the static system prompt (character, personalia, active tools). In session mode this is snapshotted once per conversation (rebuilt after `/new`) and reused every turn, so the cacheable prefix stays stable; on Anthropic it is sent with a `cache_control` breakpoint so the tools + system prefix is not reprocessed each turn -3. Inject the live date/time, the fresh skills index, the fresh relevance-ranked memories + task reflections, and any per-request execution plan at the start of the current user message — so the agent always knows "now" and sees skills/memories added mid-session, without mutating the cached prefix. (The skills index and memories live here, not in the snapshot, so a skill created mid-session — e.g. via skill-creator — or a fact extracted mid-session reaches the model on the very next turn instead of waiting for `/new`.) +3. Inject the live date/time, the skills index, the fresh relevance-ranked memories + task reflections, and any per-request execution plan at the start of the current user message — so the agent always knows "now" and sees skills/memories added mid-session, without mutating the cached prefix. (The skills index and memories live here, not in the snapshot, so a skill created mid-session — e.g. via skill-creator — or a fact extracted mid-session reaches the model on the very next turn instead of waiting for `/new`.) In session mode the skills index is re-sent only when it changed since the previous turn — between changes the model still sees the prior copy in the replayed history — so unchanged turns cost nothing extra. 4. Call the LLM 5. Handle tool calls with permission checks 6. Save conversation turn and extract memories diff --git a/docs/content/docs/skills.mdx b/docs/content/docs/skills.mdx index be25d38..1c9d6fb 100644 --- a/docs/content/docs/skills.mdx +++ b/docs/content/docs/skills.mdx @@ -10,7 +10,7 @@ Skills are the central design idea of MPA. Instead of hardcoding how each integr ## How it works 1. Skill files are markdown documents stored in SQLite (seeded from `skills/` at startup) -2. On each conversation turn, the skills index is injected at the start of the user message (not the snapshotted system prompt), so skills added mid-session are visible immediately +2. The skills index is injected at the start of the user message (not the snapshotted system prompt), so skills added mid-session are visible immediately. In session mode it is re-sent only when the registry changed since the previous turn (between changes the model still sees the prior copy in history) 3. The LLM reads the skill documentation and constructs the correct CLI commands 4. Commands are executed via the tool executor with permission checks diff --git a/tests/test_history.py b/tests/test_history.py index 1f31392..e24f641 100644 --- a/tests/test_history.py +++ b/tests/test_history.py @@ -333,6 +333,7 @@ async def test_new_command_clears_injection_history(tmp_path) -> None: db_path=config.history.db_path, max_turns=config.history.max_turns ) agent.history_mode = "injection" + agent._last_skills_hash = {} # Seed some history await agent.history.add_turn("telegram", "u1", "user", "hello") @@ -365,6 +366,7 @@ async def test_new_command_clears_session(tmp_path) -> None: db_path=config.history.db_path, max_turns=config.history.max_turns ) agent.history_mode = "session" + agent._last_skills_hash = {} # Seed a session await agent.history.append_session_message( @@ -399,6 +401,7 @@ async def test_new_command_case_insensitive(tmp_path) -> None: db_path=config.history.db_path, max_turns=config.history.max_turns ) agent.history_mode = "injection" + agent._last_skills_hash = {} await agent.history.add_turn("telegram", "u1", "user", "hello") await agent.history.add_turn("telegram", "u1", "assistant", "hi") diff --git a/tests/test_tools.py b/tests/test_tools.py index efd9802..cbeca3f 100644 --- a/tests/test_tools.py +++ b/tests/test_tools.py @@ -227,6 +227,43 @@ async def test_mid_session_skill_visible_next_turn_without_new(agent) -> None: assert await agent._session_system_prompt("telegram", "u1", "") == snapshot +@pytest.mark.asyncio +async def test_skills_index_resent_only_when_changed(agent) -> None: + """In session mode the skills index rides the preamble only when it changed + since the previous turn (#46 follow-up): between changes the model still sees + the prior copy in replayed history, so we don't re-send identical copies. + """ + key = ("telegram", "u1", "") + await agent.skills.store.upsert_skill("weather", "# weather\nfetch the forecast") + + # First turn: index is new for this session → included. + first = await agent._turn_preamble(None, query="hi", session_key=key) + assert "" in first and "weather" in first + + # Second turn, registry unchanged → index omitted (already in history). + second = await agent._turn_preamble(None, query="hi again", session_key=key) + assert "" not in second + + # A new skill changes the index → re-sent. + await agent.skills.store.upsert_skill("news", "# news\nread headlines") + third = await agent._turn_preamble(None, query="more", session_key=key) + assert "" in third and "news" in third + + # Still unchanged again → omitted. + fourth = await agent._turn_preamble(None, query="more", session_key=key) + assert "" not in fourth + + # Invalidation (e.g. /new or compaction clears the hash) → re-sent. + agent._last_skills_hash.pop(key, None) + fifth = await agent._turn_preamble(None, query="fresh", session_key=key) + assert "" in fifth + + # No session key (injection mode / tests) → always included, never gated. + a = await agent._turn_preamble(None, query="x") + b = await agent._turn_preamble(None, query="x") + assert "" in a and "" in b + + # --------------------------------------------------------------------------- # Per-action write state — one write's outcome must not block a different one # --------------------------------------------------------------------------- From 8bf6e15746cf7f40938905b9f63cad926c1c7ae7 Mon Sep 17 00:00:00 2001 From: Matteo Merola Date: Sun, 28 Jun 2026 23:45:24 +0200 Subject: [PATCH 5/6] fix(agent): gate skills re-send on real history, not a side hash (#46) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adversarial review of the previous commit's per-session hash gate found two real ways it could leave the model blind to skills: - the REPL's own /clear handler clears history directly, bypassing the hash invalidation in process_message, so the stale hash suppressed the index with no copy left in history; - a concurrent same-chat turn could read the hash set by an in-flight turn before that turn's index-bearing message was persisted, then skip with no visible copy. Both stem from a side cache that must stay in lockstep with every history mutation AND with message persistence. Drop it. Gate instead on ground truth: skip the block only when that exact block is already in the replayed session history. Correct by construction — /new, compaction, persona rebind and concurrent turns that drop or change the block simply won't find it, and the failure direction is a harmless re-send, never a blind turn. Removes the dict, both invalidation hooks and the fixture plumbing; the test now drives the real clear path. --- core/agent.py | 62 ++++++++++++++++++++++++++++--------------- tests/test_history.py | 3 --- tests/test_tools.py | 32 ++++++++++++++-------- 3 files changed, 61 insertions(+), 36 deletions(-) diff --git a/core/agent.py b/core/agent.py index 1aa1646..f287430 100644 --- a/core/agent.py +++ b/core/agent.py @@ -437,12 +437,6 @@ def __init__(self, config: Config, secret_store: SecretStore | None = None): config_db = "data/config.db" self.permissions = PermissionEngine(db_path=config_db) self.prompt_capture: deque[dict[str, str]] = deque(maxlen=20) - # Per-session hash of the last skills index sent in the preamble (#46 - # follow-up). In session mode the index is re-sent only when it changed - # since the previous turn; between changes the model still sees the prior - # copy in replayed history. Keyed by (channel, user_id, chat_id); cleared - # on /new and on compaction (both can drop the old copy from view). - self._last_skills_hash: dict[tuple[str, str, str], str] = {} # Vision fallback caption cache (image hash -> "[Image: ...]"), LRU-bounded. self._vision_cache: OrderedDict[str, str] = OrderedDict() @@ -484,8 +478,6 @@ async def process( await self.history.clear_session(channel, user_id, chat_id) else: await self.history.clear(channel, user_id, chat_id) - # Drop the skills-index hash so a fresh session re-sends it next turn. - self._last_skills_hash.pop((channel, user_id, chat_id), None) log.info( "Conversation cleared by user (channel=%s, user=%s, chat=%s)", channel, @@ -663,22 +655,25 @@ async def _turn_preamble( # so a skill added mid-session (e.g. via skill-creator) is immediately # visible without a /new (#46). Cheap: a local DB read, like memory. # - # In session mode (``session_key`` set) we re-send the block only when it - # changed since the previous turn; between changes the model still sees - # the prior copy in the replayed history, so re-sending every turn would - # just accumulate identical copies. The hash is cleared on /new and on - # compaction (which can drop the old copy from view). Injection mode and - # tests pass ``None`` → always include (stateless window, nothing to gate). + # In session mode (``session_key`` set) the preamble is persisted into the + # growing history, so re-sending an unchanged index every turn would just + # accumulate identical copies. We skip it only when the exact block is + # ALREADY present in the replayed history (so the model still sees it). + # Gating on the real history — not a side cache — keeps it correct by + # construction across /new, compaction, persona rebind and concurrent + # turns: any of those that drop or change the block simply won't find it, + # and the failure direction is a harmless re-send, never a blind turn. + # Injection mode and tests pass ``None`` → always include. try: skills_index = await self.skills.get_index_block( allow=persona.skills if persona else None ) if skills_index: - digest = hashlib.sha256(skills_index.encode("utf-8")).hexdigest() - if session_key is None or self._last_skills_hash.get(session_key) != digest: - preamble += f"\n\n\n{skills_index}\n" - if session_key is not None: - self._last_skills_hash[session_key] = digest + block = f"\n{skills_index}\n" + if session_key is None or not await self._skills_block_in_history( + session_key, block + ): + preamble += f"\n\n{block}" except Exception: log.exception("Failed to load skills index for turn preamble") @@ -713,6 +708,32 @@ async def _turn_preamble( ) return preamble + async def _skills_block_in_history(self, session_key: tuple[str, str, str], block: str) -> bool: + """True if the exact ```` block is already present in the + replayed session history — so the model still sees it and we needn't + re-send it this turn (#46 follow-up). + + Reads the same message array that will be sent to the model, so the + decision is correct by construction: after a /new or compaction the block + is gone (→ re-send), a persona rebind or new skill changes the block (→ + re-send), and concurrent turns that haven't yet persisted both re-send + (harmless). Cheap: a substring scan over the (compaction-bounded) history. + """ + try: + messages = await self.history.get_session(*session_key) + except Exception: + return False # safe direction: re-send rather than risk a blind turn + for m in messages: + content = m.get("content") + if isinstance(content, str): + if block in content: + return True + elif isinstance(content, list): + for part in content: + if isinstance(part, dict) and block in str(part.get("text", "")): + return True + return False + async def _session_system_prompt( self, channel: str, @@ -762,9 +783,6 @@ async def _maybe_compact( new_messages, _summary = result await self.history.replace_session(channel, user_id, new_messages, chat_id) - # Compaction can summarize away the turn that carried the skills index, so - # drop the hash to force a fresh re-send on the next turn (#46 follow-up). - self._last_skills_hash.pop((channel, user_id, chat_id), None) log.info( "Compacted session %s/%s/%s: %d → %d messages (~%d ctx tokens)", channel, diff --git a/tests/test_history.py b/tests/test_history.py index e24f641..1f31392 100644 --- a/tests/test_history.py +++ b/tests/test_history.py @@ -333,7 +333,6 @@ async def test_new_command_clears_injection_history(tmp_path) -> None: db_path=config.history.db_path, max_turns=config.history.max_turns ) agent.history_mode = "injection" - agent._last_skills_hash = {} # Seed some history await agent.history.add_turn("telegram", "u1", "user", "hello") @@ -366,7 +365,6 @@ async def test_new_command_clears_session(tmp_path) -> None: db_path=config.history.db_path, max_turns=config.history.max_turns ) agent.history_mode = "session" - agent._last_skills_hash = {} # Seed a session await agent.history.append_session_message( @@ -401,7 +399,6 @@ async def test_new_command_case_insensitive(tmp_path) -> None: db_path=config.history.db_path, max_turns=config.history.max_turns ) agent.history_mode = "injection" - agent._last_skills_hash = {} await agent.history.add_turn("telegram", "u1", "user", "hello") await agent.history.add_turn("telegram", "u1", "assistant", "hi") diff --git a/tests/test_tools.py b/tests/test_tools.py index cbeca3f..af04af3 100644 --- a/tests/test_tools.py +++ b/tests/test_tools.py @@ -229,32 +229,42 @@ async def test_mid_session_skill_visible_next_turn_without_new(agent) -> None: @pytest.mark.asyncio async def test_skills_index_resent_only_when_changed(agent) -> None: - """In session mode the skills index rides the preamble only when it changed - since the previous turn (#46 follow-up): between changes the model still sees - the prior copy in replayed history, so we don't re-send identical copies. + """In session mode the skills index rides the preamble only when it isn't + already in the replayed history (#46 follow-up): an unchanged index sits in + history from a prior turn, so re-sending it would just accumulate copies. The + gate reads the real history, so it stays correct across changes and clears. """ - key = ("telegram", "u1", "") + ch, uid, cid = "telegram", "u1", "" + key = (ch, uid, cid) + + async def persist(msg: dict) -> None: + # Mimic what _process_session does: the preamble-bearing user message is + # appended to the session, becoming visible to later turns. + await agent.history.append_session_message(ch, uid, msg, cid) + await agent.skills.store.upsert_skill("weather", "# weather\nfetch the forecast") - # First turn: index is new for this session → included. + # Turn 1: index new for this session → included; persist it as a real turn would. first = await agent._turn_preamble(None, query="hi", session_key=key) assert "" in first and "weather" in first + await persist({"role": "user", "content": first}) - # Second turn, registry unchanged → index omitted (already in history). - second = await agent._turn_preamble(None, query="hi again", session_key=key) + # Turn 2, registry unchanged → omitted (the block is already in history). + second = await agent._turn_preamble(None, query="again", session_key=key) assert "" not in second - # A new skill changes the index → re-sent. + # A new skill changes the index → re-sent (the old block no longer matches). await agent.skills.store.upsert_skill("news", "# news\nread headlines") third = await agent._turn_preamble(None, query="more", session_key=key) assert "" in third and "news" in third + await persist({"role": "user", "content": third}) - # Still unchanged again → omitted. + # Unchanged again → omitted. fourth = await agent._turn_preamble(None, query="more", session_key=key) assert "" not in fourth - # Invalidation (e.g. /new or compaction clears the hash) → re-sent. - agent._last_skills_hash.pop(key, None) + # /new (or compaction) empties the history → the only copy is gone → re-sent. + await agent.history.clear_session(ch, uid, cid) fifth = await agent._turn_preamble(None, query="fresh", session_key=key) assert "" in fifth From 057437932fa3891277540914cead6efa22d21f09 Mon Sep 17 00:00:00 2001 From: Matteo Merola Date: Sun, 28 Jun 2026 23:56:40 +0200 Subject: [PATCH 6/6] docs: describe history-based skills gate accurately (#46) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The gate skips the index when the exact block is still in replayed history and re-sends after /new, compaction, or a skill change — not the earlier 'changed since the previous turn' hash framing. --- docs/content/docs/architecture.mdx | 2 +- docs/content/docs/skills.mdx | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/content/docs/architecture.mdx b/docs/content/docs/architecture.mdx index 8c596fb..9015251 100644 --- a/docs/content/docs/architecture.mdx +++ b/docs/content/docs/architecture.mdx @@ -71,7 +71,7 @@ The brain of MPA. Implements the LLM tool-use loop: 1. Load conversation history 2. Build the static system prompt (character, personalia, active tools). In session mode this is snapshotted once per conversation (rebuilt after `/new`) and reused every turn, so the cacheable prefix stays stable; on Anthropic it is sent with a `cache_control` breakpoint so the tools + system prefix is not reprocessed each turn -3. Inject the live date/time, the skills index, the fresh relevance-ranked memories + task reflections, and any per-request execution plan at the start of the current user message — so the agent always knows "now" and sees skills/memories added mid-session, without mutating the cached prefix. (The skills index and memories live here, not in the snapshot, so a skill created mid-session — e.g. via skill-creator — or a fact extracted mid-session reaches the model on the very next turn instead of waiting for `/new`.) In session mode the skills index is re-sent only when it changed since the previous turn — between changes the model still sees the prior copy in the replayed history — so unchanged turns cost nothing extra. +3. Inject the live date/time, the skills index, the fresh relevance-ranked memories + task reflections, and any per-request execution plan at the start of the current user message — so the agent always knows "now" and sees skills/memories added mid-session, without mutating the cached prefix. (The skills index and memories live here, not in the snapshot, so a skill created mid-session — e.g. via skill-creator — or a fact extracted mid-session reaches the model on the very next turn instead of waiting for `/new`.) In session mode the skills index is skipped only when that exact block is already present in the replayed history (so the model still sees it); once a `/new` or compaction drops it, or a new/rebound skill changes it, it is re-sent — so unchanged turns with the block still in history cost nothing extra. 4. Call the LLM 5. Handle tool calls with permission checks 6. Save conversation turn and extract memories diff --git a/docs/content/docs/skills.mdx b/docs/content/docs/skills.mdx index 1c9d6fb..95bc8bf 100644 --- a/docs/content/docs/skills.mdx +++ b/docs/content/docs/skills.mdx @@ -10,7 +10,7 @@ Skills are the central design idea of MPA. Instead of hardcoding how each integr ## How it works 1. Skill files are markdown documents stored in SQLite (seeded from `skills/` at startup) -2. The skills index is injected at the start of the user message (not the snapshotted system prompt), so skills added mid-session are visible immediately. In session mode it is re-sent only when the registry changed since the previous turn (between changes the model still sees the prior copy in history) +2. The skills index is injected at the start of the user message (not the snapshotted system prompt), so skills added mid-session are visible immediately. In session mode it is skipped when that exact block is still present in the replayed history, and re-sent once a `/new`/compaction drops it or a skill change alters it 3. The LLM reads the skill documentation and constructs the correct CLI commands 4. Commands are executed via the tool executor with permission checks