diff --git a/core/agent.py b/core/agent.py index 63075a7..f287430 100644 --- a/core/agent.py +++ b/core/agent.py @@ -501,10 +501,16 @@ async def process( else: persona = await self._resolve_persona(channel, user_id, chat_id) - # Per-turn preamble: live date/time + fresh memory/reflections + plan. - # Memory is scoped to the active persona (#42): shared + its private. + # Per-turn preamble: live date/time + fresh memory/reflections + skills + # index + plan. Memory is scoped to the active persona (#42): shared + + # its private. Skills index is scoped to the persona's allowlist (#46). + session_key = (channel, user_id, chat_id) if self.history_mode == "session" else None preamble = await self._turn_preamble( - decomposed_goal, query=message, scope=_persona_scope(persona) + decomposed_goal, + query=message, + scope=_persona_scope(persona), + persona=persona, + session_key=session_key, ) tools = apply_feature_gates( @@ -620,26 +626,57 @@ async def _turn_preamble( decomposed_goal: DecomposedGoal | None, query: str | None = None, scope: str = "", + persona: Persona | None = None, + session_key: tuple[str, str, str] | None = None, ) -> str: """Build the per-turn preamble prepended to the current user message. Always carries the live date/time (so the agent knows 'now' every turn); - also carries fresh, query-relevant memory + reflections and the - execution plan when the request was decomposed. + also carries fresh, query-relevant memory + reflections, the live skills + index, and the execution plan when the request was decomposed. - Memory/reflections live here, not in the static system prompt: in + Memory/reflections/skills live here, not in the static system prompt: in session mode that prompt is snapshotted once and would freeze any - mid-session extraction out of view until ``/new`` (#41). The preamble is - rebuilt every turn and rides on the new (uncached) user message, so it - costs only the block's own tokens and is also relevance-ranked per turn. + mid-session change out of view until ``/new`` (#41, #46) — e.g. a skill + added via the skill-creator stayed invisible. The preamble is rebuilt + every turn and rides on the new (uncached) user message, so it costs only + the block's own tokens and is also relevance-ranked per turn. ``scope`` is the active persona's memory scope (#42): ``""`` = shared only, ``""`` = shared + that persona's private memory. + ``persona`` scopes the skills index to its allowlist. ``session_key`` + gates skills re-injection (see below); ``None`` = always inject. """ now = datetime.now(ZoneInfo(self.config.agent.timezone)) stamp = now.strftime("%A, %B %d, %Y %H:%M %Z") preamble = f"[Current date & time: {stamp}]" + # Skills index, scoped to the persona's allowlist. Rebuilt fresh per turn + # so a skill added mid-session (e.g. via skill-creator) is immediately + # visible without a /new (#46). Cheap: a local DB read, like memory. + # + # In session mode (``session_key`` set) the preamble is persisted into the + # growing history, so re-sending an unchanged index every turn would just + # accumulate identical copies. We skip it only when the exact block is + # ALREADY present in the replayed history (so the model still sees it). + # Gating on the real history — not a side cache — keeps it correct by + # construction across /new, compaction, persona rebind and concurrent + # turns: any of those that drop or change the block simply won't find it, + # and the failure direction is a harmless re-send, never a blind turn. + # Injection mode and tests pass ``None`` → always include. + try: + skills_index = await self.skills.get_index_block( + allow=persona.skills if persona else None + ) + if skills_index: + block = f"\n{skills_index}\n" + if session_key is None or not await self._skills_block_in_history( + session_key, block + ): + preamble += f"\n\n{block}" + except Exception: + log.exception("Failed to load skills index for turn preamble") + # ponytail: in session mode this now runs a query embed + cosine scan + # reinforce-write every turn (was once per session). Intended — that is # what makes injection fresh and per-turn relevant — and cheap for a @@ -671,6 +708,32 @@ async def _turn_preamble( ) return preamble + async def _skills_block_in_history(self, session_key: tuple[str, str, str], block: str) -> bool: + """True if the exact ```` block is already present in the + replayed session history — so the model still sees it and we needn't + re-send it this turn (#46 follow-up). + + Reads the same message array that will be sent to the model, so the + decision is correct by construction: after a /new or compaction the block + is gone (→ re-send), a persona rebind or new skill changes the block (→ + re-send), and concurrent turns that haven't yet persisted both re-send + (harmless). Cheap: a substring scan over the (compaction-bounded) history. + """ + try: + messages = await self.history.get_session(*session_key) + except Exception: + return False # safe direction: re-send rather than risk a blind turn + for m in messages: + content = m.get("content") + if isinstance(content, str): + if block in content: + return True + elif isinstance(content, list): + for part in content: + if isinstance(part, dict) and block in str(part.get("text", "")): + return True + return False + async def _session_system_prompt( self, channel: str, @@ -1915,16 +1978,15 @@ async def _build_system_prompt( decomposed_goal: DecomposedGoal | None = None, persona: Persona | None = None, ) -> str: - skills_index = await self.skills.get_index_block(allow=persona.skills if persona else None) - - # Memory + reflections are NOT baked into the static prompt: in session - # mode it is snapshotted once and would freeze stale (#41). They are - # injected fresh per turn in the preamble instead (see _turn_preamble), - # which also makes them query-relevant on every turn. + # Memory, reflections AND the skills index are NOT baked into the static + # prompt: in session mode it is snapshotted once and would freeze stale — + # a skill added mid-session stayed invisible until /new (#41, #46). All + # three are injected fresh per turn in the preamble instead (see + # _turn_preamble), which also makes memory query-relevant every turn. sections = build_prompt_sections( config=self.config, history_mode=self.history_mode, - skills_index=skills_index, + skills_index="", memories="", reflections="", decomposed_goal=decomposed_goal, @@ -1932,6 +1994,7 @@ async def _build_system_prompt( secrets_available=self.secret_store is not None, include_memories=False, include_reflections=False, + include_skills=False, ) return sections.full_prompt diff --git a/core/prompt_builder.py b/core/prompt_builder.py index a95aef0..9b3889f 100644 --- a/core/prompt_builder.py +++ b/core/prompt_builder.py @@ -116,6 +116,7 @@ def build_prompt_sections( secrets_available: bool = False, include_memories: bool = True, include_reflections: bool = True, + include_skills: bool = True, ) -> PromptSections: """Build all prompt sections with current config and dynamic context. @@ -193,7 +194,7 @@ def build_prompt_sections( memory_section = f"\n{memories}\n" skills_section = "" - if skills_index: + if include_skills and skills_index: skills_section = f"\n{skills_index}\n" reflections_section = "" diff --git a/docs/content/docs/architecture.mdx b/docs/content/docs/architecture.mdx index c62f11b..9015251 100644 --- a/docs/content/docs/architecture.mdx +++ b/docs/content/docs/architecture.mdx @@ -70,8 +70,8 @@ The agent becomes a **thin orchestrator**: it reads skill files, passes them to The brain of MPA. Implements the LLM tool-use loop: 1. Load conversation history -2. Build the static system prompt (skills, character, personalia, active tools). In session mode this is snapshotted once per conversation (rebuilt after `/new`) and reused every turn, so the cacheable prefix stays stable; on Anthropic it is sent with a `cache_control` breakpoint so the tools + system prefix is not reprocessed each turn -3. Inject the live date/time, the fresh relevance-ranked memories + task reflections, and any per-request execution plan at the start of the current user message — so the agent always knows "now" and sees memories written mid-session, without mutating the cached prefix. (Memories live here, not in the snapshot, so a fact extracted mid-session reaches the model on the very next turn instead of waiting for `/new`.) +2. Build the static system prompt (character, personalia, active tools). In session mode this is snapshotted once per conversation (rebuilt after `/new`) and reused every turn, so the cacheable prefix stays stable; on Anthropic it is sent with a `cache_control` breakpoint so the tools + system prefix is not reprocessed each turn +3. Inject the live date/time, the skills index, the fresh relevance-ranked memories + task reflections, and any per-request execution plan at the start of the current user message — so the agent always knows "now" and sees skills/memories added mid-session, without mutating the cached prefix. (The skills index and memories live here, not in the snapshot, so a skill created mid-session — e.g. via skill-creator — or a fact extracted mid-session reaches the model on the very next turn instead of waiting for `/new`.) In session mode the skills index is skipped only when that exact block is already present in the replayed history (so the model still sees it); once a `/new` or compaction drops it, or a new/rebound skill changes it, it is re-sent — so unchanged turns with the block still in history cost nothing extra. 4. Call the LLM 5. Handle tool calls with permission checks 6. Save conversation turn and extract memories @@ -124,7 +124,8 @@ Channel handler (telegram.py / whatsapp.py) ▼ Agent Core (agent.py) ├── Load history from SQLite - ├── Build system prompt (skills + character + personalia + memories) + ├── Build static system prompt (character + personalia + tools) + ├── Build per-turn preamble (date/time + skills index + memories + reflections) ├── Call LLM with tools │ │ │ ▼ diff --git a/docs/content/docs/skills.mdx b/docs/content/docs/skills.mdx index ac3a86e..95bc8bf 100644 --- a/docs/content/docs/skills.mdx +++ b/docs/content/docs/skills.mdx @@ -10,7 +10,7 @@ Skills are the central design idea of MPA. Instead of hardcoding how each integr ## How it works 1. Skill files are markdown documents stored in SQLite (seeded from `skills/` at startup) -2. On each conversation turn, relevant skills are injected into the LLM's system prompt +2. The skills index is injected at the start of the user message (not the snapshotted system prompt), so skills added mid-session are visible immediately. In session mode it is skipped when that exact block is still present in the replayed history, and re-sent once a `/new`/compaction drops it or a skill change alters it 3. The LLM reads the skill documentation and constructs the correct CLI commands 4. Commands are executed via the tool executor with permission checks @@ -125,11 +125,12 @@ class SkillsEngine: return "\n\n".join(sections) ``` -Skills are wrapped in `` XML tags and injected into the `` block of the system prompt. The LLM uses these to construct correct CLI commands. +Skills are advertised in the `` block, which is rebuilt fresh each turn and injected at the start of the user message (alongside the live date/time and memories), not baked into the snapshotted static prompt. So a skill created mid-session — e.g. via the skill-creator skill — is visible to the model on the very next turn without a `/new`. The LLM uses the index to pick a skill, then `load_skill` pulls its full content to construct correct CLI commands. -## System prompt integration +## How skills reach the model -The system prompt combines skills with identity and memory: +The **static** system prompt carries only identity and tools (snapshotted once per +session for cache stability): ``` You are {agent_name}, a personal AI assistant for {owner_name}. @@ -141,19 +142,25 @@ You are {agent_name}, a personal AI assistant for {owner_name}. {character.md content} +``` + +The `` index — and `` — are assembled fresh **per turn** +in the preamble at the head of the current user message, not in the static prompt +(see [Architecture](/docs/architecture)). That is what lets a skill added mid-session +show up on the next turn without a `/new`: - -{formatted memory context} - +``` +[Current date & time: ...] - -{skill content} - +- himalaya-email: send and read email over IMAP/SMTP ... ``` +The model picks a skill from the index, then calls `load_skill` to pull its full +content. + ## Tips for writing good skills - **Be specific** — include exact command syntax with flags diff --git a/tests/test_tools.py b/tests/test_tools.py index 7faee9e..af04af3 100644 --- a/tests/test_tools.py +++ b/tests/test_tools.py @@ -201,6 +201,79 @@ async def test_mid_session_memory_visible_next_turn_without_new(agent) -> None: assert await agent._session_system_prompt("telegram", "u1", "") == snapshot +@pytest.mark.asyncio +async def test_mid_session_skill_visible_next_turn_without_new(agent) -> None: + """A skill added mid-session must reach the model on the next turn (#46). + + The skills index rides the per-turn preamble, so a skill created mid-session + (e.g. via skill-creator) is advertised immediately — even though the static + session system prompt is snapshotted once and never rebuilt mid-session. + """ + # Snapshot the static prompt: it must NOT carry the skills index at all. + snapshot = await agent._session_system_prompt("telegram", "u1", "") + assert "available_skills" not in snapshot + + # A skill created after the snapshot (the staleness scenario from #46). + await agent.skills.store.upsert_skill( + "weather", "---\nname: weather\ndescription: fetch the forecast\n---\nbody" + ) + + # Next turn's preamble advertises it — no /new, no snapshot rebuild. + preamble = await agent._turn_preamble(None, query="what's the weather?") + assert "" in preamble + assert "weather" in preamble + + # Snapshot is still the frozen one (cache intact, not rebuilt). + assert await agent._session_system_prompt("telegram", "u1", "") == snapshot + + +@pytest.mark.asyncio +async def test_skills_index_resent_only_when_changed(agent) -> None: + """In session mode the skills index rides the preamble only when it isn't + already in the replayed history (#46 follow-up): an unchanged index sits in + history from a prior turn, so re-sending it would just accumulate copies. The + gate reads the real history, so it stays correct across changes and clears. + """ + ch, uid, cid = "telegram", "u1", "" + key = (ch, uid, cid) + + async def persist(msg: dict) -> None: + # Mimic what _process_session does: the preamble-bearing user message is + # appended to the session, becoming visible to later turns. + await agent.history.append_session_message(ch, uid, msg, cid) + + await agent.skills.store.upsert_skill("weather", "# weather\nfetch the forecast") + + # Turn 1: index new for this session → included; persist it as a real turn would. + first = await agent._turn_preamble(None, query="hi", session_key=key) + assert "" in first and "weather" in first + await persist({"role": "user", "content": first}) + + # Turn 2, registry unchanged → omitted (the block is already in history). + second = await agent._turn_preamble(None, query="again", session_key=key) + assert "" not in second + + # A new skill changes the index → re-sent (the old block no longer matches). + await agent.skills.store.upsert_skill("news", "# news\nread headlines") + third = await agent._turn_preamble(None, query="more", session_key=key) + assert "" in third and "news" in third + await persist({"role": "user", "content": third}) + + # Unchanged again → omitted. + fourth = await agent._turn_preamble(None, query="more", session_key=key) + assert "" not in fourth + + # /new (or compaction) empties the history → the only copy is gone → re-sent. + await agent.history.clear_session(ch, uid, cid) + fifth = await agent._turn_preamble(None, query="fresh", session_key=key) + assert "" in fifth + + # No session key (injection mode / tests) → always included, never gated. + a = await agent._turn_preamble(None, query="x") + b = await agent._turn_preamble(None, query="x") + assert "" in a and "" in b + + # --------------------------------------------------------------------------- # Per-action write state — one write's outcome must not block a different one # ---------------------------------------------------------------------------