mattmezza · mattmezza · Jun 28, 2026 · Jun 28, 2026 · Jun 28, 2026 · Jun 28, 2026
diff --git a/core/agent.py b/core/agent.py
@@ -501,10 +501,16 @@ async def process(
         else:
             persona = await self._resolve_persona(channel, user_id, chat_id)
 
-        # Per-turn preamble: live date/time + fresh memory/reflections + plan.
-        # Memory is scoped to the active persona (#42): shared + its private.
+        # Per-turn preamble: live date/time + fresh memory/reflections + skills
+        # index + plan. Memory is scoped to the active persona (#42): shared +
+        # its private. Skills index is scoped to the persona's allowlist (#46).
+        session_key = (channel, user_id, chat_id) if self.history_mode == "session" else None
         preamble = await self._turn_preamble(
-            decomposed_goal, query=message, scope=_persona_scope(persona)
+            decomposed_goal,
+            query=message,
+            scope=_persona_scope(persona),
+            persona=persona,
+            session_key=session_key,
         )
 
         tools = apply_feature_gates(
@@ -620,26 +626,57 @@ async def _turn_preamble(
         decomposed_goal: DecomposedGoal | None,
         query: str | None = None,
         scope: str = "",
+        persona: Persona | None = None,
+        session_key: tuple[str, str, str] | None = None,
     ) -> str:
         """Build the per-turn preamble prepended to the current user message.
 
         Always carries the live date/time (so the agent knows 'now' every turn);
-        also carries fresh, query-relevant memory + reflections and the
-        execution plan when the request was decomposed.
+        also carries fresh, query-relevant memory + reflections, the live skills
+        index, and the execution plan when the request was decomposed.
 
-        Memory/reflections live here, not in the static system prompt: in
+        Memory/reflections/skills live here, not in the static system prompt: in
         session mode that prompt is snapshotted once and would freeze any
-        mid-session extraction out of view until ``/new`` (#41). The preamble is
-        rebuilt every turn and rides on the new (uncached) user message, so it
-        costs only the block's own tokens and is also relevance-ranked per turn.
+        mid-session change out of view until ``/new`` (#41, #46) — e.g. a skill
+        added via the skill-creator stayed invisible. The preamble is rebuilt
+        every turn and rides on the new (uncached) user message, so it costs only
+        the block's own tokens and is also relevance-ranked per turn.
 
         ``scope`` is the active persona's memory scope (#42): ``""`` = shared
         only, ``"<persona>"`` = shared + that persona's private memory.
+        ``persona`` scopes the skills index to its allowlist. ``session_key``
+        gates skills re-injection (see below); ``None`` = always inject.
         """
         now = datetime.now(ZoneInfo(self.config.agent.timezone))
         stamp = now.strftime("%A, %B %d, %Y %H:%M %Z")
         preamble = f"[Current date & time: {stamp}]"
 
+        # Skills index, scoped to the persona's allowlist. Rebuilt fresh per turn
+        # so a skill added mid-session (e.g. via skill-creator) is immediately
+        # visible without a /new (#46). Cheap: a local DB read, like memory.
+        #
+        # In session mode (``session_key`` set) the preamble is persisted into the
+        # growing history, so re-sending an unchanged index every turn would just
+        # accumulate identical copies. We skip it only when the exact block is
+        # ALREADY present in the replayed history (so the model still sees it).
+        # Gating on the real history — not a side cache — keeps it correct by
+        # construction across /new, compaction, persona rebind and concurrent
+        # turns: any of those that drop or change the block simply won't find it,
+        # and the failure direction is a harmless re-send, never a blind turn.
+        # Injection mode and tests pass ``None`` → always include.
+        try:
+            skills_index = await self.skills.get_index_block(
+                allow=persona.skills if persona else None
+            )
+            if skills_index:
+                block = f"<available_skills>\n{skills_index}\n</available_skills>"
+                if session_key is None or not await self._skills_block_in_history(
+                    session_key, block
+                ):
+                    preamble += f"\n\n{block}"
+        except Exception:
+            log.exception("Failed to load skills index for turn preamble")
+
         # ponytail: in session mode this now runs a query embed + cosine scan +
         # reinforce-write every turn (was once per session). Intended — that is
         # what makes injection fresh and per-turn relevant — and cheap for a
@@ -671,6 +708,32 @@ async def _turn_preamble(
             )
         return preamble
 
+    async def _skills_block_in_history(self, session_key: tuple[str, str, str], block: str) -> bool:
+        """True if the exact ``<available_skills>`` block is already present in the
+        replayed session history — so the model still sees it and we needn't
+        re-send it this turn (#46 follow-up).
+
+        Reads the same message array that will be sent to the model, so the
+        decision is correct by construction: after a /new or compaction the block
+        is gone (→ re-send), a persona rebind or new skill changes the block (→
+        re-send), and concurrent turns that haven't yet persisted both re-send
+        (harmless). Cheap: a substring scan over the (compaction-bounded) history.
+        """
+        try:
+            messages = await self.history.get_session(*session_key)
+        except Exception:
+            return False  # safe direction: re-send rather than risk a blind turn
+        for m in messages:
+            content = m.get("content")
+            if isinstance(content, str):
+                if block in content:
+                    return True
+            elif isinstance(content, list):
+                for part in content:
+                    if isinstance(part, dict) and block in str(part.get("text", "")):
+                        return True
+        return False
+
     async def _session_system_prompt(
         self,
         channel: str,
@@ -1915,23 +1978,23 @@ async def _build_system_prompt(
         decomposed_goal: DecomposedGoal | None = None,
         persona: Persona | None = None,
     ) -> str:
-        skills_index = await self.skills.get_index_block(allow=persona.skills if persona else None)
-
-        # Memory + reflections are NOT baked into the static prompt: in session
-        # mode it is snapshotted once and would freeze stale (#41). They are
-        # injected fresh per turn in the preamble instead (see _turn_preamble),
-        # which also makes them query-relevant on every turn.
+        # Memory, reflections AND the skills index are NOT baked into the static
+        # prompt: in session mode it is snapshotted once and would freeze stale —
+        # a skill added mid-session stayed invisible until /new (#41, #46). All
+        # three are injected fresh per turn in the preamble instead (see
+        # _turn_preamble), which also makes memory query-relevant every turn.
         sections = build_prompt_sections(
             config=self.config,
             history_mode=self.history_mode,
-            skills_index=skills_index,
+            skills_index="",
             memories="",
             reflections="",
             decomposed_goal=decomposed_goal,
             persona=persona,
             secrets_available=self.secret_store is not None,
             include_memories=False,
             include_reflections=False,
+            include_skills=False,
         )
         return sections.full_prompt
 

diff --git a/core/prompt_builder.py b/core/prompt_builder.py
@@ -116,6 +116,7 @@ def build_prompt_sections(
     secrets_available: bool = False,
     include_memories: bool = True,
     include_reflections: bool = True,
+    include_skills: bool = True,
 ) -> PromptSections:
     """Build all prompt sections with current config and dynamic context.
 
@@ -193,7 +194,7 @@ def build_prompt_sections(
         memory_section = f"<memories>\n{memories}\n</memories>"
 
     skills_section = ""
-    if skills_index:
+    if include_skills and skills_index:
         skills_section = f"<available_skills>\n{skills_index}\n</available_skills>"
 
     reflections_section = ""

diff --git a/docs/content/docs/architecture.mdx b/docs/content/docs/architecture.mdx
@@ -70,8 +70,8 @@ The agent becomes a **thin orchestrator**: it reads skill files, passes them to
 The brain of MPA. Implements the LLM tool-use loop:
 
 1. Load conversation history
-2. Build the static system prompt (skills, character, personalia, active tools). In session mode this is snapshotted once per conversation (rebuilt after `/new`) and reused every turn, so the cacheable prefix stays stable; on Anthropic it is sent with a `cache_control` breakpoint so the tools + system prefix is not reprocessed each turn
-3. Inject the live date/time, the fresh relevance-ranked memories + task reflections, and any per-request execution plan at the start of the current user message — so the agent always knows "now" and sees memories written mid-session, without mutating the cached prefix. (Memories live here, not in the snapshot, so a fact extracted mid-session reaches the model on the very next turn instead of waiting for `/new`.)
+2. Build the static system prompt (character, personalia, active tools). In session mode this is snapshotted once per conversation (rebuilt after `/new`) and reused every turn, so the cacheable prefix stays stable; on Anthropic it is sent with a `cache_control` breakpoint so the tools + system prefix is not reprocessed each turn
+3. Inject the live date/time, the skills index, the fresh relevance-ranked memories + task reflections, and any per-request execution plan at the start of the current user message — so the agent always knows "now" and sees skills/memories added mid-session, without mutating the cached prefix. (The skills index and memories live here, not in the snapshot, so a skill created mid-session — e.g. via skill-creator — or a fact extracted mid-session reaches the model on the very next turn instead of waiting for `/new`.) In session mode the skills index is skipped only when that exact block is already present in the replayed history (so the model still sees it); once a `/new` or compaction drops it, or a new/rebound skill changes it, it is re-sent — so unchanged turns with the block still in history cost nothing extra.
 4. Call the LLM
 5. Handle tool calls with permission checks
 6. Save conversation turn and extract memories
@@ -124,7 +124,8 @@ Channel handler (telegram.py / whatsapp.py)
     ▼
 Agent Core (agent.py)
     ├── Load history from SQLite
-    ├── Build system prompt (skills + character + personalia + memories)
+    ├── Build static system prompt (character + personalia + tools)
+    ├── Build per-turn preamble (date/time + skills index + memories + reflections)
     ├── Call LLM with tools
     │     │
     │     ▼

diff --git a/docs/content/docs/skills.mdx b/docs/content/docs/skills.mdx
@@ -10,7 +10,7 @@ Skills are the central design idea of MPA. Instead of hardcoding how each integr
 ## How it works
 
 1. Skill files are markdown documents stored in SQLite (seeded from `skills/` at startup)
-2. On each conversation turn, relevant skills are injected into the LLM's system prompt
+2. The skills index is injected at the start of the user message (not the snapshotted system prompt), so skills added mid-session are visible immediately. In session mode it is skipped when that exact block is still present in the replayed history, and re-sent once a `/new`/compaction drops it or a skill change alters it
 3. The LLM reads the skill documentation and constructs the correct CLI commands
 4. Commands are executed via the tool executor with permission checks
 
@@ -125,11 +125,12 @@ class SkillsEngine:
         return "\n\n".join(sections)
 ```
 
-Skills are wrapped in `<skill>` XML tags and injected into the `<available_skills>` block of the system prompt. The LLM uses these to construct correct CLI commands.
+Skills are advertised in the `<available_skills>` block, which is rebuilt fresh each turn and injected at the start of the user message (alongside the live date/time and memories), not baked into the snapshotted static prompt. So a skill created mid-session — e.g. via the skill-creator skill — is visible to the model on the very next turn without a `/new`. The LLM uses the index to pick a skill, then `load_skill` pulls its full content to construct correct CLI commands.
 
-## System prompt integration
+## How skills reach the model
 
-The system prompt combines skills with identity and memory:
+The **static** system prompt carries only identity and tools (snapshotted once per
+session for cache stability):
 
 ```
 You are {agent_name}, a personal AI assistant for {owner_name}.
@@ -141,19 +142,25 @@ You are {agent_name}, a personal AI assistant for {owner_name}.
 <character>
 {character.md content}
 </character>
+```
+
+The `<available_skills>` index — and `<memories>` — are assembled fresh **per turn**
+in the preamble at the head of the current user message, not in the static prompt
+(see [Architecture](/docs/architecture)). That is what lets a skill added mid-session
+show up on the next turn without a `/new`:
 
-<memories>
-{formatted memory context}
-</memories>
+```
+[Current date & time: ...]
 
 <available_skills>
-<skill name="himalaya-email">
-{skill content}
-</skill>
+- himalaya-email: send and read email over IMAP/SMTP
 ...
 </available_skills>
 ```
 
+The model picks a skill from the index, then calls `load_skill` to pull its full
+content.
+
 ## Tips for writing good skills
 
 - **Be specific** — include exact command syntax with flags

diff --git a/tests/test_tools.py b/tests/test_tools.py
@@ -201,6 +201,79 @@ async def test_mid_session_memory_visible_next_turn_without_new(agent) -> None:
     assert await agent._session_system_prompt("telegram", "u1", "") == snapshot
 
 
+@pytest.mark.asyncio
+async def test_mid_session_skill_visible_next_turn_without_new(agent) -> None:
+    """A skill added mid-session must reach the model on the next turn (#46).
+
+    The skills index rides the per-turn preamble, so a skill created mid-session
+    (e.g. via skill-creator) is advertised immediately — even though the static
+    session system prompt is snapshotted once and never rebuilt mid-session.
+    """
+    # Snapshot the static prompt: it must NOT carry the skills index at all.
+    snapshot = await agent._session_system_prompt("telegram", "u1", "")
+    assert "available_skills" not in snapshot
+
+    # A skill created after the snapshot (the staleness scenario from #46).
+    await agent.skills.store.upsert_skill(
+        "weather", "---\nname: weather\ndescription: fetch the forecast\n---\nbody"
+    )
+
+    # Next turn's preamble advertises it — no /new, no snapshot rebuild.
+    preamble = await agent._turn_preamble(None, query="what's the weather?")
+    assert "<available_skills>" in preamble
+    assert "weather" in preamble
+
+    # Snapshot is still the frozen one (cache intact, not rebuilt).
+    assert await agent._session_system_prompt("telegram", "u1", "") == snapshot
+
+
+@pytest.mark.asyncio
+async def test_skills_index_resent_only_when_changed(agent) -> None:
+    """In session mode the skills index rides the preamble only when it isn't
+    already in the replayed history (#46 follow-up): an unchanged index sits in
+    history from a prior turn, so re-sending it would just accumulate copies. The
+    gate reads the real history, so it stays correct across changes and clears.
+    """
+    ch, uid, cid = "telegram", "u1", ""
+    key = (ch, uid, cid)
+
+    async def persist(msg: dict) -> None:
+        # Mimic what _process_session does: the preamble-bearing user message is
+        # appended to the session, becoming visible to later turns.
+        await agent.history.append_session_message(ch, uid, msg, cid)
+
+    await agent.skills.store.upsert_skill("weather", "# weather\nfetch the forecast")
+
+    # Turn 1: index new for this session → included; persist it as a real turn would.
+    first = await agent._turn_preamble(None, query="hi", session_key=key)
+    assert "<available_skills>" in first and "weather" in first
+    await persist({"role": "user", "content": first})
+
+    # Turn 2, registry unchanged → omitted (the block is already in history).
+    second = await agent._turn_preamble(None, query="again", session_key=key)
+    assert "<available_skills>" not in second
+
+    # A new skill changes the index → re-sent (the old block no longer matches).
+    await agent.skills.store.upsert_skill("news", "# news\nread headlines")
+    third = await agent._turn_preamble(None, query="more", session_key=key)
+    assert "<available_skills>" in third and "news" in third
+    await persist({"role": "user", "content": third})
+
+    # Unchanged again → omitted.
+    fourth = await agent._turn_preamble(None, query="more", session_key=key)
+    assert "<available_skills>" not in fourth
+
+    # /new (or compaction) empties the history → the only copy is gone → re-sent.
+    await agent.history.clear_session(ch, uid, cid)
+    fifth = await agent._turn_preamble(None, query="fresh", session_key=key)
+    assert "<available_skills>" in fifth
+
+    # No session key (injection mode / tests) → always included, never gated.
+    a = await agent._turn_preamble(None, query="x")
+    b = await agent._turn_preamble(None, query="x")
+    assert "<available_skills>" in a and "<available_skills>" in b
+
+
 # ---------------------------------------------------------------------------
 # Per-action write state — one write's outcome must not block a different one
 # ---------------------------------------------------------------------------