mattmezza · mattmezza · Jun 29, 2026 · Jun 28, 2026 · Jun 28, 2026 · Jun 28, 2026
diff --git a/api/admin.py b/api/admin.py
@@ -1848,6 +1848,7 @@ async def system_prompt_preview(body: PromptPreviewIn) -> dict:
             secrets_available=secret_store is not None,
             include_memories=body.include_memories,
             include_reflections=body.include_reflections,
+            skills_on_demand=config.agent.skills_index_mode == "on_demand",
         )
         full_prompt = sections.full_prompt
         section_map = sections.as_dict()
@@ -3412,7 +3413,8 @@ def _config_requires_restart(values: dict) -> bool:
 
 # Function-tools that a persona may scope. ``load_skill`` is intentionally
 # excluded — it is always available (the core mechanic personae use to read
-# their allowlisted skills); so are the vault tools and ``recall_memory``
+# their allowlisted skills); so are ``search_skills``/``list_skills`` (its
+# on-demand discovery counterparts — #50), the vault tools, and ``recall_memory``
 # (memory is injected for every persona, scope-filtered, so its on-demand
 # counterpart is always available too). Kept here (not imported from core.agent)
 # to avoid pulling the agent's heavy import graph into the admin app.

diff --git a/core/agent.py b/core/agent.py
@@ -28,7 +28,7 @@
 from core.models import AgentResponse, Attachment
 from core.permissions import PermissionEngine, PermissionLevel, format_approval_message
 from core.personae import Persona, PersonaStore
-from core.prompt_builder import build_prompt_sections
+from core.prompt_builder import SKILLS_DISCOVERY_POINTER, build_prompt_sections
 from core.scheduler import AgentScheduler
 from core.secret_store import SecretStore
 from core.skills import SkillsEngine
@@ -198,6 +198,41 @@ def _shell_quote(s: str) -> str:
             "required": ["name"],
         },
     },
+    # Skill discovery (#50) — only advertised when skills_index_mode == "on_demand"
+    # (the full index is NOT injected then). Return name + summary, never bodies;
+    # the model then calls load_skill to read the chosen skill in full.
+    {
+        "name": "search_skills",
+        "description": (
+            "Find skills relevant to the current task. Returns the top matching skills "
+            "as name + summary (NOT their full content). Pass a short natural-language "
+            "query or keywords describing what you need to do, then call `load_skill` "
+            "with a returned name to read that skill's full instructions."
+        ),
+        "input_schema": {
+            "type": "object",
+            "properties": {
+                "query": {
+                    "type": "string",
+                    "description": "What you want to do (keywords or a short phrase)",
+                },
+                "limit": {
+                    "type": "integer",
+                    "description": "Max skills to return (default 10).",
+                },
+            },
+            "required": ["query"],
+        },
+    },
+    {
+        "name": "list_skills",
+        "description": (
+            "List every skill available to you as name + summary (NOT full content). "
+            "Use this to browse the whole catalogue; prefer `search_skills` when you "
+            "know what you're after. Call `load_skill` with a name to read one in full."
+        ),
+        "input_schema": {"type": "object", "properties": {}},
+    },
     {
         "name": "recall_memory",
         "description": (
@@ -493,9 +528,19 @@ def scoped_tools(persona: Persona | None) -> list[dict]:
         return TOOLS
     # ``load_skill`` and the vault discovery/request tools are always retained:
     # they are the mechanics personae rely on to read skills and obtain secrets.
-    # ``recall_memory`` too — memory is injected for every persona (scope-filtered),
-    # so its on-demand counterpart exposes nothing extra and stays available (#47).
-    _always = {"load_skill", "recall_memory", "list_secrets", "request_secret"}
+    # ``search_skills``/``list_skills`` mirror ``load_skill`` (a persona needs them
+    # to discover its own allowlisted skills in on-demand mode — #50); the feature
+    # gate below still drops them when that mode is off. ``recall_memory`` too —
+    # memory is injected for every persona (scope-filtered), so its on-demand
+    # counterpart exposes nothing extra and stays available (#47).
+    _always = {
+        "load_skill",
+        "search_skills",
+        "list_skills",
+        "recall_memory",
+        "list_secrets",
+        "request_secret",
+    }
     return [t for t in TOOLS if persona.allows_tool(t["name"]) or t["name"] in _always]
 
 
@@ -504,16 +549,21 @@ def apply_feature_gates(
     *,
     secrets_available: bool,
     artifacts_enabled: bool,
+    skills_on_demand: bool = False,
     subagents_enabled: bool = True,
 ) -> list[dict]:
     """Drop tools whose backing feature is unavailable/disabled, so the model is
     never offered a capability it can't use (defence in depth — the tool handlers
-    also refuse). Disabling ``artifacts`` here means no persona can call it."""
+    also refuse). Disabling ``artifacts`` here means no persona can call it. The
+    skill-discovery tools are offered only in on-demand index mode (#50); in the
+    default inject mode the full index is already in context, so they'd be noise."""
     out = tools
     if not secrets_available:
         out = [t for t in out if t["name"] not in ("list_secrets", "request_secret")]
     if not artifacts_enabled:
         out = [t for t in out if t["name"] != "write_artifact"]
+    if not skills_on_demand:
+        out = [t for t in out if t["name"] not in ("search_skills", "list_skills")]
     if not subagents_enabled:
         out = [t for t in out if t["name"] != "spawn_subagent"]
     return out
@@ -651,12 +701,7 @@ async def process(
             if note:
                 preamble = f"{preamble}\n\n{note}"
 
-        tools = apply_feature_gates(
-            scoped_tools(persona),
-            secrets_available=self.secret_store is not None,
-            artifacts_enabled=self.config.artifacts.enabled,
-            subagents_enabled=self.config.subagents.enabled,
-        )
+        tools = self._tools_for_turn(persona)
 
         # Static system prompt. In session mode it is snapshotted once at the
         # start of the session and reused for every turn (so the static content
@@ -760,6 +805,19 @@ async def bind_chat_persona_by_label(
                 return p.name
         return None
 
+    def _tools_for_turn(self, persona: Persona | None) -> list[dict]:
+        """The function-tool schemas offered to the model this turn: the persona's
+        tool scope, with feature-gated tools dropped — including the skill-discovery
+        tools when the index is not in on-demand mode (#50). The single seam that
+        translates ``skills_index_mode`` into the advertised tool set."""
+        return apply_feature_gates(
+            scoped_tools(persona),
+            secrets_available=self.secret_store is not None,
+            artifacts_enabled=self.config.artifacts.enabled,
+            skills_on_demand=self.config.agent.skills_index_mode == "on_demand",
+            subagents_enabled=self.config.subagents.enabled,
+        )
+
     async def _turn_preamble(
         self,
         decomposed_goal: DecomposedGoal | None,
@@ -804,16 +862,26 @@ async def _turn_preamble(
         # turns: any of those that drop or change the block simply won't find it,
         # and the failure direction is a harmless re-send, never a blind turn.
         # Injection mode and tests pass ``None`` → always include.
+        # On-demand mode (#50): omit the full index; carry only a short, static
+        # pointer to the search_skills/list_skills tools. The pointer is identical
+        # every turn, so the same history gate that dedups the index also dedups it
+        # (sent once per session, re-sent after a /new/compaction).
         try:
-            skills_index = await self.skills.get_index_block(
-                allow=persona.skills if persona else None
-            )
-            if skills_index:
-                block = f"<available_skills>\n{skills_index}\n</available_skills>"
-                if session_key is None or not await self._skills_block_in_history(
-                    session_key, block
-                ):
-                    preamble += f"\n\n{block}"
+            if self.config.agent.skills_index_mode == "on_demand":
+                block = f"<available_skills>\n{SKILLS_DISCOVERY_POINTER}\n</available_skills>"
+            else:
+                skills_index = await self.skills.get_index_block(
+                    allow=persona.skills if persona else None
+                )
+                block = (
+                    f"<available_skills>\n{skills_index}\n</available_skills>"
+                    if skills_index
+                    else ""
+                )
+            if block and (
+                session_key is None or not await self._skills_block_in_history(session_key, block)
+            ):
+                preamble += f"\n\n{block}"
         except Exception:
             log.exception("Failed to load skills index for turn preamble")
 
@@ -1487,6 +1555,23 @@ async def _execute_tool(
                 return {"error": f"Skill not found: {skill_name}"}
             return {"name": skill_name, "content": content}
 
+        if name == "search_skills":
+            query = str(params.get("query", "")).strip()
+            log.info("Tool call: search_skills — %r", query)
+            allowed = (request_state or {}).get("allowed_skills")
+            limit = params.get("limit")
+            try:
+                limit = int(limit) if limit else 10
+            except TypeError, ValueError:
+                limit = 10
+            matches = await self.skills.search_index(query, allow=allowed, limit=max(1, limit))
+            return {"skills": matches}
+
+        if name == "list_skills":
+            log.info("Tool call: list_skills")
+            allowed = (request_state or {}).get("allowed_skills")
+            return {"skills": await self.skills.index_entries(allow=allowed)}
+
         if name == "recall_memory":
             return await self._tool_recall_memory(params, request_state)
 
@@ -2107,12 +2192,9 @@ async def _run_subagent_loop(
         stops at this run's step/token budget (sized by the spawning agent).
         """
         cfg = self.config.subagents
-        tools = apply_feature_gates(
-            scoped_tools(child_persona),
-            secrets_available=self.secret_store is not None,
-            artifacts_enabled=self.config.artifacts.enabled,
-            subagents_enabled=cfg.enabled,
-        )
+        # Same gating as the main loop (incl. the #50 skill-discovery tools, which a
+        # subagent needs in on-demand mode — its preamble carries the pointer too).
+        tools = self._tools_for_turn(child_persona)
         # At the depth ceiling a subagent may not spawn further — don't even offer it.
         if child_state["depth"] >= cfg.recursion_depth:
             tools = [t for t in tools if t["name"] != "spawn_subagent"]

diff --git a/core/config.py b/core/config.py
@@ -77,6 +77,11 @@ class AgentConfig(BaseModel):
     timezone: str = "Europe/Zurich"
     skills_dir: str = "skills/"
     skills_db_path: str = "data/skills.db"
+    # How the skills index reaches the model (#50):
+    #   "inject"    — the full index rides every turn's preamble (default; unchanged)
+    #   "on_demand" — the preamble omits it; the model calls search_skills/list_skills
+    # Any unrecognised value falls back to "inject" (the safe default).
+    skills_index_mode: str = "inject"
     personae_dir: str = "personae/"
     personae_db_path: str = "data/personae.db"
     active_persona: str = ""  # empty = default identity (character/personalia below)

diff --git a/core/prompt_builder.py b/core/prompt_builder.py
@@ -30,6 +30,17 @@
 You may create or update skills using the `skills.py` CLI
 after loading the `skill-creator` skill."""
 
+# Shown instead of the full skills index when ``agent.skills_index_mode`` is
+# "on_demand" (#50). Mirrors the <secrets> pointer: advertise the discovery tool,
+# not the whole list — the model pulls matches lazily via search_skills.
+SKILLS_DISCOVERY_POINTER = (
+    "Skills (reusable instructions for specific tasks) are available but not listed "
+    "here, to keep this prompt small. When a request might need one, call the "
+    "`search_skills` tool with a short query to find matching skills (returns name + "
+    "summary), or `list_skills` to browse them all. Then call `load_skill` with a "
+    "name to read that skill's full instructions before acting."
+)
+
 DEFAULT_HISTORY_HANDLING_BLOCK = """Previous messages in this conversation
 have already been handled.
 Always focus exclusively on the latest user message as the current, active request.
@@ -117,6 +128,7 @@ def build_prompt_sections(
     include_memories: bool = True,
     include_reflections: bool = True,
     include_skills: bool = True,
+    skills_on_demand: bool = False,
 ) -> PromptSections:
     """Build all prompt sections with current config and dynamic context.
 
@@ -196,7 +208,9 @@ def build_prompt_sections(
         memory_section = f"<memories>\n{memories}\n</memories>"
 
     skills_section = ""
-    if include_skills and skills_index:
+    if skills_on_demand:
+        skills_section = f"<available_skills>\n{SKILLS_DISCOVERY_POINTER}\n</available_skills>"
+    elif include_skills and skills_index:
         skills_section = f"<available_skills>\n{skills_index}\n</available_skills>"
 
     reflections_section = ""

diff --git a/core/skills.py b/core/skills.py
@@ -124,23 +124,50 @@ class SkillsEngine:
     def __init__(self, db_path: str = "data/skills.db", seed_dir: str | Path = "skills/"):
         self.store = SkillsStore(db_path=db_path, seed_dir=seed_dir)
 
-    async def get_index_block(self, allow: list[str] | None = None) -> str:
-        """Render the skills index. When ``allow`` is given (a persona's
-        allowlist), only those skills are advertised; ``None``/empty = all."""
+    async def index_entries(self, allow: list[str] | None = None) -> list[dict]:
+        """The skills index as ``{name, summary}`` rows, scoped to ``allow``
+        (a persona's allowlist; ``None``/empty = all). Backs the index block and
+        the ``list_skills``/``search_skills`` discovery tools."""
         skills = await self.store.list_skills()
         if allow:
             allowed = set(allow)
             skills = [s for s in skills if s["name"] in allowed]
-        if not skills:
+        return [{"name": s["name"], "summary": (s.get("summary") or "").strip()} for s in skills]
+
+    async def get_index_block(self, allow: list[str] | None = None) -> str:
+        """Render the skills index. When ``allow`` is given (a persona's
+        allowlist), only those skills are advertised; ``None``/empty = all."""
+        entries = await self.index_entries(allow=allow)
+        if not entries:
             return ""
-        lines = []
-        for skill in skills:
-            summary = (skill.get("summary") or "").strip()
-            if summary:
-                lines.append(f"- {skill['name']}: {summary}")
-            else:
-                lines.append(f"- {skill['name']}")
-        return "\n".join(lines)
+        return "\n".join(
+            f"- {e['name']}: {e['summary']}" if e["summary"] else f"- {e['name']}" for e in entries
+        )
+
+    async def search_index(
+        self, query: str, allow: list[str] | None = None, limit: int = 10
+    ) -> list[dict]:
+        """Top-``limit`` index entries matching ``query`` (keyword scored over
+        name + summary), scoped to ``allow``. An empty query returns the first
+        ``limit`` entries (a cheap browse). No match → empty list.
+
+        ponytail: lexical scoring only; the issue defers embedding ranking until
+        keyword search measurably falls short.
+        """
+        entries = await self.index_entries(allow=allow)
+        terms = [t for t in query.lower().split() if t]
+        if not terms:
+            return entries[:limit]
+        scored = []
+        for e in entries:
+            haystack = f"{e['name']} {e['summary']}".lower()
+            score = sum(haystack.count(t) for t in terms)
+            if any(t in e["name"].lower() for t in terms):
+                score += 5  # a name hit beats a summary hit
+            if score:
+                scored.append((score, e))
+        scored.sort(key=lambda se: (-se[0], se[1]["name"]))
+        return [e for _, e in scored[:limit]]
 
     async def get_skill_content(self, name: str) -> str:
         skill = await self.store.get_skill(name)

diff --git a/docs/content/docs/architecture.mdx b/docs/content/docs/architecture.mdx
@@ -71,7 +71,7 @@ The brain of MPA. Implements the LLM tool-use loop:
 
 1. Load conversation history
 2. Build the static system prompt (character, personalia, active tools). In session mode this is snapshotted once per conversation (rebuilt after `/new`) and reused every turn, so the cacheable prefix stays stable; on Anthropic it is sent with a `cache_control` breakpoint so the tools + system prefix is not reprocessed each turn
-3. Inject the live date/time, the skills index, the fresh relevance-ranked memories + task reflections, and any per-request execution plan at the start of the current user message — so the agent always knows "now" and sees skills/memories added mid-session, without mutating the cached prefix. (The skills index and memories live here, not in the snapshot, so a skill created mid-session — e.g. via skill-creator — or a fact extracted mid-session reaches the model on the very next turn instead of waiting for `/new`.) In session mode the skills index is skipped only when that exact block is already present in the replayed history (so the model still sees it); once a `/new` or compaction drops it, or a new/rebound skill changes it, it is re-sent — so unchanged turns with the block still in history cost nothing extra.
+3. Inject the live date/time, the skills index, the fresh relevance-ranked memories + task reflections, and any per-request execution plan at the start of the current user message — so the agent always knows "now" and sees skills/memories added mid-session, without mutating the cached prefix. (The skills index and memories live here, not in the snapshot, so a skill created mid-session — e.g. via skill-creator — or a fact extracted mid-session reaches the model on the very next turn instead of waiting for `/new`.) In session mode the skills index is skipped only when that exact block is already present in the replayed history (so the model still sees it); once a `/new` or compaction drops it, or a new/rebound skill changes it, it is re-sent — so unchanged turns with the block still in history cost nothing extra. When `agent.skills_index_mode` is `"on_demand"` the block carries only a short pointer instead of the full list, and the model fetches entries lazily via the `search_skills`/`list_skills` tools (see [Skills](/docs/skills#serving-the-index-on-demand)) — the same lazy pattern the secrets vault uses for `list_secrets`.
 4. Call the LLM
 5. Handle tool calls with permission checks
 6. Save conversation turn and extract memories

diff --git a/docs/content/docs/configuration.mdx b/docs/content/docs/configuration.mdx
@@ -61,6 +61,7 @@ agent:
   model: "claude-sonnet-4-5-20250514"
   timezone: "Europe/Zurich"
   skills_dir: "skills/"
+  skills_index_mode: "inject"   # "inject" (full index each turn) | "on_demand" (search_skills/list_skills)
   personae_dir: "personae/"
   active_persona: ""
 
@@ -131,7 +132,7 @@ memory:
 
 #### `agent`
 
-Core agent settings including name, owner, LLM provider, model, and timezone. The `skills_dir` points to the directory containing markdown skill files. `personae_dir` is the starter-gallery seed directory for [personae](/docs/personae), and `active_persona` is the slug of the active persona (`""` = the default identity).
+Core agent settings including name, owner, LLM provider, model, and timezone. The `skills_dir` points to the directory containing markdown skill files. `skills_index_mode` chooses how the skills index reaches the model: `"inject"` (default) puts the full index in every turn's preamble, while `"on_demand"` replaces it with a short pointer and lets the model fetch entries lazily via the `search_skills`/`list_skills` tools (see [Skills](/docs/skills#serving-the-index-on-demand)). `personae_dir` is the starter-gallery seed directory for [personae](/docs/personae), and `active_persona` is the slug of the active persona (`""` = the default identity).
 
 #### `channels`