diff --git a/api/admin.py b/api/admin.py index 244e02b..075409b 100644 --- a/api/admin.py +++ b/api/admin.py @@ -1848,6 +1848,7 @@ async def system_prompt_preview(body: PromptPreviewIn) -> dict: secrets_available=secret_store is not None, include_memories=body.include_memories, include_reflections=body.include_reflections, + skills_on_demand=config.agent.skills_index_mode == "on_demand", ) full_prompt = sections.full_prompt section_map = sections.as_dict() @@ -3412,7 +3413,8 @@ def _config_requires_restart(values: dict) -> bool: # Function-tools that a persona may scope. ``load_skill`` is intentionally # excluded — it is always available (the core mechanic personae use to read -# their allowlisted skills); so are the vault tools and ``recall_memory`` +# their allowlisted skills); so are ``search_skills``/``list_skills`` (its +# on-demand discovery counterparts — #50), the vault tools, and ``recall_memory`` # (memory is injected for every persona, scope-filtered, so its on-demand # counterpart is always available too). Kept here (not imported from core.agent) # to avoid pulling the agent's heavy import graph into the admin app. diff --git a/core/agent.py b/core/agent.py index 5fb1582..67ed14c 100644 --- a/core/agent.py +++ b/core/agent.py @@ -28,7 +28,7 @@ from core.models import AgentResponse, Attachment from core.permissions import PermissionEngine, PermissionLevel, format_approval_message from core.personae import Persona, PersonaStore -from core.prompt_builder import build_prompt_sections +from core.prompt_builder import SKILLS_DISCOVERY_POINTER, build_prompt_sections from core.scheduler import AgentScheduler from core.secret_store import SecretStore from core.skills import SkillsEngine @@ -198,6 +198,41 @@ def _shell_quote(s: str) -> str: "required": ["name"], }, }, + # Skill discovery (#50) — only advertised when skills_index_mode == "on_demand" + # (the full index is NOT injected then). Return name + summary, never bodies; + # the model then calls load_skill to read the chosen skill in full. + { + "name": "search_skills", + "description": ( + "Find skills relevant to the current task. Returns the top matching skills " + "as name + summary (NOT their full content). Pass a short natural-language " + "query or keywords describing what you need to do, then call `load_skill` " + "with a returned name to read that skill's full instructions." + ), + "input_schema": { + "type": "object", + "properties": { + "query": { + "type": "string", + "description": "What you want to do (keywords or a short phrase)", + }, + "limit": { + "type": "integer", + "description": "Max skills to return (default 10).", + }, + }, + "required": ["query"], + }, + }, + { + "name": "list_skills", + "description": ( + "List every skill available to you as name + summary (NOT full content). " + "Use this to browse the whole catalogue; prefer `search_skills` when you " + "know what you're after. Call `load_skill` with a name to read one in full." + ), + "input_schema": {"type": "object", "properties": {}}, + }, { "name": "recall_memory", "description": ( @@ -493,9 +528,19 @@ def scoped_tools(persona: Persona | None) -> list[dict]: return TOOLS # ``load_skill`` and the vault discovery/request tools are always retained: # they are the mechanics personae rely on to read skills and obtain secrets. - # ``recall_memory`` too — memory is injected for every persona (scope-filtered), - # so its on-demand counterpart exposes nothing extra and stays available (#47). - _always = {"load_skill", "recall_memory", "list_secrets", "request_secret"} + # ``search_skills``/``list_skills`` mirror ``load_skill`` (a persona needs them + # to discover its own allowlisted skills in on-demand mode — #50); the feature + # gate below still drops them when that mode is off. ``recall_memory`` too — + # memory is injected for every persona (scope-filtered), so its on-demand + # counterpart exposes nothing extra and stays available (#47). + _always = { + "load_skill", + "search_skills", + "list_skills", + "recall_memory", + "list_secrets", + "request_secret", + } return [t for t in TOOLS if persona.allows_tool(t["name"]) or t["name"] in _always] @@ -504,16 +549,21 @@ def apply_feature_gates( *, secrets_available: bool, artifacts_enabled: bool, + skills_on_demand: bool = False, subagents_enabled: bool = True, ) -> list[dict]: """Drop tools whose backing feature is unavailable/disabled, so the model is never offered a capability it can't use (defence in depth — the tool handlers - also refuse). Disabling ``artifacts`` here means no persona can call it.""" + also refuse). Disabling ``artifacts`` here means no persona can call it. The + skill-discovery tools are offered only in on-demand index mode (#50); in the + default inject mode the full index is already in context, so they'd be noise.""" out = tools if not secrets_available: out = [t for t in out if t["name"] not in ("list_secrets", "request_secret")] if not artifacts_enabled: out = [t for t in out if t["name"] != "write_artifact"] + if not skills_on_demand: + out = [t for t in out if t["name"] not in ("search_skills", "list_skills")] if not subagents_enabled: out = [t for t in out if t["name"] != "spawn_subagent"] return out @@ -651,12 +701,7 @@ async def process( if note: preamble = f"{preamble}\n\n{note}" - tools = apply_feature_gates( - scoped_tools(persona), - secrets_available=self.secret_store is not None, - artifacts_enabled=self.config.artifacts.enabled, - subagents_enabled=self.config.subagents.enabled, - ) + tools = self._tools_for_turn(persona) # Static system prompt. In session mode it is snapshotted once at the # start of the session and reused for every turn (so the static content @@ -760,6 +805,19 @@ async def bind_chat_persona_by_label( return p.name return None + def _tools_for_turn(self, persona: Persona | None) -> list[dict]: + """The function-tool schemas offered to the model this turn: the persona's + tool scope, with feature-gated tools dropped — including the skill-discovery + tools when the index is not in on-demand mode (#50). The single seam that + translates ``skills_index_mode`` into the advertised tool set.""" + return apply_feature_gates( + scoped_tools(persona), + secrets_available=self.secret_store is not None, + artifacts_enabled=self.config.artifacts.enabled, + skills_on_demand=self.config.agent.skills_index_mode == "on_demand", + subagents_enabled=self.config.subagents.enabled, + ) + async def _turn_preamble( self, decomposed_goal: DecomposedGoal | None, @@ -804,16 +862,26 @@ async def _turn_preamble( # turns: any of those that drop or change the block simply won't find it, # and the failure direction is a harmless re-send, never a blind turn. # Injection mode and tests pass ``None`` → always include. + # On-demand mode (#50): omit the full index; carry only a short, static + # pointer to the search_skills/list_skills tools. The pointer is identical + # every turn, so the same history gate that dedups the index also dedups it + # (sent once per session, re-sent after a /new/compaction). try: - skills_index = await self.skills.get_index_block( - allow=persona.skills if persona else None - ) - if skills_index: - block = f"\n{skills_index}\n" - if session_key is None or not await self._skills_block_in_history( - session_key, block - ): - preamble += f"\n\n{block}" + if self.config.agent.skills_index_mode == "on_demand": + block = f"\n{SKILLS_DISCOVERY_POINTER}\n" + else: + skills_index = await self.skills.get_index_block( + allow=persona.skills if persona else None + ) + block = ( + f"\n{skills_index}\n" + if skills_index + else "" + ) + if block and ( + session_key is None or not await self._skills_block_in_history(session_key, block) + ): + preamble += f"\n\n{block}" except Exception: log.exception("Failed to load skills index for turn preamble") @@ -1487,6 +1555,23 @@ async def _execute_tool( return {"error": f"Skill not found: {skill_name}"} return {"name": skill_name, "content": content} + if name == "search_skills": + query = str(params.get("query", "")).strip() + log.info("Tool call: search_skills — %r", query) + allowed = (request_state or {}).get("allowed_skills") + limit = params.get("limit") + try: + limit = int(limit) if limit else 10 + except TypeError, ValueError: + limit = 10 + matches = await self.skills.search_index(query, allow=allowed, limit=max(1, limit)) + return {"skills": matches} + + if name == "list_skills": + log.info("Tool call: list_skills") + allowed = (request_state or {}).get("allowed_skills") + return {"skills": await self.skills.index_entries(allow=allowed)} + if name == "recall_memory": return await self._tool_recall_memory(params, request_state) @@ -2107,12 +2192,9 @@ async def _run_subagent_loop( stops at this run's step/token budget (sized by the spawning agent). """ cfg = self.config.subagents - tools = apply_feature_gates( - scoped_tools(child_persona), - secrets_available=self.secret_store is not None, - artifacts_enabled=self.config.artifacts.enabled, - subagents_enabled=cfg.enabled, - ) + # Same gating as the main loop (incl. the #50 skill-discovery tools, which a + # subagent needs in on-demand mode — its preamble carries the pointer too). + tools = self._tools_for_turn(child_persona) # At the depth ceiling a subagent may not spawn further — don't even offer it. if child_state["depth"] >= cfg.recursion_depth: tools = [t for t in tools if t["name"] != "spawn_subagent"] diff --git a/core/config.py b/core/config.py index 34d724e..a6d6330 100644 --- a/core/config.py +++ b/core/config.py @@ -77,6 +77,11 @@ class AgentConfig(BaseModel): timezone: str = "Europe/Zurich" skills_dir: str = "skills/" skills_db_path: str = "data/skills.db" + # How the skills index reaches the model (#50): + # "inject" — the full index rides every turn's preamble (default; unchanged) + # "on_demand" — the preamble omits it; the model calls search_skills/list_skills + # Any unrecognised value falls back to "inject" (the safe default). + skills_index_mode: str = "inject" personae_dir: str = "personae/" personae_db_path: str = "data/personae.db" active_persona: str = "" # empty = default identity (character/personalia below) diff --git a/core/prompt_builder.py b/core/prompt_builder.py index e17022e..6c45db4 100644 --- a/core/prompt_builder.py +++ b/core/prompt_builder.py @@ -30,6 +30,17 @@ You may create or update skills using the `skills.py` CLI after loading the `skill-creator` skill.""" +# Shown instead of the full skills index when ``agent.skills_index_mode`` is +# "on_demand" (#50). Mirrors the pointer: advertise the discovery tool, +# not the whole list — the model pulls matches lazily via search_skills. +SKILLS_DISCOVERY_POINTER = ( + "Skills (reusable instructions for specific tasks) are available but not listed " + "here, to keep this prompt small. When a request might need one, call the " + "`search_skills` tool with a short query to find matching skills (returns name + " + "summary), or `list_skills` to browse them all. Then call `load_skill` with a " + "name to read that skill's full instructions before acting." +) + DEFAULT_HISTORY_HANDLING_BLOCK = """Previous messages in this conversation have already been handled. Always focus exclusively on the latest user message as the current, active request. @@ -117,6 +128,7 @@ def build_prompt_sections( include_memories: bool = True, include_reflections: bool = True, include_skills: bool = True, + skills_on_demand: bool = False, ) -> PromptSections: """Build all prompt sections with current config and dynamic context. @@ -196,7 +208,9 @@ def build_prompt_sections( memory_section = f"\n{memories}\n" skills_section = "" - if include_skills and skills_index: + if skills_on_demand: + skills_section = f"\n{SKILLS_DISCOVERY_POINTER}\n" + elif include_skills and skills_index: skills_section = f"\n{skills_index}\n" reflections_section = "" diff --git a/core/skills.py b/core/skills.py index 39ba939..ab56a9a 100644 --- a/core/skills.py +++ b/core/skills.py @@ -124,23 +124,50 @@ class SkillsEngine: def __init__(self, db_path: str = "data/skills.db", seed_dir: str | Path = "skills/"): self.store = SkillsStore(db_path=db_path, seed_dir=seed_dir) - async def get_index_block(self, allow: list[str] | None = None) -> str: - """Render the skills index. When ``allow`` is given (a persona's - allowlist), only those skills are advertised; ``None``/empty = all.""" + async def index_entries(self, allow: list[str] | None = None) -> list[dict]: + """The skills index as ``{name, summary}`` rows, scoped to ``allow`` + (a persona's allowlist; ``None``/empty = all). Backs the index block and + the ``list_skills``/``search_skills`` discovery tools.""" skills = await self.store.list_skills() if allow: allowed = set(allow) skills = [s for s in skills if s["name"] in allowed] - if not skills: + return [{"name": s["name"], "summary": (s.get("summary") or "").strip()} for s in skills] + + async def get_index_block(self, allow: list[str] | None = None) -> str: + """Render the skills index. When ``allow`` is given (a persona's + allowlist), only those skills are advertised; ``None``/empty = all.""" + entries = await self.index_entries(allow=allow) + if not entries: return "" - lines = [] - for skill in skills: - summary = (skill.get("summary") or "").strip() - if summary: - lines.append(f"- {skill['name']}: {summary}") - else: - lines.append(f"- {skill['name']}") - return "\n".join(lines) + return "\n".join( + f"- {e['name']}: {e['summary']}" if e["summary"] else f"- {e['name']}" for e in entries + ) + + async def search_index( + self, query: str, allow: list[str] | None = None, limit: int = 10 + ) -> list[dict]: + """Top-``limit`` index entries matching ``query`` (keyword scored over + name + summary), scoped to ``allow``. An empty query returns the first + ``limit`` entries (a cheap browse). No match → empty list. + + ponytail: lexical scoring only; the issue defers embedding ranking until + keyword search measurably falls short. + """ + entries = await self.index_entries(allow=allow) + terms = [t for t in query.lower().split() if t] + if not terms: + return entries[:limit] + scored = [] + for e in entries: + haystack = f"{e['name']} {e['summary']}".lower() + score = sum(haystack.count(t) for t in terms) + if any(t in e["name"].lower() for t in terms): + score += 5 # a name hit beats a summary hit + if score: + scored.append((score, e)) + scored.sort(key=lambda se: (-se[0], se[1]["name"])) + return [e for _, e in scored[:limit]] async def get_skill_content(self, name: str) -> str: skill = await self.store.get_skill(name) diff --git a/docs/content/docs/architecture.mdx b/docs/content/docs/architecture.mdx index 9015251..3abab9b 100644 --- a/docs/content/docs/architecture.mdx +++ b/docs/content/docs/architecture.mdx @@ -71,7 +71,7 @@ The brain of MPA. Implements the LLM tool-use loop: 1. Load conversation history 2. Build the static system prompt (character, personalia, active tools). In session mode this is snapshotted once per conversation (rebuilt after `/new`) and reused every turn, so the cacheable prefix stays stable; on Anthropic it is sent with a `cache_control` breakpoint so the tools + system prefix is not reprocessed each turn -3. Inject the live date/time, the skills index, the fresh relevance-ranked memories + task reflections, and any per-request execution plan at the start of the current user message — so the agent always knows "now" and sees skills/memories added mid-session, without mutating the cached prefix. (The skills index and memories live here, not in the snapshot, so a skill created mid-session — e.g. via skill-creator — or a fact extracted mid-session reaches the model on the very next turn instead of waiting for `/new`.) In session mode the skills index is skipped only when that exact block is already present in the replayed history (so the model still sees it); once a `/new` or compaction drops it, or a new/rebound skill changes it, it is re-sent — so unchanged turns with the block still in history cost nothing extra. +3. Inject the live date/time, the skills index, the fresh relevance-ranked memories + task reflections, and any per-request execution plan at the start of the current user message — so the agent always knows "now" and sees skills/memories added mid-session, without mutating the cached prefix. (The skills index and memories live here, not in the snapshot, so a skill created mid-session — e.g. via skill-creator — or a fact extracted mid-session reaches the model on the very next turn instead of waiting for `/new`.) In session mode the skills index is skipped only when that exact block is already present in the replayed history (so the model still sees it); once a `/new` or compaction drops it, or a new/rebound skill changes it, it is re-sent — so unchanged turns with the block still in history cost nothing extra. When `agent.skills_index_mode` is `"on_demand"` the block carries only a short pointer instead of the full list, and the model fetches entries lazily via the `search_skills`/`list_skills` tools (see [Skills](/docs/skills#serving-the-index-on-demand)) — the same lazy pattern the secrets vault uses for `list_secrets`. 4. Call the LLM 5. Handle tool calls with permission checks 6. Save conversation turn and extract memories diff --git a/docs/content/docs/configuration.mdx b/docs/content/docs/configuration.mdx index 15b79d8..d8b3f4a 100644 --- a/docs/content/docs/configuration.mdx +++ b/docs/content/docs/configuration.mdx @@ -61,6 +61,7 @@ agent: model: "claude-sonnet-4-5-20250514" timezone: "Europe/Zurich" skills_dir: "skills/" + skills_index_mode: "inject" # "inject" (full index each turn) | "on_demand" (search_skills/list_skills) personae_dir: "personae/" active_persona: "" @@ -131,7 +132,7 @@ memory: #### `agent` -Core agent settings including name, owner, LLM provider, model, and timezone. The `skills_dir` points to the directory containing markdown skill files. `personae_dir` is the starter-gallery seed directory for [personae](/docs/personae), and `active_persona` is the slug of the active persona (`""` = the default identity). +Core agent settings including name, owner, LLM provider, model, and timezone. The `skills_dir` points to the directory containing markdown skill files. `skills_index_mode` chooses how the skills index reaches the model: `"inject"` (default) puts the full index in every turn's preamble, while `"on_demand"` replaces it with a short pointer and lets the model fetch entries lazily via the `search_skills`/`list_skills` tools (see [Skills](/docs/skills#serving-the-index-on-demand)). `personae_dir` is the starter-gallery seed directory for [personae](/docs/personae), and `active_persona` is the slug of the active persona (`""` = the default identity). #### `channels` diff --git a/docs/content/docs/skills.mdx b/docs/content/docs/skills.mdx index 95bc8bf..d6dc1d2 100644 --- a/docs/content/docs/skills.mdx +++ b/docs/content/docs/skills.mdx @@ -14,6 +14,8 @@ Skills are the central design idea of MPA. Instead of hardcoding how each integr 3. The LLM reads the skill documentation and constructs the correct CLI commands 4. Commands are executed via the tool executor with permission checks +By default the **whole index** rides every turn. As the catalogue grows you can switch to [on-demand discovery](#serving-the-index-on-demand) so the model fetches only the entries it needs. + This means adding a new capability is as simple as writing a markdown file. No Python code changes needed. ## Built-in skills @@ -161,6 +163,38 @@ show up on the next turn without a `/new`: The model picks a skill from the index, then calls `load_skill` to pull its full content. +## Serving the index on-demand + +Injecting the full index every turn is fine at a few dozen skills, but it does not +scale indefinitely — every allowed skill's `name: summary` line sits in context +whether or not the current request needs it. When the catalogue grows, switch the +index to **on-demand** mode: + +```yaml +agent: + skills_index_mode: "on_demand" # default: "inject" +``` + +In on-demand mode the `` block carries only a short pointer +instead of the full list, and the model pulls entries lazily through two tools — +exactly how the secrets vault advertises `list_secrets` rather than dumping every +secret name into the prompt: + +| Tool | Returns | +|------|---------| +| `search_skills(query, limit?)` | The top matching skills as `name + summary` (keyword-ranked over name and summary) | +| `list_skills()` | Every available skill as `name + summary` (browse the whole catalogue) | + +Both are scoped to the active persona's skill allowlist, the same scope that +`load_skill` enforces — a persona never discovers a skill it can't load. The flow +becomes: `search_skills` → pick a name → `load_skill` → act. This extends the +existing lazy pattern (bodies are already loaded on demand) up one level to the +index itself. + +`"inject"` stays the default, so behaviour is unchanged until you opt in. The two +discovery tools are only offered in on-demand mode; in inject mode the full index +is already in context, so they would be redundant. + ## Tips for writing good skills - **Be specific** — include exact command syntax with flags diff --git a/tests/test_artifacts.py b/tests/test_artifacts.py index eaf849d..3ed9965 100644 --- a/tests/test_artifacts.py +++ b/tests/test_artifacts.py @@ -347,13 +347,21 @@ def names(ts): return {t["name"] for t in ts} assert "write_artifact" in names( - apply_feature_gates(TOOLS, secrets_available=True, artifacts_enabled=True) + apply_feature_gates( + TOOLS, secrets_available=True, artifacts_enabled=True, skills_on_demand=False + ) ) assert "write_artifact" not in names( - apply_feature_gates(TOOLS, secrets_available=True, artifacts_enabled=False) + apply_feature_gates( + TOOLS, secrets_available=True, artifacts_enabled=False, skills_on_demand=False + ) ) # The secrets gate still composes independently. - no_secrets = names(apply_feature_gates(TOOLS, secrets_available=False, artifacts_enabled=True)) + no_secrets = names( + apply_feature_gates( + TOOLS, secrets_available=False, artifacts_enabled=True, skills_on_demand=False + ) + ) assert "list_secrets" not in no_secrets and "request_secret" not in no_secrets diff --git a/tests/test_personae.py b/tests/test_personae.py index 67ca6c6..d1a075e 100644 --- a/tests/test_personae.py +++ b/tests/test_personae.py @@ -83,7 +83,14 @@ def test_gateable_tools_in_sync_with_tools() -> None: from api.admin import GATEABLE_TOOLS from core.agent import TOOLS - always_on = {"load_skill", "recall_memory", "list_secrets", "request_secret"} + always_on = { + "load_skill", + "search_skills", + "list_skills", + "recall_memory", + "list_secrets", + "request_secret", + } assert set(GATEABLE_TOOLS) | always_on == {t["name"] for t in TOOLS} diff --git a/tests/test_skills.py b/tests/test_skills.py index fc2fdd6..128ca05 100644 --- a/tests/test_skills.py +++ b/tests/test_skills.py @@ -36,3 +36,60 @@ async def test_get_skill_content_reads_seeded_skill(tmp_path) -> None: content = await engine.get_skill_content("memory") assert "Use sqlite3." in content + + +# --- on-demand discovery: index_entries + search_index (#50) --- + + +def _engine_with(tmp_path, **skills) -> SkillsEngine: + for name, summary in skills.items(): + (tmp_path / f"{name}.md").write_text(summary) + return SkillsEngine(db_path=str(tmp_path / "skills.db"), seed_dir=tmp_path) + + +@pytest.mark.asyncio +async def test_index_entries_returns_name_summary(tmp_path) -> None: + engine = _engine_with(tmp_path, email="send and read email", weather="fetch the forecast") + entries = await engine.index_entries() + by_name = {e["name"]: e["summary"] for e in entries} + assert by_name == {"email": "send and read email", "weather": "fetch the forecast"} + + +@pytest.mark.asyncio +async def test_index_entries_scoped_to_allowlist(tmp_path) -> None: + engine = _engine_with(tmp_path, email="send email", weather="forecast", news="headlines") + entries = await engine.index_entries(allow=["email", "news"]) + assert {e["name"] for e in entries} == {"email", "news"} + + +@pytest.mark.asyncio +async def test_search_index_ranks_name_hit_first(tmp_path) -> None: + # "weather" matches the name of one skill and the summary of another; the + # name hit must outrank the summary-only hit. + engine = _engine_with( + tmp_path, + weather="forecast lookups", + travel="plan trips, mentions weather in passing", + ) + hits = await engine.search_index("weather") + assert [h["name"] for h in hits] == ["weather", "travel"] + + +@pytest.mark.asyncio +async def test_search_index_no_match_is_empty(tmp_path) -> None: + engine = _engine_with(tmp_path, email="send email") + assert await engine.search_index("quantumchromodynamics") == [] + + +@pytest.mark.asyncio +async def test_search_index_empty_query_browses(tmp_path) -> None: + engine = _engine_with(tmp_path, a="x", b="y", c="z") + hits = await engine.search_index("", limit=2) + assert len(hits) == 2 # empty query = cheap browse, capped by limit + + +@pytest.mark.asyncio +async def test_search_index_respects_allowlist(tmp_path) -> None: + engine = _engine_with(tmp_path, email="send email", webmail="email in the browser") + hits = await engine.search_index("email", allow=["email"]) + assert {h["name"] for h in hits} == {"email"} diff --git a/tests/test_tools.py b/tests/test_tools.py index af8a175..258e669 100644 --- a/tests/test_tools.py +++ b/tests/test_tools.py @@ -64,6 +64,37 @@ def test_static_prompt_has_no_datetime() -> None: assert cfg.agent.timezone in sections.intro +def test_skills_on_demand_renders_pointer_not_index() -> None: + # The on-demand branch backs the admin prompt-preview; it must show the + # discovery pointer and never the full index, even when an index is supplied. + sections = build_prompt_sections( + config=Config(), + history_mode="session", + skills_index="- weather: fetch the forecast\n- email: send mail", + memories="", + reflections="", + decomposed_goal=None, + skills_on_demand=True, + ) + assert "" in sections.available_skills + assert "search_skills" in sections.available_skills + assert "list_skills" in sections.available_skills + assert "- weather: fetch the forecast" not in sections.available_skills # index omitted + # Default (inject) still renders the index. + assert "- weather: fetch the forecast" in _sections_with_index().available_skills + + +def _sections_with_index(): + return build_prompt_sections( + config=Config(), + history_mode="session", + skills_index="- weather: fetch the forecast", + memories="", + reflections="", + decomposed_goal=None, + ) + + def test_tools_section_only_when_enabled() -> None: cfg = Config() assert _sections(cfg).tools == "" @@ -274,6 +305,135 @@ async def persist(msg: dict) -> None: assert "" in a and "" in b +# --------------------------------------------------------------------------- +# On-demand skills index (#50): pointer instead of index + discovery tools +# --------------------------------------------------------------------------- + + +@pytest.mark.asyncio +async def test_on_demand_preamble_swaps_index_for_pointer(agent) -> None: + await agent.skills.store.upsert_skill("weather", "fetch the forecast") + + # Default (inject) mode lists the skill in full. + inject = await agent._turn_preamble(None, query="hi") + assert "- weather: fetch the forecast" in inject + + # On-demand mode replaces the listing with the discovery pointer. + agent.config.agent.skills_index_mode = "on_demand" + on_demand = await agent._turn_preamble(None, query="hi") + assert "" in on_demand + assert "search_skills" in on_demand and "list_skills" in on_demand + assert "- weather: fetch the forecast" not in on_demand # full index omitted + + +@pytest.mark.asyncio +async def test_on_demand_pointer_resent_only_when_missing(agent) -> None: + """The static pointer is deduped by the same history gate as the index.""" + agent.config.agent.skills_index_mode = "on_demand" + ch, uid, cid = "telegram", "u1", "" + key = (ch, uid, cid) + + first = await agent._turn_preamble(None, query="hi", session_key=key) + assert "" in first and "search_skills" in first + await agent.history.append_session_message(ch, uid, {"role": "user", "content": first}, cid) + + second = await agent._turn_preamble(None, query="again", session_key=key) + assert "" not in second # already in history → not re-sent + + +def test_feature_gate_offers_discovery_tools_only_on_demand() -> None: + from core.agent import TOOLS, apply_feature_gates + + def names(on_demand): + return { + t["name"] + for t in apply_feature_gates( + TOOLS, + secrets_available=True, + artifacts_enabled=True, + skills_on_demand=on_demand, + ) + } + + inject = names(False) + assert "search_skills" not in inject and "list_skills" not in inject + on_demand = names(True) + assert "search_skills" in on_demand and "list_skills" in on_demand + + +@pytest.mark.asyncio +async def test_search_and_list_skills_dispatch_scoped(agent) -> None: + from core.llm import LLMToolCall + from core.personae import Persona + + await agent.skills.store.upsert_skill("email", "# email\nsend and read email") + await agent.skills.store.upsert_skill("weather", "# weather\nfetch the forecast") + + # A persona allowlisted to email only must not discover weather (#50 scoping). + rs = agent._new_request_state(Persona(name="p", skills=["email"])) + + search = await agent._execute_tool( + LLMToolCall(id="1", name="search_skills", arguments={"query": "weather"}), + "system", + "u", + rs, + ) + assert search["skills"] == [] # weather is out of scope + + listed = await agent._execute_tool( + LLMToolCall(id="2", name="list_skills", arguments={}), + "system", + "u", + rs, + ) + assert {s["name"] for s in listed["skills"]} == {"email"} + + # load_skill still reaches an in-scope skill end-to-end. + loaded = await agent._execute_tool( + LLMToolCall(id="3", name="load_skill", arguments={"name": "email"}), + "system", + "u", + rs, + ) + assert "send and read email" in loaded["content"] + + +@pytest.mark.asyncio +async def test_search_skills_limit_coercion_at_dispatch(agent) -> None: + """`limit` is LLM-controlled: a non-numeric value must fall back, and 0 must + not silently return nothing (the max(1, ...) floor).""" + from core.llm import LLMToolCall + + await agent.skills.store.upsert_skill("email", "send and read email") + + async def call(**limit_arg): + res = await agent._execute_tool( + LLMToolCall(id="x", name="search_skills", arguments={"query": "email", **limit_arg}), + "system", + "u", + agent._new_request_state(), + ) + return res["skills"] + + assert [s["name"] for s in await call(limit="not-a-number")] == ["email"] # graceful fallback + assert [s["name"] for s in await call(limit=0)] == ["email"] # floored, not empty + assert [s["name"] for s in await call()] == ["email"] # missing limit → default + + +def test_tools_for_turn_gates_discovery_on_config(agent) -> None: + """The config -> tools seam: skills_index_mode flips whether the discovery + tools are offered to the model (catches a typo'd comparison string).""" + + def names(): + return {t["name"] for t in agent._tools_for_turn(None)} + + agent.config.agent.skills_index_mode = "inject" + assert "search_skills" not in names() and "list_skills" not in names() + + agent.config.agent.skills_index_mode = "on_demand" + assert "search_skills" in names() and "list_skills" in names() + + # --------------------------------------------------------------------------- # Per-action write state — one write's outcome must not block a different one # ---------------------------------------------------------------------------