Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
95 changes: 79 additions & 16 deletions core/agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -501,10 +501,16 @@ async def process(
else:
persona = await self._resolve_persona(channel, user_id, chat_id)

# Per-turn preamble: live date/time + fresh memory/reflections + plan.
# Memory is scoped to the active persona (#42): shared + its private.
# Per-turn preamble: live date/time + fresh memory/reflections + skills
# index + plan. Memory is scoped to the active persona (#42): shared +
# its private. Skills index is scoped to the persona's allowlist (#46).
session_key = (channel, user_id, chat_id) if self.history_mode == "session" else None
preamble = await self._turn_preamble(
decomposed_goal, query=message, scope=_persona_scope(persona)
decomposed_goal,
query=message,
scope=_persona_scope(persona),
persona=persona,
session_key=session_key,
)

tools = apply_feature_gates(
Expand Down Expand Up @@ -620,26 +626,57 @@ async def _turn_preamble(
decomposed_goal: DecomposedGoal | None,
query: str | None = None,
scope: str = "",
persona: Persona | None = None,
session_key: tuple[str, str, str] | None = None,
) -> str:
"""Build the per-turn preamble prepended to the current user message.

Always carries the live date/time (so the agent knows 'now' every turn);
also carries fresh, query-relevant memory + reflections and the
execution plan when the request was decomposed.
also carries fresh, query-relevant memory + reflections, the live skills
index, and the execution plan when the request was decomposed.

Memory/reflections live here, not in the static system prompt: in
Memory/reflections/skills live here, not in the static system prompt: in
session mode that prompt is snapshotted once and would freeze any
mid-session extraction out of view until ``/new`` (#41). The preamble is
rebuilt every turn and rides on the new (uncached) user message, so it
costs only the block's own tokens and is also relevance-ranked per turn.
mid-session change out of view until ``/new`` (#41, #46) — e.g. a skill
added via the skill-creator stayed invisible. The preamble is rebuilt
every turn and rides on the new (uncached) user message, so it costs only
the block's own tokens and is also relevance-ranked per turn.

``scope`` is the active persona's memory scope (#42): ``""`` = shared
only, ``"<persona>"`` = shared + that persona's private memory.
``persona`` scopes the skills index to its allowlist. ``session_key``
gates skills re-injection (see below); ``None`` = always inject.
"""
now = datetime.now(ZoneInfo(self.config.agent.timezone))
stamp = now.strftime("%A, %B %d, %Y %H:%M %Z")
preamble = f"[Current date & time: {stamp}]"

# Skills index, scoped to the persona's allowlist. Rebuilt fresh per turn
# so a skill added mid-session (e.g. via skill-creator) is immediately
# visible without a /new (#46). Cheap: a local DB read, like memory.
#
# In session mode (``session_key`` set) the preamble is persisted into the
# growing history, so re-sending an unchanged index every turn would just
# accumulate identical copies. We skip it only when the exact block is
# ALREADY present in the replayed history (so the model still sees it).
# Gating on the real history — not a side cache — keeps it correct by
# construction across /new, compaction, persona rebind and concurrent
# turns: any of those that drop or change the block simply won't find it,
# and the failure direction is a harmless re-send, never a blind turn.
# Injection mode and tests pass ``None`` → always include.
try:
skills_index = await self.skills.get_index_block(
allow=persona.skills if persona else None
)
if skills_index:
block = f"<available_skills>\n{skills_index}\n</available_skills>"
if session_key is None or not await self._skills_block_in_history(
session_key, block
):
preamble += f"\n\n{block}"
except Exception:
log.exception("Failed to load skills index for turn preamble")

# ponytail: in session mode this now runs a query embed + cosine scan +
# reinforce-write every turn (was once per session). Intended — that is
# what makes injection fresh and per-turn relevant — and cheap for a
Expand Down Expand Up @@ -671,6 +708,32 @@ async def _turn_preamble(
)
return preamble

async def _skills_block_in_history(self, session_key: tuple[str, str, str], block: str) -> bool:
    """Return True if ``block`` is the most recent skills block in the session history.

    Used to decide whether the ``<available_skills>`` block must be re-sent
    this turn (#46 follow-up). The history is walked newest-first and only the
    latest occurrence of the block's opening line is compared against
    ``block``. An older identical copy does not count: if the index went
    A -> B -> A (skill added then removed, persona rebound and rebound back),
    the model's freshest view is B, so A must be re-sent rather than skipped.

    Args:
        session_key: ``(channel, user_id, chat_id)``, unpacked into
            ``self.history.get_session``.
        block: The exact block text that would be injected this turn.

    Returns:
        True only when the newest skills block found in history equals
        ``block``. False when no block is found, when the newest one differs,
        or when the history cannot be read -- every uncertain case resolves to
        a harmless re-send.
    """
    try:
        messages = await self.history.get_session(*session_key)
    except Exception:
        return False  # safe direction: re-send rather than risk a blind turn

    # The first line of the block is its opening tag; its last occurrence in
    # the history marks the block the model currently treats as authoritative.
    opening_tag = block.split("\n", 1)[0]

    for message in reversed(list(messages)):
        content = message.get("content")
        if isinstance(content, str):
            texts = [content]
        elif isinstance(content, list):
            # Multi-part content: only dict parts carry a "text" field.
            texts = [
                str(part.get("text", ""))
                for part in reversed(content)
                if isinstance(part, dict)
            ]
        else:
            continue
        for text in texts:
            tag_at = text.rfind(opening_tag)
            if tag_at != -1:
                # Newest block located: skip the re-send only on an exact match.
                return text.startswith(block, tag_at)
    return False

async def _session_system_prompt(
self,
channel: str,
Expand Down Expand Up @@ -1915,23 +1978,23 @@ async def _build_system_prompt(
decomposed_goal: DecomposedGoal | None = None,
persona: Persona | None = None,
) -> str:
skills_index = await self.skills.get_index_block(allow=persona.skills if persona else None)

# Memory + reflections are NOT baked into the static prompt: in session
# mode it is snapshotted once and would freeze stale (#41). They are
# injected fresh per turn in the preamble instead (see _turn_preamble),
# which also makes them query-relevant on every turn.
# Memory, reflections AND the skills index are NOT baked into the static
# prompt: in session mode it is snapshotted once and would freeze stale —
# a skill added mid-session stayed invisible until /new (#41, #46). All
# three are injected fresh per turn in the preamble instead (see
# _turn_preamble), which also makes memory query-relevant every turn.
sections = build_prompt_sections(
config=self.config,
history_mode=self.history_mode,
skills_index=skills_index,
skills_index="",
memories="",
reflections="",
decomposed_goal=decomposed_goal,
persona=persona,
secrets_available=self.secret_store is not None,
include_memories=False,
include_reflections=False,
include_skills=False,
)
return sections.full_prompt

Expand Down
3 changes: 2 additions & 1 deletion core/prompt_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,7 @@ def build_prompt_sections(
secrets_available: bool = False,
include_memories: bool = True,
include_reflections: bool = True,
include_skills: bool = True,
) -> PromptSections:
"""Build all prompt sections with current config and dynamic context.

Expand Down Expand Up @@ -193,7 +194,7 @@ def build_prompt_sections(
memory_section = f"<memories>\n{memories}\n</memories>"

skills_section = ""
if skills_index:
if include_skills and skills_index:
skills_section = f"<available_skills>\n{skills_index}\n</available_skills>"

reflections_section = ""
Expand Down
7 changes: 4 additions & 3 deletions docs/content/docs/architecture.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -70,8 +70,8 @@ The agent becomes a **thin orchestrator**: it reads skill files, passes them to
The brain of MPA. Implements the LLM tool-use loop:

1. Load conversation history
2. Build the static system prompt (skills, character, personalia, active tools). In session mode this is snapshotted once per conversation (rebuilt after `/new`) and reused every turn, so the cacheable prefix stays stable; on Anthropic it is sent with a `cache_control` breakpoint so the tools + system prefix is not reprocessed each turn
3. Inject the live date/time, the fresh relevance-ranked memories + task reflections, and any per-request execution plan at the start of the current user message — so the agent always knows "now" and sees memories written mid-session, without mutating the cached prefix. (Memories live here, not in the snapshot, so a fact extracted mid-session reaches the model on the very next turn instead of waiting for `/new`.)
2. Build the static system prompt (character, personalia, active tools). In session mode this is snapshotted once per conversation (rebuilt after `/new`) and reused every turn, so the cacheable prefix stays stable; on Anthropic it is sent with a `cache_control` breakpoint so the tools + system prefix is not reprocessed each turn
3. Inject the live date/time, the skills index, the fresh relevance-ranked memories + task reflections, and any per-request execution plan at the start of the current user message — so the agent always knows "now" and sees skills/memories added mid-session, without mutating the cached prefix. (The skills index and memories live here, not in the snapshot, so a skill created mid-session — e.g. via skill-creator — or a fact extracted mid-session reaches the model on the very next turn instead of waiting for `/new`.) In session mode the skills index is skipped only when that exact block is already present in the replayed history (so the model still sees it); once a `/new` or compaction drops it, or a new/rebound skill changes it, it is re-sent — so unchanged turns with the block still in history cost nothing extra.
4. Call the LLM
5. Handle tool calls with permission checks
6. Save conversation turn and extract memories
Expand Down Expand Up @@ -124,7 +124,8 @@ Channel handler (telegram.py / whatsapp.py)
Agent Core (agent.py)
├── Load history from SQLite
├── Build system prompt (skills + character + personalia + memories)
├── Build static system prompt (character + personalia + tools)
├── Build per-turn preamble (date/time + skills index + memories + reflections)
├── Call LLM with tools
│ │
│ ▼
Expand Down
27 changes: 17 additions & 10 deletions docs/content/docs/skills.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ Skills are the central design idea of MPA. Instead of hardcoding how each integr
## How it works

1. Skill files are markdown documents stored in SQLite (seeded from `skills/` at startup)
2. On each conversation turn, relevant skills are injected into the LLM's system prompt
2. The skills index is injected at the start of the user message (not the snapshotted system prompt), so skills added mid-session are visible immediately. In session mode it is skipped while that exact block is still present in the replayed history, and re-sent once a `/new` or a compaction drops it, or a skill change alters it
3. The LLM reads the skill documentation and constructs the correct CLI commands
4. Commands are executed via the tool executor with permission checks

Expand Down Expand Up @@ -125,11 +125,12 @@ class SkillsEngine:
return "\n\n".join(sections)
```

Skills are wrapped in `<skill>` XML tags and injected into the `<available_skills>` block of the system prompt. The LLM uses these to construct correct CLI commands.
Skills are advertised in the `<available_skills>` block, which is rebuilt fresh each turn and injected at the start of the user message (alongside the live date/time and memories), not baked into the snapshotted static prompt. So a skill created mid-session — e.g. via the skill-creator skill — is visible to the model on the very next turn without a `/new`. The LLM uses the index to pick a skill, then `load_skill` pulls its full content to construct correct CLI commands.

## System prompt integration
## How skills reach the model

The system prompt combines skills with identity and memory:
The **static** system prompt carries only identity and tools (snapshotted once per
session for cache stability):

```
You are {agent_name}, a personal AI assistant for {owner_name}.
Expand All @@ -141,19 +142,25 @@ You are {agent_name}, a personal AI assistant for {owner_name}.
<character>
{character.md content}
</character>
```

The `<available_skills>` index — and `<memories>` — are assembled fresh **per turn**
in the preamble at the head of the current user message, not in the static prompt
(see [Architecture](/docs/architecture)). That is what lets a skill added mid-session
show up on the next turn without a `/new`:

<memories>
{formatted memory context}
</memories>
```
[Current date & time: ...]

<available_skills>
<skill name="himalaya-email">
{skill content}
</skill>
- himalaya-email: send and read email over IMAP/SMTP
...
</available_skills>
```

The model picks a skill from the index, then calls `load_skill` to pull its full
content.

## Tips for writing good skills

- **Be specific** — include exact command syntax with flags
Expand Down
73 changes: 73 additions & 0 deletions tests/test_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -201,6 +201,79 @@ async def test_mid_session_memory_visible_next_turn_without_new(agent) -> None:
assert await agent._session_system_prompt("telegram", "u1", "") == snapshot


@pytest.mark.asyncio
async def test_mid_session_skill_visible_next_turn_without_new(agent) -> None:
    """Regression for #46: a skill created mid-session shows up on the next turn.

    The static session prompt is captured once and must stay byte-identical,
    while the per-turn preamble is where the skills index is advertised.
    """
    session = ("telegram", "u1", "")

    # The frozen static prompt carries no skills index whatsoever.
    frozen_prompt = await agent._session_system_prompt(*session)
    assert "available_skills" not in frozen_prompt

    # Register a skill only after the prompt was captured.
    skill_doc = "---\nname: weather\ndescription: fetch the forecast\n---\nbody"
    await agent.skills.store.upsert_skill("weather", skill_doc)

    # The very next preamble lists it, with no /new in between.
    turn_preamble = await agent._turn_preamble(None, query="what's the weather?")
    for expected in ("<available_skills>", "weather"):
        assert expected in turn_preamble

    # The static prompt was not rebuilt along the way.
    assert await agent._session_system_prompt(*session) == frozen_prompt


@pytest.mark.asyncio
async def test_skills_index_resent_only_when_changed(agent) -> None:
    """Session mode: the skills block is sent only when history lacks it (#46 follow-up).

    Walks one session through: first send, unchanged turn, registry change,
    unchanged turn, cleared history, and finally calls without a session key.
    """
    channel, user, chat = "telegram", "u1", ""
    session = (channel, user, chat)
    tag = "<available_skills>"

    async def next_preamble(query: str) -> str:
        # Build the preamble for this session, gated on its stored history.
        return await agent._turn_preamble(None, query=query, session_key=session)

    async def record(preamble_text: str) -> None:
        # Store the preamble as a user message so later turns can find it.
        await agent.history.append_session_message(
            channel, user, {"role": "user", "content": preamble_text}, chat
        )

    await agent.skills.store.upsert_skill("weather", "# weather\nfetch the forecast")

    # First turn: nothing in history yet, so the block is included.
    turn_one = await next_preamble("hi")
    assert tag in turn_one
    assert "weather" in turn_one
    await record(turn_one)

    # Same registry, block already stored: left out.
    turn_two = await next_preamble("again")
    assert tag not in turn_two

    # Registry changed: the stored block no longer matches, so it is sent again.
    await agent.skills.store.upsert_skill("news", "# news\nread headlines")
    turn_three = await next_preamble("more")
    assert tag in turn_three
    assert "news" in turn_three
    await record(turn_three)

    # Unchanged once more: left out.
    turn_four = await next_preamble("more")
    assert tag not in turn_four

    # History cleared: no stored copy remains, so it is sent again.
    await agent.history.clear_session(channel, user, chat)
    turn_five = await next_preamble("fresh")
    assert tag in turn_five

    # Without a session key the block is included on every call.
    ungated_first = await agent._turn_preamble(None, query="x")
    ungated_second = await agent._turn_preamble(None, query="x")
    assert tag in ungated_first
    assert tag in ungated_second


# ---------------------------------------------------------------------------
# Per-action write state — one write's outcome must not block a different one
# ---------------------------------------------------------------------------
Expand Down
Loading