From 11722f0fe928586d6a601a5e9fbce41990f19c48 Mon Sep 17 00:00:00 2001 From: Raymond Christopher Date: Thu, 1 Jan 2026 10:00:23 +0700 Subject: [PATCH 1/4] chore: update documentation and enhance agent guide - Clean up whitespace in `AGENT_GUIDE.md` to improve readability. - Add a new section in `PR-PLANS.md` outlining the project's objective for a local-first task manager. - Revise the recommended execution order in `PR-PLANS.md` to prioritize observability and UX improvements. - Update `INDEX.md` to include new PR specifications for agent tool-calling and observability. These changes aim to enhance clarity in documentation and better outline project goals and execution strategies. --- docs/02-implementation/AGENT_GUIDE.md | 4 +- docs/02-implementation/PR-PLANS.md | 392 ++++--- docs/02-implementation/pr-specs/INDEX.md | 6 + .../pr-specs/PR-001-db-config.md | 502 ++------- .../pr-specs/PR-002-task-crud-api.md | 205 ++-- .../pr-specs/PR-003-llm-chat-backbone.md | 237 ++--- .../pr-specs/PR-003B-agent-tool-calling.md | 626 +++++++++++ .../PR-004-attachments-link-detection.md | 292 ++---- .../pr-specs/PR-005-rag-semantic-search.md | 134 +-- .../pr-specs/PR-006-gmail-integration.md | 324 ++---- .../pr-specs/PR-007-github-integration.md | 353 ++----- .../pr-specs/PR-008-interactive-tui.md | 211 ++-- .../pr-specs/PR-009-cli-subcommands.md | 328 ++---- .../pr-specs/PR-010-web-ui.md | 122 ++- .../pr-specs/PR-011-notifications.md | 153 +-- .../pr-specs/PR-012-deployment-docs.md | 112 +- .../pr-specs/PR-013-event-system.md | 429 ++++++++ .../PR-014-multi-agent-orchestration.md | 970 ++++++++++++++++++ .../pr-specs/PR-015-agent-ux-panel.md | 108 ++ .../pr-specs/PR-016-observability-baseline.md | 113 ++ .../pr-specs/PR-017-db-config-followups.md | 128 +++ docs/02-implementation/pr-specs/TEMPLATE.md | 22 +- 22 files changed, 3498 insertions(+), 2273 deletions(-) create mode 100644 docs/02-implementation/pr-specs/PR-003B-agent-tool-calling.md create mode 100644 
docs/02-implementation/pr-specs/PR-013-event-system.md create mode 100644 docs/02-implementation/pr-specs/PR-014-multi-agent-orchestration.md create mode 100644 docs/02-implementation/pr-specs/PR-015-agent-ux-panel.md create mode 100644 docs/02-implementation/pr-specs/PR-016-observability-baseline.md create mode 100644 docs/02-implementation/pr-specs/PR-017-db-config-followups.md diff --git a/docs/02-implementation/AGENT_GUIDE.md b/docs/02-implementation/AGENT_GUIDE.md index 5093701..ce780be 100644 --- a/docs/02-implementation/AGENT_GUIDE.md +++ b/docs/02-implementation/AGENT_GUIDE.md @@ -203,7 +203,7 @@ schedule = ["24h", "6h"] @property def database_path(self) -> Path: """Get canonical database file path. - + Automatically strips query parameters (e.g., ?mode=ro) from SQLite URLs before extracting the file path. This prevents invalid file paths when URLs include query parameters. @@ -277,7 +277,7 @@ def init_db() -> None: async def init_db_async() -> None: """Initialize database asynchronously (for FastAPI lifespan). - + Runs migrations in a threadpool using asyncio.to_thread() to avoid blocking the event loop. """ diff --git a/docs/02-implementation/PR-PLANS.md b/docs/02-implementation/PR-PLANS.md index 0ac7258..2ddc8d2 100644 --- a/docs/02-implementation/PR-PLANS.md +++ b/docs/02-implementation/PR-PLANS.md @@ -2,19 +2,25 @@ **Status:** Spec Complete | Implementation In Progress **Last Reviewed:** 2025-12-31 -**Total PRs:** 17 (PR-001 through PR-012, plus breakdowns: PR-003a/b/c, PR-005a/b) +**Total PRs:** 17 (PR-001 through PR-012, plus PR-003B and PR-013 through PR-016) ## Overview This document tracks all planned Pull Requests for the TaskGenie project. The plan follows an **Interactive TUI-First** strategy, with chat as the primary capability within the interactive interface. 
+## Objective (North Star) + +Ship a **local-first task manager** where a user can run `tgenie`, manage tasks in an interactive TUI, and use chat as the primary workflow (with RAG + integrations layered on later). + **Strategy:** 1. **Foundation:** DB + Task API (unblocks everything) -2. **UX First:** Interactive TUI early, iterate fast on usability -3. **Chat Next:** Add LLM chat inside the TUI once the UX shell exists -4. **Feature Delivery:** Add attachments, then ship the earliest “tryable” features (Notifications and/or Integrations) -5. **Intelligence:** RAG + semantic search to level-up chat and discovery -6. **Polish:** Web UI (optional) + deployment/docs +2. **Observability:** Structured logs + telemetry early to de-risk debugging +3. **UX First:** Interactive TUI early, iterate fast on usability +4. **Chat Next:** Add LLM chat inside the TUI once the UX shell exists +5. **Agent Foundations:** Tool-calling + event system for safe actions and updates +6. **Feature Delivery:** Add attachments, then ship the earliest “tryable” features (Notifications and/or Integrations) +7. **Intelligence:** RAG + semantic search to level-up chat and discovery +8. **Polish:** Agent UX + Web UI (optional) + deployment/docs > **Note:** The primary UX is an interactive TUI entered via `tgenie` (default), with chat as the main mode. Non-interactive subcommands (`tgenie add`, `tgenie list`, etc.) exist for scripting and automation. @@ -25,32 +31,36 @@ This document tracks all planned Pull Requests for the TaskGenie project. The pl - Interactive TUI: `docs/01-design/DESIGN_TUI.md` - Background jobs (no queue MVP): `docs/01-design/DESIGN_BACKGROUND_JOBS.md` - Core architecture: `docs/01-design/DESIGN_ARCHITECTURE.md` +- Skill enrichment tags: `docs/02-implementation/SKILL_ENRICHMENT_SUMMARY.md` ## Recommended Execution Order (UX-First) This sequence prioritizes **something usable early** (good UX) and then adds capabilities bit-by-bit. | Seq | PR | Title | Why now? 
| Depends on | Skill Enrichment |
-|---:|---|---|---|---|
+|---:|---|---|---|---|---|
 | 1 | PR-001 | Database & Configuration | Foundation + migrations | - | - |
-| 2 | PR-002 | Task CRUD API | Core workflows + enables clients | PR-001 | api-testing |
-| 3 | PR-008 | Interactive TUI (Tasks MVP) | Validate UX early | PR-002 | tui-dev |
-| 4 | PR-003a | LLM Provider Abstraction | Provider configuration foundation | PR-001 | - |
-| 5 | PR-003b | Streaming Chat API | API surface for chat | PR-001, PR-002, PR-003a | api-testing |
-| 6 | PR-003c | TUI Chat Integration | Make chat real inside TUI | PR-002, PR-003a, PR-003b | tui-dev |
-| 7 | PR-009 | CLI Subcommands (Secondary) | Scriptable workflows | PR-002 | task-workflow |
-| 8 | PR-004 | Attachments + Link Detection | Context capture for real work | PR-002 | task-workflow |
-| 9 | PR-011 | Notifications | Early "daily value" | PR-002 | task-workflow |
+| 2 | PR-016 | Observability Baseline | De-risk debugging early | PR-001 | - |
+| 3 | PR-002 | Task CRUD API | Core workflows + enables clients | PR-001 | api-testing |
+| 4 | PR-008 | Interactive TUI (Tasks MVP) | Validate UX early | PR-002 | tui-dev |
+| 5 | PR-003 | LLM + Chat Backbone | Make chat real (provider + API + TUI) | PR-001, PR-002, PR-008 | api-testing, tui-dev |
+| 6 | PR-004 | Attachments + Link Detection | Context capture for real work | PR-002 | task-workflow |
+| 7 | PR-013 | Event System + Realtime Updates | Enable subscriptions + hooks | PR-002 | - |
+| 8 | PR-003B | Agent Tool-Calling Foundation | Safe tool execution | PR-003, PR-002, PR-004 | - |
+| 9 | PR-011 | Notifications | Early "daily value" | PR-002 | task-workflow |
 | 10 | PR-007 | GitHub Integration | High-value for dev tasks | PR-004 | integration-setup |
 | 11 | PR-006 | Gmail Integration | High-value, higher complexity | PR-004 | integration-setup |
-| 12 | PR-005a | ChromaDB + Embeddings | Vector store foundation | PR-001, PR-004 | rag-testing |
-| 13 | PR-005b | Semantic Search + 
RAG | Better recall + better chat | PR-001, PR-003b, PR-004, PR-005a | rag-testing, context-optimization, context-compression | -| 14 | PR-010 | Web UI | Secondary UX for rich preview | PR-002 (chat optional: PR-003c) | - | -| 15 | PR-012 | Deployment + Docs | Make it easy to run/share | PR-010, PR-011 | - | +| 12 | PR-005 | RAG + Semantic Search | Better recall + better chat | PR-003, PR-004 | rag-testing, context-optimization, context-compression | +| 13 | PR-014 | Multi-Agent Orchestration | Coordinated agent runs | PR-003B, PR-013 | - | +| 14 | PR-015 | Agent UX Panel | Visibility + controls | PR-008, PR-003B, PR-013, PR-014 | - | +| 15 | PR-009 | CLI Subcommands (Secondary) | Scriptable workflows + agent CLI | PR-002, PR-003B | task-workflow | +| 16 | PR-010 | Web UI | Secondary UX for rich preview | PR-002 (chat optional: PR-003) | - | +| 17 | PR-012 | Deployment + Docs | Make it easy to run/share | PR-010, PR-011 | - | Notes: -- You can swap **Seq 7–9** based on what you can test earliest (notifications vs integrations). +- You can swap **Seq 9–12** based on what you can test earliest (notifications vs integrations vs RAG). - PR-010 can be started earlier for task pages, but chat streaming needs PR-003. +- PR-015 depends on PR-014 for agent run endpoints. 
 - Specs (with test scenarios): `pr-specs/INDEX.md`
 
 ## PR Dependency Diagram
 
@@ -58,60 +68,142 @@ Notes:
 ```mermaid
 flowchart TD
     PR001["PR-001: Database & Config"]
+    PR016["PR-016: Observability Baseline"]
     PR002["PR-002: Task CRUD API"]
     PR008["PR-008: Interactive TUI (Tasks MVP)"]
-    PR003a["PR-003a: LLM Provider"]
-    PR003b["PR-003b: Streaming Chat API"]
-    PR003c["PR-003c: TUI Chat Integration"]
-    PR009["PR-009: CLI Subcommands"]
+    PR003["PR-003: LLM + Chat Backbone"]
     PR004["PR-004: Attachments + Link Detection"]
+    PR013["PR-013: Event System + Realtime Updates"]
+    PR003B["PR-003B: Agent Tool-Calling"]
     PR011["PR-011: Notifications"]
     PR007["PR-007: GitHub Integration"]
     PR006["PR-006: Gmail Integration"]
-    PR005a["PR-005a: ChromaDB + Embeddings"]
-    PR005b["PR-005b: Semantic Search + RAG"]
+    PR005["PR-005: RAG + Semantic Search"]
+    PR014["PR-014: Multi-Agent Orchestration"]
+    PR015["PR-015: Agent UX Panel"]
+    PR009["PR-009: CLI Subcommands"]
     PR010["PR-010: Web UI"]
     PR012["PR-012: Deployment + Docs"]
 
+    PR001 --> PR016
     PR001 --> PR002
     PR002 --> PR008
-    PR001 --> PR003a
-    PR002 --> PR003b
-    PR003a --> PR003b
-    PR002 --> PR003c
-    PR003a --> PR003c
-    PR003b --> PR003c
-    PR002 --> PR009
+    PR001 --> PR003
+    PR002 --> PR003
+    PR008 --> PR003
     PR002 --> PR004
+    PR002 --> PR013
+    PR003 --> PR003B
+    PR002 --> PR003B
+    PR004 --> PR003B
     PR004 --> PR007
     PR004 --> PR006
-    PR001 --> PR005a
-    PR004 --> PR005a
+    PR003 --> PR005
+    PR004 --> PR005
     PR002 --> PR011
-    PR001 --> PR005b
-    PR003b --> PR005b
-    PR004 --> PR005b
-    PR005a --> PR005b
+    PR003B --> PR014
+    PR013 --> PR014
+    PR008 --> PR015
+    PR003B --> PR015
+    PR013 --> PR015
+    PR014 --> PR015
+    PR002 --> PR009
+    PR003B --> PR009
     PR002 --> PR010
-    PR003c -. "chat UI (optional)" .-> PR010
+    PR003 -. "chat UI (optional)" .-> PR010
     PR010 --> PR012
     PR011 --> PR012
 ```
 
-Notes:
-- PR-003 has been split into PR-003a (provider), PR-003b (API), PR-003c (TUI integration).
-- PR-005 has been split into PR-005a (indexing) and PR-005b (search + RAG).
-- PR-010 can ship "tasks-only" early; chat streaming waits on PR-003c. - Notes: - Edges reflect planned dependency relationships. - PR-010 can ship “tasks-only” early; chat streaming waits on PR-003. -## Phase 1: Foundation + UX MVP (Weeks 1-2) +## Dependency Diagrams by Phase + +These diagrams break the full dependency graph into smaller, phase-focused views. + +### Phase 1-3 (Foundation, UX, Chat, Attachments, Agent Foundations) + +```mermaid +flowchart TD + PR001["PR-001: Database & Config"] + PR016["PR-016: Observability Baseline"] + PR002["PR-002: Task CRUD API"] + PR008["PR-008: Interactive TUI"] + PR003["PR-003: LLM + Chat Backbone"] + PR004["PR-004: Attachments + Link Detection"] + PR013["PR-013: Event System"] + PR003B["PR-003B: Agent Tool-Calling"] + + PR001 --> PR016 + PR001 --> PR002 + PR002 --> PR008 + PR001 --> PR003 + PR002 --> PR003 + PR008 --> PR003 + PR002 --> PR004 + PR002 --> PR013 + PR003 --> PR003B + PR004 --> PR003B + PR002 --> PR003B +``` + +### Phase 4-6 (Early Value, Intelligence, Agent Orchestration + UX) + +```mermaid +flowchart TD + PR002["PR-002: Task CRUD API"] + PR004["PR-004: Attachments + Link Detection"] + PR003["PR-003: LLM + Chat Backbone"] + PR013["PR-013: Event System"] + PR003B["PR-003B: Agent Tool-Calling"] + PR008["PR-008: Interactive TUI"] + PR011["PR-011: Notifications"] + PR007["PR-007: GitHub Integration"] + PR006["PR-006: Gmail Integration"] + PR005["PR-005: RAG + Semantic Search"] + PR014["PR-014: Multi-Agent Orchestration"] + PR015["PR-015: Agent UX Panel"] + + PR002 --> PR011 + PR004 --> PR007 + PR004 --> PR006 + PR003 --> PR005 + PR004 --> PR005 + PR003B --> PR014 + PR013 --> PR014 + PR008 --> PR015 + PR003B --> PR015 + PR013 --> PR015 + PR014 --> PR015 +``` + +### Phase 7-8 (Secondary UIs + Scripting, Deploy + Docs) + +```mermaid +flowchart TD + PR002["PR-002: Task CRUD API"] + PR003B["PR-003B: Agent Tool-Calling"] + PR009["PR-009: CLI Subcommands"] + PR010["PR-010: Web UI"] + PR011["PR-011: Notifications"] + 
PR012["PR-012: Deployment + Docs"] + PR003["PR-003: LLM + Chat Backbone"] + + PR002 --> PR009 + PR003B --> PR009 + PR002 --> PR010 + PR003 -. "chat UI (optional)" .-> PR010 + PR010 --> PR012 + PR011 --> PR012 +``` + +## Phase 1: Foundation + Observability + UX MVP (Weeks 1-2) ### PR-001: Database & Configuration Setup **Branch:** `feature/db-config` -**Status:** ⬜ Not Started +**Status:** ✅ Implemented **Description:** Initialize SQLite database with migrations and environment configuration. **Spec:** `pr-specs/PR-001-db-config.md` **Files to modify:** @@ -123,6 +214,16 @@ Notes: - [ ] Environment variables load correctly - [ ] Tests pass for database operations +### PR-016: Observability Baseline +**Branch:** `feature/observability-baseline` +**Status:** ⬜ Not Started +**Dependency:** PR-001 +**Description:** Structured JSON logs, request IDs, and a lightweight telemetry endpoint. +**Spec:** `pr-specs/PR-016-observability-baseline.md` +**Acceptance Criteria:** +- [ ] Logs include request IDs in JSON format +- [ ] `/api/v1/telemetry` reports DB health and migration version + ### PR-002: Task CRUD API Endpoints **Branch:** `feature/task-crud` **Status:** ⬜ Not Started @@ -144,8 +245,8 @@ Notes: **Description:** The `tgenie` command entry point. Ships an interactive TUI early for core task workflows (chat can be a stub until PR-003). 
**Spec:** `pr-specs/PR-008-interactive-tui.md` **Files to modify:** -- `backend/cli/main.py` -- `backend/cli/chat_repl.py` +- `backend/cli/main.py` - launch the TUI by default +- `backend/cli/tui/*` - Textual app, screens/widgets, API client **Acceptance Criteria:** - [ ] `tgenie` opens interactive TUI - [ ] Core task flows work end-to-end (create/list/show/edit/done) @@ -155,52 +256,17 @@ Notes: ## Phase 2: Chat + Attachments (Weeks 3-4) -### PR-003a: LLM Provider Abstraction & Configuration -**Branch:** `feature/llm-provider` -**Status:** ⬜ Not Started -**Dependency:** PR-001 -**Description:** Implement core LLM provider abstraction and configuration system. -**Spec:** `pr-specs/PR-003a-llm-provider.md` -**Files to modify:** -- `backend/services/llm_service.py` - LLM Provider logic -- `backend/config.py` - LLM configuration -**Acceptance Criteria:** -- [ ] `LLMService` class implements stream_chat interface -- [ ] Configuration loads from env vars and config file -- [ ] Missing API key raises clear `ValueError` -- [ ] OpenRouter integration works end-to-end - -### PR-003b: Streaming Chat API Endpoint -**Branch:** `feature/streaming-chat-api` +### PR-003: LLM + Chat Backbone +**Branch:** `feature/llm-chat-backbone` **Status:** ⬜ Not Started -**Dependency:** PR-001, PR-002, PR-003a -**Description:** Create FastAPI streaming chat endpoint with SSE. -**Spec:** `pr-specs/PR-003b-streaming-chat-api.md` -**Files to modify:** -- `backend/api/chat.py` - Chat endpoint -- `backend/schemas/chat.py` - Request/response schemas +**Dependency:** PR-001, PR-002, PR-008 +**Description:** Provider configuration + streaming API + TUI wiring (tool-calling is in PR-003B). 
+**Spec:** `pr-specs/PR-003-llm-chat-backbone.md` **Acceptance Criteria:** -- [ ] `POST /api/v1/chat` endpoint exists -- [ ] SSE format matches spec (`data:` prefix, `[DONE]` terminator) -- [ ] Missing LLM API key returns 500 with clear error -- [ ] OpenAPI docs include SSE protocol explanation - -### PR-003c: TUI Chat Integration -**Branch:** `feature/tui-chat` -**Status:** ⬜ Not Started -**Dependency:** PR-002, PR-003a, PR-003b -**Description:** Integrate chat functionality into interactive TUI from PR-008. -**Spec:** `pr-specs/PR-003c-tui-chat-integration.md` -**Files to modify:** -- `backend/cli/tui/widgets/chat_panel.py` - Chat widget -- `backend/cli/tui/screens/main.py` - Integrate chat panel -- `backend/cli/tui/client.py` - Streaming support -**Acceptance Criteria:** -- [ ] Chat panel widget exists with input + message display -- [ ] User messages appear immediately after sending -- [ ] AI responses stream in real-time -- [ ] Missing LLM API key shows clear error modal -- [ ] Chat history persists for session +- [ ] Chat endpoint returns a streaming response. +- [ ] TUI can send a message and display a streamed reply. +- [ ] Missing/invalid API key results in a clear error message (in TUI and API). +- [ ] Provider selection (model/base_url) works via config. ### PR-004: Attachment API & Link Detection **Branch:** `feature/attachments` @@ -218,7 +284,32 @@ Notes: --- -## Phase 3: Early Value Track (Weeks 5-6) +## Phase 3: Agent Foundations + Events (Weeks 5-6) + +### PR-013: Event System + Realtime Updates +**Branch:** `feature/event-system` +**Status:** ⬜ Not Started +**Dependency:** PR-002 +**Description:** Event log + SSE stream for task/attachment lifecycle events. 
+**Spec:** `pr-specs/PR-013-event-system.md` +**Acceptance Criteria:** +- [ ] Task and attachment changes emit events +- [ ] SSE stream supports reconnect with `Last-Event-ID` + +### PR-003B: Agent Tool-Calling Foundation +**Branch:** `feature/agent-tools` +**Status:** ⬜ Not Started +**Dependency:** PR-003, PR-002, PR-004 +**Description:** Tool schema + execution pipeline for safe agent actions. +**Spec:** `pr-specs/PR-003B-agent-tool-calling.md` +**Acceptance Criteria:** +- [ ] Tool schema exposed to LLM provider +- [ ] Tool calls execute with validation and timeouts +- [ ] Destructive tools require confirmation + +--- + +## Phase 4: Early Value Track (Weeks 7-8) This phase is intentionally flexible: pick what’s easiest to validate early from a user POV. @@ -259,57 +350,58 @@ This phase is intentionally flexible: pick what’s easiest to validate early fr --- -## Phase 4: Intelligence (Weeks 8-9) +## Phase 5: Intelligence (Weeks 9-10) -### PR-005a: ChromaDB Setup & Embeddings Pipeline -**Branch:** `feature/chromadb-indexing` +### PR-005: RAG + Semantic Search +**Branch:** `feature/rag-semantic-search` **Status:** ⬜ Not Started -**Dependency:** PR-001, PR-004 -**Description:** Implement ChromaDB vector store and embedding service with sentence-transformers. -**Spec:** `pr-specs/PR-005a-chromadb-embeddings.md` -**Files to modify:** -- `backend/services/rag_service.py` - ChromaDB setup -- `backend/services/embedding_service.py` - Embedding generation -- `backend/config.py` - ChromaDB configuration +**Dependency:** PR-003, PR-004 +**Description:** Vector store + indexing + semantic search + chat context injection (single PR spec for now; see “Potential Future Splits”). +**Spec:** `pr-specs/PR-005-rag-semantic-search.md` +**Acceptance Criteria:** +- [ ] Tasks and cached attachments are embedded and indexed automatically. +- [ ] Semantic search returns relevant results for representative queries. 
+- [ ] Chat responses cite retrieved task/attachment context where applicable. + +--- + +## Phase 6: Agent Orchestration + UX (Weeks 11-12) + +### PR-014: Multi-Agent Orchestration +**Branch:** `feature/multi-agent` +**Status:** ⬜ Not Started +**Dependency:** PR-003B, PR-013 +**Description:** Run management and coordination for multi-agent workflows. +**Spec:** `pr-specs/PR-014-multi-agent-orchestration.md` **Acceptance Criteria:** -- [ ] `EmbeddingService` generates 384-dimension embeddings -- [ ] `RAGService` creates ChromaDB collection on first use -- [ ] Task indexing works on create/update -- [ ] Attachment content is included in parent task's document -- [ ] Batch indexing processes multiple tasks efficiently - -### PR-005b: Semantic Search API & RAG Context Injection -**Branch:** `feature/semantic-search-rag` +- [ ] Agent runs can be started, paused, and canceled +- [ ] Run status is persisted and queryable + +### PR-015: Agent UX Panel +**Branch:** `feature/agent-ux` **Status:** ⬜ Not Started -**Dependency:** PR-001, PR-003b, PR-004, PR-005a -**Description:** Implement semantic search API endpoint and RAG context injection for chat. -**Spec:** `pr-specs/PR-005b-semantic-search-rag.md` -**Files to modify:** -- `backend/api/search.py` - Semantic search endpoint -- `backend/services/rag_service.py` - Search and context building -- `backend/api/chat.py` - RAG context integration +**Dependency:** PR-008, PR-003B, PR-013, PR-014 +**Description:** TUI panel for agent status, tool execution, and controls. 
+**Spec:** `pr-specs/PR-015-agent-ux-panel.md` **Acceptance Criteria:** -- [ ] `GET /api/v1/search/semantic` endpoint exists -- [ ] Search returns relevant results sorted by similarity -- [ ] Filters (status, priority) work correctly -- [ ] RAG context builder includes task metadata -- [ ] Context truncation respects token budget -- [ ] Chat endpoint injects RAG context into prompts +- [ ] Agent panel updates in real time +- [ ] Pause/resume/cancel controls work --- -## Phase 5: Secondary UIs + Scripting (Weeks 9-10) +## Phase 7: Secondary UIs + Scripting (Weeks 13-14) ### PR-009: CLI Standard Commands (Secondary) **Branch:** `feature/cli-commands` **Status:** ⬜ Not Started -**Dependency:** PR-002 -**Description:** Standard commands (`add`, `list`, `edit`) for scripting/power users. +**Dependency:** PR-002, PR-003B +**Description:** Standard commands for scripting plus agent run/status commands. **Spec:** `pr-specs/PR-009-cli-subcommands.md` **Files to modify:** - `backend/cli/commands.py` **Acceptance Criteria:** - [ ] `tgenie list`, `tgenie add` work as subcommands (for scripting) +- [ ] `tgenie agent run` and `tgenie agent status` return usable results - [ ] Rich terminal output ### PR-010: Web UI (Chat & Tasks) @@ -328,7 +420,7 @@ This phase is intentionally flexible: pick what’s easiest to validate early fr --- -## Phase 6: Deploy + Docs (Weeks 11-12) +## Phase 8: Deploy + Docs (Weeks 15-16) ### PR-012: Deployment & Documentation **Branch:** `feature/deploy` @@ -346,24 +438,34 @@ This phase is intentionally flexible: pick what’s easiest to validate early fr | Phase | Focus | Weeks | Key PRs | |-------|-------|--------|----------| -| **1** | **Foundation + UX MVP** | 1-2 | PR-001 (DB), PR-002 (Task API), PR-008 (TUI Tasks) | -| **2** | **Chat + Attachments** | 3-4 | PR-003a (Provider), PR-003b (API), PR-003c (TUI), PR-004 (Attachments) | -| **3** | **Early Value Track** | 5-6 | PR-011 (Notifications) and/or PR-007 (GitHub) / PR-006 (Gmail) | -| **4** | 
**Intelligence** | 8-9 | PR-005a (Indexing), PR-005b (Search+RAG) | -| **5** | **Secondary UIs** | 10-11 | PR-009 (CLI subcommands), PR-010 (Web UI) | -| **6** | **Deploy + Docs** | 12 | PR-012 (Deployment & Docs) | - -**Total Estimated Effort:** ~150 hours (~18-20 weeks for one developer) +| **1** | **Foundation + Observability + UX MVP** | 1-2 | PR-001 (DB), PR-016 (Observability), PR-002 (Task API), PR-008 (TUI Tasks) | +| **2** | **Chat + Attachments** | 3-4 | PR-003 (Chat backbone), PR-004 (Attachments) | +| **3** | **Agent Foundations + Events** | 5-6 | PR-013 (Events), PR-003B (Tool-calling) | +| **4** | **Early Value Track** | 7-8 | PR-011 (Notifications) and/or PR-007 (GitHub) / PR-006 (Gmail) | +| **5** | **Intelligence** | 9-10 | PR-005 (RAG + Semantic Search) | +| **6** | **Agent Orchestration + UX** | 11-12 | PR-014 (Multi-agent), PR-015 (Agent UX) | +| **7** | **Secondary UIs + Scripting** | 13-14 | PR-009 (CLI subcommands), PR-010 (Web UI) | +| **8** | **Deploy + Docs** | 15-16 | PR-012 (Deployment & Docs) | -**Key Changes:** -- **PR-003 split**: Provider (003a) → API (003b) → TUI (003c) for parallel work and focused testing -- **PR-005 split**: Indexing (005a) + Search/RAG (005b) to separate technical concerns +**Total Estimated Effort:** ~140 hours (≈16–18 weeks at ~8h/week, or ≈4 weeks full-time) **Skill Integration:** -- `api-testing`: PR-002, PR-003b -- `rag-testing`: PR-005a, PR-005b +- `api-testing`: PR-002, PR-003 +- `rag-testing`: PR-005 - `integration-setup`: PR-006, PR-007 -- `tui-dev`: PR-008, PR-003c -- `context-optimization`: PR-005b -- `context-compression`: PR-005b (future chat history) +- `tui-dev`: PR-008, PR-003 +- `context-optimization`: PR-005 +- `context-compression`: PR-005 (future chat history) - `task-workflow`: PR-004, PR-009, PR-011 + +## Potential Future Splits (Not Part of Current Specs) + +If additional splits are needed, prefer numeric suffixes (e.g., `PR-003-1`, `PR-003-2`) for easier sorting. 
PR-003B is reserved for the tool-calling foundation. + +- PR-003 (candidate): + - `PR-003-1`: LLM provider config + service abstraction + - `PR-003-2`: Streaming chat API (SSE) + - `PR-003-3`: TUI chat integration +- PR-005 (candidate): + - `PR-005-1`: Vector store + embeddings + indexing + - `PR-005-2`: Semantic search + RAG context injection diff --git a/docs/02-implementation/pr-specs/INDEX.md b/docs/02-implementation/pr-specs/INDEX.md index ea66187..5612654 100644 --- a/docs/02-implementation/pr-specs/INDEX.md +++ b/docs/02-implementation/pr-specs/INDEX.md @@ -12,6 +12,7 @@ Design deep-dives live in `docs/01-design/` (notably `DESIGN_TUI.md`, `DESIGN_CH - [PR-001-db-config.md](PR-001-db-config.md) - Database, config, migrations, backup/restore - [PR-002-task-crud-api.md](PR-002-task-crud-api.md) - Task CRUD API - [PR-003-llm-chat-backbone.md](PR-003-llm-chat-backbone.md) - LLM service + chat API + TUI chat wiring +- [PR-003B-agent-tool-calling.md](PR-003B-agent-tool-calling.md) - Tool-calling foundation for agents - [PR-004-attachments-link-detection.md](PR-004-attachments-link-detection.md) - Attachments API + link detection - [PR-005-rag-semantic-search.md](PR-005-rag-semantic-search.md) - ChromaDB/RAG + semantic search + chat context - [PR-006-gmail-integration.md](PR-006-gmail-integration.md) - Gmail URL/OAuth + content fetch/cache @@ -21,3 +22,8 @@ Design deep-dives live in `docs/01-design/` (notably `DESIGN_TUI.md`, `DESIGN_CH - [PR-010-web-ui.md](PR-010-web-ui.md) - Web UI (tasks first, chat optional) - [PR-011-notifications.md](PR-011-notifications.md) - Notifications scheduling + delivery + history - [PR-012-deployment-docs.md](PR-012-deployment-docs.md) - Docker/deploy + docs +- [PR-013-event-system.md](PR-013-event-system.md) - Event system + realtime updates +- [PR-014-multi-agent-orchestration.md](PR-014-multi-agent-orchestration.md) - Multi-agent orchestration +- [PR-015-agent-ux-panel.md](PR-015-agent-ux-panel.md) - TUI agent panel + controls +- 
[PR-016-observability-baseline.md](PR-016-observability-baseline.md) - Structured logging + telemetry +- [PR-017-db-config-followups.md](PR-017-db-config-followups.md) - DB config follow-up fixes diff --git a/docs/02-implementation/pr-specs/PR-001-db-config.md b/docs/02-implementation/pr-specs/PR-001-db-config.md index a09fb3a..8b39336 100644 --- a/docs/02-implementation/pr-specs/PR-001-db-config.md +++ b/docs/02-implementation/pr-specs/PR-001-db-config.md @@ -6,54 +6,43 @@ ## Goal -Establish a reliable local-first foundation: -- configuration loading (env + config file) -- SQLite database initialization -- repeatable schema migrations -- safe backup/restore workflows +Establish the local-first foundation: config loading, SQLite initialization, +migrations, and backup/restore workflows. ## User Value - The app starts consistently on any machine. -- Schema changes are safe (no “delete DB and hope”). -- Users can backup/restore with one or two commands. +- Schema changes are safe and repeatable. +- Users can back up and restore data with clear commands. + +## References + +- `docs/01-design/DESIGN_DATA.md` +- `docs/01-design/DESIGN_BACKGROUND_JOBS.md` +- `docs/01-design/DECISIONS.md` ## Scope ### In -- Standardize config sources and precedence (e.g., env vars → `.env` → `~/.taskgenie/config.toml`). -- Introduce migrations (recommended: Alembic) for SQLite schema evolution. -- Add a simple DB CLI surface (either under `tgenie db ...` or a dedicated script): - - upgrade/downgrade migrations - - dump to `.sql` - - restore from `.sql` (with clear warnings) -- Define canonical data paths (DB file, vector store, attachments cache). +- Configuration sources and precedence (env vars, `.env`, user config, defaults). +- Canonical app data directory and subpaths (DB, cache, vector store, attachments, logs). +- Alembic migrations for SQLite and CLI wrappers for upgrade/downgrade/revision. +- Backup and restore commands with explicit confirmation on restore. 
+- FastAPI startup initializes the database and runs migrations when needed. ### Out -- Building a full “admin console” for DB management. -- Cross-database support (Postgres/MySQL). SQLite-first only. +- Admin UI for DB management. +- Non-SQLite databases. ## Mini-Specs -- Config: - - precedence and validation (env + `.env` + `~/.taskgenie/config.toml`). -- Paths: - - canonical app data dir and subpaths (DB, cache, logs, vector store). -- Migrations: - - Alembic initialized and runnable for SQLite. - - wrapper CLI surface (`tgenie db upgrade|downgrade|revision`). -- Backup/restore: - - dump to `.sql` and restore from `.sql` with explicit overwrite confirmation. -- Docs: - - backup/restore + migration commands documented with examples. - -## References - -- `docs/01-design/DESIGN_DATA.md` (schema + backup/migration notes) -- `docs/01-design/DESIGN_BACKGROUND_JOBS.md` (job persistence patterns) -- `docs/01-design/DECISIONS.md` (TUI-first and migration approach) +- Implement Pydantic Settings in `backend/config.py` with defined precedence. +- Add path helpers for data directory resolution without creating directories at import time. +- Initialize Alembic in `backend/migrations/` with SQLite-safe config and FK enforcement. +- Provide `tgenie db` commands: upgrade, downgrade, revision, dump, restore (reset optional). +- Document backup/restore and migration usage in `docs/`. ## User Stories @@ -62,430 +51,87 @@ Establish a reliable local-first foundation: - As a user, I can dump my data to `backup.sql` and restore it on a new machine. - As a developer, I can reset my local DB quickly during iteration. -## Technical Design +## UX Notes (if applicable) -### Config precedence +- Destructive actions (restore/reset) require explicit confirmation. -- Use **Pydantic Settings** for validation and environment management. -- Recommended precedence (highest wins): - 1. env vars - 2. `.env` file (dev convenience) - 3. config file (default: `~/.taskgenie/config.toml`) - 4. 
built-in defaults +## Technical Design -### Data locations +### Architecture -- Define a canonical “app data dir” (configurable) for: - - DB file - - vector store - - attachment cache - - logs -- Default to `~/.taskgenie/` to match the rest of the design docs. -- (Optional) On Linux, consider XDG mappings later (config/data/state) if we want to be a better citizen. +- Use Pydantic Settings with cached `get_settings()` and explicit precedence: + env vars > `.env` > `~/.taskgenie/config.toml` > defaults. +- Provide a single resolver for app data paths (configurable via `TASKGENIE_DATA_DIR`). +- FastAPI lifespan calls `init_db_async()` to avoid blocking the event loop. -### Migrations (Alembic) +### Data Model / Migrations -- Add Alembic configuration in `backend/migrations/`. -- Ensure `env.py` is configured for SQLite (handling `PRAGMA foreign_keys=ON`). -- Support: - - `tgenie db upgrade [--rev head]` (wraps `alembic upgrade head`) - - `tgenie db downgrade --rev |-1` - - `tgenie db revision -m "..." --autogenerate` +- Initialize Alembic under `backend/migrations/`. +- Ensure SQLite foreign keys are enabled on every connection. +- Migrations use `sqlite://` for Alembic even when runtime uses `sqlite+aiosqlite://`. -### Backup/restore +### API Contract -- Support the standard SQLite `.dump` workflow. -- If wrapped in CLI, require explicit confirmation on restore. +- CLI surface under `tgenie db`: + `upgrade`, `downgrade`, `revision`, `dump`, `restore` (and optional `reset`). -### DB CLI surface +### Background Jobs -- Provide (or plan) these commands: - - `tgenie db dump --out backup.sql` - - `tgenie db restore --in backup.sql` (confirm overwrite) - - `tgenie db reset --yes` (dev-only convenience) +- N/A. -## Acceptance Criteria +### Security / Privacy -### AC1: Fresh Install Creates Database Automatically ✅ +- Backup/restore commands do not log SQL contents. +- Restore and reset require confirmation before overwrite. 
-**Success Criteria:** -- [ ] No database file exists initially -- [ ] Running `tgenie db upgrade head` creates database at `~/.taskgenie/data/taskgenie.db` (or configured path) -- [ ] All required tables are created: `tasks`, `attachments`, `notifications`, `chat_history`, `config`, `alembic_version` -- [ ] FastAPI startup automatically runs migrations if DB doesn't exist -- [ ] `/health` endpoint returns `200 OK` after startup - -**How to Test:** - -**Automated:** -```python -def test_fresh_install_creates_db(tmp_path, monkeypatch): - """Test that fresh install creates DB and tables.""" - db_path = tmp_path / "test.db" - monkeypatch.setenv("DATABASE_URL", f"sqlite+aiosqlite:///{db_path}") - - # Verify DB doesn't exist - assert not db_path.exists() - - # Run upgrade - runner = CliRunner() - result = runner.invoke(db_app, ["upgrade", "head"]) - assert result.exit_code == 0 - - # Verify DB created with all tables - assert db_path.exists() - conn = sqlite3.connect(str(db_path)) - cursor = conn.cursor() - cursor.execute("SELECT name FROM sqlite_master WHERE type='table'") - tables = {row[0] for row in cursor.fetchall()} - assert "tasks" in tables - assert "attachments" in tables - assert "notifications" in tables - assert "alembic_version" in tables - conn.close() -``` - -**Manual:** -```bash -# 1. Remove existing DB (if any) -rm ~/.taskgenie/data/taskgenie.db - -# 2. Run upgrade -uv run tgenie db upgrade head - -# 3. Verify DB created -ls -la ~/.taskgenie/data/taskgenie.db - -# 4. Start API and verify health -uv run python -m backend.main & -sleep 2 -curl http://localhost:8080/health -# Expected: {"status": "ok", "version": "0.1.0"} -``` - ---- - -### AC2: Migrations Can Be Created and Applied ✅ +### Error Handling -**Success Criteria:** -- [ ] `tgenie db revision -m "..." 
--autogenerate` creates new migration file -- [ ] `tgenie db upgrade head` applies all pending migrations -- [ ] `tgenie db downgrade -1` reverts last migration (where feasible) -- [ ] `alembic_version` table tracks current revision -- [ ] Multiple sequential migrations apply correctly - -**How to Test:** - -**Automated:** -```python -def test_create_and_apply_migration(tmp_path, monkeypatch): - """Test creating and applying a migration.""" - # Setup: Fresh DB at initial migration - runner.invoke(db_app, ["upgrade", "head"]) - - # Create new migration - result = runner.invoke( - db_app, - ["revision", "-m", "test migration", "--autogenerate"] - ) - assert result.exit_code == 0 - - # Verify migration file created - versions_dir = Path("backend/migrations/versions") - migration_files = list(versions_dir.glob("*.py")) - assert len(migration_files) >= 2 # initial + new - - # Apply migration - result = runner.invoke(db_app, ["upgrade", "head"]) - assert result.exit_code == 0 - - # Verify alembic_version updated - conn = sqlite3.connect(str(db_path)) - cursor = conn.cursor() - cursor.execute("SELECT version_num FROM alembic_version") - version = cursor.fetchone()[0] - assert version is not None - conn.close() -``` - -**Manual:** -```bash -# 1. Ensure DB is at head -uv run tgenie db upgrade head - -# 2. Create a test migration (add a dummy column to tasks) -# Edit backend/models/task.py to add a test column -# Then generate migration: -uv run tgenie db revision -m "add test column" --autogenerate - -# 3. Review generated migration file -cat backend/migrations/versions/*_add_test_column.py - -# 4. Apply migration -uv run tgenie db upgrade head - -# 5. Verify schema changed -sqlite3 ~/.taskgenie/data/taskgenie.db ".schema tasks" - -# 6. 
(Optional) Test downgrade -uv run tgenie db downgrade -1 -sqlite3 ~/.taskgenie/data/taskgenie.db ".schema tasks" # Verify reverted -``` - ---- - -### AC3: Backup and Restore Work Correctly ✅ +- Missing optional config files are non-fatal with clear defaults. +- Migration and backup/restore failures surface actionable CLI errors. -**Success Criteria:** -- [ ] `tgenie db dump --out backup.sql` creates SQL dump file -- [ ] Dump file contains all table schemas and data -- [ ] `tgenie db restore --in backup.sql` restores database (with confirmation) -- [ ] Restored database has same schema and data as original -- [ ] Foreign key relationships preserved after restore -- [ ] Restore prompts for confirmation before overwriting - -**How to Test:** - -**Automated:** -```python -async def test_backup_restore_preserves_data(tmp_path, temp_settings): - """Test that backup/restore preserves all data and relationships.""" - # Create task with attachment - async for session in get_db(): - task = Task(id="test-1", title="Test", status="pending", priority="medium") - session.add(task) - await session.flush() - attachment = Attachment(task_id=task.id, type="file", reference="/test.txt") - session.add(attachment) - await session.commit() - - # Backup - backup_file = tmp_path / "backup.sql" - result = runner.invoke(db_app, ["dump", "--out", str(backup_file)]) - assert result.exit_code == 0 - assert backup_file.exists() - - # Delete DB - settings.db_path.unlink() - - # Restore - result = runner.invoke( - db_app, ["restore", "--in", str(backup_file)], - input="y\n" - ) - assert result.exit_code == 0 - - # Verify data restored - async for session in get_db(): - task = await session.get(Task, "test-1") - assert task is not None - assert len(task.attachments) == 1 -``` - -**Manual:** -```bash -# 1. 
Create some test data (via SQLAlchemy or direct SQL) -sqlite3 ~/.taskgenie/data/taskgenie.db < ~/.taskgenie/config.toml < .env > config.toml > defaults +### Automated ---- +- Unit: settings precedence and path resolution. +- Integration: Alembic upgrade/downgrade paths against temp DB. +- CLI: `tgenie db` commands (upgrade, dump, restore). -## Related Test Documentation +### Manual -- **Migrations Guide:** [`MIGRATIONS.md`](../MIGRATIONS.md) - How to create and manage migrations -- **Testing Guide:** [`TESTING_GUIDE.md`](../TESTING_GUIDE.md) - General testing patterns and practices +- Run `tgenie db upgrade head` on a clean machine; confirm DB created. +- Create data, dump/restore, and confirm data persists. +- Start API and verify `/health` returns OK. ## Notes / Risks / Open Questions -- SQLite downgrade support can be limited depending on migration operations; keep early migrations simple. -- If `tgenie` CLI wiring isn’t available yet, these commands can be exposed via `uv run ...` as an interim step, but the end state should be `tgenie db ...`. +- SQLite downgrade support is limited; keep early migrations simple. +- Structured logging and telemetry are handled in PR-016. diff --git a/docs/02-implementation/pr-specs/PR-002-task-crud-api.md b/docs/02-implementation/pr-specs/PR-002-task-crud-api.md index 792932a..b39204f 100644 --- a/docs/02-implementation/pr-specs/PR-002-task-crud-api.md +++ b/docs/02-implementation/pr-specs/PR-002-task-crud-api.md @@ -6,187 +6,120 @@ ## Goal -Provide the minimal API surface to create, read, update, and delete tasks (plus basic filtering/pagination), so UIs can ship early. +Provide the minimal API surface to create, read, update, and delete tasks with basic +filtering and pagination. ## User Value - Enables the first usable UI (TUI) quickly. -- Establishes a stable contract for future features (attachments, notifications, chat actions). +- Establishes a stable contract for later features. 
+ +## References + +- `docs/01-design/DESIGN_DATA.md` +- `docs/01-design/API_REFERENCE.md` +- `docs/01-design/DESIGN_CLI.md` ## Scope ### In -- REST endpoints for tasks: - - create (`POST /api/v1/tasks`) - - list (`GET /api/v1/tasks?status=&priority=&due_before=&due_after=&limit=&offset=`) - - get by id (`GET /api/v1/tasks/{id}`) - - update (`PATCH /api/v1/tasks/{id}`) - - delete (`DELETE /api/v1/tasks/{id}`) -- Validation rules (required title, enum validation, timestamps). -- Consistent error format for 400/404/500. +- REST endpoints for task CRUD. +- Basic filters (status, priority, due_before, due_after) and pagination. +- Input validation and consistent error shape. ### Out - Attachments (PR-004). - Semantic search (PR-005). -- Authentication/multi-user (future). +- Event system and realtime updates (PR-013). +- Agent tool schema and actions (PR-003B). +- Authentication/multi-user. ## Mini-Specs -- API routes: - - `POST /api/v1/tasks` - - `GET /api/v1/tasks` (filters + pagination) - - `GET /api/v1/tasks/{id}` - - `PATCH /api/v1/tasks/{id}` - - `DELETE /api/v1/tasks/{id}` -- Validation: - - `title` required; enums validated; timestamps server-generated. -- Error format: - - 404 uses `{"error": "...", "code": "TASK_NOT_FOUND"}` (align `API_REFERENCE.md`). -- OpenAPI: - - Endpoints appear in `/docs` with correct schemas and examples. -- Tests: - - CRUD happy path + validation errors + list filters/pagination. - -## References - -- `docs/01-design/DESIGN_DATA.md` (task schema + relationships) -- `docs/01-design/API_REFERENCE.md` (endpoint examples) -- `docs/01-design/DESIGN_CLI.md` (commands that will rely on this API) +- Implement `/api/v1/tasks` CRUD endpoints with pagination defaults. +- Validate enums and required fields at the API boundary. +- Standardize error responses (404 and validation). +- Publish OpenAPI schemas with examples aligned to `API_REFERENCE.md`. ## User Stories - As a user, I can create tasks and see them appear in my task list. 
- As a user, I can update task status/priority/ETA and see it reflected everywhere. -- As a user, I can filter tasks by status/priority/due window. +- As a user, I can filter tasks by status, priority, and due window. + +## UX Notes (if applicable) + +- N/A. ## Technical Design -### Data model (SQLAlchemy) +### Architecture -- `Task` model in `backend/models/task.py`: - - `id`: UUID (stored as `VARCHAR(36)` / string) - - `title`: `VARCHAR(255) NOT NULL` - - `description`: `TEXT NULL` - - `status`: `VARCHAR(20) NOT NULL` (`pending|in_progress|completed`) - - `priority`: `VARCHAR(20) NOT NULL` (`low|medium|high|critical`) - - `eta`: `DATETIME NULL` - - `created_at`, `updated_at`: `DATETIME NOT NULL` - - `tags`: `JSON NULL` - - `metadata`: `JSON NULL` -- Index: `(status, eta)` for common “what’s due?” queries. +- FastAPI router with service/repository layer; no direct DB access from clients. +- Use async SQLAlchemy sessions. -### API schemas (Pydantic) +### Data Model / Migrations -- Use the types in `backend/schemas/task.py`: - - `TaskCreate`, `TaskUpdate`, `TaskResponse` - - `TaskStatus` / `TaskPriority` enums validated at the API boundary - - `TaskListResponse`: `{"tasks": [...], "total": 42, "page": 1, "page_size": 50}` +- `Task` fields: id (UUID string), title, description, status, priority, eta, + created_at, updated_at, tags, metadata. +- Index on `(status, eta)` to support "what is due" queries. -### API contract +### API Contract -- `POST /api/v1/tasks` → 201 Created -- `GET /api/v1/tasks` → 200 OK - - Defaults: `limit=50`, `offset=0` -- `GET /api/v1/tasks/{id}` → 200 OK -- `PATCH /api/v1/tasks/{id}` → 200 OK -- `DELETE /api/v1/tasks/{id}` → 204 No Content +- `POST /api/v1/tasks` -> 201 Created. +- `GET /api/v1/tasks` -> 200 OK, supports filters and `limit`/`offset` (defaults 50/0). +- `GET /api/v1/tasks/{id}` -> 200 OK. +- `PATCH /api/v1/tasks/{id}` -> 200 OK, partial update. +- `DELETE /api/v1/tasks/{id}` -> 204 No Content. 
+- Error shape: `{"error": "...", "code": "TASK_NOT_FOUND"}` for 404s. -### Validation + transitions +### Background Jobs -- Validate enums at the API boundary. -- Status transitions can be permissive in MVP; tighten later if needed. +- N/A. -### Error format +### Security / Privacy -- Standardize errors: - - `{"error": "...", "code": "..."}` +- N/A (no auth in MVP). + +### Error Handling + +- Validation errors return 422 with clear field errors. +- Not found uses standard error shape and code. ## Acceptance Criteria -- [ ] All CRUD endpoints exist under `/api/v1/tasks` and match `docs/01-design/API_REFERENCE.md`. -- [ ] Validation rejects empty title and invalid enums. -- [ ] List supports `status`, `priority`, `due_before`, `due_after`, `limit`, `offset`. -- [ ] 404s return the standard error shape and code. -- [ ] OpenAPI docs render the endpoints + schemas. +### AC1: CRUD Endpoints + +**Success Criteria:** +- [ ] All CRUD endpoints exist and match `API_REFERENCE.md`. +- [ ] OpenAPI docs render endpoints and schemas. + +### AC2: Validation and Error Shape + +**Success Criteria:** +- [ ] Empty titles and invalid enums are rejected. +- [ ] 404 responses use the standard error shape and code. + +### AC3: Filters and Pagination + +**Success Criteria:** +- [ ] List endpoint supports status, priority, due_before, due_after. +- [ ] Limit/offset default to 50/0 and are enforced. 
## Test Plan ### Automated -```python -# tests/test_api/test_tasks_crud.py -import pytest -from httpx import AsyncClient - -class TestTasksCRUD: - """Core CRUD scenarios for /api/v1/tasks.""" - - @pytest.mark.asyncio - async def test_create_list_get_update_delete(self, client: AsyncClient): - create_resp = await client.post("/api/v1/tasks", json={"title": "Buy groceries"}) - assert create_resp.status_code == 201 - - task = create_resp.json() - task_id = task["id"] - assert task["title"] == "Buy groceries" - assert task["status"] == "pending" - assert task["priority"] == "medium" - - list_resp = await client.get("/api/v1/tasks", params={"limit": 50, "offset": 0}) - assert list_resp.status_code == 200 - list_data = list_resp.json() - assert any(t["id"] == task_id for t in list_data["tasks"]) - assert isinstance(list_data["total"], int) - - get_resp = await client.get(f"/api/v1/tasks/{task_id}") - assert get_resp.status_code == 200 - assert get_resp.json()["id"] == task_id - - patch_resp = await client.patch(f"/api/v1/tasks/{task_id}", json={"status": "in_progress"}) - assert patch_resp.status_code == 200 - assert patch_resp.json()["status"] == "in_progress" - - del_resp = await client.delete(f"/api/v1/tasks/{task_id}") - assert del_resp.status_code == 204 - - missing_resp = await client.get(f"/api/v1/tasks/{task_id}") - assert missing_resp.status_code == 404 - - @pytest.mark.asyncio - async def test_validation_errors(self, client: AsyncClient): - response = await client.post("/api/v1/tasks", json={"title": ""}) - assert response.status_code == 422 - - @pytest.mark.asyncio - async def test_list_filters(self, client: AsyncClient): - await client.post("/api/v1/tasks", json={"title": "Pending", "status": "pending"}) - await client.post("/api/v1/tasks", json={"title": "Completed", "status": "completed"}) - - response = await client.get("/api/v1/tasks", params={"status": "pending"}) - assert response.status_code == 200 - tasks = response.json()["tasks"] - assert 
all(t["status"] == "pending" for t in tasks) -``` +- API tests for CRUD, validation, and 404s. +- API tests for filters and pagination ordering. ### Manual -1. Start the API server. -2. Create: - - `curl -X POST http://localhost:8080/api/v1/tasks -H 'content-type: application/json' -d '{"title":"Buy groceries"}'` -3. List: - - `curl http://localhost:8080/api/v1/tasks?limit=50&offset=0` -4. Get by id: - - `curl http://localhost:8080/api/v1/tasks/` -5. Update: - - `curl -X PATCH http://localhost:8080/api/v1/tasks/ -H 'content-type: application/json' -d '{"status":"in_progress"}'` -6. Delete: - - `curl -X DELETE http://localhost:8080/api/v1/tasks/` -3. `curl .../tasks` to list and verify filters. -4. `curl -X PATCH .../tasks/{id}` to update. -5. `curl -X DELETE .../tasks/{id}` then `GET` to confirm 404. +- Use curl to create/list/get/update/delete tasks. +- Verify filtering results for status and due windows. ## Notes / Risks / Open Questions -- Decide whether list endpoints should default-sort (e.g., ETA ascending, then created_at). +- Decide default sort order (eta asc vs created_at desc). diff --git a/docs/02-implementation/pr-specs/PR-003-llm-chat-backbone.md b/docs/02-implementation/pr-specs/PR-003-llm-chat-backbone.md index 04615f4..85bf972 100644 --- a/docs/02-implementation/pr-specs/PR-003-llm-chat-backbone.md +++ b/docs/02-implementation/pr-specs/PR-003-llm-chat-backbone.md @@ -6,216 +6,115 @@ ## Goal -Make chat real: -- implement multi-provider LLM service (OpenRouter/BYOK) -- expose a streaming Chat API -- wire the interactive TUI chat panel to the backend +Implement provider-agnostic LLM chat with streaming responses and wire it into the TUI. ## User Value -- The primary interaction mode (chat inside the TUI) becomes usable. -- Users can ask questions and trigger task actions without leaving the TUI. +- Chat inside the TUI becomes usable. +- Users can bring their own API keys and models. 
+ +## References + +- `docs/01-design/DESIGN_CHAT.md` +- `docs/01-design/API_REFERENCE.md` +- `docs/01-design/DESIGN_TUI.md` +- `docs/01-design/DECISIONS.md` ## Scope ### In -- LLM provider abstraction (at minimum OpenRouter via OpenAI SDK; optional OpenAI-compatible providers). -- Chat endpoint supporting streaming. -- TUI chat view talks to the API and renders streaming responses. -- “No LLM configured” is a first-class UX (clear message, no crash). +- LLM provider abstraction (OpenRouter via OpenAI-compatible SDK). +- Streaming chat API endpoint (SSE). +- TUI chat panel sends messages and renders streamed output. +- Friendly UX for missing/invalid configuration. ### Out - RAG/semantic search context injection (PR-005). -- Advanced tool-calling / agent spawning (future). +- Tool-calling foundation (PR-003B). +- Multi-agent orchestration (PR-014). ## Mini-Specs -- LLM provider configuration: - - OpenRouter via OpenAI SDK (OpenAI-compatible `base_url` + `api_key`). - - model selection via config (`LLM_MODEL`, etc). -- Streaming chat endpoint: - - `POST /api/v1/chat` returns SSE (chunked `data:` messages + `[DONE]`). -- TUI wiring: - - chat panel sends user messages and renders streaming output. - - friendly UX for “not configured”, 401, rate limit, and network errors. -- Observability: - - structured logs for provider errors (no secrets in logs). +- Configurable provider settings (`LLM_API_KEY`, `LLM_BASE_URL`, `LLM_MODEL`). +- `POST /api/v1/chat` returns SSE with `[DONE]` terminator. +- TUI chat client handles streaming, rate limits, and network errors. +- Structured logging for provider errors without leaking content. -## References +## User Stories -- `docs/01-design/DESIGN_CHAT.md` (streaming + error handling) -- `docs/01-design/API_REFERENCE.md` (chat endpoint shape) -- `docs/01-design/DESIGN_TUI.md` (TUI chat panel wiring) -- `docs/01-design/DECISIONS.md` (provider strategy) +- As a user, I can ask "what is due today?" and get a streamed reply in the TUI. 
+- As a user, I can use my own API key and switch models/providers. +- As a user, I see clear guidance when my API key is missing or invalid. -## User Stories +## UX Notes (if applicable) -- As a user, I can ask “what’s due today?” from inside the TUI and get a streamed response. -- As a user, I can use my own API key (BYOK) and switch models/providers. -- As a user, if my API key is missing/invalid, the UI tells me exactly what to configure. +- Show a clear "LLM not configured" state with setup guidance. ## Technical Design -### Provider abstraction +### Architecture + +- `LLMService` exposes `stream_chat(messages)` and hides provider details. +- Use OpenAI-compatible client with configurable base URL (OpenRouter default). +- TUI uses `httpx.AsyncClient` streaming to append chunks in real time. + +### Data Model / Migrations -- `LLMService` in `backend/services/llm_service.py`: - - `async def stream_chat(messages: List[Dict[str, str]]) -> AsyncIterator[str]` - - Configurable `base_url` and `api_key` for OpenRouter/OpenAI compatibility. +- N/A (chat history persistence is out of scope for this PR). -### API contract (streaming) +### API Contract -- `POST /api/v1/chat` (see `docs/01-design/API_REFERENCE.md` for request fields) -- Request: `application/json` -- Response: Server-Sent Events (SSE) stream (`text/event-stream`) - - Each chunk: `data: \n\n` - - Terminator: `data: [DONE]\n\n` +- `POST /api/v1/chat` accepts message list or single message per `API_REFERENCE.md`. +- Response is `text/event-stream` with `data:` chunks and `[DONE]` terminator. -### TUI integration +### Background Jobs -- `ChatPanel` widget: - - Uses `httpx.AsyncClient` with `stream=True` - - Appends streamed text chunks to the chat transcript incrementally - - Handles `ConnectTimeout` and `401 Unauthorized` with user-friendly modals +- N/A. 
-### Error mapping +### Security / Privacy -- Normalize common failures: - - missing API key → “configure LLM” message - - 401 → “invalid key” - - 429 → “rate limit” + retry suggestion - - network error → “cannot reach provider” +- Do not log prompts or responses by default. +- Keys are read from env/config and never persisted to DB. + +### Error Handling + +- Map provider failures to user-facing errors: missing key, 401 invalid key, 429 + rate limit, and network timeouts. ## Acceptance Criteria -- [ ] Chat endpoint returns a streaming response. +### AC1: Streaming Chat API + +**Success Criteria:** +- [ ] Chat endpoint streams SSE chunks and ends with `[DONE]`. + +### AC2: TUI Chat Integration + +**Success Criteria:** - [ ] TUI can send a message and display a streamed reply. -- [ ] Missing/invalid API key results in a clear error message (in TUI and API). -- [ ] Provider selection (model/base_url) works via config. + +### AC3: Configuration and Error UX + +**Success Criteria:** +- [ ] Missing or invalid keys produce clear, non-crashing messages. +- [ ] Provider/model configuration works via settings. 
## Test Plan ### Automated -```python -# tests/test_api/test_chat.py -import pytest -from httpx import AsyncClient, ASGITransport -from unittest.mock import AsyncMock, patch, MagicMock - -class TestChatEndpoint: - """Tests for POST /api/v1/chat""" - - @pytest.mark.asyncio - async def test_chat_streams_response(self, client: AsyncClient): - """Chat endpoint returns SSE stream.""" - mock_llm = AsyncMock() - mock_llm.stream_chat = AsyncMock(return_value=async_gen_chunks(["Hello", " ", "world", "!"])) - - with patch("backend.services.llm_service.llm_service.stream_chat", mock_llm): - response = await client.post("/api/v1/chat", json={"message": "Hello"}) - assert response.status_code == 200 - - # Verify SSE stream format - content = response.text - assert "data:" in content - assert "[DONE]" in content - - @pytest.mark.asyncio - async def test_chat_missing_api_key(self, client: AsyncClient): - """Missing API key returns clear error.""" - with patch("backend.services.llm_service.llm_service", side_effect=ValueError("API key not configured")): - response = await client.post("/api/v1/chat", json={"message": "test"}) - assert response.status_code == 500 - assert "configure" in response.json()["detail"].lower() - - -class TestLLMService: - """Tests for LLM provider abstraction""" - - @pytest.mark.asyncio - async def test_stream_chat_openrouter(self): - """OpenRouter integration works.""" - from backend.services.llm_service import LLMService - from unittest.mock import AsyncMock - - mock_client = AsyncMock() - mock_response = MagicMock() - mock_response.aiter_bytes.return_value = [b"data: Hello", b"data: World", b"data: [DONE]"] - mock_client.stream.return_value = mock_response - - service = LLMService(api_key="test-key", base_url="https://openrouter.ai/api") - service._client = mock_client - - chunks = [] - async for chunk in service.stream_chat([{"role": "user", "content": "test"}]): - chunks.append(chunk) - - assert len(chunks) == 3 - assert chunks[0].decode() == "data: 
Hello" - assert chunks[2].decode() == "data: [DONE]" - - @pytest.mark.asyncio - async def test_error_handling_401(self): - """401 Unauthorized returns clear error.""" - from backend.services.llm_service import LLMService - from unittest.mock import AsyncMock, MagicMock - - mock_client = AsyncMock() - mock_response = MagicMock() - mock_response.status_code = 401 - mock_response.json.return_value = {"error": "Invalid API key"} - mock_client.stream.return_value = mock_response - - service = LLMService(api_key="bad-key") - service._client = mock_client - - with pytest.raises(ValueError, match="Invalid API key"): - async for _ in service.stream_chat([{"role": "user", "content": "test"}]): - pass -``` - -```python -# tests/test_services/test_rag_context.py -import pytest - -class TestRAGContext: - """Tests for RAG context building""" - - @pytest.mark.asyncio - async def test_build_context_includes_metadata(self, rag_service_test): - """Context includes task status and priority.""" - context = await rag_service_test.build_context( - "what tasks?", - n_results=2, - include_metadata=True - ) - - assert "status:" in context - assert "priority:" in context - assert "similarity:" in context - - @pytest.mark.asyncio - async def test_build_context_no_results(self, rag_service_test): - """Context message when no relevant results.""" - mock_search = AsyncMock(return_value=[]) - rag_service_test.search = mock_search - - context = await rag_service_test.build_context("test query", n_results=3) - assert "No relevant tasks found" in context -``` +- API tests for SSE format and error mapping. +- Unit tests for `LLMService` provider errors. +- TUI client tests using mocked SSE streams. ### Manual -1. Configure a valid API key and model. -2. Start API and run `tgenie`: - - send "What tasks are due today?" - - verify response streams and renders correctly -3. 
Unset API key and retry: - - verify UI explains how to configure LLM and does not crash +- Configure a valid key and verify streaming replies in the TUI. +- Unset the key and verify setup guidance is shown. ## Notes / Risks / Open Questions -- Keep SSE payload format consistent across TUI and Web UI to avoid divergence. -- Avoid logging prompts/responses by default (privacy). +- Keep SSE payload format consistent for TUI and Web UI. +- Tool execution and agent workflows are handled in PR-003B. diff --git a/docs/02-implementation/pr-specs/PR-003B-agent-tool-calling.md b/docs/02-implementation/pr-specs/PR-003B-agent-tool-calling.md new file mode 100644 index 0000000..be923b1 --- /dev/null +++ b/docs/02-implementation/pr-specs/PR-003B-agent-tool-calling.md @@ -0,0 +1,626 @@ +# PR-003B: Agent Tool-Calling Foundation (Spec) + +**Status:** Spec Only +**Depends on:** PR-003, PR-002, PR-004 +**Last Reviewed:** 2026-01-01 + +## Goal + +Enable tool-calling in chat so the assistant can take safe, auditable actions on +behalf of the user. + +## User Value + +- Users can ask the assistant to create/update tasks directly. +- The assistant can refresh attachments and fetch context on demand. + +## References + +- `docs/01-design/DESIGN_CHAT.md` +- `docs/01-design/DESIGN_DATA.md` +- `docs/01-design/API_REFERENCE.md` + +## Scope + +### In + +- Tool schema definition and registry (name, description, parameters). +- Tool execution pipeline with validation, timeouts, and retries. +- Safe defaults: allowlist and confirmation for destructive actions. +- Initial tool set covers task CRUD and attachment list/create/delete. +- Semantic search tool is included if PR-005 exists. +- Tool execution logging with correlation IDs (no content logs). + +### Out + +- Multi-agent orchestration (PR-014). +- Long-running background tools and scheduling. +- Marketplace or dynamic tool discovery. + +## Mini-Specs + +- Tool schema defined in Pydantic and exported to JSON Schema for LLM providers. 
+- `ToolRegistry` for registering tools and validating args. +- `ToolExecutor` that runs tools, captures results, and maps errors. +- Confirmation gate for destructive tools (delete, overwrite). +- Tool results are appended to the chat stream before final model response. + +## User Stories + +- As a user, I can say "mark my task as done" and the assistant updates it. +- As a user, I can ask the assistant to refresh a GitHub attachment. +- As a developer, I can add a new tool with a schema and handler. + +## UX Notes (if applicable) + +- Destructive actions require explicit confirmation before execution. + +## Technical Design + +### Architecture + +- `ToolDefinition` (schema + metadata) and `ToolHandler` (callable). +- Registry stores tools and exposes schema list to the LLM provider. +- Chat flow: + 1. Send messages + tool schema to LLM. + 2. If tool call returned, validate and execute tool. + 3. Append tool result and call LLM again for final response. + +### Data Model / Migrations + +- N/A (tool execution logs are structured logs, not persisted). + +### API Contract + +- `POST /api/v1/chat` accepts tool-enabled requests; tool calls are handled server-side. +- Optional `GET /api/v1/tools` returns tool schema for debugging. + +### Background Jobs + +- N/A (all tools execute inline in this PR). + +### Security / Privacy + +- Tool allowlist and confirmation for destructive operations. +- Tool execution logs contain metadata only (no user content or secrets). + +### Error Handling + +- Invalid tool args return a tool error message back to the model. +- Tool timeouts return a clear failure response without crashing chat. + +## Acceptance Criteria + +### AC1: Tool Schema and Registry + +**Success Criteria:** +- [ ] Tools are defined with JSON Schema parameters. +- [ ] Tool registry exposes the schema list to the chat pipeline. + +### AC2: Tool Execution Flow + +**Success Criteria:** +- [ ] Tool calls are validated and executed with timeout handling. 
+- [ ] Tool results are included in the chat response flow. + +### AC3: Safety and Confirmation + +**Success Criteria:** +- [ ] Destructive tools require confirmation or are blocked by default. +- [ ] Tool errors are surfaced as readable assistant responses. + +## Test Plan + +### Automated + +- Unit tests for tool schema validation and registry behavior. +- Integration tests for tool call -> execution -> response flow with mocked LLM. +- Safety tests for destructive tool confirmation gating. + +### Manual + +- Run chat and ask the assistant to create/update/complete a task. +- Attempt a destructive action and verify confirmation is required. + +## Notes / Risks / Open Questions + +- Decide whether tool execution logs should be persisted for audit (future). + +--- + +## Skill Enrichment: tool-design + +### Consolidation Principle + +Prefer single comprehensive tools over multiple narrow tools: + +```python +# ❌ Anti-pattern: Multiple narrow tools +def get_task_by_id(task_id: str) -> Task: + """Retrieve task by ID.""" + pass + +def get_tasks_by_status(status: str) -> list[Task]: + """Retrieve tasks by status.""" + pass + +def get_tasks_due_before(date: datetime) -> list[Task]: + """Retrieve tasks due before date.""" + pass + +# ✅ Better: Single comprehensive tool +def query_tasks( + filters: TaskFilters, + limit: int = 50, + sort: str = "eta_asc" +) -> TaskQueryResult: + """ + Query tasks with flexible filtering. + + Use when: + - User asks "What's due today?" 
+ - User asks "Show high-priority tasks" + - User asks "List completed tasks" + + Args: + filters: Filter criteria (status, priority, date_range, tags) + limit: Max results to return (default 50) + sort: Sort order (eta_asc, priority_desc, created_desc) + + Returns: + TaskQueryResult with tasks, total count, and applied filters + + Consolidates: get_task_by_id, get_tasks_by_status, get_tasks_due_before + """ + pass +``` + +### Tool Description Engineering + +Write descriptions that answer what, when, and what returns: + +```python +@tool_definition( + name="create_task", + description=""" + Create a new task in the task manager. + + Use when: + - User explicitly asks to create a task + - User says "Add a task for X" + - User needs to capture a quick action item + + The task will be created with 'pending' status and 'medium' priority by default. + + Args: + title: Task title (required, 1-255 characters) + description: Detailed description (optional, markdown supported) + priority: Task priority (optional, one of: low, medium, high, critical) + eta: Due date/time (optional, ISO 8601 format) + tags: List of tags (optional, for categorization) + + Returns: + Created task object with ID, timestamps, and initial state + + Errors: + INVALID_TITLE: Title too long or contains invalid characters + INVALID_PRIORITY: Priority value not in allowed set + INVALID_ETA: Date format not parseable + """ +) +async def create_task( + title: str, + description: str | None = None, + priority: str = "medium", + eta: str | None = None, + tags: list[str] | None = None +) -> dict: + """Create a new task.""" + # Implementation... 
+``` + +### Response Format Optimization + +Provide concise and detailed format options: + +```python +from enum import Enum + +class ResponseFormat(Enum): + """Control response verbosity for token efficiency.""" + CONCISE = "concise" # ID + status only + STANDARD = "standard" # Core fields (title, status, priority, eta) + DETAILED = "detailed" # Full object with all fields + +@tool_definition( + name="list_tasks", + description=f""" + Query tasks from the task manager. + + Use when: + - User asks "What tasks do I have?" + - User needs to see tasks by status/priority/date + - User wants to review workload + + Args: + filters: Filter criteria (optional) + limit: Max results to return (default 50) + format: Response format - 'concise', 'standard', or 'detailed' + + Use 'concise' for quick overviews or when user needs minimal info. + Use 'standard' for most queries where full context needed. + Use 'detailed' when user needs comprehensive view or all fields. + + Returns: + Formatted task list based on requested format. + + 'concise': Returns only task ID and status + 'standard': Returns title, status, priority, eta, created_at + 'detailed': Returns all fields including description, tags, attachments + """ +) +async def list_tasks( + filters: dict | None = None, + limit: int = 50, + format: str = "standard" +) -> dict: + """List tasks with format control.""" + # Implementation... 
+```
+
+### Tool Definition Schema
+
+Use consistent schema with naming conventions:
+
+```python
+from pydantic import BaseModel, Field
+from typing import Any, Optional, List
+from enum import Enum
+
+class ToolParameter(BaseModel):
+    """Tool parameter definition."""
+    name: str
+    type: str # "string", "integer", "boolean", "array"
+    description: str
+    required: bool
+    default: Optional[Any] = None
+    enum: Optional[List[str]] = None
+    properties: Optional[dict] = None # Nested schemas for object-type parameters
+
+class ToolDefinition(BaseModel):
+    """Tool definition following JSON Schema."""
+    name: str = Field(..., description="Tool identifier (verb_noun pattern)")
+    description: str = Field(..., description="What the tool does, when to use it, and what it returns")
+    parameters: ToolParameter = Field(..., description="Parameters schema")
+    returns: dict = Field(
+        default={"type": "object"},
+        description="Return type and structure"
+    )
+    examples: Optional[List[dict]] = Field(
+        None,
+        description="Example usage patterns for agents"
+    )
+
+# Example: Create task tool
+CREATE_TASK_TOOL = ToolDefinition(
+    name="create_task",
+    description=(
+        "Create a new task. Use when user asks to add, create, "
+        "or capture a task. Returns the created task with ID and timestamps."
+ ), + parameters=ToolParameter( + name="args", + type="object", + description="Task creation parameters", + required=True, + properties={ + "title": { + "type": "string", + "description": "Task title (1-255 characters)" + }, + "priority": { + "type": "string", + "description": "Task priority (default: medium)", + "enum": ["low", "medium", "high", "critical"], + "default": "medium" + }, + "eta": { + "type": "string", + "description": "Due date/time (ISO 8601 format)" + } + } + ), + examples=[ + { + "input": {"title": "Buy groceries", "priority": "high"}, + "output": {"id": "task-123", "status": "pending", "created_at": "2025-01-15T10:00:00Z"} + } + ] +) +``` + +### Error Message Design + +Design error messages for agent recovery: + +```python +class ToolError(Exception): + """Tool error with actionable recovery guidance.""" + + def __init__( + self, + code: str, + message: str, + retryable: bool = False, + recover_action: str | None = None + ): + self.code = code + self.message = message + self.retryable = retryable + self.recover_action = recover_action + + def to_dict(self) -> dict: + return { + "error": self.message, + "code": self.code, + "retryable": self.retryable, + "recover_action": self.recover_action + } + +async def create_task(title: str, priority: str) -> dict: + """Create a task with clear error handling.""" + if len(title) > 255: + raise ToolError( + code="INVALID_TITLE", + message=f"Title too long: {len(title)} characters (max 255)", + recover_action="Truncate title to 255 characters and retry" + ) + + if priority not in ["low", "medium", "high", "critical"]: + raise ToolError( + code="INVALID_PRIORITY", + message=f"Invalid priority: {priority}", + recover_action="Use one of: low, medium, high, critical" + ) + + # Implementation... 
+ +async def fetch_github_pr(owner: str, repo: str, number: int) -> dict: + """Fetch GitHub PR with rate limit handling.""" + try: + pr = await github_service.get_pr(owner, repo, number) + return pr + except httpx.HTTPStatusError as e: + if e.response.status_code == 429: + raise ToolError( + code="RATE_LIMITED", + message="GitHub rate limit exceeded", + retryable=True, + recover_action="Wait 1 hour and retry, or configure GITHUB_TOKEN" + ) + elif e.response.status_code == 403: + raise ToolError( + code="FORBIDDEN", + message="Repository access forbidden", + recover_action="Check repository permissions or verify GITHUB_TOKEN" + ) + else: + raise ToolError( + code="FETCH_FAILED", + message=f"Failed to fetch PR: {e}", + retryable=True, + recover_action=f"Check that {owner}/{repo}#{number} exists" + ) +``` + +### Async Tool Execution Patterns + +Implement async tool patterns with timeout handling: + +```python +import asyncio +from contextlib import asynccontextmanager + +class AsyncToolExecutor: + """Async tool execution with timeout and cancellation.""" + + def __init__(self, timeout: float = 30.0): + self.timeout = timeout + + async def execute( + self, + tool_name: str, + tool_func: Callable, + **kwargs + ) -> dict: + """Execute tool with timeout and error handling.""" + try: + # Execute with timeout + result = await asyncio.wait_for( + tool_func(**kwargs), + timeout=self.timeout + ) + return { + "status": "success", + "result": result, + "duration_ms": 0 # TODO: track duration + } + + except asyncio.TimeoutError: + logger.warning({ + "event": "tool_timeout", + "tool": tool_name, + "timeout": self.timeout + }) + return { + "status": "timeout", + "error": f"Tool execution timed out after {self.timeout}s", + "recoverable": True + } + + except ToolError as e: + logger.warning({ + "event": "tool_error", + "tool": tool_name, + "code": e.code, + "retryable": e.retryable + }) + return { + "status": "error", + "error": e.message, + "code": e.code, + "retryable": 
e.retryable,
+                "recover_action": e.recover_action
+            }
+
+        except Exception as e:
+            logger.error({
+                "event": "tool_unexpected_error",
+                "tool": tool_name,
+                "error": str(e)
+            })
+            return {
+                "status": "error",
+                "error": "Unexpected tool error",
+                "recoverable": False
+            }
+
+# Usage
+executor = AsyncToolExecutor(timeout=30.0)
+result = await executor.execute(
+    tool_name="create_task",
+    tool_func=create_task,
+    title="Buy groceries",
+    priority="high"
+)
+```
+
+### Tool Result Caching
+
+Implement memoization for expensive tool calls:
+
+```python
+from datetime import datetime
+import hashlib
+
+def cache_key(tool_name: str, **kwargs) -> str:
+    """Generate stable cache key."""
+    # Sort kwargs for consistent keys
+    sorted_items = sorted(kwargs.items())
+    key_str = f"{tool_name}:{sorted_items}"
+    return hashlib.md5(key_str.encode()).hexdigest()
+
+class CachedToolExecutor:
+    """Tool executor with memoization."""
+
+    def __init__(self, ttl_seconds: int = 300):
+        self.ttl_seconds = ttl_seconds
+        self._cache: dict[str, tuple[dict, datetime]] = {}
+
+    async def execute(
+        self,
+        tool_name: str,
+        tool_func: Callable,
+        **kwargs
+    ) -> dict:
+        """Execute tool with caching."""
+        key = cache_key(tool_name, **kwargs)
+
+        # Check cache
+        if key in self._cache:
+            result, cached_at = self._cache[key]
+            age_seconds = (datetime.now() - cached_at).total_seconds()
+
+            if age_seconds < self.ttl_seconds:
+                logger.debug({
+                    "event": "tool_cache_hit",
+                    "tool": tool_name,
+                    "key": key
+                })
+                return result
+
+        # Execute tool
+        result = await tool_func(**kwargs)
+
+        # Cache result
+        self._cache[key] = (result, datetime.now())
+        logger.debug({
+            "event": "tool_cache_miss",
+            "tool": tool_name,
+            "key": key
+        })
+
+        return result
+
+# Usage
+executor = CachedToolExecutor(ttl_seconds=300)
+await executor.execute(tool_name="list_tasks", tool_func=list_tasks, status="pending")
+```
+
+### Tool Composition and Chaining
+
+Support sequential tool chaining for 
workflows: + +```python +class ToolChainer: + """Chain multiple tools in sequence.""" + + def __init__(self): + self.tools: dict[str, Callable] = {} + + async def chain( + self, + steps: List[dict] + ) -> List[dict]: + """ + Execute tools in sequence, passing outputs to next steps. + + Steps format: + [ + {"tool": "get_task", "args": {"task_id": "$task_id"}}, + {"tool": "update_task", "args": {"task_id": "$task_id", "status": "completed"}} + ] + """ + context: dict = {} + results: List[dict] = [] + + for step in steps: + tool_name = step["tool"] + step_args = self._resolve_args(step["args"], context) + + result = await self.tools[tool_name](**step_args) + results.append(result) + + # Update context for next step + context.update(result.get("output", {})) + + return results + + def _resolve_args(self, args: dict, context: dict) -> dict: + """Resolve argument references from previous step outputs.""" + resolved = {} + for key, value in args.items(): + if isinstance(value, str) and value.startswith("$"): + ref_key = value[1:] # Remove $ prefix + resolved[key] = context.get(ref_key, value) + else: + resolved[key] = value + return resolved + +# Example workflow chain +workflow = ToolChainer() +results = await workflow.chain([ + { + "tool": "create_task", + "args": {"title": "Review PR #123"} + }, + { + "tool": "search_github_pr", + "args": {"query": "PR #123"} # Will get task_id from previous output + }, + { + "tool": "add_attachment", + "args": {"task_id": "$task_id", "url": "$pr_url"} + } +]) +``` diff --git a/docs/02-implementation/pr-specs/PR-004-attachments-link-detection.md b/docs/02-implementation/pr-specs/PR-004-attachments-link-detection.md index 6db620b..eaa444a 100644 --- a/docs/02-implementation/pr-specs/PR-004-attachments-link-detection.md +++ b/docs/02-implementation/pr-specs/PR-004-attachments-link-detection.md @@ -6,24 +6,27 @@ ## Goal -Enable “context-first tasks” by supporting attachments and auto-detecting URLs in task content. 
+Support task attachments and automatically detect URLs in task content. ## User Value -- Users can capture a task and paste a URL (GitHub PR, Gmail, doc) and know it will be saved + surfaced later. -- This unlocks integrations incrementally (GitHub/Gmail can be layered on without changing core flows). +- Users can attach links to tasks and keep context nearby. +- Integrations can be layered later without changing the core workflow. + +## References + +- `docs/01-design/DESIGN_DATA.md` +- `docs/01-design/INTEGRATION_GUIDE.md` +- `docs/01-design/API_REFERENCE.md` ## Scope ### In -- Attachment model + schema (type, reference URL, title, cached content, timestamps). -- Attachment CRUD endpoints (at least: create/list/delete; update optional). -- Link detection service that scans task title/description for URLs and creates attachments automatically. -- Minimal provider registry: - - `match_url(url) -> bool` - - `normalize(url) -> reference` - - default provider for “generic url” +- Attachment model and DB table. +- Attachment CRUD endpoints (create/list/delete; update optional). +- Link detection on task create/update with URL normalization and dedup. +- Provider registry with `match_url` and `normalize` hooks. ### Out @@ -32,231 +35,86 @@ Enable “context-first tasks” by supporting attachments and auto-detecting UR ## Mini-Specs -- Data model: - - `Attachment` schema + DB table aligned with `DESIGN_DATA.md`. -- API endpoints: - - create/list/delete attachments under `/api/v1/...` (align `API_REFERENCE.md`). -- URL normalization + dedup: - - normalize provider references (`github:...`, `gmail:...`, generic URLs). - - prevent duplicates per `(task_id, normalized_reference)`. -- Link detection: - - parse task title/description and auto-create attachments for supported URLs. - - provider registry for `match_url` / `normalize` hooks. -- Tests: - - manual attach, auto-detect, dedup, 404/409 behavior. +- Attachment schema aligned with `DESIGN_DATA.md`. 
+- Endpoints for creating/listing/deleting attachments under `/api/v1`. +- URL normalization to stable references; dedup per task and reference. +- Auto-detection runs on task create/update for title/description. +- Attachment service exposes safe methods that can be wrapped as tools (PR-003B). -## References +## User Stories -- `docs/01-design/DESIGN_DATA.md` (attachment schema) -- `docs/01-design/INTEGRATION_GUIDE.md` (provider protocol pattern) -- `docs/01-design/API_REFERENCE.md` (attachments endpoints, if/when defined) +- As a user, I can attach a URL to a task and see it listed. +- As a user, URLs pasted into task content create attachments automatically. +- As a user, duplicate URLs do not create duplicate attachments. -## User Stories +## UX Notes (if applicable) -- As a user, I can attach a URL to a task and see it listed on the task. -- As a user, when I paste a URL into a task description, an attachment is created automatically. -- As a user, I don’t get duplicate attachments for the same link pasted twice. +- N/A. ## Technical Design -### Attachment model - -- Attachments belong to a task (`task_id` foreign key). 
-- Store: - - `type` (e.g., `url|github|gmail|notion|...`) - - `reference` (canonical/normalized reference) - - `title` (optional) - - `content` (optional cached content; may be empty until integrations run) - - timestamps - -### Endpoints (minimum) - -- Create attachment: - - `POST /api/v1/tasks/{id}/attachments` (or `POST /api/v1/attachments` with `task_id`) -- List attachments for a task: - - `GET /api/v1/tasks/{id}/attachments` -- Delete attachment: - - `DELETE /api/v1/attachments/{attachment_id}` - -### Auto-detection behavior - -- Extract URL candidates from `title` and `description` on: - - task create - - task update (when title/description changes) -- **Parsing:** Start with a simple regex (e.g., `r"https?://\\S+"`), then: - - trim common trailing punctuation (e.g., `)`, `]`, `.`, `,`) - - normalize with `urllib.parse` (lowercase scheme/host, drop obvious tracking params if desired) -- **LinkDetectionService:** - - Providers: - - `GitHubProvider`: matches `github.com/(.*)/(.*)/(pull|issues)/(\d+)` - - `GmailProvider`: matches `mail.google.com/mail/u/\d+/#(inbox|label|all)/(\w+)` - - `GenericProvider`: matches any other valid URL -- **Deduplication:** Check if `reference` already exists for the given `task_id` before inserting. - -### Attachment lifecycle - -- Attachments are created immediately with reference URL. -- Cached content is optional at this stage (can be empty until integrations fetch it). +### Architecture + +- `LinkDetectionService` parses URLs and calls the provider registry. +- Provider registry includes GitHub, Gmail, and generic URL providers. +- Attachment service handles create/list/delete with dedup checks. +- Provide a thin tool wrapper layer in PR-003B (no tool execution here). + +### Data Model / Migrations + +- `Attachment` fields: id, task_id (FK), type, reference, title, content, + created_at, updated_at. +- Unique constraint on `(task_id, reference)` to prevent duplicates. 
+ +### API Contract + +- `POST /api/v1/tasks/{id}/attachments` (or `/api/v1/attachments` with `task_id`). +- `GET /api/v1/tasks/{id}/attachments`. +- `DELETE /api/v1/attachments/{attachment_id}`. + +### Background Jobs + +- N/A. + +### Security / Privacy + +- Only store references; no external fetching in this PR. + +### Error Handling + +- 404 for missing task/attachment. +- 409 conflict for duplicate normalized references. ## Acceptance Criteria -- [ ] Users can manually attach a URL to a task. -- [ ] URLs in task description auto-create attachments. -- [ ] Attachments are listed with task and have stable IDs. -- [ ] Link detection correctly identifies GitHub, Gmail, and generic URLs. -- [ ] Deduplication prevents duplicate attachments for same normalized URL. +### AC1: Manual Attachment CRUD + +**Success Criteria:** +- [ ] Users can add, list, and delete attachments for a task. + +### AC2: Auto-Detect URLs + +**Success Criteria:** +- [ ] URLs in title/description create attachments automatically. + +### AC3: Normalization and Deduplication + +**Success Criteria:** +- [ ] Providers normalize URLs to stable references. +- [ ] Duplicate references for a task are rejected or no-op per design. 
## Test Plan ### Automated -```python -# tests/test_api/test_attachments.py -import pytest -from httpx import AsyncClient -from backend.services.attachment_service import AttachmentType - -class TestAddAttachment: - """Tests for POST /api/v1/tasks/{id}/attachments""" - - @pytest.mark.asyncio - async def test_add_github_attachment(self, client: AsyncClient, sample_task): - """Add GitHub PR attachment.""" - response = await client.post( - f"/api/v1/tasks/{sample_task.id}/attachments", - json={ - "type": "github", - "reference": "https://github.com/owner/repo/pull/123" - } - ) - assert response.status_code == 201 - data = response.json() - assert data["type"] == "github" - - @pytest.mark.asyncio - async def test_add_gmail_attachment(self, client: AsyncClient, sample_task): - """Add Gmail attachment.""" - response = await client.post( - f"/api/v1/tasks/{sample_task.id}/attachments", - json={ - "type": "gmail", - "reference": "gmail:18e4f7a2b3c4d5e" - } - ) - assert response.status_code == 201 - data = response.json() - assert data["type"] == "gmail" - - @pytest.mark.asyncio - async def test_add_generic_url_attachment(self, client: AsyncClient, sample_task): - """Add generic URL attachment.""" - response = await client.post( - f"/api/v1/tasks/{sample_task.id}/attachments", - json={ - "type": "url", - "reference": "https://example.com/docs" - } - ) - assert response.status_code == 201 - - @pytest.mark.asyncio - async def test_add_attachment_task_not_found(self, client: AsyncClient): - """Return 404 when task doesn't exist.""" - response = await client.post( - "/api/v1/tasks/nonexistent/attachments", - json={"type": "url", "reference": "https://example.com"} - ) - assert response.status_code == 404 - - -class TestLinkDetection: - """Tests for automatic link detection""" - - @pytest.mark.asyncio - async def test_detect_github_pr_url(self, db_session): - """Detect GitHub PR URL in task description.""" - from backend.services.link_detection import LinkDetectionService - - 
task = await create_task_with_desc( - "Check https://github.com/owner/repo/pull/123 please" - ) - - detected = await LinkDetectionService.detect_and_create(db_session, task) - assert len(detected) == 1 - assert detected[0]["type"] == "github" - assert detected[0]["reference"] == "github:owner/repo/pull/123" - - @pytest.mark.asyncio - async def test_detect_github_issue_url(self, db_session): - """Detect GitHub Issue URL.""" - from backend.services.link_detection import LinkDetectionService - - task = await create_task_with_desc( - "Issue at owner/repo#456" - ) - - detected = await LinkDetectionService.detect_and_create(db_session, task) - assert len(detected) == 1 - assert detected[0]["type"] == "github" - - @pytest.mark.asyncio - async def test_detect_gmail_url(self, db_session): - """Detect Gmail URL.""" - from backend.services.link_detection import LinkDetectionService - - task = await create_task_with_desc( - "Email https://mail.google.com/mail/u/0/#inbox/18e4f7a2b3c4d5e" - ) - - detected = await LinkDetectionService.detect_and_create(db_session, task) - assert len(detected) == 1 - assert detected[0]["type"] == "gmail" - - -class TestDeduplication: - """Tests for attachment deduplication""" - - @pytest.mark.asyncio - async def test_same_url_no_duplicate(self, client: AsyncClient, sample_task, db_session): - """Same URL doesn't create duplicate attachment.""" - from backend.services.link_detection import LinkDetectionService - - # First attachment - await LinkDetectionService.create_attachment( - db_session, sample_task.id, "github", "github:owner/repo/pull/123" - ) - - # Try same reference again - response = await client.post( - f"/api/v1/tasks/{sample_task.id}/attachments", - json={"type": "github", "reference": "github:owner/repo/pull/123"} - ) - assert response.status_code == 409 # Conflict - - @pytest.mark.asyncio - async def test_normalized_url_no_duplicate(self, client: AsyncClient, sample_task, db_session): - """Normalized URLs don't create duplicates.""" 
- from backend.services.link_detection import LinkDetectionService - - # Create with different param in URL - await LinkDetectionService.create_attachment( - db_session, sample_task.id, "github", "github:owner/repo/pull/123" - ) - - # Try with query param (normalized to same) - response = await client.post( - f"/api/v1/tasks/{sample_task.id}/attachments", - json={"type": "github", "reference": "github:owner/repo/pull/123?foo=bar"} - ) - assert response.status_code == 409 # Conflict -``` +- API tests for attachment CRUD. +- Unit tests for URL parsing, normalization, and dedup. ### Manual -1. Create task with `https://github.com/owner/repo/pull/123` in description. -2. Verify task details show 1 attachment entry. -3. Edit task and add a second URL; verify attachments list updates. +- Create tasks with URLs in description; verify attachments created. +- Paste duplicate links; verify no duplicate attachments. ## Notes / Risks / Open Questions -- Decide canonical URL normalization rules early (strip tracking params? keep fragments?). +- Finalize normalization rules (tracking params, fragments). diff --git a/docs/02-implementation/pr-specs/PR-005-rag-semantic-search.md b/docs/02-implementation/pr-specs/PR-005-rag-semantic-search.md index 8297ff8..0d2ac55 100644 --- a/docs/02-implementation/pr-specs/PR-005-rag-semantic-search.md +++ b/docs/02-implementation/pr-specs/PR-005-rag-semantic-search.md @@ -6,108 +6,114 @@ ## Goal -Add semantic recall across tasks and cached attachment content, and improve chat answers by injecting the most relevant context. +Add semantic search across tasks and cached attachment content, and use retrieved +context to improve chat responses. ## User Value -- “Where did I put that?” becomes fast: semantic search finds tasks/attachments by meaning. -- Chat becomes more accurate because it can ground responses in local data. +- Users can find tasks by meaning, not just keywords. +- Chat responses can reference local context. 
+ +## References + +- `docs/01-design/DESIGN_CHAT.md` +- `docs/01-design/DESIGN_DATA.md` +- `docs/01-design/API_REFERENCE.md` ## Scope ### In -- Local vector store (ChromaDB). -- Embedding pipeline for: - - tasks - - attachments (cached content) -- Semantic search endpoint (query → top-k results with scores). -- Chat integration: - - retrieve top context snippets - - inject into prompt (bounded by token limits) +- Local vector store (ChromaDB) and embeddings pipeline. +- Indexing for tasks and cached attachment content. +- Semantic search endpoint returning ranked results. +- Chat context injection with strict token limits. ### Out -- Cross-user/multi-tenant search. -- Remote vector DB. +- Multi-tenant or remote vector databases. +- Hybrid keyword search, re-ranking, and query expansion (future). ## Mini-Specs -- Embeddings: - - choose default embedding model/provider (configurable). -- Indexing: - - tasks + cached attachments are embedded and stored in ChromaDB. - - re-index on create/update and after attachment fetch. -- Query: - - semantic search API (`GET /api/v1/search/semantic`) returns ranked results. -- Chat augmentation: - - retrieve top snippets and inject into chat prompt (bounded by token limits). -- Tests: - - small fixture corpus; validate top-k results for representative queries. +- Configurable embedding model and vector store location. +- Indexing triggers on task create/update and attachment content updates. +- `/api/v1/search/semantic` endpoint returning top-k results with scores. +- Prompt assembly that injects retrieved context within a token budget. -## References +## User Stories -- `docs/01-design/DESIGN_CHAT.md` (RAG strategy + prompt assembly) -- `docs/01-design/DESIGN_DATA.md` (RAG document structure) -- `docs/01-design/API_REFERENCE.md` (semantic search endpoint examples) +- As a user, I can search by meaning and find relevant tasks. +- As a user, chat answers reference the right tasks or attachments. 
-## User Stories +## UX Notes (if applicable) -- As a user, I can search by meaning (“login issues”) and find the right task. -- As a user, chat answers can reference the correct task/attachment context. +- N/A. ## Technical Design -### Indexing triggers +### Architecture + +- Embedding service creates chunks and writes to ChromaDB. +- Retrieval service returns top-k snippets with scores and metadata. +- Chat pipeline requests top snippets and injects them into the prompt. + +### Data Model / Migrations + +- Vector store collection `taskgenie_items` with metadata: + id, source_type (task|attachment), parent_task_id, text, embedding_version. -- Index tasks: - - on task create/update -- Index attachments: - - when attachment content is fetched/updated (PR-006/PR-007) +### API Contract -### Embeddings +- `GET /api/v1/search/semantic?query=...&limit=...` returns results with id, type, + score, snippet, and task linkage. -- **Model:** `sentence-transformers/all-MiniLM-L6-v2` (via `sentence-transformers`; make configurable later) -- **Vector Store:** ChromaDB (running in-process) -- **Collection Naming:** `taskgenie_items` -- **Document Metadata:** - - `id`: task or attachment ID - - `source_type`: `task | attachment` - - `parent_task_id`: ID of the task - - `text`: the chunked content +### Background Jobs -### Retrieval + response format +- Optional background indexing for large attachment content to avoid blocking + requests. -- Semantic search endpoint returns: - - top-k results with scores - - short excerpts for display -- Chat integration: - - retrieve top snippets - - inject into prompt within a strict token budget - - (optional) include “sources” in response for traceability +### Security / Privacy + +- Do not log raw content or prompts by default. +- Embeddings are stored locally in the app data directory. + +### Error Handling + +- Empty index returns empty results (200 OK). +- Embedding failures return actionable errors and do not crash chat. 
## Acceptance Criteria +### AC1: Indexing Pipeline + +**Success Criteria:** - [ ] Tasks and cached attachments are embedded and indexed automatically. -- [ ] Semantic search returns relevant results for representative queries. -- [ ] Chat responses cite retrieved task/attachment context where applicable. + +### AC2: Semantic Search API + +**Success Criteria:** +- [ ] Search returns relevant results for representative queries. + +### AC3: Chat Context Injection + +**Success Criteria:** +- [ ] Chat responses include retrieved context when available and respect token + limits. ## Test Plan ### Automated -- Unit: chunking logic, prompt assembly, context truncation. -- Integration: - 1. Create two tasks with different topics; query semantic search; verify ranking. - 2. Add attachment content; query that content; verify attachment appears in results. - 3. Chat query uses retrieved context (verify via stubbed “context used” markers). +- Unit tests for chunking, prompt assembly, token budgeting. +- Integration tests for search ranking with a small fixture corpus. ### Manual -1. Create tasks “Fix auth bug” and “Plan vacation”. -2. Search “login issues” → should surface the auth task. -3. Ask chat “What’s the status of the login work?” → response should reference the correct task. +- Create tasks and attachments and query semantic search. +- Ask chat a question that should be grounded in a task. ## Notes / Risks / Open Questions -- Decide whether to store embeddings for task description only vs (title + description). +- Decide whether embeddings include title only or title + description. +- Consider hybrid search and re-ranking in a follow-on PR for higher precision. 
diff --git a/docs/02-implementation/pr-specs/PR-006-gmail-integration.md b/docs/02-implementation/pr-specs/PR-006-gmail-integration.md index 028c658..851c809 100644 --- a/docs/02-implementation/pr-specs/PR-006-gmail-integration.md +++ b/docs/02-implementation/pr-specs/PR-006-gmail-integration.md @@ -6,306 +6,112 @@ ## Goal -Given a Gmail URL attached to a task, authenticate via OAuth and fetch the email content, then cache it locally as attachment content. +Given a Gmail URL attachment, authenticate via OAuth, fetch email content, and cache +it locally on the attachment. ## User Value -- Email-linked tasks keep the important context without re-opening Gmail. -- Cached email content can later be searched via RAG (PR-005). +- Email-linked tasks keep essential context without reopening Gmail. +- Cached email content can later be searched via RAG. + +## References + +- `docs/01-design/INTEGRATION_GUIDE.md` +- `docs/01-design/DESIGN_DATA.md` +- `docs/01-design/DESIGN_BACKGROUND_JOBS.md` ## Scope ### In -- Parse Gmail URLs to a stable reference (message id / thread id as appropriate). -- OAuth flow (local machine): - - open browser for consent - - store credentials securely on disk -- Fetch: - - subject - - from/to - - date - - body (plain text preferred; HTML optional) -- Cache fetched content into the attachment record. +- Gmail URL normalization to a stable reference. +- OAuth flow for local machines and secure credential storage. +- Fetch subject/from/to/date/body and store in `attachment.content`. +- Explicit refresh action and/or background fetch for pending attachments. ### Out -- Two-way sync (labeling, replying, etc.). -- Bulk email ingestion. +- Two-way sync or bulk ingestion. ## Mini-Specs -- URL recognition + normalization: - - detect Gmail message URLs and normalize to a stable reference key. -- OAuth + credential storage: - - `tgenie config --gmail-auth` (or equivalent) to set up tokens. 
- - store secrets under `~/.taskgenie/credentials/` with `0600` permissions. -- Fetch + cache: - - fetch subject/from/date/body and store into `attachment.content`. -- Background fetch: - - allow explicit refresh and/or background “pending fetch” processing. -- Tests: - - URL parsing, auth error mapping, and fetch-to-cache pipeline (mocked HTTP). +- Provider that parses Gmail URLs and normalizes to `gmail:`. +- OAuth setup command and token storage under `~/.taskgenie/credentials/` with 0600. +- Fetch pipeline to populate attachment title/content and metadata. +- Optional background job to refresh pending attachments. +- Expose a tool wrapper for attachment refresh (registered in PR-003B). -## References +## User Stories -- `docs/01-design/INTEGRATION_GUIDE.md` (provider pattern + security) -- `docs/01-design/DESIGN_DATA.md` (attachment cache fields) -- `docs/01-design/DESIGN_BACKGROUND_JOBS.md` (fetch jobs without a queue) +- As a user, I can paste a Gmail URL and see email context on the task. +- As a user, my OAuth tokens are stored securely and not logged. -## User Stories +## UX Notes (if applicable) -- As a user, I can paste a Gmail URL into a task and see key email context in the attachment. -- As a user, the system handles OAuth securely and doesn’t leak email content into logs. +- Provide clear setup instructions when Gmail auth is missing. ## Technical Design -### URL-first normalization +### Architecture + +- Gmail provider service with methods: `normalize_url`, `fetch_message`, + `cache_content`. +- Auth uses Google OAuth InstalledAppFlow with local browser callback. +- A refresh entrypoint (CLI or background job) triggers fetch per attachment. +- Tool wrapper integration is defined in PR-003B and calls the refresh entrypoint. -- Accept Gmail URLs as the primary input. -- Normalize to a stable identifier (message id and/or thread id) stored in `attachment.reference`. 
+### Data Model / Migrations -### OAuth + credential storage +- Store Gmail message metadata in `attachment.metadata` and content in + `attachment.content`. -- OAuth flow opens the browser and stores credentials on disk with strict permissions. -- Do not store OAuth secrets inside the main SQLite DB. +### API Contract -### Fetch + cache +- No new public API required beyond attachment refresh endpoint/command. +- If adding API: `POST /api/v1/attachments/{id}/refresh` returns updated attachment. -- Fetch minimal useful fields (subject, from/to, date, text body). -- Store formatted content into `attachment.content` for later viewing/searching. +### Background Jobs -### Background fetch +- Optional background loop to process attachments with missing content. -- Fetch can be initiated: - - explicitly (e.g., “refresh attachment” action), and/or - - as a background job that processes “pending fetch” attachments. +### Security / Privacy + +- Store OAuth credentials on disk with 0600 permissions. +- Do not log email bodies or tokens. + +### Error Handling + +- Map Gmail API errors (401/403/404/429) to user-facing messages. +- Fail fetch gracefully without deleting the attachment. ## Acceptance Criteria -- [ ] OAuth flow works and credentials persist. -- [ ] Gmail URL is normalized and fetch succeeds. -- [ ] Email content is cached and viewable from task attachment. -- [ ] Credentials stored securely with file permissions (0600). +### AC1: OAuth and Credential Persistence -## Technical Design +**Success Criteria:** +- [ ] OAuth flow completes and tokens persist with secure permissions. + +### AC2: URL Normalization and Fetch + +**Success Criteria:** +- [ ] Gmail URLs normalize to stable references and fetch succeeds. 
+ +### AC3: Cache Attachment Content -### OAuth Flow Implementation - -```python -# backend/services/gmail_service.py -from google_auth_oauthlib.flow import InstalledAppFlow -from google.oauth2.credentials import Credentials -from pathlib import Path -import json - -class GmailService: - def __init__(self): - self.credentials: Credentials | None = None - self.SCOPES = ["https://www.googleapis.com/auth/gmail.readonly"] - self.CREDENTIALS_DIR = Path.home() / ".taskgenie" / "credentials" - self.CLIENT_SECRET_PATH = self.CREDENTIALS_DIR / "gmail_client_secret.json" - self.TOKEN_PATH = self.CREDENTIALS_DIR / "gmail_token.json" - - async def authenticate(self) -> bool: - """Run OAuth flow for Gmail.""" - # Check for existing token - if self.TOKEN_PATH.exists(): - self.credentials = Credentials.from_authorized_user_file( - str(self.TOKEN_PATH), self.SCOPES - ) - - # Refresh if expired - if self.credentials.expired and self.credentials.refresh_token: - self.credentials.refresh(Request()) - self._save_token() - return True - - # Run OAuth flow - if not self.CLIENT_SECRET_PATH.exists(): - raise FileNotFoundError( - f"Gmail client secret not found at {self.CLIENT_SECRET_PATH}. " - "Download from Google Cloud Console." - ) - - flow = InstalledAppFlow.from_client_secrets_file( - str(self.CLIENT_SECRET_PATH), self.SCOPES - ) - - self.credentials = flow.run_local_server( - port=0, - prompt="consent", - success_message="Authentication successful! You can close this tab." 
- ) - - self._save_token() - return True - - def _save_token(self): - """Save token to file with secure permissions.""" - self.TOKEN_PATH.parent.mkdir(parents=True, exist_ok=True) - - # Save token - with open(self.TOKEN_PATH, "w") as f: - f.write(self.credentials.to_json()) - - # Set secure file permissions (owner read/write only) - self.TOKEN_PATH.chmod(0o600) - - async def get_message(self, message_id: str) -> dict: - """Fetch Gmail message by ID.""" - if not self.credentials or not self.credentials.valid: - await self.authenticate() - - service = build("gmail", "v1", credentials=self.credentials) - message = service.users().messages().get( - userId="me", - id=message_id, - format="full" - ).execute() - - return self._parse_message(message) - - def _parse_message(self, raw_message: dict) -> dict: - """Parse Gmail message to structured data.""" - headers = { - h["name"].lower(): h["value"] - for h in raw_message.get("payload", {}).get("headers", []) - } - - # Extract body - payload = raw_message.get("payload", {}) - body = self._extract_body(payload) - - return { - "id": raw_message["id"], - "thread_id": raw_message["threadId"], - "subject": headers.get("subject", ""), - "from": headers.get("from", ""), - "to": headers.get("to", ""), - "date": headers.get("date", ""), - "body": body, - "snippet": raw_message.get("snippet", ""), - } - - def _extract_body(self, payload: dict) -> str: - """Extract body from message payload.""" - import base64 - - # Try direct body first - if "body" in payload and payload["body"].get("data"): - return base64.urlsafe_b64decode(payload["body"]["data"]).decode("utf-8") - - # Try multipart - if "parts" in payload: - for part in payload["parts"]: - if part.get("mimeType") == "text/plain" and part.get("body", {}).get("data"): - return base64.urlsafe_b64decode(part["body"]["data"]).decode("utf-8") - - return "" -``` +**Success Criteria:** +- [ ] Attachment content includes subject/from/to/date/body excerpt. 
## Test Plan ### Automated -```python -# tests/test_services/test_gmail_service.py -import pytest -from unittest.mock import MagicMock, patch -from backend.services.gmail_service import GmailService - -class TestGmailOAuth: - """Tests for Gmail OAuth flow""" - - @pytest.mark.asyncio - async def test_authenticate_creates_token(self, tmp_path): - """OAuth flow creates token file.""" - gmail = GmailService() - - # Mock OAuth flow - mock_flow = MagicMock() - mock_flow.run_local_server.return_value = MagicMock( - to_json=lambda: '{"refresh_token": "test-refresh", "token": "test-token"}' - ) - - with patch("backend.services.gmail_service.InstalledAppFlow", return_value=mock_flow): - await gmail.authenticate() - - # Verify token was saved - assert gmail.TOKEN_PATH.exists() - - # Check file permissions - import os - assert os.stat(gmail.TOKEN_PATH).st_mode == 0o600 - - @pytest.mark.asyncio - async def test_authenticate_loads_existing_token(self, tmp_path): - """Existing token is loaded and refreshed if needed.""" - gmail = GmailService() - - # Create valid token file - gmail.TOKEN_PATH.parent.mkdir(parents=True, exist_ok=True) - with open(gmail.TOKEN_PATH, "w") as f: - f.write('{"refresh_token": "valid", "token": "valid"}') - - # Load (should not run OAuth flow) - mock_flow = MagicMock() - with patch("backend.services.gmail_service.InstalledAppFlow", return_value=mock_flow): - await gmail.authenticate() - - # Verify existing token was used - mock_flow.run_local_server.assert_not_called() - - -class TestGmailMessageParsing: - """Tests for Gmail message parsing""" - - def test_parse_simple_message(self): - """Parse simple text message.""" - gmail = GmailService() - raw = { - "id": "msg-123", - "threadId": "thread-456", - "snippet": "Test message", - "payload": { - "headers": [ - {"name": "Subject", "value": "Test Subject"}, - {"name": "From", "value": "sender@example.com"} - ], - "body": {"data": "SGVsbG8gV29ybGQ="} # "Hello World" - } - } - - result = 
gmail._parse_message(raw) - assert result["subject"] == "Test Subject" - assert result["body"] == "Hello World" - - def test_parse_multipart_message(self): - """Parse multipart message.""" - gmail = GmailService() - raw = { - "id": "msg-123", - "payload": { - "parts": [ - { - "mimeType": "text/plain", - "body": {"data": "TWVsbG8gV29ybGQ="} - } - ] - } - } - - result = gmail._parse_message(raw) - assert result["body"] == "Hello World" -``` +- Unit tests for URL normalization and message parsing. +- Integration tests for token storage and refresh behavior (mocked HTTP). ### Manual -1. Run `tgenie config --gmail-auth` (or equivalent) and complete OAuth in browser. -2. Create task containing a Gmail URL. -3. Trigger fetch and verify attachment shows subject/from/date/body excerpt. +- Run OAuth setup, attach a Gmail URL, refresh, and verify cached content. ## Notes / Risks / Open Questions -- Gmail URL formats vary; normalization rules should be tested against real examples. +- Gmail URL formats vary; expand normalization fixtures with real samples. diff --git a/docs/02-implementation/pr-specs/PR-007-github-integration.md b/docs/02-implementation/pr-specs/PR-007-github-integration.md index 5820471..6200fb2 100644 --- a/docs/02-implementation/pr-specs/PR-007-github-integration.md +++ b/docs/02-implementation/pr-specs/PR-007-github-integration.md @@ -6,335 +6,110 @@ ## Goal -Given a GitHub Issue/PR URL attached to a task, fetch the useful content and cache it locally as attachment content. +Given a GitHub Issue/PR URL attachment, fetch useful content and cache it locally. ## User Value -- Tasks that reference GitHub become “self-contained”: the user can see key PR/Issue details without context switching. -- Cached content becomes searchable later (RAG is layered on in PR-005). +- GitHub-linked tasks become self-contained with key PR/issue details. +- Cached content becomes searchable via RAG later. 
+ +## References + +- `docs/01-design/INTEGRATION_GUIDE.md` +- `docs/01-design/DESIGN_DATA.md` +- `docs/01-design/DESIGN_BACKGROUND_JOBS.md` ## Scope ### In -- Recognize GitHub URLs (issue, pull request). -- Fetch: - - title - - description/body - - key metadata (state, author, repo, number, updated_at) -- Cache fetched content into the attachment record. -- Auth: - - support `GITHUB_TOKEN` (PAT) for higher rate limits - - handle unauthenticated mode with stricter limits +- URL recognition and normalization for issues and pull requests. +- Fetch title/body and key metadata. +- Cache content on the attachment with a refresh strategy. +- Optional `GITHUB_TOKEN` auth with rate-limit handling. ### Out -- Full thread/comment ingestion (future / optional). -- Webhooks (future). +- Full comment ingestion or webhooks. ## Mini-Specs -- URL recognition + normalization: - - issues and pull requests (public + private). -- Fetch + cache: - - title/body + key metadata cached into the attachment record. - - TTL/refresh strategy to avoid excessive refetching. -- Auth + rate limits: - - support `GITHUB_TOKEN` (PAT) and unauthenticated mode. - - map 401/403/404/429 to actionable user messages. -- Tests: - - normalize URLs; mocked GitHub API responses for success + error cases. - -## References - -- `docs/01-design/INTEGRATION_GUIDE.md` (provider pattern + security) -- `docs/01-design/DESIGN_DATA.md` (attachment cache fields) -- `docs/01-design/DESIGN_BACKGROUND_JOBS.md` (fetch jobs without a queue) +- Provider that normalizes GitHub URLs to `github:<owner>/<repo>/<type>/<number>`. +- Fetch pipeline that stores title/body and metadata on the attachment. +- Token-based auth and unauthenticated mode with clear rate-limit messaging. +- Optional TTL to avoid refetching too often. +- Expose a tool wrapper for attachment refresh (registered in PR-003B). ## User Stories -- As a user, I can paste a GitHub PR/Issue URL and see the important details in my task.
+- As a user, I can paste a GitHub URL and see key details in the task. - As a user, I get a clear error if the repo is private or my token is invalid. +## UX Notes (if applicable) + +- Show actionable errors for auth and rate limits. + ## Technical Design -### Provider implementation +### Architecture + +- GitHub provider service with `normalize_url`, `fetch_issue`, `fetch_pr`. +- Refresh entrypoint (CLI or background job) triggers fetch per attachment. +- Tool wrapper integration is defined in PR-003B and calls the refresh entrypoint. -- Implement a GitHub provider with: - - `match_url()`, `normalize()` - - `fetch_content()` returning a concise text payload - - `metadata()` returning structured fields (repo, number, state, updated_at) +### Data Model / Migrations -### Fetch + cache +- Store GitHub metadata (state, author, repo, number, updated_at) in + `attachment.metadata`. +- Store cached title/body in `attachment.title` and `attachment.content`. -- Cache: - - `attachment.title` from PR/Issue title - - `attachment.content` from body + a short metadata header -- Avoid refetching too frequently (cache TTL). +### API Contract -### Auth + rate limits +- No new public API required beyond attachment refresh endpoint/command. +- If adding API: `POST /api/v1/attachments/{id}/refresh` returns updated attachment. + +### Background Jobs + +- Optional background loop to refresh stale attachments based on TTL. + +### Security / Privacy + +- Read `GITHUB_TOKEN` from env/config; never log token values. + +### Error Handling -- Support `GITHUB_TOKEN` (PAT) and graceful unauthenticated mode. - Map 401/403/404/429 to actionable user messages. +- Unauthenticated mode works for public repos with lower rate limits. ## Acceptance Criteria -- [ ] GitHub PR/Issue URL is recognized and normalized. -- [ ] Content is fetched and cached in attachment record. -- [ ] Errors are actionable (401/403/404/429 show clear messages). -- [ ] Unauthenticated mode works with public repos. 
-- [ ] PAT authentication uses token for higher rate limits. +### AC1: URL Recognition and Normalization -## Technical Design +**Success Criteria:** +- [ ] GitHub issue/PR URLs normalize to stable references. + +### AC2: Fetch and Cache + +**Success Criteria:** +- [ ] Attachment content includes title/body and key metadata. + +### AC3: Auth and Rate Limits -### Provider Implementation - -```python -# backend/services/github_service.py -import httpx -import re -from dataclasses import dataclass -from backend.config import settings - -@dataclass -class GitHubPR: - """GitHub Pull Request data.""" - number: int - title: str - state: str - author: str - url: str - body: str - additions: int - deletions: int - labels: list[str] - created_at: str - updated_at: str - -@dataclass -class GitHubIssue: - """GitHub Issue data.""" - number: int - title: str - state: str - author: str - url: str - body: str - labels: list[str] - created_at: str - updated_at: str - - -class GitHubService: - """GitHub API service wrapper.""" - - API_BASE = "https://api.github.com" - - def __init__(self, token: str | None = None): - self.token = token or settings.github_token - self._client: httpx.AsyncClient | None = None - - @property - def client(self) -> httpx.AsyncClient: - """Lazy-initialize async HTTP client.""" - if self._client is None: - headers = { - "Accept": "application/vnd.github+json", - "X-GitHub-Api-Version": "2022-11-28", - } - if self.token: - headers["Authorization"] = f"Bearer {self.token}" - - self._client = httpx.AsyncClient( - base_url=self.API_BASE, - headers=headers, - timeout=30.0, - ) - return self._client - - async def close(self): - """Close HTTP client.""" - if self._client: - await self._client.aclose() - self._client = None - - async def get_pull_request( - self, owner: str, repo: str, number: int - ) -> GitHubPR: - """Fetch pull request details.""" - response = await self.client.get(f"/repos/{owner}/{repo}/pulls/{number}") - self._handle_errors(response) - data = 
response.json() - - return GitHubPR( - number=data["number"], - title=data["title"], - state=data["state"], - author=data["user"]["login"], - url=data["html_url"], - body=data.get("body") or "", - additions=data.get("additions", 0), - deletions=data.get("deletions", 0), - labels=[l["name"] for l in data.get("labels", [])], - created_at=data["created_at"], - updated_at=data["updated_at"], - ) - - async def get_issue(self, owner: str, repo: str, number: int) -> GitHubIssue: - """Fetch issue details.""" - response = await self.client.get(f"/repos/{owner}/{repo}/issues/{number}") - self._handle_errors(response) - data = response.json() - - return GitHubIssue( - number=data["number"], - title=data["title"], - state=data["state"], - author=data["user"]["login"], - url=data["html_url"], - body=data.get("body") or "", - labels=[l["name"] for l in data.get("labels", [])], - created_at=data["created_at"], - updated_at=data["updated_at"], - ) - - @staticmethod - def parse_github_url(url: str) -> tuple[str, str, str, int]: - """ - Parse GitHub URL to extract owner, repo, type, and number. - - Returns: (owner, repo, type, number) - type is 'pull' or 'issues' - """ - patterns = [ - r"github\.com/([^/]+)/([^/]+)/(pull|issues)/(\d+)", - r"^([^/]+)/([^/]+)/(pull|issues)/(\d+)$", - r"^([^/]+)/([^/]+)#(\d+)$", - ] - - for pattern in patterns: - match = re.search(pattern, url) - if match: - groups = match.groups() - if len(groups) == 4: - return groups[0], groups[1], groups[2], int(groups[3]) - elif len(groups) == 3: - return groups[0], groups[1], "issues", int(groups[2]) - - raise ValueError(f"Invalid GitHub URL: {url}") - - def _handle_errors(self, response: httpx.Response): - """Map GitHub API errors to actionable messages.""" - if response.status_code == 401: - raise ValueError("Invalid GitHub token. Configure GITHUB_TOKEN.") - elif response.status_code == 403: - raise ValueError("Forbidden. 
Check repository access permissions.") - elif response.status_code == 404: - raise ValueError("Repository or resource not found.") - elif response.status_code == 429: - raise ValueError("Rate limit exceeded. Wait before retrying.") - - response.raise_for_status() -``` +**Success Criteria:** +- [ ] Authenticated and unauthenticated modes both function with clear errors. ## Test Plan ### Automated -```python -# tests/test_services/test_github_service.py -import pytest -from unittest.mock import AsyncMock, patch -import httpx -from backend.services.github_service import GitHubService, GitHubPR - -class TestGitHubService: - """Tests for GitHub API integration""" - - @pytest.mark.asyncio - async def test_get_pull_request_success(self): - """Fetch PR details successfully.""" - service = GitHubService(token="test-token") - mock_client = AsyncMock() - - mock_response = AsyncMock() - mock_response.json.return_value = { - "number": 123, - "title": "Fix authentication bug", - "state": "open", - "user": {"login": "developer"}, - "html_url": "https://github.com/owner/repo/pull/123", - "body": "This PR fixes that issue", - "additions": 50, - "deletions": 10, - "labels": [{"name": "bug"}], - "created_at": "2025-01-10T10:00:00Z", - "updated_at": "2025-01-12T15:30:00Z", - } - mock_response.status_code = 200 - mock_response.raise_for_status = MagicMock() - mock_client.get = AsyncMock(return_value=mock_response) - - service._client = mock_client - pr = await service.get_pull_request("owner", "repo", 123) - - assert pr.number == 123 - assert pr.title == "Fix authentication bug" - assert "bug" in pr.labels - mock_client.get.assert_called_once_with("/repos/owner/repo/pulls/123") - - @pytest.mark.asyncio - async def test_get_issue_success(self): - """Fetch issue details successfully.""" - service = GitHubService(token="test-token") - mock_client = AsyncMock() - - mock_response = AsyncMock() - mock_response.json.return_value = { - "number": 456, - "title": "Add new feature", - "state": 
"open", - "user": {"login": "developer"}, - "html_url": "https://github.com/owner/repo/issues/456", - "labels": [{"name": "enhancement"}], - } - mock_response.status_code = 200 - mock_client.get = AsyncMock(return_value=mock_response) - service._client = mock_client - - issue = await service.get_issue("owner", "repo", 456) - assert issue.number == 456 - - def test_parse_github_url_full(self): - """Parse full GitHub URL.""" - owner, repo, type_, num = GitHubService.parse_github_url( - "https://github.com/owner/repo/pull/123" - ) - assert owner == "owner" - assert repo == "repo" - assert type_ == "pull" - assert num == 123 - - def test_parse_github_url_shorthand(self): - """Parse shorthand owner/repo#123 format.""" - owner, repo, type_, num = GitHubService.parse_github_url("owner/repo#456") - assert owner == "owner" - assert repo == "repo" - assert type_ == "issues" - assert num == 456 - - def test_parse_github_url_invalid(self): - """Parse invalid URL raises error.""" - with pytest.raises(ValueError, match="Invalid GitHub URL"): - GitHubService.parse_github_url("not-a-valid-url") -``` +- Unit tests for URL normalization (issues and PRs). +- Integration tests with mocked GitHub API responses and error mapping. ### Manual -1. Create a task with a GitHub PR URL in its description (auto-detect should attach it via PR-004). -2. Trigger attachment fetch (explicit command or background fetch). -3. Verify attachment shows title + body summary in task detail. +- Attach a public PR URL and verify cached content. +- Set/unset `GITHUB_TOKEN` and verify rate-limit/error messaging. ## Notes / Risks / Open Questions -- Decide whether to include comments later; MVP should keep API calls minimal. +- Decide whether to include comments in a later PR to avoid excessive API calls. 
diff --git a/docs/02-implementation/pr-specs/PR-008-interactive-tui.md b/docs/02-implementation/pr-specs/PR-008-interactive-tui.md index 95013a4..aac1a30 100644 --- a/docs/02-implementation/pr-specs/PR-008-interactive-tui.md +++ b/docs/02-implementation/pr-specs/PR-008-interactive-tui.md @@ -6,196 +6,113 @@ ## Goal -Ship a first-class interactive TUI as early as possible so we can iterate on UX from day 1. +Ship a first-class interactive TUI so we can iterate on UX early. ## User Value -- The user can actually “try the product” early (add/list/edit/complete tasks). -- UX feedback happens before heavy features (integrations/RAG) lock in assumptions. +- Users can try the product quickly and give feedback. +- Provides a primary workflow before integrations and RAG. + +## References + +- `docs/01-design/DESIGN_TUI.md` +- `docs/01-design/DESIGN_ARCHITECTURE.md` +- `docs/01-design/DESIGN_DATA.md` ## Scope ### In -- `tgenie` launches an interactive TUI by default. -- Core task workflows: - - view task list - - view task details - - create task - - edit task - - mark done -- Clear empty/loading/error states. -- A “chat panel” placeholder is allowed, but real LLM chat is PR-003. +- `tgenie` launches the interactive TUI by default. +- Task list, detail, create/edit, and mark-done flows. +- Empty/loading/error states and confirmation for destructive actions. +- Placeholder chat panel (no LLM yet). ### Out -- Full attachment viewing (PR-004). +- Attachment viewing (PR-004). - LLM-backed chat (PR-003). +- Agent panel and tool execution UI (PR-015). - Full web UI (PR-010). ## Mini-Specs -- Entry point: - - `tgenie` starts the interactive TUI (tasks MVP). -- Screens/widgets: - - task list + task detail pane, modal flows for add/edit/delete. -- Keybindings: - - navigation + common actions (add/edit/done/delete/refresh/help/quit). -- API client: - - calls PR-002 endpoints; clear API-down errors and retry flow. 
-- Chat panel: - - placeholder UI until PR-003, but visible and non-crashing. -- Tests: - - smoke test for app start + basic widget interactions (where feasible). - -## References - -- `docs/01-design/DESIGN_TUI.md` -- `docs/01-design/DESIGN_ARCHITECTURE.md` -- `docs/01-design/DESIGN_DATA.md` +- Textual-based app with list/detail split view and modal forms. +- Keybindings for navigation and common actions. +- Async API client for PR-002 endpoints with retry flow. +- Clear UI states for empty list and API-down scenarios. ## User Stories -- As a user, I can open `tgenie` and immediately see my tasks with status/priority. -- As a user, I can create a task quickly without remembering flags. -- As a user, I can edit a task (title/description/eta/priority) and see it update immediately. -- As a user, I can mark a task done and watch it leave the “pending” list. -- As a user, if the API is down, I see a clear “cannot connect” state (not a traceback). +- As a user, I can open `tgenie` and see my tasks. +- As a user, I can create and edit tasks without leaving the UI. +- As a user, I can mark tasks done and see status update. +- As a user, I see a clear error when the API is unavailable. -## UX Notes +## UX Notes (if applicable) -- MVP is “tasks-first”; chat can be a visible placeholder until PR-003. -- Prefer visible affordances (help bar / action hints) over hidden shortcuts-only UX. -- Destructive actions (delete) require confirmation. +- Prefer visible affordances (help bar, hints) over hidden shortcuts. +- Destructive actions require confirmation. ## Technical Design -### TUI framework +### Architecture -- Prefer **Textual** for a modern full-screen TUI and rapid iteration. -- Use httpx.AsyncClient for API calls -- Use reactive variables for state management (filters, loading state) -- Style with TCSS (Textual CSS) +- Textual app in `backend/cli/tui/` with screens and widgets for list/detail/forms. +- Thin client: all task mutations go through the API. 
-### Architecture +### Data Model / Migrations -**File Structure:** -``` -backend/cli/tui/ -├── __init__.py -├── app.py # TodoApp (main app class) -├── client.py # TaskAPIClient (async HTTP wrapper) -├── styles.tcss # TCSS styling -├── screens/ -│ ├── __init__.py -│ ├── main.py # MainScreen (list + detail split view) -│ ├── task_form.py # TaskFormScreen (add/edit modal) -│ └── help.py # HelpScreen -└── widgets/ - ├── __init__.py - ├── task_list.py # TaskListView + TaskItem + TaskRow - ├── task_detail.py # TaskDetailPanel (rich table) - ├── filter_bar.py # FilterBar (filter buttons) - └── status_bar.py # StatusBar (bottom status) -``` - -**Component Layout:** -- A Textual app with: - - task list pane (filterable, left side) - - task detail pane (selected task, right side) - - filter bar at top (status/priority filters) - - status/help bar at bottom (keybindings) - - chat pane placeholder (stub, for PR-003) - -**Message Passing:** -- TaskListView posts TaskSelected message -- MainScreen receives and updates TaskDetailPanel -- TaskFormScreen posts TaskUpdated message on save -- App handles task refresh/notifications - -**Thin client principle:** -- all task operations go through the Task CRUD API -- no direct DB access from the TUI -- API base URL configurable via env var/config +- N/A. ### API Contract -- Requires PR-002 endpoints: - - `POST /api/v1/tasks` - - `GET /api/v1/tasks` - - `GET /api/v1/tasks/{id}` - - `PATCH /api/v1/tasks/{id}` - - `DELETE /api/v1/tasks/{id}` -- Mark done uses PATCH: `{"status": "completed"}`. +- Uses PR-002 task endpoints for all CRUD operations. + +### Background Jobs + +- N/A. + +### Security / Privacy + +- N/A. -### Resilience +### Error Handling -- API unreachable: - - show non-blocking banner + retry affordance - - avoid “hanging” renders (timeouts on HTTP calls) +- Timeouts and connection failures show a banner/state and allow retry. +- API errors are surfaced as friendly messages, not stack traces. 
## Acceptance Criteria -- [ ] `tgenie` opens a responsive TUI (no stack traces). -- [ ] Create/list/show/edit/done flows work end-to-end against the API. -- [ ] UI handles empty state and API-down state with clear messaging. -- [ ] Keyboard navigation works (arrows, j/k, g/G, enter, escape). -- [ ] All keybindings work (a/e/d/D/r/?/q). -- [ ] Filter by status/priority works correctly. -- [ ] Task list shows priority colors and status styling. -- [ ] Task detail panel shows formatted table with attachments. -- [ ] Destructive actions (delete) show confirmation modal. +### AC1: Tasks MVP Flows + +**Success Criteria:** +- [ ] `tgenie` opens a responsive TUI without tracebacks. +- [ ] Create/list/show/edit/done flows work against the API. + +### AC2: Navigation and Keybindings + +**Success Criteria:** +- [ ] Navigation keys and action bindings work consistently. +- [ ] Delete requires confirmation. + +### AC3: Empty and Error States + +**Success Criteria:** +- [ ] Empty list and API-down states are clear and recoverable. ## Test Plan ### Automated -- Unit: UI state reducers / formatting utilities (where applicable). -- Integration (light): API client calls mocked via HTTPX mock transport. -- Widget tests: TaskRow rendering, TaskListView message passing, reactive properties -- App tests: keybindings, screen navigation, error handling with mocked API - -**Test structure:** -``` -tests/test_tui/ -├── __init__.py -├── test_app.py # TodoApp lifecycle, keybindings, error handling -├── test_widgets.py # TaskRow, TaskListView, TaskDetailPanel rendering -└── test_screens.py # MainScreen, TaskFormScreen modal behavior -``` - -**Running tests:** -```bash -pytest tests/test_tui/ -v -textual run --dev backend.cli.tui.app:TodoApp -``` +- Widget tests for rendering and message passing. +- App tests for keybindings and error handling with mocked API client. ### Manual -1. Start the API. -2. 
Run `tgenie`: - - empty state renders correctly with hint to add task - - add a task from the UI → task appears in list with correct styling - - open task detail → shows fields in rich table format - - edit task title/eta/priority → persists immediately - - mark task done → status updates in list (strikethrough) - - delete task → shows confirmation modal - - test all filters (1/2/3/4 keys for status, p for priority) - - test search (/ key) → filters by title/description -3. Stop the API and re-run `tgenie`: - - UI shows "cannot connect" state and recovery guidance - - 'r' key retries connection -4. Test all keybindings: - - 'q' quits app - - '?' shows help screen - - 'a' opens add task form - - 'e' opens edit task form - - 'd' marks task done - - 'D' deletes task with confirmation - - arrows/j/k navigate task list - - g/G jump to top/bottom - - Enter shows task details +- Run the API and verify task flows end to end. +- Stop the API and verify the TUI shows a recoverable error state. ## Notes / Risks / Open Questions -- If Textual is adopted, we should ensure a non-interactive mode still exists (`tgenie add`, etc.) for scripting (PR-009). +- Ensure non-interactive CLI commands remain available (PR-009). diff --git a/docs/02-implementation/pr-specs/PR-009-cli-subcommands.md b/docs/02-implementation/pr-specs/PR-009-cli-subcommands.md index 8a89a8d..b13e60d 100644 --- a/docs/02-implementation/pr-specs/PR-009-cli-subcommands.md +++ b/docs/02-implementation/pr-specs/PR-009-cli-subcommands.md @@ -1,284 +1,124 @@ # PR-009: CLI Subcommands (Secondary) (Spec) **Status:** Spec Only -**Depends on:** PR-002 +**Depends on:** PR-002, PR-003B **Last Reviewed:** 2025-12-29 ## Goal -Provide non-interactive commands for scripting and automation, while keeping the interactive TUI as the primary UX. +Provide non-interactive commands for scripting while the TUI remains primary. ## User Value -- Users can automate workflows (`cron`, shell scripts, quick one-liners). 
-- Enables easy debugging of API behavior without the TUI. +- Users can automate workflows in shell scripts or cron. +- Developers can debug API behavior without the TUI. + +## References + +- `docs/01-design/DESIGN_CLI.md` +- `docs/01-design/API_REFERENCE.md` +- `docs/01-design/DESIGN_TUI.md` +- `docs/02-implementation/pr-specs/PR-008-interactive-tui.md` ## Scope ### In -- Subcommands (initial set): - - `tgenie add` - - `tgenie list` - - `tgenie show` - - `tgenie edit` - - `tgenie done` - - `tgenie delete` - - Output conventions: - - human-friendly default - - optional `--json` for scripting (recommended) +- `tgenie add|list|show|edit|done|delete` subcommands. +- `tgenie agent run` and `tgenie agent status` for autonomous workflows. +- Human-friendly output with optional `--json` for scripting. +- Commands call the API, not the DB directly. +- Stable exit codes and clear error messages. ### Out -- Full import/export formats (can be added later). -- Power-user TUI features (PR-008 iteration). +- Import/export formats. +- Advanced TUI power-user features. ## Mini-Specs -- Commands: - - `tgenie add|list|show|edit|done|delete` (non-interactive). -- Output: - - human-friendly by default; optional `--json` where useful. -- API integration: - - commands call the API, not DB directly. -- Error handling: - - stable exit codes and actionable errors when API is down or args invalid. -- Tests: - - CLI runner tests for basic flag parsing + JSON output validity. +- Typer-based CLI under `backend/cli/main.py`. +- Consistent flags across commands (status, priority, eta, tags). +- `--json` output for list/show/add/edit where useful. +- Exit codes for API errors and invalid args. +- `tgenie agent run "<goal>"` starts a run and returns a run ID. +- `tgenie agent status <run_id>` prints status (supports `--json`).
-## References +## User Stories -- `docs/01-design/DESIGN_CLI.md` (command UX and flags) -- `docs/01-design/API_REFERENCE.md` (API endpoints) -- `docs/01-design/DESIGN_TUI.md` (division of responsibilities: TUI vs subcommands, keybindings, screen layout) -- `docs/02-implementation/pr-specs/PR-008-interactive-tui.md` (TUI implementation) +- As a user, I can add and list tasks from shell scripts. +- As a user, I can request machine-readable output with `--json`. +- As a user, I get clear errors when the API is down. +- As a user, I can run an agent goal from the CLI and check its status. -## Technical Design +## UX Notes (if applicable) -- Thin client: subcommands call to API, not the DB directly. -- Provide stable scripting behavior: - - exit code `0` on success, non-zero on errors - - `--json` prints machine-readable output only (no extra formatting) - - Prefer consistent flags across commands (e.g., `--status`, `--priority`, `--eta`). +- Keep output stable and minimal in `--json` mode. 
## Technical Design -### CLI Framework - -```python -# backend/cli/main.py -import typer -from typing import Optional - -app = typer.Typer( - name="tgenie", - help="Personal task management with AI chat", - no_args_is_help=True, -) - -@app.command() -def add( - title: str, - description: Optional[str] = typer.Option(None, "-d", "--description"), - attach: list[str] = typer.Option(None, "-a", "--attach", multiple=True), - eta: Optional[str] = typer.Option(None, "-e", "--eta"), - priority: str = typer.Option("medium", "-p", "--priority"), - status: str = typer.Option("pending", "-s", "--status"), - json_output: bool = typer.Option(False, "--json", help="Output in JSON format"), -): - """Create a new task.""" - # Implementation would call API or TUI - typer.echo(f"Task '{title}' created") - -@app.command() -def list_cmd( - status: Optional[str] = typer.Option(None, "--status"), - priority: Optional[str] = typer.Option(None, "--priority"), - due: Optional[str] = typer.Option(None, "--due"), - search: Optional[str] = typer.Option(None, "--search"), - limit: int = typer.Option(50, "--limit"), - json_output: bool = typer.Option(False, "--json", help="Output in JSON format"), -): - """List tasks with optional filters.""" - # Implementation would call API and format output - typer.echo(f"Listing tasks...") - -@app.command() -def show( - task_id: str, - json_output: bool = typer.Option(False, "--json", help="Output in JSON format"), -): - """Show task details.""" - typer.echo(f"Showing task {task_id}") - -@app.command() -def edit( - task_id: str, - title: Optional[str] = typer.Option(None, "-t", "--title"), - description: Optional[str] = typer.Option(None, "-d", "--description"), - status: Optional[str] = typer.Option(None, "-s", "--status"), - priority: Optional[str] = typer.Option(None, "-p", "--priority"), - eta: Optional[str] = typer.Option(None, "-e", "--eta"), -): - """Update task fields.""" - typer.echo(f"Updating task {task_id}") - -@app.command() -def done( - 
task_id: str, - note: Optional[str] = typer.Option(None, "--note", help="Add completion note"), -): - """Mark task as completed.""" - typer.echo(f"Task {task_id} marked as completed") - -@app.command() -def delete( - task_id: str, - force: bool = typer.Option(False, "--yes", "-y", help="Skip confirmation"), -): - """Delete a task.""" - typer.echo(f"Task {task_id} deleted") - -if __name__ == "__main__": - app() -``` +### Architecture + +- Typer command group `tgenie` that delegates to a shared API client. +- Output formatter for human vs JSON output. + +### Data Model / Migrations + +- N/A. + +### API Contract + +- Uses PR-002 task endpoints. + +### Background Jobs + +- N/A. + +### Security / Privacy + +- N/A. + +### Error Handling + +- Non-zero exit codes on API/network errors. +- Actionable messages for 404/409/500 responses. ## Acceptance Criteria -- [ ] Core subcommands work end-to-end against the API. -- [ ] `--json` output is valid and stable for scripting (where provided). -- [ ] Helpful errors on API-down or invalid arguments. +### AC1: Core Subcommands -## Test Plan +**Success Criteria:** +- [ ] Add/list/show/edit/done/delete work against the API. -### Automated +### AC2: JSON Output + +**Success Criteria:** +- [ ] `--json` output is valid JSON with no extra formatting. -```python -# tests/test_cli/test_commands.py -import pytest -from typer.testing import CliRunner -from backend.cli.main import app +### AC3: Errors and Exit Codes -@pytest.fixture -def runner(): - return CliRunner() +**Success Criteria:** +- [ ] API errors and network failures return non-zero exit codes. 
-class TestAddCommand: - """Tests for 'tgenie add' command""" +### AC4: Agent Run Commands - def test_add_minimal(self, runner): - """Add task with title only.""" - result = runner.invoke(app, ["add", "Buy groceries"]) - assert result.exit_code == 0 - assert "Created" in result.output - - def test_add_with_all_options(self, runner): - """Add task with all fields.""" - result = runner.invoke(app, [ - "add", "Complete project", - "-d", "Finish implementation", - "-p", "high", - "-e", "2025-01-15", - "-s", "in_progress", - "--json" - ]) - assert result.exit_code == 0 - assert json.loads(result.output)["priority"] == "high" - - def test_add_invalid_priority(self, runner): - """Reject invalid priority.""" - result = runner.invoke(app, ["add", "Test", "-p", "super-high"]) - assert result.exit_code != 0 - assert "Invalid" in result.output or "Error" in result.output - - -class TestListCommand: - """Tests for 'tgenie list' command""" - - def test_list_json_output(self, runner): - """List with JSON output.""" - result = runner.invoke(app, ["list", "--json"]) - assert result.exit_code == 0 - - # Verify valid JSON - import json - data = json.loads(result.output) - assert "tasks" in data - - def test_list_with_filters(self, runner): - """List with status and priority filters.""" - # Add a task first - runner.invoke(app, ["add", "Test task"]) - - result = runner.invoke(app, ["list", "--status", "pending"]) - assert result.exit_code == 0 - - -class TestEditCommand: - """Tests for 'tgenie edit' command""" - - def test_edit_status(self, runner): - """Update task status.""" - result = runner.invoke(app, ["edit", "task-123", "-s", "in_progress"]) - assert result.exit_code == 0 - - def test_edit_not_found(self, runner): - """Error on non-existent task.""" - # Mock API to return 404 - result = runner.invoke(app, ["edit", "nonexistent", "-s", "done"]) - assert result.exit_code != 0 - - -class TestDoneCommand: - """Tests for 'tgenie done' command""" - - def test_done_success(self, 
runner): - """Mark task as completed.""" - result = runner.invoke(app, ["done", "task-123"]) - assert result.exit_code == 0 - assert "completed" in result.output.lower() - - def test_done_with_note(self, runner): - """Mark task completed with note.""" - result = runner.invoke(app, ["done", "task-123", "--note", "Deployed successfully"]) - assert result.exit_code == 0 - - -class TestDeleteCommand: - """Tests for 'tgenie delete' command""" - - def test_delete_requires_confirmation(self, runner): - """Delete requires confirmation without --yes.""" - result = runner.invoke(app, ["delete", "task-123"]) - assert result.exit_code != 0 # Typer should prompt for confirmation - - def test_delete_with_force(self, runner): - """Delete with --yes skips confirmation.""" - result = runner.invoke(app, ["delete", "task-123", "--yes"]) - assert result.exit_code == 0 - - -class TestExitCodes: - """Tests for proper exit codes""" - - def test_success_returns_zero(self, runner): - """Successful commands return exit code 0.""" - result = runner.invoke(app, ["list"]) - assert result.exit_code == 0 - - def test_api_error_returns_nonzero(self, runner): - """API errors return non-zero exit code.""" - # Mock HTTP client to return 500 - result = runner.invoke(app, ["list"]) - assert result.exit_code != 0 -``` +**Success Criteria:** +- [ ] `tgenie agent run` starts an agent run and returns a run ID. +- [ ] `tgenie agent status` returns the current state of a run. + +## Test Plan + +### Automated + +- CLI runner tests for command behavior and JSON output. +- Mocked API failures to validate exit codes. +- Agent CLI tests with mocked agent service responses. ### Manual -1. Start API. -2. Run: - - `tgenie add "Test task"` - - `tgenie list` - - `tgenie show ` - - `tgenie done ` -3. Stop API; verify commands fail with clear guidance. +- Run commands against a live API and verify output. + +## Notes / Risks / Open Questions + +- Ensure CLI flags align with TUI field names and API enums. 
+- Agent commands depend on PR-003B and any agent run API contract. diff --git a/docs/02-implementation/pr-specs/PR-010-web-ui.md b/docs/02-implementation/pr-specs/PR-010-web-ui.md index 13c6e9d..345cb22 100644 --- a/docs/02-implementation/pr-specs/PR-010-web-ui.md +++ b/docs/02-implementation/pr-specs/PR-010-web-ui.md @@ -6,88 +6,106 @@ ## Goal -Provide a secondary web interface for: -- managing tasks -- richer viewing of attachments (longer text, links) -- optional chat streaming in the browser (once chat exists) +Provide a secondary web interface for managing tasks and viewing attachments, with +optional chat streaming. ## User Value -- Easy browsing and reading for long attachments. -- A fallback UX when terminal UI isn’t ideal. +- Easier reading of longer task descriptions and attachments. +- A fallback UX when terminal UI is not ideal. + +## References + +- `docs/01-design/DESIGN_WEB.md` +- `docs/01-design/DESIGN_CHAT.md` +- `docs/01-design/API_REFERENCE.md` ## Scope ### In -- Tasks pages: - - list + filters - - detail view - - create/edit (HTMX forms) -- Optional (if PR-003 exists): - - chat page with streaming responses +- Task list, detail, create, and edit pages (HTMX forms). +- Basic responsive layout. +- Optional chat page if PR-003 is implemented. ### Out -- Authentication/multi-user (future). -- Mobile-first polish beyond basic responsiveness (future iteration). +- Authentication/multi-user. +- Advanced mobile-first polish. ## Mini-Specs -- Pages: - - tasks list + task detail; create/edit flows (HTMX forms). -- Chat (optional): - - if PR-003 is present, chat page streams responses and handles reconnects. -- Notifications (optional): - - in-app notification feed view (if PR-011 exists). -- Design: - - responsive layout; minimal JS (HTMX + Tailwind or equivalent). -- Tests: - - basic route rendering + API integration smoke tests. +- FastAPI template routes for tasks list/detail and edit/create. 
+- HTMX interactions for inline updates and form submissions. +- Optional chat page using SSE via EventSource. -## References +## User Stories + +- As a user, I can view and edit tasks in a browser. +- As a user, I can read long attachment content comfortably. +- As a user, I can use chat in the browser when available. + +## UX Notes (if applicable) -- `docs/01-design/DESIGN_WEB.md` (page layouts + HTMX interactions) -- `docs/01-design/DESIGN_CHAT.md` (streaming/SSE handling) -- `docs/01-design/API_REFERENCE.md` (API endpoints consumed) +- Keep JS minimal and favor HTMX for interactions. ## Technical Design -- **Backend:** FastAPI with Jinja2Templates -- **Frontend:** - - **CSS:** Tailwind CSS (via CDN or standalone CLI) - - **JS:** HTMX for SPA-like feel without a full framework -- **Patterns:** - - `GET /tasks` → returns full page - - `POST /tasks` → returns HTMX fragment (single task row) to be prepended to the list - - `GET /tasks/{id}/edit` → returns HTMX fragment (form) to swap into the row/modal -- **Chat (optional):** - - Start with plain SSE via `EventSource` for streaming chat output. - - If we want tighter HTMX integration later, consider the `htmx-sse` extension. -- **Thin Client:** - - call the same backend services as the API (no duplicated business logic) - - rely on the Task API for CRUD +### Architecture + +- FastAPI + Jinja2 templates for server-rendered pages. +- HTMX for partial updates; Tailwind (or equivalent) for styling. + +### Data Model / Migrations + +- N/A. + +### API Contract + +- Uses existing task API for CRUD operations. +- Optional chat page uses `/api/v1/chat` SSE. + +### Background Jobs + +- N/A. + +### Security / Privacy + +- N/A (no auth in MVP). + +### Error Handling + +- Render friendly error states when API calls fail. ## Acceptance Criteria -- [ ] Task pages work end-to-end against the API. -- [ ] Basic responsive layout (desktop + narrow viewport). 
-- [ ] If PR-003 is present: chat page streams responses correctly and handles disconnects gracefully. +### AC1: Task Pages + +**Success Criteria:** +- [ ] List/detail/create/edit flows work against the API. + +### AC2: Responsive Layout + +**Success Criteria:** +- [ ] Pages remain usable on narrow viewports. + +### AC3: Optional Chat UI + +**Success Criteria:** +- [ ] If PR-003 is present, chat page streams responses and handles disconnects. ## Test Plan ### Automated -- Integration: fetch pages and verify key elements render. -- E2E (optional): Playwright smoke test for task list → create → detail. +- Integration tests for page rendering and basic actions. +- Optional Playwright smoke test for task flow. ### Manual -1. Start API. -2. Open tasks page; verify list renders. -3. Create a task; verify it appears and detail page loads. -4. If chat is enabled, open chat page and send a message; verify streaming. +- Verify task CRUD in the browser and resize for responsiveness. +- If chat enabled, verify streaming behavior. ## Notes / Risks / Open Questions -- Keep web UI optional/secondary; avoid blocking core UX iteration in the TUI. +- Keep the web UI optional so it does not block core TUI iteration. diff --git a/docs/02-implementation/pr-specs/PR-011-notifications.md b/docs/02-implementation/pr-specs/PR-011-notifications.md index f7bdae7..21013c4 100644 --- a/docs/02-implementation/pr-specs/PR-011-notifications.md +++ b/docs/02-implementation/pr-specs/PR-011-notifications.md @@ -6,105 +6,122 @@ ## Goal -Deliver early "daily value" by reminding user about tasks with ETAs (24h/6h/overdue), with a path that works both locally and in Docker. +Deliver reminders for tasks with ETAs (24h/6h/overdue) with a path that works locally +and in Docker. ## User Value -- Users feel app helping them without needing integrations or RAG. -- Encourages regular usage (reminders + quick completion loop). +- Users get timely reminders without external integrations. 
+- Encourages regular task completion. + +## References + +- `docs/01-design/DESIGN_NOTIFICATIONS.md` +- `docs/01-design/DESIGN_BACKGROUND_JOBS.md` +- `docs/01-design/REQUIREMENTS_AUDIT.md` ## Scope ### In -- Scheduling logic: - - reminders at configured offsets (default 24h + 6h) - - overdue alerts - - quiet hours - - dedup (don't spam same notification) -- Notification history persisted (so UI can show "what was sent"). -- Delivery mechanism (pragmatic): - - **Local run:** desktop notifications (e.g., `plyer`) - - **Docker run:** provide an in-app notification channel (Web UI / API), with desktop as optional later -- Scheduler runs every 1-5 minutes (configurable). +- Scheduler logic for reminders and overdue alerts. +- Quiet hours and deduplication. +- Persisted notification history for UI/TUI viewing. +- Delivery channels: local desktop notifications and in-app feed for Docker. +- Optional agent-run notifications (started/completed/failed) when agent system exists. ### Out -- Mobile notifications (future). -- Multi-device sync (future). +- Mobile notifications. +- Multi-device sync. ## Mini-Specs -- Data model: - - `notifications` table to persist delivery history (type, task_id, sent_at, channel, status, error). -- Scheduler: - - in-process tick loop (APScheduler or background task), configurable interval (default 60s). - - computes due reminders (24h/6h/overdue) for non-completed tasks with `eta`. - - deduplication via persisted history (no repeated sends for same task/type). -- Delivery channels: - - local dev: desktop notifications (e.g., `plyer`) - - Docker: in-app notification feed (API-backed) as the reliable baseline -- UX: - - quiet hours support (defer or suppress) - - clear “why did I get this?” content (task title + due time + shortcut to open task) -- Tests: - - time-based computations, deduplication, quiet hours, and Docker/local channel selection. +- `notifications` table to store delivery history and status. 
+- In-process scheduler tick (default 60s) to compute due reminders. +- Channel adapters for desktop (local) and in-app notifications. +- In-app API to list notifications for UI/TUI. +- Notification types for agent runs and tool failures (depends on PR-003B/PR-014). -## References +## User Stories -- `docs/01-design/DESIGN_NOTIFICATIONS.md` -- `docs/01-design/DESIGN_BACKGROUND_JOBS.md` -- `docs/01-design/REQUIREMENTS_AUDIT.md` (Docker notification constraints) +- As a user, I get reminders before tasks are due and when they are overdue. +- As a user, I can see a history of notifications in the app. +- As a user, notifications respect quiet hours. +- As a user, I can see when an agent run completes or fails. + +## UX Notes (if applicable) + +- Notifications should explain why they fired (task title + due time). ## Technical Design -### Scheduler Implementation - -- **Background Worker:** Use `asyncio.create_task` or a separate process started on app startup. -- **Precision:** Run every 60 seconds. -- **Logic:** - 1. Query `tasks` where `status != 'completed'` and `eta` is not null. - 2. For each task, check if a notification of type (24h, 6h, overdue) has already been sent (check `notifications` table). - 3. If not sent and `now() >= eta - offset`, trigger delivery. -- **Delivery:** - - `DesktopNotifier`: wrapper around `plyer` for local notifications. - - `InAppNotifier`: inserts into `notifications` table with `status='unread'`. - -### Notification Delivery Channels - -- Implement a `NotificationService` that exposes a single “tick” operation: - - query due tasks - - compute which reminder types should fire - - write delivery history (dedup) - - dispatch via the configured channel(s) -- Implement channel adapters: - - `DesktopNotifier` (local) wraps `plyer` - - `InAppNotifier` persists a notification record for UI/TUI to display +### Architecture + +- Background task or scheduler runs on app startup and calls a `NotificationService`. 
+- `NotificationService` computes due reminders, persists history, and dispatches + through configured channels. + +### Data Model / Migrations + +- `notifications` table: id, task_id, type, channel, status, error, sent_at, + created_at. + +### API Contract + +- `GET /api/v1/notifications` returns recent notifications for UI/TUI. +- `PATCH /api/v1/notifications/{id}` marks as read (optional). + +### Background Jobs + +- In-process scheduler runs every 60s (configurable). + +### Security / Privacy + +- Do not log notification content beyond metadata. + +### Error Handling + +- Delivery failures are stored with error reason and do not stop the scheduler. ## Acceptance Criteria -- [ ] Reminders fire at default offsets (24h/6h) and for overdue tasks. -- [ ] No duplicate notifications for the same task/type (dedup persisted). -- [ ] Quiet hours are respected (defer or suppress, per config). +### AC1: Scheduling and Overdue + +**Success Criteria:** +- [ ] Reminders fire at configured offsets and for overdue tasks. + +### AC2: Deduplication and History + +**Success Criteria:** +- [ ] No duplicate notifications for the same task/type. +- [ ] Notification history is persisted and queryable. + +### AC3: Delivery Channels + +**Success Criteria:** - [ ] Docker mode uses in-app notifications as the baseline channel. -- [ ] Notification history is persisted and queryable (for UI/TUI listing). +- [ ] Local mode can use desktop notifications. + +### AC4: Quiet Hours + +**Success Criteria:** +- [ ] Quiet hours suppress or defer notifications per config. ## Test Plan ### Automated -- Unit (pure logic): given `(now, eta, offsets, history)` compute which notifications should fire. -- Integration (DB): history persistence prevents duplicates across ticks/restarts. -- Channel selection: local run uses desktop channel; Docker run uses in-app channel. -- Quiet hours: notifications are suppressed/deferred during quiet window. +- Unit tests for schedule computation and quiet hours. 
+- Integration tests for history persistence and dedup across restarts. +- Channel selection tests for local vs Docker mode. ### Manual -1. Create a task due in ~10 minutes; temporarily set schedule to `["10m"]` for testing. -2. Run scheduler job (or wait for interval). -3. Verify a notification appears and is recorded in history. -4. Mark task complete; ensure no further reminders fire. +- Configure a short offset and verify reminders fire. +- Run in Docker and confirm in-app notifications appear. ## Notes / Risks / Open Questions -- Desktop notifications from Docker are non-trivial; plan for an "in-app notifications" channel as a reliable baseline. +- Desktop notifications from Docker are unreliable; in-app feed is the baseline. +- Agent notifications depend on agent run APIs and may be staged after PR-003B. diff --git a/docs/02-implementation/pr-specs/PR-012-deployment-docs.md b/docs/02-implementation/pr-specs/PR-012-deployment-docs.md index f64d825..de15b0b 100644 --- a/docs/02-implementation/pr-specs/PR-012-deployment-docs.md +++ b/docs/02-implementation/pr-specs/PR-012-deployment-docs.md @@ -6,89 +6,103 @@ ## Goal -Make the system easy to run, upgrade, and back up: -- Docker Compose “just works” -- docs match reality -- data persistence is safe +Make the system easy to run, upgrade, and back up with reliable Docker and accurate +docs. ## User Value -- Faster setup for new machines. -- Lower risk of data loss (clear volumes + backup instructions). +- Faster setup on new machines. +- Lower risk of data loss via clear persistence and backup guidance. 
+ +## References + +- `docs/SETUP.md` +- `docs/01-design/DESIGN_DATA.md` +- `docs/02-implementation/PR-PLANS.md` ## Scope ### In -- Docker Compose configuration: - - API service - - persistent volume for SQLite DB and vector store - - env var configuration - - health checks -- Documentation updates: - - setup instructions - - backup/restore - - “what works today” vs planned +- Docker Compose configuration with persistent volumes and health checks. +- Environment variable documentation and `.env.example` alignment. +- Backup/restore and upgrade instructions. ### Out -- Cloud deployment (Kubernetes, etc.). -- Multi-user auth/HTTPS termination (future). +- Kubernetes or cloud deployment. +- Multi-user auth and HTTPS termination. ## Mini-Specs -- Docker Compose: - - one-command local run with persisted data volumes. -- Environment docs: - - `.env.example` and `docs/SETUP.md` aligned with the actual config keys. -- Developer ergonomics: - - smoke checks (`/health`, basic task CRUD) documented. -- Docs hygiene: - - docs link/name checks in CI (no `todo` command examples; consistent `tgenie` usage; no broken relative links). -- Release: - - minimal “how to run” instructions and expected ports/paths. +- Compose file for API service with volume mounts for DB and vector store. +- Health check on `/health` and clear port mappings. +- Docs updates for setup, backups, and upgrade path. -## References +## User Stories -- `docs/SETUP.md` -- `docs/01-design/DESIGN_DATA.md` (data locations + backup) -- `docs/02-implementation/PR-PLANS.md` (current roadmap) +- As a user, I can start the app with `docker compose up` and keep my data. +- As a user, I can follow docs and get a working setup on a new machine. + +## UX Notes (if applicable) + +- N/A. ## Technical Design -### Docker Compose +### Architecture + +- Docker Compose with a single API service and named volumes for data. +- Document environment variables and default ports. + +### Data Model / Migrations + +- N/A. 
-- Define volumes for: - - SQLite DB - - vector store - - attachment cache (if needed) -- Add health checks and clear env var configuration. +### API Contract -### Upgrade path +- N/A. -- When migrations exist: - - document `tgenie db upgrade` for upgrades - - document backup/restore workflows (SQL dump + restore) +### Background Jobs + +- N/A. + +### Security / Privacy + +- Document data locations and backup guidance to avoid accidental loss. + +### Error Handling + +- Health checks surface startup failures in Docker. ## Acceptance Criteria -- [ ] `docker compose up` starts successfully and `/health` returns ok. +### AC1: Docker Compose Boots Cleanly + +**Success Criteria:** +- [ ] `docker compose up` starts successfully and `/health` returns OK. + +### AC2: Data Persistence + +**Success Criteria:** - [ ] Data persists across container restarts. -- [ ] Docs are accurate and consistent with the chosen CLI name and UX. + +### AC3: Docs Match Reality + +**Success Criteria:** +- [ ] Setup and backup docs match actual commands, ports, and paths. ## Test Plan ### Automated -- Optional: CI smoke test that builds images and runs health check (if CI exists). +- Optional CI smoke test to build images and hit `/health`. ### Manual -1. `docker compose up -d` -2. Create a task (via TUI or API). -3. Restart containers; verify task persists. -4. Run `tgenie db upgrade` inside container context and verify no errors. +- Run compose, create a task, restart, and verify data persists. +- Follow `docs/SETUP.md` and validate commands. ## Notes / Risks / Open Questions -- Keep local dev (no docker) and docker paths aligned to avoid “works on my machine” data-loss issues. +- Keep docker and local paths aligned to avoid data-loss confusion. 
diff --git a/docs/02-implementation/pr-specs/PR-013-event-system.md b/docs/02-implementation/pr-specs/PR-013-event-system.md new file mode 100644 index 0000000..18ead69 --- /dev/null +++ b/docs/02-implementation/pr-specs/PR-013-event-system.md @@ -0,0 +1,429 @@ +# PR-013: Event System + Realtime Updates (Spec) + +**Status:** Spec Only +**Depends on:** PR-002 +**Last Reviewed:** 2026-01-01 + +## Goal + +Introduce a lightweight event system for task lifecycle events and realtime updates +for UI and agent hooks. + +## User Value + +- UIs can update in realtime without polling. +- Agent workflows can react to task and attachment changes. + +## References + +- `docs/01-design/DESIGN_ARCHITECTURE.md` +- `docs/01-design/DESIGN_DATA.md` + +## Scope + +### In + +- Event schema and naming conventions (e.g., `task.created`, `task.updated`). +- Event emitter hooks in task and attachment services. +- Persistent event log for replay and SSE resume. +- SSE endpoint for realtime updates. +- Optional webhook delivery to configured endpoints. + +### Out + +- External message brokers (Kafka, SNS/SQS). +- Guaranteed delivery beyond basic retry/backoff. +- Multi-tenant or ACL-based event filtering. + +## Mini-Specs + +- `events` table with `id`, `type`, `payload`, `created_at`. +- `EventService.emit(type, payload)` used by task/attachment mutations. +- `GET /api/v1/events` SSE stream with `Last-Event-ID` support. +- Webhook dispatcher reads configured endpoints and retries on failure. + +## User Stories + +- As a user, my UI updates when tasks change without manual refresh. +- As a developer, I can subscribe to task lifecycle events. + +## UX Notes (if applicable) + +- N/A. + +## Technical Design + +### Architecture + +- In-process event bus writes to the `events` table and publishes to subscribers. +- SSE stream reads from the event log and emits ordered events. +- Webhook dispatcher sends event payloads to configured endpoints. 
+ +### Data Model / Migrations + +- `events` table for lightweight event storage and replay. + +### API Contract + +- `GET /api/v1/events` returns `text/event-stream` with `event:` and `data:` fields. +- Optional query params: `types=task.created,task.updated` for filtering. + +### Background Jobs + +- Webhook delivery worker with basic retry/backoff. + +### Security / Privacy + +- Event payloads include IDs and minimal metadata only. +- Webhook targets are allowlisted via config. + +### Error Handling + +- Failed webhook deliveries are logged and retried without blocking core flows. +- SSE clients can reconnect with `Last-Event-ID`. + +## Acceptance Criteria + +### AC1: Event Emission + +**Success Criteria:** +- [ ] Task create/update/delete emits the expected event types. +- [ ] Attachment create/delete emits events when applicable. + +### AC2: SSE Streaming + +**Success Criteria:** +- [ ] SSE endpoint streams events in order. +- [ ] Reconnect with `Last-Event-ID` resumes without duplicates. + +### AC3: Webhook Delivery (Optional) + +**Success Criteria:** +- [ ] Webhooks receive events when configured. +- [ ] Delivery failures are retried and logged. + +## Test Plan + +### Automated + +- Unit tests for event emission and payloads. +- Integration tests for SSE streaming and resume behavior. +- Webhook dispatcher tests with mocked HTTP endpoints. + +### Manual + +- Create a task and observe SSE events in a terminal client. +- Configure a webhook endpoint and verify deliveries. + +## Notes / Risks / Open Questions + +- Decide how long to retain events in the log. 
+
+---
+
+## Skill Enrichment: context-fundamentals
+
+### Event Batching for Efficiency
+
+Aggregate high-frequency events to reduce processing overhead:
+
+```python
+import uuid
+from collections import defaultdict
+from dataclasses import dataclass, field
+from datetime import datetime, timedelta
+
+@dataclass
+class EventBatch:
+    """Batched events for efficient processing."""
+    events: list[dict]
+    batch_id: str
+    created_at: datetime
+    target_sse_client_id: str | None = None
+
+class EventBatcher:
+    """Event batching and debouncing for high-frequency events."""
+
+    def __init__(
+        self,
+        batch_window_ms: int = 100,
+        max_batch_size: int = 50
+    ):
+        self.batch_window_ms = batch_window_ms
+        self.max_batch_size = max_batch_size
+        self.batches: dict[str, EventBatch] = {}
+        self.pending_events: defaultdict[str | None, list] = defaultdict(list)
+        self.last_emit_time: dict[str | None, datetime] = {}
+
+    async def emit(
+        self,
+        event_type: str,
+        payload: dict,
+        client_id: str | None = None
+    ) -> str:
+        """Emit event with batching."""
+        event_id = str(uuid.uuid4())
+        now = datetime.now()
+
+        # Add to pending events
+        self.pending_events[client_id].append({
+            "id": event_id,
+            "type": event_type,
+            "payload": payload,
+            "created_at": now
+        })
+
+        # Check if batch window expired
+        last_emit = self.last_emit_time.get(client_id, now - timedelta(days=1))
+        if (now - last_emit).total_seconds() >= self.batch_window_ms / 1000:
+            await self._flush_batch(client_id)
+
+        # Flush if batch size exceeded
+        if len(self.pending_events[client_id]) >= self.max_batch_size:
+            await self._flush_batch(client_id)
+
+        return event_id
+
+    async def _flush_batch(self, client_id: str) -> None:
+        """Flush pending events as a batch."""
+        if not self.pending_events[client_id]:
+            return
+
+        events = self.pending_events[client_id].copy()
+        self.pending_events[client_id].clear()
+
+        batch = EventBatch(
+            events=events,
+            batch_id=str(uuid.uuid4()),
+            created_at=datetime.now()
+        )
+
+        # 
Store batch in memory (for SSE delivery) + self.batches[f"{client_id}:{batch.batch_id}"] = batch + + # Emit batch event + await self._emit_batch_event(client_id, batch) + + self.last_emit_time[client_id] = datetime.now() + + logger.info({ + "event": "event_batch_emitted", + "client_id": client_id, + "batch_size": len(events), + "batch_id": batch.batch_id + }) + + async def _emit_batch_event( + self, + client_id: str, + batch: EventBatch + ) -> None: + """Emit the batch metadata event.""" + # Store batch metadata in event log + batch_event = { + "id": str(uuid.uuid4()), + "type": "event_batch", + "payload": { + "batch_id": batch.batch_id, + "event_count": len(batch.events), + "created_at": batch.created_at.isoformat() + }, + "created_at": datetime.now().isoformat() + } + + # Store in database + await store_event(batch_event) + + # Emit to SSE clients + await sse_broadcast(client_id, { + "type": "batch", + "data": batch + }) +``` + +### Event Deduplication + +Prevent duplicate events from being emitted: + +```python +from hashlib import md5 +from typing import Set + +class EventDeduplicator: + """Deduplicate events based on content hash.""" + + def __init__(self, dedup_window_seconds: int = 10): + self.dedup_window_seconds = dedup_window_seconds + self.recent_hashes: Set[str] = set() + + async def emit_if_unique( + self, + event_type: str, + payload: dict + ) -> Optional[str]: + """Emit event if not duplicate.""" + # Create content hash + content = f"{event_type}:{json.dumps(payload, sort_keys=True)}" + content_hash = md5(content.encode()).hexdigest() + + # Check if recently emitted + if content_hash in self.recent_hashes: + logger.debug({ + "event": "event_deduped", + "type": event_type, + "hash": content_hash + }) + return None + + # Add to recent hashes + self.recent_hashes.add(content_hash) + + # Clean old hashes (beyond window) + if len(self.recent_hashes) > 1000: + # Keep only recent 1000 hashes + self.recent_hashes = set(list(self.recent_hashes)[-1000:]) + + 
return await emit_event(event_type, payload)
+
+    async def cleanup_old_hashes(self) -> int:
+        """Clear the deduplication cache and return how many hashes were dropped.
+
+        NOTE(review): stored hashes carry no timestamps, so the configured
+        ``dedup_window_seconds`` cannot be applied per-hash here. Until hashes
+        are stored as hash -> timestamp pairs, this clears the whole cache.
+        """
+        cleaned = len(self.recent_hashes)
+        self.recent_hashes.clear()
+
+        logger.debug({
+            "event": "event_dedup_cleanup",
+            "cleaned_count": cleaned
+        })
+        return cleaned
+```
+
+### Event Filtering and Routing
+
+Filter events by type and content to reduce context noise:
+
+```python
+@dataclass
+class EventFilter:
+    """Event filtering rules for context optimization."""
+    include_types: List[str] = field(default_factory=list)
+    exclude_types: List[str] = field(default_factory=list)
+    include_patterns: List[str] = field(default_factory=list)
+    exclude_patterns: List[str] = field(default_factory=list)
+
+    async def should_emit(
+        self,
+        event_type: str,
+        payload: dict
+    ) -> bool:
+        """Check if event should be emitted to context."""
+        # Check type filters
+        if self.include_types and event_type not in self.include_types:
+            return False
+
+        if event_type in self.exclude_types:
+            return False
+
+        # Check content patterns
+        payload_str = json.dumps(payload, sort_keys=True)
+
+        for exclude in self.exclude_patterns:
+            if exclude in payload_str:
+                return False
+
+        # Content must match at least one include pattern (if any are set).
+        # NOTE(review): "any" rather than "all" — the example filter below
+        # lists task_id/run_id/agent_name, and a typical payload contains
+        # only one of these; requiring all would drop nearly every event.
+        if self.include_patterns and not any(
+            pattern in payload_str for pattern in self.include_patterns
+        ):
+            return False
+
+        return True
+
+# Example filters for agent context
+AGENT_CONTEXT_FILTER = EventFilter(
+    include_types=[
+        "task.created",
+        "task.updated",
+        "attachment.created",
+        "agent.run_started",
+        "agent.run_completed"
+    ],
+    exclude_types=[
+        # Exclude high-frequency noise
+        "heartbeat",
+        "ping",
+        "debug"
+    ],
+    include_patterns=[
+        # Only include events with agent-relevant content
+        "task_id",
+        "run_id",
+        "agent_name"
+    ]
+)
+```
+
+### Event Prioritization
+
+Prioritize events for context loading (most relevant first):
+
+```python
+from enum import Enum
+from 
typing import Callable + +class EventPriority(Enum): + CRITICAL = 1 + HIGH = 2 + MEDIUM = 3 + LOW = 4 + INFO = 5 + +def get_event_priority( + event_type: str, + payload: dict +) -> EventPriority: + """Determine event priority based on type and content.""" + # Critical: errors, failures, security events + if event_type in ["error", "failure", "security_alert"]: + return EventPriority.CRITICAL + + # High: task completions, agent results + if event_type in ["task.completed", "agent.run_completed"]: + return EventPriority.HIGH + + # Medium: task updates, attachments + if event_type in ["task.updated", "attachment.created", "attachment.updated"]: + return EventPriority.MEDIUM + + # Low: informational events + if event_type in ["heartbeat", "ping", "debug"]: + return EventPriority.LOW + + # Default info + return EventPriority.INFO + +async def emit_with_priority( + event_type: str, + payload: dict +) -> str: + """Emit event with priority metadata.""" + priority = get_event_priority(event_type, payload) + + event_id = await emit_event( + event_type, + { + **payload, + "priority": priority.value + } + ) + + logger.info({ + "event": "event_emitted_with_priority", + "type": event_type, + "priority": priority.name, + "event_id": event_id + }) + + return event_id +``` diff --git a/docs/02-implementation/pr-specs/PR-014-multi-agent-orchestration.md b/docs/02-implementation/pr-specs/PR-014-multi-agent-orchestration.md new file mode 100644 index 0000000..cbd7dde --- /dev/null +++ b/docs/02-implementation/pr-specs/PR-014-multi-agent-orchestration.md @@ -0,0 +1,970 @@ +# PR-014: Multi-Agent Orchestration (Spec) + +**Status:** Spec Only +**Depends on:** PR-003B, PR-013 +**Last Reviewed:** 2026-01-01 + +## Goal + +Enable multiple agents to collaborate on a goal with shared memory and coordinated +execution. + +## User Value + +- Complex goals can be decomposed across specialized agents. +- Users can track agent progress and outcomes over time. 
+ +## References + +- `docs/01-design/DESIGN_CHAT.md` +- `docs/01-design/DESIGN_ARCHITECTURE.md` + +## Scope + +### In + +- Agent manager that spawns and supervises agent runs. +- Shared memory store for agent context and summaries. +- Run lifecycle: start, pause, resume, cancel, complete. +- Concurrency controls and rate limiting. + +### Out + +- Agent marketplace or third-party agent plugins. +- Cross-user collaboration and shared workspaces. + +## Mini-Specs + +- `AgentRun` record with status, goal, timestamps, and summary. +- `AgentManager` orchestrates runs and delegates tool calls. +- Shared memory storage for summaries and recent context. +- Event integration to emit run status updates (PR-013). + +## User Stories + +- As a user, I can launch an agent run and watch progress updates. +- As a user, I can pause or cancel a long-running agent. + +## UX Notes (if applicable) + +- Provide a clear "running" indicator and last-updated timestamp. + +## Technical Design + +### Architecture + +- Supervisor process manages agent workers and run state. +- Agent workers execute plans using the tool-calling foundation. +- Memory store persists summaries and last-known context for runs. + +### Data Model / Migrations + +- `agent_runs` table: id, goal, status, started_at, finished_at, summary. +- Optional `agent_messages` table for recent context slices. + +### API Contract + +- `POST /api/v1/agents/run` starts a run and returns `run_id`. +- `GET /api/v1/agents/{run_id}` returns status and summary. +- `POST /api/v1/agents/{run_id}/cancel` cancels a run. + +### Background Jobs + +- Agent worker loop and run scheduler. + +### Security / Privacy + +- Run data stored locally; no prompts or responses logged by default. + +### Error Handling + +- Failed runs are marked with error status and short reason. + +## Acceptance Criteria + +### AC1: Run Lifecycle + +**Success Criteria:** +- [ ] Agent runs can be started, paused, resumed, and canceled. 
+- [ ] Run status is persisted and queryable. + +### AC2: Concurrency Control + +**Success Criteria:** +- [ ] Concurrency limits prevent excessive parallel runs. +- [ ] Runs back off or queue when limits are reached. + +### AC3: Event Integration + +**Success Criteria:** +- [ ] Run status updates emit events for UI consumption. + +## Test Plan + +### Automated + +- Unit tests for run state transitions. +- Integration tests for API start/cancel/status. +- Concurrency tests for max-parallel settings. + +### Manual + +- Start a run, observe status updates, then cancel it. + +## Notes / Risks / Open Questions + +- Decide how much agent context to persist vs summarize. + +--- + +## Skill Enrichment: multi-agent-patterns + +### Architectural Pattern Selection + +Implement **Swarm/Peer-to-Peer** pattern (not Supervisor) to avoid telephone game: + +```python +@dataclass +class AgentDefinition: + """Agent definition for swarm pattern.""" + name: str + role: str # researcher, planner, executor + tools: list[str] # Tools available to this agent + system_prompt: str | None = None # Optional role-specific prompt + can_handoff: bool = True # Supports peer handoff + +class SwarmOrchestrator: + """Peer-to-peer agent orchestration without supervisor bottleneck.""" + + def __init__(self): + self.agents: dict[str, AgentDefinition] = {} + self.active_runs: dict[str, AgentRun] = {} + + def register_agent(self, agent_def: AgentDefinition): + """Register agent with role and tools.""" + self.agents[agent_def.name] = agent_def + logger.info({ + "event": "agent_registered", + "agent_name": agent_def.name, + "role": agent_def.role, + "tools": agent_def.tools + }) + + async def delegate_task(self, goal: str, context: dict) -> AgentRun: + """Delegate to appropriate specialist agent.""" + # Analyze goal to select initial agent + initial_agent = self._select_agent(goal) + + # Create run with isolated context + run_id = str(uuid.uuid4()) + run = AgentRun( + id=run_id, + goal=goal, + 
agent_name=initial_agent.name, + status="running", + started_at=datetime.now() + ) + + self.active_runs[run_id] = run + + logger.info({ + "event": "run_started", + "run_id": run_id, + "agent": initial_agent.name, + "goal": goal + }) + + # Initial agent has control + return await initial_agent.execute(run_id, goal, context, self) + + def _select_agent(self, goal: str) -> AgentDefinition: + """Select appropriate agent based on goal analysis.""" + goal_lower = goal.lower() + + if any(kw in goal_lower for kw in ["research", "find", "search", "look up"]): + return self.agents.get("researcher") + elif any(kw in goal_lower for kw in ["plan", "design", "organize", "schedule"]): + return self.agents.get("planner") + elif any(kw in goal_lower for kw in ["execute", "do", "implement", "write", "create"]): + return self.agents.get("executor") + else: + return self.agents.get("generalist") + +# Example agent definitions +RESEARCHER = AgentDefinition( + name="researcher", + role="Research specialist", + system_prompt="You are a research specialist. Gather information from available sources and provide factual findings.", + tools=["search_tasks", "search_rag", "fetch_attachment"], + can_handoff=True +) + +PLANNER = AgentDefinition( + name="planner", + role="Planning specialist", + system_prompt="You are a planning specialist. Break down goals into actionable steps and delegate to specialists.", + tools=["list_tasks", "create_task", "add_attachment"], + can_handoff=True +) + +EXECUTOR = AgentDefinition( + name="executor", + role="Execution specialist", + system_prompt="You are an execution specialist. Perform tasks directly using available tools. 
Focus on correctness and efficiency.", + tools=["update_task", "mark_done", "delete_task"], + can_handoff=False +) + +orchestrator = SwarmOrchestrator() +orchestrator.register_agent(RESEARCHER) +orchestrator.register_agent(PLANNER) +orchestrator.register_agent(EXECUTOR) +``` + +### Handoff Protocol + +Implement explicit handoff mechanism with `forward_message` tool: + +```python +def forward_to_agent( + agent_name: str, + message: str, + context: dict | None = None +) -> dict: + """ + Forward to another agent with full context preservation. + + Use when: + - Current agent lacks required tools for task + - Task requires different specialization + - Agent reached depth/convergence limit + + Returns: + Handoff directive with target agent and full message + """ + return { + "type": "handoff", + "target_agent": agent_name, + "message": message, + "context": context # Pass accumulated context + } + +# Register as tool in agent tool definitions +HANDOFF_TOOL = ToolDefinition( + name="forward_to_agent", + description=( + "Transfer control to a different agent. " + "Use when current agent lacks required tools or specialization. " + "Preserves full conversation context for handoff." 
+ ), + parameters=ToolParameter( + name="args", + type="object", + required=True, + properties={ + "agent_name": { + "type": "string", + "description": "Target agent name" + }, + "message": { + "type": "string", + "description": "Reason for handoff and accumulated findings" + } + } + ) +) + +# Agent usage +async def researcher_agent(goal: str, context: dict) -> dict: + """Researcher agent with handoff capability.""" + findings = await perform_research(goal) + + # If goal requires planning, handoff to planner + if requires_planning(goal): + return forward_to_agent("planner", f"Research findings: {findings}", context) + + # If goal requires execution, handoff to executor + if requires_execution(goal): + return forward_to_agent("executor", f"Action plan: {findings}", context) + + return {"type": "final", "result": findings} +``` + +### Convergence and Consensus + +Implement debate protocol for complex decisions: + +```python +class DebateCoordinator: + """Coordinate debate between multiple agents.""" + + def __init__(self): + self.participants: list[str] = [] + self.round: int = 0 + self.max_rounds: int = 3 + + async def coordinate(self, question: str) -> dict: + """Run debate and aggregate results.""" + self.participants = self._select_participants(question) + self.round = 0 + + while self.round < self.max_rounds: + self.round += 1 + + # Get positions from all agents + positions = await self._gather_positions(question) + + # Critique phase: agents critique each other's positions + critiques = await self._gather_critiques(positions) + + # Present critiques to agents for next round + updated_positions = await self._incorporate_critiques(positions, critiques) + + # Check convergence + if self._has_converged(updated_positions): + break + + return self._aggregate_results(updated_positions) + + def _select_participants(self, question: str) -> list[str]: + """Select relevant agents for question.""" + question_lower = question.lower() + + if "github" in question_lower or 
"pr" in question_lower: + return ["github_specialist", "code_reviewer"] + elif "email" in question_lower or "gmail" in question_lower: + return ["gmail_specialist", "email_analyst"] + else: + return ["generalist", "researcher", "planner"] + + def _has_converged(self, positions: list[dict]) -> bool: + """Check if agents have converged.""" + if len(positions) < 2: + return False + + # Check agreement threshold (e.g., 80% agree) + positions_text = [p.get("answer") for p in positions] + most_common = max(set(positions_text), key=positions_text.count) + agreement_ratio = positions_text.count(most_common) / len(positions_text) + + return agreement_ratio >= 0.8 + + def _aggregate_results(self, positions: list[dict]) -> dict: + """Aggregate debate results.""" + # Weight votes by confidence + weighted_votes = [] + for p in positions: + confidence = p.get("confidence", 0.5) + weighted_votes.extend([p.get("answer")] * int(confidence * 10)) + + most_common = max(set(weighted_votes), key=weighted_votes.count) + + return { + "type": "consensus", + "answer": most_common, + "confidence": weighted_votes.count(most_common) / len(weighted_votes), + "rounds": self.round + } +``` + +### Failure Mode Mitigations + +**Supervisor Bottleneck:** Implement output schema constraints + +```python +@dataclass +class SupervisorOutput: + """Constrained output to prevent supervisor context bloat.""" + status: str # "success", "partial", "failed" + summary: str # Concise summary only + findings: list[str] # Key findings (not full context) + next_actions: list[str] # Actionable next steps + +def supervisor_delegate( + task: str, + sub_agents: list[str] +) -> SupervisorOutput: + """Delegate to sub-agents with constrained output.""" + results = await asyncio.gather([ + run_agent(agent, task) for agent in sub_agents + ]) + + return SupervisorOutput( + status="success", + summary=f"Delegated to {len(results)} agents", + findings=[r["output"] for r in results if "output" in r], + next_actions=["Review 
findings for action plan"] + ) +``` + +**Divergence Prevention:** Define clear objective boundaries + +```python +@dataclass +class AgentObjective: + """Agent objective with boundaries.""" + primary_goal: str + success_criteria: list[str] # When to consider complete + constraints: list[str] # What NOT to do + handoff_triggers: list[str] # When to transfer control + +def enforce_objectives(agent_run: AgentRun, objective: AgentObjective): + """Enforce objective boundaries during execution.""" + current_goal = agent_run.goal.lower() + + # Check for divergence + for constraint in objective.constraints: + if constraint.lower() in current_goal: + logger.warning({ + "event": "agent_divergence", + "agent": agent_run.agent_name, + "objective": objective.primary_goal, + "constraint": constraint, + "goal": agent_run.goal + }) + return False + + return True + +--- + +## Skill Enrichment: memory-systems + +### Shared Memory Architecture + +Implement hierarchical memory with temporal validity for multi-agent coordination: + +```python +from dataclasses import dataclass, field +from datetime import datetime +from typing import Optional, Dict, List + +@dataclass +class MemoryFact: + """Memory fact with temporal validity.""" + entity_type: str # "task", "attachment", "github_pr", "email" + entity_id: str + property_name: str + property_value: str + source_agent: str # Which agent learned this fact + valid_from: datetime # Fact becomes valid at + valid_until: Optional[datetime] = None # Fact expires at + confidence: float = 1.0 # Confidence in fact + created_at: datetime + +@dataclass +class SharedMemory: + """Shared memory store for agent coordination.""" + + def __init__(self): + self.facts: Dict[str, MemoryFact] = {} + self.entity_indices: Dict[str, set[str]] = {} + self.temporal_indexes: Dict[str, list[str]] = {} + + def store_fact( + self, + entity_type: str, + entity_id: str, + property_name: str, + property_value: str, + source_agent: str, + confidence: float = 1.0, + 
valid_hours: int | None = None + ) -> None: + """Store a memory fact with optional temporal validity.""" + fact_id = f"{entity_type}:{entity_id}:{property_name}" + + valid_from = datetime.now() + valid_until = datetime.now() + timedelta(hours=valid_hours) if valid_hours else None + + fact = MemoryFact( + entity_type=entity_type, + entity_id=entity_id, + property_name=property_name, + property_value=property_value, + source_agent=source_agent, + valid_from=valid_from, + valid_until=valid_until, + confidence=confidence, + created_at=datetime.now() + ) + + self.facts[fact_id] = fact + + # Update entity index + if entity_type not in self.entity_indices: + self.entity_indices[entity_type] = set() + self.entity_indices[entity_type].add(entity_id) + + # Update temporal index + if valid_until: + time_key = valid_until.strftime("%Y-%m-%d") + if time_key not in self.temporal_indexes: + self.temporal_indexes[time_key] = [] + self.temporal_indexes[time_key].append(fact_id) + + logger.info({ + "event": "memory_fact_stored", + "fact_id": fact_id, + "source_agent": source_agent, + "confidence": confidence + }) + + def retrieve_facts( + self, + entity_type: str, + entity_id: str, + as_of: datetime | None = None + ) -> List[MemoryFact]: + """Retrieve facts for entity, respecting temporal validity.""" + as_of = as_of or datetime.now() + prefix = f"{entity_type}:{entity_id}:" + + matching = [ + fact for fact_id, fact in self.facts.items() + if fact_id.startswith(prefix) + and fact.valid_from <= as_of + and (fact.valid_until is None or fact.valid_until > as_of) + ] + + # Sort by confidence and recency + matching.sort(key=lambda f: (f.confidence, f.created_at), reverse=True) + + logger.debug({ + "event": "memory_retrieval", + "entity_type": entity_type, + "entity_id": entity_id, + "facts_found": len(matching), + "as_of": as_of.isoformat() + }) + + return matching + + def temporal_query( + self, + entity_type: str, + query_time: datetime, + time_window_hours: int = 24 + ) -> 
List[MemoryFact]: + """Query memory state as of specific time (time-travel).""" + window_start = query_time - timedelta(hours=time_window_hours) + window_end = query_time + timedelta(hours=time_window_hours) + + matching = [ + fact for fact in self.facts.values() + if fact.entity_type == entity_type + and fact.valid_from <= window_end + and (fact.valid_until is None or fact.valid_until > window_start) + ] + + logger.info({ + "event": "memory_temporal_query", + "entity_type": entity_type, + "query_time": query_time.isoformat(), + "window_start": window_start.isoformat(), + "window_end": window_end.isoformat(), + "facts_found": len(matching) + }) + + return matching + +# Global shared memory instance +shared_memory = SharedMemory() + +# Agent usage +async def researcher_agent(goal: str, context: dict) -> dict: + """Researcher agent stores findings in shared memory.""" + findings = await perform_research(goal) + + # Store in shared memory for other agents + for finding in findings: + shared_memory.store_fact( + entity_type="research_finding", + entity_id=str(hash(finding)), + property_name="content", + property_value=str(finding), + source_agent="researcher", + confidence=0.8, + valid_hours=48 # Facts valid for 48 hours + ) + + return {"type": "final", "result": findings} + +async def executor_agent(goal: str, context: dict) -> dict: + """Executor agent reads from shared memory.""" + task_id = context.get("task_id") + + # Retrieve relevant facts about task + task_facts = shared_memory.retrieve_facts( + entity_type="task", + entity_id=task_id + ) + + return {"type": "final", "result": f"Executing task with: {task_facts}"} +``` + +### Entity Consistency Tracking + +Track entity identity across sessions and agents: + +```python +class EntityRegistry: + """Maintain entity consistency across sessions.""" + + def __init__(self, shared_memory: SharedMemory): + self.memory = shared_memory + self.entity_references: Dict[str, set[str]] = {} + + def track_reference( + self, + 
session_id: str, + entity_type: str, + entity_id: str, + reference_text: str + ) -> None: + """Track when an entity is referenced in conversation.""" + key = f"{session_id}:{entity_type}" + + if key not in self.entity_references: + self.entity_references[key] = set() + + self.entity_references[key].add(entity_id) + + # Check if this is a new entity reference + existing_facts = self.memory.retrieve_facts(entity_type, entity_id) + is_new_entity = len(existing_facts) == 0 + + if is_new_entity: + logger.info({ + "event": "entity_first_mention", + "session_id": session_id, + "entity_type": entity_type, + "entity_id": entity_id, + "reference": reference_text[:100] + }) + + def resolve_identity( + self, + session_id: str, + entity_type: str, + entity_id: str, + potential_matches: list[str] + ) -> Optional[str]: + """Resolve entity identity when multiple candidates exist.""" + key = f"{session_id}:{entity_type}" + tracked_entities = self.entity_references.get(key, set()) + + if not tracked_entities: + return None + + # Find best match among tracked entities + best_match = self._find_best_match(entity_id, potential_matches, tracked_entities) + + if best_match: + # Link identities + for entity in tracked_entities: + if entity != best_match: + self.memory.store_fact( + entity_type="entity_alias", + entity_id=entity, + property_name="same_as", + property_value=best_match, + source_agent="system", + confidence=0.95, + valid_hours=None + ) + + return best_match + + return entity_id + + def _find_best_match( + self, + entity_id: str, + potential_matches: list[str], + tracked_entities: set[str] + ) -> Optional[str]: + """Find best match using string similarity.""" + from difflib import SequenceMatcher + + best_match = None + best_ratio = 0.0 + + for candidate in tracked_entities: + ratio = SequenceMatcher(None, candidate).ratio(entity_id) + + # Penalize matches that differ significantly from entity_id + candidate_similarity = SequenceMatcher(None, entity_id).ratio(candidate) + 
adjusted_ratio = ratio * (1.0 if candidate_similarity > 0.9 else 0.8) + + if adjusted_ratio > best_ratio: + best_ratio = adjusted_ratio + best_match = candidate + + return best_match +``` + +### Memory Consolidation + +Implement periodic consolidation to prevent unbounded growth: + +```python +class MemoryConsolidator: + """Consolidate memory to prevent unbounded growth.""" + + def __init__(self, shared_memory: SharedMemory): + self.memory = shared_memory + self.last_consolidation: Optional[datetime] = None + self.consolidation_interval_hours: int = 24 + + async def consolidate_if_needed(self) -> dict: + """Consolidate memory if interval elapsed.""" + now = datetime.now() + + if (self.last_consolidation is None or + (now - self.last_consolidation).total_seconds() > + self.consolidation_interval_hours * 3600): + + logger.info({"event": "memory_consolidation_started"}) + + stats = await self._consolidate() + + self.last_consolidation = now + + return stats + + return {"status": "skipped", "reason": "Too recent"} + + async def _consolidate(self) -> dict: + """Perform consolidation.""" + # 1. Remove outdated facts + removed = await self._remove_outdated_facts() + + # 2. Merge duplicate facts + merged = await self._merge_duplicate_facts() + + # 3. Update validity periods + updated = await self._refresh_validity_periods() + + # 4. 
Remove low-confidence facts + pruned = await self._prune_low_confidence() + + return { + "removed_facts": removed, + "merged_facts": merged, + "updated_validity": updated, + "pruned_low_confidence": pruned + } + + async def _remove_outdated_facts(self) -> int: + """Remove facts past validity period.""" + now = datetime.now() + fact_ids_to_remove = [] + + for fact_id, fact in self.memory.facts.items(): + if fact.valid_until and fact.valid_until < now: + fact_ids_to_remove.append(fact_id) + + for fact_id in fact_ids_to_remove: + del self.memory.facts[fact_id] + + logger.info({ + "event": "memory_consolidation", + "action": "removed_outdated", + "count": len(fact_ids_to_remove) + }) + + return len(fact_ids_to_remove) + + async def _merge_duplicate_facts(self) -> int: + """Merge facts about same entity/property.""" + # Group facts by entity:property + from collections import defaultdict + property_groups = defaultdict(list) + + for fact_id, fact in self.memory.facts.items(): + key = f"{fact.entity_type}:{fact.entity_id}:{fact.property_name}" + property_groups[key].append(fact) + + merged_count = 0 + for facts in property_groups.values(): + if len(facts) > 1: + # Keep highest confidence, mark others as superseded + facts.sort(key=lambda f: f.confidence, reverse=True) + best_fact = facts[0] + + for fact in facts[1:]: + if fact.property_value != best_fact.property_value: + # Mark as superseded + self.memory.store_fact( + entity_type=fact.entity_type, + entity_id=fact.entity_id, + property_name="superseded_by", + property_value=best_fact.property_value, + source_agent="consolidation", + confidence=0.5, + valid_hours=None + ) + merged_count += 1 + + logger.info({ + "event": "memory_consolidation", + "action": "merged_duplicates", + "count": merged_count + }) + + return merged_count + + async def _prune_low_confidence(self) -> int: + """Remove facts below confidence threshold.""" + confidence_threshold = 0.3 + + fact_ids_to_remove = [ + fact_id for fact_id, fact in 
self.memory.facts.items() + if fact.confidence < confidence_threshold + ] + + for fact_id in fact_ids_to_remove: + del self.memory.facts[fact_id] + + logger.info({ + "event": "memory_consolidation", + "action": "pruned_low_confidence", + "count": len(fact_ids_to_remove) + }) + + return len(fact_ids_to_remove) +``` + +### Integration with Context Loading + +Load relevant memories into agent context: + +```python +async def load_relevant_memory( + agent_name: str, + goal: str, + context_budget: int = 2000 +) -> str: + """Load relevant memories into agent context.""" + # Extract entities from goal + entities = extract_entities(goal) + + relevant_facts = [] + tokens_used = 0 + + for entity_type, entity_id in entities: + facts = shared_memory.retrieve_facts(entity_type, entity_id) + + for fact in facts: + fact_tokens = estimate_tokens(str(fact)) + + if tokens_used + fact_tokens <= context_budget: + relevant_facts.append(fact) + tokens_used += fact_tokens + else: + break + + if not relevant_facts: + return "" + + memory_context = "\n\n".join([ + f"[Memory from {fact.source_agent}] {fact.entity_type}:{fact.entity_id} " + f"{fact.property_name}={fact.property_value} " + f"(confidence: {fact.confidence})" + for fact in relevant_facts + ]) + + logger.info({ + "event": "memory_context_loaded", + "agent": agent_name, + "entities_count": len(entities), + "facts_loaded": len(relevant_facts), + "tokens_used": tokens_used, + "tokens_remaining": context_budget - tokens_used + }) + + return memory_context + +def estimate_tokens(text: str) -> int: + """Rough token estimation.""" + return len(text.split()) * 1.3 +``` + +### Agent Task Summarization + +Store agent summaries in memory for future retrieval: + +```python +@dataclass +class AgentRunSummary: + """Summary of agent run stored in memory.""" + run_id: str + agent_name: str + goal: str + summary: str # Exec summary + key_findings: list[str] # Important discoveries + outcomes: list[str] # Results achieved + tokens_used: int + 
duration_seconds: int + +async def store_agent_summary( + run: AgentRun, + summary: str, + key_findings: list[str], + outcomes: list[str] +) -> None: + """Store agent run summary in shared memory.""" + run_summary = AgentRunSummary( + run_id=run.id, + agent_name=run.agent_name, + goal=run.goal, + summary=summary, + key_findings=key_findings, + outcomes=outcomes, + tokens_used=run.tokens_used or 0, + duration_seconds=(run.finished_at - run.started_at).total_seconds() if run.finished_at else 0 + ) + + # Store as memory facts for later retrieval + shared_memory.store_fact( + entity_type="agent_run", + entity_id=run.id, + property_name="summary", + property_value=summary, + source_agent=run.agent_name, + confidence=0.9, + valid_hours=None + ) + + for i, finding in enumerate(key_findings): + shared_memory.store_fact( + entity_type="agent_run", + entity_id=run.id, + property_name=f"finding_{i}", + property_value=finding, + source_agent=run.agent_name, + confidence=0.8, + valid_hours=None + ) + + for i, outcome in enumerate(outcomes): + shared_memory.store_fact( + entity_type="agent_run", + entity_id=run.id, + property_name=f"outcome_{i}", + property_value=outcome, + source_agent=run.agent_name, + confidence=0.9, + valid_hours=None + ) + + logger.info({ + "event": "agent_summary_stored", + "run_id": run.id, + "agent": run.agent_name, + "key_findings": len(key_findings), + "outcomes": len(outcomes) + }) +``` +``` diff --git a/docs/02-implementation/pr-specs/PR-015-agent-ux-panel.md b/docs/02-implementation/pr-specs/PR-015-agent-ux-panel.md new file mode 100644 index 0000000..f8dd70c --- /dev/null +++ b/docs/02-implementation/pr-specs/PR-015-agent-ux-panel.md @@ -0,0 +1,108 @@ +# PR-015: Agent UX Panel (Spec) + +**Status:** Spec Only +**Depends on:** PR-008, PR-003B, PR-013, PR-014 +**Last Reviewed:** 2026-01-01 + +## Goal + +Add a TUI panel for agent runs, tool execution status, and controls. 
+ +## User Value + +- Users can see what the agent is doing and intervene when needed. +- Tool execution is transparent and traceable. + +## References + +- `docs/01-design/DESIGN_TUI.md` +- `docs/01-design/DESIGN_CHAT.md` + +## Scope + +### In + +- Agent panel in the TUI showing run status and recent actions. +- Tool execution timeline with success/failure states. +- Controls to pause, resume, and cancel agent runs. +- Real-time updates via event stream (PR-013). + +### Out + +- Web UI for agent runs. +- Multi-agent visualization dashboards. + +## Mini-Specs + +- New TUI screen/panel showing agent run list and details. +- Status indicator (running/paused/failed/completed) with timestamps. +- Keybindings for pause/resume/cancel actions. +- Inline view of recent tool calls and results. + +## User Stories + +- As a user, I can see when an agent starts, progresses, and finishes. +- As a user, I can pause or cancel a run from the TUI. + +## UX Notes (if applicable) + +- Avoid showing hidden reasoning; show tool actions and status only. + +## Technical Design + +### Architecture + +- TUI panel subscribes to SSE events for agent run updates. +- Agent controls call agent run endpoints (`/api/v1/agents/...`). + +### Data Model / Migrations + +- N/A. + +### API Contract + +- Uses PR-014 run endpoints and PR-013 event stream. + +### Background Jobs + +- N/A. + +### Security / Privacy + +- Do not display or log raw prompts by default. + +### Error Handling + +- If event stream disconnects, show a recoverable error state and retry option. + +## Acceptance Criteria + +### AC1: Agent Panel Rendering + +**Success Criteria:** +- [ ] Agent panel renders and updates in real time. + +### AC2: Tool Execution Visibility + +**Success Criteria:** +- [ ] Tool calls and results appear in the timeline with statuses. + +### AC3: Run Controls + +**Success Criteria:** +- [ ] Pause/resume/cancel actions work from the TUI. 
+ +## Test Plan + +### Automated + +- TUI widget tests for agent panel rendering. +- Integration tests with mocked SSE events and agent API calls. + +### Manual + +- Start an agent run and verify live status updates and controls. + +## Notes / Risks / Open Questions + +- Decide how much tool detail to surface without overwhelming users. diff --git a/docs/02-implementation/pr-specs/PR-016-observability-baseline.md b/docs/02-implementation/pr-specs/PR-016-observability-baseline.md new file mode 100644 index 0000000..ddf2606 --- /dev/null +++ b/docs/02-implementation/pr-specs/PR-016-observability-baseline.md @@ -0,0 +1,113 @@ +# PR-016: Observability Baseline (Spec) + +**Status:** Spec Only +**Depends on:** PR-001 +**Last Reviewed:** 2026-01-01 + +## Goal + +Provide baseline observability with structured logs and a lightweight telemetry +endpoint. + +## User Value + +- Easier debugging of failures and performance issues. +- Clear visibility into DB health and migration status. + +## References + +- `docs/01-design/DESIGN_ARCHITECTURE.md` + +## Scope + +### In + +- JSON structured logging with correlation IDs. +- Request logging middleware for API routes. +- Telemetry endpoint with health and basic metrics. +- Log redaction for secrets and tokens. + +### Out + +- Full distributed tracing or external telemetry exporters. +- Long-term metrics storage. + +## Mini-Specs + +- Log format includes `request_id`, `event`, `duration_ms`, `status`. +- Middleware assigns request IDs and injects into logs. +- `GET /api/v1/telemetry` returns JSON metrics: + - DB connectivity + - migration version + - event queue size (if PR-013 exists) + - agent run counts (if PR-014 exists) +- Redact tokens and email addresses from logs. + +## User Stories + +- As a developer, I can correlate logs by request ID. +- As a user, I can see a single endpoint with system health metrics. + +## UX Notes (if applicable) + +- N/A. 
+ +## Technical Design + +### Architecture + +- Logging config in `backend/logging.py` (or equivalent) with JSON formatter. +- Middleware attaches `request_id` to context and response headers. + +### Data Model / Migrations + +- N/A. + +### API Contract + +- `GET /api/v1/telemetry` returns JSON metrics. + +### Background Jobs + +- N/A. + +### Security / Privacy + +- Telemetry excludes PII and content payloads. + +### Error Handling + +- Telemetry endpoint degrades gracefully if optional subsystems are missing. + +## Acceptance Criteria + +### AC1: Structured Logging + +**Success Criteria:** +- [ ] Logs are JSON and include `request_id` for API requests. + +### AC2: Telemetry Endpoint + +**Success Criteria:** +- [ ] `/api/v1/telemetry` returns JSON with DB health and migration version. + +### AC3: Redaction + +**Success Criteria:** +- [ ] Tokens and emails are redacted from logs. + +## Test Plan + +### Automated + +- Unit tests for log formatter and redaction. +- Integration tests for telemetry endpoint response shape. + +### Manual + +- Trigger API requests and verify logs contain `request_id`. +- Call `/api/v1/telemetry` and verify metrics payload. + +## Notes / Risks / Open Questions + +- Decide whether to expose telemetry in production by default. diff --git a/docs/02-implementation/pr-specs/PR-017-db-config-followups.md b/docs/02-implementation/pr-specs/PR-017-db-config-followups.md new file mode 100644 index 0000000..e456540 --- /dev/null +++ b/docs/02-implementation/pr-specs/PR-017-db-config-followups.md @@ -0,0 +1,128 @@ +# PR-017: DB Config Follow-ups (Spec) + +**Status:** Spec Only +**Depends on:** PR-001 +**Last Reviewed:** 2025-12-30 + +## Goal + +Close gaps discovered after PR-001 by tightening directory creation, restore safety, +SQLite FK enforcement, and docs alignment. + +## User Value + +- App data directories are consistent and complete across machines. +- Restore confirmations prevent accidental overwrites while supporting automation. 
+- SQLite foreign keys are enforced for all connections. +- Troubleshooting docs reflect current behavior. + +## References + +- `docs/02-implementation/pr-specs/PR-001-db-config.md` +- `docs/01-design/DESIGN_DATA.md` +- `docs/02-implementation/MIGRATIONS.md` + +## Scope + +### In + +- Ensure `ensure_app_dirs()` creates the vector store directory (`data/chroma`). +- Keep confirmation when `tgenie db restore` would overwrite an existing DB, + and add a `--yes` flag to skip the prompt for automation. +- Enforce SQLite foreign keys on every SQLAlchemy connection (not just sessions). +- Update `docs/TROUBLESHOOTING.md` to remove PR-001 "planned" wording and align + default DB path guidance with current settings. + +### Out + +- Non-SQLite databases or new backup/retention policies. +- Changes to migration scripts or schema. + +## Mini-Specs + +- Add vector store directory creation to settings dir bootstrap. +- Add a restore confirmation bypass flag for overwrite prompts. +- Add engine-level SQLite FK enforcement hook. +- Align troubleshooting docs with actual DB/config behavior. + +## User Stories + +- As a user, I see my vector store directory created alongside the DB. +- As a user, I must explicitly confirm a restore before it overwrites data. +- As a developer, I can rely on SQLite foreign keys for any connection. + +## UX Notes + +- Restore prompts should be explicit about the target DB path. +- The `--yes` flag should be consistent with `db reset`. + +## Technical Design + +### Architecture + +- In `Settings.ensure_app_dirs()`, create `data/chroma` using the same root as + `database_path`. +- For SQLite, attach a SQLAlchemy engine `connect` event that runs + `PRAGMA foreign_keys=ON` on every new DBAPI connection (sync + async engines). +- Keep `get_db()` PRAGMA as a secondary guard. + +### Data Model / Migrations + +- No schema changes. + +### API Contract + +- `tgenie db restore --in <file>` prompts when the target DB exists, unless + `--yes` is provided.
+ +### Background Jobs + +- N/A. + +### Security / Privacy + +- Do not log SQL contents during restore. + +### Error Handling + +- Restore failures should remain actionable and exit non-zero. + +## Acceptance Criteria + +### AC1: App Dir Completeness + +**Success Criteria:** +- [ ] `ensure_app_dirs()` creates `data/chroma` under the resolved app data dir. +- [ ] No directories are created at import time. + +### AC2: Restore Confirmation + +**Success Criteria:** +- [ ] `tgenie db restore` prompts for confirmation only when the target DB exists. +- [ ] `--yes` skips the prompt when an existing DB would be overwritten. + +### AC3: SQLite FK Enforcement + +**Success Criteria:** +- [ ] SQLite `PRAGMA foreign_keys=ON` is executed for every engine connection. + +### AC4: Docs Alignment + +**Success Criteria:** +- [ ] `docs/TROUBLESHOOTING.md` reflects current DB wiring and default paths. + +## Test Plan + +### Automated + +- Unit: `ensure_app_dirs()` creates `data/chroma`. +- CLI: `tgenie db restore` prompts only when the DB exists; `--yes` skips. +- Integration: engine-level FK PRAGMA is applied for a fresh connection. + +### Manual + +- Run `tgenie db restore --in backup.sql` on an existing DB and confirm prompt. +- Run `tgenie db restore --in backup.sql` on a missing DB and confirm no prompt. +- Start the API and confirm `~/.taskgenie/data/chroma` is created. + +## Notes / Risks / Open Questions diff --git a/docs/02-implementation/pr-specs/TEMPLATE.md b/docs/02-implementation/pr-specs/TEMPLATE.md index ff9a526..f503352 100644 --- a/docs/02-implementation/pr-specs/TEMPLATE.md +++ b/docs/02-implementation/pr-specs/TEMPLATE.md @@ -2,7 +2,7 @@ **Status:** Spec Only **Depends on:** PR-XXX -**Last Reviewed:** 2025-12-29 +**Last Reviewed:** YYYY-MM-DD ## Goal @@ -20,14 +20,16 @@ ## Mini-Specs -- Bullet list of concrete deliverables (sub-features) to ship in this PR. +- Concrete deliverables for this PR. ## User Stories -- As a user, I can … +- As a user, I can ... 
## UX Notes (if applicable) +- N/A. + ## Technical Design ### Architecture @@ -36,7 +38,9 @@ ### API Contract -### Background Jobs (if applicable) +### Background Jobs + +- N/A. ### Security / Privacy @@ -44,6 +48,16 @@ ## Acceptance Criteria +### AC1: + +**Success Criteria:** +- [ ] ... + +### AC2: + +**Success Criteria:** +- [ ] ... + ## Test Plan ### Automated From ebd218ae4d0e9d74f0166ae5d96c7ab1fb57fb49 Mon Sep 17 00:00:00 2001 From: Raymond Christopher Date: Thu, 1 Jan 2026 20:29:07 +0700 Subject: [PATCH 2/4] chore: update PR-PLANS and specifications for database configuration follow-ups - Increment the total PR count in `PR-PLANS.md` to reflect the addition of PR-017 for database configuration follow-ups. - Revise the sequence of PRs to include PR-017 and adjust dependencies accordingly. - Update `INDEX.md` to include the new specification for PR-017. - Enhance the acceptance criteria and implementation notes in `PR-017-db-config-followups.md` to clarify requirements and expectations. These changes aim to improve project organization and ensure clarity in the implementation of database configuration follow-ups. 
--- docs/02-implementation/PR-PLANS.md | 54 +++++++---- docs/02-implementation/pr-specs/INDEX.md | 2 +- .../pr-specs/PR-003B-agent-tool-calling.md | 84 ++++++++-------- .../pr-specs/PR-009-cli-subcommands.md | 4 +- .../pr-specs/PR-010-web-ui.md | 13 ++- .../pr-specs/PR-011-notifications.md | 2 +- .../pr-specs/PR-013-event-system.md | 88 +++++++++-------- .../PR-014-multi-agent-orchestration.md | 95 +++++++++---------- .../pr-specs/PR-017-db-config-followups.md | 4 +- 9 files changed, 182 insertions(+), 164 deletions(-) diff --git a/docs/02-implementation/PR-PLANS.md b/docs/02-implementation/PR-PLANS.md index 2ddc8d2..f0d2285 100644 --- a/docs/02-implementation/PR-PLANS.md +++ b/docs/02-implementation/PR-PLANS.md @@ -2,7 +2,7 @@ **Status:** Spec Complete | Implementation In Progress **Last Reviewed:** 2025-12-31 -**Total PRs:** 17 (PR-001 through PR-012, plus PR-003B and PR-013 through PR-016) +**Total PRs:** 18 (PR-001 through PR-012, plus PR-003B and PR-013 through PR-017) ## Overview @@ -40,25 +40,26 @@ This sequence prioritizes **something usable early** (good UX) and then adds cap | Seq | PR | Title | Why now? 
| Depends on | Skill Enrichment | |---:|---|---|---|---|---| | 1 | PR-001 | Database & Configuration | Foundation + migrations | - | - | -| 2 | PR-016 | Observability Baseline | De-risk debugging early | PR-001 | - | -| 3 | PR-002 | Task CRUD API | Core workflows + enables clients | PR-001 | api-testing | -| 4 | PR-008 | Interactive TUI (Tasks MVP) | Validate UX early | PR-002 | tui-dev | -| 5 | PR-003 | LLM + Chat Backbone | Make chat real (provider + API + TUI) | PR-001, PR-002, PR-008 | api-testing, tui-dev | -| 6 | PR-004 | Attachments + Link Detection | Context capture for real work | PR-002 | task-workflow | -| 7 | PR-013 | Event System + Realtime Updates | Enable subscriptions + hooks | PR-002 | - | -| 8 | PR-003B | Agent Tool-Calling Foundation | Safe tool execution | PR-003, PR-002, PR-004 | - | -| 9 | PR-011 | Notifications | Early \"daily value\" | PR-002 | task-workflow | -| 10 | PR-007 | GitHub Integration | High-value for dev tasks | PR-004 | integration-setup | -| 11 | PR-006 | Gmail Integration | High-value, higher complexity | PR-004 | integration-setup | -| 12 | PR-005 | RAG + Semantic Search | Better recall + better chat | PR-003, PR-004 | rag-testing, context-optimization, context-compression | -| 13 | PR-014 | Multi-Agent Orchestration | Coordinated agent runs | PR-003B, PR-013 | - | -| 14 | PR-015 | Agent UX Panel | Visibility + controls | PR-008, PR-003B, PR-013, PR-014 | - | -| 15 | PR-009 | CLI Subcommands (Secondary) | Scriptable workflows + agent CLI | PR-002, PR-003B | task-workflow | -| 16 | PR-010 | Web UI | Secondary UX for rich preview | PR-002 (chat optional: PR-003) | - | -| 17 | PR-012 | Deployment + Docs | Make it easy to run/share | PR-010, PR-011 | - | +| 2 | PR-017 | DB Config Follow-ups | Close PR-001 gaps | PR-001 | - | +| 3 | PR-016 | Observability Baseline | De-risk debugging early | PR-001 | - | +| 4 | PR-002 | Task CRUD API | Core workflows + enables clients | PR-001 | api-testing | +| 5 | PR-008 | Interactive TUI (Tasks 
MVP) | Validate UX early | PR-002 | tui-dev | +| 6 | PR-003 | LLM + Chat Backbone | Make chat real (provider + API + TUI) | PR-001, PR-002, PR-008 | api-testing, tui-dev | +| 7 | PR-004 | Attachments + Link Detection | Context capture for real work | PR-002 | task-workflow | +| 8 | PR-013 | Event System + Realtime Updates | Enable subscriptions + hooks | PR-002 | - | +| 9 | PR-003B | Agent Tool-Calling Foundation | Safe tool execution | PR-003, PR-002, PR-004 | - | +| 10 | PR-011 | Notifications | Early \"daily value\" | PR-002 | task-workflow | +| 11 | PR-007 | GitHub Integration | High-value for dev tasks | PR-004 | integration-setup | +| 12 | PR-006 | Gmail Integration | High-value, higher complexity | PR-004 | integration-setup | +| 13 | PR-005 | RAG + Semantic Search | Better recall + better chat | PR-003, PR-004 | rag-testing, context-optimization, context-compression | +| 14 | PR-014 | Multi-Agent Orchestration | Coordinated agent runs | PR-003B, PR-013 | - | +| 15 | PR-015 | Agent UX Panel | Visibility + controls | PR-008, PR-003B, PR-013, PR-014 | - | +| 16 | PR-009 | CLI Subcommands (Secondary) | Scriptable workflows + agent CLI | PR-002, PR-003B | task-workflow | +| 17 | PR-010 | Web UI | Secondary UX for rich preview | PR-002 (chat optional: PR-003) | - | +| 18 | PR-012 | Deployment + Docs | Make it easy to run/share | PR-010, PR-011 | - | Notes: -- You can swap **Seq 9–12** based on what you can test earliest (notifications vs integrations vs RAG). +- You can swap **Seq 10–13** based on what you can test earliest (notifications vs integrations vs RAG). - PR-010 can be started earlier for task pages, but chat streaming needs PR-003. - PR-015 depends on PR-014 for agent run endpoints. 
- Specs (with test scenarios): `pr-specs/INDEX.md` @@ -68,6 +69,7 @@ Notes: ```mermaid flowchart TD PR001["PR-001: Database & Config"] + PR017["PR-017: DB Config Follow-ups"] PR016["PR-016: Observability Baseline"] PR002["PR-002: Task CRUD API"] PR008["PR-008: Interactive TUI (Tasks MVP)"] @@ -85,6 +87,7 @@ flowchart TD PR010["PR-010: Web UI"] PR012["PR-012: Deployment + Docs"] + PR001 --> PR017 PR001 --> PR016 PR001 --> PR002 PR002 --> PR008 @@ -127,6 +130,7 @@ These diagrams break the full dependency graph into smaller, phase-focused views ```mermaid flowchart TD PR001["PR-001: Database & Config"] + PR017["PR-017: DB Config Follow-ups"] PR016["PR-016: Observability Baseline"] PR002["PR-002: Task CRUD API"] PR008["PR-008: Interactive TUI"] @@ -135,6 +139,7 @@ flowchart TD PR013["PR-013: Event System"] PR003B["PR-003B: Agent Tool-Calling"] + PR001 --> PR017 PR001 --> PR016 PR001 --> PR002 PR002 --> PR008 @@ -214,6 +219,17 @@ flowchart TD - [ ] Environment variables load correctly - [ ] Tests pass for database operations +### PR-017: DB Config Follow-ups +**Branch:** `feature/db-config-followups` +**Status:** ⬜ Not Started +**Dependency:** PR-001 +**Description:** Close gaps after PR-001 (dir creation, restore safety, FK enforcement, docs alignment). 
+**Spec:** `pr-specs/PR-017-db-config-followups.md` +**Acceptance Criteria:** +- [ ] `data/chroma` directory is created in app data dir +- [ ] `tgenie db restore` prompts unless `--yes` is provided +- [ ] SQLite foreign keys are enforced on every connection + ### PR-016: Observability Baseline **Branch:** `feature/observability-baseline` **Status:** ⬜ Not Started @@ -438,7 +454,7 @@ This phase is intentionally flexible: pick what’s easiest to validate early fr | Phase | Focus | Weeks | Key PRs | |-------|-------|--------|----------| -| **1** | **Foundation + Observability + UX MVP** | 1-2 | PR-001 (DB), PR-016 (Observability), PR-002 (Task API), PR-008 (TUI Tasks) | +| **1** | **Foundation + Observability + UX MVP** | 1-2 | PR-001 (DB), PR-017 (DB follow-ups), PR-016 (Observability), PR-002 (Task API), PR-008 (TUI Tasks) | | **2** | **Chat + Attachments** | 3-4 | PR-003 (Chat backbone), PR-004 (Attachments) | | **3** | **Agent Foundations + Events** | 5-6 | PR-013 (Events), PR-003B (Tool-calling) | | **4** | **Early Value Track** | 7-8 | PR-011 (Notifications) and/or PR-007 (GitHub) / PR-006 (Gmail) | diff --git a/docs/02-implementation/pr-specs/INDEX.md b/docs/02-implementation/pr-specs/INDEX.md index 5612654..cb22036 100644 --- a/docs/02-implementation/pr-specs/INDEX.md +++ b/docs/02-implementation/pr-specs/INDEX.md @@ -26,4 +26,4 @@ Design deep-dives live in `docs/01-design/` (notably `DESIGN_TUI.md`, `DESIGN_CH - [PR-014-multi-agent-orchestration.md](PR-014-multi-agent-orchestration.md) - Multi-agent orchestration - [PR-015-agent-ux-panel.md](PR-015-agent-ux-panel.md) - TUI agent panel + controls - [PR-016-observability-baseline.md](PR-016-observability-baseline.md) - Structured logging + telemetry -- [PR-017-db-config-followups.md](PR-017-db-config-followups.md) - DB config follow-up fixes +- [PR-017-db-config-followups.md](PR-017-db-config-followups.md) - DB config follow-ups diff --git a/docs/02-implementation/pr-specs/PR-003B-agent-tool-calling.md 
b/docs/02-implementation/pr-specs/PR-003B-agent-tool-calling.md index be923b1..89a7adb 100644 --- a/docs/02-implementation/pr-specs/PR-003B-agent-tool-calling.md +++ b/docs/02-implementation/pr-specs/PR-003B-agent-tool-calling.md @@ -89,48 +89,9 @@ behalf of the user. - Invalid tool args return a tool error message back to the model. - Tool timeouts return a clear failure response without crashing chat. -## Acceptance Criteria - -### AC1: Tool Schema and Registry - -**Success Criteria:** -- [ ] Tools are defined with JSON Schema parameters. -- [ ] Tool registry exposes the schema list to the chat pipeline. - -### AC2: Tool Execution Flow - -**Success Criteria:** -- [ ] Tool calls are validated and executed with timeout handling. -- [ ] Tool results are included in the chat response flow. - -### AC3: Safety and Confirmation - -**Success Criteria:** -- [ ] Destructive tools require confirmation or are blocked by default. -- [ ] Tool errors are surfaced as readable assistant responses. - -## Test Plan - -### Automated - -- Unit tests for tool schema validation and registry behavior. -- Integration tests for tool call -> execution -> response flow with mocked LLM. -- Safety tests for destructive tool confirmation gating. - -### Manual - -- Run chat and ask the assistant to create/update/complete a task. -- Attempt a destructive action and verify confirmation is required. - -## Notes / Risks / Open Questions - -- Decide whether tool execution logs should be persisted for audit (future). - ---- - -## Skill Enrichment: tool-design +### Implementation Notes -### Consolidation Principle +#### Consolidation Principle Prefer single comprehensive tools over multiple narrow tools: @@ -175,7 +136,7 @@ def query_tasks( pass ``` -### Tool Description Engineering +#### Tool Description Engineering Write descriptions that answer what, when, and what returns: @@ -219,6 +180,43 @@ async def create_task( # Implementation... 
``` +## Acceptance Criteria + +### AC1: Tool Schema and Registry + +**Success Criteria:** +- [ ] Tools are defined with JSON Schema parameters. +- [ ] Tool registry exposes the schema list to the chat pipeline. + +### AC2: Tool Execution Flow + +**Success Criteria:** +- [ ] Tool calls are validated and executed with timeout handling. +- [ ] Tool results are included in the chat response flow. + +### AC3: Safety and Confirmation + +**Success Criteria:** +- [ ] Destructive tools require confirmation or are blocked by default. +- [ ] Tool errors are surfaced as readable assistant responses. + +## Test Plan + +### Automated + +- Unit tests for tool schema validation and registry behavior. +- Integration tests for tool call -> execution -> response flow with mocked LLM. +- Safety tests for destructive tool confirmation gating. + +### Manual + +- Run chat and ask the assistant to create/update/complete a task. +- Attempt a destructive action and verify confirmation is required. + +## Notes / Risks / Open Questions + +- Decide whether tool execution logs should be persisted for audit (future). 
+ ### Response Format Optimization Provide concise and detailed format options: @@ -442,7 +440,7 @@ class AsyncToolExecutor: return { "status": "success", "result": result, - "duration_ms": 0 # TODO: track duration + "duration_ms": 0 # Example field (implementation should track actual duration) } except asyncio.TimeoutError: diff --git a/docs/02-implementation/pr-specs/PR-009-cli-subcommands.md b/docs/02-implementation/pr-specs/PR-009-cli-subcommands.md index b13e60d..ab79cb7 100644 --- a/docs/02-implementation/pr-specs/PR-009-cli-subcommands.md +++ b/docs/02-implementation/pr-specs/PR-009-cli-subcommands.md @@ -1,7 +1,7 @@ # PR-009: CLI Subcommands (Secondary) (Spec) **Status:** Spec Only -**Depends on:** PR-002, PR-003B +**Depends on:** PR-002, PR-003B, PR-014 **Last Reviewed:** 2025-12-29 ## Goal @@ -121,4 +121,4 @@ Provide non-interactive commands for scripting while the TUI remains primary. ## Notes / Risks / Open Questions - Ensure CLI flags align with TUI field names and API enums. -- Agent commands depend on PR-003B and any agent run API contract. +- Agent commands depend on PR-003B and PR-014 (agent run API contract). diff --git a/docs/02-implementation/pr-specs/PR-010-web-ui.md b/docs/02-implementation/pr-specs/PR-010-web-ui.md index 345cb22..9077a6a 100644 --- a/docs/02-implementation/pr-specs/PR-010-web-ui.md +++ b/docs/02-implementation/pr-specs/PR-010-web-ui.md @@ -1,7 +1,7 @@ # PR-010: Web UI (Spec) **Status:** Spec Only -**Depends on:** PR-002 (chat optional: PR-003) +**Depends on:** PR-002, PR-004 (chat optional: PR-003) **Last Reviewed:** 2025-12-30 ## Goal @@ -25,6 +25,7 @@ optional chat streaming. ### In - Task list, detail, create, and edit pages (HTMX forms). +- Attachment viewing pages for reading attachment content. - Basic responsive layout. - Optional chat page if PR-003 is implemented. @@ -36,6 +37,7 @@ optional chat streaming. ## Mini-Specs - FastAPI template routes for tasks list/detail and edit/create. 
+- Attachment viewing pages for displaying attachment content. - HTMX interactions for inline updates and form submissions. - Optional chat page using SSE via EventSource. @@ -84,12 +86,17 @@ optional chat streaming. **Success Criteria:** - [ ] List/detail/create/edit flows work against the API. -### AC2: Responsive Layout +### AC2: Attachment Viewing + +**Success Criteria:** +- [ ] Attachment viewing pages display attachment content correctly. + +### AC3: Responsive Layout **Success Criteria:** - [ ] Pages remain usable on narrow viewports. -### AC3: Optional Chat UI +### AC4: Optional Chat UI **Success Criteria:** - [ ] If PR-003 is present, chat page streams responses and handles disconnects. diff --git a/docs/02-implementation/pr-specs/PR-011-notifications.md b/docs/02-implementation/pr-specs/PR-011-notifications.md index 21013c4..db943ca 100644 --- a/docs/02-implementation/pr-specs/PR-011-notifications.md +++ b/docs/02-implementation/pr-specs/PR-011-notifications.md @@ -1,7 +1,7 @@ # PR-011: Notifications (Spec) **Status:** Spec Only -**Depends on:** PR-002 +**Depends on:** PR-002, PR-003B, PR-014 (agent notifications optional) **Last Reviewed:** 2025-12-30 ## Goal diff --git a/docs/02-implementation/pr-specs/PR-013-event-system.md b/docs/02-implementation/pr-specs/PR-013-event-system.md index 18ead69..3e19649 100644 --- a/docs/02-implementation/pr-specs/PR-013-event-system.md +++ b/docs/02-implementation/pr-specs/PR-013-event-system.md @@ -1,7 +1,7 @@ # PR-013: Event System + Realtime Updates (Spec) **Status:** Spec Only -**Depends on:** PR-002 +**Depends on:** PR-002, PR-004 **Last Reviewed:** 2026-01-01 ## Goal @@ -82,48 +82,9 @@ for UI and agent hooks. - Failed webhook deliveries are logged and retried without blocking core flows. - SSE clients can reconnect with `Last-Event-ID`. -## Acceptance Criteria - -### AC1: Event Emission - -**Success Criteria:** -- [ ] Task create/update/delete emits the expected event types. 
-- [ ] Attachment create/delete emits events when applicable. - -### AC2: SSE Streaming - -**Success Criteria:** -- [ ] SSE endpoint streams events in order. -- [ ] Reconnect with `Last-Event-ID` resumes without duplicates. - -### AC3: Webhook Delivery (Optional) - -**Success Criteria:** -- [ ] Webhooks receive events when configured. -- [ ] Delivery failures are retried and logged. - -## Test Plan - -### Automated - -- Unit tests for event emission and payloads. -- Integration tests for SSE streaming and resume behavior. -- Webhook dispatcher tests with mocked HTTP endpoints. - -### Manual - -- Create a task and observe SSE events in a terminal client. -- Configure a webhook endpoint and verify deliveries. - -## Notes / Risks / Open Questions - -- Decide how long to retain events in the log. - ---- - -## Skill Enrichment: context-fundamentals +### Implementation Notes -### Event Batching for Efficiency +#### Event Batching for Efficiency Aggregate high-frequency events to reduce processing overhead: @@ -241,7 +202,7 @@ class EventBatcher: }) ``` -### Event Deduplication +#### Event Deduplication Prevent duplicate events from being emitted: @@ -301,7 +262,7 @@ class EventDeduplicator: }) ``` -### Event Filtering and Routing +#### Event Filtering and Routing Filter events by type and content to reduce context noise: @@ -364,7 +325,7 @@ AGENT_CONTEXT_FILTER = EventFilter( ) ``` -### Event Prioritization +#### Event Prioritization Prioritize events for context loading (most relevant first): @@ -427,3 +388,40 @@ async def emit_with_priority( return event_id ``` + +## Acceptance Criteria + +### AC1: Event Emission + +**Success Criteria:** +- [ ] Task create/update/delete emits the expected event types. +- [ ] Attachment create/delete emits events when applicable. + +### AC2: SSE Streaming + +**Success Criteria:** +- [ ] SSE endpoint streams events in order. +- [ ] Reconnect with `Last-Event-ID` resumes without duplicates. 
+ +### AC3: Webhook Delivery (Optional) + +**Success Criteria:** +- [ ] Webhooks receive events when configured. +- [ ] Delivery failures are retried and logged. + +## Test Plan + +### Automated + +- Unit tests for event emission and payloads. +- Integration tests for SSE streaming and resume behavior. +- Webhook dispatcher tests with mocked HTTP endpoints. + +### Manual + +- Create a task and observe SSE events in a terminal client. +- Configure a webhook endpoint and verify deliveries. + +## Notes / Risks / Open Questions + +- Decide how long to retain events in the log. diff --git a/docs/02-implementation/pr-specs/PR-014-multi-agent-orchestration.md b/docs/02-implementation/pr-specs/PR-014-multi-agent-orchestration.md index cbd7dde..7b1461a 100644 --- a/docs/02-implementation/pr-specs/PR-014-multi-agent-orchestration.md +++ b/docs/02-implementation/pr-specs/PR-014-multi-agent-orchestration.md @@ -80,46 +80,9 @@ execution. - Failed runs are marked with error status and short reason. -## Acceptance Criteria - -### AC1: Run Lifecycle - -**Success Criteria:** -- [ ] Agent runs can be started, paused, resumed, and canceled. -- [ ] Run status is persisted and queryable. - -### AC2: Concurrency Control - -**Success Criteria:** -- [ ] Concurrency limits prevent excessive parallel runs. -- [ ] Runs back off or queue when limits are reached. - -### AC3: Event Integration - -**Success Criteria:** -- [ ] Run status updates emit events for UI consumption. - -## Test Plan - -### Automated - -- Unit tests for run state transitions. -- Integration tests for API start/cancel/status. -- Concurrency tests for max-parallel settings. - -### Manual - -- Start a run, observe status updates, then cancel it. - -## Notes / Risks / Open Questions - -- Decide how much agent context to persist vs summarize. 
- ---- - -## Skill Enrichment: multi-agent-patterns +### Implementation Notes -### Architectural Pattern Selection +#### Architectural Pattern Selection Implement **Swarm/Peer-to-Peer** pattern (not Supervisor) to avoid telephone game: @@ -221,7 +184,7 @@ orchestrator.register_agent(PLANNER) orchestrator.register_agent(EXECUTOR) ``` -### Handoff Protocol +#### Handoff Protocol Implement explicit handoff mechanism with `forward_message` tool: @@ -290,7 +253,7 @@ async def researcher_agent(goal: str, context: dict) -> dict: return {"type": "final", "result": findings} ``` -### Convergence and Consensus +#### Convergence and Consensus Implement debate protocol for complex decisions: @@ -367,7 +330,7 @@ class DebateCoordinator: } ``` -### Failure Mode Mitigations +#### Failure Mode Mitigations **Supervisor Bottleneck:** Implement output schema constraints @@ -428,9 +391,9 @@ def enforce_objectives(agent_run: AgentRun, objective: AgentObjective): --- -## Skill Enrichment: memory-systems +### Memory Implementation Notes -### Shared Memory Architecture +#### Shared Memory Architecture Implement hierarchical memory with temporal validity for multi-agent coordination: @@ -603,7 +566,7 @@ async def executor_agent(goal: str, context: dict) -> dict: return {"type": "final", "result": f"Executing task with: {task_facts}"} ``` -### Entity Consistency Tracking +#### Entity Consistency Tracking Track entity identity across sessions and agents: @@ -704,7 +667,7 @@ class EntityRegistry: return best_match ``` -### Memory Consolidation +#### Memory Consolidation Implement periodic consolidation to prevent unbounded growth: @@ -836,7 +799,7 @@ class MemoryConsolidator: return len(fact_ids_to_remove) ``` -### Integration with Context Loading +#### Integration with Context Loading Load relevant memories into agent context: @@ -891,7 +854,7 @@ def estimate_tokens(text: str) -> int: return len(text.split()) * 1.3 ``` -### Agent Task Summarization +#### Agent Task Summarization Store agent 
summaries in memory for future retrieval: @@ -967,4 +930,38 @@ async def store_agent_summary( "outcomes": len(outcomes) }) ``` -``` + +## Acceptance Criteria + +### AC1: Run Lifecycle + +**Success Criteria:** +- [ ] Agent runs can be started, paused, resumed, and canceled. +- [ ] Run status is persisted and queryable. + +### AC2: Concurrency Control + +**Success Criteria:** +- [ ] Concurrency limits prevent excessive parallel runs. +- [ ] Runs back off or queue when limits are reached. + +### AC3: Event Integration + +**Success Criteria:** +- [ ] Run status updates emit events for UI consumption. + +## Test Plan + +### Automated + +- Unit tests for run state transitions. +- Integration tests for API start/cancel/status. +- Concurrency tests for max-parallel settings. + +### Manual + +- Start a run, observe status updates, then cancel it. + +## Notes / Risks / Open Questions + +- Decide how much agent context to persist vs summarize. diff --git a/docs/02-implementation/pr-specs/PR-017-db-config-followups.md b/docs/02-implementation/pr-specs/PR-017-db-config-followups.md index e456540..d4fee70 100644 --- a/docs/02-implementation/pr-specs/PR-017-db-config-followups.md +++ b/docs/02-implementation/pr-specs/PR-017-db-config-followups.md @@ -51,7 +51,7 @@ SQLite FK enforcement, and docs alignment. - As a user, I must explicitly confirm a restore before it overwrites data. - As a developer, I can rely on SQLite foreign keys for any connection. -## UX Notes +## UX Notes (if applicable) - Restore prompts should be explicit about the target DB path. - The `--yes` flag should be consistent with `db reset`. @@ -126,3 +126,5 @@ SQLite FK enforcement, and docs alignment. - Start the API and confirm `~/.taskgenie/data/chroma` is created. ## Notes / Risks / Open Questions + +- N/A. 
From 8e5d2f8b7ac3f1ca96299802403bed48d1b448c5 Mon Sep 17 00:00:00 2001 From: Raymond Christopher Date: Thu, 1 Jan 2026 20:36:55 +0700 Subject: [PATCH 3/4] chore: update PR specifications and dependencies for improved clarity - Revise dependencies in PR specifications to reflect accurate relationships, including updates in PR-009, PR-010, PR-011, PR-012, and PR-013. - Remove unnecessary fields and enhance descriptions for better understanding of dependencies and project flow in `PR-PLANS.md`. - Clean up the return structure in `AsyncToolExecutor` to remove the placeholder duration field. These changes aim to enhance documentation clarity and ensure accurate representation of project dependencies. --- docs/02-implementation/PR-PLANS.md | 13 +++++++++---- .../pr-specs/PR-003B-agent-tool-calling.md | 3 +-- docs/02-implementation/pr-specs/PR-010-web-ui.md | 2 +- .../pr-specs/PR-011-notifications.md | 4 ++-- .../pr-specs/PR-012-deployment-docs.md | 2 +- .../pr-specs/PR-013-event-system.md | 2 +- 6 files changed, 15 insertions(+), 11 deletions(-) diff --git a/docs/02-implementation/PR-PLANS.md b/docs/02-implementation/PR-PLANS.md index f0d2285..c2a12d7 100644 --- a/docs/02-implementation/PR-PLANS.md +++ b/docs/02-implementation/PR-PLANS.md @@ -46,7 +46,7 @@ This sequence prioritizes **something usable early** (good UX) and then adds cap | 5 | PR-008 | Interactive TUI (Tasks MVP) | Validate UX early | PR-002 | tui-dev | | 6 | PR-003 | LLM + Chat Backbone | Make chat real (provider + API + TUI) | PR-001, PR-002, PR-008 | api-testing, tui-dev | | 7 | PR-004 | Attachments + Link Detection | Context capture for real work | PR-002 | task-workflow | -| 8 | PR-013 | Event System + Realtime Updates | Enable subscriptions + hooks | PR-002 | - | +| 8 | PR-013 | Event System + Realtime Updates | Enable subscriptions + hooks | PR-002, PR-004 | - | | 9 | PR-003B | Agent Tool-Calling Foundation | Safe tool execution | PR-003, PR-002, PR-004 | - | | 10 | PR-011 | Notifications | Early 
\"daily value\" | PR-002 | task-workflow | | 11 | PR-007 | GitHub Integration | High-value for dev tasks | PR-004 | integration-setup | @@ -54,9 +54,9 @@ This sequence prioritizes **something usable early** (good UX) and then adds cap | 13 | PR-005 | RAG + Semantic Search | Better recall + better chat | PR-003, PR-004 | rag-testing, context-optimization, context-compression | | 14 | PR-014 | Multi-Agent Orchestration | Coordinated agent runs | PR-003B, PR-013 | - | | 15 | PR-015 | Agent UX Panel | Visibility + controls | PR-008, PR-003B, PR-013, PR-014 | - | -| 16 | PR-009 | CLI Subcommands (Secondary) | Scriptable workflows + agent CLI | PR-002, PR-003B | task-workflow | -| 17 | PR-010 | Web UI | Secondary UX for rich preview | PR-002 (chat optional: PR-003) | - | -| 18 | PR-012 | Deployment + Docs | Make it easy to run/share | PR-010, PR-011 | - | +| 16 | PR-009 | CLI Subcommands (Secondary) | Scriptable workflows + agent CLI | PR-002, PR-003B, PR-014 | task-workflow | +| 17 | PR-010 | Web UI | Secondary UX for rich preview | PR-002, PR-004 (chat optional: PR-003) | - | +| 18 | PR-012 | Deployment + Docs | Make it easy to run/share | PR-001, PR-017, PR-010, PR-011 | - | Notes: - You can swap **Seq 10–13** based on what you can test earliest (notifications vs integrations vs RAG). @@ -96,6 +96,7 @@ flowchart TD PR008 --> PR003 PR002 --> PR004 PR002 --> PR013 + PR004 --> PR013 PR003 --> PR003B PR002 --> PR003B PR004 --> PR003B @@ -110,9 +111,13 @@ flowchart TD PR003B --> PR015 PR013 --> PR015 PR014 --> PR015 + PR014 --> PR009 PR003B --> PR009 PR002 --> PR010 + PR004 --> PR010 PR003 -. 
"chat UI (optional)" .-> PR010 + PR001 --> PR012 + PR017 --> PR012 PR010 --> PR012 PR011 --> PR012 ``` diff --git a/docs/02-implementation/pr-specs/PR-003B-agent-tool-calling.md b/docs/02-implementation/pr-specs/PR-003B-agent-tool-calling.md index 89a7adb..697a425 100644 --- a/docs/02-implementation/pr-specs/PR-003B-agent-tool-calling.md +++ b/docs/02-implementation/pr-specs/PR-003B-agent-tool-calling.md @@ -439,8 +439,7 @@ class AsyncToolExecutor: ) return { "status": "success", - "result": result, - "duration_ms": 0 # Example field (implementation should track actual duration) + "result": result } except asyncio.TimeoutError: diff --git a/docs/02-implementation/pr-specs/PR-010-web-ui.md b/docs/02-implementation/pr-specs/PR-010-web-ui.md index 9077a6a..ebbe9e5 100644 --- a/docs/02-implementation/pr-specs/PR-010-web-ui.md +++ b/docs/02-implementation/pr-specs/PR-010-web-ui.md @@ -1,7 +1,7 @@ # PR-010: Web UI (Spec) **Status:** Spec Only -**Depends on:** PR-002, PR-004 (chat optional: PR-003) +**Depends on:** PR-002, PR-004 (attachment viewing), PR-003 (chat optional) **Last Reviewed:** 2025-12-30 ## Goal diff --git a/docs/02-implementation/pr-specs/PR-011-notifications.md b/docs/02-implementation/pr-specs/PR-011-notifications.md index db943ca..11e70e6 100644 --- a/docs/02-implementation/pr-specs/PR-011-notifications.md +++ b/docs/02-implementation/pr-specs/PR-011-notifications.md @@ -1,7 +1,7 @@ # PR-011: Notifications (Spec) **Status:** Spec Only -**Depends on:** PR-002, PR-003B, PR-014 (agent notifications optional) +**Depends on:** PR-002 (agent notifications optional: PR-003B, PR-014) **Last Reviewed:** 2025-12-30 ## Goal @@ -28,7 +28,7 @@ and in Docker. - Quiet hours and deduplication. - Persisted notification history for UI/TUI viewing. - Delivery channels: local desktop notifications and in-app feed for Docker. -- Optional agent-run notifications (started/completed/failed) when agent system exists. 
+- Optional agent-run notifications (started/completed/failed) when agent system exists (requires PR-003B and PR-014). ### Out diff --git a/docs/02-implementation/pr-specs/PR-012-deployment-docs.md b/docs/02-implementation/pr-specs/PR-012-deployment-docs.md index de15b0b..26e83a2 100644 --- a/docs/02-implementation/pr-specs/PR-012-deployment-docs.md +++ b/docs/02-implementation/pr-specs/PR-012-deployment-docs.md @@ -1,7 +1,7 @@ # PR-012: Deployment + Documentation (Spec) **Status:** Spec Only -**Depends on:** PR-010, PR-011 +**Depends on:** PR-001 (backup/restore), PR-010, PR-011, PR-017 (backup/restore guidance) **Last Reviewed:** 2025-12-29 ## Goal diff --git a/docs/02-implementation/pr-specs/PR-013-event-system.md b/docs/02-implementation/pr-specs/PR-013-event-system.md index 3e19649..6366ab1 100644 --- a/docs/02-implementation/pr-specs/PR-013-event-system.md +++ b/docs/02-implementation/pr-specs/PR-013-event-system.md @@ -1,7 +1,7 @@ # PR-013: Event System + Realtime Updates (Spec) **Status:** Spec Only -**Depends on:** PR-002, PR-004 +**Depends on:** PR-002, PR-004 (for attachment event emission) **Last Reviewed:** 2026-01-01 ## Goal From 678d26e8b072e554fcec1125e82dd3f0c63f0dd7 Mon Sep 17 00:00:00 2001 From: Raymond Christopher Date: Thu, 1 Jan 2026 20:44:50 +0700 Subject: [PATCH 4/4] fix: improve link checking in documentation by excluding code blocks - Enhance the `check_relative_links` function in `check_docs.py` to remove code blocks from markdown files before checking for relative links. This change prevents false positives from links within code examples, improving the accuracy of link validation. These updates aim to enhance the reliability of documentation checks and ensure that only relevant links are evaluated. 
--- scripts/check_docs.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/scripts/check_docs.py b/scripts/check_docs.py index b6f8400..7eed5a6 100644 --- a/scripts/check_docs.py +++ b/scripts/check_docs.py @@ -31,7 +31,12 @@ def check_relative_links(markdown_files: list[Path]) -> list[str]: for md in markdown_files: text = md.read_text(encoding="utf-8") - for raw_target in LINK_RE.findall(text): + + # Remove code blocks to avoid false positives from code examples + code_block_pattern = re.compile(r"```[\s\S]*?```", re.MULTILINE) + text_without_code = code_block_pattern.sub("", text) + + for raw_target in LINK_RE.findall(text_without_code): target = raw_target.strip() if not target or target.startswith("#"): continue