diff --git a/.agents/context/ai-sdk-v6.md b/.agents/project/ai-sdk-v6.md similarity index 100% rename from .agents/context/ai-sdk-v6.md rename to .agents/project/ai-sdk-v6.md diff --git a/.agents/context/api.md b/.agents/project/api.md similarity index 100% rename from .agents/context/api.md rename to .agents/project/api.md diff --git a/.agents/context/architecture.md b/.agents/project/architecture.md similarity index 100% rename from .agents/context/architecture.md rename to .agents/project/architecture.md diff --git a/.agents/context/conventions.md b/.agents/project/conventions.md similarity index 100% rename from .agents/context/conventions.md rename to .agents/project/conventions.md diff --git a/.agents/context/database.md b/.agents/project/database.md similarity index 100% rename from .agents/context/database.md rename to .agents/project/database.md diff --git a/.agents/context/deployment.md b/.agents/project/deployment.md similarity index 100% rename from .agents/context/deployment.md rename to .agents/project/deployment.md diff --git a/.agents/context/glossary.md b/.agents/project/glossary.md similarity index 100% rename from .agents/context/glossary.md rename to .agents/project/glossary.md diff --git a/.agents/context/testing.md b/.agents/project/testing.md similarity index 100% rename from .agents/context/testing.md rename to .agents/project/testing.md diff --git a/.agents/context/ai-context-engineering-guide.md b/.agents/research/ai-context-engineering-guide.md similarity index 100% rename from .agents/context/ai-context-engineering-guide.md rename to .agents/research/ai-context-engineering-guide.md diff --git a/.agents/context/research/browser-extraction/component-state-extraction-research.md b/.agents/research/browser-extraction/component-state-extraction-research.md similarity index 100% rename from .agents/context/research/browser-extraction/component-state-extraction-research.md rename to 
.agents/research/browser-extraction/component-state-extraction-research.md diff --git a/.agents/context/research/browser-extraction/css-animation-extraction-research.md b/.agents/research/browser-extraction/css-animation-extraction-research.md similarity index 100% rename from .agents/context/research/browser-extraction/css-animation-extraction-research.md rename to .agents/research/browser-extraction/css-animation-extraction-research.md diff --git a/.agents/research/chatgpt-logged-out.png b/.agents/research/chatgpt-logged-out.png deleted file mode 100644 index 427a92c6..00000000 Binary files a/.agents/research/chatgpt-logged-out.png and /dev/null differ diff --git a/.agents/research/url-content-fetching.md b/.agents/research/url-content-fetching.md new file mode 100644 index 00000000..353917c9 --- /dev/null +++ b/.agents/research/url-content-fetching.md @@ -0,0 +1,663 @@ +# URL Content Fetching Capability for AI Chat + +> **Status**: Research Complete +> **Date**: 2026-02-22 +> **Scope**: Architecture, libraries, security, context budgets, specialized extractors, caching +> **Related**: `.agents/plans/phase-7-future-tool-integrations.md` (Sub-Phase 7.7) + +--- + +## Table of Contents + +1. [Production Landscape Survey](#1-production-landscape-survey) +2. [HTML-to-Text Conversion](#2-html-to-text-conversion) +3. [Security Considerations](#3-security-considerations) +4. [Context Budget Management](#4-context-budget-management) +5. [Architecture Placement](#5-architecture-placement) +6. [Specialized Content Types](#6-specialized-content-types) +7. [Hosted Services and APIs](#7-hosted-services-and-apis) +8. [Caching Strategy](#8-caching-strategy) +9. [Recommendations](#9-recommendations) + +--- + +## 1. 
Production Landscape Survey + +### How Major Platforms Implement URL Fetching + +| Platform | Architecture | JS Rendering | Content Budget | URL Source Restriction | +|----------|-------------|-------------|----------------|----------------------| +| ChatGPT | Server-side, proprietary | Yes (Atlas/Chromium) | Auto-summarization | Model constructs queries | +| Perplexity | Server-side, hybrid RAG | Unknown | `max_tokens_per_page` | Model-driven | +| Claude | Server-side, API tool | No (HTML only) | `max_content_tokens` + dynamic filtering | User-provided URLs only | +| Open WebUI | Server-side, dual-mode | No | 50K char hard cap | Model-driven | +| LibreChat | Server-side, Firecrawl | Yes (via Firecrawl) | Reranker truncation | Search-driven | +| LobeChat | Serverless plugin | No | Plugin-level | Explicit URL input | + +### ChatGPT / OpenAI + +Server-side with proprietary infrastructure. Reasoning models (o3, GPT-5) get two page-level actions beyond search: `open_page` (accesses a webpage) and `find_in_page` (searches within an opened page). The model doesn't read entire pages — it fans out short sub-queries, skims titles and introductions (~500–1,000 chars), and extracts answer blocks under headings. The Atlas browser (Oct 2025) runs a full Chromium-based browser via the OWL architecture. + +### Claude / Anthropic + +Two versions of the `web_fetch` tool: + +| Version | Features | +|---------|----------| +| `web_fetch_20250910` | Basic fetch + PDF extraction | +| `web_fetch_20260209` | Adds dynamic filtering (Opus 4.6, Sonnet 4.6) | + +Dynamic filtering enables Claude to write and execute code that filters fetched content *before* it enters the context window — achieving ~24% input token reduction and ~11% quality improvement. Anti-exfiltration measure: Claude cannot dynamically construct URLs; it can only fetch URLs explicitly provided by the user or from previous search/fetch results. 
+ +Configuration surface: +- `max_content_tokens` — hard cap on content length +- `max_uses` — limits fetches per request +- `allowed_domains` / `blocked_domains` — domain restrictions + +### Perplexity + +Three-stage RAG pipeline: hybrid retrieval → content fetching → grounded generation. Agent API exposes `web_search` (with `max_tokens_per_page`) and `fetch_url` (full page content). Content is fetched on-demand per query and not stored. + +### Open WebUI (Open Source) + +Agentic mode exposes `search_web` and `fetch_url` tools. `fetch_url` retrieves full page text, hard-capped at 50,000 characters, injected directly into context (no Vector DB, no chunking). Requires frontier models (GPT-5, Claude 4.5+) for effective multi-step tool use. No JS rendering. + +### LibreChat (Open Source) + +Three-component pipeline: **Search** (Serper/SearXNG) → **Scrape** (Firecrawl) → **Rerank** (Jina/Cohere). Firecrawl handles JS rendering and markdown conversion. Scraper timeout defaults to 7,500ms. Open enhancement request for direct URL fetching beyond search results. + +### Common Patterns + +1. Content extraction is always **server-side** (never client-side) +2. Trend toward **direct context injection** of filtered content over RAG chunking +3. Token/character limits enforced to prevent context overflow +4. Modern approaches (Claude's dynamic filtering, OpenAI's `find_in_page`) extract **relevant portions** rather than full pages +5. **Markdown** is the preferred output format (token-efficient, preserves structure) + +### Vercel AI SDK + +No built-in URL fetch tool — composable approach. 
Ready-made third-party integrations: + +| Package | Tool | +|---------|------| +| `@tavily/ai-sdk` (v0.4.1) | `tavilyExtract()` — URL content extraction + search | +| `@exalabs/ai-sdk` | `webSearch()` — search + content extraction | +| `@parallel-web/ai-sdk-tools` | `searchTool` + `extractTool` | + +The `@tavily/ai-sdk` `tavilyExtract()` tool is particularly relevant — it extracts clean, structured content from URLs with configurable `format` (markdown/text) and `extractDepth` (basic/advanced). ~4.6K weekly downloads. + +--- + +## 2. HTML-to-Text Conversion + +### The Standard Pipeline + +The dominant pattern for HTML → LLM-ready text: + +``` +Raw HTML → [DOM Parser] → [Content Extraction] → [Markdown Conversion] → Clean Markdown + jsdom Readability.js Turndown +``` + +Achieves **~70–80% token reduction** vs raw HTML. + +### Article Extraction Libraries + +| Library | Version | Weekly Downloads | Bundle (min+gz) | Dependencies | Quality | +|---------|---------|-----------------|-----------------|-------------|---------| +| `@mozilla/readability` | 0.6.0 | ~500K | ~15 KB | 0 | Excellent (articles) | +| `@extractus/article-extractor` | 8.0.20 | ~11.5K | Larger | Multiple | Good (rich metadata) | +| `cheerio` | 1.0.0 | ~8M | ~50 KB | 5+ (parse5) | Flexible (manual selectors) | + +**`@mozilla/readability`** is the clear winner for general-purpose extraction: +- Battle-tested (powers Firefox Reader View on billions of page loads) +- Zero dependencies, small footprint +- Returns `{ title, content (HTML), textContent, excerpt, byline }` +- Used by Jina Reader internally +- Requires a DOM environment (`jsdom` on server side) +- Modifies DOM in-place (clone the document first) +- Optimized for articles; weaker on forums, product pages, search results + +### Markdown Conversion + +| Library | Version | Weekly Downloads | Bundle (min+gz) | Speed | +|---------|---------|-----------------|-----------------|-------| +| `turndown` | 7.2.0 | ~2.37M | 3.96 KB | Baseline | +| 
`node-html-markdown` | 1.3.0 | ~328K | ~8 KB | **1.57x faster** | + +Performance benchmarks (reused instance): + +| Input Size | `node-html-markdown` | `turndown` | +|------------|---------------------|-----------| +| 100 KB | 17 ms | 27 ms | +| 1 MB | 176 ms | 280 ms | + +**`turndown`** has 7x larger ecosystem, plugin system (GFM tables/strikethrough), and is used by Jina Reader in production. **`node-html-markdown`** is consistently faster but has fewer community integrations. + +Recommendation: **`turndown`** for ecosystem maturity. The 1.57x speed difference is negligible for single-page fetches (27ms vs 17ms at 100KB). + +### DOM Parsing (Server-Side) + +| Library | Import Time | HTML Parse | Dependencies | +|---------|------------|------------|-------------| +| `jsdom` | 333 ms | 256 ms | 20+ | +| `happy-dom` | 45 ms | 26 ms | Few | +| `linkedom` | Fast | Fast | Few | + +**`jsdom`** is required by `@mozilla/readability` and has the most complete browser emulation (~14M weekly downloads). `happy-dom` is 7.4x faster but less comprehensive. For this use case, `jsdom` is the correct choice because Readability depends on its DOM fidelity. + +### JavaScript-Rendered Content + +| Content Type | Approach | Cost | +|-------------|----------|------| +| Static HTML (articles, blogs, docs) | `fetch` + `jsdom` + Readability | Minimal | +| SPA / JS-rendered | Playwright/Puppeteer or hosted service (Jina/Firecrawl) | High | +| Known site structures | `fetch` + `cheerio` + custom selectors | Minimal | + +Static HTML covers the vast majority of URLs users share in chat (articles, documentation, blog posts). JS-rendered SPAs are an edge case that can be handled by falling back to Jina Reader or Firecrawl. 
+ +### Token Reduction Benchmarks + +| Content Type | Raw HTML Tokens | After Readability+Turndown | Reduction | +|-------------|----------------|---------------------------|-----------| +| Blog post | ~16,000 | ~3,150 | **80%** | +| E-commerce page | ~40,000 | ~2,000 | **95%** | +| News article | 15–25K | 2–5K | **75–80%** | +| Documentation | 10–30K | 3–8K | **70–75%** | +| Wikipedia | 20–80K | 5–20K | **60–75%** | + +Markdown-formatted content shows **35% better RAG accuracy** vs raw HTML. + +### Emerging Alternatives + +**ReaderLM-v2** (Jina AI, Jan 2025): 1.5B parameter model trained specifically for HTML → Markdown. Handles complex elements (code fences, nested lists, tables, LaTeX) with 512K token context. 15–20% better than GPT-4o on extraction benchmarks. Available via Jina API. Trade-off: requires model inference vs zero-cost heuristic conversion. + +**MinerU-HTML / Dripper** (ICLR 2026): 0.6B parameter model for semantic block classification. Reduces HTML to 22% of original tokens while preserving structure. 81.58% ROUGE-N F1 vs Readability's 64.91%. Requires running a small model — heavier infrastructure. + +Neither is practical for the MVP, but both indicate the direction the field is heading. + +--- + +## 3. Security Considerations + +### SSRF (Server-Side Request Forgery) + +The primary risk of server-side URL fetching. Must block: + +- **Private IPs**: `10.0.0.0/8`, `172.16.0.0/12`, `192.168.0.0/16` +- **Loopback**: `127.0.0.0/8`, `::1` +- **Link-local**: `169.254.0.0/16`, `fe80::/10` +- **Cloud metadata endpoints**: AWS `169.254.169.254`, GCP `metadata.google.internal`, Azure `169.254.169.254` +- **Alternative IP representations**: Octal (`0177.0.0.1`), hex (`0x7f000001`), IPv6-mapped IPv4 (`::ffff:127.0.0.1`), decimal integer (`2130706433`) +- **URL schemes**: Only `http:` and `https:`. Block `file:`, `ftp:`, `data:`, `javascript:`, etc. 
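A minimal gate for the scheme and IPv4 rules above (an illustrative sketch only: it skips IPv6, DNS resolution, and the alternative encodings listed, which is exactly why a vetted library is preferable in production):

```typescript
// Reject anything that is not plain http/https.
function isAllowedScheme(rawUrl: string): boolean {
  try {
    const { protocol } = new URL(rawUrl);
    return protocol === 'http:' || protocol === 'https:';
  } catch {
    return false; // unparseable URLs are rejected outright
  }
}

// Reject private, loopback, link-local, and metadata IPv4 ranges.
function isBlockedIPv4(ip: string): boolean {
  const parts = ip.split('.').map(Number);
  if (parts.length !== 4 || parts.some((p) => !Number.isInteger(p) || p < 0 || p > 255)) {
    return true; // not a clean dotted-quad: fail closed
  }
  const [a, b] = parts;
  return (
    a === 10 ||                          // 10.0.0.0/8
    (a === 172 && b >= 16 && b <= 31) || // 172.16.0.0/12
    (a === 192 && b === 168) ||          // 192.168.0.0/16
    a === 127 ||                         // 127.0.0.0/8 loopback
    (a === 169 && b === 254) ||          // 169.254.0.0/16 incl. metadata
    a === 0
  );
}
```

Note that failing closed on anything that is not a clean dotted-quad also rejects the octal/hex/decimal encodings, forcing them through DNS resolution where the resolved address can be checked instead.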
+ +### DNS Rebinding Prevention + +Attacker's DNS initially resolves to a public IP (passes validation), then TTL expires and resolves to `127.0.0.1` (actual request hits internal network). Mitigation requires resolving DNS *and pinning the resolved IP* for the actual request — no gap between validation and connection (TOCTOU). + +### Node.js SSRF Protection Libraries + +| Library | Weekly Downloads | DNS Rebinding | Cloud Metadata | TypeScript | +|---------|-----------------|---------------|----------------|-----------| +| `ssrf-agent-guard` (v1.1, Jan 2026) | New | Yes | AWS/GCP/Azure/Oracle/DO/K8s | Yes | +| `request-filtering-agent` | ~101K | No | Partial | No | +| `ssrf-req-filter` | ~45K | Open issue | No | No | + +**`ssrf-agent-guard`** is the most feature-complete pure-TypeScript option (MIT, Jan 2026): +- Blocks private/reserved IPs + cloud metadata endpoints +- DNS rebinding detection +- Policy-based domain filtering (allowlists, denylists, TLD blocking) +- Multiple modes (block/report/allow) +- Works with axios, node-fetch, native fetch via http.Agent wrapping +- Only 6 releases, 2 contributors — newer library, less battle-tested + +**`request-filtering-agent`** has the widest adoption (~101K weekly downloads) but lacks DNS rebinding protection and cloud metadata blocking. + +For defense-in-depth, layer: URL normalization (WHATWG URL API) + protocol restriction + DNS resolution with IP classification + redirect validation at each hop. 
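The validate/connect gap closes when the validated address itself is handed to the socket. A simplified sketch using Node's `lookup` request option (illustrative, not production-grade: IPv4 only, no redirect handling, and the `isBlocked` predicate is assumed to exist):

```typescript
import { promises as dns } from 'node:dns';
import https from 'node:https';
import type { IncomingMessage } from 'node:http';

// Resolve once, validate, then pin that exact address for the connection,
// so a second (rebinding) DNS answer can never be used.
async function pinnedGet(
  rawUrl: string,
  isBlocked: (ip: string) => boolean,
): Promise<IncomingMessage> {
  const { hostname } = new URL(rawUrl);
  const { address } = await dns.lookup(hostname, { family: 4 });
  if (isBlocked(address)) {
    throw new Error(`Blocked address for ${hostname}: ${address}`);
  }
  return new Promise((resolve, reject) => {
    const req = https.get(
      rawUrl,
      // Pin: the socket receives the already-validated IP, not a fresh lookup
      { lookup: (_host, _opts, cb) => cb(null, address, 4) },
      resolve,
    );
    req.on('error', reject);
  });
}
```

The same pattern must be re-applied at every redirect hop, since each `Location` header is a fresh hostname.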
+ +### Response Handling + +| Control | Recommended Value | Rationale | +|---------|-------------------|-----------| +| Content-type allowlist | `text/html`, `text/plain`, `application/json`, `application/xml`, `application/pdf` | Reject binary, media, executables | +| Response size limit | 5 MB raw | Generous for HTML; content will be compressed to markdown | +| Redirect hops | 3–5 maximum | Validate each destination against SSRF rules | +| Connection timeout | 5–10 seconds | Prevent hanging connections | +| Total timeout | 15 seconds | Match existing `TOOL_EXECUTION_TIMEOUT_MS` | +| Streaming cutoff | AbortController at size limit | Don't buffer the entire response before checking | + +### Rate Limiting + +| Dimension | Recommended Limit | Rationale | +|-----------|-------------------|-----------| +| Per-user per hour | 30 fetches | Prevents sustained abuse | +| Per-conversation turn | 5 fetches | Matches existing step limits | +| Per-domain per minute | 3 requests | Prevents hammering a single site | +| Anonymous users | 10 fetches per day | Daily message limit is the primary control | + +### Legal/Ethical + +- **User-initiated fetches** (user shares a URL) are analogous to a browser acting on behalf of the user — distinct from autonomous crawling. ChatGPT, Perplexity, and Claude all fetch user-provided URLs without robots.txt checks. +- **Robots.txt**: Voluntary protocol (RFC 9309). For user-initiated fetches, treat like a user agent. Not legally binding. +- **Mitigation**: Rate limit aggressively, don't cache/redistribute content long-term, attribute sources in responses. +- Set a descriptive `User-Agent` header (e.g., `NotAWrapper/1.0 (User-initiated content fetch)`). + +--- + +## 4. 
Context Budget Management + +### Token Reduction from Extraction + +The extraction pipeline (Readability + Turndown) provides massive token savings: + +| Page Type | Raw HTML Tokens | After Extraction | Reduction | +|-----------|----------------|-----------------|-----------| +| Blog post | ~16,000 | ~3,150 | 80% | +| E-commerce | ~40,000 | ~2,000 | 95% | +| News article | 15–25K | 2–5K | 75–80% | + +### Token Counting + +| Approach | Speed | Accuracy | Portability | +|----------|-------|----------|-------------| +| `characters / 4` heuristic | Instant | ±15% | All models | +| `js-tiktoken` (exact BPE) | 1,494–31,334 ops/sec | Exact for OpenAI | OpenAI only | +| `@dqbd/tiktoken` (WASM) | 1,992 ops/sec | Exact for OpenAI | OpenAI only | + +**Recommendation**: Use `content.length / 4` heuristic for budget gating at fetch time. Different providers tokenize differently — the heuristic is more portable than exact counting with one tokenizer. Reserve exact counting for when approaching hard limits. + +English prose ratios: ~176 tokens per 1,000 characters (GPT-4o), ~185 tokens per 1,000 characters (GPT-4/cl100k). + +### Budget Framework + +For a 128K context window model: + +``` +┌──────────────────────────────────────────────────┐ +│ USABLE CONTEXT: ~100K tokens │ +│ (128K window - 28K safety margin) │ +├──────────────────────────────────────────────────┤ +│ │ +│ Fixed Costs: 7–15K │ +│ ├── System prompt 2–5K │ +│ ├── Tool definitions 2–5K │ +│ └── Static instructions 3–5K │ +│ │ +│ Variable Costs: 50–70K │ +│ ├── Conversation history 10–30K │ +│ ├── Fetched web content 20–40K │ +│ └── Tool results (non-web) 5–10K │ +│ │ +│ Reserved: 8–15K │ +│ ├── Response generation 4–8K │ +│ └── Reasoning overhead 4–8K │ +│ │ +└──────────────────────────────────────────────────┘ +``` + +**Allocation rule**: Fetched content budget = `min(user_limit, model_context_window * 0.25)`. 
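The heuristic and the allocation rule above reduce to a few lines (a sketch; names are illustrative):

```typescript
const CHARS_PER_TOKEN = 4; // portable heuristic, roughly ±15%

function estimateTokens(content: string): number {
  return Math.ceil(content.length / CHARS_PER_TOKEN);
}

// Fetched-content budget = min(user_limit, model_context_window * 0.25)
function contentBudget(userLimitTokens: number, contextWindowTokens: number): number {
  return Math.min(userLimitTokens, Math.floor(contextWindowTokens * 0.25));
}

function withinBudget(
  content: string,
  userLimitTokens: number,
  contextWindowTokens: number,
): boolean {
  return estimateTokens(content) <= contentBudget(userLimitTokens, contextWindowTokens);
}
```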
+ +20–25% of the model's practical context limit is the sweet spot — enough to be useful, conservative enough to leave room for conversation history and response generation. + +### Model-Aware Defaults + +| Model Family | Context Window | Practical Limit (~65%) | Content Budget (25%) | +|-------------|---------------|----------------------|---------------------| +| GPT-4o / GPT-5 | 128K | ~83K | 20K | +| Claude Sonnet/Opus | 200K | ~130K | 30K | +| Gemini 1.5 Pro | 1M | ~650K | 100K | +| Small models (32K) | 32K | ~21K | 5K | + +Models claiming large context windows become unreliable well before the advertised limit. Performance degrades at ~65% of advertised capacity with "sudden performance drops rather than gradual degradation." + +### Smart Truncation + +Truncation priority order: +1. Section/heading boundaries (best) +2. Paragraph boundaries (good) +3. Sentence boundaries (acceptable) +4. Word boundaries (minimum viable) + +Never truncate at arbitrary character offsets — boundary-aware truncation preserves significantly more useful information at the same token count. + +### Monitoring Thresholds + +| Context Occupancy | Action | +|-------------------|--------| +| < 70% | Normal operation | +| 70% | Soft cap — trigger history summarization | +| 85–90% | Hard cap — refuse new tool calls or drop low-value chunks | +| 95%+ | Emergency compression | + +--- + +## 5. Architecture Placement + +### Options Analysis + +| Approach | Description | Pros | Cons | +|----------|-------------|------|------| +| **A. Layer 2 standalone tool** | Add `content_extract` alongside `web_search` in `lib/tools/third-party.ts` | Minimal change, follows existing patterns, model decides when to use it | New tool name in all providers | +| **B. Search enhancement** | Automatically fetch full content for top search results | Better UX for "search and read" | Wastes tokens/money on results the model doesn't need | +| **C. 
Provider-native** | Use each provider's own fetch tool where available | Highest quality (Claude's dynamic filtering) | Only Anthropic has this; inconsistent across providers | +| **D. MCP server** | Optional MCP server users install | Zero default footprint | Requires opt-in configuration, not discoverable | +| **E. Separate "browsing" mode** | Toggle between search and browse modes | Clear UX intent | Complicates the interface, ChatGPT-style complexity | + +### Recommendation: A (Layer 2 Standalone) with Exa as MVP Backend + +**Rationale**: The existing Phase 7.7 plan in `.agents/plans/phase-7-future-tool-integrations.md` already describes this approach with Exa's `getContents()`. It requires: +- Zero new dependencies (Exa SDK already installed) +- Minimal code change (~30 lines in `lib/tools/third-party.ts`) +- Same API key as search (unified BYOK billing) +- $1/1K pages (cheaper than search at $5/1K) + +**Enhancement path** (post-MVP): +1. **MVP**: Exa `getContents()` — zero new deps, immediate value +2. **V2**: Self-hosted pipeline (`fetch` + `@mozilla/readability` + `turndown`) — zero per-request cost, better for high-volume +3. **V3**: Specialized extractors (YouTube transcripts, GitHub, PDFs) — highest quality per content type +4. **V4**: Jina Reader fallback for JS-rendered pages — covers the SPA edge case + +### Access Control + +| User Type | Recommendation | Rationale | +|-----------|----------------|-----------| +| Authenticated | Full access, 30 fetches/hour | Primary user base | +| Anonymous | Allowed, 10 fetches/day | At $1/1K pages, worst case ~$0.01/day per anonymous user. Daily message limit is the real control. | +| BYOK | Full access, their own API costs | Same Exa key handles search and extraction | + +### Tool Decision: Model-Driven (`toolChoice: "auto"`) + +The model should decide when to use `content_extract` based on user intent. 
No proactive URL extraction — URLs appear in code snippets, reference links, and other contexts where fetching would be wrong. Claude, GPT-5, and Gemini all demonstrate good judgment about when URL content is needed vs. when the URL is just a reference. + +--- + +## 6. Specialized Content Types + +### Value vs Complexity Assessment + +| Extractor | Value | Complexity | Verdict | Priority | +|-----------|-------|------------|---------|----------| +| YouTube transcripts | Very High — unique content AI can't get otherwise | Low (one npm package) | **Must have** | V3 | +| PDF | High — common link type, needs specialized parsing | Low (`unpdf`, one function) | **Must have** | V3 | +| GitHub | High — structured data (issues, code, README) | Low-Medium (Octokit, REST API) | **Must have** | V3 | +| Wikipedia | Medium — cleaner than generic scraping | Very Low (REST API, no auth) | **Worth it** | V3 | +| Twitter/X | Medium — tweets are short | Low (oEmbed, free) but unreliable for threads | **Worth it** | V4 | + +### YouTube + +Neither ChatGPT nor Claude can reliably extract YouTube transcripts today. This is a real differentiation opportunity. + +| Library | Version | Weekly Downloads | Notes | +|---------|---------|-----------------|-------| +| `youtube-transcript` | 1.2.1 | 135.6K | Most popular, zero deps, MIT | +| `youtube-transcript-plus` | 1.2.0 | Growing | Fork with proxy/custom-fetch support (Feb 2026) | + +```typescript +import { YoutubeTranscript } from 'youtube-transcript'; +const transcript = await YoutubeTranscript.fetchTranscript('dQw4w9WgXcQ'); +// Returns: [{ text: string, duration: number, offset: number, lang?: string }] +``` + +Uses unofficial YouTube endpoints (timedtext API). Technically violates YouTube ToS. Widely tolerated at low volume with rate limiting and caching. The official YouTube Data API v3 cannot download transcript text. 
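Raw segments are short fragments with offsets; a small formatter can merge them into timestamped blocks before injection, which costs far fewer tokens than one timestamp per fragment. A sketch (hypothetical helper; the unit of `offset` has varied across versions of these libraries, and this assumes seconds):

```typescript
type Segment = { text: string; duration: number; offset: number };

function stamp(seconds: number): string {
  const m = Math.floor(seconds / 60);
  const s = Math.floor(seconds % 60);
  return `[${String(m).padStart(2, '0')}:${String(s).padStart(2, '0')}]`;
}

// Merge transcript fragments into ~30-second blocks with [mm:ss] markers.
function transcriptToMarkdown(segments: Segment[], blockSeconds = 30): string {
  const blocks: string[] = [];
  let current: string[] = [];
  let blockStart = 0;
  for (const seg of segments) {
    if (current.length > 0 && seg.offset - blockStart >= blockSeconds) {
      blocks.push(stamp(blockStart) + ' ' + current.join(' '));
      current = [];
    }
    if (current.length === 0) blockStart = seg.offset;
    current.push(seg.text.trim());
  }
  if (current.length > 0) blocks.push(stamp(blockStart) + ' ' + current.join(' '));
  return blocks.join('\n\n');
}
```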
+ +### PDF + +| Library | Weekly Downloads | Serverless | Notes | +|---------|-----------------|------------|-------| +| `unpdf` | 266.8K | Yes | Modern, zero deps, recommended | +| `pdf-parse` | ~2.2M | No | Most downloaded but unmaintained | +| `pdfjs-dist` | High | No | Low-level, maximum control | + +**`unpdf`** is the clear winner — zero dependencies, works in Node.js/Bun/Deno/Cloudflare Workers, bundles PDF.js v5.4: + +```typescript +import { extractText, getDocumentProxy } from 'unpdf'; +const buffer = await fetch(pdfUrl).then(r => r.arrayBuffer()); +const pdf = await getDocumentProxy(new Uint8Array(buffer)); +const { totalPages, text } = await extractText(pdf, { mergePages: true }); +``` + +Quality: excellent for text, poor for tables (loses structure), no OCR for scanned PDFs. A 100-page PDF ≈ 30–50K tokens. + +### GitHub + +GitHub REST API with `@octokit/rest` (~3.5M weekly downloads). Rate limits: 60 req/hour unauthenticated, 5,000 req/hour authenticated. Standard pattern for "summarize this repo": fetch README + repo metadata + directory tree + package.json. + +URL detection covers: +- `github.com/{owner}/{repo}` → README + metadata +- `github.com/{owner}/{repo}/issues/{number}` → issue body + comments +- `github.com/{owner}/{repo}/pull/{number}` → PR description + diff stats +- `github.com/{owner}/{repo}/blob/{branch}/{path}` → file content + +### Wikipedia + +REST API at `en.wikipedia.org/api/rest_v1/` provides clean endpoints. No auth required. `wikipedia` npm package (v2.1.2) provides `page.summary()` and `page.content()`. `wtf_wikipedia` (v10.4.1, 6.7K weekly downloads) parses wikitext into structured sections. 
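The summary endpoint needs no dependencies at all. A sketch (the URL shape follows the REST v1 summary route; error handling is minimal):

```typescript
function summaryUrl(title: string): string {
  return `https://en.wikipedia.org/api/rest_v1/page/summary/${encodeURIComponent(title)}`;
}

// Returns the article's lead-section plain-text extract.
async function wikipediaSummary(title: string): Promise<{ title: string; extract: string }> {
  const res = await fetch(summaryUrl(title), { headers: { accept: 'application/json' } });
  if (!res.ok) throw new Error(`Wikipedia REST error: ${res.status}`);
  const data = (await res.json()) as { title: string; extract: string };
  return { title: data.title, extract: data.extract };
}
```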
+ +### Architecture: URL Router Pattern + +``` +User shares URL → URL Detector (regex) → Specialized Extractor or Generic Fallback + ↓ + Normalized Output: { type, title, content (markdown), metadata, sourceUrl, tokenEstimate } +``` + +All extractors return the same `ExtractedContent` shape, keeping downstream LLM prompt logic consistent regardless of source type. The generic fallback (Exa `getContents` for MVP, Readability+Turndown for V2) handles the long tail. + +--- + +## 7. Hosted Services and APIs + +### Comparison Matrix + +| Service | Best For | JS Render | Pricing | Self-Host | Latency | +|---------|----------|-----------|---------|-----------|---------| +| **Exa `getContents`** | Search + extraction pipeline | Yes (internal) | $1/1K pages | No | Fast (cached) | +| **Jina Reader** (`r.jina.ai`) | Simple URL → Markdown | Yes (headless Chrome) | Free 1M tokens, then token-based | Yes (Apache 2.0) | Fast | +| **Tavily Extract** (`@tavily/ai-sdk`) | AI SDK integration | Unknown | Credit-based | No | Medium | +| **Firecrawl** | Structured extraction + crawling | Yes (Chromium) | Free 500 credits, $16–$599/mo | Yes (AGPL, Docker) | Medium | +| **Browserless** | Full browser automation | Yes | Free 1K units, $200–$500/mo | Yes | Variable | + +### Exa `getContents()` — MVP Backend + +Already integrated via `exa-js`. `getContents()` fetches and extracts content from specific URLs: + +```typescript +const results = await exa.getContents(urls, { + text: { maxCharacters: 10000 }, +}); +``` + +- **$1/1K pages** (vs $5/1K search requests) +- Returns from Exa's cache (instant), falls back to live crawl +- Content extraction options: text (markdown), highlights (AI excerpts), summary (LLM-generated) +- Same API key as search — unified BYOK +- No new dependency + +### Jina Reader — JS-Rendered Fallback + +Prefix any URL with `https://r.jina.ai/` → returns clean Markdown. Uses Readability + Turndown internally, with Puppeteer for JS rendering. 
Processes 100 billion tokens daily. Open source (Apache 2.0).

- Free tier: ~1M tokens (IP-based rate limits)
- Supports CSS selectors, image captions, PDF reading
- ReaderLM-v2 option for higher quality (3x token cost)
- No npm package needed — simple HTTP API

### Tavily Extract — AI SDK Native

The `@tavily/ai-sdk` (v0.4.1) provides a `tavilyExtract()` tool that plugs directly into Vercel AI SDK's `tools` parameter:

```typescript
import { tavilyExtract } from "@tavily/ai-sdk";
tools: { extract: tavilyExtract({ format: "markdown" }) }
```

Cleanest integration for AI SDK, but adds a new dependency and API key. ~4.6K weekly downloads.

### Self-Hosted Pipeline — Long-Term

For zero per-request cost:

```typescript
import { JSDOM } from 'jsdom';
import { Readability } from '@mozilla/readability';
import TurndownService from 'turndown';

async function fetchAndExtract(url: string): Promise<string> {
  const response = await fetch(url, { signal: AbortSignal.timeout(15000) });
  const html = await response.text();
  const dom = new JSDOM(html, { url });
  // Readability mutates the DOM in place; clone first if the document is reused
  const article = new Readability(dom.window.document).parse();
  if (!article) return '';
  return new TurndownService({ headingStyle: 'atx', codeBlockStyle: 'fenced' })
    .turndown(article.content);
}
```

Dependencies: `jsdom` (~14M/wk), `@mozilla/readability` (~500K/wk), `turndown` (~2.37M/wk). Total: ~3 new dependencies. Full pipeline: ~300–500ms per page.

---

## 8. Caching Strategy

### Should Fetched Content Be Cached?

**Yes, with a short TTL and a shared, URL-keyed cache.**

Web page content changes. Caching too aggressively returns stale data; not caching at all wastes API credits and increases latency for repeated URLs (common in multi-turn conversations about the same page).
### Recommended Approach

| Dimension | Recommendation | Rationale |
|-----------|----------------|-----------|
| **TTL** | 15–30 minutes | Long enough for multi-turn conversations about the same URL; short enough that content stays fresh |
| **Scope** | Shared cache (same URL = same content) | No privacy concern — content is publicly accessible by URL. Shared cache maximizes hit rate. |
| **Storage** | In-memory (Map or LRU cache) for MVP | No infrastructure dependency. Process-level cache is fine for single-server deployments. |
| **Key** | Normalized URL (strip tracking params, normalize case) | Prevent duplicate fetches for equivalent URLs |
| **Eviction** | LRU with 500-entry cap | Prevents unbounded memory growth |
| **Privacy** | Don't log URLs fetched | URLs can reveal user interests and browsing patterns |

### Implementation Sketch

```typescript
type CacheEntry = { content: string; fetchedAt: number };

const cache = new Map<string, CacheEntry>();
const CACHE_TTL_MS = 15 * 60 * 1000; // 15 minutes
const MAX_ENTRIES = 500; // enforced by LRU eviction on insert (not shown)

// Strip fragments and tracking params so equivalent URLs share one entry
function normalizeUrl(url: string): string {
  const u = new URL(url);
  u.hash = '';
  for (const key of [...u.searchParams.keys()]) {
    if (key.startsWith('utm_')) u.searchParams.delete(key);
  }
  return u.toString();
}

function getCached(url: string): string | null {
  const entry = cache.get(normalizeUrl(url));
  if (!entry) return null;
  if (Date.now() - entry.fetchedAt > CACHE_TTL_MS) {
    cache.delete(normalizeUrl(url));
    return null;
  }
  return entry.content;
}
```

### What Not To Cache

- URLs with authentication tokens or session-specific content
- Content fetched via POST or non-idempotent requests
- Error responses (cache misses, not failures)

### Scaling Beyond In-Memory

If the application scales to multiple server instances, migrate to Redis or a Convex-backed cache. Redis LangCache provides semantic caching (matches semantically similar queries to cached results) — relevant for search results but overkill for URL content caching where the key is the exact URL.

---

## 9. 
Recommendations + +### Progressive Enhancement Path + +| Phase | What | Dependencies | Per-Request Cost | Coverage | +|-------|------|-------------|-----------------|---------| +| **MVP** | Exa `getContents()` tool in Layer 2 | None (Exa already installed) | $1/1K pages | Static HTML, cached pages | +| **V2** | Self-hosted Readability + Turndown pipeline | `jsdom`, `@mozilla/readability`, `turndown` | $0 | Static HTML (no JS rendering) | +| **V3** | Specialized extractors (YouTube, PDF, GitHub, Wikipedia) | `youtube-transcript`, `unpdf`, `@octokit/rest`, `wikipedia` | $0 (mostly) | Structured content from known platforms | +| **V4** | Jina Reader fallback for JS-rendered pages | None (HTTP API) | Token-based | SPAs, JS-rendered content | +| **V5** | SSRF hardening + production rate limiting | `ssrf-agent-guard` or `request-filtering-agent` | $0 | Security | + +### MVP Specification (Phase 7.7 Alignment) + +The MVP aligns with the existing plan in `.agents/plans/phase-7-future-tool-integrations.md` Sub-Phase 7.7: + +**Tool name**: `content_extract` +**Location**: `lib/tools/third-party.ts` (alongside `web_search`) +**Backend**: Exa `getContents()` +**Input**: `{ urls: z.array(z.string().url()).min(1).max(5) }` +**Output**: `{ ok, data: [{ url, title, content }], error, meta }` +**Content limit**: 10,000 characters per URL (≈2,500 tokens) +**Cost**: $1/1K pages (`estimatedCostPer1k: 1`) +**Timeout**: 15s (existing `TOOL_EXECUTION_TIMEOUT_MS`) +**Access**: All users (authenticated and anonymous) +**Tool metadata**: `{ displayName: "Read Page", source: "third-party", serviceName: "Exa", readOnly: true }` + +### Key Design Decisions + +| Decision | Choice | Rationale | +|----------|--------|-----------| +| Model triggers fetch | `toolChoice: "auto"` | Model judges intent better than URL regex | +| Content format | Markdown | Token-efficient, preserves structure, industry standard | +| Content budget default | 10K chars (MVP), model-aware in V2 | Conservative start; can 
increase based on usage data | +| Caching | In-memory LRU, 15-min TTL | Simple, effective for multi-turn conversations | +| SSRF protection | `ssrf-agent-guard` (V5) | Most complete TypeScript library; not needed for MVP since Exa handles fetching | +| Specialized extractors | V3 (after generic works) | YouTube transcripts are the highest-value differentiator | + +### Risks and Mitigations + +| Risk | Impact | Mitigation | +|------|--------|------------| +| Exa extraction quality varies by page | Medium — some pages return poor content | V2 self-hosted pipeline as fallback; Jina Reader for JS-rendered pages | +| Exa service outage | High — tool becomes unavailable | Timeout + graceful error messaging; V2 self-hosted pipeline as backup | +| SSRF vulnerability (V2 self-hosted) | Critical — server-side URL fetching | Defer self-hosted fetching to V5 with proper SSRF protection | +| Context budget overflow | Medium — degraded model performance | `chars / 4` budget gating; smart truncation at paragraph boundaries | +| YouTube scraping breaks | Medium — unofficial API can change | Cache transcripts; degrade gracefully to generic page scraping | +| Cost at scale | Low — $1/1K pages is cheap | BYOK passes cost to user; platform key has existing billing controls | + +### Dependencies Summary + +| Phase | New Dependencies | Bundle Impact | +|-------|-----------------|---------------| +| MVP | None | None | +| V2 | `jsdom`, `@mozilla/readability`, `turndown` | ~2 MB (server-only, no client bundle impact) | +| V3 | `youtube-transcript`, `unpdf`, `wikipedia` | ~200 KB (server-only) | +| V4 | None (HTTP API to Jina) | None | +| V5 | `ssrf-agent-guard` | ~50 KB (server-only) | + +--- + +## Appendix A: Tavily as Alternative to Exa + +The `@tavily/ai-sdk` (v0.4.1) provides `tavilyExtract()` that plugs directly into AI SDK's tool system. It handles URL extraction with configurable format (markdown/text) and depth (basic/advanced). Extract API supports up to 20 URLs simultaneously. 
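The content-budget and truncation decisions recorded in the tables above (10K chars per URL, `chars / 4` token gating, truncation at paragraph boundaries) can be sketched as a small helper. This is a hedged illustration of the design decision, not project code — the 80% cut-point heuristic and the truncation marker text are assumptions, while the 10,000-char default and chars/4 estimate come from the MVP spec:

```typescript
// Sketch of the MVP content budget: clamp extracted page text to a
// character limit, preferring to cut at a paragraph break near the limit.
// The 10_000-char default and chars/4 token estimate come from the spec
// tables; the 80% cut-point and "[content truncated]" marker are assumptions.
function truncateContent(content: string, maxChars = 10_000): string {
  if (content.length <= maxChars) return content
  const slice = content.slice(0, maxChars)
  const lastBreak = slice.lastIndexOf("\n\n")
  // Only honor a paragraph break if it falls in the final 20% of the budget;
  // otherwise a hard cut loses less content than rewinding to the break.
  const truncated = lastBreak > maxChars * 0.8 ? slice.slice(0, lastBreak) : slice
  return `${truncated}\n\n[content truncated]`
}

// Rough token estimate used for budget gating (≈4 characters per token).
const estimateTokens = (text: string): number => Math.ceil(text.length / 4)
```

With the 10K default this keeps each URL's contribution near 2,500 tokens, matching the per-URL limit in the MVP specification.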
+ +**Comparison with Exa `getContents()`**: + +| Dimension | Exa | Tavily | +|-----------|-----|--------| +| Already integrated | Yes (`exa-js`) | No (new dependency) | +| AI SDK plugin | Via `@exalabs/ai-sdk` (not used due to BYOK) | Via `@tavily/ai-sdk` | +| Pricing | $1/1K pages | Credit-based (varies) | +| BYOK support | Yes (explicit key constructor) | Yes (API key param) | +| Batch size | Unlimited | 20 URLs | +| Output format | Text, highlights, summary | Markdown, text | + +**Verdict**: Exa is the correct MVP choice — already integrated, zero new dependencies, cheaper, proven in the codebase. + +## Appendix B: Full Library Reference + +| Library | npm Package | Version | Weekly Downloads | License | +|---------|-------------|---------|-----------------|---------| +| Readability | `@mozilla/readability` | 0.6.0 | ~500K | Apache 2.0 | +| Turndown | `turndown` | 7.2.0 | ~2.37M | MIT | +| jsdom | `jsdom` | latest | ~14M | MIT | +| node-html-markdown | `node-html-markdown` | 1.3.0 | ~328K | MIT | +| unpdf | `unpdf` | latest | 266.8K | MIT | +| youtube-transcript | `youtube-transcript` | 1.2.1 | 135.6K | MIT | +| youtube-transcript-plus | `youtube-transcript-plus` | 1.2.0 | Growing | MIT | +| wikipedia | `wikipedia` | 2.1.2 | Moderate | MIT | +| wtf_wikipedia | `wtf_wikipedia` | 10.4.1 | 6.7K | MIT | +| ssrf-agent-guard | `ssrf-agent-guard` | 1.1 | New | MIT | +| request-filtering-agent | `request-filtering-agent` | latest | ~101K | MIT | +| Tavily AI SDK | `@tavily/ai-sdk` | 0.4.1 | ~4.6K | MIT | +| Exa SDK | `exa-js` | latest | Moderate | MIT | diff --git a/.agents/workflows/correctness-decision-workflow.md b/.agents/workflows/correctness-decision-workflow.md new file mode 100644 index 00000000..17535ebb --- /dev/null +++ b/.agents/workflows/correctness-decision-workflow.md @@ -0,0 +1,64 @@ +# Workflow: Correctness-First Decision + +Use this workflow for medium/high-risk tasks to prioritize robust, industry-standard solutions over quick fixes. 
+ +## Goal + +- Preserve high implementation quality with explicit design decisions. +- Keep always-on context minimal by loading this workflow only when needed. + +## Step 1: Risk Triage + +Classify the task before coding: + +- **Low risk:** localized refactor, copy changes, non-behavioral cleanup. +- **Medium risk:** behavior changes in one subsystem, moderate user impact. +- **High risk:** auth, schema/data model, API contracts, persistence, concurrency, migrations, billing/payments, security-critical paths. + +If the task is medium/high risk, continue with this workflow. + +## Step 2: Evaluate Approaches (Short ADR) + +Document a brief decision note: + +1. Problem and constraints. +2. Candidate approaches (2-3 options). +3. Chosen approach and why it is safer/clearer long-term. +4. Why alternatives were rejected. +5. Failure modes and rollback plan. + +Keep it concise. This is for decision quality, not long-form documentation. + +## Step 3: Industry-Standard Gate + +Before introducing new patterns or dependencies, verify: + +- Existing project pattern cannot reasonably solve the problem. +- Proposed approach is mature and actively maintained. +- Security and operational implications are acceptable. +- Migration and maintenance cost are understood. + +If the new dependency is optional or convenience-only, prefer existing patterns. + +## Step 4: Implementation Discipline + +- Implement smallest change that satisfies the specification. +- Preserve existing architecture boundaries. +- Avoid speculative abstractions. +- Add concise comments only where logic is non-obvious. + +## Step 5: Risk-Scaled Validation + +- **Low risk:** targeted tests/checks for touched behavior. +- **Medium risk:** targeted + affected integration path tests. +- **High risk:** targeted + integration + regression/failure-mode checks. + +Compilation, lint, and type checks are necessary but not sufficient for medium/high risk. 
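The tiered validation above can be sketched as a small plan-builder. This is a hedged illustration, not tooling that exists in the repo: the `bun run` scripts are the project's documented commands, while the `<touched-area>` and `<integration-path>` filters are hypothetical placeholders.

```typescript
type Risk = "low" | "medium" | "high"

// Map a risk tier to the validation it requires. Lint and typecheck are the
// baseline for every tier; test depth grows with risk. The "<touched-area>"
// and "<integration-path>" filters are placeholders, not real paths.
function validationPlan(risk: Risk): string[] {
  const baseline = ["bun run lint", "bun run typecheck"]
  switch (risk) {
    case "low":
      return [...baseline, "bun run test -- <touched-area>"]
    case "medium":
      return [
        ...baseline,
        "bun run test -- <touched-area>",
        "bun run test -- <integration-path>",
      ]
    case "high":
      // Full suite plus a production build as a regression/failure-mode check.
      return [...baseline, "bun run test", "bun run build"]
  }
}
```

Note that every tier includes the baseline: compilation, lint, and type checks are always necessary but never sufficient on their own for medium/high risk.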
+ +## Step 6: Final Review Checklist + +- Chosen approach addresses root cause. +- Trade-offs were considered, not guessed. +- Changes match project patterns unless deviation is justified. +- Validation depth matches risk tier. +- Residual risks are explicitly noted in handoff/report. diff --git a/.env.example b/.env.example index b4ff83d3..5592b340 100644 --- a/.env.example +++ b/.env.example @@ -53,6 +53,9 @@ OPENAI_API_KEY= # Anthropic (Claude) - https://console.anthropic.com/ ANTHROPIC_API_KEY= +# Set to "false" to disable the token-efficient-tools beta header for Anthropic. +# Used for A/B benchmarking tool token usage. Default: enabled. +# ANTHROPIC_TOKEN_EFFICIENT_TOOLS=false # Google (Gemini) - https://aistudio.google.com/apikey GOOGLE_GENERATIVE_AI_API_KEY= diff --git a/AGENTS.md b/AGENTS.md index ff4f1df1..2f9b5f13 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -1,142 +1,70 @@ # Not A Wrapper -Open-source multi-AI chat application with unified model interface. Supports 100+ models across 8 providers with multi-model comparison, BYOK, and local model support. - -## Tech Stack - -| Layer | Technology | -|-------|------------| -| Framework | Next.js 16 (App Router), React 19, TypeScript | -| Database | Convex (reactive DB + built-in RAG) | -| Auth | Clerk | -| Payments | Flowglad | -| AI | Vercel AI SDK → Multi-provider (OpenAI, Claude, Gemini, etc.) 
| -| State | Zustand + TanStack Query | -| UI | Shadcn/Base UI + Tailwind 4 | - -## Commands - -```bash -bun install # Install deps -bun run dev # Dev server (:3000) -bun run dev:clean # Dev server with fresh .next cache -bun run lint # ESLint -bun run typecheck # tsc --noEmit -bun run build # Production build -bun run test # Vitest (critical paths) -``` - -## Context System - -This project uses a structured context system for AI assistants: - -| Location | Purpose | When Loaded | -|----------|---------|-------------| -| `AGENTS.md` | Quick reference (this file) | Always | -| `.cursor/rules/` | Cursor-specific patterns | Auto by Cursor | -| `.agents/context/` | Domain knowledge & references | On-demand | -| `.agents/context/glossary.md` | Domain terminology | On-demand | -| `.agents/research/` | Research, evaluations, analyses | On-demand | -| `.agents/troubleshooting/` | Known issues & fixes | On-demand | -| `.agents/design/` | Design references & UI research | On-demand | -| `.agents/plans/` | Implementation plans | On-demand | -| `.agents/skills/` | Multi-step task guides | On-demand | -| `.agents/workflows/` | Development procedures | On-demand | -| `.agents/archive/` | Superseded documents | On-demand | +Open-source multi-AI chat app with a unified model interface across providers. -### Key Skills +## Primary Objective -**Load skills BEFORE starting work** when a task matches the trigger. +Deliver correct, secure, maintainable changes with minimal, focused diffs. -| Skill | Use When | -|-------|----------| -| `add-ai-provider` | Integrating new AI service | -| `add-model` | Adding model to existing provider | -| `convex-function` | Creating database functions | +## Context File Contract (Paper-Aligned) -> Skills contain checklists and patterns that prevent common mistakes. Load via `@.agents/skills/[name]/SKILL.md` +- Keep this file minimal and high-signal. +- Include only mandatory constraints and critical patterns. 
+- Avoid broad repository overviews and generic checklists. +- Load deeper guidance from `.agents/` only when task-relevant. -### Workflows +## Implementation Philosophy (SHOULD) -| Workflow | Use When | -|----------|----------| -| `new-feature.md` | Implementing new features | -| `debugging.md` | Troubleshooting issues | -| `release.md` | Releasing new versions | - -## Directory Structure - -``` -app/ # Next.js App Router -├── api/ # API routes (streaming) -├── auth/ # Auth pages/actions -├── c/[chatId]/ # Chat pages -├── p/[projectId]/ # Project pages -├── share/ # Public share pages -└── components/chat/ # Chat UI - -lib/ # Shared utilities -├── chat-store/ # Chat state -├── config.ts # Constants -├── models/ # AI model definitions -└── openproviders/ # AI provider abstraction - -components/ # Shadcn UI components (Base UI primitives) -convex/ # Convex DB schema & functions - -.agents/ # AI context & knowledge base -├── context/ # Domain knowledge & references -├── research/ # Research & evaluations -├── troubleshooting/ # Known issues & fixes -├── design/ # Design references & UI research -├── plans/ # Implementation plans -├── skills/ # Multi-step task guides -├── workflows/ # Development procedures -└── archive/ # Superseded documents - -.cursor/rules/ # Cursor-specific rules -``` +- Prefer well-researched, industry-standard solutions over quick fixes. +- Extend existing project patterns instead of introducing parallel systems. +- Fix root causes instead of symptoms. +- Optimize for maintainability and clarity over short-term speed. +- If unsure, consult `.agents/research/` and document non-trivial trade-offs. 
-## Gold Standard Examples +## Correctness-First Escalation (MUST) -| Pattern | File | -|---------|------| -| API Route | `app/api/chat/route.ts` | -| Provider History Adapter Registry | `app/api/chat/adapters/index.ts` | -| Custom Hook | `app/components/chat/use-chat-core.ts` | -| Context Provider | `lib/chat-store/chats/provider.tsx` | -| Component | `app/components/chat/chat.tsx` | +- Use risk-based rigor: keep low-risk tasks lightweight, increase rigor for medium/high-risk tasks. +- Medium/high-risk changes require a brief approach decision before coding (options, trade-offs, chosen approach). +- High-risk triggers include: auth, schema/data model, API contracts, persistence, concurrency, migrations, billing/payments, and security-critical paths. +- Introducing a new dependency or architectural pattern requires explicit justification and at least one alternative considered. +- Validation depth must scale with risk; do not treat successful compilation as sufficient evidence of correctness. +- For the detailed process, load `.agents/workflows/correctness-decision-workflow.md` on demand. -## Implementation Philosophy +## Non-Negotiable Rules -**Prefer well-researched, industry-standard solutions over quick fixes.** +### Security (MUST) -When implementing features or fixing bugs: +- Never read/write `.env*` files. +- Never log or expose secrets, tokens, or credentials. +- Treat BYOK/API key data as encrypted-at-rest. -1. **Research first** — Understand the problem domain and established solutions before writing code -2. **Use proven patterns** — Prefer battle-tested approaches (design patterns, established libraries, documented techniques) over novel or ad-hoc solutions -3. **Optimize for maintainability** — Long-term code health over short-term velocity -4. **Extend existing conventions** — Follow and build upon the codebase's established patterns -5. 
**Evaluate trade-offs** — When multiple approaches exist, analyze pros/cons before committing +### Code Quality (MUST) -> When unsure, consult `.agents/research/` for prior analysis or create a new research document before implementing. +- No `// @ts-ignore`. +- No lint-rule bypassing (`eslint-disable`) without explicit documented approval. +- Do not downgrade or disable checks to "make it pass." +- Prefer source fixes over workarounds. -## Prompt Delivery Default +### Git Safety (MUST) -When the user asks to "create a prompt" (or similar), return the prompt directly in chat. -Do not create a markdown file unless the user explicitly asks for a file. -If ambiguous, prefer chat output. +- Never create branches unless explicitly asked. +- Never force-push to shared branches. +- Avoid destructive git commands unless explicitly requested. -## No Timeline Estimates +## Ask Before Making These Changes (MUST) -**Never include time estimates, durations, or effort assessments** in plans, summaries, or implementation outputs. This includes phrases like "~30 minutes", "2-3 hours", "Phase 1 (Day 1)", "Quick win", or any similar timeline/effort language. AI-generated timeline estimates are unreliable and misleading. Only include timeline or effort information if the user explicitly requests it. 
+- Adding dependencies (`bun add ...`) +- Modifying `package.json`, `tsconfig*`, `next.config.*` +- Editing auth-critical paths (`app/auth/`, `middleware.ts`) +- Changing DB schema (`convex/schema.ts`) +- Changing CI/CD (`.github/workflows/`) +- Deleting files -## Critical Patterns +## Required Project Patterns (MUST When Applicable) -### Streaming Responses (MUST) +### Streaming Responses (AI SDK v6) ```typescript -// ALWAYS use toUIMessageStreamResponse for AI chat (AI SDK v6) return result.toUIMessageStreamResponse({ sendReasoning: true, sendSources: true, @@ -144,108 +72,54 @@ return result.toUIMessageStreamResponse({ }) ``` -### Convex Auth Pattern (MUST) +### Convex Auth Pattern ```typescript -// All mutations modifying user data: const identity = await ctx.auth.getUserIdentity() if (!identity) throw new Error("Not authenticated") -// ... lookup user, verify ownership, then operate +// verify ownership before user-scoped mutations ``` -### Optimistic Updates +### Optimistic Update Pattern ```typescript -// Store previous → Update optimistic → Rollback on error let previous = null -setState((prev) => { previous = prev; return updated }) -try { await mutation() } -catch { if (previous) setState(previous) } -``` - -## AI Agent Permissions - -### ✅ Allowed - -- Read any source file -- Run: `dev`, `build`, `lint`, `typecheck`, `test` -- Create/edit in: `app/`, `lib/`, `components/`, `hooks/` -- Create/edit documentation in: `.agents/` (follow `.cursor/rules/070-documentation.mdc`) - -### ⚠️ Ask First - -- `bun add ` -- Modify: `package.json`, `tsconfig.json`, `next.config.*` -- Git operations -- Auth logic (`app/auth/`, `middleware.ts`) -- Delete files -- DB schema (`convex/schema.ts`) -- CI/CD (`.github/workflows/`) - -### 🚫 Forbidden - -- **Creating git branches** — NEVER create new branches unless the user explicitly asks for a branch to be created. Implementation plans, feature work, and all other tasks must be done on the current branch. 
Branch creation requires explicit user instruction. -- Read/write `.env*` files -- Force push or commit secrets -- `// @ts-ignore` (never acceptable) -- `eslint-disable` without documented reason -- Disabling lint rules to bypass errors - -## Security - -**Never log:** OAuth tokens, API keys, credentials, session tokens - -**Encrypt at rest:** User-provided API keys (BYOK) via AES-256-GCM - -**Rate limiting:** Check BEFORE calling `streamText()` - -## Key Terminology - -> Full glossary: `.agents/context/glossary.md` - -| Term | Meaning | -|------|---------| -| Model | Config object, ID string, or SDK instance (context-dependent) | -| providerId | Internal ID for API key lookups (`"anthropic"`) | -| baseProviderId | AI SDK identifier (`"claude"`) | -| parts | AI SDK message content array (text, tools, reasoning) | -| BYOK | Bring Your Own Key | - -## Environment Variables - -```bash -# Required -NEXT_PUBLIC_CLERK_PUBLISHABLE_KEY= -CLERK_SECRET_KEY= -CONVEX_DEPLOYMENT= -NEXT_PUBLIC_CONVEX_URL= -CSRF_SECRET= -ENCRYPTION_KEY= # Must be 32 bytes base64 - -# AI Providers (at least one) -ANTHROPIC_API_KEY= -OPENAI_API_KEY= +setState((prev) => { + previous = prev + return updated +}) +try { + await mutation() +} catch { + if (previous) setState(previous) +} ``` -See `.env.example` for complete documentation. - -## Development Workflow +## Execution Defaults (SHOULD) -Four-phase cycle: **Research → Plan → Code & Verify → Commit** +1. Gather only the context needed for the current task. +2. Plan small, testable edits. +3. Implement focused changes. +4. Run only relevant checks (`lint`, `typecheck`, targeted tests). +5. Report key trade-offs and residual risks. -Use `ultrathink` for complex architectural decisions. +## On-Demand Context -See `.agents/workflows/development-cycle.md` for details. 
+Load only when needed: -## Pull Requests +- `.agents/context/` +- `.agents/skills/` +- `.agents/workflows/` +- `.agents/troubleshooting/` +- `.agents/context/glossary.md` -When creating a pull request: +## Output Preferences (SHOULD) -1. **Always fetch first** — Run `git fetch origin` before comparing branches -2. **Compare against remote** — Use `origin/main` (not local `main`) for diffs and commit logs. Local `main` may be stale. -3. **Verify commit count** — Run `git log origin/main..HEAD --oneline` and confirm the number matches what GitHub will show -4. **Scope the description** — The PR body must reflect only the commits unique to the branch, not the full history since an outdated local ref +- If asked to create a prompt, return it directly in chat unless a file is explicitly requested. +- Do not include timeline or effort estimates unless explicitly requested. ---- +## Pull Request Baseline (SHOULD When Preparing PRs) -*~200 lines. For detailed patterns, see `.cursor/rules/` and `.agents/skills/`.* +1. Run `git fetch origin` before branch comparisons. +2. Diff and log against `origin/main` (not local `main`). +3. Scope PR descriptions to commits in `origin/main..HEAD`. diff --git a/CLAUDE.md b/CLAUDE.md index 9954c16f..1fcc6bb5 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -1,226 +1,21 @@ -# Claude-Specific Context +# Claude Overlay (Minimal) -This file contains Claude-specific behaviors, preferences, and context for the Not A Wrapper project. +Claude-specific guidance for this repository. Universal rules live in `AGENTS.md`. -> See `@AGENTS.md` for universal guidelines that apply to all AI agents. +## First Principle -## Claude Preferences +Follow `AGENTS.md` as the source of truth for implementation philosophy, safety, and quality. 
-### Thinking Mode -- Use **extended thinking** for complex architectural decisions -- Use `ultrathink` trigger for multi-step refactoring or debugging sessions -- Standard thinking is fine for simple edits and additions +## Claude-Specific Deltas (Only) -### Response Style -- Be concise; avoid over-explaining obvious code -- Use code references (`startLine:endLine:filepath`) when discussing existing code -- Prefer showing small, focused diffs over full file rewrites -- **Never include timeline or effort estimates** (e.g., "~30 min", "2 hours", "Day 1") unless the user explicitly asks for them — AI time estimates are unreliable +- Use parallel tool calls when operations are independent. +- Read only task-relevant files; avoid broad exploration by default. +- If a task is medium/high risk, load `.agents/workflows/correctness-decision-workflow.md` before implementation. +- After substantive edits, run relevant validation (`lint`, `typecheck`, targeted tests). +- Be concise and direct. +- Do not provide timeline or effort estimates unless explicitly requested. 
-### Tool Usage -- **Maximize parallel tool calls** when operations are independent -- Read multiple files simultaneously when exploring a feature -- Run lint/typecheck after edits to catch issues early +## Notes -## Project-Specific Behaviors - -### Prompt Delivery Default -- When the user asks to "create a prompt" (or similar), return the prompt directly in chat -- Do not create a markdown file unless the user explicitly asks for a file -- If ambiguous, prefer chat output - -### When Working on Chat Features -- Reference `app/components/chat/use-chat-core.ts` for hook patterns -- Follow optimistic update pattern from `lib/chat-store/chats/provider.tsx` -- Streaming responses use Vercel AI SDK patterns - -### When Working on API Routes -- Follow `app/api/chat/route.ts` as the gold standard -- Always validate input with proper error handling -- Use structured error responses: `{ error: string, code?: string }` - -### When Working on UI Components -- Use Shadcn/Base UI primitives from `components/ui/` -- Follow existing patterns in `app/components/` -- Prefer composition over configuration - -## Memory Hierarchy - -This project uses the following memory structure: - -``` -CLAUDE.md (this file) → Project-level Claude context -├── app/CLAUDE.md → App-specific patterns -├── lib/CLAUDE.md → Library patterns -└── ~/.claude/CLAUDE.md → Personal user preferences -``` - -## Import Syntax for Context - -When you need additional context, use the `@` import syntax: - -```markdown -@AGENTS.md # Project overview, commands, permissions -@.agents/context/glossary.md # Domain terminology definitions -@.agents/skills/add-ai-provider/ # Adding new AI providers -@.agents/skills/add-model/ # Adding new models -@.agents/skills/convex-function/ # Creating database functions -@.agents/workflows/development-cycle.md # Development workflows (four-phase cycle, TDD) -@lib/config.ts # Centralized configuration constants -``` - -## Context System - -| Location | Purpose | 
-|----------|---------| -| `.agents/context/` | Architecture, API, database, and deployment docs | -| `.agents/context/glossary.md` | Domain terminology (Model, providerId, parts, etc.) | -| `.agents/research/` | Research, evaluations, analyses | -| `.agents/troubleshooting/` | Known issues & fixes | -| `.agents/design/` | Design references & UI research | -| `.agents/plans/` | Implementation plans | -| `.agents/skills/` | Multi-step task guides | -| `.agents/workflows/` | Development workflows and procedures | -| `.agents/archive/` | Superseded documents | -| `.cursor/rules/` | Cursor-specific patterns (auto-loaded) | - -> **Documentation rule**: All AI-generated markdown belongs in `.agents/`. See `.cursor/rules/070-documentation.mdc` for placement guide. - -## Development Workflow - -This project follows Anthropic's four-phase coding cycle. See `@.agents/workflows/development-cycle.md` for complete details. - -### Quick Reference - -**Phase 1: Research** → Gather context, read files, understand patterns -**Phase 2: Plan** → Create detailed plan, use `ultrathink` for complex problems -**Phase 3: Code & Verify** → Implement step-by-step, verify after each step -**Phase 4: Commit** → Commit incrementally with clear messages - -### Extended Thinking - -Use extended thinking (`ultrathink`) for: -- Architectural decisions -- Complex debugging sessions -- Security analysis -- Performance optimization - -Toggle "Thinking On/Off" in Claude Code, or use `ultrathink:` prefix in prompts. - -### TDD Workflow - -For critical paths (auth, data transforms, rate limiting): -1. Write tests first -2. Confirm tests fail -3. Commit tests -4. Implement to pass tests -5. 
Iterate until all pass - -### Context Management - -When sessions get long: -- Summarize older messages (keep last 10) -- Write session discoveries to `NOTES.md` (project root — scratch notes only) -- Write lasting research/analysis to `.agents/research/` -- Reference `@` files instead of pasting content -- Use context compaction strategies - -See `@.agents/workflows/development-cycle.md` and `@.agents/workflows/examples.md` for detailed workflows. - -## Sub-Agent Architecture - -When the sub-agent architecture is implemented, Claude should route tasks: - -| Task Type | Agent | Model | -|-----------|-------|-------| -| Code assistance | Code Assistant | Haiku 4.5 | -| Writing/editing | Writing Editor | Sonnet 4.5 | -| Research tasks | Research Analyst | Sonnet 4.5 | -| Data analysis | Data Analyst | Sonnet 4.5 | -| General conversation | Main Orchestrator | Opus 4.5 | - -## Context Compaction - -For long sessions, Claude should: - -1. Summarize older messages when approaching token limits -2. Write important discoveries to `NOTES.md` -3. Keep the last 10 messages in full context -4. Reference `@` files instead of keeping full content in context - -## Quality Enforcement - -**This project prioritizes well-researched, industry-standard solutions over quick fixes.** See `AGENTS.md` > Implementation Philosophy for the universal principles. - -### Implementation Decision Framework - -Before writing code, follow this sequence: - -1. **Research the domain** — Search for established patterns, prior art, and industry conventions for the problem at hand -2. **Check for existing solutions** — Look in `.agents/research/` for prior analysis; check if the codebase already solves a similar problem -3. **Evaluate approaches** — When multiple solutions exist, compare trade-offs (performance, maintainability, complexity, ecosystem alignment) -4. **Align with the codebase** — Ensure the chosen approach extends existing conventions rather than introducing a parallel pattern -5. 
**Implement and verify** — Build incrementally, verifying each step against the gold standard examples in `AGENTS.md` - -### When You're Uncertain - -```markdown -✅ DO: Research the problem first — "Let me check how this is typically handled in Next.js App Router..." -✅ DO: Reference prior art — "React's documentation recommends this pattern for..." -✅ DO: Document your reasoning — Create a research doc in .agents/research/ for non-trivial decisions -✅ DO: Propose options — "There are two established approaches here. Option A... Option B... I recommend B because..." - -❌ DON'T: Jump to the first solution that works -❌ DON'T: Invent custom patterns when standard ones exist -❌ DON'T: Optimize for fewer lines of code over clarity and maintainability -❌ DON'T: Skip research for unfamiliar problem domains -``` - -### Hierarchy of Solutions (Errors & Issues) - -1. **Fix the code properly** — Always the first choice -2. **Refactor the pattern** — If the code is fundamentally incompatible with the correct approach -3. **Document exception** — Only with explicit user approval and clear reason -4. **Never**: Disable rules, add ignore comments, or downgrade deps without approval - -### Forbidden Actions - -- **Creating git branches** — NEVER create new branches unless the user explicitly asks for a branch to be created. Implementation plans, feature work, and all other tasks must be done on the current branch. Branch creation requires explicit user instruction. 
-- Setting ESLint rules to `"off"` or `"warn"` to bypass errors -- Adding `// @ts-ignore` (never acceptable) -- Adding `eslint-disable` comments without documented reason -- Suggesting "we can disable this check" as a solution -- Downgrading packages to avoid type/lint errors -- Implementing ad-hoc workarounds when a well-documented solution exists - -### Reference for Fixes - -- `.agents/research/` — Prior research and analysis -- `.agents/workflows/react-19-lint-fixes.md` — React 19 / React Compiler patterns -- `.agents/context/conventions.md` — Quality gates and acceptable exceptions -- `.agents/troubleshooting/` — Known issues & fixes -- Gold standard examples in `AGENTS.md` - -## Debugging Workflow - -When debugging issues: - -1. **Read first**: Examine the relevant files before suggesting changes -2. **Check lints**: Run `bun run lint` and `bun run typecheck` -3. **Verify patterns**: Ensure changes follow gold standard examples -4. **Test incrementally**: Suggest running tests after each significant change -5. **Fix at source**: Never suggest disabling checks as a solution - -## Common Gotchas - -- **Streaming responses**: Must use `result.toUIMessageStreamResponse()` from Vercel AI SDK (v6) -- **Server Components**: Cannot use hooks; use Client Components wrapper with `"use client"` -- **Database**: Uses Convex for all data operations (real-time queries + mutations) -- **Auth**: Uses Clerk for authentication; avoid touching `middleware.ts` without review -- **File Storage**: Uses Convex storage for file uploads -- **Model terminology**: See `.agents/context/glossary.md` for precise definitions -- **Pull requests**: Always `git fetch origin` first and compare against `origin/main`, never local `main`. Local main can be arbitrarily stale, causing commit/diff inflation in PR descriptions. - ---- - -*This file is automatically loaded by Claude Code and Claude API tools.* +- Keep this file short and Claude-specific. 
+- Do not duplicate policy from `AGENTS.md` unless a contradiction must be resolved. diff --git a/app/api/chat/route.ts b/app/api/chat/route.ts index 6b2a7951..d585fce0 100644 --- a/app/api/chat/route.ts +++ b/app/api/chat/route.ts @@ -18,7 +18,7 @@ import { flushPostHog, getPostHogClient, } from "@/lib/posthog" -import type { Provider } from "@/lib/user-keys" +import type { Provider, ToolKeyMode } from "@/lib/user-keys" import { UIMessage as MessageAISDK, streamText, @@ -48,12 +48,27 @@ import { loadUserMcpTools, type LoadToolsResult, } from "@/lib/mcp/load-tools" -import { resolveToolCapabilities } from "@/lib/tools/types" -import { shouldInjectSearchTools } from "./search-tools" +import { + enforceToolNamingGovernance, + type ToolLayerMap, +} from "@/lib/tools/naming" +import { + filterMetadataMapByPolicy, + filterToolSetByPolicy, + getActiveToolsForStep, + resolveCapabilityPolicy, + type ToolPolicyInput, + type ToolPolicyDecision, +} from "@/lib/tools/capability-policy" +import { + buildFinishToolInvocationStreamMetadata, + buildStartToolInvocationStreamMetadata, + buildToolInvocationMetadataByName, + type ToolInvocationMetadataByCallId, +} from "@/lib/tools/ui-metadata" import { ToolTraceCollector, wrapMcpTools, - isToolResultEnvelope, } from "@/lib/tools/mcp-wrapper" import type { ShippingAddress } from "@/lib/payclaw/schemas" @@ -265,6 +280,7 @@ export async function POST(req: Request) { ) } } + const providerToolKeyMode: ToolKeyMode = apiKey ? "byok" : "platform" // enableSearch is no longer passed to the model — it controls tool injection below. // All search is now provided via visible, auditable tool calls (Layer 1 or Layer 2). 
@@ -290,8 +306,27 @@ export async function POST(req: Request) { let builtInTools: ToolSet = {} as ToolSet let builtInToolMetadata = new Map() - const capabilities = resolveToolCapabilities(modelConfig.tools) - const shouldInjectSearch = shouldInjectSearchTools(enableSearch, modelConfig.tools) + const initialCapabilityPolicy = resolveCapabilityPolicy({ + modelTools: modelConfig.tools, + isAuthenticated, + }) + const capabilities = initialCapabilityPolicy.capabilities + const shouldInjectSearch = enableSearch && capabilities.search + const requestId = crypto.randomUUID() + + console.log( + JSON.stringify({ + _tag: "tool_capability_policy", + requestId, + chatId, + userId, + model, + userTier: initialCapabilityPolicy.userTier, + capabilities: initialCapabilityPolicy.capabilities, + capabilityReasons: initialCapabilityPolicy.capabilityReasons, + keyModeReason: initialCapabilityPolicy.keyModeReason, + }) + ) if (shouldInjectSearch) { const { getProviderTools } = await import("@/lib/tools/provider") @@ -301,17 +336,137 @@ export async function POST(req: Request) { } // ----------------------------------------------------------------------- - // Third-Party Tool Loading (Layer 2) + // Exa API Key Resolution (shared by Layer 2 capabilities) + // + // Resolved once, used by both search fallback and content extraction. + // Key resolution: user BYOK key → platform env var → undefined. + // The exa-js SDK accepts keys in its constructor, so BYOK keys + // are passed directly — no env var manipulation needed. + // + // NOT gated on isAuthenticated — anonymous users get Exa-powered tools + // when the platform has an EXA_API_KEY configured (same as Layer 1). 
+ // ----------------------------------------------------------------------- + let resolvedExaKey: string | undefined + let resolvedExaKeyMode: ToolKeyMode | undefined + const { getEffectiveToolKeyWithMode } = await import("@/lib/user-keys") + const resolvedExa = await getEffectiveToolKeyWithMode("exa", convexToken) + resolvedExaKey = resolvedExa.key + resolvedExaKeyMode = resolvedExa.keyMode + + const { + createOutageTolerantToolBudgetEnforcer, + createConvexToolLimitStore, + createRequestLocalToolSoftCap, + createToolPolicyGuard, + isPolicyUnavailableError, + probeToolBudget, + } = await import("@/lib/tools/policy") + const toolLimitStore = createConvexToolLimitStore({ + convexToken, + anonymousId, + }) + const makePolicyGuard = (keyMode: ToolKeyMode) => + createToolPolicyGuard({ store: toolLimitStore, keyMode }) + + const builtInPolicyGuard = makePolicyGuard(providerToolKeyMode) + const mcpPolicyGuard = makePolicyGuard("platform") + const platformPolicyGuard = makePolicyGuard("platform") + const exaPolicyGuard = + resolvedExaKeyMode ? makePolicyGuard(resolvedExaKeyMode) : undefined + + const logOutageTolerantBudgetEvent = ( + source: "third-party" | "content" | "platform" | "mcp", + event: { + type: "recovered" | "degraded_allow" | "degraded_block" + toolName: string + keyMode: ToolKeyMode + retryAfterSeconds?: number + snapshot?: { + used: number + remaining: number + maxCalls: number + } + error?: string + } + ) => { + if (event.type === "recovered") { + console.warn( + JSON.stringify({ + _tag: "tool_budget_gate_recovered", + requestId, + tool: event.toolName, + source, + keyMode: event.keyMode, + action: "resume_policy_enforced_budgeting", + }) + ) + return + } + + console.warn( + JSON.stringify({ + _tag: "tool_budget_gate_degraded", + requestId, + tool: event.toolName, + source, + keyMode: event.keyMode, + policyUnavailable: true, + usedCalls: event.snapshot?.used ?? null, + remainingCalls: event.snapshot?.remaining ?? 
null, + maxCalls: event.snapshot?.maxCalls ?? null, + retryAfterSeconds: event.retryAfterSeconds ?? null, + error: event.error ?? null, + action: + event.type === "degraded_allow" + ? "allow_tool_with_request_local_soft_cap" + : "disable_tool_for_remaining_request", + }) + ) + } + + const thirdPartyBudgetEnforcer = + exaPolicyGuard && resolvedExaKeyMode + ? createOutageTolerantToolBudgetEnforcer({ + enforceToolBudget: (toolName) => exaPolicyGuard.enforceToolBudget(toolName), + keyMode: resolvedExaKeyMode, + maxCallsPerTool: PREPARE_STEP_THRESHOLD, + onEvent: (event) => logOutageTolerantBudgetEvent("third-party", event), + }) + : undefined + + const contentBudgetEnforcer = + exaPolicyGuard && resolvedExaKeyMode + ? createOutageTolerantToolBudgetEnforcer({ + enforceToolBudget: (toolName) => exaPolicyGuard.enforceToolBudget(toolName), + keyMode: resolvedExaKeyMode, + maxCallsPerTool: PREPARE_STEP_THRESHOLD, + onEvent: (event) => logOutageTolerantBudgetEvent("content", event), + }) + : undefined + + const platformBudgetEnforcer = createOutageTolerantToolBudgetEnforcer({ + enforceToolBudget: (toolName) => platformPolicyGuard.enforceToolBudget(toolName), + keyMode: "platform", + maxCallsPerTool: PREPARE_STEP_THRESHOLD, + onEvent: (event) => logOutageTolerantBudgetEvent("platform", event), + }) + + const mcpBudgetEnforcer = createOutageTolerantToolBudgetEnforcer({ + enforceToolBudget: (toolName) => mcpPolicyGuard.enforceToolBudget(toolName), + keyMode: "platform", + maxCallsPerTool: PREPARE_STEP_THRESHOLD, + onEvent: (event) => logOutageTolerantBudgetEvent("mcp", event), + }) + + // ----------------------------------------------------------------------- + // Third-Party Search Fallback (Layer 2 — Search) // Universal search fallback for providers without native search tools. // Only loaded when enableSearch is true AND Layer 1 didn't provide search. 
// // The coordination model is simple: // - enableSearch === true: route.ts injects search tools - // - Layer 1 provided search (builtInHasSearch): skip Layer 2 + // - Layer 1 provided search (builtInHasSearch): skip Layer 2 search // - Layer 1 did NOT provide search: load Layer 2 Exa fallback - // - // NOT gated on isAuthenticated — anonymous users get search when - // the platform has an EXA_API_KEY configured (same as Layer 1). // ----------------------------------------------------------------------- let thirdPartyTools: ToolSet = {} as ToolSet let thirdPartyToolMetadata = new Map() @@ -319,26 +474,10 @@ export async function POST(req: Request) { if (shouldInjectSearch) { const builtInHasSearch = Object.keys(builtInTools).length > 0 - // Only load Layer 2 when Layer 1 didn't provide search. - // This is the sole coordination point — third-party.ts does not - // know about providers. It just receives a skipSearch flag. if (!builtInHasSearch) { const { getThirdPartyTools } = await import("@/lib/tools/third-party") - - // Key resolution: user BYOK key → platform env var → undefined - // The exa-js SDK accepts keys in its constructor, so BYOK keys - // are passed directly — no env var manipulation needed. 
- let resolvedExaKey: string | undefined - if (convexToken) { - const { getEffectiveToolKey } = await import("@/lib/user-keys") - resolvedExaKey = await getEffectiveToolKey("exa", convexToken) - } - if (!resolvedExaKey) { - resolvedExaKey = process.env.EXA_API_KEY - } - const thirdPartyResult = await getThirdPartyTools({ - skipSearch: false, // We already know we need search (builtInHasSearch is false) + skipSearch: false, exaKey: resolvedExaKey, }) thirdPartyTools = thirdPartyResult.tools @@ -346,6 +485,26 @@ export async function POST(req: Request) { } } + // ----------------------------------------------------------------------- + // Content Extraction Tools (Layer 2 — Content) + // Independent capability — NOT gated on shouldInjectSearch or + // builtInHasSearch. Gated on capabilities.extract and Exa key. + // Available for ALL providers including those with native Layer 1 + // search (OpenAI, Anthropic, Google, xAI). + // ----------------------------------------------------------------------- + let contentTools: ToolSet = {} as ToolSet + let contentToolMetadata = new Map() + + if (resolvedExaKey && capabilities.extract) { + const { getContentExtractionTools } = await import("@/lib/tools/third-party") + const contentResult = await getContentExtractionTools({ + exaKey: resolvedExaKey, + policyGuard: exaPolicyGuard, + }) + contentTools = contentResult.tools + contentToolMetadata = contentResult.metadata + } + // ----------------------------------------------------------------------- // MCP Tool Loading // Gate on: auth + Convex token + model capability @@ -401,7 +560,7 @@ export async function POST(req: Request) { let platformToolMetadata = new Map() let userCardId: string | undefined - if (isAuthenticated) { + if (isAuthenticated && capabilities.platform) { if (convexToken) { try { userAddresses = (await fetchQuery( @@ -464,6 +623,134 @@ export async function POST(req: Request) { platformToolMetadata = platformResult.metadata } + const toolPolicyInputs: 
ToolPolicyInput[] = [ + ...Object.keys(builtInTools).map((toolName) => { + const meta = builtInToolMetadata.get(toolName) + return { + toolName, + source: meta?.source ?? "builtin", + capability: "search" as const, + readOnly: meta?.readOnly, + destructive: meta?.destructive, + idempotent: meta?.idempotent, + openWorld: meta?.openWorld, + } + }), + ...Object.keys(thirdPartyTools).map((toolName) => { + const meta = thirdPartyToolMetadata.get(toolName) + return { + toolName, + source: meta?.source ?? "third-party", + capability: "search" as const, + readOnly: meta?.readOnly, + destructive: meta?.destructive, + idempotent: meta?.idempotent, + openWorld: meta?.openWorld, + } + }), + ...Object.keys(contentTools).map((toolName) => { + const meta = contentToolMetadata.get(toolName) + return { + toolName, + source: meta?.source ?? "third-party", + capability: "extract" as const, + readOnly: meta?.readOnly, + destructive: meta?.destructive, + idempotent: meta?.idempotent, + openWorld: meta?.openWorld, + } + }), + ...Object.keys(platformTools).map((toolName) => { + const meta = platformToolMetadata.get(toolName) + return { + toolName, + source: meta?.source ?? "platform", + capability: "platform" as const, + readOnly: meta?.readOnly, + destructive: meta?.destructive, + idempotent: meta?.idempotent, + openWorld: meta?.openWorld, + } + }), + ...Object.keys(mcpTools).map((toolName) => { + const info = mcpToolServerMap.get(toolName) + const policyHintsTrusted = info?.policyHintsTrusted === true + return { + toolName, + source: "mcp" as const, + capability: "mcp" as const, + riskHintsTrusted: policyHintsTrusted, + readOnly: policyHintsTrusted ? info?.readOnly : undefined, + destructive: policyHintsTrusted ? info?.destructive : undefined, + idempotent: policyHintsTrusted ? info?.idempotent : undefined, + openWorld: policyHintsTrusted ? 
info?.openWorld : undefined, + } }), + ] + + const toolPolicy = resolveCapabilityPolicy({ + modelTools: modelConfig.tools, + isAuthenticated, + keyMode: resolvedExaKeyMode, + tools: toolPolicyInputs, + }) + + const summarizeReasonCounts = ( + decisions: ToolPolicyDecision[], + selector: (decision: ToolPolicyDecision) => string + ) => { + const counts: Record<string, number> = {} + for (const decision of decisions) { + const reason = selector(decision) + counts[reason] = (counts[reason] ?? 0) + 1 + } + return counts + } + + console.log( + JSON.stringify({ + _tag: "tool_policy_matrix", + requestId, + chatId, + userId, + model, + userTier: toolPolicy.userTier, + keyMode: toolPolicy.keyMode ?? null, + keyModeReason: toolPolicy.keyModeReason, + capabilities: toolPolicy.capabilities, + capabilityReasons: toolPolicy.capabilityReasons, + totalTools: toolPolicy.toolDecisions.length, + earlyAllowedCount: toolPolicy.earlyToolNames.length, + lateAllowedCount: toolPolicy.lateToolNames.length, + earlyReasonCounts: summarizeReasonCounts( + toolPolicy.toolDecisions, + (decision) => decision.earlyReasonCode + ), + lateReasonCounts: summarizeReasonCounts( + toolPolicy.toolDecisions, + (decision) => decision.lateReasonCode + ), + }) + ) + + builtInTools = filterToolSetByPolicy(builtInTools, toolPolicy) + thirdPartyTools = filterToolSetByPolicy(thirdPartyTools, toolPolicy) + contentTools = filterToolSetByPolicy(contentTools, toolPolicy) + platformTools = filterToolSetByPolicy(platformTools, toolPolicy) + mcpTools = filterToolSetByPolicy(mcpTools, toolPolicy) + + builtInToolMetadata = filterMetadataMapByPolicy(builtInToolMetadata, toolPolicy) + thirdPartyToolMetadata = filterMetadataMapByPolicy( + thirdPartyToolMetadata, + toolPolicy + ) + contentToolMetadata = filterMetadataMapByPolicy(contentToolMetadata, toolPolicy) + platformToolMetadata = filterMetadataMapByPolicy( + platformToolMetadata, + toolPolicy + ) + mcpToolServerMap = filterMetadataMapByPolicy(mcpToolServerMap, toolPolicy) + // Wrap MCP
tools with timeout, timing, truncation, and envelope. // Single wrapper handles all Layer 3 concerns — follows the Exa gold // standard pattern (lib/tools/third-party.ts:82-119). @@ -474,29 +761,233 @@ export async function POST(req: Request) { mcpTools = wrapMcpTools(mcpTools, { toolServerMap: mcpToolServerMap, traceCollector, + requestId, + enforceToolBudget: async (toolName) => { + await mcpBudgetEnforcer(toolName) + }, }) as ToolSet } - // Merge all tool layers: search (Layer 1 OR Layer 2) + platform (Layer 4) + MCP (Layer 3) - // Search tools are mutually exclusive: Layer 1 XOR Layer 2 (never both). - // MCP tools are always independent and additive. - // Spread order matters for conflict resolution: - // 1. Built-in/third-party search tools (lowest priority) - // 2. Platform tools (middle priority) - // 3. MCP tools (highest priority — user-configured, namespaced) - const searchTools = { ...builtInTools, ...thirdPartyTools } - const allTools = { ...searchTools, ...platformTools, ...mcpTools } as ToolSet - - // Dev-mode collision detection: warn when duplicate keys are found - if (process.env.NODE_ENV !== "production") { - const searchKeys = new Set(Object.keys(searchTools)) - for (const key of Object.keys(mcpTools)) { - if (searchKeys.has(key)) { - console.warn(`[tools] Key collision: "${key}" exists in both search and MCP tools. MCP wins.`) + // Wrap non-builtin tools with tracing (Layer 2 + Layer 4). + // Records durationMs and resultSizeBytes into traceCollector so + // onStepFinish and onFinish can read them for ALL tool types. 
+ const { wrapToolsWithTracing } = await import("@/lib/tools/utils") + if (Object.keys(thirdPartyTools).length > 0) { + thirdPartyTools = wrapToolsWithTracing( + thirdPartyTools, + traceCollector, + requestId, + async (toolName) => { + if (!thirdPartyBudgetEnforcer) return + await thirdPartyBudgetEnforcer(toolName) + }, + thirdPartyToolMetadata + ) + } + if (Object.keys(contentTools).length > 0) { + contentTools = wrapToolsWithTracing( + contentTools, + traceCollector, + requestId, + async (toolName) => { + if (!contentBudgetEnforcer) return + await contentBudgetEnforcer(toolName) + }, + contentToolMetadata + ) + } + if (Object.keys(platformTools).length > 0) { + platformTools = wrapToolsWithTracing( + platformTools, + traceCollector, + requestId, + async (toolName) => { + await platformBudgetEnforcer(toolName) + }, + platformToolMetadata + ) + } + + // Merge all tool layers: + // - Search: Layer 1 (built-in) XOR Layer 2 (Exa fallback) — never both + // - Content: Layer 2 content extraction — independent of search gating + // - Platform: Layer 4 (Flowglad Pay, etc.) + // - MCP: Layer 3 (user-configured servers) + // Spread order = conflict resolution priority (last wins): + // 1. Search tools (lowest priority) + // 2. Content extraction tools (same priority tier as search) + // 3. Platform tools (middle priority) + // 4. 
MCP tools (highest priority — user-configured, namespaced) + const toolLayers: ToolLayerMap = { + "built-in": builtInTools, + "third-party-search": thirdPartyTools, + "content-extraction": contentTools, + platform: platformTools, + mcp: mcpTools, + } + + const namingResult = enforceToolNamingGovernance(toolLayers) + if (namingResult.invalid.length > 0) { + for (const invalid of namingResult.invalid) { + console.warn( + JSON.stringify({ + _tag: "tool_name_invalid", + requestId, + tool: invalid.toolKey, + layer: invalid.layer, + reason: invalid.reason, + action: "drop_invalid_tool", + }) + ) + } + } + if (namingResult.collisions.length > 0) { + for (const collision of namingResult.collisions) { + const droppedLayers = collision.owners.filter( + (layer) => layer !== collision.winner + ) + console.warn( + JSON.stringify({ + _tag: "tool_name_collision", + requestId, + tool: collision.toolKey, + layers: collision.owners, + winner: collision.winner, + droppedLayers, + action: "keep_winner_drop_losers", + }) + ) + } + } + + builtInTools = (namingResult.sanitizedLayers["built-in"] ?? {}) as ToolSet + thirdPartyTools = (namingResult.sanitizedLayers["third-party-search"] ?? + {}) as ToolSet + contentTools = (namingResult.sanitizedLayers["content-extraction"] ?? + {}) as ToolSet + platformTools = (namingResult.sanitizedLayers.platform ?? {}) as ToolSet + mcpTools = (namingResult.sanitizedLayers.mcp ?? 
{}) as ToolSet + + const filterMetadataByTools = <T>( + metadata: ReadonlyMap<string, T>, + tools: ToolSet + ) => + new Map( + Array.from(metadata.entries()).filter(([name]) => + Object.prototype.hasOwnProperty.call(tools, name) + ) + ) + + builtInToolMetadata = filterMetadataByTools(builtInToolMetadata, builtInTools) + thirdPartyToolMetadata = filterMetadataByTools( + thirdPartyToolMetadata, + thirdPartyTools + ) + contentToolMetadata = filterMetadataByTools(contentToolMetadata, contentTools) + platformToolMetadata = filterMetadataByTools( + platformToolMetadata, + platformTools + ) + mcpToolServerMap = new Map( + Array.from(mcpToolServerMap.entries()).filter(([name]) => + Object.prototype.hasOwnProperty.call(mcpTools, name) + ) + ) + + const builtInToolNames = new Set(Object.keys(builtInTools)) + const exhaustedBuiltInTools = new Set<string>() + const degradedBuiltInTools = new Set<string>() + const degradedBuiltInSoftCap = createRequestLocalToolSoftCap({ + maxCallsPerTool: PREPARE_STEP_THRESHOLD, + }) + + // Provider-native (Layer 1) tools are provider-executed and do not expose a + // local execute() hook for per-call preflight enforcement. Compensating + // control: probe budget during prepareStep (consume:false) and account + // actual usage in onStepFinish. This preserves centralized budget policy + // semantics, with a bounded request-local soft cap when policy is unavailable.
+ const isBuiltInToolBudgetAllowed = async (toolName: string): Promise<boolean> => { + if (!builtInToolNames.has(toolName)) return true + if (exhaustedBuiltInTools.has(toolName)) return false + + try { + const probe = await probeToolBudget({ + store: toolLimitStore, + keyMode: providerToolKeyMode, + toolName, + }) + if (probe.allowed) { + if (degradedBuiltInTools.delete(toolName)) { + console.warn( + JSON.stringify({ + _tag: "tool_budget_gate_recovered", + requestId, + tool: toolName, + source: "builtin", + keyMode: providerToolKeyMode, + action: "resume_policy_enforced_budgeting", + }) + ) + } + return true } + degradedBuiltInTools.delete(toolName) + exhaustedBuiltInTools.add(toolName) + console.warn( + JSON.stringify({ + _tag: "tool_budget_gate", + requestId, + tool: toolName, + source: "builtin", + keyMode: providerToolKeyMode, + retryAfterSeconds: probe.retryAfterSeconds ?? null, + action: "disable_tool_for_remaining_steps", + }) + ) + return false + } catch (error) { + if (isPolicyUnavailableError(error)) { + degradedBuiltInTools.add(toolName) + const softCap = degradedBuiltInSoftCap.getSnapshot(toolName) + const allowed = softCap.remaining > 0 + console.warn( + JSON.stringify({ + _tag: "tool_budget_gate_degraded", + requestId, + tool: toolName, + source: "builtin", + keyMode: providerToolKeyMode, + policyUnavailable: true, + usedCalls: softCap.used, + remainingCalls: softCap.remaining, + maxCalls: softCap.maxCalls, + error: error.message, + action: allowed + ? "allow_tool_with_request_local_soft_cap" + : "disable_tool_until_policy_recovers", + }) + ) + return allowed + } + exhaustedBuiltInTools.add(toolName) + console.warn( + JSON.stringify({ + _tag: "tool_budget_gate_error", + requestId, + tool: toolName, + source: "builtin", + keyMode: providerToolKeyMode, + error: error instanceof Error ?
error.message : String(error), + action: "disable_tool_fail_closed", + }) + ) + return false } } + const searchTools = { ...builtInTools, ...thirdPartyTools } + const allTools = { ...searchTools, ...contentTools, ...platformTools, ...mcpTools } as ToolSet + const hasAnyTools = Object.keys(allTools).length > 0 // Anonymous users get a lower step count to limit tool call cost exposure. @@ -747,20 +1238,27 @@ export async function POST(req: Request) { // The header is safe to apply: @ai-sdk/anthropic@3.0.41 comma-merges // user and inferred betas (getBetasFromHeaders + Array.from(betas).join(",")). // ----------------------------------------------------------------------- + const isTokenEfficient = + process.env.ANTHROPIC_TOKEN_EFFICIENT_TOOLS !== "false" const requestHeaders: Record<string, string> = {} - if (provider === "anthropic" && hasAnyTools) { + if (provider === "anthropic" && hasAnyTools && isTokenEfficient) { requestHeaders["anthropic-beta"] = ANTHROPIC_BETA_HEADERS.tokenEfficient } // Collect all tool metadata for prepareStep tool restriction. - // Merge built-in + third-party metadata (MCP metadata not available here — - // MCP tools are conservatively included in the safe list). + // Merge built-in + third-party + content + platform metadata (MCP metadata + // not available here — MCP tools are conservatively included in the safe list).
const allToolMetadata = new Map([ ...builtInToolMetadata, ...thirdPartyToolMetadata, + ...contentToolMetadata, ...platformToolMetadata, ]) + const toolMetadataByName = buildToolInvocationMetadataByName({ + nonMcpMetadata: allToolMetadata, + mcpToolServerMap, + }) let enrichedSystemPrompt = effectiveSystemPrompt if (isAuthenticated && userAddresses.length > 0) { enrichedSystemPrompt += formatAddressContext(userAddresses) @@ -768,12 +1266,14 @@ export async function POST(req: Request) { const streamStartMs = Date.now() let stepCounter = 0 + let toolMetadataByCallId: ToolInvocationMetadataByCallId = {} // Track reasoning timing for messageMetadata persistence. // The first reasoning chunk records a start timestamp; when text-delta // arrives (reasoning is done) or onFinish fires, we compute elapsed ms. let reasoningStartMs: number | null = null let reasoningDurationMs: number | null = null + let loggedLateStepPolicy = false const result = streamText({ model: aiModel, @@ -782,30 +1282,48 @@ export async function POST(req: Request) { tools: allTools, stopWhen: stepCountIs(maxSteps), - // Restrict tools after PREPARE_STEP_THRESHOLD to prevent runaway - // tool chains. Only tools explicitly marked readOnly: true remain - // available. MCP tools are conservatively included (can't classify - // read/write yet). Unclassified non-MCP tools are restricted - // (fail closed — new tools must opt in via readOnly: true). + // Centralized step gating from the capability policy resolver. + // After PREPARE_STEP_THRESHOLD, only late-step-safe tools remain + // (currently read_only risk class). Unknown risk fails closed. prepareStep: hasAnyTools ? async ({ stepNumber }) => { - if (stepNumber <= PREPARE_STEP_THRESHOLD) return {} - - // Build safe tool list: only tools explicitly marked readOnly. - // New tools that omit readOnly default to RESTRICTED (fail closed). 
- const safeTools: string[] = [] - for (const [name, meta] of allToolMetadata) { - if (meta.readOnly === true) safeTools.push(name) + const isLateStep = stepNumber > PREPARE_STEP_THRESHOLD + const policyToolsForStep = getActiveToolsForStep( + toolPolicy, + stepNumber, + PREPARE_STEP_THRESHOLD + ) + const budgetAllowedTools: string[] = [] + for (const toolName of policyToolsForStep ?? []) { + if (!builtInToolNames.has(toolName)) { + budgetAllowedTools.push(toolName) + continue + } + if (await isBuiltInToolBudgetAllowed(toolName)) { + budgetAllowedTools.push(toolName) + } } - // Include all MCP tools (can't classify read/write yet) - for (const name of Object.keys(mcpTools)) { - if (!safeTools.includes(name)) safeTools.push(name) + + if (isLateStep && !loggedLateStepPolicy) { + loggedLateStepPolicy = true + console.log( + JSON.stringify({ + _tag: "tool_policy_step_gate", + requestId, + chatId, + userId, + model, + stepNumber, + threshold: PREPARE_STEP_THRESHOLD, + earlyToolCount: toolPolicy.earlyToolNames.length, + lateToolCount: budgetAllowedTools.length, + blockedCount: + toolPolicy.earlyToolNames.length - budgetAllowedTools.length, + }) + ) } - // Fail closed: if no safe tools found, no tools available. - // This is intentional — prevents unrestricted tool access - // if readOnly metadata is misconfigured. - return { activeTools: safeTools } + return { activeTools: budgetAllowedTools } } : undefined, @@ -813,10 +1331,65 @@ export async function POST(req: Request) { // Captures tool name, duration, token usage, and success per step. // This data feeds into the existing toolCallLog for trajectory analysis // and future trace-based evaluation. 
- onStepFinish: ({ toolCalls, toolResults, usage, finishReason }) => { + onStepFinish: async ({ toolCalls, toolResults, usage, finishReason }) => { stepCounter++ if (toolCalls.length === 0) return + for (const call of toolCalls) { + if (!builtInToolNames.has(call.toolName)) continue + try { + await builtInPolicyGuard.enforceToolBudget(call.toolName) + if (degradedBuiltInTools.delete(call.toolName)) { + console.warn( + JSON.stringify({ + _tag: "tool_budget_post_accounting_recovered", + requestId, + tool: call.toolName, + source: "builtin", + keyMode: providerToolKeyMode, + action: "resume_policy_enforced_budgeting", + }) + ) + } + } catch (error) { + if (isPolicyUnavailableError(error)) { + degradedBuiltInTools.add(call.toolName) + const softCap = degradedBuiltInSoftCap.recordCall(call.toolName) + console.warn( + JSON.stringify({ + _tag: "tool_budget_post_accounting_degraded", + requestId, + tool: call.toolName, + source: "builtin", + keyMode: providerToolKeyMode, + policyUnavailable: true, + usedCalls: softCap.used, + remainingCalls: softCap.remaining, + maxCalls: softCap.maxCalls, + error: error.message, + action: + softCap.remaining > 0 + ? "allow_tool_with_request_local_soft_cap" + : "disable_tool_until_policy_recovers", + }) + ) + continue + } + exhaustedBuiltInTools.add(call.toolName) + console.warn( + JSON.stringify({ + _tag: "tool_budget_post_accounting_denied", + requestId, + tool: call.toolName, + source: "builtin", + keyMode: providerToolKeyMode, + error: error instanceof Error ? error.message : String(error), + action: "disable_tool_for_remaining_steps", + }) + ) + } + } + for (const call of toolCalls) { const result = toolResults.find( (r) => r.toolCallId === call.toolCallId @@ -825,18 +1398,26 @@ export async function POST(req: Request) { ? !(result as { isError?: boolean }).isError : false const meta = allToolMetadata.get(call.toolName) + const trace = traceCollector.get(call.toolCallId) // Structured JSON log — parseable by Vercel log drain and grep. 
// Uses _tag for machine filtering without affecting human readability. console.log( JSON.stringify({ _tag: "tool_trace", + requestId, chatId, + userId, step: stepCounter, tool: meta?.displayName ?? call.toolName, source: meta?.source ?? "unknown", success, - durationMs: traceCollector.get(call.toolCallId)?.durationMs ?? null, + durationMs: trace?.durationMs ?? null, + estimatedCostPer1k: meta?.estimatedCostPer1k ?? null, + errorCode: trace?.errorCode ?? null, + retryAfterSeconds: trace?.retryAfterSeconds ?? null, + budgetKeyMode: trace?.budgetKeyMode ?? null, + budgetDenied: trace?.budgetDenied ?? null, tokens: { in: usage?.inputTokens ?? null, out: usage?.outputTokens ?? null, @@ -909,6 +1490,19 @@ export async function POST(req: Request) { }, onFinish: ({ text, usage, steps, finishReason }) => { + if (steps) { + const resolvedByCallId: ToolInvocationMetadataByCallId = {} + for (const step of steps) { + for (const toolCall of step.toolCalls ?? []) { + const resolved = toolMetadataByName[toolCall.toolName] + if (resolved) { + resolvedByCallId[toolCall.toolCallId] = resolved + } + } + } + toolMetadataByCallId = resolvedByCallId + } + // Freeze reasoning duration if it wasn't already frozen by text-delta // (e.g. reasoning-only responses with no text output, or errors) if (reasoningStartMs !== null && reasoningDurationMs === null) { @@ -945,7 +1539,7 @@ export async function POST(req: Request) { console.log( `[chat] Anthropic tool usage — inputTokens: ${usage?.inputTokens ?? "?"}, ` + `toolCount: ${Object.keys(allTools).length}, ` + - `tokenEfficient: true` + `tokenEfficient: ${isTokenEfficient}` ) } @@ -974,13 +1568,6 @@ export async function POST(req: Request) { // PostHog: unified tool call events — one event per tool invocation (all sources) // Replaces the previous MCP-only mcp_tool_call event. 
if (steps) { - // Combine all metadata maps for source identification - const allToolMetadata = new Map([ - ...builtInToolMetadata, - ...thirdPartyToolMetadata, - ...platformToolMetadata, - ]) - for (const step of steps) { if (step.toolCalls) { for (const toolCall of step.toolCalls) { @@ -1002,6 +1589,7 @@ export async function POST(req: Request) { const success = toolResult ? !(toolResult as { isError?: boolean }).isError : false + const trace = traceCollector.get(toolCall.toolCallId) phClient.capture({ distinctId: userId, @@ -1014,7 +1602,12 @@ export async function POST(req: Request) { success, chatId, // Phase C: Observability enrichment - durationMs: traceCollector.get(toolCall.toolCallId)?.durationMs ?? undefined, + durationMs: trace?.durationMs ?? undefined, + errorCode: trace?.errorCode, + retryAfterSeconds: trace?.retryAfterSeconds, + budgetKeyMode: trace?.budgetKeyMode, + budgetDenied: trace?.budgetDenied, + requestId, // MCP-specific (optional) ...(mcpServerInfo && { serverId: mcpServerInfo.serverId, @@ -1060,12 +1653,7 @@ export async function POST(req: Request) { ? !(toolResult as { isError?: boolean }).isError : false - // Extract data from envelope for preview — avoids wasting - // 500 chars on envelope metadata ({"ok":true,"data":...}). - const output = toolResult?.output - const previewData = isToolResultEnvelope(output) - ? 
output.data - : output + const previewData = toolResult?.output const trace = traceCollector.get(toolCall.toolCallId) void fetchMutation( @@ -1089,6 +1677,11 @@ export async function POST(req: Request) { inputTokens: step.usage?.inputTokens, outputTokens: step.usage?.outputTokens, resultSizeBytes: trace?.resultSizeBytes, + requestId, + errorCode: trace?.errorCode, + retryAfterSeconds: trace?.retryAfterSeconds, + budgetKeyMode: trace?.budgetKeyMode, + budgetDenied: trace?.budgetDenied, }, { token: convexToken } ).catch(() => { @@ -1102,14 +1695,7 @@ export async function POST(req: Request) { // Audit log: persist built-in + third-party tool calls (fire-and-forget). // Identifies non-MCP tools by checking if the tool name is NOT in mcpToolServerMap. if (convexToken && steps) { - // Combine built-in and third-party metadata maps - const nonMcpMetadata = new Map([ - ...builtInToolMetadata, - ...thirdPartyToolMetadata, - ...platformToolMetadata, - ]) - - if (nonMcpMetadata.size > 0) { + if (allToolMetadata.size > 0) { let finishStepNumber = 0 for (const step of steps) { @@ -1120,7 +1706,7 @@ export async function POST(req: Request) { // Skip MCP tools (already logged above) if (mcpToolServerMap.get(toolCall.toolName)) continue - const meta = nonMcpMetadata.get(toolCall.toolName) + const meta = allToolMetadata.get(toolCall.toolName) if (!meta) continue // Unknown tool — skip const toolResult = step.toolResults?.find( @@ -1131,11 +1717,9 @@ export async function POST(req: Request) { ? !(toolResult as { isError?: boolean }).isError : false - // For non-MCP tools, check if result is enveloped (Exa uses envelopes) - const output = toolResult?.output - const previewData = isToolResultEnvelope(output) - ? output.data - : output + const previewData = toolResult?.output + + const trace = traceCollector.get(toolCall.toolCallId) void fetchMutation( api.toolCallLog.log, @@ -1149,18 +1733,19 @@ export async function POST(req: Request) { ? 
JSON.stringify(previewData).slice(0, 500) : undefined, success, - // FIX: Use actual per-tool duration instead of total stream duration. - // For Exa, the envelope's meta.durationMs has the real timing. - // For builtin tools, we don't have per-tool timing (no trace collector). - durationMs: isToolResultEnvelope(output) - ? output.meta.durationMs - : undefined, + durationMs: trace?.durationMs, source: meta.source, serviceName: meta.serviceName, // Phase C: Observability enrichment stepNumber: finishStepNumber, inputTokens: step.usage?.inputTokens, outputTokens: step.usage?.outputTokens, + resultSizeBytes: trace?.resultSizeBytes, + requestId, + errorCode: trace?.errorCode, + retryAfterSeconds: trace?.retryAfterSeconds, + budgetKeyMode: trace?.budgetKeyMode, + budgetDenied: trace?.budgetDenied, }, { token: convexToken } ).catch(() => { @@ -1178,8 +1763,14 @@ export async function POST(req: Request) { sendReasoning: true, sendSources: true, messageMetadata: ({ part }) => { - if (part.type === "finish" && reasoningDurationMs !== null) { - return { reasoningDurationMs } + if (part.type === "start") { + return buildStartToolInvocationStreamMetadata(toolMetadataByName) + } + if (part.type === "finish") { + return buildFinishToolInvocationStreamMetadata({ + toolMetadataByCallId, + reasoningDurationMs, + }) } return {} }, diff --git a/app/components/chat/message-assistant.tsx b/app/components/chat/message-assistant.tsx index 007104d3..6fe5a17e 100644 --- a/app/components/chat/message-assistant.tsx +++ b/app/components/chat/message-assistant.tsx @@ -225,7 +225,10 @@ export function MessageAssistant({ {toolInvocationParts && toolInvocationParts.length > 0 && preferences.showToolInvocations && ( - + )} {showToolProgress && ( diff --git a/app/components/chat/tool-invocation.tsx b/app/components/chat/tool-invocation.tsx index 92fe052b..21f4bbba 100644 --- a/app/components/chat/tool-invocation.tsx +++ b/app/components/chat/tool-invocation.tsx @@ -3,8 +3,15 @@ import { cn } from 
"@/lib/utils" import type { ToolUIPart } from 'ai' import { getStaticToolName } from 'ai' +import { + humanizeToolName, + resolveToolInvocationMetadata, + type ToolInvocationDisplayMetadata, + type ToolInvocationStreamMetadata, +} from "@/lib/tools/ui-metadata" import { HugeiconsIcon } from "@hugeicons/react" import { + AlertCircleIcon, ArrowDown01Icon, CheckmarkCircle01Icon, SourceCodeIcon, @@ -12,6 +19,7 @@ import { NutIcon, Loading01Icon, Search01Icon, + FileSearchIcon, Wrench01Icon, } from "@hugeicons-pro/core-stroke-rounded" import { AnimatePresence, motion } from "framer-motion" @@ -19,6 +27,7 @@ import { useMemo, useState } from "react" type ToolInvocationProps = { toolInvocations: ToolUIPart[] + metadata?: Record className?: string defaultOpen?: boolean } @@ -32,30 +41,119 @@ const TRANSITION = { /** Maps built-in tool names to human-readable display names and icons */ const BUILTIN_TOOL_DISPLAY: Record< string, - { name: string; icon: "search" | "code" | "image" | "extract" } + { name: string; icon: "search" | "code" | "image" | "extract" | "wrench" } > = { web_search: { name: "Web Search", icon: "search" }, google_search: { name: "Web Search", icon: "search" }, + extract_content: { name: "Read Page", icon: "extract" }, + pay_purchase: { name: "Purchase", icon: "wrench" }, + pay_status: { name: "Purchase Status", icon: "wrench" }, // Future built-in tools: // code_execution: { name: "Code Execution", icon: "code" }, // image_generation: { name: "Image Generation", icon: "image" }, } -/** Resolve icon component from BUILTIN_TOOL_DISPLAY icon identifier */ -function getToolIcon(iconId: "search" | "code" | "image" | "extract") { +/** Resolve icon component from metadata icon identifier */ +function getToolIcon(iconId: NonNullable) { switch (iconId) { case "search": return Search01Icon + case "extract": + return FileSearchIcon + case "wrench": + return Wrench01Icon default: return Wrench01Icon } } +function isToolSource(value: unknown): value is 
ToolInvocationDisplayMetadata["source"] { + return ( + value === "builtin" || + value === "third-party" || + value === "mcp" || + value === "platform" + ) +} + +function isToolIcon(value: unknown): value is NonNullable { + return ( + value === "search" || + value === "code" || + value === "image" || + value === "extract" || + value === "wrench" + ) +} + +function isToolInvocationDisplayMetadata( + value: unknown +): value is ToolInvocationDisplayMetadata { + if (typeof value !== "object" || value === null) return false + const candidate = value as Record + if (typeof candidate.displayName !== "string") return false + if (!isToolSource(candidate.source)) return false + if (typeof candidate.serviceName !== "string") return false + if (candidate.icon !== undefined && !isToolIcon(candidate.icon)) return false + if ( + candidate.estimatedCostPer1k !== undefined && + typeof candidate.estimatedCostPer1k !== "number" + ) return false + if (candidate.readOnly !== undefined && typeof candidate.readOnly !== "boolean") return false + if (candidate.destructive !== undefined && typeof candidate.destructive !== "boolean") return false + if (candidate.idempotent !== undefined && typeof candidate.idempotent !== "boolean") return false + if (candidate.openWorld !== undefined && typeof candidate.openWorld !== "boolean") return false + return true +} + +function toMetadataRecord( + value: unknown +): Record { + if (typeof value !== "object" || value === null) return {} + const record = value as Record + const parsed: Record = {} + + for (const [key, candidate] of Object.entries(record)) { + if (isToolInvocationDisplayMetadata(candidate)) { + parsed[key] = candidate + } + } + + return parsed +} + +function getToolMetadataMaps(metadata?: Record) { + return { + byName: toMetadataRecord(metadata?.toolMetadataByName), + byCallId: toMetadataRecord(metadata?.toolMetadataByCallId), + } +} + +function formatSource(source: ToolInvocationDisplayMetadata["source"]): string { + switch (source) { + 
case "builtin": + return "Built-in" + case "third-party": + return "Third-party" + case "platform": + return "Platform" + case "mcp": + return "MCP" + default: + return "Unknown" + } +} + export function ToolInvocation({ toolInvocations, + metadata, defaultOpen = false, }: ToolInvocationProps) { const [isExpanded, setIsExpanded] = useState(defaultOpen) + const { byName, byCallId } = useMemo( + () => getToolMetadataMaps(metadata), + [metadata] + ) const toolInvocationsData = Array.isArray(toolInvocations) ? toolInvocations @@ -81,6 +179,8 @@ export function ToolInvocation({ return ( @@ -138,6 +238,8 @@ export function ToolInvocation({ > ) @@ -154,12 +256,16 @@ export function ToolInvocation({ type SingleToolViewProps = { toolInvocations: ToolUIPart[] + metadataByName: Record + metadataByCallId: Record defaultOpen?: boolean className?: string } function SingleToolView({ toolInvocations, + metadataByName, + metadataByCallId, defaultOpen = false, className, }: SingleToolViewProps) { @@ -201,6 +307,8 @@ function SingleToolView({ return ( @@ -215,6 +323,8 @@ function SingleToolView({ ))} @@ -226,27 +336,48 @@ function SingleToolView({ // New component to handle individual tool cards function SingleToolCard({ toolData, + metadataByName, + metadataByCallId, defaultOpen = false, className, }: { toolData: ToolUIPart + metadataByName: Record + metadataByCallId: Record defaultOpen?: boolean className?: string }) { const [isExpanded, setIsExpanded] = useState(defaultOpen) const { state, toolCallId } = toolData - // v6: Get tool name using official helper. - // NOTE: For MCP tools, this returns the namespaced name (e.g. "my_github_server_create_issue"). - // Displaying a cleaner name requires passing toolServerMap from the chat route via stream - // metadata, which is planned for v1.1. Until then, the namespaced name is shown as-is. 
const toolName = getStaticToolName(toolData) + const streamMetadata: ToolInvocationStreamMetadata = { + toolMetadataByName: metadataByName, + toolMetadataByCallId: metadataByCallId, + } + const runtimeMetadata = resolveToolInvocationMetadata({ + toolName, + toolCallId, + streamMetadata, + }) const displayInfo = BUILTIN_TOOL_DISPLAY[toolName] ?? null - const displayName = displayInfo?.name ?? toolName - const ToolIcon = displayInfo ? getToolIcon(displayInfo.icon) : Wrench01Icon + const displayName = + runtimeMetadata?.displayName ?? + displayInfo?.name ?? + humanizeToolName(toolName) + const iconId = runtimeMetadata?.icon ?? displayInfo?.icon ?? "wrench" + const ToolIcon = getToolIcon(iconId) + const source = runtimeMetadata?.source + const serviceName = runtimeMetadata?.serviceName + const estimatedCostPer1k = runtimeMetadata?.estimatedCostPer1k + const readOnly = runtimeMetadata?.readOnly + const destructive = runtimeMetadata?.destructive + const idempotent = runtimeMetadata?.idempotent + const openWorld = runtimeMetadata?.openWorld const args = toolData.input as Record | undefined const isLoading = state === "input-available" || state === "input-streaming" const isCompleted = state === "output-available" const result = isCompleted ? toolData.output : undefined + const isError = isCompleted && result != null && typeof result === "object" && "isError" in result && (result as Record).isError === true // Parse the result JSON if available const { parsedResult, parseError } = useMemo(() => { @@ -421,7 +552,18 @@ function SingleToolCard({ >
-            {displayName}
+
+              {displayName}
+
+            {(source || serviceName) && (
+
+                {[source ? formatSource(source) : null, serviceName]
+                  .filter(Boolean)
+                  .join(" · ")}
+
+            )}
+
           {isLoading ? (
+          ) : isError ? (
+
+
+
+              Failed
+
+
+
           ) : (
           )}
+        {(source ||
+          serviceName ||
+          typeof estimatedCostPer1k === "number" ||
+          typeof readOnly === "boolean" ||
+          typeof destructive === "boolean" ||
+          typeof idempotent === "boolean" ||
+          typeof openWorld === "boolean") && (
+
+
+              Tool info
+
+
+            {source && (
+
+                Source:{" "}
+                {formatSource(source)}
+
+            )}
+            {serviceName && (
+
+                Service:{" "}
+                {serviceName}
+
+            )}
+            {typeof estimatedCostPer1k === "number" && (
+
+                Estimated cost:{" "}
+                ${estimatedCostPer1k.toFixed(2)} / 1k calls
+
+            )}
+            {typeof readOnly === "boolean" && (
+
+                Read-only:{" "}
+                {readOnly ? "Yes" : "No"}
+
+            )}
+            {typeof destructive === "boolean" && (
+
+                Destructive:{" "}
+                {destructive ? "Yes" : "No"}
+
+            )}
+            {typeof idempotent === "boolean" && (
+
+                Idempotent:{" "}
+                {idempotent ? "Yes" : "No"}
+
+            )}
+            {typeof openWorld === "boolean" && (
+
+                Open-world:{" "}
+                {openWorld ? "Yes" : "No"}
+
+            )}
+
+
+        )}
+
           {/* Tool call ID */}
diff --git a/app/components/chat/use-model.ts b/app/components/chat/use-model.ts
index caec8747..8792ec52 100644
--- a/app/components/chat/use-model.ts
+++ b/app/components/chat/use-model.ts
@@ -28,16 +28,20 @@ export function useModel({
   chatId,
 }: UseModelProps) {
   // Get favorite models and last-used model from ModelProvider
-  const { favoriteModels, lastUsedModel, setLastUsedModel } =
+  const { favoriteModels, lastUsedModel, modelPrefsHydrated, setLastUsedModel } =
     useModelProvider()
 
   // Calculate the effective model based on priority: chat model > last used > first favorite > default
   const getEffectiveModel = useCallback(() => {
-    const firstFavoriteModel = favoriteModels[0]
+    const hydratedLastUsedModel = modelPrefsHydrated ? lastUsedModel : null
+    const firstFavoriteModel = modelPrefsHydrated ? favoriteModels[0] : null
     return (
-      currentChat?.model || lastUsedModel || firstFavoriteModel || MODEL_DEFAULT
+      currentChat?.model ||
+      hydratedLastUsedModel ||
+      firstFavoriteModel ||
+      MODEL_DEFAULT
     )
-  }, [currentChat?.model, lastUsedModel, favoriteModels])
+  }, [currentChat?.model, favoriteModels, lastUsedModel, modelPrefsHydrated])
 
   // Use local state only for temporary overrides, derive base value from props
   const [localSelectedModel, setLocalSelectedModel] = useState(
diff --git a/app/globals.css b/app/globals.css
index 17d258a4..ead80a14 100644
--- a/app/globals.css
+++ b/app/globals.css
@@ -157,6 +157,100 @@
       box-shadow: 0 0 8px 1px color-mix(in oklab, var(--foreground) 45%, transparent);
     }
   }
+
+  @keyframes spinner-fade {
+    0% {
+      opacity: 1;
+    }
+    100% {
+      opacity: 0;
+    }
+  }
+
+  @keyframes thin-pulse {
+    0%,
+    100% {
+      transform: scale(0.8);
+      opacity: 0.5;
+    }
+    50% {
+      transform: scale(1);
+      opacity: 1;
+    }
+  }
+
+  @keyframes pulse-dot {
+    0%,
+    100% {
+      transform: scale(0.8);
+      opacity: 0.5;
+    }
+    50% {
+      transform: scale(1.3);
+      opacity: 1;
+    }
+  }
+
+  @keyframes bounce-dots {
+    0%,
+    80%,
+    100% {
+      transform: scale(0);
+    }
+    40% {
+      transform: scale(1);
+    }
+  }
+
+  @keyframes typing {
+    0%,
+    100% {
+      opacity: 0.2;
+    }
+    50% {
+      opacity: 1;
+    }
+  }
+
+  @keyframes wave {
+    0%,
+    100% {
+      transform: scaleY(0.5);
+    }
+    50% {
+      transform: scaleY(1.2);
+    }
+  }
+
+  @keyframes wave-bars {
+    0%,
+    100% {
+      transform: scaleY(0.4);
+    }
+    50% {
+      transform: scaleY(1);
+    }
+  }
+
+  @keyframes blink {
+    0%,
+    100% {
+      opacity: 1;
+    }
+    50% {
+      opacity: 0;
+    }
+  }
+
+  @keyframes text-blink {
+    0%,
+    100% {
+      opacity: 1;
+    }
+    50% {
+      opacity: 0.3;
+    }
+  }
 }
 
 @theme inline {
diff --git a/app/test/thinking-states/page.tsx b/app/test/thinking-states/page.tsx
index 5701a0cc..a9baa64e 100644
--- a/app/test/thinking-states/page.tsx
+++ b/app/test/thinking-states/page.tsx
@@ -32,7 +32,7 @@ import {
   Copy01Icon,
 } from "@hugeicons-pro/core-stroke-rounded"
 import type { SourceUrlUIPart, ToolUIPart } from "ai"
-import { useCallback, useState, useEffect } from "react"
+import { useCallback, useState, useEffect, useId } from "react"
 
 // ─── Constants ───────────────────────────────────────────────────────────────
 
@@ -286,20 +286,51 @@ function StateAnnotation({
   )
 }
 
+function ArticleWrapper({
+  children,
+  role,
+}: {
+  children: React.ReactNode
+  role: "user" | "assistant"
+}) {
+  return (
+
+
+        {children}
+
+  )
+}
+
 function UserBubble({ children }: { children: string }) {
   const isMultiline = children.includes("\n")
 
   return (
-
-
+
-
-      {children}
-
-
+
+        You said:
+
+          {children}
+
+
+  )
 }
@@ -310,26 +341,54 @@ function AssistantShell({
   children: React.ReactNode
   isLast?: boolean
 }) {
+  const msgId = useId()
 
   return (
-
-
+
+
-
-      {children}
-
-
+
+        Assistant said:
+
+          {children}
+
+
+  )
 }
 
 function CopyRegenActions() {
   return (
-
+