diff --git a/.gitignore b/.gitignore
index 0688b83..e9287ec 100644
--- a/.gitignore
+++ b/.gitignore
@@ -31,5 +31,3 @@ Thumbs.db
 # Tests (local development only)
 tests/
-# Development notes
-notes/
diff --git a/.npmignore b/.npmignore
index cb4b50d..c1b05f1 100644
--- a/.npmignore
+++ b/.npmignore
@@ -10,6 +10,8 @@ bun.lock
 # Documentation
 ANALYSIS.md
+docs/
+notes/
 # Source files (since we're shipping dist/)
 index.ts
diff --git a/docs/providers/README.md b/docs/providers/README.md
new file mode 100644
index 0000000..29af628
--- /dev/null
+++ b/docs/providers/README.md
@@ -0,0 +1,339 @@
# Provider API Formats Reference

This directory contains documentation for each AI provider's API format, designed to help the context pruning plugin implement provider-specific logic.

## Sources

All information in these docs was gathered from:

### Primary Sources

| Source | Location | Description |
|--------|----------|-------------|
| **Vercel AI SDK** | https://github.com/vercel/ai | Provider conversion logic in `packages/{provider}/src/` |
| **OpenCode Source** | `/packages/opencode/src/provider/` | Custom transforms and provider loading |
| **models.dev API** | https://models.dev/api.json | Authoritative provider list with npm packages |

### Key AI SDK Files

| Provider | Conversion File |
|----------|-----------------|
| OpenAI | `packages/openai/src/chat/openai-chat-language-model.ts`, `packages/openai/src/responses/openai-responses-language-model.ts` |
| OpenAI-Compatible | `packages/openai-compatible/src/chat/openai-compatible-chat-language-model.ts` |
| Anthropic | `packages/anthropic/src/convert-to-anthropic-messages-prompt.ts`, `packages/anthropic/src/anthropic-messages-language-model.ts` |
| Google | `packages/google/src/convert-to-google-generative-ai-messages.ts`, `packages/google/src/google-generative-ai-language-model.ts` |
| AWS Bedrock | `packages/amazon-bedrock/src/convert-to-bedrock-chat-messages.ts`, `packages/amazon-bedrock/src/bedrock-chat-language-model.ts` |
| Mistral | `packages/mistral/src/convert-to-mistral-chat-messages.ts`, `packages/mistral/src/mistral-chat-language-model.ts` |
| Cohere | `packages/cohere/src/convert-to-cohere-chat-prompt.ts`, `packages/cohere/src/cohere-chat-language-model.ts` |

### OpenCode Custom Transform Files

| File | Purpose |
|------|---------|
| `src/provider/transform.ts` | Provider-specific message normalization, caching hints, schema transforms |
| `src/provider/provider.ts` | Provider loading, custom loaders, SDK instantiation |
| `src/provider/models.ts` | Model database schema, models.dev integration |
| `src/session/message-v2.ts` | Internal message structure, `toModelMessage()` conversion |

### Official API Documentation

| Provider | Documentation URL |
|----------|-------------------|
| OpenAI | https://platform.openai.com/docs/api-reference |
| Anthropic | https://docs.anthropic.com/en/api |
| Google Gemini | https://ai.google.dev/api/rest |
| AWS Bedrock | https://docs.aws.amazon.com/bedrock/latest/APIReference/ |
| Mistral | https://docs.mistral.ai/api/ |
| Cohere | https://docs.cohere.com/reference/chat |

---

## Format Categories

Providers fall into several format categories based on their API structure:

### 1. OpenAI Chat Comple
tions Format
**Most common format - used by ~60 providers**

Key identifiers:
- `body.messages[]` array
- Tool results: `role: "tool"`, `tool_call_id`
- System in messages array

Providers: openai, together, deepseek, groq, fireworks, hyperbolic, novita, cerebras, sambanova, perplexity, openrouter, and most others

### 2. OpenAI Responses Format (newer)
**Used by OpenAI GPT models via the Responses API**

Key identifiers:
- `body.input[]` array
- Tool results: `type: "function_call_output"`, `call_id`

Providers: openai (responses endpoint), azure (responses endpoint)

### 3. Anthropic Format
**Distinct format with cache control**

Key identifiers:
- `body.messages[]`, but tool results in user messages
- Tool results: `type: "tool_result"`, `tool_use_id`
- Top-level `system` array
- `cache_control` support

Providers: anthropic

### 4. Google Gemini Format
**Position-based tool correlation**

Key identifiers:
- `body.contents[]` array
- Tool results: `functionResponse` parts (no IDs!)
- Roles: `user`/`model` only
- Top-level `systemInstruction`

Providers: google, google-vertex

### 5. AWS Bedrock Format
**Converse API with cache points**

Key identifiers:
- Top-level `system` array
- Tool results: `toolResult` blocks with `toolUseId`
- `cachePoint` blocks

Providers: amazon-bedrock

### 6. Mistral Format (OpenAI-like with quirks)
**Strict ID requirements**

Key identifiers:
- OpenAI-like, but 9-char alphanumeric tool IDs required
- User content always an array

Providers: mistral

### 7. Cohere Format
**RAG-native with citations**

Key identifiers:
- Uses `p`/`k` instead of `top_p`/`top_k`
- Uppercase tool choice values
- `documents` array for RAG

Providers: cohere

## Quick Reference: Thinking/Reasoning

| Format | Request Config | Response Structure | Encrypted? | Signature? |
|---
-----|---------------|-------------------|------------|------------|
| OpenAI Responses | `reasoning: {effort, summary}` | `{type: "reasoning", encrypted_content, summary}` | Yes | No |
| Anthropic | `thinking: {type, budget_tokens}` | `{type: "thinking", thinking, signature}` | Partial* | Yes |
| Google Gemini | `thinkingConfig: {thinkingBudget}` | `{text, thought: true, thoughtSignature}` | No | Optional |
| AWS Bedrock | `additionalModelRequestFields.thinking` | `{reasoningContent: {reasoningText/redactedReasoning}}` | Partial* | Yes |
| Mistral | N/A (model decides) | `{type: "thinking", thinking: [{type: "text", text}]}` | No | No |
| Cohere | `thinking: {type, token_budget}` | `{type: "thinking", thinking: "..."}` | No | No |

*Partial = has both visible (`thinking`/`reasoningText`) and redacted (`redacted_thinking`/`redactedReasoning`) variants

**Key differences:**
- **OpenAI**: Reasoning is always encrypted; only the summary is readable
- **Anthropic/Bedrock**: Can have visible thinking with a signature, or redacted thinking
- **Gemini**: Thinking is a text part with the `thought: true` flag
- **Mistral**: Thinking is a nested array of text parts
- **Cohere**: Thinking is a plain string

**SDK normalization**: All formats are converted to `{type: "reasoning", text: "..."}` by the AI SDK

## Quick Reference: Tool Call ID Fields

| Format | Tool Call ID Field | Tool Result ID Field |
|--------|-------------------|---------------------|
| OpenAI Chat | `tool_calls[].id` | `tool_call_id` |
| OpenAI Responses | `call_id` | `call_id` |
| Anthropic | `tool_use.id` | `tool_use_id` |
| Gemini | **NONE (position-based)** | **NONE** |
| Bedrock | `toolUse.toolUseId` | `toolResult.toolUseId` |
| Mistral | `tool_calls[].id` (9-char) | `tool_call_id` |
| Cohere | `tool_calls[].id` | `tool_call_id` |

## Detection Strategy

To detect which format a request uses, check for format-specific top-level fields first, then fall back to inspecting message content. Anthropic tool results can appear in any user message, not just the first, so all messages are scanned:

```typescript
function detectFormat(body: any): string {
  if (Array.isArray(body.input)) return 'openai-responses'
  if (Array.isArray(body.contents)) return 'gemini'
  if (Array.isArray(body.system) && body.inferenceConfig) return 'bedrock'
  if (Array.isArray(body.messages)) {
    // Anthropic embeds tool results in user messages as tool_result blocks
    for (const msg of body.messages) {
      if (!Array.isArray(msg?.content)) continue
      for (const block of msg.content) {
        if (block?.type === 'tool_result' || block?.tool_use_id) return 'anthropic'
      }
    }
  }
  return 'openai-chat' // Default
}
```

## Files

- [openai.md](./openai.md) - OpenAI Chat Completions & Responses API
- [anthropic.md](./anthropic.md) - Anthropic Messages API
- [google-gemini.md](./google-gemini.md) - Google Generative AI (Gemini)
- [aws-bedrock.md](./aws-bedrock.md) - AWS Bedrock Converse API
- [mistral.md](./mistral.md) - Mistral API
- [cohere.md](./cohere.md) - Cohere Chat API
- [openai-compatible.md](./openai-compatible.md) - OpenAI-compatible providers

## Context Pruning Universal Rules

1. **Tool call/result pairing**: Always prune tool calls and their results together
2. **Message alternation**: Most APIs expect alternating user/assistant messages
3. **System preservation**: System messages typically should not be pruned
4. **ID correlation**: Maintain ID relationships when pruning (except Gemini, which is position-based)
5. **Cache mark
ers**: Consider preserving cache control markers when present

---

## Complete Provider List (models.dev)

Every provider from models.dev and its API format:

### OpenAI Chat Format (44 providers)
*Uses `@ai-sdk/openai-compatible` - standard OpenAI messages format*

| Provider ID | Name | Notes |
|-------------|------|-------|
| `agentrouter` | AgentRouter | |
| `alibaba` | Alibaba | |
| `alibaba-cn` | Alibaba (China) | |
| `bailing` | Bailing | |
| `baseten` | Baseten | |
| `chutes` | Chutes | |
| `cortecs` | Cortecs | |
| `deepseek` | DeepSeek | Reasoning models (R1) |
| `fastrouter` | FastRouter | |
| `fireworks-ai` | Fireworks AI | |
| `github-copilot` | GitHub Copilot | |
| `github-models` | GitHub Models | |
| `huggingface` | Hugging Face | |
| `iflowcn` | iFlow | |
| `inception` | Inception | |
| `inference` | Inference | |
| `io-net` | IO.NET | |
| `llama` | Llama | |
| `lmstudio` | LMStudio | Local inference |
| `lucidquery` | LucidQuery AI | |
| `modelscope` | ModelScope | |
| `moonshotai` | Moonshot AI | |
| `moonshotai-cn` | Moonshot AI (China) | |
| `morph` | Morph | |
| `nebius` | Nebius Token Factory | |
| `nvidia` | Nvidia | |
| `opencode` | OpenCode Zen | |
| `openrouter` | OpenRouter | Meta-provider, cache support |
| `ovhcloud` | OVHcloud AI Endpoints | |
| `poe` | Poe | |
| `requesty` | Requesty | |
| `scaleway` | Scaleway | |
| `siliconflow` | SiliconFlow | |
| `submodel` | submodel | |
| `synthetic` | Synthetic | |
| `upstage` | Upstage | |
| `venice` | Venice AI | |
| `vultr` | Vultr | |
| `wandb` | Weights & Biases | |
| `zai` | Z.AI | |
| `zai-coding-plan` | Z.AI Coding Plan | |
| `zenmux` | ZenMux | |
| `zhipuai` | Zhipu AI | |
| `zhipuai-coding-plan` | Zhipu AI Coding Plan | |

### OpenAI Native Format (1 provider)
*Uses `@ai-sdk/openai` - supports both Chat Completions and Responses API*

| Provider ID | Name | Notes |
|-------------|------|-------|
| `openai` | OpenAI | Responses API for GPT-4.1+ |

### Azure Format (2 providers)
*Uses `@ai-sdk/azure` - OpenAI format with Azure auth*

| Provider ID | Name | Notes |
|-------------|------|-------|
| `azure` | Azure | Supports Responses API |
| `azure-cognitive-services` | Azure Cognitive Services | |

### Anthropic Format (4 providers)
*Uses `@ai-sdk/anthropic` - distinct message format with cache control*

| Provider ID | Name | Notes |
|-------------|------|-------|
| `anthropic` | Anthropic | Native Anthropic API |
| `kimi-for-coding` | Kimi For Coding | Uses Anthropic format |
| `minimax` | MiniMax | Uses Anthropic format |
| `minimax-cn` | MiniMax (China) | Uses Anthropic format |

### Google Gemini Format (3 providers)
*Uses `@ai-sdk/google` or `@ai-sdk/google-vertex` - POSITION-BASED tool correlation*

| Provider ID | Name | Notes |
|-------------|------|-------|
| `google` | Google | Native Gemini API |
| `google-vertex` | Vertex | Google Cloud Vertex AI |
| `google-vertex-anthropic` | Vertex (Anthropic) | Claude via Vertex |

### AWS Bedrock Format (1 provider)
*Uses `@ai-sdk/amazon-bedrock` - Converse API with cachePoint*

| Provider ID | Name | Notes |
|-------------|------|-------|
| `amazon-bedrock` | Amazon Bedrock | Multi-model, cachePoint support |

### Mistral Format (1 provider)
*Uses `@ai-sdk/mistral` - requires 9-char alphanumeric tool IDs*

| Provider ID | Name | Notes |
|-------------|------|-------|
| `mistral` | Mistral | Strict tool ID format |

### Cohere Format (1 pro
vider)
*Uses `@ai-sdk/cohere` - RAG-native with citations*

| Provider ID | Name | Notes |
|-------------|------|-------|
| `cohere` | Cohere | Uses `p`/`k`, uppercase tool choice |

### Specialized SDK Providers (12 providers)
*Use provider-specific SDKs but follow OpenAI-like format*

| Provider ID | Name | SDK | Format |
|-------------|------|-----|--------|
| `cerebras` | Cerebras | `@ai-sdk/cerebras` | OpenAI-like |
| `deepinfra` | Deep Infra | `@ai-sdk/deepinfra` | OpenAI-like |
| `groq` | Groq | `@ai-sdk/groq` | OpenAI-like |
| `perplexity` | Perplexity | `@ai-sdk/perplexity` | OpenAI-like |
| `togetherai` | Together AI | `@ai-sdk/togetherai` | OpenAI-like |
| `xai` | xAI | `@ai-sdk/xai` | OpenAI-like |
| `vercel` | Vercel AI Gateway | `@ai-sdk/gateway` | OpenAI-like |
| `v0` | v0 | `@ai-sdk/vercel` | OpenAI-like |
| `cloudflare-workers-ai` | Cloudflare Workers AI | `workers-ai-provider` | OpenAI-like |
| `ollama-cloud` | Ollama Cloud | `ai-sdk-ollama` | OpenAI-like |
| `aihubmix` | AIHubMix | `@aihubmix/ai-sdk-provider` | OpenAI-like |
| `sap-ai-core` | SAP AI Core | `@mymediset/sap-ai-provider` | OpenAI-like |

---

## Format Summary

| Format | Provider Count | Tool ID Field | Key Identifier |
|--------|---------------|---------------|----------------|
| OpenAI Chat | 56 | `tool_call_id` | `body.messages[]` |
| OpenAI Responses | 2 | `call_id` | `body.input[]` |
| Anthropic | 4 | `tool_use_id` | `tool_result` in user msg |
| Google Gemini | 3 | **NONE** | `body.contents[]` |
| AWS Bedrock | 1 | `toolUseId` | `body.inferenceConfig` |
| Mistral | 1 | `tool_call_id` (9-char) | Check provider ID |
| Cohere | 1 | `tool_call_id` | Check provider ID |

**Total: 69 providers**
diff --git a/docs/providers/anthropic.md b/docs/providers/anthropic.md
new file mode 100644
index 0000000..d1610fa
--- /dev/null
+++ b/docs/providers/anthropic.md
@@ -0,0 +1,216 @@
# Anthropic Messages API Format

Anthropic uses a distinct message format with unique features like cache control and extended thinking.

## Sources

- **AI SDK**: `packages/anthropic/src/convert-to-anthropic-messages-prompt.ts`, `packages/anthropic/src/anthropic-messages-language-model.ts`
- **OpenCode Transform**: `src/provider/transform.ts` (toolCallId sanitization, cache control)
- **Official Docs**: https://docs.anthropic.com/en/api/messages

## Request Structure

```json
{
  "model": "claude-sonnet-4-5",
  "max_tokens": 4096,
  "temperature": 1.0,
  "stream": true,
  "system": [
    {"type": "text", "text": "System instructions", "cache_control": {"type": "ephemeral"}}
  ],
  "messages": [...],
  "tools": [...],
  "tool_choice": {"type": "auto"},
  "thinking": {"type": "enabled", "budget_tokens": 10000}
}
```

## Key Differences from OpenAI

| Feature | OpenAI | Anthropic |
|---------|--------|-----------|
| System message | In messages array | Top-level `system` array |
| Tool results | `role: "tool"` message | In `user` message with `type: "tool_result"` |
| Tool call ID field | `tool_call_id` | `tool_use_id` |
| Caching | Automatic only (no request markers) | `cache_control` on content blocks |

## Message Roles

Only **two roles**: `user` and `assistant`. Tool results are embedded in user messages.
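
Because tool results live inside `user` messages, a pruner has to scan content blocks rather than rely on message roles to pair calls with results. A minimal sketch of that pairing pass (illustrative types and helper name, not the plugin's actual API):

```typescript
// Sketch: map each tool_use id to the message indices of its call and its
// result, so both sides of a pair can be pruned together.
type AnthropicMessage = { role: "user" | "assistant"; content: string | any[] }

function collectToolPairs(messages: AnthropicMessage[]) {
  const pairs = new Map<string, { callIndex: number; resultIndex: number }>()
  messages.forEach((m, i) => {
    if (!Array.isArray(m.content)) return
    for (const block of m.content) {
      if (m.role === "assistant" && block.type === "tool_use") {
        pairs.set(block.id, { callIndex: i, resultIndex: -1 })
      } else if (m.role === "user" && block.type === "tool_result") {
        const pair = pairs.get(block.tool_use_id) // correlate via tool_use_id
        if (pair) pair.resultIndex = i
      }
    }
  })
  return pairs
}
```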
+ +## Message Formats + +### System Message (top-level, not in messages) +```json +{ + "system": [ + { + "type": "text", + "text": "You are a helpful assistant.", + "cache_control": {"type": "ephemeral"} + } + ] +} +``` + +### User Message +```json +{ + "role": "user", + "content": [ + {"type": "text", "text": "Hello", "cache_control": {"type": "ephemeral"}}, + {"type": "image", "source": {"type": "base64", "media_type": "image/jpeg", "data": "..."}}, + {"type": "document", "source": {"type": "base64", "media_type": "application/pdf", "data": "..."}, "title": "Doc"} + ] +} +``` + +### Assistant Message with Tool Use +```json +{ + "role": "assistant", + "content": [ + {"type": "text", "text": "Let me check the weather."}, + { + "type": "tool_use", + "id": "toolu_01XYZ", + "name": "get_weather", + "input": {"location": "San Francisco"}, + "cache_control": {"type": "ephemeral"} + } + ] +} +``` + +### Tool Result (in user message) +```json +{ + "role": "user", + "content": [ + { + "type": "tool_result", + "tool_use_id": "toolu_01XYZ", + "content": "72°F and sunny", + "is_error": false, + "cache_control": {"type": "ephemeral"} + } + ] +} +``` + +## Thinking/Reasoning (Extended Thinking) + +### Request Configuration +```json +{ + "thinking": { + "type": "enabled", + "budget_tokens": 10000 + } +} +``` + +**Parameters:** +- `type`: `"enabled"` or `"disabled"` +- `budget_tokens`: Token budget for thinking (minimum 1024) + +**Constraints when thinking enabled:** +- `temperature`, `topK`, `topP` are **NOT supported** (ignored with warnings) +- `max_tokens` is automatically adjusted to include `budget_tokens` +- Minimum budget is 1,024 tokens + +### Response Content Blocks + +**Thinking Block** (visible reasoning): +```json +{ + "type": "thinking", + "thinking": "Let me analyze this step by step...", + "signature": "cryptographic_signature_for_verification" +} +``` + +**Redacted Thinking Block** (hidden reasoning): +```json +{ + "type": "redacted_thinking", + "data": "encrypted_base64_redacted_content" +} +``` + +### Streaming Deltas +```json +{"type": "thinking_delta", "thinking": "reasoning chunk..."} +{"type": "signature_delta", "signature": "sig_chunk"} +``` + +### SDK Conversion +The AI SDK converts Anthropic's `thinking` blocks to a unified `reasoning` type: +```typescript +// Anthropic response +{type: "thinking", thinking: "...", signature: "..."} + +// Converted to SDK format +{type: "reasoning", text: "...", signature: "..."} +``` + +### Context Pruning for Thinking +- **Cannot apply cache_control** to thinking or redacted_thinking blocks +- **Signatures are cryptographic** - preserve for verification if replaying +- **Redacted thinking** contains encrypted content that cannot be inspected +- Consider thinking blocks as important context but potentially large + +## Tool Definition + +```json +{ + "name": "get_weather", + "description": "Get weather for a location", + "input_schema": { + "type": "object", + "properties": {"location": {"type": "string"}}, + "required": ["location"] + }, + "cache_control": {"type": "ephemeral"} +} +``` + +### Tool Choice Options +- `{"type": "auto"}` - Model decides +- `{"type": "any"}` - Force tool use +- `{"type": "tool", "name": "get_weather"}` - Force specific tool + +## Cache Control + +```json +{"type": "ephemeral", "ttl": "5m"} +``` + +**Limits**: Maximum **4 cache breakpoints** per request + +**Applicable to**: system messages, user/assistant content parts, tool results, tool definitions + +**NOT applicable to**: `thinking` blocks, `redacted_thinking` 
blocks + +## Special Tool Types + +**Server Tool Use** (provider-executed): +```json +{"type": "server_tool_use", "id": "...", "name": "web_search", "input": {...}} +``` +Names: `web_fetch`, `web_search`, `code_execution`, `bash_code_execution`, `text_editor_code_execution` + +**MCP Tool Use**: +```json +{"type": "mcp_tool_use", "id": "...", "name": "custom_tool", "server_name": "my-mcp-server", "input": {...}} +``` + +## Context Pruning Considerations + +1. **Tool correlation**: Uses `tool_use_id` (not `tool_call_id`) +2. **Tool results in user messages**: Unlike OpenAI, tool results are `content` parts in user messages +3. **Message merging**: Consecutive user messages are merged; consecutive assistant messages are merged +4. **Cache breakpoints**: Preserve `cache_control` markers when possible (max 4) +5. **Thinking blocks**: Have signatures for verification; handle with care +6. **Paired pruning**: `tool_use` and corresponding `tool_result` must be pruned together diff --git a/docs/providers/aws-bedrock.md b/docs/providers/aws-bedrock.md new file mode 100644 index 0000000..f1c4479 --- /dev/null +++ b/docs/providers/aws-bedrock.md @@ -0,0 +1,287 @@ +# AWS Bedrock API Format + +AWS Bedrock uses the Converse API with unique content block types and caching via `cachePoint`. + +## Sources + +- **AI SDK**: `packages/amazon-bedrock/src/convert-to-bedrock-chat-messages.ts`, `packages/amazon-bedrock/src/bedrock-chat-language-model.ts` +- **OpenCode Transform**: `src/provider/transform.ts` (cachePoint insertion) +- **Official Docs**: https://docs.aws.amazon.com/bedrock/latest/APIReference/API_runtime_Converse.html + +## Request Structure + +```json +{ + "system": [ + {"text": "System message"}, + {"cachePoint": {"type": "default"}} + ], + "messages": [ + {"role": "user", "content": [...]}, + {"role": "assistant", "content": [...]} + ], + "inferenceConfig": { + "maxTokens": 4096, + "temperature": 0.7, + "topP": 0.9, + "topK": 50, + "stopSequences": ["END"] + }, + "toolConfig": { + "tools": [...], + "toolChoice": {"auto": {}} + }, + "additionalModelRequestFields": { + "thinking": {"type": "enabled", "budget_tokens": 10000} + } +} +``` + +## Key Differences from OpenAI + +| Feature | OpenAI | Bedrock | +|---------|--------|--------| +| System message | In messages | Top-level `system` array | +| Tool calls | `tool_calls` array | `toolUse` content block | +| Tool results | `role: "tool"` | `toolResult` in user content | +| Tool call ID | `tool_call_id` | `toolUseId` | +| Caching | Not available | `cachePoint` blocks | + +## Message Roles + +Only **two roles**: `user` and `assistant`. Tool results go in user messages. 
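
As with Anthropic, correlation runs through content blocks, here keyed by `toolUseId`. A hedged sketch of removing one call/result pair while leaving unrelated blocks such as `cachePoint` markers in place (hypothetical helper, not part of the Converse API):

```typescript
// Sketch: strip one toolUse/toolResult pair from Bedrock `messages`,
// keeping every other content block (text, images, cachePoint) intact.
function dropBedrockToolPair(messages: any[], toolUseId: string): void {
  for (const m of messages) {
    if (!Array.isArray(m.content)) continue
    m.content = m.content.filter((block: any) =>
      block.toolUse?.toolUseId !== toolUseId &&
      block.toolResult?.toolUseId !== toolUseId
    )
  }
  // A real implementation must also handle messages left with no content
  // blocks, since Bedrock expects at least one block per message.
}
```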
+ +## Content Block Types + +### Text Block +```json +{"text": "Hello, how can I help?"} +``` + +### Image Block +```json +{ + "image": { + "format": "jpeg", + "source": {"bytes": ""} + } +} +``` +Formats: `jpeg`, `png`, `gif`, `webp` + +### Document Block +```json +{ + "document": { + "format": "pdf", + "name": "document-1", + "source": {"bytes": ""}, + "citations": {"enabled": true} + } +} +``` +Formats: `pdf`, `csv`, `doc`, `docx`, `xls`, `xlsx`, `html`, `txt`, `md` + +### Tool Use Block (Assistant calling tool) +```json +{ + "toolUse": { + "toolUseId": "tool_call_123", + "name": "get_weather", + "input": {"city": "Seattle"} + } +} +``` + +### Tool Result Block (User providing result) +```json +{ + "toolResult": { + "toolUseId": "tool_call_123", + "content": [ + {"text": "Temperature: 72F"}, + {"image": {"format": "png", "source": {"bytes": "..."}}} + ] + } +} +``` + +### Reasoning Block (Anthropic models) +```json +{ + "reasoningContent": { + "reasoningText": { + "text": "Let me think through this...", + "signature": "" + } + } +} +``` + +## Thinking/Reasoning (Anthropic Models via Bedrock) + +### Request Configuration +```json +{ + "additionalModelRequestFields": { + "thinking": { + "type": "enabled", + "budget_tokens": 10000 + } + } +} +``` + +**Note**: Bedrock uses `reasoningConfig` in the SDK which gets transformed to Anthropic's `thinking` format in `additionalModelRequestFields`. + +**Parameters:** +- `type`: `"enabled"` or `"disabled"` +- `budget_tokens`: Token budget for thinking (minimum 1024) + +### Response Content Blocks + +**Reasoning Text Block** (visible reasoning): +```json +{ + "reasoningContent": { + "reasoningText": { + "text": "Let me analyze this step by step...", + "signature": "cryptographic_signature_for_verification" + } + } +} +``` + +**Redacted Reasoning Block** (hidden reasoning): +```json +{ + "reasoningContent": { + "redactedReasoning": { + "data": "encrypted_base64_redacted_content" + } + } +} +``` + +### SDK Conversion +The AI SDK converts Bedrock's reasoning blocks to unified format: +```typescript +// Bedrock response +{reasoningContent: {reasoningText: {text: "...", signature: "..."}}} + +// Converted to SDK format +{type: "reasoning", text: "...", signature: "..."} + +// Redacted version +{reasoningContent: {redactedReasoning: {data: "..."}}} + +// Converted to SDK format +{type: "redacted-reasoning", data: "..."} +``` + +### Context Pruning for Reasoning +- **Signatures are cryptographic** - preserve for verification +- **Redacted reasoning** contains encrypted content that cannot be inspected +- Reasoning blocks appear in assistant message content +- Consider reasoning as important but potentially large context + +### Cache Point +```json +{"cachePoint": {"type": "default"}} +``` + +## Caching Mechanism + +Cache points can be inserted at: +1. In system messages - After each system message +2. In user message content - After content blocks +3. In assistant message content - After content blocks +4. 
In tool configuration - After tool definitions + +## Tool Definition + +```json +{ + "tools": [ + { + "toolSpec": { + "name": "get_weather", + "description": "Get weather for a city", + "inputSchema": { + "json": { + "type": "object", + "properties": {"city": {"type": "string"}}, + "required": ["city"] + } + } + } + }, + {"cachePoint": {"type": "default"}} + ], + "toolChoice": {"auto": {}} +} +``` + +### Tool Choice Options +- `{"auto": {}}` - Model decides +- `{"any": {}}` - Force tool use (maps to "required") +- `{"tool": {"name": "tool_name"}}` - Force specific tool + +## Complete Example + +```json +{ + "system": [ + {"text": "You are a helpful assistant."}, + {"cachePoint": {"type": "default"}} + ], + "messages": [ + { + "role": "user", + "content": [{"text": "What's the weather in Seattle?"}] + }, + { + "role": "assistant", + "content": [{ + "toolUse": { + "toolUseId": "call_001", + "name": "get_weather", + "input": {"city": "Seattle"} + } + }] + }, + { + "role": "user", + "content": [ + { + "toolResult": { + "toolUseId": "call_001", + "content": [{"text": "{\"temperature\": 72, \"condition\": \"sunny\"}"}] + } + }, + {"cachePoint": {"type": "default"}} + ] + } + ], + "toolConfig": { + "tools": [{"toolSpec": {"name": "get_weather", "description": "Get weather", "inputSchema": {"json": {"type": "object", "properties": {"city": {"type": "string"}}, "required": ["city"]}}}}], + "toolChoice": {"auto": {}} + } +} +``` + +## Unique Behaviors + +1. **Trailing whitespace trimming**: Last text block in assistant messages is trimmed +2. **Empty text blocks skipped**: Whitespace-only text blocks are filtered +3. **Temperature clamping**: Clamped to [0, 1] range +4. **Tool content filtering**: If no tools available, tool content is removed with warning + +## Context Pruning Considerations + +1. **Tool correlation**: Uses `toolUseId` for correlation +2. **Tool results in user messages**: `toolResult` blocks are in user message content +3. **Message grouping**: Consecutive same-role messages are merged +4. **Cache points**: Preserve `cachePoint` markers when beneficial +5. **Paired pruning**: `toolUse` and corresponding `toolResult` must be pruned together +6. **System first**: System messages must come before user/assistant messages diff --git a/docs/providers/cohere.md b/docs/providers/cohere.md new file mode 100644 index 0000000..a1927fb --- /dev/null +++ b/docs/providers/cohere.md @@ -0,0 +1,282 @@ +# Cohere API Format + +Cohere uses a chat-based API with unique features like built-in RAG via `documents` and citations. 
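
## Sources

- **AI SDK**: `packages/cohere/src/convert-to-cohere-chat-prompt.ts`, `packages/cohere/src/cohere-chat-language-model.ts`
- **Official Docs**: https://docs.cohere.com/reference/chat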
+ +## Request Structure + +```json +{ + "model": "command-r-plus", + "messages": [...], + "max_tokens": 4096, + "temperature": 0.7, + "p": 0.9, + "k": 40, + "frequency_penalty": 0.0, + "presence_penalty": 0.0, + "seed": 12345, + "stop_sequences": ["END"], + "response_format": {"type": "json_object"}, + "tools": [...], + "tool_choice": "REQUIRED", + "documents": [...], + "thinking": {"type": "enabled", "token_budget": 2048} +} +``` + +## Key Differences from OpenAI + +| Feature | OpenAI | Cohere | +|---------|--------|-------| +| Top-p parameter | `top_p` | `p` | +| Top-k parameter | `top_k` | `k` | +| Tool choice required | `"required"` | `"REQUIRED"` (uppercase) | +| RAG | Not built-in | `documents` array | +| Citations | Not built-in | Automatic with documents | + +## Message Formats + +### System Message +```json +{"role": "system", "content": "You are a helpful assistant."} +``` + +### User Message (text only) +```json +{"role": "user", "content": "What is the weather today?"} +``` +**Note**: Files/documents are extracted to top-level `documents` array for RAG. + +### Assistant Message +```json +{ + "role": "assistant", + "content": "The weather is sunny.", + "tool_plan": undefined, + "tool_calls": undefined +} +``` + +### Assistant Message with Tool Calls +```json +{ + "role": "assistant", + "content": undefined, + "tool_plan": undefined, + "tool_calls": [{ + "id": "call_abc123", + "type": "function", + "function": { + "name": "get_weather", + "arguments": "{\"location\": \"San Francisco\"}" + } + }] +} +``` +**Key quirk**: When `tool_calls` present, `content` is `undefined`. + +### Tool Result Message +```json +{ + "role": "tool", + "tool_call_id": "call_abc123", + "content": "{\"temperature\": 72, \"conditions\": \"sunny\"}" +} +``` + +## Tool Definition + +```json +{ + "tools": [{ + "type": "function", + "function": { + "name": "get_weather", + "description": "Get weather for a location", + "parameters": { + "type": "object", + "properties": {"location": {"type": "string"}}, + "required": ["location"] + } + } + }], + "tool_choice": "REQUIRED" +} +``` + +### Tool Choice Values (UPPERCASE) +- `undefined` - Auto (model decides) +- `"NONE"` - Disable tool use +- `"REQUIRED"` - Force tool use + +**Note**: To force a specific tool, filter `tools` array and set `tool_choice: "REQUIRED"`. + +## RAG via Documents + +```json +{ + "documents": [ + { + "data": { + "text": "Document content here", + "title": "Optional Title" + } + } + ] +} +``` + +## Response Structure + +```json +{ + "generation_id": "abc-123", + "message": { + "role": "assistant", + "content": [ + {"type": "text", "text": "Response here."}, + {"type": "thinking", "thinking": "Reasoning..."} + ], + "tool_plan": "I will call the API", + "tool_calls": [...], + "citations": [{ + "start": 0, + "end": 10, + "text": "cited text", + "sources": [{"type": "document", "id": "doc1", "document": {...}}] + }] + }, + "finish_reason": "COMPLETE", + "usage": {...} +} +``` + +**Note**: Response `content` is an **array** of typed objects (unlike request which uses string). + +## Unique Features + +1. **Thinking mode**: Native reasoning via `thinking` config, returns `{"type": "thinking"}` blocks +2. **Citations**: Automatic source citations when using `documents` +3. **Tool plan**: `tool_plan` field explains tool usage reasoning +4. 
**Null arguments**: May return `"null"` for parameterless tools (normalize to `"{}"`) + +## Thinking/Reasoning + +### Request Configuration +```json +{ + "thinking": { + "type": "enabled", + "token_budget": 2048 + } +} +``` + +**Parameters:** +- `type`: `"enabled"` or `"disabled"` +- `token_budget`: Token budget for thinking + +### Response Content Blocks + +**Thinking Block** (in response content array): +```json +{ + "type": "thinking", + "thinking": "Let me reason through this problem..." +} +``` + +**Note**: Unlike Mistral, Cohere's `thinking` field is a **string**, not an array. + +### Response Structure with Thinking +```json +{ + "message": { + "role": "assistant", + "content": [ + {"type": "thinking", "thinking": "First, I need to consider..."}, + {"type": "text", "text": "Based on my analysis..."} + ] + } +} +``` + +### Streaming Events for Thinking +```json +// content-start (thinking) +{"type": "content-start", "index": 0, "delta": {"message": {"content": {"type": "thinking", "thinking": ""}}}} + +// content-delta (thinking) +{"type": "content-delta", "index": 0, "delta": {"message": {"content": {"thinking": "reasoning chunk..."}}}} +``` + +### SDK Conversion +The AI SDK converts Cohere's thinking blocks to unified format: +```typescript +// Cohere response content +{type: "thinking", thinking: "..."} + +// Converted to SDK format +{type: "reasoning", text: "..."} +``` + +### Context Pruning for Thinking +- Thinking blocks appear in response `content` array +- No signatures or encryption - content is plaintext string +- Consider thinking as important context but potentially large +- Thinking appears before text content in the response + +## Complete Example + +```json +{ + "model": "command-r-plus", + "messages": [ + {"role": "system", "content": "You are a weather assistant."}, + {"role": "user", "content": "Weather in Paris?"}, + { + "role": "assistant", + "content": undefined, + "tool_plan": undefined, + "tool_calls": [{ + "id": "call_001", + "type": "function", + "function": {"name": "get_weather", "arguments": "{\"location\":\"Paris\"}"} + }] + }, + { + "role": "tool", + "tool_call_id": "call_001", + "content": "{\"temperature\":18,\"conditions\":\"cloudy\"}" + } + ], + "tools": [{ + "type": "function", + "function": {"name": "get_weather", "description": "Get weather", "parameters": {"type": "object", "properties": {"location": {"type": "string"}}, "required": ["location"]}} + }], + "max_tokens": 1024, + "temperature": 0.7 +} +``` + +## Streaming Events + +| Event | Purpose | +|-------|--------| +| `message-start` | Start of response | +| `content-start` | Start of text/thinking block | +| `content-delta` | Text or thinking chunk | +| `tool-plan-delta` | Tool planning reasoning | +| `tool-call-start` | Start of tool call | +| `tool-call-delta` | Tool call arguments chunk | +| `message-end` | Final with `finish_reason` and `usage` | + +## Context Pruning Considerations + +1. **Tool correlation**: Uses `tool_call_id` like OpenAI +2. **Separate tool results**: Each result is a separate message (not grouped) +3. **Content exclusivity**: When `tool_calls` present, `content` is `undefined` +4. **Response vs request format**: Response content is array, request is string +5. **Uppercase tool choice**: Use `"NONE"` and `"REQUIRED"` (not lowercase) +6. **Paired pruning**: Tool calls and results must be pruned together +7. 
**Documents top-level**: RAG documents are separate from messages diff --git a/docs/providers/google-gemini.md b/docs/providers/google-gemini.md new file mode 100644 index 0000000..8ab69b1 --- /dev/null +++ b/docs/providers/google-gemini.md @@ -0,0 +1,255 @@ +# Google Gemini API Format + +Google's Generative AI (Gemini) uses a unique format with **position-based tool correlation** (no tool call IDs). + +## Sources + +- **AI SDK**: `packages/google/src/convert-to-google-generative-ai-messages.ts`, `packages/google/src/google-generative-ai-language-model.ts` +- **Schema Conversion**: `packages/google/src/convert-json-schema-to-openapi-schema.ts` +- **OpenCode Transform**: `src/provider/transform.ts` (schema integer→string enum conversion) +- **Official Docs**: https://ai.google.dev/api/rest/v1/models/generateContent + +## Request Structure + +```json +{ + "systemInstruction": { + "parts": [{"text": "System prompt text"}] + }, + "contents": [ + {"role": "user", "parts": [...]}, + {"role": "model", "parts": [...]} + ], + "generationConfig": { + "maxOutputTokens": 1024, + "temperature": 0.7, + "topK": 40, + "topP": 0.95, + "responseMimeType": "application/json", + "responseSchema": {...} + }, + "tools": [...], + "toolConfig": { + "functionCallingConfig": {"mode": "AUTO"} + } +} +``` + +## Key Differences from OpenAI + +| Feature | OpenAI | Gemini | +|---------|--------|--------| +| Message container | `messages[]` | `contents[]` | +| System message | In messages | Top-level `systemInstruction` | +| Roles | system/user/assistant/tool | user/model only | +| Tool call IDs | ID-based correlation | **POSITION-BASED** | +| Tool results | Separate `tool` role | In `user` message as `functionResponse` | + +## Message Roles + +Only **two roles**: `user` and `model` + +| SDK Role | Gemini Role | +|----------|-------------| +| `system` | `systemInstruction` (top-level) | +| `user` | `user` | +| `assistant` | `model` | +| `tool` (results) | `user` (with `functionResponse`) | + +## Content Parts + +### Text Part +```json +{"text": "Hello, how are you?"} +``` + +### Thinking Part +```json +{"text": "Let me think...", "thought": true, "thoughtSignature": "sig-for-caching"} +``` + +## Thinking/Reasoning + +### Request Configuration +```json +{ + "generationConfig": { + "thinkingConfig": { + "thinkingBudget": 8192, + "includeThoughts": true + } + } +} +``` + +**Parameters:** +- `thinkingBudget`: Token budget for thinking +- `includeThoughts`: Whether to include thinking in response (default true) + +### Response Content Parts + +**Thinking Part** (in model message): +```json +{ + "text": "Let me reason through this problem...", + "thought": true, + "thoughtSignature": "signature_for_caching" +} +``` + +**Key fields:** +- `thought: true` - Marks this part as reasoning content +- `thoughtSignature` - Optional signature for caching/verification + +### Usage Tracking +```json +{ + "usageMetadata": { + "promptTokenCount": 100, + "candidatesTokenCount": 200, + "thoughtsTokenCount": 150 + } +} +``` + +### SDK Conversion +The AI SDK converts Gemini's thought parts to unified `reasoning` type: +```typescript +// Gemini response part +{text: "...", thought: true, thoughtSignature: "..."} + +// Converted to SDK format +{type: "reasoning", text: "...", signature: "..."} +``` + +### Context Pruning for Thinking +- **Thought parts are regular text parts** with `thought: true` flag +- **thoughtSignature** should be preserved if present (used for caching) +- Thinking parts appear in `model` role messages +- Consider thinking 
as important but potentially large context + +## Image (inline base64) +```json +{"inlineData": {"mimeType": "image/jpeg", "data": "base64-encoded-data"}} +``` + +### Image (file URI) +```json +{"fileData": {"mimeType": "image/png", "fileUri": "gs://bucket/path/image.png"}} +``` + +### Function Call (tool invocation) +```json +{"functionCall": {"name": "get_weather", "args": {"location": "Tokyo"}}} +``` + +### Function Response (tool result) +```json +{"functionResponse": {"name": "get_weather", "response": {"name": "get_weather", "content": "{\"temp\": 22}"}}} +``` + +## CRITICAL: Position-Based Tool Correlation + +**Gemini does NOT use tool call IDs.** Tool results are correlated by **position/order**. + +### Tool Call (model message) +```json +{ + "role": "model", + "parts": [ + {"functionCall": {"name": "get_weather", "args": {"location": "SF"}}}, + {"functionCall": {"name": "get_time", "args": {"timezone": "PST"}}} + ] +} +``` + +### Tool Results (user message) - ORDER MUST MATCH +```json +{ + "role": "user", + "parts": [ + {"functionResponse": {"name": "get_weather", "response": {"name": "get_weather", "content": "72F"}}}, + {"functionResponse": {"name": "get_time", "response": {"name": "get_time", "content": "2:30 PM"}}} + ] +} +``` + +## Tool Definition + +```json +{ + "tools": [{ + "functionDeclarations": [{ + "name": "get_weather", + "description": "Get the current weather", + "parameters": { + "type": "object", + "properties": {"location": {"type": "string"}}, + "required": ["location"] + } + }] + }], + "toolConfig": { + "functionCallingConfig": {"mode": "AUTO"} + } +} +``` + +### Tool Config Modes +- `AUTO` - Model decides +- `NONE` - Disable tools +- `ANY` - Force tool use +- `ANY` + `allowedFunctionNames` - Force specific tools + +### Provider-Defined Tools +```json +{"googleSearch": {}}, +{"urlContext": {}}, +{"codeExecution": {}} +``` + +## Schema Conversion (JSON Schema to OpenAPI) + +Gemini requires **OpenAPI 3.0 schema format**: + +| JSON Schema | OpenAPI | +|-------------|---------| +| `const: value` | `enum: [value]` | +| `type: ["string", "null"]` | `anyOf` + `nullable: true` | + +## Gemma Model Handling + +For `gemma-*` models, system instructions are **prepended to first user message**: +```json +{ + "contents": [{ + "role": "user", + "parts": [{"text": "System prompt\n\nActual user message"}] + }] +} +``` + +## Complete Example + +```json +{ + "systemInstruction": {"parts": [{"text": "You are a weather assistant."}]}, + "contents": [ + {"role": "user", "parts": [{"text": "Weather in Tokyo?"}]}, + {"role": "model", "parts": [{"functionCall": {"name": "get_weather", "args": {"location": "Tokyo"}}}]}, + {"role": "user", "parts": [{"functionResponse": {"name": "get_weather", "response": {"name": "get_weather", "content": "22C cloudy"}}}]}, + {"role": "model", "parts": [{"text": "Tokyo is 22C and cloudy."}]} + ], + "tools": [{"functionDeclarations": [{"name": "get_weather", "description": "Get weather", "parameters": {"type": "object", "properties": {"location": {"type": "string"}}, "required": ["location"]}}]}] +} +``` + +## Context Pruning Considerations + +1. **POSITION-BASED CORRELATION**: Tool calls and results must be pruned TOGETHER and order preserved +2. **No IDs**: Cannot selectively prune individual tool results - entire pairs must go +3. **System separate**: `systemInstruction` is top-level, typically should NOT be pruned +4. **Alternation required**: Must maintain alternating `user`/`model` pattern +5. 
**Multi-part messages**: Each message can have multiple parts; prune entire messages, not parts +6. **Tool results are user role**: `functionResponse` parts are in `user` messages +7. **thoughtSignature**: Used for caching reasoning; preserve if present diff --git a/docs/providers/mistral.md b/docs/providers/mistral.md new file mode 100644 index 0000000..3767830 --- /dev/null +++ b/docs/providers/mistral.md @@ -0,0 +1,226 @@ +# Mistral API Format + +Mistral uses an OpenAI-compatible format but with **strict tool call ID requirements**. + +## Sources + +- **AI SDK**: `packages/mistral/src/convert-to-mistral-chat-messages.ts`, `packages/mistral/src/mistral-chat-language-model.ts` +- **OpenCode Transform**: `src/provider/transform.ts` (9-char alphanumeric ID normalization) +- **Official Docs**: https://docs.mistral.ai/api/#tag/chat + +## Request Structure + +```json +{ + "model": "mistral-large-latest", + "messages": [...], + "max_tokens": 4096, + "temperature": 0.7, + "top_p": 1.0, + "random_seed": 42, + "safe_prompt": false, + "stream": false, + "response_format": {"type": "json_object"}, + "tools": [...], + "tool_choice": "auto" +} +``` + +## CRITICAL: Tool Call ID Requirement + +**Mistral requires tool call IDs to be exactly 9 alphanumeric characters.** + +| Valid | Invalid | +|-------|--------| +| `abc123xyz` | `call_abc123` (too long, has underscore) | +| `A1B2C3D4E` | `12345` (too short) | +| `def456uvw` | `abc-123-xy` (has hyphens) | + +## Key Differences from OpenAI + +| Feature | OpenAI | Mistral | +|---------|--------|--------| +| Tool call ID format | `call_*` (variable) | **Exactly 9 alphanumeric** | +| Tool choice `required` | `"required"` | `"any"` | +| User content | String or array | **Always array** | +| Assistant `prefix` | Not supported | Supported | +| Stop sequences | Supported | Not supported | +| Frequency/presence penalty | Supported | Not supported | + +## Message Formats + +### System Message +```json +{"role": "system", "content": "You are a helpful assistant."} +``` + +### User Message (always array) +```json +{ + "role": "user", + "content": [ + {"type": "text", "text": "What's in this image?"}, + {"type": "image_url", "image_url": "https://example.com/image.jpg"}, + {"type": "document_url", "document_url": "data:application/pdf;base64,..."} + ] +} +``` + +### Assistant Message +```json +{ + "role": "assistant", + "content": "Here's the analysis...", + "prefix": true, + "tool_calls": [ + { + "id": "abc123xyz", + "type": "function", + "function": { + "name": "get_weather", + "arguments": "{\"location\":\"San Francisco\"}" + } + } + ] +} +``` + +### Tool Result Message +```json +{ + "role": "tool", + "name": "get_weather", + "tool_call_id": "abc123xyz", + "content": "{\"temperature\": 72, \"condition\": \"sunny\"}" +} +``` + +## Tool Definition + +```json +{ + "tools": [{ + "type": "function", + "function": { + "name": "get_weather", + "description": "Get weather for a location", + "parameters": { + "type": "object", + "properties": {"location": {"type": "string"}}, + "required": ["location"] + }, + "strict": true + } + }], + "tool_choice": "auto" +} +``` + +### Tool Choice Options +- `"auto"` - Model decides +- `"none"` - Disable tool calling +- `"any"` - Force tool use (NOT `"required"`) +- `{"type": "function", "function": {"name": "..."}}` - Force specific tool + +## Unique Features + +1. **Prefix flag**: `prefix: true` on assistant messages for continuation mode +2. **PDF support**: Via `document_url` content type with base64 +3. 
**Thinking mode**: Returns `{"type": "thinking", "thinking": [...]}` content blocks + +## Thinking/Reasoning (Magistral Models) + +### Response Content Structure + +Mistral's reasoning models (Magistral) return thinking in the response content: + +**Thinking Block** (in assistant message content): +```json +{ + "type": "thinking", + "thinking": [ + {"type": "text", "text": "Let me reason through this..."} + ] +} +``` + +**Note**: The `thinking` field is an **array** of text parts, not a string. + +### Streaming Response +When streaming, content can be a string OR array: +```json +{ + "choices": [{ + "delta": { + "role": "assistant", + "content": [ + {"type": "thinking", "thinking": [{"type": "text", "text": "reasoning..."}]}, + {"type": "text", "text": "final response"} + ] + } + }] +} +``` + +### SDK Conversion +The AI SDK extracts and converts Mistral's thinking blocks: +```typescript +// Mistral response content +{type: "thinking", thinking: [{type: "text", text: "..."}]} + +// Converted to SDK format +{type: "reasoning", text: "..."} +``` + +### Context Pruning for Thinking +- Thinking blocks appear as content items in assistant messages +- The nested `thinking` array contains text parts to concatenate +- No signatures or encryption - content is plaintext +- Consider thinking as important context but potentially large + +## Complete Example + +```json +{ + "model": "mistral-large-latest", + "messages": [ + {"role": "system", "content": "You are a helpful assistant."}, + {"role": "user", "content": [{"type": "text", "text": "Weather in NYC?"}]}, + { + "role": "assistant", + "content": "", + "tool_calls": [{ + "id": "abc123xyz", + "type": "function", + "function": {"name": "get_weather", "arguments": "{\"location\":\"New York City\"}"} + }] + }, + { + "role": "tool", + "name": "get_weather", + "tool_call_id": "abc123xyz", + "content": "{\"temperature\":72,\"condition\":\"sunny\"}" + } + ], + "tools": [{ + "type": "function", + "function": {"name": "get_weather", "description": "Get weather", "parameters": {"type": "object", "properties": {"location": {"type": "string"}}, "required": ["location"]}} + }], + "tool_choice": "auto" +} +``` + +## Unsupported Features + +- `topK` +- `frequencyPenalty` +- `presencePenalty` +- `stopSequences` + +## Context Pruning Considerations + +1. **9-char alphanumeric IDs**: When generating synthetic tool calls, IDs must be exactly 9 alphanumeric chars +2. **Tool correlation**: Uses `tool_call_id` like OpenAI +3. **User content always array**: Even single text becomes `[{"type": "text", "text": "..."}]` +4. **Tool name in result**: Tool result includes `name` field alongside `tool_call_id` +5. **Paired pruning**: Tool calls and results must be pruned together diff --git a/docs/providers/openai-compatible.md b/docs/providers/openai-compatible.md new file mode 100644 index 0000000..3406248 --- /dev/null +++ b/docs/providers/openai-compatible.md @@ -0,0 +1,135 @@ +# OpenAI-Compatible Providers + +Most providers in models.dev use the OpenAI Chat Completions format via `@ai-sdk/openai-compatible`. This document covers these providers and any provider-specific quirks. + +## Standard OpenAI Chat Completions Format + +See [openai.md](./openai.md) for the full format specification. 
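
Because every provider in this family shares the `role: "tool"` / `tool_call_id` pairing, a single pruning helper can cover all of them. A minimal sketch (hypothetical helper name, not the plugin's real format descriptor):

```typescript
// Sketch: overwrite a tool result's content with a pruned placeholder in an
// OpenAI-style `messages` array. Returns true if the result was found.
function pruneToolResult(messages: any[], toolCallId: string, placeholder: string): boolean {
  for (const m of messages) {
    if (m.role === "tool" && m.tool_call_id === toolCallId) {
      m.content = placeholder // the paired assistant tool_call stays intact
      return true
    }
  }
  return false
}
```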
+ +### Quick Reference + +```json +{ + "model": "model-name", + "messages": [ + {"role": "system", "content": "..."}, + {"role": "user", "content": "..."}, + {"role": "assistant", "content": "...", "tool_calls": [...]}, + {"role": "tool", "tool_call_id": "...", "content": "..."} + ], + "tools": [...], + "tool_choice": "auto" +} +``` + +## Providers Using OpenAI-Compatible Format + +Based on models.dev, these providers use `@ai-sdk/openai-compatible`: + +| Provider | Base URL | Notes | +|----------|----------|-------| +| together | api.together.xyz | | +| deepseek | api.deepseek.com | | +| groq | api.groq.com | Very fast inference | +| fireworks | api.fireworks.ai | | +| hyperbolic | api.hyperbolic.xyz | | +| novita | api.novita.ai | | +| cerebras | api.cerebras.ai | | +| sambanova | api.sambanova.ai | | +| nebius | api.studio.nebius.ai | | +| chutes | api.chutes.ai | | +| openrouter | openrouter.ai | Meta-provider | +| kluster | api.kluster.ai | | +| glhf | glhf.chat | | +| scaleway | api.scaleway.ai | | +| lepton | api.lepton.ai | | +| nano-gpt | api.nano-gpt.com | | +| arcee | api.arcee.ai | | +| inference-net | api.inference.net | | +| nineteen | api.nineteen.ai | | +| targon | api.targon.ai | | +| req-ai | api.req.ai | | +| vllm | (self-hosted) | | +| ollama | localhost:11434 | Local models | +| lmstudio | localhost:1234 | Local models | +| jan | localhost:1337 | Local models | +| any-provider | (configurable) | Generic OpenAI-compatible | + +## Provider-Specific Quirks + +### OpenRouter +- Acts as a meta-provider routing to various backends +- May have different caching semantics +- Supports `cache_control` similar to Anthropic when routing to Claude + +### Groq +- Extremely fast inference +- Limited model selection +- May have stricter rate limits + +### DeepSeek +- Supports reasoning models (DeepSeek R1) +- May include thinking/reasoning in responses + +### Ollama / LM Studio / Jan +- Local inference +- No rate limits but hardware-dependent +- May not support all features (vision, tools) + +### Together AI +- Wide model selection +- Good tool support +- Supports streaming + +## Caching Considerations + +Some OpenAI-compatible providers support caching hints: + +```json +{ + "role": "user", + "content": "...", + "cache_control": {"type": "ephemeral"} +} +``` + +Supported by: +- OpenRouter (when routing to Anthropic) +- Some enterprise deployments + +## Vision Support + +Not all OpenAI-compatible providers support vision. Check model capabilities: + +```json +{ + "role": "user", + "content": [ + {"type": "text", "text": "What's in this image?"}, + {"type": "image_url", "image_url": {"url": "data:image/jpeg;base64,..."}} + ] +} +``` + +## Tool Support + +Tool support varies by provider and model. Common limitations: +- Some models don't support parallel tool calls +- Some models don't support structured outputs/strict mode +- Response format (`json_object`) support varies + +## Context Pruning Considerations + +1. **Standard ID correlation**: All use `tool_call_id` for tool result correlation +2. **Consistent message format**: Messages follow OpenAI structure +3. **Feature detection**: May need to check model capabilities at runtime +4. **Cache support varies**: Not all providers honor cache hints +5. 
**Paired pruning**: Tool calls and results must be pruned together + +## Detection + +OpenAI-compatible requests can be detected by: +- `body.messages` array present +- Messages have `role` field with values: `system`, `user`, `assistant`, `tool` +- Tool results have `tool_call_id` field +- No special top-level fields like `contents` (Gemini) or `system` array (Bedrock/Anthropic) diff --git a/docs/providers/openai.md b/docs/providers/openai.md new file mode 100644 index 0000000..db24be4 --- /dev/null +++ b/docs/providers/openai.md @@ -0,0 +1,223 @@ +# OpenAI API Format + +OpenAI offers two API formats: **Chat Completions** (original) and **Responses** (newer). + +## Sources + +- **AI SDK**: `packages/openai/src/chat/openai-chat-language-model.ts`, `packages/openai/src/responses/openai-responses-language-model.ts` +- **AI SDK OpenAI-Compatible**: `packages/openai-compatible/src/chat/openai-compatible-chat-language-model.ts` +- **Official Docs**: https://platform.openai.com/docs/api-reference/chat +- **Responses API**: https://platform.openai.com/docs/api-reference/responses + +## Chat Completions API (`/chat/completions`) + +### Request Structure + +```json +{ + "model": "gpt-4o", + "messages": [...], + "tools": [...], + "tool_choice": "auto" | "none" | "required" | {"type": "function", "function": {"name": "..."}}, + "max_tokens": 4096, + "temperature": 0.7, + "response_format": {"type": "json_object"} | {"type": "json_schema", "json_schema": {...}}, + "stream": false +} +``` + +### Message Roles + +| Role | Description | +|------|-------------| +| `system` | System instructions | +| `user` | User input | +| `assistant` | Model responses | +| `tool` | Tool/function results | + +### Message Formats + +**System Message:** +```json +{"role": "system", "content": "You are a helpful assistant."} +``` + +**User Message (multimodal):** +```json +{ + "role": "user", + "content": [ + {"type": "text", "text": "What's in this image?"}, + {"type": "image_url", "image_url": {"url": "https://example.com/image.jpg", "detail": "auto"}}, + {"type": "file", "file": {"file_id": "file-abc123"}} + ] +} +``` + +**Assistant Message with Tool Calls:** +```json +{ + "role": "assistant", + "content": null, + "tool_calls": [ + { + "id": "call_abc123", + "type": "function", + "function": { + "name": "get_weather", + "arguments": "{\"location\": \"San Francisco\"}" + } + } + ] +} +``` + +**Tool Result Message:** +```json +{ + "role": "tool", + "tool_call_id": "call_abc123", + "content": "{\"temperature\": 72, \"condition\": \"sunny\"}" +} +``` + +### Tool Definition + +```json +{ + "type": "function", + "function": { + "name": "get_weather", + "description": "Get the current weather", + "parameters": { + "type": "object", + "properties": { + "location": {"type": "string"} + }, + "required": ["location"] + }, + "strict": true + } +} +``` + +--- + +## Responses API (`/responses`) + +### Key Differences from Chat Completions + +| Feature | Chat Completions | Responses API | +|---------|-----------------|---------------| +| Message array | `messages` | `input` | +| Tool call ID field | `tool_call_id` | `call_id` | +| System message | In messages | `instructions` field or in input | +| Token limit | `max_tokens` | `max_output_tokens` | +| Reasoning | Not supported | `reasoning` config | + +### Request Structure + +```json +{ + "model": "gpt-4o", + "input": [...], + "instructions": "Optional system instructions", + "tools": [...], + "tool_choice": "auto" | "none" | "required" | {"type": "function", "name": "..."}, + 
"max_output_tokens": 4096, + "previous_response_id": "resp_abc123", + "reasoning": { + "effort": "medium", + "summary": "auto" + }, + "stream": false +} +``` + +## Thinking/Reasoning (Responses API only) + +### Request Configuration +```json +{ + "reasoning": { + "effort": "low" | "medium" | "high", + "summary": "auto" | "concise" | "detailed" + } +} +``` + +**Parameters:** +- `effort`: How much reasoning effort (affects token usage) +- `summary`: How to summarize reasoning in response + +**Constraints when reasoning enabled:** +- `temperature` is **NOT supported** (use default) +- `topP` is **NOT supported** +- Only available on reasoning models (o1, o3, etc.) + +### Response Output Items + +**Reasoning Item** (in output array): +```json +{ + "type": "reasoning", + "id": "reasoning_abc123", + "encrypted_content": "encrypted_base64_reasoning_content", + "summary": [ + {"type": "summary_text", "text": "I analyzed the problem by..."} + ] +} +``` + +**Key fields:** +- `encrypted_content`: The actual reasoning is encrypted/hidden +- `summary`: Optional human-readable summary of reasoning + +### Usage Tracking +```json +{ + "usage": { + "input_tokens": 100, + "output_tokens": 200, + "output_tokens_details": { + "reasoning_tokens": 150 + } + } +} +``` + +### SDK Conversion +The AI SDK handles reasoning items: +```typescript +// OpenAI Responses output +{type: "reasoning", id: "...", encrypted_content: "...", summary: [...]} + +// Kept as reasoning type in SDK +{type: "reasoning", reasoningId: "...", text: "summary text"} +``` + +### Context Pruning for Reasoning +- **Encrypted content** cannot be inspected or modified +- **Summaries** provide readable insight into reasoning +- Reasoning items appear as separate items in `output` array +- `reasoning_tokens` in usage helps track cost + +--- + +## Context Pruning Considerations + +1. **Tool correlation**: Both formats use ID-based correlation (`tool_call_id` or `call_id`) +2. **Paired pruning**: Tool calls and their results should be pruned together +3. **Message roles**: 4 distinct roles in Chat Completions; Responses API uses item types +4. **Content types**: User content is `type: "text"/"image_url"` in Chat, `type: "input_text"/"input_image"` in Responses +5. **Assistant content**: String in Chat Completions, `output_text` array in Responses + +## OpenAI-Compatible Providers + +Most providers in models.dev use the OpenAI Chat Completions format via `@ai-sdk/openai-compatible`: +- together, deepseek, groq, fireworks, hyperbolic, novita, cerebras, sambanova, etc. + +These providers accept the same request format but may have different: +- Supported models +- Rate limits +- Feature availability (vision, tool use, etc.) diff --git a/lib/fetch-wrapper/formats/bedrock.ts b/lib/fetch-wrapper/formats/bedrock.ts new file mode 100644 index 0000000..26c1ca5 --- /dev/null +++ b/lib/fetch-wrapper/formats/bedrock.ts @@ -0,0 +1,148 @@ +import type { FormatDescriptor, ToolOutput } from "../types" +import type { PluginState } from "../../state" +import type { Logger } from "../../logger" +import type { ToolTracker } from "../../api-formats/synth-instruction" +import { cacheToolParametersFromMessages } from "../../state/tool-cache" +import { injectSynth, trackNewToolResults } from "../../api-formats/synth-instruction" +import { injectPrunableList } from "../../api-formats/prunable-list" + +/** + * Format descriptor for AWS Bedrock Converse API. 
+ *
+ * Bedrock format characteristics:
+ * - Top-level `system` array for system messages
+ * - `messages` array with only 'user' and 'assistant' roles
+ * - `inferenceConfig` for model parameters (maxTokens, temperature, etc.)
+ * - Tool calls: `toolUse` blocks in assistant content with `toolUseId`
+ * - Tool results: `toolResult` blocks in user content with `toolUseId`
+ * - Cache points: `cachePoint` blocks that should be preserved
+ */
+export const bedrockFormat: FormatDescriptor = {
+  name: 'bedrock',
+
+  detect(body: any): boolean {
+    // Bedrock carries a top-level system array AND an inferenceConfig object.
+    // inferenceConfig distinguishes it from Anthropic (which also uses a
+    // top-level system array); the system array distinguishes it from OpenAI,
+    // which keeps system prompts in the messages array.
+    return (
+      Array.isArray(body.system) &&
+      body.inferenceConfig !== undefined &&
+      Array.isArray(body.messages)
+    )
+  },
+
+  getDataArray(body: any): any[] | undefined {
+    return body.messages
+  },
+
+  cacheToolParameters(data: any[], state: PluginState, logger?: Logger): void {
+    // Bedrock stores tool calls in assistant message content as toolUse blocks.
+    // Extract toolUseId and tool name for later correlation with toolResult blocks.
+    for (const m of data) {
+      if (m.role === 'assistant' && Array.isArray(m.content)) {
+        for (const block of m.content) {
+          if (block.toolUse && block.toolUse.toolUseId) {
+            const toolUseId = block.toolUse.toolUseId.toLowerCase()
+            state.toolParameters.set(toolUseId, {
+              tool: block.toolUse.name,
+              parameters: block.toolUse.input
+            })
+            logger?.debug("bedrock", "Cached tool parameters", {
+              toolUseId,
+              toolName: block.toolUse.name
+            })
+          }
+        }
+      }
+    }
+    // Also run the generic message caching for any compatible structures
+    cacheToolParametersFromMessages(data, state, logger)
+  },
+
+  injectSynth(data: any[], instruction: string, nudgeText: string): boolean {
+    return injectSynth(data, instruction, nudgeText)
+  },
+
+  trackNewToolResults(data: any[], tracker: ToolTracker, protectedTools: Set<string>): number {
+    return trackNewToolResults(data, tracker, protectedTools)
+  },
+
+  injectPrunableList(data: any[], injection: string): boolean {
+    return injectPrunableList(data, injection)
+  },
+
+  extractToolOutputs(data: any[], state: PluginState): ToolOutput[] {
+    const outputs: ToolOutput[] = []
+
+    for (const m of data) {
+      // Bedrock tool results live in user messages as toolResult blocks
+      if (m.role === 'user' && Array.isArray(m.content)) {
+        for (const block of m.content) {
+          if (block.toolResult && block.toolResult.toolUseId) {
+            const toolUseId = block.toolResult.toolUseId.toLowerCase()
+            const metadata = state.toolParameters.get(toolUseId)
+            outputs.push({
+              id: toolUseId,
+              toolName: metadata?.tool
+            })
+          }
+        }
+      }
+    }
+
+    return outputs
+  },
+
+  replaceToolOutput(data: any[], toolId: string, prunedMessage: string, _state: PluginState): boolean {
+    const toolIdLower = toolId.toLowerCase()
+    let replaced = false
+
+    for (let i = 0; i < data.length; i++) {
+      const m = data[i]
+
+      // Tool results are in user messages as toolResult blocks
+      if (m.role === 'user' && Array.isArray(m.content)) {
+        let messageModified = false
+        const newContent = m.content.map((block: any) => {
+          if (block.toolResult && block.toolResult.toolUseId?.toLowerCase() === toolIdLower) {
+            messageModified = true
+            // Replace the content array inside toolResult with the pruned message
+            return {
+              ...block,
+              toolResult: {
+                ...block.toolResult,
+                content: [{ text: prunedMessage }]
+              }
+            }
+          }
+          return block
+        })
+        if (messageModified) {
+          data[i] = { ...m, content: newContent }
+          replaced = true
+        }
+      }
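+      // Assistant-side toolUse blocks are intentionally left untouched: only
+      // the toolResult payload is rewritten, so call/result pairing survives.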
+    }
+
+    return replaced
+  },
+
+  hasToolOutputs(data: any[]): boolean {
+    for (const m of data) {
+      if (m.role === 'user' && Array.isArray(m.content)) {
+        for (const block of m.content) {
+          if (block.toolResult) return true
+        }
+      }
+    }
+    return false
+  },
+
+  getLogMetadata(data: any[], replacedCount: number, inputUrl: string): Record<string, any> {
+    return {
+      url: inputUrl,
+      replacedCount,
+      totalMessages: data.length,
+      format: 'bedrock'
+    }
+  }
+}
diff --git a/lib/fetch-wrapper/formats/index.ts b/lib/fetch-wrapper/formats/index.ts
index 0132c87..0e01388 100644
--- a/lib/fetch-wrapper/formats/index.ts
+++ b/lib/fetch-wrapper/formats/index.ts
@@ -1,3 +1,4 @@
 export { openaiChatFormat } from './openai-chat'
 export { openaiResponsesFormat } from './openai-responses'
 export { geminiFormat } from './gemini'
+export { bedrockFormat } from './bedrock'
diff --git a/lib/fetch-wrapper/index.ts b/lib/fetch-wrapper/index.ts
index abcf5ad..1c14444 100644
--- a/lib/fetch-wrapper/index.ts
+++ b/lib/fetch-wrapper/index.ts
@@ -3,7 +3,7 @@ import type { Logger } from "../logger"
 import type { FetchHandlerContext, SynthPrompts } from "./types"
 import type { ToolTracker } from "../api-formats/synth-instruction"
 import type { PluginConfig } from "../config"
-import { openaiChatFormat, openaiResponsesFormat, geminiFormat } from "./formats"
+import { openaiChatFormat, openaiResponsesFormat, geminiFormat, bedrockFormat } from "./formats"
 import { handleFormat } from "./handler"
 import { runStrategies } from "../core/strategies"
 import { accumulateGCStats } from "./gc-tracker"
@@ -15,11 +15,12 @@ export type { FetchHandlerContext, FetchHandlerResult, SynthPrompts } from "./ty
  * Creates a wrapped global fetch that intercepts API calls and performs
  * context pruning on tool outputs that have been marked for removal.
  *
- * Supports four API formats:
+ * Supports five API formats:
  * 1. OpenAI Chat Completions (body.messages with role='tool')
  * 2. Anthropic (body.messages with role='user' containing tool_result)
  * 3. Google/Gemini (body.contents with functionResponse parts)
  * 4. OpenAI Responses API (body.input with function_call_output items)
+ * 5. AWS Bedrock Converse API (body.system + body.messages with toolResult blocks)
  */
 export function installFetchWrapper(
   state: PluginState,
@@ -57,12 +58,20 @@ export function installFetchWrapper(
     const toolIdsBefore = new Set(state.toolParameters.keys())
 
     // Mutually exclusive format handlers
+    // Note: bedrockFormat must be checked before openaiChatFormat, since both have messages[],
+    // but Bedrock is distinguished by its top-level system[] array and inferenceConfig
     if (openaiResponsesFormat.detect(body)) {
       const result = await handleFormat(body, ctx, inputUrl, openaiResponsesFormat)
       if (result.modified) {
         modified = true
       }
     }
+    else if (bedrockFormat.detect(body)) {
+      const result = await handleFormat(body, ctx, inputUrl, bedrockFormat)
+      if (result.modified) {
+        modified = true
+      }
+    }
     else if (openaiChatFormat.detect(body)) {
       const result = await handleFormat(body, ctx, inputUrl, openaiChatFormat)
       if (result.modified) {