diff --git a/.gitignore b/.gitignore
index 0688b83..e9287ec 100644
--- a/.gitignore
+++ b/.gitignore
@@ -31,5 +31,3 @@ Thumbs.db
 # Tests (local development only)
 tests/
-# Development notes
-notes/
diff --git a/.npmignore b/.npmignore
index cb4b50d..c1b05f1 100644
--- a/.npmignore
+++ b/.npmignore
@@ -10,6 +10,8 @@ bun.lock
 # Documentation
 ANALYSIS.md
+docs/
+notes/
 # Source files (since we're shipping dist/)
 index.ts
diff --git a/docs/providers/README.md b/docs/providers/README.md
new file mode 100644
index 0000000..29af628
--- /dev/null
+++ b/docs/providers/README.md
@@ -0,0 +1,339 @@
# Provider API Formats Reference

This directory contains documentation for each AI provider's API format, designed to help the context pruning plugin implement provider-specific logic.

## Sources

All information in these docs was gathered from:

### Primary Sources

| Source | Location | Description |
|--------|----------|-------------|
| **Vercel AI SDK** | https://github.com/vercel/ai | Provider conversion logic in `packages/{provider}/src/` |
| **OpenCode Source** | `/packages/opencode/src/provider/` | Custom transforms and provider loading |
| **models.dev API** | https://models.dev/api.json | Authoritative provider list with npm packages |

### Key AI SDK Files

| Provider | Conversion File |
|----------|-----------------|
| OpenAI | `packages/openai/src/chat/openai-chat-language-model.ts`, `packages/openai/src/responses/openai-responses-language-model.ts` |
| OpenAI-Compatible | `packages/openai-compatible/src/chat/openai-compatible-chat-language-model.ts` |
| Anthropic | `packages/anthropic/src/convert-to-anthropic-messages-prompt.ts`, `packages/anthropic/src/anthropic-messages-language-model.ts` |
| Google | `packages/google/src/convert-to-google-generative-ai-messages.ts`, `packages/google/src/google-generative-ai-language-model.ts` |
| AWS Bedrock | `packages/amazon-bedrock/src/convert-to-bedrock-chat-messages.ts`, `packages/amazon-bedrock/src/bedrock-chat-language-model.ts` |
| Mistral | `packages/mistral/src/convert-to-mistral-chat-messages.ts`, `packages/mistral/src/mistral-chat-language-model.ts` |
| Cohere | `packages/cohere/src/convert-to-cohere-chat-prompt.ts`, `packages/cohere/src/cohere-chat-language-model.ts` |

### OpenCode Custom Transform Files

| File | Purpose |
|------|---------|
| `src/provider/transform.ts` | Provider-specific message normalization, caching hints, schema transforms |
| `src/provider/provider.ts` | Provider loading, custom loaders, SDK instantiation |
| `src/provider/models.ts` | Model database schema, models.dev integration |
| `src/session/message-v2.ts` | Internal message structure, `toModelMessage()` conversion |

### Official API Documentation

| Provider | Documentation URL |
|----------|-------------------|
| OpenAI | https://platform.openai.com/docs/api-reference |
| Anthropic | https://docs.anthropic.com/en/api |
| Google Gemini | https://ai.google.dev/api/rest |
| AWS Bedrock | https://docs.aws.amazon.com/bedrock/latest/APIReference/ |
| Mistral | https://docs.mistral.ai/api/ |
| Cohere | https://docs.cohere.com/reference/chat |

---

## Format Categories

Providers fall into several format categories based on their API structure:

### 1. OpenAI Chat Comple
tions Format
**Most common format - used by ~60 providers**

Key identifiers:
- `body.messages[]` array
- Tool results: `role: "tool"`, `tool_call_id`
- System in messages array

Providers: openai, together, deepseek, groq, fireworks, hyperbolic, novita, cerebras, sambanova, perplexity, openrouter, and most others

### 2. OpenAI Responses Format (newer)
**Used by OpenAI GPT models via the Responses API**

Key identifiers:
- `body.input[]` array
- Tool results: `type: "function_call_output"`, `call_id`

Providers: openai (responses endpoint), azure (responses endpoint)

### 3. Anthropic Format
**Distinct format with cache control**

Key identifiers:
- `body.messages[]`, but tool results in user messages
- Tool results: `type: "tool_result"`, `tool_use_id`
- Top-level `system` array
- `cache_control` support

Providers: anthropic

### 4. Google Gemini Format
**Position-based tool correlation**

Key identifiers:
- `body.contents[]` array
- Tool results: `functionResponse` parts (no IDs!)
- Roles: `user`/`model` only
- Top-level `systemInstruction`

Providers: google, google-vertex

### 5. AWS Bedrock Format
**Converse API with cache points**

Key identifiers:
- Top-level `system` array
- Tool results: `toolResult` blocks with `toolUseId`
- `cachePoint` blocks

Providers: amazon-bedrock

### 6. Mistral Format (OpenAI-like with quirks)
**Strict ID requirements**

Key identifiers:
- OpenAI-like, but 9-char alphanumeric tool IDs required
- User content always an array

Providers: mistral

### 7. Cohere Format
**RAG-native with citations**

Key identifiers:
- Uses `p`/`k` instead of `top_p`/`top_k`
- Uppercase tool choice values
- `documents` array for RAG

Providers: cohere

## Quick Reference: Thinking/Reasoning

| Format | Request Config | Response Structure | Encrypted? | Signature? |
|---
-----|---------------|-------------------|------------|------------|
| OpenAI Responses | `reasoning: {effort, summary}` | `{type: "reasoning", encrypted_content, summary}` | Yes | No |
| Anthropic | `thinking: {type, budget_tokens}` | `{type: "thinking", thinking, signature}` | Partial* | Yes |
| Google Gemini | `thinkingConfig: {thinkingBudget}` | `{text, thought: true, thoughtSignature}` | No | Optional |
| AWS Bedrock | `additionalModelRequestFields.thinking` | `{reasoningContent: {reasoningText/redactedReasoning}}` | Partial* | Yes |
| Mistral | N/A (model decides) | `{type: "thinking", thinking: [{type: "text", text}]}` | No | No |
| Cohere | `thinking: {type, token_budget}` | `{type: "thinking", thinking: "..."}` | No | No |

*Partial = has both visible (`thinking`/`reasoningText`) and redacted (`redacted_thinking`/`redactedReasoning`) variants

**Key differences:**
- **OpenAI**: Reasoning is always encrypted; only the summary is readable
- **Anthropic/Bedrock**: Can have visible thinking with a signature, or redacted thinking
- **Gemini**: Thinking is a text part with the `thought: true` flag
- **Mistral**: Thinking is a nested array of text parts
- **Cohere**: Thinking is a plain string

**SDK normalization**: All formats are converted to `{type: "reasoning", text: "..."}` by the AI SDK

## Quick Reference: Tool Call ID Fields

| Format | Tool Call ID Field | Tool Result ID Field |
|--------|-------------------|---------------------|
| OpenAI Chat | `tool_calls[].id` | `tool_call_id` |
| OpenAI Responses | `call_id` | `call_id` |
| Anthropic | `tool_use.id` | `tool_use_id` |
| Gemini | **NONE (position-based)** | **NONE** |
| Bedrock | `toolUse.toolUseId` | `toolResult.toolUseId` |
| Mistral | `tool_calls[].id` (9-char) | `tool_call_id` |
| Cohere | `tool_calls[].id` | `tool_call_id` |

## Detection Strategy

To detect which format a request uses, check for format-specific top-level fields first, then fall back to inspecting message content. Anthropic tool results can appear in any user message, not just the first, so all messages are scanned:

```typescript
function detectFormat(body: any): string {
  if (Array.isArray(body.input)) return 'openai-responses'
  if (Array.isArray(body.contents)) return 'gemini'
  if (Array.isArray(body.system) && body.inferenceConfig) return 'bedrock'
  if (Array.isArray(body.messages)) {
    // Anthropic embeds tool results in user messages as tool_result blocks
    for (const msg of body.messages) {
      if (!Array.isArray(msg?.content)) continue
      for (const block of msg.content) {
        if (block?.type === 'tool_result' || block?.tool_use_id) return 'anthropic'
      }
    }
  }
  return 'openai-chat' // Default
}
```

## Files

- [openai.md](./openai.md) - OpenAI Chat Completions & Responses API
- [anthropic.md](./anthropic.md) - Anthropic Messages API
- [google-gemini.md](./google-gemini.md) - Google Generative AI (Gemini)
- [aws-bedrock.md](./aws-bedrock.md) - AWS Bedrock Converse API
- [mistral.md](./mistral.md) - Mistral API
- [cohere.md](./cohere.md) - Cohere Chat API
- [openai-compatible.md](./openai-compatible.md) - OpenAI-compatible providers

## Context Pruning Universal Rules

1. **Tool call/result pairing**: Always prune tool calls and their results together
2. **Message alternation**: Most APIs expect alternating user/assistant messages
3. **System preservation**: System messages typically should not be pruned
4. **ID correlation**: Maintain ID relationships when pruning (except Gemini, which is position-based)
5. **Cache mark
ers**: Consider preserving cache control markers when present

---

## Complete Provider List (models.dev)

Every provider from models.dev and its API format:

### OpenAI Chat Format (44 providers)
*Uses `@ai-sdk/openai-compatible` - standard OpenAI messages format*

| Provider ID | Name | Notes |
|-------------|------|-------|
| `agentrouter` | AgentRouter | |
| `alibaba` | Alibaba | |
| `alibaba-cn` | Alibaba (China) | |
| `bailing` | Bailing | |
| `baseten` | Baseten | |
| `chutes` | Chutes | |
| `cortecs` | Cortecs | |
| `deepseek` | DeepSeek | Reasoning models (R1) |
| `fastrouter` | FastRouter | |
| `fireworks-ai` | Fireworks AI | |
| `github-copilot` | GitHub Copilot | |
| `github-models` | GitHub Models | |
| `huggingface` | Hugging Face | |
| `iflowcn` | iFlow | |
| `inception` | Inception | |
| `inference` | Inference | |
| `io-net` | IO.NET | |
| `llama` | Llama | |
| `lmstudio` | LMStudio | Local inference |
| `lucidquery` | LucidQuery AI | |
| `modelscope` | ModelScope | |
| `moonshotai` | Moonshot AI | |
| `moonshotai-cn` | Moonshot AI (China) | |
| `morph` | Morph | |
| `nebius` | Nebius Token Factory | |
| `nvidia` | Nvidia | |
| `opencode` | OpenCode Zen | |
| `openrouter` | OpenRouter | Meta-provider, cache support |
| `ovhcloud` | OVHcloud AI Endpoints | |
| `poe` | Poe | |
| `requesty` | Requesty | |
| `scaleway` | Scaleway | |
| `siliconflow` | SiliconFlow | |
| `submodel` | submodel | |
| `synthetic` | Synthetic | |
| `upstage` | Upstage | |
| `venice` | Venice AI | |
| `vultr` | Vultr | |
| `wandb` | Weights & Biases | |
| `zai` | Z.AI | |
| `zai-coding-plan` | Z.AI Coding Plan | |
| `zenmux` | ZenMux | |
| `zhipuai` | Zhipu AI | |
| `zhipuai-coding-plan` | Zhipu AI Coding Plan | |

### OpenAI Native Format (1 provider)
*Uses `@ai-sdk/openai` - supports both Chat Completions and Responses API*

| Provider ID | Name | Notes |
|-------------|------|-------|
| `openai` | OpenAI | Responses API for GPT-4.1+ |

### Azure Format (2 providers)
*Uses `@ai-sdk/azure` - OpenAI format with Azure auth*

| Provider ID | Name | Notes |
|-------------|------|-------|
| `azure` | Azure | Supports Responses API |
| `azure-cognitive-services` | Azure Cognitive Services | |

### Anthropic Format (4 providers)
*Uses `@ai-sdk/anthropic` - distinct message format with cache control*

| Provider ID | Name | Notes |
|-------------|------|-------|
| `anthropic` | Anthropic | Native Anthropic API |
| `kimi-for-coding` | Kimi For Coding | Uses Anthropic format |
| `minimax` | MiniMax | Uses Anthropic format |
| `minimax-cn` | MiniMax (China) | Uses Anthropic format |

### Google Gemini Format (3 providers)
*Uses `@ai-sdk/google` or `@ai-sdk/google-vertex` - POSITION-BASED tool correlation*

| Provider ID | Name | Notes |
|-------------|------|-------|
| `google` | Google | Native Gemini API |
| `google-vertex` | Vertex | Google Cloud Vertex AI |
| `google-vertex-anthropic` | Vertex (Anthropic) | Claude via Vertex |

### AWS Bedrock Format (1 provider)
*Uses `@ai-sdk/amazon-bedrock` - Converse API with cachePoint*

| Provider ID | Name | Notes |
|-------------|------|-------|
| `amazon-bedrock` | Amazon Bedrock | Multi-model, cachePoint support |

### Mistral Format (1 provider)
*Uses `@ai-sdk/mistral` - requires 9-char alphanumeric tool IDs*

| Provider ID | Name | Notes |
|-------------|------|-------|
| `mistral` | Mistral | Strict tool ID format |

### Cohere Format (1 pro
vider)
*Uses `@ai-sdk/cohere` - RAG-native with citations*

| Provider ID | Name | Notes |
|-------------|------|-------|
| `cohere` | Cohere | Uses `p`/`k`, uppercase tool choice |

### Specialized SDK Providers (12 providers)
*Use provider-specific SDKs but follow OpenAI-like format*

| Provider ID | Name | SDK | Format |
|-------------|------|-----|--------|
| `cerebras` | Cerebras | `@ai-sdk/cerebras` | OpenAI-like |
| `deepinfra` | Deep Infra | `@ai-sdk/deepinfra` | OpenAI-like |
| `groq` | Groq | `@ai-sdk/groq` | OpenAI-like |
| `perplexity` | Perplexity | `@ai-sdk/perplexity` | OpenAI-like |
| `togetherai` | Together AI | `@ai-sdk/togetherai` | OpenAI-like |
| `xai` | xAI | `@ai-sdk/xai` | OpenAI-like |
| `vercel` | Vercel AI Gateway | `@ai-sdk/gateway` | OpenAI-like |
| `v0` | v0 | `@ai-sdk/vercel` | OpenAI-like |
| `cloudflare-workers-ai` | Cloudflare Workers AI | `workers-ai-provider` | OpenAI-like |
| `ollama-cloud` | Ollama Cloud | `ai-sdk-ollama` | OpenAI-like |
| `aihubmix` | AIHubMix | `@aihubmix/ai-sdk-provider` | OpenAI-like |
| `sap-ai-core` | SAP AI Core | `@mymediset/sap-ai-provider` | OpenAI-like |

---

## Format Summary

| Format | Provider Count | Tool ID Field | Key Identifier |
|--------|---------------|---------------|----------------|
| OpenAI Chat | 56 | `tool_call_id` | `body.messages[]` |
| OpenAI Responses | 2 | `call_id` | `body.input[]` |
| Anthropic | 4 | `tool_use_id` | `tool_result` in user msg |
| Google Gemini | 3 | **NONE** | `body.contents[]` |
| AWS Bedrock | 1 | `toolUseId` | `body.inferenceConfig` |
| Mistral | 1 | `tool_call_id` (9-char) | Check provider ID |
| Cohere | 1 | `tool_call_id` | Check provider ID |

**Total: 69 providers**
diff --git a/docs/providers/anthropic.md b/docs/providers/anthropic.md
new file mode 100644
index 0000000..d1610fa
--- /dev/null
+++ b/docs/providers/anthropic.md
@@ -0,0 +1,216 @@
# Anthropic Messages API Format

Anthropic uses a distinct message format with unique features like cache control and extended thinking.

## Sources

- **AI SDK**: `packages/anthropic/src/convert-to-anthropic-messages-prompt.ts`, `packages/anthropic/src/anthropic-messages-language-model.ts`
- **OpenCode Transform**: `src/provider/transform.ts` (toolCallId sanitization, cache control)
- **Official Docs**: https://docs.anthropic.com/en/api/messages

## Request Structure

```json
{
  "model": "claude-sonnet-4-5",
  "max_tokens": 4096,
  "temperature": 1.0,
  "stream": true,
  "system": [
    {"type": "text", "text": "System instructions", "cache_control": {"type": "ephemeral"}}
  ],
  "messages": [...],
  "tools": [...],
  "tool_choice": {"type": "auto"},
  "thinking": {"type": "enabled", "budget_tokens": 10000}
}
```

## Key Differences from OpenAI

| Feature | OpenAI | Anthropic |
|---------|--------|-----------|
| System message | In messages array | Top-level `system` array |
| Tool results | `role: "tool"` message | In `user` message with `type: "tool_result"` |
| Tool call ID field | `tool_call_id` | `tool_use_id` |
| Caching | Automatic only (no request markers) | `cache_control` on content blocks |

## Message Roles

Only **two roles**: `user` and `assistant`. Tool results are embedded in user messages.
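
Because tool results live inside `user` messages, a pruner has to scan content blocks rather than rely on message roles to pair calls with results. A minimal sketch of that pairing pass (illustrative types and helper name, not the plugin's actual API):

```typescript
// Sketch: map each tool_use id to the message indices of its call and its
// result, so both sides of a pair can be pruned together.
type AnthropicMessage = { role: "user" | "assistant"; content: string | any[] }

function collectToolPairs(messages: AnthropicMessage[]) {
  const pairs = new Map<string, { callIndex: number; resultIndex: number }>()
  messages.forEach((m, i) => {
    if (!Array.isArray(m.content)) return
    for (const block of m.content) {
      if (m.role === "assistant" && block.type === "tool_use") {
        pairs.set(block.id, { callIndex: i, resultIndex: -1 })
      } else if (m.role === "user" && block.type === "tool_result") {
        const pair = pairs.get(block.tool_use_id) // correlate via tool_use_id
        if (pair) pair.resultIndex = i
      }
    }
  })
  return pairs
}
```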
+ +## Message Formats + +### System Message (top-level, not in messages) +```json +{ + "system": [ + { + "type": "text", + "text": "You are a helpful assistant.", + "cache_control": {"type": "ephemeral"} + } + ] +} +``` + +### User Message +```json +{ + "role": "user", + "content": [ + {"type": "text", "text": "Hello", "cache_control": {"type": "ephemeral"}}, + {"type": "image", "source": {"type": "base64", "media_type": "image/jpeg", "data": "..."}}, + {"type": "document", "source": {"type": "base64", "media_type": "application/pdf", "data": "..."}, "title": "Doc"} + ] +} +``` + +### Assistant Message with Tool Use +```json +{ + "role": "assistant", + "content": [ + {"type": "text", "text": "Let me check the weather."}, + { + "type": "tool_use", + "id": "toolu_01XYZ", + "name": "get_weather", + "input": {"location": "San Francisco"}, + "cache_control": {"type": "ephemeral"} + } + ] +} +``` + +### Tool Result (in user message) +```json +{ + "role": "user", + "content": [ + { + "type": "tool_result", + "tool_use_id": "toolu_01XYZ", + "content": "72°F and sunny", + "is_error": false, + "cache_control": {"type": "ephemeral"} + } + ] +} +``` + +## Thinking/Reasoning (Extended Thinking) + +### Request Configuration +```json +{ + "thinking": { + "type": "enabled", + "budget_tokens": 10000 + } +} +``` + +**Parameters:** +- `type`: `"enabled"` or `"disabled"` +- `budget_tokens`: Token budget for thinking (minimum 1024) + +**Constraints when thinking enabled:** +- `temperature`, `topK`, `topP` are **NOT supported** (ignored with warnings) +- `max_tokens` is automatically adjusted to include `budget_tokens` +- Minimum budget is 1,024 tokens + +### Response Content Blocks + +**Thinking Block** (visible reasoning): +```json +{ + "type": "thinking", + "thinking": "Let me analyze this step by step...", + "signature": "cryptographic_signature_for_verification" +} +``` + +**Redacted Thinking Block** (hidden reasoning): +```json +{ + "type": "redacted_thinking", + "data": "encrypted_base64_redacted_content" +} +``` + +### Streaming Deltas +```json +{"type": "thinking_delta", "thinking": "reasoning chunk..."} +{"type": "signature_delta", "signature": "sig_chunk"} +``` + +### SDK Conversion +The AI SDK converts Anthropic's `thinking` blocks to a unified `reasoning` type: +```typescript +// Anthropic response +{type: "thinking", thinking: "...", signature: "..."} + +// Converted to SDK format +{type: "reasoning", text: "...", signature: "..."} +``` + +### Context Pruning for Thinking +- **Cannot apply cache_control** to thinking or redacted_thinking blocks +- **Signatures are cryptographic** - preserve for verification if replaying +- **Redacted thinking** contains encrypted content that cannot be inspected +- Consider thinking blocks as important context but potentially large + +## Tool Definition + +```json +{ + "name": "get_weather", + "description": "Get weather for a location", + "input_schema": { + "type": "object", + "properties": {"location": {"type": "string"}}, + "required": ["location"] + }, + "cache_control": {"type": "ephemeral"} +} +``` + +### Tool Choice Options +- `{"type": "auto"}` - Model decides +- `{"type": "any"}` - Force tool use +- `{"type": "tool", "name": "get_weather"}` - Force specific tool + +## Cache Control + +```json +{"type": "ephemeral", "ttl": "5m"} +``` + +**Limits**: Maximum **4 cache breakpoints** per request + +**Applicable to**: system messages, user/assistant content parts, tool results, tool definitions + +**NOT applicable to**: `thinking` blocks, `redacted_thinking` 
blocks + +## Special Tool Types + +**Server Tool Use** (provider-executed): +```json +{"type": "server_tool_use", "id": "...", "name": "web_search", "input": {...}} +``` +Names: `web_fetch`, `web_search`, `code_execution`, `bash_code_execution`, `text_editor_code_execution` + +**MCP Tool Use**: +```json +{"type": "mcp_tool_use", "id": "...", "name": "custom_tool", "server_name": "my-mcp-server", "input": {...}} +``` + +## Context Pruning Considerations + +1. **Tool correlation**: Uses `tool_use_id` (not `tool_call_id`) +2. **Tool results in user messages**: Unlike OpenAI, tool results are `content` parts in user messages +3. **Message merging**: Consecutive user messages are merged; consecutive assistant messages are merged +4. **Cache breakpoints**: Preserve `cache_control` markers when possible (max 4) +5. **Thinking blocks**: Have signatures for verification; handle with care +6. **Paired pruning**: `tool_use` and corresponding `tool_result` must be pruned together diff --git a/docs/providers/aws-bedrock.md b/docs/providers/aws-bedrock.md new file mode 100644 index 0000000..f1c4479 --- /dev/null +++ b/docs/providers/aws-bedrock.md @@ -0,0 +1,287 @@ +# AWS Bedrock API Format + +AWS Bedrock uses the Converse API with unique content block types and caching via `cachePoint`. + +## Sources + +- **AI SDK**: `packages/amazon-bedrock/src/convert-to-bedrock-chat-messages.ts`, `packages/amazon-bedrock/src/bedrock-chat-language-model.ts` +- **OpenCode Transform**: `src/provider/transform.ts` (cachePoint insertion) +- **Official Docs**: https://docs.aws.amazon.com/bedrock/latest/APIReference/API_runtime_Converse.html + +## Request Structure + +```json +{ + "system": [ + {"text": "System message"}, + {"cachePoint": {"type": "default"}} + ], + "messages": [ + {"role": "user", "content": [...]}, + {"role": "assistant", "content": [...]} + ], + "inferenceConfig": { + "maxTokens": 4096, + "temperature": 0.7, + "topP": 0.9, + "topK": 50, + "stopSequences": ["END"] + }, + "toolConfig": { + "tools": [...], + "toolChoice": {"auto": {}} + }, + "additionalModelRequestFields": { + "thinking": {"type": "enabled", "budget_tokens": 10000} + } +} +``` + +## Key Differences from OpenAI + +| Feature | OpenAI | Bedrock | +|---------|--------|--------| +| System message | In messages | Top-level `system` array | +| Tool calls | `tool_calls` array | `toolUse` content block | +| Tool results | `role: "tool"` | `toolResult` in user content | +| Tool call ID | `tool_call_id` | `toolUseId` | +| Caching | Not available | `cachePoint` blocks | + +## Message Roles + +Only **two roles**: `user` and `assistant`. Tool results go in user messages. 
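
As with Anthropic, correlation runs through content blocks, here keyed by `toolUseId`. A hedged sketch of removing one call/result pair while leaving unrelated blocks such as `cachePoint` markers in place (hypothetical helper, not part of the Converse API):

```typescript
// Sketch: strip one toolUse/toolResult pair from Bedrock `messages`,
// keeping every other content block (text, images, cachePoint) intact.
function dropBedrockToolPair(messages: any[], toolUseId: string): void {
  for (const m of messages) {
    if (!Array.isArray(m.content)) continue
    m.content = m.content.filter((block: any) =>
      block.toolUse?.toolUseId !== toolUseId &&
      block.toolResult?.toolUseId !== toolUseId
    )
  }
  // A real implementation must also handle messages left with no content
  // blocks, since Bedrock expects at least one block per message.
}
```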
+ +## Content Block Types + +### Text Block +```json +{"text": "Hello, how can I help?"} +``` + +### Image Block +```json +{ + "image": { + "format": "jpeg", + "source": {"bytes": ""} + } +} +``` +Formats: `jpeg`, `png`, `gif`, `webp` + +### Document Block +```json +{ + "document": { + "format": "pdf", + "name": "document-1", + "source": {"bytes": ""}, + "citations": {"enabled": true} + } +} +``` +Formats: `pdf`, `csv`, `doc`, `docx`, `xls`, `xlsx`, `html`, `txt`, `md` + +### Tool Use Block (Assistant calling tool) +```json +{ + "toolUse": { + "toolUseId": "tool_call_123", + "name": "get_weather", + "input": {"city": "Seattle"} + } +} +``` + +### Tool Result Block (User providing result) +```json +{ + "toolResult": { + "toolUseId": "tool_call_123", + "content": [ + {"text": "Temperature: 72F"}, + {"image": {"format": "png", "source": {"bytes": "..."}}} + ] + } +} +``` + +### Reasoning Block (Anthropic models) +```json +{ + "reasoningContent": { + "reasoningText": { + "text": "Let me think through this...", + "signature": "" + } + } +} +``` + +## Thinking/Reasoning (Anthropic Models via Bedrock) + +### Request Configuration +```json +{ + "additionalModelRequestFields": { + "thinking": { + "type": "enabled", + "budget_tokens": 10000 + } + } +} +``` + +**Note**: Bedrock uses `reasoningConfig` in the SDK which gets transformed to Anthropic's `thinking` format in `additionalModelRequestFields`. + +**Parameters:** +- `type`: `"enabled"` or `"disabled"` +- `budget_tokens`: Token budget for thinking (minimum 1024) + +### Response Content Blocks + +**Reasoning Text Block** (visible reasoning): +```json +{ + "reasoningContent": { + "reasoningText": { + "text": "Let me analyze this step by step...", + "signature": "cryptographic_signature_for_verification" + } + } +} +``` + +**Redacted Reasoning Block** (hidden reasoning): +```json +{ + "reasoningContent": { + "redactedReasoning": { + "data": "encrypted_base64_redacted_content" + } + } +} +``` + +### SDK Conversion +The AI SDK converts Bedrock's reasoning blocks to unified format: +```typescript +// Bedrock response +{reasoningContent: {reasoningText: {text: "...", signature: "..."}}} + +// Converted to SDK format +{type: "reasoning", text: "...", signature: "..."} + +// Redacted version +{reasoningContent: {redactedReasoning: {data: "..."}}} + +// Converted to SDK format +{type: "redacted-reasoning", data: "..."} +``` + +### Context Pruning for Reasoning +- **Signatures are cryptographic** - preserve for verification +- **Redacted reasoning** contains encrypted content that cannot be inspected +- Reasoning blocks appear in assistant message content +- Consider reasoning as important but potentially large context + +### Cache Point +```json +{"cachePoint": {"type": "default"}} +``` + +## Caching Mechanism + +Cache points can be inserted at: +1. In system messages - After each system message +2. In user message content - After content blocks +3. In assistant message content - After content blocks +4. 
In tool configuration - After tool definitions + +## Tool Definition + +```json +{ + "tools": [ + { + "toolSpec": { + "name": "get_weather", + "description": "Get weather for a city", + "inputSchema": { + "json": { + "type": "object", + "properties": {"city": {"type": "string"}}, + "required": ["city"] + } + } + } + }, + {"cachePoint": {"type": "default"}} + ], + "toolChoice": {"auto": {}} +} +``` + +### Tool Choice Options +- `{"auto": {}}` - Model decides +- `{"any": {}}` - Force tool use (maps to "required") +- `{"tool": {"name": "tool_name"}}` - Force specific tool + +## Complete Example + +```json +{ + "system": [ + {"text": "You are a helpful assistant."}, + {"cachePoint": {"type": "default"}} + ], + "messages": [ + { + "role": "user", + "content": [{"text": "What's the weather in Seattle?"}] + }, + { + "role": "assistant", + "content": [{ + "toolUse": { + "toolUseId": "call_001", + "name": "get_weather", + "input": {"city": "Seattle"} + } + }] + }, + { + "role": "user", + "content": [ + { + "toolResult": { + "toolUseId": "call_001", + "content": [{"text": "{\"temperature\": 72, \"condition\": \"sunny\"}"}] + } + }, + {"cachePoint": {"type": "default"}} + ] + } + ], + "toolConfig": { + "tools": [{"toolSpec": {"name": "get_weather", "description": "Get weather", "inputSchema": {"json": {"type": "object", "properties": {"city": {"type": "string"}}, "required": ["city"]}}}}], + "toolChoice": {"auto": {}} + } +} +``` + +## Unique Behaviors + +1. **Trailing whitespace trimming**: Last text block in assistant messages is trimmed +2. **Empty text blocks skipped**: Whitespace-only text blocks are filtered +3. **Temperature clamping**: Clamped to [0, 1] range +4. **Tool content filtering**: If no tools available, tool content is removed with warning + +## Context Pruning Considerations + +1. **Tool correlation**: Uses `toolUseId` for correlation +2. **Tool results in user messages**: `toolResult` blocks are in user message content +3. **Message grouping**: Consecutive same-role messages are merged +4. **Cache points**: Preserve `cachePoint` markers when beneficial +5. **Paired pruning**: `toolUse` and corresponding `toolResult` must be pruned together +6. **System first**: System messages must come before user/assistant messages diff --git a/docs/providers/cohere.md b/docs/providers/cohere.md new file mode 100644 index 0000000..a1927fb --- /dev/null +++ b/docs/providers/cohere.md @@ -0,0 +1,282 @@ +# Cohere API Format + +Cohere uses a chat-based API with unique features like built-in RAG via `documents` and citations. 
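
## Sources

- **AI SDK**: `packages/cohere/src/convert-to-cohere-chat-prompt.ts`, `packages/cohere/src/cohere-chat-language-model.ts`
- **Official Docs**: https://docs.cohere.com/reference/chat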
+ +## Request Structure + +```json +{ + "model": "command-r-plus", + "messages": [...], + "max_tokens": 4096, + "temperature": 0.7, + "p": 0.9, + "k": 40, + "frequency_penalty": 0.0, + "presence_penalty": 0.0, + "seed": 12345, + "stop_sequences": ["END"], + "response_format": {"type": "json_object"}, + "tools": [...], + "tool_choice": "REQUIRED", + "documents": [...], + "thinking": {"type": "enabled", "token_budget": 2048} +} +``` + +## Key Differences from OpenAI + +| Feature | OpenAI | Cohere | +|---------|--------|-------| +| Top-p parameter | `top_p` | `p` | +| Top-k parameter | `top_k` | `k` | +| Tool choice required | `"required"` | `"REQUIRED"` (uppercase) | +| RAG | Not built-in | `documents` array | +| Citations | Not built-in | Automatic with documents | + +## Message Formats + +### System Message +```json +{"role": "system", "content": "You are a helpful assistant."} +``` + +### User Message (text only) +```json +{"role": "user", "content": "What is the weather today?"} +``` +**Note**: Files/documents are extracted to top-level `documents` array for RAG. + +### Assistant Message +```json +{ + "role": "assistant", + "content": "The weather is sunny.", + "tool_plan": undefined, + "tool_calls": undefined +} +``` + +### Assistant Message with Tool Calls +```json +{ + "role": "assistant", + "content": undefined, + "tool_plan": undefined, + "tool_calls": [{ + "id": "call_abc123", + "type": "function", + "function": { + "name": "get_weather", + "arguments": "{\"location\": \"San Francisco\"}" + } + }] +} +``` +**Key quirk**: When `tool_calls` present, `content` is `undefined`. + +### Tool Result Message +```json +{ + "role": "tool", + "tool_call_id": "call_abc123", + "content": "{\"temperature\": 72, \"conditions\": \"sunny\"}" +} +``` + +## Tool Definition + +```json +{ + "tools": [{ + "type": "function", + "function": { + "name": "get_weather", + "description": "Get weather for a location", + "parameters": { + "type": "object", + "properties": {"location": {"type": "string"}}, + "required": ["location"] + } + } + }], + "tool_choice": "REQUIRED" +} +``` + +### Tool Choice Values (UPPERCASE) +- `undefined` - Auto (model decides) +- `"NONE"` - Disable tool use +- `"REQUIRED"` - Force tool use + +**Note**: To force a specific tool, filter `tools` array and set `tool_choice: "REQUIRED"`. + +## RAG via Documents + +```json +{ + "documents": [ + { + "data": { + "text": "Document content here", + "title": "Optional Title" + } + } + ] +} +``` + +## Response Structure + +```json +{ + "generation_id": "abc-123", + "message": { + "role": "assistant", + "content": [ + {"type": "text", "text": "Response here."}, + {"type": "thinking", "thinking": "Reasoning..."} + ], + "tool_plan": "I will call the API", + "tool_calls": [...], + "citations": [{ + "start": 0, + "end": 10, + "text": "cited text", + "sources": [{"type": "document", "id": "doc1", "document": {...}}] + }] + }, + "finish_reason": "COMPLETE", + "usage": {...} +} +``` + +**Note**: Response `content` is an **array** of typed objects (unlike request which uses string). + +## Unique Features + +1. **Thinking mode**: Native reasoning via `thinking` config, returns `{"type": "thinking"}` blocks +2. **Citations**: Automatic source citations when using `documents` +3. **Tool plan**: `tool_plan` field explains tool usage reasoning +4. 
**Null arguments**: May return `"null"` for parameterless tools (normalize to `"{}"`) + +## Thinking/Reasoning + +### Request Configuration +```json +{ + "thinking": { + "type": "enabled", + "token_budget": 2048 + } +} +``` + +**Parameters:** +- `type`: `"enabled"` or `"disabled"` +- `token_budget`: Token budget for thinking + +### Response Content Blocks + +**Thinking Block** (in response content array): +```json +{ + "type": "thinking", + "thinking": "Let me reason through this problem..." +} +``` + +**Note**: Unlike Mistral, Cohere's `thinking` field is a **string**, not an array. + +### Response Structure with Thinking +```json +{ + "message": { + "role": "assistant", + "content": [ + {"type": "thinking", "thinking": "First, I need to consider..."}, + {"type": "text", "text": "Based on my analysis..."} + ] + } +} +``` + +### Streaming Events for Thinking +```json +// content-start (thinking) +{"type": "content-start", "index": 0, "delta": {"message": {"content": {"type": "thinking", "thinking": ""}}}} + +// content-delta (thinking) +{"type": "content-delta", "index": 0, "delta": {"message": {"content": {"thinking": "reasoning chunk..."}}}} +``` + +### SDK Conversion +The AI SDK converts Cohere's thinking blocks to unified format: +```typescript +// Cohere response content +{type: "thinking", thinking: "..."} + +// Converted to SDK format +{type: "reasoning", text: "..."} +``` + +### Context Pruning for Thinking +- Thinking blocks appear in response `content` array +- No signatures or encryption - content is plaintext string +- Consider thinking as important context but potentially large +- Thinking appears before text content in the response + +## Complete Example + +```json +{ + "model": "command-r-plus", + "messages": [ + {"role": "system", "content": "You are a weather assistant."}, + {"role": "user", "content": "Weather in Paris?"}, + { + "role": "assistant", + "content": undefined, + "tool_plan": undefined, + "tool_calls": [{ + "id": "call_001", + "type": "function", + "function": {"name": "get_weather", "arguments": "{\"location\":\"Paris\"}"} + }] + }, + { + "role": "tool", + "tool_call_id": "call_001", + "content": "{\"temperature\":18,\"conditions\":\"cloudy\"}" + } + ], + "tools": [{ + "type": "function", + "function": {"name": "get_weather", "description": "Get weather", "parameters": {"type": "object", "properties": {"location": {"type": "string"}}, "required": ["location"]}} + }], + "max_tokens": 1024, + "temperature": 0.7 +} +``` + +## Streaming Events + +| Event | Purpose | +|-------|--------| +| `message-start` | Start of response | +| `content-start` | Start of text/thinking block | +| `content-delta` | Text or thinking chunk | +| `tool-plan-delta` | Tool planning reasoning | +| `tool-call-start` | Start of tool call | +| `tool-call-delta` | Tool call arguments chunk | +| `message-end` | Final with `finish_reason` and `usage` | + +## Context Pruning Considerations + +1. **Tool correlation**: Uses `tool_call_id` like OpenAI +2. **Separate tool results**: Each result is a separate message (not grouped) +3. **Content exclusivity**: When `tool_calls` present, `content` is `undefined` +4. **Response vs request format**: Response content is array, request is string +5. **Uppercase tool choice**: Use `"NONE"` and `"REQUIRED"` (not lowercase) +6. **Paired pruning**: Tool calls and results must be pruned together +7. 
**Documents top-level**: RAG documents are separate from messages diff --git a/docs/providers/google-gemini.md b/docs/providers/google-gemini.md new file mode 100644 index 0000000..8ab69b1 --- /dev/null +++ b/docs/providers/google-gemini.md @@ -0,0 +1,255 @@ +# Google Gemini API Format + +Google's Generative AI (Gemini) uses a unique format with **position-based tool correlation** (no tool call IDs). + +## Sources + +- **AI SDK**: `packages/google/src/convert-to-google-generative-ai-messages.ts`, `packages/google/src/google-generative-ai-language-model.ts` +- **Schema Conversion**: `packages/google/src/convert-json-schema-to-openapi-schema.ts` +- **OpenCode Transform**: `src/provider/transform.ts` (schema integer→string enum conversion) +- **Official Docs**: https://ai.google.dev/api/rest/v1/models/generateContent + +## Request Structure + +```json +{ + "systemInstruction": { + "parts": [{"text": "System prompt text"}] + }, + "contents": [ + {"role": "user", "parts": [...]}, + {"role": "model", "parts": [...]} + ], + "generationConfig": { + "maxOutputTokens": 1024, + "temperature": 0.7, + "topK": 40, + "topP": 0.95, + "responseMimeType": "application/json", + "responseSchema": {...} + }, + "tools": [...], + "toolConfig": { + "functionCallingConfig": {"mode": "AUTO"} + } +} +``` + +## Key Differences from OpenAI + +| Feature | OpenAI | Gemini | +|---------|--------|--------| +| Message container | `messages[]` | `contents[]` | +| System message | In messages | Top-level `systemInstruction` | +| Roles | system/user/assistant/tool | user/model only | +| Tool call IDs | ID-based correlation | **POSITION-BASED** | +| Tool results | Separate `tool` role | In `user` message as `functionResponse` | + +## Message Roles + +Only **two roles**: `user` and `model` + +| SDK Role | Gemini Role | +|----------|-------------| +| `system` | `systemInstruction` (top-level) | +| `user` | `user` | +| `assistant` | `model` | +| `tool` (results) | `user` (with `functionResponse`) | + +## Content Parts + +### Text Part +```json +{"text": "Hello, how are you?"} +``` + +### Thinking Part +```json +{"text": "Let me think...", "thought": true, "thoughtSignature": "sig-for-caching"} +``` + +## Thinking/Reasoning + +### Request Configuration +```json +{ + "generationConfig": { + "thinkingConfig": { + "thinkingBudget": 8192, + "includeThoughts": true + } + } +} +``` + +**Parameters:** +- `thinkingBudget`: Token budget for thinking +- `includeThoughts`: Whether to include thinking in response (default true) + +### Response Content Parts + +**Thinking Part** (in model message): +```json +{ + "text": "Let me reason through this problem...", + "thought": true, + "thoughtSignature": "signature_for_caching" +} +``` + +**Key fields:** +- `thought: true` - Marks this part as reasoning content +- `thoughtSignature` - Optional signature for caching/verification + +### Usage Tracking +```json +{ + "usageMetadata": { + "promptTokenCount": 100, + "candidatesTokenCount": 200, + "thoughtsTokenCount": 150 + } +} +``` + +### SDK Conversion +The AI SDK converts Gemini's thought parts to unified `reasoning` type: +```typescript +// Gemini response part +{text: "...", thought: true, thoughtSignature: "..."} + +// Converted to SDK format +{type: "reasoning", text: "...", signature: "..."} +``` + +### Context Pruning for Thinking +- **Thought parts are regular text parts** with `thought: true` flag +- **thoughtSignature** should be preserved if present (used for caching) +- Thinking parts appear in `model` role messages +- Consider thinking 
as important but potentially large context + +## Image (inline base64) +```json +{"inlineData": {"mimeType": "image/jpeg", "data": "base64-encoded-data"}} +``` + +### Image (file URI) +```json +{"fileData": {"mimeType": "image/png", "fileUri": "gs://bucket/path/image.png"}} +``` + +### Function Call (tool invocation) +```json +{"functionCall": {"name": "get_weather", "args": {"location": "Tokyo"}}} +``` + +### Function Response (tool result) +```json +{"functionResponse": {"name": "get_weather", "response": {"name": "get_weather", "content": "{\"temp\": 22}"}}} +``` + +## CRITICAL: Position-Based Tool Correlation + +**Gemini does NOT use tool call IDs.** Tool results are correlated by **position/order**. + +### Tool Call (model message) +```json +{ + "role": "model", + "parts": [ + {"functionCall": {"name": "get_weather", "args": {"location": "SF"}}}, + {"functionCall": {"name": "get_time", "args": {"timezone": "PST"}}} + ] +} +``` + +### Tool Results (user message) - ORDER MUST MATCH +```json +{ + "role": "user", + "parts": [ + {"functionResponse": {"name": "get_weather", "response": {"name": "get_weather", "content": "72F"}}}, + {"functionResponse": {"name": "get_time", "response": {"name": "get_time", "content": "2:30 PM"}}} + ] +} +``` + +## Tool Definition + +```json +{ + "tools": [{ + "functionDeclarations": [{ + "name": "get_weather", + "description": "Get the current weather", + "parameters": { + "type": "object", + "properties": {"location": {"type": "string"}}, + "required": ["location"] + } + }] + }], + "toolConfig": { + "functionCallingConfig": {"mode": "AUTO"} + } +} +``` + +### Tool Config Modes +- `AUTO` - Model decides +- `NONE` - Disable tools +- `ANY` - Force tool use +- `ANY` + `allowedFunctionNames` - Force specific tools + +### Provider-Defined Tools +```json +{"googleSearch": {}}, +{"urlContext": {}}, +{"codeExecution": {}} +``` + +## Schema Conversion (JSON Schema to OpenAPI) + +Gemini requires **OpenAPI 3.0 schema format**: + +| JSON Schema | OpenAPI | +|-------------|---------| +| `const: value` | `enum: [value]` | +| `type: ["string", "null"]` | `anyOf` + `nullable: true` | + +## Gemma Model Handling + +For `gemma-*` models, system instructions are **prepended to first user message**: +```json +{ + "contents": [{ + "role": "user", + "parts": [{"text": "System prompt\n\nActual user message"}] + }] +} +``` + +## Complete Example + +```json +{ + "systemInstruction": {"parts": [{"text": "You are a weather assistant."}]}, + "contents": [ + {"role": "user", "parts": [{"text": "Weather in Tokyo?"}]}, + {"role": "model", "parts": [{"functionCall": {"name": "get_weather", "args": {"location": "Tokyo"}}}]}, + {"role": "user", "parts": [{"functionResponse": {"name": "get_weather", "response": {"name": "get_weather", "content": "22C cloudy"}}}]}, + {"role": "model", "parts": [{"text": "Tokyo is 22C and cloudy."}]} + ], + "tools": [{"functionDeclarations": [{"name": "get_weather", "description": "Get weather", "parameters": {"type": "object", "properties": {"location": {"type": "string"}}, "required": ["location"]}}]}] +} +``` + +## Context Pruning Considerations + +1. **POSITION-BASED CORRELATION**: Tool calls and results must be pruned TOGETHER and order preserved +2. **No IDs**: Cannot selectively prune individual tool results - entire pairs must go +3. **System separate**: `systemInstruction` is top-level, typically should NOT be pruned +4. **Alternation required**: Must maintain alternating `user`/`model` pattern +5. 
**Multi-part messages**: Each message can have multiple parts; prune entire messages, not parts +6. **Tool results are user role**: `functionResponse` parts are in `user` messages +7. **thoughtSignature**: Used for caching reasoning; preserve if present diff --git a/docs/providers/mistral.md b/docs/providers/mistral.md new file mode 100644 index 0000000..3767830 --- /dev/null +++ b/docs/providers/mistral.md @@ -0,0 +1,226 @@ +# Mistral API Format + +Mistral uses an OpenAI-compatible format but with **strict tool call ID requirements**. + +## Sources + +- **AI SDK**: `packages/mistral/src/convert-to-mistral-chat-messages.ts`, `packages/mistral/src/mistral-chat-language-model.ts` +- **OpenCode Transform**: `src/provider/transform.ts` (9-char alphanumeric ID normalization) +- **Official Docs**: https://docs.mistral.ai/api/#tag/chat + +## Request Structure + +```json +{ + "model": "mistral-large-latest", + "messages": [...], + "max_tokens": 4096, + "temperature": 0.7, + "top_p": 1.0, + "random_seed": 42, + "safe_prompt": false, + "stream": false, + "response_format": {"type": "json_object"}, + "tools": [...], + "tool_choice": "auto" +} +``` + +## CRITICAL: Tool Call ID Requirement + +**Mistral requires tool call IDs to be exactly 9 alphanumeric characters.** + +| Valid | Invalid | +|-------|--------| +| `abc123xyz` | `call_abc123` (too long, has underscore) | +| `A1B2C3D4E` | `12345` (too short) | +| `def456uvw` | `abc-123-xy` (has hyphens) | + +## Key Differences from OpenAI + +| Feature | OpenAI | Mistral | +|---------|--------|--------| +| Tool call ID format | `call_*` (variable) | **Exactly 9 alphanumeric** | +| Tool choice `required` | `"required"` | `"any"` | +| User content | String or array | **Always array** | +| Assistant `prefix` | Not supported | Supported | +| Stop sequences | Supported | Not supported | +| Frequency/presence penalty | Supported | Not supported | + +## Message Formats + +### System Message +```json +{"role": "system", "content": "You are a helpful assistant."} +``` + +### User Message (always array) +```json +{ + "role": "user", + "content": [ + {"type": "text", "text": "What's in this image?"}, + {"type": "image_url", "image_url": "https://example.com/image.jpg"}, + {"type": "document_url", "document_url": "data:application/pdf;base64,..."} + ] +} +``` + +### Assistant Message +```json +{ + "role": "assistant", + "content": "Here's the analysis...", + "prefix": true, + "tool_calls": [ + { + "id": "abc123xyz", + "type": "function", + "function": { + "name": "get_weather", + "arguments": "{\"location\":\"San Francisco\"}" + } + } + ] +} +``` + +### Tool Result Message +```json +{ + "role": "tool", + "name": "get_weather", + "tool_call_id": "abc123xyz", + "content": "{\"temperature\": 72, \"condition\": \"sunny\"}" +} +``` + +## Tool Definition + +```json +{ + "tools": [{ + "type": "function", + "function": { + "name": "get_weather", + "description": "Get weather for a location", + "parameters": { + "type": "object", + "properties": {"location": {"type": "string"}}, + "required": ["location"] + }, + "strict": true + } + }], + "tool_choice": "auto" +} +``` + +### Tool Choice Options +- `"auto"` - Model decides +- `"none"` - Disable tool calling +- `"any"` - Force tool use (NOT `"required"`) +- `{"type": "function", "function": {"name": "..."}}` - Force specific tool + +## Unique Features + +1. **Prefix flag**: `prefix: true` on assistant messages for continuation mode +2. **PDF support**: Via `document_url` content type with base64 +3. 
**Thinking mode**: Returns `{"type": "thinking", "thinking": [...]}` content blocks + +## Thinking/Reasoning (Magistral Models) + +### Response Content Structure + +Mistral's reasoning models (Magistral) return thinking in the response content: + +**Thinking Block** (in assistant message content): +```json +{ + "type": "thinking", + "thinking": [ + {"type": "text", "text": "Let me reason through this..."} + ] +} +``` + +**Note**: The `thinking` field is an **array** of text parts, not a string. + +### Streaming Response +When streaming, content can be a string OR array: +```json +{ + "choices": [{ + "delta": { + "role": "assistant", + "content": [ + {"type": "thinking", "thinking": [{"type": "text", "text": "reasoning..."}]}, + {"type": "text", "text": "final response"} + ] + } + }] +} +``` + +### SDK Conversion +The AI SDK extracts and converts Mistral's thinking blocks: +```typescript +// Mistral response content +{type: "thinking", thinking: [{type: "text", text: "..."}]} + +// Converted to SDK format +{type: "reasoning", text: "..."} +``` + +### Context Pruning for Thinking +- Thinking blocks appear as content items in assistant messages +- The nested `thinking` array contains text parts to concatenate +- No signatures or encryption - content is plaintext +- Consider thinking as important context but potentially large + +## Complete Example + +```json +{ + "model": "mistral-large-latest", + "messages": [ + {"role": "system", "content": "You are a helpful assistant."}, + {"role": "user", "content": [{"type": "text", "text": "Weather in NYC?"}]}, + { + "role": "assistant", + "content": "", + "tool_calls": [{ + "id": "abc123xyz", + "type": "function", + "function": {"name": "get_weather", "arguments": "{\"location\":\"New York City\"}"} + }] + }, + { + "role": "tool", + "name": "get_weather", + "tool_call_id": "abc123xyz", + "content": "{\"temperature\":72,\"condition\":\"sunny\"}" + } + ], + "tools": [{ + "type": "function", + "function": {"name": "get_weather", "description": "Get weather", "parameters": {"type": "object", "properties": {"location": {"type": "string"}}, "required": ["location"]}} + }], + "tool_choice": "auto" +} +``` + +## Unsupported Features + +- `topK` +- `frequencyPenalty` +- `presencePenalty` +- `stopSequences` + +## Context Pruning Considerations + +1. **9-char alphanumeric IDs**: When generating synthetic tool calls, IDs must be exactly 9 alphanumeric chars +2. **Tool correlation**: Uses `tool_call_id` like OpenAI +3. **User content always array**: Even single text becomes `[{"type": "text", "text": "..."}]` +4. **Tool name in result**: Tool result includes `name` field alongside `tool_call_id` +5. **Paired pruning**: Tool calls and results must be pruned together diff --git a/docs/providers/openai-compatible.md b/docs/providers/openai-compatible.md new file mode 100644 index 0000000..3406248 --- /dev/null +++ b/docs/providers/openai-compatible.md @@ -0,0 +1,135 @@ +# OpenAI-Compatible Providers + +Most providers in models.dev use the OpenAI Chat Completions format via `@ai-sdk/openai-compatible`. This document covers these providers and any provider-specific quirks. + +## Standard OpenAI Chat Completions Format + +See [openai.md](./openai.md) for the full format specification. 
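
Because every provider in this family shares the `role: "tool"` / `tool_call_id` pairing, a single pruning helper can cover all of them. A minimal sketch (hypothetical helper name, not the plugin's real format descriptor):

```typescript
// Sketch: overwrite a tool result's content with a pruned placeholder in an
// OpenAI-style `messages` array. Returns true if the result was found.
function pruneToolResult(messages: any[], toolCallId: string, placeholder: string): boolean {
  for (const m of messages) {
    if (m.role === "tool" && m.tool_call_id === toolCallId) {
      m.content = placeholder // the paired assistant tool_call stays intact
      return true
    }
  }
  return false
}
```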
+ +### Quick Reference + +```json +{ + "model": "model-name", + "messages": [ + {"role": "system", "content": "..."}, + {"role": "user", "content": "..."}, + {"role": "assistant", "content": "...", "tool_calls": [...]}, + {"role": "tool", "tool_call_id": "...", "content": "..."} + ], + "tools": [...], + "tool_choice": "auto" +} +``` + +## Providers Using OpenAI-Compatible Format + +Based on models.dev, these providers use `@ai-sdk/openai-compatible`: + +| Provider | Base URL | Notes | +|----------|----------|-------| +| together | api.together.xyz | | +| deepseek | api.deepseek.com | | +| groq | api.groq.com | Very fast inference | +| fireworks | api.fireworks.ai | | +| hyperbolic | api.hyperbolic.xyz | | +| novita | api.novita.ai | | +| cerebras | api.cerebras.ai | | +| sambanova | api.sambanova.ai | | +| nebius | api.studio.nebius.ai | | +| chutes | api.chutes.ai | | +| openrouter | openrouter.ai | Meta-provider | +| kluster | api.kluster.ai | | +| glhf | glhf.chat | | +| scaleway | api.scaleway.ai | | +| lepton | api.lepton.ai | | +| nano-gpt | api.nano-gpt.com | | +| arcee | api.arcee.ai | | +| inference-net | api.inference.net | | +| nineteen | api.nineteen.ai | | +| targon | api.targon.ai | | +| req-ai | api.req.ai | | +| vllm | (self-hosted) | | +| ollama | localhost:11434 | Local models | +| lmstudio | localhost:1234 | Local models | +| jan | localhost:1337 | Local models | +| any-provider | (configurable) | Generic OpenAI-compatible | + +## Provider-Specific Quirks + +### OpenRouter +- Acts as a meta-provider routing to various backends +- May have different caching semantics +- Supports `cache_control` similar to Anthropic when routing to Claude + +### Groq +- Extremely fast inference +- Limited model selection +- May have stricter rate limits + +### DeepSeek +- Supports reasoning models (DeepSeek R1) +- May include thinking/reasoning in responses + +### Ollama / LM Studio / Jan +- Local inference +- No rate limits but hardware-dependent +- May not support all features (vision, tools) + +### Together AI +- Wide model selection +- Good tool support +- Supports streaming + +## Caching Considerations + +Some OpenAI-compatible providers support caching hints: + +```json +{ + "role": "user", + "content": "...", + "cache_control": {"type": "ephemeral"} +} +``` + +Supported by: +- OpenRouter (when routing to Anthropic) +- Some enterprise deployments + +## Vision Support + +Not all OpenAI-compatible providers support vision. Check model capabilities: + +```json +{ + "role": "user", + "content": [ + {"type": "text", "text": "What's in this image?"}, + {"type": "image_url", "image_url": {"url": "data:image/jpeg;base64,..."}} + ] +} +``` + +## Tool Support + +Tool support varies by provider and model. Common limitations: +- Some models don't support parallel tool calls +- Some models don't support structured outputs/strict mode +- Response format (`json_object`) support varies + +## Context Pruning Considerations + +1. **Standard ID correlation**: All use `tool_call_id` for tool result correlation +2. **Consistent message format**: Messages follow OpenAI structure +3. **Feature detection**: May need to check model capabilities at runtime +4. **Cache support varies**: Not all providers honor cache hints +5. 
**Paired pruning**: Tool calls and results must be pruned together + +## Detection + +OpenAI-compatible requests can be detected by: +- `body.messages` array present +- Messages have `role` field with values: `system`, `user`, `assistant`, `tool` +- Tool results have `tool_call_id` field +- No special top-level fields like `contents` (Gemini) or `system` array (Bedrock/Anthropic) diff --git a/docs/providers/openai.md b/docs/providers/openai.md new file mode 100644 index 0000000..db24be4 --- /dev/null +++ b/docs/providers/openai.md @@ -0,0 +1,223 @@ +# OpenAI API Format + +OpenAI offers two API formats: **Chat Completions** (original) and **Responses** (newer). + +## Sources + +- **AI SDK**: `packages/openai/src/chat/openai-chat-language-model.ts`, `packages/openai/src/responses/openai-responses-language-model.ts` +- **AI SDK OpenAI-Compatible**: `packages/openai-compatible/src/chat/openai-compatible-chat-language-model.ts` +- **Official Docs**: https://platform.openai.com/docs/api-reference/chat +- **Responses API**: https://platform.openai.com/docs/api-reference/responses + +## Chat Completions API (`/chat/completions`) + +### Request Structure + +```json +{ + "model": "gpt-4o", + "messages": [...], + "tools": [...], + "tool_choice": "auto" | "none" | "required" | {"type": "function", "function": {"name": "..."}}, + "max_tokens": 4096, + "temperature": 0.7, + "response_format": {"type": "json_object"} | {"type": "json_schema", "json_schema": {...}}, + "stream": false +} +``` + +### Message Roles + +| Role | Description | +|------|-------------| +| `system` | System instructions | +| `user` | User input | +| `assistant` | Model responses | +| `tool` | Tool/function results | + +### Message Formats + +**System Message:** +```json +{"role": "system", "content": "You are a helpful assistant."} +``` + +**User Message (multimodal):** +```json +{ + "role": "user", + "content": [ + {"type": "text", "text": "What's in this image?"}, + {"type": "image_url", "image_url": {"url": "https://example.com/image.jpg", "detail": "auto"}}, + {"type": "file", "file": {"file_id": "file-abc123"}} + ] +} +``` + +**Assistant Message with Tool Calls:** +```json +{ + "role": "assistant", + "content": null, + "tool_calls": [ + { + "id": "call_abc123", + "type": "function", + "function": { + "name": "get_weather", + "arguments": "{\"location\": \"San Francisco\"}" + } + } + ] +} +``` + +**Tool Result Message:** +```json +{ + "role": "tool", + "tool_call_id": "call_abc123", + "content": "{\"temperature\": 72, \"condition\": \"sunny\"}" +} +``` + +### Tool Definition + +```json +{ + "type": "function", + "function": { + "name": "get_weather", + "description": "Get the current weather", + "parameters": { + "type": "object", + "properties": { + "location": {"type": "string"} + }, + "required": ["location"] + }, + "strict": true + } +} +``` + +--- + +## Responses API (`/responses`) + +### Key Differences from Chat Completions + +| Feature | Chat Completions | Responses API | +|---------|-----------------|---------------| +| Message array | `messages` | `input` | +| Tool call ID field | `tool_call_id` | `call_id` | +| System message | In messages | `instructions` field or in input | +| Token limit | `max_tokens` | `max_output_tokens` | +| Reasoning | Not supported | `reasoning` config | + +### Request Structure + +```json +{ + "model": "gpt-4o", + "input": [...], + "instructions": "Optional system instructions", + "tools": [...], + "tool_choice": "auto" | "none" | "required" | {"type": "function", "name": "..."}, + 
"max_output_tokens": 4096, + "previous_response_id": "resp_abc123", + "reasoning": { + "effort": "medium", + "summary": "auto" + }, + "stream": false +} +``` + +## Thinking/Reasoning (Responses API only) + +### Request Configuration +```json +{ + "reasoning": { + "effort": "low" | "medium" | "high", + "summary": "auto" | "concise" | "detailed" + } +} +``` + +**Parameters:** +- `effort`: How much reasoning effort (affects token usage) +- `summary`: How to summarize reasoning in response + +**Constraints when reasoning enabled:** +- `temperature` is **NOT supported** (use default) +- `topP` is **NOT supported** +- Only available on reasoning models (o1, o3, etc.) + +### Response Output Items + +**Reasoning Item** (in output array): +```json +{ + "type": "reasoning", + "id": "reasoning_abc123", + "encrypted_content": "encrypted_base64_reasoning_content", + "summary": [ + {"type": "summary_text", "text": "I analyzed the problem by..."} + ] +} +``` + +**Key fields:** +- `encrypted_content`: The actual reasoning is encrypted/hidden +- `summary`: Optional human-readable summary of reasoning + +### Usage Tracking +```json +{ + "usage": { + "input_tokens": 100, + "output_tokens": 200, + "output_tokens_details": { + "reasoning_tokens": 150 + } + } +} +``` + +### SDK Conversion +The AI SDK handles reasoning items: +```typescript +// OpenAI Responses output +{type: "reasoning", id: "...", encrypted_content: "...", summary: [...]} + +// Kept as reasoning type in SDK +{type: "reasoning", reasoningId: "...", text: "summary text"} +``` + +### Context Pruning for Reasoning +- **Encrypted content** cannot be inspected or modified +- **Summaries** provide readable insight into reasoning +- Reasoning items appear as separate items in `output` array +- `reasoning_tokens` in usage helps track cost + +--- + +## Context Pruning Considerations + +1. **Tool correlation**: Both formats use ID-based correlation (`tool_call_id` or `call_id`) +2. **Paired pruning**: Tool calls and their results should be pruned together +3. **Message roles**: 4 distinct roles in Chat Completions; Responses API uses item types +4. **Content types**: User content is `type: "text"/"image_url"` in Chat, `type: "input_text"/"input_image"` in Responses +5. **Assistant content**: String in Chat Completions, `output_text` array in Responses + +## OpenAI-Compatible Providers + +Most providers in models.dev use the OpenAI Chat Completions format via `@ai-sdk/openai-compatible`: +- together, deepseek, groq, fireworks, hyperbolic, novita, cerebras, sambanova, etc. + +These providers accept the same request format but may have different: +- Supported models +- Rate limits +- Feature availability (vision, tool use, etc.) diff --git a/lib/fetch-wrapper/formats/bedrock.ts b/lib/fetch-wrapper/formats/bedrock.ts new file mode 100644 index 0000000..26c1ca5 --- /dev/null +++ b/lib/fetch-wrapper/formats/bedrock.ts @@ -0,0 +1,148 @@ +import type { FormatDescriptor, ToolOutput } from "../types" +import type { PluginState } from "../../state" +import type { Logger } from "../../logger" +import type { ToolTracker } from "../../api-formats/synth-instruction" +import { cacheToolParametersFromMessages } from "../../state/tool-cache" +import { injectSynth, trackNewToolResults } from "../../api-formats/synth-instruction" +import { injectPrunableList } from "../../api-formats/prunable-list" + +/** + * Format descriptor for AWS Bedrock Converse API. 
+ *
+ * Bedrock format characteristics:
+ * - Top-level `system` array for system messages
+ * - `messages` array with only 'user' and 'assistant' roles
+ * - `inferenceConfig` for model parameters (maxTokens, temperature, etc.)
+ * - Tool calls: `toolUse` blocks in assistant content with `toolUseId`
+ * - Tool results: `toolResult` blocks in user content with `toolUseId`
+ * - Cache points: `cachePoint` blocks that should be preserved
+ */
+export const bedrockFormat: FormatDescriptor = {
+  name: 'bedrock',
+
+  detect(body: any): boolean {
+    // Bedrock carries a top-level system array AND an inferenceConfig object.
+    // inferenceConfig distinguishes it from Anthropic (which also uses a
+    // top-level system array); the system array distinguishes it from OpenAI,
+    // which keeps system prompts in the messages array.
+    return (
+      Array.isArray(body.system) &&
+      body.inferenceConfig !== undefined &&
+      Array.isArray(body.messages)
+    )
+  },
+
+  getDataArray(body: any): any[] | undefined {
+    return body.messages
+  },
+
+  cacheToolParameters(data: any[], state: PluginState, logger?: Logger): void {
+    // Bedrock stores tool calls in assistant message content as toolUse blocks.
+    // Extract toolUseId and tool name for later correlation with toolResult blocks.
+    for (const m of data) {
+      if (m.role === 'assistant' && Array.isArray(m.content)) {
+        for (const block of m.content) {
+          if (block.toolUse && block.toolUse.toolUseId) {
+            const toolUseId = block.toolUse.toolUseId.toLowerCase()
+            state.toolParameters.set(toolUseId, {
+              tool: block.toolUse.name,
+              parameters: block.toolUse.input
+            })
+            logger?.debug("bedrock", "Cached tool parameters", {
+              toolUseId,
+              toolName: block.toolUse.name
+            })
+          }
+        }
+      }
+    }
+    // Also run the generic message caching for any compatible structures
+    cacheToolParametersFromMessages(data, state, logger)
+  },
+
+  injectSynth(data: any[], instruction: string, nudgeText: string): boolean {
+    return injectSynth(data, instruction, nudgeText)
+  },
+
+  trackNewToolResults(data: any[], tracker: ToolTracker, protectedTools: Set<string>): number {
+    return trackNewToolResults(data, tracker, protectedTools)
+  },
+
+  injectPrunableList(data: any[], injection: string): boolean {
+    return injectPrunableList(data, injection)
+  },
+
+  extractToolOutputs(data: any[], state: PluginState): ToolOutput[] {
+    const outputs: ToolOutput[] = []
+
+    for (const m of data) {
+      // Bedrock tool results live in user messages as toolResult blocks
+      if (m.role === 'user' && Array.isArray(m.content)) {
+        for (const block of m.content) {
+          if (block.toolResult && block.toolResult.toolUseId) {
+            const toolUseId = block.toolResult.toolUseId.toLowerCase()
+            const metadata = state.toolParameters.get(toolUseId)
+            outputs.push({
+              id: toolUseId,
+              toolName: metadata?.tool
+            })
+          }
+        }
+      }
+    }
+
+    return outputs
+  },
+
+  replaceToolOutput(data: any[], toolId: string, prunedMessage: string, _state: PluginState): boolean {
+    const toolIdLower = toolId.toLowerCase()
+    let replaced = false
+
+    for (let i = 0; i < data.length; i++) {
+      const m = data[i]
+
+      // Tool results are in user messages as toolResult blocks
+      if (m.role === 'user' && Array.isArray(m.content)) {
+        let messageModified = false
+        const newContent = m.content.map((block: any) => {
+          if (block.toolResult && block.toolResult.toolUseId?.toLowerCase() === toolIdLower) {
+            messageModified = true
+            // Replace the content array inside toolResult with the pruned message
+            return {
+              ...block,
+              toolResult: {
+                ...block.toolResult,
+                content: [{ text: prunedMessage }]
+              }
+            }
+          }
+          return block
+        })
+        if (messageModified) {
+          data[i] = { ...m, content: newContent }
+          replaced = true
+        }
+      }
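+      // Assistant-side toolUse blocks are intentionally left untouched: only
+      // the toolResult payload is rewritten, so call/result pairing survives.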
+    }
+
+    return replaced
+  },
+
+  hasToolOutputs(data: any[]): boolean {
+    for (const m of data) {
+      if (m.role === 'user' && Array.isArray(m.content)) {
+        for (const block of m.content) {
+          if (block.toolResult) return true
+        }
+      }
+    }
+    return false
+  },
+
+  getLogMetadata(data: any[], replacedCount: number, inputUrl: string): Record<string, any> {
+    return {
+      url: inputUrl,
+      replacedCount,
+      totalMessages: data.length,
+      format: 'bedrock'
+    }
+  }
+}
diff --git a/lib/fetch-wrapper/formats/index.ts b/lib/fetch-wrapper/formats/index.ts
index 0132c87..0e01388 100644
--- a/lib/fetch-wrapper/formats/index.ts
+++ b/lib/fetch-wrapper/formats/index.ts
@@ -1,3 +1,4 @@
 export { openaiChatFormat } from './openai-chat'
 export { openaiResponsesFormat } from './openai-responses'
 export { geminiFormat } from './gemini'
+export { bedrockFormat } from './bedrock'
diff --git a/lib/fetch-wrapper/index.ts b/lib/fetch-wrapper/index.ts
index abcf5ad..1c14444 100644
--- a/lib/fetch-wrapper/index.ts
+++ b/lib/fetch-wrapper/index.ts
@@ -3,7 +3,7 @@ import type { Logger } from "../logger"
 import type { FetchHandlerContext, SynthPrompts } from "./types"
 import type { ToolTracker } from "../api-formats/synth-instruction"
 import type { PluginConfig } from "../config"
-import { openaiChatFormat, openaiResponsesFormat, geminiFormat } from "./formats"
+import { openaiChatFormat, openaiResponsesFormat, geminiFormat, bedrockFormat } from "./formats"
 import { handleFormat } from "./handler"
 import { runStrategies } from "../core/strategies"
 import { accumulateGCStats } from "./gc-tracker"
@@ -15,11 +15,12 @@ export type { FetchHandlerContext, FetchHandlerResult, SynthPrompts } from "./ty
  * Creates a wrapped global fetch that intercepts API calls and performs
  * context pruning on tool outputs that have been marked for removal.
  *
- * Supports four API formats:
+ * Supports five API formats:
  * 1. OpenAI Chat Completions (body.messages with role='tool')
  * 2. Anthropic (body.messages with role='user' containing tool_result)
  * 3. Google/Gemini (body.contents with functionResponse parts)
  * 4. OpenAI Responses API (body.input with function_call_output items)
+ * 5. AWS Bedrock Converse API (body.system + body.messages with toolResult blocks)
  */
 export function installFetchWrapper(
   state: PluginState,
@@ -57,12 +58,20 @@ export function installFetchWrapper(
     const toolIdsBefore = new Set(state.toolParameters.keys())
 
     // Mutually exclusive format handlers
+    // Note: bedrockFormat must be checked before openaiChatFormat, since both have messages[],
+    // but Bedrock is distinguished by its top-level system[] array and inferenceConfig
     if (openaiResponsesFormat.detect(body)) {
       const result = await handleFormat(body, ctx, inputUrl, openaiResponsesFormat)
       if (result.modified) {
         modified = true
       }
     }
+    else if (bedrockFormat.detect(body)) {
+      const result = await handleFormat(body, ctx, inputUrl, bedrockFormat)
+      if (result.modified) {
+        modified = true
+      }
+    }
     else if (openaiChatFormat.detect(body)) {
       const result = await handleFormat(body, ctx, inputUrl, openaiChatFormat)
       if (result.modified) {