From a45ac176f8cbb3148ad2b417ff6ff66391596161 Mon Sep 17 00:00:00 2001 From: Hannes Rudolph Date: Thu, 4 Sep 2025 23:20:58 -0600 Subject: [PATCH 1/5] Add release notes for v3.26.7 - Added Kimi K2-0905 models with 256K context window - Added OpenAI service tiers (Standard/Flex/Priority) - Added DeepInfra provider with 100+ models - Fixed multiple bugs including MCP validation, zsh command safety - Updated combined v3.26 notes with new features --- docs/update-notes/index.md | 1 + docs/update-notes/v3.26.7.mdx | 63 +++++++++++++++++++++++++++++++++++ docs/update-notes/v3.26.mdx | 31 +++++++++++++++++ sidebars.ts | 1 + 4 files changed, 96 insertions(+) create mode 100644 docs/update-notes/v3.26.7.mdx diff --git a/docs/update-notes/index.md b/docs/update-notes/index.md index fbbe1427..1ceb66d4 100644 --- a/docs/update-notes/index.md +++ b/docs/update-notes/index.md @@ -19,6 +19,7 @@ image: /img/social-share.jpg ### Version 3.26 +* [3.26.7](/update-notes/v3.26.7) (2025-09-05) * [3.26.6](/update-notes/v3.26.6) (2025-09-03) * [3.26.5](/update-notes/v3.26.5) (2025-09-03) * [3.26.4](/update-notes/v3.26.4) (2025-09-01) diff --git a/docs/update-notes/v3.26.7.mdx b/docs/update-notes/v3.26.7.mdx new file mode 100644 index 00000000..16d1dbb4 --- /dev/null +++ b/docs/update-notes/v3.26.7.mdx @@ -0,0 +1,63 @@ +--- +description: Enhanced Kimi K2 models with 256K+ context windows, OpenAI service tiers for flexible pricing, and DeepInfra as a new provider with 100+ models. +keywords: + - roo code 3.26.7 + - kimi k2 models + - openai service tiers + - deepinfra provider + - bug fixes +image: /img/social-share.jpg +--- + +# Roo Code 3.26.7 Release Notes (2025-09-05) + +This release brings enhanced Kimi K2 models with massive context windows, OpenAI service tier selection, and DeepInfra as a new provider offering 100+ models. 
+ +## Kimi K2-0905: Moonshot's Latest Open Source Model is Live in Roo Code + +We've upgraded to the latest Kimi K2-0905 models across multiple providers (thanks CellenLee!) ([#7663](https://github.com/RooCodeInc/Roo-Code/pull/7663), [#7693](https://github.com/RooCodeInc/Roo-Code/pull/7693)): + +K2-0905 comes with three major upgrades: +- **256K Context Window**: Massive context supporting up to 256K-262K tokens, doubling the previous limit for processing much larger documents and conversations +- **Improved Tool Calling**: Enhanced function calling and tool use capabilities for better agentic workflows +- **Enhanced Front-end Development**: Superior HTML, CSS, and JavaScript generation with modern framework support + +Available through Groq, Moonshot, and Fireworks providers. These models excel at handling large codebases, long conversations, and complex multi-file operations. + +## OpenAI Service Tiers + +We've added support for OpenAI's new Responses API service tiers ([#7646](https://github.com/RooCodeInc/Roo-Code/pull/7646)): + +- **Standard Tier**: Default tier with regular pricing +- **Flex Tier**: 50% discount with slightly longer response times for non-urgent tasks +- **Priority Tier**: Faster response times for time-critical operations + +Select your preferred tier directly in the UI based on your needs and budget. This gives you more control over costs while maintaining access to OpenAI's powerful models. + +> **📚 Documentation**: See [OpenAI Provider Guide](/providers/openai) for detailed tier comparison and pricing. + +## DeepInfra Provider + +DeepInfra is now available as a model provider (thanks Thachnh!) 
([#7677](https://github.com/RooCodeInc/Roo-Code/pull/7677)): + +- **100+ Models**: Access to a vast selection of open-source and frontier models +- **Competitive Pricing**: Very cost-effective rates compared to other providers +- **Automatic Prompt Caching**: Built-in prompt caching for supported models like Qwen3 Coder +- **Fast Inference**: Optimized infrastructure for quick response times + +DeepInfra is an excellent choice for developers looking for variety and value in their AI model selection. + +> **📚 Documentation**: See [DeepInfra Provider Setup](/providers/deepinfra) to get started. + +## QOL Improvements + +* **Shell Security**: Added shell executable allowlist validation with platform-specific fallbacks for improved command execution safety ([#7681](https://github.com/RooCodeInc/Roo-Code/pull/7681)) + +## Bug Fixes + +* **MCP Tool Validation**: Roo now validates MCP tool existence before execution and shows helpful error messages with available tools (thanks R-omk!) ([#7632](https://github.com/RooCodeInc/Roo-Code/pull/7632)) +* **OpenAI API Key Errors**: Clear error messages now display when API keys contain invalid characters instead of cryptic ByteString errors (thanks A0nameless0man!) ([#7586](https://github.com/RooCodeInc/Roo-Code/pull/7586)) +* **Follow-up Questions**: Fixed countdown timer incorrectly reappearing in task history for already-answered follow-up questions (thanks XuyiK!) ([#7686](https://github.com/RooCodeInc/Roo-Code/pull/7686)) +* **Moonshot Token Limit**: Resolved an issue where Moonshot models were incorrectly limited to 1024 tokens; they now properly respect configured limits (thanks wangxiaolong100, greyishsong!) 
([#7673](https://github.com/RooCodeInc/Roo-Code/pull/7673)) +* **Zsh Command Safety**: Improved handling of zsh process substitution and glob qualifiers to prevent auto-execution of potentially dangerous commands ([#7658](https://github.com/RooCodeInc/Roo-Code/pull/7658), [#7667](https://github.com/RooCodeInc/Roo-Code/pull/7667)) +* **Traditional Chinese Localization**: Fixed typo in zh-TW locale text (thanks PeterDaveHello!) ([#7672](https://github.com/RooCodeInc/Roo-Code/pull/7672)) \ No newline at end of file diff --git a/docs/update-notes/v3.26.mdx b/docs/update-notes/v3.26.mdx index cb0d4f0a..e190aee3 100644 --- a/docs/update-notes/v3.26.mdx +++ b/docs/update-notes/v3.26.mdx @@ -94,8 +94,32 @@ PRs: [#7474](https://github.com/RooCodeInc/Roo-Code/pull/7474), [#7492](https:// > **📚 Documentation**: See [Image Generation - Editing Existing Images](/features/image-generation#editing-existing-images) for transformation examples. +### Kimi K2-0905: Moonshot's Latest Open Source Model is Live in Roo Code + +We've upgraded to the latest Kimi K2-0905 models across multiple providers (thanks CellenLee!) ([#7663](https://github.com/RooCodeInc/Roo-Code/pull/7663), [#7693](https://github.com/RooCodeInc/Roo-Code/pull/7693)): + +K2-0905 comes with three major upgrades: +- **256K Context Window**: Massive context supporting up to 256K-262K tokens, doubling the previous limit for processing much larger documents and conversations +- **Improved Tool Calling**: Enhanced function calling and tool use capabilities for better agentic workflows +- **Enhanced Front-end Development**: Superior HTML, CSS, and JavaScript generation with modern framework support + +Available through Groq, Moonshot, and Fireworks providers. These models excel at handling large codebases, long conversations, and complex multi-file operations. 
+ +### OpenAI Service Tiers + +We've added support for OpenAI's new Responses API service tiers ([#7646](https://github.com/RooCodeInc/Roo-Code/pull/7646)): + +- **Standard Tier**: Default tier with regular pricing +- **Flex Tier**: 50% discount with slightly longer response times for non-urgent tasks +- **Priority Tier**: Faster response times for time-critical operations + +Select your preferred tier directly in the UI based on your needs and budget. This gives you more control over costs while maintaining access to OpenAI's powerful models. + +> **📚 Documentation**: See [OpenAI Provider Guide](/providers/openai) for detailed tier comparison and pricing. + ### Provider Updates +* **DeepInfra Provider**: DeepInfra is now available as a model provider with 100+ open-source and frontier models, competitive pricing, and automatic prompt caching for supported models like Qwen3 Coder (thanks Thachnh!) ([#7677](https://github.com/RooCodeInc/Roo-Code/pull/7677)) * **Kimi K2 Turbo Model**: Added support for the high-speed Kimi K2 Turbo model with 60-100 tokens/sec processing and a 131K token context window (thanks wangxiaolong100!) ([#7593](https://github.com/RooCodeInc/Roo-Code/pull/7593)) * **Qwen3 235B Thinking Model**: Added support for Qwen3-235B-A22B-Thinking-2507 model with an impressive 262K context window, enabling processing of extremely long documents and large codebases in a single request through the Chutes provider (thanks mohammad154, apple-techie!) ([#7578](https://github.com/RooCodeInc/Roo-Code/pull/7578)) * **Ollama Turbo Mode**: Added API key support for Turbo mode, enabling faster model execution with datacenter-grade hardware (thanks LivioGama!) 
([#7425](https://github.com/RooCodeInc/Roo-Code/pull/7425)) @@ -104,6 +128,7 @@ PRs: [#7474](https://github.com/RooCodeInc/Roo-Code/pull/7474), [#7492](https:// ### QOL Improvements +* **Shell Security**: Added shell executable allowlist validation with platform-specific fallbacks for improved command execution safety ([#7681](https://github.com/RooCodeInc/Roo-Code/pull/7681)) * **Settings Scroll Position**: Settings tabs now remember their individual scroll positions when switching between them (thanks DC-Dancao!) ([#7587](https://github.com/RooCodeInc/Roo-Code/pull/7587)) * **MCP Resource Auto-Approval**: MCP resource access requests are now automatically approved when auto-approve is enabled, eliminating manual approval steps and enabling smoother automation workflows (thanks m-ibm!) ([#7606](https://github.com/RooCodeInc/Roo-Code/pull/7606)) * **Message Queue Performance**: Improved message queueing reliability and performance by moving the queue management to the extension host, making the interface more stable ([#7604](https://github.com/RooCodeInc/Roo-Code/pull/7604)) @@ -122,6 +147,12 @@ PRs: [#7474](https://github.com/RooCodeInc/Roo-Code/pull/7474), [#7492](https:// ### Bug Fixes +* **MCP Tool Validation**: Roo now validates MCP tool existence before execution and shows helpful error messages with available tools (thanks R-omk!) ([#7632](https://github.com/RooCodeInc/Roo-Code/pull/7632)) +* **OpenAI API Key Errors**: Clear error messages now display when API keys contain invalid characters instead of cryptic ByteString errors (thanks A0nameless0man!) ([#7586](https://github.com/RooCodeInc/Roo-Code/pull/7586)) +* **Follow-up Questions**: Fixed countdown timer incorrectly reappearing in task history for already answered follow-up questions (thanks XuyiK!) 
([#7686](https://github.com/RooCodeInc/Roo-Code/pull/7686)) +* **Moonshot Token Limit**: Resolved an issue where Moonshot models were incorrectly limited to 1024 tokens; they now properly respect configured limits (thanks wangxiaolong100, greyishsong!) ([#7673](https://github.com/RooCodeInc/Roo-Code/pull/7673)) +* **Zsh Command Safety**: Improved handling of zsh process substitution and glob qualifiers to prevent auto-execution of potentially dangerous commands ([#7658](https://github.com/RooCodeInc/Roo-Code/pull/7658), [#7667](https://github.com/RooCodeInc/Roo-Code/pull/7667)) +* **Traditional Chinese Localization**: Fixed typo in zh-TW locale text (thanks PeterDaveHello!) ([#7672](https://github.com/RooCodeInc/Roo-Code/pull/7672)) * **Tool Approval Fix**: Fixed an error that occurred when using insert_content and search_and_replace tools on write-protected files - these tools now handle file protection correctly ([#7649](https://github.com/RooCodeInc/Roo-Code/pull/7649)) * **Configurable Embedding Batch Size**: Fixed an issue where users with API providers having stricter batch limits couldn't use code indexing. You can now configure the embedding batch size (1-2048, default: 400) to match your provider's limits (thanks BenLampson!) 
([#7464](https://github.com/RooCodeInc/Roo-Code/pull/7464)) * **OpenAI-Native Cache Reporting**: Fixed cache usage statistics and cost calculations when using the OpenAI-Native provider with cached content ([#7602](https://github.com/RooCodeInc/Roo-Code/pull/7602)) diff --git a/sidebars.ts b/sidebars.ts index 51a6853f..5b112eae 100644 --- a/sidebars.ts +++ b/sidebars.ts @@ -221,6 +221,7 @@ const sidebars: SidebarsConfig = { label: '3.26', items: [ { type: 'doc', id: 'update-notes/v3.26', label: '3.26 Combined' }, + { type: 'doc', id: 'update-notes/v3.26.7', label: '3.26.7' }, { type: 'doc', id: 'update-notes/v3.26.6', label: '3.26.6' }, { type: 'doc', id: 'update-notes/v3.26.5', label: '3.26.5' }, { type: 'doc', id: 'update-notes/v3.26.4', label: '3.26.4' }, From 139eebf399308aae3950515b148598a33434c7ad Mon Sep 17 00:00:00 2001 From: Roo Code Date: Fri, 5 Sep 2025 05:34:40 +0000 Subject: [PATCH 2/5] docs: add DeepInfra provider extraction report --- deepinfra-provider-extraction.md | 365 +++++++++++++++++++++++++++++++ 1 file changed, 365 insertions(+) create mode 100644 deepinfra-provider-extraction.md diff --git a/deepinfra-provider-extraction.md b/deepinfra-provider-extraction.md new file mode 100644 index 00000000..091c41f2 --- /dev/null +++ b/deepinfra-provider-extraction.md @@ -0,0 +1,365 @@ + +# DeepInfra Provider - Feature Extraction Report + +## Executive Summary +DeepInfra is a model provider integration in Roo Code that offers access to various AI models through DeepInfra's API infrastructure. It provides a cost-effective way to access high-performance models including Qwen, Llama, and other open-source models with features like prompt caching, vision support, and reasoning capabilities. + +## UI/UX Analysis + +### User Interface Components + +#### 1. 
Provider Selection (`webview-ui/src/components/settings/constants.ts`) +**Visual Layout:** +- DeepInfra appears in the provider dropdown list +- Position: Between OpenRouter and Anthropic in the provider list +- Label: "DeepInfra" (user-friendly name) +- Value: "deepinfra" (internal identifier) + +#### 2. Settings Panel (`webview-ui/src/components/settings/providers/DeepInfra.tsx`) +**Visual Elements:** +- **API Key Input Field** + - Type: Password field (masked input) + - Placeholder: Localized "API Key" placeholder text + - Label: "API Key" (font-medium, mb-1 spacing) + - Full width text field using VSCode's webview UI toolkit + - Real-time input handling with onChange events + +- **Refresh Models Button** + - Visual: Outline variant button with icon + - Icon: Codicon refresh icon (spinning animation) + - Text: "Refresh Models" (localized) + - Feedback: Shows hint text after refresh + - Error state: Red text color for error messages + +- **Model Picker Component** + - Dropdown selector for available models + - Default selection: Qwen/Qwen3-Coder-480B-A35B-Instruct-Turbo + - External link: "Browse models at deepinfra.com/models" + - Error display: Shows validation errors in red + - Organization restrictions: Respects allow lists + +**User Feedback:** +- Loading states during model fetching +- Success confirmation after refresh +- Error messages for invalid API keys +- Hint text: "Models refreshed. Check the model dropdown." + +### User Experience Elements + +#### Visual Patterns +**Consistent VSCode Integration:** +- Uses VSCode's native color variables +- Follows VSCode's dark/light theme automatically +- Consistent spacing and typography with other providers +- Standard form field styling + +**Interactive Behaviors:** +- Auto-save on field changes (debounced) +- Silent model refresh on API key/URL changes +- Immediate visual feedback on interactions +- Keyboard accessible (tab navigation) + +### User Workflows + +#### 1. Initial Setup +``` +User Journey: +1. 
Open Settings → Navigate to API Provider section +2. Select "DeepInfra" from provider dropdown +3. Enter API Key (obtained from deepinfra.com) + → Field masks input for security + → Auto-validates format +4. Models auto-populate after valid key entry +5. Select desired model from dropdown + → Default: Qwen3-Coder-480B + → Shows model descriptions +6. Configuration auto-saves +``` + +#### 2. Model Selection and Management +``` +Workflow: +1. View available models in dropdown + → Shows model ID and description + → Indicates capabilities (vision, caching) +2. Click "Browse models" link + → Opens deepinfra.com/models in browser + → User can explore full catalog +3. Click "Refresh Models" if needed + → Fetches latest model list + → Shows refresh confirmation +4. Select different model + → Immediate effect on next conversation + → Preserves selection across sessions +``` + +#### 3. Troubleshooting Flow +``` +Error Recovery: +1. Invalid API Key + → Error message appears + → Models list shows as empty + → User corrects API key +2. Network Issues + → Timeout message shown + → Retry with "Refresh Models" + → Falls back to default model +3. Model Unavailable + → Automatically uses fallback model + → Shows warning to user + → Suggests refresh or different model +``` + +## Technical Details + +### Core Components + +#### 1. **DeepInfraHandler** (`src/api/providers/deepinfra.ts`) +- **Class Hierarchy**: Extends `RouterProvider` → `BaseProvider` +- **Interfaces**: Implements `SingleCompletionHandler` +- **Key Methods**: + - `createMessage()`: Handles streaming chat completions + - `completePrompt()`: Non-streaming completions + - `fetchModel()`: Retrieves available models + - `processUsageMetrics()`: Calculates costs and token usage + +#### 2. 
**Model Fetcher** (`src/api/providers/fetchers/deepinfra.ts`) +- **API Endpoint**: `/models` (OpenAI-compatible) +- **Response Parsing**: Zod schema validation +- **Metadata Extraction**: + ```typescript + { + contextWindow: number, // Default: 8192 + maxTokens: number, // Default: 20% of context + supportsImages: boolean, // From tags + supportsPromptCache: boolean, // From tags + inputPrice: number, // Per million tokens + outputPrice: number, // Per million tokens + cacheReadsPrice: number, // Discounted cache reads + } + ``` + +### API Integration + +#### Request Configuration +```typescript +{ + baseURL: "https://api.deepinfra.com/v1/openai", + headers: { + "Authorization": "Bearer {API_KEY}", + "X-Deepinfra-Source": "roo-code", + "X-Deepinfra-Version": "2025-08-25" + } +} +``` + +#### Streaming Response Handling +- Supports text chunks via `delta.content` +- Handles reasoning content via `delta.reasoning_content` +- Includes usage metrics in stream +- Processes cache read/write tokens + +### Configuration Options + +| Setting | Type | Default | Description | +|---------|------|---------|-------------| +| `deepInfraApiKey` | string | - | API authentication key | +| `deepInfraBaseUrl` | string | https://api.deepinfra.com/v1/openai | API endpoint | +| `deepInfraModelId` | string | Qwen/Qwen3-Coder-480B-A35B-Instruct-Turbo | Selected model | +| `modelTemperature` | number | 0 | Response randomness (0-2) | +| `includeMaxTokens` | boolean | true | Include max tokens in requests | +| `modelMaxTokens` | number | Model default | Maximum response length | + +### Advanced Features + +#### 1. **Prompt Caching** +- Enabled for models with `prompt_cache` tag +- Uses task ID as cache key +- Reduces costs for repeated contexts +- Automatic cache management + +#### 2. **Vision Support** +- Detected via model tags +- Enables image input for compatible models +- Seamless integration with Roo's image handling + +#### 3. 
**Reasoning Models** +- Special handling for reasoning content +- Separate token tracking for thinking +- Supports models like o1-preview variants + +#### 4. **Dynamic Model Discovery** +- Real-time model list fetching +- Automatic capability detection +- Pricing information extraction +- Fallback to defaults on failure + +## Non-Technical Information + +### Business Value +1. **Cost Efficiency** + - Competitive pricing vs. direct API access + - Prompt caching reduces repeated costs + - Pay-per-use model with no minimums + +2. **Model Variety** + - Access to latest open-source models + - Specialized coding models (Qwen Coder) + - Vision and multimodal capabilities + - Different size/speed tradeoffs + +3. **Performance Benefits** + - Low latency infrastructure + - High availability + - Automatic load balancing + - Global edge locations + +### Common Use Cases + +#### For Developers +- **Code Generation**: Qwen Coder models excel at programming tasks +- **Debugging**: Large context windows for entire codebases +- **Documentation**: Generate technical docs with code understanding +- **Refactoring**: Analyze and improve existing code + +#### For Teams +- **Shared Infrastructure**: Single API key for team +- **Model Experimentation**: Try different models easily +- **Cost Control**: Usage-based pricing, no subscriptions +- **Compliance**: Data processing transparency + +### User Benefits +1. **Ease of Use** + - Simple API key setup + - Automatic model discovery + - Sensible defaults + - No complex configuration + +2. **Flexibility** + - Switch models on-the-fly + - Custom base URLs for enterprise + - Temperature and token controls + - Organization-level restrictions + +3. 
**Reliability** + - Automatic fallbacks + - Error recovery + - Model availability checks + - Usage tracking + +## Integration Points + +### External Dependencies +- **DeepInfra API**: Primary service dependency +- **Model Catalog**: deepinfra.com/models for browsing +- **Authentication**: Bearer token via API key + +### Internal Integration +- **Provider Registry**: Registered as "deepinfra" provider +- **Model Cache**: 5-minute TTL for model lists +- **Cost Calculation**: OpenAI-style pricing model +- **Streaming**: Full streaming support with usage metrics +- **Context Management**: Supports Roo's context window handling + +### Data Flow +``` +User Input → Roo Code → DeepInfraHandler → DeepInfra API + ↓ ↓ + Token Counting Model Processing + ↓ ↓ + Cost Calculation Streaming Response + ↓ ↓ + UI Update ← Stream Processing ← API Response +``` + +## Security Considerations + +### API Key Management +- Stored securely in VSCode settings +- Never exposed in UI (password field) +- Transmitted only via HTTPS +- No key logging or debugging output + +### Data Privacy +- Direct API communication (no proxies) +- No request/response caching by default +- Optional prompt caching with explicit task IDs +- Headers identify Roo Code as source + +## Performance Characteristics + +### Response Times +- Initial connection: ~200-500ms +- First token: ~500-1000ms (model dependent) +- Streaming rate: 50-200 tokens/second +- Model list fetch: ~500ms + +### Resource Usage +- Minimal memory overhead +- No local model storage +- Efficient streaming processing +- Automatic connection pooling + +## Error Handling + +### Common Error Scenarios +1. **Invalid API Key** + - Clear error message to user + - Falls back to no models available + - Suggests checking API key + +2. **Network Timeout** + - Automatic retry with backoff + - User-friendly timeout message + - Manual refresh option + +3. 
**Model Unavailable** + - Automatic fallback to default + - Warning shown to user + - Model list refresh suggested + +4. **Rate Limiting** + - Respects rate limit headers + - Automatic request throttling + - User notification of limits + +## Documentation Recommendations + +### Critical Areas for User Documentation +1. **Getting Started Guide** + - How to obtain DeepInfra API key + - Step-by-step setup screenshots + - Model selection guidance + - First conversation example + +2. **Model Selection Guide** + - Comparison of available models + - Use case recommendations + - Performance vs. cost tradeoffs + - Context window considerations + +3. **Troubleshooting Section** + - Common error messages and fixes + - API key validation steps + - Network configuration tips + - Model availability checking + +### Developer Integration Guide +1. **API Configuration** + - Custom base URL setup + - Header customization + - Proxy configuration + - Enterprise deployment + +2. **Advanced Features** + - Prompt caching strategies + - Vision model usage + - Reasoning model handling + - Cost optimization tips + +## Summary for Documentation Team + +This extraction report provides comprehensive details about the DeepInfra provider integration in Roo Code. The implementation offers a seamless user experience with automatic model discovery, intelligent fallbacks, and comprehensive error handling. 
+ +Key highlights for documentation: +- Simple one-time setup with just \ No newline at end of file From 8bc0150de1ac44e74e511c435a812d8cb2dda5dc Mon Sep 17 00:00:00 2001 From: Roo Code Date: Fri, 5 Sep 2025 05:36:57 +0000 Subject: [PATCH 3/5] docs: add DeepInfra provider documentation --- docs/providers/deepinfra.md | 90 +++++++++++++++++++++++++++++++++++++ 1 file changed, 90 insertions(+) create mode 100644 docs/providers/deepinfra.md diff --git a/docs/providers/deepinfra.md b/docs/providers/deepinfra.md new file mode 100644 index 00000000..ed8963df --- /dev/null +++ b/docs/providers/deepinfra.md @@ -0,0 +1,90 @@ +--- +sidebar_label: DeepInfra +description: Configure DeepInfra's high-performance AI models in Roo Code. Access Qwen Coder, Llama, and other open-source models with prompt caching and vision capabilities. +keywords: + - deepinfra + - deep infra + - roo code + - api provider + - qwen coder + - llama models + - prompt caching + - vision models + - open source ai +image: /img/social-share.jpg +--- + +# Using DeepInfra With Roo Code + +DeepInfra provides cost-effective access to high-performance open-source models with features like prompt caching, vision support, and specialized coding models. Their infrastructure offers low latency and automatic load balancing across global edge locations. + +**Website:** [https://deepinfra.com/](https://deepinfra.com/) + +--- + +## Getting an API Key + +1. **Sign Up/Sign In:** Go to [DeepInfra](https://deepinfra.com/). Create an account or sign in. +2. **Navigate to API Keys:** Access the API keys section in your dashboard. +3. **Create a Key:** Generate a new API key. Give it a descriptive name (e.g., "Roo Code"). +4. **Copy the Key:** **Important:** Copy the API key immediately. Store it securely. + +--- + +## Supported Models + +Roo Code dynamically fetches available models from DeepInfra's API. 
The default model is: + +* `Qwen/Qwen3-Coder-480B-A35B-Instruct-Turbo` (256K context, optimized for coding) + +Common models available include: + +* **Coding Models:** Qwen Coder series, specialized for programming tasks +* **General Models:** Llama 3.1, Mixtral, and other open-source models +* **Vision Models:** Models with image understanding capabilities +* **Reasoning Models:** Models with advanced reasoning support + +Browse the full catalog at [deepinfra.com/models](https://deepinfra.com/models). + +--- + +## Configuration in Roo Code + +1. **Open Roo Code Settings:** Click the gear icon (<Codicon name="gear" />) in the Roo Code panel. +2. **Select Provider:** Choose "DeepInfra" from the "API Provider" dropdown. +3. **Enter API Key:** Paste your DeepInfra API key into the "DeepInfra API Key" field. +4. **Select Model:** Choose your desired model from the "Model" dropdown. + - Models will auto-populate after entering a valid API key + - Click "Refresh Models" to update the list + +--- + +## Advanced Features + +### Prompt Caching + +DeepInfra supports prompt caching for eligible models, which: +- Reduces costs for repeated contexts +- Improves response times for similar queries +- Automatically manages cache based on task IDs + +### Vision Support + +Models with vision capabilities can: +- Process images alongside text +- Understand visual content for coding tasks +- Analyze screenshots and diagrams + +### Custom Base URL + +For enterprise deployments, you can configure a custom base URL in the advanced settings. + +--- + +## Tips and Notes + +* **Performance:** DeepInfra offers low latency with automatic load balancing across global locations. +* **Cost Efficiency:** Competitive pricing with prompt caching to reduce costs for repeated contexts. +* **Model Variety:** Access to the latest open-source models including specialized coding models. +* **Context Windows:** Models support context windows up to 256K tokens for large codebases. 
+* **Pricing:** Pay-per-use model with no minimums. Check [deepinfra.com](https://deepinfra.com/) for current pricing. \ No newline at end of file From a50b73ef4243b33a9f3c8a12a7e9390af05f71a3 Mon Sep 17 00:00:00 2001 From: Roo Code Date: Fri, 5 Sep 2025 05:37:30 +0000 Subject: [PATCH 4/5] chore: remove temporary extraction notes --- deepinfra-provider-extraction.md | 365 ------------------------------- 1 file changed, 365 deletions(-) delete mode 100644 deepinfra-provider-extraction.md diff --git a/deepinfra-provider-extraction.md b/deepinfra-provider-extraction.md deleted file mode 100644 index 091c41f2..00000000 --- a/deepinfra-provider-extraction.md +++ /dev/null @@ -1,365 +0,0 @@ - -# DeepInfra Provider - Feature Extraction Report - -## Executive Summary -DeepInfra is a model provider integration in Roo Code that offers access to various AI models through DeepInfra's API infrastructure. It provides a cost-effective way to access high-performance models including Qwen, Llama, and other open-source models with features like prompt caching, vision support, and reasoning capabilities. - -## UI/UX Analysis - -### User Interface Components - -#### 1. Provider Selection (`webview-ui/src/components/settings/constants.ts`) -**Visual Layout:** -- DeepInfra appears in the provider dropdown list -- Position: Between OpenRouter and Anthropic in the provider list -- Label: "DeepInfra" (user-friendly name) -- Value: "deepinfra" (internal identifier) - -#### 2. 
Settings Panel (`webview-ui/src/components/settings/providers/DeepInfra.tsx`) -**Visual Elements:** -- **API Key Input Field** - - Type: Password field (masked input) - - Placeholder: Localized "API Key" placeholder text - - Label: "API Key" (font-medium, mb-1 spacing) - - Full width text field using VSCode's webview UI toolkit - - Real-time input handling with onChange events - -- **Refresh Models Button** - - Visual: Outline variant button with icon - - Icon: Codicon refresh icon (spinning animation) - - Text: "Refresh Models" (localized) - - Feedback: Shows hint text after refresh - - Error state: Red text color for error messages - -- **Model Picker Component** - - Dropdown selector for available models - - Default selection: Qwen/Qwen3-Coder-480B-A35B-Instruct-Turbo - - External link: "Browse models at deepinfra.com/models" - - Error display: Shows validation errors in red - - Organization restrictions: Respects allow lists - -**User Feedback:** -- Loading states during model fetching -- Success confirmation after refresh -- Error messages for invalid API keys -- Hint text: "Models refreshed. Check the model dropdown." - -### User Experience Elements - -#### Visual Patterns -**Consistent VSCode Integration:** -- Uses VSCode's native color variables -- Follows VSCode's dark/light theme automatically -- Consistent spacing and typography with other providers -- Standard form field styling - -**Interactive Behaviors:** -- Auto-save on field changes (debounced) -- Silent model refresh on API key/URL changes -- Immediate visual feedback on interactions -- Keyboard accessible (tab navigation) - -### User Workflows - -#### 1. Initial Setup -``` -User Journey: -1. Open Settings → Navigate to API Provider section -2. Select "DeepInfra" from provider dropdown -3. Enter API Key (obtained from deepinfra.com) - → Field masks input for security - → Auto-validates format -4. Models auto-populate after valid key entry -5. 
Select desired model from dropdown - → Default: Qwen3-Coder-480B - → Shows model descriptions -6. Configuration auto-saves -``` - -#### 2. Model Selection and Management -``` -Workflow: -1. View available models in dropdown - → Shows model ID and description - → Indicates capabilities (vision, caching) -2. Click "Browse models" link - → Opens deepinfra.com/models in browser - → User can explore full catalog -3. Click "Refresh Models" if needed - → Fetches latest model list - → Shows refresh confirmation -4. Select different model - → Immediate effect on next conversation - → Preserves selection across sessions -``` - -#### 3. Troubleshooting Flow -``` -Error Recovery: -1. Invalid API Key - → Error message appears - → Models list shows as empty - → User corrects API key -2. Network Issues - → Timeout message shown - → Retry with "Refresh Models" - → Falls back to default model -3. Model Unavailable - → Automatically uses fallback model - → Shows warning to user - → Suggests refresh or different model -``` - -## Technical Details - -### Core Components - -#### 1. **DeepInfraHandler** (`src/api/providers/deepinfra.ts`) -- **Class Hierarchy**: Extends `RouterProvider` → `BaseProvider` -- **Interfaces**: Implements `SingleCompletionHandler` -- **Key Methods**: - - `createMessage()`: Handles streaming chat completions - - `completePrompt()`: Non-streaming completions - - `fetchModel()`: Retrieves available models - - `processUsageMetrics()`: Calculates costs and token usage - -#### 2. 
**Model Fetcher** (`src/api/providers/fetchers/deepinfra.ts`) -- **API Endpoint**: `/models` (OpenAI-compatible) -- **Response Parsing**: Zod schema validation -- **Metadata Extraction**: - ```typescript - { - contextWindow: number, // Default: 8192 - maxTokens: number, // Default: 20% of context - supportsImages: boolean, // From tags - supportsPromptCache: boolean, // From tags - inputPrice: number, // Per million tokens - outputPrice: number, // Per million tokens - cacheReadsPrice: number, // Discounted cache reads - } - ``` - -### API Integration - -#### Request Configuration -```typescript -{ - baseURL: "https://api.deepinfra.com/v1/openai", - headers: { - "Authorization": "Bearer {API_KEY}", - "X-Deepinfra-Source": "roo-code", - "X-Deepinfra-Version": "2025-08-25" - } -} -``` - -#### Streaming Response Handling -- Supports text chunks via `delta.content` -- Handles reasoning content via `delta.reasoning_content` -- Includes usage metrics in stream -- Processes cache read/write tokens - -### Configuration Options - -| Setting | Type | Default | Description | -|---------|------|---------|-------------| -| `deepInfraApiKey` | string | - | API authentication key | -| `deepInfraBaseUrl` | string | https://api.deepinfra.com/v1/openai | API endpoint | -| `deepInfraModelId` | string | Qwen/Qwen3-Coder-480B-A35B-Instruct-Turbo | Selected model | -| `modelTemperature` | number | 0 | Response randomness (0-2) | -| `includeMaxTokens` | boolean | true | Include max tokens in requests | -| `modelMaxTokens` | number | Model default | Maximum response length | - -### Advanced Features - -#### 1. **Prompt Caching** -- Enabled for models with `prompt_cache` tag -- Uses task ID as cache key -- Reduces costs for repeated contexts -- Automatic cache management - -#### 2. **Vision Support** -- Detected via model tags -- Enables image input for compatible models -- Seamless integration with Roo's image handling - -#### 3. 
**Reasoning Models** -- Special handling for reasoning content -- Separate token tracking for thinking -- Supports models like o1-preview variants - -#### 4. **Dynamic Model Discovery** -- Real-time model list fetching -- Automatic capability detection -- Pricing information extraction -- Fallback to defaults on failure - -## Non-Technical Information - -### Business Value -1. **Cost Efficiency** - - Competitive pricing vs. direct API access - - Prompt caching reduces repeated costs - - Pay-per-use model with no minimums - -2. **Model Variety** - - Access to latest open-source models - - Specialized coding models (Qwen Coder) - - Vision and multimodal capabilities - - Different size/speed tradeoffs - -3. **Performance Benefits** - - Low latency infrastructure - - High availability - - Automatic load balancing - - Global edge locations - -### Common Use Cases - -#### For Developers -- **Code Generation**: Qwen Coder models excel at programming tasks -- **Debugging**: Large context windows for entire codebases -- **Documentation**: Generate technical docs with code understanding -- **Refactoring**: Analyze and improve existing code - -#### For Teams -- **Shared Infrastructure**: Single API key for team -- **Model Experimentation**: Try different models easily -- **Cost Control**: Usage-based pricing, no subscriptions -- **Compliance**: Data processing transparency - -### User Benefits -1. **Ease of Use** - - Simple API key setup - - Automatic model discovery - - Sensible defaults - - No complex configuration - -2. **Flexibility** - - Switch models on-the-fly - - Custom base URLs for enterprise - - Temperature and token controls - - Organization-level restrictions - -3. 
**Reliability** - - Automatic fallbacks - - Error recovery - - Model availability checks - - Usage tracking - -## Integration Points - -### External Dependencies -- **DeepInfra API**: Primary service dependency -- **Model Catalog**: deepinfra.com/models for browsing -- **Authentication**: Bearer token via API key - -### Internal Integration -- **Provider Registry**: Registered as "deepinfra" provider -- **Model Cache**: 5-minute TTL for model lists -- **Cost Calculation**: OpenAI-style pricing model -- **Streaming**: Full streaming support with usage metrics -- **Context Management**: Supports Roo's context window handling - -### Data Flow -``` -User Input → Roo Code → DeepInfraHandler → DeepInfra API - ↓ ↓ - Token Counting Model Processing - ↓ ↓ - Cost Calculation Streaming Response - ↓ ↓ - UI Update ← Stream Processing ← API Response -``` - -## Security Considerations - -### API Key Management -- Stored securely in VSCode settings -- Never exposed in UI (password field) -- Transmitted only via HTTPS -- No key logging or debugging output - -### Data Privacy -- Direct API communication (no proxies) -- No request/response caching by default -- Optional prompt caching with explicit task IDs -- Headers identify Roo Code as source - -## Performance Characteristics - -### Response Times -- Initial connection: ~200-500ms -- First token: ~500-1000ms (model dependent) -- Streaming rate: 50-200 tokens/second -- Model list fetch: ~500ms - -### Resource Usage -- Minimal memory overhead -- No local model storage -- Efficient streaming processing -- Automatic connection pooling - -## Error Handling - -### Common Error Scenarios -1. **Invalid API Key** - - Clear error message to user - - Falls back to no models available - - Suggests checking API key - -2. **Network Timeout** - - Automatic retry with backoff - - User-friendly timeout message - - Manual refresh option - -3. 
**Model Unavailable** - - Automatic fallback to default - - Warning shown to user - - Model list refresh suggested - -4. **Rate Limiting** - - Respects rate limit headers - - Automatic request throttling - - User notification of limits - -## Documentation Recommendations - -### Critical Areas for User Documentation -1. **Getting Started Guide** - - How to obtain DeepInfra API key - - Step-by-step setup screenshots - - Model selection guidance - - First conversation example - -2. **Model Selection Guide** - - Comparison of available models - - Use case recommendations - - Performance vs. cost tradeoffs - - Context window considerations - -3. **Troubleshooting Section** - - Common error messages and fixes - - API key validation steps - - Network configuration tips - - Model availability checking - -### Developer Integration Guide -1. **API Configuration** - - Custom base URL setup - - Header customization - - Proxy configuration - - Enterprise deployment - -2. **Advanced Features** - - Prompt caching strategies - - Vision model usage - - Reasoning model handling - - Cost optimization tips - -## Summary for Documentation Team - -This extraction report provides comprehensive details about the DeepInfra provider integration in Roo Code. The implementation offers a seamless user experience with automatic model discovery, intelligent fallbacks, and comprehensive error handling. 
- -Key highlights for documentation: -- Simple one-time setup with just \ No newline at end of file From 9726cb009a121e3b027192d18e32dd11da6877d0 Mon Sep 17 00:00:00 2001 From: Roo Code Date: Fri, 5 Sep 2025 05:38:53 +0000 Subject: [PATCH 5/5] docs: add DeepInfra to providers sidebar menu --- sidebars.ts | 1 + 1 file changed, 1 insertion(+) diff --git a/sidebars.ts b/sidebars.ts index 5b112eae..e1a045da 100644 --- a/sidebars.ts +++ b/sidebars.ts @@ -164,6 +164,7 @@ const sidebars: SidebarsConfig = { 'providers/claude-code', 'providers/bedrock', 'providers/cerebras', + 'providers/deepinfra', 'providers/deepseek', 'providers/doubao', 'providers/featherless',