From a45ac176f8cbb3148ad2b417ff6ff66391596161 Mon Sep 17 00:00:00 2001
From: Hannes Rudolph <hrudolph@gmail.com>
Date: Thu, 4 Sep 2025 23:20:58 -0600
Subject: [PATCH 1/5] Add release notes for v3.26.7

- Added Kimi K2-0905 models with 256K context window
- Added OpenAI service tiers (Standard/Flex/Priority)
- Added DeepInfra provider with 100+ models
- Fixed multiple bugs including MCP validation, zsh command safety
- Updated combined v3.26 notes with new features
---
 docs/update-notes/index.md    |  1 +
 docs/update-notes/v3.26.7.mdx | 63 +++++++++++++++++++++++++++++++++++
 docs/update-notes/v3.26.mdx   | 31 +++++++++++++++++
 sidebars.ts                   |  1 +
 4 files changed, 96 insertions(+)
 create mode 100644 docs/update-notes/v3.26.7.mdx

diff --git a/docs/update-notes/index.md b/docs/update-notes/index.md
index fbbe1427..1ceb66d4 100644
--- a/docs/update-notes/index.md
+++ b/docs/update-notes/index.md
@@ -19,6 +19,7 @@ image: /img/social-share.jpg
 
 ### Version 3.26
 
+*   [3.26.7](/update-notes/v3.26.7) (2025-09-05)
 *   [3.26.6](/update-notes/v3.26.6) (2025-09-03)
 *   [3.26.5](/update-notes/v3.26.5) (2025-09-03)
 *   [3.26.4](/update-notes/v3.26.4) (2025-09-01)
diff --git a/docs/update-notes/v3.26.7.mdx b/docs/update-notes/v3.26.7.mdx
new file mode 100644
index 00000000..16d1dbb4
--- /dev/null
+++ b/docs/update-notes/v3.26.7.mdx
@@ -0,0 +1,63 @@
+---
+description: Enhanced Kimi K2 models with 256K+ context windows, OpenAI service tiers for flexible pricing, and DeepInfra as a new provider with 100+ models.
+keywords:
+  - roo code 3.26.7
+  - kimi k2 models
+  - openai service tiers
+  - deepinfra provider
+  - bug fixes
+image: /img/social-share.jpg
+---
+
+# Roo Code 3.26.7 Release Notes (2025-09-05)
+
+This release brings enhanced Kimi K2 models with massive context windows, OpenAI service tier selection, and DeepInfra as a new provider offering 100+ models.
+
+## Kimi K2-0905: Moonshot's Latest Open Source Model is Live in Roo Code
+
+We've upgraded to the latest Kimi K2-0905 models across multiple providers (thanks CellenLee!) ([#7663](https://github.com/RooCodeInc/Roo-Code/pull/7663), [#7693](https://github.com/RooCodeInc/Roo-Code/pull/7693)):
+
+K2-0905 comes with three major upgrades:
+- **256K Context Window**: Massive context supporting up to 256K-262K tokens, doubling the previous limit for processing much larger documents and conversations
+- **Improved Tool Calling**: Enhanced function calling and tool use capabilities for better agentic workflows
+- **Enhanced Front-end Development**: Superior HTML, CSS, and JavaScript generation with modern framework support
+
+Available through Groq, Moonshot, and Fireworks providers. These models excel at handling large codebases, long conversations, and complex multi-file operations.
+
+## OpenAI Service Tiers
+
+We've added support for OpenAI's new Responses API service tiers ([#7646](https://github.com/RooCodeInc/Roo-Code/pull/7646)):
+
+- **Standard Tier**: Default tier with regular pricing
+- **Flex Tier**: 50% discount with slightly longer response times for non-urgent tasks
+- **Priority Tier**: Faster response times for time-critical operations
+
+Select your preferred tier directly in the UI based on your needs and budget. This gives you more control over costs while maintaining access to OpenAI's powerful models.
+
+> **📚 Documentation**: See [OpenAI Provider Guide](/providers/openai) for detailed tier comparison and pricing.
+
+## DeepInfra Provider
+
+DeepInfra is now available as a model provider (thanks Thachnh!) ([#7677](https://github.com/RooCodeInc/Roo-Code/pull/7677)):
+
+- **100+ Models**: Access to a vast selection of open-source and frontier models
+- **Competitive Pricing**: Very cost-effective rates compared to other providers
+- **Automatic Prompt Caching**: Built-in prompt caching for supported models like Qwen3 Coder
+- **Fast Inference**: Optimized infrastructure for quick response times
+
+DeepInfra is an excellent choice for developers looking for variety and value in their AI model selection.
+
+> **📚 Documentation**: See [DeepInfra Provider Setup](/providers/deepinfra) to get started.
+
+## QOL Improvements
+
+* **Shell Security**: Added shell executable allowlist validation with platform-specific fallbacks for improved command execution safety ([#7681](https://github.com/RooCodeInc/Roo-Code/pull/7681))
+
+## Bug Fixes
+
+* **MCP Tool Validation**: Roo now validates MCP tool existence before execution and shows helpful error messages with available tools (thanks R-omk!) ([#7632](https://github.com/RooCodeInc/Roo-Code/pull/7632))
+* **OpenAI API Key Errors**: Clear error messages now display when API keys contain invalid characters instead of cryptic ByteString errors (thanks A0nameless0man!) ([#7586](https://github.com/RooCodeInc/Roo-Code/pull/7586))
+* **Follow-up Questions**: Fixed countdown timer incorrectly reappearing in task history for already answered follow-up questions (thanks XuyiK!) ([#7686](https://github.com/RooCodeInc/Roo-Code/pull/7686))
+* **Moonshot Token Limit**: Resolved issue where Moonshot models were incorrectly limited to 1024 tokens, now properly respects configured limits (thanks wangxiaolong100, greyishsong!) ([#7673](https://github.com/RooCodeInc/Roo-Code/pull/7673))
+* **Zsh Command Safety**: Improved handling of zsh process substitution and glob qualifiers to prevent auto-execution of potentially dangerous commands ([#7658](https://github.com/RooCodeInc/Roo-Code/pull/7658), [#7667](https://github.com/RooCodeInc/Roo-Code/pull/7667))
+* **Traditional Chinese Localization**: Fixed typo in zh-TW locale text (thanks PeterDaveHello!) ([#7672](https://github.com/RooCodeInc/Roo-Code/pull/7672))
\ No newline at end of file
diff --git a/docs/update-notes/v3.26.mdx b/docs/update-notes/v3.26.mdx
index cb0d4f0a..e190aee3 100644
--- a/docs/update-notes/v3.26.mdx
+++ b/docs/update-notes/v3.26.mdx
@@ -94,8 +94,32 @@ PRs: [#7474](https://github.com/RooCodeInc/Roo-Code/pull/7474), [#7492](https://
 
 > **📚 Documentation**: See [Image Generation - Editing Existing Images](/features/image-generation#editing-existing-images) for transformation examples.
 
+### Kimi K2-0905: Moonshot's Latest Open Source Model is Live in Roo Code
+
+We've upgraded to the latest Kimi K2-0905 models across multiple providers (thanks CellenLee!) ([#7663](https://github.com/RooCodeInc/Roo-Code/pull/7663), [#7693](https://github.com/RooCodeInc/Roo-Code/pull/7693)):
+
+K2-0905 comes with three major upgrades:
+- **256K Context Window**: Massive context supporting up to 256K-262K tokens, doubling the previous limit for processing much larger documents and conversations
+- **Improved Tool Calling**: Enhanced function calling and tool use capabilities for better agentic workflows
+- **Enhanced Front-end Development**: Superior HTML, CSS, and JavaScript generation with modern framework support
+
+Available through Groq, Moonshot, and Fireworks providers. These models excel at handling large codebases, long conversations, and complex multi-file operations.
+
+### OpenAI Service Tiers
+
+We've added support for OpenAI's new Responses API service tiers ([#7646](https://github.com/RooCodeInc/Roo-Code/pull/7646)):
+
+- **Standard Tier**: Default tier with regular pricing
+- **Flex Tier**: 50% discount with slightly longer response times for non-urgent tasks
+- **Priority Tier**: Faster response times for time-critical operations
+
+Select your preferred tier directly in the UI based on your needs and budget. This gives you more control over costs while maintaining access to OpenAI's powerful models.
+
+> **📚 Documentation**: See [OpenAI Provider Guide](/providers/openai) for detailed tier comparison and pricing.
+
 ### Provider Updates
 
+* **DeepInfra Provider**: DeepInfra is now available as a model provider with 100+ open-source and frontier models, competitive pricing, and automatic prompt caching for supported models like Qwen3 Coder (thanks Thachnh!) ([#7677](https://github.com/RooCodeInc/Roo-Code/pull/7677))
 * **Kimi K2 Turbo Model**: Added support for the high-speed Kimi K2 Turbo model with 60-100 tokens/sec processing and a 131K token context window (thanks wangxiaolong100!) ([#7593](https://github.com/RooCodeInc/Roo-Code/pull/7593))
 * **Qwen3 235B Thinking Model**: Added support for Qwen3-235B-A22B-Thinking-2507 model with an impressive 262K context window, enabling processing of extremely long documents and large codebases in a single request through the Chutes provider (thanks mohammad154, apple-techie!) ([#7578](https://github.com/RooCodeInc/Roo-Code/pull/7578))
 * **Ollama Turbo Mode**: Added API key support for Turbo mode, enabling faster model execution with datacenter-grade hardware (thanks LivioGama!) ([#7425](https://github.com/RooCodeInc/Roo-Code/pull/7425))
@@ -104,6 +128,7 @@ PRs: [#7474](https://github.com/RooCodeInc/Roo-Code/pull/7474), [#7492](https://
 
 ### QOL Improvements
 
+* **Shell Security**: Added shell executable allowlist validation with platform-specific fallbacks for improved command execution safety ([#7681](https://github.com/RooCodeInc/Roo-Code/pull/7681))
 * **Settings Scroll Position**: Settings tabs now remember their individual scroll positions when switching between them (thanks DC-Dancao!) ([#7587](https://github.com/RooCodeInc/Roo-Code/pull/7587))
 * **MCP Resource Auto-Approval**: MCP resource access requests are now automatically approved when auto-approve is enabled, eliminating manual approval steps and enabling smoother automation workflows (thanks m-ibm!) ([#7606](https://github.com/RooCodeInc/Roo-Code/pull/7606))
 * **Message Queue Performance**: Improved message queueing reliability and performance by moving the queue management to the extension host, making the interface more stable ([#7604](https://github.com/RooCodeInc/Roo-Code/pull/7604))
@@ -122,6 +147,12 @@ PRs: [#7474](https://github.com/RooCodeInc/Roo-Code/pull/7474), [#7492](https://
 
 ### Bug Fixes
 
+* **MCP Tool Validation**: Roo now validates MCP tool existence before execution and shows helpful error messages with available tools (thanks R-omk!) ([#7632](https://github.com/RooCodeInc/Roo-Code/pull/7632))
+* **OpenAI API Key Errors**: Clear error messages now display when API keys contain invalid characters instead of cryptic ByteString errors (thanks A0nameless0man!) ([#7586](https://github.com/RooCodeInc/Roo-Code/pull/7586))
+* **Follow-up Questions**: Fixed countdown timer incorrectly reappearing in task history for already answered follow-up questions (thanks XuyiK!) ([#7686](https://github.com/RooCodeInc/Roo-Code/pull/7686))
+* **Moonshot Token Limit**: Resolved issue where Moonshot models were incorrectly limited to 1024 tokens, now properly respects configured limits (thanks wangxiaolong100, greyishsong!) ([#7673](https://github.com/RooCodeInc/Roo-Code/pull/7673))
+* **Zsh Command Safety**: Improved handling of zsh process substitution and glob qualifiers to prevent auto-execution of potentially dangerous commands ([#7658](https://github.com/RooCodeInc/Roo-Code/pull/7658), [#7667](https://github.com/RooCodeInc/Roo-Code/pull/7667))
+* **Traditional Chinese Localization**: Fixed typo in zh-TW locale text (thanks PeterDaveHello!) ([#7672](https://github.com/RooCodeInc/Roo-Code/pull/7672))
 * **Tool Approval Fix**: Fixed an error that occurred when using insert_content and search_and_replace tools on write-protected files - these tools now handle file protection correctly ([#7649](https://github.com/RooCodeInc/Roo-Code/pull/7649))
 * **Configurable Embedding Batch Size**: Fixed an issue where users with API providers having stricter batch limits couldn't use code indexing. You can now configure the embedding batch size (1-2048, default: 400) to match your provider's limits (thanks BenLampson!) ([#7464](https://github.com/RooCodeInc/Roo-Code/pull/7464))
 * **OpenAI-Native Cache Reporting**: Fixed cache usage statistics and cost calculations when using the OpenAI-Native provider with cached content ([#7602](https://github.com/RooCodeInc/Roo-Code/pull/7602))
diff --git a/sidebars.ts b/sidebars.ts
index 51a6853f..5b112eae 100644
--- a/sidebars.ts
+++ b/sidebars.ts
@@ -221,6 +221,7 @@ const sidebars: SidebarsConfig = {
           label: '3.26',
           items: [
             { type: 'doc', id: 'update-notes/v3.26', label: '3.26 Combined' },
+            { type: 'doc', id: 'update-notes/v3.26.7', label: '3.26.7' },
             { type: 'doc', id: 'update-notes/v3.26.6', label: '3.26.6' },
             { type: 'doc', id: 'update-notes/v3.26.5', label: '3.26.5' },
             { type: 'doc', id: 'update-notes/v3.26.4', label: '3.26.4' },

From 139eebf399308aae3950515b148598a33434c7ad Mon Sep 17 00:00:00 2001
From: Roo Code <roomote@roocode.com>
Date: Fri, 5 Sep 2025 05:34:40 +0000
Subject: [PATCH 2/5] docs: add DeepInfra provider extraction report

---
 deepinfra-provider-extraction.md | 365 +++++++++++++++++++++++++++++++
 1 file changed, 365 insertions(+)
 create mode 100644 deepinfra-provider-extraction.md

diff --git a/deepinfra-provider-extraction.md b/deepinfra-provider-extraction.md
new file mode 100644
index 00000000..091c41f2
--- /dev/null
+++ b/deepinfra-provider-extraction.md
@@ -0,0 +1,365 @@
+
+# DeepInfra Provider - Feature Extraction Report
+
+## Executive Summary
+DeepInfra is a model provider integration in Roo Code that offers access to various AI models through DeepInfra's API infrastructure. It provides a cost-effective way to access high-performance models including Qwen, Llama, and other open-source models with features like prompt caching, vision support, and reasoning capabilities.
+
+## UI/UX Analysis
+
+### User Interface Components
+
+#### 1. Provider Selection (`webview-ui/src/components/settings/constants.ts`)
+**Visual Layout:**
+- DeepInfra appears in the provider dropdown list
+- Position: Between OpenRouter and Anthropic in the provider list
+- Label: "DeepInfra" (user-friendly name)
+- Value: "deepinfra" (internal identifier)
+
+#### 2. Settings Panel (`webview-ui/src/components/settings/providers/DeepInfra.tsx`)
+**Visual Elements:**
+- **API Key Input Field**
+  - Type: Password field (masked input)
+  - Placeholder: Localized "API Key" placeholder text
+  - Label: "API Key" (font-medium, mb-1 spacing)
+  - Full width text field using VSCode's webview UI toolkit
+  - Real-time input handling with onChange events
+
+- **Refresh Models Button**
+  - Visual: Outline variant button with icon
+  - Icon: Codicon refresh icon (spinning animation)
+  - Text: "Refresh Models" (localized)
+  - Feedback: Shows hint text after refresh
+  - Error state: Red text color for error messages
+
+- **Model Picker Component**
+  - Dropdown selector for available models
+  - Default selection: Qwen/Qwen3-Coder-480B-A35B-Instruct-Turbo
+  - External link: "Browse models at deepinfra.com/models"
+  - Error display: Shows validation errors in red
+  - Organization restrictions: Respects allow lists
+
+**User Feedback:**
+- Loading states during model fetching
+- Success confirmation after refresh
+- Error messages for invalid API keys
+- Hint text: "Models refreshed. Check the model dropdown."
+
+### User Experience Elements
+
+#### Visual Patterns
+**Consistent VSCode Integration:**
+- Uses VSCode's native color variables
+- Follows VSCode's dark/light theme automatically
+- Consistent spacing and typography with other providers
+- Standard form field styling
+
+**Interactive Behaviors:**
+- Auto-save on field changes (debounced)
+- Silent model refresh on API key/URL changes
+- Immediate visual feedback on interactions
+- Keyboard accessible (tab navigation)
+
+### User Workflows
+
+#### 1. Initial Setup
+```
+User Journey:
+1. Open Settings → Navigate to API Provider section
+2. Select "DeepInfra" from provider dropdown
+3. Enter API Key (obtained from deepinfra.com)
+   → Field masks input for security
+   → Auto-validates format
+4. Models auto-populate after valid key entry
+5. Select desired model from dropdown
+   → Default: Qwen3-Coder-480B
+   → Shows model descriptions
+6. Configuration auto-saves
+```
+
+#### 2. Model Selection and Management
+```
+Workflow:
+1. View available models in dropdown
+   → Shows model ID and description
+   → Indicates capabilities (vision, caching)
+2. Click "Browse models" link
+   → Opens deepinfra.com/models in browser
+   → User can explore full catalog
+3. Click "Refresh Models" if needed
+   → Fetches latest model list
+   → Shows refresh confirmation
+4. Select different model
+   → Immediate effect on next conversation
+   → Preserves selection across sessions
+```
+
+#### 3. Troubleshooting Flow
+```
+Error Recovery:
+1. Invalid API Key
+   → Error message appears
+   → Models list shows as empty
+   → User corrects API key
+2. Network Issues
+   → Timeout message shown
+   → Retry with "Refresh Models"
+   → Falls back to default model
+3. Model Unavailable
+   → Automatically uses fallback model
+   → Shows warning to user
+   → Suggests refresh or different model
+```
+
+## Technical Details
+
+### Core Components
+
+#### 1. **DeepInfraHandler** (`src/api/providers/deepinfra.ts`)
+- **Class Hierarchy**: Extends `RouterProvider` → `BaseProvider`
+- **Interfaces**: Implements `SingleCompletionHandler`
+- **Key Methods**:
+  - `createMessage()`: Handles streaming chat completions
+  - `completePrompt()`: Non-streaming completions
+  - `fetchModel()`: Retrieves available models
+  - `processUsageMetrics()`: Calculates costs and token usage
+
+#### 2. **Model Fetcher** (`src/api/providers/fetchers/deepinfra.ts`)
+- **API Endpoint**: `/models` (OpenAI-compatible)
+- **Response Parsing**: Zod schema validation
+- **Metadata Extraction**:
+  ```typescript
+  {
+    contextWindow: number,    // Default: 8192
+    maxTokens: number,        // Default: 20% of context
+    supportsImages: boolean,  // From tags
+    supportsPromptCache: boolean, // From tags
+    inputPrice: number,       // Per million tokens
+    outputPrice: number,      // Per million tokens
+    cacheReadsPrice: number,  // Discounted cache reads
+  }
+  ```
+
+### API Integration
+
+#### Request Configuration
+```typescript
+{
+  baseURL: "https://api.deepinfra.com/v1/openai",
+  headers: {
+    "Authorization": "Bearer {API_KEY}",
+    "X-Deepinfra-Source": "roo-code",
+    "X-Deepinfra-Version": "2025-08-25"
+  }
+}
+```
+
+#### Streaming Response Handling
+- Supports text chunks via `delta.content`
+- Handles reasoning content via `delta.reasoning_content`
+- Includes usage metrics in stream
+- Processes cache read/write tokens
+
+### Configuration Options
+
+| Setting | Type | Default | Description |
+|---------|------|---------|-------------|
+| `deepInfraApiKey` | string | - | API authentication key |
+| `deepInfraBaseUrl` | string | https://api.deepinfra.com/v1/openai | API endpoint |
+| `deepInfraModelId` | string | Qwen/Qwen3-Coder-480B-A35B-Instruct-Turbo | Selected model |
+| `modelTemperature` | number | 0 | Response randomness (0-2) |
+| `includeMaxTokens` | boolean | true | Include max tokens in requests |
+| `modelMaxTokens` | number | Model default | Maximum response length |
+
+### Advanced Features
+
+#### 1. **Prompt Caching**
+- Enabled for models with `prompt_cache` tag
+- Uses task ID as cache key
+- Reduces costs for repeated contexts
+- Automatic cache management
+
+#### 2. **Vision Support**
+- Detected via model tags
+- Enables image input for compatible models
+- Seamless integration with Roo's image handling
+
+#### 3. **Reasoning Models**
+- Special handling for reasoning content
+- Separate token tracking for thinking
+- Supports models like o1-preview variants
+
+#### 4. **Dynamic Model Discovery**
+- Real-time model list fetching
+- Automatic capability detection
+- Pricing information extraction
+- Fallback to defaults on failure
+
+## Non-Technical Information
+
+### Business Value
+1. **Cost Efficiency**
+   - Competitive pricing vs. direct API access
+   - Prompt caching reduces repeated costs
+   - Pay-per-use model with no minimums
+
+2. **Model Variety**
+   - Access to latest open-source models
+   - Specialized coding models (Qwen Coder)
+   - Vision and multimodal capabilities
+   - Different size/speed tradeoffs
+
+3. **Performance Benefits**
+   - Low latency infrastructure
+   - High availability
+   - Automatic load balancing
+   - Global edge locations
+
+### Common Use Cases
+
+#### For Developers
+- **Code Generation**: Qwen Coder models excel at programming tasks
+- **Debugging**: Large context windows for entire codebases
+- **Documentation**: Generate technical docs with code understanding
+- **Refactoring**: Analyze and improve existing code
+
+#### For Teams
+- **Shared Infrastructure**: Single API key for team
+- **Model Experimentation**: Try different models easily
+- **Cost Control**: Usage-based pricing, no subscriptions
+- **Compliance**: Data processing transparency
+
+### User Benefits
+1. **Ease of Use**
+   - Simple API key setup
+   - Automatic model discovery
+   - Sensible defaults
+   - No complex configuration
+
+2. **Flexibility**
+   - Switch models on-the-fly
+   - Custom base URLs for enterprise
+   - Temperature and token controls
+   - Organization-level restrictions
+
+3. **Reliability**
+   - Automatic fallbacks
+   - Error recovery
+   - Model availability checks
+   - Usage tracking
+
+## Integration Points
+
+### External Dependencies
+- **DeepInfra API**: Primary service dependency
+- **Model Catalog**: deepinfra.com/models for browsing
+- **Authentication**: Bearer token via API key
+
+### Internal Integration
+- **Provider Registry**: Registered as "deepinfra" provider
+- **Model Cache**: 5-minute TTL for model lists
+- **Cost Calculation**: OpenAI-style pricing model
+- **Streaming**: Full streaming support with usage metrics
+- **Context Management**: Supports Roo's context window handling
+
+### Data Flow
+```
+User Input → Roo Code → DeepInfraHandler → DeepInfra API
+                ↓                              ↓
+          Token Counting                  Model Processing
+                ↓                              ↓
+          Cost Calculation              Streaming Response
+                ↓                              ↓
+            UI Update ← Stream Processing ← API Response
+```
+
+## Security Considerations
+
+### API Key Management
+- Stored securely in VSCode settings
+- Never exposed in UI (password field)
+- Transmitted only via HTTPS
+- No key logging or debugging output
+
+### Data Privacy
+- Direct API communication (no proxies)
+- No request/response caching by default
+- Optional prompt caching with explicit task IDs
+- Headers identify Roo Code as source
+
+## Performance Characteristics
+
+### Response Times
+- Initial connection: ~200-500ms
+- First token: ~500-1000ms (model dependent)
+- Streaming rate: 50-200 tokens/second
+- Model list fetch: ~500ms
+
+### Resource Usage
+- Minimal memory overhead
+- No local model storage
+- Efficient streaming processing
+- Automatic connection pooling
+
+## Error Handling
+
+### Common Error Scenarios
+1. **Invalid API Key**
+   - Clear error message to user
+   - Falls back to no models available
+   - Suggests checking API key
+
+2. **Network Timeout**
+   - Automatic retry with backoff
+   - User-friendly timeout message
+   - Manual refresh option
+
+3. **Model Unavailable**
+   - Automatic fallback to default
+   - Warning shown to user
+   - Model list refresh suggested
+
+4. **Rate Limiting**
+   - Respects rate limit headers
+   - Automatic request throttling
+   - User notification of limits
+
+## Documentation Recommendations
+
+### Critical Areas for User Documentation
+1. **Getting Started Guide**
+   - How to obtain DeepInfra API key
+   - Step-by-step setup screenshots
+   - Model selection guidance
+   - First conversation example
+
+2. **Model Selection Guide**
+   - Comparison of available models
+   - Use case recommendations
+   - Performance vs. cost tradeoffs
+   - Context window considerations
+
+3. **Troubleshooting Section**
+   - Common error messages and fixes
+   - API key validation steps
+   - Network configuration tips
+   - Model availability checking
+
+### Developer Integration Guide
+1. **API Configuration**
+   - Custom base URL setup
+   - Header customization
+   - Proxy configuration
+   - Enterprise deployment
+
+2. **Advanced Features**
+   - Prompt caching strategies
+   - Vision model usage
+   - Reasoning model handling
+   - Cost optimization tips
+
+## Summary for Documentation Team
+
+This extraction report provides comprehensive details about the DeepInfra provider integration in Roo Code. The implementation offers a seamless user experience with automatic model discovery, intelligent fallbacks, and comprehensive error handling. 
+
+Key highlights for documentation:
+- Simple one-time setup with just
\ No newline at end of file

From 8bc0150de1ac44e74e511c435a812d8cb2dda5dc Mon Sep 17 00:00:00 2001
From: Roo Code <roomote@roocode.com>
Date: Fri, 5 Sep 2025 05:36:57 +0000
Subject: [PATCH 3/5] docs: add DeepInfra provider documentation

---
 docs/providers/deepinfra.md | 90 +++++++++++++++++++++++++++++++++++++
 1 file changed, 90 insertions(+)
 create mode 100644 docs/providers/deepinfra.md

diff --git a/docs/providers/deepinfra.md b/docs/providers/deepinfra.md
new file mode 100644
index 00000000..ed8963df
--- /dev/null
+++ b/docs/providers/deepinfra.md
@@ -0,0 +1,90 @@
+---
+sidebar_label: DeepInfra
+description: Configure DeepInfra's high-performance AI models in Roo Code. Access Qwen Coder, Llama, and other open-source models with prompt caching and vision capabilities.
+keywords:
+  - deepinfra
+  - deep infra
+  - roo code
+  - api provider
+  - qwen coder
+  - llama models
+  - prompt caching
+  - vision models
+  - open source ai
+image: /img/social-share.jpg
+---
+
+# Using DeepInfra With Roo Code
+
+DeepInfra provides cost-effective access to high-performance open-source models with features like prompt caching, vision support, and specialized coding models. Their infrastructure offers low latency and automatic load balancing across global edge locations.
+
+**Website:** [https://deepinfra.com/](https://deepinfra.com/)
+
+---
+
+## Getting an API Key
+
+1. **Sign Up/Sign In:** Go to [DeepInfra](https://deepinfra.com/). Create an account or sign in.
+2. **Navigate to API Keys:** Access the API keys section in your dashboard.
+3. **Create a Key:** Generate a new API key. Give it a descriptive name (e.g., "Roo Code").
+4. **Copy the Key:** **Important:** Copy the API key immediately. Store it securely.
+
+---
+
+## Supported Models
+
+Roo Code dynamically fetches available models from DeepInfra's API. The default model is:
+
+* `Qwen/Qwen3-Coder-480B-A35B-Instruct-Turbo` (256K context, optimized for coding)
+
+Common models available include:
+
+* **Coding Models:** Qwen Coder series, specialized for programming tasks
+* **General Models:** Llama 3.1, Mixtral, and other open-source models
+* **Vision Models:** Models with image understanding capabilities
+* **Reasoning Models:** Models with advanced reasoning support
+
+Browse the full catalog at [deepinfra.com/models](https://deepinfra.com/models).
+
+---
+
+## Configuration in Roo Code
+
+1. **Open Roo Code Settings:** Click the gear icon (<Codicon name="gear" />) in the Roo Code panel.
+2. **Select Provider:** Choose "DeepInfra" from the "API Provider" dropdown.
+3. **Enter API Key:** Paste your DeepInfra API key into the "DeepInfra API Key" field.
+4. **Select Model:** Choose your desired model from the "Model" dropdown.
+   - Models will auto-populate after entering a valid API key
+   - Click "Refresh Models" to update the list
+
+---
+
+## Advanced Features
+
+### Prompt Caching
+
+DeepInfra supports prompt caching for eligible models, which:
+- Reduces costs for repeated contexts
+- Improves response times for similar queries
+- Automatically manages cache based on task IDs
+
+### Vision Support
+
+Models with vision capabilities can:
+- Process images alongside text
+- Understand visual content for coding tasks
+- Analyze screenshots and diagrams
+
+### Custom Base URL
+
+For enterprise deployments, you can configure a custom base URL in the advanced settings.
+
+---
+
+## Tips and Notes
+
+* **Performance:** DeepInfra offers low latency with automatic load balancing across global locations.
+* **Cost Efficiency:** Competitive pricing with prompt caching to reduce costs for repeated contexts.
+* **Model Variety:** Access to the latest open-source models including specialized coding models.
+* **Context Windows:** Models support context windows up to 256K tokens for large codebases.
+* **Pricing:** Pay-per-use model with no minimums. Check [deepinfra.com](https://deepinfra.com/) for current pricing.
\ No newline at end of file

From a50b73ef4243b33a9f3c8a12a7e9390af05f71a3 Mon Sep 17 00:00:00 2001
From: Roo Code <roomote@roocode.com>
Date: Fri, 5 Sep 2025 05:37:30 +0000
Subject: [PATCH 4/5] chore: remove temporary extraction notes

---
 deepinfra-provider-extraction.md | 365 -------------------------------
 1 file changed, 365 deletions(-)
 delete mode 100644 deepinfra-provider-extraction.md

diff --git a/deepinfra-provider-extraction.md b/deepinfra-provider-extraction.md
deleted file mode 100644
index 091c41f2..00000000
--- a/deepinfra-provider-extraction.md
+++ /dev/null
@@ -1,365 +0,0 @@
-
-# DeepInfra Provider - Feature Extraction Report
-
-## Executive Summary
-DeepInfra is a model provider integration in Roo Code that offers access to various AI models through DeepInfra's API infrastructure. It provides a cost-effective way to access high-performance models including Qwen, Llama, and other open-source models with features like prompt caching, vision support, and reasoning capabilities.
-
-## UI/UX Analysis
-
-### User Interface Components
-
-#### 1. Provider Selection (`webview-ui/src/components/settings/constants.ts`)
-**Visual Layout:**
-- DeepInfra appears in the provider dropdown list
-- Position: Between OpenRouter and Anthropic in the provider list
-- Label: "DeepInfra" (user-friendly name)
-- Value: "deepinfra" (internal identifier)
-
-#### 2. Settings Panel (`webview-ui/src/components/settings/providers/DeepInfra.tsx`)
-**Visual Elements:**
-- **API Key Input Field**
-  - Type: Password field (masked input)
-  - Placeholder: Localized "API Key" placeholder text
-  - Label: "API Key" (font-medium, mb-1 spacing)
-  - Full width text field using VSCode's webview UI toolkit
-  - Real-time input handling with onChange events
-
-- **Refresh Models Button**
-  - Visual: Outline variant button with icon
-  - Icon: Codicon refresh icon (spinning animation)
-  - Text: "Refresh Models" (localized)
-  - Feedback: Shows hint text after refresh
-  - Error state: Red text color for error messages
-
-- **Model Picker Component**
-  - Dropdown selector for available models
-  - Default selection: Qwen/Qwen3-Coder-480B-A35B-Instruct-Turbo
-  - External link: "Browse models at deepinfra.com/models"
-  - Error display: Shows validation errors in red
-  - Organization restrictions: Respects allow lists
-
-**User Feedback:**
-- Loading states during model fetching
-- Success confirmation after refresh
-- Error messages for invalid API keys
-- Hint text: "Models refreshed. Check the model dropdown."
-
-### User Experience Elements
-
-#### Visual Patterns
-**Consistent VSCode Integration:**
-- Uses VSCode's native color variables
-- Follows VSCode's dark/light theme automatically
-- Consistent spacing and typography with other providers
-- Standard form field styling
-
-**Interactive Behaviors:**
-- Auto-save on field changes (debounced)
-- Silent model refresh on API key/URL changes
-- Immediate visual feedback on interactions
-- Keyboard accessible (tab navigation)
-
-### User Workflows
-
-#### 1. Initial Setup
-```
-User Journey:
-1. Open Settings → Navigate to API Provider section
-2. Select "DeepInfra" from provider dropdown
-3. Enter API Key (obtained from deepinfra.com)
-   → Field masks input for security
-   → Auto-validates format
-4. Models auto-populate after valid key entry
-5. Select desired model from dropdown
-   → Default: Qwen3-Coder-480B
-   → Shows model descriptions
-6. Configuration auto-saves
-```
-
-#### 2. Model Selection and Management
-```
-Workflow:
-1. View available models in dropdown
-   → Shows model ID and description
-   → Indicates capabilities (vision, caching)
-2. Click "Browse models" link
-   → Opens deepinfra.com/models in browser
-   → User can explore full catalog
-3. Click "Refresh Models" if needed
-   → Fetches latest model list
-   → Shows refresh confirmation
-4. Select different model
-   → Immediate effect on next conversation
-   → Preserves selection across sessions
-```
-
-#### 3. Troubleshooting Flow
-```
-Error Recovery:
-1. Invalid API Key
-   → Error message appears
-   → Models list shows as empty
-   → User corrects API key
-2. Network Issues
-   → Timeout message shown
-   → Retry with "Refresh Models"
-   → Falls back to default model
-3. Model Unavailable
-   → Automatically uses fallback model
-   → Shows warning to user
-   → Suggests refresh or different model
-```
-
-## Technical Details
-
-### Core Components
-
-#### 1. **DeepInfraHandler** (`src/api/providers/deepinfra.ts`)
-- **Class Hierarchy**: Extends `RouterProvider` → `BaseProvider`
-- **Interfaces**: Implements `SingleCompletionHandler`
-- **Key Methods**:
-  - `createMessage()`: Handles streaming chat completions
-  - `completePrompt()`: Non-streaming completions
-  - `fetchModel()`: Retrieves available models
-  - `processUsageMetrics()`: Calculates costs and token usage
-
-#### 2. **Model Fetcher** (`src/api/providers/fetchers/deepinfra.ts`)
-- **API Endpoint**: `/models` (OpenAI-compatible)
-- **Response Parsing**: Zod schema validation
-- **Metadata Extraction**:
-  ```typescript
-  {
-    contextWindow: number,    // Default: 8192
-    maxTokens: number,        // Default: 20% of context
-    supportsImages: boolean,  // From tags
-    supportsPromptCache: boolean, // From tags
-    inputPrice: number,       // Per million tokens
-    outputPrice: number,      // Per million tokens
-    cacheReadsPrice: number,  // Discounted cache reads
-  }
-  ```
-
-### API Integration
-
-#### Request Configuration
-```typescript
-{
-  baseURL: "https://api.deepinfra.com/v1/openai",
-  headers: {
-    "Authorization": "Bearer {API_KEY}",
-    "X-Deepinfra-Source": "roo-code",
-    "X-Deepinfra-Version": "2025-08-25"
-  }
-}
-```
-
-#### Streaming Response Handling
-- Supports text chunks via `delta.content`
-- Handles reasoning content via `delta.reasoning_content`
-- Includes usage metrics in stream
-- Processes cache read/write tokens
-
-### Configuration Options
-
-| Setting | Type | Default | Description |
-|---------|------|---------|-------------|
-| `deepInfraApiKey` | string | - | API authentication key |
-| `deepInfraBaseUrl` | string | https://api.deepinfra.com/v1/openai | API endpoint |
-| `deepInfraModelId` | string | Qwen/Qwen3-Coder-480B-A35B-Instruct-Turbo | Selected model |
-| `modelTemperature` | number | 0 | Response randomness (0-2) |
-| `includeMaxTokens` | boolean | true | Include max tokens in requests |
-| `modelMaxTokens` | number | Model default | Maximum response length |
-
-### Advanced Features
-
-#### 1. **Prompt Caching**
-- Enabled for models with `prompt_cache` tag
-- Uses task ID as cache key
-- Reduces costs for repeated contexts
-- Automatic cache management
-
-#### 2. **Vision Support**
-- Detected via model tags
-- Enables image input for compatible models
-- Seamless integration with Roo's image handling
-
-#### 3. **Reasoning Models**
-- Special handling for reasoning content
-- Separate token tracking for thinking
-- Supports models like o1-preview variants
-
-#### 4. **Dynamic Model Discovery**
-- Real-time model list fetching
-- Automatic capability detection
-- Pricing information extraction
-- Fallback to defaults on failure
-
-## Non-Technical Information
-
-### Business Value
-1. **Cost Efficiency**
-   - Competitive pricing vs. direct API access
-   - Prompt caching reduces repeated costs
-   - Pay-per-use model with no minimums
-
-2. **Model Variety**
-   - Access to latest open-source models
-   - Specialized coding models (Qwen Coder)
-   - Vision and multimodal capabilities
-   - Different size/speed tradeoffs
-
-3. **Performance Benefits**
-   - Low latency infrastructure
-   - High availability
-   - Automatic load balancing
-   - Global edge locations
-
-### Common Use Cases
-
-#### For Developers
-- **Code Generation**: Qwen Coder models excel at programming tasks
-- **Debugging**: Large context windows for entire codebases
-- **Documentation**: Generate technical docs with code understanding
-- **Refactoring**: Analyze and improve existing code
-
-#### For Teams
-- **Shared Infrastructure**: Single API key for team
-- **Model Experimentation**: Try different models easily
-- **Cost Control**: Usage-based pricing, no subscriptions
-- **Compliance**: Data processing transparency
-
-### User Benefits
-1. **Ease of Use**
-   - Simple API key setup
-   - Automatic model discovery
-   - Sensible defaults
-   - No complex configuration
-
-2. **Flexibility**
-   - Switch models on-the-fly
-   - Custom base URLs for enterprise
-   - Temperature and token controls
-   - Organization-level restrictions
-
-3. **Reliability**
-   - Automatic fallbacks
-   - Error recovery
-   - Model availability checks
-   - Usage tracking
-
-## Integration Points
-
-### External Dependencies
-- **DeepInfra API**: Primary service dependency
-- **Model Catalog**: deepinfra.com/models for browsing
-- **Authentication**: Bearer token via API key
-
-### Internal Integration
-- **Provider Registry**: Registered as "deepinfra" provider
-- **Model Cache**: 5-minute TTL for model lists
-- **Cost Calculation**: OpenAI-style pricing model
-- **Streaming**: Full streaming support with usage metrics
-- **Context Management**: Supports Roo's context window handling
-
-### Data Flow
-```
-User Input → Roo Code → DeepInfraHandler → DeepInfra API
-                ↓                              ↓
-          Token Counting                  Model Processing
-                ↓                              ↓
-          Cost Calculation              Streaming Response
-                ↓                              ↓
-            UI Update ← Stream Processing ← API Response
-```
-
-## Security Considerations
-
-### API Key Management
-- Stored securely in VSCode settings
-- Never exposed in UI (password field)
-- Transmitted only via HTTPS
-- No key logging or debugging output
-
-### Data Privacy
-- Direct API communication (no proxies)
-- No request/response caching by default
-- Optional prompt caching with explicit task IDs
-- Headers identify Roo Code as source
-
-## Performance Characteristics
-
-### Response Times
-- Initial connection: ~200-500ms
-- First token: ~500-1000ms (model dependent)
-- Streaming rate: 50-200 tokens/second
-- Model list fetch: ~500ms
-
-### Resource Usage
-- Minimal memory overhead
-- No local model storage
-- Efficient streaming processing
-- Automatic connection pooling
-
-## Error Handling
-
-### Common Error Scenarios
-1. **Invalid API Key**
-   - Clear error message to user
-   - Falls back to no models available
-   - Suggests checking API key
-
-2. **Network Timeout**
-   - Automatic retry with backoff
-   - User-friendly timeout message
-   - Manual refresh option
-
-3. **Model Unavailable**
-   - Automatic fallback to default
-   - Warning shown to user
-   - Model list refresh suggested
-
-4. **Rate Limiting**
-   - Respects rate limit headers
-   - Automatic request throttling
-   - User notification of limits
-
-## Documentation Recommendations
-
-### Critical Areas for User Documentation
-1. **Getting Started Guide**
-   - How to obtain DeepInfra API key
-   - Step-by-step setup screenshots
-   - Model selection guidance
-   - First conversation example
-
-2. **Model Selection Guide**
-   - Comparison of available models
-   - Use case recommendations
-   - Performance vs. cost tradeoffs
-   - Context window considerations
-
-3. **Troubleshooting Section**
-   - Common error messages and fixes
-   - API key validation steps
-   - Network configuration tips
-   - Model availability checking
-
-### Developer Integration Guide
-1. **API Configuration**
-   - Custom base URL setup
-   - Header customization
-   - Proxy configuration
-   - Enterprise deployment
-
-2. **Advanced Features**
-   - Prompt caching strategies
-   - Vision model usage
-   - Reasoning model handling
-   - Cost optimization tips
-
-## Summary for Documentation Team
-
-This extraction report provides comprehensive details about the DeepInfra provider integration in Roo Code. The implementation offers a seamless user experience with automatic model discovery, intelligent fallbacks, and comprehensive error handling. 
-
-Key highlights for documentation:
-- Simple one-time setup with just
\ No newline at end of file

From 9726cb009a121e3b027192d18e32dd11da6877d0 Mon Sep 17 00:00:00 2001
From: Roo Code <roomote@roocode.com>
Date: Fri, 5 Sep 2025 05:38:53 +0000
Subject: [PATCH 5/5] docs: add DeepInfra to providers sidebar menu

---
 sidebars.ts | 1 +
 1 file changed, 1 insertion(+)

diff --git a/sidebars.ts b/sidebars.ts
index 5b112eae..e1a045da 100644
--- a/sidebars.ts
+++ b/sidebars.ts
@@ -164,6 +164,7 @@ const sidebars: SidebarsConfig = {
         'providers/claude-code',
         'providers/bedrock',
         'providers/cerebras',
+        'providers/deepinfra',
         'providers/deepseek',
         'providers/doubao',
         'providers/featherless',