2 changes: 1 addition & 1 deletion README.md
@@ -35,7 +35,7 @@
- [简体中文](locales/zh-CN/README.md)
- [繁體中文](locales/zh-TW/README.md)
- ...
</details>
</details>

---

6 changes: 3 additions & 3 deletions apps/web-roo-code/src/app/reviewer/page.tsx
@@ -118,9 +118,9 @@ export default function AgentReviewerPage() {
issues.
</p>
<p>
Roo Code&apos;s PR Reviewer flips the script: you bring your own key and leverage
it to the max – to find real issues, increase code quality and keep your PR
queue moving.
Roo Code&apos;s PR Reviewer flips the script: you bring your own key and
leverage it to the max – to find real issues, increase code quality and keep
your PR queue moving.
</p>
</div>
</div>
2 changes: 1 addition & 1 deletion locales/ca/README.md
2 changes: 1 addition & 1 deletion locales/de/README.md
2 changes: 1 addition & 1 deletion locales/es/README.md
2 changes: 1 addition & 1 deletion locales/fr/README.md
2 changes: 1 addition & 1 deletion locales/hi/README.md
2 changes: 1 addition & 1 deletion locales/id/README.md
2 changes: 1 addition & 1 deletion locales/it/README.md
2 changes: 1 addition & 1 deletion locales/ja/README.md
2 changes: 1 addition & 1 deletion locales/ko/README.md
2 changes: 1 addition & 1 deletion locales/nl/README.md
2 changes: 1 addition & 1 deletion locales/pl/README.md
2 changes: 1 addition & 1 deletion locales/pt-BR/README.md
2 changes: 1 addition & 1 deletion locales/ru/README.md
2 changes: 1 addition & 1 deletion locales/tr/README.md
2 changes: 1 addition & 1 deletion locales/vi/README.md
2 changes: 1 addition & 1 deletion locales/zh-CN/README.md
2 changes: 1 addition & 1 deletion locales/zh-TW/README.md
16 changes: 10 additions & 6 deletions packages/types/src/providers/chutes.ts
@@ -88,7 +88,8 @@ export const chutesModels = {
supportsPromptCache: false,
inputPrice: 0.23,
outputPrice: 0.9,
description: "DeepSeek‑V3.1‑Terminus is an update to V3.1 that improves language consistency by reducing CN/EN mix‑ups and eliminating random characters, while strengthening agent capabilities with notably better Code Agent and Search Agent performance.",
description:
"DeepSeek‑V3.1‑Terminus is an update to V3.1 that improves language consistency by reducing CN/EN mix‑ups and eliminating random characters, while strengthening agent capabilities with notably better Code Agent and Search Agent performance.",
},
"deepseek-ai/DeepSeek-V3.1-turbo": {
maxTokens: 32768,
@@ -97,7 +98,8 @@ export const chutesModels = {
supportsPromptCache: false,
inputPrice: 1.0,
outputPrice: 3.0,
description: "DeepSeek-V3.1-turbo is an FP8, speculative-decoding turbo variant optimized for ultra-fast single-shot queries (~200 TPS), with outputs close to the originals and solid function calling/reasoning/structured output, priced at $1/M input and $3/M output tokens, using 2× quota per request and not intended for bulk workloads.",
description:
"DeepSeek-V3.1-turbo is an FP8, speculative-decoding turbo variant optimized for ultra-fast single-shot queries (~200 TPS), with outputs close to the originals and solid function calling/reasoning/structured output, priced at $1/M input and $3/M output tokens, using 2× quota per request and not intended for bulk workloads.",
},
"deepseek-ai/DeepSeek-V3.2-Exp": {
maxTokens: 163840,
@@ -106,7 +108,8 @@ export const chutesModels = {
supportsPromptCache: false,
inputPrice: 0.25,
outputPrice: 0.35,
description: "DeepSeek-V3.2-Exp is an experimental LLM that introduces DeepSeek Sparse Attention to improve long‑context training and inference efficiency while maintaining performance comparable to V3.1‑Terminus.",
description:
"DeepSeek-V3.2-Exp is an experimental LLM that introduces DeepSeek Sparse Attention to improve long‑context training and inference efficiency while maintaining performance comparable to V3.1‑Terminus.",
},
"unsloth/Llama-3.3-70B-Instruct": {
maxTokens: 32768, // From Groq
@@ -397,8 +400,9 @@ export const chutesModels = {
contextWindow: 262144,
supportsImages: true,
supportsPromptCache: false,
inputPrice: 0.1600,
outputPrice: 0.6500,
description: "Qwen3‑VL‑235B‑A22B‑Thinking is an open‑weight MoE vision‑language model (235B total, ~22B activated) optimized for deliberate multi‑step reasoning with strong text‑image‑video understanding and long‑context capabilities.",
inputPrice: 0.16,
outputPrice: 0.65,
description:
"Qwen3‑VL‑235B‑A22B‑Thinking is an open‑weight MoE vision‑language model (235B total, ~22B activated) optimized for deliberate multi‑step reasoning with strong text‑image‑video understanding and long‑context capabilities.",
},
} as const satisfies Record<string, ModelInfo>
45 changes: 39 additions & 6 deletions src/api/providers/__tests__/io-intelligence.spec.ts
@@ -58,6 +58,42 @@ vi.mock("../fetchers/io-intelligence", () => ({
})),
}))

// Mock the model cache
vi.mock("../fetchers/modelCache", () => ({
getModels: vi.fn().mockImplementation(() => {
return Promise.resolve({
"meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8": {
maxTokens: 8192,
contextWindow: 430000,
description: "Llama 4 Maverick 17B model",
supportsImages: true,
supportsPromptCache: false,
},
"deepseek-ai/DeepSeek-R1-0528": {
maxTokens: 8192,
contextWindow: 128000,
supportsImages: false,
supportsPromptCache: false,
description: "DeepSeek R1 reasoning model",
},
"Intel/Qwen3-Coder-480B-A35B-Instruct-int4-mixed-ar": {
maxTokens: 4096,
contextWindow: 106000,
supportsImages: false,
supportsPromptCache: false,
description: "Qwen3 Coder 480B specialized for coding",
},
"openai/gpt-oss-120b": {
maxTokens: 8192,
contextWindow: 131072,
supportsImages: false,
supportsPromptCache: false,
description: "OpenAI GPT-OSS 120B model",
},
})
}),
}))

// Mock constants
vi.mock("../constants", () => ({
DEFAULT_HEADERS: { "User-Agent": "roo-cline" },
@@ -72,11 +108,11 @@ describe("IOIntelligenceHandler", () => {
let handler: IOIntelligenceHandler
let mockOptions: ApiHandlerOptions

beforeEach(() => {
beforeEach(async () => {
vi.clearAllMocks()
mockOptions = {
ioIntelligenceApiKey: "test-api-key",
apiModelId: "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8",
ioIntelligenceModelId: "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8",
modelTemperature: 0.7,
includeMaxTokens: false,
modelMaxTokens: undefined,
@@ -129,10 +165,7 @@
it("should initialize with correct configuration", () => {
expect(handler).toBeInstanceOf(IOIntelligenceHandler)
expect(handler["client"]).toBeDefined()
expect(handler["options"]).toEqual({
...mockOptions,
apiKey: mockOptions.ioIntelligenceApiKey,
})
expect(handler["options"]).toEqual(mockOptions)
})

it("should throw error when API key is missing", () => {
54 changes: 24 additions & 30 deletions src/api/providers/fetchers/io-intelligence.ts
@@ -1,9 +1,8 @@
import axios from "axios"
import { z } from "zod"

import { type ModelInfo, IO_INTELLIGENCE_CACHE_DURATION } from "@roo-code/types"

import type { ModelRecord } from "../../../shared/api"
import { parseApiPrice } from "../../../shared/cost"

const ioIntelligenceModelSchema = z.object({
id: z.string(),
@@ -29,6 +28,15 @@ const ioIntelligenceModelSchema = z.object({
is_blocking: z.boolean(),
}),
),
max_tokens: z.number().nullable().optional(),
context_window: z.number().optional(),
supports_images_input: z.boolean().optional().default(false),
supports_prompt_cache: z.boolean().optional().default(false),
input_token_price: z.number().nullable().optional(),
output_token_price: z.number().nullable().optional(),
cache_write_token_price: z.number().nullable().optional(),
cache_read_token_price: z.number().nullable().optional(),
precision: z.string().nullable().optional(),
})

export type IOIntelligenceModel = z.infer<typeof ioIntelligenceModelSchema>
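
Review note: the fields added to the schema are all optional (two with defaults), so responses from older API versions that omit them should still parse. A minimal sketch of that behavior, assuming a stripped-down subset of the schema above (the payload is hypothetical):

```ts
import { z } from "zod"

// Subset of the updated schema: new fields are optional, with safe defaults.
const schema = z.object({
	id: z.string(),
	max_tokens: z.number().nullable().optional(),
	supports_images_input: z.boolean().optional().default(false),
})

// A response omitting the new fields still parses; the default is applied.
console.log(schema.parse({ id: "openai/gpt-oss-120b" }))
// => { id: "openai/gpt-oss-120b", supports_images_input: false }
```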
@@ -47,34 +55,21 @@ interface CacheEntry {

let cache: CacheEntry | null = null

/**
* Model context length mapping based on the documentation
* <mcreference link="https://docs.io.net/reference/get-started-with-io-intelligence-api" index="1">1</mcreference>
*/
const MODEL_CONTEXT_LENGTHS: Record<string, number> = {
"meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8": 430000,
"deepseek-ai/DeepSeek-R1-0528": 128000,
"Intel/Qwen3-Coder-480B-A35B-Instruct-int4-mixed-ar": 106000,
"openai/gpt-oss-120b": 131072,
}

const VISION_MODELS = new Set([
"Qwen/Qwen2.5-VL-32B-Instruct",
"meta-llama/Llama-3.2-90B-Vision-Instruct",
"meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8",
])

function parseIOIntelligenceModel(model: IOIntelligenceModel): ModelInfo {
const contextLength = MODEL_CONTEXT_LENGTHS[model.id] || 8192
// Cap maxTokens at 32k for very large context windows, or 20% of context length, whichever is smaller.
const maxTokens = Math.min(contextLength, Math.ceil(contextLength * 0.2), 32768)
const supportsImages = VISION_MODELS.has(model.id)
const contextWindow = model.context_window ?? model.max_model_len ?? 8192

// Use API max_tokens if provided, otherwise calculate 20% of context window
const maxTokens = model.max_tokens && model.max_tokens > 0 ? model.max_tokens : Math.ceil(contextWindow * 0.2)

return {
maxTokens,
contextWindow: contextLength,
supportsImages,
supportsPromptCache: false,
contextWindow,
supportsImages: model.supports_images_input,
supportsPromptCache: model.supports_prompt_cache,
inputPrice: parseApiPrice(model.input_token_price),
outputPrice: parseApiPrice(model.output_token_price),
cacheWritesPrice: parseApiPrice(model.cache_write_token_price),
cacheReadsPrice: parseApiPrice(model.cache_read_token_price),
description: `${model.id} via IO Intelligence`,
}
}
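
Review note: worked through with hypothetical numbers, the new fallback chain in parseIOIntelligenceModel behaves like this (logic copied from the hunk above):

```ts
// Hypothetical API payload: max_tokens is null, context_window is present.
const model = { max_tokens: null, context_window: 106000, max_model_len: undefined }

const contextWindow = model.context_window ?? model.max_model_len ?? 8192 // 106000
const maxTokens =
	model.max_tokens && model.max_tokens > 0
		? model.max_tokens
		: Math.ceil(contextWindow * 0.2) // null => ceil(106000 * 0.2) = 21200
```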
@@ -97,18 +92,17 @@ export async function getIOIntelligenceModels(apiKey?: string): Promise<ModelRec
"Content-Type": "application/json",
}

// Note: IO Intelligence models endpoint does not require authentication
// API key is optional for future use if needed
if (apiKey) {
headers.Authorization = `Bearer ${apiKey}`
} else {
console.error("IO Intelligence API key is required")
throw new Error("IO Intelligence API key is required")
}

const response = await axios.get<IOIntelligenceApiResponse>(
"https://api.intelligence.io.solutions/api/v1/models",
{
headers,
timeout: 10_000,
timeout: 10000,
},
)
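
Review note: this hunk inverts the old behavior: a missing key now throws before any request is sent, instead of being treated as optional. A caller-side sketch (hypothetical usage; the error text is taken from the diff):

```ts
// Hypothetical call site: getIOIntelligenceModels now rejects without a key.
try {
	await getIOIntelligenceModels() // no API key supplied
} catch (err) {
	console.error(err) // Error: IO Intelligence API key is required
}
```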
