diff --git a/README.md b/README.md
index 14de4ab10941..099bf623d503 100644
--- a/README.md
+++ b/README.md
@@ -35,7 +35,7 @@
 - [简体中文](locales/zh-CN/README.md)
 - [繁體中文](locales/zh-TW/README.md)
 - ...
-
+
 ---
diff --git a/locales/ca/README.md b/locales/ca/README.md
index 2c7d1788f775..93608042a4eb 100644
--- a/locales/ca/README.md
+++ b/locales/ca/README.md
@@ -35,7 +35,7 @@
 - [简体中文](../zh-CN/README.md)
 - [繁體中文](../zh-TW/README.md)
 - ...
-
+
 ---
diff --git a/locales/de/README.md b/locales/de/README.md
index 22b4db6399a8..d500dc542d8d 100644
--- a/locales/de/README.md
+++ b/locales/de/README.md
@@ -35,7 +35,7 @@
 - [简体中文](../zh-CN/README.md)
 - [繁體中文](../zh-TW/README.md)
 - ...
-
+
 ---
diff --git a/locales/es/README.md b/locales/es/README.md
index 3f0135c1d83c..398247b556b2 100644
--- a/locales/es/README.md
+++ b/locales/es/README.md
@@ -35,7 +35,7 @@
 - [简体中文](../zh-CN/README.md)
 - [繁體中文](../zh-TW/README.md)
 - ...
-
+
 ---
diff --git a/locales/fr/README.md b/locales/fr/README.md
index 9596d9cbae72..b46bf46c8f83 100644
--- a/locales/fr/README.md
+++ b/locales/fr/README.md
@@ -35,7 +35,7 @@
 - [简体中文](../zh-CN/README.md)
 - [繁體中文](../zh-TW/README.md)
 - ...
-
+
 ---
diff --git a/locales/hi/README.md b/locales/hi/README.md
index 53939eb80213..f399c6f7e97d 100644
--- a/locales/hi/README.md
+++ b/locales/hi/README.md
@@ -35,7 +35,7 @@
 - [简体中文](../zh-CN/README.md)
 - [繁體中文](../zh-TW/README.md)
 - ...
-
+
 ---
diff --git a/locales/id/README.md b/locales/id/README.md
index 8ec72301fc2b..a164cd9c0f9d 100644
--- a/locales/id/README.md
+++ b/locales/id/README.md
@@ -35,7 +35,7 @@
 - [简体中文](../zh-CN/README.md)
 - [繁體中文](../zh-TW/README.md)
 - ...
-
+
 ---
diff --git a/locales/it/README.md b/locales/it/README.md
index e3687df010b0..628399ac71e9 100644
--- a/locales/it/README.md
+++ b/locales/it/README.md
@@ -35,7 +35,7 @@
 - [简体中文](../zh-CN/README.md)
 - [繁體中文](../zh-TW/README.md)
 - ...
-
+
 ---
diff --git a/locales/ja/README.md b/locales/ja/README.md
index 16e2f0d59d76..f6ecc9e62be9 100644
--- a/locales/ja/README.md
+++ b/locales/ja/README.md
@@ -35,7 +35,7 @@
 - [简体中文](../zh-CN/README.md)
 - [繁體中文](../zh-TW/README.md)
 - ...
-
+
 ---
diff --git a/locales/ko/README.md b/locales/ko/README.md
index 6ad585859bed..b303e4f34c01 100644
--- a/locales/ko/README.md
+++ b/locales/ko/README.md
@@ -35,7 +35,7 @@
 - [简体中文](../zh-CN/README.md)
 - [繁體中文](../zh-TW/README.md)
 - ...
-
+
 ---
diff --git a/locales/nl/README.md b/locales/nl/README.md
index c8b660dfb0d8..976ccef2462a 100644
--- a/locales/nl/README.md
+++ b/locales/nl/README.md
@@ -35,7 +35,7 @@
 - [简体中文](../zh-CN/README.md)
 - [繁體中文](../zh-TW/README.md)
 - ...
-
+
 ---
diff --git a/locales/pl/README.md b/locales/pl/README.md
index 099c4154571f..22ba0e8ff97d 100644
--- a/locales/pl/README.md
+++ b/locales/pl/README.md
@@ -35,7 +35,7 @@
 - [简体中文](../zh-CN/README.md)
 - [繁體中文](../zh-TW/README.md)
 - ...
-
+
 ---
diff --git a/locales/pt-BR/README.md b/locales/pt-BR/README.md
index ea27fb3a14f2..0b0562f68d7e 100644
--- a/locales/pt-BR/README.md
+++ b/locales/pt-BR/README.md
@@ -35,7 +35,7 @@
 - [简体中文](../zh-CN/README.md)
 - [繁體中文](../zh-TW/README.md)
 - ...
-
+
 ---
diff --git a/locales/ru/README.md b/locales/ru/README.md
index 70f1f8cd903a..5d35be25a864 100644
--- a/locales/ru/README.md
+++ b/locales/ru/README.md
@@ -35,7 +35,7 @@
 - [简体中文](../zh-CN/README.md)
 - [繁體中文](../zh-TW/README.md)
 - ...
-
+
 ---
diff --git a/locales/tr/README.md b/locales/tr/README.md
index a540061325aa..a1fc75a3984c 100644
--- a/locales/tr/README.md
+++ b/locales/tr/README.md
@@ -35,7 +35,7 @@
 - [简体中文](../zh-CN/README.md)
 - [繁體中文](../zh-TW/README.md)
 - ...
-
+
 ---
diff --git a/locales/vi/README.md b/locales/vi/README.md
index 3c846d9c965c..92e7d7e1f3e4 100644
--- a/locales/vi/README.md
+++ b/locales/vi/README.md
@@ -35,7 +35,7 @@
 - [简体中文](../zh-CN/README.md)
 - [繁體中文](../zh-TW/README.md)
 - ...
-
+
 ---
diff --git a/locales/zh-CN/README.md b/locales/zh-CN/README.md
index 5a4f5e77c14f..6a9c2e2b2ece 100644
--- a/locales/zh-CN/README.md
+++ b/locales/zh-CN/README.md
@@ -35,7 +35,7 @@
 - [简体中文](../zh-CN/README.md)
 - [繁體中文](../zh-TW/README.md)
 - ...
-
+
 ---
diff --git a/locales/zh-TW/README.md b/locales/zh-TW/README.md
index 240d6794c409..975e4fc27304 100644
--- a/locales/zh-TW/README.md
+++ b/locales/zh-TW/README.md
@@ -35,7 +35,7 @@
 - [简体中文](../zh-CN/README.md)
 - [繁體中文](../zh-TW/README.md)
 - ...
-
+
 ---
diff --git a/src/api/providers/__tests__/io-intelligence.spec.ts b/src/api/providers/__tests__/io-intelligence.spec.ts
index 3b46b79ee25f..ac9005edab35 100644
--- a/src/api/providers/__tests__/io-intelligence.spec.ts
+++ b/src/api/providers/__tests__/io-intelligence.spec.ts
@@ -58,6 +58,42 @@ vi.mock("../fetchers/io-intelligence", () => ({
 	})),
 }))
+// Mock the model cache
+vi.mock("../fetchers/modelCache", () => ({
+	getModels: vi.fn().mockImplementation(() => {
+		return Promise.resolve({
+			"meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8": {
+				maxTokens: 8192,
+				contextWindow: 430000,
+				description: "Llama 4 Maverick 17B model",
+				supportsImages: true,
+				supportsPromptCache: false,
+			},
+			"deepseek-ai/DeepSeek-R1-0528": {
+				maxTokens: 8192,
+				contextWindow: 128000,
+				supportsImages: false,
+				supportsPromptCache: false,
+				description: "DeepSeek R1 reasoning model",
+			},
+			"Intel/Qwen3-Coder-480B-A35B-Instruct-int4-mixed-ar": {
+				maxTokens: 4096,
+				contextWindow: 106000,
+				supportsImages: false,
+				supportsPromptCache: false,
+				description: "Qwen3 Coder 480B specialized for coding",
+			},
+			"openai/gpt-oss-120b": {
+				maxTokens: 8192,
+				contextWindow: 131072,
+				supportsImages: false,
+				supportsPromptCache: false,
+				description: "OpenAI GPT-OSS 120B model",
+			},
+		})
+	}),
+}))
+
 // Mock constants
 vi.mock("../constants", () => ({
 	DEFAULT_HEADERS: { "User-Agent": "roo-cline" },
 }))
@@ -72,11 +108,11 @@ describe("IOIntelligenceHandler", () => {
 	let handler: IOIntelligenceHandler
 	let mockOptions: ApiHandlerOptions
 
-	beforeEach(() => {
+	beforeEach(async () => {
 		vi.clearAllMocks()
 		mockOptions = {
 			ioIntelligenceApiKey: "test-api-key",
-			apiModelId: "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8",
+			ioIntelligenceModelId: "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8",
 			modelTemperature: 0.7,
 			includeMaxTokens: false,
 			modelMaxTokens: undefined,
@@ -129,17 +165,19 @@
 	it("should initialize with correct configuration", () => {
 		expect(handler).toBeInstanceOf(IOIntelligenceHandler)
 		expect(handler["client"]).toBeDefined()
-		expect(handler["options"]).toEqual({
-			...mockOptions,
-			apiKey: mockOptions.ioIntelligenceApiKey,
-		})
+		expect(handler["options"]).toEqual(mockOptions)
 	})
 
-	it("should throw error when API key is missing", () => {
+	it("should allow handler creation without API key for model fetching", () => {
 		const optionsWithoutKey = { ...mockOptions }
 		delete optionsWithoutKey.ioIntelligenceApiKey
 
-		expect(() => new IOIntelligenceHandler(optionsWithoutKey)).toThrow("IO Intelligence API key is required")
+		// Handler can be created without API key (validation happens at UI level)
+		const handlerWithoutKey = new IOIntelligenceHandler(optionsWithoutKey)
+		expect(handlerWithoutKey).toBeInstanceOf(IOIntelligenceHandler)
+		expect(handlerWithoutKey["client"]).toBeDefined()
+		// Client should have a placeholder API key
+		expect(handlerWithoutKey["client"].apiKey).toBe("not-provided")
 	})
 
 	it("should handle streaming response correctly", async () => {
diff --git a/src/api/providers/fetchers/io-intelligence.ts b/src/api/providers/fetchers/io-intelligence.ts
index 42d88083b966..eeaa0e839fa3 100644
--- a/src/api/providers/fetchers/io-intelligence.ts
+++ b/src/api/providers/fetchers/io-intelligence.ts
@@ -1,9 +1,8 @@
 import axios from "axios"
 import { z } from "zod"
-
 import { type ModelInfo, IO_INTELLIGENCE_CACHE_DURATION } from "@roo-code/types"
-
 import type { ModelRecord } from "../../../shared/api"
+import { parseApiPrice } from "../../../shared/cost"
 
 const ioIntelligenceModelSchema = z.object({
 	id: z.string(),
@@ -29,6 +28,15 @@ const ioIntelligenceModelSchema = z.object({
 			is_blocking: z.boolean(),
 		}),
 	),
+	max_tokens: z.number().nullable().optional(),
+	context_window: z.number().optional(),
+	supports_images_input: z.boolean().optional().default(false),
+	supports_prompt_cache: z.boolean().optional().default(false),
+	input_token_price: z.number().nullable().optional(),
+	output_token_price: z.number().nullable().optional(),
+	cache_write_token_price: z.number().nullable().optional(),
+	cache_read_token_price: z.number().nullable().optional(),
+	precision: z.string().nullable().optional(),
 })
 
 export type IOIntelligenceModel = z.infer<typeof ioIntelligenceModelSchema>
@@ -47,34 +55,21 @@ interface CacheEntry {
 
 let cache: CacheEntry | null = null
 
-/**
- * Model context length mapping based on the documentation
- * 1
- */
-const MODEL_CONTEXT_LENGTHS: Record<string, number> = {
-	"meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8": 430000,
-	"deepseek-ai/DeepSeek-R1-0528": 128000,
-	"Intel/Qwen3-Coder-480B-A35B-Instruct-int4-mixed-ar": 106000,
-	"openai/gpt-oss-120b": 131072,
-}
-
-const VISION_MODELS = new Set([
-	"Qwen/Qwen2.5-VL-32B-Instruct",
-	"meta-llama/Llama-3.2-90B-Vision-Instruct",
-	"meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8",
-])
-
 function parseIOIntelligenceModel(model: IOIntelligenceModel): ModelInfo {
-	const contextLength = MODEL_CONTEXT_LENGTHS[model.id] || 8192
-	// Cap maxTokens at 32k for very large context windows, or 20% of context length, whichever is smaller.
-	const maxTokens = Math.min(contextLength, Math.ceil(contextLength * 0.2), 32768)
-	const supportsImages = VISION_MODELS.has(model.id)
+	const contextWindow = model.context_window ?? model.max_model_len ?? 8192
+
+	// Use API max_tokens if provided, otherwise calculate 20% of context window
+	const maxTokens = model.max_tokens && model.max_tokens > 0 ? model.max_tokens : Math.ceil(contextWindow * 0.2)
 
 	return {
 		maxTokens,
-		contextWindow: contextLength,
-		supportsImages,
-		supportsPromptCache: false,
+		contextWindow,
+		supportsImages: model.supports_images_input,
+		supportsPromptCache: model.supports_prompt_cache,
+		inputPrice: parseApiPrice(model.input_token_price),
+		outputPrice: parseApiPrice(model.output_token_price),
+		cacheWritesPrice: parseApiPrice(model.cache_write_token_price),
+		cacheReadsPrice: parseApiPrice(model.cache_read_token_price),
 		description: `${model.id} via IO Intelligence`,
 	}
 }
@@ -97,18 +92,17 @@ export async function getIOIntelligenceModels(apiKey?: string): Promise<ModelRecord> {
 	const response = await axios.get<IOIntelligenceModelsResponse>(
 		"https://api.intelligence.io.solutions/api/v1/models",
 		{
 			headers,
-			timeout: 10_000,
+			timeout: 10000,
 		},
 	)
diff --git a/src/api/providers/io-intelligence.ts b/src/api/providers/io-intelligence.ts
index ef1c60a6a2c7..11abb32cc0ed 100644
--- a/src/api/providers/io-intelligence.ts
+++ b/src/api/providers/io-intelligence.ts
@@ -1,44 +1,150 @@
 import { ioIntelligenceDefaultModelId, ioIntelligenceModels, type IOIntelligenceModelId } from "@roo-code/types"
+import { Anthropic } from "@anthropic-ai/sdk"
+import OpenAI from "openai"
 
-import type { ApiHandlerOptions } from "../../shared/api"
-import { BaseOpenAiCompatibleProvider } from "./base-openai-compatible-provider"
+import type { ApiHandlerOptions, ModelRecord } from "../../shared/api"
+import { BaseProvider } from "./base-provider"
+import { getModels } from "./fetchers/modelCache"
+import type { SingleCompletionHandler, ApiHandlerCreateMessageMetadata } from "../index"
+import { ApiStream } from "../transform/stream"
+import { convertToOpenAiMessages } from "../transform/openai-format"
+import { getModelParams } from "../transform/model-params"
+import { DEFAULT_HEADERS } from "./constants"
+import { handleOpenAIError } from "./utils/openai-error-handler"
+
+export class IOIntelligenceHandler extends BaseProvider implements SingleCompletionHandler {
+	protected options: ApiHandlerOptions
+	private client: OpenAI
+	protected models: ModelRecord = {}
+	private readonly providerName = "IO Intelligence"
 
-export class IOIntelligenceHandler extends BaseOpenAiCompatibleProvider<IOIntelligenceModelId> {
 	constructor(options: ApiHandlerOptions) {
-		if (!options.ioIntelligenceApiKey) {
-			throw new Error("IO Intelligence API key is required")
-		}
+		super()
+		this.options = options
 
-		super({
-			...options,
-			providerName: "IO Intelligence",
+		// API key is optional for model discovery, but required for actual API calls
+		this.client = new OpenAI({
 			baseURL: "https://api.intelligence.io.solutions/api/v1",
-			defaultProviderModelId: ioIntelligenceDefaultModelId,
-			providerModels: ioIntelligenceModels,
-			defaultTemperature: 0.7,
-			apiKey: options.ioIntelligenceApiKey,
+			apiKey: options.ioIntelligenceApiKey || "not-provided",
+			defaultHeaders: DEFAULT_HEADERS,
 		})
 	}
 
+	public async fetchModel() {
+		try {
+			this.models = await getModels({
+				provider: "io-intelligence",
+				apiKey: this.options.ioIntelligenceApiKey || undefined,
+			})
+		} catch (error) {
+			console.error("Failed to fetch IO Intelligence models, falling back to default models:", error)
+			this.models = ioIntelligenceModels
+		}
+		return this.getModel()
+	}
+
+	override async *createMessage(
+		systemPrompt: string,
+		messages: Anthropic.Messages.MessageParam[],
+		metadata?: ApiHandlerCreateMessageMetadata,
+	): ApiStream {
+		const model = await this.fetchModel()
+
+		const { id: modelId, maxTokens, temperature } = model
+
+		// Convert Anthropic messages to OpenAI format
+		const openAiMessages: OpenAI.Chat.ChatCompletionMessageParam[] = [
+			{ role: "system", content: systemPrompt },
+			...convertToOpenAiMessages(messages),
+		]
+
+		const completionParams: OpenAI.Chat.ChatCompletionCreateParams = {
+			model: modelId,
+			...(maxTokens && maxTokens > 0 && { max_tokens: maxTokens }),
+			temperature: temperature ?? 0.7,
+			messages: openAiMessages,
+			stream: true,
+			stream_options: { include_usage: true },
+		}
+
+		let stream: AsyncIterable<OpenAI.Chat.Completions.ChatCompletionChunk>
+		try {
+			stream = await this.client.chat.completions.create(completionParams)
+		} catch (error) {
+			throw handleOpenAIError(error, this.providerName)
+		}
+
+		let lastUsage: OpenAI.Completions.CompletionUsage | undefined = undefined
+
+		for await (const chunk of stream) {
+			const delta = chunk.choices[0]?.delta
+
+			if (delta?.content) {
+				yield { type: "text", text: delta.content }
+			}
+
+			if (chunk.usage) {
+				lastUsage = chunk.usage
+			}
+		}
+
+		if (lastUsage) {
+			yield {
+				type: "usage",
+				inputTokens: lastUsage.prompt_tokens || 0,
+				outputTokens: lastUsage.completion_tokens || 0,
+			}
+		}
+	}
+
+	async completePrompt(prompt: string): Promise<string> {
+		const { id: modelId } = await this.fetchModel()
+
+		try {
+			const response = await this.client.chat.completions.create({
+				model: modelId,
+				messages: [{ role: "user", content: prompt }],
+			})
+
+			return response.choices[0]?.message.content || ""
+		} catch (error) {
+			throw handleOpenAIError(error, this.providerName)
+		}
+	}
+
 	override getModel() {
-		const modelId = this.options.ioIntelligenceModelId || (ioIntelligenceDefaultModelId as IOIntelligenceModelId)
+		const modelId = this.options.ioIntelligenceModelId || ioIntelligenceDefaultModelId
+
+		// If models haven't been fetched yet, use fallback
+		if (!this.models || Object.keys(this.models).length === 0) {
+			this.models = ioIntelligenceModels
+		}
 
-		const modelInfo =
-			this.providerModels[modelId as IOIntelligenceModelId] ?? this.providerModels[ioIntelligenceDefaultModelId]
+		let modelInfo = this.models[modelId]
 
-		if (modelInfo) {
-			return { id: modelId as IOIntelligenceModelId, info: modelInfo }
+		if (!modelInfo) {
+			modelInfo =
+				ioIntelligenceModels[modelId as IOIntelligenceModelId] ??
+				ioIntelligenceModels[ioIntelligenceDefaultModelId]
 		}
 
-		// Return the requested model ID even if not found, with fallback info.
-		return {
-			id: modelId as IOIntelligenceModelId,
-			info: {
+		if (!modelInfo) {
+			// Return the requested model ID even if not found, with fallback info
+			modelInfo = {
 				maxTokens: 8192,
 				contextWindow: 128000,
 				supportsImages: false,
 				supportsPromptCache: false,
-			},
+			}
 		}
+
+		const params = getModelParams({
+			format: "openai",
+			modelId,
+			model: modelInfo,
+			settings: this.options,
+		})
+
+		return { id: modelId, info: modelInfo, ...params }
 	}
 }
diff --git a/src/shared/api.ts b/src/shared/api.ts
index 802654adaad9..3f19769e3952 100644
--- a/src/shared/api.ts
+++ b/src/shared/api.ts
@@ -157,7 +157,7 @@ const dynamicProviderExtras = {
 	huggingface: {} as {}, // eslint-disable-line @typescript-eslint/no-empty-object-type
 	litellm: {} as { apiKey: string; baseUrl: string },
 	deepinfra: {} as { apiKey?: string; baseUrl?: string },
-	"io-intelligence": {} as { apiKey: string },
+	"io-intelligence": {} as { apiKey?: string },
 	requesty: {} as { apiKey?: string; baseUrl?: string },
 	unbound: {} as { apiKey?: string },
 	glama: {} as {}, // eslint-disable-line @typescript-eslint/no-empty-object-type
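Below is a minimal usage sketch of the reworked handler, separate from the patch itself. It assumes a Node environment, a hypothetical repo-relative import path, an illustrative env var name, and example option values; the ApiHandlerOptions fields, fetchModel() fallback behavior, and the "text"/"usage" stream chunks are the ones introduced in the diff above.

import { IOIntelligenceHandler } from "./src/api/providers/io-intelligence" // hypothetical path from repo root

// Illustrative options: ioIntelligenceApiKey may now be undefined, since the
// constructor falls back to the "not-provided" placeholder key.
const handler = new IOIntelligenceHandler({
	ioIntelligenceApiKey: process.env.IO_INTELLIGENCE_API_KEY,
	ioIntelligenceModelId: "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8",
	modelTemperature: 0.7,
})

async function demo() {
	// fetchModel() pulls the dynamic model list via getModels() and falls back
	// to the static ioIntelligenceModels map if the request fails.
	const { id, info } = await handler.fetchModel()
	console.log(id, info.contextWindow, info.maxTokens)

	// createMessage() yields text chunks while streaming and a usage chunk at the end.
	const messages = [{ role: "user" as const, content: "Say hello." }]
	for await (const chunk of handler.createMessage("You are a helpful assistant.", messages)) {
		if (chunk.type === "text") process.stdout.write(chunk.text)
		if (chunk.type === "usage") console.log(`\n${chunk.inputTokens} in / ${chunk.outputTokens} out`)
	}
}

demo().catch(console.error)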