feat: Add DeepInfra as a model provider in Roo Code #7677
New changeset entry (`@@ -0,0 +1,6 @@`):

```md
---
"roo-cline": minor
"@roo-code/types": patch
---

Added DeepInfra provider with dynamic model fetching and prompt caching
```
New DeepInfra default model constants in `@roo-code/types` (`@@ -0,0 +1,14 @@`):

```ts
import type { ModelInfo } from "../model.js"

// Default fallback values for DeepInfra when model metadata is not yet loaded.
export const deepInfraDefaultModelId = "Qwen/Qwen3-Coder-480B-A35B-Instruct-Turbo"

export const deepInfraDefaultModelInfo: ModelInfo = {
	maxTokens: 16384,
	contextWindow: 262144,
	supportsImages: false,
	supportsPromptCache: false,
	inputPrice: 0.3,
	outputPrice: 1.2,
	description: "Qwen 3 Coder 480B A35B Instruct Turbo model, 256K context.",
}
```
New DeepInfra provider handler (`@@ -0,0 +1,147 @@`):

```ts
import { Anthropic } from "@anthropic-ai/sdk"
import OpenAI from "openai"

import { deepInfraDefaultModelId, deepInfraDefaultModelInfo } from "@roo-code/types"

import type { ApiHandlerOptions } from "../../shared/api"
import { calculateApiCostOpenAI } from "../../shared/cost"

import { ApiStream, ApiStreamUsageChunk } from "../transform/stream"
import { convertToOpenAiMessages } from "../transform/openai-format"

import type { SingleCompletionHandler, ApiHandlerCreateMessageMetadata } from "../index"
import { RouterProvider } from "./router-provider"
import { getModelParams } from "../transform/model-params"
import { getModels } from "./fetchers/modelCache"

export class DeepInfraHandler extends RouterProvider implements SingleCompletionHandler {
```
> **Contributor:** Missing test coverage for this new provider implementation. Could you add unit tests similar to other providers in the codebase? This would help ensure the DeepInfra integration works correctly and prevent regressions.
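A minimal sketch of what a first test could look like, assuming a vitest setup like the other provider tests; the test file location, the `../deepinfra` and `../fetchers/modelCache` import paths, and the mocking approach are all assumptions, not part of this PR:

```ts
// Hypothetical test sketch; runner (vitest) and relative paths are assumptions.
import { describe, it, expect, vi } from "vitest"

// Stub the shared model cache so the test never hits the network.
vi.mock("../fetchers/modelCache", () => ({
	getModels: vi.fn().mockResolvedValue({}),
}))

import { deepInfraDefaultModelId, deepInfraDefaultModelInfo } from "@roo-code/types"
import { DeepInfraHandler } from "../deepinfra"

describe("DeepInfraHandler", () => {
	it("falls back to the default model when none is configured", async () => {
		const handler = new DeepInfraHandler({ deepInfraApiKey: "test-key" } as any)
		const { id, info } = await handler.fetchModel()

		expect(id).toBe(deepInfraDefaultModelId)
		expect(info.contextWindow).toBe(deepInfraDefaultModelInfo.contextWindow)
	})
})
```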
```ts
	constructor(options: ApiHandlerOptions) {
		super({
			options: {
				...options,
				openAiHeaders: {
					"X-Deepinfra-Source": "roo-code",
					"X-Deepinfra-Version": `2025-08-25`,
```
> **Contributor:** Is this intentional? The version date appears to be in the future (August 2025). Should this be '2024-08-25' or another appropriate date?
>
> Suggested change: `` "X-Deepinfra-Version": `2024-08-25`, ``

> **Collaborator:** We live in the future!
```ts
				},
			},
			name: "deepinfra",
			baseURL: `${options.deepInfraBaseUrl || "https://api.deepinfra.com/v1/openai"}`,
			apiKey: options.deepInfraApiKey || "not-provided",
			modelId: options.deepInfraModelId,
			defaultModelId: deepInfraDefaultModelId,
			defaultModelInfo: deepInfraDefaultModelInfo,
		})
	}

	public override async fetchModel() {
		this.models = await getModels({ provider: this.name, apiKey: this.client.apiKey, baseUrl: this.client.baseURL })
		return this.getModel()
	}

	override getModel() {
		const id = this.options.deepInfraModelId ?? deepInfraDefaultModelId
		const info = this.models[id] ?? deepInfraDefaultModelInfo

		const params = getModelParams({
			format: "openai",
			modelId: id,
			model: info,
			settings: this.options,
		})

		return { id, info, ...params }
	}

	override async *createMessage(
		systemPrompt: string,
		messages: Anthropic.Messages.MessageParam[],
		_metadata?: ApiHandlerCreateMessageMetadata,
	): ApiStream {
		// Ensure we have up-to-date model metadata
		await this.fetchModel()
		const { id: modelId, info, reasoningEffort: reasoning_effort } = await this.fetchModel()
```
> **Contributor:** There's a duplicate call to `fetchModel()` here.
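If the duplication is unintended, a minimal fix (assuming nothing relies on the extra call) would be to keep only the destructuring call:

```ts
		// Refresh model metadata once and take the model params from the same call.
		const { id: modelId, info, reasoningEffort: reasoning_effort } = await this.fetchModel()
```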
```ts
		let prompt_cache_key = undefined
		if (info.supportsPromptCache && _metadata?.taskId) {
			prompt_cache_key = _metadata.taskId
		}

		const requestOptions: OpenAI.Chat.Completions.ChatCompletionCreateParamsStreaming = {
			model: modelId,
			messages: [{ role: "system", content: systemPrompt }, ...convertToOpenAiMessages(messages)],
			stream: true,
			stream_options: { include_usage: true },
			reasoning_effort,
			prompt_cache_key,
		} as OpenAI.Chat.Completions.ChatCompletionCreateParamsStreaming

		if (this.supportsTemperature(modelId)) {
			requestOptions.temperature = this.options.modelTemperature ?? 0
		}

		if (this.options.includeMaxTokens === true && info.maxTokens) {
			;(requestOptions as any).max_completion_tokens = this.options.modelMaxTokens || info.maxTokens
		}

		const { data: stream } = await this.client.chat.completions.create(requestOptions).withResponse()

		let lastUsage: OpenAI.CompletionUsage | undefined
		for await (const chunk of stream) {
			const delta = chunk.choices[0]?.delta

			if (delta?.content) {
				yield { type: "text", text: delta.content }
			}

			if (delta && "reasoning_content" in delta && delta.reasoning_content) {
				yield { type: "reasoning", text: (delta.reasoning_content as string | undefined) || "" }
			}

			if (chunk.usage) {
				lastUsage = chunk.usage
			}
		}

		if (lastUsage) {
			yield this.processUsageMetrics(lastUsage, info)
		}
	}

	async completePrompt(prompt: string): Promise<string> {
		await this.fetchModel()
		const { id: modelId, info } = this.getModel()

		const requestOptions: OpenAI.Chat.Completions.ChatCompletionCreateParamsNonStreaming = {
			model: modelId,
			messages: [{ role: "user", content: prompt }],
		}
		if (this.supportsTemperature(modelId)) {
			requestOptions.temperature = this.options.modelTemperature ?? 0
		}
		if (this.options.includeMaxTokens === true && info.maxTokens) {
			;(requestOptions as any).max_completion_tokens = this.options.modelMaxTokens || info.maxTokens
		}

		const resp = await this.client.chat.completions.create(requestOptions)
		return resp.choices[0]?.message?.content || ""
	}

	protected processUsageMetrics(usage: any, modelInfo?: any): ApiStreamUsageChunk {
		const inputTokens = usage?.prompt_tokens || 0
		const outputTokens = usage?.completion_tokens || 0
		const cacheWriteTokens = usage?.prompt_tokens_details?.cache_write_tokens || 0
		const cacheReadTokens = usage?.prompt_tokens_details?.cached_tokens || 0

		const totalCost = modelInfo
			? calculateApiCostOpenAI(modelInfo, inputTokens, outputTokens, cacheWriteTokens, cacheReadTokens)
			: 0

		return {
			type: "usage",
			inputTokens,
			outputTokens,
			cacheWriteTokens: cacheWriteTokens || undefined,
			cacheReadTokens: cacheReadTokens || undefined,
			totalCost,
		}
	}
}
```
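For illustration, a hedged sketch of exercising the handler directly; the option names come from the constructor above, but the surrounding wiring into Roo Code's provider factory is not shown in this diff and the snippet assumes an async context with a valid API key:

```ts
// Illustrative usage only; run inside an async function.
const handler = new DeepInfraHandler({
	deepInfraApiKey: process.env.DEEPINFRA_API_KEY,
	deepInfraModelId: "Qwen/Qwen3-Coder-480B-A35B-Instruct-Turbo",
} as ApiHandlerOptions)

const reply = await handler.completePrompt("Reply with the single word: ready")
console.log(reply)
```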
New DeepInfra model fetcher (`@@ -0,0 +1,71 @@`):

```ts
import axios from "axios"
import { z } from "zod"

import { type ModelInfo } from "@roo-code/types"

import { DEFAULT_HEADERS } from "../constants"

// DeepInfra models endpoint follows OpenAI /models shape with an added metadata object.

const DeepInfraModelSchema = z.object({
	id: z.string(),
	object: z.literal("model").optional(),
	owned_by: z.string().optional(),
	created: z.number().optional(),
	root: z.string().optional(),
	metadata: z
		.object({
			description: z.string().optional(),
			context_length: z.number().optional(),
			max_tokens: z.number().optional(),
			tags: z.array(z.string()).optional(), // e.g., ["vision", "prompt_cache"]
			pricing: z
				.object({
					input_tokens: z.number().optional(),
					output_tokens: z.number().optional(),
					cache_read_tokens: z.number().optional(),
				})
				.optional(),
		})
		.optional(),
})

const DeepInfraModelsResponseSchema = z.object({ data: z.array(DeepInfraModelSchema) })

export async function getDeepInfraModels(
	apiKey?: string,
	baseUrl: string = "https://api.deepinfra.com/v1/openai",
): Promise<Record<string, ModelInfo>> {
	const headers: Record<string, string> = { ...DEFAULT_HEADERS }
	if (apiKey) headers["Authorization"] = `Bearer ${apiKey}`

	const url = `${baseUrl.replace(/\/$/, "")}/models`
	const models: Record<string, ModelInfo> = {}

	const response = await axios.get(url, { headers })
```
> **Contributor:** Consider adding more specific error handling here. For example, distinguishing between rate limiting (429), authentication failures (401/403), and other errors would provide better user feedback. Could we enhance this to match the error handling patterns used in other fetchers?
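A hedged sketch of what that could look like at this call site, assuming axios errors should be rethrown with clearer messages; the exact wording and which statuses to special-case are illustrative, not the patterns used by the other fetchers:

```ts
	// Illustrative only: map common HTTP failures to clearer errors before rethrowing.
	let response
	try {
		response = await axios.get(url, { headers })
	} catch (error) {
		if (axios.isAxiosError(error)) {
			const status = error.response?.status
			if (status === 401 || status === 403) {
				throw new Error("DeepInfra: authentication failed; check your API key.")
			}
			if (status === 429) {
				throw new Error("DeepInfra: rate limited; please retry later.")
			}
		}
		throw error
	}
```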
```ts
	const parsed = DeepInfraModelsResponseSchema.safeParse(response.data)
	const data = parsed.success ? parsed.data.data : response.data?.data || []

	for (const m of data as Array<z.infer<typeof DeepInfraModelSchema>>) {
		const meta = m.metadata || {}
		const tags = meta.tags || []

		const contextWindow = typeof meta.context_length === "number" ? meta.context_length : 8192
		const maxTokens = typeof meta.max_tokens === "number" ? meta.max_tokens : Math.ceil(contextWindow * 0.2)

		const info: ModelInfo = {
			maxTokens,
			contextWindow,
			supportsImages: tags.includes("vision"),
			supportsPromptCache: tags.includes("prompt_cache"),
			inputPrice: meta.pricing?.input_tokens,
			outputPrice: meta.pricing?.output_tokens,
			cacheReadsPrice: meta.pricing?.cache_read_tokens,
			description: meta.description,
		}

		models[m.id] = info
	}

	return models
}
```
> Consider adding JSDoc comments to document the DeepInfra-specific features, especially the prompt caching support. This would help other developers understand the unique capabilities of this provider.
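A hedged sketch of the kind of JSDoc that could sit above `getDeepInfraModels`; the wording is illustrative and only restates behavior visible in the diff above:

```ts
/**
 * Fetches the models available on DeepInfra.
 *
 * DeepInfra exposes an OpenAI-compatible `/models` endpoint that adds a
 * `metadata` object per model. Tags in `metadata.tags` advertise optional
 * capabilities: "vision" enables image input and "prompt_cache" enables
 * prompt caching (the handler then forwards the task id as `prompt_cache_key`).
 *
 * @param apiKey  Optional DeepInfra API key, sent as a Bearer token when present.
 * @param baseUrl Base URL of the OpenAI-compatible API (defaults to https://api.deepinfra.com/v1/openai).
 * @returns A map from model id to normalized ModelInfo.
 */
```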