Commit 335bd70

feat: add DeepInfra provider with dynamic model fetching and prompt caching

1 parent 0be6743 commit 335bd70
22 files changed, +403 -0 lines changed

packages/types/src/global-settings.ts

Lines changed: 1 addition & 0 deletions
@@ -203,6 +203,7 @@ export const SECRET_STATE_KEYS = [
 	"groqApiKey",
 	"chutesApiKey",
 	"litellmApiKey",
+	"deepInfraApiKey",
 	"codeIndexOpenAiKey",
 	"codeIndexQdrantApiKey",
 	// kilocode_change start

packages/types/src/provider-settings.ts

Lines changed: 12 additions & 0 deletions
@@ -53,6 +53,7 @@ export const providerNames = [
 	"fake-ai",
 	"xai",
 	"groq",
+	"deepinfra",
 	"chutes",
 	"litellm",
 	// kilocode_change start
@@ -248,6 +249,12 @@ const deepSeekSchema = apiModelIdProviderModelSchema.extend({
 	deepSeekApiKey: z.string().optional(),
 })
 
+const deepInfraSchema = apiModelIdProviderModelSchema.extend({
+	deepInfraBaseUrl: z.string().optional(),
+	deepInfraApiKey: z.string().optional(),
+	deepInfraModelId: z.string().optional(),
+})
+
 const doubaoSchema = apiModelIdProviderModelSchema.extend({
 	doubaoBaseUrl: z.string().optional(),
 	doubaoApiKey: z.string().optional(),
@@ -385,6 +392,7 @@ export const providerSettingsSchemaDiscriminated = z.discriminatedUnion("apiProv
 	openAiNativeSchema.merge(z.object({ apiProvider: z.literal("openai-native") })),
 	mistralSchema.merge(z.object({ apiProvider: z.literal("mistral") })),
 	deepSeekSchema.merge(z.object({ apiProvider: z.literal("deepseek") })),
+	deepInfraSchema.merge(z.object({ apiProvider: z.literal("deepinfra") })),
 	doubaoSchema.merge(z.object({ apiProvider: z.literal("doubao") })),
 	moonshotSchema.merge(z.object({ apiProvider: z.literal("moonshot") })),
 	unboundSchema.merge(z.object({ apiProvider: z.literal("unbound") })),
@@ -434,6 +442,7 @@ export const providerSettingsSchema = z.object({
 	...openAiNativeSchema.shape,
 	...mistralSchema.shape,
 	...deepSeekSchema.shape,
+	...deepInfraSchema.shape,
 	...doubaoSchema.shape,
 	...moonshotSchema.shape,
 	...unboundSchema.shape,
@@ -478,6 +487,7 @@ export const MODEL_ID_KEYS: Partial<keyof ProviderSettings>[] = [
 	"litellmModelId",
 	"huggingFaceModelId",
 	"ioIntelligenceModelId",
+	"deepInfraModelId",
 ]
 
 export const getModelId = (settings: ProviderSettings): string | undefined => {
@@ -593,6 +603,7 @@ export const MODELS_BY_PROVIDER: Record<
 	openrouter: { id: "openrouter", label: "OpenRouter", models: [] },
 	requesty: { id: "requesty", label: "Requesty", models: [] },
 	unbound: { id: "unbound", label: "Unbound", models: [] },
+	deepinfra: { id: "deepinfra", label: "DeepInfra", models: [] },
 
 	// kilocode_change start
 	kilocode: { id: "kilocode", label: "Kilocode", models: [] },
@@ -608,6 +619,7 @@ export const dynamicProviders = [
 	"openrouter",
 	"requesty",
 	"unbound",
+	"deepinfra",
 	// kilocode_change start
 	"kilocode",
 	"virtual-quota-fallback",
packages/types/src/providers/deepinfra.ts

Lines changed: 14 additions & 0 deletions

@@ -0,0 +1,14 @@
+import type { ModelInfo } from "../model.js"
+
+// Default fallback values for DeepInfra when model metadata is not yet loaded.
+export const deepInfraDefaultModelId = "Qwen/Qwen3-Coder-480B-A35B-Instruct-Turbo"
+
+export const deepInfraDefaultModelInfo: ModelInfo = {
+	maxTokens: 16384,
+	contextWindow: 262144,
+	supportsImages: false,
+	supportsPromptCache: false,
+	inputPrice: 0.3,
+	outputPrice: 1.2,
+	description: "Qwen 3 Coder 480B A35B Instruct Turbo model, 256K context.",
+}

packages/types/src/providers/index.ts

Lines changed: 1 addition & 0 deletions
@@ -29,3 +29,4 @@ export * from "./vertex.js"
 export * from "./vscode-llm.js"
 export * from "./xai.js"
 export * from "./zai.js"
+export * from "./deepinfra.js"

src/api/index.ts

Lines changed: 3 additions & 0 deletions
@@ -43,6 +43,7 @@ import {
 	FireworksHandler,
 	RooHandler,
 	FeatherlessHandler,
+	DeepInfraHandler,
 } from "./providers"
 // kilocode_change start
 import { KilocodeOpenrouterHandler } from "./providers/kilocode-openrouter"
@@ -145,6 +146,8 @@ export function buildApiHandler(configuration: ProviderSettings): ApiHandler {
 			return new XAIHandler(options)
 		case "groq":
 			return new GroqHandler(options)
+		case "deepinfra":
+			return new DeepInfraHandler(options)
 		case "huggingface":
 			return new HuggingFaceHandler(options)
 		case "chutes":

src/api/providers/deepinfra.ts

Lines changed: 144 additions & 0 deletions
@@ -0,0 +1,144 @@
+import { Anthropic } from "@anthropic-ai/sdk" // for message param types
+import OpenAI from "openai"
+
+import { deepInfraDefaultModelId, deepInfraDefaultModelInfo } from "@roo-code/types"
+
+import type { ApiHandlerOptions } from "../../shared/api"
+import { calculateApiCostOpenAI } from "../../shared/cost"
+
+import { ApiStream, ApiStreamUsageChunk } from "../transform/stream"
+import { convertToOpenAiMessages } from "../transform/openai-format"
+
+import type { SingleCompletionHandler, ApiHandlerCreateMessageMetadata } from "../index"
+import { RouterProvider } from "./router-provider"
+import { getModelParams } from "../transform/model-params"
+import { getModels } from "./fetchers/modelCache"
+
+/**
+ * DeepInfra provider handler (OpenAI compatible)
+ */
+export class DeepInfraHandler extends RouterProvider implements SingleCompletionHandler {
+	constructor(options: ApiHandlerOptions) {
+		super({
+			options: {
+				...options,
+				openAiHeaders: {
+					"X-Deepinfra-Source": "kilocode",
+					"X-Deepinfra-Version": `2025-08-25`,
+				},
+			},
+			name: "deepinfra",
+			baseURL: `${options.deepInfraBaseUrl || "https://api.deepinfra.com/v1/openai"}`,
+			apiKey: options.deepInfraApiKey || "not-provided",
+			modelId: options.deepInfraModelId,
+			defaultModelId: deepInfraDefaultModelId,
+			defaultModelInfo: deepInfraDefaultModelInfo,
+		})
+	}
+
+	public override async fetchModel() {
+		this.models = await getModels({ provider: this.name, apiKey: this.client.apiKey, baseUrl: this.client.baseURL })
+		return this.getModel()
+	}
+
+	override getModel() {
+		const id = this.options.deepInfraModelId ?? deepInfraDefaultModelId
+		const info = this.models[id] ?? deepInfraDefaultModelInfo
+
+		const params = getModelParams({
+			format: "openai",
+			modelId: id,
+			model: info,
+			settings: this.options,
+		})
+
+		return { id, info, ...params }
+	}
+
+	override async *createMessage(
+		systemPrompt: string,
+		messages: Anthropic.Messages.MessageParam[],
+		_metadata?: ApiHandlerCreateMessageMetadata,
+	): ApiStream {
+		const { id: modelId, info, reasoningEffort: reasoning_effort } = await this.fetchModel()
+
+		const requestOptions: OpenAI.Chat.Completions.ChatCompletionCreateParamsStreaming = {
+			model: modelId,
+			messages: [{ role: "system", content: systemPrompt }, ...convertToOpenAiMessages(messages)],
+			stream: true,
+			stream_options: { include_usage: true },
+			reasoning_effort,
+		}
+
+		if (this.supportsTemperature(modelId)) {
+			requestOptions.temperature = this.options.modelTemperature ?? 0
+		}
+
+		// If includeMaxTokens is enabled, set a cap using model info
+		if (this.options.includeMaxTokens === true && info.maxTokens) {
+			// Prefer modern OpenAI param when available in SDK
+			;(requestOptions as any).max_completion_tokens = this.options.modelMaxTokens || info.maxTokens
+		}
+
+		const { data: stream } = await this.client.chat.completions.create(requestOptions).withResponse()
+
+		let lastUsage: OpenAI.CompletionUsage | undefined
+		for await (const chunk of stream) {
+			const delta = chunk.choices[0]?.delta
+
+			if (delta?.content) {
+				yield { type: "text", text: delta.content }
+			}
+
+			if (delta && "reasoning_content" in delta && delta.reasoning_content) {
+				yield { type: "reasoning", text: (delta.reasoning_content as string | undefined) || "" }
+			}
+
+			if (chunk.usage) {
+				lastUsage = chunk.usage
+			}
+		}
+
+		if (lastUsage) {
+			yield this.processUsageMetrics(lastUsage, info)
+		}
+	}
+
+	async completePrompt(prompt: string): Promise<string> {
+		const { id: modelId, info } = await this.fetchModel()
+
+		const requestOptions: OpenAI.Chat.Completions.ChatCompletionCreateParamsNonStreaming = {
+			model: modelId,
+			messages: [{ role: "user", content: prompt }],
+		}
+		if (this.supportsTemperature(modelId)) {
+			requestOptions.temperature = this.options.modelTemperature ?? 0
+		}
+		if (this.options.includeMaxTokens === true && info.maxTokens) {
+			;(requestOptions as any).max_completion_tokens = this.options.modelMaxTokens || info.maxTokens
+		}
+
+		const resp = await this.client.chat.completions.create(requestOptions)
+		return resp.choices[0]?.message?.content || ""
+	}
+
+	protected processUsageMetrics(usage: any, modelInfo?: any): ApiStreamUsageChunk {
+		const inputTokens = usage?.prompt_tokens || 0
+		const outputTokens = usage?.completion_tokens || 0
+		const cacheWriteTokens = usage?.prompt_tokens_details?.cache_write_tokens || 0
+		const cacheReadTokens = usage?.prompt_tokens_details?.cached_tokens || 0
+
+		const totalCost = modelInfo
+			? calculateApiCostOpenAI(modelInfo, inputTokens, outputTokens, cacheWriteTokens, cacheReadTokens)
+			: 0
+
+		return {
+			type: "usage",
+			inputTokens,
+			outputTokens,
+			cacheWriteTokens: cacheWriteTokens || undefined,
+			cacheReadTokens: cacheReadTokens || undefined,
+			totalCost,
+		}
+	}
+}
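Continuing the buildApiHandler sketch above, a minimal consumer of the streaming path. Chunk shapes follow the ApiStream yields in createMessage; the prompts are illustrative.

const stream = handler.createMessage("You are a helpful assistant.", [
	{ role: "user", content: "Hello" },
])

for await (const chunk of stream) {
	if (chunk.type === "text") process.stdout.write(chunk.text)
	if (chunk.type === "reasoning") process.stdout.write(chunk.text) // reasoning_content passthrough
	if (chunk.type === "usage") console.log("\ncost:", chunk.totalCost) // emitted once, from the final usage chunk
}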
src/api/providers/fetchers/deepinfra.ts

Lines changed: 73 additions & 0 deletions

@@ -0,0 +1,73 @@
+import axios from "axios"
+import { z } from "zod"
+
+import { type ModelInfo } from "@roo-code/types"
+
+import { DEFAULT_HEADERS } from "../constants"
+
+// DeepInfra models endpoint follows OpenAI /models shape with an added metadata object.
+// Use only the supported fields and infer capabilities from tags.
+
+const DeepInfraModelSchema = z.object({
+	id: z.string(),
+	object: z.literal("model"),
+	owned_by: z.string().optional(),
+	created: z.number().optional(),
+	root: z.string().optional(),
+	metadata: z
+		.object({
+			description: z.string().optional(),
+			context_length: z.number().optional(),
+			max_tokens: z.number().optional(),
+			tags: z.array(z.string()).optional(), // e.g., ["vision", "prompt_cache"]
+			pricing: z
+				.object({
+					input_tokens: z.number().optional(),
+					output_tokens: z.number().optional(),
+					cache_read_tokens: z.number().optional(),
+				})
+				.optional(),
+		})
+		.optional(),
+})
+
+const DeepInfraModelsResponseSchema = z.object({ data: z.array(DeepInfraModelSchema) })
+
+export async function getDeepInfraModels(
+	apiKey?: string,
+	baseUrl: string = "https://api.deepinfra.com/v1/openai",
+): Promise<Record<string, ModelInfo>> {
+	const headers: Record<string, string> = { ...DEFAULT_HEADERS }
+	if (apiKey) headers["Authorization"] = `Bearer ${apiKey}`
+
+	const url = `${baseUrl.replace(/\/$/, "")}/models`
+	const models: Record<string, ModelInfo> = {}
+
+	const response = await axios.get(url, { headers })
+	const parsed = DeepInfraModelsResponseSchema.safeParse(response.data)
+	const data = parsed.success ? parsed.data.data : response.data?.data || []
+
+	for (const m of data as Array<z.infer<typeof DeepInfraModelSchema>>) {
+		const meta = m.metadata || {}
+		const tags = meta.tags || []
+
+		const contextWindow = typeof meta.context_length === "number" ? meta.context_length : 8192
+		const maxTokens = typeof meta.max_tokens === "number" ? meta.max_tokens : Math.ceil(contextWindow * 0.2)
+
+		const info: ModelInfo = {
+			maxTokens,
+			contextWindow,
+			supportsImages: tags.includes("vision"),
+			supportsPromptCache: tags.includes("prompt_cache"),
+			supportsReasoningEffort: tags.includes("reasoning_effort"),
+			inputPrice: meta.pricing?.input_tokens,
+			outputPrice: meta.pricing?.output_tokens,
+			cacheReadsPrice: meta.pricing?.cache_read_tokens,
+			description: meta.description,
+		}
+
+		models[m.id] = info
+	}
+
+	return models
+}
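To make the tag-driven mapping concrete, here is a hypothetical /models entry and the ModelInfo it would yield under the rules above (when metadata.max_tokens is absent, maxTokens falls back to 20% of the context window):

// Hypothetical entry, shaped per DeepInfraModelSchema:
const entry = {
	id: "example/model",
	object: "model" as const,
	metadata: {
		context_length: 131072,
		tags: ["vision", "prompt_cache"],
		pricing: { input_tokens: 0.3, output_tokens: 1.2 },
	},
}

// Resulting ModelInfo for "example/model":
// maxTokens: Math.ceil(131072 * 0.2) === 26215
// contextWindow: 131072
// supportsImages: true ("vision" tag), supportsPromptCache: true ("prompt_cache" tag)
// supportsReasoningEffort: false (no "reasoning_effort" tag)
// inputPrice: 0.3, outputPrice: 1.2, cacheReadsPrice: undefined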

src/api/providers/fetchers/modelCache.ts

Lines changed: 4 additions & 0 deletions
@@ -19,6 +19,7 @@ import { getKiloBaseUriFromToken } from "../../../shared/kilocode/token"
 import { getOllamaModels } from "./ollama"
 import { getLMStudioModels } from "./lmstudio"
 import { getIOIntelligenceModels } from "./io-intelligence"
+import { getDeepInfraModels } from "./deepinfra"
 const memoryCache = new NodeCache({ stdTTL: 5 * 60, checkperiod: 5 * 60 })
 
 export /*kilocode_change*/ async function writeModels(router: RouterName, data: ModelRecord) {
@@ -78,6 +79,9 @@ export const getModels = async (options: GetModelsOptions): Promise<ModelRecord>
 			// Type safety ensures apiKey and baseUrl are always provided for litellm
 			models = await getLiteLLMModels(options.apiKey, options.baseUrl)
 			break
+		case "deepinfra":
+			models = await getDeepInfraModels(options.apiKey, options.baseUrl)
+			break
 		// kilocode_change start
 		case "kilocode-openrouter":
 			models = await getOpenRouterModels({
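Note that getModels results sit behind the NodeCache declared above (stdTTL of 5 * 60 seconds), so repeated lookups within five minutes reuse the fetched DeepInfra list rather than re-hitting the endpoint. A call sketch, with option values assumed:

const models = await getModels({
	provider: "deepinfra",
	apiKey: process.env.DEEPINFRA_API_KEY,
	baseUrl: "https://api.deepinfra.com/v1/openai", // optional; the fetcher defaults to this URL
})
console.log(Object.keys(models).length, "DeepInfra models (cached for ~5 minutes)")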

src/api/providers/index.ts

Lines changed: 1 addition & 0 deletions
@@ -36,3 +36,4 @@ export { ZAiHandler } from "./zai"
 export { FireworksHandler } from "./fireworks"
 export { RooHandler } from "./roo"
 export { FeatherlessHandler } from "./featherless"
+export { DeepInfraHandler } from "./deepinfra"

src/core/webview/webviewMessageHandler.ts

Lines changed: 2 additions & 0 deletions
@@ -571,6 +571,7 @@ export const webviewMessageHandler = async (
 		"kilocode-openrouter": {}, // kilocode_change
 		ollama: {},
 		lmstudio: {},
+		deepinfra: {},
 	}
 
 	const safeGetModels = async (options: GetModelsOptions): Promise<ModelRecord> => {
@@ -613,6 +614,7 @@ export const webviewMessageHandler = async (
 			},
 		},
 		{ key: "ollama", options: { provider: "ollama", baseUrl: apiConfiguration.ollamaBaseUrl } },
+		{ key: "deepinfra", options: { provider: "deepinfra", apiKey: apiConfiguration.deepInfraApiKey } },
 	]
 	// kilocode_change end
