|
| 1 | +import { Anthropic } from "@anthropic-ai/sdk" |
| 2 | +import OpenAI from "openai" |
| 3 | + |
| 4 | +import { deepInfraDefaultModelId, deepInfraDefaultModelInfo } from "@roo-code/types" |
| 5 | + |
| 6 | +import type { ApiHandlerOptions } from "../../shared/api" |
| 7 | +import { calculateApiCostOpenAI } from "../../shared/cost" |
| 8 | + |
| 9 | +import { ApiStream, ApiStreamUsageChunk } from "../transform/stream" |
| 10 | +import { convertToOpenAiMessages } from "../transform/openai-format" |
| 11 | + |
| 12 | +import type { SingleCompletionHandler, ApiHandlerCreateMessageMetadata } from "../index" |
| 13 | +import { RouterProvider } from "./router-provider" |
| 14 | +import { getModelParams } from "../transform/model-params" |
| 15 | +import { getModels } from "./fetchers/modelCache" |
| 16 | + |
/**
 * API handler for DeepInfra's OpenAI-compatible chat-completions endpoint.
 *
 * Extends `RouterProvider`, from which it inherits `this.client`, `this.models`,
 * `this.name`, `this.options` and `supportsTemperature()`, and adds streaming
 * (`createMessage`) and single-shot (`completePrompt`) completions.
 */
export class DeepInfraHandler extends RouterProvider implements SingleCompletionHandler {
	/**
	 * @param options Handler options. Reads `deepInfraBaseUrl`, `deepInfraApiKey` and
	 * `deepInfraModelId`; the base URL falls back to the public DeepInfra endpoint and
	 * the API key to the placeholder `"not-provided"` when unset or empty.
	 */
	constructor(options: ApiHandlerOptions) {
		super({
			options: {
				...options,
				// Placed after the spread, so any `openAiHeaders` already present in
				// `options` is replaced by these DeepInfra identification headers.
				openAiHeaders: {
					"X-Deepinfra-Source": "roo-code",
					"X-Deepinfra-Version": "2025-08-25",
				},
			},
			name: "deepinfra",
			baseURL: options.deepInfraBaseUrl || "https://api.deepinfra.com/v1/openai",
			apiKey: options.deepInfraApiKey || "not-provided",
			modelId: options.deepInfraModelId,
			defaultModelId: deepInfraDefaultModelId,
			defaultModelInfo: deepInfraDefaultModelInfo,
		})
	}

	/**
	 * Reloads `this.models` through `getModels`, using the client's current API key and
	 * base URL, then resolves the active model.
	 *
	 * @returns The same value as {@link getModel}, computed after the reload.
	 */
	public override async fetchModel() {
		this.models = await getModels({
			provider: this.name,
			apiKey: this.client.apiKey,
			baseUrl: this.client.baseURL,
		})
		return this.getModel()
	}

	/**
	 * Resolves the active model from the currently loaded model list.
	 *
	 * @returns The model id (configured id, else the DeepInfra default), its info
	 * (entry in `this.models`, else the default info) and the parameters computed by
	 * `getModelParams` for the `"openai"` format.
	 */
	override getModel() {
		const id = this.options.deepInfraModelId ?? deepInfraDefaultModelId
		const info = this.models[id] ?? deepInfraDefaultModelInfo

		const params = getModelParams({
			format: "openai",
			modelId: id,
			model: info,
			settings: this.options,
		})

		return { id, info, ...params }
	}

	/**
	 * Streams a chat completion.
	 *
	 * Yields `text` chunks for `delta.content`, `reasoning` chunks for
	 * `delta.reasoning_content`, and finally a single `usage` chunk built from the last
	 * usage payload seen in the stream (if any).
	 *
	 * @param systemPrompt Sent as the leading `system` message.
	 * @param messages Anthropic-format conversation, converted with `convertToOpenAiMessages`.
	 * @param _metadata Optional metadata; `taskId` is used as the prompt cache key when
	 * the model info reports `supportsPromptCache`.
	 */
	override async *createMessage(
		systemPrompt: string,
		messages: Anthropic.Messages.MessageParam[],
		_metadata?: ApiHandlerCreateMessageMetadata,
	): ApiStream {
		// Refresh model metadata exactly once per request; the result already contains
		// everything needed below.
		const { id: modelId, info, reasoningEffort } = await this.fetchModel()

		const promptCacheKey = info.supportsPromptCache && _metadata?.taskId ? _metadata.taskId : undefined

		// The cast is kept because `prompt_cache_key` may not be part of the installed
		// SDK's request type.
		const requestOptions: OpenAI.Chat.Completions.ChatCompletionCreateParamsStreaming = {
			model: modelId,
			messages: [{ role: "system", content: systemPrompt }, ...convertToOpenAiMessages(messages)],
			stream: true,
			stream_options: { include_usage: true },
			reasoning_effort: reasoningEffort,
			prompt_cache_key: promptCacheKey,
		} as OpenAI.Chat.Completions.ChatCompletionCreateParamsStreaming

		if (this.supportsTemperature(modelId)) {
			requestOptions.temperature = this.options.modelTemperature ?? 0
		}

		// Only cap output length when explicitly enabled and the model declares a limit.
		if (this.options.includeMaxTokens === true && info.maxTokens) {
			;(requestOptions as any).max_completion_tokens = this.options.modelMaxTokens || info.maxTokens
		}

		const { data: stream } = await this.client.chat.completions.create(requestOptions).withResponse()

		let lastUsage: OpenAI.CompletionUsage | undefined
		for await (const chunk of stream) {
			const delta = chunk.choices[0]?.delta

			if (delta?.content) {
				yield { type: "text", text: delta.content }
			}

			// `reasoning_content` is not part of the SDK delta type, hence the `in` check.
			if (delta && "reasoning_content" in delta && delta.reasoning_content) {
				yield { type: "reasoning", text: delta.reasoning_content as string }
			}

			if (chunk.usage) {
				lastUsage = chunk.usage
			}
		}

		if (lastUsage) {
			yield this.processUsageMetrics(lastUsage, info)
		}
	}

	/**
	 * Runs a non-streaming completion with `prompt` as the only user message.
	 *
	 * @returns The first choice's message content, or `""` when it is absent or empty.
	 */
	async completePrompt(prompt: string): Promise<string> {
		// fetchModel() refreshes the model list and returns the resolved model.
		const { id: modelId, info } = await this.fetchModel()

		const requestOptions: OpenAI.Chat.Completions.ChatCompletionCreateParamsNonStreaming = {
			model: modelId,
			messages: [{ role: "user", content: prompt }],
		}
		if (this.supportsTemperature(modelId)) {
			requestOptions.temperature = this.options.modelTemperature ?? 0
		}
		if (this.options.includeMaxTokens === true && info.maxTokens) {
			;(requestOptions as any).max_completion_tokens = this.options.modelMaxTokens || info.maxTokens
		}

		const resp = await this.client.chat.completions.create(requestOptions)
		return resp.choices[0]?.message?.content || ""
	}

	/**
	 * Converts an OpenAI-style usage payload into a `usage` stream chunk.
	 *
	 * Missing counters are treated as 0. Cache counters are read from
	 * `prompt_tokens_details` and reported as `undefined` when they are 0.
	 *
	 * @param usage Usage payload (`prompt_tokens`, `completion_tokens`, `prompt_tokens_details`).
	 * @param modelInfo Model info passed to `calculateApiCostOpenAI`; when omitted the
	 * reported cost is 0.
	 */
	protected processUsageMetrics(usage: any, modelInfo?: any): ApiStreamUsageChunk {
		const inputTokens = usage?.prompt_tokens || 0
		const outputTokens = usage?.completion_tokens || 0
		const cacheWriteTokens = usage?.prompt_tokens_details?.cache_write_tokens || 0
		const cacheReadTokens = usage?.prompt_tokens_details?.cached_tokens || 0

		const totalCost = modelInfo
			? calculateApiCostOpenAI(modelInfo, inputTokens, outputTokens, cacheWriteTokens, cacheReadTokens)
			: 0

		return {
			type: "usage",
			inputTokens,
			outputTokens,
			cacheWriteTokens: cacheWriteTokens || undefined,
			cacheReadTokens: cacheReadTokens || undefined,
			totalCost,
		}
	}
}
0 commit comments