441 changes: 165 additions & 276 deletions src/api/providers/__tests__/requesty.test.ts

Large diffs are not rendered by default.

14 changes: 11 additions & 3 deletions src/api/providers/fetchers/cache.ts
@@ -38,9 +38,8 @@ async function readModels(router: RouterName): Promise<ModelRecord | undefined>
  * @param router - The router to fetch models from.
  * @returns The models from the cache or the fetched models.
  */
-export const getModels = async (router: RouterName): Promise<ModelRecord> => {
+export const getModels = async (router: RouterName, apiKey: string | undefined = undefined): Promise<ModelRecord> => {
 	let models = memoryCache.get<ModelRecord>(router)
-
 	if (models) {
 		// console.log(`[getModels] NodeCache hit for ${router} -> ${Object.keys(models).length}`)
 		return models
@@ -51,7 +50,8 @@ export const getModels = async (router: RouterName): Promise<ModelRecord> => {
 			models = await getOpenRouterModels()
 			break
 		case "requesty":
-			models = await getRequestyModels()
+			// The Requesty models endpoint requires an API key for per-user custom policies.
+			models = await getRequestyModels(apiKey)
 			break
 		case "glama":
 			models = await getGlamaModels()
@@ -80,3 +80,11 @@ export const getModels = async (router: RouterName): Promise<ModelRecord> => {
 
 	return models ?? {}
 }
+
+/**
+ * Flush the in-memory model cache for a specific router.
+ * @param router - The router to flush models for.
+ */
+export const flushModels = async (router: RouterName) => {
+	memoryCache.del(router)
+}
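
Note: taken together, `flushModels` plus the new `apiKey` parameter let a caller force a refetch that honors per-user model policies. A minimal sketch of that flow (the wrapper function here is hypothetical, not part of this PR):

```typescript
import { ModelRecord, RouterName } from "../../../shared/api"
import { getModels, flushModels } from "./cache"

// Hypothetical helper: drop the cached entry, then refetch so the next
// read reflects the user's current per-account model policies.
async function refreshRouterModels(router: RouterName, apiKey?: string): Promise<ModelRecord> {
	await flushModels(router) // removes the NodeCache entry for this router
	return getModels(router, apiKey) // cache miss, so models are fetched fresh
}
```
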
119 changes: 98 additions & 21 deletions src/api/providers/requesty.ts
@@ -1,11 +1,19 @@
 import { Anthropic } from "@anthropic-ai/sdk"
+import OpenAI from "openai"
 
-import { ModelInfo, ModelRecord, requestyDefaultModelId, requestyDefaultModelInfo } from "../../shared/api"
+import {
+	ApiHandlerOptions,
+	ModelInfo,
+	ModelRecord,
+	requestyDefaultModelId,
+	requestyDefaultModelInfo,
+} from "../../shared/api"
 import { convertToOpenAiMessages } from "../transform/openai-format"
 import { calculateApiCostOpenAI } from "../../utils/cost"
 import { ApiStream, ApiStreamUsageChunk } from "../transform/stream"
-import { OpenAiHandler, OpenAiHandlerOptions } from "./openai"
+import { SingleCompletionHandler } from "../"
+import { BaseProvider } from "./base-provider"
+import { DEFAULT_HEADERS } from "./constants"
 import { getModels } from "./fetchers/cache"
-import OpenAI from "openai"
 
 // Requesty usage includes an extra field for Anthropic use cases.
 // Safely cast the prompt token details section to the appropriate structure.
@@ -17,25 +25,28 @@ interface RequestyUsage extends OpenAI.CompletionUsage {
 	total_cost?: number
 }
 
-export class RequestyHandler extends OpenAiHandler {
+type RequestyChatCompletionParams = OpenAI.Chat.ChatCompletionCreateParams & {}
+
+export class RequestyHandler extends BaseProvider implements SingleCompletionHandler {
+	protected options: ApiHandlerOptions
+	protected models: ModelRecord = {}
+	private client: OpenAI
 
-	constructor(options: OpenAiHandlerOptions) {
-		if (!options.requestyApiKey) {
-			throw new Error("Requesty API key is required. Please provide it in the settings.")
-		}
-
-		super({
-			...options,
-			openAiApiKey: options.requestyApiKey,
-			openAiModelId: options.requestyModelId ?? requestyDefaultModelId,
-			openAiBaseUrl: "https://router.requesty.ai/v1",
-		})
+	constructor(options: ApiHandlerOptions) {
+		super()
+		this.options = options
+
+		const apiKey = this.options.requestyApiKey ?? "not-provided"
+		const baseURL = "https://router.requesty.ai/v1"
+
+		const defaultHeaders = DEFAULT_HEADERS
+
+		this.client = new OpenAI({ baseURL, apiKey, defaultHeaders })
 	}
 
-	override async *createMessage(systemPrompt: string, messages: Anthropic.Messages.MessageParam[]): ApiStream {
+	public async fetchModel() {
 		this.models = await getModels("requesty")
-		yield* super.createMessage(systemPrompt, messages)
+		return this.getModel()
 	}
 
 	override getModel(): { id: string; info: ModelInfo } {
@@ -44,7 +55,7 @@ export class RequestyHandler extends OpenAiHandler {
 		return { id, info }
 	}
 
-	protected override processUsageMetrics(usage: any, modelInfo?: ModelInfo): ApiStreamUsageChunk {
+	protected processUsageMetrics(usage: any, modelInfo?: ModelInfo): ApiStreamUsageChunk {
 		const requestyUsage = usage as RequestyUsage
 		const inputTokens = requestyUsage?.prompt_tokens || 0
 		const outputTokens = requestyUsage?.completion_tokens || 0
@@ -64,8 +75,74 @@ export class RequestyHandler extends OpenAiHandler {
 		}
 	}
 
-	override async completePrompt(prompt: string): Promise<string> {
-		this.models = await getModels("requesty")
-		return super.completePrompt(prompt)
+	override async *createMessage(systemPrompt: string, messages: Anthropic.Messages.MessageParam[]): ApiStream {
+		const model = await this.fetchModel()
+
+		let openAiMessages: OpenAI.Chat.ChatCompletionMessageParam[] = [
+			{ role: "system", content: systemPrompt },
+			...convertToOpenAiMessages(messages),
+		]
+
+		let maxTokens = undefined
+		if (this.options.includeMaxTokens) {
+			maxTokens = model.info.maxTokens
+		}
+
+		const temperature = this.options.modelTemperature
+
+		const completionParams: RequestyChatCompletionParams = {
+			model: model.id,
+			max_tokens: maxTokens,
+			messages: openAiMessages,
+			temperature: temperature,
+			stream: true,
+			stream_options: { include_usage: true },
+		}
+
+		const stream = await this.client.chat.completions.create(completionParams)
+
+		for await (const chunk of stream) {
+			const delta = chunk.choices[0]?.delta
+			if (delta?.content) {
+				yield {
+					type: "text",
+					text: delta.content,
+				}
+			}
+
+			if (delta && "reasoning_content" in delta && delta.reasoning_content) {
+				yield {
+					type: "reasoning",
+					text: (delta.reasoning_content as string | undefined) || "",
+				}
+			}
+
+			if (chunk.usage) {
+				yield this.processUsageMetrics(chunk.usage, model.info)
+			}
+		}
+	}
+
+	async completePrompt(prompt: string): Promise<string> {
+		const model = await this.fetchModel()
+
+		let openAiMessages: OpenAI.Chat.ChatCompletionMessageParam[] = [{ role: "system", content: prompt }]
+
+		let maxTokens = undefined
+		if (this.options.includeMaxTokens) {
+			maxTokens = model.info.maxTokens
+		}
+
+		const temperature = this.options.modelTemperature
+
+		const completionParams: RequestyChatCompletionParams = {
+			model: model.id,
+			max_tokens: maxTokens,
+			messages: openAiMessages,
+			temperature: temperature,
+		}
+
+		const response: OpenAI.Chat.ChatCompletion = await this.client.chat.completions.create(completionParams)
+		return response.choices[0]?.message.content || ""
+	}
 }
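
Note: with the handler no longer subclassing `OpenAiHandler`, a consumer drives it directly. A rough usage sketch — the option fields match those the diff reads (`requestyApiKey`, `requestyModelId`, `includeMaxTokens`, `modelTemperature`); the surrounding script is illustrative only:

```typescript
import { Anthropic } from "@anthropic-ai/sdk"
import { RequestyHandler } from "./requesty"

const handler = new RequestyHandler({
	requestyApiKey: process.env.REQUESTY_API_KEY,
	requestyModelId: "coding/claude-3-7-sonnet",
	includeMaxTokens: true,
	modelTemperature: 0,
})

const messages: Anthropic.Messages.MessageParam[] = [{ role: "user", content: "Hello!" }]

// createMessage yields "text", "reasoning", and usage chunks as they stream in.
for await (const chunk of handler.createMessage("You are a helpful assistant.", messages)) {
	if (chunk.type === "text") process.stdout.write(chunk.text)
}
```
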
18 changes: 12 additions & 6 deletions src/core/webview/webviewMessageHandler.ts
@@ -6,7 +6,7 @@ import * as vscode from "vscode"
 import { ClineProvider } from "./ClineProvider"
 import { Language, ApiConfigMeta } from "../../schemas"
 import { changeLanguage, t } from "../../i18n"
-import { ApiConfiguration } from "../../shared/api"
+import { ApiConfiguration, RouterName, toRouterName } from "../../shared/api"
 import { supportPrompt } from "../../shared/support-prompt"
 
 import { checkoutDiffPayloadSchema, checkoutRestorePayloadSchema, WebviewMessage } from "../../shared/WebviewMessage"
Expand Down Expand Up @@ -34,7 +34,7 @@ import { TelemetrySetting } from "../../shared/TelemetrySetting"
import { getWorkspacePath } from "../../utils/path"
import { Mode, defaultModeSlug } from "../../shared/modes"
import { GlobalState } from "../../schemas"
import { getModels } from "../../api/providers/fetchers/cache"
import { getModels, flushModels } from "../../api/providers/fetchers/cache"
import { generateSystemPrompt } from "./generateSystemPrompt"

const ALLOWED_VSCODE_SETTINGS = new Set(["terminal.integrated.inheritEnv"])
@@ -282,12 +282,18 @@ export const webviewMessageHandler = async (provider: ClineProvider, message: WebviewMessage
 		case "resetState":
 			await provider.resetState()
 			break
+		case "flushRouterModels":
+			const routerName: RouterName = toRouterName(message.text)
+			await flushModels(routerName)
+			break
 		case "requestRouterModels":
+			const { apiConfiguration } = await provider.getState()
+
 			const [openRouterModels, requestyModels, glamaModels, unboundModels] = await Promise.all([
-				getModels("openrouter"),
-				getModels("requesty"),
-				getModels("glama"),
-				getModels("unbound"),
+				getModels("openrouter", apiConfiguration.openRouterApiKey),
+				getModels("requesty", apiConfiguration.requestyApiKey),
+				getModels("glama", apiConfiguration.glamaApiKey),
+				getModels("unbound", apiConfiguration.unboundApiKey),
 			])
 
 			provider.postMessageToWebview({
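
Note: the new message pairs with the existing `requestRouterModels` round trip. From the webview, a refresh is two posts — flush first, then refetch (which now carries the stored per-provider API keys through to `getModels`); the ApiOptions button below does exactly this:

```typescript
// Ask the extension host to drop the cached Requesty models...
vscode.postMessage({ type: "flushRouterModels", text: "requesty" })
// ...then request a fresh set for all routers.
vscode.postMessage({ type: "requestRouterModels" })
```
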
1 change: 1 addition & 0 deletions src/shared/WebviewMessage.ts
@@ -42,6 +42,7 @@ export interface WebviewMessage {
 		| "importSettings"
 		| "exportSettings"
 		| "resetState"
+		| "flushRouterModels"
 		| "requestRouterModels"
 		| "requestOpenAiModels"
 		| "requestOllamaModels"
11 changes: 9 additions & 2 deletions src/shared/api.ts
@@ -437,7 +437,7 @@ export const glamaDefaultModelInfo: ModelInfo = {
 
 // Requesty
 // https://requesty.ai/router-2
-export const requestyDefaultModelId = "anthropic/claude-3-7-sonnet-latest"
+export const requestyDefaultModelId = "coding/claude-3-7-sonnet"
 export const requestyDefaultModelInfo: ModelInfo = {
 	maxTokens: 8192,
 	contextWindow: 200_000,
@@ -449,7 +449,7 @@ export const requestyDefaultModelInfo: ModelInfo = {
 	cacheWritesPrice: 3.75,
 	cacheReadsPrice: 0.3,
 	description:
-		"Claude 3.7 Sonnet is an advanced large language model with improved reasoning, coding, and problem-solving capabilities. It introduces a hybrid reasoning approach, allowing users to choose between rapid responses and extended, step-by-step processing for complex tasks. The model demonstrates notable improvements in coding, particularly in front-end development and full-stack updates, and excels in agentic workflows, where it can autonomously navigate multi-step processes. Claude 3.7 Sonnet maintains performance parity with its predecessor in standard mode while offering an extended reasoning mode for enhanced accuracy in math, coding, and instruction-following tasks. Read more at the [blog post here](https://www.anthropic.com/news/claude-3-7-sonnet)",
+		"The best coding model, optimized by Requesty, and automatically routed to the fastest provider. Claude 3.7 Sonnet is an advanced large language model with improved reasoning, coding, and problem-solving capabilities. It introduces a hybrid reasoning approach, allowing users to choose between rapid responses and extended, step-by-step processing for complex tasks. The model demonstrates notable improvements in coding, particularly in front-end development and full-stack updates, and excels in agentic workflows, where it can autonomously navigate multi-step processes. Claude 3.7 Sonnet maintains performance parity with its predecessor in standard mode while offering an extended reasoning mode for enhanced accuracy in math, coding, and instruction-following tasks. Read more at the [blog post here](https://www.anthropic.com/news/claude-3-7-sonnet)",
 }
// OpenRouter
Expand Down Expand Up @@ -1701,6 +1701,13 @@ export type RouterName = (typeof routerNames)[number]

export const isRouterName = (value: string): value is RouterName => routerNames.includes(value as RouterName)

export function toRouterName(value?: string): RouterName {
if (value && isRouterName(value)) {
return value
}
throw new Error(`Invalid router name: ${value}`)
}

export type ModelRecord = Record<string, ModelInfo>

export type RouterModels = Record<RouterName, ModelRecord>
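
Note: `toRouterName` narrows an untrusted webview string into the `RouterName` union and fails fast on anything else, e.g.:

```typescript
toRouterName("requesty") // "requesty", typed as RouterName
toRouterName("nonsense") // throws Error: Invalid router name: nonsense
toRouterName(undefined) // throws Error: Invalid router name: undefined
```
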
31 changes: 24 additions & 7 deletions webview-ui/src/components/settings/ApiOptions.tsx
@@ -22,9 +22,9 @@ import {
 	useOpenRouterModelProviders,
 	OPENROUTER_DEFAULT_PROVIDER_NAME,
 } from "@src/components/ui/hooks/useOpenRouterModelProviders"
-import { Select, SelectContent, SelectItem, SelectTrigger, SelectValue } from "@src/components/ui"
+import { Select, SelectContent, SelectItem, SelectTrigger, SelectValue, Button } from "@src/components/ui"
 import { VSCodeButtonLink } from "@src/components/common/VSCodeButtonLink"
-import { getRequestyAuthUrl, getGlamaAuthUrl } from "@src/oauth/urls"
+import { getRequestyApiKeyUrl, getGlamaAuthUrl } from "@src/oauth/urls"
 
 // Providers
 import { Anthropic } from "./providers/Anthropic"
@@ -75,6 +75,8 @@ const ApiOptions = ({
 		return Object.entries(headers)
 	})
 
+	const [requestyShowRefreshHint, setRequestyShowRefreshHint] = useState<boolean>()
+
 	useEffect(() => {
 		const propHeaders = apiConfiguration?.openAiHeaders || {}
 
@@ -138,7 +140,7 @@
 		info: selectedModelInfo,
 	} = useSelectedModel(apiConfiguration)
 
-	const { data: routerModels } = useRouterModels()
+	const { data: routerModels, refetch: refetchRouterModels } = useRouterModels()
 
 	// Update apiConfiguration.aiModelId whenever selectedModelId changes.
 	useEffect(() => {
@@ -373,13 +375,28 @@
 							{t("settings:providers.apiKeyStorageNotice")}
 						</div>
 						{!apiConfiguration?.requestyApiKey && (
-							<VSCodeButtonLink
-								href={getRequestyAuthUrl(uriScheme)}
-								style={{ width: "100%" }}
-								appearance="primary">
+							<VSCodeButtonLink href={getRequestyApiKeyUrl()} style={{ width: "100%" }} appearance="primary">
 								{t("settings:providers.getRequestyApiKey")}
 							</VSCodeButtonLink>
 						)}
+						<Button
+							variant="outline"
+							title={t("settings:providers.refetchModels")}
+							onClick={() => {
+								vscode.postMessage({ type: "flushRouterModels", text: "requesty" })
+								refetchRouterModels()
+								setRequestyShowRefreshHint(true)
+							}}>
+							<div className="flex items-center gap-2">
+								<span className="codicon codicon-refresh" />
+								{t("settings:providers.flushModelsCache")}
+							</div>
+						</Button>
+						{requestyShowRefreshHint && (
+							<div className="flex items-center text-vscode-errorForeground">
+								{t("settings:providers.flushedModelsCache")}
+							</div>
+						)}
 					</>
 				)}
 
2 changes: 2 additions & 0 deletions webview-ui/src/i18n/locales/ca/settings.json
@@ -119,6 +119,8 @@
 	"glamaApiKey": "Clau API de Glama",
 	"getGlamaApiKey": "Obtenir clau API de Glama",
 	"requestyApiKey": "Clau API de Requesty",
+	"flushModelsCache": "Netejar memòria cau de models",
+	"flushedModelsCache": "Memòria cau netejada, si us plau torna a obrir la vista de configuració",
 	"getRequestyApiKey": "Obtenir clau API de Requesty",
 	"anthropicApiKey": "Clau API d'Anthropic",
 	"getAnthropicApiKey": "Obtenir clau API d'Anthropic",
2 changes: 2 additions & 0 deletions webview-ui/src/i18n/locales/de/settings.json
@@ -106,6 +106,8 @@
 	"awsCustomArnUse": "Geben Sie eine gültige Amazon Bedrock ARN für das Modell ein, das Sie verwenden möchten. Formatbeispiele:",
 	"awsCustomArnDesc": "Stellen Sie sicher, dass die Region in der ARN mit Ihrer oben ausgewählten AWS-Region übereinstimmt.",
 	"openRouterApiKey": "OpenRouter API-Schlüssel",
+	"flushModelsCache": "Modell-Cache leeren",
+	"flushedModelsCache": "Cache geleert, bitte öffnen Sie die Einstellungsansicht erneut",
 	"getOpenRouterApiKey": "OpenRouter API-Schlüssel erhalten",
 	"apiKeyStorageNotice": "API-Schlüssel werden sicher im VSCode Secret Storage gespeichert",
 	"glamaApiKey": "Glama API-Schlüssel",
2 changes: 2 additions & 0 deletions webview-ui/src/i18n/locales/en/settings.json
@@ -118,6 +118,8 @@
 	"headerValue": "Header value",
 	"noCustomHeaders": "No custom headers defined. Click the + button to add one.",
 	"requestyApiKey": "Requesty API Key",
+	"flushModelsCache": "Flush cached models",
+	"flushedModelsCache": "Flushed cache, please reopen the settings view",
 	"getRequestyApiKey": "Get Requesty API Key",
 	"openRouterTransformsText": "Compress prompts and message chains to the context size (<a>OpenRouter Transforms</a>)",
 	"anthropicApiKey": "Anthropic API Key",
2 changes: 2 additions & 0 deletions webview-ui/src/i18n/locales/es/settings.json
@@ -106,6 +106,8 @@
 	"awsCustomArnUse": "Ingrese un ARN de Amazon Bedrock válido para el modelo que desea utilizar. Ejemplos de formato:",
 	"awsCustomArnDesc": "Asegúrese de que la región en el ARN coincida con la región de AWS seleccionada anteriormente.",
 	"openRouterApiKey": "Clave API de OpenRouter",
+	"flushModelsCache": "Limpiar modelos en caché",
+	"flushedModelsCache": "Caché limpiada, por favor vuelva a abrir la vista de configuración",
 	"getOpenRouterApiKey": "Obtener clave API de OpenRouter",
 	"apiKeyStorageNotice": "Las claves API se almacenan de forma segura en el Almacenamiento Secreto de VSCode",
 	"glamaApiKey": "Clave API de Glama",
2 changes: 2 additions & 0 deletions webview-ui/src/i18n/locales/fr/settings.json
@@ -106,6 +106,8 @@
 	"awsCustomArnUse": "Entrez un ARN Amazon Bedrock valide pour le modèle que vous souhaitez utiliser. Exemples de format :",
 	"awsCustomArnDesc": "Assurez-vous que la région dans l'ARN correspond à la région AWS sélectionnée ci-dessus.",
 	"openRouterApiKey": "Clé API OpenRouter",
+	"flushModelsCache": "Vider le cache des modèles",
+	"flushedModelsCache": "Cache vidé, veuillez rouvrir la vue des paramètres",
 	"getOpenRouterApiKey": "Obtenir la clé API OpenRouter",
 	"apiKeyStorageNotice": "Les clés API sont stockées en toute sécurité dans le stockage sécurisé de VSCode",
 	"glamaApiKey": "Clé API Glama",
2 changes: 2 additions & 0 deletions webview-ui/src/i18n/locales/hi/settings.json
@@ -118,6 +118,8 @@
 	"headerValue": "हेडर मूल्य",
 	"noCustomHeaders": "कोई कस्टम हेडर परिभाषित नहीं है। एक जोड़ने के लिए + बटन पर क्लिक करें।",
 	"requestyApiKey": "Requesty API कुंजी",
+	"flushModelsCache": "मॉडल कैश साफ़ करें",
+	"flushedModelsCache": "कैश साफ़ किया गया, कृपया सेटिंग्स व्यू को फिर से खोलें",
 	"getRequestyApiKey": "Requesty API कुंजी प्राप्त करें",
 	"openRouterTransformsText": "संदर्भ आकार के लिए प्रॉम्प्ट और संदेश श्रृंखलाओं को संपीड़ित करें (<a>OpenRouter ट्रांसफॉर्म</a>)",
 	"anthropicApiKey": "Anthropic API कुंजी",
2 changes: 2 additions & 0 deletions webview-ui/src/i18n/locales/it/settings.json
@@ -118,6 +118,8 @@
 	"headerValue": "Valore intestazione",
 	"noCustomHeaders": "Nessuna intestazione personalizzata definita. Fai clic sul pulsante + per aggiungerne una.",
 	"requestyApiKey": "Chiave API Requesty",
+	"flushModelsCache": "Svuota cache dei modelli",
+	"flushedModelsCache": "Cache svuotata, riapri la vista delle impostazioni",
 	"getRequestyApiKey": "Ottieni chiave API Requesty",
 	"openRouterTransformsText": "Comprimi prompt e catene di messaggi alla dimensione del contesto (<a>Trasformazioni OpenRouter</a>)",
 	"anthropicApiKey": "Chiave API Anthropic",