441 changes: 165 additions & 276 deletions src/api/providers/__tests__/requesty.test.ts

Large diffs are not rendered by default.

14 changes: 11 additions & 3 deletions src/api/providers/fetchers/cache.ts
@@ -38,9 +38,8 @@ async function readModels(router: RouterName): Promise<ModelRecord | undefined>
  * @param router - The router to fetch models from.
  * @returns The models from the cache or the fetched models.
  */
-export const getModels = async (router: RouterName): Promise<ModelRecord> => {
+export const getModels = async (router: RouterName, apiKey: string | undefined = undefined): Promise<ModelRecord> => {
 	let models = memoryCache.get<ModelRecord>(router)
-
 	if (models) {
 		// console.log(`[getModels] NodeCache hit for ${router} -> ${Object.keys(models).length}`)
 		return models
@@ -51,7 +50,8 @@ export const getModels = async (router: RouterName): Promise<ModelRecord> => {
 			models = await getOpenRouterModels()
 			break
 		case "requesty":
-			models = await getRequestyModels()
+			// The Requesty models endpoint requires an API key for per-user custom policies.
+			models = await getRequestyModels(apiKey)
 			break
 		case "glama":
 			models = await getGlamaModels()
@@ -80,3 +80,11 @@ export const getModels = async (router: RouterName): Promise<ModelRecord> => {
 
 	return models ?? {}
 }
+
+/**
+ * Flush the in-memory model cache for a specific router.
+ * @param router - The router to flush models for.
+ */
+export const flushModels = async (router: RouterName) => {
+	memoryCache.del(router)
+}
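
Note: taken together, `flushModels` plus the new `apiKey` parameter let a caller force a refetch that honors per-user model policies. A minimal sketch of that flow (the wrapper function here is hypothetical, not part of this PR):

```typescript
import { ModelRecord, RouterName } from "../../../shared/api"
import { getModels, flushModels } from "./cache"

// Hypothetical helper: drop the cached entry, then refetch so the next
// read reflects the user's current per-account model policies.
async function refreshRouterModels(router: RouterName, apiKey?: string): Promise<ModelRecord> {
	await flushModels(router) // removes the NodeCache entry for this router
	return getModels(router, apiKey) // cache miss, so models are fetched fresh
}
```
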
119 changes: 98 additions & 21 deletions src/api/providers/requesty.ts
@@ -1,11 +1,19 @@
 import { Anthropic } from "@anthropic-ai/sdk"
+import OpenAI from "openai"
 
-import { ModelInfo, ModelRecord, requestyDefaultModelId, requestyDefaultModelInfo } from "../../shared/api"
+import {
+	ApiHandlerOptions,
+	ModelInfo,
+	ModelRecord,
+	requestyDefaultModelId,
+	requestyDefaultModelInfo,
+} from "../../shared/api"
 import { convertToOpenAiMessages } from "../transform/openai-format"
 import { calculateApiCostOpenAI } from "../../utils/cost"
 import { ApiStream, ApiStreamUsageChunk } from "../transform/stream"
-import { OpenAiHandler, OpenAiHandlerOptions } from "./openai"
+import { SingleCompletionHandler } from "../"
+import { BaseProvider } from "./base-provider"
+import { DEFAULT_HEADERS } from "./constants"
 import { getModels } from "./fetchers/cache"
-import OpenAI from "openai"
 
 // Requesty usage includes an extra field for Anthropic use cases.
 // Safely cast the prompt token details section to the appropriate structure.
@@ -17,25 +25,28 @@ interface RequestyUsage extends OpenAI.CompletionUsage {
 	total_cost?: number
 }
 
-export class RequestyHandler extends OpenAiHandler {
+type RequestyChatCompletionParams = OpenAI.Chat.ChatCompletionCreateParams & {}
+
+export class RequestyHandler extends BaseProvider implements SingleCompletionHandler {
+	protected options: ApiHandlerOptions
+	protected models: ModelRecord = {}
+	private client: OpenAI
 
-	constructor(options: OpenAiHandlerOptions) {
-		if (!options.requestyApiKey) {
-			throw new Error("Requesty API key is required. Please provide it in the settings.")
-		}
-
-		super({
-			...options,
-			openAiApiKey: options.requestyApiKey,
-			openAiModelId: options.requestyModelId ?? requestyDefaultModelId,
-			openAiBaseUrl: "https://router.requesty.ai/v1",
-		})
+	constructor(options: ApiHandlerOptions) {
+		super()
+		this.options = options
+
+		const apiKey = this.options.requestyApiKey ?? "not-provided"
+		const baseURL = "https://router.requesty.ai/v1"
+
+		const defaultHeaders = DEFAULT_HEADERS
+
+		this.client = new OpenAI({ baseURL, apiKey, defaultHeaders })
 	}
 
-	override async *createMessage(systemPrompt: string, messages: Anthropic.Messages.MessageParam[]): ApiStream {
+	public async fetchModel() {
 		this.models = await getModels("requesty")
-		yield* super.createMessage(systemPrompt, messages)
+		return this.getModel()
 	}
 
 	override getModel(): { id: string; info: ModelInfo } {
@@ -44,7 +55,7 @@ export class RequestyHandler extends OpenAiHandler {
 		return { id, info }
 	}
 
-	protected override processUsageMetrics(usage: any, modelInfo?: ModelInfo): ApiStreamUsageChunk {
+	protected processUsageMetrics(usage: any, modelInfo?: ModelInfo): ApiStreamUsageChunk {
 		const requestyUsage = usage as RequestyUsage
 		const inputTokens = requestyUsage?.prompt_tokens || 0
 		const outputTokens = requestyUsage?.completion_tokens || 0
@@ -64,8 +75,74 @@ export class RequestyHandler extends OpenAiHandler {
 		}
 	}
 
-	override async completePrompt(prompt: string): Promise<string> {
-		this.models = await getModels("requesty")
-		return super.completePrompt(prompt)
+	override async *createMessage(systemPrompt: string, messages: Anthropic.Messages.MessageParam[]): ApiStream {
+		const model = await this.fetchModel()
+
+		let openAiMessages: OpenAI.Chat.ChatCompletionMessageParam[] = [
+			{ role: "system", content: systemPrompt },
+			...convertToOpenAiMessages(messages),
+		]
+
+		let maxTokens = undefined
+		if (this.options.includeMaxTokens) {
+			maxTokens = model.info.maxTokens
+		}
+
+		const temperature = this.options.modelTemperature
+
+		const completionParams: RequestyChatCompletionParams = {
+			model: model.id,
+			max_tokens: maxTokens,
+			messages: openAiMessages,
+			temperature: temperature,
+			stream: true,
+			stream_options: { include_usage: true },
+		}
+
+		const stream = await this.client.chat.completions.create(completionParams)
+
+		for await (const chunk of stream) {
+			const delta = chunk.choices[0]?.delta
+			if (delta?.content) {
+				yield {
+					type: "text",
+					text: delta.content,
+				}
+			}
+
+			if (delta && "reasoning_content" in delta && delta.reasoning_content) {
+				yield {
+					type: "reasoning",
+					text: (delta.reasoning_content as string | undefined) || "",
+				}
+			}
+
+			if (chunk.usage) {
+				yield this.processUsageMetrics(chunk.usage, model.info)
+			}
+		}
+	}
+
+	async completePrompt(prompt: string): Promise<string> {
+		const model = await this.fetchModel()
+
+		let openAiMessages: OpenAI.Chat.ChatCompletionMessageParam[] = [{ role: "system", content: prompt }]
+
+		let maxTokens = undefined
+		if (this.options.includeMaxTokens) {
+			maxTokens = model.info.maxTokens
+		}
+
+		const temperature = this.options.modelTemperature
+
+		const completionParams: RequestyChatCompletionParams = {
+			model: model.id,
+			max_tokens: maxTokens,
+			messages: openAiMessages,
+			temperature: temperature,
+		}
+
+		const response: OpenAI.Chat.ChatCompletion = await this.client.chat.completions.create(completionParams)
+		return response.choices[0]?.message.content || ""
+	}
 }
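
Note: with the handler no longer subclassing `OpenAiHandler`, a consumer drives it directly. A rough usage sketch — the option fields match those the diff reads (`requestyApiKey`, `requestyModelId`, `includeMaxTokens`, `modelTemperature`); the surrounding script is illustrative only:

```typescript
import { Anthropic } from "@anthropic-ai/sdk"
import { RequestyHandler } from "./requesty"

const handler = new RequestyHandler({
	requestyApiKey: process.env.REQUESTY_API_KEY,
	requestyModelId: "coding/claude-3-7-sonnet",
	includeMaxTokens: true,
	modelTemperature: 0,
})

const messages: Anthropic.Messages.MessageParam[] = [{ role: "user", content: "Hello!" }]

// createMessage yields "text", "reasoning", and usage chunks as they stream in.
for await (const chunk of handler.createMessage("You are a helpful assistant.", messages)) {
	if (chunk.type === "text") process.stdout.write(chunk.text)
}
```
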
18 changes: 12 additions & 6 deletions src/core/webview/webviewMessageHandler.ts
@@ -6,7 +6,7 @@ import * as vscode from "vscode"
 import { ClineProvider } from "./ClineProvider"
 import { Language, ApiConfigMeta } from "../../schemas"
 import { changeLanguage, t } from "../../i18n"
-import { ApiConfiguration } from "../../shared/api"
+import { ApiConfiguration, RouterName, toRouterName } from "../../shared/api"
 import { supportPrompt } from "../../shared/support-prompt"
 
 import { checkoutDiffPayloadSchema, checkoutRestorePayloadSchema, WebviewMessage } from "../../shared/WebviewMessage"
Expand Down Expand Up @@ -34,7 +34,7 @@ import { TelemetrySetting } from "../../shared/TelemetrySetting"
import { getWorkspacePath } from "../../utils/path"
import { Mode, defaultModeSlug } from "../../shared/modes"
import { GlobalState } from "../../schemas"
import { getModels } from "../../api/providers/fetchers/cache"
import { getModels, flushModels } from "../../api/providers/fetchers/cache"
import { generateSystemPrompt } from "./generateSystemPrompt"

const ALLOWED_VSCODE_SETTINGS = new Set(["terminal.integrated.inheritEnv"])
@@ -282,12 +282,18 @@ export const webviewMessageHandler = async (provider: ClineProvider, message: WebviewMessage
 		case "resetState":
 			await provider.resetState()
 			break
+		case "flushRouterModels":
+			const routerName: RouterName = toRouterName(message.text)
+			await flushModels(routerName)
+			break
 		case "requestRouterModels":
+			const { apiConfiguration } = await provider.getState()
+
 			const [openRouterModels, requestyModels, glamaModels, unboundModels] = await Promise.all([
-				getModels("openrouter"),
-				getModels("requesty"),
-				getModels("glama"),
-				getModels("unbound"),
+				getModels("openrouter", apiConfiguration.openRouterApiKey),
+				getModels("requesty", apiConfiguration.requestyApiKey),
+				getModels("glama", apiConfiguration.glamaApiKey),
+				getModels("unbound", apiConfiguration.unboundApiKey),
 			])
 
 			provider.postMessageToWebview({
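
Note: the new message pairs with the existing `requestRouterModels` round trip. From the webview, a refresh is two posts — flush first, then refetch (which now carries the stored per-provider API keys through to `getModels`); the ApiOptions button below does exactly this:

```typescript
// Ask the extension host to drop the cached Requesty models...
vscode.postMessage({ type: "flushRouterModels", text: "requesty" })
// ...then request a fresh set for all routers.
vscode.postMessage({ type: "requestRouterModels" })
```
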
1 change: 1 addition & 0 deletions src/shared/WebviewMessage.ts
@@ -42,6 +42,7 @@ export interface WebviewMessage {
 		| "importSettings"
 		| "exportSettings"
 		| "resetState"
+		| "flushRouterModels"
 		| "requestRouterModels"
 		| "requestOpenAiModels"
 		| "requestOllamaModels"
11 changes: 9 additions & 2 deletions src/shared/api.ts
@@ -437,7 +437,7 @@ export const glamaDefaultModelInfo: ModelInfo = {
 
 // Requesty
 // https://requesty.ai/router-2
-export const requestyDefaultModelId = "anthropic/claude-3-7-sonnet-latest"
+export const requestyDefaultModelId = "coding/claude-3-7-sonnet"
 export const requestyDefaultModelInfo: ModelInfo = {
 	maxTokens: 8192,
 	contextWindow: 200_000,
@@ -449,7 +449,7 @@ export const requestyDefaultModelInfo: ModelInfo = {
 	cacheWritesPrice: 3.75,
 	cacheReadsPrice: 0.3,
 	description:
-		"Claude 3.7 Sonnet is an advanced large language model with improved reasoning, coding, and problem-solving capabilities. It introduces a hybrid reasoning approach, allowing users to choose between rapid responses and extended, step-by-step processing for complex tasks. The model demonstrates notable improvements in coding, particularly in front-end development and full-stack updates, and excels in agentic workflows, where it can autonomously navigate multi-step processes. Claude 3.7 Sonnet maintains performance parity with its predecessor in standard mode while offering an extended reasoning mode for enhanced accuracy in math, coding, and instruction-following tasks. Read more at the [blog post here](https://www.anthropic.com/news/claude-3-7-sonnet)",
+		"The best coding model, optimized by Requesty, and automatically routed to the fastest provider. Claude 3.7 Sonnet is an advanced large language model with improved reasoning, coding, and problem-solving capabilities. It introduces a hybrid reasoning approach, allowing users to choose between rapid responses and extended, step-by-step processing for complex tasks. The model demonstrates notable improvements in coding, particularly in front-end development and full-stack updates, and excels in agentic workflows, where it can autonomously navigate multi-step processes. Claude 3.7 Sonnet maintains performance parity with its predecessor in standard mode while offering an extended reasoning mode for enhanced accuracy in math, coding, and instruction-following tasks. Read more at the [blog post here](https://www.anthropic.com/news/claude-3-7-sonnet)",
 }
// OpenRouter
Expand Down Expand Up @@ -1701,6 +1701,13 @@ export type RouterName = (typeof routerNames)[number]

export const isRouterName = (value: string): value is RouterName => routerNames.includes(value as RouterName)

export function toRouterName(value?: string): RouterName {
if (value && isRouterName(value)) {
return value
}
throw new Error(`Invalid router name: ${value}`)
}

export type ModelRecord = Record<string, ModelInfo>

export type RouterModels = Record<RouterName, ModelRecord>
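
Note: `toRouterName` narrows an untrusted webview string into the `RouterName` union and fails fast on anything else, e.g.:

```typescript
toRouterName("requesty") // "requesty", typed as RouterName
toRouterName("nonsense") // throws Error: Invalid router name: nonsense
toRouterName(undefined) // throws Error: Invalid router name: undefined
```
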
31 changes: 24 additions & 7 deletions webview-ui/src/components/settings/ApiOptions.tsx
@@ -22,9 +22,9 @@ import {
 	useOpenRouterModelProviders,
 	OPENROUTER_DEFAULT_PROVIDER_NAME,
 } from "@src/components/ui/hooks/useOpenRouterModelProviders"
-import { Select, SelectContent, SelectItem, SelectTrigger, SelectValue } from "@src/components/ui"
+import { Select, SelectContent, SelectItem, SelectTrigger, SelectValue, Button } from "@src/components/ui"
 import { VSCodeButtonLink } from "@src/components/common/VSCodeButtonLink"
-import { getRequestyAuthUrl, getGlamaAuthUrl } from "@src/oauth/urls"
+import { getRequestyApiKeyUrl, getGlamaAuthUrl } from "@src/oauth/urls"
 
 // Providers
 import { Anthropic } from "./providers/Anthropic"
@@ -75,6 +75,8 @@ const ApiOptions = ({
 		return Object.entries(headers)
 	})
 
+	const [requestyShowRefreshHint, setRequestyShowRefreshHint] = useState<boolean>()
+
 	useEffect(() => {
 		const propHeaders = apiConfiguration?.openAiHeaders || {}
 
@@ -138,7 +140,7 @@
 		info: selectedModelInfo,
 	} = useSelectedModel(apiConfiguration)
 
-	const { data: routerModels } = useRouterModels()
+	const { data: routerModels, refetch: refetchRouterModels } = useRouterModels()
 
 	// Update apiConfiguration.aiModelId whenever selectedModelId changes.
 	useEffect(() => {
@@ -373,13 +375,28 @@
 							{t("settings:providers.apiKeyStorageNotice")}
 						</div>
 						{!apiConfiguration?.requestyApiKey && (
-							<VSCodeButtonLink
-								href={getRequestyAuthUrl(uriScheme)}
-								style={{ width: "100%" }}
-								appearance="primary">
+							<VSCodeButtonLink href={getRequestyApiKeyUrl()} style={{ width: "100%" }} appearance="primary">
 								{t("settings:providers.getRequestyApiKey")}
 							</VSCodeButtonLink>
 						)}
+						<Button
+							variant="outline"
+							title={t("settings:providers.refetchModels")}
+							onClick={() => {
+								vscode.postMessage({ type: "flushRouterModels", text: "requesty" })
+								refetchRouterModels()
+								setRequestyShowRefreshHint(true)
+							}}>
+							<div className="flex items-center gap-2">
+								<span className="codicon codicon-refresh" />
+								{t("settings:providers.flushModelsCache")}
+							</div>
+						</Button>
+						{requestyShowRefreshHint && (
+							<div className="flex items-center text-vscode-errorForeground">
+								{t("settings:providers.flushedModelsCache")}
+							</div>
+						)}
 					</>
 				)}
 
2 changes: 2 additions & 0 deletions webview-ui/src/i18n/locales/ca/settings.json
@@ -119,6 +119,8 @@
 	"glamaApiKey": "Clau API de Glama",
 	"getGlamaApiKey": "Obtenir clau API de Glama",
 	"requestyApiKey": "Clau API de Requesty",
+	"flushModelsCache": "Netejar memòria cau de models",
+	"flushedModelsCache": "Memòria cau netejada, si us plau torna a obrir la vista de configuració",
 	"getRequestyApiKey": "Obtenir clau API de Requesty",
 	"anthropicApiKey": "Clau API d'Anthropic",
 	"getAnthropicApiKey": "Obtenir clau API d'Anthropic",
2 changes: 2 additions & 0 deletions webview-ui/src/i18n/locales/de/settings.json
@@ -106,6 +106,8 @@
 	"awsCustomArnUse": "Geben Sie eine gültige Amazon Bedrock ARN für das Modell ein, das Sie verwenden möchten. Formatbeispiele:",
 	"awsCustomArnDesc": "Stellen Sie sicher, dass die Region in der ARN mit Ihrer oben ausgewählten AWS-Region übereinstimmt.",
 	"openRouterApiKey": "OpenRouter API-Schlüssel",
+	"flushModelsCache": "Modell-Cache leeren",
+	"flushedModelsCache": "Cache geleert, bitte öffnen Sie die Einstellungsansicht erneut",
 	"getOpenRouterApiKey": "OpenRouter API-Schlüssel erhalten",
 	"apiKeyStorageNotice": "API-Schlüssel werden sicher im VSCode Secret Storage gespeichert",
 	"glamaApiKey": "Glama API-Schlüssel",
2 changes: 2 additions & 0 deletions webview-ui/src/i18n/locales/en/settings.json
@@ -118,6 +118,8 @@
 	"headerValue": "Header value",
 	"noCustomHeaders": "No custom headers defined. Click the + button to add one.",
 	"requestyApiKey": "Requesty API Key",
+	"flushModelsCache": "Flush cached models",
+	"flushedModelsCache": "Flushed cache, please reopen the settings view",
 	"getRequestyApiKey": "Get Requesty API Key",
 	"openRouterTransformsText": "Compress prompts and message chains to the context size (<a>OpenRouter Transforms</a>)",
 	"anthropicApiKey": "Anthropic API Key",
2 changes: 2 additions & 0 deletions webview-ui/src/i18n/locales/es/settings.json
@@ -106,6 +106,8 @@
 	"awsCustomArnUse": "Ingrese un ARN de Amazon Bedrock válido para el modelo que desea utilizar. Ejemplos de formato:",
 	"awsCustomArnDesc": "Asegúrese de que la región en el ARN coincida con la región de AWS seleccionada anteriormente.",
 	"openRouterApiKey": "Clave API de OpenRouter",
+	"flushModelsCache": "Limpiar modelos en caché",
+	"flushedModelsCache": "Caché limpiada, por favor vuelva a abrir la vista de configuración",
 	"getOpenRouterApiKey": "Obtener clave API de OpenRouter",
 	"apiKeyStorageNotice": "Las claves API se almacenan de forma segura en el Almacenamiento Secreto de VSCode",
 	"glamaApiKey": "Clave API de Glama",
2 changes: 2 additions & 0 deletions webview-ui/src/i18n/locales/fr/settings.json
@@ -106,6 +106,8 @@
 	"awsCustomArnUse": "Entrez un ARN Amazon Bedrock valide pour le modèle que vous souhaitez utiliser. Exemples de format :",
 	"awsCustomArnDesc": "Assurez-vous que la région dans l'ARN correspond à la région AWS sélectionnée ci-dessus.",
 	"openRouterApiKey": "Clé API OpenRouter",
+	"flushModelsCache": "Vider le cache des modèles",
+	"flushedModelsCache": "Cache vidé, veuillez rouvrir la vue des paramètres",
 	"getOpenRouterApiKey": "Obtenir la clé API OpenRouter",
 	"apiKeyStorageNotice": "Les clés API sont stockées en toute sécurité dans le stockage sécurisé de VSCode",
 	"glamaApiKey": "Clé API Glama",
2 changes: 2 additions & 0 deletions webview-ui/src/i18n/locales/hi/settings.json
@@ -118,6 +118,8 @@
 	"headerValue": "हेडर मूल्य",
 	"noCustomHeaders": "कोई कस्टम हेडर परिभाषित नहीं है। एक जोड़ने के लिए + बटन पर क्लिक करें।",
 	"requestyApiKey": "Requesty API कुंजी",
+	"flushModelsCache": "मॉडल कैश साफ़ करें",
+	"flushedModelsCache": "कैश साफ़ किया गया, कृपया सेटिंग्स व्यू को फिर से खोलें",
 	"getRequestyApiKey": "Requesty API कुंजी प्राप्त करें",
 	"openRouterTransformsText": "संदर्भ आकार के लिए प्रॉम्प्ट और संदेश श्रृंखलाओं को संपीड़ित करें (<a>OpenRouter ट्रांसफॉर्म</a>)",
 	"anthropicApiKey": "Anthropic API कुंजी",
2 changes: 2 additions & 0 deletions webview-ui/src/i18n/locales/it/settings.json
@@ -118,6 +118,8 @@
 	"headerValue": "Valore intestazione",
 	"noCustomHeaders": "Nessuna intestazione personalizzata definita. Fai clic sul pulsante + per aggiungerne una.",
 	"requestyApiKey": "Chiave API Requesty",
+	"flushModelsCache": "Svuota cache dei modelli",
+	"flushedModelsCache": "Cache svuotata, riapri la vista delle impostazioni",
 	"getRequestyApiKey": "Ottieni chiave API Requesty",
 	"openRouterTransformsText": "Comprimi prompt e catene di messaggi alla dimensione del contesto (<a>Trasformazioni OpenRouter</a>)",
 	"anthropicApiKey": "Chiave API Anthropic",