
Commit ce8fbbd

dtrugman and cte authored
Requesty provider fixes (#3193)
Co-authored-by: Chris Estreich <[email protected]>
1 parent 8ab0de3 commit ce8fbbd

File tree

24 files changed: +356 -315 lines changed


src/api/providers/__tests__/requesty.test.ts

Lines changed: 165 additions & 276 deletions
Large diffs are not rendered by default.

src/api/providers/fetchers/cache.ts

Lines changed: 11 additions & 3 deletions
```diff
@@ -38,9 +38,8 @@ async function readModels(router: RouterName): Promise<ModelRecord | undefined>
  * @param router - The router to fetch models from.
  * @returns The models from the cache or the fetched models.
  */
-export const getModels = async (router: RouterName): Promise<ModelRecord> => {
+export const getModels = async (router: RouterName, apiKey: string | undefined = undefined): Promise<ModelRecord> => {
 	let models = memoryCache.get<ModelRecord>(router)
-
 	if (models) {
 		// console.log(`[getModels] NodeCache hit for ${router} -> ${Object.keys(models).length}`)
 		return models
@@ -51,7 +50,8 @@ export const getModels = async (router: RouterName): Promise<ModelRecord> => {
 			models = await getOpenRouterModels()
 			break
 		case "requesty":
-			models = await getRequestyModels()
+			// Requesty models endpoint requires an API key for per-user custom policies
+			models = await getRequestyModels(apiKey)
 			break
 		case "glama":
 			models = await getGlamaModels()
@@ -80,3 +80,11 @@ export const getModels = async (router: RouterName): Promise<ModelRecord> => {
 
 	return models ?? {}
 }
+
+/**
+ * Flush models memory cache for a specific router
+ * @param router - The router to flush models for.
+ */
+export const flushModels = async (router: RouterName) => {
+	memoryCache.del(router)
+}
```
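
For context on the change above: `getModels` is a read-through cache in front of the per-router fetchers, and the new `flushModels` evicts a single router's entry so the next lookup goes back to the network. A minimal sketch of the pattern, assuming a NodeCache-backed store as the file's own comments suggest (the TTL and fetcher stub here are illustrative, not the real values):

```ts
import NodeCache from "node-cache"

type ModelRecord = Record<string, object>

// Illustrative TTL; the real cache options live elsewhere in this file.
const memoryCache = new NodeCache({ stdTTL: 5 * 60 })

// Stub standing in for getRequestyModels / getOpenRouterModels / etc.
async function fetchFromRouter(router: string, apiKey?: string): Promise<ModelRecord> {
	return {}
}

export const getModels = async (router: string, apiKey?: string): Promise<ModelRecord> => {
	const cached = memoryCache.get<ModelRecord>(router)
	if (cached) {
		return cached // cache hit: no network call, so a changed apiKey has no effect
	}

	const models = (await fetchFromRouter(router, apiKey)) ?? {}
	memoryCache.set(router, models)
	return models
}

export const flushModels = (router: string) => {
	memoryCache.del(router) // next getModels(router, ...) refetches with the current key
}
```

Note that the cache key is only the router name, so an entry fetched under one API key is served to later callers regardless of their key; that is why the settings UI gains an explicit flush button further down in this commit.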

src/api/providers/requesty.ts

Lines changed: 98 additions & 21 deletions
```diff
@@ -1,11 +1,19 @@
 import { Anthropic } from "@anthropic-ai/sdk"
-import OpenAI from "openai"
-
-import { ModelInfo, ModelRecord, requestyDefaultModelId, requestyDefaultModelInfo } from "../../shared/api"
+import {
+	ApiHandlerOptions,
+	ModelInfo,
+	ModelRecord,
+	requestyDefaultModelId,
+	requestyDefaultModelInfo,
+} from "../../shared/api"
+import { convertToOpenAiMessages } from "../transform/openai-format"
 import { calculateApiCostOpenAI } from "../../utils/cost"
 import { ApiStream, ApiStreamUsageChunk } from "../transform/stream"
-import { OpenAiHandler, OpenAiHandlerOptions } from "./openai"
+import { SingleCompletionHandler } from "../"
+import { BaseProvider } from "./base-provider"
+import { DEFAULT_HEADERS } from "./constants"
 import { getModels } from "./fetchers/cache"
+import OpenAI from "openai"
 
 // Requesty usage includes an extra field for Anthropic use cases.
 // Safely cast the prompt token details section to the appropriate structure.
@@ -17,25 +25,28 @@ interface RequestyUsage extends OpenAI.CompletionUsage {
 	total_cost?: number
 }
 
-export class RequestyHandler extends OpenAiHandler {
+type RequestyChatCompletionParams = OpenAI.Chat.ChatCompletionCreateParams & {}
+
+export class RequestyHandler extends BaseProvider implements SingleCompletionHandler {
+	protected options: ApiHandlerOptions
 	protected models: ModelRecord = {}
+	private client: OpenAI
 
-	constructor(options: OpenAiHandlerOptions) {
-		if (!options.requestyApiKey) {
-			throw new Error("Requesty API key is required. Please provide it in the settings.")
-		}
+	constructor(options: ApiHandlerOptions) {
+		super()
+		this.options = options
+
+		const apiKey = this.options.requestyApiKey ?? "not-provided"
+		const baseURL = "https://router.requesty.ai/v1"
 
-		super({
-			...options,
-			openAiApiKey: options.requestyApiKey,
-			openAiModelId: options.requestyModelId ?? requestyDefaultModelId,
-			openAiBaseUrl: "https://router.requesty.ai/v1",
-		})
+		const defaultHeaders = DEFAULT_HEADERS
+
+		this.client = new OpenAI({ baseURL, apiKey, defaultHeaders })
 	}
 
-	override async *createMessage(systemPrompt: string, messages: Anthropic.Messages.MessageParam[]): ApiStream {
+	public async fetchModel() {
 		this.models = await getModels("requesty")
-		yield* super.createMessage(systemPrompt, messages)
+		return this.getModel()
 	}
 
 	override getModel(): { id: string; info: ModelInfo } {
@@ -44,7 +55,7 @@ export class RequestyHandler extends OpenAiHandler {
 		return { id, info }
 	}
 
-	protected override processUsageMetrics(usage: any, modelInfo?: ModelInfo): ApiStreamUsageChunk {
+	protected processUsageMetrics(usage: any, modelInfo?: ModelInfo): ApiStreamUsageChunk {
 		const requestyUsage = usage as RequestyUsage
 		const inputTokens = requestyUsage?.prompt_tokens || 0
 		const outputTokens = requestyUsage?.completion_tokens || 0
@@ -64,8 +75,74 @@ export class RequestyHandler extends OpenAiHandler {
 		}
 	}
 
-	override async completePrompt(prompt: string): Promise<string> {
-		this.models = await getModels("requesty")
-		return super.completePrompt(prompt)
+	override async *createMessage(systemPrompt: string, messages: Anthropic.Messages.MessageParam[]): ApiStream {
+		const model = await this.fetchModel()
+
+		let openAiMessages: OpenAI.Chat.ChatCompletionMessageParam[] = [
+			{ role: "system", content: systemPrompt },
+			...convertToOpenAiMessages(messages),
+		]
+
+		let maxTokens = undefined
+		if (this.options.includeMaxTokens) {
+			maxTokens = model.info.maxTokens
+		}
+
+		const temperature = this.options.modelTemperature
+
+		const completionParams: RequestyChatCompletionParams = {
+			model: model.id,
+			max_tokens: maxTokens,
+			messages: openAiMessages,
+			temperature: temperature,
+			stream: true,
+			stream_options: { include_usage: true },
+		}
+
+		const stream = await this.client.chat.completions.create(completionParams)
+
+		for await (const chunk of stream) {
+			const delta = chunk.choices[0]?.delta
+			if (delta?.content) {
+				yield {
+					type: "text",
+					text: delta.content,
+				}
+			}
+
+			if (delta && "reasoning_content" in delta && delta.reasoning_content) {
+				yield {
+					type: "reasoning",
+					text: (delta.reasoning_content as string | undefined) || "",
+				}
+			}
+
+			if (chunk.usage) {
+				yield this.processUsageMetrics(chunk.usage, model.info)
+			}
+		}
+	}
+
+	async completePrompt(prompt: string): Promise<string> {
+		const model = await this.fetchModel()
+
+		let openAiMessages: OpenAI.Chat.ChatCompletionMessageParam[] = [{ role: "system", content: prompt }]
+
+		let maxTokens = undefined
+		if (this.options.includeMaxTokens) {
+			maxTokens = model.info.maxTokens
+		}
+
+		const temperature = this.options.modelTemperature
+
+		const completionParams: RequestyChatCompletionParams = {
+			model: model.id,
+			max_tokens: maxTokens,
+			messages: openAiMessages,
+			temperature: temperature,
+		}
+
+		const response: OpenAI.Chat.ChatCompletion = await this.client.chat.completions.create(completionParams)
+		return response.choices[0]?.message.content || ""
 	}
 }
```
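
The net effect of the rewrite: `RequestyHandler` no longer subclasses `OpenAiHandler`; it owns an OpenAI client pointed at `https://router.requesty.ai/v1` and resolves the model list lazily through `fetchModel()`. A hypothetical consumer, with the usage-chunk field names assumed from `ApiStreamUsageChunk` (they are not shown in this diff):

```ts
import { Anthropic } from "@anthropic-ai/sdk"
import { RequestyHandler } from "./requesty"

async function main() {
	// The constructor no longer throws on a missing key; it falls back to "not-provided".
	const handler = new RequestyHandler({ requestyApiKey: process.env.REQUESTY_API_KEY })

	const messages: Anthropic.Messages.MessageParam[] = [
		{ role: "user", content: "Write a haiku about caching." },
	]

	for await (const chunk of handler.createMessage("You are a concise assistant.", messages)) {
		if (chunk.type === "text") {
			process.stdout.write(chunk.text)
		} else if (chunk.type === "usage") {
			// Field names assumed; this chunk is built by processUsageMetrics above.
			console.log(`\n[usage] in=${chunk.inputTokens} out=${chunk.outputTokens}`)
		}
	}
}

main()
```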

src/core/webview/webviewMessageHandler.ts

Lines changed: 12 additions & 6 deletions
```diff
@@ -6,7 +6,7 @@ import * as vscode from "vscode"
 import { ClineProvider } from "./ClineProvider"
 import { Language, ApiConfigMeta } from "../../schemas"
 import { changeLanguage, t } from "../../i18n"
-import { ApiConfiguration } from "../../shared/api"
+import { ApiConfiguration, RouterName, toRouterName } from "../../shared/api"
 import { supportPrompt } from "../../shared/support-prompt"
 
 import { checkoutDiffPayloadSchema, checkoutRestorePayloadSchema, WebviewMessage } from "../../shared/WebviewMessage"
@@ -34,7 +34,7 @@ import { TelemetrySetting } from "../../shared/TelemetrySetting"
 import { getWorkspacePath } from "../../utils/path"
 import { Mode, defaultModeSlug } from "../../shared/modes"
 import { GlobalState } from "../../schemas"
-import { getModels } from "../../api/providers/fetchers/cache"
+import { getModels, flushModels } from "../../api/providers/fetchers/cache"
 import { generateSystemPrompt } from "./generateSystemPrompt"
 
 const ALLOWED_VSCODE_SETTINGS = new Set(["terminal.integrated.inheritEnv"])
@@ -282,12 +282,18 @@ export const webviewMessageHandler = async (provider: ClineProvider, message: We
 		case "resetState":
 			await provider.resetState()
 			break
+		case "flushRouterModels":
+			const routerName: RouterName = toRouterName(message.text)
+			await flushModels(routerName)
+			break
 		case "requestRouterModels":
+			const { apiConfiguration } = await provider.getState()
+
 			const [openRouterModels, requestyModels, glamaModels, unboundModels] = await Promise.all([
-				getModels("openrouter"),
-				getModels("requesty"),
-				getModels("glama"),
-				getModels("unbound"),
+				getModels("openrouter", apiConfiguration.openRouterApiKey),
+				getModels("requesty", apiConfiguration.requestyApiKey),
+				getModels("glama", apiConfiguration.glamaApiKey),
+				getModels("unbound", apiConfiguration.unboundApiKey),
 			])
 
 			provider.postMessageToWebview({
```
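
The webview drives both new branches; the wiring added to `ApiOptions.tsx` below boils down to this pair of messages (the `vscode` bridge import path is an assumption here):

```ts
import { vscode } from "@src/utils/vscode" // assumed path for the webview message bridge

// Evict the Requesty entry from the extension host's model cache...
vscode.postMessage({ type: "flushRouterModels", text: "requesty" })

// ...then request fresh router models; the handler above now passes the
// stored API keys through to getModels, so the refetch is per-user.
vscode.postMessage({ type: "requestRouterModels" })
```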

src/shared/WebviewMessage.ts

Lines changed: 1 addition & 0 deletions
```diff
@@ -42,6 +42,7 @@ export interface WebviewMessage {
 		| "importSettings"
 		| "exportSettings"
 		| "resetState"
+		| "flushRouterModels"
 		| "requestRouterModels"
 		| "requestOpenAiModels"
 		| "requestOllamaModels"
```

src/shared/api.ts

Lines changed: 9 additions & 2 deletions
```diff
@@ -437,7 +437,7 @@ export const glamaDefaultModelInfo: ModelInfo = {
 
 // Requesty
 // https://requesty.ai/router-2
-export const requestyDefaultModelId = "anthropic/claude-3-7-sonnet-latest"
+export const requestyDefaultModelId = "coding/claude-3-7-sonnet"
 export const requestyDefaultModelInfo: ModelInfo = {
 	maxTokens: 8192,
 	contextWindow: 200_000,
@@ -449,7 +449,7 @@ export const requestyDefaultModelInfo: ModelInfo = {
 	cacheWritesPrice: 3.75,
 	cacheReadsPrice: 0.3,
 	description:
-		"Claude 3.7 Sonnet is an advanced large language model with improved reasoning, coding, and problem-solving capabilities. It introduces a hybrid reasoning approach, allowing users to choose between rapid responses and extended, step-by-step processing for complex tasks. The model demonstrates notable improvements in coding, particularly in front-end development and full-stack updates, and excels in agentic workflows, where it can autonomously navigate multi-step processes. Claude 3.7 Sonnet maintains performance parity with its predecessor in standard mode while offering an extended reasoning mode for enhanced accuracy in math, coding, and instruction-following tasks. Read more at the [blog post here](https://www.anthropic.com/news/claude-3-7-sonnet)",
+		"The best coding model, optimized by Requesty, and automatically routed to the fastest provider. Claude 3.7 Sonnet is an advanced large language model with improved reasoning, coding, and problem-solving capabilities. It introduces a hybrid reasoning approach, allowing users to choose between rapid responses and extended, step-by-step processing for complex tasks. The model demonstrates notable improvements in coding, particularly in front-end development and full-stack updates, and excels in agentic workflows, where it can autonomously navigate multi-step processes. Claude 3.7 Sonnet maintains performance parity with its predecessor in standard mode while offering an extended reasoning mode for enhanced accuracy in math, coding, and instruction-following tasks. Read more at the [blog post here](https://www.anthropic.com/news/claude-3-7-sonnet)",
 }
 
 // OpenRouter
@@ -1701,6 +1701,13 @@ export type RouterName = (typeof routerNames)[number]
 
 export const isRouterName = (value: string): value is RouterName => routerNames.includes(value as RouterName)
 
+export function toRouterName(value?: string): RouterName {
+	if (value && isRouterName(value)) {
+		return value
+	}
+	throw new Error(`Invalid router name: ${value}`)
+}
+
 export type ModelRecord = Record<string, ModelInfo>
 
 export type RouterModels = Record<RouterName, ModelRecord>
```
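
`toRouterName` complements the existing `isRouterName` guard: the same membership check, but throwing on bad input instead of returning false, which suits the message handler earlier in this commit, where an unknown router name is a programming error rather than a user mistake. For example:

```ts
import { isRouterName, toRouterName } from "./api"

const router = toRouterName("requesty") // => "requesty", narrowed to RouterName

isRouterName("glama") // => true
isRouterName("bogus") // => false

toRouterName(undefined) // throws Error: Invalid router name: undefined
```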

webview-ui/src/components/settings/ApiOptions.tsx

Lines changed: 24 additions & 7 deletions
```diff
@@ -22,9 +22,9 @@ import {
 	useOpenRouterModelProviders,
 	OPENROUTER_DEFAULT_PROVIDER_NAME,
 } from "@src/components/ui/hooks/useOpenRouterModelProviders"
-import { Select, SelectContent, SelectItem, SelectTrigger, SelectValue } from "@src/components/ui"
+import { Select, SelectContent, SelectItem, SelectTrigger, SelectValue, Button } from "@src/components/ui"
 import { VSCodeButtonLink } from "@src/components/common/VSCodeButtonLink"
-import { getRequestyAuthUrl, getGlamaAuthUrl } from "@src/oauth/urls"
+import { getRequestyApiKeyUrl, getGlamaAuthUrl } from "@src/oauth/urls"
 
 // Providers
 import { Anthropic } from "./providers/Anthropic"
@@ -75,6 +75,8 @@ const ApiOptions = ({
 		return Object.entries(headers)
 	})
 
+	const [requestyShowRefreshHint, setRequestyShowRefreshHint] = useState<boolean>()
+
 	useEffect(() => {
 		const propHeaders = apiConfiguration?.openAiHeaders || {}
 
@@ -138,7 +140,7 @@ const ApiOptions = ({
 		info: selectedModelInfo,
 	} = useSelectedModel(apiConfiguration)
 
-	const { data: routerModels } = useRouterModels()
+	const { data: routerModels, refetch: refetchRouterModels } = useRouterModels()
 
 	// Update apiConfiguration.aiModelId whenever selectedModelId changes.
 	useEffect(() => {
@@ -373,13 +375,28 @@ const ApiOptions = ({
 						{t("settings:providers.apiKeyStorageNotice")}
 					</div>
 					{!apiConfiguration?.requestyApiKey && (
-						<VSCodeButtonLink
-							href={getRequestyAuthUrl(uriScheme)}
-							style={{ width: "100%" }}
-							appearance="primary">
+						<VSCodeButtonLink href={getRequestyApiKeyUrl()} style={{ width: "100%" }} appearance="primary">
 							{t("settings:providers.getRequestyApiKey")}
 						</VSCodeButtonLink>
 					)}
+					<Button
+						variant="outline"
+						title={t("settings:providers.refetchModels")}
+						onClick={() => {
+							vscode.postMessage({ type: "flushRouterModels", text: "requesty" })
+							refetchRouterModels()
+							setRequestyShowRefreshHint(true)
+						}}>
+						<div className="flex items-center gap-2">
+							<span className="codicon codicon-refresh" />
+							{t("settings:providers.flushModelsCache")}
+						</div>
+					</Button>
+					{requestyShowRefreshHint && (
+						<div className="flex items-center text-vscode-errorForeground">
+							{t("settings:providers.flushedModelsCache")}
+						</div>
+					)}
 				</>
 			)}
 
```
webview-ui/src/i18n/locales/ca/settings.json

Lines changed: 2 additions & 0 deletions
```diff
@@ -119,6 +119,8 @@
 	"glamaApiKey": "Clau API de Glama",
 	"getGlamaApiKey": "Obtenir clau API de Glama",
 	"requestyApiKey": "Clau API de Requesty",
+	"flushModelsCache": "Netejar memòria cau de models",
+	"flushedModelsCache": "Memòria cau netejada, si us plau torna a obrir la vista de configuració",
 	"getRequestyApiKey": "Obtenir clau API de Requesty",
 	"anthropicApiKey": "Clau API d'Anthropic",
 	"getAnthropicApiKey": "Obtenir clau API d'Anthropic",
```

webview-ui/src/i18n/locales/de/settings.json

Lines changed: 2 additions & 0 deletions
```diff
@@ -106,6 +106,8 @@
 	"awsCustomArnUse": "Geben Sie eine gültige Amazon Bedrock ARN für das Modell ein, das Sie verwenden möchten. Formatbeispiele:",
 	"awsCustomArnDesc": "Stellen Sie sicher, dass die Region in der ARN mit Ihrer oben ausgewählten AWS-Region übereinstimmt.",
 	"openRouterApiKey": "OpenRouter API-Schlüssel",
+	"flushModelsCache": "Modell-Cache leeren",
+	"flushedModelsCache": "Cache geleert, bitte öffnen Sie die Einstellungsansicht erneut",
 	"getOpenRouterApiKey": "OpenRouter API-Schlüssel erhalten",
 	"apiKeyStorageNotice": "API-Schlüssel werden sicher im VSCode Secret Storage gespeichert",
 	"glamaApiKey": "Glama API-Schlüssel",
```

webview-ui/src/i18n/locales/en/settings.json

Lines changed: 2 additions & 0 deletions
```diff
@@ -118,6 +118,8 @@
 	"headerValue": "Header value",
 	"noCustomHeaders": "No custom headers defined. Click the + button to add one.",
 	"requestyApiKey": "Requesty API Key",
+	"flushModelsCache": "Flush cached models",
+	"flushedModelsCache": "Flushed cache, please reopen the settings view",
 	"getRequestyApiKey": "Get Requesty API Key",
 	"openRouterTransformsText": "Compress prompts and message chains to the context size (<a>OpenRouter Transforms</a>)",
 	"anthropicApiKey": "Anthropic API Key",
```
