Skip to content

Commit 392a237

Browse files
committed
Support Claude 3.7 Sonnet "Thinking" in OpenRouter
1 parent 159621c commit 392a237

File tree

4 files changed: 86 additions (+86) and 39 deletions (-39)

src/api/providers/openrouter.ts

Lines changed: 61 additions & 31 deletions
Original file line number | Diff line number | Diff line change
@@ -52,10 +52,14 @@ export class OpenRouterHandler implements ApiHandler, SingleCompletionHandler {
5252
...convertToOpenAiMessages(messages),
5353
]
5454

55+
const { id: modelId, info: modelInfo } = this.getModel()
56+
5557
// prompt caching: https://openrouter.ai/docs/prompt-caching
5658
// this is specifically for claude models (some models may 'support prompt caching' automatically without this)
57-
switch (this.getModel().id) {
59+
switch (modelId) {
60+
case "anthropic/claude-3.7-sonnet:thinking":
5861
case "anthropic/claude-3.7-sonnet":
62+
case "anthropic/claude-3.7-sonnet:beta":
5963
case "anthropic/claude-3.5-sonnet":
6064
case "anthropic/claude-3.5-sonnet:beta":
6165
case "anthropic/claude-3.5-sonnet-20240620":
@@ -103,31 +107,25 @@ export class OpenRouterHandler implements ApiHandler, SingleCompletionHandler {
103107
break
104108
}
105109

106-
// Not sure how openrouter defaults max tokens when no value is provided, but the anthropic api requires this value and since they offer both 4096 and 8192 variants, we should ensure 8192.
107-
// (models usually default to max tokens allowed)
108-
let maxTokens: number | undefined
109-
switch (this.getModel().id) {
110-
case "anthropic/claude-3.7-sonnet":
111-
case "anthropic/claude-3.5-sonnet":
112-
case "anthropic/claude-3.5-sonnet:beta":
113-
case "anthropic/claude-3.5-sonnet-20240620":
114-
case "anthropic/claude-3.5-sonnet-20240620:beta":
115-
case "anthropic/claude-3-5-haiku":
116-
case "anthropic/claude-3-5-haiku:beta":
117-
case "anthropic/claude-3-5-haiku-20241022":
118-
case "anthropic/claude-3-5-haiku-20241022:beta":
119-
maxTokens = 8_192
120-
break
110+
// Not sure how openrouter defaults max tokens when no value is
111+
// provided, but the Anthropic API requires this value and since they
112+
// offer both 4096 and 8192 variants, we should ensure 8192.
113+
// (Models usually default to max tokens allowed.)
114+
let maxTokens: number | undefined = undefined
115+
116+
if (modelId.startsWith("anthropic/claude-3.5")) {
117+
maxTokens = modelInfo.maxTokens ?? 8_192
118+
}
119+
120+
if (modelId.startsWith("anthropic/claude-3.7")) {
121+
maxTokens = modelInfo.maxTokens ?? 16_384
121122
}
122123

123124
let defaultTemperature = OPENROUTER_DEFAULT_TEMPERATURE
124125
let topP: number | undefined = undefined
125126

126127
// Handle models based on deepseek-r1
127-
if (
128-
this.getModel().id.startsWith("deepseek/deepseek-r1") ||
129-
this.getModel().id === "perplexity/sonar-reasoning"
130-
) {
128+
if (modelId.startsWith("deepseek/deepseek-r1") || modelId === "perplexity/sonar-reasoning") {
131129
// Recommended temperature for DeepSeek reasoning models
132130
defaultTemperature = DEEP_SEEK_DEFAULT_TEMPERATURE
133131
// DeepSeek highly recommends using user instead of system role
@@ -136,24 +134,37 @@ export class OpenRouterHandler implements ApiHandler, SingleCompletionHandler {
136134
topP = 0.95
137135
}
138136

137+
let temperature = this.options.modelTemperature ?? defaultTemperature
138+
139+
if (modelInfo.thinking) {
140+
temperature = 1.0
141+
}
142+
139143
// https://openrouter.ai/docs/transforms
140144
let fullResponseText = ""
141-
const stream = await this.client.chat.completions.create({
142-
model: this.getModel().id,
145+
146+
const completionParams: OpenRouterChatCompletionParams = {
147+
model: modelId,
143148
max_tokens: maxTokens,
144-
temperature: this.options.modelTemperature ?? defaultTemperature,
149+
temperature,
145150
top_p: topP,
146151
messages: openAiMessages,
147152
stream: true,
148153
include_reasoning: true,
149154
// This way, the transforms field will only be included in the parameters when openRouterUseMiddleOutTransform is true.
150155
...(this.options.openRouterUseMiddleOutTransform && { transforms: ["middle-out"] }),
151-
} as OpenRouterChatCompletionParams)
156+
}
157+
158+
console.log("OpenRouter completionParams:", completionParams)
159+
160+
const stream = await this.client.chat.completions.create(completionParams)
152161

153162
let genId: string | undefined
154163

155164
for await (const chunk of stream as unknown as AsyncIterable<OpenAI.Chat.Completions.ChatCompletionChunk>) {
156-
// openrouter returns an error object instead of the openai sdk throwing an error
165+
console.log("OpenRouter chunk:", chunk)
166+
167+
// OpenRouter returns an error object instead of the OpenAI SDK throwing an error.
157168
if ("error" in chunk) {
158169
const error = chunk.error as { message?: string; code?: number }
159170
console.error(`OpenRouter API Error: ${error?.code} - ${error?.message}`)
@@ -165,19 +176,22 @@ export class OpenRouterHandler implements ApiHandler, SingleCompletionHandler {
165176
}
166177

167178
const delta = chunk.choices[0]?.delta
179+
168180
if ("reasoning" in delta && delta.reasoning) {
169181
yield {
170182
type: "reasoning",
171183
text: delta.reasoning,
172184
} as ApiStreamChunk
173185
}
186+
174187
if (delta?.content) {
175188
fullResponseText += delta.content
176189
yield {
177190
type: "text",
178191
text: delta.content,
179192
} as ApiStreamChunk
180193
}
194+
181195
// if (chunk.usage) {
182196
// yield {
183197
// type: "usage",
@@ -187,10 +201,12 @@ export class OpenRouterHandler implements ApiHandler, SingleCompletionHandler {
187201
// }
188202
}
189203

190-
// retry fetching generation details
204+
// Retry fetching generation details.
191205
let attempt = 0
206+
192207
while (attempt++ < 10) {
193208
await delay(200) // FIXME: necessary delay to ensure generation endpoint is ready
209+
194210
try {
195211
const response = await axios.get(`https://openrouter.ai/api/v1/generation?id=${genId}`, {
196212
headers: {
@@ -201,6 +217,7 @@ export class OpenRouterHandler implements ApiHandler, SingleCompletionHandler {
201217

202218
const generation = response.data?.data
203219
console.log("OpenRouter generation details:", response.data)
220+
204221
yield {
205222
type: "usage",
206223
// cacheWriteTokens: 0,
@@ -211,20 +228,21 @@ export class OpenRouterHandler implements ApiHandler, SingleCompletionHandler {
211228
totalCost: generation?.total_cost || 0,
212229
fullResponseText,
213230
} as OpenRouterApiStreamUsageChunk
231+
214232
return
215233
} catch (error) {
216234
// ignore if fails
217235
console.error("Error fetching OpenRouter generation details:", error)
218236
}
219237
}
220238
}
221-
getModel(): { id: string; info: ModelInfo } {
239+
240+
getModel() {
222241
const modelId = this.options.openRouterModelId
223242
const modelInfo = this.options.openRouterModelInfo
224-
if (modelId && modelInfo) {
225-
return { id: modelId, info: modelInfo }
226-
}
227-
return { id: openRouterDefaultModelId, info: openRouterDefaultModelInfo }
243+
return modelId && modelInfo
244+
? { id: modelId, info: modelInfo }
245+
: { id: openRouterDefaultModelId, info: openRouterDefaultModelInfo }
228246
}
229247

230248
async completePrompt(prompt: string): Promise<string> {
@@ -247,6 +265,7 @@ export class OpenRouterHandler implements ApiHandler, SingleCompletionHandler {
247265
if (error instanceof Error) {
248266
throw new Error(`OpenRouter completion error: ${error.message}`)
249267
}
268+
250269
throw error
251270
}
252271
}
@@ -268,21 +287,31 @@ export async function getOpenRouterModels() {
268287
inputPrice: parseApiPrice(rawModel.pricing?.prompt),
269288
outputPrice: parseApiPrice(rawModel.pricing?.completion),
270289
description: rawModel.description,
290+
thinking: rawModel.id === "anthropic/claude-3.7-sonnet:thinking",
271291
}
272292

273293
switch (rawModel.id) {
294+
case "anthropic/claude-3.7-sonnet:thinking":
274295
case "anthropic/claude-3.7-sonnet":
275296
case "anthropic/claude-3.7-sonnet:beta":
297+
modelInfo.maxTokens = 16_384
298+
modelInfo.supportsComputerUse = true
299+
modelInfo.supportsPromptCache = true
300+
modelInfo.cacheWritesPrice = 3.75
301+
modelInfo.cacheReadsPrice = 0.3
302+
break
276303
case "anthropic/claude-3.5-sonnet":
277304
case "anthropic/claude-3.5-sonnet:beta":
278305
// NOTE: This needs to be synced with api.ts/openrouter default model info.
306+
modelInfo.maxTokens = 8_192
279307
modelInfo.supportsComputerUse = true
280308
modelInfo.supportsPromptCache = true
281309
modelInfo.cacheWritesPrice = 3.75
282310
modelInfo.cacheReadsPrice = 0.3
283311
break
284312
case "anthropic/claude-3.5-sonnet-20240620":
285313
case "anthropic/claude-3.5-sonnet-20240620:beta":
314+
modelInfo.maxTokens = 8_192
286315
modelInfo.supportsPromptCache = true
287316
modelInfo.cacheWritesPrice = 3.75
288317
modelInfo.cacheReadsPrice = 0.3
@@ -295,6 +324,7 @@ export async function getOpenRouterModels() {
295324
case "anthropic/claude-3.5-haiku:beta":
296325
case "anthropic/claude-3.5-haiku-20241022":
297326
case "anthropic/claude-3.5-haiku-20241022:beta":
327+
modelInfo.maxTokens = 8_192
298328
modelInfo.supportsPromptCache = true
299329
modelInfo.cacheWritesPrice = 1.25
300330
modelInfo.cacheReadsPrice = 0.1

src/shared/api.ts

Lines changed: 8 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -89,6 +89,13 @@ export interface ModelInfo {
8989
cacheReadsPrice?: number
9090
description?: string
9191
reasoningEffort?: "low" | "medium" | "high"
92+
thinking?: boolean
93+
}
94+
95+
export const THINKING_BUDGET = {
96+
step: 1024,
97+
min: 1024,
98+
default: 8 * 1024,
9299
}
93100

94101
// Anthropic
@@ -106,6 +113,7 @@ export const anthropicModels = {
106113
outputPrice: 15.0, // $15 per million output tokens
107114
cacheWritesPrice: 3.75, // $3.75 per million tokens
108115
cacheReadsPrice: 0.3, // $0.30 per million tokens
116+
thinking: true,
109117
},
110118
"claude-3-5-sonnet-20241022": {
111119
maxTokens: 8192,

webview-ui/src/components/settings/ApiOptions.tsx

Lines changed: 15 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -33,6 +33,7 @@ import {
3333
unboundDefaultModelInfo,
3434
requestyDefaultModelId,
3535
requestyDefaultModelInfo,
36+
THINKING_BUDGET,
3637
} from "../../../../src/shared/api"
3738
import { ExtensionMessage } from "../../../../src/shared/ExtensionMessage"
3839

@@ -1270,12 +1271,20 @@ const ApiOptions = ({
12701271
</>
12711272
)}
12721273

1273-
{selectedProvider === "anthropic" && selectedModelId === "claude-3-7-sonnet-20250219" && (
1274+
{selectedModelInfo && selectedModelInfo.thinking && (
12741275
<div className="flex flex-col gap-2 mt-2">
12751276
<Checkbox
12761277
checked={!!anthropicThinkingBudget}
12771278
onChange={(checked) =>
1278-
setApiConfigurationField("anthropicThinking", checked ? 16_384 : undefined)
1279+
setApiConfigurationField(
1280+
"anthropicThinking",
1281+
checked
1282+
? Math.min(
1283+
THINKING_BUDGET.default,
1284+
selectedModelInfo.maxTokens ?? THINKING_BUDGET.default,
1285+
)
1286+
: undefined,
1287+
)
12791288
}>
12801289
Thinking?
12811290
</Checkbox>
@@ -1286,13 +1295,13 @@ const ApiOptions = ({
12861295
</div>
12871296
<div className="flex items-center gap-2">
12881297
<Slider
1289-
min={1024}
1290-
max={anthropicModels["claude-3-7-sonnet-20250219"].maxTokens - 1}
1291-
step={1024}
1298+
min={THINKING_BUDGET.min}
1299+
max={(selectedModelInfo.maxTokens ?? THINKING_BUDGET.default) - 1}
1300+
step={THINKING_BUDGET.step}
12921301
value={[anthropicThinkingBudget]}
12931302
onValueChange={(value) => setApiConfigurationField("anthropicThinking", value[0])}
12941303
/>
1295-
<div className="w-10">{anthropicThinkingBudget}</div>
1304+
<div className="w-12">{anthropicThinkingBudget}</div>
12961305
</div>
12971306
</>
12981307
)}

webview-ui/src/components/ui/slider.tsx

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -11,8 +11,8 @@ const Slider = React.forwardRef<
1111
ref={ref}
1212
className={cn("relative flex w-full touch-none select-none items-center", className)}
1313
{...props}>
14-
<SliderPrimitive.Track className="relative h-1 w-full grow overflow-hidden bg-primary/20">
15-
<SliderPrimitive.Range className="absolute h-full bg-primary" />
14+
<SliderPrimitive.Track className="relative w-full h-[8px] grow overflow-hidden bg-vscode-button-secondaryBackground border border-[#767676] dark:border-[#858585] rounded-sm">
15+
<SliderPrimitive.Range className="absolute h-full bg-vscode-button-background" />
1616
</SliderPrimitive.Track>
1717
<SliderPrimitive.Thumb className="block h-3 w-3 rounded-full border border-primary/50 bg-primary shadow transition-colors cursor-pointer focus-visible:outline-none focus-visible:ring-1 focus-visible:ring-ring disabled:pointer-events-none disabled:opacity-50" />
1818
</SliderPrimitive.Root>

0 commit comments

Comments
 (0)