Skip to content

Commit 3514f65

Browse files
committed
Added support for Claude 3.7 Sonnet thinking via Vertex AI
1 parent 22ec3b2 commit 3514f65

File tree

8 files changed

+143
-33
lines changed

8 files changed

+143
-33
lines changed

package-lock.json

Lines changed: 5 additions & 5 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

package.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -305,7 +305,7 @@
305305
"dependencies": {
306306
"@anthropic-ai/bedrock-sdk": "^0.10.2",
307307
"@anthropic-ai/sdk": "^0.37.0",
308-
"@anthropic-ai/vertex-sdk": "^0.4.1",
308+
"@anthropic-ai/vertex-sdk": "^0.7.0",
309309
"@aws-sdk/client-bedrock-runtime": "^3.706.0",
310310
"@google/generative-ai": "^0.18.0",
311311
"@mistralai/mistralai": "^1.3.6",

src/api/providers/vertex.ts

Lines changed: 91 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ import { Anthropic } from "@anthropic-ai/sdk"
22
import { AnthropicVertex } from "@anthropic-ai/vertex-sdk"
33
import { Stream as AnthropicStream } from "@anthropic-ai/sdk/streaming"
44
import { ApiHandler, SingleCompletionHandler } from "../"
5+
import { BetaThinkingConfigParam } from "@anthropic-ai/sdk/resources/beta"
56
import { ApiHandlerOptions, ModelInfo, vertexDefaultModelId, VertexModelId, vertexModels } from "../../shared/api"
67
import { ApiStream } from "../transform/stream"
78

@@ -70,15 +71,25 @@ interface VertexMessageStreamEvent {
7071
usage?: {
7172
output_tokens: number
7273
}
73-
content_block?: {
74-
type: "text"
75-
text: string
76-
}
74+
content_block?:
75+
| {
76+
type: "text"
77+
text: string
78+
}
79+
| {
80+
type: "thinking"
81+
thinking: string
82+
}
7783
index?: number
78-
delta?: {
79-
type: "text_delta"
80-
text: string
81-
}
84+
delta?:
85+
| {
86+
type: "text_delta"
87+
text: string
88+
}
89+
| {
90+
type: "thinking_delta"
91+
thinking: string
92+
}
8293
}
8394

8495
// https://docs.anthropic.com/en/api/claude-on-vertex-ai
@@ -145,6 +156,7 @@ export class VertexHandler implements ApiHandler, SingleCompletionHandler {
145156

146157
async *createMessage(systemPrompt: string, messages: Anthropic.Messages.MessageParam[]): ApiStream {
147158
const model = this.getModel()
159+
let { id, info, temperature, maxTokens, thinking } = model
148160
const useCache = model.info.supportsPromptCache
149161

150162
// Find indices of user messages that we want to cache
@@ -158,9 +170,10 @@ export class VertexHandler implements ApiHandler, SingleCompletionHandler {
158170

159171
// Create the stream with appropriate caching configuration
160172
const params = {
161-
model: model.id,
162-
max_tokens: model.info.maxTokens || 8192,
163-
temperature: this.options.modelTemperature ?? 0,
173+
model: id,
174+
max_tokens: maxTokens,
175+
temperature,
176+
thinking,
164177
// Cache the system prompt if caching is enabled
165178
system: useCache
166179
? [
@@ -220,6 +233,19 @@ export class VertexHandler implements ApiHandler, SingleCompletionHandler {
220233
}
221234
break
222235
}
236+
case "thinking": {
237+
if (chunk.index! > 0) {
238+
yield {
239+
type: "reasoning",
240+
text: "\n",
241+
}
242+
}
243+
yield {
244+
type: "reasoning",
245+
text: (chunk.content_block as any).thinking,
246+
}
247+
break
248+
}
223249
}
224250
break
225251
}
@@ -232,31 +258,77 @@ export class VertexHandler implements ApiHandler, SingleCompletionHandler {
232258
}
233259
break
234260
}
261+
case "thinking_delta": {
262+
yield {
263+
type: "reasoning",
264+
text: (chunk.delta as any).thinking,
265+
}
266+
break
267+
}
235268
}
236269
break
237270
}
238271
}
239272
}
240273
}
241274

242-
getModel(): { id: VertexModelId; info: ModelInfo } {
275+
getModel(): {
276+
id: VertexModelId
277+
info: ModelInfo
278+
temperature: number
279+
maxTokens: number
280+
thinking?: BetaThinkingConfigParam
281+
} {
243282
const modelId = this.options.apiModelId
283+
let temperature = this.options.modelTemperature ?? 0
284+
let thinking: BetaThinkingConfigParam | undefined = undefined
285+
244286
if (modelId && modelId in vertexModels) {
245287
const id = modelId as VertexModelId
246-
return { id, info: vertexModels[id] }
288+
const info: ModelInfo = vertexModels[id]
289+
290+
// The `:thinking` variant is a virtual identifier for thinking-enabled models
291+
// Similar to how it's handled in the Anthropic provider
292+
let actualId = id
293+
if (id.endsWith(":thinking")) {
294+
actualId = id.replace(":thinking", "") as VertexModelId
295+
}
296+
297+
const maxTokens = this.options.modelMaxTokens || info.maxTokens || 8192
298+
299+
if (info.thinking) {
300+
temperature = 1.0 // Thinking requires temperature 1.0
301+
const maxBudgetTokens = Math.floor(maxTokens * 0.8)
302+
const budgetTokens = Math.max(
303+
Math.min(
304+
this.options.vertexThinking ?? this.options.anthropicThinking ?? maxBudgetTokens,
305+
maxBudgetTokens,
306+
),
307+
1024,
308+
)
309+
thinking = { type: "enabled", budget_tokens: budgetTokens }
310+
}
311+
312+
return { id: actualId, info, temperature, maxTokens, thinking }
247313
}
248-
return { id: vertexDefaultModelId, info: vertexModels[vertexDefaultModelId] }
314+
315+
const id = vertexDefaultModelId
316+
const info = vertexModels[id]
317+
const maxTokens = this.options.modelMaxTokens || info.maxTokens || 8192
318+
319+
return { id, info, temperature, maxTokens, thinking }
249320
}
250321

251322
async completePrompt(prompt: string): Promise<string> {
252323
try {
253-
const model = this.getModel()
254-
const useCache = model.info.supportsPromptCache
324+
let { id, info, temperature, maxTokens, thinking } = this.getModel()
325+
const useCache = info.supportsPromptCache
255326

256327
const params = {
257-
model: model.id,
258-
max_tokens: model.info.maxTokens || 8192,
259-
temperature: this.options.modelTemperature ?? 0,
328+
model: id,
329+
max_tokens: maxTokens,
330+
temperature,
331+
thinking,
260332
system: "", // No system prompt needed for single completions
261333
messages: [
262334
{

src/core/webview/ClineProvider.ts

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1652,6 +1652,7 @@ export class ClineProvider implements vscode.WebviewViewProvider {
16521652
lmStudioBaseUrl,
16531653
anthropicBaseUrl,
16541654
anthropicThinking,
1655+
vertexThinking,
16551656
geminiApiKey,
16561657
openAiNativeApiKey,
16571658
deepSeekApiKey,
@@ -1701,6 +1702,7 @@ export class ClineProvider implements vscode.WebviewViewProvider {
17011702
this.updateGlobalState("lmStudioBaseUrl", lmStudioBaseUrl),
17021703
this.updateGlobalState("anthropicBaseUrl", anthropicBaseUrl),
17031704
this.updateGlobalState("anthropicThinking", anthropicThinking),
1705+
this.updateGlobalState("vertexThinking", vertexThinking),
17041706
this.storeSecret("geminiApiKey", geminiApiKey),
17051707
this.storeSecret("openAiNativeApiKey", openAiNativeApiKey),
17061708
this.storeSecret("deepSeekApiKey", deepSeekApiKey),
@@ -2158,6 +2160,7 @@ export class ClineProvider implements vscode.WebviewViewProvider {
21582160
lmStudioBaseUrl,
21592161
anthropicBaseUrl,
21602162
anthropicThinking,
2163+
vertexThinking,
21612164
geminiApiKey,
21622165
openAiNativeApiKey,
21632166
deepSeekApiKey,
@@ -2242,6 +2245,7 @@ export class ClineProvider implements vscode.WebviewViewProvider {
22422245
this.getGlobalState("lmStudioBaseUrl") as Promise<string | undefined>,
22432246
this.getGlobalState("anthropicBaseUrl") as Promise<string | undefined>,
22442247
this.getGlobalState("anthropicThinking") as Promise<number | undefined>,
2248+
this.getGlobalState("vertexThinking") as Promise<number | undefined>,
22452249
this.getSecret("geminiApiKey") as Promise<string | undefined>,
22462250
this.getSecret("openAiNativeApiKey") as Promise<string | undefined>,
22472251
this.getSecret("deepSeekApiKey") as Promise<string | undefined>,
@@ -2343,6 +2347,7 @@ export class ClineProvider implements vscode.WebviewViewProvider {
23432347
lmStudioBaseUrl,
23442348
anthropicBaseUrl,
23452349
anthropicThinking,
2350+
vertexThinking,
23462351
geminiApiKey,
23472352
openAiNativeApiKey,
23482353
deepSeekApiKey,

src/shared/api.ts

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@ export interface ApiHandlerOptions {
4141
awsUseProfile?: boolean
4242
vertexProjectId?: string
4343
vertexRegion?: string
44+
vertexThinking?: number
4445
openAiBaseUrl?: string
4546
openAiApiKey?: string
4647
openAiModelId?: string
@@ -436,6 +437,18 @@ export const openRouterDefaultModelInfo: ModelInfo = {
436437
export type VertexModelId = keyof typeof vertexModels
437438
export const vertexDefaultModelId: VertexModelId = "claude-3-7-sonnet@20250219"
438439
export const vertexModels = {
440+
"claude-3-7-sonnet@20250219:thinking": {
441+
maxTokens: 64000,
442+
contextWindow: 200_000,
443+
supportsImages: true,
444+
supportsComputerUse: true,
445+
supportsPromptCache: true,
446+
inputPrice: 3.0,
447+
outputPrice: 15.0,
448+
cacheWritesPrice: 3.75,
449+
cacheReadsPrice: 0.3,
450+
thinking: true,
451+
},
439452
"claude-3-7-sonnet@20250219": {
440453
maxTokens: 8192,
441454
contextWindow: 200_000,
@@ -446,6 +459,7 @@ export const vertexModels = {
446459
outputPrice: 15.0,
447460
cacheWritesPrice: 3.75,
448461
cacheReadsPrice: 0.3,
462+
thinking: false,
449463
},
450464
"claude-3-5-sonnet-v2@20241022": {
451465
maxTokens: 8192,

src/shared/globalState.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ export type GlobalStateKey =
2424
| "awsUseProfile"
2525
| "vertexProjectId"
2626
| "vertexRegion"
27+
| "vertexThinking"
2728
| "lastShownAnnouncementId"
2829
| "customInstructions"
2930
| "alwaysAllowReadOnly"
@@ -43,6 +44,7 @@ export type GlobalStateKey =
4344
| "lmStudioBaseUrl"
4445
| "anthropicBaseUrl"
4546
| "anthropicThinking"
47+
| "vertexThinking"
4648
| "azureApiVersion"
4749
| "openAiStreamingEnabled"
4850
| "openRouterModelId"

webview-ui/src/components/settings/ApiOptions.tsx

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ import * as vscodemodels from "vscode"
77
import {
88
ApiConfiguration,
99
ModelInfo,
10+
ApiProvider,
1011
anthropicDefaultModelId,
1112
anthropicModels,
1213
azureOpenAiDefaultApiVersion,
@@ -1380,9 +1381,11 @@ const ApiOptions = ({
13801381
/>
13811382
</div>
13821383
<ThinkingBudget
1384+
key={`${selectedProvider}-${selectedModelId}`}
13831385
apiConfiguration={apiConfiguration}
13841386
setApiConfigurationField={setApiConfigurationField}
13851387
modelInfo={selectedModelInfo}
1388+
provider={selectedProvider as ApiProvider}
13861389
/>
13871390
<ModelInfoView
13881391
selectedModelId={selectedModelId}

webview-ui/src/components/settings/ThinkingBudget.tsx

Lines changed: 22 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
1-
import { useEffect } from "react"
2-
1+
import { useEffect, useMemo } from "react"
2+
import { ApiProvider } from "../../../../src/shared/api"
33
import { Slider } from "@/components/ui"
44

55
import { ApiConfiguration, ModelInfo } from "../../../../src/shared/api"
@@ -8,24 +8,38 @@ interface ThinkingBudgetProps {
88
apiConfiguration: ApiConfiguration
99
setApiConfigurationField: <K extends keyof ApiConfiguration>(field: K, value: ApiConfiguration[K]) => void
1010
modelInfo?: ModelInfo
11+
provider?: ApiProvider
1112
}
1213

13-
export const ThinkingBudget = ({ apiConfiguration, setApiConfigurationField, modelInfo }: ThinkingBudgetProps) => {
14+
export const ThinkingBudget = ({
15+
apiConfiguration,
16+
setApiConfigurationField,
17+
modelInfo,
18+
provider,
19+
}: ThinkingBudgetProps) => {
20+
const isVertexProvider = provider === "vertex"
21+
const budgetField = isVertexProvider ? "vertexThinking" : "anthropicThinking"
22+
1423
const tokens = apiConfiguration?.modelMaxTokens || modelInfo?.maxTokens || 64_000
1524
const tokensMin = 8192
1625
const tokensMax = modelInfo?.maxTokens || 64_000
1726

18-
const thinkingTokens = apiConfiguration?.anthropicThinking || 8192
27+
// Get the appropriate thinking tokens based on provider
28+
const thinkingTokens = useMemo(() => {
29+
const value = isVertexProvider ? apiConfiguration?.vertexThinking : apiConfiguration?.anthropicThinking
30+
return value || Math.min(Math.floor(0.8 * tokens), 8192)
31+
}, [apiConfiguration, isVertexProvider, tokens])
32+
1933
const thinkingTokensMin = 1024
2034
const thinkingTokensMax = Math.floor(0.8 * tokens)
2135

2236
useEffect(() => {
2337
if (thinkingTokens > thinkingTokensMax) {
24-
setApiConfigurationField("anthropicThinking", thinkingTokensMax)
38+
setApiConfigurationField(budgetField, thinkingTokensMax)
2539
}
26-
}, [thinkingTokens, thinkingTokensMax, setApiConfigurationField])
40+
}, [thinkingTokens, thinkingTokensMax, setApiConfigurationField, budgetField])
2741

28-
if (!modelInfo || !modelInfo.thinking) {
42+
if (!modelInfo?.thinking) {
2943
return null
3044
}
3145

@@ -52,7 +66,7 @@ export const ThinkingBudget = ({ apiConfiguration, setApiConfigurationField, mod
5266
max={thinkingTokensMax}
5367
step={1024}
5468
value={[thinkingTokens]}
55-
onValueChange={([value]) => setApiConfigurationField("anthropicThinking", value)}
69+
onValueChange={([value]) => setApiConfigurationField(budgetField, value)}
5670
/>
5771
<div className="w-12 text-sm text-center">{thinkingTokens}</div>
5872
</div>

0 commit comments

Comments
 (0)