@@ -2,6 +2,7 @@ import { Anthropic } from "@anthropic-ai/sdk"
 import { AnthropicVertex } from "@anthropic-ai/vertex-sdk"
 import { Stream as AnthropicStream } from "@anthropic-ai/sdk/streaming"
 import { ApiHandler, SingleCompletionHandler } from "../"
+import { BetaThinkingConfigParam } from "@anthropic-ai/sdk/resources/beta"
 import { ApiHandlerOptions, ModelInfo, vertexDefaultModelId, VertexModelId, vertexModels } from "../../shared/api"
 import { ApiStream } from "../transform/stream"
 
@@ -70,15 +71,25 @@ interface VertexMessageStreamEvent {
 	usage?: {
 		output_tokens: number
 	}
-	content_block?: {
-		type: "text"
-		text: string
-	}
+	content_block?:
+		| {
+			type: "text"
+			text: string
+		}
+		| {
+			type: "thinking"
+			thinking: string
+		}
 	index?: number
-	delta?: {
-		type: "text_delta"
-		text: string
-	}
+	delta?:
+		| {
+			type: "text_delta"
+			text: string
+		}
+		| {
+			type: "thinking_delta"
+			thinking: string
+		}
 }
 
 // https://docs.anthropic.com/en/api/claude-on-vertex-ai
@@ -145,6 +156,7 @@ export class VertexHandler implements ApiHandler, SingleCompletionHandler {
 
 	async *createMessage(systemPrompt: string, messages: Anthropic.Messages.MessageParam[]): ApiStream {
 		const model = this.getModel()
+		let { id, info, temperature, maxTokens, thinking } = model
 		const useCache = model.info.supportsPromptCache
 
 		// Find indices of user messages that we want to cache
@@ -158,9 +170,10 @@ export class VertexHandler implements ApiHandler, SingleCompletionHandler {
 
 		// Create the stream with appropriate caching configuration
 		const params = {
-			model: model.id,
-			max_tokens: model.info.maxTokens || 8192,
-			temperature: this.options.modelTemperature ?? 0,
+			model: id,
+			max_tokens: maxTokens,
+			temperature,
+			thinking,
 			// Cache the system prompt if caching is enabled
 			system: useCache
 				? [
@@ -220,6 +233,19 @@ export class VertexHandler implements ApiHandler, SingleCompletionHandler {
 							}
 							break
 						}
+						case "thinking": {
+							if (chunk.index! > 0) {
+								yield {
+									type: "reasoning",
+									text: "\n",
+								}
+							}
+							yield {
+								type: "reasoning",
+								text: (chunk.content_block as any).thinking,
+							}
+							break
+						}
 					}
 					break
 				}
@@ -232,31 +258,77 @@ export class VertexHandler implements ApiHandler, SingleCompletionHandler {
 							}
 							break
 						}
+						case "thinking_delta": {
+							yield {
+								type: "reasoning",
+								text: (chunk.delta as any).thinking,
+							}
+							break
+						}
 					}
 					break
 				}
 			}
 		}
 	}
 
-	getModel(): { id: VertexModelId; info: ModelInfo } {
+	getModel(): {
+		id: VertexModelId
+		info: ModelInfo
+		temperature: number
+		maxTokens: number
+		thinking?: BetaThinkingConfigParam
+	} {
 		const modelId = this.options.apiModelId
+		let temperature = this.options.modelTemperature ?? 0
+		let thinking: BetaThinkingConfigParam | undefined = undefined
+
 		if (modelId && modelId in vertexModels) {
 			const id = modelId as VertexModelId
-			return { id, info: vertexModels[id] }
+			const info: ModelInfo = vertexModels[id]
+
+			// The `:thinking` variant is a virtual identifier for thinking-enabled models,
+			// similar to how it's handled in the Anthropic provider.
+			let actualId = id
+			if (id.endsWith(":thinking")) {
+				actualId = id.replace(":thinking", "") as VertexModelId
+			}
+
+			const maxTokens = this.options.modelMaxTokens || info.maxTokens || 8192
+
+			if (info.thinking) {
+				temperature = 1.0 // Thinking requires temperature 1.0
+				const maxBudgetTokens = Math.floor(maxTokens * 0.8)
+				const budgetTokens = Math.max(
+					Math.min(
+						this.options.vertexThinking ?? this.options.anthropicThinking ?? maxBudgetTokens,
+						maxBudgetTokens,
+					),
+					1024,
+				)
+				thinking = { type: "enabled", budget_tokens: budgetTokens }
+			}
+
+			return { id: actualId, info, temperature, maxTokens, thinking }
 		}
-		return { id: vertexDefaultModelId, info: vertexModels[vertexDefaultModelId] }
+
+		const id = vertexDefaultModelId
+		const info = vertexModels[id]
+		const maxTokens = this.options.modelMaxTokens || info.maxTokens || 8192
+
+		return { id, info, temperature, maxTokens, thinking }
 	}
 
 	async completePrompt(prompt: string): Promise<string> {
 		try {
-			const model = this.getModel()
-			const useCache = model.info.supportsPromptCache
+			let { id, info, temperature, maxTokens, thinking } = this.getModel()
+			const useCache = info.supportsPromptCache
 
 			const params = {
-				model: model.id,
-				max_tokens: model.info.maxTokens || 8192,
-				temperature: this.options.modelTemperature ?? 0,
+				model: id,
+				max_tokens: maxTokens,
+				temperature,
+				thinking,
 				system: "", // No system prompt needed for single completions
 				messages: [
 					{
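
For reference, the thinking budget passed to the API above is derived from the configured max output tokens: it defaults to 80% of `max_tokens` and is clamped to a floor of 1,024 tokens. Below is a minimal standalone sketch of that clamping, not part of the diff itself; the helper name and the sample numbers are illustrative, and `requested` stands in for `this.options.vertexThinking ?? this.options.anthropicThinking`.

```ts
// Sketch of the budget_tokens computation performed in getModel() above.
function thinkingBudget(maxTokens: number, requested?: number): number {
	const maxBudgetTokens = Math.floor(maxTokens * 0.8) // cap the budget at 80% of max_tokens
	return Math.max(Math.min(requested ?? maxBudgetTokens, maxBudgetTokens), 1024)
}

console.log(thinkingBudget(16_384)) // 13107 — no explicit budget, defaults to the 80% cap
console.log(thinkingBudget(16_384, 4_096)) // 4096 — an explicit budget within bounds is used as-is
console.log(thinkingBudget(2_048, 512)) // 1024 — clamped up to the 1,024-token floor
```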