@@ -29,7 +29,7 @@ export class AnthropicHandler extends BaseProvider implements SingleCompletionHa
2929 async * createMessage ( systemPrompt : string , messages : Anthropic . Messages . MessageParam [ ] ) : ApiStream {
3030 let stream : AnthropicStream < Anthropic . Messages . RawMessageStreamEvent >
3131 const cacheControl : CacheControlEphemeral = { type : "ephemeral" }
32- let { id : modelId , maxTokens, thinking, temperature } = this . getModel ( )
32+ let { id : modelId , maxTokens, thinking, temperature, virtualId } = this . getModel ( )
3333
3434 switch ( modelId ) {
3535 case "claude-3-7-sonnet-20250219" :
@@ -82,13 +82,24 @@ export class AnthropicHandler extends BaseProvider implements SingleCompletionHa
8282 // prompt caching: https://x.com/alexalbert__/status/1823751995901272068
8383 // https://github.com/anthropics/anthropic-sdk-typescript?tab=readme-ov-file#default-headers
8484 // https://github.com/anthropics/anthropic-sdk-typescript/commit/c920b77fc67bd839bfeb6716ceab9d7c9bbe7393
85+
86+ const betas = [ ]
87+
88+ // The `:thinking` virtual model ID additionally enables the 128k-output beta; check it first
89+ if ( virtualId === "claude-3-7-sonnet-20250219:thinking" ) {
90+ betas . push ( "output-128k-2025-02-19" )
91+ }
92+
93+ // Then check for models that support prompt caching
8594 switch ( modelId ) {
95+ case "claude-3-7-sonnet-20250219" :
8696 case "claude-3-5-sonnet-20241022" :
8797 case "claude-3-5-haiku-20241022" :
8898 case "claude-3-opus-20240229" :
8999 case "claude-3-haiku-20240307" :
100+ betas . push ( "prompt-caching-2024-07-31" )
90101 return {
91- headers : { "anthropic-beta" : "prompt-caching-2024-07-31" } ,
102+ headers : { "anthropic-beta" : betas . join ( "," ) } ,
92103 }
93104 default :
94105 return undefined
@@ -184,6 +195,9 @@ export class AnthropicHandler extends BaseProvider implements SingleCompletionHa
184195 let id = modelId && modelId in anthropicModels ? ( modelId as AnthropicModelId ) : anthropicDefaultModelId
185196 const info : ModelInfo = anthropicModels [ id ]
186197
198+ // Track the original model ID for special variant handling
199+ const virtualId = id
200+
187201 // The `:thinking` variant is a virtual identifier for the
188202 // `claude-3-7-sonnet-20250219` model with a thinking budget.
189203 // We can handle this more elegantly in the future.
@@ -194,6 +208,7 @@ export class AnthropicHandler extends BaseProvider implements SingleCompletionHa
194208 return {
195209 id,
196210 info,
211+ virtualId, // Include the original ID to use for header selection
197212 ...getModelParams ( { options : this . options , model : info , defaultMaxTokens : ANTHROPIC_DEFAULT_MAX_TOKENS } ) ,
198213 }
199214 }
0 commit comments