@@ -29,7 +29,7 @@ export class AnthropicHandler extends BaseProvider implements SingleCompletionHa
// NOTE(review): rendered diff hunk, not compilable source — tokens are
// whitespace-mangled and each line carries old/new diff line numbers.
// Streams a chat completion; the hunk is truncated mid-switch below.
2929 async * createMessage ( systemPrompt : string , messages : Anthropic . Messages . MessageParam [ ] ) : ApiStream {
3030 let stream : AnthropicStream < Anthropic . Messages . RawMessageStreamEvent >
// Ephemeral cache marker reused for prompt-caching breakpoints.
3131 const cacheControl : CacheControlEphemeral = { type : "ephemeral" }
// Change in this hunk: also destructure `virtualId` from getModel() so the
// raw (pre-normalization) model id is available for header selection.
32- let { id : modelId , maxTokens, thinking, temperature } = this . getModel ( )
32+ let { id : modelId , maxTokens, thinking, temperature, virtualId } = this . getModel ( )
3333
// Dispatch on the normalized model id; cases continue past this hunk.
3434 switch ( modelId ) {
3535 case "claude-3-7-sonnet-20250219" :
@@ -82,6 +82,15 @@ export class AnthropicHandler extends BaseProvider implements SingleCompletionHa
// NOTE(review): diff hunk from inside a request-options helper — the enclosing
// function's signature and tail are outside this view.
8282 // prompt caching: https://x.com/alexalbert__/status/1823751995901272068
8383 // https://github.com/anthropics/anthropic-sdk-typescript?tab=readme-ov-file#default-headers
8484 // https://github.com/anthropics/anthropic-sdk-typescript/commit/c920b77fc67bd839bfeb6716ceab9d7c9bbe7393
85+
86+ // Check for the thinking-128k variant first
// New behavior in this hunk: the virtual ":thinking-128k" id short-circuits
// to the 128k-output beta header before the prompt-caching check runs.
87+ if ( virtualId === "claude-3-7-sonnet-20250219:thinking-128k" ) {
88+ return {
89+ headers : { "anthropic-beta" : "output-128k-2025-02-19" } ,
90+ }
91+ }
92+
93+ // Then check for models that support prompt caching
// Falls through to a switch over models that get the prompt-caching beta
// header; the case list continues past this hunk.
8594 switch ( modelId ) {
8695 case "claude-3-5-sonnet-20241022" :
8796 case "claude-3-5-haiku-20241022" :
@@ -184,31 +193,58 @@ export class AnthropicHandler extends BaseProvider implements SingleCompletionHa
// NOTE(review): diff hunk of getModel()'s body — the method signature is
// outside this view. Resolves the configured model id to a concrete
// Anthropic model plus its parameters.
184193 let id = modelId && modelId in anthropicModels ? ( modelId as AnthropicModelId ) : anthropicDefaultModelId
185194 const info : ModelInfo = anthropicModels [ id ]
186195
196+ // Track the original model ID for special variant handling
// `virtualId` is captured BEFORE the normalization below, so callers can
// still distinguish the ":thinking" / ":thinking-128k" virtual variants.
197+ const virtualId = id
198+
187199 // The `:thinking` variant is a virtual identifier for the
188200 // `claude-3-7-sonnet-20250219` model with a thinking budget.
189201 // We can handle this more elegantly in the future.
// Change in this hunk: the new ":thinking-128k" variant also normalizes to
// the real claude-3-7-sonnet-20250219 model id.
190- if ( id === "claude-3-7-sonnet-20250219:thinking" ) {
202+ if ( id === "claude-3-7-sonnet-20250219:thinking" || id === "claude-3-7-sonnet-20250219:thinking-128k" ) {
191203 id = "claude-3-7-sonnet-20250219"
192204 }
193205
194206 return {
195207 id,
196208 info,
209+ virtualId, // Include the original ID to use for header selection
197210 ...getModelParams ( { options : this . options , model : info , defaultMaxTokens : ANTHROPIC_DEFAULT_MAX_TOKENS } ) ,
198211 }
199212 }
200213
201214 async completePrompt ( prompt : string ) {
202- let { id : modelId , maxTokens, thinking, temperature } = this . getModel ( )
203-
204- const message = await this . client . messages . create ( {
205- model : modelId ,
206- max_tokens : maxTokens ?? ANTHROPIC_DEFAULT_MAX_TOKENS ,
207- thinking,
208- temperature,
209- messages : [ { role : "user" , content : prompt } ] ,
210- stream : false ,
211- } )
215+ let { id : modelId , maxTokens, thinking, temperature, virtualId } = this . getModel ( )
216+
217+ const message = await this . client . messages . create (
218+ {
219+ model : modelId ,
220+ max_tokens : maxTokens ?? ANTHROPIC_DEFAULT_MAX_TOKENS ,
221+ thinking,
222+ temperature,
223+ messages : [ { role : "user" , content : prompt } ] ,
224+ stream : false ,
225+ } ,
226+ ( ( ) => {
227+ // Check for the thinking-128k variant first
228+ if ( virtualId === "claude-3-7-sonnet-20250219:thinking-128k" ) {
229+ return {
230+ headers : { "anthropic-beta" : "output-128k-2025-02-19" } ,
231+ }
232+ }
233+
234+ // Then check for models that support prompt caching
235+ switch ( modelId ) {
236+ case "claude-3-5-sonnet-20241022" :
237+ case "claude-3-5-haiku-20241022" :
238+ case "claude-3-opus-20240229" :
239+ case "claude-3-haiku-20240307" :
240+ return {
241+ headers : { "anthropic-beta" : "prompt-caching-2024-07-31" } ,
242+ }
243+ default :
244+ return undefined
245+ }
246+ } ) ( ) ,
247+ )
212248
213249 const content = message . content . find ( ( { type } ) => type === "text" )
214250 return content ?. type === "text" ? content . text : ""
@@ -223,17 +259,40 @@ export class AnthropicHandler extends BaseProvider implements SingleCompletionHa
@@ -223,17 +259,40 @@ export class AnthropicHandler extends BaseProvider implements SingleCompletionHa
// NOTE(review): diff hunk, truncated mid-`catch` — the error-handling tail
// is outside this view. Counts tokens via the provider API for the current
// model, now passing the same per-request beta headers as the other calls.
223259 override async countTokens ( content : Array < Anthropic . Messages . ContentBlockParam > ) : Promise < number > {
224260 try {
225261 // Use the current model
// Change in this hunk: also destructure `virtualId` for header selection.
226- const actualModelId = this . getModel ( ) . id
262+ const { id : actualModelId , virtualId } = this . getModel ( )
227263
228- const response = await this . client . messages . countTokens ( {
229- model : actualModelId ,
230- messages : [
231- {
232- role : "user" ,
233- content : content ,
234- } ,
235- ] ,
236- } )
264+ const response = await this . client . messages . countTokens (
265+ {
266+ model : actualModelId ,
267+ messages : [
268+ {
269+ role : "user" ,
270+ content : content ,
271+ } ,
272+ ] ,
273+ } ,
// NOTE(review): this header-selection IIFE duplicates the same logic used
// by the streaming and completePrompt request paths in this file —
// a candidate for extraction into one shared private helper.
274+ ( ( ) => {
275+ // Check for the thinking-128k variant first
276+ if ( virtualId === "claude-3-7-sonnet-20250219:thinking-128k" ) {
277+ return {
278+ headers : { "anthropic-beta" : "output-128k-2025-02-19" } ,
279+ }
280+ }
281+
282+ // Then check for models that support prompt caching
283+ switch ( actualModelId ) {
284+ case "claude-3-5-sonnet-20241022" :
285+ case "claude-3-5-haiku-20241022" :
286+ case "claude-3-opus-20240229" :
287+ case "claude-3-haiku-20240307" :
288+ return {
289+ headers : { "anthropic-beta" : "prompt-caching-2024-07-31" } ,
290+ }
291+ default :
// undefined means: send no extra beta headers for this model.
292+ return undefined
293+ }
294+ } ) ( ) ,
295+ )
237296
238297 return response . input_tokens
239298 } catch ( error ) {
0 commit comments