 import { Anthropic } from "@anthropic-ai/sdk"
 import { Stream as AnthropicStream } from "@anthropic-ai/sdk/streaming"
+import { CacheControlEphemeral } from "@anthropic-ai/sdk/resources"
+import { BetaThinkingConfigParam } from "@anthropic-ai/sdk/resources/beta"
 import {
 	anthropicDefaultModelId,
 	AnthropicModelId,
@@ -12,39 +14,52 @@ import { ApiStream } from "../transform/stream"
 
 const ANTHROPIC_DEFAULT_TEMPERATURE = 0
 
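+// Model IDs that accept the extended-thinking configuration below.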
+const THINKING_MODELS = ["claude-3-7-sonnet-20250219"]
+
 export class AnthropicHandler implements ApiHandler, SingleCompletionHandler {
 	private options: ApiHandlerOptions
 	private client: Anthropic
 
 	constructor(options: ApiHandlerOptions) {
 		this.options = options
+
 		this.client = new Anthropic({
 			apiKey: this.options.apiKey,
 			baseURL: this.options.anthropicBaseUrl || undefined,
 		})
 	}
 
 	async *createMessage(systemPrompt: string, messages: Anthropic.Messages.MessageParam[]): ApiStream {
-		let stream: AnthropicStream<Anthropic.Beta.PromptCaching.Messages.RawPromptCachingBetaMessageStreamEvent>
+		let stream: AnthropicStream<Anthropic.Messages.RawMessageStreamEvent>
+		const cacheControl: CacheControlEphemeral = { type: "ephemeral" }
 		const modelId = this.getModel().id
+		let thinking: BetaThinkingConfigParam | undefined = undefined
+
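+		// NOTE: Anthropic's extended-thinking docs require budget_tokens to be
+		// at least 1024 and less than max_tokens; anthropicThinking is assumed
+		// to satisfy that here.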
+		if (THINKING_MODELS.includes(modelId)) {
+			thinking = this.options.anthropicThinking
+				? { type: "enabled", budget_tokens: this.options.anthropicThinking }
+				: { type: "disabled" }
+		}
 
 		switch (modelId) {
-			// 'latest' alias does not support cache_control
 			case "claude-3-7-sonnet-20250219":
 			case "claude-3-5-sonnet-20241022":
 			case "claude-3-5-haiku-20241022":
 			case "claude-3-opus-20240229":
 			case "claude-3-haiku-20240307": {
-				/*
-				The latest message will be the new user message, one before will be the assistant message from a previous request, and the user message before that will be a previously cached user message. So we need to mark the latest user message as ephemeral to cache it for the next request, and mark the second to last user message as ephemeral to let the server know the last message to retrieve from the cache for the current request..
-				*/
+				/**
+				 * The latest message will be the new user message; the one before
+				 * it will be the assistant message from the previous request, and
+				 * the user message before that will be a previously cached user
+				 * message. So we mark the latest user message as ephemeral to
+				 * cache it for the next request, and mark the second-to-last user
+				 * message as ephemeral so the server knows the last message to
+				 * retrieve from the cache for the current request.
+				 */
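+				// For example, with [user, assistant, user, assistant, user],
+				// the two cache breakpoints land on the 3rd and 5th messages.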
 				const userMsgIndices = messages.reduce(
 					(acc, msg, index) => (msg.role === "user" ? [...acc, index] : acc),
 					[] as number[],
 				)
+
 				const lastUserMsgIndex = userMsgIndices[userMsgIndices.length - 1] ?? -1
 				const secondLastMsgUserIndex = userMsgIndices[userMsgIndices.length - 2] ?? -1
-				stream = await this.client.beta.promptCaching.messages.create(
+
+				stream = await this.client.messages.create(
 					{
 						model: modelId,
 						max_tokens: this.getModel().info.maxTokens || 8192,
@@ -60,12 +75,12 @@ export class AnthropicHandler implements ApiHandler, SingleCompletionHandler {
 										{
 											type: "text",
 											text: message.content,
-											cache_control: { type: "ephemeral" },
+											cache_control: cacheControl,
 										},
 									]
 								: message.content.map((content, contentIndex) =>
 										contentIndex === message.content.length - 1
-											? { ...content, cache_control: { type: "ephemeral" } }
+											? { ...content, cache_control: cacheControl }
 											: content,
 									),
 						}
@@ -76,6 +91,7 @@ export class AnthropicHandler implements ApiHandler, SingleCompletionHandler {
 						// tool_choice: { type: "auto" },
 						// tools: tools,
 						stream: true,
+						thinking,
 					},
 					(() => {
 						// prompt caching: https://x.com/alexalbert__/status/1823751995901272068
@@ -114,52 +130,51 @@ export class AnthropicHandler implements ApiHandler, SingleCompletionHandler {
 			for await (const chunk of stream) {
 				switch (chunk.type) {
 					case "message_start":
-						// tells us cache reads/writes/input/output
+						// Tells us cache reads/writes/input/output.
 						const usage = chunk.message.usage
+
 						yield {
 							type: "usage",
 							inputTokens: usage.input_tokens || 0,
 							outputTokens: usage.output_tokens || 0,
 							cacheWriteTokens: usage.cache_creation_input_tokens || undefined,
 							cacheReadTokens: usage.cache_read_input_tokens || undefined,
 						}
+
 						break
 					case "message_delta":
-						// tells us stop_reason, stop_sequence, and output tokens along the way and at the end of the message
-
+						// Tells us stop_reason, stop_sequence, and output tokens
+						// along the way and at the end of the message.
 						yield {
 							type: "usage",
 							inputTokens: 0,
 							outputTokens: chunk.usage.output_tokens || 0,
 						}
+
 						break
 					case "message_stop":
-						// no usage data, just an indicator that the message is done
+						// No usage data, just an indicator that the message is done.
 						break
 					case "content_block_start":
 						switch (chunk.content_block.type) {
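+							// Extended-thinking output arrives as "thinking" content
+							// blocks; surface it to callers as "reasoning" chunks.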
+							case "thinking":
+								yield { type: "reasoning", text: chunk.content_block.thinking }
+								break
 							case "text":
-								// we may receive multiple text blocks, in which case just insert a line break between them
+								// We may receive multiple text blocks, in which
+								// case just insert a line break between them.
 								if (chunk.index > 0) {
-									yield {
-										type: "text",
-										text: "\n",
-									}
-								}
-								yield {
-									type: "text",
-									text: chunk.content_block.text,
+									yield { type: "text", text: "\n" }
 								}
+
+								yield { type: "text", text: chunk.content_block.text }
 								break
 						}
 						break
 					case "content_block_delta":
 						switch (chunk.delta.type) {
 							case "text_delta":
-								yield {
-									type: "text",
-									text: chunk.delta.text,
-								}
+								yield { type: "text", text: chunk.delta.text }
 								break
 						}
 						break
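
For reference, a consumer of this handler iterates the returned ApiStream and routes chunks by `type`. A minimal sketch (editorial, not part of the diff; `handler`, `systemPrompt`, and `messages` are assumed to be in scope, and `reasoning` chunks only appear for thinking-capable models):

```ts
for await (const chunk of handler.createMessage(systemPrompt, messages)) {
	switch (chunk.type) {
		case "reasoning":
			// Extended-thinking text streamed from "thinking" content blocks.
			process.stdout.write(chunk.text)
			break
		case "text":
			process.stdout.write(chunk.text)
			break
		case "usage":
			// Emitted on message_start and message_delta events.
			console.log(`in=${chunk.inputTokens} out=${chunk.outputTokens}`)
			break
	}
}
```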