@@ -12,8 +12,6 @@ import {
 import { ApiHandler, SingleCompletionHandler } from "../index"
 import { ApiStream } from "../transform/stream"
 
-const ANTHROPIC_DEFAULT_TEMPERATURE = 0
-
 export class AnthropicHandler implements ApiHandler, SingleCompletionHandler {
 	private options: ApiHandlerOptions
 	private client: Anthropic
@@ -30,7 +28,7 @@ export class AnthropicHandler implements ApiHandler, SingleCompletionHandler {
 	async *createMessage(systemPrompt: string, messages: Anthropic.Messages.MessageParam[]): ApiStream {
 		let stream: AnthropicStream<Anthropic.Messages.RawMessageStreamEvent>
 		const cacheControl: CacheControlEphemeral = { type: "ephemeral" }
-		let { id: modelId, temperature, maxTokens, thinking } = this.getModel()
+		let { id: modelId, maxTokens, thinking, temperature } = this.getModel()
 
 		switch (modelId) {
 			case "claude-3-7-sonnet-20250219":
@@ -182,55 +180,52 @@ export class AnthropicHandler implements ApiHandler, SingleCompletionHandler {
 
 	getModel() {
 		const modelId = this.options.apiModelId
-		let temperature = this.options.modelTemperature ?? ANTHROPIC_DEFAULT_TEMPERATURE
-		let thinking: BetaThinkingConfigParam | undefined = undefined
 
-		if (modelId && modelId in anthropicModels) {
-			let id = modelId as AnthropicModelId
-			const info: ModelInfo = anthropicModels[id]
+		const {
+			modelMaxTokens: customMaxTokens,
+			modelMaxThinkingTokens: customMaxThinkingTokens,
+			modelTemperature: customTemperature,
+		} = this.options
 
-			// The `:thinking` variant is a virtual identifier for the
-			// `claude-3-7-sonnet-20250219` model with a thinking budget.
-			// We can handle this more elegantly in the future.
-			if (id === "claude-3-7-sonnet-20250219:thinking") {
-				id = "claude-3-7-sonnet-20250219"
-			}
+		let id = modelId && modelId in anthropicModels ? (modelId as AnthropicModelId) : anthropicDefaultModelId
+		const info: ModelInfo = anthropicModels[id]
 
-			const maxTokens = this.options.modelMaxTokens || info.maxTokens || 8192
+		// The `:thinking` variant is a virtual identifier for the
+		// `claude-3-7-sonnet-20250219` model with a thinking budget.
+		// We can handle this more elegantly in the future.
+		if (id === "claude-3-7-sonnet-20250219:thinking") {
+			id = "claude-3-7-sonnet-20250219"
+		}
 
-			if (info.thinking) {
-				// Anthropic "Thinking" models require a temperature of 1.0.
-				temperature = 1.0
+		let maxTokens = info.maxTokens ?? 8192
+		let thinking: BetaThinkingConfigParam | undefined = undefined
+		let temperature = customTemperature ?? 0
 
-				// Clamp the thinking budget to be at most 80% of max tokens and at
-				// least 1024 tokens.
-				const maxBudgetTokens = Math.floor(maxTokens * 0.8)
-				const budgetTokens = Math.max(
-					Math.min(this.options.modelMaxThinkingTokens ?? maxBudgetTokens, maxBudgetTokens),
-					1024,
-				)
+		if (info.thinking) {
+			// Only honor `customMaxTokens` for thinking models.
+			maxTokens = customMaxTokens ?? maxTokens
 
-				thinking = { type: "enabled", budget_tokens: budgetTokens }
-			}
+			// Clamp the thinking budget to be at most 80% of max tokens and at
+			// least 1024 tokens.
+			const maxBudgetTokens = Math.floor(maxTokens * 0.8)
+			const budgetTokens = Math.max(Math.min(customMaxThinkingTokens ?? maxBudgetTokens, maxBudgetTokens), 1024)
+			thinking = { type: "enabled", budget_tokens: budgetTokens }
 
-			return { id, info, temperature, maxTokens, thinking }
+			// Anthropic "Thinking" models require a temperature of 1.0.
+			temperature = 1.0
 		}
 
-		const id = anthropicDefaultModelId
-		const info: ModelInfo = anthropicModels[id]
-		const maxTokens = this.options.modelMaxTokens || info.maxTokens || 8192
-
-		return { id, info, temperature, maxTokens, thinking }
+		return { id, info, maxTokens, thinking, temperature }
 	}
 
 	async completePrompt(prompt: string) {
-		let { id: modelId, temperature, maxTokens, thinking } = this.getModel()
+		let { id: modelId, maxTokens, thinking, temperature } = this.getModel()
 
 		const message = await this.client.messages.create({
 			model: modelId,
 			max_tokens: maxTokens,
-			temperature,
 			thinking,
+			temperature,
 			messages: [{ role: "user", content: prompt }],
 			stream: false,
 		})
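
A note on the reordered `:thinking` handling: the alias is now resolved after `info` is looked up, so `anthropicModels` is expected to carry its own entry for the virtual id (with `thinking` set), while the normalized id is what goes out in API requests. A minimal restatement of the mapping, with the hypothetical helper name `resolveApiModelId` (not part of this change):

```typescript
// Illustrative only: ":thinking" is a virtual, UI-level identifier; the
// Anthropic API itself only accepts the base model id.
// `resolveApiModelId` is a hypothetical name, not a function in this PR.
function resolveApiModelId(id: string): string {
	return id === "claude-3-7-sonnet-20250219:thinking" ? "claude-3-7-sonnet-20250219" : id
}
```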
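The budget-clamping rule itself reads as a standalone function. The following is a sketch for illustration, assuming the 8192-token default shown above; `resolveThinkingBudget` is our name for it, not a function in the diff:

```typescript
// Illustrative sketch of the thinking-budget clamp in getModel() above.
// `resolveThinkingBudget` is a hypothetical name, not a function in this PR.
function resolveThinkingBudget(maxTokens: number, customMaxThinkingTokens?: number): number {
	// At most 80% of the response token budget...
	const maxBudgetTokens = Math.floor(maxTokens * 0.8)
	// ...and never below the 1024-token floor.
	return Math.max(Math.min(customMaxThinkingTokens ?? maxBudgetTokens, maxBudgetTokens), 1024)
}

// With the 8192-token default and no user override: floor(8192 * 0.8) = 6553.
console.log(resolveThinkingBudget(8192)) // 6553
// An override below the floor is raised to 1024.
console.log(resolveThinkingBudget(8192, 512)) // 1024
// An override above 80% of max tokens is clamped down.
console.log(resolveThinkingBudget(8192, 10_000)) // 6553
```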