Skip to content

Commit aec03d7

Browse files
committed
feat: add virtual id for extended output capabilities
1 parent 8e8079d commit aec03d7

File tree

2 files changed

+93
-22
lines changed

2 files changed

+93
-22
lines changed

src/api/providers/anthropic.ts

Lines changed: 81 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ export class AnthropicHandler extends BaseProvider implements SingleCompletionHa
2929
async *createMessage(systemPrompt: string, messages: Anthropic.Messages.MessageParam[]): ApiStream {
3030
let stream: AnthropicStream<Anthropic.Messages.RawMessageStreamEvent>
3131
const cacheControl: CacheControlEphemeral = { type: "ephemeral" }
32-
let { id: modelId, maxTokens, thinking, temperature } = this.getModel()
32+
let { id: modelId, maxTokens, thinking, temperature, virtualId } = this.getModel()
3333

3434
switch (modelId) {
3535
case "claude-3-7-sonnet-20250219":
@@ -82,6 +82,15 @@ export class AnthropicHandler extends BaseProvider implements SingleCompletionHa
8282
// prompt caching: https://x.com/alexalbert__/status/1823751995901272068
8383
// https://github.com/anthropics/anthropic-sdk-typescript?tab=readme-ov-file#default-headers
8484
// https://github.com/anthropics/anthropic-sdk-typescript/commit/c920b77fc67bd839bfeb6716ceab9d7c9bbe7393
85+
86+
// Check for the thinking-128k variant first
87+
if (virtualId === "claude-3-7-sonnet-20250219:thinking-128k") {
88+
return {
89+
headers: { "anthropic-beta": "output-128k-2025-02-19" },
90+
}
91+
}
92+
93+
// Then check for models that support prompt caching
8594
switch (modelId) {
8695
case "claude-3-5-sonnet-20241022":
8796
case "claude-3-5-haiku-20241022":
@@ -184,31 +193,58 @@ export class AnthropicHandler extends BaseProvider implements SingleCompletionHa
184193
let id = modelId && modelId in anthropicModels ? (modelId as AnthropicModelId) : anthropicDefaultModelId
185194
const info: ModelInfo = anthropicModels[id]
186195

196+
// Track the original model ID for special variant handling
197+
const virtualId = id
198+
187199
// The `:thinking` variant is a virtual identifier for the
188200
// `claude-3-7-sonnet-20250219` model with a thinking budget.
189201
// We can handle this more elegantly in the future.
190-
if (id === "claude-3-7-sonnet-20250219:thinking") {
202+
if (id === "claude-3-7-sonnet-20250219:thinking" || id === "claude-3-7-sonnet-20250219:thinking-128k") {
191203
id = "claude-3-7-sonnet-20250219"
192204
}
193205

194206
return {
195207
id,
196208
info,
209+
virtualId, // Include the original ID to use for header selection
197210
...getModelParams({ options: this.options, model: info, defaultMaxTokens: ANTHROPIC_DEFAULT_MAX_TOKENS }),
198211
}
199212
}
200213

201214
async completePrompt(prompt: string) {
202-
let { id: modelId, maxTokens, thinking, temperature } = this.getModel()
203-
204-
const message = await this.client.messages.create({
205-
model: modelId,
206-
max_tokens: maxTokens ?? ANTHROPIC_DEFAULT_MAX_TOKENS,
207-
thinking,
208-
temperature,
209-
messages: [{ role: "user", content: prompt }],
210-
stream: false,
211-
})
215+
let { id: modelId, maxTokens, thinking, temperature, virtualId } = this.getModel()
216+
217+
const message = await this.client.messages.create(
218+
{
219+
model: modelId,
220+
max_tokens: maxTokens ?? ANTHROPIC_DEFAULT_MAX_TOKENS,
221+
thinking,
222+
temperature,
223+
messages: [{ role: "user", content: prompt }],
224+
stream: false,
225+
},
226+
(() => {
227+
// Check for the thinking-128k variant first
228+
if (virtualId === "claude-3-7-sonnet-20250219:thinking-128k") {
229+
return {
230+
headers: { "anthropic-beta": "output-128k-2025-02-19" },
231+
}
232+
}
233+
234+
// Then check for models that support prompt caching
235+
switch (modelId) {
236+
case "claude-3-5-sonnet-20241022":
237+
case "claude-3-5-haiku-20241022":
238+
case "claude-3-opus-20240229":
239+
case "claude-3-haiku-20240307":
240+
return {
241+
headers: { "anthropic-beta": "prompt-caching-2024-07-31" },
242+
}
243+
default:
244+
return undefined
245+
}
246+
})(),
247+
)
212248

213249
const content = message.content.find(({ type }) => type === "text")
214250
return content?.type === "text" ? content.text : ""
@@ -223,17 +259,40 @@ export class AnthropicHandler extends BaseProvider implements SingleCompletionHa
223259
override async countTokens(content: Array<Anthropic.Messages.ContentBlockParam>): Promise<number> {
224260
try {
225261
// Use the current model
226-
const actualModelId = this.getModel().id
262+
const { id: actualModelId, virtualId } = this.getModel()
227263

228-
const response = await this.client.messages.countTokens({
229-
model: actualModelId,
230-
messages: [
231-
{
232-
role: "user",
233-
content: content,
234-
},
235-
],
236-
})
264+
const response = await this.client.messages.countTokens(
265+
{
266+
model: actualModelId,
267+
messages: [
268+
{
269+
role: "user",
270+
content: content,
271+
},
272+
],
273+
},
274+
(() => {
275+
// Check for the thinking-128k variant first
276+
if (virtualId === "claude-3-7-sonnet-20250219:thinking-128k") {
277+
return {
278+
headers: { "anthropic-beta": "output-128k-2025-02-19" },
279+
}
280+
}
281+
282+
// Then check for models that support prompt caching
283+
switch (actualModelId) {
284+
case "claude-3-5-sonnet-20241022":
285+
case "claude-3-5-haiku-20241022":
286+
case "claude-3-opus-20240229":
287+
case "claude-3-haiku-20240307":
288+
return {
289+
headers: { "anthropic-beta": "prompt-caching-2024-07-31" },
290+
}
291+
default:
292+
return undefined
293+
}
294+
})(),
295+
)
237296

238297
return response.input_tokens
239298
} catch (error) {

src/shared/api.ts

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,18 @@ export interface ModelInfo {
9898
export type AnthropicModelId = keyof typeof anthropicModels
9999
export const anthropicDefaultModelId: AnthropicModelId = "claude-3-7-sonnet-20250219"
100100
export const anthropicModels = {
101+
"claude-3-7-sonnet-20250219:thinking-128k": {
102+
maxTokens: 128_000,
103+
contextWindow: 200_000,
104+
supportsImages: true,
105+
supportsComputerUse: true,
106+
supportsPromptCache: true,
107+
inputPrice: 3.0, // $3 per million input tokens
108+
outputPrice: 15.0, // $15 per million output tokens
109+
cacheWritesPrice: 3.75, // $3.75 per million tokens
110+
cacheReadsPrice: 0.3, // $0.30 per million tokens
111+
thinking: true,
112+
},
101113
"claude-3-7-sonnet-20250219:thinking": {
102114
maxTokens: 64_000,
103115
contextWindow: 200_000,

0 commit comments

Comments (0)