
Commit b2b135c

[WIP] Claude 3.7 Sonnet (Thinking)
1 parent 70c9cbd commit b2b135c

File tree

4 files changed: +47 -30 lines


package-lock.json

Lines changed: 5 additions & 4 deletions
Generated file; diff not rendered by default.

package.json

Lines changed: 1 addition & 1 deletion
@@ -304,7 +304,7 @@
   },
   "dependencies": {
     "@anthropic-ai/bedrock-sdk": "^0.10.2",
-    "@anthropic-ai/sdk": "^0.26.0",
+    "@anthropic-ai/sdk": "^0.37.0",
     "@anthropic-ai/vertex-sdk": "^0.4.1",
     "@aws-sdk/client-bedrock-runtime": "^3.706.0",
     "@google/generative-ai": "^0.18.0",

src/api/providers/anthropic.ts

Lines changed: 40 additions & 25 deletions
@@ -1,5 +1,7 @@
 import { Anthropic } from "@anthropic-ai/sdk"
 import { Stream as AnthropicStream } from "@anthropic-ai/sdk/streaming"
+import { CacheControlEphemeral } from "@anthropic-ai/sdk/resources"
+import { BetaThinkingConfigParam } from "@anthropic-ai/sdk/resources/beta"
 import {
   anthropicDefaultModelId,
   AnthropicModelId,
@@ -12,39 +14,52 @@ import { ApiStream } from "../transform/stream"
 
 const ANTHROPIC_DEFAULT_TEMPERATURE = 0
 
+const THINKING_MODELS = ["claude-3-7-sonnet-20250219"]
+
 export class AnthropicHandler implements ApiHandler, SingleCompletionHandler {
   private options: ApiHandlerOptions
   private client: Anthropic
 
   constructor(options: ApiHandlerOptions) {
     this.options = options
+
     this.client = new Anthropic({
       apiKey: this.options.apiKey,
       baseURL: this.options.anthropicBaseUrl || undefined,
     })
   }
 
   async *createMessage(systemPrompt: string, messages: Anthropic.Messages.MessageParam[]): ApiStream {
-    let stream: AnthropicStream<Anthropic.Beta.PromptCaching.Messages.RawPromptCachingBetaMessageStreamEvent>
+    let stream: AnthropicStream<Anthropic.Messages.RawMessageStreamEvent>
+    const cacheControl: CacheControlEphemeral = { type: "ephemeral" }
     const modelId = this.getModel().id
+    let thinking: BetaThinkingConfigParam | undefined = undefined
+
+    if (THINKING_MODELS.includes(modelId)) {
+      thinking = this.options.anthropicThinking
+        ? { type: "enabled", budget_tokens: this.options.anthropicThinking }
+        : { type: "disabled" }
+    }
 
     switch (modelId) {
-      // 'latest' alias does not support cache_control
      case "claude-3-7-sonnet-20250219":
      case "claude-3-5-sonnet-20241022":
      case "claude-3-5-haiku-20241022":
      case "claude-3-opus-20240229":
      case "claude-3-haiku-20240307": {
-        /*
-        The latest message will be the new user message, one before will be the assistant message from a previous request, and the user message before that will be a previously cached user message. So we need to mark the latest user message as ephemeral to cache it for the next request, and mark the second to last user message as ephemeral to let the server know the last message to retrieve from the cache for the current request..
-        */
+        /**
+         * The latest message will be the new user message, one before will
+         * be the assistant message from a previous request, and the user message before that will be a previously cached user message. So we need to mark the latest user message as ephemeral to cache it for the next request, and mark the second to last user message as ephemeral to let the server know the last message to retrieve from the cache for the current request..
+         */
        const userMsgIndices = messages.reduce(
          (acc, msg, index) => (msg.role === "user" ? [...acc, index] : acc),
          [] as number[],
        )
+
        const lastUserMsgIndex = userMsgIndices[userMsgIndices.length - 1] ?? -1
        const secondLastMsgUserIndex = userMsgIndices[userMsgIndices.length - 2] ?? -1
-        stream = await this.client.beta.promptCaching.messages.create(
+
+        stream = await this.client.messages.create(
          {
            model: modelId,
            max_tokens: this.getModel().info.maxTokens || 8192,
@@ -60,12 +75,12 @@ export class AnthropicHandler implements ApiHandler, SingleCompletionHandler {
                      {
                        type: "text",
                        text: message.content,
-                        cache_control: { type: "ephemeral" },
+                        cache_control: cacheControl,
                      },
                    ]
                  : message.content.map((content, contentIndex) =>
                      contentIndex === message.content.length - 1
-                        ? { ...content, cache_control: { type: "ephemeral" } }
+                        ? { ...content, cache_control: cacheControl }
                        : content,
                    ),
            }
@@ -76,6 +91,7 @@ export class AnthropicHandler implements ApiHandler, SingleCompletionHandler {
            // tool_choice: { type: "auto" },
            // tools: tools,
            stream: true,
+            thinking,
          },
          (() => {
            // prompt caching: https://x.com/alexalbert__/status/1823751995901272068
@@ -114,52 +130,51 @@ export class AnthropicHandler implements ApiHandler, SingleCompletionHandler {
    for await (const chunk of stream) {
      switch (chunk.type) {
        case "message_start":
-          // tells us cache reads/writes/input/output
+          // Tells us cache reads/writes/input/output.
          const usage = chunk.message.usage
+
          yield {
            type: "usage",
            inputTokens: usage.input_tokens || 0,
            outputTokens: usage.output_tokens || 0,
            cacheWriteTokens: usage.cache_creation_input_tokens || undefined,
            cacheReadTokens: usage.cache_read_input_tokens || undefined,
          }
+
          break
        case "message_delta":
-          // tells us stop_reason, stop_sequence, and output tokens along the way and at the end of the message
-
+          // Tells us stop_reason, stop_sequence, and output tokens
+          // along the way and at the end of the message.
          yield {
            type: "usage",
            inputTokens: 0,
            outputTokens: chunk.usage.output_tokens || 0,
          }
+
          break
        case "message_stop":
-          // no usage data, just an indicator that the message is done
+          // No usage data, just an indicator that the message is done.
          break
        case "content_block_start":
          switch (chunk.content_block.type) {
+            case "thinking":
+              yield { type: "reasoning", text: chunk.content_block.thinking }
+              break
            case "text":
-              // we may receive multiple text blocks, in which case just insert a line break between them
+              // We may receive multiple text blocks, in which
+              // case just insert a line break between them.
              if (chunk.index > 0) {
-                yield {
-                  type: "text",
-                  text: "\n",
-                }
-              }
-              yield {
-                type: "text",
-                text: chunk.content_block.text,
+                yield { type: "text", text: "\n" }
              }
+
+              yield { type: "text", text: chunk.content_block.text }
              break
          }
          break
        case "content_block_delta":
          switch (chunk.delta.type) {
            case "text_delta":
-              yield {
-                type: "text",
-                text: chunk.delta.text,
-              }
+              yield { type: "text", text: chunk.delta.text }
              break
          }
          break
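
Taken together, the handler changes (a) build a BetaThinkingConfigParam from the new anthropicThinking option for models in THINKING_MODELS, (b) pass it to client.messages.create alongside stream: true, and (c) surface "thinking" content blocks as "reasoning" chunks. Below is a minimal standalone sketch of that flow against @anthropic-ai/sdk ^0.37.0; the prompt, the 4096-token budget, and the thinking_delta handling are illustrative assumptions, not part of this commit.

// Sketch only (not from this commit): enable extended thinking on a supported
// model and stream both reasoning and answer text to stdout.
// Assumes ANTHROPIC_API_KEY is set; budget_tokens should stay below max_tokens.
import { Anthropic } from "@anthropic-ai/sdk"
import { BetaThinkingConfigParam } from "@anthropic-ai/sdk/resources/beta"

async function main() {
  const client = new Anthropic({ apiKey: process.env.ANTHROPIC_API_KEY })

  // Mirrors the THINKING_MODELS gate: a budget enables thinking, otherwise it is disabled.
  const budgetTokens = 4096 // placeholder for options.anthropicThinking
  const thinking: BetaThinkingConfigParam = budgetTokens
    ? { type: "enabled", budget_tokens: budgetTokens }
    : { type: "disabled" }

  const stream = await client.messages.create({
    model: "claude-3-7-sonnet-20250219",
    max_tokens: 8192,
    thinking,
    stream: true,
    messages: [{ role: "user", content: "Summarize how prompt caching works." }],
  })

  for await (const chunk of stream) {
    if (chunk.type === "content_block_start" && chunk.content_block.type === "thinking") {
      // Reasoning arrives in "thinking" blocks, separate from the final answer text.
      process.stdout.write(chunk.content_block.thinking)
    } else if (chunk.type === "content_block_delta" && chunk.delta.type === "thinking_delta") {
      // Assumption: incremental reasoning is delivered as thinking_delta events.
      process.stdout.write(chunk.delta.thinking)
    } else if (chunk.type === "content_block_delta" && chunk.delta.type === "text_delta") {
      process.stdout.write(chunk.delta.text)
    }
  }
}

main()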

src/shared/api.ts

Lines changed: 1 addition & 0 deletions
@@ -21,6 +21,7 @@ export interface ApiHandlerOptions {
   apiModelId?: string
   apiKey?: string // anthropic
   anthropicBaseUrl?: string
+  anthropicThinking?: number
   vsCodeLmModelSelector?: vscode.LanguageModelChatSelector
   glamaModelId?: string
   glamaModelInfo?: ModelInfo
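
The new anthropicThinking field carries the thinking-token budget that AnthropicHandler reads above. A rough usage sketch, with illustrative option values and import paths (not from this commit):

import { AnthropicHandler } from "../api/providers/anthropic"
import { ApiHandlerOptions } from "../shared/api"

// Illustrative values: a Claude 3.7 Sonnet model with an 8192-token thinking budget.
// Leaving anthropicThinking undefined keeps thinking disabled for THINKING_MODELS.
const options: ApiHandlerOptions = {
  apiModelId: "claude-3-7-sonnet-20250219",
  apiKey: process.env.ANTHROPIC_API_KEY,
  anthropicThinking: 8192,
}

const handler = new AnthropicHandler(options)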

0 commit comments
