@@ -26,6 +26,7 @@ import {
2626 handleAiSdkError ,
2727 yieldResponseMessage ,
2828} from "../transform/ai-sdk"
29+ import { applyToolCacheOptions , applySystemPromptCaching } from "../transform/cache-breakpoints"
2930import { calculateApiCostAnthropic } from "../../shared/cost"
3031
3132import { DEFAULT_HEADERS } from "./constants"
@@ -96,6 +97,7 @@ export class AnthropicVertexHandler extends BaseProvider implements SingleComple
9697 // Convert tools to AI SDK format
9798 const openAiTools = this . convertToolsForOpenAI ( metadata ?. tools )
9899 const aiSdkTools = convertToolsForAiSdk ( openAiTools ) as ToolSet | undefined
100+ applyToolCacheOptions ( aiSdkTools as Parameters < typeof applyToolCacheOptions > [ 0 ] , metadata ?. toolProviderOptions )
99101
100102 // Build Anthropic provider options
101103 const anthropicProviderOptions : Record < string , unknown > = { }
@@ -119,45 +121,18 @@ export class AnthropicVertexHandler extends BaseProvider implements SingleComple
119121 anthropicProviderOptions . disableParallelToolUse = true
120122 }
121123
122- /**
123- * Vertex API has specific limitations for prompt caching:
124- * 1. Maximum of 4 blocks can have cache_control
125- * 2. Only text blocks can be cached (images and other content types cannot)
126- * 3. Cache control can only be applied to user messages, not assistant messages
127- *
128- * Our caching strategy:
129- * - Cache the system prompt (1 block)
130- * - Cache the last text block of the second-to-last user message (1 block)
131- * - Cache the last text block of the last user message (1 block)
132- * This ensures we stay under the 4-block limit while maintaining effective caching
133- * for the most relevant context.
134- */
135- const cacheProviderOption = { anthropic : { cacheControl : { type : "ephemeral" as const } } }
136-
137- const userMsgIndices = messages . reduce (
138- ( acc , msg , index ) => ( "role" in msg && msg . role === "user" ? [ ...acc , index ] : acc ) ,
139- [ ] as number [ ] ,
124+ // Breakpoint 1: System prompt caching — inject as cached system message
125+ const effectiveSystemPrompt = applySystemPromptCaching (
126+ systemPrompt ,
127+ aiSdkMessages ,
128+ metadata ?. systemProviderOptions ,
140129 )
141130
142- const targetIndices = new Set < number > ( )
143- const lastUserMsgIndex = userMsgIndices [ userMsgIndices . length - 1 ] ?? - 1
144- const secondLastUserMsgIndex = userMsgIndices [ userMsgIndices . length - 2 ] ?? - 1
145-
146- if ( lastUserMsgIndex >= 0 ) targetIndices . add ( lastUserMsgIndex )
147- if ( secondLastUserMsgIndex >= 0 ) targetIndices . add ( secondLastUserMsgIndex )
148-
149- if ( targetIndices . size > 0 ) {
150- this . applyCacheControlToAiSdkMessages ( messages as ModelMessage [ ] , targetIndices , cacheProviderOption )
151- }
152-
153131 // Build streamText request
154132 // Cast providerOptions to any to bypass strict JSONObject typing — the AI SDK accepts the correct runtime values
155133 const requestOptions : Parameters < typeof streamText > [ 0 ] = {
156134 model : this . provider ( modelConfig . id ) ,
157- system : systemPrompt ,
158- ...( {
159- systemProviderOptions : { anthropic : { cacheControl : { type : "ephemeral" } } } ,
160- } as Record < string , unknown > ) ,
135+ system : effectiveSystemPrompt ,
161136 messages : aiSdkMessages ,
162137 temperature : modelConfig . temperature ,
163138 maxOutputTokens : modelConfig . maxTokens ?? ANTHROPIC_DEFAULT_MAX_TOKENS ,
@@ -216,12 +191,19 @@ export class AnthropicVertexHandler extends BaseProvider implements SingleComple
216191 const inputTokens = usage . inputTokens ?? 0
217192 const outputTokens = usage . outputTokens ?? 0
218193
219- // Extract cache metrics from Anthropic's providerMetadata
194+ // Extract cache metrics from Anthropic's providerMetadata.
195+ // In @ai-sdk/anthropic v3.0.38+, the cache-read count may only be exposed as
196+ // usage.cache_read_input_tokens rather than as the top-level cacheReadInputTokens property.
220197 const anthropicMeta = providerMetadata ?. anthropic as
221- | { cacheCreationInputTokens ?: number ; cacheReadInputTokens ?: number }
198+ | {
199+ cacheCreationInputTokens ?: number
200+ cacheReadInputTokens ?: number
201+ usage ?: { cache_read_input_tokens ?: number }
202+ }
222203 | undefined
223204 const cacheWriteTokens = anthropicMeta ?. cacheCreationInputTokens ?? 0
224- const cacheReadTokens = anthropicMeta ?. cacheReadInputTokens ?? 0
205+ const cacheReadTokens =
206+ anthropicMeta ?. cacheReadInputTokens ?? anthropicMeta ?. usage ?. cache_read_input_tokens ?? 0
225207
226208 const { totalCost } = calculateApiCostAnthropic (
227209 info ,
@@ -241,29 +223,6 @@ export class AnthropicVertexHandler extends BaseProvider implements SingleComple
241223 }
242224 }
243225
244- /**
245- * Apply cacheControl providerOptions to the correct AI SDK messages by walking
246- * the original Anthropic messages and converted AI SDK messages in parallel.
247- *
248- * convertToAiSdkMessages() can split a single Anthropic user message (containing
249- * tool_results + text) into 2 AI SDK messages (tool role + user role). This method
250- * accounts for that split so cache control lands on the right message.
251- */
252- private applyCacheControlToAiSdkMessages (
253- aiSdkMessages : { role : string ; providerOptions ?: Record < string , Record < string , unknown > > } [ ] ,
254- targetIndices : Set < number > ,
255- cacheProviderOption : Record < string , Record < string , unknown > > ,
256- ) : void {
257- for ( const idx of targetIndices ) {
258- if ( idx >= 0 && idx < aiSdkMessages . length ) {
259- aiSdkMessages [ idx ] . providerOptions = {
260- ...aiSdkMessages [ idx ] . providerOptions ,
261- ...cacheProviderOption ,
262- }
263- }
264- }
265- }
266-
267226 getModel ( ) {
268227 const modelId = this . options . apiModelId
269228 let id = modelId && modelId in vertexModels ? ( modelId as VertexModelId ) : vertexDefaultModelId
0 commit comments