Skip to content

Commit 897c372

Browse files
refactor: unify cache control with centralized breakpoints and universal provider options (#11426)
1 parent fa9dff4 commit 897c372

29 files changed

+601
-257
lines changed

src/api/index.ts

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,13 @@ export interface ApiHandlerCreateMessageMetadata {
8888
* Only applies to providers that support function calling restrictions (e.g., Gemini).
8989
*/
9090
allowedFunctionNames?: string[]
91+
/** Provider-specific options for tool definitions (e.g. cache control). */
92+
toolProviderOptions?: Record<string, Record<string, unknown>>
93+
/** Provider-specific options for the system prompt (e.g. cache control).
94+
* Cache-aware providers use this to inject the system prompt as a cached
95+
* system message, since AI SDK v6 does not support providerOptions on the
96+
* `system` string parameter. */
97+
systemProviderOptions?: Record<string, Record<string, unknown>>
9198
}
9299

93100
export interface ApiHandler {

src/api/providers/__tests__/anthropic.spec.ts

Lines changed: 26 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -399,7 +399,7 @@ describe("AnthropicHandler", () => {
399399
expect(endChunk).toBeDefined()
400400
})
401401

402-
it("should pass system prompt via system param with systemProviderOptions for cache control", async () => {
402+
it("should pass system prompt via system param when no systemProviderOptions", async () => {
403403
setupStreamTextMock([{ type: "text-delta", text: "test" }])
404404

405405
const stream = handler.createMessage(systemPrompt, [
@@ -410,16 +410,37 @@ describe("AnthropicHandler", () => {
410410
// Consume
411411
}
412412

413-
// Verify streamText was called with system + systemProviderOptions (not as a message)
413+
// Without systemProviderOptions, system prompt is passed via the system parameter
414414
const callArgs = mockStreamText.mock.calls[0]![0]
415415
expect(callArgs.system).toBe(systemPrompt)
416-
expect(callArgs.systemProviderOptions).toEqual({
417-
anthropic: { cacheControl: { type: "ephemeral" } },
418-
})
419416
// System prompt should NOT be in the messages array
420417
const systemMessages = callArgs.messages.filter((m: any) => m.role === "system")
421418
expect(systemMessages).toHaveLength(0)
422419
})
420+
421+
it("should inject system prompt as cached system message when systemProviderOptions provided", async () => {
422+
setupStreamTextMock([{ type: "text-delta", text: "test" }])
423+
424+
const cacheOpts = { anthropic: { cacheControl: { type: "ephemeral" } } }
425+
const stream = handler.createMessage(
426+
systemPrompt,
427+
[{ role: "user", content: [{ type: "text" as const, text: "test" }] }],
428+
{ taskId: "test-task", systemProviderOptions: cacheOpts },
429+
)
430+
431+
for await (const _chunk of stream) {
432+
// Consume
433+
}
434+
435+
// With systemProviderOptions, system prompt is injected as messages[0]
436+
const callArgs = mockStreamText.mock.calls[0]![0]
437+
expect(callArgs.system).toBeUndefined()
438+
// System prompt should be the first message with providerOptions
439+
const systemMessages = callArgs.messages.filter((m: any) => m.role === "system")
440+
expect(systemMessages).toHaveLength(1)
441+
expect(systemMessages[0].content).toBe(systemPrompt)
442+
expect(systemMessages[0].providerOptions).toEqual(cacheOpts)
443+
})
423444
})
424445

425446
describe("completePrompt", () => {

src/api/providers/__tests__/minimax.spec.ts

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -338,16 +338,12 @@ describe("MiniMaxHandler", () => {
338338

339339
expect(mockMergeEnvironmentDetailsForMiniMax).toHaveBeenCalledWith(messages)
340340
const callArgs = mockStreamText.mock.calls[0]?.[0]
341+
// Cache control is now applied centrally in Task.ts, not per-provider
341342
expect(callArgs.messages).toEqual(
342343
expect.arrayContaining([
343344
expect.objectContaining({
344345
role: "user",
345346
content: [{ type: "text", text: "Merged message" }],
346-
providerOptions: {
347-
anthropic: {
348-
cacheControl: { type: "ephemeral" },
349-
},
350-
},
351347
}),
352348
]),
353349
)

src/api/providers/anthropic-vertex.ts

Lines changed: 18 additions & 59 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ import {
2626
handleAiSdkError,
2727
yieldResponseMessage,
2828
} from "../transform/ai-sdk"
29+
import { applyToolCacheOptions, applySystemPromptCaching } from "../transform/cache-breakpoints"
2930
import { calculateApiCostAnthropic } from "../../shared/cost"
3031

3132
import { DEFAULT_HEADERS } from "./constants"
@@ -96,6 +97,7 @@ export class AnthropicVertexHandler extends BaseProvider implements SingleComple
9697
// Convert tools to AI SDK format
9798
const openAiTools = this.convertToolsForOpenAI(metadata?.tools)
9899
const aiSdkTools = convertToolsForAiSdk(openAiTools) as ToolSet | undefined
100+
applyToolCacheOptions(aiSdkTools as Parameters<typeof applyToolCacheOptions>[0], metadata?.toolProviderOptions)
99101

100102
// Build Anthropic provider options
101103
const anthropicProviderOptions: Record<string, unknown> = {}
@@ -119,45 +121,18 @@ export class AnthropicVertexHandler extends BaseProvider implements SingleComple
119121
anthropicProviderOptions.disableParallelToolUse = true
120122
}
121123

122-
/**
123-
* Vertex API has specific limitations for prompt caching:
124-
* 1. Maximum of 4 blocks can have cache_control
125-
* 2. Only text blocks can be cached (images and other content types cannot)
126-
* 3. Cache control can only be applied to user messages, not assistant messages
127-
*
128-
* Our caching strategy:
129-
* - Cache the system prompt (1 block)
130-
* - Cache the last text block of the second-to-last user message (1 block)
131-
* - Cache the last text block of the last user message (1 block)
132-
* This ensures we stay under the 4-block limit while maintaining effective caching
133-
* for the most relevant context.
134-
*/
135-
const cacheProviderOption = { anthropic: { cacheControl: { type: "ephemeral" as const } } }
136-
137-
const userMsgIndices = messages.reduce(
138-
(acc, msg, index) => ("role" in msg && msg.role === "user" ? [...acc, index] : acc),
139-
[] as number[],
124+
// Breakpoint 1: System prompt caching — inject as cached system message
125+
const effectiveSystemPrompt = applySystemPromptCaching(
126+
systemPrompt,
127+
aiSdkMessages,
128+
metadata?.systemProviderOptions,
140129
)
141130

142-
const targetIndices = new Set<number>()
143-
const lastUserMsgIndex = userMsgIndices[userMsgIndices.length - 1] ?? -1
144-
const secondLastUserMsgIndex = userMsgIndices[userMsgIndices.length - 2] ?? -1
145-
146-
if (lastUserMsgIndex >= 0) targetIndices.add(lastUserMsgIndex)
147-
if (secondLastUserMsgIndex >= 0) targetIndices.add(secondLastUserMsgIndex)
148-
149-
if (targetIndices.size > 0) {
150-
this.applyCacheControlToAiSdkMessages(messages as ModelMessage[], targetIndices, cacheProviderOption)
151-
}
152-
153131
// Build streamText request
154132
// Cast providerOptions to any to bypass strict JSONObject typing — the AI SDK accepts the correct runtime values
155133
const requestOptions: Parameters<typeof streamText>[0] = {
156134
model: this.provider(modelConfig.id),
157-
system: systemPrompt,
158-
...({
159-
systemProviderOptions: { anthropic: { cacheControl: { type: "ephemeral" } } },
160-
} as Record<string, unknown>),
135+
system: effectiveSystemPrompt,
161136
messages: aiSdkMessages,
162137
temperature: modelConfig.temperature,
163138
maxOutputTokens: modelConfig.maxTokens ?? ANTHROPIC_DEFAULT_MAX_TOKENS,
@@ -216,12 +191,19 @@ export class AnthropicVertexHandler extends BaseProvider implements SingleComple
216191
const inputTokens = usage.inputTokens ?? 0
217192
const outputTokens = usage.outputTokens ?? 0
218193

219-
// Extract cache metrics from Anthropic's providerMetadata
194+
// Extract cache metrics from Anthropic's providerMetadata.
195+
// In @ai-sdk/anthropic v3.0.38+, the cache-read token count may only be exposed at
196+
// usage.cache_read_input_tokens rather than as the top-level cacheReadInputTokens property.
220197
const anthropicMeta = providerMetadata?.anthropic as
221-
| { cacheCreationInputTokens?: number; cacheReadInputTokens?: number }
198+
| {
199+
cacheCreationInputTokens?: number
200+
cacheReadInputTokens?: number
201+
usage?: { cache_read_input_tokens?: number }
202+
}
222203
| undefined
223204
const cacheWriteTokens = anthropicMeta?.cacheCreationInputTokens ?? 0
224-
const cacheReadTokens = anthropicMeta?.cacheReadInputTokens ?? 0
205+
const cacheReadTokens =
206+
anthropicMeta?.cacheReadInputTokens ?? anthropicMeta?.usage?.cache_read_input_tokens ?? 0
225207

226208
const { totalCost } = calculateApiCostAnthropic(
227209
info,
@@ -241,29 +223,6 @@ export class AnthropicVertexHandler extends BaseProvider implements SingleComple
241223
}
242224
}
243225

244-
/**
245-
* Apply cacheControl providerOptions to the correct AI SDK messages by walking
246-
* the original Anthropic messages and converted AI SDK messages in parallel.
247-
*
248-
* convertToAiSdkMessages() can split a single Anthropic user message (containing
249-
* tool_results + text) into 2 AI SDK messages (tool role + user role). This method
250-
* accounts for that split so cache control lands on the right message.
251-
*/
252-
private applyCacheControlToAiSdkMessages(
253-
aiSdkMessages: { role: string; providerOptions?: Record<string, Record<string, unknown>> }[],
254-
targetIndices: Set<number>,
255-
cacheProviderOption: Record<string, Record<string, unknown>>,
256-
): void {
257-
for (const idx of targetIndices) {
258-
if (idx >= 0 && idx < aiSdkMessages.length) {
259-
aiSdkMessages[idx].providerOptions = {
260-
...aiSdkMessages[idx].providerOptions,
261-
...cacheProviderOption,
262-
}
263-
}
264-
}
265-
}
266-
267226
getModel() {
268227
const modelId = this.options.apiModelId
269228
let id = modelId && modelId in vertexModels ? (modelId as VertexModelId) : vertexDefaultModelId

src/api/providers/anthropic.ts

Lines changed: 20 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ import {
2424
handleAiSdkError,
2525
yieldResponseMessage,
2626
} from "../transform/ai-sdk"
27+
import { applyToolCacheOptions, applySystemPromptCaching } from "../transform/cache-breakpoints"
2728
import { calculateApiCostAnthropic } from "../../shared/cost"
2829

2930
import { DEFAULT_HEADERS } from "./constants"
@@ -82,6 +83,7 @@ export class AnthropicHandler extends BaseProvider implements SingleCompletionHa
8283
// Convert tools to AI SDK format
8384
const openAiTools = this.convertToolsForOpenAI(metadata?.tools)
8485
const aiSdkTools = convertToolsForAiSdk(openAiTools) as ToolSet | undefined
86+
applyToolCacheOptions(aiSdkTools as Parameters<typeof applyToolCacheOptions>[0], metadata?.toolProviderOptions)
8587

8688
// Build Anthropic provider options
8789
const anthropicProviderOptions: Record<string, unknown> = {}
@@ -105,34 +107,20 @@ export class AnthropicHandler extends BaseProvider implements SingleCompletionHa
105107
anthropicProviderOptions.disableParallelToolUse = true
106108
}
107109

108-
// Apply cache control to user messages
109-
// Strategy: cache the last 2 user messages (write-to-cache + read-from-cache)
110-
const cacheProviderOption = { anthropic: { cacheControl: { type: "ephemeral" as const } } }
111-
112-
const userMsgIndices = messages.reduce(
113-
(acc, msg, index) => ("role" in msg && msg.role === "user" ? [...acc, index] : acc),
114-
[] as number[],
110+
// Breakpoint 1: System prompt caching — inject as cached system message
111+
// AI SDK v6 does not support providerOptions on the system string parameter,
112+
// so cache-aware providers convert it to a system message with providerOptions.
113+
const effectiveSystemPrompt = applySystemPromptCaching(
114+
systemPrompt,
115+
aiSdkMessages,
116+
metadata?.systemProviderOptions,
115117
)
116118

117-
const targetIndices = new Set<number>()
118-
const lastUserMsgIndex = userMsgIndices[userMsgIndices.length - 1] ?? -1
119-
const secondLastUserMsgIndex = userMsgIndices[userMsgIndices.length - 2] ?? -1
120-
121-
if (lastUserMsgIndex >= 0) targetIndices.add(lastUserMsgIndex)
122-
if (secondLastUserMsgIndex >= 0) targetIndices.add(secondLastUserMsgIndex)
123-
124-
if (targetIndices.size > 0) {
125-
this.applyCacheControlToAiSdkMessages(messages as ModelMessage[], targetIndices, cacheProviderOption)
126-
}
127-
128119
// Build streamText request
129120
// Cast providerOptions to any to bypass strict JSONObject typing — the AI SDK accepts the correct runtime values
130121
const requestOptions: Parameters<typeof streamText>[0] = {
131122
model: this.provider(modelConfig.id),
132-
system: systemPrompt,
133-
...({
134-
systemProviderOptions: { anthropic: { cacheControl: { type: "ephemeral" } } },
135-
} as Record<string, unknown>),
123+
system: effectiveSystemPrompt,
136124
messages: aiSdkMessages,
137125
temperature: modelConfig.temperature,
138126
maxOutputTokens: modelConfig.maxTokens ?? ANTHROPIC_DEFAULT_MAX_TOKENS,
@@ -191,12 +179,19 @@ export class AnthropicHandler extends BaseProvider implements SingleCompletionHa
191179
const inputTokens = usage.inputTokens ?? 0
192180
const outputTokens = usage.outputTokens ?? 0
193181

194-
// Extract cache metrics from Anthropic's providerMetadata
182+
// Extract cache metrics from Anthropic's providerMetadata.
183+
// In @ai-sdk/anthropic v3.0.38+, the cache-read token count may only be exposed at
184+
// usage.cache_read_input_tokens rather than as the top-level cacheReadInputTokens property.
195185
const anthropicMeta = providerMetadata?.anthropic as
196-
| { cacheCreationInputTokens?: number; cacheReadInputTokens?: number }
186+
| {
187+
cacheCreationInputTokens?: number
188+
cacheReadInputTokens?: number
189+
usage?: { cache_read_input_tokens?: number }
190+
}
197191
| undefined
198192
const cacheWriteTokens = anthropicMeta?.cacheCreationInputTokens ?? 0
199-
const cacheReadTokens = anthropicMeta?.cacheReadInputTokens ?? 0
193+
const cacheReadTokens =
194+
anthropicMeta?.cacheReadInputTokens ?? anthropicMeta?.usage?.cache_read_input_tokens ?? 0
200195

201196
const { totalCost } = calculateApiCostAnthropic(
202197
info,
@@ -216,29 +211,6 @@ export class AnthropicHandler extends BaseProvider implements SingleCompletionHa
216211
}
217212
}
218213

219-
/**
220-
* Apply cacheControl providerOptions to the correct AI SDK messages by walking
221-
* the original Anthropic messages and converted AI SDK messages in parallel.
222-
*
223-
* convertToAiSdkMessages() can split a single Anthropic user message (containing
224-
* tool_results + text) into 2 AI SDK messages (tool role + user role). This method
225-
* accounts for that split so cache control lands on the right message.
226-
*/
227-
private applyCacheControlToAiSdkMessages(
228-
aiSdkMessages: { role: string; providerOptions?: Record<string, Record<string, unknown>> }[],
229-
targetIndices: Set<number>,
230-
cacheProviderOption: Record<string, Record<string, unknown>>,
231-
): void {
232-
for (const idx of targetIndices) {
233-
if (idx >= 0 && idx < aiSdkMessages.length) {
234-
aiSdkMessages[idx].providerOptions = {
235-
...aiSdkMessages[idx].providerOptions,
236-
...cacheProviderOption,
237-
}
238-
}
239-
}
240-
}
241-
242214
getModel() {
243215
const modelId = this.options.apiModelId
244216
let id = modelId && modelId in anthropicModels ? (modelId as AnthropicModelId) : anthropicDefaultModelId

src/api/providers/azure.ts

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ import {
1313
mapToolChoice,
1414
handleAiSdkError,
1515
} from "../transform/ai-sdk"
16+
import { applyToolCacheOptions } from "../transform/cache-breakpoints"
1617
import { ApiStream, ApiStreamUsageChunk } from "../transform/stream"
1718
import { getModelParams } from "../transform/model-params"
1819

@@ -144,11 +145,12 @@ export class AzureHandler extends BaseProvider implements SingleCompletionHandle
144145
// Convert tools to OpenAI format first, then to AI SDK format
145146
const openAiTools = this.convertToolsForOpenAI(metadata?.tools)
146147
const aiSdkTools = convertToolsForAiSdk(openAiTools) as ToolSet | undefined
148+
applyToolCacheOptions(aiSdkTools as Parameters<typeof applyToolCacheOptions>[0], metadata?.toolProviderOptions)
147149

148150
// Build the request options
149151
const requestOptions: Parameters<typeof streamText>[0] = {
150152
model: languageModel,
151-
system: systemPrompt,
153+
system: systemPrompt || undefined,
152154
messages: aiSdkMessages,
153155
temperature: this.options.modelTemperature ?? temperature ?? AZURE_DEFAULT_TEMPERATURE,
154156
maxOutputTokens: this.getMaxOutputTokens(),

src/api/providers/baseten.ts

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ import {
1313
mapToolChoice,
1414
handleAiSdkError,
1515
} from "../transform/ai-sdk"
16+
import { applyToolCacheOptions } from "../transform/cache-breakpoints"
1617
import { ApiStream, ApiStreamUsageChunk } from "../transform/stream"
1718
import { getModelParams } from "../transform/model-params"
1819

@@ -105,10 +106,11 @@ export class BasetenHandler extends BaseProvider implements SingleCompletionHand
105106

106107
const openAiTools = this.convertToolsForOpenAI(metadata?.tools)
107108
const aiSdkTools = convertToolsForAiSdk(openAiTools) as ToolSet | undefined
109+
applyToolCacheOptions(aiSdkTools as Parameters<typeof applyToolCacheOptions>[0], metadata?.toolProviderOptions)
108110

109111
const requestOptions: Parameters<typeof streamText>[0] = {
110112
model: languageModel,
111-
system: systemPrompt,
113+
system: systemPrompt || undefined,
112114
messages: aiSdkMessages,
113115
temperature: this.options.modelTemperature ?? temperature ?? BASETEN_DEFAULT_TEMPERATURE,
114116
maxOutputTokens: this.getMaxOutputTokens(),

0 commit comments

Comments
 (0)