
Commit ef22985

Clean up anthropic caching (#855)
Since [this](https://www.anthropic.com/news/token-saving-updates) was released, we only need to add a single cache point at the end of the messages we send, which lets us remove the hack we did earlier. I tested locally and confirmed that we are still writing tokens to and reading tokens from the cache.
1 parent fe4ab0d commit ef22985
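
For context, a minimal sketch of the new approach with the AI SDK's Anthropic provider (not taken from this repo; the model id and message contents are placeholders). Setting `providerOptions.anthropic.cacheControl` on the final message asks the provider to place a cache breakpoint there, so the whole conversation prefix can be served from the prompt cache:

```ts
// Hypothetical usage sketch: one cache breakpoint on the last message via
// provider options. Model id and message contents are placeholders.
import { anthropic } from '@ai-sdk/anthropic';
import { streamText, type CoreMessage } from 'ai';

const messages: CoreMessage[] = [
  { role: 'user', content: 'Earlier conversation turns...' },
  { role: 'assistant', content: 'Earlier assistant reply...' },
  { role: 'user', content: 'Latest user message' },
];

// A single ephemeral cache point at the end of the message list is enough:
// everything up to and including this message becomes the cached prefix.
messages[messages.length - 1].providerOptions = {
  anthropic: { cacheControl: { type: 'ephemeral' } },
};

const result = streamText({
  model: anthropic('claude-3-7-sonnet-latest'),
  messages,
});
```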

File tree

2 files changed: +16 −63 lines

app/lib/.server/llm/convex-agent.ts

Lines changed: 10 additions & 0 deletions
```diff
@@ -124,6 +124,16 @@ export async function convexAgent(args: {
     };
   }
 
+  if (modelProvider === 'Anthropic') {
+    messagesForDataStream[messagesForDataStream.length - 1].providerOptions = {
+      anthropic: {
+        cacheControl: {
+          type: 'ephemeral',
+        },
+      },
+    };
+  }
+
   const dataStream = createDataStream({
     execute(dataStream) {
       const result = streamText({
```

app/lib/.server/llm/provider.ts

Lines changed: 6 additions & 63 deletions
```diff
@@ -13,7 +13,6 @@ import { getEnv } from '~/lib/.server/env';
 // workaround for Vercel environment from
 // https://github.com/vercel/ai/issues/199#issuecomment-1605245593
 import { fetch } from '~/lib/.server/fetch';
-import { GENERAL_SYSTEM_PROMPT_PRELUDE, ROLE_SYSTEM_PROMPT } from 'chef-agent/prompts/system';
 
 const ALLOWED_AWS_REGIONS = ['us-east-1', 'us-east-2', 'us-west-2'];
 
@@ -161,8 +160,6 @@ export function getProvider(
   // Falls back to the low Quality-of-Service Anthropic API key if the primary key is rate limited
   const rateLimitAwareFetch = () => {
     return async (input: RequestInfo | URL, init?: RequestInit) => {
-      const enrichedOptions = anthropicInjectCacheControl(init);
-
       const throwIfBad = async (response: Response, isLowQos: boolean) => {
         if (response.ok) {
           return response;
@@ -181,7 +178,7 @@ export function getProvider(
         throw new Error(JSON.stringify({ error: 'The model hit an error. Try sending your message again.' }));
       };
 
-      const response = await fetch(input, enrichedOptions);
+      const response = await fetch(input, init);
 
       if (response.status !== 429 && response.status !== 529) {
         return throwIfBad(response, false);
@@ -201,12 +198,12 @@ export function getProvider(
           response,
         },
       });
-      if (enrichedOptions && enrichedOptions.headers) {
-        const headers = new Headers(enrichedOptions.headers);
+      if (init && init.headers) {
+        const headers = new Headers(init.headers);
         headers.set('x-api-key', lowQosKey);
-        enrichedOptions.headers = headers;
+        init.headers = headers;
       }
-      const lowQosResponse = await fetch(input, enrichedOptions);
+      const lowQosResponse = await fetch(input, init);
       return throwIfBad(lowQosResponse, true);
     };
   };
@@ -228,8 +225,7 @@ export function getProvider(
 
 const userKeyApiFetch = (provider: ModelProvider) => {
   return async (input: RequestInfo | URL, init?: RequestInit) => {
-    const requestInit = provider === 'Anthropic' ? anthropicInjectCacheControl(init) : init;
-    const result = await fetch(input, requestInit);
+    const result = await fetch(input, init);
     if (result.status === 401) {
       const text = await result.text();
       throw new Error(JSON.stringify({ error: 'Invalid API key', details: text }));
@@ -273,56 +269,3 @@ const userKeyApiFetch = (provider: ModelProvider) => {
     return result;
   };
 };
-
-// sujayakar, 2025-03-25: This is mega-hax, but I can't figure out
-// how to get the AI SDK to pass the cache control header to
-// Anthropic with the `streamText` function. Setting
-// `providerOptions.anthropic.cacheControl` doesn't seem to do
-// anything. So, we instead directly inject the cache control
-// header into the body of the request.
-//
-// tom, 2025-04-25: This is still an outstanding bug
-// https://github.com/vercel/ai/issues/5942
-function anthropicInjectCacheControl(options?: RequestInit) {
-  const start = Date.now();
-  if (!options) {
-    return options;
-  }
-  if (options.method !== 'POST') {
-    return options;
-  }
-  const headers = options.headers;
-  if (!headers) {
-    return options;
-  }
-  const contentType = new Headers(headers).get('content-type');
-  if (contentType !== 'application/json') {
-    return options;
-  }
-  if (typeof options.body !== 'string') {
-    throw new Error('Body must be a string');
-  }
-
-  const body = JSON.parse(options.body);
-
-  if (body.system[0].text !== ROLE_SYSTEM_PROMPT) {
-    throw new Error('First system message must be the role system prompt');
-  }
-  if (!body.system[1].text.startsWith(GENERAL_SYSTEM_PROMPT_PRELUDE)) {
-    throw new Error('Second system message must be the general system prompt prelude');
-  }
-  // Cache system prompt.
-  body.system[1].cache_control = { type: 'ephemeral' };
-  // Cache relevant files in system messages that are the same for all LLM requests after a user message.
-  body.system[body.system.length - 1].cache_control = { type: 'ephemeral' };
-
-  // Cache all messages.
-  const lastMessage = body.messages[body.messages.length - 1];
-  const lastMessagePartIndex = lastMessage.content.length - 1;
-  lastMessage.content[lastMessagePartIndex].cache_control = { type: 'ephemeral' };
-  body.messages[body.messages.length - 1].content[lastMessagePartIndex].cache_control = { type: 'ephemeral' };
-
-  const newBody = JSON.stringify(body);
-  console.log(`Injected system messages in ${Date.now() - start}ms`);
-  return { ...options, body: newBody };
-}
```
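
For reference, the deleted `anthropicInjectCacheControl` helper patched the raw Anthropic Messages API request body by hand. Below is a rough sketch of the body shape it produced (field values are illustrative, not taken from this repo); with the change above, the AI SDK's Anthropic provider emits these `cache_control` markers itself when `providerOptions.anthropic.cacheControl` is set:

```ts
// Illustrative only: the raw Anthropic Messages API body shape the removed
// helper used to build by rewriting the request. Texts are placeholders.
const rawAnthropicBody = {
  model: 'claude-3-7-sonnet-latest',
  max_tokens: 1024,
  system: [
    { type: 'text', text: 'Role system prompt...' },
    // The old hack marked system blocks and the last message part as cache points.
    { type: 'text', text: 'General system prompt...', cache_control: { type: 'ephemeral' } },
  ],
  messages: [
    {
      role: 'user',
      content: [
        { type: 'text', text: 'Latest user message', cache_control: { type: 'ephemeral' } },
      ],
    },
  ],
};
```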
