Skip to content

Commit 0d47c5d

Browse files
don't add cache_control if already present (#1833)
* fix(openrouter): preserve existing cache_control
* Only check messages
* Formatting
* fix(openrouter): add messages API support to addCacheBreakpoints
* Retain request level cache_control
* Fix type errors

---------

Co-authored-by: kiloconnect[bot] <240665456+kiloconnect[bot]@users.noreply.github.com>
Co-authored-by: Christiaan Arnoldus <christiaan.arnoldus@outlook.com>
1 parent 23103e0 commit 0d47c5d

File tree

3 files changed

+195
-5
lines changed

3 files changed

+195
-5
lines changed

src/app/api/openrouter/[...path]/route.ts

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -173,9 +173,6 @@ export async function POST(request: NextRequest): Promise<NextResponseType<unkno
173173
requestBodyParsed = { kind: 'chat_completions', body };
174174
} else if (path === '/messages') {
175175
const body: GatewayMessagesRequest = JSON.parse(requestBodyText);
176-
if (!body.cache_control && body.messages.length > 1) {
177-
body.cache_control = { type: 'ephemeral' };
178-
}
179176
requestBodyParsed = { kind: 'messages', body };
180177
} else {
181178
const body: GatewayResponsesRequest = JSON.parse(requestBodyText);

src/lib/providers/openrouter/request-helpers.ts

Lines changed: 29 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -80,11 +80,29 @@ function setCacheControlOnResponsesMessage(message: OpenAI.Responses.ResponseInp
8080
}
8181
}
8282

83+
/**
 * Type guard for "any non-null object".
 *
 * Arrays also satisfy this check (`typeof [] === 'object'`), so callers that
 * need to treat arrays differently must test `Array.isArray` first.
 */
function isObjectRecord(value: unknown): value is Record<string, unknown> {
  return typeof value === 'object' && value !== null;
}

/**
 * Reports whether `value`, or anything nested inside it, is a plain object
 * that has an own `cache_control` property.
 *
 * Only the presence of the key matters: `{ cache_control: undefined }` and
 * `{ cache_control: null }` both count as present. Arrays are searched through
 * their elements only; primitives never match.
 *
 * The walk uses an explicit work list instead of recursion so that the depth
 * of the input cannot exhaust the call stack.
 *
 * @param value - Any value; typically a parsed message list.
 * @returns `true` if a `cache_control` key exists at any depth.
 */
function containsCacheControl(value: unknown): boolean {
  const pending: unknown[] = [value];

  while (pending.length > 0) {
    const current = pending.pop();

    let children: Iterable<unknown>;
    if (Array.isArray(current)) {
      children = current;
    } else if (isObjectRecord(current)) {
      if (Object.hasOwn(current, 'cache_control')) {
        return true;
      }
      children = Object.values(current);
    } else {
      continue;
    }

    // Queue only values that can themselves contain keys; primitives are dead ends.
    for (const child of children) {
      if (isObjectRecord(child)) {
        pending.push(child);
      }
    }
  }

  return false;
}
99+
83100
export function addCacheBreakpoints(request: GatewayRequest) {
84101
if (
85102
request.kind === 'chat_completions' &&
86103
Array.isArray(request.body.messages) &&
87-
request.body.messages.length > 1
104+
request.body.messages.length > 1 &&
105+
!containsCacheControl(request.body.messages)
88106
) {
89107
const lastMessage = request.body.messages.findLast(
90108
msg => msg.role === 'user' || msg.role === 'tool'
@@ -98,7 +116,8 @@ export function addCacheBreakpoints(request: GatewayRequest) {
98116
} else if (
99117
request.kind === 'responses' &&
100118
Array.isArray(request.body.input) &&
101-
request.body.input.length > 1
119+
request.body.input.length > 1 &&
120+
!containsCacheControl(request.body.input)
102121
) {
103122
const lastMessage = request.body.input.findLast(
104123
msg => (msg.type === 'message' && msg.role === 'user') || msg.type === 'function_call_output'
@@ -109,6 +128,14 @@ export function addCacheBreakpoints(request: GatewayRequest) {
109128
);
110129
setCacheControlOnResponsesMessage(lastMessage);
111130
}
131+
} else if (
132+
request.kind === 'messages' &&
133+
request.body.messages.length > 1 &&
134+
!request.body.cache_control &&
135+
!containsCacheControl(request.body.messages)
136+
) {
137+
console.debug('[addCacheBreakpoints] setting cache breakpoint on messages request');
138+
request.body.cache_control = { type: 'ephemeral' };
112139
}
113140
}
114141

Lines changed: 166 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,166 @@
1+
import { describe, expect, test } from '@jest/globals';
2+
import { addCacheBreakpoints } from '@/lib/providers/openrouter/request-helpers';
3+
import type { GatewayRequest } from '@/lib/providers/openrouter/types';
4+
import type OpenAI from 'openai';
5+
6+
describe('addCacheBreakpoints', () => {
7+
test('adds a cache breakpoint to the last eligible chat completions message when none exist', () => {
  // No message carries cache_control; the final user turn has two text parts.
  const gatewayRequest: GatewayRequest = {
    kind: 'chat_completions',
    body: {
      model: 'test-model',
      messages: [
        { role: 'system', content: 'You are helpful.' },
        { role: 'user', content: 'First prompt' },
        { role: 'assistant', content: 'First response' },
        {
          role: 'user',
          content: [
            { type: 'text', text: 'Latest prompt' },
            { type: 'text', text: 'Latest detail' },
          ],
        },
      ],
    },
  };

  addCacheBreakpoints(gatewayRequest);

  const finalMessage = gatewayRequest.body.messages.at(-1);
  const finalParts = finalMessage?.content;
  expect(Array.isArray(finalParts)).toBe(true);
  // Narrow for the type checker; the expectation above already fails otherwise.
  if (!Array.isArray(finalParts)) return;

  // The breakpoint is expected on the last part of the last user message.
  const trailingPart = finalParts.at(-1);
  expect(trailingPart).toMatchObject({
    type: 'text',
    text: 'Latest detail',
    cache_control: { type: 'ephemeral' },
  });
});
38+
39+
test('does nothing for chat completions requests when any cache_control is already present', () => {
  // The first user turn already carries a caller-supplied cache_control marker.
  const gatewayRequest: GatewayRequest = {
    kind: 'chat_completions',
    body: {
      model: 'test-model',
      messages: [
        { role: 'system', content: 'You are helpful.' },
        {
          role: 'user',
          content: [
            {
              type: 'text',
              text: 'First prompt',
              cache_control: { type: 'ephemeral' },
            } as OpenAI.ChatCompletionContentPartText,
          ],
        },
        { role: 'assistant', content: 'First response' },
        {
          role: 'user',
          content: [
            { type: 'text', text: 'Latest prompt' },
            { type: 'text', text: 'Latest detail' },
          ],
        },
      ],
    },
  };

  addCacheBreakpoints(gatewayRequest);

  // Exact equality: the final turn must not have gained a cache_control key.
  const finalParts =
    gatewayRequest.kind === 'chat_completions'
      ? gatewayRequest.body.messages.at(-1)?.content
      : false;
  const expectedParts = [
    { type: 'text', text: 'Latest prompt' },
    { type: 'text', text: 'Latest detail' },
  ];
  expect(finalParts).toEqual(expectedParts);
});
77+
78+
test('does nothing for responses requests when any cache_control is already present', () => {
  // The first input item already carries a caller-supplied cache_control marker.
  const request: GatewayRequest = {
    kind: 'responses',
    body: {
      model: 'test-model',
      input: [
        {
          type: 'message',
          role: 'user',
          content: [
            {
              type: 'input_text',
              text: 'First prompt',
              // @ts-expect-error non-standard cache_control extension
              cache_control: { type: 'ephemeral' },
            },
          ],
        },
        {
          type: 'function_call_output',
          call_id: 'call_123',
          output: [
            { type: 'input_text', text: 'Tool output' },
            { type: 'input_text', text: 'Tool detail' },
          ],
        },
      ],
    },
  };

  addCacheBreakpoints(request);

  const lastItem = request.kind === 'responses' && request.body.input?.at(-1);
  // toEqual, not toMatchObject: subset matching would ignore an extra
  // cache_control key added to an output part, and the test would pass
  // even if a breakpoint had been injected.
  expect(lastItem).toEqual({
    type: 'function_call_output',
    call_id: 'call_123',
    output: [
      { type: 'input_text', text: 'Tool output' },
      { type: 'input_text', text: 'Tool detail' },
    ],
  });
});
119+
120+
test('adds top-level cache_control on messages request when none is present', () => {
  // Multi-turn messages request with no cache_control at any level.
  const gatewayRequest: GatewayRequest = {
    kind: 'messages',
    body: {
      model: 'anthropic/claude-sonnet-4-5',
      max_tokens: 1024,
      messages: [
        { role: 'user', content: 'First prompt' },
        { role: 'assistant', content: 'First response' },
        { role: 'user', content: 'Latest prompt' },
      ],
    },
  };

  addCacheBreakpoints(gatewayRequest);

  // The breakpoint is expected on the request body itself, not on a message.
  const topLevelCacheControl = gatewayRequest.body.cache_control;
  expect(topLevelCacheControl).toEqual({ type: 'ephemeral' });
});
138+
139+
test('does nothing for messages request when any cache_control is already present', () => {
  // The first user turn already carries a cache_control marker on its text block.
  const gatewayRequest: GatewayRequest = {
    kind: 'messages',
    body: {
      model: 'anthropic/claude-sonnet-4-5',
      max_tokens: 1024,
      messages: [
        {
          role: 'user',
          content: [
            {
              type: 'text',
              text: 'First prompt',
              cache_control: { type: 'ephemeral' },
            },
          ],
        },
        { role: 'assistant', content: 'First response' },
        { role: 'user', content: 'Latest prompt' },
      ],
    },
  };

  addCacheBreakpoints(gatewayRequest);

  // No request-level breakpoint may be added when a message-level one exists.
  const topLevelCacheControl = gatewayRequest.body.cache_control;
  expect(topLevelCacheControl).toBeUndefined();
});
166+
});

0 commit comments

Comments
 (0)