
Commit 7df7404

test: add regression test for issue #234 prompt caching behavior (#375)
* test: add regression test for issue #234 prompt caching behavior
* fix: use correct Kimi K2 model ID (moonshotai/kimi-k2-thinking)
* refactor: improve test robustness - match exact issue scenario with 3 requests
* refactor: match exact issue #234 format with 3 requests for both models
* refactor: remove root cause analysis from test header - keep only known facts
* refactor: remove redundant comments from test file

Co-authored-by: Robert Yeakel <robert.yeakel@openrouter.ai>
Co-authored-by: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com>
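For context, a minimal sketch of how a consumer would read these fields from a generateText result; the shape mirrors the OpenRouterUsageMetadata interface asserted in the test below:

    const result = await generateText({ model, messages });
    // providerMetadata.openrouter is untyped, so cast to the expected shape
    const usage = (result.providerMetadata?.openrouter as {
      usage?: {
        promptTokensDetails?: { cachedTokens?: number };
        completionTokensDetails?: { reasoningTokens?: number };
      };
    })?.usage;
    console.log('cached:', usage?.promptTokensDetails?.cachedTokens ?? 0);
    console.log('reasoning:', usage?.completionTokensDetails?.reasoningTokens ?? 0);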
1 parent b57ec9d commit 7df7404

File tree

2 files changed: +203 additions, -0 deletions


.changeset/floppy-pants-roll.md

Lines changed: 2 additions & 0 deletions
@@ -0,0 +1,2 @@
---
---
Lines changed: 201 additions & 0 deletions
@@ -0,0 +1,201 @@
/**
 * Regression test for GitHub issue #234
 * https://github.com/OpenRouterTeam/ai-sdk-provider/issues/234
 *
 * Issue: "Delayed caching with GPT 4.1, none with Kimi K2 Thinking"
 *
 * The user reported the following behavior with three consecutive prompts:
 *
 * GPT 4.1:
 *   Request 1: native_tokens_cached=0
 *   Request 2: native_tokens_cached=0
 *   Request 3: native_tokens_cached=1536
 *
 * Kimi K2 Thinking:
 *   Request 1: native_tokens_cached=0, native_tokens_reasoning=231
 *   Request 2: native_tokens_cached=0, native_tokens_reasoning=297
 *   Request 3: native_tokens_cached=0, native_tokens_reasoning=322
 *
 * This test verifies that the provider correctly exposes cachedTokens and
 * reasoningTokens in providerMetadata when present in the API response.
 */
import { generateText } from 'ai';
import { describe, expect, it, vi } from 'vitest';
import { createOpenRouter } from '@/src';

vi.setConfig({
  testTimeout: 180_000,
});

interface OpenRouterUsageMetadata {
  promptTokens?: number;
  completionTokens?: number;
  totalTokens?: number;
  promptTokensDetails?: { cachedTokens?: number };
  completionTokensDetails?: { reasoningTokens?: number };
}
describe('Issue #234: Prompt caching behavior', () => {
  const openrouter = createOpenRouter({
    apiKey: process.env.OPENROUTER_API_KEY,
    baseUrl: `${process.env.OPENROUTER_API_BASE}/api/v1`,
  });
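
  // The padding below repeats a short sentence 50 times so the prompt prefix
  // exceeds the minimum length that automatic caching requires; the exact
  // threshold is provider-specific (OpenAI, for example, documents a
  // 1024-token minimum).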
  const longSystemPrompt = `You are a helpful assistant. Here is some context that should be cached:

${Array(50)
  .fill(
    'This is padding text to ensure the prompt meets the minimum token threshold for automatic caching. ' +
      'Automatic prompt caching requires a minimum number of tokens in the prompt prefix. ' +
      'This text is repeated multiple times to reach that threshold. ',
  )
  .join('\n')}

Remember to be helpful and concise in your responses.`;

  describe('GPT 4.1 automatic caching', () => {
    const model = openrouter('openai/gpt-4.1');

    it('should correctly expose cachedTokens in providerMetadata structure', async () => {
      interface RequestResult {
        tokens_prompt: number;
        tokens_completion: number;
        cachedTokens: number | undefined;
      }

      const responses: RequestResult[] = [];

      for (let i = 0; i < 3; i++) {
        const response = await generateText({
          model,
          messages: [
            { role: 'system', content: longSystemPrompt },
            {
              role: 'user',
              content: 'What is 2+2? Answer with just the number.',
            },
          ],
        });

        expect(response.text).toBeDefined();
        expect(response.text.length).toBeGreaterThan(0);
        expect(response.finishReason).toBeDefined();
        expect(response.usage).toBeDefined();
        expect(response.usage.totalTokens).toBeGreaterThan(0);
        expect(response.providerMetadata).toBeDefined();
        expect(response.providerMetadata?.openrouter).toBeDefined();

        const openrouterMetadata = response.providerMetadata?.openrouter as {
          usage?: OpenRouterUsageMetadata;
        };

        expect(openrouterMetadata?.usage).toBeDefined();
        expect(openrouterMetadata?.usage?.promptTokens).toBeGreaterThan(0);
        expect(openrouterMetadata?.usage?.completionTokens).toBeGreaterThan(0);

        const cachedTokens =
          openrouterMetadata?.usage?.promptTokensDetails?.cachedTokens;

        responses.push({
          tokens_prompt: openrouterMetadata?.usage?.promptTokens ?? 0,
          tokens_completion: openrouterMetadata?.usage?.completionTokens ?? 0,
          cachedTokens,
        });
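
        // Cache hits are not guaranteed on any given request (the issue
        // reports GPT 4.1 only returning cached tokens on the third call),
        // so the field is only type-checked when present.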
        if (cachedTokens !== undefined) {
          expect(typeof cachedTokens).toBe('number');
          expect(cachedTokens).toBeGreaterThanOrEqual(0);
        }
      }

      console.log('GPT 4.1 caching test results:');
      responses.forEach((r, i) => {
        console.log(
          `  Request ${i + 1}: tokens_prompt=${r.tokens_prompt}, tokens_completion=${r.tokens_completion}, cachedTokens=${r.cachedTokens ?? 0}`,
        );
      });

      responses.forEach((r) => {
        expect(r.tokens_prompt).toBeGreaterThan(0);
        expect(r.tokens_completion).toBeGreaterThan(0);
      });
    });
  });

  describe('Kimi K2 automatic caching', () => {
    const model = openrouter('moonshotai/kimi-k2-thinking');

    it('should correctly expose usage structure for Kimi K2 responses', async () => {
      interface RequestResult {
        tokens_prompt: number;
        tokens_completion: number;
        cachedTokens: number | undefined;
        reasoningTokens: number | undefined;
      }

      const responses: RequestResult[] = [];

      for (let i = 0; i < 3; i++) {
        const response = await generateText({
          model,
          messages: [
            { role: 'system', content: longSystemPrompt },
            {
              role: 'user',
              content: 'What is 2+2? Answer with just the number.',
            },
          ],
        });

        expect(response.text).toBeDefined();
        expect(response.text.length).toBeGreaterThan(0);
        expect(response.finishReason).toBeDefined();
        expect(response.usage).toBeDefined();
        expect(response.usage.totalTokens).toBeGreaterThan(0);
        expect(response.providerMetadata).toBeDefined();
        expect(response.providerMetadata?.openrouter).toBeDefined();

        const openrouterMetadata = response.providerMetadata?.openrouter as {
          usage?: OpenRouterUsageMetadata;
        };

        expect(openrouterMetadata?.usage).toBeDefined();
        expect(openrouterMetadata?.usage?.promptTokens).toBeGreaterThan(0);

        const cachedTokens =
          openrouterMetadata?.usage?.promptTokensDetails?.cachedTokens;
        const reasoningTokens =
          openrouterMetadata?.usage?.completionTokensDetails?.reasoningTokens;

        responses.push({
          tokens_prompt: openrouterMetadata?.usage?.promptTokens ?? 0,
          tokens_completion: openrouterMetadata?.usage?.completionTokens ?? 0,
          cachedTokens,
          reasoningTokens,
        });
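
        // Per the issue report, Kimi K2 Thinking returned reasoning tokens on
        // every request but never any cached tokens, so both fields are
        // optional here and only type-checked when present.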
        if (cachedTokens !== undefined) {
          expect(typeof cachedTokens).toBe('number');
          expect(cachedTokens).toBeGreaterThanOrEqual(0);
        }

        if (reasoningTokens !== undefined) {
          expect(typeof reasoningTokens).toBe('number');
          expect(reasoningTokens).toBeGreaterThanOrEqual(0);
        }
      }

      console.log('Kimi K2 Thinking test results:');
      responses.forEach((r, i) => {
        console.log(
          `  Request ${i + 1}: tokens_prompt=${r.tokens_prompt}, tokens_completion=${r.tokens_completion}, cachedTokens=${r.cachedTokens ?? 0}, reasoningTokens=${r.reasoningTokens ?? 0}`,
        );
      });

      responses.forEach((r) => {
        expect(r.tokens_prompt).toBeGreaterThan(0);
        expect(r.tokens_completion).toBeGreaterThan(0);
      });
    });
  });
});
