
Commit 057c545

dancergr2m authored and committed
fix(provider/xai): handle inconsistent cached token reporting (#12485)
## background

xAI's token reporting is inconsistent across models. most models report `prompt_tokens`/`input_tokens` inclusive of cached tokens (like OpenAI), but some models (e.g. `grok-4-1-fast-non-reasoning`) report them exclusive of cached tokens, where `cached_tokens > prompt_tokens`.

## summary

- detect which reporting style xAI is using based on whether `cached_tokens <= prompt_tokens`
- when inclusive (normal): subtract cached from prompt to get noCache (OpenAI pattern)
- when exclusive (anomalous): prompt tokens already represent noCache, add cached for total (Anthropic pattern)
- applies to both chat completions and responses APIs
- add unit tests for the non-inclusive reporting edge case
- add responses usage test file

## verification

<details>
<summary>gateway bug case (cached > prompt)</summary>

```
before: total=4142, noCache=-186, cacheRead=4328
after: total=8470, noCache=4142, cacheRead=4328
```

</details>

<details>
<summary>normal case (cached <= prompt)</summary>

```
raw: input_tokens: 12, cached_tokens: 3
sdk: noCache: 9, cacheRead: 3, total: 12
```

</details>

## checklist

- [x] tests have been added / updated (for bug fixes / features)
- [ ] documentation has been added / updated (for bug fixes / features)
- [x] a _patch_ changeset for relevant packages has been added (run `pnpm changeset` in root)
- [x] i have reviewed this pull request (self-review)
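to make the branch logic concrete, here is a minimal standalone sketch of the detection described above (illustrative only; `normalizeInputTokens` is a hypothetical name, the shipped code lives in `convert-xai-chat-usage.ts` and `convert-xai-responses-usage.ts` below):

```ts
// hypothetical helper mirroring the branch logic in this PR
interface XaiInputUsage {
  prompt_tokens: number;
  cached_tokens: number;
}

function normalizeInputTokens({ prompt_tokens, cached_tokens }: XaiInputUsage) {
  // inclusive (OpenAI-style): prompt_tokens already contains the cached tokens
  const inclusive = cached_tokens <= prompt_tokens;
  return {
    total: inclusive ? prompt_tokens : prompt_tokens + cached_tokens,
    noCache: inclusive ? prompt_tokens - cached_tokens : prompt_tokens,
    cacheRead: cached_tokens,
  };
}

// normal case from the verification above:
// { total: 12, noCache: 9, cacheRead: 3 }
console.log(normalizeInputTokens({ prompt_tokens: 12, cached_tokens: 3 }));

// gateway bug case (cached > prompt, exclusive reporting):
// { total: 8470, noCache: 4142, cacheRead: 4328 }
console.log(normalizeInputTokens({ prompt_tokens: 4142, cached_tokens: 4328 }));
```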
1 parent 0b19e76 commit 057c545

9 files changed

Lines changed: 307 additions & 4 deletions


.changeset/stupid-bags-crash.md

Lines changed: 5 additions & 0 deletions
```diff
@@ -0,0 +1,5 @@
+---
+'@ai-sdk/xai': patch
+---
+
+fix(provider/xai): handle inconsistent cached token reporting
```
Lines changed: 44 additions & 0 deletions
```diff
@@ -0,0 +1,44 @@
+import { xai } from '@ai-sdk/xai';
+import { generateText } from 'ai';
+import { run } from '../lib/run';
+
+const models = [
+  'grok-4',
+  'grok-4-1-fast-reasoning',
+  'grok-4-1-fast-non-reasoning',
+  'grok-4-fast-reasoning',
+  'grok-4-fast-non-reasoning',
+  'grok-code-fast-1',
+  'grok-3',
+  'grok-3-fast',
+  'grok-3-mini',
+  'grok-3-mini-fast',
+];
+
+run(async () => {
+  for (const modelId of models) {
+    try {
+      const result = await generateText({
+        model: xai.responses(modelId),
+        prompt: 'Say a single word.',
+      });
+
+      const body = result.response.body as Record<string, any>;
+      const raw = body.usage;
+      const sdk = result.usage;
+
+      console.log(`--- ${modelId} ---`);
+      console.log(
+        `raw: output_tokens=${raw.output_tokens}, reasoning_tokens=${raw.output_tokens_details?.reasoning_tokens ?? 0}, total_tokens=${raw.total_tokens}`,
+      );
+      console.log(
+        `sdk: outputTokens=${sdk.outputTokens}, textTokens=${sdk.outputTokenDetails?.textTokens}, reasoningTokens=${sdk.outputTokenDetails?.reasoningTokens}, totalTokens=${sdk.totalTokens}`,
+      );
+      console.log();
+    } catch (e: any) {
+      console.log(`--- ${modelId} ---`);
+      console.log(`error: ${e.message?.slice(0, 80)}`);
+      console.log();
+    }
+  }
+});
```
Lines changed: 16 additions & 0 deletions
```diff
@@ -0,0 +1,16 @@
+import { xai } from '@ai-sdk/xai';
+import { generateText } from 'ai';
+import { run } from '../lib/run';
+
+run(async () => {
+  const result = await generateText({
+    model: xai.responses('grok-4-1-fast-non-reasoning'),
+    prompt: 'Say a single word.',
+  });
+
+  console.log('text:', result.text);
+  console.log();
+  console.log('raw usage:', JSON.stringify(result.response.body, null, 2));
+  console.log();
+  console.log('sdk usage:', JSON.stringify(result.usage, null, 2));
+});
```
Lines changed: 43 additions & 0 deletions
```diff
@@ -0,0 +1,43 @@
+import { xai } from '@ai-sdk/xai';
+import { streamText } from 'ai';
+import { run } from '../lib/run';
+
+const models = [
+  'grok-4',
+  'grok-4-1-fast-reasoning',
+  'grok-4-1-fast-non-reasoning',
+  'grok-4-fast-reasoning',
+  'grok-4-fast-non-reasoning',
+  'grok-code-fast-1',
+  'grok-3',
+  'grok-3-fast',
+  'grok-3-mini',
+  'grok-3-mini-fast',
+];
+
+run(async () => {
+  for (const modelId of models) {
+    try {
+      const result = streamText({
+        model: xai.responses(modelId),
+        prompt: 'Say a single word.',
+      });
+
+      for await (const textPart of result.textStream) {
+        void textPart;
+      }
+
+      const sdk = await result.usage;
+
+      console.log(`--- ${modelId} ---`);
+      console.log(
+        `sdk: outputTokens=${sdk.outputTokens}, textTokens=${sdk.outputTokenDetails?.textTokens}, reasoningTokens=${sdk.outputTokenDetails?.reasoningTokens}, totalTokens=${sdk.totalTokens}`,
+      );
+      console.log();
+    } catch (e: any) {
+      console.log(`--- ${modelId} ---`);
+      console.log(`error: ${e.message?.slice(0, 80)}`);
+      console.log();
+    }
+  }
+});
```
Lines changed: 18 additions & 0 deletions
```diff
@@ -0,0 +1,18 @@
+import { xai } from '@ai-sdk/xai';
+import { streamText } from 'ai';
+import { run } from '../lib/run';
+
+run(async () => {
+  const result = streamText({
+    model: xai.responses('grok-3-mini'),
+    prompt: 'Say a single word.',
+  });
+
+  for await (const textPart of result.textStream) {
+    process.stdout.write(textPart);
+  }
+
+  console.log();
+  console.log();
+  console.log('sdk usage:', JSON.stringify(await result.usage, null, 2));
+});
```

packages/xai/src/convert-xai-chat-usage.test.ts

Lines changed: 20 additions & 0 deletions
```diff
@@ -113,6 +113,26 @@ describe('convertXaiChatUsage', () => {
     `);
   });
 
+  it('should handle cached_tokens exceeding prompt_tokens (non-inclusive reporting)', () => {
+    const result = convertXaiChatUsage({
+      prompt_tokens: 4142,
+      completion_tokens: 254,
+      total_tokens: 8724,
+      prompt_tokens_details: {
+        cached_tokens: 4328,
+      },
+    });
+
+    expect(result.inputTokens).toMatchInlineSnapshot(`
+      {
+        "cacheRead": 4328,
+        "cacheWrite": undefined,
+        "noCache": 4142,
+        "total": 8470,
+      }
+    `);
+  });
+
   it('should handle null token details', () => {
     const result = convertXaiChatUsage({
       prompt_tokens: 100,
```

packages/xai/src/convert-xai-chat-usage.ts

Lines changed: 8 additions & 2 deletions
```diff
@@ -6,10 +6,16 @@ export function convertXaiChatUsage(usage: XaiChatUsage): LanguageModelV3Usage {
   const reasoningTokens =
     usage.completion_tokens_details?.reasoning_tokens ?? 0;
 
+  const promptTokensIncludesCached = cacheReadTokens <= usage.prompt_tokens;
+
   return {
     inputTokens: {
-      total: usage.prompt_tokens,
-      noCache: usage.prompt_tokens - cacheReadTokens,
+      total: promptTokensIncludesCached
+        ? usage.prompt_tokens
+        : usage.prompt_tokens + cacheReadTokens,
+      noCache: promptTokensIncludesCached
+        ? usage.prompt_tokens - cacheReadTokens
+        : usage.prompt_tokens,
       cacheRead: cacheReadTokens,
       cacheWrite: undefined,
     },
```
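a quick usage sketch of the updated converter covering both reporting styles (the inputs are hypothetical; field names follow the diff and tests above):

```ts
import { convertXaiChatUsage } from './convert-xai-chat-usage';

// inclusive reporting (most models): cached <= prompt,
// so noCache = 12 - 3 = 9 and total stays 12
const inclusive = convertXaiChatUsage({
  prompt_tokens: 12,
  completion_tokens: 5,
  total_tokens: 17,
  prompt_tokens_details: { cached_tokens: 3 },
});

// exclusive reporting (e.g. grok-4-1-fast-non-reasoning): cached > prompt,
// so noCache = 4142 and total = 4142 + 4328 = 8470
const exclusive = convertXaiChatUsage({
  prompt_tokens: 4142,
  completion_tokens: 254,
  total_tokens: 8724,
  prompt_tokens_details: { cached_tokens: 4328 },
});
```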
packages/xai/src/responses/convert-xai-responses-usage.test.ts

Lines changed: 145 additions & 0 deletions
```diff
@@ -0,0 +1,145 @@
+import { convertXaiResponsesUsage } from './convert-xai-responses-usage';
+import { describe, it, expect } from 'vitest';
+
+describe('convertXaiResponsesUsage', () => {
+  it('should convert basic usage without caching or reasoning', () => {
+    const result = convertXaiResponsesUsage({
+      input_tokens: 100,
+      output_tokens: 50,
+    });
+
+    expect(result).toMatchInlineSnapshot(`
+      {
+        "inputTokens": {
+          "cacheRead": 0,
+          "cacheWrite": undefined,
+          "noCache": 100,
+          "total": 100,
+        },
+        "outputTokens": {
+          "reasoning": 0,
+          "text": 50,
+          "total": 50,
+        },
+        "raw": {
+          "input_tokens": 100,
+          "output_tokens": 50,
+        },
+      }
+    `);
+  });
+
+  it('should convert usage with reasoning tokens', () => {
+    const result = convertXaiResponsesUsage({
+      input_tokens: 1941,
+      output_tokens: 583,
+      total_tokens: 2524,
+      output_tokens_details: {
+        reasoning_tokens: 380,
+      },
+    });
+
+    expect(result.outputTokens).toMatchInlineSnapshot(`
+      {
+        "reasoning": 380,
+        "text": 203,
+        "total": 583,
+      }
+    `);
+  });
+
+  it('should convert usage with cached input tokens', () => {
+    const result = convertXaiResponsesUsage({
+      input_tokens: 200,
+      output_tokens: 50,
+      input_tokens_details: {
+        cached_tokens: 150,
+      },
+    });
+
+    expect(result.inputTokens).toMatchInlineSnapshot(`
+      {
+        "cacheRead": 150,
+        "cacheWrite": undefined,
+        "noCache": 50,
+        "total": 200,
+      }
+    `);
+  });
+
+  it('should handle cached_tokens exceeding input_tokens (non-inclusive reporting)', () => {
+    const result = convertXaiResponsesUsage({
+      input_tokens: 4142,
+      output_tokens: 254,
+      input_tokens_details: {
+        cached_tokens: 4328,
+      },
+    });
+
+    expect(result.inputTokens).toMatchInlineSnapshot(`
+      {
+        "cacheRead": 4328,
+        "cacheWrite": undefined,
+        "noCache": 4142,
+        "total": 8470,
+      }
+    `);
+  });
+
+  it('should convert usage with both cached input and reasoning', () => {
+    const result = convertXaiResponsesUsage({
+      input_tokens: 200,
+      output_tokens: 583,
+      input_tokens_details: {
+        cached_tokens: 150,
+      },
+      output_tokens_details: {
+        reasoning_tokens: 380,
+      },
+    });
+
+    expect(result).toMatchInlineSnapshot(`
+      {
+        "inputTokens": {
+          "cacheRead": 150,
+          "cacheWrite": undefined,
+          "noCache": 50,
+          "total": 200,
+        },
+        "outputTokens": {
+          "reasoning": 380,
+          "text": 203,
+          "total": 583,
+        },
+        "raw": {
+          "input_tokens": 200,
+          "input_tokens_details": {
+            "cached_tokens": 150,
+          },
+          "output_tokens": 583,
+          "output_tokens_details": {
+            "reasoning_tokens": 380,
+          },
+        },
+      }
+    `);
+  });
+
+  it('should preserve raw usage data', () => {
+    const rawUsage = {
+      input_tokens: 12,
+      output_tokens: 319,
+      total_tokens: 331,
+      input_tokens_details: {
+        cached_tokens: 2,
+      },
+      output_tokens_details: {
+        reasoning_tokens: 317,
+      },
+    };
+
+    const result = convertXaiResponsesUsage(rawUsage);
+
+    expect(result.raw).toEqual(rawUsage);
+  });
+});
```

packages/xai/src/responses/convert-xai-responses-usage.ts

Lines changed: 8 additions & 2 deletions
```diff
@@ -7,10 +7,16 @@ export function convertXaiResponsesUsage(
   const cacheReadTokens = usage.input_tokens_details?.cached_tokens ?? 0;
   const reasoningTokens = usage.output_tokens_details?.reasoning_tokens ?? 0;
 
+  const inputTokensIncludesCached = cacheReadTokens <= usage.input_tokens;
+
   return {
     inputTokens: {
-      total: usage.input_tokens,
-      noCache: usage.input_tokens - cacheReadTokens,
+      total: inputTokensIncludesCached
+        ? usage.input_tokens
+        : usage.input_tokens + cacheReadTokens,
+      noCache: inputTokensIncludesCached
+        ? usage.input_tokens - cacheReadTokens
+        : usage.input_tokens,
       cacheRead: cacheReadTokens,
       cacheWrite: undefined,
     },
```
