
Commit c80d906

Backport: fix(provider/xai): handle inconsistent cached token reporting (#12518)
## background

backport of #12485 to `release-v5.0`.

xAI's token reporting is inconsistent across models. most models report `prompt_tokens`/`input_tokens` inclusive of cached tokens (like OpenAI), but some models (e.g. `grok-4-1-fast-non-reasoning`) report them exclusive of cached tokens, so `cached_tokens > prompt_tokens`.

## summary

- add `convertXaiChatUsage` and `convertXaiResponsesUsage` converter functions
- detect which reporting style xAI is using based on whether `cached_tokens <= prompt_tokens` (see the sketch below)
  - when inclusive (normal): use prompt tokens as-is
  - when exclusive (anomalous): add cached tokens to prompt tokens to get total input tokens
- applies to both the chat completions and responses APIs
- adapted for the v5 `LanguageModelV2Usage` flat format (vs the v6 structured format)

## verification

<details>
<summary>tests</summary>

```
✓ src/convert-xai-chat-usage.test.ts (6 tests) 6ms
✓ src/responses/convert-xai-responses-usage.test.ts (6 tests) 6ms

Test Files  2 passed (2)
     Tests  12 passed (12)
```

</details>

## checklist

- [x] tests have been added / updated (for bug fixes / features)
- [ ] documentation has been added / updated (for bug fixes / features)
- [x] a _patch_ changeset for relevant packages has been added (run `pnpm changeset` in root)
- [x] i have reviewed this pull request (self-review)

## related issues

backport of #12485

---------

Co-authored-by: josh <josh@afterima.ge>
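A minimal sketch of the detection heuristic described above (the helper name is illustrative only; the actual logic lives in `convertXaiChatUsage` / `convertXaiResponsesUsage` in the diff below):

```ts
// Illustrative sketch, not part of the change itself.
//
// inclusive reporting (most models, OpenAI-style): prompt_tokens already
// contains cached tokens, e.g. prompt_tokens=200, cached_tokens=150 -> 200.
// exclusive reporting (e.g. grok-4-1-fast-non-reasoning): cached tokens are
// on top, e.g. prompt_tokens=4142, cached_tokens=4328 -> 4142 + 4328 = 8470.
function normalizeInputTokens(promptTokens: number, cachedTokens: number): number {
  const promptIncludesCached = cachedTokens <= promptTokens;
  return promptIncludesCached ? promptTokens : promptTokens + cachedTokens;
}
```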
1 parent ca7f69e commit c80d906

11 files changed: +440 −44 lines changed

.changeset/stupid-bags-crash.md

Lines changed: 5 additions & 0 deletions
@@ -0,0 +1,5 @@
---
'@ai-sdk/xai': patch
---

fix(provider/xai): handle inconsistent cached token reporting
Lines changed: 44 additions & 0 deletions
@@ -0,0 +1,44 @@
import { xai } from '@ai-sdk/xai';
import { generateText } from 'ai';
import { run } from '../lib/run';

const models = [
  'grok-4',
  'grok-4-1-fast-reasoning',
  'grok-4-1-fast-non-reasoning',
  'grok-4-fast-reasoning',
  'grok-4-fast-non-reasoning',
  'grok-code-fast-1',
  'grok-3',
  'grok-3-fast',
  'grok-3-mini',
  'grok-3-mini-fast',
];

run(async () => {
  for (const modelId of models) {
    try {
      const result = await generateText({
        model: xai.responses(modelId),
        prompt: 'Say a single word.',
      });

      const body = result.response.body as Record<string, any>;
      const raw = body.usage;
      const sdk = result.usage;

      console.log(`--- ${modelId} ---`);
      console.log(
        `raw: output_tokens=${raw.output_tokens}, reasoning_tokens=${raw.output_tokens_details?.reasoning_tokens ?? 0}, total_tokens=${raw.total_tokens}`,
      );
      console.log(
        `sdk: outputTokens=${sdk.outputTokens}, reasoningTokens=${sdk.reasoningTokens}, cachedInputTokens=${sdk.cachedInputTokens}, totalTokens=${sdk.totalTokens}`,
      );
      console.log();
    } catch (e: any) {
      console.log(`--- ${modelId} ---`);
      console.log(`error: ${e.message?.slice(0, 80)}`);
      console.log();
    }
  }
});
Lines changed: 16 additions & 0 deletions
@@ -0,0 +1,16 @@
import { xai } from '@ai-sdk/xai';
import { generateText } from 'ai';
import { run } from '../lib/run';

run(async () => {
  const result = await generateText({
    model: xai.responses('grok-4-1-fast-non-reasoning'),
    prompt: 'Say a single word.',
  });

  console.log('text:', result.text);
  console.log();
  console.log('raw usage:', JSON.stringify(result.response.body, null, 2));
  console.log();
  console.log('sdk usage:', JSON.stringify(result.usage, null, 2));
});
Lines changed: 43 additions & 0 deletions
@@ -0,0 +1,43 @@
import { xai } from '@ai-sdk/xai';
import { streamText } from 'ai';
import { run } from '../lib/run';

const models = [
  'grok-4',
  'grok-4-1-fast-reasoning',
  'grok-4-1-fast-non-reasoning',
  'grok-4-fast-reasoning',
  'grok-4-fast-non-reasoning',
  'grok-code-fast-1',
  'grok-3',
  'grok-3-fast',
  'grok-3-mini',
  'grok-3-mini-fast',
];

run(async () => {
  for (const modelId of models) {
    try {
      const result = streamText({
        model: xai.responses(modelId),
        prompt: 'Say a single word.',
      });

      for await (const textPart of result.textStream) {
        void textPart;
      }

      const sdk = await result.usage;

      console.log(`--- ${modelId} ---`);
      console.log(
        `sdk: outputTokens=${sdk.outputTokens}, reasoningTokens=${sdk.reasoningTokens}, cachedInputTokens=${sdk.cachedInputTokens}, totalTokens=${sdk.totalTokens}`,
      );
      console.log();
    } catch (e: any) {
      console.log(`--- ${modelId} ---`);
      console.log(`error: ${e.message?.slice(0, 80)}`);
      console.log();
    }
  }
});
Lines changed: 18 additions & 0 deletions
@@ -0,0 +1,18 @@
import { xai } from '@ai-sdk/xai';
import { streamText } from 'ai';
import { run } from '../lib/run';

run(async () => {
  const result = streamText({
    model: xai.responses('grok-3-mini'),
    prompt: 'Say a single word.',
  });

  for await (const textPart of result.textStream) {
    process.stdout.write(textPart);
  }

  console.log();
  console.log();
  console.log('sdk usage:', JSON.stringify(await result.usage, null, 2));
});
Lines changed: 119 additions & 0 deletions
@@ -0,0 +1,119 @@
import { convertXaiChatUsage } from './convert-xai-chat-usage';
import { describe, it, expect } from 'vitest';

describe('convertXaiChatUsage', () => {
  it('should convert basic usage without caching or reasoning', () => {
    const result = convertXaiChatUsage({
      prompt_tokens: 100,
      completion_tokens: 50,
      total_tokens: 150,
    });

    expect(result).toMatchInlineSnapshot(`
      {
        "cachedInputTokens": undefined,
        "inputTokens": 100,
        "outputTokens": 50,
        "reasoningTokens": undefined,
        "totalTokens": 150,
      }
    `);
  });

  it('should convert usage with cached tokens (inclusive reporting)', () => {
    const result = convertXaiChatUsage({
      prompt_tokens: 200,
      completion_tokens: 50,
      total_tokens: 250,
      prompt_tokens_details: {
        cached_tokens: 150,
      },
    });

    expect(result).toMatchInlineSnapshot(`
      {
        "cachedInputTokens": 150,
        "inputTokens": 200,
        "outputTokens": 50,
        "reasoningTokens": undefined,
        "totalTokens": 250,
      }
    `);
  });

  it('should convert usage with reasoning tokens', () => {
    const result = convertXaiChatUsage({
      prompt_tokens: 100,
      completion_tokens: 254,
      total_tokens: 734,
      completion_tokens_details: {
        reasoning_tokens: 380,
      },
    });

    expect(result).toMatchInlineSnapshot(`
      {
        "cachedInputTokens": undefined,
        "inputTokens": 100,
        "outputTokens": 634,
        "reasoningTokens": 380,
        "totalTokens": 734,
      }
    `);
  });

  it('should handle cached_tokens exceeding prompt_tokens (non-inclusive reporting)', () => {
    const result = convertXaiChatUsage({
      prompt_tokens: 4142,
      completion_tokens: 254,
      total_tokens: 8724,
      prompt_tokens_details: {
        cached_tokens: 4328,
      },
    });

    expect(result).toMatchInlineSnapshot(`
      {
        "cachedInputTokens": 4328,
        "inputTokens": 8470,
        "outputTokens": 254,
        "reasoningTokens": undefined,
        "totalTokens": 8724,
      }
    `);
  });

  it('should handle undefined usage', () => {
    const result = convertXaiChatUsage(undefined);

    expect(result).toMatchInlineSnapshot(`
      {
        "cachedInputTokens": undefined,
        "inputTokens": undefined,
        "outputTokens": undefined,
        "reasoningTokens": undefined,
        "totalTokens": undefined,
      }
    `);
  });

  it('should handle null token details', () => {
    const result = convertXaiChatUsage({
      prompt_tokens: 100,
      completion_tokens: 50,
      total_tokens: 150,
      prompt_tokens_details: null,
      completion_tokens_details: null,
    });

    expect(result).toMatchInlineSnapshot(`
      {
        "cachedInputTokens": undefined,
        "inputTokens": 100,
        "outputTokens": 50,
        "reasoningTokens": undefined,
        "totalTokens": 150,
      }
    `);
  });
});
Lines changed: 43 additions & 0 deletions
@@ -0,0 +1,43 @@
import { LanguageModelV2Usage } from '@ai-sdk/provider';

export type XaiChatUsage = {
  prompt_tokens: number;
  completion_tokens: number;
  total_tokens: number;
  prompt_tokens_details?: {
    cached_tokens?: number | null;
  } | null;
  completion_tokens_details?: {
    reasoning_tokens?: number | null;
  } | null;
};

export function convertXaiChatUsage(
  usage: XaiChatUsage | undefined | null,
): LanguageModelV2Usage {
  if (usage == null) {
    return {
      inputTokens: undefined,
      outputTokens: undefined,
      totalTokens: undefined,
      reasoningTokens: undefined,
      cachedInputTokens: undefined,
    };
  }

  const cacheReadTokens = usage.prompt_tokens_details?.cached_tokens ?? 0;
  const reasoningTokens =
    usage.completion_tokens_details?.reasoning_tokens ?? 0;

  const promptTokensIncludesCached = cacheReadTokens <= usage.prompt_tokens;

  return {
    inputTokens: promptTokensIncludesCached
      ? usage.prompt_tokens
      : usage.prompt_tokens + cacheReadTokens,
    outputTokens: usage.completion_tokens + reasoningTokens,
    totalTokens: usage.total_tokens,
    reasoningTokens: reasoningTokens || undefined,
    cachedInputTokens: cacheReadTokens || undefined,
  };
}
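For reference, a worked example of how the converter above normalizes the anomalous exclusive-reporting case (the numbers are taken from the "cached_tokens exceeding prompt_tokens" test in this commit):

```ts
import { convertXaiChatUsage } from './convert-xai-chat-usage';

// Exclusive reporting: cached_tokens (4328) > prompt_tokens (4142),
// so the converter adds them to get the total input tokens.
const usage = convertXaiChatUsage({
  prompt_tokens: 4142,
  completion_tokens: 254,
  total_tokens: 8724,
  prompt_tokens_details: { cached_tokens: 4328 },
});
// => {
//   inputTokens: 8470,        // 4142 + 4328
//   outputTokens: 254,
//   totalTokens: 8724,
//   reasoningTokens: undefined,
//   cachedInputTokens: 4328,
// }
```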
