Skip to content

Commit a7a27ec

Browse files
DouweM and claude[bot]
authored
fix: Handle missing token details in vLLM/OpenAI-compatible APIs (#2669)
Co-authored-by: claude[bot] <209825114+claude[bot]@users.noreply.github.com> Co-authored-by: Douwe Maan <[email protected]>
1 parent fc6b1e5 commit a7a27ec

File tree

3 files changed

+122
-3
lines changed

3 files changed

+122
-3
lines changed

pydantic_ai_slim/pydantic_ai/models/openai.py

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1375,11 +1375,21 @@ def _map_usage(response: chat.ChatCompletion | ChatCompletionChunk | responses.R
13751375
).items()
13761376
if isinstance(value, int)
13771377
}
1378-
details['reasoning_tokens'] = response_usage.output_tokens_details.reasoning_tokens
1378+
# Handle vLLM compatibility - some providers don't include token details
1379+
if getattr(response_usage, 'input_tokens_details', None) is not None:
1380+
cache_read_tokens = response_usage.input_tokens_details.cached_tokens
1381+
else:
1382+
cache_read_tokens = 0
1383+
1384+
if getattr(response_usage, 'output_tokens_details', None) is not None:
1385+
details['reasoning_tokens'] = response_usage.output_tokens_details.reasoning_tokens
1386+
else:
1387+
details['reasoning_tokens'] = 0
1388+
13791389
return usage.RequestUsage(
13801390
input_tokens=response_usage.input_tokens,
13811391
output_tokens=response_usage.output_tokens,
1382-
cache_read_tokens=response_usage.input_tokens_details.cached_tokens,
1392+
cache_read_tokens=cache_read_tokens,
13831393
details=details,
13841394
)
13851395
else:
Lines changed: 96 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,96 @@
1+
interactions:
2+
- request:
3+
headers:
4+
accept:
5+
- application/json
6+
accept-encoding:
7+
- gzip, deflate
8+
connection:
9+
- keep-alive
10+
content-length:
11+
- '84'
12+
content-type:
13+
- application/json
14+
host:
15+
- api.openai.com
16+
method: POST
17+
parsed_body:
18+
input:
19+
- content: What is 2+2?
20+
role: user
21+
model: gpt-4o
22+
stream: false
23+
uri: https://api.openai.com/v1/responses
24+
response:
25+
headers:
26+
alt-svc:
27+
- h3=":443"; ma=86400
28+
connection:
29+
- keep-alive
30+
content-length:
31+
- '1369'
32+
content-type:
33+
- application/json
34+
openai-organization:
35+
- pydantic-28gund
36+
openai-processing-ms:
37+
- '899'
38+
openai-project:
39+
- proj_dKobscVY9YJxeEaDJen54e3d
40+
openai-version:
41+
- '2020-10-01'
42+
strict-transport-security:
43+
- max-age=31536000; includeSubDomains; preload
44+
transfer-encoding:
45+
- chunked
46+
parsed_body:
47+
background: false
48+
created_at: 1756246493
49+
error: null
50+
id: resp_68ae31dd0edc819da9e77ad3cdb128770306cb62070aed80
51+
incomplete_details: null
52+
instructions: null
53+
max_output_tokens: null
54+
max_tool_calls: null
55+
metadata: {}
56+
model: gpt-4o-2024-08-06
57+
object: response
58+
output:
59+
- content:
60+
- annotations: []
61+
logprobs: []
62+
text: 2 + 2 equals 4.
63+
type: output_text
64+
id: msg_68ae31ddcef0819da13eb00d2b393eb60306cb62070aed80
65+
role: assistant
66+
status: completed
67+
type: message
68+
parallel_tool_calls: true
69+
previous_response_id: null
70+
prompt_cache_key: null
71+
reasoning:
72+
effort: null
73+
summary: null
74+
safety_identifier: null
75+
service_tier: default
76+
status: completed
77+
store: true
78+
temperature: 1.0
79+
text:
80+
format:
81+
type: text
82+
verbosity: medium
83+
tool_choice: auto
84+
tools: []
85+
top_logprobs: 0
86+
top_p: 1.0
87+
truncation: disabled
88+
usage:
89+
input_tokens: 14
90+
output_tokens: 9
91+
total_tokens: 23
92+
user: null
93+
status:
94+
code: 200
95+
message: OK
96+
version: 1

tests/models/test_openai_responses.py

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@
2929
from pydantic_ai.output import NativeOutput, PromptedOutput, TextOutput, ToolOutput
3030
from pydantic_ai.profiles.openai import openai_model_profile
3131
from pydantic_ai.tools import ToolDefinition
32-
from pydantic_ai.usage import RequestUsage
32+
from pydantic_ai.usage import RequestUsage, RunUsage
3333

3434
from ..conftest import IsDatetime, IsStr, TestEnv, try_import
3535
from ..parts_from_messages import part_types_from_messages
@@ -1076,3 +1076,16 @@ async def test_openai_responses_verbosity(allow_model_requests: None, openai_api
10761076
agent = Agent(model=model, model_settings=OpenAIResponsesModelSettings(openai_text_verbosity='low'))
10771077
result = await agent.run('What is 2+2?')
10781078
assert result.output == snapshot('4')
1079+
1080+
1081+
async def test_openai_responses_usage_without_tokens_details(allow_model_requests: None, openai_api_key: str):
    """Verify usage parsing when the response omits token-detail sub-objects.

    The VCR cassette was manually modified to remove the input_tokens_details
    and output_tokens_details fields, so this exercises the fallback path for
    vLLM/OpenAI-compatible servers that do not send them: cache_read_tokens
    and reasoning_tokens should default to 0 rather than raising.
    """
    model = OpenAIResponsesModel('gpt-4o', provider=OpenAIProvider(api_key=openai_api_key))
    agent = Agent(model=model)

    result = await agent.run('What is 2+2?')

    assert result.usage() == snapshot(
        RunUsage(input_tokens=14, output_tokens=9, details={'reasoning_tokens': 0}, requests=1)
    )

0 commit comments

Comments
 (0)