
Commit 6d2d4dd

add changes
1 parent acf86b0 commit 6d2d4dd

5 files changed: +100317 -200088 lines changed

pydantic_ai_slim/pydantic_ai/models/openai.py

Lines changed: 12 additions & 29 deletions
@@ -922,8 +922,9 @@ async def count_tokens(
         if self.system != 'openai':
             raise NotImplementedError('Token counting is only supported for OpenAI system.')
 
+        model_settings, model_request_parameters = self.prepare_request(model_settings, model_request_parameters)
         openai_messages = await self._map_messages(messages, model_request_parameters)
-        token_count = num_tokens_from_messages(openai_messages, self.model_name)
+        token_count = _num_tokens_from_messages(openai_messages, self.model_name)
 
         return usage.RequestUsage(
             input_tokens=token_count,

@@ -1733,10 +1734,11 @@ async def count_tokens(
         if self.system != 'openai':
             raise NotImplementedError('Token counting is only supported for OpenAI system.')
 
+        model_settings, model_request_parameters = self.prepare_request(model_settings, model_request_parameters)
         _, openai_messages = await self._map_messages(
             messages, cast(OpenAIResponsesModelSettings, model_settings or {}), model_request_parameters
         )
-        token_count = num_tokens_from_messages(openai_messages, self.model_name)
+        token_count = _num_tokens_from_messages(openai_messages, self.model_name)
 
         return usage.RequestUsage(
             input_tokens=token_count,

@@ -2376,7 +2378,7 @@ def _map_mcp_call(
     )
 
 
-def num_tokens_from_messages(
+def _num_tokens_from_messages(
     messages: list[chat.ChatCompletionMessageParam] | list[responses.ResponseInputItemParam],
     model: OpenAIModelName,
 ) -> int:

@@ -2385,34 +2387,15 @@ def num_tokens_from_messages(
         encoding = tiktoken.encoding_for_model(model)
     except KeyError:
         encoding = tiktoken.get_encoding('o200k_base')
-    if model in {
-        'gpt-3.5-turbo-0125',
-        'gpt-4-0314',
-        'gpt-4-32k-0314',
-        'gpt-4-0613',
-        'gpt-4-32k-0613',
-        'gpt-4o-mini-2024-07-18',
-        'gpt-4o-2024-08-06',
-    }:
-        tokens_per_message = 3
-        final_primer = 3  # every reply is primed with <|start|>assistant<|message|>
-    elif model in {
-        'gpt-5-2025-08-07',
-    }:
+
+    if 'gpt-5' in model:
         tokens_per_message = 3
-        final_primer = 2
-    elif 'gpt-3.5-turbo' in model:
-        return num_tokens_from_messages(messages, model='gpt-3.5-turbo-0125')
-    elif 'gpt-4o-mini' in model:
-        return num_tokens_from_messages(messages, model='gpt-4o-mini-2024-07-18')
-    elif 'gpt-4o' in model:
-        return num_tokens_from_messages(messages, model='gpt-4o-2024-08-06')
-    elif 'gpt-4' in model:
-        return num_tokens_from_messages(messages, model='gpt-4-0613')
-    elif 'gpt-5' in model:
-        return num_tokens_from_messages(messages, model='gpt-5-2025-08-07')
+        final_primer = 2  # "reverse engineered" based on test cases
     else:
-        raise NotImplementedError(f"""num_tokens_from_messages() is not implemented for model {model}.""")
+        # Adapted from https://cookbook.openai.com/examples/how_to_count_tokens_with_tiktoken#6-counting-tokens-for-chat-completions-api-calls
+        tokens_per_message = 3
+        final_primer = 3  # every reply is primed with <|start|>assistant<|message|>
+
     num_tokens = 0
     for message in messages:
         num_tokens += tokens_per_message
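
For reference, a minimal sketch of the tiktoken counting scheme the renamed _num_tokens_from_messages helper applies, assuming text-only chat messages (the real helper also accepts Responses API input items); count_chat_tokens is a hypothetical name used here for illustration:

    # Sketch of the per-message counting scheme shown in the diff above:
    # a fixed framing overhead per message, plus the encoded content,
    # plus a final primer for the assistant reply.
    import tiktoken

    def count_chat_tokens(messages: list[dict[str, str]], model: str) -> int:
        try:
            encoding = tiktoken.encoding_for_model(model)
        except KeyError:
            # Unknown model names fall back to the o200k_base encoding.
            encoding = tiktoken.get_encoding('o200k_base')
        tokens_per_message = 3  # framing overhead per message
        # Every reply is primed with <|start|>assistant<|message|>; per the
        # patch above, gpt-5 models appear to use one token less.
        final_primer = 2 if 'gpt-5' in model else 3
        num_tokens = 0
        for message in messages:
            num_tokens += tokens_per_message
            for value in message.values():
                num_tokens += len(encoding.encode(value))
        return num_tokens + final_primer

    # e.g. count_chat_tokens([{'role': 'user', 'content': 'Hello!'}], 'gpt-4o')

The count feeds usage.RequestUsage(input_tokens=...), which is what the two count_tokens methods above return.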

pydantic_ai_slim/pydantic_ai/usage.py

Lines changed: 10 additions & 2 deletions
@@ -267,8 +267,16 @@ class UsageLimits:
     """The maximum number of tokens allowed in requests and responses combined."""
     count_tokens_before_request: bool = False
     """If True, perform a token counting pass before sending the request to the model,
-    to enforce `request_tokens_limit` ahead of time. This may incur additional overhead
-    (from calling the model's `count_tokens` API before making the actual request) and is disabled by default."""
+    to enforce `input_tokens_limit` ahead of time. This may incur additional overhead
+    (from calling the model's `count_tokens` method before making the actual request) and is disabled by default.
+
+    This feature is only supported by models that implement the `count_tokens` method:
+    [`OpenAIChatModel`][pydantic_ai.models.openai.OpenAIChatModel] and
+    [`OpenAIResponsesModel`][pydantic_ai.models.openai.OpenAIResponsesModel] (only for models with tiktoken support),
+    [`AnthropicModel`][pydantic_ai.models.anthropic.AnthropicModel] (only with `provider='anthropic'`, not with Bedrock),
+    [`BedrockModel`][pydantic_ai.models.bedrock.BedrockModel] (for Bedrock-native Anthropic models),
+    and [`GoogleModel`][pydantic_ai.models.google.GoogleModel].
+    """
 
     @property
     @deprecated('`request_tokens_limit` is deprecated, use `input_tokens_limit` instead')
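
A hedged sketch of how this flag is used from the agent side, assuming an agent backed by one of the supported models listed in the docstring (the prompt text and limit value are placeholders):

    # Hypothetical usage sketch: pre-flight token counting to enforce
    # input_tokens_limit before the request is actually sent.
    from pydantic_ai import Agent
    from pydantic_ai.usage import UsageLimits

    agent = Agent('openai:gpt-4o')
    result = agent.run_sync(
        'Summarize the attached report.',
        usage_limits=UsageLimits(
            input_tokens_limit=1000,
            count_tokens_before_request=True,  # calls the model's count_tokens first
        ),
    )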
