@@ -922,8 +922,9 @@ async def count_tokens(
         if self.system != 'openai':
             raise NotImplementedError('Token counting is only supported for OpenAI system.')
 
+        model_settings, model_request_parameters = self.prepare_request(model_settings, model_request_parameters)
         openai_messages = await self._map_messages(messages, model_request_parameters)
-        token_count = num_tokens_from_messages(openai_messages, self.model_name)
+        token_count = _num_tokens_from_messages(openai_messages, self.model_name)
 
         return usage.RequestUsage(
             input_tokens=token_count,
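For context, a minimal usage sketch of the Chat Completions path after this change. The class and parameter names (OpenAIChatModel, ModelRequestParameters) and the positional count_tokens signature are assumptions based on the surrounding codebase, not confirmed by the diff; no request is sent, since the count is computed locally with tiktoken, but provider construction may still expect OPENAI_API_KEY to be set.

import asyncio

from pydantic_ai.messages import ModelRequest, UserPromptPart
from pydantic_ai.models import ModelRequestParameters
from pydantic_ai.models.openai import OpenAIChatModel

async def main() -> None:
    model = OpenAIChatModel('gpt-4o')
    # Estimate prompt tokens locally (via tiktoken) without hitting the API.
    request_usage = await model.count_tokens(
        [ModelRequest(parts=[UserPromptPart(content='Hello!')])],
        None,  # model_settings
        ModelRequestParameters(),
    )
    print(request_usage.input_tokens)

asyncio.run(main())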
@@ -1733,10 +1734,11 @@ async def count_tokens(
         if self.system != 'openai':
             raise NotImplementedError('Token counting is only supported for OpenAI system.')
 
+        model_settings, model_request_parameters = self.prepare_request(model_settings, model_request_parameters)
         _, openai_messages = await self._map_messages(
             messages, cast(OpenAIResponsesModelSettings, model_settings or {}), model_request_parameters
         )
-        token_count = num_tokens_from_messages(openai_messages, self.model_name)
+        token_count = _num_tokens_from_messages(openai_messages, self.model_name)
 
         return usage.RequestUsage(
             input_tokens=token_count,
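The Responses-API variant threads model_settings into _map_messages, which is why merging them via prepare_request first matters on this path. A hedged sketch of the same call against the Responses model: the class and settings names appear in the diff, but the positional signature and the openai_reasoning_effort key are assumptions.

import asyncio

from pydantic_ai.messages import ModelRequest, UserPromptPart
from pydantic_ai.models import ModelRequestParameters
from pydantic_ai.models.openai import OpenAIResponsesModel, OpenAIResponsesModelSettings

async def main() -> None:
    model = OpenAIResponsesModel('gpt-5')
    # Settings flow through prepare_request and into message mapping before counting.
    request_usage = await model.count_tokens(
        [ModelRequest(parts=[UserPromptPart(content='Hello!')])],
        OpenAIResponsesModelSettings(openai_reasoning_effort='low'),
        ModelRequestParameters(),
    )
    print(request_usage.input_tokens)

asyncio.run(main())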
@@ -2376,7 +2378,7 @@ def _map_mcp_call(
     )
 
 
-def num_tokens_from_messages(
+def _num_tokens_from_messages(
     messages: list[chat.ChatCompletionMessageParam] | list[responses.ResponseInputItemParam],
     model: OpenAIModelName,
 ) -> int:
@@ -2385,34 +2387,15 @@ def num_tokens_from_messages(
         encoding = tiktoken.encoding_for_model(model)
     except KeyError:
         encoding = tiktoken.get_encoding('o200k_base')
-    if model in {
-        'gpt-3.5-turbo-0125',
-        'gpt-4-0314',
-        'gpt-4-32k-0314',
-        'gpt-4-0613',
-        'gpt-4-32k-0613',
-        'gpt-4o-mini-2024-07-18',
-        'gpt-4o-2024-08-06',
-    }:
-        tokens_per_message = 3
-        final_primer = 3  # every reply is primed with <|start|>assistant<|message|>
-    elif model in {
-        'gpt-5-2025-08-07',
-    }:
+
+    if 'gpt-5' in model:
         tokens_per_message = 3
-        final_primer = 2
-    elif 'gpt-3.5-turbo' in model:
-        return num_tokens_from_messages(messages, model='gpt-3.5-turbo-0125')
-    elif 'gpt-4o-mini' in model:
-        return num_tokens_from_messages(messages, model='gpt-4o-mini-2024-07-18')
-    elif 'gpt-4o' in model:
-        return num_tokens_from_messages(messages, model='gpt-4o-2024-08-06')
-    elif 'gpt-4' in model:
-        return num_tokens_from_messages(messages, model='gpt-4-0613')
-    elif 'gpt-5' in model:
-        return num_tokens_from_messages(messages, model='gpt-5-2025-08-07')
+        final_primer = 2  # "reverse engineered" based on test cases
     else:
-        raise NotImplementedError(f"""num_tokens_from_messages() is not implemented for model {model}.""")
+        # Adapted from https://cookbook.openai.com/examples/how_to_count_tokens_with_tiktoken#6-counting-tokens-for-chat-completions-api-calls
+        tokens_per_message = 3
+        final_primer = 3  # every reply is primed with <|start|>assistant<|message|>
+
     num_tokens = 0
     for message in messages:
         num_tokens += tokens_per_message
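With the per-model table gone, the estimate reduces to the cookbook recipe plus a single gpt-5 primer tweak. Below is a self-contained sketch of that arithmetic over plain Chat-Completions-style dict messages; it is a simplification that skips the cookbook's per-name adjustment and the typed message params the real helper iterates, and the gpt-5 primer value is taken from the diff, not from OpenAI documentation.

import tiktoken

def estimate_tokens(messages: list[dict[str, str]], model: str) -> int:
    try:
        encoding = tiktoken.encoding_for_model(model)
    except KeyError:
        # Unknown model names fall back to the o200k_base encoding.
        encoding = tiktoken.get_encoding('o200k_base')
    tokens_per_message = 3
    # gpt-5 primer value comes from the diff; everything else follows the cookbook.
    final_primer = 2 if 'gpt-5' in model else 3
    num_tokens = 0
    for message in messages:
        num_tokens += tokens_per_message
        for value in message.values():
            num_tokens += len(encoding.encode(value))
    # Every reply is primed with <|start|>assistant<|message|>.
    return num_tokens + final_primer

print(estimate_tokens([{'role': 'user', 'content': 'Hello!'}], 'gpt-4o'))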