Commit 9101c0e

functions added
1 parent ab69ac5 commit 9101c0e

File tree

1 file changed: +181 -0 lines changed

examples/How_to_count_tokens_with_tiktoken.ipynb

Lines changed: 181 additions & 0 deletions
@@ -602,6 +602,187 @@
    "    print(f'{response.usage.prompt_tokens} prompt tokens counted by the OpenAI API.')\n",
    "    print()\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 7. Counting tokens for chat completions with tool calls\n",
    "\n",
    "Next, we will look at how to apply these calculations to messages that may contain function calls. This is not immediately trivial, due to the formatting of the tools themselves.\n",
    "\n",
    "Below is an example function for counting tokens for messages that contain tools, passed to `gpt-3.5-turbo`, `gpt-4`, `gpt-4o` and `gpt-4o-mini`."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [],
   "source": [
    "def num_tokens_for_tools(functions, messages, model):\n",
    "    \n",
    "    # Initialize function settings to 0\n",
    "    func_init = 0\n",
    "    prop_init = 0\n",
    "    prop_key = 0\n",
    "    enum_init = 0\n",
    "    enum_item = 0\n",
    "    func_end = 0\n",
    "    \n",
    "    if model in [\n",
    "        \"gpt-4o\",\n",
    "        \"gpt-4o-mini\"\n",
    "    ]:\n",
    "        \n",
    "        # Set function settings for the above models\n",
    "        func_init = 7\n",
    "        prop_init = 3\n",
    "        prop_key = 3\n",
    "        enum_init = -3\n",
    "        enum_item = 3\n",
    "        func_end = 12\n",
    "    elif model in [\n",
    "        \"gpt-3.5-turbo\",\n",
    "        \"gpt-4\"\n",
    "    ]:\n",
    "        # Set function settings for the above models\n",
    "        func_init = 10\n",
    "        prop_init = 3\n",
    "        prop_key = 3\n",
    "        enum_init = -3\n",
    "        enum_item = 3\n",
    "        func_end = 12\n",
    "    else:\n",
    "        raise NotImplementedError(\n",
    "            f\"\"\"num_tokens_for_tools() is not implemented for model {model}.\"\"\"\n",
    "        )\n",
    "    \n",
    "    try:\n",
    "        encoding = tiktoken.encoding_for_model(model)\n",
    "    except KeyError:\n",
    "        print(\"Warning: model not found. Using o200k_base encoding.\")\n",
    "        encoding = tiktoken.get_encoding(\"o200k_base\")\n",
    "    \n",
    "    func_token_count = 0\n",
    "    if len(functions) > 0:\n",
    "        for f in functions:\n",
    "            func_token_count += func_init  # Add tokens for start of each function\n",
    "            function = f[\"function\"]\n",
    "            f_name = function[\"name\"]\n",
    "            f_desc = function[\"description\"]\n",
    "            if f_desc.endswith(\".\"):\n",
    "                f_desc = f_desc[:-1]\n",
    "            line = f_name + \":\" + f_desc\n",
    "            func_token_count += len(encoding.encode(line))  # Add tokens for set name and description\n",
    "            if len(function[\"parameters\"][\"properties\"]) > 0:\n",
    "                func_token_count += prop_init  # Add tokens for start of each property\n",
    "                for key in list(function[\"parameters\"][\"properties\"].keys()):\n",
    "                    func_token_count += prop_key  # Add tokens for each set property\n",
    "                    p_name = key\n",
    "                    p_type = function[\"parameters\"][\"properties\"][key][\"type\"]\n",
    "                    p_desc = function[\"parameters\"][\"properties\"][key][\"description\"]\n",
    "                    if \"enum\" in function[\"parameters\"][\"properties\"][key].keys():\n",
    "                        func_token_count += enum_init  # Add tokens if property has enum list\n",
    "                        for item in function[\"parameters\"][\"properties\"][key][\"enum\"]:\n",
    "                            func_token_count += enum_item\n",
    "                            func_token_count += len(encoding.encode(item))\n",
    "                    if p_desc.endswith(\".\"):\n",
    "                        p_desc = p_desc[:-1]\n",
    "                    line = f\"{p_name}:{p_type}:{p_desc}\"\n",
    "                    func_token_count += len(encoding.encode(line))\n",
    "            func_token_count += func_end\n",
    "    \n",
    "    messages_token_count = num_tokens_from_messages(messages, model)\n",
    "    total_tokens = messages_token_count + func_token_count\n",
    "    \n",
    "    return total_tokens"
   ]
  },
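The per-model constants above (`func_init`, `prop_init`, `prop_key`, `enum_init`, `enum_item`, `func_end`) are empirical offsets: the API appears to re-serialize tool definitions into an internal format before tokenizing them, so simply tokenizing the raw JSON does not reproduce the billed prompt tokens. A minimal sketch of that mismatch, using a hypothetical single-tool definition mirroring the example later in this diff:

```python
import json
import tiktoken

# Hypothetical tool definition (mirrors the example later in this diff).
tool = {
    "type": "function",
    "function": {
        "name": "get_current_weather",
        "description": "Get the current weather in a given location",
        "parameters": {
            "type": "object",
            "properties": {
                "location": {
                    "type": "string",
                    "description": "The city and state, e.g. San Francisco, CA",
                },
            },
            "required": ["location"],
        },
    },
}

encoding = tiktoken.get_encoding("o200k_base")

# Tokenizing the raw JSON counts punctuation and whitespace that the API's
# internal serialization does not include, so this number will not match
# response.usage.prompt_tokens -- hence the empirical offsets above.
naive_count = len(encoding.encode(json.dumps(tool)))
print(f"{naive_count} tokens in the raw JSON (not what the API reports).")
```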
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "gpt-3.5-turbo\n",
      "Warning: gpt-3.5-turbo may update over time. Returning num tokens assuming gpt-3.5-turbo-0125.\n",
      "105 prompt tokens counted by num_tokens_for_tools().\n",
      "105 prompt tokens counted by the OpenAI API.\n",
      "\n",
      "gpt-4\n",
      "Warning: gpt-4 may update over time. Returning num tokens assuming gpt-4-0613.\n",
      "105 prompt tokens counted by num_tokens_for_tools().\n",
      "105 prompt tokens counted by the OpenAI API.\n",
      "\n",
      "gpt-4o\n",
      "Warning: gpt-4o and gpt-4o-mini may update over time. Returning num tokens assuming gpt-4o-2024-08-06.\n",
      "101 prompt tokens counted by num_tokens_for_tools().\n",
      "101 prompt tokens counted by the OpenAI API.\n",
      "\n",
      "gpt-4o-mini\n",
      "Warning: gpt-4o-mini may update over time. Returning num tokens assuming gpt-4o-mini-2024-07-18.\n",
      "101 prompt tokens counted by num_tokens_for_tools().\n",
      "101 prompt tokens counted by the OpenAI API.\n",
      "\n"
     ]
    }
   ],
   "source": [
    "tools = [\n",
    "    {\n",
    "        \"type\": \"function\",\n",
    "        \"function\": {\n",
    "            \"name\": \"get_current_weather\",\n",
    "            \"description\": \"Get the current weather in a given location\",\n",
    "            \"parameters\": {\n",
    "                \"type\": \"object\",\n",
    "                \"properties\": {\n",
    "                    \"location\": {\n",
    "                        \"type\": \"string\",\n",
    "                        \"description\": \"The city and state, e.g. San Francisco, CA\",\n",
    "                    },\n",
    "                    \"unit\": {\"type\": \"string\",\n",
    "                             \"description\": \"The unit of temperature to return\",\n",
    "                             \"enum\": [\"celsius\", \"fahrenheit\"]},\n",
    "                },\n",
    "                \"required\": [\"location\"],\n",
    "            },\n",
    "        }\n",
    "    }\n",
    "]\n",
    "\n",
    "example_messages = [\n",
    "    {\n",
    "        \"role\": \"system\",\n",
    "        \"content\": \"You are a helpful assistant that can answer to questions about the weather.\",\n",
    "    },\n",
    "    {\n",
    "        \"role\": \"user\",\n",
    "        \"content\": \"What's the weather like in San Francisco?\",\n",
    "    },\n",
    "]\n",
    "\n",
    "for model in [\n",
    "    \"gpt-3.5-turbo\",\n",
    "    \"gpt-4\",\n",
    "    \"gpt-4o\",\n",
    "    \"gpt-4o-mini\"\n",
    "]:\n",
    "    print(model)\n",
    "    # example token count from the function defined above\n",
    "    print(f\"{num_tokens_for_tools(tools, example_messages, model)} prompt tokens counted by num_tokens_for_tools().\")\n",
    "    # example token count from the OpenAI API\n",
    "    response = client.chat.completions.create(model=model,\n",
    "                                              messages=example_messages,\n",
    "                                              tools=tools,\n",
    "                                              temperature=0)\n",
    "    print(f'{response.usage.prompt_tokens} prompt tokens counted by the OpenAI API.')\n",
    "    print()"
   ]
  }
 ],
 "metadata": {
