Skip to content

Commit c9cfddc

Browse files
authored
Merge pull request #7 from pamelafox/functions-support
Adding support for counting tokens of functions
2 parents 15fd55d + 1acce78 commit c9cfddc

File tree

12 files changed

+860
-60
lines changed

12 files changed

+860
-60
lines changed

.vscode/launch.json

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
{
2+
// Use IntelliSense to learn about possible attributes.
3+
// Hover to view descriptions of existing attributes.
4+
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
5+
"version": "0.2.0",
6+
"configurations": [
7+
{
8+
"name": "Python: Debug Tests",
9+
"type": "debugpy",
10+
"request": "launch",
11+
"program": "${file}",
12+
"purpose": ["debug-test"],
13+
"console": "integratedTerminal",
14+
"justMyCode": false
15+
}
16+
]
17+
}

CHANGELOG.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,11 @@
22

33
All notable changes to this project will be documented in this file.
44

5+
## [0.1.0] - May 2, 2024
6+
7+
- Add `count_tokens_for_system_and_tools` to count tokens for system message and tools. You should count the tokens for both together, since the token count for tools varies based on whether a system message is provided.
8+
- Updated `build_messages` to allow for `tools` and `tool_choice` to be passed in.
9+
510
## [0.0.6] - April 24, 2024
611

712
- Add keyword argument `fallback_to_default` to `build_messages` function to allow for defaulting to the CL100k token encoder and minimum GPT token limit if the model is not found.

README.md

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -31,11 +31,14 @@ Arguments:
3131

3232
* `model` (`str`): The model name to use for token calculation, like gpt-3.5-turbo.
3333
* `system_prompt` (`str`): The initial system prompt message.
34-
* `new_user_message` (`str | List[openai.types.chat.ChatCompletionContentPartParam]`): The new user message to append.
35-
* `past_messages` (`list[dict]`): The list of past messages in the conversation.
36-
* `few_shots` (`list[dict]`): A few-shot list of messages to insert after the system prompt.
37-
* `max_tokens` (`int`): The maximum number of tokens allowed for the conversation.
38-
* `fallback_to_default` (`bool`): Whether to fallback to default model/token limits if model is not found. Defaults to `False`.
34+
* `tools` (`List[openai.types.chat.ChatCompletionToolParam]`): (Optional) The tools that will be used in the conversation. These won't be part of the final returned messages, but they will be used to calculate the token count.
35+
* `tool_choice` (`str | dict`): (Optional) The tool choice that will be used in the conversation. This won't be part of the final returned messages, but it will be used to calculate the token count.
36+
* `new_user_content` (`str | List[openai.types.chat.ChatCompletionContentPartParam]`): (Optional) The content of new user message to append.
37+
* `past_messages` (`list[dict]`): (Optional) The list of past messages in the conversation.
38+
* `few_shots` (`list[dict]`): (Optional) A few-shot list of messages to insert after the system prompt.
39+
* `max_tokens` (`int`): (Optional) The maximum number of tokens allowed for the conversation.
40+
* `fallback_to_default` (`bool`): (Optional) Whether to fallback to default model/token limits if model is not found. Defaults to `False`.
41+
3942

4043
Returns:
4144

@@ -49,7 +52,7 @@ from openai_messages_token_helper import build_messages
4952
messages = build_messages(
5053
model="gpt-35-turbo",
5154
system_prompt="You are a bot.",
52-
new_user_message="That wasn't a good poem.",
55+
new_user_content="That wasn't a good poem.",
5356
past_messages=[
5457
{
5558
"role": "user",

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
[project]
22
name = "openai-messages-token-helper"
33
description = "A helper library for estimating tokens used by messages sent through OpenAI Chat Completions API."
4-
version = "0.0.6"
4+
version = "0.1.0"
55
authors = [{name = "Pamela Fox"}]
66
requires-python = ">=3.9"
77
readme = "README.md"
Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,11 @@
11
from .images_helper import count_tokens_for_image
22
from .message_builder import build_messages
3-
from .model_helper import count_tokens_for_message, get_token_limit
3+
from .model_helper import count_tokens_for_message, count_tokens_for_system_and_tools, get_token_limit
44

5-
__all__ = ["build_messages", "count_tokens_for_message", "count_tokens_for_image", "get_token_limit"]
5+
__all__ = [
6+
"build_messages",
7+
"count_tokens_for_message",
8+
"count_tokens_for_image",
9+
"get_token_limit",
10+
"count_tokens_for_system_and_tools",
11+
]
Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
# Based on https://github.com/forestwanglin/openai-java/blob/main/jtokkit/src/main/java/xyz/felh/openai/jtokkit/utils/TikTokenUtils.java
2+
3+
4+
def format_function_definitions(tools):
    """Render OpenAI tool/function definitions as a TypeScript-style namespace string.

    This is the textual form the model is believed to see internally, so encoding
    this string approximates the token cost of the tools.

    Args:
        tools: List of tool dicts, each with a "function" dict holding
            "name" and optionally "description" and "parameters" (JSON Schema).
    Returns:
        str: The rendered `namespace functions { ... }` block.
    """
    rendered = ["namespace functions {", ""]
    for tool in tools:
        fn = tool.get("function")
        description = fn.get("description")
        if description:
            rendered.append(f"// {description}")
        fn_name = fn.get("name")
        params = fn.get("parameters", {})
        if params.get("properties"):
            rendered.extend(
                [
                    f"type {fn_name} = (_: {{",
                    format_object_parameters(params, 0),
                    "}) => any;",
                ]
            )
        else:
            # No parameters: render a zero-argument function type
            rendered.append(f"type {fn_name} = () => any;")
        rendered.append("")
    rendered.append("} // namespace functions")
    return "\n".join(rendered)
24+
25+
26+
def format_object_parameters(parameters, indent):
    """Render the properties of a JSON Schema object as TypeScript-style fields.

    Args:
        parameters: JSON Schema object with optional "properties" and "required".
        indent: Number of spaces to prefix each rendered line with.
    Returns:
        str: Newline-joined field lines ("name?: type,"), or "" if no properties.
    """
    props = parameters.get("properties")
    if not props:
        return ""
    required = parameters.get("required", [])
    pad = " " * max(0, indent)
    out = []
    for field_name, schema in props.items():
        desc = schema.get("description")
        if desc:
            out.append(f"// {desc}")
        # "?" marks optional fields; required fields get no marker
        marker = "" if (required and field_name in required) else "?"
        out.append(f"{field_name}{marker}: {format_type(schema, indent)},")
    return "\n".join(pad + line for line in out)
41+
42+
43+
def format_type(props, indent):
    """Translate a single JSON Schema property into its TypeScript-style type string.

    Args:
        props (dict): JSON Schema fragment for one property (keyed on "type").
        indent (int): Current indentation level, used when recursing into objects.
    Returns:
        str: TypeScript-style type, e.g. "string", '"a" | "b"', "number[]", "any".
    """
    prop_type = props.get("type")  # renamed from `type` to avoid shadowing the builtin
    if prop_type == "string":
        if "enum" in props:
            return " | ".join([f'"{item}"' for item in props["enum"]])
        return "string"
    elif prop_type == "array":
        # items is required, OpenAI throws an error if it's missing
        return f"{format_type(props['items'], indent)}[]"
    elif prop_type == "object":
        return f"{{\n{format_object_parameters(props, indent + 2)}\n}}"
    elif prop_type in ["integer", "number"]:
        if "enum" in props:
            # NOTE(review): numeric enum values are quoted like strings here,
            # matching the upstream Java reference implementation
            return " | ".join([f'"{item}"' for item in props["enum"]])
        return "number"
    elif prop_type == "boolean":
        return "boolean"
    elif prop_type == "null":
        return "null"
    else:
        # This is a guess, as an empty string doesn't yield the expected token count
        return "any"

src/openai_messages_token_helper/message_builder.py

Lines changed: 32 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -10,10 +10,20 @@
1010
ChatCompletionUserMessageParam,
1111
)
1212

13-
from .model_helper import count_tokens_for_message, get_token_limit
13+
from .model_helper import count_tokens_for_message, count_tokens_for_system_and_tools, get_token_limit
1414

1515

16-
class MessageBuilder:
16+
def normalize_content(content: Union[str, list[ChatCompletionContentPartParam]]):
    """Apply Unicode NFC normalization to message content.

    Accepts either a plain string or a list of content parts (GPT-4V style).
    Text parts are normalized in place; image_url parts are left untouched.
    Returns the normalized string, the (mutated) list, or None for other types.
    """
    if isinstance(content, str):
        return unicodedata.normalize("NFC", content)
    if isinstance(content, list):
        for part in content:
            # Only text parts carry a "text" field; skip image parts
            if "image_url" not in part:
                part["text"] = unicodedata.normalize("NFC", part["text"])
        return content
24+
25+
26+
class _MessageBuilder:
1727
"""
1828
A class for building and managing messages in a chat conversation.
1929
Attributes:
@@ -25,11 +35,10 @@ class MessageBuilder:
2535
insert_message(self, role: str, content: str, index: int = 1): Inserts a new message to the conversation.
2636
"""
2737

28-
def __init__(self, system_content: str, chatgpt_model: str):
38+
def __init__(self, system_content: str):
2939
self.messages: list[ChatCompletionMessageParam] = [
30-
ChatCompletionSystemMessageParam(role="system", content=unicodedata.normalize("NFC", system_content))
40+
ChatCompletionSystemMessageParam(role="system", content=normalize_content(system_content))
3141
]
32-
self.model = chatgpt_model
3342

3443
def insert_message(self, role: str, content: Union[str, list[ChatCompletionContentPartParam]], index: int = 1):
3544
"""
@@ -42,29 +51,21 @@ def insert_message(self, role: str, content: Union[str, list[ChatCompletionConte
4251
"""
4352
message: ChatCompletionMessageParam
4453
if role == "user":
45-
message = ChatCompletionUserMessageParam(role="user", content=self.normalize_content(content))
54+
message = ChatCompletionUserMessageParam(role="user", content=normalize_content(content))
4655
elif role == "assistant" and isinstance(content, str):
47-
message = ChatCompletionAssistantMessageParam(
48-
role="assistant", content=unicodedata.normalize("NFC", content)
49-
)
56+
message = ChatCompletionAssistantMessageParam(role="assistant", content=normalize_content(content))
5057
else:
5158
raise ValueError(f"Invalid role: {role}")
5259
self.messages.insert(index, message)
5360

54-
def normalize_content(self, content: Union[str, list[ChatCompletionContentPartParam]]):
55-
if isinstance(content, str):
56-
return unicodedata.normalize("NFC", content)
57-
elif isinstance(content, list):
58-
for part in content:
59-
if "image_url" not in part:
60-
part["text"] = unicodedata.normalize("NFC", part["text"])
61-
return content
62-
6361

6462
def build_messages(
6563
model: str,
6664
system_prompt: str,
67-
new_user_message: Union[str, list[ChatCompletionContentPartParam], None] = None, # list is for GPT4v usage
65+
*,
66+
tools: Optional[list[dict[str, dict]]] = None,
67+
tool_choice: Optional[Union[str, dict]] = None,
68+
new_user_content: Union[str, list[ChatCompletionContentPartParam], None] = None, # list is for GPT4v usage
6869
past_messages: list[dict[str, str]] = [], # *not* including system prompt
6970
few_shots=[], # will always be inserted after system prompt
7071
max_tokens: Optional[int] = None,
@@ -77,26 +78,32 @@ def build_messages(
7778
Args:
7879
model (str): The model name to use for token calculation, like gpt-3.5-turbo.
7980
system_prompt (str): The initial system prompt message.
80-
new_user_message (str | List[ChatCompletionContentPartParam]): The new user message to append.
81+
tools (list[dict]): A list of tools to include in the conversation.
82+
tool_choice (str | dict): The tool to use in the conversation.
83+
new_user_content (str | List[ChatCompletionContentPartParam]): Content of new user message to append.
8184
past_messages (list[dict]): The list of past messages in the conversation.
8285
few_shots (list[dict]): A few-shot list of messages to insert after the system prompt.
8386
max_tokens (int): The maximum number of tokens allowed for the conversation.
8487
fallback_to_default (bool): Whether to fallback to default model if the model is not found.
8588
"""
86-
message_builder = MessageBuilder(system_prompt, model)
8789
if max_tokens is None:
8890
max_tokens = get_token_limit(model, default_to_minimum=fallback_to_default)
8991

92+
# Start with the required messages: system prompt, few-shots, and new user message
93+
message_builder = _MessageBuilder(system_prompt)
94+
9095
for shot in reversed(few_shots):
9196
message_builder.insert_message(shot.get("role"), shot.get("content"))
9297

9398
append_index = len(few_shots) + 1
9499

95-
if new_user_message:
96-
message_builder.insert_message("user", new_user_message, index=append_index)
100+
if new_user_content:
101+
message_builder.insert_message("user", new_user_content, index=append_index)
97102

98-
total_token_count = 0
99-
for existing_message in message_builder.messages:
103+
total_token_count = count_tokens_for_system_and_tools(
104+
model, message_builder.messages[0], tools, tool_choice, default_to_cl100k=fallback_to_default
105+
)
106+
for existing_message in message_builder.messages[1:]:
100107
total_token_count += count_tokens_for_message(model, existing_message, default_to_cl100k=fallback_to_default)
101108

102109
newest_to_oldest = list(reversed(past_messages))

src/openai_messages_token_helper/model_helper.py

Lines changed: 72 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55

66
import tiktoken
77

8+
from .function_format import format_function_definitions
89
from .images_helper import count_tokens_for_image
910

1011
MODELS_2_TOKEN_LIMITS = {
@@ -42,22 +43,14 @@ def get_token_limit(model: str, default_to_minimum=False) -> int:
4243
return MODELS_2_TOKEN_LIMITS[model]
4344

4445

45-
def count_tokens_for_message(model: str, message: Mapping[str, object], default_to_cl100k=False) -> int:
46+
def encoding_for_model(model: str, default_to_cl100k=False) -> tiktoken.Encoding:
4647
"""
47-
Calculate the number of tokens required to encode a message. Based off cookbook:
48-
https://github.com/openai/openai-cookbook/blob/main/examples/How_to_count_tokens_with_tiktoken.ipynb
49-
48+
Get the encoding for a given GPT model name (OpenAI.com or Azure OpenAI supported).
5049
Args:
51-
model (str): The name of the model to use for encoding.
52-
message (Mapping): The message to encode, in a dictionary-like object.
50+
model (str): The name of the model to get the encoding for.
5351
default_to_cl100k (bool): Whether to default to the CL100k encoding if the model is not found.
5452
Returns:
55-
int: The total number of tokens required to encode the message.
56-
57-
>> model = 'gpt-3.5-turbo'
58-
>> message = {'role': 'user', 'content': 'Hello, how are you?'}
59-
>> count_tokens_for_message(model, message)
60-
13
53+
tiktoken.Encoding: The encoding for the model.
6154
"""
6255
if (
6356
model == ""
@@ -67,14 +60,34 @@ def count_tokens_for_message(model: str, message: Mapping[str, object], default_
6760
raise ValueError("Expected valid OpenAI GPT model name")
6861
model = AOAI_2_OAI.get(model, model)
6962
try:
70-
encoding = tiktoken.encoding_for_model(model)
63+
return tiktoken.encoding_for_model(model)
7164
except KeyError:
7265
if default_to_cl100k:
7366
logger.warning("Model %s not found, defaulting to CL100k encoding", model)
74-
encoding = tiktoken.get_encoding("cl100k_base")
67+
return tiktoken.get_encoding("cl100k_base")
7568
else:
7669
raise
7770

71+
72+
def count_tokens_for_message(model: str, message: Mapping[str, object], default_to_cl100k=False) -> int:
73+
"""
74+
Calculate the number of tokens required to encode a message. Based off cookbook:
75+
https://github.com/openai/openai-cookbook/blob/main/examples/How_to_count_tokens_with_tiktoken.ipynb
76+
77+
Args:
78+
model (str): The name of the model to use for encoding.
79+
message (Mapping): The message to encode, in a dictionary-like object.
80+
default_to_cl100k (bool): Whether to default to the CL100k encoding if the model is not found.
81+
Returns:
82+
int: The total number of tokens required to encode the message.
83+
84+
>> model = 'gpt-3.5-turbo'
85+
>> message = {'role': 'user', 'content': 'Hello, how are you?'}
86+
>> count_tokens_for_message(model, message)
87+
13
88+
"""
89+
encoding = encoding_for_model(model, default_to_cl100k)
90+
7891
# Assumes we're using a recent model
7992
tokens_per_message = 3
8093

@@ -96,3 +109,48 @@ def count_tokens_for_message(model: str, message: Mapping[str, object], default_
96109
num_tokens += 1
97110
num_tokens += 3 # every reply is primed with <|start|>assistant<|message|>
98111
return num_tokens
112+
113+
114+
def count_tokens_for_system_and_tools(
    model: str,
    system_message: dict | None = None,
    tools: list[dict[str, dict]] | None = None,
    tool_choice: str | dict | None = None,
    default_to_cl100k: bool = False,
) -> int:
    """
    Calculate the number of tokens required to encode a system message and tools.
    Both must be calculated together because the count is lower if both are present.
    Based on https://github.com/forestwanglin/openai-java/blob/main/jtokkit/src/main/java/xyz/felh/openai/jtokkit/utils/TikTokenUtils.java

    Args:
        model (str): The name of the model to use for encoding.
        system_message (dict): The system message to encode.
        tools (list[dict[str, dict]]): The tools to encode.
        tool_choice (str | dict): The tool choice to encode.
        default_to_cl100k (bool): Whether to default to the CL100k encoding if the model is not found.
    Returns:
        int: The total number of tokens required to encode the system message and tools.
    """
    encoding = encoding_for_model(model, default_to_cl100k)

    tokens = 0
    if system_message:
        tokens += count_tokens_for_message(model, system_message, default_to_cl100k)
    if tools:
        # Reuse the encoding resolved above: calling tiktoken.encoding_for_model
        # here again would bypass the default_to_cl100k fallback and raise
        # KeyError for unknown model names. (Also removed a leftover debug print.)
        tokens += len(encoding.encode(format_function_definitions(tools)))
        tokens += 9  # Additional tokens for function definition of tools
    # If there's a system message and tools are present, subtract four tokens
    if tools and system_message:
        tokens -= 4
    # If tool_choice is 'none', add one token.
    # If it's an object, add 7 + the number of tokens in the function name.
    # If it's undefined or 'auto', don't add anything.
    if tool_choice == "none":
        tokens += 1
    elif isinstance(tool_choice, dict):
        tokens += 7
        tokens += len(encoding.encode(tool_choice["function"]["name"]))
    return tokens

0 commit comments

Comments
 (0)