
Commit 648a39c

Defaults for encoders and limits
1 parent 06ce38e commit 648a39c

File tree

4 files changed: +53 additions, -21 deletions

CHANGELOG.md

Lines changed: 5 additions & 0 deletions
@@ -2,6 +2,11 @@

All notable changes to this project will be documented in this file.

+## [0.0.5] - April 24, 2024
+
+- Add keyword argument `default_to_cl100k` to the `count_tokens_for_message` function to allow defaulting to the CL100k encoding if the model is not found.
+- Add keyword argument `default_to_minimum` to the `get_token_limit` function to allow defaulting to the minimum token limit if the model is not found.
+
## [0.0.4] - April 21, 2024

- Rename to openai-messages-token-helper from llm-messages-token-helper to reflect library's current OpenAI focus.
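
As a quick illustration of the first new flag, here is a minimal sketch (not from this commit) of calling `count_tokens_for_message` with `default_to_cl100k=True`; `my-custom-model` is a hypothetical name absent from the library's model tables, and the top-level import assumes the package re-exports the helper:

```python
from openai_messages_token_helper import count_tokens_for_message  # assumed re-export

message = {"role": "user", "content": "hello"}

# "my-custom-model" is hypothetical: tiktoken does not recognize it, so the
# helper logs a warning and falls back to the cl100k_base encoding instead
# of raising ValueError.
num_tokens = count_tokens_for_message("my-custom-model", message, default_to_cl100k=True)
```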

README.md

Lines changed: 3 additions & 0 deletions
@@ -79,6 +79,8 @@ Counts the number of tokens in a message.
Arguments:

* `model` (`str`): The model name to use for token calculation, like gpt-3.5-turbo.
+* `message` (`dict`): The message to count tokens for.
+* `default_to_cl100k` (`bool`): Whether to default to the CL100k encoding if the model is not found.

Returns:

@@ -129,6 +131,7 @@ Get the token limit for a given GPT model name (OpenAI.com or Azure OpenAI supported).
Arguments:

* `model` (`str`): The model name to use for token calculation, like gpt-3.5-turbo (OpenAI.com) or gpt-35-turbo (Azure).
+* `default_to_minimum` (`bool`): Whether to default to the minimum token limit if the model is not found.

Returns:
src/openai_messages_token_helper/model_helper.py

Lines changed: 28 additions & 13 deletions
@@ -1,5 +1,6 @@
from __future__ import annotations

+import logging
from collections.abc import Mapping

import tiktoken
@@ -19,28 +20,37 @@

AOAI_2_OAI = {"gpt-35-turbo": "gpt-3.5-turbo", "gpt-35-turbo-16k": "gpt-3.5-turbo-16k", "gpt-4v": "gpt-4-turbo-vision"}

+logger = logging.getLogger("openai_messages_token_helper")

-def get_token_limit(model: str) -> int:
+
+def get_token_limit(model: str, default_to_minimum=False) -> int:
    """
    Get the token limit for a given GPT model name (OpenAI.com or Azure OpenAI supported).
    Args:
        model (str): The name of the model to get the token limit for.
+        default_to_minimum (bool): Whether to default to the minimum token limit if the model is not found.
    Returns:
        int: The token limit for the model.
    """
    if model not in MODELS_2_TOKEN_LIMITS:
-        raise ValueError(f"Called with unknown model name: {model}")
+        if default_to_minimum:
+            min_token_limit = min(MODELS_2_TOKEN_LIMITS.values())
+            logger.warning("Model %s not found, defaulting to minimum token limit %d", model, min_token_limit)
+            return min_token_limit
+        else:
+            raise ValueError(f"Called with unknown model name: {model}")
    return MODELS_2_TOKEN_LIMITS[model]


-def count_tokens_for_message(model: str, message: Mapping[str, object]) -> int:
+def count_tokens_for_message(model: str, message: Mapping[str, object], default_to_cl100k=False) -> int:
    """
    Calculate the number of tokens required to encode a message. Based off cookbook:
    https://github.com/openai/openai-cookbook/blob/main/examples/How_to_count_tokens_with_tiktoken.ipynb

    Args:
        model (str): The name of the model to use for encoding.
        message (Mapping): The message to encode, in a dictionary-like object.
+        default_to_cl100k (bool): Whether to default to the CL100k encoding if the model is not found.
    Returns:
        int: The total number of tokens required to encode the message.

@@ -49,8 +59,22 @@ def count_tokens_for_message(model: str, message: Mapping[str, object]) -> int:
    >> count_tokens_for_message(model, message)
    13
    """
+    if (
+        model == ""
+        or model is None
+        or (model not in AOAI_2_OAI and model not in MODELS_2_TOKEN_LIMITS and not default_to_cl100k)
+    ):
+        raise ValueError("Expected valid OpenAI GPT model name")
+    model = AOAI_2_OAI.get(model, model)
+    try:
+        encoding = tiktoken.encoding_for_model(model)
+    except KeyError:
+        if default_to_cl100k:
+            logger.warning("Model %s not found, defaulting to CL100k encoding", model)
+            encoding = tiktoken.get_encoding("cl100k_base")
+        else:
+            raise

-    encoding = tiktoken.encoding_for_model(get_oai_chatmodel_tiktok(model))
    # Assumes we're using a recent model
    tokens_per_message = 3

@@ -72,12 +96,3 @@ def count_tokens_for_message(model: str, message: Mapping[str, object]) -> int:
            num_tokens += 1
    num_tokens += 3  # every reply is primed with <|start|>assistant<|message|>
    return num_tokens
-
-
-def get_oai_chatmodel_tiktok(aoaimodel: str) -> str:
-    message = "Expected valid OpenAI GPT model name"
-    if aoaimodel == "" or aoaimodel is None:
-        raise ValueError(message)
-    if aoaimodel not in AOAI_2_OAI and aoaimodel not in MODELS_2_TOKEN_LIMITS:
-        raise ValueError(message)
-    return AOAI_2_OAI.get(aoaimodel, aoaimodel)
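
One note on the new `logger`: it is created with `logging.getLogger("openai_messages_token_helper")` and no handler, so the fallback warnings stay invisible unless the application configures logging. A minimal standard-library sketch of how a caller might opt in:

```python
import logging

# Simplest option: a root handler at WARNING level surfaces the library's messages.
logging.basicConfig(level=logging.WARNING)

# Or adjust only this library's named logger, leaving the rest of the app alone.
logging.getLogger("openai_messages_token_helper").setLevel(logging.ERROR)
```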

tests/test_modelhelper.py

Lines changed: 17 additions & 8 deletions
@@ -18,6 +18,12 @@ def test_get_token_limit_error():
        get_token_limit("gpt-3")


+def test_get_token_limit_default(caplog):
+    with caplog.at_level("WARNING"):
+        assert get_token_limit("gpt-3", default_to_minimum=True) == 4000
+        assert "Model gpt-3 not found, defaulting to minimum token limit 4000" in caplog.text
+
+
# parameterize the model and the expected number of tokens
@pytest.mark.parametrize(
    "model",
@@ -58,14 +64,17 @@ def test_count_tokens_for_message_error():
        count_tokens_for_message(model, message)


-def test_get_oai_chatmodel_tiktok_error():
-    message = {
-        "role": "user",
-        "content": "hello",
-    }
+def test_count_tokens_for_message_model_error():
    with pytest.raises(ValueError, match="Expected valid OpenAI GPT model name"):
-        count_tokens_for_message("", message)
+        count_tokens_for_message("", user_message["message"])
    with pytest.raises(ValueError, match="Expected valid OpenAI GPT model name"):
-        count_tokens_for_message(None, message)
+        count_tokens_for_message(None, user_message["message"])
    with pytest.raises(ValueError, match="Expected valid OpenAI GPT model name"):
-        count_tokens_for_message("gpt44", message)
+        count_tokens_for_message("gpt44", user_message["message"])
+
+
+def test_count_tokens_for_message_model_default(caplog):
+    model = "phi-3"
+    with caplog.at_level("WARNING"):
+        assert count_tokens_for_message(model, user_message["message"], default_to_cl100k=True) == user_message["count"]
+        assert "Model phi-3 not found, defaulting to CL100k encoding" in caplog.text
