Skip to content

Commit 0fd1858

Browse files
committed
gpt-4o-mini support
1 parent d79be12 commit 0fd1858

File tree

10 files changed

+106
-20
lines changed

10 files changed

+106
-20
lines changed

CHANGELOG.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,10 @@
22

33
All notable changes to this project will be documented in this file.
44

5+
## [0.1.9] - Aug 7, 2024
6+
7+
- Add gpt-4o-mini support by applying a 33⅓× (33 + 1/3) multiplier to the image token cost.
8+
59
## [0.1.8] - Aug 3, 2024
610

711
- Fix the type for the tool_choice param to be inclusive of "auto" and other options.

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
[project]
22
name = "openai-messages-token-helper"
33
description = "A helper library for estimating tokens used by messages sent through OpenAI Chat Completions API."
4-
version = "0.1.8"
4+
version = "0.1.9"
55
authors = [{name = "Pamela Fox"}]
66
requires-python = ">=3.9"
77
readme = "README.md"

src/openai_messages_token_helper/images_helper.py

Lines changed: 11 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import base64
22
import math
33
import re
4+
from fractions import Fraction
45
from io import BytesIO
56

67
from PIL import Image
@@ -16,20 +17,23 @@ def get_image_dims(image_uri: str) -> tuple[int, int]:
1617
raise ValueError("Image must be a base64 string.")
1718

1819

19-
def count_tokens_for_image(image_uri: str, detail: str = "auto") -> int:
20+
def count_tokens_for_image(image_uri: str, detail: str = "auto", model: str = None) -> int:
2021
# From https://github.com/openai/openai-cookbook/pull/881/files
2122
# Based on https://platform.openai.com/docs/guides/vision
22-
LOW_DETAIL_COST = 85
23-
HIGH_DETAIL_COST_PER_TILE = 170
24-
ADDITIONAL_COST = 85
23+
multiplier = 1
24+
if model == "gpt-4o-mini":
25+
multiplier = 33 + Fraction(1, 3)
26+
COST_PER_TILE = 85 * multiplier
27+
LOW_DETAIL_COST = COST_PER_TILE
28+
HIGH_DETAIL_COST_PER_TILE = COST_PER_TILE * 2
2529

2630
if detail == "auto":
2731
# assume high detail for now
2832
detail = "high"
2933

3034
if detail == "low":
3135
# Low detail images have a fixed cost
32-
return LOW_DETAIL_COST
36+
return int(LOW_DETAIL_COST)
3337
elif detail == "high":
3438
# Calculate token cost for high detail images
3539
width, height = get_image_dims(image_uri)
@@ -47,8 +51,8 @@ def count_tokens_for_image(image_uri: str, detail: str = "auto") -> int:
4751
# Calculate the number of 512px squares
4852
num_squares = math.ceil(width / 512) * math.ceil(height / 512)
4953
# Calculate the total token cost
50-
total_cost = num_squares * HIGH_DETAIL_COST_PER_TILE + ADDITIONAL_COST
51-
return total_cost
54+
total_cost = num_squares * HIGH_DETAIL_COST_PER_TILE + COST_PER_TILE
55+
return math.ceil(total_cost)
5256
else:
5357
# Invalid detail_option
5458
raise ValueError("Invalid value for detail parameter. Use 'low' or 'high'.")

src/openai_messages_token_helper/model_helper.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
"gpt-4-32k": 32000,
2323
"gpt-4v": 128000,
2424
"gpt-4o": 128000,
25+
"gpt-4o-mini": 128000,
2526
}
2627

2728

@@ -106,7 +107,7 @@ def count_tokens_for_message(model: str, message: ChatCompletionMessageParam, de
106107
if item["type"] == "text":
107108
num_tokens += len(encoding.encode(item["text"]))
108109
elif item["type"] == "image_url":
109-
num_tokens += count_tokens_for_image(item["image_url"]["url"], item["image_url"]["detail"])
110+
num_tokens += count_tokens_for_image(item["image_url"]["url"], item["image_url"]["detail"], model)
110111
elif isinstance(value, str):
111112
num_tokens += len(encoding.encode(value))
112113
else:

tests/image_messages.py

Lines changed: 55 additions & 0 deletions
Large diffs are not rendered by default.

tests/messages.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -146,5 +146,4 @@
146146
assistant_message_perf,
147147
assistant_message_perf_short,
148148
assistant_message_dresscode,
149-
text_and_image_message,
150149
]

tests/test_imageshelper.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ def large_image():
1818

1919
def test_count_tokens_for_image(small_image, large_image):
2020
assert count_tokens_for_image(small_image, "low") == 85
21+
assert count_tokens_for_image(small_image, "low", "gpt-4o-mini") == 2833
2122
assert count_tokens_for_image(small_image, "high") == 255
2223
assert count_tokens_for_image(small_image) == 255
2324
assert count_tokens_for_image(large_image, "low") == 85

tests/test_messagebuilder.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,14 +9,14 @@
99
from openai_messages_token_helper import build_messages, count_tokens_for_message
1010

1111
from .functions import search_sources_toolchoice_auto
12+
from .image_messages import text_and_tiny_image_message
1213
from .messages import (
1314
assistant_message_dresscode,
1415
assistant_message_perf,
1516
assistant_message_perf_short,
1617
system_message_long,
1718
system_message_short,
1819
system_message_unicode,
19-
text_and_image_message,
2020
user_message,
2121
user_message_dresscode,
2222
user_message_perf,
@@ -35,9 +35,9 @@ def test_messagebuilder_imagemessage():
3535
messages = build_messages(
3636
"gpt-35-turbo",
3737
system_message_short["message"]["content"],
38-
new_user_content=text_and_image_message["message"]["content"],
38+
new_user_content=text_and_tiny_image_message["message"]["content"],
3939
)
40-
assert messages == [system_message_short["message"], text_and_image_message["message"]]
40+
assert messages == [system_message_short["message"], text_and_tiny_image_message["message"]]
4141

4242

4343
def test_messagebuilder_append():

tests/test_modelhelper.py

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,8 @@
22
from openai_messages_token_helper import count_tokens_for_message, count_tokens_for_system_and_tools, get_token_limit
33

44
from .functions import FUNCTION_COUNTS, search_sources_toolchoice_auto
5-
from .messages import system_message, system_message_with_name, text_and_image_message, user_message
5+
from .image_messages import IMAGE_MESSAGE_COUNTS
6+
from .messages import system_message, system_message_with_name, user_message
67

78

89
def test_get_token_limit():
@@ -56,11 +57,13 @@ def test_count_tokens_for_message(model, count_key, message):
5657
"model, count_key",
5758
[
5859
("gpt-4", "count"),
59-
("gpt-4o", "count_omni"),
60+
("gpt-4o", "count"),
61+
("gpt-4o-mini", "count_4o_mini"),
6062
],
6163
)
6264
def test_count_tokens_for_message_list(model, count_key):
63-
assert count_tokens_for_message(model, text_and_image_message["message"]) == text_and_image_message[count_key]
65+
for message_count_pair in IMAGE_MESSAGE_COUNTS:
66+
assert count_tokens_for_message(model, message_count_pair["message"]) == message_count_pair[count_key]
6467

6568

6669
def test_count_tokens_for_message_error():

tests/verify_openai.py

Lines changed: 23 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,8 @@
44
import azure.identity
55
import openai
66
from dotenv import load_dotenv
7-
from messages import MESSAGE_COUNTS # type: ignore[import-not-found]
7+
from image_messages import IMAGE_MESSAGE_COUNTS
8+
from messages import MESSAGE_COUNTS
89

910
# Setup the OpenAI client to use either Azure OpenAI or OpenAI API
1011
load_dotenv()
@@ -38,10 +39,29 @@
3839
MODEL_NAME = openai_model
3940

4041
# Test the token count for each message
42+
4143
for message_count_pair in MESSAGE_COUNTS:
44+
for model, expected_tokens in [("gpt-4o", message_count_pair["count_omni"])]:
45+
message = message_count_pair["message"]
46+
expected_tokens = message_count_pair["count"]
47+
response = client.chat.completions.create(
48+
model=MODEL_NAME,
49+
temperature=0.7,
50+
n=1,
51+
messages=[message], # type: ignore[list-item]
52+
)
53+
54+
print(message)
55+
assert response.usage is not None, "Expected usage to be present"
56+
assert (
57+
response.usage.prompt_tokens == expected_tokens
58+
), f"Expected {expected_tokens} tokens, got {response.usage.prompt_tokens} for model {MODEL_NAME}"
59+
60+
61+
for message_count_pair in IMAGE_MESSAGE_COUNTS:
4262
for model, expected_tokens in [
43-
(MODEL_NAME, message_count_pair["count"]),
44-
("gpt-4o", message_count_pair["count_omni"]),
63+
("gpt-4o", message_count_pair["count"]),
64+
("gpt-4o-mini", message_count_pair["count_4o_mini"]),
4565
]:
4666
response = client.chat.completions.create(
4767
model=model,
@@ -50,7 +70,6 @@
5070
messages=[message_count_pair["message"]], # type: ignore[list-item]
5171
)
5272

53-
print(message_count_pair["message"])
5473
assert response.usage is not None, "Expected usage to be present"
5574
assert (
5675
response.usage.prompt_tokens == expected_tokens

0 commit comments

Comments
 (0)