Skip to content

Commit 0e83623

Browse files
authored
Merge pull request #11 from pamelafox/gpt4o
Add gpt 4o support
2 parents c630cbe + fd189c8 commit 0e83623

File tree

6 files changed

+56
-26
lines changed

6 files changed

+56
-26
lines changed

CHANGELOG.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,10 @@
22

33
All notable changes to this project will be documented in this file.
44

5+
## [0.1.4] - May 14, 2024
6+
7+
- Add support and tests for gpt-4o, which has a different tokenizer.
8+
59
## [0.1.3] - May 2, 2024
610

711
- Use openai type annotations for more precise type hints, and add a typing test.

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
[project]
22
name = "openai-messages-token-helper"
33
description = "A helper library for estimating tokens used by messages sent through OpenAI Chat Completions API."
4-
version = "0.1.3"
4+
version = "0.1.4"
55
authors = [{name = "Pamela Fox"}]
66
requires-python = ">=3.9"
77
readme = "README.md"

src/openai_messages_token_helper/model_helper.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
"gpt-4": 8100,
2222
"gpt-4-32k": 32000,
2323
"gpt-4v": 128000,
24+
"gpt-4o": 128000,
2425
}
2526

2627

tests/messages.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
"content": "You are a bot.",
55
},
66
"count": 12,
7+
"count_omni": 12,
78
}
89

910
system_message = {
@@ -12,6 +13,7 @@
1213
"content": "You are a helpful, pattern-following assistant that translates corporate jargon into plain English.",
1314
},
1415
"count": 25,
16+
"count_omni": 24,
1517
}
1618

1719
system_message_long = {
@@ -20,6 +22,7 @@
2022
"content": "Assistant helps the company employees with their healthcare plan questions, and questions about the employee handbook. Be brief in your answers.",
2123
},
2224
"count": 31,
25+
"count_omni": 31,
2326
}
2427

2528
system_message_unicode = {
@@ -28,6 +31,7 @@
2831
"content": "á",
2932
},
3033
"count": 8,
34+
"count_omni": 8,
3135
}
3236

3337
system_message_with_name = {
@@ -37,6 +41,7 @@
3741
"content": "New synergies will help drive top-line growth.",
3842
},
3943
"count": 20, # Less tokens in older vision preview models
44+
"count_omni": 20,
4045
}
4146

4247
user_message = {
@@ -45,6 +50,7 @@
4550
"content": "Hello, how are you?",
4651
},
4752
"count": 13,
53+
"count_omni": 13,
4854
}
4955

5056
user_message_unicode = {
@@ -53,6 +59,7 @@
5359
"content": "á",
5460
},
5561
"count": 8,
62+
"count_omni": 8,
5663
}
5764

5865
user_message_perf = {
@@ -61,6 +68,7 @@
6168
"content": "What happens in a performance review?",
6269
},
6370
"count": 14,
71+
"count_omni": 14,
6472
}
6573

6674
assistant_message_perf = {
@@ -69,6 +77,7 @@
6977
"content": "During the performance review at Contoso Electronics, the supervisor will discuss the employee's performance over the past year and provide feedback on areas for improvement. They will also provide an opportunity for the employee to discuss their goals and objectives for the upcoming year. The review is a two-way dialogue between managers and employees, and employees will receive a written summary of their performance review which will include a rating of their performance, feedback, and goals and objectives for the upcoming year [employee_handbook-3.pdf].",
7078
},
7179
"count": 106,
80+
"count_omni": 106,
7281
}
7382

7483
assistant_message_perf_short = {
@@ -77,6 +86,7 @@
7786
"content": "The supervisor will discuss the employee's performance and provide feedback on areas for improvement. They will also provide an opportunity for the employee to discuss their goals and objectives for the upcoming year. The review is a two-way dialogue between managers and employees, and employees will receive a written summary of their performance review which will include a rating of their performance, feedback, and goals for the upcoming year [employee_handbook-3.pdf].",
7887
},
7988
"count": 91,
89+
"count_omni": 91,
8090
}
8191

8292
user_message_dresscode = {
@@ -85,6 +95,7 @@
8595
"content": "Is there a dress code?",
8696
},
8797
"count": 13,
98+
"count_omni": 13,
8899
}
89100

90101
assistant_message_dresscode = {
@@ -93,13 +104,15 @@
93104
"content": "Yes, there is a dress code at Contoso Electronics. Look sharp! [employee_handbook-1.pdf]",
94105
},
95106
"count": 30,
107+
"count_omni": 30,
96108
}
97109
user_message_pm = {
98110
"message": {
99111
"role": "user",
100112
"content": "What does a Product Manager do?",
101113
},
102114
"count": 14,
115+
"count_omni": 14,
103116
}
104117
text_and_image_message = {
105118
"message": {
@@ -116,6 +129,7 @@
116129
],
117130
},
118131
"count": 266,
132+
"count_omni": 266,
119133
}
120134

121135
MESSAGE_COUNTS = [

tests/test_modelhelper.py

Lines changed: 21 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ def test_get_token_limit():
1212
assert get_token_limit("gpt-3.5-turbo-16k") == 16000
1313
assert get_token_limit("gpt-4") == 8100
1414
assert get_token_limit("gpt-4-32k") == 32000
15+
assert get_token_limit("gpt-4o") == 128000
1516

1617

1718
def test_get_token_limit_error():
@@ -27,15 +28,16 @@ def test_get_token_limit_default(caplog):
2728

2829
# parameterize the model and the expected number of tokens
2930
@pytest.mark.parametrize(
30-
"model",
31+
"model, count_key",
3132
[
32-
"gpt-35-turbo",
33-
"gpt-3.5-turbo",
34-
"gpt-35-turbo-16k",
35-
"gpt-3.5-turbo-16k",
36-
"gpt-4",
37-
"gpt-4-32k",
38-
"gpt-4v",
33+
("gpt-35-turbo", "count"),
34+
("gpt-3.5-turbo", "count"),
35+
("gpt-35-turbo-16k", "count"),
36+
("gpt-3.5-turbo-16k", "count"),
37+
("gpt-4", "count"),
38+
("gpt-4-32k", "count"),
39+
("gpt-4v", "count"),
40+
("gpt-4o", "count_omni"),
3941
],
4042
)
4143
@pytest.mark.parametrize(
@@ -46,13 +48,19 @@ def test_get_token_limit_default(caplog):
4648
system_message_with_name,
4749
],
4850
)
49-
def test_count_tokens_for_message(model: str, message: dict):
50-
assert count_tokens_for_message(model, message["message"]) == message["count"]
51+
def test_count_tokens_for_message(model, count_key, message):
52+
assert count_tokens_for_message(model, message["message"]) == message[count_key]
5153

5254

53-
def test_count_tokens_for_message_list():
54-
model = "gpt-4"
55-
assert count_tokens_for_message(model, text_and_image_message["message"]) == text_and_image_message["count"]
55+
@pytest.mark.parametrize(
56+
"model, count_key",
57+
[
58+
("gpt-4", "count"),
59+
("gpt-4o", "count_omni"),
60+
],
61+
)
62+
def test_count_tokens_for_message_list(model, count_key):
63+
assert count_tokens_for_message(model, text_and_image_message["message"]) == text_and_image_message[count_key]
5664

5765

5866
def test_count_tokens_for_message_error():

tests/verify_openai.py

Lines changed: 15 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -39,16 +39,19 @@
3939

4040
# Test the token count for each message
4141
for message_count_pair in MESSAGE_COUNTS:
42-
response = client.chat.completions.create(
43-
model=MODEL_NAME,
44-
temperature=0.7,
45-
n=1,
46-
messages=[message_count_pair["message"]], # type: ignore[list-item]
47-
)
42+
for model, expected_tokens in [
43+
(MODEL_NAME, message_count_pair["count"]),
44+
("gpt-4o", message_count_pair["count_omni"]),
45+
]:
46+
response = client.chat.completions.create(
47+
model=model,
48+
temperature=0.7,
49+
n=1,
50+
messages=[message_count_pair["message"]], # type: ignore[list-item]
51+
)
4852

49-
print(message_count_pair["message"])
50-
expected_tokens = message_count_pair["count"]
51-
assert response.usage is not None, "Expected usage to be present"
52-
assert (
53-
response.usage.prompt_tokens == expected_tokens
54-
), f"Expected {expected_tokens} tokens, got {response.usage.prompt_tokens}"
53+
print(message_count_pair["message"])
54+
assert response.usage is not None, "Expected usage to be present"
55+
assert (
56+
response.usage.prompt_tokens == expected_tokens
57+
), f"Expected {expected_tokens} tokens, got {response.usage.prompt_tokens} for model {model}"

0 commit comments

Comments (0)