Skip to content

Commit 690ab5d

Browse files
authored
Improvements to token counting for images and type annotations (#1244)
* Improve token counting
* Update tests
* Adding pillow dependency to backend
* Update pillow-types
* Add source in comment
* Comments and types
* Adding comment
1 parent 51b6453 commit 690ab5d

File tree

14 files changed

+152
-18
lines changed

14 files changed

+152
-18
lines changed

app/backend/approaches/chatapproach.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -110,7 +110,7 @@ def get_messages_from_history(
110110

111111
total_token_count = 0
112112
for existing_message in message_builder.messages:
113-
total_token_count += message_builder.count_tokens_for_message(dict(existing_message)) # type: ignore
113+
total_token_count += message_builder.count_tokens_for_message(existing_message)
114114

115115
newest_to_oldest = list(reversed(history[:-1]))
116116
for message in newest_to_oldest:

app/backend/core/imageshelper.py

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,12 @@
11
import base64
2+
import math
23
import os
4+
import re
5+
from io import BytesIO
36
from typing import Optional
47

58
from azure.storage.blob.aio import ContainerClient
9+
from PIL import Image
610
from typing_extensions import Literal, Required, TypedDict
711

812
from approaches.approach import Document
@@ -34,3 +38,51 @@ async def fetch_image(blob_container_client: ContainerClient, result: Document)
3438
else:
3539
return None
3640
return None
41+
42+
43+
def get_image_dims(image_uri: str) -> tuple[int, int]:
    """Return the ``(width, height)`` in pixels of a base64 image data URI.

    Args:
        image_uri: A data URI of the form ``data:image/<subtype>;base64,<payload>``.

    Returns:
        The image size as reported by Pillow, as a ``(width, height)`` tuple.

    Raises:
        ValueError: If ``image_uri`` does not start with a base64 image
            data-URI prefix (including the comma that precedes the payload).
    """
    # From https://github.com/openai/openai-cookbook/pull/881/files
    # A single anchored pattern is used both to validate and to locate the
    # payload, so the check and the strip can never disagree. The subtype
    # class also admits ".", "+" and "-" (e.g. "x-icon", "svg+xml").
    prefix = re.match(r"data:image/[\w.+-]+;base64,", image_uri)
    if prefix is None:
        raise ValueError("Image must be a base64 string.")
    # Slice off only the leading prefix; the payload itself is left untouched.
    encoded = image_uri[prefix.end():]
    # The context manager releases the decoder/file handle once the size is read.
    with Image.open(BytesIO(base64.b64decode(encoded))) as image:
        return image.size
51+
52+
53+
def calculate_image_token_cost(image_uri: str, detail: str = "auto") -> int:
    """Estimate the token cost of sending an image to a vision chat model.

    Args:
        image_uri: Base64 image data URI; only inspected for high detail.
        detail: ``"low"``, ``"high"`` or ``"auto"`` (``"auto"`` is treated
            as ``"high"``).

    Returns:
        The fixed low-detail cost, or for high detail the per-tile cost of the
        512px tiles covering the rescaled image plus a fixed additional cost.

    Raises:
        ValueError: If ``detail`` is not one of the accepted values.
    """
    # From https://github.com/openai/openai-cookbook/pull/881/files
    # Based on https://platform.openai.com/docs/guides/vision
    LOW_DETAIL_COST = 85
    HIGH_DETAIL_COST_PER_TILE = 170
    ADDITIONAL_COST = 85

    # assume high detail for now
    effective_detail = "high" if detail == "auto" else detail

    # Low detail images have a fixed cost
    if effective_detail == "low":
        return LOW_DETAIL_COST

    # Invalid detail_option
    if effective_detail != "high":
        raise ValueError("Invalid value for detail parameter. Use 'low' or 'high'.")

    # High detail: the cost depends on the (rescaled) image dimensions.
    width, height = get_image_dims(image_uri)

    # Step 1: shrink to fit within a 2048 x 2048 square when needed.
    longest_side = max(width, height)
    if longest_side > 2048:
        fit_ratio = 2048 / longest_side
        width, height = int(width * fit_ratio), int(height * fit_ratio)

    # Step 2: shrink further so the shortest side is 768px when it is larger.
    shortest_side = min(width, height)
    if shortest_side > 768:
        short_ratio = 768 / shortest_side
        width, height = int(width * short_ratio), int(height * short_ratio)

    # Step 3: count the 512px tiles needed to cover the image.
    tiles_across = math.ceil(width / 512)
    tiles_down = math.ceil(height / 512)

    return tiles_across * tiles_down * HIGH_DETAIL_COST_PER_TILE + ADDITIONAL_COST

app/backend/core/messagebuilder.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import unicodedata
2+
from collections.abc import Mapping
23
from typing import List, Union
34

45
from openai.types.chat import (
@@ -52,7 +53,7 @@ def insert_message(self, role: str, content: Union[str, List[ChatCompletionConte
5253
raise ValueError(f"Invalid role: {role}")
5354
self.messages.insert(index, message)
5455

55-
def count_tokens_for_message(self, message: dict[str, str]):
56+
def count_tokens_for_message(self, message: Mapping[str, object]):
5657
return num_tokens_from_messages(message, self.model)
5758

5859
def normalize_content(self, content: Union[str, List[ChatCompletionContentPartParam]]):

app/backend/core/modelhelper.py

Lines changed: 17 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,11 @@
11
from __future__ import annotations
22

3+
from collections.abc import Mapping
4+
35
import tiktoken
46

7+
from .imageshelper import calculate_image_token_cost
8+
59
MODELS_2_TOKEN_LIMITS = {
610
"gpt-35-turbo": 4000,
711
"gpt-3.5-turbo": 4000,
@@ -22,11 +26,11 @@ def get_token_limit(model_id: str) -> int:
2226
return MODELS_2_TOKEN_LIMITS[model_id]
2327

2428

25-
def num_tokens_from_messages(message: dict[str, str], model: str) -> int:
29+
def num_tokens_from_messages(message: Mapping[str, object], model: str) -> int:
2630
"""
2731
Calculate the number of tokens required to encode a message.
2832
Args:
29-
message (dict): The message to encode, represented as a dictionary.
33+
message (Mapping): The message to encode, in a dictionary-like object.
3034
model (str): The name of the model to use for encoding.
3135
Returns:
3236
int: The total number of tokens required to encode the message.
@@ -39,14 +43,19 @@ def num_tokens_from_messages(message: dict[str, str], model: str) -> int:
3943

4044
encoding = tiktoken.encoding_for_model(get_oai_chatmodel_tiktok(model))
4145
num_tokens = 2 # For "role" and "content" keys
42-
for key, value in message.items():
46+
for value in message.values():
4347
if isinstance(value, list):
44-
for v in value:
45-
# TODO: Update token count for images https://github.com/openai/openai-cookbook/pull/881/files
46-
if isinstance(v, str):
47-
num_tokens += len(encoding.encode(v))
48-
else:
48+
# For GPT-4-vision support, based on https://github.com/openai/openai-cookbook/pull/881/files
49+
for item in value:
50+
num_tokens += len(encoding.encode(item["type"]))
51+
if item["type"] == "text":
52+
num_tokens += len(encoding.encode(item["text"]))
53+
elif item["type"] == "image_url":
54+
num_tokens += calculate_image_token_cost(item["image_url"]["url"], item["image_url"]["detail"])
55+
elif isinstance(value, str):
4956
num_tokens += len(encoding.encode(value))
57+
else:
58+
raise ValueError(f"Could not encode unsupported message value type: {type(value)}")
5059
return num_tokens
5160

5261

app/backend/requirements.in

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,3 +17,5 @@ msal
1717
azure-keyvault-secrets
1818
cryptography
1919
python-jose[cryptography]
20+
Pillow
21+
types-Pillow

app/backend/requirements.txt

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -257,6 +257,8 @@ pandas==2.2.0
257257
# via openai
258258
pandas-stubs==2.1.4.231227
259259
# via openai
260+
pillow==10.2.0
261+
# via -r requirements.in
260262
portalocker==2.8.2
261263
# via msal-extensions
262264
priority==2.0.0
@@ -315,6 +317,8 @@ tiktoken==0.5.2
315317
# via -r requirements.in
316318
tqdm==4.66.1
317319
# via openai
320+
types-pillow==10.2.0.20240206
321+
# via -r requirements.in
318322
types-pytz==2023.4.0.20240130
319323
# via pandas-stubs
320324
typing-extensions==4.9.0

scripts/requirements.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -174,7 +174,7 @@ tiktoken==0.5.2
174174
# via -r requirements.in
175175
tqdm==4.66.1
176176
# via openai
177-
types-pillow==10.2.0.20240125
177+
types-pillow==10.2.0.20240206
178178
# via -r requirements.in
179179
types-pytz==2023.4.0.20240130
180180
# via pandas-stubs

tests/image_large.png

310 KB
Loading

tests/mocks.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ def __init__(self):
2828
)
2929

3030
async def readall(self):
31-
return b"\x89PNG\x50\x4e\x47\x0d\x0a\x1a\x0a\x00\x00\x00\x0d\x49\x48\x44\x52\x00\x00\x00\x01\x00\x00\x00\x01\x01\x00\x00\x00\x00\x37\x6e\xf9\x24\x00\x00\x00\x0a\x49\x44\x41\x54\x78\x9c\x63\x00\x01\x00\x00\x05\x00\x01\x0d\x0d\x2d\xba\x1b\x00\x00\x00\x00\x49\x45\x4e\x44\xae\x42\x60\x82"
31+
return b"\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00\x00\x01\x00\x00\x00\x01\x08\x06\x00\x00\x00\x1f\x15\xc4\x89\x00\x00\x00\rIDATx\xdac\xfc\xcf\xf0\xbf\x1e\x00\x06\x83\x02\x7f\x94\xad\xd0\xeb\x00\x00\x00\x00IEND\xaeB`\x82"
3232

3333

3434
class MockKeyVaultSecret:

tests/snapshots/test_app/test_ask_vision/client1/result.json

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
"images": [
77
{
88
"detail": "auto",
9-
"url": "data:image/png;base64,iVBOR1BORw0KGgoAAAANSUhEUgAAAAEAAAABAQAAAAA3bvkkAAAACklEQVR4nGMAAQAABQABDQ0tuhsAAAAASUVORK5CYII="
9+
"url": "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mP8z/C/HgAGgwJ/lK3Q6wAAAABJRU5ErkJggg=="
1010
}
1111
],
1212
"text": [
@@ -46,7 +46,7 @@
4646
{
4747
"description": [
4848
"{'role': 'system', 'content': \"You are an intelligent assistant helping analyze the Annual Financial Report of Contoso Ltd., The documents contain text, graphs, tables and images. Each image source has the file name in the top left corner of the image with coordinates (10,10) pixels and is in the format SourceFileName:<file_name> Each text source starts in a new line and has the file name followed by colon and the actual information Always include the source name from the image or text for each fact you use in the response in the format: [filename] Answer the following question using only the data provided in the sources below. For tabular information return it as an html table. Do not return markdown format. The text and image source can be the same file name, don't use the image title when citing the image source, only use the file name as mentioned If you cannot answer using the sources below, say you don't know. Return just the answer without any input texts \"}",
49-
"{'role': 'user', 'content': [{'text': 'Are interest rates high?', 'type': 'text'}, {'text': 'Financial Market Analysis Report 2023-6.png: 3</td><td>1</td></tr></table> Financial markets are interconnected, with movements in one segment often influencing others. This section examines the correlations between stock indices, cryptocurrency prices, and commodity prices, revealing how changes in one market can have ripple effects across the financial ecosystem.Impact of Macroeconomic Factors Impact of Interest Rates, Inflation, and GDP Growth on Financial Markets 5 4 3 2 1 0 -1 2018 2019 -2 -3 -4 -5 2020 2021 2022 2023 Macroeconomic factors such as interest rates, inflation, and GDP growth play a pivotal role in shaping financial markets. This section analyzes how these factors have influenced stock, cryptocurrency, and commodity markets over recent years, providing insights into the complex relationship between the economy and financial market performance. -Interest Rates % -Inflation Data % GDP Growth % :unselected: :unselected:Future Predictions and Trends Relative Growth Trends for S&P 500, Bitcoin, and Oil Prices (2024 Indexed to 100) 2028 Based on historical data, current trends, and economic indicators, this section presents predictions ', 'type': 'text'}, {'image_url': {'url': 'data:image/png;base64,iVBOR1BORw0KGgoAAAANSUhEUgAAAAEAAAABAQAAAAA3bvkkAAAACklEQVR4nGMAAQAABQABDQ0tuhsAAAAASUVORK5CYII=', 'detail': 'auto'}, 'type': 'image_url'}]}"
49+
"{'role': 'user', 'content': [{'text': 'Are interest rates high?', 'type': 'text'}, {'text': 'Financial Market Analysis Report 2023-6.png: 3</td><td>1</td></tr></table> Financial markets are interconnected, with movements in one segment often influencing others. This section examines the correlations between stock indices, cryptocurrency prices, and commodity prices, revealing how changes in one market can have ripple effects across the financial ecosystem.Impact of Macroeconomic Factors Impact of Interest Rates, Inflation, and GDP Growth on Financial Markets 5 4 3 2 1 0 -1 2018 2019 -2 -3 -4 -5 2020 2021 2022 2023 Macroeconomic factors such as interest rates, inflation, and GDP growth play a pivotal role in shaping financial markets. This section analyzes how these factors have influenced stock, cryptocurrency, and commodity markets over recent years, providing insights into the complex relationship between the economy and financial market performance. -Interest Rates % -Inflation Data % GDP Growth % :unselected: :unselected:Future Predictions and Trends Relative Growth Trends for S&P 500, Bitcoin, and Oil Prices (2024 Indexed to 100) 2028 Based on historical data, current trends, and economic indicators, this section presents predictions ', 'type': 'text'}, {'image_url': {'url': 'data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mP8z/C/HgAGgwJ/lK3Q6wAAAABJRU5ErkJggg==', 'detail': 'auto'}, 'type': 'image_url'}]}"
5050
],
5151
"props": null,
5252
"title": "Prompt"

0 commit comments

Comments
 (0)