Skip to content

Commit f66f5d1

Browse files
committed
Shrink images under limit
1 parent baf44be commit f66f5d1

File tree

3 files changed

+54 -89 lines changed

3 files changed

+54 -89 lines changed

interpreter/core/core.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -48,7 +48,7 @@ def __init__(
4848
debug=False,
4949
max_output=2800,
5050
safe_mode="off",
51-
shrink_images=False,
51+
shrink_images=True,
5252
loop=False,
5353
loop_message="""Proceed. You CAN run code on my machine. If the entire task I asked for is done, say exactly 'The task is done.' If you need some specific information (like username or password) say EXACTLY 'Please provide more information.' If it's impossible, say 'The task is impossible.' (If I haven't provided a task, say exactly 'Let me know what you'd like to do next.') Otherwise keep going.""",
5454
loop_breakers=[

interpreter/core/llm/utils/convert_to_openai_messages.py

Lines changed: 53 additions & 87 deletions
Original file line number | Diff line number | Diff line change
@@ -123,95 +123,18 @@ def convert_to_openai_messages(
123123
else:
124124
extension = "png"
125125

126-
# Construct the content string
127-
content = f"data:image/{extension};base64,{message['content']}"
128-
129-
if shrink_images:
130-
try:
131-
# Decode the base64 image
132-
img_data = base64.b64decode(message["content"])
133-
img = Image.open(io.BytesIO(img_data))
134-
135-
# Resize the image if it's width is more than 1024
136-
if img.width > 1024:
137-
new_height = int(img.height * 1024 / img.width)
138-
img = img.resize((1024, new_height))
139-
140-
# Convert the image back to base64
141-
buffered = io.BytesIO()
142-
img.save(buffered, format=extension)
143-
img_str = base64.b64encode(buffered.getvalue()).decode(
144-
"utf-8"
145-
)
146-
content = f"data:image/{extension};base64,{img_str}"
147-
except:
148-
# This should be non blocking. It's not required
149-
# print("Failed to shrink image. Proceeding with original image size.")
150-
pass
151-
152-
# Must be less than 5mb
153-
# Calculate the size of the original binary data in bytes
154-
content_size_bytes = len(message["content"]) * 3 / 4
155-
156-
# Convert the size to MB
157-
content_size_mb = content_size_bytes / (1024 * 1024)
158-
159-
# If the content size is greater than 5 MB, resize the image
160-
if content_size_mb > 5:
161-
try:
162-
# Decode the base64 image
163-
img_data = base64.b64decode(message["content"])
164-
img = Image.open(io.BytesIO(img_data))
165-
166-
# Calculate the size of the original binary data in bytes
167-
content_size_bytes = len(img_data)
168-
169-
# Convert the size to MB
170-
content_size_mb = content_size_bytes / (1024 * 1024)
171-
172-
# Run in a loop to make SURE it's less than 5mb
173-
while content_size_mb > 5:
174-
# Calculate the scale factor needed to reduce the image size to 5 MB
175-
scale_factor = (5 / content_size_mb) ** 0.5
176-
177-
# Calculate the new dimensions
178-
new_width = int(img.width * scale_factor)
179-
new_height = int(img.height * scale_factor)
180-
181-
# Resize the image
182-
img = img.resize((new_width, new_height))
183-
184-
# Convert the image back to base64
185-
buffered = io.BytesIO()
186-
img.save(buffered, format=extension)
187-
img_str = base64.b64encode(buffered.getvalue()).decode(
188-
"utf-8"
189-
)
190-
191-
# Set the content
192-
content = f"data:image/{extension};base64,{img_str}"
193-
194-
# Recalculate the size of the content in bytes
195-
content_size_bytes = len(content) * 3 / 4
196-
197-
# Convert the size to MB
198-
content_size_mb = content_size_bytes / (1024 * 1024)
199-
except:
200-
# This should be non blocking. It's not required
201-
# print("Failed to shrink image. Proceeding with original image size.")
202-
pass
126+
encoded_string = message["content"]
203127

204128
elif message["format"] == "path":
205129
# Convert to base64
206130
image_path = message["content"]
207-
file_extension = image_path.split(".")[-1]
131+
extension = image_path.split(".")[-1]
208132

209133
with open(image_path, "rb") as image_file:
210134
encoded_string = base64.b64encode(image_file.read()).decode(
211135
"utf-8"
212136
)
213137

214-
content = f"data:image/{file_extension};base64,{encoded_string}"
215138
else:
216139
# Probably would be better to move this to a validation pass
217140
# Near core, through the whole messages object
@@ -222,17 +145,60 @@ def convert_to_openai_messages(
222145
f"Unrecognized image format: {message['format']}"
223146
)
224147

225-
# Calculate the size of the original binary data in bytes
226-
content_size_bytes = len(content) * 3 / 4
148+
content = f"data:image/{extension};base64,{encoded_string}"
149+
150+
if shrink_images:
151+
# Shrink to less than 5mb
152+
153+
# Calculate size
154+
content_size_bytes = len(content) * 3 / 4
155+
156+
# Convert the size to MB
157+
content_size_mb = content_size_bytes / (1024 * 1024)
158+
159+
# If the content size is greater than 5 MB, resize the image
160+
if content_size_mb > 5:
161+
# Decode the base64 image
162+
img_data = base64.b64decode(encoded_string)
163+
img = Image.open(io.BytesIO(img_data))
164+
165+
# Calculate the size of the original binary data in bytes
166+
content_size_bytes = len(img_data)
227167

228-
# Convert the size to MB
229-
content_size_mb = content_size_bytes / (1024 * 1024)
168+
# Convert the size to MB
169+
content_size_mb = content_size_bytes / (1024 * 1024)
230170

231-
# Print the size of the content in MB
232-
# print(f"File size: {content_size_mb} MB")
171+
# Run in a loop to make SURE it's less than 5mb
172+
for _ in range(10):
173+
# Calculate the scale factor needed to reduce the image size to 4.9 MB
174+
scale_factor = (4.9 / content_size_mb) ** 0.5
233175

234-
# Assert that the content size is under 20 MB
235-
assert content_size_mb < 20, "Content size exceeds 20 MB"
176+
# Calculate the new dimensions
177+
new_width = int(img.width * scale_factor)
178+
new_height = int(img.height * scale_factor)
179+
180+
# Resize the image
181+
img = img.resize((new_width, new_height))
182+
183+
# Convert the image back to base64
184+
buffered = io.BytesIO()
185+
img.save(buffered, format=extension)
186+
encoded_string = base64.b64encode(
187+
buffered.getvalue()
188+
).decode("utf-8")
189+
190+
# Set the content
191+
content = f"data:image/{extension};base64,{encoded_string}"
192+
193+
# Recalculate the size of the content in bytes
194+
content_size_bytes = len(content) * 3 / 4
195+
196+
# Convert the size to MB
197+
content_size_mb = content_size_bytes / (1024 * 1024)
198+
else:
199+
print(
200+
"Attempted to shrink the image but failed. Sending to the LLM anyway."
201+
)
236202

237203
new_message = {
238204
"role": "user",

interpreter/terminal_interface/profiles/defaults/os.py

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -4,7 +4,6 @@
44

55
interpreter.os = True
66
interpreter.llm.supports_vision = True
7-
# interpreter.shrink_images = True # Faster but less accurate
87

98
interpreter.llm.model = "gpt-4o"
109

0 commit comments

Comments
 (0)