Shrink images under limit

KillianLucas · KillianLucas · commit f66f5d1c5f26 · 2024-07-29T13:52:58.000-07:00
diff --git a/interpreter/core/core.py b/interpreter/core/core.py
@@ -48,7 +48,7 @@ def __init__(
         debug=False,
         max_output=2800,
         safe_mode="off",
-        shrink_images=False,
+        shrink_images=True,
         loop=False,
         loop_message="""Proceed. You CAN run code on my machine. If the entire task I asked for is done, say exactly 'The task is done.' If you need some specific information (like username or password) say EXACTLY 'Please provide more information.' If it's impossible, say 'The task is impossible.' (If I haven't provided a task, say exactly 'Let me know what you'd like to do next.') Otherwise keep going.""",
         loop_breakers=[
diff --git a/interpreter/core/llm/utils/convert_to_openai_messages.py b/interpreter/core/llm/utils/convert_to_openai_messages.py
@@ -123,95 +123,18 @@ def convert_to_openai_messages(
                     else:
                         extension = "png"
 
-                    # Construct the content string
-                    content = f"data:image/{extension};base64,{message['content']}"
-
-                    if shrink_images:
-                        try:
-                            # Decode the base64 image
-                            img_data = base64.b64decode(message["content"])
-                            img = Image.open(io.BytesIO(img_data))
-
-                            # Resize the image if it's width is more than 1024
-                            if img.width > 1024:
-                                new_height = int(img.height * 1024 / img.width)
-                                img = img.resize((1024, new_height))
-
-                            # Convert the image back to base64
-                            buffered = io.BytesIO()
-                            img.save(buffered, format=extension)
-                            img_str = base64.b64encode(buffered.getvalue()).decode(
-                                "utf-8"
-                            )
-                            content = f"data:image/{extension};base64,{img_str}"
-                        except:
-                            # This should be non blocking. It's not required
-                            # print("Failed to shrink image. Proceeding with original image size.")
-                            pass
-
-                    # Must be less than 5mb
-                    # Calculate the size of the original binary data in bytes
-                    content_size_bytes = len(message["content"]) * 3 / 4
-
-                    # Convert the size to MB
-                    content_size_mb = content_size_bytes / (1024 * 1024)
-
-                    # If the content size is greater than 5 MB, resize the image
-                    if content_size_mb > 5:
-                        try:
-                            # Decode the base64 image
-                            img_data = base64.b64decode(message["content"])
-                            img = Image.open(io.BytesIO(img_data))
-
-                            # Calculate the size of the original binary data in bytes
-                            content_size_bytes = len(img_data)
-
-                            # Convert the size to MB
-                            content_size_mb = content_size_bytes / (1024 * 1024)
-
-                            # Run in a loop to make SURE it's less than 5mb
-                            while content_size_mb > 5:
-                                # Calculate the scale factor needed to reduce the image size to 5 MB
-                                scale_factor = (5 / content_size_mb) ** 0.5
-
-                                # Calculate the new dimensions
-                                new_width = int(img.width * scale_factor)
-                                new_height = int(img.height * scale_factor)
-
-                                # Resize the image
-                                img = img.resize((new_width, new_height))
-
-                                # Convert the image back to base64
-                                buffered = io.BytesIO()
-                                img.save(buffered, format=extension)
-                                img_str = base64.b64encode(buffered.getvalue()).decode(
-                                    "utf-8"
-                                )
-
-                                # Set the content
-                                content = f"data:image/{extension};base64,{img_str}"
-
-                                # Recalculate the size of the content in bytes
-                                content_size_bytes = len(content) * 3 / 4
-
-                                # Convert the size to MB
-                                content_size_mb = content_size_bytes / (1024 * 1024)
-                        except:
-                            # This should be non blocking. It's not required
-                            # print("Failed to shrink image. Proceeding with original image size.")
-                            pass
+                    encoded_string = message["content"]
 
                 elif message["format"] == "path":
                     # Convert to base64
                     image_path = message["content"]
-                    file_extension = image_path.split(".")[-1]
+                    extension = image_path.split(".")[-1]
 
                     with open(image_path, "rb") as image_file:
                         encoded_string = base64.b64encode(image_file.read()).decode(
                             "utf-8"
                         )
 
-                    content = f"data:image/{file_extension};base64,{encoded_string}"
                 else:
                     # Probably would be better to move this to a validation pass
                     # Near core, through the whole messages object
@@ -222,17 +145,60 @@ def convert_to_openai_messages(
                             f"Unrecognized image format: {message['format']}"
                         )
 
-                # Calculate the size of the original binary data in bytes
-                content_size_bytes = len(content) * 3 / 4
+                content = f"data:image/{extension};base64,{encoded_string}"
+
+                if shrink_images:
+                    # Shrink to less than 5mb
+
+                    # Calculate size
+                    content_size_bytes = len(content) * 3 / 4
+
+                    # Convert the size to MB
+                    content_size_mb = content_size_bytes / (1024 * 1024)
+
+                    # If the content size is greater than 5 MB, resize the image
+                    if content_size_mb > 5:
+                        # Decode the base64 image
+                        img_data = base64.b64decode(encoded_string)
+                        img = Image.open(io.BytesIO(img_data))
+
+                        # Calculate the size of the original binary data in bytes
+                        content_size_bytes = len(img_data)
 
-                # Convert the size to MB
-                content_size_mb = content_size_bytes / (1024 * 1024)
+                        # Convert the size to MB
+                        content_size_mb = content_size_bytes / (1024 * 1024)
 
-                # Print the size of the content in MB
-                # print(f"File size: {content_size_mb} MB")
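+                        # Run up to 10 shrink passes to get under 5mb. NOTE(review): nothing breaks out once it is small enough, so all 10 passes run and the for/else below always prints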
+                        for _ in range(10):
+                            # Calculate the scale factor needed to reduce the image size to 4.9 MB
+                            scale_factor = (4.9 / content_size_mb) ** 0.5
 
-                # Assert that the content size is under 20 MB
-                assert content_size_mb < 20, "Content size exceeds 20 MB"
+                            # Calculate the new dimensions
+                            new_width = int(img.width * scale_factor)
+                            new_height = int(img.height * scale_factor)
+
+                            # Resize the image
+                            img = img.resize((new_width, new_height))
+
+                            # Convert the image back to base64
+                            buffered = io.BytesIO()
+                            img.save(buffered, format=extension)
+                            encoded_string = base64.b64encode(
+                                buffered.getvalue()
+                            ).decode("utf-8")
+
+                            # Set the content
+                            content = f"data:image/{extension};base64,{encoded_string}"
+
+                            # Recalculate the size of the content in bytes
+                            content_size_bytes = len(content) * 3 / 4
+
+                            # Convert the size to MB
+                            content_size_mb = content_size_bytes / (1024 * 1024)
+                        else:
+                            print(
+                                "Attempted to shrink the image but failed. Sending to the LLM anyway."
+                            )
 
                 new_message = {
                     "role": "user",
diff --git a/interpreter/terminal_interface/profiles/defaults/os.py b/interpreter/terminal_interface/profiles/defaults/os.py
@@ -4,7 +4,6 @@
 
 interpreter.os = True
 interpreter.llm.supports_vision = True
-# interpreter.shrink_images = True # Faster but less accurate
 
 interpreter.llm.model = "gpt-4o"