Fixed tool-calling bugs

KillianLucas · web-flow · commit 75596becfaec · 2024-07-26T13:44:34.000-07:00
diff --git a/interpreter/core/async_core.py b/interpreter/core/async_core.py
@@ -34,6 +34,9 @@
     pass
 
 
+complete_message = {"role": "server", "type": "status", "content": "complete"}  # Status message sent after the final output chunk and after error messages
+
+
 class AsyncInterpreter(OpenInterpreter):
     def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
@@ -129,9 +132,7 @@ def respond(self, run_code=None):
 
                 self.output_queue.sync_q.put(chunk)
 
-            self.output_queue.sync_q.put(
-                {"role": "server", "type": "status", "content": "complete"}
-            )
+            self.output_queue.sync_q.put(complete_message)
         except Exception as e:
             error = traceback.format_exc() + "\n" + str(e)
             error_message = {
@@ -140,6 +141,7 @@ def respond(self, run_code=None):
                 "content": traceback.format_exc() + "\n" + str(e),
             }
             self.output_queue.sync_q.put(error_message)
+            self.output_queue.sync_q.put(complete_message)
             print("\n\n--- SENT ERROR: ---\n\n")
             print(error)
             print("\n\n--- (ERROR ABOVE WAS SENT) ---\n\n")
@@ -428,6 +430,7 @@ async def receive_input():
                             "content": traceback.format_exc() + "\n" + str(e),
                         }
                         await websocket.send_text(json.dumps(error_message))
+                        await websocket.send_text(json.dumps(complete_message))
                         print("\n\n--- SENT ERROR: ---\n\n")
                         print(error)
                         print("\n\n--- (ERROR ABOVE WAS SENT) ---\n\n")
@@ -496,6 +499,7 @@ async def send_output():
                             "content": traceback.format_exc() + "\n" + str(e),
                         }
                         await websocket.send_text(json.dumps(error_message))
+                        await websocket.send_text(json.dumps(complete_message))
                         print("\n\n--- SENT ERROR: ---\n\n")
                         print(error)
                         print("\n\n--- (ERROR ABOVE WAS SENT) ---\n\n")
@@ -510,6 +514,7 @@ async def send_output():
                     "content": traceback.format_exc() + "\n" + str(e),
                 }
                 await websocket.send_text(json.dumps(error_message))
+                await websocket.send_text(json.dumps(complete_message))
                 print("\n\n--- SENT ERROR: ---\n\n")
                 print(error)
                 print("\n\n--- (ERROR ABOVE WAS SENT) ---\n\n")
diff --git a/interpreter/core/core.py b/interpreter/core/core.py
@@ -322,6 +322,22 @@ def is_ephemeral(chunk):
                 if chunk["content"] == "":
                     continue
 
+                # If active_line is None, we finished running code.
+                if (
+                    chunk.get("format") == "active_line"
+                    and chunk.get("content", "") == None
+                ):
+                    # If output wasn't yet produced, add an empty output
+                    if self.messages[-1]["role"] != "computer":
+                        self.messages.append(
+                            {
+                                "role": "computer",
+                                "type": "console",
+                                "format": "output",
+                                "content": "",
+                            }
+                        )
+
                 # Handle the special "confirmation" chunk, which neither triggers a flag or creates a message
                 if chunk["type"] == "confirmation":
                     # Emit a end flag for the last message type, and reset last_flag_base
@@ -334,14 +350,14 @@ def is_ephemeral(chunk):
 
                     # We want to append this now, so even if content is never filled, we know that the execution didn't produce output.
                     # ... rethink this though.
-                    self.messages.append(
-                        {
-                            "role": "computer",
-                            "type": "console",
-                            "format": "output",
-                            "content": "",
-                        }
-                    )
+                    # self.messages.append(
+                    #     {
+                    #         "role": "computer",
+                    #         "type": "console",
+                    #         "format": "output",
+                    #         "content": "",
+                    #     }
+                    # )
                     continue
 
                 # Check if the chunk's role, type, and format (if present) match the last_flag_base
@@ -362,7 +378,19 @@ def is_ephemeral(chunk):
                     # If they match, append the chunk's content to the current message's content
                     # (Except active_line, which shouldn't be stored)
                     if not is_ephemeral(chunk):
-                        self.messages[-1]["content"] += chunk["content"]
+                        if any(
+                            [
+                                (property in self.messages[-1])
+                                and (
+                                    self.messages[-1].get(property)
+                                    != chunk.get(property)
+                                )
+                                for property in ["role", "type", "format"]
+                            ]
+                        ):
+                            self.messages.append(chunk)
+                        else:
+                            self.messages[-1]["content"] += chunk["content"]
                 else:
                     # If they don't match, yield a end message for the last message type and a start message for the new one
                     if last_flag_base:
diff --git a/interpreter/core/llm/run_tool_calling_llm.py b/interpreter/core/llm/run_tool_calling_llm.py
@@ -27,6 +27,74 @@
 }
 
 
+def process_messages(messages):
+    """
+    Convert legacy function-calling messages into tool-calling messages.
+
+    A message with a truthy "function_call" gets it moved into a one-item
+    "tool_calls" list and is followed by exactly one "tool" response: the
+    next message if its role is "function", else an empty placeholder. A
+    "function" message with no call before it gets a synthetic "execute"
+    tool call. IDs are "toolu_1", "toolu_2", ... in order of appearance.
+    All other messages pass through as-is.
+
+    Converted messages are shallow copies, so the caller's list and dicts
+    are never modified. Returns the new list of messages.
+    """
+    processed_messages = []
+    last_tool_id = 0
+
+    i = 0
+    while i < len(messages):
+        message = messages[i]
+        is_call = bool(message.get("function_call"))
+        is_orphan = not is_call and message.get("role") == "function"
+
+        if not (is_call or is_orphan):
+            # For non-tool-related messages, just add them as is
+            processed_messages.append(message)
+            i += 1
+            continue
+
+        last_tool_id += 1
+        tool_id = f"toolu_{last_tool_id}"
+        response = None
+
+        if is_call:
+            # Convert function_call to tool_calls, on a copy of the message
+            call = message.copy()
+            function = call.pop("function_call")
+            # Pair the call with the next message if it's a function response
+            if i + 1 < len(messages) and messages[i + 1].get("role") == "function":
+                i += 1  # Skip the next message, it is consumed as the response
+                response = messages[i]
+        else:
+            # Orphaned function response: add a tool call before it
+            call = {"role": "assistant"}
+            function = {
+                "name": "execute",
+                "arguments": "# Automated tool call to fetch more output, triggered by the user.",
+            }
+            response = message
+
+        call["tool_calls"] = [
+            {"id": tool_id, "type": "function", "function": function}
+        ]
+        processed_messages.append(call)
+
+        if response is None:
+            # Add an empty tool response if there isn't one
+            tool_message = {"role": "tool", "tool_call_id": tool_id, "content": ""}
+        else:
+            # Copy while retagging so the caller's dict is left untouched
+            tool_message = {**response, "role": "tool", "tool_call_id": tool_id}
+        processed_messages.append(tool_message)
+
+        i += 1
+
+    return processed_messages
+
+
 def run_tool_calling_llm(llm, request_params):
     ## Setup
 
@@ -36,43 +104,72 @@ def run_tool_calling_llm(llm, request_params):
     ]
     request_params["tools"] = [tool_schema]
 
-    last_tool_id = 0
-    for i, message in enumerate(request_params["messages"]):
-        if "function_call" in message:
-            last_tool_id += 1
-            function = message.pop("function_call")
-            message["tool_calls"] = [
-                {
-                    "id": "toolu_" + str(last_tool_id),
-                    "type": "function",
-                    "function": function,
-                }
-            ]
-        if message["role"] == "function":
-            if i != 0 and request_params["messages"][i - 1]["role"] == "tool":
-                request_params["messages"][i]["content"] += message["content"]
-                message = None
-            else:
-                message["role"] = "tool"
-                message["tool_call_id"] = "toolu_" + str(last_tool_id)
-
-    request_params["messages"] = [m for m in request_params["messages"] if m != None]
-
-    new_messages = []
-    for i, message in enumerate(request_params["messages"]):
-        new_messages.append(message)
-        if "tool_calls" in message:
-            tool_call_id = message["tool_calls"][0]["id"]
-            if not any(
-                m
-                for m in request_params["messages"]
-                if m.get("role") == "tool" and m.get("tool_call_id") == tool_call_id
-            ):
-                new_messages.append(
-                    {"role": "tool", "tool_call_id": tool_call_id, "content": ""}
-                )
+    import pprint
+
+    pprint.pprint(
+        [str(m)[:600] if len(str(m)) > 1000 else m for m in request_params["messages"]]
+    )
+
+    print("PROCESSING")
+
+    request_params["messages"] = process_messages(request_params["messages"])
+
+    # # This makes any role: tool have the ID of the last tool call
+    # last_tool_id = 0
+    # for i, message in enumerate(request_params["messages"]):
+    #     if "function_call" in message:
+    #         last_tool_id += 1
+    #         function = message.pop("function_call")
+    #         message["tool_calls"] = [
+    #             {
+    #                 "id": "toolu_" + str(last_tool_id),
+    #                 "type": "function",
+    #                 "function": function,
+    #             }
+    #         ]
+    #     if message["role"] == "function":
+    #         if i != 0 and request_params["messages"][i - 1]["role"] == "tool":
+    #             request_params["messages"][i]["content"] += message["content"]
+    #             message = None
+    #         else:
+    #             message["role"] = "tool"
+    #             message["tool_call_id"] = "toolu_" + str(last_tool_id)
+    # request_params["messages"] = [m for m in request_params["messages"] if m != None]
+
+    # This adds an empty tool response for any tool call without a tool response
+    # new_messages = []
+    # for i, message in enumerate(request_params["messages"]):
+    #     new_messages.append(message)
+    #     if "tool_calls" in message:
+    #         tool_call_id = message["tool_calls"][0]["id"]
+    #         if not any(
+    #             m
+    #             for m in request_params["messages"]
+    #             if m.get("role") == "tool" and m.get("tool_call_id") == tool_call_id
+    #         ):
+    #             new_messages.append(
+    #                 {"role": "tool", "tool_call_id": tool_call_id, "content": ""}
+    #             )
+    # request_params["messages"] = new_messages
+
+    # messages = request_params["messages"]
+    # for i in range(len(messages)):
+    #     if messages[i]["role"] == "user" and isinstance(messages[i]["content"], list):
+    #         # Found an image from the user
+    #         image_message = messages[i]
+    #         j = i + 1
+    #         while j < len(messages) and messages[j]["role"] == "tool":
+    #             # Move the image down until it's after all the role: tools
+    #             j += 1
+    #         messages.insert(j, image_message)
+    #         del messages[i]
+    # request_params["messages"] = messages
+
+    import pprint
 
-    request_params["messages"] = new_messages
+    pprint.pprint(
+        [str(m)[:600] if len(str(m)) > 1000 else m for m in request_params["messages"]]
+    )
 
     # Add OpenAI's recommended function message
     # request_params["messages"][0][
diff --git a/interpreter/core/llm/utils/convert_to_openai_messages.py b/interpreter/core/llm/utils/convert_to_openai_messages.py
@@ -155,6 +155,58 @@ def convert_to_openai_messages(
                             # print("Failed to shrink image. Proceeding with original image size.")
                             pass
 
+                    # Must be less than 5mb
+                    # Calculate the size of the original binary data in bytes
+                    content_size_bytes = len(message["content"]) * 3 / 4
+
+                    # Convert the size to MB
+                    content_size_mb = content_size_bytes / (1024 * 1024)
+
+                    # If the content size is greater than 5 MB, resize the image
+                    if content_size_mb > 5:
+                        try:
+                            # Decode the base64 image
+                            img_data = base64.b64decode(message["content"])
+                            img = Image.open(io.BytesIO(img_data))
+
+                            # Calculate the size of the original binary data in bytes
+                            content_size_bytes = len(img_data)
+
+                            # Convert the size to MB
+                            content_size_mb = content_size_bytes / (1024 * 1024)
+
+                            # Run in a loop to make SURE it's less than 5mb
+                            while content_size_mb > 5:
+                                # Calculate the scale factor needed to reduce the image size to 5 MB
+                                scale_factor = (5 / content_size_mb) ** 0.5
+
+                                # Calculate the new dimensions
+                                new_width = int(img.width * scale_factor)
+                                new_height = int(img.height * scale_factor)
+
+                                # Resize the image
+                                img = img.resize((new_width, new_height))
+
+                                # Convert the image back to base64
+                                buffered = io.BytesIO()
+                                img.save(buffered, format=extension)
+                                img_str = base64.b64encode(buffered.getvalue()).decode(
+                                    "utf-8"
+                                )
+
+                                # Set the content
+                                content = f"data:image/{extension};base64,{img_str}"
+
+                                # Recalculate the size of the content in bytes
+                                content_size_bytes = len(content) * 3 / 4
+
+                                # Convert the size to MB
+                                content_size_mb = content_size_bytes / (1024 * 1024)
+                        except:
+                            # This should be non blocking. It's not required
+                            # print("Failed to shrink image. Proceeding with original image size.")
+                            pass
+
                 elif message["format"] == "path":
                     # Convert to base64
                     image_path = message["content"]
@@ -198,6 +250,33 @@ def convert_to_openai_messages(
                     ],
                 }
 
+                if message["role"] == "computer":
+                    new_message["content"].append(
+                        {
+                            "type": "text",
+                            "text": "This image is the result of the last tool output. What does it mean / are we done?",
+                        }
+                    )
+                if message.get("format") == "path":
+                    if any(
+                        content.get("type") == "text"
+                        for content in new_message["content"]
+                    ):
+                        for content in new_message["content"]:
+                            if content.get("type") == "text":
+                                content["text"] += (
+                                    "\nThis image is at this path: "
+                                    + message["content"]
+                                )
+                    else:
+                        new_message["content"].append(
+                            {
+                                "type": "text",
+                                "text": "This image is at this path: "
+                                + message["content"],
+                            }
+                        )
+
         elif message["type"] == "file":
             new_message = {"role": "user", "content": message["content"]}