
Commit 6735370

Speedup, increased max_output default, server improvements
1 parent 58d5727 commit 6735370

File tree

13 files changed: +483 −113 lines


benchmarks/simple.py

Lines changed: 0 additions & 18 deletions
This file was deleted.

interpreter/core/async_core.py

Lines changed: 39 additions & 3 deletions
@@ -1,7 +1,9 @@
 import asyncio
 import json
+import os
 import threading
 import traceback
+from datetime import datetime
 from typing import Any, Dict

 from .core import OpenInterpreter
@@ -22,6 +24,7 @@ def __init__(self, *args, **kwargs):
         self.respond_thread = None
         self.stop_event = threading.Event()
         self.output_queue = None
+        self.id = os.getenv("INTERPRETER_ID", datetime.now().timestamp())

         self.server = Server(self)
@@ -41,8 +44,20 @@ async def input(self, chunk):
             self.accumulate(chunk)
         elif "end" in chunk:
             # If the user is done talking, the interpreter should respond.
+
+            # But first, process any client messages.
+            if self.messages[-1]["role"] == "client":
+                command = self.messages[-1]["content"]
+                self.messages = self.messages[:-1]
+
+                if command == "stop":
+                    return
+                if command == "go":
+                    # This is to approve code.
+                    # We do nothing, as self.respond will run the last code block if the last message is one.
+                    pass
+
             self.stop_event.clear()
-            print("Responding.")
             self.respond_thread = threading.Thread(target=self.respond)
             self.respond_thread.start()
@@ -53,7 +68,7 @@ async def output(self):

     def respond(self):
         for chunk in self._respond_and_store():
-            print(chunk.get("content", ""), end="")
+            print(chunk.get("content", ""), end="", flush=True)
             if self.stop_event.is_set():
                 return
             self.output_queue.sync_q.put(chunk)
@@ -160,8 +175,18 @@ async def send_output():
         finally:
             await websocket.close()

+    @router.post("/run")
+    async def run_code(payload: Dict[str, Any]):
+        language, code = payload.get("language"), payload.get("code")
+        if not (language and code):
+            return {"error": "Both 'language' and 'code' are required."}, 400
+        try:
+            return {"output": async_interpreter.computer.run(language, code)}
+        except Exception as e:
+            return {"error": str(e)}, 500
+
     @router.post("/settings")
-    async def settings(payload: Dict[str, Any]):
+    async def set_settings(payload: Dict[str, Any]):
         for key, value in payload.items():
             print(f"Updating settings: {key} = {value}")
             if key in ["llm", "computer"] and isinstance(value, dict):
@@ -172,6 +197,17 @@ async def settings(payload: Dict[str, Any]):

         return {"status": "success"}

+    @router.get("/interpreter/{setting}")
+    async def get_setting(setting: str):
+        if hasattr(async_interpreter, setting):
+            setting_value = getattr(async_interpreter, setting)
+            try:
+                return json.dumps({setting: setting_value})
+            except TypeError:
+                return {"error": "Failed to serialize the setting value"}, 500
+        else:
+            return json.dumps({"error": "Setting not found"}), 404
+
     return router
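
For illustration, here is roughly how a client could exercise the new HTTP endpoints (a minimal sketch: the base URL is an assumption that depends on how the server is launched, and `auto_run` is just an example of a JSON-serializable setting):

    import requests

    base = "http://localhost:8000"  # assumed address of the running server

    # Execute code directly through the new /run endpoint
    r = requests.post(f"{base}/run", json={"language": "python", "code": "print(1 + 1)"})
    print(r.json())

    # Update a setting, then read it back through the new GET route
    requests.post(f"{base}/settings", json={"auto_run": True})
    print(requests.get(f"{base}/interpreter/auto_run").json())
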
interpreter/core/computer/terminal/terminal.py

Lines changed: 26 additions & 0 deletions
@@ -1,3 +1,4 @@
+import json
 import time

 from ..utils.recipient_utils import parse_for_recipient
@@ -59,6 +60,31 @@ def run(self, language, code, stream=False, display=False):
             self.computer._has_imported_skills = True
             self.computer.skills.import_skills()

+        # This won't work because truncated code is stored in interpreter.messages :/
+        # If the full code was stored, we could do this:
+        if False and "get_last_output()" in code:
+            if "# We wouldn't want to have maximum recursion depth!" in code:
+                # We just tried to run this, in a moment.
+                pass
+            else:
+                code_outputs = [
+                    m
+                    for m in self.computer.interpreter.messages
+                    if m["role"] == "computer"
+                    and "content" in m
+                    and m["content"] != ""
+                ]
+                if len(code_outputs) > 0:
+                    last_output = code_outputs[-1]["content"]
+                else:
+                    last_output = ""
+                last_output = json.dumps(last_output)
+
+                self.computer.run(
+                    "python",
+                    f"# We wouldn't want to have maximum recursion depth!\nimport json\ndef get_last_output():\n    return '''{last_output}'''",
+                )
+
         if stream == False:
             # If stream == False, *pull* from _streaming_run.
             output_messages = []
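
The block above is disabled with `if False`, but the f-string shows the intent: inject a small helper into the session so the model can page through output that was truncated away. The snippet it would generate looks roughly like this (the embedded value is a placeholder for the JSON-dumped last computer message):

    # We wouldn't want to have maximum recursion depth!
    import json
    def get_last_output():
        return '''...last computer output, as dumped by json.dumps...'''

This pairs with the scrollbar hint added to `truncate_output.py` later in this commit, which points the model at `get_last_output()`.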

interpreter/core/core.py

Lines changed: 3 additions & 1 deletion
@@ -379,7 +379,9 @@ def is_active_line_chunk(chunk):
             # Truncate output if it's console output
             if chunk["type"] == "console" and chunk["format"] == "output":
                 self.messages[-1]["content"] = truncate_output(
-                    self.messages[-1]["content"], self.max_output
+                    self.messages[-1]["content"],
+                    self.max_output,
+                    add_scrollbars=self.computer.import_computer_api,  # I consider scrollbars to be a computer API thing
                 )

         # Yield a final end flag

Lines changed: 1 addition & 8 deletions
@@ -1,8 +1,7 @@
 import getpass
 import platform

-default_system_message = (
-    f"""
+default_system_message = f"""

 You are Open Interpreter, a world-class programmer that can complete any goal by executing code.
 First, write a plan. **Always recap the plan between each code block** (you have extreme short-term memory loss, so you need to recap the plan between each message block to retain it).
@@ -16,9 +15,3 @@

 User's Name: {getpass.getuser()}
 User's OS: {platform.system()}""".strip()
-    + r"""
-
-{{print(":)")}}
-
-""".strip()
-)

interpreter/core/llm/llm.py

Lines changed: 14 additions & 4 deletions
@@ -64,6 +64,16 @@ def run(self, messages):
         And then processing its output, whether it's a function or non function calling model, into LMC format.
         """

+        if (
+            self.max_tokens is not None
+            and self.context_window is not None
+            and self.max_tokens > self.context_window
+        ):
+            print(
+                "Warning: max_tokens is larger than context_window. Setting max_tokens to be 0.2 times the context_window."
+            )
+            self.max_tokens = int(0.2 * self.context_window)
+
         # Assertions
         assert (
             messages[0]["role"] == "system"
@@ -200,7 +210,7 @@ def run(self, messages):
         if self.interpreter.in_terminal_interface:
             display_markdown_message(
                 """
-**We were unable to determine the context window of this model.** Defaulting to 3000.
+**We were unable to determine the context window of this model.** Defaulting to 8000.

 If your model can handle more, run `interpreter --context_window {token limit} --max_tokens {max tokens per response}`.

@@ -210,7 +220,7 @@ def run(self, messages):
         else:
             display_markdown_message(
                 """
-**We were unable to determine the context window of this model.** Defaulting to 3000.
+**We were unable to determine the context window of this model.** Defaulting to 8000.

 If your model can handle more, run `self.context_window = {token limit}`.

@@ -220,7 +230,7 @@ def run(self, messages):
                 """
             )
             messages = tt.trim(
-                messages, system_message=system_message, max_tokens=3000
+                messages, system_message=system_message, max_tokens=8000
             )
         except:
             # If we're trimming messages, this won't work.
@@ -330,7 +340,7 @@ def load(self):
             self.context_window = context_length
         if self.max_tokens == None:
             if self.context_window != None:
-                self.max_tokens = int(self.context_window * 0.8)
+                self.max_tokens = int(self.context_window * 0.2)

         # Send a ping, which will actually load the model
         print(f"Loading {model_name}...\n")
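
The new clamp means a misconfigured pair of limits now degrades gracefully instead of failing at the API. A quick sketch of the resulting behavior (values are illustrative):

    from interpreter import interpreter

    interpreter.llm.context_window = 8000
    interpreter.llm.max_tokens = 16000  # mistakenly larger than the context window

    # On the next LLM call, the warning prints and max_tokens is
    # reset to int(0.2 * 8000) == 1600.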

interpreter/core/respond.py

Lines changed: 50 additions & 45 deletions
@@ -75,56 +75,61 @@ def respond(interpreter):

         ### RUN THE LLM ###

-        try:
-            for chunk in interpreter.llm.run(messages_for_llm):
-                yield {"role": "assistant", **chunk}
-
-        except litellm.exceptions.BudgetExceededError:
-            display_markdown_message(
-                f"""> Max budget exceeded
-
-**Session spend:** ${litellm._current_cost}
-**Max budget:** ${interpreter.max_budget}
-
-Press CTRL-C then run `interpreter --max_budget [higher USD amount]` to proceed.
-"""
-            )
-            break
-        # Provide extra information on how to change API keys, if we encounter that error
-        # (Many people writing GitHub issues were struggling with this)
-        except Exception as e:
-            if (
-                interpreter.offline == False
-                and "auth" in str(e).lower()
-                or "api key" in str(e).lower()
-            ):
-                output = traceback.format_exc()
-                raise Exception(
-                    f"{output}\n\nThere might be an issue with your API key(s).\n\nTo reset your API key (we'll use OPENAI_API_KEY for this example, but you may need to reset your ANTHROPIC_API_KEY, HUGGINGFACE_API_KEY, etc):\n        Mac/Linux: 'export OPENAI_API_KEY=your-key-here'. Update your ~/.zshrc on MacOS or ~/.bashrc on Linux with the new key if it has already been persisted there.,\n        Windows: 'setx OPENAI_API_KEY your-key-here' then restart terminal.\n\n"
-                )
-            elif interpreter.offline == False and "not have access" in str(e).lower():
-                response = input(
-                    f"  You do not have access to {interpreter.llm.model}. You will need to add a payment method and purchase credits for the OpenAI API billing page (different from ChatGPT) to use `GPT-4`.\n\nhttps://platform.openai.com/account/billing/overview\n\nWould you like to try GPT-3.5-TURBO instead? (y/n)\n\n  "
-                )
-                print("")  # <- Aesthetic choice
-
-                if response.strip().lower() == "y":
-                    interpreter.llm.model = "gpt-3.5-turbo-1106"
-                    interpreter.llm.context_window = 16000
-                    interpreter.llm.max_tokens = 4096
-                    interpreter.llm.supports_functions = True
-                    display_markdown_message(
-                        f"> Model set to `{interpreter.llm.model}`"
-                    )
-                else:
-                    raise Exception(
-                        "\n\nYou will need to add a payment method and purchase credits for the OpenAI API billing page (different from ChatGPT) to use GPT-4.\n\nhttps://platform.openai.com/account/billing/overview"
-                    )
-            elif interpreter.offline and not interpreter.os:
-                print(traceback.format_exc())
-                raise Exception("Error occurred. " + str(e))
-            else:
-                raise
+        if (
+            interpreter.messages[-1]["type"] != "code"
+        ):  # If it is, we should run the code (we do below)
+            try:
+                for chunk in interpreter.llm.run(messages_for_llm):
+                    yield {"role": "assistant", **chunk}
+
+            except litellm.exceptions.BudgetExceededError:
+                display_markdown_message(
+                    f"""> Max budget exceeded
+
+**Session spend:** ${litellm._current_cost}
+**Max budget:** ${interpreter.max_budget}
+
+Press CTRL-C then run `interpreter --max_budget [higher USD amount]` to proceed.
+"""
+                )
+                break
+            # Provide extra information on how to change API keys, if we encounter that error
+            # (Many people writing GitHub issues were struggling with this)
+            except Exception as e:
+                if (
+                    interpreter.offline == False
+                    and "auth" in str(e).lower()
+                    or "api key" in str(e).lower()
+                ):
+                    output = traceback.format_exc()
+                    raise Exception(
+                        f"{output}\n\nThere might be an issue with your API key(s).\n\nTo reset your API key (we'll use OPENAI_API_KEY for this example, but you may need to reset your ANTHROPIC_API_KEY, HUGGINGFACE_API_KEY, etc):\n        Mac/Linux: 'export OPENAI_API_KEY=your-key-here'. Update your ~/.zshrc on MacOS or ~/.bashrc on Linux with the new key if it has already been persisted there.,\n        Windows: 'setx OPENAI_API_KEY your-key-here' then restart terminal.\n\n"
+                    )
+                elif (
+                    interpreter.offline == False and "not have access" in str(e).lower()
+                ):
+                    response = input(
+                        f"  You do not have access to {interpreter.llm.model}. You will need to add a payment method and purchase credits for the OpenAI API billing page (different from ChatGPT) to use `GPT-4`.\n\nhttps://platform.openai.com/account/billing/overview\n\nWould you like to try GPT-3.5-TURBO instead? (y/n)\n\n  "
+                    )
+                    print("")  # <- Aesthetic choice
+
+                    if response.strip().lower() == "y":
+                        interpreter.llm.model = "gpt-3.5-turbo-1106"
+                        interpreter.llm.context_window = 16000
+                        interpreter.llm.max_tokens = 4096
+                        interpreter.llm.supports_functions = True
+                        display_markdown_message(
+                            f"> Model set to `{interpreter.llm.model}`"
+                        )
+                    else:
+                        raise Exception(
+                            "\n\nYou will need to add a payment method and purchase credits for the OpenAI API billing page (different from ChatGPT) to use GPT-4.\n\nhttps://platform.openai.com/account/billing/overview"
+                        )
+                elif interpreter.offline and not interpreter.os:
+                    print(traceback.format_exc())
+                    raise Exception("Error occurred. " + str(e))
+                else:
+                    raise

         ### RUN CODE (if it's there) ###

@@ -142,7 +147,7 @@ def respond(interpreter):
             if interpreter.verbose:
                 print("Removing `\n")

-            if language == "text":
+            if language == "text" or language == "markdown":
                # It does this sometimes just to take notes. Let it, it's useful.
                # In the future we should probably not detect this behavior as code at all.
                continue
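
For context, the new guard at the top of this hunk is what makes the websocket "go" approval work end to end: after approval, the conversation ends with an unrun code message, so respond() skips the LLM call and falls through to the RUN CODE section. An illustrative LMC-style message list (contents made up):

    interpreter.messages = [
        {"role": "user", "type": "message", "content": "What is 1 + 1?"},
        {"role": "assistant", "type": "code", "format": "python", "content": "print(1 + 1)"},
    ]

    # Because the final message has type "code", respond() does not query
    # the model again; it proceeds directly to executing that block.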

interpreter/core/utils/system_debug_info.py

Lines changed: 1 addition & 0 deletions
@@ -103,6 +103,7 @@ def interpreter_info(interpreter):
         Function calling: {interpreter.llm.supports_functions}
         Context window: {interpreter.llm.context_window}
         Max tokens: {interpreter.llm.max_tokens}
+        Computer API: {interpreter.computer.import_computer_api}

         Auto run: {interpreter.auto_run}
         API base: {interpreter.llm.api_base}

interpreter/core/utils/truncate_output.py

Lines changed: 10 additions & 1 deletion
@@ -1,11 +1,20 @@
-def truncate_output(data, max_output_chars=2000):
+def truncate_output(data, max_output_chars=2800, add_scrollbars=False):
     if "@@@DO_NOT_TRUNCATE@@@" in data:
         return data

     needs_truncation = False

     message = f"Output truncated. Showing the last {max_output_chars} characters.\n\n"

+    # This won't work because truncated code is stored in interpreter.messages :/
+    # If the full code was stored, we could do this:
+    if add_scrollbars:
+        message = (
+            message.strip()
+            + f" Run `get_last_output()[0:{max_output_chars}]` to see the first page.\n\n"
+        )
+    # Then we have code in `terminal.py` which makes that function work. It should be a computer tool though to just access messages IMO. Or like, self.messages.
+
     # Remove previous truncation message if it exists
     if data.startswith(message):
         data = data[len(message) :]
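
A quick sanity check of the new signature (the import path follows this repo's layout; the message wording is taken from the function above):

    from interpreter.core.utils.truncate_output import truncate_output

    data = "x" * 5000
    truncated = truncate_output(data, max_output_chars=2800, add_scrollbars=True)

    # When the data exceeds the limit, the result comes back prefixed with the
    # truncation message, now including the get_last_output() paging hint.
    print(truncated.splitlines()[0])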

interpreter/terminal_interface/start_terminal_interface.py

Lines changed: 1 addition & 1 deletion
@@ -546,7 +546,7 @@ def main():
             contribute = "y"
         else:
             print(
-                "Thanks for your feedback! Would you like to send us this chat so we can improve?\n"
+                "\nThanks for your feedback! Would you like to send us this chat so we can improve?\n"
             )
             contribute = input("(y/n): ").strip().lower()
