
Commit f4bcbef

Merge pull request #1327 from OpenInterpreter/main
Update dev branch
2 parents: c065618 + 8ad54e9

File tree: 12 files changed, +70 −20 lines


Dockerfile

Lines changed: 23 additions & 0 deletions

@@ -0,0 +1,23 @@
+###########################################################################################
+# This Dockerfile runs an LMC-compatible websocket server at / on port 8000.              #
+# To learn more about LMC, visit https://docs.openinterpreter.com/protocols/lmc-messages. #
+###########################################################################################
+
+FROM python:3.11.8
+
+# Set environment variables
+# ENV OPENAI_API_KEY ...
+
+# Copy required files into container
+RUN mkdir -p interpreter
+COPY interpreter/ interpreter/
+COPY poetry.lock pyproject.toml README.md ./
+
+# Expose port 8000
+EXPOSE 8000
+
+# Install server dependencies
+RUN pip install -e ".[server]"
+
+# Start the server
+ENTRYPOINT ["interpreter", "--server"]
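To sanity-check the container end to end, here is a minimal, hypothetical client sketch. The `docker` commands and the role/type/content message fields follow the linked LMC docs, but the image tag and the exact response shapes are assumptions:

```python
# Hypothetical smoke test for the image above. Assumes it was built and
# started first, e.g.:
#   docker build -t oi-server .
#   docker run -e OPENAI_API_KEY=sk-... -p 8000:8000 oi-server
import asyncio
import json

import websockets  # third-party: pip install websockets


async def main():
    # The Dockerfile serves an LMC-compatible websocket at / on port 8000.
    async with websockets.connect("ws://localhost:8000/") as ws:
        # LMC messages are JSON objects with role/type/content fields
        # (https://docs.openinterpreter.com/protocols/lmc-messages).
        await ws.send(
            json.dumps({"role": "user", "type": "message", "content": "Hi!"})
        )
        try:
            while True:  # print streamed chunks until the server goes quiet
                print(json.loads(await asyncio.wait_for(ws.recv(), timeout=30)))
        except asyncio.TimeoutError:
            pass


asyncio.run(main())
```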

benchmarks/simple.py

Lines changed: 18 additions & 0 deletions

@@ -0,0 +1,18 @@
+"""
+This is a sketch of a simple benchmark runner.
+"""
+
+tasks = [
+    {
+        "question": "",
+        "answer": "",
+    },
+    {"setup_script": "", "question": "", "answer": "", "evaluation_script": ""},
+]
+
+# For each task,
+# Start a thread that does the following:
+#     Spin up a docker container
+#     Run the setup script
+#     Ask the question
+#     Run the evaluation script or use an LLM to check the answer
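The comment block above outlines the intended runner. Here is a hedged expansion of those four steps, assuming a local `docker` CLI; `ask()` is a hypothetical helper standing in for routing the question to an interpreter inside the container:

```python
import subprocess
import threading


def run_task(task):
    # Spin up a docker container that idles until torn down.
    container = subprocess.check_output(
        ["docker", "run", "-d", "python:3.11.8", "sleep", "infinity"], text=True
    ).strip()
    try:
        # Run the setup script, if the task defines one.
        if task.get("setup_script"):
            subprocess.run(
                ["docker", "exec", container, "sh", "-c", task["setup_script"]],
                check=True,
            )
        # Ask the question. `ask` is hypothetical: a real runner would route
        # this through an interpreter instance bound to the container.
        answer = ask(container, task["question"])
        # Run the evaluation script, or fall back to exact-match checking.
        if task.get("evaluation_script"):
            subprocess.run(
                ["docker", "exec", container, "sh", "-c", task["evaluation_script"]],
                check=True,
            )
        else:
            assert answer.strip() == task["answer"]
    finally:
        subprocess.run(["docker", "rm", "-f", container], check=True)


# One thread per task, as the sketch suggests.
threads = [threading.Thread(target=run_task, args=(t,)) for t in tasks]
for t in threads:
    t.start()
for t in threads:
    t.join()
```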

docs/ROADMAP.md

Lines changed: 3 additions & 0 deletions

@@ -9,6 +9,9 @@
 - [ ] Figure out how to get OI to answer to user input requests like python's `input()`. Do we somehow detect a delay in the output..? Is there some universal flag that TUIs emit when they expect user input? Should we do this semantically with embeddings, then ask OI to review it and respond..?
 - [ ] Placeholder text that gives a compelling example OI request. Probably use `textual`
 - [ ] Everything else `textual` offers, like could we make it easier to select text? Copy paste in and out? Code editing interface?
+- [ ] Let people turn off the active line highlighting
+- [ ] Add a --plain flag which doesn't use rich, just prints stuff in plain text
+- [ ] Use iPython stuff to track the active line, instead of inserting print statements, which makes debugging weird (From ChatGPT: For deeper insights into what's happening behind the scenes, including which line of code is being executed, you can increase the logging level of the IPython kernel. You can configure the kernel's logger to a more verbose setting, which logs each execution request. However, this requires modifying the kernel's startup settings, which might involve changing logging configurations in the IPython kernel source or when launching the kernel.)
 - [ ] Let people edit the code OI writes. Could just open it in the user's preferred editor. Simple. [Full description of how to implement this here.](https://github.com/KillianLucas/open-interpreter/pull/830#issuecomment-1854989795)
 - [ ] Display images in the terminal interface
 - [ ] There should be a function that just renders messages to the terminal, so we can revive conversation navigator, and let people look at their conversations

docs/guides/profiles.mdx

Lines changed: 2 additions & 2 deletions

@@ -18,9 +18,9 @@ from interpreter import interpreter
 interpreter.os = True
 interpreter.llm.supports_vision = True

-interpreter.llm.model = "gpt-4-vision-preview"
+interpreter.llm.model = "gpt-4o"

-interpreter.llm.supports_functions = False
+interpreter.llm.supports_functions = True
 interpreter.llm.context_window = 110000
 interpreter.llm.max_tokens = 4096
 interpreter.auto_run = True

docs/settings/all-settings.mdx

Lines changed: 5 additions & 5 deletions

@@ -280,17 +280,17 @@ llm:

 ### Vision Mode

-Enables vision mode, which adds some special instructions to the prompt and switches to `gpt-4-vision-preview`.
+Enables vision mode, which adds some special instructions to the prompt and switches to `gpt-4o`.

 <CodeGroup>
 ```bash Terminal
 interpreter --vision
 ```

 ```python Python
-interpreter.llm.model = "gpt-4-vision-preview" # Any vision supporting model
+interpreter.llm.model = "gpt-4o" # Any vision supporting model
 interpreter.llm.supports_vision = True
-interpreter.llm.supports_functions = False # If model doesn't support functions, which is the case with gpt-4-vision.
+interpreter.llm.supports_functions = True

 interpreter.custom_instructions = """The user will show you an image of the code you write. You can view images directly.
 For HTML: This will be run STATELESSLY. You may NEVER write '<!-- previous code here... --!>' or `<!-- header will go here -->` or anything like that. It is CRITICAL TO NEVER WRITE PLACEHOLDERS. Placeholders will BREAK it. You must write the FULL HTML CODE EVERY TIME. Therefore you cannot write HTML piecemeal—write all the HTML, CSS, and possibly Javascript **in one step, in one code block**. The user will help you review it visually.

@@ -302,10 +302,10 @@ If you use `plt.show()`, the resulting image will be sent to you. However, if yo
 loop: True

 llm:
-  model: "gpt-4-vision-preview"
+  model: "gpt-4o"
   temperature: 0
   supports_vision: True
-  supports_functions: False
+  supports_functions: True
   context_window: 110000
   max_tokens: 4096
   custom_instructions: >

docs/usage/terminal/vision.mdx

Lines changed: 1 addition & 1 deletion

@@ -8,4 +8,4 @@ To use vision (highly experimental), run the following command:
 interpreter --vision
 ```

-If a file path to an image is found in your input, it will be loaded into the vision model (`gpt-4-vision-preview` for now).
+If a file path to an image is found in your input, it will be loaded into the vision model (`gpt-4o` for now).
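Since vision mode loads any image path it finds in the prompt, usage from Python is just a chat call. A short hedged sketch (`interpreter.chat` is the standard entry point; the settings mirror the diffs above and the image path is illustrative):

```python
from interpreter import interpreter

interpreter.llm.model = "gpt-4o"
interpreter.llm.supports_vision = True

# A file path in the prompt is detected and loaded into the vision model.
interpreter.chat("What is in ./screenshot.png?")
```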

interpreter/core/async_core.py

Lines changed: 1 addition & 1 deletion

@@ -187,4 +187,4 @@ def __init__(self, async_interpreter, host="0.0.0.0", port=8000):
         )

     def run(self):
-        uvicorn.run(self.app, host=self.host, port=self.port)
+        self.uvicorn_server.run()
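Holding a `uvicorn.Server` object rather than calling module-level `uvicorn.run(...)` is the standard uvicorn pattern for programmatic control (for example, clean shutdown via `should_exit`). A minimal sketch of how `self.uvicorn_server` could be built in the `__init__` named in the hunk header; `uvicorn.Config` and `uvicorn.Server` are real uvicorn APIs, while the surrounding class details are assumptions:

```python
import uvicorn
from fastapi import FastAPI


class Server:
    def __init__(self, async_interpreter, host="0.0.0.0", port=8000):
        self.app = FastAPI()
        # ... websocket routes registered on self.app ...
        # Keeping a uvicorn.Server instance exposes handles such as
        # should_exit, which uvicorn.run() hides.
        config = uvicorn.Config(self.app, host=host, port=port)
        self.uvicorn_server = uvicorn.Server(config)

    def run(self):
        self.uvicorn_server.run()
```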

interpreter/terminal_interface/profiles/defaults/os.py

Lines changed: 2 additions & 2 deletions

@@ -6,11 +6,11 @@
 interpreter.llm.supports_vision = True
 # interpreter.shrink_images = True # Faster but less accurate

-interpreter.llm.model = "gpt-4-vision-preview"
+interpreter.llm.model = "gpt-4o"

 interpreter.computer.import_computer_api = True

-interpreter.llm.supports_functions = False
+interpreter.llm.supports_functions = True
 interpreter.llm.context_window = 110000
 interpreter.llm.max_tokens = 4096
 interpreter.auto_run = True

interpreter/terminal_interface/profiles/defaults/vision.yaml

Lines changed: 2 additions & 2 deletions

@@ -3,10 +3,10 @@
 loop: True

 llm:
-  model: "gpt-4-vision-preview"
+  model: "gpt-4o"
   temperature: 0
   supports_vision: True
-  supports_functions: False
+  supports_functions: True
   context_window: 110000
   max_tokens: 4096
   custom_instructions: >

interpreter/terminal_interface/start_terminal_interface.py

Lines changed: 10 additions & 5 deletions

@@ -9,7 +9,6 @@
     contribute_conversations,
 )

-from ..core.core import OpenInterpreter
 from .conversation_navigator import conversation_navigator
 from .profiles.profiles import open_storage_dir, profile, reset_profile
 from .utils.check_for_update import check_for_update

@@ -320,6 +319,12 @@ def start_terminal_interface(interpreter):

     args, unknown_args = parser.parse_known_args()

+    if args.server:
+        # Instead use an async interpreter, which has a server. Set settings on that
+        from interpreter import AsyncInterpreter
+
+        interpreter = AsyncInterpreter()
+
     # handle unknown arguments
     if unknown_args:
         print(f"\nUnrecognized argument(s): {unknown_args}")

@@ -471,14 +476,14 @@ def start_terminal_interface(interpreter):
         conversation_navigator(interpreter)
         return

-    if args.server:
-        interpreter.server()
-        return
-
     validate_llm_settings(
         interpreter
     ) # This should actually just run interpreter.llm.load() once that's == to validate_llm_settings

+    if args.server:
+        interpreter.server.run()
+        return
+
     interpreter.in_terminal_interface = True

     contribute_conversation_launch_logic(interpreter)
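Net effect of this file's changes: `--server` now swaps in an `AsyncInterpreter` before argument handling and settings validation, then starts its bundled server afterwards. A hedged Python equivalent of what the flag does (the import and the `.server.run()` attribute come from the diff; the model setting is illustrative):

```python
# Mirrors `interpreter --server` after this change.
from interpreter import AsyncInterpreter

async_interpreter = AsyncInterpreter()
async_interpreter.llm.model = "gpt-4o"  # illustrative; set settings on the async instance
async_interpreter.server.run()  # blocks, serving on 0.0.0.0:8000 by default
```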
