
Commit 44192e2

Merge pull request #1346 from OpenInterpreter/development
Development Update
2 parents c044179 + 098e4da commit 44192e2

File tree

9 files changed: +176 additions, -58 deletions


docs/server/usage.mdx

Lines changed: 119 additions & 39 deletions
````diff
@@ -1,6 +1,4 @@
----
-title: Server Usage
----
+# Server Usage Guide
 
 ## Starting the Server
 
@@ -27,15 +25,29 @@ async_interpreter.server.run(port=8000) # Default port is 8000, but you can cus
 Connect to the WebSocket server at `ws://localhost:8000/`.
 
 ### Message Format
-Messages must follow the LMC format with start and end flags. For detailed specifications, see the [LMC messages documentation](https://docs.openinterpreter.com/protocols/lmc-messages).
+The server uses an extended message format that allows for rich, multi-part messages. Here's the basic structure:
 
-Basic message structure:
 ```json
-{"role": "user", "type": "message", "start": true}
+{"role": "user", "start": true}
 {"role": "user", "type": "message", "content": "Your message here"}
-{"role": "user", "type": "message", "end": true}
+{"role": "user", "end": true}
 ```
 
+### Multi-part Messages
+You can send complex messages with multiple components:
+
+1. Start with `{"role": "user", "start": true}`
+2. Add various types of content (message, file, image, etc.)
+3. End with `{"role": "user", "end": true}`
+
+### Content Types
+You can include various types of content in your messages:
+
+- Text messages: `{"role": "user", "type": "message", "content": "Your text here"}`
+- File paths: `{"role": "user", "type": "file", "content": "path/to/file"}`
+- Images: `{"role": "user", "type": "image", "format": "path", "content": "path/to/photo"}`
+- Audio: `{"role": "user", "type": "audio", "format": "wav", "content": "path/to/audio.wav"}`
+
 ### Control Commands
 To control the server's behavior, send the following commands:
 
@@ -51,7 +63,7 @@ To control the server's behavior, send the following commands:
 ```
 This executes a generated code block and allows the agent to proceed.
 
-**Important**: If `auto_run` is set to `False`, the agent will pause after generating code blocks. You must send the "go" command to continue execution.
+**Note**: If `auto_run` is set to `False`, the agent will pause after generating code blocks. You must send the "go" command to continue execution.
 
 ### Completion Status
 The server indicates completion with the following message:
@@ -67,8 +79,46 @@ If an error occurs, the server will send an error message in the following forma
 ```
 Your client should be prepared to handle these error messages appropriately.
 
-### Example WebSocket Interaction
-Here's a simple example demonstrating the WebSocket interaction:
+## Code Execution Review
+
+After code blocks are executed, you'll receive a review message:
+
+```json
+{
+  "role": "assistant",
+  "type": "review",
+  "content": "Review of the executed code, including safety assessment and potential irreversible actions."
+}
+```
+
+This review provides important information about the safety and potential impact of the executed code. Pay close attention to these messages, especially when dealing with operations that might have significant effects on your system.
+
+The `content` field of the review message may have two possible formats:
+
+1. If the code is deemed completely safe, the content will be exactly `"<SAFE>"`.
+2. Otherwise, it will contain an explanation of why the code might be unsafe or have irreversible effects.
+
+Example of a safe code review:
+```json
+{
+  "role": "assistant",
+  "type": "review",
+  "content": "<SAFE>"
+}
+```
+
+Example of a potentially unsafe code review:
+```json
+{
+  "role": "assistant",
+  "type": "review",
+  "content": "This code performs file deletion operations which are irreversible. Please review carefully before proceeding."
+}
+```
+
+## Example WebSocket Interaction
+
+Here's an example demonstrating the WebSocket interaction:
 
 ```python
 import websockets
@@ -77,21 +127,25 @@ import asyncio
 
 async def websocket_interaction():
     async with websockets.connect("ws://localhost:8000/") as websocket:
-        # Send a message
-        await websocket.send(json.dumps({"role": "user", "type": "message", "start": True}))
-        await websocket.send(json.dumps({"role": "user", "type": "message", "content": "What's 2 + 2?"}))
-        await websocket.send(json.dumps({"role": "user", "type": "message", "end": True}))
+        # Send a multi-part user message
+        await websocket.send(json.dumps({"role": "user", "start": True}))
+        await websocket.send(json.dumps({"role": "user", "type": "message", "content": "Analyze this image:"}))
+        await websocket.send(json.dumps({"role": "user", "type": "image", "format": "path", "content": "path/to/image.jpg"}))
+        await websocket.send(json.dumps({"role": "user", "end": True}))
 
         # Receive and process messages
         while True:
             message = await websocket.recv()
            data = json.loads(message)
 
             if data.get("type") == "message":
-                print(data.get("content", ""), end="", flush=True)
+                print(f"Assistant: {data.get('content', '')}")
+            elif data.get("type") == "review":
+                print(f"Code Review: {data.get('content')}")
             elif data.get("type") == "error":
                 print(f"Error: {data.get('content')}")
             elif data == {"role": "assistant", "type": "status", "content": "complete"}:
+                print("Interaction complete")
                 break
 
 asyncio.run(websocket_interaction())
@@ -100,7 +154,7 @@ asyncio.run(websocket_interaction())
 ## HTTP API
 
 ### Modifying Settings
-To change server settings, send a POST request to `http://localhost:8000/settings`. The payload should conform to [the interpreter object's settings](https://docs.openinterpreter.com/settings/all-settings).
+To change server settings, send a POST request to `http://localhost:8000/settings`. The payload should conform to the interpreter object's settings.
 
 Example:
 ```python
@@ -122,9 +176,56 @@ Example:
 ```python
 response = requests.get("http://localhost:8000/settings/custom_instructions")
 print(response.json())
-# Output: {"custom_instructions": "You only write react."}
+# Output: {"custom_instructions": "You only write Python code."}
+```
+
+## OpenAI-Compatible Endpoint
+
+The server provides an OpenAI-compatible endpoint at `/openai`. This allows you to use the server with any tool or library that's designed to work with the OpenAI API.
+
+### Chat Completions Endpoint
+
+The chat completions endpoint is available at:
+
+```
+[server_url]/openai/chat/completions
+```
+
+To use this endpoint, set the `api_base` in your OpenAI client or configuration to `[server_url]/openai`. For example:
+
+```python
+import openai
+
+openai.api_base = "http://localhost:8000/openai"  # Replace with your server URL if different
+openai.api_key = "dummy"  # The key is not used but required by the OpenAI library
+
+response = openai.ChatCompletion.create(
+    model="gpt-3.5-turbo",  # This model name is ignored, but required
+    messages=[
+        {"role": "system", "content": "You are a helpful assistant."},
+        {"role": "user", "content": "What's the capital of France?"}
+    ]
+)
+
+print(response.choices[0].message['content'])
 ```
 
+Note that only the chat completions endpoint (`/chat/completions`) is implemented. Other OpenAI API endpoints are not available.
+
+When using this endpoint:
+- The `model` parameter is required but ignored.
+- The `api_key` is required by the OpenAI library but not used by the server.
+
+## Best Practices
+
+1. Always handle the "complete" status message to ensure your client knows when the server has finished processing.
+2. If `auto_run` is set to `False`, remember to send the "go" command to execute code blocks and continue the interaction.
+3. Implement proper error handling in your client to manage potential connection issues, unexpected server responses, or server-sent error messages.
+4. Use the AsyncInterpreter class when working with the server in Python to ensure compatibility with asynchronous operations.
+5. Pay attention to the code execution review messages for important safety and operational information.
+6. Utilize the multi-part user message structure for complex inputs, including file paths and images.
+7. When sending file paths or image paths, ensure they are accessible to the server.
+
 ## Advanced Usage: Accessing the FastAPI App Directly
 
 The FastAPI app is exposed at `async_interpreter.server.app`. This allows you to add custom routes or host the app using Uvicorn directly.
@@ -147,25 +248,4 @@ if __name__ == "__main__":
     uvicorn.run(app, host="0.0.0.0", port=8000)
 ```
 
-## Using Docker
-
-You can also run the server using Docker. First, build the Docker image from the root of the repository:
-
-```bash
-docker build -t open-interpreter .
-```
-
-Then, run the container:
-
-```bash
-docker run -p 8000:8000 open-interpreter
-```
-
-This will expose the server on port 8000 of your host machine.
-
-## Best Practices
-1. Always handle the "complete" status message to ensure your client knows when the server has finished processing.
-2. If `auto_run` is set to `False`, remember to send the "go" command to execute code blocks and continue the interaction.
-3. Implement proper error handling in your client to manage potential connection issues, unexpected server responses, or server-sent error messages.
-4. Use the AsyncInterpreter class when working with the server in Python to ensure compatibility with asynchronous operations.
-5. When deploying in production, consider using the Docker container for easier setup and consistent environment across different machines.
+This guide covers all aspects of using the server, including the WebSocket API, HTTP API, OpenAI-compatible endpoint, code execution review, and various features. It provides clear explanations and examples for users to understand how to interact with the server effectively.
````
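
As a companion to the docs above, here is one way a client could act on the new `review` messages. This is a minimal sketch assuming the message shapes shown in the diff; `handle_review` is a hypothetical helper, not part of the project:

```python
# Hypothetical client-side helper (not part of Open Interpreter) showing one
# way to gate on the review messages described in the docs above.
def handle_review(msg: dict) -> bool:
    """Return True if execution can proceed without user confirmation."""
    if msg.get("type") != "review":
        return True
    content = msg.get("content", "")
    if content == "<SAFE>":  # exact sentinel the docs promise for safe code
        return True
    print(f"Review warning: {content}")  # surface the explanation to the user
    return False

# Shapes taken from the documentation examples:
assert handle_review({"role": "assistant", "type": "review", "content": "<SAFE>"})
assert not handle_review({
    "role": "assistant",
    "type": "review",
    "content": "This code performs file deletion operations which are irreversible.",
})
```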

interpreter/core/async_core.py

Lines changed: 13 additions & 9 deletions
```diff
@@ -137,7 +137,11 @@ def accumulate(self, chunk):
             # We don't do anything with these.
             pass
 
-        elif "start" in chunk:
+        elif (
+            "start" in chunk
+            or chunk["type"] != self.messages[-1]["type"]
+            or chunk.get("format") != self.messages[-1].get("format")
+        ):
             chunk_copy = (
                 chunk.copy()
             )  # So we don't modify the original chunk, which feels wrong.
@@ -277,10 +281,14 @@ async def receive_input():
         try:
             data = await websocket.receive()
 
-            print("Received:", data)
+            if False:
+                print("Received:", data)
 
-            if data.get("type") == "websocket.receive" and "text" in data:
-                data = json.loads(data["text"])
+            if data.get("type") == "websocket.receive":
+                if "text" in data:
+                    data = json.loads(data["text"])
+                elif "bytes" in data:
+                    data = data["bytes"]
                 await async_interpreter.input(data)
             elif data.get("type") == "websocket.disconnect":
                 print("Disconnecting.")
@@ -363,12 +371,8 @@ async def send_output():
     # TODO
     @router.post("/")
     async def post_input(payload: Dict[str, Any]):
-        # This doesn't work, but something like this should exist
-        query = payload.get("query")
-        if not query:
-            return {"error": "Query is required."}, 400
         try:
-            async_interpreter.input.put(query)
+            async_interpreter.input(payload)
             return {"status": "success"}
         except Exception as e:
             return {"error": str(e)}, 500
```
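
To see the effect of the widened `accumulate` condition in isolation, here is a standalone sketch (simplified from the class above, not the actual implementation):

```python
# Simplified sketch of the widened accumulate() rule: a new message now begins
# on an explicit "start" flag OR whenever the chunk's type/format differs from
# the message currently being built. Not the real class.
messages = []

def accumulate(chunk):
    if (
        not messages
        or "start" in chunk
        or chunk["type"] != messages[-1]["type"]
        or chunk.get("format") != messages[-1].get("format")
    ):
        chunk_copy = {k: v for k, v in chunk.items() if k != "start"}
        messages.append(chunk_copy)
    else:
        messages[-1]["content"] += chunk.get("content", "")

accumulate({"role": "assistant", "type": "message", "content": "Hello"})
accumulate({"role": "assistant", "type": "message", "content": " world"})
accumulate({"role": "assistant", "type": "code", "format": "python", "content": "1+1"})
print(len(messages))  # 2: the type change alone opened a new message
```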

interpreter/core/core.py

Lines changed: 5 additions & 1 deletion
```diff
@@ -303,7 +303,11 @@ def _respond_and_store(self):
 
     # Utility function
     def is_active_line_chunk(chunk):
-        return "format" in chunk and chunk["format"] == "active_line"
+        if "format" in chunk and chunk["format"] == "active_line":
+            return True
+        if chunk["type"] == "review":
+            return True
+        return False
 
     last_flag_base = None
 
```
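
A minimal sketch of what the updated helper now filters, assuming (as the surrounding code suggests) that matching chunks are excluded from the stored conversation:

```python
# Standalone copy of the updated helper: "review" chunks are now filtered out
# alongside active_line markers (assumed usage: dropped from stored messages).
def is_active_line_chunk(chunk):
    if "format" in chunk and chunk["format"] == "active_line":
        return True
    if chunk["type"] == "review":
        return True
    return False

chunks = [
    {"type": "code", "format": "active_line", "content": 1},
    {"type": "review", "content": "<SAFE>"},
    {"type": "message", "content": "Done."},
]
print([c for c in chunks if not is_active_line_chunk(c)])
# [{'type': 'message', 'content': 'Done.'}]
```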

interpreter/core/llm/llm.py

Lines changed: 2 additions & 2 deletions
```diff
@@ -306,6 +306,8 @@ def load(self):
         if self._is_loaded:
             return
 
+        self._is_loaded = True
+
         if self.model.startswith("ollama/"):
             model_name = self.model.replace("ollama/", "")
             try:
@@ -371,8 +373,6 @@ def load(self):
             except:
                 pass
 
-        self._is_loaded = True
-
 
 def fixed_litellm_completions(**params):
     """
```

interpreter/core/llm/run_function_calling_llm.py

Lines changed: 7 additions & 2 deletions
```diff
@@ -40,25 +40,30 @@ def run_function_calling_llm(llm, request_params):
     accumulated_deltas = {}
     language = None
     code = ""
+    function_call_detected = False
 
     for chunk in llm.completions(**request_params):
         if "choices" not in chunk or len(chunk["choices"]) == 0:
             # This happens sometimes
             continue
 
         delta = chunk["choices"][0]["delta"]
-
         # Accumulate deltas
         accumulated_deltas = merge_deltas(accumulated_deltas, delta)
 
         if "content" in delta and delta["content"]:
-            yield {"type": "message", "content": delta["content"]}
+            if function_call_detected:
+                # More content after a code block? This is a code review by a judge layer.
+                yield {"type": "review", "content": delta["content"]}
+            else:
+                yield {"type": "message", "content": delta["content"]}
 
         if (
             accumulated_deltas.get("function_call")
             and "arguments" in accumulated_deltas["function_call"]
             and accumulated_deltas["function_call"]["arguments"]
         ):
+            function_call_detected = True
             if (
                 "name" in accumulated_deltas["function_call"]
                 and accumulated_deltas["function_call"]["name"] == "execute"
```
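
Here is a self-contained sketch of the new routing rule: once function-call arguments have streamed, subsequent text deltas are emitted as `review` chunks. The delta shapes below are assumed for illustration:

```python
# Toy delta stream illustrating the rule above; shapes are assumed, not taken
# from a real LLM response.
deltas = [
    {"content": "Let me run a command."},
    {"function_call": {"name": "execute", "arguments": '{"code": "ls"}'}},
    {"content": "This only lists files; it is safe."},
]

function_call_detected = False
for delta in deltas:
    if delta.get("function_call", {}).get("arguments"):
        function_call_detected = True
    if delta.get("content"):
        kind = "review" if function_call_detected else "message"
        print({"type": kind, "content": delta["content"]})
# First delta is emitted as a "message", the last as a "review".
```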

interpreter/core/respond.py

Lines changed: 14 additions & 3 deletions
```diff
@@ -1,6 +1,7 @@
 import json
 import os
 import re
+import time
 import traceback
 
 os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
@@ -181,8 +182,8 @@ def respond(interpreter):
 
             if code.replace("\n", "").replace(" ", "").startswith("{language:"):
                 try:
-                    code = code.replace("language: ", "'language': ").replace(
-                        "code: ", "'code': "
+                    code = code.replace("language: ", '"language": ').replace(
+                        "code: ", '"code": '
                     )
                     code_dict = json.loads(code)
                     if set(code_dict.keys()) == {"language", "code"}:
@@ -197,9 +198,19 @@ def respond(interpreter):
                 except:
                     pass
 
-            if language == "text" or language == "markdown":
+            if (
+                language == "text"
+                or language == "markdown"
+                or language == "plaintext"
+            ):
                 # It does this sometimes just to take notes. Let it, it's useful.
                 # In the future we should probably not detect this behavior as code at all.
+                real_content = interpreter.messages[-1]["content"]
+                interpreter.messages[-1] = {
+                    "role": "assistant",
+                    "type": "message",
+                    "content": f"```\n{real_content}\n```",
+                }
                 continue
 
             # Is this language enabled/supported?
```
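
To illustrate the new handling of `text`/`markdown`/`plaintext` "code", here is a simplified sketch. The real code mutates `interpreter.messages[-1]` in place; this toy function just returns the rewritten message:

```python
# Simplified version of the note-wrapping above: "code" in a prose language is
# rewritten as a fenced plain message instead of being executed.
def wrap_note(message, language):
    if language in ("text", "markdown", "plaintext"):
        real_content = message["content"]
        return {
            "role": "assistant",
            "type": "message",
            "content": f"```\n{real_content}\n```",
        }
    return message

note = {"role": "assistant", "type": "code", "content": "- remember to test"}
print(wrap_note(note, "markdown")["content"])
```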

interpreter/terminal_interface/terminal_interface.py

Lines changed: 4 additions & 0 deletions
```diff
@@ -243,6 +243,10 @@ def terminal_interface(interpreter, message):
                 if "content" in chunk:
                     active_block.code += chunk["content"]
 
+            if chunk["type"] == "review" and chunk.get("content"):
+                # Specialized models can emit a code review.
+                print(chunk.get("content"), end="", flush=True)
+
             # Execution notice
             if chunk["type"] == "confirmation":
                 if not interpreter.auto_run:
```
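
And a tiny sketch of how streamed `review` chunks could surface in a terminal loop (toy chunk list; the real function handles many more chunk types):

```python
# Toy rendering loop: review content is streamed straight to the terminal as
# it arrives, mirroring the added branch above.
chunks = [
    {"type": "code", "format": "python", "content": "print('hi')"},
    {"type": "review", "content": "Deletes nothing; safe to run. "},
    {"type": "review", "content": "<SAFE>"},
]
for chunk in chunks:
    if chunk["type"] == "review" and chunk.get("content"):
        print(chunk.get("content"), end="", flush=True)
print()
```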

interpreter/terminal_interface/validate_llm_settings.py

Lines changed: 2 additions & 2 deletions
```diff
@@ -72,8 +72,8 @@ def validate_llm_settings(interpreter):
                     """
 
                     **Tip:** To save this key for later, run one of the following and then restart your terminal.
-                    MacOS: `echo '\\nexport OPENAI_API_KEY=your_api_key' >> ~/.zshrc`
-                    Linux: `echo '\\nexport OPENAI_API_KEY=your_api_key' >> ~/.bashrc`
+                    MacOS: `echo 'export OPENAI_API_KEY=your_api_key' >> ~/.zshrc`
+                    Linux: `echo 'export OPENAI_API_KEY=your_api_key' >> ~/.bashrc`
                     Windows: `setx OPENAI_API_KEY your_api_key`
 
                     ---"""
```
