Add better request handling to the OpenAI-compatible server

benxu3 · benxu3 · commit a85094dc9ca6 · 2024-12-06T16:15:18.000-08:00
diff --git a/interpreter/interpreter.py b/interpreter/interpreter.py
@@ -207,15 +207,15 @@ def default_system_message(self):
             print("Error adding system capability for cwd")
 
         system_message += "</SYSTEM_CAPABILITY>"
-
+        
         # Add web search capability if enabled
         if (
             os.environ.get("INTERPRETER_EXPERIMENTAL_WEB_SEARCH", "false").lower()
             == "true"
         ):
             system_message = system_message.replace(
                 "</SYSTEM_CAPABILITY>",
-                "* For fast web searches (like up-to-date docs) curl https://api.openinterpreter.com/v0/browser/search?query=your+search+query\n</SYSTEM_CAPABILITY>",
+                "* For any web search requests, curl https://api.openinterpreter.com/v0/browser/search?query=your+search+query\n</SYSTEM_CAPABILITY>",
             )
 
         # Update system prompt for Mac OS, if computer tool is enabled
@@ -249,7 +249,7 @@ async def async_respond(self, user_input=None):
         provider = self.provider  # Keep existing provider if set
         max_tokens = self.max_tokens  # Keep existing max_tokens if set
 
-        if self.model == "claude-3-5-sonnet-latest":
+        if self.model == "claude-3-5-sonnet":
             # For some reason, Litellm can't find the model info for claude-3-5-sonnet-latest
             provider = "anthropic"
 
@@ -971,6 +971,12 @@ async def async_chat(self):
                 except KeyboardInterrupt:
                     self._spinner.stop()
                 except asyncio.CancelledError:
+                    print("ASYNC CHAT INSIDE INTERPRETER CANCELLED ERROR HERE")
+                    print("ASYNC CHAT INSIDE INTERPRETER CANCELLED ERROR HERE")
+                    print("ASYNC CHAT INSIDE INTERPRETER CANCELLED ERROR HERE")
+                    print("ASYNC CHAT INSIDE INTERPRETER CANCELLED ERROR HERE")
+                    print("ASYNC CHAT INSIDE INTERPRETER CANCELLED ERROR HERE")
+                    print("ASYNC CHAT INSIDE INTERPRETER CANCELLED ERROR HERE")
                     self._spinner.stop()
 
                 print()
diff --git a/interpreter/profiles.py b/interpreter/profiles.py
@@ -32,7 +32,7 @@ class Profile:
     def __init__(self):
         # Default values if no profile exists
         # Model configuration
-        self.model = "claude-3-5-sonnet-latest"  # The LLM model to use
+        self.model = "claude-3-5-sonnet"  # The LLM model to use
         self.provider = (
             None  # The model provider (e.g. anthropic, openai) None will auto-detect
         )
diff --git a/interpreter/server.py b/interpreter/server.py
@@ -8,6 +8,7 @@
 from fastapi import FastAPI, Request
 from fastapi.responses import JSONResponse, StreamingResponse
 from pydantic import BaseModel
+from asyncio import CancelledError, Task
 
 
 class ChatCompletionRequest(BaseModel):
@@ -35,14 +36,27 @@ def __init__(self, interpreter):
         # Setup routes
         self.app.post("/chat/completions")(self.chat_completion)
 
+        # Add a field to track the current request task
+        self._current_request: Optional[Task] = None
+
     async def chat_completion(self, request: Request):
         """Main chat completion endpoint"""
+        # Cancel any existing request
+        if self._current_request and not self._current_request.done():
+            self._current_request.cancel()
+            try:
+                await self._current_request
+            except CancelledError:
+                pass
+
         body = await request.json()
+        if self.interpreter.debug:
+            print("Request body:", body)
         try:
             req = ChatCompletionRequest(**body)
         except Exception as e:
-            print("Validation error:", str(e))  # Debug print
-            print("Request body:", body)  # Print the request body
+            print("Validation error:", str(e))
+            print("Request body:", body)
             raise
 
         # Filter out system message
@@ -75,18 +89,6 @@ async def _stream_response(self):
                             delta["function_call"] = choice.delta.function_call
                         if choice.delta.tool_calls is not None:
                             pass
-                            # Convert tool_calls to dict representation
-                            # delta["tool_calls"] = [
-                            #     {
-                            #         "index": tool_call.index,
-                            #         "id": tool_call.id,
-                            #         "type": tool_call.type,
-                            #         "function": {
-                            #             "name": tool_call.function.name,
-                            #             "arguments": tool_call.function.arguments
-                            #         }
-                            #     } for tool_call in choice.delta.tool_calls
-                            # ]
 
                     choices.append(
                         {
@@ -108,11 +110,16 @@ async def _stream_response(self):
                     data["system_fingerprint"] = chunk.system_fingerprint
 
                 yield f"data: {json.dumps(data)}\n\n"
-        except asyncio.CancelledError:
-            # Set stop flag when stream is cancelled
-            self.interpreter._stop_flag = True
+
+        except CancelledError:
+            # Handle cancellation gracefully
+            print("Request cancelled - cleaning up...")
+
             raise
+        except Exception as e:
+            print(f"Error in stream: {str(e)}")
         finally:
+            # Always send DONE message and cleanup
             yield "data: [DONE]\n\n"
 
     def run(self):

Original file line number	Diff line number	Diff line change
`@@ -32,7 +32,7 @@ class Profile:`
`32`	`32`	`def __init__(self):`
`33`	`33`	`# Default values if no profile exists`
`34`	`34`	`# Model configuration`
`35`		`- self.model = "claude-3-5-sonnet-latest" # The LLM model to use`
	`35`	`+ self.model = "claude-3-5-sonnet" # The LLM model to use`
`36`	`36`	`self.provider = (`
`37`	`37`	`None # The model provider (e.g. anthropic, openai) None will auto-detect`
`38`	`38`	`)`