Merge pull request #19 from samanthavbarron/add-sleep-option

michiosw · web-flow · commit 9b9581ea4e57 · 2025-03-27T17:21:43.000-06:00
Add option to await browser tasks via env var
diff --git a/.env.example b/.env.example
@@ -2,4 +2,7 @@
 CHROME_PATH=
 
 # OpenAI API key for OpenAI model access
-OPENAI_API_KEY=your-api-key-here
+OPENAI_API_KEY=your-api-key-here
+
+# Set to true if you want api calls to wait for tasks to complete (default is false)
+PATIENT=false
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -79,20 +79,16 @@ jobs:
 
     permissions:
       contents: read
-      packages: write
+      # packages: write
       # attestations: write
       id-token: write
 
     steps:
       - name: Checkout repository
         uses: actions/checkout@v4
 
-      - name: Log in to the Container registry
-        uses: docker/login-action@v3
-        with:
-          registry: ${{ env.REGISTRY }}
-          username: ${{ github.actor }}
-          password: ${{ secrets.GITHUB_TOKEN }}
+      - name: Log in to registry
+        run: docker login ghcr.io -u "${{ secrets.COBROWSER_PACKAGE_USER }}" --password-stdin <<< "${{ secrets.COBROWSER_PACKAGE_TOKEN }}"
 
       - name: Extract metadata (tags, labels) for Docker
         id: meta
diff --git a/README.md b/README.md
@@ -25,6 +25,7 @@ uv run server --port 8000
 ```
 OPENAI_API_KEY=[your api key]
 CHROME_PATH=[only change this if you have a custom chrome build]
+PATIENT=false # Set to true if you want api calls to wait for tasks to complete (default is false)
 ```
 
 - we will be adding support for other LLM providers to power browser-use
diff --git a/server/server.py b/server/server.py
@@ -44,7 +44,26 @@
 load_dotenv()
 
 
-def init_configuration() -> Dict[str, any]:
+def parse_bool_env(env_var: str, default: bool = False) -> bool:
+    """
+    Parse a boolean environment variable.
+
+    Args:
+        env_var: The environment variable name
+        default: Default value if not set
+
+    Returns:
+        Boolean value of the environment variable
+    """
+    value = os.environ.get(env_var)
+    if value is None:
+        return default
+
+    # Consider various representations of boolean values
+    return value.lower() in ("true", "yes", "1", "y", "on")
+
+
+def init_configuration() -> Dict[str, Any]:
     """
     Initialize configuration from environment variables with defaults.
 
@@ -78,6 +97,8 @@ def init_configuration() -> Dict[str, any]:
             "--disable-dev-shm-usage",
             "--remote-debugging-port=0",  # Use random port to avoid conflicts
         ],
+        # Patient mode - if true, functions wait for task completion before returning
+        "PATIENT_MODE": parse_bool_env("PATIENT", False),
     }
 
     return config
@@ -163,6 +184,9 @@ async def run_browser_task_async(
     This function executes a browser automation task with the given URL and action,
     and updates the task store with progress and results.
 
+    When PATIENT_MODE is enabled, the calling function will wait for this function
+    to complete before returning to the client.
+
     Args:
         task_id: Unique identifier for the task
         url: URL to navigate to
@@ -382,7 +406,9 @@ async def call_tool(
             arguments: The arguments to pass to the tool
 
         Returns:
-            A list of content objects to return to the client
+            A list of content objects to return to the client.
+            When PATIENT_MODE is enabled, the browser_use tool will wait for the task to complete
+            and return the full result immediately instead of just the task ID.
 
         Raises:
             ValueError: If required arguments are missing
@@ -408,7 +434,7 @@ async def call_tool(
             }
 
             # Start task in background
-            asyncio.create_task(
+            _task = asyncio.create_task(
                 run_browser_task_async(
                     task_id=task_id,
                     url=arguments["url"],
@@ -420,6 +446,38 @@ async def call_tool(
                 )
             )
 
+            # If PATIENT is set, wait for the task to complete
+            if CONFIG["PATIENT_MODE"]:
+                try:
+                    await _task
+                    # Return the completed task result instead of just the ID
+                    task_data = task_store[task_id]
+                    if task_data["status"] == "failed":
+                        logger.error(
+                            f"Task {task_id} failed: {task_data.get('error', 'Unknown error')}"
+                        )
+                    return [
+                        types.TextContent(
+                            type="text",
+                            text=json.dumps(task_data, indent=2),
+                        )
+                    ]
+                except Exception as e:
+                    logger.error(f"Error in patient mode execution: {str(e)}")
+                    traceback_str = traceback.format_exc()
+                    # Update task store with error
+                    task_store[task_id]["status"] = "failed"
+                    task_store[task_id]["error"] = str(e)
+                    task_store[task_id]["traceback"] = traceback_str
+                    task_store[task_id]["end_time"] = datetime.now().isoformat()
+                    # Return error information
+                    return [
+                        types.TextContent(
+                            type="text",
+                            text=json.dumps(task_store[task_id], indent=2),
+                        )
+                    ]
+
             # Return task ID immediately with explicit sleep instruction
             return [
                 types.TextContent(
@@ -497,43 +555,85 @@ async def list_tools() -> list[types.Tool]:
         """
         List the available tools for the MCP client.
 
+        Returns different tool descriptions based on the PATIENT_MODE configuration.
+        When PATIENT_MODE is enabled, the browser_use tool description indicates it returns
+        complete results directly. When disabled, it indicates async operation.
+
         Returns:
-            A list of tool definitions
+            A list of tool definitions appropriate for the current configuration
         """
-        return [
-            types.Tool(
-                name="browser_use",
-                description="Performs a browser action and returns a task ID for async execution",
-                inputSchema={
-                    "type": "object",
-                    "required": ["url", "action"],
-                    "properties": {
-                        "url": {
-                            "type": "string",
-                            "description": "URL to navigate to",
+        patient_mode = CONFIG["PATIENT_MODE"]
+
+        if patient_mode:
+            return [
+                types.Tool(
+                    name="browser_use",
+                    description="Performs a browser action and returns the complete result directly (patient mode active)",
+                    inputSchema={
+                        "type": "object",
+                        "required": ["url", "action"],
+                        "properties": {
+                            "url": {
+                                "type": "string",
+                                "description": "URL to navigate to",
+                            },
+                            "action": {
+                                "type": "string",
+                                "description": "Action to perform in the browser",
+                            },
                         },
-                        "action": {
-                            "type": "string",
-                            "description": "Action to perform in the browser",
+                    },
+                ),
+                types.Tool(
+                    name="browser_get_result",
+                    description="Gets the result of an asynchronous browser task (not needed in patient mode as browser_use returns complete results directly)",
+                    inputSchema={
+                        "type": "object",
+                        "required": ["task_id"],
+                        "properties": {
+                            "task_id": {
+                                "type": "string",
+                                "description": "ID of the task to get results for",
+                            }
+                        },
+                    },
+                ),
+            ]
+        else:
+            return [
+                types.Tool(
+                    name="browser_use",
+                    description="Performs a browser action and returns a task ID for async execution",
+                    inputSchema={
+                        "type": "object",
+                        "required": ["url", "action"],
+                        "properties": {
+                            "url": {
+                                "type": "string",
+                                "description": "URL to navigate to",
+                            },
+                            "action": {
+                                "type": "string",
+                                "description": "Action to perform in the browser",
+                            },
                         },
                     },
-                },
-            ),
-            types.Tool(
-                name="browser_get_result",
-                description="Gets the result of an asynchronous browser task",
-                inputSchema={
-                    "type": "object",
-                    "required": ["task_id"],
-                    "properties": {
-                        "task_id": {
-                            "type": "string",
-                            "description": "ID of the task to get results for",
-                        }
+                ),
+                types.Tool(
+                    name="browser_get_result",
+                    description="Gets the result of an asynchronous browser task",
+                    inputSchema={
+                        "type": "object",
+                        "required": ["task_id"],
+                        "properties": {
+                            "task_id": {
+                                "type": "string",
+                                "description": "ID of the task to get results for",
+                            }
+                        },
                     },
-                },
-            ),
-        ]
+                ),
+            ]
 
     @app.list_resources()
     async def list_resources() -> list[types.Resource]: