Skip to content
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion .env.example
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,7 @@
CHROME_PATH=

# OpenAI API key for OpenAI model access
OPENAI_API_KEY=your-api-key-here
OPENAI_API_KEY=your-api-key-here

# Set to true to make API calls wait for tasks to complete before returning (default: false)
PATIENT=false
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ uv run server --port 8000
```
OPENAI_API_KEY=[your api key]
CHROME_PATH=[only change this if you have a custom chrome build]
PATIENT=false # Set to true to make API calls wait for tasks to complete before returning (default: false)
```

- we will be adding support for other LLM providers to power browser-use
Expand Down
168 changes: 134 additions & 34 deletions server/server.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,26 @@
load_dotenv()


def init_configuration() -> Dict[str, any]:
def parse_bool_env(env_var: str, default: bool = False) -> bool:
    """
    Parse a boolean environment variable.

    The value is compared case-insensitively, ignoring surrounding whitespace,
    against the accepted truthy spellings: "true", "yes", "1", "y", "on".
    Any other value that is set (including an empty string) yields False.

    Args:
        env_var: The environment variable name
        default: Value returned when the variable is not set at all

    Returns:
        Boolean value of the environment variable
    """
    value = os.environ.get(env_var)
    if value is None:
        return default

    # Strip before comparing: values coming from hand-edited .env files or
    # shell exports often carry stray whitespace (e.g. "true "), which should
    # not silently turn the flag off.
    return value.strip().lower() in ("true", "yes", "1", "y", "on")


def init_configuration() -> Dict[str, Any]:
"""
Initialize configuration from environment variables with defaults.

Expand Down Expand Up @@ -78,6 +97,8 @@ def init_configuration() -> Dict[str, any]:
"--disable-dev-shm-usage",
"--remote-debugging-port=0", # Use random port to avoid conflicts
],
# Patient mode - if true, functions wait for task completion before returning
"PATIENT_MODE": parse_bool_env("PATIENT", False),
}

return config
Expand Down Expand Up @@ -163,6 +184,9 @@ async def run_browser_task_async(
This function executes a browser automation task with the given URL and action,
and updates the task store with progress and results.

When PATIENT_MODE is enabled, the calling function will wait for this function
to complete before returning to the client.

Args:
task_id: Unique identifier for the task
url: URL to navigate to
Expand Down Expand Up @@ -382,7 +406,9 @@ async def call_tool(
arguments: The arguments to pass to the tool

Returns:
A list of content objects to return to the client
A list of content objects to return to the client.
When PATIENT_MODE is enabled, the browser_use tool will wait for the task to complete
and return the full result immediately instead of just the task ID.

Raises:
ValueError: If required arguments are missing
Expand All @@ -408,7 +434,7 @@ async def call_tool(
}

# Start task in background
asyncio.create_task(
_task = asyncio.create_task(
run_browser_task_async(
task_id=task_id,
url=arguments["url"],
Expand All @@ -420,6 +446,38 @@ async def call_tool(
)
)

# If PATIENT is set, wait for the task to complete
if CONFIG["PATIENT_MODE"]:
try:
await _task
# Return the completed task result instead of just the ID
task_data = task_store[task_id]
if task_data["status"] == "failed":
logger.error(
f"Task {task_id} failed: {task_data.get('error', 'Unknown error')}"
)
return [
types.TextContent(
type="text",
text=json.dumps(task_data, indent=2),
)
]
except Exception as e:
logger.error(f"Error in patient mode execution: {str(e)}")
traceback_str = traceback.format_exc()
# Update task store with error
task_store[task_id]["status"] = "failed"
task_store[task_id]["error"] = str(e)
task_store[task_id]["traceback"] = traceback_str
task_store[task_id]["end_time"] = datetime.now().isoformat()
# Return error information
return [
types.TextContent(
type="text",
text=json.dumps(task_store[task_id], indent=2),
)
]

# Return task ID immediately with explicit sleep instruction
return [
types.TextContent(
Expand Down Expand Up @@ -497,43 +555,85 @@ async def list_tools() -> list[types.Tool]:
"""
List the available tools for the MCP client.

Returns different tool descriptions based on the PATIENT_MODE configuration.
When PATIENT_MODE is enabled, the browser_use tool description indicates it returns
complete results directly. When disabled, it indicates async operation.

Returns:
A list of tool definitions
A list of tool definitions appropriate for the current configuration
"""
return [
types.Tool(
name="browser_use",
description="Performs a browser action and returns a task ID for async execution",
inputSchema={
"type": "object",
"required": ["url", "action"],
"properties": {
"url": {
"type": "string",
"description": "URL to navigate to",
patient_mode = CONFIG["PATIENT_MODE"]

if patient_mode:
return [
types.Tool(
name="browser_use",
description="Performs a browser action and returns the complete result directly (patient mode active)",
inputSchema={
"type": "object",
"required": ["url", "action"],
"properties": {
"url": {
"type": "string",
"description": "URL to navigate to",
},
"action": {
"type": "string",
"description": "Action to perform in the browser",
},
},
"action": {
"type": "string",
"description": "Action to perform in the browser",
},
),
types.Tool(
name="browser_get_result",
description="Gets the result of an asynchronous browser task (not needed in patient mode as browser_use returns complete results directly)",
inputSchema={
"type": "object",
"required": ["task_id"],
"properties": {
"task_id": {
"type": "string",
"description": "ID of the task to get results for",
}
},
},
),
]
else:
return [
types.Tool(
name="browser_use",
description="Performs a browser action and returns a task ID for async execution",
inputSchema={
"type": "object",
"required": ["url", "action"],
"properties": {
"url": {
"type": "string",
"description": "URL to navigate to",
},
"action": {
"type": "string",
"description": "Action to perform in the browser",
},
},
},
},
),
types.Tool(
name="browser_get_result",
description="Gets the result of an asynchronous browser task",
inputSchema={
"type": "object",
"required": ["task_id"],
"properties": {
"task_id": {
"type": "string",
"description": "ID of the task to get results for",
}
),
types.Tool(
name="browser_get_result",
description="Gets the result of an asynchronous browser task",
inputSchema={
"type": "object",
"required": ["task_id"],
"properties": {
"task_id": {
"type": "string",
"description": "ID of the task to get results for",
}
},
},
},
),
]
),
]

@app.list_resources()
async def list_resources() -> list[types.Resource]:
Expand Down
Loading