Skip to content

Commit 9b9581e

Browse files
authored
Merge pull request #19 from samanthavbarron/add-sleep-option
Add option to await browser tasks via env var
2 parents 2cfdaa1 + cfba722 commit 9b9581e

File tree

4 files changed

+142
-42
lines changed

4 files changed

+142
-42
lines changed

.env.example

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,4 +2,7 @@
22
CHROME_PATH=
33

44
# OpenAI API key for OpenAI model access
5-
OPENAI_API_KEY=your-api-key-here
5+
OPENAI_API_KEY=your-api-key-here
6+
7+
# Set to true to make API calls wait for browser tasks to complete before returning (default: false)
8+
PATIENT=false

.github/workflows/ci.yml

Lines changed: 3 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -79,20 +79,16 @@ jobs:
7979

8080
permissions:
8181
contents: read
82-
packages: write
82+
# packages: write
8383
# attestations: write
8484
id-token: write
8585

8686
steps:
8787
- name: Checkout repository
8888
uses: actions/checkout@v4
8989

90-
- name: Log in to the Container registry
91-
uses: docker/login-action@v3
92-
with:
93-
registry: ${{ env.REGISTRY }}
94-
username: ${{ github.actor }}
95-
password: ${{ secrets.GITHUB_TOKEN }}
90+
- name: Log in to registry
91+
run: docker login ghcr.io -u "${{ secrets.COBROWSER_PACKAGE_USER }}" --password-stdin <<< "${{ secrets.COBROWSER_PACKAGE_TOKEN }}"
9692

9793
- name: Extract metadata (tags, labels) for Docker
9894
id: meta

README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ uv run server --port 8000
2525
```
2626
OPENAI_API_KEY=[your api key]
2727
CHROME_PATH=[only change this if you have a custom chrome build]
28+
PATIENT=false # Set to true to make API calls wait for browser tasks to complete before returning (default: false)
2829
```
2930

3031
- we will be adding support for other LLM providers to power browser-use

server/server.py

Lines changed: 134 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,26 @@
4444
load_dotenv()
4545

4646

47-
def init_configuration() -> Dict[str, any]:
47+
def parse_bool_env(env_var: str, default: bool = False) -> bool:
    """
    Parse a boolean environment variable.

    The value is compared case-insensitively, ignoring surrounding
    whitespace, against the truthy spellings "true", "yes", "1", "y" and
    "on". Any other value that is set (including an empty string) is
    treated as False.

    Args:
        env_var: The environment variable name
        default: Value returned when the variable is not set at all

    Returns:
        Boolean value of the environment variable
    """
    value = os.environ.get(env_var)
    if value is None:
        return default

    # Strip before comparing so values such as "true " or " 1" (easy to
    # produce with quoted .env entries or shell exports) still count as true
    # instead of silently disabling the option.
    return value.strip().lower() in ("true", "yes", "1", "y", "on")
64+
65+
66+
def init_configuration() -> Dict[str, Any]:
4867
"""
4968
Initialize configuration from environment variables with defaults.
5069
@@ -78,6 +97,8 @@ def init_configuration() -> Dict[str, any]:
7897
"--disable-dev-shm-usage",
7998
"--remote-debugging-port=0", # Use random port to avoid conflicts
8099
],
100+
# Patient mode - if true, functions wait for task completion before returning
101+
"PATIENT_MODE": parse_bool_env("PATIENT", False),
81102
}
82103

83104
return config
@@ -163,6 +184,9 @@ async def run_browser_task_async(
163184
This function executes a browser automation task with the given URL and action,
164185
and updates the task store with progress and results.
165186
187+
When PATIENT_MODE is enabled, the calling function will wait for this function
188+
to complete before returning to the client.
189+
166190
Args:
167191
task_id: Unique identifier for the task
168192
url: URL to navigate to
@@ -382,7 +406,9 @@ async def call_tool(
382406
arguments: The arguments to pass to the tool
383407
384408
Returns:
385-
A list of content objects to return to the client
409+
A list of content objects to return to the client.
410+
When PATIENT_MODE is enabled, the browser_use tool will wait for the task to complete
411+
and return the full result immediately instead of just the task ID.
386412
387413
Raises:
388414
ValueError: If required arguments are missing
@@ -408,7 +434,7 @@ async def call_tool(
408434
}
409435

410436
# Start task in background
411-
asyncio.create_task(
437+
_task = asyncio.create_task(
412438
run_browser_task_async(
413439
task_id=task_id,
414440
url=arguments["url"],
@@ -420,6 +446,38 @@ async def call_tool(
420446
)
421447
)
422448

449+
# If PATIENT is set, wait for the task to complete
450+
if CONFIG["PATIENT_MODE"]:
451+
try:
452+
await _task
453+
# Return the completed task result instead of just the ID
454+
task_data = task_store[task_id]
455+
if task_data["status"] == "failed":
456+
logger.error(
457+
f"Task {task_id} failed: {task_data.get('error', 'Unknown error')}"
458+
)
459+
return [
460+
types.TextContent(
461+
type="text",
462+
text=json.dumps(task_data, indent=2),
463+
)
464+
]
465+
except Exception as e:
466+
logger.error(f"Error in patient mode execution: {str(e)}")
467+
traceback_str = traceback.format_exc()
468+
# Update task store with error
469+
task_store[task_id]["status"] = "failed"
470+
task_store[task_id]["error"] = str(e)
471+
task_store[task_id]["traceback"] = traceback_str
472+
task_store[task_id]["end_time"] = datetime.now().isoformat()
473+
# Return error information
474+
return [
475+
types.TextContent(
476+
type="text",
477+
text=json.dumps(task_store[task_id], indent=2),
478+
)
479+
]
480+
423481
# Return task ID immediately with explicit sleep instruction
424482
return [
425483
types.TextContent(
@@ -497,43 +555,85 @@ async def list_tools() -> list[types.Tool]:
497555
"""
498556
List the available tools for the MCP client.
499557
558+
Returns different tool descriptions based on the PATIENT_MODE configuration.
559+
When PATIENT_MODE is enabled, the browser_use tool description indicates it returns
560+
complete results directly. When disabled, it indicates async operation.
561+
500562
Returns:
501-
A list of tool definitions
563+
A list of tool definitions appropriate for the current configuration
502564
"""
503-
return [
504-
types.Tool(
505-
name="browser_use",
506-
description="Performs a browser action and returns a task ID for async execution",
507-
inputSchema={
508-
"type": "object",
509-
"required": ["url", "action"],
510-
"properties": {
511-
"url": {
512-
"type": "string",
513-
"description": "URL to navigate to",
565+
patient_mode = CONFIG["PATIENT_MODE"]
566+
567+
if patient_mode:
568+
return [
569+
types.Tool(
570+
name="browser_use",
571+
description="Performs a browser action and returns the complete result directly (patient mode active)",
572+
inputSchema={
573+
"type": "object",
574+
"required": ["url", "action"],
575+
"properties": {
576+
"url": {
577+
"type": "string",
578+
"description": "URL to navigate to",
579+
},
580+
"action": {
581+
"type": "string",
582+
"description": "Action to perform in the browser",
583+
},
514584
},
515-
"action": {
516-
"type": "string",
517-
"description": "Action to perform in the browser",
585+
},
586+
),
587+
types.Tool(
588+
name="browser_get_result",
589+
description="Gets the result of an asynchronous browser task (not needed in patient mode as browser_use returns complete results directly)",
590+
inputSchema={
591+
"type": "object",
592+
"required": ["task_id"],
593+
"properties": {
594+
"task_id": {
595+
"type": "string",
596+
"description": "ID of the task to get results for",
597+
}
598+
},
599+
},
600+
),
601+
]
602+
else:
603+
return [
604+
types.Tool(
605+
name="browser_use",
606+
description="Performs a browser action and returns a task ID for async execution",
607+
inputSchema={
608+
"type": "object",
609+
"required": ["url", "action"],
610+
"properties": {
611+
"url": {
612+
"type": "string",
613+
"description": "URL to navigate to",
614+
},
615+
"action": {
616+
"type": "string",
617+
"description": "Action to perform in the browser",
618+
},
518619
},
519620
},
520-
},
521-
),
522-
types.Tool(
523-
name="browser_get_result",
524-
description="Gets the result of an asynchronous browser task",
525-
inputSchema={
526-
"type": "object",
527-
"required": ["task_id"],
528-
"properties": {
529-
"task_id": {
530-
"type": "string",
531-
"description": "ID of the task to get results for",
532-
}
621+
),
622+
types.Tool(
623+
name="browser_get_result",
624+
description="Gets the result of an asynchronous browser task",
625+
inputSchema={
626+
"type": "object",
627+
"required": ["task_id"],
628+
"properties": {
629+
"task_id": {
630+
"type": "string",
631+
"description": "ID of the task to get results for",
632+
}
633+
},
533634
},
534-
},
535-
),
536-
]
635+
),
636+
]
537637

538638
@app.list_resources()
539639
async def list_resources() -> list[types.Resource]:

0 commit comments

Comments
 (0)