Skip to content

Commit 4d121fd

Browse files
committed
make browser fill actions async
1 parent bc0098b commit 4d121fd

File tree

4 files changed

+79
-50
lines changed

4 files changed

+79
-50
lines changed

examples/integrations/cartesia/README.md

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ This project demonstrates an advanced voice agent that conducts phone questionna
88
- **Real-time Form Filling**: Automatically fills web forms as answers are collected
99
- **Browser Automation**: Uses Stagehand AI to interact with any web form
1010
- **Intelligent Mapping**: AI-powered mapping of voice answers to form fields
11-
- **Async Processing**: Non-blocking form filling maintains conversation flow
11+
- **Async Processing**: Non-blocking form filling maintains conversation flow; form fields are filled in background tasks without delaying voice responses
1212
- **Auto-submission**: Submits forms automatically when complete
1313

1414
## Architecture
@@ -68,7 +68,7 @@ The system can be configured through:
6868

6969
- `form.yaml`: Define questionnaire structure
7070
- `FORM_URL`: Target web form to fill
71-
- `headless`: Run browser in background (True) or visible (False)
71+
- `headless`: Run the browser in the background (`True`) or visibly (`False`); currently set to `True` for production use
7272
- `enable_browser`: Toggle browser automation on/off
7373

7474
## Example Flow
@@ -87,7 +87,7 @@ The system can be configured through:
8787

8888
## Advanced Features
8989

90-
- **Background Processing**: Form filling happens asynchronously
90+
- **Background Processing**: Form filling happens asynchronously in background tasks, so the conversation remains smooth and responsive
9191
- **Error Recovery**: Continues conversation even if form filling fails
9292
- **Progress Tracking**: Monitor form completion status
9393
- **Screenshot Debugging**: Captures screenshots after each field
@@ -104,7 +104,7 @@ Test with different scenarios:
104104

105105
## Production Considerations
106106

107-
- Set `headless=True` for production
107+
- Set `headless=True` for production (currently configured this way)
108108
- Configure proper error logging
109109
- Add retry logic for form submission
110110
- Implement form validation checks

examples/integrations/cartesia/form_filling_node.py

Lines changed: 24 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -238,25 +238,33 @@ def _create_questions_from_analysis(self, form_analysis: Dict[str, Any]) -> list
238238

239239
return form_questions
240240

241+
async def _fill_form_field_async(self, field_name: str, value: str):
242+
"""Fill a form field asynchronously in background (non-blocking)"""
243+
try:
244+
await self._fill_form_field(field_name, value)
245+
except Exception as e:
246+
logger.error(f"❌ Background form filling error for {field_name}: {e}")
247+
241248
async def _fill_form_field(self, field_name: str, value: str):
242249
"""Fill a form field in the browser in real-time"""
243250
if not self.stagehand_filler:
244251
logger.warning("⚠️ Browser not initialized yet")
245252
return
246-
253+
247254
try:
248-
logger.info(f"🖊️ Filling field '{field_name}' with: {value} in real-time")
249-
255+
logger.info(f"🖊️ Filling field '{field_name}' with: {value} in background")
256+
250257
# Use StagehandFormFiller's fill_field method which handles the mapping
251258
success = await self.stagehand_filler.fill_field(field_name, value)
252-
259+
253260
if success:
254261
logger.info(f"✅ Successfully filled field: {field_name} in the browser")
255262
else:
256263
logger.warning(f"⚠️ Failed to fill field: {field_name}")
257-
264+
258265
except Exception as e:
259266
logger.error(f"Error filling field {field_name}: {e}")
267+
raise # Re-raise so background task can catch it
260268

261269
async def _submit_form(self):
262270
"""Submit the completed form"""
@@ -416,29 +424,29 @@ async def process_context(
416424
value = function_call.args.get("value", "")
417425

418426
logger.info(f"📝 Recording: {field_name} = {value}")
419-
427+
420428
# Store data first
421429
self.collected_data[field_name] = value
422-
423-
# Fill the form field immediately in real-time
424-
await self._fill_form_field(field_name, value)
425-
430+
431+
# Fill the form field asynchronously in background (non-blocking)
432+
asyncio.create_task(self._fill_form_field_async(field_name, value))
433+
426434
# Log the collected data
427435
logger.info(f"📊 Collected: {field_name}={value}")
428-
429-
# Move to next question
436+
437+
# Move to next question immediately (don't wait for form filling)
430438
self.current_question_index += 1
431439
field_recorded = True
432-
440+
433441
# Clear context
434442
self.clear_context()
435-
443+
436444
# Get next question
437445
next_question = self.get_current_question()
438446
if next_question:
439447
yield AgentResponse(content=f"Great! {next_question.question}")
440-
441-
# Yield tool result
448+
449+
# Yield tool result immediately
442450
yield ToolResult(
443451
tool_name="record_form_field",
444452
tool_args={"field_name": field_name, "value": value},

examples/integrations/cartesia/main.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ async def handle_new_call(system: VoiceAgentSystem, call_request: CallRequest):
3333
system_prompt=SYSTEM_PROMPT,
3434
gemini_client=gemini_client,
3535
form_url=FORM_URL,
36-
headless=False, # Show browser for demo (set True for production)
36+
headless=True, # Run browser in background for production
3737
)
3838

3939
# Set up bridge for event handling
@@ -65,6 +65,6 @@ async def handle_new_call(system: VoiceAgentSystem, call_request: CallRequest):
6565
print("🚀 Starting Voice Agent with Web Form Automation")
6666
print(f"📝 Will fill form at: {FORM_URL}")
6767
print("📞 Ready to receive calls...")
68-
print("\nNote: The browser will open when the first call is received.")
69-
print("Set headless=True in production to run in background.\n")
68+
print("\nNote: The browser will run in background (headless mode).")
69+
print("Form filling happens invisibly while processing voice calls.\n")
7070
app.run()

examples/integrations/cartesia/stagehand_form_filler.py

Lines changed: 48 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -234,7 +234,7 @@ async def initialize(self):
234234

235235
# Configure Stagehand
236236
config = StagehandConfig(
237-
env="LOCAL", # Use local browser
237+
env="BROWSERBASE", # Use a Browserbase-hosted (remote) browser instead of a local one
238238
model_name="google/gemini-2.0-flash-exp", # Fast model for form filling
239239
model_api_key=os.getenv("GEMINI_API_KEY"),
240240
)
@@ -259,9 +259,11 @@ async def initialize(self):
259259
raise
260260

261261
async def fill_field(self, question_id: str, answer: str) -> bool:
262-
"""Fill a specific form field based on the question ID and answer"""
262+
"""Fill a specific form field based on the question ID and answer (non-blocking)"""
263263
if not self.is_initialized:
264-
await self.initialize()
264+
# Initialize asynchronously without blocking
265+
init_task = asyncio.create_task(self.initialize())
266+
await init_task
265267

266268
try:
267269
# Get field mapping
@@ -274,31 +276,38 @@ async def fill_field(self, question_id: str, answer: str) -> bool:
274276
transformed_answer = self.field_mapper.transform_answer(question_id, answer)
275277
self.collected_data[question_id] = transformed_answer
276278

277-
logger.info(f"🖊️ Filling field '{field.label}' with: {transformed_answer}")
279+
logger.info(f"🖊️ Async filling field '{field.label}' with: {transformed_answer}")
280+
281+
# Create async task for the actual field filling
282+
fill_action = None
278283

279284
# Use Stagehand's natural language API to fill the field
280285
if field.field_type in [FieldType.TEXT, FieldType.EMAIL, FieldType.PHONE]:
281-
await self.page.act(f"Fill in the '{field.label}' field with: {transformed_answer}")
286+
fill_action = self.page.act(f"Fill in the '{field.label}' field with: {transformed_answer}")
282287

283288
elif field.field_type == FieldType.ADDRESS:
284-
await self.page.act(f"Fill in the address field with: {transformed_answer}")
289+
fill_action = self.page.act(f"Fill in the address field with: {transformed_answer}")
285290

286291
elif field.field_type == FieldType.TEXTAREA:
287-
await self.page.act(f"Type in the '{field.label}' text area: {transformed_answer}")
292+
fill_action = self.page.act(f"Type in the '{field.label}' text area: {transformed_answer}")
288293

289294
elif field.field_type in [FieldType.SELECT, FieldType.RADIO]:
290-
await self.page.act(f"Select '{transformed_answer}' for the '{field.label}' field")
295+
fill_action = self.page.act(f"Select '{transformed_answer}' for the '{field.label}' field")
291296

292297
elif field.field_type == FieldType.CHECKBOX:
293298
# For role selection, check the specific role checkbox
294299
if question_id == "role_selection":
295-
await self.page.act(f"Check the '{transformed_answer}' checkbox")
300+
fill_action = self.page.act(f"Check the '{transformed_answer}' checkbox")
296301
else:
297302
# For other checkboxes, check/uncheck based on answer
298303
if transformed_answer.lower() in ["yes", "true"]:
299-
await self.page.act(f"Check the '{field.label}' checkbox")
304+
fill_action = self.page.act(f"Check the '{field.label}' checkbox")
300305
else:
301-
await self.page.act(f"Uncheck the '{field.label}' checkbox")
306+
fill_action = self.page.act(f"Uncheck the '{field.label}' checkbox")
307+
308+
# Execute the fill action asynchronously
309+
if fill_action:
310+
await fill_action
302311

303312
return True
304313

@@ -334,38 +343,50 @@ async def fill_collected_data(self):
334343
await asyncio.sleep(0.5) # Small delay between fields
335344

336345
async def navigate_to_next_page(self):
337-
"""Navigate to the next page of the form if multi-page"""
346+
"""Navigate to the next page of the form if multi-page (non-blocking)"""
338347
try:
339-
await self.page.act("Click the Next or Continue button")
340-
await asyncio.sleep(2) # Wait for page transition
348+
# Create async task for navigation
349+
nav_task = asyncio.create_task(
350+
self.page.act("Click the Next or Continue button")
351+
)
352+
await nav_task
353+
354+
# Small async delay for page transition
355+
await asyncio.sleep(1.5)
341356
return True
342357
except Exception as e:
343358
logger.debug(f"No next button found or single-page form: {e}")
344359
return False
345360

346361
async def submit_form(self) -> bool:
347-
"""Submit the completed form"""
362+
"""Submit the completed form (fully async)"""
348363
try:
349364
logger.info("📤 Attempting to submit the form")
350365
logger.info(f"📊 Form has {len(self.collected_data)} fields already filled in real-time")
351366

352367
# Data has already been filled in real-time during conversation
353368
# Just navigate and submit
354369

355-
# Navigate through pages if needed
356-
await self.navigate_to_next_page()
370+
# Navigate through pages if needed (async)
371+
nav_result = await self.navigate_to_next_page()
357372

358-
# Submit the form
359-
await self.page.act("Click the Submit button")
373+
# Submit the form asynchronously
374+
submit_task = asyncio.create_task(
375+
self.page.act("Click the Submit button")
376+
)
377+
await submit_task
360378

361-
# Wait for submission confirmation
362-
await asyncio.sleep(3)
379+
# Wait for submission confirmation (non-blocking)
380+
await asyncio.sleep(2.5)
363381

364-
# Check for success message
382+
# Check for success message asynchronously
365383
try:
366-
success_check = await self.page.extract({
367-
"success_indicator": "boolean indicating if form was submitted successfully"
368-
})
384+
extract_task = asyncio.create_task(
385+
self.page.extract({
386+
"success_indicator": "boolean indicating if form was submitted successfully"
387+
})
388+
)
389+
success_check = await extract_task
369390

370391
if success_check and hasattr(success_check, 'success_indicator'):
371392
logger.info("✅ Form submitted successfully!")
@@ -376,8 +397,8 @@ async def submit_form(self) -> bool:
376397
except Exception as e:
377398
logger.warning(f"⚠️ Could not verify submission: {e}")
378399

379-
logger.warning("⚠️ Form submission uncertain, checking page state")
380-
return False
400+
logger.info("📝 Form submission process completed")
401+
return True # Assume success if no errors
381402

382403
except Exception as e:
383404
logger.error(f"❌ Error submitting form: {e}")

0 commit comments

Comments
 (0)