Skip to content

Commit 7535c4f

Browse files
authored
fixed target text capture for `<a>` tags (#123)
* fixed target text capture for `<a>` tags * fixed linting issue * removed js check in a link
1 parent 263ff8f commit 7535c4f

File tree

5 files changed

+47
-15
lines changed

5 files changed

+47
-15
lines changed

workflows/pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[project]
22
name = "workflow-use"
3-
version = "0.2.2"
3+
version = "0.2.3"
44
authors = [{ name = "Gregor Zunic" }]
55
description = "Create, edit, run deterministic workflows"
66
readme = "README.md"

workflows/uv.lock

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

workflows/workflow_use/healing/deterministic_converter.py

Lines changed: 35 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -263,14 +263,33 @@ def _normalize_element_data(self, raw_data: Any) -> Dict[str, Any]:
263263

264264
# If it's already a dict from selector_map or interactive_elements
265265
if isinstance(raw_data, dict):
266+
# Extract tag name first
267+
tag_name = raw_data.get('tag_name') or raw_data.get('node_name') or ''
268+
269+
# Extract text value from multiple possible fields, filtering out browser-use bugs
270+
text_value = ''
271+
for text_field in ['text', 'inner_text', 'textContent', 'innerText', 'node_value']:
272+
potential_text = raw_data.get(text_field, '').strip()
273+
if potential_text:
274+
# IMPORTANT: browser-use sometimes provides JavaScript href as 'text' for anchor tags
275+
# Skip this and try other fields (case-insensitive check)
276+
if tag_name == 'a' and potential_text.lower().startswith('javascript:'):
277+
continue
278+
text_value = potential_text
279+
break
280+
266281
# Extract common fields with fallbacks
267282
result = {
268-
'node_name': raw_data.get('tag_name') or raw_data.get('node_name') or '',
269-
'node_value': raw_data.get('text') or raw_data.get('node_value') or '',
283+
'node_name': tag_name,
284+
'node_value': text_value,
270285
'attributes': raw_data.get('attributes', {}),
271286
'xpath': raw_data.get('xpath') or raw_data.get('x_path') or '',
272287
}
273288

289+
# IMPORTANT: Preserve selector_strategies for semantic/deterministic element finding
290+
if 'selector_strategies' in raw_data:
291+
result['selector_strategies'] = raw_data['selector_strategies']
292+
274293
# Compute element hash if we have the data
275294
tag_name = result['node_name'].lower()
276295
# Use xpath or a combination of attributes as hash source
@@ -389,10 +408,10 @@ def _convert_action_to_step(
389408
390409
Mapping (browser-use action names):
391410
- navigate → navigation step
392-
- input_text → input step with target_text
393-
- click → click step with target_text
411+
- input, input_text → input step with target_text
412+
- click, click_element → click step with target_text
394413
- send_keys → keypress step
395-
- extract_content → extract_page_content step
414+
- extract, extract_content, extract_page_content → extract_page_content step
396415
- scroll → scroll step
397416
"""
398417
agent_context = agent_context or {}
@@ -412,8 +431,8 @@ def _convert_action_to_step(
412431

413432
return step
414433

415-
# Input text actions
416-
elif action_type == 'input_text':
434+
# Input text actions (browser-use can use either 'input' or 'input_text')
435+
elif action_type in ['input', 'input_text']:
417436
target_text = self._extract_target_text(element_data, action_dict)
418437
# Ensure target_text is never empty
419438
if not target_text:
@@ -502,10 +521,16 @@ def _convert_action_to_step(
502521

503522
return step
504523

505-
# Extract content actions
506-
elif action_type in ['extract_page_content', 'extract_content']:
524+
# Extract content actions (browser-use can use 'extract', 'extract_content', or 'extract_page_content')
525+
elif action_type in ['extract', 'extract_page_content', 'extract_content']:
507526
# Browser-use may use different field names for extraction goal
508-
goal = action_dict.get('value') or action_dict.get('goal') or action_dict.get('content') or 'page content'
527+
goal = (
528+
action_dict.get('value')
529+
or action_dict.get('goal')
530+
or action_dict.get('content')
531+
or action_dict.get('query')
532+
or 'page content'
533+
)
509534
return {
510535
'type': 'extract_page_content',
511536
'goal': goal,

workflows/workflow_use/healing/service.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -488,7 +488,6 @@ async def act(self, action, browser_session, *args, **kwargs):
488488
controller=CapturingController(self.selector_generator), # Pass selector_generator to controller
489489
enable_memory=False,
490490
max_failures=10,
491-
tool_calling_method='auto',
492491
)
493492

494493
# Store the element map for later use

workflows/workflow_use/workflow/semantic_executor.py

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -604,11 +604,15 @@ async def execute_click_step(self, step: ClickStep) -> ActionResult:
604604
selector_to_use = element_info['selectors']
605605
logger.info(f"Using semantic mapping: '{target_identifier}' -> {selector_to_use}")
606606

607-
# Final fallback to original CSS selector
607+
# Final fallback to original CSS selector or XPath
608608
if not selector_to_use:
609609
if step.cssSelector:
610610
selector_to_use = step.cssSelector
611611
logger.info(f'Falling back to original CSS selector: {selector_to_use}')
612+
elif hasattr(step, 'xpath') and step.xpath:
613+
# Try XPath as fallback if CSS selector is not available
614+
selector_to_use = f'xpath={step.xpath}'
615+
logger.info(f'Falling back to XPath selector: {step.xpath}')
612616
else:
613617
# Enhanced error message with debugging info
614618
available_texts = list(self.current_mapping.keys())[:15] # Show first 15 available options
@@ -1176,11 +1180,15 @@ async def execute_input_step(self, step: InputStep) -> ActionResult:
11761180
selector_to_use = element_info['selectors']
11771181
logger.info(f"Using semantic mapping: '{target_identifier}' -> {selector_to_use}")
11781182

1179-
# Final fallback to original CSS selector
1183+
# Final fallback to original CSS selector or XPath
11801184
if not selector_to_use:
11811185
if step.cssSelector:
11821186
selector_to_use = step.cssSelector
11831187
logger.info(f'Falling back to original CSS selector: {selector_to_use}')
1188+
elif hasattr(step, 'xpath') and step.xpath:
1189+
# Try XPath as fallback if CSS selector is not available
1190+
selector_to_use = f'xpath={step.xpath}'
1191+
logger.info(f'Falling back to XPath selector: {step.xpath}')
11841192
else:
11851193
# Enhanced error message with debugging info
11861194
available_texts = list(self.current_mapping.keys())[:15] # Show first 15 available options

0 commit comments

Comments
 (0)