Skip to content

Problem in click action/PDF interaction #321

@DsDastgheib

Description

@DsDastgheib

It seems the click action does not work when the agent tries to click the "View PDF" link on an arXiv article page.
Here is some sample code (`parse_content_to_elements` and `find_matching_anchor` are borrowed from here):

import re
import browsergym.core  # register the openended task as a gym environment
from browsergym.utils.obs import flatten_axtree_to_str
from dataclasses import dataclass, field

def parse_content_to_elements(content: str):
    """Build a mapping from anchor id to element description.

    A line (after stripping whitespace) that starts with ``[<digits>]``
    opens a new element; the rest of that line is the first fragment of its
    description. Every following non-blank line that is not itself an
    anchor line is appended to the description of the most recent anchor.
    Lines that appear before the first anchor are dropped.

    Returns a dict whose keys are the anchor ids as strings and whose
    values are the description fragments joined with single spaces. When
    the same anchor id appears more than once, the last occurrence wins.
    """
    anchor_re = re.compile(r'\[(\d+)\](.*)')
    fragments_by_anchor = {}
    active = None  # fragment list of the element currently being read

    for raw in content.split('\n'):
        text = raw.strip()
        if text == '':
            continue

        hit = anchor_re.match(text)
        if hit is None:
            # Continuation line: only meaningful once an anchor was seen.
            if active is not None:
                active.append(text)
            continue

        # Anchor line: open a fresh fragment list for this id.
        active = [hit.group(2).strip()]
        fragments_by_anchor[hit.group(1)] = active

    return {
        anchor: ' '.join(fragments)
        for anchor, fragments in fragments_by_anchor.items()
    }

def find_matching_anchor(content: str, selector: str):
    """Return the id of the first element whose description contains *selector*.

    The comparison is a case-insensitive substring test performed on the
    whitespace-stripped selector and description. Elements are examined in
    the order produced by ``parse_content_to_elements``. Returns ``None``
    when no description contains the selector.
    """
    needle = selector.lower().strip()
    candidates = parse_content_to_elements(content).items()
    return next(
        (anchor for anchor, text in candidates if needle in text.lower().strip()),
        None,
    )


if __name__ == '__main__':
    # Reproduction script: open an arXiv abstract page in a BrowserGym
    # open-ended task, click the "View PDF" link, and print the page URL
    # after every step so it is visible whether the click navigated.

    # The script calls gym.make() but `gym` was never imported, which raises
    # NameError. Imported here so the script section is self-contained.
    import gymnasium as gym

    env = gym.make(
        "browsergym/openended",
        task_kwargs={"start_url": "https://www.google.com/"},  # starting URL
        wait_for_user_message=False,  # do not block waiting for a user chat message
    )
    try:
        # run the environment <> agent loop until termination
        obs, info = env.reset()

        action0 = 'goto("https://arxiv.org/abs/1706.03762")'
        obs, reward, terminated, truncated, info = env.step(action0)
        print(obs["url"])

        # Pause so the page can finish loading before reading the AXTree.
        action1 = "noop(2000)"
        obs, reward, terminated, truncated, info = env.step(action1)
        print(obs["url"])

        extra_element_properties = {}
        axtree_text = flatten_axtree_to_str(
            obs["axtree_object"],
            extra_properties=extra_element_properties,
            with_clickable=True,
            skip_generic=True,
            filter_visible_only=True,
        )
        select = find_matching_anchor(axtree_text, "link 'View PDF',")
        if select is None:
            # Without this check the script would send click("None", "left"),
            # hiding the fact that the link was never located.
            raise RuntimeError("Could not find the 'View PDF' link in the flattened AXTree")

        action2 = f'click("{select}", "left")'
        print(action2)
        obs, reward, terminated, truncated, info = env.step(action2)
        print(obs["url"])
    finally:
        # release the environment even if a step above raised
        env.close()

The output is as follows:

https://arxiv.org/abs/1706.03762
https://arxiv.org/abs/1706.03762
https://arxiv.org/abs/1706.03762

We can see that after the click action the URL does not change, while we expect to be redirected to https://arxiv.org/pdf/1706.03762.
I've tested this on a couple of arXiv articles and it did not work for any of them.

Metadata

Metadata

Assignees

No one assigned

    Labels

    bug: Something isn't working

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions