ServiceNow
diff --git a/‎.github/workflows/darglint.yml‎
Lines changed: 1 addition & 1 deletion b/‎.github/workflows/darglint.yml‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎.github/workflows/python_version_compatibility.yml‎
Lines changed: 40 additions & 0 deletions b/‎.github/workflows/python_version_compatibility.yml‎
Lines changed: 40 additions & 0 deletions
diff --git a/‎.gitignore‎
Lines changed: 4 additions & 0 deletions b/‎.gitignore‎
Lines changed: 4 additions & 0 deletions
diff --git a/‎main_workarena_debug.py‎
Lines changed: 77 additions & 0 deletions b/‎main_workarena_debug.py‎
Lines changed: 77 additions & 0 deletions
diff --git a/‎src/agentlab/agents/agent_args.py‎
Lines changed: 2 additions & 2 deletions b/‎src/agentlab/agents/agent_args.py‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎src/agentlab/agents/agent_utils.py‎
Lines changed: 130 additions & 0 deletions b/‎src/agentlab/agents/agent_utils.py‎
Lines changed: 130 additions & 0 deletions
diff --git a/‎src/agentlab/agents/debug_agent.py‎
Lines changed: 90 additions & 0 deletions b/‎src/agentlab/agents/debug_agent.py‎
Lines changed: 90 additions & 0 deletions
@@ -21,7 +21,7 @@ jobs:
       - name: Set up Python
         uses: actions/setup-python@v5
         with:
-          python-version: '3.10'
+          python-version: '3.12'
           cache: 'pip' # caching pip dependencies
 
       - name: Pip install
 
@@ -0,0 +1,40 @@
+# Informational workflow: for each Python version in the matrix it runs a
+# trivial Python command through uvx with requirements.txt supplied, and logs
+# whether that succeeded.
+name: Python Compatibility (Info Only)
+
+on:
+  push:
+    branches:
+      - main
+  pull_request:
+
+jobs:
+  info-check:
+    runs-on: ubuntu-latest
+    # A failing matrix entry is tolerated and does not fail the workflow run.
+    continue-on-error: true
+    strategy:
+      matrix:
+        python-version: ["3.10", "3.11", "3.12"]
+    steps:
+    - uses: actions/checkout@v4
+    
+    # Optional: Cache uv for faster runs
+    # NOTE(review): the cache path, the install guard and the PATH export below
+    # all assume the installer places uv in ~/.cargo/bin -- confirm against the
+    # default install location of the uv installer currently in use.
+    - name: Cache uv
+      uses: actions/cache@v4
+      with:
+        path: ~/.cargo/bin/uv
+        key: uv-${{ runner.os }}
+    
+    # Only downloads uv when the binary was not restored from the cache.
+    - name: Install uv
+      run: |
+        if [ ! -f ~/.cargo/bin/uv ]; then
+          curl -LsSf https://astral.sh/uv/install.sh | sh
+        fi
+        
+    # Both branches of the if/else end with an echo, so the outcome is only
+    # reported in the log; the step itself is additionally marked
+    # continue-on-error.
+    - name: Check Python ${{ matrix.python-version }}
+      continue-on-error: true
+      run: |
+        export PATH="$HOME/.cargo/bin:$PATH"
+        if uvx --python ${{ matrix.python-version }} --from python --with-requirements requirements.txt python -c "print('✅ Compatible')"; then
+          echo "✅ Python ${{ matrix.python-version }} works"
+        else
+          echo "❌ Python ${{ matrix.python-version }} incompatible"
+        fi
@@ -171,3 +171,7 @@ results/
 outputs/
 miniwob-plusplus/
 .miniwob-server.pid
+debugging_results/
+
+# working files
+experiments/*
@@ -0,0 +1,77 @@
+"""
+Note: This script is a convenience script to launch experiments instead of using
+the command line.
+
+Copy this script and modify at will, but don't push your changes to the
+repository.
+"""
+
+import logging
+from copy import deepcopy
+
+import bgym
+
+from agentlab.agents.tool_use_agent.tool_use_agent import (
+    DEFAULT_PROMPT_CONFIG,
+    GPT_4_1,
+    ToolUseAgentArgs,
+)
+from agentlab.experiments.study import Study
+
+logging.getLogger().setLevel(logging.INFO)
+
+# Start from a private copy of the default prompt config so the shared default
+# object is not modified, then turn on the `use_som` observation flag.
+config = deepcopy(DEFAULT_PROMPT_CONFIG)
+# config.keep_last_n_obs = 1
+config.obs.use_som = True
+
+
+# Agents to evaluate; every entry below shares the same `config` object.
+agent_configs = [
+    ToolUseAgentArgs(
+        model_args=GPT_4_1,
+        config=config,
+    ),
+    # ToolUseAgentArgs(
+    #     model_args=GPT_4_1,
+    #     config=config,
+    # ),
+]
+
+for agent_config in agent_configs:
+    agent_config.config.action_subsets = ("workarena",)  # use the workarena action set
+
+
+# ## select the benchmark to run on
+# benchmark_name = "miniwob_tiny_test"
+benchmark_name = "workarena_l1"
+
+
+# The lookup key and the benchmark object live in separate names so that
+# `benchmark` only ever refers to a bgym.Benchmark.
+benchmark: bgym.Benchmark = bgym.DEFAULT_BENCHMARKS[benchmark_name](n_repeats=4)
+# Keep only the tasks whose task_name matches the glob.
+benchmark = benchmark.subset_from_glob("task_name", "*create*")
+
+# for env_args in benchmark.env_args_list:
+#     print(env_args.task_name)
+#     env_args.max_steps = 15
+
+# Set to True to resume the most recent study instead of starting a new one.
+relaunch = False
+
+## Number of parallel jobs
+n_jobs = 10  # Make sure to use 1 job when debugging in VSCode
+parallel_backend = "ray"
+# parallel_backend = "sequential"  # activate sequential backend for debugging in VSCode
+
+# Entry-point guard (originally noted as necessary for the dask backend).
+# NOTE(review): dask is not among the backends listed for study.run below;
+# the guard is kept unchanged for all backends.
+if __name__ == "__main__":
+
+    if relaunch:
+        #  relaunch an existing study
+        study = Study.load_most_recent(contains=None)
+        study.find_incomplete(include_errors=True)
+
+    else:
+        study = Study(agent_configs, benchmark, logging_level_stdout=logging.WARNING)
+
+    study.run(
+        n_jobs=n_jobs,
+        parallel_backend=parallel_backend,  # "ray", "joblib" or "sequential"
+        strict_reproducibility=False,
+        n_relaunch=3,
+    )
@@ -1,5 +1,5 @@
 import bgym
-from bgym import AbstractAgentArgs
+from bgym import AbstractAgentArgs, Benchmark
 
 
 class AgentArgs(AbstractAgentArgs):
@@ -14,7 +14,7 @@ class MyAgentArgs(AgentArgs):
     Note: for working properly with AgentXRay, the arguments need to be serializable and hashable.
     """
 
-    def set_benchmark(self, benchmark: bgym.Benchmark, demo_mode: bool):
+    def set_benchmark(self, benchmark: Benchmark, demo_mode: bool):
         """Optional method to set benchmark specific flags.
 
         This allows the agent to have minor adjustments based on the benchmark.
 
@@ -0,0 +1,130 @@
+from PIL import Image, ImageDraw
+from playwright.sync_api import Page
+
+
+def draw_mouse_pointer(image: Image.Image, x: int, y: int) -> Image.Image:
+    """
+    Draws a semi-transparent mouse pointer at (x, y) on the image.
+    Returns a new image with the pointer drawn.
+
+    The pointer is painted on a fully transparent layer which is then
+    alpha-composited over the image, so pixels outside the pointer keep their
+    original colour and alpha even when the input has partial transparency.
+    The input image itself is not modified.
+
+    Args:
+        image: The image to draw the mouse pointer on.
+        x: The x coordinate for the mouse pointer.
+        y: The y coordinate for the mouse pointer.
+
+    Returns:
+        A new RGBA image with the mouse pointer drawn.
+    """
+    pointer_size = 20  # Length of the pointer
+    half_size = pointer_size // 2
+
+    # Convert once; both the compositing base and the layer size come from it.
+    base = image.convert("RGBA")
+    # Transparent layer: compositing a copy of the image onto itself would
+    # change any pixel whose alpha is below 255.
+    overlay = Image.new("RGBA", base.size, (0, 0, 0, 0))
+    draw = ImageDraw.Draw(overlay)
+
+    # Define pointer shape (a simple arrow)
+    pointer_shape = [
+        (x, y),
+        (x + pointer_size, y + half_size),
+        (x + half_size, y + half_size),
+        (x + half_size, y + pointer_size),
+    ]
+
+    draw.polygon(pointer_shape, fill=(0, 0, 0, 128))  # 50% transparent black
+
+    return Image.alpha_composite(base, overlay)
+
+
+def draw_arrowhead(draw, start, end, arrow_length=15, arrow_angle=30, color="red", width=4):
+    """
+    Draws the two barbs of an arrowhead whose tip is at ``end``.
+
+    The arrow direction is taken from ``start`` to ``end``; each barb starts
+    at ``end`` and runs backwards, rotated by ``arrow_angle`` degrees to either
+    side of that direction. When ``start`` equals ``end`` the direction angle
+    is 0, so the arrowhead points towards positive x.
+
+    Args:
+        draw: Object exposing ``line(xy, fill=..., width=...)``, e.g. a PIL
+            ``ImageDraw`` instance.
+        start: (x, y) point the arrow comes from.
+        end: (x, y) point of the arrow tip.
+        arrow_length: Length of each barb.
+        arrow_angle: Angle in degrees between the arrow shaft and each barb.
+        color: Fill passed to ``draw.line`` (defaults to the previous fixed "red").
+        width: Line width passed to ``draw.line`` (defaults to the previous fixed 4).
+    """
+    from math import atan2, cos, radians, sin
+
+    angle = atan2(end[1] - start[1], end[0] - start[0])
+    spread = radians(arrow_angle)  # loop-invariant, converted once
+
+    # First barb uses angle - spread ("left"), second uses angle + spread ("right").
+    for barb_angle in (angle - spread, angle + spread):
+        barb_end = (
+            end[0] - arrow_length * cos(barb_angle),
+            end[1] - arrow_length * sin(barb_angle),
+        )
+        draw.line([end, barb_end], fill=color, width=width)
+
+
+def draw_click_indicator(image: Image.Image, x: int, y: int) -> Image.Image:
+    """
+    Draws a click indicator (+ shape with disconnected lines) at (x, y) on the image.
+    Returns a new image with the click indicator drawn.
+
+    The indicator is painted on a fully transparent layer which is then
+    alpha-composited over the image, so pixels outside the indicator keep their
+    original colour and alpha even when the input has partial transparency.
+    The input image itself is not modified.
+
+    Args:
+        image: The image to draw the click indicator on.
+        x: The x coordinate for the click indicator.
+        y: The y coordinate for the click indicator.
+
+    Returns:
+        A new RGBA image with the click indicator drawn.
+    """
+    line_length = 10  # Length of each line segment
+    gap = 4  # Gap from center point
+    line_width = 2  # Thickness of lines
+
+    # Convert once; both the compositing base and the layer size come from it.
+    base = image.convert("RGBA")
+    # Transparent layer: compositing a copy of the image onto itself would
+    # change any pixel whose alpha is below 255.
+    overlay = Image.new("RGBA", base.size, (0, 0, 0, 0))
+    draw = ImageDraw.Draw(overlay)
+
+    # Four segments forming a + shape with a gap in the center.
+    segments = [
+        [(x, y - gap - line_length), (x, y - gap)],  # Top line
+        [(x, y + gap), (x, y + gap + line_length)],  # Bottom line
+        [(x - gap - line_length, y), (x - gap, y)],  # Left line
+        [(x + gap, y), (x + gap + line_length, y)],  # Right line
+    ]
+
+    # Each segment gets a wider white outline and a black center so that it
+    # stays visible on any background.
+    for segment in segments:
+        draw.line(segment, fill=(255, 255, 255, 200), width=line_width + 2)  # White outline
+        draw.line(segment, fill=(0, 0, 0, 255), width=line_width)  # Black center
+
+    return Image.alpha_composite(base, overlay)
+
+
+def zoom_webpage(page: Page, zoom_factor: float = 1.5):
+    """
+    Sets the CSS zoom of the page's root element to the given factor.
+
+    NOTE: Click actions that target a bid do not work properly while the page
+    is zoomed in.
+
+    Args:
+        page: The Playwright Page object.
+        zoom_factor: The zoom factor to apply (default is 1.5).
+
+    Returns:
+        Page: The same Playwright Page object, after the zoom was applied.
+
+    Raises:
+        ValueError: If zoom_factor is less than or equal to 0.
+    """
+    # Reject non-positive factors before touching the page.
+    if zoom_factor <= 0:
+        raise ValueError("Zoom factor must be greater than 0.")
+
+    zoom_percent = zoom_factor * 100
+    script = f"document.documentElement.style.zoom='{zoom_percent}%'"
+    page.evaluate(script)
+    return page
@@ -0,0 +1,90 @@
+from copy import deepcopy
+from dataclasses import asdict, dataclass
+from functools import partial
+
+import bgym
+from browsergym.experiments.agent import Agent, AgentInfo
+from browsergym.utils.obs import flatten_axtree_to_str, flatten_dom_to_str, overlay_som, prune_html
+
+from agentlab.agents.agent_args import AgentArgs
+from agentlab.llm.chat_api import BaseModelArgs
+from agentlab.llm.llm_utils import ParseError, image_to_png_base64_url, parse_html_tags_raise, retry
+from agentlab.llm.tracking import cost_tracker_decorator
+
+
+@dataclass
+class DebugAgentArgs(AgentArgs):
+    """Arguments used to build a DebugAgent.
+
+    Starts with the high-level action set of the "miniwob_tiny_test" benchmark
+    and with HTML observations disabled; ``set_benchmark`` adjusts both for the
+    benchmark that is actually run.
+    """
+
+    def __post_init__(self):
+        # The name is a plain constant, so assigning it needs no fallback
+        # handling and no character substitution.
+        self.agent_name = "debug"
+        self.action_set_args = bgym.DEFAULT_BENCHMARKS[
+            "miniwob_tiny_test"
+        ]().high_level_action_set_args
+        self.use_html = False
+
+    def set_benchmark(self, benchmark: bgym.Benchmark, demo_mode):
+        """Adopts the benchmark's action set; miniwob benchmarks also enable HTML.
+
+        Args:
+            benchmark: Benchmark whose name and high-level action set args are read.
+            demo_mode: Accepted for interface compatibility; not used here.
+        """
+        # Only ever switches HTML on; a previously enabled flag is left as is.
+        if benchmark.name.startswith("miniwob"):
+            self.use_html = True
+        self.action_set_args = benchmark.high_level_action_set_args
+
+    def make_agent(self):
+        """Returns a DebugAgent configured with the current action set and HTML flag."""
+        return DebugAgent(self.action_set_args, use_html=self.use_html)
+
+
+class DebugAgent(Agent):
+    """Agent whose actions are typed in at the console instead of being generated."""
+
+    def __init__(self, action_set_args, use_html=False):
+        """Builds the action set and stores which observation text to display.
+
+        Args:
+            action_set_args: Object providing ``make_action_set()``.
+            use_html: When True the pruned HTML is shown, otherwise the AXTree text.
+        """
+        self.action_set = action_set_args.make_action_set()
+        self.use_html = use_html
+
+    def obs_preprocessor(self, obs):
+        """Returns a deep copy of ``obs`` extended with text and screenshot views.
+
+        Adds the keys "dom_txt", "axtree_txt", "pruned_html" and
+        "screenshot_som"; the incoming observation is left untouched.
+        """
+        # Flags shared by the DOM and AXTree flatteners.
+        flatten_flags = {
+            "with_visible": True,
+            "with_clickable": True,
+            "with_center_coords": True,
+            "with_bounding_box_coords": True,
+            "filter_visible_only": False,
+            "filter_with_bid_only": False,
+            "filter_som_only": False,
+        }
+
+        enriched = deepcopy(obs)
+        enriched["dom_txt"] = flatten_dom_to_str(
+            enriched["dom_object"],
+            extra_properties=enriched["extra_element_properties"],
+            **flatten_flags,
+        )
+        enriched["axtree_txt"] = flatten_axtree_to_str(
+            enriched["axtree_object"],
+            extra_properties=enriched["extra_element_properties"],
+            **flatten_flags,
+        )
+        enriched["pruned_html"] = prune_html(enriched["dom_txt"])
+        enriched["screenshot_som"] = overlay_som(
+            enriched["screenshot"], extra_properties=enriched["extra_element_properties"]
+        )
+        return enriched
+
+    def get_action(self, obs):
+        """Shows the observation text on the console and reads the action from stdin.
+
+        Returns:
+            A tuple of the typed action string and an AgentInfo with placeholder
+            content.
+        """
+        print("\n")
+        if self.use_html:
+            shown_text = obs["pruned_html"]
+        else:
+            shown_text = obs["axtree_txt"]
+        typed_action = input(shown_text + "\n")
+        return typed_action, AgentInfo(
+            think="nope",
+            chat_messages=[],
+            stats={},
+        )
+
+
+# Module-level DebugAgentArgs instance created with the default settings.
+DEBUG_AGENT = DebugAgentArgs()