Skip to content

Commit 23ac63d

Browse files
Merge branch 'main' into agentlab-controller
2 parents bdd8411 + d92e0bf commit 23ac63d

38 files changed

+4234
-174
lines changed

.github/workflows/darglint.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ jobs:
2121
- name: Set up Python
2222
uses: actions/setup-python@v5
2323
with:
24-
python-version: '3.10'
24+
python-version: '3.12'
2525
cache: 'pip' # caching pip dependencies
2626

2727
- name: Pip install
Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
name: Python Compatibility (Info Only)

# Informational workflow: reports whether requirements.txt can be resolved on
# each Python version. Failures are deliberately non-blocking.
on:
  push:
    branches:
      - main
  pull_request:

jobs:
  info-check:
    runs-on: ubuntu-latest
    continue-on-error: true
    strategy:
      matrix:
        python-version: ["3.10", "3.11", "3.12"]
    steps:
      - uses: actions/checkout@v4

      # Optional: Cache uv for faster runs.
      # Both binaries are cached: the check step below calls `uvx`, so caching
      # only `uv` would leave `uvx` missing whenever the install step is skipped.
      # NOTE(review): assumes the installer's default target is ~/.local/bin
      # (older installers used ~/.cargo/bin) - confirm against the uv docs.
      - name: Cache uv
        uses: actions/cache@v4
        with:
          path: |
            ~/.local/bin/uv
            ~/.local/bin/uvx
          key: uv-${{ runner.os }}

      - name: Install uv
        run: |
          if [ ! -x "$HOME/.local/bin/uv" ] || [ ! -x "$HOME/.local/bin/uvx" ]; then
            curl -LsSf https://astral.sh/uv/install.sh | sh
          fi

      - name: Check Python ${{ matrix.python-version }}
        continue-on-error: true
        run: |
          # ~/.cargo/bin is kept on PATH for installers that still use the old location.
          export PATH="$HOME/.local/bin:$HOME/.cargo/bin:$PATH"
          if uvx --python ${{ matrix.python-version }} --from python --with-requirements requirements.txt python -c "print('✅ Compatible')"; then
            echo "✅ Python ${{ matrix.python-version }} works"
          else
            echo "❌ Python ${{ matrix.python-version }} incompatible"
          fi

.gitignore

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -171,3 +171,7 @@ results/
171171
outputs/
172172
miniwob-plusplus/
173173
.miniwob-server.pid
174+
debugging_results/
175+
176+
# working files
177+
experiments/*

main_workarena_debug.py

Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,77 @@
1+
"""
2+
Note: This script is a convenience script to launch experiments instead of using
3+
the command line.
4+
5+
Copy this script and modify at will, but don't push your changes to the
6+
repository.
7+
"""
8+
9+
import logging
10+
from copy import deepcopy
11+
12+
import bgym
13+
14+
from agentlab.agents.tool_use_agent.tool_use_agent import (
15+
DEFAULT_PROMPT_CONFIG,
16+
GPT_4_1,
17+
ToolUseAgentArgs,
18+
)
19+
from agentlab.experiments.study import Study
20+
21+
logging.getLogger().setLevel(logging.INFO)

# Start from the default prompt configuration and enable set-of-marks screenshots.
config = deepcopy(DEFAULT_PROMPT_CONFIG)
# config.keep_last_n_obs = 1
config.obs.use_som = True


agent_configs = [
    ToolUseAgentArgs(
        model_args=GPT_4_1,
        config=config,
    ),
    # ToolUseAgentArgs(
    #     model_args=GPT_4_1,
    #     config=config,
    # ),
]

for agent_config in agent_configs:
    agent_config.config.action_subsets = ("workarena",)  # use the workarena action set


# ## select the benchmark to run on
# benchmark_name = "miniwob_tiny_test"
benchmark_name = "workarena_l1"

# Keep the benchmark *name* and the benchmark *object* in separate variables so
# that `benchmark` always refers to a Benchmark instance.
benchmark = bgym.DEFAULT_BENCHMARKS[benchmark_name](n_repeats=4)  # type: bgym.Benchmark
benchmark = benchmark.subset_from_glob("task_name", "*create*")

# for env_args in benchmark.env_args_list:
#     print(env_args.task_name)
#     env_args.max_steps = 15

relaunch = False

## Number of parallel jobs
n_jobs = 10  # Make sure to use 1 job when debugging in VSCode
parallel_backend = "ray"
# parallel_backend = "sequential"  # activate sequential backend for debugging in VSCode

# Guard the launch so the study only starts when this file is run as a script,
# not when the module is imported.
if __name__ == "__main__":

    if relaunch:
        # relaunch an existing study
        study = Study.load_most_recent(contains=None)
        study.find_incomplete(include_errors=True)

    else:
        study = Study(agent_configs, benchmark, logging_level_stdout=logging.WARNING)

    study.run(
        n_jobs=n_jobs,
        parallel_backend=parallel_backend,  # "ray", "joblib" or "sequential"
        strict_reproducibility=False,
        n_relaunch=3,
    )

src/agentlab/agents/agent_args.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
import bgym
2-
from bgym import AbstractAgentArgs
2+
from bgym import AbstractAgentArgs, Benchmark
33

44

55
class AgentArgs(AbstractAgentArgs):
@@ -14,7 +14,7 @@ class MyAgentArgs(AgentArgs):
1414
Note: for working properly with AgentXRay, the arguments need to be serializable and hashable.
1515
"""
1616

17-
def set_benchmark(self, benchmark: bgym.Benchmark, demo_mode: bool):
17+
def set_benchmark(self, benchmark: Benchmark, demo_mode: bool):
1818
"""Optional method to set benchmark specific flags.
1919
2020
This allows the agent to have minor adjustments based on the benchmark.

src/agentlab/agents/agent_utils.py

Lines changed: 130 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,130 @@
1+
from PIL import Image, ImageDraw
2+
from playwright.sync_api import Page
3+
4+
5+
def draw_mouse_pointer(image: Image.Image, x: int, y: int) -> Image.Image:
    """
    Draws a semi-transparent mouse pointer with its tip at (x, y) on the image.
    The input image is left untouched; a new RGBA image is returned.

    Args:
        image: The image to draw the mouse pointer on.
        x: The x coordinate for the mouse pointer.
        y: The y coordinate for the mouse pointer.

    Returns:
        A new image with the mouse pointer drawn.
    """
    pointer_size = 20  # Length of the pointer
    half = pointer_size // 2

    base = image.convert("RGBA")
    # Draw on a fully transparent layer: compositing then only touches the
    # pointer pixels, instead of blending a full copy of the image over itself
    # (which would alter the alpha of non-opaque input images).
    overlay = Image.new("RGBA", base.size, (0, 0, 0, 0))
    draw = ImageDraw.Draw(overlay)

    # Define pointer shape (a simple arrow)
    pointer_shape = [
        (x, y),
        (x + pointer_size, y + half),
        (x + half, y + half),
        (x + half, y + pointer_size),
    ]

    draw.polygon(pointer_shape, fill=(0, 0, 0, 128))  # 50% transparent black

    return Image.alpha_composite(base, overlay)
33+
34+
35+
def draw_arrowhead(draw, start, end, arrow_length=15, arrow_angle=30, color="red", width=4):
    """
    Draws the two strokes of an arrowhead at `end`, oriented along start -> end.

    Only the head is drawn; the shaft from `start` to `end` is not.

    Args:
        draw: Drawing object exposing a `line(xy, fill=..., width=...)` method
            (e.g. a PIL `ImageDraw.ImageDraw`).
        start: (x, y) point the arrow comes from; only used for the direction.
        end: (x, y) point where the tip of the arrowhead is placed.
        arrow_length: Length of each arrowhead stroke.
        arrow_angle: Angle in degrees between the arrow direction and each stroke.
        color: Fill passed to `draw.line` for both strokes.
        width: Line width passed to `draw.line` for both strokes.
    """
    from math import atan2, cos, radians, sin

    direction = atan2(end[1] - start[1], end[0] - start[0])
    spread = radians(arrow_angle)

    # The two strokes start at the tip and point backwards, rotated by
    # -spread and +spread around the arrow direction.
    for side_angle in (direction - spread, direction + spread):
        barb = (
            end[0] - arrow_length * cos(side_angle),
            end[1] - arrow_length * sin(side_angle),
        )
        draw.line([end, barb], fill=color, width=width)
49+
50+
51+
def draw_click_indicator(image: Image.Image, x: int, y: int) -> Image.Image:
    """
    Draws a click indicator (+ shape with disconnected lines) at (x, y) on the image.
    The input image is left untouched; a new RGBA image is returned.

    Args:
        image: The image to draw the click indicator on.
        x: The x coordinate for the click indicator.
        y: The y coordinate for the click indicator.

    Returns:
        A new image with the click indicator drawn.
    """
    line_length = 10  # Length of each line segment
    gap = 4  # Gap from center point
    line_width = 2  # Thickness of lines

    near = gap  # distance from the center to the start of a segment
    far = gap + line_length  # distance from the center to the end of a segment

    base = image.convert("RGBA")
    # Draw on a fully transparent layer so that compositing only affects the
    # indicator pixels rather than blending a full copy of the image over itself.
    overlay = Image.new("RGBA", base.size, (0, 0, 0, 0))
    draw = ImageDraw.Draw(overlay)

    # 4 segments forming a + shape with a gap in the center
    segments = [
        [(x, y - far), (x, y - near)],  # Top line
        [(x, y + near), (x, y + far)],  # Bottom line
        [(x - far, y), (x - near, y)],  # Left line
        [(x + near, y), (x + far, y)],  # Right line
    ]

    # Each line has a white outline and black center for visibility on any background
    for segment in segments:
        draw.line(segment, fill=(255, 255, 255, 200), width=line_width + 2)  # White outline
        draw.line(segment, fill=(0, 0, 0, 255), width=line_width)  # Black center

    return Image.alpha_composite(base, overlay)
107+
108+
109+
def zoom_webpage(page: Page, zoom_factor: float = 1.5):
    """
    Zooms the webpage to the specified zoom factor.

    NOTE: Click actions with bid don't work properly when zoomed in.

    Args:
        page: The Playwright Page object.
        zoom_factor: The zoom factor to apply (default is 1.5).

    Returns:
        Page: The modified Playwright Page object.

    Raises:
        ValueError: If zoom_factor is not a finite number greater than 0.
    """
    from math import isfinite

    # NaN compares False against everything, so `zoom_factor <= 0` alone would
    # let NaN (and inf) through and produce an invalid CSS value.
    if not isfinite(zoom_factor) or zoom_factor <= 0:
        raise ValueError("Zoom factor must be a finite number greater than 0.")

    # Pass the value as an argument instead of interpolating it into the script.
    page.evaluate(
        "(zoom) => { document.documentElement.style.zoom = zoom; }",
        f"{zoom_factor * 100}%",
    )
    return page

src/agentlab/agents/debug_agent.py

Lines changed: 90 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,90 @@
1+
from copy import deepcopy
2+
from dataclasses import asdict, dataclass
3+
from functools import partial
4+
5+
import bgym
6+
from browsergym.experiments.agent import Agent, AgentInfo
7+
from browsergym.utils.obs import flatten_axtree_to_str, flatten_dom_to_str, overlay_som, prune_html
8+
9+
from agentlab.agents.agent_args import AgentArgs
10+
from agentlab.llm.chat_api import BaseModelArgs
11+
from agentlab.llm.llm_utils import ParseError, image_to_png_base64_url, parse_html_tags_raise, retry
12+
from agentlab.llm.tracking import cost_tracker_decorator
13+
14+
15+
@dataclass
16+
class DebugAgentArgs(AgentArgs):
17+
18+
def __post_init__(self):
19+
try: # some attributes might be temporarily args.CrossProd for hyperparameter generation
20+
self.agent_name = f"debug".replace("/", "_")
21+
except AttributeError:
22+
pass
23+
self.action_set_args = bgym.DEFAULT_BENCHMARKS[
24+
"miniwob_tiny_test"
25+
]().high_level_action_set_args
26+
self.use_html = False
27+
28+
def set_benchmark(self, benchmark: bgym.Benchmark, demo_mode):
29+
if benchmark.name.startswith("miniwob"):
30+
self.use_html = True
31+
self.action_set_args = benchmark.high_level_action_set_args
32+
33+
def make_agent(self):
34+
return DebugAgent(self.action_set_args, use_html=self.use_html)
35+
36+
37+
class DebugAgent(Agent):
    """Agent whose actions are typed in by a human on standard input.

    At every step the textual observation is shown as the `input()` prompt and
    the line entered by the user is returned verbatim as the action.
    """

    def __init__(self, action_set_args, use_html=False):
        """Create the agent.

        Args:
            action_set_args: Object whose `make_action_set()` builds the action set.
            use_html: If True, show the pruned HTML; otherwise show the AXTree text.
        """
        self.action_set = action_set_args.make_action_set()
        self.use_html = use_html

    def obs_preprocessor(self, obs):
        """Return a deep copy of `obs` augmented with textual and SoM views.

        Args:
            obs: Raw observation dict; the keys "dom_object", "axtree_object",
                "extra_element_properties" and "screenshot" are read.

        Returns:
            dict: Copy of `obs` with "dom_txt", "axtree_txt", "pruned_html" and
                "screenshot_som" added.
        """
        enriched = deepcopy(obs)

        # Flags shared by the DOM and the AXTree flattening calls.
        flatten_flags = dict(
            with_visible=True,
            with_clickable=True,
            with_center_coords=True,
            with_bounding_box_coords=True,
            filter_visible_only=False,
            filter_with_bid_only=False,
            filter_som_only=False,
        )

        enriched["dom_txt"] = flatten_dom_to_str(
            enriched["dom_object"],
            extra_properties=enriched["extra_element_properties"],
            **flatten_flags,
        )
        enriched["axtree_txt"] = flatten_axtree_to_str(
            enriched["axtree_object"],
            extra_properties=enriched["extra_element_properties"],
            **flatten_flags,
        )
        enriched["pruned_html"] = prune_html(enriched["dom_txt"])
        enriched["screenshot_som"] = overlay_som(
            enriched["screenshot"],
            extra_properties=enriched["extra_element_properties"],
        )
        return enriched

    def get_action(self, obs):
        """Show the observation to the user and read the action from stdin.

        Args:
            obs: Preprocessed observation containing "pruned_html" and "axtree_txt".

        Returns:
            tuple: The string typed by the user and an `AgentInfo` with placeholder
                content.
        """
        print("\n")
        shown_key = "pruned_html" if self.use_html else "axtree_txt"
        action = input(obs[shown_key] + "\n")
        return action, AgentInfo(think="nope", chat_messages=[], stats={})
88+
89+
90+
DEBUG_AGENT = DebugAgentArgs()

0 commit comments

Comments
 (0)