Skip to content

Commit cd40737

Browse files
Enhance screenshot tagging with mouse drag-and-drop support and add arrowhead drawing functionality
1 parent a16f024 commit cd40737

File tree

1 file changed

+46
-0
lines changed

1 file changed

+46
-0
lines changed

src/agentlab/agents/agent_utils.py

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
from logging import warning
2+
from typing import Optional, Tuple
23

4+
import numpy as np
35
from PIL import Image, ImageDraw
46
from playwright.sync_api import Page
57

@@ -42,10 +44,37 @@ def tag_screenshot_with_action(screenshot: Image, action: str) -> Image:
4244
)
4345
except (ValueError, IndexError) as e:
4446
warning(f"Failed to parse action '{action}': {e}")
47+
48+
elif action.startswith("mouse_drag_and_drop"):
49+
try:
50+
func_name, parsed_args = parse_func_call_string(action)
51+
if func_name == "mouse_drag_and_drop" and parsed_args is not None:
52+
args, kwargs = parsed_args
53+
x1, y1, x2, y2 = None, None, None, None
54+
55+
if args and len(args) >= 4:
56+
# Positional arguments: mouse_drag_and_drop(x1, y1, x2, y2)
57+
x1, y1, x2, y2 = map(float, args[:4])
58+
elif kwargs:
59+
# Keyword arguments: mouse_drag_and_drop(from_x=x1, from_y=y1, to_x=x2, to_y=y2)
60+
x1 = float(kwargs.get("from_x", 0))
61+
y1 = float(kwargs.get("from_y", 0))
62+
x2 = float(kwargs.get("to_x", 0))
63+
y2 = float(kwargs.get("to_y", 0))
64+
65+
if all(coord is not None for coord in [x1, y1, x2, y2]):
66+
draw = ImageDraw.Draw(screenshot)
67+
# Draw the main line
68+
draw.line((x1, y1, x2, y2), fill="red", width=2)
69+
# Draw arrowhead at the end point using the helper function
70+
draw_arrowhead(draw, (x1, y1), (x2, y2))
71+
except (ValueError, IndexError) as e:
72+
warning(f"Failed to parse action '{action}': {e}")
4573
return screenshot
4674

4775

4876
def add_mouse_pointer_from_action(screenshot: Image, action: str) -> Image.Image:
77+
4978
if action.startswith("mouse_click"):
5079
try:
5180
coords = action[action.index("(") + 1 : action.index(")")].split(",")
@@ -85,6 +114,23 @@ def draw_mouse_pointer(image: Image.Image, x: int, y: int) -> Image.Image:
85114
return Image.alpha_composite(image.convert("RGBA"), overlay)
86115

87116

117+
def draw_arrowhead(draw, start, end, arrow_length=15, arrow_angle=30, *, fill="red", width=4):
    """Draw a two-stroke arrowhead at ``end``, pointing away from ``start``.

    Two straight barbs are drawn from ``end`` back towards ``start``, each
    rotated by ``arrow_angle`` degrees to either side of the start->end
    direction.

    Args:
        draw: Object exposing ``line(points, fill=..., width=...)``; the
            drag-and-drop tagging code passes a ``PIL.ImageDraw.Draw`` instance.
        start: ``(x, y)`` of the tail of the arrow; only used for direction.
        end: ``(x, y)`` of the arrow tip, where both barbs originate.
        arrow_length: Length of each barb, in the same units as the coordinates.
        arrow_angle: Angle in degrees between the shaft and each barb.
        fill: Colour forwarded to ``draw.line`` (default matches the previous
            hard-coded ``"red"``).
        width: Stroke width forwarded to ``draw.line`` (default matches the
            previous hard-coded ``4``).

    Note:
        When ``start == end`` the direction is undefined; ``atan2(0, 0)``
        evaluates to ``0``, so the barbs are drawn as if the arrow pointed
        along the positive x axis.
    """
    from math import atan2, cos, radians, sin

    end_x, end_y = end[0], end[1]
    heading = atan2(end_y - start[1], end_x - start[0])
    spread = radians(arrow_angle)  # convert once instead of per coordinate

    # First the barb at (heading - spread), then the one at (heading + spread),
    # preserving the original draw order.
    for barb_heading in (heading - spread, heading + spread):
        barb_tip = (
            end_x - arrow_length * cos(barb_heading),
            end_y - arrow_length * sin(barb_heading),
        )
        draw.line([end, barb_tip], fill=fill, width=width)
131+
132+
133+
88134
def draw_click_indicator(image: Image.Image, x: int, y: int) -> Image.Image:
89135
"""
90136
Draws a click indicator (+ shape with disconnected lines) at (x, y) on the image.

0 commit comments

Comments
 (0)