Skip to content

Commit a17c119

Browse files
committed
Add draw_click_indicator function to enhance image annotation with click indicators
1 parent 3fd1e59 commit a17c119

File tree

1 file changed

+52
-4
lines changed

1 file changed

+52
-4
lines changed

src/agentlab/agents/agent_utils.py

Lines changed: 52 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,7 @@
11
from logging import warning
2-
from playwright.sync_api import Page
32

43
from PIL import Image, ImageDraw
5-
from logging import warning
6-
from playwright.sync_api import Page
7-
4+
from playwright.sync_api import Page
85

96
"""
107
This module contains utility functions for handling observations and actions in the context of agent interactions.
@@ -87,6 +84,57 @@ def draw_mouse_pointer(image: Image.Image, x: int, y: int) -> Image.Image:
8784

8885
return Image.alpha_composite(image.convert("RGBA"), overlay)
8986

87+
88+
def draw_click_indicator(image: Image.Image, x: int, y: int) -> Image.Image:
    """
    Draws a click indicator (+ shape with disconnected lines) at (x, y) on the image.

    The indicator is made of four short segments (top, bottom, left, right) that
    stop short of the center point, so the clicked pixel itself stays visible.
    Each segment is drawn twice: a wider semi-transparent white stroke first,
    then a narrower opaque black stroke on top, so the marker remains visible
    on both light and dark backgrounds.

    Args:
        image: The image to annotate. It is converted to RGBA; the input object
            itself is not drawn on.
        x: Horizontal pixel coordinate of the click.
        y: Vertical pixel coordinate of the click.

    Returns:
        A new RGBA image with the click indicator composited over the input.
    """
    line_length = 10  # Length of each line segment
    gap = 4  # Gap between the center point and the start of each segment
    line_width = 2  # Thickness of the black center stroke
    outline_width = line_width + 2  # White stroke is 1px wider on each side
    outline_fill = (255, 255, 255, 200)
    center_fill = (0, 0, 0, 255)

    base = image.convert("RGBA")

    # Draw on a fully transparent layer rather than on a copy of the image.
    # Compositing a copy of the image over itself inflates the alpha of every
    # untouched pixel wherever the input is partially transparent; a transparent
    # layer leaves those pixels exactly as they were. For fully opaque inputs
    # both approaches produce the same result.
    overlay = Image.new("RGBA", base.size, (0, 0, 0, 0))
    draw = ImageDraw.Draw(overlay)

    near = gap
    far = gap + line_length
    segments = [
        [(x, y - far), (x, y - near)],  # Top
        [(x, y + near), (x, y + far)],  # Bottom
        [(x - far, y), (x - near, y)],  # Left
        [(x + near, y), (x + far, y)],  # Right
    ]

    for segment in segments:
        # Outline first, then the center stroke on top of it.
        draw.line(segment, fill=outline_fill, width=outline_width)
        draw.line(segment, fill=center_fill, width=line_width)

    return Image.alpha_composite(base, overlay)
136+
137+
90138
def zoom_webpage(page: Page, zoom_factor: float = 1.5):
91139
"""
92140
Zooms the webpage to the specified zoom factor.

0 commit comments

Comments
 (0)