Skip to content

Commit c3e1792

Browse files
committed
adding a tag on screenshots for mouse_click coordinate actions
1 parent cad0629 commit c3e1792

File tree

1 file changed

+25
-2
lines changed

1 file changed

+25
-2
lines changed

src/agentlab/analyze/agent_xray.py

Lines changed: 25 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
from attr import dataclass
1515
from langchain.schema import BaseMessage, HumanMessage
1616
from openai import OpenAI
17-
from PIL import Image
17+
from PIL import Image, ImageDraw
1818

1919
from agentlab.analyze import inspect_results
2020
from agentlab.experiments.exp_utils import RESULTS_DIR
@@ -530,9 +530,29 @@ def wrapper(*args, **kwargs):
530530
return decorator
531531

532532

533+
def _parse_click_coordinates(action: str) -> "tuple[float, float] | None":
    """Extract the (x, y) arguments of a ``mouse_click(...)`` action string.

    The first two comma-separated arguments inside the first pair of
    parentheses are read as x and y; an optional ``x=`` / ``y=`` keyword
    prefix is accepted. Returns None when the string cannot be parsed.
    """
    open_idx = action.find("(")
    close_idx = action.find(")", open_idx + 1)
    if open_idx == -1 or close_idx == -1:
        return None

    args = [arg.strip() for arg in action[open_idx + 1 : close_idx].split(",")]
    if len(args) < 2:
        return None

    x_text, y_text = args[0], args[1]
    if x_text.startswith("x="):
        x_text = x_text[2:]
    if y_text.startswith("y="):
        y_text = y_text[2:]

    try:
        return float(x_text), float(y_text)
    except ValueError:
        return None


def tag_screenshot_with_action(screenshot: "Image.Image", action: str) -> "Image.Image":
    """If action is a coordinate action, try to render it on the screenshot.

    e.g. mouse_click(120, 130) -> draw a dot at (120, 130) on the screenshot

    This is best-effort: when there is no screenshot, no action, the action is
    not a ``mouse_click``, or its coordinates cannot be parsed, the screenshot
    is returned untouched instead of raising.

    Args:
        screenshot: The image to annotate (may be None).
        action: The action string of the step (may be None).

    Returns:
        A tagged copy of the screenshot when a click position was parsed,
        otherwise the ``screenshot`` argument itself.
    """
    if screenshot is None or not isinstance(action, str):
        return screenshot
    if not action.startswith("mouse_click"):
        return screenshot

    coords = _parse_click_coordinates(action)
    if coords is None:
        return screenshot
    x, y = coords

    # Draw on a copy so the caller's image object is never modified; the same
    # object may be displayed elsewhere without the marker.
    tagged = screenshot.copy()
    draw = ImageDraw.Draw(tagged)
    radius = 5
    draw.ellipse((x - radius, y - radius, x + radius, y + radius), fill="red", outline="red")
    return tagged
550+
551+
533552
def update_screenshot(som_or_not: str):
    """Return the current step's screenshot, tagged with that step's action.

    Args:
        som_or_not: Screenshot variant selector, forwarded to ``get_screenshot``.

    Returns:
        The screenshot for ``info.step`` after passing it through
        ``tag_screenshot_with_action``, or None when ``get_screenshot``
        yields no image.
    """
    global info
    screenshot = get_screenshot(info, som_or_not=som_or_not)
    if screenshot is None:
        # Same guard as update_screenshot_pair: there is nothing to draw on.
        return None
    action = info.exp_result.steps_info[info.step].action
    return tag_screenshot_with_action(screenshot, action)
536556

537557

538558
def get_screenshot(info: Info, step: int = None, som_or_not: str = "Raw Screenshots"):
@@ -549,6 +569,9 @@ def update_screenshot_pair(som_or_not: str):
549569
global info
550570
s1 = get_screenshot(info, info.step, som_or_not)
551571
s2 = get_screenshot(info, info.step + 1, som_or_not)
572+
573+
if s1 is not None:
574+
s1 = tag_screenshot_with_action(s1, info.exp_result.steps_info[info.step].action)
552575
return s1, s2
553576

554577

0 commit comments

Comments
 (0)