Skip to content

Commit bf57591

Browse files
moving some utils to agent_utils.py
1 parent ab2d331 commit bf57591

File tree

2 files changed

+48
-40
lines changed

2 files changed

+48
-40
lines changed

src/agentlab/agents/agent_utils.py

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
from PIL import Image, ImageDraw
2+
from logging import warning
3+
4+
5+
6+
"""
7+
This module contains utility functions for handling observations and actions in the context of agent interactions.
8+
"""
9+
10+
def tag_screenshot_with_action(screenshot: Image, action: str) -> Image:
    """
    If action is a coordinate action, try to render it on the screenshot.

    e.g. mouse_click(120, 130) -> draw a dot at (120, 130) on the screenshot

    Both positional (``mouse_click(120, 130)``) and keyword
    (``mouse_click(x=120, y=130)``) forms are accepted; an optional third
    argument is tolerated and ignored. The dot is drawn directly on
    ``screenshot`` and that same object is returned.

    This function never raises on a malformed action: parsing failures are
    logged as a warning and the screenshot is returned untouched.

    Args:
        screenshot: The screenshot to tag. If None, None is returned.
        action: The action to tag the screenshot with. Non-string values
            (e.g. None when a step has no action) are ignored.

    Returns:
        The screenshot that was passed in, tagged when the action could be parsed.
    """
    # Nothing to draw on, or nothing to parse: hand the input back unchanged
    # instead of failing on `None.startswith` / `ImageDraw.Draw(None)`.
    if screenshot is None or not isinstance(action, str):
        return screenshot

    if action.startswith("mouse_click"):
        try:
            # Text between the first "(" and the first ")" holds the arguments.
            coords = action[action.index("(") + 1 : action.index(")")].split(",")
            coords = [c.strip() for c in coords]
            if len(coords) not in [2, 3]:
                raise ValueError(f"Invalid coordinate format: {coords}")
            # Accept keyword-style arguments by dropping the "x=" / "y=" prefix.
            if coords[0].startswith("x="):
                coords[0] = coords[0][2:]
            if coords[1].startswith("y="):
                coords[1] = coords[1][2:]
            x, y = float(coords[0].strip()), float(coords[1].strip())
            draw = ImageDraw.Draw(screenshot)
            radius = 5
            draw.ellipse(
                (x - radius, y - radius, x + radius, y + radius), fill="blue", outline="blue"
            )
        except (ValueError, IndexError) as e:
            # Best effort only: a malformed action must not break the caller.
            warning(f"Failed to parse action '{action}': {e}")
    return screenshot

src/agentlab/analyze/agent_xray.py

Lines changed: 4 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
from attr import dataclass
1515
from langchain.schema import BaseMessage, HumanMessage
1616
from openai import OpenAI
17-
from PIL import Image, ImageDraw
17+
from PIL import Image
1818

1919
from agentlab.analyze import inspect_results
2020
from agentlab.experiments.exp_utils import RESULTS_DIR
@@ -24,6 +24,7 @@
2424
from agentlab.llm.llm_utils import BaseMessage as AgentLabBaseMessage
2525
from agentlab.llm.llm_utils import Discussion
2626
from agentlab.llm.response_api import MessageBuilder
27+
from agentlab.agents import agent_utils
2728

2829
select_dir_instructions = "Select Experiment Directory"
2930
AGENT_NAME_KEY = "agent.agent_name"
@@ -531,47 +532,10 @@ def wrapper(*args, **kwargs):
531532
return decorator
532533

533534

534-
def tag_screenshot_with_action(screenshot: Image, action: str) -> Image:
535-
"""
536-
If action is a coordinate action, try to render it on the screenshot.
537-
538-
e.g. mouse_click(120, 130) -> draw a dot at (120, 130) on the screenshot
539-
540-
Args:
541-
screenshot: The screenshot to tag.
542-
action: The action to tag the screenshot with.
543-
544-
Returns:
545-
The tagged screenshot.
546-
547-
Raises:
548-
ValueError: If the action parsing fails.
549-
"""
550-
if action.startswith("mouse_click"):
551-
try:
552-
coords = action[action.index("(") + 1 : action.index(")")].split(",")
553-
coords = [c.strip() for c in coords]
554-
if len(coords) not in [2, 3]:
555-
raise ValueError(f"Invalid coordinate format: {coords}")
556-
if coords[0].startswith("x="):
557-
coords[0] = coords[0][2:]
558-
if coords[1].startswith("y="):
559-
coords[1] = coords[1][2:]
560-
x, y = float(coords[0].strip()), float(coords[1].strip())
561-
draw = ImageDraw.Draw(screenshot)
562-
radius = 5
563-
draw.ellipse(
564-
(x - radius, y - radius, x + radius, y + radius), fill="blue", outline="blue"
565-
)
566-
except (ValueError, IndexError) as e:
567-
warning(f"Failed to parse action '{action}': {e}")
568-
return screenshot
569-
570-
571535
def update_screenshot(som_or_not: str):
    """
    Return the screenshot of the currently selected step, tagged with that step's action.

    Args:
        som_or_not: Screenshot variant selector, forwarded unchanged to get_screenshot.

    Returns:
        Whatever agent_utils.tag_screenshot_with_action returns for the current
        step's screenshot and action.
    """
    global info
    # Read the action first, then fetch the image (same order as before).
    step_action = info.exp_result.steps_info[info.step].action
    current_screenshot = get_screenshot(info, som_or_not=som_or_not)
    return agent_utils.tag_screenshot_with_action(current_screenshot, step_action)
575539

576540

577541
def get_screenshot(info: Info, step: int = None, som_or_not: str = "Raw Screenshots"):
@@ -590,7 +554,7 @@ def update_screenshot_pair(som_or_not: str):
590554
s2 = get_screenshot(info, info.step + 1, som_or_not)
591555

592556
if s1 is not None:
593-
s1 = tag_screenshot_with_action(s1, info.exp_result.steps_info[info.step].action)
557+
s1 = agent_utils.tag_screenshot_with_action(s1, info.exp_result.steps_info[info.step].action)
594558
return s1, s2
595559

596560

0 commit comments

Comments
 (0)