1414from attr import dataclass
1515from langchain .schema import BaseMessage , HumanMessage
1616from openai import OpenAI
17- from PIL import Image , ImageDraw
17+ from PIL import Image
1818
1919from agentlab .analyze import inspect_results
2020from agentlab .experiments .exp_utils import RESULTS_DIR
2424from agentlab .llm .llm_utils import BaseMessage as AgentLabBaseMessage
2525from agentlab .llm .llm_utils import Discussion
2626from agentlab .llm .response_api import MessageBuilder
27+ from agentlab .agents import agent_utils
2728
2829select_dir_instructions = "Select Experiment Directory"
2930AGENT_NAME_KEY = "agent.agent_name"
@@ -531,47 +532,10 @@ def wrapper(*args, **kwargs):
531532 return decorator
532533
533534
def _parse_mouse_click_coords(action: str) -> "tuple[float, float]":
    """Extract the (x, y) pair from a ``mouse_click(...)`` action string.

    Accepts two or three comma-separated arguments; the first two may be
    written positionally or as ``x=...`` / ``y=...``. A third argument, if
    present, is ignored.

    Raises:
        ValueError: If the parentheses are missing, the argument count is not
            2 or 3, or a coordinate cannot be converted to ``float``.
    """
    # str.index raises ValueError when "(" or ")" is absent.
    raw_args = action[action.index("(") + 1 : action.index(")")].split(",")
    args = [arg.strip() for arg in raw_args]
    if len(args) not in (2, 3):
        raise ValueError(f"Invalid coordinate format: {args}")
    x_text, y_text = args[0], args[1]
    if x_text.startswith("x="):
        x_text = x_text[2:]
    if y_text.startswith("y="):
        y_text = y_text[2:]
    return float(x_text.strip()), float(y_text.strip())


def tag_screenshot_with_action(
    screenshot: "Image.Image", action: "str | None"
) -> "Image.Image":
    """
    If action is a coordinate action, try to render it on the screenshot.

    e.g. mouse_click(120, 130) -> draw a dot at (120, 130) on the screenshot

    The dot is drawn in place on ``screenshot`` and the same object is
    returned. This is best-effort: an action that cannot be parsed is reported
    through ``warning`` and the screenshot is returned unchanged; nothing is
    raised to the caller.

    Args:
        screenshot: The screenshot to tag.
        action: The action to tag the screenshot with. ``None`` or any value
            that is not a ``mouse_click`` string leaves the screenshot as is.

    Returns:
        The tagged screenshot (the same object that was passed in).
    """
    # Steps may carry no action at all; guard before calling str methods.
    if not isinstance(action, str) or not action.startswith("mouse_click"):
        return screenshot

    try:
        x, y = _parse_mouse_click_coords(action)
        # Imported here so the block does not depend on a module-level
        # ImageDraw import being present in this file.
        from PIL import ImageDraw

        radius = 5
        ImageDraw.Draw(screenshot).ellipse(
            (x - radius, y - radius, x + radius, y + radius),
            fill="blue",
            outline="blue",
        )
    except (ValueError, IndexError) as e:
        warning(f"Failed to parse action '{action}': {e}")
    return screenshot
569-
570-
def update_screenshot(som_or_not: str):
    """Return the current step's screenshot, tagged with that step's action.

    Reads the module-level ``info`` for the experiment result and step index,
    fetches the screenshot via ``get_screenshot`` and passes it, with the
    step's action, to ``agent_utils.tag_screenshot_with_action``.

    Args:
        som_or_not: Forwarded to ``get_screenshot`` as ``som_or_not``.

    Returns:
        Whatever ``agent_utils.tag_screenshot_with_action`` returns.
    """
    global info
    step_action = info.exp_result.steps_info[info.step].action
    current_image = get_screenshot(info, som_or_not=som_or_not)
    return agent_utils.tag_screenshot_with_action(current_image, step_action)
575539
576540
577541def get_screenshot (info : Info , step : int = None , som_or_not : str = "Raw Screenshots" ):
@@ -590,7 +554,7 @@ def update_screenshot_pair(som_or_not: str):
590554 s2 = get_screenshot (info , info .step + 1 , som_or_not )
591555
592556 if s1 is not None :
593- s1 = tag_screenshot_with_action (s1 , info .exp_result .steps_info [info .step ].action )
557+ s1 = agent_utils . tag_screenshot_with_action (s1 , info .exp_result .steps_info [info .step ].action )
594558 return s1 , s2
595559
596560
0 commit comments