1414from attr import dataclass
1515from langchain .schema import BaseMessage , HumanMessage
1616from openai import OpenAI
17- from PIL import Image
17+ from PIL import Image , ImageDraw
1818
1919from agentlab .analyze import inspect_results
2020from agentlab .experiments .exp_utils import RESULTS_DIR
@@ -530,9 +530,29 @@ def wrapper(*args, **kwargs):
530530 return decorator
531531
532532
def _parse_click_coords(action: str) -> "tuple[float, float] | None":
    """Extract the (x, y) target from a ``mouse_click(...)`` action string.

    Both positional (``mouse_click(120, 130)``) and keyword
    (``mouse_click(x=120, y=130)``) forms are accepted; any arguments after
    the first two are ignored.

    Returns:
        The coordinates as floats, or ``None`` when the argument list is
        missing, has fewer than two entries, or is not numeric.
    """
    start = action.find("(")
    if start == -1:
        return None
    end = action.find(")", start + 1)
    if end == -1:
        return None

    args = [part.strip() for part in action[start + 1 : end].split(",")]
    if len(args) < 2:
        return None

    x_text, y_text = args[0], args[1]
    # Drop the keyword prefix when the action was serialized with named arguments.
    if x_text.startswith("x="):
        x_text = x_text[2:]
    if y_text.startswith("y="):
        y_text = y_text[2:]

    try:
        # float() tolerates surrounding whitespace left over after the prefix.
        return float(x_text), float(y_text)
    except ValueError:
        return None


def tag_screenshot_with_action(screenshot: "Image.Image", action: str) -> "Image.Image":
    """If action is a coordinate action, try to render it on the screenshot.

    e.g. mouse_click(120, 130) -> draw a dot at (120, 130) on the screenshot

    The overlay is best-effort: a missing screenshot, a missing or non-string
    action, a non-click action, or a click whose coordinates cannot be parsed
    all return ``screenshot`` untouched instead of raising.

    Args:
        screenshot: Image to annotate. It is drawn on in place.
        action: Action string recorded for the step.

    Returns:
        The same ``screenshot`` object that was passed in (possibly ``None``).
    """
    if screenshot is None or not isinstance(action, str):
        return screenshot

    action = action.lstrip()
    if not action.startswith("mouse_click"):
        return screenshot

    coords = _parse_click_coords(action)
    if coords is None:
        return screenshot

    x, y = coords
    radius = 5
    draw = ImageDraw.Draw(screenshot)
    draw.ellipse((x - radius, y - radius, x + radius, y + radius), fill="red", outline="red")
    return screenshot
550+
551+
def update_screenshot(som_or_not: str):
    """Return the current step's screenshot, annotated with that step's action.

    Reads the module-level ``info`` for the current step, fetches the
    screenshot via ``get_screenshot`` and passes it, together with the action
    stored in ``info.exp_result.steps_info[info.step]``, to
    ``tag_screenshot_with_action``.

    Args:
        som_or_not: Screenshot variant selector, forwarded unchanged to
            ``get_screenshot``.

    Returns:
        The annotated screenshot, or whatever ``get_screenshot`` returned
        when there is nothing to annotate.
    """
    global info
    screenshot = get_screenshot(info, som_or_not=som_or_not)
    # NOTE(review): update_screenshot_pair guards against a None screenshot,
    # so get_screenshot presumably can return None; mirror that guard here.
    if screenshot is None:
        return screenshot

    action = info.exp_result.steps_info[info.step].action
    # Nothing to draw when the step has no recorded action.
    if not action:
        return screenshot
    return tag_screenshot_with_action(screenshot, action)
536556
537557
538558def get_screenshot (info : Info , step : int = None , som_or_not : str = "Raw Screenshots" ):
@@ -549,6 +569,9 @@ def update_screenshot_pair(som_or_not: str):
549569 global info
550570 s1 = get_screenshot (info , info .step , som_or_not )
551571 s2 = get_screenshot (info , info .step + 1 , som_or_not )
572+
573+ if s1 is not None :
574+ s1 = tag_screenshot_with_action (s1 , info .exp_result .steps_info [info .step ].action )
552575 return s1 , s2
553576
554577
0 commit comments