1414from attr import dataclass
1515from langchain .schema import BaseMessage , HumanMessage
1616from openai import OpenAI
17- from PIL import Image
17+ from PIL import Image , ImageDraw
1818
1919from agentlab .analyze import inspect_results
2020from agentlab .experiments .exp_utils import RESULTS_DIR
@@ -530,9 +530,47 @@ def wrapper(*args, **kwargs):
530530 return decorator
531531
532532
533+ def tag_screenshot_with_action (screenshot : Image , action : str ) -> Image :
534+ """
535+ If action is a coordinate action, try to render it on the screenshot.
536+
537+ e.g. mouse_click(120, 130) -> draw a dot at (120, 130) on the screenshot
538+
539+ Args:
540+ screenshot: The screenshot to tag.
541+ action: The action to tag the screenshot with.
542+
543+ Returns:
544+ The tagged screenshot.
545+
546+ Raises:
547+ ValueError: If the action parsing fails.
548+ """
549+ if action .startswith ("mouse_click" ):
550+ try :
551+ coords = action [action .index ("(" ) + 1 : action .index (")" )].split ("," )
552+ coords = [c .strip () for c in coords ]
553+ if len (coords ) != 2 :
554+ raise ValueError (f"Invalid coordinate format: { coords } " )
555+ if coords [0 ].startswith ("x=" ):
556+ coords [0 ] = coords [0 ][2 :]
557+ if coords [1 ].startswith ("y=" ):
558+ coords [1 ] = coords [1 ][2 :]
559+ x , y = float (coords [0 ].strip ()), float (coords [1 ].strip ())
560+ draw = ImageDraw .Draw (screenshot )
561+ radius = 5
562+ draw .ellipse (
563+ (x - radius , y - radius , x + radius , y + radius ), fill = "red" , outline = "red"
564+ )
565+ except (ValueError , IndexError ) as e :
566+ warning (f"Failed to parse action '{ action } ': { e } " )
567+ return screenshot
568+
569+
533570def update_screenshot (som_or_not : str ):
534571 global info
535- return get_screenshot (info , som_or_not = som_or_not )
572+ action = info .exp_result .steps_info [info .step ].action
573+ return tag_screenshot_with_action (get_screenshot (info , som_or_not = som_or_not ), action )
536574
537575
538576def get_screenshot (info : Info , step : int = None , som_or_not : str = "Raw Screenshots" ):
@@ -549,6 +587,9 @@ def update_screenshot_pair(som_or_not: str):
549587 global info
550588 s1 = get_screenshot (info , info .step , som_or_not )
551589 s2 = get_screenshot (info , info .step + 1 , som_or_not )
590+
591+ if s1 is not None :
592+ s1 = tag_screenshot_with_action (s1 , info .exp_result .steps_info [info .step ].action )
552593 return s1 , s2
553594
554595
0 commit comments