ServiceNow
diff --git a/pyproject.toml b/pyproject.toml
Lines changed: 1 addition & 0 deletions
diff --git a/src/agentlab/agents/agent_utils.py b/src/agentlab/agents/agent_utils.py
Lines changed: 26 additions & 0 deletions
diff --git a/src/agentlab/agents/hitl_agent/base_multi_candidate_agent.py b/src/agentlab/agents/hitl_agent/base_multi_candidate_agent.py
Lines changed: 50 additions & 0 deletions
@@ -109,3 +109,4 @@ hint = [
 [project.scripts]
 agentlab-assistant = "agentlab.ui_assistant:main"
 agentlab-xray = "agentlab.analyze.agent_xray:main"
+agentlab-mentor = "agentlab.agents.hitl_agent.launch_hint_ui:main"
@@ -1,6 +1,11 @@
+import copy
+
 from PIL import Image, ImageDraw
 from playwright.sync_api import Page
 
+from agentlab.analyze import overlay_utils
+from agentlab.llm.llm_utils import img_to_base_64
+
 
 def draw_mouse_pointer(image: Image.Image, x: int, y: int) -> Image.Image:
     """
@@ -128,3 +133,24 @@ def zoom_webpage(page: Page, zoom_factor: float = 1.5):
 
     page.evaluate(f"document.documentElement.style.zoom='{zoom_factor*100}%'")
     return page
+
+
+def overlay_action(obs, action):
+    """Overlays actions on screenshot in-place"""
+    act_img = copy.deepcopy(obs["screenshot"])
+    act_img = Image.fromarray(act_img)
+
+    new_obs_properties = copy.deepcopy(obs["extra_element_properties"])
+    import os
+
+    if os.getenv("AGENTLAB_USE_RETINA"):
+        # HACK: divide everything by 2 in the obs
+        # TODO: make this more robust by changing login in annotate_action directly (or maybe in the obs section?)
+        for key, value in new_obs_properties.items():
+            try:
+                new_obs_properties[key]["bbox"] = [elem / 2 for elem in value["bbox"]]
+            except:
+                pass
+
+    overlay_utils.annotate_action(act_img, action, properties=new_obs_properties)
+    return img_to_base_64(act_img)
@@ -0,0 +1,50 @@
+from typing_extensions import Protocol
+
+from agentlab.agents.agent_args import AgentArgs
+
+
+class MultiCandidateAgent(Protocol):
+    """
+    Protocol for agents that generate multiple candidates for get_action.
+
+    This protocol defines the contract for agents that can generate
+    multiple candidate actions and allow selection of one of them for execution.
+    """
+
+    def get_candidate_generations(
+        self, obs: dict, hint: list[str] | None = None, n_candidates: int = 3
+    ) -> "list[dict]":
+        """
+        Generate multiple candidate actions for the given observation.
+
+        You can pass extra info in agent_info to update internal state of the
+        agent based on the selected candidate. Your internal state management
+        should be robust to multiple calls to the get_candidate_generations method
+        in a single step.
+
+        Args:
+            obs: The current observation dictionary containing environment state
+            hint: Optional list of hint strings to guide candidate generation
+            n_candidates: Number of candidate actions to generate
+        """
+        ...
+
+    def update_agent_state_from_selected_candidate(self, output: dict):
+        """
+        Update the agent's internal state based on the selected candidate.
+        This can include any memory or planning updates.
+
+        Args:
+            output: The selected candidate action dictionary
+        """
+        pass
+
+
+class MultiCandidateAgentArgs(AgentArgs):
+    def make_agent(self) -> MultiCandidateAgent: ...
+
+    def __post_init__(self):
+        """Prefix subagent name with 'MC-'."""
+        super().__post_init__()
+        if hasattr(self, "agent_name") and self.agent_name:
+            self.agent_name = "MC-" + self.agent_name