ServiceNow
diff --git a/src/agentlab/agents/hitl_agent/generic_human_guided_agent.py b/src/agentlab/agents/hitl_agent/generic_human_guided_agent.py
Lines changed: 6 additions & 11 deletions
diff --git a/src/agentlab/agents/hitl_agent/hint_labelling.py b/src/agentlab/agents/hitl_agent/hint_labelling.py
Lines changed: 23 additions & 16 deletions
@@ -18,7 +18,6 @@
     HintLabeling,
     HintLabelingInputs,
 )
-from agentlab.analyze import overlay_utils
 from agentlab.llm.llm_utils import (
     Discussion,
     HumanMessage,
@@ -208,7 +207,7 @@ def get_action(self, obs):
                 screenshots=[],  # no overlay screenshots yet
                 axtree=obs.get("axtree_txt", ""),
                 history=[],
-                hint="",
+                hints=[],
                 suggestions=[],  # no suggestions yet
             )
             self.ui.update_context(initial_inputs)
@@ -243,20 +242,16 @@ def get_action(self, obs):
                     screenshots=screenshots,  # list of overlay screenshots for hover
                     axtree=obs.get("axtree_txt", ""),
                     history=[],  # TODO: add history
-                    hint=(
-                        "\n".join(f"{i}. {c}" for i, c in enumerate(step_hint, 1))
-                        if step_hint
-                        else ""
-                    ),
+                    hints=step_hint,
                     suggestions=suggestions,
                 )
 
                 self.ui.update_context(hint_labeling_inputs)
                 response = self.ui.wait_for_response(timeout=600)
 
                 if response["type"] == "reprompt":
-                    hint = response["payload"]["hint"]
-                    step_hint.append(hint)
+                    new_hints = response["payload"].get("hints", [])
+                    step_hint = list(new_hints) if isinstance(new_hints, list) else step_hint
                     candidates, chat_messages = self.get_candidate_generation(
                         sys_prompt=system_prompt,
                         human_prompt=human_prompt,
@@ -354,8 +349,8 @@ def get_base_agent(llm_config):
     from agentlab.experiments.study import Study
 
     agent_configs = [HUMAN_GUIDED_GENERIC_AGENT]
-    benchmark = bgym.DEFAULT_BENCHMARKS["workarena_l1"]()
-    benchmark = benchmark.subset_from_glob("task_name", "*create*")
+    benchmark = bgym.DEFAULT_BENCHMARKS["miniwob"]()
+    benchmark = benchmark.subset_from_glob("task_name", "*book*")
     benchmark.env_args_list = benchmark.env_args_list[3:4]
 
     for env_args in benchmark.env_args_list:
 
@@ -8,11 +8,9 @@
 from browsergym.core import _get_global_playwright
 from pydantic import BaseModel, Field
 
-from agentlab.agents.hitl_agent import hint_labelling_ui_files
-
 logger = logging.getLogger(__name__)
-
-HINT_LABELING_DIR = resources.files(hint_labelling_ui_files)
+ 
+HINT_LABELING_DIR = resources.files("agentlab.agents.hitl_agent.hint_labelling_ui_files")
 
 
 class HintLabelingInputs(BaseModel):
@@ -22,41 +20,46 @@ class HintLabelingInputs(BaseModel):
     screenshots: List[str] = Field(default_factory=list)  # list of base64 screenshots for hover
     axtree: str
     history: List[Dict[str, str]] = Field(default_factory=list)
-    hint: str = ""
+    hints: List[str] = Field(default_factory=list)
     suggestions: List[Dict[str, str]] = Field(default_factory=list)
 
 
 class HintLabeling:
     def __init__(self, headless: bool, window_size=(600, 1000), *args, **kwargs):
-
-        pw: playwright.sync_api.Playwright = _get_global_playwright()
+        pw_opt = _get_global_playwright()
+        pw: playwright.sync_api.Playwright = pw_opt  # type: ignore[assignment]
         self.browser = pw.chromium.launch(
             headless=headless, args=[f"--window-size={window_size[0]},{window_size[1]}"]
         )
         self.context = self.browser.new_context(
             no_viewport=True,
         )
         self.page = self.context.new_page()
-        self._resp_queue: "Queue[dict]" = Queue()
+        self._resp_queue = Queue()
 
         self.page.route("**/api/reprompt", self._route_reprompt)
         self.page.route("**/api/submit", self._route_submit)
         self.page.set_content(get_hint_labeling_ui(HINT_LABELING_DIR))
 
         # internal state
-        self._context: HintLabelingInputs = None
+        self._context = None
         self._running = False
 
     def _route_reprompt(
         self, route: playwright.sync_api.Route, request: playwright.sync_api.Request
     ):
         logger.info("Route hit: %s %s", request.method, request.url)
         try:
-            body = json.loads(request.post_data() or "{}")
+            body = json.loads(request.post_data or "{}")
         except Exception:
             body = {}
         # enqueue output 1 (reprompt)
-        msg = {"type": "reprompt", "payload": {"hint": body.get("hint", "")}}
+        hints = body.get("hints")
+        if not isinstance(hints, list):
+            # Back-compat: accept single 'hint' string
+            h = body.get("hint")
+            hints = [h] if isinstance(h, str) and h.strip() else []
+        msg = {"type": "reprompt", "payload": {"hints": hints}}
         self._resp_queue.put(msg)
         # Respond something minimal so UI doesn’t break; it will be refreshed by a later update_context()
         route.fulfill(
@@ -68,10 +71,10 @@ def _route_reprompt(
     def _route_submit(self, route: playwright.sync_api.Route, request: playwright.sync_api.Request):
         logger.info("Route hit: %s %s", request.method, request.url)
         try:
-            body = json.loads(request.post_data() or "{}")
+            body = json.loads(request.post_data or "{}")
         except Exception:
             body = {}
-        # Map UI payload -> your step shape
+        # Map the UI payload onto the step message shape
         msg = {
             "type": "step",
             "payload": {
@@ -95,7 +98,7 @@ def _to_ui_bootstrap(self, ctx: HintLabelingInputs) -> dict:
             "screenshots": ctx.screenshots,  # list of screenshots for hover
             "axtree": ctx.axtree,
             "history": ctx.history,
-            "hint": ctx.hint,
+            "hints": ctx.hints,
             "suggestions": ctx.suggestions,
         }
 
@@ -116,7 +119,7 @@ def wait_for_response(self, timeout: Optional[float] = 600) -> dict:
 
         Returns:
             dict: A dictionary containing the parsed response with 'type' and 'payload' keys.
-                For /api/reprompt: {'type': 'reprompt', 'payload': {'hint': str}}
+                For /api/reprompt: {'type': 'reprompt', 'payload': {'hints': list[str]}}
                 For /api/submit: {'type': 'step', 'payload': {'think': str, 'action': str}}
 
         """
@@ -142,7 +145,11 @@ def is_api(req: playwright.sync_api.Request) -> bool:
             body = {}
 
         if req.url.endswith("/api/reprompt"):
-            msg = {"type": "reprompt", "payload": {"hint": body.get("hint", "")}}
+            hints = body.get("hints")
+            if not isinstance(hints, list):
+                h = body.get("hint")
+                hints = [h] if isinstance(h, str) and h.strip() else []
+            msg = {"type": "reprompt", "payload": {"hints": hints}}
         else:
             msg = {
                 "type": "step",