add hint labeling ui support in loop.py

patricebechard · patricebechard · commit 90c24685fca1 · 2025-08-28T00:44:53.000-04:00
diff --git a/src/agentlab/experiments/loop.py b/src/agentlab/experiments/loop.py
@@ -1,3 +1,4 @@
+import base64
 import gzip
 import importlib.metadata
 import json
@@ -13,12 +14,15 @@
 from collections import defaultdict
 from dataclasses import asdict, dataclass, field, is_dataclass
 from datetime import datetime
+from io import BytesIO
 from pathlib import Path
 from typing import Optional
 
 import gymnasium as gym
 import numpy as np
+import PIL.Image
 from browsergym.core.chat import Chat
+from browsergym.core.hint_labeling import HintLabeling, HintLabelingInputs
 from browsergym.experiments.agent import Agent
 from browsergym.experiments.utils import count_tokens
 from dataclasses_json import DataClassJsonMixin
@@ -404,7 +408,7 @@ def _make_dir(self, exp_root):
     def run(self):
         """Run the experiment and save the results"""
         # start writing logs to run logfile
-        self._set_logger()
+        # self._set_logger()
 
         # log python environment info
         save_package_versions(Path(self.exp_dir))
@@ -443,15 +447,18 @@ def run(self):
                     # will end the episode after saving the step info.
                     step_info.truncated = True
 
-                step_info.save_step_info(
-                    self.exp_dir, save_screenshot=self.save_screenshot, save_som=self.save_som
-                )
-                logger.debug("Step info saved.")
+                # step_info.save_step_info(
+                #     self.exp_dir, save_screenshot=self.save_screenshot, save_som=self.save_som
+                # )
+                # logger.debug("Step info saved.")
 
                 if hasattr(env.unwrapped, "chat") and isinstance(env.unwrapped.chat, Chat):
                     _send_chat_info(env.unwrapped.chat, action, step_info.agent_info)
                     logger.debug("Chat info sent.")
 
+                if hasattr(env.unwrapped, "hint_labeling") and isinstance(env.unwrapped.hint_labeling, HintLabeling):
+                    _update_hint_labeling(env.unwrapped.hint_labeling, action, agent, step_info)
+
                 if action is None:
                     logger.debug("Agent returned None action. Ending episode.")
                     break
@@ -481,10 +488,11 @@ def run(self):
 
         finally:
             try:
-                if step_info is not None:
-                    step_info.save_step_info(
-                        self.exp_dir, save_screenshot=self.save_screenshot, save_som=self.save_som
-                    )
+                pass
+                # if step_info is not None:
+                #     step_info.save_step_info(
+                #         self.exp_dir, save_screenshot=self.save_screenshot, save_som=self.save_som
+                #     )
             except Exception as e:
                 logger.error(f"Error while saving step info in the finally block: {e}")
             try:
@@ -508,7 +516,8 @@ def run(self):
             except Exception as e:
                 logger.exception(f"Error while closing the environment: {e}")
             try:
-                self._unset_logger()  # stop writing logs to run logfile
+                # self._unset_logger()  # stop writing logs to run logfile
+                pass
             except Exception as e:
                 logger.exception(f"Error while unsetting the logger: {e}")
 
@@ -942,6 +951,76 @@ def _send_chat_info(chat: Chat, action: str, agent_info: dict):
     logger.info(msg)
     chat.add_message(role="info", msg=msg)
 
+def _convert_np_array_to_base64(np_array: np.ndarray):
+    im = PIL.Image.fromarray(np_array)
+    buffered = BytesIO()
+    im.save(buffered, format="PNG")
+    img_b64 = base64.b64encode(buffered.getvalue()).decode("utf-8")
+    return img_b64
+
+def _update_hint_labeling(hint_labeling: HintLabeling, action: str, agent: Agent, step_info: StepInfo):
+    """Update the hint labeling with the action and agent info."""
+    context = HintLabelingInputs(
+        goal=step_info.obs.get("goal", ""), # TODO: is this goal deprecated?
+        error_feedback=step_info.obs.get("last_action_error", ""),
+        screenshot = _convert_np_array_to_base64(step_info.obs["screenshot"]),
+        axtree = step_info.obs["axtree_txt"],
+        history = [], # TODO: add history
+        hint = "",
+        suggestions = [
+            {
+                "id": "1",
+                "action": action,
+                "think": step_info.agent_info.think,
+            },
+            {
+                "id": "2",
+                "action": "test",
+                "think": "test",
+            }
+        ]
+    )
+    while True:
+        # update hint labeling ui context
+        logger.info("Updating Hint Labeling UI context...")
+        hint_labeling.update_context(context)
+
+        # wait for hint labeling response
+        logger.info("Waiting for Hint Labeling UI response...")
+        response = hint_labeling.wait_for_response()
+
+        # if payload is for reprompt, we ask for 5 suggestions and we combine everything
+        if response["type"] == "reprompt":
+            # reprompt model 5 times
+            hint = response["payload"]["hint"]
+            agent.flags.extra_instructions = hint
+            suggestions = []
+            for i in tqdm(range(5)):
+                # TODO: make this more optimal
+                action = step_info.from_action(agent)
+                think = step_info.agent_info.think
+                suggestions.append({"id": str(i+1), "action": action, "think": think})
+            
+            # update context
+            context = HintLabelingInputs(
+                goal="blablabli",
+                error_feedback=context.error_feedback,
+                screenshot = context.screenshot,
+                axtree = context.axtree,
+                history = context.history,
+                hint = hint,
+                suggestions = suggestions
+            )
+            continue
+
+        # otherwise, if payload is for action, we return the updated action and save the hint
+        elif response["type"] == "step":
+            step_info.agent_info.think = response["payload"]["think"]
+            action = response["payload"]["action"]
+            return action
+        else:
+            raise ValueError(f"Unknown response type: {response['type']}")
+            
 
 def _flatten_dict(d, parent_key="", sep="."):
     """Recursively flatten a nested dictionary."""