same hints retrieval for both generic and tooluse agents

ollmer · ollmer · commit 55ce26a2f85e · 2025-08-29T17:45:41.000+02:00
diff --git a/src/agentlab/agents/generic_agent_hinter/generic_agent.py b/src/agentlab/agents/generic_agent_hinter/generic_agent.py
@@ -111,10 +111,11 @@ def get_action(self, obs):
             previous_plan=self.plan,
             step=self.plan_step,
             flags=self.flags,
+            llm=self.chat_llm,
         )
 
         # Set task name for task hints if available
-        if self.flags.use_task_hint and hasattr(self, 'task_name'):
+        if self.flags.use_task_hint and hasattr(self, "task_name"):
             main_prompt.set_task_name(self.task_name)
 
         max_prompt_tokens, max_trunc_itr = self._get_maxes()
diff --git a/src/agentlab/agents/generic_agent_hinter/generic_agent_prompt.py b/src/agentlab/agents/generic_agent_hinter/generic_agent_prompt.py
@@ -6,15 +6,16 @@
 
 import logging
 from dataclasses import dataclass
+from pathlib import Path
+from typing import Literal
 
-from browsergym.core import action
+import pandas as pd
 from browsergym.core.action.base import AbstractActionSet
 
 from agentlab.agents import dynamic_prompting as dp
+from agentlab.agents.tool_use_agent.tool_use_agent import HintsSource
+from agentlab.llm.chat_api import ChatModel
 from agentlab.llm.llm_utils import HumanMessage, parse_html_tags_raise
-import fnmatch
-import pandas as pd
-from pathlib import Path
 
 
 @dataclass
@@ -49,6 +50,7 @@ class GenericPromptFlags(dp.Flags):
     use_abstract_example: bool = False
     use_hints: bool = False
     use_task_hint: bool = False
+    task_hint_retrieval_mode: Literal["direct", "llm", "emb"] = "direct"
     hint_db_path: str = None
     enable_chat: bool = False
     max_prompt_tokens: int = None
@@ -70,10 +72,12 @@ def __init__(
         previous_plan: str,
         step: int,
         flags: GenericPromptFlags,
+        llm: ChatModel,
     ) -> None:
         super().__init__()
         self.flags = flags
         self.history = dp.History(obs_history, actions, memories, thoughts, flags.obs)
+        goal = obs_history[-1]["goal_object"]
         if self.flags.enable_chat:
             self.instructions = dp.ChatInstructions(
                 obs_history[-1]["chat_messages"], extra_instructions=flags.extra_instructions
@@ -84,7 +88,7 @@ def __init__(
                     "Agent is in goal mode, but multiple user messages are present in the chat. Consider switching to `enable_chat=True`."
                 )
             self.instructions = dp.GoalInstructions(
-                obs_history[-1]["goal_object"], extra_instructions=flags.extra_instructions
+                goal, extra_instructions=flags.extra_instructions
             )
 
         self.obs = dp.Observation(
@@ -105,7 +109,10 @@ def time_for_caution():
         self.hints = dp.Hints(visible=lambda: flags.use_hints)
         self.task_hint = TaskHint(
             use_task_hint=flags.use_task_hint,
-            hint_db_path=flags.hint_db_path
+            hint_db_path=flags.hint_db_path,
+            goal=goal,
+            hint_retrieval_mode=flags.task_hint_retrieval_mode,
+            llm=llm,
         )
         self.plan = Plan(previous_plan, step, lambda: flags.use_plan)  # TODO add previous plan
         self.criticise = Criticise(visible=lambda: flags.use_criticise)
@@ -114,12 +121,12 @@ def time_for_caution():
     @property
     def _prompt(self) -> HumanMessage:
         prompt = HumanMessage(self.instructions.prompt)
-        
+
         # Add task hints if enabled
         task_hints_text = ""
-        if self.flags.use_task_hint and hasattr(self, 'task_name'):
+        if self.flags.use_task_hint and hasattr(self, "task_name"):
             task_hints_text = self.task_hint.get_hints_for_task(self.task_name)
-        
+
         prompt.add_text(
             f"""\
 {self.obs.prompt}\
@@ -286,11 +293,21 @@ def _parse_answer(self, text_answer):
 
 
 class TaskHint(dp.PromptElement):
-    def __init__(self, use_task_hint: bool = True, hint_db_path: str = None) -> None:
+    def __init__(
+        self,
+        use_task_hint: bool,
+        hint_db_path: str,
+        goal: str,
+        hint_retrieval_mode: Literal["direct", "llm", "emb"],
+        llm: ChatModel,
+    ) -> None:
         super().__init__(visible=use_task_hint)
         self.use_task_hint = use_task_hint
         self.hint_db_rel_path = "hint_db.csv"
         self.hint_db_path = hint_db_path  # Allow external path override
+        self.hint_retrieval_mode: Literal["direct", "llm", "emb"] = hint_retrieval_mode
+        self.goal = goal
+        self.llm = llm
         self._init()
 
     _prompt = ""  # Task hints are added dynamically in MainPrompt
@@ -316,39 +333,49 @@ def _init(self):
                 hint_db_path = Path(self.hint_db_path)
             else:
                 hint_db_path = Path(__file__).parent / self.hint_db_rel_path
-            
+
             if hint_db_path.exists():
                 self.hint_db = pd.read_csv(hint_db_path, header=0, index_col=None, dtype=str)
                 # Verify the expected columns exist
                 if "task_name" not in self.hint_db.columns or "hint" not in self.hint_db.columns:
-                    print(f"Warning: Hint database missing expected columns. Found: {list(self.hint_db.columns)}")
+                    print(
+                        f"Warning: Hint database missing expected columns. Found: {list(self.hint_db.columns)}"
+                    )
                     self.hint_db = pd.DataFrame(columns=["task_name", "hint"])
             else:
                 print(f"Warning: Hint database not found at {hint_db_path}")
                 self.hint_db = pd.DataFrame(columns=["task_name", "hint"])
+            self.hints_source = HintsSource(
+                hint_db_path=self.hint_db_rel_path,
+                hint_retrieval_mode=self.hint_retrieval_mode,
+            )
         except Exception as e:
             # Fallback to empty database on any error
             print(f"Warning: Could not load hint database: {e}")
             self.hint_db = pd.DataFrame(columns=["task_name", "hint"])
 
-
     def get_hints_for_task(self, task_name: str) -> str:
         """Get hints for a specific task."""
         if not self.use_task_hint:
             return ""
 
         # Ensure hint_db is initialized
-        if not hasattr(self, 'hint_db'):
+        if not hasattr(self, "hint_db"):
             self._init()
 
         # Check if hint_db has the expected structure
-        if self.hint_db.empty or "task_name" not in self.hint_db.columns or "hint" not in self.hint_db.columns:
+        if (
+            self.hint_db.empty
+            or "task_name" not in self.hint_db.columns
+            or "hint" not in self.hint_db.columns
+        ):
             return ""
 
         try:
-            task_hints = self.hint_db[
-                self.hint_db["task_name"].apply(lambda x: fnmatch.fnmatch(x, task_name))
-            ]
+            # task_hints = self.hint_db[
+            #     self.hint_db["task_name"].apply(lambda x: fnmatch.fnmatch(x, task_name))
+            # ]
+            task_hints = self.hints_source.choose_hints(self.llm, task_name, self.goal)
 
             hints = []
             for hint in task_hints["hint"]:
@@ -364,5 +391,5 @@ def get_hints_for_task(self, task_name: str) -> str:
                 return hints_str
         except Exception as e:
             print(f"Warning: Error getting hints for task {task_name}: {e}")
-        
+
         return ""
diff --git a/src/agentlab/agents/tool_use_agent/tool_use_agent.py b/src/agentlab/agents/tool_use_agent/tool_use_agent.py
@@ -28,6 +28,7 @@
 from agentlab.benchmarks.abstract_env import AbstractBenchmark as AgentLabBenchmark
 from agentlab.benchmarks.osworld import OSWorldActionSet
 from agentlab.llm.base_api import BaseModelArgs
+from agentlab.llm.chat_api import ChatModel
 from agentlab.llm.llm_utils import image_to_png_base64_url
 from agentlab.llm.response_api import (
     APIPayload,
@@ -316,39 +317,21 @@ class TaskHint(Block):
 
     def _init(self):
         """Initialize the block."""
-        if Path(self.hint_db_rel_path).is_absolute():
-            hint_db_path = Path(self.hint_db_rel_path)
-        else:
-            hint_db_path = Path(__file__).parent / self.hint_db_rel_path
-        self.hint_db = pd.read_csv(hint_db_path, header=0, index_col=None, dtype=str)
-        if self.hint_retrieval_mode == "emb":
-            self.encode_hints()
-
-    def oai_embed(self, text: str):
-        response = self._oai_emb.create(input=text, model="text-embedding-3-small")
-        return response.data[0].embedding
-
-    def encode_hints(self):
-        self.uniq_hints = self.hint_db.drop_duplicates(subset=["hint"], keep="first")
-        logger.info(
-            f"Encoding {len(self.uniq_hints)} unique hints with semantic keys using {self.embedder_model} model."
+        self.hints_source = HintsSource(
+            hint_db_path=self.hint_db_rel_path,
+            hint_retrieval_mode=self.hint_retrieval_mode,
+            top_n=self.top_n,
+            embedder_model=self.embedder_model,
+            embedder_server=self.embedder_server,
+            llm_prompt=self.llm_prompt,
         )
-        hints = self.uniq_hints["hint"].tolist()
-        semantic_keys = self.uniq_hints["semantic_keys"].tolist()
-        lines = [f"{k}: {h}" for h, k in zip(hints, semantic_keys)]
-        emb_path = f"{self.hint_db_rel_path}.embs.npy"
-        assert os.path.exists(emb_path), f"Embedding file not found: {emb_path}"
-        logger.info(f"Loading hint embeddings from: {emb_path}")
-        emb_dict = np.load(emb_path, allow_pickle=True).item()
-        self.hint_embeddings = np.array([emb_dict[k] for k in lines])
-        logger.info(f"Loaded hint embeddings shape: {self.hint_embeddings.shape}")
 
     def apply(self, llm, discussion: StructuredDiscussion, task_name: str) -> dict:
         if not self.use_task_hint:
             return {}
 
         goal = "\n".join([c.get("text", "") for c in discussion.groups[0].messages[1].content])
-        task_hints = self.choose_hints(llm, task_name, goal)
+        task_hints = self.hints_source.choose_hints(llm, task_name, goal)
 
         hints = []
         for hint in task_hints:
@@ -365,6 +348,49 @@ def apply(self, llm, discussion: StructuredDiscussion, task_name: str) -> dict:
 
             discussion.append(msg)
 
+
+class HintsSource:
+    def __init__(
+        self,
+        hint_db_path: str,
+        hint_retrieval_mode: Literal["direct", "llm", "emb"] = "direct",
+        top_n: int = 4,
+        embedder_model: str = "Qwen/Qwen3-Embedding-0.6B",
+        embedder_server: str = "http://localhost:5000",
+        llm_prompt: str = """We're choosing hints to help solve the following task:\n{goal}.\n
+You need to choose the most relevant hints topic from the following list:\n\nHint topics:\n{topics}\n
+Choose hint topic for the task and return only its number, e.g. 1. If you don't know the answer, return -1.""",
+    ) -> None:
+        self.hint_db_path = hint_db_path
+        self.hint_retrieval_mode = hint_retrieval_mode
+        self.top_n = top_n
+        self.embedder_model = embedder_model
+        self.embedder_server = embedder_server
+        self.llm_prompt = llm_prompt
+
+        if Path(hint_db_path).is_absolute():
+            self.hint_db_path = Path(hint_db_path).as_posix()
+        else:
+            self.hint_db_path = (Path(__file__).parent / self.hint_db_path).as_posix()
+        self.hint_db = pd.read_csv(self.hint_db_path, header=0, index_col=None, dtype=str)
+        if self.hint_retrieval_mode == "emb":
+            self.load_hint_vectors()
+
+    def load_hint_vectors(self):
+        self.uniq_hints = self.hint_db.drop_duplicates(subset=["hint"], keep="first")
+        logger.info(
+            f"Encoding {len(self.uniq_hints)} unique hints with semantic keys using {self.embedder_model} model."
+        )
+        hints = self.uniq_hints["hint"].tolist()
+        semantic_keys = self.uniq_hints["semantic_keys"].tolist()
+        lines = [f"{k}: {h}" for h, k in zip(hints, semantic_keys)]
+        emb_path = f"{self.hint_db_path}.embs.npy"
+        assert os.path.exists(emb_path), f"Embedding file not found: {emb_path}"
+        logger.info(f"Loading hint embeddings from: {emb_path}")
+        emb_dict = np.load(emb_path, allow_pickle=True).item()
+        self.hint_embeddings = np.array([emb_dict[k] for k in lines])
+        logger.info(f"Loaded hint embeddings shape: {self.hint_embeddings.shape}")
+
     def choose_hints(self, llm, task_name: str, goal: str) -> list[str]:
         """Choose hints based on the task name."""
         if self.hint_retrieval_mode == "llm":
@@ -384,11 +410,14 @@ def choose_hints_llm(self, llm, goal: str) -> list[str]:
         hint_topics = list(topic_to_hints.keys())
         topics = "\n".join([f"{i}. {h}" for i, h in enumerate(hint_topics)])
         prompt = self.llm_prompt.format(goal=goal, topics=topics)
-        response = llm(APIPayload(messages=[llm.msg.user().add_text(prompt)]))
+        if isinstance(llm, ChatModel):
+            response: str = llm(messages=[dict(role="user", content=prompt)])["content"]
+        else:
+            response: str = llm(APIPayload(messages=[llm.msg.user().add_text(prompt)])).think
         try:
-            hint_topic_idx = json.loads(response.think)
+            hint_topic_idx = json.loads(response)
             if hint_topic_idx < 0 or hint_topic_idx >= len(hint_topics):
-                logger.error(f"Wrong LLM hint id response: {response.think}, no hints")
+                logger.error(f"Wrong LLM hint id response: {response}, no hints")
                 return []
             hint_topic = hint_topics[hint_topic_idx]
             hint_indices = topic_to_hints[hint_topic]
@@ -397,7 +426,7 @@ def choose_hints_llm(self, llm, goal: str) -> list[str]:
             hints = df["hint"].tolist()
             logger.debug(f"LLM hint topic {hint_topic_idx}, chosen hints: {df['hint'].tolist()}")
         except json.JSONDecodeError:
-            logger.error(f"Failed to parse LLM hint id response: {response.think}, no hints")
+            logger.error(f"Failed to parse LLM hint id response: {response}, no hints")
             hints = []
         return hints
 
@@ -427,6 +456,7 @@ def _encode(self, texts: list[str], prompt: str = "", timeout: int = 10, max_ret
                     raise e
                 time.sleep(random.uniform(1, timeout))
                 continue
+        raise ValueError("Failed to encode hints")
 
     def _similarity(
         self, texts1: list[str], texts2: list[str], timeout: int = 2, max_retries: int = 5
@@ -446,6 +476,7 @@ def _similarity(
                     raise e
                 time.sleep(random.uniform(1, timeout))
                 continue
+        raise ValueError("Failed to compute similarity")
 
     def choose_hints_direct(self, task_name: str) -> list[str]:
         hints = self.hint_db[