stepwise hint retrieval

hnekoeiq · hnekoeiq · commit ee2653a0925b · 2025-09-08T19:41:27.000-04:00
diff --git a/src/agentlab/agents/generic_agent_hinter/generic_agent.py b/src/agentlab/agents/generic_agent_hinter/generic_agent.py
@@ -26,7 +26,7 @@
 from .generic_agent_prompt import (
     GenericPromptFlags,
     MainPrompt,
-    StepWiseRetrievalPrompt,
+    StepWiseContextIdentificationPrompt,
 )
 
 
@@ -111,10 +111,8 @@ def get_action(self, obs):
 
         queries, think_queries = self._get_queries()
 
-        # TODO
-        # use those queries to retreive from the database. e.g.:
-        # hints = self.hint_db.get_hints(queries)
-        # then add those hints to the main prompt
+        # forward the queries to the main prompt only when hints are step-level; otherwise pass None
+        queries_for_hints = queries if getattr(self.flags, "hint_level", "episode") == "step" else None
 
         main_prompt = MainPrompt(
             action_set=self.action_set,
@@ -126,6 +124,7 @@ def get_action(self, obs):
             step=self.plan_step,
             flags=self.flags,
             llm=self.chat_llm,
+            queries=queries_for_hints,
         )
 
         # Set task name for task hints if available
@@ -183,7 +182,7 @@ def get_action(self, obs):
     def _get_queries(self):
         """Retrieve queries for hinting."""
         system_prompt = SystemMessage(dp.SystemPrompt().prompt)
-        query_prompt = StepWiseRetrievalPrompt(
+        query_prompt = StepWiseContextIdentificationPrompt(
             obs_history=self.obs_history,
             actions=self.actions,
             thoughts=self.thoughts,
diff --git a/src/agentlab/agents/generic_agent_hinter/generic_agent_prompt.py b/src/agentlab/agents/generic_agent_hinter/generic_agent_prompt.py
@@ -62,6 +62,7 @@ class GenericPromptFlags(dp.Flags):
     max_trunc_itr: int = 20
     flag_group: str = None
     n_retrieval_queries: int = 3
+    hint_level: Literal["episode", "step"] = "episode"
 
 
 class MainPrompt(dp.Shrinkable):
@@ -76,6 +77,7 @@ def __init__(
         step: int,
         flags: GenericPromptFlags,
         llm: ChatModel,
+        queries: list[str] | None = None,
     ) -> None:
         super().__init__()
         self.flags = flags
@@ -118,6 +120,8 @@ def time_for_caution():
             hint_retrieval_mode=flags.task_hint_retrieval_mode,
             llm=llm,
             skip_hints_for_current_task=flags.skip_hints_for_current_task,
+            hint_level=flags.hint_level,
+            queries=queries,
         )
         self.plan = Plan(previous_plan, step, lambda: flags.use_plan)  # TODO add previous plan
         self.criticise = Criticise(visible=lambda: flags.use_criticise)
@@ -306,6 +310,8 @@ def __init__(
         hint_retrieval_mode: Literal["direct", "llm", "emb"],
         skip_hints_for_current_task: bool,
         llm: ChatModel,
+        hint_level: Literal["episode", "step"] = "episode",
+        queries: list[str] | None = None,
     ) -> None:
         super().__init__(visible=use_task_hint)
         self.use_task_hint = use_task_hint
@@ -315,6 +321,8 @@ def __init__(
         self.skip_hints_for_current_task = skip_hints_for_current_task
         self.goal = goal
         self.llm = llm
+        self.hint_level: Literal["episode", "step"] = hint_level
+        self.queries: list[str] | None = queries
         self._init()
 
     _prompt = ""  # Task hints are added dynamically in MainPrompt
@@ -352,6 +360,7 @@ def _init(self):
             else:
                 print(f"Warning: Hint database not found at {hint_db_path}")
                 self.hint_db = pd.DataFrame(columns=["task_name", "hint"])
+
             self.hints_source = HintsSource(
                 hint_db_path=hint_db_path.as_posix(),
                 hint_retrieval_mode=self.hint_retrieval_mode,
@@ -380,7 +389,16 @@ def get_hints_for_task(self, task_name: str) -> str:
             return ""
 
         try:
-            task_hints = self.hints_source.choose_hints(self.llm, task_name, self.goal)
+            # When step-level, pass queries as goal string to fit the llm_prompt
+            goal_or_queries = self.goal
+            if self.hint_level == "step" and self.queries:
+                goal_or_queries = "\n".join(self.queries)
+
+            task_hints = self.hints_source.choose_hints(
+                self.llm,
+                task_name,
+                goal_or_queries,
+            )
 
             hints = []
             for hint in task_hints:
@@ -400,14 +418,14 @@ def get_hints_for_task(self, task_name: str) -> str:
         return ""
 
 
-class StepWiseRetrievalPrompt(dp.Shrinkable):
+class StepWiseContextIdentificationPrompt(dp.Shrinkable):
     def __init__(
         self,
         obs_history: list[dict],
         actions: list[str],
         thoughts: list[str],
         obs_flags: dp.ObsFlags,
-        n_queries: int = 3,
+        n_queries: int = 1,
     ) -> None:
         super().__init__()
         self.obs_flags = obs_flags
@@ -430,10 +448,10 @@ def _prompt(self) -> HumanMessage:
         )
 
         example_queries = [
-            "How to sort with multiple columns on the ServiceNow platform?",
-            "What are the potential challenges of sorting by multiple columns?",
-            "How to handle sorting by multiple columns in a table?",
-            "Can I use the filter tool to sort by multiple columns?",
+            "The user has started sorting a table and needs to apply multiple column criteria simultaneously.",
+            "The user is attempting to configure advanced sorting options but the interface is unclear.",
+            "The user has selected the first sort column and is now looking for how to add a second sort criterion.",
+            "The user is in the middle of a multi-step sorting process and needs guidance on the next action.",
         ]
 
         example_queries_str = json.dumps(example_queries[: self.n_queries], indent=2)
@@ -442,8 +460,8 @@ def _prompt(self) -> HumanMessage:
             f"""
 # Querying memory
 
-Before choosing an action, let's search our available documentation and memory on how to approach this step.
-This could provide valuable hints on how to properly solve this task. Return your answer as follow
+Before choosing an action, let's search our available documentation and memory for relevant context.
+Generate a brief, general summary of the current status to help identify useful hints. Return your answer as follow
 <think>chain of thought</think>
 <queries>json list of strings</queries> for the queries. Return exactly {self.n_queries} 
 queries in the list.