update hinter agent

patricebechard · patricebechard · commit 70d701e9dce7 · 2025-09-11T13:37:53.000-04:00
diff --git a/experiments/generic/run_generic_agent.py b/experiments/generic/run_generic_agent.py
@@ -30,6 +30,16 @@ def main():
     agent_args = [get_base_agent(args.llm_config)]
     benchmark = DEFAULT_BENCHMARKS[args.benchmark]()
 
+    ##################### Shuffle env_args_list (fixed seed 42) and keep only the first 33 tasks
+    import numpy as np
+    rng = np.random.default_rng(42)
+    rng.shuffle(benchmark.env_args_list)
+    benchmark.env_args_list = benchmark.env_args_list[:33]
+    #####################
+
+    # for env_args in benchmark.env_args_list:
+        # env_args.max_steps = 100
+
     if args.relaunch:
         #  relaunch an existing study
         study = Study.load_most_recent(contains=None)
diff --git a/experiments/hinter/run_hinter_agent.py b/experiments/hinter/run_hinter_agent.py
@@ -49,6 +49,12 @@ def main():
     
     benchmark = DEFAULT_BENCHMARKS[args.benchmark]()
 
+    # # Disabled: shuffle env_args_list (seed 42) and keep only the first 33 entries
+    # import numpy as np
+    # rng = np.random.default_rng(42)
+    # rng.shuffle(benchmark.env_args_list)
+    # benchmark.env_args_list = benchmark.env_args_list[:33]
+
 
     if args.relaunch:
         #  relaunch an existing study
diff --git a/experiments/hinter/run_hinter_agent.sh b/experiments/hinter/run_hinter_agent.sh
@@ -9,15 +9,15 @@ PARALLEL_BACKEND="ray"
 HINT_TYPE="docs"    # human, llm, docs
 HINT_INDEX_TYPE="sparse" # sparse, dense
 HINT_QUERY_TYPE="goal" # goal, llm
-HINT_NUM_RESULTS=5
+HINT_NUM_RESULTS=3
 
 HINT_INDEX_PATH="indexes/servicenow-docs-bm25"
 # HINT_INDEX_PATH="indexes/servicenow-docs-embeddinggemma-300m"
 HINT_RETRIEVER_PATH="google/embeddinggemma-300m"
 
 N_JOBS=6
 
-python experiments/hint/run_hinter_agent.py \
+python experiments/hinter/run_hinter_agent.py \
     --benchmark $BENCHMARK \
     --llm-config $LLM_CONFIG \
     --parallel-backend $PARALLEL_BACKEND \
@@ -27,5 +27,4 @@ python experiments/hint/run_hinter_agent.py \
     --hint-query-type $HINT_QUERY_TYPE \
     --hint-index-path $HINT_INDEX_PATH \
     --hint-retriever-path $HINT_RETRIEVER_PATH \
-    --hint-num-results $HINT_NUM_RESULTS \
-    --relaunch
+    --hint-num-results $HINT_NUM_RESULTS
diff --git a/src/agentlab/agents/generic_agent_hinter/generic_agent.py b/src/agentlab/agents/generic_agent_hinter/generic_agent.py
@@ -120,6 +120,12 @@ def get_action(self, obs):
             queries if getattr(self.flags, "hint_level", "episode") == "step" else None
         )
 
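+        # Fetch task hints for the prompt. NOTE(review): gated on flags.use_hints here, while _get_task_hints checks flags.use_task_hint — confirm both flags are intended.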
+        if self.flags.use_hints:
+            task_hints = self._get_task_hints()
+        else:
+            task_hints = []
+
         main_prompt = MainPrompt(
             action_set=self.action_set,
             obs_history=self.obs_history,
@@ -130,7 +136,7 @@ def get_action(self, obs):
             step=self.plan_step,
             flags=self.flags,
             llm=self.chat_llm,
-            queries=queries_for_hints,
+            task_hints=task_hints,
         )
 
         # Set task name for task hints if available
@@ -303,60 +309,57 @@ def _init_hints_index(self):
             print(f"Warning: Could not load hint database: {e}")
             self.hint_db = pd.DataFrame(columns=["task_name", "hint"])
 
-    def get_hints_for_task(self, task_name: str) -> str:
+    def _get_task_hints(self) -> list[str]:
         """Get hints for a specific task."""
-        if not self.use_task_hint:
-            return ""
+        if not self.flags.use_task_hint:
+            return []
 
-        if self.hint_type == "docs":
+        if self.flags.hint_type == "docs":
             if not hasattr(self, "hint_index"):
                 print("Initializing hint index new time")
                 self._init()
-            if self.hint_query_type == "goal":
-                query = self.goal
-            elif self.hint_query_type == "llm":
+            if self.flags.hint_query_type == "goal":
+                query = self.obs_history[-1]["goal_object"][0]["text"]
+            elif self.flags.hint_query_type == "llm":
                 query = self.llm.generate(self._prompt + self._abstract_ex + self._concrete_ex)
             else:
-                raise ValueError(f"Unknown hint query type: {self.hint_query_type}")
+                raise ValueError(f"Unknown hint query type: {self.flags.hint_query_type}")
 
-            if self.hint_index_type == "sparse":
+            print(f"Query: {query}")
+            if self.flags.hint_index_type == "sparse":
                 import bm25s
                 query_tokens = bm25s.tokenize(query)
-                docs, _ = self.hint_index.retrieve(query_tokens, k=self.hint_num_results)
+                docs, _ = self.hint_index.retrieve(query_tokens, k=self.flags.hint_num_results)
                 docs = [elem["text"] for elem in docs[0]]
                 # HACK: truncate to 20k characters (should cover >99% of the cases)
                 for doc in docs:
                     if len(doc) > 20000:
                         doc = doc[:20000]
                         doc += " ...[truncated]"
-            elif self.hint_index_type == "dense":
+            elif self.flags.hint_index_type == "dense":
                 query_embedding = self.hint_retriever.encode(query)
-                _, docs = self.hint_index.get_nearest_examples("embeddings", query_embedding, k=self.hint_num_results)
+                _, docs = self.hint_index.get_nearest_examples("embeddings", query_embedding, k=self.flags.hint_num_results)
                 docs = docs["text"]
 
-            hints_str = (
-                "# Hints:\nHere are some hints for the task you are working on:\n"
-                + "\n".join(docs)
-            )
-            return hints_str
+            return docs
 
         # Check if hint_db has the expected structure
         if (
             self.hint_db.empty
             or "task_name" not in self.hint_db.columns
             or "hint" not in self.hint_db.columns
         ):
-            return ""
+            return []
 
         try:
             # When step-level, pass queries as goal string to fit the llm_prompt
-            goal_or_queries = self.goal
-            if self.hint_level == "step" and self.queries:
+            goal_or_queries = self.obs_history[-1]["goal_object"][0]["text"]
+            if self.flags.hint_level == "step" and self.queries:
                 goal_or_queries = "\n".join(self.queries)
 
             task_hints = self.hints_source.choose_hints(
                 self.llm,
-                task_name,
+                self.task_name,
                 goal_or_queries,
             )
 
@@ -366,13 +369,8 @@ def get_hints_for_task(self, task_name: str) -> str:
                 if hint:
                     hints.append(f"- {hint}")
 
-            if len(hints) > 0:
-                hints_str = (
-                    "# Hints:\nHere are some hints for the task you are working on:\n"
-                    + "\n".join(hints)
-                )
-                return hints_str
+            return hints
         except Exception as e:
-            print(f"Warning: Error getting hints for task {task_name}: {e}")
+            print(f"Warning: Error getting hints for task {self.task_name}: {e}")
 
-        return ""
+        return []
diff --git a/src/agentlab/agents/generic_agent_hinter/generic_agent_prompt.py b/src/agentlab/agents/generic_agent_hinter/generic_agent_prompt.py
@@ -59,6 +59,16 @@ class GenericPromptFlags(dp.Flags):
     max_trunc_itr: int = 20
     flag_group: str = None
 
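+    # Hint retrieval settings. NOTE(review): the agent's _get_task_hints only handles hint_query_type "goal"/"llm" in docs mode, so the "direct" default raises ValueError there — confirm.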
+    use_task_hint: bool = False
+    hint_type: str = "docs"
+    hint_index_type: str = "sparse"
+    hint_query_type: str = "direct"
+    hint_index_path: str = "indexes/servicenow-docs-bm25"
+    hint_retriever_path: str = "google/embeddinggemma-300m"
+    hint_num_results: int = 5
+    n_retrieval_queries: int = 1
+
 
 class MainPrompt(dp.Shrinkable):
     def __init__(
@@ -68,7 +78,6 @@ def __init__(
         actions: list[str],
         memories: list[str],
         thoughts: list[str],
-        hints: list[str],
         previous_plan: str,
         step: int,
         flags: GenericPromptFlags,