enable reprompt tool use agent from controller

patricebechard · patricebechard · commit c41b8efc22e9 · 2025-07-22T11:34:01.000-04:00
diff --git a/src/agentlab/agents/tool_use_agent/hint_db.csv b/src/agentlab/agents/tool_use_agent/hint_db.csv
@@ -21,3 +21,25 @@ July 13,workarena.servicenow.create-hardware-asset,385,gpt-4.1,ToolUse-gpt-4.1,W
 July 13,workarena.servicenow.create-hardware-asset,385,gpt-4.1,ToolUse-gpt-4.1,WorkArena-L1,WorkArena-L1,allac,Filling form in WorkArena,"Before clicking submit, make sure that all fields are filled properly. Then click submit."
 July 13,workarena.servicenow.create-hardware-asset,385,gpt-4.1,ToolUse-gpt-4.1,WorkArena-L1,WorkArena-L1,allac,Filling form in WorkArena,Avoid back and forth from tabs to tabs to reduce the number of actions
 July 14,workarena.servicenow.create-hardware-asset,385,gpt-4.1,ToolUse-gpt-4.1,WorkArena-L1,WorkArena-L1,allac,Filling form in WorkArena,When you see auto-complete make sure to select an element from that list
+July 16,workarena.servicenow.sort-asset-list,406,gpt-4-1,ToolUseAgent-gpt-4-1,workarena,workarena,patricebechard,Sorting lists in ServiceNow,"1. **Navigate to Your Table/List**
+
+   * For example, go to **Incident > All** or any other table you want to view.
+
+2. **Sort by One or Multiple Columns**
+
+   * `click` on the ""show / hide filter"" button (funnel icon) at the top left of the page to open the filter row.
+   * Repeat the following steps for each column you want to sort by:
+     * `click` on the ""Add Sort"" button to add a new sort filter. This will create a new ordering filter row with two comboboxes under the heading ""Order results by the following fields"".
+     * `fill` the first combobox with the appropriate field name you want to sort by. MAKE SURE to use the exact field name provided.
+     * `press` Enter after typing the field name. It is VERY IMPORTANT that you do this before doing anything else. DO NOT click on the run filter button before having confirmed your choice by explicitly pressing ENTER.
+     * `select_option` for the appropriate ordering between ascending (a to z) or descending (z to a) in the second combobox.
+   * Once all sort filters have been added, `click` the ""Run filter"" button to apply the sort.
+
+Notes:
+   * NEVER directly sort the columns using the table header.
+   * NEVER add columns via the Personalize List menu.
+
+3. **Resetting or Clearing Sorting**
+
+   * To reset sorting, click another column, or click again to toggle.
+   * In the filter bar, you may see a ""Sorted by..."" indicator—clear or change it as needed."
diff --git a/src/agentlab/agents/tool_use_agent/tool_use_agent.py b/src/agentlab/agents/tool_use_agent/tool_use_agent.py
@@ -8,22 +8,14 @@
 
 import bgym
 import pandas as pd
-from bgym import Benchmark as BgymBenchmark
-from browsergym.core.observation import extract_screenshot
-from browsergym.utils.obs import (
-    flatten_axtree_to_str,
-    flatten_dom_to_str,
-    overlay_som,
-    prune_html,
-)
-
 from agentlab.agents.agent_args import AgentArgs
 from agentlab.benchmarks.abstract_env import AbstractBenchmark as AgentLabBenchmark
 from agentlab.benchmarks.osworld import OSWorldActionSet
 from agentlab.llm.base_api import BaseModelArgs
 from agentlab.llm.llm_utils import image_to_png_base64_url
 from agentlab.llm.response_api import (
     APIPayload,
+    AzureOpenAIResponseModelArgs,
     ClaudeResponseModelArgs,
     LLMOutput,
     MessageBuilder,
@@ -33,6 +25,14 @@
     ToolCalls,
 )
 from agentlab.llm.tracking import cost_tracker_decorator
+from bgym import Benchmark as BgymBenchmark
+from browsergym.core.observation import extract_screenshot
+from browsergym.utils.obs import (
+    flatten_axtree_to_str,
+    flatten_dom_to_str,
+    overlay_som,
+    prune_html,
+)
 
 
 @dataclass
@@ -43,8 +43,8 @@ def _init(self):
 
     def make(self) -> "Block":
         """Returns a copy so the init can start adding some stuff to `self` without changing the
-        original datatclass that should only contain a config.
-        The aim is avoid having 2 calss definition for each block, e.g. Block and BlockArgs.
+        original dataclass that should only contain a config.
+        The aim is to avoid having 2 class definitions for each block, e.g. Block and BlockArgs.
 
         Returns:
             Block: A copy of the current block instance with initialization applied.
@@ -387,7 +387,6 @@ def __init__(
             self.config.action_subsets, multiaction=self.config.multiaction  # type: ignore
         )
         self.tools = self.action_set.to_tool_description(api=model_args.api)
-
         self.call_ids = []
 
         self.llm = model_args.make_model()
@@ -595,8 +594,8 @@ def get_action(self, obs: Any) -> float:
     task_hint=TaskHint(use_task_hint=True),
     keep_last_n_obs=None,
     multiaction=True,  # whether to use multi-action or not
-    # action_subsets=("bid",),
-    action_subsets=("coord"),
+    action_subsets=("bid",),
+    # action_subsets=("coord"),
     # action_subsets=("coord", "bid"),
 )
 
diff --git a/src/agentlab/analyze/agent_controller.py b/src/agentlab/analyze/agent_controller.py
@@ -25,7 +25,7 @@
 )
 from agentlab.experiments.exp_utils import RESULTS_DIR
 from agentlab.experiments.loop import ExpArgs, StepInfo, save_package_versions
-from agentlab.llm.llm_utils import Discussion
+from agentlab.llm.response_api import LLMOutput
 from bgym import DEFAULT_BENCHMARKS
 from dotenv import load_dotenv
 from transformers import AutoTokenizer
@@ -188,7 +188,12 @@ def step_agent_history(action, action_info):
     st.session_state.action_history.append(action)
     st.session_state.action_info_history.append(action_info)
     st.session_state.thought_history.append(action_info.think)
-    st.session_state.prompt_history.append(get_prompt(action_info))
+    if isinstance(st.session_state.agent, GenericAgent):
+        st.session_state.prompt_history.append(get_prompt(action_info))
+    elif isinstance(st.session_state.agent, ToolUseAgent):
+        st.session_state.prompt_history.append(
+            "\n".join([elem.to_markdown() for elem in st.session_state.agent.discussion.flatten()])
+        )
 
     # HACK: memory history can only be obtained via the agent
     if isinstance(st.session_state.agent, GenericAgent):
@@ -229,10 +234,31 @@ def revert_agent_history():
 
 def revert_agent_state():
     logger.info("Reverting agent state")
-    st.session_state.agent.obs_history.pop()
-    st.session_state.agent.actions.pop()
-    st.session_state.agent.thoughts.pop()
-    st.session_state.agent.memories.pop()
+    if isinstance(st.session_state.agent, GenericAgent):
+        st.session_state.agent.obs_history.pop()
+        st.session_state.agent.actions.pop()
+        st.session_state.agent.thoughts.pop()
+        st.session_state.agent.memories.pop()
+    elif isinstance(st.session_state.agent, ToolUseAgent):
+        num_groups = len(st.session_state.agent.discussion.groups)
+        if num_groups == 3:
+            # start from blank state
+            st.session_state.agent.discussion.groups = []
+            st.session_state.agent.last_response = LLMOutput()
+            st.session_state.agent._responses = []
+        elif num_groups > 3:
+            # drop the last group (last action), then clear the previous group's summary and messages; NOTE(review): messages[:0] keeps nothing, not even the action -- confirm this is intended
+            st.session_state.agent.discussion.groups.pop()
+            last_group = copy.deepcopy(st.session_state.agent.discussion.groups[-1])
+            last_group.summary = None
+            last_group.messages = last_group.messages[:0]  # remove everything from last group
+            st.session_state.agent.discussion.groups[-1] = last_group
+            st.session_state.agent._responses.pop()
+            st.session_state.agent.last_response = copy.deepcopy(
+                st.session_state.agent._responses[-1]
+            )
+        else:
+            raise Exception("Invalid number of groups")
 
 
 def restore_env_history(step: int):
@@ -534,9 +560,17 @@ def load_session(exp_files):
         st.session_state.action_history.append(step_info.action)
         st.session_state.action_info_history.append(step_info.agent_info)
         st.session_state.thought_history.append(step_info.agent_info.get("think", None))
-        st.session_state.prompt_history.append(get_prompt(step_info.agent_info))
         if isinstance(st.session_state.agent, GenericAgent):
             st.session_state.memory_history.append(step_info.agent_info.get("memory", None))
+            st.session_state.prompt_history.append(get_prompt(step_info.agent_info))
+        elif isinstance(st.session_state.agent, ToolUseAgent):
+            st.session_state.prompt_history.append(
+                "\n".join(
+                    [elem.to_markdown() for elem in st.session_state.agent.discussion.flatten()]
+                )
+            )
+        else:
+            raise ValueError(f"Unknown agent type: {type(st.session_state.agent)}")
         st.session_state.obs_history.append(step_info.obs)
         st.session_state.reward_history.append(step_info.reward)
         st.session_state.terminated_history.append(step_info.terminated)
@@ -573,7 +607,8 @@ def clean_session():
 def prepare_agent():
     st.session_state.agent_args.prepare()
     st.session_state.agent = st.session_state.agent_args.make_agent()
-    st.session_state.agent.set_task_name(st.session_state.task)
+    if isinstance(st.session_state.agent, ToolUseAgent):
+        st.session_state.agent.set_task_name(st.session_state.task)
 
 
 def set_environment_info():
@@ -863,9 +898,9 @@ def set_prompt_modifier():
             st.session_state.agent.config.obs.use_tabs = st.checkbox(
                 "Use tabs", value=st.session_state.agent.config.obs.use_tabs
             )
-            st.session_state.agent.config.obs.add_mouse_pointer = st.checkbox(
-                "Add mouse pointer", value=st.session_state.agent.config.obs.add_mouse_pointer
-            )
+            # st.session_state.agent.config.obs.add_mouse_pointer = st.checkbox(
+            #     "Add mouse pointer", value=st.session_state.agent.config.obs.add_mouse_pointer
+            # )
             st.session_state.agent.config.obs.use_zoomed_webpage = st.checkbox(
                 "Use zoomed webpage", value=st.session_state.agent.config.obs.use_zoomed_webpage
             )
@@ -1107,7 +1142,14 @@ def set_axtree_tab():
 
 
 def set_prompt_tab():
-    st.code(st.session_state.prompt_history[-1], language=None, wrap_lines=True)
+    if isinstance(st.session_state.agent, GenericAgent):
+        st.code(st.session_state.prompt_history[-1], language=None, wrap_lines=True)
+    elif isinstance(st.session_state.agent, ToolUseAgent):
+        st.markdown(st.session_state.prompt_history[-1])
+
+        st.markdown(f"## Last summary:\n{st.session_state.agent.discussion.get_last_summary()}")
+    else:
+        raise ValueError(f"Unknown agent type: {type(st.session_state.agent)}")
 
 
 def set_previous_steps_tab():
diff --git a/src/agentlab/llm/response_api.py b/src/agentlab/llm/response_api.py
@@ -589,29 +589,26 @@ class AzureOpenAIResponseModel(OpenAIResponseModel):
     def __init__(
         self,
         model_name: str,
+        base_url: Optional[str] = None,
         api_key: Optional[str] = None,
-        temperature: float = 0.5,
-        max_tokens: int = 100,
-        extra_kwargs: Optional[Dict[str, Any]] = None,
-        **kwargs,
+        temperature: float | None = None,
+        max_tokens: int | None = 100,
     ):
         api_key = os.getenv("AZURE_OPENAI_API_KEY")
-        self.tools = kwargs.pop("tools", None)
-        logging.info(f"Tools: {self.tools}")
-        super().__init__(
-            model_name=model_name,
-            api_key=api_key,
-            temperature=temperature,
-            max_tokens=max_tokens,
-            extra_kwargs=extra_kwargs,
-            **kwargs,
-        )
-        # azure client takes extra kwargs
-        self.client = OpenAI(
-            api_key=api_key,
-            base_url=urljoin(os.getenv("AZURE_OPENAI_ENDPOINT"), "openai/v1"),
-            default_query={"api-version": "preview"},
+        base_url = urljoin(os.getenv("AZURE_OPENAI_ENDPOINT"), "openai/v1")
+        self.action_space_as_tools = True  # this should be a config
+        super().__init__(  # This is passed to BaseModel
+            model_name=model_name, api_key=api_key, temperature=temperature, max_tokens=max_tokens
         )
+        client_args = {}
+        if base_url is not None:
+            client_args["base_url"] = base_url
+        if api_key is not None:
+            client_args["api_key"] = api_key
+        client_args["default_query"] = {"api-version": "preview"}
+        self.client = OpenAI(**client_args)
+        # Init pricing tracker after super() so that all attributes have been set.
+        self.init_pricing_tracker(pricing_api="openai")  # Use the PricingMixin
 
 
 class OpenAIChatCompletionModel(BaseModelWithPricing):
@@ -958,9 +955,6 @@ def make_model(self, extra_kwargs=None, **kwargs):
             model_name=self.model_name,
             temperature=self.temperature,
             max_tokens=self.max_new_tokens,
-            extra_kwargs=extra_kwargs,
-            pricing_api="openai",
-            **kwargs,
         )