|
10 | 10 |
|
11 | 11 | from copy import deepcopy |
12 | 12 | from dataclasses import asdict, dataclass |
13 | | -from functools import partial |
14 | 13 | from warnings import warn |
15 | 14 |
|
16 | 15 | import bgym |
17 | 16 | from browsergym.experiments.agent import Agent, AgentInfo |
18 | 17 |
|
19 | 18 | from agentlab.agents import dynamic_prompting as dp |
20 | 19 | from agentlab.agents.agent_args import AgentArgs |
21 | | -from agentlab.llm.chat_api import BaseModelArgs, make_system_message, make_user_message |
| 20 | +from agentlab.llm.chat_api import BaseModelArgs |
22 | 21 | from agentlab.llm.llm_utils import Discussion, ParseError, SystemMessage, retry |
23 | 22 | from agentlab.llm.tracking import cost_tracker_decorator |
24 | 23 |
|
25 | 24 | from .generic_agent_prompt import GenericPromptFlags, MainPrompt |
| 25 | +from functools import partial |
26 | 26 |
|
27 | 27 |
|
28 | 28 | @dataclass |
@@ -200,82 +200,3 @@ def _get_maxes(self): |
200 | 200 | else 20 # dangerous to change the default value here? |
201 | 201 | ) |
202 | 202 | return max_prompt_tokens, max_trunc_itr |
203 | | - |
204 | | - |
205 | | -from functools import partial |
206 | | - |
207 | | - |
def get_action_post_hoc(agent: GenericAgent, obs: dict, ans_dict: dict):
    """
    Rebuild, after the fact, the prompt and the output of one agent step.

    The agent has already been run; this function replays a single step so
    that the prompt it would have received and the answer it would have
    produced can be recovered. The purpose is to build datasets for training
    the agents.

    Side effects on ``agent``: ``obs`` is appended to ``agent.obs_history``;
    ``agent.plan`` and ``agent.plan_step`` are overwritten when ``ans_dict``
    provides ``"plan"`` / ``"step"``; one entry is appended to each of
    ``agent.memories``, ``agent.thoughts`` and ``agent.actions``.

    Args:
        agent (GenericAgent): The agent whose step is being reconstructed.
        obs (dict): The observation to append to the agent's history.
        ans_dict (dict): The answer dictionary. ``"action"`` is required;
            ``"plan"``, ``"step"``, ``"memory"`` and ``"think"`` are optional.

    Returns:
        Tuple: ``(system_prompt, instruction_prompt, output)`` -- the system
        prompt, the main prompt after fitting it to the token budget, and the
        output text reconstructed from ``ans_dict``.

    Raises:
        KeyError: If ``ans_dict`` has no ``"action"`` entry. The agent's
            history, plan, memories and thoughts have already been updated
            at that point.
    """
    system_prompt = dp.SystemPrompt().prompt

    # The new observation must be part of the history before the prompt is built.
    agent.obs_history.append(obs)

    # The prompt is built from the agent's state *before* this step's answer
    # is applied below.
    prompt = MainPrompt(
        action_set=agent.action_set,
        obs_history=agent.obs_history,
        actions=agent.actions,
        memories=agent.memories,
        thoughts=agent.thoughts,
        previous_plan=agent.plan,
        step=agent.plan_step,
        flags=agent.flags,
    )

    token_budget, trunc_iterations = agent._get_maxes()
    instruction_prompt = dp.fit_tokens(
        shrinkable=prompt,
        max_prompt_tokens=token_budget,
        model_name=agent.chat_model_args.model_name,
        max_iterations=trunc_iterations,
    )

    if isinstance(instruction_prompt, list):
        # NOTE: this is when we have images; only the "text" field of the
        # first element is kept.
        instruction_prompt = instruction_prompt[0]["text"]

    # TODO: make sure the bid is in the prompt

    # Output fragments, in the order plan / memory / think / action.
    fragments = []

    # TODO: validate this
    agent.plan = ans_dict.get("plan", agent.plan)
    if agent.plan != "No plan yet":
        fragments.append(f"\n<plan>\n{agent.plan}\n</plan>\n")

    # TODO: is plan_step something that the agent outputs?
    agent.plan_step = ans_dict.get("step", agent.plan_step)

    # A missing memory / thought is still recorded (as None) in the agent's
    # history; it is only left out of the reconstructed output.
    memory = ans_dict.get("memory")
    agent.memories.append(memory)
    if memory is not None:
        fragments.append(f"\n<memory>\n{memory}\n</memory>\n")

    thought = ans_dict.get("think")
    agent.thoughts.append(thought)
    if thought is not None:
        fragments.append(f"\n<think>\n{thought}\n</think>\n")

    action = ans_dict["action"]
    agent.actions.append(action)
    if action is not None:
        fragments.append(f"\n<action>\n{action}\n</action>")

    return system_prompt, instruction_prompt, "".join(fragments)
0 commit comments