Add max observation size limit and a function to prepare per-turn input/output pairs for RL training

ollmer · ollmer · commit 462038e3e4bc · 2025-12-04T17:07:35.000Z
diff --git a/src/agentlab/agents/react_toolcall_agent.py b/src/agentlab/agents/react_toolcall_agent.py
@@ -44,6 +44,7 @@ class AgentConfig:
     use_axtree: bool = False
     use_screenshot: bool = True
     max_actions: int = 10
+    max_obs_chars: int = 100000 # truncate long observations to N chars
     max_history_tokens: int = 120000
     system_prompt: str = """
 You are an expert AI Agent trained to assist users with complex web tasks.
@@ -113,7 +114,7 @@ def obs_to_messages(self, obs: dict) -> list[dict]:
             goal = goal_obj[0]["text"]
             messages.append(user_message(f"Goal: {goal}"))
 
-        text = "\n\n".join([f"## {k}\n{v}" for k, v in texts.items()])
+        text = "\n\n".join([f"## {k}\n{v}" for k, v in texts.items()])[:self.config.max_obs_chars]
         if self.last_tool_call_id:
             message = {
                 "role": "tool",
@@ -182,6 +183,7 @@ def action_from_message(self, message: Message) -> ToolCall:
                 logger.warning("Multiple tool calls found in LLM response, using the first one.")
             tool_call = message.tool_calls[0]
             name = tool_call.function.name
+            assert name, "Tool call must have a name."
             args = json.loads(tool_call.function.arguments)
             action = ToolCall(id=tool_call.id, name=name, arguments=args)
             self.last_tool_call_id = action.id
@@ -213,7 +215,7 @@ def compact_history(self):
         ]
 
         try:
-            response = self.llm(messages=messages, tool_choice="none")
+            response = self.llm(messages=messages)
             summary = response.choices[0].message.content  # type: ignore
         except Exception as e:
             logger.exception(f"Error compacting history: {e}")
@@ -224,11 +226,19 @@ def compact_history(self):
         summary_message = {"role": "user", "content": f"## Previous Interaction :\n{summary}"}
         self.history = [system_msg, summary_message, *rest[midpoint:]]
 
+    def get_training_pairs(self) -> list[tuple[list[dict | Message], Message]]:
+        """Pair each `Message` in the history with a snapshot of the messages before it."""
+        pairs = []
+        for idx, msg in enumerate(self.history):
+            if isinstance(msg, Message):
+                # Slice a fresh prefix; one shared, still-growing list would alias every pair.
+                pairs.append((self.history[:idx], msg))
+        return pairs
 
 @dataclass
 class ReactToolCallAgentArgs(AgentArgs):
-    llm_args: LLMArgs | None = None
-    config: AgentConfig | None = None
+    llm_args: LLMArgs = None # type: ignore
+    config: AgentConfig = None # type: ignore
 
     def make_agent(self, actions: list[ToolSpec]) -> ReactToolCallAgent:
         llm = self.llm_args.make_model()