Treat tapes without a StopStep as truncated

ollmer · ollmer · commit b57a1ab0ecf8 · 2025-04-17T11:23:14.000+02:00
diff --git a/src/agentlab/agents/tapeagent/agent.py b/src/agentlab/agents/tapeagent/agent.py
@@ -7,7 +7,7 @@
 from omegaconf import DictConfig
 from pydantic import Field
 from tapeagents.agent import Agent
-from tapeagents.core import Action, Observation, TapeMetadata, Thought
+from tapeagents.core import Action, Observation, StopStep, TapeMetadata, Thought
 from tapeagents.core import Tape as BaseTape
 
 from agentlab.agents.agent_args import AgentArgs
@@ -98,5 +98,6 @@ def get_action(self, obs: Observation | list[Observation]) -> tuple[Action, Tape
 
     @property
     def final_tape(self) -> Tape:
-        self.tape.metadata = ExtendedMetadata(author=self.agent.name)
+        truncated = not any([isinstance(s, StopStep) for s in self.tape.steps])
+        self.tape.metadata = ExtendedMetadata(author=self.agent.name, truncated=truncated)
         return self.tape
diff --git a/src/agentlab/analyze/tapes.py b/src/agentlab/analyze/tapes.py
@@ -53,6 +53,10 @@ def render_step(self, step: WrapperStep, index: int, **kwargs):
             content = step_dict.get("code", pretty_yaml(step_dict))
         elif kind == "code_execution_result":
             content = pretty_yaml(step_dict.get("result"))
+        elif len(step_dict) == 1 and "content" in step_dict:
+            content = step_dict["content"]
+        elif len(step_dict) == 1 and "reasoning" in step_dict:
+            content = step_dict["reasoning"]
         else:
             content = pretty_yaml(step_dict)
 
@@ -137,7 +141,7 @@ def get_exp_label(self, filename: str, tapes: list[Tape]) -> str:
         avg_steps = np.mean([len(tape) for tape in tapes])
         std_steps = np.std([len(tape) for tape in tapes])
         for tape in tapes:
-            if not tape.metadata.terminated:
+            if tape.metadata.truncated:
                 no_result += 1
             if tape.metadata.error:
                 errors["fatal"] += 1
diff --git a/src/agentlab/benchmarks/tau_bench.py b/src/agentlab/benchmarks/tau_bench.py