Skip to content

Commit b57a1ab

Browse files
committed
treat tapes without stop step as truncated
1 parent dbba760 commit b57a1ab

File tree

3 files changed

+8
-85
lines changed

3 files changed

+8
-85
lines changed

src/agentlab/agents/tapeagent/agent.py

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -7,7 +7,7 @@
77
from omegaconf import DictConfig
88
from pydantic import Field
99
from tapeagents.agent import Agent
10-
from tapeagents.core import Action, Observation, TapeMetadata, Thought
10+
from tapeagents.core import Action, Observation, StopStep, TapeMetadata, Thought
1111
from tapeagents.core import Tape as BaseTape
1212

1313
from agentlab.agents.agent_args import AgentArgs
@@ -98,5 +98,6 @@ def get_action(self, obs: Observation | list[Observation]) -> tuple[Action, Tape
9898

9999
@property
100100
def final_tape(self) -> Tape:
101-
self.tape.metadata = ExtendedMetadata(author=self.agent.name)
101+
truncated = not any([isinstance(s, StopStep) for s in self.tape.steps])
102+
self.tape.metadata = ExtendedMetadata(author=self.agent.name, truncated=truncated)
102103
return self.tape

src/agentlab/analyze/tapes.py

Lines changed: 5 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -53,6 +53,10 @@ def render_step(self, step: WrapperStep, index: int, **kwargs):
5353
content = step_dict.get("code", pretty_yaml(step_dict))
5454
elif kind == "code_execution_result":
5555
content = pretty_yaml(step_dict.get("result"))
56+
elif len(step_dict) == 1 and "content" in step_dict:
57+
content = step_dict["content"]
58+
elif len(step_dict) == 1 and "reasoning" in step_dict:
59+
content = step_dict["reasoning"]
5660
else:
5761
content = pretty_yaml(step_dict)
5862

@@ -137,7 +141,7 @@ def get_exp_label(self, filename: str, tapes: list[Tape]) -> str:
137141
avg_steps = np.mean([len(tape) for tape in tapes])
138142
std_steps = np.std([len(tape) for tape in tapes])
139143
for tape in tapes:
140-
if not tape.metadata.terminated:
144+
if tape.metadata.truncated:
141145
no_result += 1
142146
if tape.metadata.error:
143147
errors["fatal"] += 1

src/agentlab/benchmarks/tau_bench.py

Lines changed: 0 additions & 82 deletions
This file was deleted.

0 commit comments

Comments
 (0)