Commit 09ef9a6

address review comments
1 parent ef9eb19 commit 09ef9a6

File tree

4 files changed: +49 −63 lines changed

src/agentlab/benchmarks/abstract_env.py
src/agentlab/benchmarks/gaia.py
src/agentlab/benchmarks/multitool_gym.py
src/agentlab/experiments/loop.py

src/agentlab/benchmarks/abstract_env.py

Lines changed: 1 addition & 14 deletions
@@ -1,12 +1,11 @@
 from abc import ABC, abstractmethod
-from dataclasses import dataclass
 
 import gymnasium as gym
 from dataclasses_json import DataClassJsonMixin
 from pydantic import BaseModel
 
 
-class AbstractEnvArgs(ABC):
+class AbstractEnvArgs(DataClassJsonMixin):
     @abstractmethod
     def make_env(self, action_mapping, exp_dir, exp_task_kwargs) -> "AbstractEnv":
         """Create an instance of the environment with the arguments stored in this object.
@@ -22,14 +21,6 @@ def make_env(self, action_mapping, exp_dir, exp_task_kwargs) -> "AbstractEnv":
         """
 
 
-@dataclass
-class SerializableEnvArgs(AbstractEnvArgs, DataClassJsonMixin):
-    """Easily serialiazable class to store the arguments of an environment"""
-
-    task_seed: int = 0
-    task_name: str = ""
-
-
 class AbstractBenchmark(BaseModel):
     name: str
     env_args_list: list
@@ -80,7 +71,3 @@ def step(self, action: str):
     @abstractmethod
     def close(self):
         """Close any resources used by the environment"""
-
-
-    @abstractmethod
-    def calculate_reward(self) -> float:
-        """Calculate the reward obtained in the last step"""

src/agentlab/benchmarks/gaia.py

Lines changed: 7 additions & 8 deletions
@@ -3,6 +3,7 @@
 import re
 import shutil
 import string
+from dataclasses import dataclass
 from pathlib import Path
 from typing import Any, Literal
 
@@ -16,7 +17,7 @@
 from tapeagents.tools.media_reader import VideoReader
 from tapeagents.tools.web_search import WebSearch
 
-from agentlab.benchmarks.abstract_env import AbstractBenchmark, SerializableEnvArgs
+from agentlab.benchmarks.abstract_env import AbstractBenchmark, AbstractEnvArgs
 from agentlab.benchmarks.multitool_gym import MultiToolGym
 
 logger = logging.getLogger(__name__)
@@ -33,19 +34,16 @@ def __init__(self, tools: list[Tool | StatefulTool], task: dict, exp_dir: str):
         os.makedirs(".cache", exist_ok=True)
 
     def reset(self, seed=None) -> tuple[list[Observation], dict]:
+        """
+        Reset the state of all the tools and prepare initial observations from the task again
+        """
         super().reset()
         question = GaiaQuestion.from_task(self.task)
         steps = [question]
         if image_obs := with_image(question):
             steps.append(image_obs)
         return steps, {}
 
-    def step(self, action: Action) -> tuple[Observation, float, bool, bool, dict]:
-        logger.info(f"Gym step called with action {type(action)}")
-        observation, reward, terminated, truncated, env_info = super().step(action)
-        logger.info(f"Gym observation: {observation.short_view()}")
-        return observation, reward, terminated, truncated, env_info
-
     def calculate_reward(self, action: Action) -> float:
         if isinstance(action, GaiaAnswer):
             model_answer = action.answer
@@ -62,7 +60,8 @@ def calculate_reward(self, action: Action) -> float:
         return reward
 
 
-class GaiaGymArgs(SerializableEnvArgs):
+@dataclass
+class GaiaGymArgs(AbstractEnvArgs):
     task: dict[str, Any]
     viewport_chars: int
     task_seed: int
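
The diff above leaves GaiaGym with no step override: per-step logging now lives in the base MultiToolGym.step, and the subclass supplies only the reward. A self-contained sketch of that split, assuming nothing beyond the standard library (BaseGymSketch, AnswerGymSketch, and the answer attribute are illustrative stand-ins, not the real tapeagents/agentlab classes, and the equality check stands in for GAIA's actual answer scoring):

import logging

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


class BaseGymSketch:
    def calculate_reward(self, action) -> float:
        # mirrors MultiToolGym's default: no reward logic, just a warning
        logger.warning("Reward calculation is not implemented, returning 0")
        return 0.0


class AnswerGymSketch(BaseGymSketch):
    def __init__(self, golden_answer: str):
        self.golden_answer = golden_answer

    def calculate_reward(self, action) -> float:
        # mirrors GaiaGym: score only a final-answer action against the ground truth
        answer = getattr(action, "answer", None)
        return 1.0 if answer == self.golden_answer else 0.0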

src/agentlab/benchmarks/multitool_gym.py

Lines changed: 6 additions & 5 deletions
@@ -1,28 +1,26 @@
+import logging
 import time
-from typing import Annotated, Union
 
-from pydantic import Field, TypeAdapter
 from tapeagents.core import Action, Observation, StopStep, Tape
 from tapeagents.environment import ToolCollectionEnvironment
 from tapeagents.tools.base import StatefulTool, Tool
 
 from agentlab.benchmarks.abstract_env import AbstractEnv
 
+logger = logging.getLogger(__name__)
 EnvTape = Tape[None, Action | Observation]
 
 
 class MultiToolGym(AbstractEnv):
     def __init__(self, tools: list[Tool | StatefulTool]):
         self._env = ToolCollectionEnvironment(tools)
         self._actions = self._env.actions()
-        self._actions_parser: TypeAdapter = TypeAdapter(
-            Annotated[Union[self._actions], Field(discriminator="kind")]
-        )
 
     def reset(self):
         self._env.reset()
 
     def step(self, action: Action) -> tuple[Observation, float, bool, bool, dict]:
+        logger.info(f"Gym {self.__class__.__name__} step called with action {type(action)}")
         assert isinstance(action, Action)
 
         action_exec_start = time.time()
@@ -43,9 +41,12 @@ def step(self, action: Action) -> tuple[Observation, float, bool, bool, dict]:
             "action_exec_stop": action_exec_stop,
             "action_exec_timeout": 0.0,
         }
+        obs_view = observation.short_view() if isinstance(observation, Observation) else observation
+        logger.info(f"Gym {self.__class__.__name__} observation: {obs_view}")
         return observation, reward, terminated, truncated, env_info
 
     def calculate_reward(self, action: Action) -> float:
+        logger.warning("Reward calculation is not implemented, returning 0")
        return 0.0
 
     def close(self):
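
The new observation log line guards short_view() with an isinstance check because step() may hand back values that are not Observation instances. A minimal standalone sketch of that guard (Obs and describe are hypothetical stand-ins for tapeagents' Observation and the logging call):

class Obs:  # hypothetical stand-in for tapeagents.core.Observation
    def short_view(self) -> str:
        return "Obs(truncated view)"


def describe(observation):
    # same guard as the added logging line in MultiToolGym.step
    return observation.short_view() if isinstance(observation, Obs) else observation


print(describe(Obs()))        # -> Obs(truncated view)
print(describe("raw value"))  # -> raw value (short_view never called)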

src/agentlab/experiments/loop.py

Lines changed: 35 additions & 36 deletions
@@ -23,10 +23,7 @@
 from browsergym.experiments.utils import count_messages_token, count_tokens
 from dataclasses_json import DataClassJsonMixin
 from PIL import Image
-from tapeagents.core import (
-    StepMetadata,
-    Tape,
-)
+from tapeagents.core import StepMetadata, Tape
 from tapeagents.dialog_tape import AssistantStep, AssistantThought
 from tqdm import tqdm
 
@@ -315,9 +312,8 @@ def run(self):
                 err_msg = f"Exception uncaught by agent or environment in task {self.env_args.task_name}.\n{type(e).__name__}:\n{e}"
             logger.info("Saving experiment info.")
             _save_summary_info(episode_info, self.exp_dir, err_msg, stack_trace)
-            self.save_tape(
-                agent.final_tape if isinstance(agent, TapeAgent) else self.as_tape(episode_info)
-            )
+            tape = agent.final_tape if isinstance(agent, TapeAgent) else as_tape(episode_info)
+            self.save_tape(tape)
         except Exception as e:
             logger.exception(f"Error while saving experiment info: {e}")
         try:
@@ -330,36 +326,11 @@ def run(self):
         except Exception as e:
             logger.exception(f"Error while unsetting the logger: {e}")
 
-    def as_tape(self, steps_info: list["StepInfo"]) -> Tape:
-        """
-        Create a Tape object from the steps info.
-
-        Returns:
-            Tape: a Tape object containing the steps and metadata.
-        """
-        tape: Tape = []
-        for step_info in steps_info:
-            step_metadata = StepMetadata(
-                result=dict(
-                    reward=step_info.reward,
-                    raw_reward=step_info.raw_reward,
-                    terminated=step_info.terminated,
-                    truncated=step_info.truncated,
-                    agent_info=step_info.agent_info,
-                    stats=step_info.stats,
-                )
-            )
-            steps = [DictObservation(content=step_info.obs)]
-            if thought := step_info.agent_info.get("think"):
-                steps.append(AssistantThought(content=thought))
-            steps.append(AssistantStep(content=step_info.action, metadata=step_metadata))
-            tape += steps
-        return tape
-
     def save_tape(self, tape: Tape, filename: str = "tape.json"):
-        if os.path.exists(self.exp_dir / filename):
-            raise FileExistsError(f"{filename} already exists in {self.exp_dir}")
-        with open(self.exp_dir / filename, "w") as f:
+        tape_path = Path(self.exp_dir) / filename
+        if tape_path.exists():
+            raise FileExistsError(f"{tape_path} already exists")
+        with open(tape_path, "w") as f:
             json.dump(tape.model_dump(), f, indent=2, ensure_ascii=False)
 
     def _set_logger(self):
@@ -951,3 +922,31 @@ def _flatten_dict(d, parent_key="", sep="."):
         else:
             items.append((new_key, v))
     return dict(items)
+
+
+def as_tape(steps_info: list) -> Tape:
+    """
+    Create a Tape object from the steps info.
+
+    Returns:
+        Tape: a Tape object containing the steps and metadata.
+    """
+    tape: Tape = []
+    for step_info in steps_info:
+        step_metadata = StepMetadata(
+            other=dict(
+                reward=step_info.reward,
+                raw_reward=step_info.raw_reward,
+                terminated=step_info.terminated,
+                truncated=step_info.truncated,
+                agent_info=step_info.agent_info,
+                stats=step_info.stats,
+            )
+        )
+        steps = [DictObservation(content=step_info.obs)]
+        if thought := step_info.agent_info.get("think"):
+            steps.append(AssistantThought(content=thought))
+        if step_info.action is not None:
+            steps.append(AssistantStep(content=step_info.action, metadata=step_metadata))
+        tape += steps
+    return tape
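
The reworked save_tape normalizes exp_dir through pathlib.Path, so it accepts either a str or a Path, and it refuses to overwrite an existing tape. A standalone sketch of the same write-once pattern (save_json_once is a hypothetical helper, not repo code):

import json
from pathlib import Path


def save_json_once(exp_dir, payload: dict, filename: str = "tape.json") -> Path:
    path = Path(exp_dir) / filename  # works whether exp_dir is a str or a Path
    if path.exists():  # fail loudly instead of clobbering an earlier run's tape
        raise FileExistsError(f"{path} already exists")
    with open(path, "w") as f:
        json.dump(payload, f, indent=2, ensure_ascii=False)
    return path


# usage: a second call with the same exp_dir raises FileExistsError
# save_json_once("/tmp/exp", {"steps": []})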
