|
from abc import ABC, abstractmethod
from typing import Any, Optional

import gym
| 3 | + |
| 4 | + |
class AbstractEnvArgs(ABC):
    """Easily serializable class to store the arguments of an environment."""

    @abstractmethod
    def make_env(
        self,
        action_mapping,
        exp_dir: str,
        exp_task_kwargs: dict[str, Any],
    ) -> "AbstractEnv":
        """Create an instance of the environment with the arguments stored in this object.

        Args:
            action_mapping (dict[str,str]): mapping from the agent's action space to the
                environment's action space. See AbstractActionSet.to_python_code from
                BrowserGym for an example.
                NOTE(review): the referenced example looks like a method rather than a dict;
                confirm the expected type before adding an annotation.
            exp_dir: directory where the experiment is stored.
            exp_task_kwargs: additional arguments for the environment.

        Returns:
            Instance of the environment.
        """
| 21 | + |
| 22 | + |
class AbstractEnv(gym.Env, ABC):
    """Abstract environment interface built on top of ``gym.Env``.

    Concrete subclasses must implement ``reset``, ``step`` and ``close``.
    """

    @abstractmethod
    def reset(self, seed: Optional[int] = None) -> tuple[dict[str, Any], dict[str, Any]]:
        """Reset the environment to the initial state, ready for an agent to start a new episode.

        Args:
            seed: seed to be used for the environment's random number generator. Some tasks may
                be deterministic and not require a seed.

        Returns:
            obs (dict[str,Any]): dictionary containing the observations
            env_info (dict[str,Any]): additional information about the environment (see step's docstring)
        """

    @abstractmethod
    def step(self, action: str) -> tuple[dict[str, Any], float, bool, bool, dict[str, Any]]:
        """Execute the action in the environment and return the next observations.

        Args:
            action: action to be executed in the environment, as a string

        Returns:
            obs (dict[str,Any]): dictionary containing the observations
            reward (float): reward obtained after executing the action
            terminated (bool): whether the episode is terminated. The MDP reached a terminal state
            truncated (bool): whether the episode is truncated. The episode was truncated due to external reasons
            env_info (dict[str,Any]): additional information about the environment, with entries:
                task_info (str): Some potential debugging information about the task, not intended for the agent
                action_exec_start (float): time when the action execution started
                action_exec_stop (float): time when the action execution ended
                action_exec_timeout (float): TODO I don't remember exactly what this is
        """

    @abstractmethod
    def close(self):
        """Close any resources used by the environment."""
0 commit comments