diff --git a/.github/workflows/pythonlint.yml b/.github/workflows/pythonlint.yml
index 5497aeeb..5449425b 100644
--- a/.github/workflows/pythonlint.yml
+++ b/.github/workflows/pythonlint.yml
@@ -8,10 +8,10 @@ jobs:
     runs-on: ubuntu-latest
     steps:
     - uses: actions/checkout@v3
-    - name: Set up Python 3.7
+    - name: Set up Python 3.8
       uses: actions/setup-python@v4
       with:
-        python-version: '3.7'
+        python-version: 3.8
         architecture: 'x64'
     - name: Install dependencies
       run: |
diff --git a/.github/workflows/pythontests.yml b/.github/workflows/pythontests.yml
index 0ea8c9ac..08c6cde3 100644
--- a/.github/workflows/pythontests.yml
+++ b/.github/workflows/pythontests.yml
@@ -21,10 +21,10 @@ jobs:
     runs-on: ubuntu-latest
     steps:
     - uses: actions/checkout@v3
-    - name: Set up Python 3.7
+    - name: Set up Python 3.8
      uses: actions/setup-python@v4
      with:
-        python-version: '3.7'
+        python-version: 3.8
        architecture: 'x64'
    - name: Install dependencies
      run: |
diff --git a/README.md b/README.md
index dd627e4c..c75b4861 100644
--- a/README.md
+++ b/README.md
@@ -92,7 +92,7 @@
 $ ./run_tests.sh
 
 ⚠️**Be sure to change your CWD to the human_aware_rl directory before running the script, as the test script uses the CWD to dynamically generate a path to save temporary training runs/checkpoints. The testing script will fail if not being run from the correct directory.**
 
-This will run all tests belonging to the human_aware_rl module. You can checkout the README in the submodule for instructions of running target-specific tests. This can be initiated from any directory.
+This will run all tests belonging to the human_aware_rl module. _These tests no longer work out of the box due to package version issues_: if you fix them, feel free to make a PR. You can check out the README in the submodule for instructions on running target-specific tests. This can be initiated from any directory.
 
 If you're thinking of using the planning code extensively, you should run the full testing suite that verifies all of the Overcooked accessory tools (this can take 5-10 mins):
 ```
diff --git a/setup.py b/setup.py
index 726c4898..804bf4a4 100644
--- a/setup.py
+++ b/setup.py
@@ -42,8 +42,7 @@
         "numpy",
         "scipy",
         "tqdm",
-        "gym",
-        "pettingzoo",
+        "gymnasium",
         "ipython",
         "pygame",
         "ipywidgets",
diff --git a/src/human_aware_rl/imitation/behavior_cloning_tf2.py b/src/human_aware_rl/imitation/behavior_cloning_tf2.py
index ae43c162..54dd7517 100644
--- a/src/human_aware_rl/imitation/behavior_cloning_tf2.py
+++ b/src/human_aware_rl/imitation/behavior_cloning_tf2.py
@@ -474,8 +474,8 @@ def __init__(self, observation_space, action_space, config):
         """
         RLLib compatible constructor for initializing a behavior cloning model
 
-        observation_space (gym.Space|tuple) Shape of the featurized observations
-        action_space (gym.space|tuple) Shape of the action space (len(Action.All_ACTIONS),)
+        observation_space (gymnasium.Space|tuple) Shape of the featurized observations
+        action_space (gymnasium.Space|tuple) Shape of the action space (len(Action.ALL_ACTIONS),)
         config (dict) Dictionary of relavant bc params
             - model_dir (str) Path to pickled keras.Model used to map observations to action logits
             - stochastic (bool) Whether action should return logit argmax or sample over distribution
@@ -519,7 +519,7 @@ def __init__(self, observation_space, action_space, config):
         self.context = self._create_execution_context()
 
     def _setup_shapes(self):
-        # This is here to make the class compatible with both tuples or gym.Space objs for the spaces
+        # This is here to make the class compatible with either tuples or gymnasium.Space objs for the spaces
         # Note: action_space = (len(Action.ALL_ACTIONS,)) is technically NOT the action space shape, which would be () since actions are scalars
         self.observation_shape = (
             self.observation_space
diff --git a/src/human_aware_rl/rllib/rllib.py b/src/human_aware_rl/rllib/rllib.py
index 162530bf..3b14168f 100644
--- a/src/human_aware_rl/rllib/rllib.py
+++ b/src/human_aware_rl/rllib/rllib.py
@@ -6,7 +6,7 @@
 from datetime import datetime
 
 import dill
-import gym
+import gymnasium
 import numpy as np
 import ray
 from ray.rllib.agents.ppo import PPOTrainer
@@ -32,8 +32,8 @@
     OvercookedGridworld,
 )
 
-action_space = gym.spaces.Discrete(len(Action.ALL_ACTIONS))
-obs_space = gym.spaces.Discrete(len(Action.ALL_ACTIONS))
+action_space = gymnasium.spaces.Discrete(len(Action.ALL_ACTIONS))
+obs_space = gymnasium.spaces.Discrete(len(Action.ALL_ACTIONS))
 
 timestr = datetime.today().strftime("%Y-%m-%d_%H-%M-%S")
@@ -218,9 +218,13 @@ def _validate_schedule(self, schedule):
     def _setup_action_space(self, agents):
         action_sp = {}
         for agent in agents:
-            action_sp[agent] = gym.spaces.Discrete(len(Action.ALL_ACTIONS))
-        self.action_space = gym.spaces.Dict(action_sp)
-        self.shared_action_space = gym.spaces.Discrete(len(Action.ALL_ACTIONS))
+            action_sp[agent] = gymnasium.spaces.Discrete(
+                len(Action.ALL_ACTIONS)
+            )
+        self.action_space = gymnasium.spaces.Dict(action_sp)
+        self.shared_action_space = gymnasium.spaces.Discrete(
+            len(Action.ALL_ACTIONS)
+        )
 
     def _setup_observation_space(self, agents):
         dummy_state = self.base_env.mdp.get_standard_start_state()
@@ -232,7 +236,7 @@ def _setup_observation_space(self, agents):
         high = np.ones(obs_shape) * float("inf")
         low = np.ones(obs_shape) * 0
 
-        self.ppo_observation_space = gym.spaces.Box(
+        self.ppo_observation_space = gymnasium.spaces.Box(
             np.float32(low), np.float32(high), dtype=np.float32
         )
 
@@ -243,7 +247,7 @@ def _setup_observation_space(self, agents):
         obs_shape = featurize_fn_bc(dummy_state)[0].shape
         high = np.ones(obs_shape) * 100
         low = np.ones(obs_shape) * -100
-        self.bc_observation_space = gym.spaces.Box(
+        self.bc_observation_space = gymnasium.spaces.Box(
             np.float32(low), np.float32(high), dtype=np.float32
         )
         # hardcode mapping between action space and agent
@@ -253,7 +257,7 @@ def _setup_observation_space(self, agents):
                 ob_space[agent] = self.ppo_observation_space
             else:
                 ob_space[agent] = self.bc_observation_space
-        self.observation_space = gym.spaces.Dict(ob_space)
+        self.observation_space = gymnasium.spaces.Dict(ob_space)
 
     def _get_featurize_fn(self, agent_id):
         if agent_id.startswith("ppo"):
diff --git a/src/overcooked_ai_py/__init__.py b/src/overcooked_ai_py/__init__.py
index 42a19935..dd0a830f 100644
--- a/src/overcooked_ai_py/__init__.py
+++ b/src/overcooked_ai_py/__init__.py
@@ -1,4 +1,4 @@
-from gym.envs.registration import register
+from gymnasium.envs.registration import register
 
 register(
     id="Overcooked-v0",
diff --git a/src/overcooked_ai_py/mdp/overcooked_env.py b/src/overcooked_ai_py/mdp/overcooked_env.py
index b61c0ca6..862fcbc1 100644
--- a/src/overcooked_ai_py/mdp/overcooked_env.py
+++ b/src/overcooked_ai_py/mdp/overcooked_env.py
@@ -2,7 +2,6 @@
 import time
 
 import cv2
-import gym
 import gymnasium
 import numpy as np
 import pygame
@@ -667,120 +666,120 @@ def proportion_stuck_time(trajectories, agent_idx, stuck_time=3):
     return stuck_matrix
 
 
-from pettingzoo.utils.env import ParallelEnv
-
-from overcooked_ai_py.agents.agent import AgentPair
-
-
-class OvercookedEnvPettingZoo(ParallelEnv):
-    def __init__(self, base_env, agents):
-        """
-        base_env: OvercookedEnv
-        agents: AgentPair
-
-        Example creating a PettingZoo env from a base_env:
-
-        mdp = OvercookedGridworld.from_layout_name("asymmetric_advantages")
-        base_env = OvercookedEnv.from_mdp(mdp, horizon=500)
-        agent_pair = load_agent_pair("path/to/checkpoint", "ppo", "ppo")
-        env = OvercookedEnvPettingZoo(base_env, agent_pair)
-
-        """
-        # we need agent-dependent observation space, and the best way to do it is just to include an agentPair
-        assert isinstance(
-            agents, AgentPair
-        ), "agents must be an AgentPair object"
-
-        self.agents = ["agent_0", "agent_1"]
-        self.possible_agents = ["agent_0", "agent_1"]
-        self.agent_map = {"agent_0": agents.a0, "agent_1": agents.a1}
-        self.base_env = base_env
-        self.observation_spaces = {
-            agent: self.observation_space(agent) for agent in self.agents
-        }
-        self.action_spaces = {
-            agent: gymnasium.spaces.Discrete(len(Action.ALL_ACTIONS))
-            for agent in self.agents
-        }
-        # this is the AgentPair object
-        self.reset()
-
-    import functools
-
-    # we want to return the same space object every time
-    @functools.lru_cache(maxsize=2)
-    def observation_space(self, agent):
-        # the observation can be different for each agent
-        agent = self.agent_map[agent]
-        dummy_mdp = self.base_env.mdp
-        dummy_state = dummy_mdp.get_standard_start_state()
-        obs_shape = agent.featurize(dummy_state)[0].shape
-        high = np.ones(obs_shape) * float("inf")
-        low = np.zeros(obs_shape)
-        return gymnasium.spaces.Box(low, high, dtype=np.float32)
-
-    # we want to return the same space object every time
-    @functools.lru_cache(maxsize=1)
-    def action_space(self, agent):
-        # the action space is the same for each agent
-        return gymnasium.spaces.Discrete(len(Action.ALL_ACTIONS))
-
-    def step(self, joint_action):
-        joint_action = [
-            Action.ALL_ACTIONS[joint_action[agent]] for agent in joint_action
-        ]
-        obs, reward, done, info = self.base_env.step(joint_action)
-        # https://gymnasium.farama.org/content/basic_usage/
-        # we have no early termination condition in this env, and the environment only terminates when the time horizon is reached
-        # therefore the terminated is always False, and we set truncated to done
-        terminated = False
-        truncated = done
-
-        def create_dict(value):
-            """
-            Each agent should have the same reward, terminated, truncated, info
-            """
-            return {agent: value for agent in self.agents}
-
-        def create_obs_dict(obs):
-            """
-            Observation is potentially different for each agent
-            """
-            return {
-                agent: self.agent_map[agent].featurize(obs)
-                for agent in self.agents
-            }
-
-        obs = create_obs_dict(obs)
-        reward = create_dict(reward)
-        terminated = create_dict(terminated)
-        truncated = create_dict(truncated)
-        info = create_dict(info)
-        if done:
-            self.agents = []
-        return obs, reward, terminated, truncated, info
-
-    def reset(self, seed=None, options=None):
-        """
-        Reset the embedded OvercookedEnv envrionment to the starting state
-        """
-        self.base_env.reset()
-        dummy_mdp = self.base_env.mdp
-        dummy_state = dummy_mdp.get_standard_start_state()
-        # when an environment terminates/truncates, PettingZoo wants all agents removed, so during reset we re-add them
-        self.agents = self.possible_agents[:]
-        # return the obsevations as dict
-        obs_dict = {
-            agent: self.agent_map[agent].featurize(dummy_state)[0]
-            for agent in self.agents
-        }
-        return obs_dict, None
-
-    def render(self, mode="human", close=False):
-        pass
-
-
-class Overcooked(gym.Env):
+# from pettingzoo.utils.env import ParallelEnv
+
+# from overcooked_ai_py.agents.agent import AgentPair
+
+
+# class OvercookedEnvPettingZoo(ParallelEnv):
+#     def __init__(self, base_env, agents):
+#         """
+#         base_env: OvercookedEnv
+#         agents: AgentPair
+
+#         Example creating a PettingZoo env from a base_env:
+
+#         mdp = OvercookedGridworld.from_layout_name("asymmetric_advantages")
+#         base_env = OvercookedEnv.from_mdp(mdp, horizon=500)
+#         agent_pair = load_agent_pair("path/to/checkpoint", "ppo", "ppo")
+#         env = OvercookedEnvPettingZoo(base_env, agent_pair)
+
+#         """
+#         # we need agent-dependent observation space, and the best way to do it is just to include an agentPair
+#         assert isinstance(
+#             agents, AgentPair
+#         ), "agents must be an AgentPair object"
+
+#         self.agents = ["agent_0", "agent_1"]
+#         self.possible_agents = ["agent_0", "agent_1"]
+#         self.agent_map = {"agent_0": agents.a0, "agent_1": agents.a1}
+#         self.base_env = base_env
+#         self.observation_spaces = {
+#             agent: self.observation_space(agent) for agent in self.agents
+#         }
+#         self.action_spaces = {
+#             agent: gymnasium.spaces.Discrete(len(Action.ALL_ACTIONS))
+#             for agent in self.agents
+#         }
+#         # this is the AgentPair object
+#         self.reset()
+
+#     import functools
+
+#     # we want to return the same space object every time
+#     @functools.lru_cache(maxsize=2)
+#     def observation_space(self, agent):
+#         # the observation can be different for each agent
+#         agent = self.agent_map[agent]
+#         dummy_mdp = self.base_env.mdp
+#         dummy_state = dummy_mdp.get_standard_start_state()
+#         obs_shape = agent.featurize(dummy_state)[0].shape
+#         high = np.ones(obs_shape, dtype=np.float32) * float("inf")
+#         low = np.zeros(obs_shape, dtype=np.float32)
+#         return gymnasium.spaces.Box(low, high, dtype=np.float32)
+
+#     # we want to return the same space object every time
+#     @functools.lru_cache(maxsize=1)
+#     def action_space(self, agent):
+#         # the action space is the same for each agent
+#         return gymnasium.spaces.Discrete(len(Action.ALL_ACTIONS))
+
+#     def step(self, joint_action):
+#         joint_action = [
+#             Action.ALL_ACTIONS[joint_action[agent]] for agent in joint_action
+#         ]
+#         obs, reward, done, info = self.base_env.step(joint_action)
+#         # https://gymnasium.farama.org/content/basic_usage/
+#         # we have no early termination condition in this env, and the environment only terminates when the time horizon is reached
+#         # therefore the terminated is always False, and we set truncated to done
+#         terminated = False
+#         truncated = done
+
+#         def create_dict(value):
+#             """
+#             Each agent should have the same reward, terminated, truncated, info
+#             """
+#             return {agent: value for agent in self.agents}
+
+#         def create_obs_dict(obs):
+#             """
+#             Observation is potentially different for each agent
+#             """
+#             return {
+#                 agent: self.agent_map[agent].featurize(obs)
+#                 for agent in self.agents
+#             }
+
+#         obs = create_obs_dict(obs)
+#         reward = create_dict(reward)
+#         terminated = create_dict(terminated)
+#         truncated = create_dict(truncated)
+#         info = create_dict(info)
+#         if done:
+#             self.agents = []
+#         return obs, reward, terminated, truncated, info
+
+#     def reset(self, seed=None, options=None):
+#         """
+#         Reset the embedded OvercookedEnv envrionment to the starting state
+#         """
+#         self.base_env.reset()
+#         dummy_mdp = self.base_env.mdp
+#         dummy_state = dummy_mdp.get_standard_start_state()
+#         # when an environment terminates/truncates, PettingZoo wants all agents removed, so during reset we re-add them
+#         self.agents = self.possible_agents[:]
+#         # return the obsevations as dict
+#         obs_dict = {
+#             agent: self.agent_map[agent].featurize(dummy_state)[0]
+#             for agent in self.agents
+#         }
+#         return obs_dict, None
+
+#     def render(self, mode="human", close=False):
+#         pass
+
+
+class Overcooked(gymnasium.Env):
     """
     Wrapper for the Env class above that is SOMEWHAT compatible with the standard gym API.
     Why only somewhat? Because we need to flatten a multi-agent env to be a single-agent env (as gym requires).
@@ -814,7 +813,7 @@ def __init__(self, base_env, featurize_fn, baselines_reproducible=False):
 
         mdp = OvercookedGridworld.from_layout_name("asymmetric_advantages")
         base_env = OvercookedEnv.from_mdp(mdp, horizon=500)
-        env = gym.make("Overcooked-v0",base_env = base_env, featurize_fn =base_env.featurize_state_mdp)
+        env = gymnasium.make("Overcooked-v0", base_env=base_env, featurize_fn=base_env.featurize_state_mdp)
         """
         if baselines_reproducible:
             # NOTE:
@@ -830,7 +829,7 @@ def __init__(self, base_env, featurize_fn, baselines_reproducible=False):
         self.base_env = base_env
         self.featurize_fn = featurize_fn
         self.observation_space = self._setup_observation_space()
-        self.action_space = gym.spaces.Discrete(len(Action.ALL_ACTIONS))
+        self.action_space = gymnasium.spaces.Discrete(len(Action.ALL_ACTIONS))
         self.reset()
         self.visualizer = StateVisualizer()
 
@@ -838,9 +837,9 @@ def _setup_observation_space(self):
         dummy_mdp = self.base_env.mdp
         dummy_state = dummy_mdp.get_standard_start_state()
         obs_shape = self.featurize_fn(dummy_state)[0].shape
-        high = np.ones(obs_shape) * float("inf")
-        low = np.zeros(obs_shape)
-        return gym.spaces.Box(low, high, dtype=np.float32)
+        high = np.ones(obs_shape, dtype=np.float32) * float("inf")
+        low = np.zeros(obs_shape, dtype=np.float32)
+        return gymnasium.spaces.Box(low, high, dtype=np.float32)
 
     def step(self, action):
         """
diff --git a/testing/overcooked_test.py b/testing/overcooked_test.py
index 2bd5b989..5f17a026 100644
--- a/testing/overcooked_test.py
+++ b/testing/overcooked_test.py
@@ -6,7 +6,7 @@
 import unittest
 from math import factorial
 
-import gym
+import gymnasium
 import numpy as np
 
 from overcooked_ai_py.agents.agent import (
@@ -1699,13 +1699,13 @@ def setUp(self):
         np.random.seed(0)
 
     def test_creation(self):
-        env = gym.make(
+        env = gymnasium.make(
             "Overcooked-v0",
             base_env=self.env,
             featurize_fn=self.env.featurize_state_mdp,
         )
         # verify that the action_space * obs_space are initialized correctly
-        self.assertEqual(env.action_space, gym.spaces.Discrete(6))
+        self.assertEqual(env.action_space, gymnasium.spaces.Discrete(6))
         self.assertEqual(
             env.observation_space.shape,
             self.base_mdp.get_featurize_state_shape(),
@@ -1714,31 +1714,6 @@ def test_creation(self):
         # TODO: write more tests here
 
 
-class TestPettingZooEnvironment(unittest.TestCase):
-    def test_api(self):
-        from pettingzoo.test import parallel_api_test
-
-        # Check whether ray is installed and skip if not
-        try:
-            from human_aware_rl.rllib.rllib import load_agent_pair
-        except ModuleNotFoundError:
-            return
-
-        base_mdp = OvercookedGridworld.from_layout_name("cramped_room")
-        # get the current directory of the file
-        current_dir = os.path.dirname(os.path.realpath(__file__))
-        agent_dir = os.path.join(
-            current_dir,
-            "../src/overcooked_demo/server/static/assets/agents/RllibCrampedRoomSP/agent",
-        )
-        ap = load_agent_pair(agent_dir, "ppo", "ppo")
-        env = OvercookedEnv.from_mdp(base_mdp, info_level=0, horizon=1000)
-        from overcooked_ai_py.mdp.overcooked_env import OvercookedEnvPettingZoo
-
-        wrapped_env = OvercookedEnvPettingZoo(env, ap)
-        parallel_api_test(wrapped_env, num_cycles=1000)
-
-
 class TestTrajectories(unittest.TestCase):
     def setUp(self):
         self.base_mdp = OvercookedGridworld.from_layout_name("cramped_room")
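For downstream users of this change, a minimal creation sketch of the migrated wrapper is shown below, distilled from the docstring example and the `test_creation` hunk above. The module paths are assumptions based on the repository layout visible in this diff, and the wrapper's `step`/`reset` behavior (untouched by this change) is intentionally not demonstrated.

```python
# Minimal sketch, not part of the diff: module paths are assumptions based on the
# repository layout shown above; step()/reset() usage is intentionally omitted.
import gymnasium

# Importing these modules runs the register() call in overcooked_ai_py/__init__.py,
# which makes "Overcooked-v0" available to gymnasium.make.
from overcooked_ai_py.mdp.overcooked_env import OvercookedEnv
from overcooked_ai_py.mdp.overcooked_mdp import OvercookedGridworld

mdp = OvercookedGridworld.from_layout_name("asymmetric_advantages")
base_env = OvercookedEnv.from_mdp(mdp, horizon=500)
env = gymnasium.make(
    "Overcooked-v0",
    base_env=base_env,
    featurize_fn=base_env.featurize_state_mdp,
)

# Mirrors the assertions in test_creation: a Discrete(6) action space
# (len(Action.ALL_ACTIONS)) and an observation shaped by the featurize function.
print(env.action_space)
print(env.observation_space.shape)
```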