
Commit 4d9ac1d

add mock gym.py update continuous_wrapper add example test (#685)
* add mock gym.py, update continuous_wrapper, add example test
* yapf
* update build.sh, update gym.py
* update build.sh
* update build.sh
* remove strict equality to prevent infinite loop
* add comment
* add copyright
* delete paddle_speed_test.py
* delete torch_speed_test.py
* add comment
* yapf
* update comment
1 parent c564af9 commit 4d9ac1d

File tree

4 files changed: +245 -11 lines


.teamcity/build.sh

Lines changed: 21 additions & 8 deletions
@@ -13,6 +13,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+set -ex
 
 function init() {
     RED='\033[0;31m'
@@ -26,6 +27,24 @@ function init() {
     export LD_LIBRARY_PATH="/usr/local/TensorRT-6.0.1.5/lib:$LD_LIBRARY_PATH"
 }
 
+function run_example_test {
+    for exp in QuickStart DQN DQN_variant PPO SAC TD3 OAC DDPG
+    do
+        cp parl/tests/gym.py examples/${exp}/
+    done
+
+    python examples/QuickStart/train.py
+    python examples/DQN/train.py
+    python examples/DQN_variant/train.py --train_total_steps 5000 --algo DQN --env PongNoFrameskip-v4
+    python examples/DQN_variant/train.py --train_total_steps 5000 --algo DDQN --env PongNoFrameskip-v4
+    python examples/DQN_variant/train.py --train_total_steps 5000 --dueling True --env PongNoFrameskip-v4
+    python examples/PPO/train.py --train_total_steps 5000 --env HalfCheetah-v1
+    python examples/SAC/train.py --train_total_steps 5000 --env HalfCheetah-v1
+    python examples/TD3/train.py --train_total_steps 5000 --env HalfCheetah-v1
+    python examples/OAC/train.py --train_total_steps 5000 --env HalfCheetah-v1
+    python examples/DDPG/train.py --train_total_steps 5000 --env HalfCheetah-v1
+}
+
 function print_usage() {
     echo -e "\n${RED}Usage${NONE}:
     ${BOLD}$0${NONE} [OPTION]"
@@ -143,13 +162,6 @@ function run_test_with_fluid() {
     done
 }
 
-function run_cartpole_test {
-    for exp in QuickStart DQN
-    do
-        python examples/${exp}/train.py
-    done
-}
-
 function run_import_test {
     export CUDA_VISIBLE_DEVICES=""
 
@@ -237,7 +249,8 @@ function main() {
         pip install -r .teamcity/requirements.txt
         pip install /data/paddle_package/paddlepaddle_gpu-2.1.0.post101-cp38-cp38-linux_x86_64.whl
         run_test_with_gpu $env
-        run_cartpole_test $env
+        pip install tqdm  # for example test
+        run_example_test $env
 
         run_test_with_fluid
         ############
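Why the cp into each example directory works: Python prepends the running script's directory to sys.path, so the local gym.py shadows the installed gym package when train.py does import gym. A minimal sketch of that resolution order (the examples/DQN path is illustrative, not taken from the commit):

import sys
print(sys.path[0])  # the script's own directory, e.g. '.../examples/DQN'
import gym          # picks up the copied parl/tests/gym.py, not the real gym
env = gym.make('CartPole-v0')  # returns the mock CartPoleEnv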

parl/env/atari_wrappers.py

Lines changed: 17 additions & 2 deletions
@@ -1,5 +1,18 @@
-# Third party code
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
 #
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# Third party code
 # The following code are copied or modified from:
 # https://github.com/ray-project/ray/blob/master/python/ray/rllib/env/atari_wrappers.py
 
@@ -282,7 +295,9 @@ def step(self, action):
 
     def reset(self, **kwargs):
         obs = self._env.reset(**kwargs)
-        if self._get_curr_episode() == self._end_episode:
+        # During the noop reset in NoopResetEnv, the env may be reset multiple times (possible
+        # with the mock env, almost impossible with the Atari env), so == may never be met; >= avoids an infinite loop.
+        if self._get_curr_episode() >= self._end_episode:
             self._was_real_done = True
             self._eval_rewards = \
                 self._monitor.get_episode_rewards()[-self._eval_episodes:]
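A toy sketch of the failure mode the new comment describes, with simplified counter names (not the wrapper's real code): when one reset can advance the episode counter by more than one, strict equality can be stepped over and never fire, while >= always terminates.

import random

curr_episode, end_episode = 0, 5
while True:
    # a noop reset may count several episodes at once, as in the mock env
    curr_episode += random.randint(1, 3)
    # '== end_episode' could jump from 4 to 6 and spin forever; '>=' cannot
    if curr_episode >= end_episode:
        break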

parl/env/continuous_wrappers.py

Lines changed: 6 additions & 1 deletion
@@ -23,7 +23,12 @@ def __init__(self, env):
             [low_bound, high_bound].
         """
         gym.Wrapper.__init__(self, env)
-        assert isinstance(self.env.action_space, gym.spaces.Box)
+        assert hasattr(
+            self.env.action_space,
+            'low'), 'action space should be instance of gym.spaces.Box'
+        assert hasattr(
+            self.env.action_space,
+            'high'), 'action space should be instance of gym.spaces.Box'
         self.low_bound = self.env.action_space.low[0]
         self.high_bound = self.env.action_space.high[0]
         assert self.high_bound > self.low_bound
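The replaced isinstance check would reject the mock Box defined in parl/tests/gym.py, since it is not a real gym.spaces.Box; the hasattr form only assumes the space carries low/high arrays. A minimal sketch of the duck-typed check, with MockBox as a hypothetical stand-in:

import numpy as np

class MockBox(object):  # hypothetical stand-in that quacks like gym.spaces.Box
    def __init__(self, low, high):
        self.low, self.high = low, high

space = MockBox(low=np.array([-1.0]), high=np.array([1.0]))
assert hasattr(space, 'low'), 'action space should be instance of gym.spaces.Box'
assert hasattr(space, 'high'), 'action space should be instance of gym.spaces.Box'
assert space.high[0] > space.low[0]  # passes for the mock as well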

parl/tests/gym.py

Lines changed: 201 additions & 0 deletions
@@ -0,0 +1,201 @@
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# mock gym environment
+import numpy as np
+from random import random
+
+
+def make(env_name):
+    print('>>>>>>>>> you are testing mock gym env: ', env_name)
+    if env_name == 'CartPole-v0':
+        return CartPoleEnv()
+    elif env_name == 'PongNoFrameskip-v4':
+        return PongEnv()
+    elif env_name == 'HalfCheetah-v1':
+        return HalfCheetahEnv()
+    else:
+        raise NotImplementedError(
+            'Mock env not defined, please check your env name')
+
+
+# mock Box
+class Box(object):
+    def __init__(self, low, high, shape, dtype):
+        self.low = low
+        self.high = high
+        self.shape = shape
+        self.dtype = dtype
+
+
+# mock gym.Wrapper
+class Wrapper(object):
+    def __init__(self, env):
+        self.env = env
+
+    def __getattr__(self, name):
+        if name.startswith('_'):
+            raise AttributeError(
+                "attempted to get missing private attribute '{}'".format(name))
+        return getattr(self.env, name)
+
+
+# mock gym.ObservationWrapper
+class ObservationWrapper(Wrapper):
+    def __init__(self, env):
+        super().__init__(env)
+
+    def reset(self, **kwargs):
+        observation = self.env.reset(**kwargs)
+        return self.observation(observation)
+
+    def step(self, action):
+        observation, reward, done, info = self.env.step(action)
+        return self.observation(observation), reward, done, info
+
+
+# mock gym.RewardWrapper
+class RewardWrapper(Wrapper):
+    def __init__(self, env):
+        super().__init__(env)
+
+
+# Atari Specific
+# mock env.action_space
+class ActionSpace(object):
+    def __init__(self, n, shape=None):
+        self.n = n
+        self.shape = shape
+
+
+# mock env.observation_space
+class ObservationSpace(object):
+    def __init__(self, dim, dtype):
+        self.shape = dim
+        self.dtype = dtype
+
+
+# mock env.spec
+class Spec(object):
+    def __init__(self, id='PongNoFrameskip-v4'):
+        self.id = id
+
+
+# mock gym.spaces
+class spaces(object):
+    def __init__(self):
+        pass
+
+    @staticmethod
+    def Box(high, low, shape, dtype):
+        return ObservationSpace(shape, dtype)
+
+
+# mock CartPole-v0
+class CartPoleEnv(object):
+    def __init__(self):
+        self.observation_space = ObservationSpace((4, ), dtype='int8')
+        self.action_space = ActionSpace(2)
+
+    def step(self, action):
+        action = int(action)
+        obs = np.random.random(4) * 2 - 1
+        reward = np.random.choice([0.0, 1.0])
+        done = np.random.choice([True, False], p=[0.1, 0.9])
+        info = {}
+        return obs, reward, done, info
+
+    def reset(self):
+        obs = np.random.random(4) * 2 - 1
+        return obs
+
+    def seed(self, val):
+        pass
+
+    def close(self):
+        pass
+
+
+# mock PongNoFrameskip-v4
+class PongEnv(object):
+    def __init__(self):
+        class Lives(object):
+            def lives(self):
+                return np.random.randint(0, 5)
+
+        class Ale(object):
+            def __init__(self):
+                self.ale = Lives()
+                self.np_random = np.random
+
+            def get_action_meanings(self):
+                return ['NOOP'] * 6
+
+        self.observation_space = ObservationSpace((210, 160, 3), 'uint8')
+        self.action_space = ActionSpace(6)
+        self.unwrapped = Ale()
+        self.metadata = {'render.modes': []}
+        self.reward_range = [0, 1]
+        self.spec = Spec('PongNoFrameskip-v4')
+
+    def step(self, action):
+        action = int(action)
+        obs = np.random.randint(0, 255, (210, 160, 3), dtype=np.uint8)
+        reward = np.random.choice([0.0, 1.0])
+        done = np.random.choice([True, False], p=[0.1, 0.9])
+        info = {}
+        return obs, reward, done, info
+
+    def reset(self):
+        obs = np.random.randint(0, 255, (210, 160, 3), dtype=np.uint8)
+        return obs
+
+    def close(self):
+        pass
+
+    def seed(self, val):
+        pass
+
+
+# mock mujoco envs
+class HalfCheetahEnv(object):
+    def __init__(self):
+        self.observation_space = Box(
+            high=np.array([np.inf] * 17),
+            low=np.array([-np.inf] * 17),
+            shape=(17, ),
+            dtype=None)
+        self.action_space = Box(
+            high=np.array([1.0] * 6),
+            low=np.array([-1.0] * 6),
+            shape=(6, ),
+            dtype=None)
+        self._max_episode_steps = 1000
+        self._elapsed_steps = 0
+
+    def step(self, action):
+        obs = np.random.randn(17)
+        reward = np.random.choice([0.0, 1.0])
+        done = np.random.choice([True, False], p=[0.01, 0.99])
+        info = {}
+        return obs, reward, done, info
+
+    def reset(self):
+        obs = np.random.randn(17)
+        return obs
+
+    def seed(self, val):
+        pass
+
+    def close(self):
+        pass
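A short usage sketch, exercising only the API the mock defines above (make plus reset/step/close on the returned env):

env = make('CartPole-v0')              # prints the mock-env notice
obs = env.reset()                      # random 4-dim observation in [-1, 1)
obs, reward, done, info = env.step(0)  # random reward and done, empty info
assert obs.shape == (4, )
env.close()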
